// Copyright 1998-2000 Omni Development, Inc.  All rights reserved.
//
// This software may only be used and reproduced according to the
// terms in the file OmniSourceLicense.html, which should be
// distributed with this project and can also be found at
// http://www.omnigroup.com/DeveloperResources/OmniSourceLicense.html.

#import "OHWord.h"

#import <Foundation/Foundation.h>
#import <AppKit/AppKit.h>
#import <OmniBase/OmniBase.h>
#import <OmniFoundation/OmniFoundation.h>

#import <OmniHTML/OHHTMLView.h>
#import <OmniHTML/OHTextBuilder.h>

RCS_ID("$Header: /Network/Source/CVS/OmniGroup/Frameworks/OmniHTML/View.subproj/OHWord.m,v 1.18 2000/03/25 06:34:02 wjs Exp $")

@implementation OHWord

// Cached character-set bitmaps.  All of them are filled in by +initialize and then used by -initWithStringScanner:paragraphBounds:paragraphGlyphRange:inHTMLView: to classify and scan characters.

// notRomanWordCSBitmap: attachments, spaces, tabs, returns, and the CJK/kana ranges.  romanRomanWordCSBitmap is built from the inverted set, i.e. characters that group together into a "Roman" word.
static CSBitmap romanRomanWordCSBitmap, notRomanWordCSBitmap;
// spaceCSBitmap: the whitespace character set minus the no-break space and the tab.  notSpaceCSBitmap is built from the inverted set.
static CSBitmap spaceCSBitmap, notSpaceCSBitmap;
// newlineReturnCSBitmap: "\n" and "\r".  (Built in +initialize, but not otherwise referenced in this file.)
static CSBitmap newlineReturnCSBitmap;
// attachmentCSBitmap: just NSAttachmentCharacter.
static CSBitmap attachmentCSBitmap;

// Builds the file-static character-set bitmaps used to split text into words.
// The work is done only once, for OHWord itself: the runtime also sends +initialize on behalf of any subclass that doesn't implement it, and rebuilding the bitmaps then would needlessly recreate (and re-retain) all of them.
+ (void)initialize;
{
    NSCharacterSet *attachmentSet, *returnSet;
    NSMutableCharacterSet *spaceSet, *notRomanWordSet;
    unichar attachmentCharacter = NSAttachmentCharacter;
    unichar noBreakSpace = NO_BREAK_SPACE;

    if (self != [OHWord class])
        return;

    // Attachments (e.g., inline images) are represented by a single special character
    attachmentSet = [NSCharacterSet characterSetWithCharactersInString:[NSString stringWithCharacters:&attachmentCharacter length:1]];
    attachmentCSBitmap = bitmapForCharacterSetDoRetain(attachmentSet, YES);

    // Spaces: whitespace, except for the no-break space and the tab (tabs are their own word type)
    spaceSet = [[NSCharacterSet whitespaceCharacterSet] mutableCopyWithZone:[(NSObject *)self zone]];
    [spaceSet removeCharactersInString:[NSString stringWithCharacter:noBreakSpace]];
    [spaceSet removeCharactersInString:@"\t"];
    spaceCSBitmap = bitmapForCharacterSetDoRetain(spaceSet, YES);
    notSpaceCSBitmap = bitmapForCharacterSetDoRetain([spaceSet invertedSet], YES);

    returnSet = [NSCharacterSet characterSetWithCharactersInString:@"\n\r"];
    newlineReturnCSBitmap = bitmapForCharacterSetDoRetain(returnSet, YES);

    // Everything that terminates a Roman word: attachments, spaces, tabs, returns...
    notRomanWordSet = [attachmentSet mutableCopyWithZone:[(NSObject *)self zone]];
    [notRomanWordSet formUnionWithCharacterSet:spaceSet];
    [notRomanWordSet addCharactersInString:@"\t"];
    [notRomanWordSet formUnionWithCharacterSet:returnSet];

    // We own spaceSet (it came from -mutableCopyWithZone:) and this was its last use
    [spaceSet release];

    // ...and other characters that shouldn't group with Roman words.  The "International Layout in CSS" document (currently at http://www.w3.org/TR/1999/WD-i18n-format-19990127/) says that in "normal" mode we're free to break wherever we want, but recommends that in addition to breaking between any CJK characters (e.g., Kanji) we also allow breaks between small katakana and hiragana characters because that is the preference in modern Japanese typography.

    // The character ranges are from ftp://ftp.unicode.org/Public/3.0-Update/Blocks-3.beta.txt
    // Note that NSMakeRange() takes (location, length), so the second argument is the size of the block, not its last character.

    // CJK
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x2e80, 0x0080)]; // CJK Radicals Supplement
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x3000, 0x0040)]; // CJK Symbols and Punctuation
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x3200, 0x0100)]; // Enclosed CJK Letters and Months
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x3300, 0x0100)]; // CJK Compatibility
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x3400, 0x19b6)]; // CJK Unified Ideographs Extension A
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x4e00, 0x5200)]; // CJK Unified Ideographs
    [notRomanWordSet addCharactersInRange:NSMakeRange(0xf900, 0x0200)]; // CJK Compatibility Ideographs
    [notRomanWordSet addCharactersInRange:NSMakeRange(0xfe30, 0x0020)]; // CJK Compatibility Forms

    // Hiragana and Katakana
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x3040, 0x0060)]; // Hiragana
    [notRomanWordSet addCharactersInRange:NSMakeRange(0x30a0, 0x0060)]; // Katakana

    // Cache the bitmap representations of this set and its inverse
    notRomanWordCSBitmap = bitmapForCharacterSetDoRetain(notRomanWordSet, YES);
    romanRomanWordCSBitmap = bitmapForCharacterSetDoRetain([notRomanWordSet invertedSet], YES);
    [notRomanWordSet release];
}


// Init

// Scans one "word" from the scanner's current location and records its type, character range, x position and width.
//
// scanner: positioned at the first character of the word; on return it has been advanced past the word.
// paragraphBounds: used for the right edge of the word when it is the last thing in the paragraph.
// paragraphGlyphRange: the glyph range of the enclosing paragraph; used to detect that case.
// htmlView: supplies the layout manager that is asked for glyph ranges and locations.
//
// A word is one of: a run of Roman-word characters, a single tab, a run of spaces, a single attachment character, a newline ("\r", "\n" or "\r\n"), or a single non-Roman word character (e.g. Kanji).
// Returns self, or nil if the superclass initializer fails.
- initWithStringScanner:(OFStringScanner *)scanner paragraphBounds:(NSRect)paragraphBounds paragraphGlyphRange:(NSRange)paragraphGlyphRange inHTMLView:(OHHTMLView *)htmlView;
{
    NSLayoutManager *layoutManager;
    unichar ch;
    NSRange wordGlyphRange;
    NSPoint firstCharacterPoint, lastCharacterPlusOnePoint;

    // Give the superclass its chance to initialize before we touch our own state
    if (!(self = [super init]))
        return nil;

    layoutManager = [htmlView layoutManager];

    // Classify the word by its first character, then consume the rest of it
    ch = scannerPeekCharacter(scanner);
    characterRange.location = scannerScanLocation(scanner);
    if (characterIsMemberOfCSBitmap(romanRomanWordCSBitmap, ch)) {
        // letters
        scannerScanUpToCharacterInCSBitmap(scanner, notRomanWordCSBitmap);
        type = OHWordTypeLetters;
    } else if (ch == '\t') {
        // tab
        scannerSkipPeekedCharacter(scanner);
        type = OHWordTypeTab;
    } else if (characterIsMemberOfCSBitmap(spaceCSBitmap, ch)) {
        // space
        scannerScanUpToCharacterInCSBitmap(scanner, notSpaceCSBitmap);
        type = OHWordTypeSpaces;
    } else if (characterIsMemberOfCSBitmap(attachmentCSBitmap, ch)) {
        // attachment (e.g., inline image)
        scannerSkipPeekedCharacter(scanner);
        type = OHWordTypeAttachment;
    } else if (ch == '\r' || ch == '\n') {
        // newline
        scannerSkipPeekedCharacter(scanner);
        type = OHWordTypeReturn;
        if (ch == '\r' && scannerPeekCharacter(scanner) == '\n') {
            scannerSkipPeekedCharacter(scanner);
            // Treat "\r\n" sequence as single newline.
        }
    } else {
        // A non-Roman "word" character, e.g. Kanji.  Each one is its own word so that lines can break between them.
        scannerSkipPeekedCharacter(scanner);
        type = OHWordTypeLetters;
    }

    characterRange.length = scannerScanLocation(scanner) - characterRange.location;

    wordGlyphRange = [layoutManager glyphRangeForCharacterRange:characterRange actualCharacterRange:NULL];

    // TODO: ASSERT wordGlyphRange is contained by paragraphGlyphRange.

    firstCharacterPoint = [layoutManager locationForGlyphAtIndex:wordGlyphRange.location];
    x = firstCharacterPoint.x;

    switch (type) {
        case OHWordTypeReturn:
            // NSLayoutManager will make returns as wide as the remaining container width, which in this case will be darn big.
            width = 0.0; // We want them to take up no space
            break;
        case OHWordTypeAttachment:
        case OHWordTypeTab:
            width = 0.0; // Calculated differently
            break;
        default: // Letters and spaces
            // The width is the distance from our first glyph to the glyph following the word, or to the right edge of the paragraph if nothing follows the word within the paragraph.
            if (NSMaxRange(wordGlyphRange) < NSMaxRange(paragraphGlyphRange))
                lastCharacterPlusOnePoint = [layoutManager locationForGlyphAtIndex:NSMaxRange(wordGlyphRange)];
            else
                lastCharacterPlusOnePoint = NSMakePoint(NSMaxX(paragraphBounds), NSMaxY(paragraphBounds));
            width = ceil(lastCharacterPlusOnePoint.x - firstCharacterPoint.x);

            if (width < 0.0) {
                // The width ends up being negative when rendering http://www.w3.org/TR/1998/REC-xml-19980210.  The word's string is @"|\U2020[#xAC00-#xD7A3]" (or @"|\262[#xAC00-#xD7A3]" if you put it in a dictionary and print that), firstCharacterPoint={x=18889, y=9} and lastCharacterPlusOnePoint={x=2214.78394, y=12}.
                // Returning a negative width is, of course, bad.  We end up trying to make a width range out of it, which means that it becomes a very large unsigned int, messing up our layout.
                width = 0.0;
            }
            break;
    }

    return self;
}

// Returns the attachment cell for an attachment word, looked up via the text attributes at this word's first character in the view's text storage.  Returns nil for every other word type.
- (OHBasicCell *)attachmentCellInHTMLView:(OHHTMLView *)htmlView;
{
    NSDictionary *attributes;
    id attachment;

    if (type != OHWordTypeAttachment)
        return nil;

    attributes = [[htmlView textStorage] attributesAtIndex:characterRange.location effectiveRange:NULL];
    attachment = [attributes objectForKey:NSAttachmentAttributeName];
    return (OHBasicCell *)[attachment attachmentCell];
}

// Returns the width of this word: the width of the attachment cell's frame for attachments, the stored width for everything else.
- (float)widthInHTMLView:(OHHTMLView *)htmlView;
{
    if (type == OHWordTypeAttachment)
        return NSWidth([[self attachmentCellInHTMLView:htmlView] cellFrame]);

    // This method should never be called for tabs, because they need to know their paragraph (for the ruler) and horizontal location to calculate their width.
    // OHLine will tell us our width (using -setTabWidth:) rather than asking for it.
    // If it does happen anyway, we complain (in builds with assertions enabled) and fall through to returning whatever width we currently have.
    OBASSERT(type != OHWordTypeTab);

    return width;
}

// Returns the range of widths this word can occupy: a fixed estimate for tabs, the attachment cell's own range for attachments, and a degenerate range (minimum == maximum) of the rounded-up stored width for everything else.
- (OHWidthRange)widthRangeInHTMLView:(OHHTMLView *)htmlView;
{
    double roundedWidth;

    if (type == OHWordTypeTab) {
        // TODO: Calculate a tab's width range more accurately.  See also -positionOfTabStopFollowingPosition:
        return OHMakeWidthRange(1.0, 56.0);
    }

    if (type == OHWordTypeAttachment)
        return [[self attachmentCellInHTMLView:htmlView] widthRange];

    // Letters, spaces and returns can't stretch or shrink
    roundedWidth = ceil(width);
    return OHMakeWidthRange(roundedWidth, roundedWidth);
}

// Stores an externally computed width for a tab word.  Calling this on any other word type violates the precondition.
- (void)setTabWidth:(float)newWidth;
{
    // Only tabs have their width supplied from outside
    OBPRECONDITION(type == OHWordTypeTab);

    width = newWidth;
}

// Debugging

// Maps a word type to the name of its enumeration constant, for debugging output.  Unrecognized values map to @"*** Unknown ***".
static NSString *OHStringFromWordType(OHWordType aType)
{
    NSString *typeName;

    switch (aType) {
        case OHWordTypeLetters:
            typeName = @"OHWordTypeLetters";
            break;
        case OHWordTypeSpaces:
            typeName = @"OHWordTypeSpaces";
            break;
        case OHWordTypeTab:
            typeName = @"OHWordTypeTab";
            break;
        case OHWordTypeReturn:
            typeName = @"OHWordTypeReturn";
            break;
        case OHWordTypeAttachment:
            typeName = @"OHWordTypeAttachment";
            break;
        default: // This shouldn't ever happen
            typeName = @"*** Unknown ***";
            break;
    }

    return typeName;
}

// Adds this word's width, character range, x position and type name to the superclass's debug dictionary and returns it.
- (NSMutableDictionary *)debugDictionary;
{
    NSMutableDictionary *dictionary = [super debugDictionary];
    NSString *widthString, *xString;

    // Floating point values are reported with one decimal place
    widthString = [NSString stringWithFormat:@"%0.1f", width];
    xString = [NSString stringWithFormat:@"%0.1f", x];

    [dictionary setObject:widthString forKey:@"width"];
    [dictionary setObject:NSStringFromRange(characterRange) forKey:@"characterRange"];
    [dictionary setObject:xString forKey:@"x"];
    [dictionary setObject:OHStringFromWordType(type) forKey:@"type"];

    return dictionary;
}

@end
