// Copyright 1997-2000 Omni Development, Inc.  All rights reserved.
//
// This software may only be used and reproduced according to the
// terms in the file OmniSourceLicense.html, which should be
// distributed with this project and can also be found at
// http://www.omnigroup.com/DeveloperResources/OmniSourceLicense.html.

#import <OmniFoundation/OFCharacterScanner.h>

#import <Foundation/Foundation.h>
#import <OmniBase/OmniBase.h>

#import <OmniFoundation/CFString-OFExtensions.h>
#import <OmniFoundation/OFTrie.h>
#import <OmniFoundation/OFTrieBucket.h>
#import <OmniFoundation/OFTrieNode.h>
#import <OmniFoundation/OFStringDecoder.h>

RCS_ID("$Header: /Network/Source/CVS/OmniGroup/Frameworks/OmniFoundation/OFCharacterScanner.m,v 1.13 2000/10/16 15:32:46 wiml Exp $")

@implementation OFCharacterScanner

// Sentinel character value: scanning code in this file compares peeked characters against it to detect the end of the input.
const unichar OFCharacterScannerEndOfDataCharacter = '\0';
// Set containing CR and LF; created once in +initialize and used by -readLine as its delimiter set.
static NSCharacterSet *endOfLineSet;

/* One-time class setup: builds the shared CR/LF character set used by -readLine. May be invoked more than once; only the first invocation creates the set. */
+ (void)initialize;
{
    static BOOL alreadyInitialized = NO;

    [super initialize];
    if (!alreadyInitialized) {
        alreadyInitialized = YES;
        endOfLineSet = [[NSCharacterSet characterSetWithCharactersInString:@"\r\n"] retain];
    }
}

/* Creates a scanner with no buffered input: every buffer pointer is NULL and the buffer's position within the overall input is 0. */
- init;
{
    self = [super init];
    if (self != nil) {
        inputStringPosition = 0;
        inputBuffer = NULL;
        scanEnd = NULL;
        scanLocation = NULL;
    }
    return self;
}

/* Frees the current input buffer when the scanner was asked to free it (freeInputBuffer is set) and a buffer is actually installed. */
- (void)dealloc;
{
    BOOL shouldFreeBuffer = freeInputBuffer && (inputBuffer != NULL);

    if (shouldFreeBuffer)
        NSZoneFree(NULL, inputBuffer);
    [super dealloc];
}


// Declared methods

/* Called when the scanner needs a new bufferful of data (within this file, -readCharacterCount: calls it directly). Returns YES if more data was made available, NO at EOF. This default implementation always returns NO; the -fetchMoreDataFrom...: methods below install a new buffer and return a matching YES/NO, so they look intended for use by overrides (NOTE(review): confirm against subclasses). */
- (BOOL)fetchMoreData;
{
    return NO;
}

/* Copies the characters of inputString into a freshly allocated buffer and installs it via -fetchMoreDataFromCharacters:length:offset:freeWhenDone:, positioned directly after the end of the current buffer. The scanner is asked to free the copy when done. Returns that method's result (NO for a nil or empty string). */
- (BOOL)fetchMoreDataFromString:(NSString *)inputString;
{
    unsigned int characterCount = [inputString length];
    // The new data logically follows the last character of the current buffer.
    unsigned int followingOffset = inputStringPosition + (scanEnd - inputBuffer);
    unichar *characters = NULL;

    if (characterCount != 0)
        characters = NSZoneMalloc(NULL, characterCount * sizeof(unichar));
    [inputString getCharacters:characters];

    return [self fetchMoreDataFromCharacters:characters length:characterCount offset:followingOffset freeWhenDone:YES];
}

/* Replaces the scanner's input buffer.
 *
 * characters/length  - the new buffer (characters may be NULL only when length is 0).
 * offset             - position of characters[0] within the overall input; asserted to be
 *                      at or before the current scan position, with the buffer reaching at
 *                      least up to that position.
 * doFreeWhenDone     - whether the scanner should NSZoneFree() the buffer later.
 *
 * The previous buffer is freed if the scanner was told to free it. The scan position within the
 * overall input is preserved. Returns YES if a non-empty buffer was installed, NO otherwise
 * (all buffer pointers are then NULL).
 *
 * NOTE(review): a non-NULL buffer passed with length 0 is not retained by the scanner and is not
 * freed here even when doFreeWhenDone is YES -- confirm callers never rely on that.
 */
- (BOOL)fetchMoreDataFromCharacters:(unichar *)characters length:(unsigned int)length offset:(unsigned int)offset freeWhenDone:(BOOL)doFreeWhenDone;
{
    // Absolute scan position, captured before the old buffer goes away.
    unsigned int previousScanPosition = inputStringPosition + ( scanLocation - inputBuffer );
    BOOL haveCharacters;

    OBASSERT(characters != NULL || length == 0);
    OBASSERT(offset <= previousScanPosition);
    OBASSERT((offset + length) >= previousScanPosition);

    if (freeInputBuffer)
        NSZoneFree(NULL, inputBuffer);
    freeInputBuffer = doFreeWhenDone;
    inputStringPosition = offset;

    haveCharacters = (characters != NULL) && (length != 0);
    if (haveCharacters) {
        inputBuffer = characters;
        scanEnd = characters + length;
        // Re-express the preserved absolute position relative to the new buffer.
        scanLocation = characters + previousScanPosition - offset;
    } else {
        inputBuffer = NULL;
        scanEnd = NULL;
        scanLocation = NULL;
    }
    return haveCharacters;
}

/* Invoked by -setScanLocation: after it has reset the buffer pointers because the requested location lies outside the current buffer. This base implementation cannot reposition its source, so it raises OFCharacterConversionExceptionName. */
- (void)_rewindCharacterSource
{
    /* This should not happen if the caller is using -setRewindMark, etc. */
    [NSException raise:OFCharacterConversionExceptionName format:@"Attempt to rewind a nonrewindable stream"];
}

/* Method wrapper around the scannerPeekCharacter() inline (defined outside this file). Code in this file treats a result equal to OFCharacterScannerEndOfDataCharacter as end of data. */
- (unichar)peekCharacter;
{
    return scannerPeekCharacter(self);
}

/* Method wrapper around the scannerSkipPeekedCharacter() inline (defined outside this file); in this file it is always used to step past a character that was just peeked. */
- (void)skipPeekedCharacter;
{
    scannerSkipPeekedCharacter(self);
}

/* Method wrapper around the scannerReadCharacter() inline (defined outside this file); returns whatever that inline returns. */
- (unichar)readCharacter;
{
    return scannerReadCharacter(self);
}

/* Records the current scan location as the rewind mark. Only one mark may exist at a time: raises OFCharacterConversionExceptionName if a mark is already set. */
- (void)setRewindMark
{
    OBPRECONDITION(!haveRewindMark);
    if (haveRewindMark)
        [NSException raise:OFCharacterConversionExceptionName format:@"Attempt to set rewind mark on scanner which already has one"];

    haveRewindMark = YES;
    rewindMarkOffset = [self scanLocation];
}
    
/* Moves the scanner back to the location recorded by -setRewindMark and then clears the mark. Raises OFCharacterConversionExceptionName if no mark is set. */
- (void)rewindToMark
{
    OBPRECONDITION(haveRewindMark);
    if (!haveRewindMark)
        [NSException raise:OFCharacterConversionExceptionName format:@"Attempt to use nonexistent rewind mark"];

    [self setScanLocation:rewindMarkOffset];
    haveRewindMark = NO;
}

/* Clears the rewind mark without moving the scanner. Raises OFCharacterConversionExceptionName if no mark is set. */
- (void)discardRewindMark
{
    OBPRECONDITION(haveRewindMark);
    if (!haveRewindMark)
        [NSException raise:OFCharacterConversionExceptionName format:@"Attempt to discard nonexistent rewind mark"];

    haveRewindMark = NO;
}


/* Returns the value of the scannerScanLocation() inline (defined outside this file). -setRewindMark stores this value and -rewindToMark later passes it back to -setScanLocation:. */
- (unsigned int)scanLocation;
{
    return scannerScanLocation(self);
}

/* Moves the scanner to aLocation, a position within the overall input. If the location falls inside the current buffer (its end included) only the scan pointer moves. Otherwise the buffer is marked empty, the buffer position is set to aLocation, and -_rewindCharacterSource is invoked. */
- (void)setScanLocation:(unsigned int)aLocation;
{
    BOOL locationIsBuffered = NO;

    if (aLocation >= inputStringPosition) {
        unsigned int bufferedLength = (unsigned)(scanEnd - inputBuffer);

        locationIsBuffered = (aLocation - inputStringPosition) <= bufferedLength;
    }

    if (locationIsBuffered) {
        scanLocation = inputBuffer + (aLocation - inputStringPosition);
        return;
    }

    // Not in the buffer: mark the buffer empty and ask the character source to reposition.
    scanEnd = inputBuffer;
    scanLocation = scanEnd;
    inputStringPosition = aLocation;
    [self _rewindCharacterSource];
}

/* Moves the scan position by anOffset characters (which may be negative). A target strictly inside the current buffer is handled by pointer arithmetic alone; any other target is converted to an overall input position and handed to -setScanLocation:. */
- (void)skipCharacters:(int)anOffset;
{
    unichar *target = scanLocation + anOffset;

    if (target >= inputBuffer && target < scanEnd) {
        scanLocation = target;
        return;
    }
    [self setScanLocation:(scanLocation - inputBuffer) + anOffset + inputStringPosition];
}

/* Method wrapper around the scannerScanUpToCharacter() inline (defined outside this file). -scanUpToString: relies on a YES result meaning the scanner now sits on an occurrence of aCharacter. */
- (BOOL)scanUpToCharacter:(unichar)aCharacter;
{
    return scannerScanUpToCharacter(self, aCharacter);
}

/* Method wrapper around the scannerScanUpToCharacterInSet() inline (defined outside this file); returns that inline's result. */
- (BOOL)scanUpToCharacterInSet:(NSCharacterSet *)delimiterCharacterSet;
{
    return scannerScanUpToCharacterInSet(self, delimiterCharacterSet);
}    

// Byte-size threshold: temporary character buffers at or above this size are allocated with NSZoneMalloc() instead of alloca().
#define SAFE_ALLOCA_SIZE (8 * 8192)

/* Scans forward looking for delimiterString. Returns YES with the scanner positioned immediately before the match, or NO once no further candidate for the first character can be found. An empty delimiterString returns YES without moving. Uses the scanner's rewind mark internally, so no mark may be set when this is called. */

- (BOOL)scanUpToString:(NSString *)delimiterString;
{
    unichar *delimiterCharacters;
    int delimiterLength, matchedCount;
    BOOL heapAllocated;
    BOOL found = NO;

    delimiterLength = [delimiterString length];
    if (delimiterLength == 0)
        return YES;

    // Small delimiters live on the stack, large ones on the heap.
    heapAllocated = delimiterLength * sizeof(unichar) >= SAFE_ALLOCA_SIZE;
    if (heapAllocated)
        delimiterCharacters = (unichar *)NSZoneMalloc(NULL, delimiterLength * sizeof(unichar));
    else
        delimiterCharacters = (unichar *)alloca(delimiterLength * sizeof(unichar));
    [delimiterString getCharacters:delimiterCharacters];

    // Each pass jumps to the next occurrence of the first delimiter character and tests for a full match there.
    while (scannerScanUpToCharacter(self, delimiterCharacters[0])) {
        [self setRewindMark];
        matchedCount = 0;
        while (matchedCount < delimiterLength && scannerPeekCharacter(self) == delimiterCharacters[matchedCount]) {
            scannerSkipPeekedCharacter(self);
            matchedCount++;
        }
        // Whether or not it matched, go back to the start of the candidate.
        [self rewindToMark];

        if (matchedCount == delimiterLength) {
            found = YES;
            break;
        }
        // Not a match: step past the candidate's first character and keep looking.
        scannerSkipPeekedCharacter(self);
    }

    if (heapAllocated)
        NSZoneFree(NULL, delimiterCharacters);

    return found;
}

/* Case-insensitive counterpart of -scanUpToString:. Returns YES with the scanner positioned immediately before the first match of delimiterString (each input character may equal either the lowercased or the uppercased delimiter character at that position), or NO when no further candidate can be found. An empty delimiterString returns YES without moving.
 *
 * Case conversion can change a string's length in Unicode. The comparison here is strictly per character, so a converted form whose length differs from the original is not used (the original characters stand in for it); this also keeps the fixed-size buffers from being overrun.
 *
 * Every candidate is examined from its first character and the scanner is rewound afterwards, so delimiters that contain a prefix of themselves (e.g. "fofoo") are found correctly. The rewind is protected by a rewind mark: a mark is set for the duration of each attempt unless the caller already holds one, in which case the caller's mark is left untouched.
 */
- (BOOL)scanUpToStringCaseInsensitive:(NSString *)delimiterString;
{
    NSString *lowerString, *upperString;
    unichar *lowerBuffer, *upperBuffer, *lowerPtr, *upperPtr;
    unsigned int candidateLocation;
    int length, left;
    BOOL stringFound;
    BOOL useMalloc;
    BOOL ownsRewindMark;
    CSBitmap bitmap;
    NSMutableCharacterSet *set;
    unichar c;

    length = [delimiterString length];
    if (length == 0)
        return YES;

    // Only use a case-converted form if it lines up character-for-character with the original.
    lowerString = [delimiterString lowercaseString];
    if ([lowerString length] != (unsigned)length)
        lowerString = delimiterString;
    upperString = [delimiterString uppercaseString];
    if ([upperString length] != (unsigned)length)
        upperString = delimiterString;

    stringFound = NO;
    useMalloc = length * sizeof(unichar) >= SAFE_ALLOCA_SIZE;
    if (useMalloc) {
        lowerBuffer = (unichar *)NSZoneMalloc(NULL, length * sizeof(unichar));
        upperBuffer = (unichar *)NSZoneMalloc(NULL, length * sizeof(unichar));
    } else {
        lowerBuffer = (unichar *)alloca(length * sizeof(unichar));
        upperBuffer = (unichar *)alloca(length * sizeof(unichar));
    }
    [lowerString getCharacters:lowerBuffer];
    [upperString getCharacters:upperBuffer];

    // Candidates start at either case of the first delimiter character.
    set = [[NSMutableCharacterSet alloc] init];
    [set addCharactersInRange:NSMakeRange(*lowerBuffer, 1)];
    [set addCharactersInRange:NSMakeRange(*upperBuffer, 1)];
    bitmap = bitmapForCharacterSetDoRetain(set, NO);

    // If the caller already holds a rewind mark, leave it alone and rewind with -setScanLocation: directly.
    ownsRewindMark = !haveRewindMark;

    while (scannerScanUpToCharacterInCSBitmap(self, bitmap)) {
        if (ownsRewindMark)
            [self setRewindMark];
        candidateLocation = [self scanLocation];

        lowerPtr = lowerBuffer;
        upperPtr = upperBuffer;
        left = length;
        while (left--) {
            c = scannerPeekCharacter(self);
            if ((c != *lowerPtr) && (c != *upperPtr))
                break;
            scannerSkipPeekedCharacter(self);
            lowerPtr++;
            upperPtr++;
        }

        // Matched or not, return to the start of the candidate.
        [self setScanLocation:candidateLocation];
        if (ownsRewindMark)
            [self discardRewindMark];

        if (left == -1) {
            stringFound = YES;
            break;
        }
        // Not a match: step past the candidate's first character so the next search starts just after it.
        scannerSkipPeekedCharacter(self);
    }

    [set release];
    if (useMalloc) {
        NSZoneFree(NULL, lowerBuffer);
        NSZoneFree(NULL, upperBuffer);
    }

    return stringFound;
}

/* Returns the characters from the scan position up to (not including) the next occurrence of `character` within the current buffer, or up to the end of the buffer if it does not occur, and advances the scanner past them. The result may be empty. Returns nil if scannerHasData() reports no data. */
static inline NSString *
readTokenFragmentWithDelimiterCharacter(
    OFCharacterScanner *self,
    unichar character)
{
    unichar *fragmentStart, *cursor, *limit;

    if (!scannerHasData(self))
        return nil;

    fragmentStart = self->scanLocation;
    limit = self->scanEnd;
    for (cursor = fragmentStart; cursor < limit && *cursor != character; cursor++)
        ;
    self->scanLocation = cursor;

    return [NSString stringWithCharacters:fragmentStart length:cursor - fragmentStart];
}

/* Method form of the readTokenFragmentWithDelimiterCharacter() inline above: reads up to the next `character` or the end of the current buffer, whichever comes first. Returns nil when no data is available. */
- (NSString *)readTokenFragmentWithDelimiterCharacter:(unichar)character;
{
    return readTokenFragmentWithDelimiterCharacter(self, character);
}

/* Reads the characters from the scan position up to (not including) the next character that is a member of delimiterBitmapRep, or up to the end of the current buffer, and advances the scanner past them.
 *
 * Returns a string the CALLER OWNS (it must be released or autoreleased), possibly empty, or nil if scannerHasData() reports no data. When forceLowercase is YES the characters are passed through OFLowercaseCharactersInto() first.
 */
static inline NSString *
readRetainedTokenFragmentWithDelimiterCSBitmap(
    OFCharacterScanner *self, 
    CSBitmap delimiterBitmapRep,
    BOOL forceLowercase)
{
    unichar *startLocation;
    CFIndex length;
    
    if (!scannerHasData(self))
        return nil;
    startLocation = self->scanLocation;
    while (self->scanLocation < self->scanEnd) {
        if (characterIsMemberOfCSBitmap(delimiterBitmapRep, *self->scanLocation))
            break;
        self->scanLocation++;
    }

    length = self->scanLocation - startLocation;
    
    if (forceLowercase) {
        unichar *buffer;
        
        buffer = malloc(sizeof(unichar) * length);
        OFLowercaseCharactersInto(startLocation, buffer, length);
        
        // The string now owns the buffer created above. Because the buffer came from malloc(), the
        // contents deallocator must be kCFAllocatorMalloc so that it is released with free(); the
        // default allocator is not guaranteed to be malloc-compatible.
        return (NSString *)CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, buffer, length, kCFAllocatorMalloc);
    } else {
        return (NSString *)CFStringCreateWithCharacters(kCFAllocatorDefault, startLocation, length);
    }
}

/* Autoreleased, non-lowercasing form of readRetainedTokenFragmentWithDelimiterCSBitmap(): reads up to the next delimiter character or the end of the current buffer. Returns nil when no data is available. */
- (NSString *)readTokenFragmentWithDelimiterCSBitmap:
    (CSBitmap)delimiterCSBitmap;
{
    return [readRetainedTokenFragmentWithDelimiterCSBitmap(self, delimiterCSBitmap, NO) autorelease];
}

/* Same as -readTokenFragmentWithDelimiterCSBitmap:, but takes the delimiters as an NSCharacterSet, whose bitmap representation is looked up on every call. Returns nil when no data is available. */
- (NSString *)readTokenFragmentWithDelimiters:(NSCharacterSet *)delimiterCharacterSet;
{
    NSData *bitmapData;
    CSBitmap delimiterBitmap;
    NSString *fragment;

    if (!scannerHasData(self))
        return nil;

    bitmapData = [delimiterCharacterSet bitmapRepresentation];
    delimiterBitmap = [bitmapData bytes];
    fragment = readRetainedTokenFragmentWithDelimiterCSBitmap(self, delimiterBitmap, NO);
    return [fragment autorelease];
}


// Global counters updated by readFullTokenWithDelimiterCSBitmap() below. Nothing in this file reads them.
struct {
    unsigned int calls;      // incremented on every call
    unsigned int nils;       // calls that returned nil because no data was available
    unsigned int fragments;  // fragments read
    unsigned int appends;    // times a fragment had to be appended to an earlier one
    unsigned int lowers;     // never updated in this file
} OFCharacterScannerStats;

/* Reads a complete token: fragments are read and joined until the next character to be scanned is a member of delimiterCSBitmap or no more data is available. Unlike the fragment readers, this continues across buffer boundaries. Returns an autoreleased string (possibly empty), or nil if no data was available at all. Updates OFCharacterScannerStats as it goes. */
static inline NSString *
readFullTokenWithDelimiterCSBitmap(OFCharacterScanner *self, CSBitmap delimiterCSBitmap, BOOL forceLowercase)
{
    NSString *token = nil;

    OFCharacterScannerStats.calls++;

    if (!scannerHasData(self)) {
        OFCharacterScannerStats.nils++;
        return nil;
    }

    for (;;) {
        // Each piece comes back retained; ownership passes to `token` or is released below.
        NSString *piece = readRetainedTokenFragmentWithDelimiterCSBitmap(self, delimiterCSBitmap, forceLowercase);

        if (piece == nil)
            break;

        OFCharacterScannerStats.fragments++;

        if (token == nil) {
            token = piece;
        } else {
            // Uncommon: the token spans more than one fragment.
            NSString *combined;

            OFCharacterScannerStats.appends++;

            combined = [[token stringByAppendingString:piece] retain];
            [token release];
            [piece release];
            token = combined;
        }

        if (characterIsMemberOfCSBitmap(delimiterCSBitmap, scannerPeekCharacter(self)))
            break;
    }

    return [token autorelease];
}

/* Method form of the readFullTokenWithDelimiterCSBitmap() inline above: reads a whole token terminated by any character in delimiterCSBitmap, passing forceLowercase through. Returns nil when no data is available. */
- (NSString *)readFullTokenWithDelimiterCSBitmap:(CSBitmap)delimiterCSBitmap forceLowercase:(BOOL)forceLowercase;
{
    return readFullTokenWithDelimiterCSBitmap(self, delimiterCSBitmap, forceLowercase);
}

/* Same as -readFullTokenWithDelimiterCSBitmap:forceLowercase: with forceLowercase NO. */
- (NSString *)readFullTokenWithDelimiterCSBitmap:(CSBitmap)delimiterCSBitmap;
{
    return readFullTokenWithDelimiterCSBitmap(self, delimiterCSBitmap, NO);
}

/* Reads a complete token terminated by delimiterCharacter: fragments are read and concatenated until the next character to be scanned is the delimiter or no more data is available. The result is lowercased with -lowercaseString when forceLowercase is YES. Returns nil if no data was available at all. */
static inline NSString *
readFullTokenWithDelimiterCharacter(OFCharacterScanner *self, unichar delimiterCharacter, BOOL forceLowercase)
{
    NSString *token = nil;

    if (!scannerHasData(self))
        return nil;

    for (;;) {
        NSString *piece = readTokenFragmentWithDelimiterCharacter(self, delimiterCharacter);

        if (piece == nil)
            break;
        token = (token != nil) ? [token stringByAppendingString:piece] : piece;

        if (scannerPeekCharacter(self) == delimiterCharacter)
            break;
    }

    if (token != nil && forceLowercase)
        token = [token lowercaseString];
    return token;
}

/* Method form of the readFullTokenWithDelimiterCharacter() inline above: reads a whole token terminated by delimiterCharacter, lowercasing it when forceLowercase is YES. Returns nil when no data is available. */
- (NSString *)readFullTokenWithDelimiterCharacter:(unichar)delimiterCharacter forceLowercase:(BOOL)forceLowercase;
{
    return readFullTokenWithDelimiterCharacter(self, delimiterCharacter, forceLowercase);
}

/* Same as -readFullTokenWithDelimiterCharacter:forceLowercase: with forceLowercase NO. */
- (NSString *)readFullTokenWithDelimiterCharacter:(unichar)delimiterCharacter;
{
    return readFullTokenWithDelimiterCharacter(self, delimiterCharacter, NO);
}

/* Reads a whole token terminated by any character in delimiterCharacterSet. The set's bitmap representation is looked up on every call; callers that already hold a CSBitmap can use -readFullTokenWithDelimiterCSBitmap:forceLowercase: directly. Returns nil when no data is available. */
- (NSString *)readFullTokenWithDelimiters:(NSCharacterSet *)delimiterCharacterSet forceLowercase:(BOOL)forceLowercase;
{
    NSData *bitmapData;
    CSBitmap delimiterBitmap;

    if (!scannerHasData(self))
        return nil;

    bitmapData = [delimiterCharacterSet bitmapRepresentation];
    delimiterBitmap = [bitmapData bytes];
    return readFullTokenWithDelimiterCSBitmap(self, delimiterBitmap, forceLowercase);
}

/* Reads a token made up of characters from tokenSet by using the inverse of tokenSet as the delimiter set. The inverted set is built on every call. */
- (NSString *)readFullTokenOfSet:(NSCharacterSet *)tokenSet;
{
    return [self readFullTokenWithDelimiters:[tokenSet invertedSet] forceLowercase:NO];
}

/* Reads the characters up to the next CR or LF and then consumes the line ending: one CR if present, followed by one LF if present (so CR, LF and CRLF are all accepted). Returns the line without its terminator (possibly empty), or nil when no data is available. */
- (NSString *)readLine;
{
    NSString *lineContents = [self readFullTokenWithDelimiters:endOfLineSet forceLowercase:NO];

    if (lineContents != nil) {
        if (scannerPeekCharacter(self) == '\r')
            scannerSkipPeekedCharacter(self);
        if (scannerPeekCharacter(self) == '\n')
            scannerSkipPeekedCharacter(self);
    }
    return lineContents;
}

/* Reads exactly `count` characters and returns them as a string, advancing the scanner past them. If the current buffer does not hold enough characters, more data is fetched with -fetchMoreData as often as needed. Returns nil if the input ends before `count` characters could be read; characters consumed up to that point are not restored. */
- (NSString *)readCharacterCount:(unsigned int)count;
{
    unsigned int bufferedCharacterCount;

    bufferedCharacterCount = scanEnd - scanLocation;
    if (count <= bufferedCharacterCount) {
        // Fast path: everything requested is already buffered.
        NSString *result;

        result = [NSString stringWithCharacters:scanLocation length:count];
        scanLocation += count;
        return result;
    } else {
        NSMutableString *result;
        unsigned int charactersNeeded;

        result = [NSMutableString string];
        charactersNeeded = count;
        do {
            if (bufferedCharacterCount > 0) {
                NSString *substring;

                // -appendString: copies the characters, so the no-copy wrapper never outlives the buffer.
                substring = [[NSString alloc] initWithCharactersNoCopy:scanLocation length:bufferedCharacterCount freeWhenDone:NO];
                [result appendString:substring];
                [substring release];

                // Mark these characters as consumed BEFORE fetching. The fetch preserves the scan
                // position, so leaving it behind the end of the buffer would make the next pass
                // start from the wrong place (and trips the offset assertion in
                // -fetchMoreDataFromCharacters:length:offset:freeWhenDone:).
                scanLocation += bufferedCharacterCount;
                charactersNeeded -= bufferedCharacterCount;
            }
            if (![self fetchMoreData])
                return nil;
            bufferedCharacterCount = scanEnd - scanLocation;
        } while (charactersNeeded > bufferedCharacterCount);
        if (charactersNeeded > 0) {
            NSString *substring;

            substring = [[NSString alloc] initWithCharactersNoCopy:scanLocation length:charactersNeeded freeWhenDone:NO];
            [result appendString:substring];
            [substring release];
            scanLocation += charactersNeeded;
        }
        OBASSERT([result length] == count);
        return result;
    }
}

/* Reads up to maximumDigits consecutive ASCII decimal digits ('0'-'9') and returns their value. Scanning stops at the first character that is not a digit, which is left unread. Returns 0 if there are no digits at the scan position. Overflow of the unsigned int result is not detected. */
- (unsigned int)scanUnsignedIntegerMaximumDigits:(unsigned int)maximumDigits;
{
    unsigned int resultInt = 0;

    while (maximumDigits-- > 0) {
        unichar nextCharacter;

        nextCharacter = scannerPeekCharacter(self);
        // Stop at the first non-digit; nothing further can be consumed, so continuing to loop
        // would only re-peek the same character up to maximumDigits times.
        if (nextCharacter < '0' || nextCharacter > '9')
            break;
        scannerSkipPeekedCharacter(self);
        resultInt = resultInt * 10 + (nextCharacter - '0');
    }
    return resultInt;
}

/* Reads an optional leading '+' or '-' followed by up to maximumDigits decimal digits (the sign does not count toward the limit) and returns the signed value. Returns 0 if no digits follow. */
- (int)scanIntegerMaximumDigits:(unsigned int)maximumDigits;
{
    unichar signCharacter = scannerPeekCharacter(self);
    BOOL isNegative = (signCharacter == '-');
    unsigned int magnitude;

    if (isNegative || signCharacter == '+')
        scannerSkipPeekedCharacter(self);

    magnitude = [self scanUnsignedIntegerMaximumDigits:maximumDigits];
    return (isNegative ? -1 : 1) * (int)magnitude;
}

/* Tests whether the input at the scan position begins with `string` (exact, case-sensitive comparison). Returns YES on a match. The scanner is advanced past the string only when it matched and doPeek is NO; otherwise it is returned to where it started. An empty string always matches. Uses the scanner's rewind mark internally, so no mark may be set when this is called. */
- (BOOL)scanString:(NSString *)string peek:(BOOL)doPeek;
{
    unsigned int expectedLength = [string length];
    unsigned int index;
    unichar *expected;
    BOOL heapAllocated;
    BOOL matched = YES;

    // Small strings live on the stack, large ones on the heap.
    heapAllocated = expectedLength * sizeof(unichar) >= SAFE_ALLOCA_SIZE;
    if (heapAllocated)
        expected = (unichar *)NSZoneMalloc(NULL, expectedLength * sizeof(unichar));
    else
        expected = (unichar *)alloca(expectedLength * sizeof(unichar));
    [string getCharacters:expected];

    [self setRewindMark];
    for (index = 0; index < expectedLength; index++) {
        if (scannerReadCharacter(self) != expected[index]) {
            matched = NO;
            break;
        }
    }

    if (heapAllocated)
        NSZoneFree(NULL, expected);

    if (matched && !doPeek)
        [self discardRewindMark];
    else
        [self rewindToMark];

    return matched;
}

// Reads an object's class by treating the object pointer as a pointer to a Class (i.e. reading its first word) instead of sending a message; used by the trie-walking loops below.
#define CLASS_OF(anObject) (*(Class *)(anObject))

/* Walks `trie` from its head node, one input character per step, and returns the bucket of the longest trie entry that matches the upcoming input, or nil if no entry matches.
 *
 * While walking, each shorter entry that ends along the way is remembered (together with a rewind mark) as a fallback. Bucket characters are compared against both the bucket's lowerCharacters and upperCharacters, so either may match at each position.
 *
 * Scanner position afterwards: just past the matched characters when a bucket matched in full, or just past the fallback entry when the walk had to fall back. NOTE(review): when nil is returned the characters consumed during the walk are not restored -- confirm callers expect that. Uses the scanner's rewind mark, so no mark may be set when this is called.
 */
- (OFTrieBucket *)readLongestTrieElement:(OFTrie *)trie;
{
    OFTrieNode *node;
    Class trieNodeClass;
    OFTrieBucket *bucket, *lastFoundBucket = nil;
    unichar *lowerCheck, *upperCheck;
    unichar currentCharacter;

    node = [trie headNode];
    // A zero count on the head node is treated as "nothing can match" (presumably an empty trie).
    if (!node->count)
	return nil;

    // Children whose class differs from the head node's class are treated as buckets rather than interior nodes.
    trieNodeClass = CLASS_OF(node);
    while ((currentCharacter = scannerPeekCharacter(self)) != OFCharacterScannerEndOfDataCharacter) {
	if ((node = trieFindChild(node, currentCharacter))) {
	    if (CLASS_OF(node) != trieNodeClass) {
		// Reached a bucket: the rest of the entry is the zero-terminated character array it carries.
		bucket = (OFTrieBucket *)node;
		lowerCheck = bucket->lowerCharacters;
		upperCheck = bucket->upperCharacters;
		
		// Consume the character that selected this bucket, then compare the remaining characters.
		scannerSkipPeekedCharacter(self);
		while (*lowerCheck && ((currentCharacter = scannerPeekCharacter(self)) != OFCharacterScannerEndOfDataCharacter)) {
		    if (currentCharacter != *lowerCheck && currentCharacter != *upperCheck)
			break;
		    scannerSkipPeekedCharacter(self);
		    lowerCheck++, upperCheck++;
		}
		if (*lowerCheck) {
		    // Bucket characters remain unmatched (mismatch or end of data): fall back below.
		    break;
		} else {
                    // Full match: the fallback mark, if any, is no longer needed.
                    if (lastFoundBucket)
                        [self discardRewindMark];
		    return bucket;
		}
	    } else if (!*node->characters) {
		// The node's first `characters` entry is 0: the code takes its first child as the bucket of an entry ending here (presumably the end-of-string slot -- confirm against OFTrieNode).
		// lastFoundScanLocation = scannerScanLocation(self) + 1;
                // Replace any earlier fallback mark; the mark sits on the current character, which has not been skipped yet.
                if (lastFoundBucket)
                    [self discardRewindMark];  // TODO: Make these inlines for speed, this is an inner loop
                [self setRewindMark];
		lastFoundBucket = *node->children;
	    }
	} else {
	    // No child for this character: the walk cannot continue.
	    break;
	}
	scannerSkipPeekedCharacter(self);
    }
    if (lastFoundBucket) {
	// Return to the fallback entry's mark and step over the marked character, ending just past that entry.
	[self rewindToMark];
        scannerSkipPeekedCharacter(self);
        // [self setScanLocation:lastFoundScanLocation];
    }
    return lastFoundBucket;
}

/* Walks `trie` from its head node, one input character per step, and returns the bucket of the first (shortest) trie entry that matches the upcoming input, or nil if none matches. Bucket characters are compared against both lowerCharacters and upperCharacters. No rewind mark is used, so characters consumed during the walk are never restored.
 *
 * NOTE(review): unlike -readLongestTrieElement:, this method does not skip the character that selected a bucket before comparing the bucket's characters, and it returns an entry that ends at an interior node without skipping the current character. That looks inconsistent with the longest-match variant -- confirm against the OFTrie layout before relying on the scanner position after a match.
 */
- (OFTrieBucket *)readShortestTrieElement:(OFTrie *)trie;
{
    OFTrieNode *node;
    Class trieNodeClass;
    OFTrieBucket *bucket;
    unichar *lowerCheck, *upperCheck;
    unichar currentCharacter;

    node = [trie headNode];
    // A zero count on the head node is treated as "nothing can match" (presumably an empty trie).
    if (!node->count)
	return nil;

    // Children whose class differs from the head node's class are treated as buckets rather than interior nodes.
    trieNodeClass = CLASS_OF(node);
    while ((currentCharacter = scannerPeekCharacter(self)) != OFCharacterScannerEndOfDataCharacter) {
	if ((node = trieFindChild(node, currentCharacter))) {
	    if (CLASS_OF(node) != trieNodeClass) {
		// Reached a bucket: compare the input against its zero-terminated character arrays.
		bucket = (OFTrieBucket *)node;
		lowerCheck = bucket->lowerCharacters;
		upperCheck = bucket->upperCharacters;
		
		while (*lowerCheck && ((currentCharacter = scannerPeekCharacter(self)) != OFCharacterScannerEndOfDataCharacter)) {
		    if (currentCharacter != *lowerCheck && currentCharacter != *upperCheck)
			break;
		    scannerSkipPeekedCharacter(self);
		    lowerCheck++, upperCheck++;
		}
		if (*lowerCheck) {
		    // Bucket characters remain unmatched (mismatch or end of data): no match.
		    break;
		} else {
		    return bucket;
		}
	    } else if (!*node->characters) {
		// The node's first `characters` entry is 0: its first child is returned as the bucket of an entry ending here (presumably the end-of-string slot -- confirm against OFTrieNode).
		return *node->children;
	    }
	} else {
	    // No child for this character: the walk cannot continue.
	    break;
	}
	scannerSkipPeekedCharacter(self);
    }
    return nil;
}

// Debugging methods

/* Extends the superclass's debug dictionary with the scanner's state: the buffered characters, their count and the scan offset within the buffer (only when a buffer is installed), plus the buffer's position within the overall input. All numeric values are stored as strings. */
- (NSMutableDictionary *)debugDictionary;
{
    NSMutableDictionary *debugDictionary;

    debugDictionary = [super debugDictionary];

    if (inputBuffer) {
        // Pointer differences are ptrdiff_t, which is wider than int on 64-bit targets; cast explicitly to match the format specifier.
        [debugDictionary setObject:[NSString stringWithCharacters:inputBuffer length:scanEnd - inputBuffer] forKey:@"inputString"];
        [debugDictionary setObject:[NSString stringWithFormat:@"%ld", (long)(scanEnd - inputBuffer)] forKey:@"inputStringLength"];
        [debugDictionary setObject:[NSString stringWithFormat:@"%ld", (long)(scanLocation - inputBuffer)] forKey:@"inputScanLocation"];
    }
    // inputStringPosition is unsigned, so print it as such.
    [debugDictionary setObject:[NSString stringWithFormat:@"%u", inputStringPosition] forKey:@"inputStringPosition"];

    return debugDictionary;
}

@end

