// Copyright 1997-2002 Omni Development, Inc.  All rights reserved.
//
// This software may only be used and reproduced according to the
// terms in the file OmniSourceLicense.html, which should be
// distributed with this project and can also be found at
// http://www.omnigroup.com/DeveloperResources/OmniSourceLicense.html.

#import <OWF/OWSGMLProcessor.h>

#import <Foundation/Foundation.h>
#import <OmniBase/OmniBase.h>
#import <OmniFoundation/OmniFoundation.h>

#import "NSString-OWSGMLString.h"
#import "OWAbstractObjectStream.h"
#import "OWContentCache.h"
#import "OWContentContainer.h"
#import "OWContentInfo.h"
#import "OWContentType.h"
#import "OWCSSStyleSheet.h"
#import "OWDocumentTitle.h"
#import "OWHeaderDictionary.h"
#import "OWObjectStreamCursor.h"
#import "OWPipeline.h"
#import "OWAddress.h"
#import "OWSGMLAppliedMethods.h"
#import "OWSGMLDTD.h"
#import "OWSGMLMethods.h"
#import "OWSGMLTag.h"
#import "OWSGMLTagType.h"
#import "OWURL.h"


RCS_ID("$Header: /Network/Source/CVS/OmniGroup/Frameworks/OWF/Processors.subproj/SGML.subproj/OWSGMLProcessor.m,v 1.53 2002/03/09 01:53:53 kc Exp $")

@interface OWSGMLProcessor (Private)
@end

@implementation OWSGMLProcessor

// Per-class OWSGMLMethods registry, keyed by the class's -description string; populated in +initialize and read by +sgmlMethods.
static NSMutableDictionary *sgmlMethodsDictionary = nil;
// The shared NSUserDefaults object, assigned on the first pass through +initialize.
static NSUserDefaults *defaults = nil;
// Debug flag; written by +setDebug:.
static BOOL debugSGMLProcessing = NO;

// Registers an OWSGMLMethods table for the receiving class in sgmlMethodsDictionary.
// The first time this body runs it also creates the dictionary and caches the shared user defaults; on every later run the new table is chained to the table already registered for the receiver's superclass.
+ (void)initialize;
{
    static BOOL initialized = NO;
    OWSGMLMethods *methodsForClass;
    NSString *classKey;

    [super initialize];

    if (!initialized) {
        // First pass (presumably for OWSGMLProcessor itself, since a superclass is initialized before its subclasses): set up the shared state and a parentless method table.
        initialized = YES;

        sgmlMethodsDictionary = [[NSMutableDictionary alloc] init];
        methodsForClass = [[OWSGMLMethods alloc] init];
        defaults = [NSUserDefaults standardUserDefaults];
    } else {
        NSString *superclassKey;
        OWSGMLMethods *inheritedMethods;

        // Later passes: look up the superclass's table and use it as the parent of the new one.
        superclassKey = [(NSObject *)[self superclass] description];
        inheritedMethods = [sgmlMethodsDictionary objectForKey:superclassKey];
        methodsForClass = [[OWSGMLMethods alloc] initWithParent:inheritedMethods];
    }

    // The dictionary retains the table, so drop our own reference once it is stored.
    classKey = [(NSObject *)self description];
    [sgmlMethodsDictionary setObject:methodsForClass forKey:classKey];
    [methodsForClass release];
}

// Returns the OWSGMLMethods table that +initialize stored for the receiving class (keyed by the class's description), or nil if none is stored.
+ (OWSGMLMethods *)sgmlMethods;
{
    NSString *classKey;

    classKey = [(NSObject *)self description];
    return [sgmlMethodsDictionary objectForKey:classKey];
}

// Returns the DTD used by this processor class.  This implementation always returns nil.
// NOTE(review): presumably concrete subclasses override this to supply their DTD (-initWithPipeline: asks the instance's own class for it) -- confirm.
+ (OWSGMLDTD *)dtd;
{
    return nil;
}

// Stores newDebugSetting in the file-level debugSGMLProcessing flag.
+ (void)setDebug:(BOOL)newDebugSetting;
{
    debugSGMLProcessing = newDebugSetting;
}

// Initializes the processor for aPipeline.
// Sets the base address from the pipeline's last address (falling back to its "HistoryAddress" context object), builds the applied-methods table for this instance's class, and, when the class's DTD reports at least one tag, allocates the zero-filled per-tag counters (openTags, implicitlyClosedTags).
// Returns nil if the superclass initializer fails.
- initWithPipeline:(OWPipeline *)aPipeline;
{
    OWAddress *startingAddress;
    OWSGMLDTD *classDTD;
    unsigned int numberOfTagTypes;
    NSZone *zone;

    if (![super initWithPipeline:aPipeline])
        return nil;

    // Everything allocated below goes into the same zone as the receiver.
    zone = [[OFZone zoneForObject:self] nsZone];

    startingAddress = [pipeline lastAddress];
    if (startingAddress == nil)
        startingAddress = [pipeline contextObjectForKey:@"HistoryAddress"];
    [self setBaseAddress:startingAddress];
    // GRT: Disable this until I figure out what the problem is with it (it was to do away with any cached error title in case this document has no real title of its own)
    //[OWDocumentTitle cacheRealTitle:nil forAddress:baseAddress];

    classDTD = [isa dtd];
    appliedMethods = [[OWSGMLAppliedMethods allocWithZone:zone] initFromSGMLMethods:[isa sgmlMethods] dtd:classDTD forTargetClass:isa];

    numberOfTagTypes = [classDTD tagCount];
    if (numberOfTagTypes == 0)
        return self; // No tags: the counter arrays stay NULL.

    openTags = NSZoneCalloc(zone, numberOfTagTypes, sizeof(unsigned int));
    implicitlyClosedTags = NSZoneCalloc(zone, numberOfTagTypes, sizeof(unsigned int));
    return self;
}

// Releases the objects this processor owns and frees the per-tag counter arrays (if they were allocated) back to the zone each came from.
- (void)dealloc;
{
    [appliedMethods release];
    [baseAddress release];

    if (openTags != NULL) {
        NSZone *openTagsZone = NSZoneFromPointer(openTags);

        NSZoneFree(openTagsZone, openTags);
    }
    if (implicitlyClosedTags != NULL) {
        NSZone *implicitTagsZone = NSZoneFromPointer(implicitlyClosedTags);

        NSZoneFree(implicitTagsZone, implicitlyClosedTags);
    }

    [undoers release];
    [styleSheet release];
    [super dealloc];
}

// Replaces the base address with anAddress, retaining the new value and releasing the old one.  Setting the same object again is a no-op.
- (void)setBaseAddress:(OWAddress *)anAddress;
{
    if (baseAddress != anAddress) {
        // Retain the incoming address before letting go of the previous one.
        [anAddress retain];
        [baseAddress release];
        baseAddress = anAddress;
    }
}

// Returns YES if the open-tag counter for tagType's DTD index is non-zero.
- (BOOL)hasOpenTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    return [self _hasOpenTagOfTypeIndex:typeIndex];
}

// Records that a tag of tagType has been opened, using the type's DTD index.
- (void)openTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    [self _openTagOfTypeIndex:typeIndex];
}

// Records that a tag of tagType has been closed, using the type's DTD index.  Whether the close matched an earlier implicit close is not reported to the caller.
- (void)closeTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    (void)[self _closeTagAtIndexWasImplicit:typeIndex];
}

// Reads tokens from objectCursor and dispatches them until the content of `tag` ends.
// Pass nil for `tag` to process at the top level (see -process); in that case no open/close bookkeeping is done and the loop only ends when the cursor runs out of tokens.
// For a non-nil tag, the loop ends when (a) the matching end tag is read, (b) an unhandled end tag for some other currently-open tag type is read (this tag is then marked implicitly closed and the end tag is pushed back for an enclosing caller), or (c) the cursor runs out of tokens.
- (void)processContentForTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *tagType;
    unsigned int tagIndex;
    id <OWSGMLToken> sgmlToken;

    if (tag) {
	tagType = sgmlTagType(tag);
	tagIndex = [tagType dtdIndex];
	// Count this tag as open for the duration of its content.
	[self _openTagOfTypeIndex:tagIndex];
    } else {
	// Top level: there is no enclosing tag, so tagIndex is only a placeholder.
	tagType = nil;
	tagIndex = NSNotFound;
    }

    while ((sgmlToken = [objectCursor readObject])) {
        switch ([sgmlToken tokenType]) {
            case OWSGMLTokenTypeStartTag:
                [self processTag:(id)sgmlToken];
                break;
            case OWSGMLTokenTypeCData:
                [self processCData:(id)sgmlToken];
                break;
            case OWSGMLTokenTypeEndTag: {
                OWSGMLTagType *closeTagType;
                
                closeTagType = sgmlTagType((OWSGMLTag *)sgmlToken);
                if (closeTagType == tagType) { // matching end tag?
                    // _closeTagAtIndexWasImplicit: returns YES when it consumed a pending implicit close instead of closing an open tag.
                    if ([self _closeTagAtIndexWasImplicit:tagIndex])
                        break; // Nope, turns out we just implicitly closed this tag, so it's not our matching end tag
                    else
                        return; // Yup, this is our end tag, let's bail
                } else if (![self processEndTag:(id)sgmlToken] // end tag method not registered
                           && tag // We're not at the top level
                           && [self _hasOpenTagOfTypeIndex:[closeTagType dtdIndex]]) { // matching open tag before
                    // Push the end tag back onto the cursor, mark our own tag as implicitly closed, and return so the next read of the cursor sees that end tag again.
                    [objectCursor ungetObject:sgmlToken];
                    [self _implicitlyCloseTagAtIndex:tagIndex];
                    return;
                }
                break;
            }
            default:
                break;
        }
    }
    
    // Ran out of tokens without seeing an end tag: close our tag ourselves.
    if (tag)
        [self _closeTagAtIndexWasImplicit:tagIndex];
}

// Handler for tags with no more specific handling; deliberately does nothing (see below).
- (void)processUnknownTag:(OWSGMLTag *)tag;
{
    // We used to process the content for unknown tags, but this can lead to incredibly deep recursion if you're using a processor (such as our image map processor) which hasn't registered a method to handle, say, <img> tags (which don't have a matching close tag).  This caused crashes on pages like http://www.seatimes.com/classified/rent/b_docs/capts.html where we'd run out of stack space.
}

// Discards tokens from objectCursor until an end tag of the same type as `tag` has been read (that end tag is consumed too), or until the cursor runs out of tokens.
- (void)processIgnoredContentsTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *ignoredTagType;
    id <OWSGMLToken> token;

    ignoredTagType = sgmlTagType(tag);
    while ((token = [objectCursor readObject]) != nil) {
        // Only end tags can terminate the skip; everything else is dropped.
        if ([token tokenType] != OWSGMLTokenTypeEndTag)
            continue;
        if (sgmlTagType((OWSGMLTag *)token) == ignoredTagType)
            return;
    }
}

// Dispatches a start tag to the handler registered for its tag type in appliedMethods.
- (void)processTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *typeOfTag;

    typeOfTag = sgmlTagType(tag);
    // Call registered method to handle this tag
    sgmlAppliedMethodsInvokeTag(appliedMethods, tagTypeDtdIndex(typeOfTag), self, tag);
}


// Dispatches an end tag through appliedMethods and returns the result of that dispatch.  -processContentForTag: treats a NO result as "no end tag method registered".
- (BOOL)processEndTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *typeOfTag;

    typeOfTag = sgmlTagType(tag);
    return sgmlAppliedMethodsInvokeEndTag(appliedMethods, tagTypeDtdIndex(typeOfTag), self, tag);
}

// Called by -processContentForTag: for each character-data token.  This implementation ignores the data.
// NOTE(review): presumably subclasses override this to consume text -- confirm.
- (void)processCData:(NSString *)cData;
{
}

// Processes the token stream at the top level, i.e. with no enclosing tag.
- (void)process;
{
    [self processContentForTag:nil];
}

// Finishes processing.  When the "OWRetainProcessedSource" user default is set, the cursor's object stream is first registered in the content cache for the pipeline's last address, wrapped in a container typed "Omni/ProcessedSource"; the superclass's -processEnd then runs in either case.
- (void)processEnd;
{
    // Use the NSUserDefaults object cached by +initialize rather than looking up the shared instance again on every call (the original carried a "TODO: slow?" on that lookup).
    if ([defaults boolForKey:@"OWRetainProcessedSource"]) {
        OWContentType *processedSourceType;
        OWContentContainer *processedSource;

        processedSourceType = [OWContentType contentTypeForString:@"Omni/ProcessedSource"];
        processedSource = [OWContentContainer containerWithContent:[objectCursor objectStream] contentType:processedSourceType];
        [[pipeline contentCacheForLastAddress] registerContent:processedSource];
    }

    [super processEnd];
}

// Returns the current base address (set by -setBaseAddress:); may be nil.
- (OWAddress *)baseAddress;
{
    return baseAddress;
}

// Returns the superclass's debug dictionary with the base address added under the key "baseAddress" when one is set.
- (NSMutableDictionary *)debugDictionary;
{
    NSMutableDictionary *info = [super debugDictionary];

    if (baseAddress != nil)
        [info setObject:baseAddress forKey:@"baseAddress"];
    return info;
}

@end


@implementation OWSGMLProcessor (Tags)

// Tag types looked up by name from the class's DTD in +didLoad.
static OWSGMLTagType *anchorTagType;
static OWSGMLTagType *baseTagType;
static OWSGMLTagType *bodyTagType;
static OWSGMLTagType *headTagType;
static OWSGMLTagType *htmlTagType;
static OWSGMLTagType *metaTagType;
static OWSGMLTagType *titleTagType;
static OWSGMLTagType *styleTagType;

// Attribute indexes returned by -addAttributeNamed: in +didLoad; used with sgmlTagValueForAttributeAtIndex() to read attribute values from tags.
static unsigned int anchorEffectAttributeIndex;
static unsigned int anchorHrefAttributeIndex;
static unsigned int anchorTargetAttributeIndex;
static unsigned int anchorTitleAttributeIndex;
static unsigned int baseHrefAttributeIndex;
static unsigned int baseTargetAttributeIndex;
static unsigned int metaNameAttributeIndex;
static unsigned int metaContentAttributeIndex;
static unsigned int metaHTTPEquivAttributeIndex;
static unsigned int metaCharSetAttributeIndex;

// One-time setup for the tag handling in this category: looks up the tag types this class cares about, registers the attributes it reads from them, and registers handler methods by name for those tags.
// NOTE(review): presumably invoked once by the Omni framework loading machinery after the class is loaded -- confirm.
+ (void)didLoad;
{
    OWSGMLMethods *methods;
    OWSGMLDTD *dtd;

    // NOTE:
    //
    // You CANNOT add any tags here which aren't also applicable to frameset pages, because the SGMLFrameRecognizer subclass depends on any non-frame tags being unrecognized in its superclass (us) so it can switch the document to HTML.

    dtd = [self dtd];

    // Look up the tag types by name.
    anchorTagType = [dtd tagTypeNamed:@"a"];
    baseTagType = [dtd tagTypeNamed:@"base"];
    bodyTagType = [dtd tagTypeNamed:@"body"];
    headTagType = [dtd tagTypeNamed:@"head"];
    htmlTagType = [dtd tagTypeNamed:@"html"];
    metaTagType = [dtd tagTypeNamed:@"meta"];
    titleTagType = [dtd tagTypeNamed:@"title"];
    styleTagType = [dtd tagTypeNamed:@"style"];
    // The content of <style> is not SGML markup; -processStyleTag: relies on this.
    [styleTagType setContentHandling:OWSGMLTagContentHandlingNonSGML];

    // Register the attributes we read, remembering the index each one is given.
    anchorHrefAttributeIndex = [anchorTagType addAttributeNamed:@"href"];
    anchorTargetAttributeIndex = [anchorTagType addAttributeNamed:@"target"];
    anchorEffectAttributeIndex = [anchorTagType addAttributeNamed:@"effect"];
    anchorTitleAttributeIndex = [anchorTagType addAttributeNamed:@"title"];

    baseHrefAttributeIndex = [baseTagType addAttributeNamed:@"href"];
    baseTargetAttributeIndex = [baseTagType addAttributeNamed:@"target"];

    metaNameAttributeIndex = [metaTagType addAttributeNamed:@"name"];
    metaContentAttributeIndex = [metaTagType addAttributeNamed:@"content"];
    metaHTTPEquivAttributeIndex = [metaTagType addAttributeNamed:@"http-equiv"];
    metaCharSetAttributeIndex = [metaTagType addAttributeNamed:@"charset"];

    methods = [self sgmlMethods];

    // Register handlers by name; the names line up with the -process<Name>Tag: methods in this category (e.g. "Base" and -processBaseTag:).
    [methods registerMethod:@"Meaningless" forTagName:@"html"];
    [methods registerMethod:@"Meaningless" forTagName:@"head"];
    [methods registerMethod:@"Base" forTagName:@"base"];
    [methods registerMethod:@"Meta" forTagName:@"meta"];
    [methods registerMethod:@"Title" forTagName:@"title"];
    [methods registerMethod:@"Style" forTagName:@"style"];
}

// Builds the address an anchor tag points at, resolved against the base address.
// Returns nil when the tag has no href.  The target falls back to the base address's target when the tag does not specify one.  If the tag carries a non-empty title attribute, it is cached as a guessed title for the resulting address.
- (OWAddress *)addressForAnchorTag:(OWSGMLTag *)anchorTag;
{
    NSString *hrefString, *targetName, *effectString, *anchorTitle;
    OWAddress *linkedAddress;

    hrefString = sgmlTagValueForAttributeAtIndex(anchorTag, anchorHrefAttributeIndex);
    if (hrefString == nil)
        return nil;

    targetName = sgmlTagValueForAttributeAtIndex(anchorTag, anchorTargetAttributeIndex);
    if (targetName == nil)
        targetName = [baseAddress target];

    effectString = sgmlTagValueForAttributeAtIndex(anchorTag, anchorEffectAttributeIndex);
    linkedAddress = [baseAddress addressForRelativeString:hrefString inPipeline:pipeline target:targetName effect:[OWAddress effectForString:effectString]];

    anchorTitle = sgmlTagValueForAttributeAtIndex(anchorTag, anchorTitleAttributeIndex);
    // Messaging a nil title yields a length of 0, so this covers both "missing" and "empty".
    if ([anchorTitle length] > 0) {
        // We now have a guess as to what this document's title is
        [OWDocumentTitle cacheGuessTitle:anchorTitle forAddress:linkedAddress];
    }

    return linkedAddress;
}

// Handler registered in +didLoad for <html> and <head>; intentionally does nothing with the tag.
- (void)processMeaninglessTag:(OWSGMLTag *)tag;
{
}

// Handles <base>: replaces the base address according to the tag's href and target attributes.
// With an href, the new base is built from that URL (plus the target, if any); with only a target, the current base address is re-targeted.  With neither attribute, or if no address could be built, the base address is left unchanged.
- (void)processBaseTag:(OWSGMLTag *)tag;
{
    NSString *hrefValue, *targetValue;
    OWAddress *replacementBase;

    hrefValue = sgmlTagValueForAttributeAtIndex(tag, baseHrefAttributeIndex);
    targetValue = sgmlTagValueForAttributeAtIndex(tag, baseTargetAttributeIndex);

    if (hrefValue == nil && targetValue == nil)
        return;

    if (hrefValue != nil)
        replacementBase = [OWAddress addressWithURL:[OWURL urlFromString:hrefValue] target:targetValue effect:OWAddressEffectFollowInWindow];
    else
        replacementBase = [baseAddress addressWithTarget:targetValue];

    if (replacementBase == nil)
        return;
    [self setBaseAddress:replacementBase];
}

// Handles <meta>: when the tag has both http-equiv and content attributes, adds them to the pipeline as a header key/value pair.  Tags missing either attribute are ignored.
- (void)processMetaTag:(OWSGMLTag *)tag;
{
    NSString *headerKey, *headerContent;

    headerKey = sgmlTagValueForAttributeAtIndex(tag, metaHTTPEquivAttributeIndex);
    if (headerKey == nil)
        return;

    headerContent = sgmlTagValueForAttributeAtIndex(tag, metaContentAttributeIndex);
    if (headerContent == nil)
        return;

    [pipeline addHeader:headerKey value:headerContent];
    // Note that the <meta> tag could have just specified a new string encoding or content type. Right now changes in the string encoding are handled by the ugly hack in OWHTMLToSGMLObjects; other changes are not handled at all unless by the pipeline.
}

// Handles <title>: accumulates the character data that follows the tag and caches it (whitespace-collapsed and trimmed) as the real title for the base address.
// Accumulation stops at </title>, </head>, a start or end tag of type body, or when the cursor runs out of tokens.  `tag` itself is only used in the DEBUG log message.
// NOTE(review): the token that ends the title is consumed and not pushed back with -ungetObject:, so e.g. a <body> start tag that terminates an unclosed title is not seen by the caller -- confirm this is intended.
- (void)processTitleTag:(OWSGMLTag *)tag;
{
    id <OWSGMLToken> sgmlToken;
    NSMutableString *titleString;
    OWSGMLTagType *tagType;

    titleString = [NSMutableString stringWithCapacity:128];
    while ((sgmlToken = [objectCursor readObject])) {
        switch ([sgmlToken tokenType]) {
            case OWSGMLTokenTypeCData:
                [titleString appendString:[sgmlToken string]];
                break;
            case OWSGMLTokenTypeEndTag:
                tagType = [(OWSGMLTag *)sgmlToken tagType];
                if (tagType == titleTagType || tagType == headTagType)
                    goto exitAndCacheTitle;
                // Falls through: any other end tag gets the same body check as a start tag.
            case OWSGMLTokenTypeStartTag:
                tagType = [(OWSGMLTag *)sgmlToken tagType];
                if (tagType == bodyTagType)
                    goto exitAndCacheTitle;
                // Falls through: any other tag inside the title is ignored (and logged in DEBUG builds).
            default:
#ifdef DEBUG
                NSLog(@"HTML: Ignoring %@ within %@", sgmlToken, tag);
#endif
                break;
        }
    }

exitAndCacheTitle:
    [OWDocumentTitle cacheRealTitle:[titleString stringByCollapsingWhitespaceAndRemovingSurroundingWhitespace] forAddress:baseAddress];
}

// Handles <style>: collects the character data up to the next end tag and hands it to the processor's style sheet for parsing, creating the style sheet on first use.
// The content is parsed even if the cursor runs out of tokens before an end tag is seen.
- (void)processStyleTag:(OWSGMLTag *)tag;
{
    id <OWSGMLToken> sgmlToken;
    NSMutableString *styleSheetContent;
    
    if (!styleSheet)
        styleSheet = [[OWCSSStyleSheet alloc] init];

    // Accumulate into one mutable string (as -processTitleTag: does) instead of building a new immutable string for every chunk, which re-copies everything collected so far each time.
    styleSheetContent = [NSMutableString string];
    while ((sgmlToken = [objectCursor readObject])) {
        switch ([sgmlToken tokenType]) {
            case OWSGMLTokenTypeCData:
                [styleSheetContent appendString:[sgmlToken string]];
                break;
            case OWSGMLTokenTypeEndTag:
                // This pretty much has to be an </STYLE> tag, because style is marked as non-SGML
                OBASSERT([(OWSGMLTag *)sgmlToken tagType] == [tag tagType]);
                goto done;
            default:
#ifdef DEBUG
                NSLog(@"HTML: Ignoring %@ within %@", sgmlToken, tag);
#endif
                break;
        }
    }

done:
    [styleSheet parseStyleSheetString:styleSheetContent parentContentInfo:[pipeline contentInfo]];
}

@end

@implementation OWSGMLProcessor (SubclassesOnly)

// Returns YES if the open counter for the tag type at tagIndex is non-zero.  tagIndex is not range-checked.
- (BOOL)_hasOpenTagOfTypeIndex:(unsigned int)tagIndex;
{
    unsigned int openCount = openTags[tagIndex];

    return openCount != 0;
}

// Records a newly opened tag of the type at tagIndex: clears any pending implicit closes for that type and bumps its open counter.  tagIndex is not range-checked.
- (void)_openTagOfTypeIndex:(unsigned int)tagIndex;
{
    implicitlyClosedTags[tagIndex] = 0;
    openTags[tagIndex] += 1;
}

// Marks one tag of the type at tagIndex as implicitly closed: its implicit-close counter goes up, and its open counter goes down if it is non-zero.  The pending implicit close is consumed later by -_closeTagAtIndexWasImplicit:.  tagIndex is not range-checked.
- (void)_implicitlyCloseTagAtIndex:(unsigned int)tagIndex;
{
    implicitlyClosedTags[tagIndex]++;
    // The counters are unsigned, so decrementing from zero would wrap to a huge value and make the tag look permanently open.  Guard the decrement the same way -_closeTagAtIndexWasImplicit: does.
    if (openTags[tagIndex] > 0)
        openTags[tagIndex]--;
}

// Accounts for a close of the tag type at tagIndex.
// If an implicit close is pending for that type, consumes one and returns YES (the open counter is left alone).  Otherwise decrements the open counter when it is non-zero and returns NO.  tagIndex is not range-checked.
- (BOOL)_closeTagAtIndexWasImplicit:(unsigned int)tagIndex;
{
    if (implicitlyClosedTags[tagIndex] > 0) {
        implicitlyClosedTags[tagIndex]--;
        return YES;
    }

    if (openTags[tagIndex] > 0)
        openTags[tagIndex]--;
    return NO;
}

@end

@implementation OWSGMLProcessor (Private)
@end
