// Copyright 1997-2000 Omni Development, Inc.  All rights reserved.
//
// This software may only be used and reproduced according to the
// terms in the file OmniSourceLicense.html, which should be
// distributed with this project and can also be found at
// http://www.omnigroup.com/DeveloperResources/OmniSourceLicense.html.

#import <OWF/OWSGMLProcessor.h>

#import <Foundation/Foundation.h>
#import <OmniBase/OmniBase.h>
#import <OmniFoundation/OmniFoundation.h>

#import <OWF/NSString-OWSGMLString.h>
#import <OWF/OWDocumentTitle.h>
#import <OWF/OWHeaderDictionary.h>
#import <OWF/OWObjectStreamCursor.h>
#import <OWF/OWPipeline.h>
#import <OWF/OWAddress.h>
#import <OWF/OWSGMLAppliedMethods.h>
#import <OWF/OWSGMLDTD.h>
#import <OWF/OWSGMLMethods.h>
#import <OWF/OWSGMLTag.h>
#import <OWF/OWSGMLTagType.h>
#import <OWF/OWURL.h>

RCS_ID("$Header: /Network/Source/CVS/OmniGroup/Frameworks/OWF/Processors.subproj/SGML.subproj/OWSGMLProcessor.m,v 1.20 2000/06/26 23:19:00 bungi Exp $")

// Private bookkeeping of how many times each tag type is currently open.  Callers in this file pass a tag type's -dtdIndex as the index.
@interface OWSGMLProcessor (Private)
// Returns YES if the open count recorded for tagIndex is greater than zero.
- (BOOL)hasOpenTagAtIndex:(unsigned int)tagIndex;
// Increments the open count recorded for tagIndex.
- (void)openTagAtIndex:(unsigned int)tagIndex;
// Decrements the open count recorded for tagIndex, unless it is already zero.
- (void)closeTagAtIndex:(unsigned int)tagIndex;
@end

@implementation OWSGMLProcessor

// Maps a class's description string to the OWSGMLMethods instance created for that class in +initialize.
static NSMutableDictionary *sgmlMethodsDictionary = nil;
// Set to [NSUserDefaults standardUserDefaults] the first time +initialize runs.
static NSUserDefaults *defaults = nil;
// Written by +setDebug:; nothing in the visible part of this file reads it.
static BOOL debugSGMLProcessing = NO;

// Creates the OWSGMLMethods table for the receiving class and stores it in sgmlMethodsDictionary under the class's description string.
// The first invocation also creates the shared dictionary and caches the standard user defaults; every later invocation creates a table whose parent is the table already stored for the receiver's superclass.
+ (void)initialize;
{
    static BOOL initialized = NO;
    OWSGMLMethods *methodsForClass;

    [super initialize];

    if (!initialized) {
        // First time through: set up the shared state and a root (parentless) method table.
        initialized = YES;

        sgmlMethodsDictionary = [[NSMutableDictionary alloc] init];
        methodsForClass = [[OWSGMLMethods alloc] init];
        defaults = [NSUserDefaults standardUserDefaults];
    } else {
        // Later invocations: chain the new table to whatever is stored for the superclass.
        NSString *superclassKey = [(NSObject *)[self superclass] description];
        OWSGMLMethods *parentMethods = [sgmlMethodsDictionary objectForKey:superclassKey];

        methodsForClass = [[OWSGMLMethods alloc] initWithParent:parentMethods];
    }

    // The dictionary retains the table, so our own reference can be dropped.
    [sgmlMethodsDictionary setObject:methodsForClass forKey:[(NSObject *)self description]];
    [methodsForClass release];
}

// Returns the OWSGMLMethods table stored for the receiving class, looked up by the class's description string (the same key +initialize stores it under).  Returns nil if nothing is stored for that key.
+ (OWSGMLMethods *)sgmlMethods;
{
    NSString *classKey;

    classKey = [(NSObject *)self description];
    return [sgmlMethodsDictionary objectForKey:classKey];
}

// Returns the DTD used by -initWithPipeline: and +didLoad.  This implementation always returns nil.
// NOTE(review): presumably subclasses override this to supply a real DTD -- confirm.
+ (OWSGMLDTD *)dtd;
{
    return nil;
}

// Stores newDebugSetting in the file-level debugSGMLProcessing flag.  The flag is shared by every class that uses this implementation.
+ (void)setDebug:(BOOL)newDebugSetting;
{
    debugSGMLProcessing = newDebugSetting;
}

// Initializes the processor for aPipeline.
// The base address is taken from the pipeline's -lastAddress, falling back to the pipeline's context object for @"HistoryAddress".  The applied-methods table is built from the class's +sgmlMethods and +dtd.  If the DTD reports any tags, a zero-filled array with one open-tag counter per tag is allocated in the receiver's zone; otherwise openTags is left untouched.
// Returns nil if the superclass initializer fails or the counter array cannot be allocated.
- initWithPipeline:(OWPipeline *)aPipeline;
{
    OWAddress *pipelineAddress;
    OWSGMLDTD *dtd;
    unsigned int tagCount;
    OFZone *myZone;

    // Keep whatever object the superclass hands back; it is not guaranteed to be the original receiver.
    self = [super initWithPipeline:aPipeline];
    if (!self)
        return nil;

    myZone = [OFZone zoneForObject:self];
    
    pipelineAddress = [pipeline lastAddress];
    if (!pipelineAddress)
        pipelineAddress = [pipeline contextObjectForKey:@"HistoryAddress"];

    [self setBaseAddress:pipelineAddress];

    dtd = [isa dtd];
    appliedMethods = [[OWSGMLAppliedMethods allocWithZone:[myZone nsZone]] initFromSGMLMethods:[isa sgmlMethods] dtd:dtd forTargetClass:isa];

    tagCount = [dtd tagCount];
    if (tagCount > 0) {
        // NSZoneCalloc returns zero-filled memory, so every counter starts at 0 without a manual loop.
        openTags = NSZoneCalloc([myZone nsZone], tagCount, sizeof(unsigned int));
        if (openTags == NULL) {
            // Without the counter array the tag bookkeeping cannot work; fail the init instead of crashing later.
            [self release];
            return nil;
        }
    }

    return self;
}

// Releases the applied-methods table and the base address, and frees the open-tag counter array if one was allocated.
- (void)dealloc;
{
    NSZone *countersZone;

    [appliedMethods release];
    [baseAddress release];
    if (openTags != NULL) {
        // Free the array from whichever zone it was allocated in.
        countersZone = NSZoneFromPointer(openTags);
        NSZoneFree(countersZone, openTags);
    }
    [super dealloc];
}

// Replaces the retained base address with anAddress.  Does nothing when anAddress is already the current base address.
- (void)setBaseAddress:(OWAddress *)anAddress;
{
    if (anAddress != baseAddress) {
        // Retain the new value before releasing the old one.
        [anAddress retain];
        [baseAddress release];
        baseAddress = anAddress;
    }
}

// Returns YES if the open count recorded for tagType's DTD index is greater than zero.
- (BOOL)hasOpenTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    return [self hasOpenTagAtIndex:typeIndex];
}

// Records one more open tag for tagType, using its DTD index.
- (void)openTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    [self openTagAtIndex:typeIndex];
}

// Records one fewer open tag for tagType, using its DTD index.
- (void)closeTagOfType:(OWSGMLTagType *)tagType;
{
    unsigned int typeIndex;

    typeIndex = [tagType dtdIndex];
    [self closeTagAtIndex:typeIndex];
}

// Reads tokens from objectCursor and dispatches them until the content of tag ends or the cursor runs out of tokens.
// tag is the enclosing start tag, or nil at the top level.  A non-nil tag is counted as open for the duration of the call.
// Start tags go to -processTag:, character data to -processCData:.  An end tag of the enclosing tag's type finishes the call.  Any other end tag is first offered to -processEndTag:; if that returns NO, we are inside a tag, and a tag of the end tag's type is currently open, the end tag is pushed back onto the cursor and the call finishes so that an outer invocation can see it.
- (void)processContentForTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *enclosingTagType = nil;
    unsigned int enclosingTagIndex = NSNotFound;
    id <OWSGMLToken> token;

    if (tag) {
        enclosingTagType = sgmlTagType(tag);
        enclosingTagIndex = [enclosingTagType dtdIndex];
        [self openTagAtIndex:enclosingTagIndex];
    }

    while ((token = [objectCursor readObject])) {
        switch ([token tokenType]) {
            case OWSGMLTokenTypeStartTag:
                [self processTag:(id)token];
                break;
            case OWSGMLTokenTypeCData:
                [self processCData:(id)token];
                break;
            case OWSGMLTokenTypeEndTag:
            {
                OWSGMLTagType *endTagType;

                endTagType = sgmlTagType((OWSGMLTag *)token);
                if (endTagType != enclosingTagType) {
                    // Not our own end tag.  Keep reading unless it is unhandled AND closes something further out.
                    if ([self processEndTag:(id)token])
                        break; // an end tag method handled it
                    if (!tag)
                        break; // top level: there is no outer invocation to hand it to
                    if (![self hasOpenTagAtIndex:[endTagType dtdIndex]])
                        break; // no matching open tag before this one
                    [objectCursor ungetObject:token];
                }
                [self closeTagAtIndex:enclosingTagIndex];
                return;
            }
            default:
                break;
        }
    }

    // Ran out of tokens without seeing the end tag.
    if (tag)
        [self closeTagAtIndex:enclosingTagIndex];
}

// Deliberately does nothing with tag.
// NOTE(review): presumably invoked for tags that have no registered method -- confirm in OWSGMLAppliedMethods.
- (void)processUnknownTag:(OWSGMLTag *)tag;
{
    // We used to process the content for unknown tags, but this can lead to incredibly deep recursion if you're using a processor (such as our image map processor) which hasn't registered a method to handle, say, <img> tags (which don't have a matching close tag).  This caused crashes on pages like http://www.seatimes.com/classified/rent/b_docs/capts.html where we'd run out of stack space.
}

// Discards tokens from objectCursor up to and including the first end tag whose type matches tag's type.  If no such end tag appears, every remaining token is consumed.
- (void)processIgnoredContentsTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *ignoredTagType = sgmlTagType(tag);
    id <OWSGMLToken> token;

    while ((token = [objectCursor readObject])) {
        // Only end tags can terminate the ignored region; everything else is dropped.
        if ([token tokenType] != OWSGMLTokenTypeEndTag)
            continue;
        if (sgmlTagType((OWSGMLTag *)token) == ignoredTagType)
            return;
    }
}

// Dispatches a start tag through the applied-methods table, selecting the entry by the DTD index of the tag's type.
- (void)processTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *tagType = sgmlTagType(tag);
    unsigned int dtdIndex = tagTypeDtdIndex(tagType);

    sgmlAppliedMethodsInvokeTag(appliedMethods, dtdIndex, self, tag);
}

// Dispatches an end tag through the applied-methods table, selecting the entry by the DTD index of the tag's type, and returns the dispatcher's result.  -processContentForTag: treats a NO result as "no end tag method registered".
- (BOOL)processEndTag:(OWSGMLTag *)tag;
{
    OWSGMLTagType *tagType = sgmlTagType(tag);
    unsigned int dtdIndex = tagTypeDtdIndex(tagType);

    return sgmlAppliedMethodsInvokeEndTag(appliedMethods, dtdIndex, self, tag);
}

// Called by -processContentForTag: for each character-data token.  This implementation ignores the data.
// NOTE(review): the parameter is typed NSString *, but the caller passes the token itself cast through id -- confirm that CData tokens are strings.
- (void)processCData:(NSString *)cData;
{
}

// Processes the token stream from the top level by calling -processContentForTag: with a nil enclosing tag.
- (void)process;
{
    [self processContentForTag:nil];
}

// Returns the current base address (set by -setBaseAddress:), or nil if none is set.  The returned object is not retained on behalf of the caller.
- (OWAddress *)baseAddress;
{
    return baseAddress;
}

// Returns the superclass's debug dictionary, with the base address added under @"baseAddress" when one is set.
- (NSMutableDictionary *)debugDictionary;
{
    NSMutableDictionary *debugInfo = [super debugDictionary];

    if (baseAddress != nil) {
        [debugInfo setObject:baseAddress forKey:@"baseAddress"];
    }
    return debugInfo;
}

@end


@implementation OWSGMLProcessor (Tags)

// Tag types looked up by name from the DTD in +didLoad.
static OWSGMLTagType *anchorTagType;
static OWSGMLTagType *baseTagType;
static OWSGMLTagType *bodyTagType;
static OWSGMLTagType *headTagType;
static OWSGMLTagType *htmlTagType;
static OWSGMLTagType *metaTagType;
static OWSGMLTagType *titleTagType;

// Attribute indexes returned by -addAttributeNamed: in +didLoad; used with sgmlTagValueForAttributeAtIndex() to read attribute values from a tag.
static unsigned int anchorEffectAttributeIndex;
static unsigned int anchorHrefAttributeIndex;
static unsigned int anchorTargetAttributeIndex;
static unsigned int anchorTitleAttributeIndex;
static unsigned int baseHrefAttributeIndex;
static unsigned int baseTargetAttributeIndex;
static unsigned int metaContentAttributeIndex;
static unsigned int metaHTTPEquivAttributeIndex;

// One-time setup for the tag handling in this category: looks up the tag types used here, records the attribute indexes needed to read their attributes, and registers method names for the html, head, base, meta and title tags.
// NOTE(review): +dtd as implemented in this file returns nil, in which case every message to dtd and to the tag types below is sent to nil -- confirm whether +dtd is overridden or replaced elsewhere.
+ (void)didLoad;
{
    OWSGMLMethods *methods;
    OWSGMLDTD *dtd;

    // NOTE:
    //
    // You CANNOT add any tags here which aren't also applicable to frameset pages, because the SGMLFrameRecognizer subclass depends on any non-frame tags being unrecognized in its superclass (us) so it can switch the document to HTML.

    dtd = [self dtd];

    // Look up the tag types this category refers to.
    anchorTagType = [dtd tagTypeNamed:@"a"];
    baseTagType = [dtd tagTypeNamed:@"base"];
    bodyTagType = [dtd tagTypeNamed:@"body"];
    headTagType = [dtd tagTypeNamed:@"head"];
    htmlTagType = [dtd tagTypeNamed:@"html"];
    metaTagType = [dtd tagTypeNamed:@"meta"];
    titleTagType = [dtd tagTypeNamed:@"title"];

    // Record the index of each attribute we read later; -addressForAnchorTag: uses the anchor indexes.
    anchorHrefAttributeIndex = [anchorTagType addAttributeNamed:@"href"];
    anchorTargetAttributeIndex = [anchorTagType addAttributeNamed:@"target"];
    anchorEffectAttributeIndex = [anchorTagType addAttributeNamed:@"effect"];
    anchorTitleAttributeIndex = [anchorTagType addAttributeNamed:@"title"];

    // Used by -processBaseTag:.
    baseHrefAttributeIndex = [baseTagType addAttributeNamed:@"href"];
    baseTargetAttributeIndex = [baseTagType addAttributeNamed:@"target"];

    // Used by -processMetaTag:.
    metaHTTPEquivAttributeIndex = [metaTagType addAttributeNamed:@"http-equiv"];
    metaContentAttributeIndex = [metaTagType addAttributeNamed:@"content"];

    methods = [self sgmlMethods];

    // NOTE(review): the method names presumably resolve to -process<Name>Tag: (e.g. @"Meta" -> -processMetaTag:) -- confirm in OWSGMLMethods.  No method is registered here for the anchor or body tags.
    [methods registerMethod:@"Meaningless" forTagName:@"html"];
    [methods registerMethod:@"Meaningless" forTagName:@"head"];
    [methods registerMethod:@"Base" forTagName:@"base"];
    [methods registerMethod:@"Meta" forTagName:@"meta"];
    [methods registerMethod:@"Title" forTagName:@"title"];
}

// Builds the address an anchor tag links to, resolved relative to the current base address.
// Returns nil when the tag has no href value.  The target comes from the tag's target attribute, falling back to the base address's target; the effect comes from the tag's effect attribute.  If the tag has a non-empty title attribute, it is cached as a guessed title for the resulting address.
- (OWAddress *)addressForAnchorTag:(OWSGMLTag *)anchorTag;
{
    NSString *hrefString, *targetName, *effectName, *anchorTitle;
    OWAddress *linkedAddress;

    hrefString = sgmlTagValueForAttributeAtIndex(anchorTag, anchorHrefAttributeIndex);
    if (hrefString == nil)
        return nil;

    targetName = sgmlTagValueForAttributeAtIndex(anchorTag, anchorTargetAttributeIndex);
    if (targetName == nil)
        targetName = [baseAddress target];

    effectName = sgmlTagValueForAttributeAtIndex(anchorTag, anchorEffectAttributeIndex);
    linkedAddress = [baseAddress addressForRelativeString:hrefString inPipeline:pipeline target:targetName effect:[OWAddress effectForString:effectName]];

    anchorTitle = sgmlTagValueForAttributeAtIndex(anchorTag, anchorTitleAttributeIndex);
    if ([anchorTitle length] > 0) {
        // The anchor's title is our best guess at the linked document's title until the document itself is seen.
        [OWDocumentTitle cacheGuessTitle:anchorTitle forAddress:linkedAddress];
    }

    return linkedAddress;
}

// Intentionally empty.  +didLoad registers the method name @"Meaningless" for the html and head tags.
// NOTE(review): presumably that registration routes those tags to this method, so they are recognized but otherwise ignored -- confirm in OWSGMLMethods.
- (void)processMeaninglessTag:(OWSGMLTag *)tag;
{
}

// Handles a <base> tag by updating the base address.
// With an href value, the new base address is built from that URL, the tag's target (possibly nil) and the follow-in-window effect.  With only a target, the current base address is re-targeted.  With neither, or when no address results, the base address is left unchanged.
- (void)processBaseTag:(OWSGMLTag *)tag;
{
    NSString *hrefString, *targetName;
    OWAddress *replacementAddress;

    hrefString = sgmlTagValueForAttributeAtIndex(tag, baseHrefAttributeIndex);
    targetName = sgmlTagValueForAttributeAtIndex(tag, baseTargetAttributeIndex);

    if (hrefString != nil)
        replacementAddress = [OWAddress addressWithURL:[OWURL urlFromString:hrefString] target:targetName effect:OWAddressEffectFollowInWindow];
    else if (targetName != nil)
        replacementAddress = [baseAddress addressWithTarget:targetName];
    else
        replacementAddress = nil;

    if (replacementAddress != nil)
        [self setBaseAddress:replacementAddress];
}

// Handles a <meta> tag: when it has both an http-equiv value and a content value, adds them to the pipeline as a header key and value.  Tags missing either value are ignored.
- (void)processMetaTag:(OWSGMLTag *)tag;
{
    NSString *headerKey, *headerValue;

    headerKey = sgmlTagValueForAttributeAtIndex(tag, metaHTTPEquivAttributeIndex);
    if (headerKey == nil)
        return;

    headerValue = sgmlTagValueForAttributeAtIndex(tag, metaContentAttributeIndex);
    if (headerValue == nil)
        return;

    [pipeline addHeader:headerKey value:headerValue];
}

// Handles a <title> tag: accumulates the character data that follows and caches it, whitespace-collapsed and trimmed, as the real title for the current base address.
// Accumulation stops at an end tag of type title, head or body, at a start tag of type body, or when the cursor runs out of tokens.  The terminating token is consumed, not pushed back.  Every other non-character-data token is skipped (and logged when DEBUG is defined).
- (void)processTitleTag:(OWSGMLTag *)tag;
{
    NSMutableString *titleText;
    id <OWSGMLToken> token;
    OWSGMLTagType *candidateType;
    BOOL titleEnded = NO;

    titleText = [NSMutableString stringWithCapacity:128];
    while ((token = [objectCursor readObject])) {
        switch ([token tokenType]) {
            case OWSGMLTokenTypeCData:
                [titleText appendString:[token string]];
                continue; // next token; nothing to log
            case OWSGMLTokenTypeEndTag:
                candidateType = [(OWSGMLTag *)token tagType];
                titleEnded = (candidateType == titleTagType || candidateType == headTagType || candidateType == bodyTagType);
                break;
            case OWSGMLTokenTypeStartTag:
                candidateType = [(OWSGMLTag *)token tagType];
                titleEnded = (candidateType == bodyTagType);
                break;
            default:
                break;
        }

        if (titleEnded)
            break; // leave the read loop

#ifdef DEBUG
        NSLog(@"HTML: Ignoring %@ within %@", token, tag);
#endif
    }

    [OWDocumentTitle cacheRealTitle:[titleText stringByCollapsingWhitespaceAndRemovingSurroundingWhitespace] forAddress:baseAddress];
}

@end

@implementation OWSGMLProcessor (Private)

// Open-tag bookkeeping.  openTags holds one counter per DTD tag, indexed by a tag type's dtdIndex.
// -initWithPipeline: only allocates the array when the DTD reports at least one tag, so each method checks for a NULL array before indexing it.  The index itself is not range-checked (the tag count is not stored); callers must pass a dtdIndex obtained from the processor's own DTD.

// Returns YES if at least one tag with the given index is currently recorded as open.  Returns NO when no counter array exists.
- (BOOL)hasOpenTagAtIndex:(unsigned int)tagIndex;
{
    if (openTags == NULL)
        return NO;
    return openTags[tagIndex] > 0;
}

// Records one more open tag for the given index.  Does nothing when no counter array exists.
- (void)openTagAtIndex:(unsigned int)tagIndex;
{
    if (openTags == NULL)
        return;
    openTags[tagIndex]++;
}

// Records one fewer open tag for the given index.  The count never goes below zero.  Does nothing when no counter array exists.
- (void)closeTagAtIndex:(unsigned int)tagIndex;
{
    if (openTags == NULL)
        return;
    if (openTags[tagIndex] > 0)
        openTags[tagIndex]--;
}

@end
