/* simplewebkit
WebHTMLDocumentRepresentation.m
Copyright (C) 2007 Free Software Foundation, Inc.
Author: Dr. H. Nikolaus Schaller
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; see the file COPYING.LIB.
If not, write to the Free Software Foundation,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
// parse HTML document into DOMHTML node structure (using NSXMLParser as the scanner)
#import "Private.h"
#import "WebHTMLDocumentRepresentation.h"
#import
#if __mySTEP__ // don't include twice since we have it in our own Foundation
#import
#else // always use our own NSXMLParser that supports -_setEncoding and _tagPath and other HTML compatibility extensions
#define NSXMLParser WebKitXMLParser // rename class to avoid linker conflicts with Foundation
#define parser Webparser // rename methods to avoid compiler conflicts with Foundation
#define parserDidStartDocument WebparserDidStartDocument // rename methods to avoid compiler conflicts with Foundation
#define parserDidEndDocument WebparserDidEndDocument // rename methods to avoid compiler conflicts with Foundation
#define __WebKit__ 1 // this disables some includes in mySTEP NSXMLParser.h/.m
#include "NSXMLParser.h" // directly include header here - note that the class is renamed!
#include "NSXMLParser.m" // directly include source here - note that the class is renamed!
#endif
// Declares the parser's private incremental-feed method so that this file can
// message it. Within this file it is sent with nil by
// -finishedLoadingWithDataSource: to tell the parser that no more data will arrive.
// Note: on non-mySTEP builds NSXMLParser is #defined to WebKitXMLParser above.
@interface NSXMLParser (Private)
- (void) _parseData:(NSData *) data;
@end
@implementation _WebHTMLDocumentRepresentation
// Shared lookup table, loaded once from DOMHTML.plist. The delegate methods
// below map a tag name to a class name through it (see NSClassFromString use).
static NSDictionary *tagtable;

/*
 * Initializes the representation. The first successful call also loads the
 * shared tag table from the "DOMHTML.plist" resource of the bundle that
 * contains the receiver's class; an assertion fires if it cannot be loaded.
 *
 * Returns the initialized receiver, or nil if [super init] failed.
 */
- (id) init
{
    self=[super init];
    if(self && !tagtable)
    { // first instance: load the table
        // ask for the class by message instead of reading the isa ivar directly
        // (direct isa access is deprecated and unreliable on modern runtimes)
        Class cls=[self class];
        NSBundle *bundle=[NSBundle bundleForClass:cls];
        NSString *path=[bundle pathForResource:@"DOMHTML" ofType:@"plist"];
#if 0
        NSLog(@"bundle for class %@=%@", NSStringFromClass(cls), bundle);
        NSLog(@"tagtable path=%@", path);
#endif
        tagtable=[[NSDictionary alloc] initWithContentsOfFile:path];
#if 0
        NSLog(@"path=%@", path);
        NSLog(@"tagtable=%@", tagtable);
#endif
        NSAssert(tagtable, @"could not load tag table");
    }
    return self;
}
/*
 * Releases everything this object owns.
 *
 * _parser is created with alloc/init in -setDataSource: and _head with
 * alloc in -_head; both are owned references. _parser is otherwise only
 * released in -parserDidEndDocument:, so without the releases here both
 * objects leak when the representation goes away before parsing completes.
 * Messaging nil is a no-op, so this is safe when either was never created
 * or has already been released and cleared.
 */
- (void) dealloc
{
#if 0
    NSLog(@"dealloc %@: %@", NSStringFromClass([self class]), self);
#endif
    [_root _setVisualRepresentation:nil];
    [_parser setDelegate:nil];  // never leave the parser with a dangling delegate
    [_parser release];
    [_head release];
    [_elementStack release];
    [super dealloc];
}
// Debug description: the inherited description followed, on a new line, by the root node.
- (NSString *) description
{
    NSString *inherited=[super description];

    return [NSString stringWithFormat:@"%@\n%@", inherited, _root];
}
// Forwards an abort request to the current parser (a no-op when there is none).
- (void) _abortParsing
{
    [_parser abortParsing];
}
// methods from WebDocumentRepresentation protocol
/*
 * Attaches the representation to a data source:
 *  - creates the document view for the response's MIME type and installs it
 *    in the frame view,
 *  - builds a minimal DOM tree (#document -> HTML -> BODY) and registers it
 *    with the frame,
 *  - creates a fresh incremental parser with this object as its delegate,
 *  - resets the element stack so that parsed content is appended to BODY.
 *
 * State left over from a previous call (parser, cached HEAD element, element
 * stack) is released first so that calling this method again neither leaks
 * nor keeps pointers into the previous tree.
 */
- (void) setDataSource:(WebDataSource *) dataSource
{
    Class viewclass;
    WebFrame *frame=[dataSource webFrame];
    WebFrameView *frameView=[frame frameView];
    NSView *view;
    viewclass=[WebView _viewClassForMIMEType:[[dataSource response] MIMEType]]; // well, we should know that...
    view=[[viewclass alloc] initWithFrame:[frameView _recommendedDocumentFrame]];
    [view setDataSource:dataSource];
    [frameView _setDocumentView:view];
    [view release];
    [_head release];    // -_head holds an owned reference; drop the one that belongs to an older tree
    _head=nil;          // so that -_head creates a new element for the new tree on demand
    _root=[[[DOMHTMLDocument alloc] _initWithName:@"#document" namespaceURI:nil] autorelease]; // a new root
    [_root _setVisualRepresentation:view]; // make the view receive change notifications
    [frame _setDOMDocument:(RENAME(DOMDocument) *) _root];
    [(DOMHTMLDocument *) _root _setWebFrame:frame];
    [(DOMHTMLDocument *) _root _setWebDataSource:dataSource];
    _html=[[[DOMHTMLHtmlElement alloc] _initWithName:@"HTML" namespaceURI:nil] autorelease]; // build a minimal tree
    [_root appendChild:_html];
    _body=[[[DOMHTMLBodyElement alloc] _initWithName:@"BODY" namespaceURI:nil] autorelease];
    [_html appendChild:_body];
    [_parser setDelegate:nil];  // detach and release a parser left over from an earlier call
    [_parser release];
    _parser=[[NSXMLParser alloc] init]; // initialize for incremental parsing
    [_parser setDelegate:self];
    [_parser _setEncoding:[dataSource _textEncoding]];
#if 0
    NSLog(@"parser: %@", _parser);
#endif
    [_elementStack release];
    _elementStack=[[NSMutableArray alloc] initWithCapacity:20];
    [_elementStack addObject:_body]; // append whatever is parsed to body
    [super setDataSource:dataSource];
}
// Called when the data source has delivered all of its data.
- (void) finishedLoadingWithDataSource:(WebDataSource *) source
{
#if 0
    NSLog(@"WebHTMLDocumentRepresentation finishedLoadingWithDataSource:%@", source);
#endif
    // a nil fragment tells the parser that the input has ended
    [_parser _parseData:nil];
}
// Load errors are currently not acted upon; only the disabled log below mentions them.
- (void) receivedError:(NSError *) error withDataSource:(WebDataSource *) source
{
#if 0
    NSLog(@"WebHTMLDocumentRepresentation receivedError: %@", error);
#endif
}
/*
 * WebDocumentRepresentation callback, invoked once for every data fragment
 * that arrives for `source`.
 *
 * NOTE(review): the text of this method looks truncated. The `for(i=0; i`
 * statement is incomplete, the second `#if 0` has no matching `#else` or
 * `#endif` inside the method, and the locals `title` and `arp` are declared
 * but never used in what remains. Presumably everything between a `<` and
 * the following `>` was stripped; restore the body from the upstream
 * file before changing any code here.
 * The one statement that is still readable sends -dataSourceUpdated: to the
 * document view of the source's frame view.
 */
- (void) receivedData:(NSData *) data withDataSource:(WebDataSource *) source;
{ // we are repeatedly called for each data fragment!
NSString *title;
NSAutoreleasePool *arp;
#if 0
NSLog(@"WebHTMLDocumentRepresentation receivedData %@", data);
// NSLog(@"document source: %@", [self documentSource]);
#endif
#if 0 // RUN A PARSER ROBUSTNESS TEST - NOTE: this might need Gigabytes to print the logs
{ // pass byte for byte to check if the parser correctly handles incomplete tags
unsigned i, len=[data length];
for(i=0; i *)[[[source webFrame] frameView] documentView] dataSourceUpdated:source]; // notify frame view
}
// the parser currently in use (nil after -parserDidEndDocument: has cleared it)
- (id) _parser
{
    return _parser;
}
// the element on top of the element stack, i.e. where parsed content is currently appended
- (DOMHTMLElement *) _lastObject
{
    return [_elementStack lastObject];
}
// the root (#document) node
- (DOMHTMLElement *) _root
{
    return _root;
}
// the <html> node
- (DOMHTMLElement *) _html
{
    return _html;
}
// the <body> node
- (DOMHTMLElement *) _body
{
    return _body;
}
// the <head> node - it is not part of the minimal tree and is only created
// (and inserted in front of <body>) the first time somebody asks for it
- (DOMHTMLElement *) _head
{
    if(_head)
        return _head;   // already exists
    _head=[[DOMHTMLHeadElement alloc] _initWithName:@"HEAD" namespaceURI:nil];
    [_html insertBefore:_head :_body];  // <head> goes in front of <body>
    return _head;
}
/*
 * Returns the document title, or nil when none is found.
 *
 * The search reads the _head ivar directly instead of calling -_head, so
 * looking up the title does not create a HEAD element as a side effect.
 * What remains of the code splices text fragments together and returns the
 * result trimmed of leading/trailing whitespace and newlines.
 *
 * NOTE(review): the text of this method looks truncated. The `for(i=0; i`
 * line is incomplete, the first `#endif` has no visible `#if`, the braces do
 * not balance, and `title` and `t` are used without a visible declaration.
 * Presumably everything between a `<` and the following `>` was stripped
 * (also inside the "no found in" log string); restore from the upstream
 * file before changing any code here.
 */
- (NSString *) title;
{ // return the value of the first DOMHTMLTitleElement's #text
DOMNodeList *children=[_head childNodes];
int i, cnt=[children length];
for(i=0; i fragment %@", [t data]);
#endif
title=[title stringByAppendingString:[t data]]; // splice
}
}
#if 0
NSLog(@"found %@", title);
#endif
return [title stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; // found!
}
}
#if 0
NSLog(@"no found in %@", _head);
#endif
return nil;
}
// HTML documents can always hand out their source text (see -documentSource)
- (BOOL) canProvideDocumentSource
{
    return YES;
}
/*
 * Returns the raw data of the data source as a string.
 * Decoding is tried with the data source's text encoding first and with
 * ASCII second; if both fail an empty string is returned (never nil).
 */
- (NSString *) documentSource
{
    NSStringEncoding declared=[_dataSource _textEncoding];
    NSString *text=[[[NSString alloc] initWithData:[_dataSource data] encoding:declared] autorelease];

    if(text)
        return text;
    // declared encoding did not work - retry as plain ASCII
    text=[[[NSString alloc] initWithData:[_dataSource data] encoding:NSASCIIStringEncoding] autorelease];
    return text ? text : @"";
}
// XML Parser delegate methods for parsing HTML
// Parser delegate: logs the error together with the receiver's class name.
// The class is obtained by message instead of reading the isa ivar directly,
// which is deprecated and unreliable on modern runtimes.
- (void) parser:(NSXMLParser *) parser parseErrorOccurred:(NSError *) parseError
{
    NSLog(@"%@ parseErrorOccurred: %@", NSStringFromClass([self class]), parseError);
}
// Parser delegate: wraps non-empty character data in a #text node and appends
// it to the element on top of the element stack.
- (void) parser:(NSXMLParser *) parser foundCharacters:(NSString *) string
{
    DOMText *textNode;

    if([string length] == 0)
        return; // nothing to add
    textNode=[[DOMText alloc] _initWithName:@"#text" namespaceURI:nil];
#if 0
    NSLog(@"%@ foundCharacters: %@", NSStringFromClass(isa), string);
#endif
    [textNode setData:string];
    [[_elementStack lastObject] appendChild:textNode];
    [textNode release];
}
// Parser delegate: wraps a non-empty comment in a #comment node and appends
// it to the element on top of the element stack.
- (void) parser:(NSXMLParser *) parser foundComment:(NSString *) comment
{
    if([comment length] > 0)
    {
        DOMComment *r=[[DOMComment alloc] _initWithName:@"#comment" namespaceURI:nil];
#if 0
        // logs the `comment` parameter (the log used to name an undeclared
        // variable and would not have compiled when enabled)
        NSLog(@"%@ foundComment: %@", NSStringFromClass([self class]), comment);
#endif
        [r setData:comment];
        [[_elementStack lastObject] appendChild:r];
        [r release];
    }
}
// Parser delegate: decodes a CDATA block as UTF-8, stores the text in a
// #CDATA node and appends that node to the element on top of the element stack.
- (void) parser:(NSXMLParser *) parser foundCData:(NSData *) cdata
{
    DOMCDATASection *section=[[DOMCDATASection alloc] _initWithName:@"#CDATA" namespaceURI:nil];
    NSString *text=[[NSString alloc] initWithData:cdata encoding:NSUTF8StringEncoding]; // which encoding???

#if 0
    NSLog(@"%@ foundCDATA: %@", NSStringFromClass(isa), text);
#endif
    [section setData:text];
    [[_elementStack lastObject] appendChild:section];
    [section release];
    [text release];
}
/* FIXME:
- ... foundDOCTYPE:
add DOMDocumentType child to DOMDocument so that -[DOMDocument doctype] can return it
*/
// Parser delegate: ignorable whitespace is dropped. The disabled code below
// shows how it would be kept as a #text node instead.
- (void) parser:(NSXMLParser *) parser foundIgnorableWhitespace:(NSString *) whitespaceString
{
#if 0 // ignore ignorable text
    if([whitespaceString length] > 0)
    {
        DOMText *textNode=[[DOMText alloc] _initWithName:@"#text" namespaceURI:nil];
#if 0
        NSLog(@"%@ foundIgnorableWhitespace: %@", NSStringFromClass(isa), whitespaceString);
#endif
        [textNode setData:whitespaceString];
        [[_elementStack lastObject] appendChild:textNode];
        [textNode release];
    }
#endif
}
/*
 * Parser delegate: handles an opening tag.
 *
 * Visible steps:
 *  1. look up the class name for `tag` in the shared tag table; tags without
 *     a class, and classes whose nesting style is DOMHTMLIgnore, are skipped
 *  2. for a lazily nested tag, close the element on top of the stack if that
 *     one is lazily nested as well
 *  3. ask the class for the designated parent node of the new element
 *  4. copy the attributes onto the element, but only those it does not have yet
 *  5. push the element on the stack for standard and lazy nesting
 *  6. tell the element that it has been set up by this representation
 *
 * NOTE(review): the text of this method looks damaged. The lone `yyy` line
 * appears to be the tail of the preceding comment and is not valid code, and
 * the `for(i=0; i of class %@"...` line is truncated, so the singleton lookup
 * and the code that creates `newElement` are not visible here. Presumably
 * everything between a `<` and the following `>` was stripped; restore from
 * the upstream file before changing any code here.
 */
- (void) parser:(NSXMLParser *) parser didStartElement:(NSString *) tag namespaceURI:(NSString *) uri qualifiedName:(NSString *) name attributes:(NSDictionary *) attributes;
{ // handle opening tags
Class c=NSClassFromString([tagtable objectForKey:tag]);
DOMHTMLElement *newElement=nil;
DOMHTMLElement *parent;
NSEnumerator *e;
NSString *key;
DOMHTMLNestingStyle nesting;
#if 0
NSLog(@"%@ %@: <%@> -> %@", NSStringFromClass(isa), [parser _tagPath], tag, NSStringFromClass(c));
#endif
if(!c)
{
#if 0
NSLog(@"%@ %@: <%@> ignored - no class found", NSStringFromClass(isa), [parser _tagPath], tag);
#endif
return; // ignore
}
nesting=[c _nesting];
if(nesting == DOMHTMLIgnore)
return; // ignore
if(nesting == DOMHTMLLazyNesting)
{ // virtually close previous node if both are lazily nested -- xxx
yyy
#if 0 // some test code
id last=[_elementStack lastObject];
Class class=[last class];
DOMHTMLNestingStyle nesting=[class _nesting];
if([newElement isKindOfClass:[DOMHTMLParagraphElement class]])
NSLog(@"last %@", last);
#endif
if([[[_elementStack lastObject] class] _nesting] == DOMHTMLLazyNesting)
{ // has been pushed
[[_elementStack lastObject] _elementLoaded]; // run any finalizing code
[_elementStack removeLastObject]; // go up one level
}
}
// the element class decides where in the tree the new node belongs
parent=[c _designatedParentNode:self];
#if 0
NSLog(@"<%@> parent node=%@", tag, parent);
#endif
if(nesting == DOMHTMLSingletonNesting)
{ // look if designated parent already has a singleton node
NSArray *children=[[parent childNodes] _list];
unsigned int i, cnt=[children count];
NSString *t=[tag uppercaseString];
for(i=0; i of class %@", tag, NSStringFromClass(c));
return; // ignore if we can't allocate
}
[parent appendChild:newElement]; // make sibling
}
e=[attributes keyEnumerator];
while((key=[e nextObject]))
{ // attach or merge attributes
NSString *val=[attributes objectForKey:key];
// an NSNull value is stored as a nil attribute value
if([val class] == [NSNull class])
val=nil;
if(![newElement hasAttribute:key])
[newElement setAttribute:key :val]; // like Safari: merges only not-yet-existing attributes from a repeated tag (e.g. )
}
if(nesting == DOMHTMLStandardNesting || nesting == DOMHTMLLazyNesting)
[_elementStack addObject:newElement]; // go down one level for new element
[newElement _elementDidAwakeFromDocumentRepresentation:self];
}
/*
 * Parser delegate: handles a closing tag.
 * Tags without a class in the tag table, and classes with nesting style
 * DOMHTMLIgnore, are skipped. Otherwise the element on top of the stack is
 * finalized and popped - but only if its node name equals the uppercased
 * tag; a closing tag that does not match the top of the stack is ignored.
 */
- (void) parser:(NSXMLParser *) parser didEndElement:(NSString *) tag namespaceURI:(NSString *) uri qualifiedName:(NSString *) name
{
    Class elementClass=NSClassFromString([tagtable objectForKey:tag]);
    DOMHTMLElement *top;
    NSString *upperTag;

#if 0
    NSLog(@"%@ %@: %@> -> %@", NSStringFromClass(isa), [parser _tagPath], tag, NSStringFromClass(elementClass));
#endif
    if(!elementClass || [elementClass _nesting] == DOMHTMLIgnore)
        return; // unknown or ignored tag
    upperTag=[tag uppercaseString];
    top=[_elementStack lastObject];
    if(![[top nodeName] isEqualToString:upperTag])
        return; // closing tag does not belong to the innermost open element
    [top _elementLoaded];               // any finalizing code
    [_elementStack removeLastObject];   // go up one level
}
/*
 * Parser delegate: the document has been parsed completely.
 * Gives up ownership of the parser and notifies the web frame.
 *
 * The parser is autoreleased rather than released: this method is invoked
 * by the parser itself, so an immediate release could deallocate the object
 * that is still executing once this callback returns.
 */
- (void) parserDidEndDocument:(NSXMLParser *) parser
{ // done
#if 0
    NSLog(@"WebHTMLDocumentRepresentation parserDidEndDocument:%@", parser);
#endif
    [_parser autorelease];  // keep it alive until the caller has unwound
    _parser=nil;
    [[_dataSource webFrame] _finishedLoading]; // notify
}
@end
// Document representation for RTF content: it installs a document view for
// the MIME type and forwards load progress; no document tree is built here.
@implementation _WebRTFDocumentRepresentation

// methods from WebDocumentRepresentation protocol

// Creates the document view matching the response's MIME type, sized to the
// frame view, installs it and then lets the superclass record the data source.
- (void) setDataSource:(WebDataSource *) dataSource
{
    WebFrame *webFrame=[dataSource webFrame];
    WebFrameView *frameView=[webFrame frameView];
    Class viewclass=[WebView _viewClassForMIMEType:[[dataSource response] MIMEType]]; // well, we should know that...
    NSView *docView=[[viewclass alloc] initWithFrame:[frameView frame]];

    [docView setDataSource:dataSource];
    [frameView _setDocumentView:docView];
    [docView release];
    [super setDataSource:dataSource];
}

// All data has arrived: tell the frame that loading is complete.
- (void) finishedLoadingWithDataSource:(WebDataSource *) source
{
    [[source webFrame] _finishedLoading]; // notify
}

// Load errors are currently not acted upon.
- (void) receivedError:(NSError *) error withDataSource:(WebDataSource *) source
{
#if 0
    NSLog(@"WebHTMLDocumentRepresentation receivedError: %@", error);
#endif
}

// Called repeatedly, once per data fragment: let the document view refresh itself.
- (void) receivedData:(NSData *) data withDataSource:(WebDataSource *) source
{
    [(NSView *)[[[source webFrame] frameView] documentView] dataSourceUpdated:source]; // notify frame view
}

// No title is extracted from RTF (yet).
- (NSString *) title
{ // try to get from RTF
    return nil;
}

// RTF documents do not provide a document source.
- (BOOL) canProvideDocumentSource
{
    return NO;
}

- (NSString *) documentSource
{
    return nil;
}

@end