SGML Parsing
Basic usage of the SAX-style API to parse and/or validate SGML (or HTML or XML), performing custom event processing as the input markup is read:
var sgml = require('sgml')

// Create the collaborating objects up front so every name used below is
// declared (no implicit globals, which throw in strict mode).
var entitymanager = new sgml.NoopEntitymanager()
var errorhandler = new sgml.Errorhandler()
var resolver = new sgml.Resolver()
var parser = new sgml.Parser()

// implement handler functions according to your needs
parser.documentHandler = {
  startDocument: function() { /* ... */ },
  endDocument: function() { /* ... */ },
  characters: function(text) { /* ... */ },
  startElement: function(name, attributes) { /* ... */ },
  endElement: function(name) { /* ... */ },
}

// other handlers get initialized to no-op or default handlers
parser.dtdHandler = new sgml.DtdHandler()
parser.errorHandler = errorhandler
parser.lexicalHandler = new sgml.LexicalHandler()
// reuse the resolver created above instead of constructing a second one
parser.entityResolver = resolver

// we're going to parse from a string
// NOTE(review): PlatformStringRecordmanager is referenced unqualified and is
// not defined in this snippet -- presumably it is provided by the sgml
// package (e.g. sgml.PlatformStringRecordmanager); confirm before use.
var recordmanager = new PlatformStringRecordmanager(errorhandler, parser)
recordmanager.set_input(
  "<!doctype html [ <!element html - - (#pcdata)> ]><html>hello</html>"
)
parser.recordManager = recordmanager
recordmanager.start_records()
// ... Your handler functions will get called
// as the input stream is parsed
Parsing SGML and writing normalized result to a stream
This can be used, e.g., to sanitize HTML into XML for further processing.
var sgml = require("sgml")

var entitymanager = new sgml.NoopEntitymanager()
var errorhandler = new sgml.Errorhandler()
var resolver = new sgml.Resolver()
var parser = new sgml.Parser()

// Write the result to standard output; the output handler is constructed
// with the target stream and the entity manager.
var outputstream = process.stdout
var outputhandler = new sgml.Outputhandler(outputstream, entitymanager)
outputhandler.output_format = "html"

// The same output handler object is registered as the document, DTD and
// lexical handler; errors and entity resolution use their own objects.
parser.documentHandler = outputhandler
parser.dtdHandler = outputhandler
parser.errorHandler = errorhandler
parser.lexicalHandler = outputhandler
parser.entityResolver = resolver

// Parse from an in-memory string.
// NOTE(review): PlatformStringRecordmanager is referenced unqualified and is
// not defined in this snippet -- presumably it is provided by the sgml
// package (e.g. sgml.PlatformStringRecordmanager); confirm before use.
var recordmanager = new PlatformStringRecordmanager(errorhandler, parser)
recordmanager.set_input(
  "<!doctype html [ <!element html - - (#pcdata)> ]><html>hello</html>"
)
parser.recordManager = recordmanager
recordmanager.start_records()