Spaces:
Runtime error
Runtime error
| /* | |
| * Summary: interface for an HTML 4.0 non-verifying parser | |
| * Description: this module implements an HTML 4.0 non-verifying parser | |
| * with API compatible with the XML parser ones. It should | |
| * be able to parse "real world" HTML, even if severely | |
| * broken from a specification point of view. | |
| * | |
| * Copy: See Copyright for the status of this software. | |
| * | |
| * Author: Daniel Veillard | |
| */ | |
| extern "C" { | |
| /* | |
| * Most of the back-end structures from XML and HTML are shared. | |
| * Every html* name declared here is a plain typedef of the matching | |
| * xml* type, so both spellings name exactly the same type. | |
| */ | |
| typedef xmlParserCtxt htmlParserCtxt; /* parser context */ | |
| typedef xmlParserCtxtPtr htmlParserCtxtPtr; /* context handle taken by the parsing calls */ | |
| typedef xmlParserNodeInfo htmlParserNodeInfo; | |
| typedef xmlSAXHandler htmlSAXHandler; /* SAX handler */ | |
| typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; | |
| typedef xmlParserInput htmlParserInput; /* parser input */ | |
| typedef xmlParserInputPtr htmlParserInputPtr; | |
| typedef xmlDocPtr htmlDocPtr; /* document handle */ | |
| typedef xmlNodePtr htmlNodePtr; /* node handle */ | |
| /* | |
| * Internal description of an HTML element, representing HTML 4.01 | |
| * and XHTML 1.0 (which share the same structure). | |
| * | |
| * htmlElemDesc names the structure itself and htmlElemDescPtr a pointer | |
| * to it. The char members are small flags or codes, as described next | |
| * to each field. | |
| */ | |
| typedef struct _htmlElemDesc htmlElemDesc; | |
| typedef htmlElemDesc *htmlElemDescPtr; | |
| struct _htmlElemDesc { | |
| const char *name; /* The tag name */ | |
| char startTag; /* Whether the start tag can be implied */ | |
| char endTag; /* Whether the end tag can be implied */ | |
| char saveEndTag; /* Whether the end tag should be saved */ | |
| char empty; /* Is this an empty element ? */ | |
| char depr; /* Is this a deprecated element ? */ | |
| char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ | |
| char isinline; /* 0 for a block element, 1 for an inline element */ | |
| const char *desc; /* the description */ | |
| /* NRK Jan.2003 | |
| * New fields encapsulating HTML structure | |
| * | |
| * Bugs: | |
| * This is a very limited representation. It fails to tell us when | |
| * an element *requires* subelements (we only have whether they're | |
| * allowed or not), and it doesn't tell us where CDATA and PCDATA | |
| * are allowed. Some element relationships are not fully represented: | |
| * these are flagged with the word MODIFIER | |
| * | |
| * NOTE(review): the const char** members below are presumably | |
| * NULL-terminated arrays of names; the terminator convention is not | |
| * visible in this header - confirm against the element table. | |
| */ | |
| const char** subelts; /* allowed sub-elements of this element */ | |
| const char* defaultsubelt; /* subelement for suggested auto-repair | |
| if necessary or NULL */ | |
| const char** attrs_opt; /* Optional Attributes */ | |
| const char** attrs_depr; /* Additional deprecated attributes */ | |
| const char** attrs_req; /* Required attributes */ | |
| }; | |
| /* | |
| * Internal description of an HTML entity. | |
| * | |
| * htmlEntityDesc names the structure itself and htmlEntityDescPtr a | |
| * pointer to it. Each record ties a character value to an entity name | |
| * and a human-readable description. | |
| */ | |
| typedef struct _htmlEntityDesc htmlEntityDesc; | |
| typedef htmlEntityDesc *htmlEntityDescPtr; | |
| struct _htmlEntityDesc { | |
| unsigned int value; /* the UNICODE value for the character */ | |
| const char *name; /* The entity name */ | |
| const char *desc; /* the description */ | |
| }; | |
| /* | |
| * There are only a few public functions. | |
| * | |
| * The lookup functions hand back pointers to const descriptions, so | |
| * callers cannot modify the records through them. | |
| * NOTE(review): what a failed lookup returns (presumably NULL) is not | |
| * visible in this header - confirm in the implementation. | |
| */ | |
| XMLPUBFUN const htmlElemDesc * XMLCALL | |
| htmlTagLookup (const xmlChar *tag); /* element description for a tag name */ | |
| XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| htmlEntityLookup(const xmlChar *name); /* entity description looked up by name */ | |
| XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| htmlEntityValueLookup(unsigned int value); /* entity description looked up by value */ | |
| XMLPUBFUN int XMLCALL | |
| htmlIsAutoClosed(htmlDocPtr doc, | |
| htmlNodePtr elem); /* int result; presumably a boolean - confirm */ | |
| XMLPUBFUN int XMLCALL | |
| htmlAutoCloseTag(htmlDocPtr doc, | |
| const xmlChar *name, | |
| htmlNodePtr elem); /* int result; presumably a boolean - confirm */ | |
| XMLPUBFUN const htmlEntityDesc * XMLCALL | |
| htmlParseEntityRef(htmlParserCtxtPtr ctxt, | |
| const xmlChar **str); /* str is a pointer-to-pointer; presumably an output - confirm */ | |
| XMLPUBFUN int XMLCALL | |
| htmlParseCharRef(htmlParserCtxtPtr ctxt); /* parses from the context, yields an int */ | |
| XMLPUBFUN void XMLCALL | |
| htmlParseElement(htmlParserCtxtPtr ctxt); /* parses from the context, no return value */ | |
| XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| htmlNewParserCtxt(void); /* takes no input, returns a parser context handle */ | |
| XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| htmlCreateMemoryParserCtxt(const char *buffer, | |
| int size); /* context over a buffer of the given size */ | |
| XMLPUBFUN int XMLCALL | |
| htmlParseDocument(htmlParserCtxtPtr ctxt); /* int status; NOTE(review): value convention not shown here - confirm */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlSAXParseDoc (const xmlChar *cur, | |
| const char *encoding, | |
| htmlSAXHandlerPtr sax, | |
| void *userData); /* string input with a caller-supplied SAX handler and opaque user data */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlParseDoc (const xmlChar *cur, | |
| const char *encoding); /* string input, returns a document handle */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlSAXParseFile(const char *filename, | |
| const char *encoding, | |
| htmlSAXHandlerPtr sax, | |
| void *userData); /* file-name input with a caller-supplied SAX handler and opaque user data */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlParseFile (const char *filename, | |
| const char *encoding); /* file-name input, returns a document handle */ | |
| XMLPUBFUN int XMLCALL | |
| UTF8ToHtml (unsigned char *out, | |
| int *outlen, | |
| const unsigned char *in, | |
| int *inlen); /* both lengths are passed by pointer; presumably updated on return - confirm */ | |
| XMLPUBFUN int XMLCALL | |
| htmlEncodeEntities(unsigned char *out, | |
| int *outlen, | |
| const unsigned char *in, | |
| int *inlen, int quoteChar); /* same buffer/length layout as UTF8ToHtml, plus a quote character */ | |
| XMLPUBFUN int XMLCALL | |
| htmlIsScriptAttribute(const xmlChar *name); /* int result; presumably a boolean - confirm */ | |
| XMLPUBFUN int XMLCALL | |
| htmlHandleOmittedElem(int val); /* NOTE(review): meaning of val and of the returned int is not shown here - confirm */ | |
| /** | |
| * Interfaces for the Push mode. | |
| * | |
| * Three declarations: one that builds a context from a SAX handler and an | |
| * initial chunk, one that takes a further chunk plus a terminate flag, | |
| * and one that takes a context and returns nothing. | |
| * NOTE(review): the intended sequence is presumably create, feed chunks | |
| * (terminate non-zero on the last one), then free - confirm in the | |
| * implementation. | |
| */ | |
| XMLPUBFUN htmlParserCtxtPtr XMLCALL | |
| htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, | |
| void *user_data, | |
| const char *chunk, | |
| int size, | |
| const char *filename, | |
| xmlCharEncoding enc); /* size presumably counts the bytes in chunk - confirm */ | |
| XMLPUBFUN int XMLCALL | |
| htmlParseChunk (htmlParserCtxtPtr ctxt, | |
| const char *chunk, | |
| int size, | |
| int terminate); /* int status; value convention not shown here */ | |
| XMLPUBFUN void XMLCALL | |
| htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); | |
| /* | |
| * New set of simpler/more flexible APIs | |
| */ | |
| /** | |
| * htmlParserOption: | |
| * | |
| * This is the set of HTML parser options that can be passed down | |
| * to the htmlReadDoc() and similar calls. | |
| * | |
| * Every enumerator is a distinct single bit, so several options can be | |
| * OR-ed together into the int options argument. The bit positions are | |
| * deliberately sparse. | |
| * NOTE(review): the gaps presumably keep these values aligned with the | |
| * XML parser's option bits - confirm against the XML parser header. | |
| */ | |
| typedef enum { | |
| HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ | |
| HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ | |
| HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ | |
| HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ | |
| HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ | |
| HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ | |
| HTML_PARSE_NONET = 1<<11,/* Forbid network access */ | |
| HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */ | |
| HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */ | |
| HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */ | |
| } htmlParserOption; | |
| XMLPUBFUN void XMLCALL | |
| htmlCtxtReset (htmlParserCtxtPtr ctxt); /* takes a context, returns nothing */ | |
| XMLPUBFUN int XMLCALL | |
| htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, | |
| int options); /* options: OR-ed htmlParserOption bits; meaning of the int result not shown here */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlReadDoc (const xmlChar *cur, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: an xmlChar string */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlReadFile (const char *URL, | |
| const char *encoding, | |
| int options); /* input: named by URL */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlReadMemory (const char *buffer, | |
| int size, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: a buffer plus its size */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlReadFd (int fd, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: a file descriptor */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlReadIO (xmlInputReadCallback ioread, | |
| xmlInputCloseCallback ioclose, | |
| void *ioctx, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: read/close callbacks with an opaque ioctx */ | |
| /* | |
| * Context-based variants of the htmlRead* calls: each one takes the | |
| * parser context to use as its first argument, followed by the same | |
| * input description and option bits as its context-free counterpart. | |
| * | |
| * The context is spelled htmlParserCtxtPtr, like every other HTML entry | |
| * point in this header. That name is a typedef of xmlParserCtxtPtr, so | |
| * the spelling changes nothing for existing callers. | |
| */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlCtxtReadDoc (htmlParserCtxtPtr ctxt, | |
| const xmlChar *cur, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: an xmlChar string */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlCtxtReadFile (htmlParserCtxtPtr ctxt, | |
| const char *filename, | |
| const char *encoding, | |
| int options); /* input: named by filename */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlCtxtReadMemory (htmlParserCtxtPtr ctxt, | |
| const char *buffer, | |
| int size, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: a buffer plus its size */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlCtxtReadFd (htmlParserCtxtPtr ctxt, | |
| int fd, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: a file descriptor */ | |
| XMLPUBFUN htmlDocPtr XMLCALL | |
| htmlCtxtReadIO (htmlParserCtxtPtr ctxt, | |
| xmlInputReadCallback ioread, | |
| xmlInputCloseCallback ioclose, | |
| void *ioctx, | |
| const char *URL, | |
| const char *encoding, | |
| int options); /* input: read/close callbacks with an opaque ioctx */ | |
| /* NRK/Jan2003: further knowledge of HTML structure | |
| * | |
| * htmlStatus is the result type of the status queries declared after | |
| * this enum. The non-zero values are bit patterns: HTML_REQUIRED (0xc) | |
| * is 0x8 combined with HTML_VALID (0x4), so masking a result with | |
| * HTML_VALID is non-zero for both HTML_VALID and HTML_REQUIRED. | |
| */ | |
| typedef enum { | |
| HTML_NA = 0 , /* something we don't check at all */ | |
| HTML_INVALID = 0x1 , | |
| HTML_DEPRECATED = 0x2 , | |
| HTML_VALID = 0x4 , | |
| HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */ | |
| } htmlStatus ; | |
| /* Using htmlElemDesc rather than name here, to emphasise the fact | |
| that otherwise there's a lookup overhead | |
| | |
| The prototypes below leave their parameters unnamed. | |
| NOTE(review): the roles noted beside each one are inferred from the | |
| function names and parameter types only - confirm against the | |
| definitions before relying on them. | |
| */ | |
| XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ; /* presumably (element, attribute name, flag) */ | |
| XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; /* presumably (parent element, child tag name) */ | |
| XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ; /* presumably (parent element, child element) */ | |
| XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ; /* NOTE(review): if htmlNodePtr is a pointer typedef, this const binds to the pointer, not the node */ | |
| /** | |
| * htmlDefaultSubelement: | |
| * @elt: HTML element | |
| * | |
| * Returns the default subelement for this element | |
| */ | |
| /** | |
| * htmlElementAllowedHereDesc: | |
| * @parent: HTML parent element | |
| * @elt: HTML element | |
| * | |
| * Checks whether an HTML element description may be a | |
| * direct child of the specified element. | |
| * | |
| * Returns 1 if allowed; 0 otherwise. | |
| */ | |
| /** | |
| * htmlRequiredAttrs: | |
| * @elt: HTML element | |
| * | |
| * Returns the attributes required for the specified element. | |
| */ | |
| } | |