| | from __future__ import absolute_import, division, unicode_literals |
| | from pip._vendor.six import with_metaclass, viewkeys |
| |
|
| | import types |
| |
|
| | from . import _inputstream |
| | from . import _tokenizer |
| |
|
| | from . import treebuilders |
| | from .treebuilders.base import Marker |
| |
|
| | from . import _utils |
| | from .constants import ( |
| | spaceCharacters, asciiUpper2Lower, |
| | specialElements, headingElements, cdataElements, rcdataElements, |
| | tokenTypes, tagTokenTypes, |
| | namespaces, |
| | htmlIntegrationPointElements, mathmlTextIntegrationPointElements, |
| | adjustForeignAttributes as adjustForeignAttributesMap, |
| | adjustMathMLAttributes, adjustSVGAttributes, |
| | E, |
| | _ReparseException |
| | ) |
| |
|
| |
|
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML document as a string or file-like object into a tree

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any further keyword arguments are forwarded unchanged to
    ``HTMLParser.parse``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    # Resolve the treebuilder name to its class, then let a throwaway parser
    # instance do the real work.
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)
| |
|
| |
|
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any further keyword arguments are forwarded unchanged to
    ``HTMLParser.parseFragment``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    # Resolve the treebuilder name to its class, then let a throwaway parser
    # instance do the real work.
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class, namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, **kwargs)
| |
|
| |
|
def method_decorator_metaclass(function):
    """Build a metaclass that applies *function* to every plain function
    found in the namespace of each class it creates.

    :arg function: a decorator taking a function and returning its replacement

    :returns: the metaclass (a subclass of ``type``)
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Snapshot the keys first; only values are replaced, and they are
            # replaced in the namespace dict that was handed in.
            for key in list(classDict):
                member = classDict[key]
                if isinstance(member, types.FunctionType):
                    classDict[key] = function(member)
            return type.__new__(meta, classname, bases, classDict)
    return Decorated
| |
|
| |
|
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib import treebuilders
        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> lxml_builder = treebuilders.getTreeBuilder('lxml')
        >>> parser = HTMLParser(lxml_builder, strict=True)  # strict parser with lxml builder

        """

        # When set, parseError() raises on the first error it records
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        # ``tree`` is a class: it is instantiated here, not used as a name
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One instance of every phase class, keyed by phase name
        self.phases = {name: cls(self, self.tree) for name, cls in
                       getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize *stream* and run the main loop over it.

        :arg stream: input handed to the tokenizer

        :arg innerHTML: whether to parse in fragment (innerHTML) mode

        :arg container: name of the fragment's context element

        :arg scripting: stored on the parser as ``self.scripting``

        Remaining keyword arguments are passed to the tokenizer.
        """

        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            # Start over from a clean state and run exactly once more; a
            # second _ReparseException propagates to the caller.
            self.reset()
            self.mainLoop()

    def reset(self):
        """Return the parser and its tree to the state expected before the
        first token is processed."""
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # filled by the logging wrapper when debug is enabled
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # NOTE(review): the constant names look crossed relative to the
            # tokenizer states they select (cdataElements -> rcdataState,
            # rcdataElements -> rawtextState); presumably historical naming --
            # confirm against the constants module before "fixing".
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # leave the tokenizer in whatever state it starts in
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Return whether *element* is an HTML integration point.

        A MathML ``annotation-xml`` element qualifies only through its
        ``encoding`` attribute; everything else is looked up in
        ``htmlIntegrationPointElements``.
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether *element* is listed in
        ``mathmlTextIntegrationPointElements``."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token from the tokenizer to the appropriate phase, then
        run end-of-file processing.

        A phase method may return a token, in which case that token is
        processed again (possibly by a different phase) until a phase returns
        ``None``.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Invariants of the dispatch test, built once instead of per token
        textTokenTypes = (CharactersToken, SpaceCharactersToken)
        integrationTokenTypes = (StartTagToken, CharactersToken, SpaceCharactersToken)
        mathmlOnlyStartTags = frozenset(["mglyph", "malignmark"])
        mathmlNamespace = namespaces["mathml"]

        for token in self.tokenizer:
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                tokenType = new_token["type"]

                if tokenType == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # Both the type and the name are read from the token that
                    # is actually being dispatched (new_token), so the
                    # decision stays consistent when a phase hands back a
                    # different token for reprocessing.
                    if (len(self.tree.openElements) == 0 or
                        currentNodeNamespace == self.tree.defaultNamespace or
                        (self.isMathMLTextIntegrationPoint(currentNode) and
                         ((tokenType == StartTagToken and
                           new_token["name"] not in mathmlOnlyStartTags) or
                          tokenType in textTokenTypes)) or
                        (currentNodeNamespace == mathmlNamespace and
                         currentNodeName == "annotation-xml" and
                         tokenType == StartTagToken and
                         new_token["name"] == "svg") or
                        (self.isHTMLIntegrationPoint(currentNode) and
                         tokenType in integrationTokenTypes)):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if tokenType == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif tokenType == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif tokenType == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif tokenType == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif tokenType == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif tokenType == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # A self-closing start tag that no handler acknowledged is an error
            if (tokenType == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # a phase asking for reprocessing must have moved on to a
                # phase not yet visited, otherwise this would never end
                assert self.phase not in phases

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        # innerHTML=False, container=None: full-document mode
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property if set to None, default to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise it.

        :arg errorcode: key into the error-message table ``E``

        :arg datavars: mapping used to format the message; defaults to ``{}``

        :raises ParseError: when the parser was created with ``strict=True``
        """
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def adjustMathMLAttributes(self, token):
        """Apply the ``adjustMathMLAttributes`` table to *token*."""
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the ``adjustSVGAttributes`` table to *token*."""
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign-attribute adjustment table to *token*."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing in this class assigns ``self.parser``, so this
        # looks like it can only raise AttributeError when called. Left
        # unchanged because the intended behaviour cannot be determined from
        # this file section -- confirm whether it is dead code.
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose ``self.phase`` from the stack of open elements.

        Walks the open elements from the innermost outwards and picks the
        phase mapped to the first recognised element name; the outermost
        element is treated as the fragment's context element.
        """
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # These names are only expected here in the innerHTML case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never decide the mode
            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert the element for *token* and switch to the "text" phase.

        :arg token: start tag token of the element

        :arg contentType: ``"RAWTEXT"`` or ``"RCDATA"``; selects the
            tokenizer state
        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        # Remember the phase that was active before switching to "text"
        self.originalPhase = self.phase

        self.phase = self.phases["text"]
| |
|
| |
|
| | @_utils.memoize |
| | def getPhases(debug): |
def log(function):
    """Logger that records which phase processes each token"""
    # Reverse lookup: numeric token type -> its name in tokenTypes
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        # Only the process* entry points called with a token are recorded;
        # every call is forwarded unchanged either way.
        if args and function.__name__.startswith("process"):
            token = args[0]
            token_type = token['type']
            info = {"type": type_names[token_type]}
            if token_type in tagTokenTypes:
                info["name"] = token['name']

            parser = self.parser
            parser.log.append((parser.tokenizer.state.__name__,
                               parser.phase.__class__.__name__,
                               self.__class__.__name__,
                               function.__name__,
                               info))
        return function(self, *args, **kwargs)
    return wrapped
| |
|
def getMetaclass(use_metaclass, metaclass_func):
    """Return a metaclass that decorates every method with *metaclass_func*
    when *use_metaclass* is true, and plain ``type`` otherwise."""
    return method_decorator_metaclass(metaclass_func) if use_metaclass else type
| |
|
| | |
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing
    """
    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree
        # Per-instance caches mapping a tag name to its resolved handler
        self.__startTagCache = {}
        self.__endTagCache = {}

    def processEOF(self):
        """Handle end of input; every concrete phase must override this."""
        raise NotImplementedError

    def processComment(self, token):
        """Insert the comment as a child of the current (innermost) open element."""
        current_node = self.tree.openElements[-1]
        self.tree.insertComment(token, current_node)

    def processDoctype(self, token):
        """Report a doctype seen in a phase that does not expect one."""
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        """Insert the token's text into the tree."""
        self.tree.insertText(token["data"])

    def processSpaceCharacters(self, token):
        """Insert the token's whitespace into the tree."""
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        """Dispatch a start tag to the handler registered for its name."""
        name = token["name"]
        cache = self.__startTagCache
        if name in cache:
            return cache[name](token)

        handlers = self.startTagHandler
        func = cache[name] = handlers[name]
        # Keep the cache bounded (1.1x the handler table size) so a flood of
        # distinct unknown tag names cannot grow it without limit.
        limit = len(handlers) * 1.1
        while len(cache) > limit:
            # evicts whichever key the dict iterates first
            cache.pop(next(iter(cache)))
        return func(token)

    def startTagHtml(self, token):
        """Merge the attributes of an ``<html>`` start tag into the root
        element, never overwriting attributes that are already present."""
        if not self.parser.firstStartTag and token["name"] == "html":
            self.parser.parseError("non-html-root")
        for attr, value in token["data"].items():
            root_attributes = self.tree.openElements[0].attributes
            if attr not in root_attributes:
                root_attributes[attr] = value
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch an end tag to the handler registered for its name."""
        name = token["name"]
        cache = self.__endTagCache
        if name in cache:
            return cache[name](token)

        handlers = self.endTagHandler
        func = cache[name] = handlers[name]
        # Same bounding policy as the start tag cache
        limit = len(handlers) * 1.1
        while len(cache) > limit:
            cache.pop(next(iter(cache)))
        return func(token)
| |
|
class InitialPhase(Phase):
    """Phase active before anything has been seen; it looks for a doctype
    and derives the parser's compatibility mode from it."""
    __slots__ = ()

    def processSpaceCharacters(self, token):
        # whitespace before the doctype is dropped
        pass

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        """Insert the doctype, set ``parser.compatMode`` from its identifiers
        and move on to the "beforeHtml" phase."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        unexpectedSystemId = (systemId is not None and
                              systemId != "about:legacy-compat")
        if name != "html" or publicId is not None or unexpectedSystemId:
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # Identifier matching below is done on the ASCII-lowercased form
        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        quirksPrefixes = (
            "+//silmaril//dtd html pro v0r11 19970101//",
            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
            "-//as//dtd html 3.0 aswedit + extensions//",
            "-//ietf//dtd html 2.0 level 1//",
            "-//ietf//dtd html 2.0 level 2//",
            "-//ietf//dtd html 2.0 strict level 1//",
            "-//ietf//dtd html 2.0 strict level 2//",
            "-//ietf//dtd html 2.0 strict//",
            "-//ietf//dtd html 2.0//",
            "-//ietf//dtd html 2.1e//",
            "-//ietf//dtd html 3.0//",
            "-//ietf//dtd html 3.2 final//",
            "-//ietf//dtd html 3.2//",
            "-//ietf//dtd html 3//",
            "-//ietf//dtd html level 0//",
            "-//ietf//dtd html level 1//",
            "-//ietf//dtd html level 2//",
            "-//ietf//dtd html level 3//",
            "-//ietf//dtd html strict level 0//",
            "-//ietf//dtd html strict level 1//",
            "-//ietf//dtd html strict level 2//",
            "-//ietf//dtd html strict level 3//",
            "-//ietf//dtd html strict//",
            "-//ietf//dtd html//",
            "-//metrius//dtd metrius presentational//",
            "-//microsoft//dtd internet explorer 2.0 html strict//",
            "-//microsoft//dtd internet explorer 2.0 html//",
            "-//microsoft//dtd internet explorer 2.0 tables//",
            "-//microsoft//dtd internet explorer 3.0 html strict//",
            "-//microsoft//dtd internet explorer 3.0 html//",
            "-//microsoft//dtd internet explorer 3.0 tables//",
            "-//netscape comm. corp.//dtd html//",
            "-//netscape comm. corp.//dtd strict html//",
            "-//o'reilly and associates//dtd html 2.0//",
            "-//o'reilly and associates//dtd html extended 1.0//",
            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
            "-//spyglass//dtd html 2.0 extended//",
            "-//sq//dtd html 2.0 hotmetal + extensions//",
            "-//sun microsystems corp.//dtd hotjava html//",
            "-//sun microsystems corp.//dtd hotjava strict html//",
            "-//w3c//dtd html 3 1995-03-24//",
            "-//w3c//dtd html 3.2 draft//",
            "-//w3c//dtd html 3.2 final//",
            "-//w3c//dtd html 3.2//",
            "-//w3c//dtd html 3.2s draft//",
            "-//w3c//dtd html 4.0 frameset//",
            "-//w3c//dtd html 4.0 transitional//",
            "-//w3c//dtd html experimental 19960712//",
            "-//w3c//dtd html experimental 970421//",
            "-//w3c//dtd w3 html//",
            "-//w3o//dtd w3 html 3.0//",
            "-//webtechs//dtd mozilla html 2.0//",
            "-//webtechs//dtd mozilla html//",
        )
        quirksExactIds = ("-//w3o//dtd w3 html strict 3.0//en//",
                          "-/w3c/dtd html 4.0 transitional/en",
                          "html")
        # HTML 4.01 frameset/transitional: quirks without a system id,
        # limited quirks with one
        html401Prefixes = ("-//w3c//dtd html 4.01 frameset//",
                           "-//w3c//dtd html 4.01 transitional//")
        limitedQuirksPrefixes = ("-//w3c//dtd xhtml 1.0 frameset//",
                                 "-//w3c//dtd xhtml 1.0 transitional//")
        isHtml401 = publicId.startswith(html401Prefixes)

        if (not correct or name != "html" or
                publicId.startswith(quirksPrefixes) or
                publicId in quirksExactIds or
                (isHtml401 and systemId is None) or
                (systemId and
                 systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(limitedQuirksPrefixes) or
              (isHtml401 and systemId is not None)):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        """No doctype was found: fall back to quirks mode and continue in
        the "beforeHtml" phase."""
        parser = self.parser
        parser.compatMode = "quirks"
        parser.phase = parser.phases["beforeHtml"]

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        # hand the token back so the new phase processes it
        return token

    def processStartTag(self, token):
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEndTag(self, token):
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        # True asks the main loop to run EOF processing in the new phase
        return True
| |
|
class BeforeHtmlPhase(Phase):
    """Phase that creates the root ``html`` element, implying it when the
    input does not start with one."""
    __slots__ = ()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``html`` root and switch to "beforeHead"."""
        root_token = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(root_token)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        # reprocess EOF in the phase just switched to
        return True

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        # whitespace before the root element is dropped
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        name = token["name"]
        if name in ("head", "body", "html", "br"):
            # these end tags imply the root and are then reprocessed
            self.insertHtmlElement()
            return token
        # any other end tag is reported and dropped
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": name})
| |
|
class BeforeHeadPhase(Phase):
    """Phase that creates the ``head`` element, implying it when the input
    does not provide one."""
    __slots__ = ()

    def processEOF(self):
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return True

    def processSpaceCharacters(self, token):
        # whitespace before <head> is dropped
        pass

    def processCharacters(self, token):
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def startTagHtml(self, token):
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        """Insert the head element, remember it as the tree's head pointer
        and switch to "inHead"."""
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        # imply <head>, then let the next phase see the token
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def endTagImplyHead(self, token):
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def endTagOther(self, token):
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("head", "body", "html", "br"), endTagImplyHead)
    ])
    endTagHandler.default = endTagOther
| |
|
class InHeadPhase(Phase):
    """Phase handling the content of the ``head`` element."""
    __slots__ = ()

    # the real thing
    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert the element and immediately pop it again, acknowledging a
        self-closing flag on the token."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a ``meta`` element and, while the stream's encoding is
        still tentative, try to switch to the encoding it declares."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        attributes = token["data"]
        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
        elif ("content" in attributes and
              "http-equiv" in attributes and
              attributes["http-equiv"].lower() == "content-type"):
            # The content attribute is text here, while the content parser
            # works on EncodingBytes, so it is encoded before being handed on.
            content_bytes = attributes["content"].encode("utf-8")
            data = _inputstream.EncodingBytes(content_bytes)
            codec = _inputstream.ContentAttrParser(data).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        if not self.parser.scripting:
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inHeadNoscript"]
        else:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        """Insert the script element and switch tokenizer and parser over to
        script data / "text" handling."""
        parser = self.parser
        self.tree.insertElement(token)
        parser.tokenizer.state = parser.tokenizer.scriptDataState
        parser.originalPhase = parser.phase
        parser.phase = parser.phases["text"]

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHead(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "head", "Expected head got %s" % node.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Close the head as if ``</head>`` had been seen."""
        self.endTagHead(impliedTagToken("head"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("title", startTagTitle),
        (("noframes", "style"), startTagNoFramesStyle),
        ("noscript", startTagNoscript),
        ("script", startTagScript),
        (("base", "basefont", "bgsound", "command", "link"),
         startTagBaseLinkCommand),
        ("meta", startTagMeta),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("head", endTagHead),
        (("br", "html", "body"), endTagHtmlBodyBr)
    ])
    endTagHandler.default = endTagOther
| |
|
class InHeadNoscriptPhase(Phase):
    """Phase handling the content of a ``noscript`` element inside ``head``."""
    __slots__ = ()

    def processEOF(self):
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        in_head = self.parser.phases["inHead"]
        return in_head.processComment(token)

    def processCharacters(self, token):
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        in_head = self.parser.phases["inHead"]
        return in_head.processSpaceCharacters(token)

    def startTagHtml(self, token):
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        # handled exactly as in the "inHead" phase
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagHeadNoscript(self, token):
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        node = self.parser.tree.openElements.pop()
        assert node.name == "noscript", "Expected noscript got %s" % node.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Close the noscript element as if ``</noscript>`` had been seen."""
        self.endTagNoscript(impliedTagToken("noscript"))

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
        (("head", "noscript"), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("noscript", endTagNoscript),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther
| |
|
class AfterHeadPhase(Phase):
    """Phase between the end of ``head`` and the start of ``body`` or
    ``frameset``."""
    __slots__ = ()

    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBody(self, token):
        parser = self.parser
        parser.framesetOK = False
        self.tree.insertElement(token)
        parser.phase = parser.phases["inBody"]

    def startTagFrameset(self, token):
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only element that turned up after ``</head>``: the
        head element is pushed back onto the stack for the duration of the
        "inHead" processing and removed again afterwards."""
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        open_elements = self.tree.openElements
        open_elements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # search from the innermost element outwards for a head to drop
        for node in self.tree.openElements[::-1]:
            if node.name == "head":
                self.tree.openElements.remove(node)
                break

    def startTagHead(self, token):
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Imply a ``body`` start tag and continue in the "inBody" phase."""
        implied_body = impliedTagToken("body", "StartTag")
        self.tree.insertElement(implied_body)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
          "style", "title"),
         startTagFromHead),
        ("head", startTagHead)
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("body", "html", "br"), endTagHtmlBodyBr)
    ])
    endTagHandler.default = endTagOther
| |
|
| | class InBodyPhase(Phase): |
| | |
| | |
| | __slots__ = ("processSpaceCharacters",) |
| |
|
def __init__(self, *args, **kwargs):
    """Initialise the base phase, then install the default whitespace handler.

    ``processSpaceCharacters`` is an instance slot rather than a method so
    that it can be swapped at run time.
    """
    super(InBodyPhase, self).__init__(*args, **kwargs)
    # Start with the ordinary (non-pre) whitespace handler
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
| |
|
def isMatchingFormattingElement(self, node1, node2):
    """Return whether two nodes agree on name, namespace and attributes."""
    if node1.name != node2.name:
        return False
    if node1.namespace != node2.namespace:
        return False
    return node1.attributes == node2.attributes
| |
|
| | |
def addFormattingElement(self, token):
    """Insert the element for *token* and append it to the list of active
    formatting elements.

    If three matching entries already exist after the last marker, the
    earliest of them is removed first.
    """
    tree = self.tree
    tree.insertElement(token)
    element = tree.openElements[-1]

    # Collect matches walking backwards, stopping at the nearest marker
    duplicates = []
    for candidate in reversed(tree.activeFormattingElements):
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, element):
            duplicates.append(candidate)

    assert len(duplicates) <= 3
    if len(duplicates) == 3:
        # duplicates is in reverse order, so the last entry is the earliest
        tree.activeFormattingElements.remove(duplicates[-1])
    tree.activeFormattingElements.append(element)
| |
|
| | |
| | def processEOF(self): |
| | allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", |
| | "tfoot", "th", "thead", "tr", "body", |
| | "html")) |
| | for node in self.tree.openElements[::-1]: |
| | if node.name not in allowed_elements: |
| | self.parser.parseError("expected-closing-tag-but-got-eof") |
| | break |
| | |
| |
|
| | def processSpaceCharactersDropNewline(self, token): |
| | |
| | |
| | data = token["data"] |
| | self.processSpaceCharacters = self.processSpaceCharactersNonPre |
| | if (data.startswith("\n") and |
| | self.tree.openElements[-1].name in ("pre", "listing", "textarea") and |
| | not self.tree.openElements[-1].hasContent()): |
| | data = data[1:] |
| | if data: |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertText(data) |
| |
|
| | def processCharacters(self, token): |
| | if token["data"] == "\u0000": |
| | |
| | return |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertText(token["data"]) |
| | |
| | if (self.parser.framesetOK and |
| | any([char not in spaceCharacters |
| | for char in token["data"]])): |
| | self.parser.framesetOK = False |
| |
|
| | def processSpaceCharactersNonPre(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertText(token["data"]) |
| |
|
| | def startTagProcessInHead(self, token): |
| | return self.parser.phases["inHead"].processStartTag(token) |
| |
|
| | def startTagBody(self, token): |
| | self.parser.parseError("unexpected-start-tag", {"name": "body"}) |
| | if (len(self.tree.openElements) == 1 or |
| | self.tree.openElements[1].name != "body"): |
| | assert self.parser.innerHTML |
| | else: |
| | self.parser.framesetOK = False |
| | for attr, value in token["data"].items(): |
| | if attr not in self.tree.openElements[1].attributes: |
| | self.tree.openElements[1].attributes[attr] = value |
| |
|
| | def startTagFrameset(self, token): |
| | self.parser.parseError("unexpected-start-tag", {"name": "frameset"}) |
| | if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"): |
| | assert self.parser.innerHTML |
| | elif not self.parser.framesetOK: |
| | pass |
| | else: |
| | if self.tree.openElements[1].parent: |
| | self.tree.openElements[1].parent.removeChild(self.tree.openElements[1]) |
| | while self.tree.openElements[-1].name != "html": |
| | self.tree.openElements.pop() |
| | self.tree.insertElement(token) |
| | self.parser.phase = self.parser.phases["inFrameset"] |
| |
|
| | def startTagCloseP(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| |
|
| | def startTagPreListing(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| | self.parser.framesetOK = False |
| | self.processSpaceCharacters = self.processSpaceCharactersDropNewline |
| |
|
| | def startTagForm(self, token): |
| | if self.tree.formPointer: |
| | self.parser.parseError("unexpected-start-tag", {"name": "form"}) |
| | else: |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| | self.tree.formPointer = self.tree.openElements[-1] |
| |
|
| | def startTagListItem(self, token): |
| | self.parser.framesetOK = False |
| |
|
| | stopNamesMap = {"li": ["li"], |
| | "dt": ["dt", "dd"], |
| | "dd": ["dt", "dd"]} |
| | stopNames = stopNamesMap[token["name"]] |
| | for node in reversed(self.tree.openElements): |
| | if node.name in stopNames: |
| | self.parser.phase.processEndTag( |
| | impliedTagToken(node.name, "EndTag")) |
| | break |
| | if (node.nameTuple in specialElements and |
| | node.name not in ("address", "div", "p")): |
| | break |
| |
|
| | if self.tree.elementInScope("p", variant="button"): |
| | self.parser.phase.processEndTag( |
| | impliedTagToken("p", "EndTag")) |
| |
|
| | self.tree.insertElement(token) |
| |
|
| | def startTagPlaintext(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| | self.parser.tokenizer.state = self.parser.tokenizer.plaintextState |
| |
|
| | def startTagHeading(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | if self.tree.openElements[-1].name in headingElements: |
| | self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) |
| | self.tree.openElements.pop() |
| | self.tree.insertElement(token) |
| |
|
| | def startTagA(self, token): |
| | afeAElement = self.tree.elementInActiveFormattingElements("a") |
| | if afeAElement: |
| | self.parser.parseError("unexpected-start-tag-implies-end-tag", |
| | {"startName": "a", "endName": "a"}) |
| | self.endTagFormatting(impliedTagToken("a")) |
| | if afeAElement in self.tree.openElements: |
| | self.tree.openElements.remove(afeAElement) |
| | if afeAElement in self.tree.activeFormattingElements: |
| | self.tree.activeFormattingElements.remove(afeAElement) |
| | self.tree.reconstructActiveFormattingElements() |
| | self.addFormattingElement(token) |
| |
|
| | def startTagFormatting(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.addFormattingElement(token) |
| |
|
| | def startTagNobr(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | if self.tree.elementInScope("nobr"): |
| | self.parser.parseError("unexpected-start-tag-implies-end-tag", |
| | {"startName": "nobr", "endName": "nobr"}) |
| | self.processEndTag(impliedTagToken("nobr")) |
| | |
| | self.tree.reconstructActiveFormattingElements() |
| | self.addFormattingElement(token) |
| |
|
| | def startTagButton(self, token): |
| | if self.tree.elementInScope("button"): |
| | self.parser.parseError("unexpected-start-tag-implies-end-tag", |
| | {"startName": "button", "endName": "button"}) |
| | self.processEndTag(impliedTagToken("button")) |
| | return token |
| | else: |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(token) |
| | self.parser.framesetOK = False |
| |
|
| | def startTagAppletMarqueeObject(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(token) |
| | self.tree.activeFormattingElements.append(Marker) |
| | self.parser.framesetOK = False |
| |
|
| | def startTagXmp(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.reconstructActiveFormattingElements() |
| | self.parser.framesetOK = False |
| | self.parser.parseRCDataRawtext(token, "RAWTEXT") |
| |
|
| | def startTagTable(self, token): |
| | if self.parser.compatMode != "quirks": |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.processEndTag(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| | self.parser.framesetOK = False |
| | self.parser.phase = self.parser.phases["inTable"] |
| |
|
| | def startTagVoidFormatting(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(token) |
| | self.tree.openElements.pop() |
| | token["selfClosingAcknowledged"] = True |
| | self.parser.framesetOK = False |
| |
|
| | def startTagInput(self, token): |
| | framesetOK = self.parser.framesetOK |
| | self.startTagVoidFormatting(token) |
| | if ("type" in token["data"] and |
| | token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): |
| | |
| | self.parser.framesetOK = framesetOK |
| |
|
| | def startTagParamSource(self, token): |
| | self.tree.insertElement(token) |
| | self.tree.openElements.pop() |
| | token["selfClosingAcknowledged"] = True |
| |
|
| | def startTagHr(self, token): |
| | if self.tree.elementInScope("p", variant="button"): |
| | self.endTagP(impliedTagToken("p")) |
| | self.tree.insertElement(token) |
| | self.tree.openElements.pop() |
| | token["selfClosingAcknowledged"] = True |
| | self.parser.framesetOK = False |
| |
|
| | def startTagImage(self, token): |
| | |
| | self.parser.parseError("unexpected-start-tag-treated-as", |
| | {"originalName": "image", "newName": "img"}) |
| | self.processStartTag(impliedTagToken("img", "StartTag", |
| | attributes=token["data"], |
| | selfClosing=token["selfClosing"])) |
| |
|
| | def startTagIsIndex(self, token): |
| | self.parser.parseError("deprecated-tag", {"name": "isindex"}) |
| | if self.tree.formPointer: |
| | return |
| | form_attrs = {} |
| | if "action" in token["data"]: |
| | form_attrs["action"] = token["data"]["action"] |
| | self.processStartTag(impliedTagToken("form", "StartTag", |
| | attributes=form_attrs)) |
| | self.processStartTag(impliedTagToken("hr", "StartTag")) |
| | self.processStartTag(impliedTagToken("label", "StartTag")) |
| | |
| | if "prompt" in token["data"]: |
| | prompt = token["data"]["prompt"] |
| | else: |
| | prompt = "This is a searchable index. Enter search keywords: " |
| | self.processCharacters( |
| | {"type": tokenTypes["Characters"], "data": prompt}) |
| | attributes = token["data"].copy() |
| | if "action" in attributes: |
| | del attributes["action"] |
| | if "prompt" in attributes: |
| | del attributes["prompt"] |
| | attributes["name"] = "isindex" |
| | self.processStartTag(impliedTagToken("input", "StartTag", |
| | attributes=attributes, |
| | selfClosing=token["selfClosing"])) |
| | self.processEndTag(impliedTagToken("label")) |
| | self.processStartTag(impliedTagToken("hr", "StartTag")) |
| | self.processEndTag(impliedTagToken("form")) |
| |
|
| | def startTagTextarea(self, token): |
| | self.tree.insertElement(token) |
| | self.parser.tokenizer.state = self.parser.tokenizer.rcdataState |
| | self.processSpaceCharacters = self.processSpaceCharactersDropNewline |
| | self.parser.framesetOK = False |
| |
|
| | def startTagIFrame(self, token): |
| | self.parser.framesetOK = False |
| | self.startTagRawtext(token) |
| |
|
| | def startTagNoscript(self, token): |
| | if self.parser.scripting: |
| | self.startTagRawtext(token) |
| | else: |
| | self.startTagOther(token) |
| |
|
| | def startTagRawtext(self, token): |
| | """iframe, noembed noframes, noscript(if scripting enabled)""" |
| | self.parser.parseRCDataRawtext(token, "RAWTEXT") |
| |
|
| | def startTagOpt(self, token): |
| | if self.tree.openElements[-1].name == "option": |
| | self.parser.phase.processEndTag(impliedTagToken("option")) |
| | self.tree.reconstructActiveFormattingElements() |
| | self.parser.tree.insertElement(token) |
| |
|
| | def startTagSelect(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(token) |
| | self.parser.framesetOK = False |
| | if self.parser.phase in (self.parser.phases["inTable"], |
| | self.parser.phases["inCaption"], |
| | self.parser.phases["inColumnGroup"], |
| | self.parser.phases["inTableBody"], |
| | self.parser.phases["inRow"], |
| | self.parser.phases["inCell"]): |
| | self.parser.phase = self.parser.phases["inSelectInTable"] |
| | else: |
| | self.parser.phase = self.parser.phases["inSelect"] |
| |
|
| | def startTagRpRt(self, token): |
| | if self.tree.elementInScope("ruby"): |
| | self.tree.generateImpliedEndTags() |
| | if self.tree.openElements[-1].name != "ruby": |
| | self.parser.parseError() |
| | self.tree.insertElement(token) |
| |
|
| | def startTagMath(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.parser.adjustMathMLAttributes(token) |
| | self.parser.adjustForeignAttributes(token) |
| | token["namespace"] = namespaces["mathml"] |
| | self.tree.insertElement(token) |
| | |
| | |
| | if token["selfClosing"]: |
| | self.tree.openElements.pop() |
| | token["selfClosingAcknowledged"] = True |
| |
|
| | def startTagSvg(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.parser.adjustSVGAttributes(token) |
| | self.parser.adjustForeignAttributes(token) |
| | token["namespace"] = namespaces["svg"] |
| | self.tree.insertElement(token) |
| | |
| | |
| | if token["selfClosing"]: |
| | self.tree.openElements.pop() |
| | token["selfClosingAcknowledged"] = True |
| |
|
| | def startTagMisplaced(self, token): |
| | """ Elements that should be children of other elements that have a |
| | different insertion mode; here they are ignored |
| | "caption", "col", "colgroup", "frame", "frameset", "head", |
| | "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", |
| | "tr", "noscript" |
| | """ |
| | self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]}) |
| |
|
| | def startTagOther(self, token): |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(token) |
| |
|
| | def endTagP(self, token): |
| | if not self.tree.elementInScope("p", variant="button"): |
| | self.startTagCloseP(impliedTagToken("p", "StartTag")) |
| | self.parser.parseError("unexpected-end-tag", {"name": "p"}) |
| | self.endTagP(impliedTagToken("p", "EndTag")) |
| | else: |
| | self.tree.generateImpliedEndTags("p") |
| | if self.tree.openElements[-1].name != "p": |
| | self.parser.parseError("unexpected-end-tag", {"name": "p"}) |
| | node = self.tree.openElements.pop() |
| | while node.name != "p": |
| | node = self.tree.openElements.pop() |
| |
|
| | def endTagBody(self, token): |
| | if not self.tree.elementInScope("body"): |
| | self.parser.parseError() |
| | return |
| | elif self.tree.openElements[-1].name != "body": |
| | for node in self.tree.openElements[2:]: |
| | if node.name not in frozenset(("dd", "dt", "li", "optgroup", |
| | "option", "p", "rp", "rt", |
| | "tbody", "td", "tfoot", |
| | "th", "thead", "tr", "body", |
| | "html")): |
| | |
| | self.parser.parseError( |
| | "expected-one-end-tag-but-got-another", |
| | {"gotName": "body", "expectedName": node.name}) |
| | break |
| | self.parser.phase = self.parser.phases["afterBody"] |
| |
|
| | def endTagHtml(self, token): |
| | |
| | if self.tree.elementInScope("body"): |
| | self.endTagBody(impliedTagToken("body")) |
| | return token |
| |
|
| | def endTagBlock(self, token): |
| | |
| | if token["name"] == "pre": |
| | self.processSpaceCharacters = self.processSpaceCharactersNonPre |
| | inScope = self.tree.elementInScope(token["name"]) |
| | if inScope: |
| | self.tree.generateImpliedEndTags() |
| | if self.tree.openElements[-1].name != token["name"]: |
| | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) |
| | if inScope: |
| | node = self.tree.openElements.pop() |
| | while node.name != token["name"]: |
| | node = self.tree.openElements.pop() |
| |
|
| | def endTagForm(self, token): |
| | node = self.tree.formPointer |
| | self.tree.formPointer = None |
| | if node is None or not self.tree.elementInScope(node): |
| | self.parser.parseError("unexpected-end-tag", |
| | {"name": "form"}) |
| | else: |
| | self.tree.generateImpliedEndTags() |
| | if self.tree.openElements[-1] != node: |
| | self.parser.parseError("end-tag-too-early-ignored", |
| | {"name": "form"}) |
| | self.tree.openElements.remove(node) |
| |
|
| | def endTagListItem(self, token): |
| | if token["name"] == "li": |
| | variant = "list" |
| | else: |
| | variant = None |
| | if not self.tree.elementInScope(token["name"], variant=variant): |
| | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) |
| | else: |
| | self.tree.generateImpliedEndTags(exclude=token["name"]) |
| | if self.tree.openElements[-1].name != token["name"]: |
| | self.parser.parseError( |
| | "end-tag-too-early", |
| | {"name": token["name"]}) |
| | node = self.tree.openElements.pop() |
| | while node.name != token["name"]: |
| | node = self.tree.openElements.pop() |
| |
|
| | def endTagHeading(self, token): |
| | for item in headingElements: |
| | if self.tree.elementInScope(item): |
| | self.tree.generateImpliedEndTags() |
| | break |
| | if self.tree.openElements[-1].name != token["name"]: |
| | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) |
| |
|
| | for item in headingElements: |
| | if self.tree.elementInScope(item): |
| | item = self.tree.openElements.pop() |
| | while item.name not in headingElements: |
| | item = self.tree.openElements.pop() |
| | break |
| |
|
| | def endTagFormatting(self, token): |
| | """The much-feared adoption agency algorithm""" |
| | |
| | |
| |
|
| | |
| | outerLoopCounter = 0 |
| |
|
| | |
| | while outerLoopCounter < 8: |
| |
|
| | |
| | outerLoopCounter += 1 |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | formattingElement = self.tree.elementInActiveFormattingElements( |
| | token["name"]) |
| | if (not formattingElement or |
| | (formattingElement in self.tree.openElements and |
| | not self.tree.elementInScope(formattingElement.name))): |
| | |
| | |
| | |
| | self.endTagOther(token) |
| | return |
| |
|
| | |
| | |
| | |
| | |
| | elif formattingElement not in self.tree.openElements: |
| | self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) |
| | self.tree.activeFormattingElements.remove(formattingElement) |
| | return |
| |
|
| | |
| | |
| | |
| | |
| | elif not self.tree.elementInScope(formattingElement.name): |
| | self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) |
| | return |
| |
|
| | |
| | |
| | |
| | |
| | |
| | else: |
| | if formattingElement != self.tree.openElements[-1]: |
| | self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | afeIndex = self.tree.openElements.index(formattingElement) |
| | furthestBlock = None |
| | for element in self.tree.openElements[afeIndex:]: |
| | if element.nameTuple in specialElements: |
| | furthestBlock = element |
| | break |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | if furthestBlock is None: |
| | element = self.tree.openElements.pop() |
| | while element != formattingElement: |
| | element = self.tree.openElements.pop() |
| | self.tree.activeFormattingElements.remove(element) |
| | return |
| |
|
| | |
| | commonAncestor = self.tree.openElements[afeIndex - 1] |
| |
|
| | |
| | |
| | |
| | |
| | |
| | bookmark = self.tree.activeFormattingElements.index(formattingElement) |
| |
|
| | |
| | lastNode = node = furthestBlock |
| | innerLoopCounter = 0 |
| |
|
| | index = self.tree.openElements.index(node) |
| | while innerLoopCounter < 3: |
| | innerLoopCounter += 1 |
| | |
| | index -= 1 |
| | node = self.tree.openElements[index] |
| | if node not in self.tree.activeFormattingElements: |
| | self.tree.openElements.remove(node) |
| | continue |
| | |
| | if node == formattingElement: |
| | break |
| | |
| | if lastNode == furthestBlock: |
| | bookmark = self.tree.activeFormattingElements.index(node) + 1 |
| | |
| | clone = node.cloneNode() |
| | |
| | self.tree.activeFormattingElements[ |
| | self.tree.activeFormattingElements.index(node)] = clone |
| | self.tree.openElements[ |
| | self.tree.openElements.index(node)] = clone |
| | node = clone |
| | |
| | |
| | if lastNode.parent: |
| | lastNode.parent.removeChild(lastNode) |
| | node.appendChild(lastNode) |
| | |
| | lastNode = node |
| |
|
| | |
| | |
| | |
| | |
| | if lastNode.parent: |
| | lastNode.parent.removeChild(lastNode) |
| |
|
| | if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): |
| | parent, insertBefore = self.tree.getTableMisnestedNodePosition() |
| | parent.insertBefore(lastNode, insertBefore) |
| | else: |
| | commonAncestor.appendChild(lastNode) |
| |
|
| | |
| | clone = formattingElement.cloneNode() |
| |
|
| | |
| | furthestBlock.reparentChildren(clone) |
| |
|
| | |
| | furthestBlock.appendChild(clone) |
| |
|
| | |
| | self.tree.activeFormattingElements.remove(formattingElement) |
| | self.tree.activeFormattingElements.insert(bookmark, clone) |
| |
|
| | |
| | self.tree.openElements.remove(formattingElement) |
| | self.tree.openElements.insert( |
| | self.tree.openElements.index(furthestBlock) + 1, clone) |
| |
|
| | def endTagAppletMarqueeObject(self, token): |
| | if self.tree.elementInScope(token["name"]): |
| | self.tree.generateImpliedEndTags() |
| | if self.tree.openElements[-1].name != token["name"]: |
| | self.parser.parseError("end-tag-too-early", {"name": token["name"]}) |
| |
|
| | if self.tree.elementInScope(token["name"]): |
| | element = self.tree.openElements.pop() |
| | while element.name != token["name"]: |
| | element = self.tree.openElements.pop() |
| | self.tree.clearActiveFormattingElements() |
| |
|
| | def endTagBr(self, token): |
| | self.parser.parseError("unexpected-end-tag-treated-as", |
| | {"originalName": "br", "newName": "br element"}) |
| | self.tree.reconstructActiveFormattingElements() |
| | self.tree.insertElement(impliedTagToken("br", "StartTag")) |
| | self.tree.openElements.pop() |
| |
|
| | def endTagOther(self, token): |
| | for node in self.tree.openElements[::-1]: |
| | if node.name == token["name"]: |
| | self.tree.generateImpliedEndTags(exclude=token["name"]) |
| | if self.tree.openElements[-1].name != token["name"]: |
| | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) |
| | while self.tree.openElements.pop() != node: |
| | pass |
| | break |
| | else: |
| | if node.nameTuple in specialElements: |
| | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) |
| | break |
| |
|
| | startTagHandler = _utils.MethodDispatcher([ |
| | ("html", Phase.startTagHtml), |
| | (("base", "basefont", "bgsound", "command", "link", "meta", |
| | "script", "style", "title"), |
| | startTagProcessInHead), |
| | ("body", startTagBody), |
| | ("frameset", startTagFrameset), |
| | (("address", "article", "aside", "blockquote", "center", "details", |
| | "dir", "div", "dl", "fieldset", "figcaption", "figure", |
| | "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", |
| | "section", "summary", "ul"), |
| | startTagCloseP), |
| | (headingElements, startTagHeading), |
| | (("pre", "listing"), startTagPreListing), |
| | ("form", startTagForm), |
| | (("li", "dd", "dt"), startTagListItem), |
| | ("plaintext", startTagPlaintext), |
| | ("a", startTagA), |
| | (("b", "big", "code", "em", "font", "i", "s", "small", "strike", |
| | "strong", "tt", "u"), startTagFormatting), |
| | ("nobr", startTagNobr), |
| | ("button", startTagButton), |
| | (("applet", "marquee", "object"), startTagAppletMarqueeObject), |
| | ("xmp", startTagXmp), |
| | ("table", startTagTable), |
| | (("area", "br", "embed", "img", "keygen", "wbr"), |
| | startTagVoidFormatting), |
| | (("param", "source", "track"), startTagParamSource), |
| | ("input", startTagInput), |
| | ("hr", startTagHr), |
| | ("image", startTagImage), |
| | ("isindex", startTagIsIndex), |
| | ("textarea", startTagTextarea), |
| | ("iframe", startTagIFrame), |
| | ("noscript", startTagNoscript), |
| | (("noembed", "noframes"), startTagRawtext), |
| | ("select", startTagSelect), |
| | (("rp", "rt"), startTagRpRt), |
| | (("option", "optgroup"), startTagOpt), |
| | (("math"), startTagMath), |
| | (("svg"), startTagSvg), |
| | (("caption", "col", "colgroup", "frame", "head", |
| | "tbody", "td", "tfoot", "th", "thead", |
| | "tr"), startTagMisplaced) |
| | ]) |
| | startTagHandler.default = startTagOther |
| |
|
| | endTagHandler = _utils.MethodDispatcher([ |
| | ("body", endTagBody), |
| | ("html", endTagHtml), |
| | (("address", "article", "aside", "blockquote", "button", "center", |
| | "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", |
| | "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", |
| | "section", "summary", "ul"), endTagBlock), |
| | ("form", endTagForm), |
| | ("p", endTagP), |
| | (("dd", "dt", "li"), endTagListItem), |
| | (headingElements, endTagHeading), |
| | (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", |
| | "strike", "strong", "tt", "u"), endTagFormatting), |
| | (("applet", "marquee", "object"), endTagAppletMarqueeObject), |
| | ("br", endTagBr), |
| | ]) |
| | endTagHandler.default = endTagOther |
| |
|
class TextPhase(Phase):
    """Phase active while an element's raw text content is being consumed.

    The assertion message in startTagOther names this RCDATA/RAWTEXT mode.
    Every end tag pops the current node and restores the parser's
    ``originalPhase``.
    """
    __slots__ = ()

    def processCharacters(self, token):
        """Append the token's character data to the tree."""
        text = token["data"]
        self.tree.insertText(text)

    def processEOF(self):
        """EOF inside the text element: parse error, pop it, restore the phase.

        Returns True (presumably asking the caller to reprocess EOF in the
        restored phase -- confirm against the main loop).
        """
        unclosed = self.tree.openElements[-1]
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": unclosed.name})
        self.tree.openElements.pop()
        parser = self.parser
        parser.phase = parser.originalPhase
        return True

    def startTagOther(self, token):
        """No start tag is expected in this phase; fail loudly if one arrives."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token["name"]

    def endTagScript(self, token):
        """Close the <script> element and return to the original phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        parser = self.parser
        parser.phase = parser.originalPhase

    def endTagOther(self, token):
        """Close the current element and return to the original phase."""
        self.tree.openElements.pop()
        parser = self.parser
        parser.phase = parser.originalPhase

    # No start tag has a dedicated handler; "script" is the only end tag
    # that does.
    startTagHandler = _utils.MethodDispatcher([])
    startTagHandler.default = startTagOther
    endTagHandler = _utils.MethodDispatcher([("script", endTagScript)])
    endTagHandler.default = endTagOther
| |
|
class InTablePhase(Phase):
    """Phase handling tokens for the "in table" insertion mode.

    Content that is not table structure is delegated to the "inBody" phase
    with the tree builder's ``insertFromTable`` flag raised for the
    duration of the call.
    """
    # Spec: WHATWG HTML, tree construction, "in table" insertion mode.
    __slots__ = tuple()

    # helper methods
    def clearStackToTableContext(self):
        """Pop open elements until the current node is <table> or <html>."""
        while self.tree.openElements[-1].name not in ("table", "html"):
            self.tree.openElements.pop()
        # Stopping at <html> presumably only happens for fragment parsing.

    # processing methods
    def processEOF(self):
        """EOF in a table is a parse error unless the current node is <html>."""
        if self.tree.openElements[-1].name != "html":
            self.parser.parseError("eof-in-table")
        else:
            assert self.parser.innerHTML
        # Nothing is returned: parsing stops here.

    def processSpaceCharacters(self, token):
        """Switch to the "inTableText" phase and let it handle the whitespace."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        # Remember where to come back to once the text run ends.
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Switch to the "inTableText" phase and let it handle the characters."""
        originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["inTableText"]
        self.parser.phase.originalPhase = originalPhase
        self.parser.phase.processCharacters(token)

    def insertText(self, token):
        """Insert character data via "inBody" with insertFromTable raised.

        Called by the "inTableText" phase for text that contains at least
        one non-space character.
        """
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processCharacters(token)
        self.tree.insertFromTable = False

    def startTagCaption(self, token):
        """Insert <caption>, push a formatting Marker, switch to "inCaption"."""
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Insert <colgroup> and switch to the "inColumnGroup" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a <colgroup> for a bare <col>, then hand the token back."""
        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
        return token

    def startTagRowGroup(self, token):
        """Insert tbody/tfoot/thead and switch to the "inTableBody" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a <tbody> for a bare td/th/tr, then hand the token back."""
        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
        return token

    def startTagTable(self, token):
        """A nested <table> start tag closes the current table first.

        The token is handed back unless this is a fragment (innerHTML)
        parse, in which case None is returned.
        """
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "table", "endName": "table"})
        self.parser.phase.processEndTag(impliedTagToken("table"))
        if not self.parser.innerHTML:
            return token

    def startTagStyleScript(self, token):
        """Delegate <style>/<script> to the "inHead" phase."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagInput(self, token):
        """Insert a hidden <input> in place; other inputs take the fallback path."""
        if ("type" in token["data"] and
                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
            self.parser.parseError("unexpected-hidden-input-in-table")
            self.tree.insertElement(token)
            # NOTE(review): the element is not associated with a form here.
            self.tree.openElements.pop()
            # <input> is a void element: acknowledge a trailing "/" here,
            # as the in-body void-element handler does.
            token["selfClosingAcknowledged"] = True
        else:
            self.startTagOther(token)

    def startTagForm(self, token):
        """A <form> in a table is a parse error; it is inserted and popped at once.

        The form is only inserted (and recorded as the form pointer) when
        no form pointer is set yet.
        """
        self.parser.parseError("unexpected-form-in-table")
        if self.tree.formPointer is None:
            self.tree.insertElement(token)
            self.tree.formPointer = self.tree.openElements[-1]
            self.tree.openElements.pop()

    def startTagOther(self, token):
        """Fallback: parse error, then process via "inBody" with insertFromTable raised."""
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processStartTag(token)
        self.tree.insertFromTable = False

    def endTagTable(self, token):
        """Close the table and reset the insertion mode.

        With no table in table scope the token is ignored with a parse
        error; that case is only expected for fragment parsing.
        """
        if self.tree.elementInScope("table", variant="table"):
            self.tree.generateImpliedEndTags()
            if self.tree.openElements[-1].name != "table":
                self.parser.parseError("end-tag-too-early-named",
                                       {"gotName": "table",
                                        "expectedName": self.tree.openElements[-1].name})
            # Pop down to the table, then pop the table itself.
            while self.tree.openElements[-1].name != "table":
                self.tree.openElements.pop()
            self.tree.openElements.pop()
            self.parser.resetInsertionMode()
        else:
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagIgnore(self, token):
        """Report "unexpected-end-tag" and drop the token."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Fallback: parse error, then process via "inBody" with insertFromTable raised."""
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        self.tree.insertFromTable = True
        self.parser.phases["inBody"].processEndTag(token)
        self.tree.insertFromTable = False

    # Start tags with dedicated handlers; `default` is startTagOther.
    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("caption", startTagCaption),
        ("colgroup", startTagColgroup),
        ("col", startTagCol),
        (("tbody", "tfoot", "thead"), startTagRowGroup),
        (("td", "th", "tr"), startTagImplyTbody),
        ("table", startTagTable),
        (("style", "script"), startTagStyleScript),
        ("input", startTagInput),
        ("form", startTagForm)
    ])
    startTagHandler.default = startTagOther

    # End tags with dedicated handlers; `default` is endTagOther.
    endTagHandler = _utils.MethodDispatcher([
        ("table", endTagTable),
        (("body", "caption", "col", "colgroup", "html", "tbody", "td",
          "tfoot", "th", "thead", "tr"), endTagIgnore)
    ])
    endTagHandler.default = endTagOther
| |
|
class InTableTextPhase(Phase):
    """Phase that buffers character tokens seen directly inside a table.

    Tokens accumulate in ``characterTokens`` and are flushed when any
    non-character token (comment, start tag, end tag, EOF) arrives; the
    parser is then returned to ``originalPhase``.
    """
    __slots__ = ("originalPhase", "characterTokens")

    def __init__(self, *args, **kwargs):
        super(InTableTextPhase, self).__init__(*args, **kwargs)
        self.originalPhase = None
        self.characterTokens = []

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text containing any non-space character goes through the "inTable"
        phase's insertText(); whitespace-only text is inserted directly.
        """
        text = "".join(pending["data"] for pending in self.characterTokens)
        has_non_space = any(char not in spaceCharacters for char in text)
        if has_non_space:
            combined = {"type": tokenTypes["Characters"], "data": text}
            self.parser.phases["inTable"].insertText(combined)
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase, and hand the comment back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase, and return True."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token, unless it is a lone NUL character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer a whitespace token."""
        # NOTE(review): this path may never be taken in practice -- confirm.
        self.characterTokens.append(token)

    def processStartTag(self, token):
        """Flush, restore the original phase, and hand the start tag back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase, and hand the end tag back."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token
| |
|
| | class InCaptionPhase(Phase): |
| | |
| | __slots__ = tuple() |
| |
|
def ignoreEndTagCaption(self):
    """Return True when no "caption" element is in table scope.

    Callers use this to tell whether a caption end tag would be ignored.
    """
    return not self.tree.elementInScope("caption", variant="table")
| |
|
| | def processEOF(self): |
| | self.parser.phases["inBody"].processEOF() |
| |
|
| | def processCharacters(self, token): |
| | return self.parser.phases["inBody"].processCharacters(token) |
| |
|
| | def startTagTableElement(self, token): |
| | self.parser.parseError() |
| | |
| | ignoreEndTag = self.ignoreEndTagCaption() |
| | self.parser.phase.processEndTag(impliedTagToken("caption")) |
| | if not ignoreEndTag: |
| | return token |
| |
|
| | def startTagOther(self, token): |
| | return self.parser.phases["inBody"].processStartTag(token) |
| |
|
| | def endTagCaption(self, token): |
| | if not self.ignoreEndTagCaption(): |
| | |
| | self.tree.generateImpliedEndTags() |
| | if self.tree.openElements[-1].name != "caption": |
| | self.parser.parseError("expected-one-end-tag-but-got-another", |
| | {"gotName": "caption", |
| | "expectedName": self.tree.openElements[-1].name}) |
| | while self.tree.openElements[-1].name != "caption": |
| | self.tree.openElements.pop() |
| | self.tree.openElements.pop() |
| | self.tree.clearActiveFormattingElements() |
| | self.parser.phase = self.parser.phases["inTable"] |
| | else: |
| | |
| | assert self.parser.innerHTML |
| | self.parser.parseError() |
| |
|
| | def endTagTable(self, token): |
| | self.parser.parseError() |
| | ignoreEndTag = self.ignoreEndTagCaption() |
| | self.parser.phase.processEndTag(impliedTagToken("caption")) |
| | if not ignoreEndTag: |
| | return token |
| |
|
| | def endTagIgnore(self, token): |
| | self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) |
| |
|
| | def endTagOther(self, token): |
| | return self.parser.phases["inBody"].processEndTag(token) |
| |
|
| | startTagHandler = _utils.MethodDispatcher([ |
| | ("html", Phase.startTagHtml), |
| | (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", |
| | "thead", "tr"), startTagTableElement) |
| | ]) |
| | startTagHandler.default = startTagOther |
| |
|
| | endTagHandler = _utils.MethodDispatcher([ |
| | ("caption", endTagCaption), |
| | ("table", endTagTable), |
| | (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", |
| | "thead", "tr"), endTagIgnore) |
| | ]) |
| | endTagHandler.default = endTagOther |
| |
|
class InColumnGroupPhase(Phase):
    """Phase used while a ``colgroup`` element is the current node.

    Anything other than ``col`` / ``colgroup`` handling closes the
    colgroup with an implied end tag and returns the token to the caller.
    """
    __slots__ = ()

    def ignoreEndTagColgroup(self):
        """Return True when the current node is the ``html`` element."""
        return self.tree.openElements[-1].name == "html"

    def _closeColgroup(self, result):
        """Apply an implied ``</colgroup>``; return ``result`` unless that
        end tag was ignorable, in which case return None.
        """
        # Must be evaluated before endTagColgroup pops the stack.
        ignored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if ignored:
            return None
        return result

    def processEOF(self):
        if self.tree.openElements[-1].name == "html":
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            return None
        return self._closeColgroup(True)

    def processCharacters(self, token):
        return self._closeColgroup(token)

    def startTagCol(self, token):
        """Insert ``col`` and immediately pop it again."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        return self._closeColgroup(token)

    def endTagColgroup(self, token):
        """Pop the colgroup and switch to the ``inTable`` phase."""
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
        else:
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            self.parser.parseError()

    def endTagCol(self, token):
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        return self._closeColgroup(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("col", startTagCol),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("colgroup", endTagColgroup),
        ("col", endTagCol),
    ])
    endTagHandler.default = endTagOther
| |
|
class InTableBodyPhase(Phase):
    """Phase used while inside ``tbody``, ``thead`` or ``tfoot``.

    EOF, character tokens and unrecognised tags are delegated to the
    ``inTable`` phase.
    """
    __slots__ = ()

    # -- helpers ---------------------------------------------------------
    def clearStackToTableBodyContext(self):
        """Pop open elements until a row group or ``html`` is on top."""
        stack = self.tree.openElements
        while stack[-1].name not in ("tbody", "tfoot", "thead", "html"):
            stack.pop()
        if stack[-1].name == "html":
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML

    def _rowGroupInTableScope(self):
        """Return True if tbody, thead or tfoot is in table scope."""
        inScope = self.tree.elementInScope
        return any(inScope(group, variant="table")
                   for group in ("tbody", "thead", "tfoot"))

    def _closeRowGroup(self, token):
        """Close the current row group and return ``token``; if no row
        group is in table scope, report a parse error and return None.
        """
        if not self._rowGroupInTableScope():
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    # -- token processing ------------------------------------------------
    def processEOF(self):
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTr(self, token):
        """Insert the ``tr`` element and switch to the ``inRow`` phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """Report the stray cell, open an implied ``tr`` and return the
        cell token to the caller.
        """
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        return self._closeRowGroup(token)

    def startTagOther(self, token):
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group and switch to the ``inTable`` phase."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": token["name"]})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        return self._closeRowGroup(token)

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": token["name"]})

    def endTagOther(self, token):
        return self.parser.phases["inTable"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("tr", startTagTr),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
         startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        ("table", endTagTable),
        (("body", "caption", "col", "colgroup", "html", "td", "th",
          "tr"), endTagIgnore),
    ])
    endTagHandler.default = endTagOther
| |
|
class InRowPhase(Phase):
    """Phase used while a ``tr`` element is open.

    EOF, character tokens and unrecognised tags are delegated to the
    ``inTable`` phase.
    """
    __slots__ = ()

    # -- helpers ---------------------------------------------------------
    def clearStackToTableRowContext(self):
        """Pop open elements until ``tr`` or ``html`` is on top, reporting
        a parse error for each element popped.
        """
        stack = self.tree.openElements
        while stack[-1].name not in ("tr", "html"):
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": stack[-1].name})
            stack.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` element is in table scope."""
        return not self.tree.elementInScope("tr", variant="table")

    def _closeRow(self, token):
        """Apply an implied ``</tr>``; return ``token`` unless that end tag
        was ignorable.
        """
        # Must be evaluated before endTagTr changes the stack.
        rowIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        return None if rowIgnored else token

    # -- token processing ------------------------------------------------
    def processEOF(self):
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        return self.parser.phases["inTable"].processSpaceCharacters(token)

    def processCharacters(self, token):
        return self.parser.phases["inTable"].processCharacters(token)

    def startTagTableCell(self, token):
        """Insert the cell, switch to ``inCell`` and push a formatting
        marker.
        """
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        return self._closeRow(token)

    def startTagOther(self, token):
        return self.parser.phases["inTable"].processStartTag(token)

    def endTagTr(self, token):
        """Pop the row and switch to the ``inTableBody`` phase."""
        if self.ignoreEndTagTr():
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        return self._closeRow(token)

    def endTagTableRowGroup(self, token):
        """Close the row and return ``token`` if the named row group is in
        table scope; otherwise report a parse error.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": token["name"]})

    def endTagOther(self, token):
        return self.parser.phases["inTable"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("td", "th"), startTagTableCell),
        (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
          "tr"), startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("tr", endTagTr),
        ("table", endTagTable),
        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
        (("body", "caption", "col", "colgroup", "html", "td", "th"),
         endTagIgnore),
    ])
    endTagHandler.default = endTagOther
| |
|
class InCellPhase(Phase):
    """Phase used while a ``td`` or ``th`` element is open.

    Tokens that are not table-structure related are delegated to the
    ``inBody`` phase.
    """
    __slots__ = ()

    # -- helpers ---------------------------------------------------------
    def closeCell(self):
        """Apply an implied end tag for whichever of ``td`` / ``th`` is in
        table scope, checking ``td`` first.
        """
        for cellName in ("td", "th"):
            if self.tree.elementInScope(cellName, variant="table"):
                self.endTagTableCell(impliedTagToken(cellName))
                break

    # -- token processing ------------------------------------------------
    def processEOF(self):
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        return self.parser.phases["inBody"].processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell and return ``token`` to the caller."""
        inScope = self.tree.elementInScope
        if inScope("td", variant="table") or inScope("th", variant="table"):
            self.closeCell()
            return token
        # Only expected when parser.innerHTML is set.
        assert self.parser.innerHTML
        self.parser.parseError()

    def startTagOther(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and switch to the ``inRow`` phase."""
        cellName = token["name"]
        if not self.tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return

        self.tree.generateImpliedEndTags(cellName)
        if self.tree.openElements[-1].name != cellName:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
        # Pop up to and including the cell element; when the cell is already
        # on top this pops exactly once.
        while self.tree.openElements.pop().name != cellName:
            pass
        self.tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagImply(self, token):
        """Close the cell and return ``token`` if the named element is in
        table scope; otherwise report a parse error.
        """
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        return self.parser.phases["inBody"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
          "thead", "tr"), startTagTableOther),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("td", "th"), endTagTableCell),
        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply),
    ])
    endTagHandler.default = endTagOther
| |
|
class InSelectPhase(Phase):
    """Phase used while a ``select`` element is open."""
    __slots__ = ()

    def processEOF(self):
        if self.tree.openElements[-1].name == "html":
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-select")

    def processCharacters(self, token):
        """Insert the text unless it is a single U+0000 character."""
        if token["data"] != "\u0000":
            self.tree.insertText(token["data"])

    def startTagOption(self, token):
        """Insert ``option``, first popping an ``option`` that is on top."""
        stack = self.tree.openElements
        if stack[-1].name == "option":
            stack.pop()
        self.tree.insertElement(token)

    def startTagOptgroup(self, token):
        """Insert ``optgroup``, first popping an ``option`` and then an
        ``optgroup`` if each is on top in turn.
        """
        stack = self.tree.openElements
        for closable in ("option", "optgroup"):
            if stack[-1].name == closable:
                stack.pop()
        self.tree.insertElement(token)

    def startTagSelect(self, token):
        """Treat a nested ``<select>`` as an implied ``</select>``."""
        self.parser.parseError("unexpected-select-in-select")
        self.endTagSelect(impliedTagToken("select"))

    def startTagInput(self, token):
        """Close the select and return ``token`` to the caller."""
        self.parser.parseError("unexpected-input-in-select")
        if not self.tree.elementInScope("select", variant="select"):
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            return None
        self.endTagSelect(impliedTagToken("select"))
        return token

    def startTagScript(self, token):
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("unexpected-start-tag-in-select",
                               {"name": token["name"]})

    def endTagOption(self, token):
        stack = self.tree.openElements
        if stack[-1].name != "option":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "option"})
        else:
            stack.pop()

    def endTagOptgroup(self, token):
        stack = self.tree.openElements
        # An option sitting directly inside an optgroup is popped first.
        if stack[-1].name == "option" and stack[-2].name == "optgroup":
            stack.pop()
        # Then the optgroup itself; anything else on top is an error.
        if stack[-1].name != "optgroup":
            self.parser.parseError("unexpected-end-tag-in-select",
                                   {"name": "optgroup"})
        else:
            stack.pop()

    def endTagSelect(self, token):
        """Pop through the ``select`` element and reset the insertion mode."""
        if not self.tree.elementInScope("select", variant="select"):
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        while self.tree.openElements.pop().name != "select":
            pass
        self.parser.resetInsertionMode()

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag-in-select",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("option", startTagOption),
        ("optgroup", startTagOptgroup),
        ("select", startTagSelect),
        (("input", "keygen", "textarea"), startTagInput),
        ("script", startTagScript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("option", endTagOption),
        ("optgroup", endTagOptgroup),
        ("select", endTagSelect),
    ])
    endTagHandler.default = endTagOther
| |
|
class InSelectInTablePhase(Phase):
    """Variant of the ``inSelect`` phase that reacts to table tags.

    Everything except the table-related tags listed in the dispatch tables
    is delegated to the ``inSelect`` phase.
    """
    __slots__ = ()

    def processEOF(self):
        self.parser.phases["inSelect"].processEOF()

    def processCharacters(self, token):
        return self.parser.phases["inSelect"].processCharacters(token)

    def startTagTable(self, token):
        """Report the error, apply an implied ``</select>`` and return
        ``token`` to the caller.
        """
        self.parser.parseError(
            "unexpected-table-element-start-tag-in-select-in-table",
            {"name": token["name"]})
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        return self.parser.phases["inSelect"].processStartTag(token)

    def endTagTable(self, token):
        """Report the error; if the named element is in table scope, apply
        an implied ``</select>`` and return ``token`` to the caller.
        """
        self.parser.parseError(
            "unexpected-table-element-end-tag-in-select-in-table",
            {"name": token["name"]})
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        return self.parser.phases["inSelect"].processEndTag(token)

    startTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         startTagTable),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
         endTagTable),
    ])
    endTagHandler.default = endTagOther
| |
|
class InForeignContentPhase(Phase):
    """Phase used while the current node is outside the default namespace.

    Start tags are either treated as a "breakout" (the foreign elements are
    popped and the token is returned to the caller) or inserted in the
    namespace of the current node after MathML / SVG name adjustments.
    """
    __slots__ = tuple()

    # Start-tag names that force a breakout from foreign content.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # A ``font`` start tag only forces a breakout when it carries one of
    # these attributes.
    _fontBreakoutAttributes = frozenset(["color", "face", "size"])

    # Lower-cased SVG tag name -> mixed-case spelling.  Kept at class level
    # so the table is built once rather than on every adjustSVGTagNames call.
    _svgTagNameReplacements = {
        "altglyph": "altGlyph",
        "altglyphdef": "altGlyphDef",
        "altglyphitem": "altGlyphItem",
        "animatecolor": "animateColor",
        "animatemotion": "animateMotion",
        "animatetransform": "animateTransform",
        "clippath": "clipPath",
        "feblend": "feBlend",
        "fecolormatrix": "feColorMatrix",
        "fecomponenttransfer": "feComponentTransfer",
        "fecomposite": "feComposite",
        "feconvolvematrix": "feConvolveMatrix",
        "fediffuselighting": "feDiffuseLighting",
        "fedisplacementmap": "feDisplacementMap",
        "fedistantlight": "feDistantLight",
        "feflood": "feFlood",
        "fefunca": "feFuncA",
        "fefuncb": "feFuncB",
        "fefuncg": "feFuncG",
        "fefuncr": "feFuncR",
        "fegaussianblur": "feGaussianBlur",
        "feimage": "feImage",
        "femerge": "feMerge",
        "femergenode": "feMergeNode",
        "femorphology": "feMorphology",
        "feoffset": "feOffset",
        "fepointlight": "fePointLight",
        "fespecularlighting": "feSpecularLighting",
        "fespotlight": "feSpotLight",
        "fetile": "feTile",
        "feturbulence": "feTurbulence",
        "foreignobject": "foreignObject",
        "glyphref": "glyphRef",
        "lineargradient": "linearGradient",
        "radialgradient": "radialGradient",
        "textpath": "textPath",
    }

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

        Names that are not in the replacement table are left untouched.
        """
        name = token["name"]
        if name in self._svgTagNameReplacements:
            token["name"] = self._svgTagNameReplacements[name]

    def processCharacters(self, token):
        """Replace a lone U+0000 with U+FFFD, clear ``framesetOK`` when
        non-space text is seen, then defer to ``Phase.processCharacters``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out of foreign content.

        :returns: ``token`` when a breakout happened (the foreign elements
            were popped and the token was not inserted); otherwise None.
        """
        currentNode = self.tree.openElements[-1]
        name = token["name"]
        isBreakout = (
            name in self.breakoutElements or
            (name == "font" and
             # isdisjoint iterates the attribute mapping's keys directly,
             # so no temporary set is built per tag.
             not self._fontBreakoutAttributes.isdisjoint(token["data"])))

        if isBreakout:
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": name})
            # Pop until the current node is in the default namespace or is
            # an HTML / MathML-text integration point.
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        if currentNode.namespace == namespaces["mathml"]:
            self.parser.adjustMathMLAttributes(token)
        elif currentNode.namespace == namespaces["svg"]:
            self.adjustSVGTagNames(token)
            self.parser.adjustSVGAttributes(token)
        self.parser.adjustForeignAttributes(token)
        # The new element goes into the namespace of the current node.
        token["namespace"] = currentNode.namespace
        self.tree.insertElement(token)
        if token["selfClosing"]:
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element or delegate the end tag.

        Walks the stack of open elements from the top down, comparing each
        element's lower-cased name with ``token["name"]``.  On a match the
        stack is popped through that element and None is returned.  If an
        element in the default namespace is reached first, the token is
        passed to the parser's current phase and that result is returned.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # If the parser is in the inTableText phase, flush its
                # queued characters and leave that phase before popping.
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token
| |
|
class AfterBodyPhase(Phase):
    """Phase entered after the body has been closed.

    Characters and unexpected tags are parse errors that switch the parser
    back to the ``inBody`` phase and return the token to the caller.
    """
    __slots__ = ()

    def _backToBody(self, token):
        """Switch to the ``inBody`` phase and return ``token``."""
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def processEOF(self):
        """Nothing to do at end of input."""
        return None

    def processComment(self, token):
        """Attach the comment to the first open element."""
        rootElement = self.tree.openElements[0]
        self.tree.insertComment(token, rootElement)

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-after-body")
        return self._backToBody(token)

    def startTagHtml(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("unexpected-start-tag-after-body",
                               {"name": token["name"]})
        return self._backToBody(token)

    def endTagHtml(self, name):
        """Move to ``afterAfterBody`` unless parser.innerHTML is set.

        The argument is not used by this handler.
        """
        if not self.parser.innerHTML:
            self.parser.phase = self.parser.phases["afterAfterBody"]
        else:
            self.parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag-after-body",
                               {"name": token["name"]})
        return self._backToBody(token)

    startTagHandler = _utils.MethodDispatcher([("html", startTagHtml)])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("html", endTagHtml),
    ])
    endTagHandler.default = endTagOther
| |
|
class InFramesetPhase(Phase):
    """Phase used while a ``frameset`` element is open."""
    __slots__ = ()

    def processEOF(self):
        if self.tree.openElements[-1].name == "html":
            # Only expected when parser.innerHTML is set.
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert ``frame`` and immediately pop it again."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()

    def startTagNoframes(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("unexpected-start-tag-in-frameset",
                               {"name": token["name"]})

    def endTagFrameset(self, token):
        """Pop the current frameset and possibly move to ``afterFrameset``."""
        stack = self.tree.openElements
        if stack[-1].name != "html":
            stack.pop()
        else:
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")

        # Runs after either branch above: once the current node is no longer
        # a frameset (and parser.innerHTML is not set), change phase.
        if self.parser.innerHTML:
            return
        if stack[-1].name != "frameset":
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag-in-frameset",
                               {"name": token["name"]})

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("frameset", startTagFrameset),
        ("frame", startTagFrame),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([("frameset", endTagFrameset)])
    endTagHandler.default = endTagOther
| |
|
class AfterFramesetPhase(Phase):
    """Phase entered after the outermost frameset has been closed.

    Apart from ``html`` / ``noframes`` start tags and the ``html`` end tag,
    every token is reported as a parse error and dropped.
    """
    __slots__ = ()

    def processEOF(self):
        """Nothing to do at end of input."""
        return None

    def processCharacters(self, token):
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagOther(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset", details)

    def endTagHtml(self, token):
        """Switch to the ``afterAfterFrameset`` phase."""
        self.parser.phase = self.parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset", details)

    startTagHandler = _utils.MethodDispatcher([
        ("html", Phase.startTagHtml),
        ("noframes", startTagNoframes),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
    endTagHandler.default = endTagOther
| |
|
class AfterAfterBodyPhase(Phase):
    """Phase entered once ``</html>`` has been seen after the body.

    Comments are attached to the document; characters and tags are parse
    errors that switch back to ``inBody`` and return the token.
    """
    __slots__ = ()

    def _backToBody(self, token):
        """Switch to the ``inBody`` phase and return ``token``."""
        self.parser.phase = self.parser.phases["inBody"]
        return token

    def processEOF(self):
        """Nothing to do at end of input."""
        return None

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        return self.parser.phases["inBody"].processSpaceCharacters(token)

    def processCharacters(self, token):
        self.parser.parseError("expected-eof-but-got-char")
        return self._backToBody(token)

    def startTagHtml(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagOther(self, token):
        self.parser.parseError("expected-eof-but-got-start-tag",
                               {"name": token["name"]})
        return self._backToBody(token)

    def processEndTag(self, token):
        self.parser.parseError("expected-eof-but-got-end-tag",
                               {"name": token["name"]})
        return self._backToBody(token)

    startTagHandler = _utils.MethodDispatcher([("html", startTagHtml)])
    startTagHandler.default = startTagOther
| |
|
class AfterAfterFramesetPhase(Phase):
    """Phase entered once ``</html>`` has been seen after a frameset.

    Comments are attached to the document; characters, end tags and
    unrecognised start tags are reported as parse errors and dropped.
    """
    __slots__ = ()

    def processEOF(self):
        """Nothing to do at end of input."""
        return None

    def processComment(self, token):
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        return self.parser.phases["inBody"].processSpaceCharacters(token)

    def processCharacters(self, token):
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        return self.parser.phases["inBody"].processStartTag(token)

    def startTagNoFrames(self, token):
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagOther(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)

    def processEndTag(self, token):
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)

    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("noframes", startTagNoFrames),
    ])
    startTagHandler.default = startTagOther
| |
|
| | |
| |
|
| | return { |
| | "initial": InitialPhase, |
| | "beforeHtml": BeforeHtmlPhase, |
| | "beforeHead": BeforeHeadPhase, |
| | "inHead": InHeadPhase, |
| | "inHeadNoscript": InHeadNoscriptPhase, |
| | "afterHead": AfterHeadPhase, |
| | "inBody": InBodyPhase, |
| | "text": TextPhase, |
| | "inTable": InTablePhase, |
| | "inTableText": InTableTextPhase, |
| | "inCaption": InCaptionPhase, |
| | "inColumnGroup": InColumnGroupPhase, |
| | "inTableBody": InTableBodyPhase, |
| | "inRow": InRowPhase, |
| | "inCell": InCellPhase, |
| | "inSelect": InSelectPhase, |
| | "inSelectInTable": InSelectInTablePhase, |
| | "inForeignContent": InForeignContentPhase, |
| | "afterBody": AfterBodyPhase, |
| | "inFrameset": InFramesetPhase, |
| | "afterFrameset": AfterFramesetPhase, |
| | "afterAfterBody": AfterAfterBodyPhase, |
| | "afterAfterFrameset": AfterAfterFramesetPhase, |
| | |
| | } |
| |
|
| |
|
def adjust_attributes(token, replacements):
    """Rename attributes of ``token`` in place according to ``replacements``.

    :arg token: a token whose ``'data'`` entry is the attribute mapping

    :arg replacements: mapping of attribute name -> replacement name

    When at least one attribute name is a key of ``replacements``, the
    attribute mapping is rebuilt as a new mapping of the same type, keeping
    each value and the iteration order of the original items.  Otherwise
    the token is left untouched.
    """
    attributes = token['data']
    if not (viewkeys(attributes) & viewkeys(replacements)):
        return
    renamed = ((replacements.get(name, name), value)
               for name, value in attributes.items())
    token['data'] = type(attributes)(renamed)
| |
|
| |
|
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that was not in the input.

    :arg name: the tag name

    :arg type: key into ``tokenTypes`` selecting the token type

    :arg attributes: attribute mapping; a fresh empty dict when omitted

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"`` keys
    """
    # A new dict per call avoids sharing one mutable default between tokens.
    data = {} if attributes is None else attributes
    token = {
        "type": tokenTypes[type],
        "name": name,
        "data": data,
        "selfClosing": selfClosing,
    }
    return token
| |
|
| |
|
class ParseError(Exception):
    """Exception type representing an error in a parsed document."""
| |
|