"""
Python-Markdown provides two serializers which render [`ElementTree.Element`][xml.etree.ElementTree.Element]
objects to a string of HTML. Both functions wrap the same underlying code with only a few minor
differences as outlined below:

1. Empty (self-closing) tags are rendered as `<tag>` for HTML and as `<tag />` for XHTML.
2. Boolean attributes are rendered as `attrname` for HTML and as `attrname="attrname"` for XHTML.
"""

from __future__ import annotations

import re
from typing import Callable, Literal, NoReturn
from xml.etree.ElementTree import ProcessingInstruction
from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY

# Names exported by `from <module> import *`; the remaining helpers are private.
__all__ = ['to_html_string', 'to_xhtml_string']

# Matches an ampersand that does NOT start an entity or character reference
# (`&name;`, `&#123;` or `&#x1f;`, case-insensitively), i.e. an ampersand that
# still needs to be escaped.
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)


def _raise_serialization_error(text: str) -> NoReturn:
    """Raise a `TypeError` reporting that `text` cannot be serialized.

    The message contains the `repr()` of the offending value and the name of
    its type. This function never returns normally.
    """
    raise TypeError(
        "cannot serialize {!r} (type {})".format(text, type(text).__name__)
    )


def _escape_cdata(text) -> str:
    """Escape character data for use as element text.

    Ampersands that do not already begin an entity (as decided by `RE_AMP`)
    become `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`.

    Raises:
        TypeError: via `_raise_serialization_error` when `text` does not
            support the string operations used here (for example `None`).
    """
    try:
        # Each substitution is guarded by a membership test so that text
        # without special characters is returned without any replace call.
        if "&" in text:
            # Only replace `&` when it is not already part of an entity.
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib(text: str) -> str:
    """Escape an attribute value, including embedded newlines.

    Ampersands that do not already begin an entity (as decided by `RE_AMP`)
    become `&amp;`; `<`, `>` and `"` become `&lt;`, `&gt;` and `&quot;`; and
    each newline becomes the character reference `&#10;`.

    Raises:
        TypeError: via `_raise_serialization_error` when `text` does not
            support the string operations used here (for example `None`).
    """
    try:
        if "&" in text:
            # Only replace `&` when it is not already part of an entity.
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib_html(text: str) -> str:
    """Escape an attribute value for HTML output.

    Ampersands that do not already begin an entity (as decided by `RE_AMP`)
    become `&amp;`; `<`, `>` and `"` become `&lt;`, `&gt;` and `&quot;`.
    Unlike `_escape_attrib`, newlines are left untouched.

    Raises:
        TypeError: via `_raise_serialization_error` when `text` does not
            support the string operations used here (for example `None`).
    """
    try:
        if "&" in text:
            # Only replace `&` when it is not already part of an entity.
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None:
    """Serialize `elem`, its descendants and its tail by calling `write` with string fragments.

    Arguments:
        write: Callable that receives each output fragment in document order.
        elem: The element to serialize. Comment and processing-instruction
            nodes, and elements whose tag is `None`, are handled specially.
        format: `"html"` or `"xhtml"`; controls how empty elements and boolean
            attributes are written.

    Raises:
        ValueError: if the tag is a `QName` whose text does not start with `{`.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    elif tag is None:
        # No tag of its own: emit only the text and the serialized children.
        if text:
            write(_escape_cdata(text))
        for e in elem:
            _serialize_html(write, e, format)
    else:
        namespace_uri = None
        if isinstance(tag, QName):
            # `QName` objects store their data as a string: `{uri}tag`.
            if tag.text[:1] == "{":
                namespace_uri, tag = tag.text[1:].split("}", 1)
            else:
                raise ValueError('QName objects must define a tag.')
        write("<" + tag)
        items = elem.items()
        if items:
            items = sorted(items)  # lexical order
            for k, v in items:
                if isinstance(k, QName):
                    # Use the text of the `QName` as the attribute name.
                    k = k.text
                if isinstance(v, QName):
                    # Use the text of the `QName` as the value, unescaped.
                    v = v.text
                else:
                    v = _escape_attrib_html(v)
                if k == v and format == 'html':
                    # Boolean attribute: HTML output writes the bare name only.
                    write(" %s" % v)
                else:
                    write(' {}="{}"'.format(k, v))
        if namespace_uri:
            write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
        if format == "xhtml" and tag.lower() in HTML_EMPTY:
            # Empty element in XHTML: self-close; text and children are not written.
            write(" />")
        else:
            write(">")
            if text:
                if tag.lower() in ["script", "style"]:
                    # Script and style content is written without escaping.
                    write(text)
                else:
                    write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, format)
            if tag.lower() not in HTML_EMPTY:
                write("</" + tag + ">")
    # The tail belongs after the element regardless of which branch ran.
    if elem.tail:
        write(_escape_cdata(elem.tail))


def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str:
    """Serialize `root` with `_serialize_html` and return the result as one string.

    Arguments:
        root: The element to serialize; must not be `None`.
        format: `"html"` (the default) or `"xhtml"`, passed through to
            `_serialize_html`.
    """
    assert root is not None
    # Collect the fragments in a list and join once at the end.
    data: list[str] = []
    write = data.append
    _serialize_html(write, root, format)
    return "".join(data)


def to_html_string(element: Element) -> str:
    """ Serialize element and its children to a string of HTML5.

    Empty elements are written as `<tag>` and boolean attributes as the bare
    attribute name.
    """
    return _write_html(ElementTree(element).getroot(), format="html")


def to_xhtml_string(element: Element) -> str:
    """ Serialize element and its children to a string of XHTML.

    Empty elements are written as `<tag />` and boolean attributes as
    `attrname="attrname"`.
    """
    return _write_html(ElementTree(element).getroot(), format="xhtml")