| | """Shim module exporting the same ElementTree API for lxml and |
| | xml.etree backends. |
| | |
| | When lxml is installed, it is automatically preferred over the built-in |
| | xml.etree module. |
| | On Python 2.7, the cElementTree module is preferred over the pure-python |
| | ElementTree module. |
| | |
| | Besides exporting a unified interface, this also defines extra functions |
| | or subclasses built-in ElementTree classes to add features that are |
| | only available in lxml, like OrderedDict for attributes, pretty_print and |
| | iterwalk. |
| | """ |
| |
|
| | from fontTools.misc.textTools import tostr |
| |
|
| |
|
# Template for the XML declaration line; the single %s placeholder receives
# the encoding name when the declaration is written.
XML_DECLARATION = "<?xml version='1.0' encoding='%s'?>"
| |
|
# Names exported by ``from <this module> import *``.
# NOTE(review): ``iterwalk`` is defined in this module but is not listed
# here -- confirm whether that omission is intentional.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "XML",
    "XMLParser",
    "register_namespace",
]
| |
|
# Backend selection: try lxml first, then xml.etree.cElementTree, and finally
# the plain xml.etree.ElementTree module. ``_have_lxml`` records whether the
# lxml import succeeded.
# NOTE(review): indentation was lost in this copy of the file; the fallback
# definitions that follow presumably only apply when ``_have_lxml`` is False
# -- confirm against the upstream layout.
try:
    from lxml.etree import *

    _have_lxml = True
except ImportError:
    try:
        from xml.etree.cElementTree import *

        # XML is re-imported from xml.etree.ElementTree so that it replaces
        # the version pulled in by the cElementTree star-import above.
        # NOTE(review): presumably works around a limitation of the
        # cElementTree XML() function -- confirm.
        from xml.etree.ElementTree import XML
    except ImportError:
        from xml.etree.ElementTree import *
    _have_lxml = False
| |
|
| | import sys |
| |
|
| | |
# The builtin dict is treated as insertion-ordered on Python >= 3.6 and
# whenever the __pypy__ module can be imported; otherwise OrderedDict is used
# as the attribute mapping type. The temporary name is removed again so that
# it does not linger in the module namespace.
try:
    import __pypy__ as _pypy_module
except ImportError:
    _pypy_module = None
_dict_is_ordered = sys.version_info >= (3, 6) or _pypy_module is not None
del _pypy_module

if not _dict_is_ordered:
    from collections import OrderedDict as _Attrib
else:
    _Attrib = dict
| |
|
# Choose the base class for the Element subclass defined below: keep the
# star-imported Element when it is a real class, otherwise fall back to the
# one from xml.etree.ElementTree.
# NOTE(review): the original rationale comment is missing from this copy;
# presumably some backends expose an Element that cannot be subclassed.
_Element = Element
if not isinstance(_Element, type):
    from xml.etree.ElementTree import Element as _Element
| |
|
class Element(_Element):
    """Element subclass that keeps the order of attributes.

    The instance's ``attrib`` is always a new ``_Attrib`` mapping, filled
    first from *attrib* and then from the keyword arguments in *extra*.
    """

    def __init__(self, tag, attrib=_Attrib(), **extra):
        super().__init__(tag)
        ordered = _Attrib()
        # Positional mapping first, keyword attributes second, so keywords
        # override and are appended after the explicit mapping.
        for source in (attrib, extra):
            if source:
                ordered.update(source)
        self.attrib = ordered
| |
|
def SubElement(parent, tag, attrib=_Attrib(), **extra):
    """Create a child element of *parent*, append it and return it.

    The child is built with the parent's own class, so Element subclasses
    are propagated down the tree instead of relying on the accelerated
    ``_elementtree.SubElement``, which fails when *parent* is a subclass of
    Element.
    """
    child = parent.__class__(tag, attrib, **extra)
    parent.append(child)
    return child
| |
|
def _iterwalk(element, events, tag):
    """Recursively yield ``(event, element)`` pairs for a subtree.

    Args:
        element: the element whose subtree is walked, in document order.
        events: container of requested event names; only ``"start"`` and
            ``"end"`` are produced.
        tag: when not None, only elements whose ``tag`` equals this value
            produce events (their descendants are still visited).

    Yields:
        ``("start", element)`` before an element's children and
        ``("end", element)`` after them, each only if requested in *events*.
    """
    include = tag is None or element.tag == tag
    if include and "start" in events:
        yield ("start", element)
    for child in element:
        yield from _iterwalk(child, events, tag)
    # Honour the requested events for "end" as well; previously "end" was
    # emitted even when the caller asked for "start" events only.
    if include and "end" in events:
        yield ("end", element)
| |
|
def iterwalk(element_or_tree, events=("end",), tag=None):
    """Walk an existing tree and generate events like ``iterparse()`` would.

    Drop-in replacement for lxml.etree.iterwalk. *element_or_tree* may be an
    element or an object providing ``getroot()``; a *tag* of ``"*"`` is
    treated the same as None (no tag filtering).
    """
    root = (
        element_or_tree
        if iselement(element_or_tree)
        else element_or_tree.getroot()
    )
    wanted_tag = None if tag == "*" else tag
    yield from _iterwalk(root, events, wanted_tag)
| |
|
_ElementTree = ElementTree


class ElementTree(_ElementTree):
    """ElementTree subclass whose ``write`` accepts 'pretty_print' and 'doctype'.

    The extra arguments only take effect for the default XML serialization
    method; "html", "text" and any other non-"xml" method are handed to the
    base class unchanged.
    """

    def write(
        self,
        file_or_filename,
        encoding=None,
        xml_declaration=False,
        method=None,
        doctype=None,
        pretty_print=False,
    ):
        """Serialize the tree to *file_or_filename*.

        Raises ValueError when an XML declaration is requested together with
        the "unicode" pseudo-encoding.
        """
        if method and method != "xml":
            # Non-XML methods are handled entirely by the base class.
            super().write(
                file_or_filename,
                encoding=encoding,
                xml_declaration=xml_declaration,
                method=method,
            )
            return

        to_unicode = encoding is not None and encoding.lower() == "unicode"
        if to_unicode:
            if xml_declaration:
                raise ValueError(
                    "Serialisation to unicode must not request an XML declaration"
                )
            write_declaration = False
            encoding = "unicode"
        elif xml_declaration is not None:
            write_declaration = xml_declaration
        else:
            # No explicit choice: declare only encodings other than the
            # ASCII / UTF-8 family listed here.
            write_declaration = encoding is not None and encoding.upper() not in (
                "ASCII",
                "UTF-8",
                "UTF8",
                "US-ASCII",
            )

        if encoding is None:
            encoding = "ASCII"

        root = self._root
        if pretty_print:
            # _indent rewrites text/tail whitespace on the tree in place.
            _indent(root)

        with _get_writer(file_or_filename, encoding) as write:
            if write_declaration:
                write(XML_DECLARATION % encoding.upper())
                if pretty_print:
                    write("\n")
            if doctype:
                write(_tounicode(doctype))
                if pretty_print:
                    write("\n")

            qnames, namespaces = _namespaces(root)
            _serialize_xml(write, root, qnames, namespaces)
| |
|
| | import io |
| |
|
def tostring(
    element,
    encoding=None,
    xml_declaration=None,
    method=None,
    doctype=None,
    pretty_print=False,
):
    """Serialize *element* using this module's ElementTree subclass.

    Supports the extra 'doctype' and 'pretty_print' arguments of
    ``ElementTree.write``.

    Returns:
        A ``str`` when *encoding* is "unicode" (compared case-insensitively),
        otherwise ``bytes``.
    """
    # ElementTree.write() matches the "unicode" pseudo-encoding
    # case-insensitively; the stream type must be chosen the same way,
    # otherwise e.g. encoding="Unicode" would write text into a BytesIO.
    wants_text = isinstance(encoding, str) and encoding.lower() == "unicode"
    stream = io.StringIO() if wants_text else io.BytesIO()
    ElementTree(element).write(
        stream,
        encoding=encoding,
        xml_declaration=xml_declaration,
        method=method,
        doctype=doctype,
        pretty_print=pretty_print,
    )
    return stream.getvalue()
| |
|
| | |
| |
|
| | import re |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
# Compiled pattern matching characters that _tounicode rejects: the control
# characters U+0000-U+001F except tab (U+0009), line feed (U+000A) and
# carriage return (U+000D), plus U+FFFE and U+FFFF.
UCS2 = sys.maxunicode < 0x10FFFF
if UCS2:
    # When sys.maxunicode is below 0x10FFFF the surrogate range is left out
    # of the pattern.
    # NOTE(review): presumably because such builds store non-BMP characters
    # as surrogate pairs -- confirm.
    _invalid_xml_string = re.compile(
        "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uFFFE-\uFFFF]"
    )
else:
    # Otherwise surrogate code points U+D800-U+DFFF are rejected as well.
    _invalid_xml_string = re.compile(
        "[\u0000-\u0008\u000B-\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE-\uFFFF]"
    )
| |
|
def _tounicode(s):
    """Validate user input and return it as a unicode string.

    Bytes input is decoded as strict ASCII. Input containing non-XML
    characters, or bytes containing non-ASCII characters, raises ValueError;
    objects that cannot be converted raise the serialization TypeError.
    """
    try:
        text = tostr(s, encoding="ascii", errors="strict")
    except UnicodeDecodeError:
        raise ValueError(
            "Bytes strings can only contain ASCII characters. "
            "Use unicode strings for non-ASCII characters."
        )
    except AttributeError:
        # Always raises TypeError, so ``text`` is never read unbound below.
        _raise_serialization_error(s)
    if text:
        offending = _invalid_xml_string.search(text)
        if offending:
            raise ValueError(
                "All strings must be XML compatible: Unicode or ASCII, "
                "no NULL bytes or control characters"
            )
    return text
| |
|
| | import contextlib |
| |
|
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding a ``write(text)`` callable for the target.

    *file_or_filename* is either an object with a ``write`` attribute or a
    path to open. With the "unicode" pseudo-encoding the object's own
    ``write`` is yielded unchanged (a path is opened as UTF-8); otherwise
    text is encoded with *encoding*, and unencodable characters become XML
    character references.
    """
    try:
        raw_write = file_or_filename.write
    except AttributeError:
        # No ``write`` attribute: treat the argument as a path. The file is
        # closed when the context exits.
        target_encoding = "utf-8" if encoding == "unicode" else encoding
        with open(
            file_or_filename,
            "w",
            encoding=target_encoding,
            errors="xmlcharrefreplace",
        ) as handle:
            yield handle.write
    else:
        if encoding == "unicode":
            # The caller's object already accepts text.
            yield raw_write
            return

        # Wrap the caller's binary object in a text layer that encodes.
        owns_buffer = False
        if isinstance(file_or_filename, io.BufferedIOBase):
            stream = file_or_filename
        elif isinstance(file_or_filename, io.RawIOBase):
            stream = io.BufferedWriter(file_or_filename)
            owns_buffer = True
        else:
            # Arbitrary object with only a write(): build a minimal buffered
            # shim around it for TextIOWrapper.
            stream = io.BufferedIOBase()
            stream.writable = lambda: True
            stream.write = raw_write
            try:
                # Forward position queries when the object supports them.
                stream.seekable = file_or_filename.seekable
                stream.tell = file_or_filename.tell
            except AttributeError:
                pass
        text_layer = io.TextIOWrapper(
            stream,
            encoding=encoding,
            errors="xmlcharrefreplace",
            newline="\n",
        )
        try:
            yield text_layer.write
        finally:
            # Detach instead of closing so the caller's file object stays
            # open once the wrappers go away.
            text_layer.detach()
            if owns_buffer:
                stream.detach()
| |
|
| | from xml.etree.ElementTree import _namespace_map |
| |
|
def _namespaces(elem):
    """Collect the qualified names and namespaces used below *elem*.

    Returns:
        A ``(qnames, namespaces)`` pair: *qnames* maps each raw tag,
        attribute name or QName text to its serialized ``prefix:local``
        form, and *namespaces* maps namespace URIs to prefixes.
    """
    # None maps to None so that elements without a tag can be looked up.
    qnames = {None: None}
    namespaces = {}

    def add_qname(qname):
        # Compute and store the serialized form of one qualified name.
        try:
            qname = _tounicode(qname)
            if qname[:1] != "{":
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                else:
                    prefix = _tounicode(prefix)
                # The "xml" prefix is not recorded for declaration.
                if prefix != "xml":
                    namespaces[uri] = prefix
            qnames[qname] = ("%s:%s" % (prefix, local)) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    def ensure(name):
        # Register *name* unless it has been seen already.
        if name not in qnames:
            add_qname(name)

    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            ensure(tag.text)
        elif isinstance(tag, str):
            ensure(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            ensure(key.text if isinstance(key, QName) else key)
            if isinstance(value, QName):
                ensure(value.text)
        text = node.text
        if isinstance(text, QName):
            ensure(text.text)
    return qnames, namespaces
| |
|
def _serialize_xml(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps raw names to their serialized form. *namespaces* (URI to
    prefix) is emitted as ``xmlns`` attributes on this element only; child
    elements are serialized with None. Extra keyword arguments are accepted
    and ignored.
    """
    raw_tag = elem.tag
    content = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _tounicode(content))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _tounicode(content))
    else:
        name = qnames[None if raw_tag is None else _tounicode(raw_tag)]
        if name is None:
            # Element without a tag: only its text and children are written.
            if content:
                write(_escape_cdata(content))
            for child in elem:
                _serialize_xml(write, child, qnames, None)
        else:
            write("<" + name)
            if namespaces:
                # Declarations are ordered by prefix.
                declarations = sorted(
                    namespaces.items(), key=lambda pair: pair[1]
                )
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
            attrs = elem.attrib
            if attrs:
                # Keep the mapping's own order for _Attrib instances (and for
                # single attributes); sort any other mapping type.
                keep_order = len(attrs) <= 1 or type(attrs) is _Attrib
                items = attrs.items() if keep_order else sorted(attrs.items())
                for k, v in items:
                    k = _tounicode(k.text if isinstance(k, QName) else k)
                    if isinstance(v, QName):
                        v = qnames[_tounicode(v.text)]
                    else:
                        v = _escape_attrib(v)
                    write(' %s="%s"' % (qnames[k], v))
            if content is None and not len(elem):
                # No text and no children: self-closing form.
                write("/>")
            else:
                write(">")
                if content:
                    write(_escape_cdata(content))
                for child in elem:
                    _serialize_xml(write, child, qnames, None)
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail))
| |
|
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized.

    The message contains the repr of the value and the name of its type.
    """
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
| |
|
def _escape_cdata(text):
    """Return *text* escaped for use as XML character data.

    The text is validated and converted with ``_tounicode`` and then ``&``,
    ``<`` and ``>`` are replaced by their predefined entities.

    Raises:
        TypeError: if *text* cannot be serialized.
        ValueError: propagated from ``_tounicode`` for non-XML characters.
    """
    try:
        text = _tounicode(text)
        # The replacements must insert entity references; replacing a
        # character with itself leaves markup characters unescaped.
        # '&' goes first so the entities added afterwards are not re-escaped.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
| |
|
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    The text is validated and converted with ``_tounicode``; then ``&``,
    ``<``, ``>`` and ``"`` are replaced by their predefined entities and
    newlines by the character reference ``&#10;``.

    Raises:
        TypeError: if *text* cannot be serialized.
        ValueError: propagated from ``_tounicode`` for non-XML characters.
    """
    try:
        text = _tounicode(text)
        # '&' goes first so the entities added afterwards are not re-escaped.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if '"' in text:
            text = text.replace('"', "&quot;")
        # A literal newline in an attribute value is normalized to a space
        # by XML parsers; a character reference keeps it intact.
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
| |
|
def _indent(elem, level=0):
    """Insert indentation whitespace into the tree rooted at *elem*, in place.

    Whitespace-only or missing ``text``/``tail`` values are replaced with a
    newline followed by an indent matching the nesting *level*; text and
    tails that contain other characters are left untouched.
    """

    def is_blank(value):
        # True for None, the empty string and whitespace-only strings.
        return not value or not value.strip()

    pad = "\n" + level * " "

    if not len(elem):
        # Leaf: only the tail is adjusted, and never for the top-level call.
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        elem.text = pad + " "
    if is_blank(elem.tail):
        elem.tail = pad
    last_child = None
    for child in elem:
        _indent(child, level + 1)
        last_child = child
    # The last child's tail precedes this element's closing tag, so it is
    # pulled back to this element's own indentation.
    if is_blank(last_child.tail):
        last_child.tail = pad
| |
|