|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
This module contains various constants, classes and functions which get referenced and used |
|
|
throughout the code base. |
|
|
""" |
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import re |
|
|
import sys |
|
|
import warnings |
|
|
from functools import wraps, lru_cache |
|
|
from itertools import count |
|
|
from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from markdown import Markdown |
|
|
import xml.etree.ElementTree as etree |
|
|
|
|
|
# Type variable standing for the kind of item a `Registry` holds.
_T = TypeVar('_T')
|
|
|
|
|
|
|
|
""" |
|
|
Constants you might want to modify |
|
|
----------------------------------------------------------------------------- |
|
|
""" |
|
|
|
|
|
|
|
|
BLOCK_LEVEL_ELEMENTS: list[str] = [
    'address', 'article', 'aside', 'blockquote', 'details', 'div',
    'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'header', 'hgroup', 'hr', 'main', 'menu', 'nav',
    'ol', 'p', 'pre', 'section', 'table', 'ul',
    # The remaining tags were listed as a separate group; they are treated
    # exactly like the ones above because they live in the same list.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group',
    'html', 'iframe', 'li', 'legend', 'math', 'map',
    'noscript', 'output', 'object', 'option', 'progress', 'script',
    'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot',
    'th', 'thead', 'tr', 'video', 'center',
]
"""
HTML tag names that are treated as block-level elements. This mirrors the
`block_level_elements` attribute of the [`Markdown`][markdown.Markdown] class, which is
the preferred place to read or change the list. The module-level copy is kept only for
compatibility with older extensions.
"""
|
|
|
|
|
|
|
|
STX = '\u0002'
""" The "Start of Text" control character; opens every placeholder. """

ETX = '\u0003'
""" The "End of Text" control character; closes every placeholder. """

INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
""" Leading part shared by all inline placeholders. """

INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
""" `%`-template producing the placeholder for a piece of stashed inline text. """

INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching an inline placeholder; group 1 is its number. """

AMP_SUBSTITUTE = f"{STX}amp{ETX}"
""" Placeholder used for HTML entities. """

HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
""" `%`-template producing the placeholder for a piece of raw HTML. """

HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Compiled pattern matching a raw HTML placeholder; group 1 is its number. """

TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
""" `%`-template producing the placeholder for a stored tag. """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Two pairs of characters. Judging by the name, each pair presumably bounds a
# range of right-to-left (bidirectional) code points -- confirm against the
# code that consumes this constant.
RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'), ('\u2D30', '\u2D7F'),
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@lru_cache(maxsize=None)
def get_installed_extensions():
    """
    Look up the entry points registered under the `markdown.extensions` group.

    The lookup runs once; `lru_cache` hands the same result object back on
    every later call.
    """
    # `importlib.metadata` is only used from Python 3.10 on; older
    # interpreters fall back to the `importlib_metadata` package.
    if sys.version_info < (3, 10):
        import importlib_metadata as metadata
    else:
        from importlib import metadata

    return metadata.entry_points(group='markdown.extensions')
|
|
|
|
|
|
|
|
def deprecated(message: str, stacklevel: int = 2):
    """
    Build a decorator that emits a [`DeprecationWarning`][] on every call of the
    decorated function or method, then runs it as usual.

    Arguments:
        message: Text appended after "'<name>' is deprecated. " in the warning.
        stacklevel: Passed through to `warnings.warn`.

    Usage:

    ```python
    @deprecated("This method will be removed in version X; use Y instead.")
    def some_method():
        pass
    ```
    """
    def wrapper(func):
        @wraps(func)
        def deprecated_func(*args, **kwargs):
            # The text is assembled on each call, so it reflects the wrapped
            # function's name at call time.
            notice = f"'{func.__name__}' is deprecated. {message}"
            warnings.warn(notice, DeprecationWarning, stacklevel)
            return func(*args, **kwargs)
        return deprecated_func
    return wrapper
|
|
|
|
|
|
|
|
def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Interpret `value` as a boolean.

    Strings are compared case-insensitively: `true`, `yes`, `y`, `on` and `1`
    give `True`; `false`, `no`, `n`, `off`, `0` and `none` give `False`.
    Non-string values are converted with `bool()`.

    With `preserve_none=True`, both `None` and the string `none` give `None`
    instead of `False`.

    A string that matches none of the above raises `ValueError`, unless
    `fail_on_errors=False`, in which case `None` is returned."""
    if not isinstance(value, str):
        if value is None and preserve_none:
            return None
        return bool(value)

    lowered = value.lower()
    # Must be checked before the "false" words, which also contain 'none'.
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
|
|
|
|
|
|
|
|
def code_escape(text: str) -> str:
    """HTML escape a string of code.

    Replaces `&`, `<` and `>` with `&amp;`, `&lt;` and `&gt;`. Quotes are left
    untouched.

    Arguments:
        text: The code to escape.

    Returns:
        The escaped text.
    """
    # The ampersand has to go first; otherwise the `&` introduced by `&lt;`
    # and `&gt;` would be escaped a second time.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
|
|
|
|
|
|
|
|
def _get_stack_depth(size: int = 2) -> int:
    """Count the frames on the current call stack.

    Starts at the frame `size` levels above this function and follows
    `f_back` links until they run out, counting up from `size` as it goes.
    Starting part-way up the stack simply skips that many steps of the walk.
    """
    frame = sys._getframe(size)
    depth = size

    while True:
        frame = frame.f_back
        if not frame:
            return depth
        depth += 1
|
|
|
|
|
|
|
|
def nearing_recursion_limit() -> bool:
    """Tell whether fewer than 100 frames remain before the recursion limit."""
    frames_left = sys.getrecursionlimit() - _get_stack_depth()
    return frames_left < 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AtomicString(str):
    """A `str` subclass marking text that should not be processed any further."""
|
|
|
|
|
|
|
|
class Processor:
    """ Common base class of all processors.

    Attributes:
        Processor.md: The `Markdown` instance handed to the constructor, or `None`.

    Arguments:
        md: The `Markdown` instance this processor belongs to.

    """
    def __init__(self, md: Markdown | None = None):
        self.md: Markdown | None = md
|
|
|
|
|
|
|
|
if TYPE_CHECKING:
    # Declared for static type checkers only: this class does not exist at
    # runtime. It describes the dictionaries that `HtmlStash.store_tag`
    # appends to `HtmlStash.tag_data`.
    class TagData(TypedDict):
        # The `tag` argument given to `store_tag`.
        tag: str
        # The `attrs` argument given to `store_tag`.
        attrs: dict[str, str]
        # The `left_index` argument given to `store_tag`.
        # NOTE(review): what the index refers to is not visible here -- confirm with callers.
        left_index: int
        # The `right_index` argument given to `store_tag`.
        right_index: int
|
|
|
|
|
|
|
|
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.

    It keeps two independent stores: raw HTML segments (`rawHtmlBlocks`,
    numbered by `html_counter`) and tag data (`tag_data`, numbered by
    `tag_counter`).
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self) -> None:
        """
        Clear the stash.

        Both the raw HTML store and the tag store are emptied and their
        counters restart at zero. Fresh lists are bound rather than clearing
        the old ones in place, so a list obtained before the reset is left
        untouched.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        # Tag data belongs to the stash too; leaving it behind would let it
        # accumulate across resets.
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key: int) -> str:
        """ Return the raw HTML placeholder string for the given number. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """
        Store tag data and return a placeholder.

        Arguments:
            tag: Stored under the `tag` key.
            attrs: Stored under the `attrs` key.
            left_index: Stored under the `left_index` key.
            right_index: Stored under the `right_index` key.

        Returns:
            A placeholder string numbered with the position of the new entry
            in `tag_data`.
        """
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % self.tag_counter
        self.tag_counter += 1
        return placeholder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _PriorityItem(NamedTuple):
    """Pairs a registered name with the priority it was registered under."""
    name: str
    priority: float


class Registry(Generic[_T]):
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        # Name -> item.
        self._data: dict[str, _T] = {}
        # (name, priority) pairs; ordered by priority once `_sort` has run.
        self._priority: list[_PriorityItem] = []
        self._is_sorted = False

    def __contains__(self, item: str | _T) -> bool:
        if isinstance(item, str):
            # A string is always looked up as a name, never as a stored item.
            return item in self._data
        return item in self._data.values()

    def __iter__(self) -> Iterator[_T]:
        self._sort()
        # The list is built eagerly, so iteration runs over a snapshot.
        return iter([self._data[entry.name] for entry in self._priority])

    @overload
    def __getitem__(self, key: str | int) -> _T:
        ...

    @overload
    def __getitem__(self, key: slice) -> Registry[_T]:
        ...

    def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
        self._sort()
        if isinstance(key, slice):
            # A slice yields a new registry holding the selected entries.
            data: Registry[_T] = Registry()
            for name, priority in self._priority[key]:
                data.register(self._data[name], name, priority)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self) -> int:
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name: str) -> int:
        """
        Return the index of the given name.

        Raises a [`ValueError`][] when no item is registered under `name`.
        """
        # Only names are looked up here. `name in self` is deliberately not
        # used: for a non-string argument it would also match stored *items*,
        # for which no index can be found by name.
        if isinstance(name, str) and name in self._data:
            self._sort()
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError('No item named "{}" exists.'.format(name))

    def register(self, item: _T, name: str, priority: float) -> None:
        """
        Add an item to the registry with the given name and priority.

        Arguments:
            item: The item being registered.
            name: A string used to reference the item.
            priority: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Tread carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Drop the old entry so the name never appears twice.
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name: str, strict: bool = True) -> None:
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
        """
        try:
            index = self.get_index_for_name(name)
        except ValueError:
            if strict:
                raise
        else:
            del self._priority[index]
            del self._data[name]

    def _sort(self) -> None:
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True
|
|
|