| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| |
|
| | |
| |
|
| | """ |
| | This extension adds abbreviation handling to Python-Markdown. |
| | |
| | See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations) |
| | for details. |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | from . import Extension |
| | from ..blockprocessors import BlockProcessor |
| | from ..inlinepatterns import InlineProcessor |
| | from ..treeprocessors import Treeprocessor |
| | from ..util import AtomicString, deprecated |
| | from typing import TYPE_CHECKING |
| | import re |
| | import xml.etree.ElementTree as etree |
| |
|
| | if TYPE_CHECKING: |
| | from .. import Markdown |
| | from ..blockparser import BlockParser |
| |
|
| |
|
class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def __init__(self, **kwargs):
        """ Initiate Extension and set up configs. """
        self.config = {
            'glossary': [
                {},
                # Note the trailing space: the two literals are concatenated into one help string.
                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
                "Default: `{}`"
            ],
        }
        """ Default configuration options. """
        super().__init__(**kwargs)
        # Working set of abbreviations. The very same dict object is handed to the
        # processors in `extendMarkdown`, so it must only ever be mutated in place.
        self.abbrs = {}
        # Abbreviations which survive a `reset`.
        self.glossary = {}

    def reset(self):
        """ Clear all previously defined abbreviations, then restore the glossary entries. """
        self.abbrs.clear()
        if self.glossary:
            self.abbrs.update(self.glossary)

    def reset_glossary(self):
        """ Clear all abbreviations from the glossary. """
        self.glossary.clear()

    def load_glossary(self, dictionary: dict[str, str]):
        """
        Merge `dictionary` into the glossary.

        When an abbreviation is present in both, the definition already held by the
        glossary is kept; only abbreviations new to the glossary are added. The
        working set (`self.abbrs`) is not touched here; it picks the glossary up in
        `reset` and `extendMarkdown`.
        """
        if dictionary:
            # Later mappings win in a dict display, so existing glossary entries take precedence.
            self.glossary = {**dictionary, **self.glossary}

    def extendMarkdown(self, md):
        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
        if self.config['glossary'][0]:
            self.load_glossary(self.config['glossary'][0])
        self.abbrs.update(self.glossary)
        md.registerExtension(self)
        # Both processors receive the same `self.abbrs` dict object.
        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)
| |
|
| |
|
class AbbrTreeprocessor(Treeprocessor):
    """ Replace abbreviation text with `<abbr>` elements. """

    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
        self.abbrs: dict = abbrs if abbrs is not None else {}
        # Compiled in `run` from the keys of `self.abbrs`; `None` until then.
        self.RE: re.Pattern[str] | None = None
        super().__init__(md)

    def create_element(self, title: str, text: str, tail: str) -> etree.Element:
        ''' Create an `abbr` element with the given `title` attribute, `text` and `tail`. '''
        abbr = etree.Element('abbr', {'title': title})
        # Wrapped in `AtomicString` so the `isinstance` checks in `iter_element` skip this text.
        abbr.text = AtomicString(text)
        abbr.tail = tail
        return abbr

    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
        '''
        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

        `self.RE` must already be compiled (see `run`). Abbreviations whose title is
        empty are left as plain text, both in an element's text and in its tail.
        '''
        # Children are visited last-to-first: handling a child's tail inserts new
        # siblings after that child, which leaves the not-yet-visited ones in place.
        for child in reversed(el):
            self.iter_element(child, el)
        if text := el.text:
            if not isinstance(text, AtomicString):
                # Matches are handled right-to-left so each new element's tail is the
                # text which followed the match, and `text` shrinks to what preceded it.
                for m in reversed(list(self.RE.finditer(text))):
                    if self.abbrs[m.group(0)]:
                        abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), text[m.end():])
                        el.insert(0, abbr)
                        text = text[:m.start()]
                el.text = text
        if parent is not None and el.tail:
            tail = el.tail
            index = list(parent).index(el) + 1
            if not isinstance(tail, AtomicString):
                for m in reversed(list(self.RE.finditer(tail))):
                    # Same empty-title guard as for `el.text` above.
                    if self.abbrs[m.group(0)]:
                        abbr = self.create_element(self.abbrs[m.group(0)], m.group(0), tail[m.end():])
                        parent.insert(index, abbr)
                        tail = tail[:m.start()]
                el.tail = tail

    def run(self, root: etree.Element) -> etree.Element | None:
        ''' Step through tree to find known abbreviations. '''
        if not self.abbrs:
            # No abbreviations defined. Skip running processor.
            return
        # Build and compile regex. Longest keys come first so that they are preferred
        # by the alternation over shorter keys.
        abbr_list = list(self.abbrs.keys())
        abbr_list.sort(key=len, reverse=True)
        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in abbr_list) })\\b")
        # Step through tree and modify on matches.
        self.iter_element(root)
| |
|
| |
|
class AbbrBlockprocessor(BlockProcessor):
    """ Parse text for abbreviation references. """

    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def __init__(self, parser: BlockParser, abbrs: dict):
        # The dict is stored by reference, so definitions found here are visible
        # to whoever else holds the same dict.
        self.abbrs: dict = abbrs
        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Accept every block; `run` decides whether it holds an abbreviation reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """
        Find and remove all abbreviation references from the text.
        Each reference is added to the abbreviation collection.

        A reference whose title is exactly `''` or `""` removes that abbreviation from
        the collection instead; removing an abbreviation which was never defined is a
        no-op.

        Returns `True` when a reference was consumed from the first block, and `False`
        (with `blocks` restored) otherwise.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            abbr = m.group('abbr').strip()
            title = m.group('title').strip()
            if title and abbr:
                if title == "''" or title == '""':
                    # Default of `None` avoids a `KeyError` for an undefined abbreviation.
                    self.abbrs.pop(abbr, None)
                else:
                    self.abbrs[abbr] = title
                if block[m.end():].strip():
                    # Add any content after match back to blocks as separate block
                    blocks.insert(0, block[m.end():].lstrip('\n'))
                if block[:m.start()].strip():
                    # Add any content before match back to blocks as separate block
                    blocks.insert(0, block[:m.start()].rstrip('\n'))
                return True
        # No match. Restore block.
        blocks.insert(0, block)
        return False
| |
|
| |
|
# Old name of `AbbrBlockprocessor`, wrapped with `deprecated` and the rename message below.
AbbrPreprocessor = deprecated(
    "This class has been renamed to `AbbrBlockprocessor`."
)(AbbrBlockprocessor)
| |
|
| |
|
@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
class AbbrInlineProcessor(InlineProcessor):
    """ Abbreviation inline pattern. """

    def __init__(self, pattern: str, title: str):
        """ Store the `title` used for every `<abbr>` element this pattern produces. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return an `<abbr>` element for the match together with the span it covers. """
        element = etree.Element('abbr')
        element.text = AtomicString(m.group('abbr'))
        element.set('title', self.title)
        start, end = m.span(0)
        return element, start, end
| |
|
| |
|
def makeExtension(**kwargs):
    """ Build an `AbbrExtension`, forwarding all keyword arguments to its constructor. """
    extension = AbbrExtension(**kwargs)
    return extension
| |
|