Upload 38004 files

1f5470c verified 9 months ago

7.14 kB

	# Abbreviation Extension for Python-Markdown
	# ==========================================

	# This extension adds abbreviation handling to Python-Markdown.

	# See https://Python-Markdown.github.io/extensions/abbreviations
	# for documentation.

	# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/)
	# and [Seemant Kulleen](http://www.kulleen.org/)

	# All changes Copyright 2008-2014 The Python Markdown Project

	# License: [BSD](https://opensource.org/licenses/bsd-license.php)

	"""
	This extension adds abbreviation handling to Python-Markdown.

	See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations)
	for details.
	"""

	from __future__ import annotations

	from . import Extension
	from ..blockprocessors import BlockProcessor
	from ..inlinepatterns import InlineProcessor
	from ..treeprocessors import Treeprocessor
	from ..util import AtomicString, deprecated
	from typing import TYPE_CHECKING
	import re
	import xml.etree.ElementTree as etree

	if TYPE_CHECKING: # pragma: no cover
	from .. import Markdown
	from ..blockparser import BlockParser


	class AbbrExtension(Extension):
	    """ Abbreviation Extension for Python-Markdown.

	    Two dictionaries are maintained:

	    * `glossary` holds definitions loaded from the `glossary` config option or
	      through `load_glossary`; `reset` re-applies them.
	    * `abbrs` is the working set handed to `AbbrTreeprocessor` and
	      `AbbrBlockprocessor`. The processors receive this very object, so it is
	      only ever mutated in place and never rebound.
	    """

	    def __init__(self, **kwargs):
	        """ Initiate Extension and set up configs. """
	        self.config = {
	            'glossary': [
	                {},
	                'A dictionary where the `key` is the abbreviation and the `value` is the definition. '
	                'Default: `{}`'
	            ],
	        }
	        """ Default configuration options. """
	        super().__init__(**kwargs)
	        self.abbrs = {}
	        self.glossary = {}

	    def reset(self):
	        """ Clear all previously defined abbreviations, then re-apply the glossary. """
	        # Mutate in place: the processors registered in `extendMarkdown` hold a
	        # reference to this same dictionary.
	        self.abbrs.clear()
	        if self.glossary:
	            self.abbrs.update(self.glossary)

	    def reset_glossary(self):
	        """ Clear all abbreviations from the glossary. """
	        self.glossary.clear()

	    def load_glossary(self, dictionary: dict[str, str]):
	        """
	        Merge `dictionary` into the glossary.

	        Keys are abbreviations and values are their definitions. When an
	        abbreviation is present in both, the definition already stored in the
	        glossary is kept. An empty or `None` `dictionary` is ignored.
	        """
	        if dictionary:
	            # Later unpacking wins, so entries already in `self.glossary` take
	            # precedence over those in `dictionary`.
	            self.glossary = {**dictionary, **self.glossary}

	    def extendMarkdown(self, md):
	        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
	        configured = self.config['glossary'][0]
	        if configured:
	            self.load_glossary(configured)
	        self.abbrs.update(self.glossary)
	        md.registerExtension(self)
	        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
	        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)


	class AbbrTreeprocessor(Treeprocessor):
	    """ Replace abbreviation text with `<abbr>` elements. """

	    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
	        """
	        Store the abbreviation mapping and initialise the base processor.

	        `abbrs` maps each abbreviation to its title. The dictionary is kept by
	        reference, not copied. When omitted, a new empty dictionary is used.
	        """
	        self.abbrs: dict = abbrs if abbrs is not None else {}
	        # Compiled by `run` from the current keys of `self.abbrs`.
	        self.RE: re.Pattern[str] | None = None
	        super().__init__(md)

	    def create_element(self, title: str, text: str, tail: str) -> etree.Element:
	        ''' Create an `abbr` element with the given `title` attribute, `text` and `tail`. '''
	        abbr = etree.Element('abbr', {'title': title})
	        # `AtomicString` text is skipped by `iter_element`.
	        abbr.text = AtomicString(text)
	        abbr.tail = tail
	        return abbr

	    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
	        '''
	        Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags.

	        `self.RE` must have been compiled (see `run`) before this is called.
	        Abbreviations whose title is empty are left as plain text, both in an
	        element's text and in its tail.
	        '''
	        # Children are visited last-to-first: handling a child's tail inserts
	        # new siblings after that child, which leaves the positions of the
	        # children still to be visited untouched.
	        for child in reversed(el):
	            self.iter_element(child, el)
	        if text := el.text:
	            if not isinstance(text, AtomicString):
	                # Matches are handled right-to-left so that each new element
	                # takes the remaining text to its right as its tail.
	                for m in reversed(list(self.RE.finditer(text))):
	                    title = self.abbrs[m.group(0)]
	                    if title:
	                        abbr = self.create_element(title, m.group(0), text[m.end():])
	                        el.insert(0, abbr)
	                        text = text[:m.start()]
	                el.text = text
	        if parent is not None and el.tail:
	            tail = el.tail
	            index = list(parent).index(el) + 1
	            if not isinstance(tail, AtomicString):
	                for m in reversed(list(self.RE.finditer(tail))):
	                    title = self.abbrs[m.group(0)]
	                    if title:
	                        abbr = self.create_element(title, m.group(0), tail[m.end():])
	                        parent.insert(index, abbr)
	                        tail = tail[:m.start()]
	                el.tail = tail

	    def run(self, root: etree.Element) -> etree.Element | None:
	        ''' Step through tree to find known abbreviations. The tree is modified in place. '''
	        if not self.abbrs:
	            # No abbreviations defined. Skip running processor.
	            return
	        # Build and compile regex. Longer abbreviations come first so that they
	        # are tried before any shorter abbreviation they start with.
	        abbr_list = sorted(self.abbrs, key=len, reverse=True)
	        alternatives = '|'.join(re.escape(key) for key in abbr_list)
	        self.RE = re.compile(f"\\b(?:{alternatives})\\b")
	        # Step through tree and modify on matches
	        self.iter_element(root)


	class AbbrBlockprocessor(BlockProcessor):
	    """ Parse text for abbreviation references. """

	    # Matches a line of the form `*[abbr]: title`. The title may start on the
	    # following line. The abbreviation may not contain a backslash.
	    RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

	    def __init__(self, parser: BlockParser, abbrs: dict):
	        """ Keep a reference to `abbrs`, the mapping of abbreviation to title that `run` updates. """
	        self.abbrs: dict = abbrs
	        super().__init__(parser)

	    def test(self, parent: etree.Element, block: str) -> bool:
	        """ Accept every block; `run` returns `False` when a block holds no reference. """
	        return True

	    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
	        """
	        Find and remove the first abbreviation reference in the first block.

	        The reference is added to the abbreviation collection. A title of `''`
	        or `""` removes the abbreviation instead; removing one that was never
	        defined is a no-op. Text before and after the reference is pushed back
	        onto `blocks` as separate blocks.

	        Returns `True` when a reference was consumed. Otherwise the block is
	        restored to the front of `blocks` and `False` is returned.
	        """
	        block = blocks.pop(0)
	        m = self.RE.search(block)
	        if m:
	            abbr = m.group('abbr').strip()
	            title = m.group('title').strip()
	            if title and abbr:
	                if title == "''" or title == '""':
	                    # Default of `None` avoids a `KeyError` for an unknown abbreviation.
	                    self.abbrs.pop(abbr, None)
	                else:
	                    self.abbrs[abbr] = title
	                if block[m.end():].strip():
	                    # Add any content after match back to blocks as separate block
	                    blocks.insert(0, block[m.end():].lstrip('\n'))
	                if block[:m.start()].strip():
	                    # Add any content before match back to blocks as separate block
	                    blocks.insert(0, block[:m.start()].rstrip('\n'))
	                return True
	        # No match. Restore block.
	        blocks.insert(0, block)
	        return False


	# Old name of `AbbrBlockprocessor`, kept as an alias wrapped by `deprecated`
	# with the rename notice below.
	AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)


	@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
	class AbbrInlineProcessor(InlineProcessor):
	    """ Inline pattern which wraps a matched abbreviation in an `<abbr>` element. """

	    def __init__(self, pattern: str, title: str):
	        """ Register `pattern` with the base class and remember the `title` to apply. """
	        super().__init__(pattern)
	        self.title = title

	    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
	        """ Return the `<abbr>` element for the match together with the span of the whole match. """
	        element = etree.Element('abbr', {'title': self.title})
	        element.text = AtomicString(m.group('abbr'))
	        begin, finish = m.span(0)
	        return element, begin, finish


	def makeExtension(**kwargs):  # pragma: no cover
	    """ Build an `AbbrExtension`, forwarding all keyword arguments to it. """
	    extension = AbbrExtension(**kwargs)
	    return extension