# tei-annotator / tests / test_resolver.py
# cmboulanger's picture
# full implementation
# 37eaffd
import pytest
from tei_annotator.models.spans import SpanDescriptor
from tei_annotator.postprocessing.resolver import resolve_spans
# Shared source text for every test in this module. "John Smith" occurs twice,
# so the surrounding context string is what tells the two occurrences apart.
SOURCE = "He said John Smith yesterday, and John Smith agreed."
def _span(element, text, context, attrs=None):
    """Build a ``SpanDescriptor`` for use in the tests below.

    A falsy ``attrs`` (``None`` or an empty mapping) is replaced by a new
    empty dict before being handed to the descriptor.
    """
    attributes = attrs if attrs else {}
    return SpanDescriptor(
        element=element,
        text=text,
        context=context,
        attrs=attributes,
    )
def test_exact_context_match():
    """A verbatim context yields one non-fuzzy span at the first occurrence."""
    needle = "John Smith"
    descriptor = _span("persName", needle, "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    match = results[0]
    # The context "said ... yesterday" only surrounds the first occurrence.
    assert match.start == SOURCE.index(needle)
    assert match.end == match.start + len(needle)
    assert not match.fuzzy_match
def test_context_not_found_rejected():
    """A span whose context does not appear in SOURCE resolves to nothing."""
    descriptor = _span(
        "persName",
        "John Smith",
        "this context does not exist xyz987",
    )
    outcome = resolve_spans(SOURCE, [descriptor])
    assert outcome == []
def test_text_not_in_context_window_rejected():
    """A span is dropped when its text is absent from its own context.

    The context string itself occurs in SOURCE, but "Jane Doe" does not
    appear inside it, so no resolved span is expected.
    """
    descriptor = _span("persName", "Jane Doe", "said John Smith yesterday")
    outcome = resolve_spans(SOURCE, [descriptor])
    assert outcome == []
def test_source_slice_verified():
    """Slicing SOURCE with the resolved offsets reproduces the span text."""
    descriptor = _span("persName", "John Smith", "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    match = results[0]
    recovered = SOURCE[match.start : match.end]
    assert recovered == "John Smith"
def test_attrs_preserved():
    """Attributes given on the descriptor appear unchanged on the resolved span."""
    descriptor = _span(
        "persName",
        "John Smith",
        "said John Smith yesterday",
        {"ref": "#js"},
    )

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    # Compare against a fresh literal rather than the dict that was passed in.
    assert results[0].attrs == {"ref": "#js"}
def test_multiple_spans_resolved():
    """Two spans with the same text but different contexts get distinct starts."""
    leading = _span("persName", "John Smith", "He said John Smith yesterday")
    trailing = _span("persName", "John Smith", "and John Smith agreed")

    results = resolve_spans(SOURCE, [leading, trailing])

    assert len(results) == 2
    head = results[0]
    tail = results[1]
    assert head.start != tail.start
def test_empty_span_list():
    """Resolving an empty list of descriptors returns an empty list."""
    no_spans = []
    outcome = resolve_spans(SOURCE, no_spans)
    assert outcome == []
def test_children_start_empty():
    """A freshly resolved span starts out with an empty ``children`` list."""
    span = _span("persName", "John Smith", "said John Smith yesterday")
    resolved = resolve_spans(SOURCE, [span])
    # Guard the index below, as the sibling tests do: if resolution returns
    # nothing, fail with a clear assertion instead of an IndexError.
    assert len(resolved) == 1
    assert resolved[0].children == []