"""Tests for ``resolve_spans``: locating span text in a source via its context."""

import pytest

from tei_annotator.models.spans import SpanDescriptor
from tei_annotator.postprocessing.resolver import resolve_spans

# The name "John Smith" occurs twice, so only the context can tell the
# two occurrences apart.
SOURCE = "He said John Smith yesterday, and John Smith agreed."


def _span(element, text, context, attrs=None):
    """Build a ``SpanDescriptor``; a missing/falsy *attrs* becomes ``{}``."""
    effective_attrs = attrs if attrs else {}
    return SpanDescriptor(
        element=element,
        text=text,
        context=context,
        attrs=effective_attrs,
    )


def test_exact_context_match():
    """A context found verbatim yields one non-fuzzy span at the first name."""
    name = "John Smith"
    descriptor = _span("persName", name, "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    hit = results[0]
    assert hit.start == SOURCE.index(name)
    assert hit.end == hit.start + len(name)
    assert not hit.fuzzy_match


def test_context_not_found_rejected():
    """A context that does not occur in SOURCE produces no resolved spans."""
    descriptor = _span(
        "persName", "John Smith", "this context does not exist xyz987"
    )

    results = resolve_spans(SOURCE, [descriptor])

    assert results == []


def test_text_not_in_context_window_rejected():
    """A span whose text is absent from its own context is dropped."""
    descriptor = _span("persName", "Jane Doe", "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert results == []


def test_source_slice_verified():
    """Slicing SOURCE with the resolved offsets gives back the span text."""
    descriptor = _span("persName", "John Smith", "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    hit = results[0]
    assert SOURCE[hit.start : hit.end] == "John Smith"


def test_attrs_preserved():
    """Attributes given on the descriptor appear unchanged on the result."""
    descriptor = _span(
        "persName", "John Smith", "said John Smith yesterday", {"ref": "#js"}
    )

    results = resolve_spans(SOURCE, [descriptor])

    assert len(results) == 1
    assert results[0].attrs == {"ref": "#js"}


def test_multiple_spans_resolved():
    """Same text with two different contexts resolves to two distinct starts."""
    contexts = ("He said John Smith yesterday", "and John Smith agreed")
    descriptors = [_span("persName", "John Smith", ctx) for ctx in contexts]

    results = resolve_spans(SOURCE, descriptors)

    assert len(results) == 2
    first, second = results[0], results[1]
    assert first.start != second.start


def test_empty_span_list():
    """Resolving an empty list of descriptors returns an empty list."""
    results = resolve_spans(SOURCE, [])

    assert results == []


def test_children_start_empty():
    """A freshly resolved span has an empty ``children`` list."""
    descriptor = _span("persName", "John Smith", "said John Smith yesterday")

    results = resolve_spans(SOURCE, [descriptor])

    assert results[0].children == []