File size: 2,381 Bytes
37eaffd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from tei_annotator.models.schema import TEIAttribute, TEIElement, TEISchema
from tei_annotator.models.spans import ResolvedSpan
from tei_annotator.postprocessing.validator import validate_spans

# Sample text passed to validate_spans alongside the spans in every test below.
SOURCE = "He met John Smith."


def _schema():
    """Build the schema fixture: a single ``persName`` element.

    The element declares two attributes: ``ref``, created without
    ``allowed_values``, and ``cert``, whose ``allowed_values`` are
    ``"high"`` and ``"low"``.
    """
    ref_attr = TEIAttribute(name="ref", description="reference URI")
    cert_attr = TEIAttribute(
        name="cert",
        description="certainty",
        allowed_values=["high", "low"],
    )
    pers_name = TEIElement(
        tag="persName",
        description="a person's name",
        attributes=[ref_attr, cert_attr],
    )
    return TEISchema(elements=[pers_name])


def _span(element, start, end, attrs=None):
    """Create a ``ResolvedSpan``; a falsy ``attrs`` is replaced by a new empty dict."""
    span_attrs = attrs if attrs else {}
    return ResolvedSpan(element=element, start=start, end=end, attrs=span_attrs)


# SOURCE: "He met John Smith."
# positions: H=0 e=1 ' '=2 m=3 e=4 t=5 ' '=6 J=7 o=8 h=9 n=10 ' '=11 S=12 m=13 i=14 t=15 h=16 .=17
# "John Smith" => [7:17]


def test_valid_span_passes():
    """A ``persName`` span over [7:17] with no attributes is expected to be kept."""
    span = _span("persName", 7, 17)
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 1


def test_unknown_element_rejected():
    """A span tagged ``orgName``, which the schema fixture lacks, is expected to be dropped."""
    span = _span("orgName", 7, 17)
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 0


def test_unknown_attribute_rejected():
    """A span carrying an attribute not declared on ``persName`` is expected to be dropped."""
    span = _span("persName", 7, 17, {"unknown_attr": "val"})
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 0


def test_invalid_attribute_value_rejected():
    """``cert="medium"`` is outside the fixture's allowed values, so the span is expected to be dropped."""
    span = _span("persName", 7, 17, {"cert": "medium"})
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 0


def test_valid_constrained_attribute_passes():
    """``cert="high"`` is one of the fixture's allowed values, so the span is expected to be kept."""
    span = _span("persName", 7, 17, {"cert": "high"})
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 1


def test_free_string_attribute_passes():
    """A ``ref`` value (declared without ``allowed_values``) is expected to leave the span kept."""
    span = _span("persName", 7, 17, {"ref": "http://example.com/p/1"})
    kept = validate_spans([span], _schema(), SOURCE)
    assert len(kept) == 1


def test_out_of_bounds_span_rejected():
    """Spans with a negative start or an end of 200 are each expected to be dropped."""
    # Checked one at a time, in the same order: negative start first, oversized end second.
    for start, end in ((-1, 5), (5, 200)):
        kept = validate_spans([_span("persName", start, end)], _schema(), SOURCE)
        assert len(kept) == 0


def test_empty_span_list():
    """Validating an empty span list is expected to return an empty list."""
    kept = validate_spans([], _schema(), SOURCE)
    assert kept == []