Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from ..models.schema import TEISchema | |
| from ..models.spans import ResolvedSpan | |
def _attrs_conform(attrs: dict, attr_defs) -> bool:
    """Return True when every entry of *attrs* is permitted by *attr_defs*.

    *attr_defs* is an iterable of attribute definitions exposing ``name`` and
    ``allowed_values``. An attribute is permitted when its name matches a
    definition and, if that definition's ``allowed_values`` is not ``None``,
    its value is contained in ``allowed_values``.
    """
    # Index the definitions by name once instead of rescanning the list for
    # every attribute. ``setdefault`` keeps the first definition if a name is
    # repeated, matching a first-match linear search.
    defs_by_name: dict = {}
    for attr_def in attr_defs:
        defs_by_name.setdefault(attr_def.name, attr_def)

    for name, value in attrs.items():
        attr_def = defs_by_name.get(name)
        if attr_def is None:
            return False  # attribute name not listed for this element
        allowed = attr_def.allowed_values
        # ``None`` means the attribute value is unconstrained.
        if allowed is not None and value not in allowed:
            return False
    return True


def validate_spans(
    spans: list[ResolvedSpan],
    schema: TEISchema,
    source: str,
) -> list[ResolvedSpan]:
    """
    Filter out spans that fail schema validation.

    A span is rejected when:
    - its bounds are out of range: ``start < 0``, ``end > len(source)``,
      or ``start >= end`` (zero-width spans are rejected too)
    - its element is not in the schema (``schema.get`` returns ``None``)
    - an attribute name is not listed for that element
    - an attribute value is not in the attribute's ``allowed_values``
      (only checked when ``allowed_values`` is not ``None``)

    Returns a new list holding the accepted spans in their original order;
    the input list and the span objects are not modified.
    """
    source_len = len(source)  # loop-invariant
    valid: list[ResolvedSpan] = []
    for span in spans:
        # Bounds sanity check
        if span.start < 0 or span.end > source_len or span.start >= span.end:
            continue

        elem = schema.get(span.element)
        if elem is None:
            continue  # element not in schema

        if _attrs_conform(span.attrs, elem.attributes):
            valid.append(span)
    return valid