Picarones / tests /domain /test_documents.py
Claude
test(rename): dé-sprintage tests/domain (7 fichiers, git mv)
ade2cbf unverified
"""Sprint A14-S4 — ``DocumentRef`` et ``GroundTruthRef`` multi-niveaux."""
from __future__ import annotations
import pytest
from picarones.domain import (
ArtifactType,
CorpusSpecError,
DocumentRef,
GroundTruthRef,
)
class TestDocumentRefBasics:
    """Basic construction and id validation of ``DocumentRef``."""

    def test_minimal_document(self) -> None:
        """A document built from an id alone has no image and no ground truths."""
        doc = DocumentRef(id="folio_001")

        assert doc.id == "folio_001"
        assert doc.image_uri is None
        assert doc.ground_truths == ()

    def test_document_with_image_and_text_gt(self) -> None:
        """A document exposes its image URI and its single raw-text ground truth."""
        text_gt = GroundTruthRef(
            type=ArtifactType.RAW_TEXT,
            uri="/corpus/folio_001.gt.txt",
        )
        doc = DocumentRef(
            id="folio_001",
            image_uri="/corpus/folio_001.png",
            ground_truths=(text_gt,),
        )

        assert doc.image_uri == "/corpus/folio_001.png"
        assert len(doc.ground_truths) == 1

    def test_id_validation_rejects_spaces(self) -> None:
        """An id containing a space is rejected with ``CorpusSpecError``."""
        with pytest.raises(CorpusSpecError, match="document id invalide"):
            DocumentRef(id="bad id")
class TestMultiLevelGT:
    """Documents carrying several ground truths, one per artifact type."""

    def test_multi_level_gt(self) -> None:
        """Three ground truths of distinct types are kept, in declaration order."""
        levels = (
            (ArtifactType.RAW_TEXT, "/x.gt.txt"),
            (ArtifactType.ALTO_XML, "/x.gt.alto.xml"),
            (ArtifactType.READING_ORDER, "/x.ro.json"),
        )
        doc = DocumentRef(
            id="folio_001",
            ground_truths=tuple(
                GroundTruthRef(type=gt_type, uri=gt_uri)
                for gt_type, gt_uri in levels
            ),
        )

        assert len(doc.ground_truths) == 3
        expected_types = tuple(gt_type for gt_type, _ in levels)
        assert doc.available_gt_types == expected_types

    def test_gt_for_returns_matching_level(self) -> None:
        """``gt_for`` returns the ground truth whose type matches the request."""
        raw_text_gt = GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/x.txt")
        alto_gt = GroundTruthRef(type=ArtifactType.ALTO_XML, uri="/x.xml")
        doc = DocumentRef(id="x", ground_truths=(raw_text_gt, alto_gt))

        found = doc.gt_for(ArtifactType.ALTO_XML)

        assert found is not None
        assert found.uri == "/x.xml"

    def test_gt_for_returns_none_when_absent(self) -> None:
        """``gt_for`` returns ``None`` when the document has no such ground truth."""
        doc = DocumentRef(id="x")

        assert doc.gt_for(ArtifactType.RAW_TEXT) is None

    def test_duplicate_gt_type_rejected(self) -> None:
        """Two ground truths of the same type raise ``CorpusSpecError``."""
        with pytest.raises(CorpusSpecError, match="GT dupliquée"):
            # Both references are built inside the context, as in the
            # original test, so the exception scope is unchanged.
            duplicates = (
                GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/a.txt"),
                GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/b.txt"),
            )
            DocumentRef(id="x", ground_truths=duplicates)
class TestDocumentRefImmutability:
    """Frozen-model behaviour and JSON serialisation of ``DocumentRef``."""

    def test_frozen_blocks_mutation(self) -> None:
        """Assigning to a field of an existing document raises ``ValidationError``."""
        from pydantic import ValidationError

        doc = DocumentRef(id="x")

        with pytest.raises(ValidationError):
            doc.id = "y"  # type: ignore[misc]

    def test_json_roundtrip(self) -> None:
        """A document dumped to JSON and validated back compares equal."""
        alto_gt = GroundTruthRef(type=ArtifactType.ALTO_XML, uri="/x.xml")
        original = DocumentRef(
            id="vol_a/folio_001",
            image_uri="/c/folio_001.png",
            ground_truths=(alto_gt,),
        )

        payload = original.model_dump_json()
        restored = DocumentRef.model_validate_json(payload)

        assert original == restored