# Picarones/tests/pipeline/test_artifact_cache.py
# Claude
# test(rename): dé-sprintage tests/pipeline (13 fichiers, git mv)
# f371b8a unverified
"""Sprint A14-S7 — minimal ``ArtifactCache``.

Checks that ``compute_key`` is deterministic, that basic get/put works,
and the safeguard "a single input without content_hash → no key".
"""
from __future__ import annotations
from picarones.domain import Artifact, ArtifactType
from picarones.pipeline import ArtifactCache, PipelineStep
def _hashed_artifact(
    suffix: str,
    type_: ArtifactType,
    content_hash: str | None = None,
) -> Artifact:
    """Build a test ``Artifact`` attached to document ``d1``.

    Args:
        suffix: Appended to ``"d1:"`` to form the artifact id.
        type_: Artifact type, passed through unchanged.
        content_hash: Hash stored on the artifact; ``None`` (the default)
            leaves the artifact without a content hash.

    Returns:
        The newly constructed ``Artifact``.
    """
    artifact_id = f"d1:{suffix}"
    return Artifact(
        id=artifact_id,
        document_id="d1",
        type=type_,
        content_hash=content_hash,
    )
def _ocr_step() -> PipelineStep:
    """Return the OCR step shared by the tests.

    The step uses the ``tesseract`` adapter with ``lang="fra"``, takes an
    ``IMAGE`` artifact as input and declares ``RAW_TEXT`` as output.
    """
    step_fields = {
        "id": "ocr",
        "kind": "ocr",
        "adapter_name": "tesseract",
        "params": {"lang": "fra"},
        "input_types": (ArtifactType.IMAGE,),
        "output_types": (ArtifactType.RAW_TEXT,),
    }
    return PipelineStep(**step_fields)
class TestComputeKey:
    """Checks on ``ArtifactCache.compute_key``."""

    def test_returns_string_when_all_inputs_have_hash(self) -> None:
        """A hashed image input produces a non-``None`` key of length 64."""
        image = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        inputs = {ArtifactType.IMAGE: image}

        key = ArtifactCache().compute_key(_ocr_step(), inputs, "1.0.0")

        assert key is not None
        assert len(key) == 64  # length of a SHA-256 hex digest

    def test_deterministic(self) -> None:
        """Two calls with equal step, inputs and version give equal keys."""
        cache = ArtifactCache()
        image = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)

        first, second = (
            cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: image}, "1.0.0")
            for _ in range(2)
        )

        assert first == second

    def test_different_content_hash_different_key(self) -> None:
        """Inputs with different content hashes give different keys."""
        cache = ArtifactCache()
        images = [
            _hashed_artifact(letter, ArtifactType.IMAGE, letter * 64)
            for letter in ("a", "b")
        ]

        key_a, key_b = (
            cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: image}, "1.0.0")
            for image in images
        )

        assert key_a != key_b

    def test_different_code_version_different_key(self) -> None:
        """Changing only the code version changes the key."""
        cache = ArtifactCache()
        image = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)

        key_v1, key_v2 = (
            cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: image}, version)
            for version in ("1.0.0", "2.0.0")
        )

        assert key_v1 != key_v2

    def test_different_step_params_different_key(self) -> None:
        """Changing only the step ``params`` changes the key."""

        def tesseract_step(lang: str) -> PipelineStep:
            # Same step definition every time; only the language parameter varies.
            return PipelineStep(
                id="ocr",
                kind="ocr",
                adapter_name="tesseract",
                params={"lang": lang},
                input_types=(ArtifactType.IMAGE,),
                output_types=(ArtifactType.RAW_TEXT,),
            )

        cache = ArtifactCache()
        image = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        french_step = tesseract_step("fra")
        english_step = tesseract_step("eng")

        key_fra = cache.compute_key(
            french_step, {ArtifactType.IMAGE: image}, "1.0.0",
        )
        key_eng = cache.compute_key(
            english_step, {ArtifactType.IMAGE: image}, "1.0.0",
        )

        assert key_fra != key_eng

    def test_returns_none_when_input_has_no_hash(self) -> None:
        """An input whose ``content_hash`` is ``None`` yields no key."""
        unhashed = _hashed_artifact("img", ArtifactType.IMAGE, content_hash=None)

        key = ArtifactCache().compute_key(
            _ocr_step(), {ArtifactType.IMAGE: unhashed}, "1.0.0",
        )

        assert key is None
class TestGetPutClear:
    """Checks on the cache's get/put/clear, membership, length and keys."""

    @staticmethod
    def _raw_text_outputs() -> dict:
        """Return a fresh outputs mapping holding one hashed RAW_TEXT artifact."""
        raw_text = _hashed_artifact("raw", ArtifactType.RAW_TEXT, "f" * 64)
        return {ArtifactType.RAW_TEXT: raw_text}

    def test_get_miss_returns_none(self) -> None:
        """Looking up a key that was never stored returns ``None``."""
        empty_cache = ArtifactCache()
        assert empty_cache.get("non_existent") is None

    def test_put_then_get_returns_outputs(self) -> None:
        """Outputs stored under a key can be read back under that key."""
        cache = ArtifactCache()
        outputs = self._raw_text_outputs()

        cache.put("k1", outputs)
        hit = cache.get("k1")

        assert hit is not None
        assert ArtifactType.RAW_TEXT in hit

    def test_put_with_none_key_is_noop(self) -> None:
        """Storing under a ``None`` key leaves the cache empty."""
        cache = ArtifactCache()
        cache.put(None, self._raw_text_outputs())
        assert len(cache) == 0

    def test_get_with_none_key_returns_none(self) -> None:
        """Looking up a ``None`` key returns ``None``."""
        empty_cache = ArtifactCache()
        assert empty_cache.get(None) is None

    def test_clear(self) -> None:
        """``clear()`` brings the length from 1 back to 0."""
        cache = ArtifactCache()
        cache.put("k", self._raw_text_outputs())
        assert len(cache) == 1

        cache.clear()
        assert len(cache) == 0

    def test_contains(self) -> None:
        """``in`` is true for a stored key and false for an unknown one."""
        cache = ArtifactCache()
        cache.put("foo", {})

        assert "foo" in cache
        assert "bar" not in cache

    def test_keys(self) -> None:
        """``keys()`` yields every stored key."""
        cache = ArtifactCache()
        for stored_key in ("a", "b"):
            cache.put(stored_key, {})

        assert sorted(cache.keys()) == ["a", "b"]

    def test_put_makes_defensive_copy(self) -> None:
        """Mutating the original dict after ``put()`` must not affect
        the content of the cache."""
        cache = ArtifactCache()
        outputs = self._raw_text_outputs()
        cache.put("k", outputs)

        # Empty the caller-side dict; the cached entry must still hold the artifact.
        outputs.clear()

        hit = cache.get("k")
        assert hit is not None
        assert ArtifactType.RAW_TEXT in hit