Spaces:
Sleeping
Sleeping
"""Sprint A14-S7 — minimal ``ArtifactCache``.

Checks that compute_key is deterministic, that basic get/put works, and the
guard rule "a single input without content_hash → no key".
"""
| from __future__ import annotations | |
| from picarones.domain import Artifact, ArtifactType | |
| from picarones.pipeline import ArtifactCache, PipelineStep | |
def _hashed_artifact(
    suffix: str,
    type_: ArtifactType,
    content_hash: str | None = None,
) -> Artifact:
    """Build a test ``Artifact`` attached to document ``d1``.

    Args:
        suffix: Appended to ``d1:`` to form the artifact id.
        type_: Artifact type, forwarded as the ``type`` field.
        content_hash: Forwarded unchanged; ``None`` (the default) produces
            an artifact with no content hash.

    Returns:
        The constructed ``Artifact``.
    """
    fields = {
        "id": f"d1:{suffix}",
        "document_id": "d1",
        "type": type_,
        "content_hash": content_hash,
    }
    return Artifact(**fields)
def _ocr_step() -> PipelineStep:
    """Return a fresh OCR ``PipelineStep`` used as the reference step.

    The step uses the ``tesseract`` adapter with ``lang="fra"``, takes an
    IMAGE artifact as input and declares RAW_TEXT as output.
    """
    spec = {
        "id": "ocr",
        "kind": "ocr",
        "adapter_name": "tesseract",
        "params": {"lang": "fra"},
        "input_types": (ArtifactType.IMAGE,),
        "output_types": (ArtifactType.RAW_TEXT,),
    }
    return PipelineStep(**spec)
class TestComputeKey:
    """Tests for ``ArtifactCache.compute_key``.

    Each "different X → different key" test varies exactly one component
    (content hash, code version, step params) and keeps everything else
    equal, so a failure points at that component.
    """

    def test_returns_string_when_all_inputs_have_hash(self) -> None:
        """With every input hashed, the key is a 64-character string."""
        cache = ArtifactCache()
        img = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        key = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "1.0.0")
        assert key is not None
        assert isinstance(key, str)
        assert len(key) == 64  # length of a SHA-256 hex digest

    def test_deterministic(self) -> None:
        """Two calls with equal arguments yield the same key."""
        cache = ArtifactCache()
        img = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        k1 = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "1.0.0")
        k2 = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "1.0.0")
        assert k1 == k2

    def test_different_content_hash_different_key(self) -> None:
        """Changing only the input's content hash changes the key."""
        cache = ArtifactCache()
        # Both artifacts share the same id so that content_hash is the only
        # difference; distinct ids would let this test pass even if the
        # key ignored content_hash.
        img_a = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        img_b = _hashed_artifact("img", ArtifactType.IMAGE, "b" * 64)
        k_a = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img_a}, "1.0.0")
        k_b = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img_b}, "1.0.0")
        assert k_a is not None
        assert k_b is not None
        assert k_a != k_b

    def test_different_code_version_different_key(self) -> None:
        """Changing only the code version string changes the key."""
        cache = ArtifactCache()
        img = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        k1 = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "1.0.0")
        k2 = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "2.0.0")
        assert k1 != k2

    def test_different_step_params_different_key(self) -> None:
        """Changing only the step's ``params`` changes the key."""
        cache = ArtifactCache()
        img = _hashed_artifact("img", ArtifactType.IMAGE, "a" * 64)
        # Two steps identical except for the "lang" parameter.
        steps = {
            lang: PipelineStep(
                id="ocr",
                kind="ocr",
                adapter_name="tesseract",
                params={"lang": lang},
                input_types=(ArtifactType.IMAGE,),
                output_types=(ArtifactType.RAW_TEXT,),
            )
            for lang in ("fra", "eng")
        }
        k_fra = cache.compute_key(steps["fra"], {ArtifactType.IMAGE: img}, "1.0.0")
        k_eng = cache.compute_key(steps["eng"], {ArtifactType.IMAGE: img}, "1.0.0")
        assert k_fra != k_eng

    def test_returns_none_when_input_has_no_hash(self) -> None:
        """An input without a content hash means no key can be computed."""
        cache = ArtifactCache()
        img = _hashed_artifact("img", ArtifactType.IMAGE, content_hash=None)
        key = cache.compute_key(_ocr_step(), {ArtifactType.IMAGE: img}, "1.0.0")
        assert key is None
class TestGetPutClear:
    """Tests for the storage side of ``ArtifactCache``.

    Covers get/put, ``None`` keys, ``clear``, ``len``, membership, ``keys``
    and the defensive copy made by ``put``.
    """

    @staticmethod
    def _raw_text_outputs():
        """Return a fresh outputs mapping holding one hashed RAW_TEXT artifact."""
        raw = _hashed_artifact("raw", ArtifactType.RAW_TEXT, "f" * 64)
        return {ArtifactType.RAW_TEXT: raw}

    def test_get_miss_returns_none(self) -> None:
        """Looking up an unknown key returns ``None``."""
        cache = ArtifactCache()
        missing = cache.get("non_existent")
        assert missing is None

    def test_put_then_get_returns_outputs(self) -> None:
        """Outputs stored under a key can be read back under that key."""
        cache = ArtifactCache()
        cache.put("k1", self._raw_text_outputs())
        stored = cache.get("k1")
        assert stored is not None
        assert ArtifactType.RAW_TEXT in stored

    def test_put_with_none_key_is_noop(self) -> None:
        """``put`` with a ``None`` key stores nothing."""
        cache = ArtifactCache()
        cache.put(None, self._raw_text_outputs())
        assert len(cache) == 0

    def test_get_with_none_key_returns_none(self) -> None:
        """``get`` with a ``None`` key returns ``None``."""
        cache = ArtifactCache()
        result = cache.get(None)
        assert result is None

    def test_clear(self) -> None:
        """``clear`` empties a cache that held one entry."""
        cache = ArtifactCache()
        cache.put("k", self._raw_text_outputs())
        assert len(cache) == 1
        cache.clear()
        assert len(cache) == 0

    def test_contains(self) -> None:
        """The ``in`` operator reports stored keys only."""
        cache = ArtifactCache()
        cache.put("foo", {})
        assert "foo" in cache
        assert "bar" not in cache

    def test_keys(self) -> None:
        """``keys`` lists every stored key."""
        cache = ArtifactCache()
        for name in ("a", "b"):
            cache.put(name, {})
        assert sorted(cache.keys()) == ["a", "b"]

    def test_put_makes_defensive_copy(self) -> None:
        """Mutating the original dict after ``put()`` must not affect
        what the cache holds."""
        cache = ArtifactCache()
        outputs = self._raw_text_outputs()
        cache.put("k", outputs)
        # Empty the caller-side dict; the cached entry must be unaffected.
        outputs.clear()
        stored = cache.get("k")
        assert stored is not None
        assert ArtifactType.RAW_TEXT in stored