Picarones / tests /test_sprint4_normalization_iiif.py
Claude
fix: résoudre les 64 erreurs ruff pré-existantes révélées par le lint actif
6362212 unverified
"""Tests Sprint 4 : normalisation diplomatique, import IIIF, adaptateurs API OCR."""
from __future__ import annotations
import pytest
from picarones.core.normalization import (
NormalizationProfile,
DEFAULT_DIPLOMATIC_PROFILE,
_apply_diplomatic_table,
get_builtin_profile,
)
from picarones.core.metrics import compute_metrics, aggregate_metrics, MetricsResult
from picarones.importers.iiif import (
IIIFManifestParser,
parse_page_selector,
_extract_label,
_best_image_url_v2,
_best_image_url_v3,
_guess_extension,
_slugify,
)
# ===========================================================================
# Tests NormalizationProfile
# ===========================================================================
class TestNormalizationProfile:
    """Tests for ``NormalizationProfile``: defaults, ``normalize`` and (de)serialisation."""

    def test_default_nfc_only(self):
        """A profile created with only a name has NFC on, caseless off, empty table."""
        bare = NormalizationProfile(name="test")
        assert bare.nfc is True
        assert bare.caseless is False
        assert bare.diplomatic_table == {}

    def test_normalize_nfc(self):
        """A decomposed "e + combining acute" normalises to the precomposed "é"."""
        nfc_profile = NormalizationProfile(name="nfc_only")
        nfd_text = "e\u0301"  # "e" followed by U+0301
        composed = nfc_profile.normalize(nfd_text)
        assert composed == "\u00e9"  # single code point U+00E9

    def test_normalize_caseless(self):
        """With ``caseless=True`` the text comes back lower-cased."""
        folded = NormalizationProfile(name="caseless", caseless=True).normalize(
            "Bonjour MONDE"
        )
        assert folded == "bonjour monde"

    def test_normalize_diplomatic_table(self):
        """Entries of the diplomatic table are substituted in the text."""
        mapping = {"ſ": "s", "u": "v"}
        diplo = NormalizationProfile(name="test", diplomatic_table=mapping)
        # "maiſon" contains a long s but no "u": only ſ→s applies.
        assert diplo.normalize("maiſon") == "maison"
        # "uers" (old spelling of "vers"): u→v applies.
        assert diplo.normalize("uers") == "vers"

    def test_normalize_order_nfc_then_caseless_then_diplomatic(self):
        """Caseless folding and the diplomatic table combine on the same input."""
        combined = NormalizationProfile(
            name="combined",
            caseless=True,
            diplomatic_table={"ſ": "s"},
        )
        assert combined.normalize("Maiſon") == "maison"

    def test_as_dict(self):
        """``as_dict`` exposes name, diplomatic table and caseless flag."""
        exported = NormalizationProfile(
            name="medieval_french",
            nfc=True,
            caseless=False,
            diplomatic_table={"ſ": "s"},
            description="Test",
        ).as_dict()
        assert exported["name"] == "medieval_french"
        assert exported["diplomatic_table"] == {"ſ": "s"}
        assert exported["caseless"] is False

    def test_from_dict(self):
        """``from_dict`` reads the table from the ``diplomatic`` key."""
        loaded = NormalizationProfile.from_dict(
            {
                "name": "custom",
                "caseless": True,
                "diplomatic": {"ſ": "s", "u": "v"},
                "description": "Custom profile",
            }
        )
        assert loaded.name == "custom"
        assert loaded.caseless is True
        assert loaded.diplomatic_table == {"ſ": "s", "u": "v"}

    def test_from_dict_defaults(self):
        """An empty mapping yields name "custom", NFC on, caseless off."""
        fallback = NormalizationProfile.from_dict({})
        assert fallback.name == "custom"
        assert fallback.nfc is True
        assert fallback.caseless is False

    def test_from_yaml(self, tmp_path):
        """A YAML file is loaded into a profile; skipped when pyyaml is reported missing."""
        yaml_file = tmp_path / "profile.yaml"
        yaml_file.write_text(
            "name: my_profile\ncaseless: false\ndiplomatic:\n \u017f: s\n u: v\n",
            encoding="utf-8",
        )
        try:
            loaded = NormalizationProfile.from_yaml(yaml_file)
        except RuntimeError as exc:
            # Only a RuntimeError that mentions pyyaml is turned into a skip.
            if "pyyaml" in str(exc):
                pytest.skip("pyyaml non installé")
            raise
        else:
            assert loaded.name == "my_profile"
            assert loaded.diplomatic_table == {"\u017f": "s", "u": "v"}
class TestApplyDiplomaticTable:
    """Tests for the ``_apply_diplomatic_table`` substitution helper."""

    def test_simple_substitutions(self):
        """Single-character keys are replaced wherever they occur."""
        mapping = {"ſ": "s", "u": "v"}
        expectations = {
            "maiſon": "maison",  # only ſ→s applies, there is no "u"
            "uers": "vers",      # u→v
        }
        for raw, expected in expectations.items():
            assert _apply_diplomatic_table(raw, mapping) == expected

    def test_multi_char_key_priority(self):
        """Multi-character keys win over single-character ones."""
        mapping = {"ae": "X", "a": "Y"}
        # "ae" must become "X", not "Ye".
        assert _apply_diplomatic_table("aeb", mapping) == "Xb"

    def test_ampersand_to_et(self):
        """An ampersand can be expanded to a multi-character replacement."""
        expanded = _apply_diplomatic_table("noir & blanc", {"&": "et"})
        assert expanded == "noir et blanc"

    def test_empty_table(self):
        """An empty table leaves the text unchanged."""
        assert _apply_diplomatic_table("hello", {}) == "hello"

    def test_empty_text(self):
        """An empty text stays empty even with a non-empty table."""
        assert _apply_diplomatic_table("", {"a": "b"}) == ""
class TestGetBuiltinProfile:
    """Tests for the built-in profiles returned by ``get_builtin_profile``."""

    def test_medieval_french(self):
        """``medieval_french`` carries its own name and maps ſ to s."""
        builtin = get_builtin_profile("medieval_french")
        assert builtin.name == "medieval_french"
        table = builtin.diplomatic_table
        assert "ſ" in table
        assert table["ſ"] == "s"

    def test_early_modern_french(self):
        """``early_modern_french`` has an entry for the long s."""
        assert "ſ" in get_builtin_profile("early_modern_french").diplomatic_table

    def test_medieval_latin(self):
        """``medieval_latin`` has an entry for the abbreviation sign ꝑ."""
        assert "ꝑ" in get_builtin_profile("medieval_latin").diplomatic_table

    def test_minimal(self):
        """``minimal`` handles the long s but has no entry for "u"."""
        table = get_builtin_profile("minimal").diplomatic_table
        assert "ſ" in table
        assert "u" not in table

    def test_nfc(self):
        """``nfc`` enables NFC and has an empty diplomatic table."""
        builtin = get_builtin_profile("nfc")
        assert builtin.nfc is True
        assert builtin.diplomatic_table == {}

    def test_caseless(self):
        """``caseless`` has its caseless flag set."""
        assert get_builtin_profile("caseless").caseless is True

    def test_unknown_raises_key_error(self):
        """An unknown name raises ``KeyError`` whose message contains that name."""
        with pytest.raises(KeyError, match="inexistant"):
            get_builtin_profile("inexistant")

    def test_default_profile_is_medieval_french(self):
        """The module-level default profile is the medieval French one."""
        assert DEFAULT_DIPLOMATIC_PROFILE.name == "medieval_french"
# ===========================================================================
# Tests CER diplomatique dans compute_metrics
# ===========================================================================
class TestDiplomaticCER:
    """Tests for the diplomatic CER exposed by ``compute_metrics`` and ``aggregate_metrics``."""

    def test_cer_diplomatic_computed_by_default(self):
        """Without an explicit profile, the diplomatic CER uses ``medieval_french``."""
        metrics = compute_metrics("maiſon", "maison")
        assert metrics.cer_diplomatic is not None
        assert metrics.diplomatic_profile_name == "medieval_french"

    def test_cer_diplomatic_lower_than_exact_for_long_s(self):
        """For "maiſon" vs "maison" the raw CER is positive and the diplomatic CER is 0."""
        # The two strings differ only by "ſ" versus "s".
        metrics = compute_metrics("maiſon", "maison")
        # The raw CER sees "ſ" and "s" as different characters.
        assert metrics.cer > 0.0
        # The default profile treats them as equivalent.
        assert metrics.cer_diplomatic == pytest.approx(0.0)

    def test_cer_diplomatic_in_as_dict(self):
        """``as_dict`` exposes both the diplomatic CER and the profile name."""
        exported = compute_metrics("maiſon", "maison").as_dict()
        for key in ("cer_diplomatic", "diplomatic_profile_name"):
            assert key in exported

    def test_cer_diplomatic_with_custom_profile(self):
        """A profile passed via ``normalization_profile`` is used and reported by name."""
        custom = NormalizationProfile(
            name="test_profile",
            diplomatic_table={"ſ": "s"},
        )
        metrics = compute_metrics("maiſon", "maison", normalization_profile=custom)
        assert metrics.cer_diplomatic == pytest.approx(0.0)
        assert metrics.diplomatic_profile_name == "test_profile"

    def test_cer_diplomatic_not_in_as_dict_when_none(self):
        """When ``cer_diplomatic`` is None, ``as_dict`` omits the key."""
        without_diplo = MetricsResult(
            cer=0.1, cer_nfc=0.1, cer_caseless=0.1,
            wer=0.1, wer_normalized=0.1, mer=0.1, wil=0.1,
            reference_length=10, hypothesis_length=10,
            cer_diplomatic=None, diplomatic_profile_name=None,
        )
        assert "cer_diplomatic" not in without_diplo.as_dict()

    def test_aggregate_metrics_includes_diplomatic_cer(self):
        """``aggregate_metrics`` reports the mean diplomatic CER and the profile name."""
        per_document = [
            MetricsResult(
                cer=rate, cer_nfc=rate, cer_caseless=rate,
                wer=rate, wer_normalized=rate, mer=rate, wil=rate,
                reference_length=10, hypothesis_length=10,
                cer_diplomatic=diplo, diplomatic_profile_name="medieval_french",
            )
            for rate, diplo in ((0.1, 0.05), (0.2, 0.10))
        ]
        summary = aggregate_metrics(per_document)
        assert "cer_diplomatic" in summary
        diplo_stats = summary["cer_diplomatic"]
        assert diplo_stats["mean"] == pytest.approx(0.075)  # (0.05 + 0.10) / 2
        assert diplo_stats.get("profile") == "medieval_french"
# ===========================================================================
# Tests parse_page_selector
# ===========================================================================
class TestParsePageSelector:
    """Tests for ``parse_page_selector(selector, total)``.

    Selectors use 1-based page numbers; the expected results are 0-based
    indices, sorted and without duplicates.
    """

    def test_all(self):
        """The keyword "all" selects every page."""
        assert parse_page_selector("all", 10) == list(range(10))

    def test_empty_string(self):
        """An empty selector also selects every page."""
        assert parse_page_selector("", 5) == list(range(5))

    def test_single_page(self):
        """Page 3 maps to 0-based index 2."""
        assert parse_page_selector("3", 10) == [2]

    def test_range(self):
        """A range "a-b" is inclusive on both ends."""
        assert parse_page_selector("1-5", 10) == [0, 1, 2, 3, 4]

    def test_comma_list(self):
        """Comma-separated pages are each converted."""
        assert parse_page_selector("1,3,5", 10) == [0, 2, 4]

    def test_combined(self):
        """Ranges and single pages can be mixed in one selector."""
        assert parse_page_selector("1-3,5,8-9", 10) == [0, 1, 2, 4, 7, 8]

    def test_deduplication(self):
        """A page listed twice appears once in the result."""
        assert parse_page_selector("1,1,2", 5) == [0, 1]

    def test_sorted_output(self):
        """The result is sorted regardless of selector order."""
        assert parse_page_selector("5,1,3", 10) == [0, 2, 4]

    def test_page_out_of_range_raises(self):
        """A page number beyond the total raises ``ValueError``."""
        with pytest.raises(ValueError):
            parse_page_selector("15", 10)

    def test_range_out_of_bounds_raises(self):
        """A range whose end exceeds the total raises ``ValueError``."""
        with pytest.raises(ValueError):
            parse_page_selector("1-15", 10)

    def test_invalid_syntax_raises(self):
        """A non-numeric selector raises ``ValueError``.

        The expectation used to be ``(ValueError, Exception)``, which is the
        same as ``Exception`` and would also pass on unrelated failures.
        """
        # NOTE(review): assumes the parser reports bad syntax as ValueError,
        # like the other error cases in this class — confirm.
        with pytest.raises(ValueError):
            parse_page_selector("abc", 10)

    def test_last_page(self):
        """The last valid page number is accepted."""
        assert parse_page_selector("10", 10) == [9]

    def test_first_page(self):
        """The first page maps to index 0."""
        assert parse_page_selector("1", 10) == [0]
# ===========================================================================
# Tests IIIFManifestParser — IIIF v2
# ===========================================================================
def _make_v2_manifest(num_canvases: int = 3, with_service: bool = False) -> dict:
"""Fabrique un manifeste IIIF v2 minimal de test."""
canvases = []
for i in range(num_canvases):
resource: dict
if with_service:
resource = {
"@type": "dctypes:Image",
"service": {"@id": f"https://example.com/iiif/img{i+1}"},
}
else:
resource = {
"@type": "dctypes:Image",
"@id": f"https://example.com/images/img{i+1}.jpg",
}
canvases.append({
"@id": f"https://example.com/canvas/{i+1}",
"@type": "sc:Canvas",
"label": f"f. {i+1}r",
"width": 2000,
"height": 3000,
"images": [
{
"@type": "oa:Annotation",
"motivation": "sc:painting",
"resource": resource,
"on": f"https://example.com/canvas/{i+1}",
}
],
})
return {
"@context": "http://iiif.io/api/presentation/2/context.json",
"@type": "sc:Manifest",
"@id": "https://example.com/manifest.json",
"label": "Manuscript de test",
"sequences": [
{
"@type": "sc:Sequence",
"canvases": canvases,
}
],
}
def _make_v3_manifest(num_canvases: int = 3) -> dict:
"""Fabrique un manifeste IIIF v3 minimal de test."""
items = []
for i in range(num_canvases):
items.append({
"id": f"https://example.com/canvas/{i+1}",
"type": "Canvas",
"label": {"fr": [f"Page {i+1}"]},
"width": 1500,
"height": 2200,
"items": [
{
"id": f"https://example.com/canvas/{i+1}/ap",
"type": "AnnotationPage",
"items": [
{
"id": f"https://example.com/canvas/{i+1}/ap/a",
"type": "Annotation",
"motivation": "painting",
"body": {
"id": f"https://example.com/images/{i+1}/full/max/0/default.jpg",
"type": "Image",
"format": "image/jpeg",
},
"target": f"https://example.com/canvas/{i+1}",
}
],
}
],
})
return {
"@context": "http://iiif.io/api/presentation/3/context.json",
"id": "https://example.com/manifest.json",
"type": "Manifest",
"label": {"fr": ["Manuscrit v3 de test"]},
"items": items,
}
class TestIIIFManifestParserV2:
    """Tests for ``IIIFManifestParser`` fed with Presentation v2 manifests."""

    def test_version_detection(self):
        """A manifest with the v2 context is reported as version 2."""
        assert IIIFManifestParser(_make_v2_manifest()).version == 2

    def test_canvases_count(self):
        """Every canvas of the sequence is returned."""
        found = IIIFManifestParser(_make_v2_manifest(5)).canvases()
        assert len(found) == 5

    def test_canvas_label(self):
        """Canvas labels are taken from the manifest."""
        first, second = IIIFManifestParser(_make_v2_manifest()).canvases()[:2]
        assert first.label == "f. 1r"
        assert second.label == "f. 2r"

    def test_canvas_image_url_direct(self):
        """Without a service, the resource ``@id`` is the image URL."""
        first = IIIFManifestParser(_make_v2_manifest()).canvases()[0]
        assert first.image_url == "https://example.com/images/img1.jpg"

    def test_canvas_image_url_via_service(self):
        """With a service, the URL contains the full-size Image API path."""
        manifest = _make_v2_manifest(with_service=True)
        first = IIIFManifestParser(manifest).canvases()[0]
        assert "/full/max/0/default.jpg" in first.image_url

    def test_canvas_dimensions(self):
        """Canvas width and height are carried over."""
        first = IIIFManifestParser(_make_v2_manifest()).canvases()[0]
        assert first.width == 2000
        assert first.height == 3000

    def test_canvas_index(self):
        """Each canvas index equals its position in the returned list."""
        found = IIIFManifestParser(_make_v2_manifest(3)).canvases()
        for position, canvas in enumerate(found):
            assert canvas.index == position

    def test_label(self):
        """The manifest-level label is exposed on the parser."""
        assert IIIFManifestParser(_make_v2_manifest()).label == "Manuscript de test"

    def test_empty_sequences(self):
        """A manifest with no sequences yields no canvases."""
        empty = {
            "@context": "http://iiif.io/api/presentation/2/context.json",
            "@type": "sc:Manifest",
            "label": "Empty",
            "sequences": [],
        }
        assert IIIFManifestParser(empty).canvases() == []
class TestIIIFManifestParserV3:
    """Tests for ``IIIFManifestParser`` fed with Presentation v3 manifests."""

    def test_version_detection(self):
        """A manifest built by the v3 factory is reported as version 3."""
        assert IIIFManifestParser(_make_v3_manifest()).version == 3

    def test_canvases_count(self):
        """Every item of the manifest is returned as a canvas."""
        found = IIIFManifestParser(_make_v3_manifest(4)).canvases()
        assert len(found) == 4

    def test_canvas_label_from_language_map(self):
        """A language-map label is resolved to text containing the page label."""
        first = IIIFManifestParser(_make_v3_manifest()).canvases()[0]
        assert "Page 1" in first.label

    def test_canvas_image_url(self):
        """The image URL comes from the painting annotation body."""
        first = IIIFManifestParser(_make_v3_manifest()).canvases()[0]
        assert "default.jpg" in first.image_url

    def test_manifest_label_language_map(self):
        """The manifest-level language-map label is resolved to text."""
        lowered = IIIFManifestParser(_make_v3_manifest()).label.lower()
        assert "v3" in lowered or "test" in lowered

    def test_type_manifest_triggers_v3(self):
        """``type == 'Manifest'`` alone (no context) is detected as v3."""
        bare = {"type": "Manifest", "items": []}
        assert IIIFManifestParser(bare).version == 3
class TestExtractLabel:
    """Tests for ``_extract_label`` across the label shapes it accepts."""

    def test_string(self):
        """A plain string is returned as is."""
        assert _extract_label("Page 1") == "Page 1"

    def test_list(self):
        """For a list, the first element is returned."""
        labels = ["Page 1", "Page 2"]
        assert _extract_label(labels) == "Page 1"

    def test_dict_fr(self):
        """A language map with a ``fr`` key yields its first value."""
        assert _extract_label({"fr": ["Folio 1r"]}) == "Folio 1r"

    def test_dict_en(self):
        """A language map with an ``en`` key yields its first value."""
        assert _extract_label({"en": ["Folio 1r"]}) == "Folio 1r"

    def test_dict_none_key(self):
        """A language map with the ``none`` key yields its first value."""
        assert _extract_label({"none": ["Label"]}) == "Label"

    def test_empty_string(self):
        """An empty string stays empty."""
        assert _extract_label("") == ""

    def test_none_value(self):
        """``None`` still produces a string."""
        assert isinstance(_extract_label(None), str)
class TestBestImageUrlV2:
    """Tests for ``_best_image_url_v2(resource, canvas)``."""

    def test_direct_id(self):
        """With no service, the resource ``@id`` is returned."""
        direct = {"@id": "https://example.com/img.jpg"}
        assert _best_image_url_v2(direct, {}) == "https://example.com/img.jpg"

    def test_service_id(self):
        """A service ``@id`` takes precedence and gets the full-size path appended."""
        with_service = {
            "@id": "https://example.com/info.json",
            "service": {"@id": "https://example.com/iiif/img1"},
        }
        expected = "https://example.com/iiif/img1/full/max/0/default.jpg"
        assert _best_image_url_v2(with_service, {}) == expected

    def test_service_list(self):
        """A service given as a list is handled like a single service."""
        with_service_list = {
            "service": [
                {"@id": "https://example.com/iiif/img2"},
            ]
        }
        expected = "https://example.com/iiif/img2/full/max/0/default.jpg"
        assert _best_image_url_v2(with_service_list, {}) == expected
class TestBestImageUrlV3:
    """Tests for ``_best_image_url_v3(canvas)``."""

    def test_direct_body_image(self):
        """The ``id`` of an image body is returned directly."""
        annotation = {
            "type": "Annotation",
            "motivation": "painting",
            "body": {
                "id": "https://example.com/img.jpg",
                "type": "Image",
            },
        }
        page = {"type": "AnnotationPage", "items": [annotation]}
        assert _best_image_url_v3({"items": [page]}) == "https://example.com/img.jpg"

    def test_body_via_service(self):
        """With an empty body ``id``, the URL is built from the body's service."""
        body = {
            "type": "Image",
            "id": "",
            "service": [{"id": "https://example.com/iiif/3/img1"}],
        }
        canvas = {"items": [{"items": [{"body": body}]}]}
        assert "/full/max/0/default.jpg" in _best_image_url_v3(canvas)

    def test_empty_canvas(self):
        """A canvas without items yields an empty string."""
        assert _best_image_url_v3({}) == ""
class TestGuessExtension:
    """Tests for ``_guess_extension(url)``."""

    def test_jpg(self):
        """A ``.jpg`` URL gives ``.jpg``."""
        guessed = _guess_extension("https://example.com/img.jpg")
        assert guessed == ".jpg"

    def test_png(self):
        """A ``.png`` URL gives ``.png``."""
        guessed = _guess_extension("https://example.com/img.png")
        assert guessed == ".png"

    def test_tiff(self):
        """A ``.tiff`` URL gives ``.tiff``."""
        guessed = _guess_extension("https://example.com/img.tiff")
        assert guessed == ".tiff"

    def test_iiif_default(self):
        """A standard Image API URL ending in ``/default.jpg`` gives ``.jpg``."""
        iiif_url = "https://example.com/iiif/img/full/max/0/default.jpg"
        assert _guess_extension(iiif_url) == ".jpg"

    def test_unknown_defaults_to_jpg(self):
        """A URL without a recognisable extension falls back to ``.jpg``."""
        guessed = _guess_extension("https://example.com/resource/123")
        assert guessed == ".jpg"
class TestSlugify:
    """Tests for ``_slugify(label)``."""

    def test_simple(self):
        """A space becomes an underscore; case is kept."""
        assert _slugify("Page 1") == "Page_1"

    def test_special_chars_removed(self):
        """Slashes and dots never appear in the slug."""
        slug = _slugify("f. 1r (recto)")
        for forbidden in ("/", "."):
            assert forbidden not in slug

    def test_max_length(self):
        """A 100-character label is cut to at most 60 characters."""
        slug = _slugify("x" * 100)
        assert len(slug) <= 60

    def test_empty(self):
        """An empty label gives an empty slug."""
        assert _slugify("") == ""
# ===========================================================================
# Tests structure des nouveaux moteurs OCR (sans appel réseau)
# ===========================================================================
class TestMistralOCREngine:
    """Structural tests for ``MistralOCREngine``; no network call is made.

    The engine is imported inside each test, as in the rest of this module's
    engine tests.
    """

    def test_import(self):
        """The engine class can be imported from its module."""
        from picarones.engines.mistral_ocr import MistralOCREngine

        assert MistralOCREngine is not None

    def test_name(self):
        """The engine identifies itself as ``mistral_ocr``."""
        from picarones.engines.mistral_ocr import MistralOCREngine

        assert MistralOCREngine().name == "mistral_ocr"

    def test_version_default_model(self):
        """Without configuration, the version string mentions ``mistral-ocr``."""
        from picarones.engines.mistral_ocr import MistralOCREngine

        reported = MistralOCREngine().version()
        assert "mistral-ocr" in reported

    def test_version_custom_model(self):
        """A ``model`` given in the config is returned verbatim by ``version()``."""
        from picarones.engines.mistral_ocr import MistralOCREngine

        configured = MistralOCREngine({"model": "pixtral-large-latest"})
        assert configured.version() == "pixtral-large-latest"

    def test_missing_api_key_raises(self, monkeypatch, tmp_path):
        """Running OCR without ``MISTRAL_API_KEY`` raises a ``RuntimeError`` naming it."""
        from picarones.engines.mistral_ocr import MistralOCREngine

        monkeypatch.delenv("MISTRAL_API_KEY", raising=False)
        ocr = MistralOCREngine()
        # Dummy image file holding only the first bytes of a JPEG header.
        fake_image = tmp_path / "test.jpg"
        fake_image.write_bytes(b"\xff\xd8\xff")
        with pytest.raises(RuntimeError, match="MISTRAL_API_KEY"):
            ocr._run_ocr(fake_image)

    def test_exported_from_engines(self):
        """The engine is re-exported by the ``picarones.engines`` package."""
        from picarones.engines import MistralOCREngine

        assert MistralOCREngine is not None
class TestGoogleVisionEngine:
    """Structural tests for ``GoogleVisionEngine``; no network call is made."""

    def test_import(self):
        """The engine class can be imported from its module."""
        from picarones.engines.google_vision import GoogleVisionEngine

        assert GoogleVisionEngine is not None

    def test_name(self):
        """The engine identifies itself as ``google_vision``."""
        from picarones.engines.google_vision import GoogleVisionEngine

        assert GoogleVisionEngine().name == "google_vision"

    def test_version(self):
        """The reported version is ``v1``."""
        from picarones.engines.google_vision import GoogleVisionEngine

        assert GoogleVisionEngine().version() == "v1"

    def test_missing_credentials_raises(self, monkeypatch, tmp_path):
        """With both credential variables unset, running OCR raises ``RuntimeError``."""
        from picarones.engines.google_vision import GoogleVisionEngine

        monkeypatch.delenv("GOOGLE_APPLICATION_CREDENTIALS", raising=False)
        monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
        ocr = GoogleVisionEngine()
        # Dummy image file holding only the first bytes of a JPEG header.
        fake_image = tmp_path / "test.jpg"
        fake_image.write_bytes(b"\xff\xd8\xff")
        with pytest.raises(RuntimeError):
            ocr._run_ocr(fake_image)

    def test_exported_from_engines(self):
        """The engine is re-exported by the ``picarones.engines`` package."""
        from picarones.engines import GoogleVisionEngine

        assert GoogleVisionEngine is not None
class TestAzureDocIntelEngine:
    """Structural tests for ``AzureDocIntelEngine``; no network call is made."""

    def test_import(self):
        """The engine class can be imported from its module."""
        from picarones.engines.azure_doc_intel import AzureDocIntelEngine

        assert AzureDocIntelEngine is not None

    def test_name(self):
        """The engine identifies itself as ``azure_doc_intel``."""
        from picarones.engines.azure_doc_intel import AzureDocIntelEngine

        assert AzureDocIntelEngine().name == "azure_doc_intel"

    def test_missing_key_raises(self, monkeypatch, tmp_path):
        """With key and endpoint variables unset, running OCR raises ``RuntimeError``."""
        from picarones.engines.azure_doc_intel import AzureDocIntelEngine

        monkeypatch.delenv("AZURE_DOC_INTEL_KEY", raising=False)
        monkeypatch.delenv("AZURE_DOC_INTEL_ENDPOINT", raising=False)
        ocr = AzureDocIntelEngine()
        # Dummy image file holding only the first bytes of a JPEG header.
        fake_image = tmp_path / "test.jpg"
        fake_image.write_bytes(b"\xff\xd8\xff")
        with pytest.raises(RuntimeError):
            ocr._run_ocr(fake_image)

    def test_exported_from_engines(self):
        """The engine is re-exported by the ``picarones.engines`` package."""
        from picarones.engines import AzureDocIntelEngine

        assert AzureDocIntelEngine is not None
# ===========================================================================
# Tests CLI — commande import iiif
# ===========================================================================
class TestCLIImportIIIF:
    """Tests for the ``import iiif`` command of the picarones CLI."""

    @staticmethod
    def _invoke(*args):
        """Run the CLI with ``args`` through a fresh ``CliRunner`` and return the result."""
        # Imported here so that the imports happen at test time, as before.
        from picarones.cli import cli
        from click.testing import CliRunner

        return CliRunner().invoke(cli, list(args))

    def test_import_group_exists(self):
        """``import --help`` exits with code 0."""
        assert self._invoke("import", "--help").exit_code == 0

    def test_import_iiif_command_exists(self):
        """``import iiif --help`` exits with code 0 and mentions the manifest URL argument."""
        outcome = self._invoke("import", "iiif", "--help")
        assert outcome.exit_code == 0
        # Case-insensitive match covers both "manifest_url" and "MANIFEST_URL".
        assert "manifest_url" in outcome.output.lower()

    def test_import_iiif_options(self):
        """The help text lists ``--pages`` and ``--output``."""
        help_text = self._invoke("import", "iiif", "--help").output
        assert "--pages" in help_text
        assert "--output" in help_text

    def test_import_iiif_requires_url(self):
        """Calling the command without a URL ends with a non-zero exit code."""
        assert self._invoke("import", "iiif").exit_code != 0
# ===========================================================================
# Tests fixtures Sprint 4 (CER diplomatique dans la démo)
# ===========================================================================
class TestFixturesDiplomaticCER:
    """Tests that the demo fixtures exercise the diplomatic CER."""

    def test_gt_texts_contain_medieval_graphies(self):
        """The demo ground-truth texts contain at least one of ſ, &, æ, œ."""
        from picarones.fixtures import _GT_TEXTS

        corpus = " ".join(_GT_TEXTS)
        found = any(mark in corpus for mark in ("ſ", "&", "æ", "œ"))
        assert found, "Les GT de démo doivent inclure des graphies médiévales pour illustrer le CER diplomatique"

    def test_benchmark_results_have_diplomatic_cer(self):
        """The sample benchmark carries a diplomatic CER on error-free documents."""
        from picarones.fixtures import generate_sample_benchmark

        benchmark = generate_sample_benchmark()
        for engine_report in benchmark.engine_reports:
            for doc_result in engine_report.document_results:
                if doc_result.metrics.error is not None:
                    continue
                assert doc_result.metrics.cer_diplomatic is not None, (
                    f"CER diplomatique manquant pour {engine_report.engine_name}"
                )
                # One checked document per engine is enough.
                # NOTE(review): this stops at the first error-free document;
                # confirm the break was not meant to run unconditionally.
                break

    def test_diplomatic_cer_lower_for_medieval_graphies(self):
        """For a text with ſ and &, the diplomatic CER does not exceed the raw CER."""
        ground_truth = "maiſon & jardin"   # medieval spellings
        ocr_output = "maison et jardin"    # modernised spellings
        metrics = compute_metrics(ground_truth, ocr_output)
        assert metrics.cer_diplomatic is not None
        assert metrics.cer_diplomatic <= metrics.cer
# ===========================================================================
# Tests rapport HTML Sprint 4 (CER diplomatique affiché)
# ===========================================================================
class TestReportDiplomaticCER:
    """Tests that the HTML report data and output expose the diplomatic CER."""

    @staticmethod
    def _render_sample_report(tmp_path):
        """Generate the sample benchmark report under ``tmp_path`` and return its HTML."""
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import ReportGenerator

        target = tmp_path / "report_test.html"
        ReportGenerator(generate_sample_benchmark()).generate(target)
        return target.read_text(encoding="utf-8")

    def test_report_data_has_cer_diplomatic(self):
        """Every entry of ``data["engines"]`` has a ``cer_diplomatic`` key."""
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import _build_report_data

        data = _build_report_data(generate_sample_benchmark(), images_b64={})
        assert "engines" in data
        # Only the presence of the key is checked, not its value.
        for engine_data in data["engines"]:
            assert "cer_diplomatic" in engine_data, (
                f"cer_diplomatic manquant dans {engine_data.get('name', '?')}"
            )

    def test_html_contains_cer_diplo_column(self, tmp_path):
        """The generated HTML mentions the diplomatic CER."""
        lowered = self._render_sample_report(tmp_path).lower()
        # "diplo" also matches "diplomatique".
        assert "diplo" in lowered, (
            "Le rapport HTML doit mentionner le CER diplomatique"
        )

    def test_html_contains_medieval_graphie_indicator(self, tmp_path):
        """The HTML mentions ſ=s, u=v or the word "diplomatique"."""
        html = self._render_sample_report(tmp_path)
        mentions_mapping = "ſ=s" in html or "u=v" in html
        assert mentions_mapping or "diplomatique" in html.lower()