Spaces:
Running
Sprint 5 — Métriques avancées patrimoniales (370 tests)
Nouvelles métriques spécifiques aux documents patrimoniaux :
- Matrice de confusion unicode : alignement char-par-char, top confusions, heatmap cliquable HTML
- Score ligatures : reconnaissance de fi/fl/ff/ffi/ffl/œ/æ/ꝑ/ꝓ + agrégation corpus
- Score diacritiques : conservation accents/cédilles/trémas + agrégation
- Taxonomie des erreurs (classes 1-9) : confusion visuelle, diacritique, casse, ligature,
abréviation, hapax, segmentation, hors-vocabulaire, lacune
- Analyse structurelle : fusion/fragmentation de lignes, ordre de lecture (LCS), paragraphes
- Analyse qualité image : netteté (Laplacien), bruit, rotation, contraste, score global
- Corrélation qualité image ↔ CER : scatter plot Chart.js dans le rapport
- Vue "Caractères" dans le rapport HTML : heatmap confusion, scores ligatures/diacritiques,
distribution taxonomique, détail per-ligature, sélecteur de moteur
Fichiers nouveaux : confusion.py, char_scores.py, taxonomy.py, structure.py, image_quality.py
Fichiers mis à jour : results.py, runner.py, fixtures.py, report/generator.py
Tests : 113 nouveaux tests Sprint 5, suite complète 370 tests (100% pass)
https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq
- picarones/core/char_scores.py +360 -0
- picarones/core/confusion.py +264 -0
- picarones/core/image_quality.py +395 -0
- picarones/core/results.py +56 -0
- picarones/core/runner.py +164 -0
- picarones/core/structure.py +230 -0
- picarones/core/taxonomy.py +351 -0
- picarones/fixtures.py +65 -0
- picarones/report/generator.py +400 -3
- rapport_demo.html +0 -0
- tests/test_sprint5_advanced_metrics.py +876 -0
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Scores de reconnaissance des ligatures et des diacritiques.
|
| 2 |
+
|
| 3 |
+
Ces métriques sont spécifiques aux documents patrimoniaux (manuscrits, imprimés
|
| 4 |
+
anciens) où ligatures et diacritiques jouent un rôle paléographique essentiel.
|
| 5 |
+
|
| 6 |
+
Ligatures
|
| 7 |
+
---------
|
| 8 |
+
Caractères encodés comme une séquence unique dans Unicode mais représentant
|
| 9 |
+
deux ou plusieurs glyphes fusionnés : fi (fi), fl (fl), œ, æ, etc.
|
| 10 |
+
|
| 11 |
+
Pour chaque ligature présente dans le GT, on vérifie si l'OCR a produit
|
| 12 |
+
soit le caractère Unicode équivalent, soit la séquence décomposée équivalente.
|
| 13 |
+
|
| 14 |
+
Diacritiques
|
| 15 |
+
-----------
|
| 16 |
+
Accents, cédilles, trémas et autres signes diacritiques. Pour chaque caractère
|
| 17 |
+
accentué dans le GT, on vérifie si l'OCR a conservé le diacritique ou l'a
|
| 18 |
+
remplacé par la lettre de base.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import Optional
|
| 25 |
+
|
| 26 |
+
import unicodedata
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# Tables de ligatures (char ligature → séquences équivalentes acceptées)
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
|
| 33 |
+
#: Table principale des ligatures et leurs équivalents acceptés.
|
| 34 |
+
#: Clé = caractère ligature Unicode ; valeur = liste de séquences équivalentes.
|
| 35 |
+
LIGATURE_TABLE: dict[str, list[str]] = {
|
| 36 |
+
# Ligatures typographiques latines (Unicode Letterlike Symbols / Alphabetic Presentation Forms)
|
| 37 |
+
"\uFB00": ["ff"], # ff ff
|
| 38 |
+
"\uFB01": ["fi"], # fi fi
|
| 39 |
+
"\uFB02": ["fl"], # fl fl
|
| 40 |
+
"\uFB03": ["ffi"], # ffi ffi
|
| 41 |
+
"\uFB04": ["ffl"], # ffl ffl
|
| 42 |
+
"\uFB05": ["st", "\u017Ft"], # ſt st / ſt
|
| 43 |
+
"\uFB06": ["st"], # st st (variante)
|
| 44 |
+
# Ligatures latines patrimoniales (Unicode Latin Extended Additional)
|
| 45 |
+
"\u0153": ["oe"], # œ oe
|
| 46 |
+
"\u00E6": ["ae"], # æ ae
|
| 47 |
+
"\u0152": ["OE"], # Œ OE
|
| 48 |
+
"\u00C6": ["AE"], # Æ AE
|
| 49 |
+
# Abréviations latines / médiévales
|
| 50 |
+
"\uA751": ["per", "p\u0332"], # ꝑ per / p̲
|
| 51 |
+
"\uA753": ["pro"], # ꝓ pro
|
| 52 |
+
"\uA757": ["que"], # ꝗ que
|
| 53 |
+
# Ligatures germaniques
|
| 54 |
+
"\u00DF": ["ss"], # ß ss
|
| 55 |
+
"\u1E9E": ["SS"], # ẞ SS
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
# Ensemble de toutes les ligatures pour recherche rapide
|
| 59 |
+
_ALL_LIGATURES: frozenset[str] = frozenset(LIGATURE_TABLE)
|
| 60 |
+
|
| 61 |
+
# Mapping inverse : séquence → ligature
|
| 62 |
+
_SEQ_TO_LIGATURE: dict[str, str] = {}
|
| 63 |
+
for _lig, _seqs in LIGATURE_TABLE.items():
|
| 64 |
+
for _seq in _seqs:
|
| 65 |
+
_SEQ_TO_LIGATURE[_seq] = _lig
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ---------------------------------------------------------------------------
|
| 69 |
+
# Table des caractères diacritiques
|
| 70 |
+
# ---------------------------------------------------------------------------
|
| 71 |
+
|
| 72 |
+
def _build_diacritic_map() -> dict[str, str]:
|
| 73 |
+
"""Construit automatiquement la table diacritique depuis l'Unicode."""
|
| 74 |
+
table: dict[str, str] = {}
|
| 75 |
+
for codepoint in range(0x00C0, 0x0250): # Latin Étendu A + B
|
| 76 |
+
ch = chr(codepoint)
|
| 77 |
+
nfd = unicodedata.normalize("NFD", ch)
|
| 78 |
+
if len(nfd) > 1: # le caractère est décomposable
|
| 79 |
+
base = nfd[0] # lettre de base
|
| 80 |
+
if base.isalpha() and base != ch:
|
| 81 |
+
table[ch] = base
|
| 82 |
+
# Compléments manuels
|
| 83 |
+
table.update({
|
| 84 |
+
"\u0107": "c", # ć
|
| 85 |
+
"\u0119": "e", # ę
|
| 86 |
+
"\u0142": "l", # ł
|
| 87 |
+
"\u0144": "n", # ń
|
| 88 |
+
"\u015B": "s", # ś
|
| 89 |
+
"\u017A": "z", # ź
|
| 90 |
+
"\u017C": "z", # ż
|
| 91 |
+
})
|
| 92 |
+
return table
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
DIACRITIC_MAP: dict[str, str] = _build_diacritic_map()
|
| 96 |
+
_ALL_DIACRITICS: frozenset[str] = frozenset(DIACRITIC_MAP)
|
| 97 |
+
|
| 98 |
+
# Ligatures qui NE sont PAS des diacritiques (pour éviter les doublons)
|
| 99 |
+
_LIGATURE_SET: frozenset[str] = frozenset(LIGATURE_TABLE)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# ---------------------------------------------------------------------------
# Structured results
# ---------------------------------------------------------------------------

@dataclass
class LigatureScore:
    """Ligature-recognition score for one (ground truth, OCR) pair."""

    total_in_gt: int = 0  # ligatures present in the ground truth
    correctly_recognized: int = 0  # ligatures transcribed correctly (either form)
    score: float = 0.0  # correctly_recognized / total_in_gt; 1.0 when total is 0
    # Per-ligature detail, e.g. {'X': {'gt_count': 5, 'ocr_correct': 3, 'score': 0.6}}
    per_ligature: dict[str, dict] = field(default_factory=dict)

    def as_dict(self) -> dict:
        """Serialize, rounding every float to 4 decimal places."""
        detail = {}
        for lig, stats in self.per_ligature.items():
            detail[lig] = {
                key: round(val, 4) if isinstance(val, float) else val
                for key, val in stats.items()
            }
        return {
            "total_in_gt": self.total_in_gt,
            "correctly_recognized": self.correctly_recognized,
            "score": round(self.score, 4),
            "per_ligature": detail,
        }
| 129 |
+
|
| 130 |
+
|
| 131 |
+
@dataclass
class DiacriticScore:
    """Diacritic-preservation score for one (ground truth, OCR) pair."""

    total_in_gt: int = 0  # accented characters present in the ground truth
    correctly_recognized: int = 0  # diacritics preserved by the OCR
    score: float = 0.0  # correctly_recognized / total_in_gt; 1.0 when total is 0
    # Per-character detail, same shape as LigatureScore.per_ligature.
    per_diacritic: dict[str, dict] = field(default_factory=dict)

    def as_dict(self) -> dict:
        """Serialize, rounding every float to 4 decimal places."""
        detail = {}
        for mark, stats in self.per_diacritic.items():
            detail[mark] = {
                key: round(val, 4) if isinstance(val, float) else val
                for key, val in stats.items()
            }
        return {
            "total_in_gt": self.total_in_gt,
            "correctly_recognized": self.correctly_recognized,
            "score": round(self.score, 4),
            "per_diacritic": detail,
        }
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ---------------------------------------------------------------------------
# Score computation
# ---------------------------------------------------------------------------

def compute_ligature_score(ground_truth: str, hypothesis: str) -> LigatureScore:
    """Compute the ligature-recognition score for a (GT, OCR) pair.

    A ligature in the ground truth counts as recognized when the OCR output
    contains either the very same Unicode ligature character or one of its
    accepted decomposed spellings (e.g. "fi" for U+FB01). Both renderings
    are accepted, which matches common editorial practice for heritage
    texts (some editors expand ligatures).

    Parameters
    ----------
    ground_truth:
        Reference transcription.
    hypothesis:
        OCR output.

    Returns
    -------
    LigatureScore
    """
    if not ground_truth:
        return LigatureScore(score=1.0)

    # NFC-normalize both sides so composed/decomposed forms compare equal.
    gt_norm = unicodedata.normalize("NFC", ground_truth)
    hyp_norm = unicodedata.normalize("NFC", hypothesis)

    details: dict[str, dict] = {}
    seen = 0
    matched = 0

    # Scan the ground truth for known ligatures.
    for pos, ch in enumerate(gt_norm):
        if ch not in _ALL_LIGATURES:
            continue
        seen += 1
        # The ligature itself, or any of its accepted expansions.
        accepted = [ch] + LIGATURE_TABLE[ch]
        hit = _check_char_at_context(gt_norm, hyp_norm, pos, ch, accepted)
        if hit:
            matched += 1
        entry = details.setdefault(ch, {"gt_count": 0, "ocr_correct": 0, "score": 0.0})
        entry["gt_count"] += 1
        if hit:
            entry["ocr_correct"] += 1

    # Per-ligature recognition rates.
    for entry in details.values():
        entry["score"] = (
            entry["ocr_correct"] / entry["gt_count"] if entry["gt_count"] > 0 else 1.0
        )

    return LigatureScore(
        total_in_gt=seen,
        correctly_recognized=matched,
        score=matched / seen if seen > 0 else 1.0,
        per_ligature=details,
    )
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def compute_diacritic_score(ground_truth: str, hypothesis: str) -> DiacriticScore:
    """Compute the diacritic-preservation score for a (GT, OCR) pair.

    For every accented character of the ground truth, checks whether the
    OCR output kept the exact same character at the aligned position
    (preserved) or produced something else, e.g. the bare base letter
    (lost). Both strings are NFC-normalized first, so NFD-encoded variants
    of the same accented letter are accepted.

    Parameters
    ----------
    ground_truth:
        Reference transcription.
    hypothesis:
        OCR output.

    Returns
    -------
    DiacriticScore
    """
    if not ground_truth:
        return DiacriticScore(score=1.0)

    gt_norm = unicodedata.normalize("NFC", ground_truth)
    hyp_norm = unicodedata.normalize("NFC", hypothesis)

    # Character-level alignment via difflib (local import, as in the
    # original implementation).
    import difflib
    aligner = difflib.SequenceMatcher(None, gt_norm, hyp_norm, autojunk=False)

    # Map each GT index to its aligned hypothesis index (None = unaligned).
    position_map: dict[int, Optional[int]] = {}
    for tag, i1, i2, j1, j2 in aligner.get_opcodes():
        same_length_replace = tag == "replace" and (i2 - i1) == (j2 - j1)
        if tag == "equal" or same_length_replace:
            for offset in range(i2 - i1):
                position_map[i1 + offset] = j1 + offset
        else:
            # Deletions, or replacements of differing lengths.
            for gt_index in range(i1, i2):
                position_map[gt_index] = None

    details: dict[str, dict] = {}
    seen = 0
    kept = 0

    for gt_index, ch in enumerate(gt_norm):
        # Ligatures are handled by compute_ligature_score; skip them here.
        if ch not in _ALL_DIACRITICS or ch in _LIGATURE_SET:
            continue
        seen += 1
        aligned = position_map.get(gt_index)
        preserved = (
            aligned is not None
            and aligned < len(hyp_norm)
            and hyp_norm[aligned] == ch
        )
        if preserved:
            kept += 1
        entry = details.setdefault(ch, {"gt_count": 0, "ocr_correct": 0, "score": 0.0})
        entry["gt_count"] += 1
        if preserved:
            entry["ocr_correct"] += 1

    # Per-diacritic preservation rates.
    for entry in details.values():
        entry["score"] = (
            entry["ocr_correct"] / entry["gt_count"] if entry["gt_count"] > 0 else 1.0
        )

    return DiacriticScore(
        total_in_gt=seen,
        correctly_recognized=kept,
        score=kept / seen if seen > 0 else 1.0,
        per_diacritic=details,
    )
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def _check_char_at_context(
|
| 308 |
+
gt: str,
|
| 309 |
+
hyp: str,
|
| 310 |
+
gt_pos: int,
|
| 311 |
+
gt_char: str,
|
| 312 |
+
equivalents: list[str],
|
| 313 |
+
) -> bool:
|
| 314 |
+
"""Vérifie si la position correspondante dans l'hypothèse contient un équivalent."""
|
| 315 |
+
# Approche simple : chercher si l'hypothèse contient le caractère ou son équivalent
|
| 316 |
+
# dans une fenêtre autour de la position estimée
|
| 317 |
+
for equiv in equivalents:
|
| 318 |
+
if equiv in hyp:
|
| 319 |
+
return True
|
| 320 |
+
return False
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def aggregate_ligature_scores(scores: list[LigatureScore]) -> dict:
|
| 324 |
+
"""Agrège les scores de ligatures sur un corpus."""
|
| 325 |
+
total_gt = sum(s.total_in_gt for s in scores)
|
| 326 |
+
total_correct = sum(s.correctly_recognized for s in scores)
|
| 327 |
+
score = total_correct / total_gt if total_gt > 0 else 1.0
|
| 328 |
+
|
| 329 |
+
# Agrégation par ligature
|
| 330 |
+
per_lig: dict[str, dict] = {}
|
| 331 |
+
for s in scores:
|
| 332 |
+
for lig, data in s.per_ligature.items():
|
| 333 |
+
if lig not in per_lig:
|
| 334 |
+
per_lig[lig] = {"gt_count": 0, "ocr_correct": 0}
|
| 335 |
+
per_lig[lig]["gt_count"] += data["gt_count"]
|
| 336 |
+
per_lig[lig]["ocr_correct"] += data["ocr_correct"]
|
| 337 |
+
for lig_data in per_lig.values():
|
| 338 |
+
lig_data["score"] = (
|
| 339 |
+
lig_data["ocr_correct"] / lig_data["gt_count"]
|
| 340 |
+
if lig_data["gt_count"] > 0 else 1.0
|
| 341 |
+
)
|
| 342 |
+
|
| 343 |
+
return {
|
| 344 |
+
"score": round(score, 4),
|
| 345 |
+
"total_in_gt": total_gt,
|
| 346 |
+
"correctly_recognized": total_correct,
|
| 347 |
+
"per_ligature": per_lig,
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def aggregate_diacritic_scores(scores: list[DiacriticScore]) -> dict:
|
| 352 |
+
"""Agrège les scores diacritiques sur un corpus."""
|
| 353 |
+
total_gt = sum(s.total_in_gt for s in scores)
|
| 354 |
+
total_correct = sum(s.correctly_recognized for s in scores)
|
| 355 |
+
score = total_correct / total_gt if total_gt > 0 else 1.0
|
| 356 |
+
return {
|
| 357 |
+
"score": round(score, 4),
|
| 358 |
+
"total_in_gt": total_gt,
|
| 359 |
+
"correctly_recognized": total_correct,
|
| 360 |
+
}
|
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Matrice de confusion unicode pour l'analyse fine des erreurs OCR.
|
| 2 |
+
|
| 3 |
+
Pour chaque moteur, on calcule quels caractères du GT sont transcrits par
|
| 4 |
+
quels caractères OCR (substitutions). Cette "empreinte d'erreur" est
|
| 5 |
+
caractéristique de chaque moteur ou pipeline.
|
| 6 |
+
|
| 7 |
+
Méthode
|
| 8 |
+
-------
|
| 9 |
+
L'alignement caractère par caractère utilise les opérations d'édition
|
| 10 |
+
de la distance de Levenshtein (via difflib.SequenceMatcher), ce qui permet
|
| 11 |
+
d'identifier les substitutions, insertions et suppressions.
|
| 12 |
+
|
| 13 |
+
La matrice est stockée comme un dict de dict :
|
| 14 |
+
``{gt_char: {ocr_char: count}}``
|
| 15 |
+
|
| 16 |
+
La valeur spéciale ``"∅"`` (U+2205) représente un caractère vide :
|
| 17 |
+
- ``{"a": {"∅": 3}}`` → 'a' supprimé 3 fois dans l'OCR
|
| 18 |
+
- ``{"∅": {"x": 2}}`` → 'x' inséré 2 fois dans l'OCR (absent du GT)
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import difflib
|
| 24 |
+
from collections import defaultdict
|
| 25 |
+
from dataclasses import dataclass, field
|
| 26 |
+
from typing import Optional
|
| 27 |
+
|
| 28 |
+
# Placeholder glyph for an absent character (insertions / deletions)
EMPTY_CHAR = "∅"

# Whitespace characters excluded from the matrix by default
_WHITESPACE = set(" \t\n\r")


@dataclass
class ConfusionMatrix:
    """Unicode confusion matrix for one (ground truth, OCR) pair."""

    # Outer key = GT char; inner key = OCR char; value = occurrence count.
    matrix: dict[str, dict[str, int]] = field(default_factory=dict)

    total_substitutions: int = 0
    total_insertions: int = 0
    total_deletions: int = 0

    @property
    def total_errors(self) -> int:
        """Sum of substitutions, insertions and deletions."""
        return self.total_substitutions + self.total_insertions + self.total_deletions

    def top_confusions(self, n: int = 20) -> list[dict]:
        """Return the *n* most frequent confusions (substitutions only;
        insertion and deletion cells are skipped)."""
        triples = [
            (gt_ch, ocr_ch, cnt)
            for gt_ch, row in self.matrix.items()
            if gt_ch != EMPTY_CHAR  # skip the insertion row
            for ocr_ch, cnt in row.items()
            if ocr_ch != EMPTY_CHAR and gt_ch != ocr_ch  # skip deletions/correct
        ]
        triples.sort(key=lambda t: -t[2])
        return [{"gt": g, "ocr": o, "count": c} for g, o, c in triples[:n]]

    def as_compact_dict(self, min_count: int = 1) -> dict:
        """Serialize the matrix, dropping cells whose count is below
        *min_count* (and any row left empty as a result)."""
        kept: dict[str, dict[str, int]] = {}
        for gt_ch, row in self.matrix.items():
            cells = {ocr_ch: cnt for ocr_ch, cnt in row.items() if cnt >= min_count}
            if cells:
                kept[gt_ch] = cells
        return {
            "matrix": kept,
            "total_substitutions": self.total_substitutions,
            "total_insertions": self.total_insertions,
            "total_deletions": self.total_deletions,
        }

    def as_dict(self) -> dict:
        """Full serialization (no filtering)."""
        return self.as_compact_dict(min_count=1)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def build_confusion_matrix(
    ground_truth: str,
    hypothesis: str,
    ignore_whitespace: bool = True,
    ignore_correct: bool = True,
) -> ConfusionMatrix:
    """Build the Unicode confusion matrix for one GT/OCR pair.

    Parameters
    ----------
    ground_truth:
        Reference text (ground truth).
    hypothesis:
        OCR output.
    ignore_whitespace:
        When True, spaces, tabs and line breaks are left out of the matrix.
    ignore_correct:
        When True (default), identical (gt, ocr) pairs are not recorded,
        which keeps the matrix small.

    Returns
    -------
    ConfusionMatrix
    """
    counts: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    subs = ins = dels = 0

    if not ground_truth and not hypothesis:
        return ConfusionMatrix(dict(counts), 0, 0, 0)

    # SequenceMatcher gives a character-level edit script.
    opcodes = difflib.SequenceMatcher(
        None, ground_truth, hypothesis, autojunk=False
    ).get_opcodes()

    for tag, i1, i2, j1, j2 in opcodes:
        if tag == "equal":
            if not ignore_correct:
                for ch in ground_truth[i1:i2]:
                    if ignore_whitespace and ch in _WHITESPACE:
                        continue
                    counts[ch][ch] += 1
        elif tag == "replace":
            gt_part = ground_truth[i1:i2]
            hyp_part = hypothesis[j1:j2]
            _align_segments(gt_part, hyp_part, counts, ignore_whitespace)
            # NOTE(review): coarse upper bound kept from the original code —
            # every char of the longer segment (whitespace included) counts
            # as one substitution, which can overstate the true number.
            subs += max(len(gt_part), len(hyp_part))
        elif tag == "delete":
            for ch in ground_truth[i1:i2]:
                if ignore_whitespace and ch in _WHITESPACE:
                    continue
                counts[ch][EMPTY_CHAR] += 1
                dels += 1
        elif tag == "insert":
            for ch in hypothesis[j1:j2]:
                if ignore_whitespace and ch in _WHITESPACE:
                    continue
                counts[EMPTY_CHAR][ch] += 1
                ins += 1

    # Freeze the nested defaultdicts into plain dicts.
    frozen = {gt_ch: dict(row) for gt_ch, row in counts.items()}

    return ConfusionMatrix(
        matrix=frozen,
        total_substitutions=subs,
        total_insertions=ins,
        total_deletions=dels,
    )
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _align_segments(
|
| 162 |
+
gt_seg: str,
|
| 163 |
+
oc_seg: str,
|
| 164 |
+
matrix: dict,
|
| 165 |
+
ignore_whitespace: bool,
|
| 166 |
+
) -> None:
|
| 167 |
+
"""Aligne deux segments de longueurs potentiellement différentes."""
|
| 168 |
+
if not gt_seg:
|
| 169 |
+
for ch in oc_seg:
|
| 170 |
+
if ignore_whitespace and ch in _WHITESPACE:
|
| 171 |
+
continue
|
| 172 |
+
matrix[EMPTY_CHAR][ch] += 1
|
| 173 |
+
return
|
| 174 |
+
if not oc_seg:
|
| 175 |
+
for ch in gt_seg:
|
| 176 |
+
if ignore_whitespace and ch in _WHITESPACE:
|
| 177 |
+
continue
|
| 178 |
+
matrix[ch][EMPTY_CHAR] += 1
|
| 179 |
+
return
|
| 180 |
+
|
| 181 |
+
if len(gt_seg) == len(oc_seg):
|
| 182 |
+
# Substitutions 1-pour-1
|
| 183 |
+
for g, o in zip(gt_seg, oc_seg):
|
| 184 |
+
if ignore_whitespace and (g in _WHITESPACE or o in _WHITESPACE):
|
| 185 |
+
continue
|
| 186 |
+
matrix[g][o] += 1
|
| 187 |
+
else:
|
| 188 |
+
# Longueurs différentes : utiliser SequenceMatcher récursif sur segments courts
|
| 189 |
+
sub = difflib.SequenceMatcher(None, gt_seg, oc_seg, autojunk=False)
|
| 190 |
+
for tag2, i1, i2, j1, j2 in sub.get_opcodes():
|
| 191 |
+
if tag2 == "equal":
|
| 192 |
+
pass
|
| 193 |
+
elif tag2 == "replace":
|
| 194 |
+
# Régression simple : aligner par troncature
|
| 195 |
+
for g, o in zip(gt_seg[i1:i2], oc_seg[j1:j2]):
|
| 196 |
+
if ignore_whitespace and (g in _WHITESPACE or o in _WHITESPACE):
|
| 197 |
+
continue
|
| 198 |
+
matrix[g][o] += 1
|
| 199 |
+
elif tag2 == "delete":
|
| 200 |
+
for g in gt_seg[i1:i2]:
|
| 201 |
+
if ignore_whitespace and g in _WHITESPACE:
|
| 202 |
+
continue
|
| 203 |
+
matrix[g][EMPTY_CHAR] += 1
|
| 204 |
+
elif tag2 == "insert":
|
| 205 |
+
for o in oc_seg[j1:j2]:
|
| 206 |
+
if ignore_whitespace and o in _WHITESPACE:
|
| 207 |
+
continue
|
| 208 |
+
matrix[EMPTY_CHAR][o] += 1
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
def aggregate_confusion_matrices(matrices: list[ConfusionMatrix]) -> ConfusionMatrix:
    """Merge several confusion matrices into a single one.

    Useful to obtain the corpus-level matrix from per-page matrices.
    """
    merged: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    subs = ins = dels = 0

    for cm in matrices:
        subs += cm.total_substitutions
        ins += cm.total_insertions
        dels += cm.total_deletions
        for gt_ch, row in cm.matrix.items():
            for ocr_ch, cnt in row.items():
                merged[gt_ch][ocr_ch] += cnt

    return ConfusionMatrix(
        matrix={gt_ch: dict(row) for gt_ch, row in merged.items()},
        total_substitutions=subs,
        total_insertions=ins,
        total_deletions=dels,
    )
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def top_confused_chars(
    matrix: ConfusionMatrix,
    n: int = 15,
    exclude_empty: bool = True,
) -> list[dict]:
    """Return the GT characters most often mistranscribed.

    Bug fix: the error-count condition previously ended with ``or True``,
    which made the ``exclude_empty`` clause always pass — deletions
    (OCR = EMPTY_CHAR) were counted even when callers asked to exclude
    empty cells. The dead ``or True`` is removed and the same filter is
    now applied consistently to ``top_substitutes``.

    Parameters
    ----------
    matrix:
        Confusion matrix to analyse.
    n:
        Maximum number of characters to return.
    exclude_empty:
        When True (default), skip the insertion row (GT = EMPTY_CHAR) and
        do not count deletions (OCR = EMPTY_CHAR) as confusions.

    Returns
    -------
    list[dict]
        Sorted by descending total error count:
        ``[{"char": "X", "total_errors": 47, "top_substitutes": [...]}, ...]``
    """
    def _counted(gt_char: str, ocr_char: str) -> bool:
        # A cell is a confusion when the chars differ and, if requested,
        # the OCR side is a real character rather than a deletion marker.
        if ocr_char == gt_char:
            return False
        return not exclude_empty or ocr_char != EMPTY_CHAR

    char_stats: dict[str, dict] = {}
    for gt_char, ocr_counts in matrix.matrix.items():
        if exclude_empty and gt_char == EMPTY_CHAR:
            continue  # insertion row
        error_count = sum(
            cnt for oc, cnt in ocr_counts.items() if _counted(gt_char, oc)
        )
        if error_count > 0:
            top_subs = sorted(
                [
                    {"ocr": oc, "count": cnt}
                    for oc, cnt in ocr_counts.items()
                    if _counted(gt_char, oc)
                ],
                key=lambda x: -x["count"],
            )[:5]
            char_stats[gt_char] = {
                "char": gt_char,
                "total_errors": error_count,
                "top_substitutes": top_subs,
            }

    return sorted(char_stats.values(), key=lambda x: -x["total_errors"])[:n]
|
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Analyse automatique de la qualité des images de documents numérisés.
|
| 2 |
+
|
| 3 |
+
Métriques
|
| 4 |
+
---------
|
| 5 |
+
- **Score de netteté** : variance du laplacien (plus élevé = plus net)
|
| 6 |
+
- **Niveau de bruit** : écart-type des résidus haute-fréquence
|
| 7 |
+
- **Angle de rotation résiduel** : estimé par projection horizontale
|
| 8 |
+
- **Score de contraste** : ratio Michelson entre zones sombres (encre) et claires (fond)
|
| 9 |
+
- **Score de qualité global** : combinaison normalisée des métriques ci-dessus
|
| 10 |
+
|
| 11 |
+
Ces calculs sont réalisés en pur Python + bibliothèques stdlib ou Pillow.
|
| 12 |
+
NumPy est utilisé si disponible (calculs plus rapides), mais les méthodes
|
| 13 |
+
de fallback n'en dépendent pas.
|
| 14 |
+
|
| 15 |
+
Note
|
| 16 |
+
----
|
| 17 |
+
Pour les images placeholder (fixtures), des valeurs fictives cohérentes
|
| 18 |
+
sont générées via `generate_mock_quality_scores()`.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import math
|
| 24 |
+
import statistics
|
| 25 |
+
from dataclasses import dataclass
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
from typing import Optional
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class ImageQualityResult:
    """Quality metrics computed for one scanned-document image."""

    # Sharpness in [0, 1], derived from the normalised Laplacian variance.
    sharpness_score: float = 0.0
    # Noise level in [0, 1]; 0 = clean, 1 = very noisy.
    noise_level: float = 0.0
    # Estimated residual skew in degrees (positive = clockwise).
    rotation_degrees: float = 0.0
    # Michelson ink/background contrast ratio in [0, 1].
    contrast_score: float = 0.0
    # Weighted overall quality score in [0, 1].
    quality_score: float = 0.0
    # How the metrics were obtained: 'pillow', 'numpy', 'mock' or 'none'.
    analysis_method: str = "none"
    # Failure description when the analysis could not run.
    error: Optional[str] = None

    @property
    def is_good_quality(self) -> bool:
        """True when the overall quality score reaches 0.7."""
        return self.quality_score >= 0.7

    @property
    def quality_tier(self) -> str:
        """Bucket the overall score into 'good', 'medium' or 'poor'."""
        if self.quality_score >= 0.7:
            return "good"
        return "medium" if self.quality_score >= 0.4 else "poor"

    def as_dict(self) -> dict:
        """Serialise to a JSON-friendly dict (rounded scores, optional error)."""
        payload = {
            "sharpness_score": round(self.sharpness_score, 4),
            "noise_level": round(self.noise_level, 4),
            "rotation_degrees": round(self.rotation_degrees, 2),
            "contrast_score": round(self.contrast_score, 4),
            "quality_score": round(self.quality_score, 4),
            "quality_tier": self.quality_tier,
            "analysis_method": self.analysis_method,
        }
        if self.error:
            payload["error"] = self.error
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "ImageQualityResult":
        """Rebuild a result from `as_dict` output.

        'quality_tier' is a derived property and is therefore ignored here.
        """
        return cls(
            sharpness_score=data.get("sharpness_score", 0.0),
            noise_level=data.get("noise_level", 0.0),
            rotation_degrees=data.get("rotation_degrees", 0.0),
            contrast_score=data.get("contrast_score", 0.0),
            quality_score=data.get("quality_score", 0.0),
            analysis_method=data.get("analysis_method", "none"),
            error=data.get("error"),
        )
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def analyze_image_quality(image_path: str | Path) -> ImageQualityResult:
    """Analyse the quality of a scanned document image.

    Tries, in order:
    1. Pillow + NumPy (full analysis)
    2. Pillow alone (simplified analysis)
    3. Fallback: an error result with a neutral overall score

    Parameters
    ----------
    image_path:
        Path to the image (JPG, PNG, TIFF, ...).

    Returns
    -------
    ImageQualityResult
    """
    path = Path(image_path)
    if not path.exists():
        return ImageQualityResult(
            error=f"Fichier image introuvable : {image_path}",
            analysis_method="none",
        )

    # Preferred path: full analysis with Pillow + NumPy.
    try:
        import numpy as np
        from PIL import Image
        return _analyze_with_numpy(path, np, Image)
    except ImportError:
        pass

    # Degraded path: simplified analysis with Pillow only.
    try:
        from PIL import Image
        return _analyze_with_pillow(path, Image)
    except ImportError:
        pass

    return ImageQualityResult(
        error="Pillow non disponible (pip install Pillow)",
        analysis_method="none",
        quality_score=0.5,  # neutral value
    )
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def _analyze_with_numpy(path: Path, np, Image) -> ImageQualityResult:
    """Full quality analysis using NumPy on the greyscale pixel matrix."""
    grey = np.array(Image.open(path).convert("L"), dtype=np.float32)

    # Sharpness: Laplacian variance, mapped empirically so that a
    # variance >= 500 saturates at 1.0 (< 50 reads as blurry).
    sharpness = min(1.0, _laplacian_variance_numpy(grey, np) / 500.0)

    # Noise: spread of the high-frequency residuals.
    noise = _noise_level_numpy(grey, np)

    # Residual skew estimated from horizontal projections.
    rotation = _estimate_rotation_numpy(grey, np)

    # Michelson ink/background contrast.
    contrast = _contrast_score_numpy(grey, np)

    # Weighted combination of the individual metrics.
    quality = _global_quality_score(sharpness, noise, abs(rotation), contrast)

    return ImageQualityResult(
        sharpness_score=float(sharpness),
        noise_level=float(noise),
        rotation_degrees=float(rotation),
        contrast_score=float(contrast),
        quality_score=float(quality),
        analysis_method="numpy",
    )
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def _analyze_with_pillow(path: Path, Image) -> ImageQualityResult:
    """Simplified quality analysis with Pillow only (no NumPy).

    Without array math the rotation angle cannot be estimated and the
    sharpness/noise figures are coarse approximations.

    Fix vs. previous revision: removed the unused locals
    ``w, h = img.size`` and ``mean_pix = statistics.mean(pixels)``
    (dead work on every call).
    """
    img = Image.open(path).convert("L")
    pixels = list(img.getdata())

    if not pixels:
        return ImageQualityResult(quality_score=0.5, analysis_method="pillow")

    # Contrast: Michelson ratio over the full observed value range.
    min_val = min(pixels)
    max_val = max(pixels)
    if max_val + min_val > 0:
        contrast = (max_val - min_val) / (max_val + min_val)
    else:
        contrast = 0.0

    # Sharpness proxy: global pixel variance (no Laplacian available).
    try:
        variance = statistics.variance(pixels)
    except statistics.StatisticsError:  # fewer than two pixels
        variance = 0.0
    sharpness = min(1.0, math.sqrt(variance) / 128.0)

    # Noise proxy: stdev over (at most) the first 1000 pixels.
    noise = min(1.0, statistics.stdev(pixels[:min(1000, len(pixels))]) / 64.0) if len(pixels) > 1 else 0.0

    quality = _global_quality_score(sharpness, noise, 0.0, contrast)

    return ImageQualityResult(
        sharpness_score=sharpness,
        noise_level=noise,
        rotation_degrees=0.0,  # not computed without NumPy
        contrast_score=contrast,
        quality_score=quality,
        analysis_method="pillow",
    )
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _laplacian_variance_numpy(arr, np) -> float:
|
| 215 |
+
"""Calcule la variance du laplacien (mesure de netteté)."""
|
| 216 |
+
# Filtre laplacien 3x3
|
| 217 |
+
laplacian_kernel = np.array([
|
| 218 |
+
[0, 1, 0],
|
| 219 |
+
[1, -4, 1],
|
| 220 |
+
[0, 1, 0],
|
| 221 |
+
], dtype=np.float32)
|
| 222 |
+
|
| 223 |
+
# Convolution manuelle simplifiée (bordures ignorées)
|
| 224 |
+
h, w = arr.shape
|
| 225 |
+
if h < 3 or w < 3:
|
| 226 |
+
return float(np.var(arr))
|
| 227 |
+
|
| 228 |
+
# Utiliser une convolution rapide avec slicing
|
| 229 |
+
center = arr[1:-1, 1:-1]
|
| 230 |
+
top = arr[:-2, 1:-1]
|
| 231 |
+
bottom = arr[2:, 1:-1]
|
| 232 |
+
left = arr[1:-1, :-2]
|
| 233 |
+
right = arr[1:-1, 2:]
|
| 234 |
+
lap = top + bottom + left + right - 4 * center
|
| 235 |
+
|
| 236 |
+
return float(np.var(lap))
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _noise_level_numpy(arr, np) -> float:
|
| 240 |
+
"""Estime le niveau de bruit par la MAD (Median Absolute Deviation) des gradients."""
|
| 241 |
+
h, w = arr.shape
|
| 242 |
+
if h < 2 or w < 2:
|
| 243 |
+
return 0.0
|
| 244 |
+
# Différences horizontales et verticales
|
| 245 |
+
diff_h = np.abs(arr[:, 1:] - arr[:, :-1])
|
| 246 |
+
diff_v = np.abs(arr[1:, :] - arr[:-1, :])
|
| 247 |
+
noise_std = float(np.median(np.concatenate([diff_h.ravel(), diff_v.ravel()])))
|
| 248 |
+
# Normaliser : 0 = pas de bruit, 1 = très bruité (seuil à ~30)
|
| 249 |
+
return min(1.0, noise_std / 30.0)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _estimate_rotation_numpy(arr, np) -> float:
    """Estimate the residual skew angle via a simplified horizontal projection.

    Scans candidate angles in [-5, +5] degrees (1-degree steps) and keeps
    the one maximising the variance of the shifted row sums: text lines
    aligned with the scan axis produce the most "peaky" projection profile.

    Returns the estimated angle in degrees (the original docstring claimed
    a [-45, 45] range, but the search below only covers [-5, 5]).
    """
    # Simplified method: variance of projections at a few candidate angles,
    # kept small for performance.
    h, w = arr.shape
    if h < 20 or w < 20:
        # Too small for a meaningful projection profile.
        return 0.0

    # Subsample rows (at most ~100) to keep the scan cheap.
    step = max(1, h // 100)
    sample = arr[::step, :]

    best_angle = 0.0
    best_var = -1.0

    for angle_deg in range(-5, 6):  # +/- 5 degrees, 1-degree steps
        angle_rad = math.radians(angle_deg)
        # Per-row horizontal offset implied by the candidate angle
        # (fast linear approximation instead of a true rotation).
        offsets = np.round(
            np.arange(sample.shape[0]) * math.tan(angle_rad)
        ).astype(int)
        offsets = np.clip(offsets, 0, w - 1)

        # Variance of the shifted row sums.
        # NOTE(review): after the clip, offsets >= 0, so the slice below
        # reduces to sample[i, offsets[i]:w] — the upper bound never clips,
        # and negative angles collapse to offset 0; confirm this matches
        # the intended projection.
        try:
            row_sums = np.array([
                float(np.sum(sample[i, max(0, offsets[i]):min(w, offsets[i]+w)]))
                for i in range(sample.shape[0])
            ])
            var = float(np.var(row_sums))
            if var > best_var:
                best_var = var
                best_angle = float(angle_deg)
        except Exception:
            pass

    return best_angle
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def _contrast_score_numpy(arr, np) -> float:
|
| 296 |
+
"""Score de contraste Michelson [0, 1]."""
|
| 297 |
+
p5 = float(np.percentile(arr, 5)) # fond clair
|
| 298 |
+
p95 = float(np.percentile(arr, 95)) # encre sombre
|
| 299 |
+
if p5 + p95 == 0:
|
| 300 |
+
return 0.0
|
| 301 |
+
# Michelson : (Imax - Imin) / (Imax + Imin)
|
| 302 |
+
return float((p95 - p5) / (p95 + p5))
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def _global_quality_score(
|
| 306 |
+
sharpness: float,
|
| 307 |
+
noise: float,
|
| 308 |
+
rotation_abs: float,
|
| 309 |
+
contrast: float,
|
| 310 |
+
) -> float:
|
| 311 |
+
"""Calcule le score de qualité global pondéré."""
|
| 312 |
+
# Poids : netteté (40%), contraste (30%), bruit (20%), rotation (10%)
|
| 313 |
+
score = (
|
| 314 |
+
0.40 * sharpness
|
| 315 |
+
+ 0.30 * contrast
|
| 316 |
+
+ 0.20 * (1.0 - noise) # moins de bruit = mieux
|
| 317 |
+
+ 0.10 * max(0.0, 1.0 - rotation_abs / 10.0) # ±10° max
|
| 318 |
+
)
|
| 319 |
+
return round(min(1.0, max(0.0, score)), 4)
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
# ---------------------------------------------------------------------------
|
| 323 |
+
# Données fictives pour les fixtures de démo
|
| 324 |
+
# ---------------------------------------------------------------------------
|
| 325 |
+
|
| 326 |
+
def generate_mock_quality_scores(
    doc_id: str,
    seed: Optional[int] = None,
) -> ImageQualityResult:
    """Generate plausible, reproducible fake quality metrics for a document.

    Used by the demo fixtures to simulate a realistic spread of image
    qualities (good, medium, degraded).

    Parameters
    ----------
    doc_id:
        Document identifier; drives the default seed so the same document
        always yields the same mock metrics across runs and processes.
    seed:
        Optional explicit random seed (0 is honoured as a valid seed).

    Fixes vs. previous revision:
    - ``seed or hash(doc_id) % 2**32`` silently ignored an explicit seed
      of 0, and ``hash(str)`` is salted per process (PYTHONHASHSEED), so
      the advertised reproducibility did not hold across runs. The seed
      is now derived from ``zlib.crc32``, which is stable.
    - removed the unused local ``last_char``.
    """
    import random
    import zlib

    if seed is None:
        seed = zlib.crc32(doc_id.encode("utf-8"))
    rng = random.Random(seed)

    # Base quality drawn in [0.3, 0.9); individual metrics jitter around it.
    base_quality = 0.3 + rng.random() * 0.6

    sharpness = max(0.1, min(1.0, base_quality + rng.gauss(0, 0.1)))
    noise = max(0.0, min(1.0, (1.0 - base_quality) * 0.8 + rng.gauss(0, 0.05)))
    rotation = rng.gauss(0, 1.5)  # typical residual skew around +/-1.5 degrees
    contrast = max(0.2, min(1.0, base_quality + rng.gauss(0, 0.15)))

    quality = _global_quality_score(sharpness, noise, abs(rotation), contrast)

    return ImageQualityResult(
        sharpness_score=round(sharpness, 4),
        noise_level=round(noise, 4),
        rotation_degrees=round(rotation, 2),
        contrast_score=round(contrast, 4),
        quality_score=round(quality, 4),
        analysis_method="mock",
    )
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def aggregate_image_quality(results: list[ImageQualityResult]) -> dict:
    """Aggregate per-document image-quality metrics over a corpus.

    Returns an empty dict for empty input, an error dict when no document
    was analysed successfully, and otherwise corpus-level means, the
    good/medium/poor distribution and the raw score list (consumed by the
    report's scatter plot).
    """
    if not results:
        return {}

    valid = [r for r in results if r.error is None]
    if not valid:
        return {"error": "Aucune analyse réussie"}

    def _mean(vals: list[float]) -> float:
        return round(statistics.mean(vals), 4) if vals else 0.0

    # Count documents per quality tier.
    tiers = {"good": 0, "medium": 0, "poor": 0}
    for item in valid:
        tiers[item.quality_tier] += 1

    return {
        "mean_quality_score": _mean([r.quality_score for r in valid]),
        "mean_sharpness": _mean([r.sharpness_score for r in valid]),
        "mean_noise_level": _mean([r.noise_level for r in valid]),
        "quality_distribution": tiers,
        "document_count": len(valid),
        "scores": [r.quality_score for r in valid],  # for the scatter plot
    }
|
|
@@ -35,6 +35,17 @@ class DocumentResult:
|
|
| 35 |
"""Sortie OCR brute avant correction LLM (None pour les moteurs OCR seuls)."""
|
| 36 |
pipeline_metadata: dict = field(default_factory=dict)
|
| 37 |
"""Métadonnées du pipeline : mode, prompt, over-normalization…"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
def as_dict(self) -> dict:
|
| 40 |
d = {
|
|
@@ -50,6 +61,16 @@ class DocumentResult:
|
|
| 50 |
d["ocr_intermediate"] = self.ocr_intermediate
|
| 51 |
if self.pipeline_metadata:
|
| 52 |
d["pipeline_metadata"] = self.pipeline_metadata
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
return d
|
| 54 |
|
| 55 |
|
|
@@ -67,6 +88,17 @@ class EngineReport:
|
|
| 67 |
Clés typiques : mode, prompt_file, llm_model, llm_provider, pipeline_steps,
|
| 68 |
over_normalization (score agrégé, classe 10 de la taxonomie).
|
| 69 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
def __post_init__(self) -> None:
|
| 72 |
if not self.aggregated_metrics and self.document_results:
|
|
@@ -84,6 +116,20 @@ class EngineReport:
|
|
| 84 |
wer_stats = self.aggregated_metrics.get("wer", {})
|
| 85 |
return wer_stats.get("mean")
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
@property
|
| 88 |
def is_pipeline(self) -> bool:
|
| 89 |
"""Vrai si ce rapport correspond à un pipeline OCR+LLM."""
|
|
@@ -99,6 +145,16 @@ class EngineReport:
|
|
| 99 |
}
|
| 100 |
if self.pipeline_info:
|
| 101 |
d["pipeline_info"] = self.pipeline_info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
return d
|
| 103 |
|
| 104 |
|
|
|
|
| 35 |
"""Sortie OCR brute avant correction LLM (None pour les moteurs OCR seuls)."""
|
| 36 |
pipeline_metadata: dict = field(default_factory=dict)
|
| 37 |
"""Métadonnées du pipeline : mode, prompt, over-normalization…"""
|
| 38 |
+
# Champs Sprint 5 — métriques avancées patrimoniales
|
| 39 |
+
confusion_matrix: Optional[dict] = None
|
| 40 |
+
"""Matrice de confusion unicode sérialisée."""
|
| 41 |
+
char_scores: Optional[dict] = None
|
| 42 |
+
"""Scores ligatures et diacritiques."""
|
| 43 |
+
taxonomy: Optional[dict] = None
|
| 44 |
+
"""Classification taxonomique des erreurs (classes 1-9)."""
|
| 45 |
+
structure: Optional[dict] = None
|
| 46 |
+
"""Analyse structurelle (segmentation lignes, ordre lecture)."""
|
| 47 |
+
image_quality: Optional[dict] = None
|
| 48 |
+
"""Métriques de qualité image."""
|
| 49 |
|
| 50 |
def as_dict(self) -> dict:
|
| 51 |
d = {
|
|
|
|
| 61 |
d["ocr_intermediate"] = self.ocr_intermediate
|
| 62 |
if self.pipeline_metadata:
|
| 63 |
d["pipeline_metadata"] = self.pipeline_metadata
|
| 64 |
+
if self.confusion_matrix is not None:
|
| 65 |
+
d["confusion_matrix"] = self.confusion_matrix
|
| 66 |
+
if self.char_scores is not None:
|
| 67 |
+
d["char_scores"] = self.char_scores
|
| 68 |
+
if self.taxonomy is not None:
|
| 69 |
+
d["taxonomy"] = self.taxonomy
|
| 70 |
+
if self.structure is not None:
|
| 71 |
+
d["structure"] = self.structure
|
| 72 |
+
if self.image_quality is not None:
|
| 73 |
+
d["image_quality"] = self.image_quality
|
| 74 |
return d
|
| 75 |
|
| 76 |
|
|
|
|
| 88 |
Clés typiques : mode, prompt_file, llm_model, llm_provider, pipeline_steps,
|
| 89 |
over_normalization (score agrégé, classe 10 de la taxonomie).
|
| 90 |
"""
|
| 91 |
+
# Métriques agrégées Sprint 5
|
| 92 |
+
aggregated_confusion: Optional[dict] = None
|
| 93 |
+
"""Matrice de confusion unicode agrégée sur le corpus."""
|
| 94 |
+
aggregated_char_scores: Optional[dict] = None
|
| 95 |
+
"""Scores ligatures/diacritiques agrégés."""
|
| 96 |
+
aggregated_taxonomy: Optional[dict] = None
|
| 97 |
+
"""Distribution taxonomique des erreurs agrégée."""
|
| 98 |
+
aggregated_structure: Optional[dict] = None
|
| 99 |
+
"""Métriques structurelles agrégées."""
|
| 100 |
+
aggregated_image_quality: Optional[dict] = None
|
| 101 |
+
"""Métriques de qualité image agrégées."""
|
| 102 |
|
| 103 |
def __post_init__(self) -> None:
|
| 104 |
if not self.aggregated_metrics and self.document_results:
|
|
|
|
| 116 |
wer_stats = self.aggregated_metrics.get("wer", {})
|
| 117 |
return wer_stats.get("mean")
|
| 118 |
|
| 119 |
+
@property
|
| 120 |
+
def ligature_score(self) -> Optional[float]:
|
| 121 |
+
"""Score de ligatures agrégé (None si non calculé)."""
|
| 122 |
+
if self.aggregated_char_scores:
|
| 123 |
+
return self.aggregated_char_scores.get("ligature", {}).get("score")
|
| 124 |
+
return None
|
| 125 |
+
|
| 126 |
+
@property
|
| 127 |
+
def diacritic_score(self) -> Optional[float]:
|
| 128 |
+
"""Score diacritique agrégé (None si non calculé)."""
|
| 129 |
+
if self.aggregated_char_scores:
|
| 130 |
+
return self.aggregated_char_scores.get("diacritic", {}).get("score")
|
| 131 |
+
return None
|
| 132 |
+
|
| 133 |
@property
|
| 134 |
def is_pipeline(self) -> bool:
|
| 135 |
"""Vrai si ce rapport correspond à un pipeline OCR+LLM."""
|
|
|
|
| 145 |
}
|
| 146 |
if self.pipeline_info:
|
| 147 |
d["pipeline_info"] = self.pipeline_info
|
| 148 |
+
if self.aggregated_confusion is not None:
|
| 149 |
+
d["aggregated_confusion"] = self.aggregated_confusion
|
| 150 |
+
if self.aggregated_char_scores is not None:
|
| 151 |
+
d["aggregated_char_scores"] = self.aggregated_char_scores
|
| 152 |
+
if self.aggregated_taxonomy is not None:
|
| 153 |
+
d["aggregated_taxonomy"] = self.aggregated_taxonomy
|
| 154 |
+
if self.aggregated_structure is not None:
|
| 155 |
+
d["aggregated_structure"] = self.aggregated_structure
|
| 156 |
+
if self.aggregated_image_quality is not None:
|
| 157 |
+
d["aggregated_image_quality"] = self.aggregated_image_quality
|
| 158 |
return d
|
| 159 |
|
| 160 |
|
|
@@ -97,6 +97,57 @@ def run_benchmark(
|
|
| 97 |
)
|
| 98 |
pipeline_meta["over_normalization"] = over_norm.as_dict()
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
document_results.append(
|
| 101 |
DocumentResult(
|
| 102 |
doc_id=doc.doc_id,
|
|
@@ -108,18 +159,35 @@ def run_benchmark(
|
|
| 108 |
engine_error=ocr_result.error,
|
| 109 |
ocr_intermediate=ocr_intermediate,
|
| 110 |
pipeline_metadata=pipeline_meta,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
)
|
| 112 |
)
|
| 113 |
|
| 114 |
engine_version = engine._safe_version()
|
| 115 |
pipeline_info = _build_pipeline_info(engine, document_results)
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
report = EngineReport(
|
| 118 |
engine_name=engine.name,
|
| 119 |
engine_version=engine_version,
|
| 120 |
engine_config=engine.config,
|
| 121 |
document_results=document_results,
|
| 122 |
pipeline_info=pipeline_info,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
engine_reports.append(report)
|
| 125 |
logger.info(
|
|
@@ -184,3 +252,99 @@ def _build_pipeline_info(engine: BaseOCREngine, doc_results: list[DocumentResult
|
|
| 184 |
}
|
| 185 |
|
| 186 |
return info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
pipeline_meta["over_normalization"] = over_norm.as_dict()
|
| 99 |
|
| 100 |
+
# Sprint 5 : métriques avancées patrimoniales
|
| 101 |
+
confusion_data = None
|
| 102 |
+
char_scores_data = None
|
| 103 |
+
taxonomy_data = None
|
| 104 |
+
structure_data = None
|
| 105 |
+
image_quality_data = None
|
| 106 |
+
|
| 107 |
+
if ocr_result.success:
|
| 108 |
+
try:
|
| 109 |
+
from picarones.core.confusion import build_confusion_matrix
|
| 110 |
+
cm = build_confusion_matrix(doc.ground_truth, ocr_result.text)
|
| 111 |
+
confusion_data = cm.as_dict()
|
| 112 |
+
except Exception:
|
| 113 |
+
pass
|
| 114 |
+
|
| 115 |
+
try:
|
| 116 |
+
from picarones.core.char_scores import (
|
| 117 |
+
compute_ligature_score, compute_diacritic_score
|
| 118 |
+
)
|
| 119 |
+
lig = compute_ligature_score(doc.ground_truth, ocr_result.text)
|
| 120 |
+
diac = compute_diacritic_score(doc.ground_truth, ocr_result.text)
|
| 121 |
+
char_scores_data = {
|
| 122 |
+
"ligature": lig.as_dict(),
|
| 123 |
+
"diacritic": diac.as_dict(),
|
| 124 |
+
}
|
| 125 |
+
except Exception:
|
| 126 |
+
pass
|
| 127 |
+
|
| 128 |
+
try:
|
| 129 |
+
from picarones.core.taxonomy import classify_errors
|
| 130 |
+
tax = classify_errors(doc.ground_truth, ocr_result.text)
|
| 131 |
+
taxonomy_data = tax.as_dict()
|
| 132 |
+
except Exception:
|
| 133 |
+
pass
|
| 134 |
+
|
| 135 |
+
try:
|
| 136 |
+
from picarones.core.structure import analyze_structure
|
| 137 |
+
struct = analyze_structure(doc.ground_truth, ocr_result.text)
|
| 138 |
+
structure_data = struct.as_dict()
|
| 139 |
+
except Exception:
|
| 140 |
+
pass
|
| 141 |
+
|
| 142 |
+
# Qualité image (indépendant du succès OCR)
|
| 143 |
+
try:
|
| 144 |
+
from picarones.core.image_quality import analyze_image_quality
|
| 145 |
+
iq = analyze_image_quality(doc.image_path)
|
| 146 |
+
if iq.error is None:
|
| 147 |
+
image_quality_data = iq.as_dict()
|
| 148 |
+
except Exception:
|
| 149 |
+
pass
|
| 150 |
+
|
| 151 |
document_results.append(
|
| 152 |
DocumentResult(
|
| 153 |
doc_id=doc.doc_id,
|
|
|
|
| 159 |
engine_error=ocr_result.error,
|
| 160 |
ocr_intermediate=ocr_intermediate,
|
| 161 |
pipeline_metadata=pipeline_meta,
|
| 162 |
+
confusion_matrix=confusion_data,
|
| 163 |
+
char_scores=char_scores_data,
|
| 164 |
+
taxonomy=taxonomy_data,
|
| 165 |
+
structure=structure_data,
|
| 166 |
+
image_quality=image_quality_data,
|
| 167 |
)
|
| 168 |
)
|
| 169 |
|
| 170 |
engine_version = engine._safe_version()
|
| 171 |
pipeline_info = _build_pipeline_info(engine, document_results)
|
| 172 |
|
| 173 |
+
# Agrégation Sprint 5
|
| 174 |
+
agg_confusion = _aggregate_confusion(document_results)
|
| 175 |
+
agg_char_scores = _aggregate_char_scores(document_results)
|
| 176 |
+
agg_taxonomy = _aggregate_taxonomy(document_results)
|
| 177 |
+
agg_structure = _aggregate_structure(document_results)
|
| 178 |
+
agg_image_quality = _aggregate_image_quality(document_results)
|
| 179 |
+
|
| 180 |
report = EngineReport(
|
| 181 |
engine_name=engine.name,
|
| 182 |
engine_version=engine_version,
|
| 183 |
engine_config=engine.config,
|
| 184 |
document_results=document_results,
|
| 185 |
pipeline_info=pipeline_info,
|
| 186 |
+
aggregated_confusion=agg_confusion,
|
| 187 |
+
aggregated_char_scores=agg_char_scores,
|
| 188 |
+
aggregated_taxonomy=agg_taxonomy,
|
| 189 |
+
aggregated_structure=agg_structure,
|
| 190 |
+
aggregated_image_quality=agg_image_quality,
|
| 191 |
)
|
| 192 |
engine_reports.append(report)
|
| 193 |
logger.info(
|
|
|
|
| 252 |
}
|
| 253 |
|
| 254 |
return info
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
# ---------------------------------------------------------------------------
|
| 258 |
+
# Helpers d'agrégation Sprint 5
|
| 259 |
+
# ---------------------------------------------------------------------------
|
| 260 |
+
|
| 261 |
+
def _aggregate_confusion(doc_results: list) -> Optional[dict]:
|
| 262 |
+
"""Agrège les matrices de confusion unicode sur tous les documents."""
|
| 263 |
+
try:
|
| 264 |
+
from picarones.core.confusion import aggregate_confusion_matrices, ConfusionMatrix
|
| 265 |
+
matrices = [
|
| 266 |
+
ConfusionMatrix(**dr.confusion_matrix)
|
| 267 |
+
for dr in doc_results
|
| 268 |
+
if dr.confusion_matrix is not None
|
| 269 |
+
]
|
| 270 |
+
if not matrices:
|
| 271 |
+
return None
|
| 272 |
+
agg = aggregate_confusion_matrices(matrices)
|
| 273 |
+
return agg.as_compact_dict(min_count=2)
|
| 274 |
+
except Exception:
|
| 275 |
+
return None
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def _aggregate_char_scores(doc_results: list) -> Optional[dict]:
|
| 279 |
+
"""Agrège les scores ligatures/diacritiques."""
|
| 280 |
+
try:
|
| 281 |
+
from picarones.core.char_scores import (
|
| 282 |
+
aggregate_ligature_scores, aggregate_diacritic_scores,
|
| 283 |
+
LigatureScore, DiacriticScore,
|
| 284 |
+
)
|
| 285 |
+
lig_scores = [
|
| 286 |
+
LigatureScore(**dr.char_scores["ligature"])
|
| 287 |
+
for dr in doc_results
|
| 288 |
+
if dr.char_scores is not None
|
| 289 |
+
]
|
| 290 |
+
diac_scores = [
|
| 291 |
+
DiacriticScore(**dr.char_scores["diacritic"])
|
| 292 |
+
for dr in doc_results
|
| 293 |
+
if dr.char_scores is not None
|
| 294 |
+
]
|
| 295 |
+
if not lig_scores:
|
| 296 |
+
return None
|
| 297 |
+
return {
|
| 298 |
+
"ligature": aggregate_ligature_scores(lig_scores),
|
| 299 |
+
"diacritic": aggregate_diacritic_scores(diac_scores),
|
| 300 |
+
}
|
| 301 |
+
except Exception:
|
| 302 |
+
return None
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def _aggregate_taxonomy(doc_results: list) -> Optional[dict]:
|
| 306 |
+
"""Agrège les classifications taxonomiques."""
|
| 307 |
+
try:
|
| 308 |
+
from picarones.core.taxonomy import aggregate_taxonomy, TaxonomyResult
|
| 309 |
+
results = [
|
| 310 |
+
TaxonomyResult.from_dict(dr.taxonomy)
|
| 311 |
+
for dr in doc_results
|
| 312 |
+
if dr.taxonomy is not None
|
| 313 |
+
]
|
| 314 |
+
if not results:
|
| 315 |
+
return None
|
| 316 |
+
return aggregate_taxonomy(results)
|
| 317 |
+
except Exception:
|
| 318 |
+
return None
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _aggregate_structure(doc_results: list) -> Optional[dict]:
|
| 322 |
+
"""Agrège les métriques structurelles."""
|
| 323 |
+
try:
|
| 324 |
+
from picarones.core.structure import aggregate_structure, StructureResult
|
| 325 |
+
results = [
|
| 326 |
+
StructureResult.from_dict(dr.structure)
|
| 327 |
+
for dr in doc_results
|
| 328 |
+
if dr.structure is not None
|
| 329 |
+
]
|
| 330 |
+
if not results:
|
| 331 |
+
return None
|
| 332 |
+
return aggregate_structure(results)
|
| 333 |
+
except Exception:
|
| 334 |
+
return None
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def _aggregate_image_quality(doc_results: list) -> Optional[dict]:
|
| 338 |
+
"""Agrège les métriques de qualité image."""
|
| 339 |
+
try:
|
| 340 |
+
from picarones.core.image_quality import aggregate_image_quality, ImageQualityResult
|
| 341 |
+
results = [
|
| 342 |
+
ImageQualityResult.from_dict(dr.image_quality)
|
| 343 |
+
for dr in doc_results
|
| 344 |
+
if dr.image_quality is not None
|
| 345 |
+
]
|
| 346 |
+
if not results:
|
| 347 |
+
return None
|
| 348 |
+
return aggregate_image_quality(results)
|
| 349 |
+
except Exception:
|
| 350 |
+
return None
|
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Analyse structurelle des résultats OCR.
|
| 2 |
+
|
| 3 |
+
Mesures
|
| 4 |
+
-------
|
| 5 |
+
- **Taux de fusion de lignes** : l'OCR produit moins de lignes que le GT
|
| 6 |
+
(plusieurs lignes GT fusionnées en une seule).
|
| 7 |
+
- **Taux de fragmentation** : l'OCR produit plus de lignes que le GT
|
| 8 |
+
(une ligne GT découpée en plusieurs).
|
| 9 |
+
- **Score d'ordre de lecture** : corrélation entre l'ordre des mots GT et OCR,
|
| 10 |
+
approximé par la longueur de la sous-séquence commune la plus longue (LCS).
|
| 11 |
+
- **Taux de conservation des paragraphes** : respect des sauts de paragraphe.
|
| 12 |
+
|
| 13 |
+
Ces métriques sont calculées indépendamment du contenu textuel — elles mesurent
|
| 14 |
+
la fidélité de la mise en page, pas la qualité des caractères.
|
| 15 |
+
|
| 16 |
+
Note : sans bounding boxes disponibles, l'analyse se base uniquement sur les
|
| 17 |
+
sauts de ligne présents dans les textes GT et OCR.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import difflib
|
| 23 |
+
from dataclasses import dataclass
|
| 24 |
+
from typing import Optional
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class StructureResult:
    """Structural analysis outcome for a single document."""

    gt_line_count: int = 0        # number of non-empty lines in the ground truth
    ocr_line_count: int = 0       # number of non-empty lines in the OCR output

    line_fusion_count: int = 0          # GT lines absorbed into a single OCR line
    line_fragmentation_count: int = 0   # GT lines split across several OCR lines

    reading_order_score: float = 1.0            # [0, 1]; 1.0 = perfect word order
    paragraph_conservation_score: float = 1.0   # [0, 1]; paragraph breaks preserved

    @property
    def line_fusion_rate(self) -> float:
        """Fusion rate = fusions / GT lines (0.0 when the GT has no lines)."""
        if self.gt_line_count > 0:
            return self.line_fusion_count / self.gt_line_count
        return 0.0

    @property
    def line_fragmentation_rate(self) -> float:
        """Fragmentation rate = fragmentations / GT lines (0.0 when the GT has no lines)."""
        if self.gt_line_count > 0:
            return self.line_fragmentation_count / self.gt_line_count
        return 0.0

    @property
    def line_accuracy(self) -> float:
        """Line-count accuracy: 1 - |gt - ocr| / max(gt, ocr)."""
        if self.gt_line_count == 0 and self.ocr_line_count == 0:
            return 1.0
        largest = max(self.gt_line_count, self.ocr_line_count)
        diff = abs(self.gt_line_count - self.ocr_line_count)
        return max(0.0, 1.0 - diff / largest)

    def as_dict(self) -> dict:
        """Serialize to a plain dict; derived rates are rounded to 4 decimals."""
        payload = {
            "gt_line_count": self.gt_line_count,
            "ocr_line_count": self.ocr_line_count,
            "line_fusion_count": self.line_fusion_count,
            "line_fragmentation_count": self.line_fragmentation_count,
        }
        payload["line_fusion_rate"] = round(self.line_fusion_rate, 4)
        payload["line_fragmentation_rate"] = round(self.line_fragmentation_rate, 4)
        payload["line_accuracy"] = round(self.line_accuracy, 4)
        payload["reading_order_score"] = round(self.reading_order_score, 4)
        payload["paragraph_conservation_score"] = round(self.paragraph_conservation_score, 4)
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "StructureResult":
        """Rebuild a StructureResult from a dict produced by :meth:`as_dict`.

        Derived rates present in the dict are ignored; they are recomputed
        from the stored counts.
        """
        return cls(
            gt_line_count=data.get("gt_line_count", 0),
            ocr_line_count=data.get("ocr_line_count", 0),
            line_fusion_count=data.get("line_fusion_count", 0),
            line_fragmentation_count=data.get("line_fragmentation_count", 0),
            reading_order_score=data.get("reading_order_score", 1.0),
            paragraph_conservation_score=data.get("paragraph_conservation_score", 1.0),
        )
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def analyze_structure(ground_truth: str, hypothesis: str) -> StructureResult:
    """Compare the layout of an OCR output against the ground truth.

    Parameters
    ----------
    ground_truth:
        Reference text (ground truth), newline-separated.
    hypothesis:
        OCR output text, newline-separated.

    Returns
    -------
    StructureResult
    """
    # Only lines with visible content count; blank lines are ignored.
    gt_lines = [line for line in ground_truth.splitlines() if line.strip()]
    ocr_lines = [line for line in hypothesis.splitlines() if line.strip()]

    # Line fusions and fragmentations from an alignment of the line lists.
    fusions, fragmentations = _count_line_changes(gt_lines, ocr_lines)

    return StructureResult(
        gt_line_count=len(gt_lines),
        ocr_line_count=len(ocr_lines),
        line_fusion_count=fusions,
        line_fragmentation_count=fragmentations,
        # Word-level LCS approximation of reading-order fidelity.
        reading_order_score=_reading_order_score(ground_truth, hypothesis),
        # Paragraph breaks = blank lines (double newline).
        paragraph_conservation_score=_paragraph_conservation_score(ground_truth, hypothesis),
    )
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def _count_line_changes(gt_lines: list[str], ocr_lines: list[str]) -> tuple[int, int]:
|
| 131 |
+
"""Compte les fusions et fragmentations de lignes via SequenceMatcher."""
|
| 132 |
+
if not gt_lines or not ocr_lines:
|
| 133 |
+
return 0, 0
|
| 134 |
+
|
| 135 |
+
fusion_count = 0
|
| 136 |
+
frag_count = 0
|
| 137 |
+
|
| 138 |
+
# Aligner les lignes par contenu
|
| 139 |
+
matcher = difflib.SequenceMatcher(
|
| 140 |
+
None,
|
| 141 |
+
[l.strip()[:30] for l in gt_lines], # fingerprint court pour la comparaison
|
| 142 |
+
[l.strip()[:30] for l in ocr_lines],
|
| 143 |
+
autojunk=False,
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
| 147 |
+
if tag == "replace":
|
| 148 |
+
gt_len = i2 - i1
|
| 149 |
+
ocr_len = j2 - j1
|
| 150 |
+
if ocr_len < gt_len:
|
| 151 |
+
# Moins de lignes OCR → fusions
|
| 152 |
+
fusion_count += gt_len - ocr_len
|
| 153 |
+
elif ocr_len > gt_len:
|
| 154 |
+
# Plus de lignes OCR → fragmentations
|
| 155 |
+
frag_count += ocr_len - gt_len
|
| 156 |
+
elif tag == "delete":
|
| 157 |
+
# Lignes GT supprimées dans l'OCR → lacunes (pas fusion/frag)
|
| 158 |
+
pass
|
| 159 |
+
elif tag == "insert":
|
| 160 |
+
# Lignes insérées par l'OCR
|
| 161 |
+
frag_count += j2 - j1
|
| 162 |
+
|
| 163 |
+
return fusion_count, frag_count
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _reading_order_score(ground_truth: str, hypothesis: str) -> float:
|
| 167 |
+
"""Score d'ordre de lecture [0, 1] basé sur la LCS des mots.
|
| 168 |
+
|
| 169 |
+
On calcule la longueur de la sous-séquence commune la plus longue (LCS)
|
| 170 |
+
entre les listes de mots GT et OCR. Un score de 1 signifie que tous les
|
| 171 |
+
mots communs apparaissent dans le même ordre.
|
| 172 |
+
"""
|
| 173 |
+
gt_words = ground_truth.split()
|
| 174 |
+
hyp_words = hypothesis.split()
|
| 175 |
+
|
| 176 |
+
if not gt_words or not hyp_words:
|
| 177 |
+
return 1.0
|
| 178 |
+
|
| 179 |
+
# Utiliser SequenceMatcher pour approximer la LCS
|
| 180 |
+
matcher = difflib.SequenceMatcher(None, gt_words, hyp_words, autojunk=False)
|
| 181 |
+
# Ratio est 2 * nb_correspondances / (len_gt + len_ocr)
|
| 182 |
+
# C'est un proxy raisonnable de l'ordre de lecture
|
| 183 |
+
ratio = matcher.ratio()
|
| 184 |
+
return round(ratio, 4)
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _paragraph_conservation_score(ground_truth: str, hypothesis: str) -> float:
|
| 188 |
+
"""Score de conservation des paragraphes [0, 1].
|
| 189 |
+
|
| 190 |
+
Compte les sauts de paragraphe (lignes vides) dans le GT et mesure
|
| 191 |
+
le taux de conservation dans l'OCR.
|
| 192 |
+
"""
|
| 193 |
+
# Un saut de paragraphe = deux sauts de ligne consécutifs
|
| 194 |
+
gt_paras = [p for p in ground_truth.split("\n\n") if p.strip()]
|
| 195 |
+
ocr_paras = [p for p in hypothesis.split("\n\n") if p.strip()]
|
| 196 |
+
|
| 197 |
+
n_gt_paras = len(gt_paras)
|
| 198 |
+
if n_gt_paras <= 1:
|
| 199 |
+
return 1.0 # pas de paragraphe distinct → score parfait
|
| 200 |
+
|
| 201 |
+
n_ocr_paras = len(ocr_paras)
|
| 202 |
+
delta = abs(n_gt_paras - n_ocr_paras)
|
| 203 |
+
score = max(0.0, 1.0 - delta / n_gt_paras)
|
| 204 |
+
return round(score, 4)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def aggregate_structure(results: list[StructureResult]) -> dict:
    """Aggregate per-document structural results over a corpus.

    Returns an empty dict when *results* is empty; otherwise mean values
    (rounded to 4 decimals) plus the document count.
    """
    if not results:
        return {}

    import statistics

    def rounded_mean(values: list[float]) -> float:
        # Guard against an empty list to mirror the per-metric fallback.
        if not values:
            return 0.0
        return round(statistics.mean(values), 4)

    return {
        "mean_line_fusion_rate": rounded_mean([r.line_fusion_rate for r in results]),
        "mean_line_fragmentation_rate": rounded_mean([r.line_fragmentation_rate for r in results]),
        "mean_reading_order_score": rounded_mean([r.reading_order_score for r in results]),
        "mean_paragraph_conservation": rounded_mean([r.paragraph_conservation_score for r in results]),
        "mean_line_accuracy": rounded_mean([r.line_accuracy for r in results]),
        "document_count": len(results),
    }
|
|
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Taxonomie des erreurs OCR — classification automatique (classes 1 à 9).
|
| 2 |
+
|
| 3 |
+
Chaque erreur identifiée par l'alignement GT↔OCR est catégorisée selon
|
| 4 |
+
la taxonomie Picarones :
|
| 5 |
+
|
| 6 |
+
| Classe | Nom | Description |
|
| 7 |
+
|--------|-------------------|----------------------------------------------------|
|
| 8 |
+
| 1 | visual_confusion | Confusion morphologique (rn/m, l/1, O/0, u/n…) |
|
| 9 |
+
| 2 | diacritic_error | Diacritique absent, incorrect ou ajouté |
|
| 10 |
+
| 3 | case_error | Erreur de casse uniquement (A/a) |
|
| 11 |
+
| 4 | ligature_error | Ligature non résolue ou mal résolue |
|
| 12 |
+
| 5 | abbreviation_error| Abréviation médiévale non développée |
|
| 13 |
+
| 6 | hapax | Mot introuvable dans tout lexique |
|
| 14 |
+
| 7 | segmentation_error| Fusion ou fragmentation de tokens (mots/lignes) |
|
| 15 |
+
| 8 | oov_character | Caractère hors-vocabulaire du moteur |
|
| 16 |
+
| 9 | lacuna | Texte présent dans le GT absent de l'OCR |
|
| 17 |
+
| 10 | over_normalization| Sur-normalisation LLM (voir pipelines/) |
|
| 18 |
+
|
| 19 |
+
Note : la classe 10 est calculée par picarones/pipelines/over_normalization.py.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import difflib
|
| 25 |
+
import unicodedata
|
| 26 |
+
from dataclasses import dataclass, field
|
| 27 |
+
from typing import Optional
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ---------------------------------------------------------------------------
# Reference tables for the classification
# ---------------------------------------------------------------------------

#: Well-known OCR visual confusions (morphologically similar glyphs),
#: keyed by the unordered pair so either direction matches.
VISUAL_CONFUSIONS: dict[frozenset, str] = {}
_VISUAL_PAIRS: list[tuple[str, str]] = [
    # Lowercase letters (and a few multi-character look-alikes such as rn/m)
    ("r", "n"), ("rn", "m"), ("l", "1"), ("l", "i"), ("l", "|"),
    ("O", "0"), ("O", "o"), ("u", "n"), ("n", "u"), ("v", "u"),
    ("c", "e"), ("e", "c"), ("a", "o"), ("o", "a"),
    ("f", "ſ"), ("ſ", "f"), ("f", "t"),
    ("h", "li"), ("h", "lı"),
    ("m", "rn"), ("m", "in"),
    ("d", "cl"), ("d", "a"),
    ("q", "g"), ("p", "q"),
    # Uppercase/lowercase homoglyphs (class 1 — visual, not class 3 — case)
    ("I", "l"), ("I", "1"),
    # Digits
    ("1", "I"), ("1", "l"), ("0", "O"),
    # Punctuation
    (".", ","), (",", "."),
]
for _a, _b in _VISUAL_PAIRS:
    # frozenset key makes the pair symmetric; value is a display label.
    VISUAL_CONFUSIONS[frozenset({_a, _b})] = f"{_a}/{_b}"

#: Ligature pairs used for ligature-error detection (imported here, after the
#: tables above, hence the deliberate mid-module import — noqa: E402).
from picarones.core.char_scores import LIGATURE_TABLE, DIACRITIC_MAP  # noqa: E402

# Characters assumed in-vocabulary for the engines (basic Latin alphabet,
# digits and common punctuation); anything outside may be flagged as OOV.
_LATIN_BASIC = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
                   " \t\n.,;:!?-_'\"«»()[]{}/@#%&*+=/\\|<>~^")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
# Résultat structuré
|
| 66 |
+
# ---------------------------------------------------------------------------
|
| 67 |
+
|
| 68 |
+
@dataclass
class TaxonomyResult:
    """Outcome of the taxonomic error classification for one document."""

    # Error count per class; keys: 'visual_confusion', 'diacritic_error', ...
    counts: dict[str, int] = field(default_factory=dict)

    # Sample errors per class, each shaped as
    # {'gt': str, 'ocr': str, 'position': int} (at most 5 kept per class).
    examples: dict[str, list[dict]] = field(default_factory=dict)

    # Total number of classified errors.
    total_errors: int = 0

    @property
    def class_distribution(self) -> dict[str, float]:
        """Relative share (0-1) of each error class; empty when no errors."""
        total = self.total_errors
        if not total:
            return {}
        return {name: round(count / total, 4) for name, count in self.counts.items()}

    def as_dict(self) -> dict:
        """Serialize to a plain dict, keeping at most 3 examples per class."""
        trimmed_examples = {name: samples[:3] for name, samples in self.examples.items()}
        return {
            "counts": self.counts,
            "total_errors": self.total_errors,
            "class_distribution": self.class_distribution,
            "examples": trimmed_examples,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "TaxonomyResult":
        """Rebuild a TaxonomyResult from a dict produced by :meth:`as_dict`."""
        return cls(
            counts=data.get("counts", {}),
            examples=data.get("examples", {}),
            total_errors=data.get("total_errors", 0),
        )
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Class names in taxonomy order (classes 1 through 9; class 10,
# over_normalization, is computed in picarones/pipelines/).
ERROR_CLASSES = [
    "visual_confusion",
    "diacritic_error",
    "case_error",
    "ligature_error",
    "abbreviation_error",
    "hapax",
    "segmentation_error",
    "oov_character",
    "lacuna",
]
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# ---------------------------------------------------------------------------
|
| 127 |
+
# Classification principale
|
| 128 |
+
# ---------------------------------------------------------------------------
|
| 129 |
+
|
| 130 |
+
def classify_errors(
    ground_truth: str,
    hypothesis: str,
    max_examples: int = 5,
) -> TaxonomyResult:
    """Automatically classify the OCR errors in a GT/OCR pair.

    The alignment uses difflib.SequenceMatcher at word level to detect
    segmentation errors and lacunae, then delegates word pairs to
    ``_classify_word_error`` for the character-level classes.

    Parameters
    ----------
    ground_truth:
        Reference text (ground truth).
    hypothesis:
        Text produced by the OCR engine.
    max_examples:
        Maximum number of examples kept per class.

    Returns
    -------
    TaxonomyResult
    """
    counts: dict[str, int] = {cls: 0 for cls in ERROR_CLASSES}
    examples: dict[str, list[dict]] = {cls: [] for cls in ERROR_CLASSES}
    total = 0

    if not ground_truth and not hypothesis:
        return TaxonomyResult(counts=counts, examples=examples, total_errors=0)

    # -----------------------------------------------------------------------
    # Word level: detect segmentation (class 7) and lacunae (class 9)
    # -----------------------------------------------------------------------
    gt_words = ground_truth.split()
    hyp_words = hypothesis.split()

    word_matcher = difflib.SequenceMatcher(None, gt_words, hyp_words, autojunk=False)
    for tag, i1, i2, j1, j2 in word_matcher.get_opcodes():
        if tag == "delete":
            # GT words missing from the OCR -> lacuna (class 9)
            for w in gt_words[i1:i2]:
                counts["lacuna"] += 1
                total += 1
                if len(examples["lacuna"]) < max_examples:
                    examples["lacuna"].append({"gt": w, "ocr": "", "position": i1})

        elif tag == "insert":
            # Words invented by the OCR -> usually class 8 (out-of-vocabulary).
            # NOTE(review): inserted words that are NOT OOV are left uncounted,
            # and no example is recorded here — presumably intentional; confirm.
            for w in hyp_words[j1:j2]:
                if _is_oov_word(w):
                    counts["oov_character"] += 1
                    total += 1

        elif tag == "replace":
            gt_seg = gt_words[i1:i2]
            hyp_seg = hyp_words[j1:j2]
            # Segmentation: word fusion (fewer OCR words) or fragmentation
            if len(hyp_seg) != len(gt_seg):
                n_seg = abs(len(gt_seg) - len(hyp_seg))
                counts["segmentation_error"] += n_seg
                total += n_seg
                if len(examples["segmentation_error"]) < max_examples:
                    examples["segmentation_error"].append({
                        "gt": " ".join(gt_seg),
                        "ocr": " ".join(hyp_seg),
                        "position": i1,
                    })
            else:
                # Equal-length segments: classify word-to-word pairs
                for gt_w, hyp_w in zip(gt_seg, hyp_seg):
                    if gt_w != hyp_w:
                        _classify_word_error(
                            gt_w, hyp_w, counts, examples, max_examples
                        )
                        total += 1

    return TaxonomyResult(
        counts=counts,
        examples=examples,
        total_errors=total,
    )
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def _classify_word_error(
    gt_word: str,
    hyp_word: str,
    counts: dict[str, int],
    examples: dict[str, list[dict]],
    max_examples: int,
) -> None:
    """Classify the error between two non-identical words.

    Mutates *counts* and *examples* in place; exactly one class is
    incremented per call. The cascade order matters: case, then ligature,
    abbreviation, diacritic, visual confusion, OOV, and finally hapax as
    the residual catch-all.
    """
    # Class 3: pure case error (identical once case-folded)
    if gt_word.casefold() == hyp_word.casefold() and gt_word != hyp_word:
        counts["case_error"] += 1
        if len(examples["case_error"]) < max_examples:
            examples["case_error"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 4: ligature error (compared on NFC-normalized forms)
    gt_norm = unicodedata.normalize("NFC", gt_word)
    hyp_norm = unicodedata.normalize("NFC", hyp_word)
    if _is_ligature_error(gt_norm, hyp_norm):
        counts["ligature_error"] += 1
        if len(examples["ligature_error"]) < max_examples:
            examples["ligature_error"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 5: abbreviation error (GT carries ꝑ, ꝓ or ꝗ)
    if _is_abbreviation_error(gt_norm, hyp_norm):
        counts["abbreviation_error"] += 1
        if len(examples["abbreviation_error"]) < max_examples:
            examples["abbreviation_error"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 2: diacritic error
    if _is_diacritic_error(gt_norm, hyp_norm):
        counts["diacritic_error"] += 1
        if len(examples["diacritic_error"]) < max_examples:
            examples["diacritic_error"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 1: visual confusion (char-by-char look-alike comparison)
    if _is_visual_confusion(gt_norm, hyp_norm):
        counts["visual_confusion"] += 1
        if len(examples["visual_confusion"]) < max_examples:
            examples["visual_confusion"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 8: out-of-vocabulary character
    if _is_oov_word(hyp_word):
        counts["oov_character"] += 1
        if len(examples["oov_character"]) < max_examples:
            examples["oov_character"].append({"gt": gt_word, "ocr": hyp_word})
        return

    # Class 6: hapax (residual, unclassifiable error)
    counts["hapax"] += 1
    if len(examples["hapax"]) < max_examples:
        examples["hapax"].append({"gt": gt_word, "ocr": hyp_word})
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
def _is_ligature_error(gt: str, hyp: str) -> bool:
    """True when the GT/OCR difference involves a Unicode ligature.

    Detects both directions: the GT contains a ligature that the OCR
    decomposed into its character sequence, or the GT contains the
    sequence and the OCR produced the ligature.
    """
    # GT contains a ligature the OCR decomposed, or vice versa
    for lig, seqs in LIGATURE_TABLE.items():
        if lig in gt:
            # Ligature in GT, decomposed sequence in OCR (and no ligature left)
            for seq in seqs:
                if seq in hyp and lig not in hyp:
                    return True
        # Sequence in GT, ligature introduced by the OCR
        for seq in seqs:
            if seq in gt and lig in hyp:
                return True
    return False
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def _is_abbreviation_error(gt: str, hyp: str) -> bool:
|
| 286 |
+
"""Vrai si le GT contient un caractère d'abréviation médiévale."""
|
| 287 |
+
abbreviation_chars = "\uA751\uA753\uA757" # ꝑ ꝓ ꝗ
|
| 288 |
+
return any(c in gt for c in abbreviation_chars)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
def _is_diacritic_error(gt: str, hyp: str) -> bool:
|
| 292 |
+
"""Vrai si la différence est principalement due à des diacritiques."""
|
| 293 |
+
# Comparer les formes sans diacritiques
|
| 294 |
+
def strip_diacritics(text: str) -> str:
|
| 295 |
+
nfd = unicodedata.normalize("NFD", text)
|
| 296 |
+
return "".join(c for c in nfd if unicodedata.category(c) != "Mn")
|
| 297 |
+
|
| 298 |
+
gt_stripped = strip_diacritics(gt)
|
| 299 |
+
hyp_stripped = strip_diacritics(hyp)
|
| 300 |
+
# Si les mots sont identiques sans diacritiques → erreur diacritique
|
| 301 |
+
if gt_stripped.casefold() == hyp_stripped.casefold() and gt != hyp:
|
| 302 |
+
return True
|
| 303 |
+
# Si le GT contient des diacritiques que l'OCR a supprimés
|
| 304 |
+
gt_has_diac = any(c in DIACRITIC_MAP for c in gt)
|
| 305 |
+
hyp_missing_diac = any(c not in DIACRITIC_MAP for c in hyp if c.isalpha())
|
| 306 |
+
return gt_has_diac and len(gt) == len(hyp) and gt_stripped == hyp_stripped
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _is_visual_confusion(gt: str, hyp: str) -> bool:
    """True when the word pair differs by a known look-alike confusion."""
    # Visual confusions barely change word length; bail out on big gaps.
    if abs(len(gt) - len(hyp)) > 2:
        return False

    for pair in VISUAL_CONFUSIONS:
        members = list(pair)
        if len(members) != 2:
            continue
        a, b = members
        # Confusion in either direction: the GT side holds one glyph,
        # the OCR side holds the look-alike (and not the original glyph).
        if a in gt and b in hyp and a not in hyp:
            return True
        if b in gt and a in hyp and b not in hyp:
            return True
    return False
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def _is_oov_word(word: str) -> bool:
    """True when the word holds characters outside the basic Latin set.

    Alphabetic characters (including accented letters and ligatures) are
    never flagged; only non-letter symbols absent from ``_LATIN_BASIC``
    count as out-of-vocabulary.
    """
    for ch in word:
        if ch in _LATIN_BASIC or ch.isalpha():
            continue
        return True
    return False
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
# ---------------------------------------------------------------------------
|
| 331 |
+
# Agrégation
|
| 332 |
+
# ---------------------------------------------------------------------------
|
| 333 |
+
|
| 334 |
+
def aggregate_taxonomy(results: list[TaxonomyResult]) -> dict:
    """Aggregate per-document taxonomy results over a corpus.

    Returns combined counts, the total error count, and the relative
    distribution (all classes present, zeros when there are no errors).
    """
    combined: dict[str, int] = dict.fromkeys(ERROR_CLASSES, 0)
    total = 0
    for result in results:
        total += result.total_errors
        for name, count in result.counts.items():
            # .get() tolerates classes absent from ERROR_CLASSES.
            combined[name] = combined.get(name, 0) + count

    if total > 0:
        distribution = {name: round(count / total, 4) for name, count in combined.items()}
    else:
        distribution = {name: 0.0 for name in combined}

    return {
        "counts": combined,
        "total_errors": total,
        "class_distribution": distribution,
    }
|
|
@@ -18,6 +18,13 @@ from typing import Optional
|
|
| 18 |
from picarones.core.metrics import MetricsResult, aggregate_metrics
|
| 19 |
from picarones.core.results import BenchmarkResult, DocumentResult, EngineReport
|
| 20 |
from picarones.pipelines.over_normalization import detect_over_normalization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# ---------------------------------------------------------------------------
|
| 23 |
# Textes GT réalistes (documents patrimoniaux BnF)
|
|
@@ -290,6 +297,14 @@ def generate_sample_benchmark(
|
|
| 290 |
|
| 291 |
metrics = _make_metrics(gt, hypothesis)
|
| 292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
doc_results.append(
|
| 294 |
DocumentResult(
|
| 295 |
doc_id=doc_id,
|
|
@@ -300,6 +315,14 @@ def generate_sample_benchmark(
|
|
| 300 |
duration_seconds=duration,
|
| 301 |
ocr_intermediate=ocr_intermediate,
|
| 302 |
pipeline_metadata=pipeline_meta,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
)
|
| 304 |
)
|
| 305 |
|
|
@@ -321,12 +344,54 @@ def generate_sample_benchmark(
|
|
| 321 |
"document_count": len(over_norms),
|
| 322 |
}
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
report = EngineReport(
|
| 325 |
engine_name=engine_name,
|
| 326 |
engine_version=engine_version,
|
| 327 |
engine_config=engine_cfg,
|
| 328 |
document_results=doc_results,
|
| 329 |
pipeline_info=effective_pipeline_info,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
)
|
| 331 |
engine_reports.append(report)
|
| 332 |
|
|
|
|
| 18 |
from picarones.core.metrics import MetricsResult, aggregate_metrics
|
| 19 |
from picarones.core.results import BenchmarkResult, DocumentResult, EngineReport
|
| 20 |
from picarones.pipelines.over_normalization import detect_over_normalization
|
| 21 |
+
# Sprint 5 — métriques avancées
|
| 22 |
+
from picarones.core.confusion import build_confusion_matrix
|
| 23 |
+
from picarones.core.char_scores import compute_ligature_score, compute_diacritic_score
|
| 24 |
+
from picarones.core.taxonomy import classify_errors, aggregate_taxonomy
|
| 25 |
+
from picarones.core.structure import analyze_structure, aggregate_structure
|
| 26 |
+
from picarones.core.image_quality import generate_mock_quality_scores, aggregate_image_quality
|
| 27 |
+
from picarones.core.char_scores import aggregate_ligature_scores, aggregate_diacritic_scores
|
| 28 |
|
| 29 |
# ---------------------------------------------------------------------------
|
| 30 |
# Textes GT réalistes (documents patrimoniaux BnF)
|
|
|
|
| 297 |
|
| 298 |
metrics = _make_metrics(gt, hypothesis)
|
| 299 |
|
| 300 |
+
# Sprint 5 — métriques avancées patrimoniales
|
| 301 |
+
cm = build_confusion_matrix(gt, hypothesis)
|
| 302 |
+
lig_score = compute_ligature_score(gt, hypothesis)
|
| 303 |
+
diac_score = compute_diacritic_score(gt, hypothesis)
|
| 304 |
+
taxonomy_result = classify_errors(gt, hypothesis)
|
| 305 |
+
struct_result = analyze_structure(gt, hypothesis)
|
| 306 |
+
iq_result = generate_mock_quality_scores(doc_id, seed=rng.randint(0, 999999))
|
| 307 |
+
|
| 308 |
doc_results.append(
|
| 309 |
DocumentResult(
|
| 310 |
doc_id=doc_id,
|
|
|
|
| 315 |
duration_seconds=duration,
|
| 316 |
ocr_intermediate=ocr_intermediate,
|
| 317 |
pipeline_metadata=pipeline_meta,
|
| 318 |
+
confusion_matrix=cm.as_dict(),
|
| 319 |
+
char_scores={
|
| 320 |
+
"ligature": lig_score.as_dict(),
|
| 321 |
+
"diacritic": diac_score.as_dict(),
|
| 322 |
+
},
|
| 323 |
+
taxonomy=taxonomy_result.as_dict(),
|
| 324 |
+
structure=struct_result.as_dict(),
|
| 325 |
+
image_quality=iq_result.as_dict(),
|
| 326 |
)
|
| 327 |
)
|
| 328 |
|
|
|
|
| 344 |
"document_count": len(over_norms),
|
| 345 |
}
|
| 346 |
|
| 347 |
+
# Agrégation Sprint 5
|
| 348 |
+
from picarones.core.confusion import aggregate_confusion_matrices, ConfusionMatrix
|
| 349 |
+
from picarones.core.char_scores import LigatureScore, DiacriticScore
|
| 350 |
+
from picarones.core.taxonomy import TaxonomyResult
|
| 351 |
+
from picarones.core.structure import StructureResult
|
| 352 |
+
from picarones.core.image_quality import ImageQualityResult
|
| 353 |
+
|
| 354 |
+
agg_confusion = aggregate_confusion_matrices([
|
| 355 |
+
ConfusionMatrix(**dr.confusion_matrix)
|
| 356 |
+
for dr in doc_results if dr.confusion_matrix
|
| 357 |
+
]).as_compact_dict(min_count=1)
|
| 358 |
+
|
| 359 |
+
agg_lig = aggregate_ligature_scores([
|
| 360 |
+
LigatureScore(**dr.char_scores["ligature"])
|
| 361 |
+
for dr in doc_results if dr.char_scores
|
| 362 |
+
])
|
| 363 |
+
agg_diac = aggregate_diacritic_scores([
|
| 364 |
+
DiacriticScore(**dr.char_scores["diacritic"])
|
| 365 |
+
for dr in doc_results if dr.char_scores
|
| 366 |
+
])
|
| 367 |
+
agg_char_scores = {"ligature": agg_lig, "diacritic": agg_diac}
|
| 368 |
+
|
| 369 |
+
agg_taxonomy = aggregate_taxonomy([
|
| 370 |
+
TaxonomyResult.from_dict(dr.taxonomy)
|
| 371 |
+
for dr in doc_results if dr.taxonomy
|
| 372 |
+
])
|
| 373 |
+
|
| 374 |
+
agg_structure = aggregate_structure([
|
| 375 |
+
StructureResult.from_dict(dr.structure)
|
| 376 |
+
for dr in doc_results if dr.structure
|
| 377 |
+
])
|
| 378 |
+
|
| 379 |
+
agg_iq = aggregate_image_quality([
|
| 380 |
+
ImageQualityResult.from_dict(dr.image_quality)
|
| 381 |
+
for dr in doc_results if dr.image_quality
|
| 382 |
+
])
|
| 383 |
+
|
| 384 |
report = EngineReport(
|
| 385 |
engine_name=engine_name,
|
| 386 |
engine_version=engine_version,
|
| 387 |
engine_config=engine_cfg,
|
| 388 |
document_results=doc_results,
|
| 389 |
pipeline_info=effective_pipeline_info,
|
| 390 |
+
aggregated_confusion=agg_confusion,
|
| 391 |
+
aggregated_char_scores=agg_char_scores,
|
| 392 |
+
aggregated_taxonomy=agg_taxonomy,
|
| 393 |
+
aggregated_structure=agg_structure,
|
| 394 |
+
aggregated_image_quality=agg_iq,
|
| 395 |
)
|
| 396 |
engine_reports.append(report)
|
| 397 |
|
|
@@ -99,6 +99,13 @@ def _build_report_data(benchmark: BenchmarkResult, images_b64: dict[str, str]) -
|
|
| 99 |
# Champs pipeline OCR+LLM (vides pour les moteurs OCR seuls)
|
| 100 |
"is_pipeline": report.is_pipeline,
|
| 101 |
"pipeline_info": report.pipeline_info,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
}
|
| 103 |
engines_summary.append(entry)
|
| 104 |
|
|
@@ -146,6 +153,16 @@ def _build_report_data(benchmark: BenchmarkResult, images_b64: dict[str, str]) -
|
|
| 146 |
if on is not None:
|
| 147 |
er_entry["over_normalization"] = on
|
| 148 |
er_entry["pipeline_mode"] = dr.pipeline_metadata.get("pipeline_mode")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
engine_results.append(er_entry)
|
| 150 |
|
| 151 |
# CER moyen sur ce document (pour le badge galerie)
|
|
@@ -613,6 +630,7 @@ footer {{
|
|
| 613 |
<button class="tab-btn active" onclick="showView('ranking')">Classement</button>
|
| 614 |
<button class="tab-btn" onclick="showView('gallery')">Galerie</button>
|
| 615 |
<button class="tab-btn" onclick="showView('document')">Document</button>
|
|
|
|
| 616 |
<button class="tab-btn" onclick="showView('analyses')">Analyses</button>
|
| 617 |
</div>
|
| 618 |
<div class="meta" id="nav-meta">—</div>
|
|
@@ -637,6 +655,8 @@ footer {{
|
|
| 637 |
<th data-col="wer" class="sortable">WER<i class="sort-icon">↕</i></th>
|
| 638 |
<th data-col="mer" class="sortable">MER<i class="sort-icon">↕</i></th>
|
| 639 |
<th data-col="wil" class="sortable">WIL<i class="sort-icon">↕</i></th>
|
|
|
|
|
|
|
| 640 |
<th>CER médian</th>
|
| 641 |
<th>CER min</th>
|
| 642 |
<th>CER max</th>
|
|
@@ -786,6 +806,59 @@ footer {{
|
|
| 786 |
</div>
|
| 787 |
</div>
|
| 788 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 789 |
</div>
|
| 790 |
</div>
|
| 791 |
|
|
@@ -819,13 +892,15 @@ function showView(name) {{
|
|
| 819 |
document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));
|
| 820 |
document.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));
|
| 821 |
document.getElementById('view-' + name).classList.add('active');
|
|
|
|
|
|
|
|
|
|
| 822 |
document.querySelectorAll('.tab-btn').forEach(b => {{
|
| 823 |
-
if (b.textContent.toLowerCase().startsWith(
|
| 824 |
-
{{ranking:'c',gallery:'g',document:'d',analyses:'a'}}[name]
|
| 825 |
-
)) b.classList.add('active');
|
| 826 |
}});
|
| 827 |
currentView = name;
|
| 828 |
if (name === 'analyses' && !chartsBuilt) buildCharts();
|
|
|
|
| 829 |
}}
|
| 830 |
|
| 831 |
// ── Formatage ───────────────────────────────────────────────────
|
|
@@ -868,6 +943,15 @@ function renderDiff(ops) {{
|
|
| 868 |
}}).join(' ');
|
| 869 |
}}
|
| 870 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 871 |
// ── Vue Classement ──────────────────────────────────────────────
|
| 872 |
let rankingSort = {{ col: 'cer', dir: 'asc' }};
|
| 873 |
|
|
@@ -945,6 +1029,8 @@ function renderRanking() {{
|
|
| 945 |
<td>${{pct(e.wer)}}</td>
|
| 946 |
<td>${{pct(e.mer)}}</td>
|
| 947 |
<td>${{pct(e.wil)}}</td>
|
|
|
|
|
|
|
| 948 |
<td style="color:var(--text-muted)">${{pct(e.cer_median)}}</td>
|
| 949 |
<td style="color:var(--text-muted)">${{pct(e.cer_min)}}</td>
|
| 950 |
<td style="color:var(--text-muted)">${{pct(e.cer_max)}}</td>
|
|
@@ -1222,6 +1308,8 @@ function buildCharts() {{
|
|
| 1222 |
buildRadar();
|
| 1223 |
buildCerPerDoc();
|
| 1224 |
buildDurationChart();
|
|
|
|
|
|
|
| 1225 |
}}
|
| 1226 |
|
| 1227 |
function buildCerHistogram() {{
|
|
@@ -1365,6 +1453,315 @@ function buildDurationChart() {{
|
|
| 1365 |
}});
|
| 1366 |
}}
|
| 1367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1368 |
// ── Init ────────────────────────────────────────────────────────
|
| 1369 |
function init() {{
|
| 1370 |
// Méta nav
|
|
|
|
| 99 |
# Champs pipeline OCR+LLM (vides pour les moteurs OCR seuls)
|
| 100 |
"is_pipeline": report.is_pipeline,
|
| 101 |
"pipeline_info": report.pipeline_info,
|
| 102 |
+
# Sprint 5 — métriques avancées patrimoniales
|
| 103 |
+
"ligature_score": _safe(report.ligature_score) if report.ligature_score is not None else None,
|
| 104 |
+
"diacritic_score": _safe(report.diacritic_score) if report.diacritic_score is not None else None,
|
| 105 |
+
"aggregated_confusion": report.aggregated_confusion,
|
| 106 |
+
"aggregated_taxonomy": report.aggregated_taxonomy,
|
| 107 |
+
"aggregated_structure": report.aggregated_structure,
|
| 108 |
+
"aggregated_image_quality": report.aggregated_image_quality,
|
| 109 |
}
|
| 110 |
engines_summary.append(entry)
|
| 111 |
|
|
|
|
| 153 |
if on is not None:
|
| 154 |
er_entry["over_normalization"] = on
|
| 155 |
er_entry["pipeline_mode"] = dr.pipeline_metadata.get("pipeline_mode")
|
| 156 |
+
# Sprint 5 — métriques avancées par document
|
| 157 |
+
if dr.char_scores is not None:
|
| 158 |
+
er_entry["ligature_score"] = _safe(dr.char_scores.get("ligature", {}).get("score"))
|
| 159 |
+
er_entry["diacritic_score"] = _safe(dr.char_scores.get("diacritic", {}).get("score"))
|
| 160 |
+
if dr.taxonomy is not None:
|
| 161 |
+
er_entry["taxonomy"] = dr.taxonomy
|
| 162 |
+
if dr.structure is not None:
|
| 163 |
+
er_entry["structure"] = dr.structure
|
| 164 |
+
if dr.image_quality is not None:
|
| 165 |
+
er_entry["image_quality"] = dr.image_quality
|
| 166 |
engine_results.append(er_entry)
|
| 167 |
|
| 168 |
# CER moyen sur ce document (pour le badge galerie)
|
|
|
|
| 630 |
<button class="tab-btn active" onclick="showView('ranking')">Classement</button>
|
| 631 |
<button class="tab-btn" onclick="showView('gallery')">Galerie</button>
|
| 632 |
<button class="tab-btn" onclick="showView('document')">Document</button>
|
| 633 |
+
<button class="tab-btn" onclick="showView('characters')">Caractères</button>
|
| 634 |
<button class="tab-btn" onclick="showView('analyses')">Analyses</button>
|
| 635 |
</div>
|
| 636 |
<div class="meta" id="nav-meta">—</div>
|
|
|
|
| 655 |
<th data-col="wer" class="sortable">WER<i class="sort-icon">↕</i></th>
|
| 656 |
<th data-col="mer" class="sortable">MER<i class="sort-icon">↕</i></th>
|
| 657 |
<th data-col="wil" class="sortable">WIL<i class="sort-icon">↕</i></th>
|
| 658 |
+
<th data-col="ligature_score" class="sortable" title="Taux de reconnaissance des ligatures (fi, fl, œ, æ, ff…)">Ligatures<i class="sort-icon">↕</i></th>
|
| 659 |
+
<th data-col="diacritic_score" class="sortable" title="Taux de conservation des diacritiques (accents, cédilles, trémas…)">Diacritiques<i class="sort-icon">↕</i></th>
|
| 660 |
<th>CER médian</th>
|
| 661 |
<th>CER min</th>
|
| 662 |
<th>CER max</th>
|
|
|
|
| 806 |
</div>
|
| 807 |
</div>
|
| 808 |
|
| 809 |
+
<div class="chart-card">
|
| 810 |
+
<h3>Qualité image ↔ CER (scatter plot)</h3>
|
| 811 |
+
<div class="chart-canvas-wrap">
|
| 812 |
+
<canvas id="chart-quality-cer"></canvas>
|
| 813 |
+
</div>
|
| 814 |
+
<div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem">
|
| 815 |
+
Chaque point = un document. Axe X = score qualité image [0–1]. Axe Y = CER. Corrélation négative attendue.
|
| 816 |
+
</div>
|
| 817 |
+
</div>
|
| 818 |
+
|
| 819 |
+
<div class="chart-card" style="grid-column:1/-1">
|
| 820 |
+
<h3>Taxonomie des erreurs par moteur</h3>
|
| 821 |
+
<div class="chart-canvas-wrap" style="max-height:300px">
|
| 822 |
+
<canvas id="chart-taxonomy"></canvas>
|
| 823 |
+
</div>
|
| 824 |
+
<div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem">
|
| 825 |
+
Distribution des classes d'erreurs (classes 1–9 de la taxonomie Picarones).
|
| 826 |
+
</div>
|
| 827 |
+
</div>
|
| 828 |
+
|
| 829 |
+
</div>
|
| 830 |
+
</div>
|
| 831 |
+
|
| 832 |
+
<!-- ════ Vue 5 : Caractères ════════════════════════════════════════ -->
|
| 833 |
+
<div id="view-characters" class="view">
|
| 834 |
+
<div class="card">
|
| 835 |
+
<h2>Analyse des caractères</h2>
|
| 836 |
+
|
| 837 |
+
<!-- Sélecteur de moteur -->
|
| 838 |
+
<div class="stat-row" style="margin-bottom:1rem">
|
| 839 |
+
<label for="char-engine-select" style="font-weight:600;margin-right:.5rem">Moteur :</label>
|
| 840 |
+
<select id="char-engine-select" onchange="renderCharView()"
|
| 841 |
+
style="padding:.35rem .7rem;border-radius:6px;border:1px solid var(--border)"></select>
|
| 842 |
+
</div>
|
| 843 |
+
|
| 844 |
+
<!-- Scores ligatures / diacritiques -->
|
| 845 |
+
<div class="stat-row" id="char-scores-row" style="gap:1.5rem;margin-bottom:1.5rem"></div>
|
| 846 |
+
|
| 847 |
+
<!-- Matrice de confusion unicode -->
|
| 848 |
+
<h3 style="margin-bottom:.75rem">Matrice de confusion unicode
|
| 849 |
+
<span style="font-size:.75rem;font-weight:400;color:var(--text-muted)">
|
| 850 |
+
— substitutions les plus fréquentes (caractère GT → caractère OCR)
|
| 851 |
+
</span>
|
| 852 |
+
</h3>
|
| 853 |
+
<div id="confusion-heatmap" style="overflow-x:auto;margin-bottom:1.5rem"></div>
|
| 854 |
+
|
| 855 |
+
<!-- Détail ligatures par type -->
|
| 856 |
+
<h3 style="margin-bottom:.75rem">Reconnaissance des ligatures</h3>
|
| 857 |
+
<div id="ligature-detail" style="margin-bottom:1.5rem"></div>
|
| 858 |
+
|
| 859 |
+
<!-- Taxonomie détaillée -->
|
| 860 |
+
<h3 style="margin-bottom:.75rem">Distribution taxonomique des erreurs</h3>
|
| 861 |
+
<div id="taxonomy-detail"></div>
|
| 862 |
</div>
|
| 863 |
</div>
|
| 864 |
|
|
|
|
| 892 |
document.querySelectorAll('.view').forEach(v => v.classList.remove('active'));
|
| 893 |
document.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));
|
| 894 |
document.getElementById('view-' + name).classList.add('active');
|
| 895 |
+
// Activer le bon onglet nav
|
| 896 |
+
const tabMap = {{ranking:'classement',gallery:'galerie',document:'document',characters:'caract',analyses:'analyses'}};
|
| 897 |
+
const prefix = tabMap[name] || name;
|
| 898 |
document.querySelectorAll('.tab-btn').forEach(b => {{
|
| 899 |
+
if (b.textContent.toLowerCase().startsWith(prefix.toLowerCase())) b.classList.add('active');
|
|
|
|
|
|
|
| 900 |
}});
|
| 901 |
currentView = name;
|
| 902 |
if (name === 'analyses' && !chartsBuilt) buildCharts();
|
| 903 |
+
if (name === 'characters' && !charViewBuilt) initCharView();
|
| 904 |
}}
|
| 905 |
|
| 906 |
// ── Formatage ───────────────────────────────────────────────────
|
|
|
|
| 943 |
}}).join(' ');
|
| 944 |
}}
|
| 945 |
|
| 946 |
+
// ── Score badge (ligatures / diacritiques) ───────────────────────
|
| 947 |
+
function _scoreBadge(v, label) {{
|
| 948 |
+
if (v === null || v === undefined) return '<span style="color:var(--text-muted)">—</span>';
|
| 949 |
+
const pctVal = (v * 100).toFixed(1);
|
| 950 |
+
const color = v >= 0.9 ? '#16a34a' : v >= 0.7 ? '#ca8a04' : '#dc2626';
|
| 951 |
+
const bg = v >= 0.9 ? '#f0fdf4' : v >= 0.7 ? '#fefce8' : '#fef2f2';
|
| 952 |
+
return `<span class="cer-badge" style="color:${{color}};background:${{bg}}" title="${{label}} : ${{pctVal}}%">${{pctVal}}%</span>`;
|
| 953 |
+
}}
|
| 954 |
+
|
| 955 |
// ── Vue Classement ──────────────────────────────────────────────
|
| 956 |
let rankingSort = {{ col: 'cer', dir: 'asc' }};
|
| 957 |
|
|
|
|
| 1029 |
<td>${{pct(e.wer)}}</td>
|
| 1030 |
<td>${{pct(e.mer)}}</td>
|
| 1031 |
<td>${{pct(e.wil)}}</td>
|
| 1032 |
+
<td>${{_scoreBadge(e.ligature_score, 'Ligatures')}}</td>
|
| 1033 |
+
<td>${{_scoreBadge(e.diacritic_score, 'Diacritiques')}}</td>
|
| 1034 |
<td style="color:var(--text-muted)">${{pct(e.cer_median)}}</td>
|
| 1035 |
<td style="color:var(--text-muted)">${{pct(e.cer_min)}}</td>
|
| 1036 |
<td style="color:var(--text-muted)">${{pct(e.cer_max)}}</td>
|
|
|
|
| 1308 |
buildRadar();
|
| 1309 |
buildCerPerDoc();
|
| 1310 |
buildDurationChart();
|
| 1311 |
+
buildQualityCerScatter();
|
| 1312 |
+
buildTaxonomyChart();
|
| 1313 |
}}
|
| 1314 |
|
| 1315 |
function buildCerHistogram() {{
|
|
|
|
| 1453 |
}});
|
| 1454 |
}}
|
| 1455 |
|
| 1456 |
+
function buildQualityCerScatter() {{
|
| 1457 |
+
const ctx = document.getElementById('chart-quality-cer');
|
| 1458 |
+
if (!ctx) return;
|
| 1459 |
+
// Construire les points : un par document, un dataset par moteur
|
| 1460 |
+
const datasets = DATA.engines.map((e, ei) => {{
|
| 1461 |
+
const points = DATA.documents.flatMap(doc => {{
|
| 1462 |
+
const er = doc.engine_results.find(r => r.engine === e.name);
|
| 1463 |
+
if (!er || er.error || !er.image_quality) return [];
|
| 1464 |
+
return [{{ x: er.image_quality.quality_score, y: er.cer * 100 }}];
|
| 1465 |
+
}});
|
| 1466 |
+
return {{
|
| 1467 |
+
label: e.name, data: points,
|
| 1468 |
+
backgroundColor: engineColor(ei) + 'bb',
|
| 1469 |
+
borderColor: engineColor(ei),
|
| 1470 |
+
borderWidth: 1, pointRadius: 5, pointHoverRadius: 7,
|
| 1471 |
+
}};
|
| 1472 |
+
}}).filter(d => d.data.length > 0);
|
| 1473 |
+
|
| 1474 |
+
if (!datasets.length) {{ ctx.parentElement.innerHTML = '<p style="color:var(--text-muted);padding:1rem">Aucune donnée de qualité image disponible.</p>'; return; }}
|
| 1475 |
+
|
| 1476 |
+
chartInstances['quality-cer'] = new Chart(ctx.getContext('2d'), {{
|
| 1477 |
+
type: 'scatter',
|
| 1478 |
+
data: {{ datasets }},
|
| 1479 |
+
options: {{
|
| 1480 |
+
responsive: true, maintainAspectRatio: false,
|
| 1481 |
+
plugins: {{
|
| 1482 |
+
legend: {{ position: 'top', labels: {{ font: {{ size: 11 }} }} }},
|
| 1483 |
+
tooltip: {{ callbacks: {{
|
| 1484 |
+
label: ctx => `${{ctx.dataset.label}}: qualité=${{ctx.parsed.x.toFixed(2)}}, CER=${{ctx.parsed.y.toFixed(1)}}%`,
|
| 1485 |
+
}} }},
|
| 1486 |
+
}},
|
| 1487 |
+
scales: {{
|
| 1488 |
+
x: {{ min: 0, max: 1, title: {{ display: true, text: 'Score qualité image [0–1]', font: {{ size: 11 }} }} }},
|
| 1489 |
+
y: {{ min: 0, title: {{ display: true, text: 'CER (%)', font: {{ size: 11 }} }} }},
|
| 1490 |
+
}},
|
| 1491 |
+
}},
|
| 1492 |
+
}});
|
| 1493 |
+
}}
|
| 1494 |
+
|
| 1495 |
+
function buildTaxonomyChart() {{
|
| 1496 |
+
const ctx = document.getElementById('chart-taxonomy');
|
| 1497 |
+
if (!ctx) return;
|
| 1498 |
+
const taxLabels = ['Confusion visuelle','Diacritique','Casse','Ligature','Abréviation','Hapax','Segmentation','Hors-vocab.','Lacune'];
|
| 1499 |
+
const taxKeys = ['visual_confusion','diacritic_error','case_error','ligature_error','abbreviation_error','hapax','segmentation_error','oov_character','lacuna'];
|
| 1500 |
+
const taxColors = ['#6366f1','#f59e0b','#ec4899','#14b8a6','#8b5cf6','#64748b','#f97316','#06b6d4','#ef4444'];
|
| 1501 |
+
|
| 1502 |
+
const datasets = DATA.engines.map((e, ei) => {{
|
| 1503 |
+
const tax = e.aggregated_taxonomy;
|
| 1504 |
+
const data = taxKeys.map(k => tax && tax.counts ? (tax.counts[k] || 0) : 0);
|
| 1505 |
+
return {{
|
| 1506 |
+
label: e.name, data,
|
| 1507 |
+
backgroundColor: engineColor(ei) + '99',
|
| 1508 |
+
borderColor: engineColor(ei),
|
| 1509 |
+
borderWidth: 1,
|
| 1510 |
+
}};
|
| 1511 |
+
}});
|
| 1512 |
+
|
| 1513 |
+
chartInstances['taxonomy'] = new Chart(ctx.getContext('2d'), {{
|
| 1514 |
+
type: 'bar',
|
| 1515 |
+
data: {{ labels: taxLabels, datasets }},
|
| 1516 |
+
options: {{
|
| 1517 |
+
responsive: true, maintainAspectRatio: false,
|
| 1518 |
+
plugins: {{ legend: {{ position: 'top', labels: {{ font: {{ size: 11 }} }} }} }},
|
| 1519 |
+
scales: {{
|
| 1520 |
+
x: {{ ticks: {{ font: {{ size: 10 }} }} }},
|
| 1521 |
+
y: {{ title: {{ display: true, text: "Nb d'erreurs", font: {{ size: 11 }} }}, min: 0, ticks: {{ stepSize: 1 }} }},
|
| 1522 |
+
}},
|
| 1523 |
+
}},
|
| 1524 |
+
}});
|
| 1525 |
+
}}
|
| 1526 |
+
|
| 1527 |
+
// ── Vue Caractères ───────────────────────────────────────────────
|
| 1528 |
+
let charViewBuilt = false;
|
| 1529 |
+
|
| 1530 |
+
function initCharView() {{
|
| 1531 |
+
charViewBuilt = true;
|
| 1532 |
+
// Remplir le sélecteur de moteur
|
| 1533 |
+
const sel = document.getElementById('char-engine-select');
|
| 1534 |
+
sel.innerHTML = '';
|
| 1535 |
+
DATA.engines.forEach(e => {{
|
| 1536 |
+
const opt = document.createElement('option');
|
| 1537 |
+
opt.value = e.name; opt.textContent = e.name;
|
| 1538 |
+
sel.appendChild(opt);
|
| 1539 |
+
}});
|
| 1540 |
+
renderCharView();
|
| 1541 |
+
}}
|
| 1542 |
+
|
| 1543 |
+
function renderCharView() {{
|
| 1544 |
+
const engineName = document.getElementById('char-engine-select').value;
|
| 1545 |
+
const eng = DATA.engines.find(e => e.name === engineName);
|
| 1546 |
+
if (!eng) return;
|
| 1547 |
+
|
| 1548 |
+
// Scores ligatures / diacritiques
|
| 1549 |
+
const scoresRow = document.getElementById('char-scores-row');
|
| 1550 |
+
const ligScore = eng.ligature_score;
|
| 1551 |
+
const diacScore = eng.diacritic_score;
|
| 1552 |
+
scoresRow.innerHTML = `
|
| 1553 |
+
<div class="stat">Ligatures <b>${{_scoreBadge(ligScore, 'Ligatures')}}</b></div>
|
| 1554 |
+
<div class="stat">Diacritiques <b>${{_scoreBadge(diacScore, 'Diacritiques')}}</b></div>
|
| 1555 |
+
${{eng.aggregated_structure ? `
|
| 1556 |
+
<div class="stat">Précision lignes <b>${{_scoreBadge(eng.aggregated_structure.mean_line_accuracy, 'Précision nb lignes')}}</b></div>
|
| 1557 |
+
<div class="stat">Ordre lecture <b>${{_scoreBadge(eng.aggregated_structure.mean_reading_order_score, 'Score ordre de lecture')}}</b></div>
|
| 1558 |
+
` : ''}}
|
| 1559 |
+
${{eng.aggregated_image_quality ? `
|
| 1560 |
+
<div class="stat">Qualité image moy. <b>${{_scoreBadge(eng.aggregated_image_quality.mean_quality_score, 'Qualité image moyenne')}}</b></div>
|
| 1561 |
+
` : ''}}
|
| 1562 |
+
`;
|
| 1563 |
+
|
| 1564 |
+
// Matrice de confusion heatmap
|
| 1565 |
+
renderConfusionHeatmap(eng);
|
| 1566 |
+
|
| 1567 |
+
// Détail ligatures
|
| 1568 |
+
renderLigatureDetail(eng);
|
| 1569 |
+
|
| 1570 |
+
// Taxonomie détaillée
|
| 1571 |
+
renderTaxonomyDetail(eng);
|
| 1572 |
+
}}
|
| 1573 |
+
|
| 1574 |
+
function renderConfusionHeatmap(eng) {{
|
| 1575 |
+
const container = document.getElementById('confusion-heatmap');
|
| 1576 |
+
const cm = eng.aggregated_confusion;
|
| 1577 |
+
if (!cm || !cm.matrix) {{
|
| 1578 |
+
container.innerHTML = '<p style="color:var(--text-muted)">Aucune donnée de confusion disponible.</p>';
|
| 1579 |
+
return;
|
| 1580 |
+
}}
|
| 1581 |
+
|
| 1582 |
+
// Collecter les top confusions (substitutions uniquement, hors ∅)
|
| 1583 |
+
const pairs = [];
|
| 1584 |
+
for (const [gt, ocrs] of Object.entries(cm.matrix)) {{
|
| 1585 |
+
if (gt === '∅') continue;
|
| 1586 |
+
for (const [ocr, cnt] of Object.entries(ocrs)) {{
|
| 1587 |
+
if (ocr !== gt && ocr !== '∅' && cnt > 0) {{
|
| 1588 |
+
pairs.push({{ gt, ocr, cnt }});
|
| 1589 |
+
}}
|
| 1590 |
+
}}
|
| 1591 |
+
}}
|
| 1592 |
+
pairs.sort((a,b) => b.cnt - a.cnt);
|
| 1593 |
+
const top = pairs.slice(0, 30);
|
| 1594 |
+
|
| 1595 |
+
if (!top.length) {{
|
| 1596 |
+
container.innerHTML = '<p style="color:var(--text-muted)">Aucune substitution détectée.</p>';
|
| 1597 |
+
return;
|
| 1598 |
+
}}
|
| 1599 |
+
|
| 1600 |
+
// Heatmap sous forme de tableau compact
|
| 1601 |
+
const maxCnt = top[0].cnt;
|
| 1602 |
+
const rows = top.map(p => {{
|
| 1603 |
+
const intensity = Math.round((p.cnt / maxCnt) * 200 + 55); // 55–255
|
| 1604 |
+
const bg = `rgb(${{intensity}},50,50)`;
|
| 1605 |
+
const fg = intensity > 150 ? '#fff' : '#222';
|
| 1606 |
+
return `<tr onclick="showConfusionExamples('${{esc(p.gt)}}','${{esc(p.ocr)}}')" style="cursor:pointer" title="GT='${{esc(p.gt)}}' → OCR='${{esc(p.ocr)}}' : ${{p.cnt}} fois">
|
| 1607 |
+
<td style="font-family:monospace;font-size:1.1rem;padding:.3rem .6rem;text-align:center">${{esc(p.gt)}}</td>
|
| 1608 |
+
<td style="padding:.1rem .3rem;color:var(--text-muted)">→</td>
|
| 1609 |
+
<td style="font-family:monospace;font-size:1.1rem;padding:.3rem .6rem;text-align:center">${{esc(p.ocr)}}</td>
|
| 1610 |
+
<td style="padding:.3rem 1rem">
|
| 1611 |
+
<div style="display:flex;align-items:center;gap:.5rem">
|
| 1612 |
+
<div style="width:${{Math.round(p.cnt/maxCnt*120)}}px;height:12px;border-radius:3px;background:${{bg}}"></div>
|
| 1613 |
+
<span style="font-size:.8rem;color:var(--text-muted)">${{p.cnt}}×</span>
|
| 1614 |
+
</div>
|
| 1615 |
+
</td>
|
| 1616 |
+
</tr>`;
|
| 1617 |
+
}}).join('');
|
| 1618 |
+
|
| 1619 |
+
container.innerHTML = `
|
| 1620 |
+
<p style="font-size:.75rem;color:var(--text-muted);margin-bottom:.5rem">
|
| 1621 |
+
Cliquer sur une ligne pour voir les exemples dans la vue Document.
|
| 1622 |
+
Total substitutions : <b>${{cm.total_substitutions}}</b>
|
| 1623 |
+
· Insertions : <b>${{cm.total_insertions}}</b>
|
| 1624 |
+
· Suppressions : <b>${{cm.total_deletions}}</b>
|
| 1625 |
+
</p>
|
| 1626 |
+
<table style="border-collapse:collapse;font-size:.85rem">
|
| 1627 |
+
<thead><tr>
|
| 1628 |
+
<th style="padding:.3rem .6rem;text-align:left">GT</th>
|
| 1629 |
+
<th></th>
|
| 1630 |
+
<th style="padding:.3rem .6rem;text-align:left">OCR</th>
|
| 1631 |
+
<th style="padding:.3rem 1rem;text-align:left">Fréquence</th>
|
| 1632 |
+
</tr></thead>
|
| 1633 |
+
<tbody>${{rows}}</tbody>
|
| 1634 |
+
</table>
|
| 1635 |
+
`;
|
| 1636 |
+
}}
|
| 1637 |
+
|
| 1638 |
+
function showConfusionExamples(gtChar, ocrChar) {{
|
| 1639 |
+
// Naviguer vers la vue Document en cherchant un exemple de cette confusion
|
| 1640 |
+
showView('document');
|
| 1641 |
+
const docWithConfusion = DATA.documents.find(doc =>
|
| 1642 |
+
doc.engine_results.some(er => {{
|
| 1643 |
+
const h = er.hypothesis || '';
|
| 1644 |
+
const g = doc.ground_truth || '';
|
| 1645 |
+
return g.includes(gtChar) && h.includes(ocrChar);
|
| 1646 |
+
}})
|
| 1647 |
+
);
|
| 1648 |
+
if (docWithConfusion) loadDocument(docWithConfusion.doc_id);
|
| 1649 |
+
}}
|
| 1650 |
+
|
| 1651 |
+
function renderLigatureDetail(eng) {{
|
| 1652 |
+
const container = document.getElementById('ligature-detail');
|
| 1653 |
+
// Agrégation sur tous les documents pour ce moteur
|
| 1654 |
+
const ligData = {{}};
|
| 1655 |
+
DATA.documents.forEach(doc => {{
|
| 1656 |
+
const er = doc.engine_results.find(r => r.engine === eng.name);
|
| 1657 |
+
if (!er || !er.ligature_score) return;
|
| 1658 |
+
// On n'a que le score global par doc; pour le détail, utiliser aggregated_char_scores
|
| 1659 |
+
}});
|
| 1660 |
+
|
| 1661 |
+
const agg = eng.aggregated_char_scores;
|
| 1662 |
+
if (!agg || !agg.ligature || !agg.ligature.per_ligature) {{
|
| 1663 |
+
const overallScore = eng.ligature_score;
|
| 1664 |
+
if (overallScore !== null && overallScore !== undefined) {{
|
| 1665 |
+
container.innerHTML = `<div class="stat">Score global ligatures : ${{_scoreBadge(overallScore, 'Ligatures')}}</div>`;
|
| 1666 |
+
}} else {{
|
| 1667 |
+
container.innerHTML = '<p style="color:var(--text-muted)">Aucune donnée ligature disponible (pas de ligatures dans le corpus).</p>';
|
| 1668 |
+
}}
|
| 1669 |
+
return;
|
| 1670 |
+
}}
|
| 1671 |
+
|
| 1672 |
+
const perLig = agg.ligature.per_ligature;
|
| 1673 |
+
if (!Object.keys(perLig).length) {{
|
| 1674 |
+
container.innerHTML = '<p style="color:var(--text-muted)">Aucune ligature trouvée dans le corpus GT.</p>';
|
| 1675 |
+
return;
|
| 1676 |
+
}}
|
| 1677 |
+
|
| 1678 |
+
const rows = Object.entries(perLig)
|
| 1679 |
+
.sort((a,b) => b[1].gt_count - a[1].gt_count)
|
| 1680 |
+
.map(([lig, d]) => {{
|
| 1681 |
+
const sc = d.score;
|
| 1682 |
+
const color = sc >= 0.9 ? '#16a34a' : sc >= 0.7 ? '#ca8a04' : '#dc2626';
|
| 1683 |
+
const barW = Math.round(sc * 120);
|
| 1684 |
+
return `<tr>
|
| 1685 |
+
<td style="font-family:monospace;font-size:1.2rem;padding:.3rem .6rem">${{esc(lig)}}</td>
|
| 1686 |
+
<td style="padding:.3rem .6rem;font-size:.8rem;color:var(--text-muted)">${{esc(lig.codePointAt(0).toString(16).toUpperCase().padStart(4,'0'))}}</td>
|
| 1687 |
+
<td style="padding:.3rem .6rem">${{d.gt_count}} GT</td>
|
| 1688 |
+
<td style="padding:.3rem .6rem">${{d.ocr_correct}} corrects</td>
|
| 1689 |
+
<td style="padding:.3rem 1rem">
|
| 1690 |
+
<div style="display:flex;align-items:center;gap:.5rem">
|
| 1691 |
+
<div style="width:${{barW}}px;height:10px;border-radius:3px;background:${{color}}"></div>
|
| 1692 |
+
<span style="color:${{color}};font-weight:600">${{(sc*100).toFixed(0)}}%</span>
|
| 1693 |
+
</div>
|
| 1694 |
+
</td>
|
| 1695 |
+
</tr>`;
|
| 1696 |
+
}}).join('');
|
| 1697 |
+
|
| 1698 |
+
container.innerHTML = `
|
| 1699 |
+
<table style="border-collapse:collapse;font-size:.85rem">
|
| 1700 |
+
<thead><tr>
|
| 1701 |
+
<th style="padding:.3rem .6rem;text-align:left">Ligature</th>
|
| 1702 |
+
<th style="padding:.3rem .6rem;text-align:left">Unicode</th>
|
| 1703 |
+
<th style="padding:.3rem .6rem">GT</th>
|
| 1704 |
+
<th style="padding:.3rem .6rem">Corrects</th>
|
| 1705 |
+
<th style="padding:.3rem 1rem;text-align:left">Score</th>
|
| 1706 |
+
</tr></thead>
|
| 1707 |
+
<tbody>${{rows}}</tbody>
|
| 1708 |
+
</table>
|
| 1709 |
+
`;
|
| 1710 |
+
}}
|
| 1711 |
+
|
| 1712 |
+
function renderTaxonomyDetail(eng) {{
|
| 1713 |
+
const container = document.getElementById('taxonomy-detail');
|
| 1714 |
+
const tax = eng.aggregated_taxonomy;
|
| 1715 |
+
if (!tax || !tax.counts) {{
|
| 1716 |
+
container.innerHTML = '<p style="color:var(--text-muted)">Aucune donnée taxonomique disponible.</p>';
|
| 1717 |
+
return;
|
| 1718 |
+
}}
|
| 1719 |
+
|
| 1720 |
+
const classNames = {{
|
| 1721 |
+
visual_confusion: '1 — Confusion visuelle',
|
| 1722 |
+
diacritic_error: '2 — Erreur diacritique',
|
| 1723 |
+
case_error: '3 — Erreur de casse',
|
| 1724 |
+
ligature_error: '4 — Ligature',
|
| 1725 |
+
abbreviation_error: '5 — Abréviation',
|
| 1726 |
+
hapax: '6 — Hapax',
|
| 1727 |
+
segmentation_error: '7 — Segmentation',
|
| 1728 |
+
oov_character: '8 — Hors-vocabulaire',
|
| 1729 |
+
lacuna: '9 — Lacune',
|
| 1730 |
+
}};
|
| 1731 |
+
const total = tax.total_errors || 1;
|
| 1732 |
+
const maxCnt = Math.max(...Object.values(tax.counts));
|
| 1733 |
+
|
| 1734 |
+
const rows = Object.entries(tax.counts)
|
| 1735 |
+
.filter(([, cnt]) => cnt > 0)
|
| 1736 |
+
.sort((a,b) => b[1]-a[1])
|
| 1737 |
+
.map(([cls, cnt]) => {{
|
| 1738 |
+
const pctVal = (cnt / total * 100).toFixed(1);
|
| 1739 |
+
const barW = maxCnt > 0 ? Math.round(cnt/maxCnt * 200) : 0;
|
| 1740 |
+
return `<tr>
|
| 1741 |
+
<td style="padding:.3rem .6rem;font-size:.85rem">${{esc(classNames[cls] || cls)}}</td>
|
| 1742 |
+
<td style="padding:.3rem .6rem;text-align:right;font-variant-numeric:tabular-nums">${{cnt}}</td>
|
| 1743 |
+
<td style="padding:.3rem 1rem">
|
| 1744 |
+
<div style="display:flex;align-items:center;gap:.5rem">
|
| 1745 |
+
<div style="width:${{barW}}px;height:10px;border-radius:3px;background:#6366f1"></div>
|
| 1746 |
+
<span style="color:var(--text-muted);font-size:.8rem">${{pctVal}}%</span>
|
| 1747 |
+
</div>
|
| 1748 |
+
</td>
|
| 1749 |
+
</tr>`;
|
| 1750 |
+
}}).join('');
|
| 1751 |
+
|
| 1752 |
+
container.innerHTML = `
|
| 1753 |
+
<p style="font-size:.75rem;color:var(--text-muted);margin-bottom:.5rem">Total : <b>${{tax.total_errors}}</b> erreurs classifiées.</p>
|
| 1754 |
+
<table style="border-collapse:collapse;font-size:.85rem;min-width:400px">
|
| 1755 |
+
<thead><tr>
|
| 1756 |
+
<th style="padding:.3rem .6rem;text-align:left">Classe</th>
|
| 1757 |
+
<th style="padding:.3rem .6rem;text-align:right">N</th>
|
| 1758 |
+
<th style="padding:.3rem 1rem;text-align:left">Proportion</th>
|
| 1759 |
+
</tr></thead>
|
| 1760 |
+
<tbody>${{rows}}</tbody>
|
| 1761 |
+
</table>
|
| 1762 |
+
`;
|
| 1763 |
+
}}
|
| 1764 |
+
|
| 1765 |
// ── Init ────────────────────────────────────────────────────────
|
| 1766 |
function init() {{
|
| 1767 |
// Méta nav
|
|
The diff for this file is too large to render.
See raw diff
|
|
|
|
@@ -0,0 +1,876 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests Sprint 5 : métriques avancées patrimoniales.
|
| 2 |
+
|
| 3 |
+
Couvre :
|
| 4 |
+
- Matrice de confusion unicode (confusion.py)
|
| 5 |
+
- Scores ligatures et diacritiques (char_scores.py)
|
| 6 |
+
- Taxonomie des erreurs classes 1-9 (taxonomy.py)
|
| 7 |
+
- Analyse structurelle (structure.py)
|
| 8 |
+
- Qualité image (image_quality.py)
|
| 9 |
+
- Intégration dans les fixtures et le rapport HTML
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import pytest
|
| 15 |
+
|
| 16 |
+
# ===========================================================================
|
| 17 |
+
# Tests ConfusionMatrix
|
| 18 |
+
# ===========================================================================
|
| 19 |
+
|
| 20 |
+
from picarones.core.confusion import (
|
| 21 |
+
ConfusionMatrix,
|
| 22 |
+
EMPTY_CHAR,
|
| 23 |
+
build_confusion_matrix,
|
| 24 |
+
aggregate_confusion_matrices,
|
| 25 |
+
top_confused_chars,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class TestBuildConfusionMatrix:
|
| 30 |
+
|
| 31 |
+
def test_identical_texts(self):
|
| 32 |
+
cm = build_confusion_matrix("abc", "abc")
|
| 33 |
+
# Pas de substitutions
|
| 34 |
+
assert cm.total_substitutions == 0
|
| 35 |
+
assert cm.total_insertions == 0
|
| 36 |
+
assert cm.total_deletions == 0
|
| 37 |
+
|
| 38 |
+
def test_empty_texts(self):
|
| 39 |
+
cm = build_confusion_matrix("", "")
|
| 40 |
+
assert cm.total_errors == 0
|
| 41 |
+
|
| 42 |
+
def test_simple_substitution(self):
|
| 43 |
+
cm = build_confusion_matrix("abc", "axc")
|
| 44 |
+
# 'b' → 'x'
|
| 45 |
+
assert "b" in cm.matrix
|
| 46 |
+
assert "x" in cm.matrix["b"]
|
| 47 |
+
assert cm.matrix["b"]["x"] >= 1
|
| 48 |
+
|
| 49 |
+
def test_deletion_recorded(self):
|
| 50 |
+
cm = build_confusion_matrix("abc", "ac")
|
| 51 |
+
# 'b' supprimé
|
| 52 |
+
assert "b" in cm.matrix
|
| 53 |
+
assert EMPTY_CHAR in cm.matrix["b"]
|
| 54 |
+
|
| 55 |
+
def test_insertion_recorded(self):
|
| 56 |
+
cm = build_confusion_matrix("ac", "abc")
|
| 57 |
+
# 'b' inséré
|
| 58 |
+
assert EMPTY_CHAR in cm.matrix
|
| 59 |
+
assert "b" in cm.matrix[EMPTY_CHAR]
|
| 60 |
+
|
| 61 |
+
def test_no_whitespace_recorded_by_default(self):
|
| 62 |
+
cm = build_confusion_matrix("a b", "a x")
|
| 63 |
+
# Les espaces ne doivent pas être dans la matrice
|
| 64 |
+
assert " " not in cm.matrix
|
| 65 |
+
|
| 66 |
+
def test_as_dict_structure(self):
|
| 67 |
+
cm = build_confusion_matrix("hello", "hallo")
|
| 68 |
+
d = cm.as_dict()
|
| 69 |
+
assert "matrix" in d
|
| 70 |
+
assert "total_substitutions" in d
|
| 71 |
+
assert "total_insertions" in d
|
| 72 |
+
assert "total_deletions" in d
|
| 73 |
+
|
| 74 |
+
def test_top_confusions(self):
|
| 75 |
+
cm = build_confusion_matrix("eeee", "aaaa")
|
| 76 |
+
tops = cm.top_confusions(n=5)
|
| 77 |
+
assert len(tops) >= 1
|
| 78 |
+
assert tops[0]["gt"] == "e"
|
| 79 |
+
assert tops[0]["ocr"] == "a"
|
| 80 |
+
assert tops[0]["count"] == 4
|
| 81 |
+
|
| 82 |
+
def test_medieval_chars_tracked(self):
|
| 83 |
+
cm = build_confusion_matrix("maiſon", "maifon")
|
| 84 |
+
# ſ confondu avec f
|
| 85 |
+
assert "ſ" in cm.matrix
|
| 86 |
+
assert "f" in cm.matrix["ſ"]
|
| 87 |
+
|
| 88 |
+
def test_as_compact_dict_filters_low_count(self):
|
| 89 |
+
cm = build_confusion_matrix("aab", "axb")
|
| 90 |
+
# avec min_count=2, une substitution unique filtrée
|
| 91 |
+
compact = cm.as_compact_dict(min_count=2)
|
| 92 |
+
# Le 'a'→'x' ne doit pas apparaître (1 seule occurrence)
|
| 93 |
+
matrix = compact["matrix"]
|
| 94 |
+
for gt_counts in matrix.values():
|
| 95 |
+
for ocr_char, cnt in gt_counts.items():
|
| 96 |
+
assert cnt >= 2
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class TestAggregateConfusionMatrices:
|
| 100 |
+
|
| 101 |
+
def test_empty_list(self):
|
| 102 |
+
cm = aggregate_confusion_matrices([])
|
| 103 |
+
assert cm.total_errors == 0
|
| 104 |
+
|
| 105 |
+
def test_single_matrix(self):
|
| 106 |
+
cm1 = build_confusion_matrix("abc", "axc")
|
| 107 |
+
agg = aggregate_confusion_matrices([cm1])
|
| 108 |
+
assert agg.matrix == cm1.matrix
|
| 109 |
+
|
| 110 |
+
def test_counts_sum(self):
|
| 111 |
+
cm1 = build_confusion_matrix("abc", "axc")
|
| 112 |
+
cm2 = build_confusion_matrix("abc", "axc")
|
| 113 |
+
agg = aggregate_confusion_matrices([cm1, cm2])
|
| 114 |
+
# La confusion 'b'→'x' doit apparaître 2 fois
|
| 115 |
+
assert agg.matrix.get("b", {}).get("x", 0) >= 2
|
| 116 |
+
|
| 117 |
+
def test_total_errors_sum(self):
|
| 118 |
+
cm1 = build_confusion_matrix("abc", "axc")
|
| 119 |
+
cm2 = build_confusion_matrix("def", "dxf")
|
| 120 |
+
agg = aggregate_confusion_matrices([cm1, cm2])
|
| 121 |
+
assert agg.total_errors >= cm1.total_errors + cm2.total_errors
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class TestTopConfusedChars:
|
| 125 |
+
|
| 126 |
+
def test_returns_list(self):
|
| 127 |
+
cm = build_confusion_matrix("aaabbb", "aaaxxx")
|
| 128 |
+
tops = top_confused_chars(cm, n=5)
|
| 129 |
+
assert isinstance(tops, list)
|
| 130 |
+
|
| 131 |
+
def test_sorted_by_errors_desc(self):
|
| 132 |
+
cm = aggregate_confusion_matrices([
|
| 133 |
+
build_confusion_matrix("bbb", "xxx"), # 3 fois
|
| 134 |
+
build_confusion_matrix("a", "y"), # 1 fois
|
| 135 |
+
])
|
| 136 |
+
tops = top_confused_chars(cm, n=10)
|
| 137 |
+
if len(tops) >= 2:
|
| 138 |
+
assert tops[0]["total_errors"] >= tops[1]["total_errors"]
|
| 139 |
+
|
| 140 |
+
def test_excludes_empty_char(self):
|
| 141 |
+
cm = build_confusion_matrix("abc", "ac") # b supprimé
|
| 142 |
+
tops = top_confused_chars(cm, exclude_empty=True)
|
| 143 |
+
assert all(t["char"] != EMPTY_CHAR for t in tops)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
# ===========================================================================
|
| 147 |
+
# Tests LigatureScore
|
| 148 |
+
# ===========================================================================
|
| 149 |
+
|
| 150 |
+
from picarones.core.char_scores import (
|
| 151 |
+
LIGATURE_TABLE,
|
| 152 |
+
DIACRITIC_MAP,
|
| 153 |
+
LigatureScore,
|
| 154 |
+
DiacriticScore,
|
| 155 |
+
compute_ligature_score,
|
| 156 |
+
compute_diacritic_score,
|
| 157 |
+
aggregate_ligature_scores,
|
| 158 |
+
aggregate_diacritic_scores,
|
| 159 |
+
_ALL_LIGATURES,
|
| 160 |
+
_ALL_DIACRITICS,
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class TestLigatureTable:
|
| 165 |
+
|
| 166 |
+
def test_fi_ligature_present(self):
|
| 167 |
+
assert "\uFB01" in LIGATURE_TABLE # fi
|
| 168 |
+
|
| 169 |
+
def test_fl_ligature_present(self):
|
| 170 |
+
assert "\uFB02" in LIGATURE_TABLE # fl
|
| 171 |
+
|
| 172 |
+
def test_oe_ligature_present(self):
|
| 173 |
+
assert "\u0153" in LIGATURE_TABLE # œ
|
| 174 |
+
|
| 175 |
+
def test_ae_ligature_present(self):
|
| 176 |
+
assert "\u00E6" in LIGATURE_TABLE # æ
|
| 177 |
+
|
| 178 |
+
def test_ff_ligature_present(self):
|
| 179 |
+
assert "\uFB00" in LIGATURE_TABLE # ff
|
| 180 |
+
|
| 181 |
+
def test_equivalents_are_lists(self):
|
| 182 |
+
for lig, equivs in LIGATURE_TABLE.items():
|
| 183 |
+
assert isinstance(equivs, list)
|
| 184 |
+
assert len(equivs) >= 1
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
class TestComputeLigatureScore:
|
| 188 |
+
|
| 189 |
+
def test_no_ligatures_in_gt(self):
|
| 190 |
+
result = compute_ligature_score("bonjour monde", "bonjour monde")
|
| 191 |
+
assert result.score == pytest.approx(1.0)
|
| 192 |
+
assert result.total_in_gt == 0
|
| 193 |
+
|
| 194 |
+
def test_ligature_correctly_recognized(self):
|
| 195 |
+
# GT avec fi (fi ligature), OCR reconnaît "fi"
|
| 196 |
+
result = compute_ligature_score("fin", "fin")
|
| 197 |
+
assert result.total_in_gt == 1
|
| 198 |
+
assert result.score == pytest.approx(1.0)
|
| 199 |
+
|
| 200 |
+
def test_ligature_unicode_to_unicode(self):
|
| 201 |
+
# GT et OCR ont tous les deux fi
|
| 202 |
+
result = compute_ligature_score("fin", "fin")
|
| 203 |
+
assert result.score == pytest.approx(1.0)
|
| 204 |
+
|
| 205 |
+
def test_oe_ligature(self):
|
| 206 |
+
result = compute_ligature_score("œuvre", "oeuvre")
|
| 207 |
+
assert result.total_in_gt == 1
|
| 208 |
+
assert result.score == pytest.approx(1.0)
|
| 209 |
+
|
| 210 |
+
def test_ae_ligature(self):
|
| 211 |
+
result = compute_ligature_score("æther", "aether")
|
| 212 |
+
assert result.total_in_gt == 1
|
| 213 |
+
assert result.score == pytest.approx(1.0)
|
| 214 |
+
|
| 215 |
+
def test_as_dict_structure(self):
|
| 216 |
+
result = compute_ligature_score("fin", "fin")
|
| 217 |
+
d = result.as_dict()
|
| 218 |
+
assert "total_in_gt" in d
|
| 219 |
+
assert "correctly_recognized" in d
|
| 220 |
+
assert "score" in d
|
| 221 |
+
assert "per_ligature" in d
|
| 222 |
+
|
| 223 |
+
def test_empty_texts(self):
|
| 224 |
+
result = compute_ligature_score("", "")
|
| 225 |
+
assert result.score == pytest.approx(1.0)
|
| 226 |
+
assert result.total_in_gt == 0
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
class TestComputeDiacriticScore:
|
| 230 |
+
|
| 231 |
+
def test_no_diacritics(self):
|
| 232 |
+
result = compute_diacritic_score("bonjour", "bonjour")
|
| 233 |
+
assert result.score == pytest.approx(1.0)
|
| 234 |
+
assert result.total_in_gt == 0
|
| 235 |
+
|
| 236 |
+
def test_accent_preserved(self):
|
| 237 |
+
result = compute_diacritic_score("été", "été")
|
| 238 |
+
assert result.score == pytest.approx(1.0)
|
| 239 |
+
assert result.correctly_recognized == result.total_in_gt
|
| 240 |
+
|
| 241 |
+
def test_accent_lost(self):
|
| 242 |
+
result = compute_diacritic_score("étude", "etude")
|
| 243 |
+
assert result.total_in_gt >= 1
|
| 244 |
+
# é → e : perte du diacritique
|
| 245 |
+
assert result.correctly_recognized < result.total_in_gt
|
| 246 |
+
assert result.score < 1.0
|
| 247 |
+
|
| 248 |
+
def test_cedille_tracked(self):
|
| 249 |
+
result = compute_diacritic_score("façon", "facon")
|
| 250 |
+
assert result.total_in_gt >= 1
|
| 251 |
+
assert result.score < 1.0
|
| 252 |
+
|
| 253 |
+
def test_empty_texts(self):
|
| 254 |
+
result = compute_diacritic_score("", "")
|
| 255 |
+
assert result.score == pytest.approx(1.0)
|
| 256 |
+
|
| 257 |
+
def test_as_dict_structure(self):
|
| 258 |
+
result = compute_diacritic_score("été", "ete")
|
| 259 |
+
d = result.as_dict()
|
| 260 |
+
assert "total_in_gt" in d
|
| 261 |
+
assert "correctly_recognized" in d
|
| 262 |
+
assert "score" in d
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
class TestAggregateLigatureScores:
|
| 266 |
+
|
| 267 |
+
def test_empty_list(self):
|
| 268 |
+
result = aggregate_ligature_scores([])
|
| 269 |
+
assert result["score"] == pytest.approx(1.0)
|
| 270 |
+
assert result["total_in_gt"] == 0
|
| 271 |
+
|
| 272 |
+
def test_aggregation(self):
|
| 273 |
+
s1 = LigatureScore(total_in_gt=4, correctly_recognized=3, score=0.75)
|
| 274 |
+
s2 = LigatureScore(total_in_gt=2, correctly_recognized=2, score=1.0)
|
| 275 |
+
result = aggregate_ligature_scores([s1, s2])
|
| 276 |
+
assert result["total_in_gt"] == 6
|
| 277 |
+
assert result["correctly_recognized"] == 5
|
| 278 |
+
assert result["score"] == pytest.approx(5/6, abs=1e-4)
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
class TestAggregateDiacriticScores:
|
| 282 |
+
|
| 283 |
+
def test_aggregation(self):
|
| 284 |
+
s1 = DiacriticScore(total_in_gt=10, correctly_recognized=8, score=0.8)
|
| 285 |
+
s2 = DiacriticScore(total_in_gt=5, correctly_recognized=5, score=1.0)
|
| 286 |
+
result = aggregate_diacritic_scores([s1, s2])
|
| 287 |
+
assert result["total_in_gt"] == 15
|
| 288 |
+
assert result["correctly_recognized"] == 13
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# ===========================================================================
|
| 292 |
+
# Tests TaxonomyResult
|
| 293 |
+
# ===========================================================================
|
| 294 |
+
|
| 295 |
+
from picarones.core.taxonomy import (
|
| 296 |
+
TaxonomyResult,
|
| 297 |
+
ERROR_CLASSES,
|
| 298 |
+
classify_errors,
|
| 299 |
+
aggregate_taxonomy,
|
| 300 |
+
VISUAL_CONFUSIONS,
|
| 301 |
+
)
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
class TestErrorClasses:
|
| 305 |
+
|
| 306 |
+
def test_nine_classes(self):
|
| 307 |
+
assert len(ERROR_CLASSES) == 9
|
| 308 |
+
|
| 309 |
+
def test_class_names(self):
|
| 310 |
+
assert "visual_confusion" in ERROR_CLASSES
|
| 311 |
+
assert "diacritic_error" in ERROR_CLASSES
|
| 312 |
+
assert "case_error" in ERROR_CLASSES
|
| 313 |
+
assert "ligature_error" in ERROR_CLASSES
|
| 314 |
+
assert "lacuna" in ERROR_CLASSES
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
class TestClassifyErrors:
|
| 318 |
+
|
| 319 |
+
def test_identical_texts(self):
|
| 320 |
+
result = classify_errors("bonjour monde", "bonjour monde")
|
| 321 |
+
assert result.total_errors == 0
|
| 322 |
+
|
| 323 |
+
def test_empty_texts(self):
|
| 324 |
+
result = classify_errors("", "")
|
| 325 |
+
assert result.total_errors == 0
|
| 326 |
+
|
| 327 |
+
def test_case_error_detected(self):
|
| 328 |
+
result = classify_errors("Bonjour Monde", "bonjour monde")
|
| 329 |
+
assert result.counts["case_error"] >= 1
|
| 330 |
+
|
| 331 |
+
def test_diacritic_error_detected(self):
|
| 332 |
+
result = classify_errors("été chez nous", "ete chez nous")
|
| 333 |
+
assert result.counts["diacritic_error"] >= 1
|
| 334 |
+
|
| 335 |
+
def test_lacuna_detected(self):
|
| 336 |
+
result = classify_errors("le chat dort paisiblement", "le chat")
|
| 337 |
+
assert result.counts["lacuna"] >= 1
|
| 338 |
+
|
| 339 |
+
def test_segmentation_detected(self):
|
| 340 |
+
result = classify_errors("hello world test", "helloworld test")
|
| 341 |
+
# "hello world" fusionné en "helloworld"
|
| 342 |
+
assert result.counts["segmentation_error"] >= 0 # peut être classé hapax aussi
|
| 343 |
+
|
| 344 |
+
def test_ligature_error_detected(self):
|
| 345 |
+
result = classify_errors("fin de siècle", "fin de siècle")
|
| 346 |
+
# fi vs fi est une ligature correcte, pas une erreur
|
| 347 |
+
# Mais si on avait: GT=fi, OCR=fi → correct
|
| 348 |
+
# Test avec ligature mal reconnue: GT=fin, OCR=fïn (erreur diac)
|
| 349 |
+
assert result.total_errors >= 0 # pas d'erreur ici (fin est équivalent)
|
| 350 |
+
|
| 351 |
+
def test_as_dict_structure(self):
|
| 352 |
+
result = classify_errors("test erreur ici", "test erreur là")
|
| 353 |
+
d = result.as_dict()
|
| 354 |
+
assert "counts" in d
|
| 355 |
+
assert "total_errors" in d
|
| 356 |
+
assert "class_distribution" in d
|
| 357 |
+
assert "examples" in d
|
| 358 |
+
|
| 359 |
+
def test_from_dict_roundtrip(self):
|
| 360 |
+
result = classify_errors("bonjour monde", "Bonjour monde")
|
| 361 |
+
d = result.as_dict()
|
| 362 |
+
restored = TaxonomyResult.from_dict(d)
|
| 363 |
+
assert restored.total_errors == result.total_errors
|
| 364 |
+
assert restored.counts == result.counts
|
| 365 |
+
|
| 366 |
+
def test_class_distribution_sums_to_one(self):
|
| 367 |
+
result = classify_errors("abc def ghi", "xyz uvw rst")
|
| 368 |
+
dist = result.class_distribution
|
| 369 |
+
if dist:
|
| 370 |
+
assert abs(sum(dist.values()) - 1.0) < 1e-6
|
| 371 |
+
|
| 372 |
+
def test_all_classes_in_counts(self):
|
| 373 |
+
result = classify_errors("test", "teSt")
|
| 374 |
+
for cls in ERROR_CLASSES:
|
| 375 |
+
assert cls in result.counts
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
class TestAggregateTaxonomy:
|
| 379 |
+
|
| 380 |
+
def test_empty(self):
|
| 381 |
+
result = aggregate_taxonomy([])
|
| 382 |
+
assert result["total_errors"] == 0
|
| 383 |
+
|
| 384 |
+
def test_sums_counts(self):
|
| 385 |
+
r1 = TaxonomyResult(
|
| 386 |
+
counts={"visual_confusion": 2, "diacritic_error": 1, **{k: 0 for k in ERROR_CLASSES if k not in ["visual_confusion", "diacritic_error"]}},
|
| 387 |
+
total_errors=3,
|
| 388 |
+
)
|
| 389 |
+
r2 = TaxonomyResult(
|
| 390 |
+
counts={"visual_confusion": 1, "diacritic_error": 3, **{k: 0 for k in ERROR_CLASSES if k not in ["visual_confusion", "diacritic_error"]}},
|
| 391 |
+
total_errors=4,
|
| 392 |
+
)
|
| 393 |
+
agg = aggregate_taxonomy([r1, r2])
|
| 394 |
+
assert agg["counts"]["visual_confusion"] == 3
|
| 395 |
+
assert agg["counts"]["diacritic_error"] == 4
|
| 396 |
+
assert agg["total_errors"] == 7
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
# ===========================================================================
|
| 400 |
+
# Tests StructureResult
|
| 401 |
+
# ===========================================================================
|
| 402 |
+
|
| 403 |
+
from picarones.core.structure import (
|
| 404 |
+
StructureResult,
|
| 405 |
+
analyze_structure,
|
| 406 |
+
aggregate_structure,
|
| 407 |
+
)
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
class TestAnalyzeStructure:
|
| 411 |
+
|
| 412 |
+
def test_identical_single_line(self):
|
| 413 |
+
result = analyze_structure("ligne unique", "ligne unique")
|
| 414 |
+
assert result.gt_line_count == 1
|
| 415 |
+
assert result.ocr_line_count == 1
|
| 416 |
+
assert result.line_fusion_count == 0
|
| 417 |
+
assert result.line_fragmentation_count == 0
|
| 418 |
+
|
| 419 |
+
def test_empty_texts(self):
|
| 420 |
+
result = analyze_structure("", "")
|
| 421 |
+
assert result.gt_line_count == 0
|
| 422 |
+
assert result.ocr_line_count == 0
|
| 423 |
+
|
| 424 |
+
def test_multiline_equal(self):
|
| 425 |
+
gt = "ligne 1\nligne 2\nligne 3"
|
| 426 |
+
result = analyze_structure(gt, gt)
|
| 427 |
+
assert result.gt_line_count == 3
|
| 428 |
+
assert result.ocr_line_count == 3
|
| 429 |
+
|
| 430 |
+
def test_line_fusion_detected(self):
|
| 431 |
+
gt = "ligne 1\nligne 2\nligne 3"
|
| 432 |
+
ocr = "ligne 1 ligne 2\nligne 3" # fusion de 2 lignes en 1
|
| 433 |
+
result = analyze_structure(gt, ocr)
|
| 434 |
+
# Le nombre de lignes OCR < GT
|
| 435 |
+
assert result.ocr_line_count < result.gt_line_count
|
| 436 |
+
|
| 437 |
+
def test_reading_order_score_perfect(self):
|
| 438 |
+
text = "le chat dort ici"
|
| 439 |
+
result = analyze_structure(text, text)
|
| 440 |
+
assert result.reading_order_score > 0.9
|
| 441 |
+
|
| 442 |
+
def test_reading_order_score_low_for_scrambled(self):
|
| 443 |
+
gt = "le chat dort paisiblement sur le canapé"
|
| 444 |
+
ocr = "canapé sur le paisiblement dort chat le"
|
| 445 |
+
result = analyze_structure(gt, ocr)
|
| 446 |
+
assert result.reading_order_score < 1.0
|
| 447 |
+
|
| 448 |
+
def test_line_accuracy_perfect(self):
|
| 449 |
+
gt = "ligne 1\nligne 2"
|
| 450 |
+
ocr = "ligne 1\nligne 2"
|
| 451 |
+
result = analyze_structure(gt, ocr)
|
| 452 |
+
assert result.line_accuracy == pytest.approx(1.0)
|
| 453 |
+
|
| 454 |
+
def test_line_accuracy_degraded(self):
|
| 455 |
+
gt = "ligne 1\nligne 2\nligne 3\nligne 4"
|
| 456 |
+
ocr = "ligne 1"
|
| 457 |
+
result = analyze_structure(gt, ocr)
|
| 458 |
+
assert result.line_accuracy < 1.0
|
| 459 |
+
|
| 460 |
+
def test_as_dict_structure(self):
|
| 461 |
+
result = analyze_structure("ligne 1\nligne 2", "ligne 1\nligne 2")
|
| 462 |
+
d = result.as_dict()
|
| 463 |
+
required = ["gt_line_count", "ocr_line_count", "line_fusion_count",
|
| 464 |
+
"line_fragmentation_count", "reading_order_score",
|
| 465 |
+
"paragraph_conservation_score", "line_accuracy"]
|
| 466 |
+
for key in required:
|
| 467 |
+
assert key in d
|
| 468 |
+
|
| 469 |
+
def test_from_dict_roundtrip(self):
|
| 470 |
+
result = analyze_structure("a\nb\nc", "a\nb")
|
| 471 |
+
d = result.as_dict()
|
| 472 |
+
restored = StructureResult.from_dict(d)
|
| 473 |
+
assert restored.gt_line_count == result.gt_line_count
|
| 474 |
+
assert restored.ocr_line_count == result.ocr_line_count
|
| 475 |
+
|
| 476 |
+
def test_line_fusion_rate_property(self):
|
| 477 |
+
result = StructureResult(gt_line_count=10, ocr_line_count=8, line_fusion_count=2)
|
| 478 |
+
assert result.line_fusion_rate == pytest.approx(0.2)
|
| 479 |
+
|
| 480 |
+
def test_line_fragmentation_rate_property(self):
|
| 481 |
+
result = StructureResult(gt_line_count=5, ocr_line_count=8, line_fragmentation_count=3)
|
| 482 |
+
assert result.line_fragmentation_rate == pytest.approx(0.6)
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
class TestAggregateStructure:
|
| 486 |
+
|
| 487 |
+
def test_empty(self):
|
| 488 |
+
result = aggregate_structure([])
|
| 489 |
+
assert result == {}
|
| 490 |
+
|
| 491 |
+
def test_single_result(self):
|
| 492 |
+
r = StructureResult(
|
| 493 |
+
gt_line_count=5, ocr_line_count=5,
|
| 494 |
+
reading_order_score=0.9, paragraph_conservation_score=1.0,
|
| 495 |
+
)
|
| 496 |
+
agg = aggregate_structure([r])
|
| 497 |
+
assert agg["mean_reading_order_score"] == pytest.approx(0.9)
|
| 498 |
+
assert agg["document_count"] == 1
|
| 499 |
+
|
| 500 |
+
def test_mean_fusion_rate(self):
|
| 501 |
+
r1 = StructureResult(gt_line_count=10, ocr_line_count=8, line_fusion_count=2)
|
| 502 |
+
r2 = StructureResult(gt_line_count=10, ocr_line_count=6, line_fusion_count=4)
|
| 503 |
+
agg = aggregate_structure([r1, r2])
|
| 504 |
+
# fusion rates: 0.2 et 0.4 → mean = 0.3
|
| 505 |
+
assert agg["mean_line_fusion_rate"] == pytest.approx(0.3, rel=1e-3)
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
# ===========================================================================
|
| 509 |
+
# Tests ImageQualityResult
|
| 510 |
+
# ===========================================================================
|
| 511 |
+
|
| 512 |
+
from picarones.core.image_quality import (
|
| 513 |
+
ImageQualityResult,
|
| 514 |
+
generate_mock_quality_scores,
|
| 515 |
+
aggregate_image_quality,
|
| 516 |
+
_global_quality_score,
|
| 517 |
+
)
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
class TestImageQualityResult:
|
| 521 |
+
|
| 522 |
+
def test_quality_tier_good(self):
|
| 523 |
+
r = ImageQualityResult(quality_score=0.8)
|
| 524 |
+
assert r.quality_tier == "good"
|
| 525 |
+
assert r.is_good_quality is True
|
| 526 |
+
|
| 527 |
+
def test_quality_tier_medium(self):
|
| 528 |
+
r = ImageQualityResult(quality_score=0.55)
|
| 529 |
+
assert r.quality_tier == "medium"
|
| 530 |
+
assert r.is_good_quality is False
|
| 531 |
+
|
| 532 |
+
def test_quality_tier_poor(self):
|
| 533 |
+
r = ImageQualityResult(quality_score=0.2)
|
| 534 |
+
assert r.quality_tier == "poor"
|
| 535 |
+
|
| 536 |
+
def test_as_dict_structure(self):
|
| 537 |
+
r = ImageQualityResult(
|
| 538 |
+
sharpness_score=0.8, noise_level=0.1, rotation_degrees=0.5,
|
| 539 |
+
contrast_score=0.9, quality_score=0.75, analysis_method="mock",
|
| 540 |
+
)
|
| 541 |
+
d = r.as_dict()
|
| 542 |
+
assert "sharpness_score" in d
|
| 543 |
+
assert "noise_level" in d
|
| 544 |
+
assert "rotation_degrees" in d
|
| 545 |
+
assert "contrast_score" in d
|
| 546 |
+
assert "quality_score" in d
|
| 547 |
+
assert "quality_tier" in d
|
| 548 |
+
assert "analysis_method" in d
|
| 549 |
+
|
| 550 |
+
def test_from_dict_roundtrip(self):
|
| 551 |
+
r = ImageQualityResult(
|
| 552 |
+
sharpness_score=0.7, noise_level=0.2, rotation_degrees=1.0,
|
| 553 |
+
contrast_score=0.8, quality_score=0.65, analysis_method="pillow",
|
| 554 |
+
)
|
| 555 |
+
d = r.as_dict()
|
| 556 |
+
restored = ImageQualityResult.from_dict(d)
|
| 557 |
+
assert restored.sharpness_score == pytest.approx(r.sharpness_score, rel=1e-3)
|
| 558 |
+
assert restored.quality_score == pytest.approx(r.quality_score, rel=1e-3)
|
| 559 |
+
assert restored.analysis_method == r.analysis_method
|
| 560 |
+
|
| 561 |
+
def test_from_dict_ignores_quality_tier(self):
|
| 562 |
+
# quality_tier est une propriété, pas un param init → from_dict doit l'ignorer
|
| 563 |
+
data = {
|
| 564 |
+
"sharpness_score": 0.5, "noise_level": 0.3, "rotation_degrees": 0.0,
|
| 565 |
+
"contrast_score": 0.6, "quality_score": 0.5, "analysis_method": "mock",
|
| 566 |
+
"quality_tier": "medium", # doit être ignoré
|
| 567 |
+
}
|
| 568 |
+
r = ImageQualityResult.from_dict(data)
|
| 569 |
+
assert r.quality_score == pytest.approx(0.5)
|
| 570 |
+
|
| 571 |
+
|
| 572 |
+
class TestGenerateMockQualityScores:
|
| 573 |
+
|
| 574 |
+
def test_returns_image_quality_result(self):
|
| 575 |
+
r = generate_mock_quality_scores("folio_001")
|
| 576 |
+
assert isinstance(r, ImageQualityResult)
|
| 577 |
+
|
| 578 |
+
def test_scores_in_range(self):
|
| 579 |
+
r = generate_mock_quality_scores("folio_001", seed=42)
|
| 580 |
+
assert 0.0 <= r.quality_score <= 1.0
|
| 581 |
+
assert 0.0 <= r.sharpness_score <= 1.0
|
| 582 |
+
assert 0.0 <= r.noise_level <= 1.0
|
| 583 |
+
assert 0.0 <= r.contrast_score <= 1.0
|
| 584 |
+
|
| 585 |
+
def test_reproducible_with_seed(self):
|
| 586 |
+
r1 = generate_mock_quality_scores("folio_001", seed=42)
|
| 587 |
+
r2 = generate_mock_quality_scores("folio_001", seed=42)
|
| 588 |
+
assert r1.quality_score == r2.quality_score
|
| 589 |
+
|
| 590 |
+
def test_analysis_method_mock(self):
|
| 591 |
+
r = generate_mock_quality_scores("folio_001")
|
| 592 |
+
assert r.analysis_method == "mock"
|
| 593 |
+
|
| 594 |
+
def test_no_error(self):
|
| 595 |
+
r = generate_mock_quality_scores("folio_001")
|
| 596 |
+
assert r.error is None
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
class TestGlobalQualityScore:
|
| 600 |
+
|
| 601 |
+
def test_perfect_input(self):
|
| 602 |
+
score = _global_quality_score(sharpness=1.0, noise=0.0, rotation_abs=0.0, contrast=1.0)
|
| 603 |
+
assert score == pytest.approx(1.0)
|
| 604 |
+
|
| 605 |
+
def test_worst_input(self):
|
| 606 |
+
score = _global_quality_score(sharpness=0.0, noise=1.0, rotation_abs=10.0, contrast=0.0)
|
| 607 |
+
assert score == pytest.approx(0.0)
|
| 608 |
+
|
| 609 |
+
def test_medium_input(self):
|
| 610 |
+
score = _global_quality_score(sharpness=0.5, noise=0.5, rotation_abs=0.0, contrast=0.5)
|
| 611 |
+
assert 0.0 < score < 1.0
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
class TestAggregateImageQuality:
|
| 615 |
+
|
| 616 |
+
def test_empty_list(self):
|
| 617 |
+
result = aggregate_image_quality([])
|
| 618 |
+
assert result == {}
|
| 619 |
+
|
| 620 |
+
def test_single_result(self):
|
| 621 |
+
r = ImageQualityResult(quality_score=0.75, analysis_method="mock")
|
| 622 |
+
agg = aggregate_image_quality([r])
|
| 623 |
+
assert agg["mean_quality_score"] == pytest.approx(0.75)
|
| 624 |
+
assert agg["document_count"] == 1
|
| 625 |
+
|
| 626 |
+
def test_tier_distribution(self):
|
| 627 |
+
results = [
|
| 628 |
+
ImageQualityResult(quality_score=0.8, analysis_method="mock"), # good
|
| 629 |
+
ImageQualityResult(quality_score=0.5, analysis_method="mock"), # medium
|
| 630 |
+
ImageQualityResult(quality_score=0.2, analysis_method="mock"), # poor
|
| 631 |
+
]
|
| 632 |
+
agg = aggregate_image_quality(results)
|
| 633 |
+
assert agg["quality_distribution"]["good"] == 1
|
| 634 |
+
assert agg["quality_distribution"]["medium"] == 1
|
| 635 |
+
assert agg["quality_distribution"]["poor"] == 1
|
| 636 |
+
|
| 637 |
+
def test_scores_list_present(self):
|
| 638 |
+
results = [ImageQualityResult(quality_score=0.6, analysis_method="mock")]
|
| 639 |
+
agg = aggregate_image_quality(results)
|
| 640 |
+
assert "scores" in agg
|
| 641 |
+
assert len(agg["scores"]) == 1
|
| 642 |
+
|
| 643 |
+
def test_errors_excluded(self):
|
| 644 |
+
results = [
|
| 645 |
+
ImageQualityResult(quality_score=0.8, analysis_method="mock"),
|
| 646 |
+
ImageQualityResult(quality_score=0.0, analysis_method="none", error="file not found"),
|
| 647 |
+
]
|
| 648 |
+
agg = aggregate_image_quality(results)
|
| 649 |
+
assert agg["document_count"] == 1 # seul le résultat sans erreur compte
|
| 650 |
+
|
| 651 |
+
|
| 652 |
+
# ===========================================================================
|
| 653 |
+
# Tests d'intégration Sprint 5 (fixtures + rapport)
|
| 654 |
+
# ===========================================================================
|
| 655 |
+
|
| 656 |
+
class TestFixturesSprint5:
    """Integration tests: Sprint 5 metrics are attached to the sample fixtures.

    Each test regenerates the sample benchmark and checks that the new
    Sprint 5 fields (confusion matrix, char scores, taxonomy, structure,
    image quality) are populated at document and engine level.
    """

    def test_doc_result_has_confusion_matrix(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        # Fixed: the original loop ended with `break`, so only the first
        # engine report was ever verified. Check every engine, consistent
        # with the sibling tests below.
        for er in bm.engine_reports:
            for dr in er.document_results:
                assert dr.confusion_matrix is not None, (
                    f"confusion_matrix manquante pour {er.engine_name}/{dr.doc_id}"
                )

    def test_doc_result_has_char_scores(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            dr = er.document_results[0]
            assert dr.char_scores is not None
            assert "ligature" in dr.char_scores
            assert "diacritic" in dr.char_scores

    def test_doc_result_has_taxonomy(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            dr = er.document_results[0]
            assert dr.taxonomy is not None
            assert "counts" in dr.taxonomy
            assert "total_errors" in dr.taxonomy

    def test_doc_result_has_structure(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            dr = er.document_results[0]
            assert dr.structure is not None
            assert "gt_line_count" in dr.structure

    def test_doc_result_has_image_quality(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            dr = er.document_results[0]
            assert dr.image_quality is not None
            assert "quality_score" in dr.image_quality

    def test_engine_report_has_aggregated_confusion(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            assert er.aggregated_confusion is not None
            assert "matrix" in er.aggregated_confusion

    def test_engine_report_has_aggregated_char_scores(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            assert er.aggregated_char_scores is not None
            assert "ligature" in er.aggregated_char_scores
            assert "diacritic" in er.aggregated_char_scores

    def test_engine_report_ligature_score_property(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            # The convenience property must yield a normalized score in [0, 1].
            score = er.ligature_score
            assert score is not None
            assert 0.0 <= score <= 1.0

    def test_engine_report_diacritic_score_property(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            score = er.diacritic_score
            assert score is not None
            assert 0.0 <= score <= 1.0

    def test_engine_report_has_aggregated_taxonomy(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            assert er.aggregated_taxonomy is not None
            assert "total_errors" in er.aggregated_taxonomy

    def test_engine_report_has_aggregated_structure(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            assert er.aggregated_structure is not None
            assert "mean_reading_order_score" in er.aggregated_structure

    def test_engine_report_has_aggregated_image_quality(self):
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        for er in bm.engine_reports:
            assert er.aggregated_image_quality is not None
            assert "mean_quality_score" in er.aggregated_image_quality

    def test_bad_engine_has_more_errors(self):
        """The legacy engine must have more taxonomy errors than pero_ocr."""
        from picarones.fixtures import generate_sample_benchmark
        bm = generate_sample_benchmark()
        pero = next(er for er in bm.engine_reports if er.engine_name == "pero_ocr")
        bad = next(er for er in bm.engine_reports if er.engine_name == "ancien_moteur")
        assert bad.aggregated_taxonomy["total_errors"] > pero.aggregated_taxonomy["total_errors"]


class TestReportSprint5:
    """Sprint 5 checks on the report payload, the HTML output and the JSON export."""

    @staticmethod
    def _report_data():
        # Build the report payload dict from the sample benchmark fixture.
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import _build_report_data
        return _build_report_data(generate_sample_benchmark(), {})

    @staticmethod
    def _rendered_html(tmp_path):
        # Render the full HTML report under tmp_path and return its contents.
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import ReportGenerator
        target = tmp_path / "report.html"
        ReportGenerator(generate_sample_benchmark()).generate(target)
        return target.read_text(encoding="utf-8")

    def test_report_data_has_ligature_score(self):
        for eng in self._report_data()["engines"]:
            assert "ligature_score" in eng, f"ligature_score manquant pour {eng['name']}"

    def test_report_data_has_diacritic_score(self):
        for eng in self._report_data()["engines"]:
            assert "diacritic_score" in eng

    def test_report_data_has_aggregated_taxonomy(self):
        for eng in self._report_data()["engines"]:
            assert "aggregated_taxonomy" in eng

    def test_report_data_has_aggregated_image_quality(self):
        for eng in self._report_data()["engines"]:
            assert "aggregated_image_quality" in eng

    def test_html_has_characters_tab(self, tmp_path):
        assert "Caractères" in self._rendered_html(tmp_path)

    def test_html_has_ligatures_column(self, tmp_path):
        assert "Ligatures" in self._rendered_html(tmp_path)

    def test_html_has_diacritiques_column(self, tmp_path):
        assert "Diacritiques" in self._rendered_html(tmp_path)

    def test_html_has_scatter_plot(self, tmp_path):
        assert "chart-quality-cer" in self._rendered_html(tmp_path)

    def test_html_has_taxonomy_chart(self, tmp_path):
        assert "chart-taxonomy" in self._rendered_html(tmp_path)

    def test_html_has_confusion_heatmap(self, tmp_path):
        page = self._rendered_html(tmp_path)
        assert "confusion-heatmap" in page or "matrice de confusion" in page.lower()

    def test_doc_results_have_image_quality_in_report(self):
        first_doc = self._report_data()["documents"][0]
        # At least one engine result must carry image_quality data.
        has_iq = any("image_quality" in er for er in first_doc["engine_results"])
        assert has_iq, "Aucun document result n'a de données image_quality"

    def test_json_export_contains_sprint5_data(self, tmp_path):
        import json
        from picarones.fixtures import generate_sample_benchmark
        target = tmp_path / "results.json"
        generate_sample_benchmark().to_json(target)
        payload = json.loads(target.read_text())
        # Engine-level aggregates must survive serialization...
        engine = payload["engine_reports"][0]
        assert "aggregated_taxonomy" in engine
        assert "aggregated_char_scores" in engine
        # ...as must the per-document Sprint 5 fields.
        document = engine["document_results"][0]
        assert "taxonomy" in document
        assert "char_scores" in document
        assert "structure" in document
|