Spaces:
Running
Running
Claude
fix: résoudre les 64 erreurs ruff pré-existantes révélées par le lint actif
6362212 unverified | """Analyse de robustesse des moteurs OCR face aux dégradations d'image. | |
| Fonctionnement | |
| -------------- | |
| 1. Génération de versions dégradées des images du corpus à différents niveaux : | |
| - Bruit gaussien (sigma croissant) | |
| - Flou gaussien (kernel size croissant) | |
| - Rotation (angle croissant) | |
| - Réduction de résolution (facteur de downscaling) | |
| - Binarisation (seuillage Otsu ou fixe) | |
| 2. Exécution du moteur OCR sur chaque version dégradée | |
| 3. Calcul du CER pour chaque niveau de dégradation | |
| 4. Génération de courbes de robustesse (CER en fonction du niveau) | |
| 5. Identification du seuil critique (niveau à partir duquel CER > seuil) | |
| Usage | |
| ----- | |
| from picarones.core.robustness import RobustnessAnalyzer | |
| analyzer = RobustnessAnalyzer(engine, degradation_types=["noise", "blur"]) | |
| report = analyzer.analyze(corpus) | |
| print(report.critical_thresholds) | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import math | |
| import os | |
| import tempfile | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import TYPE_CHECKING, Optional | |
| if TYPE_CHECKING: | |
| from picarones.core.corpus import Corpus, Document | |
| from picarones.engines.base import BaseOCREngine | |
| logger = logging.getLogger(__name__) | |
# ---------------------------------------------------------------------------
# Degradation parameters
# ---------------------------------------------------------------------------
# Degradation levels for each type. Index 0 is the undegraded baseline for
# every type except "binarization", where 0 means "Otsu automatic threshold".
DEGRADATION_LEVELS: dict[str, list] = {
    "noise": [0, 5, 15, 30, 50, 80],  # sigma of the Gaussian noise
    "blur": [0, 1, 2, 3, 5, 8],  # Gaussian blur radius (pixels)
    "rotation": [0, 1, 2, 5, 10, 20],  # rotation angle (degrees)
    "resolution": [1.0, 0.75, 0.5, 0.33, 0.25, 0.1],  # resolution factor
    "binarization": [0, 64, 96, 128, 160, 192],  # binarization threshold (0 = Otsu)
}
# Human-readable label for each level above (same order as DEGRADATION_LEVELS).
DEGRADATION_LABELS: dict[str, list[str]] = {
    "noise": ["original", "σ=5", "σ=15", "σ=30", "σ=50", "σ=80"],
    "blur": ["original", "r=1", "r=2", "r=3", "r=5", "r=8"],
    "rotation": ["0°", "1°", "2°", "5°", "10°", "20°"],
    "resolution": ["100%", "75%", "50%", "33%", "25%", "10%"],
    "binarization": ["original", "seuil=64", "seuil=96", "seuil=128", "seuil=160", "seuil=192"],
}
# Canonical list of supported degradation types.
ALL_DEGRADATION_TYPES = list(DEGRADATION_LEVELS.keys())
| # --------------------------------------------------------------------------- | |
| # Dégradation d'image (pure Python + stdlib, optionnellement Pillow/NumPy) | |
| # --------------------------------------------------------------------------- | |
| def _apply_gaussian_noise(pixels: list[list[list[int]]], sigma: float, rng_seed: int = 0) -> list[list[list[int]]]: | |
| """Applique du bruit gaussien (pure Python).""" | |
| import random | |
| rng = random.Random(rng_seed) | |
| h = len(pixels) | |
| w = len(pixels[0]) if h > 0 else 0 | |
| result = [] | |
| for y in range(h): | |
| row = [] | |
| for x in range(w): | |
| pixel = [] | |
| for c in pixels[y][x]: | |
| noise = rng.gauss(0, sigma) | |
| val = int(c + noise) | |
| pixel.append(max(0, min(255, val))) | |
| row.append(pixel) | |
| result.append(row) | |
| return result | |
| def _apply_box_blur(pixels: list[list[list[int]]], radius: int) -> list[list[list[int]]]: | |
| """Applique un flou de boîte (approximation du flou gaussien, pure Python).""" | |
| if radius <= 0: | |
| return pixels | |
| h = len(pixels) | |
| w = len(pixels[0]) if h > 0 else 0 | |
| channels = len(pixels[0][0]) if h > 0 and w > 0 else 3 | |
| def blur_pass(data: list[list[list[int]]]) -> list[list[list[int]]]: | |
| out = [] | |
| for y in range(h): | |
| row = [] | |
| for x in range(w): | |
| totals = [0] * channels | |
| count = 0 | |
| for dy in range(-radius, radius + 1): | |
| for dx in range(-radius, radius + 1): | |
| ny, nx = y + dy, x + dx | |
| if 0 <= ny < h and 0 <= nx < w: | |
| for c in range(channels): | |
| totals[c] += data[ny][nx][c] | |
| count += 1 | |
| row.append([t // count for t in totals]) | |
| out.append(row) | |
| return out | |
| return blur_pass(pixels) | |
| def _apply_rotation_simple(pixels: list[list[list[int]]], angle_deg: float) -> list[list[list[int]]]: | |
| """Rotation avec interpolation au plus proche voisin (pure Python). | |
| Pour des angles faibles, l'effet est réaliste. | |
| """ | |
| if angle_deg == 0: | |
| return pixels | |
| h = len(pixels) | |
| w = len(pixels[0]) if h > 0 else 0 | |
| channels = len(pixels[0][0]) if h > 0 and w > 0 else 3 | |
| angle_rad = math.radians(angle_deg) | |
| cos_a = math.cos(angle_rad) | |
| sin_a = math.sin(angle_rad) | |
| cx, cy = w / 2, h / 2 | |
| result = [[[245, 240, 232][:channels] for _ in range(w)] for _ in range(h)] | |
| for y in range(h): | |
| for x in range(w): | |
| # Coordonnées source | |
| sx = cos_a * (x - cx) + sin_a * (y - cy) + cx | |
| sy = -sin_a * (x - cx) + cos_a * (y - cy) + cy | |
| ix, iy = int(round(sx)), int(round(sy)) | |
| if 0 <= ix < w and 0 <= iy < h: | |
| result[y][x] = list(pixels[iy][ix]) | |
| return result | |
| def _apply_resolution_reduction( | |
| pixels: list[list[list[int]]], factor: float | |
| ) -> list[list[list[int]]]: | |
| """Réduit la résolution puis remonte à la taille originale (pixelisation).""" | |
| if factor >= 1.0: | |
| return pixels | |
| h = len(pixels) | |
| w = len(pixels[0]) if h > 0 else 0 | |
| new_h = max(1, int(h * factor)) | |
| new_w = max(1, int(w * factor)) | |
| # Downscale | |
| small = [] | |
| for y in range(new_h): | |
| row = [] | |
| src_y = int(y / factor) | |
| for x in range(new_w): | |
| src_x = int(x / factor) | |
| row.append(list(pixels[min(src_y, h - 1)][min(src_x, w - 1)])) | |
| small.append(row) | |
| # Upscale (nearest-neighbor) | |
| result = [] | |
| for y in range(h): | |
| row = [] | |
| src_y = min(int(y * factor), new_h - 1) | |
| for x in range(w): | |
| src_x = min(int(x * factor), new_w - 1) | |
| row.append(list(small[src_y][src_x])) | |
| result.append(row) | |
| return result | |
| def _apply_binarization( | |
| pixels: list[list[list[int]]], threshold: int | |
| ) -> list[list[list[int]]]: | |
| """Binarise l'image (seuillage fixe sur luminosité).""" | |
| h = len(pixels) | |
| w = len(pixels[0]) if h > 0 else 0 | |
| result = [] | |
| # Calculer le seuil Otsu si threshold == 0 | |
| if threshold == 0: | |
| histogram = [0] * 256 | |
| total = h * w | |
| for y in range(h): | |
| for x in range(w): | |
| p = pixels[y][x] | |
| lum = int(0.299 * p[0] + 0.587 * p[1] + 0.114 * p[2]) if len(p) >= 3 else p[0] | |
| histogram[lum] += 1 | |
| # Otsu simplifié | |
| best_thresh = 128 | |
| best_var = -1.0 | |
| total_sum = sum(i * histogram[i] for i in range(256)) | |
| w0, w1, sum0 = 0, total, 0.0 | |
| for t in range(256): | |
| w0 += histogram[t] | |
| if w0 == 0: | |
| continue | |
| w1 = total - w0 | |
| if w1 == 0: | |
| break | |
| sum0 += t * histogram[t] | |
| mean0 = sum0 / w0 | |
| mean1 = (total_sum - sum0) / w1 | |
| var = w0 * w1 * (mean0 - mean1) ** 2 | |
| if var > best_var: | |
| best_var = var | |
| best_thresh = t | |
| threshold = best_thresh | |
| for y in range(h): | |
| row = [] | |
| for x in range(w): | |
| p = pixels[y][x] | |
| lum = int(0.299 * p[0] + 0.587 * p[1] + 0.114 * p[2]) if len(p) >= 3 else p[0] | |
| val = 255 if lum >= threshold else 0 | |
| row.append([val] * len(p)) | |
| result.append(row) | |
| return result | |
def degrade_image_bytes(
    png_bytes: bytes,
    degradation_type: str,
    level: float,
) -> bytes:
    """Return a degraded copy of a PNG image, as PNG bytes.

    Delegates to the Pillow implementation when Pillow is installed, and
    falls back to the pure-Python stub otherwise.

    Parameters
    ----------
    png_bytes:
        Bytes of the source PNG image.
    degradation_type:
        Degradation type (``"noise"``, ``"blur"``, ``"rotation"``,
        ``"resolution"``, ``"binarization"``).
    level:
        Numeric degradation level; its meaning depends on the type.

    Returns
    -------
    bytes
        Bytes of the degraded PNG image.
    """
    try:
        degraded = _degrade_pillow(png_bytes, degradation_type, level)
    except ImportError:
        # Pillow is missing: use the best-effort fallback.
        degraded = _degrade_pure_python(png_bytes, degradation_type, level)
    return degraded
def _degrade_pillow(png_bytes: bytes, degradation_type: str, level: float) -> bytes:
    """Degrade the image using Pillow (best-quality path).

    Raises ImportError when Pillow is unavailable, which the caller
    (``degrade_image_bytes``) catches to fall back to the pure-Python stub.
    """
    import io
    from PIL import Image, ImageFilter
    img = Image.open(io.BytesIO(png_bytes)).convert("RGB")
    if degradation_type == "noise":
        if level > 0:
            import random
            # RNG seeded with a constant so the same input always yields
            # the same noisy output (reproducible degradations).
            data = list(img.getdata())
            rng = random.Random(0)
            noisy = []
            for r, g, b in data:
                # One Gaussian sample per channel, clamped to [0, 255].
                noisy.append((
                    max(0, min(255, int(r + rng.gauss(0, level)))),
                    max(0, min(255, int(g + rng.gauss(0, level)))),
                    max(0, min(255, int(b + rng.gauss(0, level)))),
                ))
            img.putdata(noisy)
    elif degradation_type == "blur":
        if level > 0:
            img = img.filter(ImageFilter.GaussianBlur(radius=level))
    elif degradation_type == "rotation":
        if level != 0:
            # expand=False keeps the original canvas size; out-of-frame areas
            # are filled with a parchment-like background colour.
            img = img.rotate(-level, expand=False, fillcolor=(245, 240, 232))
    elif degradation_type == "resolution":
        if level < 1.0:
            # Downscale then upscale back (nearest-neighbour) to pixelate.
            w, h = img.size
            new_w, new_h = max(1, int(w * level)), max(1, int(h * level))
            img = img.resize((new_w, new_h), Image.NEAREST)
            img = img.resize((w, h), Image.NEAREST)
    elif degradation_type == "binarization":
        img = img.convert("L")  # grayscale
        if level == 0:
            # Otsu thresholding: pick the threshold that maximises the
            # between-class variance of the grayscale histogram.
            histogram = img.histogram()
            total = img.size[0] * img.size[1]
            best_thresh, best_var = 128, -1.0
            total_sum = sum(i * histogram[i] for i in range(256))
            w0, sum0 = 0, 0.0
            for t in range(256):
                w0 += histogram[t]
                if w0 == 0:
                    continue
                w1 = total - w0
                if w1 == 0:
                    break
                sum0 += t * histogram[t]
                var = w0 * w1 * (sum0 / w0 - (total_sum - sum0) / w1) ** 2
                if var > best_var:
                    best_var = var
                    best_thresh = t
            threshold = best_thresh
        else:
            threshold = int(level)
        # Mode "1" makes the image strictly bilevel; convert back to RGB so
        # every degradation type returns the same image mode.
        img = img.point(lambda p: 255 if p >= threshold else 0, "1").convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return buf.getvalue()
def _degrade_pure_python(png_bytes: bytes, degradation_type: str, level: float) -> bytes:
    """Fallback degradation when Pillow is unavailable (stub).

    A full pure-Python PNG codec is out of scope, so this stub simply logs a
    warning and hands the original bytes back unchanged — the analysis keeps
    running, just without an actual degradation. In practice Pillow is almost
    always installed in the Picarones environment.
    """
    logger.warning(
        "Pillow non disponible : dégradation '%s' appliquée en mode dégradé (stub)",
        degradation_type,
    )
    return png_bytes
| # --------------------------------------------------------------------------- | |
| # Structures de résultats | |
| # --------------------------------------------------------------------------- | |
@dataclass
class DegradationCurve:
    """CER-vs-degradation-level curve for one engine and one degradation type.

    Attributes
    ----------
    engine_name:
        Name of the OCR engine the curve belongs to.
    degradation_type:
        Degradation type ("noise", "blur", "rotation", ...).
    levels:
        Numeric degradation levels, in evaluation order.
    labels:
        Human-readable label for each level.
    cer_values:
        Mean CER (0-1) at each level; ``None`` where it could not be computed.
    critical_threshold_level:
        First level at which the CER exceeds ``cer_threshold``; ``None`` if it
        never does.
    cer_threshold:
        CER threshold used to determine the critical level.
    """

    # NOTE: @dataclass is required — the class is declared with bare field
    # annotations and instantiated with keyword arguments (see the analyzer).
    engine_name: str
    degradation_type: str
    levels: list[float]
    labels: list[str]
    cer_values: list[Optional[float]]
    critical_threshold_level: Optional[float] = None
    cer_threshold: float = 0.20

    def as_dict(self) -> dict:
        """Serialize the curve to a plain, JSON-friendly dict."""
        return {
            "engine_name": self.engine_name,
            "degradation_type": self.degradation_type,
            "levels": self.levels,
            "labels": self.labels,
            "cer_values": self.cer_values,
            "critical_threshold_level": self.critical_threshold_level,
            "cer_threshold": self.cer_threshold,
        }
@dataclass
class RobustnessReport:
    """Complete robustness-analysis report for one or more engines."""

    # NOTE: @dataclass is required — the class uses field(default_factory=...)
    # and is instantiated with keyword arguments by the analyzer.
    engine_names: list[str]
    corpus_name: str
    degradation_types: list[str]
    curves: list[DegradationCurve]
    # Summary: most robust engine per degradation type, critical thresholds, ...
    summary: dict = field(default_factory=dict)

    def get_curves_for_engine(self, engine_name: str) -> list[DegradationCurve]:
        """Return every curve computed for the given engine."""
        return [c for c in self.curves if c.engine_name == engine_name]

    def get_curves_for_type(self, degradation_type: str) -> list[DegradationCurve]:
        """Return every curve computed for the given degradation type."""
        return [c for c in self.curves if c.degradation_type == degradation_type]

    def as_dict(self) -> dict:
        """Serialize the report (curves included) to a JSON-friendly dict."""
        return {
            "engine_names": self.engine_names,
            "corpus_name": self.corpus_name,
            "degradation_types": self.degradation_types,
            "curves": [c.as_dict() for c in self.curves],
            "summary": self.summary,
        }
| # --------------------------------------------------------------------------- | |
| # Analyseur de robustesse | |
| # --------------------------------------------------------------------------- | |
class RobustnessAnalyzer:
    """Runs a robustness analysis over a corpus.

    Parameters
    ----------
    engines:
        One or more OCR engines (``BaseOCREngine``). A single engine may be
        passed directly; it is wrapped in a list.
    degradation_types:
        Degradation types to test. Defaults to all of them (``"noise"``,
        ``"blur"``, ``"rotation"``, ``"resolution"``, ``"binarization"``).
    cer_threshold:
        CER threshold defining the critical level (default: 0.20 = 20%).
    custom_levels:
        Custom levels per type (override the defaults).

    Examples
    --------
    >>> from picarones.engines.tesseract import TesseractEngine
    >>> from picarones.core.robustness import RobustnessAnalyzer
    >>> engine = TesseractEngine(config={"lang": "fra"})
    >>> analyzer = RobustnessAnalyzer([engine], degradation_types=["noise", "blur"])
    >>> report = analyzer.analyze(corpus)
    """

    def __init__(
        self,
        engines: "list[BaseOCREngine]",
        degradation_types: Optional[list[str]] = None,
        cer_threshold: float = 0.20,
        custom_levels: Optional[dict[str, list]] = None,
    ) -> None:
        if not isinstance(engines, list):
            engines = [engines]
        self.engines = engines
        self.degradation_types = degradation_types or ALL_DEGRADATION_TYPES
        self.cer_threshold = cer_threshold
        # Copy so custom levels never mutate the module-level defaults.
        self.levels = dict(DEGRADATION_LEVELS)
        if custom_levels:
            self.levels.update(custom_levels)

    def analyze(
        self,
        corpus: "Corpus",
        show_progress: bool = True,
        max_docs: int = 10,
    ) -> RobustnessReport:
        """Run the robustness analysis on the corpus.

        Parameters
        ----------
        corpus:
            Picarones corpus with images and ground truth.
        show_progress:
            Display progress (uses tqdm when installed).
        max_docs:
            Maximum number of documents to process (keeps runs fast).

        Returns
        -------
        RobustnessReport
        """
        from picarones.core.metrics import compute_metrics

        docs = corpus.documents[:max_docs]
        curves: list[DegradationCurve] = []
        for engine in self.engines:
            for deg_type in self.degradation_types:
                levels = self.levels[deg_type]
                labels = DEGRADATION_LABELS.get(deg_type, [str(lv) for lv in levels])
                cer_per_level: list[Optional[float]] = []
                level_iter = self._level_iterator(
                    engine.name, deg_type, levels, show_progress
                )
                for _lvl_idx, level in level_iter:
                    doc_cers: list[float] = []
                    for doc in docs:
                        gt = doc.ground_truth.strip()
                        if not gt:
                            # No ground truth: CER would be meaningless.
                            continue
                        degraded_bytes = self._get_degraded_image(doc, deg_type, level)
                        if degraded_bytes is None:
                            continue
                        cer = self._run_engine_cer(
                            engine, degraded_bytes, gt, deg_type, level, compute_metrics
                        )
                        if cer is not None:
                            doc_cers.append(cer)
                    # Mean CER across documents, or None when nothing succeeded.
                    cer_per_level.append(
                        sum(doc_cers) / len(doc_cers) if doc_cers else None
                    )
                critical = self._find_critical_level(
                    levels, cer_per_level, self.cer_threshold
                )
                curves.append(DegradationCurve(
                    engine_name=engine.name,
                    degradation_type=deg_type,
                    levels=levels,
                    labels=labels[:len(levels)],
                    cer_values=cer_per_level,
                    critical_threshold_level=critical,
                    cer_threshold=self.cer_threshold,
                ))
        summary = self._build_summary(curves)
        return RobustnessReport(
            engine_names=[e.name for e in self.engines],
            corpus_name=corpus.name,
            degradation_types=self.degradation_types,
            curves=curves,
            summary=summary,
        )

    @staticmethod
    def _level_iterator(
        engine_name: str,
        deg_type: str,
        levels: list,
        show_progress: bool,
    ):
        """Return an (index, level) iterator, wrapped in tqdm when available."""
        if show_progress:
            try:
                from tqdm import tqdm
                return tqdm(
                    list(enumerate(levels)),
                    desc=f"{engine_name} / {deg_type}",
                )
            except ImportError:
                pass
        return enumerate(levels)

    @staticmethod
    def _run_engine_cer(
        engine: "BaseOCREngine",
        degraded_bytes: bytes,
        gt: str,
        deg_type: str,
        level: float,
        compute_metrics,
    ) -> Optional[float]:
        """OCR a degraded image via a temp file and return its CER.

        Returns None when the engine fails: a crash on one degraded image
        must not abort the whole analysis (best-effort by design).
        """
        # Write and close the file before running the engine, so engines that
        # require exclusive access to the file can open it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            tmp.write(degraded_bytes)
            tmp_path = tmp.name
        try:
            ocr_result = engine.run(tmp_path)
            metrics = compute_metrics(gt, ocr_result.text)
            return metrics.cer
        except Exception as exc:
            logger.debug(
                "Erreur OCR %s niveau %s=%s: %s",
                engine.name, deg_type, level, exc
            )
            return None
        finally:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

    def _get_degraded_image(
        self,
        doc: "Document",
        degradation_type: str,
        level: float,
    ) -> Optional[bytes]:
        """Return the PNG bytes of the degraded image (None if unloadable)."""
        original_bytes = self._load_image(doc)
        if original_bytes is None:
            return None
        # Level 0 (or factor >= 1.0) means "no degradation" for every type
        # except binarization, where level 0 means "Otsu automatic threshold".
        is_noop = (
            (degradation_type in ("noise", "blur", "rotation") and level == 0)
            or (degradation_type == "resolution" and level >= 1.0)
        )
        if is_noop:
            return original_bytes
        return degrade_image_bytes(original_bytes, degradation_type, level)

    def _load_image(self, doc: "Document") -> Optional[bytes]:
        """Load the PNG bytes of a document's image (data URI or local file)."""
        img_path = doc.image_path
        # Data URI (base64)
        if img_path.startswith("data:image/"):
            import base64
            try:
                _, b64 = img_path.split(",", 1)
                return base64.b64decode(b64)
            except Exception as exc:
                logger.debug("Impossible de décoder data URI: %s", exc)
                return None
        # Local file
        path = Path(img_path)
        if path.exists():
            return path.read_bytes()
        logger.debug("Image introuvable : %s", img_path)
        return None

    @staticmethod
    def _find_critical_level(
        levels: list[float],
        cer_values: list[Optional[float]],
        threshold: float,
    ) -> Optional[float]:
        """Return the first level whose CER exceeds the threshold, or None.

        Static: also called unbound (``RobustnessAnalyzer._find_critical_level``)
        by the demo-report generator.
        """
        for level, cer in zip(levels, cer_values):
            if cer is not None and cer > threshold:
                return level
        return None

    @staticmethod
    def _build_summary(curves: list[DegradationCurve]) -> dict:
        """Build the analysis summary from the computed curves.

        Static: also called unbound by the demo-report generator.
        """
        summary: dict = {}
        # Per degradation type: most robust engine.
        by_type: dict[str, dict[str, list]] = {}
        for curve in curves:
            dt = curve.degradation_type
            if dt not in by_type:
                by_type[dt] = {}
            valid_cers = [c for c in curve.cer_values if c is not None]
            if valid_cers:
                by_type[dt][curve.engine_name] = valid_cers
        for dt, engine_cers in by_type.items():
            if not engine_cers:
                continue
            # Robustness = mean CER across all levels (lower = more robust).
            best_engine = min(
                engine_cers,
                key=lambda e: sum(engine_cers[e]) / len(engine_cers[e]),
            )
            summary[f"most_robust_{dt}"] = best_engine
        # Critical thresholds per engine.
        for curve in curves:
            key = f"critical_{curve.engine_name}_{curve.degradation_type}"
            summary[key] = curve.critical_threshold_level
        return summary
| # --------------------------------------------------------------------------- | |
| # Données de démonstration de robustesse | |
| # --------------------------------------------------------------------------- | |
def generate_demo_robustness_report(
    engine_names: Optional[list[str]] = None,
    seed: int = 42,
) -> RobustnessReport:
    """Build a synthetic but plausible robustness report for the demo.

    Parameters
    ----------
    engine_names:
        Engines to simulate (default: tesseract and pero_ocr).
    seed:
        Random seed controlling the simulated jitter.

    Returns
    -------
    RobustnessReport
    """
    import random

    rng = random.Random(seed)
    if engine_names is None:
        engine_names = ["tesseract", "pero_ocr"]
    # Baseline CER per engine.
    baseline = {
        "tesseract": 0.12,
        "pero_ocr": 0.07,
        "ancien_moteur": 0.25,
    }
    # Per-level CER increase for each degradation type (additive slope).
    slopes = {
        "tesseract": {
            "noise": 0.04, "blur": 0.05, "rotation": 0.06,
            "resolution": 0.12, "binarization": 0.03,
        },
        "pero_ocr": {
            "noise": 0.02, "blur": 0.03, "rotation": 0.04,
            "resolution": 0.08, "binarization": 0.02,
        },
        "ancien_moteur": {
            "noise": 0.06, "blur": 0.08, "rotation": 0.10,
            "resolution": 0.15, "binarization": 0.05,
        },
    }
    all_types = ALL_DEGRADATION_TYPES
    curves: list[DegradationCurve] = []
    for name in engine_names:
        base = baseline.get(name, 0.15)
        engine_slopes = slopes.get(name, {dt: 0.05 for dt in all_types})
        for deg_type in all_types:
            levels = DEGRADATION_LEVELS[deg_type]
            slope = engine_slopes.get(deg_type, 0.05)
            simulated = []
            # CER grows linearly with the level index, plus a little jitter.
            for step in range(len(levels)):
                jitter = rng.gauss(0, 0.005)
                value = min(1.0, base + slope * step + jitter)
                simulated.append(round(max(0.0, value), 4))
            critical = RobustnessAnalyzer._find_critical_level(levels, simulated, 0.20)
            curves.append(DegradationCurve(
                engine_name=name,
                degradation_type=deg_type,
                levels=list(levels),
                labels=DEGRADATION_LABELS[deg_type][:len(levels)],
                cer_values=simulated,
                critical_threshold_level=critical,
                cer_threshold=0.20,
            ))
    return RobustnessReport(
        engine_names=engine_names,
        corpus_name="Corpus de démonstration — Chroniques médiévales",
        degradation_types=all_types,
        curves=curves,
        summary=RobustnessAnalyzer._build_summary(curves),
    )