diff --git a/docs/migration/legacy-retirement-plan.md b/docs/migration/legacy-retirement-plan.md
index d6fae34e8ec84dffad82386d84a25ef0d3ac3da4..071f4e45d9e1092e8b401b27573452043a6ad43b 100644
--- a/docs/migration/legacy-retirement-plan.md
+++ b/docs/migration/legacy-retirement-plan.md
@@ -701,11 +701,12 @@ architecture vérifiée.
   ``pipeline_benchmark``, ``pipeline_comparison``,
   ``core/pipeline``) puis 2 renderers
   (``numerical_sequences``, ``pipeline``).
-- Phase 5.D : 5 vues (``views/*.py``).
-- Phase 5.E : ``generator.py``, ``comparison.py``,
-  ``snapshot.py``, ``report_data/``, templates Jinja2.
+- Phase 5.D ✅ — 5 vues (``views/*.py``).
+- Phase 5.E ✅ — ``generator.py``, ``comparison.py``,
+  ``snapshot.py``, ``report_data/`` (8 fichiers), templates
+  Jinja2 (13 fichiers), ``picarones/i18n.py``.
 
-Effort restant estimé : 8-12 jours.
+Phase 5 est **terminée**.
 
 #### Phase 5.C.batch2 — Lot 2 : 5 renderers moyens (2026-05)
 
@@ -989,6 +990,79 @@ Total : ~1114 lignes relocalisées.  6 nouveaux shims minimaux
 **Acceptance Phase 5.D** : 5019 tests passent, lint vert,
 architecture vérifiée.
 
+#### Phase 5.E — Migration generator + comparison + snapshot + report_data + templates + i18n (2026-05)
+
+Phase 5.E finalise Phase 5 en migrant les derniers composants
+``report/`` :
+
+**Migrations effectuées** :
+
+| Source legacy                                  | Destination canonique                              |
+|------------------------------------------------|----------------------------------------------------|
+| ``report/generator.py`` (466)                  | ``reports_v2/html/generator.py``                   |
+| ``report/comparison.py`` (409)                 | ``reports_v2/html/comparison.py``                  |
+| ``report/snapshot.py`` (266)                   | ``reports_v2/html/snapshot.py``                    |
+| ``report/report_data/__init__.py`` (132)       | ``reports_v2/html/data/__init__.py``               |
+| ``report/report_data/_helpers.py`` (30)        | ``reports_v2/html/data/_helpers.py``               |
+| ``report/report_data/documents.py`` (167)      | ``reports_v2/html/data/documents.py``              |
+| ``report/report_data/engines.py`` (103)        | ``reports_v2/html/data/engines.py``                |
+| ``report/report_data/extra_metrics.py`` (272)  | ``reports_v2/html/data/extra_metrics.py``          |
+| ``report/report_data/pareto.py`` (159)         | ``reports_v2/html/data/pareto.py``                 |
+| ``report/report_data/scatter.py`` (56)         | ``reports_v2/html/data/scatter.py``                |
+| ``report/report_data/statistics.py`` (216)     | ``reports_v2/html/data/statistics.py``             |
+| ``report/templates/`` (13 fichiers)            | ``reports_v2/html/templates/`` (13 fichiers)       |
+| ``picarones/i18n.py`` (124)                    | ``picarones/reports_v2/i18n/__init__.py``          |
+| ``report/__init__.py`` (3)                     | shim re-export                                     |
+
+Total : ~2400 lignes relocalisées + 13 templates Jinja2 + le
+loader i18n.  Au total **12 nouveaux shims minimaux** (< 25
+lignes) avec ``DeprecationWarning``.
+
+**Adaptations transverses** :
+
+- ``reports_v2/html/snapshot.py`` ne peut pas importer
+  ``picarones.__version__`` (interdit par layer-deps) : utilise
+  ``importlib.metadata`` avec fallback (comme en Phase 4-ter).
+- ``reports_v2/html/snapshot.py`` : import ``pricing`` redirigé
+  vers le canonique ``evaluation/metrics/pricing``.
+- ``reports_v2/html/generator.py`` : tous les ~30 imports
+  internes redirigés vers ``reports_v2/html/{data,renderers,
+  views,snapshot}`` et ``evaluation/{statistics,metric_result,
+  benchmark_result}``.
+- ``reports_v2/html/data/`` : 7 imports vers
+  ``measurements/{statistics,difficulty,pricing,marginal_cost,
+  rare_tokens,taxonomy_cooccurrence,taxonomy_intra_doc}``
+  redirigés vers ``evaluation/{statistics,metrics/...}``.
+- ``reports_v2/html/views/`` : 6 imports vers
+  ``measurements/{taxonomy_comparison,incremental_comparison,
+  levers,image_predictive,worst_lines,throughput}`` redirigés
+  vers ``evaluation/metrics/...``.
+- ``picarones/reports_v2/__init__.py`` : nouveau loader
+  ``from picarones.reports_v2.html.generator import ReportGenerator``.
+- ``test_module_coverage.py::TEST_ONLY_BASELINE`` étendu à 3
+  modules : ``statistics``, ``pricing``, ``difficulty``.
+- ``test_file_budgets.py`` : 2 entrées legacy retirées,
+  remplacées par les chemins canoniques ; templates dir
+  référencé via ``reports_v2/html/templates/``.
+- 28+ chemins de templates dans les tests redirigés vers
+  ``reports_v2/html/templates/``.
+- Tests qui faisaient ``from picarones import i18n`` redirigés
+  vers ``from picarones.reports_v2 import i18n`` (le shim ne
+  ré-exporte pas ``_get_labels_cached`` — privé).
+
+##### État final de ``picarones/report/``
+
+
+Le répertoire ``picarones/report/`` ne contient désormais
+**que des shims** (~30 fichiers).  Aucun module avec du
+contenu réel ne subsiste.  Le canonique vit intégralement
+dans ``picarones/reports_v2/html/`` (générateur + renderers
++ vues + données + templates + comparaison + snapshot).
+
+**Acceptance Phase 5.E + Phase 5 entière** : 5019 tests
+passent, lint vert, architecture vérifiée (anti-cycles,
+file budgets, module coverage).
+
 ### Phase 6 — Pipelines OCR+LLM (`pipelines/`)
 
 **Modules** : `pipelines/base.OCRLLMPipeline` (3 modes), `pipelines/
diff --git a/picarones/cli/__init__.py b/picarones/cli/__init__.py
index be2f1ce652c0094abd97fa6a3e5b4d458a727125..4a2169ff580e5ede3c7bc1ce3467f3c20af0df0b 100644
--- a/picarones/cli/__init__.py
+++ b/picarones/cli/__init__.py
@@ -223,7 +223,7 @@ def report_cmd(results: str, output: str, lazy_images: bool, verbose: bool) -> N
     """
     _setup_logging(verbose)
 
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     click.echo(f"Chargement des résultats : {results}")
     try:
@@ -303,7 +303,7 @@ def demo_cmd(
         picarones demo --with-history --with-robustness --docs 8
     """
     from picarones.fixtures import generate_sample_benchmark
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     click.echo(f"Génération des données fictives ({docs} documents, 3 moteurs)…")
     benchmark = generate_sample_benchmark(n_docs=docs)
diff --git a/picarones/cli/_workflows.py b/picarones/cli/_workflows.py
index d59042a234c9f5b3b946c0249f88f64767ceacf2..d9a9642d73f020c75ec456913187c40b1f2f8608 100644
--- a/picarones/cli/_workflows.py
+++ b/picarones/cli/_workflows.py
@@ -479,7 +479,7 @@ def compare_cmd(
     """
     _setup_logging(verbose)
 
-    from picarones.report.comparison import (
+    from picarones.reports_v2.html.comparison import (
         compare_benchmarks,
         detect_regressions,
         render_comparison_html,
diff --git a/picarones/i18n.py b/picarones/i18n.py
index 081c40ac3bab78fe5b5bd995ee20d8f4e2406df4..2bd0ff3f56e3dfccdc6910bd578994ed757d2c15 100644
--- a/picarones/i18n.py
+++ b/picarones/i18n.py
@@ -1,125 +1,24 @@
-"""Labels i18n pour le rapport HTML et l'interface Picarones.
+"""``picarones.i18n`` — shim re-export (déprécié, suppression 2.0).
 
-Langues supportées
-------------------
-- ``"fr"`` : français (défaut)
-- ``"en"`` : anglais patrimonial (heritage English)
-
-Depuis le Sprint 17, les traductions sont stockées dans des fichiers
-JSON et chargées au premier accès.  Phase 5 du retrait du legacy
-(2026-05) : les fichiers ont été déplacés de
-``picarones/report/i18n/{lang}.json`` vers
-``picarones/reports_v2/i18n/{lang}.json``.  Aucun changement
-fonctionnel pour les consommateurs de ``get_labels``.
-
-``TRANSLATIONS`` reste exposé comme dict pour compatibilité ascendante.
-
-Sprint 30 — durcissement
-------------------------
-- Chargement lazy + thread-safe via verrou explicite ; les serveurs
-  web sous charge concurrente ne peuvent plus initialiser deux fois.
-- ``reload_translations()`` exposé pour les tests qui modifient les
-  fichiers JSON à la volée.
-- ``get_labels()`` mémoizé via ``functools.lru_cache`` pour absorber
-  le fallback ``lang → fr`` sans relire le dict à chaque appel.
+Canonique : :mod:`picarones.reports_v2.i18n`.  Phase 5.E du retrait
+du legacy.
 """
 
 from __future__ import annotations
 
-import json
-import logging
-import threading
-from functools import lru_cache
-from pathlib import Path
-
-logger = logging.getLogger(__name__)
-
-
-_I18N_DIR = Path(__file__).parent / "reports_v2" / "i18n"
-_LOAD_LOCK = threading.Lock()
-_TRANSLATIONS_CACHE: dict[str, dict[str, str]] | None = None
-
-
-def _load_translations() -> dict[str, dict[str, str]]:
-    """Charge tous les fichiers JSON du dossier i18n.
-
-    Un fichier ``{lang}.json`` définit les labels de la langue ``lang``.
-    Retourne toujours un dict non-vide, même si le dossier est manquant
-    (dans ce cas, le dict est vide et ``get_labels`` tombe sur un fallback).
-    """
-    translations: dict[str, dict[str, str]] = {}
-    if not _I18N_DIR.is_dir():
-        return translations
-    for path in sorted(_I18N_DIR.glob("*.json")):
-        lang = path.stem
-        try:
-            with path.open(encoding="utf-8") as fh:
-                translations[lang] = json.load(fh)
-        except (OSError, json.JSONDecodeError) as e:
-            logger.warning("[i18n] fichier '%s' ignoré : %s", path, e)
-    return translations
-
-
-def _get_translations() -> dict[str, dict[str, str]]:
-    """Retourne le cache de translations, initialisé une seule fois.
-
-    Thread-safe : deux threads qui appellent simultanément en démarrage
-    ne déclencheront qu'une seule lecture disque.
-    """
-    global _TRANSLATIONS_CACHE
-    if _TRANSLATIONS_CACHE is not None:
-        return _TRANSLATIONS_CACHE
-    with _LOAD_LOCK:
-        if _TRANSLATIONS_CACHE is None:
-            _TRANSLATIONS_CACHE = _load_translations()
-    return _TRANSLATIONS_CACHE
-
-
-def reload_translations() -> None:
-    """Force la relecture des fichiers JSON au prochain ``get_labels``.
-
-    Utile pour les tests qui modifient ``reports_v2/i18n/*.json`` à la volée.
-    """
-    global _TRANSLATIONS_CACHE
-    with _LOAD_LOCK:
-        _TRANSLATIONS_CACHE = None
-    _get_labels_cached.cache_clear()
-
-
-@lru_cache(maxsize=None)
-def _get_labels_cached(lang: str) -> tuple[tuple[str, str], ...]:
-    """Cache mémoïsé : ``lang -> tuple ordonné des paires``.
-
-    Le retour en tuple permet à ``lru_cache`` de mémoriser sans
-    contrainte de hashabilité, et est trivialement converti en dict
-    par ``get_labels`` à chaque appel (coût O(n)).
-    """
-    translations = _get_translations()
-    labels = translations.get(lang) or translations.get("fr") or {}
-    return tuple(labels.items())
-
-
-def get_labels(lang: str = "fr") -> dict[str, str]:
-    """Retourne le dictionnaire de labels pour la langue donnée.
-
-    Parameters
-    ----------
-    lang:
-        Code langue : ``"fr"`` (défaut) ou ``"en"``.
-
-    Returns
-    -------
-    dict
-        Labels traduits. Toujours valide : bascule sur ``"fr"`` si lang inconnu.
-        Si ``"fr"`` lui-même manque, retourne un dict vide (comportement dégradé
-        mais non bloquant).
-    """
-    return dict(_get_labels_cached(lang))
-
-
-# ``TRANSLATIONS`` reste accessible comme attribut module pour les
-# consommateurs externes qui le lisaient directement. Initialisé
-# paresseusement à l'import — n'engendre **pas** de lecture si le
-# module n'est jamais utilisé.
-TRANSLATIONS: dict[str, dict[str, str]] = _get_translations()
-SUPPORTED_LANGS: list[str] = list(TRANSLATIONS.keys())
+import warnings
+
+from picarones.reports_v2.i18n import *  # noqa: F401, F403
+from picarones.reports_v2.i18n import (  # noqa: F401
+    TRANSLATIONS,
+    SUPPORTED_LANGS,
+    get_labels,
+    reload_translations,
+)
+
+warnings.warn(
+    "picarones.i18n is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.i18n instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/__init__.py b/picarones/report/__init__.py
index 3fde7e20739038581957f8ebe7b6e883ec767d98..ed224c304a1f55690e6da7ddc3f1951e16839df6 100644
--- a/picarones/report/__init__.py
+++ b/picarones/report/__init__.py
@@ -1,5 +1,20 @@
-"""Module de génération du rapport HTML interactif."""
+"""``picarones.report`` — shim re-export (déprécié, suppression 2.0).
 
-from picarones.report.generator import ReportGenerator
+Canonique : :mod:`picarones.reports_v2.html`.  Phase 5.E du retrait
+du legacy.
+"""
+
+from __future__ import annotations
+
+import warnings
+
+from picarones.reports_v2.html import ReportGenerator  # noqa: F401
+
+warnings.warn(
+    "picarones.report is deprecated and will be removed in 2.0.  "
+    "Import ReportGenerator from picarones.reports_v2.html instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
 
 __all__ = ["ReportGenerator"]
diff --git a/picarones/report/comparison.py b/picarones/report/comparison.py
index 93a53f95c8909cd02a6566a1a69e8eb857d74bf5..85a8c4dc4d6b87c7907f6837e1f4560ee9b32250 100644
--- a/picarones/report/comparison.py
+++ b/picarones/report/comparison.py
@@ -1,409 +1,18 @@
-"""Comparaison de deux runs de benchmark (Sprint 28).
+"""``picarones.report.comparison`` — shim re-export (déprécié, suppression 2.0).
 
-Le Sprint 8 a livré la persistance longitudinale via SQLite
-(``picarones.measurements.history``) et un détecteur de régression CLI. Mais
-aucun outil n'exposait la **comparaison** de deux runs côté rapport :
-un chercheur qui itère sur 8 prompts ne pouvait pas voir d'un coup
-*« Tesseract → GPT-4o version V2 a régressé de 0,8 pp en CER moyen
-sur la strate paroissiaux par rapport à V1 »*.
-
-Ce module fournit :
-
-- ``load_benchmark_json(path)`` — charge le JSON produit par
-  ``BenchmarkResult.as_dict()`` ou ``picarones run -o results.json``.
-- ``compare_benchmarks(a, b)`` — calcule les deltas par moteur
-  (CER mean, WER mean, comptes de documents traités/échoués) et
-  par strate quand la métadonnée est présente.
-- ``detect_regressions(diff, threshold)`` — liste les moteurs en
-  régression (delta CER > threshold) et en amélioration
-  (delta CER < -threshold).
-- ``render_comparison_html(diff, output_path)`` — rendu HTML
-  auto-contenu minimal via Jinja2 pour partage.
-
-Conventions
------------
-- Les deltas sont calculés ``b - a`` (donc positif = ``b`` est pire).
-- Un moteur présent dans un seul run apparaît dans ``only_in_a`` /
-  ``only_in_b``, jamais dans ``deltas``.
-- Un moteur dont le ``mean_cer`` est ``None`` (échec total) est
-  signalé mais ne génère pas de delta numérique.
-- ``threshold`` est en absolu (CER en fraction, pas en %). Défaut
-  0.005 = 0,5 pp.
+Canonique : :mod:`picarones.reports_v2.html.comparison`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-import json
-import logging
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Optional
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Modèles
-# ---------------------------------------------------------------------------
-
-@dataclass
-class EngineDelta:
-    """Différence ``b - a`` pour un moteur donné."""
-    engine: str
-    cer_a: Optional[float]
-    cer_b: Optional[float]
-    delta_cer: Optional[float]
-    wer_a: Optional[float]
-    wer_b: Optional[float]
-    delta_wer: Optional[float]
-    docs_a: int
-    docs_b: int
-    failed_a: int
-    failed_b: int
-    is_regression: bool = False
-    is_improvement: bool = False
-
-    def as_dict(self) -> dict[str, Any]:
-        return {
-            "engine": self.engine,
-            "cer_a": self.cer_a,
-            "cer_b": self.cer_b,
-            "delta_cer": self.delta_cer,
-            "wer_a": self.wer_a,
-            "wer_b": self.wer_b,
-            "delta_wer": self.delta_wer,
-            "docs_a": self.docs_a,
-            "docs_b": self.docs_b,
-            "failed_a": self.failed_a,
-            "failed_b": self.failed_b,
-            "is_regression": self.is_regression,
-            "is_improvement": self.is_improvement,
-        }
-
-
-@dataclass
-class ComparisonResult:
-    """Résultat d'une comparaison ``b - a`` entre deux runs."""
-    label_a: str
-    label_b: str
-    run_date_a: Optional[str]
-    run_date_b: Optional[str]
-    corpus_a: Optional[str]
-    corpus_b: Optional[str]
-    deltas: list[EngineDelta] = field(default_factory=list)
-    only_in_a: list[str] = field(default_factory=list)
-    only_in_b: list[str] = field(default_factory=list)
-    threshold: float = 0.005
-
-    def as_dict(self) -> dict[str, Any]:
-        return {
-            "label_a": self.label_a,
-            "label_b": self.label_b,
-            "run_date_a": self.run_date_a,
-            "run_date_b": self.run_date_b,
-            "corpus_a": self.corpus_a,
-            "corpus_b": self.corpus_b,
-            "threshold": self.threshold,
-            "deltas": [d.as_dict() for d in self.deltas],
-            "only_in_a": list(self.only_in_a),
-            "only_in_b": list(self.only_in_b),
-            "regressions": [d.as_dict() for d in self.deltas if d.is_regression],
-            "improvements": [d.as_dict() for d in self.deltas if d.is_improvement],
-        }
-
-
-# ---------------------------------------------------------------------------
-# Chargement
-# ---------------------------------------------------------------------------
-
-def load_benchmark_json(path: str | Path) -> dict[str, Any]:
-    """Charge un JSON de benchmark depuis disque.
-
-    Accepte :
-      - le format ``BenchmarkResult.as_dict()`` (clé ``ranking``,
-        ``engine_reports`` ou ``engines``) ;
-      - un dict déjà parsé ; dans ce cas, ``path`` peut être un dict.
-    """
-    if isinstance(path, dict):
-        return path
-    p = Path(path)
-    if not p.exists():
-        raise FileNotFoundError(f"Fichier benchmark introuvable : {p}")
-    with p.open(encoding="utf-8") as fh:
-        data = json.load(fh)
-    if not isinstance(data, dict):
-        raise ValueError(f"Le JSON {p} doit être un dict.")
-    return data
-
-
-# ---------------------------------------------------------------------------
-# Comparaison
-# ---------------------------------------------------------------------------
-
-def _ranking_index(data: dict[str, Any]) -> dict[str, dict[str, Any]]:
-    """Indexe ``ranking`` par nom de moteur — robuste aux deux formats.
-
-    Un ``BenchmarkResult.as_dict()`` expose ``ranking`` directement
-    (clés ``engine``, ``mean_cer``, …). Le format alternatif ``engines``
-    expose le même contenu sous des clés légèrement différentes —
-    on normalise vers le format ``ranking``.
-    """
-    ranking = data.get("ranking")
-    if isinstance(ranking, list) and ranking:
-        return {
-            r["engine"]: {
-                "engine": r["engine"],
-                "mean_cer": r.get("mean_cer"),
-                "mean_wer": r.get("mean_wer"),
-                "documents": int(r.get("documents") or 0),
-                "failed": int(r.get("failed") or 0),
-            }
-            for r in ranking
-            if isinstance(r, dict) and r.get("engine")
-        }
-    # Fallback : ``engines`` (format report_data)
-    engines = data.get("engines") or []
-    out: dict[str, dict[str, Any]] = {}
-    if isinstance(engines, list):
-        for e in engines:
-            if not isinstance(e, dict):
-                continue
-            name = e.get("name") or e.get("engine")
-            if not name:
-                continue
-            out[name] = {
-                "engine": name,
-                "mean_cer": e.get("cer"),
-                "mean_wer": e.get("wer"),
-                "documents": int(e.get("documents") or 0),
-                "failed": int(e.get("failed") or 0),
-            }
-    return out
-
-
-def _label_of(data: dict[str, Any], default: str) -> str:
-    meta = data.get("meta") or {}
-    return (
-        meta.get("corpus_name")
-        or (data.get("corpus") or {}).get("name")
-        or default
-    )
-
-
-def _run_date_of(data: dict[str, Any]) -> Optional[str]:
-    return (
-        data.get("run_date")
-        or (data.get("meta") or {}).get("run_date")
-    )
-
-
-def _corpus_of(data: dict[str, Any]) -> Optional[str]:
-    meta = data.get("meta") or {}
-    return (
-        meta.get("corpus_source")
-        or (data.get("corpus") or {}).get("source")
-        or meta.get("corpus_name")
-    )
-
-
-def _safe_delta(a: Optional[float], b: Optional[float]) -> Optional[float]:
-    if a is None or b is None:
-        return None
-    return float(b) - float(a)
-
-
-def compare_benchmarks(
-    a: str | Path | dict[str, Any],
-    b: str | Path | dict[str, Any],
-    *,
-    threshold: float = 0.005,
-    label_a: str = "A",
-    label_b: str = "B",
-) -> ComparisonResult:
-    """Compare deux runs et retourne les deltas par moteur.
-
-    Convention : un delta CER positif signifie que ``b`` est *moins bon*
-    que ``a`` (régression). Un seuil ``threshold`` strictement positif
-    (en fraction, ex. 0,005 = 0,5 pp) discrimine régression / bruit.
-    """
-    da = load_benchmark_json(a) if not isinstance(a, dict) else a
-    db = load_benchmark_json(b) if not isinstance(b, dict) else b
-
-    idx_a = _ranking_index(da)
-    idx_b = _ranking_index(db)
-
-    common = sorted(set(idx_a) & set(idx_b))
-    only_a = sorted(set(idx_a) - set(idx_b))
-    only_b = sorted(set(idx_b) - set(idx_a))
-
-    deltas: list[EngineDelta] = []
-    for name in common:
-        ea = idx_a[name]
-        eb = idx_b[name]
-        delta_cer = _safe_delta(ea["mean_cer"], eb["mean_cer"])
-        delta_wer = _safe_delta(ea["mean_wer"], eb["mean_wer"])
-        regression = bool(delta_cer is not None and delta_cer > threshold)
-        improvement = bool(delta_cer is not None and delta_cer < -threshold)
-        deltas.append(
-            EngineDelta(
-                engine=name,
-                cer_a=ea["mean_cer"],
-                cer_b=eb["mean_cer"],
-                delta_cer=delta_cer,
-                wer_a=ea["mean_wer"],
-                wer_b=eb["mean_wer"],
-                delta_wer=delta_wer,
-                docs_a=int(ea["documents"]),
-                docs_b=int(eb["documents"]),
-                failed_a=int(ea["failed"]),
-                failed_b=int(eb["failed"]),
-                is_regression=regression,
-                is_improvement=improvement,
-            )
-        )
-
-    # Tri : régressions (delta décroissant) puis améliorations (delta croissant).
-    deltas.sort(key=lambda d: (
-        not d.is_regression,
-        -(d.delta_cer if d.delta_cer is not None else 0.0),
-    ))
-
-    return ComparisonResult(
-        label_a=label_a,
-        label_b=label_b,
-        run_date_a=_run_date_of(da),
-        run_date_b=_run_date_of(db),
-        corpus_a=_corpus_of(da),
-        corpus_b=_corpus_of(db),
-        deltas=deltas,
-        only_in_a=only_a,
-        only_in_b=only_b,
-        threshold=float(threshold),
-    )
-
-
-def detect_regressions(
-    diff: ComparisonResult,
-) -> list[EngineDelta]:
-    """Retourne uniquement les moteurs en régression dans ``diff``."""
-    return [d for d in diff.deltas if d.is_regression]
-
-
-# ---------------------------------------------------------------------------
-# Rendu HTML
-# ---------------------------------------------------------------------------
-
-_COMPARISON_TEMPLATE = """<!DOCTYPE html>
-<html lang="fr">
-<head>
-<meta charset="UTF-8">
-<title>Picarones — Comparaison de runs</title>
-<style>
-  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
-         max-width: 980px; margin: 2em auto; padding: 0 1em; color: #111; }
-  h1 { border-bottom: 2px solid #333; padding-bottom: .4em; }
-  h2 { margin-top: 1.6em; color: #333; }
-  table { width: 100%; border-collapse: collapse; margin: 1em 0; }
-  th, td { padding: .5em .8em; text-align: left; border-bottom: 1px solid #ddd; }
-  th { background: #f3f3f3; }
-  td.num, th.num { text-align: right; font-variant-numeric: tabular-nums; }
-  tr.regression td { background: #fef0f0; }
-  tr.improvement td { background: #f0fef2; }
-  .delta-pos { color: #b0322a; font-weight: 600; }
-  .delta-neg { color: #1b8a3a; font-weight: 600; }
-  .badge { display: inline-block; padding: .15em .55em; border-radius: 4px;
-           font-size: .8em; font-weight: 600; }
-  .badge.reg { background: #fde2e0; color: #8a1c14; }
-  .badge.imp { background: #e0f8e6; color: #0a5e22; }
-  .meta { color: #666; font-size: .9em; }
-  .empty { color: #999; font-style: italic; }
-</style>
-</head>
-<body>
-<h1>Comparaison : {{ diff.label_a }} → {{ diff.label_b }}</h1>
-<p class="meta">
-  Run A : {{ diff.run_date_a or "?" }} · corpus {{ diff.corpus_a or "?" }}<br>
-  Run B : {{ diff.run_date_b or "?" }} · corpus {{ diff.corpus_b or "?" }}<br>
-  Seuil régression / amélioration : {{ "%.3f"|format(diff.threshold) }}
-  ({{ "%.1f"|format(diff.threshold * 100) }} pp de CER absolu).
-</p>
-
-<h2>Moteurs comparés ({{ diff.deltas|length }})</h2>
-{% if not diff.deltas %}
-  <p class="empty">Aucun moteur commun aux deux runs.</p>
-{% else %}
-<table>
-  <thead>
-    <tr>
-      <th scope=\"col\">Moteur</th>
-      <th scope=\"col\" class="num">CER A</th>
-      <th scope=\"col\" class="num">CER B</th>
-      <th scope=\"col\" class="num">Δ CER</th>
-      <th scope=\"col\" class="num">Docs A → B</th>
-      <th scope=\"col\">État</th>
-    </tr>
-  </thead>
-  <tbody>
-  {% for d in diff.deltas %}
-    <tr class="{% if d.is_regression %}regression{% elif d.is_improvement %}improvement{% endif %}">
-      <td>{{ d.engine }}</td>
-      <td class="num">{{ "%.3f"|format(d.cer_a) if d.cer_a is not none else "—" }}</td>
-      <td class="num">{{ "%.3f"|format(d.cer_b) if d.cer_b is not none else "—" }}</td>
-      <td class="num">
-        {% if d.delta_cer is none %}—
-        {% elif d.delta_cer > 0 %}<span class="delta-pos">+{{ "%.3f"|format(d.delta_cer) }}</span>
-        {% else %}<span class="delta-neg">{{ "%.3f"|format(d.delta_cer) }}</span>
-        {% endif %}
-      </td>
-      <td class="num">{{ d.docs_a }} → {{ d.docs_b }}</td>
-      <td>
-        {% if d.is_regression %}<span class="badge reg">régression</span>
-        {% elif d.is_improvement %}<span class="badge imp">amélioration</span>
-        {% else %}<span class="meta">stable</span>{% endif %}
-      </td>
-    </tr>
-  {% endfor %}
-  </tbody>
-</table>
-{% endif %}
-
-{% if diff.only_in_a %}
-<h2>Présents uniquement dans A</h2>
-<ul>{% for n in diff.only_in_a %}<li>{{ n }}</li>{% endfor %}</ul>
-{% endif %}
-
-{% if diff.only_in_b %}
-<h2>Présents uniquement dans B</h2>
-<ul>{% for n in diff.only_in_b %}<li>{{ n }}</li>{% endfor %}</ul>
-{% endif %}
-
-<p class="meta">Picarones — Sprint 28 · rapport de comparaison de runs.</p>
-</body>
-</html>
-"""
-
-
-def render_comparison_html(
-    diff: ComparisonResult,
-    output_path: str | Path,
-) -> Path:
-    """Sérialise un ``ComparisonResult`` en rapport HTML auto-contenu."""
-    from jinja2 import Environment, select_autoescape
-
-    env = Environment(autoescape=select_autoescape(["html", "j2"]))
-    template = env.from_string(_COMPARISON_TEMPLATE)
-    html = template.render(diff=diff)
-    out = Path(output_path)
-    out.parent.mkdir(parents=True, exist_ok=True)
-    out.write_text(html, encoding="utf-8")
-    return out
+import warnings
 
+from picarones.reports_v2.html.comparison import *  # noqa: F401, F403
 
-__all__ = [
-    "EngineDelta",
-    "ComparisonResult",
-    "load_benchmark_json",
-    "compare_benchmarks",
-    "detect_regressions",
-    "render_comparison_html",
-]
+warnings.warn(
+    "picarones.report.comparison is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.comparison instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/generator.py b/picarones/report/generator.py
index d45c5b77fb57310bcdb14cfd05b761101de0e9ad..db9b8b3978aa6e4b905c562085019d71750041db 100644
--- a/picarones/report/generator.py
+++ b/picarones/report/generator.py
@@ -1,466 +1,18 @@
-"""Générateur du rapport HTML interactif auto-contenu.
+"""``picarones.report.generator`` — shim re-export (déprécié, suppression 2.0).
 
-Le rapport produit est un fichier HTML unique embarquant :
-- Toutes les données (JSON inline)
-- Chart.js et diff2html (depuis cdnjs)
-- CSS et JavaScript de l'application
-
-Vues disponibles
-----------------
-1. Classement  — tableau triable par colonne (CER, WER, MER, WIL)
-2. Galerie     — grille d'images avec badge CER coloré
-3. Document    — image zoomable + diff coloré GT / OCR par moteur
-4. Analyses    — histogramme CER + graphique radar
-
-Architecture
-------------
-Ce module est l'**orchestrateur**. Les responsabilités lourdes sont
-découpées en sous-modules :
-
-- :mod:`picarones.report.assets` — chargement vendor.js, encodage
-  base64 d'images, externalisation lazy.
-- :mod:`picarones.report.report_data` — construction du dict JSON
-  passé au template (engines, documents, statistiques, Pareto, etc.).
-- :mod:`picarones.report.render_helpers` — couleurs / SVG mutualisés.
-
-Rétrocompat
------------
-Deux noms historiques sont **encore importés par des tests** sous
-leur préfixe ``_`` et doivent être préservés :
-
-- ``_build_report_data`` (importé par 14 fichiers de tests).
-- ``_cer_color`` (importé par ``tests/report/test_report.py``).
-
-Les autres noms ``_pct``, ``_safe``, ``_cer_bg``, ``_encode_image_b64``,
-``_encode_images_b64_from_result``, ``_externalize_images_to_dir``,
-``_load_vendor_js`` sont soit utilisés en interne (les 3 derniers,
-voir :meth:`ReportGenerator.generate`), soit accessibles via leur
-nom canonique dans :mod:`picarones.report.assets` ou
-:mod:`picarones.report.render_helpers`.
+Canonique : :mod:`picarones.reports_v2.html.generator`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-import json
-import logging
-from pathlib import Path
-from typing import Any, Optional
-
-from picarones.evaluation.benchmark_result import BenchmarkResult
-from picarones.measurements.statistics import build_critical_difference_svg
-from picarones.reports_v2._helpers.assets import (
-    encode_images_b64_from_result as _encode_images_b64_from_result,
-    externalize_images_to_dir as _externalize_images_to_dir,
-    load_vendor_js as _load_vendor_js,
-)
-
-# Ré-exports rétrocompat consommés par les tests externes (cf. docstring
-# de module). La directive de fin de ligne documente l'intention de
-# ré-export et empêche ruff de marquer l'import comme inutilisé.
-from picarones.reports_v2._helpers.render_helpers import cer_step_color as _cer_color  # noqa: F401
-from picarones.report.report_data import build_report_data as _build_report_data  # noqa: F401
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Rendu Jinja2
-# ---------------------------------------------------------------------------
-
-# Depuis le Sprint 16, le template monolithique ~3100 lignes a été découpé en
-# fichiers externes dans ``picarones/report/templates/`` (CSS, JS, vues HTML).
-# ``base.html.j2`` assemble le tout via ``{% include %}``.
-
-_TEMPLATES_DIR = Path(__file__).parent / "templates"
-
-
-def _build_jinja_env():
-    """Construit l'Environment Jinja2 pour le rapport.
-
-    Autoescape désactivé : le comportement est équivalent à celui du
-    ``_HTML_TEMPLATE.format()`` historique. Les variables injectées
-    (JSON embarqué, SVG généré, synthèse narrative issue de templates
-    internes) sont toutes produites par le code Picarones et ne
-    nécessitent pas d'échappement HTML.
-    """
-    from jinja2 import Environment, FileSystemLoader
-    env = Environment(
-        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
-        autoescape=False,
-        keep_trailing_newline=True,
-    )
-    return env
-
-
-# ---------------------------------------------------------------------------
-# Classe principale
-# ---------------------------------------------------------------------------
-
-class ReportGenerator:
-    """Génère un rapport HTML interactif depuis un BenchmarkResult.
-
-    Usage
-    -----
-    >>> from picarones.report import ReportGenerator
-    >>> gen = ReportGenerator(benchmark_result)
-    >>> path = gen.generate("rapport.html")
-    >>> # Rapport en anglais :
-    >>> gen_en = ReportGenerator(benchmark_result, lang="en")
-    >>> path_en = gen_en.generate("report.html")
-    """
-
-    def __init__(
-        self,
-        benchmark: BenchmarkResult,
-        images_b64: Optional[dict[str, str]] = None,
-        lang: str = "fr",
-        normalization_profile: Any = None,
-        lazy_images: bool = False,
-    ) -> None:
-        """
-        Parameters
-        ----------
-        benchmark:
-            Résultat de benchmark à visualiser.
-        images_b64:
-            Dictionnaire {doc_id: data-URI base64 OU url relative} des images.
-            Si None, le générateur cherche dans ``benchmark.metadata["_images_b64"]``.
-            Si ``lazy_images=True``, la valeur attendue est une URL relative
-            comme ``"report-assets/<doc>.png"``.
-        lang:
-            Code langue du rapport : ``"fr"`` (défaut) ou ``"en"``.
-        normalization_profile:
-            Profil de normalisation effectivement utilisé (Sprint 27 — pour
-            le snapshot de reproductibilité). ``None`` retombe sur le
-            profil mentionné dans ``benchmark.metadata["normalization_profile"]``
-            s'il est présent, sinon snapshot indisponible.
-        lazy_images:
-            Sprint A5 (M-16) — si ``True``, les images sont écrites en
-            fichiers PNG/JPEG dans ``<output_dir>/report-assets/`` à côté
-            du HTML, et référencées via ``<img loading="lazy">``.
-            Le rapport reste auto-portant si on copie aussi le dossier
-            d'assets. Utile pour les corpus > 50 documents (un rapport
-            base64 monolithique de 1 000 docs dépasse 200 MB et fait
-            ramer le navigateur). En mode mono-doc ou démo : laisser
-            ``False`` pour un fichier HTML unique transportable.
-        """
-        self.benchmark = benchmark
-        self.images_b64: dict[str, str] = images_b64 or {}
-        self.lang = lang
-        self.normalization_profile = normalization_profile
-        self.lazy_images = lazy_images
-
-        # Récupérer les images embarquées dans les metadata (fixtures)
-        if not self.images_b64:
-            self.images_b64 = benchmark.metadata.get("_images_b64", {})  # type: ignore[assignment]
-
-        # Sprint 27 — fallback : profil de normalisation depuis les metadata
-        if self.normalization_profile is None:
-            self.normalization_profile = benchmark.metadata.get("normalization_profile")
-
-    def generate(self, output_path: str | Path) -> Path:
-        """Génère le fichier HTML et le sauvegarde sur disque.
+import warnings
 
-        Parameters
-        ----------
-        output_path:
-            Chemin du fichier HTML à écrire.
+from picarones.reports_v2.html.generator import *  # noqa: F401, F403
 
-        Returns
-        -------
-        Path
-            Chemin absolu du fichier généré.
-        """
-        from picarones.i18n import get_labels
-
-        output_path = Path(output_path)
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Sprint A5 (M-16) — externalisation des images si lazy_images=True
-        # ou auto-encodage base64 sinon. Les deux modes alimentent la même
-        # variable ``images_b64`` (le nom est conservé pour rétrocompat ;
-        # en mode lazy la valeur est une URL relative au lieu d'un data-URI).
-        # En mode lazy, on **force** l'externalisation même si self.images_b64
-        # est pré-rempli (par les fixtures, par metadata, etc.) — sinon le
-        # rapport contiendrait quand même des data-URI géants.
-        if self.lazy_images:
-            images_b64 = _externalize_images_to_dir(
-                self.benchmark, output_path.parent,
-            )
-        else:
-            images_b64 = self.images_b64
-            if not images_b64:
-                images_b64 = _encode_images_b64_from_result(self.benchmark)
-
-        labels = get_labels(self.lang)
-        report_data = _build_report_data(self.benchmark, images_b64)
-
-        # Sprint 27 — snapshots de reproductibilité (pricing, glossaire,
-        # profil de normalisation, environnement). Embarqués dans le JSON
-        # du rapport pour qu'un lecteur puisse régénérer la synthèse, le
-        # Pareto et le glossaire sans accès au code source.
-        from picarones.report.snapshot import snapshot_all
-        report_data["snapshots"] = snapshot_all(
-            lang=self.lang,
-            normalization_profile=self.normalization_profile,
-        )
-
-        report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
-        i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
-        chartjs_js = _load_vendor_js("chart.umd.min.js")
-
-        # Sprint 17 — rendu SVG du CDD côté serveur (statique, pas de JS)
-        cdd_svg = build_critical_difference_svg(
-            report_data.get("statistics", {}).get("nemenyi", {}),
-        )
-
-        # Sprint 18 — synthèse factuelle narrative (déterministe, sans LLM)
-        from picarones.measurements.narrative import build_synthesis
-        synthesis = build_synthesis(report_data, lang=self.lang)
-
-        # Sprint 20 — glossaire contextuel chargé depuis YAML
-        from picarones.reports_v2.glossary import load_glossary
-        glossary = load_glossary(self.lang)
-        glossary_json = json.dumps(glossary, ensure_ascii=False, separators=(",", ":"))
-
-        section_html = self._build_section_html(report_data, labels)
-
-        env = _build_jinja_env()
-        template = env.get_template("base.html.j2")
-        html = template.render(
-            corpus_name=self.benchmark.corpus_name,
-            picarones_version=self.benchmark.picarones_version,
-            report_data_json=report_json,
-            i18n_json=i18n_json,
-            html_lang=labels.get("html_lang", "fr"),
-            chartjs_inline=chartjs_js,
-            critical_difference_svg=cdd_svg,
-            friedman=report_data.get("statistics", {}).get("friedman", {}),
-            synthesis=synthesis,
-            glossary_json=glossary_json,
-            **section_html,
-        )
-
-        output_path.write_text(html, encoding="utf-8")
-        return output_path.resolve()
-
-    def _build_section_html(
-        self, report_data: dict, labels: dict[str, str],
-    ) -> dict[str, str]:
-        """Construit toutes les sections HTML conditionnelles du rapport.
-
-        Chaque renderer (NER, calibration, philologie, etc.) est appelé
-        de manière indépendante. Une section retourne ``""`` si aucun
-        moteur n'a de signal pour elle — le template gère l'affichage
-        conditionnel.
-
-        Returns
-        -------
-        dict[str, str]
-            Map ``{nom_de_section: html}`` à splatter dans
-            ``template.render(**section_html)``.
-        """
-        engines = report_data.get("engines", [])
-
-        # Sprint 37 — section inter-moteurs (matrice de divergence + oracle).
-        from picarones.reports_v2.html.renderers.inter_engine import (
-            build_divergence_matrix_html,
-            build_oracle_gap_html,
-        )
-        # Sprint 41 — section NER (résumé F1 par moteur + heatmap par catégorie).
-        from picarones.reports_v2.html.renderers.ner import (
-            build_ner_per_category_html,
-            build_ner_summary_html,
-        )
-        # Sprint 43 — section calibration (tableau ECE/MCE + grille de
-        # reliability diagrams par moteur).
-        from picarones.reports_v2.html.renderers.calibration import (
-            build_calibration_summary_html,
-            build_reliability_diagrams_grid_html,
-        )
-        # Sprint 46 — section stratifiée (tableau par strate).
-        from picarones.reports_v2.html.renderers.stratification import (
-            build_stratified_ranking_html,
-        )
-        # Sprint 62 — profil philologique (6 sections adaptive).
-        from picarones.reports_v2.html.renderers.philological import (
-            build_philological_profile_html,
-        )
-        # Sprint 86 — A.II.5 : recherchabilité fuzzy + séquences numériques.
-        from picarones.reports_v2.html.renderers.searchability import (
-            build_searchability_summary_html,
-        )
-        from picarones.reports_v2.html.renderers.numerical_sequences import (
-            build_numerical_sequences_html,
-        )
-        # Sprint 87 — A.II.2 : lisibilité (delta Flesch).
-        from picarones.reports_v2.html.renderers.readability import (
-            build_readability_summary_html,
-        )
-        # Sprint 89 — A.II.8b : spécialisation inter-moteurs.
-        from picarones.reports_v2.html.renderers.specialization import (
-            build_specialization_html,
-        )
-        # Chantier 3 (post-Sprint 97) — 3 vues thématiques composées.
-        from picarones.reports_v2.html.views import (
-            build_advanced_taxonomy_view_html,
-            build_diagnostics_view_html,
-            build_economics_view_html,
-        )
-        # Sprint « câblage des modules test-only » (mai 2026) — sections
-        # qui consomment les nouvelles métriques calculées dans
-        # ``report_data.extra_metrics``.
-        from picarones.reports_v2.html.renderers.marginal_cost import (
-            build_marginal_cost_html,
-        )
-        from picarones.reports_v2.html.renderers.rare_token_recall import (
-            build_rare_token_recall_html,
-        )
-        from picarones.reports_v2.html.renderers.taxonomy_cooccurrence import (
-            build_taxonomy_cooccurrence_html,
-        )
-        from picarones.reports_v2.html.renderers.taxonomy_intra_doc import (
-            build_taxonomy_intra_doc_html,
-        )
-
-        # Spécialisation : construit une map {engine: counts} depuis les
-        # ``aggregated_taxonomy`` ; un moteur sans taxonomie est exclu.
-        taxos: dict = {}
-        for eng in engines:
-            tax = eng.get("aggregated_taxonomy")
-            if isinstance(tax, dict):
-                counts = tax.get("counts") if "counts" in tax else tax
-                if isinstance(counts, dict) and counts:
-                    taxos[eng.get("name", "?")] = {
-                        k: float(v) for k, v in counts.items()
-                        if isinstance(v, (int, float))
-                    }
-
-        return {
-            # Sprint 37
-            "divergence_matrix_html": build_divergence_matrix_html(
-                report_data.get("inter_engine_analysis"), labels=labels,
-            ),
-            "oracle_gap_html": build_oracle_gap_html(
-                report_data.get("inter_engine_analysis"), labels=labels,
-            ),
-            # Sprint 41
-            "ner_summary_html": build_ner_summary_html(engines, labels=labels),
-            "ner_per_category_html": build_ner_per_category_html(engines, labels=labels),
-            # Sprint 43
-            "calibration_summary_html": build_calibration_summary_html(
-                engines, labels=labels,
-            ),
-            "reliability_diagrams_html": build_reliability_diagrams_grid_html(
-                engines, labels=labels,
-            ),
-            # Sprint 46
-            "stratified_ranking_html": build_stratified_ranking_html(
-                report_data.get("stratified_ranking"),
-                report_data.get("available_strata"),
-                report_data.get("corpus_homogeneity"),
-                labels=labels,
-            ),
-            # Sprint 62
-            "philological_profile_html": build_philological_profile_html(
-                engines, labels=labels,
-            ),
-            # Sprint 86
-            "searchability_html": build_searchability_summary_html(
-                engines, labels=labels,
-            ),
-            "numerical_sequences_html": build_numerical_sequences_html(
-                engines, labels=labels,
-            ),
-            # Sprint 87
-            "readability_html": build_readability_summary_html(
-                engines, labels=labels,
-            ),
-            # Sprint 89
-            "specialization_html": build_specialization_html(taxos, labels=labels),
-            # Chantier 3 — vues thématiques composées
-            "economics_view_html": build_economics_view_html(
-                report_data, labels=labels,
-                engine_reports=self.benchmark.engine_reports,
-            ),
-            "advanced_taxonomy_view_html": build_advanced_taxonomy_view_html(
-                report_data, labels=labels,
-            ),
-            "diagnostics_view_html": build_diagnostics_view_html(
-                report_data, labels=labels,
-            ),
-            # Sprint « câblage des modules test-only » (mai 2026) :
-            # 4 nouvelles sections pour les modules câblés en
-            # ``report_data.extra_metrics``. Adaptive : "" si pas de signal.
-            "taxonomy_cooccurrence_html": build_taxonomy_cooccurrence_html(
-                report_data.get("taxonomy_cooccurrence"), labels=labels,
-            ),
-            "taxonomy_intra_doc_html": build_taxonomy_intra_doc_html(
-                report_data.get("taxonomy_intra_doc"), labels=labels,
-            ),
-            "rare_token_recall_html": build_rare_token_recall_html(
-                report_data.get("rare_token_recall"), labels=labels,
-            ),
-            "marginal_cost_html": build_marginal_cost_html(
-                report_data.get("marginal_cost"), labels=labels,
-            ),
-        }
-
-    @classmethod
-    def from_json(cls, json_path: str | Path, **kwargs) -> "ReportGenerator":
-        """Crée un générateur depuis un fichier JSON de résultats.
-
-        Compatible avec les fichiers produits par ``BenchmarkResult.to_json()``.
-        Les images base64 doivent être passées via ``kwargs["images_b64"]``
-        si elles ne sont pas dans le JSON.
-        """
-        import json as _json
-
-        data = _json.loads(Path(json_path).read_text(encoding="utf-8"))
-
-        # Reconstruction minimale d'un BenchmarkResult depuis le dict
-        from picarones.measurements.metrics import MetricsResult
-        from picarones.evaluation.benchmark_result import DocumentResult, EngineReport
-
-        engine_reports = []
-        for er_data in data.get("engine_reports", []):
-            doc_results = []
-            for dr_data in er_data.get("document_results", []):
-                m = dr_data["metrics"]
-                metrics = MetricsResult(
-                    cer=m["cer"], cer_nfc=m["cer_nfc"], cer_caseless=m["cer_caseless"],
-                    wer=m["wer"], wer_normalized=m["wer_normalized"],
-                    mer=m["mer"], wil=m["wil"],
-                    reference_length=m["reference_length"],
-                    hypothesis_length=m["hypothesis_length"],
-                    error=m.get("error"),
-                )
-                doc_results.append(DocumentResult(
-                    doc_id=dr_data["doc_id"],
-                    image_path=dr_data["image_path"],
-                    ground_truth=dr_data["ground_truth"],
-                    hypothesis=dr_data["hypothesis"],
-                    metrics=metrics,
-                    duration_seconds=dr_data.get("duration_seconds", 0.0),
-                    engine_error=dr_data.get("engine_error"),
-                ))
-            engine_reports.append(EngineReport(
-                engine_name=er_data["engine_name"],
-                engine_version=er_data.get("engine_version", "unknown"),
-                engine_config=er_data.get("engine_config", {}),
-                document_results=doc_results,
-            ))
-
-        corpus_info = data.get("corpus", {})
-        bm = BenchmarkResult(
-            corpus_name=corpus_info.get("name", "Corpus"),
-            corpus_source=corpus_info.get("source"),
-            document_count=corpus_info.get("document_count", 0),
-            engine_reports=engine_reports,
-            run_date=data.get("run_date", ""),
-            picarones_version=data.get("picarones_version", ""),
-            metadata=data.get("metadata", {}),
-        )
-
-        images_b64 = kwargs.pop("images_b64", {})
-        return cls(bm, images_b64=images_b64, **kwargs)
+warnings.warn(
+    "picarones.report.generator is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.generator instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/__init__.py b/picarones/report/report_data/__init__.py
index ddab80b98bfa9ccadd008337a486d9a550712a6a..97baad6fc1e9efc2b70d4a2aa7d95f8523dcf94f 100644
--- a/picarones/report/report_data/__init__.py
+++ b/picarones/report/report_data/__init__.py
@@ -1,132 +1,21 @@
-"""Construction du dict de données consommé par le template Jinja.
+"""``picarones.report.report_data`` — shim re-export (déprécié, suppression 2.0).
 
-Avant le découpage, ``picarones.report.generator._build_report_data``
-faisait 463 lignes pour transformer un :class:`BenchmarkResult` en
-dict prêt pour Jinja. Cette fonction empilait par sprint des blocs
-indépendants — engines, documents, statistiques, scatter plots,
-front Pareto, etc.
-
-Ce sous-package éclate la construction en modules thématiques :
-
-- :mod:`engines` — résumé par moteur (``engines_summary``).
-- :mod:`documents` — vue galerie + détail + difficulté Sprint 7.
-- :mod:`statistics` — Wilcoxon, Friedman, Nemenyi, bootstrap CIs,
-  reliability curves, Venn, error clusters, corrélations.
-- :mod:`scatter` — Sprint 10 : Gini vs CER, ratio vs anchor.
-- :mod:`pareto` — Sprint 19 : 3 fronts Pareto + métadonnées pricing.
-  Expose deux fonctions séparées : :func:`attach_engine_costs`
-  (mute) et :func:`build_pareto_section` (pure).
-
-L'API publique :func:`build_report_data` orchestre ces modules dans
-le bon ordre. La séquence Pareto en deux temps
-(``attach_engine_costs`` → ``build_pareto_section``) rend la
-mutation explicite — les fonctions ``build_*`` du sous-package
-sont pures sauf ``attach_engine_costs`` dont le nom le dit.
+Canonique : :mod:`picarones.reports_v2.html.data`.  Phase 5.E du
+retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
+import warnings
 
-from picarones.report.report_data.documents import (
-    annotate_documents_with_difficulty,
-    build_documents,
-)
-from picarones.report.report_data.engines import build_engines_summary
-from picarones.report.report_data.extra_metrics import (
-    compute_marginal_cost_section,
-    compute_rare_token_recall_per_engine,
-    compute_taxonomy_cooccurrence_section,
-    compute_taxonomy_intra_doc_section,
-)
-from picarones.report.report_data.pareto import (
-    attach_engine_costs,
-    build_pareto_section,
-)
-from picarones.report.report_data.scatter import (
-    build_gini_vs_cer,
-    build_ratio_vs_anchor,
-)
-from picarones.report.report_data.statistics import (
-    build_bootstrap_cis,
-    build_correlation_per_engine,
-    build_error_clusters,
-    build_friedman_and_nemenyi,
-    build_pairwise_wilcoxon,
-    build_reliability_curves,
-    build_venn_data,
+from picarones.reports_v2.html.data import *  # noqa: F401, F403
+from picarones.reports_v2.html.data import (  # noqa: F401
+    build_report_data,
 )
 
-
-def build_report_data(
-    benchmark: "BenchmarkResult", images_b64: dict[str, str],
-) -> dict:
-    """Transforme un :class:`BenchmarkResult` en dict pour le rapport HTML.
-
-    Ordre critique :
-
-    1. Construire ``engines_summary`` (pur).
-    2. Construire ``documents`` puis annoter avec la difficulté (mute
-       ``documents``).
-    3. **Attacher** les coûts à ``engines_summary`` (mute, nom
-       explicite).
-    4. **Construire** le bloc Pareto (pure, lit les coûts attachés).
-    """
-    engines_summary = build_engines_summary(benchmark)
-    documents = build_documents(benchmark, images_b64)
-    annotate_documents_with_difficulty(benchmark, documents)
-
-    attach_engine_costs(engines_summary, benchmark)
-    pareto_data = build_pareto_section(engines_summary)
-
-    return {
-        "meta": {
-            "corpus_name": benchmark.corpus_name,
-            "corpus_source": benchmark.corpus_source,
-            "document_count": benchmark.document_count,
-            "run_date": benchmark.run_date,
-            "picarones_version": benchmark.picarones_version,
-            "metadata": benchmark.metadata,
-        },
-        "ranking": benchmark.ranking(),
-        "engines": engines_summary,
-        "documents": documents,
-        # Sprint 7
-        "statistics": {
-            "pairwise_wilcoxon": build_pairwise_wilcoxon(benchmark),
-            "bootstrap_cis": build_bootstrap_cis(benchmark),
-            **build_friedman_and_nemenyi(benchmark),
-        },
-        "reliability_curves": build_reliability_curves(benchmark),
-        "venn_data": build_venn_data(benchmark),
-        "error_clusters": build_error_clusters(benchmark),
-        "correlation_per_engine": build_correlation_per_engine(benchmark),
-        # Sprint 10
-        "gini_vs_cer": build_gini_vs_cer(benchmark),
-        "ratio_vs_anchor": build_ratio_vs_anchor(benchmark),
-        # Sprint 19 — vue Pareto coût/qualité avec variantes d'axe
-        "pareto": pareto_data,
-        # Sprint 36 — analyse inter-moteurs (divergence taxonomique +
-        # complémentarité / oracle).  ``None`` si moins de 2 moteurs.
-        "inter_engine_analysis": benchmark.inter_engine_analysis,
-        # Sprint 45-46 — stratification par script_type
-        "available_strata": benchmark.available_strata(),
-        "stratified_ranking": benchmark.stratified_ranking() or None,
-        "corpus_homogeneity": benchmark.corpus_homogeneity(),
-        # Sprint « câblage des modules test-only » (mai 2026) — métriques
-        # corpus-wide qui jusque-là n'étaient pas remontées dans le rapport.
-        # Sprint 71 (A.I.1) : recall sur tokens rares (hapax + dis legomena).
-        "rare_token_recall": compute_rare_token_recall_per_engine(benchmark),
-        # Sprint 75 (A.I.4) : co-occurrence taxonomique inter-classes.
-        "taxonomy_cooccurrence": compute_taxonomy_cooccurrence_section(benchmark),
-        # Sprint 76 (A.I.4) : heatmap class × position (intra-document).
-        "taxonomy_intra_doc": compute_taxonomy_intra_doc_section(benchmark),
-        # Sprint 91 (A.II.6) : matrice de coût marginal entre paires de moteurs.
-        "marginal_cost": compute_marginal_cost_section(engines_summary),
-    }
-
-
-__all__ = ["build_report_data"]
+warnings.warn(
+    "picarones.report.report_data is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/_helpers.py b/picarones/report/report_data/_helpers.py
index de8fdee0516ca33fbe73a1eda9ed6095478e73b8..340ece9795de13c06cb6d50688576c61f600525f 100644
--- a/picarones/report/report_data/_helpers.py
+++ b/picarones/report/report_data/_helpers.py
@@ -1,30 +1,18 @@
-"""Helpers numériques internes au sous-package report_data.
+"""``picarones.report.report_data._helpers`` — shim re-export (déprécié, suppression 2.0).
 
-Petites fonctions utilitaires partagées par tous les builders de
-sections (engines, documents, statistics, scatter, pareto). Ne pas
-importer depuis l'extérieur du sous-package — ces helpers sont
-spécifiques aux conventions du dict JSON consommé par le template.
+Canonique : :mod:`picarones.reports_v2.html.data._helpers`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import Optional
+import warnings
 
+from picarones.reports_v2.html.data._helpers import *  # noqa: F401, F403
 
-def safe_round(v: Optional[float], decimals: int = 4) -> float:
-    """Arrondit un float optionnel ; ``None`` devient ``0.0``."""
-    return round(v or 0.0, decimals)
-
-
-def percent_string(v: Optional[float], decimals: int = 2) -> str:
-    """Formate un ratio ∈ [0, 1] en chaîne pourcentage : ``0.4723 → "47.23 %"``.
-
-    ``None`` → ``"—"``. Conservé pour rétrocompat avec d'éventuels
-    callers externes (Sprint 7 historique).
-    """
-    if v is None:
-        return "—"
-    return f"{v * 100:.{decimals}f} %"
-
-
-__all__ = ["safe_round", "percent_string"]
+warnings.warn(
+    "picarones.report.report_data._helpers is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data._helpers instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/documents.py b/picarones/report/report_data/documents.py
index 63a0f147bf0fa2ed86d9691e27dcd17c62b67b38..ecee3ba89650df3b01bc9e96bef0bc2637729503 100644
--- a/picarones/report/report_data/documents.py
+++ b/picarones/report/report_data/documents.py
@@ -1,167 +1,18 @@
-"""Construction de la liste ``documents`` (vue galerie + vue détail).
+"""``picarones.report.report_data.documents`` — shim re-export (déprécié, suppression 2.0).
 
-Pour chaque document du corpus, agrège les hypothèses de tous les
-moteurs avec leurs métriques, le diff caractère par caractère, et
-les champs spécifiques aux pipelines OCR+LLM (intermédiaire, mode,
-sur-normalisation).
-
-:func:`annotate_documents_with_difficulty` enrichit ensuite chaque
-document avec son score de difficulté intrinsèque (Sprint 7).
+Canonique : :mod:`picarones.reports_v2.html.data.documents`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-from picarones.core.diff_utils import compute_char_diff, compute_word_diff
-from picarones.measurements.difficulty import (
-    compute_all_difficulties,
-    difficulty_label,
-)
-from picarones.report.report_data._helpers import safe_round
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-def build_documents(
-    benchmark: "BenchmarkResult", images_b64: dict[str, str],
-) -> list[dict]:
-    """Retourne la liste ordonnée des documents prêts pour le template.
-
-    L'ordre des documents préserve l'ordre d'apparition (premier moteur
-    d'abord, puis compléments depuis les moteurs suivants si certains
-    documents ne sont pas couverts par tous les moteurs).
-    """
-    seen_doc_ids: set[str] = set()
-    doc_ids_ordered: list[str] = []
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            if dr.doc_id not in seen_doc_ids:
-                seen_doc_ids.add(dr.doc_id)
-                doc_ids_ordered.append(dr.doc_id)
-
-    # Index croisé : doc_id → {engine_name → DocumentResult}
-    doc_engine_map: dict[str, dict] = {did: {} for did in doc_ids_ordered}
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            doc_engine_map.setdefault(dr.doc_id, {})[report.engine_name] = dr
-
-    documents: list[dict] = []
-    engine_names = [r.engine_name for r in benchmark.engine_reports]
-    for doc_id in doc_ids_ordered:
-        engine_results: list[dict] = []
-        gt = ""
-        image_path = ""
-        for engine_name in engine_names:
-            dr = doc_engine_map[doc_id].get(engine_name)
-            if dr is None:
-                continue
-            gt = dr.ground_truth
-            image_path = dr.image_path
-            er_entry = _build_engine_result_entry(engine_name, dr)
-            engine_results.append(er_entry)
+import warnings
 
-        # CER moyen sur ce document (pour le badge galerie)
-        cer_values = [er["cer"] for er in engine_results if er["error"] is None]
-        mean_cer = sum(cer_values) / len(cer_values) if cer_values else 1.0
-        best_engine = min(engine_results, key=lambda x: x["cer"], default=None)
+from picarones.reports_v2.html.data.documents import *  # noqa: F401, F403
 
-        # Script type (depuis metadata par document si disponible)
-        script_type = ""
-        first_engine = engine_names[0] if engine_names else None
-        first_dr = doc_engine_map[doc_id].get(first_engine)
-        if first_dr and first_dr.image_quality:
-            script_type = first_dr.image_quality.get("script_type", "")
-
-        documents.append({
-            "doc_id": doc_id,
-            "image_path": image_path,
-            "image_b64": images_b64.get(doc_id, ""),
-            "ground_truth": gt,
-            "mean_cer": safe_round(mean_cer),
-            "best_engine": best_engine["engine"] if best_engine else "",
-            "engine_results": engine_results,
-            "script_type": script_type,
-        })
-    return documents
-
-
-def _build_engine_result_entry(engine_name: str, dr) -> dict:
-    """Construit une entrée moteur pour un document donné (extrait pour lisibilité)."""
-    diff_ops = compute_char_diff(dr.ground_truth, dr.hypothesis)
-    er_entry: dict = {
-        "engine": engine_name,
-        "hypothesis": dr.hypothesis,
-        "cer": safe_round(dr.metrics.cer),
-        "cer_diplomatic": safe_round(dr.metrics.cer_diplomatic) if dr.metrics.cer_diplomatic is not None else None,
-        "wer": safe_round(dr.metrics.wer),
-        "mer": safe_round(dr.metrics.mer),
-        "wil": safe_round(dr.metrics.wil),
-        "duration": dr.duration_seconds,
-        "error": dr.engine_error,
-        "diff": diff_ops,
-    }
-    # Champs spécifiques aux pipelines OCR+LLM
-    if dr.ocr_intermediate is not None:
-        er_entry["ocr_intermediate"] = dr.ocr_intermediate
-        er_entry["ocr_diff"] = compute_word_diff(dr.ground_truth, dr.ocr_intermediate)
-        er_entry["llm_correction_diff"] = compute_word_diff(dr.ocr_intermediate, dr.hypothesis)
-    if dr.pipeline_metadata:
-        on = dr.pipeline_metadata.get("over_normalization")
-        if on is not None:
-            er_entry["over_normalization"] = on
-        er_entry["pipeline_mode"] = dr.pipeline_metadata.get("pipeline_mode")
-    # Sprint 5 — métriques avancées par document
-    if dr.char_scores is not None:
-        er_entry["ligature_score"] = safe_round(dr.char_scores.get("ligature", {}).get("score"))
-        er_entry["diacritic_score"] = safe_round(dr.char_scores.get("diacritic", {}).get("score"))
-    if dr.taxonomy is not None:
-        er_entry["taxonomy"] = dr.taxonomy
-    if dr.structure is not None:
-        er_entry["structure"] = dr.structure
-    if dr.image_quality is not None:
-        er_entry["image_quality"] = dr.image_quality
-    # Sprint 10
-    if dr.line_metrics is not None:
-        er_entry["line_metrics"] = dr.line_metrics
-    if dr.hallucination_metrics is not None:
-        er_entry["hallucination_metrics"] = dr.hallucination_metrics
-    return er_entry
-
-
-def annotate_documents_with_difficulty(
-    benchmark: "BenchmarkResult", documents: list[dict],
-) -> None:
-    """Annote chaque document du dict avec son score de difficulté (Sprint 7).
-
-    Modifie ``documents`` en place. Les valeurs par défaut ``0.5`` /
-    ``"Modéré"`` sont retournées si la difficulté n'a pas pu être
-    calculée (par exemple corpus dégénéré).
-    """
-    doc_ids_ordered = [d["doc_id"] for d in documents]
-    gt_map = {d["doc_id"]: d["ground_truth"] for d in documents}
-    cer_map: dict[str, dict[str, float]] = {d["doc_id"]: {} for d in documents}
-    iq_map: dict[str, float] = {}
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            cer_map.setdefault(dr.doc_id, {})[report.engine_name] = safe_round(dr.metrics.cer)
-            if dr.image_quality and "quality_score" in dr.image_quality:
-                iq_map[dr.doc_id] = dr.image_quality["quality_score"]
-    difficulty_scores = compute_all_difficulties(
-        doc_ids=doc_ids_ordered,
-        ground_truths=gt_map,
-        cer_map=cer_map,
-        image_quality_map=iq_map or None,
-    )
-    for doc in documents:
-        ds = difficulty_scores.get(doc["doc_id"])
-        if ds:
-            doc["difficulty_score"] = safe_round(ds.score)
-            doc["difficulty_label"] = difficulty_label(ds.score)
-        else:
-            doc["difficulty_score"] = 0.5
-            doc["difficulty_label"] = "Modéré"
-
-
-__all__ = ["build_documents", "annotate_documents_with_difficulty"]
+warnings.warn(
+    "picarones.report.report_data.documents is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.documents instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/engines.py b/picarones/report/report_data/engines.py
index b2279edf69924f76f9cad3108e67ccec1651c82e..15ac61140e047e921619a365e4e46b538f817dae 100644
--- a/picarones/report/report_data/engines.py
+++ b/picarones/report/report_data/engines.py
@@ -1,103 +1,18 @@
-"""Construction du résumé par moteur (``engines_summary``).
+"""``picarones.report.report_data.engines`` — shim re-export (déprécié, suppression 2.0).
 
-Pour chaque ``EngineReport``, accumule métriques agrégées (CER, WER,
-MER, WIL), distribution CER pour l'histogramme, métriques avancées
-patrimoniales (Sprint 5), distribution d'erreurs (Sprint 10), NER
-(Sprint 41), calibration (Sprint 43), profil philologique (Sprint
-62), recherchabilité + séquences numériques (Sprint 86), lisibilité
-(Sprint 87) et indicateurs pipeline OCR+LLM.
-
-Les coûts (durée moyenne, prix par 1k pages, CO₂) sont ajoutés
-ultérieurement par :mod:`picarones.report.report_data.pareto` qui
-en a besoin pour calculer les fronts.
+Canonique : :mod:`picarones.reports_v2.html.data.engines`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-from picarones.report.report_data._helpers import safe_round
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-def build_engines_summary(benchmark: "BenchmarkResult") -> list[dict]:
-    """Retourne la liste des dicts moteur, une entrée par ``EngineReport``."""
-    engines_summary: list[dict] = []
-    for report in benchmark.engine_reports:
-        agg = report.aggregated_metrics
-        diplo_agg = agg.get("cer_diplomatic", {})
-
-        line_metrics = report.aggregated_line_metrics
-        halluc = report.aggregated_hallucination
-
-        entry: dict = {
-            "name": report.engine_name,
-            "version": report.engine_version,
-            "cer":  safe_round(agg.get("cer", {}).get("mean")),
-            "wer":  safe_round(agg.get("wer", {}).get("mean")),
-            "mer":  safe_round(agg.get("mer", {}).get("mean")),
-            "wil":  safe_round(agg.get("wil", {}).get("mean")),
-            "cer_median": safe_round(agg.get("cer", {}).get("median")),
-            "cer_min":    safe_round(agg.get("cer", {}).get("min")),
-            "cer_max":    safe_round(agg.get("cer", {}).get("max")),
-            "doc_count":  agg.get("document_count", 0),
-            "failed":     agg.get("failed_count", 0),
-            # CER diplomatique (après normalisation historique : ſ=s, u=v, i=j…)
-            "cer_diplomatic": safe_round(diplo_agg.get("mean")) if diplo_agg else None,
-            "cer_diplomatic_profile": diplo_agg.get("profile"),
-            # Distribution pour l'histogramme : liste des CER individuels
-            "cer_values": [
-                safe_round(dr.metrics.cer)
-                for dr in report.document_results
-                if dr.metrics.error is None
-            ],
-            "cer_diplomatic_values": [
-                safe_round(dr.metrics.cer_diplomatic)
-                for dr in report.document_results
-                if dr.metrics.error is None and dr.metrics.cer_diplomatic is not None
-            ],
-            # Champs pipeline OCR+LLM (vides pour les moteurs OCR seuls)
-            "is_pipeline": report.is_pipeline,
-            "pipeline_info": report.pipeline_info,
-            # Sprint 5 — métriques avancées patrimoniales
-            "ligature_score": safe_round(report.ligature_score) if report.ligature_score is not None else None,
-            "diacritic_score": safe_round(report.diacritic_score) if report.diacritic_score is not None else None,
-            "aggregated_confusion": report.aggregated_confusion,
-            "aggregated_taxonomy": report.aggregated_taxonomy,
-            "aggregated_structure": report.aggregated_structure,
-            "aggregated_image_quality": report.aggregated_image_quality,
-            # Sprint 10 — distribution des erreurs + hallucinations VLM
-            "gini": safe_round(line_metrics.get("gini_mean")) if line_metrics else None,
-            "cer_p90": safe_round(line_metrics.get("percentiles", {}).get("p90")) if line_metrics else None,
-            "cer_p99": safe_round(line_metrics.get("percentiles", {}).get("p99")) if line_metrics else None,
-            "catastrophic_rate_30": safe_round(line_metrics.get("catastrophic_rate", {}).get("0.3")) if line_metrics else None,
-            "aggregated_line_metrics": line_metrics,
-            "anchor_score": safe_round(halluc.get("anchor_score_mean")) if halluc else None,
-            "length_ratio": safe_round(halluc.get("length_ratio_mean")) if halluc else None,
-            "hallucinating_doc_rate": safe_round(halluc.get("hallucinating_doc_rate")) if halluc else None,
-            "aggregated_hallucination": halluc,
-            # Sprint 41 — NER agrégé (None si aucun calcul effectué)
-            "aggregated_ner": report.aggregated_ner,
-            # Sprint 43 — calibration agrégée (None si aucune confidence
-            # n'a été exposée par le moteur sur ce corpus)
-            "aggregated_calibration": report.aggregated_calibration,
-            # Sprint 62 — profil philologique agrégé (None si aucun
-            # signal philologique sur le corpus pour ce moteur)
-            "aggregated_philological": report.aggregated_philological,
-            # Sprint 86 — A.II.5 (recherchabilité fuzzy + séquences
-            # numériques). None si aucun document n'a de signal.
-            "aggregated_searchability": report.aggregated_searchability,
-            "aggregated_numerical_sequences": (
-                report.aggregated_numerical_sequences
-            ),
-            # Sprint 87 — A.II.2 (delta Flesch agrégé)
-            "aggregated_readability": report.aggregated_readability,
-            "is_vlm": report.pipeline_info.get("is_vlm", False) if report.pipeline_info else False,
-        }
-        engines_summary.append(entry)
-    return engines_summary
+import warnings
 
+from picarones.reports_v2.html.data.engines import *  # noqa: F401, F403
 
-__all__ = ["build_engines_summary"]
+warnings.warn(
+    "picarones.report.report_data.engines is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.engines instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/extra_metrics.py b/picarones/report/report_data/extra_metrics.py
index 5c598e69df6e5dcf068ddd74797480222402aa0f..57b69ee7b8788f64647d9b410181bdec9f18aa04 100644
--- a/picarones/report/report_data/extra_metrics.py
+++ b/picarones/report/report_data/extra_metrics.py
@@ -1,272 +1,18 @@
-"""Métriques additionnelles consommées par le rapport HTML.
+"""``picarones.report.report_data.extra_metrics`` — shim re-export (déprécié, suppression 2.0).
 
-Sprint « câblage des modules test-only » (mai 2026) : intègre dans le
-flux de génération du rapport des modules de mesure qui jusque-là
-n'étaient appelés par aucun consommateur en production. Concrètement :
-
-- :func:`compute_rare_token_recall_per_engine` — Sprint 71 (A.I.1) :
-  recall sur tokens rares (hapax + dis legomena) corpus-wide. Discrimine
-  un OCR qui rate les noms propres rares (critique pour l'indexation
-  prosopographique).
-- :func:`compute_taxonomy_cooccurrence_section` — Sprint 75 (A.I.4
-  chantier 1) : indice de Jaccard inter-classes au niveau document.
-- :func:`compute_taxonomy_intra_doc_section` — Sprint 76 (A.I.4
-  chantier 2) : heatmap class × position pour repérer les zones
-  concentrées d'erreur.
-- :func:`compute_marginal_cost_section` — Sprint 91 (A.II.6) : coût
-  marginal d'un moteur B vs A par erreur évitée.
-
-Toutes les fonctions sont **pures** (pas de mutation in-place) et
-retournent ``None`` ou un dict vide quand les pré-requis ne sont pas
-réunis (corpus vide, taxonomy absente, etc.) — pattern adaptive masking.
+Canonique : :mod:`picarones.reports_v2.html.data.extra_metrics`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional
-
-from picarones.measurements.marginal_cost import compute_marginal_cost_matrix
-from picarones.measurements.rare_tokens import (
-    compute_rare_token_recall,
-    extract_rare_tokens,
-)
-from picarones.measurements.taxonomy_cooccurrence import (
-    compute_taxonomy_cooccurrence,
-)
-from picarones.measurements.taxonomy_intra_doc import (
-    compute_taxonomy_position_heatmap,
-)
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-# ──────────────────────────────────────────────────────────────────
-# Rare-token recall (Sprint 71)
-# ──────────────────────────────────────────────────────────────────
-
-
-def compute_rare_token_recall_per_engine(
-    benchmark: "BenchmarkResult",
-    max_freq: int = 2,
-) -> dict[str, dict]:
-    """Recall corpus-wide sur les tokens rares pour chaque moteur.
-
-    Étapes :
-    1. Extraire les tokens rares du corpus (apparaissent ≤ ``max_freq``
-       fois dans toutes les GT).
-    2. Pour chaque moteur, calculer le recall moyen pondéré par doc.
-
-    Retour : ``{engine_name: {n_rare_tokens, n_recalled, recall, n_docs}}``,
-    vide si aucun moteur ou aucun token rare détecté.
-    """
-    if not benchmark.engine_reports:
-        return {}
-    # Liste des GT du corpus (premier moteur fait foi).
-    gts = [
-        dr.ground_truth
-        for dr in benchmark.engine_reports[0].document_results
-        if dr.ground_truth
-    ]
-    if not gts:
-        return {}
-    rare_tokens = extract_rare_tokens(gts, max_freq=max_freq)
-    if not rare_tokens:
-        return {}
-
-    out: dict[str, dict] = {}
-    for report in benchmark.engine_reports:
-        n_total_rare = 0
-        n_total_recalled = 0
-        n_docs = 0
-        for dr in report.document_results:
-            if dr.metrics.error is not None:
-                continue
-            metrics = compute_rare_token_recall(
-                dr.ground_truth, dr.hypothesis, rare_tokens,
-            )
-            n_total_rare += metrics["n_rare_tokens_in_reference"]
-            n_total_recalled += metrics["n_rare_tokens_recalled"]
-            n_docs += 1
-        recall = (
-            n_total_recalled / n_total_rare if n_total_rare > 0 else None
-        )
-        out[report.engine_name] = {
-            "n_rare_tokens": n_total_rare,
-            "n_recalled": n_total_recalled,
-            "recall": recall,
-            "n_docs": n_docs,
-            "max_freq": max_freq,
-        }
-    return out
-
-
-# ──────────────────────────────────────────────────────────────────
-# Co-occurrence taxonomique (Sprint 75)
-# ──────────────────────────────────────────────────────────────────
-
-
-def compute_taxonomy_cooccurrence_section(
-    benchmark: "BenchmarkResult",
-) -> Optional[dict]:
-    """Calcule la matrice de co-occurrence taxonomique corpus-wide.
-
-    Pour chaque document, on collecte l'union des classes d'erreur
-    apparues sur ce document tous moteurs confondus, puis on calcule
-    l'indice de Jaccard entre paires de classes au niveau corpus.
+import warnings
 
-    Retour : sortie de
-    :func:`picarones.measurements.taxonomy_cooccurrence.compute_taxonomy_cooccurrence`,
-    ou ``None`` si aucune classification taxonomique n'est disponible.
-    """
-    # Map doc_id → index dans per_doc_classes pour merger correctement
-    # les classes des moteurs additionnels qui évaluent le même doc.
-    # **Bug évité** : ne PAS utiliser un set pour retrouver l'index — un
-    # set n'a pas d'ordre garanti, ``list(set).index(x)`` retourne un
-    # index qui ne correspond pas à la position dans la liste parallèle.
-    doc_id_to_idx: dict[str, int] = {}
-    per_doc_classes: list[set[str]] = []
+from picarones.reports_v2.html.data.extra_metrics import *  # noqa: F401, F403
 
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            if dr.taxonomy is None:
-                continue
-            classes = {
-                cls
-                for cls, count in (dr.taxonomy.get("counts") or {}).items()
-                if count > 0
-            }
-            if not classes:
-                continue
-            idx = doc_id_to_idx.get(dr.doc_id)
-            if idx is None:
-                doc_id_to_idx[dr.doc_id] = len(per_doc_classes)
-                per_doc_classes.append(classes)
-            else:
-                # Doc déjà vu (autre moteur) : merger les classes.
-                per_doc_classes[idx] |= classes
-
-    if not per_doc_classes:
-        return None
-    return compute_taxonomy_cooccurrence(per_doc_classes)
-
-
-# ──────────────────────────────────────────────────────────────────
-# Heatmap intra-document class × position (Sprint 76)
-# ──────────────────────────────────────────────────────────────────
-
-
-def compute_taxonomy_intra_doc_section(
-    benchmark: "BenchmarkResult",
-    n_bins: int = 10,
-) -> Optional[dict]:
-    """Heatmap agrégée class × position binnée sur l'ensemble du corpus.
-
-    Pour chaque doc unique on garde le heatmap calculé par le **premier**
-    moteur (déduplication : un même doc évalué par N moteurs ne compte
-    qu'une fois). Puis on somme par classe et bin de position.
-
-    Retourne un dict compatible avec
-    :func:`picarones.report.taxonomy_intra_doc_render.build_taxonomy_intra_doc_html`
-    (clés ``n_bins``, ``per_class``, ``total_errors``, ``n_words_gt``).
-    Retourne ``None`` si aucun document n'a de signal exploitable.
-    """
-    aggregated: dict[str, list[int]] = {}
-    seen_doc_ids: set[str] = set()
-    total_errors = 0
-    n_words_gt = 0
-
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            if dr.doc_id in seen_doc_ids:
-                continue  # déduplication : ne pas compter un doc 2 fois
-            if dr.metrics.error is not None or not dr.ground_truth:
-                continue
-            heatmap = compute_taxonomy_position_heatmap(
-                dr.ground_truth, dr.hypothesis, n_bins=n_bins,
-            )
-            if heatmap is None:
-                continue
-            seen_doc_ids.add(dr.doc_id)
-            n_words_gt += len(dr.ground_truth.split())
-            per_class = heatmap.get("per_class", {})
-            for cls, counts in per_class.items():
-                cls_total = sum(counts)
-                if cls_total == 0:
-                    continue
-                total_errors += cls_total
-                if cls not in aggregated:
-                    aggregated[cls] = [0] * n_bins
-                for i in range(n_bins):
-                    aggregated[cls][i] += counts[i] if i < len(counts) else 0
-
-    if not aggregated:
-        return None
-    return {
-        "n_bins": n_bins,
-        "n_docs_with_data": len(seen_doc_ids),
-        "total_errors": total_errors,
-        "n_words_gt": n_words_gt,
-        "per_class": aggregated,
-    }
-
-
-# ──────────────────────────────────────────────────────────────────
-# Coût marginal inter-moteurs (Sprint 91)
-# ──────────────────────────────────────────────────────────────────
-
-
-def compute_marginal_cost_section(
-    engines_summary: list[dict],
-) -> Optional[list[dict]]:
-    """Matrice de coût marginal entre paires de moteurs.
-
-    Lit ``cost`` (attaché par :func:`attach_engine_costs`) et estime
-    le nombre d'erreurs. Pour chaque paire ``A → B``, calcule le coût
-    additionnel par erreur évitée.
-
-    **Note d'estimation** : le nombre d'erreurs est dérivé de
-    ``cer × n_caractères_corpus`` quand la longueur moyenne de doc
-    est disponible, sinon repli sur ``cer × 1000`` (proxy pour
-    1000 caractères standardisés). Les coûts marginaux affichés sont
-    des estimations pessimistes — pour un benchmark de corpus
-    homogène, l'ordonnancement est fiable ; pour un mix de
-    types de documents, à interpréter avec prudence.
-
-    Retour : liste de dicts (sortie ``["pairs"]`` de
-    :func:`compute_marginal_cost_matrix`) triée par coût marginal
-    croissant, ou ``None`` si moins de 2 moteurs ont des données
-    coût + erreur exploitables.
-    """
-    per_engine: dict[str, dict] = {}
-    for entry in engines_summary:
-        cost = entry.get("cost") or {}
-        cost_per_1k = cost.get("cost_per_1k_pages_eur")
-        cer = entry.get("cer")
-        doc_count = entry.get("doc_count") or 0
-        if cost_per_1k is None or cer is None or doc_count == 0:
-            continue
-        # Proxy : cer × 1000 caractères / page (échelle stable cohérente
-        # avec ``cost_per_1k_pages_eur``).
-        estimated_errors = cer * 1000.0
-        per_engine[entry["name"]] = {
-            "cost": cost_per_1k,
-            "errors": estimated_errors,
-        }
-    if len(per_engine) < 2:
-        return None
-    result = compute_marginal_cost_matrix(per_engine)
-    if not result:
-        return None
-    # ``compute_marginal_cost_matrix`` retourne ``{"pairs": [...]}``.
-    # On expose la liste ``pairs`` pour que le renderer reçoive un
-    # itérable de dicts (pas un wrapper).
-    return result.get("pairs") or None
-
-
-__all__ = [
-    "compute_rare_token_recall_per_engine",
-    "compute_taxonomy_cooccurrence_section",
-    "compute_taxonomy_intra_doc_section",
-    "compute_marginal_cost_section",
-]
+warnings.warn(
+    "picarones.report.report_data.extra_metrics is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.extra_metrics instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/pareto.py b/picarones/report/report_data/pareto.py
index cabd714145a6e1971f2dc87cf53d1a092e884d07..74a785d81ae180b7eda1ce402c3e517691a9cc40 100644
--- a/picarones/report/report_data/pareto.py
+++ b/picarones/report/report_data/pareto.py
@@ -1,159 +1,18 @@
-"""Front Pareto coût/qualité (Sprint 19).
+"""``picarones.report.report_data.pareto`` — shim re-export (déprécié, suppression 2.0).
 
-Construit trois fronts Pareto avec des axes alternatifs :
-
-- ``cost`` — CER vs coût € / 1000 pages.
-- ``speed`` — CER vs durée moyenne par page.
-- ``co2`` — CER vs empreinte carbone (g CO₂ / 1000 pages, expérimental).
-
-API
----
-Deux fonctions séparées pour rendre le contrat explicite :
-
-1. :func:`attach_engine_costs` — **mute en place** ``engines_summary``
-   en y ajoutant ``mean_duration_seconds`` et ``cost`` (extraits du
-   benchmark et de la table de pricing). Le nom dit clairement qu'il
-   y a mutation.
-2. :func:`build_pareto_section` — **fonction pure**, lit les coûts
-   déjà attachés à ``engines_summary``. Retourne le dict ``pareto``
-   prêt pour le template.
-
-L'orchestrateur (``__init__.py``) appelle les deux dans l'ordre.
-Cette séparation rend possible :
-
-- Tester :func:`build_pareto_section` indépendamment avec un
-  ``engines_summary`` pré-fabriqué.
-- Réutiliser les coûts attachés sans recalculer Pareto.
+Canonique : :mod:`picarones.reports_v2.html.data.pareto`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
-
-from picarones.measurements.pricing import (
-    build_costs_for_benchmark,
-    load_pricing_database,
-)
-from picarones.measurements.statistics import compute_pareto_front
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-def attach_engine_costs(
-    engines_summary: list[dict], benchmark: "BenchmarkResult",
-) -> None:
-    """Annote chaque entrée de ``engines_summary`` avec son coût.
-
-    **Mute en place** : ajoute deux champs à chaque dict moteur :
-
-    - ``mean_duration_seconds`` (float ou ``None`` si pas de durée).
-    - ``cost`` : dict de la forme ``{cost_per_1k_pages_eur: ...,
-      co2_per_1k_pages_g: ..., ...}`` ou ``None`` si pricing
-      indisponible.
+import warnings
 
-    Doit être appelée AVANT :func:`build_pareto_section`, qui lit
-    ces deux champs.
-    """
-    durations_by_engine: dict[str, float] = {}
-    for report in benchmark.engine_reports:
-        durs = [
-            dr.duration_seconds
-            for dr in report.document_results
-            if dr.duration_seconds is not None
-        ]
-        if durs:
-            durations_by_engine[report.engine_name] = sum(durs) / len(durs)
+from picarones.reports_v2.html.data.pareto import *  # noqa: F401, F403
 
-    costs_by_engine = build_costs_for_benchmark(
-        engines_summary, durations_by_engine,
-    )
-    for entry in engines_summary:
-        name = entry["name"]
-        entry["mean_duration_seconds"] = (
-            round(durations_by_engine.get(name, 0.0), 4)
-            if name in durations_by_engine else None
-        )
-        entry["cost"] = costs_by_engine.get(name)
-
-
-def build_pareto_section(engines_summary: list[dict]) -> dict:
-    """Construit le bloc ``pareto`` du dict de rapport.
-
-    **Fonction pure** : ne mute rien. Lit ``mean_duration_seconds``
-    et ``cost`` qui doivent avoir été attachés en amont par
-    :func:`attach_engine_costs`. Si ces champs sont absents, le
-    moteur est silencieusement omis du front (cohérent avec un
-    moteur qui n'a pas de prix connu).
-
-    Retour
-    ------
-    dict
-        Trois fronts Pareto (``cost``, ``speed``, ``co2``) plus
-        ``pricing_meta`` (table de pricing utilisée).
-    """
-    pricing_defaults, _ = load_pricing_database()
-
-    pareto_points = []
-    for entry in engines_summary:
-        cer = entry.get("cer")
-        cost = (entry.get("cost") or {}).get("cost_per_1k_pages_eur")
-        if cer is None or cost is None:
-            continue
-        pareto_points.append({"engine": entry["name"], "cer": cer, "cost": cost})
-    pareto_front_engines = compute_pareto_front(
-        pareto_points, objectives=("cer", "cost"),
-    )
-
-    pareto_speed_points = []
-    for entry in engines_summary:
-        cer = entry.get("cer")
-        dur = entry.get("mean_duration_seconds")
-        if cer is None or dur is None:
-            continue
-        pareto_speed_points.append({"engine": entry["name"], "cer": cer, "dur": dur})
-    pareto_front_speed = compute_pareto_front(
-        pareto_speed_points, objectives=("cer", "dur"),
-    )
-
-    pareto_co2_points = []
-    for entry in engines_summary:
-        cer = entry.get("cer")
-        co2 = (entry.get("cost") or {}).get("co2_per_1k_pages_g")
-        if cer is None or co2 is None:
-            continue
-        pareto_co2_points.append({"engine": entry["name"], "cer": cer, "co2": co2})
-    pareto_front_co2 = compute_pareto_front(
-        pareto_co2_points, objectives=("cer", "co2"),
-    )
-
-    return {
-        "cost": {
-            "points": pareto_points,
-            "front": pareto_front_engines,
-            "axis_label": "Coût (€ / 1000 pages)",
-        },
-        "speed": {
-            "points": pareto_speed_points,
-            "front": pareto_front_speed,
-            "axis_label": "Temps moyen (s / page)",
-        },
-        "co2": {
-            "points": pareto_co2_points,
-            "front": pareto_front_co2,
-            "axis_label": (
-                "Empreinte carbone (g CO₂ / 1000 pages, expérimental)"
-            ),
-        },
-        "pricing_meta": {
-            "last_updated": pricing_defaults.last_updated,
-            "currency": pricing_defaults.currency,
-            "hourly_rate_local_cpu_eur": pricing_defaults.hourly_rate_local_cpu_eur,
-            "hourly_rate_local_gpu_eur": pricing_defaults.hourly_rate_local_gpu_eur,
-            "grid_intensity_local": pricing_defaults.grid_intensity_local,
-            "grid_intensity_cloud": pricing_defaults.grid_intensity_cloud,
-        },
-    }
-
-
-__all__ = ["attach_engine_costs", "build_pareto_section"]
+warnings.warn(
+    "picarones.report.report_data.pareto is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.pareto instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/scatter.py b/picarones/report/report_data/scatter.py
index 045b02621c9f74c485e84299e13106a1b57e5afc..bb02fcd23b3f809b7c99f6b4ee1e8ac9942e600b 100644
--- a/picarones/report/report_data/scatter.py
+++ b/picarones/report/report_data/scatter.py
@@ -1,56 +1,18 @@
-"""Scatter plots du rapport (Sprint 10).
+"""``picarones.report.report_data.scatter`` — shim re-export (déprécié, suppression 2.0).
 
-- ``gini_vs_cer`` — corrélation Gini (concentration des erreurs)
-  vs CER moyen, par moteur.
-- ``ratio_vs_anchor`` — ratio de longueur OCR/GT vs score d'ancrage,
-  par moteur (révèle les hallucinations VLM).
+Canonique : :mod:`picarones.reports_v2.html.data.scatter`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+import warnings
 
-from picarones.report.report_data._helpers import safe_round
+from picarones.reports_v2.html.data.scatter import *  # noqa: F401, F403
 
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-def build_gini_vs_cer(benchmark: "BenchmarkResult") -> list[dict]:
-    """Scatter Gini de la distribution d'erreurs vs CER moyen."""
-    gini_vs_cer: list[dict] = []
-    for report in benchmark.engine_reports:
-        line_metrics = report.aggregated_line_metrics
-        gini_val = line_metrics.get("gini_mean") if line_metrics else None
-        cer_val = report.mean_cer
-        if gini_val is not None and cer_val is not None:
-            gini_vs_cer.append({
-                "engine": report.engine_name,
-                "cer": safe_round(cer_val),
-                "gini": safe_round(gini_val),
-                "is_pipeline": report.is_pipeline,
-            })
-    return gini_vs_cer
-
-
-def build_ratio_vs_anchor(benchmark: "BenchmarkResult") -> list[dict]:
-    """Scatter ratio de longueur vs score d'ancrage (détection VLM)."""
-    ratio_vs_anchor: list[dict] = []
-    for report in benchmark.engine_reports:
-        halluc = report.aggregated_hallucination
-        if not halluc:
-            continue
-        ratio_vs_anchor.append({
-            "engine": report.engine_name,
-            "length_ratio": safe_round(halluc.get("length_ratio_mean", 1.0)),
-            "anchor_score": safe_round(halluc.get("anchor_score_mean", 1.0)),
-            "hallucinating_rate": safe_round(halluc.get("hallucinating_doc_rate", 0.0)),
-            "is_vlm": (
-                report.pipeline_info.get("is_vlm", False)
-                if report.pipeline_info else False
-            ),
-        })
-    return ratio_vs_anchor
-
-
-__all__ = ["build_gini_vs_cer", "build_ratio_vs_anchor"]
+warnings.warn(
+    "picarones.report.report_data.scatter is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.scatter instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/report_data/statistics.py b/picarones/report/report_data/statistics.py
index 340c44f3384a271fe25e6a74be4bcb6a81c954d0..46fe4dc2989a0a5659dc8e59c85f6261938310b6 100644
--- a/picarones/report/report_data/statistics.py
+++ b/picarones/report/report_data/statistics.py
@@ -1,216 +1,18 @@
-"""Sections statistiques du rapport (Sprint 7 + Sprint 17).
+"""``picarones.report.report_data.statistics`` — shim re-export (déprécié, suppression 2.0).
 
-Construit les blocs :
-
-- ``pairwise_wilcoxon`` — tests de Wilcoxon par paire de moteurs.
-- ``bootstrap_cis`` — intervalles de confiance bootstrap par moteur.
-- ``friedman`` + ``nemenyi`` — Sprint 17, multi-moteurs.
-- ``reliability_curves`` — courbes de fiabilité par moteur.
-- ``venn_data`` — diagramme de Venn des erreurs communes/exclusives.
-- ``error_clusters`` — clustering des patterns d'erreurs.
-- ``correlation_per_engine`` — matrice de corrélation par moteur.
+Canonique : :mod:`picarones.reports_v2.html.data.statistics`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional
-
-from picarones.core.diff_utils import compute_word_diff
-from picarones.measurements.statistics import (
-    bootstrap_ci,
-    cluster_errors,
-    compute_correlation_matrix,
-    compute_pairwise_stats,
-    compute_reliability_curve,
-    compute_venn_data,
-    friedman_test,
-    nemenyi_posthoc,
-)
-from picarones.report.report_data._helpers import safe_round
-
-if TYPE_CHECKING:
-    from picarones.evaluation.benchmark_result import BenchmarkResult
-
-
-def _engine_cer_values(benchmark: "BenchmarkResult") -> dict[str, list[float]]:
-    """Map ``engine_name → [cer_individuels valides]``."""
-    out: dict[str, list[float]] = {}
-    for report in benchmark.engine_reports:
-        vals = [
-            safe_round(dr.metrics.cer)
-            for dr in report.document_results
-            if dr.metrics.error is None
-        ]
-        if vals:
-            out[report.engine_name] = vals
-    return out
-
-
-def build_pairwise_wilcoxon(benchmark: "BenchmarkResult") -> list[dict]:
-    """Tests de Wilcoxon par paire de moteurs (Sprint 7)."""
-    return compute_pairwise_stats(_engine_cer_values(benchmark))
-
-
-def build_bootstrap_cis(benchmark: "BenchmarkResult") -> list[dict]:
-    """Intervalles de confiance bootstrap par moteur (Sprint 7)."""
-    bootstrap_cis: list[dict] = []
-    for engine_name, vals in _engine_cer_values(benchmark).items():
-        lo, hi = bootstrap_ci(vals)
-        mean_v = sum(vals) / len(vals) if vals else 0.0
-        bootstrap_cis.append({
-            "engine": engine_name,
-            "mean": safe_round(mean_v),
-            "ci_lower": safe_round(lo),
-            "ci_upper": safe_round(hi),
-        })
-    return bootstrap_cis
-
-
-def build_friedman_and_nemenyi(benchmark: "BenchmarkResult") -> dict:
-    """Test de Friedman + post-hoc Nemenyi (Sprint 17, multi-moteurs).
-
-    Alignement strict sur le même ordre de documents : on reconstruit
-    la map à partir des documents communs à tous les moteurs, sinon
-    Friedman n'est pas applicable.
-
-    Returns
-    -------
-    dict
-        ``{"friedman": {...}, "nemenyi": {...}}`` à fusionner dans
-        la section ``statistics`` du rapport.
-    """
-    # Liste ordonnée des doc_ids selon l'ordre d'apparition.
-    seen: set[str] = set()
-    doc_ids_ordered: list[str] = []
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            if dr.doc_id not in seen:
-                seen.add(dr.doc_id)
-                doc_ids_ordered.append(dr.doc_id)
+import warnings
 
-    common_doc_ids: Optional[set[str]] = None
-    for report in benchmark.engine_reports:
-        doc_ids = {dr.doc_id for dr in report.document_results if dr.metrics.error is None}
-        common_doc_ids = doc_ids if common_doc_ids is None else common_doc_ids & doc_ids
+from picarones.reports_v2.html.data.statistics import *  # noqa: F401, F403
 
-    engine_cer_aligned: dict[str, list[float]] = {}
-    if common_doc_ids:
-        ordered_common = [d for d in doc_ids_ordered if d in common_doc_ids]
-        for report in benchmark.engine_reports:
-            dr_by_id = {dr.doc_id: dr for dr in report.document_results}
-            engine_cer_aligned[report.engine_name] = [
-                safe_round(dr_by_id[d].metrics.cer) for d in ordered_common
-            ]
-
-    if engine_cer_aligned:
-        friedman = friedman_test(engine_cer_aligned)
-        nemenyi = nemenyi_posthoc(engine_cer_aligned)
-    else:
-        friedman = {
-            "statistic": 0.0, "p_value": 1.0, "significant": False,
-            "df": 0, "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
-            "interpretation": "Test de Friedman non calculé — aucun document commun.",
-            "error": "no_common_documents",
-        }
-        nemenyi = {
-            "alpha": 0.05, "critical_distance": 0.0, "q_alpha": 0.0,
-            "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
-            "engines_sorted": [], "significant_matrix": [], "tied_groups": [],
-            "error": "no_common_documents",
-        }
-    return {"friedman": friedman, "nemenyi": nemenyi}
-
-
-def build_reliability_curves(benchmark: "BenchmarkResult") -> list[dict]:
-    """Courbes de fiabilité par moteur (Sprint 7)."""
-    reliability_curves: list[dict] = []
-    for report in benchmark.engine_reports:
-        vals = [
-            safe_round(dr.metrics.cer)
-            for dr in report.document_results
-            if dr.metrics.error is None
-        ]
-        curve = compute_reliability_curve(vals)
-        reliability_curves.append({
-            "engine": report.engine_name,
-            "points": curve,
-        })
-    return reliability_curves
-
-
-def build_venn_data(benchmark: "BenchmarkResult") -> dict:
-    """Venn des erreurs communes / exclusives (Sprint 7).
-
-    Construit les ensembles d'erreurs par moteur :
-    ``{engine → set("doc_id:gt_tok:hyp_tok")}``.
-    """
-    venn_error_sets: dict[str, set[str]] = {}
-    for report in benchmark.engine_reports:
-        error_set: set[str] = set()
-        for dr in report.document_results:
-            ops = compute_word_diff(dr.ground_truth, dr.hypothesis)
-            for op in ops:
-                if op["op"] in ("replace", "delete", "insert"):
-                    key = (
-                        f"{dr.doc_id}:"
-                        f"{op.get('old', op.get('text', ''))}:"
-                        f"{op.get('new', op.get('text', ''))}"
-                    )
-                    error_set.add(key)
-        venn_error_sets[report.engine_name] = error_set
-    return compute_venn_data(venn_error_sets)
-
-
-def build_error_clusters(benchmark: "BenchmarkResult") -> list[dict]:
-    """Clustering des patterns d'erreurs (Sprint 7)."""
-    error_data_all: list[dict] = []
-    for report in benchmark.engine_reports:
-        for dr in report.document_results:
-            error_data_all.append({
-                "engine": report.engine_name,
-                "gt": dr.ground_truth,
-                "hypothesis": dr.hypothesis,
-            })
-    error_clusters_raw = cluster_errors(error_data_all, max_clusters=8)
-    return [c.as_dict() for c in error_clusters_raw]
-
-
-def build_correlation_per_engine(benchmark: "BenchmarkResult") -> list[dict]:
-    """Matrice de corrélation par moteur entre métriques métiers (Sprint 7)."""
-    correlation_per_engine: list[dict] = []
-    for report in benchmark.engine_reports:
-        metrics_list: list[dict[str, float]] = []
-        for dr in report.document_results:
-            if dr.metrics.error is not None:
-                continue
-            entry: dict[str, float] = {
-                "cer": safe_round(dr.metrics.cer),
-                "wer": safe_round(dr.metrics.wer),
-                "mer": safe_round(dr.metrics.mer),
-                "wil": safe_round(dr.metrics.wil),
-            }
-            if dr.image_quality:
-                entry["quality_score"] = safe_round(dr.image_quality.get("quality_score", 0.5))
-                entry["sharpness"] = safe_round(dr.image_quality.get("sharpness_score", 0.5))
-            if dr.char_scores:
-                entry["ligature"] = safe_round(dr.char_scores.get("ligature", {}).get("score", 0.5))
-                entry["diacritic"] = safe_round(dr.char_scores.get("diacritic", {}).get("score", 0.5))
-            metrics_list.append(entry)
-        if metrics_list:
-            corr = compute_correlation_matrix(metrics_list)
-            correlation_per_engine.append({
-                "engine": report.engine_name,
-                **corr,
-            })
-    return correlation_per_engine
-
-
-__all__ = [
-    "build_pairwise_wilcoxon",
-    "build_bootstrap_cis",
-    "build_friedman_and_nemenyi",
-    "build_reliability_curves",
-    "build_venn_data",
-    "build_error_clusters",
-    "build_correlation_per_engine",
-]
+warnings.warn(
+    "picarones.report.report_data.statistics is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.data.statistics instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/snapshot.py b/picarones/report/snapshot.py
index 6042a2134894116ad2918c0756c935a657eedcfc..38438a0334dd1ef12e943986f454280b126034d9 100644
--- a/picarones/report/snapshot.py
+++ b/picarones/report/snapshot.py
@@ -1,266 +1,18 @@
-"""Snapshots de reproductibilité pour le rapport HTML (Sprint 27).
+"""``picarones.report.snapshot`` — shim re-export (déprécié, suppression 2.0).
 
-Le rapport HTML auto-contenu doit pouvoir être *rejoué* sans avoir
-accès au code source du moment où il a été généré : un lecteur en
-2026 doit pouvoir comprendre exactement quelle table de prix, quelle
-définition de métrique, quel profil de normalisation, et quelle
-version de Picarones ont produit les chiffres affichés.
-
-Avant le Sprint 27, le rapport intégrait uniquement
-``pareto.pricing_meta.last_updated`` — une simple date de mise à jour
-qui ne disait rien sur le contenu de la table. Si quelqu'un modifiait
-``picarones/data/pricing.yaml`` après génération, il était impossible
-de reconstituer ce qu'avait vu le lecteur du rapport.
-
-Quatre snapshots sont produits par ce module et embarqués dans
-``report_data.snapshots`` :
-
-- ``pricing``       — YAML brut intégral de la table de prix.
-- ``glossary``      — entrées du glossaire pour la langue du rapport.
-- ``normalization`` — profil de normalisation effectivement appliqué.
-- ``environment``   — version Picarones, Python, plateforme, commit git
-                      si dispo, liste figée des dépendances installées.
-
-Garanties
----------
-- **Déterminisme** : sur entrées identiques, ``snapshot_all()`` produit
-  un dict bit-à-bit identique. Les listes sont triées, les timestamps
-  sont absents.
-- **Pas d'effet de bord** : le module ne modifie aucun état global ;
-  les chemins YAML sont uniquement lus, jamais écrits.
-- **Dégradé non bloquant** : si pyyaml est absent, si ``pricing.yaml``
-  n'existe pas, si git n'est pas installé, le snapshot retourne un
-  dict ``{"available": False, "reason": "..."}`` plutôt que de lever.
+Canonique : :mod:`picarones.reports_v2.html.snapshot`.  Phase 5.E
+du retrait du legacy.
 """
 
 from __future__ import annotations
 
-import logging
-import platform
-import subprocess
-import sys
-from importlib.metadata import distributions
-from pathlib import Path
-from typing import Any, Optional
-
-from picarones import __version__
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Pricing snapshot
-# ---------------------------------------------------------------------------
-
-def pricing_snapshot(pricing_path: Optional[Path] = None) -> dict[str, Any]:
-    """Retourne le YAML brut + dict parsé de la table de prix utilisée.
-
-    Si ``pricing_path`` n'est pas fourni, utilise le chemin par défaut
-    de ``picarones.measurements.pricing._DEFAULT_PRICING_PATH``.
-    """
-    if pricing_path is None:
-        try:
-            from picarones.measurements.pricing import _DEFAULT_PRICING_PATH
-            pricing_path = _DEFAULT_PRICING_PATH
-        except ImportError:
-            return {"available": False, "reason": "module pricing introuvable"}
-
-    pricing_path = Path(pricing_path)
-    if not pricing_path.exists():
-        return {
-            "available": False,
-            "reason": f"pricing.yaml introuvable : {pricing_path}",
-            "expected_path": str(pricing_path),
-        }
-
-    try:
-        raw = pricing_path.read_text(encoding="utf-8")
-    except OSError as exc:
-        return {
-            "available": False,
-            "reason": f"lecture impossible : {exc}",
-            "expected_path": str(pricing_path),
-        }
-
-    try:
-        import yaml
-        data = yaml.safe_load(raw) or {}
-    except (ImportError, Exception) as exc:
-        # Pas de yaml ou parsing en échec — on garde le brut quand même.
-        logger.warning("[snapshot] parsing pricing.yaml échoué : %s", exc)
-        data = {}
-
-    return {
-        "available": True,
-        "source_path": str(pricing_path),
-        "filename": pricing_path.name,
-        "size_bytes": len(raw.encode("utf-8")),
-        "raw_yaml": raw,
-        "data": data,
-    }
-
-
-# ---------------------------------------------------------------------------
-# Glossary snapshot
-# ---------------------------------------------------------------------------
-
-def glossary_snapshot(
-    lang: str = "fr",
-    used_keys: Optional[list[str] | set[str]] = None,
-) -> dict[str, Any]:
-    """Retourne les entrées du glossaire qui figurent dans le rapport.
-
-    ``used_keys`` permet de ne snapshotter que les termes effectivement
-    référencés (réduit la taille). ``None`` → toutes les entrées de la
-    langue (mode conservateur).
-    """
-    try:
-        from picarones.reports_v2.glossary import load_glossary, SUPPORTED_LANGS
-    except ImportError:
-        return {"available": False, "reason": "module glossary introuvable"}
-
-    full = load_glossary(lang) or {}
-    if not full:
-        return {
-            "available": False,
-            "reason": f"aucune entrée pour lang={lang!r}",
-            "supported_langs": SUPPORTED_LANGS,
-        }
-
-    if used_keys is not None:
-        keys = set(used_keys)
-        entries = {k: v for k, v in full.items() if k in keys}
-    else:
-        entries = dict(full)
-
-    # Tri pour reproductibilité bit-à-bit.
-    entries_sorted = {k: entries[k] for k in sorted(entries)}
-
-    return {
-        "available": True,
-        "lang": lang,
-        "entry_count": len(entries_sorted),
-        "entries": entries_sorted,
-    }
-
-
-# ---------------------------------------------------------------------------
-# Normalization profile snapshot
-# ---------------------------------------------------------------------------
-
-def normalization_snapshot(profile: Any) -> dict[str, Any]:
-    """Sérialise un ``NormalizationProfile``.
-
-    Couvre les profils built-in (``medieval_french``, ``nfc``, …) et les
-    profils custom YAML chargés au runtime — l'objectif est qu'un
-    lecteur du rapport puisse régénérer exactement la même
-    normalisation à partir de ce snapshot.
-    """
-    if profile is None:
-        return {"available": False, "reason": "aucun profil fourni"}
-
-    # NormalizationProfile est un dataclass — on accède aux champs par
-    # nom plutôt que via ``asdict`` pour bien contrôler le format.
-    try:
-        return {
-            "available": True,
-            "name": getattr(profile, "name", "unknown"),
-            "nfc": bool(getattr(profile, "nfc", True)),
-            "caseless": bool(getattr(profile, "caseless", False)),
-            "diplomatic_table": dict(getattr(profile, "diplomatic_table", {}) or {}),
-            "exclude_chars": sorted(getattr(profile, "exclude_chars", set()) or set()),
-            "description": getattr(profile, "description", ""),
-        }
-    except Exception as exc:
-        return {"available": False, "reason": f"sérialisation échouée : {exc}"}
-
-
-# ---------------------------------------------------------------------------
-# Environment snapshot
-# ---------------------------------------------------------------------------
-
-def _git_commit(repo_path: Optional[Path] = None) -> Optional[str]:
-    """Retourne le commit git court (12 chars) si on est dans un repo, sinon None."""
-    cwd = repo_path or Path(__file__).resolve().parents[2]
-    try:
-        out = subprocess.check_output(
-            ["git", "rev-parse", "HEAD"],
-            cwd=str(cwd),
-            stderr=subprocess.DEVNULL,
-            text=True,
-            timeout=2,
-        ).strip()
-        return out[:12] if out else None
-    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
-        return None
-
-
-def _installed_packages(limit: int = 200) -> list[str]:
-    """Liste figée des paquets installés au format ``name==version``.
-
-    Triée par nom (case-insensitive) pour reproductibilité. Cappée à
-    ``limit`` paquets pour ne pas exploser le poids du rapport.
-    """
-    try:
-        pkgs: list[str] = []
-        seen: set[str] = set()
-        for d in distributions():
-            try:
-                name = (d.metadata.get("Name") or "").strip()
-                version = (d.version or "").strip()
-            except Exception:
-                continue
-            if not name or name.lower() in seen:
-                continue
-            seen.add(name.lower())
-            pkgs.append(f"{name}=={version}")
-        pkgs.sort(key=str.lower)
-        return pkgs[:limit]
-    except Exception as exc:  # pragma: no cover — défense en profondeur
-        logger.warning("[snapshot] enum dépendances échoué : %s", exc)
-        return []
-
-
-def environment_snapshot(repo_path: Optional[Path] = None) -> dict[str, Any]:
-    """Retourne version Picarones, Python, plateforme, commit, deps figées."""
-    return {
-        "available": True,
-        "picarones_version": __version__,
-        "python_version": platform.python_version(),
-        "python_implementation": platform.python_implementation(),
-        "platform": platform.platform(),
-        "executable": sys.executable,
-        "git_commit": _git_commit(repo_path),
-        "installed_packages": _installed_packages(),
-    }
-
-
-# ---------------------------------------------------------------------------
-# API agrégée
-# ---------------------------------------------------------------------------
-
-def snapshot_all(
-    *,
-    lang: str = "fr",
-    glossary_used_keys: Optional[list[str] | set[str]] = None,
-    pricing_path: Optional[Path] = None,
-    normalization_profile: Any = None,
-    repo_path: Optional[Path] = None,
-) -> dict[str, Any]:
-    """Construit le bloc ``snapshots`` à embarquer dans ``report_data``."""
-    return {
-        "pricing": pricing_snapshot(pricing_path=pricing_path),
-        "glossary": glossary_snapshot(lang=lang, used_keys=glossary_used_keys),
-        "normalization": normalization_snapshot(normalization_profile),
-        "environment": environment_snapshot(repo_path=repo_path),
-        "schema_version": 1,
-    }
+import warnings
 
+from picarones.reports_v2.html.snapshot import *  # noqa: F401, F403
 
-__all__ = [
-    "pricing_snapshot",
-    "glossary_snapshot",
-    "normalization_snapshot",
-    "environment_snapshot",
-    "snapshot_all",
-]
+warnings.warn(
+    "picarones.report.snapshot is deprecated and will be removed in 2.0.  "
+    "Import from picarones.reports_v2.html.snapshot instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/picarones/report/templates/_critical_difference.html b/picarones/report/templates/_critical_difference.html
deleted file mode 100644
index 5b501b9a01d20f94a6b5993011df6e6cc7deec46..0000000000000000000000000000000000000000
--- a/picarones/report/templates/_critical_difference.html
+++ /dev/null
@@ -1,39 +0,0 @@
-<!-- ── Critical Difference Diagram (Sprint 17) ─────────────────────── -->
-<section class="cdd-card" aria-labelledby="cdd-title">
-  <header class="cdd-header">
-    <h2 id="cdd-title" data-i18n="cdd_title">Test multi-moteurs — Friedman &amp; Nemenyi</h2>
-    <button type="button" class="cdd-info-btn" aria-label="Comment lire ce diagramme"
-            onclick="toggleCDDHelp()" title="Aide">?</button>
-  </header>
-
-  {% if friedman.error %}
-    <p class="cdd-note cdd-note-muted">{{ friedman.interpretation }}</p>
-  {% else %}
-    <p class="cdd-friedman">
-      <strong>Friedman</strong> :
-      Q = {{ "%.3f"|format(friedman.statistic) }},
-      df = {{ friedman.df }},
-      <em>n</em> = {{ friedman.n_blocks }} documents,
-      <em>k</em> = {{ friedman.n_engines }} moteurs,
-      p = {{ "%.4f"|format(friedman.p_value) }}
-      {% if friedman.significant %}
-        <span class="cdd-badge cdd-badge-sig" title="Différence globale significative">p &lt; 0,05</span>
-      {% else %}
-        <span class="cdd-badge cdd-badge-nsig" title="Pas de différence globale détectée">p ≥ 0,05</span>
-      {% endif %}
-    </p>
-    <div class="cdd-svg-wrapper">
-      {{ critical_difference_svg | safe }}
-    </div>
-  {% endif %}
-
-  <div id="cdd-help" class="cdd-help" hidden>
-    <p><strong data-i18n="cdd_help_title">Comment lire ce diagramme ?</strong></p>
-    <ul>
-      <li><span data-i18n="cdd_help_axis">L'axe horizontal montre le rang moyen de chaque moteur (1 = meilleur, k = pire).</span></li>
-      <li><span data-i18n="cdd_help_bars">Les barres horizontales épaisses relient les moteurs statistiquement <em>indiscernables</em> au seuil α = 0,05 (test post-hoc de Nemenyi).</span></li>
-      <li><span data-i18n="cdd_help_cd">La barre rouge <em>CD</em> en haut à gauche donne la distance critique de référence : deux moteurs dont les rangs moyens diffèrent de moins que <em>CD</em> ne peuvent pas être distingués.</span></li>
-      <li><span data-i18n="cdd_help_ref">Référence : Demšar (2006), <em>Statistical Comparisons of Classifiers over Multiple Data Sets</em>, JMLR 7:1-30.</span></li>
-    </ul>
-  </div>
-</section>
diff --git a/picarones/report/templates/_footer.html b/picarones/report/templates/_footer.html
deleted file mode 100644
index b344805df0f9ef438d47f60da893a18a1ca1825b..0000000000000000000000000000000000000000
--- a/picarones/report/templates/_footer.html
+++ /dev/null
@@ -1,6 +0,0 @@
-</main>
-
-<footer>
-  <span data-i18n="footer_by">par Picarones</span> v{{ picarones_version }}
-  — <span id="footer-date"></span>
-</footer>
diff --git a/picarones/report/templates/_header.html b/picarones/report/templates/_header.html
deleted file mode 100644
index 9e69a41e76879e990c9dd1f59a6b9b58472697b2..0000000000000000000000000000000000000000
--- a/picarones/report/templates/_header.html
+++ /dev/null
@@ -1,35 +0,0 @@
-
-<!-- ── Skip-to-content (Sprint A6, B-10) ───────────────────────────────
-     Lien WCAG 2.4.1 (Bypass Blocks) — premier enfant tabbable du body,
-     visible uniquement au focus, saute la nav et le bandeau pour
-     l'utilisateur clavier ou lecteur d'écran. -->
-<a class="skip-link" href="#main" data-i18n="skip_to_content">Aller au contenu</a>
-
-<!-- ── Navigation ─────────────────────────────────────────────────── -->
-<nav>
-  <div class="brand">
-    Picarones
-    <span data-i18n="nav_report">| rapport OCR</span>
-  </div>
-  <div class="tabs">
-    <button class="tab-btn active" onclick="showView('ranking')" data-i18n="tab_ranking">Classement</button>
-    <button class="tab-btn" onclick="showView('gallery')" data-i18n="tab_gallery">Galerie</button>
-    <button class="tab-btn" onclick="showView('document')" data-i18n="tab_document">Document</button>
-    <button class="tab-btn" onclick="showView('characters')" data-i18n="tab_characters">Caractères</button>
-    <button class="tab-btn" onclick="showView('analyses')" data-i18n="tab_analyses">Analyses</button>
-  </div>
-  <div class="meta" id="nav-meta">—</div>
-  <button class="btn-export-csv" onclick="exportCSV()" title="⬇ CSV">⬇ CSV</button>
-  <button class="btn-customize" id="btn-customize" onclick="openCustomize()"
-          title="Mode avancé" data-i18n="btn_customize">⚙ Avancé</button>
-  <button class="btn-present" id="btn-present" onclick="togglePresentMode()" data-i18n="btn_present">⊞ Présentation</button>
-</nav>
-
-<!-- ── Bandeau exclusion globale ───────────────────────────────────── -->
-<div id="global-exclusion-banner" style="display:none;background:#fef3c7;border-bottom:2px solid #f59e0b;padding:.5rem 1.5rem;font-size:.85rem;font-weight:600;color:#92400e;text-align:center">
-  <span id="global-exclusion-text"></span>
-  <button onclick="resetAllExclusions()" data-i18n="reset_all" style="margin-left:1rem;font-size:.75rem;padding:.15rem .5rem;border:1px solid #d97706;background:#fff;border-radius:.25rem;cursor:pointer">Réinitialiser</button>
-</div>
-
-<!-- ── Main (Sprint A6, B-10 : id=main pour le skip-link) ──────────── -->
-<main id="main" role="main">
diff --git a/picarones/report/templates/_narrative_summary.html b/picarones/report/templates/_narrative_summary.html
deleted file mode 100644
index 7d16c58ef6671b5a838ea58a503a04376951f31b..0000000000000000000000000000000000000000
--- a/picarones/report/templates/_narrative_summary.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!-- ── Synthèse factuelle (Sprint 18) ─────────────────────────────── -->
-{% if synthesis and synthesis.sentences %}
-<section class="synth-card" aria-labelledby="synth-title">
-  <header class="synth-header">
-    <h2 id="synth-title" data-i18n="synth_title">Synthèse factuelle</h2>
-    <span class="synth-hint" data-i18n="synth_hint">
-      Générée mécaniquement depuis les résultats — aucun LLM, reproductible.
-    </span>
-  </header>
-  <ul class="synth-list">
-    {% for sentence in synthesis.sentences %}
-    <li>{{ sentence }}</li>
-    {% endfor %}
-  </ul>
-  <p class="synth-cases-link" data-i18n="synth_cases_link">
-    Pour comprendre comment d'autres équipes ont raisonné sur des problèmes
-    similaires, voir
-    <a href="https://github.com/maribakulj/Picarones/tree/main/docs/case-studies"
-       target="_blank" rel="noopener">les études de cas</a>.
-  </p>
-</section>
-{% endif %}
diff --git a/picarones/report/templates/_side_panels.html b/picarones/report/templates/_side_panels.html
deleted file mode 100644
index a6b35a140ede0d2f23ba6c37e5327f9d455c34ab..0000000000000000000000000000000000000000
--- a/picarones/report/templates/_side_panels.html
+++ /dev/null
@@ -1,76 +0,0 @@
-<!-- ── Panneau latéral glossaire (Sprint 20) ─────────────────────── -->
-<aside id="glossary-panel" class="side-panel" hidden aria-hidden="true"
-       aria-labelledby="glossary-panel-title">
-  <header class="side-panel-header">
-    <h3 id="glossary-panel-title" class="side-panel-title">Glossaire</h3>
-    <button type="button" class="side-panel-close" aria-label="Fermer"
-            onclick="closeGlossary()">×</button>
-  </header>
-  <div id="glossary-panel-body" class="side-panel-body"></div>
-</aside>
-
-<!-- ── Panneau latéral personnalisation (Sprint 20) ──────────────── -->
-<aside id="customize-panel" class="side-panel" hidden aria-hidden="true"
-       aria-labelledby="customize-panel-title">
-  <header class="side-panel-header">
-    <h3 id="customize-panel-title" class="side-panel-title"
-        data-i18n="customize_title">Mode avancé — personnalisation</h3>
-    <button type="button" class="side-panel-close" aria-label="Fermer"
-            onclick="closeCustomize()">×</button>
-  </header>
-  <div class="side-panel-body">
-
-    <section class="custom-section">
-      <h4 data-i18n="customize_columns">Colonnes visibles</h4>
-      <div id="customize-columns-list" class="custom-col-list"></div>
-    </section>
-
-    <section class="custom-section">
-      <h4 data-i18n="customize_filters">Filtres par strate</h4>
-      <div id="customize-filters-list" class="custom-filters-list">
-        <p class="custom-note" data-i18n="customize_filters_empty">
-          Aucune strate détectée dans les métadonnées du corpus.
-        </p>
-      </div>
-    </section>
-
-    <section class="custom-section">
-      <h4>
-        <span data-i18n="customize_weights">Score composite personnel</span>
-        <button type="button" class="custom-weights-toggle" id="custom-weights-toggle"
-                onclick="toggleCustomWeights()" data-i18n="customize_weights_enable">
-          Activer
-        </button>
-      </h4>
-      <p class="custom-warning" data-i18n="customize_weights_warning">
-        Ces poids reflètent votre cas d'usage. Il n'existe pas de pondération
-        universellement valide — Picarones ne suggère aucune pondération par défaut.
-      </p>
-      <div id="custom-weights-controls" hidden>
-        <div id="custom-weights-list"></div>
-        <div class="custom-formula" id="custom-formula"></div>
-      </div>
-    </section>
-
-    <!-- Sprint A7 (m-5) — toggle palette daltonien-friendly. -->
-    <section class="custom-section">
-      <h4 data-i18n="palette_toggle">Mode daltonien-friendly</h4>
-      <p class="custom-warning" data-i18n="palette_toggle_help">
-        Bascule la palette du rapport vers Okabe-Ito (palette WCAG AA
-        recommandée pour la déficience de la vision des couleurs).
-      </p>
-      <label class="palette-toggle-row">
-        <input type="checkbox" id="palette-toggle-cb"
-               onchange="togglePalette(this.checked)"
-               aria-describedby="palette-toggle-desc">
-        <span id="palette-toggle-desc" data-i18n="palette_toggle">Mode daltonien-friendly</span>
-      </label>
-    </section>
-
-    <section class="custom-section">
-      <button type="button" class="custom-reset" onclick="resetCustomization()"
-              data-i18n="customize_reset">Réinitialiser la vue</button>
-    </section>
-
-  </div>
-</aside>
diff --git a/picarones/report/templates/view_analyses.html b/picarones/report/templates/view_analyses.html
deleted file mode 100644
index 513fc9b8d6d980a7ac8f1dedf7e2fdb40268902d..0000000000000000000000000000000000000000
--- a/picarones/report/templates/view_analyses.html
+++ /dev/null
@@ -1,326 +0,0 @@
-
-<!-- ════ Vue 4 : Analyses ══════════════════════════════════════════ -->
-<div id="view-analyses" class="view">
-  <div class="charts-grid">
-
-    <div class="chart-card">
-      <h3 data-i18n="h_cer_dist">Distribution du CER par moteur</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-cer-hist" role="img" aria-label="Distribution des CER par moteur" data-a11y-label="Distribution des CER par moteur"></canvas>
-      </div>
-    </div>
-
-    <div class="chart-card">
-      <h3 data-i18n="h_radar">Profil des moteurs (radar)</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-radar" role="img" aria-label="Profil radar par moteur" data-a11y-label="Profil radar par moteur"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.5rem" data-i18n="radar_note">
-        Axe radar : CER, WER, MER, WIL — valeurs inversées (plus c'est haut, meilleur est le moteur).
-      </div>
-    </div>
-
-    <div class="chart-card">
-      <h3 data-i18n="h_cer_doc">CER par document (tous moteurs)</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-cer-doc" role="img" aria-label="CER par document" data-a11y-label="CER par document"></canvas>
-      </div>
-    </div>
-
-    <div class="chart-card">
-      <h3 data-i18n="h_duration">Temps d'exécution moyen (secondes/document)</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-duration" role="img" aria-label="Durée d'inférence par moteur" data-a11y-label="Durée d'inférence par moteur"></canvas>
-      </div>
-    </div>
-
-    <div class="chart-card">
-      <h3 data-i18n="h_quality_cer">Qualité image ↔ CER (scatter plot)</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-quality-cer" role="img" aria-label="Corrélation qualité d'image / CER" data-a11y-label="Corrélation qualité d'image / CER"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="quality_cer_note">
-        Chaque point = un document. Axe X = score qualité image [0–1]. Axe Y = CER. Corrélation négative attendue.
-      </div>
-    </div>
-
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_taxonomy">Taxonomie des erreurs par moteur</h3>
-      <div class="chart-canvas-wrap" style="max-height:300px">
-        <canvas id="chart-taxonomy" role="img" aria-label="Taxonomie d'erreurs par moteur" data-a11y-label="Taxonomie d'erreurs par moteur"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="taxonomy_note">
-        Distribution des classes d'erreurs (classes 1–9 de la taxonomie Picarones).
-      </div>
-    </div>
-
-    <!-- Sprint 7 — Courbe de fiabilité -->
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_reliability">Courbes de fiabilité</h3>
-      <div class="chart-canvas-wrap" style="max-height:300px">
-        <canvas id="chart-reliability" role="img" aria-label="Diagramme de fiabilité (calibration)" data-a11y-label="Diagramme de fiabilité (calibration)"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="reliability_note">
-        Pour les X% documents les plus faciles (triés par CER croissant), quel est le CER moyen cumulé ?
-        Une courbe basse = moteur performant même sur les documents faciles.
-      </div>
-    </div>
-
-    <!-- Sprint 7 — Intervalles de confiance -->
-    <div class="chart-card">
-      <h3 data-i18n="h_bootstrap">Intervalles de confiance à 95 % (bootstrap)</h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-bootstrap-ci" role="img" aria-label="Intervalles de confiance bootstrap" data-a11y-label="Intervalles de confiance bootstrap"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="bootstrap_note">
-        IC à 95% sur le CER moyen par moteur (1000 itérations bootstrap).
-      </div>
-    </div>
-
-    <!-- Sprint 7 — Diagramme de Venn -->
-    <div class="chart-card">
-      <h3 data-i18n="h_venn">Erreurs communes / exclusives (Venn)</h3>
-      <div id="venn-container" style="min-height:260px;display:flex;align-items:center;justify-content:center"></div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem technical" data-i18n="venn_note">
-        Intersection des ensembles d'erreurs entre les 2 ou 3 premiers concurrents.
-        Erreurs communes = segments partagés.
-      </div>
-    </div>
-
-    <!-- Sprint 7 — Tests de Wilcoxon -->
-    <div class="chart-card technical">
-      <h3 data-i18n="h_pairwise">Tests de Wilcoxon — comparaisons par paires</h3>
-      <div id="wilcoxon-table-container" style="overflow-x:auto"></div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="pairwise_note">
-        Test signé-rangé de Wilcoxon (non-paramétrique). Seuil α = 0.05.
-      </div>
-    </div>
-
-    <!-- Sprint 7 — Clustering des erreurs -->
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_clusters">Clustering des patterns d'erreurs</h3>
-      <div id="error-clusters-container"></div>
-    </div>
-
-    <!-- Sprint 10 — Scatter Gini vs CER moyen -->
-    <div class="chart-card">
-      <h3 data-i18n="h_gini_cer">Gini vs CER moyen <span style="font-size:.72rem;font-weight:400;color:var(--text-muted)" data-i18n="gini_cer_ideal">— idéal : bas-gauche</span></h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-gini-cer" role="img" aria-label="Gini vs CER" data-a11y-label="Gini vs CER"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="gini_cer_note">
-        Axe X = CER moyen, Axe Y = coefficient de Gini. Un moteur idéal a CER bas ET Gini bas (erreurs rares et uniformes).
-      </div>
-    </div>
-
-    <!-- Sprint 10 — Scatter ratio longueur vs ancrage -->
-    <div class="chart-card">
-      <h3 data-i18n="h_ratio_anchor">Ratio longueur vs ancrage <span style="font-size:.72rem;font-weight:400;color:var(--text-muted)" data-i18n="ratio_anchor_subtitle">— hallucinations VLM</span></h3>
-      <div class="chart-canvas-wrap">
-        <canvas id="chart-ratio-anchor" role="img" aria-label="Score d'ancrage par moteur" data-a11y-label="Score d'ancrage par moteur"></canvas>
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="ratio_anchor_note">
-        Axe X = score d'ancrage trigrammes [0–1]. Axe Y = ratio longueur sortie/GT.
-        Zone ⚠️ : ancrage &lt; 0.5 ou ratio &gt; 1.2 → hallucinations probables.
-      </div>
-    </div>
-
-    <!-- Sprint 19 — Vue Pareto coût/qualité ────────────────────────── -->
-    <div class="chart-card pareto-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_pareto">Compromis qualité / coût</h3>
-      <div class="pareto-toolbar">
-        <button class="pareto-toggle active" data-axis="cost" onclick="setParetoAxis('cost')"
-                data-i18n="pareto_axis_cost">Coût € / 1000 pages</button>
-        <button class="pareto-toggle" data-axis="speed" onclick="setParetoAxis('speed')"
-                data-i18n="pareto_axis_speed">Vitesse (s / page)</button>
-        <button class="pareto-toggle pareto-experimental" data-axis="co2"
-                onclick="setParetoAxis('co2')" data-i18n="pareto_axis_co2"
-                title="Estimation expérimentale">Carbone (g CO₂)</button>
-      </div>
-      <div class="chart-canvas-wrap"><canvas id="pareto-chart" role="img" aria-label="Front Pareto coût/qualité" data-a11y-label="Front Pareto coût/qualité"></canvas></div>
-      <div id="pareto-method-note" class="pareto-note" data-i18n="pareto_note">
-        Les moteurs sur la frontière de Pareto (en évidence) sont ceux pour
-        lesquels aucun autre moteur n'offre simultanément un meilleur CER ET
-        un meilleur coût. Prix indicatifs (table interne, datée). Le mode
-        carbone est expérimental.
-      </div>
-      <details class="pareto-assumptions">
-        <summary data-i18n="pareto_assumptions_summary">Hypothèses détaillées par moteur</summary>
-        <ul id="pareto-assumptions-list"></ul>
-      </details>
-    </div>
-
-    <!-- Sprint 43 — Calibration des moteurs (ECE, MCE, reliability diagram) -->
-    {% if calibration_summary_html or reliability_diagrams_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_calibration">Calibration des moteurs</h3>
-      <div class="calibration-grid"
-           style="display:grid;gap:1.2rem;align-items:start">
-        {% if calibration_summary_html %}
-        <div>{{ calibration_summary_html }}</div>
-        {% endif %}
-        {% if reliability_diagrams_html %}
-        <div>{{ reliability_diagrams_html }}</div>
-        {% endif %}
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.6rem"
-           data-i18n="calibration_note">
-        ECE (Expected Calibration Error) : moyenne pondérée des écarts
-        |confiance − précision| par bin. Plus l'ECE est bas, plus le
-        moteur est honnête sur sa fiabilité — la diagonale du diagramme
-        représente la calibration parfaite. Un ECE élevé signale qu'on
-        ne peut pas se fier au score de confiance pour cibler la
-        relecture humaine.
-      </div>
-    </div>
-    {% endif %}
-
-    <!-- Sprint 41 — Précision sur entités nommées (NER) -->
-    {% if ner_summary_html or ner_per_category_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_ner">Précision sur entités nommées</h3>
-      <div class="ner-grid"
-           style="display:grid;gap:1.2rem;align-items:start">
-        {% if ner_summary_html %}
-        <div>{{ ner_summary_html }}</div>
-        {% endif %}
-        {% if ner_per_category_html %}
-        <div>{{ ner_per_category_html }}</div>
-        {% endif %}
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.6rem"
-           data-i18n="ner_note">
-        F1 calculé par alignement IoU ≥ 0,5 sur les spans (labels
-        case-insensitive). Plus le F1 est haut, plus le moteur restitue
-        fidèlement les entités nommées (personnes, lieux, dates) — ce
-        qui prédit l'utilité aval pour l'indexation prosopographique.
-        Cette métrique mesure conjointement OCR + extracteur NER ; le
-        modèle d'extraction lui-même peut halluciner.
-      </div>
-    </div>
-    {% endif %}
-
-    <!-- Sprint 62 — Profil philologique (6 sections : unicode_blocks,
-         abbreviations, mufi, early_modern, modern_archives, roman_numerals).
-         Adaptive : n'apparaît que si au moins un module a du signal. -->
-    {% if philological_profile_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ philological_profile_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint 86 — A.II.5 : recherchabilité fuzzy + précision sur
-         séquences numériques. Adaptive : n'apparaît que si au moins
-         un moteur a du signal. -->
-    {% if searchability_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ searchability_html }}
-    </div>
-    {% endif %}
-    {% if numerical_sequences_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ numerical_sequences_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint 87 — A.II.2 : lisibilité (delta Flesch). Adaptive :
-         n'apparaît que si au moins un moteur a du signal. -->
-    {% if readability_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ readability_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint 89 — A.II.8b : spécialisation inter-moteurs.
-         Adaptive : n'apparaît que si ≥ 2 moteurs avec taxonomie. -->
-    {% if specialization_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ specialization_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint 37 — Analyse inter-moteurs (divergence taxonomique + oracle gap) -->
-    {% if divergence_matrix_html or oracle_gap_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      <h3 data-i18n="h_inter_engine">Analyse inter-moteurs</h3>
-      <div class="inter-engine-grid"
-           style="display:grid;grid-template-columns:1fr 1fr;gap:1.2rem;align-items:start">
-        {% if divergence_matrix_html %}
-        <div>{{ divergence_matrix_html }}</div>
-        {% endif %}
-        {% if oracle_gap_html %}
-        <div>{{ oracle_gap_html }}</div>
-        {% endif %}
-      </div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.6rem"
-           data-i18n="inter_engine_note">
-        Plus la divergence est élevée, plus deux moteurs se trompent sur des
-        classes d'erreurs différentes — ils sont alors candidats à un voting
-        ensemble. L'oracle est la borne supérieure du recall token-level
-        atteignable par ce voting (proxy bag-of-words).
-      </div>
-    </div>
-    {% endif %}
-
-    <!-- Chantier 3 (post-Sprint 97) — vues thématiques composées
-         qui regroupent les renderers orphelins en sections
-         collapsibles. Adaptive : ne s'affichent que si la vue
-         retourne du contenu (au moins une sous-section avec signal). -->
-    {% if economics_view_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ economics_view_html }}
-    </div>
-    {% endif %}
-    {% if advanced_taxonomy_view_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ advanced_taxonomy_view_html }}
-    </div>
-    {% endif %}
-    {% if diagnostics_view_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ diagnostics_view_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint « câblage des modules test-only » (mai 2026) :
-         4 sections issues de ``report_data.extra_metrics``.
-         Adaptive : ne s'affichent que si le calcul a remonté du signal. -->
-    {% if rare_token_recall_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ rare_token_recall_html }}
-    </div>
-    {% endif %}
-    {% if taxonomy_cooccurrence_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ taxonomy_cooccurrence_html }}
-    </div>
-    {% endif %}
-    {% if taxonomy_intra_doc_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ taxonomy_intra_doc_html }}
-    </div>
-    {% endif %}
-    {% if marginal_cost_html %}
-    <div class="chart-card" style="grid-column:1/-1">
-      {{ marginal_cost_html }}
-    </div>
-    {% endif %}
-
-    <!-- Sprint 7 — Matrice de corrélation -->
-    <div class="chart-card technical" style="grid-column:1/-1">
-      <h3 data-i18n="h_correlation">Matrice de corrélation entre métriques</h3>
-      <div style="margin-bottom:.5rem">
-        <label style="font-size:.82rem;font-weight:600"><span data-i18n="corr_engine_label">Moteur :</span>
-          <select id="corr-engine-select" onchange="renderCorrelationMatrix()"
-            style="padding:.25rem .5rem;border-radius:6px;border:1px solid var(--border);margin-left:.25rem"></select>
-        </label>
-      </div>
-      <div id="corr-matrix-container" style="overflow-x:auto"></div>
-      <div style="font-size:.72rem;color:var(--text-muted);margin-top:.4rem" data-i18n="corr_note">
-        Coefficient de Pearson entre les métriques CER, WER, qualité image, ligatures, diacritiques.
-        Vert = corrélation positive, Rouge = corrélation négative.
-      </div>
-    </div>
-
-  </div>
-</div>
diff --git a/picarones/report/templates/view_characters.html b/picarones/report/templates/view_characters.html
deleted file mode 100644
index 9641eeacf35fec0a17eee05c946f58b309b78e9e..0000000000000000000000000000000000000000
--- a/picarones/report/templates/view_characters.html
+++ /dev/null
@@ -1,32 +0,0 @@
-<!-- ════ Vue 5 : Caractères ════════════════════════════════════════ -->
-<div id="view-characters" class="view">
-  <div class="card">
-    <h2 data-i18n="h_characters">Analyse des caractères</h2>
-
-    <!-- Sélecteur de moteur -->
-    <div class="stat-row" style="margin-bottom:1rem">
-      <label for="char-engine-select" style="font-weight:600;margin-right:.5rem" data-i18n="char_engine_label">Moteur :</label>
-      <select id="char-engine-select" onchange="renderCharView()"
-        style="padding:.35rem .7rem;border-radius:6px;border:1px solid var(--border)"></select>
-    </div>
-
-    <!-- Scores ligatures / diacritiques -->
-    <div class="stat-row" id="char-scores-row" style="gap:1.5rem;margin-bottom:1.5rem"></div>
-
-    <!-- Matrice de confusion unicode -->
-    <h3 style="margin-bottom:.75rem">Matrice de confusion unicode
-      <span style="font-size:.75rem;font-weight:400;color:var(--text-muted)">
-        — substitutions les plus fréquentes (caractère GT → caractère OCR)
-      </span>
-    </h3>
-    <div id="confusion-heatmap" style="overflow-x:auto;margin-bottom:1.5rem"></div>
-
-    <!-- Détail ligatures par type -->
-    <h3 style="margin-bottom:.75rem">Reconnaissance des ligatures</h3>
-    <div id="ligature-detail" style="margin-bottom:1.5rem"></div>
-
-    <!-- Taxonomie détaillée -->
-    <h3 style="margin-bottom:.75rem">Distribution taxonomique des erreurs</h3>
-    <div id="taxonomy-detail"></div>
-  </div>
-</div>
diff --git a/picarones/report/templates/view_document.html b/picarones/report/templates/view_document.html
deleted file mode 100644
index 65b653b63493733fc687c89c25f4a253c30c8dff..0000000000000000000000000000000000000000
--- a/picarones/report/templates/view_document.html
+++ /dev/null
@@ -1,83 +0,0 @@
-
-<!-- ════ Vue 3 : Document ══════════════════════════════════════════ -->
-<div id="view-document" class="view">
-  <div class="doc-layout">
-    <!-- Sidebar -->
-    <aside class="doc-sidebar">
-      <div class="doc-sidebar-header" data-i18n="doc_sidebar_header">Documents</div>
-      <div id="doc-list"></div>
-    </aside>
-
-    <!-- Contenu principal -->
-    <div>
-      <div class="card" id="doc-detail-header">
-        <div style="display:flex; align-items:baseline; justify-content:space-between; flex-wrap:wrap; gap:.5rem">
-          <h2 id="doc-detail-title" data-i18n="doc_title_default">Sélectionner un document</h2>
-          <div class="stat-row" id="doc-detail-metrics"></div>
-        </div>
-      </div>
-
-      <!-- Image zoomable -->
-      <div class="card">
-        <h3 data-i18n="h_image">Image originale</h3>
-        <div class="doc-image-wrap" id="doc-image-wrap"
-          onwheel="handleZoom(event)"
-          onmousedown="startDrag(event)"
-          onmousemove="doDrag(event)"
-          onmouseup="endDrag()"
-          onmouseleave="endDrag()">
-          <div class="doc-image-placeholder" id="doc-image-placeholder">
-            <span style="font-size:2rem">🖼</span>
-            <span>Sélectionnez un document</span>
-          </div>
-          <img id="doc-image" src="" alt="Image du document" style="display:none">
-          <div class="zoom-controls">
-            <button class="zoom-btn" onclick="zoom(1.25)" title="Zoom +">+</button>
-            <button class="zoom-btn" onclick="zoom(0.8)"  title="Zoom −">−</button>
-            <button class="zoom-btn" onclick="resetZoom()" title="Réinitialiser">↺</button>
-          </div>
-        </div>
-      </div>
-
-      <!-- Diff côte à côte GT / OCR -->
-      <div class="card" id="doc-sidebyside-card">
-        <div class="sbs-header">
-          <h3 data-i18n="h_diff">Comparaison GT / OCR</h3>
-          <div class="sbs-engine-select" id="sbs-engine-select" style="display:none">
-            <label data-i18n="sbs_engine_label">Concurrent :</label>
-            <select id="sbs-engine-dropdown" onchange="renderSideBySide(currentDocId)"></select>
-          </div>
-        </div>
-        <div class="sbs-columns" id="sbs-columns">
-          <div class="sbs-col sbs-col-gt">
-            <div class="sbs-col-header sbs-gt-header">
-              <span>✓ Vérité terrain (GT)</span>
-            </div>
-            <div class="sbs-col-body" id="sbs-gt-body">—</div>
-          </div>
-          <div class="sbs-col sbs-col-ocr">
-            <div class="sbs-col-header sbs-ocr-header" id="sbs-ocr-header">
-              <span id="sbs-ocr-engine-name">OCR</span>
-              <span class="cer-badge" id="sbs-ocr-cer" style="display:none"></span>
-            </div>
-            <div class="sbs-col-body" id="sbs-ocr-body">—</div>
-          </div>
-        </div>
-        <!-- Pipeline triple-diff (affiché en dessous si applicable) -->
-        <div id="sbs-triple-diff" style="display:none"></div>
-      </div>
-
-      <!-- Sprint 10 — Distribution CER par ligne -->
-      <div class="card" id="doc-line-metrics-card" style="display:none">
-        <h3 data-i18n="h_line_metrics">Distribution des erreurs par ligne</h3>
-        <div id="doc-line-metrics-content"></div>
-      </div>
-
-      <!-- Sprint 10 — Hallucinations détectées -->
-      <div class="card" id="doc-hallucination-card" style="display:none">
-        <h3 data-i18n="h_hallucination">Analyse des hallucinations</h3>
-        <div id="doc-hallucination-content"></div>
-      </div>
-    </div>
-  </div>
-</div>
diff --git a/picarones/report/templates/view_gallery.html b/picarones/report/templates/view_gallery.html
deleted file mode 100644
index cbfec5e59d58a406753aae7a97711851b1241643..0000000000000000000000000000000000000000
--- a/picarones/report/templates/view_gallery.html
+++ /dev/null
@@ -1,35 +0,0 @@
-
-<!-- ════ Vue 2 : Galerie ═══════════════════════════════════════════ -->
-<div id="view-gallery" class="view">
-  <div class="card">
-    <h2 data-i18n="h_gallery">Galerie des documents</h2>
-    <div class="gallery-controls">
-      <label><span data-i18n="gallery_sort_label">Trier par :</span>
-        <select id="gallery-sort" onchange="renderGallery()">
-          <option value="doc_id" data-i18n-opt="gallery_sort_id">Identifiant</option>
-          <option value="mean_cer" data-i18n-opt="gallery_sort_cer">CER moyen</option>
-          <option value="difficulty_score" data-i18n-opt="gallery_sort_difficulty">Difficulté</option>
-          <option value="best_engine" data-i18n-opt="gallery_sort_best">Meilleur moteur</option>
-        </select>
-      </label>
-      <label><span data-i18n="gallery_filter_cer_label">Filtrer CER &gt;</span>
-        <input type="number" id="gallery-filter-cer" min="0" max="100" value="0" step="1"
-          style="width:60px" onchange="renderGallery()"> %
-      </label>
-      <label><span data-i18n="gallery_filter_engine_label">Moteur :</span>
-        <select id="gallery-engine-select" onchange="renderGallery()">
-          <option value="" data-i18n-opt="gallery_filter_all">Tous</option>
-        </select>
-      </label>
-      <button class="btn-secondary" onclick="resetGalleryExclusions()" id="gallery-reset-btn"
-        title="Réinitialiser toutes les exclusions manuelles" style="display:none">
-        ↺ Réinitialiser exclusions
-      </button>
-    </div>
-    <div id="gallery-exclusion-info" style="font-size:.82rem;color:var(--text-muted);margin:.4rem 0;display:none"></div>
-    <div id="gallery-grid" class="gallery-grid"></div>
-    <div id="gallery-empty" class="empty-state" style="display:none" data-i18n="gallery_empty">
-      Aucun document ne correspond aux filtres.
-    </div>
-  </div>
-</div>
diff --git a/picarones/report/templates/view_ranking.html b/picarones/report/templates/view_ranking.html
deleted file mode 100644
index 01dfdfa2ae509a9ecfac8aa321fd4590621b3490..0000000000000000000000000000000000000000
--- a/picarones/report/templates/view_ranking.html
+++ /dev/null
@@ -1,91 +0,0 @@
-
-<!-- ════ Vue 1 : Classement ════════════════════════════════════════ -->
-<div id="view-ranking" class="view active">
-  <div class="card">
-    <h2 data-i18n="h_ranking">Classement des moteurs</h2>
-    <div class="stat-row" id="ranking-stats"></div>
-    <div class="table-wrap">
-      <table id="ranking-table">
-        <thead>
-          <tr>
-            <th scope="col" data-col="rank" class="sortable sorted" data-dir="asc" data-i18n="col_rank">#<i class="sort-icon">↑</i></th>
-            <th scope="col" data-col="name" class="sortable" data-i18n="col_engine">Concurrent<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="cer"  class="sortable" data-glossary-key="cer" data-i18n="col_cer">CER exact<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="cer_diplomatic" class="sortable" id="th-cer-diplo" data-glossary-key="cer_diplomatic" data-i18n="col_cer_diplo">CER diplo.<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="wer"  class="sortable" data-glossary-key="wer" data-i18n="col_wer">WER<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="mer"  class="sortable" data-glossary-key="mer" data-i18n="col_mer">MER<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="wil"  class="sortable" data-glossary-key="wil" data-i18n="col_wil">WIL<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="ligature_score" class="sortable" id="th-ligatures" data-glossary-key="ligature_score" data-i18n="col_ligatures">Ligatures<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="diacritic_score" class="sortable" id="th-diacritics" data-glossary-key="diacritic_score" data-i18n="col_diacritics">Diacritiques<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="gini" class="sortable" id="th-gini" data-glossary-key="gini" data-i18n="col_gini">Gini<i class="sort-icon">↕</i></th>
-            <th scope="col" data-col="anchor_score" class="sortable" id="th-anchor" data-glossary-key="anchor_score" data-i18n="col_anchor">Ancrage<i class="sort-icon">↕</i></th>
-            <th scope="col" data-i18n="col_cer_median">CER médian</th>
-            <th scope="col" data-i18n="col_cer_min">CER min</th>
-            <th scope="col" data-i18n="col_cer_max">CER max</th>
-            <th scope="col" id="th-overnorm" data-i18n="col_overnorm">Sur-norm.</th>
-            <th scope="col" data-i18n="col_docs">Docs</th>
-          </tr>
-        </thead>
-        <tbody id="ranking-tbody"></tbody>
-      </table>
-    </div>
-    <div class="stat-row" style="margin-top:.75rem">
-      <div class="legend-row">
-        <span class="legend-dot" style="background:#16a34a"></span>CER &lt; 5 %
-      </div>
-      <div class="legend-row">
-        <span class="legend-dot" style="background:#ca8a04"></span>5–15 %
-      </div>
-      <div class="legend-row">
-        <span class="legend-dot" style="background:#ea580c"></span>15–30 %
-      </div>
-      <div class="legend-row">
-        <span class="legend-dot" style="background:#dc2626"></span>&gt; 30 %
-      </div>
-    </div>
-
-    <!-- Sprint 46 — vue stratifiée par script_type (rapport adaptatif :
-         section omise quand aucune strate n'est disponible) -->
-    {% if stratified_ranking_html %}
-    {{ stratified_ranking_html }}
-    {% endif %}
-  </div>
-
-  <!-- ── Métriques robustes ────────────────────────────────────── -->
-  <div class="card" id="robust-metrics-card">
-    <h2 data-i18n="h_robust">Analyse robuste (sans hallucinations)</h2>
-    <p style="font-size:.82rem;color:var(--text-muted);margin-bottom:.75rem" data-i18n="robust_desc">
-      Recalcule CER, WER, MER, WIL, Gini et ancrage en excluant les documents détectés comme hallucinés ou problématiques.
-      Cochez/décochez des documents dans la Galerie pour les exclure manuellement.
-    </p>
-    <div class="robust-controls">
-      <label>
-        <button class="robust-toggle" id="robust-cer-toggle" data-active="true"
-          onclick="toggleRobustCriterion('cer',this)">✓</button>
-        <span data-i18n="robust_cer_label">CER &gt; seuil :</span>
-        <input type="range" id="robust-cer" min="0" max="100" step="1" value="100"
-          oninput="document.getElementById('robust-cer-val').textContent=parseInt(this.value)+'%';_computeHallucinationExclusions();recalculateAll()">
-        <span id="robust-cer-val" class="slider-val">100%</span>
-      </label>
-      <label>
-        <button class="robust-toggle" id="robust-anchor-toggle" data-active="true"
-          onclick="toggleRobustCriterion('anchor',this)">✓</button>
-        <span data-i18n="robust_anchor_label">Ancrage &lt; seuil :</span>
-        <input type="range" id="robust-anchor" min="0" max="1" step="0.05" value="0.5"
-          oninput="document.getElementById('robust-anchor-val').textContent=parseFloat(this.value).toFixed(2);_computeHallucinationExclusions();recalculateAll()">
-        <span id="robust-anchor-val" class="slider-val">0.50</span>
-      </label>
-      <label>
-        <button class="robust-toggle" id="robust-ratio-toggle" data-active="true"
-          onclick="toggleRobustCriterion('ratio',this)">✓</button>
-        <span data-i18n="robust_ratio_label">Ratio longueur &gt; seuil :</span>
-        <input type="range" id="robust-ratio" min="1" max="3" step="0.1" value="1.5"
-          oninput="document.getElementById('robust-ratio-val').textContent=parseFloat(this.value).toFixed(1);_computeHallucinationExclusions();recalculateAll()">
-        <span id="robust-ratio-val" class="slider-val">1.5</span>
-      </label>
-    </div>
-    <div id="robust-summary" style="font-size:.85rem;font-weight:600;margin:.75rem 0;padding:.5rem .75rem;background:var(--bg);border-radius:.4rem;border:1px solid var(--border)"></div>
-    <div id="robust-table-wrap" class="table-wrap"></div>
-    <div id="robust-excluded-docs" style="margin-top:.75rem;font-size:.82rem"></div>
-  </div>
-</div>
diff --git a/picarones/reports_v2/html/__init__.py b/picarones/reports_v2/html/__init__.py
index 32cf1d15f16c9d9a418a003fee5fe550b0f43dd1..4bfeaa45ccb44c8b33e0c786f15828b7393287bb 100644
--- a/picarones/reports_v2/html/__init__.py
+++ b/picarones/reports_v2/html/__init__.py
@@ -21,6 +21,7 @@ Usage
 
 from __future__ import annotations
 
+from picarones.reports_v2.html.generator import ReportGenerator
 from picarones.reports_v2.html.render import HtmlReportRenderer
 
-__all__ = ["HtmlReportRenderer"]
+__all__ = ["HtmlReportRenderer", "ReportGenerator"]
diff --git a/picarones/reports_v2/html/comparison.py b/picarones/reports_v2/html/comparison.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae03a544a3730a8df3bbe9b1323ff9a14ee02fb2
--- /dev/null
+++ b/picarones/reports_v2/html/comparison.py
@@ -0,0 +1,414 @@
+"""Comparaison de deux runs de benchmark (Sprint 28).
+
+Phase 5.E — module relocalisé depuis ``picarones.report.comparison``
+vers ``picarones.reports_v2.html.comparison``.  Le chemin legacy
+reste disponible via un shim avec ``DeprecationWarning`` ;
+suppression prévue en 2.0.
+
+Le Sprint 8 a livré la persistance longitudinale via SQLite
+(``picarones.measurements.history``) et un détecteur de régression CLI. Mais
+aucun outil n'exposait la **comparaison** de deux runs côté rapport :
+un chercheur qui itère sur 8 prompts ne pouvait pas voir d'un coup
+*« Tesseract → GPT-4o version V2 a régressé de 0,8 pp en CER moyen
+sur la strate paroissiaux par rapport à V1 »*.
+
+Ce module fournit :
+
+- ``load_benchmark_json(path)`` — charge le JSON produit par
+  ``BenchmarkResult.as_dict()`` ou ``picarones run -o results.json``.
+- ``compare_benchmarks(a, b)`` — calcule les deltas par moteur
+  (CER mean, WER mean, comptes de documents traités/échoués) et
+  par strate quand la métadonnée est présente.
+- ``detect_regressions(diff)`` — liste les moteurs en régression
+  (delta CER > ``diff.threshold``) ; les améliorations (delta CER
+  < -seuil) sont exposées via ``ComparisonResult.as_dict()``.
+- ``render_comparison_html(diff, output_path)`` — rendu HTML
+  auto-contenu minimal via Jinja2 pour partage.
+
+Conventions
+-----------
+- Les deltas sont calculés ``b - a`` (donc positif = ``b`` est pire).
+- Un moteur présent dans un seul run apparaît dans ``only_in_a`` /
+  ``only_in_b``, jamais dans ``deltas``.
+- Un moteur dont le ``mean_cer`` est ``None`` (échec total) est
+  signalé mais ne génère pas de delta numérique.
+- ``threshold`` est en absolu (CER en fraction, pas en %). Défaut
+  0.005 = 0,5 pp.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Modèles
+# ---------------------------------------------------------------------------
+
+@dataclass
+class EngineDelta:
+    """Différence ``b - a`` pour un moteur donné."""
+    engine: str
+    cer_a: Optional[float]
+    cer_b: Optional[float]
+    delta_cer: Optional[float]
+    wer_a: Optional[float]
+    wer_b: Optional[float]
+    delta_wer: Optional[float]
+    docs_a: int
+    docs_b: int
+    failed_a: int
+    failed_b: int
+    is_regression: bool = False
+    is_improvement: bool = False
+
+    def as_dict(self) -> dict[str, Any]:
+        return {
+            "engine": self.engine,
+            "cer_a": self.cer_a,
+            "cer_b": self.cer_b,
+            "delta_cer": self.delta_cer,
+            "wer_a": self.wer_a,
+            "wer_b": self.wer_b,
+            "delta_wer": self.delta_wer,
+            "docs_a": self.docs_a,
+            "docs_b": self.docs_b,
+            "failed_a": self.failed_a,
+            "failed_b": self.failed_b,
+            "is_regression": self.is_regression,
+            "is_improvement": self.is_improvement,
+        }
+
+
+@dataclass
+class ComparisonResult:
+    """Résultat d'une comparaison ``b - a`` entre deux runs."""
+    label_a: str
+    label_b: str
+    run_date_a: Optional[str]
+    run_date_b: Optional[str]
+    corpus_a: Optional[str]
+    corpus_b: Optional[str]
+    deltas: list[EngineDelta] = field(default_factory=list)
+    only_in_a: list[str] = field(default_factory=list)
+    only_in_b: list[str] = field(default_factory=list)
+    threshold: float = 0.005
+
+    def as_dict(self) -> dict[str, Any]:
+        return {
+            "label_a": self.label_a,
+            "label_b": self.label_b,
+            "run_date_a": self.run_date_a,
+            "run_date_b": self.run_date_b,
+            "corpus_a": self.corpus_a,
+            "corpus_b": self.corpus_b,
+            "threshold": self.threshold,
+            "deltas": [d.as_dict() for d in self.deltas],
+            "only_in_a": list(self.only_in_a),
+            "only_in_b": list(self.only_in_b),
+            "regressions": [d.as_dict() for d in self.deltas if d.is_regression],
+            "improvements": [d.as_dict() for d in self.deltas if d.is_improvement],
+        }
+
+
+# ---------------------------------------------------------------------------
+# Chargement
+# ---------------------------------------------------------------------------
+
+def load_benchmark_json(path: str | Path | dict[str, Any]) -> dict[str, Any]:
+    """Load a benchmark JSON document, or pass through a parsed dict.
+
+    ``path`` may be a file path (JSON object at top level, typically
+    ``BenchmarkResult.as_dict()`` output) or an already-parsed dict,
+    which is returned as-is.  Raises ``FileNotFoundError`` if the file
+    is missing and ``ValueError`` if the JSON root is not an object.
+    """
+    if isinstance(path, dict):
+        return path
+    p = Path(path)
+    if not p.exists():
+        raise FileNotFoundError(f"Fichier benchmark introuvable : {p}")
+    with p.open(encoding="utf-8") as fh:
+        data = json.load(fh)
+    if not isinstance(data, dict):
+        raise ValueError(f"Le JSON {p} doit être un dict.")
+    return data
+
+
+# ---------------------------------------------------------------------------
+# Comparaison
+# ---------------------------------------------------------------------------
+
+def _ranking_index(data: dict[str, Any]) -> dict[str, dict[str, Any]]:
+    """Indexe ``ranking`` par nom de moteur — robuste aux deux formats.
+
+    Un ``BenchmarkResult.as_dict()`` expose ``ranking`` directement
+    (clés ``engine``, ``mean_cer``, …). Le format alternatif ``engines``
+    expose le même contenu sous des clés légèrement différentes —
+    on normalise vers le format ``ranking``.
+    """
+    ranking = data.get("ranking")
+    if isinstance(ranking, list) and ranking:
+        return {
+            r["engine"]: {
+                "engine": r["engine"],
+                "mean_cer": r.get("mean_cer"),
+                "mean_wer": r.get("mean_wer"),
+                "documents": int(r.get("documents") or 0),
+                "failed": int(r.get("failed") or 0),
+            }
+            for r in ranking
+            if isinstance(r, dict) and r.get("engine")
+        }
+    # Fallback : ``engines`` (format report_data)
+    engines = data.get("engines") or []
+    out: dict[str, dict[str, Any]] = {}
+    if isinstance(engines, list):
+        for e in engines:
+            if not isinstance(e, dict):
+                continue
+            name = e.get("name") or e.get("engine")
+            if not name:
+                continue
+            out[name] = {
+                "engine": name,
+                "mean_cer": e.get("cer"),
+                "mean_wer": e.get("wer"),
+                "documents": int(e.get("documents") or 0),
+                "failed": int(e.get("failed") or 0),
+            }
+    return out
+
+
+def _label_of(data: dict[str, Any], default: str) -> str:
+    meta = data.get("meta") or {}
+    return (
+        meta.get("corpus_name")
+        or (data.get("corpus") or {}).get("name")
+        or default
+    )
+
+
+def _run_date_of(data: dict[str, Any]) -> Optional[str]:
+    return (
+        data.get("run_date")
+        or (data.get("meta") or {}).get("run_date")
+    )
+
+
+def _corpus_of(data: dict[str, Any]) -> Optional[str]:
+    meta = data.get("meta") or {}
+    return (
+        meta.get("corpus_source")
+        or (data.get("corpus") or {}).get("source")
+        or meta.get("corpus_name")
+    )
+
+
+def _safe_delta(a: Optional[float], b: Optional[float]) -> Optional[float]:
+    if a is None or b is None:
+        return None
+    return float(b) - float(a)
+
+
+def compare_benchmarks(
+    a: str | Path | dict[str, Any],
+    b: str | Path | dict[str, Any],
+    *,
+    threshold: float = 0.005,
+    label_a: str = "A",
+    label_b: str = "B",
+) -> ComparisonResult:
+    """Compare deux runs et retourne les deltas par moteur.
+
+    Convention : un delta CER positif signifie que ``b`` est *moins bon*
+    que ``a`` (régression). Un seuil ``threshold`` strictement positif
+    (en fraction, ex. 0,005 = 0,5 pp) discrimine régression / bruit.
+    """
+    da = load_benchmark_json(a) if not isinstance(a, dict) else a
+    db = load_benchmark_json(b) if not isinstance(b, dict) else b
+
+    idx_a = _ranking_index(da)
+    idx_b = _ranking_index(db)
+
+    common = sorted(set(idx_a) & set(idx_b))
+    only_a = sorted(set(idx_a) - set(idx_b))
+    only_b = sorted(set(idx_b) - set(idx_a))
+
+    deltas: list[EngineDelta] = []
+    for name in common:
+        ea = idx_a[name]
+        eb = idx_b[name]
+        delta_cer = _safe_delta(ea["mean_cer"], eb["mean_cer"])
+        delta_wer = _safe_delta(ea["mean_wer"], eb["mean_wer"])
+        regression = bool(delta_cer is not None and delta_cer > threshold)
+        improvement = bool(delta_cer is not None and delta_cer < -threshold)
+        deltas.append(
+            EngineDelta(
+                engine=name,
+                cer_a=ea["mean_cer"],
+                cer_b=eb["mean_cer"],
+                delta_cer=delta_cer,
+                wer_a=ea["mean_wer"],
+                wer_b=eb["mean_wer"],
+                delta_wer=delta_wer,
+                docs_a=int(ea["documents"]),
+                docs_b=int(eb["documents"]),
+                failed_a=int(ea["failed"]),
+                failed_b=int(eb["failed"]),
+                is_regression=regression,
+                is_improvement=improvement,
+            )
+        )
+
+    # Tri : régressions d'abord, puis les autres ; delta CER décroissant dans chaque groupe.
+    deltas.sort(key=lambda d: (
+        not d.is_regression,
+        -(d.delta_cer if d.delta_cer is not None else 0.0),
+    ))
+
+    return ComparisonResult(
+        label_a=label_a,
+        label_b=label_b,
+        run_date_a=_run_date_of(da),
+        run_date_b=_run_date_of(db),
+        corpus_a=_corpus_of(da),
+        corpus_b=_corpus_of(db),
+        deltas=deltas,
+        only_in_a=only_a,
+        only_in_b=only_b,
+        threshold=float(threshold),
+    )
+
+
+def detect_regressions(
+    diff: ComparisonResult,
+) -> list[EngineDelta]:
+    """Retourne uniquement les moteurs en régression dans ``diff``."""
+    return [d for d in diff.deltas if d.is_regression]
+
+
+# ---------------------------------------------------------------------------
+# Rendu HTML
+# ---------------------------------------------------------------------------
+
+_COMPARISON_TEMPLATE = """<!DOCTYPE html>
+<html lang="fr">
+<head>
+<meta charset="UTF-8">
+<title>Picarones — Comparaison de runs</title>
+<style>
+  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+         max-width: 980px; margin: 2em auto; padding: 0 1em; color: #111; }
+  h1 { border-bottom: 2px solid #333; padding-bottom: .4em; }
+  h2 { margin-top: 1.6em; color: #333; }
+  table { width: 100%; border-collapse: collapse; margin: 1em 0; }
+  th, td { padding: .5em .8em; text-align: left; border-bottom: 1px solid #ddd; }
+  th { background: #f3f3f3; }
+  td.num, th.num { text-align: right; font-variant-numeric: tabular-nums; }
+  tr.regression td { background: #fef0f0; }
+  tr.improvement td { background: #f0fef2; }
+  .delta-pos { color: #b0322a; font-weight: 600; }
+  .delta-neg { color: #1b8a3a; font-weight: 600; }
+  .badge { display: inline-block; padding: .15em .55em; border-radius: 4px;
+           font-size: .8em; font-weight: 600; }
+  .badge.reg { background: #fde2e0; color: #8a1c14; }
+  .badge.imp { background: #e0f8e6; color: #0a5e22; }
+  .meta { color: #666; font-size: .9em; }
+  .empty { color: #999; font-style: italic; }
+</style>
+</head>
+<body>
+<h1>Comparaison : {{ diff.label_a }} → {{ diff.label_b }}</h1>
+<p class="meta">
+  Run A : {{ diff.run_date_a or "?" }} · corpus {{ diff.corpus_a or "?" }}<br>
+  Run B : {{ diff.run_date_b or "?" }} · corpus {{ diff.corpus_b or "?" }}<br>
+  Seuil régression / amélioration : {{ "%.3f"|format(diff.threshold) }}
+  ({{ "%.1f"|format(diff.threshold * 100) }} pp de CER absolu).
+</p>
+
+<h2>Moteurs comparés ({{ diff.deltas|length }})</h2>
+{% if not diff.deltas %}
+  <p class="empty">Aucun moteur commun aux deux runs.</p>
+{% else %}
+<table>
+  <thead>
+    <tr>
+      <th scope=\"col\">Moteur</th>
+      <th scope=\"col\" class="num">CER A</th>
+      <th scope=\"col\" class="num">CER B</th>
+      <th scope=\"col\" class="num">Δ CER</th>
+      <th scope=\"col\" class="num">Docs A → B</th>
+      <th scope=\"col\">État</th>
+    </tr>
+  </thead>
+  <tbody>
+  {% for d in diff.deltas %}
+    <tr class="{% if d.is_regression %}regression{% elif d.is_improvement %}improvement{% endif %}">
+      <td>{{ d.engine }}</td>
+      <td class="num">{{ "%.3f"|format(d.cer_a) if d.cer_a is not none else "—" }}</td>
+      <td class="num">{{ "%.3f"|format(d.cer_b) if d.cer_b is not none else "—" }}</td>
+      <td class="num">
+        {% if d.delta_cer is none %}—
+        {% elif d.delta_cer > 0 %}<span class="delta-pos">+{{ "%.3f"|format(d.delta_cer) }}</span>
+        {% else %}<span class="delta-neg">{{ "%.3f"|format(d.delta_cer) }}</span>
+        {% endif %}
+      </td>
+      <td class="num">{{ d.docs_a }} → {{ d.docs_b }}</td>
+      <td>
+        {% if d.is_regression %}<span class="badge reg">régression</span>
+        {% elif d.is_improvement %}<span class="badge imp">amélioration</span>
+        {% else %}<span class="meta">stable</span>{% endif %}
+      </td>
+    </tr>
+  {% endfor %}
+  </tbody>
+</table>
+{% endif %}
+
+{% if diff.only_in_a %}
+<h2>Présents uniquement dans A</h2>
+<ul>{% for n in diff.only_in_a %}<li>{{ n }}</li>{% endfor %}</ul>
+{% endif %}
+
+{% if diff.only_in_b %}
+<h2>Présents uniquement dans B</h2>
+<ul>{% for n in diff.only_in_b %}<li>{{ n }}</li>{% endfor %}</ul>
+{% endif %}
+
+<p class="meta">Picarones — Sprint 28 · rapport de comparaison de runs.</p>
+</body>
+</html>
+"""
+
+
+def render_comparison_html(
+    diff: ComparisonResult,
+    output_path: str | Path,
+) -> Path:
+    """Sérialise un ``ComparisonResult`` en rapport HTML auto-contenu."""
+    from jinja2 import Environment, select_autoescape
+
+    env = Environment(autoescape=select_autoescape(["html", "j2"]))
+    template = env.from_string(_COMPARISON_TEMPLATE)
+    html = template.render(diff=diff)
+    out = Path(output_path)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text(html, encoding="utf-8")
+    return out
+
+
+__all__ = [
+    "EngineDelta",
+    "ComparisonResult",
+    "load_benchmark_json",
+    "compare_benchmarks",
+    "detect_regressions",
+    "render_comparison_html",
+]
diff --git a/picarones/reports_v2/html/data/__init__.py b/picarones/reports_v2/html/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1ff8d7914a6cad053adf9cc7d310f5d84cdc62a
--- /dev/null
+++ b/picarones/reports_v2/html/data/__init__.py
@@ -0,0 +1,132 @@
+"""Construction du dict de données consommé par le template Jinja.
+
+Avant le découpage, ``picarones.report.generator._build_report_data``
+faisait 463 lignes pour transformer un :class:`BenchmarkResult` en
+dict prêt pour Jinja. Cette fonction empilait par sprint des blocs
+indépendants — engines, documents, statistiques, scatter plots,
+front Pareto, etc.
+
+Ce sous-package éclate la construction en modules thématiques :
+
+- :mod:`engines` — résumé par moteur (``engines_summary``).
+- :mod:`documents` — vue galerie + détail + difficulté Sprint 7.
+- :mod:`statistics` — Wilcoxon, Friedman, Nemenyi, bootstrap CIs,
+  reliability curves, Venn, error clusters, corrélations.
+- :mod:`scatter` — Sprint 10 : Gini vs CER, ratio vs anchor.
+- :mod:`pareto` — Sprint 19 : 3 fronts Pareto + métadonnées pricing.
+  Expose deux fonctions séparées : :func:`attach_engine_costs`
+  (mute) et :func:`build_pareto_section` (pure).
+
+L'API publique :func:`build_report_data` orchestre ces modules dans
+le bon ordre. La séquence Pareto en deux temps
+(``attach_engine_costs`` → ``build_pareto_section``) rend la
+mutation explicite — les fonctions ``build_*`` du sous-package
+sont pures sauf ``attach_engine_costs`` dont le nom le dit.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+from picarones.reports_v2.html.data.documents import (
+    annotate_documents_with_difficulty,
+    build_documents,
+)
+from picarones.reports_v2.html.data.engines import build_engines_summary
+from picarones.reports_v2.html.data.extra_metrics import (
+    compute_marginal_cost_section,
+    compute_rare_token_recall_per_engine,
+    compute_taxonomy_cooccurrence_section,
+    compute_taxonomy_intra_doc_section,
+)
+from picarones.reports_v2.html.data.pareto import (
+    attach_engine_costs,
+    build_pareto_section,
+)
+from picarones.reports_v2.html.data.scatter import (
+    build_gini_vs_cer,
+    build_ratio_vs_anchor,
+)
+from picarones.reports_v2.html.data.statistics import (
+    build_bootstrap_cis,
+    build_correlation_per_engine,
+    build_error_clusters,
+    build_friedman_and_nemenyi,
+    build_pairwise_wilcoxon,
+    build_reliability_curves,
+    build_venn_data,
+)
+
+
+def build_report_data(
+    benchmark: "BenchmarkResult", images_b64: dict[str, str],
+) -> dict:
+    """Transforme un :class:`BenchmarkResult` en dict pour le rapport HTML.
+
+    Ordre critique :
+
+    1. Construire ``engines_summary`` (pur).
+    2. Construire ``documents`` puis annoter avec la difficulté (mute
+       ``documents``).
+    3. **Attacher** les coûts à ``engines_summary`` (mute, nom
+       explicite).
+    4. **Construire** le bloc Pareto (pure, lit les coûts attachés).
+    """
+    engines_summary = build_engines_summary(benchmark)
+    documents = build_documents(benchmark, images_b64)
+    annotate_documents_with_difficulty(benchmark, documents)
+
+    attach_engine_costs(engines_summary, benchmark)
+    pareto_data = build_pareto_section(engines_summary)
+
+    return {
+        "meta": {
+            "corpus_name": benchmark.corpus_name,
+            "corpus_source": benchmark.corpus_source,
+            "document_count": benchmark.document_count,
+            "run_date": benchmark.run_date,
+            "picarones_version": benchmark.picarones_version,
+            "metadata": benchmark.metadata,
+        },
+        "ranking": benchmark.ranking(),
+        "engines": engines_summary,
+        "documents": documents,
+        # Sprint 7
+        "statistics": {
+            "pairwise_wilcoxon": build_pairwise_wilcoxon(benchmark),
+            "bootstrap_cis": build_bootstrap_cis(benchmark),
+            **build_friedman_and_nemenyi(benchmark),
+        },
+        "reliability_curves": build_reliability_curves(benchmark),
+        "venn_data": build_venn_data(benchmark),
+        "error_clusters": build_error_clusters(benchmark),
+        "correlation_per_engine": build_correlation_per_engine(benchmark),
+        # Sprint 10
+        "gini_vs_cer": build_gini_vs_cer(benchmark),
+        "ratio_vs_anchor": build_ratio_vs_anchor(benchmark),
+        # Sprint 19 — vue Pareto coût/qualité avec variantes d'axe
+        "pareto": pareto_data,
+        # Sprint 36 — analyse inter-moteurs (divergence taxonomique +
+        # complémentarité / oracle).  ``None`` si moins de 2 moteurs.
+        "inter_engine_analysis": benchmark.inter_engine_analysis,
+        # Sprint 45-46 — stratification par script_type
+        "available_strata": benchmark.available_strata(),
+        "stratified_ranking": benchmark.stratified_ranking() or None,
+        "corpus_homogeneity": benchmark.corpus_homogeneity(),
+        # Sprint « câblage des modules test-only » (mai 2026) — métriques
+        # corpus-wide qui jusque-là n'étaient pas remontées dans le rapport.
+        # Sprint 71 (A.I.1) : recall sur tokens rares (hapax + dis legomena).
+        "rare_token_recall": compute_rare_token_recall_per_engine(benchmark),
+        # Sprint 75 (A.I.4) : co-occurrence taxonomique inter-classes.
+        "taxonomy_cooccurrence": compute_taxonomy_cooccurrence_section(benchmark),
+        # Sprint 76 (A.I.4) : heatmap class × position (intra-document).
+        "taxonomy_intra_doc": compute_taxonomy_intra_doc_section(benchmark),
+        # Sprint 91 (A.II.6) : matrice de coût marginal entre paires de moteurs.
+        "marginal_cost": compute_marginal_cost_section(engines_summary),
+    }
+
+
+__all__ = ["build_report_data"]
diff --git a/picarones/reports_v2/html/data/_helpers.py b/picarones/reports_v2/html/data/_helpers.py
new file mode 100644
index 0000000000000000000000000000000000000000..de8fdee0516ca33fbe73a1eda9ed6095478e73b8
--- /dev/null
+++ b/picarones/reports_v2/html/data/_helpers.py
@@ -0,0 +1,30 @@
+"""Helpers numériques internes au sous-package report_data.
+
+Petites fonctions utilitaires partagées par tous les builders de
+sections (engines, documents, statistics, scatter, pareto). Ne pas
+importer depuis l'extérieur du sous-package — ces helpers sont
+spécifiques aux conventions du dict JSON consommé par le template.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+
+def safe_round(v: Optional[float], decimals: int = 4) -> float:
+    """Arrondit un float optionnel ; ``None`` devient ``0.0``."""
+    return round(v or 0.0, decimals)
+
+
+def percent_string(v: Optional[float], decimals: int = 2) -> str:
+    """Formate un ratio ∈ [0, 1] en chaîne pourcentage : ``0.4723 → "47.23 %"``.
+
+    ``None`` → ``"—"``. Conservé pour rétrocompat avec d'éventuels
+    callers externes (Sprint 7 historique).
+    """
+    if v is None:
+        return "—"
+    return f"{v * 100:.{decimals}f} %"
+
+
+__all__ = ["safe_round", "percent_string"]
diff --git a/picarones/reports_v2/html/data/documents.py b/picarones/reports_v2/html/data/documents.py
new file mode 100644
index 0000000000000000000000000000000000000000..5067e131b8ecf85891c6631848c91c3bc0b0bcd9
--- /dev/null
+++ b/picarones/reports_v2/html/data/documents.py
@@ -0,0 +1,167 @@
+"""Construction de la liste ``documents`` (vue galerie + vue détail).
+
+Pour chaque document du corpus, agrège les hypothèses de tous les
+moteurs avec leurs métriques, le diff caractère par caractère, et
+les champs spécifiques aux pipelines OCR+LLM (intermédiaire, mode,
+sur-normalisation).
+
+:func:`annotate_documents_with_difficulty` enrichit ensuite chaque
+document avec son score de difficulté intrinsèque (Sprint 7).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from picarones.evaluation import compute_char_diff, compute_word_diff
+from picarones.evaluation.metrics.difficulty import (
+    compute_all_difficulties,
+    difficulty_label,
+)
+from picarones.reports_v2.html.data._helpers import safe_round
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+def build_documents(
+    benchmark: "BenchmarkResult", images_b64: dict[str, str],
+) -> list[dict]:
+    """Return the ordered list of per-document dicts for the template.
+
+    Documents keep first-appearance order across ``engine_reports``.
+    ``best_engine`` is the lowest-CER engine among results without an
+    engine error (``""`` if there is none), as for ``mean_cer``.
+    """
+    seen_doc_ids: set[str] = set()
+    doc_ids_ordered: list[str] = []
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            if dr.doc_id not in seen_doc_ids:
+                seen_doc_ids.add(dr.doc_id)
+                doc_ids_ordered.append(dr.doc_id)
+
+    # Cross index: doc_id → {engine_name → DocumentResult}
+    doc_engine_map: dict[str, dict] = {did: {} for did in doc_ids_ordered}
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            doc_engine_map.setdefault(dr.doc_id, {})[report.engine_name] = dr
+
+    documents: list[dict] = []
+    engine_names = [r.engine_name for r in benchmark.engine_reports]
+    for doc_id in doc_ids_ordered:
+        engine_results: list[dict] = []
+        gt = ""
+        image_path = ""
+        for engine_name in engine_names:
+            dr = doc_engine_map[doc_id].get(engine_name)
+            if dr is None:
+                continue
+            gt = dr.ground_truth
+            image_path = dr.image_path
+            er_entry = _build_engine_result_entry(engine_name, dr)
+            engine_results.append(er_entry)
+
+        # Mean CER and best engine both ignore results carrying an engine error.
+        ok_results = [er for er in engine_results if er["error"] is None]
+        mean_cer = sum(er["cer"] for er in ok_results) / len(ok_results) if ok_results else 1.0
+        best_engine = min(ok_results, key=lambda x: x["cer"], default=None)
+
+        # Script type (from the first engine's image_quality, when present)
+        script_type = ""
+        first_engine = engine_names[0] if engine_names else None
+        first_dr = doc_engine_map[doc_id].get(first_engine)
+        if first_dr and first_dr.image_quality:
+            script_type = first_dr.image_quality.get("script_type", "")
+
+        documents.append({
+            "doc_id": doc_id,
+            "image_path": image_path,
+            "image_b64": images_b64.get(doc_id, ""),
+            "ground_truth": gt,
+            "mean_cer": safe_round(mean_cer),
+            "best_engine": best_engine["engine"] if best_engine else "",
+            "engine_results": engine_results,
+            "script_type": script_type,
+        })
+    return documents
+
+
+def _build_engine_result_entry(engine_name: str, dr) -> dict:
+    """Construit une entrée moteur pour un document donné (extrait pour lisibilité)."""
+    diff_ops = compute_char_diff(dr.ground_truth, dr.hypothesis)
+    er_entry: dict = {
+        "engine": engine_name,
+        "hypothesis": dr.hypothesis,
+        "cer": safe_round(dr.metrics.cer),
+        "cer_diplomatic": safe_round(dr.metrics.cer_diplomatic) if dr.metrics.cer_diplomatic is not None else None,
+        "wer": safe_round(dr.metrics.wer),
+        "mer": safe_round(dr.metrics.mer),
+        "wil": safe_round(dr.metrics.wil),
+        "duration": dr.duration_seconds,
+        "error": dr.engine_error,
+        "diff": diff_ops,
+    }
+    # Champs spécifiques aux pipelines OCR+LLM
+    if dr.ocr_intermediate is not None:
+        er_entry["ocr_intermediate"] = dr.ocr_intermediate
+        er_entry["ocr_diff"] = compute_word_diff(dr.ground_truth, dr.ocr_intermediate)
+        er_entry["llm_correction_diff"] = compute_word_diff(dr.ocr_intermediate, dr.hypothesis)
+    if dr.pipeline_metadata:
+        on = dr.pipeline_metadata.get("over_normalization")
+        if on is not None:
+            er_entry["over_normalization"] = on
+        er_entry["pipeline_mode"] = dr.pipeline_metadata.get("pipeline_mode")
+    # Sprint 5 — métriques avancées par document
+    if dr.char_scores is not None:
+        er_entry["ligature_score"] = safe_round(dr.char_scores.get("ligature", {}).get("score"))
+        er_entry["diacritic_score"] = safe_round(dr.char_scores.get("diacritic", {}).get("score"))
+    if dr.taxonomy is not None:
+        er_entry["taxonomy"] = dr.taxonomy
+    if dr.structure is not None:
+        er_entry["structure"] = dr.structure
+    if dr.image_quality is not None:
+        er_entry["image_quality"] = dr.image_quality
+    # Sprint 10
+    if dr.line_metrics is not None:
+        er_entry["line_metrics"] = dr.line_metrics
+    if dr.hallucination_metrics is not None:
+        er_entry["hallucination_metrics"] = dr.hallucination_metrics
+    return er_entry
+
+
+def annotate_documents_with_difficulty(
+    benchmark: "BenchmarkResult", documents: list[dict],
+) -> None:
+    """Annote chaque document du dict avec son score de difficulté (Sprint 7).
+
+    Modifie ``documents`` en place. Les valeurs par défaut ``0.5`` /
+    ``"Modéré"`` sont retournées si la difficulté n'a pas pu être
+    calculée (par exemple corpus dégénéré).
+    """
+    doc_ids_ordered = [d["doc_id"] for d in documents]
+    gt_map = {d["doc_id"]: d["ground_truth"] for d in documents}
+    cer_map: dict[str, dict[str, float]] = {d["doc_id"]: {} for d in documents}
+    iq_map: dict[str, float] = {}
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            cer_map.setdefault(dr.doc_id, {})[report.engine_name] = safe_round(dr.metrics.cer)
+            if dr.image_quality and "quality_score" in dr.image_quality:
+                iq_map[dr.doc_id] = dr.image_quality["quality_score"]
+    difficulty_scores = compute_all_difficulties(
+        doc_ids=doc_ids_ordered,
+        ground_truths=gt_map,
+        cer_map=cer_map,
+        image_quality_map=iq_map or None,
+    )
+    for doc in documents:
+        ds = difficulty_scores.get(doc["doc_id"])
+        if ds:
+            doc["difficulty_score"] = safe_round(ds.score)
+            doc["difficulty_label"] = difficulty_label(ds.score)
+        else:
+            doc["difficulty_score"] = 0.5
+            doc["difficulty_label"] = "Modéré"
+
+
+__all__ = ["build_documents", "annotate_documents_with_difficulty"]
diff --git a/picarones/reports_v2/html/data/engines.py b/picarones/reports_v2/html/data/engines.py
new file mode 100644
index 0000000000000000000000000000000000000000..6744c26c690ec3e872211cd7cabd991a4543ac2e
--- /dev/null
+++ b/picarones/reports_v2/html/data/engines.py
@@ -0,0 +1,103 @@
+"""Construction du résumé par moteur (``engines_summary``).
+
+Pour chaque ``EngineReport``, accumule métriques agrégées (CER, WER,
+MER, WIL), distribution CER pour l'histogramme, métriques avancées
+patrimoniales (Sprint 5), distribution d'erreurs (Sprint 10), NER
+(Sprint 41), calibration (Sprint 43), profil philologique (Sprint
+62), recherchabilité + séquences numériques (Sprint 86), lisibilité
+(Sprint 87) et indicateurs pipeline OCR+LLM.
+
+Les coûts (durée moyenne, prix par 1k pages, CO₂) sont ajoutés
+ultérieurement par :mod:`picarones.reports_v2.html.data.pareto` qui
+en a besoin pour calculer les fronts.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from picarones.reports_v2.html.data._helpers import safe_round
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+def build_engines_summary(benchmark: "BenchmarkResult") -> list[dict]:
+    """Retourne la liste des dicts moteur, une entrée par ``EngineReport``."""
+    engines_summary: list[dict] = []
+    for report in benchmark.engine_reports:
+        agg = report.aggregated_metrics
+        diplo_agg = agg.get("cer_diplomatic", {})
+
+        line_metrics = report.aggregated_line_metrics
+        halluc = report.aggregated_hallucination
+
+        entry: dict = {
+            "name": report.engine_name,
+            "version": report.engine_version,
+            "cer":  safe_round(agg.get("cer", {}).get("mean")),
+            "wer":  safe_round(agg.get("wer", {}).get("mean")),
+            "mer":  safe_round(agg.get("mer", {}).get("mean")),
+            "wil":  safe_round(agg.get("wil", {}).get("mean")),
+            "cer_median": safe_round(agg.get("cer", {}).get("median")),
+            "cer_min":    safe_round(agg.get("cer", {}).get("min")),
+            "cer_max":    safe_round(agg.get("cer", {}).get("max")),
+            "doc_count":  agg.get("document_count", 0),
+            "failed":     agg.get("failed_count", 0),
+            # CER diplomatique (après normalisation historique : ſ=s, u=v, i=j…)
+            "cer_diplomatic": safe_round(diplo_agg.get("mean")) if diplo_agg else None,
+            "cer_diplomatic_profile": diplo_agg.get("profile"),
+            # Distribution pour l'histogramme : liste des CER individuels
+            "cer_values": [
+                safe_round(dr.metrics.cer)
+                for dr in report.document_results
+                if dr.metrics.error is None
+            ],
+            "cer_diplomatic_values": [
+                safe_round(dr.metrics.cer_diplomatic)
+                for dr in report.document_results
+                if dr.metrics.error is None and dr.metrics.cer_diplomatic is not None
+            ],
+            # Champs pipeline OCR+LLM (vides pour les moteurs OCR seuls)
+            "is_pipeline": report.is_pipeline,
+            "pipeline_info": report.pipeline_info,
+            # Sprint 5 — métriques avancées patrimoniales
+            "ligature_score": safe_round(report.ligature_score) if report.ligature_score is not None else None,
+            "diacritic_score": safe_round(report.diacritic_score) if report.diacritic_score is not None else None,
+            "aggregated_confusion": report.aggregated_confusion,
+            "aggregated_taxonomy": report.aggregated_taxonomy,
+            "aggregated_structure": report.aggregated_structure,
+            "aggregated_image_quality": report.aggregated_image_quality,
+            # Sprint 10 — distribution des erreurs + hallucinations VLM
+            "gini": safe_round(line_metrics.get("gini_mean")) if line_metrics else None,
+            "cer_p90": safe_round(line_metrics.get("percentiles", {}).get("p90")) if line_metrics else None,
+            "cer_p99": safe_round(line_metrics.get("percentiles", {}).get("p99")) if line_metrics else None,
+            "catastrophic_rate_30": safe_round(line_metrics.get("catastrophic_rate", {}).get("0.3")) if line_metrics else None,
+            "aggregated_line_metrics": line_metrics,
+            "anchor_score": safe_round(halluc.get("anchor_score_mean")) if halluc else None,
+            "length_ratio": safe_round(halluc.get("length_ratio_mean")) if halluc else None,
+            "hallucinating_doc_rate": safe_round(halluc.get("hallucinating_doc_rate")) if halluc else None,
+            "aggregated_hallucination": halluc,
+            # Sprint 41 — NER agrégé (None si aucun calcul effectué)
+            "aggregated_ner": report.aggregated_ner,
+            # Sprint 43 — calibration agrégée (None si aucune confidence
+            # n'a été exposée par le moteur sur ce corpus)
+            "aggregated_calibration": report.aggregated_calibration,
+            # Sprint 62 — profil philologique agrégé (None si aucun
+            # signal philologique sur le corpus pour ce moteur)
+            "aggregated_philological": report.aggregated_philological,
+            # Sprint 86 — A.II.5 (recherchabilité fuzzy + séquences
+            # numériques). None si aucun document n'a de signal.
+            "aggregated_searchability": report.aggregated_searchability,
+            "aggregated_numerical_sequences": (
+                report.aggregated_numerical_sequences
+            ),
+            # Sprint 87 — A.II.2 (delta Flesch agrégé)
+            "aggregated_readability": report.aggregated_readability,
+            "is_vlm": report.pipeline_info.get("is_vlm", False) if report.pipeline_info else False,
+        }
+        engines_summary.append(entry)
+    return engines_summary
+
+
+__all__ = ["build_engines_summary"]
diff --git a/picarones/reports_v2/html/data/extra_metrics.py b/picarones/reports_v2/html/data/extra_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac97d47e1a566d9748a1304152ac198361417346
--- /dev/null
+++ b/picarones/reports_v2/html/data/extra_metrics.py
@@ -0,0 +1,272 @@
+"""Métriques additionnelles consommées par le rapport HTML.
+
+Sprint « câblage des modules test-only » (mai 2026) : intègre dans le
+flux de génération du rapport des modules de mesure qui jusque-là
+n'étaient appelés par aucun consommateur en production. Concrètement :
+
+- :func:`compute_rare_token_recall_per_engine` — Sprint 71 (A.I.1) :
+  recall sur tokens rares (hapax + dis legomena) corpus-wide. Discrimine
+  un OCR qui rate les noms propres rares (critique pour l'indexation
+  prosopographique).
+- :func:`compute_taxonomy_cooccurrence_section` — Sprint 75 (A.I.4
+  chantier 1) : indice de Jaccard inter-classes au niveau document.
+- :func:`compute_taxonomy_intra_doc_section` — Sprint 76 (A.I.4
+  chantier 2) : heatmap class × position pour repérer les zones
+  concentrées d'erreur.
+- :func:`compute_marginal_cost_section` — Sprint 91 (A.II.6) : coût
+  marginal d'un moteur B vs A par erreur évitée.
+
+Toutes les fonctions sont **pures** (pas de mutation in-place) et
+retournent ``None`` ou un dict vide quand les pré-requis ne sont pas
+réunis (corpus vide, taxonomy absente, etc.) — pattern adaptive masking.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+from picarones.evaluation.metrics.marginal_cost import compute_marginal_cost_matrix
+from picarones.evaluation.metrics.rare_tokens import (
+    compute_rare_token_recall,
+    extract_rare_tokens,
+)
+from picarones.evaluation.metrics.taxonomy_cooccurrence import (
+    compute_taxonomy_cooccurrence,
+)
+from picarones.evaluation.metrics.taxonomy_intra_doc import (
+    compute_taxonomy_position_heatmap,
+)
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+# ──────────────────────────────────────────────────────────────────
+# Rare-token recall (Sprint 71)
+# ──────────────────────────────────────────────────────────────────
+
+
+def compute_rare_token_recall_per_engine(
+    benchmark: "BenchmarkResult",
+    max_freq: int = 2,
+) -> dict[str, dict]:
+    """Recall corpus-wide sur les tokens rares pour chaque moteur.
+
+    Étapes :
+    1. Extraire les tokens rares du corpus (apparaissent ≤ ``max_freq``
+       fois dans toutes les GT).
+    2. Pour chaque moteur, calculer le recall moyen pondéré par doc.
+
+    Retour : ``{engine_name: {n_rare_tokens, n_recalled, recall, n_docs,
+    max_freq}}`` (``recall`` vaut ``None`` sans token rare), vide si aucun moteur ou token rare.
+    """
+    if not benchmark.engine_reports:
+        return {}
+    # Liste des GT du corpus (premier moteur fait foi).
+    gts = [
+        dr.ground_truth
+        for dr in benchmark.engine_reports[0].document_results
+        if dr.ground_truth
+    ]
+    if not gts:
+        return {}
+    rare_tokens = extract_rare_tokens(gts, max_freq=max_freq)
+    if not rare_tokens:
+        return {}
+
+    out: dict[str, dict] = {}
+    for report in benchmark.engine_reports:
+        n_total_rare = 0
+        n_total_recalled = 0
+        n_docs = 0
+        for dr in report.document_results:
+            if dr.metrics.error is not None:
+                continue
+            metrics = compute_rare_token_recall(
+                dr.ground_truth, dr.hypothesis, rare_tokens,
+            )
+            n_total_rare += metrics["n_rare_tokens_in_reference"]
+            n_total_recalled += metrics["n_rare_tokens_recalled"]
+            n_docs += 1
+        recall = (
+            n_total_recalled / n_total_rare if n_total_rare > 0 else None
+        )
+        out[report.engine_name] = {
+            "n_rare_tokens": n_total_rare,
+            "n_recalled": n_total_recalled,
+            "recall": recall,
+            "n_docs": n_docs,
+            "max_freq": max_freq,
+        }
+    return out
+
+
+# ──────────────────────────────────────────────────────────────────
+# Co-occurrence taxonomique (Sprint 75)
+# ──────────────────────────────────────────────────────────────────
+
+
+def compute_taxonomy_cooccurrence_section(
+    benchmark: "BenchmarkResult",
+) -> Optional[dict]:
+    """Calcule la matrice de co-occurrence taxonomique corpus-wide.
+
+    Pour chaque document, on collecte l'union des classes d'erreur
+    apparues sur ce document tous moteurs confondus, puis on calcule
+    l'indice de Jaccard entre paires de classes au niveau corpus.
+
+    Retour : sortie de
+    :func:`picarones.evaluation.metrics.taxonomy_cooccurrence.compute_taxonomy_cooccurrence`,
+    ou ``None`` si aucune classification taxonomique n'est disponible.
+    """
+    # Map doc_id → index dans per_doc_classes pour merger correctement
+    # les classes des moteurs additionnels qui évaluent le même doc.
+    # **Bug évité** : ne PAS utiliser un set pour retrouver l'index — un
+    # set n'a pas d'ordre garanti, ``list(set).index(x)`` retourne un
+    # index qui ne correspond pas à la position dans la liste parallèle.
+    doc_id_to_idx: dict[str, int] = {}
+    per_doc_classes: list[set[str]] = []
+
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            if dr.taxonomy is None:
+                continue
+            classes = {
+                cls
+                for cls, count in (dr.taxonomy.get("counts") or {}).items()
+                if count > 0
+            }
+            if not classes:
+                continue
+            idx = doc_id_to_idx.get(dr.doc_id)
+            if idx is None:
+                doc_id_to_idx[dr.doc_id] = len(per_doc_classes)
+                per_doc_classes.append(classes)
+            else:
+                # Doc déjà vu (autre moteur) : merger les classes.
+                per_doc_classes[idx] |= classes
+
+    if not per_doc_classes:
+        return None
+    return compute_taxonomy_cooccurrence(per_doc_classes)
+
+
+# ──────────────────────────────────────────────────────────────────
+# Heatmap intra-document class × position (Sprint 76)
+# ──────────────────────────────────────────────────────────────────
+
+
+def compute_taxonomy_intra_doc_section(
+    benchmark: "BenchmarkResult",
+    n_bins: int = 10,
+) -> Optional[dict]:
+    """Heatmap agrégée class × position binnée sur l'ensemble du corpus.
+
+    Pour chaque doc unique, seul le heatmap du **premier** moteur au
+    résultat exploitable (sans erreur, GT non vide, heatmap non ``None``)
+    est retenu (déduplication). Puis on somme par classe et bin de position.
+
+    Retourne un dict compatible avec
+    :func:`picarones.report.taxonomy_intra_doc_render.build_taxonomy_intra_doc_html`
+    (clés ``n_bins``, ``per_class``, ``total_errors``, ``n_words_gt``).
+    Retourne ``None`` si aucun document n'a de signal exploitable.
+    """
+    aggregated: dict[str, list[int]] = {}
+    seen_doc_ids: set[str] = set()
+    total_errors = 0
+    n_words_gt = 0
+
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            if dr.doc_id in seen_doc_ids:
+                continue  # déduplication : ne pas compter un doc 2 fois
+            if dr.metrics.error is not None or not dr.ground_truth:
+                continue
+            heatmap = compute_taxonomy_position_heatmap(
+                dr.ground_truth, dr.hypothesis, n_bins=n_bins,
+            )
+            if heatmap is None:
+                continue
+            seen_doc_ids.add(dr.doc_id)
+            n_words_gt += len(dr.ground_truth.split())
+            per_class = heatmap.get("per_class", {})
+            for cls, counts in per_class.items():
+                cls_total = sum(counts)
+                if cls_total == 0:
+                    continue
+                total_errors += cls_total
+                if cls not in aggregated:
+                    aggregated[cls] = [0] * n_bins
+                for i in range(n_bins):
+                    aggregated[cls][i] += counts[i] if i < len(counts) else 0
+
+    if not aggregated:
+        return None
+    return {
+        "n_bins": n_bins,
+        "n_docs_with_data": len(seen_doc_ids),
+        "total_errors": total_errors,
+        "n_words_gt": n_words_gt,
+        "per_class": aggregated,
+    }
+
+
+# ──────────────────────────────────────────────────────────────────
+# Coût marginal inter-moteurs (Sprint 91)
+# ──────────────────────────────────────────────────────────────────
+
+
+def compute_marginal_cost_section(
+    engines_summary: list[dict],
+) -> Optional[list[dict]]:
+    """Matrice de coût marginal entre paires de moteurs.
+
+    Lit ``cost`` (attaché par :func:`attach_engine_costs`) et estime
+    le nombre d'erreurs. Pour chaque paire ``A → B``, calcule le coût
+    additionnel par erreur évitée.
+
+    **Note d'estimation** : le nombre d'erreurs est toujours estimé
+    par le proxy ``cer × 1000`` (1000 caractères standardisés par
+    page) ; la longueur réelle des documents du corpus n'est pas
+    prise en compte. Les coûts marginaux affichés sont
+    des estimations pessimistes — pour un benchmark de corpus
+    homogène, l'ordonnancement est fiable ; pour un mix de
+    types de documents, à interpréter avec prudence.
+
+    Retour : liste de dicts (sortie ``["pairs"]`` de
+    :func:`compute_marginal_cost_matrix`) triée par coût marginal
+    croissant, ou ``None`` si moins de 2 moteurs ont des données
+    coût + erreur exploitables.
+    """
+    per_engine: dict[str, dict] = {}
+    for entry in engines_summary:
+        cost = entry.get("cost") or {}
+        cost_per_1k = cost.get("cost_per_1k_pages_eur")
+        cer = entry.get("cer")
+        doc_count = entry.get("doc_count") or 0
+        if cost_per_1k is None or cer is None or doc_count == 0:
+            continue
+        # Proxy : cer × 1000 caractères / page (échelle stable cohérente
+        # avec ``cost_per_1k_pages_eur``).
+        estimated_errors = cer * 1000.0
+        per_engine[entry["name"]] = {
+            "cost": cost_per_1k,
+            "errors": estimated_errors,
+        }
+    if len(per_engine) < 2:
+        return None
+    result = compute_marginal_cost_matrix(per_engine)
+    if not result:
+        return None
+    # ``compute_marginal_cost_matrix`` retourne ``{"pairs": [...]}``.
+    # On expose la liste ``pairs`` pour que le renderer reçoive un
+    # itérable de dicts (pas un wrapper).
+    return result.get("pairs") or None
+
+
+__all__ = [
+    "compute_rare_token_recall_per_engine",
+    "compute_taxonomy_cooccurrence_section",
+    "compute_taxonomy_intra_doc_section",
+    "compute_marginal_cost_section",
+]
diff --git a/picarones/reports_v2/html/data/pareto.py b/picarones/reports_v2/html/data/pareto.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6d4f1d912c18382d00ebb79115f2f06aab9eb50
--- /dev/null
+++ b/picarones/reports_v2/html/data/pareto.py
@@ -0,0 +1,159 @@
+"""Front Pareto coût/qualité (Sprint 19).
+
+Construit trois fronts Pareto avec des axes alternatifs :
+
+- ``cost`` — CER vs coût € / 1000 pages.
+- ``speed`` — CER vs durée moyenne par page.
+- ``co2`` — CER vs empreinte carbone (g CO₂ / 1000 pages, expérimental).
+
+API
+---
+Deux fonctions séparées pour rendre le contrat explicite :
+
+1. :func:`attach_engine_costs` — **mute en place** ``engines_summary``
+   en y ajoutant ``mean_duration_seconds`` et ``cost`` (extraits du
+   benchmark et de la table de pricing). Le nom dit clairement qu'il
+   y a mutation.
+2. :func:`build_pareto_section` — **fonction pure**, lit les coûts
+   déjà attachés à ``engines_summary``. Retourne le dict ``pareto``
+   prêt pour le template.
+
+L'orchestrateur (``__init__.py``) appelle les deux dans l'ordre.
+Cette séparation rend possible :
+
+- Tester :func:`build_pareto_section` indépendamment avec un
+  ``engines_summary`` pré-fabriqué.
+- Réutiliser les coûts attachés sans recalculer Pareto.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from picarones.evaluation.metrics.pricing import (
+    build_costs_for_benchmark,
+    load_pricing_database,
+)
+from picarones.evaluation.statistics import compute_pareto_front
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+def attach_engine_costs(
+    engines_summary: list[dict], benchmark: "BenchmarkResult",
+) -> None:
+    """Annote chaque entrée de ``engines_summary`` avec son coût.
+
+    **Mute en place** : ajoute deux champs à chaque dict moteur :
+
+    - ``mean_duration_seconds`` (float ou ``None`` si pas de durée).
+    - ``cost`` : dict de la forme ``{cost_per_1k_pages_eur: ...,
+      co2_per_1k_pages_g: ..., ...}`` ou ``None`` si pricing
+      indisponible.
+
+    Doit être appelée AVANT :func:`build_pareto_section`, qui lit
+    ces deux champs.
+    """
+    durations_by_engine: dict[str, float] = {}
+    for report in benchmark.engine_reports:
+        durs = [
+            dr.duration_seconds
+            for dr in report.document_results
+            if dr.duration_seconds is not None
+        ]
+        if durs:
+            durations_by_engine[report.engine_name] = sum(durs) / len(durs)
+
+    costs_by_engine = build_costs_for_benchmark(
+        engines_summary, durations_by_engine,
+    )
+    for entry in engines_summary:
+        name = entry["name"]
+        entry["mean_duration_seconds"] = (
+            round(durations_by_engine.get(name, 0.0), 4)
+            if name in durations_by_engine else None
+        )
+        entry["cost"] = costs_by_engine.get(name)
+
+
+def build_pareto_section(engines_summary: list[dict]) -> dict:
+    """Construit le bloc ``pareto`` du dict de rapport.
+
+    **Fonction pure** : ne mute rien. Lit ``mean_duration_seconds``
+    et ``cost`` qui doivent avoir été attachés en amont par
+    :func:`attach_engine_costs`. Si ces champs sont absents, le
+    moteur est silencieusement omis du front (cohérent avec un
+    moteur qui n'a pas de prix connu).
+
+    Retour
+    ------
+    dict
+        Trois fronts Pareto (``cost``, ``speed``, ``co2``) plus
+        ``pricing_meta`` (table de pricing utilisée).
+    """
+    pricing_defaults, _ = load_pricing_database()
+
+    pareto_points = []
+    for entry in engines_summary:
+        cer = entry.get("cer")
+        cost = (entry.get("cost") or {}).get("cost_per_1k_pages_eur")
+        if cer is None or cost is None:
+            continue
+        pareto_points.append({"engine": entry["name"], "cer": cer, "cost": cost})
+    pareto_front_engines = compute_pareto_front(
+        pareto_points, objectives=("cer", "cost"),
+    )
+
+    pareto_speed_points = []
+    for entry in engines_summary:
+        cer = entry.get("cer")
+        dur = entry.get("mean_duration_seconds")
+        if cer is None or dur is None:
+            continue
+        pareto_speed_points.append({"engine": entry["name"], "cer": cer, "dur": dur})
+    pareto_front_speed = compute_pareto_front(
+        pareto_speed_points, objectives=("cer", "dur"),
+    )
+
+    pareto_co2_points = []
+    for entry in engines_summary:
+        cer = entry.get("cer")
+        co2 = (entry.get("cost") or {}).get("co2_per_1k_pages_g")
+        if cer is None or co2 is None:
+            continue
+        pareto_co2_points.append({"engine": entry["name"], "cer": cer, "co2": co2})
+    pareto_front_co2 = compute_pareto_front(
+        pareto_co2_points, objectives=("cer", "co2"),
+    )
+
+    return {
+        "cost": {
+            "points": pareto_points,
+            "front": pareto_front_engines,
+            "axis_label": "Coût (€ / 1000 pages)",
+        },
+        "speed": {
+            "points": pareto_speed_points,
+            "front": pareto_front_speed,
+            "axis_label": "Temps moyen (s / page)",
+        },
+        "co2": {
+            "points": pareto_co2_points,
+            "front": pareto_front_co2,
+            "axis_label": (
+                "Empreinte carbone (g CO₂ / 1000 pages, expérimental)"
+            ),
+        },
+        "pricing_meta": {
+            "last_updated": pricing_defaults.last_updated,
+            "currency": pricing_defaults.currency,
+            "hourly_rate_local_cpu_eur": pricing_defaults.hourly_rate_local_cpu_eur,
+            "hourly_rate_local_gpu_eur": pricing_defaults.hourly_rate_local_gpu_eur,
+            "grid_intensity_local": pricing_defaults.grid_intensity_local,
+            "grid_intensity_cloud": pricing_defaults.grid_intensity_cloud,
+        },
+    }
+
+
+__all__ = ["attach_engine_costs", "build_pareto_section"]
diff --git a/picarones/reports_v2/html/data/scatter.py b/picarones/reports_v2/html/data/scatter.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0f7e395438367a91da3bbbd09e4b55951ddbce0
--- /dev/null
+++ b/picarones/reports_v2/html/data/scatter.py
@@ -0,0 +1,56 @@
+"""Scatter plots du rapport (Sprint 10).
+
+- ``gini_vs_cer`` — corrélation Gini (concentration des erreurs)
+  vs CER moyen, par moteur.
+- ``ratio_vs_anchor`` — ratio de longueur OCR/GT vs score d'ancrage,
+  par moteur (révèle les hallucinations VLM).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from picarones.reports_v2.html.data._helpers import safe_round
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+def build_gini_vs_cer(benchmark: "BenchmarkResult") -> list[dict]:
+    """Scatter Gini de la distribution d'erreurs vs CER moyen."""
+    gini_vs_cer: list[dict] = []
+    for report in benchmark.engine_reports:
+        line_metrics = report.aggregated_line_metrics
+        gini_val = line_metrics.get("gini_mean") if line_metrics else None
+        cer_val = report.mean_cer
+        if gini_val is not None and cer_val is not None:
+            gini_vs_cer.append({
+                "engine": report.engine_name,
+                "cer": safe_round(cer_val),
+                "gini": safe_round(gini_val),
+                "is_pipeline": report.is_pipeline,
+            })
+    return gini_vs_cer
+
+
+def build_ratio_vs_anchor(benchmark: "BenchmarkResult") -> list[dict]:
+    """Scatter ratio de longueur vs score d'ancrage (détection VLM)."""
+    ratio_vs_anchor: list[dict] = []
+    for report in benchmark.engine_reports:
+        halluc = report.aggregated_hallucination
+        if not halluc:
+            continue
+        ratio_vs_anchor.append({
+            "engine": report.engine_name,
+            "length_ratio": safe_round(halluc.get("length_ratio_mean", 1.0)),
+            "anchor_score": safe_round(halluc.get("anchor_score_mean", 1.0)),
+            "hallucinating_rate": safe_round(halluc.get("hallucinating_doc_rate", 0.0)),
+            "is_vlm": (
+                report.pipeline_info.get("is_vlm", False)
+                if report.pipeline_info else False
+            ),
+        })
+    return ratio_vs_anchor
+
+
+__all__ = ["build_gini_vs_cer", "build_ratio_vs_anchor"]
diff --git a/picarones/reports_v2/html/data/statistics.py b/picarones/reports_v2/html/data/statistics.py
new file mode 100644
index 0000000000000000000000000000000000000000..498c66a1763858c1437d72c10f760b3fe3d9f077
--- /dev/null
+++ b/picarones/reports_v2/html/data/statistics.py
@@ -0,0 +1,216 @@
+"""Sections statistiques du rapport (Sprint 7 + Sprint 17).
+
+Construit les blocs :
+
+- ``pairwise_wilcoxon`` — tests de Wilcoxon par paire de moteurs.
+- ``bootstrap_cis`` — intervalles de confiance bootstrap par moteur.
+- ``friedman`` + ``nemenyi`` — Sprint 17, multi-moteurs.
+- ``reliability_curves`` — courbes de fiabilité par moteur.
+- ``venn_data`` — diagramme de Venn des erreurs communes/exclusives.
+- ``error_clusters`` — clustering des patterns d'erreurs.
+- ``correlation_per_engine`` — matrice de corrélation par moteur.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+from picarones.evaluation import compute_word_diff
+from picarones.evaluation.statistics import (
+    bootstrap_ci,
+    cluster_errors,
+    compute_correlation_matrix,
+    compute_pairwise_stats,
+    compute_reliability_curve,
+    compute_venn_data,
+    friedman_test,
+    nemenyi_posthoc,
+)
+from picarones.reports_v2.html.data._helpers import safe_round
+
+if TYPE_CHECKING:
+    from picarones.evaluation.benchmark_result import BenchmarkResult
+
+
+def _engine_cer_values(benchmark: "BenchmarkResult") -> dict[str, list[float]]:
+    """Map ``engine_name → [cer_individuels valides]``."""
+    out: dict[str, list[float]] = {}
+    for report in benchmark.engine_reports:
+        vals = [
+            safe_round(dr.metrics.cer)
+            for dr in report.document_results
+            if dr.metrics.error is None
+        ]
+        if vals:
+            out[report.engine_name] = vals
+    return out
+
+
+def build_pairwise_wilcoxon(benchmark: "BenchmarkResult") -> list[dict]:
+    """Tests de Wilcoxon par paire de moteurs (Sprint 7)."""
+    return compute_pairwise_stats(_engine_cer_values(benchmark))
+
+
+def build_bootstrap_cis(benchmark: "BenchmarkResult") -> list[dict]:
+    """Intervalles de confiance bootstrap par moteur (Sprint 7)."""
+    bootstrap_cis: list[dict] = []
+    for engine_name, vals in _engine_cer_values(benchmark).items():
+        lo, hi = bootstrap_ci(vals)
+        mean_v = sum(vals) / len(vals) if vals else 0.0
+        bootstrap_cis.append({
+            "engine": engine_name,
+            "mean": safe_round(mean_v),
+            "ci_lower": safe_round(lo),
+            "ci_upper": safe_round(hi),
+        })
+    return bootstrap_cis
+
+
+def build_friedman_and_nemenyi(benchmark: "BenchmarkResult") -> dict:
+    """Test de Friedman + post-hoc Nemenyi (Sprint 17, multi-moteurs).
+
+    Alignement strict sur le même ordre de documents : on reconstruit
+    la map à partir des documents communs à tous les moteurs, sinon
+    Friedman n'est pas applicable.
+
+    Returns
+    -------
+    dict
+        ``{"friedman": {...}, "nemenyi": {...}}`` à fusionner dans
+        la section ``statistics`` du rapport.
+    """
+    # Liste ordonnée des doc_ids selon l'ordre d'apparition.
+    seen: set[str] = set()
+    doc_ids_ordered: list[str] = []
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            if dr.doc_id not in seen:
+                seen.add(dr.doc_id)
+                doc_ids_ordered.append(dr.doc_id)
+
+    common_doc_ids: Optional[set[str]] = None
+    for report in benchmark.engine_reports:
+        doc_ids = {dr.doc_id for dr in report.document_results if dr.metrics.error is None}
+        common_doc_ids = doc_ids if common_doc_ids is None else common_doc_ids & doc_ids
+
+    engine_cer_aligned: dict[str, list[float]] = {}
+    if common_doc_ids:
+        ordered_common = [d for d in doc_ids_ordered if d in common_doc_ids]
+        for report in benchmark.engine_reports:
+            dr_by_id = {dr.doc_id: dr for dr in report.document_results}
+            engine_cer_aligned[report.engine_name] = [
+                safe_round(dr_by_id[d].metrics.cer) for d in ordered_common
+            ]
+
+    if engine_cer_aligned:
+        friedman = friedman_test(engine_cer_aligned)
+        nemenyi = nemenyi_posthoc(engine_cer_aligned)
+    else:
+        friedman = {
+            "statistic": 0.0, "p_value": 1.0, "significant": False,
+            "df": 0, "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
+            "interpretation": "Test de Friedman non calculé — aucun document commun.",
+            "error": "no_common_documents",
+        }
+        nemenyi = {
+            "alpha": 0.05, "critical_distance": 0.0, "q_alpha": 0.0,
+            "n_blocks": 0, "n_engines": 0, "mean_ranks": {},
+            "engines_sorted": [], "significant_matrix": [], "tied_groups": [],
+            "error": "no_common_documents",
+        }
+    return {"friedman": friedman, "nemenyi": nemenyi}
+
+
+def build_reliability_curves(benchmark: "BenchmarkResult") -> list[dict]:
+    """Courbes de fiabilité par moteur (Sprint 7)."""
+    reliability_curves: list[dict] = []
+    for report in benchmark.engine_reports:
+        vals = [
+            safe_round(dr.metrics.cer)
+            for dr in report.document_results
+            if dr.metrics.error is None
+        ]
+        curve = compute_reliability_curve(vals)
+        reliability_curves.append({
+            "engine": report.engine_name,
+            "points": curve,
+        })
+    return reliability_curves
+
+
+def build_venn_data(benchmark: "BenchmarkResult") -> dict:
+    """Venn des erreurs communes / exclusives (Sprint 7).
+
+    Construit les ensembles d'erreurs par moteur :
+    ``{engine → set("doc_id:gt_tok:hyp_tok")}``.
+    """
+    venn_error_sets: dict[str, set[str]] = {}
+    for report in benchmark.engine_reports:
+        error_set: set[str] = set()
+        for dr in report.document_results:
+            ops = compute_word_diff(dr.ground_truth, dr.hypothesis)
+            for op in ops:
+                if op["op"] in ("replace", "delete", "insert"):
+                    key = (
+                        f"{dr.doc_id}:"
+                        f"{op.get('old', op.get('text', ''))}:"
+                        f"{op.get('new', op.get('text', ''))}"
+                    )
+                    error_set.add(key)
+        venn_error_sets[report.engine_name] = error_set
+    return compute_venn_data(venn_error_sets)
+
+
+def build_error_clusters(benchmark: "BenchmarkResult") -> list[dict]:
+    """Clustering des patterns d'erreurs (Sprint 7)."""
+    error_data_all: list[dict] = []
+    for report in benchmark.engine_reports:
+        for dr in report.document_results:
+            error_data_all.append({
+                "engine": report.engine_name,
+                "gt": dr.ground_truth,
+                "hypothesis": dr.hypothesis,
+            })
+    error_clusters_raw = cluster_errors(error_data_all, max_clusters=8)
+    return [c.as_dict() for c in error_clusters_raw]
+
+
+def build_correlation_per_engine(benchmark: "BenchmarkResult") -> list[dict]:
+    """Matrice de corrélation par moteur entre métriques métiers (Sprint 7)."""
+    correlation_per_engine: list[dict] = []
+    for report in benchmark.engine_reports:
+        metrics_list: list[dict[str, float]] = []
+        for dr in report.document_results:
+            if dr.metrics.error is not None:
+                continue
+            entry: dict[str, float] = {
+                "cer": safe_round(dr.metrics.cer),
+                "wer": safe_round(dr.metrics.wer),
+                "mer": safe_round(dr.metrics.mer),
+                "wil": safe_round(dr.metrics.wil),
+            }
+            if dr.image_quality:
+                entry["quality_score"] = safe_round(dr.image_quality.get("quality_score", 0.5))
+                entry["sharpness"] = safe_round(dr.image_quality.get("sharpness_score", 0.5))
+            if dr.char_scores:
+                entry["ligature"] = safe_round(dr.char_scores.get("ligature", {}).get("score", 0.5))
+                entry["diacritic"] = safe_round(dr.char_scores.get("diacritic", {}).get("score", 0.5))
+            metrics_list.append(entry)
+        if metrics_list:
+            corr = compute_correlation_matrix(metrics_list)
+            correlation_per_engine.append({
+                "engine": report.engine_name,
+                **corr,
+            })
+    return correlation_per_engine
+
+
+__all__ = [
+    "build_pairwise_wilcoxon",
+    "build_bootstrap_cis",
+    "build_friedman_and_nemenyi",
+    "build_reliability_curves",
+    "build_venn_data",
+    "build_error_clusters",
+    "build_correlation_per_engine",
+]
diff --git a/picarones/reports_v2/html/generator.py b/picarones/reports_v2/html/generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..21d1244823792d4aa51f2e4b572c50c538733eaf
--- /dev/null
+++ b/picarones/reports_v2/html/generator.py
@@ -0,0 +1,471 @@
+"""Générateur du rapport HTML interactif auto-contenu.
+
+Phase 5.E — module relocalisé depuis ``picarones.report.generator``
+vers ``picarones.reports_v2.html.generator``.  Le chemin legacy
+reste disponible via un shim avec ``DeprecationWarning`` ;
+suppression prévue en 2.0.
+
+Le rapport produit est un fichier HTML unique embarquant :
+- Toutes les données (JSON inline)
+- Chart.js et diff2html (depuis cdnjs)
+- CSS et JavaScript de l'application
+
+Vues disponibles
+----------------
+1. Classement  — tableau triable par colonne (CER, WER, MER, WIL)
+2. Galerie     — grille d'images avec badge CER coloré
+3. Document    — image zoomable + diff coloré GT / OCR par moteur
+4. Analyses    — histogramme CER + graphique radar
+
+Architecture
+------------
+Ce module est l'**orchestrateur**. Les responsabilités lourdes sont
+découpées en sous-modules :
+
+- :mod:`picarones.reports_v2._helpers.assets` — chargement vendor.js, encodage
+  base64 d'images, externalisation lazy.
+- :mod:`picarones.reports_v2.html.data` — construction du dict JSON
+  passé au template (engines, documents, statistiques, Pareto, etc.).
+- :mod:`picarones.reports_v2._helpers.render_helpers` — couleurs / SVG mutualisés.
+
+Rétrocompat
+-----------
+Deux noms historiques sont **encore importés par des tests** sous
+leur préfixe ``_`` et doivent être préservés :
+
+- ``_build_report_data`` (importé par 14 fichiers de tests).
+- ``_cer_color`` (importé par ``tests/report/test_report.py``).
+
+Les autres noms ``_pct``, ``_safe``, ``_cer_bg``, ``_encode_image_b64``,
+``_encode_images_b64_from_result``, ``_externalize_images_to_dir``,
+``_load_vendor_js`` sont soit utilisés en interne (les 3 derniers,
+voir :meth:`ReportGenerator.generate`), soit accessibles via leur
+nom canonique dans :mod:`picarones.reports_v2._helpers.assets` ou
+:mod:`picarones.reports_v2._helpers.render_helpers`.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, Optional
+
+from picarones.evaluation.benchmark_result import BenchmarkResult
+from picarones.evaluation.statistics import build_critical_difference_svg
+from picarones.reports_v2._helpers.assets import (
+    encode_images_b64_from_result as _encode_images_b64_from_result,
+    externalize_images_to_dir as _externalize_images_to_dir,
+    load_vendor_js as _load_vendor_js,
+)
+
+# Ré-exports rétrocompat consommés par les tests externes (cf. docstring
+# de module). La directive de fin de ligne documente l'intention de
+# ré-export et empêche ruff de marquer l'import comme inutilisé.
+from picarones.reports_v2._helpers.render_helpers import cer_step_color as _cer_color  # noqa: F401
+from picarones.reports_v2.html.data import build_report_data as _build_report_data  # noqa: F401
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Jinja2 rendering
+# ---------------------------------------------------------------------------
+
+# Since Sprint 16, the monolithic ~3100-line template has been split into
+# external files (CSS, JS, HTML views) under the ``templates/`` directory next
+# to this module. ``base.html.j2`` assembles them via ``{% include %}``.
+
+_TEMPLATES_DIR = Path(__file__).parent / "templates"
+
+
+def _build_jinja_env():
+    """Build the Jinja2 Environment used to render the report.
+
+    Autoescape is disabled: the behaviour matches that of the historical
+    ``_HTML_TEMPLATE.format()``. The injected variables (embedded JSON,
+    generated SVG, narrative synthesis built from internal templates)
+    are described as all produced by Picarones code and therefore as
+    not needing HTML escaping.
+    """
+    from jinja2 import Environment, FileSystemLoader
+    env = Environment(
+        loader=FileSystemLoader(str(_TEMPLATES_DIR)),
+        autoescape=False,  # NOTE(review): corpus_name is rendered too — confirm the template escapes it
+        keep_trailing_newline=True,
+    )
+    return env
+
+
+# ---------------------------------------------------------------------------
+# Classe principale
+# ---------------------------------------------------------------------------
+
+class ReportGenerator:
+    """Generate an interactive HTML report from a BenchmarkResult.
+
+    Usage
+    -----
+    >>> from picarones.reports_v2.html import ReportGenerator
+    >>> gen = ReportGenerator(benchmark_result)
+    >>> path = gen.generate("rapport.html")
+    >>> # English-language report:
+    >>> gen_en = ReportGenerator(benchmark_result, lang="en")
+    >>> path_en = gen_en.generate("report.html")
+    """
+
+    def __init__(
+        self,
+        benchmark: BenchmarkResult,
+        images_b64: Optional[dict[str, str]] = None,
+        lang: str = "fr",
+        normalization_profile: Any = None,
+        lazy_images: bool = False,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        benchmark:
+            Benchmark result to visualise.
+        images_b64:
+            Mapping {doc_id: base64 data-URI OR relative URL} of the images.
+            If None or empty, falls back to ``benchmark.metadata["_images_b64"]``.
+            If ``lazy_images=True``, the expected value is a relative URL
+            such as ``"report-assets/<doc>.png"``.
+        lang:
+            Report language code: ``"fr"`` (default) or ``"en"``.
+        normalization_profile:
+            Normalisation profile actually used (Sprint 27 — for the
+            reproducibility snapshot). ``None`` falls back to the profile
+            stored in ``benchmark.metadata["normalization_profile"]`` if
+            present; otherwise the snapshot is unavailable.
+        lazy_images:
+            Sprint A5 (M-16) — if ``True``, images are written as
+            PNG/JPEG files in ``<output_dir>/report-assets/`` next to
+            the HTML and referenced via ``<img loading="lazy">``.
+            The report stays self-contained if the assets folder is
+            copied too. Useful for corpora > 50 documents (a monolithic
+            base64 report of 1,000 docs exceeds 200 MB and makes the
+            browser sluggish). For single-doc or demo use, leave it
+            ``False`` to get one portable HTML file.
+        """
+        self.benchmark = benchmark
+        self.images_b64: dict[str, str] = images_b64 or {}
+        self.lang = lang
+        self.normalization_profile = normalization_profile
+        self.lazy_images = lazy_images
+
+        # Fall back to the images embedded in the metadata (fixtures)
+        if not self.images_b64:
+            self.images_b64 = benchmark.metadata.get("_images_b64", {})  # type: ignore[assignment]
+
+        # Sprint 27 — fallback: normalisation profile from the metadata
+        if self.normalization_profile is None:
+            self.normalization_profile = benchmark.metadata.get("normalization_profile")
+
+    def generate(self, output_path: str | Path) -> Path:
+        """Render the HTML file and write it to disk.
+
+        Parameters
+        ----------
+        output_path:
+            Path of the HTML file to write.
+
+        Returns
+        -------
+        Path
+            Absolute path of the generated file.
+        """
+        from picarones.reports_v2.i18n import get_labels
+
+        output_path = Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Sprint A5 (M-16) — externalise the images if lazy_images=True,
+        # otherwise auto-encode them as base64. Both modes feed the same
+        # ``images_b64`` variable (name kept for backward compatibility;
+        # in lazy mode the value is a relative URL instead of a data-URI).
+        # In lazy mode externalisation is **forced** even if self.images_b64
+        # is pre-filled (by fixtures, by metadata, etc.) — otherwise the
+        # report would still contain huge data-URIs.
+        if self.lazy_images:
+            images_b64 = _externalize_images_to_dir(
+                self.benchmark, output_path.parent,
+            )
+        else:
+            images_b64 = self.images_b64
+            if not images_b64:
+                images_b64 = _encode_images_b64_from_result(self.benchmark)
+
+        labels = get_labels(self.lang)
+        report_data = _build_report_data(self.benchmark, images_b64)
+
+        # Sprint 27 — reproducibility snapshots (pricing, glossary,
+        # normalisation profile, environment). Embedded in the report
+        # JSON so that a reader can regenerate the synthesis, the
+        # Pareto and the glossary without access to the source code.
+        from picarones.reports_v2.html.snapshot import snapshot_all
+        report_data["snapshots"] = snapshot_all(
+            lang=self.lang,
+            normalization_profile=self.normalization_profile,
+        )
+
+        report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
+        i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
+        chartjs_js = _load_vendor_js("chart.umd.min.js")
+
+        # Sprint 17 — server-side SVG rendering of the CDD (static, no JS)
+        cdd_svg = build_critical_difference_svg(
+            report_data.get("statistics", {}).get("nemenyi", {}),
+        )
+
+        # Sprint 18 — factual narrative synthesis (deterministic, no LLM)
+        from picarones.reports_v2.narrative import build_synthesis
+        synthesis = build_synthesis(report_data, lang=self.lang)
+
+        # Sprint 20 — contextual glossary loaded from YAML
+        from picarones.reports_v2.glossary import load_glossary
+        glossary = load_glossary(self.lang)
+        glossary_json = json.dumps(glossary, ensure_ascii=False, separators=(",", ":"))
+
+        section_html = self._build_section_html(report_data, labels)
+
+        env = _build_jinja_env()
+        template = env.get_template("base.html.j2")
+        html = template.render(
+            corpus_name=self.benchmark.corpus_name,
+            picarones_version=self.benchmark.picarones_version,
+            report_data_json=report_json,
+            i18n_json=i18n_json,
+            html_lang=labels.get("html_lang", "fr"),
+            chartjs_inline=chartjs_js,
+            critical_difference_svg=cdd_svg,
+            friedman=report_data.get("statistics", {}).get("friedman", {}),
+            synthesis=synthesis,
+            glossary_json=glossary_json,
+            **section_html,
+        )
+
+        output_path.write_text(html, encoding="utf-8")
+        return output_path.resolve()
+
+    def _build_section_html(
+        self, report_data: dict, labels: dict[str, str],
+    ) -> dict[str, str]:
+        """Build every conditional HTML section of the report.
+
+        Each renderer (NER, calibration, philology, etc.) is called
+        independently. A section returns ``""`` when no engine has
+        any signal for it — the template handles the conditional
+        display.
+
+        Returns
+        -------
+        dict[str, str]
+            Map ``{section_name: html}`` to splat into
+            ``template.render(**section_html)``.
+        """
+        engines = report_data.get("engines", [])
+
+        # Sprint 37 — inter-engine section (divergence matrix + oracle).
+        from picarones.reports_v2.html.renderers.inter_engine import (
+            build_divergence_matrix_html,
+            build_oracle_gap_html,
+        )
+        # Sprint 41 — NER section (per-engine F1 summary + per-category heatmap).
+        from picarones.reports_v2.html.renderers.ner import (
+            build_ner_per_category_html,
+            build_ner_summary_html,
+        )
+        # Sprint 43 — calibration section (ECE/MCE table + grid of
+        # per-engine reliability diagrams).
+        from picarones.reports_v2.html.renderers.calibration import (
+            build_calibration_summary_html,
+            build_reliability_diagrams_grid_html,
+        )
+        # Sprint 46 — stratified section (per-stratum table).
+        from picarones.reports_v2.html.renderers.stratification import (
+            build_stratified_ranking_html,
+        )
+        # Sprint 62 — philological profile (6 adaptive sections).
+        from picarones.reports_v2.html.renderers.philological import (
+            build_philological_profile_html,
+        )
+        # Sprint 86 — A.II.5: fuzzy searchability + numerical sequences.
+        from picarones.reports_v2.html.renderers.searchability import (
+            build_searchability_summary_html,
+        )
+        from picarones.reports_v2.html.renderers.numerical_sequences import (
+            build_numerical_sequences_html,
+        )
+        # Sprint 87 — A.II.2: readability (Flesch delta).
+        from picarones.reports_v2.html.renderers.readability import (
+            build_readability_summary_html,
+        )
+        # Sprint 89 — A.II.8b: inter-engine specialisation.
+        from picarones.reports_v2.html.renderers.specialization import (
+            build_specialization_html,
+        )
+        # Chantier 3 (post-Sprint 97) — 3 composed thematic views.
+        from picarones.reports_v2.html.views import (
+            build_advanced_taxonomy_view_html,
+            build_diagnostics_view_html,
+            build_economics_view_html,
+        )
+        # "Wiring of test-only modules" sprint (May 2026) — sections
+        # that consume the new metrics computed in
+        # ``report_data.extra_metrics``.
+        from picarones.reports_v2.html.renderers.marginal_cost import (
+            build_marginal_cost_html,
+        )
+        from picarones.reports_v2.html.renderers.rare_token_recall import (
+            build_rare_token_recall_html,
+        )
+        from picarones.reports_v2.html.renderers.taxonomy_cooccurrence import (
+            build_taxonomy_cooccurrence_html,
+        )
+        from picarones.reports_v2.html.renderers.taxonomy_intra_doc import (
+            build_taxonomy_intra_doc_html,
+        )
+
+        # Specialisation: build a {engine: counts} map from the
+        # ``aggregated_taxonomy`` entries; an engine without taxonomy is excluded.
+        taxos: dict = {}
+        for eng in engines:
+            tax = eng.get("aggregated_taxonomy")
+            if isinstance(tax, dict):
+                counts = tax.get("counts") if "counts" in tax else tax
+                if isinstance(counts, dict) and counts:
+                    taxos[eng.get("name", "?")] = {
+                        k: float(v) for k, v in counts.items()
+                        if isinstance(v, (int, float))
+                    }
+
+        return {
+            # Sprint 37
+            "divergence_matrix_html": build_divergence_matrix_html(
+                report_data.get("inter_engine_analysis"), labels=labels,
+            ),
+            "oracle_gap_html": build_oracle_gap_html(
+                report_data.get("inter_engine_analysis"), labels=labels,
+            ),
+            # Sprint 41
+            "ner_summary_html": build_ner_summary_html(engines, labels=labels),
+            "ner_per_category_html": build_ner_per_category_html(engines, labels=labels),
+            # Sprint 43
+            "calibration_summary_html": build_calibration_summary_html(
+                engines, labels=labels,
+            ),
+            "reliability_diagrams_html": build_reliability_diagrams_grid_html(
+                engines, labels=labels,
+            ),
+            # Sprint 46
+            "stratified_ranking_html": build_stratified_ranking_html(
+                report_data.get("stratified_ranking"),
+                report_data.get("available_strata"),
+                report_data.get("corpus_homogeneity"),
+                labels=labels,
+            ),
+            # Sprint 62
+            "philological_profile_html": build_philological_profile_html(
+                engines, labels=labels,
+            ),
+            # Sprint 86
+            "searchability_html": build_searchability_summary_html(
+                engines, labels=labels,
+            ),
+            "numerical_sequences_html": build_numerical_sequences_html(
+                engines, labels=labels,
+            ),
+            # Sprint 87
+            "readability_html": build_readability_summary_html(
+                engines, labels=labels,
+            ),
+            # Sprint 89
+            "specialization_html": build_specialization_html(taxos, labels=labels),
+            # Chantier 3 — composed thematic views
+            "economics_view_html": build_economics_view_html(
+                report_data, labels=labels,
+                engine_reports=self.benchmark.engine_reports,
+            ),
+            "advanced_taxonomy_view_html": build_advanced_taxonomy_view_html(
+                report_data, labels=labels,
+            ),
+            "diagnostics_view_html": build_diagnostics_view_html(
+                report_data, labels=labels,
+            ),
+            # "Wiring of test-only modules" sprint (May 2026):
+            # 4 new sections for the modules wired into
+            # ``report_data.extra_metrics``. Adaptive: "" when there is no signal.
+            "taxonomy_cooccurrence_html": build_taxonomy_cooccurrence_html(
+                report_data.get("taxonomy_cooccurrence"), labels=labels,
+            ),
+            "taxonomy_intra_doc_html": build_taxonomy_intra_doc_html(
+                report_data.get("taxonomy_intra_doc"), labels=labels,
+            ),
+            "rare_token_recall_html": build_rare_token_recall_html(
+                report_data.get("rare_token_recall"), labels=labels,
+            ),
+            "marginal_cost_html": build_marginal_cost_html(
+                report_data.get("marginal_cost"), labels=labels,
+            ),
+        }
+
+    @classmethod
+    def from_json(cls, json_path: str | Path, **kwargs) -> "ReportGenerator":
+        """Create a generator from a JSON results file.
+
+        Compatible with files produced by ``BenchmarkResult.to_json()``.
+        Base64 images must be passed via ``kwargs["images_b64"]``
+        if they are not in the JSON.
+        """
+        import json as _json
+
+        data = _json.loads(Path(json_path).read_text(encoding="utf-8"))
+
+        # Minimal reconstruction of a BenchmarkResult from the dict
+        from picarones.evaluation.metric_result import MetricsResult
+        from picarones.evaluation.benchmark_result import DocumentResult, EngineReport
+
+        engine_reports = []
+        for er_data in data.get("engine_reports", []):
+            doc_results = []
+            for dr_data in er_data.get("document_results", []):
+                m = dr_data["metrics"]
+                metrics = MetricsResult(
+                    cer=m["cer"], cer_nfc=m["cer_nfc"], cer_caseless=m["cer_caseless"],
+                    wer=m["wer"], wer_normalized=m["wer_normalized"],
+                    mer=m["mer"], wil=m["wil"],
+                    reference_length=m["reference_length"],
+                    hypothesis_length=m["hypothesis_length"],
+                    error=m.get("error"),
+                )
+                doc_results.append(DocumentResult(
+                    doc_id=dr_data["doc_id"],
+                    image_path=dr_data["image_path"],
+                    ground_truth=dr_data["ground_truth"],
+                    hypothesis=dr_data["hypothesis"],
+                    metrics=metrics,
+                    duration_seconds=dr_data.get("duration_seconds", 0.0),
+                    engine_error=dr_data.get("engine_error"),
+                ))
+            engine_reports.append(EngineReport(
+                engine_name=er_data["engine_name"],
+                engine_version=er_data.get("engine_version", "unknown"),
+                engine_config=er_data.get("engine_config", {}),
+                document_results=doc_results,
+            ))
+
+        corpus_info = data.get("corpus", {})
+        bm = BenchmarkResult(
+            corpus_name=corpus_info.get("name", "Corpus"),
+            corpus_source=corpus_info.get("source"),
+            document_count=corpus_info.get("document_count", 0),
+            engine_reports=engine_reports,
+            run_date=data.get("run_date", ""),
+            picarones_version=data.get("picarones_version", ""),
+            metadata=data.get("metadata", {}),
+        )
+
+        images_b64 = kwargs.pop("images_b64", {})
+        return cls(bm, images_b64=images_b64, **kwargs)
diff --git a/picarones/reports_v2/html/snapshot.py b/picarones/reports_v2/html/snapshot.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c19d329239e31fbe812a8baed7d417b2d87cd85
--- /dev/null
+++ b/picarones/reports_v2/html/snapshot.py
@@ -0,0 +1,281 @@
+"""Snapshots de reproductibilité pour le rapport HTML (Sprint 27).
+
+Phase 5.E — module relocalisé depuis ``picarones.report.snapshot``
+vers ``picarones.reports_v2.html.snapshot``.  Le chemin legacy
+reste disponible via un shim avec ``DeprecationWarning`` ;
+suppression prévue en 2.0.
+
+Le rapport HTML auto-contenu doit pouvoir être *rejoué* sans avoir
+accès au code source du moment où il a été généré : un lecteur en
+2026 doit pouvoir comprendre exactement quelle table de prix, quelle
+définition de métrique, quel profil de normalisation, et quelle
+version de Picarones ont produit les chiffres affichés.
+
+Avant le Sprint 27, le rapport intégrait uniquement
+``pareto.pricing_meta.last_updated`` — une simple date de mise à jour
+qui ne disait rien sur le contenu de la table. Si quelqu'un modifiait
+``picarones/data/pricing.yaml`` après génération, il était impossible
+de reconstituer ce qu'avait vu le lecteur du rapport.
+
+Quatre snapshots sont produits par ce module et embarqués dans
+``report_data.snapshots`` :
+
+- ``pricing``       — YAML brut intégral de la table de prix.
+- ``glossary``      — entrées du glossaire pour la langue du rapport.
+- ``normalization`` — profil de normalisation effectivement appliqué.
+- ``environment``   — version Picarones, Python, plateforme, commit git
+                      si dispo, liste figée des dépendances installées.
+
+Garanties
+---------
+- **Déterminisme** : sur entrées identiques, ``snapshot_all()`` produit
+  un dict bit-à-bit identique. Les listes sont triées, les timestamps
+  sont absents.
+- **Pas d'effet de bord** : le module ne modifie aucun état global ;
+  les chemins YAML sont uniquement lus, jamais écrits.
+- **Dégradé non bloquant** : si pyyaml est absent, si ``pricing.yaml``
+  n'existe pas, si git n'est pas installé, le snapshot retourne un
+  dict ``{"available": False, "reason": "..."}`` plutôt que de lever.
+"""
+
+from __future__ import annotations
+
+import logging
+import platform
+import subprocess
+import sys
+from importlib.metadata import distributions
+from pathlib import Path
+from typing import Any, Optional
+
+def _resolve_picarones_version() -> str:
+    """Return the installed Picarones version without importing the root
+    package (forbidden from ``reports_v2/`` by the layer-deps rule)."""
+    try:
+        from importlib.metadata import version as _get_version
+        return _get_version("picarones")
+    except Exception:  # noqa: BLE001
+        return "1.0.0"  # hard-coded fallback when the metadata lookup fails
+
+
+__version__ = _resolve_picarones_version()  # resolved once, at import time
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Pricing snapshot
+# ---------------------------------------------------------------------------
+
+def pricing_snapshot(pricing_path: Optional[Path] = None) -> dict[str, Any]:
+    """Return the raw YAML plus the parsed dict of the pricing table in use.
+
+    When ``pricing_path`` is not given, the default path
+    ``picarones.evaluation.metrics.pricing._DEFAULT_PRICING_PATH`` is used.
+    """
+    if pricing_path is None:
+        try:
+            from picarones.evaluation.metrics.pricing import _DEFAULT_PRICING_PATH
+            pricing_path = _DEFAULT_PRICING_PATH
+        except ImportError:
+            return {"available": False, "reason": "module pricing introuvable"}
+
+    pricing_path = Path(pricing_path)
+    if not pricing_path.exists():
+        return {
+            "available": False,
+            "reason": f"pricing.yaml introuvable : {pricing_path}",
+            "expected_path": str(pricing_path),
+        }
+
+    try:
+        raw = pricing_path.read_text(encoding="utf-8")
+    except OSError as exc:
+        return {
+            "available": False,
+            "reason": f"lecture impossible : {exc}",
+            "expected_path": str(pricing_path),
+        }
+
+    try:
+        import yaml
+        data = yaml.safe_load(raw) or {}
+    except Exception as exc:  # noqa: BLE001
+        # Missing PyYAML (ImportError) or parsing failure — keep the raw text anyway.
+        logger.warning("[snapshot] parsing pricing.yaml échoué : %s", exc)
+        data = {}
+
+    return {
+        "available": True,
+        "source_path": str(pricing_path),
+        "filename": pricing_path.name,
+        "size_bytes": len(raw.encode("utf-8")),  # size of the decoded text re-encoded as UTF-8
+        "raw_yaml": raw,
+        "data": data,  # {} when PyYAML is missing or parsing failed
+    }
+
+
+# ---------------------------------------------------------------------------
+# Glossary snapshot
+# ---------------------------------------------------------------------------
+
+def glossary_snapshot(
+    lang: str = "fr",
+    used_keys: Optional[list[str] | set[str]] = None,
+) -> dict[str, Any]:
+    """Return the glossary entries that appear in the report.
+
+    ``used_keys`` restricts the snapshot to the terms actually
+    referenced (smaller output). ``None`` → every entry for the
+    language (conservative mode).
+    """
+    try:
+        from picarones.reports_v2.glossary import load_glossary, SUPPORTED_LANGS
+    except ImportError:
+        return {"available": False, "reason": "module glossary introuvable"}
+
+    full = load_glossary(lang) or {}
+    if not full:
+        return {
+            "available": False,
+            "reason": f"aucune entrée pour lang={lang!r}",
+            "supported_langs": SUPPORTED_LANGS,  # NOTE(review): must be JSON-serialisable — confirm its type
+        }
+
+    if used_keys is not None:
+        keys = set(used_keys)
+        entries = {k: v for k, v in full.items() if k in keys}
+    else:
+        entries = dict(full)
+
+    # Sorted by key for bit-for-bit reproducibility.
+    entries_sorted = {k: entries[k] for k in sorted(entries)}
+
+    return {
+        "available": True,
+        "lang": lang,
+        "entry_count": len(entries_sorted),
+        "entries": entries_sorted,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Normalization profile snapshot
+# ---------------------------------------------------------------------------
+
+def normalization_snapshot(profile: Any) -> dict[str, Any]:
+    """Serialise a ``NormalizationProfile``.
+
+    Intended to cover built-in profiles (``medieval_french``, ``nfc``, …)
+    and custom YAML profiles loaded at runtime — the goal is that a
+    reader of the report can rebuild exactly the same normalisation
+    from this snapshot.
+    """
+    if profile is None:
+        return {"available": False, "reason": "aucun profil fourni"}
+
+    # NormalizationProfile is described as a dataclass — fields are read by
+    # name rather than through ``asdict`` to keep control over the format.
+    try:
+        return {
+            "available": True,
+            "name": getattr(profile, "name", "unknown"),
+            "nfc": bool(getattr(profile, "nfc", True)),
+            "caseless": bool(getattr(profile, "caseless", False)),
+            "diplomatic_table": dict(getattr(profile, "diplomatic_table", {}) or {}),
+            "exclude_chars": sorted(getattr(profile, "exclude_chars", set()) or set()),
+            "description": getattr(profile, "description", ""),
+        }
+    except Exception as exc:
+        return {"available": False, "reason": f"sérialisation échouée : {exc}"}
+
+
+# ---------------------------------------------------------------------------
+# Environment snapshot
+# ---------------------------------------------------------------------------
+
+def _git_commit(repo_path: Optional[Path] = None) -> Optional[str]:
+    """Return the short git commit (first 12 chars of HEAD), or None on any failure."""
+    cwd = repo_path or Path(__file__).resolve().parents[2]  # default: two directories above this file's directory
+    try:
+        out = subprocess.check_output(
+            ["git", "rev-parse", "HEAD"],
+            cwd=str(cwd),
+            stderr=subprocess.DEVNULL,
+            text=True,
+            timeout=2,
+        ).strip()
+        return out[:12] if out else None
+    except (subprocess.CalledProcessError, OSError, subprocess.TimeoutExpired):  # OSError: git missing or unusable cwd
+        return None
+
+
+def _installed_packages(limit: int = 200) -> list[str]:
+    """Frozen list of installed packages in ``name==version`` form.
+
+    Sorted by name (case-insensitive) for reproducibility. Capped at
+    ``limit`` packages so the report does not balloon in size.
+    """
+    try:
+        pkgs: list[str] = []
+        seen: set[str] = set()
+        for d in distributions():
+            try:
+                name = (d.metadata.get("Name") or "").strip()
+                version = (d.version or "").strip()
+            except Exception:
+                continue  # unreadable distribution metadata: skip this entry
+            if not name or name.lower() in seen:
+                continue  # drop nameless entries and case-insensitive duplicates
+            seen.add(name.lower())
+            pkgs.append(f"{name}=={version}")
+        pkgs.sort(key=str.lower)
+        return pkgs[:limit]  # cap applied after sorting
+    except Exception as exc:  # pragma: no cover — defence in depth
+        logger.warning("[snapshot] enum dépendances échoué : %s", exc)
+        return []
+
+
+def environment_snapshot(repo_path: Optional[Path] = None) -> dict[str, Any]:
+    """Return Picarones version, Python, platform, commit and frozen deps."""
+    return {
+        "available": True,
+        "picarones_version": __version__,
+        "python_version": platform.python_version(),
+        "python_implementation": platform.python_implementation(),
+        "platform": platform.platform(),
+        "executable": sys.executable,
+        "git_commit": _git_commit(repo_path),  # None when no commit can be resolved
+        "installed_packages": _installed_packages(),
+    }
+
+
+# ---------------------------------------------------------------------------
+# API agrégée
+# ---------------------------------------------------------------------------
+
+def snapshot_all(
+    *,
+    lang: str = "fr",
+    glossary_used_keys: Optional[list[str] | set[str]] = None,
+    pricing_path: Optional[Path] = None,
+    normalization_profile: Any = None,
+    repo_path: Optional[Path] = None,
+) -> dict[str, Any]:
+    """Build the ``snapshots`` block to embed in ``report_data``."""
+    return {
+        "pricing": pricing_snapshot(pricing_path=pricing_path),
+        "glossary": glossary_snapshot(lang=lang, used_keys=glossary_used_keys),
+        "normalization": normalization_snapshot(normalization_profile),
+        "environment": environment_snapshot(repo_path=repo_path),
+        "schema_version": 1,  # constant marker stored alongside the four snapshots
+    }
+
+
+__all__ = [  # names exported by ``from <module> import *``
+    "pricing_snapshot",
+    "glossary_snapshot",
+    "normalization_snapshot",
+    "environment_snapshot",
+    "snapshot_all",
+]
diff --git a/picarones/report/templates/_app.js b/picarones/reports_v2/html/templates/_app.js
similarity index 100%
rename from picarones/report/templates/_app.js
rename to picarones/reports_v2/html/templates/_app.js
diff --git a/picarones/report/templates/_styles.css b/picarones/reports_v2/html/templates/_styles.css
similarity index 100%
rename from picarones/report/templates/_styles.css
rename to picarones/reports_v2/html/templates/_styles.css
diff --git a/picarones/report/templates/base.html.j2 b/picarones/reports_v2/html/templates/base.html.j2
similarity index 100%
rename from picarones/report/templates/base.html.j2
rename to picarones/reports_v2/html/templates/base.html.j2
diff --git a/picarones/reports_v2/i18n/__init__.py b/picarones/reports_v2/i18n/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e150676cc3b194d3dd96db5a3e56e7af48001716
--- /dev/null
+++ b/picarones/reports_v2/i18n/__init__.py
@@ -0,0 +1,132 @@
+"""Labels i18n pour le rapport HTML et l'interface Picarones.
+
+Phase 5.E — module relocalisé depuis ``picarones.i18n`` vers
+``picarones.reports_v2.i18n``.  Le chemin legacy reste disponible
+via un shim avec ``DeprecationWarning`` ; suppression prévue en 2.0.
+
+Langues supportées
+------------------
+- ``"fr"`` : français (défaut)
+- ``"en"`` : anglais patrimonial (heritage English)
+
+Depuis le Sprint 17, les traductions sont stockées dans des fichiers
+JSON et chargées au premier accès.  ``TRANSLATIONS`` reste exposé
+comme dict pour compatibilité ascendante.
+
+Sprint 30 — durcissement
+------------------------
+- Chargement lazy + thread-safe via verrou explicite ; les serveurs
+  web sous charge concurrente ne peuvent plus initialiser deux fois.
+- ``reload_translations()`` exposé pour les tests qui modifient les
+  fichiers JSON à la volée.
+- ``get_labels()`` mémoizé via ``functools.lru_cache`` pour absorber
+  le fallback ``lang → fr`` sans relire le dict à chaque appel.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import threading
+from functools import lru_cache
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+_I18N_DIR = Path(__file__).parent  # directory scanned for ``*.json`` files
+_LOAD_LOCK = threading.Lock()  # guards writes to _TRANSLATIONS_CACHE
+_TRANSLATIONS_CACHE: dict[str, dict[str, str]] | None = None  # None = not loaded yet
+
+
+def _load_translations() -> dict[str, dict[str, str]]:
+    """Load every ``{lang}.json`` file of the i18n directory.
+
+    Returns a ``lang -> labels`` dict; it is empty when the directory is
+    missing.  Unreadable, badly encoded or malformed files are skipped
+    with a warning instead of failing the load.
+    """
+    translations: dict[str, dict[str, str]] = {}
+    if not _I18N_DIR.is_dir():
+        return translations
+    for path in sorted(_I18N_DIR.glob("*.json")):
+        lang = path.stem
+        try:
+            with path.open(encoding="utf-8") as fh:
+                translations[lang] = json.load(fh)
+        except (OSError, ValueError) as e:  # ValueError: JSON + UTF-8 errors
+            logger.warning("[i18n] fichier '%s' ignoré : %s", path, e)
+    return translations
+
+
+def _get_translations() -> dict[str, dict[str, str]]:
+    """Return the translations cache, loading it from disk on first use.
+
+    Reads the global once so a concurrent reload cannot yield ``None``.
+    """
+    global _TRANSLATIONS_CACHE
+    cache = _TRANSLATIONS_CACHE  # lock-free fast path once loaded
+    if cache is None:
+        with _LOAD_LOCK:  # re-check under the lock: load at most once
+            if _TRANSLATIONS_CACHE is None:
+                _TRANSLATIONS_CACHE = _load_translations()
+            cache = _TRANSLATIONS_CACHE
+    return cache
+
+
+def reload_translations() -> None:
+    """Force the JSON files to be re-read on the next ``get_labels`` call.
+
+    Useful for tests that edit ``reports_v2/i18n/*.json`` on the fly.
+    """
+    global _TRANSLATIONS_CACHE
+    with _LOAD_LOCK:
+        _TRANSLATIONS_CACHE = None  # module-level TRANSLATIONS is not rebound
+    _get_labels_cached.cache_clear()  # drop the memoised per-language labels
+
+
+@lru_cache(maxsize=None)
+def _get_labels_cached(lang: str) -> tuple[tuple[str, str], ...]:
+    """Memoised lookup: ``lang`` -> tuple of ``(key, label)`` pairs.
+
+    Falls back to ``"fr"``, then to no pairs, when ``lang`` is missing or
+    empty.  Caching an immutable tuple lets ``get_labels`` hand each caller
+    a fresh dict (O(n) per call) that is safe to mutate.
+    """
+    translations = _get_translations()
+    labels = translations.get(lang) or translations.get("fr") or {}
+    return tuple(labels.items())
+
+
+def get_labels(lang: str = "fr") -> dict[str, str]:
+    """Return the label dictionary for the requested language.
+
+    Parameters
+    ----------
+    lang:
+        Language code: ``"fr"`` (default) or ``"en"``.
+
+    Returns
+    -------
+    dict
+        Translated labels, as a fresh dict on every call.  Falls back to
+        ``"fr"`` for an unknown ``lang``; if ``"fr"`` is missing too, the
+        result is an empty dict (degraded but non-blocking).
+    """
+    return dict(_get_labels_cached(lang))
+
+
+# ``TRANSLATIONS`` stays available as a module attribute for external
+# consumers that read it directly.  NOTE: both names are computed eagerly
+# at import time (this reads the JSON files) and are snapshots that
+# ``reload_translations()`` does not refresh.
+TRANSLATIONS: dict[str, dict[str, str]] = _get_translations()
+SUPPORTED_LANGS: list[str] = list(TRANSLATIONS.keys())
+
+
+__all__ = [  # public API; underscore-prefixed helpers stay private
+    "TRANSLATIONS",
+    "SUPPORTED_LANGS",
+    "get_labels",
+    "reload_translations",
+]
diff --git a/picarones/web/benchmark_utils.py b/picarones/web/benchmark_utils.py
index 7c6ce3188dfb54ba904d8ce28945471483a9201e..f964b8c425232660affa43f227dfe7caaa85c390 100644
--- a/picarones/web/benchmark_utils.py
+++ b/picarones/web/benchmark_utils.py
@@ -226,7 +226,7 @@ def run_benchmark_thread_v2(job: BenchmarkJob, req: BenchmarkRunRequest) -> None
             return
 
         job.add_event("log", {"message": "Génération du rapport HTML…"})
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         gen = ReportGenerator(result, lang=req.report_lang)
         gen.generate(output_html)
 
@@ -334,7 +334,7 @@ def run_benchmark_thread(job: BenchmarkJob, req: BenchmarkRequest) -> None:
             return
 
         job.add_event("log", {"message": "Génération du rapport HTML…"})
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         report_lang = getattr(req, "report_lang", "fr")
         gen = ReportGenerator(result, lang=report_lang)
         gen.generate(output_html)
diff --git a/tests/architecture/test_file_budgets.py b/tests/architecture/test_file_budgets.py
index bf1115d64b2b4c17ee9fbc91b8f2d89129029510..349963f3cdce744f9db16aa0bc15e13fd7706a9a 100644
--- a/tests/architecture/test_file_budgets.py
+++ b/tests/architecture/test_file_budgets.py
@@ -49,7 +49,9 @@ FILE_BUDGETS: dict[str, int] = {
     # et le sous-package picarones/report/report_data/. Budget serré
     # à 500 pour verrouiller le gain ; toute croissance > 500 sera
     # un signal pour redécouper.
-    "picarones/report/generator.py": 500,                 # actuel 431
+    # Phase 5.E : ``report/generator.py`` est désormais un shim ;
+    # canonique dans ``reports_v2/html/generator.py``.
+    "picarones/reports_v2/html/generator.py": 550,        # actuel 471
     # --- Fichiers métier larges.
     "picarones/measurements/robustness.py": 850,          # actuel 731
     # Phase 5.C.batch7 : ``report/pipeline_render.py`` est désormais
@@ -144,7 +146,9 @@ FILE_BUDGETS: dict[str, int] = {
     # L'ancien emplacement est désormais un re-export ; le contenu
     # canonique vit ici.
     "picarones/formats/text/normalization.py": 500,       # actuel 420
-    "picarones/report/comparison.py": 500,                # actuel 409
+    # Phase 5.E : ``report/comparison.py`` est désormais un shim ;
+    # canonique dans ``reports_v2/html/comparison.py``.
+    "picarones/reports_v2/html/comparison.py": 500,       # actuel 414
     # --- Module mutualisé créé par le sprint des render helpers
     # (Sprint « consolidation des renderers » 2026-05-02). Budget
     # calibré sur la taille post-documentation des conventions.
diff --git a/tests/architecture/test_module_coverage.py b/tests/architecture/test_module_coverage.py
index ea956953372d54acb02beb228ef555cbab3ebdfd..e7081bdc9676b4b22ae7ab46b824078cd39b6b55 100644
--- a/tests/architecture/test_module_coverage.py
+++ b/tests/architecture/test_module_coverage.py
@@ -94,6 +94,13 @@ TEST_ONLY_BASELINE: frozenset[str] = frozenset({
     "image_predictive",
     "worst_lines",
     "throughput",
+    # Phase 5.E : 3 modules supplémentaires consommés uniquement
+    # par les renderers/views/data migrés vers
+    # ``reports_v2/html/`` qui importent désormais le canonique
+    # directement.
+    "statistics",
+    "pricing",
+    "difficulty",
 })
 
 
diff --git a/tests/core/test_sprint14_robust_filtering.py b/tests/core/test_sprint14_robust_filtering.py
index 965ea9b0a2ec72b00f23fd75ffbca2360e9ce1d8..8d93f24417460071e8e89e8083136440dac52d3b 100644
--- a/tests/core/test_sprint14_robust_filtering.py
+++ b/tests/core/test_sprint14_robust_filtering.py
@@ -62,7 +62,7 @@ def _make_fake_benchmark():
 
 def _generate_html(bm=None) -> str:
     """Génère le HTML complet du rapport pour un BenchmarkResult minimal."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
     import tempfile
     import os
     if bm is None:
diff --git a/tests/engines/test_sprint3_llm_pipelines.py b/tests/engines/test_sprint3_llm_pipelines.py
index 7781e0135e42668662f59eae0704433152f072da..5c8be1f66ec64af784f819ad576608043ebcac76 100644
--- a/tests/engines/test_sprint3_llm_pipelines.py
+++ b/tests/engines/test_sprint3_llm_pipelines.py
@@ -386,7 +386,7 @@ class TestReportWithPipeline:
     @pytest.fixture(scope="class")
     def report_data(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark(n_docs=3, seed=42)
         images_b64 = bm.metadata.get("_images_b64", {})
         return _build_report_data(bm, images_b64)
@@ -433,7 +433,7 @@ class TestReportWithPipeline:
 
     def test_html_contains_pipeline_tag(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark(n_docs=3, seed=42)
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
diff --git a/tests/engines/test_sprint4_normalization_iiif.py b/tests/engines/test_sprint4_normalization_iiif.py
index 016cbc9a9cda3f7c3e4a93d0a138b8d02d07ee51..2474cef0580f45c0f6aaca099faaebbba78c01b4 100644
--- a/tests/engines/test_sprint4_normalization_iiif.py
+++ b/tests/engines/test_sprint4_normalization_iiif.py
@@ -790,7 +790,7 @@ class TestReportDiplomaticCER:
     def test_report_data_has_cer_diplomatic(self):
         """_build_report_data doit inclure cer_diplomatic dans engines_summary."""
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
 
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, images_b64={})
@@ -805,7 +805,7 @@ class TestReportDiplomaticCER:
     def test_html_contains_cer_diplo_column(self, tmp_path):
         """Le HTML généré doit contenir la colonne CER diplo."""
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         bm = generate_sample_benchmark()
         out = tmp_path / "report_test.html"
@@ -818,7 +818,7 @@ class TestReportDiplomaticCER:
     def test_html_contains_medieval_graphie_indicator(self, tmp_path):
         """Le rapport doit mentionner les graphies médiévales (ſ=s ou u=v)."""
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         bm = generate_sample_benchmark()
         out = tmp_path / "report_test.html"
diff --git a/tests/integration/test_sprint11_i18n_english.py b/tests/integration/test_sprint11_i18n_english.py
index 96f291413128bf137ad4265f42c1343f2042a68b..0daacdb197bb9a81c0b9a98cae8e87b43922c2ab 100644
--- a/tests/integration/test_sprint11_i18n_english.py
+++ b/tests/integration/test_sprint11_i18n_english.py
@@ -230,39 +230,39 @@ class TestI18nModule:
     """Vérifie le module picarones.i18n."""
 
     def test_get_labels_fr(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         labels = get_labels("fr")
         assert labels["tab_ranking"] == "Classement"
         assert labels["html_lang"] == "fr"
         assert labels["date_locale"] == "fr-FR"
 
     def test_get_labels_en(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         labels = get_labels("en")
         assert labels["tab_ranking"] == "Ranking"
         assert labels["html_lang"] == "en"
         assert labels["date_locale"] == "en-GB"
 
     def test_get_labels_fallback(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         # Langue inconnue → bascule sur fr
         labels = get_labels("de")
         assert labels["tab_ranking"] == "Classement"
 
     def test_all_fr_keys_present_in_en(self):
-        from picarones.i18n import TRANSLATIONS
+        from picarones.reports_v2.i18n import TRANSLATIONS
         fr_keys = set(TRANSLATIONS["fr"].keys())
         en_keys = set(TRANSLATIONS["en"].keys())
         missing = fr_keys - en_keys
         assert not missing, f"Clés présentes en FR mais absentes en EN : {missing}"
 
     def test_supported_langs(self):
-        from picarones.i18n import SUPPORTED_LANGS
+        from picarones.reports_v2.i18n import SUPPORTED_LANGS
         assert "fr" in SUPPORTED_LANGS
         assert "en" in SUPPORTED_LANGS
 
     def test_footer_labels(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         fr = get_labels("fr")
         en = get_labels("en")
         assert "footer_generated" in fr
@@ -270,7 +270,7 @@ class TestI18nModule:
         assert fr["footer_generated"] != en["footer_generated"]
 
     def test_hallucination_labels_translated(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         en = get_labels("en")
         assert "detected" in en["hall_detected"].lower()
         assert "⚠" in en["hall_detected"]
@@ -286,7 +286,7 @@ class TestEnglishReport:
     @pytest.fixture(scope="class")
     def english_html(self, tmp_path_factory):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         bm = generate_sample_benchmark(n_docs=3, seed=42)
         tmp = tmp_path_factory.mktemp("report_en")
@@ -298,7 +298,7 @@ class TestEnglishReport:
     @pytest.fixture(scope="class")
     def french_html(self, tmp_path_factory):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         bm = generate_sample_benchmark(n_docs=3, seed=42)
         tmp = tmp_path_factory.mktemp("report_fr")
@@ -357,14 +357,14 @@ class TestEnglishReport:
 
     def test_report_generator_default_lang_is_fr(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark(n_docs=2, seed=1)
         gen = ReportGenerator(bm)
         assert gen.lang == "fr"
 
     def test_report_generator_lang_en(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark(n_docs=2, seed=1)
         gen = ReportGenerator(bm, lang="en")
         assert gen.lang == "en"
diff --git a/tests/integration/test_sprint30_polish_a11y_dx.py b/tests/integration/test_sprint30_polish_a11y_dx.py
index b3d73ef8124e8f7c7a0a54734db577a83813d162..ee33baf74efa98ab244fec3e602ed20c43dc1d0e 100644
--- a/tests/integration/test_sprint30_polish_a11y_dx.py
+++ b/tests/integration/test_sprint30_polish_a11y_dx.py
@@ -29,20 +29,20 @@ ROOT = Path(__file__).parent.parent.parent
 
 class TestI18nCache:
     def test_get_labels_returns_dict(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         labels = get_labels("fr")
         assert isinstance(labels, dict)
         assert len(labels) > 5
 
     def test_get_labels_unknown_falls_back_to_fr(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         fr = get_labels("fr")
         unknown = get_labels("xx-pas-existante")
         # Le fallback doit être le contenu fr
         assert unknown == fr
 
     def test_get_labels_cached(self):
-        from picarones import i18n
+        from picarones.reports_v2 import i18n
         i18n.reload_translations()
         # Premier appel — peuple le cache
         i18n.get_labels("fr")
@@ -54,7 +54,7 @@ class TestI18nCache:
         assert info_after.hits > info_before.hits
 
     def test_reload_translations_clears_cache(self):
-        from picarones import i18n
+        from picarones.reports_v2 import i18n
         i18n.get_labels("fr")
         info_before = i18n._get_labels_cached.cache_info()
         assert info_before.currsize >= 1
@@ -117,7 +117,7 @@ class TestSafeVersionLogsDebug:
 
 class TestBadgesAccessibility:
     def test_app_js_exposes_tier_helpers(self):
-        path = ROOT / "picarones" / "report" / "templates" / "_app.js"
+        path = ROOT / "picarones" / "reports_v2" / "html" / "templates" / "_app.js"
         src = path.read_text(encoding="utf-8")
         for fn in ("cerTier", "cerTierIcon", "cerTierLabel"):
             assert f"function {fn}" in src, (
@@ -125,7 +125,7 @@ class TestBadgesAccessibility:
             )
 
     def test_styles_define_tier_patterns(self):
-        path = ROOT / "picarones" / "report" / "templates" / "_styles.css"
+        path = ROOT / "picarones" / "reports_v2" / "html" / "templates" / "_styles.css"
         src = path.read_text(encoding="utf-8")
         for tier in ("excellent", "acceptable", "mediocre", "critical"):
             assert f'data-cer-tier="{tier}"' in src, (
@@ -138,7 +138,7 @@ class TestBadgesAccessibility:
         assert "border: 1.5px double" in src
 
     def test_main_badge_carries_data_attr_and_aria(self):
-        path = ROOT / "picarones" / "report" / "templates" / "_app.js"
+        path = ROOT / "picarones" / "reports_v2" / "html" / "templates" / "_app.js"
         src = path.read_text(encoding="utf-8")
         assert "setAttribute('data-cer-tier'" in src
         assert "setAttribute('aria-label'" in src
@@ -206,7 +206,7 @@ class TestChangelogAndSpecsUpdated:
 class TestGeneratedReportCarriesA11y:
     def test_generated_html_embeds_tier_helpers(self, tmp_path):
         from picarones import fixtures
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         b = fixtures.generate_sample_benchmark(n_docs=4)
         out = tmp_path / "rapport.html"
diff --git a/tests/measurements/test_sprint10_error_distribution.py b/tests/measurements/test_sprint10_error_distribution.py
index 0070eaa0514578c7a1ce9818fa335dbbca58d50a..aa0418eaf6bd20242a17e3cabb593ef2ea3daeee 100644
--- a/tests/measurements/test_sprint10_error_distribution.py
+++ b/tests/measurements/test_sprint10_error_distribution.py
@@ -397,7 +397,7 @@ class TestReportSprint10:
     def html_report(self, tmp_path_factory):
         """Génère un rapport HTML de démonstration."""
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark(n_docs=3, seed=42)
         tmp = tmp_path_factory.mktemp("report")
         out = tmp / "sprint10_test.html"
diff --git a/tests/measurements/test_sprint12_nouvelles_fonctionnalites.py b/tests/measurements/test_sprint12_nouvelles_fonctionnalites.py
index 4139c8bbef19acdb3f612f6767c49f65d53955f6..ccf9a27f054e67eade75ceb360abf8279aae9674 100644
--- a/tests/measurements/test_sprint12_nouvelles_fonctionnalites.py
+++ b/tests/measurements/test_sprint12_nouvelles_fonctionnalites.py
@@ -193,7 +193,7 @@ class TestChartJsInline:
 @pytest.fixture
 def sample_generator():
     """Fixture partagée : crée un ReportGenerator avec des données fictives."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
     from picarones.core.results import BenchmarkResult, DocumentResult, EngineReport
     from picarones.measurements.metrics import MetricsResult
 
diff --git a/tests/measurements/test_sprint18_friedman_nemenyi_cdd.py b/tests/measurements/test_sprint18_friedman_nemenyi_cdd.py
index 9df4871d30aab043a96226fb09bcd020c59a969f..d2ac55b54b22507cf2ff6cbeb3178fe97659903a 100644
--- a/tests/measurements/test_sprint18_friedman_nemenyi_cdd.py
+++ b/tests/measurements/test_sprint18_friedman_nemenyi_cdd.py
@@ -316,7 +316,7 @@ def benchmark_result():
 
 class TestReportIntegration:
     def test_report_contains_cdd_section(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -328,7 +328,7 @@ class TestReportIntegration:
         assert "cd-tie" in html
 
     def test_report_json_contains_friedman_and_nemenyi(self, benchmark_result, tmp_path):
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         data = _build_report_data(benchmark_result, images_b64={})
         stats = data.get("statistics", {})
         assert "friedman" in stats
@@ -338,7 +338,7 @@ class TestReportIntegration:
         assert "tied_groups" in stats["nemenyi"]
 
     def test_cdd_help_section_present(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -346,7 +346,7 @@ class TestReportIntegration:
         assert "toggleCDDHelp" in html  # la fonction est bien liée au bouton
 
     def test_english_locale_uses_english_cdd_labels(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report_en.html"
         ReportGenerator(benchmark_result, lang="en").generate(out)
         html = out.read_text(encoding="utf-8")
diff --git a/tests/measurements/test_sprint19_narrative_engine.py b/tests/measurements/test_sprint19_narrative_engine.py
index a56a033e2e51a26f3300aa07314b325d644eebcd..d1cb9c1fda0d3f02ec800298cdbcf08a50eb2294 100644
--- a/tests/measurements/test_sprint19_narrative_engine.py
+++ b/tests/measurements/test_sprint19_narrative_engine.py
@@ -520,7 +520,7 @@ def benchmark_result():
 
 class TestReportIntegration:
     def test_report_contains_synthesis_section(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -530,7 +530,7 @@ class TestReportIntegration:
         assert re.search(r'<ul class="synth-list">\s*<li>', html)
 
     def test_report_synthesis_is_deterministic(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out1 = tmp_path / "r1.html"
         out2 = tmp_path / "r2.html"
         ReportGenerator(benchmark_result).generate(out1)
@@ -554,7 +554,7 @@ class TestReportIntegration:
         assert registered == set(FactType)
 
     def test_english_locale_produces_english_sentences(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report_en.html"
         ReportGenerator(benchmark_result, lang="en").generate(out)
         html = out.read_text(encoding="utf-8")
diff --git a/tests/measurements/test_sprint20_pareto_pricing.py b/tests/measurements/test_sprint20_pareto_pricing.py
index bd80d6f09262ba4a4ad8ee4d11573b86433f6501..00139dde33a3fe610cad126dc47565c513e86b5d 100644
--- a/tests/measurements/test_sprint20_pareto_pricing.py
+++ b/tests/measurements/test_sprint20_pareto_pricing.py
@@ -267,7 +267,7 @@ def benchmark_result():
 
 class TestReportIntegration:
     def test_report_contains_pareto_card(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -279,7 +279,7 @@ class TestReportIntegration:
         assert "pareto-experimental" in html  # étiquette expérimental
 
     def test_report_json_contains_pareto_data(self, benchmark_result):
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         data = _build_report_data(benchmark_result, images_b64={})
         pareto = data.get("pareto", {})
         assert "cost" in pareto
@@ -293,7 +293,7 @@ class TestReportIntegration:
     def test_synthesis_may_include_pareto_sentence(self, benchmark_result, tmp_path):
         # Sur la fixture de démo, pero_ocr + tesseract sont sur le front → la
         # synthèse doit remonter une alternative moins chère
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -310,7 +310,7 @@ class TestReportIntegration:
         assert Path(_DEFAULT_PRICING_PATH).exists()
 
     def test_english_locale_renders_pareto_labels(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report_en.html"
         ReportGenerator(benchmark_result, lang="en").generate(out)
         html = out.read_text(encoding="utf-8")
diff --git a/tests/report/test_a11y_level_a.py b/tests/report/test_a11y_level_a.py
index a634f50167a8c2b1f04fe8fc35861c1bf9b54a35..da6764b91c89d8a592c538f70f5f2c5451235aef 100644
--- a/tests/report/test_a11y_level_a.py
+++ b/tests/report/test_a11y_level_a.py
@@ -25,7 +25,7 @@ import re
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/report/test_a11y_level_aa.py b/tests/report/test_a11y_level_aa.py
index b28bcfcaecf1a5ea755ee2f29c825d10c57d5bfc..68116e964a623344ddabd3b230c2b08165576171 100644
--- a/tests/report/test_a11y_level_aa.py
+++ b/tests/report/test_a11y_level_aa.py
@@ -20,7 +20,7 @@ import re
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/report/test_extra_metrics.py b/tests/report/test_extra_metrics.py
index 8b5daa54a8f1014e9bcca6c10abcfd5304bb0117..dd64379d5de3e662b62c2d49d1c2926a8bb8f0d7 100644
--- a/tests/report/test_extra_metrics.py
+++ b/tests/report/test_extra_metrics.py
@@ -16,8 +16,8 @@ from __future__ import annotations
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.report_data import build_report_data
-from picarones.report.report_data.extra_metrics import (
+from picarones.reports_v2.html.data import build_report_data
+from picarones.reports_v2.html.data.extra_metrics import (
     compute_marginal_cost_section,
     compute_rare_token_recall_per_engine,
     compute_taxonomy_cooccurrence_section,
diff --git a/tests/report/test_lazy_images.py b/tests/report/test_lazy_images.py
index 398a7e758e519b84d0f724ac61cb5bb206dcb0a6..bbe559d329035d379907e4ec87e143577394d67c 100644
--- a/tests/report/test_lazy_images.py
+++ b/tests/report/test_lazy_images.py
@@ -55,7 +55,7 @@ def demo_benchmark_with_images(tmp_path: Path):
 def test_default_mode_inlines_images(demo_benchmark_with_images, tmp_path: Path) -> None:
     """``lazy_images=False`` (défaut) : les images vivent en base64
     inline dans le HTML, aucun fichier d'asset n'est créé."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     out = tmp_path / "report.html"
     gen = ReportGenerator(demo_benchmark_with_images)
@@ -83,7 +83,7 @@ def test_lazy_mode_creates_asset_directory(
 ) -> None:
     """``lazy_images=True`` : ``report-assets/`` est créé à côté du HTML
     et contient des fichiers image."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     out = tmp_path / "report.html"
     gen = ReportGenerator(demo_benchmark_with_images, lazy_images=True)
@@ -104,7 +104,7 @@ def test_lazy_mode_html_references_relative_urls(
 ) -> None:
     """En mode lazy, le HTML référence les images via URL relative
     ``report-assets/...`` plutôt qu'un data-URI."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     out = tmp_path / "report.html"
     gen = ReportGenerator(demo_benchmark_with_images, lazy_images=True)
@@ -127,7 +127,7 @@ def test_lazy_mode_significantly_reduces_html_size(
     favorable au lazy. Test peu strict (ratio > 1.05) pour ne pas
     être flaky en fonction du contenu vendor.
     """
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     inline_out = tmp_path / "inline.html"
     lazy_out = tmp_path / "lazy.html"
@@ -152,7 +152,7 @@ def test_lazy_mode_with_missing_image_does_not_crash(tmp_path: Path) -> None:
     """Si l'image source n'existe pas, l'externalisation log un warning
     et continue (rétrocompat avec ``_encode_image_b64`` qui retourne ''
     silencieusement)."""
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     bench = generate_sample_benchmark(n_docs=2)
     # Pointe vers un chemin inexistant
@@ -171,7 +171,7 @@ def test_safe_filename_generation(tmp_path: Path) -> None:
     des noms de fichiers normalisés (pas de path traversal possible)."""
     from PIL import Image
 
-    from picarones.report.generator import _externalize_images_to_dir
+    from picarones.reports_v2.html.generator import _externalize_images_to_dir
 
     src = tmp_path / "src.png"
     Image.new("RGB", (50, 50), color=(0, 0, 0)).save(src)
diff --git a/tests/report/test_report.py b/tests/report/test_report.py
index dcab4e15c75db5a22e769b932fe6157aee7c8863..e401361ca5e0a6db01be6ede77289d447d86f0df 100644
--- a/tests/report/test_report.py
+++ b/tests/report/test_report.py
@@ -4,7 +4,7 @@ import json
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.generator import ReportGenerator, _build_report_data, _cer_color
+from picarones.reports_v2.html.generator import ReportGenerator, _build_report_data, _cer_color
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/report/test_sprint17_jinja2_refactor.py b/tests/report/test_sprint17_jinja2_refactor.py
index a71d9c310fb0ce6ddbf1c93f647c0cec25c8ce79..3a044651a9adb67b2e92c5912d7502bed2d43600 100644
--- a/tests/report/test_sprint17_jinja2_refactor.py
+++ b/tests/report/test_sprint17_jinja2_refactor.py
@@ -16,7 +16,7 @@ from pathlib import Path
 import pytest
 
 from picarones import fixtures
-from picarones.report.generator import (
+from picarones.reports_v2.html.generator import (
     ReportGenerator,
     _build_jinja_env,
     _TEMPLATES_DIR,
@@ -153,7 +153,7 @@ class TestI18nFromJSON:
 
     def test_fr_and_en_have_same_keys(self):
         """Garde-fou contre les traductions manquantes."""
-        from picarones.i18n import TRANSLATIONS
+        from picarones.reports_v2.i18n import TRANSLATIONS
         fr_keys = set(TRANSLATIONS.get("fr", {}).keys())
         en_keys = set(TRANSLATIONS.get("en", {}).keys())
         missing_in_en = fr_keys - en_keys
@@ -162,7 +162,7 @@ class TestI18nFromJSON:
         assert not missing_in_fr, f"Clés manquantes en français : {missing_in_fr}"
 
     def test_translations_load_via_public_api(self):
-        from picarones.i18n import get_labels, SUPPORTED_LANGS
+        from picarones.reports_v2.i18n import get_labels, SUPPORTED_LANGS
         assert "fr" in SUPPORTED_LANGS
         assert "en" in SUPPORTED_LANGS
         fr = get_labels("fr")
diff --git a/tests/report/test_sprint21_glossary_customize.py b/tests/report/test_sprint21_glossary_customize.py
index e0d04f4c552238ce8134ca9ea7ea81ed46da2c66..5cebbd7bb447e107b12f36b047adcdaf106b9631 100644
--- a/tests/report/test_sprint21_glossary_customize.py
+++ b/tests/report/test_sprint21_glossary_customize.py
@@ -131,7 +131,7 @@ def benchmark_result():
 
 class TestReportIntegration:
     def test_report_embeds_glossary_json(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -142,7 +142,7 @@ class TestReportIntegration:
         assert "definition" in glossary["cer"]
 
     def test_report_contains_side_panels(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -151,7 +151,7 @@ class TestReportIntegration:
         assert 'class="side-panel-close"' in html
 
     def test_report_has_advanced_button_in_nav(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -159,7 +159,7 @@ class TestReportIntegration:
         assert 'openCustomize()' in html
 
     def test_ranking_columns_have_glossary_keys(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -167,7 +167,7 @@ class TestReportIntegration:
             assert f'data-glossary-key="{k}"' in html, f"Header pour {k} sans data-glossary-key"
 
     def test_app_js_has_glossary_and_customize_functions(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -182,7 +182,7 @@ class TestReportIntegration:
             assert fn in html, f"Fonction {fn} manquante"
 
     def test_english_glossary_for_en_locale(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report_en.html"
         ReportGenerator(benchmark_result, lang="en").generate(out)
         html = out.read_text(encoding="utf-8")
@@ -198,7 +198,7 @@ class TestReportIntegration:
 
 class TestI18nKeysForCustomize:
     def test_required_customize_keys_present(self):
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
         required = {
             "btn_customize", "customize_title",
             "customize_columns", "customize_filters",
@@ -223,7 +223,7 @@ class TestNoPrescriptionGuards:
     et que les poids de score composite sont à 0 par défaut côté JS."""
 
     def test_warning_message_is_visible(self, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         from picarones import fixtures
         bench = fixtures.generate_sample_benchmark(n_docs=3)
         out = tmp_path / "r.html"
@@ -234,7 +234,7 @@ class TestNoPrescriptionGuards:
                "universally valid weighting" in html
 
     def test_default_weights_are_empty_in_js_state(self, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         from picarones import fixtures
         bench = fixtures.generate_sample_benchmark(n_docs=3)
         out = tmp_path / "r.html"
diff --git a/tests/report/test_sprint22_docs_case_studies.py b/tests/report/test_sprint22_docs_case_studies.py
index 5a594881b71d4122822667196540682ee75c356c..ffbfee2046820d6fa0d242ace04f03a41f0f8050 100644
--- a/tests/report/test_sprint22_docs_case_studies.py
+++ b/tests/report/test_sprint22_docs_case_studies.py
@@ -116,7 +116,7 @@ def benchmark_result():
 
 class TestReportIntegration:
     def test_report_links_to_case_studies(self, benchmark_result, tmp_path):
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -125,7 +125,7 @@ class TestReportIntegration:
     def test_report_polish_no_consecutive_empty_lines_in_views(self, benchmark_result, tmp_path):
         """Garde-fou cosmétique léger — éviter les blocs vides excessifs
         introduits par les includes Jinja2."""
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         out = tmp_path / "report.html"
         ReportGenerator(benchmark_result).generate(out)
         html = out.read_text(encoding="utf-8")
@@ -143,7 +143,7 @@ class TestEndToEnd:
 
     def test_small_corpus_renders(self, tmp_path):
         from picarones import fixtures
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bench = fixtures.generate_sample_benchmark(n_docs=2)
         out = tmp_path / "small.html"
         ReportGenerator(bench).generate(out)
@@ -151,7 +151,7 @@ class TestEndToEnd:
 
     def test_large_corpus_renders(self, tmp_path):
         from picarones import fixtures
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bench = fixtures.generate_sample_benchmark(n_docs=20)
         out = tmp_path / "large.html"
         ReportGenerator(bench).generate(out)
@@ -159,7 +159,7 @@ class TestEndToEnd:
 
     def test_english_locale_full_render(self, tmp_path):
         from picarones import fixtures
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bench = fixtures.generate_sample_benchmark(n_docs=5)
         out = tmp_path / "en.html"
         ReportGenerator(bench, lang="en").generate(out)
diff --git a/tests/report/test_sprint27_reproducibility_snapshots.py b/tests/report/test_sprint27_reproducibility_snapshots.py
index 2018c1dedcbf96e41782b8285761666e47a3cf49..9568c0c8895f2fd3a52e4b531161cf17024e9f17 100644
--- a/tests/report/test_sprint27_reproducibility_snapshots.py
+++ b/tests/report/test_sprint27_reproducibility_snapshots.py
@@ -28,7 +28,7 @@ import pytest
 
 class TestPricingSnapshot:
     def test_default_pricing_yaml_is_loaded(self):
-        from picarones.report.snapshot import pricing_snapshot
+        from picarones.reports_v2.html.snapshot import pricing_snapshot
         s = pricing_snapshot()
         assert s["available"] is True
         assert s["filename"] == "pricing.yaml"
@@ -38,19 +38,19 @@ class TestPricingSnapshot:
         assert isinstance(s["data"], dict)
 
     def test_data_contains_meta_and_engines(self):
-        from picarones.report.snapshot import pricing_snapshot
+        from picarones.reports_v2.html.snapshot import pricing_snapshot
         s = pricing_snapshot()
         assert "meta" in s["data"], "le snapshot doit exposer la section meta"
         assert "engines" in s["data"], "le snapshot doit exposer engines"
 
     def test_missing_path_returns_unavailable(self, tmp_path):
-        from picarones.report.snapshot import pricing_snapshot
+        from picarones.reports_v2.html.snapshot import pricing_snapshot
         s = pricing_snapshot(pricing_path=tmp_path / "ne-pas-exister.yaml")
         assert s["available"] is False
         assert "introuvable" in s["reason"].lower()
 
     def test_custom_yaml_round_trips(self, tmp_path):
-        from picarones.report.snapshot import pricing_snapshot
+        from picarones.reports_v2.html.snapshot import pricing_snapshot
         custom = tmp_path / "custom.yaml"
         custom.write_text(
             "meta:\n  currency: USD\n  last_updated: 2026-01-01\nengines:\n  fake: {type: local}\n",
@@ -66,7 +66,7 @@ class TestPricingSnapshot:
 
 class TestGlossarySnapshot:
     def test_default_lang_returns_entries(self):
-        from picarones.report.snapshot import glossary_snapshot
+        from picarones.reports_v2.html.snapshot import glossary_snapshot
         s = glossary_snapshot(lang="fr")
         assert s["available"] is True
         assert s["entry_count"] > 10
@@ -75,7 +75,7 @@ class TestGlossarySnapshot:
             assert k in s["entries"]
 
     def test_used_keys_filter(self):
-        from picarones.report.snapshot import glossary_snapshot
+        from picarones.reports_v2.html.snapshot import glossary_snapshot
         s = glossary_snapshot(lang="fr", used_keys=["cer"])
         assert s["entry_count"] == 1
         assert list(s["entries"]) == ["cer"]
@@ -84,13 +84,13 @@ class TestGlossarySnapshot:
         # `load_glossary` retombe sur fr si la langue est absente — donc
         # le snapshot doit être disponible avec lang='fr' ou la langue
         # demandée selon ce qu'on retourne. On vérifie qu'on ne crashe pas.
-        from picarones.report.snapshot import glossary_snapshot
+        from picarones.reports_v2.html.snapshot import glossary_snapshot
         s = glossary_snapshot(lang="xx-pas-existante")
         # Soit on retombe sur fr (available=True), soit on signale unavailable.
         assert "available" in s
 
     def test_entries_sorted_for_determinism(self):
-        from picarones.report.snapshot import glossary_snapshot
+        from picarones.reports_v2.html.snapshot import glossary_snapshot
         s = glossary_snapshot(lang="fr")
         keys = list(s["entries"])
         assert keys == sorted(keys), (
@@ -102,7 +102,7 @@ class TestGlossarySnapshot:
 class TestNormalizationSnapshot:
     def test_builtin_profile_serializes(self):
         from picarones.measurements.normalization import get_builtin_profile
-        from picarones.report.snapshot import normalization_snapshot
+        from picarones.reports_v2.html.snapshot import normalization_snapshot
         p = get_builtin_profile("medieval_french")
         s = normalization_snapshot(p)
         assert s["available"] is True
@@ -112,13 +112,13 @@ class TestNormalizationSnapshot:
         assert s["diplomatic_table"].get("ſ") == "s"
 
     def test_none_profile_returns_unavailable(self):
-        from picarones.report.snapshot import normalization_snapshot
+        from picarones.reports_v2.html.snapshot import normalization_snapshot
         s = normalization_snapshot(None)
         assert s["available"] is False
 
     def test_exclude_chars_sorted(self):
         from picarones.measurements.normalization import get_builtin_profile
-        from picarones.report.snapshot import normalization_snapshot
+        from picarones.reports_v2.html.snapshot import normalization_snapshot
         p = get_builtin_profile("sans_ponctuation")
         s = normalization_snapshot(p)
         # Liste triée pour reproductibilité
@@ -128,20 +128,20 @@ class TestNormalizationSnapshot:
 class TestEnvironmentSnapshot:
     def test_returns_picarones_version(self):
         from picarones import __version__
-        from picarones.report.snapshot import environment_snapshot
+        from picarones.reports_v2.html.snapshot import environment_snapshot
         s = environment_snapshot()
         assert s["available"] is True
         assert s["picarones_version"] == __version__
 
     def test_python_and_platform_present(self):
-        from picarones.report.snapshot import environment_snapshot
+        from picarones.reports_v2.html.snapshot import environment_snapshot
         s = environment_snapshot()
         assert s["python_version"]
         assert s["python_implementation"]
         assert s["platform"]
 
     def test_installed_packages_sorted_unique(self):
-        from picarones.report.snapshot import environment_snapshot
+        from picarones.reports_v2.html.snapshot import environment_snapshot
         s = environment_snapshot()
         pkgs = s["installed_packages"]
         assert isinstance(pkgs, list)
@@ -152,7 +152,7 @@ class TestEnvironmentSnapshot:
         assert len(names) == len(set(names))
 
     def test_git_commit_is_str_or_none(self):
-        from picarones.report.snapshot import environment_snapshot
+        from picarones.reports_v2.html.snapshot import environment_snapshot
         s = environment_snapshot()
         commit = s.get("git_commit")
         assert commit is None or (isinstance(commit, str) and 0 < len(commit) <= 12)
@@ -164,7 +164,7 @@ class TestEnvironmentSnapshot:
 
 class TestSnapshotAll:
     def test_contains_all_four_blocks(self):
-        from picarones.report.snapshot import snapshot_all
+        from picarones.reports_v2.html.snapshot import snapshot_all
         s = snapshot_all()
         for k in ("pricing", "glossary", "normalization", "environment"):
             assert k in s, f"snapshot_all doit exposer la clé '{k}'"
@@ -172,7 +172,7 @@ class TestSnapshotAll:
 
     def test_deterministic_for_same_inputs(self):
         from picarones.measurements.normalization import get_builtin_profile
-        from picarones.report.snapshot import snapshot_all
+        from picarones.reports_v2.html.snapshot import snapshot_all
         profile = get_builtin_profile("nfc")
 
         a = snapshot_all(lang="fr", normalization_profile=profile)
@@ -193,7 +193,7 @@ def generated_report_html(tmp_path_factory) -> str:
     """Génère un rapport démo et retourne son contenu HTML."""
     from picarones import fixtures
     from picarones.measurements.normalization import get_builtin_profile
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
 
     b = fixtures.generate_sample_benchmark(n_docs=6)
     out_dir = tmp_path_factory.mktemp("rep27")
diff --git a/tests/report/test_sprint37_inter_engine_html.py b/tests/report/test_sprint37_inter_engine_html.py
index 8a312bbc3b28b8fd61bb52d8308c31a2b4898d82..3632a2c8bcf24ee581365ff883667646ed399b6a 100644
--- a/tests/report/test_sprint37_inter_engine_html.py
+++ b/tests/report/test_sprint37_inter_engine_html.py
@@ -26,7 +26,7 @@ from pathlib import Path
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 from picarones.reports_v2.html.renderers.inter_engine import (
     build_divergence_matrix_html,
     build_oracle_gap_html,
diff --git a/tests/report/test_sprint41_ner_html.py b/tests/report/test_sprint41_ner_html.py
index f5a30715df8ff96bb7dd722f4ae2a82cd44629c0..ca19c95f1ece0ceddd95c797014e5c71e15cc133 100644
--- a/tests/report/test_sprint41_ner_html.py
+++ b/tests/report/test_sprint41_ner_html.py
@@ -22,7 +22,7 @@ from pathlib import Path
 import pytest
 
 from picarones.fixtures import generate_sample_benchmark
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 from picarones.reports_v2.html.renderers.ner import (
     build_ner_per_category_html,
     build_ner_summary_html,
diff --git a/tests/report/test_sprint43_calibration_html.py b/tests/report/test_sprint43_calibration_html.py
index 6a8f58e43d1e60b9bebd95b7d052ce6952ef7758..78049cbe414f12254882e855621f83072dd8b482 100644
--- a/tests/report/test_sprint43_calibration_html.py
+++ b/tests/report/test_sprint43_calibration_html.py
@@ -31,7 +31,7 @@ from picarones.reports_v2.html.renderers.calibration import (
     build_reliability_diagram_svg,
     build_reliability_diagrams_grid_html,
 )
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 
 
 # ──────────────────────────────────────────────────────────────────────────
diff --git a/tests/report/test_sprint46_stratification_html.py b/tests/report/test_sprint46_stratification_html.py
index 006fff05b0f6dbec405027d04cf539d8912d497c..24bf5493b6f23d036b9e1075946a672521cd480d 100644
--- a/tests/report/test_sprint46_stratification_html.py
+++ b/tests/report/test_sprint46_stratification_html.py
@@ -31,7 +31,7 @@ from picarones.measurements.narrative.detectors import detect_stratification_rec
 from picarones.core.facts import FactImportance, FactType
 from picarones.measurements.narrative.renderer import extract_numbers, render_fact
 from picarones.core.results import DocumentResult
-from picarones.report.generator import ReportGenerator
+from picarones.reports_v2.html.generator import ReportGenerator
 from picarones.reports_v2.html.renderers.stratification import build_stratified_ranking_html
 
 
diff --git a/tests/report/test_sprint5_advanced_metrics.py b/tests/report/test_sprint5_advanced_metrics.py
index 4c1b7b2d4fa910567eaa0cc4c81ac5e900e483ba..1cf92cbcfbb9c3edef072d68e8817628cd211ea5 100644
--- a/tests/report/test_sprint5_advanced_metrics.py
+++ b/tests/report/test_sprint5_advanced_metrics.py
@@ -759,7 +759,7 @@ class TestReportSprint5:
 
     def test_report_data_has_ligature_score(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, {})
         for eng in data["engines"]:
@@ -767,7 +767,7 @@ class TestReportSprint5:
 
     def test_report_data_has_diacritic_score(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, {})
         for eng in data["engines"]:
@@ -775,7 +775,7 @@ class TestReportSprint5:
 
     def test_report_data_has_aggregated_taxonomy(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, {})
         for eng in data["engines"]:
@@ -783,7 +783,7 @@ class TestReportSprint5:
 
     def test_report_data_has_aggregated_image_quality(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, {})
         for eng in data["engines"]:
@@ -791,7 +791,7 @@ class TestReportSprint5:
 
     def test_html_has_characters_tab(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -800,7 +800,7 @@ class TestReportSprint5:
 
     def test_html_has_ligatures_column(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -809,7 +809,7 @@ class TestReportSprint5:
 
     def test_html_has_diacritiques_column(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -818,7 +818,7 @@ class TestReportSprint5:
 
     def test_html_has_scatter_plot(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -827,7 +827,7 @@ class TestReportSprint5:
 
     def test_html_has_taxonomy_chart(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -836,7 +836,7 @@ class TestReportSprint5:
 
     def test_html_has_confusion_heatmap(self, tmp_path):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
         bm = generate_sample_benchmark()
         out = tmp_path / "report.html"
         ReportGenerator(bm).generate(out)
@@ -845,7 +845,7 @@ class TestReportSprint5:
 
     def test_doc_results_have_image_quality_in_report(self):
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import _build_report_data
+        from picarones.reports_v2.html.generator import _build_report_data
         bm = generate_sample_benchmark()
         data = _build_report_data(bm, {})
         doc = data["documents"][0]
diff --git a/tests/report/test_sprint7_advanced_report.py b/tests/report/test_sprint7_advanced_report.py
index cb2c1c284a71eb0af1ede1a4d88ee936e659f637..a23900ebc3ff33f02e45989a70acd9677de5e035 100644
--- a/tests/report/test_sprint7_advanced_report.py
+++ b/tests/report/test_sprint7_advanced_report.py
@@ -31,14 +31,14 @@ def sample_benchmark_s7():
 
 @pytest.fixture
 def report_data_s7(sample_benchmark_s7):
-    from picarones.report.generator import _build_report_data
+    from picarones.reports_v2.html.generator import _build_report_data
     imgs = sample_benchmark_s7.metadata.get("_images_b64", {})
     return _build_report_data(sample_benchmark_s7, imgs)
 
 
 @pytest.fixture
 def html_s7(sample_benchmark_s7):
-    from picarones.report.generator import ReportGenerator
+    from picarones.reports_v2.html.generator import ReportGenerator
     import tempfile
     import pathlib
     gen = ReportGenerator(sample_benchmark_s7)
diff --git a/tests/report/test_views.py b/tests/report/test_views.py
index e3f11421eaf08708877627a73877e243faafb29a..8f56aded459b41460ebcd9c1aa3beb50916f7f5d 100644
--- a/tests/report/test_views.py
+++ b/tests/report/test_views.py
@@ -345,11 +345,11 @@ class TestGeneratorWiring:
         une docstring) et trop liée à la forme du code.
         """
         from picarones.fixtures import generate_sample_benchmark
-        from picarones.report.generator import ReportGenerator
+        from picarones.reports_v2.html.generator import ReportGenerator
 
         bench = generate_sample_benchmark()
         gen = ReportGenerator(bench, lang="fr")
-        from picarones.i18n import get_labels
+        from picarones.reports_v2.i18n import get_labels
 
         report_data = {
             "engines": [],
@@ -381,7 +381,7 @@ class TestGeneratorWiring:
 
         tpl_src = (
             Path(__file__).parent.parent.parent
-            / "picarones" / "report" / "templates" / "view_analyses.html"
+            / "picarones" / "reports_v2" / "html" / "templates" / "view_analyses.html"
         ).read_text(encoding="utf-8")
         assert "{% if economics_view_html %}" in tpl_src
         assert "{% if advanced_taxonomy_view_html %}" in tpl_src
diff --git a/tests/web/test_sprint28_ux_save_compare.py b/tests/web/test_sprint28_ux_save_compare.py
index c8125aa78176428e81550a25cbd5f1186f331e0f..3a81338b6ff3a3302f9e6f4afe80e88fbcb62515 100644
--- a/tests/web/test_sprint28_ux_save_compare.py
+++ b/tests/web/test_sprint28_ux_save_compare.py
@@ -46,7 +46,7 @@ def _benchmark_json(engines_to_cer: dict[str, float], **extra) -> dict:
 
 class TestCompareBenchmarks:
     def test_identical_runs_no_regression(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"tesseract": 0.05, "pero": 0.07})
         b = _benchmark_json({"tesseract": 0.05, "pero": 0.07})
         diff = compare_benchmarks(a, b, threshold=0.005)
@@ -54,7 +54,7 @@ class TestCompareBenchmarks:
         assert all(not d.is_regression and not d.is_improvement for d in diff.deltas)
 
     def test_regression_detected_above_threshold(self):
-        from picarones.report.comparison import compare_benchmarks, detect_regressions
+        from picarones.reports_v2.html.comparison import compare_benchmarks, detect_regressions
         a = _benchmark_json({"tesseract": 0.05})
         b = _benchmark_json({"tesseract": 0.06})  # +1 pp
         diff = compare_benchmarks(a, b, threshold=0.005)
@@ -64,7 +64,7 @@ class TestCompareBenchmarks:
         assert regs[0].delta_cer == pytest.approx(0.01, abs=1e-9)
 
     def test_improvement_detected_below_threshold(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"tesseract": 0.05})
         b = _benchmark_json({"tesseract": 0.04})  # -1 pp
         diff = compare_benchmarks(a, b, threshold=0.005)
@@ -72,14 +72,14 @@ class TestCompareBenchmarks:
         assert not diff.deltas[0].is_regression
 
     def test_below_threshold_is_stable(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"tesseract": 0.05})
         b = _benchmark_json({"tesseract": 0.052})  # +0.2 pp, sous le seuil 0.5 pp
         diff = compare_benchmarks(a, b, threshold=0.005)
         assert not diff.deltas[0].is_regression
 
     def test_engines_only_in_one_side(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"tesseract": 0.05, "pero": 0.07})
         b = _benchmark_json({"tesseract": 0.05, "kraken": 0.06})
         diff = compare_benchmarks(a, b, threshold=0.005)
@@ -88,7 +88,7 @@ class TestCompareBenchmarks:
         assert {d.engine for d in diff.deltas} == {"tesseract"}
 
     def test_none_cer_does_not_raise(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"tesseract": None})
         b = _benchmark_json({"tesseract": 0.05})
         diff = compare_benchmarks(a, b)
@@ -96,7 +96,7 @@ class TestCompareBenchmarks:
         assert not diff.deltas[0].is_regression
 
     def test_regressions_sorted_by_severity(self):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a = _benchmark_json({"a": 0.05, "b": 0.05, "c": 0.05})
         b = _benchmark_json({"a": 0.07, "b": 0.10, "c": 0.06})  # b plus grave
         diff = compare_benchmarks(a, b, threshold=0.005)
@@ -105,7 +105,7 @@ class TestCompareBenchmarks:
         assert engines_in_order.index("b") < engines_in_order.index("a")
 
     def test_loads_from_file_path(self, tmp_path):
-        from picarones.report.comparison import compare_benchmarks
+        from picarones.reports_v2.html.comparison import compare_benchmarks
         a_path = tmp_path / "a.json"
         b_path = tmp_path / "b.json"
         a_path.write_text(json.dumps(_benchmark_json({"tesseract": 0.05})))
@@ -116,7 +116,7 @@ class TestCompareBenchmarks:
 
 class TestRenderComparisonHTML:
     def test_html_is_self_contained_and_named(self, tmp_path):
-        from picarones.report.comparison import compare_benchmarks, render_comparison_html
+        from picarones.reports_v2.html.comparison import compare_benchmarks, render_comparison_html
         a = _benchmark_json({"tesseract": 0.05})
         b = _benchmark_json({"tesseract": 0.07})
         diff = compare_benchmarks(a, b, label_a="V1", label_b="V2")