Spaces:
Sleeping
fix(rapport): images base64 auto-encodées + diff côte à côte char-level
Browse files
Problème 1 — Images non affichées (Galerie & vue Document)
- Ajoute GET /api/corpus/image/{upload_id}/{filename} pour servir les
images upload depuis le navigateur (usage live dans le SPA)
- Ajoute _encode_image_b64() / _encode_images_b64_from_result() qui
redimensionnent à 1200 px max et encodent en base64
- ReportGenerator.generate() auto-encode désormais toutes les images
depuis leur image_path si aucun images_b64 n'est fourni → rapport
HTML auto-contenu, fonctionnel hors-ligne et sur HuggingFace
Problème 2 — Vue Document : diff côte à côte avec surlignage
- Remplace les panneaux diff actuels par deux colonnes synchronisées :
· Gauche (GT) : caractères manquants dans l'OCR en orange (.d-miss)
· Droite (OCR) : caractères erronés vs GT en rouge (.d-err),
insertions en vert (.d-ins-ocr)
- Diff calculé au niveau caractère (compute_char_diff) pour une
précision maximale sur les erreurs de transcription
- Sélecteur de concurrent si plusieurs moteurs ; GT reste fixe à gauche
- En-tête de chaque colonne affiche nom du moteur + badge CER
- Pipeline triple-diff (GT→OCR brut / OCR brut→LLM) conservé en pied
de carte pour les pipelines OCR+LLM
https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq
- picarones/report/generator.py +239 -99
- picarones/web/app.py +18 -0
|
@@ -15,13 +15,15 @@ Vues disponibles
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
|
|
|
|
|
|
| 18 |
import json
|
| 19 |
import math
|
| 20 |
from pathlib import Path
|
| 21 |
from typing import Optional
|
| 22 |
|
| 23 |
from picarones.core.results import BenchmarkResult
|
| 24 |
-
from picarones.report.diff_utils import compute_word_diff
|
| 25 |
from picarones.core.statistics import (
|
| 26 |
compute_pairwise_stats,
|
| 27 |
compute_reliability_curve,
|
|
@@ -37,6 +39,50 @@ from picarones.core.difficulty import compute_all_difficulties, difficulty_label
|
|
| 37 |
# Helpers
|
| 38 |
# ---------------------------------------------------------------------------
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def _cer_color(cer: float) -> str:
|
| 41 |
"""Retourne une couleur CSS pour un score CER donné (0→vert, 1→rouge)."""
|
| 42 |
if cer < 0.05:
|
|
@@ -152,7 +198,7 @@ def _build_report_data(benchmark: BenchmarkResult, images_b64: dict[str, str]) -
|
|
| 152 |
continue
|
| 153 |
gt = dr.ground_truth
|
| 154 |
image_path = dr.image_path
|
| 155 |
-
diff_ops =
|
| 156 |
er_entry: dict = {
|
| 157 |
"engine": engine_name,
|
| 158 |
"hypothesis": dr.hypothesis,
|
|
@@ -692,25 +738,50 @@ tbody tr:hover {{ background: #f8fafc; }}
|
|
| 692 |
.d-rep-old {{ color: var(--del); background: var(--del-bg); border-radius: 2px 0 0 2px; padding: 0 1px; text-decoration: line-through; }}
|
| 693 |
.d-rep-new {{ color: var(--rep); background: var(--rep-bg); border-radius: 0 2px 2px 0; padding: 0 1px; }}
|
| 694 |
|
| 695 |
-
/*
|
| 696 |
-
.
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
border-radius: var(--radius);
|
| 700 |
-
overflow: hidden;
|
| 701 |
}}
|
| 702 |
-
.
|
| 703 |
-
|
| 704 |
-
background: #f0fdf4;
|
| 705 |
-
border-bottom: 1px solid #bbf7d0;
|
| 706 |
-
font-size: .83rem; font-weight: 700; color: #15803d;
|
| 707 |
}}
|
| 708 |
-
.
|
| 709 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
font-family: 'Georgia', serif;
|
| 711 |
-
max-height:
|
| 712 |
-
color: var(--text);
|
| 713 |
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
|
| 715 |
/* ── Analyses ─────────────────────────────────────────────────────── */
|
| 716 |
.charts-grid {{
|
|
@@ -1058,19 +1129,32 @@ body.present-mode nav .meta {{ display: none; }}
|
|
| 1058 |
</div>
|
| 1059 |
</div>
|
| 1060 |
|
| 1061 |
-
<!--
|
| 1062 |
-
<div class="card">
|
| 1063 |
-
<
|
| 1064 |
-
|
| 1065 |
-
<div class="
|
| 1066 |
-
|
|
|
|
|
|
|
| 1067 |
</div>
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1074 |
</div>
|
| 1075 |
|
| 1076 |
<!-- Sprint 10 — Distribution CER par ligne -->
|
|
@@ -1351,6 +1435,113 @@ function renderDiff(ops) {{
|
|
| 1351 |
}}).join(' ');
|
| 1352 |
}}
|
| 1353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
// ── Score badge (ligatures / diacritiques) ───────────────────────
|
| 1355 |
function _scoreBadge(v, label) {{
|
| 1356 |
if (v === null || v === undefined) return '<span style="color:var(--text-muted)">—</span>';
|
|
@@ -1620,75 +1811,19 @@ function loadDocument(docId) {{
|
|
| 1620 |
placeholder.innerHTML = `<span style="font-size:2rem">🖹</span><span>${{esc(doc.image_path)}}</span>`;
|
| 1621 |
}}
|
| 1622 |
|
| 1623 |
-
//
|
| 1624 |
-
document.getElementById('
|
| 1625 |
-
|
| 1626 |
-
|
| 1627 |
-
|
| 1628 |
-
|
| 1629 |
-
|
| 1630 |
-
|
| 1631 |
-
|
| 1632 |
-
|
| 1633 |
-
|
| 1634 |
-
|
| 1635 |
-
|
| 1636 |
-
const pipeTagPanel = isPipeline
|
| 1637 |
-
? `<span class="pipeline-tag">⛓ ${{modeLabel || 'pipeline'}}</span>` : '';
|
| 1638 |
-
|
| 1639 |
-
// Sur-normalisation (classe 10)
|
| 1640 |
-
let onBadge = '';
|
| 1641 |
-
if (er.over_normalization) {{
|
| 1642 |
-
const on = er.over_normalization;
|
| 1643 |
-
const onPct = (on.score * 100).toFixed(2);
|
| 1644 |
-
const cls = on.score > 0.05 ? 'over-norm-badge high' : 'over-norm-badge';
|
| 1645 |
-
onBadge = `<span class="${{cls}}" title="Classe 10 — sur-normalisation LLM">Sur-norm. ${{onPct}}%</span>`;
|
| 1646 |
-
}}
|
| 1647 |
-
|
| 1648 |
-
// Triple-diff (vue spécifique pipeline) : OCR brut / Correction LLM
|
| 1649 |
-
let tripleDiffHtml = '';
|
| 1650 |
-
if (isPipeline && er.ocr_intermediate) {{
|
| 1651 |
-
const ocrDiffHtml = renderDiff(er.ocr_diff);
|
| 1652 |
-
const llmDiffHtml = renderDiff(er.llm_correction_diff);
|
| 1653 |
-
tripleDiffHtml = `
|
| 1654 |
-
<div class="triple-diff-wrap">
|
| 1655 |
-
<div class="triple-diff-section">
|
| 1656 |
-
<h5>GT → OCR brut</h5>
|
| 1657 |
-
${{ocrDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
|
| 1658 |
-
</div>
|
| 1659 |
-
<div class="triple-diff-section">
|
| 1660 |
-
<h5>OCR brut → Correction LLM</h5>
|
| 1661 |
-
${{llmDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
|
| 1662 |
-
</div>
|
| 1663 |
-
</div>`;
|
| 1664 |
-
}}
|
| 1665 |
-
|
| 1666 |
-
// CER diplomatique par document
|
| 1667 |
-
let diplomaBadge = '';
|
| 1668 |
-
if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
|
| 1669 |
-
const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
|
| 1670 |
-
const delta = er.cer - er.cer_diplomatic;
|
| 1671 |
-
const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
|
| 1672 |
-
diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
|
| 1673 |
-
title="CER diplomatique (ſ=s, u=v, i=j…)${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
|
| 1674 |
-
}}
|
| 1675 |
-
|
| 1676 |
-
return `<div class="diff-panel">
|
| 1677 |
-
<div class="diff-panel-header">
|
| 1678 |
-
<span class="diff-panel-title">${{esc(er.engine)}}</span>
|
| 1679 |
-
${{pipeTagPanel}}
|
| 1680 |
-
<span class="diff-panel-metrics">
|
| 1681 |
-
<span class="cer-badge" style="color:${{c}};background:${{bg}}">${{pct(er.cer)}}</span>
|
| 1682 |
-
${{diplomaBadge}}
|
| 1683 |
-
<span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
|
| 1684 |
-
${{onBadge}}
|
| 1685 |
-
${{errBadge}}
|
| 1686 |
-
</span>
|
| 1687 |
-
</div>
|
| 1688 |
-
<div class="diff-panel-body">${{diffHtml || '<em style="color:var(--text-muted)">Aucune sortie</em>'}}</div>
|
| 1689 |
-
${{tripleDiffHtml}}
|
| 1690 |
-
</div>`;
|
| 1691 |
-
}}).join('');
|
| 1692 |
|
| 1693 |
// ── Sprint 10 : distribution CER par ligne ──────────────────────────
|
| 1694 |
const lineCard = document.getElementById('doc-line-metrics-card');
|
|
@@ -2931,8 +3066,13 @@ class ReportGenerator:
|
|
| 2931 |
output_path = Path(output_path)
|
| 2932 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 2933 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2934 |
labels = get_labels(self.lang)
|
| 2935 |
-
report_data = _build_report_data(self.benchmark,
|
| 2936 |
report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
|
| 2937 |
i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
|
| 2938 |
|
|
|
|
| 15 |
|
| 16 |
from __future__ import annotations
|
| 17 |
|
| 18 |
+
import base64
|
| 19 |
+
import io
|
| 20 |
import json
|
| 21 |
import math
|
| 22 |
from pathlib import Path
|
| 23 |
from typing import Optional
|
| 24 |
|
| 25 |
from picarones.core.results import BenchmarkResult
|
| 26 |
+
from picarones.report.diff_utils import compute_char_diff, compute_word_diff
|
| 27 |
from picarones.core.statistics import (
|
| 28 |
compute_pairwise_stats,
|
| 29 |
compute_reliability_curve,
|
|
|
|
| 39 |
# Helpers
|
| 40 |
# ---------------------------------------------------------------------------
|
| 41 |
|
| 42 |
+
def _encode_image_b64(image_path: str, max_width: int = 1200) -> str:
    """Read an image, downscale it if needed, and return it as a base64 data URI.

    The image is re-encoded as JPEG when the file suffix is ``.jpg``/``.jpeg``
    and as PNG otherwise, after being reduced to at most ``max_width`` pixels
    wide (aspect ratio preserved).

    Parameters
    ----------
    image_path:
        Path of the image file to embed.
    max_width:
        Maximum width in pixels of the embedded image.

    Returns
    -------
    str
        ``data:<mime>;base64,<payload>``, or ``""`` when Pillow is not
        installed, the path is not a regular file, or the image cannot be
        decoded/encoded. This function never raises: embedding is best-effort.
    """
    try:
        from PIL import Image, ImageOps
    except ImportError:
        # Pillow is imported lazily; without it no image can be embedded.
        return ""

    path = Path(image_path)
    if not path.is_file():
        return ""

    is_jpeg = path.suffix.lower() in (".jpg", ".jpeg")
    try:
        with Image.open(path) as source:
            # Saving below does not carry EXIF metadata over, so the
            # orientation tag must be applied to the pixels first; otherwise
            # rotated photos would be displayed sideways in the report.
            img = ImageOps.exif_transpose(source)
            if img is None:
                img = source
            if img.width > max_width:
                ratio = max_width / img.width
                new_h = max(1, int(img.height * ratio))
                img = img.resize((max_width, new_h), Image.LANCZOS)
            # Convert to RGB to avoid mode problems (RGBA, palette, ...).
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            buf = io.BytesIO()
            if is_jpeg:
                img.save(buf, format="JPEG", optimize=True, quality=85)
            else:
                # ``quality`` is a JPEG setting; PNG only needs ``optimize``.
                img.save(buf, format="PNG", optimize=True)
    except Exception:
        # Best-effort: a single unreadable image must not abort the whole
        # report, but leave a trace so the missing picture can be diagnosed.
        import logging

        logging.getLogger(__name__).warning(
            "Could not encode image %s as base64", image_path, exc_info=True
        )
        return ""

    mime = "image/jpeg" if is_jpeg else "image/png"
    b64 = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:{mime};base64,{b64}"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _encode_images_b64_from_result(benchmark: "BenchmarkResult", max_width: int = 1200) -> dict[str, str]:
    """Encode every document image referenced by a BenchmarkResult as base64.

    All engine reports are scanned, so a document that is missing from the
    first engine's results still gets its image. Each ``(doc_id, image_path)``
    pair is attempted at most once, and the first successful encoding for a
    ``doc_id`` wins.

    Parameters
    ----------
    benchmark:
        Object exposing ``engine_reports``; each report exposes
        ``document_results`` whose items carry ``doc_id`` and ``image_path``.
    max_width:
        Maximum width in pixels, forwarded to ``_encode_image_b64``.

    Returns
    -------
    dict
        ``{doc_id: data_uri}``; documents whose image could not be encoded
        are omitted.
    """
    images: dict[str, str] = {}
    attempted: set[tuple[str, str]] = set()
    for report in benchmark.engine_reports or ():
        for dr in report.document_results:
            if not dr.image_path or dr.doc_id in images:
                continue
            key = (dr.doc_id, str(dr.image_path))
            if key in attempted:
                # Same file already failed for another engine: do not redo
                # the (potentially expensive) decode.
                continue
            attempted.add(key)
            uri = _encode_image_b64(dr.image_path, max_width=max_width)
            if uri:
                images[dr.doc_id] = uri
    return images
|
| 84 |
+
|
| 85 |
+
|
| 86 |
def _cer_color(cer: float) -> str:
|
| 87 |
"""Retourne une couleur CSS pour un score CER donné (0→vert, 1→rouge)."""
|
| 88 |
if cer < 0.05:
|
|
|
|
| 198 |
continue
|
| 199 |
gt = dr.ground_truth
|
| 200 |
image_path = dr.image_path
|
| 201 |
+
diff_ops = compute_char_diff(dr.ground_truth, dr.hypothesis)
|
| 202 |
er_entry: dict = {
|
| 203 |
"engine": engine_name,
|
| 204 |
"hypothesis": dr.hypothesis,
|
|
|
|
| 738 |
.d-rep-old {{ color: var(--del); background: var(--del-bg); border-radius: 2px 0 0 2px; padding: 0 1px; text-decoration: line-through; }}
|
| 739 |
.d-rep-new {{ color: var(--rep); background: var(--rep-bg); border-radius: 0 2px 2px 0; padding: 0 1px; }}
|
| 740 |
|
| 741 |
+
/* Side-by-side diff */
|
| 742 |
+
.sbs-header {{
|
| 743 |
+
display: flex; align-items: center; justify-content: space-between;
|
| 744 |
+
flex-wrap: wrap; gap: .5rem; margin-bottom: .75rem;
|
|
|
|
|
|
|
| 745 |
}}
|
| 746 |
+
.sbs-engine-select {{
|
| 747 |
+
display: flex; align-items: center; gap: .4rem; font-size: .82rem;
|
|
|
|
|
|
|
|
|
|
| 748 |
}}
|
| 749 |
+
.sbs-engine-select select {{
|
| 750 |
+
border: 1px solid var(--border); border-radius: 4px;
|
| 751 |
+
padding: .2rem .4rem; font-size: .82rem; background: var(--surface);
|
| 752 |
+
}}
|
| 753 |
+
.sbs-columns {{
|
| 754 |
+
display: grid; grid-template-columns: 1fr 1fr; gap: .75rem;
|
| 755 |
+
}}
|
| 756 |
+
@media (max-width: 700px) {{
|
| 757 |
+
.sbs-columns {{ grid-template-columns: 1fr; }}
|
| 758 |
+
}}
|
| 759 |
+
.sbs-col {{
|
| 760 |
+
border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden;
|
| 761 |
+
}}
|
| 762 |
+
.sbs-col-header {{
|
| 763 |
+
padding: .45rem .75rem;
|
| 764 |
+
display: flex; align-items: center; justify-content: space-between; gap: .5rem;
|
| 765 |
+
font-size: .83rem; font-weight: 700;
|
| 766 |
+
}}
|
| 767 |
+
.sbs-gt-header {{
|
| 768 |
+
background: #f0fdf4; border-bottom: 1px solid #bbf7d0; color: #15803d;
|
| 769 |
+
}}
|
| 770 |
+
.sbs-ocr-header {{
|
| 771 |
+
background: #eff6ff; border-bottom: 1px solid #bfdbfe; color: #1d4ed8;
|
| 772 |
+
}}
|
| 773 |
+
.sbs-col-body {{
|
| 774 |
+
padding: .75rem; font-size: .82rem; line-height: 1.8;
|
| 775 |
font-family: 'Georgia', serif;
|
| 776 |
+
max-height: 340px; overflow-y: auto;
|
| 777 |
+
color: var(--text); white-space: pre-wrap; word-break: break-word;
|
| 778 |
}}
|
| 779 |
+
/* Caractères manquants dans GT (orange) */
|
| 780 |
+
.d-miss {{ color: #92400e; background: #fef3c7; border-radius: 2px; padding: 0 1px; }}
|
| 781 |
+
/* Caractères erronés dans OCR (rouge) */
|
| 782 |
+
.d-err {{ color: var(--del); background: var(--del-bg); border-radius: 2px; padding: 0 1px; }}
|
| 783 |
+
/* Insertions dans OCR (vert) */
|
| 784 |
+
.d-ins-ocr {{ color: var(--ins); background: var(--ins-bg); border-radius: 2px; padding: 0 1px; }}
|
| 785 |
|
| 786 |
/* ── Analyses ─────────────────────────────────────────────────────── */
|
| 787 |
.charts-grid {{
|
|
|
|
| 1129 |
</div>
|
| 1130 |
</div>
|
| 1131 |
|
| 1132 |
+
<!-- Diff côte à côte GT / OCR -->
|
| 1133 |
+
<div class="card" id="doc-sidebyside-card">
|
| 1134 |
+
<div class="sbs-header">
|
| 1135 |
+
<h3 data-i18n="h_diff">Comparaison GT / OCR</h3>
|
| 1136 |
+
<div class="sbs-engine-select" id="sbs-engine-select" style="display:none">
|
| 1137 |
+
<label data-i18n="sbs_engine_label">Concurrent :</label>
|
| 1138 |
+
<select id="sbs-engine-dropdown" onchange="renderSideBySide(currentDocId)"></select>
|
| 1139 |
+
</div>
|
| 1140 |
</div>
|
| 1141 |
+
<div class="sbs-columns" id="sbs-columns">
|
| 1142 |
+
<div class="sbs-col sbs-col-gt">
|
| 1143 |
+
<div class="sbs-col-header sbs-gt-header">
|
| 1144 |
+
<span>✓ Vérité terrain (GT)</span>
|
| 1145 |
+
</div>
|
| 1146 |
+
<div class="sbs-col-body" id="sbs-gt-body">—</div>
|
| 1147 |
+
</div>
|
| 1148 |
+
<div class="sbs-col sbs-col-ocr">
|
| 1149 |
+
<div class="sbs-col-header sbs-ocr-header" id="sbs-ocr-header">
|
| 1150 |
+
<span id="sbs-ocr-engine-name">OCR</span>
|
| 1151 |
+
<span class="cer-badge" id="sbs-ocr-cer" style="display:none"></span>
|
| 1152 |
+
</div>
|
| 1153 |
+
<div class="sbs-col-body" id="sbs-ocr-body">—</div>
|
| 1154 |
+
</div>
|
| 1155 |
+
</div>
|
| 1156 |
+
<!-- Pipeline triple-diff (affiché en dessous si applicable) -->
|
| 1157 |
+
<div id="sbs-triple-diff" style="display:none"></div>
|
| 1158 |
</div>
|
| 1159 |
|
| 1160 |
<!-- Sprint 10 — Distribution CER par ligne -->
|
|
|
|
| 1435 |
}}).join(' ');
|
| 1436 |
}}
|
| 1437 |
|
| 1438 |
+
// ── Rendu côte à côte (char-level) ──────────────────────────────────
|
| 1439 |
+
function renderSideBySide(docId) {{
|
| 1440 |
+
const doc = DATA.documents.find(d => d.doc_id === docId);
|
| 1441 |
+
if (!doc) return;
|
| 1442 |
+
|
| 1443 |
+
const sel = document.getElementById('sbs-engine-dropdown');
|
| 1444 |
+
const engineIdx = sel && sel.value !== '' ? parseInt(sel.value, 10) : 0;
|
| 1445 |
+
const er = doc.engine_results[engineIdx];
|
| 1446 |
+
if (!er) return;
|
| 1447 |
+
|
| 1448 |
+
const ops = er.diff || [];
|
| 1449 |
+
|
| 1450 |
+
// Construire le HTML GT (gauche) et OCR (droite) depuis les mêmes ops
|
| 1451 |
+
let gtHtml = '', ocrHtml = '';
|
| 1452 |
+
ops.forEach(op => {{
|
| 1453 |
+
if (op.op === 'equal') {{
|
| 1454 |
+
const t = esc(op.text);
|
| 1455 |
+
gtHtml += t;
|
| 1456 |
+
ocrHtml += t;
|
| 1457 |
+
}} else if (op.op === 'delete') {{
|
| 1458 |
+
// Présent dans GT, absent de l'OCR → orange dans GT
|
| 1459 |
+
gtHtml += '<span class="d-miss" title="Absent de l\'OCR">' + esc(op.text) + '</span>';
|
| 1460 |
+
}} else if (op.op === 'insert') {{
|
| 1461 |
+
// Présent dans OCR, absent du GT → vert dans OCR
|
| 1462 |
+
ocrHtml += '<span class="d-ins-ocr" title="Insertion OCR">' + esc(op.text) + '</span>';
|
| 1463 |
+
}} else if (op.op === 'replace') {{
|
| 1464 |
+
// Substitution : orange dans GT, rouge dans OCR
|
| 1465 |
+
gtHtml += '<span class="d-miss" title="Différent dans l\'OCR">' + esc(op.old) + '</span>';
|
| 1466 |
+
ocrHtml += '<span class="d-err" title="Différent du GT">' + esc(op.new) + '</span>';
|
| 1467 |
+
}}
|
| 1468 |
+
}});
|
| 1469 |
+
|
| 1470 |
+
document.getElementById('sbs-gt-body').innerHTML = gtHtml || '<em style="color:var(--text-muted)">—</em>';
|
| 1471 |
+
document.getElementById('sbs-ocr-body').innerHTML = ocrHtml || '<em style="color:var(--text-muted)">Aucune sortie</em>';
|
| 1472 |
+
|
| 1473 |
+
// En-tête OCR : nom moteur + CER
|
| 1474 |
+
const c = cerColor(er.cer); const bg = cerBg(er.cer);
|
| 1475 |
+
document.getElementById('sbs-ocr-engine-name').textContent = er.engine;
|
| 1476 |
+
const cerBadgeEl = document.getElementById('sbs-ocr-cer');
|
| 1477 |
+
cerBadgeEl.textContent = pct(er.cer);
|
| 1478 |
+
cerBadgeEl.style.cssText = `color:${{c}};background:${{bg}};display:inline-block`;
|
| 1479 |
+
|
| 1480 |
+
// Pipeline triple-diff (si applicable)
|
| 1481 |
+
const tripleEl = document.getElementById('sbs-triple-diff');
|
| 1482 |
+
if (er.ocr_intermediate) {{
|
| 1483 |
+
const ocrDiffHtml = renderDiff(er.ocr_diff);
|
| 1484 |
+
const llmDiffHtml = renderDiff(er.llm_correction_diff);
|
| 1485 |
+
const isPipeline = er.ocr_intermediate !== undefined;
|
| 1486 |
+
const modeLabel = {{text_only:'texte seul', text_and_image:'image+texte', zero_shot:'zero-shot'}}[er.pipeline_mode] || '';
|
| 1487 |
+
const pipeTag = `<span class="pipeline-tag">⛓ ${{modeLabel || 'pipeline'}}</span>`;
|
| 1488 |
+
let onBadge = '';
|
| 1489 |
+
if (er.over_normalization) {{
|
| 1490 |
+
const on = er.over_normalization;
|
| 1491 |
+
const onPct = (on.score * 100).toFixed(2);
|
| 1492 |
+
const cls = on.score > 0.05 ? 'over-norm-badge high' : 'over-norm-badge';
|
| 1493 |
+
onBadge = `<span class="${{cls}}" title="Classe 10 — sur-normalisation LLM">Sur-norm. ${{onPct}}%</span>`;
|
| 1494 |
+
}}
|
| 1495 |
+
let diplomaBadge = '';
|
| 1496 |
+
if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
|
| 1497 |
+
const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
|
| 1498 |
+
const delta = er.cer - er.cer_diplomatic;
|
| 1499 |
+
const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
|
| 1500 |
+
diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
|
| 1501 |
+
title="CER diplomatique${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
|
| 1502 |
+
}}
|
| 1503 |
+
tripleEl.style.display = '';
|
| 1504 |
+
tripleEl.innerHTML = `
|
| 1505 |
+
<div style="margin-top:.75rem;padding-top:.75rem;border-top:1px solid var(--border)">
|
| 1506 |
+
<div style="display:flex;align-items:center;gap:.4rem;margin-bottom:.5rem;font-size:.83rem;font-weight:600">
|
| 1507 |
+
${{pipeTag}} ${{diplomaBadge}} ${{onBadge}}
|
| 1508 |
+
<span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
|
| 1509 |
+
</div>
|
| 1510 |
+
<div class="triple-diff-wrap">
|
| 1511 |
+
<div class="triple-diff-section">
|
| 1512 |
+
<h5>GT → OCR brut</h5>
|
| 1513 |
+
${{ocrDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
|
| 1514 |
+
</div>
|
| 1515 |
+
<div class="triple-diff-section">
|
| 1516 |
+
<h5>OCR brut → Correction LLM</h5>
|
| 1517 |
+
${{llmDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
|
| 1518 |
+
</div>
|
| 1519 |
+
</div>
|
| 1520 |
+
</div>`;
|
| 1521 |
+
}} else {{
|
| 1522 |
+
// Afficher WER / CER diplomatique même hors pipeline
|
| 1523 |
+
let diplomaBadge = '';
|
| 1524 |
+
if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
|
| 1525 |
+
const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
|
| 1526 |
+
const delta = er.cer - er.cer_diplomatic;
|
| 1527 |
+
const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
|
| 1528 |
+
diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
|
| 1529 |
+
title="CER diplomatique${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
|
| 1530 |
+
}}
|
| 1531 |
+
const errBadge = er.error ? `<span class="badge" style="background:#fee2e2;color:#dc2626">Erreur</span>` : '';
|
| 1532 |
+
if (diplomaBadge || errBadge) {{
|
| 1533 |
+
tripleEl.style.display = '';
|
| 1534 |
+
tripleEl.innerHTML = `<div style="margin-top:.5rem;display:flex;gap:.4rem;flex-wrap:wrap;font-size:.82rem">
|
| 1535 |
+
<span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
|
| 1536 |
+
${{diplomaBadge}} ${{errBadge}}
|
| 1537 |
+
</div>`;
|
| 1538 |
+
}} else {{
|
| 1539 |
+
tripleEl.style.display = 'none';
|
| 1540 |
+
tripleEl.innerHTML = '';
|
| 1541 |
+
}}
|
| 1542 |
+
}}
|
| 1543 |
+
}}
|
| 1544 |
+
|
| 1545 |
// ── Score badge (ligatures / diacritiques) ───────────────────────
|
| 1546 |
function _scoreBadge(v, label) {{
|
| 1547 |
if (v === null || v === undefined) return '<span style="color:var(--text-muted)">—</span>';
|
|
|
|
| 1811 |
placeholder.innerHTML = `<span style="font-size:2rem">🖹</span><span>${{esc(doc.image_path)}}</span>`;
|
| 1812 |
}}
|
| 1813 |
|
| 1814 |
+
// Side-by-side diff — sélecteur de concurrent
|
| 1815 |
+
const selWrap = document.getElementById('sbs-engine-select');
|
| 1816 |
+
const sel = document.getElementById('sbs-engine-dropdown');
|
| 1817 |
+
if (doc.engine_results.length > 1) {{
|
| 1818 |
+
sel.innerHTML = doc.engine_results.map((er, i) =>
|
| 1819 |
+
`<option value="${{i}}">${{esc(er.engine)}}</option>`
|
| 1820 |
+
).join('');
|
| 1821 |
+
selWrap.style.display = '';
|
| 1822 |
+
}} else {{
|
| 1823 |
+
sel.innerHTML = '';
|
| 1824 |
+
selWrap.style.display = 'none';
|
| 1825 |
+
}}
|
| 1826 |
+
renderSideBySide(docId);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1827 |
|
| 1828 |
// ── Sprint 10 : distribution CER par ligne ──────────────────────────
|
| 1829 |
const lineCard = document.getElementById('doc-line-metrics-card');
|
|
|
|
| 3066 |
output_path = Path(output_path)
|
| 3067 |
output_path.parent.mkdir(parents=True, exist_ok=True)
|
| 3068 |
|
| 3069 |
+
# Auto-encoder les images si aucune n'est fournie
|
| 3070 |
+
images_b64 = self.images_b64
|
| 3071 |
+
if not images_b64:
|
| 3072 |
+
images_b64 = _encode_images_b64_from_result(self.benchmark)
|
| 3073 |
+
|
| 3074 |
labels = get_labels(self.lang)
|
| 3075 |
+
report_data = _build_report_data(self.benchmark, images_b64)
|
| 3076 |
report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
|
| 3077 |
i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
|
| 3078 |
|
|
@@ -733,6 +733,24 @@ async def api_corpus_uploads() -> dict:
|
|
| 733 |
return {"uploads": uploads}
|
| 734 |
|
| 735 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 736 |
@app.delete("/api/corpus/uploads/{corpus_id}")
|
| 737 |
async def api_corpus_delete(corpus_id: str) -> dict:
|
| 738 |
"""Supprime un corpus uploadé."""
|
|
|
|
| 733 |
return {"uploads": uploads}
|
| 734 |
|
| 735 |
|
| 736 |
+
@app.get("/api/corpus/image/{upload_id}/{filename}")
async def api_corpus_image(upload_id: str, filename: str) -> FileResponse:
    """Serve an image stored in an upload directory.

    Parameters
    ----------
    upload_id:
        Name of the upload directory under ``_UPLOADS_DIR``.
    filename:
        Name of the file inside that directory.

    Returns
    -------
    FileResponse
        The file, with a media type derived from its suffix
        (``application/octet-stream`` for unknown suffixes).

    Raises
    ------
    HTTPException
        400 when either path segment is empty, is ``.``, or contains a path
        separator or ``..``; 404 when the file does not exist or resolves
        outside the uploads directory.
    """
    # Security: both segments come straight from the URL, reject anything
    # that is not a plain single path component.
    for label, value in (("upload_id", upload_id), ("filename", filename)):
        if value in ("", ".") or "/" in value or "\\" in value or ".." in value:
            raise HTTPException(status_code=400, detail=f"{label} invalide")

    # Defence in depth: resolve symlinks and make sure the target is still
    # inside the uploads root before touching it.
    # NOTE(review): this rejects corpora that are deliberately symlinked to a
    # location outside the uploads directory; confirm that is acceptable.
    uploads_root = _UPLOADS_DIR.resolve()
    image_path = (uploads_root / upload_id / filename).resolve()
    try:
        image_path.relative_to(uploads_root)
    except ValueError:
        raise HTTPException(status_code=404, detail="Image non trouvée") from None

    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image non trouvée")

    media_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".tif": "image/tiff",
        ".tiff": "image/tiff",
        ".webp": "image/webp",
    }
    media_type = media_types.get(image_path.suffix.lower(), "application/octet-stream")
    return FileResponse(str(image_path), media_type=media_type)
|
| 752 |
+
|
| 753 |
+
|
| 754 |
@app.delete("/api/corpus/uploads/{corpus_id}")
|
| 755 |
async def api_corpus_delete(corpus_id: str) -> dict:
|
| 756 |
"""Supprime un corpus uploadé."""
|