Claude committed on
Commit
549b13c
·
unverified ·
1 Parent(s): aebf6b8

fix(rapport): images base64 auto-encodées + diff côte à côte char-level

Browse files

Problème 1 — Images non affichées (Galerie & vue Document)
- Ajoute GET /api/corpus/image/{upload_id}/{filename} pour servir au
navigateur les images uploadées (usage live dans le SPA)
- Ajoute _encode_image_b64() / _encode_images_b64_from_result() qui
redimensionnent à 1200 px max et encodent en base64
- ReportGenerator.generate() auto-encode désormais toutes les images
depuis leur image_path si aucun images_b64 n'est fourni → rapport
HTML auto-contenu, fonctionnel hors-ligne et sur HuggingFace

Problème 2 — Vue Document : diff côte à côte avec surlignage
- Remplace les panneaux diff actuels par deux colonnes synchronisées :
· Gauche (GT) : caractères manquants dans l'OCR en orange (.d-miss)
· Droite (OCR) : caractères erronés vs GT en rouge (.d-err),
insertions en vert (.d-ins-ocr)
- Diff calculé au niveau caractère (compute_char_diff) pour une
précision maximale sur les erreurs de transcription
- Sélecteur de concurrent si plusieurs moteurs ; GT reste fixe à gauche
- En-tête de chaque colonne affiche nom du moteur + badge CER
- Pipeline triple-diff (GT→OCR brut / OCR brut→LLM) conservé en pied
de carte pour les pipelines OCR+LLM

https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq

Files changed (2) hide show
  1. picarones/report/generator.py +239 -99
  2. picarones/web/app.py +18 -0
picarones/report/generator.py CHANGED
@@ -15,13 +15,15 @@ Vues disponibles
15
 
16
  from __future__ import annotations
17
 
 
 
18
  import json
19
  import math
20
  from pathlib import Path
21
  from typing import Optional
22
 
23
  from picarones.core.results import BenchmarkResult
24
- from picarones.report.diff_utils import compute_word_diff
25
  from picarones.core.statistics import (
26
  compute_pairwise_stats,
27
  compute_reliability_curve,
@@ -37,6 +39,50 @@ from picarones.core.difficulty import compute_all_difficulties, difficulty_label
37
  # Helpers
38
  # ---------------------------------------------------------------------------
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def _cer_color(cer: float) -> str:
41
  """Retourne une couleur CSS pour un score CER donné (0→vert, 1→rouge)."""
42
  if cer < 0.05:
@@ -152,7 +198,7 @@ def _build_report_data(benchmark: BenchmarkResult, images_b64: dict[str, str]) -
152
  continue
153
  gt = dr.ground_truth
154
  image_path = dr.image_path
155
- diff_ops = compute_word_diff(dr.ground_truth, dr.hypothesis)
156
  er_entry: dict = {
157
  "engine": engine_name,
158
  "hypothesis": dr.hypothesis,
@@ -692,25 +738,50 @@ tbody tr:hover {{ background: #f8fafc; }}
692
  .d-rep-old {{ color: var(--del); background: var(--del-bg); border-radius: 2px 0 0 2px; padding: 0 1px; text-decoration: line-through; }}
693
  .d-rep-new {{ color: var(--rep); background: var(--rep-bg); border-radius: 0 2px 2px 0; padding: 0 1px; }}
694
 
695
- /* GT panel */
696
- .gt-panel {{
697
- background: var(--surface);
698
- border: 1px solid var(--border);
699
- border-radius: var(--radius);
700
- overflow: hidden;
701
  }}
702
- .gt-panel-header {{
703
- padding: .5rem .75rem;
704
- background: #f0fdf4;
705
- border-bottom: 1px solid #bbf7d0;
706
- font-size: .83rem; font-weight: 700; color: #15803d;
707
  }}
708
- .gt-panel-body {{
709
- padding: .75rem; font-size: .82rem; line-height: 1.7;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  font-family: 'Georgia', serif;
711
- max-height: 260px; overflow-y: auto;
712
- color: var(--text);
713
  }}
 
 
 
 
 
 
714
 
715
  /* ── Analyses ─────────────────────────────────────────────────────── */
716
  .charts-grid {{
@@ -1058,19 +1129,32 @@ body.present-mode nav .meta {{ display: none; }}
1058
  </div>
1059
  </div>
1060
 
1061
- <!-- Vérité terrain -->
1062
- <div class="card">
1063
- <h3 data-i18n="h_gt">Vérité terrain (GT)</h3>
1064
- <div class="gt-panel">
1065
- <div class="gt-panel-header">✓ Ground Truth</div>
1066
- <div class="gt-panel-body" id="doc-gt-text"></div>
 
 
1067
  </div>
1068
- </div>
1069
-
1070
- <!-- Diffs par moteur -->
1071
- <div class="card">
1072
- <h3 data-i18n="h_diff">Sorties OCR — diff par moteur</h3>
1073
- <div class="diff-panels" id="doc-diff-panels"></div>
 
 
 
 
 
 
 
 
 
 
 
1074
  </div>
1075
 
1076
  <!-- Sprint 10 — Distribution CER par ligne -->
@@ -1351,6 +1435,113 @@ function renderDiff(ops) {{
1351
  }}).join(' ');
1352
  }}
1353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1354
  // ── Score badge (ligatures / diacritiques) ───────────────────────
1355
  function _scoreBadge(v, label) {{
1356
  if (v === null || v === undefined) return '<span style="color:var(--text-muted)">—</span>';
@@ -1620,75 +1811,19 @@ function loadDocument(docId) {{
1620
  placeholder.innerHTML = `<span style="font-size:2rem">🖹</span><span>${{esc(doc.image_path)}}</span>`;
1621
  }}
1622
 
1623
- // GT
1624
- document.getElementById('doc-gt-text').textContent = doc.ground_truth;
1625
-
1626
- // Diffs
1627
- const panels = document.getElementById('doc-diff-panels');
1628
- panels.innerHTML = doc.engine_results.map((er, i) => {{
1629
- const c = cerColor(er.cer); const bg = cerBg(er.cer);
1630
- const diffHtml = renderDiff(er.diff);
1631
- const errBadge = er.error ? `<span class="badge" style="background:#fee2e2;color:#dc2626">Erreur</span>` : '';
1632
-
1633
- // Pipeline badge dans l'en-tête du panneau
1634
- const isPipeline = er.ocr_intermediate !== undefined;
1635
- const modeLabel = {{text_only:'texte seul', text_and_image:'image+texte', zero_shot:'zero-shot'}}[er.pipeline_mode] || '';
1636
- const pipeTagPanel = isPipeline
1637
- ? `<span class="pipeline-tag">⛓ ${{modeLabel || 'pipeline'}}</span>` : '';
1638
-
1639
- // Sur-normalisation (classe 10)
1640
- let onBadge = '';
1641
- if (er.over_normalization) {{
1642
- const on = er.over_normalization;
1643
- const onPct = (on.score * 100).toFixed(2);
1644
- const cls = on.score > 0.05 ? 'over-norm-badge high' : 'over-norm-badge';
1645
- onBadge = `<span class="${{cls}}" title="Classe 10 — sur-normalisation LLM">Sur-norm. ${{onPct}}%</span>`;
1646
- }}
1647
-
1648
- // Triple-diff (vue spécifique pipeline) : OCR brut / Correction LLM
1649
- let tripleDiffHtml = '';
1650
- if (isPipeline && er.ocr_intermediate) {{
1651
- const ocrDiffHtml = renderDiff(er.ocr_diff);
1652
- const llmDiffHtml = renderDiff(er.llm_correction_diff);
1653
- tripleDiffHtml = `
1654
- <div class="triple-diff-wrap">
1655
- <div class="triple-diff-section">
1656
- <h5>GT → OCR brut</h5>
1657
- ${{ocrDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
1658
- </div>
1659
- <div class="triple-diff-section">
1660
- <h5>OCR brut → Correction LLM</h5>
1661
- ${{llmDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
1662
- </div>
1663
- </div>`;
1664
- }}
1665
-
1666
- // CER diplomatique par document
1667
- let diplomaBadge = '';
1668
- if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
1669
- const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
1670
- const delta = er.cer - er.cer_diplomatic;
1671
- const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
1672
- diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
1673
- title="CER diplomatique (ſ=s, u=v, i=j…)${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
1674
- }}
1675
-
1676
- return `<div class="diff-panel">
1677
- <div class="diff-panel-header">
1678
- <span class="diff-panel-title">${{esc(er.engine)}}</span>
1679
- ${{pipeTagPanel}}
1680
- <span class="diff-panel-metrics">
1681
- <span class="cer-badge" style="color:${{c}};background:${{bg}}">${{pct(er.cer)}}</span>
1682
- ${{diplomaBadge}}
1683
- <span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
1684
- ${{onBadge}}
1685
- ${{errBadge}}
1686
- </span>
1687
- </div>
1688
- <div class="diff-panel-body">${{diffHtml || '<em style="color:var(--text-muted)">Aucune sortie</em>'}}</div>
1689
- ${{tripleDiffHtml}}
1690
- </div>`;
1691
- }}).join('');
1692
 
1693
  // ── Sprint 10 : distribution CER par ligne ──────────────────────────
1694
  const lineCard = document.getElementById('doc-line-metrics-card');
@@ -2931,8 +3066,13 @@ class ReportGenerator:
2931
  output_path = Path(output_path)
2932
  output_path.parent.mkdir(parents=True, exist_ok=True)
2933
 
 
 
 
 
 
2934
  labels = get_labels(self.lang)
2935
- report_data = _build_report_data(self.benchmark, self.images_b64)
2936
  report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
2937
  i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
2938
 
 
15
 
16
  from __future__ import annotations
17
 
18
+ import base64
19
+ import io
20
  import json
21
  import math
22
  from pathlib import Path
23
  from typing import Optional
24
 
25
  from picarones.core.results import BenchmarkResult
26
+ from picarones.report.diff_utils import compute_char_diff, compute_word_diff
27
  from picarones.core.statistics import (
28
  compute_pairwise_stats,
29
  compute_reliability_curve,
 
39
  # Helpers
40
  # ---------------------------------------------------------------------------
41
 
42
def _encode_image_b64(image_path: str, max_width: int = 1200) -> str:
    """Read an image, downscale it if needed, and return a base64 data URI.

    Parameters
    ----------
    image_path:
        Path of the image file to embed. An empty value, a missing file or a
        directory yields an empty string.
    max_width:
        Maximum width in pixels. Wider images are resized proportionally.
        A value of zero or less disables resizing.

    Returns
    -------
    str
        ``data:image/jpeg;base64,...`` when the file suffix is ``.jpg`` or
        ``.jpeg``, ``data:image/png;base64,...`` for every other suffix, or
        ``""`` when the image cannot be encoded. This function never raises
        for an unreadable image: embedding is best-effort, but every failure
        is logged so that a missing image in the report can be diagnosed.
    """
    import logging

    log = logging.getLogger(__name__)

    if not image_path:
        return ""
    try:
        src = Path(image_path)
        if not src.is_file():
            return ""
    except (TypeError, ValueError, OSError):
        # Not a usable filesystem path (wrong type, embedded NUL byte, ...).
        return ""

    try:
        from PIL import Image
    except ImportError:
        log.warning("Pillow is not installed; cannot embed image %s", src)
        return ""

    if src.suffix.lower() in (".jpg", ".jpeg"):
        fmt, mime = "JPEG", "image/jpeg"
    else:
        fmt, mime = "PNG", "image/png"

    try:
        with Image.open(src) as opened:
            frame = opened
            if max_width > 0 and frame.width > max_width:
                ratio = max_width / frame.width
                new_h = max(1, int(frame.height * ratio))
                # Recent Pillow exposes filters on Image.Resampling; older
                # releases only have the module-level constant.
                lanczos = getattr(Image, "Resampling", Image).LANCZOS
                frame = frame.resize((max_width, new_h), lanczos)
            # Convert to RGB to avoid mode issues (RGBA, palette, ...).
            if frame.mode not in ("RGB", "L"):
                frame = frame.convert("RGB")
            buf = io.BytesIO()
            frame.save(buf, format=fmt, optimize=True, quality=85)
    except Exception:
        # Best-effort boundary: a broken image must not abort the report.
        log.warning("Could not encode image %s as base64", src, exc_info=True)
        return ""

    payload = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:{mime};base64,{payload}"
65
+
66
+
67
def _encode_images_b64_from_result(benchmark: "BenchmarkResult", max_width: int = 1200) -> dict[str, str]:
    """Encode the images referenced by a BenchmarkResult as base64 data URIs.

    Every engine report is scanned, not only the first one, so a document
    that appears only in a later report still gets its image. For a given
    ``doc_id`` the first successfully encoded image wins.

    Parameters
    ----------
    benchmark:
        Object exposing ``engine_reports``; each report exposes
        ``document_results`` whose items carry ``doc_id`` and ``image_path``.
    max_width:
        Forwarded to ``_encode_image_b64``.

    Returns
    -------
    dict
        ``{doc_id: data_uri}``; documents without an image path, or whose
        image could not be encoded, are absent.
    """
    images: dict[str, str] = {}
    # (doc_id, path) pairs already tried, so a failing image is not
    # re-opened once per engine.
    attempted: set[tuple[str, str]] = set()
    for report in benchmark.engine_reports or ():
        for dr in report.document_results:
            if not dr.image_path or dr.doc_id in images:
                continue
            key = (dr.doc_id, str(dr.image_path))
            if key in attempted:
                continue
            attempted.add(key)
            uri = _encode_image_b64(dr.image_path, max_width=max_width)
            if uri:
                images[dr.doc_id] = uri
    return images
84
+
85
+
86
  def _cer_color(cer: float) -> str:
87
  """Retourne une couleur CSS pour un score CER donné (0→vert, 1→rouge)."""
88
  if cer < 0.05:
 
198
  continue
199
  gt = dr.ground_truth
200
  image_path = dr.image_path
201
+ diff_ops = compute_char_diff(dr.ground_truth, dr.hypothesis)
202
  er_entry: dict = {
203
  "engine": engine_name,
204
  "hypothesis": dr.hypothesis,
 
738
  .d-rep-old {{ color: var(--del); background: var(--del-bg); border-radius: 2px 0 0 2px; padding: 0 1px; text-decoration: line-through; }}
739
  .d-rep-new {{ color: var(--rep); background: var(--rep-bg); border-radius: 0 2px 2px 0; padding: 0 1px; }}
740
 
741
+ /* Side-by-side diff */
742
+ .sbs-header {{
743
+ display: flex; align-items: center; justify-content: space-between;
744
+ flex-wrap: wrap; gap: .5rem; margin-bottom: .75rem;
 
 
745
  }}
746
+ .sbs-engine-select {{
747
+ display: flex; align-items: center; gap: .4rem; font-size: .82rem;
 
 
 
748
  }}
749
+ .sbs-engine-select select {{
750
+ border: 1px solid var(--border); border-radius: 4px;
751
+ padding: .2rem .4rem; font-size: .82rem; background: var(--surface);
752
+ }}
753
+ .sbs-columns {{
754
+ display: grid; grid-template-columns: 1fr 1fr; gap: .75rem;
755
+ }}
756
+ @media (max-width: 700px) {{
757
+ .sbs-columns {{ grid-template-columns: 1fr; }}
758
+ }}
759
+ .sbs-col {{
760
+ border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden;
761
+ }}
762
+ .sbs-col-header {{
763
+ padding: .45rem .75rem;
764
+ display: flex; align-items: center; justify-content: space-between; gap: .5rem;
765
+ font-size: .83rem; font-weight: 700;
766
+ }}
767
+ .sbs-gt-header {{
768
+ background: #f0fdf4; border-bottom: 1px solid #bbf7d0; color: #15803d;
769
+ }}
770
+ .sbs-ocr-header {{
771
+ background: #eff6ff; border-bottom: 1px solid #bfdbfe; color: #1d4ed8;
772
+ }}
773
+ .sbs-col-body {{
774
+ padding: .75rem; font-size: .82rem; line-height: 1.8;
775
  font-family: 'Georgia', serif;
776
+ max-height: 340px; overflow-y: auto;
777
+ color: var(--text); white-space: pre-wrap; word-break: break-word;
778
  }}
779
+ /* Caractères manquants dans GT (orange) */
780
+ .d-miss {{ color: #92400e; background: #fef3c7; border-radius: 2px; padding: 0 1px; }}
781
+ /* Caractères erronés dans OCR (rouge) */
782
+ .d-err {{ color: var(--del); background: var(--del-bg); border-radius: 2px; padding: 0 1px; }}
783
+ /* Insertions dans OCR (vert) */
784
+ .d-ins-ocr {{ color: var(--ins); background: var(--ins-bg); border-radius: 2px; padding: 0 1px; }}
785
 
786
  /* ── Analyses ─────────────────────────────────────────────────────── */
787
  .charts-grid {{
 
1129
  </div>
1130
  </div>
1131
 
1132
+ <!-- Diff côte à côte GT / OCR -->
1133
+ <div class="card" id="doc-sidebyside-card">
1134
+ <div class="sbs-header">
1135
+ <h3 data-i18n="h_diff">Comparaison GT / OCR</h3>
1136
+ <div class="sbs-engine-select" id="sbs-engine-select" style="display:none">
1137
+ <label data-i18n="sbs_engine_label">Concurrent :</label>
1138
+ <select id="sbs-engine-dropdown" onchange="renderSideBySide(currentDocId)"></select>
1139
+ </div>
1140
  </div>
1141
+ <div class="sbs-columns" id="sbs-columns">
1142
+ <div class="sbs-col sbs-col-gt">
1143
+ <div class="sbs-col-header sbs-gt-header">
1144
+ <span>✓ Vérité terrain (GT)</span>
1145
+ </div>
1146
+ <div class="sbs-col-body" id="sbs-gt-body"></div>
1147
+ </div>
1148
+ <div class="sbs-col sbs-col-ocr">
1149
+ <div class="sbs-col-header sbs-ocr-header" id="sbs-ocr-header">
1150
+ <span id="sbs-ocr-engine-name">OCR</span>
1151
+ <span class="cer-badge" id="sbs-ocr-cer" style="display:none"></span>
1152
+ </div>
1153
+ <div class="sbs-col-body" id="sbs-ocr-body">—</div>
1154
+ </div>
1155
+ </div>
1156
+ <!-- Pipeline triple-diff (affiché en dessous si applicable) -->
1157
+ <div id="sbs-triple-diff" style="display:none"></div>
1158
  </div>
1159
 
1160
  <!-- Sprint 10 — Distribution CER par ligne -->
 
1435
  }}).join(' ');
1436
  }}
1437
 
1438
+ // ── Rendu côte à côte (char-level) ──────────────────────────────────
1439
+ function renderSideBySide(docId) {{
1440
+ const doc = DATA.documents.find(d => d.doc_id === docId);
1441
+ if (!doc) return;
1442
+
1443
+ const sel = document.getElementById('sbs-engine-dropdown');
1444
+ const engineIdx = sel && sel.value !== '' ? parseInt(sel.value, 10) : 0;
1445
+ const er = doc.engine_results[engineIdx];
1446
+ if (!er) return;
1447
+
1448
+ const ops = er.diff || [];
1449
+
1450
+ // Construire le HTML GT (gauche) et OCR (droite) depuis les mêmes ops
1451
+ let gtHtml = '', ocrHtml = '';
1452
+ ops.forEach(op => {{
1453
+ if (op.op === 'equal') {{
1454
+ const t = esc(op.text);
1455
+ gtHtml += t;
1456
+ ocrHtml += t;
1457
+ }} else if (op.op === 'delete') {{
1458
+ // Présent dans GT, absent de l'OCR → orange dans GT
1459
+ gtHtml += '<span class="d-miss" title="Absent de l\'OCR">' + esc(op.text) + '</span>';
1460
+ }} else if (op.op === 'insert') {{
1461
+ // Présent dans OCR, absent du GT → vert dans OCR
1462
+ ocrHtml += '<span class="d-ins-ocr" title="Insertion OCR">' + esc(op.text) + '</span>';
1463
+ }} else if (op.op === 'replace') {{
1464
+ // Substitution : orange dans GT, rouge dans OCR
1465
+ gtHtml += '<span class="d-miss" title="Différent dans l\'OCR">' + esc(op.old) + '</span>';
1466
+ ocrHtml += '<span class="d-err" title="Différent du GT">' + esc(op.new) + '</span>';
1467
+ }}
1468
+ }});
1469
+
1470
+ document.getElementById('sbs-gt-body').innerHTML = gtHtml || '<em style="color:var(--text-muted)">—</em>';
1471
+ document.getElementById('sbs-ocr-body').innerHTML = ocrHtml || '<em style="color:var(--text-muted)">Aucune sortie</em>';
1472
+
1473
+ // En-tête OCR : nom moteur + CER
1474
+ const c = cerColor(er.cer); const bg = cerBg(er.cer);
1475
+ document.getElementById('sbs-ocr-engine-name').textContent = er.engine;
1476
+ const cerBadgeEl = document.getElementById('sbs-ocr-cer');
1477
+ cerBadgeEl.textContent = pct(er.cer);
1478
+ cerBadgeEl.style.cssText = `color:${{c}};background:${{bg}};display:inline-block`;
1479
+
1480
+ // Pipeline triple-diff (si applicable)
1481
+ const tripleEl = document.getElementById('sbs-triple-diff');
1482
+ if (er.ocr_intermediate) {{
1483
+ const ocrDiffHtml = renderDiff(er.ocr_diff);
1484
+ const llmDiffHtml = renderDiff(er.llm_correction_diff);
1485
+ const isPipeline = er.ocr_intermediate !== undefined;
1486
+ const modeLabel = {{text_only:'texte seul', text_and_image:'image+texte', zero_shot:'zero-shot'}}[er.pipeline_mode] || '';
1487
+ const pipeTag = `<span class="pipeline-tag">⛓ ${{modeLabel || 'pipeline'}}</span>`;
1488
+ let onBadge = '';
1489
+ if (er.over_normalization) {{
1490
+ const on = er.over_normalization;
1491
+ const onPct = (on.score * 100).toFixed(2);
1492
+ const cls = on.score > 0.05 ? 'over-norm-badge high' : 'over-norm-badge';
1493
+ onBadge = `<span class="${{cls}}" title="Classe 10 — sur-normalisation LLM">Sur-norm. ${{onPct}}%</span>`;
1494
+ }}
1495
+ let diplomaBadge = '';
1496
+ if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
1497
+ const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
1498
+ const delta = er.cer - er.cer_diplomatic;
1499
+ const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
1500
+ diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
1501
+ title="CER diplomatique${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
1502
+ }}
1503
+ tripleEl.style.display = '';
1504
+ tripleEl.innerHTML = `
1505
+ <div style="margin-top:.75rem;padding-top:.75rem;border-top:1px solid var(--border)">
1506
+ <div style="display:flex;align-items:center;gap:.4rem;margin-bottom:.5rem;font-size:.83rem;font-weight:600">
1507
+ ${{pipeTag}} ${{diplomaBadge}} ${{onBadge}}
1508
+ <span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
1509
+ </div>
1510
+ <div class="triple-diff-wrap">
1511
+ <div class="triple-diff-section">
1512
+ <h5>GT → OCR brut</h5>
1513
+ ${{ocrDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
1514
+ </div>
1515
+ <div class="triple-diff-section">
1516
+ <h5>OCR brut → Correction LLM</h5>
1517
+ ${{llmDiffHtml || '<em style="color:var(--text-muted)">—</em>'}}
1518
+ </div>
1519
+ </div>
1520
+ </div>`;
1521
+ }} else {{
1522
+ // Afficher WER / CER diplomatique même hors pipeline
1523
+ let diplomaBadge = '';
1524
+ if (er.cer_diplomatic !== null && er.cer_diplomatic !== undefined) {{
1525
+ const dipC = cerColor(er.cer_diplomatic); const dipB = cerBg(er.cer_diplomatic);
1526
+ const delta = er.cer - er.cer_diplomatic;
1527
+ const deltaHint = delta > 0.001 ? ` (−${{(delta*100).toFixed(1)}}% avec normalisation)` : '';
1528
+ diplomaBadge = `<span class="cer-badge" style="color:${{dipC}};background:${{dipB}};opacity:.85"
1529
+ title="CER diplomatique${{deltaHint}}">diplo. ${{pct(er.cer_diplomatic)}}</span>`;
1530
+ }}
1531
+ const errBadge = er.error ? `<span class="badge" style="background:#fee2e2;color:#dc2626">Erreur</span>` : '';
1532
+ if (diplomaBadge || errBadge) {{
1533
+ tripleEl.style.display = '';
1534
+ tripleEl.innerHTML = `<div style="margin-top:.5rem;display:flex;gap:.4rem;flex-wrap:wrap;font-size:.82rem">
1535
+ <span class="badge" style="background:#f1f5f9">WER ${{pct(er.wer)}}</span>
1536
+ ${{diplomaBadge}} ${{errBadge}}
1537
+ </div>`;
1538
+ }} else {{
1539
+ tripleEl.style.display = 'none';
1540
+ tripleEl.innerHTML = '';
1541
+ }}
1542
+ }}
1543
+ }}
1544
+
1545
  // ── Score badge (ligatures / diacritiques) ───────────────────────
1546
  function _scoreBadge(v, label) {{
1547
  if (v === null || v === undefined) return '<span style="color:var(--text-muted)">—</span>';
 
1811
  placeholder.innerHTML = `<span style="font-size:2rem">🖹</span><span>${{esc(doc.image_path)}}</span>`;
1812
  }}
1813
 
1814
+ // Side-by-side diff — sélecteur de concurrent
1815
+ const selWrap = document.getElementById('sbs-engine-select');
1816
+ const sel = document.getElementById('sbs-engine-dropdown');
1817
+ if (doc.engine_results.length > 1) {{
1818
+ sel.innerHTML = doc.engine_results.map((er, i) =>
1819
+ `<option value="${{i}}">${{esc(er.engine)}}</option>`
1820
+ ).join('');
1821
+ selWrap.style.display = '';
1822
+ }} else {{
1823
+ sel.innerHTML = '';
1824
+ selWrap.style.display = 'none';
1825
+ }}
1826
+ renderSideBySide(docId);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1827
 
1828
  // ── Sprint 10 : distribution CER par ligne ──────────────────────────
1829
  const lineCard = document.getElementById('doc-line-metrics-card');
 
3066
  output_path = Path(output_path)
3067
  output_path.parent.mkdir(parents=True, exist_ok=True)
3068
 
3069
+ # Auto-encoder les images si aucune n'est fournie
3070
+ images_b64 = self.images_b64
3071
+ if not images_b64:
3072
+ images_b64 = _encode_images_b64_from_result(self.benchmark)
3073
+
3074
  labels = get_labels(self.lang)
3075
+ report_data = _build_report_data(self.benchmark, images_b64)
3076
  report_json = json.dumps(report_data, ensure_ascii=False, separators=(",", ":"))
3077
  i18n_json = json.dumps(labels, ensure_ascii=False, separators=(",", ":"))
3078
 
picarones/web/app.py CHANGED
@@ -733,6 +733,24 @@ async def api_corpus_uploads() -> dict:
733
  return {"uploads": uploads}
734
 
735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  @app.delete("/api/corpus/uploads/{corpus_id}")
737
  async def api_corpus_delete(corpus_id: str) -> dict:
738
  """Supprime un corpus uploadé."""
 
733
  return {"uploads": uploads}
734
 
735
 
736
@app.get("/api/corpus/image/{upload_id}/{filename}")
async def api_corpus_image(upload_id: str, filename: str) -> FileResponse:
    """Serve an image stored in an upload folder.

    Parameters
    ----------
    upload_id:
        Name of the upload sub-folder under ``_UPLOADS_DIR``.
    filename:
        Name of the image file inside that sub-folder.

    Raises
    ------
    HTTPException
        400 when ``upload_id`` or ``filename`` is not a plain path component;
        404 when the file does not exist or resolves outside the uploads
        directory.
    """
    # Security: each segment must be a single, plain path component.
    for value, label in ((upload_id, "upload_id"), (filename, "filename")):
        if (
            not value
            or value == "."
            or ".." in value
            or any(ch in value for ch in ("/", "\\", "\x00"))
        ):
            raise HTTPException(status_code=400, detail=f"{label} invalide")

    # Defence in depth: after resolving symlinks the target must still live
    # under the uploads root, whatever the string checks above let through.
    uploads_root = _UPLOADS_DIR.resolve()
    image_path = (uploads_root / upload_id / filename).resolve()
    try:
        image_path.relative_to(uploads_root)
    except ValueError:
        raise HTTPException(status_code=404, detail="Image non trouvée") from None

    if not image_path.is_file():
        raise HTTPException(status_code=404, detail="Image non trouvée")

    media_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".tif": "image/tiff",
        ".tiff": "image/tiff",
        ".webp": "image/webp",
    }
    # Unknown suffixes are served as a generic binary stream.
    media_type = media_types.get(image_path.suffix.lower(), "application/octet-stream")
    return FileResponse(str(image_path), media_type=media_type)
752
+
753
+
754
  @app.delete("/api/corpus/uploads/{corpus_id}")
755
  async def api_corpus_delete(corpus_id: str) -> dict:
756
  """Supprime un corpus uploadé."""