# Visualisation1 / app.py
# GwenONERA
# w
# 300fafc
"""
EMOTYC — Visualisation interactive des performances.
Space Gradio avec :
- Dropdown pour sélectionner une configuration (5 fichiers Parquet pré-chargés)
- Tableau HTML de performances (F1, Précision, Rappel, FN/FP/TN/TP)
- Cellules FN/FP/TN/TP cliquables → panneau d'instances concrètes
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from html import escape as html_escape
from pathlib import Path
from typing import Any
import gradio as gr
import numpy as np
import pandas as pd
# ═══════════════════════════════════════════════════════════════════════════
# CONSTANTS
# ═══════════════════════════════════════════════════════════════════════════
# Directory containing this file; data paths are resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent
# Folder in which the per-configuration data files listed in CONFIGS are looked up.
DATA_DIR = BASE_DIR / "data"
# Canonical (accent-free) label names. These are also the gold column names;
# the matching prediction column is the label name followed by PRED_SUFFIX.
ALL_LABELS = [
    "Emo", "Comportementale", "Designee", "Montree", "Suggeree",
    "Base", "Complexe", "Admiration", "Autre", "Colere",
    "Culpabilite", "Degout", "Embarras", "Fierte", "Jalousie",
    "Joie", "Peur", "Surprise", "Tristesse",
]
# Suffix appended to a label name to obtain its prediction column name.
PRED_SUFFIX = "_pred_emotyc"
# Accented display names; labels missing from this map are displayed unchanged.
DISPLAY_NAMES = {
    "Colere": "Colère",
    "Culpabilite": "Culpabilité",
    "Degout": "Dégoût",
    "Fierte": "Fierté",
    "Designee": "Désignée",
    "Montree": "Montrée",
    "Suggeree": "Suggérée",
    "Emo": "Émo",
}
# Human-readable heading for each confusion-matrix outcome key.
OUTCOME_DISPLAY = {
    "tp": "✅ Vrais Positifs (TP)",
    "fp": "⚠️ Faux Positifs (FP)",
    "fn": "❌ Faux Négatifs (FN)",
    "tn": "✓ Vrais Négatifs (TN)",
}
# Configuration name → Parquet filename (looked up under DATA_DIR)
CONFIGS: dict[str, str] = {
    "CyberAggAdo 200": "CyberAggAdo200.parquet",
    "CyberAggAdo Global — Contexte": "CyberAggAdoGlobal_Context.parquet",
    "CyberAggAdo Global — Sans Contexte": "CyberAggAdoGlobal_SansContexte.parquet",
    "TextToKids — Contexte": "TextToKids_Context.parquet",
    "TextToKids — Sans Contexte": "TextToKids_SansContexte.parquet",
}
def display_name(label: str) -> str:
    """Return the accented display form of *label*, or *label* itself when none is registered."""
    try:
        return DISPLAY_NAMES[label]
    except KeyError:
        # No accented variant registered: show the canonical name unchanged.
        return label
# ═══════════════════════════════════════════════════════════════════════════
# DATA STRUCTURES
# ═══════════════════════════════════════════════════════════════════════════
@dataclass
class LabelMetrics:
    """Binary classification scores and confusion-matrix counts for one label."""

    label: str  # canonical label name (an entry of ALL_LABELS)
    f1: float  # F1 score; load_config stores it rounded to 3 decimals
    precision: float  # precision; load_config stores it rounded to 3 decimals
    recall: float  # recall; load_config stores it rounded to 3 decimals
    tp: int  # rows with gold == 1 and pred == 1
    fp: int  # rows with gold == 0 and pred == 1
    fn: int  # rows with gold == 1 and pred == 0
    tn: int  # rows with gold == 0 and pred == 0
@dataclass
class ConfigData:
    """Data and derived metrics for one loaded configuration."""

    name: str  # configuration display name
    df: pd.DataFrame  # full table as read from the configuration file
    labels: list[str]  # labels that have both a gold and a prediction column
    metrics: list[LabelMetrics]  # one entry per label, in the order of `labels`
    macro_f1: float  # mean of the per-label F1 scores
    # Index: label → outcome ("tp"/"fp"/"fn"/"tn") → list of positional row indices
    case_index: dict[str, dict[str, list[int]]] = field(default_factory=dict)
# ═══════════════════════════════════════════════════════════════════════════
# LOADING & COMPUTATION
# ═══════════════════════════════════════════════════════════════════════════
def load_config(name: str, xlsx_path: Path) -> ConfigData:
    """Load a single configuration file and compute all metrics and case indices.

    Args:
        name: Display name of the configuration.
        xlsx_path: Path to the data file. Despite the parameter name, the
            file is read with ``pd.read_parquet``.

    Returns:
        A ``ConfigData`` holding the DataFrame, the labels found, the
        per-label metrics, the macro-averaged F1 and, for every label, the
        positional row indices of its TP/FP/FN/TN cases.

    Raises:
        ValueError: If no label of ``ALL_LABELS`` has both its gold column
            and its prediction column in the file.
    """
    df = pd.read_parquet(xlsx_path)

    # Detect available labels (must have both gold and pred columns)
    available = [
        label
        for label in ALL_LABELS
        if label in df.columns and f"{label}{PRED_SUFFIX}" in df.columns
    ]
    if not available:
        raise ValueError(f"No valid label pairs found in {xlsx_path.name}")

    # Compute metrics and case index
    metrics_list: list[LabelMetrics] = []
    case_index: dict[str, dict[str, list[int]]] = {}
    # Unrounded F1 scores, kept so the macro average is rounded only once
    # instead of averaging values that were already rounded for display.
    raw_f1_scores: list[float] = []
    for label in available:
        pred_col = f"{label}{PRED_SUFFIX}"
        # Missing annotations/predictions are counted as negatives (0).
        gold = df[label].fillna(0).astype(int).values
        pred = df[pred_col].fillna(0).astype(int).values

        tp_mask = (gold == 1) & (pred == 1)
        fp_mask = (gold == 0) & (pred == 1)
        fn_mask = (gold == 1) & (pred == 0)
        tn_mask = (gold == 0) & (pred == 0)
        tp = int(tp_mask.sum())
        fp = int(fp_mask.sum())
        fn = int(fn_mask.sum())
        tn = int(tn_mask.sum())

        # Undefined ratios (empty denominators) are reported as 0.0.
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) > 0 else 0.0
        raw_f1_scores.append(f1)

        metrics_list.append(LabelMetrics(
            label=label, f1=round(f1, 3), precision=round(prec, 3),
            recall=round(rec, 3), tp=tp, fp=fp, fn=fn, tn=tn,
        ))
        # np.where yields positions, not index labels: consumers must use iloc.
        case_index[label] = {
            "tp": np.where(tp_mask)[0].tolist(),
            "fp": np.where(fp_mask)[0].tolist(),
            "fn": np.where(fn_mask)[0].tolist(),
            "tn": np.where(tn_mask)[0].tolist(),
        }

    macro_f1 = round(float(np.mean(raw_f1_scores)), 3)
    return ConfigData(
        name=name, df=df, labels=available, metrics=metrics_list,
        macro_f1=macro_f1, case_index=case_index,
    )
def load_all_configs() -> dict[str, ConfigData]:
    """Load every configuration of CONFIGS whose file exists under DATA_DIR.

    Progress is printed to stdout; a missing file is reported and skipped.

    Returns:
        Mapping from configuration name to its loaded ``ConfigData``, in
        CONFIGS order.
    """
    loaded: dict[str, ConfigData] = {}
    for name, filename in CONFIGS.items():
        path = DATA_DIR / filename
        if not path.exists():
            print(f"⚠️ Fichier manquant : {path}")
            continue
        print(f"Chargement : {name} ({filename})")
        cfg = load_config(name, path)
        loaded[name] = cfg
        print(f" → {len(cfg.df)} lignes, {len(cfg.labels)} labels")
    return loaded
# ═══════════════════════════════════════════════════════════════════════════
# HTML TABLE GENERATION
# ═══════════════════════════════════════════════════════════════════════════
def _metric_color(value: float) -> str:
if value >= 0.8:
return "#15803d" # green
if value >= 0.5:
return "#b45309" # orange
return "#be123c" # red
def generate_performance_html(config: ConfigData) -> str:
    """Generate an interactive HTML performance table with clickable cells.

    Args:
        config: Loaded configuration whose ``metrics`` and ``macro_f1`` are
            rendered.

    Returns:
        An HTML fragment made of a ``<style>`` block followed by the table:
        one row per label (F1, precision, recall, FN/FP/TN/TP counts) and a
        final "Macro avg" row that only carries the macro F1.
    """
    rows = []
    for m in config.metrics:
        # Both forms of the label are HTML-escaped: the display name is used
        # as cell text, the canonical name as the data-label attribute value.
        dname = html_escape(display_name(m.label))
        canon = html_escape(m.label)
        f1_color = _metric_color(m.f1)
        prec_color = _metric_color(m.precision)
        rec_color = _metric_color(m.recall)
        # Count cells expose data-label/data-outcome and call the page-level
        # cellClick(this) JavaScript function when clicked.
        row = f"""<tr>
<td class="cell-label">{dname}</td>
<td class="cell-metric" style="color:{f1_color}">{m.f1:.3f}</td>
<td class="cell-metric" style="color:{prec_color}">{m.precision:.3f}</td>
<td class="cell-metric" style="color:{rec_color}">{m.recall:.3f}</td>
<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fn" onclick="cellClick(this)">{m.fn}</td>
<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fp" onclick="cellClick(this)">{m.fp}</td>
<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tn" onclick="cellClick(this)">{m.tn}</td>
<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tp" onclick="cellClick(this)">{m.tp}</td>
</tr>"""
        rows.append(row)
    # Macro avg row
    macro_color = _metric_color(config.macro_f1)
    rows.append(f"""<tr class="row-macro">
<td class="cell-label" style="font-style:italic;">Macro avg</td>
<td class="cell-metric" style="color:{macro_color}">{config.macro_f1:.3f}</td>
<td class="cell-metric"></td>
<td class="cell-metric"></td>
<td class="cell-count"></td>
<td class="cell-count"></td>
<td class="cell-count"></td>
<td class="cell-count"></td>
</tr>""")
    body = "\n".join(rows)
    # In the f-string below, doubled braces are literal CSS braces; {body} is
    # the only interpolated value.
    return f"""
<style>
.perf-table-wrap {{
overflow-x: auto;
margin: 0 auto;
max-width: 900px;
}}
.perf-table {{
border-collapse: collapse;
font-family: 'Inter', 'Segoe UI', system-ui, -apple-system, sans-serif;
font-size: 13.5px;
width: 100%;
min-width: 700px;
}}
.perf-table thead th {{
background: linear-gradient(135deg, #334155 0%, #475569 100%);
color: #f1f5f9;
font-weight: 600;
padding: 10px 14px;
border: 1px solid #475569;
text-align: center;
letter-spacing: 0.02em;
position: sticky;
top: 0;
z-index: 2;
}}
.perf-table thead th:first-child {{
text-align: left;
border-top-left-radius: 8px;
}}
.perf-table thead th:last-child {{
border-top-right-radius: 8px;
}}
.perf-table tbody tr {{
transition: background 0.15s ease;
}}
.perf-table tbody tr:nth-child(even) {{
background: #f8fafc;
}}
.perf-table tbody tr:nth-child(odd) {{
background: #ffffff;
}}
.perf-table tbody tr:hover {{
background: #e0e7ff !important;
}}
.perf-table tbody tr.row-macro {{
background: #f1f5f9 !important;
border-top: 2px solid #94a3b8;
}}
.cell-label {{
font-weight: 600;
color: #0f172a;
padding: 9px 14px;
border: 1px solid #e2e8f0;
text-align: left;
white-space: nowrap;
}}
.cell-metric {{
font-weight: 600;
padding: 9px 14px;
border: 1px solid #e2e8f0;
text-align: center;
font-variant-numeric: tabular-nums;
}}
.cell-count {{
color: #64748b;
padding: 9px 14px;
border: 1px solid #e2e8f0;
text-align: center;
font-variant-numeric: tabular-nums;
}}
.cell-clickable {{
cursor: pointer;
position: relative;
transition: all 0.15s ease;
}}
.cell-clickable:hover {{
background: #c7d2fe !important;
color: #1e1b4b;
font-weight: 700;
box-shadow: inset 0 0 0 2px #6366f1;
}}
.cell-clickable.selected {{
background: #6366f1 !important;
color: #fff !important;
font-weight: 700;
box-shadow: inset 0 0 0 2px #4338ca;
}}
/* Dark mode */
@media (prefers-color-scheme: dark) {{
.perf-table thead th {{
background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
border-color: #475569;
}}
.perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
.perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
.perf-table tbody tr:hover {{ background: #312e81 !important; }}
.perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
.cell-label {{ color: #e2e8f0; border-color: #334155; }}
.cell-metric {{ border-color: #334155; }}
.cell-count {{ color: #94a3b8; border-color: #334155; }}
.cell-clickable:hover {{
background: #3730a3 !important;
color: #e0e7ff;
box-shadow: inset 0 0 0 2px #818cf8;
}}
.cell-clickable.selected {{
background: #4f46e5 !important;
color: #fff !important;
box-shadow: inset 0 0 0 2px #6366f1;
}}
}}
/* Gradio dark mode override */
.dark .perf-table thead th {{
background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
border-color: #475569;
}}
.dark .perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
.dark .perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
.dark .perf-table tbody tr:hover {{ background: #312e81 !important; }}
.dark .perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
.dark .cell-label {{ color: #e2e8f0; border-color: #334155; }}
.dark .cell-metric {{ border-color: #334155; }}
.dark .cell-count {{ color: #94a3b8; border-color: #334155; }}
.dark .cell-clickable:hover {{
background: #3730a3 !important;
color: #e0e7ff;
box-shadow: inset 0 0 0 2px #818cf8;
}}
.dark .cell-clickable.selected {{
background: #4f46e5 !important;
color: #fff !important;
box-shadow: inset 0 0 0 2px #6366f1;
}}
</style>
<div class="perf-table-wrap">
<table class="perf-table">
<thead>
<tr>
<th>Label</th>
<th>F1</th>
<th>Précision</th>
<th>Rappel</th>
<th>FN</th>
<th>FP</th>
<th>TN</th>
<th>TP</th>
</tr>
</thead>
<tbody>
{body}
</tbody>
</table>
</div>
"""
# ═══════════════════════════════════════════════════════════════════════════
# INSTANCE DISPLAY
# ═══════════════════════════════════════════════════════════════════════════
# Columns to exclude from the instance display (internal/redundant)
EXCLUDE_COLS_PATTERNS = [
"_run1", "_run2", # inter-annotator agreement columns
]
def _should_include_col(col: str) -> bool:
"""Check if a column should be shown in the instance viewer."""
for pattern in EXCLUDE_COLS_PATTERNS:
if pattern in col:
return False
return True
def get_instances(
    config: ConfigData, label: str, outcome: str
) -> tuple[str, pd.DataFrame]:
    """Retrieve the instances of one confusion-matrix cell.

    Args:
        config: Loaded configuration to read from.
        label: Canonical label name (a key of ``config.case_index``).
        outcome: Outcome key, one of ``"tp"``, ``"fp"``, ``"fn"``, ``"tn"``.

    Returns:
        A ``(title, table)`` pair. ``title`` is a Markdown heading, or a
        plain message when the label is unknown or the cell is empty (in
        which case ``table`` is an empty DataFrame). ``table`` holds the
        matching rows with a fresh 0-based index, without the excluded
        columns, and with ``TEXT``, the gold column and the prediction
        column moved first when present.
    """
    if label not in config.case_index:
        return "Label introuvable.", pd.DataFrame()

    outcome_text = OUTCOME_DISPLAY.get(outcome, outcome)
    indices = config.case_index[label].get(outcome, [])
    if not indices:
        # Same " — " separator as the title below, so the label name and the
        # outcome text no longer run into each other.
        return (
            f"Aucune instance pour {display_name(label)} — {outcome_text}.",
            pd.DataFrame(),
        )

    # case_index stores positions, hence iloc rather than loc.
    subset = config.df.iloc[indices].copy()

    # Filter columns
    cols_to_show = [c for c in subset.columns if _should_include_col(c)]
    subset = subset[cols_to_show]

    # Reorder: TEXT first, then gold label, pred label, then rest
    pred_col = f"{label}{PRED_SUFFIX}"
    priority_cols = [c for c in ("TEXT", label, pred_col) if c in subset.columns]
    remaining = [c for c in subset.columns if c not in priority_cols]
    subset = subset[priority_cols + remaining]

    title = (
        f"### {outcome_text} — "
        f"**{display_name(label)}** — "
        f"**{len(indices)}** instance{'s' if len(indices) > 1 else ''}"
    )
    return title, subset.reset_index(drop=True)
# ═══════════════════════════════════════════════════════════════════════════
# GRADIO APP
# ═══════════════════════════════════════════════════════════════════════════
# Startup: every configuration is loaded once at import time, with a banner
# printed before and a summary printed after.
print("=" * 60, "EMOTYC — Chargement des configurations...", "=" * 60, sep="\n")
ALL_CONFIGS = load_all_configs()
print(f"\n✅ {len(ALL_CONFIGS)} configuration(s) chargée(s).\n")
# Name of the first loaded configuration, or None when nothing was loaded.
DEFAULT_CONFIG = next(iter(ALL_CONFIGS), None)
def on_config_change(config_name: str) -> tuple[str, pd.DataFrame]:
    """Render the table of the selected configuration and empty the instance table.

    Returns:
        ``(html, empty_dataframe)``; ``html`` is a short "not found"
        paragraph when *config_name* is not a loaded configuration.
    """
    selected = ALL_CONFIGS.get(config_name)
    if selected is None:
        return "<p>Configuration non trouvée.</p>", pd.DataFrame()
    return generate_performance_html(selected), pd.DataFrame()
def on_cell_click(
    cell_value: str, config_name: str
) -> tuple[Any, Any, Any]:
    """Handle a click on a TP/FP/TN/FN cell relayed through the hidden textbox.

    Args:
        cell_value: Payload written by the page script, ``"<label>|<outcome>"``.
        config_name: Name of the currently selected configuration.

    Returns:
        Values for ``(instance title, instance table, hidden textbox)``. The
        textbox is reset to ``""`` after a handled click so that clicking the
        same cell again still changes its value. For an empty or malformed
        payload, three no-op updates are returned and nothing on screen
        changes.
    """
    if not cell_value or "|" not in cell_value:
        # The reset to "" below is itself a value change of the textbox, and
        # Gradio documents that `change` also fires on function updates, so
        # this handler runs again with an empty payload. Returning blank
        # values here would wipe the instances that were just displayed;
        # no-op updates leave every output untouched.
        return gr.update(), gr.update(), gr.update()

    label, outcome = cell_value.split("|", 1)
    if config_name not in ALL_CONFIGS:
        return "Configuration introuvable.", pd.DataFrame(), ""
    config = ALL_CONFIGS[config_name]
    title, instances_df = get_instances(config, label, outcome)
    return title, instances_df, ""
# Script passed to gr.Blocks(head=...). It defines cellClick(el), which marks
# the clicked cell as selected, builds "<data-label>|<data-outcome>" and
# writes it into the textarea of the element with id "cell_click_input",
# then dispatches a bubbling "input" event on that textarea.
HEAD_JS = """
<script>
function cellClick(el) {
document.querySelectorAll('.cell-clickable.selected').forEach(c => c.classList.remove('selected'));
el.classList.add('selected');
const label = el.getAttribute('data-label');
const outcome = el.getAttribute('data-outcome');
const value = label + '|' + outcome;
const hiddenInput = document.querySelector('#cell_click_input textarea');
if (hiddenInput) {
hiddenInput.value = value;
hiddenInput.dispatchEvent(new Event('input', { bubbles: true }));
}
}
</script>
"""
# ── Build Gradio interface ─────────────────────────────────────────────
# Markdown shown at the top of the page (title and usage instructions).
HEADER_MD = """
# 📊 EMOTYC — Visualisation des Performances
Sélectionnez une configuration pour afficher le tableau de performances du modèle de détection des émotions.
**Cliquez sur les cellules FN, FP, TN ou TP** pour explorer les instances concrètes.
"""
# Page layout: configuration dropdown, performance table, hidden JS → Python
# bridge textbox, and the instance panel (heading + table).
with gr.Blocks(
    title="EMOTYC — Performances",
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.indigo,
        secondary_hue=gr.themes.colors.slate,
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    ),
    css="""
.main-container { max-width: 1100px; margin: 0 auto; }
#cell_click_input { display: none !important; }
.instance-panel { margin-top: 8px; }
footer { display: none !important; }
""",
    head=HEAD_JS,
) as demo:
    with gr.Column(elem_classes="main-container"):
        gr.Markdown(HEADER_MD)
        with gr.Row():
            config_dropdown = gr.Dropdown(
                choices=list(ALL_CONFIGS.keys()),
                value=DEFAULT_CONFIG,
                label="Configuration",
                interactive=True,
                scale=3,
            )
        # Performance table (HTML)
        perf_html = gr.HTML(label="Tableau de performances")
        # Hidden textbox for JS → Python communication.
        # It must stay in the DOM so that cellClick() can find
        # '#cell_click_input textarea': recent Gradio versions do not render
        # components created with visible=False, so the component is kept
        # visible for Gradio and hidden by the '#cell_click_input' CSS rule
        # declared above.
        cell_click_input = gr.Textbox(
            value="",
            visible=True,
            elem_id="cell_click_input",
        )
        # Instance panel
        with gr.Column(elem_classes="instance-panel", visible=True):
            instance_title = gr.Markdown("")
            instance_table = gr.Dataframe(
                value=pd.DataFrame(),
                label="Instances",
                interactive=False,
                wrap=True,
                max_height=500,
            )
    # ── Events ─────────────────────────────────────────────────────────
    config_dropdown.change(
        fn=on_config_change,
        inputs=[config_dropdown],
        outputs=[perf_html, instance_table],
    )
    # on_config_change only refreshes the table and empties the instance
    # table; clear the heading as well so that no title from the previous
    # configuration remains above an empty table.
    config_dropdown.change(
        fn=lambda: "",
        inputs=None,
        outputs=[instance_title],
    )
    cell_click_input.change(
        fn=on_cell_click,
        inputs=[cell_click_input, config_dropdown],
        outputs=[instance_title, instance_table, cell_click_input],
    )
    # Load default config on startup
    demo.load(
        fn=on_config_change,
        inputs=[config_dropdown],
        outputs=[perf_html, instance_table],
    )
# Script entry point: serve the app on all network interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)