Spaces:

GwendalTsang
/

Visualisation1

Running

GwenONERA

300fafc 1 day ago

19.9 kB

	"""
	EMOTYC — Visualisation interactive des performances.

	Space Gradio avec :
	- Dropdown pour sélectionner une configuration (5 fichiers Parquet pré-chargés)
	- Tableau HTML de performances (F1, Précision, Rappel, FN/FP/TN/TP)
	- Cellules FN/FP/TN/TP cliquables → panneau d'instances concrètes
	"""

	from __future__ import annotations

	import json
	from dataclasses import dataclass, field
	from html import escape as html_escape
	from pathlib import Path
	from typing import Any

	import gradio as gr
	import numpy as np
	import pandas as pd

	# ═══════════════════════════════════════════════════════════════════════════
	# CONSTANTS
	# ═══════════════════════════════════════════════════════════════════════════

	# Directory containing this file; data files are looked up in its "data" subfolder.
	BASE_DIR = Path(__file__).resolve().parent
	DATA_DIR = BASE_DIR / "data"

	# Candidate label names. load_config keeps a label only when the dataframe has
	# both a column of that name and the matching "<label><PRED_SUFFIX>" column.
	ALL_LABELS = [
	    "Emo", "Comportementale", "Designee", "Montree", "Suggeree",
	    "Base", "Complexe", "Admiration", "Autre", "Colere",
	    "Culpabilite", "Degout", "Embarras", "Fierte", "Jalousie",
	    "Joie", "Peur", "Surprise", "Tristesse",
	]

	# Suffix appended to a label name to obtain its prediction column name.
	PRED_SUFFIX = "_pred_emotyc"

	# Canonical (unaccented) label → accented form shown in the UI.
	# Labels absent from this mapping are displayed unchanged (see display_name).
	DISPLAY_NAMES = {
	    "Colere": "Colère",
	    "Culpabilite": "Culpabilité",
	    "Degout": "Dégoût",
	    "Fierte": "Fierté",
	    "Designee": "Désignée",
	    "Montree": "Montrée",
	    "Suggeree": "Suggérée",
	    "Emo": "Émo",
	}

	# Outcome key → heading text used in the instance panel messages.
	OUTCOME_DISPLAY = {
	    "tp": "✅ Vrais Positifs (TP)",
	    "fp": "⚠️ Faux Positifs (FP)",
	    "fn": "❌ Faux Négatifs (FN)",
	    "tn": "✓ Vrais Négatifs (TN)",
	}

	# Configuration name → Parquet filename (resolved against DATA_DIR at load time)
	CONFIGS: dict[str, str] = {
	    "CyberAggAdo 200": "CyberAggAdo200.parquet",
	    "CyberAggAdo Global — Contexte": "CyberAggAdoGlobal_Context.parquet",
	    "CyberAggAdo Global — Sans Contexte": "CyberAggAdoGlobal_SansContexte.parquet",
	    "TextToKids — Contexte": "TextToKids_Context.parquet",
	    "TextToKids — Sans Contexte": "TextToKids_SansContexte.parquet",
	}


	def display_name(label: str) -> str:
	    """Return the accented display form of *label*, or *label* itself when none is registered."""
	    if label in DISPLAY_NAMES:
	        return DISPLAY_NAMES[label]
	    return label


	# ═══════════════════════════════════════════════════════════════════════════
	# DATA STRUCTURES
	# ═══════════════════════════════════════════════════════════════════════════

	@dataclass
	class LabelMetrics:
	    """Scores and confusion-matrix counts for one label of one configuration."""

	    label: str  # canonical label name (a member of ALL_LABELS)
	    f1: float  # F1 score as stored by load_config (rounded to 3 decimals there)
	    precision: float  # tp / (tp + fp), 0.0 when there are no predicted positives
	    recall: float  # tp / (tp + fn), 0.0 when there are no gold positives
	    tp: int  # rows with gold == 1 and pred == 1
	    fp: int  # rows with gold == 0 and pred == 1
	    fn: int  # rows with gold == 1 and pred == 0
	    tn: int  # rows with gold == 0 and pred == 0


	@dataclass
	class ConfigData:
	    """Everything the UI needs for one configuration: data, metrics and case index."""

	    name: str  # configuration display name (a key of CONFIGS)
	    df: pd.DataFrame  # full dataframe as read from the configuration file
	    labels: list[str]  # labels having both a gold and a prediction column
	    metrics: list[LabelMetrics]  # one entry per element of `labels`, same order
	    macro_f1: float  # mean of the per-label F1 scores
	    # Index: label → outcome ("tp"/"fp"/"fn"/"tn") → list of positional row
	    # indices into `df` (consumed with DataFrame.iloc by get_instances)
	    case_index: dict[str, dict[str, list[int]]] = field(default_factory=dict)


	# ═══════════════════════════════════════════════════════════════════════════
	# LOADING & COMPUTATION
	# ═══════════════════════════════════════════════════════════════════════════

	def load_config(name: str, xlsx_path: Path) -> ConfigData:
	    """Load one configuration file and compute its metrics and case indices.

	    Args:
	        name: Display name stored on the returned ``ConfigData``.
	        xlsx_path: Path of the file to load. Despite the parameter name, the
	            file is read with ``pd.read_parquet``.

	    Returns:
	        A ``ConfigData`` holding the dataframe, the usable labels, one
	        ``LabelMetrics`` per label (scores rounded to 3 decimals), the
	        macro-averaged F1, and for every label the positional row indices of
	        its TP / FP / FN / TN cases.

	    Raises:
	        ValueError: If no label of ``ALL_LABELS`` has both its gold column and
	            its prediction column in the file.
	    """
	    df = pd.read_parquet(xlsx_path)

	    # A label is usable only when both its gold and prediction columns exist.
	    available = [
	        label
	        for label in ALL_LABELS
	        if label in df.columns and f"{label}{PRED_SUFFIX}" in df.columns
	    ]
	    if not available:
	        raise ValueError(f"No valid label pairs found in {xlsx_path.name}")

	    metrics_list: list[LabelMetrics] = []
	    case_index: dict[str, dict[str, list[int]]] = {}
	    # Unrounded per-label F1 values: the macro average is computed from these
	    # so that rounding happens once, not once per label and again on the mean.
	    exact_f1: list[float] = []

	    for label in available:
	        # Missing values are treated as 0 (negative) on both sides.
	        gold = df[label].fillna(0).astype(int).values
	        pred = df[f"{label}{PRED_SUFFIX}"].fillna(0).astype(int).values

	        masks = {
	            "tp": (gold == 1) & (pred == 1),
	            "fp": (gold == 0) & (pred == 1),
	            "fn": (gold == 1) & (pred == 0),
	            "tn": (gold == 0) & (pred == 0),
	        }
	        tp, fp, fn, tn = (int(masks[key].sum()) for key in ("tp", "fp", "fn", "tn"))

	        # Undefined ratios (empty denominators) are reported as 0.0.
	        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
	        rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
	        f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) > 0 else 0.0
	        exact_f1.append(f1)

	        metrics_list.append(LabelMetrics(
	            label=label, f1=round(f1, 3), precision=round(prec, 3),
	            recall=round(rec, 3), tp=tp, fp=fp, fn=fn, tn=tn,
	        ))

	        # np.where yields positional indices, matching the .iloc lookup used
	        # when instances are displayed.
	        case_index[label] = {
	            outcome: np.where(mask)[0].tolist() for outcome, mask in masks.items()
	        }

	    macro_f1 = round(float(np.mean(exact_f1)), 3)

	    return ConfigData(
	        name=name, df=df, labels=available, metrics=metrics_list,
	        macro_f1=macro_f1, case_index=case_index,
	    )


	def load_all_configs() -> dict[str, ConfigData]:
	    """Load every configuration of CONFIGS whose file exists under DATA_DIR.

	    Missing files are reported on stdout and skipped; progress is printed for
	    each file that is loaded.
	    """
	    loaded: dict[str, ConfigData] = {}
	    for name, filename in CONFIGS.items():
	        path = DATA_DIR / filename
	        if not path.exists():
	            print(f"⚠️ Fichier manquant : {path}")
	            continue
	        print(f"Chargement : {name} ({filename})")
	        loaded[name] = load_config(name, path)
	        current = loaded[name]
	        print(f" → {len(current.df)} lignes, {len(current.labels)} labels")
	    return loaded


	# ═══════════════════════════════════════════════════════════════════════════
	# HTML TABLE GENERATION
	# ═══════════════════════════════════════════════════════════════════════════

	def _metric_color(value: float) -> str:
	if value >= 0.8:
	return "#15803d" # green
	if value >= 0.5:
	return "#b45309" # orange
	return "#be123c" # red


	def generate_performance_html(config: ConfigData) -> str:
	    """Build the HTML performance table for one configuration.

	    One ``<tr>`` is produced per entry of ``config.metrics``: the display name
	    of the label, then F1 / precision / recall coloured by ``_metric_color``,
	    then the FN / FP / TN / TP counts. Each count cell carries ``data-label``
	    (canonical label) and ``data-outcome`` attributes and calls the page-level
	    JavaScript function ``cellClick(this)`` when clicked. A final "Macro avg"
	    row shows ``config.macro_f1`` only.

	    Returns:
	        A string made of a ``<style>`` block followed by the wrapped table.
	    """
	    rows = []

	    for m in config.metrics:
	        # Both forms are escaped: one is rendered as text, the other is placed
	        # inside an HTML attribute.
	        dname = html_escape(display_name(m.label))
	        canon = html_escape(m.label)

	        f1_color = _metric_color(m.f1)
	        prec_color = _metric_color(m.precision)
	        rec_color = _metric_color(m.recall)

	        row = f"""<tr>
	<td class="cell-label">{dname}</td>
	<td class="cell-metric" style="color:{f1_color}">{m.f1:.3f}</td>
	<td class="cell-metric" style="color:{prec_color}">{m.precision:.3f}</td>
	<td class="cell-metric" style="color:{rec_color}">{m.recall:.3f}</td>
	<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fn" onclick="cellClick(this)">{m.fn}</td>
	<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fp" onclick="cellClick(this)">{m.fp}</td>
	<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tn" onclick="cellClick(this)">{m.tn}</td>
	<td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tp" onclick="cellClick(this)">{m.tp}</td>
	</tr>"""
	        rows.append(row)

	    # Macro avg row: only the F1 column is filled; its cells are not clickable.
	    macro_color = _metric_color(config.macro_f1)
	    rows.append(f"""<tr class="row-macro">
	<td class="cell-label" style="font-style:italic;">Macro avg</td>
	<td class="cell-metric" style="color:{macro_color}">{config.macro_f1:.3f}</td>
	<td class="cell-metric"></td>
	<td class="cell-metric"></td>
	<td class="cell-count"></td>
	<td class="cell-count"></td>
	<td class="cell-count"></td>
	<td class="cell-count"></td>
	</tr>""")

	    body = "\n".join(rows)

	    # The template is an f-string, hence the doubled braces around CSS rules.
	    return f"""
	<style>
	.perf-table-wrap {{
	overflow-x: auto;
	margin: 0 auto;
	max-width: 900px;
	}}
	.perf-table {{
	border-collapse: collapse;
	font-family: 'Inter', 'Segoe UI', system-ui, -apple-system, sans-serif;
	font-size: 13.5px;
	width: 100%;
	min-width: 700px;
	}}
	.perf-table thead th {{
	background: linear-gradient(135deg, #334155 0%, #475569 100%);
	color: #f1f5f9;
	font-weight: 600;
	padding: 10px 14px;
	border: 1px solid #475569;
	text-align: center;
	letter-spacing: 0.02em;
	position: sticky;
	top: 0;
	z-index: 2;
	}}
	.perf-table thead th:first-child {{
	text-align: left;
	border-top-left-radius: 8px;
	}}
	.perf-table thead th:last-child {{
	border-top-right-radius: 8px;
	}}
	.perf-table tbody tr {{
	transition: background 0.15s ease;
	}}
	.perf-table tbody tr:nth-child(even) {{
	background: #f8fafc;
	}}
	.perf-table tbody tr:nth-child(odd) {{
	background: #ffffff;
	}}
	.perf-table tbody tr:hover {{
	background: #e0e7ff !important;
	}}
	.perf-table tbody tr.row-macro {{
	background: #f1f5f9 !important;
	border-top: 2px solid #94a3b8;
	}}
	.cell-label {{
	font-weight: 600;
	color: #0f172a;
	padding: 9px 14px;
	border: 1px solid #e2e8f0;
	text-align: left;
	white-space: nowrap;
	}}
	.cell-metric {{
	font-weight: 600;
	padding: 9px 14px;
	border: 1px solid #e2e8f0;
	text-align: center;
	font-variant-numeric: tabular-nums;
	}}
	.cell-count {{
	color: #64748b;
	padding: 9px 14px;
	border: 1px solid #e2e8f0;
	text-align: center;
	font-variant-numeric: tabular-nums;
	}}
	.cell-clickable {{
	cursor: pointer;
	position: relative;
	transition: all 0.15s ease;
	}}
	.cell-clickable:hover {{
	background: #c7d2fe !important;
	color: #1e1b4b;
	font-weight: 700;
	box-shadow: inset 0 0 0 2px #6366f1;
	}}
	.cell-clickable.selected {{
	background: #6366f1 !important;
	color: #fff !important;
	font-weight: 700;
	box-shadow: inset 0 0 0 2px #4338ca;
	}}

	/* Dark mode */
	@media (prefers-color-scheme: dark) {{
	.perf-table thead th {{
	background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
	border-color: #475569;
	}}
	.perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
	.perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
	.perf-table tbody tr:hover {{ background: #312e81 !important; }}
	.perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
	.cell-label {{ color: #e2e8f0; border-color: #334155; }}
	.cell-metric {{ border-color: #334155; }}
	.cell-count {{ color: #94a3b8; border-color: #334155; }}
	.cell-clickable:hover {{
	background: #3730a3 !important;
	color: #e0e7ff;
	box-shadow: inset 0 0 0 2px #818cf8;
	}}
	.cell-clickable.selected {{
	background: #4f46e5 !important;
	color: #fff !important;
	box-shadow: inset 0 0 0 2px #6366f1;
	}}
	}}

	/* Gradio dark mode override */
	.dark .perf-table thead th {{
	background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
	border-color: #475569;
	}}
	.dark .perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
	.dark .perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
	.dark .perf-table tbody tr:hover {{ background: #312e81 !important; }}
	.dark .perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
	.dark .cell-label {{ color: #e2e8f0; border-color: #334155; }}
	.dark .cell-metric {{ border-color: #334155; }}
	.dark .cell-count {{ color: #94a3b8; border-color: #334155; }}
	.dark .cell-clickable:hover {{
	background: #3730a3 !important;
	color: #e0e7ff;
	box-shadow: inset 0 0 0 2px #818cf8;
	}}
	.dark .cell-clickable.selected {{
	background: #4f46e5 !important;
	color: #fff !important;
	box-shadow: inset 0 0 0 2px #6366f1;
	}}
	</style>

	<div class="perf-table-wrap">
	<table class="perf-table">
	<thead>
	<tr>
	<th>Label</th>
	<th>F1</th>
	<th>Précision</th>
	<th>Rappel</th>
	<th>FN</th>
	<th>FP</th>
	<th>TN</th>
	<th>TP</th>
	</tr>
	</thead>
	<tbody>
	{body}
	</tbody>
	</table>
	</div>
	"""


	# ═══════════════════════════════════════════════════════════════════════════
	# INSTANCE DISPLAY
	# ═══════════════════════════════════════════════════════════════════════════

	# Columns to exclude from the instance display (internal/redundant)
	EXCLUDE_COLS_PATTERNS = [
	"_run1", "_run2", # inter-annotator agreement columns
	]


	def _should_include_col(col: str) -> bool:
	"""Check if a column should be shown in the instance viewer."""
	for pattern in EXCLUDE_COLS_PATTERNS:
	if pattern in col:
	return False
	return True


	def get_instances(
	    config: ConfigData, label: str, outcome: str
	) -> tuple[str, pd.DataFrame]:
	    """Return the Markdown title and the rows matching *label* / *outcome*.

	    The rows come from ``config.case_index``; columns rejected by
	    ``_should_include_col`` are dropped, and ``TEXT``, the gold column and the
	    prediction column are moved to the front when present. An explanatory
	    message and an empty dataframe are returned when the label is unknown or
	    when no row matches.
	    """
	    try:
	        by_outcome = config.case_index[label]
	    except KeyError:
	        return "Label introuvable.", pd.DataFrame()

	    row_positions = by_outcome.get(outcome, [])
	    outcome_text = OUTCOME_DISPLAY.get(outcome, outcome)
	    shown_label = display_name(label)
	    if not row_positions:
	        return f"Aucune instance pour {shown_label} — {outcome_text}.", pd.DataFrame()

	    # Positional selection, then removal of the hidden columns.
	    rows = config.df.iloc[row_positions].copy()
	    rows = rows[[col for col in rows.columns if _should_include_col(col)]]

	    # Leading columns: text, gold value, predicted value — whichever exist.
	    leading = [
	        col
	        for col in ("TEXT", label, f"{label}{PRED_SUFFIX}")
	        if col in rows.columns
	    ]
	    trailing = [col for col in rows.columns if col not in leading]
	    rows = rows[leading + trailing]

	    count = len(row_positions)
	    plural = "s" if count > 1 else ""
	    title = f"### {outcome_text} — {shown_label} — {count} instance{plural}"

	    return title, rows.reset_index(drop=True)


	# ═══════════════════════════════════════════════════════════════════════════
	# GRADIO APP
	# ═══════════════════════════════════════════════════════════════════════════

	# Startup: announce, load every available configuration, report the count.
	print("=" * 60, "EMOTYC — Chargement des configurations...", "=" * 60, sep="\n")
	ALL_CONFIGS = load_all_configs()
	print(f"\n✅ {len(ALL_CONFIGS)} configuration(s) chargée(s).\n")

	# First loaded configuration name, or None when nothing could be loaded.
	DEFAULT_CONFIG = next(iter(ALL_CONFIGS), None)


	def on_config_change(config_name: str) -> tuple[str, pd.DataFrame]:
	    """Return the performance table HTML for *config_name* and an empty instance table.

	    An unknown configuration name yields a short "not found" HTML paragraph.
	    """
	    no_instances = pd.DataFrame()
	    selected = ALL_CONFIGS.get(config_name)
	    if selected is None:
	        return "<p>Configuration non trouvée.</p>", no_instances
	    return generate_performance_html(selected), no_instances


	def on_cell_click(
	    cell_value: str, config_name: str
	) -> tuple[str, pd.DataFrame, str]:
	    """Resolve a clicked TP/FP/TN/FN cell into its instance title and table.

	    Args:
	        cell_value: ``"<label>|<outcome>"`` as written into the hidden textbox
	            by the page's ``cellClick`` JavaScript handler.
	        config_name: Name of the currently selected configuration.

	    Returns:
	        ``(title, instances, "")``. The third element is always the empty
	        string. An empty title and table are returned when *cell_value* is
	        empty or carries no separator.
	    """
	    # The browser evaluates the JS literal '\|' to a bare "|", so the value
	    # that reaches Python is separated by a single pipe. Testing for the
	    # two-character backslash+pipe sequence never matched and left the
	    # instance panel permanently empty.
	    label, separator, outcome = (cell_value or "").partition("|")
	    if not separator:
	        return "", pd.DataFrame(), ""
	    # Still accept the backslash-prefixed form, in case a caller sends it.
	    label = label.removesuffix("\\")

	    if config_name not in ALL_CONFIGS:
	        return "Configuration introuvable.", pd.DataFrame(), ""

	    config = ALL_CONFIGS[config_name]
	    title, instances_df = get_instances(config, label, outcome)

	    return title, instances_df, ""


	# JavaScript injected into the page <head>. cellClick() marks the clicked cell
	# as selected, writes "<label>|<outcome>" into the textarea found under
	# #cell_click_input and fires an "input" event on it.
	# The separator is written as a plain '|': a backslash before the pipe is an
	# invalid escape sequence in a non-raw Python string and evaluates to the
	# same single character in JavaScript anyway.
	HEAD_JS = """
	<script>
	function cellClick(el) {
	document.querySelectorAll('.cell-clickable.selected').forEach(c => c.classList.remove('selected'));
	el.classList.add('selected');

	const label = el.getAttribute('data-label');
	const outcome = el.getAttribute('data-outcome');
	const value = label + '|' + outcome;

	const hiddenInput = document.querySelector('#cell_click_input textarea');
	if (hiddenInput) {
	hiddenInput.value = value;
	hiddenInput.dispatchEvent(new Event('input', { bubbles: true }));
	}
	}
	</script>
	"""


	# ── Build Gradio interface ─────────────────────────────────────────────

	# Markdown shown at the top of the page.
	HEADER_MD = """
	# 📊 EMOTYC — Visualisation des Performances

	Sélectionnez une configuration pour afficher le tableau de performances du modèle de détection des émotions.
	Cliquez sur les cellules FN, FP, TN ou TP pour explorer les instances concrètes.
	"""

	# Page layout: header, configuration dropdown, performance table, hidden
	# JS → Python bridge textbox, and the instance panel. HEAD_JS supplies the
	# cellClick() function referenced by the table cells.
	with gr.Blocks(
	    title="EMOTYC — Performances",
	    theme=gr.themes.Soft(
	        primary_hue=gr.themes.colors.indigo,
	        secondary_hue=gr.themes.colors.slate,
	        neutral_hue=gr.themes.colors.slate,
	        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
	    ),
	    css="""
	.main-container { max-width: 1100px; margin: 0 auto; }
	#cell_click_input { display: none !important; }
	.instance-panel { margin-top: 8px; }
	footer { display: none !important; }
	""",
	    head=HEAD_JS,
	) as demo:

	    with gr.Column(elem_classes="main-container"):
	        gr.Markdown(HEADER_MD)

	        with gr.Row():
	            config_dropdown = gr.Dropdown(
	                choices=list(ALL_CONFIGS.keys()),
	                value=DEFAULT_CONFIG,
	                label="Configuration",
	                interactive=True,
	                scale=3,
	            )



	        # Performance table (HTML)
	        perf_html = gr.HTML(label="Tableau de performances")

	        # Hidden textbox for JS → Python communication
	        # NOTE(review): HEAD_JS locates this element with
	        # `#cell_click_input textarea`; confirm that the installed Gradio
	        # version still renders a visible=False component into the DOM (the
	        # CSS rule above already hides it).
	        cell_click_input = gr.Textbox(
	            value="",
	            visible=False,
	            elem_id="cell_click_input",
	        )

	        # Instance panel
	        with gr.Column(elem_classes="instance-panel", visible=True):
	            instance_title = gr.Markdown("")
	            instance_table = gr.Dataframe(
	                value=pd.DataFrame(),
	                label="Instances",
	                interactive=False,
	                wrap=True,
	                max_height=500,
	            )

	    # ── Events ─────────────────────────────────────────────────────────

	    # Selecting a configuration redraws the table and empties the instance table.
	    config_dropdown.change(
	        fn=on_config_change,
	        inputs=[config_dropdown],
	        outputs=[perf_html, instance_table],
	    )

	    # A cell click arrives as a value change of the hidden textbox.
	    # NOTE(review): on_cell_click writes "" back into cell_click_input;
	    # confirm that this reset does not re-trigger this listener and blank the
	    # instance panel.
	    cell_click_input.change(
	        fn=on_cell_click,
	        inputs=[cell_click_input, config_dropdown],
	        outputs=[instance_title, instance_table, cell_click_input],
	    )

	    # Load default config on startup
	    demo.load(
	        fn=on_config_change,
	        inputs=[config_dropdown],
	        outputs=[perf_html, instance_table],
	    )


	# Script entry point: serve the app on all network interfaces, port 7860.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)