Spaces:

GwendalTsang
/

Visualisation1

Running

File size: 19,911 Bytes

"""
EMOTYC — Visualisation interactive des performances.

Space Gradio avec :
- Dropdown pour sélectionner une configuration (5 fichiers Parquet pré-chargés)
- Tableau HTML de performances (F1, Précision, Rappel, FN/FP/TN/TP)
- Cellules FN/FP/TN/TP cliquables → panneau d'instances concrètes
"""

from __future__ import annotations

import json
from dataclasses import dataclass, field
from html import escape as html_escape
from pathlib import Path
from typing import Any

import gradio as gr
import numpy as np
import pandas as pd

# ═══════════════════════════════════════════════════════════════════════════
#  CONSTANTS
# ═══════════════════════════════════════════════════════════════════════════

# Directory containing this file, and the "data" sub-directory next to it
# where the configuration files are looked up.
BASE_DIR = Path(__file__).resolve().parent
DATA_DIR = BASE_DIR / "data"

# Canonical (unaccented) label names. Each one is looked up as a gold column
# in the loaded table; labels missing from a file are skipped at load time.
ALL_LABELS = [
    "Emo", "Comportementale", "Designee", "Montree", "Suggeree",
    "Base", "Complexe", "Admiration", "Autre", "Colere",
    "Culpabilite", "Degout", "Embarras", "Fierte", "Jalousie",
    "Joie", "Peur", "Surprise", "Tristesse",
]

# Appended to a canonical label name to form its prediction column name.
PRED_SUFFIX = "_pred_emotyc"

# Canonical label → accented form shown in the UI. Labels without an entry
# are displayed under their canonical name.
DISPLAY_NAMES = {
    "Colere": "Colère",
    "Culpabilite": "Culpabilité",
    "Degout": "Dégoût",
    "Fierte": "Fierté",
    "Designee": "Désignée",
    "Montree": "Montrée",
    "Suggeree": "Suggérée",
    "Emo": "Émo",
}

# Outcome key ("tp"/"fp"/"fn"/"tn") → heading text used in the instance panel.
OUTCOME_DISPLAY = {
    "tp": "✅ Vrais Positifs (TP)",
    "fp": "⚠️ Faux Positifs (FP)",
    "fn": "❌ Faux Négatifs (FN)",
    "tn": "✓ Vrais Négatifs (TN)",
}

# Configuration name (shown in the dropdown) → Parquet filename under DATA_DIR
CONFIGS: dict[str, str] = {
    "CyberAggAdo 200": "CyberAggAdo200.parquet",
    "CyberAggAdo Global — Contexte": "CyberAggAdoGlobal_Context.parquet",
    "CyberAggAdo Global — Sans Contexte": "CyberAggAdoGlobal_SansContexte.parquet",
    "TextToKids — Contexte": "TextToKids_Context.parquet",
    "TextToKids — Sans Contexte": "TextToKids_SansContexte.parquet",
}


def display_name(label: str) -> str:
    """Return the accented display form of *label*, or *label* itself if none is registered."""
    try:
        return DISPLAY_NAMES[label]
    except KeyError:
        return label


# ═══════════════════════════════════════════════════════════════════════════
#  DATA STRUCTURES
# ═══════════════════════════════════════════════════════════════════════════

@dataclass
class LabelMetrics:
    """Scores and confusion-matrix counts for a single label."""

    label: str  # canonical label name (gold column name)
    f1: float  # F1 score, stored rounded to 3 decimals
    precision: float  # tp / (tp + fp), 0.0 when there are no predicted positives
    recall: float  # tp / (tp + fn), 0.0 when there are no gold positives
    tp: int  # rows with gold == 1 and pred == 1
    fp: int  # rows with gold == 0 and pred == 1
    fn: int  # rows with gold == 1 and pred == 0
    tn: int  # rows with gold == 0 and pred == 0


@dataclass
class ConfigData:
    """Everything the UI needs for one loaded configuration."""

    name: str  # configuration name as shown in the dropdown
    df: pd.DataFrame  # full table as read from the configuration file
    labels: list[str]  # labels having both a gold and a prediction column
    metrics: list[LabelMetrics]  # one entry per label, same order as `labels`
    macro_f1: float  # mean of the per-label F1 scores, rounded to 3 decimals
    # Index: label → outcome ("tp"/"fp"/"fn"/"tn") → list of positional row
    # indices into `df` (consumed with `df.iloc`)
    case_index: dict[str, dict[str, list[int]]] = field(default_factory=dict)


# ═══════════════════════════════════════════════════════════════════════════
#  LOADING & COMPUTATION
# ═══════════════════════════════════════════════════════════════════════════

def load_config(name: str, xlsx_path: Path) -> ConfigData:
    """Load one configuration file and compute its metrics and case index.

    Args:
        name: Configuration name stored on the returned object.
        xlsx_path: Path of the file to read. Despite the historical parameter
            name, the file is read with ``pd.read_parquet``.

    Returns:
        A ``ConfigData`` holding the raw table, the labels found in it, one
        ``LabelMetrics`` per label, the macro-averaged F1, and for every
        label the positional row indices of its TP/FP/FN/TN rows.

    Raises:
        ValueError: If no label of ``ALL_LABELS`` has both its gold column
            and its ``PRED_SUFFIX`` prediction column in the file.
    """
    df = pd.read_parquet(xlsx_path)

    # Keep only labels for which both the gold and the prediction column exist.
    available = [
        label
        for label in ALL_LABELS
        if label in df.columns and f"{label}{PRED_SUFFIX}" in df.columns
    ]

    if not available:
        raise ValueError(f"No valid label pairs found in {xlsx_path.name}")

    metrics_list: list[LabelMetrics] = []
    case_index: dict[str, dict[str, list[int]]] = {}
    # Unrounded per-label F1 scores. The macro average is computed from these
    # so that the 3-decimal rounding applied for display is not compounded.
    raw_f1_scores: list[float] = []

    for label in available:
        pred_col = f"{label}{PRED_SUFFIX}"
        # Missing annotations/predictions count as negatives (0).
        gold = df[label].fillna(0).astype(int).values
        pred = df[pred_col].fillna(0).astype(int).values

        # A row whose gold or pred value is neither 0 nor 1 after the cast
        # matches none of the four masks and is therefore not counted.
        tp_mask = (gold == 1) & (pred == 1)
        fp_mask = (gold == 0) & (pred == 1)
        fn_mask = (gold == 1) & (pred == 0)
        tn_mask = (gold == 0) & (pred == 0)

        tp = int(tp_mask.sum())
        fp = int(fp_mask.sum())
        fn = int(fn_mask.sum())
        tn = int(tn_mask.sum())

        # Each ratio falls back to 0.0 when its denominator is zero.
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        rec = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) > 0 else 0.0
        raw_f1_scores.append(f1)

        metrics_list.append(LabelMetrics(
            label=label, f1=round(f1, 3), precision=round(prec, 3),
            recall=round(rec, 3), tp=tp, fp=fp, fn=fn, tn=tn,
        ))

        # np.where yields positional indices, matching the later df.iloc use.
        case_index[label] = {
            "tp": np.where(tp_mask)[0].tolist(),
            "fp": np.where(fp_mask)[0].tolist(),
            "fn": np.where(fn_mask)[0].tolist(),
            "tn": np.where(tn_mask)[0].tolist(),
        }

    macro_f1 = round(float(np.mean(raw_f1_scores)), 3)

    return ConfigData(
        name=name, df=df, labels=available, metrics=metrics_list,
        macro_f1=macro_f1, case_index=case_index,
    )


def load_all_configs() -> dict[str, ConfigData]:
    """Load every configuration of ``CONFIGS`` whose file exists under ``DATA_DIR``.

    Progress and missing files are reported on stdout; missing files are
    skipped rather than treated as errors.
    """
    loaded: dict[str, ConfigData] = {}
    for name, filename in CONFIGS.items():
        path = DATA_DIR / filename
        if not path.exists():
            print(f"⚠️ Fichier manquant : {path}")
            continue
        print(f"Chargement : {name} ({filename})")
        data = load_config(name, path)
        loaded[name] = data
        print(f"  → {len(data.df)} lignes, {len(data.labels)} labels")
    return loaded


# ═══════════════════════════════════════════════════════════════════════════
#  HTML TABLE GENERATION
# ═══════════════════════════════════════════════════════════════════════════

def _metric_color(value: float) -> str:
    if value >= 0.8:
        return "#15803d"  # green
    if value >= 0.5:
        return "#b45309"  # orange
    return "#be123c"      # red


def generate_performance_html(config: ConfigData) -> str:
    """Build the HTML performance table for one configuration.

    The returned fragment is an inline ``<style>`` block followed by a table
    with one row per entry of ``config.metrics`` and a final "Macro avg" row
    showing ``config.macro_f1``. F1, precision and recall are printed with
    three decimals and coloured through ``_metric_color``. The FN/FP/TN/TP
    cells carry ``data-label`` / ``data-outcome`` attributes and an
    ``onclick="cellClick(this)"`` handler; ``cellClick`` is not defined in
    this fragment and must be provided by the hosting page.

    Args:
        config: Loaded configuration whose metrics are rendered.

    Returns:
        The HTML markup as a single string.
    """
    rows = []

    for m in config.metrics:
        # The label is escaped in both forms: the display name is emitted as
        # cell text, the canonical name inside the data-label attribute.
        dname = html_escape(display_name(m.label))
        canon = html_escape(m.label)

        f1_color = _metric_color(m.f1)
        prec_color = _metric_color(m.precision)
        rec_color = _metric_color(m.recall)

        # Count cells are emitted in the order FN, FP, TN, TP to match the
        # header row of the table below.
        row = f"""<tr>
  <td class="cell-label">{dname}</td>
  <td class="cell-metric" style="color:{f1_color}">{m.f1:.3f}</td>
  <td class="cell-metric" style="color:{prec_color}">{m.precision:.3f}</td>
  <td class="cell-metric" style="color:{rec_color}">{m.recall:.3f}</td>
  <td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fn" onclick="cellClick(this)">{m.fn}</td>
  <td class="cell-count cell-clickable" data-label="{canon}" data-outcome="fp" onclick="cellClick(this)">{m.fp}</td>
  <td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tn" onclick="cellClick(this)">{m.tn}</td>
  <td class="cell-count cell-clickable" data-label="{canon}" data-outcome="tp" onclick="cellClick(this)">{m.tp}</td>
</tr>"""
        rows.append(row)

    # Macro avg row: only the F1 column is filled, and none of its cells is
    # clickable.
    macro_color = _metric_color(config.macro_f1)
    rows.append(f"""<tr class="row-macro">
  <td class="cell-label" style="font-style:italic;">Macro avg</td>
  <td class="cell-metric" style="color:{macro_color}">{config.macro_f1:.3f}</td>
  <td class="cell-metric"></td>
  <td class="cell-metric"></td>
  <td class="cell-count"></td>
  <td class="cell-count"></td>
  <td class="cell-count"></td>
  <td class="cell-count"></td>
</tr>""")

    body = "\n".join(rows)

    # The template is an f-string, so literal CSS braces are doubled. The dark
    # palette appears twice: once under the prefers-color-scheme media query
    # and once under a `.dark` ancestor class.
    return f"""
<style>
  .perf-table-wrap {{
    overflow-x: auto;
    margin: 0 auto;
    max-width: 900px;
  }}
  .perf-table {{
    border-collapse: collapse;
    font-family: 'Inter', 'Segoe UI', system-ui, -apple-system, sans-serif;
    font-size: 13.5px;
    width: 100%;
    min-width: 700px;
  }}
  .perf-table thead th {{
    background: linear-gradient(135deg, #334155 0%, #475569 100%);
    color: #f1f5f9;
    font-weight: 600;
    padding: 10px 14px;
    border: 1px solid #475569;
    text-align: center;
    letter-spacing: 0.02em;
    position: sticky;
    top: 0;
    z-index: 2;
  }}
  .perf-table thead th:first-child {{
    text-align: left;
    border-top-left-radius: 8px;
  }}
  .perf-table thead th:last-child {{
    border-top-right-radius: 8px;
  }}
  .perf-table tbody tr {{
    transition: background 0.15s ease;
  }}
  .perf-table tbody tr:nth-child(even) {{
    background: #f8fafc;
  }}
  .perf-table tbody tr:nth-child(odd) {{
    background: #ffffff;
  }}
  .perf-table tbody tr:hover {{
    background: #e0e7ff !important;
  }}
  .perf-table tbody tr.row-macro {{
    background: #f1f5f9 !important;
    border-top: 2px solid #94a3b8;
  }}
  .cell-label {{
    font-weight: 600;
    color: #0f172a;
    padding: 9px 14px;
    border: 1px solid #e2e8f0;
    text-align: left;
    white-space: nowrap;
  }}
  .cell-metric {{
    font-weight: 600;
    padding: 9px 14px;
    border: 1px solid #e2e8f0;
    text-align: center;
    font-variant-numeric: tabular-nums;
  }}
  .cell-count {{
    color: #64748b;
    padding: 9px 14px;
    border: 1px solid #e2e8f0;
    text-align: center;
    font-variant-numeric: tabular-nums;
  }}
  .cell-clickable {{
    cursor: pointer;
    position: relative;
    transition: all 0.15s ease;
  }}
  .cell-clickable:hover {{
    background: #c7d2fe !important;
    color: #1e1b4b;
    font-weight: 700;
    box-shadow: inset 0 0 0 2px #6366f1;
  }}
  .cell-clickable.selected {{
    background: #6366f1 !important;
    color: #fff !important;
    font-weight: 700;
    box-shadow: inset 0 0 0 2px #4338ca;
  }}

  /* Dark mode */
  @media (prefers-color-scheme: dark) {{
    .perf-table thead th {{
      background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
      border-color: #475569;
    }}
    .perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
    .perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
    .perf-table tbody tr:hover {{ background: #312e81 !important; }}
    .perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
    .cell-label {{ color: #e2e8f0; border-color: #334155; }}
    .cell-metric {{ border-color: #334155; }}
    .cell-count {{ color: #94a3b8; border-color: #334155; }}
    .cell-clickable:hover {{
      background: #3730a3 !important;
      color: #e0e7ff;
      box-shadow: inset 0 0 0 2px #818cf8;
    }}
    .cell-clickable.selected {{
      background: #4f46e5 !important;
      color: #fff !important;
      box-shadow: inset 0 0 0 2px #6366f1;
    }}
  }}

  /* Gradio dark mode override */
  .dark .perf-table thead th {{
    background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
    border-color: #475569;
  }}
  .dark .perf-table tbody tr:nth-child(even) {{ background: #1e293b; }}
  .dark .perf-table tbody tr:nth-child(odd) {{ background: #0f172a; }}
  .dark .perf-table tbody tr:hover {{ background: #312e81 !important; }}
  .dark .perf-table tbody tr.row-macro {{ background: #1e293b !important; border-top-color: #475569; }}
  .dark .cell-label {{ color: #e2e8f0; border-color: #334155; }}
  .dark .cell-metric {{ border-color: #334155; }}
  .dark .cell-count {{ color: #94a3b8; border-color: #334155; }}
  .dark .cell-clickable:hover {{
    background: #3730a3 !important;
    color: #e0e7ff;
    box-shadow: inset 0 0 0 2px #818cf8;
  }}
  .dark .cell-clickable.selected {{
    background: #4f46e5 !important;
    color: #fff !important;
    box-shadow: inset 0 0 0 2px #6366f1;
  }}
</style>

<div class="perf-table-wrap">
<table class="perf-table">
  <thead>
    <tr>
      <th>Label</th>
      <th>F1</th>
      <th>Précision</th>
      <th>Rappel</th>
      <th>FN</th>
      <th>FP</th>
      <th>TN</th>
      <th>TP</th>
    </tr>
  </thead>
  <tbody>
    {body}
  </tbody>
</table>
</div>
"""


# ═══════════════════════════════════════════════════════════════════════════
#  INSTANCE DISPLAY
# ═══════════════════════════════════════════════════════════════════════════

# Columns to exclude from the instance display (internal/redundant)
EXCLUDE_COLS_PATTERNS = [
    "_run1", "_run2",  # inter-annotator agreement columns
]


def _should_include_col(col: str) -> bool:
    """Check if a column should be shown in the instance viewer."""
    for pattern in EXCLUDE_COLS_PATTERNS:
        if pattern in col:
            return False
    return True


def get_instances(
    config: ConfigData, label: str, outcome: str
) -> tuple[str, pd.DataFrame]:
    """Return a Markdown title and the rows matching *label* / *outcome*.

    An unknown label, or a label/outcome pair without any row, yields an
    explanatory message together with an empty DataFrame. Otherwise the rows
    are returned with excluded columns removed, a fresh 0-based index, and
    the ``TEXT``, gold and prediction columns (when present) moved first.
    """
    try:
        by_outcome = config.case_index[label]
    except KeyError:
        return "Label introuvable.", pd.DataFrame()

    outcome_text = OUTCOME_DISPLAY.get(outcome, outcome)
    indices = by_outcome.get(outcome, [])
    if not indices:
        return f"Aucune instance pour {display_name(label)} — {outcome_text}.", pd.DataFrame()

    # Select the matching rows, then drop the columns hidden from the viewer.
    rows = config.df.iloc[indices].copy()
    rows = rows[[c for c in rows.columns if _should_include_col(c)]]

    # Leading columns: text, gold label, predicted label — each only if present.
    pred_col = f"{label}{PRED_SUFFIX}"
    front = [c for c in ("TEXT", label, pred_col) if c in rows.columns]
    rest = [c for c in rows.columns if c not in front]
    rows = rows[front + rest]

    count = len(indices)
    plural = "s" if count > 1 else ""
    title = f"### {outcome_text} — **{display_name(label)}** — **{count}** instance{plural}"

    return title, rows.reset_index(drop=True)


# ═══════════════════════════════════════════════════════════════════════════
#  GRADIO APP
# ═══════════════════════════════════════════════════════════════════════════

# Load every configuration once at import time; progress goes to stdout.
print("=" * 60, "EMOTYC — Chargement des configurations...", "=" * 60, sep="\n")
ALL_CONFIGS = load_all_configs()
print(f"\n✅ {len(ALL_CONFIGS)} configuration(s) chargée(s).\n")

# First loaded configuration (insertion order), or None when nothing loaded.
DEFAULT_CONFIG = next(iter(ALL_CONFIGS), None)


def on_config_change(config_name: str) -> tuple[str, pd.DataFrame]:
    """Return the performance table HTML for *config_name* and an empty instance table."""
    selected = ALL_CONFIGS.get(config_name)
    if selected is None:
        return "<p>Configuration non trouvée.</p>", pd.DataFrame()

    return generate_performance_html(selected), pd.DataFrame()


def on_cell_click(
    cell_value: str, config_name: str
) -> tuple[Any, Any, Any]:
    """Handle a click on a TP/FP/TN/FN cell relayed through the hidden textbox.

    Args:
        cell_value: ``"<label>|<outcome>"`` as written by the page script, or
            an empty string when the textbox has just been reset.
        config_name: Name of the currently selected configuration.

    Returns:
        A triple ``(title, instances, textbox_value)``. The textbox value is
        reset to ``""`` so that clicking the same cell twice in a row still
        changes the textbox and triggers this handler again.
    """
    if not cell_value:
        # This handler resets the textbox to "", and a `change` listener also
        # fires on values set by a function. Clearing the outputs here would
        # therefore wipe the instances displayed by the click that caused the
        # reset, so an empty value leaves all three outputs untouched.
        return gr.update(), gr.update(), gr.update()

    if "|" not in cell_value:
        # Malformed payload: clear the panel and reset the textbox.
        return "", pd.DataFrame(), ""

    label, outcome = cell_value.split("|", 1)

    if config_name not in ALL_CONFIGS:
        return "Configuration introuvable.", pd.DataFrame(), ""

    config = ALL_CONFIGS[config_name]
    title, instances_df = get_instances(config, label, outcome)

    return title, instances_df, ""


# Script injected into the page <head> (passed as `head=` to gr.Blocks).
# `cellClick` is the handler named in the onclick attribute of the clickable
# table cells: it marks the clicked cell as selected, then writes
# "<label>|<outcome>" into the textarea under #cell_click_input and dispatches
# a bubbling `input` event so the value reaches the Python side. If that
# textarea is not found in the DOM, the click only changes the highlight.
HEAD_JS = """
<script>
function cellClick(el) {
  document.querySelectorAll('.cell-clickable.selected').forEach(c => c.classList.remove('selected'));
  el.classList.add('selected');

  const label = el.getAttribute('data-label');
  const outcome = el.getAttribute('data-outcome');
  const value = label + '|' + outcome;

  const hiddenInput = document.querySelector('#cell_click_input textarea');
  if (hiddenInput) {
    hiddenInput.value = value;
    hiddenInput.dispatchEvent(new Event('input', { bubbles: true }));
  }
}
</script>
"""


# ── Build Gradio interface ─────────────────────────────────────────────

# Markdown shown at the top of the page: title and short usage instructions.
HEADER_MD = """
# 📊 EMOTYC — Visualisation des Performances

Sélectionnez une configuration pour afficher le tableau de performances du modèle de détection des émotions.
**Cliquez sur les cellules FN, FP, TN ou TP** pour explorer les instances concrètes.
"""

# Page layout: header, configuration dropdown, performance table, a hidden
# textbox used as the JS → Python bridge, and the instance panel.
with gr.Blocks(
    title="EMOTYC — Performances",
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.indigo,
        secondary_hue=gr.themes.colors.slate,
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    ),
    css="""
    .main-container { max-width: 1100px; margin: 0 auto; }
    #cell_click_input { display: none !important; }
    .instance-panel { margin-top: 8px; }
    footer { display: none !important; }
    """,
    head=HEAD_JS,
) as demo:

    with gr.Column(elem_classes="main-container"):
        gr.Markdown(HEADER_MD)

        with gr.Row():
            config_dropdown = gr.Dropdown(
                choices=list(ALL_CONFIGS.keys()),
                value=DEFAULT_CONFIG,
                label="Configuration",
                interactive=True,
                scale=3,
            )

        # Performance table (HTML)
        perf_html = gr.HTML(label="Tableau de performances")

        # Textbox for JS → Python communication. It must exist in the DOM for
        # the page script to find `#cell_click_input textarea`, so it is kept
        # visible for Gradio and hidden by the `display: none` CSS rule above
        # (a component created with visible=False may not be rendered at all).
        cell_click_input = gr.Textbox(
            value="",
            visible=True,
            elem_id="cell_click_input",
        )

        # Instance panel
        with gr.Column(elem_classes="instance-panel", visible=True):
            instance_title = gr.Markdown("")
            instance_table = gr.Dataframe(
                value=pd.DataFrame(),
                label="Instances",
                interactive=False,
                wrap=True,
                max_height=500,
            )

    # ── Events ─────────────────────────────────────────────────────────

    # Switching configuration redraws the table and empties the instance
    # table; the chained step also clears the instance title so no heading
    # from the previous configuration stays above the emptied table.
    config_dropdown.change(
        fn=on_config_change,
        inputs=[config_dropdown],
        outputs=[perf_html, instance_table],
    ).then(
        fn=lambda: "",
        inputs=None,
        outputs=[instance_title],
    )

    # A cell click arrives as a value change of the bridge textbox.
    cell_click_input.change(
        fn=on_cell_click,
        inputs=[cell_click_input, config_dropdown],
        outputs=[instance_title, instance_table, cell_click_input],
    )

    # Load default config on startup
    demo.load(
        fn=on_config_change,
        inputs=[config_dropdown],
        outputs=[perf_html, instance_table],
    )


# Script entry point: serve the app on all network interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)