import functools
import os
import re

import gradio as gr
import pandas as pd

# Paths are overridable via environment variables so deployments need no
# source edits.
DATA_PATH = os.environ.get("LB_DATA_PATH", "gradio (2).xlsx")
BANNER_PATH = os.environ.get("LB_BANNER_PATH", "banner.png")

pd.options.mode.copy_on_write = True

# Sheets rendered as the fixed leaderboard tabs, in display order.
# NOTE(review): "Difficultly" is presumably the literal sheet name inside the
# workbook (it is read via load_sheet) -- confirm against the file before
# "fixing" the spelling here or in SHEET_DESCRIPTIONS.
TAB_ORDER = [
    "Main Leader Board",
    "TTS Difficultly Level Per Model",
    "Performance On Codecs",
    "Best Model per Attack",
]

SHEET_DESCRIPTIONS = {
    "Main Leader Board": "Overview. Comparison of models across all datasets. Lower EER is better.",
    "TTS Difficultly Level Per Model": "TTS stress-test. Lower TNR means harder; higher TNR means easier.",
    "Performance On Codecs": "Codec robustness. Compare models under compression/bitrates.",
    "Best Model per Attack": "Per-attack winners. Shows top-performing model per attack.",
}

try:
    import openpyxl  # noqa: F401 -- better engine for xlsx on Windows

    READ_KW = {"engine": "openpyxl"}
except Exception:
    # Fall back to pandas' default engine when openpyxl is unavailable.
    READ_KW = {}


# ─────────────────────────────────────────────────────────────────────────────
# Excel helpers
# ─────────────────────────────────────────────────────────────────────────────
def _ensure_file() -> None:
    """Raise FileNotFoundError with a clear message if the workbook is missing."""
    if not os.path.exists(DATA_PATH):
        raise FileNotFoundError(f"Excel file not found at '{DATA_PATH}'")


@functools.lru_cache(maxsize=None)
def load_sheet(name: str) -> pd.DataFrame:
    """Read and cache one sheet of the workbook by name.

    BUGFIX: the cache was ``maxsize=1``, but build_app() loads four sheets in a
    row, so every call evicted the previous entry and nothing was ever served
    from cache.  Unbounded is safe here: the key space is the small fixed set
    of sheet names.
    """
    _ensure_file()
    return pd.read_excel(DATA_PATH, sheet_name=name, **READ_KW)


def _find_model_col(headers: list[str]) -> str:
    """Return the header naming the model column, falling back to the first one.

    Matches 'SSL model' / 'ssl_model' / 'model' case-insensitively.
    """
    for cand in headers:
        if str(cand).lower().strip() in ("ssl model", "ssl_model", "model"):
            return cand
    return headers[0]


def _base_dataset_name(colname: str) -> str:
    """Strip the '.N' suffix pandas appends to duplicated column headers."""
    return re.sub(r"\.\d+$", "", str(colname)).strip()


@functools.lru_cache(maxsize=1)
def read_tts_header():
    """
    From 'TTS all' read:
      - column headers (Excel header row)
      - first data row (contains labels per column: attacks/TTS names/means)
      - the model column name

    A single ``nrows=1`` read supplies both the header and the label row
    (previously the sheet header was parsed twice).
    """
    _ensure_file()
    df_head = pd.read_excel(DATA_PATH, sheet_name="TTS all", nrows=1, header=0, **READ_KW)
    columns = df_head.columns.tolist()
    first_row = df_head.iloc[0] if len(df_head) else pd.Series(index=columns, dtype=object)
    model_col = _find_model_col(columns)
    return model_col, columns, first_row


@functools.lru_cache(maxsize=1)
def build_dataset_to_labels():
    """
    Build mapping WITHOUT filtering to Axx only:
        dataset -> { label -> wide_column_name }
    Includes any non-empty string labels (e.g., 'A07', 'mean TNR', 'MeloTTS', etc.).
    If duplicate labels occur within the same dataset, make them unique
    (label, label#2, ...).

    Returns (model_col, columns, mapping).
    """
    model_col, columns, first_row = read_tts_header()
    mapping: dict[str, dict[str, str]] = {}
    per_dataset_seen: dict[str, set[str]] = {}

    for col in columns:
        if col == model_col:
            continue
        dataset = _base_dataset_name(col)
        raw_label = first_row.get(col, "")
        label = str(raw_label).strip() if pd.notna(raw_label) else ""
        if not label:
            # skip empty header labels
            continue

        # Ensure uniqueness within a dataset by suffixing '#2', '#3', ...
        seen = per_dataset_seen.setdefault(dataset, set())
        unique_label = label
        idx = 2
        while unique_label in seen:
            unique_label = f"{label}#{idx}"
            idx += 1
        seen.add(unique_label)
        mapping.setdefault(dataset, {})[unique_label] = col

    return model_col, columns, mapping


def _indices_for_columns(columns: list[str], wanted: list[str]) -> list[int]:
    """Map column names to positional indices; raise KeyError on a miss.

    Positional indices stay stable even when the sheet has duplicate headers.
    """
    name_to_pos = {name: i for i, name in enumerate(columns)}
    positions = []
    for w in wanted:
        if w not in name_to_pos:
            raise KeyError(f"Requested column '{w}' not found in header.")
        positions.append(name_to_pos[w])
    return positions


def read_tts_subset_for_dataset(dataset: str, label: str | None) -> pd.DataFrame:
    """
    Efficiently read only the model column + chosen dataset labels (or a single label).

    Steps:
      - Map dataset -> labels (wide column names)
      - Read via positional indices (stable even with dup headers)
      - Re-order to the requested column order (see BUGFIX below)
      - Drop the first data row (label row)
      - Rename columns to ['Model', <labels>]
      - Convert numeric columns

    Errors are reported as a one-cell DataFrame with an 'Info' column so the
    UI always has something to render.
    """
    model_col, columns, mapping = build_dataset_to_labels()
    if dataset not in mapping:
        return pd.DataFrame({"Info": [f"No columns found for dataset '{dataset}'."]})

    if label and label != "(All)":
        if label not in mapping[dataset]:
            return pd.DataFrame({"Info": [f"Label '{label}' not found in dataset '{dataset}'."]})
        labels = [label]
    else:
        # include ALL labels for this dataset (attacks, means, TTS names…)
        labels = sorted(mapping[dataset].keys(), key=str.lower)
    selected_cols = [mapping[dataset][lab] for lab in labels]

    try:
        use_positions = _indices_for_columns(columns, [model_col] + selected_cols)
    except KeyError as e:
        return pd.DataFrame({"Info": [f"{e}"]})

    df = pd.read_excel(
        DATA_PATH,
        sheet_name="TTS all",
        usecols=use_positions,
        header=0,
        **READ_KW,
    )

    # BUGFIX: with a positional `usecols`, pandas returns the columns in FILE
    # order, not request order ("element order is ignored").  Since `labels`
    # is alphabetically sorted while the sheet columns are not, the rename
    # below could silently attach the wrong label to a data column.  Re-order
    # the result to the requested position order first.
    pos_to_read_name = dict(zip(sorted(use_positions), df.columns))
    df = df[[pos_to_read_name[p] for p in use_positions]]

    # First data row holds the labels; drop it.
    if len(df) > 0:
        df = df.iloc[1:].reset_index(drop=True)

    # Rename columns and coerce types.
    df.columns = ["Model"] + labels
    df["Model"] = df["Model"].astype(str)
    for lab in labels:
        # Coerce numeric where possible; non-numeric cells become NaN.
        df[lab] = pd.to_numeric(df[lab], errors="coerce")
    return df


# ─────────────────────────────────────────────────────────────────────────────
# Build Gradio app
# ─────────────────────────────────────────────────────────────────────────────
def build_app():
    """Assemble the Gradio UI: banner, four fixed leaderboard tabs, explorer tab."""
    with gr.Blocks(title="🎙️ Benchmarking Linear-Head Classifiers") as demo:
        if os.path.exists(BANNER_PATH):
            gr.Image(value=BANNER_PATH, show_label=False)
        gr.Markdown("🎙️ Benchmarking Linear-Head Classifiers Built on S3PRL Embeddings")

        with gr.Tabs():
            # 1–4: Auto-load the four main tabs
            for sheet in TAB_ORDER:
                try:
                    df = load_sheet(sheet)
                except Exception as e:
                    # Surface the failure inside the tab instead of crashing the app.
                    df = pd.DataFrame({"Error": [str(e)]})
                with gr.TabItem(sheet):
                    gr.Markdown(f"{SHEET_DESCRIPTIONS.get(sheet, '')}")
                    gr.Dataframe(value=df, interactive=False)

            # Explorer (all labels, not just Axx)
            with gr.TabItem("Explorer for Attacks"):
                gr.Markdown(
                    "Pick a Dataset then (optionally) pick a specific label.\n"
                    "Rows = Models, Columns = ALL labels for that dataset (attacks, means, TTS names, etc.)."
                )
                try:
                    _, _, mapping = build_dataset_to_labels()
                    datasets = sorted(mapping.keys())
                except Exception:
                    mapping, datasets = {}, []

                if not datasets:
                    gr.Markdown("❗ No valid 'TTS all' sheet detected (first row must contain labels; first column models).")
                else:
                    default_ds = datasets[0]
                    default_labels = ["(All)"] + sorted(mapping[default_ds].keys(), key=str.lower)
                    with gr.Row():
                        ds_dd = gr.Dropdown(datasets, value=default_ds, label="Dataset")
                        lab_dd = gr.Dropdown(default_labels, value="(All)", label="Label (optional)")

                    # BUGFIX: seed the table at construction time.  Assigning
                    # `out.value` after the component is created is not a
                    # reliable way to set an initial value in gradio Blocks.
                    out = gr.Dataframe(
                        value=read_tts_subset_for_dataset(default_ds, None),
                        interactive=False,
                    )

                    def on_ds_change(ds):
                        """Refresh label choices and show all labels of the new dataset."""
                        new_labels = ["(All)"] + sorted(mapping.get(ds, {}).keys(), key=str.lower)
                        table = read_tts_subset_for_dataset(ds, None)
                        return gr.update(choices=new_labels, value="(All)"), table

                    ds_dd.change(on_ds_change, ds_dd, [lab_dd, out])

                    def on_label_change(ds, lab):
                        """Show a single label, or every label when '(All)' is selected."""
                        return read_tts_subset_for_dataset(ds, lab if lab != "(All)" else None)

                    lab_dd.change(on_label_change, [ds_dd, lab_dd], out)

    return demo


# Module-level so hosting platforms that import `demo` (e.g. `gradio deploy`)
# keep working.
demo = build_app()

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)