import os
import re
import functools
import pandas as pd
import gradio as gr
# Path to the leaderboard workbook; override via the LB_DATA_PATH env var.
DATA_PATH = os.environ.get("LB_DATA_PATH", "gradio (2).xlsx")
# Optional banner image shown at the top of the app (skipped if missing).
BANNER_PATH = os.environ.get("LB_BANNER_PATH", "banner.png")
# Opt in to pandas copy-on-write semantics (avoids chained-assignment surprises).
pd.options.mode.copy_on_write = True
# Excel sheet names rendered as the four fixed leaderboard tabs, in display order.
# NOTE(review): 'Difficultly' looks like a typo for 'Difficulty', but these strings
# must match the workbook's sheet names exactly — confirm before renaming.
TAB_ORDER = [
    "Main Leader Board",
    "TTS Difficultly Level Per Model",
    "Performance On Codecs",
    "Best Model per Attack",
]
# Per-tab HTML blurbs shown above each table (keys must match TAB_ORDER entries).
SHEET_DESCRIPTIONS = {
    "Main Leader Board": "<b>Overview.</b> Comparison of models across all datasets. Lower <b>EER</b> is better.",
    "TTS Difficultly Level Per Model": "<b>TTS stress-test.</b> Lower TNR means harder; higher TNR means easier.",
    "Performance On Codecs": "<b>Codec robustness.</b> Compare models under compression/bitrates.",
    "Best Model per Attack": "<b>Per-attack winners.</b> Shows top-performing model per attack.",
}
# Prefer openpyxl for .xlsx when available; otherwise let pandas pick an engine.
try:
    import openpyxl  # better engine for xlsx on Windows
    READ_KW = {"engine": "openpyxl"}
except Exception:
    READ_KW = {}
# ─────────────────────────────────────────────────────────────────────────────
# Excel helpers
# ─────────────────────────────────────────────────────────────────────────────
def _ensure_file():
    """Raise FileNotFoundError unless the Excel workbook exists at DATA_PATH."""
    if os.path.exists(DATA_PATH):
        return
    raise FileNotFoundError(f"Excel file not found at '{DATA_PATH}'")
@functools.lru_cache(maxsize=None)
def load_sheet(name: str):
    """Read and cache one sheet of the leaderboard workbook as a DataFrame.

    BUG FIX: the cache was `maxsize=1`, but this function is keyed by sheet
    name and the UI loads four different sheets at startup — each call for a
    new sheet evicted the previous entry, so the workbook was re-read on every
    cache miss. An unbounded cache is safe here: the sheet set is small and
    fixed (see TAB_ORDER).

    Args:
        name: Excel sheet name.

    Raises:
        FileNotFoundError: if the workbook is missing (via _ensure_file).
    """
    _ensure_file()
    return pd.read_excel(DATA_PATH, sheet_name=name, **READ_KW)
def _find_model_col(headers: list[str]) -> str:
for cand in headers:
if str(cand).lower().strip() in ("ssl model", "ssl_model", "model"):
return cand
return headers[0]
def _base_dataset_name(colname: str) -> str:
return re.sub(r"\.\d+$", "", str(colname)).strip()
@functools.lru_cache(maxsize=1)
def read_tts_header():
    """
    Read the header structure of the 'TTS all' sheet:
      - the Excel header row (column names)
      - the first data row, which carries per-column labels
        (attack ids / TTS names / means)
      - the detected model column name

    Returns:
        (model_col, columns, first_row)
    """
    header_only = pd.read_excel(DATA_PATH, sheet_name="TTS all", nrows=0, **READ_KW)
    columns = list(header_only.columns)
    label_row_df = pd.read_excel(DATA_PATH, sheet_name="TTS all", nrows=1, header=0, **READ_KW)
    if len(label_row_df):
        first_row = label_row_df.iloc[0]
    else:
        # Sheet has a header but no data rows: fall back to an empty label row.
        first_row = pd.Series(index=columns, dtype=object)
    model_col = _find_model_col(columns)
    return model_col, columns, first_row
@functools.lru_cache(maxsize=1)
def build_dataset_to_labels():
    """
    Build the dataset -> {label -> wide_column_name} mapping WITHOUT filtering
    to Axx-style attack ids: any non-empty string label is kept (e.g. 'A07',
    'mean TNR', 'MeloTTS', ...). Duplicate labels within one dataset are made
    unique by appending '#2', '#3', ...

    Returns:
        (model_col, columns, mapping)
    """
    model_col, columns, first_row = read_tts_header()
    mapping: dict[str, dict[str, str]] = {}
    for col in columns:
        if col == model_col:
            # The model column is the row index, not a dataset column.
            continue
        dataset = _base_dataset_name(col)
        raw = first_row.get(col, "")
        label = "" if pd.isna(raw) else str(raw).strip()
        if not label:
            # Skip columns whose label cell is empty.
            continue
        bucket = mapping.setdefault(dataset, {})
        # Disambiguate repeated labels within the same dataset.
        candidate = label
        suffix = 2
        while candidate in bucket:
            candidate = f"{label}#{suffix}"
            suffix += 1
        bucket[candidate] = col
    return model_col, columns, mapping
def _indices_for_columns(columns: list[str], wanted: list[str]) -> list[int]:
name_to_pos = {name: i for i, name in enumerate(columns)}
positions = []
for w in wanted:
if w not in name_to_pos:
raise KeyError(f"Requested column '{w}' not found in header.")
positions.append(name_to_pos[w])
return positions
def read_tts_subset_for_dataset(dataset: str, label: str | None) -> pd.DataFrame:
    """
    Efficiently read only the model column + chosen dataset labels (or a
    single label) from the 'TTS all' sheet.

    Steps:
      - Map dataset -> labels (wide column names)
      - Read via positional indices (stable even with dup headers)
      - Drop the first data row (label row)
      - Rename columns to ['Model', <labels...>]
      - Convert numeric columns

    Args:
        dataset: base dataset name (key of build_dataset_to_labels()).
        label:   a single label, or None / "(All)" for every label.

    Returns:
        DataFrame with a 'Model' column plus one numeric column per label, or
        a one-column 'Info' DataFrame describing the problem.
    """
    model_col, columns, mapping = build_dataset_to_labels()
    if dataset not in mapping:
        return pd.DataFrame({"Info": [f"No columns found for dataset '{dataset}'."]})
    if label and label != "(All)":
        if label not in mapping[dataset]:
            return pd.DataFrame({"Info": [f"Label '{label}' not found in dataset '{dataset}'."]})
        labels = [label]
        selected_cols = [mapping[dataset][label]]
    else:
        # Include ALL labels for this dataset (attacks, means, TTS names…).
        labels = sorted(mapping[dataset].keys(), key=lambda s: s.lower())
        selected_cols = [mapping[dataset][lab] for lab in labels]
    try:
        use_positions = _indices_for_columns(columns, [model_col] + selected_cols)
    except KeyError as e:
        return pd.DataFrame({"Info": [f"{e}"]})
    df = pd.read_excel(
        DATA_PATH,
        sheet_name="TTS all",
        usecols=use_positions,
        header=0,
        **READ_KW,
    )
    # First data row holds the labels; drop it.
    if len(df) > 0:
        df = df.iloc[1:].reset_index(drop=True)
    # BUG FIX: pandas ignores the *order* of a positional `usecols` list and
    # returns columns in file order. The old `df.columns = ["Model"] + labels`
    # assignment assumed request order, mislabeling columns whenever the
    # sorted-label order differed from the sheet's column order. Map each
    # requested position to its intended name, then reorder explicitly.
    wanted_names = ["Model"] + labels
    pos_to_name = dict(zip(use_positions, wanted_names))
    df.columns = [pos_to_name[pos] for pos in sorted(use_positions)]
    df = df[wanted_names]
    df["Model"] = df["Model"].astype(str)
    # Coerce numeric where possible; non-numeric cells become NaN.
    for lab in labels:
        df[lab] = pd.to_numeric(df[lab], errors="coerce")
    return df
# ─────────────────────────────────────────────────────────────────────────────
# Build Gradio app
# ─────────────────────────────────────────────────────────────────────────────
def build_app():
    """Assemble and return the Gradio Blocks leaderboard UI (not launched).

    Layout: optional banner, title, four fixed sheet tabs (TAB_ORDER), plus an
    'Explorer for Attacks' tab with dataset/label dropdowns driving a table.
    """
    # NOTE(review): the title strings below contain what looks like a
    # mojibake'd emoji — kept byte-for-byte; confirm the intended glyph.
    with gr.Blocks(title="ποΈ Benchmarking Spoof-SUPERB Classifiers") as demo:
        if os.path.exists(BANNER_PATH):
            gr.Image(value=BANNER_PATH, show_label=False)
        gr.Markdown("<h1>ποΈ Benchmarking Spoof-SUPERB Classifiers Built on S3PRL Embeddings</h1>")
        with gr.Tabs():
            # 1–4: Auto-load the four main tabs, one per Excel sheet.
            for sheet in TAB_ORDER:
                try:
                    df = load_sheet(sheet)
                except Exception as e:
                    # Surface load failures inside the tab instead of crashing.
                    df = pd.DataFrame({"Error": [str(e)]})
                with gr.TabItem(sheet):
                    gr.Markdown(f"<div class='sheet-card'>{SHEET_DESCRIPTIONS.get(sheet, '')}</div>")
                    gr.Dataframe(value=df, interactive=False)
            # Explorer (all labels, not just Axx attacks)
            with gr.TabItem("Explorer for Attacks"):
                gr.Markdown(
                    "<div class='sheet-card'>"
                    "<b>Pick a Dataset</b> then (optionally) pick a specific label.<br>"
                    "Rows = Models, Columns = ALL labels for that dataset (attacks, means, TTS names, etc.)."
                    "</div>"
                )
                try:
                    _, _, mapping = build_dataset_to_labels()
                    datasets = sorted(mapping.keys())
                except Exception:
                    mapping, datasets = {}, []
                if not datasets:
                    gr.Markdown("β <i>No valid 'TTS all' sheet detected (first row must contain labels; first column models).</i>")
                else:
                    default_ds = datasets[0]
                    default_labels = ["(All)"] + sorted(mapping[default_ds].keys(), key=lambda s: s.lower())
                    with gr.Row():
                        ds_dd = gr.Dropdown(datasets, value=default_ds, label="Dataset")
                        lab_dd = gr.Dropdown(default_labels, value="(All)", label="Label (optional)")
                    # BUG FIX: the initial table was previously set via a
                    # post-hoc `out.value = ...` assignment after creation,
                    # which is not the documented way to seed a component's
                    # initial value and may be ignored. Pass value= instead.
                    out = gr.Dataframe(
                        value=read_tts_subset_for_dataset(default_ds, None),
                        interactive=False,
                    )

                    def on_ds_change(ds):
                        # Refresh label choices and show the full dataset table.
                        new_labels = ["(All)"] + sorted(mapping.get(ds, {}).keys(), key=lambda s: s.lower())
                        table = read_tts_subset_for_dataset(ds, None)
                        return gr.update(choices=new_labels, value="(All)"), table

                    ds_dd.change(on_ds_change, ds_dd, [lab_dd, out])

                    def on_label_change(ds, lab):
                        return read_tts_subset_for_dataset(ds, lab if lab != "(All)" else None)

                    lab_dd.change(on_label_change, [ds_dd, lab_dd], out)
    return demo
# Build at import time so hosting platforms that import this module (e.g.
# Hugging Face Spaces) find a module-level `demo` object.
demo = build_app()
if __name__ == "__main__":
    # NOTE(review): share=True requests a public tunnel in addition to binding
    # 0.0.0.0:7860 — presumably for quick sharing; confirm it's intended.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)