File size: 9,414 Bytes
1d4f6ee
7a3fbb8
 
1d4f6ee
 
 
7a3fbb8
d6be2bf
7a3fbb8
 
 
 
 
 
 
 
1d4f6ee
 
7a3fbb8
 
 
 
1d4f6ee
 
7a3fbb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4f6ee
7a3fbb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4f6ee
8607107
1d4f6ee
7a3fbb8
8607107
7a3fbb8
1d4f6ee
7a3fbb8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4f6ee
 
7a3fbb8
1d4f6ee
 
 
d3c6cd3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import os
import re
import functools
import pandas as pd
import gradio as gr

# Workbook and banner locations; overridable via environment for deployment.
DATA_PATH = os.environ.get("LB_DATA_PATH", "gradio (2).xlsx")
BANNER_PATH = os.environ.get("LB_BANNER_PATH", "banner.png")
# Opt in to pandas copy-on-write semantics (avoids chained-assignment surprises).
pd.options.mode.copy_on_write = True

# Sheets rendered as auto-loaded tabs, in display order.
# NOTE(review): these strings presumably match the workbook's sheet names
# exactly (including the 'Difficultly' spelling) — do not "fix" the text
# here without renaming the sheet in the Excel file.
TAB_ORDER = [
    "Main Leader Board",
    "TTS Difficultly Level Per Model",
    "Performance On Codecs",
    "Best Model per Attack",
]

# HTML blurb shown above each tab's table; keys mirror TAB_ORDER entries.
SHEET_DESCRIPTIONS = {
    "Main Leader Board": "<b>Overview.</b> Comparison of models across all datasets. Lower <b>EER</b> is better.",
    "TTS Difficultly Level Per Model": "<b>TTS stress-test.</b> Lower TNR means harder; higher TNR means easier.",
    "Performance On Codecs": "<b>Codec robustness.</b> Compare models under compression/bitrates.",
    "Best Model per Attack": "<b>Per-attack winners.</b> Shows top-performing model per attack.",
}

# Prefer the openpyxl engine when it is installed; otherwise let pandas
# choose its default xlsx engine.
try:
    import openpyxl  # better engine for xlsx on Windows
    READ_KW = {"engine": "openpyxl"}
except Exception:
    READ_KW = {}


# ─────────────────────────────────────────────────────────────────────────────
# Excel helpers
# ─────────────────────────────────────────────────────────────────────────────
def _ensure_file():
    """Raise FileNotFoundError unless the Excel workbook exists at DATA_PATH."""
    if os.path.exists(DATA_PATH):
        return
    raise FileNotFoundError(f"Excel file not found at '{DATA_PATH}'")

@functools.lru_cache(maxsize=None)
def load_sheet(name: str) -> pd.DataFrame:
    """Read sheet *name* from the workbook, caching each sheet after first read.

    Fix: the previous ``maxsize=1`` evicted the cached frame every time a
    DIFFERENT sheet name was requested, so loading the four leaderboard tabs
    never hit the cache. The key space is the workbook's fixed set of sheet
    names, so an unbounded cache is safe here.

    Raises:
        FileNotFoundError: if the workbook is missing (via ``_ensure_file``).
    """
    _ensure_file()
    return pd.read_excel(DATA_PATH, sheet_name=name, **READ_KW)

def _find_model_col(headers: list[str]) -> str:
    for cand in headers:
        if str(cand).lower().strip() in ("ssl model", "ssl_model", "model"):
            return cand
    return headers[0]

def _base_dataset_name(colname: str) -> str:
    return re.sub(r"\.\d+$", "", str(colname)).strip()

@functools.lru_cache(maxsize=1)
def read_tts_header():
    """
    Inspect the 'TTS all' sheet and return ``(model_col, columns, first_row)``:
      - ``columns``: the Excel header row, as a list of column names
      - ``first_row``: the first data row, which carries the per-column labels
        (attack ids / TTS names / means); an empty Series when the sheet has
        no data rows
      - ``model_col``: the header naming the model column
    """
    header_only = pd.read_excel(DATA_PATH, sheet_name="TTS all", nrows=0, **READ_KW)
    columns = list(header_only.columns)

    one_row = pd.read_excel(DATA_PATH, sheet_name="TTS all", nrows=1, header=0, **READ_KW)
    if len(one_row):
        first_row = one_row.iloc[0]
    else:
        # Sheet has a header but no data: fall back to an all-empty label row.
        first_row = pd.Series(index=columns, dtype=object)

    return _find_model_col(columns), columns, first_row

@functools.lru_cache(maxsize=1)
def build_dataset_to_labels():
    """
    Build the full label mapping WITHOUT filtering to 'Axx' names:
        dataset -> { label -> wide column name }
    Any non-empty string label is kept (e.g. 'A07', 'mean TNR', 'MeloTTS').
    Duplicate labels within one dataset are disambiguated as label#2, label#3, …

    Returns ``(model_col, columns, mapping)`` from the cached 'TTS all' header.
    """
    model_col, columns, first_row = read_tts_header()
    mapping: dict[str, dict[str, str]] = {}
    seen_by_dataset: dict[str, set[str]] = {}

    for col in (c for c in columns if c != model_col):
        dataset = _base_dataset_name(col)
        raw = first_row.get(col, "")
        label = "" if pd.isna(raw) else str(raw).strip()
        if not label:
            continue  # blank header label → nothing to expose for this column

        # Disambiguate repeated labels inside the same dataset.
        seen = seen_by_dataset.setdefault(dataset, set())
        candidate, suffix = label, 2
        while candidate in seen:
            candidate = f"{label}#{suffix}"
            suffix += 1
        seen.add(candidate)

        mapping.setdefault(dataset, {})[candidate] = col

    return model_col, columns, mapping

def _indices_for_columns(columns: list[str], wanted: list[str]) -> list[int]:
    name_to_pos = {name: i for i, name in enumerate(columns)}
    positions = []
    for w in wanted:
        if w not in name_to_pos:
            raise KeyError(f"Requested column '{w}' not found in header.")
        positions.append(name_to_pos[w])
    return positions

def read_tts_subset_for_dataset(dataset: str, label: str | None) -> pd.DataFrame:
    """
    Efficiently read only the model column + chosen dataset labels (or a
    single label) from the 'TTS all' sheet.

    Steps:
      - Map dataset -> labels (wide column names)
      - Read via positional indices (stable even with duplicate headers)
      - Drop the first data row (it holds the labels, not measurements)
      - Rename columns to ['Model', <labels...>] and coerce labels to numeric

    Fix: ``pd.read_excel(usecols=[...])`` returns the selected columns in
    FILE order, not request order. The old code renamed the result with
    ``["Model"] + labels`` in request order, so whenever the alphabetical
    label order differed from the sheet's column order, data columns were
    given the wrong label. We now relabel via a position -> name map and
    reorder explicitly.

    Returns a single-column 'Info' frame describing the problem when the
    dataset or label cannot be resolved.
    """
    model_col, columns, mapping = build_dataset_to_labels()
    if dataset not in mapping:
        return pd.DataFrame({"Info": [f"No columns found for dataset '{dataset}'."]})

    if label and label != "(All)":
        if label not in mapping[dataset]:
            return pd.DataFrame({"Info": [f"Label '{label}' not found in dataset '{dataset}'."]})
        labels = [label]
    else:
        # include ALL labels for this dataset (attacks, means, TTS names…)
        labels = sorted(mapping[dataset].keys(), key=lambda s: s.lower())
    selected_cols = [mapping[dataset][lab] for lab in labels]

    try:
        use_positions = _indices_for_columns(columns, [model_col] + selected_cols)
    except KeyError as e:
        return pd.DataFrame({"Info": [f"{e}"]})

    # Remember which file position maps to which output name, because pandas
    # hands the columns back sorted by position regardless of request order.
    out_names = ["Model"] + labels
    pos_to_name = dict(zip(use_positions, out_names))
    ordered_positions = sorted(use_positions)

    df = pd.read_excel(
        DATA_PATH,
        sheet_name="TTS all",
        usecols=ordered_positions,
        header=0,
        **READ_KW
    )

    # First data row holds the labels; drop it.
    if len(df) > 0:
        df = df.iloc[1:].reset_index(drop=True)

    # Relabel positionally, then restore the requested order (Model first).
    df.columns = [pos_to_name[p] for p in ordered_positions]
    df = df[out_names]
    df["Model"] = df["Model"].astype(str)

    # Coerce numeric where possible
    for lab in labels:
        df[lab] = pd.to_numeric(df[lab], errors="coerce")

    return df


# ─────────────────────────────────────────────────────────────────────────────
# Build Gradio app
# ─────────────────────────────────────────────────────────────────────────────
def build_app():
    """Construct the Gradio Blocks app.

    Layout:
      - optional banner image + page title
      - one read-only Dataframe tab per sheet in TAB_ORDER
      - an 'Explorer for Attacks' tab with dataset/label dropdowns backed by
        ``read_tts_subset_for_dataset``.

    Fix: the initial explorer table is now passed as ``value=`` when the
    Dataframe component is constructed. The old code assigned ``out.value``
    AFTER construction, which does not populate an already-built Blocks
    component, so the explorer started empty until the first dropdown change.
    """
    with gr.Blocks(title="πŸŽ™οΈ Benchmarking Spoof-SUPERB Classifiers") as demo:
        if os.path.exists(BANNER_PATH):
            gr.Image(value=BANNER_PATH, show_label=False)
        gr.Markdown("<h1>πŸŽ™οΈ Benchmarking Spoof-SUPERB Classifiers Built on S3PRL Embeddings</h1>")

        with gr.Tabs():
            # 1–4: Auto-load the four main tabs
            for sheet in TAB_ORDER:
                try:
                    df = load_sheet(sheet)
                except Exception as e:
                    # Surface load failures inside the tab instead of crashing the app.
                    df = pd.DataFrame({"Error": [str(e)]})
                with gr.TabItem(sheet):
                    gr.Markdown(f"<div class='sheet-card'>{SHEET_DESCRIPTIONS.get(sheet, '')}</div>")
                    gr.Dataframe(value=df, interactive=False)

            # Explorer (all labels, not just Axx)
            with gr.TabItem("Explorer for Attacks"):
                gr.Markdown(
                    "<div class='sheet-card'>"
                    "<b>Pick a Dataset</b> then (optionally) pick a specific label.<br>"
                    "Rows = Models, Columns = ALL labels for that dataset (attacks, means, TTS names, etc.)."
                    "</div>"
                )
                try:
                    _, _, mapping = build_dataset_to_labels()
                    datasets = sorted(mapping.keys())
                except Exception:
                    # Best-effort: a missing/malformed 'TTS all' sheet degrades
                    # the explorer to an explanatory message instead of crashing.
                    mapping, datasets = {}, []

                if not datasets:
                    gr.Markdown("❗ <i>No valid 'TTS all' sheet detected (first row must contain labels; first column models).</i>")
                else:
                    default_ds = datasets[0]
                    default_labels = ["(All)"] + sorted(mapping[default_ds].keys(), key=lambda s: s.lower())

                    with gr.Row():
                        ds_dd = gr.Dropdown(datasets, value=default_ds, label="Dataset")
                        lab_dd = gr.Dropdown(default_labels, value="(All)", label="Label (optional)")

                    # Initial render must go in at construction time (see docstring).
                    out = gr.Dataframe(
                        value=read_tts_subset_for_dataset(default_ds, None),
                        interactive=False,
                    )

                    def on_ds_change(ds):
                        # Refresh label choices and show the dataset's full table.
                        new_labels = ["(All)"] + sorted(mapping.get(ds, {}).keys(), key=lambda s: s.lower())
                        table = read_tts_subset_for_dataset(ds, None)
                        return gr.update(choices=new_labels, value="(All)"), table

                    ds_dd.change(on_ds_change, ds_dd, [lab_dd, out])

                    def on_label_change(ds, lab):
                        return read_tts_subset_for_dataset(ds, lab if lab != "(All)" else None)

                    lab_dd.change(on_label_change, [ds_dd, lab_dd], out)

    return demo


# Build the UI at import time so hosting platforms that import this module
# (e.g. Hugging Face Spaces) can pick up the module-level `demo` object.
demo = build_app()

if __name__ == "__main__":
    # Bind on all interfaces; share=True additionally requests a public
    # gradio.live tunnel URL.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)