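# NOTE: the lines below are file-viewer page residue captured with the source, not code.
# a.azma
# feat: improve column width
# 222085a
# Raw
# History Blame Contribute Delete
# 8.22 kB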
import gradio as gr
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
from constants import (
BANNER,
INTRODUCTION_TEXT,
CITATION_TEXT,
METRICS_TAB_TEXT,
DIR_OUTPUT_REQUESTS,
CSV_PATH
)
from utils_display import make_clickable_model, styled_error, styled_message
LAST_UPDATED = "Oct 14th 2025"

# Load the benchmark results, failing fast with the resolved path when the
# CSV is missing.
CSV_PATH = Path(CSV_PATH)
if not CSV_PATH.exists():
    raise FileNotFoundError(f"Benchmark CSV not found at {CSV_PATH.resolve()}")
df = pd.read_csv(CSV_PATH)

# Raw CSV column name -> header shown in the leaderboard.
column_mapping = {
    "fleurs WER": "FLEURS WER (avg) ⬇️",
    "fleurs CER": "FLEURS CER (avg) ⬇️",
    "common voice WER": "CommonVoice WER (avg) ⬇️",
    "common voice CER": "CommonVoice CER (avg) ⬇️",
    "mana WER": "MANA WER (avg) ⬇️",
    "mana CER": "MANA CER (avg) ⬇️",
    "Persian-ASR-Benchmark WER": "PERSIAN ASR BENCHMARK WER (avg) ⬇️",
    "Persian-ASR-Benchmark CER": "PERSIAN ASR BENCHMARK CER (avg) ⬇️",
}
df = df.rename(columns=column_mapping)

# Row-wise mean over every per-dataset WER / CER column.
wer_avg_cols = [c for c in df.columns if "WER (avg)" in c]
cer_avg_cols = [c for c in df.columns if "CER (avg)" in c]
df["Avg WER ⬇️"] = df[wer_avg_cols].mean(axis=1).round(3)
df["Avg CER ⬇️"] = df[cer_avg_cols].mean(axis=1).round(3)

# Round everything to 4 decimals except the identifier / metadata columns.
for col in df.columns:
    if col in ("model", "Model", "link", "Parameters"):
        continue
    df[col] = df[col].round(4)

# NOTE(review): make_clickable_model is defined in utils_display; it is
# expected to provide the "Model" display column selected later in this file.
df = make_clickable_model(df)

# Rank follows the ascending Avg CER ordering (lowest Avg CER = rank 1).
df = df.sort_values(by="Avg CER ⬇️")
df["Rank"] = range(1, len(df) + 1)

# The raw "model" column is no longer needed once the table is ranked.
df = df.drop(columns=["model"])
# Left-to-right order of the columns shown in the leaderboard table.
column_order = [
    "Rank",
    "Model",
    "Avg CER ⬇️",
    "Avg WER ⬇️",
    # PERSIAN ASR BENCHMARK
    "PERSIAN ASR BENCHMARK CER (avg) ⬇️",
    "PERSIAN ASR BENCHMARK WER (avg) ⬇️",
    # FLEURS
    "FLEURS CER (avg) ⬇️",
    "FLEURS WER (avg) ⬇️",
    # CommonVoice
    "CommonVoice CER (avg) ⬇️",
    "CommonVoice WER (avg) ⬇️",
    # MANA
    "MANA CER (avg) ⬇️",
    "MANA WER (avg) ⬇️",
]
df = df[column_order]

# "Model" is rendered as markdown; every other column is numeric.
DATATYPES = ["markdown" if c == "Model" else "number" for c in df.columns]

num_columns = len(df.columns)
model_width_percent = 35
# NOTE(review): 220 makes the percentage widths add up to more than 100%,
# presumably on purpose so the table scrolls horizontally — confirm.
remaining_width = 220 - model_width_percent
other_column_width = remaining_width / (num_columns - 2)

# Columns with an explicit width. Pixel widths carry the "px" unit because
# Gradio expects strings such as "100px" or "10%"; a bare "50" is not a valid
# CSS length.
fixed_column_widths = {
    "Rank": "50px",
    "Model": f"{model_width_percent}%",
    "Avg CER ⬇️": "125px",
    "Avg WER ⬇️": "125px",
}
# Every remaining (per-dataset) column gets the same percentage width.
column_widths = [
    fixed_column_widths.get(c, f"{other_column_width:.1f}%")
    for c in df.columns
]
def filter_dataframe(show_persian_asr, show_fleurs, show_common_voice, show_mana):
    """Return the leaderboard restricted to the selected datasets.

    "Rank", "Model" and the two overall average columns are always kept; each
    flag adds the CER/WER column pair of its dataset when truthy.

    Args:
        show_persian_asr: Include the Persian ASR Benchmark columns.
        show_fleurs: Include the FLEURS columns.
        show_common_voice: Include the CommonVoice columns.
        show_mana: Include the MANA columns.

    Returns:
        A DataFrame holding only the selected columns of the module-level
        ``df``, in leaderboard order.
    """
    # (flag, columns contributed by that dataset), in display order.
    dataset_columns = (
        (
            show_persian_asr,
            (
                "PERSIAN ASR BENCHMARK CER (avg) ⬇️",
                "PERSIAN ASR BENCHMARK WER (avg) ⬇️",
            ),
        ),
        (
            show_fleurs,
            (
                "FLEURS CER (avg) ⬇️",
                "FLEURS WER (avg) ⬇️",
            ),
        ),
        (
            show_common_voice,
            (
                "CommonVoice CER (avg) ⬇️",
                "CommonVoice WER (avg) ⬇️",
            ),
        ),
        (
            show_mana,
            (
                "MANA CER (avg) ⬇️",
                "MANA WER (avg) ⬇️",
            ),
        ),
    )

    # Identification and overall-average columns are shown unconditionally.
    selected = ["Rank", "Model", "Avg CER ⬇️", "Avg WER ⬇️"]
    for enabled, columns in dataset_columns:
        if enabled:
            selected.extend(columns)

    return df.copy()[selected]
def request_model(model_text):
    """Record a user's request to have a model evaluated.

    The request is written to ``DIR_OUTPUT_REQUESTS`` as a text file named
    after the model, with every ``/`` replaced by ``@`` and ``.txt`` appended.

    Args:
        model_text: Model identifier typed by the user
            (e.g. ``user/model_name``). Surrounding whitespace is ignored.

    Returns:
        The value of ``styled_message`` when the request was saved, or of
        ``styled_error`` when the input is blank, a request file for this
        model already exists, or the file could not be written.
    """
    # Strip once so " user/model " and "user/model" map to the same request.
    model_text = model_text.strip()
    if not model_text:
        return styled_error("Please enter a model name before submitting.")

    filename = model_text.replace("/", "@") + ".txt"
    out_path = DIR_OUTPUT_REQUESTS / filename
    request_entry = {
        "date": datetime.now().isoformat(),
        "model": model_text,
    }

    try:
        DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        return styled_error(f"Error saving request: {e}")

    try:
        # Mode "x" creates the file atomically and fails if it already
        # exists, so two simultaneous submissions cannot overwrite each other.
        # UTF-8 is explicit so non-ASCII model names do not depend on locale.
        with open(out_path, "x", encoding="utf-8") as f:
            # Kept as the dict's repr to preserve the existing file format.
            f.write(str(request_entry))
    except FileExistsError:
        return styled_error(f"Request for '{model_text}' already exists.")
    except (OSError, ValueError) as e:
        # ValueError covers names the OS layer rejects outright
        # (e.g. an embedded NUL character).
        return styled_error(f"Error saving request: {e}")

    return styled_message(f"✅ Your request for '{model_text}' has been recorded!")
# Custom styling for the leaderboard page.
# The second column ("Model") is targeted with :nth-child(2); the previously
# used ":second-child" is not a valid CSS pseudo-class, so those rules were
# being discarded.
CUSTOM_CSS = """
#leaderboard-table {
    font-size: 14px;
}
#leaderboard-table td:nth-child(2),
#leaderboard-table th:nth-child(2) {
    max-width: 350px !important;
    white-space: normal !important;
    overflow: hidden;
    text-overflow: ellipsis;
}
#leaderboard-table td:not(:nth-child(2)),
#leaderboard-table th:not(:nth-child(2)) {
    text-align: center !important;
    min-width: 100px;
}
#leaderboard-table th {
    font-weight: bold;
    background-color: #f0f0f0;
}
#banner-image {
    width: 50%;
    margin: 0 auto;
}
"""

with gr.Blocks(theme=gr.themes.Base(), css=CUSTOM_CSS) as demo:
    gr.Image(
        BANNER,
        show_label=False,
        show_download_button=False,
        container=False,
        elem_id="banner-image",
    )
    gr.Markdown(INTRODUCTION_TEXT)

    with gr.Tabs():
        with gr.TabItem("🏅 Leaderboard"):
            gr.Markdown("### Select Datasets to Display")
            with gr.Row():
                persian_asr_checkbox = gr.Checkbox(label="Persian ASR Benchmark", value=True)
                fleurs_checkbox = gr.Checkbox(label="FLEURS", value=True)
                common_voice_checkbox = gr.Checkbox(label="Common Voice", value=True)
                mana_checkbox = gr.Checkbox(label="MANA", value=True)

            leaderboard_table = gr.Dataframe(
                value=df,
                datatype=DATATYPES,
                interactive=False,
                wrap=True,
                column_widths=column_widths,
                elem_id="leaderboard-table",
            )

            # Re-filter the table whenever any dataset checkbox changes.
            # The list order matches filter_dataframe's parameter order.
            dataset_checkboxes = [
                persian_asr_checkbox,
                fleurs_checkbox,
                common_voice_checkbox,
                mana_checkbox,
            ]
            for checkbox in dataset_checkboxes:
                checkbox.change(
                    fn=filter_dataframe,
                    inputs=dataset_checkboxes,
                    outputs=leaderboard_table,
                )

        with gr.TabItem("📈 Metrics"):
            gr.Markdown(METRICS_TAB_TEXT)

        with gr.TabItem("✉️ Request a Model"):
            gr.Markdown("### ✉️ Submit your model to be evaluated")
            model_name_textbox = gr.Textbox(label="Model name (e.g. user/model_name)")
            submit_button = gr.Button("🚀 Submit Request")
            result_box = gr.Markdown()
            submit_button.click(request_model, inputs=model_name_textbox, outputs=result_box)

        with gr.TabItem("🤗 About"):
            gr.Markdown("""
### About
This leaderboard showcases benchmark results for speech recognition models.
Data is sourced from local evaluations in `Benchmark_data.csv`.
""")

    # NOTE(review): the original nesting of the widgets below was lost with
    # the file's indentation; they are placed at page level, under the tabs.
    gr.Markdown(f"Last updated on **{LAST_UPDATED}**")

    with gr.Accordion("📙 Citation", open=False):
        gr.Textbox(
            value=CITATION_TEXT,
            label="BibTeX citation",
            show_copy_button=True,
            lines=6,
        )

    gr.Markdown("""
---
For further information, keep in touch:
**info@c1tech.group**
""")

# Listen on all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)