TranslateBench-EN-ES / src /streamlit_app.py
Thermostatic's picture
Update src/streamlit_app.py
fd3f7e0 verified
import streamlit as st
import pandas as pd
from datasets import load_dataset # Import the Hugging Face datasets library
# Page setup: use the wide layout and set the page title.
st.set_page_config(
    page_title="TranslateBench EN-ES Leaderboard",
    layout="wide",
)
def _provider_from_model_name(model_name):
    """Return the capitalized text before the first '_' of a model name.

    Null or otherwise non-string names map to "Unknown" so that one malformed
    row cannot abort the whole load.
    """
    if not isinstance(model_name, str):
        return "Unknown"
    return model_name.partition('_')[0].capitalize()


# Only the fetch/preprocess step is cached. The error handling lives in the
# uncached wrapper below: catching inside the cached function would make it
# *return* None on failure, and that None would then be cached, pinning the app
# in its "could not be loaded" state after a single transient error.
@st.cache_data  # Use st.cache_data for dataframes and serializable objects
def _fetch_benchmark_frame():
    """Download the benchmark summary CSV and return it as a preprocessed DataFrame.

    Adds a 'Provider' column derived from 'Model Name' and coerces the score
    columns to numeric (unparseable values become NaN).

    Raises:
        Exception: any error from the download or from preprocessing (for
            example a missing expected column) propagates to the caller.
    """
    st.info("Fetching data from Hugging Face (Thermostatic/TranslateBench-EN-ES)... This may take a moment.")
    # Load the specific CSV file from the dataset repository; 'data_files'
    # points at that file. The result is indexed by split name below.
    dataset_dict = load_dataset(
        "Thermostatic/TranslateBench-EN-ES",
        data_files="model_benchmark_summary.csv",
    )
    if 'train' in dataset_dict:
        dataset = dataset_dict['train']
    else:
        # Fallback in case the default split name isn't 'train':
        # use the first (and likely only) split.
        first_split_name = next(iter(dataset_dict))
        dataset = dataset_dict[first_split_name]
        st.warning(f"Using split '{first_split_name}' as 'train' split was not found.")
    df = dataset.to_pandas()
    st.success("Data loaded successfully from Hugging Face!")

    # --- Preprocessing ---
    # Extract provider from Model Name (text before the first underscore)
    df['Provider'] = df['Model Name'].map(_provider_from_model_name)
    # Ensure score columns are numeric
    score_cols = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']
    for col in score_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df


def load_data_from_hf():
    """Loads and preprocesses the benchmark data from Hugging Face.

    Returns:
        The preprocessed DataFrame, or None if loading or preprocessing failed
        (the error is shown in the app via st.error).
    """
    try:
        return _fetch_benchmark_frame()
    except Exception as e:  # UI boundary: report the failure and degrade gracefully
        st.error(f"An error occurred while loading or processing data from Hugging Face: {e}")
        return None
# --- Main Application ---
# Page title (trophy emoji restored from mis-decoded bytes) and intro text.
st.title("🏆 TranslateBench EN-ES Leaderboard")
st.markdown("""
This leaderboard shows the performance of various models on the English-to-Spanish translation task.
Data is sourced directly from the [Thermostatic/TranslateBench-EN-ES](https://huggingface.co/datasets/Thermostatic/TranslateBench-EN-ES) dataset on Hugging Face.
You can sort the table by different metrics and filter by model provider.
""")
# Load data (None signals that loading failed)
data_df = load_data_from_hf()
if data_df is not None:
    # --- Sidebar for Controls ---
    st.sidebar.header("⚙️ Leaderboard Controls")
    # Metric selection for sorting
    sortable_metrics = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']
    sort_by = st.sidebar.selectbox("Sort by Metric:", sortable_metrics, index=0)  # Default to Weighted Score
    # Sort order
    sort_order_asc = st.sidebar.radio("Sort Order:", ("Descending (Best First)", "Ascending (Worst First)"), index=0)
    is_ascending = sort_order_asc == "Ascending (Worst First)"
    # Provider filter
    all_providers = sorted(data_df['Provider'].unique())
    selected_providers = st.sidebar.multiselect(
        "Filter by Provider:",
        options=all_providers,
        default=all_providers,
    )
    if not selected_providers:
        st.warning("Please select at least one provider to display results.")
        filtered_df = pd.DataFrame(columns=data_df.columns)  # Empty df
    else:
        filtered_df = data_df[data_df['Provider'].isin(selected_providers)]

    # Apply sorting
    if not filtered_df.empty:
        sorted_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending).reset_index(drop=True)
        # Add Rank column (1-based, following the chosen display order)
        sorted_df.insert(0, 'Rank', range(1, 1 + len(sorted_df)))
    else:
        sorted_df = filtered_df  # Still empty if no providers selected or no data after filter

    # --- Display Top Performer ---
    st.header("🥇 Top Performer")
    # Higher is better for every metric (the sort-order labels call descending
    # "Best First"), so the best model is the row with the maximum score. It is
    # picked independently of the display order; taking the first displayed row
    # would report the *worst* model whenever ascending order is selected.
    # Rows without a score for the chosen metric cannot be the best.
    scored_df = sorted_df.dropna(subset=[sort_by])
    if not scored_df.empty:
        top_model = scored_df.loc[scored_df[sort_by].idxmax()]
        st.metric(
            label=f"Best Model by {sort_by}",
            value=top_model['Model Name'],
            delta=f"{top_model[sort_by]:.4f} ({sort_by})",
            delta_color="off",  # No up/down arrow needed here
        )
        # One column per metric; show "N/A" for any metric missing from the row
        cols = st.columns(len(sortable_metrics))
        for metric_col, metric in zip(cols, sortable_metrics):
            with metric_col:
                if metric in top_model:
                    st.metric(label=metric, value=f"{top_model[metric]:.4f}")
                else:
                    st.metric(label=metric, value="N/A")
    else:
        st.info("No data to display for top performer based on current filters.")

    # --- Display Leaderboard Table ---
    st.header("📊 Full Leaderboard")
    # Columns to display in the table
    display_columns = ['Rank', 'Model Name', 'Provider'] + sortable_metrics
    if not sorted_df.empty:
        # Ensure only existing columns are selected for display
        existing_display_columns = [col for col in display_columns if col in sorted_df.columns]
        # Formatting for score columns (4 decimal places), restricted to the
        # columns actually shown so it stays consistent with the guard above
        formatter = {col: "{:.4f}" for col in sortable_metrics if col in existing_display_columns}
        st.dataframe(
            sorted_df[existing_display_columns].style.format(formatter),
            use_container_width=True,
            hide_index=True,
        )
    else:
        st.info("No models match the current filter criteria.")

    # --- Show Raw Data (Optional) ---
    if st.checkbox("Show Raw Data (Downloaded, Unsorted, Unfiltered)"):
        st.subheader("Raw Data")
        st.dataframe(data_df)
else:
    st.warning("Data could not be loaded from Hugging Face. Please check the console for errors, your internet connection, and ensure the dataset/file path is correct.")

# Footer
st.markdown("---")
st.markdown("Created with Streamlit, Pandas, and data from Hugging Face Datasets.")