# Streamlit leaderboard app for the TranslateBench EN-ES benchmark dataset.
import streamlit as st
import pandas as pd
from datasets import load_dataset # Import the Hugging Face datasets library
# Configure the page before any other Streamlit call: wide layout plus a
# descriptive browser-tab title.
st.set_page_config(page_title="TranslateBench EN-ES Leaderboard", layout="wide")
# Cache the resulting DataFrame across reruns; st.cache_data is the right
# cache for serializable objects such as DataFrames.
@st.cache_data
def load_data_from_hf():
    """Download the benchmark summary CSV from Hugging Face and preprocess it.

    Returns:
        A pandas DataFrame with a derived 'Provider' column and numeric score
        columns, or None when loading/processing fails (the error is surfaced
        in the Streamlit UI instead of raised).
    """
    try:
        st.info("Fetching data from Hugging Face (Thermostatic/TranslateBench-EN-ES)... This may take a moment.")
        # load_dataset returns a DatasetDict; a single CSV file normally lands
        # under the 'train' key.
        dataset_dict = load_dataset("Thermostatic/TranslateBench-EN-ES", data_files="model_benchmark_summary.csv")
        if 'train' in dataset_dict:
            dataset = dataset_dict['train']
        else:
            # Fall back to whichever split exists when 'train' is missing.
            fallback_split = next(iter(dataset_dict))
            dataset = dataset_dict[fallback_split]
            st.warning(f"Using split '{fallback_split}' as 'train' split was not found.")

        df = dataset.to_pandas()
        st.success("Data loaded successfully from Hugging Face!")

        # --- Preprocessing ---
        # The provider is encoded as the model-name prefix before the first '_'.
        df['Provider'] = df['Model Name'].apply(lambda name: name.split('_')[0].capitalize())
        # Coerce score columns to numeric; unparseable entries become NaN.
        for column in ('Weighted Score', 'BLEU', 'METEOR', 'COMET'):
            df[column] = pd.to_numeric(df[column], errors='coerce')

        return df
    except Exception as exc:
        st.error(f"An error occurred while loading or processing data from Hugging Face: {exc}")
        return None
# --- Main Application ---
_INTRO_MD = """
This leaderboard shows the performance of various models on the English-to-Spanish translation task.
Data is sourced directly from the [Thermostatic/TranslateBench-EN-ES](https://huggingface.co/datasets/Thermostatic/TranslateBench-EN-ES) dataset on Hugging Face.
You can sort the table by different metrics and filter by model provider.
"""

st.title("🏆 TranslateBench EN-ES Leaderboard")
st.markdown(_INTRO_MD)

# Fetch the benchmark data (served from cache after the first successful run).
data_df = load_data_from_hf()
if data_df is not None:
    # --- Sidebar for Controls ---
    st.sidebar.header("⚙️ Leaderboard Controls")

    # Metric used for ranking; Weighted Score is the default.
    sortable_metrics = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']
    sort_by = st.sidebar.selectbox("Sort by Metric:", sortable_metrics, index=0)

    # Sort order (best-first by default).
    sort_order_asc = st.sidebar.radio("Sort Order:", ("Descending (Best First)", "Ascending (Worst First)"), index=0)
    # FIX: direct comparison instead of the redundant `True if ... else False`.
    is_ascending = sort_order_asc == "Ascending (Worst First)"

    # Provider filter: everything selected by default.
    all_providers = sorted(data_df['Provider'].unique())
    selected_providers = st.sidebar.multiselect(
        "Filter by Provider:",
        options=all_providers,
        default=all_providers,
    )

    if not selected_providers:
        st.warning("Please select at least one provider to display results.")
        # Keep an empty frame with the same columns so downstream code is uniform.
        filtered_df = pd.DataFrame(columns=data_df.columns)
    else:
        filtered_df = data_df[data_df['Provider'].isin(selected_providers)]

    # Apply sorting and prepend a 1-based Rank column.
    if not filtered_df.empty:
        sorted_df = filtered_df.sort_values(by=sort_by, ascending=is_ascending).reset_index(drop=True)
        sorted_df.insert(0, 'Rank', range(1, 1 + len(sorted_df)))
    else:
        sorted_df = filtered_df  # Still empty: no providers selected or nothing matched.

    # --- Display Top Performer ---
    st.header("🥇 Top Performer")
    if not sorted_df.empty:
        top_model = sorted_df.iloc[0]
        st.metric(
            label=f"Best Model by {sort_by}",
            value=top_model['Model Name'],
            delta=f"{top_model[sort_by]:.4f} ({sort_by})",
            delta_color="off",  # Informational only; no up/down arrow.
        )
        # One column per metric for the top model; guard against missing columns.
        cols = st.columns(len(sortable_metrics))
        for i, metric in enumerate(sortable_metrics):
            with cols[i]:
                if metric in top_model:
                    st.metric(label=metric, value=f"{top_model[metric]:.4f}")
                else:
                    st.metric(label=metric, value="N/A")
    else:
        st.info("No data to display for top performer based on current filters.")

    # --- Display Leaderboard Table ---
    st.header("📊 Full Leaderboard")
    display_columns = ['Rank', 'Model Name', 'Provider'] + sortable_metrics

    if not sorted_df.empty:
        # Only reference columns that actually exist in the frame.
        existing_display_columns = [col for col in display_columns if col in sorted_df.columns]
        # FIX: restrict the formatter to existing columns as well —
        # Styler.format raises KeyError when given a key for a missing column,
        # which the unguarded `{col: "{:.4f}" for col in sortable_metrics}` did.
        formatter = {col: "{:.4f}" for col in sortable_metrics if col in sorted_df.columns}
        st.dataframe(
            sorted_df[existing_display_columns].style.format(formatter),
            use_container_width=True,
            hide_index=True,
        )
    else:
        st.info("No models match the current filter criteria.")

    # --- Show Raw Data (Optional) ---
    if st.checkbox("Show Raw Data (Downloaded, Unsorted, Unfiltered)"):
        st.subheader("Raw Data")
        st.dataframe(data_df)
else:
    st.warning("Data could not be loaded from Hugging Face. Please check the console for errors, your internet connection, and ensure the dataset/file path is correct.")

st.markdown("---")
st.markdown("Created with Streamlit, Pandas, and data from Hugging Face Datasets.")