"""Streamlit leaderboard for the TranslateBench EN-ES translation benchmark.

The app downloads a benchmark summary CSV from the Hugging Face Hub, derives a
provider name for every model, and renders a sortable, filterable leaderboard.
"""

import streamlit as st
import pandas as pd
from datasets import load_dataset  # Hugging Face datasets library

# Dataset repository and the single CSV file read from it.
DATASET_REPO = "Thermostatic/TranslateBench-EN-ES"
SUMMARY_FILE = "model_benchmark_summary.csv"

# Score columns: coerced to numeric on load, offered as sort keys, and
# formatted with four decimals in the table.
SCORE_COLUMNS = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']

ASCENDING_LABEL = "Ascending (Worst First)"
DESCENDING_LABEL = "Descending (Best First)"

# Page configuration
st.set_page_config(layout="wide", page_title="TranslateBench EN-ES Leaderboard")


@st.cache_data  # Use st.cache_data for dataframes and serializable objects
def _fetch_benchmark_frame():
    """Download the benchmark summary and return it as a preprocessed DataFrame.

    Only a successful result is returned (and therefore cached); any failure
    propagates as an exception so that it is not stored in the cache.

    Returns:
        A DataFrame with the CSV columns plus a derived ``Provider`` column,
        with every column in ``SCORE_COLUMNS`` coerced to numeric.

    Raises:
        Exception: whatever ``load_dataset`` or the pandas preprocessing
            raises (for example ``KeyError`` when an expected column is
            missing from the CSV).
    """
    st.info("Fetching data from Hugging Face (Thermostatic/TranslateBench-EN-ES)... This may take a moment.")

    # 'data_files' selects one file inside the dataset repository; the result
    # is indexed by split name below.
    dataset_dict = load_dataset(DATASET_REPO, data_files=SUMMARY_FILE)

    if 'train' in dataset_dict:
        dataset = dataset_dict['train']
    else:
        # Fallback in case the split is not named 'train': use the first key.
        first_split_name = list(dataset_dict.keys())[0]
        dataset = dataset_dict[first_split_name]
        st.warning(f"Using split '{first_split_name}' as 'train' split was not found.")

    df = dataset.to_pandas()
    st.success("Data loaded successfully from Hugging Face!")

    # Provider is the text before the first underscore of the model name.
    # The .str accessor leaves missing names as NaN instead of raising, and
    # those rows are labelled "Unknown" so the provider list stays sortable.
    df['Provider'] = (
        df['Model Name'].str.split('_').str[0].str.capitalize().fillna("Unknown")
    )

    # Ensure score columns are numeric; unparsable values become NaN.
    for col in SCORE_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df


def load_data_from_hf():
    """Load and preprocess the benchmark data from Hugging Face.

    Errors are reported in the page with ``st.error``. The error handling
    lives outside the cached helper on purpose: returning ``None`` from inside
    a cached function would cache the failure and keep serving it on reruns.

    Returns:
        The preprocessed DataFrame, or ``None`` if loading or preprocessing
        failed.
    """
    try:
        return _fetch_benchmark_frame()
    except Exception as e:  # UI boundary: show the problem instead of crashing the page
        st.error(f"An error occurred while loading or processing data from Hugging Face: {e}")
        return None


def _rank_models(df, sort_by):
    """Return ``df`` ordered best-first by ``sort_by`` with a 1-based ``Rank`` column.

    Rows whose score is NaN are placed last by ``sort_values`` and therefore
    receive the lowest ranks.
    """
    ranked = df.sort_values(by=sort_by, ascending=False, kind="stable").reset_index(drop=True)
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))
    return ranked


def _render_top_performer(ranked_df, sort_by):
    """Render the headline metric for the best model plus one tile per score column."""
    st.header("🥇 Top Performer")
    if ranked_df.empty:
        st.info("No data to display for top performer based on current filters.")
        return

    # ranked_df is always best-first, so row 0 is the best model regardless of
    # the display order chosen in the sidebar.
    top_model = ranked_df.iloc[0]
    st.metric(
        label=f"Best Model by {sort_by}",
        value=top_model['Model Name'],
        delta=f"{top_model[sort_by]:.4f} ({sort_by})",
        delta_color="off",  # No up/down arrow needed here
    )

    for column, metric in zip(st.columns(len(SCORE_COLUMNS)), SCORE_COLUMNS):
        with column:
            if metric in top_model:
                st.metric(label=metric, value=f"{top_model[metric]:.4f}")
            else:
                st.metric(label=metric, value="N/A")


def _render_leaderboard_table(display_df):
    """Render the leaderboard table, or an info message when it is empty."""
    st.header("📊 Full Leaderboard")
    if display_df.empty:
        st.info("No models match the current filter criteria.")
        return

    wanted_columns = ['Rank', 'Model Name', 'Provider'] + SCORE_COLUMNS
    # Only select (and format) columns that are actually present.
    present_columns = [col for col in wanted_columns if col in display_df.columns]
    formatter = {col: "{:.4f}" for col in SCORE_COLUMNS if col in present_columns}

    st.dataframe(
        display_df[present_columns].style.format(formatter),
        use_container_width=True,
        hide_index=True,
    )


def _render_leaderboard_page(data_df):
    """Render the sidebar controls, top performer, table and raw-data toggle."""
    # --- Sidebar for Controls ---
    st.sidebar.header("⚙️ Leaderboard Controls")

    # Default sort key is the first entry, 'Weighted Score'.
    sort_by = st.sidebar.selectbox("Sort by Metric:", SCORE_COLUMNS, index=0)

    sort_order = st.sidebar.radio(
        "Sort Order:", (DESCENDING_LABEL, ASCENDING_LABEL), index=0
    )
    is_ascending = sort_order == ASCENDING_LABEL

    all_providers = sorted(data_df['Provider'].unique())
    selected_providers = st.sidebar.multiselect(
        "Filter by Provider:",
        options=all_providers,
        default=all_providers,
    )

    if not selected_providers:
        st.warning("Please select at least one provider to display results.")
    # With an empty selection isin() matches nothing, giving an empty frame.
    filtered_df = data_df[data_df['Provider'].isin(selected_providers)]

    if filtered_df.empty:
        ranked_df = filtered_df
        display_df = filtered_df
    else:
        # Rank is assigned from the best-first ordering so that Rank 1 is the
        # best model even when the table is displayed worst-first.
        ranked_df = _rank_models(filtered_df, sort_by)
        if is_ascending:
            display_df = ranked_df.sort_values(
                by=sort_by, ascending=True, kind="stable"
            ).reset_index(drop=True)
        else:
            display_df = ranked_df

    _render_top_performer(ranked_df, sort_by)
    _render_leaderboard_table(display_df)

    # --- Show Raw Data (Optional) ---
    if st.checkbox("Show Raw Data (Downloaded, Unsorted, Unfiltered)"):
        st.subheader("Raw Data")
        st.dataframe(data_df)


def main():
    """Render the whole leaderboard page."""
    st.title("🏆 TranslateBench EN-ES Leaderboard")
    st.markdown("""
This leaderboard shows the performance of various
models on the English-to-Spanish translation task.
Data is sourced directly from the [Thermostatic/TranslateBench-EN-ES](https://huggingface.co/datasets/Thermostatic/TranslateBench-EN-ES) dataset on Hugging Face.
You can sort the table by different metrics and filter by model provider.
""")

    data_df = load_data_from_hf()

    if data_df is not None:
        _render_leaderboard_page(data_df)
    else:
        st.warning("Data could not be loaded from Hugging Face. Please check the console for errors, your internet connection, and ensure the dataset/file path is correct.")

    st.markdown("---")
    st.markdown("Created with Streamlit, Pandas, and data from Hugging Face Datasets.")


# Streamlit executes the script as "__main__", so this runs on every rerun.
if __name__ == "__main__":
    main()