import streamlit as st
import pandas as pd
from datasets import load_dataset # Import the Hugging Face datasets library

# Page setup: full-width layout and the browser tab title.
st.set_page_config(
    page_title="TranslateBench EN-ES Leaderboard",
    layout="wide",
)

def _provider_from_model_name(model_name):
    """Return the capitalized provider prefix of a model name.

    The provider is the text before the first underscore, capitalized with
    ``str.capitalize`` (e.g. ``"openai_gpt"`` -> ``"Openai"``). Non-string
    values (for example a missing/NaN cell) map to ``"Unknown"`` instead of
    raising ``AttributeError``.
    """
    if not isinstance(model_name, str):
        return "Unknown"
    return model_name.split('_')[0].capitalize()


# Caching the data loading function
@st.cache_data  # Use st.cache_data for dataframes and serializable objects
def _fetch_benchmark_frame():
    """Download the benchmark summary CSV and return it as a preprocessed DataFrame.

    Any failure is allowed to propagate as an exception. This is deliberate:
    only successful return values are stored by ``st.cache_data``, so a
    transient download error is retried on the next rerun instead of being
    cached as a "no data" result.

    Returns:
        pandas.DataFrame: the benchmark rows with an added ``Provider`` column
        and the score columns coerced to numeric (unparseable values -> NaN).
    """
    st.info("Fetching data from Hugging Face (Thermostatic/TranslateBench-EN-ES)... This may take a moment.")
    # Load the specific CSV file from the dataset
    # The 'data_files' argument points to the specific file within the dataset repository.
    # 'load_dataset' returns a DatasetDict. For a single CSV, it's typically under the 'train' key.
    dataset_dict = load_dataset("Thermostatic/TranslateBench-EN-ES", data_files="model_benchmark_summary.csv")

    # Access the dataset (it will be the 'train' split by default for a single file)
    if 'train' in dataset_dict:
        dataset = dataset_dict['train']
    else:
        # Fallback in case the default split name isn't 'train'
        # This gets the first (and likely only) key in the DatasetDict
        first_split_name = list(dataset_dict.keys())[0]
        dataset = dataset_dict[first_split_name]
        st.warning(f"Using split '{first_split_name}' as 'train' split was not found.")

    df = dataset.to_pandas()
    st.success("Data loaded successfully from Hugging Face!")

    # --- Preprocessing ---
    # Extract provider from Model Name (tolerating non-string cells)
    df['Provider'] = df['Model Name'].apply(_provider_from_model_name)
    # Ensure score columns are numeric
    score_cols = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']
    for col in score_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df


def load_data_from_hf():
    """Loads and preprocesses the benchmark data from Hugging Face.

    Thin, uncached wrapper around the cached ``_fetch_benchmark_frame``.
    The error handling lives here, outside the cached function, so that a
    failed attempt is reported but never cached.

    Returns:
        pandas.DataFrame | None: the preprocessed benchmark data, or ``None``
        if loading or preprocessing raised (an ``st.error`` is shown).
    """
    try:
        return _fetch_benchmark_frame()
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user and let the
        # caller fall back to its "no data" branch.
        st.error(f"An error occurred while loading or processing data from Hugging Face: {e}")
        return None


# Keep the cache-clearing hook reachable under the public name, as it was when
# the decorator was applied directly to load_data_from_hf.
load_data_from_hf.clear = _fetch_benchmark_frame.clear

# --- Main Application ---
# Page heading followed by a short introduction that links to the source dataset.
intro_markdown = """
This leaderboard shows the performance of various models on the English-to-Spanish translation task.
Data is sourced directly from the [Thermostatic/TranslateBench-EN-ES](https://huggingface.co/datasets/Thermostatic/TranslateBench-EN-ES) dataset on Hugging Face.
You can sort the table by different metrics and filter by model provider.
"""
st.title("🏆 TranslateBench EN-ES Leaderboard")
st.markdown(intro_markdown)

# Load data (None signals that loading failed and an error was already shown)
data_df = load_data_from_hf()

if data_df is not None:
    # --- Sidebar for Controls ---
    st.sidebar.header("⚙️ Leaderboard Controls")

    # Metric selection for sorting
    sortable_metrics = ['Weighted Score', 'BLEU', 'METEOR', 'COMET']
    sort_by = st.sidebar.selectbox("Sort by Metric:", sortable_metrics, index=0) # Default to Weighted Score

    # Sort order (only affects how the table is displayed, not who ranks first)
    sort_order_asc = st.sidebar.radio("Sort Order:", ("Descending (Best First)", "Ascending (Worst First)"), index=0)
    is_ascending = sort_order_asc == "Ascending (Worst First)"

    # Provider filter
    all_providers = sorted(data_df['Provider'].unique())
    selected_providers = st.sidebar.multiselect(
        "Filter by Provider:",
        options=all_providers,
        default=all_providers
    )

    if not selected_providers:
        st.warning("Please select at least one provider to display results.")
        filtered_df = pd.DataFrame(columns=data_df.columns) # Empty df
    else:
        filtered_df = data_df[data_df['Provider'].isin(selected_providers)]

    # Rank the filtered models by the chosen metric. The UI labels treat a
    # higher score as better ("Descending (Best First)"), so the ranking is
    # always computed best-first, independent of the display order. Rows whose
    # score is NaN are placed last by sort_values and get the lowest ranks.
    if not filtered_df.empty:
        ranked_df = filtered_df.sort_values(by=sort_by, ascending=False).reset_index(drop=True)
        # Add Rank column (1-based, 1 = best score)
        ranked_df.insert(0, 'Rank', range(1, 1 + len(ranked_df)))
        if is_ascending:
            # Worst-first view: reorder for display but keep each model's rank.
            # 'Rank' is a descending tie-breaker so tied scores appear in the
            # exact reverse of the best-first order.
            sorted_df = ranked_df.sort_values(
                by=[sort_by, 'Rank'], ascending=[True, False]
            ).reset_index(drop=True)
        else:
            sorted_df = ranked_df
    else:
        # Still empty if no providers selected or no data after filter
        ranked_df = filtered_df
        sorted_df = filtered_df


    # --- Display Top Performer ---
    st.header("🥇 Top Performer")
    if not ranked_df.empty:
        # Taken from the best-first ranking, not from the displayed order;
        # otherwise the "Worst First" view would present the worst model as best.
        top_model = ranked_df.iloc[0]
        st.metric(
            label=f"Best Model by {sort_by}",
            value=top_model['Model Name'],
            delta=f"{top_model[sort_by]:.4f} ({sort_by})",
            delta_color="off" # No up/down arrow needed here
        )
        # One column per metric; guard against a metric missing from the row
        cols = st.columns(len(sortable_metrics))
        for i, metric in enumerate(sortable_metrics):
            with cols[i]:
                if metric in top_model:
                    st.metric(label=metric, value=f"{top_model[metric]:.4f}")
                else:
                    st.metric(label=metric, value="N/A")
    else:
        st.info("No data to display for top performer based on current filters.")


    # --- Display Leaderboard Table ---
    st.header("📊 Full Leaderboard")

    # Columns to display in the table
    display_columns = ['Rank', 'Model Name', 'Provider'] + sortable_metrics

    # Formatting for score columns (4 decimal places)
    formatter = {col: "{:.4f}" for col in sortable_metrics}

    if not sorted_df.empty:
        # Ensure only existing columns are selected for display
        existing_display_columns = [col for col in display_columns if col in sorted_df.columns]
        st.dataframe(
            sorted_df[existing_display_columns].style.format(formatter),
            use_container_width=True,
            hide_index=True,
        )
    else:
        st.info("No models match the current filter criteria.")

    # --- Show Raw Data (Optional) ---
    if st.checkbox("Show Raw Data (Downloaded, Unsorted, Unfiltered)"):
        st.subheader("Raw Data")
        st.dataframe(data_df)

else:
    st.warning("Data could not be loaded from Hugging Face. Please check the console for errors, your internet connection, and ensure the dataset/file path is correct.")

# --- Footer ---
# A horizontal rule followed by the credits line, rendered in that order.
for footer_markdown in (
    "---",
    "Created with Streamlit, Pandas, and data from Hugging Face Datasets.",
):
    st.markdown(footer_markdown)