Spaces:

leggedrobotics
/

navitrace_leaderboard

Running

File size: 12,839 Bytes

from .score_calculation.score import score_predictions
from datasets import load_dataset
import multiprocessing
import numpy as np
import streamlit as st
import pandas as pd
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
from io import StringIO
import json

# Folder scanned for leaderboard results (one TSV file per model).
RESULTS_DIR = "results/"

# Page configuration: centered layout with the sidebar collapsed on first load.
_PAGE_OPTIONS = dict(
    page_title="NaviTrace Leaderboard",
    layout="centered",
    initial_sidebar_state="collapsed",
)
st.set_page_config(**_PAGE_OPTIONS)

# Custom CSS for Nerfies-style design.
# The stylesheet is injected as raw HTML (hence unsafe_allow_html=True) and
# styles the page title, the row of project links below it, and the numbered
# instruction steps rendered further down the page.
st.markdown("""
<style>
    /* Import Font Awesome */
    @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');

    .header-container {
        display: flex;
        flex-direction: column;
        align-items: center;
    }

    /* Headings */
    h1 {
        text-align: center;
        font-size: 4.5rem !important;
        font-weight: 500;
        margin-top: 1rem;
        margin-bottom: 1rem;
    }
    
    /* Links container */
    .links-container {
        display: flex;
        flex-wrap: wrap;
        row-gap: 1rem;
        justify-content: center;
        text-align: center;
        margin-bottom: 3rem;
        font-size: 1.1rem;
    }
    
    .links-container a {
        white-space: nowrap;
        margin: 0 1rem;
        text-decoration: none;
        color: #3b82f6;
        font-weight: 600;
        transition: color 0.3s;
    }
    
    .links-container a:hover {
        color: #1e3a8a;
    }
        
    /* Instructions styling */
    .instruction-item {
        display: flex;
        gap: 1.5rem;
        margin: 2rem 0;
        align-items: flex-start;
    }
    
    .instruction-number {
        flex-shrink: 0;
        width: 40px;
        height: 40px;
        border-radius: 50%;
        background: linear-gradient(135deg, #3b82f6 0%, #1e3a8a 100%);
        color: white;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: 700;
        font-size: 1.2rem;
    }
    
    .instruction-content {
        flex-grow: 1;
        padding-top: 0.3rem;
    }

    /* Media Query for mobile devices */
    @media (max-width: 600px) {
        h1 {
            font-size: 3.5rem !important; /* Adjust font size for small screens */
        }
    }
</style>
""", unsafe_allow_html=True)

def load_data():
    """Load all result files in ``RESULTS_DIR`` as one data frame.

    Every ``*.tsv`` file is read as tab-separated values and tagged with a
    ``model`` column holding the file name without its extension. Files are
    read in sorted order so the resulting row order is reproducible.

    Returns:
        The concatenated frame. When the directory contains no result files,
        an empty frame with ``model`` and ``score`` columns is returned so
        callers can treat "no results" like any other frame. If a file cannot
        be read, a Streamlit error is shown and ``None`` is returned.
    """
    try:
        frames = []
        # glob() yields files in arbitrary order; sort for a stable result.
        for file_path in sorted(Path(RESULTS_DIR).glob('*.tsv')):
            frame = pd.read_csv(file_path, sep='\t')
            frame["model"] = file_path.stem
            frames.append(frame)

        if not frames:
            # Nothing to concatenate: pd.concat([]) would raise, so hand back
            # an empty frame carrying the columns the chart code reads.
            return pd.DataFrame({
                "model": pd.Series(dtype="object"),
                "score": pd.Series(dtype="float64"),
            })

        return pd.concat(frames, ignore_index=True)
    except Exception as e:
        st.error(f"Error loading data: {str(e)}")
        return None

def calculate_score(results_df):
    """Calculate score using private test split ground truth.

    The private dataset is identified by the ``HF_DATASET_ID`` environment
    variable and accessed with the token in ``HF_TOKEN``.

    Args:
        results_df: Predictions to score; passed unchanged to
            ``score_predictions`` together with the loaded test split.

    Returns:
        Whatever ``score_predictions`` returns, or ``None`` after showing a
        Streamlit error when the dataset is not configured, cannot be loaded,
        or scoring fails.
    """
    # Local import keeps this function self-contained.
    import os

    dataset_id = os.environ.get("HF_DATASET_ID")
    if not dataset_id:
        st.error("Error calculating score: HF_DATASET_ID is not configured")
        return None

    try:
        # Access to private dataset with test labels. The token is passed to
        # this single request instead of performing a process-wide login.
        dataset = load_dataset(
            dataset_id,
            split="test",
            token=os.environ.get("HF_TOKEN"),
        )

        # Calculate score
        return score_predictions(results_df, dataset)
    except Exception as e:
        st.error(f"Error calculating score: {str(e)}")
        return None

def validate_tsv_format(uploaded_file):
    """Validate that the uploaded TSV has the required columns.

    Args:
        uploaded_file: Path or file-like object accepted by ``pd.read_csv``.
            Seekable file-like objects are rewound first, so a handle that
            has already been read can be validated again.

    Returns:
        ``(True, df)`` with the parsed frame when every required column is
        present, otherwise ``(False, message)`` describing the problem.
    """
    required_cols = ["sample_id", "embodiment", "category", "prediction"]

    try:
        # A consumed stream would otherwise parse as an empty file.
        seekable = getattr(uploaded_file, "seekable", None)
        if callable(seekable) and seekable():
            uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file, sep='\t')
    except Exception as e:
        return False, f"Error reading file: {str(e)}"

    # Name the absent columns so the user knows what to fix.
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        return False, f"Missing required columns: {missing}. Expected: {required_cols}"
    return True, df

def create_bar_chart(df, view_type):
    """Create the interactive leaderboard bar chart for the selected view.

    Args:
        df: Results frame. The "Total Score" view reads its ``model`` and
            ``score`` columns. May be ``None`` or empty, in which case an
            empty figure is returned instead of raising.
        view_type: ``"Total Score"`` draws one bar per model with its mean
            score. Any other value ("Per Embodiment", "Per Category")
            currently yields an empty figure.

    Returns:
        A Plotly figure with the shared leaderboard styling applied.
    """
    has_data = df is not None and not df.empty

    if view_type == "Total Score" and has_data:
        # Format df: display names use spaces instead of underscores.
        df_fig = df.copy()
        df_fig["Model"] = df_fig["model"].str.replace('_', ' ', regex=False)

        # Rows scored as +inf are excluded so they do not swamp the mean.
        df_fig = df_fig[df_fig["score"] != np.inf]

        # Calculate mean score per model
        df_fig = df_fig.groupby("Model")[["score"]].mean().reset_index()

        # Sort the results from best to worst (lower is better)
        df_fig = df_fig.sort_values(by="score", ascending=True)

        fig = px.bar(
            df_fig,
            x="Model",
            y="score",
            color="score",
            color_continuous_scale=px.colors.diverging.Fall,
            template="plotly_white",
            orientation="v",
        )

        fig.update_layout(
            xaxis_title_text="Model",
            yaxis_title_text="Score (Lower is better)",
            title_text="",
            xaxis_tickangle=-45,
            bargap=0.2,  # Increase gap for slimmer bars
            height=500,  # Set the height of the plot
        )

        # Remove the color legend from the chart.
        fig.update_coloraxes(showscale=False)

        # Add annotations to show the exact score on each bar.
        fig.update_traces(
            texttemplate="%{y:.2f}",
            textposition="outside"
        )
    else:
        # Either there is nothing to plot, or the view is not implemented yet.
        # TODO: "Per Embodiment" / "Per Category" views. Planned as grouped
        # bar charts (barmode='group', height=500) with one go.Bar trace per
        # embodiment or category, models on the x axis and the value printed
        # outside each bar.
        fig = go.Figure()

    # Common styling
    fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(size=12),
        showlegend=(view_type != "Total Score"),
        margin=dict(t=80, b=60, l=60, r=60),
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=True, gridcolor='lightgray', gridwidth=0.5)

    return fig

# Header: page title followed by a row of project links.
# NOTE: the Paper and Code links still point at "#TODO" placeholders.
st.markdown("""
<div class="header-container">
    <h1>NaviTrace Leaderboard</h1>
    <div class="links-container">
        <a href="https://leggedrobotics.github.io/navitrace_webpage/">
            🏠 Project
        </a>
        <a href="#TODO">
            📄 Paper
        </a>
        <a href="#TODO">
            💻 Code
        </a>
        <a href="https://huggingface.co/datasets/leggedrobotics/navitrace">
            💾 Dataset
        </a>
    </div>
</div>
""", unsafe_allow_html=True)

# Load data (None when a result file could not be read)
df = load_data()

# Add user's model if it exists in session state.
# pd.concat silently drops a None entry, so this also works when loading failed.
if 'user_results' in st.session_state:
    user_results = pd.DataFrame([st.session_state.user_results])
    df = pd.concat([user_results, df], ignore_index=True)

# View selector
view_type = st.selectbox(
    "Select View",
    ["Total Score", "Per Embodiment", "Per Category"],
)

# Display chart. Skip it when there is nothing to plot so a failed or empty
# load does not take the rest of the page down with an exception.
if df is None or df.empty:
    st.info("No leaderboard results to display yet.")
else:
    fig = create_bar_chart(df, view_type)
    st.plotly_chart(fig, use_container_width=True, config={
        'displayModeBar': True,
        'displaylogo': False,
        'toImageButtonOptions': {
            'format': 'png',
            'filename': 'navitrace_leaderboard',
            'height': 600,
            'width': 1200,
            'scale': 2
        }
    })
    st.caption("🔹 Note: Lower scores indicate better performance.")

# Detailed table
with st.expander("View Detailed Scores"):
    pass
    #TODO st.dataframe(df.style.background_gradient(cmap='Blues_r', subset=df.columns[1:]), width="stretch")

# Step-by-step instructions plus the upload / scoring workflow.
with st.expander("How to Test Your Model", expanded=True):
    # Step 1
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">1</div>
        <div class="instruction-content">
            <div><b>Run Evaluation</b></div>
            <div>
                Download and run our evaluation notebook adjusted to your model. The notebook will generate a TSV file with your model's predictions on the test set.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    st.link_button("📓 Open Evaluation Notebook", "https://colab.research.google.com/your-notebook-link", width="stretch")

    # Step 2
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">2</div>
        <div class="instruction-content">
            <div><b>Upload Results</b></div>
            <div>
                Upload the TSV file generated by the evaluation notebook.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload your TSV file with results", type=['tsv', 'txt'], label_visibility="collapsed")

    # Step 3
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">3</div>
        <div class="instruction-content">
            <div><b>Calculate Score</b></div>
            <div>
                Click the button below to evaluate your predictions. Scores are calculated using hidden test set ground-truths.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    # Show the outcome of the previous scoring run exactly once. It is handed
    # over through session state because st.rerun() discards everything that
    # was rendered during the run in which the score was computed.
    if "score_message" in st.session_state:
        st.success(st.session_state["score_message"])
        st.info("👆 Scroll up to see your model on the leaderboard!")
        del st.session_state["score_message"]

    if uploaded_file is not None:
        if st.button("🧮 Calculate Score", width="stretch"):
            with st.spinner("Validating and calculating scores..."):
                # Validate format
                is_valid, result = validate_tsv_format(uploaded_file)
                if is_valid:
                    # Calculate score using hidden ground-truth
                    scores = calculate_score(result)
                    if scores is not None:
                        st.session_state["score_message"] = (
                            f"✅ Score calculated successfully: **{scores['Total Score']:.1f}**"
                        )

                        # Store in session state. The leaderboard chart groups
                        # rows by the 'model' column and plots 'score', so the
                        # user's row must carry both to show up.
                        st.session_state.user_results = {
                            'model': 'Your Model',
                            'score': scores['Total Score'],
                            **scores
                        }
                        # Rerun so the chart above is redrawn with the new row.
                        st.rerun()
                else:
                    st.error(f"❌ Invalid file format: {result}")
    else:
        st.info("👆 Upload a TSV file to calculate your score")

    # Step 4
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">4</div>
        <div class="instruction-content">
            <div><b>Submit to Official Leaderboard</b></div>
            <div>
                Happy with your score? Submit your model to appear on the official leaderboard.
                Fill out the form below with your model details and results.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    st.link_button("🗳️ Submit Model", "https://docs.google.com/forms/d/e/1FAIpQLSfcAQ6JW7eey-8OFSAz2ea_StCezxJK1dt6mjW_wR-9jCHnXg/viewform?usp=dialog", width="stretch")