|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import plotly.graph_objects as go |
|
|
import plotly.express as px |
|
|
from io import StringIO |
|
|
import json |
|
|
|
|
|
|
|
|
# Page-level settings: browser tab title, centered layout, sidebar collapsed.
st.set_page_config(
    page_title="NaviTrace Leaderboard",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# Stylesheet injected into the page. It pulls in Font Awesome (used by the
# icon classes in the links bar) and styles the title, the links bar and the
# numbered "instruction" steps rendered further down the script.
_PAGE_CSS = """
<style>
    /* Import Font Awesome */
    @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');

    /* Headings */
    h1 {
        text-align: center;
        font-size: 4.5rem !important;
        font-weight: 500;
        margin-top: 1rem;
        margin-bottom: 2rem;
    }

    /* Links container */
    .links-container {
        text-align: center;
        margin-bottom: 3rem;
        font-size: 1.1rem;
    }

    .links-container a {
        margin: 0 1rem;
        text-decoration: none;
        color: #667eea;
        font-weight: 600;
        transition: color 0.3s;
    }

    .links-container a:hover {
        color: #764ba2;
    }

    /* Instructions styling */
    .instruction-item {
        display: flex;
        gap: 1.5rem;
        margin: 2rem 0;
        align-items: flex-start;
    }

    .instruction-number {
        flex-shrink: 0;
        width: 40px;
        height: 40px;
        border-radius: 50%;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: 700;
        font-size: 1.2rem;
    }

    .instruction-content {
        flex-grow: 1;
        padding-top: 0.3rem;
    }

</style>
"""

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
def load_data():
    """Return the built-in leaderboard table.

    The table is hard-coded: one row per model, with a ``Model`` name column,
    a ``Total Score`` column, three ``Embodiment-*`` columns and three
    ``Category-*`` columns.

    Returns:
        pandas.DataFrame: The leaderboard rows in their fixed order.
    """
    column_names = [
        'Model',
        'Total Score',
        'Embodiment-A',
        'Embodiment-B',
        'Embodiment-C',
        'Category-Spatial',
        'Category-Temporal',
        'Category-Object',
    ]
    # One tuple per model, values in the same order as ``column_names``.
    model_rows = [
        ('GPT-4', 87.5, 90.2, 85.8, 86.5, 88.9, 86.3, 87.3),
        ('Claude-3.5-Sonnet', 85.2, 87.5, 84.1, 84.0, 86.7, 84.2, 84.7),
        ('Gemini-Pro', 82.1, 84.3, 81.2, 80.8, 83.5, 81.0, 81.8),
        ('Llama-3-70B', 78.3, 80.1, 77.9, 76.9, 79.8, 77.5, 77.6),
        ('Mistral-Large', 75.6, 77.8, 74.5, 74.5, 76.9, 75.1, 74.8),
    ]
    return pd.DataFrame(model_rows, columns=column_names)
|
|
|
|
|
def calculate_score(results_df):
    """Return the score breakdown for an uploaded results table.

    NOTE: this is a placeholder. ``results_df`` is not inspected and the same
    fixed scores are returned on every call; no comparison against ground
    truth is performed by this function.

    Args:
        results_df: DataFrame of uploaded predictions (currently unused).

    Returns:
        dict: Score per metric, keyed by 'Total Score', the three
        'Embodiment-*' names and the three 'Category-*' names. ``None`` if an
        exception is raised while building the scores, in which case the
        error is also reported through ``st.error``.
    """
    metric_names = (
        'Total Score',
        'Embodiment-A',
        'Embodiment-B',
        'Embodiment-C',
        'Category-Spatial',
        'Category-Temporal',
        'Category-Object',
    )
    placeholder_values = (85.0, 87.0, 84.0, 84.0, 86.0, 85.0, 84.0)

    try:
        breakdown = dict(zip(metric_names, placeholder_values))
    except Exception as e:
        st.error(f"Error calculating score: {str(e)}")
        return None
    return breakdown
|
|
|
|
|
def validate_tsv_format(uploaded_file):
    """Parse an uploaded TSV and check that it has the required columns.

    Args:
        uploaded_file: A path or file-like object that ``pandas.read_csv``
            can read tab-separated data from.

    Returns:
        tuple: ``(True, DataFrame)`` when the file parses and contains the
        'sample_id' and 'prediction' columns; otherwise ``(False, message)``
        where ``message`` describes the problem.
    """
    required_cols = ['sample_id', 'prediction']

    # Rewind file-like inputs first. Without this, validating the same buffer
    # a second time starts at end-of-file and fails with "No columns to parse".
    rewind = getattr(uploaded_file, 'seek', None)
    if callable(rewind):
        try:
            rewind(0)
        except (OSError, ValueError):
            # Non-seekable stream: read from the current position instead.
            pass

    try:
        df = pd.read_csv(uploaded_file, sep='\t')
    except Exception as e:
        # Deliberately broad: this parses arbitrary user uploads, and any
        # failure should become a message for the user rather than a crash.
        return False, f"Error reading file: {str(e)}"

    if not all(col in df.columns for col in required_cols):
        return False, f"Missing required columns. Expected: {required_cols}"
    return True, df
|
|
|
|
|
def _add_grouped_bars(fig, df, prefix):
    """Add one bar trace to *fig* for every column of *df* starting with *prefix*."""
    for col in df.columns:
        if not col.startswith(prefix):
            continue
        fig.add_trace(go.Bar(
            name=col.replace(prefix, ''),
            x=df['Model'],
            y=df[col],
            text=df[col].round(1),
            textposition='outside',
        ))


def create_bar_chart(df, view_type):
    """Create the interactive leaderboard bar chart for the selected view.

    Args:
        df: Leaderboard DataFrame with a 'Model' column, a 'Total Score'
            column, and 'Embodiment-*' / 'Category-*' score columns.
        view_type: "Total Score" draws a single bar per model,
            "Per Embodiment" draws grouped bars for the 'Embodiment-*'
            columns, and any other value draws grouped bars for the
            'Category-*' columns.

    Returns:
        plotly.graph_objects.Figure: The configured figure.
    """
    if view_type == "Total Score":
        title_suffix = "Total Score"
        # Give every bar an explicit colour, cycling through the palette so
        # the list always matches the number of rows even when there are
        # more models than palette entries.
        palette = px.colors.sequential.Purples_r
        bar_colors = [palette[i % len(palette)] for i in range(len(df))]
        fig = go.Figure(data=[
            go.Bar(
                x=df['Model'],
                y=df['Total Score'],
                marker_color=bar_colors,
                text=df['Total Score'].round(1),
                textposition='outside',
            )
        ])
        grouped = False
    else:
        if view_type == "Per Embodiment":
            title_suffix, prefix = "Per Embodiment", 'Embodiment-'
        else:
            # Any other view name falls back to the per-category chart.
            title_suffix, prefix = "Per Category", 'Category-'
        fig = go.Figure()
        _add_grouped_bars(fig, df, prefix)
        grouped = True

    # Layout shared by every view.
    fig.update_layout(
        title=f"Model Performance - {title_suffix}",
        xaxis_title="Model",
        yaxis_title="Score",
        yaxis_range=[0, 100],
        height=500,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font=dict(size=12),
        showlegend=(view_type != "Total Score"),
        margin=dict(t=80, b=60, l=60, r=60),
    )
    if grouped:
        # Place the per-column traces side by side for each model.
        fig.update_layout(barmode='group')

    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=True, gridcolor='lightgray', gridwidth=0.5)

    return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page heading.
st.title("NaviTrace Leaderboard")

# Row of external links (project page, paper, code, dataset, demo) shown
# under the title; each link opens in a new tab and carries a Font Awesome
# icon class.
_LINKS_HTML = """
<div class="links-container">
    <a href="https://leggedrobotics.github.io/navitrace_webpage/" target="_blank">
        <i class="fas fa-house"></i> Project
    </a>
    <a href="https://your-paper-website.com" target="_blank">
        <i class="fas fa-file-pdf"></i> Paper
    </a>
    <a href="https://github.com/your-username/navitrace" target="_blank">
        <i class="fab fa-github"></i> Code
    </a>
    <a href="https://huggingface.co/datasets/your-username/navitrace" target="_blank">
        <i class="fas fa-database"></i> Dataset
    </a>
    <a href="https://your-demo-link.com" target="_blank">
        <i class="far fa-images"></i> Demo
    </a>
</div>
"""

st.markdown(_LINKS_HTML, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Start from the built-in leaderboard rows.
df = load_data()

# When a score has been stored in this session, show it as the first row.
if 'user_results' in st.session_state:
    user_row = pd.DataFrame([st.session_state.user_results])
    df = pd.concat([user_row, df], ignore_index=True)

# Which breakdown the chart should display.
_VIEW_OPTIONS = ["Total Score", "Per Embodiment", "Per Category"]
view_type = st.selectbox("Select View", _VIEW_OPTIONS)

# Settings for the chart's "download as image" toolbar button.
_IMAGE_EXPORT_OPTIONS = {
    'format': 'png',
    'filename': 'navitrace_leaderboard',
    'height': 600,
    'width': 1200,
    'scale': 2,
}
# Plotly toolbar configuration: keep the mode bar, hide the Plotly logo.
_CHART_CONFIG = {
    'displayModeBar': True,
    'displaylogo': False,
    'toImageButtonOptions': _IMAGE_EXPORT_OPTIONS,
}

fig = create_bar_chart(df, view_type)
st.plotly_chart(fig, use_container_width=True, config=_CHART_CONFIG)

# Full score table, shaded per column; the first column (model name) is
# excluded from the gradient.
with st.expander("View Detailed Scores"):
    _score_columns = df.columns[1:]
    _styled_scores = df.style.background_gradient(cmap='Purples', subset=_score_columns)
    st.dataframe(_styled_scores, use_container_width=True)
|
|
|
|
|
# Four-step walkthrough: run the notebook, upload the TSV, score it, submit.
with st.expander("How to Test Your Model", expanded=True):

    # Step 1: point the user at the evaluation notebook.
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">1</div>
        <div class="instruction-content">
            <div><b>Run Evaluation</b></div>
            <div>
                Download and run our evaluation notebook adjusted to your model. The notebook will generate a TSV file with your model's predictions on the test set.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    st.link_button("📓 Open Evaluation Notebook", "https://colab.research.google.com/your-notebook-link", use_container_width=True)

    # Step 2: upload the predictions file.
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">2</div>
        <div class="instruction-content">
            <div><b>Upload Results</b></div>
            <div>
                Upload the TSV file generated by the evaluation notebook.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    uploaded_file = st.file_uploader("Upload your TSV file with results", type=['tsv', 'txt'], label_visibility="collapsed")

    # Step 3: validate the upload and compute the score.
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">3</div>
        <div class="instruction-content">
            <div><b>Calculate Score</b></div>
            <div>
                Click the button below to evaluate your predictions. Scores are calculated using hidden test set ground-truths.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    if uploaded_file is not None:
        if st.button("🧮 Calculate Score", use_container_width=True):
            with st.spinner("Validating and calculating scores..."):
                is_valid, result = validate_tsv_format(uploaded_file)
                if is_valid:
                    scores = calculate_score(result)
                    if scores is not None:
                        st.session_state.user_results = {
                            'Model': 'Your Model',
                            **scores
                        }
                        # The chart above was drawn before the score existed,
                        # so a rerun is needed to include the new row. Anything
                        # rendered just before st.rerun() is discarded, so the
                        # confirmation is stored here and shown on the next run.
                        st.session_state.score_notice = (
                            f"✅ Score calculated successfully: **{scores['Total Score']:.1f}**"
                        )
                        st.rerun()
                else:
                    st.error(f"❌ Invalid file format: {result}")

        # Show the confirmation saved by the previous run, exactly once.
        if 'score_notice' in st.session_state:
            st.success(st.session_state['score_notice'])
            st.info("👆 Scroll up to see your model on the leaderboard!")
            del st.session_state['score_notice']
    else:
        st.info("👆 Upload a TSV file to calculate your score")

    # Step 4: link to the official submission form.
    st.markdown("""
    <div class="instruction-item">
        <div class="instruction-number">4</div>
        <div class="instruction-content">
            <div><b>Submit to Official Leaderboard</b></div>
            <div>
                Happy with your score? Submit your model to appear on the official leaderboard.
                Fill out the form below with your model details and results.
            </div>
        </div>
    </div>
    """, unsafe_allow_html=True)

    st.link_button("🗳️ Submit Model", "https://forms.gle/your-google-form-link", use_container_width=True)
|
|
|