Spaces:

leggedrobotics
/

navitrace_leaderboard

Running

App Files Files Community

navitrace_leaderboard / src /streamlit_app.py

TimWindecker

Update src/streamlit_app.py

144f1bb verified 2 months ago

raw

history blame

12.1 kB

	import streamlit as st
	import pandas as pd
	import plotly.graph_objects as go
	import plotly.express as px
	from io import StringIO
	import json

	# Page config
	# Sets the browser-tab title, uses the narrow "centered" layout and starts
	# with the sidebar collapsed.
	st.set_page_config(
	page_title="NaviTrace Leaderboard",
	layout="centered",
	initial_sidebar_state="collapsed"
	)

	# Custom CSS for Nerfies-style design
	# Injected as a raw <style> element (hence unsafe_allow_html=True). It:
	#   - imports the Font Awesome 6.4.0 stylesheet from cdnjs (icons for the link row),
	#   - centers and enlarges the h1 page title,
	#   - styles the `.links-container` row of links (color + hover color),
	#   - lays out the numbered `.instruction-item` / `.instruction-number` /
	#     `.instruction-content` steps used in the instructions markup further down.
	st.markdown("""
	<style>
	/* Import Font Awesome */
	@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');

	/* Headings */
	h1 {
	text-align: center;
	font-size: 4.5rem !important;
	font-weight: 500;
	margin-top: 1rem;
	margin-bottom: 2rem;
	}

	/* Links container */
	.links-container {
	text-align: center;
	margin-bottom: 3rem;
	font-size: 1.1rem;
	}

	.links-container a {
	margin: 0 1rem;
	text-decoration: none;
	color: #667eea;
	font-weight: 600;
	transition: color 0.3s;
	}

	.links-container a:hover {
	color: #764ba2;
	}

	/* Instructions styling */
	.instruction-item {
	display: flex;
	gap: 1.5rem;
	margin: 2rem 0;
	align-items: flex-start;
	}

	.instruction-number {
	flex-shrink: 0;
	width: 40px;
	height: 40px;
	border-radius: 50%;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	display: flex;
	align-items: center;
	justify-content: center;
	font-weight: 700;
	font-size: 1.2rem;
	}

	.instruction-content {
	flex-grow: 1;
	padding-top: 0.3rem;
	}

	</style>
	""", unsafe_allow_html=True)

	# Sample data - Replace with your actual data
	def load_data() -> pd.DataFrame:
	    """Return the leaderboard table shown in the chart and the detail view.

	    The values are hard-coded sample data meant to be replaced with real
	    results.

	    Returns:
	        A DataFrame with one row per model and the columns 'Model',
	        'Total Score', three 'Embodiment-*' columns and three 'Category-*'
	        columns.
	    """
	    return pd.DataFrame({
	        'Model': ['GPT-4', 'Claude-3.5-Sonnet', 'Gemini-Pro', 'Llama-3-70B', 'Mistral-Large'],
	        'Total Score': [87.5, 85.2, 82.1, 78.3, 75.6],
	        'Embodiment-A': [90.2, 87.5, 84.3, 80.1, 77.8],
	        'Embodiment-B': [85.8, 84.1, 81.2, 77.9, 74.5],
	        'Embodiment-C': [86.5, 84.0, 80.8, 76.9, 74.5],
	        'Category-Spatial': [88.9, 86.7, 83.5, 79.8, 76.9],
	        'Category-Temporal': [86.3, 84.2, 81.0, 77.5, 75.1],
	        'Category-Object': [87.3, 84.7, 81.8, 77.6, 74.8],
	    })

	def calculate_score(results_df):
	    """Calculate scores for uploaded predictions.

	    Intended behavior (not implemented yet, see TODO below):
	    1. Load the private test split ground truth (not exposed to users)
	    2. Compare uploaded predictions with ground truth
	    3. Calculate metrics per embodiment and category
	    4. Return detailed scores

	    The current body is a placeholder: `results_df` is not read and the
	    same fixed scores are returned for every input.

	    Args:
	        results_df: DataFrame with columns ['sample_id', 'prediction', ...]

	    Returns:
	        dict: Scores keyed by 'Total Score', 'Embodiment-*' and
	        'Category-*', or None if an error occurred (the error is also
	        reported in the UI through st.error).
	    """
	    try:
	        # TODO: Implement your scoring logic here
	        # Example structure:
	        # ground_truth = load_private_test_split() # From secure location
	        # scores = evaluate_predictions(results_df, ground_truth)

	        # Placeholder - replace with actual calculation
	        scores = {
	            'Total Score': 85.0,
	            'Embodiment-A': 87.0,
	            'Embodiment-B': 84.0,
	            'Embodiment-C': 84.0,
	            'Category-Spatial': 86.0,
	            'Category-Temporal': 85.0,
	            'Category-Object': 84.0,
	        }
	        return scores
	    except Exception as e:
	        # UI boundary: surface the failure to the user instead of crashing the app.
	        st.error(f"Error calculating score: {e}")
	        return None

	def validate_tsv_format(uploaded_file):
	    """Validate that the uploaded TSV has the correct format.

	    Args:
	        uploaded_file: Anything `pd.read_csv` accepts, e.g. a file-like
	            object or a path.

	    Returns:
	        tuple: `(True, DataFrame)` when the file parses as tab-separated
	        text and contains every required column, otherwise
	        `(False, error_message)`.
	    """
	    # TODO: Add your specific validation logic
	    # Check for required columns, data types, etc.
	    required_cols = ['sample_id', 'prediction']  # Adjust as needed

	    try:
	        # Rewind seekable streams first: a buffer that has already been read
	        # once sits at EOF and would otherwise be rejected as an empty file.
	        seekable = getattr(uploaded_file, 'seekable', None)
	        if callable(seekable) and seekable():
	            uploaded_file.seek(0)
	        df = pd.read_csv(uploaded_file, sep='\t')
	    except Exception as e:
	        # Any read/parse failure is reported to the caller as a message.
	        return False, f"Error reading file: {e}"

	    if not all(col in df.columns for col in required_cols):
	        return False, f"Missing required columns. Expected: {required_cols}"
	    return True, df

	def _grouped_bar_figure(df, prefix):
	    """Build a figure with one bar trace per column of `df` whose name starts with `prefix`."""
	    fig = go.Figure()
	    for col in [c for c in df.columns if c.startswith(prefix)]:
	        fig.add_trace(go.Bar(
	            name=col.replace(prefix, ''),  # legend shows the part after the prefix
	            x=df['Model'],
	            y=df[col],
	            text=df[col].round(1),
	            textposition='outside',
	        ))
	    return fig


	def create_bar_chart(df, view_type):
	    """Create interactive bar chart based on view type.

	    Args:
	        df: Leaderboard DataFrame with a 'Model' column, a 'Total Score'
	            column and 'Embodiment-*' / 'Category-*' score columns.
	        view_type: "Total Score" draws one bar per model,
	            "Per Embodiment" draws grouped bars from the 'Embodiment-*'
	            columns, and any other value (normally "Per Category") draws
	            grouped bars from the 'Category-*' columns.

	    Returns:
	        The configured plotly figure.
	    """
	    # Layout settings shared by every view; title/barmode are added per view.
	    layout = dict(
	        xaxis_title="Model",
	        yaxis_title="Score",
	        yaxis_range=[0, 100],
	        height=500,
	    )

	    if view_type == "Total Score":
	        fig = go.Figure(data=[
	            go.Bar(
	                x=df['Model'],
	                y=df['Total Score'],
	                marker_color=px.colors.sequential.Purples_r,
	                text=df['Total Score'].round(1),
	                textposition='outside',
	            )
	        ])
	        layout['title'] = "Model Performance - Total Score"
	    else:
	        if view_type == "Per Embodiment":
	            prefix = 'Embodiment-'
	            layout['title'] = "Model Performance - Per Embodiment"
	        else:  # Per Category (also the fallback for unrecognized values)
	            prefix = 'Category-'
	            layout['title'] = "Model Performance - Per Category"
	        fig = _grouped_bar_figure(df, prefix)
	        layout['barmode'] = 'group'

	    fig.update_layout(**layout)

	    # Common styling
	    fig.update_layout(
	        plot_bgcolor='rgba(0,0,0,0)',
	        paper_bgcolor='rgba(0,0,0,0)',
	        font=dict(size=12),
	        # A legend is only meaningful when there are several traces.
	        showlegend=(view_type != "Total Score"),
	        margin=dict(t=80, b=60, l=60, r=60),
	    )
	    fig.update_xaxes(showgrid=False)
	    fig.update_yaxes(showgrid=True, gridcolor='lightgray', gridwidth=0.5)

	    return fig

	# TODO: remove this disabled block. It served only the chart as JSON if the query parameter "only_chart" is set.
	# # E.g. https://huggingface.co/spaces/leggedrobotics/navitrace_leaderboard/?only_chart=total_score
	# params = st.query_params
	# if "only_chart" in params and params["only_chart"] in ["total_score", "per_embodiment", "per_category"]:
	# if params["only_chart"] == "total_score":
	# view_type = "Total Score"
	# elif params["only_chart"] == "per_embodiment":
	# view_type = "Per Embodiment"
	# elif params["only_chart"] == "per_category":
	# view_type = "Per Category"

	# # Create chart
	# df = load_data()
	# fig = create_bar_chart(df, view_type)

	# # Only output JSON
	# st.write(fig.to_json())
	# st.stop()

	# Main content
	st.title("NaviTrace Leaderboard")

	# Links
	# Row of icon links rendered as raw HTML (hence unsafe_allow_html=True); the
	# markup uses the `links-container` class and Font Awesome icon classes.
	# NOTE(review): every href except the Project link still contains template
	# text ("your-paper-website", "your-username", "your-demo-link") — presumably
	# placeholders that need real URLs; confirm before publishing.
	st.markdown("""
	<div class="links-container">
	<a href="https://leggedrobotics.github.io/navitrace_webpage/" target="_blank">
	<i class="fas fa-house"></i> Project
	</a>
	<a href="https://your-paper-website.com" target="_blank">
	<i class="fas fa-file-pdf"></i> Paper
	</a>
	<a href="https://github.com/your-username/navitrace" target="_blank">
	<i class="fab fa-github"></i> Code
	</a>
	<a href="https://huggingface.co/datasets/your-username/navitrace" target="_blank">
	<i class="fas fa-database"></i> Dataset
	</a>
	<a href="https://your-demo-link.com" target="_blank">
	<i class="far fa-images"></i> Demo
	</a>
	</div>
	""", unsafe_allow_html=True)

	# Load data
	df = load_data()

	# Add user's model if it exists in session state.
	# The user's row is placed first so it appears as the leftmost bar / top row.
	if 'user_results' in st.session_state:
	    user_row = pd.DataFrame([st.session_state.user_results])
	    df = pd.concat([user_row, df], ignore_index=True)

	# View selector
	view_type = st.selectbox(
	    "Select View",
	    ["Total Score", "Per Embodiment", "Per Category"],
	)

	# Display chart
	fig = create_bar_chart(df, view_type)
	st.plotly_chart(fig, use_container_width=True, config={
	    'displayModeBar': True,
	    'displaylogo': False,
	    # Settings for the mode bar's "download plot as image" button.
	    'toImageButtonOptions': {
	        'format': 'png',
	        'filename': 'navitrace_leaderboard',
	        'height': 600,
	        'width': 1200,
	        'scale': 2
	    }
	})

	# Detailed table
	with st.expander("View Detailed Scores"):
	    # Shade every column except the first one ('Model'), which is not numeric.
	    st.dataframe(df.style.background_gradient(cmap='Purples', subset=df.columns[1:]), use_container_width=True)

	with st.expander("How to Test Your Model", expanded=True):
	    # NOTE(review): the original indentation of this section was lost; all four
	    # steps are assumed to live inside this expander — confirm.
	    # The HTML strings below are kept flush with the file's base indent so
	    # their content stays unchanged.

	    # Step 1
	    st.markdown("""
	<div class="instruction-item">
	<div class="instruction-number">1</div>
	<div class="instruction-content">
	<div><b>Run Evaluation</b></div>
	<div>
	Download and run our evaluation notebook adjusted to your model. The notebook will generate a TSV file with your model's predictions on the test set.
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	    st.link_button("📓 Open Evaluation Notebook", "https://colab.research.google.com/your-notebook-link", use_container_width=True)

	    # Step 2
	    st.markdown("""
	<div class="instruction-item">
	<div class="instruction-number">2</div>
	<div class="instruction-content">
	<div><b>Upload Results</b></div>
	<div>
	Upload the TSV file generated by the evaluation notebook.
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	    uploaded_file = st.file_uploader("Upload your TSV file with results", type=['tsv', 'txt'], label_visibility="collapsed")

	    # Step 3
	    st.markdown("""
	<div class="instruction-item">
	<div class="instruction-number">3</div>
	<div class="instruction-content">
	<div><b>Calculate Score</b></div>
	<div>
	Click the button below to evaluate your predictions. Scores are calculated using hidden test set ground-truths.
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	    if uploaded_file is not None:
	        if st.button("🧮 Calculate Score", use_container_width=True):
	            with st.spinner("Validating and calculating scores..."):
	                # Validate format
	                is_valid, result = validate_tsv_format(uploaded_file)
	                if is_valid:
	                    # Calculate score using hidden ground-truth
	                    scores = calculate_score(result)
	                    if scores is not None:
	                        # Store in session state so the leaderboard above can
	                        # include the user's row on the next run.
	                        st.session_state.user_results = {
	                            'Model': 'Your Model',
	                            **scores
	                        }
	                        # st.rerun() restarts the script immediately, so a message
	                        # written here would never be seen. Remember the score and
	                        # announce it once on the rerun instead.
	                        st.session_state.score_notice = scores['Total Score']
	                        st.rerun()
	                else:
	                    st.error(f"❌ Invalid file format: {result}")
	    else:
	        st.info("👆 Upload a TSV file to calculate your score")

	    # One-shot confirmation for a score calculated on the previous run.
	    score_notice = st.session_state.pop('score_notice', None)
	    if score_notice is not None:
	        st.success(f"✅ Score calculated successfully: {score_notice:.1f}")
	        st.info("👆 Scroll up to see your model on the leaderboard!")

	    # Step 4
	    st.markdown("""
	<div class="instruction-item">
	<div class="instruction-number">4</div>
	<div class="instruction-content">
	<div><b>Submit to Official Leaderboard</b></div>
	<div>
	Happy with your score? Submit your model to appear on the official leaderboard.
	Fill out the form below with your model details and results.
	</div>
	</div>
	</div>
	""", unsafe_allow_html=True)

	    st.link_button("🗳️ Submit Model", "https://forms.gle/your-google-form-link", use_container_width=True)