Spaces:

leggedrobotics
/

navitrace_leaderboard

Running

App Files Files Community

TimWindecker committed on Oct 13

Commit

641159b

verified ·

1 Parent(s): 4a5921f

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +38 -64

src/streamlit_app.py CHANGED Viewed

@@ -1,10 +1,16 @@
 import streamlit as st
 import pandas as pd
 import plotly.graph_objects as go
 import plotly.express as px
 from io import StringIO
 import json
 # Page config
 st.set_page_config(
     page_title="NaviTrace Leaderboard",
@@ -93,62 +99,48 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
-# Sample data - Replace with your actual data
 def load_data():
-    return pd.DataFrame({
-        'Model': ['GPT-4', 'Claude-3.5-Sonnet', 'Gemini-Pro', 'Llama-3-70B', 'Mistral-Large'],
-        'Total Score': [87.5, 85.2, 82.1, 78.3, 75.6],
-        'Embodiment-A': [90.2, 87.5, 84.3, 80.1, 77.8],
-        'Embodiment-B': [85.8, 84.1, 81.2, 77.9, 74.5],
-        'Embodiment-C': [86.5, 84.0, 80.8, 76.9, 74.5],
-        'Category-Spatial': [88.9, 86.7, 83.5, 79.8, 76.9],
-        'Category-Temporal': [86.3, 84.2, 81.0, 77.5, 75.1],
-        'Category-Object': [87.3, 84.7, 81.8, 77.6, 74.8],
-    })
-def calculate_score(results_df):
-    """
-    Calculate score using private test split ground truth.
-    This function should:
-    1. Load the private test split ground truth (not exposed to users)
-    2. Compare uploaded predictions with ground truth
-    3. Calculate metrics per embodiment and category
-    4. Return detailed scores
-    Args:
-        results_df: DataFrame with columns ['sample_id', 'prediction', ...]
-    Returns:
-        dict: Scores breakdown or None if error
-    """
     try:
-        # TODO: Implement your scoring logic here
-        # Example structure:
-        # ground_truth = load_private_test_split()  # From secure location
-        # scores = evaluate_predictions(results_df, ground_truth)
-        # Placeholder - replace with actual calculation
-        scores = {
-            'Total Score': 85.0,
-            'Embodiment-A': 87.0,
-            'Embodiment-B': 84.0,
-            'Embodiment-C': 84.0,
-            'Category-Spatial': 86.0,
-            'Category-Temporal': 85.0,
-            'Category-Object': 84.0,
-        }
-        return scores
     except Exception as e:
         st.error(f"Error calculating score: {str(e)}")
         return None
 def validate_tsv_format(uploaded_file):
     """Validate that the uploaded TSV has the correct format"""
     try:
         df = pd.read_csv(uploaded_file, sep='\t')
-        # TODO: Add your specific validation logic
         # Check for required columns, data types, etc.
-        required_cols = ['sample_id', 'prediction']  # Adjust as needed
         if not all(col in df.columns for col in required_cols):
             return False, f"Missing required columns. Expected: {required_cols}"
         return True, df
@@ -157,6 +149,7 @@ def validate_tsv_format(uploaded_file):
 def create_bar_chart(df, view_type):
     """Create interactive bar chart based on view type"""
     if view_type == "Total Score":
         fig = go.Figure(data=[
             go.Bar(
@@ -233,25 +226,6 @@ def create_bar_chart(df, view_type):
     return fig
-# TODO remove # Serve only the chart as JSON if parameter "only_chart" is set
-# # E.g. https://huggingface.co/spaces/leggedrobotics/navitrace_leaderboard/?only_chart=total_score
-# params = st.query_params
-# if "only_chart" in params and params["only_chart"] in ["total_score", "per_embodiment", "per_category"]:
-#     if params["only_chart"] == "total_score":
-#         view_type = "Total Score"
-#     elif params["only_chart"] == "per_embodiment":
-#         view_type = "Per Embodiment"
-#     elif params["only_chart"] == "per_category":
-#         view_type = "Per Category"
-#     # Create chart
-#     df = load_data()
-#     fig = create_bar_chart(df, view_type)
-#     # Only output JSON
-#     st.write(fig.to_json())
-#     st.stop()
 # Header
 st.markdown("""
 <div class="header-container">
@@ -278,8 +252,8 @@ df = load_data()
 # Add user's model if it exists in session state
 if 'user_results' in st.session_state:
-    user_row = pd.DataFrame([st.session_state.user_results])
-    df = pd.concat([user_row, df], ignore_index=True)
 # View selector
 view_type = st.selectbox(

+from src.score_calculation.score import score_predictions
+from datasets import load_dataset
+import multiprocessing
 import streamlit as st
 import pandas as pd
+from pathlib import Path
 import plotly.graph_objects as go
 import plotly.express as px
 from io import StringIO
 import json
+RESULTS_DIR = "results/"
 # Page config
 st.set_page_config(
     page_title="NaviTrace Leaderboard",
 </style>
 """, unsafe_allow_html=True)
 def load_data():
+    """Load all result files as one data frame"""
     try:
+        # Load all results files
+        all_dfs = []
+        for file_path in Path(RESULTS_DIR).glob('*.tsv'):
+            df = pd.read_csv(file_path, sep='\t')
+            model_name = file_path.stem
+            df["model"] = model_name
+            all_dfs.append(df)
+        # Concatenate all DataFrames into one
+        if all_dfs:
+            final_df = pd.concat(all_dfs, ignore_index=True)
+        return final_df
+    except Exception as e:
+        st.error(f"Error loading data: {str(e)}")
+        return None
+def calculate_score(results_df):
+    """Calculate score using private test split ground truth."""
+    try:
+        # Access to private dataset with test labels
+        login(token=os.environ.get("HF_TOKEN"))
+        dataset = load_dataset(os.environ.get("HF_DATASET_ID"), split="test")
+        # Calculate score
+        return score_predictions(results_df, dataset)
     except Exception as e:
         st.error(f"Error calculating score: {str(e)}")
         return None
 def validate_tsv_format(uploaded_file):
     """Validate that the uploaded TSV has the correct format"""
     try:
         df = pd.read_csv(uploaded_file, sep='\t')
         # Check for required columns, data types, etc.
+        required_cols = ["sample_id", "embodiment", "category", "prediction"]
         if not all(col in df.columns for col in required_cols):
             return False, f"Missing required columns. Expected: {required_cols}"
         return True, df
 def create_bar_chart(df, view_type):
     """Create interactive bar chart based on view type"""
     if view_type == "Total Score":
         fig = go.Figure(data=[
             go.Bar(
     return fig
 # Header
 st.markdown("""
 <div class="header-container">
 # Add user's model if it exists in session state
 if 'user_results' in st.session_state:
+    user_results = pd.DataFrame([st.session_state.user_results])
+    df = pd.concat([user_results, df], ignore_index=True)
 # View selector
 view_type = st.selectbox(