TimWindecker committed on
Commit
1f2bf05
·
verified ·
1 Parent(s): 0073352

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +80 -157
src/streamlit_app.py CHANGED
@@ -12,86 +12,56 @@ st.set_page_config(
12
  initial_sidebar_state="collapsed"
13
  )
14
 
15
- # Custom CSS matching the NaviTrace website
16
  st.markdown("""
17
  <style>
18
- /* Import Google Fonts - matching website */
19
- @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400;600;700&display=swap');
20
  @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
21
 
22
- /* Global font and colors */
23
- html, body, [class*="css"] {
24
- font-family: 'Source Sans Pro', sans-serif;
25
- color: #4a4a4a;
26
- }
27
-
28
  /* Limit page width */
29
  .main .block-container {
30
- max-width: 1152px;
31
  padding-top: 3rem;
32
- padding-bottom: 3rem;
33
  }
34
 
35
- /* Main title styling - matching website h1 */
36
  .main-title {
37
  text-align: center;
38
- font-size: 2.5rem;
39
- font-weight: 600;
40
- line-height: 1.125;
41
- margin-top: 1.5rem;
42
- margin-bottom: 1.5rem;
43
- color: #363636;
44
- }
45
-
46
- /* Section headers - matching website h2 */
47
- .section-header {
48
- font-size: 2rem;
49
- font-weight: 600;
50
- line-height: 1.125;
51
- margin-top: 3rem;
52
- margin-bottom: 1.5rem;
53
- color: #363636;
54
- }
55
-
56
- /* Subsection headers - matching website h3 */
57
- .subsection-header {
58
- font-size: 1.5rem;
59
  font-weight: 600;
60
- margin-top: 2rem;
61
- margin-bottom: 1rem;
62
  color: #363636;
63
  }
64
 
65
- /* Button links container */
66
  .button-links {
67
  display: flex;
68
  justify-content: center;
69
- gap: 0.75rem;
70
- margin-bottom: 2.5rem;
71
- margin-top: 1.5rem;
72
  flex-wrap: wrap;
73
  }
74
 
75
- /* Button styling - matching website */
76
  .button-link {
77
  display: inline-flex;
78
  align-items: center;
79
  gap: 0.5rem;
80
- padding: 0.5rem 1.25rem;
81
  background-color: #363636;
82
- border: 1px solid transparent;
83
- border-radius: 4px;
84
  text-decoration: none;
85
  color: #ffffff;
86
  font-weight: 400;
87
- font-size: 1rem;
88
- transition: all 0.2s ease;
89
- cursor: pointer;
90
  }
91
 
92
  .button-link:hover {
93
- background-color: #292929;
94
- border-color: #292929;
 
95
  color: #ffffff;
96
  text-decoration: none;
97
  }
@@ -100,24 +70,12 @@ st.markdown("""
100
  font-size: 1rem;
101
  }
102
 
103
- /* Content text */
104
- .content {
105
- font-size: 1rem;
106
- line-height: 1.7;
107
- color: #4a4a4a;
108
- }
109
-
110
- .content.has-text-justified {
111
- text-align: justify;
112
- }
113
-
114
- .content.has-text-centered {
115
- text-align: center;
116
- }
117
-
118
- /* Highlight text - matching dnerf class */
119
- .dnerf {
120
- font-weight: 600;
121
  color: #363636;
122
  }
123
 
@@ -127,10 +85,6 @@ st.markdown("""
127
  gap: 1.5rem;
128
  margin: 2rem 0;
129
  align-items: flex-start;
130
- background-color: #fafafa;
131
- padding: 1.5rem;
132
- border-radius: 6px;
133
- border-left: 4px solid #363636;
134
  }
135
 
136
  .instruction-number {
@@ -138,13 +92,13 @@ st.markdown("""
138
  width: 40px;
139
  height: 40px;
140
  border-radius: 50%;
141
- background-color: #363636;
142
  color: white;
143
  display: flex;
144
  align-items: center;
145
  justify-content: center;
146
  font-weight: 700;
147
- font-size: 1.25rem;
148
  }
149
 
150
  .instruction-content {
@@ -153,105 +107,51 @@ st.markdown("""
153
  }
154
 
155
  .instruction-title {
156
- font-size: 1.15rem;
157
  font-weight: 600;
158
  margin-bottom: 0.5rem;
159
  color: #363636;
160
  }
161
 
162
  .instruction-desc {
163
- color: #4a4a4a;
164
- line-height: 1.7;
165
- font-size: 1rem;
166
  }
167
 
168
  /* Streamlit button styling */
169
  .stButton>button {
170
- background-color: #363636;
171
  color: white;
172
  font-weight: 600;
173
- border: 1px solid transparent;
174
- padding: 0.5rem 1.5rem;
175
- border-radius: 4px;
176
- font-size: 1rem;
177
- transition: all 0.2s ease;
178
  }
179
 
180
  .stButton>button:hover {
181
- background-color: #292929;
182
- border-color: #292929;
183
  }
184
 
 
 
 
 
185
  /* Expander styling */
186
  .streamlit-expanderHeader {
187
- font-size: 1.15rem;
188
  font-weight: 600;
189
- color: #363636;
190
- background-color: #fafafa;
191
- border-radius: 4px;
192
  }
193
 
194
  /* File uploader */
195
  [data-testid="stFileUploader"] {
196
- margin: 1.5rem 0;
197
- border: 2px dashed #dbdbdb;
198
- border-radius: 6px;
199
- padding: 1.5rem;
200
- }
201
-
202
- /* Selectbox */
203
- .stSelectbox label {
204
- font-weight: 600;
205
- color: #363636;
206
- font-size: 1rem;
207
- }
208
-
209
- /* Dataframe styling */
210
- .dataframe {
211
- font-size: 0.95rem;
212
- }
213
-
214
- /* Info/Success/Error boxes */
215
- .stAlert {
216
- border-radius: 6px;
217
- border-left: 4px solid;
218
- }
219
-
220
- /* Hide streamlit branding */
221
- #MainMenu {visibility: hidden;}
222
- footer {visibility: hidden;}
223
- header {visibility: hidden;}
224
-
225
- /* Divider */
226
- hr {
227
- background-color: #dbdbdb;
228
- border: none;
229
- height: 2px;
230
- margin: 2rem 0;
231
- }
232
-
233
- /* Links */
234
- a {
235
- color: #3273dc;
236
- text-decoration: none;
237
- }
238
-
239
- a:hover {
240
- color: #363636;
241
- text-decoration: underline;
242
- }
243
-
244
- /* Footer text */
245
- .footer-text {
246
- text-align: center;
247
- color: #7a7a7a;
248
- font-size: 0.95rem;
249
- padding: 2rem 0;
250
  }
251
  </style>
252
  """, unsafe_allow_html=True)
253
 
254
- # Sample data
255
  def load_sample_data():
256
  return pd.DataFrame({
257
  'Model': ['GPT-4', 'Claude-3.5-Sonnet', 'Gemini-Pro', 'Llama-3-70B', 'Mistral-Large'],
@@ -265,7 +165,27 @@ def load_sample_data():
265
  })
266
 
267
  def calculate_score_backend(results_df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  try:
 
 
 
 
 
 
269
  scores = {
270
  'Total Score': 85.0,
271
  'Embodiment-A': 87.0,
@@ -281,9 +201,12 @@ def calculate_score_backend(results_df):
281
  return None
282
 
283
  def validate_tsv_format(uploaded_file):
 
284
  try:
285
  df = pd.read_csv(uploaded_file, sep='\t')
286
- required_cols = ['sample_id', 'prediction']
 
 
287
  if not all(col in df.columns for col in required_cols):
288
  return False, f"Missing required columns. Expected: {required_cols}"
289
  return True, df
@@ -291,12 +214,13 @@ def validate_tsv_format(uploaded_file):
291
  return False, f"Error reading file: {str(e)}"
292
 
293
  def create_bar_chart(df, view_type):
 
294
  if view_type == "Total Score":
295
  fig = go.Figure(data=[
296
  go.Bar(
297
  x=df['Model'],
298
  y=df['Total Score'],
299
- marker_color='#363636',
300
  text=df['Total Score'].round(1),
301
  textposition='outside',
302
  )
@@ -311,16 +235,14 @@ def create_bar_chart(df, view_type):
311
 
312
  elif view_type == "Per Embodiment":
313
  embodiment_cols = [col for col in df.columns if col.startswith('Embodiment-')]
314
- colors = ['#363636', '#4a4a4a', '#7a7a7a']
315
  fig = go.Figure()
316
- for idx, col in enumerate(embodiment_cols):
317
  fig.add_trace(go.Bar(
318
  name=col.replace('Embodiment-', ''),
319
  x=df['Model'],
320
  y=df[col],
321
  text=df[col].round(1),
322
  textposition='outside',
323
- marker_color=colors[idx % len(colors)]
324
  ))
325
  fig.update_layout(
326
  title="Model Performance - Per Embodiment",
@@ -333,16 +255,14 @@ def create_bar_chart(df, view_type):
333
 
334
  else: # Per Category
335
  category_cols = [col for col in df.columns if col.startswith('Category-')]
336
- colors = ['#363636', '#4a4a4a', '#7a7a7a']
337
  fig = go.Figure()
338
- for idx, col in enumerate(category_cols):
339
  fig.add_trace(go.Bar(
340
  name=col.replace('Category-', ''),
341
  x=df['Model'],
342
  y=df[col],
343
  text=df[col].round(1),
344
  textposition='outside',
345
- marker_color=colors[idx % len(colors)]
346
  ))
347
  fig.update_layout(
348
  title="Model Performance - Per Category",
@@ -357,19 +277,19 @@ def create_bar_chart(df, view_type):
357
  fig.update_layout(
358
  plot_bgcolor='rgba(0,0,0,0)',
359
  paper_bgcolor='rgba(0,0,0,0)',
360
- font=dict(family='Source Sans Pro, sans-serif', size=12, color='#4a4a4a'),
361
  showlegend=(view_type != "Total Score"),
362
  margin=dict(t=80, b=60, l=60, r=60),
363
  )
364
  fig.update_xaxes(showgrid=False)
365
- fig.update_yaxes(showgrid=True, gridcolor='#dbdbdb', gridwidth=1)
366
 
367
  return fig
368
 
369
  # Main content
370
  st.markdown('<h1 class="main-title">NaviTrace Leaderboard</h1>', unsafe_allow_html=True)
371
 
372
- # Button links matching website style
373
  st.markdown("""
374
  <div class="button-links">
375
  <a href="https://your-paper-website.com" target="_blank" class="button-link">
@@ -421,7 +341,7 @@ st.plotly_chart(fig, use_container_width=True, config={
421
 
422
  # Detailed table
423
  with st.expander("View Detailed Scores"):
424
- st.dataframe(df.style.background_gradient(cmap='Greys', subset=df.columns[1:]), use_container_width=True)
425
 
426
  # Export chart HTML for embedding
427
  with st.expander("Embed Chart in Your Website"):
@@ -484,12 +404,15 @@ with st.expander("How to Test Your Model", expanded=False):
484
  if uploaded_file is not None:
485
  if st.button("Calculate Score", use_container_width=False):
486
  with st.spinner("Validating and calculating scores..."):
 
487
  is_valid, result = validate_tsv_format(uploaded_file)
488
  if is_valid:
 
489
  scores = calculate_score_backend(result)
490
  if scores is not None:
491
  st.success(f"✅ Score calculated successfully: **{scores['Total Score']:.1f}**")
492
 
 
493
  st.session_state.user_results = {
494
  'Model': 'Your Model',
495
  **scores
@@ -521,7 +444,7 @@ with st.expander("How to Test Your Model", expanded=False):
521
  # Footer
522
  st.markdown("---")
523
  st.markdown("""
524
- <div class="footer-text">
525
- <p>NaviTrace Benchmark | <a href="mailto:your-email@domain.com">Contact</a></p>
526
  </div>
527
  """, unsafe_allow_html=True)
 
12
  initial_sidebar_state="collapsed"
13
  )
14
 
15
+ # Custom CSS for Nerfies-style design
16
  st.markdown("""
17
  <style>
18
+ /* Import Font Awesome */
 
19
  @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
20
 
 
 
 
 
 
 
21
  /* Limit page width */
22
  .main .block-container {
23
+ max-width: 1200px;
24
  padding-top: 3rem;
 
25
  }
26
 
27
+ /* Main title styling */
28
  .main-title {
29
  text-align: center;
30
+ font-size: 5rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  font-weight: 600;
32
+ margin-top: 1rem;
33
+ margin-bottom: 2rem;
34
  color: #363636;
35
  }
36
 
37
+ /* Button links container - Nerfies style */
38
  .button-links {
39
  display: flex;
40
  justify-content: center;
41
+ gap: 1rem;
42
+ margin-bottom: 3rem;
 
43
  flex-wrap: wrap;
44
  }
45
 
 
46
  .button-link {
47
  display: inline-flex;
48
  align-items: center;
49
  gap: 0.5rem;
50
+ padding: 0.6rem 1.5rem;
51
  background-color: #363636;
52
+ border: 1px solid #363636;
53
+ border-radius: 50px;
54
  text-decoration: none;
55
  color: #ffffff;
56
  font-weight: 400;
57
+ transition: all 0.3s ease;
58
+ font-size: 0.95rem;
 
59
  }
60
 
61
  .button-link:hover {
62
+ background-color: #5E5E5E;
63
+ transform: translateY(-2px);
64
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
65
  color: #ffffff;
66
  text-decoration: none;
67
  }
 
70
  font-size: 1rem;
71
  }
72
 
73
+ /* Section headers */
74
+ .section-header {
75
+ font-size: 1.8rem;
76
+ font-weight: 500;
77
+ margin-top: 3rem;
78
+ margin-bottom: 1.5rem;
 
 
 
 
 
 
 
 
 
 
 
 
79
  color: #363636;
80
  }
81
 
 
85
  gap: 1.5rem;
86
  margin: 2rem 0;
87
  align-items: flex-start;
 
 
 
 
88
  }
89
 
90
  .instruction-number {
 
92
  width: 40px;
93
  height: 40px;
94
  border-radius: 50%;
95
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
96
  color: white;
97
  display: flex;
98
  align-items: center;
99
  justify-content: center;
100
  font-weight: 700;
101
+ font-size: 1.2rem;
102
  }
103
 
104
  .instruction-content {
 
107
  }
108
 
109
  .instruction-title {
110
+ font-size: 1.1rem;
111
  font-weight: 600;
112
  margin-bottom: 0.5rem;
113
  color: #363636;
114
  }
115
 
116
  .instruction-desc {
117
+ color: #666;
118
+ line-height: 1.6;
 
119
  }
120
 
121
  /* Streamlit button styling */
122
  .stButton>button {
123
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
124
  color: white;
125
  font-weight: 600;
126
+ border: none;
127
+ padding: 0.5rem 2rem;
128
+ border-radius: 6px;
129
+ transition: transform 0.2s;
 
130
  }
131
 
132
  .stButton>button:hover {
133
+ transform: translateY(-2px);
134
+ box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
135
  }
136
 
137
+ /* Hide streamlit branding */
138
+ #MainMenu {visibility: hidden;}
139
+ footer {visibility: hidden;}
140
+
141
  /* Expander styling */
142
  .streamlit-expanderHeader {
143
+ font-size: 1.1rem;
144
  font-weight: 600;
 
 
 
145
  }
146
 
147
  /* File uploader */
148
  [data-testid="stFileUploader"] {
149
+ margin: 1rem 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  </style>
152
  """, unsafe_allow_html=True)
153
 
154
+ # Sample data - Replace with your actual data
155
  def load_sample_data():
156
  return pd.DataFrame({
157
  'Model': ['GPT-4', 'Claude-3.5-Sonnet', 'Gemini-Pro', 'Llama-3-70B', 'Mistral-Large'],
 
165
  })
166
 
167
  def calculate_score_backend(results_df):
168
+ """
169
+ Calculate score using private test split ground truth.
170
+ This function should:
171
+ 1. Load the private test split ground truth (not exposed to users)
172
+ 2. Compare uploaded predictions with ground truth
173
+ 3. Calculate metrics per embodiment and category
174
+ 4. Return detailed scores
175
+
176
+ Args:
177
+ results_df: DataFrame with columns ['sample_id', 'prediction', ...]
178
+
179
+ Returns:
180
+ dict: Scores breakdown or None if error
181
+ """
182
  try:
183
+ # TODO: Implement your scoring logic here
184
+ # Example structure:
185
+ # ground_truth = load_private_test_split() # From secure location
186
+ # scores = evaluate_predictions(results_df, ground_truth)
187
+
188
+ # Placeholder - replace with actual calculation
189
  scores = {
190
  'Total Score': 85.0,
191
  'Embodiment-A': 87.0,
 
201
  return None
202
 
203
  def validate_tsv_format(uploaded_file):
204
+ """Validate that the uploaded TSV has the correct format"""
205
  try:
206
  df = pd.read_csv(uploaded_file, sep='\t')
207
+ # TODO: Add your specific validation logic
208
+ # Check for required columns, data types, etc.
209
+ required_cols = ['sample_id', 'prediction'] # Adjust as needed
210
  if not all(col in df.columns for col in required_cols):
211
  return False, f"Missing required columns. Expected: {required_cols}"
212
  return True, df
 
214
  return False, f"Error reading file: {str(e)}"
215
 
216
  def create_bar_chart(df, view_type):
217
+ """Create interactive bar chart based on view type"""
218
  if view_type == "Total Score":
219
  fig = go.Figure(data=[
220
  go.Bar(
221
  x=df['Model'],
222
  y=df['Total Score'],
223
+ marker_color=px.colors.sequential.Purples_r,
224
  text=df['Total Score'].round(1),
225
  textposition='outside',
226
  )
 
235
 
236
  elif view_type == "Per Embodiment":
237
  embodiment_cols = [col for col in df.columns if col.startswith('Embodiment-')]
 
238
  fig = go.Figure()
239
+ for col in embodiment_cols:
240
  fig.add_trace(go.Bar(
241
  name=col.replace('Embodiment-', ''),
242
  x=df['Model'],
243
  y=df[col],
244
  text=df[col].round(1),
245
  textposition='outside',
 
246
  ))
247
  fig.update_layout(
248
  title="Model Performance - Per Embodiment",
 
255
 
256
  else: # Per Category
257
  category_cols = [col for col in df.columns if col.startswith('Category-')]
 
258
  fig = go.Figure()
259
+ for col in category_cols:
260
  fig.add_trace(go.Bar(
261
  name=col.replace('Category-', ''),
262
  x=df['Model'],
263
  y=df[col],
264
  text=df[col].round(1),
265
  textposition='outside',
 
266
  ))
267
  fig.update_layout(
268
  title="Model Performance - Per Category",
 
277
  fig.update_layout(
278
  plot_bgcolor='rgba(0,0,0,0)',
279
  paper_bgcolor='rgba(0,0,0,0)',
280
+ font=dict(size=12),
281
  showlegend=(view_type != "Total Score"),
282
  margin=dict(t=80, b=60, l=60, r=60),
283
  )
284
  fig.update_xaxes(showgrid=False)
285
+ fig.update_yaxes(showgrid=True, gridcolor='lightgray', gridwidth=0.5)
286
 
287
  return fig
288
 
289
  # Main content
290
  st.markdown('<h1 class="main-title">NaviTrace Leaderboard</h1>', unsafe_allow_html=True)
291
 
292
+ # Nerfies-style button links
293
  st.markdown("""
294
  <div class="button-links">
295
  <a href="https://your-paper-website.com" target="_blank" class="button-link">
 
341
 
342
  # Detailed table
343
  with st.expander("View Detailed Scores"):
344
+ st.dataframe(df.style.background_gradient(cmap='Purples', subset=df.columns[1:]), use_container_width=True)
345
 
346
  # Export chart HTML for embedding
347
  with st.expander("Embed Chart in Your Website"):
 
404
  if uploaded_file is not None:
405
  if st.button("Calculate Score", use_container_width=False):
406
  with st.spinner("Validating and calculating scores..."):
407
+ # Validate format
408
  is_valid, result = validate_tsv_format(uploaded_file)
409
  if is_valid:
410
+ # Calculate score using private ground truth
411
  scores = calculate_score_backend(result)
412
  if scores is not None:
413
  st.success(f"✅ Score calculated successfully: **{scores['Total Score']:.1f}**")
414
 
415
+ # Store in session state
416
  st.session_state.user_results = {
417
  'Model': 'Your Model',
418
  **scores
 
444
  # Footer
445
  st.markdown("---")
446
  st.markdown("""
447
+ <div style="text-align: center; color: #666; padding: 2rem 0;">
448
+ <p>NaviTrace Benchmark | <a href="mailto:your-email@domain.com" style="color: #667eea;">Contact</a></p>
449
  </div>
450
  """, unsafe_allow_html=True)