deepmage121 committed on
Commit
0205c53
·
1 Parent(s): 0aca3f5

new version with updates

Browse files
Files changed (6) hide show
  1. README.md +6 -6
  2. app.py +145 -255
  3. pyproject.toml +11 -5
  4. requirements.txt +211 -0
  5. ui_components.py +648 -1039
  6. uv.lock +0 -0
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Every Eval Ever Space
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.1.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Eee Test
3
+ emoji: 👀
4
+ colorFrom: pink
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,7 +1,4 @@
1
- """
2
- Evaluation Leaderboard - Gradio Interface
3
- Displays model evaluation results from HuggingFace datasets.
4
- """
5
  import gradio as gr
6
  import pandas as pd
7
  from pathlib import Path
@@ -23,25 +20,15 @@ from ui_components import (
23
  format_metric_details,
24
  format_model_card,
25
  format_model_comparison,
 
26
  )
27
 
28
  PAGE_SIZE = 50
29
 
30
 
31
- def update_leaderboard_table(selected_leaderboard, search_query="", current_page=1, sort_column=None, selected_columns=None, progress=gr.Progress()):
32
- """Loads and aggregates data for the selected leaderboard."""
33
  if not selected_leaderboard:
34
- return (
35
- pd.DataFrame(),
36
- format_leaderboard_header(None, {}),
37
- format_metric_details(None, {}),
38
- gr.update(choices=[], value=None),
39
- gr.update(interactive=False),
40
- gr.update(interactive=False),
41
- gr.update(choices=[], value=None),
42
- "0 / 0",
43
- gr.update(choices=[], value=[]),
44
- )
45
 
46
  metadata = get_eval_metadata(selected_leaderboard)
47
 
@@ -49,73 +36,37 @@ def update_leaderboard_table(selected_leaderboard, search_query="", current_page
49
  progress(value, desc=desc)
50
 
51
  df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
 
 
 
 
 
 
52
 
53
- # Get all available columns BEFORE filtering (for column selector)
54
- all_available_columns = list(df.columns) if not df.empty else []
55
-
56
- # Filter columns if selected (if None or empty, show all columns)
57
- if selected_columns is not None and len(selected_columns) > 0:
58
- # Ensure Model column is always included
59
- base_cols = ["Model"]
60
- available_cols = list(df.columns)
61
- cols_to_show = [col for col in base_cols if col in available_cols]
62
- # Add Developer and other selected columns
63
- cols_to_show.extend([col for col in selected_columns if col in available_cols and col not in cols_to_show])
64
- if cols_to_show:
65
- df = df[cols_to_show]
66
-
67
- if search_query and not df.empty:
68
  mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
69
  df = df[mask]
70
 
71
- filtered_count = len(df)
72
-
73
- if sort_column and sort_column in df.columns and not df.empty:
74
  df = df.sort_values(by=sort_column, ascending=False, na_position='last')
75
 
76
- total_pages = max(1, (filtered_count + PAGE_SIZE - 1) // PAGE_SIZE) if filtered_count > 0 else 1
 
77
  current_page = max(1, min(current_page, total_pages))
 
 
78
 
79
- start_idx = (current_page - 1) * PAGE_SIZE
80
- end_idx = start_idx + PAGE_SIZE
81
- df_paginated = df.iloc[start_idx:end_idx] if not df.empty else df
82
-
83
- page_choices = [str(i) for i in range(1, total_pages + 1)]
84
- page_dropdown = gr.update(choices=page_choices, value=str(current_page))
85
- prev_btn = gr.update(interactive=(current_page > 1))
86
- next_btn = gr.update(interactive=(current_page < total_pages))
87
- page_info = f"{current_page} / {total_pages}"
88
-
89
- sort_choices = list(df.columns) if not df.empty else []
90
- default_sort = sort_column if sort_column and sort_column in sort_choices else ("Average" if "Average" in sort_choices else (sort_choices[0] if sort_choices else None))
91
- sort_column_update = gr.update(choices=sort_choices, value=default_sort)
92
-
93
- # Get all available columns for column selector (use full list, not filtered)
94
- # Include all columns except Model in the selector (Model is always shown)
95
- column_choices = [col for col in all_available_columns if col != "Model"]
96
- # Preserve current selection, or default to all columns if None or empty
97
- if selected_columns is None or len(selected_columns) == 0:
98
- column_value = column_choices
99
- else:
100
- # Preserve user's selection, filtering out any invalid choices
101
- column_value = [col for col in selected_columns if col in column_choices]
102
- column_selector_update = gr.update(choices=column_choices, value=column_value)
103
-
104
- return (
105
- df_paginated,
106
- format_leaderboard_header(selected_leaderboard, metadata),
107
- format_metric_details(selected_leaderboard, metadata),
108
- page_dropdown,
109
- prev_btn,
110
- next_btn,
111
- sort_column_update,
112
- page_info,
113
- column_selector_update,
114
- )
115
 
116
 
117
  def search_model(model_query):
118
- """Search for a model and return formatted card."""
119
  if not model_query or len(model_query) < 2:
120
  return """
121
  <div class="no-results">
@@ -134,7 +85,6 @@ def search_model(model_query):
134
  </div>
135
  """
136
 
137
- # Use the first matching model
138
  model_name = list(results.keys())[0]
139
  model_data = results[model_name]
140
 
@@ -142,42 +92,38 @@ def search_model(model_query):
142
 
143
 
144
  def compare_models(selected_models):
145
- """Compare multiple selected models."""
146
- if not selected_models or len(selected_models) == 0:
147
  return """
148
  <div class="no-results">
149
  <h3>Select models to compare</h3>
150
  <p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
151
  </div>
152
- """
153
 
154
- # Get data for all selected models
155
  all_results = {}
156
  for model_name in selected_models:
157
  results, _ = search_model_across_leaderboards(model_name)
158
  if results:
159
- # Use the first matching model (exact match preferred)
160
  matched_model = list(results.keys())[0]
161
  all_results[matched_model] = results[matched_model]
 
 
162
 
163
  if len(all_results) == 1:
164
- # Single model - show card view
165
  model_name = list(all_results.keys())[0]
166
- return format_model_card(model_name, all_results[model_name])
167
  elif len(all_results) > 1:
168
- # Multiple models - show comparison
169
- return format_model_comparison(list(all_results.keys()), all_results)
170
  else:
171
  return """
172
  <div class="no-results">
173
  <h3>No results found</h3>
174
  <p>Try selecting different models</p>
175
  </div>
176
- """
177
 
178
 
179
  def get_model_suggestions(query):
180
- """Get model name suggestions for autocomplete."""
181
  if not query or len(query) < 2:
182
  return gr.update(choices=[])
183
 
@@ -185,13 +131,28 @@ def get_model_suggestions(query):
185
  return gr.update(choices=matches[:15])
186
 
187
 
188
- # Load data at startup
189
  load_hf_dataset_on_startup()
190
 
191
- # Build interface
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css()) as demo:
193
 
194
- # Header
 
 
 
195
  gr.HTML("""
196
  <div class="app-header">
197
  <div class="logo-mark">E³</div>
@@ -206,83 +167,53 @@ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css())
206
  """)
207
 
208
  with gr.Tabs():
209
- # === TAB 1: Leaderboard View ===
210
- with gr.TabItem("📊 Leaderboards"):
211
- with gr.Row(elem_classes="controls-bar"):
212
- initial_choices = get_available_leaderboards()
213
- initial_value = initial_choices[0] if initial_choices else None
214
-
215
- with gr.Column(scale=2, min_width=200):
216
- leaderboard_selector = gr.Dropdown(
217
- choices=initial_choices,
218
- value=initial_value,
219
- label="Leaderboard",
220
- interactive=True
221
- )
222
- with gr.Column(scale=3, min_width=250):
223
- search_box = gr.Textbox(
224
- label="Filter",
225
- placeholder="Filter models...",
226
- show_label=True
227
- )
228
- with gr.Column(scale=1, min_width=100):
229
- refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
230
-
231
- init_df, init_header, init_metrics, init_page_dropdown, init_prev, init_next, init_sort_cols, init_page_info, init_column_selector = update_leaderboard_table(initial_value, "", 1, "Average", None)
232
-
233
- header_view = gr.HTML(value=init_header)
234
-
235
- # Hidden sort state (default to Average)
236
- sort_column_dropdown = gr.Dropdown(
237
- choices=init_sort_cols.get("choices", []) if hasattr(init_sort_cols, 'get') else [],
238
- value=init_sort_cols.get("value") if hasattr(init_sort_cols, 'get') else None,
239
- visible=False,
240
- )
241
-
242
- # Column selector
243
- with gr.Row(elem_classes="controls-bar"):
244
- column_selector = gr.CheckboxGroup(
245
- choices=init_column_selector.get("choices", []) if isinstance(init_column_selector, dict) else [],
246
- value=init_column_selector.get("value", []) if isinstance(init_column_selector, dict) else [],
247
- label="Columns to Display",
248
- interactive=True,
249
- show_label=True,
250
  )
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  leaderboard_table = gr.Dataframe(
253
- value=init_df,
254
  label=None,
255
  interactive=False,
256
  wrap=False,
257
  elem_classes="dataframe",
258
  )
259
 
260
- # Pagination below table - centered
261
  with gr.Row(elem_classes="pagination-bar"):
262
  prev_btn = gr.Button("←", variant="secondary", size="sm", min_width=60)
263
- page_info = gr.Markdown(value=init_page_info, elem_classes="page-info")
264
- next_btn = gr.Button("→", variant="secondary", size="sm", min_width=60)
265
- # Extract choices and value from gr.update() dict, ensuring value is in choices
266
- if isinstance(init_page_dropdown, dict):
267
- page_choices = init_page_dropdown.get("choices", ["1"])
268
- page_value = str(init_page_dropdown.get("value", "1")) if init_page_dropdown.get("value") is not None else "1"
269
- # Ensure value exists in choices
270
- if page_value not in page_choices:
271
- page_value = page_choices[0] if page_choices else "1"
272
- if not page_choices:
273
- page_choices = ["1"]
274
- else:
275
- page_choices = ["1"]
276
- page_value = "1"
277
- page_dropdown = gr.Dropdown(
278
- choices=page_choices,
279
- value=page_value,
280
- visible=False,
281
- )
282
 
283
- metrics_view = gr.HTML(value=init_metrics)
284
 
285
- # === TAB 2: Model View ===
286
  with gr.TabItem("🔍 Model Lookup"):
287
  gr.Markdown("### Find and compare models across all leaderboards")
288
 
@@ -315,182 +246,141 @@ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css())
315
  elem_classes="selected-models-group"
316
  )
317
 
 
318
  model_card_view = gr.HTML(value=default_compare_html)
319
 
320
- # Submission guide
321
  with gr.Accordion("📀 How to Submit Data", open=False):
322
  gr.Markdown("""
323
- **Submit via GitHub Pull Request:**
324
-
325
  1. Fork [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
326
  2. Add JSON files to `data/<leaderboard>/<developer>/<model>/`
327
- 3. Open a PR — automated validation runs on submission
328
  4. After merge, data syncs to HuggingFace automatically
329
 
330
- [Submission Guide](https://github.com/evaleval/every_eval_ever#contributor-guide) · [JSON Schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
331
  """)
332
 
333
- # === State ===
334
- current_page_state = gr.State(value=1)
335
- sort_column_state = gr.State(value="Average")
336
-
337
- def go_prev(current):
338
- return max(1, current - 1)
339
-
340
- def go_next(current):
341
- return current + 1
 
 
 
 
 
 
342
 
343
- def reset_page():
344
- return 1
 
 
 
345
 
346
- def update_table_only(selected_leaderboard, search_query, current_page, sort_column, selected_columns):
347
- """Update table without modifying column selector (for column changes)."""
348
- result = update_leaderboard_table(selected_leaderboard, search_query, current_page, sort_column, selected_columns)
349
- # Return all outputs except the last one (column_selector)
350
- return result[:-1]
 
351
 
352
- # === Leaderboard Events ===
353
  leaderboard_selector.change(
354
- fn=reset_page, outputs=[current_page_state]
355
- ).then(
356
- fn=lambda: "Average", outputs=[sort_column_state]
357
- ).then(
358
- fn=lambda: None, outputs=[column_selector]
359
- ).then(
360
- fn=update_leaderboard_table,
361
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
362
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info, column_selector]
363
  )
364
 
365
  search_box.input(
366
- fn=reset_page, outputs=[current_page_state]
367
- ).then(
368
- fn=update_table_only,
369
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
370
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
371
  )
372
 
373
- sort_column_dropdown.change(
374
- fn=lambda col: col,
375
- inputs=[sort_column_dropdown],
376
- outputs=[sort_column_state]
377
- ).then(
378
- fn=reset_page, outputs=[current_page_state]
379
- ).then(
380
- fn=update_table_only,
381
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
382
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
383
- )
384
 
385
  column_selector.change(
386
- fn=reset_page, outputs=[current_page_state]
387
- ).then(
388
- fn=update_table_only,
389
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
390
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
391
- )
392
-
393
- page_dropdown.change(
394
- fn=lambda p: int(p) if p else 1,
395
- inputs=[page_dropdown],
396
- outputs=[current_page_state]
397
- ).then(
398
- fn=update_table_only,
399
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
400
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
401
  )
402
 
403
  prev_btn.click(
404
- fn=go_prev, inputs=[current_page_state], outputs=[current_page_state]
405
- ).then(
406
- fn=update_table_only,
407
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
408
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
409
  )
410
 
411
  next_btn.click(
412
- fn=go_next, inputs=[current_page_state], outputs=[current_page_state]
413
- ).then(
414
- fn=update_table_only,
415
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
416
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
417
  )
418
 
419
  refresh_btn.click(
420
- fn=lambda: gr.Dropdown(choices=get_available_leaderboards()),
421
  outputs=[leaderboard_selector]
422
- ).then(
423
- fn=lambda: clear_cache()
424
- ).then(
425
- fn=reset_page, outputs=[current_page_state]
426
- ).then(
427
- fn=lambda: "Average", outputs=[sort_column_state]
428
- ).then(
429
- fn=lambda: None, outputs=[column_selector]
430
- ).then(
431
- fn=update_leaderboard_table,
432
- inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state, column_selector],
433
- outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info, column_selector]
434
  )
435
 
436
- # === Model Search Events ===
437
  def add_model_and_compare(selected_model, current_selected):
438
- """Add a model and auto-compare."""
439
  if not selected_model:
440
- comparison_html = compare_models(current_selected) if current_selected else default_compare_html
441
  return (
442
  current_selected,
443
  gr.update(value=None),
444
  gr.update(choices=current_selected, value=current_selected),
445
- comparison_html
 
446
  )
447
 
448
- if current_selected is None:
449
- current_selected = []
450
-
451
  if selected_model not in current_selected:
452
  current_selected = current_selected + [selected_model]
453
 
454
- comparison_html = compare_models(current_selected)
455
 
456
  return (
457
  current_selected,
458
  gr.update(value=None),
459
  gr.update(choices=current_selected, value=current_selected),
460
- comparison_html
 
461
  )
462
 
463
  def update_selection(selected_list):
464
- """Update selection from checkbox changes."""
465
- selected_list = selected_list or []
466
- comparison_html = compare_models(selected_list) if selected_list else default_compare_html
467
- return selected_list, comparison_html
468
 
469
  def clear_all_models():
470
- """Clear all selected models."""
471
  return (
472
  [],
473
  gr.update(value=None),
474
  gr.update(choices=[], value=[]),
475
- default_compare_html
 
476
  )
477
 
478
- # Select from dropdown adds model and auto-compares
479
  model_dropdown.select(
480
  fn=add_model_and_compare,
481
  inputs=[model_dropdown, selected_models_state],
482
- outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view]
483
  )
484
 
485
  selected_models_group.change(
486
  fn=update_selection,
487
  inputs=[selected_models_group],
488
- outputs=[selected_models_state, model_card_view]
489
  )
490
 
491
  clear_models_btn.click(
492
  fn=clear_all_models,
493
- outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view]
494
  )
495
 
496
  DATA_DIR.mkdir(exist_ok=True)
 
1
+
 
 
 
2
  import gradio as gr
3
  import pandas as pd
4
  from pathlib import Path
 
20
  format_metric_details,
21
  format_model_card,
22
  format_model_comparison,
23
+ create_radar_plot,
24
  )
25
 
26
  PAGE_SIZE = 50
27
 
28
 
29
+ def get_leaderboard_data(selected_leaderboard, progress=gr.Progress()):
 
30
  if not selected_leaderboard:
31
+ return pd.DataFrame(), {}
 
 
 
 
 
 
 
 
 
 
32
 
33
  metadata = get_eval_metadata(selected_leaderboard)
34
 
 
36
  progress(value, desc=desc)
37
 
38
  df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
39
+ return df, metadata
40
+
41
+
42
+ def filter_and_paginate(df, search_query, sort_column, selected_columns, current_page):
43
+ if df.empty:
44
+ return df.copy(), 1, 1
45
 
46
+ df = df.copy()
47
+ all_columns = list(df.columns)
48
+
49
+ if selected_columns:
50
+ cols = ["Model"] + [c for c in all_columns if c in selected_columns and c != "Model"]
51
+ df = df[cols]
52
+
53
+ if search_query:
 
 
 
 
 
 
 
54
  mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
55
  df = df[mask]
56
 
57
+ if sort_column and sort_column in df.columns:
 
 
58
  df = df.sort_values(by=sort_column, ascending=False, na_position='last')
59
 
60
+ total_rows = len(df)
61
+ total_pages = max(1, (total_rows + PAGE_SIZE - 1) // PAGE_SIZE)
62
  current_page = max(1, min(current_page, total_pages))
63
+ start = (current_page - 1) * PAGE_SIZE
64
+ end = start + PAGE_SIZE
65
 
66
+ return df.iloc[start:end], current_page, total_pages
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  def search_model(model_query):
 
70
  if not model_query or len(model_query) < 2:
71
  return """
72
  <div class="no-results">
 
85
  </div>
86
  """
87
 
 
88
  model_name = list(results.keys())[0]
89
  model_data = results[model_name]
90
 
 
92
 
93
 
94
  def compare_models(selected_models):
95
+ if not selected_models:
 
96
  return """
97
  <div class="no-results">
98
  <h3>Select models to compare</h3>
99
  <p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
100
  </div>
101
+ """, None
102
 
 
103
  all_results = {}
104
  for model_name in selected_models:
105
  results, _ = search_model_across_leaderboards(model_name)
106
  if results:
 
107
  matched_model = list(results.keys())[0]
108
  all_results[matched_model] = results[matched_model]
109
+
110
+ plot = create_radar_plot(list(all_results.keys()), all_results)
111
 
112
  if len(all_results) == 1:
 
113
  model_name = list(all_results.keys())[0]
114
+ return format_model_card(model_name, all_results[model_name]), plot
115
  elif len(all_results) > 1:
116
+ return format_model_comparison(list(all_results.keys()), all_results), plot
 
117
  else:
118
  return """
119
  <div class="no-results">
120
  <h3>No results found</h3>
121
  <p>Try selecting different models</p>
122
  </div>
123
+ """, None
124
 
125
 
126
  def get_model_suggestions(query):
 
127
  if not query or len(query) < 2:
128
  return gr.update(choices=[])
129
 
 
131
  return gr.update(choices=matches[:15])
132
 
133
 
 
134
  load_hf_dataset_on_startup()
135
 
136
+ initial_leaderboards = get_available_leaderboards()
137
+ initial_leaderboard = initial_leaderboards[0] if initial_leaderboards else None
138
+
139
+ if initial_leaderboard:
140
+ _init_df, _init_metadata = get_leaderboard_data(initial_leaderboard)
141
+ _init_columns = [c for c in _init_df.columns if c != "Model"] if not _init_df.empty else []
142
+ _init_df_display, _, _init_total_pages = filter_and_paginate(_init_df, "", "Average", None, 1)
143
+ else:
144
+ _init_df = pd.DataFrame()
145
+ _init_metadata = {}
146
+ _init_columns = []
147
+ _init_df_display = pd.DataFrame()
148
+ _init_total_pages = 1
149
+
150
  with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css()) as demo:
151
 
152
+ full_df_state = gr.State(value=_init_df)
153
+ metadata_state = gr.State(value=_init_metadata)
154
+ current_page_state = gr.State(value=1)
155
+
156
  gr.HTML("""
157
  <div class="app-header">
158
  <div class="logo-mark">E³</div>
 
167
  """)
168
 
169
  with gr.Tabs():
170
+ with gr.TabItem("Leaderboards"):
171
+ with gr.Column(elem_classes="controls-bar"):
172
+ with gr.Row():
173
+ with gr.Column(scale=4, min_width=260):
174
+ leaderboard_selector = gr.Dropdown(
175
+ choices=initial_leaderboards,
176
+ value=initial_leaderboard,
177
+ label="Leaderboard",
178
+ interactive=True
179
+ )
180
+ with gr.Column(scale=1, min_width=120):
181
+ refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
182
+
183
+ search_box = gr.Textbox(
184
+ label="Filter",
185
+ placeholder="Filter models...",
186
+ show_label=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  )
188
 
189
+ header_view = gr.HTML(value=format_leaderboard_header(initial_leaderboard, _init_metadata))
190
+
191
+ with gr.Row(elem_classes="column-selector-bar"):
192
+ with gr.Column(scale=5, min_width=320):
193
+ column_selector = gr.Dropdown(
194
+ choices=_init_columns,
195
+ value=_init_columns,
196
+ label="Columns to Display",
197
+ multiselect=True,
198
+ interactive=True,
199
+ elem_classes="column-selector-dropdown"
200
+ )
201
+
202
  leaderboard_table = gr.Dataframe(
203
+ value=_init_df_display,
204
  label=None,
205
  interactive=False,
206
  wrap=False,
207
  elem_classes="dataframe",
208
  )
209
 
 
210
  with gr.Row(elem_classes="pagination-bar"):
211
  prev_btn = gr.Button("←", variant="secondary", size="sm", min_width=60)
212
+ page_info = gr.Markdown(value=f"1 / {_init_total_pages}", elem_classes="page-info")
213
  next_btn = gr.Button("→", variant="secondary", size="sm", min_width=60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
+ metrics_view = gr.HTML(value=format_metric_details(initial_leaderboard, _init_metadata))
216
 
 
217
  with gr.TabItem("🔍 Model Lookup"):
218
  gr.Markdown("### Find and compare models across all leaderboards")
219
 
 
246
  elem_classes="selected-models-group"
247
  )
248
 
249
+ radar_view = gr.Plot(label="Radar Comparison")
250
  model_card_view = gr.HTML(value=default_compare_html)
251
 
 
252
  with gr.Accordion("📀 How to Submit Data", open=False):
253
  gr.Markdown("""
254
+ Submit via GitHub Pull Request:
 
255
  1. Fork [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
256
  2. Add JSON files to `data/<leaderboard>/<developer>/<model>/`
257
+ 3. Open a PR - automated validation runs on submission
258
  4. After merge, data syncs to HuggingFace automatically
259
 
260
+ [Submission Guide](https://github.com/evaleval/every_eval_ever#contributor-guide) - [JSON Schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
261
  """)
262
 
263
+ def load_leaderboard(leaderboard_name):
264
+ df, metadata = get_leaderboard_data(leaderboard_name)
265
+ columns = [c for c in df.columns if c != "Model"] if not df.empty else []
266
+ df_display, page, total_pages = filter_and_paginate(df, "", "Average", None, 1)
267
+
268
+ return (
269
+ df, # full_df_state
270
+ metadata, # metadata_state
271
+ 1, # current_page_state
272
+ df_display, # leaderboard_table
273
+ format_leaderboard_header(leaderboard_name, metadata), # header_view
274
+ format_metric_details(leaderboard_name, metadata), # metrics_view
275
+ gr.update(choices=columns, value=columns), # column_selector
276
+ f"1 / {total_pages}", # page_info
277
+ )
278
 
279
+ def update_table(full_df, search_query, selected_columns, current_page):
280
+ df_display, page, total_pages = filter_and_paginate(
281
+ full_df, search_query, "Average", selected_columns, current_page
282
+ )
283
+ return df_display, f"{page} / {total_pages}", page
284
 
285
+ def go_page(full_df, search_query, selected_columns, current_page, delta):
286
+ new_page = max(1, current_page + delta)
287
+ df_display, page, total_pages = filter_and_paginate(
288
+ full_df, search_query, "Average", selected_columns, new_page
289
+ )
290
+ return df_display, f"{page} / {total_pages}", page
291
 
 
292
  leaderboard_selector.change(
293
+ fn=load_leaderboard,
294
+ inputs=[leaderboard_selector],
295
+ outputs=[full_df_state, metadata_state, current_page_state, leaderboard_table, header_view, metrics_view, column_selector, page_info]
 
 
 
 
 
 
296
  )
297
 
298
  search_box.input(
299
+ fn=lambda df, q, cols: update_table(df, q, cols, 1),
300
+ inputs=[full_df_state, search_box, column_selector],
301
+ outputs=[leaderboard_table, page_info, current_page_state]
 
 
302
  )
303
 
304
+ def on_column_change(df, q, cols):
305
+ if not cols:
306
+ cols = [c for c in df.columns if c != "Model"]
307
+ return update_table(df, q, cols, 1)
 
 
 
 
 
 
 
308
 
309
  column_selector.change(
310
+ fn=on_column_change,
311
+ inputs=[full_df_state, search_box, column_selector],
312
+ outputs=[leaderboard_table, page_info, current_page_state]
 
 
 
 
 
 
 
 
 
 
 
 
313
  )
314
 
315
  prev_btn.click(
316
+ fn=lambda df, q, cols, p: go_page(df, q, cols, p, -1),
317
+ inputs=[full_df_state, search_box, column_selector, current_page_state],
318
+ outputs=[leaderboard_table, page_info, current_page_state]
 
 
319
  )
320
 
321
  next_btn.click(
322
+ fn=lambda df, q, cols, p: go_page(df, q, cols, p, 1),
323
+ inputs=[full_df_state, search_box, column_selector, current_page_state],
324
+ outputs=[leaderboard_table, page_info, current_page_state]
 
 
325
  )
326
 
327
  refresh_btn.click(
328
+ fn=lambda: (clear_cache(), gr.update(choices=get_available_leaderboards()))[1],
329
  outputs=[leaderboard_selector]
 
 
 
 
 
 
 
 
 
 
 
 
330
  )
331
 
 
332
  def add_model_and_compare(selected_model, current_selected):
 
333
  if not selected_model:
334
+ comparison_html, plot = compare_models(current_selected) if current_selected else (default_compare_html, None)
335
  return (
336
  current_selected,
337
  gr.update(value=None),
338
  gr.update(choices=current_selected, value=current_selected),
339
+ comparison_html,
340
+ plot
341
  )
342
 
 
 
 
343
  if selected_model not in current_selected:
344
  current_selected = current_selected + [selected_model]
345
 
346
+ comparison_html, plot = compare_models(current_selected)
347
 
348
  return (
349
  current_selected,
350
  gr.update(value=None),
351
  gr.update(choices=current_selected, value=current_selected),
352
+ comparison_html,
353
+ plot
354
  )
355
 
356
  def update_selection(selected_list):
357
+ comparison_html, plot = compare_models(selected_list) if selected_list else (default_compare_html, None)
358
+ return selected_list, gr.update(choices=selected_list, value=selected_list), comparison_html, plot
 
 
359
 
360
  def clear_all_models():
 
361
  return (
362
  [],
363
  gr.update(value=None),
364
  gr.update(choices=[], value=[]),
365
+ default_compare_html,
366
+ None
367
  )
368
 
 
369
  model_dropdown.select(
370
  fn=add_model_and_compare,
371
  inputs=[model_dropdown, selected_models_state],
372
+ outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view, radar_view]
373
  )
374
 
375
  selected_models_group.change(
376
  fn=update_selection,
377
  inputs=[selected_models_group],
378
+ outputs=[selected_models_state, selected_models_group, model_card_view, radar_view]
379
  )
380
 
381
  clear_models_btn.click(
382
  fn=clear_all_models,
383
+ outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view, radar_view]
384
  )
385
 
386
  DATA_DIR.mkdir(exist_ok=True)
pyproject.toml CHANGED
@@ -1,10 +1,16 @@
1
  [project]
2
- name = "eee-test"
3
- version = "0.1.0"
4
- description = "Add your description here"
 
 
 
 
5
  readme = "README.md"
6
- requires-python = ">=3.11"
7
  dependencies = [
8
- "gradio>=5.49.1",
 
9
  "pandas>=2.3.2",
 
10
  ]
 
1
  [project]
2
+ authors = [
3
+ { name = "Sree Harsha Nelaturu", email = "nelaturu.harsha@gmail.com" },
4
+ { name = "Every Eval Ever Team"}
5
+ ]
6
+ name = "e3_space"
7
+ version = "0.1.1"
8
+ description = "Space for every eval ever in the EvalEval Coalition."
9
  readme = "README.md"
10
+ requires-python = ">=3.13"
11
  dependencies = [
12
+ "datasets>=4.4.1",
13
+ "gradio>=6.1.0",
14
  "pandas>=2.3.2",
15
+ "plotly>=6.5.0",
16
  ]
requirements.txt ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt
3
+ aiofiles==24.1.0
4
+ # via gradio
5
+ aiohappyeyeballs==2.6.1
6
+ # via aiohttp
7
+ aiohttp==3.13.2
8
+ # via fsspec
9
+ aiosignal==1.4.0
10
+ # via aiohttp
11
+ annotated-doc==0.0.4
12
+ # via fastapi
13
+ annotated-types==0.7.0
14
+ # via pydantic
15
+ anyio==4.12.0
16
+ # via
17
+ # gradio
18
+ # httpx
19
+ # starlette
20
+ attrs==25.4.0
21
+ # via aiohttp
22
+ audioop-lts==0.2.2
23
+ # via gradio
24
+ brotli==1.2.0
25
+ # via gradio
26
+ certifi==2025.11.12
27
+ # via
28
+ # httpcore
29
+ # httpx
30
+ # requests
31
+ charset-normalizer==3.4.4
32
+ # via requests
33
+ click==8.3.1
34
+ # via
35
+ # typer
36
+ # typer-slim
37
+ # uvicorn
38
+ datasets==4.4.1
39
+ # via e3-space (pyproject.toml)
40
+ dill==0.4.0
41
+ # via
42
+ # datasets
43
+ # multiprocess
44
+ fastapi==0.124.2
45
+ # via gradio
46
+ ffmpy==1.0.0
47
+ # via gradio
48
+ filelock==3.20.0
49
+ # via
50
+ # datasets
51
+ # huggingface-hub
52
+ frozenlist==1.8.0
53
+ # via
54
+ # aiohttp
55
+ # aiosignal
56
+ fsspec==2025.10.0
57
+ # via
58
+ # datasets
59
+ # gradio-client
60
+ # huggingface-hub
61
+ gradio==6.1.0
62
+ # via e3-space (pyproject.toml)
63
+ gradio-client==2.0.1
64
+ # via gradio
65
+ groovy==0.1.2
66
+ # via gradio
67
+ h11==0.16.0
68
+ # via
69
+ # httpcore
70
+ # uvicorn
71
+ hf-xet==1.2.0
72
+ # via huggingface-hub
73
+ httpcore==1.0.9
74
+ # via httpx
75
+ httpx==0.28.1
76
+ # via
77
+ # datasets
78
+ # gradio
79
+ # gradio-client
80
+ # huggingface-hub
81
+ # safehttpx
82
+ huggingface-hub==1.2.2
83
+ # via
84
+ # datasets
85
+ # gradio
86
+ # gradio-client
87
+ idna==3.11
88
+ # via
89
+ # anyio
90
+ # httpx
91
+ # requests
92
+ # yarl
93
+ jinja2==3.1.6
94
+ # via gradio
95
+ markdown-it-py==4.0.0
96
+ # via rich
97
+ markupsafe==3.0.3
98
+ # via
99
+ # gradio
100
+ # jinja2
101
+ mdurl==0.1.2
102
+ # via markdown-it-py
103
+ multidict==6.7.0
104
+ # via
105
+ # aiohttp
106
+ # yarl
107
+ multiprocess==0.70.18
108
+ # via datasets
109
+ narwhals==2.13.0
110
+ # via plotly
111
+ numpy==2.3.5
112
+ # via
113
+ # datasets
114
+ # gradio
115
+ # pandas
116
+ orjson==3.11.5
117
+ # via gradio
118
+ packaging==25.0
119
+ # via
120
+ # datasets
121
+ # gradio
122
+ # gradio-client
123
+ # huggingface-hub
124
+ # plotly
125
+ pandas==2.3.3
126
+ # via
127
+ # e3-space (pyproject.toml)
128
+ # datasets
129
+ # gradio
130
+ pillow==12.0.0
131
+ # via gradio
132
+ plotly==6.5.0
133
+ # via e3-space (pyproject.toml)
134
+ propcache==0.4.1
135
+ # via
136
+ # aiohttp
137
+ # yarl
138
+ pyarrow==22.0.0
139
+ # via datasets
140
+ pydantic==2.12.4
141
+ # via
142
+ # fastapi
143
+ # gradio
144
+ pydantic-core==2.41.5
145
+ # via pydantic
146
+ pydub==0.25.1
147
+ # via gradio
148
+ pygments==2.19.2
149
+ # via rich
150
+ python-dateutil==2.9.0.post0
151
+ # via pandas
152
+ python-multipart==0.0.20
153
+ # via gradio
154
+ pytz==2025.2
155
+ # via pandas
156
+ pyyaml==6.0.3
157
+ # via
158
+ # datasets
159
+ # gradio
160
+ # huggingface-hub
161
+ requests==2.32.5
162
+ # via datasets
163
+ rich==14.2.0
164
+ # via typer
165
+ safehttpx==0.1.7
166
+ # via gradio
167
+ semantic-version==2.10.0
168
+ # via gradio
169
+ shellingham==1.5.4
170
+ # via
171
+ # huggingface-hub
172
+ # typer
173
+ six==1.17.0
174
+ # via python-dateutil
175
+ starlette==0.50.0
176
+ # via
177
+ # fastapi
178
+ # gradio
179
+ tomlkit==0.13.3
180
+ # via gradio
181
+ tqdm==4.67.1
182
+ # via
183
+ # datasets
184
+ # huggingface-hub
185
+ typer==0.20.0
186
+ # via gradio
187
+ typer-slim==0.20.0
188
+ # via huggingface-hub
189
+ typing-extensions==4.15.0
190
+ # via
191
+ # fastapi
192
+ # gradio
193
+ # gradio-client
194
+ # huggingface-hub
195
+ # pydantic
196
+ # pydantic-core
197
+ # typer
198
+ # typer-slim
199
+ # typing-inspection
200
+ typing-inspection==0.4.2
201
+ # via pydantic
202
+ tzdata==2025.2
203
+ # via pandas
204
+ urllib3==2.6.1
205
+ # via requests
206
+ uvicorn==0.38.0
207
+ # via gradio
208
+ xxhash==3.6.0
209
+ # via datasets
210
+ yarl==1.22.0
211
+ # via aiohttp
ui_components.py CHANGED
@@ -1,1150 +1,783 @@
1
- """
2
- UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
3
- Nord color theme with balanced contrast.
4
- """
5
  import gradio as gr
 
 
6
 
7
 
8
  def get_theme():
9
- """Returns the Nord-themed Gradio theme, locked to dark mode."""
10
  return gr.themes.Base(
11
  primary_hue="blue",
12
  neutral_hue="slate",
13
- font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
14
- font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
15
  ).set(
16
- body_background_fill="#2E3440",
17
- body_background_fill_dark="#2E3440",
18
- body_text_color="#ECEFF4",
19
- body_text_color_dark="#ECEFF4",
20
- body_text_color_subdued="#4C566A",
21
- body_text_color_subdued_dark="#4C566A",
22
- block_background_fill="#3B4252",
23
- block_background_fill_dark="#3B4252",
24
- block_border_width="1px",
25
- block_border_color="#434C5E",
26
- block_border_color_dark="#434C5E",
27
- block_label_text_color="#D8DEE9",
28
- block_label_text_color_dark="#D8DEE9",
29
- block_title_text_color="#ECEFF4",
30
- block_title_text_color_dark="#ECEFF4",
31
- input_background_fill="#2E3440",
32
- input_background_fill_dark="#2E3440",
33
- input_border_color="#4C566A",
34
- input_border_color_dark="#4C566A",
35
- button_primary_background_fill="#88C0D0",
36
- button_primary_background_fill_dark="#88C0D0",
37
- button_primary_text_color="#2E3440",
38
- button_primary_text_color_dark="#2E3440",
39
- button_secondary_background_fill="#434C5E",
40
- button_secondary_background_fill_dark="#434C5E",
41
- button_secondary_text_color="#ECEFF4",
42
- button_secondary_text_color_dark="#ECEFF4",
43
  )
44
 
45
 
46
  def get_custom_css():
47
- """Returns custom CSS with Nord colors."""
48
  return """
49
- /* === Nord Theme ===
50
- Polar Night: #2E3440 (bg), #3B4252 (surface), #434C5E, #4C566A
51
- Snow Storm: #D8DEE9, #E5E9F0, #ECEFF4
52
- Frost: #8FBCBB, #88C0D0, #81A1C1, #5E81AC
53
- Aurora: #BF616A, #D08770, #EBCB8B, #A3BE8C, #B48EAD
54
- */
55
-
56
- /* Lock the UI to dark Nord regardless of OS preference */
57
  :root {
58
- color-scheme: dark;
59
- background-color: #2E3440;
 
 
 
60
  }
61
 
62
- body {
63
- background: #2E3440 !important;
64
- color: #ECEFF4 !important;
65
  }
66
 
67
- /* === Base === */
68
  .gradio-container {
69
- max-width: 100% !important;
70
- margin: 0 !important;
71
- padding: 1.25rem 2.5rem 2rem !important;
72
- background: #2E3440 !important;
73
- color: #ECEFF4 !important;
74
- font-family: 'DM Sans', system-ui, sans-serif !important;
75
- font-size: 16px !important;
 
 
 
 
 
 
 
76
  }
77
 
78
- /* === Header === */
79
  .app-header {
80
  display: flex;
81
  align-items: center;
82
  gap: 1rem;
83
  margin-bottom: 1.5rem;
84
- padding: 1.25rem 1.5rem;
85
- background: #3B4252;
86
- border: 1px solid #434C5E;
87
  border-radius: 12px;
88
  }
89
 
90
- .app-header .logo-mark {
91
  width: 48px;
92
  height: 48px;
93
- background: linear-gradient(135deg, #88C0D0 0%, #81A1C1 100%);
94
  border-radius: 12px;
95
  display: flex;
96
  align-items: center;
97
  justify-content: center;
98
  font-weight: 800;
99
  font-size: 1.1rem;
100
- color: #2E3440;
101
- }
102
-
103
- .app-header .brand {
104
- display: flex;
105
- flex-direction: column;
106
- gap: 0.125rem;
107
- }
108
-
109
- .app-header h1 {
110
- margin: 0;
111
- font-size: 1.5rem;
112
- font-weight: 700;
113
- color: #ECEFF4;
114
- letter-spacing: -0.02em;
115
- }
116
-
117
- .app-header .tagline {
118
- color: #D8DEE9;
119
- font-size: 0.85rem;
120
- }
121
-
122
- .app-header .header-right {
123
- margin-left: auto;
124
- display: flex;
125
- align-items: center;
126
- gap: 0.75rem;
127
- }
128
-
129
- .app-header .version-badge {
130
- background: rgba(136, 192, 208, 0.2);
131
- border: 1px solid rgba(136, 192, 208, 0.4);
132
- border-radius: 6px;
133
- padding: 0.25rem 0.625rem;
134
- font-size: 0.7rem;
135
- font-family: 'JetBrains Mono', monospace;
136
- color: #88C0D0;
137
- }
138
-
139
- /* === Tabs === */
140
- .tabs {
141
- border: none !important;
142
- background: transparent !important;
143
- }
144
-
145
- .tab-nav {
146
- background: #3B4252 !important;
147
- border: 1px solid #434C5E !important;
148
- border-radius: 10px !important;
149
- padding: 0.25rem !important;
150
- gap: 0.25rem !important;
151
- margin-bottom: 1.25rem !important;
152
- display: inline-flex !important;
153
- }
154
-
155
- .tab-nav button {
156
- background: transparent !important;
157
- border: none !important;
158
- color: #D8DEE9 !important;
159
- padding: 0.75rem 1.5rem !important;
160
- font-size: 0.95rem !important;
161
- font-weight: 500 !important;
162
- border-radius: 8px !important;
163
- transition: all 0.15s ease !important;
164
- }
165
-
166
- .tab-nav button.selected {
167
- color: #2E3440 !important;
168
- background: #88C0D0 !important;
169
- }
170
-
171
- .tab-nav button:hover:not(.selected) {
172
- background: #434C5E !important;
173
- color: #ECEFF4 !important;
174
  }
175
 
176
- .tabitem {
177
- background: transparent !important;
178
- border: none !important;
179
- padding: 0 !important;
180
- }
181
 
182
- /* === Controls bar === */
183
- .controls-bar {
184
- background: #3B4252 !important;
185
- border: 1px solid #434C5E !important;
186
- border-radius: 10px !important;
187
- padding: 0.75rem 1.25rem !important;
188
- margin-bottom: 1rem !important;
189
- gap: 0.75rem !important;
190
- }
191
-
192
- .controls-bar label {
193
- font-size: 0.75rem !important;
194
- text-transform: uppercase !important;
195
- letter-spacing: 0.04em !important;
196
- color: #D8DEE9 !important;
197
- font-weight: 500 !important;
198
  }
199
 
200
- /* === Info banner === */
201
  .info-banner {
202
- background: #3B4252 !important;
203
- border: 1px solid #434C5E !important;
204
- border-left: 3px solid #88C0D0 !important;
205
- border-radius: 0 10px 10px 0 !important;
206
- padding: 0.75rem 1rem !important;
207
- margin-bottom: 1rem !important;
208
  }
209
 
210
- .info-banner h3 {
211
- margin: 0;
212
- font-size: 1.1rem;
213
- font-weight: 600;
214
- color: #ECEFF4;
215
- }
216
 
217
- .info-banner .eval-tags {
218
  display: flex;
 
 
 
219
  flex-wrap: wrap;
220
- gap: 0.375rem;
221
- }
222
-
223
- .info-banner .eval-tag {
224
- background: rgba(143, 188, 187, 0.15);
225
- border: 1px solid rgba(143, 188, 187, 0.3);
226
- border-radius: 4px;
227
- padding: 0.3rem 0.6rem;
228
- font-size: 0.8rem;
229
- font-family: 'JetBrains Mono', monospace;
230
- color: #8FBCBB;
231
- }
232
-
233
- /* === Dataframe - seamless styling === */
234
- .dataframe,
235
- .dataframe > div,
236
- .dataframe > div > div,
237
- .dataframe .table-wrap,
238
- .dataframe .svelte-1gfkn6j {
239
- background: #2E3440 !important;
240
- border: none !important;
241
- box-shadow: none !important;
242
- border-radius: 0 !important;
243
- }
244
-
245
- .dataframe table {
246
- width: 100% !important;
247
- border-collapse: collapse !important;
248
- font-size: 0.95rem !important;
249
- table-layout: auto !important;
250
- background: #2E3440 !important;
251
- }
252
-
253
- .dataframe thead,
254
- .dataframe thead tr {
255
- background: #2E3440 !important;
256
- position: sticky;
257
- top: 0;
258
- z-index: 10;
259
  }
260
 
261
- .dataframe thead th {
262
- padding: 0.875rem 1rem !important;
263
- font-weight: 600 !important;
264
- font-size: 0.75rem !important;
265
- text-transform: uppercase !important;
266
- letter-spacing: 0.05em !important;
267
- color: #81A1C1 !important;
268
- border-bottom: 1px solid #434C5E !important;
269
- border-top: none !important;
270
- text-align: left !important;
271
- background: #2E3440 !important;
272
- }
273
-
274
- .dataframe tbody,
275
- .dataframe tbody tr {
276
- background: #2E3440 !important;
277
- }
278
-
279
- .dataframe tbody tr {
280
- border-bottom: 1px solid #3B4252 !important;
281
- }
282
-
283
- .dataframe tbody tr:hover {
284
- background: rgba(136, 192, 208, 0.04) !important;
285
- }
286
-
287
- .dataframe tbody td {
288
- padding: 0.75rem 1rem !important;
289
- color: #E5E9F0 !important;
290
- background: #2E3440 !important;
291
- overflow: hidden !important;
292
- text-overflow: ellipsis !important;
293
- border: none !important;
294
- }
295
-
296
- /* === Pagination bar === */
297
- .pagination-bar {
298
- margin-top: 1rem !important;
299
- padding: 1rem 0 !important;
300
- border-top: 1px solid #3B4252 !important;
301
- display: flex !important;
302
- justify-content: center !important;
303
- align-items: center !important;
304
- gap: 1rem !important;
305
- }
306
-
307
- .page-info {
308
- font-family: 'JetBrains Mono', monospace !important;
309
- font-size: 1rem !important;
310
- color: #D8DEE9 !important;
311
- min-width: 80px !important;
312
- text-align: center !important;
313
- }
314
-
315
- /* Model name - white, readable */
316
- .dataframe tbody td:first-child {
317
- font-weight: 500 !important;
318
- color: #ECEFF4 !important;
319
- white-space: nowrap !important;
320
- }
321
-
322
- /* All other columns - use monospace for numbers */
323
- .dataframe tbody td:not(:first-child) {
324
- font-family: 'JetBrains Mono', monospace !important;
325
- color: #8FBCBB !important;
326
- text-align: left !important;
327
- }
328
-
329
- .dataframe tbody td:nth-child(2) {
330
- color: #88C0D0 !important;
331
- white-space: nowrap !important;
332
- }
333
-
334
- .dataframe tbody td:nth-child(3) {
335
- color: #D08770 !important;
336
- }
337
-
338
- .dataframe tbody td:nth-child(4) {
339
- font-weight: 600 !important;
340
- color: #A3BE8C !important;
341
- }
342
-
343
- .dataframe tbody td:nth-child(n+5) {
344
- white-space: nowrap !important;
345
  }
346
 
347
- /* === Status text === */
348
- .status-text {
349
- font-size: 0.9rem !important;
350
- color: #D8DEE9 !important;
351
- padding: 0.5rem 0 !important;
352
- font-family: 'JetBrains Mono', monospace !important;
353
  }
354
 
355
- /* === Model Card === */
356
- .model-card-container {
357
  display: flex;
358
  flex-direction: column;
359
- gap: 1.25rem;
360
  }
361
 
362
- .model-card-header {
363
- background: #3B4252;
364
- border: 1px solid #434C5E;
365
- border-radius: 12px;
366
- padding: 1.5rem 2rem;
367
- }
368
 
369
- .model-card-header h2 {
370
- margin: 0 0 0.5rem 0;
371
- font-size: 1.5rem;
 
372
  font-weight: 600;
373
- color: #ECEFF4;
 
 
374
  }
375
 
376
- .model-card-header .model-meta {
377
- display: flex;
378
- gap: 1.5rem;
379
- color: #D8DEE9;
380
- font-size: 0.95rem;
381
- }
382
 
383
- .model-card-header .model-meta strong {
384
- color: #8FBCBB;
 
 
 
 
 
385
  }
386
 
387
- .leaderboard-section {
388
- background: #3B4252;
389
- border: 1px solid #434C5E;
390
- border-radius: 10px;
391
- overflow: hidden;
392
- }
393
 
394
- .leaderboard-section-header {
395
- background: #434C5E;
396
- padding: 1rem 1.25rem;
397
- border-bottom: 1px solid #4C566A;
398
  display: flex;
399
- justify-content: space-between;
400
  align-items: center;
 
401
  }
402
 
403
- .leaderboard-section-header h3 {
404
- margin: 0;
405
- font-size: 1rem;
406
- font-weight: 600;
407
- color: #88C0D0;
408
- }
409
 
410
- .leaderboard-section-header .lb-avg {
411
- background: rgba(163, 190, 140, 0.15);
412
- border: 1px solid rgba(163, 190, 140, 0.3);
413
- border-radius: 8px;
414
- padding: 0.5rem 1rem;
415
- font-size: 0.85rem;
416
- color: #D8DEE9;
417
  }
418
 
419
- .leaderboard-section-header .lb-avg strong {
420
- color: #A3BE8C;
421
- font-family: 'JetBrains Mono', monospace;
422
- font-size: 1.1rem;
423
  font-weight: 700;
424
- }
425
-
426
- .scores-grid {
427
- display: grid;
428
- grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
429
- gap: 1px;
430
- background: #434C5E;
431
- }
432
-
433
- .score-item {
434
- background: #3B4252;
435
- padding: 1rem 1.25rem;
436
- }
437
-
438
- .score-item .score-label {
439
- font-size: 0.8rem;
440
  text-transform: uppercase;
441
  letter-spacing: 0.05em;
442
- color: #D8DEE9;
443
- margin-bottom: 0.375rem;
444
- }
445
-
446
- .score-item .score-value {
447
- font-size: 1.5rem;
448
- font-weight: 600;
449
- font-family: 'JetBrains Mono', monospace;
450
- color: #A3BE8C;
451
- }
452
-
453
- .score-item.highlight .score-value {
454
- color: #88C0D0;
455
- }
456
-
457
- .no-results {
458
- text-align: center;
459
- padding: 3rem 1rem;
460
- color: #D8DEE9;
461
- }
462
-
463
- .no-results h3 {
464
- color: #ECEFF4;
465
- margin-bottom: 0.5rem;
466
- }
467
-
468
-
469
- /* === New Comparison View === */
470
- .comparison-container {
471
- display: flex;
472
- flex-direction: column;
473
- gap: 1.5rem;
474
- }
475
-
476
- .comparison-summary {
477
- background: #3B4252;
478
- border: 1px solid #434C5E;
479
- border-radius: 12px;
480
- padding: 1.5rem;
481
- }
482
-
483
- .comparison-summary h2 {
484
- margin: 0 0 1rem 0;
485
- color: #ECEFF4;
486
- font-size: 1.25rem;
487
- }
488
-
489
- .summary-cards {
490
- display: flex;
491
- gap: 1rem;
492
- flex-wrap: wrap;
493
- }
494
-
495
- .summary-card {
496
- flex: 1;
497
- min-width: 200px;
498
- background: #2E3440;
499
- border-radius: 8px;
500
- padding: 1rem;
501
  }
502
 
503
- .summary-card-header {
504
- display: flex;
505
- align-items: center;
506
- gap: 0.5rem;
507
- margin-bottom: 0.75rem;
508
  }
509
 
510
- .model-dot {
511
- width: 10px;
512
- height: 10px;
513
- border-radius: 50%;
514
  }
515
 
516
- .model-name {
517
- font-weight: 600;
518
- color: #ECEFF4;
519
- font-size: 0.9rem;
520
  overflow: hidden;
521
- text-overflow: ellipsis;
522
- white-space: nowrap;
523
- }
524
-
525
- .summary-card-body {
526
- display: flex;
527
- flex-direction: column;
528
- gap: 0.5rem;
529
  }
530
 
531
- .summary-stat {
532
  display: flex;
533
  justify-content: space-between;
534
  align-items: center;
 
 
535
  }
536
 
537
- .summary-stat .stat-label {
538
- font-size: 0.75rem;
539
- color: #D8DEE9;
540
- text-transform: uppercase;
541
- letter-spacing: 0.05em;
542
  }
543
 
544
- .summary-stat .stat-value {
545
- font-family: 'JetBrains Mono', monospace;
546
- color: #8FBCBB;
547
- }
548
 
549
- .summary-stat.primary .stat-value.large {
550
- font-size: 1.5rem;
551
- font-weight: 700;
552
- color: #A3BE8C;
 
553
  }
554
 
555
- .leaderboard-comparison-card {
556
- background: #3B4252;
557
- border: 1px solid #434C5E;
558
- border-radius: 12px;
559
- overflow: hidden;
560
  }
561
 
562
- .lb-card-header {
563
- background: #434C5E;
564
- padding: 0.875rem 1.25rem;
565
  }
566
 
567
- .lb-card-header h3 {
568
- margin: 0;
569
- color: #88C0D0;
570
- font-size: 1rem;
571
- font-weight: 600;
572
  }
573
 
574
- .lb-card-body {
575
- padding: 1rem 1.25rem;
576
- display: flex;
577
- flex-direction: column;
578
- gap: 0.75rem;
579
- }
580
-
581
- .metric-comparison {
582
- display: flex;
583
- flex-direction: column;
584
- gap: 0.375rem;
585
  }
586
 
587
- .metric-name-row {
588
- margin-bottom: 0.25rem;
 
589
  }
590
 
591
- .metric-title {
592
- font-size: 0.85rem;
593
- font-weight: 600;
594
- color: #ECEFF4;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  }
596
 
597
- .metric-title.sub {
598
- font-size: 0.75rem;
599
- font-weight: 500;
600
- color: #D8DEE9;
601
- }
602
 
603
- .model-score-row {
604
- display: flex;
605
- align-items: center;
606
- gap: 0.5rem;
607
- padding: 0.375rem 0;
608
- }
609
 
610
- .model-score-row.compact {
611
- padding: 0.25rem 0;
612
- }
613
 
614
- .model-score-row.best-score {
615
- background: rgba(163, 190, 140, 0.1);
616
- border-radius: 4px;
617
- padding-left: 0.5rem;
618
- margin-left: -0.5rem;
619
- }
620
 
621
- .model-score-row.no-data {
622
- opacity: 0.5;
623
  }
624
 
625
- .model-indicator {
626
- width: 8px;
627
- height: 8px;
628
- border-radius: 2px;
629
- flex-shrink: 0;
630
  }
631
 
632
- .model-indicator.small {
633
- width: 6px;
634
- height: 6px;
 
 
635
  }
636
 
637
- .score-bar-container {
638
- flex: 1;
639
- display: flex;
640
- align-items: center;
641
- gap: 0.75rem;
642
- height: 24px;
643
- background: #2E3440;
644
- border-radius: 4px;
645
- padding: 0 0.5rem;
646
- position: relative;
647
  }
648
 
649
- .score-bar {
650
- position: absolute;
651
- left: 0;
652
- top: 0;
653
- bottom: 0;
654
- border-radius: 4px;
655
- opacity: 0.3;
656
  }
657
 
658
- .score-bar.thin {
659
- opacity: 0.2;
 
 
660
  }
661
 
662
- .score-value {
663
- position: relative;
664
- font-family: 'JetBrains Mono', monospace;
665
- font-size: 0.9rem;
666
- font-weight: 600;
667
- color: #ECEFF4;
668
- z-index: 1;
669
  }
670
 
671
- .score-value.small {
672
- font-size: 0.8rem;
673
- font-weight: 500;
 
674
  }
675
 
676
- .score-value.dim {
677
- color: #4C566A;
678
  }
679
 
680
- /* === Selected Models Chips === */
681
- .selected-models-group label {
682
- display: inline-flex !important;
683
- align-items: center !important;
684
- background: #434C5E;
685
- border: 1px solid #4C566A;
686
- border-radius: 16px;
687
- padding: 0.35rem 0.85rem;
688
- font-size: 0.85rem;
689
- color: #ECEFF4;
690
- gap: 0.4rem;
691
- cursor: pointer;
692
- margin: 0.15rem 0.3rem 0.15rem 0 !important;
693
  }
694
 
695
- .selected-models-group label span::before {
696
- content: "×";
697
- font-size: 0.75rem;
698
- color: #EBCB8B;
699
- opacity: 0;
700
- transition: opacity 0.15s ease;
701
  }
702
 
703
- .selected-models-group label:hover span::before {
704
- opacity: 1;
705
  }
706
 
707
- .selected-models-group input[type="checkbox"] {
708
- display: none;
 
 
 
709
  }
710
 
711
- /* === Heat Map Table === */
712
- .heatmap-table-wrapper {
713
- overflow-x: auto;
714
- margin-top: 1rem;
715
  }
716
 
717
- .heatmap-table {
718
- width: 100%;
719
- border-collapse: collapse;
720
- font-size: 0.85rem;
 
 
721
  }
722
 
723
- .heatmap-table thead {
724
- position: sticky;
725
- top: 0;
726
- z-index: 10;
 
727
  }
728
 
729
- .heatmap-table th {
730
- background: #434C5E;
731
- padding: 0.625rem 0.75rem;
732
- font-weight: 600;
733
- font-size: 0.7rem;
734
- text-transform: uppercase;
735
- letter-spacing: 0.05em;
736
- color: #81A1C1;
737
- text-align: left;
738
- border-bottom: 2px solid #4C566A;
739
- white-space: nowrap;
740
  }
741
 
742
- .heatmap-table th.metric-header {
743
- min-width: 120px;
744
  }
745
 
746
- .heatmap-table th.model-header {
747
- text-align: center;
748
- max-width: 150px;
749
- overflow: hidden;
750
- text-overflow: ellipsis;
751
  }
752
 
753
- .heatmap-table td {
754
- padding: 0.5rem 0.75rem;
755
- border-bottom: 1px solid #3B4252;
756
  }
757
 
758
- .heatmap-table td.metric-name {
759
- font-weight: 500;
760
- color: #D8DEE9;
761
- background: #2E3440;
762
  }
763
 
764
- .heatmap-table td.score-cell {
765
- text-align: center;
766
- font-family: 'JetBrains Mono', monospace;
767
- font-weight: 500;
768
- transition: all 0.15s ease;
769
  }
770
 
771
- .heatmap-table td.score-cell.best {
772
- background: rgba(163, 190, 140, 0.25);
773
- color: #A3BE8C;
774
- font-weight: 700;
 
 
 
 
 
 
 
775
  }
776
 
777
- .heatmap-table td.score-cell.good {
778
- background: rgba(163, 190, 140, 0.12);
779
- color: #A3BE8C;
780
  }
781
 
782
- .heatmap-table td.score-cell.mid {
783
- background: rgba(235, 203, 139, 0.12);
784
- color: #EBCB8B;
785
  }
786
 
787
- .heatmap-table td.score-cell.low {
788
- background: rgba(208, 135, 112, 0.12);
789
- color: #D08770;
 
790
  }
791
 
792
- .heatmap-table td.score-cell.worst {
793
- background: rgba(191, 97, 106, 0.15);
794
- color: #BF616A;
 
 
 
 
 
 
795
  }
796
 
797
- .heatmap-table td.score-cell.na {
798
- color: #4C566A;
799
- font-style: italic;
 
800
  }
801
 
802
- .heatmap-table tr.avg-row {
803
- background: rgba(136, 192, 208, 0.08);
804
  }
805
 
806
- .heatmap-table tr.avg-row td.metric-name {
807
- font-weight: 700;
808
- color: #88C0D0;
809
- background: rgba(136, 192, 208, 0.08);
 
 
810
  }
811
 
812
- /* === Buttons === */
813
- button {
814
- border-radius: 8px !important;
815
- font-weight: 500 !important;
816
  font-size: 0.95rem !important;
817
- transition: all 0.15s ease !important;
818
- }
819
-
820
- button.primary {
821
- background: #88C0D0 !important;
822
- color: #2E3440 !important;
823
- border: none !important;
824
- }
825
-
826
- button.primary:hover:not(:disabled) {
827
- background: #8FBCBB !important;
828
- }
829
-
830
- button.secondary,
831
- button[variant="secondary"] {
832
- background: #434C5E !important;
833
- color: #ECEFF4 !important;
834
- border: 1px solid #4C566A !important;
835
- }
836
-
837
- button.secondary:hover:not(:disabled),
838
- button[variant="secondary"]:hover:not(:disabled) {
839
- background: #4C566A !important;
840
  }
841
 
842
- button:disabled {
843
- opacity: 0.35 !important;
 
 
 
 
844
  }
845
 
846
- /* === Inputs === */
847
- input[type="text"],
848
- select {
849
- background: #2E3440 !important;
850
- border: 1px solid #4C566A !important;
851
- border-radius: 8px !important;
852
- color: #ECEFF4 !important;
853
- font-size: 1rem !important;
 
 
 
 
 
854
  }
855
 
856
- input[type="text"]:focus,
857
- select:focus {
858
- border-color: #88C0D0 !important;
859
- box-shadow: 0 0 0 3px rgba(136, 192, 208, 0.15) !important;
 
 
 
 
860
  outline: none !important;
861
  }
862
 
863
- input::placeholder {
864
- color: #4C566A !important;
 
 
 
 
 
 
865
  }
866
 
867
- /* === Accordion === */
868
- .accordion {
869
- background: #3B4252 !important;
870
- border: 1px solid #434C5E !important;
871
- border-radius: 10px !important;
872
- margin-top: 1.5rem !important;
873
  }
874
 
875
- .accordion > .label-wrap {
 
 
 
876
  background: transparent !important;
877
- padding: 1rem 1.25rem !important;
878
- color: #D8DEE9 !important;
879
- font-size: 0.95rem !important;
880
  }
881
 
882
- .accordion > .wrap {
883
- padding: 0.5rem 1.25rem 1.25rem !important;
884
- color: #D8DEE9 !important;
885
- font-size: 0.95rem !important;
886
- line-height: 1.6 !important;
 
 
887
  }
888
 
889
- .accordion code {
890
- background: #434C5E !important;
891
- padding: 0.125rem 0.375rem !important;
 
892
  border-radius: 4px !important;
893
- font-family: 'JetBrains Mono', monospace !important;
894
- font-size: 0.8rem !important;
895
- color: #8FBCBB !important;
 
 
 
 
 
896
  }
897
 
898
- /* === Metrics section === */
899
- .metrics-section {
900
- margin-top: 1.5rem;
901
- padding-top: 1.5rem;
902
- border-top: 1px solid #434C5E;
903
  }
904
 
905
- .metrics-section h3 {
906
- font-size: 0.85rem;
907
- font-weight: 600;
908
- color: #D8DEE9;
909
- margin: 0 0 1rem 0;
910
- text-transform: uppercase;
911
- letter-spacing: 0.05em;
912
  }
913
 
914
- .metrics-grid {
915
- display: grid;
916
- grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
917
- gap: 0.75rem;
918
  }
919
 
920
- .metric-card {
921
- background: #3B4252;
922
- border: 1px solid #434C5E;
923
- border-radius: 8px;
924
- overflow: hidden;
925
  }
926
 
927
- .metric-card-header {
928
- display: flex;
929
- justify-content: space-between;
930
- align-items: center;
931
- padding: 0.75rem 1rem;
932
- cursor: pointer;
933
- list-style: none;
934
  }
935
 
936
- .metric-card-header::-webkit-details-marker {
937
- display: none;
 
938
  }
939
 
940
- .metric-card-name {
941
- font-weight: 500;
942
- font-size: 0.95rem;
943
- color: #ECEFF4;
944
  }
945
 
946
- .metric-card-direction {
947
- font-size: 0.8rem;
948
- color: #D8DEE9;
949
  }
950
 
951
- .metric-card-direction .arrow {
952
- color: #A3BE8C;
953
- font-weight: 600;
 
 
 
954
  }
955
 
956
- .metric-card-body {
957
- padding: 0.875rem 1.25rem;
958
- border-top: 1px solid #434C5E;
959
- font-size: 0.9rem;
960
- color: #D8DEE9;
961
- line-height: 1.5;
 
962
  }
963
 
964
- .metric-type-badge {
965
- font-size: 0.65rem;
966
- text-transform: uppercase;
967
- letter-spacing: 0.05em;
968
- padding: 0.15rem 0.4rem;
969
- background: rgba(180, 142, 173, 0.2);
970
- border: 1px solid rgba(180, 142, 173, 0.35);
971
- border-radius: 4px;
972
- color: #B48EAD;
973
- font-family: 'JetBrains Mono', monospace;
974
  }
975
 
976
- /* === Scrollbar === */
977
- ::-webkit-scrollbar {
978
- width: 8px;
979
- height: 8px;
980
  }
981
 
982
- ::-webkit-scrollbar-track {
983
- background: #2E3440;
984
  }
985
 
986
- ::-webkit-scrollbar-thumb {
987
- background: #4C566A;
988
- border-radius: 4px;
 
 
989
  }
990
 
991
- ::-webkit-scrollbar-thumb:hover {
992
- background: #5E81AC;
 
 
 
993
  }
994
 
995
- /* === Responsive === */
996
- @media (max-width: 768px) {
997
- .gradio-container {
998
- padding: 1rem !important;
999
- }
1000
-
1001
- .scores-grid {
1002
- grid-template-columns: repeat(2, 1fr);
1003
- }
1004
  }
1005
 
1006
- /* === Overrides === */
1007
- .gradio-container footer {
1008
- display: none !important;
 
 
 
1009
  }
1010
 
1011
- .block {
1012
- background: #3B4252 !important;
 
1013
  }
1014
 
1015
- .gradio-radio label {
1016
- background: #434C5E !important;
1017
- border: 1px solid #4C566A !important;
1018
- color: #ECEFF4 !important;
1019
- border-radius: 8px !important;
1020
- font-size: 0.85rem !important;
1021
  }
1022
 
1023
- .gradio-radio label.selected {
1024
- background: #88C0D0 !important;
1025
- border-color: #88C0D0 !important;
1026
- color: #2E3440 !important;
1027
- }
 
 
 
 
 
 
 
 
1028
  """
1029
 
1030
 
1031
  def format_leaderboard_header(selected_leaderboard, metadata):
1032
- """Formats the leaderboard header info section."""
1033
  if not selected_leaderboard:
1034
- return """
1035
- <div style="text-align: center; padding: 2rem 1rem; color: #D8DEE9;">
1036
- <div style="font-size: 1.1rem;">Select a leaderboard to explore</div>
1037
- </div>
1038
- """
1039
 
1040
  if not metadata or not metadata.get("evals"):
1041
- return f"""
1042
- <div class="info-banner">
1043
- <h3>{selected_leaderboard}</h3>
1044
- </div>
1045
- """
1046
 
1047
  source_info = metadata.get("source_info", {})
1048
  org = source_info.get("organization", "Unknown")
1049
  url = source_info.get("url", "#")
1050
- eval_names = list(metadata["evals"].keys())
1051
 
1052
  eval_tags = "".join([f'<span class="eval-tag">{name}</span>' for name in eval_names])
1053
 
1054
- return f"""
1055
  <div class="info-banner">
1056
- <div style="display: flex; justify-content: space-between; align-items: center; gap: 1rem;">
1057
- <div style="display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
1058
- <h3 style="margin: 0;">{selected_leaderboard}</h3>
1059
- <span style="color: #D8DEE9; font-size: 0.8rem;">by {org}</span>
1060
- <div class="eval-tags" style="margin: 0;">{eval_tags}</div>
1061
  </div>
1062
- <a href="{url}" target="_blank" style="
1063
- font-size: 0.75rem;
1064
- color: #88C0D0;
1065
- text-decoration: none;
1066
- padding: 0.375rem 0.75rem;
1067
- border: 1px solid rgba(136, 192, 208, 0.4);
1068
- border-radius: 6px;
1069
- white-space: nowrap;
1070
- ">Source →</a>
1071
  </div>
 
1072
  </div>
1073
- """
1074
 
1075
 
1076
  def format_metric_details(selected_leaderboard, metadata):
1077
- """Formats metric detail cards."""
1078
  if not selected_leaderboard or not metadata or not metadata.get("evals"):
1079
  return ""
1080
-
1081
  evals = metadata.get("evals", {})
1082
-
1083
- html = """
1084
- <div class="metrics-section">
1085
- <h3>Metric Reference</h3>
1086
- <div class="metrics-grid">
1087
- """
1088
-
1089
- for eval_name, info in evals.items():
1090
- score_type = info['score_type'].upper() if info.get('score_type') else "—"
1091
  direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
1092
  arrow = "↓" if info.get('lower_is_better') else "↑"
1093
-
1094
  details = ""
1095
  if info.get('score_type') == "continuous" and info.get('min_score') is not None:
1096
  details = f"Range: [{info['min_score']} – {info['max_score']}]"
1097
  elif info.get('score_type') == "levels" and info.get('level_names'):
1098
  details = f"Levels: {', '.join(str(l) for l in info['level_names'])}"
1099
-
1100
- html += f"""
1101
- <details class="metric-card">
1102
- <summary class="metric-card-header">
 
 
1103
  <span class="metric-card-name">{eval_name}</span>
1104
  <span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
1105
- </summary>
1106
  <div class="metric-card-body">
1107
  <div>{info.get('description', 'No description')}</div>
1108
  <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
1109
- <span style="font-size: 0.75rem; color: #D8DEE9;">{details}</span>
1110
  <span class="metric-type-badge">{score_type}</span>
1111
  </div>
1112
  </div>
1113
- </details>
1114
- """
1115
-
1116
- html += "</div></div>"
1117
- return html
 
 
 
 
1118
 
1119
 
1120
  def format_model_card(model_name, model_data):
1121
- """Formats a model card showing all evals across leaderboards."""
1122
  if not model_data:
1123
- return """
1124
- <div class="no-results">
1125
- <h3>No results found</h3>
1126
- <p>Try searching for a different model name</p>
1127
- </div>
1128
- """
1129
 
1130
  first = list(model_data.values())[0]
1131
  developer = first.get("developer", "Unknown")
1132
  params = first.get("params")
1133
  arch = first.get("architecture", "Unknown")
1134
-
1135
  params_str = f"{params}B" if params else "—"
1136
 
1137
- html = f"""
1138
- <div class="model-card-container">
1139
- <div class="model-card-header">
1140
- <h2>{model_name}</h2>
1141
- <div class="model-meta">
1142
- <span><strong>Developer:</strong> {developer}</span>
1143
- <span><strong>Parameters:</strong> {params_str}</span>
1144
- <span><strong>Architecture:</strong> {arch}</span>
1145
- </div>
1146
  </div>
1147
- """
1148
 
1149
  for leaderboard_name, data in model_data.items():
1150
  results = data.get("results", {})
@@ -1154,221 +787,197 @@ def format_model_card(model_name, model_data):
1154
  scores = [v for v in results.values() if v is not None]
1155
  avg = sum(scores) / len(scores) if scores else None
1156
  avg_str = f"{avg:.2f}" if avg else "β€”"
1157
-
1158
- html += f"""
1159
- <div class="leaderboard-section">
1160
- <div class="leaderboard-section-header">
1161
- <h3>{leaderboard_name}</h3>
1162
- <span class="lb-avg">Avg: <strong>{avg_str}</strong></span>
1163
- </div>
1164
- <div class="scores-grid">
1165
- """
1166
 
1167
- sorted_results = sorted(results.items(), key=lambda x: x[1] if x[1] is not None else 0, reverse=True)
 
1168
 
1169
- for i, (metric_name, score) in enumerate(sorted_results):
1170
  score_display = f"{score:.2f}" if score is not None else "β€”"
1171
- highlight_class = "highlight" if i == 0 else ""
1172
-
1173
- html += f"""
1174
- <div class="score-item {highlight_class}">
1175
- <div class="score-label">{metric_name}</div>
1176
- <div class="score-value">{score_display}</div>
1177
- </div>
1178
- """
1179
 
1180
- html += "</div></div>"
1181
 
1182
- html += "</div>"
1183
  return html
1184
 
1185
 
1186
def format_model_comparison(selected_models, all_results):
    """Render an HTML side-by-side comparison of several models.

    Args:
        selected_models: Iterable of model names, in display order.
        all_results: Mapping ``model name -> leaderboard name -> data`` where
            ``data`` may carry a ``"developer"`` string and a ``"results"``
            mapping of ``metric name -> score`` (``None`` for a missing score).

    Returns:
        An HTML string: one summary card per selected model followed by one
        heat-map table per leaderboard, or a "no results" placeholder when
        nothing can be compared.

    All names taken from the data (models, developers, leaderboards, metrics)
    are HTML-escaped before being placed in the markup, including the
    ``title`` attribute of the table headers.
    """
    from html import escape  # local import keeps this block self-contained

    if not selected_models or not all_results:
        return """
        <div class="no-results">
            <h3>Select models to compare</h3>
            <p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
        </div>
        """

    model_data_dict = {
        name: all_results[name] for name in selected_models if name in all_results
    }
    if not model_data_dict:
        return """
        <div class="no-results">
            <h3>No data found for selected models</h3>
            <p>Try selecting different models</p>
        </div>
        """

    all_leaderboards = sorted(
        {lb for data in model_data_dict.values() for lb in data}
    )
    model_colors = ['#88C0D0', '#A3BE8C', '#EBCB8B', '#D08770',
                    '#B48EAD', '#8FBCBB', '#81A1C1', '#BF616A']

    def _mean(values):
        """Average of the non-None values, or None when there are none."""
        present = [v for v in values if v is not None]
        return sum(present) / len(present) if present else None

    def _results(model_name, leaderboard_name):
        """Results mapping of one model on one leaderboard ({} if absent)."""
        return (
            model_data_dict.get(model_name, {})
            .get(leaderboard_name, {})
            .get("results", {})
        )

    def _heat_class(score, scores):
        """CSS class for a score relative to the other models' scores."""
        if len(scores) < 2:
            return ""
        low, high = min(scores), max(scores)
        if score == high:  # also covers the all-equal case (no division)
            return "best"
        pct = (score - low) / (high - low)
        if pct >= 0.75:
            return "good"
        if pct >= 0.5:
            return "mid"
        if pct >= 0.25:
            return "low"
        return "worst"

    parts = [
        '<div class="comparison-container">',
        '<div class="comparison-summary">',
        '<h2>Model Comparison</h2>',
        '<div class="summary-cards">',
    ]

    # Summary card per selected model (models without data show placeholders).
    for i, model_name in enumerate(selected_models):
        color = model_colors[i % len(model_colors)]
        per_leaderboard = list(model_data_dict.get(model_name, {}).values())
        avg = _mean(
            v for lb in per_leaderboard for v in lb.get("results", {}).values()
        )
        avg_str = f"{avg:.2f}" if avg is not None else "—"
        developer = (
            per_leaderboard[0].get("developer", "Unknown")
            if per_leaderboard else "Unknown"
        )
        parts.append(f"""
        <div class="summary-card" style="border-left: 4px solid {color};">
            <div class="summary-card-header">
                <span class="model-dot" style="background: {color};"></span>
                <span class="model-name">{escape(str(model_name))}</span>
            </div>
            <div class="summary-card-body">
                <div class="summary-stat">
                    <span class="stat-label">Developer</span>
                    <span class="stat-value">{escape(str(developer))}</span>
                </div>
                <div class="summary-stat primary">
                    <span class="stat-label">Overall Avg</span>
                    <span class="stat-value large">{avg_str}</span>
                </div>
            </div>
        </div>
        """)

    parts.append("</div></div>")

    # One heat-map table per leaderboard that has at least one metric.
    for leaderboard_name in all_leaderboards:
        leaderboard_metrics = sorted(
            {
                metric
                for data in model_data_dict.values()
                for metric in data.get(leaderboard_name, {}).get("results", {})
            }
        )
        if not leaderboard_metrics:
            continue

        model_avgs = {
            name: _mean(_results(name, leaderboard_name).values())
            for name in selected_models
        }

        parts.append(f"""
        <div class="leaderboard-comparison-card">
            <div class="lb-card-header">
                <h3>{escape(str(leaderboard_name))}</h3>
            </div>
            <div class="lb-card-body">
        """)
        parts.append('<div class="heatmap-table-wrapper">')
        parts.append('<table class="heatmap-table">')

        parts.append('<thead><tr><th class="metric-header">Metric</th>')
        for model_name in selected_models:
            full_name = str(model_name)
            # Truncate long names; the full name stays available as a tooltip.
            short_name = full_name if len(full_name) <= 20 else full_name[:18] + "…"
            parts.append(
                f'<th class="model-header" title="{escape(full_name)}">'
                f'{escape(short_name)}</th>'
            )
        parts.append('</tr></thead>')
        parts.append('<tbody>')

        # Average row first; only the top average is highlighted.
        parts.append('<tr class="avg-row"><td class="metric-name">Average</td>')
        valid_avgs = [
            model_avgs[m] for m in selected_models if model_avgs[m] is not None
        ]
        max_avg = max(valid_avgs) if valid_avgs else None
        for model_name in selected_models:
            avg = model_avgs[model_name]
            if avg is None:
                parts.append('<td class="score-cell na">—</td>')
                continue
            cell_class = "best" if avg == max_avg and len(valid_avgs) > 1 else ""
            parts.append(f'<td class="score-cell {cell_class}">{avg:.2f}</td>')
        parts.append('</tr>')

        # Individual metric rows.
        for metric_name in leaderboard_metrics:
            parts.append(
                f'<tr><td class="metric-name">{escape(str(metric_name))}</td>'
            )
            metric_scores = {
                name: _results(name, leaderboard_name).get(metric_name)
                for name in selected_models
            }
            valid_scores = [v for v in metric_scores.values() if v is not None]
            for model_name in selected_models:
                score = metric_scores[model_name]
                if score is None:
                    parts.append('<td class="score-cell na">—</td>')
                    continue
                cell_class = _heat_class(score, valid_scores)
                parts.append(
                    f'<td class="score-cell {cell_class}">{score:.2f}</td>'
                )
            parts.append('</tr>')

        parts.append('</tbody></table></div>')
        parts.append("""
            </div>
        </div>
        """)

    parts.append("</div>")
    return "".join(parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import plotly.graph_objects as go
3
+ from data_loader import get_eval_metadata
4
 
5
 
6
def get_theme():
    """Return the app's Gradio theme: the Base theme with a light palette.

    The theme starts from ``gr.themes.Base`` (blue primary hue, slate neutral
    hue) and overrides body, block, input and button colours via ``.set()``.
    """
    colour_overrides = {
        "body_background_fill": "#f5f5f5",
        "body_text_color": "#0a0a0a",
        "body_text_color_subdued": "#525252",
        "block_background_fill": "#ffffff",
        "block_border_color": "#e5e5e5",
        "block_label_text_color": "#525252",
        "block_title_text_color": "#0a0a0a",
        "input_background_fill": "#ffffff",
        "input_border_color": "#e5e5e5",
        "button_primary_background_fill": "#3b82f6",
        "button_primary_text_color": "#ffffff",
        "button_secondary_background_fill": "#ffffff",
        "button_secondary_text_color": "#0a0a0a",
        "button_secondary_border_color": "#e5e5e5",
    }
    base_theme = gr.themes.Base(primary_hue="blue", neutral_hue="slate")
    return base_theme.set(**colour_overrides)
26
 
27
 
28
def get_custom_css():
    """Return the custom CSS string applied on top of the Gradio theme.

    The stylesheet covers the app header, leaderboard banner, metric cards
    (checkbox-driven expand/collapse), heat-map comparison table, model chips,
    generic form controls, dropdown tokens and the dataframe table.

    Note: the ``.dataframe`` rule previously declared ``border-radius: px``,
    which is invalid CSS and is dropped by browsers; it now uses ``10px`` to
    match the other panels (``.info-banner``, ``.metric-card``).
    """
    return """
:root {
    --brand-black: #0a0a0a;
    --brand-dark: #1a1a1a;
    --brand-gray: #2a2a2a;
    --brand-light: #f5f5f5;
    --brand-accent: #3b82f6;
}

body, .gradio-container {
    background: var(--brand-light) !important;
    color: var(--brand-black) !important;
}

.gradio-container {
    max-width: 100%;
    padding: 1.25rem 2.5rem 2rem;
}

.gradio-container *:focus-visible {
    outline: none !important;
    box-shadow: inset 0 0 0 1.5px #3b82f6 !important;
}

.gradio-container .block,
.gradio-container .wrap,
.gradio-container .form,
.gradio-container .container {
    box-shadow: none !important;
}

.app-header {
    display: flex;
    align-items: center;
    gap: 1rem;
    margin-bottom: 1.5rem;
    padding: 1rem 1.25rem;
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 12px;
}

.logo-mark {
    width: 48px;
    height: 48px;
    border-radius: 12px;
    display: flex;
    align-items: center;
    justify-content: center;
    font-weight: 800;
    font-size: 1.1rem;
    color: #ffffff;
}

.brand h1 { margin: 0; font-size: 1.5rem; font-weight: 700; color: #0a0a0a; }
.brand .tagline { color: #525252; font-size: 0.9rem; }
.header-right { margin-left: auto; }

.version-badge {
    background: rgba(59, 130, 246, 0.1);
    border: 1px solid #3b82f6;
    border-radius: 8px;
    padding: 0.35rem 0.6rem;
    font-size: 0.78rem;
    color: #3b82f6;
}

.info-banner {
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-left: 3px solid #3b82f6;
    border-radius: 10px;
    padding: 1rem 1.25rem;
    margin-bottom: 1rem;
}

.info-banner h3 { margin: 0; font-weight: 600; color: #0a0a0a; }

.leaderboard-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    gap: 1rem;
    flex-wrap: wrap;
    margin-bottom: 0.4rem;
}

.lb-title {
    font-size: 1.2rem;
    font-weight: 700;
    color: #0a0a0a;
    margin: 0;
    line-height: 1.35;
}

.lb-by {
    font-size: 0.9rem;
    color: #525252;
    margin: 0.1rem 0 0 0;
    line-height: 1.35;
}

.lb-meta {
    display: flex;
    flex-direction: column;
    gap: 0.1rem;
}

.eval-tags { display: flex; flex-wrap: wrap; gap: 0.4rem; }
.eval-tags { margin-top: 0.35rem; }

.eval-tag {
    border-radius: 10px;
    padding: 0.3rem 0.65rem;
    font-size: 0.82rem;
    font-weight: 600;
    color: #0a0a0a;
    border: 1px solid #e5e5e5;
    background: #f8fafc;
}

.eval-tag:nth-child(5n + 1) { border-color: #3b82f6; background: rgba(59, 130, 246, 0.12); color: #0a1d4a; }
.eval-tag:nth-child(5n + 2) { border-color: #10b981; background: rgba(16, 185, 129, 0.12); color: #0b3b2b; }
.eval-tag:nth-child(5n + 3) { border-color: #f97316; background: rgba(249, 115, 22, 0.12); color: #4b1f07; }
.eval-tag:nth-child(5n + 4) { border-color: #8b5cf6; background: rgba(139, 92, 246, 0.12); color: #2f0f5a; }
.eval-tag:nth-child(5n) { border-color: #06b6d4; background: rgba(6, 182, 212, 0.12); color: #053f46; }

.source-link {
    font-size: 0.75rem;
    color: #3b82f6;
    text-decoration: none;
    padding: 0.375rem 0.75rem;
    border: 1px solid #3b82f6;
    border-radius: 6px;
}

.source-link:hover { background: rgba(59, 130, 246, 0.1); }

.pagination-bar {
    margin-top: 0.75rem;
    padding: 0.85rem 0 0.25rem;
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 0.85rem;
}

.page-info { font-size: 1rem; min-width: 80px; text-align: center; color: #0a0a0a; }

.metrics-section {
    margin-top: 1.25rem;
    padding-top: 1.25rem;
    border-top: 1px solid #e5e5e5;
}

.metrics-section h3 {
    font-size: 0.9rem;
    font-weight: 700;
    color: #525252;
    margin: 0 0 0.9rem 0;
    text-transform: uppercase;
    letter-spacing: 0.05em;
}

.metrics-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
    gap: 0.75rem;
}

.metrics-grid .metric-card {
    align-self: start;
}

.metric-card {
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 10px;
    overflow: hidden;
    position: relative;
}

.metric-card-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 0.85rem 1rem;
    cursor: pointer;
}

.metric-card-header:hover {
    background: #f9f9f9;
}

.metric-card-name { font-weight: 600; color: #0a0a0a; }
.metric-card-direction { font-size: 0.82rem; color: #525252; }
.metric-card-direction .arrow { color: #22c55e; font-weight: 700; }

.metric-card-body {
    display: none;
    padding: 0.85rem 1rem;
    border-top: 1px solid #e5e5e5;
    color: #0a0a0a;
}

.metric-card input.metric-toggle {
    display: none;
}

.metric-card input.metric-toggle:checked ~ .metric-card-body {
    display: block;
}

.metric-card input.metric-toggle:checked ~ .metric-card-header {
    background: #f9f9f9;
    border-bottom: 1px solid #e5e5e5;
}

.metric-card input.metric-toggle:checked ~ .metric-card-header .metric-card-name,
.metric-card input.metric-toggle:checked ~ .metric-card-header .metric-card-direction {
    color: #0a0a0a;
}

/* Ensure multiple cards can be open at once and are closable */
.metric-card input.metric-toggle:not(:checked) ~ .metric-card-body {
    display: none;
}

.metric-type-badge {
    font-size: 0.68rem;
    text-transform: uppercase;
    padding: 0.2rem 0.45rem;
    background: rgba(59, 130, 246, 0.1);
    border: 1px solid #3b82f6;
    border-radius: 6px;
    color: #3b82f6;
}

.heatmap-table { width: 100%; border-collapse: collapse; font-size: 0.85rem; }
.heatmap-table th { padding: 0.55rem 0.65rem; font-weight: 700; font-size: 0.72rem; text-transform: uppercase; color: #525252; background: #f5f5f5; }
.heatmap-table td { padding: 0.45rem 0.65rem; text-align: center; border-bottom: 1px solid #e5e5e5; }
.heatmap-table td.metric-name { text-align: left; font-weight: 600; color: #0a0a0a; }
.heatmap-table td.score-cell { font-weight: 600; }
.heatmap-table td.score-cell.best { background: rgba(34, 197, 94, 0.15); color: #16a34a; }
.heatmap-table td.score-cell.good { background: rgba(34, 197, 94, 0.08); color: #16a34a; }
.heatmap-table td.score-cell.mid { background: rgba(234, 179, 8, 0.15); color: #ca8a04; }
.heatmap-table td.score-cell.low { background: rgba(239, 68, 68, 0.12); color: #dc2626; }
.heatmap-table td.score-cell.worst { background: rgba(239, 68, 68, 0.18); color: #b91c1c; }
.heatmap-table td.score-cell.na { color: #525252; font-style: italic; }

/* Model chips */
.selected-models-group label {
    display: inline-flex !important;
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 16px;
    padding: 0.35rem 0.85rem;
    font-size: 0.88rem;
    color: #0a0a0a;
    cursor: pointer;
    margin: 0.18rem 0.32rem 0.18rem 0 !important;
}

.selected-models-group input[type="checkbox"] { display: none; }

.no-results { text-align: center; padding: 2.5rem 1rem; color: #525252; }

.gradio-container footer { display: none; }

.block, .form, .wrap, .container { background: #ffffff !important; }

body, .gradio-container, p, span, div, h1, h2, h3, h4, h5, h6, label, td, th {
    color: #0a0a0a !important;
}

.label-wrap span, .prose, .markdown, .prose p, .prose li, .markdown p, .markdown li {
    color: #525252 !important;
}

input, textarea, select {
    background: #ffffff !important;
    color: #0a0a0a !important;
    border: 1px solid #e5e5e5 !important;
    border-radius: 8px !important;
}

input::placeholder, textarea::placeholder {
    color: #a1a1a1 !important;
}

input:focus, textarea:focus, select:focus {
    border-color: #3b82f6 !important;
    outline: none !important;
    box-shadow: inset 0 0 0 1.5px #3b82f6 !important;
}

select, .wrap select, .wrap input, input[type="text"], textarea {
    min-height: 44px !important;
    padding: 0.55rem 0.75rem !important;
    font-size: 0.96rem !important;
}

button {
    border-radius: 8px !important;
    font-weight: 500 !important;
    transition: all 0.15s ease !important;
}

button.primary, button[variant="primary"] {
    background: #3b82f6 !important;
    color: #ffffff !important;
    border: none !important;
}

button.primary:hover, button[variant="primary"]:hover {
    background: #2563eb !important;
}

button.secondary, button[variant="secondary"], button:not(.primary):not([variant="primary"]) {
    background: #ffffff !important;
    color: #0a0a0a !important;
    border: 1px solid #e5e5e5 !important;
}

button.secondary:hover, button[variant="secondary"]:hover {
    border-color: #3b82f6 !important;
    background: #f5f5f5 !important;
}

.tab-nav, .tabs {
    border-bottom: 1px solid #e5e5e5 !important;
}

.tab-nav button, .tabs button {
    color: #525252 !important;
    background: transparent !important;
    border: none !important;
    border-bottom: 2px solid transparent !important;
}

.tab-nav button.selected, .tabs button.selected {
    color: #3b82f6 !important;
    border-bottom-color: #3b82f6 !important;
}

.wrap, .secondary-wrap, .primary-wrap {
    background: transparent !important;
    border: none !important;
    border-radius: 0 !important;
    box-shadow: none !important;
    padding: 0 !important;
}

ul[role="listbox"], .dropdown, .options {
    background: #ffffff !important;
    border: 1px solid #e5e5e5 !important;
    border-radius: 8px !important;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
}

ul[role="listbox"] li, .dropdown li, .options li {
    color: #0a0a0a !important;
}

ul[role="listbox"] li:hover, .dropdown li:hover, .options li:hover {
    background: #f5f5f5 !important;
}

ul[role="listbox"] li.active, .dropdown li.active, .options li.active {
    background: #f5f5f5 !important;
    color: #0a0a0a !important;
}

ul[role="listbox"] li.selected, .dropdown li.selected {
    background: rgba(59, 130, 246, 0.1) !important;
    color: #3b82f6 !important;
}

.accordion {
    border: 1px solid #e5e5e5 !important;
    border-radius: 8px !important;
    background: #ffffff !important;
}

.accordion > button {
    color: #0a0a0a !important;
}

.selected-models-group label, .checkbox-group label {
    display: inline-flex !important;
    background: #ffffff;
    border: 1px solid #e5e5e5;
    border-radius: 20px !important;
    padding: 0.4rem 0.9rem !important;
    font-size: 0.88rem !important;
    color: #0a0a0a !important;
    cursor: pointer !important;
    margin: 0.2rem !important;
    transition: all 0.15s ease !important;
}

.selected-models-group label:hover, .checkbox-group label:hover {
    border-color: #3b82f6 !important;
    background: #f5f5f5 !important;
}

.selected-models-group input[type="checkbox"], .checkbox-group input[type="checkbox"] {
    display: none !important;
}

table {
    width: 100% !important;
    border-collapse: collapse !important;
    background: #ffffff !important;
}

table th {
    background: #f5f5f5 !important;
    color: #525252 !important;
    font-weight: 600 !important;
    text-transform: uppercase !important;
    font-size: 0.75rem !important;
    padding: 0.75rem !important;
    border-bottom: 1px solid #e5e5e5 !important;
    text-align: left !important;
}

table td {
    padding: 0.65rem 0.75rem !important;
    border-bottom: 1px solid #e5e5e5 !important;
    color: #0a0a0a !important;
}

table tr:hover td {
    background: #f9f9f9 !important;
}

.dataframe {
    background: #ffffff !important;
    border: 1px solid #e5e5e5 !important;
    box-shadow: none !important;
    border-radius: 10px !important;
    overflow: hidden !important;
}

.dataframe table {
    width: 100% !important;
    border-collapse: collapse !important;
    font-size: 0.95rem !important;
    table-layout: auto !important;
    background: #ffffff !important;
}

.dataframe thead,
.dataframe thead tr {
    background: #ffffff !important;
    position: sticky !important;
    top: 0 !important;
    z-index: 10 !important;
}

.dataframe thead th {
    padding: 0.875rem 1rem !important;
    font-weight: 700 !important;
    font-size: 0.75rem !important;
    text-transform: uppercase !important;
    letter-spacing: 0.05em !important;
    color: #0a0a0a !important;
    border-bottom: 2px solid #e5e5e5 !important;
    border-top: none !important;
    text-align: left !important;
    background: #ffffff !important;
    white-space: nowrap !important;
    border-radius: 0 !important;
}

.dataframe thead th span,
.dataframe thead th div,
.dataframe thead th button {
    background: transparent !important;
    border: none !important;
    border-radius: 0 !important;
    box-shadow: none !important;
    margin: 0 !important;
    outline: none !important;
}

.dataframe thead th span[role="button"],
.dataframe thead th span[class*="svelte"] {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    outline: none !important;
    padding: 0 !important;
    width: auto !important;
}

/* Also target the SVG icon if it exists to ensure it doesn't have a background */
.dataframe thead th svg {
    background: transparent !important;
    box-shadow: none !important;
}

.dataframe thead th span:hover,
.dataframe thead th span[role="button"]:hover,
.dataframe thead th span[class*="svelte"]:hover,
.dataframe thead th button:hover {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    color: #3b82f6 !important;
}

.token {
    background-color: rgba(59, 130, 246, 0.12) !important;
    border: 1px solid rgba(59, 130, 246, 0.3) !important;
    color: #1e3a8a !important;
    border-radius: 6px !important;
    padding: 2px 8px !important;
    gap: 4px !important;
}

.token-remove {
    background-color: rgba(255, 255, 255, 0.4) !important;
    border: 1px solid rgba(30, 58, 138, 0.5) !important; /* Dark blue outline */
    color: #1e3a8a !important;
    border-radius: 4px !important;
    margin-left: 6px !important;
    padding: 1px !important;
    opacity: 0.9 !important;
    min-width: 18px !important;
    min-height: 18px !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
}

.token-remove svg {
    width: 12px !important;
    height: 12px !important;
}

.token-remove:hover {
    background-color: #1e3a8a !important;
    color: #ffffff !important;
    border-color: #1e3a8a !important;
}

.selector-item {
    border-radius: 6px !important;
}

.gradio-container .token {
    box-shadow: none !important;
    font-weight: 500 !important;
}

.gradio-container .token span {
    color: #1e3a8a !important;
}

.dataframe tbody,
.dataframe tbody tr {
    background: #ffffff !important;
}

.dataframe tbody tr {
    border-bottom: 1px solid #e5e5e5 !important;
}

.dataframe tbody tr:hover {
    background: #f9f9f9 !important;
}

.dataframe tbody td {
    padding: 0.75rem 1rem !important;
    color: #0a0a0a !important;
    background: #ffffff !important;
    border: none !important;
    border-bottom: 1px solid #e5e5e5 !important;
}

.dataframe tbody td:first-child {
    font-weight: 700 !important;
    color: #0a0a0a !important;
    white-space: normal !important;
    word-break: break-word !important;
    max-width: 400px;
    min-width: 250px;
}

.dataframe tbody td:not(:first-child) {
    font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
    text-align: left !important;
    white-space: nowrap !important;
}

.dataframe td:nth-child(2),
.dataframe th:nth-child(2) {
    max-width: 220px;
    min-width: 140px;
}

.column-selector-dropdown {
    min-width: 300px;
}

.column-selector-dropdown .wrap {
    flex-wrap: nowrap !important;
    overflow-x: auto !important;
    gap: 0.25rem !important;
    padding: 0.5rem !important;
}

.column-selector-dropdown .wrap input {
    width: 100% !important;
    padding-left: 0.5rem !important;
    border: none !important;
    box-shadow: none !important;
}

.heatmap-table {
    border: 1px solid #e5e5e5 !important;
    border-radius: 8px !important;
    overflow: hidden !important;
}

.heatmap-table th {
    background: #f5f5f5 !important;
    color: #525252 !important;
    padding: 0.6rem 0.75rem !important;
    font-size: 0.72rem !important;
    border-bottom: 2px solid #e5e5e5 !important;
}

.heatmap-table td {
    padding: 0.5rem 0.75rem !important;
    border-bottom: 1px solid #e5e5e5 !important;
}

.heatmap-table td.metric-name {
    background: #f5f5f5 !important;
    font-weight: 600 !important;
}

.heatmap-table td.score-cell.best { background: rgba(34, 197, 94, 0.2) !important; color: #15803d !important; }
.heatmap-table td.score-cell.good { background: rgba(34, 197, 94, 0.1) !important; color: #16a34a !important; }
.heatmap-table td.score-cell.mid { background: rgba(234, 179, 8, 0.15) !important; color: #a16207 !important; }
.heatmap-table td.score-cell.low { background: rgba(239, 68, 68, 0.12) !important; color: #dc2626 !important; }
.heatmap-table td.score-cell.worst { background: rgba(239, 68, 68, 0.2) !important; color: #b91c1c !important; }
.heatmap-table td.score-cell.na { color: #a1a1a1 !important; font-style: italic !important; }

.gradio-container footer { display: none !important; }

::-webkit-scrollbar { width: 8px; height: 8px; }
::-webkit-scrollbar-track { background: #f5f5f5; }
::-webkit-scrollbar-thumb { background: #d4d4d4; border-radius: 4px; }
::-webkit-scrollbar-thumb:hover { background: #a1a1a1; }
"""
688
 
689
 
690
def format_leaderboard_header(selected_leaderboard, metadata):
    """Render the HTML banner shown above a leaderboard table.

    Args:
        selected_leaderboard: Name of the chosen leaderboard; falsy when
            nothing is selected.
        metadata: Mapping that may contain ``"evals"`` (eval name -> info)
            and ``"source_info"`` (with ``"organization"`` and ``"url"``).

    Returns:
        An HTML string: a prompt when no leaderboard is selected, a title-only
        banner when there is no eval metadata, otherwise the full banner with
        the organisation, a source link and one tag per eval (sorted by name).

    All values taken from the data are HTML-escaped, and missing or ``None``
    source fields fall back to ``"Unknown"`` / ``"#"``.
    """
    from html import escape  # local import keeps this block self-contained

    if not selected_leaderboard:
        return '<div style="text-align: center; padding: 2rem; color: #525252;">Select a leaderboard to explore</div>'

    title = escape(str(selected_leaderboard))
    if not metadata or not metadata.get("evals"):
        return f'<div class="info-banner"><h3>{title}</h3></div>'

    source_info = metadata.get("source_info") or {}
    org = escape(str(source_info.get("organization") or "Unknown"))
    url = escape(str(source_info.get("url") or "#"), quote=True)

    eval_tags = "".join(
        f'<span class="eval-tag">{escape(str(name))}</span>'
        for name in sorted(metadata["evals"])
    )

    # rel="noopener noreferrer" stops the opened page from reaching
    # window.opener when the link is followed in a new tab.
    return f'''
    <div class="info-banner">
        <div class="leaderboard-header">
            <div class="lb-meta">
                <div class="lb-title">{title}</div>
                <div class="lb-by">By {org}</div>
            </div>
            <a href="{url}" target="_blank" rel="noopener noreferrer" class="source-link">Source →</a>
        </div>
        <div class="eval-tags">{eval_tags}</div>
    </div>
    '''
716
 
717
 
718
def format_metric_details(selected_leaderboard, metadata):
    """Render the "Metric Reference" section: one collapsible card per eval.

    Args:
        selected_leaderboard: Name of the chosen leaderboard; falsy when
            nothing is selected.
        metadata: Mapping whose ``"evals"`` entry maps eval name -> info dict.
            The info keys read here are ``score_type``, ``lower_is_better``,
            ``min_score``, ``max_score``, ``level_names`` and ``description``.

    Returns:
        ``""`` when there is nothing to show, otherwise an HTML string. Each
        card is toggled by a hidden checkbox paired with a ``<label>``, so
        the ids ``mc<i>`` / ``toggle-mc<i>`` are unique per card.

    Values taken from the data are HTML-escaped. A ``None`` ``score_type`` or
    a missing ``max_score`` no longer raises.
    """
    from html import escape  # local import keeps this block self-contained

    if not selected_leaderboard or not metadata or not metadata.get("evals"):
        return ""

    cards = []
    for i, (eval_name, info) in enumerate(metadata["evals"].items()):
        info = info or {}
        raw_type = info.get("score_type") or ""  # key may be present as None
        score_type = escape(str(raw_type).upper()) or "—"

        lower_is_better = info.get("lower_is_better")
        direction = "Lower is better" if lower_is_better else "Higher is better"
        arrow = "↓" if lower_is_better else "↑"

        details = ""
        min_score = info.get("min_score")
        max_score = info.get("max_score")
        if raw_type == "continuous" and min_score is not None:
            if max_score is not None:
                details = f"Range: [{min_score} – {max_score}]"
            else:
                # Only a lower bound is known; avoid a KeyError / "None".
                details = f"Min: {min_score}"
        elif raw_type == "levels" and info.get("level_names"):
            details = f"Levels: {', '.join(str(level) for level in info['level_names'])}"

        description = info.get("description") or "No description"
        card_id = f"mc{i}"
        cards.append(f'''
        <div class="metric-card" id="{card_id}">
            <input type="checkbox" id="toggle-{card_id}" class="metric-toggle" />
            <label class="metric-card-header" for="toggle-{card_id}">
                <span class="metric-card-name">{escape(str(eval_name))}</span>
                <span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
            </label>
            <div class="metric-card-body">
                <div>{escape(str(description))}</div>
                <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
                    <span style="font-size: 0.75rem; color: #525252;">{escape(details)}</span>
                    <span class="metric-type-badge">{score_type}</span>
                </div>
            </div>
        </div>
        ''')

    cards_html = "".join(cards)
    return f'''
    <div class="metrics-section">
        <h3>Metric Reference</h3>
        <div class="metrics-grid">{cards_html}</div>
    </div>
    '''
760
 
761
 
762
def format_model_card(model_name, model_data):
    """Render a summary card for one model across all of its leaderboards.

    Args:
        model_name: Display name of the model.
        model_data: Mapping of leaderboard name -> entry dict. Each entry may
            carry ``developer``, ``params``, ``architecture`` and a
            ``results`` mapping of metric name -> score (or ``None``).

    Returns:
        An HTML string. A "No results found" placeholder is returned when
        ``model_data`` is empty.
    """
    from html import escape
    from operator import itemgetter

    if not model_data:
        return '<div class="no-results"><h3>No results found</h3><p>Try a different model name</p></div>'

    # Model-level attributes are taken from the first leaderboard entry.
    first = next(iter(model_data.values()))
    developer = escape(str(first.get("developer", "Unknown")))
    arch = escape(str(first.get("architecture", "Unknown")))
    params = first.get("params")
    params_str = f"{escape(str(params))}B" if params else "—"

    parts = [
        '<div style="padding: 1rem; background: #ffffff; border-radius: 10px; border: 1px solid #e5e5e5;">'
        f'<h2 style="margin: 0 0 0.5rem 0; color: #0a0a0a;">{escape(str(model_name))}</h2>'
        '<div style="color: #525252; margin-bottom: 1rem;">'
        f'<span>Developer: {developer}</span> · '
        f'<span>Params: {params_str}</span> · '
        f'<span>Arch: {arch}</span>'
        '</div>'
    ]

    for leaderboard_name, data in model_data.items():
        results = data.get("results") or {}
        if not results:
            # Nothing to show for this leaderboard; skip the empty section.
            continue

        scores = [v for v in results.values() if v is not None]
        # `scores` is checked for emptiness rather than testing the average
        # for truthiness, so a genuine average of 0 is still displayed.
        avg_str = f"{sum(scores) / len(scores):.2f}" if scores else "—"

        parts.append(
            f'<div style="margin-bottom: 1rem;"><h4 style="color: #0a0a0a;">{escape(str(leaderboard_name))} '
            f'<span style="color: #525252;">(avg: {avg_str})</span></h4>'
            '<div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">'
        )

        # Highest score first; metrics without a score always go last
        # (instead of being ranked as if they had scored 0).
        scored = [(k, v) for k, v in results.items() if v is not None]
        unscored = [(k, v) for k, v in results.items() if v is None]
        ordered = sorted(scored, key=itemgetter(1), reverse=True) + unscored

        for metric_name, score in ordered:
            score_display = f"{score:.2f}" if score is not None else "—"
            parts.append(
                '<div style="padding: 0.4rem 0.8rem; border-radius: 6px; background: #f5f5f5; border: 1px solid #e5e5e5;">'
                f'<span style="color: #525252;">{escape(str(metric_name))}:</span> '
                f'<strong style="color: #0a0a0a;">{score_display}</strong></div>'
            )

        parts.append('</div></div>')

    parts.append('</div>')
    return "".join(parts)
802
 
803
 
804
def format_model_comparison(selected_models, all_results):
    """Render per-leaderboard heat-map tables comparing the selected models.

    Args:
        selected_models: Model names to compare, in column order.
        all_results: Mapping of model name -> leaderboard name -> entry dict
            whose ``"results"`` maps metric name -> score (or ``None``).

    Returns:
        An HTML string with one table per leaderboard (rows = metrics,
        columns = models), or a placeholder when there is nothing to compare.
        Within each row the highest score is classed ``best`` and the others
        are binned by their position between the row's min and max.
    """
    from html import escape

    if not selected_models or not all_results:
        return '<div class="no-results"><h3>Select models to compare</h3><p>Choose models from the dropdown</p></div>'

    model_data_dict = {
        name: all_results[name] for name in selected_models if name in all_results
    }
    if not model_data_dict:
        return '<div class="no-results"><h3>No data found</h3></div>'

    all_leaderboards = sorted({lb for md in model_data_dict.values() for lb in md})

    # The header row is identical for every table, so build it once.
    header_cells = []
    for model_name in selected_models:
        short = model_name[:20] + "…" if len(model_name) > 20 else model_name
        header_cells.append(
            f'<th title="{escape(str(model_name), quote=True)}">{escape(str(short))}</th>'
        )
    header_html = "".join(header_cells)

    parts = ['<div style="padding: 1rem; background: #ffffff; border-radius: 10px; border: 1px solid #e5e5e5;">']

    for leaderboard_name in all_leaderboards:
        metrics = sorted(
            {
                metric
                for md in model_data_dict.values()
                if leaderboard_name in md
                for metric in md[leaderboard_name].get("results", {})
            }
        )
        if not metrics:
            continue

        parts.append(f'<h3 style="margin: 1rem 0 0.5rem; color: #0a0a0a;">{escape(str(leaderboard_name))}</h3>')
        parts.append('<div style="overflow-x: auto;"><table class="heatmap-table"><thead><tr><th>Metric</th>')
        parts.append(header_html)
        parts.append('</tr></thead><tbody>')

        for metric_name in metrics:
            parts.append(f'<tr><td class="metric-name">{escape(str(metric_name))}</td>')

            scores = {
                m: model_data_dict[m][leaderboard_name].get("results", {}).get(metric_name)
                for m in selected_models
                if m in model_data_dict and leaderboard_name in model_data_dict[m]
            }
            valid = [v for v in scores.values() if v is not None]
            lo = min(valid) if valid else None
            hi = max(valid) if valid else None

            for model_name in selected_models:
                score = scores.get(model_name)
                if score is None:
                    parts.append('<td class="score-cell na">—</td>')
                    continue
                # Colouring needs at least two scores to compare. The bounds
                # are used as-is, so a min or max of 0 still gets coloured.
                cls = _heat_class(score, lo, hi) if len(valid) > 1 else ""
                parts.append(f'<td class="score-cell {cls}">{score:.2f}</td>')

            parts.append('</tr>')

        parts.append('</tbody></table></div>')

    parts.append('</div>')
    return "".join(parts)


def _heat_class(score, lo, hi):
    """Return the heat-map CSS class for ``score`` within the row range [lo, hi]."""
    if score == hi:
        # Also covers lo == hi (all scores tied), so no division by zero below.
        return "best"
    pct = (score - lo) / (hi - lo)
    if pct >= 0.75:
        return "good"
    if pct >= 0.5:
        return "mid"
    if pct >= 0.25:
        return "low"
    return "worst"
876
+
877
+
878
def create_radar_plot(selected_models, all_results):
    """Build a radar chart of normalized scores for the selected models.

    Each axis is one "<leaderboard>: <metric>" pair for which at least one
    selected model has a score. Scores are scaled to [0, 1] per axis using the
    ``min_score`` / ``max_score`` from ``get_eval_metadata`` when available,
    with fallbacks described inline.

    Args:
        selected_models: Model names to plot; one trace per name.
        all_results: Mapping of model name -> leaderboard name -> entry dict
            whose ``"results"`` maps metric name -> score (or ``None``).

    Returns:
        A ``go.Figure``, or ``None`` when there is nothing to plot.
    """
    if not selected_models or not all_results:
        return None

    # (leaderboard, metric) -> {model: score}. A tuple key keeps the two parts
    # separate, so a leaderboard name containing ": " cannot be mis-split.
    metric_data = {}
    for model in selected_models:
        for lb_name, lb_data in (all_results.get(model) or {}).items():
            for metric, score in (lb_data.get("results") or {}).items():
                if score is not None:
                    metric_data.setdefault((lb_name, metric), {})[model] = score

    if not metric_data:
        return None

    # Fetch metadata only for leaderboards that actually contribute an axis.
    meta_cache = {
        lb_name: get_eval_metadata(lb_name) or {}
        for lb_name in sorted({lb for lb, _ in metric_data})
    }

    # Resolve the scale of every axis once; it does not depend on the model.
    axes = []
    for key in sorted(metric_data, key=lambda k: f"{k[0]}: {k[1]}"):
        lb_name, metric_name = key
        per_model = metric_data[key]
        info = (meta_cache[lb_name].get("evals") or {}).get(metric_name) or {}

        observed_max = max(per_model.values())
        lo = info.get("min_score")
        hi = info.get("max_score")
        if lo is None:
            lo = 0
        if hi is None:
            # No declared maximum: guess a 0-1 or 0-100 scale from the data,
            # widening it if an observed score exceeds the guess.
            hi = max(100 if observed_max > 1 else 1, observed_max)

        axes.append((f"{lb_name}: {metric_name}", per_model, lo, hi))

    fig = go.Figure()

    for model in selected_models:
        r_values = []
        theta_values = []
        hover_texts = []

        for label, per_model, lo, hi in axes:
            theta_values.append(label)
            val = per_model.get(model)
            if val is None:
                r_values.append(None)
                hover_texts.append(f"{label}<br>N/A")
                continue

            norm_val = 1.0 if hi == lo else (val - lo) / (hi - lo)
            # Scores outside the declared bounds are pinned to the plot edge.
            norm_val = max(0.0, min(1.0, norm_val))

            r_values.append(norm_val)
            hover_texts.append(f"{label}<br>Score: {val:.2f} (Norm: {norm_val:.2f})")

        if r_values:
            # Repeat the first point so the polygon is drawn closed.
            r_values.append(r_values[0])
            theta_values.append(theta_values[0])
            hover_texts.append(hover_texts[0])

        fig.add_trace(go.Scatterpolar(
            r=r_values,
            theta=theta_values,
            name=model,
            hovertext=hover_texts,
            hoverinfo="text",
            fill='toself',
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        showlegend=True,
        margin=dict(l=80, r=80, t=20, b=20),
        title="Model Comparison Radar (Normalized Scores)",
    )

    return fig
uv.lock ADDED
The diff for this file is too large to render. See raw diff