deepmage121 committed on
Commit
7ab1991
·
1 Parent(s): 0205c53

added exports, fixes to search with a split and other qol

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +108 -9
  3. ui_components.py +26 -2
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Eee Test
3
  emoji: 👀
 
4
  colorFrom: pink
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
- pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Every Eval Ever Space
3
  emoji: 👀
4
+ python-version: 3.13
5
  colorFrom: pink
6
  colorTo: purple
7
  sdk: gradio
8
+ sdk_version: 6.1.0
9
  app_file: app.py
10
+ pinned: true
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -131,6 +131,72 @@ def get_model_suggestions(query):
131
  return gr.update(choices=matches[:15])
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  load_hf_dataset_on_startup()
135
 
136
  initial_leaderboards = get_available_leaderboards()
@@ -179,6 +245,8 @@ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css())
179
  )
180
  with gr.Column(scale=1, min_width=120):
181
  refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
 
 
182
 
183
  search_box = gr.Textbox(
184
  label="Filter",
@@ -225,15 +293,19 @@ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css())
225
  </div>
226
  """
227
 
 
 
 
 
 
 
228
  with gr.Row(elem_classes="controls-bar"):
229
  with gr.Column(scale=4):
230
- all_models = get_all_model_names()
231
  model_dropdown = gr.Dropdown(
232
- choices=all_models,
233
- label="Search models to add",
234
  interactive=True,
235
  allow_custom_value=False,
236
- filterable=True,
237
  )
238
  with gr.Column(scale=1, min_width=100):
239
  clear_models_btn = gr.Button("Clear All", variant="secondary", size="sm")
@@ -246,6 +318,12 @@ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css())
246
  elem_classes="selected-models-group"
247
  )
248
 
 
 
 
 
 
 
249
  radar_view = gr.Plot(label="Radar Comparison")
250
  model_card_view = gr.HTML(value=default_compare_html)
251
 
@@ -329,12 +407,19 @@ Submit via GitHub Pull Request:
329
  outputs=[leaderboard_selector]
330
  )
331
 
 
 
 
 
 
 
332
  def add_model_and_compare(selected_model, current_selected):
333
  if not selected_model:
334
  comparison_html, plot = compare_models(current_selected) if current_selected else (default_compare_html, None)
335
  return (
336
  current_selected,
337
- gr.update(value=None),
 
338
  gr.update(choices=current_selected, value=current_selected),
339
  comparison_html,
340
  plot
@@ -347,7 +432,8 @@ Submit via GitHub Pull Request:
347
 
348
  return (
349
  current_selected,
350
- gr.update(value=None),
 
351
  gr.update(choices=current_selected, value=current_selected),
352
  comparison_html,
353
  plot
@@ -360,16 +446,23 @@ Submit via GitHub Pull Request:
360
  def clear_all_models():
361
  return (
362
  [],
363
- gr.update(value=None),
 
364
  gr.update(choices=[], value=[]),
365
  default_compare_html,
366
  None
367
  )
368
 
 
 
 
 
 
 
369
  model_dropdown.select(
370
  fn=add_model_and_compare,
371
  inputs=[model_dropdown, selected_models_state],
372
- outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view, radar_view]
373
  )
374
 
375
  selected_models_group.change(
@@ -380,7 +473,13 @@ Submit via GitHub Pull Request:
380
 
381
  clear_models_btn.click(
382
  fn=clear_all_models,
383
- outputs=[selected_models_state, model_dropdown, selected_models_group, model_card_view, radar_view]
 
 
 
 
 
 
384
  )
385
 
386
  DATA_DIR.mkdir(exist_ok=True)
 
131
  return gr.update(choices=matches[:15])
132
 
133
 
134
+ def export_leaderboard_to_csv(full_df, selected_leaderboard, search_query, selected_columns):
135
+ """Export the current leaderboard view to CSV."""
136
+ if full_df.empty:
137
+ return None
138
+
139
+ df = full_df.copy()
140
+
141
+ # Apply column selection
142
+ if selected_columns:
143
+ cols = ["Model"] + [c for c in df.columns if c in selected_columns and c != "Model"]
144
+ df = df[cols]
145
+
146
+ # Apply search filter
147
+ if search_query:
148
+ mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
149
+ df = df[mask]
150
+
151
+ # Save to CSV with absolute path
152
+ from pathlib import Path
153
+ import tempfile
154
+ temp_dir = Path(tempfile.gettempdir())
155
+ filename = temp_dir / f"{selected_leaderboard.replace(' ', '_')}_leaderboard.csv"
156
+ df.to_csv(filename, index=False)
157
+ return str(filename)
158
+
159
+
160
+ def export_comparison_to_csv(selected_models):
161
+ """Export model comparison to CSV."""
162
+ if not selected_models:
163
+ return None
164
+
165
+ all_results = {}
166
+ for model_name in selected_models:
167
+ results, _ = search_model_across_leaderboards(model_name)
168
+ if results:
169
+ matched_model = list(results.keys())[0]
170
+ all_results[matched_model] = results[matched_model]
171
+
172
+ if not all_results:
173
+ return None
174
+
175
+ # Build comparison table
176
+ rows = []
177
+ for model_name, model_data in all_results.items():
178
+ for leaderboard_name, data in model_data.items():
179
+ results = data.get("results", {})
180
+ row = {
181
+ "Model": model_name,
182
+ "Leaderboard": leaderboard_name,
183
+ "Developer": data.get("developer"),
184
+ "Params (B)": data.get("params"),
185
+ "Architecture": data.get("architecture"),
186
+ "Precision": data.get("precision")
187
+ }
188
+ row.update(results)
189
+ rows.append(row)
190
+
191
+ df = pd.DataFrame(rows)
192
+ from pathlib import Path
193
+ import tempfile
194
+ temp_dir = Path(tempfile.gettempdir())
195
+ filename = temp_dir / "model_comparison.csv"
196
+ df.to_csv(filename, index=False)
197
+ return str(filename)
198
+
199
+
200
  load_hf_dataset_on_startup()
201
 
202
  initial_leaderboards = get_available_leaderboards()
 
245
  )
246
  with gr.Column(scale=1, min_width=120):
247
  refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
248
+ with gr.Column(scale=1, min_width=120):
249
+ export_btn = gr.DownloadButton("📥 Export CSV", variant="secondary", size="sm")
250
 
251
  search_box = gr.Textbox(
252
  label="Filter",
 
293
  </div>
294
  """
295
 
296
+ model_search_box = gr.Textbox(
297
+ label="Type to search for models",
298
+ placeholder="Start typing model name (e.g., gpt, llama, claude)...",
299
+ interactive=True,
300
+ )
301
+
302
  with gr.Row(elem_classes="controls-bar"):
303
  with gr.Column(scale=4):
 
304
  model_dropdown = gr.Dropdown(
305
+ choices=[],
306
+ label="Select from search results",
307
  interactive=True,
308
  allow_custom_value=False,
 
309
  )
310
  with gr.Column(scale=1, min_width=100):
311
  clear_models_btn = gr.Button("Clear All", variant="secondary", size="sm")
 
318
  elem_classes="selected-models-group"
319
  )
320
 
321
+ with gr.Row():
322
+ with gr.Column(scale=4):
323
+ pass
324
+ with gr.Column(scale=1, min_width=120):
325
+ export_comparison_btn = gr.DownloadButton("📥 Export CSV", variant="secondary", size="sm")
326
+
327
  radar_view = gr.Plot(label="Radar Comparison")
328
  model_card_view = gr.HTML(value=default_compare_html)
329
 
 
407
  outputs=[leaderboard_selector]
408
  )
409
 
410
+ export_btn.click(
411
+ fn=export_leaderboard_to_csv,
412
+ inputs=[full_df_state, leaderboard_selector, search_box, column_selector],
413
+ outputs=[export_btn]
414
+ )
415
+
416
  def add_model_and_compare(selected_model, current_selected):
417
  if not selected_model:
418
  comparison_html, plot = compare_models(current_selected) if current_selected else (default_compare_html, None)
419
  return (
420
  current_selected,
421
+ gr.update(value=""),
422
+ gr.update(value=None, choices=[]),
423
  gr.update(choices=current_selected, value=current_selected),
424
  comparison_html,
425
  plot
 
432
 
433
  return (
434
  current_selected,
435
+ gr.update(value=""),
436
+ gr.update(value=None, choices=[]),
437
  gr.update(choices=current_selected, value=current_selected),
438
  comparison_html,
439
  plot
 
446
  def clear_all_models():
447
  return (
448
  [],
449
+ gr.update(value=""),
450
+ gr.update(value=None, choices=[]),
451
  gr.update(choices=[], value=[]),
452
  default_compare_html,
453
  None
454
  )
455
 
456
+ model_search_box.input(
457
+ fn=get_model_suggestions,
458
+ inputs=[model_search_box],
459
+ outputs=[model_dropdown]
460
+ )
461
+
462
  model_dropdown.select(
463
  fn=add_model_and_compare,
464
  inputs=[model_dropdown, selected_models_state],
465
+ outputs=[selected_models_state, model_search_box, model_dropdown, selected_models_group, model_card_view, radar_view]
466
  )
467
 
468
  selected_models_group.change(
 
473
 
474
  clear_models_btn.click(
475
  fn=clear_all_models,
476
+ outputs=[selected_models_state, model_search_box, model_dropdown, selected_models_group, model_card_view, radar_view]
477
+ )
478
+
479
+ export_comparison_btn.click(
480
+ fn=export_comparison_to_csv,
481
+ inputs=[selected_models_state],
482
+ outputs=[export_comparison_btn]
483
  )
484
 
485
  DATA_DIR.mkdir(exist_ok=True)
ui_components.py CHANGED
@@ -177,7 +177,7 @@ body, .gradio-container {
177
 
178
  .metrics-section {
179
  margin-top: 1.25rem;
180
- padding-top: 1.25rem;
181
  border-top: 1px solid #e5e5e5;
182
  }
183
 
@@ -196,6 +196,29 @@ body, .gradio-container {
196
  gap: 0.75rem;
197
  }
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  .metrics-grid .metric-card {
200
  align-self: start;
201
  }
@@ -474,7 +497,7 @@ table tr:hover td {
474
  .dataframe table {
475
  width: 100% !important;
476
  border-collapse: collapse !important;
477
- font-size: 0.95rem !important;
478
  table-layout: auto !important;
479
  background: #ffffff !important;
480
  }
@@ -621,6 +644,7 @@ table tr:hover td {
621
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
622
  text-align: left !important;
623
  white-space: nowrap !important;
 
624
  }
625
 
626
  .dataframe td:nth-child(2),
 
177
 
178
  .metrics-section {
179
  margin-top: 1.25rem;
180
+ padding: 1.25rem 1rem;
181
  border-top: 1px solid #e5e5e5;
182
  }
183
 
 
196
  gap: 0.75rem;
197
  }
198
 
199
+ @media (max-width: 768px) {
200
+ .metrics-grid {
201
+ grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
202
+ gap: 0.5rem;
203
+ }
204
+
205
+ .metric-card-header {
206
+ padding: 0.65rem 0.8rem;
207
+ flex-direction: column;
208
+ align-items: flex-start;
209
+ gap: 0.25rem;
210
+ }
211
+
212
+ .metric-card-body {
213
+ padding: 0.65rem 0.8rem;
214
+ font-size: 0.85rem;
215
+ }
216
+
217
+ .metrics-section {
218
+ padding: 1rem 0.5rem;
219
+ }
220
+ }
221
+
222
  .metrics-grid .metric-card {
223
  align-self: start;
224
  }
 
497
  .dataframe table {
498
  width: 100% !important;
499
  border-collapse: collapse !important;
500
+ font-size: 0.75rem !important;
501
  table-layout: auto !important;
502
  background: #ffffff !important;
503
  }
 
644
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
645
  text-align: left !important;
646
  white-space: nowrap !important;
647
+ min-width: 80px !important;
648
  }
649
 
650
  .dataframe td:nth-child(2),