import gradio as gr
import pandas as pd
import tempfile
from pathlib import Path

from data_loader import (
    load_hf_dataset_on_startup,
    get_available_leaderboards,
    get_eval_metadata,
    build_leaderboard_table,
    clear_cache,
    search_model_across_leaderboards,
    get_model_suggestions_fast,
    DATA_DIR,
)
from ui_components import (
    get_theme,
    get_custom_css,
    format_leaderboard_header,
    format_metric_details,
    format_model_card,
    format_model_comparison,
    create_radar_plot,
)

PAGE_SIZE = 50


def get_leaderboard_data(selected_leaderboard, progress=gr.Progress()):
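    """Load the full table and eval metadata for one leaderboard, reporting progress."""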
    if not selected_leaderboard:
        return pd.DataFrame(), {}
    metadata = get_eval_metadata(selected_leaderboard)

    def progress_callback(value, desc):
        progress(value, desc=desc)

    df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
    return df, metadata


def filter_and_paginate(df, search_query, sort_column, selected_columns, current_page):
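    """Apply column selection, a case-insensitive text filter, and a descending
    sort, then slice out one PAGE_SIZE page.

    Returns (page_df, clamped_current_page, total_pages).
    """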
    if df.empty:
        return df.copy(), 1, 1
    df = df.copy()
    all_columns = list(df.columns)
    if selected_columns:
        cols = ["Model"] + [c for c in all_columns if c in selected_columns and c != "Model"]
        df = df[cols]
    if search_query:
        # regex=False: treat the query as a literal string, so characters like "(" don't raise
        mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False, regex=False).any(), axis=1)
        df = df[mask]
    if sort_column and sort_column in df.columns:
        df = df.sort_values(by=sort_column, ascending=False, na_position="last")
    total_rows = len(df)
    total_pages = max(1, (total_rows + PAGE_SIZE - 1) // PAGE_SIZE)
    current_page = max(1, min(current_page, total_pages))
    start = (current_page - 1) * PAGE_SIZE
    end = start + PAGE_SIZE
    return df.iloc[start:end], current_page, total_pages


def search_model(model_query):
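    """Return an HTML card for the best-matching model, or a placeholder / no-results message."""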
    if not model_query or len(model_query) < 2:
        return """
        <div class="no-results">
            <h3>Search for a model</h3>
            <p>Enter a model name to see its benchmarks across all leaderboards</p>
        </div>
        """
    results, _ = search_model_across_leaderboards(model_query)
    if not results:
        return f"""
        <div class="no-results">
            <h3>No results for "{model_query}"</h3>
            <p>Try a different model name or check the spelling</p>
        </div>
        """
    model_name = list(results.keys())[0]
    model_data = results[model_name]
    return format_model_card(model_name, model_data)


def compare_models(selected_models):
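    """Build the comparison view: a radar plot plus either a single model card
    or a side-by-side comparison, depending on how many models matched.
    """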
    if not selected_models:
        return """
        <div class="no-results">
            <h3>Select models to compare</h3>
            <p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
        </div>
        """, None
    all_results = {}
    for model_name in selected_models:
        results, _ = search_model_across_leaderboards(model_name)
        if results:
            matched_model = list(results.keys())[0]
            all_results[matched_model] = results[matched_model]
    plot = create_radar_plot(list(all_results.keys()), all_results)
    if len(all_results) == 1:
        model_name = list(all_results.keys())[0]
        return format_model_card(model_name, all_results[model_name]), plot
    elif len(all_results) > 1:
        return format_model_comparison(list(all_results.keys()), all_results), plot
    else:
        return """
        <div class="no-results">
            <h3>No results found</h3>
            <p>Try selecting different models</p>
        </div>
        """, None


def get_model_suggestions(value):
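    """Populate the suggestion list once the query is at least two characters long."""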
    query = value or ""
    if not query or len(query) < 2:
        return gr.update(choices=[], value=[])
    matches = get_model_suggestions_fast(query, limit=10)
    return gr.update(choices=matches, value=[])


def export_leaderboard_to_csv(full_df, selected_leaderboard, search_query, selected_columns):
    """Export the current leaderboard view (column selection and filter applied) to CSV."""
    if full_df.empty:
        return None
    df = full_df.copy()
    # Apply column selection
    if selected_columns:
        cols = ["Model"] + [c for c in df.columns if c in selected_columns and c != "Model"]
        df = df[cols]
    # Apply search filter (literal match, mirroring the on-screen filter)
    if search_query:
        mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False, regex=False).any(), axis=1)
        df = df[mask]
    # Save to CSV with an absolute path in the system temp directory
    temp_dir = Path(tempfile.gettempdir())
    filename = temp_dir / f"{selected_leaderboard.replace(' ', '_')}_leaderboard.csv"
    df.to_csv(filename, index=False)
    return str(filename)


def export_comparison_to_csv(selected_models):
    """Export model comparison to CSV."""
    if not selected_models:
        return None
    all_results = {}
    for model_name in selected_models:
        results, _ = search_model_across_leaderboards(model_name)
        if results:
            matched_model = list(results.keys())[0]
            all_results[matched_model] = results[matched_model]
    if not all_results:
        return None
    # Build comparison table: one row per (model, leaderboard) pair
    rows = []
    for model_name, model_data in all_results.items():
        for leaderboard_name, data in model_data.items():
            results = data.get("results", {})
            row = {
                "Model": model_name,
                "Leaderboard": leaderboard_name,
                "Developer": data.get("developer"),
                "Params (B)": data.get("params"),
                "Architecture": data.get("architecture"),
                "Precision": data.get("precision"),
            }
            row.update(results)
            rows.append(row)
    df = pd.DataFrame(rows)
    temp_dir = Path(tempfile.gettempdir())
    filename = temp_dir / "model_comparison.csv"
    df.to_csv(filename, index=False)
    return str(filename)


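# Load the dataset and pre-compute the initial leaderboard view at import time.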
load_hf_dataset_on_startup()
initial_leaderboards = get_available_leaderboards()
initial_leaderboard = initial_leaderboards[0] if initial_leaderboards else None

if initial_leaderboard:
    _init_df, _init_metadata = get_leaderboard_data(initial_leaderboard)
    _init_columns = [c for c in _init_df.columns if c != "Model"] if not _init_df.empty else []
    _init_df_display, _, _init_total_pages = filter_and_paginate(_init_df, "", "Average", None, 1)
else:
    _init_df = pd.DataFrame()
    _init_metadata = {}
    _init_columns = []
    _init_df_display = pd.DataFrame()
    _init_total_pages = 1


with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css()) as demo:
    full_df_state = gr.State(value=_init_df)
    metadata_state = gr.State(value=_init_metadata)
    current_page_state = gr.State(value=1)

    gr.HTML("""
        <div class="app-header">
            <div class="logo-mark">E³</div>
            <div class="brand">
                <h1>Every Eval Ever</h1>
                <span class="tagline">Browse and compare model benchmarks</span>
            </div>
            <div class="header-right">
                <span class="version-badge">beta</span>
            </div>
        </div>
    """)

    with gr.Tabs():
        with gr.TabItem("Leaderboards"):
            with gr.Column(elem_classes="controls-bar"):
                with gr.Row():
                    with gr.Column(scale=4, min_width=260):
                        leaderboard_selector = gr.Dropdown(
                            choices=initial_leaderboards,
                            value=initial_leaderboard,
                            label="Leaderboard",
                            interactive=True,
                        )
                    with gr.Column(scale=1, min_width=120):
                        refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
                    with gr.Column(scale=1, min_width=120):
                        export_btn = gr.DownloadButton("📥 Export CSV", variant="secondary", size="sm")
                search_box = gr.Textbox(
                    label="Filter",
                    placeholder="Filter models...",
                    show_label=True,
                )

            header_view = gr.HTML(value=format_leaderboard_header(initial_leaderboard, _init_metadata))

            with gr.Row(elem_classes="column-selector-bar"):
                with gr.Column(scale=5, min_width=320):
                    column_selector = gr.Dropdown(
                        choices=_init_columns,
                        value=_init_columns,
                        label="Columns to Display",
                        multiselect=True,
                        interactive=True,
                        elem_classes="column-selector-dropdown",
                    )

            leaderboard_table = gr.Dataframe(
                value=_init_df_display,
                label=None,
                interactive=False,
                wrap=False,
                elem_classes="dataframe",
            )

            with gr.Row(elem_classes="pagination-bar"):
                prev_btn = gr.Button("←", variant="secondary", size="sm", min_width=60)
                page_info = gr.Markdown(value=f"1 / {_init_total_pages}", elem_classes="page-info")
                next_btn = gr.Button("→", variant="secondary", size="sm", min_width=60)

            metrics_view = gr.HTML(value=format_metric_details(initial_leaderboard, _init_metadata))

        with gr.TabItem("🔍 Model Lookup"):
            gr.Markdown("### Find and compare models across all leaderboards")
            selected_models_state = gr.State(value=[])
            default_compare_html = """
            <div class="no-results">
                <h3>Search for models to compare</h3>
                <p>Type in the dropdown to search, then select a model to add it</p>
            </div>
            """

            with gr.Row(elem_classes="controls-bar"):
                with gr.Column(scale=3):
                    model_search_box = gr.Textbox(
                        label="Search models",
                        placeholder="Type model name...",
                        interactive=True,
                    )
                with gr.Column(scale=1, min_width=120):
                    search_button = gr.Button("Search", variant="secondary")
                with gr.Column(scale=3):
                    search_results = gr.CheckboxGroup(
                        choices=[],
                        label="Top matches (select to add)",
                        interactive=True,
                        value=[],
                        elem_classes=["match-pills"],
                    )
                with gr.Column(scale=1, min_width=80):
                    clear_models_btn = gr.Button("Clear", variant="secondary", size="sm")

            selected_models_group = gr.CheckboxGroup(
                choices=[],
                value=[],
                label="Selected Models (click to remove)",
                interactive=True,
                elem_classes="selected-models-group",
            )

            with gr.Row():
                with gr.Column(scale=4):
                    pass
                with gr.Column(scale=1, min_width=120):
                    export_comparison_btn = gr.DownloadButton("📥 Export CSV", variant="secondary", size="sm")

            radar_view = gr.Plot(label="Radar Comparison")
            model_card_view = gr.HTML(value=default_compare_html)

            with gr.Accordion("📤 How to Submit Data", open=False):
                gr.Markdown("""
                Submit via GitHub Pull Request:

                1. Fork [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
                2. Add JSON files to `data/<leaderboard>/<developer>/<model>/`
                3. Open a PR - automated validation runs on submission
                4. After merge, data syncs to HuggingFace automatically
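
                For example, a single result file might live at `data/mmlu/acme-ai/acme-7b/result.json`
                (an illustrative path; the required file contents are defined by the JSON Schema linked below).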

                [Submission Guide](https://github.com/evaleval/every_eval_ever#contributor-guide) - [JSON Schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
                """)

    def load_leaderboard(leaderboard_name):
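        """Reset the view for a newly selected leaderboard: page 1, all columns shown."""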
        df, metadata = get_leaderboard_data(leaderboard_name)
        columns = [c for c in df.columns if c != "Model"] if not df.empty else []
        df_display, page, total_pages = filter_and_paginate(df, "", "Average", None, 1)
        return (
            df,  # full_df_state
            metadata,  # metadata_state
            1,  # current_page_state
            df_display,  # leaderboard_table
            format_leaderboard_header(leaderboard_name, metadata),  # header_view
            format_metric_details(leaderboard_name, metadata),  # metrics_view
            gr.update(choices=columns, value=columns),  # column_selector
            f"1 / {total_pages}",  # page_info
        )

    def update_table(full_df, search_query, selected_columns, current_page):
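        """Re-filter and re-paginate the stored full table for the current controls."""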
        df_display, page, total_pages = filter_and_paginate(
            full_df, search_query, "Average", selected_columns, current_page
        )
        return df_display, f"{page} / {total_pages}", page

    def go_page(full_df, search_query, selected_columns, current_page, delta):
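        """Step the page by delta; filter_and_paginate clamps it to the valid range."""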
        new_page = max(1, current_page + delta)
        df_display, page, total_pages = filter_and_paginate(
            full_df, search_query, "Average", selected_columns, new_page
        )
        return df_display, f"{page} / {total_pages}", page

    leaderboard_selector.change(
        fn=load_leaderboard,
        inputs=[leaderboard_selector],
        outputs=[full_df_state, metadata_state, current_page_state, leaderboard_table,
                 header_view, metrics_view, column_selector, page_info],
    )
    search_box.input(
        fn=lambda df, q, cols: update_table(df, q, cols, 1),
        inputs=[full_df_state, search_box, column_selector],
        outputs=[leaderboard_table, page_info, current_page_state],
    )

    def on_column_change(df, q, cols):
        # An empty selection means "show everything", not "show nothing"
        if not cols:
            cols = [c for c in df.columns if c != "Model"]
        return update_table(df, q, cols, 1)

    column_selector.change(
        fn=on_column_change,
        inputs=[full_df_state, search_box, column_selector],
        outputs=[leaderboard_table, page_info, current_page_state],
    )
    prev_btn.click(
        fn=lambda df, q, cols, p: go_page(df, q, cols, p, -1),
        inputs=[full_df_state, search_box, column_selector, current_page_state],
        outputs=[leaderboard_table, page_info, current_page_state],
    )
    next_btn.click(
        fn=lambda df, q, cols, p: go_page(df, q, cols, p, 1),
        inputs=[full_df_state, search_box, column_selector, current_page_state],
        outputs=[leaderboard_table, page_info, current_page_state],
    )

    def refresh_leaderboards():
        clear_cache()
        return gr.update(choices=get_available_leaderboards())

    refresh_btn.click(
        fn=refresh_leaderboards,
        outputs=[leaderboard_selector],
    )
    export_btn.click(
        fn=export_leaderboard_to_csv,
        inputs=[full_df_state, leaderboard_selector, search_box, column_selector],
        outputs=[export_btn],
    )

    def add_models_from_search(selected_from_results, current_selected):
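        """Merge newly ticked suggestions into the selection (order-preserving, deduped)
        and refresh the comparison card and radar plot.
        """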
        selected_from_results = selected_from_results or []
        current_selected = current_selected or []
        merged = list(dict.fromkeys(current_selected + selected_from_results))
        comparison_html, plot = compare_models(merged) if merged else (default_compare_html, None)
        return (
            merged,
            gr.update(choices=[], value=[]),
            gr.update(choices=merged, value=merged),
            comparison_html,
            plot,
        )

    def update_selection(selected_list):
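        """Unticking a model in the selected-models group removes it from the comparison."""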
        comparison_html, plot = compare_models(selected_list) if selected_list else (default_compare_html, None)
        return selected_list, gr.update(choices=selected_list, value=selected_list), comparison_html, plot

    def clear_all_models():
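        """Reset the search box, suggestion list, selection, and both result views."""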
        return (
            [],
            gr.update(value=""),
            gr.update(choices=[], value=[]),
            gr.update(choices=[], value=[]),
            default_compare_html,
            None,
        )

    search_button.click(
        fn=get_model_suggestions,
        inputs=[model_search_box],
        outputs=[search_results],
        queue=False,
    )
    model_search_box.submit(
        fn=get_model_suggestions,
        inputs=[model_search_box],
        outputs=[search_results],
        queue=False,
    )
    search_results.change(
        fn=add_models_from_search,
        inputs=[search_results, selected_models_state],
        outputs=[selected_models_state, search_results, selected_models_group, model_card_view, radar_view],
    )
    selected_models_group.change(
        fn=update_selection,
        inputs=[selected_models_group],
        outputs=[selected_models_state, selected_models_group, model_card_view, radar_view],
    )
    clear_models_btn.click(
        fn=clear_all_models,
        outputs=[selected_models_state, model_search_box, search_results, selected_models_group, model_card_view, radar_view],
    )
    export_comparison_btn.click(
        fn=export_comparison_to_csv,
        inputs=[selected_models_state],
        outputs=[export_comparison_btn],
    )

DATA_DIR.mkdir(exist_ok=True)

if __name__ == "__main__":
    demo.launch()