| import gradio as gr | |
| from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns | |
| import pandas as pd | |
| from about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| TITLE, | |
| ) | |
| from css_html_js import custom_css | |
# Static leaderboard entries, one record per model.
# Each record has:
#   "name"   - model identifier shown in the "Model" column
#   "type"   - "Open" or "Closed" (used by the checkbox filter in the UI)
#   "scores" - five per-task results, in the fixed order consumed by
#              build_leaderboard_df's task columns: Deep Research,
#              Idea Generation, Dry Experiment, Wet Experiment,
#              Experimental Reasoning.
LEADERBOARD_DATA = [
    {"name": "Intern-S1", "type": "Open", "scores": [15.74, 38.09, 28.79, 29.02, 28.87]},
    {"name": "Intern-S1-mini", "type": "Open", "scores": [11.06, 36.04, 16.97, 12.42, 16.84]},
    {"name": "Qwen3-VL-235B-A22B", "type": "Open", "scores": [11.97, 39.28, 28.41, 30.30, 31.62]},
    {"name": "Qwen3-Max", "type": "Open", "scores": [15.38, 39.83, 33.21, 33.62, 37.80]},
    {"name": "Qwen3-8B", "type": "Open", "scores": [8.18, 35.78, 18.45, 9.96, 23.37]},
    {"name": "Llama-4-Scout", "type": "Open", "scores": [7.86, 29.72, 20.37, 21.66, 25.77]},
    {"name": "GPT-4o", "type": "Closed", "scores": [7.86, 35.95, 26.94, 31.31, 32.30]},
    {"name": "GPT-4.1", "type": "Closed", "scores": [11.32, 36.49, 34.32, 36.63, 38.49]},
    {"name": "GPT-5", "type": "Closed", "scores": [14.47, 55.40, 29.89, 16.31, 38.14]},
    {"name": "GPT-5.1", "type": "Closed", "scores": [11.64, 47.12, 31.00, 22.77, 34.02]},
    {"name": "o3", "type": "Closed", "scores": [12.89, 46.07, 31.73, 30.04, 32.65]},
    {"name": "o4-mini", "type": "Closed", "scores": [11.95, 40.78, 35.79, 28.86, 33.33]},
    {"name": "Gemini-2.5-Flash", "type": "Closed", "scores": [10.69, 39.13, 21.03, 18.55, 34.36]},
    {"name": "Gemini-2.5-Pro", "type": "Closed", "scores": [15.09, 39.95, 22.51, 22.05, 41.24]},
    {"name": "Gemini-3-Pro", "type": "Closed", "scores": [18.48, 39.68, 36.64, 32.45, 41.92]},
    {"name": "Claude-Opus-4.1", "type": "Closed", "scores": [12.93, 40.29, 34.69, 25.38, 38.83]},
    {"name": "Claude-Sonnet-4.5", "type": "Closed", "scores": [13.84, 43.20, 35.79, 30.15, 37.80]},
    {"name": "Grok-4", "type": "Closed", "scores": [13.31, 37.12, 33.71, 29.01, 30.24]},
]
def build_leaderboard_df(data=None):
    """Build the leaderboard table, sorted by overall SGI-Score descending.

    Args:
        data: Optional list of entries, each a dict with keys "name" (str),
            "type" (str) and "scores" (list of five floats, one per task in
            the order of ``task_cols`` below). Defaults to the module-level
            ``LEADERBOARD_DATA``, preserving the original no-argument call.

    Returns:
        pandas.DataFrame with columns Model, Type, SGI-Score (the unweighted
        mean of the five task scores) and one column per task, all numeric
        values rounded to 2 decimals.
    """
    if data is None:
        data = LEADERBOARD_DATA
    task_cols = [
        "Deep Research",
        "Idea Generation",
        "Dry Experiment",
        "Wet Experiment",
        "Experimental Reasoning",
    ]
    rows = []
    for item in data:
        scores = item["scores"]
        # NOTE: the original bound item["type"] to a local named `type`,
        # shadowing the builtin; read the dict directly instead.
        row = {
            "Model": item["name"],
            "Type": item["type"],
            # SGI-Score is the plain average of the per-task scores.
            "SGI-Score": round(sum(scores) / len(scores), 2),
            # Pair each task column with its score positionally.
            **dict(zip(task_cols, scores)),
        }
        rows.append(row)
    cols = ["Model", "Type", "SGI-Score"] + task_cols
    return (
        pd.DataFrame(rows, columns=cols)
        .sort_values(by="SGI-Score", ascending=False)
        .round(2)
    )
# Materialize the leaderboard once at import time; the UI below renders it.
LEADERBOARD_DF = build_leaderboard_df()
def init_leaderboard(dataframe):
    """Create the Leaderboard component for the given results DataFrame.

    Args:
        dataframe: Table produced by build_leaderboard_df — two string
            columns (Model, Type) followed by numeric score columns.

    Returns:
        A gradio_leaderboard.Leaderboard with column selection, model-name
        search and a "Type" checkbox filter; non-interactive.
    """
    columns = list(dataframe.columns)
    # Derive the datatype list and default selection from the DataFrame
    # itself instead of hard-coding the column names, so this component
    # cannot drift out of sync with build_leaderboard_df. The first two
    # columns (Model, Type) are strings; the rest are numeric scores.
    datatypes = ["str", "str"] + ["number"] * (len(columns) - 2)
    return Leaderboard(
        value=dataframe,
        datatype=datatypes,
        select_columns=SelectColumns(
            default_selection=columns,
            cant_deselect=["Model", "Type"],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],
        hide_columns=[],
        filter_columns=[
            ColumnFilter("Type", type="checkboxgroup", label="Model types"),
        ],
        interactive=False,
    )
# Assemble the page: title header, the leaderboard component, and a
# copyable citation box, then launch the app.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)
    leaderboard = init_leaderboard(LEADERBOARD_DF)
    with gr.Row():
        with gr.Group():
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                # Size the textbox so the full citation is visible
                # without scrolling.
                lines=CITATION_BUTTON_TEXT.count('\n') + 1,
                elem_id="citation-button",
                show_copy_button=True,
            )

demo.queue(default_concurrency_limit=40).launch()