# NOTE(review): removed non-Python extraction artifacts that preceded the code
# (a "File size" header, a git-blame commit-hash gutter, and a line-number gutter).
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
TITLE,
)
from css_html_js import custom_css
# Raw leaderboard entries. Each record has:
#   "name"   - model display name (shown in the "Model" column)
#   "type"   - "Open" or "Closed" (used by the type filter in the UI)
#   "scores" - five per-task scores, in the same order as `task_cols` in
#              build_leaderboard_df(): Deep Research, Idea Generation,
#              Dry Experiment, Wet Experiment, Experimental Reasoning.
LEADERBOARD_DATA = [
    {"name": "Intern-S1", "type": "Open", "scores": [15.74, 38.09, 28.79, 29.02, 28.87]},
    {"name": "Intern-S1-mini", "type": "Open", "scores": [11.06, 36.04, 16.97, 12.42, 16.84]},
    {"name": "Qwen3-VL-235B-A22B", "type": "Open", "scores": [11.97, 39.28, 28.41, 30.30, 31.62]},
    {"name": "Qwen3-Max", "type": "Open", "scores": [15.38, 39.83, 33.21, 33.62, 37.80]},
    {"name": "Qwen3-8B", "type": "Open", "scores": [8.18, 35.78, 18.45, 9.96, 23.37]},
    {"name": "Llama-4-Scout", "type": "Open", "scores": [7.86, 29.72, 20.37, 21.66, 25.77]},
    {"name": "GPT-4o", "type": "Closed", "scores": [7.86, 35.95, 26.94, 31.31, 32.30]},
    {"name": "GPT-4.1", "type": "Closed", "scores": [11.32, 36.49, 34.32, 36.63, 38.49]},
    {"name": "GPT-5", "type": "Closed", "scores": [14.47, 55.40, 29.89, 16.31, 38.14]},
    {"name": "GPT-5.1", "type": "Closed", "scores": [11.64, 47.12, 31.00, 22.77, 34.02]},
    {"name": "o3", "type": "Closed", "scores": [12.89, 46.07, 31.73, 30.04, 32.65]},
    {"name": "o4-mini", "type": "Closed", "scores": [11.95, 40.78, 35.79, 28.86, 33.33]},
    {"name": "Gemini-2.5-Flash", "type": "Closed", "scores": [10.69, 39.13, 21.03, 18.55, 34.36]},
    {"name": "Gemini-2.5-Pro", "type": "Closed", "scores": [15.09, 39.95, 22.51, 22.05, 41.24]},
    {"name": "Gemini-3-Pro", "type": "Closed", "scores": [18.48, 39.68, 36.64, 32.45, 41.92]},
    {"name": "Claude-Opus-4.1", "type": "Closed", "scores": [12.93, 40.29, 34.69, 25.38, 38.83]},
    {"name": "Claude-Sonnet-4.5", "type": "Closed", "scores": [13.84, 43.20, 35.79, 30.15, 37.80]},
    {"name": "Grok-4", "type": "Closed", "scores": [13.31, 37.12, 33.71, 29.01, 30.24]},
]
def build_leaderboard_df(data=None):
    """Build the leaderboard table as a pandas DataFrame.

    Each input record contributes one row with the model name, its type
    ("Open"/"Closed"), an "SGI-Score" (mean of the five task scores,
    rounded to 2 decimals), and one column per task. Rows are sorted by
    SGI-Score, best first.

    Args:
        data: Optional list of records shaped like ``LEADERBOARD_DATA``
            (dicts with "name", "type", and a 5-element "scores" list).
            Defaults to the module-level ``LEADERBOARD_DATA``.

    Returns:
        pd.DataFrame with columns
        ["Model", "Type", "SGI-Score", <five task columns>].
    """
    task_cols = [
        "Deep Research",
        "Idea Generation",
        "Dry Experiment",
        "Wet Experiment",
        "Experimental Reasoning",
    ]
    records = LEADERBOARD_DATA if data is None else data
    rows = [
        {
            "Model": item["name"],
            # NOTE: was a local named `type`, which shadowed the builtin.
            "Type": item["type"],
            "SGI-Score": round(sum(item["scores"]) / len(item["scores"]), 2),
            # Pair each task column with its score, preserving order.
            **dict(zip(task_cols, item["scores"])),
        }
        for item in records
    ]
    cols = ["Model", "Type", "SGI-Score"] + task_cols
    return (
        pd.DataFrame(rows, columns=cols)
        .sort_values(by=["SGI-Score"], ascending=False)
        .round(decimals=2)
    )
# Materialize the table once at import time; the UI below renders this frame.
LEADERBOARD_DF = build_leaderboard_df()
def init_leaderboard(dataframe):
    """Wrap *dataframe* in a read-only Leaderboard component.

    Configures column datatypes (2 string columns, 6 numeric), the set of
    columns shown by default, which columns cannot be hidden, search on
    the "Model" column, and a checkbox filter over the "Type" column.
    """
    column_types = ["str"] * 2 + ["number"] * 6
    visible_columns = [
        "Model",
        "Type",
        "SGI-Score",
        "Deep Research",
        "Idea Generation",
        "Dry Experiment",
        "Wet Experiment",
        "Experimental Reasoning",
    ]
    locked_columns = ["Model", "Type"]
    column_selector = SelectColumns(
        default_selection=visible_columns,
        cant_deselect=locked_columns,
        label="Select Columns to Display:",
    )
    type_filter = ColumnFilter("Type", type="checkboxgroup", label="Model types")
    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        select_columns=column_selector,
        search_columns=["Model"],
        hide_columns=[],
        filter_columns=[type_filter],
        interactive=False,
    )
# Assemble the Gradio app: title, leaderboard table, and a copyable
# citation box. Layout order inside the `with` block is the render order.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    leaderboard = init_leaderboard(LEADERBOARD_DF)
    with gr.Row():
        with gr.Group():
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                # Size the textbox to show the full citation without scrolling.
                lines=CITATION_BUTTON_TEXT.count('\n')+1,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Queue requests (up to 40 concurrent by default) and start the server.
demo.queue(default_concurrency_limit=40).launch()