|
|
from pathlib import Path |
|
|
from typing import Optional |
|
|
|
|
|
import gradio as gr |
|
|
from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns |
|
|
import pandas as pd |
|
|
import re |
|
|
|
|
|
def _slugify(title: str) -> str:
    """Turn a display title into a lowercase, hyphen-separated slug.

    After lowercasing, every run of characters other than ``a-z`` / ``0-9``
    acts as a separator. The result never starts or ends with a hyphen, and
    is the empty string when the title has no such characters at all.
    """
    tokens = re.split(r'[^a-z0-9]+', title.lower())
    # Leading/trailing separators produce empty tokens; dropping them is what
    # keeps hyphens off both ends of the slug.
    return '-'.join(token for token in tokens if token)
|
|
|
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=...). It defines the colour variables,
# the page/container layout, card styling for groups, the leaderboard
# search/filter rows, the citation textbox with its copy button, and the
# `.leaderboard-cell-bar` markup emitted by add_bar_cells().
# NOTE(review): the `#leaderboard-table` / `#leaderboard-table-lite` rules only
# match an element with exactly that id, while the Leaderboard components in
# this file are created with ids of the form `leaderboard-<slug>` -- confirm
# whether these rules are still expected to apply.
custom_css = """
/* 全局设置:简洁、高级的字体和背景 */
:root {
--color-background-primary: #f8f8f8; /* 浅米白色背景 */
--color-background-secondary: #ffffff; /* 卡片背景 */
--color-text-primary: #333333;
--color-accent: #8e80ff; /* 浅紫色强调色 (Primary) */
--color-accent-light: #a99dff; /* 浅紫色悬停色 */
--shadow-medium: 0 4px 12px rgba(0, 0, 0, 0.08);
}

/* 全局字体:强制使用 Arial */
html, body, .gradio-container, .gradio-container * {
font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
}

body {
background-color: var(--color-background-primary) !important;
}

/* 增加容器最大宽度以展示完整表格 */
.gradio-container {
max-width: 1400px; /* 宽度从 1800px 调窄到 1400px */
margin: 0 auto;
padding: 20px;
}

/* 标题样式 */
#space-title {
color: var(--color-text-primary);
font-size: 3em;
font-weight: 700;
margin-bottom: 0.5em;
padding-top: 20px;
}

/* Group/Block 组件的卡片样式 */
.gr-group, .gr-block {
background-color: var(--color-background-secondary);
border-radius: 12px;
box-shadow: var(--shadow-medium);
transition: box-shadow 0.3s ease;
padding: 15px;
margin-bottom: 20px;
}

.gr-group:hover, .gr-block:hover {
box-shadow: 0 6px 18px rgba(0, 0, 0, 0.12);
}

/* Leaderboard 容器:调整内部布局的关键 */
[id^="leaderboard-"] {
padding: 0 !important;
}

/* 搜索栏布局调整 (第一行) */
.leaderboard_root > div:nth-child(1) {
padding: 0 15px 15px 15px;
}

/* 过滤器和列选择布局调整 (第二行) */
.leaderboard_root > div:nth-child(2) {
display: flex;
padding: 0 15px 15px 15px;
}

.leaderboard_root .gr-form {
border: none;
}

/* Search Bar */
#search-bar-table-box {
width: 100%;
margin-bottom: 10px;
}
#search-bar-table-box > div:first-child {
background: none;
border: none;
}

/* === Select Columns to Display: 强制单行展示 === */
/* 定位 SelectColumns 的内部复选框容器 */
.leaderboard-filter-column:first-child .gr-form-checkbox-group {
/* 使用 flex 容器 */
display: flex !important;
flex-wrap: nowrap !important; /* 强制不换行 */
overflow-x: auto !important; /* 允许水平滚动 */
gap: 10px;
padding-bottom: 5px;
}

/* 确保每个复选框标签保持内联块级元素 */
.leaderboard-filter-column:first-child .gr-form-checkbox-group label {
flex-shrink: 0 !important; /* 防止选项被压缩 */
display: inline-block !important; /* 确保每个选项占据其自然宽度 */
margin: 0;
white-space: nowrap; /* 确保文字也不换行 */
}

#leaderboard-table, #leaderboard-table-lite {
margin-top: 15px;
border-radius: 8px;
overflow: hidden;
}

#leaderboard-table th {
background-color: var(--color-accent);
color: white;
font-weight: 600;
text-transform: uppercase;
border-bottom: 2px solid var(--color-accent-light);
}

#leaderboard-table tr:hover {
background-color: #f0f0f0;
cursor: pointer;
transition: background-color 0.2s ease;
}

#leaderboard-table td:nth-child(2),
#leaderboard-table th:nth-child(2) {
max-width: 400px;
overflow: auto;
white-space: nowrap;
}

#leaderboard-table td:nth-child(3) {
font-weight: bold;
color: var(--color-accent);
}

/* Citation 区域 */
#citation-group {
padding: 20px;
margin-top: 10px;
}

#citation-button {
margin-top: 0;
padding: 0;
}

/* 修复 Citation 复制图标重叠问题 */
#citation-button label {
display: block;
position: relative;
}

#citation-button textarea {
font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
background-color: #f1f1f1;
border: 1px solid #cccccc;
border-radius: 6px;
padding: 10px;
padding-right: 40px !important; /* 为复制按钮腾出空间 */
font-size: 14px !important;
width: 100% !important;
box-sizing: border-box;
}

/* 调整复制按钮的位置 */
#citation-button > label > button {
position: absolute;
top: 10px;
right: 10px;
margin: 0;
transform: scale(1.1);
transition: transform 0.2s ease;
background-color: var(--color-accent) !important;
color: white !important;
border: none !important;
border-radius: 6px;
z-index: 10;
}

#citation-button > label > button:hover {
transform: scale(1.2);
background-color: var(--color-accent-light) !important;
}

/* Leaderboard 内部过滤/选择组件微调 */
.leaderboard_root .leaderboard-filter-column:last-child {
flex-grow: 1;
max-width: 50%;
}

.leaderboard_root .leaderboard-filter-column:first-child {
max-width: 50%;
padding-right: 20px;
}

/* 其他 Gradio 元素的简洁化 */
.wrap-inner input[type="text"], .wrap-inner input[type="number"] {
border-radius: 6px;
border: 1px solid #cccccc;
padding: 8px 12px;
}

/* ==== Score bar cells ==== */
.leaderboard-cell-bar {
position: relative;
display: block;
width: 100%;
height: 28px;
line-height: 28px;
background: #f5f3ff; /* light purple background */
border-radius: 8px;
overflow: hidden;
padding-left: 38px; /* leave room for dot */
color: #1d1b84; /* dark purple text */
font-weight: 600;
}
.leaderboard-cell-bar .bar-fill {
position: absolute;
left: 0;
top: 0;
height: 100%;
width: var(--w, 0%);
background: linear-gradient(90deg, #6c5ce7 0%, #a29bfe 100%);
opacity: 0.25;
}
.leaderboard-cell-bar .bar-dot {
position: absolute;
left: 10px;
top: 50%;
transform: translateY(-50%);
width: 12px;
height: 12px;
border-radius: 50%;
background: #3c1be3;
box-shadow: 0 0 0 4px rgba(60, 27, 227, 0.08);
}
.leaderboard-cell-bar .bar-text {
position: relative;
z-index: 1;
padding-right: 10px;
}
"""
|
|
|
|
|
# Page heading, rendered with gr.HTML; its id is the one targeted by the
# `#space-title` rule in custom_css.
TITLE = """<h1 align="center" id="space-title">SciEval Leaderboards 🏆</h1>"""
# Centered row of project links, rendered with gr.HTML right after the title.
INFO = """<p align="center">
<a href="https://huggingface.co/datasets/InternScience/SciEval"><b>HuggingFace</b></a> ·
<a href="https://github.com/InternScience/SciEvalKit"><b>GitHub</b></a>
</p>"""

# Label and BibTeX body of the read-only citation textbox. The textbox height
# is derived from the number of newlines in CITATION_BUTTON_TEXT.
CITATION_BUTTON_LABEL = "📖 Citation"
CITATION_BUTTON_TEXT = r"""
@article{scieval2025,
title={SciEvalKit: An Open-source Evaluation Toolkit for Scientific General Intelligence},
author={SciPrismaX Team},
journal={arXiv preprint},
year={2025}
}
"""

# (display title, CSV filename) pairs handed to discover_leaderboards().
# Filenames are turned into Path objects as-is, so they resolve relative to
# the process working directory.
LEADERBOARD_FILES = [
    ("Large Language Model Scientific Capability", "Large Language Model Scientific Capability.csv"),
    ("Multimodal Model Scientific Capability", "Multimodal Model Scientific Capability.csv"),
    ("Multimodal Model Disciplinary Leaderboard", "Multimodal Model Disciplinary Leaderboard.csv"),
]
|
|
|
|
|
|
|
|
def strip_auxiliary_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Remove unnamed columns that come from spreadsheet index exports.

    A column is dropped when the string form of its label starts with
    ``"Unnamed"``. Labels are converted with ``str()`` first, so frames whose
    labels are not all strings (for example integer labels) are handled
    instead of raising.

    Args:
        df: Frame to clean; it is not modified.

    Returns:
        A new, independent DataFrame holding only the kept columns, in their
        original order.
    """
    keep = [not str(label).startswith("Unnamed") for label in df.columns]
    # Copy so that callers can assign into the result without writing through
    # to (or getting chained-assignment warnings about) the input frame.
    return df.loc[:, keep].copy()
|
|
|
|
|
|
|
|
def find_sort_column(df: pd.DataFrame) -> Optional[str]:
    """Pick a sensible default sort column.

    The first numeric column (in column order) whose name is one of
    "overall", "score", "avg" or "average" wins; the comparison ignores case
    and surrounding whitespace. If no such column exists, the first numeric
    column is used.

    Args:
        df: Frame whose columns are inspected.

    Returns:
        The chosen column label exactly as it appears in ``df.columns``, or
        ``None`` when the frame has no numeric column.
    """
    preferred = {"overall", "score", "avg", "average"}
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    for col in numeric_cols:
        # str() keeps non-string labels (e.g. integers) from raising on .lower().
        if str(col).strip().lower() in preferred:
            return col
    return numeric_cols[0] if numeric_cols else None
|
|
|
|
|
|
|
|
def _percent_widths(series: pd.Series) -> pd.Series:
    """Compute a 0-100 width for a numeric series.

    Scaling rules, checked in this order:

    * all values within ``[0, 1]``: treated as fractions and multiplied by 100;
    * all values within ``[0, 100]``: used unchanged;
    * anything else: min-max normalised to ``[0, 100]``, or 50 for every row
      when all values are equal.

    The fraction check has to come first: every ``[0, 1]`` series also
    satisfies the ``[0, 100]`` test, so the other order would never scale
    fractions.

    Args:
        series: Numeric values; converted with ``astype(float)``.

    Returns:
        A float Series on the same index. Missing values get width 0.
    """
    s = series.astype(float)
    lo, hi = s.min(), s.max()

    if lo >= 0 and hi <= 1.0:
        widths = s * 100.0
    elif lo >= 0 and hi <= 100:
        widths = s
    else:
        rng = hi - lo
        if rng == 0:
            widths = pd.Series(50.0, index=s.index)
        else:
            widths = (s - lo) / rng * 100.0

    # A missing score has nothing to draw; force it to an empty bar rather
    # than letting NaN leak into the caller's clamping logic.
    return widths.where(s.notna(), 0.0)
|
|
|
|
|
|
|
|
def _bar_cell_html(value: object, width: object) -> str:
    """Render one score as the markup styled by the ``.leaderboard-cell-bar`` CSS."""
    if pd.isna(value):
        # A missing score gets a placeholder and an empty bar instead of the
        # text "nan" on a full-width bar.
        text, pct = "N/A", 0.0
    else:
        try:
            text = f"{float(value):.2f}"
        except (TypeError, ValueError):
            text = str(value)
        # Clamp to the range the CSS `--w` variable expects; a missing width
        # means "no bar".
        pct = 0.0 if pd.isna(width) else max(0.0, min(100.0, float(width)))
    return (
        f'<div class="leaderboard-cell-bar" style="--w:{pct:.2f}%">'
        f'<span class="bar-fill"></span>'
        f'<span class="bar-dot"></span>'
        f'<span class="bar-text">{text}</span>'
        f"</div>"
    )


def add_bar_cells(df: pd.DataFrame, exclude: Optional[list[str]] = None) -> tuple[pd.DataFrame, set[str]]:
    """
    Convert numeric score columns to HTML with a bar background.

    Every numeric column is replaced by one HTML string per row, except the
    columns named in ``exclude`` and the always-skipped "Model", "Type" and
    "Parameters". Bar widths come from ``_percent_widths``; the visible text
    is the value with two decimals, or "N/A" for a missing value.

    Args:
        df: Source frame; it is copied, not modified.
        exclude: Extra column names to leave untouched.

    Returns:
        A new DataFrame and the set of columns that were converted.
    """
    skipped = {"Model", "Type", "Parameters"}.union(exclude or [])
    out = df.copy()
    converted: set[str] = set()
    for col in out.columns:
        if col in skipped or not pd.api.types.is_numeric_dtype(out[col]):
            continue
        widths = _percent_widths(out[col])
        out[col] = [_bar_cell_html(val, w) for val, w in zip(out[col], widths)]
        converted.add(col)
    return out, converted
|
|
|
|
|
|
|
|
def load_leaderboard_csv(path: Path) -> pd.DataFrame:
    """Read and clean a leaderboard CSV.

    Steps: read the file, strip whitespace from the column names, drop the
    auxiliary "Unnamed..." columns, round numeric columns to two decimals and
    sort descending by the column chosen by ``find_sort_column`` (if any).

    Args:
        path: Location of the CSV file.

    Returns:
        The cleaned frame with a fresh 0..n-1 index.

    Raises:
        Whatever ``pandas.read_csv`` raises for a missing or malformed file.
    """
    df = pd.read_csv(path)
    # Normalise the headers before filtering, so a padded " Unnamed: 0"
    # header is still recognised as an auxiliary column.
    df.columns = [str(col).strip() for col in df.columns]
    # Work on an independent copy; the rounding below assigns into the frame.
    df = strip_auxiliary_columns(df).copy()

    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    if numeric_cols:
        df[numeric_cols] = df[numeric_cols].round(2)

    sort_col = find_sort_column(df)
    # Compare against None explicitly: a falsy label must not disable sorting.
    if sort_col is not None:
        df = df.sort_values(by=sort_col, ascending=False)

    return df.reset_index(drop=True)
|
|
|
|
|
|
|
|
def safe_load(title: str, path: Path) -> tuple[str, pd.DataFrame]:
    """Load one leaderboard without ever letting a bad CSV stop the app.

    On success the cleaned frame from ``load_leaderboard_csv`` is returned.
    On any exception the failure is printed and a one-row placeholder frame
    with a single "Status" column is returned, telling the user which file
    to upload and what went wrong.

    Returns:
        ``(title, frame)``; the title is passed through unchanged.
    """
    try:
        return title, load_leaderboard_csv(path)
    except Exception as exc:
        print(f"[leaderboard] Failed to load {path}: {exc}")
        status_message = (
            f"Upload a CSV named '{path.name}' to populate the '{title}' leaderboard. "
            f"Error: {exc}"
        )
        return title, pd.DataFrame({"Status": [status_message]})
|
|
|
|
|
|
|
|
def build_datatypes(df: pd.DataFrame, html_cols: Optional[set[str]] = None) -> list[str]:
    """Build the per-column datatype list handed to gradio_leaderboard.

    Columns listed in ``html_cols`` (the bar-rendered ones) are reported as
    "markdown" so their inline HTML is rendered; every other column is
    "number" when its dtype is numeric and "str" otherwise. The result
    follows the order of ``df.columns``.
    """
    rendered = html_cols if html_cols else set()

    def _datatype_for(col) -> str:
        # Rendered columns take precedence over the dtype-based choice.
        if col in rendered:
            return "markdown"
        if pd.api.types.is_numeric_dtype(df[col]):
            return "number"
        return "str"

    return [_datatype_for(col) for col in df.columns]
|
|
|
|
|
|
|
|
def discover_leaderboards(config: list[tuple[str, str]]) -> list[tuple[str, pd.DataFrame]]:
    """Load the configured leaderboards, then any other CSVs in the working directory.

    Configured entries come first, in ``config`` order, each loaded through
    ``safe_load``. After that, every ``*.csv`` in the current directory whose
    file name is not one of the configured names is appended in sorted path
    order, titled with its file stem.
    """
    known_names = {Path(filename).name for _, filename in config}

    boards: list[tuple[str, pd.DataFrame]] = []
    for title, filename in config:
        boards.append(safe_load(title, Path(filename)))

    # Unconfigured CSVs (e.g. a renamed file) still get a board of their own.
    boards.extend(
        safe_load(csv_path.stem, csv_path)
        for csv_path in sorted(Path(".").glob("*.csv"))
        if csv_path.name not in known_names
    )
    return boards
|
|
|
|
|
|
|
|
# Load every leaderboard once, at import time.
leaderboards = discover_leaderboards(LEADERBOARD_FILES)

# Markdown bullet list of the configured CSV filenames.
required_filenames_md = "\n".join(f" - `{filename}`" for _, filename in LEADERBOARD_FILES)

# Titles whose heading is emitted as a centered <h2> rather than a Markdown heading.
centered_titles = {
    "Large Language Model Scientific Capability",
    "Multimodal Model Scientific Capability",
    "Multimodal Model Disciplinary Leaderboard",
}

demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    gr.HTML(TITLE)
    gr.HTML(INFO)

    # One card per leaderboard: heading, then the interactive table.
    for lb_title, df in leaderboards:
        with gr.Group():
            if lb_title.strip() not in centered_titles:
                gr.Markdown(f"## {lb_title}")
            else:
                gr.HTML(f'<h2 style="text-align:center; font-weight:700; margin: 0.2em 0;">{lb_title}</h2>')

            df_render, html_cols = add_bar_cells(df)
            shown_columns = list(df_render.columns)

            column_picker = SelectColumns(
                default_selection=shown_columns,
                cant_deselect=[c for c in ("Model", "Type") if c in shown_columns],
                label="Select columns to display:",
            )
            # Search by model name when available, otherwise by the first column.
            if "Model" in shown_columns:
                searchable = ["Model"]
            else:
                searchable = [shown_columns[0]]
            if "Type" in shown_columns:
                type_filters = [ColumnFilter("Type", type="checkboxgroup", label="Model Types:")]
            else:
                type_filters = []

            Leaderboard(
                value=df_render,
                elem_id=f"leaderboard-{_slugify(lb_title)}",
                datatype=build_datatypes(df_render, html_cols),
                select_columns=column_picker,
                search_columns=searchable,
                filter_columns=type_filters,
                interactive=False,
            )
        # NOTE(review): indentation was not recoverable from the reviewed text;
        # the separator is assumed to follow each group inside the loop -- confirm.
        gr.Markdown("---")

    # Citation card: a read-only textbox with a copy button, sized to its text.
    citation_line_count = len(CITATION_BUTTON_TEXT.split("\n"))
    with gr.Row(), gr.Column(), gr.Group(elem_id="citation-group"):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=citation_line_count,
            elem_id="citation-button",
            show_copy_button=True,
            interactive=False,
        )

demo.queue(default_concurrency_limit=40).launch()
|
|
|