Spaces:
Running
Running
yangzhitao
committed on
Commit
·
7b47b0b
1
Parent(s):
6b6ce23
refactor: replace truncate function with DataFrame styling for number formatting in app.py and populate.py
Browse files
- app.py +17 -18
- src/leaderboard/read_evals.py +4 -3
- src/populate.py +0 -14
app.py
CHANGED
|
@@ -65,26 +65,25 @@ print("///// --- Settings --- /////", settings.model_dump())
|
|
| 65 |
) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 66 |
|
| 67 |
|
| 68 |
-
def
|
| 69 |
"""
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
if pd.isna(value) or not isinstance(value, (int, float)):
|
| 73 |
-
return value
|
| 74 |
-
return float(int(value * 10)) / 10
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
def format_dataframe_numbers(df: pd.DataFrame) -> pd.DataFrame:
|
| 78 |
-
"""
|
| 79 |
-
格式化 DataFrame 中的数字列,只保留1位小数并截断
|
| 80 |
"""
|
|
|
|
| 81 |
df = df.copy()
|
|
|
|
| 82 |
for col in df.columns:
|
| 83 |
if col in ['Model', 'T']: # 跳过非数字列
|
| 84 |
continue
|
| 85 |
# 检查是否为数值类型
|
| 86 |
if pd.api.types.is_numeric_dtype(df[col]):
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
return df
|
| 89 |
|
| 90 |
|
|
@@ -202,8 +201,8 @@ def init_leaderboard_tabs(
|
|
| 202 |
precision_filtered_df = filter_dataframe_by_precision(default_precision, original_df)
|
| 203 |
# 根据默认选择再筛选一次 DataFrame
|
| 204 |
initial_filtered_df = filter_dataframe_by_columns(default_selected, precision_filtered_df)
|
| 205 |
-
#
|
| 206 |
-
|
| 207 |
|
| 208 |
with gr.Row():
|
| 209 |
with gr.Column(scale=1):
|
|
@@ -241,7 +240,7 @@ def init_leaderboard_tabs(
|
|
| 241 |
with gr.Row():
|
| 242 |
with gr.Column(scale=3):
|
| 243 |
leaderboard = gr.Dataframe(
|
| 244 |
-
value=
|
| 245 |
interactive=False,
|
| 246 |
wrap=False,
|
| 247 |
datatype='markdown',
|
|
@@ -256,9 +255,9 @@ def init_leaderboard_tabs(
|
|
| 256 |
column_filtered_df = filter_dataframe_by_columns(selected_cols, precision_filtered_df)
|
| 257 |
# 最后按搜索关键词筛选
|
| 258 |
final_df = search_models_in_dataframe(search_text, column_filtered_df)
|
| 259 |
-
#
|
| 260 |
-
|
| 261 |
-
return
|
| 262 |
|
| 263 |
# 绑定搜索、列选择和 precision 的变化事件,动态更新 DataFrame
|
| 264 |
search.change(
|
|
|
|
| 65 |
) = get_evaluation_queue_df(settings.EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 66 |
|
| 67 |
|
| 68 |
+
def format_dataframe_with_styler(df: pd.DataFrame):
|
| 69 |
"""
|
| 70 |
+
使用 pandas Styler 格式化 DataFrame 中的数字列,保留1位小数
|
| 71 |
+
返回 Styler 对象,可以直接传递给 Gradio Dataframe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
"""
|
| 73 |
+
# 先截断数据(如果需要截断而不是四舍五入)
|
| 74 |
df = df.copy()
|
| 75 |
+
numeric_cols = {}
|
| 76 |
for col in df.columns:
|
| 77 |
if col in ['Model', 'T']: # 跳过非数字列
|
| 78 |
continue
|
| 79 |
# 检查是否为数值类型
|
| 80 |
if pd.api.types.is_numeric_dtype(df[col]):
|
| 81 |
+
# 记录需要格式化的列: 保留1位小数
|
| 82 |
+
numeric_cols[col] = "{:.1f}"
|
| 83 |
+
|
| 84 |
+
# 使用 Styler 格式化显示
|
| 85 |
+
if numeric_cols:
|
| 86 |
+
return df.style.format(numeric_cols)
|
| 87 |
return df
|
| 88 |
|
| 89 |
|
|
|
|
| 201 |
precision_filtered_df = filter_dataframe_by_precision(default_precision, original_df)
|
| 202 |
# 根据默认选择再筛选一次 DataFrame
|
| 203 |
initial_filtered_df = filter_dataframe_by_columns(default_selected, precision_filtered_df)
|
| 204 |
+
# 使用 Styler 格式化数字列,保留1位小数
|
| 205 |
+
initial_styler = format_dataframe_with_styler(initial_filtered_df)
|
| 206 |
|
| 207 |
with gr.Row():
|
| 208 |
with gr.Column(scale=1):
|
|
|
|
| 240 |
with gr.Row():
|
| 241 |
with gr.Column(scale=3):
|
| 242 |
leaderboard = gr.Dataframe(
|
| 243 |
+
value=initial_styler, # 使用 Styler 对象格式化显示
|
| 244 |
interactive=False,
|
| 245 |
wrap=False,
|
| 246 |
datatype='markdown',
|
|
|
|
| 255 |
column_filtered_df = filter_dataframe_by_columns(selected_cols, precision_filtered_df)
|
| 256 |
# 最后按搜索关键词筛选
|
| 257 |
final_df = search_models_in_dataframe(search_text, column_filtered_df)
|
| 258 |
+
# 使用 Styler 格式化数字列,保留1位小数
|
| 259 |
+
final_styler = format_dataframe_with_styler(final_df)
|
| 260 |
+
return final_styler
|
| 261 |
|
| 262 |
# 绑定搜索、列选择和 precision 的变化事件,动态更新 DataFrame
|
| 263 |
search.change(
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -73,9 +73,11 @@ class EvalResult(BaseModel):
|
|
| 73 |
# Precision
|
| 74 |
precision = Precision.from_str(config.model_dtype)
|
| 75 |
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
| 79 |
org_and_model = org_and_model.split("/", 1)
|
| 80 |
|
| 81 |
if len(org_and_model) == 1:
|
|
@@ -88,7 +90,6 @@ class EvalResult(BaseModel):
|
|
| 88 |
result_key = f"{org}_{model}_{precision.value.name}"
|
| 89 |
full_model = "/".join(org_and_model)
|
| 90 |
|
| 91 |
-
meta_toml = load_meta_toml()
|
| 92 |
# update full_model from meta_toml if it exists
|
| 93 |
if "/" not in full_model:
|
| 94 |
full_model = meta_toml.model_title_to_repo_id.get(full_model, full_model)
|
|
|
|
| 73 |
# Precision
|
| 74 |
precision = Precision.from_str(config.model_dtype)
|
| 75 |
|
| 76 |
+
meta_toml = load_meta_toml()
|
| 77 |
|
| 78 |
+
# Get model and org
|
| 79 |
+
model_key = config.model_name or config.model_args or ""
|
| 80 |
+
org_and_model = meta_toml.model_key_to_repo_id.get(model_key, model_key)
|
| 81 |
org_and_model = org_and_model.split("/", 1)
|
| 82 |
|
| 83 |
if len(org_and_model) == 1:
|
|
|
|
| 90 |
result_key = f"{org}_{model}_{precision.value.name}"
|
| 91 |
full_model = "/".join(org_and_model)
|
| 92 |
|
|
|
|
| 93 |
# update full_model from meta_toml if it exists
|
| 94 |
if "/" not in full_model:
|
| 95 |
full_model = meta_toml.model_title_to_repo_id.get(full_model, full_model)
|
src/populate.py
CHANGED
|
@@ -23,15 +23,6 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
|
| 23 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 24 |
|
| 25 |
|
| 26 |
-
def truncate_to_one_decimal(value):
|
| 27 |
-
"""
|
| 28 |
-
将数字截断到1位小数(不四舍五入)
|
| 29 |
-
"""
|
| 30 |
-
if pd.isna(value) or not isinstance(value, (int, float)):
|
| 31 |
-
return value
|
| 32 |
-
return float(int(value * 10)) / 10
|
| 33 |
-
|
| 34 |
-
|
| 35 |
def get_leaderboard_df(
|
| 36 |
results_path: str,
|
| 37 |
requests_path: str,
|
|
@@ -68,11 +59,6 @@ def get_leaderboard_df(
|
|
| 68 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
| 69 |
df = df.loc[:, cols]
|
| 70 |
|
| 71 |
-
# 截断数字列到1位小数(不四舍五入)
|
| 72 |
-
for col in df.columns:
|
| 73 |
-
if col not in ['Model', 'T'] and pd.api.types.is_numeric_dtype(df[col]):
|
| 74 |
-
df[col] = df[col].apply(truncate_to_one_decimal)
|
| 75 |
-
|
| 76 |
# filter out if any of the benchmarks have not been produced
|
| 77 |
df = df.loc[has_no_nan_values(df, benchmark_cols), :]
|
| 78 |
return df
|
|
|
|
| 23 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 24 |
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
def get_leaderboard_df(
|
| 27 |
results_path: str,
|
| 28 |
requests_path: str,
|
|
|
|
| 59 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
| 60 |
df = df.loc[:, cols]
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# filter out if any of the benchmarks have not been produced
|
| 63 |
df = df.loc[has_no_nan_values(df, benchmark_cols), :]
|
| 64 |
return df
|