"""Collect per-benchmark accuracy for every model and pivot it into a table.

Expected layout: ``<ROOT_DIR>/<model>/<benchmark>/*_metrics.json`` where each
metrics file holds a JSON object with an ``acc`` field.  The script walks
exactly two directory levels (model, then benchmark), reads ``acc`` from one
metrics file per benchmark directory, and pivots the values into a matrix
with one row per benchmark and one column per model.  The matrix is printed
and saved as a CSV file directly under the root directory.
"""

import json
import os
import sys
from glob import glob

import pandas as pd

ROOT_DIR = "/cpfs/user/boyuan/verl_workspace/baidu/Qwen2.5-Math/evaluation/result"


def _sorted_subdirs(parent):
    """Yield ``(name, path)`` for each immediate subdirectory of *parent*, by name."""
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        if os.path.isdir(path):
            yield name, path


def _read_acc(metrics_path):
    """Return the ``acc`` value stored in *metrics_path*, or ``None``.

    ``None`` is returned when the file cannot be read or parsed, when its
    top-level JSON value is not an object, or when it has no ``acc`` key.
    Unreadable files are reported on stderr instead of aborting the run, so
    one broken result does not prevent the rest of the table being built.
    """
    try:
        with open(metrics_path, "r", encoding="utf-8") as f:
            metrics = json.load(f)
    except (OSError, ValueError) as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        print(f"[warn] skipping {metrics_path}: {err}", file=sys.stderr)
        return None

    if not isinstance(metrics, dict):
        print(
            f"[warn] skipping {metrics_path}: top-level JSON is not an object",
            file=sys.stderr,
        )
        return None
    return metrics.get("acc")


def _collect_records(root_dir):
    """Return one ``{"benchmark", "model", "acc"}`` dict per usable result."""
    records = []
    for model_name, model_path in _sorted_subdirs(root_dir):
        for bench_name, bench_path in _sorted_subdirs(model_path):
            # glob() returns matches in arbitrary order; sort so the same file
            # is picked on every run when a directory holds several matches.
            metric_files = sorted(glob(os.path.join(bench_path, "*_metrics.json")))
            if not metric_files:
                continue

            acc = _read_acc(metric_files[0])
            if acc is not None:
                records.append(
                    {"benchmark": bench_name, "model": model_name, "acc": acc}
                )
    return records


def _build_pivot(records):
    """Pivot long-form *records* into a benchmark (rows) x model (columns) frame."""
    if not records:
        # An empty frame has no "benchmark"/"model" columns, so pivot() would
        # raise KeyError; return an empty table instead.
        return pd.DataFrame()

    df_long = pd.DataFrame(records)
    return (
        df_long.pivot(index="benchmark", columns="model", values="acc")
        .sort_index()
        .sort_index(axis=1)
    )


def main(root_dir=ROOT_DIR):
    """Build, print and save the benchmark x model accuracy table.

    Args:
        root_dir: Directory containing one subdirectory per model.  The CSV
            ``benchmark_vs_models_acc.csv`` is written into this directory.

    Returns:
        The pivoted ``pandas.DataFrame``.  It is empty, and no CSV is written,
        when no ``acc`` value is found under *root_dir*.

    Raises:
        OSError: If *root_dir* (or a model directory in it) cannot be listed.
    """
    df_pivot = _build_pivot(_collect_records(root_dir))
    if df_pivot.empty:
        print(f"[warn] 未找到任何 acc 记录: {root_dir}", file=sys.stderr)
        return df_pivot

    csv_path = os.path.join(root_dir, "benchmark_vs_models_acc.csv")
    df_pivot.to_csv(csv_path, float_format="%.4g", encoding="utf-8")

    print("\n=== Benchmark × Model ACC 表 ===")
    print(df_pivot)

    print(f"\n已保存为: {csv_path}")
    return df_pivot


if __name__ == "__main__":
    main()