Spaces:

XuemeiTang
/

LLM4LitReview_Benchmark

Runtime error

tangtang committed on Oct 31, 2025

Commit

df6b6fb

1 Parent(s): 430b643

Update space1

Files changed (1) hide show

src/populate.py CHANGED Viewed

@@ -13,34 +13,34 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
     print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
-    print(df)
-    df["Precision (%)"] = df["Precision (%)"].apply(lambda x: x[0] if len(x) > 0 else np.nan)
-    df["Title search rate (%)"] = df["Title search rate (%)"].apply(lambda x: x[0] if len(x) > 0 else np.nan)
-    df = df.sort_values(by=["Precision (%)"], ascending=False)
-    # 假设用 Precision (%) 和 Title search rate (%) 的平均值
     df["Average ⬆️"] = df[["Precision (%)", "Title search rate (%)"]].mean(axis=1)
-    # # 然后排序
     df = df.sort_values(by=["Average ⬆️"], ascending=False)
-    print(df.head(10))
-    # 再保留需要显示的列
     cols = [c for c in cols if c in df.columns]
     df = df[cols].round(2)
-    # df = df[cols].round(decimals=2)
-    # filter out if any of the benchmarks have not been produced
-    #处理nan值
-    df = df.fillna(0)
-    df = df[has_no_nan_values(df, benchmark_cols)]
     return df
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]

     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
     print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
+    # print(df.head(10))
+    # 将数组转标量，空数组变为 0
+    df["Precision (%)"] = df["Precision (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
+    df["Title search rate (%)"] = df["Title search rate (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
+    # 平均值列
     df["Average ⬆️"] = df[["Precision (%)", "Title search rate (%)"]].mean(axis=1)
+    # 排序
     df = df.sort_values(by=["Average ⬆️"], ascending=False)
+    # 保留需要显示的列
     cols = [c for c in cols if c in df.columns]
     df = df[cols].round(2)
+    # 如果 benchmark_cols 有列不在 df 中，忽略
+    benchmark_cols = [c for c in benchmark_cols if c in df.columns]
+    if benchmark_cols:
+        df = df[has_no_nan_values(df, benchmark_cols)]
+    print(df.head(10))
     return df
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]