leaderboard

Sleeping

App Files Files Community

Yyy0530 committed on Feb 26, 2025

Commit

f38d51c

1 Parent(s): 608af1a

优化平均分计算逻辑，更新模型类型过滤功能，完善用户界面说明

Browse files

Files changed (19) hide show

app - 副本.py +11 -15
app.py +11 -15
config.yaml +1 -0
data/w-w-API.xlsx +0 -0
data/w-w-Avg.xlsx +0 -0
data/w-w-Code.xlsx +0 -0
data/w-w-Customized.xlsx +0 -0
data/w-wo-API.xlsx +0 -0
data/w-wo-Avg.xlsx +0 -0
data/w-wo-Code.xlsx +0 -0
data/w-wo-Customized.xlsx +0 -0
data/wo-w-API.xlsx +0 -0
data/wo-w-Avg.xlsx +0 -0
data/wo-w-Code.xlsx +0 -0
data/wo-w-Customized.xlsx +0 -0
data/wo-wo-API.xlsx +0 -0
data/wo-wo-Avg.xlsx +0 -0
data/wo-wo-Code.xlsx +0 -0
data/wo-wo-Customized.xlsx +0 -0

app - 副本.py CHANGED Viewed

@@ -16,8 +16,8 @@ for setting in CONFIG['settings']:
         file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
         df = pd.read_excel(file_path)
-        # 添加平均分列，计算除第一列和最后一列之外的均值
-        df["Average"] = df.iloc[:, 1:-1].mean(axis=1)
         # 添加 Rank 列，根据 Average 降序排名
         df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
@@ -106,13 +106,10 @@ table > tbody > tr > td:not(:nth-child(2)) {
 # 模型类型和模型大小（数值区间）设置
 MODEL_TYPES = [
-    "Open",
-    "Proprietary",
-    "Sentence Transformers",
-    "Cross-Encoders",
-    "Bi-Encoders",
-    "Uses Instructions",
-    "No Instructions",
 ]
 NUMERIC_INTERVALS = {
@@ -123,7 +120,7 @@ NUMERIC_INTERVALS = {
     ">1B": pd.Interval(1000, 1_000_000, closed='right'),
 }
-# 定义过滤函数，实现搜索和模型大小过滤功能
 def filter_data(search_query, model_types, model_sizes):
     outputs = []
     for setting in CONFIG['settings']:
@@ -136,10 +133,9 @@ def filter_data(search_query, model_types, model_sizes):
                 mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
                 df = df[mask_search]
-            # 模型类型过滤（占位逻辑）
-            # if set(model_types) != set(MODEL_TYPES):
-            #     # 请在此处添加模型类型的过滤逻辑
-            #     pass
             # 模型大小过滤：将 "Number of Parameters" 转换为数值，并利用选定的区间进行过滤
             def parse_params(val):
@@ -180,7 +176,7 @@ with gr.Blocks(css=css) as demo:
         Welcome to the Model Leaderboard Interface!
         - **Search**: Enter keywords for the model name in the search box. Use a semicolon (`;`) to separate multiple keywords.
-        - **Model Type**: Choose the model type(s) you're interested in (currently a placeholder, pending further expansion).
         - **Model Size**: Select the parameter count range to filter models accordingly.
         Click the **Filter Data** button to update the display with the filtered data.

         file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
         df = pd.read_excel(file_path)
+        # 添加平均分列，计算除第一列和倒数两列之外的均值
+        df["Average"] = df.iloc[:, 1:-2].mean(axis=1)
         # 添加 Rank 列，根据 Average 降序排名
         df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
 # 模型类型和模型大小（数值区间）设置
 MODEL_TYPES = [
+    "sparse retrieval",
+    "dense retrieval",
+    "embedding model",
+    "re-ranking model"
 ]
 NUMERIC_INTERVALS = {
     ">1B": pd.Interval(1000, 1_000_000, closed='right'),
 }
+# 定义过滤函数，实现搜索、模型类型及模型大小过滤功能
 def filter_data(search_query, model_types, model_sizes):
     outputs = []
     for setting in CONFIG['settings']:
                 mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
                 df = df[mask_search]
+            # 模型类型过滤：假设 Excel 中存在 "Model Type" 列
+            if model_types and set(model_types) != set(MODEL_TYPES):
+                df = df[df["Model Type"].isin(model_types)]
             # 模型大小过滤：将 "Number of Parameters" 转换为数值，并利用选定的区间进行过滤
             def parse_params(val):
         Welcome to the Model Leaderboard Interface!
         - **Search**: Enter keywords for the model name in the search box. Use a semicolon (`;`) to separate multiple keywords.
+        - **Model Type**: Choose the model type(s) you're interested in.
         - **Model Size**: Select the parameter count range to filter models accordingly.
         Click the **Filter Data** button to update the display with the filtered data.

app.py CHANGED Viewed

@@ -16,8 +16,8 @@ for setting in CONFIG['settings']:
         file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
         df = pd.read_excel(file_path)
-        # 添加平均分列，计算除第一列和最后一列之外的均值
-        df["Average"] = df.iloc[:, 1:-1].mean(axis=1)
         # 添加 Rank 列，根据 Average 降序排名
         df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
@@ -106,13 +106,10 @@ table > tbody > tr > td:not(:nth-child(2)) {
 # 模型类型和模型大小（数值区间）设置
 MODEL_TYPES = [
-    "Open",
-    "Proprietary",
-    "Sentence Transformers",
-    "Cross-Encoders",
-    "Bi-Encoders",
-    "Uses Instructions",
-    "No Instructions",
 ]
 NUMERIC_INTERVALS = {
@@ -123,7 +120,7 @@ NUMERIC_INTERVALS = {
     ">1B": pd.Interval(1000, 1_000_000, closed='right'),
 }
-# 定义过滤函数，实现搜索和模型大小过滤功能
 def filter_data(search_query, model_types, model_sizes):
     outputs = []
     for setting in CONFIG['settings']:
@@ -136,10 +133,9 @@ def filter_data(search_query, model_types, model_sizes):
                 mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
                 df = df[mask_search]
-            # 模型类型过滤（占位逻辑）
-            # if set(model_types) != set(MODEL_TYPES):
-            #     # 请在此处添加模型类型的过滤逻辑
-            #     pass
             # 模型大小过滤：将 "Number of Parameters" 转换为数值，并利用选定的区间进行过滤
             def parse_params(val):
@@ -180,7 +176,7 @@ with gr.Blocks(css=css) as demo:
         Welcome to the Model Leaderboard Interface!
         - **Search**: Enter keywords for the model name in the search box. Use a semicolon (`;`) to separate multiple keywords.
-        - **Model Type**: Choose the model type(s) you're interested in (currently a placeholder, pending further expansion).
         - **Model Size**: Select the parameter count range to filter models accordingly.
         Click the **Filter Data** button to update the display with the filtered data.

         file_path = os.path.join("data", f"{CONFIG['settings_mapping'][setting]}-{data_type}.xlsx")
         df = pd.read_excel(file_path)
+        # 添加平均分列，计算除第一列和倒数两列之外的均值
+        df["Average"] = df.iloc[:, 1:-2].mean(axis=1)
         # 添加 Rank 列，根据 Average 降序排名
         df["Rank"] = df["Average"].rank(ascending=False, method='min').astype(int)
 # 模型类型和模型大小（数值区间）设置
 MODEL_TYPES = [
+    "sparse retrieval",
+    "dense retrieval",
+    "embedding model",
+    "re-ranking model"
 ]
 NUMERIC_INTERVALS = {
     ">1B": pd.Interval(1000, 1_000_000, closed='right'),
 }
+# 定义过滤函数，实现搜索、模型类型及模型大小过滤功能
 def filter_data(search_query, model_types, model_sizes):
     outputs = []
     for setting in CONFIG['settings']:
                 mask_search = df["Model"].str.lower().apply(lambda x: any(q in x for q in queries))
                 df = df[mask_search]
+            # 模型类型过滤：假设 Excel 中存在 "Model Type" 列
+            if model_types and set(model_types) != set(MODEL_TYPES):
+                df = df[df["Model Type"].isin(model_types)]
             # 模型大小过滤：将 "Number of Parameters" 转换为数值，并利用选定的区间进行过滤
             def parse_params(val):
         Welcome to the Model Leaderboard Interface!
         - **Search**: Enter keywords for the model name in the search box. Use a semicolon (`;`) to separate multiple keywords.
+        - **Model Type**: Choose the model type(s) you're interested in.
         - **Model Size**: Select the parameter count range to filter models accordingly.
         Click the **Filter Data** button to update the display with the filtered data.

config.yaml CHANGED Viewed

@@ -14,6 +14,7 @@ metrics:
   - Prec@10
   - NDCG@10
   - Number of parameters
 settings_mapping:
   "w/ meta w/ inst": "w-w"
   "w/ meta w/o inst": "w-wo"

   - Prec@10
   - NDCG@10
   - Number of parameters
+  - Model type
 settings_mapping:
   "w/ meta w/ inst": "w-w"
   "w/ meta w/o inst": "w-wo"

data/w-w-API.xlsx CHANGED Viewed

Binary files a/data/w-w-API.xlsx and b/data/w-w-API.xlsx differ

data/w-w-Avg.xlsx CHANGED Viewed

Binary files a/data/w-w-Avg.xlsx and b/data/w-w-Avg.xlsx differ

data/w-w-Code.xlsx CHANGED Viewed

Binary files a/data/w-w-Code.xlsx and b/data/w-w-Code.xlsx differ

data/w-w-Customized.xlsx CHANGED Viewed

Binary files a/data/w-w-Customized.xlsx and b/data/w-w-Customized.xlsx differ

data/w-wo-API.xlsx CHANGED Viewed

Binary files a/data/w-wo-API.xlsx and b/data/w-wo-API.xlsx differ

data/w-wo-Avg.xlsx CHANGED Viewed

Binary files a/data/w-wo-Avg.xlsx and b/data/w-wo-Avg.xlsx differ

data/w-wo-Code.xlsx CHANGED Viewed

Binary files a/data/w-wo-Code.xlsx and b/data/w-wo-Code.xlsx differ

data/w-wo-Customized.xlsx CHANGED Viewed

Binary files a/data/w-wo-Customized.xlsx and b/data/w-wo-Customized.xlsx differ

data/wo-w-API.xlsx CHANGED Viewed

Binary files a/data/wo-w-API.xlsx and b/data/wo-w-API.xlsx differ

data/wo-w-Avg.xlsx CHANGED Viewed

Binary files a/data/wo-w-Avg.xlsx and b/data/wo-w-Avg.xlsx differ

data/wo-w-Code.xlsx CHANGED Viewed

Binary files a/data/wo-w-Code.xlsx and b/data/wo-w-Code.xlsx differ

data/wo-w-Customized.xlsx CHANGED Viewed

Binary files a/data/wo-w-Customized.xlsx and b/data/wo-w-Customized.xlsx differ

data/wo-wo-API.xlsx CHANGED Viewed

Binary files a/data/wo-wo-API.xlsx and b/data/wo-wo-API.xlsx differ

data/wo-wo-Avg.xlsx CHANGED Viewed

Binary files a/data/wo-wo-Avg.xlsx and b/data/wo-wo-Avg.xlsx differ

data/wo-wo-Code.xlsx CHANGED Viewed

Binary files a/data/wo-wo-Code.xlsx and b/data/wo-wo-Code.xlsx differ

data/wo-wo-Customized.xlsx CHANGED Viewed

Binary files a/data/wo-wo-Customized.xlsx and b/data/wo-wo-Customized.xlsx differ