Spaces:
Sleeping
Sleeping
init pass
Browse files- app.py +138 -356
- requirements.txt +6 -3
- submission/a_random_submit.json +9 -0
- submission/test_submission_1.json +9 -0
- submission/test_submission_2.json +9 -0
app.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
import requests
|
|
|
|
|
|
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import pandas as pd
|
|
@@ -12,290 +14,106 @@ from tqdm.contrib.concurrent import thread_map
|
|
| 12 |
|
| 13 |
from utils import *
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 18 |
|
| 19 |
block = gr.Blocks()
|
| 20 |
api = HfApi(token=HF_TOKEN)
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
{
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
"
|
| 38 |
-
|
| 39 |
-
"video_link": "",
|
| 40 |
-
"global": None
|
| 41 |
-
},
|
| 42 |
-
{
|
| 43 |
-
"rl_env_beautiful": "FrozenLake-v1-8x8-no_slippery βοΈ",
|
| 44 |
-
"rl_env": "FrozenLake-v1-8x8-no_slippery",
|
| 45 |
-
"video_link": "",
|
| 46 |
-
"global": None
|
| 47 |
-
},
|
| 48 |
-
{
|
| 49 |
-
"rl_env_beautiful": "FrozenLake-v1-4x4 βοΈ",
|
| 50 |
-
"rl_env": "FrozenLake-v1-4x4",
|
| 51 |
-
"video_link": "",
|
| 52 |
-
"global": None
|
| 53 |
-
},
|
| 54 |
-
{
|
| 55 |
-
"rl_env_beautiful": "FrozenLake-v1-8x8 βοΈ",
|
| 56 |
-
"rl_env": "FrozenLake-v1-8x8",
|
| 57 |
-
"video_link": "",
|
| 58 |
-
"global": None
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"rl_env_beautiful": "Taxi-v3 π",
|
| 62 |
-
"rl_env": "Taxi-v3",
|
| 63 |
-
"video_link": "",
|
| 64 |
-
"global": None
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
"rl_env_beautiful": "CarRacing-v0 ποΈ",
|
| 68 |
-
"rl_env": "CarRacing-v0",
|
| 69 |
-
"video_link": "",
|
| 70 |
-
"global": None
|
| 71 |
-
},
|
| 72 |
-
{
|
| 73 |
-
"rl_env_beautiful": "CarRacing-v2 ποΈ",
|
| 74 |
-
"rl_env": "CarRacing-v2",
|
| 75 |
-
"video_link": "",
|
| 76 |
-
"global": None
|
| 77 |
-
},
|
| 78 |
-
{
|
| 79 |
-
"rl_env_beautiful": "MountainCar-v0 β°οΈ",
|
| 80 |
-
"rl_env": "MountainCar-v0",
|
| 81 |
-
"video_link": "",
|
| 82 |
-
"global": None
|
| 83 |
-
},
|
| 84 |
-
{
|
| 85 |
-
"rl_env_beautiful": "SpaceInvadersNoFrameskip-v4 πΎ",
|
| 86 |
-
"rl_env": "SpaceInvadersNoFrameskip-v4",
|
| 87 |
-
"video_link": "",
|
| 88 |
-
"global": None
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"rl_env_beautiful": "PongNoFrameskip-v4 πΎ",
|
| 92 |
-
"rl_env": "PongNoFrameskip-v4",
|
| 93 |
-
"video_link": "",
|
| 94 |
-
"global": None
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"rl_env_beautiful": "BreakoutNoFrameskip-v4 π§±",
|
| 98 |
-
"rl_env": "BreakoutNoFrameskip-v4",
|
| 99 |
-
"video_link": "",
|
| 100 |
-
"global": None
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"rl_env_beautiful": "QbertNoFrameskip-v4 π¦",
|
| 104 |
-
"rl_env": "QbertNoFrameskip-v4",
|
| 105 |
-
"video_link": "",
|
| 106 |
-
"global": None
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"rl_env_beautiful": "BipedalWalker-v3",
|
| 110 |
-
"rl_env": "BipedalWalker-v3",
|
| 111 |
-
"video_link": "",
|
| 112 |
-
"global": None
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"rl_env_beautiful": "Walker2DBulletEnv-v0",
|
| 116 |
-
"rl_env": "Walker2DBulletEnv-v0",
|
| 117 |
-
"video_link": "",
|
| 118 |
-
"global": None
|
| 119 |
-
},
|
| 120 |
-
{
|
| 121 |
-
"rl_env_beautiful": "AntBulletEnv-v0",
|
| 122 |
-
"rl_env": "AntBulletEnv-v0",
|
| 123 |
-
"video_link": "",
|
| 124 |
-
"global": None
|
| 125 |
-
},
|
| 126 |
-
{
|
| 127 |
-
"rl_env_beautiful": "HalfCheetahBulletEnv-v0",
|
| 128 |
-
"rl_env": "HalfCheetahBulletEnv-v0",
|
| 129 |
-
"video_link": "",
|
| 130 |
-
"global": None
|
| 131 |
-
},
|
| 132 |
-
{
|
| 133 |
-
"rl_env_beautiful": "PandaReachDense-v2",
|
| 134 |
-
"rl_env": "PandaReachDense-v2",
|
| 135 |
-
"video_link": "",
|
| 136 |
-
"global": None
|
| 137 |
-
},
|
| 138 |
-
{
|
| 139 |
-
"rl_env_beautiful": "PandaReachDense-v3",
|
| 140 |
-
"rl_env": "PandaReachDense-v3",
|
| 141 |
-
"video_link": "",
|
| 142 |
-
"global": None
|
| 143 |
-
},
|
| 144 |
-
{
|
| 145 |
-
"rl_env_beautiful": "Pixelcopter-PLE-v0",
|
| 146 |
-
"rl_env": "Pixelcopter-PLE-v0",
|
| 147 |
-
"video_link": "",
|
| 148 |
-
"global": None
|
| 149 |
-
}
|
| 150 |
]
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
def restart():
|
| 153 |
print("RESTART")
|
| 154 |
api.restart_space(repo_id="huggingface-projects/Deep-Reinforcement-Learning-Leaderboard")
|
| 155 |
|
| 156 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
try:
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
return None
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
return None
|
| 167 |
-
result = meta["model-index"][0]["results"]
|
| 168 |
-
metrics = result[0]["metrics"]
|
| 169 |
-
accuracy = metrics[0]["value"]
|
| 170 |
-
return accuracy
|
| 171 |
-
|
| 172 |
-
# We keep the worst case episode
|
| 173 |
-
def parse_rewards(accuracy):
|
| 174 |
-
default_std = -1000
|
| 175 |
-
default_reward=-1000
|
| 176 |
-
if accuracy != None:
|
| 177 |
-
accuracy = str(accuracy)
|
| 178 |
-
parsed = accuracy.split('+/-')
|
| 179 |
-
if len(parsed)>1:
|
| 180 |
-
mean_reward = float(parsed[0].strip())
|
| 181 |
-
std_reward = float(parsed[1].strip())
|
| 182 |
-
elif len(parsed)==1: #only mean reward
|
| 183 |
-
mean_reward = float(parsed[0].strip())
|
| 184 |
-
std_reward = float(0)
|
| 185 |
-
else:
|
| 186 |
-
mean_reward = float(default_std)
|
| 187 |
-
std_reward = float(default_reward)
|
| 188 |
-
|
| 189 |
-
else:
|
| 190 |
-
mean_reward = float(default_std)
|
| 191 |
-
std_reward = float(default_reward)
|
| 192 |
-
return mean_reward, std_reward
|
| 193 |
-
|
| 194 |
|
| 195 |
-
def
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
model_ids = [x.modelId for x in models]
|
| 199 |
-
return model_ids
|
| 200 |
-
|
| 201 |
-
# Parallelized version
|
| 202 |
-
def update_leaderboard_dataset_parallel(rl_env, path):
|
| 203 |
-
# Get model ids associated with rl_env
|
| 204 |
-
model_ids = get_model_ids(rl_env)
|
| 205 |
-
|
| 206 |
-
def process_model(model_id):
|
| 207 |
-
meta = get_metadata(model_id)
|
| 208 |
-
#LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
|
| 209 |
-
if meta is None:
|
| 210 |
-
return None
|
| 211 |
-
user_id = model_id.split('/')[0]
|
| 212 |
-
row = {}
|
| 213 |
-
row["User"] = user_id
|
| 214 |
-
row["Model"] = model_id
|
| 215 |
-
accuracy = parse_metrics_accuracy(meta)
|
| 216 |
-
mean_reward, std_reward = parse_rewards(accuracy)
|
| 217 |
-
mean_reward = mean_reward if not pd.isna(mean_reward) else 0
|
| 218 |
-
std_reward = std_reward if not pd.isna(std_reward) else 0
|
| 219 |
-
row["Results"] = mean_reward - std_reward
|
| 220 |
-
row["Mean Reward"] = mean_reward
|
| 221 |
-
row["Std Reward"] = std_reward
|
| 222 |
-
return row
|
| 223 |
-
|
| 224 |
-
data = list(thread_map(process_model, model_ids, desc="Processing models"))
|
| 225 |
-
|
| 226 |
-
# Filter out None results (models with no metadata)
|
| 227 |
-
data = [row for row in data if row is not None]
|
| 228 |
-
|
| 229 |
-
ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
|
| 230 |
-
new_history = ranked_dataframe
|
| 231 |
-
file_path = path + "/" + rl_env + ".csv"
|
| 232 |
-
new_history.to_csv(file_path, index=False)
|
| 233 |
-
|
| 234 |
-
return ranked_dataframe
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
def update_leaderboard_dataset(rl_env, path):
|
| 238 |
-
# Get model ids associated with rl_env
|
| 239 |
-
model_ids = get_model_ids(rl_env)
|
| 240 |
data = []
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
row["Results"] = mean_reward - std_reward
|
| 259 |
-
row["Mean Reward"] = mean_reward
|
| 260 |
-
row["Std Reward"] = std_reward
|
| 261 |
-
data.append(row)
|
| 262 |
-
|
| 263 |
-
ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
|
| 264 |
-
new_history = ranked_dataframe
|
| 265 |
-
file_path = path + "/" + rl_env + ".csv"
|
| 266 |
-
new_history.to_csv(file_path, index=False)
|
| 267 |
-
|
| 268 |
-
return ranked_dataframe
|
| 269 |
-
|
| 270 |
-
def download_leaderboard_dataset():
|
| 271 |
-
path = snapshot_download(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 272 |
-
return path
|
| 273 |
-
|
| 274 |
-
def get_data(rl_env, path) -> pd.DataFrame:
|
| 275 |
-
"""
|
| 276 |
-
Get data from rl_env
|
| 277 |
-
:return: data as a pandas DataFrame
|
| 278 |
-
"""
|
| 279 |
-
csv_path = path + "/" + rl_env + ".csv"
|
| 280 |
-
data = pd.read_csv(csv_path)
|
| 281 |
-
|
| 282 |
-
for index, row in data.iterrows():
|
| 283 |
-
user_id = row["User"]
|
| 284 |
-
data.loc[index, "User"] = make_clickable_user(user_id)
|
| 285 |
-
model_id = row["Model"]
|
| 286 |
-
data.loc[index, "Model"] = make_clickable_model(model_id)
|
| 287 |
-
|
| 288 |
-
return data
|
| 289 |
-
|
| 290 |
-
def get_data_no_html(rl_env, path) -> pd.DataFrame:
|
| 291 |
-
"""
|
| 292 |
-
Get data from rl_env
|
| 293 |
-
:return: data as a pandas DataFrame
|
| 294 |
-
"""
|
| 295 |
-
csv_path = path + "/" + rl_env + ".csv"
|
| 296 |
-
data = pd.read_csv(csv_path)
|
| 297 |
|
| 298 |
-
return data
|
| 299 |
|
| 300 |
def rank_dataframe(dataframe):
|
| 301 |
dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
|
|
@@ -305,108 +123,72 @@ def rank_dataframe(dataframe):
|
|
| 305 |
dataframe['Ranking'] = [i for i in range(1,len(dataframe)+1)]
|
| 306 |
return dataframe
|
| 307 |
|
|
|
|
|
|
|
|
|
|
| 308 |
|
| 309 |
-
def
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)
|
| 314 |
-
|
| 315 |
-
api.upload_folder(
|
| 316 |
-
folder_path=path_,
|
| 317 |
-
repo_id="huggingface-projects/drlc-leaderboard-data",
|
| 318 |
-
repo_type="dataset",
|
| 319 |
-
commit_message="Update dataset")
|
| 320 |
-
|
| 321 |
-
def filter_data(rl_env, path, user_id):
|
| 322 |
-
data_df = get_data_no_html(rl_env, path)
|
| 323 |
-
models = []
|
| 324 |
-
models = data_df[data_df["User"] == user_id]
|
| 325 |
-
|
| 326 |
-
for index, row in models.iterrows():
|
| 327 |
-
user_id = row["User"]
|
| 328 |
-
models.loc[index, "User"] = make_clickable_user(user_id)
|
| 329 |
-
model_id = row["Model"]
|
| 330 |
-
models.loc[index, "Model"] = make_clickable_model(model_id)
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
return models
|
| 334 |
|
| 335 |
-
run_update_dataset()
|
| 336 |
|
| 337 |
with block:
|
| 338 |
-
gr.Markdown(
|
| 339 |
-
# π
|
| 340 |
|
| 341 |
-
This
|
| 342 |
|
| 343 |
-
###
|
| 344 |
-
|
| 345 |
-
You **can click on the model's name** to be redirected to its model card, including documentation.
|
| 346 |
|
| 347 |
-
###
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
-
###
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
| 357 |
""")
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
""".format(name_leaderboard = rl_env["rl_env_beautiful"], video_link = rl_env["video_link"])
|
| 368 |
-
gr.Markdown(markdown)
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
with gr.Row():
|
| 372 |
-
gr.Markdown("""
|
| 373 |
-
## Search your models
|
| 374 |
-
Simply type your user id to find your models
|
| 375 |
-
""")
|
| 376 |
-
|
| 377 |
-
with gr.Row():
|
| 378 |
-
user_id = gr.Textbox(label= "Your user id")
|
| 379 |
-
search_btn = gr.Button("Search my models π")
|
| 380 |
-
reset_btn = gr.Button("Clear my search")
|
| 381 |
-
env = gr.State(rl_env["rl_env"])
|
| 382 |
-
grpath = gr.State(path_)
|
| 383 |
-
with gr.Row():
|
| 384 |
-
gr_dataframe = gr.components.Dataframe(value=get_data(rl_env["rl_env"], path_), headers=["Ranking π", "User π€", "Model id π€", "Results", "Mean Reward", "Std Reward"], datatype=["number", "markdown", "markdown", "number", "number", "number"], row_count=(100, 'fixed'))
|
| 385 |
-
|
| 386 |
-
with gr.Row():
|
| 387 |
-
#gr_search_dataframe = gr.components.Dataframe(headers=["Ranking π", "User π€", "Model id π€", "Results", "Mean Reward", "Std Reward"], datatype=["number", "markdown", "markdown", "number", "number", "number"], visible=False)
|
| 388 |
-
search_btn.click(fn=filter_data, inputs=[env, grpath, user_id], outputs=gr_dataframe, api_name="filter_data")
|
| 389 |
-
|
| 390 |
-
with gr.Row():
|
| 391 |
-
search_btn.click(fn=filter_data, inputs=[env, grpath, user_id], outputs=gr_dataframe, api_name="filter_data")
|
| 392 |
-
reset_btn.click(fn=get_data, inputs=[env, grpath], outputs=gr_dataframe, api_name="get_data")
|
| 393 |
-
"""
|
| 394 |
-
block.load(
|
| 395 |
-
download_leaderboard_dataset,
|
| 396 |
-
inputs=[],
|
| 397 |
-
outputs=[
|
| 398 |
-
grpath
|
| 399 |
-
],
|
| 400 |
)
|
| 401 |
-
|
|
|
|
|
|
|
| 402 |
|
|
|
|
|
|
|
| 403 |
|
|
|
|
| 404 |
scheduler = BackgroundScheduler()
|
| 405 |
-
#
|
| 406 |
-
#scheduler.add_job(func=run_update_dataset, trigger="interval", seconds=3600)
|
| 407 |
-
#scheduler.add_job(download_leaderboard_dataset, 'interval', seconds=3600)
|
| 408 |
-
#scheduler.add_job(run_update_dataset, 'interval', seconds=3600)
|
| 409 |
-
scheduler.add_job(restart, 'interval', seconds=10800)
|
| 410 |
scheduler.start()
|
| 411 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
block.launch()
|
|
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
import requests
|
| 4 |
+
import glob
|
| 5 |
+
from pathlib import Path
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
|
|
|
| 14 |
|
| 15 |
from utils import *
|
| 16 |
|
| 17 |
+
# Configuration for retrieval task leaderboard
|
| 18 |
+
SUBMISSION_FOLDER = "submission"
|
| 19 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 20 |
|
| 21 |
block = gr.Blocks()
|
| 22 |
api = HfApi(token=HF_TOKEN)
|
| 23 |
|
| 24 |
+
# Retrieval task metrics configuration
|
| 25 |
+
retrieval_metrics = [
|
| 26 |
+
{
|
| 27 |
+
"metric_name": "Hit Rate Click@50",
|
| 28 |
+
"metric_key": "hit_rate_click@50",
|
| 29 |
+
"description": "Hit rate for click predictions at top 50"
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"metric_name": "Hit Rate A2C@50",
|
| 33 |
+
"metric_key": "hit_rate_A2C@50",
|
| 34 |
+
"description": "Hit rate for A2C predictions at top 50"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"metric_name": "Hit Rate Purchase@50",
|
| 38 |
+
"metric_key": "hit_rate_purchase@50",
|
| 39 |
+
"description": "Hit rate for purchase predictions at top 50"
|
| 40 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
]
|
| 42 |
|
| 43 |
+
# Main leaderboard configuration
|
| 44 |
+
leaderboard_config = {
|
| 45 |
+
"title": "π Retrieval Task Leaderboard π",
|
| 46 |
+
"description": "Leaderboard for retrieval task performance",
|
| 47 |
+
"metrics": retrieval_metrics
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
def restart():
    """Restart the Hugging Face Space that hosts this app.

    Intended to be run periodically by the background scheduler. The target
    Space is taken from the ``SPACE_ID`` environment variable when it is set,
    so the app restarts itself rather than a hard-coded Space; the previous
    hard-coded id is kept as the fallback for backward compatibility.
    """
    repo_id = os.environ.get(
        "SPACE_ID",
        "huggingface-projects/Deep-Reinforcement-Learning-Leaderboard",
    )
    print("RESTART")
    api.restart_space(repo_id=repo_id)
|
| 53 |
|
| 54 |
+
def load_submission_files(folder=None):
    """Load every valid JSON submission from the submission folder.

    Args:
        folder: Directory to scan for ``*.json`` files. When omitted, the
            module-level ``SUBMISSION_FOLDER`` is used.

    Returns:
        A list of submission dicts in file-name order. A file is skipped
        (with a printed message) when it cannot be read, is not valid JSON,
        is not a JSON object, or lacks one of the required fields.
    """
    if folder is None:
        folder = SUBMISSION_FOLDER

    required_fields = (
        "user_id",
        "model_id",
        "hit_rate_click@50",
        "hit_rate_A2C@50",
        "hit_rate_purchase@50",
    )
    submissions = []

    # glob returns matches in arbitrary order; sort so repeated loads agree.
    for file_path in sorted(glob.glob(os.path.join(folder, "*.json"))):
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                submission_data = json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file or a directory named *.json.
            # ValueError: json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {file_path}: {e}")
            continue

        # A top-level list/string/number/null is not a submission; checking the
        # type first keeps the membership test from raising or mis-matching.
        if isinstance(submission_data, dict) and all(
            field in submission_data for field in required_fields
        ):
            submissions.append(submission_data)
        else:
            print(f"Warning: Invalid submission format in {file_path}")

    return submissions
|
| 73 |
+
|
| 74 |
+
def parse_submission_data(submission):
    """Convert one raw submission into a leaderboard row.

    Args:
        submission: A dict loaded from a submission JSON file. The three
            ``hit_rate_*@50`` values may be numbers or numeric strings.

    Returns:
        A dict keyed by the leaderboard column names, or ``None`` when the
        submission is not a dict or a metric is not a finite number.
    """
    import math  # local import: only this function needs it

    if not isinstance(submission, dict):
        print(
            "Error parsing submission data: expected a JSON object, "
            f"got {type(submission).__name__}"
        )
        return None

    try:
        # Metrics are documented as strings in the JSON format; accept numbers too.
        click_rate = float(submission.get("hit_rate_click@50", 0))
        a2c_rate = float(submission.get("hit_rate_A2C@50", 0))
        purchase_rate = float(submission.get("hit_rate_purchase@50", 0))
    except (ValueError, TypeError, OverflowError) as e:
        print(f"Error parsing submission data: {e}")
        return None

    # float() accepts "nan"/"inf"; such values would corrupt the ranking sort.
    if not all(math.isfinite(rate) for rate in (click_rate, a2c_rate, purchase_rate)):
        print("Error parsing submission data: metrics must be finite numbers")
        return None

    return {
        "User": submission.get("user_id", "Unknown"),
        "Model": submission.get("model_id", "Unknown"),
        "Dataset": submission.get("dataset_id", "Unknown"),
        "Hit Rate Click@50": click_rate,
        "Hit Rate A2C@50": a2c_rate,
        "Hit Rate Purchase@50": purchase_rate,
        "Comment": submission.get("comment", ""),
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
def update_leaderboard_from_submissions():
    """Build the ranked leaderboard table from the JSON submissions.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Ranking``, ``User``,
        ``Model``, ``Dataset``, the three hit-rate metrics and ``Comment``.
        Rows are ordered by "Hit Rate Click@50" (highest first) and
        ``Ranking`` counts from 1. With no usable submission the frame is
        empty but still has all of those columns.
    """
    metric_columns = [
        "User",
        "Model",
        "Dataset",
        "Hit Rate Click@50",
        "Hit Rate A2C@50",
        "Hit Rate Purchase@50",
        "Comment",
    ]

    parsed_rows = (parse_submission_data(item) for item in load_submission_files())
    data = [row for row in parsed_rows if row is not None]

    if not data:
        # Keep the same schema as the populated table, "Ranking" included, so
        # the displayed headers line up whether or not there are submissions.
        return pd.DataFrame(columns=["Ranking"] + metric_columns)

    df = pd.DataFrame(data, columns=metric_columns)
    # mergesort is stable: tied scores keep their load order instead of
    # being shuffled between refreshes.
    df = df.sort_values(by="Hit Rate Click@50", ascending=False, kind="mergesort")
    df.reset_index(drop=True, inplace=True)
    df.insert(0, "Ranking", range(1, len(df) + 1))

    return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
|
|
|
| 117 |
|
| 118 |
def rank_dataframe(dataframe):
|
| 119 |
dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
|
|
|
|
| 123 |
dataframe['Ranking'] = [i for i in range(1,len(dataframe)+1)]
|
| 124 |
return dataframe
|
| 125 |
|
| 126 |
+
def get_leaderboard_data():
    """Return the leaderboard table rebuilt from the current submissions."""
    leaderboard = update_leaderboard_from_submissions()
    return leaderboard
|
| 129 |
|
| 130 |
+
def refresh_leaderboard():
    """Log a refresh notice and return freshly loaded leaderboard data."""
    print("π Refreshing leaderboard...")
    refreshed = get_leaderboard_data()
    return refreshed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
+
# run_update_dataset()
|
| 136 |
|
| 137 |
with block:
|
| 138 |
+
gr.Markdown("""
|
| 139 |
+
# π Retrieval Task Leaderboard π
|
| 140 |
|
| 141 |
+
This leaderboard tracks the performance of different models on retrieval tasks.
|
| 142 |
|
| 143 |
+
### How to Submit
|
| 144 |
+
Submit your results as a JSON file in the `submission` folder via pull request.
|
|
|
|
| 145 |
|
| 146 |
+
### Required JSON Format
|
| 147 |
+
```json
|
| 148 |
+
{
|
| 149 |
+
"user_id": "your_username",
|
| 150 |
+
"model_id": "your_model_name",
|
| 151 |
+
"hit_rate_click@50": "0.75",
|
| 152 |
+
"hit_rate_A2C@50": "0.68",
|
| 153 |
+
"hit_rate_purchase@50": "0.82",
|
| 154 |
+
"dataset_id": "your_dataset",
|
| 155 |
+
"comment": "Optional comment about your submission"
|
| 156 |
+
}
|
| 157 |
+
```
|
| 158 |
|
| 159 |
+
### How to Update After PR
|
| 160 |
+
**Currently, PR detection is NOT automated.** After a PR is merged:
|
| 161 |
+
1. Restart the application manually: `python3 app.py`
|
| 162 |
+
2. Or click the "π Refresh Leaderboard" button below
|
| 163 |
+
|
| 164 |
+
### Rankings
|
| 165 |
+
Currently ranked by "Hit Rate Click@50" (you can modify the sorting in the code)
|
| 166 |
""")
|
| 167 |
+
|
| 168 |
+
# Simple refresh button
|
| 169 |
+
refresh_button = gr.Button("π Refresh Leaderboard")
|
| 170 |
+
|
| 171 |
+
# Display leaderboard without Overall Score column
|
| 172 |
+
leaderboard_df = gr.Dataframe(
|
| 173 |
+
value=get_leaderboard_data(),
|
| 174 |
+
headers=["Ranking", "User", "Model", "Dataset", "Hit Rate Click@50", "Hit Rate A2C@50", "Hit Rate Purchase@50", "Comment"],
|
| 175 |
+
label="Current Leaderboard"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
)
|
| 177 |
+
|
| 178 |
+
# Simple refresh functionality
|
| 179 |
+
refresh_button.click(refresh_leaderboard, outputs=leaderboard_df)
|
| 180 |
|
| 181 |
+
# Initialize the system
|
| 182 |
+
print("π Starting Retrieval Task Leaderboard...")
|
| 183 |
|
| 184 |
+
# Setup background scheduler (optional, mainly for restart)
|
| 185 |
scheduler = BackgroundScheduler()
|
| 186 |
+
scheduler.add_job(restart, 'interval', seconds=21600) # Restart every 6 hours
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
scheduler.start()
|
| 188 |
|
| 189 |
+
print("✅ System initialized successfully!")
|
| 190 |
+
print("π Leaderboard accessible at: http://127.0.0.1:7860")
|
| 191 |
+
print("β οΈ PR detection is NOT automated - restart manually after PR merges")
|
| 192 |
+
print("π Use the refresh button in the UI to update the leaderboard")
|
| 193 |
+
|
| 194 |
block.launch()
|
requirements.txt
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
APScheduler==3.10.1
|
| 2 |
-
gradio==4.0
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
APScheduler==3.10.1
|
| 2 |
+
gradio==4.44.0
|
| 3 |
+
fastapi==0.112.2
|
| 4 |
+
pydantic==2.10.6
|
| 5 |
+
httpx>=0.24.1
|
| 6 |
+
tqdm
|
| 7 |
+
huggingface-hub>=0.19.3
|
submission/a_random_submit.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"user_id": "a_random_submit",
|
| 3 |
+
"model_id": "a_random_submit",
|
| 4 |
+
"hit_rate_click@50": "0",
|
| 5 |
+
"hit_rate_A2C@50": "0",
|
| 6 |
+
"hit_rate_purchase@50": "0",
|
| 7 |
+
"dataset_id": "a_random_submit",
|
| 8 |
+
"comment": "a_random_submit"
|
| 9 |
+
}
|
submission/test_submission_1.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"user_id": "alice_researcher",
|
| 3 |
+
"model_id": "bert-retrieval-v1",
|
| 4 |
+
"hit_rate_click@50": "0.85",
|
| 5 |
+
"hit_rate_A2C@50": "0.78",
|
| 6 |
+
"hit_rate_purchase@50": "0.92",
|
| 7 |
+
"dataset_id": "ecommerce-dataset-v1",
|
| 8 |
+
"comment": "BERT-based retrieval model with fine-tuning"
|
| 9 |
+
}
|
submission/test_submission_2.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"user_id": "bob_nlp",
|
| 3 |
+
"model_id": "transformer-retrieval-v2",
|
| 4 |
+
"hit_rate_click@50": "0.82",
|
| 5 |
+
"hit_rate_A2C@50": "0.89",
|
| 6 |
+
"hit_rate_purchase@50": "0.87",
|
| 7 |
+
"dataset_id": "retail-dataset-v2",
|
| 8 |
+
"comment": "Transformer-based approach with attention mechanisms"
|
| 9 |
+
}
|