mshamrai committed on
Commit
53e132c
·
1 Parent(s): 588c2f9

chore: init

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +385 -0
  3. requirements.txt +4 -0
  4. utils.py +14 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/*
app.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
8
+ from huggingface_hub.repocard import metadata_load
9
+ from apscheduler.schedulers.background import BackgroundScheduler
10
+
11
+ from tqdm.contrib.concurrent import thread_map
12
+
13
+ from utils import *
14
+
15
# Hub location of the dataset repository that holds one leaderboard CSV per environment.
DATASET_REPO_URL = "https://huggingface.co/datasets/mshamrai/rlc-leaderboard-data"
DATASET_REPO_ID = "mshamrai/rlc-leaderboard-data"
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hub user names whose models are kept by filter_students().
STUDENTS_SET = {"mshamrai", "Kolosok", "grinvolod", "ostap-khm", "elusivephantasm", "letaldir"}

# Gradio container for the UI, and the Hub client used for the restart and upload calls.
block = gr.Blocks()
api = HfApi(token=HF_TOKEN)
23
+
24
# Environments shown on the leaderboard, one tab each.
# Each spec is (environment id, emoji appended to the tab title or "", replay video URL or "").
_RL_ENV_SPECS = (
    ("LunarLander-v2", "🚀", ""),
    ("CartPole-v1", "", "https://huggingface.co/sb3/ppo-CartPole-v1/resolve/main/replay.mp4"),
    ("FrozenLake-v1-4x4-no_slippery", "❄️", ""),
    ("FrozenLake-v1-8x8-no_slippery", "❄️", ""),
    ("FrozenLake-v1-4x4", "❄️", ""),
    ("FrozenLake-v1-8x8", "❄️", ""),
    ("Taxi-v3", "🚖", ""),
    ("CarRacing-v0", "🏎️", ""),
    ("CarRacing-v2", "🏎️", ""),
    ("MountainCar-v0", "⛰️", ""),
    ("SpaceInvadersNoFrameskip-v4", "👾", ""),
    ("PongNoFrameskip-v4", "🎾", ""),
    ("BreakoutNoFrameskip-v4", "🧱", ""),
    ("QbertNoFrameskip-v4", "🐦", ""),
    ("BipedalWalker-v3", "", ""),
    ("Walker2DBulletEnv-v0", "", ""),
    ("AntBulletEnv-v0", "", ""),
    ("HalfCheetahBulletEnv-v0", "", ""),
    ("PandaReachDense-v2", "", ""),
    ("PandaReachDense-v3", "", ""),
    ("Pixelcopter-PLE-v0", "", ""),
)

# "rl_env" is the id used for the Hub query and the CSV file name;
# "rl_env_beautiful" is the title displayed on the tab.
rl_envs = [
    {
        "rl_env_beautiful": f"{env_id} {emoji}" if emoji else env_id,
        "rl_env": env_id,
        "video_link": video_link,
        "global": None,
    }
    for env_id, emoji, video_link in _RL_ENV_SPECS
]
153
+
154
def restart():
    """Restart the Space hosting this app.

    Registered as the periodic scheduler job. The module calls
    run_update_dataset() at import time, so a restart also rebuilds the
    leaderboard data.
    """
    # Inside a Space, SPACE_ID holds this Space's own "<owner>/<name>" id, so
    # the app restarts itself rather than an unrelated Space. Outside a Space
    # the previously hard-coded id is kept as the fallback.
    space_id = os.environ.get(
        "SPACE_ID", "huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"
    )
    print("RESTART")
    api.restart_space(repo_id=space_id)
157
+
158
def get_metadata(model_id):
    """Download a model's README.md from the Hub and parse its metadata.

    :param model_id: repository id of the model
    :return: the result of metadata_load() on the downloaded README, or None
        when the download or parsing raises an HTTP error (e.g. no README.md)
    """
    metadata = None
    try:
        card_file = hf_hub_download(model_id, filename="README.md", etag_timeout=180)
        metadata = metadata_load(card_file)
    except requests.exceptions.HTTPError:
        # 404 README.md not found: leave the metadata as None
        pass
    return metadata
165
+
166
def parse_metrics_accuracy(meta):
    """Return the first metric value reported in a model card's metadata.

    The value is read from
    meta["model-index"][0]["results"][0]["metrics"][0]["value"].

    :param meta: metadata mapping parsed from a model card
    :return: the metric value, or None when any level of that path is
        missing, empty, or not indexable
    """
    try:
        return meta["model-index"][0]["results"][0]["metrics"][0]["value"]
    except (KeyError, IndexError, TypeError):
        # An absent or malformed model-index means "no result reported";
        # it is handled the same way as a card without a "model-index" key.
        return None
173
+
174
+ # We keep the worst case episode
175
def parse_rewards(accuracy):
    """Split a reported metric value into mean and standard deviation.

    Accepted forms are "<mean> +/- <std>" and a bare "<mean>" (std is then 0).
    Non-string values are converted with str() first.

    :param accuracy: metric value from the model card, or None
    :return: tuple (mean_reward, std_reward) of floats; (-1000.0, -1000.0)
        when accuracy is None or cannot be parsed as numbers
    """
    default_reward = -1000.0
    default_std = -1000.0
    if accuracy is None:
        return default_reward, default_std

    # str.split always yields at least one element, so only two cases exist:
    # mean and std, or mean alone.
    parts = str(accuracy).split('+/-')
    try:
        mean_reward = float(parts[0].strip())
        std_reward = float(parts[1].strip()) if len(parts) > 1 else 0.0
    except ValueError:
        # Non-numeric text is treated like a missing result.
        return default_reward, default_std
    return mean_reward, std_reward
195
+
196
+
197
def get_model_ids(rl_env):
    """List the ids of the Hub models matching an environment.

    :param rl_env: value passed as the ``filter`` argument of list_models()
    :return: list with the ``modelId`` of every model returned by the Hub
    """
    # A fresh client without a token is used for the listing.
    hub = HfApi()
    return [model.modelId for model in hub.list_models(filter=rl_env)]
202
+
203
def filter_students(model_ids, students=None):
    """Keep only the model ids whose owner is an allowed user.

    :param model_ids: iterable of Hub model ids, normally "<user>/<model>"
    :param students: collection of allowed user names; when None the
        module-level STUDENTS_SET is used
    :return: list of the matching model ids, in their original order
    """
    allowed = STUDENTS_SET if students is None else students
    # The owner is the part of the id before the first "/".
    return [model_id for model_id in model_ids if model_id.split('/', 1)[0] in allowed]
210
+
211
# Parallelized version
def update_leaderboard_dataset_parallel(rl_env, path):
    """Rebuild the leaderboard CSV of one environment, processing models in threads.

    :param rl_env: environment id; used to query the Hub and as the CSV file stem
    :param path: directory in which "<rl_env>.csv" is written
    :return: the ranked DataFrame that was written to disk
    """
    # Get model ids associated with rl_env, restricted to the course students
    model_ids = filter_students(get_model_ids(rl_env))

    def process_model(model_id):
        """Build the leaderboard row of one model, or None when it is skipped."""
        meta = get_metadata(model_id)
        if meta is None:
            return None
        try:
            mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(meta))
        except Exception as err:
            # A model card that cannot be parsed is skipped, as before, but the
            # reason is now reported instead of being swallowed by a bare except.
            print(f"Skipping {model_id}: {err!r}")
            return None
        # NaN scores are stored as 0.
        mean_reward = 0 if pd.isna(mean_reward) else mean_reward
        std_reward = 0 if pd.isna(std_reward) else std_reward
        return {
            "User": model_id.split('/')[0],
            "Model": model_id,
            # Lower bound of the score: mean minus one standard deviation.
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        }

    # Drop the None results (models that were skipped)
    rows = [
        row
        for row in thread_map(process_model, model_ids, desc="Processing models")
        if row is not None
    ]

    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(rows))
    ranked_dataframe.to_csv(os.path.join(path, rl_env + ".csv"), index=False)

    return ranked_dataframe
249
+
250
+
251
def update_leaderboard_dataset(rl_env, path):
    """Rebuild the leaderboard CSV of one environment, one model at a time.

    Sequential counterpart of update_leaderboard_dataset_parallel(): it ranks
    the same set of models (those owned by the course students) and skips
    models whose card cannot be parsed.

    :param rl_env: environment id; used to query the Hub and as the CSV file stem
    :param path: directory in which "<rl_env>.csv" is written
    :return: the ranked DataFrame that was written to disk
    """
    # Get model ids associated with rl_env, restricted to the course students
    model_ids = filter_students(get_model_ids(rl_env))
    data = []
    for model_id in model_ids:
        meta = get_metadata(model_id)
        if meta is None:
            continue
        try:
            mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(meta))
        except Exception as err:
            # Skip the model rather than abort the whole update.
            print(f"Skipping {model_id}: {err!r}")
            continue
        # NaN scores are stored as 0.
        mean_reward = 0 if pd.isna(mean_reward) else mean_reward
        std_reward = 0 if pd.isna(std_reward) else std_reward
        data.append({
            "User": model_id.split('/')[0],
            "Model": model_id,
            # Lower bound of the score: mean minus one standard deviation.
            "Results": mean_reward - std_reward,
            "Mean Reward": mean_reward,
            "Std Reward": std_reward,
        })

    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
    ranked_dataframe.to_csv(os.path.join(path, rl_env + ".csv"), index=False)

    return ranked_dataframe
283
+
284
def download_leaderboard_dataset():
    """Download a snapshot of the leaderboard dataset repository.

    :return: the value returned by snapshot_download(); callers use it as the
        local directory that holds the per-environment CSV files
    """
    return snapshot_download(repo_type="dataset", repo_id=DATASET_REPO_ID)
287
+
288
def get_data(rl_env, path) -> pd.DataFrame:
    """
    Load the leaderboard of rl_env with users and models rendered as links.

    :param rl_env: environment id, i.e. the stem of the CSV file
    :param path: directory containing "<rl_env>.csv"
    :return: data as a pandas DataFrame whose "User" and "Model" cells are
        the HTML anchors built by make_clickable_user / make_clickable_model
    """
    data = pd.read_csv(os.path.join(path, rl_env + ".csv"))

    # Map each column in one pass instead of writing cell by cell through
    # iterrows() and .loc.
    data["User"] = data["User"].map(make_clickable_user)
    data["Model"] = data["Model"].map(make_clickable_model)

    return data
303
+
304
def get_data_no_html(rl_env, path) -> pd.DataFrame:
    """
    Load the stored leaderboard of rl_env without any HTML decoration.

    :param rl_env: environment id, i.e. the stem of the CSV file
    :param path: directory containing "<rl_env>.csv"
    :return: the CSV content as a pandas DataFrame
    """
    return pd.read_csv("/".join((path, rl_env)) + ".csv")
313
+
314
def rank_dataframe(dataframe):
    """Sort leaderboard rows best-first and number them from 1.

    Rows are ordered by "Results", then "User", then "Model", all descending.
    The input frame is not modified.

    :param dataframe: frame with at least "Results", "User" and "Model" columns
    :return: a new frame with a "Ranking" column holding 1..n; an existing
        "Ranking" column is overwritten in place, otherwise it is inserted first
    """
    if dataframe.empty:
        # Same column layout as a populated leaderboard, so an environment
        # without models still produces a CSV with the usual header.
        return pd.DataFrame(columns=['Ranking', 'User', 'Model', 'Results', 'Mean Reward', 'Std Reward'])
    # sort_values returns a new frame, so the caller's frame stays untouched.
    ranked = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
    ranking = list(range(1, len(ranked) + 1))
    if 'Ranking' in ranked.columns:
        ranked['Ranking'] = ranking
    else:
        ranked.insert(0, 'Ranking', ranking)
    return ranked
323
+
324
+
325
def run_update_dataset():
    """Recompute the leaderboard of every environment and upload the result.

    Downloads the dataset snapshot, rewrites one CSV per entry of rl_envs in
    that directory, then uploads the directory to the dataset repository.
    """
    path_ = download_leaderboard_dataset()
    for rl_env in rl_envs:
        update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)

    # Upload to the repository the snapshot was downloaded from.
    api.upload_folder(
        folder_path=path_,
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",
        commit_message="Update dataset",
    )
336
+
337
# Rebuild and upload the leaderboard data once at start-up, before the UI is built.
run_update_dataset()

with block:
    # The text is kept at column 0 inside the literal so it renders as Markdown
    # regardless of how the component treats leading whitespace.
    gr.Markdown("""
# 🏆 Reinforcement Learning Course Leaderboard 🏆

This leaderboard is for Kyiv Academic University students to see their results during the Hugging Face <a href="https://huggingface.co/learn/deep-rl-course/unit0/introduction?fw=pt">Deep Reinforcement Learning Course</a>.

### How are the results calculated?
We use **lower bound result to sort the models: mean_reward - std_reward.**

### I can't find my model 😭
The leaderboard is **updated every three hours** if you can't find your models, just wait for the next update.
""")
    path_ = download_leaderboard_dataset()

    # One tab per environment: a title row followed by the ranking table.
    for rl_env in rl_envs:
        with gr.TabItem(rl_env["rl_env_beautiful"]):
            with gr.Row():
                gr.Markdown(f"# {rl_env['rl_env_beautiful']}")

            with gr.Row():
                gr.components.Dataframe(
                    value=get_data(rl_env["rl_env"], path_),
                    headers=["Ranking 🏆", "User 🤗", "Model id 🤖", "Results", "Mean Reward", "Std Reward"],
                    datatype=["number", "markdown", "markdown", "number", "number", "number"],
                    row_count=(15, 'dynamic'),
                )
375
+
376
+
377
# Background job that calls restart() every 10800 s (3 hours). The module runs
# run_update_dataset() at import time, so each restart refreshes the data.
scheduler = BackgroundScheduler()
# Disabled hourly in-process refresh jobs:
#scheduler.add_job(func=run_update_dataset, trigger="interval", seconds=3600)
#scheduler.add_job(download_leaderboard_dataset, 'interval', seconds=3600)
#scheduler.add_job(run_update_dataset, 'interval', seconds=3600)
scheduler.add_job(restart, 'interval', seconds=10800)
scheduler.start()

# Start the Gradio app.
block.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ APScheduler==3.10.1
2
+ gradio==4.44.1
3
+ httpx>=0.24.1
4
+ tqdm
utils.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Based on Omar Sanseviero work
2
+ # Make model clickable link
3
def make_clickable_model(model_name):
    """Return an HTML anchor pointing at a model's page on the Hub.

    :param model_name: Hub model id, normally "<user>/<model>"
    :return: anchor opening in a new tab; its text is the id without the
        leading user segment, or the full id when nothing remains after it
    """
    # remove user from model name; an id without "/" has no user segment, so
    # the whole id is shown instead of an empty (invisible) link text
    model_name_show = ' '.join(model_name.split('/')[1:]) or model_name

    link = "https://huggingface.co/" + model_name
    return f'<a target="_blank" href="{link}">{model_name_show}</a>'
9
+
10
+ # Make user clickable link
11
def make_clickable_user(user_id):
    """Return an HTML anchor pointing at a user's profile on the Hub.

    :param user_id: Hub user name
    :return: anchor opening in a new tab, with the user name as its text
    """
    profile_url = "https://huggingface.co/" + user_id
    return '<a target="_blank" href="{0}">{1}</a>'.format(profile_url, user_id)
14
+