mshamrai committed on
Commit
acd7bbf
·
1 Parent(s): 04e0c94

chore: add more required envs and more

Browse files
Files changed (2) hide show
  1. app.py +126 -111
  2. requirements.txt +1 -1
app.py CHANGED
@@ -24,111 +24,73 @@ rl_envs = [
24
  {
25
  "rl_env_beautiful": "LunarLander-v2 🚀",
26
  "rl_env": "LunarLander-v2",
27
- "video_link": "",
28
- "global": None
29
- },
30
- {
31
- "rl_env_beautiful": "FrozenLake-v1-4x4-no_slippery ❄️",
32
- "rl_env": "FrozenLake-v1-4x4-no_slippery",
33
- "video_link": "",
34
- "global": None
35
  },
36
  {
37
  "rl_env_beautiful": "Taxi-v3 🚖",
38
  "rl_env": "Taxi-v3",
39
- "video_link": "",
40
- "global": None
 
41
  },
42
  {
43
  "rl_env_beautiful": "SpaceInvadersNoFrameskip-v4 👾",
44
  "rl_env": "SpaceInvadersNoFrameskip-v4",
45
- "video_link": "",
46
- "global": None
 
47
  },
48
  {
49
  "rl_env_beautiful": "CartPole-v1",
50
  "rl_env": "CartPole-v1",
51
- "video_link": "https://huggingface.co/sb3/ppo-CartPole-v1/resolve/main/replay.mp4",
52
- "global": None
 
53
  },
54
  {
55
  "rl_env_beautiful": "Pixelcopter-PLE-v0",
56
  "rl_env": "Pixelcopter-PLE-v0",
57
- "video_link": "",
58
- "global": None
59
- },
60
- {
61
- "rl_env_beautiful": "CarRacing-v0 🏎️",
62
- "rl_env": "CarRacing-v0",
63
- "video_link": "",
64
- "global": None
65
- },
66
- {
67
- "rl_env_beautiful": "CarRacing-v2 🏎️",
68
- "rl_env": "CarRacing-v2",
69
- "video_link": "",
70
- "global": None
71
- },
72
- {
73
- "rl_env_beautiful": "MountainCar-v0 ⛰️",
74
- "rl_env": "MountainCar-v0",
75
- "video_link": "",
76
- "global": None
77
  },
78
  {
79
- "rl_env_beautiful": "PongNoFrameskip-v4 🎾",
80
- "rl_env": "PongNoFrameskip-v4",
81
- "video_link": "",
82
- "global": None
 
83
  },
84
  {
85
- "rl_env_beautiful": "BreakoutNoFrameskip-v4 🧱",
86
- "rl_env": "BreakoutNoFrameskip-v4",
87
- "video_link": "",
88
- "global": None
 
89
  },
90
  {
91
- "rl_env_beautiful": "QbertNoFrameskip-v4 🐦",
92
- "rl_env": "QbertNoFrameskip-v4",
93
- "video_link": "",
94
- "global": None
 
95
  },
96
  {
97
- "rl_env_beautiful": "BipedalWalker-v3",
98
- "rl_env": "BipedalWalker-v3",
99
- "video_link": "",
100
- "global": None
 
101
  },
102
  {
103
- "rl_env_beautiful": "Walker2DBulletEnv-v0",
104
- "rl_env": "Walker2DBulletEnv-v0",
105
- "video_link": "",
106
- "global": None
107
- },
108
- {
109
- "rl_env_beautiful": "AntBulletEnv-v0",
110
- "rl_env": "AntBulletEnv-v0",
111
- "video_link": "",
112
- "global": None
113
- },
114
- {
115
- "rl_env_beautiful": "HalfCheetahBulletEnv-v0",
116
- "rl_env": "HalfCheetahBulletEnv-v0",
117
- "video_link": "",
118
- "global": None
119
- },
120
- {
121
- "rl_env_beautiful": "PandaReachDense-v2",
122
- "rl_env": "PandaReachDense-v2",
123
- "video_link": "",
124
- "global": None
125
- },
126
- {
127
- "rl_env_beautiful": "PandaReachDense-v3",
128
- "rl_env": "PandaReachDense-v3",
129
- "video_link": "",
130
- "global": None
131
- },
132
  ]
133
 
134
  def restart():
@@ -174,42 +136,94 @@ def parse_rewards(accuracy):
174
  return mean_reward, std_reward
175
 
176
 
177
- def get_model_ids(rl_env):
 
 
 
 
 
 
 
178
  api = HfApi()
179
- models = api.list_models(filter=rl_env)
180
- model_ids = [x.modelId for x in models]
181
- return model_ids
182
 
183
- def filter_students(model_ids):
184
- filtered = []
185
- for model_id in model_ids:
186
- user_id = model_id.split('/')[0]
187
- if user_id in STUDENTS_SET:
188
- filtered.append(model_id)
189
- return filtered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
 
192
  def update_leaderboard_dataset(rl_env, path):
193
  # Get model ids associated with rl_env
194
- model_ids = get_model_ids(rl_env)
195
- model_ids = filter_students(model_ids)
 
196
  data = []
197
- for model_id in model_ids:
198
- meta = get_metadata(model_id)
199
- #LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
200
- if meta is None:
201
  continue
202
  user_id = model_id.split('/')[0]
203
  row = {}
204
  row["User"] = user_id
205
  row["Model"] = model_id
206
- accuracy = parse_metrics_accuracy(meta)
207
- mean_reward, std_reward = parse_rewards(accuracy)
208
- mean_reward = mean_reward if not pd.isna(mean_reward) else 0
209
- std_reward = std_reward if not pd.isna(std_reward) else 0
210
- row["Results"] = mean_reward - std_reward
211
- row["Mean Reward"] = mean_reward
212
- row["Std Reward"] = std_reward
213
  data.append(row)
214
 
215
  if not data:
@@ -217,7 +231,7 @@ def update_leaderboard_dataset(rl_env, path):
217
 
218
  ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
219
  new_history = ranked_dataframe
220
- file_path = path + "/" + rl_env + ".csv"
221
  new_history.to_csv(file_path, index=False)
222
 
223
  def download_leaderboard_dataset():
@@ -231,7 +245,7 @@ def get_data(rl_env, path) -> pd.DataFrame:
231
  """
232
  csv_path = path + "/" + rl_env + ".csv"
233
  if not os.path.exists(csv_path):
234
- return pd.DataFrame(columns=['Ranking', 'User', 'Model', 'Results', 'Mean Reward', 'Std Reward'])
235
 
236
  data = pd.read_csv(csv_path)
237
 
@@ -255,8 +269,8 @@ def get_data_no_html(rl_env, path) -> pd.DataFrame:
255
 
256
  def rank_dataframe(dataframe):
257
  if dataframe.empty:
258
- return pd.DataFrame(columns=['User', 'Model', 'Results', 'Mean Reward', 'Std Reward'])
259
- dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
260
  if not 'Ranking' in dataframe.columns:
261
  dataframe.insert(0, 'Ranking', [i for i in range(1,len(dataframe)+1)])
262
  else:
@@ -268,7 +282,7 @@ def run_update_dataset():
268
  path_ = download_leaderboard_dataset()
269
  for i in range(0, len(rl_envs)):
270
  rl_env = rl_envs[i]
271
- update_leaderboard_dataset(rl_env["rl_env"], path_)
272
 
273
  api.upload_folder(
274
  folder_path=path_,
@@ -297,14 +311,15 @@ with block:
297
  with gr.TabItem(rl_env["rl_env_beautiful"]) as rl_tab:
298
  with gr.Row():
299
  markdown = """
300
- # {name_leaderboard}
 
301
 
302
- """.format(name_leaderboard = rl_env["rl_env_beautiful"], video_link = rl_env["video_link"])
303
  gr.Markdown(markdown)
304
 
305
 
306
  with gr.Row():
307
- gr_dataframe = gr.components.Dataframe(value=get_data(rl_env["rl_env"], path_), headers=["Ranking 🏆", "User 🤗", "Model id 🤖", "Results", "Mean Reward", "Std Reward"], datatype=["number", "markdown", "markdown", "number", "number", "number"], row_count=(15, 'dynamic'))
308
  """
309
  block.load(
310
  download_leaderboard_dataset,
 
24
  {
25
  "rl_env_beautiful": "LunarLander-v2 🚀",
26
  "rl_env": "LunarLander-v2",
27
+ "unit": "Unit 1",
28
+ "library": "stable-baselines3",
29
+ "min_result": 200,
 
 
 
 
 
30
  },
31
  {
32
  "rl_env_beautiful": "Taxi-v3 🚖",
33
  "rl_env": "Taxi-v3",
34
+ "unit": "Unit 2",
35
+ "library": "q-learning",
36
+ "min_result": 4,
37
  },
38
  {
39
  "rl_env_beautiful": "SpaceInvadersNoFrameskip-v4 👾",
40
  "rl_env": "SpaceInvadersNoFrameskip-v4",
41
+ "unit": "Unit 3",
42
+ "library": "stable-baselines3",
43
+ "min_result": 200,
44
  },
45
  {
46
  "rl_env_beautiful": "CartPole-v1",
47
  "rl_env": "CartPole-v1",
48
+ "unit": "Unit 4",
49
+ "library": "reinforce",
50
+ "min_result": 350,
51
  },
52
  {
53
  "rl_env_beautiful": "Pixelcopter-PLE-v0",
54
  "rl_env": "Pixelcopter-PLE-v0",
55
+ "unit": "Unit 4",
56
+ "library": "reinforce",
57
+ "min_result": 5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
  {
60
+ "rl_env_beautiful": "ML-Agents Snowball Target ❄️",
61
+ "rl_env": "ML-Agents-SnowballTarget",
62
+ "unit": "Unit 5",
63
+ "library": "ml-agents",
64
+ "min_result": -100,
65
  },
66
  {
67
+ "rl_env_beautiful": "ML-Agents Pyramids 🏔️",
68
+ "rl_env": "ML-Agents-Pyramids",
69
+ "unit": "Unit 5",
70
+ "library": "ml-agents",
71
+ "min_result": -100,
72
  },
73
  {
74
+ "rl_env_beautiful": "Panda Reach Dense 🤖",
75
+ "rl_env": "PandaReachDense",
76
+ "unit": "Unit 6",
77
+ "library": "stable-baselines3",
78
+ "min_result": -3.5,
79
  },
80
  {
81
+ "rl_env_beautiful": "ML-Agents Soccer Twos ⚽",
82
+ "rl_env": "ML-Agents-SoccerTwos",
83
+ "unit": "Unit 7",
84
+ "library": "ml-agents",
85
+ "min_result": -100,
86
  },
87
  {
88
+ "rl_env_beautiful": "Doom Health Gathering Supreme",
89
+ "rl_env": "doom_health_gathering_supreme",
90
+ "unit": "Unit 8 PII",
91
+ "library": "sample-factory",
92
+ "min_result": 5,
93
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  ]
95
 
96
  def restart():
 
136
  return mean_reward, std_reward
137
 
138
 
139
+ def get_user_models(hf_username, env_tag, lib_tag):
140
+ """
141
+ List the Reinforcement Learning models
142
+ from user given environment and lib
143
+ :param hf_username: User HF username
144
+ :param env_tag: Environment tag
145
+ :param lib_tag: Library tag
146
+ """
147
  api = HfApi()
148
+ models = api.list_models(author=hf_username, filter=["reinforcement-learning", env_tag, lib_tag])
 
 
149
 
150
+ user_model_ids = [(x.modelId, (x.created_at or x.last_modified)) for x in models]
151
+ return user_model_ids
152
+
153
+
154
+ def get_user_sf_models(hf_username, env_tag, lib_tag):
155
+ models_sf = []
156
+ models = api.list_models(author=hf_username, filter=["reinforcement-learning", lib_tag])
157
+
158
+ user_model_ids = [(x.modelId, (x.created_at or x.last_modified)) for x in models]
159
+
160
+ for model, last_updated in user_model_ids:
161
+ meta = get_metadata(model)
162
+ if meta is None:
163
+ continue
164
+ result = meta["model-index"][0]["results"][0]["dataset"]["name"]
165
+ if result == env_tag:
166
+ models_sf.append((model, last_updated))
167
+
168
+ return models_sf
169
+
170
+
171
+ def calculate_best_result(user_model_ids):
172
+ """
173
+ Calculate the best results of a unit
174
+ best_result = mean_reward - std_reward
175
+ :param user_model_ids: RL models of a user
176
+ """
177
+ best_result = -1000
178
+ best_model_id = ""
179
+ best_last_updated = None
180
+ for model, last_updated in user_model_ids:
181
+ meta = get_metadata(model)
182
+ if meta is None:
183
+ continue
184
+ accuracy = parse_metrics_accuracy(meta)
185
+ mean_reward, std_reward = parse_rewards(accuracy)
186
+ result = mean_reward - std_reward
187
+ if result > best_result:
188
+ best_result = result
189
+ best_model_id = model
190
+ best_last_updated = last_updated
191
+
192
+ return best_result, best_model_id, best_last_updated
193
+
194
+ def get_model_ids(hf_username, rl_env):
195
+ if rl_env["rl_env"] == "PandaReachDense":
196
+ # Since Unit 6 can use PandaReachDense-v2 or v3
197
+ user_models = get_user_models(hf_username, "PandaReachDense-v3", rl_env["library"])
198
+ if len(user_models) == 0:
199
+ user_models = get_user_models(hf_username, "PandaReachDense-v2", rl_env["library"])
200
+ elif rl_env["rl_env"] != "doom_health_gathering_supreme":
201
+ user_models = get_user_models(hf_username, rl_env["rl_env"], rl_env["library"])
202
+ else:
203
+ user_models = get_user_sf_models(hf_username, rl_env["rl_env"], rl_env["library"])
204
+
205
+ # Calculate the best result and get the best_model_id
206
+ best_result, best_model_id, best_last_updated = calculate_best_result(user_models)
207
+ passed = best_result >= rl_env["min_result"]
208
+ return best_model_id, best_result, best_last_updated, passed
209
 
210
 
211
  def update_leaderboard_dataset(rl_env, path):
212
  # Get model ids associated with rl_env
213
+ model_info = []
214
+ for user_id in STUDENTS_SET:
215
+ model_info.append(get_model_ids(user_id, rl_env))
216
  data = []
217
+ for model_id, result, updated, passed in model_info:
218
+ if model_id is None or model_id == "":
 
 
219
  continue
220
  user_id = model_id.split('/')[0]
221
  row = {}
222
  row["User"] = user_id
223
  row["Model"] = model_id
224
+ row["Result"] = result
225
+ row["Submitted"] = updated
226
+ row["Passed"] = passed
 
 
 
 
227
  data.append(row)
228
 
229
  if not data:
 
231
 
232
  ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
233
  new_history = ranked_dataframe
234
+ file_path = path + "/" + rl_env["rl_env"] + ".csv"
235
  new_history.to_csv(file_path, index=False)
236
 
237
  def download_leaderboard_dataset():
 
245
  """
246
  csv_path = path + "/" + rl_env + ".csv"
247
  if not os.path.exists(csv_path):
248
+ return pd.DataFrame(columns=['Ranking', 'User', 'Model', 'Result', 'Submitted', 'Passed'])
249
 
250
  data = pd.read_csv(csv_path)
251
 
 
269
 
270
  def rank_dataframe(dataframe):
271
  if dataframe.empty:
272
+ return pd.DataFrame(columns=['User', 'Model', 'Result', 'Passed'])
273
+ dataframe = dataframe.sort_values(by=['Result'], ascending=False)
274
  if not 'Ranking' in dataframe.columns:
275
  dataframe.insert(0, 'Ranking', [i for i in range(1,len(dataframe)+1)])
276
  else:
 
282
  path_ = download_leaderboard_dataset()
283
  for i in range(0, len(rl_envs)):
284
  rl_env = rl_envs[i]
285
+ update_leaderboard_dataset(rl_env, path_)
286
 
287
  api.upload_folder(
288
  folder_path=path_,
 
311
  with gr.TabItem(rl_env["rl_env_beautiful"]) as rl_tab:
312
  with gr.Row():
313
  markdown = """
314
+ # {unit}
315
+ ## {name_leaderboard}
316
 
317
+ """.format(name_leaderboard = rl_env["rl_env_beautiful"], unit=rl_env["unit"])
318
  gr.Markdown(markdown)
319
 
320
 
321
  with gr.Row():
322
+ gr_dataframe = gr.components.Dataframe(value=get_data(rl_env["rl_env"], path_), headers=["Ranking 🏆", "User 🤗", "Model id 🤖", "Result", "Submitted", "Passed"], datatype=["number", "markdown", "markdown", "number", "date", "bool"], row_count=(15, 'dynamic'))
323
  """
324
  block.load(
325
  download_leaderboard_dataset,
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- APScheduler==3.10.1
2
  gradio==5.49.1
3
  httpx>=0.24.1
4
  tqdm
 
1
+ APScheduler==3.11.1
2
  gradio==5.49.1
3
  httpx>=0.24.1
4
  tqdm