ImaghT committed on
Commit
eef4ddc
·
verified ·
1 Parent(s): e9b5b42

Upload Unit_6_upload.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. Unit_6_upload.py +384 -0
Unit_6_upload.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # Unit_6_upload.py - 智能上传(优先使用最佳模型)
3
+ # ============================================================
4
+
5
+ import gymnasium as gym
6
+ import panda_gym
7
+ import numpy as np
8
+ import os
9
+ import shutil
10
+ from stable_baselines3 import A2C
11
+ from stable_baselines3.common.env_util import make_vec_env
12
+ from stable_baselines3.common.vec_env import VecNormalize, VecVideoRecorder
13
+ from stable_baselines3.common.evaluation import evaluate_policy
14
+ from huggingface_hub import HfApi, create_repo
15
+
16
# ============================================================
# Configuration (edit these values for your own account/model)
# ============================================================
USERNAME = "ImaghT"  # Hugging Face user name that owns the repository
MODEL_NAME = "a2c-PandaReachDense-v3"
ENV_ID = "PandaReachDense-v3"
N_EVAL_EPISODES = 20  # more evaluation episodes give a more accurate estimate

# Hub repository identifier in "<user>/<model>" form.
repo_id = "/".join((USERNAME, MODEL_NAME))
25
+
26
# ============================================================
# 1. Model file detection (the best checkpoint takes priority)
# ============================================================
print("="*60)
print("🔍 检测可用模型文件...")
print("="*60)

# Candidate artefact locations. Model paths are given WITHOUT the ".zip"
# suffix; the suffix is appended wherever the file itself is needed.
BEST_MODEL_PATH = "/home/eason/Workspace/RL/Unit_6/logs/best_model"
BEST_VEC_NORMALIZE_PATH = "/home/eason/Workspace/RL/Unit_6/logs/best_model_vecnormalize.pkl"
FINAL_MODEL_PATH = "a2c-PandaReachDense-v3"
FINAL_VEC_NORMALIZE_PATH = "vec_normalize.pkl"

# Priority order: best model > final model. Each entry is
# (model path, normalisation stats path, source tag, message to print).
_MODEL_CANDIDATES = (
    (BEST_MODEL_PATH, BEST_VEC_NORMALIZE_PATH, "best_model",
     "✅ 发现训练期间保存的最佳模型(推荐使用)"),
    (FINAL_MODEL_PATH, FINAL_VEC_NORMALIZE_PATH, "final_model",
     "✅ 发现最终训练模型"),
)

for _model_path, _norm_path, _source, _found_msg in _MODEL_CANDIDATES:
    # Both files of a pair must be present; isfile() rejects a directory
    # that merely happens to carry the expected name.
    if os.path.isfile(f"{_model_path}.zip") and os.path.isfile(_norm_path):
        print(_found_msg)
        MODEL_PATH = _model_path
        VEC_NORMALIZE_PATH = _norm_path
        model_source = _source
        break
else:
    print("❌ 错误: 未找到可用的模型文件!")
    print("\n请确保以下文件之一存在:")
    print(f" 方案1: {BEST_MODEL_PATH}.zip + {BEST_VEC_NORMALIZE_PATH}")
    print(f" 方案2: {FINAL_MODEL_PATH}.zip + {FINAL_VEC_NORMALIZE_PATH}")
    print("\n请先运行 Unit 6.py 训练代码。")
    # The bare exit() helper is injected by the `site` module for interactive
    # use and is not guaranteed to exist; SystemExit always is.
    raise SystemExit(1)

print(f"📁 使用模型: {MODEL_PATH}")
print(f"📁 使用归一化: {VEC_NORMALIZE_PATH}")
print(f"📊 模型来源: {model_source}\n")
61
+
62
# ============================================================
# 2. Load the model
# ============================================================
print("加载模型...")
# Wrap a fresh single-environment VecEnv with the saved VecNormalize state.
eval_env = VecNormalize.load(VEC_NORMALIZE_PATH, make_vec_env(ENV_ID, n_envs=1))
# Test-time settings: training mode off, reward normalisation off.
eval_env.training = False
eval_env.norm_reward = False

model = A2C.load(MODEL_PATH, env=eval_env)
print("✅ 模型加载成功\n")
73
+
74
# ============================================================
# 3. Evaluate the model
# ============================================================
print("="*60)
print(f"🧪 开始评估 ({N_EVAL_EPISODES} episodes)...")
print("="*60)

mean_reward, std_reward = evaluate_policy(
    model, eval_env, n_eval_episodes=N_EVAL_EPISODES, deterministic=True
)

# The score compared against the baseline is mean minus one standard deviation.
score = mean_reward - std_reward
_passed = score >= -3.5
status_emoji = "✅" if _passed else "❌"

print("\n" + "="*60)
print("📊 评估结果:")
print(f" Mean Reward: {mean_reward:.2f}")
print(f" Std Reward: {std_reward:.2f}")
print(f" Score (mean-std): {score:.2f}")
print(f" 通过基准线: -3.5")
if _passed:
    print(f" ✅ 状态: PASSED")
else:
    # Report how far below the baseline the score is.
    print(f" ❌ 状态: NOT PASSED (还差 {-3.5 - score:.2f} 分)")
print("="*60 + "\n")
103
+
104
# ============================================================
# 4. Record a demonstration video
# ============================================================
print("🎬 生成演示视频...")
video_folder = "/home/eason/Workspace/RL/Unit_6/video_upload"
os.makedirs(video_folder, exist_ok=True)

# A separate environment is built for recording, with the same saved
# normalisation state and the same test-time settings as the evaluation one.
video_env = VecNormalize.load(VEC_NORMALIZE_PATH, make_vec_env(ENV_ID, n_envs=1))
video_env.training = False
video_env.norm_reward = False

# Recording is triggered at step 0 only and spans 500 steps.
video_env = VecVideoRecorder(
    video_env,
    video_folder,
    record_video_trigger=lambda step: step == 0,
    video_length=500,
    name_prefix="panda-reach-agent",
)

obs = video_env.reset()
for _step in range(500):
    action, _ = model.predict(obs, deterministic=True)
    # Only the next observation is needed; rewards/dones/infos are ignored.
    obs = video_env.step(action)[0]

video_env.close()
print(f"✅ 视频已生成\n")
131
+
132
# ============================================================
# 5. Inspect the training log (optional extra information)
# ============================================================
def build_training_info(timesteps, results, source):
    """Render the "Training Monitoring" section of the README.

    Args:
        timesteps: Sequence of the training steps at which evaluations ran;
            its last element is reported as the final training step.
        results: Array of evaluation rewards that is averaged over axis 1,
            i.e. one row per evaluation.
        source: Model source tag; "best_model" selects the "best model"
            wording, any other value the "final model" wording.

    Returns:
        The markdown text of the section. Values that cannot be computed
        because the log is empty are rendered as "Unknown".
    """
    total_evals = len(timesteps)
    # Format numbers only when they exist: applying ":," or ":.2f" to the
    # "Unknown" placeholder would raise ValueError.
    if total_evals > 0:
        final_timestep = f"{int(timesteps[-1]):,}"
    else:
        final_timestep = "Unknown"
    if len(results) > 0:
        best_eval_reward = f"{float(np.max(np.mean(results, axis=1))):.2f}"
    else:
        best_eval_reward = "Unknown"
    source_label = (
        "Best model from training" if source == "best_model"
        else "Final training model"
    )

    return f"""
## Training Monitoring

This model was trained with comprehensive monitoring:

- **Total Evaluations**: {total_evals} (every 500,000 steps)
- **Final Training Step**: {final_timestep}
- **Best Evaluation Reward**: {best_eval_reward}
- **Model Source**: {source_label}
- **Callbacks Used**: EvalCallback, CheckpointCallback
- **TensorBoard Logging**: Enabled

"""


training_info = ""
_eval_log_path = "/home/eason/Workspace/RL/Unit_6/logs/evaluations.npz"
if os.path.exists(_eval_log_path):
    try:
        # The context manager closes the .npz archive once both arrays
        # have been read into memory.
        with np.load(_eval_log_path) as evaluations:
            timesteps = evaluations['timesteps']
            results = evaluations['results']

        total_evals = len(timesteps)
        training_info = build_training_info(timesteps, results, model_source)
        print(f"📈 发现训练日志: {total_evals} 次评估记录")
    except Exception as e:
        # Best effort: an unreadable log must not block the upload, so fall
        # back to a generic description.
        print(f"⚠️ 读取训练日志失败: {e}")
        training_info = "\n## Training Monitoring\n\nModel trained with monitoring callbacks.\n"
else:
    training_info = "\n## Training Configuration\n\nStandard training without detailed monitoring.\n"
166
+
167
# ============================================================
# 6. Build the README.md model card
# ============================================================
# The leading "---" block is YAML front matter; its nesting is significant,
# so the indentation inside it must be kept. The rest is plain markdown.
# The training hyper-parameters listed below are static text, not values
# read from the training run.
readme_content = f"""---
library_name: stable-baselines3
tags:
- PandaReachDense-v3
- deep-reinforcement-learning
- reinforcement-learning
- stable-baselines3
- robotics
- panda-gym
model-index:
- name: A2C
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: PandaReachDense-v3
      type: PandaReachDense-v3
    metrics:
    - type: mean_reward
      value: {mean_reward:.2f} +/- {std_reward:.2f}
      name: mean_reward
      verified: false
---

# {status_emoji} **A2C** Agent playing **PandaReachDense-v3**

This is a trained model of an **A2C** agent playing **PandaReachDense-v3**
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
and the [Deep Reinforcement Learning Course](https://huggingface.co/deep-rl-course/unit6).

This environment is part of the [Panda-Gym](https://github.com/qgallouedec/panda-gym) environments and includes robotic manipulation tasks where the robot arm needs to reach a target position.

## 🏆 Evaluation Results

| Metric | Value |
|--------|-------|
| Mean Reward | {mean_reward:.2f} |
| Std Reward | {std_reward:.2f} |
| **Score (mean - std)** | **{score:.2f}** |
| Baseline Required | -3.5 |
| Evaluation Episodes | {N_EVAL_EPISODES} |
| Status | {status_emoji} {"**PASSED**" if score >= -3.5 else "**NOT PASSED**"} |
| Model Source | {model_source.replace('_', ' ').title()} |

{training_info}

## 🚀 Usage

```python
import gymnasium as gym
import panda_gym
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize

# Load environment and normalization
env = make_vec_env("PandaReachDense-v3", n_envs=1)
env = VecNormalize.load("vec_normalize.pkl", env)

# ⚠️ CRITICAL: disable training mode and reward normalization at test time
env.training = False
env.norm_reward = False

# Load model
model = A2C.load("a2c-PandaReachDense-v3", env=env)

# Run inference
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
```

## 🔧 Training Configuration

- **Algorithm**: A2C (Advantage Actor-Critic)
- **Policy**: MultiInputPolicy (for Dict observation spaces)
- **Environment**: PandaReachDense-v3
- **Total Timesteps**: 2,000,000
- **Number of Parallel Envs**: 64
- **Normalization**: VecNormalize (observation + reward)
- **Observation Clipping**: 10.0
- **Evaluation Frequency**: Every 500,000 steps
- **Checkpoint Frequency**: Every 500,000 steps

## 🤖 Model Architecture

The agent uses a **MultiInputPolicy** because the observation space is a dictionary containing:
- `observation`: Robot joint positions, velocities, and gripper state
- `desired_goal`: Target position coordinates (x, y, z)
- `achieved_goal`: Current end-effector position coordinates (x, y, z)

The goal is to minimize the distance between `achieved_goal` and `desired_goal`.

## 📈 Performance Notes

- **Reward Range**: Typically from -50 (far from target) to 0 (at target)
- **Success Criteria**: Score (mean reward - std) of at least -3.5
- **Episode Length**: Usually 50 steps per episode
- **Convergence**: Expect improvement after 200k-500k steps

## 🎯 Tips for Reproduction

1. **Normalization is Critical**: Always use VecNormalize for robotic tasks
2. **MultiInputPolicy Required**: Dict observation spaces need special handling
3. **Sufficient Training**: 1M+ timesteps recommended for stable performance
4. **Evaluation**: Use deterministic=True for consistent evaluation results
"""
281
+
282
# ============================================================
# 7. Stage the files to upload
# ============================================================
print("📦 准备上传文件...")
upload_folder = "/home/eason/Workspace/RL/Unit_6/upload_temp"
# Start from an empty staging directory: the whole folder is uploaded later,
# so leftovers from an interrupted earlier run would be published as well.
shutil.rmtree(upload_folder, ignore_errors=True)
os.makedirs(upload_folder, exist_ok=True)

# Write the README.
readme_path = os.path.join(upload_folder, "README.md")
with open(readme_path, "w", encoding="utf-8") as f:
    f.write(readme_content)
print(f"✅ 创建 README.md")

# Copy the model file under its standard name.
model_dest = os.path.join(upload_folder, f"{MODEL_NAME}.zip")
shutil.copy(f"{MODEL_PATH}.zip", model_dest)
print(f"✅ 复制模型文件: {MODEL_PATH}.zip -> {MODEL_NAME}.zip")

# Copy the normalisation statistics under their standard name.
vec_norm_dest = os.path.join(upload_folder, "vec_normalize.pkl")
shutil.copy(VEC_NORMALIZE_PATH, vec_norm_dest)
print(f"✅ 复制归一化文件: {VEC_NORMALIZE_PATH} -> vec_normalize.pkl")

# Copy the demonstration video. The video folder is not emptied between
# runs and os.listdir() returns entries in arbitrary order, so take the most
# recently modified .mp4 rather than whichever happens to be listed first.
video_files = [
    os.path.join(video_folder, name)
    for name in os.listdir(video_folder)
    if name.endswith(".mp4")
]
if video_files:
    video_src = max(video_files, key=os.path.getmtime)
    video_dest = os.path.join(upload_folder, "replay.mp4")
    shutil.copy(video_src, video_dest)
    print(f"✅ 复制视频文件")
else:
    print(f"⚠️ 未找到视频文件(可选)")

# Optional: include the training evaluation log when it exists.
if os.path.exists("/home/eason/Workspace/RL/Unit_6/logs/evaluations.npz"):
    eval_dest = os.path.join(upload_folder, "training_evaluations.npz")
    shutil.copy("/home/eason/Workspace/RL/Unit_6/logs/evaluations.npz", eval_dest)
    print(f"✅ 复制训练评估日志")
320
+
321
# ============================================================
# 8. Upload to the Hugging Face Hub
# ============================================================
print(f"\n🚀 上传到 {repo_id}...")

api = HfApi()

# Create the repository; exist_ok=True makes this a no-op when it already
# exists. A failure here is only reported — the upload is still attempted.
try:
    create_repo(repo_id, repo_type="model", exist_ok=True)
except Exception as e:
    print(f"⚠️ 仓库警告: {e}")
else:
    print(f"✅ 仓库已创建/验证")

# Upload the whole staging folder as a single commit.
try:
    commit_message = (
        f"A2C PandaReach ({model_source}) - "
        f"Mean: {mean_reward:.2f}, Std: {std_reward:.2f}, Score: {score:.2f}"
    )
    api.upload_folder(
        folder_path=upload_folder,
        repo_id=repo_id,
        repo_type="model",
        commit_message=commit_message,
    )
except Exception as e:
    print(f"\n❌ 上传失败: {e}")
    print(" 请检查:")
    print(" 1. 是否已运行 'huggingface-cli login'")
    print(" 2. 网络连接是否正常")
    print(" 3. 用户名是否正确\n")
else:
    print(f"\n{'='*60}")
    print("🎉 上传成功!")
    print(f"{'='*60}")
    print(f"🔗 模型页面: https://huggingface.co/{repo_id}")
    print(f"🏆 检查进度: https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course")
    print(f"📊 模型来源: {model_source.replace('_', ' ').title()}")
    print(f"🎯 评估分数: {score:.2f} ({'通过' if score >= -3.5 else '未通过'})")
    print(f"{'='*60}\n")
finally:
    # Remove the staging folder whether or not the upload succeeded.
    shutil.rmtree(upload_folder)
    print("🧹 清理临时文件")

print("✨ 完成!")
365
+
366
# ============================================================
# 9. Final summary output
# ============================================================
print("\n" + "="*60)
print("📋 上传总结")
print("="*60)
print(f"📁 上传的文件:")
print(f" - {MODEL_NAME}.zip (模型)")
print(f" - vec_normalize.pkl (归一化参数)")
print(f" - README.md (文档)")
# replay.mp4 is only staged when the video folder contains an .mp4, so it is
# only listed under the same condition.
if any(name.endswith(".mp4") for name in os.listdir(video_folder)):
    print(f" - replay.mp4 (演示视频)")
# The evaluation log is optional and only staged when the file exists.
if os.path.exists("/home/eason/Workspace/RL/Unit_6/logs/evaluations.npz"):
    print(f" - training_evaluations.npz (训练日志)")

print(f"\n🎯 关键信息:")
print(f" - 使用了 {'最佳' if model_source == 'best_model' else '最终'} 模型")
print(f" - 评估分数: {score:.2f}")
print(f" - 状态: {'✅ 通过' if score >= -3.5 else '❌ 未通过'}")
print("="*60)