Upload folder using huggingface_hub

Files changed (5)

README.md CHANGED Viewed

@@ -16,12 +16,21 @@ model-index:
       type: Pixelcopter-PLE-v0
     metrics:
     - type: mean_reward
-      value: 29.82 +/- 25.01
       name: mean_reward
       verified: false
 ---
-# Reinforce Agent playing Pixelcopter-PLE-v0
-This is a trained **REINFORCE** (policy-gradient) agent playing **Pixelcopter-PLE-v0**.
-Trained following Unit 4 of the Hugging Face Deep Reinforcement Learning Course.

       type: Pixelcopter-PLE-v0
     metrics:
     - type: mean_reward
+      value: 20.045
       name: mean_reward
       verified: false
 ---
+# REINFORCE Agent playing Pixelcopter-PLE-v0
+This repository contains a **REINFORCE** agent trained for **Pixelcopter-PLE-v0** (Hugging Face Deep RL Course, Unit 4).
+## Evaluation
+- Episodes: 200
+- Max steps/episode: 500
+- Mean reward: 20.05
+- Std reward: 11.38
+Artifacts:
+- `model_state_dict.pt` (PyTorch state_dict)
+- `results.json` (machine-readable evaluation)
+- `replay.mp4` (sample rollout)

hyperparameters.json CHANGED Viewed

@@ -1,11 +1,7 @@
 {
   "h_size": 64,
-  "n_training_episodes": 20000,
   "n_evaluation_episodes": 200,
   "max_t": 500,
-  "gamma": 0.925,
-  "lr": 0.0001,
-  "env_id": "Pixelcopter-PLE-v0",
-  "state_space": 7,
-  "action_space": 2
 }

 {
+  "env_id": "Pixelcopter-PLE-v0",
   "h_size": 64,
   "n_evaluation_episodes": 200,
   "max_t": 500,
+  "seed": 42
 }

model_state_dict.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9d55c6d5c45a613f054df9f3885f2103c5aa8db3e67691202b0105546c40921
-size 39489

 version https://git-lfs.github.com/spec/v1
+oid sha256:67e4a1f97733e5cd0037ea7c4d8af7bce8dae3a950860b34214d2be9389bbb76
+size 39773

replay.mp4 CHANGED Viewed

Binary files a/replay.mp4 and b/replay.mp4 differ

results.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
   "env_id": "Pixelcopter-PLE-v0",
-  "mean_reward": 29.82,
-  "std_reward": 25.010749688883777,
   "n_evaluation_episodes": 200,
-  "eval_datetime": "2026-01-11T15:25:28.014496"
 }

 {
   "env_id": "Pixelcopter-PLE-v0",
+  "mean_reward": 20.045,
+  "std_reward": 11.376861386164464,
   "n_evaluation_episodes": 200,
+  "max_t": 500,
+  "eval_datetime": "2026-01-11T15:36:19.358499"
 }