ssain0771 committed on
Commit
254f275
·
verified ·
1 Parent(s): 498775d

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. hyperparameters.json +17 -8
  3. model.pt +2 -2
  4. replay.mp4 +0 -0
  5. results.json +6 -4
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: Pixelcopter-PLE-v0
17
  metrics:
18
  - type: mean_reward
19
- value: 15.23 +/- 14.93
20
  name: mean_reward
21
  verified: false
22
  ---
 
16
  type: Pixelcopter-PLE-v0
17
  metrics:
18
  - type: mean_reward
19
+ value: 13.82 +/- 13.88
20
  name: mean_reward
21
  verified: false
22
  ---
hyperparameters.json CHANGED
@@ -1,14 +1,23 @@
1
  {
2
- "h_size": 30,
3
- "n_training_episodes": 10000,
4
- "n_evaluation_episodes": 30,
5
- "max_t": 10000,
6
  "gamma": 0.99,
7
- "lr": 0.001,
 
 
 
 
 
 
 
 
 
8
  "use_ema_baseline": true,
9
  "baseline_beta": 0.98,
10
  "normalize_advantages": true,
11
- "env_id": "Pixelcopter-PLE-v0",
12
- "state_space": 7,
13
- "action_space": 2
 
14
  }
 
1
  {
2
+ "env_id": "Pixelcopter-PLE-v0",
3
+ "h_size": 128,
4
+ "lr": 0.00015,
 
5
  "gamma": 0.99,
6
+ "max_t": 5000,
7
+ "n_training_episodes": 20000,
8
+ "print_every": 1000,
9
+ "use_baseline": true,
10
+ "obs_scale": 55.0,
11
+ "clip_obs": 10.0,
12
+ "use_gap_shaping": true,
13
+ "gap_shaping_k": 0.1,
14
+ "use_entropy_bonus": true,
15
+ "entropy_coef": 0.01,
16
  "use_ema_baseline": true,
17
  "baseline_beta": 0.98,
18
  "normalize_advantages": true,
19
+ "use_wall_penalty": true,
20
+ "wall_margin": 12.0,
21
+ "wall_penalty": 0.05,
22
+ "n_evaluation_episodes": 50
23
  }
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c327e107ce938a7a561c50cd8199cc9d80976ca8977b5e256a9c0e73c38a3d46
3
- size 12861
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db9b6e70cefb2108d34408c4a07d1b5e97ec0a05eb7b08f0013459d4822e9ea
3
+ size 8265
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1,7 +1,9 @@
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
- "mean_reward": 15.233333333333333,
4
- "std_reward": 14.932477654056237,
5
- "n_evaluation_episodes": 30,
6
- "eval_datetime": "2026-02-17T02:42:33.068182"
 
 
7
  }
 
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
+ "mean_reward": 13.82,
4
+ "std_reward": 13.877593451315684,
5
+ "mean_episode_length": 118.9,
6
+ "std_episode_length": 78.96005319147145,
7
+ "n_evaluation_episodes": 50,
8
+ "eval_datetime": "2026-02-17T03:00:41.303951"
9
  }