ssain0771 committed on
Commit
3dd7913
·
verified ·
1 Parent(s): 254f275

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. hyperparameters.json +9 -15
  3. model.pt +2 -2
  4. replay.mp4 +0 -0
  5. results.json +5 -5
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: Pixelcopter-PLE-v0
17
  metrics:
18
  - type: mean_reward
19
- value: 13.82 +/- 13.88
20
  name: mean_reward
21
  verified: false
22
  ---
 
16
  type: Pixelcopter-PLE-v0
17
  metrics:
18
  - type: mean_reward
19
+ value: 27.28 +/- 26.56
20
  name: mean_reward
21
  verified: false
22
  ---
hyperparameters.json CHANGED
@@ -1,23 +1,17 @@
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
- "h_size": 128,
4
- "lr": 0.00015,
 
5
  "gamma": 0.99,
6
- "max_t": 5000,
7
- "n_training_episodes": 20000,
8
- "print_every": 1000,
9
- "use_baseline": true,
10
- "obs_scale": 55.0,
11
- "clip_obs": 10.0,
12
- "use_gap_shaping": true,
13
- "gap_shaping_k": 0.1,
14
- "use_entropy_bonus": true,
15
- "entropy_coef": 0.01,
16
  "use_ema_baseline": true,
17
  "baseline_beta": 0.98,
18
  "normalize_advantages": true,
19
- "use_wall_penalty": true,
20
- "wall_margin": 12.0,
21
- "wall_penalty": 0.05,
22
  "n_evaluation_episodes": 50
23
  }
 
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
+ "h_size": 64,
4
+ "n_training_episodes": 40000,
5
+ "max_t": 10000,
6
  "gamma": 0.99,
7
+ "lr": 0.0001,
8
+ "print_every": 500,
9
+ "obs_scale": 1.0,
10
+ "clip_obs": null,
 
 
 
 
 
 
11
  "use_ema_baseline": true,
12
  "baseline_beta": 0.98,
13
  "normalize_advantages": true,
14
+ "use_entropy_bonus": true,
15
+ "entropy_coef": 0.005,
 
16
  "n_evaluation_episodes": 50
17
  }
model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db9b6e70cefb2108d34408c4a07d1b5e97ec0a05eb7b08f0013459d4822e9ea
3
- size 8265
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f63a4a5c2930d72570718093273d345556cb99dd09743db6e417496d798eaf
3
+ size 5705
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
- "mean_reward": 13.82,
4
- "std_reward": 13.877593451315684,
5
- "mean_episode_length": 118.9,
6
- "std_episode_length": 78.96005319147145,
7
  "n_evaluation_episodes": 50,
8
- "eval_datetime": "2026-02-17T03:00:41.303951"
9
  }
 
1
  {
2
  "env_id": "Pixelcopter-PLE-v0",
3
+ "mean_reward": 27.28,
4
+ "std_reward": 26.560903599087137,
5
+ "mean_episode_length": 194.28,
6
+ "std_episode_length": 151.0614497481075,
7
  "n_evaluation_episodes": 50,
8
+ "eval_datetime": "2026-02-17T12:21:52.963761"
9
  }