winkin119 committed on
Commit
94c0e3c
·
verified ·
1 Parent(s): 062c070

upload via upload_folder 2025-07-29T12:35:20.331691+00:00

Browse files
README.md CHANGED
@@ -2,11 +2,13 @@
2
  env_name: CartPole-v1
3
  tags:
4
  - CartPole-v1
5
- - reinforce
6
  - reinforcement-learning
7
  - custom-implementation
8
  - policy-gradient
9
  - pytorch
 
 
10
  model-index:
11
  - name: Reinforce-CartPole
12
  results:
@@ -18,7 +20,7 @@ model-index:
18
  type: CartPole-v1
19
  metrics:
20
  - type: mean_reward
21
- value: 439.30 +/- 74.67
22
  name: mean_reward
23
  verified: false
24
  ---
 
2
  env_name: CartPole-v1
3
  tags:
4
  - CartPole-v1
5
+ - vanilla-reinforce
6
  - reinforcement-learning
7
  - custom-implementation
8
  - policy-gradient
9
  - pytorch
10
+ - vanilla
11
+ - monte-carlo
12
  model-index:
13
  - name: Reinforce-CartPole
14
  results:
 
20
  type: CartPole-v1
21
  metrics:
22
  - type: mean_reward
23
+ value: 500.00 +/- 0.00
24
  name: mean_reward
25
  verified: false
26
  ---
eval_result.json CHANGED
@@ -1 +1,6 @@
1
- {"mean_reward": 439.3, "std_reward": 74.66933774984214, "date": "2025-07-11 21:09:00"}
 
 
 
 
 
 
1
+ {
2
+ "mean_reward": 500.0,
3
+ "std_reward": 0.0,
4
+ "datetime": "2025-07-29T12:27:37.503071+00:00",
5
+ "train_duration_min": "1.06"
6
+ }
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41e077ce25f157f2b41dfa7b3f683aec0c1109c905d0ebf9ceef11d3df2b36fe
3
+ size 74365
params.json CHANGED
@@ -1 +1,37 @@
1
- {"hyper_params": {"global_episode": 1000, "lr": 0.0001, "gamma": 0.99, "grad_acc": 1, "num_envs": 2, "use_multi_processing": true}, "env_params": {"env_id": "CartPole-v1", "max_steps": 1000, "observation_space.shape": [4], "action_space": 2}, "eval_params": {"eval_episodes": 20, "eval_seed": [1, 55, 44, 23, 12, 34, 86, 78, 90, 100, 166, 127, 134, 145, 151, 115, 178, 189, 190, 200]}, "output_params": {"output_dir": "results/exercise3_reinforce/cartpole/", "save_result": true, "model_filename": "reinforce.pth", "params_filename": "params.json", "train_result_filename": "train_result.json", "eval_result_filename": "eval_result.json"}, "hub_params": {"repo_id": "Reinforce-CartPole"}, "_notes": ["checkpoint_pathname: results/exercise3_reinforce/cartpole/reinforce.pth"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_config": {
3
+ "env_id": "CartPole-v1",
4
+ "env_kwargs": {},
5
+ "max_steps": null,
6
+ "normalize_obs": false,
7
+ "use_image": false,
8
+ "vector_env_num": null,
9
+ "use_multi_processing": false,
10
+ "image_shape": null,
11
+ "frame_stack": 1,
12
+ "frame_skip": 1,
13
+ "training_render_mode": null
14
+ },
15
+ "device": "cpu",
16
+ "learning_rate": 0.0001,
17
+ "gamma": 0.99,
18
+ "checkpoint_pathname": "",
19
+ "max_grad_norm": null,
20
+ "log_interval": 100,
21
+ "eval_episodes": 20,
22
+ "eval_random_seed": 42,
23
+ "eval_video_num": 10,
24
+ "episode": 2000,
25
+ "entropy_coef": {
26
+ "_type": "LinearSchedule",
27
+ "_module": "practice.utils_for_coding.scheduler_utils",
28
+ "_start_e": 0.1,
29
+ "_end_e": 0.01,
30
+ "_duration": 2000,
31
+ "_start_t": 0
32
+ },
33
+ "hidden_sizes": [
34
+ 128,
35
+ 128
36
+ ]
37
+ }
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
tensorboard/events.out.tfevents.1753791985.winkindeMacBook-Air.local.57654.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:141f4b06a8edd5ac46572ca3ad8943f3114d52c57e97da7ffeaf5d93755b63a1
3
+ size 717178