winkin119 committed on
Commit
ce10d84
·
verified ·
1 Parent(s): 0d2e14f
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ env_name: Pendulum-v1
3
+ tags:
4
+ - Pendulum-v1
5
+ - td3
6
+ - reinforcement-learning
7
+ - custom-implementation
8
+ - TD3
9
+ - DDPG
10
+ - Pendulum
11
+ model-index:
12
+ - name: TD3-PendulumV1
13
+   results:
14
+   - task:
15
+       type: reinforcement-learning
16
+       name: reinforcement-learning
17
+     dataset:
18
+       name: Pendulum-v1
19
+       type: Pendulum-v1
20
+     metrics:
21
+     - type: mean_reward
22
+       value: -127.79 +/- 65.35
23
+       name: mean_reward
24
+       verified: false
25
+ ---
26
+
27
+ # **TD3** Agent playing **Pendulum-v1**
28
+ This is a trained model of a **TD3** agent playing **Pendulum-v1**.
29
+
30
+ ## Usage
31
+
32
+     model = load_from_hub(repo_id="winkin119/TD3-PendulumV1", filename="td3_pendulum.pth")
33
+
34
+
35
+     env = gym.make("Pendulum-v1")
36
+     ...
eval_result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"mean_reward": -127.78696345508857, "std_reward": 65.34958155884144, "datetime": "2025-07-24 23:20:12", "train_duration_min": "2.48"}
params.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env_config": {
3
+ "env_id": "Pendulum-v1",
4
+ "env_kwargs": {},
5
+ "max_steps": null,
6
+ "use_image": false,
7
+ "vector_env_num": 6,
8
+ "use_multi_processing": true,
9
+ "image_shape": null,
10
+ "frame_stack": 1,
11
+ "frame_skip": 1,
12
+ "training_render_mode": null
13
+ },
14
+ "device": "cpu",
15
+ "learning_rate": 0.0003,
16
+ "gamma": 0.99,
17
+ "checkpoint_pathname": "",
18
+ "eval_episodes": 50,
19
+ "eval_random_seed": 42,
20
+ "eval_video_num": 10,
21
+ "total_steps": 240000,
22
+ "hidden_sizes": [
23
+ 256,
24
+ 256
25
+ ],
26
+ "critic_lr": 0.0003,
27
+ "replay_buffer_capacity": 240000,
28
+ "batch_size": 128,
29
+ "update_start_step": 20000,
30
+ "policy_delay": 3,
31
+ "policy_noise": 0.2,
32
+ "noise_clip": 0.5,
33
+ "exploration_noise": {
34
+ "_type": "LinearSchedule",
35
+ "_module": "practice.utils_for_coding.scheduler_utils",
36
+ "start_e": 0.3,
37
+ "end_e": 0.0,
38
+ "duration": 10000
39
+ },
40
+ "max_action": 2.0,
41
+ "tau": 0.005,
42
+ "max_grad_norm": 0.5,
43
+ "env_wrappers": [
44
+ "PendulumEnv",
45
+ "PassiveEnvChecker",
46
+ "OrderEnforcing",
47
+ "TimeLimit",
48
+ "CastObsFloat32Wrapper",
49
+ "RecordEpisodeStatistics",
50
+ "Autoreset"
51
+ ]
52
+ }
replay.mp4 ADDED
Binary file (24.9 kB). View file
 
td3_pendulum.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41931e48f2a454d63856f5fa02f997d62ed44406a5527ce8b2a1c629e5ea6b28
3
+ size 273425
tensorboard/events.out.tfevents.1753370258.winkindeMacBook-Air.local.50207.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d3019db394dc3c651f88dff4c3b8b01792c9b88cf79ff1a98087c724f5e742
3
+ size 14868184
video/rl-video-episode-0.mp4 ADDED
Binary file (27.3 kB). View file
 
video/rl-video-episode-10.mp4 ADDED
Binary file (27.2 kB). View file
 
video/rl-video-episode-15.mp4 ADDED
Binary file (24.7 kB). View file
 
video/rl-video-episode-20.mp4 ADDED
Binary file (33.5 kB). View file
 
video/rl-video-episode-25.mp4 ADDED
Binary file (13.3 kB). View file
 
video/rl-video-episode-30.mp4 ADDED
Binary file (23.4 kB). View file
 
video/rl-video-episode-35.mp4 ADDED
Binary file (24.9 kB). View file
 
video/rl-video-episode-40.mp4 ADDED
Binary file (25.2 kB). View file
 
video/rl-video-episode-45.mp4 ADDED
Binary file (24.9 kB). View file
 
video/rl-video-episode-5.mp4 ADDED
Binary file (24.1 kB). View file