upload via upload_folder 2025-08-05T09:49:11.989523+00:00
Browse files
- README.md +19 -6
- eval_result.json +2 -2
- full_model.pt +3 -0
- params.json +2 -1
- replay.mp4 +2 -2
- state_dict.pt +3 -0
- tensorboard/events.out.tfevents.1753436180.winkindeMacBook-Air.local.66308.0 +3 -0
README.md
CHANGED
|
@@ -5,9 +5,9 @@ tags:
|
|
| 5 |
- td3
|
| 6 |
- reinforcement-learning
|
| 7 |
- custom-implementation
|
| 8 |
-
-
|
| 9 |
-
-
|
| 10 |
-
-
|
| 11 |
model-index:
|
| 12 |
- name: TD3-Walker2dV5
|
| 13 |
results:
|
|
@@ -19,7 +19,7 @@ model-index:
|
|
| 19 |
type: Walker2d-v5
|
| 20 |
metrics:
|
| 21 |
- type: mean_reward
|
| 22 |
-
value: 4348.
|
| 23 |
name: mean_reward
|
| 24 |
verified: false
|
| 25 |
---
|
|
@@ -28,9 +28,22 @@ model-index:
|
|
| 28 |
This is a trained model of a **TD3** agent playing **Walker2d-v5**.
|
| 29 |
|
| 30 |
## Usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
|
|
|
| 35 |
env = gym.make("Walker2d-v5")
|
|
|
|
|
|
|
| 36 |
...
|
|
|
|
|
|
|
|
|
| 5 |
- td3
|
| 6 |
- reinforcement-learning
|
| 7 |
- custom-implementation
|
| 8 |
+
- policy-gradient
|
| 9 |
+
- pytorch
|
| 10 |
+
- ddpg
|
| 11 |
model-index:
|
| 12 |
- name: TD3-Walker2dV5
|
| 13 |
results:
|
|
|
|
| 19 |
type: Walker2d-v5
|
| 20 |
metrics:
|
| 21 |
- type: mean_reward
|
| 22 |
+
value: 4348.91 +/- 73.32
|
| 23 |
name: mean_reward
|
| 24 |
verified: false
|
| 25 |
---
|
|
|
|
| 28 |
This is a trained model of a **TD3** agent playing **Walker2d-v5**.
|
| 29 |
|
| 30 |
## Usage
|
| 31 |
+
### Create the conda environment (from a clone of https://github.com/GeneHit/drl_practice)
|
| 32 |
+
```bash
|
| 33 |
+
conda create -n drl python=3.10
|
| 34 |
+
conda activate drl
|
| 35 |
+
python -m pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
|
| 38 |
+
### Play with the full model
|
| 39 |
+
```python
|
| 40 |
+
# load the full model
|
| 41 |
+
model = load_from_hub(repo_id="winkin119/TD3-Walker2dV5", filename="full_model.pt")
|
| 42 |
|
| 43 |
+
# Create the environment.
|
| 44 |
env = gym.make("Walker2d-v5")
|
| 45 |
+
state, _ = env.reset()
|
| 46 |
+
action = model.action(state)
|
| 47 |
...
|
| 48 |
+
```
|
| 49 |
+
A state-dict version of the model (`state_dict.pt`) is also available; see the repo for the corresponding model definition.
|
eval_result.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"mean_reward": 4348.
|
| 3 |
-
"std_reward": 73.
|
| 4 |
"datetime": "2025-07-25 20:06:04",
|
| 5 |
"train_duration_min": "148.37"
|
| 6 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"mean_reward": 4348.906197770564,
|
| 3 |
+
"std_reward": 73.3169869523695,
|
| 4 |
"datetime": "2025-07-25 20:06:04",
|
| 5 |
"train_duration_min": "148.37"
|
| 6 |
}
|
full_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59e8b5b2f3226b3711abde1c3f1a525575f43414a7ab2639d745b68a6f325864
|
| 3 |
+
size 522233
|
params.json
CHANGED
|
@@ -39,5 +39,6 @@
|
|
| 39 |
},
|
| 40 |
"max_action": 1.0,
|
| 41 |
"tau": 0.05,
|
| 42 |
-
"max_grad_norm": 0.5
|
|
|
|
| 43 |
}
|
|
|
|
| 39 |
},
|
| 40 |
"max_action": 1.0,
|
| 41 |
"tau": 0.05,
|
| 42 |
+
"max_grad_norm": 0.5,
|
| 43 |
+
"smooth_l1_loss_beta": null
|
| 44 |
}
|
replay.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7078510567605d99b2179aeb8340b514dc3df0f6bb2620a3289b5488daeac3f
|
| 3 |
+
size 931191
|
state_dict.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59118fa73c1658960e1406c927dfd1de9a200e6a4c34995623dfb0359688338a
|
| 3 |
+
size 520377
|
tensorboard/events.out.tfevents.1753436180.winkindeMacBook-Air.local.66308.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8071dea26196af78543fc8aeee2338bfd189961194c2e99f4565c4d2770e0b7c
|
| 3 |
+
size 159485697
|