johnjim0816
committed on
Commit
·
c93cde9
1
Parent(s):
7e0d2ec
update Cartpole-v1 DuelingDQN
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml +0 -41
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt +0 -14
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml +0 -47
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt +0 -53
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml +0 -47
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt +0 -53
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv +0 -11
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml +0 -41
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt +0 -119
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv +0 -101
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt +0 -43
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv +0 -202
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt +0 -43
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv +0 -401
- CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml +19 -22
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt +169 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best +0 -0
- CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0} +2 -2
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/config.yaml
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DuelingDQN
|
| 3 |
-
device: cuda
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: true
|
| 8 |
-
load_path: Train_CartPole-v1_DuelingDQN_20221122-125403
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: test
|
| 11 |
-
save_fig: true
|
| 12 |
-
seed: 1
|
| 13 |
-
show_fig: false
|
| 14 |
-
test_eps: 10
|
| 15 |
-
train_eps: 100
|
| 16 |
-
algo_cfg:
|
| 17 |
-
batch_size: 64
|
| 18 |
-
buffer_size: 100000
|
| 19 |
-
epsilon_decay: 500
|
| 20 |
-
epsilon_end: 0.01
|
| 21 |
-
epsilon_start: 0.95
|
| 22 |
-
gamma: 0.99
|
| 23 |
-
hidden_dim: 256
|
| 24 |
-
lr: 0.0001
|
| 25 |
-
target_update: 4
|
| 26 |
-
value_layers:
|
| 27 |
-
- activation: relu
|
| 28 |
-
layer_dim:
|
| 29 |
-
- n_states
|
| 30 |
-
- 256
|
| 31 |
-
layer_type: linear
|
| 32 |
-
- activation: relu
|
| 33 |
-
layer_dim:
|
| 34 |
-
- 256
|
| 35 |
-
- 256
|
| 36 |
-
layer_type: linear
|
| 37 |
-
- activation: none
|
| 38 |
-
layer_dim:
|
| 39 |
-
- 256
|
| 40 |
-
- n_actions
|
| 41 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/logs/log.txt
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
2022-11-22 12:54:55 - r - INFO: - n_states: 4, n_actions: 2
|
| 2 |
-
2022-11-22 12:54:58 - r - INFO: - Start testing!
|
| 3 |
-
2022-11-22 12:54:58 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
|
| 4 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
| 5 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
| 6 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
| 7 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
| 8 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
| 9 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
| 10 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 11 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
| 12 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
| 13 |
-
2022-11-22 12:54:58 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
| 14 |
-
2022-11-22 12:54:58 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/learning_curve.png
DELETED
|
Binary file (27.6 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_20221122-125455/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,200.0,200
|
| 3 |
-
1,200.0,200
|
| 4 |
-
2,200.0,200
|
| 5 |
-
3,200.0,200
|
| 6 |
-
4,200.0,200
|
| 7 |
-
5,200.0,200
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,200.0,200
|
| 10 |
-
8,200.0,200
|
| 11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/config.yaml
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DuelingDQN
|
| 3 |
-
device: cpu
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: true
|
| 8 |
-
load_path: Train_CartPole-v1_DuelingDQN_mp_20230407-170853
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: test
|
| 11 |
-
mp_backend: mp
|
| 12 |
-
n_workers: 1
|
| 13 |
-
new_step_api: true
|
| 14 |
-
render: false
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
-
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 200
|
| 21 |
-
wrapper: null
|
| 22 |
-
algo_cfg:
|
| 23 |
-
batch_size: 64
|
| 24 |
-
buffer_size: 100000
|
| 25 |
-
epsilon_decay: 500
|
| 26 |
-
epsilon_end: 0.01
|
| 27 |
-
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.99
|
| 29 |
-
hidden_dim: 256
|
| 30 |
-
lr: 0.0001
|
| 31 |
-
target_update: 4
|
| 32 |
-
value_layers:
|
| 33 |
-
- activation: relu
|
| 34 |
-
layer_dim:
|
| 35 |
-
- n_states
|
| 36 |
-
- 256
|
| 37 |
-
layer_type: linear
|
| 38 |
-
- activation: relu
|
| 39 |
-
layer_dim:
|
| 40 |
-
- 256
|
| 41 |
-
- 256
|
| 42 |
-
layer_type: linear
|
| 43 |
-
- activation: none
|
| 44 |
-
layer_dim:
|
| 45 |
-
- 256
|
| 46 |
-
- n_actions
|
| 47 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/logs/log.txt
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
2023-04-07 17:11:20 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-07 17:11:20 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-07 17:11:20 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-07 17:11:20 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-07 17:11:20 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-07 17:11:20 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-07 17:11:20 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-07 17:11:20 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-07 17:11:20 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
| 10 |
-
2023-04-07 17:11:20 - r - INFO: - mode test <class 'str'>
|
| 11 |
-
2023-04-07 17:11:20 - r - INFO: - mp_backend mp <class 'str'>
|
| 12 |
-
2023-04-07 17:11:20 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-07 17:11:20 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-07 17:11:20 - r - INFO: - train_eps 200 <class 'int'>
|
| 15 |
-
2023-04-07 17:11:20 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-07 17:11:20 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-07 17:11:20 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-07 17:11:20 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-07 17:11:20 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
| 20 |
-
2023-04-07 17:11:20 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_mp_20230407-170853 <class 'str'>
|
| 21 |
-
2023-04-07 17:11:20 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-07 17:11:20 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-07 17:11:20 - r - INFO: - n_workers 1 <class 'int'>
|
| 24 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-07 17:11:20 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-07 17:11:20 - r - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
-
2023-04-07 17:11:20 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-07 17:11:20 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-07 17:11:20 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-07 17:11:20 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-07 17:11:20 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-07 17:11:20 - r - INFO: - hidden_dim 256 <class 'int'>
|
| 34 |
-
2023-04-07 17:11:20 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120 <class 'str'>
|
| 35 |
-
2023-04-07 17:11:20 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/results <class 'str'>
|
| 36 |
-
2023-04-07 17:11:20 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/logs <class 'str'>
|
| 37 |
-
2023-04-07 17:11:20 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/traj <class 'str'>
|
| 38 |
-
2023-04-07 17:11:20 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-171120/videos <class 'str'>
|
| 39 |
-
2023-04-07 17:11:20 - r - INFO: - ================================================================================
|
| 40 |
-
2023-04-07 17:11:20 - r - INFO: - n_states: 4, n_actions: 2
|
| 41 |
-
2023-04-07 17:11:20 - r - INFO: - Start testing!
|
| 42 |
-
2023-04-07 17:11:20 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
| 43 |
-
2023-04-07 17:11:20 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
| 44 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
| 45 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 3/10, Reward: 190.000, Step: 190
|
| 46 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
| 47 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 5/10, Reward: 187.000, Step: 187
|
| 48 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 6/10, Reward: 182.000, Step: 182
|
| 49 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 50 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
| 51 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 9/10, Reward: 196.000, Step: 196
|
| 52 |
-
2023-04-07 17:11:21 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
| 53 |
-
2023-04-07 17:11:21 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/learning_curve.png
DELETED
|
Binary file (45.2 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_mp_20230407-171120/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,200.0,200
|
| 3 |
-
1,200.0,200
|
| 4 |
-
2,190.0,190
|
| 5 |
-
3,200.0,200
|
| 6 |
-
4,187.0,187
|
| 7 |
-
5,182.0,182
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,200.0,200
|
| 10 |
-
8,196.0,196
|
| 11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/config.yaml
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DuelingDQN
|
| 3 |
-
device: cpu
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: true
|
| 8 |
-
load_path: Train_CartPole-v1_DuelingDQN_20230407-153236
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: test
|
| 11 |
-
mp_backend: ray
|
| 12 |
-
n_workers: 1
|
| 13 |
-
new_step_api: true
|
| 14 |
-
render: false
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
-
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
-
algo_cfg:
|
| 23 |
-
batch_size: 64
|
| 24 |
-
buffer_size: 100000
|
| 25 |
-
epsilon_decay: 500
|
| 26 |
-
epsilon_end: 0.01
|
| 27 |
-
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.99
|
| 29 |
-
hidden_dim: 256
|
| 30 |
-
lr: 0.0001
|
| 31 |
-
target_update: 4
|
| 32 |
-
value_layers:
|
| 33 |
-
- activation: relu
|
| 34 |
-
layer_dim:
|
| 35 |
-
- n_states
|
| 36 |
-
- 256
|
| 37 |
-
layer_type: linear
|
| 38 |
-
- activation: relu
|
| 39 |
-
layer_dim:
|
| 40 |
-
- 256
|
| 41 |
-
- 256
|
| 42 |
-
layer_type: linear
|
| 43 |
-
- activation: none
|
| 44 |
-
layer_dim:
|
| 45 |
-
- 256
|
| 46 |
-
- n_actions
|
| 47 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/logs/log.txt
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
2023-04-07 16:52:08 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-07 16:52:08 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-07 16:52:08 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-07 16:52:08 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-07 16:52:08 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-07 16:52:08 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-07 16:52:08 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-07 16:52:08 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-07 16:52:08 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
| 10 |
-
2023-04-07 16:52:08 - r - INFO: - mode test <class 'str'>
|
| 11 |
-
2023-04-07 16:52:08 - r - INFO: - mp_backend ray <class 'str'>
|
| 12 |
-
2023-04-07 16:52:08 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-07 16:52:08 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-07 16:52:08 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-07 16:52:08 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-07 16:52:08 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-07 16:52:08 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-07 16:52:08 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-07 16:52:08 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
| 20 |
-
2023-04-07 16:52:08 - r - INFO: - load_path Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
|
| 21 |
-
2023-04-07 16:52:08 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-07 16:52:08 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-07 16:52:08 - r - INFO: - n_workers 1 <class 'int'>
|
| 24 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-07 16:52:08 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-07 16:52:08 - r - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
-
2023-04-07 16:52:08 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-07 16:52:08 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-07 16:52:08 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-07 16:52:08 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-07 16:52:08 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-07 16:52:08 - r - INFO: - hidden_dim 256 <class 'int'>
|
| 34 |
-
2023-04-07 16:52:08 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208 <class 'str'>
|
| 35 |
-
2023-04-07 16:52:08 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/results <class 'str'>
|
| 36 |
-
2023-04-07 16:52:08 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/logs <class 'str'>
|
| 37 |
-
2023-04-07 16:52:08 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/traj <class 'str'>
|
| 38 |
-
2023-04-07 16:52:08 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DuelingDQN_20230407-165208/videos <class 'str'>
|
| 39 |
-
2023-04-07 16:52:08 - r - INFO: - ================================================================================
|
| 40 |
-
2023-04-07 16:52:08 - r - INFO: - n_states: 4, n_actions: 2
|
| 41 |
-
2023-04-07 16:52:08 - r - INFO: - Start testing!
|
| 42 |
-
2023-04-07 16:52:08 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
| 43 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 1/10, Reward: 171.000, Step: 171
|
| 44 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 2/10, Reward: 185.000, Step: 185
|
| 45 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 3/10, Reward: 159.000, Step: 159
|
| 46 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 4/10, Reward: 155.000, Step: 155
|
| 47 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
| 48 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 6/10, Reward: 120.000, Step: 120
|
| 49 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 50 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 8/10, Reward: 187.000, Step: 187
|
| 51 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 9/10, Reward: 154.000, Step: 154
|
| 52 |
-
2023-04-07 16:52:08 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
| 53 |
-
2023-04-07 16:52:08 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/learning_curve.png
DELETED
|
Binary file (45.9 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DuelingDQN_ray_20230407-165208/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,171.0,171
|
| 3 |
-
1,185.0,185
|
| 4 |
-
2,159.0,159
|
| 5 |
-
3,155.0,155
|
| 6 |
-
4,200.0,200
|
| 7 |
-
5,120.0,120
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,187.0,187
|
| 10 |
-
8,154.0,154
|
| 11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/config.yaml
DELETED
|
@@ -1,41 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DuelingDQN
|
| 3 |
-
device: cuda
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: false
|
| 8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: train
|
| 11 |
-
save_fig: true
|
| 12 |
-
seed: 1
|
| 13 |
-
show_fig: false
|
| 14 |
-
test_eps: 10
|
| 15 |
-
train_eps: 100
|
| 16 |
-
algo_cfg:
|
| 17 |
-
batch_size: 64
|
| 18 |
-
buffer_size: 100000
|
| 19 |
-
epsilon_decay: 500
|
| 20 |
-
epsilon_end: 0.01
|
| 21 |
-
epsilon_start: 0.95
|
| 22 |
-
gamma: 0.99
|
| 23 |
-
hidden_dim: 256
|
| 24 |
-
lr: 0.0001
|
| 25 |
-
target_update: 4
|
| 26 |
-
value_layers:
|
| 27 |
-
- activation: relu
|
| 28 |
-
layer_dim:
|
| 29 |
-
- n_states
|
| 30 |
-
- 256
|
| 31 |
-
layer_type: linear
|
| 32 |
-
- activation: relu
|
| 33 |
-
layer_dim:
|
| 34 |
-
- 256
|
| 35 |
-
- 256
|
| 36 |
-
layer_type: linear
|
| 37 |
-
- activation: none
|
| 38 |
-
layer_dim:
|
| 39 |
-
- 256
|
| 40 |
-
- n_actions
|
| 41 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/logs/log.txt
DELETED
|
@@ -1,119 +0,0 @@
|
|
| 1 |
-
2022-11-22 12:54:03 - r - INFO: - n_states: 4, n_actions: 2
|
| 2 |
-
2022-11-22 12:54:06 - r - INFO: - Start training!
|
| 3 |
-
2022-11-22 12:54:06 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cuda
|
| 4 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
|
| 5 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
|
| 6 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
|
| 7 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
|
| 8 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
|
| 9 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 5 has the best eval reward: 9.100
|
| 10 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
|
| 11 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
|
| 12 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
|
| 13 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
|
| 14 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
|
| 15 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 10 has the best eval reward: 9.200
|
| 16 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
|
| 17 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
|
| 18 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
|
| 19 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
|
| 20 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
|
| 21 |
-
2022-11-22 12:54:06 - r - INFO: - Current episode 15 has the best eval reward: 9.300
|
| 22 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
|
| 23 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
|
| 24 |
-
2022-11-22 12:54:06 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
|
| 25 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
|
| 26 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
|
| 27 |
-
2022-11-22 12:54:07 - r - INFO: - Current episode 20 has the best eval reward: 9.900
|
| 28 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 21/100, Reward: 14.000, Step: 14
|
| 29 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 22/100, Reward: 20.000, Step: 20
|
| 30 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 23/100, Reward: 11.000, Step: 11
|
| 31 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 24/100, Reward: 24.000, Step: 24
|
| 32 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 25/100, Reward: 11.000, Step: 11
|
| 33 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 26/100, Reward: 11.000, Step: 11
|
| 34 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 27/100, Reward: 11.000, Step: 11
|
| 35 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 28/100, Reward: 13.000, Step: 13
|
| 36 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 29/100, Reward: 11.000, Step: 11
|
| 37 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 30/100, Reward: 8.000, Step: 8
|
| 38 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
|
| 39 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 32/100, Reward: 9.000, Step: 9
|
| 40 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 33/100, Reward: 34.000, Step: 34
|
| 41 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 34/100, Reward: 10.000, Step: 10
|
| 42 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 35/100, Reward: 10.000, Step: 10
|
| 43 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 36/100, Reward: 10.000, Step: 10
|
| 44 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 37/100, Reward: 34.000, Step: 34
|
| 45 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 38/100, Reward: 35.000, Step: 35
|
| 46 |
-
2022-11-22 12:54:07 - r - INFO: - Episode: 39/100, Reward: 32.000, Step: 32
|
| 47 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 40/100, Reward: 37.000, Step: 37
|
| 48 |
-
2022-11-22 12:54:08 - r - INFO: - Current episode 40 has the best eval reward: 27.500
|
| 49 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 41/100, Reward: 29.000, Step: 29
|
| 50 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 42/100, Reward: 52.000, Step: 52
|
| 51 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 43/100, Reward: 54.000, Step: 54
|
| 52 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 44/100, Reward: 90.000, Step: 90
|
| 53 |
-
2022-11-22 12:54:08 - r - INFO: - Episode: 45/100, Reward: 91.000, Step: 91
|
| 54 |
-
2022-11-22 12:54:09 - r - INFO: - Current episode 45 has the best eval reward: 87.500
|
| 55 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 46/100, Reward: 51.000, Step: 51
|
| 56 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 47/100, Reward: 101.000, Step: 101
|
| 57 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 48/100, Reward: 67.000, Step: 67
|
| 58 |
-
2022-11-22 12:54:09 - r - INFO: - Episode: 49/100, Reward: 103.000, Step: 103
|
| 59 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 50/100, Reward: 45.000, Step: 45
|
| 60 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 51/100, Reward: 137.000, Step: 137
|
| 61 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 52/100, Reward: 47.000, Step: 47
|
| 62 |
-
2022-11-22 12:54:10 - r - INFO: - Episode: 53/100, Reward: 89.000, Step: 89
|
| 63 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 54/100, Reward: 95.000, Step: 95
|
| 64 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 55/100, Reward: 55.000, Step: 55
|
| 65 |
-
2022-11-22 12:54:11 - r - INFO: - Episode: 56/100, Reward: 92.000, Step: 92
|
| 66 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 57/100, Reward: 155.000, Step: 155
|
| 67 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 58/100, Reward: 125.000, Step: 125
|
| 68 |
-
2022-11-22 12:54:12 - r - INFO: - Episode: 59/100, Reward: 152.000, Step: 152
|
| 69 |
-
2022-11-22 12:54:13 - r - INFO: - Episode: 60/100, Reward: 199.000, Step: 199
|
| 70 |
-
2022-11-22 12:54:13 - r - INFO: - Current episode 60 has the best eval reward: 179.100
|
| 71 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 61/100, Reward: 88.000, Step: 88
|
| 72 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 62/100, Reward: 200.000, Step: 200
|
| 73 |
-
2022-11-22 12:54:14 - r - INFO: - Episode: 63/100, Reward: 176.000, Step: 176
|
| 74 |
-
2022-11-22 12:54:15 - r - INFO: - Episode: 64/100, Reward: 200.000, Step: 200
|
| 75 |
-
2022-11-22 12:54:15 - r - INFO: - Episode: 65/100, Reward: 200.000, Step: 200
|
| 76 |
-
2022-11-22 12:54:16 - r - INFO: - Current episode 65 has the best eval reward: 198.700
|
| 77 |
-
2022-11-22 12:54:16 - r - INFO: - Episode: 66/100, Reward: 193.000, Step: 193
|
| 78 |
-
2022-11-22 12:54:17 - r - INFO: - Episode: 67/100, Reward: 200.000, Step: 200
|
| 79 |
-
2022-11-22 12:54:17 - r - INFO: - Episode: 68/100, Reward: 200.000, Step: 200
|
| 80 |
-
2022-11-22 12:54:18 - r - INFO: - Episode: 69/100, Reward: 200.000, Step: 200
|
| 81 |
-
2022-11-22 12:54:18 - r - INFO: - Episode: 70/100, Reward: 200.000, Step: 200
|
| 82 |
-
2022-11-22 12:54:19 - r - INFO: - Current episode 70 has the best eval reward: 200.000
|
| 83 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 71/100, Reward: 200.000, Step: 200
|
| 84 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 72/100, Reward: 200.000, Step: 200
|
| 85 |
-
2022-11-22 12:54:20 - r - INFO: - Episode: 73/100, Reward: 200.000, Step: 200
|
| 86 |
-
2022-11-22 12:54:21 - r - INFO: - Episode: 74/100, Reward: 200.000, Step: 200
|
| 87 |
-
2022-11-22 12:54:21 - r - INFO: - Episode: 75/100, Reward: 200.000, Step: 200
|
| 88 |
-
2022-11-22 12:54:22 - r - INFO: - Current episode 75 has the best eval reward: 200.000
|
| 89 |
-
2022-11-22 12:54:23 - r - INFO: - Episode: 76/100, Reward: 200.000, Step: 200
|
| 90 |
-
2022-11-22 12:54:23 - r - INFO: - Episode: 77/100, Reward: 200.000, Step: 200
|
| 91 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 78/100, Reward: 200.000, Step: 200
|
| 92 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 79/100, Reward: 200.000, Step: 200
|
| 93 |
-
2022-11-22 12:54:24 - r - INFO: - Episode: 80/100, Reward: 200.000, Step: 200
|
| 94 |
-
2022-11-22 12:54:25 - r - INFO: - Current episode 80 has the best eval reward: 200.000
|
| 95 |
-
2022-11-22 12:54:26 - r - INFO: - Episode: 81/100, Reward: 200.000, Step: 200
|
| 96 |
-
2022-11-22 12:54:26 - r - INFO: - Episode: 82/100, Reward: 200.000, Step: 200
|
| 97 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 83/100, Reward: 200.000, Step: 200
|
| 98 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 84/100, Reward: 200.000, Step: 200
|
| 99 |
-
2022-11-22 12:54:27 - r - INFO: - Episode: 85/100, Reward: 200.000, Step: 200
|
| 100 |
-
2022-11-22 12:54:28 - r - INFO: - Current episode 85 has the best eval reward: 200.000
|
| 101 |
-
2022-11-22 12:54:29 - r - INFO: - Episode: 86/100, Reward: 200.000, Step: 200
|
| 102 |
-
2022-11-22 12:54:29 - r - INFO: - Episode: 87/100, Reward: 200.000, Step: 200
|
| 103 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 88/100, Reward: 200.000, Step: 200
|
| 104 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 89/100, Reward: 200.000, Step: 200
|
| 105 |
-
2022-11-22 12:54:30 - r - INFO: - Episode: 90/100, Reward: 200.000, Step: 200
|
| 106 |
-
2022-11-22 12:54:31 - r - INFO: - Current episode 90 has the best eval reward: 200.000
|
| 107 |
-
2022-11-22 12:54:32 - r - INFO: - Episode: 91/100, Reward: 200.000, Step: 200
|
| 108 |
-
2022-11-22 12:54:32 - r - INFO: - Episode: 92/100, Reward: 200.000, Step: 200
|
| 109 |
-
2022-11-22 12:54:33 - r - INFO: - Episode: 93/100, Reward: 200.000, Step: 200
|
| 110 |
-
2022-11-22 12:54:33 - r - INFO: - Episode: 94/100, Reward: 200.000, Step: 200
|
| 111 |
-
2022-11-22 12:54:34 - r - INFO: - Episode: 95/100, Reward: 200.000, Step: 200
|
| 112 |
-
2022-11-22 12:54:34 - r - INFO: - Current episode 95 has the best eval reward: 200.000
|
| 113 |
-
2022-11-22 12:54:35 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
|
| 114 |
-
2022-11-22 12:54:35 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
|
| 115 |
-
2022-11-22 12:54:36 - r - INFO: - Episode: 98/100, Reward: 200.000, Step: 200
|
| 116 |
-
2022-11-22 12:54:36 - r - INFO: - Episode: 99/100, Reward: 200.000, Step: 200
|
| 117 |
-
2022-11-22 12:54:37 - r - INFO: - Episode: 100/100, Reward: 200.000, Step: 200
|
| 118 |
-
2022-11-22 12:54:37 - r - INFO: - Current episode 100 has the best eval reward: 200.000
|
| 119 |
-
2022-11-22 12:54:37 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/learning_curve.png
DELETED
|
Binary file (47.5 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_20221122-125403/results/res.csv
DELETED
|
@@ -1,101 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,18.0,18
|
| 3 |
-
1,35.0,35
|
| 4 |
-
2,13.0,13
|
| 5 |
-
3,32.0,32
|
| 6 |
-
4,16.0,16
|
| 7 |
-
5,9.0,9
|
| 8 |
-
6,12.0,12
|
| 9 |
-
7,16.0,16
|
| 10 |
-
8,14.0,14
|
| 11 |
-
9,12.0,12
|
| 12 |
-
10,13.0,13
|
| 13 |
-
11,14.0,14
|
| 14 |
-
12,19.0,19
|
| 15 |
-
13,9.0,9
|
| 16 |
-
14,15.0,15
|
| 17 |
-
15,12.0,12
|
| 18 |
-
16,11.0,11
|
| 19 |
-
17,9.0,9
|
| 20 |
-
18,13.0,13
|
| 21 |
-
19,17.0,17
|
| 22 |
-
20,14.0,14
|
| 23 |
-
21,20.0,20
|
| 24 |
-
22,11.0,11
|
| 25 |
-
23,24.0,24
|
| 26 |
-
24,11.0,11
|
| 27 |
-
25,11.0,11
|
| 28 |
-
26,11.0,11
|
| 29 |
-
27,13.0,13
|
| 30 |
-
28,11.0,11
|
| 31 |
-
29,8.0,8
|
| 32 |
-
30,13.0,13
|
| 33 |
-
31,9.0,9
|
| 34 |
-
32,34.0,34
|
| 35 |
-
33,10.0,10
|
| 36 |
-
34,10.0,10
|
| 37 |
-
35,10.0,10
|
| 38 |
-
36,34.0,34
|
| 39 |
-
37,35.0,35
|
| 40 |
-
38,32.0,32
|
| 41 |
-
39,37.0,37
|
| 42 |
-
40,29.0,29
|
| 43 |
-
41,52.0,52
|
| 44 |
-
42,54.0,54
|
| 45 |
-
43,90.0,90
|
| 46 |
-
44,91.0,91
|
| 47 |
-
45,51.0,51
|
| 48 |
-
46,101.0,101
|
| 49 |
-
47,67.0,67
|
| 50 |
-
48,103.0,103
|
| 51 |
-
49,45.0,45
|
| 52 |
-
50,137.0,137
|
| 53 |
-
51,47.0,47
|
| 54 |
-
52,89.0,89
|
| 55 |
-
53,95.0,95
|
| 56 |
-
54,55.0,55
|
| 57 |
-
55,92.0,92
|
| 58 |
-
56,155.0,155
|
| 59 |
-
57,125.0,125
|
| 60 |
-
58,152.0,152
|
| 61 |
-
59,199.0,199
|
| 62 |
-
60,88.0,88
|
| 63 |
-
61,200.0,200
|
| 64 |
-
62,176.0,176
|
| 65 |
-
63,200.0,200
|
| 66 |
-
64,200.0,200
|
| 67 |
-
65,193.0,193
|
| 68 |
-
66,200.0,200
|
| 69 |
-
67,200.0,200
|
| 70 |
-
68,200.0,200
|
| 71 |
-
69,200.0,200
|
| 72 |
-
70,200.0,200
|
| 73 |
-
71,200.0,200
|
| 74 |
-
72,200.0,200
|
| 75 |
-
73,200.0,200
|
| 76 |
-
74,200.0,200
|
| 77 |
-
75,200.0,200
|
| 78 |
-
76,200.0,200
|
| 79 |
-
77,200.0,200
|
| 80 |
-
78,200.0,200
|
| 81 |
-
79,200.0,200
|
| 82 |
-
80,200.0,200
|
| 83 |
-
81,200.0,200
|
| 84 |
-
82,200.0,200
|
| 85 |
-
83,200.0,200
|
| 86 |
-
84,200.0,200
|
| 87 |
-
85,200.0,200
|
| 88 |
-
86,200.0,200
|
| 89 |
-
87,200.0,200
|
| 90 |
-
88,200.0,200
|
| 91 |
-
89,200.0,200
|
| 92 |
-
90,200.0,200
|
| 93 |
-
91,200.0,200
|
| 94 |
-
92,200.0,200
|
| 95 |
-
93,200.0,200
|
| 96 |
-
94,200.0,200
|
| 97 |
-
95,200.0,200
|
| 98 |
-
96,200.0,200
|
| 99 |
-
97,200.0,200
|
| 100 |
-
98,200.0,200
|
| 101 |
-
99,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/logs/log.txt
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
2023-04-07 17:08:53 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-07 17:08:53 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-07 17:08:53 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-07 17:08:53 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-07 17:08:53 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-07 17:08:53 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-07 17:08:53 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-07 17:08:53 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-07 17:08:53 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
| 10 |
-
2023-04-07 17:08:53 - r - INFO: - mode train <class 'str'>
|
| 11 |
-
2023-04-07 17:08:53 - r - INFO: - mp_backend mp <class 'str'>
|
| 12 |
-
2023-04-07 17:08:53 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-07 17:08:53 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-07 17:08:53 - r - INFO: - train_eps 200 <class 'int'>
|
| 15 |
-
2023-04-07 17:08:53 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-07 17:08:53 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-07 17:08:53 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-07 17:08:53 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-07 17:08:53 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
| 20 |
-
2023-04-07 17:08:53 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 21 |
-
2023-04-07 17:08:53 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-07 17:08:53 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-07 17:08:53 - r - INFO: - n_workers 4 <class 'int'>
|
| 24 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-07 17:08:53 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-07 17:08:53 - r - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
-
2023-04-07 17:08:53 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-07 17:08:53 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-07 17:08:53 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-07 17:08:53 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-07 17:08:53 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-07 17:08:53 - r - INFO: - hidden_dim 256 <class 'int'>
|
| 34 |
-
2023-04-07 17:08:53 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853 <class 'str'>
|
| 35 |
-
2023-04-07 17:08:53 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/results <class 'str'>
|
| 36 |
-
2023-04-07 17:08:53 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/logs <class 'str'>
|
| 37 |
-
2023-04-07 17:08:53 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/traj <class 'str'>
|
| 38 |
-
2023-04-07 17:08:53 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-170853/videos <class 'str'>
|
| 39 |
-
2023-04-07 17:08:53 - r - INFO: - ================================================================================
|
| 40 |
-
2023-04-07 17:08:53 - r - INFO: - n_states: 4, n_actions: 2
|
| 41 |
-
2023-04-07 17:08:53 - r - INFO: - Start training!
|
| 42 |
-
2023-04-07 17:08:53 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
| 43 |
-
2023-04-07 17:10:11 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/models/checkpoint.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:87aab291b33d6423c3c54eced436183398700a290427c1913be9d65f5503b5ae
|
| 3 |
-
size 537607
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/learning_curve.png
DELETED
|
Binary file (55.3 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_mp_20230407-170853/results/res.csv
DELETED
|
@@ -1,202 +0,0 @@
|
|
| 1 |
-
episodes,rewards
|
| 2 |
-
0,17.0
|
| 3 |
-
1,16.0
|
| 4 |
-
2,24.0
|
| 5 |
-
3,38.0
|
| 6 |
-
4,18.0
|
| 7 |
-
5,18.0
|
| 8 |
-
6,19.0
|
| 9 |
-
7,17.0
|
| 10 |
-
8,27.0
|
| 11 |
-
9,16.0
|
| 12 |
-
10,15.0
|
| 13 |
-
11,31.0
|
| 14 |
-
12,33.0
|
| 15 |
-
13,13.0
|
| 16 |
-
14,9.0
|
| 17 |
-
15,34.0
|
| 18 |
-
16,32.0
|
| 19 |
-
17,34.0
|
| 20 |
-
18,12.0
|
| 21 |
-
19,13.0
|
| 22 |
-
20,11.0
|
| 23 |
-
21,9.0
|
| 24 |
-
22,9.0
|
| 25 |
-
23,11.0
|
| 26 |
-
24,38.0
|
| 27 |
-
25,13.0
|
| 28 |
-
26,20.0
|
| 29 |
-
27,10.0
|
| 30 |
-
28,33.0
|
| 31 |
-
29,15.0
|
| 32 |
-
30,32.0
|
| 33 |
-
31,11.0
|
| 34 |
-
32,13.0
|
| 35 |
-
33,12.0
|
| 36 |
-
34,9.0
|
| 37 |
-
35,13.0
|
| 38 |
-
36,14.0
|
| 39 |
-
37,21.0
|
| 40 |
-
38,48.0
|
| 41 |
-
39,19.0
|
| 42 |
-
40,65.0
|
| 43 |
-
41,28.0
|
| 44 |
-
42,15.0
|
| 45 |
-
43,11.0
|
| 46 |
-
44,54.0
|
| 47 |
-
45,26.0
|
| 48 |
-
46,37.0
|
| 49 |
-
47,40.0
|
| 50 |
-
48,54.0
|
| 51 |
-
49,54.0
|
| 52 |
-
50,50.0
|
| 53 |
-
51,84.0
|
| 54 |
-
52,55.0
|
| 55 |
-
53,43.0
|
| 56 |
-
54,45.0
|
| 57 |
-
55,48.0
|
| 58 |
-
56,88.0
|
| 59 |
-
57,41.0
|
| 60 |
-
58,46.0
|
| 61 |
-
59,61.0
|
| 62 |
-
60,32.0
|
| 63 |
-
61,53.0
|
| 64 |
-
62,59.0
|
| 65 |
-
63,49.0
|
| 66 |
-
64,60.0
|
| 67 |
-
65,35.0
|
| 68 |
-
66,82.0
|
| 69 |
-
67,50.0
|
| 70 |
-
68,108.0
|
| 71 |
-
69,121.0
|
| 72 |
-
70,113.0
|
| 73 |
-
71,67.0
|
| 74 |
-
72,87.0
|
| 75 |
-
73,96.0
|
| 76 |
-
74,181.0
|
| 77 |
-
75,62.0
|
| 78 |
-
76,137.0
|
| 79 |
-
77,175.0
|
| 80 |
-
78,123.0
|
| 81 |
-
79,149.0
|
| 82 |
-
80,172.0
|
| 83 |
-
81,200.0
|
| 84 |
-
82,156.0
|
| 85 |
-
83,146.0
|
| 86 |
-
84,200.0
|
| 87 |
-
85,200.0
|
| 88 |
-
86,200.0
|
| 89 |
-
87,128.0
|
| 90 |
-
88,188.0
|
| 91 |
-
89,200.0
|
| 92 |
-
90,200.0
|
| 93 |
-
91,200.0
|
| 94 |
-
92,200.0
|
| 95 |
-
93,200.0
|
| 96 |
-
94,200.0
|
| 97 |
-
95,200.0
|
| 98 |
-
96,200.0
|
| 99 |
-
97,200.0
|
| 100 |
-
98,195.0
|
| 101 |
-
99,200.0
|
| 102 |
-
100,200.0
|
| 103 |
-
101,196.0
|
| 104 |
-
102,200.0
|
| 105 |
-
103,200.0
|
| 106 |
-
104,192.0
|
| 107 |
-
105,200.0
|
| 108 |
-
106,190.0
|
| 109 |
-
107,200.0
|
| 110 |
-
108,200.0
|
| 111 |
-
109,200.0
|
| 112 |
-
110,197.0
|
| 113 |
-
111,200.0
|
| 114 |
-
112,200.0
|
| 115 |
-
113,200.0
|
| 116 |
-
114,200.0
|
| 117 |
-
115,200.0
|
| 118 |
-
116,200.0
|
| 119 |
-
117,200.0
|
| 120 |
-
118,200.0
|
| 121 |
-
119,200.0
|
| 122 |
-
120,200.0
|
| 123 |
-
121,200.0
|
| 124 |
-
122,200.0
|
| 125 |
-
123,200.0
|
| 126 |
-
124,200.0
|
| 127 |
-
125,200.0
|
| 128 |
-
126,200.0
|
| 129 |
-
127,200.0
|
| 130 |
-
128,200.0
|
| 131 |
-
129,200.0
|
| 132 |
-
130,200.0
|
| 133 |
-
131,200.0
|
| 134 |
-
132,200.0
|
| 135 |
-
133,200.0
|
| 136 |
-
134,200.0
|
| 137 |
-
135,200.0
|
| 138 |
-
136,200.0
|
| 139 |
-
137,197.0
|
| 140 |
-
138,200.0
|
| 141 |
-
139,200.0
|
| 142 |
-
140,200.0
|
| 143 |
-
141,200.0
|
| 144 |
-
142,200.0
|
| 145 |
-
143,200.0
|
| 146 |
-
144,21.0
|
| 147 |
-
145,193.0
|
| 148 |
-
146,123.0
|
| 149 |
-
147,194.0
|
| 150 |
-
148,9.0
|
| 151 |
-
149,9.0
|
| 152 |
-
150,48.0
|
| 153 |
-
151,200.0
|
| 154 |
-
152,200.0
|
| 155 |
-
153,200.0
|
| 156 |
-
154,200.0
|
| 157 |
-
155,200.0
|
| 158 |
-
156,200.0
|
| 159 |
-
157,200.0
|
| 160 |
-
158,200.0
|
| 161 |
-
159,200.0
|
| 162 |
-
160,200.0
|
| 163 |
-
161,200.0
|
| 164 |
-
162,200.0
|
| 165 |
-
163,200.0
|
| 166 |
-
164,200.0
|
| 167 |
-
165,200.0
|
| 168 |
-
166,200.0
|
| 169 |
-
167,200.0
|
| 170 |
-
168,200.0
|
| 171 |
-
169,200.0
|
| 172 |
-
170,200.0
|
| 173 |
-
171,200.0
|
| 174 |
-
172,200.0
|
| 175 |
-
173,200.0
|
| 176 |
-
174,200.0
|
| 177 |
-
175,200.0
|
| 178 |
-
176,200.0
|
| 179 |
-
177,200.0
|
| 180 |
-
178,200.0
|
| 181 |
-
179,200.0
|
| 182 |
-
180,200.0
|
| 183 |
-
181,200.0
|
| 184 |
-
182,200.0
|
| 185 |
-
183,200.0
|
| 186 |
-
184,200.0
|
| 187 |
-
185,200.0
|
| 188 |
-
186,200.0
|
| 189 |
-
187,200.0
|
| 190 |
-
188,200.0
|
| 191 |
-
189,200.0
|
| 192 |
-
190,200.0
|
| 193 |
-
191,200.0
|
| 194 |
-
192,200.0
|
| 195 |
-
193,200.0
|
| 196 |
-
194,200.0
|
| 197 |
-
195,200.0
|
| 198 |
-
196,200.0
|
| 199 |
-
197,200.0
|
| 200 |
-
198,200.0
|
| 201 |
-
199,200.0
|
| 202 |
-
200,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/logs/log.txt
DELETED
|
@@ -1,43 +0,0 @@
|
|
| 1 |
-
2023-04-07 15:32:36 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-07 15:32:36 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-07 15:32:36 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-07 15:32:36 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-07 15:32:36 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-07 15:32:36 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-07 15:32:36 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-07 15:32:36 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-07 15:32:36 - r - INFO: - algo_name DuelingDQN <class 'str'>
|
| 10 |
-
2023-04-07 15:32:36 - r - INFO: - mode train <class 'str'>
|
| 11 |
-
2023-04-07 15:32:36 - r - INFO: - mp_backend ray <class 'str'>
|
| 12 |
-
2023-04-07 15:32:36 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-07 15:32:36 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-07 15:32:36 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-07 15:32:36 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-07 15:32:36 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-07 15:32:36 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-07 15:32:36 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-07 15:32:36 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
| 20 |
-
2023-04-07 15:32:36 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 21 |
-
2023-04-07 15:32:36 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-07 15:32:36 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-07 15:32:36 - r - INFO: - n_workers 2 <class 'int'>
|
| 24 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-07 15:32:36 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-07 15:32:36 - r - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
-
2023-04-07 15:32:36 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-07 15:32:36 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-07 15:32:36 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-07 15:32:36 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-07 15:32:36 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-07 15:32:36 - r - INFO: - hidden_dim 256 <class 'int'>
|
| 34 |
-
2023-04-07 15:32:36 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236 <class 'str'>
|
| 35 |
-
2023-04-07 15:32:36 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/results <class 'str'>
|
| 36 |
-
2023-04-07 15:32:36 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/logs <class 'str'>
|
| 37 |
-
2023-04-07 15:32:36 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/traj <class 'str'>
|
| 38 |
-
2023-04-07 15:32:36 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DuelingDQN_20230407-153236/videos <class 'str'>
|
| 39 |
-
2023-04-07 15:32:36 - r - INFO: - ================================================================================
|
| 40 |
-
2023-04-07 15:32:39 - r - INFO: - n_states: 4, n_actions: 2
|
| 41 |
-
2023-04-07 15:32:39 - r - INFO: - Start training!
|
| 42 |
-
2023-04-07 15:32:39 - r - INFO: - Env: CartPole-v1, Algorithm: DuelingDQN, Device: cpu
|
| 43 |
-
2023-04-07 15:40:31 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/models/checkpoint.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:03f1262598e3d636dd22e3b2fc0dfe52bf7a55348d54f51f02a8410682ec5a18
|
| 3 |
-
size 537607
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/learning_curve.png
DELETED
|
Binary file (62.6 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DuelingDQN_ray_20230407-153236/results/res.csv
DELETED
|
@@ -1,401 +0,0 @@
|
|
| 1 |
-
episodes,rewards
|
| 2 |
-
0,18.0
|
| 3 |
-
1,18.0
|
| 4 |
-
2,19.0
|
| 5 |
-
3,28.0
|
| 6 |
-
4,17.0
|
| 7 |
-
5,15.0
|
| 8 |
-
6,13.0
|
| 9 |
-
7,15.0
|
| 10 |
-
8,38.0
|
| 11 |
-
9,31.0
|
| 12 |
-
10,11.0
|
| 13 |
-
11,31.0
|
| 14 |
-
12,10.0
|
| 15 |
-
13,17.0
|
| 16 |
-
14,14.0
|
| 17 |
-
15,13.0
|
| 18 |
-
16,21.0
|
| 19 |
-
17,15.0
|
| 20 |
-
18,9.0
|
| 21 |
-
19,10.0
|
| 22 |
-
20,22.0
|
| 23 |
-
21,19.0
|
| 24 |
-
22,11.0
|
| 25 |
-
23,13.0
|
| 26 |
-
24,20.0
|
| 27 |
-
25,15.0
|
| 28 |
-
26,14.0
|
| 29 |
-
27,12.0
|
| 30 |
-
28,10.0
|
| 31 |
-
29,11.0
|
| 32 |
-
30,12.0
|
| 33 |
-
31,14.0
|
| 34 |
-
32,9.0
|
| 35 |
-
33,10.0
|
| 36 |
-
34,16.0
|
| 37 |
-
35,13.0
|
| 38 |
-
36,15.0
|
| 39 |
-
37,12.0
|
| 40 |
-
38,14.0
|
| 41 |
-
39,10.0
|
| 42 |
-
40,14.0
|
| 43 |
-
41,10.0
|
| 44 |
-
42,11.0
|
| 45 |
-
43,16.0
|
| 46 |
-
44,16.0
|
| 47 |
-
45,12.0
|
| 48 |
-
46,15.0
|
| 49 |
-
47,19.0
|
| 50 |
-
48,15.0
|
| 51 |
-
49,20.0
|
| 52 |
-
50,15.0
|
| 53 |
-
51,11.0
|
| 54 |
-
52,13.0
|
| 55 |
-
53,12.0
|
| 56 |
-
54,12.0
|
| 57 |
-
55,12.0
|
| 58 |
-
56,12.0
|
| 59 |
-
57,12.0
|
| 60 |
-
58,11.0
|
| 61 |
-
59,10.0
|
| 62 |
-
60,13.0
|
| 63 |
-
61,11.0
|
| 64 |
-
62,12.0
|
| 65 |
-
63,9.0
|
| 66 |
-
64,11.0
|
| 67 |
-
65,11.0
|
| 68 |
-
66,10.0
|
| 69 |
-
67,9.0
|
| 70 |
-
68,11.0
|
| 71 |
-
69,11.0
|
| 72 |
-
70,11.0
|
| 73 |
-
71,12.0
|
| 74 |
-
72,10.0
|
| 75 |
-
73,12.0
|
| 76 |
-
74,9.0
|
| 77 |
-
75,10.0
|
| 78 |
-
76,9.0
|
| 79 |
-
77,10.0
|
| 80 |
-
78,9.0
|
| 81 |
-
79,10.0
|
| 82 |
-
80,11.0
|
| 83 |
-
81,9.0
|
| 84 |
-
82,12.0
|
| 85 |
-
83,11.0
|
| 86 |
-
84,12.0
|
| 87 |
-
85,10.0
|
| 88 |
-
86,9.0
|
| 89 |
-
87,11.0
|
| 90 |
-
88,9.0
|
| 91 |
-
89,9.0
|
| 92 |
-
90,10.0
|
| 93 |
-
91,15.0
|
| 94 |
-
92,11.0
|
| 95 |
-
93,9.0
|
| 96 |
-
94,10.0
|
| 97 |
-
95,16.0
|
| 98 |
-
96,13.0
|
| 99 |
-
97,9.0
|
| 100 |
-
98,10.0
|
| 101 |
-
99,10.0
|
| 102 |
-
100,13.0
|
| 103 |
-
101,11.0
|
| 104 |
-
102,10.0
|
| 105 |
-
103,9.0
|
| 106 |
-
104,13.0
|
| 107 |
-
105,16.0
|
| 108 |
-
106,12.0
|
| 109 |
-
107,9.0
|
| 110 |
-
108,11.0
|
| 111 |
-
109,9.0
|
| 112 |
-
110,13.0
|
| 113 |
-
111,11.0
|
| 114 |
-
112,18.0
|
| 115 |
-
113,13.0
|
| 116 |
-
114,9.0
|
| 117 |
-
115,12.0
|
| 118 |
-
116,10.0
|
| 119 |
-
117,10.0
|
| 120 |
-
118,10.0
|
| 121 |
-
119,13.0
|
| 122 |
-
120,10.0
|
| 123 |
-
121,11.0
|
| 124 |
-
122,10.0
|
| 125 |
-
123,10.0
|
| 126 |
-
124,9.0
|
| 127 |
-
125,10.0
|
| 128 |
-
126,11.0
|
| 129 |
-
127,14.0
|
| 130 |
-
128,12.0
|
| 131 |
-
129,9.0
|
| 132 |
-
130,11.0
|
| 133 |
-
131,14.0
|
| 134 |
-
132,11.0
|
| 135 |
-
133,10.0
|
| 136 |
-
134,13.0
|
| 137 |
-
135,9.0
|
| 138 |
-
136,11.0
|
| 139 |
-
137,11.0
|
| 140 |
-
138,11.0
|
| 141 |
-
139,9.0
|
| 142 |
-
140,10.0
|
| 143 |
-
141,9.0
|
| 144 |
-
142,9.0
|
| 145 |
-
143,12.0
|
| 146 |
-
144,9.0
|
| 147 |
-
145,10.0
|
| 148 |
-
146,9.0
|
| 149 |
-
147,10.0
|
| 150 |
-
148,9.0
|
| 151 |
-
149,10.0
|
| 152 |
-
150,9.0
|
| 153 |
-
151,12.0
|
| 154 |
-
152,9.0
|
| 155 |
-
153,9.0
|
| 156 |
-
154,10.0
|
| 157 |
-
155,9.0
|
| 158 |
-
156,10.0
|
| 159 |
-
157,13.0
|
| 160 |
-
158,14.0
|
| 161 |
-
159,10.0
|
| 162 |
-
160,12.0
|
| 163 |
-
161,11.0
|
| 164 |
-
162,10.0
|
| 165 |
-
163,11.0
|
| 166 |
-
164,11.0
|
| 167 |
-
165,9.0
|
| 168 |
-
166,31.0
|
| 169 |
-
167,39.0
|
| 170 |
-
168,18.0
|
| 171 |
-
169,24.0
|
| 172 |
-
170,18.0
|
| 173 |
-
171,18.0
|
| 174 |
-
172,24.0
|
| 175 |
-
173,16.0
|
| 176 |
-
174,25.0
|
| 177 |
-
175,23.0
|
| 178 |
-
176,26.0
|
| 179 |
-
177,23.0
|
| 180 |
-
178,26.0
|
| 181 |
-
179,21.0
|
| 182 |
-
180,28.0
|
| 183 |
-
181,20.0
|
| 184 |
-
182,22.0
|
| 185 |
-
183,30.0
|
| 186 |
-
184,27.0
|
| 187 |
-
185,34.0
|
| 188 |
-
186,31.0
|
| 189 |
-
187,39.0
|
| 190 |
-
188,29.0
|
| 191 |
-
189,29.0
|
| 192 |
-
190,37.0
|
| 193 |
-
191,27.0
|
| 194 |
-
192,36.0
|
| 195 |
-
193,34.0
|
| 196 |
-
194,46.0
|
| 197 |
-
195,35.0
|
| 198 |
-
196,52.0
|
| 199 |
-
197,32.0
|
| 200 |
-
198,30.0
|
| 201 |
-
199,69.0
|
| 202 |
-
200,38.0
|
| 203 |
-
201,39.0
|
| 204 |
-
202,57.0
|
| 205 |
-
203,38.0
|
| 206 |
-
204,68.0
|
| 207 |
-
205,47.0
|
| 208 |
-
206,45.0
|
| 209 |
-
207,63.0
|
| 210 |
-
208,47.0
|
| 211 |
-
209,86.0
|
| 212 |
-
210,67.0
|
| 213 |
-
211,60.0
|
| 214 |
-
212,48.0
|
| 215 |
-
213,55.0
|
| 216 |
-
214,95.0
|
| 217 |
-
215,58.0
|
| 218 |
-
216,70.0
|
| 219 |
-
217,58.0
|
| 220 |
-
218,42.0
|
| 221 |
-
219,69.0
|
| 222 |
-
220,47.0
|
| 223 |
-
221,109.0
|
| 224 |
-
222,70.0
|
| 225 |
-
223,80.0
|
| 226 |
-
224,77.0
|
| 227 |
-
225,61.0
|
| 228 |
-
226,72.0
|
| 229 |
-
227,55.0
|
| 230 |
-
228,77.0
|
| 231 |
-
229,61.0
|
| 232 |
-
230,79.0
|
| 233 |
-
231,66.0
|
| 234 |
-
232,68.0
|
| 235 |
-
233,99.0
|
| 236 |
-
234,143.0
|
| 237 |
-
235,82.0
|
| 238 |
-
236,85.0
|
| 239 |
-
237,103.0
|
| 240 |
-
238,99.0
|
| 241 |
-
239,93.0
|
| 242 |
-
240,100.0
|
| 243 |
-
241,101.0
|
| 244 |
-
242,151.0
|
| 245 |
-
243,195.0
|
| 246 |
-
244,100.0
|
| 247 |
-
245,99.0
|
| 248 |
-
246,127.0
|
| 249 |
-
247,105.0
|
| 250 |
-
248,127.0
|
| 251 |
-
249,142.0
|
| 252 |
-
250,169.0
|
| 253 |
-
251,108.0
|
| 254 |
-
252,128.0
|
| 255 |
-
253,123.0
|
| 256 |
-
254,134.0
|
| 257 |
-
255,126.0
|
| 258 |
-
256,114.0
|
| 259 |
-
257,200.0
|
| 260 |
-
258,123.0
|
| 261 |
-
259,159.0
|
| 262 |
-
260,125.0
|
| 263 |
-
261,142.0
|
| 264 |
-
262,178.0
|
| 265 |
-
263,96.0
|
| 266 |
-
264,200.0
|
| 267 |
-
265,200.0
|
| 268 |
-
266,113.0
|
| 269 |
-
267,90.0
|
| 270 |
-
268,200.0
|
| 271 |
-
269,122.0
|
| 272 |
-
270,140.0
|
| 273 |
-
271,116.0
|
| 274 |
-
272,128.0
|
| 275 |
-
273,190.0
|
| 276 |
-
274,170.0
|
| 277 |
-
275,96.0
|
| 278 |
-
276,126.0
|
| 279 |
-
277,200.0
|
| 280 |
-
278,88.0
|
| 281 |
-
279,76.0
|
| 282 |
-
280,74.0
|
| 283 |
-
281,84.0
|
| 284 |
-
282,130.0
|
| 285 |
-
283,200.0
|
| 286 |
-
284,86.0
|
| 287 |
-
285,153.0
|
| 288 |
-
286,200.0
|
| 289 |
-
287,59.0
|
| 290 |
-
288,135.0
|
| 291 |
-
289,62.0
|
| 292 |
-
290,200.0
|
| 293 |
-
291,182.0
|
| 294 |
-
292,138.0
|
| 295 |
-
293,200.0
|
| 296 |
-
294,118.0
|
| 297 |
-
295,50.0
|
| 298 |
-
296,74.0
|
| 299 |
-
297,62.0
|
| 300 |
-
298,200.0
|
| 301 |
-
299,124.0
|
| 302 |
-
300,111.0
|
| 303 |
-
301,61.0
|
| 304 |
-
302,132.0
|
| 305 |
-
303,200.0
|
| 306 |
-
304,80.0
|
| 307 |
-
305,60.0
|
| 308 |
-
306,77.0
|
| 309 |
-
307,47.0
|
| 310 |
-
308,80.0
|
| 311 |
-
309,64.0
|
| 312 |
-
310,96.0
|
| 313 |
-
311,200.0
|
| 314 |
-
312,200.0
|
| 315 |
-
313,133.0
|
| 316 |
-
314,200.0
|
| 317 |
-
315,188.0
|
| 318 |
-
316,132.0
|
| 319 |
-
317,150.0
|
| 320 |
-
318,135.0
|
| 321 |
-
319,184.0
|
| 322 |
-
320,138.0
|
| 323 |
-
321,176.0
|
| 324 |
-
322,200.0
|
| 325 |
-
323,161.0
|
| 326 |
-
324,158.0
|
| 327 |
-
325,142.0
|
| 328 |
-
326,133.0
|
| 329 |
-
327,151.0
|
| 330 |
-
328,143.0
|
| 331 |
-
329,160.0
|
| 332 |
-
330,150.0
|
| 333 |
-
331,134.0
|
| 334 |
-
332,147.0
|
| 335 |
-
333,132.0
|
| 336 |
-
334,143.0
|
| 337 |
-
335,137.0
|
| 338 |
-
336,155.0
|
| 339 |
-
337,138.0
|
| 340 |
-
338,138.0
|
| 341 |
-
339,130.0
|
| 342 |
-
340,148.0
|
| 343 |
-
341,146.0
|
| 344 |
-
342,152.0
|
| 345 |
-
343,135.0
|
| 346 |
-
344,175.0
|
| 347 |
-
345,153.0
|
| 348 |
-
346,155.0
|
| 349 |
-
347,131.0
|
| 350 |
-
348,156.0
|
| 351 |
-
349,138.0
|
| 352 |
-
350,151.0
|
| 353 |
-
351,162.0
|
| 354 |
-
352,200.0
|
| 355 |
-
353,175.0
|
| 356 |
-
354,156.0
|
| 357 |
-
355,145.0
|
| 358 |
-
356,168.0
|
| 359 |
-
357,200.0
|
| 360 |
-
358,181.0
|
| 361 |
-
359,145.0
|
| 362 |
-
360,189.0
|
| 363 |
-
361,200.0
|
| 364 |
-
362,144.0
|
| 365 |
-
363,200.0
|
| 366 |
-
364,178.0
|
| 367 |
-
365,200.0
|
| 368 |
-
366,179.0
|
| 369 |
-
367,200.0
|
| 370 |
-
368,177.0
|
| 371 |
-
369,200.0
|
| 372 |
-
370,185.0
|
| 373 |
-
371,195.0
|
| 374 |
-
372,200.0
|
| 375 |
-
373,200.0
|
| 376 |
-
374,190.0
|
| 377 |
-
375,200.0
|
| 378 |
-
376,200.0
|
| 379 |
-
377,200.0
|
| 380 |
-
378,200.0
|
| 381 |
-
379,200.0
|
| 382 |
-
380,200.0
|
| 383 |
-
381,200.0
|
| 384 |
-
382,170.0
|
| 385 |
-
383,173.0
|
| 386 |
-
384,162.0
|
| 387 |
-
385,162.0
|
| 388 |
-
386,149.0
|
| 389 |
-
387,173.0
|
| 390 |
-
388,200.0
|
| 391 |
-
389,200.0
|
| 392 |
-
390,200.0
|
| 393 |
-
391,156.0
|
| 394 |
-
392,157.0
|
| 395 |
-
393,169.0
|
| 396 |
-
394,182.0
|
| 397 |
-
395,154.0
|
| 398 |
-
396,200.0
|
| 399 |
-
397,200.0
|
| 400 |
-
398,200.0
|
| 401 |
-
399,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Train_CartPole-v1_DuelingDQN_ray_20230407-153236 → Train_ray_CartPole-v1_DuelingDQN_20230517-224129}/config.yaml
RENAMED
|
@@ -1,47 +1,44 @@
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DuelingDQN
|
|
|
|
| 3 |
device: cpu
|
| 4 |
-
env_name:
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
load_checkpoint: false
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
mode: train
|
|
|
|
| 11 |
mp_backend: ray
|
| 12 |
n_workers: 2
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
algo_cfg:
|
| 23 |
batch_size: 64
|
| 24 |
buffer_size: 100000
|
|
|
|
|
|
|
| 25 |
epsilon_decay: 500
|
| 26 |
epsilon_end: 0.01
|
| 27 |
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.
|
| 29 |
-
hidden_dim: 256
|
| 30 |
lr: 0.0001
|
| 31 |
target_update: 4
|
| 32 |
value_layers:
|
| 33 |
- activation: relu
|
| 34 |
layer_dim:
|
| 35 |
-
- n_states
|
| 36 |
- 256
|
| 37 |
layer_type: linear
|
| 38 |
- activation: relu
|
| 39 |
layer_dim:
|
| 40 |
- 256
|
| 41 |
-
- 256
|
| 42 |
-
layer_type: linear
|
| 43 |
-
- activation: none
|
| 44 |
-
layer_dim:
|
| 45 |
-
- 256
|
| 46 |
-
- n_actions
|
| 47 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DuelingDQN
|
| 3 |
+
collect_traj: false
|
| 4 |
device: cpu
|
| 5 |
+
env_name: gym
|
|
|
|
|
|
|
| 6 |
load_checkpoint: false
|
| 7 |
+
load_model_step: best
|
| 8 |
+
load_path: Train_single_CartPole-v1_DQN_20230515-211721
|
| 9 |
+
max_episode: 100
|
| 10 |
+
max_step: 200
|
| 11 |
mode: train
|
| 12 |
+
model_save_fre: 500
|
| 13 |
mp_backend: ray
|
| 14 |
n_workers: 2
|
| 15 |
+
online_eval: true
|
| 16 |
+
online_eval_episode: 10
|
|
|
|
|
|
|
| 17 |
seed: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
algo_cfg:
|
| 19 |
batch_size: 64
|
| 20 |
buffer_size: 100000
|
| 21 |
+
buffer_type: REPLAY_QUE
|
| 22 |
+
dueling: true
|
| 23 |
epsilon_decay: 500
|
| 24 |
epsilon_end: 0.01
|
| 25 |
epsilon_start: 0.95
|
| 26 |
+
gamma: 0.95
|
|
|
|
| 27 |
lr: 0.0001
|
| 28 |
target_update: 4
|
| 29 |
value_layers:
|
| 30 |
- activation: relu
|
| 31 |
layer_dim:
|
|
|
|
| 32 |
- 256
|
| 33 |
layer_type: linear
|
| 34 |
- activation: relu
|
| 35 |
layer_dim:
|
| 36 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
layer_type: linear
|
| 38 |
+
env_cfg:
|
| 39 |
+
id: CartPole-v1
|
| 40 |
+
ignore_params:
|
| 41 |
+
- wrapper
|
| 42 |
+
- ignore_params
|
| 43 |
+
render_mode: null
|
| 44 |
+
wrapper: null
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/logs/log.txt
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - General Configs:
|
| 2 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 3 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
| 4 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - env_name gym <class 'str'>
|
| 5 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - algo_name DuelingDQN <class 'str'>
|
| 6 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - mode train <class 'str'>
|
| 7 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - device cpu <class 'str'>
|
| 8 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - seed 1 <class 'int'>
|
| 9 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
| 10 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
| 11 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
| 12 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - mp_backend ray <class 'str'>
|
| 13 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
| 14 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
| 15 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
| 16 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
| 17 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
| 18 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DQN_20230515-211721 <class 'str'>
|
| 19 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
| 20 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 21 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Algo Configs:
|
| 22 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 23 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
| 24 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - dueling 1 <class 'bool'>
|
| 25 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
| 26 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
| 27 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
| 28 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
|
| 29 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
| 30 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
| 31 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
| 32 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
| 33 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
| 34 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
| 35 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 36 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Env Configs:
|
| 37 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 38 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - Name Value Type
|
| 39 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
| 40 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - render_mode None <class 'str'>
|
| 41 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - wrapper None <class 'str'>
|
| 42 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
| 43 |
+
2023-05-17 22:41:29 - SimpleLog - INFO: - ================================================================================
|
| 44 |
+
2023-05-17 22:41:35 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
| 45 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 0 with reward 22.0 in 22 steps
|
| 46 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 0 with reward 23.0 in 23 steps
|
| 47 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 2 with reward 10.0 in 10 steps
|
| 48 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 3 with reward 9.0 in 9 steps
|
| 49 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 1 finished episode 2 with reward 29.0 in 29 steps
|
| 50 |
+
2023-05-17 22:41:38 - RayLog - INFO: - Worker 0 finished episode 4 with reward 11.0 in 11 steps
|
| 51 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 6 with reward 15.0 in 15 steps
|
| 52 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 18.0 in 18 steps
|
| 53 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 7 with reward 9.0 in 9 steps
|
| 54 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 8 with reward 11.0 in 11 steps
|
| 55 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 1 finished episode 10 with reward 13.0 in 13 steps
|
| 56 |
+
2023-05-17 22:41:39 - RayLog - INFO: - Worker 0 finished episode 9 with reward 25.0 in 25 steps
|
| 57 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 12.0 in 12 steps
|
| 58 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 13 with reward 10.0 in 10 steps
|
| 59 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 33.0 in 33 steps
|
| 60 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 14 with reward 9.0 in 9 steps
|
| 61 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 15 with reward 10.0 in 10 steps
|
| 62 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 16 with reward 13.0 in 13 steps
|
| 63 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 1 finished episode 17 with reward 16.0 in 16 steps
|
| 64 |
+
2023-05-17 22:41:40 - RayLog - INFO: - Worker 0 finished episode 18 with reward 9.0 in 9 steps
|
| 65 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 20 with reward 11.0 in 11 steps
|
| 66 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 19 with reward 16.0 in 16 steps
|
| 67 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 18.0 in 18 steps
|
| 68 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 22 with reward 18.0 in 18 steps
|
| 69 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 23 with reward 11.0 in 11 steps
|
| 70 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 24 with reward 9.0 in 9 steps
|
| 71 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 1 finished episode 26 with reward 9.0 in 9 steps
|
| 72 |
+
2023-05-17 22:41:41 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
|
| 73 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 11.0 in 11 steps
|
| 74 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 27 with reward 12.0 in 12 steps
|
| 75 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 15.0 in 15 steps
|
| 76 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 30 with reward 19.0 in 19 steps
|
| 77 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 0 finished episode 31 with reward 10.0 in 10 steps
|
| 78 |
+
2023-05-17 22:41:42 - RayLog - INFO: - Worker 1 finished episode 32 with reward 13.0 in 13 steps
|
| 79 |
+
2023-05-17 22:41:44 - RayLog - INFO: - update_step: 500, online_eval_reward: 200.000
|
| 80 |
+
2023-05-17 22:41:44 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
| 81 |
+
2023-05-17 22:41:45 - RayLog - INFO: - Worker 0 finished episode 33 with reward 97.0 in 97 steps
|
| 82 |
+
2023-05-17 22:41:45 - RayLog - INFO: - Worker 1 finished episode 34 with reward 96.0 in 96 steps
|
| 83 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 36 with reward 24.0 in 24 steps
|
| 84 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 35 with reward 34.0 in 34 steps
|
| 85 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 37 with reward 17.0 in 17 steps
|
| 86 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 0 finished episode 38 with reward 23.0 in 23 steps
|
| 87 |
+
2023-05-17 22:41:46 - RayLog - INFO: - Worker 1 finished episode 39 with reward 16.0 in 16 steps
|
| 88 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 41 with reward 17.0 in 17 steps
|
| 89 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 40 with reward 24.0 in 24 steps
|
| 90 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 42 with reward 21.0 in 21 steps
|
| 91 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 0 finished episode 43 with reward 29.0 in 29 steps
|
| 92 |
+
2023-05-17 22:41:47 - RayLog - INFO: - Worker 1 finished episode 44 with reward 22.0 in 22 steps
|
| 93 |
+
2023-05-17 22:41:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 100.000
|
| 94 |
+
2023-05-17 22:41:49 - RayLog - INFO: - Worker 0 finished episode 45 with reward 84.0 in 84 steps
|
| 95 |
+
2023-05-17 22:41:49 - RayLog - INFO: - Worker 1 finished episode 46 with reward 75.0 in 75 steps
|
| 96 |
+
2023-05-17 22:41:50 - RayLog - INFO: - Worker 1 finished episode 48 with reward 52.0 in 52 steps
|
| 97 |
+
2023-05-17 22:41:50 - RayLog - INFO: - Worker 0 finished episode 47 with reward 66.0 in 66 steps
|
| 98 |
+
2023-05-17 22:41:51 - RayLog - INFO: - Worker 1 finished episode 49 with reward 63.0 in 63 steps
|
| 99 |
+
2023-05-17 22:41:52 - RayLog - INFO: - Worker 0 finished episode 50 with reward 94.0 in 94 steps
|
| 100 |
+
2023-05-17 22:41:53 - RayLog - INFO: - Worker 1 finished episode 51 with reward 75.0 in 75 steps
|
| 101 |
+
2023-05-17 22:41:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 120.000
|
| 102 |
+
2023-05-17 22:41:54 - RayLog - INFO: - Worker 0 finished episode 52 with reward 102.0 in 102 steps
|
| 103 |
+
2023-05-17 22:41:55 - RayLog - INFO: - Worker 1 finished episode 53 with reward 93.0 in 93 steps
|
| 104 |
+
2023-05-17 22:41:57 - RayLog - INFO: - Worker 1 finished episode 55 with reward 126.0 in 126 steps
|
| 105 |
+
2023-05-17 22:41:58 - RayLog - INFO: - Worker 0 finished episode 54 with reward 200.0 in 200 steps
|
| 106 |
+
2023-05-17 22:41:59 - RayLog - INFO: - update_step: 2000, online_eval_reward: 200.000
|
| 107 |
+
2023-05-17 22:42:01 - RayLog - INFO: - Worker 1 finished episode 56 with reward 200.0 in 200 steps
|
| 108 |
+
2023-05-17 22:42:02 - RayLog - INFO: - Worker 0 finished episode 57 with reward 200.0 in 200 steps
|
| 109 |
+
2023-05-17 22:42:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 167.000
|
| 110 |
+
2023-05-17 22:42:05 - RayLog - INFO: - Worker 1 finished episode 58 with reward 200.0 in 200 steps
|
| 111 |
+
2023-05-17 22:42:06 - RayLog - INFO: - Worker 0 finished episode 59 with reward 168.0 in 168 steps
|
| 112 |
+
2023-05-17 22:42:09 - RayLog - INFO: - Worker 0 finished episode 61 with reward 164.0 in 164 steps
|
| 113 |
+
2023-05-17 22:42:09 - RayLog - INFO: - update_step: 3000, online_eval_reward: 145.000
|
| 114 |
+
2023-05-17 22:42:09 - RayLog - INFO: - Worker 1 finished episode 60 with reward 189.0 in 189 steps
|
| 115 |
+
2023-05-17 22:42:12 - RayLog - INFO: - Worker 0 finished episode 62 with reward 152.0 in 152 steps
|
| 116 |
+
2023-05-17 22:42:12 - RayLog - INFO: - Worker 1 finished episode 63 with reward 162.0 in 162 steps
|
| 117 |
+
2023-05-17 22:42:14 - RayLog - INFO: - update_step: 3500, online_eval_reward: 151.000
|
| 118 |
+
2023-05-17 22:42:15 - RayLog - INFO: - Worker 0 finished episode 64 with reward 143.0 in 143 steps
|
| 119 |
+
2023-05-17 22:42:16 - RayLog - INFO: - Worker 1 finished episode 65 with reward 163.0 in 163 steps
|
| 120 |
+
2023-05-17 22:42:19 - RayLog - INFO: - Worker 0 finished episode 66 with reward 187.0 in 187 steps
|
| 121 |
+
2023-05-17 22:42:19 - RayLog - INFO: - update_step: 4000, online_eval_reward: 189.000
|
| 122 |
+
2023-05-17 22:42:20 - RayLog - INFO: - Worker 1 finished episode 67 with reward 200.0 in 200 steps
|
| 123 |
+
2023-05-17 22:42:22 - RayLog - INFO: - Worker 0 finished episode 68 with reward 173.0 in 173 steps
|
| 124 |
+
2023-05-17 22:42:23 - RayLog - INFO: - Worker 1 finished episode 69 with reward 170.0 in 170 steps
|
| 125 |
+
2023-05-17 22:42:24 - RayLog - INFO: - update_step: 4500, online_eval_reward: 178.000
|
| 126 |
+
2023-05-17 22:42:26 - RayLog - INFO: - Worker 0 finished episode 70 with reward 200.0 in 200 steps
|
| 127 |
+
2023-05-17 22:42:27 - RayLog - INFO: - Worker 1 finished episode 71 with reward 200.0 in 200 steps
|
| 128 |
+
2023-05-17 22:42:30 - RayLog - INFO: - update_step: 5000, online_eval_reward: 197.000
|
| 129 |
+
2023-05-17 22:42:30 - RayLog - INFO: - Worker 0 finished episode 72 with reward 200.0 in 200 steps
|
| 130 |
+
2023-05-17 22:42:31 - RayLog - INFO: - Worker 1 finished episode 73 with reward 200.0 in 200 steps
|
| 131 |
+
2023-05-17 22:42:35 - RayLog - INFO: - Worker 0 finished episode 74 with reward 197.0 in 197 steps
|
| 132 |
+
2023-05-17 22:42:35 - RayLog - INFO: - update_step: 5500, online_eval_reward: 200.000
|
| 133 |
+
2023-05-17 22:42:36 - RayLog - INFO: - Worker 1 finished episode 75 with reward 200.0 in 200 steps
|
| 134 |
+
2023-05-17 22:42:39 - RayLog - INFO: - Worker 0 finished episode 76 with reward 200.0 in 200 steps
|
| 135 |
+
2023-05-17 22:42:40 - RayLog - INFO: - Worker 1 finished episode 77 with reward 200.0 in 200 steps
|
| 136 |
+
2023-05-17 22:42:40 - RayLog - INFO: - update_step: 6000, online_eval_reward: 200.000
|
| 137 |
+
2023-05-17 22:42:43 - RayLog - INFO: - Worker 0 finished episode 78 with reward 200.0 in 200 steps
|
| 138 |
+
2023-05-17 22:42:44 - RayLog - INFO: - Worker 1 finished episode 79 with reward 200.0 in 200 steps
|
| 139 |
+
2023-05-17 22:42:45 - RayLog - INFO: - update_step: 6500, online_eval_reward: 200.000
|
| 140 |
+
2023-05-17 22:42:47 - RayLog - INFO: - Worker 0 finished episode 80 with reward 200.0 in 200 steps
|
| 141 |
+
2023-05-17 22:42:48 - RayLog - INFO: - Worker 1 finished episode 81 with reward 200.0 in 200 steps
|
| 142 |
+
2023-05-17 22:42:51 - RayLog - INFO: - update_step: 7000, online_eval_reward: 200.000
|
| 143 |
+
2023-05-17 22:42:52 - RayLog - INFO: - Worker 0 finished episode 82 with reward 200.0 in 200 steps
|
| 144 |
+
2023-05-17 22:42:53 - RayLog - INFO: - Worker 1 finished episode 83 with reward 200.0 in 200 steps
|
| 145 |
+
2023-05-17 22:42:56 - RayLog - INFO: - Worker 0 finished episode 84 with reward 200.0 in 200 steps
|
| 146 |
+
2023-05-17 22:42:56 - RayLog - INFO: - update_step: 7500, online_eval_reward: 200.000
|
| 147 |
+
2023-05-17 22:42:57 - RayLog - INFO: - Worker 1 finished episode 85 with reward 200.0 in 200 steps
|
| 148 |
+
2023-05-17 22:43:00 - RayLog - INFO: - Worker 0 finished episode 86 with reward 200.0 in 200 steps
|
| 149 |
+
2023-05-17 22:43:01 - RayLog - INFO: - Worker 1 finished episode 87 with reward 200.0 in 200 steps
|
| 150 |
+
2023-05-17 22:43:02 - RayLog - INFO: - update_step: 8000, online_eval_reward: 200.000
|
| 151 |
+
2023-05-17 22:43:05 - RayLog - INFO: - Worker 0 finished episode 88 with reward 200.0 in 200 steps
|
| 152 |
+
2023-05-17 22:43:06 - RayLog - INFO: - Worker 1 finished episode 89 with reward 200.0 in 200 steps
|
| 153 |
+
2023-05-17 22:43:07 - RayLog - INFO: - update_step: 8500, online_eval_reward: 200.000
|
| 154 |
+
2023-05-17 22:43:09 - RayLog - INFO: - Worker 0 finished episode 90 with reward 200.0 in 200 steps
|
| 155 |
+
2023-05-17 22:43:10 - RayLog - INFO: - Worker 1 finished episode 91 with reward 200.0 in 200 steps
|
| 156 |
+
2023-05-17 22:43:12 - RayLog - INFO: - update_step: 9000, online_eval_reward: 200.000
|
| 157 |
+
2023-05-17 22:43:13 - RayLog - INFO: - Worker 0 finished episode 92 with reward 200.0 in 200 steps
|
| 158 |
+
2023-05-17 22:43:14 - RayLog - INFO: - Worker 1 finished episode 93 with reward 200.0 in 200 steps
|
| 159 |
+
2023-05-17 22:43:18 - RayLog - INFO: - Worker 0 finished episode 94 with reward 200.0 in 200 steps
|
| 160 |
+
2023-05-17 22:43:18 - RayLog - INFO: - update_step: 9500, online_eval_reward: 200.000
|
| 161 |
+
2023-05-17 22:43:19 - RayLog - INFO: - Worker 1 finished episode 95 with reward 200.0 in 200 steps
|
| 162 |
+
2023-05-17 22:43:22 - RayLog - INFO: - Worker 0 finished episode 96 with reward 200.0 in 200 steps
|
| 163 |
+
2023-05-17 22:43:23 - RayLog - INFO: - Worker 1 finished episode 97 with reward 200.0 in 200 steps
|
| 164 |
+
2023-05-17 22:43:23 - RayLog - INFO: - update_step: 10000, online_eval_reward: 200.000
|
| 165 |
+
2023-05-17 22:43:26 - RayLog - INFO: - Worker 0 finished episode 98 with reward 200.0 in 200 steps
|
| 166 |
+
2023-05-17 22:43:27 - RayLog - INFO: - Worker 1 finished episode 99 with reward 200.0 in 200 steps
|
| 167 |
+
2023-05-17 22:43:29 - RayLog - INFO: - update_step: 10500, online_eval_reward: 200.000
|
| 168 |
+
2023-05-17 22:43:30 - RayLog - INFO: - Worker 0 finished episode 100 with reward 200.0 in 200 steps
|
| 169 |
+
2023-05-17 22:43:32 - SimpleLog - INFO: - Finish training! total time consumed: 122.69s
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/10500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/1500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/2500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/3500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/4500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/5500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/6500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/7500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/8500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9000
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/9500
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DuelingDQN_20230517-224129/models/best
ADDED
|
Binary file (548 kB). View file
|
|
|
CartPole-v1/{Test_CartPole-v1_DuelingDQN_mp_20230407-171120/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334489.DESKTOP-H34HQIQ.80856.0}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c9b4a566642bacd5610c3e7b42d10f1feb9704e2a4cb2c004a7d85f75a0aba9
|
| 3 |
+
size 40
|
CartPole-v1/{Test_CartPole-v1_DuelingDQN_ray_20230407-165208/models/checkpoint.pt → Train_ray_CartPole-v1_DuelingDQN_20230517-224129/tb_logs/interact/events.out.tfevents.1684334497.DESKTOP-H34HQIQ.84100.0}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9867609ac2d205c8c66fe7bc380a67b26f152a046fb5e97d523f5b2bf1c147fd
|
| 3 |
+
size 10028
|