Commit
·
ccb908b
1
Parent(s):
989b5fc
update CartPole-v1 DoubleDQN
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml +0 -40
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt +0 -14
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt +0 -52
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv +0 -11
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml +0 -46
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt +0 -52
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png +0 -0
- CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv +0 -11
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml +20 -23
- CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt +55 -0
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0} +2 -2
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1} +2 -2
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml +0 -40
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt +0 -116
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv +0 -101
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt +0 -42
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth +0 -3
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv +0 -402
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt +0 -42
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt +0 -3
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png +0 -0
- CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv +0 -401
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 → Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml +17 -20
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt +157 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500 +0 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best +0 -0
- CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0} +2 -2
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0} +2 -2
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1 +3 -0
- CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1 +3 -0
- CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 → Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml +18 -21
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt +162 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500 +0 -0
- CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000 +0 -0
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/config.yaml
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DoubleDQN
|
| 3 |
-
device: cuda
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: true
|
| 8 |
-
load_path: Train_CartPole-v1_DoubleDQN_20221122-125516
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: test
|
| 11 |
-
save_fig: true
|
| 12 |
-
seed: 1
|
| 13 |
-
show_fig: false
|
| 14 |
-
test_eps: 10
|
| 15 |
-
train_eps: 100
|
| 16 |
-
algo_cfg:
|
| 17 |
-
batch_size: 64
|
| 18 |
-
buffer_size: 100000
|
| 19 |
-
epsilon_decay: 500
|
| 20 |
-
epsilon_end: 0.01
|
| 21 |
-
epsilon_start: 0.95
|
| 22 |
-
gamma: 0.99
|
| 23 |
-
lr: 0.0001
|
| 24 |
-
target_update: 4
|
| 25 |
-
value_layers:
|
| 26 |
-
- activation: relu
|
| 27 |
-
layer_dim:
|
| 28 |
-
- n_states
|
| 29 |
-
- 256
|
| 30 |
-
layer_type: linear
|
| 31 |
-
- activation: relu
|
| 32 |
-
layer_dim:
|
| 33 |
-
- 256
|
| 34 |
-
- 256
|
| 35 |
-
layer_type: linear
|
| 36 |
-
- activation: none
|
| 37 |
-
layer_dim:
|
| 38 |
-
- 256
|
| 39 |
-
- n_actions
|
| 40 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/logs/log.txt
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
2022-11-22 12:56:12 - r - INFO: - n_states: 4, n_actions: 2
|
| 2 |
-
2022-11-22 12:56:14 - r - INFO: - Start testing!
|
| 3 |
-
2022-11-22 12:56:14 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
|
| 4 |
-
2022-11-22 12:56:14 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
| 5 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
| 6 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
| 7 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
| 8 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 5/10, Reward: 138.000, Step: 138
|
| 9 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
| 10 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 11 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
| 12 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
| 13 |
-
2022-11-22 12:56:15 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
| 14 |
-
2022-11-22 12:56:15 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/learning_curve.png
DELETED
|
Binary file (31.4 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_20221122-125611/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,200.0,200
|
| 3 |
-
1,200.0,200
|
| 4 |
-
2,200.0,200
|
| 5 |
-
3,200.0,200
|
| 6 |
-
4,138.0,138
|
| 7 |
-
5,200.0,200
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,200.0,200
|
| 10 |
-
8,200.0,200
|
| 11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/logs/log.txt
DELETED
|
@@ -1,52 +0,0 @@
|
|
| 1 |
-
2023-04-06 16:04:10 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-06 16:04:10 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-06 16:04:10 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-06 16:04:10 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-06 16:04:10 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-06 16:04:10 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-06 16:04:10 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-06 16:04:10 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-06 16:04:10 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
| 10 |
-
2023-04-06 16:04:10 - r - INFO: - mode test <class 'str'>
|
| 11 |
-
2023-04-06 16:04:10 - r - INFO: - mp_backend mp <class 'str'>
|
| 12 |
-
2023-04-06 16:04:10 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-06 16:04:10 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-06 16:04:10 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-06 16:04:10 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-06 16:04:10 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-06 16:04:10 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-06 16:04:10 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-06 16:04:10 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
| 20 |
-
2023-04-06 16:04:10 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_mp_20230406-160028 <class 'str'>
|
| 21 |
-
2023-04-06 16:04:10 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-06 16:04:10 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-06 16:04:10 - r - INFO: - n_workers 1 <class 'int'>
|
| 24 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-06 16:04:10 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-06 16:04:10 - r - INFO: - gamma 0.95 <class 'float'>
|
| 28 |
-
2023-04-06 16:04:10 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-06 16:04:10 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-06 16:04:10 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-06 16:04:10 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-06 16:04:10 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-06 16:04:10 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410 <class 'str'>
|
| 34 |
-
2023-04-06 16:04:10 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/results <class 'str'>
|
| 35 |
-
2023-04-06 16:04:10 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/logs <class 'str'>
|
| 36 |
-
2023-04-06 16:04:10 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/traj <class 'str'>
|
| 37 |
-
2023-04-06 16:04:10 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-160410/videos <class 'str'>
|
| 38 |
-
2023-04-06 16:04:10 - r - INFO: - ================================================================================
|
| 39 |
-
2023-04-06 16:04:10 - r - INFO: - n_states: 4, n_actions: 2
|
| 40 |
-
2023-04-06 16:04:10 - r - INFO: - Start testing!
|
| 41 |
-
2023-04-06 16:04:10 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
| 42 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
| 43 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
| 44 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
| 45 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
| 46 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
| 47 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
| 48 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 49 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
| 50 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
| 51 |
-
2023-04-06 16:04:10 - r - INFO: - Episode: 10/10, Reward: 198.000, Step: 198
|
| 52 |
-
2023-04-06 16:04:10 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/learning_curve.png
DELETED
|
Binary file (34.7 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_mp_20230406-160410/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,200.0,200
|
| 3 |
-
1,200.0,200
|
| 4 |
-
2,200.0,200
|
| 5 |
-
3,200.0,200
|
| 6 |
-
4,200.0,200
|
| 7 |
-
5,200.0,200
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,200.0,200
|
| 10 |
-
8,200.0,200
|
| 11 |
-
9,198.0,198
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/config.yaml
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DoubleDQN
|
| 3 |
-
device: cpu
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: true
|
| 8 |
-
load_path: Train_CartPole-v1_DoubleDQN_ray_20230406-162938
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: test
|
| 11 |
-
mp_backend: ray
|
| 12 |
-
n_workers: 1
|
| 13 |
-
new_step_api: true
|
| 14 |
-
render: false
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
-
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
-
algo_cfg:
|
| 23 |
-
batch_size: 64
|
| 24 |
-
buffer_size: 100000
|
| 25 |
-
epsilon_decay: 500
|
| 26 |
-
epsilon_end: 0.01
|
| 27 |
-
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.95
|
| 29 |
-
lr: 0.0001
|
| 30 |
-
target_update: 4
|
| 31 |
-
value_layers:
|
| 32 |
-
- activation: relu
|
| 33 |
-
layer_dim:
|
| 34 |
-
- n_states
|
| 35 |
-
- 256
|
| 36 |
-
layer_type: linear
|
| 37 |
-
- activation: relu
|
| 38 |
-
layer_dim:
|
| 39 |
-
- 256
|
| 40 |
-
- 256
|
| 41 |
-
layer_type: linear
|
| 42 |
-
- activation: none
|
| 43 |
-
layer_dim:
|
| 44 |
-
- 256
|
| 45 |
-
- n_actions
|
| 46 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/logs/log.txt
DELETED
|
@@ -1,52 +0,0 @@
|
|
| 1 |
-
2023-04-06 17:03:48 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-06 17:03:48 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-06 17:03:48 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-06 17:03:48 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-06 17:03:48 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-06 17:03:48 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-06 17:03:48 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-06 17:03:48 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-06 17:03:48 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
| 10 |
-
2023-04-06 17:03:48 - r - INFO: - mode test <class 'str'>
|
| 11 |
-
2023-04-06 17:03:48 - r - INFO: - mp_backend ray <class 'str'>
|
| 12 |
-
2023-04-06 17:03:48 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-06 17:03:48 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-06 17:03:48 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-06 17:03:48 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-06 17:03:48 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-06 17:03:48 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-06 17:03:48 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-06 17:03:48 - r - INFO: - load_checkpoint 1 <class 'bool'>
|
| 20 |
-
2023-04-06 17:03:48 - r - INFO: - load_path Train_CartPole-v1_DoubleDQN_ray_20230406-162938 <class 'str'>
|
| 21 |
-
2023-04-06 17:03:48 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-06 17:03:48 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-06 17:03:48 - r - INFO: - n_workers 1 <class 'int'>
|
| 24 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-06 17:03:48 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-06 17:03:48 - r - INFO: - gamma 0.95 <class 'float'>
|
| 28 |
-
2023-04-06 17:03:48 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-06 17:03:48 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-06 17:03:48 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-06 17:03:48 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-06 17:03:48 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-06 17:03:48 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348 <class 'str'>
|
| 34 |
-
2023-04-06 17:03:48 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/results <class 'str'>
|
| 35 |
-
2023-04-06 17:03:48 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/logs <class 'str'>
|
| 36 |
-
2023-04-06 17:03:48 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/traj <class 'str'>
|
| 37 |
-
2023-04-06 17:03:48 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Test_CartPole-v1_DoubleDQN_20230406-170348/videos <class 'str'>
|
| 38 |
-
2023-04-06 17:03:48 - r - INFO: - ================================================================================
|
| 39 |
-
2023-04-06 17:03:48 - r - INFO: - n_states: 4, n_actions: 2
|
| 40 |
-
2023-04-06 17:03:48 - r - INFO: - Start testing!
|
| 41 |
-
2023-04-06 17:03:48 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
| 42 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
|
| 43 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
|
| 44 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
|
| 45 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
|
| 46 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
|
| 47 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
|
| 48 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
|
| 49 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
|
| 50 |
-
2023-04-06 17:03:48 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
|
| 51 |
-
2023-04-06 17:03:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
|
| 52 |
-
2023-04-06 17:03:49 - r - INFO: - Finish testing!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/learning_curve.png
DELETED
|
Binary file (27.7 kB)
|
|
|
CartPole-v1/Test_CartPole-v1_DoubleDQN_ray_20230406-170348/results/res.csv
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,200.0,200
|
| 3 |
-
1,200.0,200
|
| 4 |
-
2,200.0,200
|
| 5 |
-
3,200.0,200
|
| 6 |
-
4,200.0,200
|
| 7 |
-
5,200.0,200
|
| 8 |
-
6,200.0,200
|
| 9 |
-
7,200.0,200
|
| 10 |
-
8,200.0,200
|
| 11 |
-
9,200.0,200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410 → Test_single_CartPole-v1_DoubleDQN_20230516-115305}/config.yaml
RENAMED
|
@@ -1,46 +1,43 @@
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
|
|
|
| 3 |
device: cpu
|
| 4 |
-
env_name:
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
load_checkpoint: true
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
mode: test
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
save_fig: true
|
| 17 |
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
algo_cfg:
|
| 23 |
batch_size: 64
|
| 24 |
buffer_size: 100000
|
|
|
|
| 25 |
epsilon_decay: 500
|
| 26 |
epsilon_end: 0.01
|
| 27 |
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.
|
| 29 |
lr: 0.0001
|
| 30 |
target_update: 4
|
| 31 |
value_layers:
|
| 32 |
- activation: relu
|
| 33 |
layer_dim:
|
| 34 |
-
- n_states
|
| 35 |
- 256
|
| 36 |
layer_type: linear
|
| 37 |
- activation: relu
|
| 38 |
layer_dim:
|
| 39 |
- 256
|
| 40 |
-
- 256
|
| 41 |
-
layer_type: linear
|
| 42 |
-
- activation: none
|
| 43 |
-
layer_dim:
|
| 44 |
-
- 256
|
| 45 |
-
- n_actions
|
| 46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
| 3 |
+
collect_traj: false
|
| 4 |
device: cpu
|
| 5 |
+
env_name: gym
|
|
|
|
|
|
|
| 6 |
load_checkpoint: true
|
| 7 |
+
load_model_step: best
|
| 8 |
+
load_path: Train_single_CartPole-v1_DoubleDQN_20230516-114540
|
| 9 |
+
max_episode: 10
|
| 10 |
+
max_step: 200
|
| 11 |
mode: test
|
| 12 |
+
model_save_fre: 500
|
| 13 |
+
mp_backend: single
|
| 14 |
+
n_workers: 2
|
| 15 |
+
online_eval: true
|
| 16 |
+
online_eval_episode: 10
|
|
|
|
| 17 |
seed: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
algo_cfg:
|
| 19 |
batch_size: 64
|
| 20 |
buffer_size: 100000
|
| 21 |
+
buffer_type: REPLAY_QUE
|
| 22 |
epsilon_decay: 500
|
| 23 |
epsilon_end: 0.01
|
| 24 |
epsilon_start: 0.95
|
| 25 |
+
gamma: 0.99
|
| 26 |
lr: 0.0001
|
| 27 |
target_update: 4
|
| 28 |
value_layers:
|
| 29 |
- activation: relu
|
| 30 |
layer_dim:
|
|
|
|
| 31 |
- 256
|
| 32 |
layer_type: linear
|
| 33 |
- activation: relu
|
| 34 |
layer_dim:
|
| 35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
layer_type: linear
|
| 37 |
+
env_cfg:
|
| 38 |
+
id: CartPole-v1
|
| 39 |
+
ignore_params:
|
| 40 |
+
- wrapper
|
| 41 |
+
- ignore_params
|
| 42 |
+
render_mode: null
|
| 43 |
+
wrapper: null
|
CartPole-v1/Test_single_CartPole-v1_DoubleDQN_20230516-115305/logs/log.txt
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - General Configs:
|
| 2 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 3 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
| 4 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - env_name gym <class 'str'>
|
| 5 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
| 6 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - mode test <class 'str'>
|
| 7 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - device cpu <class 'str'>
|
| 8 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - seed 1 <class 'int'>
|
| 9 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - max_episode 10 <class 'int'>
|
| 10 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
| 11 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
| 12 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
| 13 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
| 14 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
| 15 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
| 16 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
| 17 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_checkpoint 1 <class 'bool'>
|
| 18 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_path Train_single_CartPole-v1_DoubleDQN_20230516-114540 <class 'str'>
|
| 19 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
| 20 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 21 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Algo Configs:
|
| 22 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 23 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
| 24 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
| 31 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
| 32 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
| 33 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
| 34 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 35 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Env Configs:
|
| 36 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 37 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Name Value Type
|
| 38 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
| 39 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - render_mode None <class 'str'>
|
| 40 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - wrapper None <class 'str'>
|
| 41 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
| 42 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - ================================================================================
|
| 43 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
| 44 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Start testing!
|
| 45 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 0, ep_reward: 200.0, ep_step: 200
|
| 46 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 1, ep_reward: 200.0, ep_step: 200
|
| 47 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 2, ep_reward: 200.0, ep_step: 200
|
| 48 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 3, ep_reward: 200.0, ep_step: 200
|
| 49 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 4, ep_reward: 200.0, ep_step: 200
|
| 50 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 5, ep_reward: 200.0, ep_step: 200
|
| 51 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 6, ep_reward: 200.0, ep_step: 200
|
| 52 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 7, ep_reward: 200.0, ep_step: 200
|
| 53 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 8, ep_reward: 200.0, ep_step: 200
|
| 54 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - episode: 9, ep_reward: 200.0, ep_step: 200
|
| 55 |
+
2023-05-16 11:53:05 - SimpleLog - INFO: - Finish testing! total time consumed: 0.24s
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_mp_20230406-160410/models/checkpoint.pth → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/interact/events.out.tfevents.1684209185.JMac.local.52313.0}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:350321a3436f2c600f7c9a0f8ba02ba28a6ad9c6e949481d6926ca5daf32d79e
|
| 3 |
+
size 1056
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_ray_20230406-170348/models/checkpoint.pt → Test_single_CartPole-v1_DoubleDQN_20230516-115305/tb_logs/model/events.out.tfevents.1684209185.JMac.local.52313.1}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25447bc5c9337e3d33f456f66eb8230e83dcc359ad3630edde9c63f21baefd4d
|
| 3 |
+
size 40
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/config.yaml
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
general_cfg:
|
| 2 |
-
algo_name: DoubleDQN
|
| 3 |
-
device: cuda
|
| 4 |
-
env_name: CartPole-v1
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
-
load_checkpoint: false
|
| 8 |
-
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
-
max_steps: 200
|
| 10 |
-
mode: train
|
| 11 |
-
save_fig: true
|
| 12 |
-
seed: 1
|
| 13 |
-
show_fig: false
|
| 14 |
-
test_eps: 10
|
| 15 |
-
train_eps: 100
|
| 16 |
-
algo_cfg:
|
| 17 |
-
batch_size: 64
|
| 18 |
-
buffer_size: 100000
|
| 19 |
-
epsilon_decay: 500
|
| 20 |
-
epsilon_end: 0.01
|
| 21 |
-
epsilon_start: 0.95
|
| 22 |
-
gamma: 0.99
|
| 23 |
-
lr: 0.0001
|
| 24 |
-
target_update: 4
|
| 25 |
-
value_layers:
|
| 26 |
-
- activation: relu
|
| 27 |
-
layer_dim:
|
| 28 |
-
- n_states
|
| 29 |
-
- 256
|
| 30 |
-
layer_type: linear
|
| 31 |
-
- activation: relu
|
| 32 |
-
layer_dim:
|
| 33 |
-
- 256
|
| 34 |
-
- 256
|
| 35 |
-
layer_type: linear
|
| 36 |
-
- activation: none
|
| 37 |
-
layer_dim:
|
| 38 |
-
- 256
|
| 39 |
-
- n_actions
|
| 40 |
-
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/logs/log.txt
DELETED
|
@@ -1,116 +0,0 @@
|
|
| 1 |
-
2022-11-22 12:55:16 - r - INFO: - n_states: 4, n_actions: 2
|
| 2 |
-
2022-11-22 12:55:19 - r - INFO: - Start training!
|
| 3 |
-
2022-11-22 12:55:19 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cuda
|
| 4 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 1/100, Reward: 18.000, Step: 18
|
| 5 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 2/100, Reward: 35.000, Step: 35
|
| 6 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 3/100, Reward: 13.000, Step: 13
|
| 7 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 4/100, Reward: 32.000, Step: 32
|
| 8 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 5/100, Reward: 16.000, Step: 16
|
| 9 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 5 has the best eval reward: 9.100
|
| 10 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 6/100, Reward: 9.000, Step: 9
|
| 11 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 7/100, Reward: 12.000, Step: 12
|
| 12 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 8/100, Reward: 16.000, Step: 16
|
| 13 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 9/100, Reward: 14.000, Step: 14
|
| 14 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 10/100, Reward: 12.000, Step: 12
|
| 15 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 10 has the best eval reward: 9.200
|
| 16 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 11/100, Reward: 13.000, Step: 13
|
| 17 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 12/100, Reward: 14.000, Step: 14
|
| 18 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 13/100, Reward: 19.000, Step: 19
|
| 19 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 14/100, Reward: 9.000, Step: 9
|
| 20 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 15/100, Reward: 15.000, Step: 15
|
| 21 |
-
2022-11-22 12:55:19 - r - INFO: - Current episode 15 has the best eval reward: 9.300
|
| 22 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 16/100, Reward: 12.000, Step: 12
|
| 23 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 17/100, Reward: 11.000, Step: 11
|
| 24 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 18/100, Reward: 9.000, Step: 9
|
| 25 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 19/100, Reward: 13.000, Step: 13
|
| 26 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 20/100, Reward: 17.000, Step: 17
|
| 27 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 21/100, Reward: 13.000, Step: 13
|
| 28 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 22/100, Reward: 15.000, Step: 15
|
| 29 |
-
2022-11-22 12:55:19 - r - INFO: - Episode: 23/100, Reward: 22.000, Step: 22
|
| 30 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 24/100, Reward: 26.000, Step: 26
|
| 31 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 25/100, Reward: 19.000, Step: 19
|
| 32 |
-
2022-11-22 12:55:20 - r - INFO: - Current episode 25 has the best eval reward: 9.800
|
| 33 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 26/100, Reward: 10.000, Step: 10
|
| 34 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 27/100, Reward: 10.000, Step: 10
|
| 35 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 28/100, Reward: 11.000, Step: 11
|
| 36 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 29/100, Reward: 13.000, Step: 13
|
| 37 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 30/100, Reward: 16.000, Step: 16
|
| 38 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 31/100, Reward: 13.000, Step: 13
|
| 39 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 32/100, Reward: 15.000, Step: 15
|
| 40 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 33/100, Reward: 12.000, Step: 12
|
| 41 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 34/100, Reward: 13.000, Step: 13
|
| 42 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 35/100, Reward: 13.000, Step: 13
|
| 43 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 36/100, Reward: 11.000, Step: 11
|
| 44 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 37/100, Reward: 9.000, Step: 9
|
| 45 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 38/100, Reward: 9.000, Step: 9
|
| 46 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 39/100, Reward: 10.000, Step: 10
|
| 47 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 40/100, Reward: 14.000, Step: 14
|
| 48 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 41/100, Reward: 9.000, Step: 9
|
| 49 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 42/100, Reward: 10.000, Step: 10
|
| 50 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 43/100, Reward: 9.000, Step: 9
|
| 51 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 44/100, Reward: 14.000, Step: 14
|
| 52 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 45/100, Reward: 10.000, Step: 10
|
| 53 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 46/100, Reward: 19.000, Step: 19
|
| 54 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 47/100, Reward: 10.000, Step: 10
|
| 55 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 48/100, Reward: 14.000, Step: 14
|
| 56 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 49/100, Reward: 18.000, Step: 18
|
| 57 |
-
2022-11-22 12:55:20 - r - INFO: - Episode: 50/100, Reward: 32.000, Step: 32
|
| 58 |
-
2022-11-22 12:55:20 - r - INFO: - Current episode 50 has the best eval reward: 24.300
|
| 59 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 51/100, Reward: 17.000, Step: 17
|
| 60 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 52/100, Reward: 15.000, Step: 15
|
| 61 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 53/100, Reward: 18.000, Step: 18
|
| 62 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 54/100, Reward: 14.000, Step: 14
|
| 63 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 55/100, Reward: 22.000, Step: 22
|
| 64 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 56/100, Reward: 14.000, Step: 14
|
| 65 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 57/100, Reward: 21.000, Step: 21
|
| 66 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 58/100, Reward: 21.000, Step: 21
|
| 67 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 59/100, Reward: 23.000, Step: 23
|
| 68 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 60/100, Reward: 21.000, Step: 21
|
| 69 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 61/100, Reward: 21.000, Step: 21
|
| 70 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 62/100, Reward: 35.000, Step: 35
|
| 71 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 63/100, Reward: 23.000, Step: 23
|
| 72 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 64/100, Reward: 27.000, Step: 27
|
| 73 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 65/100, Reward: 24.000, Step: 24
|
| 74 |
-
2022-11-22 12:55:21 - r - INFO: - Current episode 65 has the best eval reward: 29.700
|
| 75 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 66/100, Reward: 28.000, Step: 28
|
| 76 |
-
2022-11-22 12:55:21 - r - INFO: - Episode: 67/100, Reward: 30.000, Step: 30
|
| 77 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 68/100, Reward: 33.000, Step: 33
|
| 78 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 69/100, Reward: 33.000, Step: 33
|
| 79 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 70/100, Reward: 26.000, Step: 26
|
| 80 |
-
2022-11-22 12:55:22 - r - INFO: - Current episode 70 has the best eval reward: 34.400
|
| 81 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 71/100, Reward: 37.000, Step: 37
|
| 82 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 72/100, Reward: 28.000, Step: 28
|
| 83 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 73/100, Reward: 30.000, Step: 30
|
| 84 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 74/100, Reward: 41.000, Step: 41
|
| 85 |
-
2022-11-22 12:55:22 - r - INFO: - Episode: 75/100, Reward: 45.000, Step: 45
|
| 86 |
-
2022-11-22 12:55:22 - r - INFO: - Current episode 75 has the best eval reward: 35.600
|
| 87 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 76/100, Reward: 68.000, Step: 68
|
| 88 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 77/100, Reward: 33.000, Step: 33
|
| 89 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 78/100, Reward: 46.000, Step: 46
|
| 90 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 79/100, Reward: 54.000, Step: 54
|
| 91 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 80/100, Reward: 37.000, Step: 37
|
| 92 |
-
2022-11-22 12:55:23 - r - INFO: - Current episode 80 has the best eval reward: 42.800
|
| 93 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 81/100, Reward: 43.000, Step: 43
|
| 94 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 82/100, Reward: 79.000, Step: 79
|
| 95 |
-
2022-11-22 12:55:23 - r - INFO: - Episode: 83/100, Reward: 36.000, Step: 36
|
| 96 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 84/100, Reward: 58.000, Step: 58
|
| 97 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 85/100, Reward: 42.000, Step: 42
|
| 98 |
-
2022-11-22 12:55:24 - r - INFO: - Current episode 85 has the best eval reward: 62.100
|
| 99 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 86/100, Reward: 136.000, Step: 136
|
| 100 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 87/100, Reward: 57.000, Step: 57
|
| 101 |
-
2022-11-22 12:55:24 - r - INFO: - Episode: 88/100, Reward: 46.000, Step: 46
|
| 102 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 89/100, Reward: 105.000, Step: 105
|
| 103 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 90/100, Reward: 63.000, Step: 63
|
| 104 |
-
2022-11-22 12:55:25 - r - INFO: - Current episode 90 has the best eval reward: 76.600
|
| 105 |
-
2022-11-22 12:55:25 - r - INFO: - Episode: 91/100, Reward: 84.000, Step: 84
|
| 106 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 92/100, Reward: 136.000, Step: 136
|
| 107 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 93/100, Reward: 121.000, Step: 121
|
| 108 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 94/100, Reward: 96.000, Step: 96
|
| 109 |
-
2022-11-22 12:55:26 - r - INFO: - Episode: 95/100, Reward: 106.000, Step: 106
|
| 110 |
-
2022-11-22 12:55:27 - r - INFO: - Current episode 95 has the best eval reward: 187.300
|
| 111 |
-
2022-11-22 12:55:27 - r - INFO: - Episode: 96/100, Reward: 200.000, Step: 200
|
| 112 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 97/100, Reward: 200.000, Step: 200
|
| 113 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 98/100, Reward: 113.000, Step: 113
|
| 114 |
-
2022-11-22 12:55:28 - r - INFO: - Episode: 99/100, Reward: 113.000, Step: 113
|
| 115 |
-
2022-11-22 12:55:29 - r - INFO: - Episode: 100/100, Reward: 132.000, Step: 132
|
| 116 |
-
2022-11-22 12:55:29 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/learning_curve.png
DELETED
|
Binary file (47.3 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_20221122-125516/results/res.csv
DELETED
|
@@ -1,101 +0,0 @@
|
|
| 1 |
-
episodes,rewards,steps
|
| 2 |
-
0,18.0,18
|
| 3 |
-
1,35.0,35
|
| 4 |
-
2,13.0,13
|
| 5 |
-
3,32.0,32
|
| 6 |
-
4,16.0,16
|
| 7 |
-
5,9.0,9
|
| 8 |
-
6,12.0,12
|
| 9 |
-
7,16.0,16
|
| 10 |
-
8,14.0,14
|
| 11 |
-
9,12.0,12
|
| 12 |
-
10,13.0,13
|
| 13 |
-
11,14.0,14
|
| 14 |
-
12,19.0,19
|
| 15 |
-
13,9.0,9
|
| 16 |
-
14,15.0,15
|
| 17 |
-
15,12.0,12
|
| 18 |
-
16,11.0,11
|
| 19 |
-
17,9.0,9
|
| 20 |
-
18,13.0,13
|
| 21 |
-
19,17.0,17
|
| 22 |
-
20,13.0,13
|
| 23 |
-
21,15.0,15
|
| 24 |
-
22,22.0,22
|
| 25 |
-
23,26.0,26
|
| 26 |
-
24,19.0,19
|
| 27 |
-
25,10.0,10
|
| 28 |
-
26,10.0,10
|
| 29 |
-
27,11.0,11
|
| 30 |
-
28,13.0,13
|
| 31 |
-
29,16.0,16
|
| 32 |
-
30,13.0,13
|
| 33 |
-
31,15.0,15
|
| 34 |
-
32,12.0,12
|
| 35 |
-
33,13.0,13
|
| 36 |
-
34,13.0,13
|
| 37 |
-
35,11.0,11
|
| 38 |
-
36,9.0,9
|
| 39 |
-
37,9.0,9
|
| 40 |
-
38,10.0,10
|
| 41 |
-
39,14.0,14
|
| 42 |
-
40,9.0,9
|
| 43 |
-
41,10.0,10
|
| 44 |
-
42,9.0,9
|
| 45 |
-
43,14.0,14
|
| 46 |
-
44,10.0,10
|
| 47 |
-
45,19.0,19
|
| 48 |
-
46,10.0,10
|
| 49 |
-
47,14.0,14
|
| 50 |
-
48,18.0,18
|
| 51 |
-
49,32.0,32
|
| 52 |
-
50,17.0,17
|
| 53 |
-
51,15.0,15
|
| 54 |
-
52,18.0,18
|
| 55 |
-
53,14.0,14
|
| 56 |
-
54,22.0,22
|
| 57 |
-
55,14.0,14
|
| 58 |
-
56,21.0,21
|
| 59 |
-
57,21.0,21
|
| 60 |
-
58,23.0,23
|
| 61 |
-
59,21.0,21
|
| 62 |
-
60,21.0,21
|
| 63 |
-
61,35.0,35
|
| 64 |
-
62,23.0,23
|
| 65 |
-
63,27.0,27
|
| 66 |
-
64,24.0,24
|
| 67 |
-
65,28.0,28
|
| 68 |
-
66,30.0,30
|
| 69 |
-
67,33.0,33
|
| 70 |
-
68,33.0,33
|
| 71 |
-
69,26.0,26
|
| 72 |
-
70,37.0,37
|
| 73 |
-
71,28.0,28
|
| 74 |
-
72,30.0,30
|
| 75 |
-
73,41.0,41
|
| 76 |
-
74,45.0,45
|
| 77 |
-
75,68.0,68
|
| 78 |
-
76,33.0,33
|
| 79 |
-
77,46.0,46
|
| 80 |
-
78,54.0,54
|
| 81 |
-
79,37.0,37
|
| 82 |
-
80,43.0,43
|
| 83 |
-
81,79.0,79
|
| 84 |
-
82,36.0,36
|
| 85 |
-
83,58.0,58
|
| 86 |
-
84,42.0,42
|
| 87 |
-
85,136.0,136
|
| 88 |
-
86,57.0,57
|
| 89 |
-
87,46.0,46
|
| 90 |
-
88,105.0,105
|
| 91 |
-
89,63.0,63
|
| 92 |
-
90,84.0,84
|
| 93 |
-
91,136.0,136
|
| 94 |
-
92,121.0,121
|
| 95 |
-
93,96.0,96
|
| 96 |
-
94,106.0,106
|
| 97 |
-
95,200.0,200
|
| 98 |
-
96,200.0,200
|
| 99 |
-
97,113.0,113
|
| 100 |
-
98,113.0,113
|
| 101 |
-
99,132.0,132
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/logs/log.txt
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
2023-04-06 16:00:28 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-06 16:00:28 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-06 16:00:28 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-06 16:00:28 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-06 16:00:28 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-06 16:00:28 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-06 16:00:28 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-06 16:00:28 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-06 16:00:28 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
| 10 |
-
2023-04-06 16:00:28 - r - INFO: - mode train <class 'str'>
|
| 11 |
-
2023-04-06 16:00:28 - r - INFO: - mp_backend mp <class 'str'>
|
| 12 |
-
2023-04-06 16:00:28 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-06 16:00:28 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-06 16:00:28 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-06 16:00:28 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-06 16:00:28 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-06 16:00:28 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-06 16:00:28 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-06 16:00:28 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
| 20 |
-
2023-04-06 16:00:28 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 21 |
-
2023-04-06 16:00:28 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-06 16:00:28 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-06 16:00:28 - r - INFO: - n_workers 2 <class 'int'>
|
| 24 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-06 16:00:28 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-06 16:00:28 - r - INFO: - gamma 0.95 <class 'float'>
|
| 28 |
-
2023-04-06 16:00:28 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-06 16:00:28 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-06 16:00:28 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-06 16:00:28 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-06 16:00:28 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-06 16:00:28 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028 <class 'str'>
|
| 34 |
-
2023-04-06 16:00:28 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/results <class 'str'>
|
| 35 |
-
2023-04-06 16:00:28 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/logs <class 'str'>
|
| 36 |
-
2023-04-06 16:00:28 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/traj <class 'str'>
|
| 37 |
-
2023-04-06 16:00:28 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-160028/videos <class 'str'>
|
| 38 |
-
2023-04-06 16:00:28 - r - INFO: - ================================================================================
|
| 39 |
-
2023-04-06 16:00:28 - r - INFO: - n_states: 4, n_actions: 2
|
| 40 |
-
2023-04-06 16:00:28 - r - INFO: - Start training!
|
| 41 |
-
2023-04-06 16:00:28 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
| 42 |
-
2023-04-06 16:01:56 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/models/checkpoint.pth
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:4cd537f6ab4ac0ff27caa323076685e3a3fff04b064dbdfc509baae76e9a9406
|
| 3 |
-
size 272407
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/learning_curve.png
DELETED
|
Binary file (44.9 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_mp_20230406-160028/results/res.csv
DELETED
|
@@ -1,402 +0,0 @@
|
|
| 1 |
-
episodes,rewards
|
| 2 |
-
0,18.0
|
| 3 |
-
1,39.0
|
| 4 |
-
2,18.0
|
| 5 |
-
3,28.0
|
| 6 |
-
4,15.0
|
| 7 |
-
5,12.0
|
| 8 |
-
6,15.0
|
| 9 |
-
7,40.0
|
| 10 |
-
8,31.0
|
| 11 |
-
9,17.0
|
| 12 |
-
10,17.0
|
| 13 |
-
11,31.0
|
| 14 |
-
12,13.0
|
| 15 |
-
13,14.0
|
| 16 |
-
14,15.0
|
| 17 |
-
15,21.0
|
| 18 |
-
16,9.0
|
| 19 |
-
17,10.0
|
| 20 |
-
18,22.0
|
| 21 |
-
19,19.0
|
| 22 |
-
20,11.0
|
| 23 |
-
21,13.0
|
| 24 |
-
22,15.0
|
| 25 |
-
23,20.0
|
| 26 |
-
24,14.0
|
| 27 |
-
25,12.0
|
| 28 |
-
26,11.0
|
| 29 |
-
27,10.0
|
| 30 |
-
28,14.0
|
| 31 |
-
29,12.0
|
| 32 |
-
30,9.0
|
| 33 |
-
31,10.0
|
| 34 |
-
32,16.0
|
| 35 |
-
33,13.0
|
| 36 |
-
34,12.0
|
| 37 |
-
35,15.0
|
| 38 |
-
36,10.0
|
| 39 |
-
37,17.0
|
| 40 |
-
38,10.0
|
| 41 |
-
39,10.0
|
| 42 |
-
40,13.0
|
| 43 |
-
41,12.0
|
| 44 |
-
42,10.0
|
| 45 |
-
43,12.0
|
| 46 |
-
44,12.0
|
| 47 |
-
45,12.0
|
| 48 |
-
46,9.0
|
| 49 |
-
47,10.0
|
| 50 |
-
48,10.0
|
| 51 |
-
49,11.0
|
| 52 |
-
50,9.0
|
| 53 |
-
51,13.0
|
| 54 |
-
52,9.0
|
| 55 |
-
53,10.0
|
| 56 |
-
54,10.0
|
| 57 |
-
55,11.0
|
| 58 |
-
56,10.0
|
| 59 |
-
57,10.0
|
| 60 |
-
58,14.0
|
| 61 |
-
59,9.0
|
| 62 |
-
60,10.0
|
| 63 |
-
61,10.0
|
| 64 |
-
62,9.0
|
| 65 |
-
63,12.0
|
| 66 |
-
64,10.0
|
| 67 |
-
65,13.0
|
| 68 |
-
66,12.0
|
| 69 |
-
67,15.0
|
| 70 |
-
68,10.0
|
| 71 |
-
69,13.0
|
| 72 |
-
70,14.0
|
| 73 |
-
71,10.0
|
| 74 |
-
72,30.0
|
| 75 |
-
73,11.0
|
| 76 |
-
74,9.0
|
| 77 |
-
75,11.0
|
| 78 |
-
76,9.0
|
| 79 |
-
77,11.0
|
| 80 |
-
78,9.0
|
| 81 |
-
79,11.0
|
| 82 |
-
80,16.0
|
| 83 |
-
81,21.0
|
| 84 |
-
82,10.0
|
| 85 |
-
83,9.0
|
| 86 |
-
84,14.0
|
| 87 |
-
85,9.0
|
| 88 |
-
86,13.0
|
| 89 |
-
87,9.0
|
| 90 |
-
88,13.0
|
| 91 |
-
89,17.0
|
| 92 |
-
90,26.0
|
| 93 |
-
91,32.0
|
| 94 |
-
92,14.0
|
| 95 |
-
93,17.0
|
| 96 |
-
94,11.0
|
| 97 |
-
95,34.0
|
| 98 |
-
96,10.0
|
| 99 |
-
97,23.0
|
| 100 |
-
98,14.0
|
| 101 |
-
99,37.0
|
| 102 |
-
100,27.0
|
| 103 |
-
101,34.0
|
| 104 |
-
102,23.0
|
| 105 |
-
103,59.0
|
| 106 |
-
104,9.0
|
| 107 |
-
105,91.0
|
| 108 |
-
106,61.0
|
| 109 |
-
107,47.0
|
| 110 |
-
108,21.0
|
| 111 |
-
109,27.0
|
| 112 |
-
110,22.0
|
| 113 |
-
111,30.0
|
| 114 |
-
112,22.0
|
| 115 |
-
113,29.0
|
| 116 |
-
114,25.0
|
| 117 |
-
115,68.0
|
| 118 |
-
116,102.0
|
| 119 |
-
117,54.0
|
| 120 |
-
118,46.0
|
| 121 |
-
119,34.0
|
| 122 |
-
120,61.0
|
| 123 |
-
121,81.0
|
| 124 |
-
122,55.0
|
| 125 |
-
123,67.0
|
| 126 |
-
124,71.0
|
| 127 |
-
125,46.0
|
| 128 |
-
126,88.0
|
| 129 |
-
127,90.0
|
| 130 |
-
128,68.0
|
| 131 |
-
129,114.0
|
| 132 |
-
130,66.0
|
| 133 |
-
131,102.0
|
| 134 |
-
132,100.0
|
| 135 |
-
133,88.0
|
| 136 |
-
134,80.0
|
| 137 |
-
135,81.0
|
| 138 |
-
136,49.0
|
| 139 |
-
137,123.0
|
| 140 |
-
138,197.0
|
| 141 |
-
139,146.0
|
| 142 |
-
140,93.0
|
| 143 |
-
141,135.0
|
| 144 |
-
142,117.0
|
| 145 |
-
143,104.0
|
| 146 |
-
144,168.0
|
| 147 |
-
145,114.0
|
| 148 |
-
146,82.0
|
| 149 |
-
147,153.0
|
| 150 |
-
148,106.0
|
| 151 |
-
149,140.0
|
| 152 |
-
150,100.0
|
| 153 |
-
151,120.0
|
| 154 |
-
152,117.0
|
| 155 |
-
153,173.0
|
| 156 |
-
154,200.0
|
| 157 |
-
155,142.0
|
| 158 |
-
156,180.0
|
| 159 |
-
157,156.0
|
| 160 |
-
158,149.0
|
| 161 |
-
159,173.0
|
| 162 |
-
160,187.0
|
| 163 |
-
161,200.0
|
| 164 |
-
162,188.0
|
| 165 |
-
163,156.0
|
| 166 |
-
164,170.0
|
| 167 |
-
165,158.0
|
| 168 |
-
166,200.0
|
| 169 |
-
167,152.0
|
| 170 |
-
168,194.0
|
| 171 |
-
169,196.0
|
| 172 |
-
170,189.0
|
| 173 |
-
171,200.0
|
| 174 |
-
172,173.0
|
| 175 |
-
173,200.0
|
| 176 |
-
174,154.0
|
| 177 |
-
175,200.0
|
| 178 |
-
176,200.0
|
| 179 |
-
177,200.0
|
| 180 |
-
178,189.0
|
| 181 |
-
179,194.0
|
| 182 |
-
180,199.0
|
| 183 |
-
181,200.0
|
| 184 |
-
182,200.0
|
| 185 |
-
183,189.0
|
| 186 |
-
184,200.0
|
| 187 |
-
185,200.0
|
| 188 |
-
186,200.0
|
| 189 |
-
187,200.0
|
| 190 |
-
188,200.0
|
| 191 |
-
189,200.0
|
| 192 |
-
190,200.0
|
| 193 |
-
191,200.0
|
| 194 |
-
192,200.0
|
| 195 |
-
193,200.0
|
| 196 |
-
194,200.0
|
| 197 |
-
195,189.0
|
| 198 |
-
196,198.0
|
| 199 |
-
197,195.0
|
| 200 |
-
198,199.0
|
| 201 |
-
199,200.0
|
| 202 |
-
200,200.0
|
| 203 |
-
201,200.0
|
| 204 |
-
202,198.0
|
| 205 |
-
203,196.0
|
| 206 |
-
204,200.0
|
| 207 |
-
205,200.0
|
| 208 |
-
206,200.0
|
| 209 |
-
207,200.0
|
| 210 |
-
208,200.0
|
| 211 |
-
209,200.0
|
| 212 |
-
210,195.0
|
| 213 |
-
211,198.0
|
| 214 |
-
212,200.0
|
| 215 |
-
213,200.0
|
| 216 |
-
214,200.0
|
| 217 |
-
215,200.0
|
| 218 |
-
216,200.0
|
| 219 |
-
217,194.0
|
| 220 |
-
218,200.0
|
| 221 |
-
219,200.0
|
| 222 |
-
220,200.0
|
| 223 |
-
221,200.0
|
| 224 |
-
222,200.0
|
| 225 |
-
223,200.0
|
| 226 |
-
224,197.0
|
| 227 |
-
225,200.0
|
| 228 |
-
226,200.0
|
| 229 |
-
227,200.0
|
| 230 |
-
228,199.0
|
| 231 |
-
229,200.0
|
| 232 |
-
230,200.0
|
| 233 |
-
231,198.0
|
| 234 |
-
232,200.0
|
| 235 |
-
233,200.0
|
| 236 |
-
234,197.0
|
| 237 |
-
235,200.0
|
| 238 |
-
236,200.0
|
| 239 |
-
237,200.0
|
| 240 |
-
238,200.0
|
| 241 |
-
239,196.0
|
| 242 |
-
240,200.0
|
| 243 |
-
241,200.0
|
| 244 |
-
242,195.0
|
| 245 |
-
243,200.0
|
| 246 |
-
244,200.0
|
| 247 |
-
245,200.0
|
| 248 |
-
246,200.0
|
| 249 |
-
247,200.0
|
| 250 |
-
248,200.0
|
| 251 |
-
249,200.0
|
| 252 |
-
250,200.0
|
| 253 |
-
251,200.0
|
| 254 |
-
252,200.0
|
| 255 |
-
253,200.0
|
| 256 |
-
254,200.0
|
| 257 |
-
255,199.0
|
| 258 |
-
256,200.0
|
| 259 |
-
257,200.0
|
| 260 |
-
258,200.0
|
| 261 |
-
259,200.0
|
| 262 |
-
260,200.0
|
| 263 |
-
261,200.0
|
| 264 |
-
262,200.0
|
| 265 |
-
263,200.0
|
| 266 |
-
264,200.0
|
| 267 |
-
265,200.0
|
| 268 |
-
266,200.0
|
| 269 |
-
267,200.0
|
| 270 |
-
268,200.0
|
| 271 |
-
269,200.0
|
| 272 |
-
270,200.0
|
| 273 |
-
271,200.0
|
| 274 |
-
272,200.0
|
| 275 |
-
273,200.0
|
| 276 |
-
274,200.0
|
| 277 |
-
275,200.0
|
| 278 |
-
276,200.0
|
| 279 |
-
277,200.0
|
| 280 |
-
278,200.0
|
| 281 |
-
279,200.0
|
| 282 |
-
280,200.0
|
| 283 |
-
281,200.0
|
| 284 |
-
282,200.0
|
| 285 |
-
283,200.0
|
| 286 |
-
284,200.0
|
| 287 |
-
285,200.0
|
| 288 |
-
286,200.0
|
| 289 |
-
287,200.0
|
| 290 |
-
288,200.0
|
| 291 |
-
289,200.0
|
| 292 |
-
290,200.0
|
| 293 |
-
291,200.0
|
| 294 |
-
292,200.0
|
| 295 |
-
293,200.0
|
| 296 |
-
294,200.0
|
| 297 |
-
295,200.0
|
| 298 |
-
296,200.0
|
| 299 |
-
297,200.0
|
| 300 |
-
298,200.0
|
| 301 |
-
299,200.0
|
| 302 |
-
300,200.0
|
| 303 |
-
301,200.0
|
| 304 |
-
302,200.0
|
| 305 |
-
303,200.0
|
| 306 |
-
304,200.0
|
| 307 |
-
305,200.0
|
| 308 |
-
306,200.0
|
| 309 |
-
307,200.0
|
| 310 |
-
308,200.0
|
| 311 |
-
309,200.0
|
| 312 |
-
310,200.0
|
| 313 |
-
311,200.0
|
| 314 |
-
312,200.0
|
| 315 |
-
313,200.0
|
| 316 |
-
314,200.0
|
| 317 |
-
315,200.0
|
| 318 |
-
316,200.0
|
| 319 |
-
317,200.0
|
| 320 |
-
318,200.0
|
| 321 |
-
319,200.0
|
| 322 |
-
320,200.0
|
| 323 |
-
321,200.0
|
| 324 |
-
322,200.0
|
| 325 |
-
323,200.0
|
| 326 |
-
324,200.0
|
| 327 |
-
325,200.0
|
| 328 |
-
326,200.0
|
| 329 |
-
327,200.0
|
| 330 |
-
328,200.0
|
| 331 |
-
329,200.0
|
| 332 |
-
330,200.0
|
| 333 |
-
331,200.0
|
| 334 |
-
332,200.0
|
| 335 |
-
333,200.0
|
| 336 |
-
334,200.0
|
| 337 |
-
335,200.0
|
| 338 |
-
336,200.0
|
| 339 |
-
337,200.0
|
| 340 |
-
338,200.0
|
| 341 |
-
339,200.0
|
| 342 |
-
340,200.0
|
| 343 |
-
341,200.0
|
| 344 |
-
342,200.0
|
| 345 |
-
343,200.0
|
| 346 |
-
344,200.0
|
| 347 |
-
345,200.0
|
| 348 |
-
346,200.0
|
| 349 |
-
347,200.0
|
| 350 |
-
348,200.0
|
| 351 |
-
349,200.0
|
| 352 |
-
350,200.0
|
| 353 |
-
351,200.0
|
| 354 |
-
352,200.0
|
| 355 |
-
353,200.0
|
| 356 |
-
354,200.0
|
| 357 |
-
355,200.0
|
| 358 |
-
356,200.0
|
| 359 |
-
357,200.0
|
| 360 |
-
358,200.0
|
| 361 |
-
359,200.0
|
| 362 |
-
360,200.0
|
| 363 |
-
361,200.0
|
| 364 |
-
362,200.0
|
| 365 |
-
363,200.0
|
| 366 |
-
364,200.0
|
| 367 |
-
365,200.0
|
| 368 |
-
366,200.0
|
| 369 |
-
367,200.0
|
| 370 |
-
368,200.0
|
| 371 |
-
369,200.0
|
| 372 |
-
370,200.0
|
| 373 |
-
371,200.0
|
| 374 |
-
372,200.0
|
| 375 |
-
373,200.0
|
| 376 |
-
374,200.0
|
| 377 |
-
375,200.0
|
| 378 |
-
376,200.0
|
| 379 |
-
377,200.0
|
| 380 |
-
378,200.0
|
| 381 |
-
379,200.0
|
| 382 |
-
380,200.0
|
| 383 |
-
381,200.0
|
| 384 |
-
382,200.0
|
| 385 |
-
383,200.0
|
| 386 |
-
384,200.0
|
| 387 |
-
385,200.0
|
| 388 |
-
386,200.0
|
| 389 |
-
387,200.0
|
| 390 |
-
388,200.0
|
| 391 |
-
389,200.0
|
| 392 |
-
390,200.0
|
| 393 |
-
391,200.0
|
| 394 |
-
392,200.0
|
| 395 |
-
393,200.0
|
| 396 |
-
394,200.0
|
| 397 |
-
395,200.0
|
| 398 |
-
396,200.0
|
| 399 |
-
397,200.0
|
| 400 |
-
398,200.0
|
| 401 |
-
399,200.0
|
| 402 |
-
400,200.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/logs/log.txt
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
2023-04-06 16:29:38 - r - INFO: - Hyperparameters:
|
| 2 |
-
2023-04-06 16:29:38 - r - INFO: - ================================================================================
|
| 3 |
-
2023-04-06 16:29:38 - r - INFO: - Name Value Type
|
| 4 |
-
2023-04-06 16:29:38 - r - INFO: - env_name CartPole-v1 <class 'str'>
|
| 5 |
-
2023-04-06 16:29:38 - r - INFO: - new_step_api 1 <class 'bool'>
|
| 6 |
-
2023-04-06 16:29:38 - r - INFO: - wrapper None <class 'str'>
|
| 7 |
-
2023-04-06 16:29:38 - r - INFO: - render 0 <class 'bool'>
|
| 8 |
-
2023-04-06 16:29:38 - r - INFO: - render_mode human <class 'str'>
|
| 9 |
-
2023-04-06 16:29:38 - r - INFO: - algo_name DoubleDQN <class 'str'>
|
| 10 |
-
2023-04-06 16:29:38 - r - INFO: - mode train <class 'str'>
|
| 11 |
-
2023-04-06 16:29:38 - r - INFO: - mp_backend ray <class 'str'>
|
| 12 |
-
2023-04-06 16:29:38 - r - INFO: - seed 1 <class 'int'>
|
| 13 |
-
2023-04-06 16:29:38 - r - INFO: - device cpu <class 'str'>
|
| 14 |
-
2023-04-06 16:29:38 - r - INFO: - train_eps 400 <class 'int'>
|
| 15 |
-
2023-04-06 16:29:38 - r - INFO: - test_eps 10 <class 'int'>
|
| 16 |
-
2023-04-06 16:29:38 - r - INFO: - eval_eps 10 <class 'int'>
|
| 17 |
-
2023-04-06 16:29:38 - r - INFO: - eval_per_episode 5 <class 'int'>
|
| 18 |
-
2023-04-06 16:29:38 - r - INFO: - max_steps 200 <class 'int'>
|
| 19 |
-
2023-04-06 16:29:38 - r - INFO: - load_checkpoint 0 <class 'bool'>
|
| 20 |
-
2023-04-06 16:29:38 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 21 |
-
2023-04-06 16:29:38 - r - INFO: - show_fig 0 <class 'bool'>
|
| 22 |
-
2023-04-06 16:29:38 - r - INFO: - save_fig 1 <class 'bool'>
|
| 23 |
-
2023-04-06 16:29:38 - r - INFO: - n_workers 2 <class 'int'>
|
| 24 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
-
2023-04-06 16:29:38 - r - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
-
2023-04-06 16:29:38 - r - INFO: - gamma 0.95 <class 'float'>
|
| 28 |
-
2023-04-06 16:29:38 - r - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
-
2023-04-06 16:29:38 - r - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
-
2023-04-06 16:29:38 - r - INFO: - batch_size 64 <class 'int'>
|
| 31 |
-
2023-04-06 16:29:38 - r - INFO: - target_update 4 <class 'int'>
|
| 32 |
-
2023-04-06 16:29:38 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] <class 'str'>
|
| 33 |
-
2023-04-06 16:29:38 - r - INFO: - task_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938 <class 'str'>
|
| 34 |
-
2023-04-06 16:29:38 - r - INFO: - res_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/results <class 'str'>
|
| 35 |
-
2023-04-06 16:29:38 - r - INFO: - log_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/logs <class 'str'>
|
| 36 |
-
2023-04-06 16:29:38 - r - INFO: - traj_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/traj <class 'str'>
|
| 37 |
-
2023-04-06 16:29:38 - r - INFO: - video_dir /media/disk/gsc/joyrl-offline/tasks/Train_CartPole-v1_DoubleDQN_20230406-162938/videos <class 'str'>
|
| 38 |
-
2023-04-06 16:29:38 - r - INFO: - ================================================================================
|
| 39 |
-
2023-04-06 16:29:40 - r - INFO: - n_states: 4, n_actions: 2
|
| 40 |
-
2023-04-06 16:29:40 - r - INFO: - Start training!
|
| 41 |
-
2023-04-06 16:29:40 - r - INFO: - Env: CartPole-v1, Algorithm: DoubleDQN, Device: cpu
|
| 42 |
-
2023-04-06 16:37:19 - r - INFO: - Finish training!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/models/checkpoint.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7d3a724152cea263dec5b58d80bee101405e7b3268a34c265d414ebbd771c5ac
|
| 3 |
-
size 272407
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/learning_curve.png
DELETED
|
Binary file (58.2 kB)
|
|
|
CartPole-v1/Train_CartPole-v1_DoubleDQN_ray_20230406-162938/results/res.csv
DELETED
|
@@ -1,401 +0,0 @@
|
|
| 1 |
-
episodes,rewards
|
| 2 |
-
0,16.0
|
| 3 |
-
1,18.0
|
| 4 |
-
2,18.0
|
| 5 |
-
3,28.0
|
| 6 |
-
4,16.0
|
| 7 |
-
5,15.0
|
| 8 |
-
6,14.0
|
| 9 |
-
7,15.0
|
| 10 |
-
8,11.0
|
| 11 |
-
9,49.0
|
| 12 |
-
10,31.0
|
| 13 |
-
11,31.0
|
| 14 |
-
12,11.0
|
| 15 |
-
13,14.0
|
| 16 |
-
14,13.0
|
| 17 |
-
15,21.0
|
| 18 |
-
16,9.0
|
| 19 |
-
17,28.0
|
| 20 |
-
18,10.0
|
| 21 |
-
19,19.0
|
| 22 |
-
20,15.0
|
| 23 |
-
21,12.0
|
| 24 |
-
22,13.0
|
| 25 |
-
23,18.0
|
| 26 |
-
24,15.0
|
| 27 |
-
25,14.0
|
| 28 |
-
26,13.0
|
| 29 |
-
27,11.0
|
| 30 |
-
28,12.0
|
| 31 |
-
29,14.0
|
| 32 |
-
30,12.0
|
| 33 |
-
31,10.0
|
| 34 |
-
32,9.0
|
| 35 |
-
33,13.0
|
| 36 |
-
34,16.0
|
| 37 |
-
35,17.0
|
| 38 |
-
36,15.0
|
| 39 |
-
37,12.0
|
| 40 |
-
38,10.0
|
| 41 |
-
39,18.0
|
| 42 |
-
40,10.0
|
| 43 |
-
41,11.0
|
| 44 |
-
42,21.0
|
| 45 |
-
43,16.0
|
| 46 |
-
44,15.0
|
| 47 |
-
45,19.0
|
| 48 |
-
46,15.0
|
| 49 |
-
47,20.0
|
| 50 |
-
48,15.0
|
| 51 |
-
49,14.0
|
| 52 |
-
50,12.0
|
| 53 |
-
51,10.0
|
| 54 |
-
52,12.0
|
| 55 |
-
53,11.0
|
| 56 |
-
54,12.0
|
| 57 |
-
55,10.0
|
| 58 |
-
56,11.0
|
| 59 |
-
57,9.0
|
| 60 |
-
58,15.0
|
| 61 |
-
59,10.0
|
| 62 |
-
60,15.0
|
| 63 |
-
61,11.0
|
| 64 |
-
62,12.0
|
| 65 |
-
63,15.0
|
| 66 |
-
64,13.0
|
| 67 |
-
65,11.0
|
| 68 |
-
66,12.0
|
| 69 |
-
67,12.0
|
| 70 |
-
68,10.0
|
| 71 |
-
69,11.0
|
| 72 |
-
70,11.0
|
| 73 |
-
71,9.0
|
| 74 |
-
72,9.0
|
| 75 |
-
73,11.0
|
| 76 |
-
74,9.0
|
| 77 |
-
75,10.0
|
| 78 |
-
76,9.0
|
| 79 |
-
77,10.0
|
| 80 |
-
78,9.0
|
| 81 |
-
79,10.0
|
| 82 |
-
80,10.0
|
| 83 |
-
81,9.0
|
| 84 |
-
82,15.0
|
| 85 |
-
83,10.0
|
| 86 |
-
84,10.0
|
| 87 |
-
85,11.0
|
| 88 |
-
86,11.0
|
| 89 |
-
87,13.0
|
| 90 |
-
88,13.0
|
| 91 |
-
89,9.0
|
| 92 |
-
90,16.0
|
| 93 |
-
91,12.0
|
| 94 |
-
92,15.0
|
| 95 |
-
93,9.0
|
| 96 |
-
94,10.0
|
| 97 |
-
95,10.0
|
| 98 |
-
96,11.0
|
| 99 |
-
97,11.0
|
| 100 |
-
98,9.0
|
| 101 |
-
99,12.0
|
| 102 |
-
100,16.0
|
| 103 |
-
101,10.0
|
| 104 |
-
102,15.0
|
| 105 |
-
103,9.0
|
| 106 |
-
104,9.0
|
| 107 |
-
105,10.0
|
| 108 |
-
106,11.0
|
| 109 |
-
107,10.0
|
| 110 |
-
108,13.0
|
| 111 |
-
109,11.0
|
| 112 |
-
110,9.0
|
| 113 |
-
111,14.0
|
| 114 |
-
112,10.0
|
| 115 |
-
113,12.0
|
| 116 |
-
114,10.0
|
| 117 |
-
115,10.0
|
| 118 |
-
116,10.0
|
| 119 |
-
117,13.0
|
| 120 |
-
118,10.0
|
| 121 |
-
119,11.0
|
| 122 |
-
120,9.0
|
| 123 |
-
121,11.0
|
| 124 |
-
122,10.0
|
| 125 |
-
123,10.0
|
| 126 |
-
124,12.0
|
| 127 |
-
125,41.0
|
| 128 |
-
126,9.0
|
| 129 |
-
127,31.0
|
| 130 |
-
128,14.0
|
| 131 |
-
129,14.0
|
| 132 |
-
130,14.0
|
| 133 |
-
131,11.0
|
| 134 |
-
132,14.0
|
| 135 |
-
133,12.0
|
| 136 |
-
134,16.0
|
| 137 |
-
135,11.0
|
| 138 |
-
136,12.0
|
| 139 |
-
137,16.0
|
| 140 |
-
138,12.0
|
| 141 |
-
139,14.0
|
| 142 |
-
140,12.0
|
| 143 |
-
141,18.0
|
| 144 |
-
142,15.0
|
| 145 |
-
143,18.0
|
| 146 |
-
144,14.0
|
| 147 |
-
145,14.0
|
| 148 |
-
146,18.0
|
| 149 |
-
147,17.0
|
| 150 |
-
148,24.0
|
| 151 |
-
149,13.0
|
| 152 |
-
150,18.0
|
| 153 |
-
151,15.0
|
| 154 |
-
152,20.0
|
| 155 |
-
153,17.0
|
| 156 |
-
154,16.0
|
| 157 |
-
155,17.0
|
| 158 |
-
156,14.0
|
| 159 |
-
157,26.0
|
| 160 |
-
158,26.0
|
| 161 |
-
159,42.0
|
| 162 |
-
160,25.0
|
| 163 |
-
161,58.0
|
| 164 |
-
162,48.0
|
| 165 |
-
163,48.0
|
| 166 |
-
164,61.0
|
| 167 |
-
165,115.0
|
| 168 |
-
166,156.0
|
| 169 |
-
167,56.0
|
| 170 |
-
168,61.0
|
| 171 |
-
169,83.0
|
| 172 |
-
170,36.0
|
| 173 |
-
171,47.0
|
| 174 |
-
172,31.0
|
| 175 |
-
173,27.0
|
| 176 |
-
174,50.0
|
| 177 |
-
175,34.0
|
| 178 |
-
176,32.0
|
| 179 |
-
177,49.0
|
| 180 |
-
178,30.0
|
| 181 |
-
179,50.0
|
| 182 |
-
180,34.0
|
| 183 |
-
181,27.0
|
| 184 |
-
182,49.0
|
| 185 |
-
183,35.0
|
| 186 |
-
184,52.0
|
| 187 |
-
185,35.0
|
| 188 |
-
186,47.0
|
| 189 |
-
187,50.0
|
| 190 |
-
188,35.0
|
| 191 |
-
189,54.0
|
| 192 |
-
190,33.0
|
| 193 |
-
191,50.0
|
| 194 |
-
192,63.0
|
| 195 |
-
193,121.0
|
| 196 |
-
194,86.0
|
| 197 |
-
195,46.0
|
| 198 |
-
196,54.0
|
| 199 |
-
197,42.0
|
| 200 |
-
198,73.0
|
| 201 |
-
199,45.0
|
| 202 |
-
200,48.0
|
| 203 |
-
201,72.0
|
| 204 |
-
202,60.0
|
| 205 |
-
203,96.0
|
| 206 |
-
204,40.0
|
| 207 |
-
205,46.0
|
| 208 |
-
206,65.0
|
| 209 |
-
207,84.0
|
| 210 |
-
208,115.0
|
| 211 |
-
209,78.0
|
| 212 |
-
210,33.0
|
| 213 |
-
211,40.0
|
| 214 |
-
212,32.0
|
| 215 |
-
213,39.0
|
| 216 |
-
214,47.0
|
| 217 |
-
215,37.0
|
| 218 |
-
216,53.0
|
| 219 |
-
217,37.0
|
| 220 |
-
218,56.0
|
| 221 |
-
219,36.0
|
| 222 |
-
220,101.0
|
| 223 |
-
221,105.0
|
| 224 |
-
222,172.0
|
| 225 |
-
223,116.0
|
| 226 |
-
224,200.0
|
| 227 |
-
225,162.0
|
| 228 |
-
226,200.0
|
| 229 |
-
227,200.0
|
| 230 |
-
228,200.0
|
| 231 |
-
229,200.0
|
| 232 |
-
230,200.0
|
| 233 |
-
231,200.0
|
| 234 |
-
232,200.0
|
| 235 |
-
233,200.0
|
| 236 |
-
234,200.0
|
| 237 |
-
235,200.0
|
| 238 |
-
236,200.0
|
| 239 |
-
237,200.0
|
| 240 |
-
238,200.0
|
| 241 |
-
239,200.0
|
| 242 |
-
240,200.0
|
| 243 |
-
241,200.0
|
| 244 |
-
242,200.0
|
| 245 |
-
243,200.0
|
| 246 |
-
244,200.0
|
| 247 |
-
245,200.0
|
| 248 |
-
246,200.0
|
| 249 |
-
247,200.0
|
| 250 |
-
248,200.0
|
| 251 |
-
249,200.0
|
| 252 |
-
250,200.0
|
| 253 |
-
251,200.0
|
| 254 |
-
252,200.0
|
| 255 |
-
253,200.0
|
| 256 |
-
254,200.0
|
| 257 |
-
255,200.0
|
| 258 |
-
256,200.0
|
| 259 |
-
257,200.0
|
| 260 |
-
258,200.0
|
| 261 |
-
259,200.0
|
| 262 |
-
260,200.0
|
| 263 |
-
261,200.0
|
| 264 |
-
262,200.0
|
| 265 |
-
263,200.0
|
| 266 |
-
264,200.0
|
| 267 |
-
265,200.0
|
| 268 |
-
266,200.0
|
| 269 |
-
267,200.0
|
| 270 |
-
268,200.0
|
| 271 |
-
269,200.0
|
| 272 |
-
270,200.0
|
| 273 |
-
271,200.0
|
| 274 |
-
272,200.0
|
| 275 |
-
273,200.0
|
| 276 |
-
274,200.0
|
| 277 |
-
275,200.0
|
| 278 |
-
276,200.0
|
| 279 |
-
277,200.0
|
| 280 |
-
278,200.0
|
| 281 |
-
279,200.0
|
| 282 |
-
280,200.0
|
| 283 |
-
281,200.0
|
| 284 |
-
282,200.0
|
| 285 |
-
283,200.0
|
| 286 |
-
284,200.0
|
| 287 |
-
285,200.0
|
| 288 |
-
286,200.0
|
| 289 |
-
287,200.0
|
| 290 |
-
288,200.0
|
| 291 |
-
289,199.0
|
| 292 |
-
290,200.0
|
| 293 |
-
291,190.0
|
| 294 |
-
292,179.0
|
| 295 |
-
293,189.0
|
| 296 |
-
294,193.0
|
| 297 |
-
295,200.0
|
| 298 |
-
296,200.0
|
| 299 |
-
297,200.0
|
| 300 |
-
298,195.0
|
| 301 |
-
299,200.0
|
| 302 |
-
300,186.0
|
| 303 |
-
301,175.0
|
| 304 |
-
302,177.0
|
| 305 |
-
303,185.0
|
| 306 |
-
304,167.0
|
| 307 |
-
305,172.0
|
| 308 |
-
306,164.0
|
| 309 |
-
307,146.0
|
| 310 |
-
308,187.0
|
| 311 |
-
309,150.0
|
| 312 |
-
310,146.0
|
| 313 |
-
311,165.0
|
| 314 |
-
312,200.0
|
| 315 |
-
313,200.0
|
| 316 |
-
314,200.0
|
| 317 |
-
315,200.0
|
| 318 |
-
316,200.0
|
| 319 |
-
317,158.0
|
| 320 |
-
318,181.0
|
| 321 |
-
319,174.0
|
| 322 |
-
320,175.0
|
| 323 |
-
321,176.0
|
| 324 |
-
322,170.0
|
| 325 |
-
323,161.0
|
| 326 |
-
324,180.0
|
| 327 |
-
325,200.0
|
| 328 |
-
326,198.0
|
| 329 |
-
327,179.0
|
| 330 |
-
328,192.0
|
| 331 |
-
329,157.0
|
| 332 |
-
330,151.0
|
| 333 |
-
331,198.0
|
| 334 |
-
332,154.0
|
| 335 |
-
333,165.0
|
| 336 |
-
334,200.0
|
| 337 |
-
335,179.0
|
| 338 |
-
336,200.0
|
| 339 |
-
337,191.0
|
| 340 |
-
338,177.0
|
| 341 |
-
339,200.0
|
| 342 |
-
340,171.0
|
| 343 |
-
341,200.0
|
| 344 |
-
342,200.0
|
| 345 |
-
343,200.0
|
| 346 |
-
344,200.0
|
| 347 |
-
345,200.0
|
| 348 |
-
346,200.0
|
| 349 |
-
347,163.0
|
| 350 |
-
348,134.0
|
| 351 |
-
349,200.0
|
| 352 |
-
350,140.0
|
| 353 |
-
351,200.0
|
| 354 |
-
352,200.0
|
| 355 |
-
353,139.0
|
| 356 |
-
354,152.0
|
| 357 |
-
355,136.0
|
| 358 |
-
356,200.0
|
| 359 |
-
357,200.0
|
| 360 |
-
358,173.0
|
| 361 |
-
359,200.0
|
| 362 |
-
360,155.0
|
| 363 |
-
361,134.0
|
| 364 |
-
362,200.0
|
| 365 |
-
363,186.0
|
| 366 |
-
364,142.0
|
| 367 |
-
365,200.0
|
| 368 |
-
366,200.0
|
| 369 |
-
367,128.0
|
| 370 |
-
368,200.0
|
| 371 |
-
369,200.0
|
| 372 |
-
370,200.0
|
| 373 |
-
371,200.0
|
| 374 |
-
372,200.0
|
| 375 |
-
373,200.0
|
| 376 |
-
374,200.0
|
| 377 |
-
375,142.0
|
| 378 |
-
376,162.0
|
| 379 |
-
377,180.0
|
| 380 |
-
378,120.0
|
| 381 |
-
379,190.0
|
| 382 |
-
380,169.0
|
| 383 |
-
381,125.0
|
| 384 |
-
382,189.0
|
| 385 |
-
383,158.0
|
| 386 |
-
384,197.0
|
| 387 |
-
385,200.0
|
| 388 |
-
386,200.0
|
| 389 |
-
387,139.0
|
| 390 |
-
388,158.0
|
| 391 |
-
389,165.0
|
| 392 |
-
390,200.0
|
| 393 |
-
391,200.0
|
| 394 |
-
392,113.0
|
| 395 |
-
393,115.0
|
| 396 |
-
394,117.0
|
| 397 |
-
395,119.0
|
| 398 |
-
396,110.0
|
| 399 |
-
397,119.0
|
| 400 |
-
398,200.0
|
| 401 |
-
399,133.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_ray_20230406-162938 → Train_ray_CartPole-v1_DoubleDQN_20230516-115126}/config.yaml
RENAMED
|
@@ -1,46 +1,43 @@
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
|
|
|
| 3 |
device: cpu
|
| 4 |
-
env_name:
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
load_checkpoint: false
|
|
|
|
| 8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
-
|
|
|
|
| 10 |
mode: train
|
|
|
|
| 11 |
mp_backend: ray
|
| 12 |
n_workers: 2
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
algo_cfg:
|
| 23 |
batch_size: 64
|
| 24 |
buffer_size: 100000
|
|
|
|
| 25 |
epsilon_decay: 500
|
| 26 |
epsilon_end: 0.01
|
| 27 |
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.
|
| 29 |
lr: 0.0001
|
| 30 |
target_update: 4
|
| 31 |
value_layers:
|
| 32 |
- activation: relu
|
| 33 |
layer_dim:
|
| 34 |
-
- n_states
|
| 35 |
- 256
|
| 36 |
layer_type: linear
|
| 37 |
- activation: relu
|
| 38 |
layer_dim:
|
| 39 |
- 256
|
| 40 |
-
- 256
|
| 41 |
-
layer_type: linear
|
| 42 |
-
- activation: none
|
| 43 |
-
layer_dim:
|
| 44 |
-
- 256
|
| 45 |
-
- n_actions
|
| 46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
| 3 |
+
collect_traj: false
|
| 4 |
device: cpu
|
| 5 |
+
env_name: gym
|
|
|
|
|
|
|
| 6 |
load_checkpoint: false
|
| 7 |
+
load_model_step: best
|
| 8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
+
max_episode: 100
|
| 10 |
+
max_step: 200
|
| 11 |
mode: train
|
| 12 |
+
model_save_fre: 500
|
| 13 |
mp_backend: ray
|
| 14 |
n_workers: 2
|
| 15 |
+
online_eval: true
|
| 16 |
+
online_eval_episode: 10
|
|
|
|
|
|
|
| 17 |
seed: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
algo_cfg:
|
| 19 |
batch_size: 64
|
| 20 |
buffer_size: 100000
|
| 21 |
+
buffer_type: REPLAY_QUE
|
| 22 |
epsilon_decay: 500
|
| 23 |
epsilon_end: 0.01
|
| 24 |
epsilon_start: 0.95
|
| 25 |
+
gamma: 0.99
|
| 26 |
lr: 0.0001
|
| 27 |
target_update: 4
|
| 28 |
value_layers:
|
| 29 |
- activation: relu
|
| 30 |
layer_dim:
|
|
|
|
| 31 |
- 256
|
| 32 |
layer_type: linear
|
| 33 |
- activation: relu
|
| 34 |
layer_dim:
|
| 35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
layer_type: linear
|
| 37 |
+
env_cfg:
|
| 38 |
+
id: CartPole-v1
|
| 39 |
+
ignore_params:
|
| 40 |
+
- wrapper
|
| 41 |
+
- ignore_params
|
| 42 |
+
render_mode: null
|
| 43 |
+
wrapper: null
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/logs/log.txt
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - General Configs:
|
| 2 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 3 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
| 4 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - env_name gym <class 'str'>
|
| 5 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
| 6 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - mode train <class 'str'>
|
| 7 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - device cpu <class 'str'>
|
| 8 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - seed 1 <class 'int'>
|
| 9 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
| 10 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
| 11 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
| 12 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - mp_backend ray <class 'str'>
|
| 13 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
| 14 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
| 15 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
| 16 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
| 17 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
| 18 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 19 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
| 20 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 21 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Algo Configs:
|
| 22 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 23 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
| 24 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
| 31 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
| 32 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
| 33 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
| 34 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 35 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Env Configs:
|
| 36 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 37 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - Name Value Type
|
| 38 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
| 39 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - render_mode None <class 'str'>
|
| 40 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - wrapper None <class 'str'>
|
| 41 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
| 42 |
+
2023-05-16 11:51:26 - SimpleLog - INFO: - ================================================================================
|
| 43 |
+
2023-05-16 11:51:32 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
| 44 |
+
2023-05-16 11:51:37 - RayLog - INFO: - Worker 0 finished episode 0 with reward 16.0 in 16 steps
|
| 45 |
+
2023-05-16 11:51:37 - RayLog - INFO: - Worker 1 finished episode 0 with reward 20.0 in 20 steps
|
| 46 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 2 with reward 11.0 in 11 steps
|
| 47 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 2 with reward 15.0 in 15 steps
|
| 48 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 4 with reward 13.0 in 13 steps
|
| 49 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 0 finished episode 3 with reward 22.0 in 22 steps
|
| 50 |
+
2023-05-16 11:51:39 - RayLog - INFO: - Worker 1 finished episode 5 with reward 9.0 in 9 steps
|
| 51 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 6 with reward 14.0 in 14 steps
|
| 52 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 7 with reward 12.0 in 12 steps
|
| 53 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 9 with reward 13.0 in 13 steps
|
| 54 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 10 with reward 14.0 in 14 steps
|
| 55 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 8 with reward 35.0 in 35 steps
|
| 56 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 11 with reward 13.0 in 13 steps
|
| 57 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 0 finished episode 12 with reward 15.0 in 15 steps
|
| 58 |
+
2023-05-16 11:51:40 - RayLog - INFO: - Worker 1 finished episode 13 with reward 12.0 in 12 steps
|
| 59 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 14 with reward 10.0 in 10 steps
|
| 60 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 15 with reward 11.0 in 11 steps
|
| 61 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 16 with reward 11.0 in 11 steps
|
| 62 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 18 with reward 12.0 in 12 steps
|
| 63 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 17 with reward 21.0 in 21 steps
|
| 64 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 19 with reward 15.0 in 15 steps
|
| 65 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 1 finished episode 20 with reward 18.0 in 18 steps
|
| 66 |
+
2023-05-16 11:51:41 - RayLog - INFO: - Worker 0 finished episode 21 with reward 13.0 in 13 steps
|
| 67 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 22 with reward 10.0 in 10 steps
|
| 68 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 23 with reward 16.0 in 16 steps
|
| 69 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 24 with reward 19.0 in 19 steps
|
| 70 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 25 with reward 10.0 in 10 steps
|
| 71 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 27 with reward 10.0 in 10 steps
|
| 72 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 28 with reward 10.0 in 10 steps
|
| 73 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 1 finished episode 26 with reward 26.0 in 26 steps
|
| 74 |
+
2023-05-16 11:51:42 - RayLog - INFO: - Worker 0 finished episode 29 with reward 12.0 in 12 steps
|
| 75 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 30 with reward 15.0 in 15 steps
|
| 76 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 32 with reward 10.0 in 10 steps
|
| 77 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 31 with reward 18.0 in 18 steps
|
| 78 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 33 with reward 13.0 in 13 steps
|
| 79 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 34 with reward 10.0 in 10 steps
|
| 80 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 0 finished episode 36 with reward 9.0 in 9 steps
|
| 81 |
+
2023-05-16 11:51:43 - RayLog - INFO: - Worker 1 finished episode 35 with reward 12.0 in 12 steps
|
| 82 |
+
2023-05-16 11:51:45 - RayLog - INFO: - update_step: 500, online_eval_reward: 9.000
|
| 83 |
+
2023-05-16 11:51:45 - RayLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
|
| 84 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 38 with reward 10.0 in 10 steps
|
| 85 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 37 with reward 13.0 in 13 steps
|
| 86 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 40 with reward 9.0 in 9 steps
|
| 87 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 39 with reward 14.0 in 14 steps
|
| 88 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 41 with reward 14.0 in 14 steps
|
| 89 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 1 finished episode 42 with reward 20.0 in 20 steps
|
| 90 |
+
2023-05-16 11:51:45 - RayLog - INFO: - Worker 0 finished episode 43 with reward 15.0 in 15 steps
|
| 91 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 44 with reward 19.0 in 19 steps
|
| 92 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 45 with reward 17.0 in 17 steps
|
| 93 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 47 with reward 12.0 in 12 steps
|
| 94 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 46 with reward 15.0 in 15 steps
|
| 95 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 48 with reward 14.0 in 14 steps
|
| 96 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 49 with reward 16.0 in 16 steps
|
| 97 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 50 with reward 9.0 in 9 steps
|
| 98 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 1 finished episode 51 with reward 13.0 in 13 steps
|
| 99 |
+
2023-05-16 11:51:46 - RayLog - INFO: - Worker 0 finished episode 52 with reward 10.0 in 10 steps
|
| 100 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 53 with reward 13.0 in 13 steps
|
| 101 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 54 with reward 13.0 in 13 steps
|
| 102 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 56 with reward 14.0 in 14 steps
|
| 103 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 55 with reward 20.0 in 20 steps
|
| 104 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 57 with reward 14.0 in 14 steps
|
| 105 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 1 finished episode 58 with reward 16.0 in 16 steps
|
| 106 |
+
2023-05-16 11:51:47 - RayLog - INFO: - Worker 0 finished episode 59 with reward 11.0 in 11 steps
|
| 107 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 60 with reward 14.0 in 14 steps
|
| 108 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 61 with reward 9.0 in 9 steps
|
| 109 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 63 with reward 19.0 in 19 steps
|
| 110 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 1 finished episode 62 with reward 23.0 in 23 steps
|
| 111 |
+
2023-05-16 11:51:48 - RayLog - INFO: - Worker 0 finished episode 64 with reward 18.0 in 18 steps
|
| 112 |
+
2023-05-16 11:51:49 - RayLog - INFO: - Worker 1 finished episode 65 with reward 26.0 in 26 steps
|
| 113 |
+
2023-05-16 11:51:49 - RayLog - INFO: - update_step: 1000, online_eval_reward: 69.000
|
| 114 |
+
2023-05-16 11:51:49 - RayLog - INFO: - current update step obtain a better online_eval_reward: 69.000, save the best model!
|
| 115 |
+
2023-05-16 11:51:50 - RayLog - INFO: - Worker 1 finished episode 67 with reward 93.0 in 93 steps
|
| 116 |
+
2023-05-16 11:51:51 - RayLog - INFO: - Worker 0 finished episode 66 with reward 127.0 in 127 steps
|
| 117 |
+
2023-05-16 11:51:51 - RayLog - INFO: - Worker 1 finished episode 68 with reward 40.0 in 40 steps
|
| 118 |
+
2023-05-16 11:51:52 - RayLog - INFO: - Worker 0 finished episode 69 with reward 54.0 in 54 steps
|
| 119 |
+
2023-05-16 11:51:52 - RayLog - INFO: - Worker 1 finished episode 70 with reward 48.0 in 48 steps
|
| 120 |
+
2023-05-16 11:51:53 - RayLog - INFO: - Worker 0 finished episode 71 with reward 62.0 in 62 steps
|
| 121 |
+
2023-05-16 11:51:53 - RayLog - INFO: - Worker 1 finished episode 72 with reward 60.0 in 60 steps
|
| 122 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 74 with reward 35.0 in 35 steps
|
| 123 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 73 with reward 47.0 in 47 steps
|
| 124 |
+
2023-05-16 11:51:54 - RayLog - INFO: - update_step: 1500, online_eval_reward: 63.000
|
| 125 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 1 finished episode 75 with reward 38.0 in 38 steps
|
| 126 |
+
2023-05-16 11:51:54 - RayLog - INFO: - Worker 0 finished episode 76 with reward 46.0 in 46 steps
|
| 127 |
+
2023-05-16 11:51:55 - RayLog - INFO: - Worker 1 finished episode 77 with reward 40.0 in 40 steps
|
| 128 |
+
2023-05-16 11:51:55 - RayLog - INFO: - Worker 0 finished episode 78 with reward 57.0 in 57 steps
|
| 129 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 79 with reward 38.0 in 38 steps
|
| 130 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 1 finished episode 81 with reward 33.0 in 33 steps
|
| 131 |
+
2023-05-16 11:51:56 - RayLog - INFO: - Worker 0 finished episode 80 with reward 51.0 in 51 steps
|
| 132 |
+
2023-05-16 11:51:57 - RayLog - INFO: - Worker 1 finished episode 82 with reward 44.0 in 44 steps
|
| 133 |
+
2023-05-16 11:51:58 - RayLog - INFO: - Worker 0 finished episode 83 with reward 70.0 in 70 steps
|
| 134 |
+
2023-05-16 11:51:58 - RayLog - INFO: - Worker 1 finished episode 84 with reward 55.0 in 55 steps
|
| 135 |
+
2023-05-16 11:51:58 - RayLog - INFO: - update_step: 2000, online_eval_reward: 82.000
|
| 136 |
+
2023-05-16 11:51:58 - RayLog - INFO: - current update step obtain a better online_eval_reward: 82.000, save the best model!
|
| 137 |
+
2023-05-16 11:51:59 - RayLog - INFO: - Worker 0 finished episode 85 with reward 66.0 in 66 steps
|
| 138 |
+
2023-05-16 11:51:59 - RayLog - INFO: - Worker 1 finished episode 86 with reward 56.0 in 56 steps
|
| 139 |
+
2023-05-16 11:52:00 - RayLog - INFO: - Worker 1 finished episode 88 with reward 45.0 in 45 steps
|
| 140 |
+
2023-05-16 11:52:00 - RayLog - INFO: - Worker 0 finished episode 87 with reward 68.0 in 68 steps
|
| 141 |
+
2023-05-16 11:52:01 - RayLog - INFO: - Worker 1 finished episode 89 with reward 50.0 in 50 steps
|
| 142 |
+
2023-05-16 11:52:02 - RayLog - INFO: - Worker 0 finished episode 90 with reward 79.0 in 79 steps
|
| 143 |
+
2023-05-16 11:52:02 - RayLog - INFO: - Worker 1 finished episode 91 with reward 57.0 in 57 steps
|
| 144 |
+
2023-05-16 11:52:04 - RayLog - INFO: - update_step: 2500, online_eval_reward: 77.000
|
| 145 |
+
2023-05-16 11:52:04 - RayLog - INFO: - Worker 1 finished episode 93 with reward 66.0 in 66 steps
|
| 146 |
+
2023-05-16 11:52:04 - RayLog - INFO: - Worker 0 finished episode 92 with reward 84.0 in 84 steps
|
| 147 |
+
2023-05-16 11:52:05 - RayLog - INFO: - Worker 1 finished episode 94 with reward 56.0 in 56 steps
|
| 148 |
+
2023-05-16 11:52:07 - RayLog - INFO: - Worker 0 finished episode 95 with reward 134.0 in 134 steps
|
| 149 |
+
2023-05-16 11:52:08 - RayLog - INFO: - Worker 1 finished episode 96 with reward 115.0 in 115 steps
|
| 150 |
+
2023-05-16 11:52:10 - RayLog - INFO: - update_step: 3000, online_eval_reward: 200.000
|
| 151 |
+
2023-05-16 11:52:10 - RayLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
| 152 |
+
2023-05-16 11:52:12 - RayLog - INFO: - Worker 0 finished episode 97 with reward 200.0 in 200 steps
|
| 153 |
+
2023-05-16 11:52:13 - RayLog - INFO: - Worker 1 finished episode 98 with reward 200.0 in 200 steps
|
| 154 |
+
2023-05-16 11:52:15 - RayLog - INFO: - update_step: 3500, online_eval_reward: 200.000
|
| 155 |
+
2023-05-16 11:52:16 - RayLog - INFO: - Worker 0 finished episode 99 with reward 200.0 in 200 steps
|
| 156 |
+
2023-05-16 11:52:17 - RayLog - INFO: - Worker 1 finished episode 100 with reward 200.0 in 200 steps
|
| 157 |
+
2023-05-16 11:52:19 - SimpleLog - INFO: - Finish training! total time consumed: 53.70s
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/1500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/2500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/3500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/models/best
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/{Test_CartPole-v1_DoubleDQN_20221122-125611/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209086.JMac.local.52110.0}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be6be5f2b8ae4530630b850e07d2bbd6010678cb75c3d3050606cdfa0e1f6acd
|
| 3 |
+
size 40
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_20221122-125516/models/checkpoint.pth → Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/interact/events.out.tfevents.1684209096.JMac.local.52161.0}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4aa028324617b734607430bc18aa93daae8536fcfea762ed7cdd92c65a472dd0
|
| 3 |
+
size 10028
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209086.JMac.local.52110.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37344be608143375d6347aff7b1395cef1e5b52479a11b5faf17e1f631046d1d
|
| 3 |
+
size 40
|
CartPole-v1/Train_ray_CartPole-v1_DoubleDQN_20230516-115126/tb_logs/model/events.out.tfevents.1684209096.JMac.local.52161.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c34b8e5d11e95894f9f70682c415e3475f765542af5aa31b961c6d0d8e11923
|
| 3 |
+
size 177587
|
CartPole-v1/{Train_CartPole-v1_DoubleDQN_mp_20230406-160028 → Train_single_CartPole-v1_DoubleDQN_20230516-114540}/config.yaml
RENAMED
|
@@ -1,46 +1,43 @@
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
|
|
|
| 3 |
device: cpu
|
| 4 |
-
env_name:
|
| 5 |
-
eval_eps: 10
|
| 6 |
-
eval_per_episode: 5
|
| 7 |
load_checkpoint: false
|
|
|
|
| 8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
-
|
|
|
|
| 10 |
mode: train
|
| 11 |
-
|
|
|
|
| 12 |
n_workers: 2
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
render_mode: human
|
| 16 |
-
save_fig: true
|
| 17 |
seed: 1
|
| 18 |
-
show_fig: false
|
| 19 |
-
test_eps: 10
|
| 20 |
-
train_eps: 400
|
| 21 |
-
wrapper: null
|
| 22 |
algo_cfg:
|
| 23 |
batch_size: 64
|
| 24 |
buffer_size: 100000
|
|
|
|
| 25 |
epsilon_decay: 500
|
| 26 |
epsilon_end: 0.01
|
| 27 |
epsilon_start: 0.95
|
| 28 |
-
gamma: 0.
|
| 29 |
lr: 0.0001
|
| 30 |
target_update: 4
|
| 31 |
value_layers:
|
| 32 |
- activation: relu
|
| 33 |
layer_dim:
|
| 34 |
-
- n_states
|
| 35 |
- 256
|
| 36 |
layer_type: linear
|
| 37 |
- activation: relu
|
| 38 |
layer_dim:
|
| 39 |
- 256
|
| 40 |
-
- 256
|
| 41 |
-
layer_type: linear
|
| 42 |
-
- activation: none
|
| 43 |
-
layer_dim:
|
| 44 |
-
- 256
|
| 45 |
-
- n_actions
|
| 46 |
layer_type: linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
general_cfg:
|
| 2 |
algo_name: DoubleDQN
|
| 3 |
+
collect_traj: false
|
| 4 |
device: cpu
|
| 5 |
+
env_name: gym
|
|
|
|
|
|
|
| 6 |
load_checkpoint: false
|
| 7 |
+
load_model_step: best
|
| 8 |
load_path: Train_CartPole-v1_DQN_20221026-054757
|
| 9 |
+
max_episode: 100
|
| 10 |
+
max_step: 200
|
| 11 |
mode: train
|
| 12 |
+
model_save_fre: 500
|
| 13 |
+
mp_backend: single
|
| 14 |
n_workers: 2
|
| 15 |
+
online_eval: true
|
| 16 |
+
online_eval_episode: 10
|
|
|
|
|
|
|
| 17 |
seed: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
algo_cfg:
|
| 19 |
batch_size: 64
|
| 20 |
buffer_size: 100000
|
| 21 |
+
buffer_type: REPLAY_QUE
|
| 22 |
epsilon_decay: 500
|
| 23 |
epsilon_end: 0.01
|
| 24 |
epsilon_start: 0.95
|
| 25 |
+
gamma: 0.99
|
| 26 |
lr: 0.0001
|
| 27 |
target_update: 4
|
| 28 |
value_layers:
|
| 29 |
- activation: relu
|
| 30 |
layer_dim:
|
|
|
|
| 31 |
- 256
|
| 32 |
layer_type: linear
|
| 33 |
- activation: relu
|
| 34 |
layer_dim:
|
| 35 |
- 256
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
layer_type: linear
|
| 37 |
+
env_cfg:
|
| 38 |
+
id: CartPole-v1
|
| 39 |
+
ignore_params:
|
| 40 |
+
- wrapper
|
| 41 |
+
- ignore_params
|
| 42 |
+
render_mode: null
|
| 43 |
+
wrapper: null
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/logs/log.txt
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - General Configs:
|
| 2 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 3 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
| 4 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - env_name gym <class 'str'>
|
| 5 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - algo_name DoubleDQN <class 'str'>
|
| 6 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - mode train <class 'str'>
|
| 7 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - device cpu <class 'str'>
|
| 8 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - seed 1 <class 'int'>
|
| 9 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - max_episode 100 <class 'int'>
|
| 10 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
| 11 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
| 12 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
| 13 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
| 14 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
| 15 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
| 16 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - model_save_fre 500 <class 'int'>
|
| 17 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
| 18 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 <class 'str'>
|
| 19 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
| 20 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 21 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Algo Configs:
|
| 22 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 23 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
| 24 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
| 25 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
| 26 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
| 27 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - gamma 0.99 <class 'float'>
|
| 28 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
| 29 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_size 100000 <class 'int'>
|
| 30 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
| 31 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - target_update 4 <class 'int'>
|
| 32 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256], 'activation': 'relu'}] <class 'str'>
|
| 33 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
| 34 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 35 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Env Configs:
|
| 36 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 37 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Name Value Type
|
| 38 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - id CartPole-v1 <class 'str'>
|
| 39 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - render_mode None <class 'str'>
|
| 40 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - wrapper None <class 'str'>
|
| 41 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
| 42 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - ================================================================================
|
| 43 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - obs_space: Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32), n_actions: Discrete(2)
|
| 44 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - Start training!
|
| 45 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 0, ep_reward: 25.0, ep_step: 25
|
| 46 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 1, ep_reward: 17.0, ep_step: 17
|
| 47 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 2, ep_reward: 19.0, ep_step: 19
|
| 48 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 3, ep_reward: 14.0, ep_step: 14
|
| 49 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 4, ep_reward: 14.0, ep_step: 14
|
| 50 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 5, ep_reward: 21.0, ep_step: 21
|
| 51 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 6, ep_reward: 22.0, ep_step: 22
|
| 52 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 7, ep_reward: 13.0, ep_step: 13
|
| 53 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 8, ep_reward: 27.0, ep_step: 27
|
| 54 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 9, ep_reward: 11.0, ep_step: 11
|
| 55 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 10, ep_reward: 14.0, ep_step: 14
|
| 56 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 11, ep_reward: 24.0, ep_step: 24
|
| 57 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 12, ep_reward: 23.0, ep_step: 23
|
| 58 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 13, ep_reward: 12.0, ep_step: 12
|
| 59 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 14, ep_reward: 12.0, ep_step: 12
|
| 60 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 15, ep_reward: 13.0, ep_step: 13
|
| 61 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 16, ep_reward: 11.0, ep_step: 11
|
| 62 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 17, ep_reward: 15.0, ep_step: 15
|
| 63 |
+
2023-05-16 11:45:40 - SimpleLog - INFO: - episode: 18, ep_reward: 12.0, ep_step: 12
|
| 64 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 19, ep_reward: 27.0, ep_step: 27
|
| 65 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 20, ep_reward: 14.0, ep_step: 14
|
| 66 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 21, ep_reward: 19.0, ep_step: 19
|
| 67 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 22, ep_reward: 10.0, ep_step: 10
|
| 68 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 23, ep_reward: 10.0, ep_step: 10
|
| 69 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 24, ep_reward: 15.0, ep_step: 15
|
| 70 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 25, ep_reward: 15.0, ep_step: 15
|
| 71 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 26, ep_reward: 14.0, ep_step: 14
|
| 72 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 27, ep_reward: 11.0, ep_step: 11
|
| 73 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 28, ep_reward: 10.0, ep_step: 10
|
| 74 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 29, ep_reward: 12.0, ep_step: 12
|
| 75 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 30, ep_reward: 10.0, ep_step: 10
|
| 76 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 31, ep_reward: 9.0, ep_step: 9
|
| 77 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 32, ep_reward: 11.0, ep_step: 11
|
| 78 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 33, ep_reward: 9.0, ep_step: 9
|
| 79 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 34, ep_reward: 13.0, ep_step: 13
|
| 80 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 35, ep_reward: 10.0, ep_step: 10
|
| 81 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 36, ep_reward: 9.0, ep_step: 9
|
| 82 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 37, ep_reward: 10.0, ep_step: 10
|
| 83 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - update_step: 500, online_eval_reward: 9.000
|
| 84 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 9.000, save the best model!
|
| 85 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 38, ep_reward: 14.0, ep_step: 14
|
| 86 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 39, ep_reward: 11.0, ep_step: 11
|
| 87 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 40, ep_reward: 9.0, ep_step: 9
|
| 88 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 41, ep_reward: 9.0, ep_step: 9
|
| 89 |
+
2023-05-16 11:45:41 - SimpleLog - INFO: - episode: 42, ep_reward: 9.0, ep_step: 9
|
| 90 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 43, ep_reward: 11.0, ep_step: 11
|
| 91 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 44, ep_reward: 21.0, ep_step: 21
|
| 92 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 45, ep_reward: 13.0, ep_step: 13
|
| 93 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 46, ep_reward: 12.0, ep_step: 12
|
| 94 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 47, ep_reward: 30.0, ep_step: 30
|
| 95 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 48, ep_reward: 20.0, ep_step: 20
|
| 96 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 49, ep_reward: 28.0, ep_step: 28
|
| 97 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 50, ep_reward: 22.0, ep_step: 22
|
| 98 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 51, ep_reward: 20.0, ep_step: 20
|
| 99 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 52, ep_reward: 26.0, ep_step: 26
|
| 100 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 53, ep_reward: 24.0, ep_step: 24
|
| 101 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 54, ep_reward: 30.0, ep_step: 30
|
| 102 |
+
2023-05-16 11:45:42 - SimpleLog - INFO: - episode: 55, ep_reward: 26.0, ep_step: 26
|
| 103 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 56, ep_reward: 41.0, ep_step: 41
|
| 104 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 57, ep_reward: 58.0, ep_step: 58
|
| 105 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 58, ep_reward: 59.0, ep_step: 59
|
| 106 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: 63.000
|
| 107 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 63.000, save the best model!
|
| 108 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 59, ep_reward: 58.0, ep_step: 58
|
| 109 |
+
2023-05-16 11:45:43 - SimpleLog - INFO: - episode: 60, ep_reward: 47.0, ep_step: 47
|
| 110 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 61, ep_reward: 84.0, ep_step: 84
|
| 111 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 62, ep_reward: 44.0, ep_step: 44
|
| 112 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 63, ep_reward: 59.0, ep_step: 59
|
| 113 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 64, ep_reward: 39.0, ep_step: 39
|
| 114 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 65, ep_reward: 53.0, ep_step: 53
|
| 115 |
+
2023-05-16 11:45:44 - SimpleLog - INFO: - episode: 66, ep_reward: 70.0, ep_step: 70
|
| 116 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 67, ep_reward: 58.0, ep_step: 58
|
| 117 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: 65.000
|
| 118 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 65.000, save the best model!
|
| 119 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 68, ep_reward: 101.0, ep_step: 101
|
| 120 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 69, ep_reward: 52.0, ep_step: 52
|
| 121 |
+
2023-05-16 11:45:45 - SimpleLog - INFO: - episode: 70, ep_reward: 58.0, ep_step: 58
|
| 122 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 71, ep_reward: 61.0, ep_step: 61
|
| 123 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 72, ep_reward: 91.0, ep_step: 91
|
| 124 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 73, ep_reward: 54.0, ep_step: 54
|
| 125 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: 65.000
|
| 126 |
+
2023-05-16 11:45:46 - SimpleLog - INFO: - episode: 74, ep_reward: 98.0, ep_step: 98
|
| 127 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 75, ep_reward: 67.0, ep_step: 67
|
| 128 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 76, ep_reward: 70.0, ep_step: 70
|
| 129 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 77, ep_reward: 74.0, ep_step: 74
|
| 130 |
+
2023-05-16 11:45:47 - SimpleLog - INFO: - episode: 78, ep_reward: 72.0, ep_step: 72
|
| 131 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 79, ep_reward: 81.0, ep_step: 81
|
| 132 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 80, ep_reward: 82.0, ep_step: 82
|
| 133 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: 94.000
|
| 134 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 94.000, save the best model!
|
| 135 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 81, ep_reward: 97.0, ep_step: 97
|
| 136 |
+
2023-05-16 11:45:48 - SimpleLog - INFO: - episode: 82, ep_reward: 89.0, ep_step: 89
|
| 137 |
+
2023-05-16 11:45:49 - SimpleLog - INFO: - episode: 83, ep_reward: 200.0, ep_step: 200
|
| 138 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 84, ep_reward: 142.0, ep_step: 142
|
| 139 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: 153.000
|
| 140 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 153.000, save the best model!
|
| 141 |
+
2023-05-16 11:45:50 - SimpleLog - INFO: - episode: 85, ep_reward: 114.0, ep_step: 114
|
| 142 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 86, ep_reward: 162.0, ep_step: 162
|
| 143 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - episode: 87, ep_reward: 200.0, ep_step: 200
|
| 144 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: 200.000
|
| 145 |
+
2023-05-16 11:45:51 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: 200.000, save the best model!
|
| 146 |
+
2023-05-16 11:45:52 - SimpleLog - INFO: - episode: 88, ep_reward: 200.0, ep_step: 200
|
| 147 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 89, ep_reward: 200.0, ep_step: 200
|
| 148 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: 200.000
|
| 149 |
+
2023-05-16 11:45:53 - SimpleLog - INFO: - episode: 90, ep_reward: 200.0, ep_step: 200
|
| 150 |
+
2023-05-16 11:45:54 - SimpleLog - INFO: - episode: 91, ep_reward: 200.0, ep_step: 200
|
| 151 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 92, ep_reward: 200.0, ep_step: 200
|
| 152 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: 200.000
|
| 153 |
+
2023-05-16 11:45:55 - SimpleLog - INFO: - episode: 93, ep_reward: 200.0, ep_step: 200
|
| 154 |
+
2023-05-16 11:45:56 - SimpleLog - INFO: - episode: 94, ep_reward: 200.0, ep_step: 200
|
| 155 |
+
2023-05-16 11:45:57 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: 200.000
|
| 156 |
+
2023-05-16 11:45:57 - SimpleLog - INFO: - episode: 95, ep_reward: 200.0, ep_step: 200
|
| 157 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 96, ep_reward: 200.0, ep_step: 200
|
| 158 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - episode: 97, ep_reward: 200.0, ep_step: 200
|
| 159 |
+
2023-05-16 11:45:58 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: 200.000
|
| 160 |
+
2023-05-16 11:45:59 - SimpleLog - INFO: - episode: 98, ep_reward: 200.0, ep_step: 200
|
| 161 |
+
2023-05-16 11:46:00 - SimpleLog - INFO: - episode: 99, ep_reward: 200.0, ep_step: 200
|
| 162 |
+
2023-05-16 11:46:00 - SimpleLog - INFO: - Finish training! total time consumed: 20.03s
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/1500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/2500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3000
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/3500
ADDED
|
Binary file (545 kB). View file
|
|
|
CartPole-v1/Train_single_CartPole-v1_DoubleDQN_20230516-114540/models/4000
ADDED
|
Binary file (545 kB). View file
|
|
|