Train_Pendulum-v1_SAC
Browse files- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/config.yaml +81 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/logs/log.txt +487 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/100 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10100 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10200 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10300 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10800 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1100 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11100 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1200 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12500 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12700 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12800 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1300 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13800 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1400 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1500 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1600 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16700 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1700 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17000 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1800 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18400 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18700 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18800 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18900 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/interact/events.out.tfevents.1687077551.ML3090.330549.0 +3 -0
- ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/policy/events.out.tfevents.1687077551.ML3090.330549.1 +3 -0
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/config.yaml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
general_cfg:
|
| 2 |
+
algo_name: SAC
|
| 3 |
+
collect_traj: false
|
| 4 |
+
device: cpu
|
| 5 |
+
env_name: gym
|
| 6 |
+
interact_summary_fre: 1
|
| 7 |
+
load_checkpoint: false
|
| 8 |
+
load_model_step: best
|
| 9 |
+
load_path: Train_CartPole-v1_SAC_20230618-162702
|
| 10 |
+
max_episode: 200
|
| 11 |
+
max_step: 200
|
| 12 |
+
mode: train
|
| 13 |
+
model_save_fre: 100
|
| 14 |
+
model_summary_fre: 1
|
| 15 |
+
mp_backend: single
|
| 16 |
+
n_learners: 1
|
| 17 |
+
n_workers: 2
|
| 18 |
+
online_eval: true
|
| 19 |
+
online_eval_episode: 10
|
| 20 |
+
seed: 1
|
| 21 |
+
share_buffer: true
|
| 22 |
+
algo_cfg:
|
| 23 |
+
action_type: continuous
|
| 24 |
+
actor_layers:
|
| 25 |
+
- activation: relu
|
| 26 |
+
layer_size:
|
| 27 |
+
- 256
|
| 28 |
+
layer_type: linear
|
| 29 |
+
- activation: relu
|
| 30 |
+
layer_size:
|
| 31 |
+
- 256
|
| 32 |
+
layer_type: linear
|
| 33 |
+
actor_lr: 0.0003
|
| 34 |
+
alpha: 0.1
|
| 35 |
+
alpha_lr: 0.0001
|
| 36 |
+
automatic_entropy_tuning: false
|
| 37 |
+
batch_size: 64
|
| 38 |
+
buffer_size: 1000000
|
| 39 |
+
buffer_type: REPLAY_QUE
|
| 40 |
+
critic1_lr: 0.001
|
| 41 |
+
critic2_lr: 0.001
|
| 42 |
+
critic_layers:
|
| 43 |
+
- activation: relu
|
| 44 |
+
layer_size:
|
| 45 |
+
- 256
|
| 46 |
+
layer_type: linear
|
| 47 |
+
- activation: relu
|
| 48 |
+
layer_size:
|
| 49 |
+
- 256
|
| 50 |
+
layer_type: linear
|
| 51 |
+
epsilon_decay: 500
|
| 52 |
+
epsilon_end: 0.01
|
| 53 |
+
epsilon_start: 0.95
|
| 54 |
+
gamma: 0.95
|
| 55 |
+
hidden_dim: 64
|
| 56 |
+
independ_actor: true
|
| 57 |
+
lr: 0.0001
|
| 58 |
+
min_policy: 0
|
| 59 |
+
n_epochs: 1
|
| 60 |
+
n_steps_per_learn: 1
|
| 61 |
+
share_optimizer: false
|
| 62 |
+
start_steps: 10000
|
| 63 |
+
target_update: 1
|
| 64 |
+
target_update_fre: 1
|
| 65 |
+
tau: 0.005
|
| 66 |
+
value_layers:
|
| 67 |
+
- activation: relu
|
| 68 |
+
layer_size:
|
| 69 |
+
- 256
|
| 70 |
+
layer_type: linear
|
| 71 |
+
- activation: relu
|
| 72 |
+
layer_size:
|
| 73 |
+
- 256
|
| 74 |
+
layer_type: linear
|
| 75 |
+
env_cfg:
|
| 76 |
+
id: Pendulum-v1
|
| 77 |
+
ignore_params:
|
| 78 |
+
- wrapper
|
| 79 |
+
- ignore_params
|
| 80 |
+
render_mode: null
|
| 81 |
+
wrapper: null
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/logs/log.txt
ADDED
|
@@ -0,0 +1,487 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - General Configs:
|
| 2 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 3 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
|
| 4 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - env_name gym <class 'str'>
|
| 5 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - algo_name SAC <class 'str'>
|
| 6 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - mode train <class 'str'>
|
| 7 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - device cpu <class 'str'>
|
| 8 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - seed 1 <class 'int'>
|
| 9 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - max_episode 200 <class 'int'>
|
| 10 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - max_step 200 <class 'int'>
|
| 11 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - collect_traj 0 <class 'bool'>
|
| 12 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - mp_backend single <class 'str'>
|
| 13 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - n_workers 2 <class 'int'>
|
| 14 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - n_learners 1 <class 'int'>
|
| 15 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - share_buffer 1 <class 'bool'>
|
| 16 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - online_eval 1 <class 'bool'>
|
| 17 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - online_eval_episode 10 <class 'int'>
|
| 18 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - model_save_fre 100 <class 'int'>
|
| 19 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - load_checkpoint 0 <class 'bool'>
|
| 20 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - load_path Train_CartPole-v1_SAC_20230618-162702 <class 'str'>
|
| 21 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - load_model_step best <class 'str'>
|
| 22 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - interact_summary_fre 1 <class 'int'>
|
| 23 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - model_summary_fre 1 <class 'int'>
|
| 24 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 25 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Algo Configs:
|
| 26 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 27 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
|
| 28 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - critic1_lr 0.001 <class 'float'>
|
| 29 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - critic2_lr 0.001 <class 'float'>
|
| 30 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - actor_lr 0.0003 <class 'float'>
|
| 31 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - gamma 0.95 <class 'float'>
|
| 32 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - tau 0.005 <class 'float'>
|
| 33 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - alpha 0.1 <class 'float'>
|
| 34 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - automatic_entropy_tuning 0 <class 'bool'>
|
| 35 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - batch_size 64 <class 'int'>
|
| 36 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - hidden_dim 64 <class 'int'>
|
| 37 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - n_epochs 1 <class 'int'>
|
| 38 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - start_steps 10000 <class 'int'>
|
| 39 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - target_update_fre 1 <class 'int'>
|
| 40 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - buffer_size 1000000 <class 'int'>
|
| 41 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - min_policy 0 <class 'int'>
|
| 42 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - alpha_lr 0.0001 <class 'float'>
|
| 43 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - n_steps_per_learn 1 <class 'int'>
|
| 44 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - action_type continuous <class 'str'>
|
| 45 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - independ_actor 1 <class 'bool'>
|
| 46 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - share_optimizer 0 <class 'bool'>
|
| 47 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - actor_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
|
| 48 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - critic_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
|
| 49 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - value_layers [{'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_size': [256], 'activation': 'relu'}] <class 'str'>
|
| 50 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - buffer_type REPLAY_QUE <class 'str'>
|
| 51 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_decay 500 <class 'int'>
|
| 52 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_end 0.01 <class 'float'>
|
| 53 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - epsilon_start 0.95 <class 'float'>
|
| 54 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - lr 0.0001 <class 'float'>
|
| 55 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - target_update 1 <class 'int'>
|
| 56 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 57 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Env Configs:
|
| 58 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 59 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Name Value Type
|
| 60 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - id Pendulum-v1 <class 'str'>
|
| 61 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - render_mode None <class 'str'>
|
| 62 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - wrapper None <class 'str'>
|
| 63 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ignore_params ['wrapper', 'ignore_params'] <class 'str'>
|
| 64 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - ================================================================================
|
| 65 |
+
2023-06-18 16:39:11 - SimpleLog - INFO: - Start training!
|
| 66 |
+
2023-06-18 16:39:13 - SimpleLog - INFO: - update_step: 100, online_eval_reward: -1380.018
|
| 67 |
+
2023-06-18 16:39:13 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1380.018, save the best model!
|
| 68 |
+
2023-06-18 16:39:13 - SimpleLog - INFO: - Interactor 0 finished episode 1 with reward -1247.240 in 200 steps
|
| 69 |
+
2023-06-18 16:39:13 - SimpleLog - INFO: - Interactor 1 finished episode 2 with reward -1239.670 in 200 steps
|
| 70 |
+
2023-06-18 16:39:14 - SimpleLog - INFO: - update_step: 200, online_eval_reward: -1594.462
|
| 71 |
+
2023-06-18 16:39:15 - SimpleLog - INFO: - update_step: 300, online_eval_reward: -1575.933
|
| 72 |
+
2023-06-18 16:39:15 - SimpleLog - INFO: - Interactor 0 finished episode 3 with reward -1465.696 in 200 steps
|
| 73 |
+
2023-06-18 16:39:15 - SimpleLog - INFO: - Interactor 1 finished episode 4 with reward -1538.397 in 200 steps
|
| 74 |
+
2023-06-18 16:39:16 - SimpleLog - INFO: - update_step: 400, online_eval_reward: -1550.200
|
| 75 |
+
2023-06-18 16:39:17 - SimpleLog - INFO: - update_step: 500, online_eval_reward: -1517.290
|
| 76 |
+
2023-06-18 16:39:18 - SimpleLog - INFO: - Interactor 0 finished episode 5 with reward -1516.613 in 200 steps
|
| 77 |
+
2023-06-18 16:39:18 - SimpleLog - INFO: - Interactor 1 finished episode 6 with reward -1697.405 in 200 steps
|
| 78 |
+
2023-06-18 16:39:18 - SimpleLog - INFO: - update_step: 600, online_eval_reward: -1474.664
|
| 79 |
+
2023-06-18 16:39:19 - SimpleLog - INFO: - update_step: 700, online_eval_reward: -1480.853
|
| 80 |
+
2023-06-18 16:39:20 - SimpleLog - INFO: - Interactor 0 finished episode 7 with reward -1479.039 in 200 steps
|
| 81 |
+
2023-06-18 16:39:20 - SimpleLog - INFO: - Interactor 1 finished episode 8 with reward -1621.124 in 200 steps
|
| 82 |
+
2023-06-18 16:39:21 - SimpleLog - INFO: - update_step: 800, online_eval_reward: -1444.541
|
| 83 |
+
2023-06-18 16:39:22 - SimpleLog - INFO: - update_step: 900, online_eval_reward: -1432.778
|
| 84 |
+
2023-06-18 16:39:22 - SimpleLog - INFO: - Interactor 0 finished episode 9 with reward -1375.822 in 200 steps
|
| 85 |
+
2023-06-18 16:39:22 - SimpleLog - INFO: - Interactor 1 finished episode 10 with reward -1548.226 in 200 steps
|
| 86 |
+
2023-06-18 16:39:23 - SimpleLog - INFO: - update_step: 1000, online_eval_reward: -1379.687
|
| 87 |
+
2023-06-18 16:39:23 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1379.687, save the best model!
|
| 88 |
+
2023-06-18 16:39:24 - SimpleLog - INFO: - update_step: 1100, online_eval_reward: -1365.993
|
| 89 |
+
2023-06-18 16:39:24 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1365.993, save the best model!
|
| 90 |
+
2023-06-18 16:39:25 - SimpleLog - INFO: - Interactor 0 finished episode 11 with reward -1417.818 in 200 steps
|
| 91 |
+
2023-06-18 16:39:25 - SimpleLog - INFO: - Interactor 1 finished episode 12 with reward -1484.637 in 200 steps
|
| 92 |
+
2023-06-18 16:39:26 - SimpleLog - INFO: - update_step: 1200, online_eval_reward: -1289.023
|
| 93 |
+
2023-06-18 16:39:26 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1289.023, save the best model!
|
| 94 |
+
2023-06-18 16:39:27 - SimpleLog - INFO: - update_step: 1300, online_eval_reward: -1274.835
|
| 95 |
+
2023-06-18 16:39:27 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1274.835, save the best model!
|
| 96 |
+
2023-06-18 16:39:27 - SimpleLog - INFO: - Interactor 0 finished episode 13 with reward -1299.286 in 200 steps
|
| 97 |
+
2023-06-18 16:39:27 - SimpleLog - INFO: - Interactor 1 finished episode 14 with reward -1410.494 in 200 steps
|
| 98 |
+
2023-06-18 16:39:28 - SimpleLog - INFO: - update_step: 1400, online_eval_reward: -1234.458
|
| 99 |
+
2023-06-18 16:39:28 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1234.458, save the best model!
|
| 100 |
+
2023-06-18 16:39:29 - SimpleLog - INFO: - update_step: 1500, online_eval_reward: -1167.915
|
| 101 |
+
2023-06-18 16:39:29 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1167.915, save the best model!
|
| 102 |
+
2023-06-18 16:39:30 - SimpleLog - INFO: - Interactor 0 finished episode 15 with reward -1114.071 in 200 steps
|
| 103 |
+
2023-06-18 16:39:30 - SimpleLog - INFO: - Interactor 1 finished episode 16 with reward -1412.817 in 200 steps
|
| 104 |
+
2023-06-18 16:39:30 - SimpleLog - INFO: - update_step: 1600, online_eval_reward: -1164.202
|
| 105 |
+
2023-06-18 16:39:30 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1164.202, save the best model!
|
| 106 |
+
2023-06-18 16:39:32 - SimpleLog - INFO: - update_step: 1700, online_eval_reward: -1135.373
|
| 107 |
+
2023-06-18 16:39:32 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1135.373, save the best model!
|
| 108 |
+
2023-06-18 16:39:32 - SimpleLog - INFO: - Interactor 0 finished episode 17 with reward -1036.607 in 200 steps
|
| 109 |
+
2023-06-18 16:39:32 - SimpleLog - INFO: - Interactor 1 finished episode 18 with reward -1192.079 in 200 steps
|
| 110 |
+
2023-06-18 16:39:33 - SimpleLog - INFO: - update_step: 1800, online_eval_reward: -1136.757
|
| 111 |
+
2023-06-18 16:39:34 - SimpleLog - INFO: - update_step: 1900, online_eval_reward: -1096.014
|
| 112 |
+
2023-06-18 16:39:34 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1096.014, save the best model!
|
| 113 |
+
2023-06-18 16:39:35 - SimpleLog - INFO: - Interactor 0 finished episode 19 with reward -1018.449 in 200 steps
|
| 114 |
+
2023-06-18 16:39:35 - SimpleLog - INFO: - Interactor 1 finished episode 20 with reward -1162.681 in 200 steps
|
| 115 |
+
2023-06-18 16:39:35 - SimpleLog - INFO: - update_step: 2000, online_eval_reward: -1065.251
|
| 116 |
+
2023-06-18 16:39:35 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -1065.251, save the best model!
|
| 117 |
+
2023-06-18 16:39:37 - SimpleLog - INFO: - update_step: 2100, online_eval_reward: -1124.240
|
| 118 |
+
2023-06-18 16:39:37 - SimpleLog - INFO: - Interactor 0 finished episode 21 with reward -971.930 in 200 steps
|
| 119 |
+
2023-06-18 16:39:37 - SimpleLog - INFO: - Interactor 1 finished episode 22 with reward -1129.776 in 200 steps
|
| 120 |
+
2023-06-18 16:39:38 - SimpleLog - INFO: - update_step: 2200, online_eval_reward: -880.723
|
| 121 |
+
2023-06-18 16:39:38 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -880.723, save the best model!
|
| 122 |
+
2023-06-18 16:39:39 - SimpleLog - INFO: - update_step: 2300, online_eval_reward: -993.409
|
| 123 |
+
2023-06-18 16:39:40 - SimpleLog - INFO: - Interactor 0 finished episode 23 with reward -940.286 in 200 steps
|
| 124 |
+
2023-06-18 16:39:40 - SimpleLog - INFO: - Interactor 1 finished episode 24 with reward -1006.670 in 200 steps
|
| 125 |
+
2023-06-18 16:39:40 - SimpleLog - INFO: - update_step: 2400, online_eval_reward: -874.374
|
| 126 |
+
2023-06-18 16:39:40 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -874.374, save the best model!
|
| 127 |
+
2023-06-18 16:39:42 - SimpleLog - INFO: - update_step: 2500, online_eval_reward: -1023.463
|
| 128 |
+
2023-06-18 16:39:42 - SimpleLog - INFO: - Interactor 0 finished episode 25 with reward -854.571 in 200 steps
|
| 129 |
+
2023-06-18 16:39:42 - SimpleLog - INFO: - Interactor 1 finished episode 26 with reward -995.997 in 200 steps
|
| 130 |
+
2023-06-18 16:39:43 - SimpleLog - INFO: - update_step: 2600, online_eval_reward: -970.227
|
| 131 |
+
2023-06-18 16:39:44 - SimpleLog - INFO: - update_step: 2700, online_eval_reward: -970.407
|
| 132 |
+
2023-06-18 16:39:45 - SimpleLog - INFO: - Interactor 0 finished episode 27 with reward -891.371 in 200 steps
|
| 133 |
+
2023-06-18 16:39:45 - SimpleLog - INFO: - Interactor 1 finished episode 28 with reward -884.375 in 200 steps
|
| 134 |
+
2023-06-18 16:39:45 - SimpleLog - INFO: - update_step: 2800, online_eval_reward: -988.688
|
| 135 |
+
2023-06-18 16:39:47 - SimpleLog - INFO: - update_step: 2900, online_eval_reward: -844.578
|
| 136 |
+
2023-06-18 16:39:47 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -844.578, save the best model!
|
| 137 |
+
2023-06-18 16:39:47 - SimpleLog - INFO: - Interactor 0 finished episode 29 with reward -874.244 in 200 steps
|
| 138 |
+
2023-06-18 16:39:47 - SimpleLog - INFO: - Interactor 1 finished episode 30 with reward -950.054 in 200 steps
|
| 139 |
+
2023-06-18 16:39:48 - SimpleLog - INFO: - update_step: 3000, online_eval_reward: -931.215
|
| 140 |
+
2023-06-18 16:39:49 - SimpleLog - INFO: - update_step: 3100, online_eval_reward: -969.424
|
| 141 |
+
2023-06-18 16:39:50 - SimpleLog - INFO: - Interactor 0 finished episode 31 with reward -761.690 in 200 steps
|
| 142 |
+
2023-06-18 16:39:50 - SimpleLog - INFO: - Interactor 1 finished episode 32 with reward -810.092 in 200 steps
|
| 143 |
+
2023-06-18 16:39:50 - SimpleLog - INFO: - update_step: 3200, online_eval_reward: -858.450
|
| 144 |
+
2023-06-18 16:39:52 - SimpleLog - INFO: - update_step: 3300, online_eval_reward: -974.745
|
| 145 |
+
2023-06-18 16:39:52 - SimpleLog - INFO: - Interactor 0 finished episode 33 with reward -720.616 in 200 steps
|
| 146 |
+
2023-06-18 16:39:52 - SimpleLog - INFO: - Interactor 1 finished episode 34 with reward -763.590 in 200 steps
|
| 147 |
+
2023-06-18 16:39:53 - SimpleLog - INFO: - update_step: 3400, online_eval_reward: -865.729
|
| 148 |
+
2023-06-18 16:39:54 - SimpleLog - INFO: - update_step: 3500, online_eval_reward: -703.698
|
| 149 |
+
2023-06-18 16:39:54 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -703.698, save the best model!
|
| 150 |
+
2023-06-18 16:39:55 - SimpleLog - INFO: - Interactor 0 finished episode 35 with reward -697.547 in 200 steps
|
| 151 |
+
2023-06-18 16:39:55 - SimpleLog - INFO: - Interactor 1 finished episode 36 with reward -750.590 in 200 steps
|
| 152 |
+
2023-06-18 16:39:55 - SimpleLog - INFO: - update_step: 3600, online_eval_reward: -863.473
|
| 153 |
+
2023-06-18 16:39:57 - SimpleLog - INFO: - update_step: 3700, online_eval_reward: -859.878
|
| 154 |
+
2023-06-18 16:39:57 - SimpleLog - INFO: - Interactor 0 finished episode 37 with reward -731.446 in 200 steps
|
| 155 |
+
2023-06-18 16:39:57 - SimpleLog - INFO: - Interactor 1 finished episode 38 with reward -874.953 in 200 steps
|
| 156 |
+
2023-06-18 16:39:58 - SimpleLog - INFO: - update_step: 3800, online_eval_reward: -855.164
|
| 157 |
+
2023-06-18 16:39:59 - SimpleLog - INFO: - update_step: 3900, online_eval_reward: -630.874
|
| 158 |
+
2023-06-18 16:39:59 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -630.874, save the best model!
|
| 159 |
+
2023-06-18 16:40:00 - SimpleLog - INFO: - Interactor 0 finished episode 39 with reward -545.310 in 200 steps
|
| 160 |
+
2023-06-18 16:40:00 - SimpleLog - INFO: - Interactor 1 finished episode 40 with reward -751.373 in 200 steps
|
| 161 |
+
2023-06-18 16:40:00 - SimpleLog - INFO: - update_step: 4000, online_eval_reward: -756.478
|
| 162 |
+
2023-06-18 16:40:02 - SimpleLog - INFO: - update_step: 4100, online_eval_reward: -863.402
|
| 163 |
+
2023-06-18 16:40:02 - SimpleLog - INFO: - Interactor 0 finished episode 41 with reward -646.219 in 200 steps
|
| 164 |
+
2023-06-18 16:40:02 - SimpleLog - INFO: - Interactor 1 finished episode 42 with reward -753.741 in 200 steps
|
| 165 |
+
2023-06-18 16:40:03 - SimpleLog - INFO: - update_step: 4200, online_eval_reward: -626.108
|
| 166 |
+
2023-06-18 16:40:03 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -626.108, save the best model!
|
| 167 |
+
2023-06-18 16:40:04 - SimpleLog - INFO: - update_step: 4300, online_eval_reward: -748.564
|
| 168 |
+
2023-06-18 16:40:05 - SimpleLog - INFO: - Interactor 0 finished episode 43 with reward -632.094 in 200 steps
|
| 169 |
+
2023-06-18 16:40:05 - SimpleLog - INFO: - Interactor 1 finished episode 44 with reward -751.567 in 200 steps
|
| 170 |
+
2023-06-18 16:40:05 - SimpleLog - INFO: - update_step: 4400, online_eval_reward: -649.743
|
| 171 |
+
2023-06-18 16:40:06 - SimpleLog - INFO: - update_step: 4500, online_eval_reward: -766.485
|
| 172 |
+
2023-06-18 16:40:07 - SimpleLog - INFO: - Interactor 0 finished episode 45 with reward -627.878 in 200 steps
|
| 173 |
+
2023-06-18 16:40:07 - SimpleLog - INFO: - Interactor 1 finished episode 46 with reward -628.071 in 200 steps
|
| 174 |
+
2023-06-18 16:40:08 - SimpleLog - INFO: - update_step: 4600, online_eval_reward: -781.563
|
| 175 |
+
2023-06-18 16:40:09 - SimpleLog - INFO: - update_step: 4700, online_eval_reward: -628.591
|
| 176 |
+
2023-06-18 16:40:10 - SimpleLog - INFO: - Interactor 0 finished episode 47 with reward -509.268 in 200 steps
|
| 177 |
+
2023-06-18 16:40:10 - SimpleLog - INFO: - Interactor 1 finished episode 48 with reward -377.213 in 200 steps
|
| 178 |
+
2023-06-18 16:40:10 - SimpleLog - INFO: - update_step: 4800, online_eval_reward: -504.638
|
| 179 |
+
2023-06-18 16:40:10 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -504.638, save the best model!
|
| 180 |
+
2023-06-18 16:40:11 - SimpleLog - INFO: - update_step: 4900, online_eval_reward: -530.351
|
| 181 |
+
2023-06-18 16:40:12 - SimpleLog - INFO: - Interactor 0 finished episode 49 with reward -383.350 in 200 steps
|
| 182 |
+
2023-06-18 16:40:12 - SimpleLog - INFO: - Interactor 1 finished episode 50 with reward -501.978 in 200 steps
|
| 183 |
+
2023-06-18 16:40:13 - SimpleLog - INFO: - update_step: 5000, online_eval_reward: -504.956
|
| 184 |
+
2023-06-18 16:40:14 - SimpleLog - INFO: - update_step: 5100, online_eval_reward: -632.072
|
| 185 |
+
2023-06-18 16:40:14 - SimpleLog - INFO: - Interactor 0 finished episode 51 with reward -255.278 in 200 steps
|
| 186 |
+
2023-06-18 16:40:14 - SimpleLog - INFO: - Interactor 1 finished episode 52 with reward -500.879 in 200 steps
|
| 187 |
+
2023-06-18 16:40:15 - SimpleLog - INFO: - update_step: 5200, online_eval_reward: -641.763
|
| 188 |
+
2023-06-18 16:40:16 - SimpleLog - INFO: - update_step: 5300, online_eval_reward: -508.721
|
| 189 |
+
2023-06-18 16:40:17 - SimpleLog - INFO: - Interactor 0 finished episode 53 with reward -257.886 in 200 steps
|
| 190 |
+
2023-06-18 16:40:17 - SimpleLog - INFO: - Interactor 1 finished episode 54 with reward -500.677 in 200 steps
|
| 191 |
+
2023-06-18 16:40:18 - SimpleLog - INFO: - update_step: 5400, online_eval_reward: -624.826
|
| 192 |
+
2023-06-18 16:40:19 - SimpleLog - INFO: - update_step: 5500, online_eval_reward: -504.471
|
| 193 |
+
2023-06-18 16:40:19 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -504.471, save the best model!
|
| 194 |
+
2023-06-18 16:40:19 - SimpleLog - INFO: - Interactor 0 finished episode 55 with reward -259.141 in 200 steps
|
| 195 |
+
2023-06-18 16:40:19 - SimpleLog - INFO: - Interactor 1 finished episode 56 with reward -501.679 in 200 steps
|
| 196 |
+
2023-06-18 16:40:20 - SimpleLog - INFO: - update_step: 5600, online_eval_reward: -381.350
|
| 197 |
+
2023-06-18 16:40:20 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -381.350, save the best model!
|
| 198 |
+
2023-06-18 16:40:21 - SimpleLog - INFO: - update_step: 5700, online_eval_reward: -627.468
|
| 199 |
+
2023-06-18 16:40:22 - SimpleLog - INFO: - Interactor 0 finished episode 57 with reward -384.113 in 200 steps
|
| 200 |
+
2023-06-18 16:40:22 - SimpleLog - INFO: - Interactor 1 finished episode 58 with reward -401.693 in 200 steps
|
| 201 |
+
2023-06-18 16:40:23 - SimpleLog - INFO: - update_step: 5800, online_eval_reward: -627.865
|
| 202 |
+
2023-06-18 16:40:24 - SimpleLog - INFO: - update_step: 5900, online_eval_reward: -524.992
|
| 203 |
+
2023-06-18 16:40:24 - SimpleLog - INFO: - Interactor 0 finished episode 59 with reward -386.282 in 200 steps
|
| 204 |
+
2023-06-18 16:40:24 - SimpleLog - INFO: - Interactor 1 finished episode 60 with reward -377.906 in 200 steps
|
| 205 |
+
2023-06-18 16:40:25 - SimpleLog - INFO: - update_step: 6000, online_eval_reward: -504.850
|
| 206 |
+
2023-06-18 16:40:26 - SimpleLog - INFO: - update_step: 6100, online_eval_reward: -622.662
|
| 207 |
+
2023-06-18 16:40:27 - SimpleLog - INFO: - Interactor 0 finished episode 61 with reward -264.980 in 200 steps
|
| 208 |
+
2023-06-18 16:40:27 - SimpleLog - INFO: - Interactor 1 finished episode 62 with reward -376.024 in 200 steps
|
| 209 |
+
2023-06-18 16:40:27 - SimpleLog - INFO: - update_step: 6200, online_eval_reward: -629.334
|
| 210 |
+
2023-06-18 16:40:29 - SimpleLog - INFO: - update_step: 6300, online_eval_reward: -505.557
|
| 211 |
+
2023-06-18 16:40:29 - SimpleLog - INFO: - Interactor 0 finished episode 63 with reward -258.428 in 200 steps
|
| 212 |
+
2023-06-18 16:40:29 - SimpleLog - INFO: - Interactor 1 finished episode 64 with reward -377.041 in 200 steps
|
| 213 |
+
2023-06-18 16:40:30 - SimpleLog - INFO: - update_step: 6400, online_eval_reward: -506.662
|
| 214 |
+
2023-06-18 16:40:31 - SimpleLog - INFO: - update_step: 6500, online_eval_reward: -504.294
|
| 215 |
+
2023-06-18 16:40:32 - SimpleLog - INFO: - Interactor 0 finished episode 65 with reward -144.788 in 200 steps
|
| 216 |
+
2023-06-18 16:40:32 - SimpleLog - INFO: - Interactor 1 finished episode 66 with reward -250.861 in 200 steps
|
| 217 |
+
2023-06-18 16:40:32 - SimpleLog - INFO: - update_step: 6600, online_eval_reward: -506.432
|
| 218 |
+
2023-06-18 16:40:34 - SimpleLog - INFO: - update_step: 6700, online_eval_reward: -503.358
|
| 219 |
+
2023-06-18 16:40:34 - SimpleLog - INFO: - Interactor 0 finished episode 67 with reward -384.900 in 200 steps
|
| 220 |
+
2023-06-18 16:40:34 - SimpleLog - INFO: - Interactor 1 finished episode 68 with reward -503.686 in 200 steps
|
| 221 |
+
2023-06-18 16:40:35 - SimpleLog - INFO: - update_step: 6800, online_eval_reward: -610.874
|
| 222 |
+
2023-06-18 16:40:36 - SimpleLog - INFO: - update_step: 6900, online_eval_reward: -507.600
|
| 223 |
+
2023-06-18 16:40:37 - SimpleLog - INFO: - Interactor 0 finished episode 69 with reward -6.160 in 200 steps
|
| 224 |
+
2023-06-18 16:40:37 - SimpleLog - INFO: - Interactor 1 finished episode 70 with reward -249.738 in 200 steps
|
| 225 |
+
2023-06-18 16:40:37 - SimpleLog - INFO: - update_step: 7000, online_eval_reward: -516.111
|
| 226 |
+
2023-06-18 16:40:39 - SimpleLog - INFO: - update_step: 7100, online_eval_reward: -504.814
|
| 227 |
+
2023-06-18 16:40:39 - SimpleLog - INFO: - Interactor 0 finished episode 71 with reward -130.383 in 200 steps
|
| 228 |
+
2023-06-18 16:40:39 - SimpleLog - INFO: - Interactor 1 finished episode 72 with reward -248.035 in 200 steps
|
| 229 |
+
2023-06-18 16:40:40 - SimpleLog - INFO: - update_step: 7200, online_eval_reward: -504.119
|
| 230 |
+
2023-06-18 16:40:41 - SimpleLog - INFO: - update_step: 7300, online_eval_reward: -252.920
|
| 231 |
+
2023-06-18 16:40:41 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -252.920, save the best model!
|
| 232 |
+
2023-06-18 16:40:42 - SimpleLog - INFO: - Interactor 0 finished episode 73 with reward -129.843 in 200 steps
|
| 233 |
+
2023-06-18 16:40:42 - SimpleLog - INFO: - Interactor 1 finished episode 74 with reward -123.091 in 200 steps
|
| 234 |
+
2023-06-18 16:40:42 - SimpleLog - INFO: - update_step: 7400, online_eval_reward: -504.046
|
| 235 |
+
2023-06-18 16:40:44 - SimpleLog - INFO: - update_step: 7500, online_eval_reward: -506.457
|
| 236 |
+
2023-06-18 16:40:44 - SimpleLog - INFO: - Interactor 0 finished episode 75 with reward -9.244 in 200 steps
|
| 237 |
+
2023-06-18 16:40:44 - SimpleLog - INFO: - Interactor 1 finished episode 76 with reward -374.890 in 200 steps
|
| 238 |
+
2023-06-18 16:40:45 - SimpleLog - INFO: - update_step: 7600, online_eval_reward: -253.852
|
| 239 |
+
2023-06-18 16:40:46 - SimpleLog - INFO: - update_step: 7700, online_eval_reward: -497.678
|
| 240 |
+
2023-06-18 16:40:47 - SimpleLog - INFO: - Interactor 0 finished episode 77 with reward -257.436 in 200 steps
|
| 241 |
+
2023-06-18 16:40:47 - SimpleLog - INFO: - Interactor 1 finished episode 78 with reward -248.164 in 200 steps
|
| 242 |
+
2023-06-18 16:40:47 - SimpleLog - INFO: - update_step: 7800, online_eval_reward: -518.276
|
| 243 |
+
2023-06-18 16:40:49 - SimpleLog - INFO: - update_step: 7900, online_eval_reward: -496.236
|
| 244 |
+
2023-06-18 16:40:49 - SimpleLog - INFO: - Interactor 0 finished episode 79 with reward -129.262 in 200 steps
|
| 245 |
+
2023-06-18 16:40:49 - SimpleLog - INFO: - Interactor 1 finished episode 80 with reward -378.180 in 200 steps
|
| 246 |
+
2023-06-18 16:40:50 - SimpleLog - INFO: - update_step: 8000, online_eval_reward: -505.930
|
| 247 |
+
2023-06-18 16:40:51 - SimpleLog - INFO: - update_step: 8100, online_eval_reward: -505.368
|
| 248 |
+
2023-06-18 16:40:52 - SimpleLog - INFO: - Interactor 0 finished episode 81 with reward -382.849 in 200 steps
|
| 249 |
+
2023-06-18 16:40:52 - SimpleLog - INFO: - Interactor 1 finished episode 82 with reward -501.592 in 200 steps
|
| 250 |
+
2023-06-18 16:40:52 - SimpleLog - INFO: - update_step: 8200, online_eval_reward: -512.991
|
| 251 |
+
2023-06-18 16:40:54 - SimpleLog - INFO: - update_step: 8300, online_eval_reward: -504.157
|
| 252 |
+
2023-06-18 16:40:54 - SimpleLog - INFO: - Interactor 0 finished episode 83 with reward -289.496 in 200 steps
|
| 253 |
+
2023-06-18 16:40:54 - SimpleLog - INFO: - Interactor 1 finished episode 84 with reward -379.101 in 200 steps
|
| 254 |
+
2023-06-18 16:40:55 - SimpleLog - INFO: - update_step: 8400, online_eval_reward: -471.989
|
| 255 |
+
2023-06-18 16:40:56 - SimpleLog - INFO: - update_step: 8500, online_eval_reward: -378.024
|
| 256 |
+
2023-06-18 16:40:57 - SimpleLog - INFO: - Interactor 0 finished episode 85 with reward -383.413 in 200 steps
|
| 257 |
+
2023-06-18 16:40:57 - SimpleLog - INFO: - Interactor 1 finished episode 86 with reward -377.348 in 200 steps
|
| 258 |
+
2023-06-18 16:40:57 - SimpleLog - INFO: - update_step: 8600, online_eval_reward: -504.045
|
| 259 |
+
2023-06-18 16:40:59 - SimpleLog - INFO: - update_step: 8700, online_eval_reward: -622.723
|
| 260 |
+
2023-06-18 16:40:59 - SimpleLog - INFO: - Interactor 0 finished episode 87 with reward -258.051 in 200 steps
|
| 261 |
+
2023-06-18 16:40:59 - SimpleLog - INFO: - Interactor 1 finished episode 88 with reward -378.418 in 200 steps
|
| 262 |
+
2023-06-18 16:41:00 - SimpleLog - INFO: - update_step: 8800, online_eval_reward: -504.854
|
| 263 |
+
2023-06-18 16:41:01 - SimpleLog - INFO: - update_step: 8900, online_eval_reward: -501.113
|
| 264 |
+
2023-06-18 16:41:02 - SimpleLog - INFO: - Interactor 0 finished episode 89 with reward -265.978 in 200 steps
|
| 265 |
+
2023-06-18 16:41:02 - SimpleLog - INFO: - Interactor 1 finished episode 90 with reward -377.551 in 200 steps
|
| 266 |
+
2023-06-18 16:41:02 - SimpleLog - INFO: - update_step: 9000, online_eval_reward: -627.357
|
| 267 |
+
2023-06-18 16:41:04 - SimpleLog - INFO: - update_step: 9100, online_eval_reward: -378.242
|
| 268 |
+
2023-06-18 16:41:04 - SimpleLog - INFO: - Interactor 0 finished episode 91 with reward -376.627 in 200 steps
|
| 269 |
+
2023-06-18 16:41:04 - SimpleLog - INFO: - Interactor 1 finished episode 92 with reward -379.475 in 200 steps
|
| 270 |
+
2023-06-18 16:41:05 - SimpleLog - INFO: - update_step: 9200, online_eval_reward: -523.777
|
| 271 |
+
2023-06-18 16:41:06 - SimpleLog - INFO: - update_step: 9300, online_eval_reward: -392.154
|
| 272 |
+
2023-06-18 16:41:07 - SimpleLog - INFO: - Interactor 0 finished episode 93 with reward -257.879 in 200 steps
|
| 273 |
+
2023-06-18 16:41:07 - SimpleLog - INFO: - Interactor 1 finished episode 94 with reward -364.066 in 200 steps
|
| 274 |
+
2023-06-18 16:41:07 - SimpleLog - INFO: - update_step: 9400, online_eval_reward: -505.073
|
| 275 |
+
2023-06-18 16:41:09 - SimpleLog - INFO: - update_step: 9500, online_eval_reward: -392.630
|
| 276 |
+
2023-06-18 16:41:09 - SimpleLog - INFO: - Interactor 0 finished episode 95 with reward -256.403 in 200 steps
|
| 277 |
+
2023-06-18 16:41:09 - SimpleLog - INFO: - Interactor 1 finished episode 96 with reward -378.919 in 200 steps
|
| 278 |
+
2023-06-18 16:41:10 - SimpleLog - INFO: - update_step: 9600, online_eval_reward: -611.083
|
| 279 |
+
2023-06-18 16:41:11 - SimpleLog - INFO: - update_step: 9700, online_eval_reward: -575.006
|
| 280 |
+
2023-06-18 16:41:12 - SimpleLog - INFO: - Interactor 0 finished episode 97 with reward -257.988 in 200 steps
|
| 281 |
+
2023-06-18 16:41:12 - SimpleLog - INFO: - Interactor 1 finished episode 98 with reward -375.953 in 200 steps
|
| 282 |
+
2023-06-18 16:41:12 - SimpleLog - INFO: - update_step: 9800, online_eval_reward: -504.909
|
| 283 |
+
2023-06-18 16:41:14 - SimpleLog - INFO: - update_step: 9900, online_eval_reward: -521.546
|
| 284 |
+
2023-06-18 16:41:14 - SimpleLog - INFO: - Interactor 0 finished episode 99 with reward -257.814 in 200 steps
|
| 285 |
+
2023-06-18 16:41:14 - SimpleLog - INFO: - Interactor 1 finished episode 100 with reward -376.679 in 200 steps
|
| 286 |
+
2023-06-18 16:41:15 - SimpleLog - INFO: - update_step: 10000, online_eval_reward: -498.104
|
| 287 |
+
2023-06-18 16:41:16 - SimpleLog - INFO: - update_step: 10100, online_eval_reward: -505.148
|
| 288 |
+
2023-06-18 16:41:17 - SimpleLog - INFO: - Interactor 0 finished episode 101 with reward -383.380 in 200 steps
|
| 289 |
+
2023-06-18 16:41:17 - SimpleLog - INFO: - Interactor 1 finished episode 102 with reward -376.594 in 200 steps
|
| 290 |
+
2023-06-18 16:41:17 - SimpleLog - INFO: - update_step: 10200, online_eval_reward: -502.743
|
| 291 |
+
2023-06-18 16:41:18 - SimpleLog - INFO: - update_step: 10300, online_eval_reward: -495.894
|
| 292 |
+
2023-06-18 16:41:19 - SimpleLog - INFO: - Interactor 0 finished episode 103 with reward -258.215 in 200 steps
|
| 293 |
+
2023-06-18 16:41:19 - SimpleLog - INFO: - Interactor 1 finished episode 104 with reward -252.459 in 200 steps
|
| 294 |
+
2023-06-18 16:41:20 - SimpleLog - INFO: - update_step: 10400, online_eval_reward: -428.521
|
| 295 |
+
2023-06-18 16:41:21 - SimpleLog - INFO: - update_step: 10500, online_eval_reward: -509.165
|
| 296 |
+
2023-06-18 16:41:21 - SimpleLog - INFO: - Interactor 0 finished episode 105 with reward -237.254 in 200 steps
|
| 297 |
+
2023-06-18 16:41:21 - SimpleLog - INFO: - Interactor 1 finished episode 106 with reward -249.686 in 200 steps
|
| 298 |
+
2023-06-18 16:41:22 - SimpleLog - INFO: - update_step: 10600, online_eval_reward: -378.710
|
| 299 |
+
2023-06-18 16:41:23 - SimpleLog - INFO: - update_step: 10700, online_eval_reward: -467.916
|
| 300 |
+
2023-06-18 16:41:24 - SimpleLog - INFO: - Interactor 0 finished episode 107 with reward -380.161 in 200 steps
|
| 301 |
+
2023-06-18 16:41:24 - SimpleLog - INFO: - Interactor 1 finished episode 108 with reward -377.340 in 200 steps
|
| 302 |
+
2023-06-18 16:41:25 - SimpleLog - INFO: - update_step: 10800, online_eval_reward: -504.152
|
| 303 |
+
2023-06-18 16:41:26 - SimpleLog - INFO: - update_step: 10900, online_eval_reward: -616.149
|
| 304 |
+
2023-06-18 16:41:26 - SimpleLog - INFO: - Interactor 0 finished episode 109 with reward -256.526 in 200 steps
|
| 305 |
+
2023-06-18 16:41:26 - SimpleLog - INFO: - Interactor 1 finished episode 110 with reward -250.052 in 200 steps
|
| 306 |
+
2023-06-18 16:41:27 - SimpleLog - INFO: - update_step: 11000, online_eval_reward: -624.551
|
| 307 |
+
2023-06-18 16:41:28 - SimpleLog - INFO: - update_step: 11100, online_eval_reward: -626.679
|
| 308 |
+
2023-06-18 16:41:29 - SimpleLog - INFO: - Interactor 0 finished episode 111 with reward -255.673 in 200 steps
|
| 309 |
+
2023-06-18 16:41:29 - SimpleLog - INFO: - Interactor 1 finished episode 112 with reward -249.592 in 200 steps
|
| 310 |
+
2023-06-18 16:41:30 - SimpleLog - INFO: - update_step: 11200, online_eval_reward: -504.051
|
| 311 |
+
2023-06-18 16:41:31 - SimpleLog - INFO: - update_step: 11300, online_eval_reward: -627.940
|
| 312 |
+
2023-06-18 16:41:31 - SimpleLog - INFO: - Interactor 0 finished episode 113 with reward -257.611 in 200 steps
|
| 313 |
+
2023-06-18 16:41:31 - SimpleLog - INFO: - Interactor 1 finished episode 114 with reward -375.881 in 200 steps
|
| 314 |
+
2023-06-18 16:41:32 - SimpleLog - INFO: - update_step: 11400, online_eval_reward: -603.049
|
| 315 |
+
2023-06-18 16:41:33 - SimpleLog - INFO: - update_step: 11500, online_eval_reward: -471.987
|
| 316 |
+
2023-06-18 16:41:34 - SimpleLog - INFO: - Interactor 0 finished episode 115 with reward -382.501 in 200 steps
|
| 317 |
+
2023-06-18 16:41:34 - SimpleLog - INFO: - Interactor 1 finished episode 116 with reward -252.038 in 200 steps
|
| 318 |
+
2023-06-18 16:41:35 - SimpleLog - INFO: - update_step: 11600, online_eval_reward: -503.790
|
| 319 |
+
2023-06-18 16:41:36 - SimpleLog - INFO: - update_step: 11700, online_eval_reward: -503.284
|
| 320 |
+
2023-06-18 16:41:36 - SimpleLog - INFO: - Interactor 0 finished episode 117 with reward -264.405 in 200 steps
|
| 321 |
+
2023-06-18 16:41:36 - SimpleLog - INFO: - Interactor 1 finished episode 118 with reward -502.513 in 200 steps
|
| 322 |
+
2023-06-18 16:41:37 - SimpleLog - INFO: - update_step: 11800, online_eval_reward: -627.830
|
| 323 |
+
2023-06-18 16:41:38 - SimpleLog - INFO: - update_step: 11900, online_eval_reward: -622.682
|
| 324 |
+
2023-06-18 16:41:39 - SimpleLog - INFO: - Interactor 0 finished episode 119 with reward -344.907 in 200 steps
|
| 325 |
+
2023-06-18 16:41:39 - SimpleLog - INFO: - Interactor 1 finished episode 120 with reward -405.190 in 200 steps
|
| 326 |
+
2023-06-18 16:41:40 - SimpleLog - INFO: - update_step: 12000, online_eval_reward: -503.674
|
| 327 |
+
2023-06-18 16:41:41 - SimpleLog - INFO: - update_step: 12100, online_eval_reward: -628.032
|
| 328 |
+
2023-06-18 16:41:42 - SimpleLog - INFO: - Interactor 0 finished episode 121 with reward -384.215 in 200 steps
|
| 329 |
+
2023-06-18 16:41:42 - SimpleLog - INFO: - Interactor 1 finished episode 122 with reward -432.613 in 200 steps
|
| 330 |
+
2023-06-18 16:41:42 - SimpleLog - INFO: - update_step: 12200, online_eval_reward: -503.886
|
| 331 |
+
2023-06-18 16:41:44 - SimpleLog - INFO: - update_step: 12300, online_eval_reward: -503.432
|
| 332 |
+
2023-06-18 16:41:44 - SimpleLog - INFO: - Interactor 0 finished episode 123 with reward -383.556 in 200 steps
|
| 333 |
+
2023-06-18 16:41:44 - SimpleLog - INFO: - Interactor 1 finished episode 124 with reward -415.292 in 200 steps
|
| 334 |
+
2023-06-18 16:41:45 - SimpleLog - INFO: - update_step: 12400, online_eval_reward: -549.750
|
| 335 |
+
2023-06-18 16:41:46 - SimpleLog - INFO: - update_step: 12500, online_eval_reward: -500.803
|
| 336 |
+
2023-06-18 16:41:47 - SimpleLog - INFO: - Interactor 0 finished episode 125 with reward -384.129 in 200 steps
|
| 337 |
+
2023-06-18 16:41:47 - SimpleLog - INFO: - Interactor 1 finished episode 126 with reward -378.644 in 200 steps
|
| 338 |
+
2023-06-18 16:41:47 - SimpleLog - INFO: - update_step: 12600, online_eval_reward: -614.173
|
| 339 |
+
2023-06-18 16:41:49 - SimpleLog - INFO: - update_step: 12700, online_eval_reward: -504.940
|
| 340 |
+
2023-06-18 16:41:49 - SimpleLog - INFO: - Interactor 0 finished episode 127 with reward -384.349 in 200 steps
|
| 341 |
+
2023-06-18 16:41:49 - SimpleLog - INFO: - Interactor 1 finished episode 128 with reward -378.360 in 200 steps
|
| 342 |
+
2023-06-18 16:41:50 - SimpleLog - INFO: - update_step: 12800, online_eval_reward: -627.797
|
| 343 |
+
2023-06-18 16:41:51 - SimpleLog - INFO: - update_step: 12900, online_eval_reward: -597.391
|
| 344 |
+
2023-06-18 16:41:52 - SimpleLog - INFO: - Interactor 0 finished episode 129 with reward -383.978 in 200 steps
|
| 345 |
+
2023-06-18 16:41:52 - SimpleLog - INFO: - Interactor 1 finished episode 130 with reward -380.662 in 200 steps
|
| 346 |
+
2023-06-18 16:41:52 - SimpleLog - INFO: - update_step: 13000, online_eval_reward: -504.481
|
| 347 |
+
2023-06-18 16:41:54 - SimpleLog - INFO: - update_step: 13100, online_eval_reward: -502.841
|
| 348 |
+
2023-06-18 16:41:54 - SimpleLog - INFO: - Interactor 0 finished episode 131 with reward -383.082 in 200 steps
|
| 349 |
+
2023-06-18 16:41:54 - SimpleLog - INFO: - Interactor 1 finished episode 132 with reward -377.854 in 200 steps
|
| 350 |
+
2023-06-18 16:41:55 - SimpleLog - INFO: - update_step: 13200, online_eval_reward: -606.172
|
| 351 |
+
2023-06-18 16:41:56 - SimpleLog - INFO: - update_step: 13300, online_eval_reward: -627.079
|
| 352 |
+
2023-06-18 16:41:57 - SimpleLog - INFO: - Interactor 0 finished episode 133 with reward -261.498 in 200 steps
|
| 353 |
+
2023-06-18 16:41:57 - SimpleLog - INFO: - Interactor 1 finished episode 134 with reward -378.427 in 200 steps
|
| 354 |
+
2023-06-18 16:41:57 - SimpleLog - INFO: - update_step: 13400, online_eval_reward: -621.482
|
| 355 |
+
2023-06-18 16:41:59 - SimpleLog - INFO: - update_step: 13500, online_eval_reward: -627.802
|
| 356 |
+
2023-06-18 16:41:59 - SimpleLog - INFO: - Interactor 0 finished episode 135 with reward -259.911 in 200 steps
|
| 357 |
+
2023-06-18 16:41:59 - SimpleLog - INFO: - Interactor 1 finished episode 136 with reward -254.313 in 200 steps
|
| 358 |
+
2023-06-18 16:42:00 - SimpleLog - INFO: - update_step: 13600, online_eval_reward: -554.930
|
| 359 |
+
2023-06-18 16:42:01 - SimpleLog - INFO: - update_step: 13700, online_eval_reward: -568.048
|
| 360 |
+
2023-06-18 16:42:02 - SimpleLog - INFO: - Interactor 0 finished episode 137 with reward -292.799 in 200 steps
|
| 361 |
+
2023-06-18 16:42:02 - SimpleLog - INFO: - Interactor 1 finished episode 138 with reward -378.388 in 200 steps
|
| 362 |
+
2023-06-18 16:42:02 - SimpleLog - INFO: - update_step: 13800, online_eval_reward: -536.149
|
| 363 |
+
2023-06-18 16:42:03 - SimpleLog - INFO: - update_step: 13900, online_eval_reward: -506.025
|
| 364 |
+
2023-06-18 16:42:04 - SimpleLog - INFO: - Interactor 0 finished episode 139 with reward -259.192 in 200 steps
|
| 365 |
+
2023-06-18 16:42:04 - SimpleLog - INFO: - Interactor 1 finished episode 140 with reward -378.459 in 200 steps
|
| 366 |
+
2023-06-18 16:42:05 - SimpleLog - INFO: - update_step: 14000, online_eval_reward: -521.703
|
| 367 |
+
2023-06-18 16:42:06 - SimpleLog - INFO: - update_step: 14100, online_eval_reward: -510.447
|
| 368 |
+
2023-06-18 16:42:07 - SimpleLog - INFO: - Interactor 0 finished episode 141 with reward -261.268 in 200 steps
|
| 369 |
+
2023-06-18 16:42:07 - SimpleLog - INFO: - Interactor 1 finished episode 142 with reward -378.171 in 200 steps
|
| 370 |
+
2023-06-18 16:42:07 - SimpleLog - INFO: - update_step: 14200, online_eval_reward: -522.750
|
| 371 |
+
2023-06-18 16:42:09 - SimpleLog - INFO: - update_step: 14300, online_eval_reward: -499.859
|
| 372 |
+
2023-06-18 16:42:09 - SimpleLog - INFO: - Interactor 0 finished episode 143 with reward -258.966 in 200 steps
|
| 373 |
+
2023-06-18 16:42:09 - SimpleLog - INFO: - Interactor 1 finished episode 144 with reward -377.578 in 200 steps
|
| 374 |
+
2023-06-18 16:42:10 - SimpleLog - INFO: - update_step: 14400, online_eval_reward: -503.152
|
| 375 |
+
2023-06-18 16:42:11 - SimpleLog - INFO: - update_step: 14500, online_eval_reward: -504.464
|
| 376 |
+
2023-06-18 16:42:12 - SimpleLog - INFO: - Interactor 0 finished episode 145 with reward -259.010 in 200 steps
|
| 377 |
+
2023-06-18 16:42:12 - SimpleLog - INFO: - Interactor 1 finished episode 146 with reward -378.754 in 200 steps
|
| 378 |
+
2023-06-18 16:42:12 - SimpleLog - INFO: - update_step: 14600, online_eval_reward: -407.771
|
| 379 |
+
2023-06-18 16:42:13 - SimpleLog - INFO: - update_step: 14700, online_eval_reward: -504.594
|
| 380 |
+
2023-06-18 16:42:14 - SimpleLog - INFO: - Interactor 0 finished episode 147 with reward -265.800 in 200 steps
|
| 381 |
+
2023-06-18 16:42:14 - SimpleLog - INFO: - Interactor 1 finished episode 148 with reward -379.020 in 200 steps
|
| 382 |
+
2023-06-18 16:42:15 - SimpleLog - INFO: - update_step: 14800, online_eval_reward: -521.266
|
| 383 |
+
2023-06-18 16:42:16 - SimpleLog - INFO: - update_step: 14900, online_eval_reward: -504.800
|
| 384 |
+
2023-06-18 16:42:17 - SimpleLog - INFO: - Interactor 0 finished episode 149 with reward -259.717 in 200 steps
|
| 385 |
+
2023-06-18 16:42:17 - SimpleLog - INFO: - Interactor 1 finished episode 150 with reward -371.442 in 200 steps
|
| 386 |
+
2023-06-18 16:42:17 - SimpleLog - INFO: - update_step: 15000, online_eval_reward: -557.607
|
| 387 |
+
2023-06-18 16:42:18 - SimpleLog - INFO: - update_step: 15100, online_eval_reward: -503.430
|
| 388 |
+
2023-06-18 16:42:19 - SimpleLog - INFO: - Interactor 0 finished episode 151 with reward -259.884 in 200 steps
|
| 389 |
+
2023-06-18 16:42:19 - SimpleLog - INFO: - Interactor 1 finished episode 152 with reward -378.490 in 200 steps
|
| 390 |
+
2023-06-18 16:42:20 - SimpleLog - INFO: - update_step: 15200, online_eval_reward: -503.907
|
| 391 |
+
2023-06-18 16:42:21 - SimpleLog - INFO: - update_step: 15300, online_eval_reward: -504.686
|
| 392 |
+
2023-06-18 16:42:22 - SimpleLog - INFO: - Interactor 0 finished episode 153 with reward -137.131 in 200 steps
|
| 393 |
+
2023-06-18 16:42:22 - SimpleLog - INFO: - Interactor 1 finished episode 154 with reward -256.148 in 200 steps
|
| 394 |
+
2023-06-18 16:42:22 - SimpleLog - INFO: - update_step: 15400, online_eval_reward: -503.950
|
| 395 |
+
2023-06-18 16:42:23 - SimpleLog - INFO: - update_step: 15500, online_eval_reward: -377.807
|
| 396 |
+
2023-06-18 16:42:24 - SimpleLog - INFO: - Interactor 0 finished episode 155 with reward -182.937 in 200 steps
|
| 397 |
+
2023-06-18 16:42:24 - SimpleLog - INFO: - Interactor 1 finished episode 156 with reward -228.954 in 200 steps
|
| 398 |
+
2023-06-18 16:42:25 - SimpleLog - INFO: - update_step: 15600, online_eval_reward: -633.895
|
| 399 |
+
2023-06-18 16:42:26 - SimpleLog - INFO: - update_step: 15700, online_eval_reward: -504.156
|
| 400 |
+
2023-06-18 16:42:27 - SimpleLog - INFO: - Interactor 0 finished episode 157 with reward -135.201 in 200 steps
|
| 401 |
+
2023-06-18 16:42:27 - SimpleLog - INFO: - Interactor 1 finished episode 158 with reward -127.702 in 200 steps
|
| 402 |
+
2023-06-18 16:42:27 - SimpleLog - INFO: - update_step: 15800, online_eval_reward: -504.442
|
| 403 |
+
2023-06-18 16:42:28 - SimpleLog - INFO: - update_step: 15900, online_eval_reward: -378.320
|
| 404 |
+
2023-06-18 16:42:29 - SimpleLog - INFO: - Interactor 0 finished episode 159 with reward -140.012 in 200 steps
|
| 405 |
+
2023-06-18 16:42:29 - SimpleLog - INFO: - Interactor 1 finished episode 160 with reward -254.871 in 200 steps
|
| 406 |
+
2023-06-18 16:42:30 - SimpleLog - INFO: - update_step: 16000, online_eval_reward: -758.459
|
| 407 |
+
2023-06-18 16:42:31 - SimpleLog - INFO: - update_step: 16100, online_eval_reward: -528.790
|
| 408 |
+
2023-06-18 16:42:32 - SimpleLog - INFO: - Interactor 0 finished episode 161 with reward -260.244 in 200 steps
|
| 409 |
+
2023-06-18 16:42:32 - SimpleLog - INFO: - Interactor 1 finished episode 162 with reward -378.559 in 200 steps
|
| 410 |
+
2023-06-18 16:42:32 - SimpleLog - INFO: - update_step: 16200, online_eval_reward: -512.799
|
| 411 |
+
2023-06-18 16:42:33 - SimpleLog - INFO: - update_step: 16300, online_eval_reward: -500.630
|
| 412 |
+
2023-06-18 16:42:34 - SimpleLog - INFO: - Interactor 0 finished episode 163 with reward -135.817 in 200 steps
|
| 413 |
+
2023-06-18 16:42:34 - SimpleLog - INFO: - Interactor 1 finished episode 164 with reward -260.366 in 200 steps
|
| 414 |
+
2023-06-18 16:42:35 - SimpleLog - INFO: - update_step: 16400, online_eval_reward: -514.297
|
| 415 |
+
2023-06-18 16:42:36 - SimpleLog - INFO: - update_step: 16500, online_eval_reward: -504.048
|
| 416 |
+
2023-06-18 16:42:37 - SimpleLog - INFO: - Interactor 0 finished episode 165 with reward -13.423 in 200 steps
|
| 417 |
+
2023-06-18 16:42:37 - SimpleLog - INFO: - Interactor 1 finished episode 166 with reward -132.449 in 200 steps
|
| 418 |
+
2023-06-18 16:42:37 - SimpleLog - INFO: - update_step: 16600, online_eval_reward: -504.361
|
| 419 |
+
2023-06-18 16:42:39 - SimpleLog - INFO: - update_step: 16700, online_eval_reward: -503.206
|
| 420 |
+
2023-06-18 16:42:39 - SimpleLog - INFO: - Interactor 0 finished episode 167 with reward -139.457 in 200 steps
|
| 421 |
+
2023-06-18 16:42:39 - SimpleLog - INFO: - Interactor 1 finished episode 168 with reward -374.078 in 200 steps
|
| 422 |
+
2023-06-18 16:42:40 - SimpleLog - INFO: - update_step: 16800, online_eval_reward: -453.025
|
| 423 |
+
2023-06-18 16:42:41 - SimpleLog - INFO: - update_step: 16900, online_eval_reward: -377.225
|
| 424 |
+
2023-06-18 16:42:42 - SimpleLog - INFO: - Interactor 0 finished episode 169 with reward -277.210 in 200 steps
|
| 425 |
+
2023-06-18 16:42:42 - SimpleLog - INFO: - Interactor 1 finished episode 170 with reward -252.749 in 200 steps
|
| 426 |
+
2023-06-18 16:42:42 - SimpleLog - INFO: - update_step: 17000, online_eval_reward: -494.961
|
| 427 |
+
2023-06-18 16:42:44 - SimpleLog - INFO: - update_step: 17100, online_eval_reward: -628.388
|
| 428 |
+
2023-06-18 16:42:44 - SimpleLog - INFO: - Interactor 0 finished episode 171 with reward -186.962 in 200 steps
|
| 429 |
+
2023-06-18 16:42:44 - SimpleLog - INFO: - Interactor 1 finished episode 172 with reward -279.426 in 200 steps
|
| 430 |
+
2023-06-18 16:42:45 - SimpleLog - INFO: - update_step: 17200, online_eval_reward: -403.423
|
| 431 |
+
2023-06-18 16:42:46 - SimpleLog - INFO: - update_step: 17300, online_eval_reward: -377.556
|
| 432 |
+
2023-06-18 16:42:47 - SimpleLog - INFO: - Interactor 0 finished episode 173 with reward -129.997 in 200 steps
|
| 433 |
+
2023-06-18 16:42:47 - SimpleLog - INFO: - Interactor 1 finished episode 174 with reward -249.593 in 200 steps
|
| 434 |
+
2023-06-18 16:42:47 - SimpleLog - INFO: - update_step: 17400, online_eval_reward: -125.928
|
| 435 |
+
2023-06-18 16:42:47 - SimpleLog - INFO: - current update step obtain a better online_eval_reward: -125.928, save the best model!
|
| 436 |
+
2023-06-18 16:42:49 - SimpleLog - INFO: - update_step: 17500, online_eval_reward: -252.264
|
| 437 |
+
2023-06-18 16:42:49 - SimpleLog - INFO: - Interactor 0 finished episode 175 with reward -333.461 in 200 steps
|
| 438 |
+
2023-06-18 16:42:49 - SimpleLog - INFO: - Interactor 1 finished episode 176 with reward -380.684 in 200 steps
|
| 439 |
+
2023-06-18 16:42:50 - SimpleLog - INFO: - update_step: 17600, online_eval_reward: -377.967
|
| 440 |
+
2023-06-18 16:42:51 - SimpleLog - INFO: - update_step: 17700, online_eval_reward: -502.527
|
| 441 |
+
2023-06-18 16:42:52 - SimpleLog - INFO: - Interactor 0 finished episode 177 with reward -382.695 in 200 steps
|
| 442 |
+
2023-06-18 16:42:52 - SimpleLog - INFO: - Interactor 1 finished episode 178 with reward -377.542 in 200 steps
|
| 443 |
+
2023-06-18 16:42:52 - SimpleLog - INFO: - update_step: 17800, online_eval_reward: -503.022
|
| 444 |
+
2023-06-18 16:42:54 - SimpleLog - INFO: - update_step: 17900, online_eval_reward: -509.103
|
| 445 |
+
2023-06-18 16:42:54 - SimpleLog - INFO: - Interactor 0 finished episode 179 with reward -382.172 in 200 steps
|
| 446 |
+
2023-06-18 16:42:54 - SimpleLog - INFO: - Interactor 1 finished episode 180 with reward -500.410 in 200 steps
|
| 447 |
+
2023-06-18 16:42:55 - SimpleLog - INFO: - update_step: 18000, online_eval_reward: -518.620
|
| 448 |
+
2023-06-18 16:42:56 - SimpleLog - INFO: - update_step: 18100, online_eval_reward: -502.824
|
| 449 |
+
2023-06-18 16:42:57 - SimpleLog - INFO: - Interactor 0 finished episode 181 with reward -381.368 in 200 steps
|
| 450 |
+
2023-06-18 16:42:57 - SimpleLog - INFO: - Interactor 1 finished episode 182 with reward -500.005 in 200 steps
|
| 451 |
+
2023-06-18 16:42:58 - SimpleLog - INFO: - update_step: 18200, online_eval_reward: -748.451
|
| 452 |
+
2023-06-18 16:42:59 - SimpleLog - INFO: - update_step: 18300, online_eval_reward: -519.708
|
| 453 |
+
2023-06-18 16:42:59 - SimpleLog - INFO: - Interactor 0 finished episode 183 with reward -382.336 in 200 steps
|
| 454 |
+
2023-06-18 16:42:59 - SimpleLog - INFO: - Interactor 1 finished episode 184 with reward -377.370 in 200 steps
|
| 455 |
+
2023-06-18 16:43:00 - SimpleLog - INFO: - update_step: 18400, online_eval_reward: -503.253
|
| 456 |
+
2023-06-18 16:43:01 - SimpleLog - INFO: - update_step: 18500, online_eval_reward: -504.352
|
| 457 |
+
2023-06-18 16:43:02 - SimpleLog - INFO: - Interactor 0 finished episode 185 with reward -381.589 in 200 steps
|
| 458 |
+
2023-06-18 16:43:02 - SimpleLog - INFO: - Interactor 1 finished episode 186 with reward -498.094 in 200 steps
|
| 459 |
+
2023-06-18 16:43:03 - SimpleLog - INFO: - update_step: 18600, online_eval_reward: -621.553
|
| 460 |
+
2023-06-18 16:43:04 - SimpleLog - INFO: - update_step: 18700, online_eval_reward: -503.396
|
| 461 |
+
2023-06-18 16:43:04 - SimpleLog - INFO: - Interactor 0 finished episode 187 with reward -381.901 in 200 steps
|
| 462 |
+
2023-06-18 16:43:04 - SimpleLog - INFO: - Interactor 1 finished episode 188 with reward -500.203 in 200 steps
|
| 463 |
+
2023-06-18 16:43:05 - SimpleLog - INFO: - update_step: 18800, online_eval_reward: -378.566
|
| 464 |
+
2023-06-18 16:43:06 - SimpleLog - INFO: - update_step: 18900, online_eval_reward: -503.793
|
| 465 |
+
2023-06-18 16:43:07 - SimpleLog - INFO: - Interactor 0 finished episode 189 with reward -381.694 in 200 steps
|
| 466 |
+
2023-06-18 16:43:07 - SimpleLog - INFO: - Interactor 1 finished episode 190 with reward -500.428 in 200 steps
|
| 467 |
+
2023-06-18 16:43:08 - SimpleLog - INFO: - update_step: 19000, online_eval_reward: -502.545
|
| 468 |
+
2023-06-18 16:43:09 - SimpleLog - INFO: - update_step: 19100, online_eval_reward: -501.932
|
| 469 |
+
2023-06-18 16:43:10 - SimpleLog - INFO: - Interactor 0 finished episode 191 with reward -268.149 in 200 steps
|
| 470 |
+
2023-06-18 16:43:10 - SimpleLog - INFO: - Interactor 1 finished episode 192 with reward -376.541 in 200 steps
|
| 471 |
+
2023-06-18 16:43:10 - SimpleLog - INFO: - update_step: 19200, online_eval_reward: -558.227
|
| 472 |
+
2023-06-18 16:43:12 - SimpleLog - INFO: - update_step: 19300, online_eval_reward: -517.582
|
| 473 |
+
2023-06-18 16:43:12 - SimpleLog - INFO: - Interactor 0 finished episode 193 with reward -259.277 in 200 steps
|
| 474 |
+
2023-06-18 16:43:12 - SimpleLog - INFO: - Interactor 1 finished episode 194 with reward -377.712 in 200 steps
|
| 475 |
+
2023-06-18 16:43:13 - SimpleLog - INFO: - update_step: 19400, online_eval_reward: -500.636
|
| 476 |
+
2023-06-18 16:43:14 - SimpleLog - INFO: - update_step: 19500, online_eval_reward: -377.581
|
| 477 |
+
2023-06-18 16:43:15 - SimpleLog - INFO: - Interactor 0 finished episode 195 with reward -258.805 in 200 steps
|
| 478 |
+
2023-06-18 16:43:15 - SimpleLog - INFO: - Interactor 1 finished episode 196 with reward -374.762 in 200 steps
|
| 479 |
+
2023-06-18 16:43:15 - SimpleLog - INFO: - update_step: 19600, online_eval_reward: -125.934
|
| 480 |
+
2023-06-18 16:43:17 - SimpleLog - INFO: - update_step: 19700, online_eval_reward: -502.898
|
| 481 |
+
2023-06-18 16:43:17 - SimpleLog - INFO: - Interactor 0 finished episode 197 with reward -134.881 in 200 steps
|
| 482 |
+
2023-06-18 16:43:17 - SimpleLog - INFO: - Interactor 1 finished episode 198 with reward -274.250 in 200 steps
|
| 483 |
+
2023-06-18 16:43:18 - SimpleLog - INFO: - update_step: 19800, online_eval_reward: -377.947
|
| 484 |
+
2023-06-18 16:43:19 - SimpleLog - INFO: - update_step: 19900, online_eval_reward: -380.967
|
| 485 |
+
2023-06-18 16:43:20 - SimpleLog - INFO: - Interactor 0 finished episode 199 with reward -131.490 in 200 steps
|
| 486 |
+
2023-06-18 16:43:20 - SimpleLog - INFO: - Interactor 1 finished episode 200 with reward -254.782 in 200 steps
|
| 487 |
+
2023-06-18 16:43:20 - SimpleLog - INFO: - Finish training! Time cost: 248.411 s
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/100
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6607973c1581ba4c3b8799d4376360708a60cd801b603549605dde4ac4d26aaa
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f343139d2775d5e39cc655b23e115f6180e6624cc12df75622fb1425a82a709
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7053581566e0070608f66e30f3b1a2806ff44a347e49bea615c53fb71fa80b7b
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10100
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:744e25f444f803c9287d9e4a1778778f566112f62a8730f705f72525c72e1647
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10200
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64688b841083f83a8b2c3998d3e3402f2cf928babf50cb4b4c8be1be6ee7c644
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10300
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bc1df6de020679ff0466c501b4dcbd3c3522f6a25b75d0bb4dc2b1a19f41dd7
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10800
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b938f5f4cb442cddc47aa55d7fceef27a593f8b7d3b60bea04cf9da6a3234a8d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/10900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c645b696b549ff06072a9dc08e1478faee681db9dc566d69ca2a83f62475750c
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1100
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ced2e7040447f293adafcb38f7ea7903cc9ece300709f70f440c115842fa99
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:beb1075600544d7b6feb3fdd4703c3ea6db3f69724260eda201ccad904cc6009
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11100
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14bed910238b9f159851d3bbc21a3fc9ddc23af43406ea918fd64c2907d2e265
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/11900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20252803de7591bea432335559dc71a1b7ada510e680176edb733bc6dbbd582d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1200
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c889b18b770bd8a886aa129a1628ed420ea454dfc2704787276cc937d4098d66
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12500
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68aa800bba3c2802c5637067f57c9cca77e04058b5da96d25a7331ba49682b1b
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12700
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:424478d6a4b86e391c98ac9b137586649aa95cabf0fac752ec1ee8685a3a3819
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/12800
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:371ed90e0af2ee87bbfa8fee59da5efcea081f075249dfab81ae409724eeded2
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1300
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bd3d70aa912612a5d055e6502f9cf009599074ac0b841ba71a3455c6d1f127d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c0f609feb4677dfbe68264cbb61c49a87b8386b030969a6e4af1289e662a60c
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13800
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0ddd9fd9d8a6bf55e63cc0cfb012aaaccc5848146d72c1f2c1c3ab8552823e9
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/13900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c499fcd51c0f094d6a0e3b2887bcaa98ce0cac2e923a83a28af23340d84b9d2d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1400
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e4c9dc95391267a927149672e1728991bd99e985939a2435138f7cc6c166d00
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f180ddc8d15f5412da6ee16d3de69f74befb87170ad2ceac2fcee4dcc5596276
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/14900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c08c221756645d0de274659d2435f31946a4b7a24a0b355e2354b9022f7c09c
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1500
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a943247b25e7fbc314d3fd8e0ae37fcbcfb87c4696f32a6d66f8eebdab073756
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:408f74f6096fea01d95ae21f6c6c21dd785dded9e3bbaa8d4d0734a0ba3b5225
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/15900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75f63f5c2abcca6135f121b189e30ead7a57f61d5b3812693a3bb49c59c46eb2
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1600
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d88a0f2d2a8778ac14e22b53f2e0d89802ad96f184afddac590b35e42234816
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a19455607f9eeb21f47874bc3f21010616e1b9c6b55d545f7e02ec889fc43830
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/16700
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fc08b72260f458bea0645814fe58029ea5c6bc8152a3bfcc6340660b9b0871d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1700
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c4ce31ef505b8fe1bc4901f88563a581cbbe7589646aade15f98ac611452a3d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17000
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d3c01b0b87bcf3ef87a2d3004ba9ce4c435f92260c7497311a710fbc5fa9c8d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/17900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9ef7679e4048b9bd2fad9d46e5eec731287f7417f728a50e82de637df645800
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/1800
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d96b95eda647674a386e958056398a06d6e0914151fd5be14db66038e67eb42d
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18400
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16c5a09e7119bf7018a03c3ce02aa73f594d77f3c11c57231bd8ab793466a7c5
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18700
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df6c639431e9ffab0dc399adcc3a0efeacc59fc1623884e6e785d0660c164787
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18800
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:963ab3a1615a535fdce86220bf3dfb9a876e6b53fe8c0f66557a8f3dabc7d89f
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/models/18900
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d765380a45d7704c4945a56bee144bd25693374b6b715f17610fc45e14304547
|
| 3 |
+
size 1356936
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/interact/events.out.tfevents.1687077551.ML3090.330549.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2fdc28d315e8a1ee8ff541b15b2faddec4cb3cc990850353dbc27d6aea5558d
|
| 3 |
+
size 19786
|
ClassControl/Pendulum-v1/Train_Pendulum-v1_SAC_20230618-163911/tb_logs/policy/events.out.tfevents.1687077551.ML3090.330549.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fdd019c428a762b1ca0b2a815d9daec2aa8b2682774e3c3236ef73182f1485b
|
| 3 |
+
size 5561148
|