diff --git "a/experiment/rl2_trainer_1/debug.log" "b/experiment/rl2_trainer_1/debug.log" new file mode 100644--- /dev/null +++ "b/experiment/rl2_trainer_1/debug.log" @@ -0,0 +1,11102 @@ +2025-03-29 19:33:17 | [rl2_trainer] Logging to /home/h2khalil/MetaRL-Assistive-Robotics/data/local/experiment/rl2_trainer_1 +2025-03-29 19:33:19 | [rl2_trainer] Obtaining samples... +2025-03-29 19:33:35 | [rl2_trainer] epoch #0 | Optimizing policy... +2025-03-29 19:33:36 | [rl2_trainer] epoch #0 | Fitting baseline... +2025-03-29 19:33:36 | [rl2_trainer] epoch #0 | Computing loss before +2025-03-29 19:33:37 | [rl2_trainer] epoch #0 | Computing KL before +2025-03-29 19:33:37 | [rl2_trainer] epoch #0 | Optimizing +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | Computing KL after +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | Computing loss after +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | Saving snapshot... +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | Saved +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | Time 20.65 s +2025-03-29 19:33:39 | [rl2_trainer] epoch #0 | EpochTime 20.65 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -41.4442 +Average/AverageReturn -71.3862 +Average/Iteration 0 +Average/MaxReturn -16.7725 +Average/MinReturn -108.309 +Average/NumEpisodes 8 +Average/StdReturn 31.4488 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.896469 +TotalEnvSteps 800 +__unnamed_task__/AverageDiscountedReturn -41.4442 +__unnamed_task__/AverageReturn -71.3862 +__unnamed_task__/Iteration 0 +__unnamed_task__/MaxReturn -16.7725 +__unnamed_task__/MinReturn -108.309 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.4488 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.94244 +policy/KL 0.0244176 +policy/KLBefore 0 +policy/LossAfter -0.302634 +policy/LossBefore 0.0249686 +policy/dLoss 0.327602 +---------------------------------------- ------------ +2025-03-29 19:33:55 | [rl2_trainer] epoch #1 | Optimizing policy... +2025-03-29 19:33:55 | [rl2_trainer] epoch #1 | Fitting baseline... +2025-03-29 19:33:55 | [rl2_trainer] epoch #1 | Computing loss before +2025-03-29 19:33:55 | [rl2_trainer] epoch #1 | Computing KL before +2025-03-29 19:33:55 | [rl2_trainer] epoch #1 | Optimizing +2025-03-29 19:33:57 | [rl2_trainer] epoch #1 | Computing KL after +2025-03-29 19:33:58 | [rl2_trainer] epoch #1 | Computing loss after +2025-03-29 19:33:58 | [rl2_trainer] epoch #1 | Saving snapshot... +2025-03-29 19:33:58 | [rl2_trainer] epoch #1 | Saved +2025-03-29 19:33:58 | [rl2_trainer] epoch #1 | Time 39.06 s +2025-03-29 19:33:58 | [rl2_trainer] epoch #1 | EpochTime 18.41 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -52.5487 +Average/AverageReturn -90.4144 +Average/Iteration 1 +Average/MaxReturn -38.3642 +Average/MinReturn -131.562 +Average/NumEpisodes 8 +Average/StdReturn 31.2359 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973452 +TotalEnvSteps 1600 +__unnamed_task__/AverageDiscountedReturn -52.5487 +__unnamed_task__/AverageReturn -90.4144 +__unnamed_task__/Iteration 1 +__unnamed_task__/MaxReturn -38.3642 +__unnamed_task__/MinReturn -131.562 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.2359 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.92806 +policy/KL 0.0251213 +policy/KLBefore 0 +policy/LossAfter -0.210536 +policy/LossBefore -0.00227562 +policy/dLoss 0.208261 +---------------------------------------- ------------- +2025-03-29 19:34:14 | [rl2_trainer] epoch #2 | Optimizing policy... +2025-03-29 19:34:14 | [rl2_trainer] epoch #2 | Fitting baseline... +2025-03-29 19:34:14 | [rl2_trainer] epoch #2 | Computing loss before +2025-03-29 19:34:14 | [rl2_trainer] epoch #2 | Computing KL before +2025-03-29 19:34:14 | [rl2_trainer] epoch #2 | Optimizing +2025-03-29 19:34:16 | [rl2_trainer] epoch #2 | Computing KL after +2025-03-29 19:34:16 | [rl2_trainer] epoch #2 | Computing loss after +2025-03-29 19:34:17 | [rl2_trainer] epoch #2 | Saving snapshot... +2025-03-29 19:34:17 | [rl2_trainer] epoch #2 | Saved +2025-03-29 19:34:17 | [rl2_trainer] epoch #2 | Time 58.01 s +2025-03-29 19:34:17 | [rl2_trainer] epoch #2 | EpochTime 18.94 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -46.3526 +Average/AverageReturn -75.1802 +Average/Iteration 2 +Average/MaxReturn -51.546 +Average/MinReturn -93.9813 +Average/NumEpisodes 8 +Average/StdReturn 13.7671 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977091 +TotalEnvSteps 2400 +__unnamed_task__/AverageDiscountedReturn -46.3526 +__unnamed_task__/AverageReturn -75.1802 +__unnamed_task__/Iteration 2 +__unnamed_task__/MaxReturn -51.546 +__unnamed_task__/MinReturn -93.9813 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 13.7671 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.90228 +policy/KL 0.0213384 +policy/KLBefore 0 +policy/LossAfter -0.098975 +policy/LossBefore -0.00942025 +policy/dLoss 0.0895547 +---------------------------------------- ------------- +2025-03-29 19:34:33 | [rl2_trainer] epoch #3 | Optimizing policy... +2025-03-29 19:34:33 | [rl2_trainer] epoch #3 | Fitting baseline... +2025-03-29 19:34:33 | [rl2_trainer] epoch #3 | Computing loss before +2025-03-29 19:34:33 | [rl2_trainer] epoch #3 | Computing KL before +2025-03-29 19:34:33 | [rl2_trainer] epoch #3 | Optimizing +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | Computing KL after +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | Computing loss after +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | Saving snapshot... +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | Saved +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | Time 76.94 s +2025-03-29 19:34:35 | [rl2_trainer] epoch #3 | EpochTime 18.93 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -47.665 +Average/AverageReturn -76.5853 +Average/Iteration 3 +Average/MaxReturn -41.4119 +Average/MinReturn -121.156 +Average/NumEpisodes 8 +Average/StdReturn 22.7081 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982242 +TotalEnvSteps 3200 +__unnamed_task__/AverageDiscountedReturn -47.665 +__unnamed_task__/AverageReturn -76.5853 +__unnamed_task__/Iteration 3 +__unnamed_task__/MaxReturn -41.4119 +__unnamed_task__/MinReturn -121.156 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.7081 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.88722 +policy/KL 0.0184509 +policy/KLBefore 0 +policy/LossAfter -0.118195 +policy/LossBefore -0.0122706 +policy/dLoss 0.105924 +---------------------------------------- ------------ +2025-03-29 19:34:52 | [rl2_trainer] epoch #4 | Optimizing policy... +2025-03-29 19:34:52 | [rl2_trainer] epoch #4 | Fitting baseline... +2025-03-29 19:34:52 | [rl2_trainer] epoch #4 | Computing loss before +2025-03-29 19:34:52 | [rl2_trainer] epoch #4 | Computing KL before +2025-03-29 19:34:52 | [rl2_trainer] epoch #4 | Optimizing +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | Computing KL after +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | Computing loss after +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | Saving snapshot... +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | Saved +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | Time 95.77 s +2025-03-29 19:34:54 | [rl2_trainer] epoch #4 | EpochTime 18.83 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -49.8648 +Average/AverageReturn -80.0712 +Average/Iteration 4 +Average/MaxReturn -49.3289 +Average/MinReturn -103.197 +Average/NumEpisodes 8 +Average/StdReturn 15.7916 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983962 +TotalEnvSteps 4000 +__unnamed_task__/AverageDiscountedReturn -49.8648 +__unnamed_task__/AverageReturn -80.0712 +__unnamed_task__/Iteration 4 +__unnamed_task__/MaxReturn -49.3289 +__unnamed_task__/MinReturn -103.197 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.7916 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.87185 +policy/KL 0.0315954 +policy/KLBefore 0 +policy/LossAfter -0.117455 +policy/LossBefore 0.00228841 +policy/dLoss 0.119743 +---------------------------------------- ------------- +2025-03-29 19:35:11 | [rl2_trainer] epoch #5 | Optimizing policy... +2025-03-29 19:35:11 | [rl2_trainer] epoch #5 | Fitting baseline... +2025-03-29 19:35:11 | [rl2_trainer] epoch #5 | Computing loss before +2025-03-29 19:35:11 | [rl2_trainer] epoch #5 | Computing KL before +2025-03-29 19:35:11 | [rl2_trainer] epoch #5 | Optimizing +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | Computing KL after +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | Computing loss after +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | Saving snapshot... +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | Saved +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | Time 114.46 s +2025-03-29 19:35:13 | [rl2_trainer] epoch #5 | EpochTime 18.69 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -54.1155 +Average/AverageReturn -86.3302 +Average/Iteration 5 +Average/MaxReturn -57.4572 +Average/MinReturn -116.939 +Average/NumEpisodes 8 +Average/StdReturn 23.291 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.989644 +TotalEnvSteps 4800 +__unnamed_task__/AverageDiscountedReturn -54.1155 +__unnamed_task__/AverageReturn -86.3302 +__unnamed_task__/Iteration 5 +__unnamed_task__/MaxReturn -57.4572 +__unnamed_task__/MinReturn -116.939 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.291 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.84609 +policy/KL 0.0304598 +policy/KLBefore 0 +policy/LossAfter -0.124477 +policy/LossBefore -0.00731533 +policy/dLoss 0.117162 +---------------------------------------- ------------- +2025-03-29 19:35:29 | [rl2_trainer] epoch #6 | Optimizing policy... +2025-03-29 19:35:29 | [rl2_trainer] epoch #6 | Fitting baseline... +2025-03-29 19:35:29 | [rl2_trainer] epoch #6 | Computing loss before +2025-03-29 19:35:29 | [rl2_trainer] epoch #6 | Computing KL before +2025-03-29 19:35:29 | [rl2_trainer] epoch #6 | Optimizing +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | Computing KL after +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | Computing loss after +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | Saving snapshot... +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | Saved +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | Time 132.78 s +2025-03-29 19:35:31 | [rl2_trainer] epoch #6 | EpochTime 18.32 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -60.3839 +Average/AverageReturn -99.8585 +Average/Iteration 6 +Average/MaxReturn -73.5325 +Average/MinReturn -180.454 +Average/NumEpisodes 8 +Average/StdReturn 32.6683 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980398 +TotalEnvSteps 5600 +__unnamed_task__/AverageDiscountedReturn -60.3839 +__unnamed_task__/AverageReturn -99.8585 +__unnamed_task__/Iteration 6 +__unnamed_task__/MaxReturn -73.5325 +__unnamed_task__/MinReturn -180.454 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.6683 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.82654 +policy/KL 0.0244589 +policy/KLBefore 0 +policy/LossAfter -0.221365 +policy/LossBefore 0.00539336 +policy/dLoss 0.226759 +---------------------------------------- ------------- +2025-03-29 19:35:47 | [rl2_trainer] epoch #7 | Optimizing policy... +2025-03-29 19:35:47 | [rl2_trainer] epoch #7 | Fitting baseline... +2025-03-29 19:35:47 | [rl2_trainer] epoch #7 | Computing loss before +2025-03-29 19:35:47 | [rl2_trainer] epoch #7 | Computing KL before +2025-03-29 19:35:47 | [rl2_trainer] epoch #7 | Optimizing +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | Computing KL after +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | Computing loss after +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | Saving snapshot... +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | Saved +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | Time 150.81 s +2025-03-29 19:35:49 | [rl2_trainer] epoch #7 | EpochTime 18.02 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -62.8757 +Average/AverageReturn -104.122 +Average/Iteration 7 +Average/MaxReturn -78.1878 +Average/MinReturn -163.345 +Average/NumEpisodes 8 +Average/StdReturn 30.5239 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.950811 +TotalEnvSteps 6400 +__unnamed_task__/AverageDiscountedReturn -62.8757 +__unnamed_task__/AverageReturn -104.122 +__unnamed_task__/Iteration 7 +__unnamed_task__/MaxReturn -78.1878 +__unnamed_task__/MinReturn -163.345 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.5239 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.81839 +policy/KL 0.0341305 +policy/KLBefore 0 +policy/LossAfter -0.327741 +policy/LossBefore -0.0241593 +policy/dLoss 0.303582 +---------------------------------------- ------------ +2025-03-29 19:36:04 | [rl2_trainer] epoch #8 | Optimizing policy... +2025-03-29 19:36:04 | [rl2_trainer] epoch #8 | Fitting baseline... +2025-03-29 19:36:04 | [rl2_trainer] epoch #8 | Computing loss before +2025-03-29 19:36:05 | [rl2_trainer] epoch #8 | Computing KL before +2025-03-29 19:36:05 | [rl2_trainer] epoch #8 | Optimizing +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | Computing KL after +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | Computing loss after +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | Saving snapshot... +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | Saved +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | Time 168.33 s +2025-03-29 19:36:07 | [rl2_trainer] epoch #8 | EpochTime 17.52 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -70.1955 +Average/AverageReturn -122.218 +Average/Iteration 8 +Average/MaxReturn -72.7834 +Average/MinReturn -163.033 +Average/NumEpisodes 8 +Average/StdReturn 33.9286 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978826 +TotalEnvSteps 7200 +__unnamed_task__/AverageDiscountedReturn -70.1955 +__unnamed_task__/AverageReturn -122.218 +__unnamed_task__/Iteration 8 +__unnamed_task__/MaxReturn -72.7834 +__unnamed_task__/MinReturn -163.033 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.9286 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.80169 +policy/KL 0.0219162 +policy/KLBefore 0 +policy/LossAfter -0.266887 +policy/LossBefore -0.0171129 +policy/dLoss 0.249774 +---------------------------------------- ------------ +2025-03-29 19:36:22 | [rl2_trainer] epoch #9 | Optimizing policy... +2025-03-29 19:36:22 | [rl2_trainer] epoch #9 | Fitting baseline... +2025-03-29 19:36:22 | [rl2_trainer] epoch #9 | Computing loss before +2025-03-29 19:36:22 | [rl2_trainer] epoch #9 | Computing KL before +2025-03-29 19:36:22 | [rl2_trainer] epoch #9 | Optimizing +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | Computing KL after +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | Computing loss after +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | Saving snapshot... +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | Saved +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | Time 185.76 s +2025-03-29 19:36:24 | [rl2_trainer] epoch #9 | EpochTime 17.43 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -70.0735 +Average/AverageReturn -118.4 +Average/Iteration 9 +Average/MaxReturn -76.4715 +Average/MinReturn -173.494 +Average/NumEpisodes 8 +Average/StdReturn 31.9665 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.987875 +TotalEnvSteps 8000 +__unnamed_task__/AverageDiscountedReturn -70.0735 +__unnamed_task__/AverageReturn -118.4 +__unnamed_task__/Iteration 9 +__unnamed_task__/MaxReturn -76.4715 +__unnamed_task__/MinReturn -173.494 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.9665 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.79745 +policy/KL 0.0239106 +policy/KLBefore 0 +policy/LossAfter -0.180257 +policy/LossBefore -0.0110449 +policy/dLoss 0.169212 +---------------------------------------- ------------ +2025-03-29 19:36:40 | [rl2_trainer] epoch #10 | Optimizing policy... +2025-03-29 19:36:40 | [rl2_trainer] epoch #10 | Fitting baseline... +2025-03-29 19:36:40 | [rl2_trainer] epoch #10 | Computing loss before +2025-03-29 19:36:40 | [rl2_trainer] epoch #10 | Computing KL before +2025-03-29 19:36:40 | [rl2_trainer] epoch #10 | Optimizing +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | Computing KL after +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | Computing loss after +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | Saving snapshot... +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | Saved +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | Time 203.80 s +2025-03-29 19:36:42 | [rl2_trainer] epoch #10 | EpochTime 18.04 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -50.4287 +Average/AverageReturn -95.1916 +Average/Iteration 10 +Average/MaxReturn 57.7707 +Average/MinReturn -157.479 +Average/NumEpisodes 8 +Average/StdReturn 64.2695 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972474 +TotalEnvSteps 8800 +__unnamed_task__/AverageDiscountedReturn -50.4287 +__unnamed_task__/AverageReturn -95.1916 +__unnamed_task__/Iteration 10 +__unnamed_task__/MaxReturn 57.7707 +__unnamed_task__/MinReturn -157.479 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 64.2695 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.78005 +policy/KL 0.0321199 +policy/KLBefore 0 +policy/LossAfter -0.435791 +policy/LossBefore 0.00723328 +policy/dLoss 0.443024 +---------------------------------------- ------------- +2025-03-29 19:36:58 | [rl2_trainer] epoch #11 | Optimizing policy... +2025-03-29 19:36:58 | [rl2_trainer] epoch #11 | Fitting baseline... +2025-03-29 19:36:58 | [rl2_trainer] epoch #11 | Computing loss before +2025-03-29 19:36:58 | [rl2_trainer] epoch #11 | Computing KL before +2025-03-29 19:36:58 | [rl2_trainer] epoch #11 | Optimizing +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | Computing KL after +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | Computing loss after +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | Saving snapshot... +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | Saved +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | Time 221.77 s +2025-03-29 19:37:00 | [rl2_trainer] epoch #11 | EpochTime 17.97 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -70.5067 +Average/AverageReturn -120.16 +Average/Iteration 11 +Average/MaxReturn -71.7796 +Average/MinReturn -157.098 +Average/NumEpisodes 8 +Average/StdReturn 25.9381 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957843 +TotalEnvSteps 9600 +__unnamed_task__/AverageDiscountedReturn -70.5067 +__unnamed_task__/AverageReturn -120.16 +__unnamed_task__/Iteration 11 +__unnamed_task__/MaxReturn -71.7796 +__unnamed_task__/MinReturn -157.098 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.9381 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.74662 +policy/KL 0.0202122 +policy/KLBefore 0 +policy/LossAfter -0.328189 +policy/LossBefore -0.0346258 +policy/dLoss 0.293563 +---------------------------------------- ------------ +2025-03-29 19:37:17 | [rl2_trainer] epoch #12 | Optimizing policy... +2025-03-29 19:37:17 | [rl2_trainer] epoch #12 | Fitting baseline... +2025-03-29 19:37:17 | [rl2_trainer] epoch #12 | Computing loss before +2025-03-29 19:37:17 | [rl2_trainer] epoch #12 | Computing KL before +2025-03-29 19:37:17 | [rl2_trainer] epoch #12 | Optimizing +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | Computing KL after +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | Computing loss after +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | Saving snapshot... +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | Saved +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | Time 240.61 s +2025-03-29 19:37:19 | [rl2_trainer] epoch #12 | EpochTime 18.84 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -55.4215 +Average/AverageReturn -92.4522 +Average/Iteration 12 +Average/MaxReturn -54.6153 +Average/MinReturn -138.427 +Average/NumEpisodes 8 +Average/StdReturn 24.3934 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.965383 +TotalEnvSteps 10400 +__unnamed_task__/AverageDiscountedReturn -55.4215 +__unnamed_task__/AverageReturn -92.4522 +__unnamed_task__/Iteration 12 +__unnamed_task__/MaxReturn -54.6153 +__unnamed_task__/MinReturn -138.427 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.3934 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.73408 +policy/KL 0.0179175 +policy/KLBefore 0 +policy/LossAfter -0.122155 +policy/LossBefore 0.004577 +policy/dLoss 0.126732 +---------------------------------------- ------------- +2025-03-29 19:37:36 | [rl2_trainer] epoch #13 | Optimizing policy... +2025-03-29 19:37:36 | [rl2_trainer] epoch #13 | Fitting baseline... +2025-03-29 19:37:36 | [rl2_trainer] epoch #13 | Computing loss before +2025-03-29 19:37:36 | [rl2_trainer] epoch #13 | Computing KL before +2025-03-29 19:37:36 | [rl2_trainer] epoch #13 | Optimizing +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | Computing KL after +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | Computing loss after +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | Saving snapshot... +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | Saved +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | Time 259.46 s +2025-03-29 19:37:38 | [rl2_trainer] epoch #13 | EpochTime 18.84 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -68.1888 +Average/AverageReturn -116.869 +Average/Iteration 13 +Average/MaxReturn -71.6081 +Average/MinReturn -167.132 +Average/NumEpisodes 8 +Average/StdReturn 31.3695 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.952159 +TotalEnvSteps 11200 +__unnamed_task__/AverageDiscountedReturn -68.1888 +__unnamed_task__/AverageReturn -116.869 +__unnamed_task__/Iteration 13 +__unnamed_task__/MaxReturn -71.6081 +__unnamed_task__/MinReturn -167.132 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.3695 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.72006 +policy/KL 0.0211753 +policy/KLBefore 0 +policy/LossAfter -0.288696 +policy/LossBefore -0.0184699 +policy/dLoss 0.270226 +---------------------------------------- ------------- +2025-03-29 19:37:54 | [rl2_trainer] epoch #14 | Optimizing policy... +2025-03-29 19:37:54 | [rl2_trainer] epoch #14 | Fitting baseline... +2025-03-29 19:37:54 | [rl2_trainer] epoch #14 | Computing loss before +2025-03-29 19:37:54 | [rl2_trainer] epoch #14 | Computing KL before +2025-03-29 19:37:54 | [rl2_trainer] epoch #14 | Optimizing +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | Computing KL after +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | Computing loss after +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | Saving snapshot... +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | Saved +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | Time 277.45 s +2025-03-29 19:37:56 | [rl2_trainer] epoch #14 | EpochTime 17.99 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -60.0162 +Average/AverageReturn -102.27 +Average/Iteration 14 +Average/MaxReturn -76.7003 +Average/MinReturn -145.761 +Average/NumEpisodes 8 +Average/StdReturn 24.6531 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959209 +TotalEnvSteps 12000 +__unnamed_task__/AverageDiscountedReturn -60.0162 +__unnamed_task__/AverageReturn -102.27 +__unnamed_task__/Iteration 14 +__unnamed_task__/MaxReturn -76.7003 +__unnamed_task__/MinReturn -145.761 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.6531 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.70584 +policy/KL 0.0177512 +policy/KLBefore 0 +policy/LossAfter -0.161264 +policy/LossBefore 0.0103171 +policy/dLoss 0.171581 +---------------------------------------- ------------- +2025-03-29 19:38:11 | [rl2_trainer] epoch #15 | Optimizing policy... +2025-03-29 19:38:11 | [rl2_trainer] epoch #15 | Fitting baseline... +2025-03-29 19:38:11 | [rl2_trainer] epoch #15 | Computing loss before +2025-03-29 19:38:11 | [rl2_trainer] epoch #15 | Computing KL before +2025-03-29 19:38:11 | [rl2_trainer] epoch #15 | Optimizing +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | Computing KL after +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | Computing loss after +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | Saving snapshot... +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | Saved +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | Time 295.09 s +2025-03-29 19:38:14 | [rl2_trainer] epoch #15 | EpochTime 17.64 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -74.3648 +Average/AverageReturn -126.297 +Average/Iteration 15 +Average/MaxReturn -81.3865 +Average/MinReturn -187.159 +Average/NumEpisodes 8 +Average/StdReturn 30.3555 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963702 +TotalEnvSteps 12800 +__unnamed_task__/AverageDiscountedReturn -74.3648 +__unnamed_task__/AverageReturn -126.297 +__unnamed_task__/Iteration 15 +__unnamed_task__/MaxReturn -81.3865 +__unnamed_task__/MinReturn -187.159 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.3555 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.70755 +policy/KL 0.0299831 +policy/KLBefore 0 +policy/LossAfter -0.444048 +policy/LossBefore -0.00572574 +policy/dLoss 0.438322 +---------------------------------------- -------------- +2025-03-29 19:38:30 | [rl2_trainer] epoch #16 | Optimizing policy... +2025-03-29 19:38:30 | [rl2_trainer] epoch #16 | Fitting baseline... +2025-03-29 19:38:30 | [rl2_trainer] epoch #16 | Computing loss before +2025-03-29 19:38:30 | [rl2_trainer] epoch #16 | Computing KL before +2025-03-29 19:38:30 | [rl2_trainer] epoch #16 | Optimizing +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | Computing KL after +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | Computing loss after +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | Saving snapshot... +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | Saved +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | Time 313.63 s +2025-03-29 19:38:32 | [rl2_trainer] epoch #16 | EpochTime 18.54 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.4395 +Average/AverageReturn -105.086 +Average/Iteration 16 +Average/MaxReturn -61.2063 +Average/MinReturn -152.792 +Average/NumEpisodes 8 +Average/StdReturn 36.9207 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968189 +TotalEnvSteps 13600 +__unnamed_task__/AverageDiscountedReturn -61.4395 +__unnamed_task__/AverageReturn -105.086 +__unnamed_task__/Iteration 16 +__unnamed_task__/MaxReturn -61.2063 +__unnamed_task__/MinReturn -152.792 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.9207 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.70808 +policy/KL 0.0266718 +policy/KLBefore 0 +policy/LossAfter -0.340539 +policy/LossBefore -0.00680853 +policy/dLoss 0.333731 +---------------------------------------- -------------- +2025-03-29 19:38:48 | [rl2_trainer] epoch #17 | Optimizing policy... +2025-03-29 19:38:48 | [rl2_trainer] epoch #17 | Fitting baseline... +2025-03-29 19:38:48 | [rl2_trainer] epoch #17 | Computing loss before +2025-03-29 19:38:48 | [rl2_trainer] epoch #17 | Computing KL before +2025-03-29 19:38:48 | [rl2_trainer] epoch #17 | Optimizing +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | Computing KL after +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | Computing loss after +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | Saving snapshot... +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | Saved +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | Time 332.12 s +2025-03-29 19:38:51 | [rl2_trainer] epoch #17 | EpochTime 18.48 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -69.9454 +Average/AverageReturn -125.937 +Average/Iteration 17 +Average/MaxReturn -73.0946 +Average/MinReturn -176.304 +Average/NumEpisodes 8 +Average/StdReturn 44.4534 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976737 +TotalEnvSteps 14400 +__unnamed_task__/AverageDiscountedReturn -69.9454 +__unnamed_task__/AverageReturn -125.937 +__unnamed_task__/Iteration 17 +__unnamed_task__/MaxReturn -73.0946 +__unnamed_task__/MinReturn -176.304 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.4534 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.67361 +policy/KL 0.0166696 +policy/KLBefore 0 +policy/LossAfter -0.309524 +policy/LossBefore 0.0317624 +policy/dLoss 0.341287 +---------------------------------------- ------------- +2025-03-29 19:39:08 | [rl2_trainer] epoch #18 | Optimizing policy... +2025-03-29 19:39:08 | [rl2_trainer] epoch #18 | Fitting baseline... +2025-03-29 19:39:08 | [rl2_trainer] epoch #18 | Computing loss before +2025-03-29 19:39:08 | [rl2_trainer] epoch #18 | Computing KL before +2025-03-29 19:39:08 | [rl2_trainer] epoch #18 | Optimizing +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | Computing KL after +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | Computing loss after +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | Saving snapshot... +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | Saved +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | Time 351.56 s +2025-03-29 19:39:10 | [rl2_trainer] epoch #18 | EpochTime 19.44 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -64.6 +Average/AverageReturn -113.191 +Average/Iteration 18 +Average/MaxReturn -67.3444 +Average/MinReturn -181.918 +Average/NumEpisodes 8 +Average/StdReturn 42.177 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96976 +TotalEnvSteps 15200 +__unnamed_task__/AverageDiscountedReturn -64.6 +__unnamed_task__/AverageReturn -113.191 +__unnamed_task__/Iteration 18 +__unnamed_task__/MaxReturn -67.3444 +__unnamed_task__/MinReturn -181.918 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 42.177 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.64839 +policy/KL 0.030733 +policy/KLBefore 0 +policy/LossAfter -0.303578 +policy/LossBefore 0.0608052 +policy/dLoss 0.364383 +---------------------------------------- ------------- +2025-03-29 19:39:27 | [rl2_trainer] epoch #19 | Optimizing policy... +2025-03-29 19:39:27 | [rl2_trainer] epoch #19 | Fitting baseline... +2025-03-29 19:39:27 | [rl2_trainer] epoch #19 | Computing loss before +2025-03-29 19:39:27 | [rl2_trainer] epoch #19 | Computing KL before +2025-03-29 19:39:28 | [rl2_trainer] epoch #19 | Optimizing +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | Computing KL after +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | Computing loss after +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | Saving snapshot... +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | Saved +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | Time 371.31 s +2025-03-29 19:39:30 | [rl2_trainer] epoch #19 | EpochTime 19.75 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -55.6699 +Average/AverageReturn -86.6088 +Average/Iteration 19 +Average/MaxReturn -58.9913 +Average/MinReturn -102.368 +Average/NumEpisodes 8 +Average/StdReturn 13.0453 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974012 +TotalEnvSteps 16000 +__unnamed_task__/AverageDiscountedReturn -55.6699 +__unnamed_task__/AverageReturn -86.6088 +__unnamed_task__/Iteration 19 +__unnamed_task__/MaxReturn -58.9913 +__unnamed_task__/MinReturn -102.368 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 13.0453 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.64138 +policy/KL 0.0188015 +policy/KLBefore 0 +policy/LossAfter -0.14344 +policy/LossBefore -0.0145384 +policy/dLoss 0.128902 +---------------------------------------- ------------- +2025-03-29 19:39:47 | [rl2_trainer] epoch #20 | Optimizing policy... +2025-03-29 19:39:47 | [rl2_trainer] epoch #20 | Fitting baseline... +2025-03-29 19:39:47 | [rl2_trainer] epoch #20 | Computing loss before +2025-03-29 19:39:47 | [rl2_trainer] epoch #20 | Computing KL before +2025-03-29 19:39:47 | [rl2_trainer] epoch #20 | Optimizing +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | Computing KL after +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | Computing loss after +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | Saving snapshot... +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | Saved +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | Time 390.64 s +2025-03-29 19:39:49 | [rl2_trainer] epoch #20 | EpochTime 19.33 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -58.5842 +Average/AverageReturn -95.1336 +Average/Iteration 20 +Average/MaxReturn -69.6347 +Average/MinReturn -142.462 +Average/NumEpisodes 8 +Average/StdReturn 27.2653 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96891 +TotalEnvSteps 16800 +__unnamed_task__/AverageDiscountedReturn -58.5842 +__unnamed_task__/AverageReturn -95.1336 +__unnamed_task__/Iteration 20 +__unnamed_task__/MaxReturn -69.6347 +__unnamed_task__/MinReturn -142.462 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.2653 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.63672 +policy/KL 0.0154164 +policy/KLBefore 0 +policy/LossAfter -0.215486 +policy/LossBefore -0.0190209 +policy/dLoss 0.196465 +---------------------------------------- ------------- +2025-03-29 19:40:06 | [rl2_trainer] epoch #21 | Optimizing policy... +2025-03-29 19:40:06 | [rl2_trainer] epoch #21 | Fitting baseline... +2025-03-29 19:40:06 | [rl2_trainer] epoch #21 | Computing loss before +2025-03-29 19:40:06 | [rl2_trainer] epoch #21 | Computing KL before +2025-03-29 19:40:06 | [rl2_trainer] epoch #21 | Optimizing +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | Computing KL after +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | Computing loss after +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | Saving snapshot... +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | Saved +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | Time 410.27 s +2025-03-29 19:40:09 | [rl2_trainer] epoch #21 | EpochTime 19.62 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -45.4544 +Average/AverageReturn -77.4376 +Average/Iteration 21 +Average/MaxReturn 5.41246 +Average/MinReturn -170.047 +Average/NumEpisodes 8 +Average/StdReturn 44.6625 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963742 +TotalEnvSteps 17600 +__unnamed_task__/AverageDiscountedReturn -45.4544 +__unnamed_task__/AverageReturn -77.4376 +__unnamed_task__/Iteration 21 +__unnamed_task__/MaxReturn 5.41246 +__unnamed_task__/MinReturn -170.047 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.6625 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.6463 +policy/KL 0.0193189 +policy/KLBefore 0 +policy/LossAfter -0.241262 +policy/LossBefore 0.00429097 +policy/dLoss 0.245553 +---------------------------------------- -------------- +2025-03-29 19:40:25 | [rl2_trainer] epoch #22 | Optimizing policy... +2025-03-29 19:40:25 | [rl2_trainer] epoch #22 | Fitting baseline... +2025-03-29 19:40:25 | [rl2_trainer] epoch #22 | Computing loss before +2025-03-29 19:40:25 | [rl2_trainer] epoch #22 | Computing KL before +2025-03-29 19:40:25 | [rl2_trainer] epoch #22 | Optimizing +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | Computing KL after +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | Computing loss after +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | Saving snapshot... +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | Saved +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | Time 429.21 s +2025-03-29 19:40:28 | [rl2_trainer] epoch #22 | EpochTime 18.94 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -61.6234 +Average/AverageReturn -102.853 +Average/Iteration 22 +Average/MaxReturn -58.9426 +Average/MinReturn -159.584 +Average/NumEpisodes 8 +Average/StdReturn 35.2863 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958373 +TotalEnvSteps 18400 +__unnamed_task__/AverageDiscountedReturn -61.6234 +__unnamed_task__/AverageReturn -102.853 +__unnamed_task__/Iteration 22 +__unnamed_task__/MaxReturn -58.9426 +__unnamed_task__/MinReturn -159.584 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.2863 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.65631 +policy/KL 0.0147216 +policy/KLBefore 0 +policy/LossAfter -0.24069 +policy/LossBefore -0.0132256 +policy/dLoss 0.227464 +---------------------------------------- ------------- +2025-03-29 19:40:44 | [rl2_trainer] epoch #23 | Optimizing policy... +2025-03-29 19:40:44 | [rl2_trainer] epoch #23 | Fitting baseline... +2025-03-29 19:40:44 | [rl2_trainer] epoch #23 | Computing loss before +2025-03-29 19:40:44 | [rl2_trainer] epoch #23 | Computing KL before +2025-03-29 19:40:44 | [rl2_trainer] epoch #23 | Optimizing +2025-03-29 19:40:46 | [rl2_trainer] epoch #23 | Computing KL after +2025-03-29 19:40:46 | [rl2_trainer] epoch #23 | Computing loss after +2025-03-29 19:40:47 | [rl2_trainer] epoch #23 | Saving snapshot... +2025-03-29 19:40:47 | [rl2_trainer] epoch #23 | Saved +2025-03-29 19:40:47 | [rl2_trainer] epoch #23 | Time 447.99 s +2025-03-29 19:40:47 | [rl2_trainer] epoch #23 | EpochTime 18.77 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.3234 +Average/AverageReturn -100.955 +Average/Iteration 23 +Average/MaxReturn -63.2911 +Average/MinReturn -157.596 +Average/NumEpisodes 8 +Average/StdReturn 35.0368 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.943237 +TotalEnvSteps 19200 +__unnamed_task__/AverageDiscountedReturn -59.3234 +__unnamed_task__/AverageReturn -100.955 +__unnamed_task__/Iteration 23 +__unnamed_task__/MaxReturn -63.2911 +__unnamed_task__/MinReturn -157.596 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.0368 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.64623 +policy/KL 0.0221148 +policy/KLBefore 0 +policy/LossAfter -0.217954 +policy/LossBefore 0.0471229 +policy/dLoss 0.265077 +---------------------------------------- ------------- +2025-03-29 19:41:03 | [rl2_trainer] epoch #24 | Optimizing policy... +2025-03-29 19:41:03 | [rl2_trainer] epoch #24 | Fitting baseline... +2025-03-29 19:41:03 | [rl2_trainer] epoch #24 | Computing loss before +2025-03-29 19:41:03 | [rl2_trainer] epoch #24 | Computing KL before +2025-03-29 19:41:03 | [rl2_trainer] epoch #24 | Optimizing +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | Computing KL after +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | Computing loss after +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | Saving snapshot... +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | Saved +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | Time 467.29 s +2025-03-29 19:41:06 | [rl2_trainer] epoch #24 | EpochTime 19.30 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -53.7484 +Average/AverageReturn -89.5279 +Average/Iteration 24 +Average/MaxReturn -54.6747 +Average/MinReturn -176.032 +Average/NumEpisodes 8 +Average/StdReturn 35.6382 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958455 +TotalEnvSteps 20000 +__unnamed_task__/AverageDiscountedReturn -53.7484 +__unnamed_task__/AverageReturn -89.5279 +__unnamed_task__/Iteration 24 +__unnamed_task__/MaxReturn -54.6747 +__unnamed_task__/MinReturn -176.032 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.6382 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.636 +policy/KL 0.019137 +policy/KLBefore 0 +policy/LossAfter -0.310419 +policy/LossBefore 0.0140072 +policy/dLoss 0.324427 +---------------------------------------- ------------- +2025-03-29 19:41:22 | [rl2_trainer] epoch #25 | Optimizing policy... +2025-03-29 19:41:22 | [rl2_trainer] epoch #25 | Fitting baseline... +2025-03-29 19:41:22 | [rl2_trainer] epoch #25 | Computing loss before +2025-03-29 19:41:22 | [rl2_trainer] epoch #25 | Computing KL before +2025-03-29 19:41:22 | [rl2_trainer] epoch #25 | Optimizing +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | Computing KL after +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | Computing loss after +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | Saving snapshot... +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | Saved +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | Time 485.87 s +2025-03-29 19:41:24 | [rl2_trainer] epoch #25 | EpochTime 18.58 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -57.6229 +Average/AverageReturn -91.0038 +Average/Iteration 25 +Average/MaxReturn -68.9141 +Average/MinReturn -160.162 +Average/NumEpisodes 8 +Average/StdReturn 28.5679 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.948313 +TotalEnvSteps 20800 +__unnamed_task__/AverageDiscountedReturn -57.6229 +__unnamed_task__/AverageReturn -91.0038 +__unnamed_task__/Iteration 25 +__unnamed_task__/MaxReturn -68.9141 +__unnamed_task__/MinReturn -160.162 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.5679 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.62364 +policy/KL 0.0270757 +policy/KLBefore 0 +policy/LossAfter -0.291011 +policy/LossBefore -0.0131072 +policy/dLoss 0.277903 +---------------------------------------- ------------- +2025-03-29 19:41:41 | [rl2_trainer] epoch #26 | Optimizing policy... +2025-03-29 19:41:41 | [rl2_trainer] epoch #26 | Fitting baseline... +2025-03-29 19:41:41 | [rl2_trainer] epoch #26 | Computing loss before +2025-03-29 19:41:41 | [rl2_trainer] epoch #26 | Computing KL before +2025-03-29 19:41:41 | [rl2_trainer] epoch #26 | Optimizing +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | Computing KL after +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | Computing loss after +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | Saving snapshot... +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | Saved +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | Time 504.85 s +2025-03-29 19:41:43 | [rl2_trainer] epoch #26 | EpochTime 18.98 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.2059 +Average/AverageReturn -99.7531 +Average/Iteration 26 +Average/MaxReturn -80.1272 +Average/MinReturn -117.554 +Average/NumEpisodes 8 +Average/StdReturn 14.4807 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974427 +TotalEnvSteps 21600 +__unnamed_task__/AverageDiscountedReturn -61.2059 +__unnamed_task__/AverageReturn -99.7531 +__unnamed_task__/Iteration 26 +__unnamed_task__/MaxReturn -80.1272 +__unnamed_task__/MinReturn -117.554 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.4807 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.61752 +policy/KL 0.0166241 +policy/KLBefore 0 +policy/LossAfter -0.162351 +policy/LossBefore -0.00758066 +policy/dLoss 0.154771 +---------------------------------------- -------------- +2025-03-29 19:41:59 | [rl2_trainer] epoch #27 | Optimizing policy... +2025-03-29 19:41:59 | [rl2_trainer] epoch #27 | Fitting baseline... +2025-03-29 19:41:59 | [rl2_trainer] epoch #27 | Computing loss before +2025-03-29 19:42:00 | [rl2_trainer] epoch #27 | Computing KL before +2025-03-29 19:42:00 | [rl2_trainer] epoch #27 | Optimizing +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | Computing KL after +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | Computing loss after +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | Saving snapshot... +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | Saved +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | Time 523.35 s +2025-03-29 19:42:02 | [rl2_trainer] epoch #27 | EpochTime 18.50 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -65.2714 +Average/AverageReturn -106.688 +Average/Iteration 27 +Average/MaxReturn -67.3707 +Average/MinReturn -172.474 +Average/NumEpisodes 8 +Average/StdReturn 36.9911 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973287 +TotalEnvSteps 22400 +__unnamed_task__/AverageDiscountedReturn -65.2714 +__unnamed_task__/AverageReturn -106.688 +__unnamed_task__/Iteration 27 +__unnamed_task__/MaxReturn -67.3707 +__unnamed_task__/MinReturn -172.474 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.9911 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.61296 +policy/KL 0.0201574 +policy/KLBefore 0 +policy/LossAfter -0.314899 +policy/LossBefore -0.0144259 +policy/dLoss 0.300473 +---------------------------------------- ------------- +2025-03-29 19:42:18 | [rl2_trainer] epoch #28 | Optimizing policy... +2025-03-29 19:42:18 | [rl2_trainer] epoch #28 | Fitting baseline... +2025-03-29 19:42:19 | [rl2_trainer] epoch #28 | Computing loss before +2025-03-29 19:42:19 | [rl2_trainer] epoch #28 | Computing KL before +2025-03-29 19:42:19 | [rl2_trainer] epoch #28 | Optimizing +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | Computing KL after +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | Computing loss after +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | Saving snapshot... +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | Saved +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | Time 542.40 s +2025-03-29 19:42:21 | [rl2_trainer] epoch #28 | EpochTime 19.05 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -43.1543 +Average/AverageReturn -72.7595 +Average/Iteration 28 +Average/MaxReturn 68.298 +Average/MinReturn -139.859 +Average/NumEpisodes 8 +Average/StdReturn 57.3942 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.948062 +TotalEnvSteps 23200 +__unnamed_task__/AverageDiscountedReturn -43.1543 +__unnamed_task__/AverageReturn -72.7595 +__unnamed_task__/Iteration 28 +__unnamed_task__/MaxReturn 68.298 +__unnamed_task__/MinReturn -139.859 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 57.3942 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.58984 +policy/KL 0.0204997 +policy/KLBefore 0 +policy/LossAfter -0.411945 +policy/LossBefore 0.0134046 +policy/dLoss 0.42535 +---------------------------------------- ------------- +2025-03-29 19:42:37 | [rl2_trainer] epoch #29 | Optimizing policy... +2025-03-29 19:42:37 | [rl2_trainer] epoch #29 | Fitting baseline... +2025-03-29 19:42:37 | [rl2_trainer] epoch #29 | Computing loss before +2025-03-29 19:42:37 | [rl2_trainer] epoch #29 | Computing KL before +2025-03-29 19:42:38 | [rl2_trainer] epoch #29 | Optimizing +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | Computing KL after +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | Computing loss after +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | Saving snapshot... +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | Saved +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | Time 561.35 s +2025-03-29 19:42:40 | [rl2_trainer] epoch #29 | EpochTime 18.94 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.3741 +Average/AverageReturn -87.0127 +Average/Iteration 29 +Average/MaxReturn -63.3479 +Average/MinReturn -116.043 +Average/NumEpisodes 8 +Average/StdReturn 17.3273 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986536 +TotalEnvSteps 24000 +__unnamed_task__/AverageDiscountedReturn -55.3741 +__unnamed_task__/AverageReturn -87.0127 +__unnamed_task__/Iteration 29 +__unnamed_task__/MaxReturn -63.3479 +__unnamed_task__/MinReturn -116.043 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.3273 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56848 +policy/KL 0.0176656 +policy/KLBefore 0 +policy/LossAfter -0.0660458 +policy/LossBefore 0.00403969 +policy/dLoss 0.0700855 +---------------------------------------- -------------- +2025-03-29 19:42:57 | [rl2_trainer] epoch #30 | Optimizing policy... +2025-03-29 19:42:57 | [rl2_trainer] epoch #30 | Fitting baseline... +2025-03-29 19:42:57 | [rl2_trainer] epoch #30 | Computing loss before +2025-03-29 19:42:57 | [rl2_trainer] epoch #30 | Computing KL before +2025-03-29 19:42:57 | [rl2_trainer] epoch #30 | Optimizing +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | Computing KL after +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | Computing loss after +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | Saving snapshot... +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | Saved +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | Time 580.84 s +2025-03-29 19:42:59 | [rl2_trainer] epoch #30 | EpochTime 19.49 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.723 +Average/AverageReturn -98.5071 +Average/Iteration 30 +Average/MaxReturn -62.7741 +Average/MinReturn -174.13 +Average/NumEpisodes 8 +Average/StdReturn 33.4752 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973496 +TotalEnvSteps 24800 +__unnamed_task__/AverageDiscountedReturn -60.723 +__unnamed_task__/AverageReturn -98.5071 +__unnamed_task__/Iteration 30 +__unnamed_task__/MaxReturn -62.7741 +__unnamed_task__/MinReturn -174.13 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.4752 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55856 +policy/KL 0.0202454 +policy/KLBefore 0 +policy/LossAfter -0.205076 +policy/LossBefore -0.00217897 +policy/dLoss 0.202897 +---------------------------------------- -------------- +2025-03-29 19:43:16 | [rl2_trainer] epoch #31 | Optimizing policy... +2025-03-29 19:43:16 | [rl2_trainer] epoch #31 | Fitting baseline... +2025-03-29 19:43:16 | [rl2_trainer] epoch #31 | Computing loss before +2025-03-29 19:43:16 | [rl2_trainer] epoch #31 | Computing KL before +2025-03-29 19:43:16 | [rl2_trainer] epoch #31 | Optimizing +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | Computing KL after +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | Computing loss after +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | Saving snapshot... +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | Saved +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | Time 600.27 s +2025-03-29 19:43:19 | [rl2_trainer] epoch #31 | EpochTime 19.42 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.3728 +Average/AverageReturn -109.264 +Average/Iteration 31 +Average/MaxReturn -63.9475 +Average/MinReturn -167.522 +Average/NumEpisodes 8 +Average/StdReturn 27.5982 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978952 +TotalEnvSteps 25600 +__unnamed_task__/AverageDiscountedReturn -64.3728 +__unnamed_task__/AverageReturn -109.264 +__unnamed_task__/Iteration 31 +__unnamed_task__/MaxReturn -63.9475 +__unnamed_task__/MinReturn -167.522 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.5982 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54961 +policy/KL 0.0232773 +policy/KLBefore 0 +policy/LossAfter -0.185294 +policy/LossBefore -0.00250505 +policy/dLoss 0.182789 +---------------------------------------- -------------- +2025-03-29 19:43:36 | [rl2_trainer] epoch #32 | Optimizing policy... +2025-03-29 19:43:36 | [rl2_trainer] epoch #32 | Fitting baseline... +2025-03-29 19:43:36 | [rl2_trainer] epoch #32 | Computing loss before +2025-03-29 19:43:36 | [rl2_trainer] epoch #32 | Computing KL before +2025-03-29 19:43:36 | [rl2_trainer] epoch #32 | Optimizing +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | Computing KL after +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | Computing loss after +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | Saving snapshot... +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | Saved +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | Time 619.95 s +2025-03-29 19:43:38 | [rl2_trainer] epoch #32 | EpochTime 19.68 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -54.2982 +Average/AverageReturn -85.7036 +Average/Iteration 32 +Average/MaxReturn -62.6476 +Average/MinReturn -114.798 +Average/NumEpisodes 8 +Average/StdReturn 17.5923 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972429 +TotalEnvSteps 26400 +__unnamed_task__/AverageDiscountedReturn -54.2982 +__unnamed_task__/AverageReturn -85.7036 +__unnamed_task__/Iteration 32 +__unnamed_task__/MaxReturn -62.6476 +__unnamed_task__/MinReturn -114.798 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.5923 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5511 +policy/KL 0.012057 +policy/KLBefore 0 +policy/LossAfter -0.103464 +policy/LossBefore -0.00274544 +policy/dLoss 0.100718 +---------------------------------------- -------------- +2025-03-29 19:43:56 | [rl2_trainer] epoch #33 | Optimizing policy... +2025-03-29 19:43:56 | [rl2_trainer] epoch #33 | Fitting baseline... +2025-03-29 19:43:56 | [rl2_trainer] epoch #33 | Computing loss before +2025-03-29 19:43:56 | [rl2_trainer] epoch #33 | Computing KL before +2025-03-29 19:43:56 | [rl2_trainer] epoch #33 | Optimizing +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | Computing KL after +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | Computing loss after +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | Saving snapshot... +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | Saved +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | Time 639.62 s +2025-03-29 19:43:58 | [rl2_trainer] epoch #33 | EpochTime 19.67 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -56.3336 +Average/AverageReturn -87.0186 +Average/Iteration 33 +Average/MaxReturn -57.6255 +Average/MinReturn -113.303 +Average/NumEpisodes 8 +Average/StdReturn 21.3215 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982932 +TotalEnvSteps 27200 +__unnamed_task__/AverageDiscountedReturn -56.3336 +__unnamed_task__/AverageReturn -87.0186 +__unnamed_task__/Iteration 33 +__unnamed_task__/MaxReturn -57.6255 +__unnamed_task__/MinReturn -113.303 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.3215 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55722 +policy/KL 0.0208395 +policy/KLBefore 0 +policy/LossAfter -0.128385 +policy/LossBefore -0.00119503 +policy/dLoss 0.12719 +---------------------------------------- -------------- +2025-03-29 19:44:15 | [rl2_trainer] epoch #34 | Optimizing policy... +2025-03-29 19:44:15 | [rl2_trainer] epoch #34 | Fitting baseline... +2025-03-29 19:44:15 | [rl2_trainer] epoch #34 | Computing loss before +2025-03-29 19:44:15 | [rl2_trainer] epoch #34 | Computing KL before +2025-03-29 19:44:15 | [rl2_trainer] epoch #34 | Optimizing +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | Computing KL after +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | Computing loss after +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | Saving snapshot... +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | Saved +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | Time 659.25 s +2025-03-29 19:44:18 | [rl2_trainer] epoch #34 | EpochTime 19.63 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -54.7871 +Average/AverageReturn -84.7508 +Average/Iteration 34 +Average/MaxReturn -61.1163 +Average/MinReturn -115.81 +Average/NumEpisodes 8 +Average/StdReturn 22.9378 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983356 +TotalEnvSteps 28000 +__unnamed_task__/AverageDiscountedReturn -54.7871 +__unnamed_task__/AverageReturn -84.7508 +__unnamed_task__/Iteration 34 +__unnamed_task__/MaxReturn -61.1163 +__unnamed_task__/MinReturn -115.81 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.9378 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56159 +policy/KL 0.012267 +policy/KLBefore 0 +policy/LossAfter -0.148426 +policy/LossBefore -0.0261474 +policy/dLoss 0.122278 +---------------------------------------- ------------- +2025-03-29 19:44:35 | [rl2_trainer] epoch #35 | Optimizing policy... +2025-03-29 19:44:35 | [rl2_trainer] epoch #35 | Fitting baseline... +2025-03-29 19:44:35 | [rl2_trainer] epoch #35 | Computing loss before +2025-03-29 19:44:35 | [rl2_trainer] epoch #35 | Computing KL before +2025-03-29 19:44:35 | [rl2_trainer] epoch #35 | Optimizing +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | Computing KL after +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | Computing loss after +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | Saving snapshot... +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | Saved +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | Time 678.52 s +2025-03-29 19:44:37 | [rl2_trainer] epoch #35 | EpochTime 19.27 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -56.6333 +Average/AverageReturn -86.6283 +Average/Iteration 35 +Average/MaxReturn -66.594 +Average/MinReturn -122.846 +Average/NumEpisodes 8 +Average/StdReturn 17.0408 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981906 +TotalEnvSteps 28800 +__unnamed_task__/AverageDiscountedReturn -56.6333 +__unnamed_task__/AverageReturn -86.6283 +__unnamed_task__/Iteration 35 +__unnamed_task__/MaxReturn -66.594 +__unnamed_task__/MinReturn -122.846 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.0408 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55525 +policy/KL 0.0165111 +policy/KLBefore 0 +policy/LossAfter -0.115341 +policy/LossBefore 0.00621348 +policy/dLoss 0.121554 +---------------------------------------- -------------- +2025-03-29 19:44:54 | [rl2_trainer] epoch #36 | Optimizing policy... +2025-03-29 19:44:54 | [rl2_trainer] epoch #36 | Fitting baseline... +2025-03-29 19:44:54 | [rl2_trainer] epoch #36 | Computing loss before +2025-03-29 19:44:54 | [rl2_trainer] epoch #36 | Computing KL before +2025-03-29 19:44:54 | [rl2_trainer] epoch #36 | Optimizing +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | Computing KL after +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | Computing loss after +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | Saving snapshot... +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | Saved +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | Time 697.72 s +2025-03-29 19:44:56 | [rl2_trainer] epoch #36 | EpochTime 19.19 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -57.4477 +Average/AverageReturn -90.5283 +Average/Iteration 36 +Average/MaxReturn -70.6185 +Average/MinReturn -117.993 +Average/NumEpisodes 8 +Average/StdReturn 18.1087 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972257 +TotalEnvSteps 29600 +__unnamed_task__/AverageDiscountedReturn -57.4477 +__unnamed_task__/AverageReturn -90.5283 +__unnamed_task__/Iteration 36 +__unnamed_task__/MaxReturn -70.6185 +__unnamed_task__/MinReturn -117.993 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 18.1087 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55164 +policy/KL 0.016934 +policy/KLBefore 0 +policy/LossAfter -0.0857325 +policy/LossBefore 0.000478382 +policy/dLoss 0.0862108 +---------------------------------------- --------------- +2025-03-29 19:45:13 | [rl2_trainer] epoch #37 | Optimizing policy... +2025-03-29 19:45:13 | [rl2_trainer] epoch #37 | Fitting baseline... +2025-03-29 19:45:13 | [rl2_trainer] epoch #37 | Computing loss before +2025-03-29 19:45:13 | [rl2_trainer] epoch #37 | Computing KL before +2025-03-29 19:45:13 | [rl2_trainer] epoch #37 | Optimizing +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | Computing KL after +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | Computing loss after +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | Saving snapshot... +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | Saved +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | Time 717.12 s +2025-03-29 19:45:16 | [rl2_trainer] epoch #37 | EpochTime 19.40 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.7098 +Average/AverageReturn -87.2423 +Average/Iteration 37 +Average/MaxReturn -60.3793 +Average/MinReturn -105.086 +Average/NumEpisodes 8 +Average/StdReturn 17.2097 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977412 +TotalEnvSteps 30400 +__unnamed_task__/AverageDiscountedReturn -55.7098 +__unnamed_task__/AverageReturn -87.2423 +__unnamed_task__/Iteration 37 +__unnamed_task__/MaxReturn -60.3793 +__unnamed_task__/MinReturn -105.086 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.2097 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55241 +policy/KL 0.0150322 +policy/KLBefore 0 +policy/LossAfter -0.12109 +policy/LossBefore 0.00607008 +policy/dLoss 0.12716 +---------------------------------------- -------------- +2025-03-29 19:45:33 | [rl2_trainer] epoch #38 | Optimizing policy... +2025-03-29 19:45:33 | [rl2_trainer] epoch #38 | Fitting baseline... +2025-03-29 19:45:33 | [rl2_trainer] epoch #38 | Computing loss before +2025-03-29 19:45:33 | [rl2_trainer] epoch #38 | Computing KL before +2025-03-29 19:45:33 | [rl2_trainer] epoch #38 | Optimizing +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | Computing KL after +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | Computing loss after +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | Saving snapshot... +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | Saved +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | Time 736.84 s +2025-03-29 19:45:35 | [rl2_trainer] epoch #38 | EpochTime 19.71 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.4028 +Average/AverageReturn -84.4865 +Average/Iteration 38 +Average/MaxReturn -60.8253 +Average/MinReturn -119.244 +Average/NumEpisodes 8 +Average/StdReturn 20.2534 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978169 +TotalEnvSteps 31200 +__unnamed_task__/AverageDiscountedReturn -55.4028 +__unnamed_task__/AverageReturn -84.4865 +__unnamed_task__/Iteration 38 +__unnamed_task__/MaxReturn -60.8253 +__unnamed_task__/MinReturn -119.244 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.2534 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54765 +policy/KL 0.0112575 +policy/KLBefore 0 +policy/LossAfter -0.117236 +policy/LossBefore -0.00312737 +policy/dLoss 0.114108 +---------------------------------------- -------------- +2025-03-29 19:45:52 | [rl2_trainer] epoch #39 | Optimizing policy... +2025-03-29 19:45:52 | [rl2_trainer] epoch #39 | Fitting baseline... +2025-03-29 19:45:52 | [rl2_trainer] epoch #39 | Computing loss before +2025-03-29 19:45:52 | [rl2_trainer] epoch #39 | Computing KL before +2025-03-29 19:45:52 | [rl2_trainer] epoch #39 | Optimizing +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | Computing KL after +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | Computing loss after +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | Saving snapshot... +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | Saved +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | Time 756.13 s +2025-03-29 19:45:55 | [rl2_trainer] epoch #39 | EpochTime 19.29 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -58.5075 +Average/AverageReturn -91.3561 +Average/Iteration 39 +Average/MaxReturn -61.1368 +Average/MinReturn -115.586 +Average/NumEpisodes 8 +Average/StdReturn 17.2262 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982146 +TotalEnvSteps 32000 +__unnamed_task__/AverageDiscountedReturn -58.5075 +__unnamed_task__/AverageReturn -91.3561 +__unnamed_task__/Iteration 39 +__unnamed_task__/MaxReturn -61.1368 +__unnamed_task__/MinReturn -115.586 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.2262 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54899 +policy/KL 0.0180684 +policy/KLBefore 0 +policy/LossAfter -0.081002 +policy/LossBefore -0.00063767 +policy/dLoss 0.0803643 +---------------------------------------- -------------- +2025-03-29 19:46:12 | [rl2_trainer] epoch #40 | Optimizing policy... +2025-03-29 19:46:12 | [rl2_trainer] epoch #40 | Fitting baseline... +2025-03-29 19:46:12 | [rl2_trainer] epoch #40 | Computing loss before +2025-03-29 19:46:12 | [rl2_trainer] epoch #40 | Computing KL before +2025-03-29 19:46:12 | [rl2_trainer] epoch #40 | Optimizing +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | Computing KL after +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | Computing loss after +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | Saving snapshot... +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | Saved +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | Time 775.88 s +2025-03-29 19:46:14 | [rl2_trainer] epoch #40 | EpochTime 19.75 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -56.1221 +Average/AverageReturn -92.7082 +Average/Iteration 40 +Average/MaxReturn -44.8631 +Average/MinReturn -127.642 +Average/NumEpisodes 8 +Average/StdReturn 28.6847 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963113 +TotalEnvSteps 32800 +__unnamed_task__/AverageDiscountedReturn -56.1221 +__unnamed_task__/AverageReturn -92.7082 +__unnamed_task__/Iteration 40 +__unnamed_task__/MaxReturn -44.8631 +__unnamed_task__/MinReturn -127.642 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.6847 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55271 +policy/KL 0.0154547 +policy/KLBefore 0 +policy/LossAfter -0.111894 +policy/LossBefore 0.000636616 +policy/dLoss 0.112531 +---------------------------------------- --------------- +2025-03-29 19:46:32 | [rl2_trainer] epoch #41 | Optimizing policy... +2025-03-29 19:46:32 | [rl2_trainer] epoch #41 | Fitting baseline... +2025-03-29 19:46:32 | [rl2_trainer] epoch #41 | Computing loss before +2025-03-29 19:46:32 | [rl2_trainer] epoch #41 | Computing KL before +2025-03-29 19:46:32 | [rl2_trainer] epoch #41 | Optimizing +2025-03-29 19:46:34 | [rl2_trainer] epoch #41 | Computing KL after +2025-03-29 19:46:34 | [rl2_trainer] epoch #41 | Computing loss after +2025-03-29 19:46:35 | [rl2_trainer] epoch #41 | Saving snapshot... +2025-03-29 19:46:35 | [rl2_trainer] epoch #41 | Saved +2025-03-29 19:46:35 | [rl2_trainer] epoch #41 | Time 795.97 s +2025-03-29 19:46:35 | [rl2_trainer] epoch #41 | EpochTime 20.09 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.1017 +Average/AverageReturn -89.3892 +Average/Iteration 41 +Average/MaxReturn -63.2244 +Average/MinReturn -116.181 +Average/NumEpisodes 8 +Average/StdReturn 24.8114 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.990431 +TotalEnvSteps 33600 +__unnamed_task__/AverageDiscountedReturn -55.1017 +__unnamed_task__/AverageReturn -89.3892 +__unnamed_task__/Iteration 41 +__unnamed_task__/MaxReturn -63.2244 +__unnamed_task__/MinReturn -116.181 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.8114 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5629 +policy/KL 0.0187978 +policy/KLBefore 0 +policy/LossAfter -0.084071 +policy/LossBefore 0.00624066 +policy/dLoss 0.0903116 +---------------------------------------- -------------- +2025-03-29 19:46:52 | [rl2_trainer] epoch #42 | Optimizing policy... +2025-03-29 19:46:52 | [rl2_trainer] epoch #42 | Fitting baseline... +2025-03-29 19:46:52 | [rl2_trainer] epoch #42 | Computing loss before +2025-03-29 19:46:52 | [rl2_trainer] epoch #42 | Computing KL before +2025-03-29 19:46:52 | [rl2_trainer] epoch #42 | Optimizing +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | Computing KL after +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | Computing loss after +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | Saving snapshot... +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | Saved +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | Time 815.55 s +2025-03-29 19:46:54 | [rl2_trainer] epoch #42 | EpochTime 19.57 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -53.3383 +Average/AverageReturn -84.8954 +Average/Iteration 42 +Average/MaxReturn -60.1845 +Average/MinReturn -118.835 +Average/NumEpisodes 8 +Average/StdReturn 20.0612 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963122 +TotalEnvSteps 34400 +__unnamed_task__/AverageDiscountedReturn -53.3383 +__unnamed_task__/AverageReturn -84.8954 +__unnamed_task__/Iteration 42 +__unnamed_task__/MaxReturn -60.1845 +__unnamed_task__/MinReturn -118.835 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.0612 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57382 +policy/KL 0.00992956 +policy/KLBefore 0 +policy/LossAfter -0.127139 +policy/LossBefore -0.00784431 +policy/dLoss 0.119295 +---------------------------------------- -------------- +2025-03-29 19:47:11 | [rl2_trainer] epoch #43 | Optimizing policy... +2025-03-29 19:47:11 | [rl2_trainer] epoch #43 | Fitting baseline... +2025-03-29 19:47:11 | [rl2_trainer] epoch #43 | Computing loss before +2025-03-29 19:47:11 | [rl2_trainer] epoch #43 | Computing KL before +2025-03-29 19:47:11 | [rl2_trainer] epoch #43 | Optimizing +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | Computing KL after +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | Computing loss after +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | Saving snapshot... +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | Saved +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | Time 834.86 s +2025-03-29 19:47:13 | [rl2_trainer] epoch #43 | EpochTime 19.31 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -54.0977 +Average/AverageReturn -84.3381 +Average/Iteration 43 +Average/MaxReturn -66.6869 +Average/MinReturn -121.406 +Average/NumEpisodes 8 +Average/StdReturn 16.6238 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.9828 +TotalEnvSteps 35200 +__unnamed_task__/AverageDiscountedReturn -54.0977 +__unnamed_task__/AverageReturn -84.3381 +__unnamed_task__/Iteration 43 +__unnamed_task__/MaxReturn -66.6869 +__unnamed_task__/MinReturn -121.406 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.6238 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.58931 +policy/KL 0.0246153 +policy/KLBefore 0 +policy/LossAfter -0.096917 +policy/LossBefore 0.00995246 +policy/dLoss 0.106869 +---------------------------------------- -------------- +2025-03-29 19:47:31 | [rl2_trainer] epoch #44 | Optimizing policy... +2025-03-29 19:47:31 | [rl2_trainer] epoch #44 | Fitting baseline... +2025-03-29 19:47:31 | [rl2_trainer] epoch #44 | Computing loss before +2025-03-29 19:47:31 | [rl2_trainer] epoch #44 | Computing KL before +2025-03-29 19:47:31 | [rl2_trainer] epoch #44 | Optimizing +2025-03-29 19:47:33 | [rl2_trainer] epoch #44 | Computing KL after +2025-03-29 19:47:33 | [rl2_trainer] epoch #44 | Computing loss after +2025-03-29 19:47:34 | [rl2_trainer] epoch #44 | Saving snapshot... +2025-03-29 19:47:34 | [rl2_trainer] epoch #44 | Saved +2025-03-29 19:47:34 | [rl2_trainer] epoch #44 | Time 855.01 s +2025-03-29 19:47:34 | [rl2_trainer] epoch #44 | EpochTime 20.15 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -52.3273 +Average/AverageReturn -83.48 +Average/Iteration 44 +Average/MaxReturn -58.5295 +Average/MinReturn -116.712 +Average/NumEpisodes 8 +Average/StdReturn 20.9727 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983017 +TotalEnvSteps 36000 +__unnamed_task__/AverageDiscountedReturn -52.3273 +__unnamed_task__/AverageReturn -83.48 +__unnamed_task__/Iteration 44 +__unnamed_task__/MaxReturn -58.5295 +__unnamed_task__/MinReturn -116.712 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.9727 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.60636 +policy/KL 0.0133536 +policy/KLBefore 0 +policy/LossAfter -0.116813 +policy/LossBefore 0.00755486 +policy/dLoss 0.124368 +---------------------------------------- -------------- +2025-03-29 19:47:51 | [rl2_trainer] epoch #45 | Optimizing policy... +2025-03-29 19:47:51 | [rl2_trainer] epoch #45 | Fitting baseline... +2025-03-29 19:47:51 | [rl2_trainer] epoch #45 | Computing loss before +2025-03-29 19:47:51 | [rl2_trainer] epoch #45 | Computing KL before +2025-03-29 19:47:51 | [rl2_trainer] epoch #45 | Optimizing +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | Computing KL after +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | Computing loss after +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | Saving snapshot... +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | Saved +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | Time 874.68 s +2025-03-29 19:47:53 | [rl2_trainer] epoch #45 | EpochTime 19.66 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -63.1123 +Average/AverageReturn -101.258 +Average/Iteration 45 +Average/MaxReturn -62.9264 +Average/MinReturn -190.997 +Average/NumEpisodes 8 +Average/StdReturn 39.4127 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.961509 +TotalEnvSteps 36800 +__unnamed_task__/AverageDiscountedReturn -63.1123 +__unnamed_task__/AverageReturn -101.258 +__unnamed_task__/Iteration 45 +__unnamed_task__/MaxReturn -62.9264 +__unnamed_task__/MinReturn -190.997 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 39.4127 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.59082 +policy/KL 0.0198926 +policy/KLBefore 0 +policy/LossAfter -0.273886 +policy/LossBefore 0.0123523 +policy/dLoss 0.286238 +---------------------------------------- ------------- +2025-03-29 19:48:10 | [rl2_trainer] epoch #46 | Optimizing policy... +2025-03-29 19:48:10 | [rl2_trainer] epoch #46 | Fitting baseline... +2025-03-29 19:48:10 | [rl2_trainer] epoch #46 | Computing loss before +2025-03-29 19:48:10 | [rl2_trainer] epoch #46 | Computing KL before +2025-03-29 19:48:10 | [rl2_trainer] epoch #46 | Optimizing +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | Computing KL after +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | Computing loss after +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | Saving snapshot... +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | Saved +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | Time 894.20 s +2025-03-29 19:48:13 | [rl2_trainer] epoch #46 | EpochTime 19.52 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -52.1685 +Average/AverageReturn -81.32 +Average/Iteration 46 +Average/MaxReturn -60.2734 +Average/MinReturn -102.265 +Average/NumEpisodes 8 +Average/StdReturn 16.4966 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974306 +TotalEnvSteps 37600 +__unnamed_task__/AverageDiscountedReturn -52.1685 +__unnamed_task__/AverageReturn -81.32 +__unnamed_task__/Iteration 46 +__unnamed_task__/MaxReturn -60.2734 +__unnamed_task__/MinReturn -102.265 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.4966 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57085 +policy/KL 0.0162656 +policy/KLBefore 0 +policy/LossAfter -0.131028 +policy/LossBefore 0.000462971 +policy/dLoss 0.131491 +---------------------------------------- --------------- +2025-03-29 19:48:30 | [rl2_trainer] epoch #47 | Optimizing policy... +2025-03-29 19:48:30 | [rl2_trainer] epoch #47 | Fitting baseline... +2025-03-29 19:48:30 | [rl2_trainer] epoch #47 | Computing loss before +2025-03-29 19:48:30 | [rl2_trainer] epoch #47 | Computing KL before +2025-03-29 19:48:30 | [rl2_trainer] epoch #47 | Optimizing +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | Computing KL after +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | Computing loss after +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | Saving snapshot... +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | Saved +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | Time 913.64 s +2025-03-29 19:48:32 | [rl2_trainer] epoch #47 | EpochTime 19.44 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -53.385 +Average/AverageReturn -82.0295 +Average/Iteration 47 +Average/MaxReturn -62.5914 +Average/MinReturn -113.374 +Average/NumEpisodes 8 +Average/StdReturn 15.4112 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977 +TotalEnvSteps 38400 +__unnamed_task__/AverageDiscountedReturn -53.385 +__unnamed_task__/AverageReturn -82.0295 +__unnamed_task__/Iteration 47 +__unnamed_task__/MaxReturn -62.5914 +__unnamed_task__/MinReturn -113.374 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.4112 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56625 +policy/KL 0.0215115 +policy/KLBefore 0 +policy/LossAfter -0.0874554 +policy/LossBefore 0.00601854 +policy/dLoss 0.093474 +---------------------------------------- -------------- +2025-03-29 19:48:49 | [rl2_trainer] epoch #48 | Optimizing policy... +2025-03-29 19:48:49 | [rl2_trainer] epoch #48 | Fitting baseline... +2025-03-29 19:48:49 | [rl2_trainer] epoch #48 | Computing loss before +2025-03-29 19:48:49 | [rl2_trainer] epoch #48 | Computing KL before +2025-03-29 19:48:49 | [rl2_trainer] epoch #48 | Optimizing +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | Computing KL after +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | Computing loss after +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | Saving snapshot... +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | Saved +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | Time 933.15 s +2025-03-29 19:48:52 | [rl2_trainer] epoch #48 | EpochTime 19.51 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.9548 +Average/AverageReturn -98.9684 +Average/Iteration 48 +Average/MaxReturn -60.9813 +Average/MinReturn -188.694 +Average/NumEpisodes 8 +Average/StdReturn 38.1463 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957706 +TotalEnvSteps 39200 +__unnamed_task__/AverageDiscountedReturn -59.9548 +__unnamed_task__/AverageReturn -98.9684 +__unnamed_task__/Iteration 48 +__unnamed_task__/MaxReturn -60.9813 +__unnamed_task__/MinReturn -188.694 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.1463 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5477 +policy/KL 0.0181803 +policy/KLBefore 0 +policy/LossAfter -0.242344 +policy/LossBefore 0.010633 +policy/dLoss 0.252977 +---------------------------------------- ------------- +2025-03-29 19:49:10 | [rl2_trainer] epoch #49 | Optimizing policy... +2025-03-29 19:49:10 | [rl2_trainer] epoch #49 | Fitting baseline... +2025-03-29 19:49:10 | [rl2_trainer] epoch #49 | Computing loss before +2025-03-29 19:49:10 | [rl2_trainer] epoch #49 | Computing KL before +2025-03-29 19:49:10 | [rl2_trainer] epoch #49 | Optimizing +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | Computing KL after +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | Computing loss after +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | Saving snapshot... +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | Saved +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | Time 953.89 s +2025-03-29 19:49:12 | [rl2_trainer] epoch #49 | EpochTime 20.74 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -54.4353 +Average/AverageReturn -86.6988 +Average/Iteration 49 +Average/MaxReturn -63.4654 +Average/MinReturn -144.11 +Average/NumEpisodes 8 +Average/StdReturn 24.4584 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.95558 +TotalEnvSteps 40000 +__unnamed_task__/AverageDiscountedReturn -54.4353 +__unnamed_task__/AverageReturn -86.6988 +__unnamed_task__/Iteration 49 +__unnamed_task__/MaxReturn -63.4654 +__unnamed_task__/MinReturn -144.11 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.4584 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.53491 +policy/KL 0.0154219 +policy/KLBefore 0 +policy/LossAfter -0.152656 +policy/LossBefore 0.00215995 +policy/dLoss 0.154816 +---------------------------------------- -------------- +2025-03-29 19:49:29 | [rl2_trainer] epoch #50 | Optimizing policy... +2025-03-29 19:49:29 | [rl2_trainer] epoch #50 | Fitting baseline... +2025-03-29 19:49:29 | [rl2_trainer] epoch #50 | Computing loss before +2025-03-29 19:49:30 | [rl2_trainer] epoch #50 | Computing KL before +2025-03-29 19:49:30 | [rl2_trainer] epoch #50 | Optimizing +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | Computing KL after +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | Computing loss after +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | Saving snapshot... +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | Saved +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | Time 973.41 s +2025-03-29 19:49:32 | [rl2_trainer] epoch #50 | EpochTime 19.51 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -62.8822 +Average/AverageReturn -105.184 +Average/Iteration 50 +Average/MaxReturn -74.7652 +Average/MinReturn -159.211 +Average/NumEpisodes 8 +Average/StdReturn 26.3968 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.952334 +TotalEnvSteps 40800 +__unnamed_task__/AverageDiscountedReturn -62.8822 +__unnamed_task__/AverageReturn -105.184 +__unnamed_task__/Iteration 50 +__unnamed_task__/MaxReturn -74.7652 +__unnamed_task__/MinReturn -159.211 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.3968 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.52442 +policy/KL 0.02163 +policy/KLBefore 0 +policy/LossAfter -0.274927 +policy/LossBefore -0.0207847 +policy/dLoss 0.254142 +---------------------------------------- ------------- +2025-03-29 19:49:48 | [rl2_trainer] epoch #51 | Optimizing policy... +2025-03-29 19:49:48 | [rl2_trainer] epoch #51 | Fitting baseline... +2025-03-29 19:49:48 | [rl2_trainer] epoch #51 | Computing loss before +2025-03-29 19:49:49 | [rl2_trainer] epoch #51 | Computing KL before +2025-03-29 19:49:49 | [rl2_trainer] epoch #51 | Optimizing +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | Computing KL after +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | Computing loss after +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | Saving snapshot... +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | Saved +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | Time 992.38 s +2025-03-29 19:49:51 | [rl2_trainer] epoch #51 | EpochTime 18.97 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -58.5567 +Average/AverageReturn -95.3651 +Average/Iteration 51 +Average/MaxReturn -69.5745 +Average/MinReturn -115.048 +Average/NumEpisodes 8 +Average/StdReturn 14.0023 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968877 +TotalEnvSteps 41600 +__unnamed_task__/AverageDiscountedReturn -58.5567 +__unnamed_task__/AverageReturn -95.3651 +__unnamed_task__/Iteration 51 +__unnamed_task__/MaxReturn -69.5745 +__unnamed_task__/MinReturn -115.048 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.0023 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.52113 +policy/KL 0.0182325 +policy/KLBefore 0 +policy/LossAfter -0.13369 +policy/LossBefore -0.0034519 +policy/dLoss 0.130238 +---------------------------------------- ------------- +2025-03-29 19:50:09 | [rl2_trainer] epoch #52 | Optimizing policy... +2025-03-29 19:50:09 | [rl2_trainer] epoch #52 | Fitting baseline... +2025-03-29 19:50:09 | [rl2_trainer] epoch #52 | Computing loss before +2025-03-29 19:50:09 | [rl2_trainer] epoch #52 | Computing KL before +2025-03-29 19:50:09 | [rl2_trainer] epoch #52 | Optimizing +2025-03-29 19:50:11 | [rl2_trainer] epoch #52 | Computing KL after +2025-03-29 19:50:12 | [rl2_trainer] epoch #52 | Computing loss after +2025-03-29 19:50:12 | [rl2_trainer] epoch #52 | Saving snapshot... +2025-03-29 19:50:12 | [rl2_trainer] epoch #52 | Saved +2025-03-29 19:50:12 | [rl2_trainer] epoch #52 | Time 1013.12 s +2025-03-29 19:50:12 | [rl2_trainer] epoch #52 | EpochTime 20.73 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -50.9564 +Average/AverageReturn -77.6225 +Average/Iteration 52 +Average/MaxReturn -61.104 +Average/MinReturn -110.197 +Average/NumEpisodes 8 +Average/StdReturn 15.7614 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979479 +TotalEnvSteps 42400 +__unnamed_task__/AverageDiscountedReturn -50.9564 +__unnamed_task__/AverageReturn -77.6225 +__unnamed_task__/Iteration 52 +__unnamed_task__/MaxReturn -61.104 +__unnamed_task__/MinReturn -110.197 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.7614 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.53079 +policy/KL 0.0150413 +policy/KLBefore 0 +policy/LossAfter -0.114426 +policy/LossBefore -0.00391267 +policy/dLoss 0.110513 +---------------------------------------- -------------- +2025-03-29 19:50:31 | [rl2_trainer] epoch #53 | Optimizing policy... +2025-03-29 19:50:31 | [rl2_trainer] epoch #53 | Fitting baseline... +2025-03-29 19:50:31 | [rl2_trainer] epoch #53 | Computing loss before +2025-03-29 19:50:31 | [rl2_trainer] epoch #53 | Computing KL before +2025-03-29 19:50:31 | [rl2_trainer] epoch #53 | Optimizing +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | Computing KL after +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | Computing loss after +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | Saving snapshot... +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | Saved +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | Time 1035.79 s +2025-03-29 19:50:34 | [rl2_trainer] epoch #53 | EpochTime 22.67 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.8785 +Average/AverageReturn -107.593 +Average/Iteration 53 +Average/MaxReturn -84.8312 +Average/MinReturn -122.316 +Average/NumEpisodes 8 +Average/StdReturn 10.2218 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983442 +TotalEnvSteps 43200 +__unnamed_task__/AverageDiscountedReturn -63.8785 +__unnamed_task__/AverageReturn -107.593 +__unnamed_task__/Iteration 53 +__unnamed_task__/MaxReturn -84.8312 +__unnamed_task__/MinReturn -122.316 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 10.2218 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5477 +policy/KL 0.0192354 +policy/KLBefore 0 +policy/LossAfter -0.16108 +policy/LossBefore 0.00196847 +policy/dLoss 0.163048 +---------------------------------------- -------------- +2025-03-29 19:50:54 | [rl2_trainer] epoch #54 | Optimizing policy... +2025-03-29 19:50:54 | [rl2_trainer] epoch #54 | Fitting baseline... +2025-03-29 19:50:54 | [rl2_trainer] epoch #54 | Computing loss before +2025-03-29 19:50:54 | [rl2_trainer] epoch #54 | Computing KL before +2025-03-29 19:50:54 | [rl2_trainer] epoch #54 | Optimizing +2025-03-29 19:50:56 | [rl2_trainer] epoch #54 | Computing KL after +2025-03-29 19:50:57 | [rl2_trainer] epoch #54 | Computing loss after +2025-03-29 19:50:57 | [rl2_trainer] epoch #54 | Saving snapshot... +2025-03-29 19:50:57 | [rl2_trainer] epoch #54 | Saved +2025-03-29 19:50:57 | [rl2_trainer] epoch #54 | Time 1058.08 s +2025-03-29 19:50:57 | [rl2_trainer] epoch #54 | EpochTime 22.29 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.4026 +Average/AverageReturn -102.833 +Average/Iteration 54 +Average/MaxReturn -70.7313 +Average/MinReturn -152.663 +Average/NumEpisodes 8 +Average/StdReturn 25.8947 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.916566 +TotalEnvSteps 44000 +__unnamed_task__/AverageDiscountedReturn -61.4026 +__unnamed_task__/AverageReturn -102.833 +__unnamed_task__/Iteration 54 +__unnamed_task__/MaxReturn -70.7313 +__unnamed_task__/MinReturn -152.663 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.8947 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55064 +policy/KL 0.0167741 +policy/KLBefore 0 +policy/LossAfter -0.219379 +policy/LossBefore -0.00479659 +policy/dLoss 0.214582 +---------------------------------------- -------------- +2025-03-29 19:51:13 | [rl2_trainer] epoch #55 | Optimizing policy... +2025-03-29 19:51:13 | [rl2_trainer] epoch #55 | Fitting baseline... +2025-03-29 19:51:13 | [rl2_trainer] epoch #55 | Computing loss before +2025-03-29 19:51:13 | [rl2_trainer] epoch #55 | Computing KL before +2025-03-29 19:51:13 | [rl2_trainer] epoch #55 | Optimizing +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | Computing KL after +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | Computing loss after +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | Saving snapshot... +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | Saved +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | Time 1077.23 s +2025-03-29 19:51:16 | [rl2_trainer] epoch #55 | EpochTime 19.15 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -68.9861 +Average/AverageReturn -120.046 +Average/Iteration 55 +Average/MaxReturn -106.926 +Average/MinReturn -142.941 +Average/NumEpisodes 8 +Average/StdReturn 12.512 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.961305 +TotalEnvSteps 44800 +__unnamed_task__/AverageDiscountedReturn -68.9861 +__unnamed_task__/AverageReturn -120.046 +__unnamed_task__/Iteration 55 +__unnamed_task__/MaxReturn -106.926 +__unnamed_task__/MinReturn -142.941 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.512 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55547 +policy/KL 0.0137826 +policy/KLBefore 0 +policy/LossAfter -0.184383 +policy/LossBefore 0.0017318 +policy/dLoss 0.186115 +---------------------------------------- ------------- +2025-03-29 19:51:33 | [rl2_trainer] epoch #56 | Optimizing policy... +2025-03-29 19:51:34 | [rl2_trainer] epoch #56 | Fitting baseline... +2025-03-29 19:51:34 | [rl2_trainer] epoch #56 | Computing loss before +2025-03-29 19:51:34 | [rl2_trainer] epoch #56 | Computing KL before +2025-03-29 19:51:34 | [rl2_trainer] epoch #56 | Optimizing +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | Computing KL after +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | Computing loss after +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | Saving snapshot... +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | Saved +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | Time 1097.38 s +2025-03-29 19:51:36 | [rl2_trainer] epoch #56 | EpochTime 20.14 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.7776 +Average/AverageReturn -92.5645 +Average/Iteration 56 +Average/MaxReturn -61.7311 +Average/MinReturn -120.628 +Average/NumEpisodes 8 +Average/StdReturn 22.6435 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986114 +TotalEnvSteps 45600 +__unnamed_task__/AverageDiscountedReturn -56.7776 +__unnamed_task__/AverageReturn -92.5645 +__unnamed_task__/Iteration 56 +__unnamed_task__/MaxReturn -61.7311 +__unnamed_task__/MinReturn -120.628 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.6435 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56035 +policy/KL 0.0132892 +policy/KLBefore 0 +policy/LossAfter -0.108603 +policy/LossBefore 0.0165536 +policy/dLoss 0.125156 +---------------------------------------- ------------- +2025-03-29 19:51:52 | [rl2_trainer] epoch #57 | Optimizing policy... +2025-03-29 19:51:53 | [rl2_trainer] epoch #57 | Fitting baseline... +2025-03-29 19:51:53 | [rl2_trainer] epoch #57 | Computing loss before +2025-03-29 19:51:53 | [rl2_trainer] epoch #57 | Computing KL before +2025-03-29 19:51:53 | [rl2_trainer] epoch #57 | Optimizing +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | Computing KL after +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | Computing loss after +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | Saving snapshot... +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | Saved +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | Time 1116.43 s +2025-03-29 19:51:55 | [rl2_trainer] epoch #57 | EpochTime 19.05 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.7715 +Average/AverageReturn -109.497 +Average/Iteration 57 +Average/MaxReturn -90.5557 +Average/MinReturn -121.903 +Average/NumEpisodes 8 +Average/StdReturn 11.5102 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983916 +TotalEnvSteps 46400 +__unnamed_task__/AverageDiscountedReturn -64.7715 +__unnamed_task__/AverageReturn -109.497 +__unnamed_task__/Iteration 57 +__unnamed_task__/MaxReturn -90.5557 +__unnamed_task__/MinReturn -121.903 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 11.5102 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5652 +policy/KL 0.0160559 +policy/KLBefore 0 +policy/LossAfter -0.0862918 +policy/LossBefore 0.00524288 +policy/dLoss 0.0915347 +---------------------------------------- -------------- +2025-03-29 19:52:12 | [rl2_trainer] epoch #58 | Optimizing policy... +2025-03-29 19:52:12 | [rl2_trainer] epoch #58 | Fitting baseline... +2025-03-29 19:52:12 | [rl2_trainer] epoch #58 | Computing loss before +2025-03-29 19:52:12 | [rl2_trainer] epoch #58 | Computing KL before +2025-03-29 19:52:12 | [rl2_trainer] epoch #58 | Optimizing +2025-03-29 19:52:14 | [rl2_trainer] epoch #58 | Computing KL after +2025-03-29 19:52:15 | [rl2_trainer] epoch #58 | Computing loss after +2025-03-29 19:52:15 | [rl2_trainer] epoch #58 | Saving snapshot... +2025-03-29 19:52:15 | [rl2_trainer] epoch #58 | Saved +2025-03-29 19:52:15 | [rl2_trainer] epoch #58 | Time 1136.09 s +2025-03-29 19:52:15 | [rl2_trainer] epoch #58 | EpochTime 19.65 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -64.5823 +Average/AverageReturn -106.612 +Average/Iteration 58 +Average/MaxReturn -61.4266 +Average/MinReturn -125.012 +Average/NumEpisodes 8 +Average/StdReturn 19.5092 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983622 +TotalEnvSteps 47200 +__unnamed_task__/AverageDiscountedReturn -64.5823 +__unnamed_task__/AverageReturn -106.612 +__unnamed_task__/Iteration 58 +__unnamed_task__/MaxReturn -61.4266 +__unnamed_task__/MinReturn -125.012 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.5092 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55956 +policy/KL 0.0177415 +policy/KLBefore 0 +policy/LossAfter -0.119961 +policy/LossBefore -0.0052239 +policy/dLoss 0.114737 +---------------------------------------- ------------- +2025-03-29 19:52:31 | [rl2_trainer] epoch #59 | Optimizing policy... +2025-03-29 19:52:31 | [rl2_trainer] epoch #59 | Fitting baseline... +2025-03-29 19:52:31 | [rl2_trainer] epoch #59 | Computing loss before +2025-03-29 19:52:31 | [rl2_trainer] epoch #59 | Computing KL before +2025-03-29 19:52:31 | [rl2_trainer] epoch #59 | Optimizing +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | Computing KL after +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | Computing loss after +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | Saving snapshot... +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | Saved +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | Time 1155.14 s +2025-03-29 19:52:34 | [rl2_trainer] epoch #59 | EpochTime 19.05 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -64.5596 +Average/AverageReturn -106.907 +Average/Iteration 59 +Average/MaxReturn -64.8369 +Average/MinReturn -171.352 +Average/NumEpisodes 8 +Average/StdReturn 31.1754 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.910431 +TotalEnvSteps 48000 +__unnamed_task__/AverageDiscountedReturn -64.5596 +__unnamed_task__/AverageReturn -106.907 +__unnamed_task__/Iteration 59 +__unnamed_task__/MaxReturn -64.8369 +__unnamed_task__/MinReturn -171.352 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.1754 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57168 +policy/KL 0.0213122 +policy/KLBefore 0 +policy/LossAfter -0.366283 +policy/LossBefore -0.0411184 +policy/dLoss 0.325164 +---------------------------------------- ------------- +2025-03-29 19:52:50 | [rl2_trainer] epoch #60 | Optimizing policy... +2025-03-29 19:52:50 | [rl2_trainer] epoch #60 | Fitting baseline... +2025-03-29 19:52:50 | [rl2_trainer] epoch #60 | Computing loss before +2025-03-29 19:52:50 | [rl2_trainer] epoch #60 | Computing KL before +2025-03-29 19:52:50 | [rl2_trainer] epoch #60 | Optimizing +2025-03-29 19:52:52 | [rl2_trainer] epoch #60 | Computing KL after +2025-03-29 19:52:52 | [rl2_trainer] epoch #60 | Computing loss after +2025-03-29 19:52:53 | [rl2_trainer] epoch #60 | Saving snapshot... +2025-03-29 19:52:53 | [rl2_trainer] epoch #60 | Saved +2025-03-29 19:52:53 | [rl2_trainer] epoch #60 | Time 1174.03 s +2025-03-29 19:52:53 | [rl2_trainer] epoch #60 | EpochTime 18.88 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -73.6548 +Average/AverageReturn -124.376 +Average/Iteration 60 +Average/MaxReturn -114.339 +Average/MinReturn -146.49 +Average/NumEpisodes 8 +Average/StdReturn 9.81307 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.970058 +TotalEnvSteps 48800 +__unnamed_task__/AverageDiscountedReturn -73.6548 +__unnamed_task__/AverageReturn -124.376 +__unnamed_task__/Iteration 60 +__unnamed_task__/MaxReturn -114.339 +__unnamed_task__/MinReturn -146.49 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 9.81307 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56918 +policy/KL 0.037334 +policy/KLBefore 0 +policy/LossAfter -0.179902 +policy/LossBefore 0.00164976 +policy/dLoss 0.181552 +---------------------------------------- -------------- +2025-03-29 19:53:09 | [rl2_trainer] epoch #61 | Optimizing policy... +2025-03-29 19:53:09 | [rl2_trainer] epoch #61 | Fitting baseline... +2025-03-29 19:53:09 | [rl2_trainer] epoch #61 | Computing loss before +2025-03-29 19:53:09 | [rl2_trainer] epoch #61 | Computing KL before +2025-03-29 19:53:09 | [rl2_trainer] epoch #61 | Optimizing +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | Computing KL after +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | Computing loss after +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | Saving snapshot... +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | Saved +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | Time 1192.66 s +2025-03-29 19:53:11 | [rl2_trainer] epoch #61 | EpochTime 18.63 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -64.555 +Average/AverageReturn -108.979 +Average/Iteration 61 +Average/MaxReturn -96.453 +Average/MinReturn -120.631 +Average/NumEpisodes 8 +Average/StdReturn 9.41102 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985868 +TotalEnvSteps 49600 +__unnamed_task__/AverageDiscountedReturn -64.555 +__unnamed_task__/AverageReturn -108.979 +__unnamed_task__/Iteration 61 +__unnamed_task__/MaxReturn -96.453 +__unnamed_task__/MinReturn -120.631 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 9.41102 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55767 +policy/KL 0.014447 +policy/KLBefore 0 +policy/LossAfter -0.101516 +policy/LossBefore -0.0108444 +policy/dLoss 0.0906712 +---------------------------------------- ------------- +2025-03-29 19:53:28 | [rl2_trainer] epoch #62 | Optimizing policy... +2025-03-29 19:53:28 | [rl2_trainer] epoch #62 | Fitting baseline... +2025-03-29 19:53:28 | [rl2_trainer] epoch #62 | Computing loss before +2025-03-29 19:53:28 | [rl2_trainer] epoch #62 | Computing KL before +2025-03-29 19:53:28 | [rl2_trainer] epoch #62 | Optimizing +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | Computing KL after +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | Computing loss after +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | Saving snapshot... +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | Saved +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | Time 1211.85 s +2025-03-29 19:53:30 | [rl2_trainer] epoch #62 | EpochTime 19.19 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.333 +Average/AverageReturn -96.9818 +Average/Iteration 62 +Average/MaxReturn -69.4918 +Average/MinReturn -121.163 +Average/NumEpisodes 8 +Average/StdReturn 19.0616 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973281 +TotalEnvSteps 50400 +__unnamed_task__/AverageDiscountedReturn -59.333 +__unnamed_task__/AverageReturn -96.9818 +__unnamed_task__/Iteration 62 +__unnamed_task__/MaxReturn -69.4918 +__unnamed_task__/MinReturn -121.163 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.0616 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5558 +policy/KL 0.0120092 +policy/KLBefore 0 +policy/LossAfter -0.130387 +policy/LossBefore -0.0133481 +policy/dLoss 0.117039 +---------------------------------------- ------------- +2025-03-29 19:53:46 | [rl2_trainer] epoch #63 | Optimizing policy... +2025-03-29 19:53:46 | [rl2_trainer] epoch #63 | Fitting baseline... +2025-03-29 19:53:46 | [rl2_trainer] epoch #63 | Computing loss before +2025-03-29 19:53:46 | [rl2_trainer] epoch #63 | Computing KL before +2025-03-29 19:53:47 | [rl2_trainer] epoch #63 | Optimizing +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | Computing KL after +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | Computing loss after +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | Saving snapshot... +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | Saved +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | Time 1230.42 s +2025-03-29 19:53:49 | [rl2_trainer] epoch #63 | EpochTime 18.56 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -66.8957 +Average/AverageReturn -111.182 +Average/Iteration 63 +Average/MaxReturn -93.6503 +Average/MinReturn -123.978 +Average/NumEpisodes 8 +Average/StdReturn 9.62229 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979166 +TotalEnvSteps 51200 +__unnamed_task__/AverageDiscountedReturn -66.8957 +__unnamed_task__/AverageReturn -111.182 +__unnamed_task__/Iteration 63 +__unnamed_task__/MaxReturn -93.6503 +__unnamed_task__/MinReturn -123.978 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 9.62229 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56029 +policy/KL 0.0140264 +policy/KLBefore 0 +policy/LossAfter -0.13064 +policy/LossBefore 9.87339e-05 +policy/dLoss 0.130739 +---------------------------------------- --------------- +2025-03-29 19:54:06 | [rl2_trainer] epoch #64 | Optimizing policy... +2025-03-29 19:54:06 | [rl2_trainer] epoch #64 | Fitting baseline... +2025-03-29 19:54:06 | [rl2_trainer] epoch #64 | Computing loss before +2025-03-29 19:54:06 | [rl2_trainer] epoch #64 | Computing KL before +2025-03-29 19:54:06 | [rl2_trainer] epoch #64 | Optimizing +2025-03-29 19:54:08 | [rl2_trainer] epoch #64 | Computing KL after +2025-03-29 19:54:09 | [rl2_trainer] epoch #64 | Computing loss after +2025-03-29 19:54:09 | [rl2_trainer] epoch #64 | Saving snapshot... +2025-03-29 19:54:09 | [rl2_trainer] epoch #64 | Saved +2025-03-29 19:54:09 | [rl2_trainer] epoch #64 | Time 1250.07 s +2025-03-29 19:54:09 | [rl2_trainer] epoch #64 | EpochTime 19.65 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -52.731 +Average/AverageReturn -83.0212 +Average/Iteration 64 +Average/MaxReturn -61.5387 +Average/MinReturn -118.138 +Average/NumEpisodes 8 +Average/StdReturn 19.3087 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968845 +TotalEnvSteps 52000 +__unnamed_task__/AverageDiscountedReturn -52.731 +__unnamed_task__/AverageReturn -83.0212 +__unnamed_task__/Iteration 64 +__unnamed_task__/MaxReturn -61.5387 +__unnamed_task__/MinReturn -118.138 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.3087 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5481 +policy/KL 0.0180359 +policy/KLBefore 0 +policy/LossAfter -0.105015 +policy/LossBefore 0.00219107 +policy/dLoss 0.107206 +---------------------------------------- -------------- +2025-03-29 19:54:25 | [rl2_trainer] epoch #65 | Optimizing policy... +2025-03-29 19:54:25 | [rl2_trainer] epoch #65 | Fitting baseline... +2025-03-29 19:54:25 | [rl2_trainer] epoch #65 | Computing loss before +2025-03-29 19:54:25 | [rl2_trainer] epoch #65 | Computing KL before +2025-03-29 19:54:25 | [rl2_trainer] epoch #65 | Optimizing +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | Computing KL after +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | Computing loss after +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | Saving snapshot... +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | Saved +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | Time 1268.90 s +2025-03-29 19:54:27 | [rl2_trainer] epoch #65 | EpochTime 18.83 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -67.0224 +Average/AverageReturn -113.025 +Average/Iteration 65 +Average/MaxReturn -70.6423 +Average/MinReturn -180.255 +Average/NumEpisodes 8 +Average/StdReturn 30.0604 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.936267 +TotalEnvSteps 52800 +__unnamed_task__/AverageDiscountedReturn -67.0224 +__unnamed_task__/AverageReturn -113.025 +__unnamed_task__/Iteration 65 +__unnamed_task__/MaxReturn -70.6423 +__unnamed_task__/MinReturn -180.255 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.0604 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54183 +policy/KL 0.0262471 +policy/KLBefore 0 +policy/LossAfter -0.327964 +policy/LossBefore -0.0397166 +policy/dLoss 0.288247 +---------------------------------------- ------------- +2025-03-29 19:54:44 | [rl2_trainer] epoch #66 | Optimizing policy... +2025-03-29 19:54:44 | [rl2_trainer] epoch #66 | Fitting baseline... +2025-03-29 19:54:44 | [rl2_trainer] epoch #66 | Computing loss before +2025-03-29 19:54:44 | [rl2_trainer] epoch #66 | Computing KL before +2025-03-29 19:54:44 | [rl2_trainer] epoch #66 | Optimizing +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | Computing KL after +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | Computing loss after +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | Saving snapshot... +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | Saved +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | Time 1287.89 s +2025-03-29 19:54:46 | [rl2_trainer] epoch #66 | EpochTime 18.99 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -58.3321 +Average/AverageReturn -94.4988 +Average/Iteration 66 +Average/MaxReturn -77.2534 +Average/MinReturn -120.03 +Average/NumEpisodes 8 +Average/StdReturn 14.9982 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978418 +TotalEnvSteps 53600 +__unnamed_task__/AverageDiscountedReturn -58.3321 +__unnamed_task__/AverageReturn -94.4988 +__unnamed_task__/Iteration 66 +__unnamed_task__/MaxReturn -77.2534 +__unnamed_task__/MinReturn -120.03 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.9982 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54852 +policy/KL 0.0365783 +policy/KLBefore 0 +policy/LossAfter -0.14827 +policy/LossBefore -0.00384607 +policy/dLoss 0.144424 +---------------------------------------- -------------- +2025-03-29 19:55:03 | [rl2_trainer] epoch #67 | Optimizing policy... +2025-03-29 19:55:03 | [rl2_trainer] epoch #67 | Fitting baseline... +2025-03-29 19:55:03 | [rl2_trainer] epoch #67 | Computing loss before +2025-03-29 19:55:03 | [rl2_trainer] epoch #67 | Computing KL before +2025-03-29 19:55:03 | [rl2_trainer] epoch #67 | Optimizing +2025-03-29 19:55:05 | [rl2_trainer] epoch #67 | Computing KL after +2025-03-29 19:55:05 | [rl2_trainer] epoch #67 | Computing loss after +2025-03-29 19:55:06 | [rl2_trainer] epoch #67 | Saving snapshot... +2025-03-29 19:55:06 | [rl2_trainer] epoch #67 | Saved +2025-03-29 19:55:06 | [rl2_trainer] epoch #67 | Time 1306.99 s +2025-03-29 19:55:06 | [rl2_trainer] epoch #67 | EpochTime 19.10 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -66.501 +Average/AverageReturn -109.704 +Average/Iteration 67 +Average/MaxReturn -76.4349 +Average/MinReturn -161.33 +Average/NumEpisodes 8 +Average/StdReturn 24.1712 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982565 +TotalEnvSteps 54400 +__unnamed_task__/AverageDiscountedReturn -66.501 +__unnamed_task__/AverageReturn -109.704 +__unnamed_task__/Iteration 67 +__unnamed_task__/MaxReturn -76.4349 +__unnamed_task__/MinReturn -161.33 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.1712 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55194 +policy/KL 0.0274358 +policy/KLBefore 0 +policy/LossAfter -0.182471 +policy/LossBefore -0.00620013 +policy/dLoss 0.176271 +---------------------------------------- -------------- +2025-03-29 19:55:23 | [rl2_trainer] epoch #68 | Optimizing policy... +2025-03-29 19:55:23 | [rl2_trainer] epoch #68 | Fitting baseline... +2025-03-29 19:55:23 | [rl2_trainer] epoch #68 | Computing loss before +2025-03-29 19:55:23 | [rl2_trainer] epoch #68 | Computing KL before +2025-03-29 19:55:23 | [rl2_trainer] epoch #68 | Optimizing +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | Computing KL after +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | Computing loss after +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | Saving snapshot... +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | Saved +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | Time 1326.61 s +2025-03-29 19:55:25 | [rl2_trainer] epoch #68 | EpochTime 19.62 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -57.3711 +Average/AverageReturn -89.5563 +Average/Iteration 68 +Average/MaxReturn -62.9695 +Average/MinReturn -107.715 +Average/NumEpisodes 8 +Average/StdReturn 17.0156 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969087 +TotalEnvSteps 55200 +__unnamed_task__/AverageDiscountedReturn -57.3711 +__unnamed_task__/AverageReturn -89.5563 +__unnamed_task__/Iteration 68 +__unnamed_task__/MaxReturn -62.9695 +__unnamed_task__/MinReturn -107.715 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.0156 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54878 +policy/KL 0.0144262 +policy/KLBefore 0 +policy/LossAfter -0.138714 +policy/LossBefore -0.00413498 +policy/dLoss 0.13458 +---------------------------------------- -------------- +2025-03-29 19:55:43 | [rl2_trainer] epoch #69 | Optimizing policy... +2025-03-29 19:55:43 | [rl2_trainer] epoch #69 | Fitting baseline... +2025-03-29 19:55:43 | [rl2_trainer] epoch #69 | Computing loss before +2025-03-29 19:55:43 | [rl2_trainer] epoch #69 | Computing KL before +2025-03-29 19:55:43 | [rl2_trainer] epoch #69 | Optimizing +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | Computing KL after +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | Computing loss after +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | Saving snapshot... +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | Saved +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | Time 1346.39 s +2025-03-29 19:55:45 | [rl2_trainer] epoch #69 | EpochTime 19.77 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.5902 +Average/AverageReturn -97.6674 +Average/Iteration 69 +Average/MaxReturn -62.4093 +Average/MinReturn -141.718 +Average/NumEpisodes 8 +Average/StdReturn 24.7987 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.95706 +TotalEnvSteps 56000 +__unnamed_task__/AverageDiscountedReturn -59.5902 +__unnamed_task__/AverageReturn -97.6674 +__unnamed_task__/Iteration 69 +__unnamed_task__/MaxReturn -62.4093 +__unnamed_task__/MinReturn -141.718 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.7987 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55638 +policy/KL 0.0154993 +policy/KLBefore 0 +policy/LossAfter -0.176369 +policy/LossBefore 0.0342089 +policy/dLoss 0.210578 +---------------------------------------- ------------- +2025-03-29 19:56:02 | [rl2_trainer] epoch #70 | Optimizing policy... +2025-03-29 19:56:02 | [rl2_trainer] epoch #70 | Fitting baseline... +2025-03-29 19:56:02 | [rl2_trainer] epoch #70 | Computing loss before +2025-03-29 19:56:02 | [rl2_trainer] epoch #70 | Computing KL before +2025-03-29 19:56:02 | [rl2_trainer] epoch #70 | Optimizing +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | Computing KL after +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | Computing loss after +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | Saving snapshot... +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | Saved +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | Time 1366.23 s +2025-03-29 19:56:05 | [rl2_trainer] epoch #70 | EpochTime 19.84 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.3083 +Average/AverageReturn -90.3318 +Average/Iteration 70 +Average/MaxReturn -66.7467 +Average/MinReturn -116.099 +Average/NumEpisodes 8 +Average/StdReturn 19.959 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.97976 +TotalEnvSteps 56800 +__unnamed_task__/AverageDiscountedReturn -56.3083 +__unnamed_task__/AverageReturn -90.3318 +__unnamed_task__/Iteration 70 +__unnamed_task__/MaxReturn -66.7467 +__unnamed_task__/MinReturn -116.099 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.959 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56021 +policy/KL 0.0132449 +policy/KLBefore 0 +policy/LossAfter -0.152568 +policy/LossBefore -0.0104883 +policy/dLoss 0.14208 +---------------------------------------- ------------- +2025-03-29 19:56:22 | [rl2_trainer] epoch #71 | Optimizing policy... +2025-03-29 19:56:22 | [rl2_trainer] epoch #71 | Fitting baseline... +2025-03-29 19:56:22 | [rl2_trainer] epoch #71 | Computing loss before +2025-03-29 19:56:22 | [rl2_trainer] epoch #71 | Computing KL before +2025-03-29 19:56:22 | [rl2_trainer] epoch #71 | Optimizing +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | Computing KL after +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | Computing loss after +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | Saving snapshot... +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | Saved +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | Time 1386.13 s +2025-03-29 19:56:25 | [rl2_trainer] epoch #71 | EpochTime 19.90 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -51.8639 +Average/AverageReturn -79.5237 +Average/Iteration 71 +Average/MaxReturn -62.0398 +Average/MinReturn -100.833 +Average/NumEpisodes 8 +Average/StdReturn 14.4493 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980036 +TotalEnvSteps 57600 +__unnamed_task__/AverageDiscountedReturn -51.8639 +__unnamed_task__/AverageReturn -79.5237 +__unnamed_task__/Iteration 71 +__unnamed_task__/MaxReturn -62.0398 +__unnamed_task__/MinReturn -100.833 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.4493 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55449 +policy/KL 0.0124202 +policy/KLBefore 0 +policy/LossAfter -0.105184 +policy/LossBefore -0.00300257 +policy/dLoss 0.102182 +---------------------------------------- -------------- +2025-03-29 19:56:42 | [rl2_trainer] epoch #72 | Optimizing policy... +2025-03-29 19:56:42 | [rl2_trainer] epoch #72 | Fitting baseline... +2025-03-29 19:56:42 | [rl2_trainer] epoch #72 | Computing loss before +2025-03-29 19:56:42 | [rl2_trainer] epoch #72 | Computing KL before +2025-03-29 19:56:42 | [rl2_trainer] epoch #72 | Optimizing +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | Computing KL after +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | Computing loss after +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | Saving snapshot... +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | Saved +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | Time 1405.92 s +2025-03-29 19:56:44 | [rl2_trainer] epoch #72 | EpochTime 19.78 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.8125 +Average/AverageReturn -97.4859 +Average/Iteration 72 +Average/MaxReturn -60.8791 +Average/MinReturn -168.864 +Average/NumEpisodes 8 +Average/StdReturn 34.5461 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959445 +TotalEnvSteps 58400 +__unnamed_task__/AverageDiscountedReturn -59.8125 +__unnamed_task__/AverageReturn -97.4859 +__unnamed_task__/Iteration 72 +__unnamed_task__/MaxReturn -60.8791 +__unnamed_task__/MinReturn -168.864 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.5461 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54339 +policy/KL 0.0271524 +policy/KLBefore 0 +policy/LossAfter -0.304129 +policy/LossBefore -0.0268822 +policy/dLoss 0.277247 +---------------------------------------- ------------- +2025-03-29 19:57:01 | [rl2_trainer] epoch #73 | Optimizing policy... +2025-03-29 19:57:01 | [rl2_trainer] epoch #73 | Fitting baseline... +2025-03-29 19:57:01 | [rl2_trainer] epoch #73 | Computing loss before +2025-03-29 19:57:01 | [rl2_trainer] epoch #73 | Computing KL before +2025-03-29 19:57:01 | [rl2_trainer] epoch #73 | Optimizing +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | Computing KL after +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | Computing loss after +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | Saving snapshot... +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | Saved +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | Time 1424.50 s +2025-03-29 19:57:03 | [rl2_trainer] epoch #73 | EpochTime 18.58 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -76.2542 +Average/AverageReturn -132.427 +Average/Iteration 73 +Average/MaxReturn -113.273 +Average/MinReturn -184.699 +Average/NumEpisodes 8 +Average/StdReturn 27.1021 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974368 +TotalEnvSteps 59200 +__unnamed_task__/AverageDiscountedReturn -76.2542 +__unnamed_task__/AverageReturn -132.427 +__unnamed_task__/Iteration 73 +__unnamed_task__/MaxReturn -113.273 +__unnamed_task__/MinReturn -184.699 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.1021 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55679 +policy/KL 0.0285617 +policy/KLBefore 0 +policy/LossAfter -0.289422 +policy/LossBefore 0.0300174 +policy/dLoss 0.319439 +---------------------------------------- ------------- +2025-03-29 19:57:21 | [rl2_trainer] epoch #74 | Optimizing policy... +2025-03-29 19:57:21 | [rl2_trainer] epoch #74 | Fitting baseline... +2025-03-29 19:57:21 | [rl2_trainer] epoch #74 | Computing loss before +2025-03-29 19:57:21 | [rl2_trainer] epoch #74 | Computing KL before +2025-03-29 19:57:21 | [rl2_trainer] epoch #74 | Optimizing +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | Computing KL after +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | Computing loss after +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | Saving snapshot... +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | Saved +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | Time 1444.53 s +2025-03-29 19:57:23 | [rl2_trainer] epoch #74 | EpochTime 20.03 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -55.7336 +Average/AverageReturn -87.7934 +Average/Iteration 74 +Average/MaxReturn -63.9614 +Average/MinReturn -118.159 +Average/NumEpisodes 8 +Average/StdReturn 20.4373 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975096 +TotalEnvSteps 60000 +__unnamed_task__/AverageDiscountedReturn -55.7336 +__unnamed_task__/AverageReturn -87.7934 +__unnamed_task__/Iteration 74 +__unnamed_task__/MaxReturn -63.9614 +__unnamed_task__/MinReturn -118.159 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.4373 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56952 +policy/KL 0.0135823 +policy/KLBefore 0 +policy/LossAfter -0.108865 +policy/LossBefore 0.0141832 +policy/dLoss 0.123048 +---------------------------------------- ------------- +2025-03-29 19:57:41 | [rl2_trainer] epoch #75 | Optimizing policy... +2025-03-29 19:57:41 | [rl2_trainer] epoch #75 | Fitting baseline... +2025-03-29 19:57:41 | [rl2_trainer] epoch #75 | Computing loss before +2025-03-29 19:57:41 | [rl2_trainer] epoch #75 | Computing KL before +2025-03-29 19:57:41 | [rl2_trainer] epoch #75 | Optimizing +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | Computing KL after +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | Computing loss after +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | Saving snapshot... +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | Saved +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | Time 1464.45 s +2025-03-29 19:57:43 | [rl2_trainer] epoch #75 | EpochTime 19.92 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -66.3389 +Average/AverageReturn -108.712 +Average/Iteration 75 +Average/MaxReturn -60.9357 +Average/MinReturn -173.976 +Average/NumEpisodes 8 +Average/StdReturn 35.4653 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986668 +TotalEnvSteps 60800 +__unnamed_task__/AverageDiscountedReturn -66.3389 +__unnamed_task__/AverageReturn -108.712 +__unnamed_task__/Iteration 75 +__unnamed_task__/MaxReturn -60.9357 +__unnamed_task__/MinReturn -173.976 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.4653 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57115 +policy/KL 0.0196577 +policy/KLBefore 0 +policy/LossAfter -0.159136 +policy/LossBefore 0.0103076 +policy/dLoss 0.169443 +---------------------------------------- ------------- +2025-03-29 19:58:00 | [rl2_trainer] epoch #76 | Optimizing policy... +2025-03-29 19:58:00 | [rl2_trainer] epoch #76 | Fitting baseline... +2025-03-29 19:58:00 | [rl2_trainer] epoch #76 | Computing loss before +2025-03-29 19:58:00 | [rl2_trainer] epoch #76 | Computing KL before +2025-03-29 19:58:00 | [rl2_trainer] epoch #76 | Optimizing +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | Computing KL after +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | Computing loss after +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | Saving snapshot... +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | Saved +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | Time 1484.22 s +2025-03-29 19:58:03 | [rl2_trainer] epoch #76 | EpochTime 19.77 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -54.2608 +Average/AverageReturn -82.9184 +Average/Iteration 76 +Average/MaxReturn -66.293 +Average/MinReturn -111.723 +Average/NumEpisodes 8 +Average/StdReturn 14.962 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969945 +TotalEnvSteps 61600 +__unnamed_task__/AverageDiscountedReturn -54.2608 +__unnamed_task__/AverageReturn -82.9184 +__unnamed_task__/Iteration 76 +__unnamed_task__/MaxReturn -66.293 +__unnamed_task__/MinReturn -111.723 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.962 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57264 +policy/KL 0.013968 +policy/KLBefore 0 +policy/LossAfter -0.101508 +policy/LossBefore 0.00586293 +policy/dLoss 0.107371 +---------------------------------------- -------------- +2025-03-29 19:58:20 | [rl2_trainer] epoch #77 | Optimizing policy... +2025-03-29 19:58:20 | [rl2_trainer] epoch #77 | Fitting baseline... +2025-03-29 19:58:20 | [rl2_trainer] epoch #77 | Computing loss before +2025-03-29 19:58:20 | [rl2_trainer] epoch #77 | Computing KL before +2025-03-29 19:58:20 | [rl2_trainer] epoch #77 | Optimizing +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | Computing KL after +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | Computing loss after +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | Saving snapshot... +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | Saved +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | Time 1504.09 s +2025-03-29 19:58:23 | [rl2_trainer] epoch #77 | EpochTime 19.87 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -55.5639 +Average/AverageReturn -87.026 +Average/Iteration 77 +Average/MaxReturn -62.2419 +Average/MinReturn -118.43 +Average/NumEpisodes 8 +Average/StdReturn 19.246 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983626 +TotalEnvSteps 62400 +__unnamed_task__/AverageDiscountedReturn -55.5639 +__unnamed_task__/AverageReturn -87.026 +__unnamed_task__/Iteration 77 +__unnamed_task__/MaxReturn -62.2419 +__unnamed_task__/MinReturn -118.43 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.246 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5678 +policy/KL 0.0110197 +policy/KLBefore 0 +policy/LossAfter -0.113738 +policy/LossBefore 0.0040283 +policy/dLoss 0.117766 +---------------------------------------- ------------- +2025-03-29 19:58:40 | [rl2_trainer] epoch #78 | Optimizing policy... +2025-03-29 19:58:40 | [rl2_trainer] epoch #78 | Fitting baseline... +2025-03-29 19:58:40 | [rl2_trainer] epoch #78 | Computing loss before +2025-03-29 19:58:40 | [rl2_trainer] epoch #78 | Computing KL before +2025-03-29 19:58:40 | [rl2_trainer] epoch #78 | Optimizing +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | Computing KL after +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | Computing loss after +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | Saving snapshot... +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | Saved +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | Time 1523.80 s +2025-03-29 19:58:42 | [rl2_trainer] epoch #78 | EpochTime 19.71 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -61.601 +Average/AverageReturn -102.251 +Average/Iteration 78 +Average/MaxReturn -76.7304 +Average/MinReturn -122.544 +Average/NumEpisodes 8 +Average/StdReturn 15.2147 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976877 +TotalEnvSteps 63200 +__unnamed_task__/AverageDiscountedReturn -61.601 +__unnamed_task__/AverageReturn -102.251 +__unnamed_task__/Iteration 78 +__unnamed_task__/MaxReturn -76.7304 +__unnamed_task__/MinReturn -122.544 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.2147 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57805 +policy/KL 0.0196864 +policy/KLBefore 0 +policy/LossAfter -0.112762 +policy/LossBefore 0.011535 +policy/dLoss 0.124297 +---------------------------------------- ------------- +2025-03-29 19:59:00 | [rl2_trainer] epoch #79 | Optimizing policy... +2025-03-29 19:59:00 | [rl2_trainer] epoch #79 | Fitting baseline... +2025-03-29 19:59:00 | [rl2_trainer] epoch #79 | Computing loss before +2025-03-29 19:59:00 | [rl2_trainer] epoch #79 | Computing KL before +2025-03-29 19:59:00 | [rl2_trainer] epoch #79 | Optimizing +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | Computing KL after +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | Computing loss after +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | Saving snapshot... +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | Saved +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | Time 1543.73 s +2025-03-29 19:59:02 | [rl2_trainer] epoch #79 | EpochTime 19.92 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -50.2776 +Average/AverageReturn -77.377 +Average/Iteration 79 +Average/MaxReturn -54.863 +Average/MinReturn -125.318 +Average/NumEpisodes 8 +Average/StdReturn 20.444 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973536 +TotalEnvSteps 64000 +__unnamed_task__/AverageDiscountedReturn -50.2776 +__unnamed_task__/AverageReturn -77.377 +__unnamed_task__/Iteration 79 +__unnamed_task__/MaxReturn -54.863 +__unnamed_task__/MinReturn -125.318 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.444 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5989 +policy/KL 0.0119966 +policy/KLBefore 0 +policy/LossAfter -0.152022 +policy/LossBefore 0.0264861 +policy/dLoss 0.178508 +---------------------------------------- ------------- +2025-03-29 19:59:19 | [rl2_trainer] epoch #80 | Optimizing policy... +2025-03-29 19:59:19 | [rl2_trainer] epoch #80 | Fitting baseline... +2025-03-29 19:59:19 | [rl2_trainer] epoch #80 | Computing loss before +2025-03-29 19:59:19 | [rl2_trainer] epoch #80 | Computing KL before +2025-03-29 19:59:19 | [rl2_trainer] epoch #80 | Optimizing +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | Computing KL after +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | Computing loss after +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | Saving snapshot... +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | Saved +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | Time 1562.77 s +2025-03-29 19:59:21 | [rl2_trainer] epoch #80 | EpochTime 19.03 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -65.0399 +Average/AverageReturn -105.888 +Average/Iteration 80 +Average/MaxReturn -65.5549 +Average/MinReturn -177.171 +Average/NumEpisodes 8 +Average/StdReturn 31.368 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.984268 +TotalEnvSteps 64800 +__unnamed_task__/AverageDiscountedReturn -65.0399 +__unnamed_task__/AverageReturn -105.888 +__unnamed_task__/Iteration 80 +__unnamed_task__/MaxReturn -65.5549 +__unnamed_task__/MinReturn -177.171 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.368 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.62652 +policy/KL 0.0165199 +policy/KLBefore 0 +policy/LossAfter -0.189157 +policy/LossBefore 0.00088974 +policy/dLoss 0.190047 +---------------------------------------- -------------- +2025-03-29 19:59:38 | [rl2_trainer] epoch #81 | Optimizing policy... +2025-03-29 19:59:38 | [rl2_trainer] epoch #81 | Fitting baseline... +2025-03-29 19:59:38 | [rl2_trainer] epoch #81 | Computing loss before +2025-03-29 19:59:38 | [rl2_trainer] epoch #81 | Computing KL before +2025-03-29 19:59:38 | [rl2_trainer] epoch #81 | Optimizing +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | Computing KL after +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | Computing loss after +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | Saving snapshot... +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | Saved +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | Time 1581.75 s +2025-03-29 19:59:40 | [rl2_trainer] epoch #81 | EpochTime 18.98 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -59.1039 +Average/AverageReturn -93.3985 +Average/Iteration 81 +Average/MaxReturn -76.5032 +Average/MinReturn -106.237 +Average/NumEpisodes 8 +Average/StdReturn 8.85445 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976513 +TotalEnvSteps 65600 +__unnamed_task__/AverageDiscountedReturn -59.1039 +__unnamed_task__/AverageReturn -93.3985 +__unnamed_task__/Iteration 81 +__unnamed_task__/MaxReturn -76.5032 +__unnamed_task__/MinReturn -106.237 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 8.85445 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.65383 +policy/KL 0.0186614 +policy/KLBefore 0 +policy/LossAfter -0.134162 +policy/LossBefore -0.00660964 +policy/dLoss 0.127552 +---------------------------------------- -------------- +2025-03-29 19:59:57 | [rl2_trainer] epoch #82 | Optimizing policy... +2025-03-29 19:59:57 | [rl2_trainer] epoch #82 | Fitting baseline... +2025-03-29 19:59:57 | [rl2_trainer] epoch #82 | Computing loss before +2025-03-29 19:59:58 | [rl2_trainer] epoch #82 | Computing KL before +2025-03-29 19:59:58 | [rl2_trainer] epoch #82 | Optimizing +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | Computing KL after +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | Computing loss after +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | Saving snapshot... +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | Saved +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | Time 1601.35 s +2025-03-29 20:00:00 | [rl2_trainer] epoch #82 | EpochTime 19.60 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.1285 +Average/AverageReturn -96.8193 +Average/Iteration 82 +Average/MaxReturn -61.5534 +Average/MinReturn -139.259 +Average/NumEpisodes 8 +Average/StdReturn 24.6077 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985228 +TotalEnvSteps 66400 +__unnamed_task__/AverageDiscountedReturn -60.1285 +__unnamed_task__/AverageReturn -96.8193 +__unnamed_task__/Iteration 82 +__unnamed_task__/MaxReturn -61.5534 +__unnamed_task__/MinReturn -139.259 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.6077 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.66409 +policy/KL 0.0152577 +policy/KLBefore 0 +policy/LossAfter -0.144375 +policy/LossBefore -0.00148623 +policy/dLoss 0.142889 +---------------------------------------- -------------- +2025-03-29 20:00:17 | [rl2_trainer] epoch #83 | Optimizing policy... +2025-03-29 20:00:17 | [rl2_trainer] epoch #83 | Fitting baseline... +2025-03-29 20:00:17 | [rl2_trainer] epoch #83 | Computing loss before +2025-03-29 20:00:17 | [rl2_trainer] epoch #83 | Computing KL before +2025-03-29 20:00:17 | [rl2_trainer] epoch #83 | Optimizing +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | Computing KL after +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | Computing loss after +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | Saving snapshot... +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | Saved +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | Time 1620.69 s +2025-03-29 20:00:19 | [rl2_trainer] epoch #83 | EpochTime 19.33 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -54.513 +Average/AverageReturn -82.8077 +Average/Iteration 83 +Average/MaxReturn -64.935 +Average/MinReturn -117.796 +Average/NumEpisodes 8 +Average/StdReturn 14.7263 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974205 +TotalEnvSteps 67200 +__unnamed_task__/AverageDiscountedReturn -54.513 +__unnamed_task__/AverageReturn -82.8077 +__unnamed_task__/Iteration 83 +__unnamed_task__/MaxReturn -64.935 +__unnamed_task__/MinReturn -117.796 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.7263 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.65696 +policy/KL 0.0266523 +policy/KLBefore 0 +policy/LossAfter -0.151545 +policy/LossBefore 0.000634327 +policy/dLoss 0.15218 +---------------------------------------- --------------- +2025-03-29 20:00:36 | [rl2_trainer] epoch #84 | Optimizing policy... +2025-03-29 20:00:36 | [rl2_trainer] epoch #84 | Fitting baseline... +2025-03-29 20:00:36 | [rl2_trainer] epoch #84 | Computing loss before +2025-03-29 20:00:36 | [rl2_trainer] epoch #84 | Computing KL before +2025-03-29 20:00:36 | [rl2_trainer] epoch #84 | Optimizing +2025-03-29 20:00:38 | [rl2_trainer] epoch #84 | Computing KL after +2025-03-29 20:00:38 | [rl2_trainer] epoch #84 | Computing loss after +2025-03-29 20:00:39 | [rl2_trainer] epoch #84 | Saving snapshot... +2025-03-29 20:00:39 | [rl2_trainer] epoch #84 | Saved +2025-03-29 20:00:39 | [rl2_trainer] epoch #84 | Time 1640.00 s +2025-03-29 20:00:39 | [rl2_trainer] epoch #84 | EpochTime 19.30 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.5878 +Average/AverageReturn -88.0448 +Average/Iteration 84 +Average/MaxReturn -64.5295 +Average/MinReturn -114.318 +Average/NumEpisodes 8 +Average/StdReturn 15.8431 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.984759 +TotalEnvSteps 68000 +__unnamed_task__/AverageDiscountedReturn -56.5878 +__unnamed_task__/AverageReturn -88.0448 +__unnamed_task__/Iteration 84 +__unnamed_task__/MaxReturn -64.5295 +__unnamed_task__/MinReturn -114.318 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.8431 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.64788 +policy/KL 0.0236284 +policy/KLBefore 0 +policy/LossAfter -0.140953 +policy/LossBefore -0.0133637 +policy/dLoss 0.127589 +---------------------------------------- ------------- +2025-03-29 20:00:58 | [rl2_trainer] epoch #85 | Optimizing policy... +2025-03-29 20:00:58 | [rl2_trainer] epoch #85 | Fitting baseline... +2025-03-29 20:00:58 | [rl2_trainer] epoch #85 | Computing loss before +2025-03-29 20:00:59 | [rl2_trainer] epoch #85 | Computing KL before +2025-03-29 20:00:59 | [rl2_trainer] epoch #85 | Optimizing +2025-03-29 20:01:01 | [rl2_trainer] epoch #85 | Computing KL after +2025-03-29 20:01:02 | [rl2_trainer] epoch #85 | Computing loss after +2025-03-29 20:01:02 | [rl2_trainer] epoch #85 | Saving snapshot... +2025-03-29 20:01:02 | [rl2_trainer] epoch #85 | Saved +2025-03-29 20:01:02 | [rl2_trainer] epoch #85 | Time 1663.12 s +2025-03-29 20:01:02 | [rl2_trainer] epoch #85 | EpochTime 23.13 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -55.6909 +Average/AverageReturn -83.5125 +Average/Iteration 85 +Average/MaxReturn -67.0097 +Average/MinReturn -95.1507 +Average/NumEpisodes 8 +Average/StdReturn 9.57061 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982922 +TotalEnvSteps 68800 +__unnamed_task__/AverageDiscountedReturn -55.6909 +__unnamed_task__/AverageReturn -83.5125 +__unnamed_task__/Iteration 85 +__unnamed_task__/MaxReturn -67.0097 +__unnamed_task__/MinReturn -95.1507 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 9.57061 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.63718 +policy/KL 0.0126479 +policy/KLBefore 0 +policy/LossAfter -0.114249 +policy/LossBefore -0.0168875 +policy/dLoss 0.0973612 +---------------------------------------- ------------- +2025-03-29 20:01:22 | [rl2_trainer] epoch #86 | Optimizing policy... +2025-03-29 20:01:22 | [rl2_trainer] epoch #86 | Fitting baseline... +2025-03-29 20:01:22 | [rl2_trainer] epoch #86 | Computing loss before +2025-03-29 20:01:22 | [rl2_trainer] epoch #86 | Computing KL before +2025-03-29 20:01:22 | [rl2_trainer] epoch #86 | Optimizing +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | Computing KL after +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | Computing loss after +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | Saving snapshot... +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | Saved +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | Time 1685.93 s +2025-03-29 20:01:24 | [rl2_trainer] epoch #86 | EpochTime 22.80 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.6614 +Average/AverageReturn -87.8623 +Average/Iteration 86 +Average/MaxReturn -70.5705 +Average/MinReturn -110.293 +Average/NumEpisodes 8 +Average/StdReturn 16.2516 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985514 +TotalEnvSteps 69600 +__unnamed_task__/AverageDiscountedReturn -56.6614 +__unnamed_task__/AverageReturn -87.8623 +__unnamed_task__/Iteration 86 +__unnamed_task__/MaxReturn -70.5705 +__unnamed_task__/MinReturn -110.293 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.2516 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.60808 +policy/KL 0.0154552 +policy/KLBefore 0 +policy/LossAfter -0.100821 +policy/LossBefore 0.0148481 +policy/dLoss 0.115669 +---------------------------------------- ------------- +2025-03-29 20:01:41 | [rl2_trainer] epoch #87 | Optimizing policy... +2025-03-29 20:01:41 | [rl2_trainer] epoch #87 | Fitting baseline... +2025-03-29 20:01:41 | [rl2_trainer] epoch #87 | Computing loss before +2025-03-29 20:01:41 | [rl2_trainer] epoch #87 | Computing KL before +2025-03-29 20:01:41 | [rl2_trainer] epoch #87 | Optimizing +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | Computing KL after +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | Computing loss after +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | Saving snapshot... +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | Saved +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | Time 1705.24 s +2025-03-29 20:01:44 | [rl2_trainer] epoch #87 | EpochTime 19.31 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.01 +Average/AverageReturn -95.1703 +Average/Iteration 87 +Average/MaxReturn -68.112 +Average/MinReturn -132.208 +Average/NumEpisodes 8 +Average/StdReturn 22.9855 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.971922 +TotalEnvSteps 70400 +__unnamed_task__/AverageDiscountedReturn -59.01 +__unnamed_task__/AverageReturn -95.1703 +__unnamed_task__/Iteration 87 +__unnamed_task__/MaxReturn -68.112 +__unnamed_task__/MinReturn -132.208 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.9855 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.57067 +policy/KL 0.0180074 +policy/KLBefore 0 +policy/LossAfter -0.124696 +policy/LossBefore 0.0259907 +policy/dLoss 0.150687 +---------------------------------------- ------------- +2025-03-29 20:02:01 | [rl2_trainer] epoch #88 | Optimizing policy... +2025-03-29 20:02:01 | [rl2_trainer] epoch #88 | Fitting baseline... +2025-03-29 20:02:01 | [rl2_trainer] epoch #88 | Computing loss before +2025-03-29 20:02:01 | [rl2_trainer] epoch #88 | Computing KL before +2025-03-29 20:02:01 | [rl2_trainer] epoch #88 | Optimizing +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | Computing KL after +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | Computing loss after +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | Saving snapshot... +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | Saved +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | Time 1724.78 s +2025-03-29 20:02:03 | [rl2_trainer] epoch #88 | EpochTime 19.53 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -53.89 +Average/AverageReturn -82.2796 +Average/Iteration 88 +Average/MaxReturn -68.6331 +Average/MinReturn -117.886 +Average/NumEpisodes 8 +Average/StdReturn 16.1277 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967865 +TotalEnvSteps 71200 +__unnamed_task__/AverageDiscountedReturn -53.89 +__unnamed_task__/AverageReturn -82.2796 +__unnamed_task__/Iteration 88 +__unnamed_task__/MaxReturn -68.6331 +__unnamed_task__/MinReturn -117.886 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.1277 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55573 +policy/KL 0.0201768 +policy/KLBefore 0 +policy/LossAfter -0.149802 +policy/LossBefore -0.0141395 +policy/dLoss 0.135662 +---------------------------------------- ------------- +2025-03-29 20:02:20 | [rl2_trainer] epoch #89 | Optimizing policy... +2025-03-29 20:02:20 | [rl2_trainer] epoch #89 | Fitting baseline... +2025-03-29 20:02:20 | [rl2_trainer] epoch #89 | Computing loss before +2025-03-29 20:02:20 | [rl2_trainer] epoch #89 | Computing KL before +2025-03-29 20:02:20 | [rl2_trainer] epoch #89 | Optimizing +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | Computing KL after +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | Computing loss after +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | Saving snapshot... +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | Saved +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | Time 1744.17 s +2025-03-29 20:02:23 | [rl2_trainer] epoch #89 | EpochTime 19.39 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.7024 +Average/AverageReturn -91.0476 +Average/Iteration 89 +Average/MaxReturn -67.8416 +Average/MinReturn -191.891 +Average/NumEpisodes 8 +Average/StdReturn 38.9621 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.93887 +TotalEnvSteps 72000 +__unnamed_task__/AverageDiscountedReturn -56.7024 +__unnamed_task__/AverageReturn -91.0476 +__unnamed_task__/Iteration 89 +__unnamed_task__/MaxReturn -67.8416 +__unnamed_task__/MinReturn -191.891 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.9621 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54146 +policy/KL 0.0327043 +policy/KLBefore 0 +policy/LossAfter -0.273641 +policy/LossBefore 0.0123196 +policy/dLoss 0.28596 +---------------------------------------- ------------- +2025-03-29 20:02:40 | [rl2_trainer] epoch #90 | Optimizing policy... +2025-03-29 20:02:40 | [rl2_trainer] epoch #90 | Fitting baseline... +2025-03-29 20:02:40 | [rl2_trainer] epoch #90 | Computing loss before +2025-03-29 20:02:40 | [rl2_trainer] epoch #90 | Computing KL before +2025-03-29 20:02:40 | [rl2_trainer] epoch #90 | Optimizing +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | Computing KL after +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | Computing loss after +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | Saving snapshot... +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | Saved +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | Time 1764.18 s +2025-03-29 20:02:43 | [rl2_trainer] epoch #90 | EpochTime 20.01 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -58.9542 +Average/AverageReturn -93.994 +Average/Iteration 90 +Average/MaxReturn -72.1153 +Average/MinReturn -125.213 +Average/NumEpisodes 8 +Average/StdReturn 20.1366 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978851 +TotalEnvSteps 72800 +__unnamed_task__/AverageDiscountedReturn -58.9542 +__unnamed_task__/AverageReturn -93.994 +__unnamed_task__/Iteration 90 +__unnamed_task__/MaxReturn -72.1153 +__unnamed_task__/MinReturn -125.213 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.1366 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.54938 +policy/KL 0.0216917 +policy/KLBefore 0 +policy/LossAfter -0.186886 +policy/LossBefore -0.000858545 +policy/dLoss 0.186027 +---------------------------------------- --------------- +2025-03-29 20:03:00 | [rl2_trainer] epoch #91 | Optimizing policy... +2025-03-29 20:03:00 | [rl2_trainer] epoch #91 | Fitting baseline... +2025-03-29 20:03:00 | [rl2_trainer] epoch #91 | Computing loss before +2025-03-29 20:03:00 | [rl2_trainer] epoch #91 | Computing KL before +2025-03-29 20:03:00 | [rl2_trainer] epoch #91 | Optimizing +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | Computing KL after +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | Computing loss after +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | Saving snapshot... +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | Saved +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | Time 1783.78 s +2025-03-29 20:03:02 | [rl2_trainer] epoch #91 | EpochTime 19.59 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.9696 +Average/AverageReturn -87.9582 +Average/Iteration 91 +Average/MaxReturn -43.9241 +Average/MinReturn -137.735 +Average/NumEpisodes 8 +Average/StdReturn 27.7006 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981774 +TotalEnvSteps 73600 +__unnamed_task__/AverageDiscountedReturn -55.9696 +__unnamed_task__/AverageReturn -87.9582 +__unnamed_task__/Iteration 91 +__unnamed_task__/MaxReturn -43.9241 +__unnamed_task__/MinReturn -137.735 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.7006 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55372 +policy/KL 0.0244838 +policy/KLBefore 0 +policy/LossAfter -0.14767 +policy/LossBefore 0.00920229 +policy/dLoss 0.156872 +---------------------------------------- -------------- +2025-03-29 20:03:19 | [rl2_trainer] epoch #92 | Optimizing policy... +2025-03-29 20:03:19 | [rl2_trainer] epoch #92 | Fitting baseline... +2025-03-29 20:03:19 | [rl2_trainer] epoch #92 | Computing loss before +2025-03-29 20:03:19 | [rl2_trainer] epoch #92 | Computing KL before +2025-03-29 20:03:19 | [rl2_trainer] epoch #92 | Optimizing +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | Computing KL after +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | Computing loss after +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | Saving snapshot... +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | Saved +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | Time 1803.17 s +2025-03-29 20:03:22 | [rl2_trainer] epoch #92 | EpochTime 19.39 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -58.1555 +Average/AverageReturn -90.9478 +Average/Iteration 92 +Average/MaxReturn -64.889 +Average/MinReturn -183.534 +Average/NumEpisodes 8 +Average/StdReturn 36.6703 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.944318 +TotalEnvSteps 74400 +__unnamed_task__/AverageDiscountedReturn -58.1555 +__unnamed_task__/AverageReturn -90.9478 +__unnamed_task__/Iteration 92 +__unnamed_task__/MaxReturn -64.889 +__unnamed_task__/MinReturn -183.534 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.6703 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55619 +policy/KL 0.0115742 +policy/KLBefore 0 +policy/LossAfter -0.233263 +policy/LossBefore -0.0106608 +policy/dLoss 0.222602 +---------------------------------------- ------------- +2025-03-29 20:03:39 | [rl2_trainer] epoch #93 | Optimizing policy... +2025-03-29 20:03:39 | [rl2_trainer] epoch #93 | Fitting baseline... +2025-03-29 20:03:39 | [rl2_trainer] epoch #93 | Computing loss before +2025-03-29 20:03:39 | [rl2_trainer] epoch #93 | Computing KL before +2025-03-29 20:03:39 | [rl2_trainer] epoch #93 | Optimizing +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | Computing KL after +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | Computing loss after +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | Saving snapshot... +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | Saved +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | Time 1822.59 s +2025-03-29 20:03:41 | [rl2_trainer] epoch #93 | EpochTime 19.42 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.922 +Average/AverageReturn -84.5234 +Average/Iteration 93 +Average/MaxReturn -68.4341 +Average/MinReturn -112.221 +Average/NumEpisodes 8 +Average/StdReturn 12.9904 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958525 +TotalEnvSteps 75200 +__unnamed_task__/AverageDiscountedReturn -56.922 +__unnamed_task__/AverageReturn -84.5234 +__unnamed_task__/Iteration 93 +__unnamed_task__/MaxReturn -68.4341 +__unnamed_task__/MinReturn -112.221 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.9904 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56026 +policy/KL 0.0181247 +policy/KLBefore 0 +policy/LossAfter -0.166934 +policy/LossBefore -0.0181838 +policy/dLoss 0.14875 +---------------------------------------- ------------- +2025-03-29 20:03:58 | [rl2_trainer] epoch #94 | Optimizing policy... +2025-03-29 20:03:58 | [rl2_trainer] epoch #94 | Fitting baseline... +2025-03-29 20:03:58 | [rl2_trainer] epoch #94 | Computing loss before +2025-03-29 20:03:58 | [rl2_trainer] epoch #94 | Computing KL before +2025-03-29 20:03:58 | [rl2_trainer] epoch #94 | Optimizing +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | Computing KL after +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | Computing loss after +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | Saving snapshot... +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | Saved +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | Time 1842.11 s +2025-03-29 20:04:01 | [rl2_trainer] epoch #94 | EpochTime 19.52 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -60.7412 +Average/AverageReturn -97.9856 +Average/Iteration 94 +Average/MaxReturn -72.0793 +Average/MinReturn -136.686 +Average/NumEpisodes 8 +Average/StdReturn 23.9384 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982533 +TotalEnvSteps 76000 +__unnamed_task__/AverageDiscountedReturn -60.7412 +__unnamed_task__/AverageReturn -97.9856 +__unnamed_task__/Iteration 94 +__unnamed_task__/MaxReturn -72.0793 +__unnamed_task__/MinReturn -136.686 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.9384 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56536 +policy/KL 0.014231 +policy/KLBefore 0 +policy/LossAfter -0.163859 +policy/LossBefore -0.0141895 +policy/dLoss 0.149669 +---------------------------------------- ------------- +2025-03-29 20:04:18 | [rl2_trainer] epoch #95 | Optimizing policy... +2025-03-29 20:04:18 | [rl2_trainer] epoch #95 | Fitting baseline... +2025-03-29 20:04:18 | [rl2_trainer] epoch #95 | Computing loss before +2025-03-29 20:04:18 | [rl2_trainer] epoch #95 | Computing KL before +2025-03-29 20:04:18 | [rl2_trainer] epoch #95 | Optimizing +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | Computing KL after +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | Computing loss after +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | Saving snapshot... +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | Saved +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | Time 1861.70 s +2025-03-29 20:04:20 | [rl2_trainer] epoch #95 | EpochTime 19.59 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -56.7039 +Average/AverageReturn -85.0556 +Average/Iteration 95 +Average/MaxReturn -74.191 +Average/MinReturn -121.743 +Average/NumEpisodes 8 +Average/StdReturn 14.2849 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979821 +TotalEnvSteps 76800 +__unnamed_task__/AverageDiscountedReturn -56.7039 +__unnamed_task__/AverageReturn -85.0556 +__unnamed_task__/Iteration 95 +__unnamed_task__/MaxReturn -74.191 +__unnamed_task__/MinReturn -121.743 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.2849 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5599 +policy/KL 0.0159587 +policy/KLBefore 0 +policy/LossAfter -0.147447 +policy/LossBefore -0.0122064 +policy/dLoss 0.13524 +---------------------------------------- ------------- +2025-03-29 20:04:37 | [rl2_trainer] epoch #96 | Optimizing policy... +2025-03-29 20:04:37 | [rl2_trainer] epoch #96 | Fitting baseline... +2025-03-29 20:04:37 | [rl2_trainer] epoch #96 | Computing loss before +2025-03-29 20:04:37 | [rl2_trainer] epoch #96 | Computing KL before +2025-03-29 20:04:37 | [rl2_trainer] epoch #96 | Optimizing +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | Computing KL after +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | Computing loss after +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | Saving snapshot... +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | Saved +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | Time 1880.77 s +2025-03-29 20:04:39 | [rl2_trainer] epoch #96 | EpochTime 19.06 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -61.3353 +Average/AverageReturn -96.7999 +Average/Iteration 96 +Average/MaxReturn -73.4901 +Average/MinReturn -123.962 +Average/NumEpisodes 8 +Average/StdReturn 16.7615 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978055 +TotalEnvSteps 77600 +__unnamed_task__/AverageDiscountedReturn -61.3353 +__unnamed_task__/AverageReturn -96.7999 +__unnamed_task__/Iteration 96 +__unnamed_task__/MaxReturn -73.4901 +__unnamed_task__/MinReturn -123.962 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.7615 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55989 +policy/KL 0.0142844 +policy/KLBefore 0 +policy/LossAfter -0.127945 +policy/LossBefore -0.000315075 +policy/dLoss 0.12763 +---------------------------------------- --------------- +2025-03-29 20:04:58 | [rl2_trainer] epoch #97 | Optimizing policy... +2025-03-29 20:04:59 | [rl2_trainer] epoch #97 | Fitting baseline... +2025-03-29 20:04:59 | [rl2_trainer] epoch #97 | Computing loss before +2025-03-29 20:04:59 | [rl2_trainer] epoch #97 | Computing KL before +2025-03-29 20:04:59 | [rl2_trainer] epoch #97 | Optimizing +2025-03-29 20:05:01 | [rl2_trainer] epoch #97 | Computing KL after +2025-03-29 20:05:01 | [rl2_trainer] epoch #97 | Computing loss after +2025-03-29 20:05:02 | [rl2_trainer] epoch #97 | Saving snapshot... +2025-03-29 20:05:02 | [rl2_trainer] epoch #97 | Saved +2025-03-29 20:05:02 | [rl2_trainer] epoch #97 | Time 1903.02 s +2025-03-29 20:05:02 | [rl2_trainer] epoch #97 | EpochTime 22.25 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -62.3146 +Average/AverageReturn -98.4669 +Average/Iteration 97 +Average/MaxReturn -67.5312 +Average/MinReturn -163.665 +Average/NumEpisodes 8 +Average/StdReturn 28.9516 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.965939 +TotalEnvSteps 78400 +__unnamed_task__/AverageDiscountedReturn -62.3146 +__unnamed_task__/AverageReturn -98.4669 +__unnamed_task__/Iteration 97 +__unnamed_task__/MaxReturn -67.5312 +__unnamed_task__/MinReturn -163.665 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.9516 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56016 +policy/KL 0.0246908 +policy/KLBefore 0 +policy/LossAfter -0.255682 +policy/LossBefore 0.00501039 +policy/dLoss 0.260693 +---------------------------------------- -------------- +2025-03-29 20:05:21 | [rl2_trainer] epoch #98 | Optimizing policy... +2025-03-29 20:05:21 | [rl2_trainer] epoch #98 | Fitting baseline... +2025-03-29 20:05:21 | [rl2_trainer] epoch #98 | Computing loss before +2025-03-29 20:05:21 | [rl2_trainer] epoch #98 | Computing KL before +2025-03-29 20:05:21 | [rl2_trainer] epoch #98 | Optimizing +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | Computing KL after +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | Computing loss after +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | Saving snapshot... +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | Saved +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | Time 1924.80 s +2025-03-29 20:05:23 | [rl2_trainer] epoch #98 | EpochTime 21.78 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -62.2413 +Average/AverageReturn -100.446 +Average/Iteration 98 +Average/MaxReturn -70.799 +Average/MinReturn -207.483 +Average/NumEpisodes 8 +Average/StdReturn 43.913 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.97107 +TotalEnvSteps 79200 +__unnamed_task__/AverageDiscountedReturn -62.2413 +__unnamed_task__/AverageReturn -100.446 +__unnamed_task__/Iteration 98 +__unnamed_task__/MaxReturn -70.799 +__unnamed_task__/MinReturn -207.483 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 43.913 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56407 +policy/KL 0.0249675 +policy/KLBefore 0 +policy/LossAfter -0.37221 +policy/LossBefore 0.00269602 +policy/dLoss 0.374907 +---------------------------------------- -------------- +2025-03-29 20:05:40 | [rl2_trainer] epoch #99 | Optimizing policy... +2025-03-29 20:05:40 | [rl2_trainer] epoch #99 | Fitting baseline... +2025-03-29 20:05:40 | [rl2_trainer] epoch #99 | Computing loss before +2025-03-29 20:05:40 | [rl2_trainer] epoch #99 | Computing KL before +2025-03-29 20:05:40 | [rl2_trainer] epoch #99 | Optimizing +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | Computing KL after +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | Computing loss after +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | Saving snapshot... +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | Saved +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | Time 1944.21 s +2025-03-29 20:05:43 | [rl2_trainer] epoch #99 | EpochTime 19.40 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.1159 +Average/AverageReturn -82.9236 +Average/Iteration 99 +Average/MaxReturn -67.7467 +Average/MinReturn -112.531 +Average/NumEpisodes 8 +Average/StdReturn 16.0885 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978676 +TotalEnvSteps 80000 +__unnamed_task__/AverageDiscountedReturn -55.1159 +__unnamed_task__/AverageReturn -82.9236 +__unnamed_task__/Iteration 99 +__unnamed_task__/MaxReturn -67.7467 +__unnamed_task__/MinReturn -112.531 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.0885 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.56075 +policy/KL 0.0216854 +policy/KLBefore 0 +policy/LossAfter -0.124943 +policy/LossBefore 0.00019568 +policy/dLoss 0.125138 +---------------------------------------- -------------- +2025-03-29 20:06:00 | [rl2_trainer] epoch #100 | Optimizing policy... +2025-03-29 20:06:00 | [rl2_trainer] epoch #100 | Fitting baseline... +2025-03-29 20:06:00 | [rl2_trainer] epoch #100 | Computing loss before +2025-03-29 20:06:00 | [rl2_trainer] epoch #100 | Computing KL before +2025-03-29 20:06:00 | [rl2_trainer] epoch #100 | Optimizing +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | Computing KL after +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | Computing loss after +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | Saving snapshot... +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | Saved +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | Time 1963.79 s +2025-03-29 20:06:02 | [rl2_trainer] epoch #100 | EpochTime 19.58 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.9118 +Average/AverageReturn -95.0815 +Average/Iteration 100 +Average/MaxReturn -75.9669 +Average/MinReturn -124.744 +Average/NumEpisodes 8 +Average/StdReturn 15.7034 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96553 +TotalEnvSteps 80800 +__unnamed_task__/AverageDiscountedReturn -60.9118 +__unnamed_task__/AverageReturn -95.0815 +__unnamed_task__/Iteration 100 +__unnamed_task__/MaxReturn -75.9669 +__unnamed_task__/MinReturn -124.744 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.7034 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.55768 +policy/KL 0.0187003 +policy/KLBefore 0 +policy/LossAfter -0.123184 +policy/LossBefore -0.00424056 +policy/dLoss 0.118944 +---------------------------------------- -------------- +2025-03-29 20:06:20 | [rl2_trainer] epoch #101 | Optimizing policy... +2025-03-29 20:06:20 | [rl2_trainer] epoch #101 | Fitting baseline... +2025-03-29 20:06:20 | [rl2_trainer] epoch #101 | Computing loss before +2025-03-29 20:06:20 | [rl2_trainer] epoch #101 | Computing KL before +2025-03-29 20:06:21 | [rl2_trainer] epoch #101 | Optimizing +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | Computing KL after +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | Computing loss after +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | Saving snapshot... +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | Saved +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | Time 1984.84 s +2025-03-29 20:06:23 | [rl2_trainer] epoch #101 | EpochTime 21.04 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -66.6794 +Average/AverageReturn -110.25 +Average/Iteration 101 +Average/MaxReturn -71.2594 +Average/MinReturn -151.327 +Average/NumEpisodes 8 +Average/StdReturn 25.4974 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972197 +TotalEnvSteps 81600 +__unnamed_task__/AverageDiscountedReturn -66.6794 +__unnamed_task__/AverageReturn -110.25 +__unnamed_task__/Iteration 101 +__unnamed_task__/MaxReturn -71.2594 +__unnamed_task__/MinReturn -151.327 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.4974 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5516 +policy/KL 0.0244717 +policy/KLBefore 0 +policy/LossAfter -0.195312 +policy/LossBefore -0.0222592 +policy/dLoss 0.173053 +---------------------------------------- ------------- +2025-03-29 20:06:44 | [rl2_trainer] epoch #102 | Optimizing policy... +2025-03-29 20:06:44 | [rl2_trainer] epoch #102 | Fitting baseline... +2025-03-29 20:06:44 | [rl2_trainer] epoch #102 | Computing loss before +2025-03-29 20:06:44 | [rl2_trainer] epoch #102 | Computing KL before +2025-03-29 20:06:44 | [rl2_trainer] epoch #102 | Optimizing +2025-03-29 20:06:46 | [rl2_trainer] epoch #102 | Computing KL after +2025-03-29 20:06:46 | [rl2_trainer] epoch #102 | Computing loss after +2025-03-29 20:06:47 | [rl2_trainer] epoch #102 | Saving snapshot... +2025-03-29 20:06:47 | [rl2_trainer] epoch #102 | Saved +2025-03-29 20:06:47 | [rl2_trainer] epoch #102 | Time 2008.00 s +2025-03-29 20:06:47 | [rl2_trainer] epoch #102 | EpochTime 23.16 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -59.6324 +Average/AverageReturn -89.1519 +Average/Iteration 102 +Average/MaxReturn -72.534 +Average/MinReturn -113.723 +Average/NumEpisodes 8 +Average/StdReturn 15.0483 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981545 +TotalEnvSteps 82400 +__unnamed_task__/AverageDiscountedReturn -59.6324 +__unnamed_task__/AverageReturn -89.1519 +__unnamed_task__/Iteration 102 +__unnamed_task__/MaxReturn -72.534 +__unnamed_task__/MinReturn -113.723 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.0483 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.53679 +policy/KL 0.0223693 +policy/KLBefore 0 +policy/LossAfter -0.121849 +policy/LossBefore 0.000131817 +policy/dLoss 0.121981 +---------------------------------------- --------------- +2025-03-29 20:07:05 | [rl2_trainer] epoch #103 | Optimizing policy... +2025-03-29 20:07:05 | [rl2_trainer] epoch #103 | Fitting baseline... +2025-03-29 20:07:05 | [rl2_trainer] epoch #103 | Computing loss before +2025-03-29 20:07:05 | [rl2_trainer] epoch #103 | Computing KL before +2025-03-29 20:07:05 | [rl2_trainer] epoch #103 | Optimizing +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | Computing KL after +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | Computing loss after +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | Saving snapshot... +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | Saved +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | Time 2028.88 s +2025-03-29 20:07:07 | [rl2_trainer] epoch #103 | EpochTime 20.88 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -59.0281 +Average/AverageReturn -91.163 +Average/Iteration 103 +Average/MaxReturn -68.5244 +Average/MinReturn -112.684 +Average/NumEpisodes 8 +Average/StdReturn 14.0102 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986633 +TotalEnvSteps 83200 +__unnamed_task__/AverageDiscountedReturn -59.0281 +__unnamed_task__/AverageReturn -91.163 +__unnamed_task__/Iteration 103 +__unnamed_task__/MaxReturn -68.5244 +__unnamed_task__/MinReturn -112.684 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.0102 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.52653 +policy/KL 0.0254217 +policy/KLBefore 0 +policy/LossAfter -0.112353 +policy/LossBefore -0.013109 +policy/dLoss 0.0992441 +---------------------------------------- ------------- +2025-03-29 20:07:27 | [rl2_trainer] epoch #104 | Optimizing policy... +2025-03-29 20:07:27 | [rl2_trainer] epoch #104 | Fitting baseline... +2025-03-29 20:07:27 | [rl2_trainer] epoch #104 | Computing loss before +2025-03-29 20:07:27 | [rl2_trainer] epoch #104 | Computing KL before +2025-03-29 20:07:27 | [rl2_trainer] epoch #104 | Optimizing +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | Computing KL after +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | Computing loss after +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | Saving snapshot... +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | Saved +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | Time 2051.20 s +2025-03-29 20:07:30 | [rl2_trainer] epoch #104 | EpochTime 22.32 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -59.077 +Average/AverageReturn -89.5714 +Average/Iteration 104 +Average/MaxReturn -76.0905 +Average/MinReturn -110.236 +Average/NumEpisodes 8 +Average/StdReturn 11.8911 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968778 +TotalEnvSteps 84000 +__unnamed_task__/AverageDiscountedReturn -59.077 +__unnamed_task__/AverageReturn -89.5714 +__unnamed_task__/Iteration 104 +__unnamed_task__/MaxReturn -76.0905 +__unnamed_task__/MinReturn -110.236 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 11.8911 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.52183 +policy/KL 0.0148834 +policy/KLBefore 0 +policy/LossAfter -0.165658 +policy/LossBefore -0.00939385 +policy/dLoss 0.156265 +---------------------------------------- -------------- +2025-03-29 20:07:50 | [rl2_trainer] epoch #105 | Optimizing policy... +2025-03-29 20:07:50 | [rl2_trainer] epoch #105 | Fitting baseline... +2025-03-29 20:07:50 | [rl2_trainer] epoch #105 | Computing loss before +2025-03-29 20:07:50 | [rl2_trainer] epoch #105 | Computing KL before +2025-03-29 20:07:50 | [rl2_trainer] epoch #105 | Optimizing +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | Computing KL after +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | Computing loss after +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | Saving snapshot... +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | Saved +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | Time 2073.59 s +2025-03-29 20:07:52 | [rl2_trainer] epoch #105 | EpochTime 22.38 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.7335 +Average/AverageReturn -99.0756 +Average/Iteration 105 +Average/MaxReturn -68.7149 +Average/MinReturn -169.71 +Average/NumEpisodes 8 +Average/StdReturn 30.5362 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959662 +TotalEnvSteps 84800 +__unnamed_task__/AverageDiscountedReturn -63.7335 +__unnamed_task__/AverageReturn -99.0756 +__unnamed_task__/Iteration 105 +__unnamed_task__/MaxReturn -68.7149 +__unnamed_task__/MinReturn -169.71 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.5362 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.51516 +policy/KL 0.0205661 +policy/KLBefore 0 +policy/LossAfter -0.196022 +policy/LossBefore -0.00511665 +policy/dLoss 0.190905 +---------------------------------------- -------------- +2025-03-29 20:08:09 | [rl2_trainer] epoch #106 | Optimizing policy... +2025-03-29 20:08:09 | [rl2_trainer] epoch #106 | Fitting baseline... +2025-03-29 20:08:09 | [rl2_trainer] epoch #106 | Computing loss before +2025-03-29 20:08:09 | [rl2_trainer] epoch #106 | Computing KL before +2025-03-29 20:08:10 | [rl2_trainer] epoch #106 | Optimizing +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | Computing KL after +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | Computing loss after +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | Saving snapshot... +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | Saved +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | Time 2093.20 s +2025-03-29 20:08:12 | [rl2_trainer] epoch #106 | EpochTime 19.61 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -42.9252 +Average/AverageReturn -71.5659 +Average/Iteration 106 +Average/MaxReturn 129.693 +Average/MinReturn -190.916 +Average/NumEpisodes 8 +Average/StdReturn 83.8573 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977926 +TotalEnvSteps 85600 +__unnamed_task__/AverageDiscountedReturn -42.9252 +__unnamed_task__/AverageReturn -71.5659 +__unnamed_task__/Iteration 106 +__unnamed_task__/MaxReturn 129.693 +__unnamed_task__/MinReturn -190.916 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 83.8573 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.50343 +policy/KL 0.0416163 +policy/KLBefore 0 +policy/LossAfter -0.570225 +policy/LossBefore 0.000414457 +policy/dLoss 0.57064 +---------------------------------------- --------------- +2025-03-29 20:08:28 | [rl2_trainer] epoch #107 | Optimizing policy... +2025-03-29 20:08:28 | [rl2_trainer] epoch #107 | Fitting baseline... +2025-03-29 20:08:28 | [rl2_trainer] epoch #107 | Computing loss before +2025-03-29 20:08:28 | [rl2_trainer] epoch #107 | Computing KL before +2025-03-29 20:08:28 | [rl2_trainer] epoch #107 | Optimizing +2025-03-29 20:08:30 | [rl2_trainer] epoch #107 | Computing KL after +2025-03-29 20:08:31 | [rl2_trainer] epoch #107 | Computing loss after +2025-03-29 20:08:31 | [rl2_trainer] epoch #107 | Saving snapshot... +2025-03-29 20:08:31 | [rl2_trainer] epoch #107 | Saved +2025-03-29 20:08:31 | [rl2_trainer] epoch #107 | Time 2112.05 s +2025-03-29 20:08:31 | [rl2_trainer] epoch #107 | EpochTime 18.85 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -70.9177 +Average/AverageReturn -115.76 +Average/Iteration 107 +Average/MaxReturn -70.4943 +Average/MinReturn -169.467 +Average/NumEpisodes 8 +Average/StdReturn 32.1093 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974495 +TotalEnvSteps 86400 +__unnamed_task__/AverageDiscountedReturn -70.9177 +__unnamed_task__/AverageReturn -115.76 +__unnamed_task__/Iteration 107 +__unnamed_task__/MaxReturn -70.4943 +__unnamed_task__/MinReturn -169.467 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.1093 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.50456 +policy/KL 0.027261 +policy/KLBefore 0 +policy/LossAfter -0.238653 +policy/LossBefore 0.0388852 +policy/dLoss 0.277538 +---------------------------------------- ------------- +2025-03-29 20:08:48 | [rl2_trainer] epoch #108 | Optimizing policy... +2025-03-29 20:08:48 | [rl2_trainer] epoch #108 | Fitting baseline... +2025-03-29 20:08:48 | [rl2_trainer] epoch #108 | Computing loss before +2025-03-29 20:08:48 | [rl2_trainer] epoch #108 | Computing KL before +2025-03-29 20:08:48 | [rl2_trainer] epoch #108 | Optimizing +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | Computing KL after +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | Computing loss after +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | Saving snapshot... +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | Saved +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | Time 2131.55 s +2025-03-29 20:08:50 | [rl2_trainer] epoch #108 | EpochTime 19.50 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.5642 +Average/AverageReturn -101.65 +Average/Iteration 108 +Average/MaxReturn -74.4114 +Average/MinReturn -149.51 +Average/NumEpisodes 8 +Average/StdReturn 22.0609 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983382 +TotalEnvSteps 87200 +__unnamed_task__/AverageDiscountedReturn -63.5642 +__unnamed_task__/AverageReturn -101.65 +__unnamed_task__/Iteration 108 +__unnamed_task__/MaxReturn -74.4114 +__unnamed_task__/MinReturn -149.51 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.0609 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.50609 +policy/KL 0.0335249 +policy/KLBefore 0 +policy/LossAfter -0.119793 +policy/LossBefore -0.000457439 +policy/dLoss 0.119335 +---------------------------------------- --------------- +2025-03-29 20:09:06 | [rl2_trainer] epoch #109 | Optimizing policy... +2025-03-29 20:09:06 | [rl2_trainer] epoch #109 | Fitting baseline... +2025-03-29 20:09:06 | [rl2_trainer] epoch #109 | Computing loss before +2025-03-29 20:09:06 | [rl2_trainer] epoch #109 | Computing KL before +2025-03-29 20:09:06 | [rl2_trainer] epoch #109 | Optimizing +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | Computing KL after +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | Computing loss after +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | Saving snapshot... +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | Saved +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | Time 2150.34 s +2025-03-29 20:09:09 | [rl2_trainer] epoch #109 | EpochTime 18.79 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -72.3202 +Average/AverageReturn -115.669 +Average/Iteration 109 +Average/MaxReturn -76.0981 +Average/MinReturn -182.912 +Average/NumEpisodes 8 +Average/StdReturn 35.4959 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976723 +TotalEnvSteps 88000 +__unnamed_task__/AverageDiscountedReturn -72.3202 +__unnamed_task__/AverageReturn -115.669 +__unnamed_task__/Iteration 109 +__unnamed_task__/MaxReturn -76.0981 +__unnamed_task__/MinReturn -182.912 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.4959 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.51541 +policy/KL 0.0189362 +policy/KLBefore 0 +policy/LossAfter -0.233862 +policy/LossBefore -0.0166835 +policy/dLoss 0.217179 +---------------------------------------- ------------- +2025-03-29 20:09:27 | [rl2_trainer] epoch #110 | Optimizing policy... +2025-03-29 20:09:27 | [rl2_trainer] epoch #110 | Fitting baseline... +2025-03-29 20:09:27 | [rl2_trainer] epoch #110 | Computing loss before +2025-03-29 20:09:27 | [rl2_trainer] epoch #110 | Computing KL before +2025-03-29 20:09:28 | [rl2_trainer] epoch #110 | Optimizing +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | Computing KL after +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | Computing loss after +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | Saving snapshot... +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | Saved +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | Time 2171.80 s +2025-03-29 20:09:30 | [rl2_trainer] epoch #110 | EpochTime 21.46 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.0074 +Average/AverageReturn -98.3394 +Average/Iteration 110 +Average/MaxReturn -74.3109 +Average/MinReturn -132.354 +Average/NumEpisodes 8 +Average/StdReturn 16.7432 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985251 +TotalEnvSteps 88800 +__unnamed_task__/AverageDiscountedReturn -63.0074 +__unnamed_task__/AverageReturn -98.3394 +__unnamed_task__/Iteration 110 +__unnamed_task__/MaxReturn -74.3109 +__unnamed_task__/MinReturn -132.354 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.7432 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5163 +policy/KL 0.0201796 +policy/KLBefore 0 +policy/LossAfter -0.119428 +policy/LossBefore -0.00584105 +policy/dLoss 0.113587 +---------------------------------------- -------------- +2025-03-29 20:09:48 | [rl2_trainer] epoch #111 | Optimizing policy... +2025-03-29 20:09:48 | [rl2_trainer] epoch #111 | Fitting baseline... +2025-03-29 20:09:48 | [rl2_trainer] epoch #111 | Computing loss before +2025-03-29 20:09:48 | [rl2_trainer] epoch #111 | Computing KL before +2025-03-29 20:09:48 | [rl2_trainer] epoch #111 | Optimizing +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | Computing KL after +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | Computing loss after +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | Saving snapshot... +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | Saved +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | Time 2192.60 s +2025-03-29 20:09:51 | [rl2_trainer] epoch #111 | EpochTime 20.80 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -75.6036 +Average/AverageReturn -125.463 +Average/Iteration 111 +Average/MaxReturn -73.4134 +Average/MinReturn -177.938 +Average/NumEpisodes 8 +Average/StdReturn 35.7273 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.98196 +TotalEnvSteps 89600 +__unnamed_task__/AverageDiscountedReturn -75.6036 +__unnamed_task__/AverageReturn -125.463 +__unnamed_task__/Iteration 111 +__unnamed_task__/MaxReturn -73.4134 +__unnamed_task__/MinReturn -177.938 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.7273 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.50807 +policy/KL 0.0350799 +policy/KLBefore 0 +policy/LossAfter -0.292675 +policy/LossBefore -0.0111456 +policy/dLoss 0.281529 +---------------------------------------- ------------- +2025-03-29 20:10:09 | [rl2_trainer] epoch #112 | Optimizing policy... +2025-03-29 20:10:09 | [rl2_trainer] epoch #112 | Fitting baseline... +2025-03-29 20:10:09 | [rl2_trainer] epoch #112 | Computing loss before +2025-03-29 20:10:09 | [rl2_trainer] epoch #112 | Computing KL before +2025-03-29 20:10:09 | [rl2_trainer] epoch #112 | Optimizing +2025-03-29 20:10:11 | [rl2_trainer] epoch #112 | Computing KL after +2025-03-29 20:10:11 | [rl2_trainer] epoch #112 | Computing loss after +2025-03-29 20:10:12 | [rl2_trainer] epoch #112 | Saving snapshot... +2025-03-29 20:10:12 | [rl2_trainer] epoch #112 | Saved +2025-03-29 20:10:12 | [rl2_trainer] epoch #112 | Time 2212.98 s +2025-03-29 20:10:12 | [rl2_trainer] epoch #112 | EpochTime 20.38 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.8262 +Average/AverageReturn -92.8164 +Average/Iteration 112 +Average/MaxReturn -71.4176 +Average/MinReturn -141.648 +Average/NumEpisodes 8 +Average/StdReturn 21.133 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976591 +TotalEnvSteps 90400 +__unnamed_task__/AverageDiscountedReturn -60.8262 +__unnamed_task__/AverageReturn -92.8164 +__unnamed_task__/Iteration 112 +__unnamed_task__/MaxReturn -71.4176 +__unnamed_task__/MinReturn -141.648 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.133 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.49862 +policy/KL 0.0316944 +policy/KLBefore 0 +policy/LossAfter -0.158163 +policy/LossBefore -0.00474336 +policy/dLoss 0.15342 +---------------------------------------- -------------- +2025-03-29 20:10:32 | [rl2_trainer] epoch #113 | Optimizing policy... +2025-03-29 20:10:32 | [rl2_trainer] epoch #113 | Fitting baseline... +2025-03-29 20:10:32 | [rl2_trainer] epoch #113 | Computing loss before +2025-03-29 20:10:32 | [rl2_trainer] epoch #113 | Computing KL before +2025-03-29 20:10:32 | [rl2_trainer] epoch #113 | Optimizing +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | Computing KL after +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | Computing loss after +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | Saving snapshot... +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | Saved +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | Time 2236.70 s +2025-03-29 20:10:35 | [rl2_trainer] epoch #113 | EpochTime 23.71 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -66.2307 +Average/AverageReturn -105.14 +Average/Iteration 113 +Average/MaxReturn -72.4106 +Average/MinReturn -168.333 +Average/NumEpisodes 8 +Average/StdReturn 27.4373 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975556 +TotalEnvSteps 91200 +__unnamed_task__/AverageDiscountedReturn -66.2307 +__unnamed_task__/AverageReturn -105.14 +__unnamed_task__/Iteration 113 +__unnamed_task__/MaxReturn -72.4106 +__unnamed_task__/MinReturn -168.333 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.4373 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.48961 +policy/KL 0.0239973 +policy/KLBefore 0 +policy/LossAfter -0.174806 +policy/LossBefore 0.0305283 +policy/dLoss 0.205334 +---------------------------------------- ------------- +2025-03-29 20:10:55 | [rl2_trainer] epoch #114 | Optimizing policy... +2025-03-29 20:10:55 | [rl2_trainer] epoch #114 | Fitting baseline... +2025-03-29 20:10:55 | [rl2_trainer] epoch #114 | Computing loss before +2025-03-29 20:10:55 | [rl2_trainer] epoch #114 | Computing KL before +2025-03-29 20:10:55 | [rl2_trainer] epoch #114 | Optimizing +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | Computing KL after +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | Computing loss after +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | Saving snapshot... +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | Saved +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | Time 2259.32 s +2025-03-29 20:10:58 | [rl2_trainer] epoch #114 | EpochTime 22.62 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -64.1424 +Average/AverageReturn -98.3598 +Average/Iteration 114 +Average/MaxReturn -84.0277 +Average/MinReturn -111.423 +Average/NumEpisodes 8 +Average/StdReturn 9.91524 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969521 +TotalEnvSteps 92000 +__unnamed_task__/AverageDiscountedReturn -64.1424 +__unnamed_task__/AverageReturn -98.3598 +__unnamed_task__/Iteration 114 +__unnamed_task__/MaxReturn -84.0277 +__unnamed_task__/MinReturn -111.423 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 9.91524 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47816 +policy/KL 0.0229969 +policy/KLBefore 0 +policy/LossAfter -0.193277 +policy/LossBefore -0.0241348 +policy/dLoss 0.169143 +---------------------------------------- ------------- +2025-03-29 20:11:15 | [rl2_trainer] epoch #115 | Optimizing policy... +2025-03-29 20:11:15 | [rl2_trainer] epoch #115 | Fitting baseline... +2025-03-29 20:11:15 | [rl2_trainer] epoch #115 | Computing loss before +2025-03-29 20:11:15 | [rl2_trainer] epoch #115 | Computing KL before +2025-03-29 20:11:15 | [rl2_trainer] epoch #115 | Optimizing +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | Computing KL after +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | Computing loss after +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | Saving snapshot... +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | Saved +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | Time 2278.74 s +2025-03-29 20:11:17 | [rl2_trainer] epoch #115 | EpochTime 19.43 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -63.0049 +Average/AverageReturn -99.1343 +Average/Iteration 115 +Average/MaxReturn -73.9547 +Average/MinReturn -129.883 +Average/NumEpisodes 8 +Average/StdReturn 17.8485 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972273 +TotalEnvSteps 92800 +__unnamed_task__/AverageDiscountedReturn -63.0049 +__unnamed_task__/AverageReturn -99.1343 +__unnamed_task__/Iteration 115 +__unnamed_task__/MaxReturn -73.9547 +__unnamed_task__/MinReturn -129.883 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.8485 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47521 +policy/KL 0.0212481 +policy/KLBefore 0 +policy/LossAfter -0.219112 +policy/LossBefore -0.0340928 +policy/dLoss 0.185019 +---------------------------------------- ------------- +2025-03-29 20:11:37 | [rl2_trainer] epoch #116 | Optimizing policy... +2025-03-29 20:11:37 | [rl2_trainer] epoch #116 | Fitting baseline... +2025-03-29 20:11:37 | [rl2_trainer] epoch #116 | Computing loss before +2025-03-29 20:11:37 | [rl2_trainer] epoch #116 | Computing KL before +2025-03-29 20:11:37 | [rl2_trainer] epoch #116 | Optimizing +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | Computing KL after +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | Computing loss after +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | Saving snapshot... +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | Saved +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | Time 2300.96 s +2025-03-29 20:11:39 | [rl2_trainer] epoch #116 | EpochTime 22.21 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.5736 +Average/AverageReturn -107.719 +Average/Iteration 116 +Average/MaxReturn -76.1242 +Average/MinReturn -169.912 +Average/NumEpisodes 8 +Average/StdReturn 27.8206 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968967 +TotalEnvSteps 93600 +__unnamed_task__/AverageDiscountedReturn -68.5736 +__unnamed_task__/AverageReturn -107.719 +__unnamed_task__/Iteration 116 +__unnamed_task__/MaxReturn -76.1242 +__unnamed_task__/MinReturn -169.912 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.8206 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47255 +policy/KL 0.0190703 +policy/KLBefore 0 +policy/LossAfter -0.215582 +policy/LossBefore -0.00473711 +policy/dLoss 0.210845 +---------------------------------------- -------------- +2025-03-29 20:12:00 | [rl2_trainer] epoch #117 | Optimizing policy... +2025-03-29 20:12:00 | [rl2_trainer] epoch #117 | Fitting baseline... +2025-03-29 20:12:00 | [rl2_trainer] epoch #117 | Computing loss before +2025-03-29 20:12:00 | [rl2_trainer] epoch #117 | Computing KL before +2025-03-29 20:12:00 | [rl2_trainer] epoch #117 | Optimizing +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | Computing KL after +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | Computing loss after +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | Saving snapshot... +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | Saved +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | Time 2323.54 s +2025-03-29 20:12:02 | [rl2_trainer] epoch #117 | EpochTime 22.58 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -59.4978 +Average/AverageReturn -91.0757 +Average/Iteration 117 +Average/MaxReturn -80.4334 +Average/MinReturn -102.93 +Average/NumEpisodes 8 +Average/StdReturn 8.57863 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980428 +TotalEnvSteps 94400 +__unnamed_task__/AverageDiscountedReturn -59.4978 +__unnamed_task__/AverageReturn -91.0757 +__unnamed_task__/Iteration 117 +__unnamed_task__/MaxReturn -80.4334 +__unnamed_task__/MinReturn -102.93 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 8.57863 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.4727 +policy/KL 0.026133 +policy/KLBefore 0 +policy/LossAfter -0.102929 +policy/LossBefore 0.00332891 +policy/dLoss 0.106258 +---------------------------------------- -------------- +2025-03-29 20:12:19 | [rl2_trainer] epoch #118 | Optimizing policy... +2025-03-29 20:12:19 | [rl2_trainer] epoch #118 | Fitting baseline... +2025-03-29 20:12:19 | [rl2_trainer] epoch #118 | Computing loss before +2025-03-29 20:12:19 | [rl2_trainer] epoch #118 | Computing KL before +2025-03-29 20:12:19 | [rl2_trainer] epoch #118 | Optimizing +2025-03-29 20:12:21 | [rl2_trainer] epoch #118 | Computing KL after +2025-03-29 20:12:22 | [rl2_trainer] epoch #118 | Computing loss after +2025-03-29 20:12:22 | [rl2_trainer] epoch #118 | Saving snapshot... +2025-03-29 20:12:22 | [rl2_trainer] epoch #118 | Saved +2025-03-29 20:12:22 | [rl2_trainer] epoch #118 | Time 2343.07 s +2025-03-29 20:12:22 | [rl2_trainer] epoch #118 | EpochTime 19.53 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -60.811 +Average/AverageReturn -91.8881 +Average/Iteration 118 +Average/MaxReturn -72.9161 +Average/MinReturn -113.334 +Average/NumEpisodes 8 +Average/StdReturn 11.4105 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981644 +TotalEnvSteps 95200 +__unnamed_task__/AverageDiscountedReturn -60.811 +__unnamed_task__/AverageReturn -91.8881 +__unnamed_task__/Iteration 118 +__unnamed_task__/MaxReturn -72.9161 +__unnamed_task__/MinReturn -113.334 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 11.4105 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47354 +policy/KL 0.0285675 +policy/KLBefore 0 +policy/LossAfter -0.118247 +policy/LossBefore -0.000998297 +policy/dLoss 0.117248 +---------------------------------------- --------------- +2025-03-29 20:12:38 | [rl2_trainer] epoch #119 | Optimizing policy... +2025-03-29 20:12:38 | [rl2_trainer] epoch #119 | Fitting baseline... +2025-03-29 20:12:38 | [rl2_trainer] epoch #119 | Computing loss before +2025-03-29 20:12:38 | [rl2_trainer] epoch #119 | Computing KL before +2025-03-29 20:12:38 | [rl2_trainer] epoch #119 | Optimizing +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | Computing KL after +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | Computing loss after +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | Saving snapshot... +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | Saved +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | Time 2361.45 s +2025-03-29 20:12:40 | [rl2_trainer] epoch #119 | EpochTime 18.38 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -67.7506 +Average/AverageReturn -106.047 +Average/Iteration 119 +Average/MaxReturn -62.2359 +Average/MinReturn -181.351 +Average/NumEpisodes 8 +Average/StdReturn 44.9276 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979567 +TotalEnvSteps 96000 +__unnamed_task__/AverageDiscountedReturn -67.7506 +__unnamed_task__/AverageReturn -106.047 +__unnamed_task__/Iteration 119 +__unnamed_task__/MaxReturn -62.2359 +__unnamed_task__/MinReturn -181.351 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.9276 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.48586 +policy/KL 0.0234875 +policy/KLBefore 0 +policy/LossAfter -0.274546 +policy/LossBefore -0.00687263 +policy/dLoss 0.267673 +---------------------------------------- -------------- +2025-03-29 20:12:57 | [rl2_trainer] epoch #120 | Optimizing policy... +2025-03-29 20:12:57 | [rl2_trainer] epoch #120 | Fitting baseline... +2025-03-29 20:12:57 | [rl2_trainer] epoch #120 | Computing loss before +2025-03-29 20:12:57 | [rl2_trainer] epoch #120 | Computing KL before +2025-03-29 20:12:57 | [rl2_trainer] epoch #120 | Optimizing +2025-03-29 20:12:59 | [rl2_trainer] epoch #120 | Computing KL after +2025-03-29 20:13:00 | [rl2_trainer] epoch #120 | Computing loss after +2025-03-29 20:13:00 | [rl2_trainer] epoch #120 | Saving snapshot... +2025-03-29 20:13:00 | [rl2_trainer] epoch #120 | Saved +2025-03-29 20:13:00 | [rl2_trainer] epoch #120 | Time 2381.07 s +2025-03-29 20:13:00 | [rl2_trainer] epoch #120 | EpochTime 19.62 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.1574 +Average/AverageReturn -89.2771 +Average/Iteration 120 +Average/MaxReturn -69.1626 +Average/MinReturn -106.577 +Average/NumEpisodes 8 +Average/StdReturn 12.6093 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968036 +TotalEnvSteps 96800 +__unnamed_task__/AverageDiscountedReturn -60.1574 +__unnamed_task__/AverageReturn -89.2771 +__unnamed_task__/Iteration 120 +__unnamed_task__/MaxReturn -69.1626 +__unnamed_task__/MinReturn -106.577 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.6093 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.48054 +policy/KL 0.024117 +policy/KLBefore 0 +policy/LossAfter -0.157649 +policy/LossBefore -0.00705087 +policy/dLoss 0.150598 +---------------------------------------- -------------- +2025-03-29 20:13:17 | [rl2_trainer] epoch #121 | Optimizing policy... +2025-03-29 20:13:17 | [rl2_trainer] epoch #121 | Fitting baseline... +2025-03-29 20:13:17 | [rl2_trainer] epoch #121 | Computing loss before +2025-03-29 20:13:17 | [rl2_trainer] epoch #121 | Computing KL before +2025-03-29 20:13:17 | [rl2_trainer] epoch #121 | Optimizing +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | Computing KL after +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | Computing loss after +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | Saving snapshot... +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | Saved +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | Time 2400.63 s +2025-03-29 20:13:19 | [rl2_trainer] epoch #121 | EpochTime 19.55 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -62.2982 +Average/AverageReturn -96.4687 +Average/Iteration 121 +Average/MaxReturn -43.7169 +Average/MinReturn -125.674 +Average/NumEpisodes 8 +Average/StdReturn 24.0097 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969308 +TotalEnvSteps 97600 +__unnamed_task__/AverageDiscountedReturn -62.2982 +__unnamed_task__/AverageReturn -96.4687 +__unnamed_task__/Iteration 121 +__unnamed_task__/MaxReturn -43.7169 +__unnamed_task__/MinReturn -125.674 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.0097 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.4781 +policy/KL 0.0279368 +policy/KLBefore 0 +policy/LossAfter -0.17394 +policy/LossBefore -0.031585 +policy/dLoss 0.142355 +---------------------------------------- ------------- +2025-03-29 20:13:36 | [rl2_trainer] epoch #122 | Optimizing policy... +2025-03-29 20:13:36 | [rl2_trainer] epoch #122 | Fitting baseline... +2025-03-29 20:13:36 | [rl2_trainer] epoch #122 | Computing loss before +2025-03-29 20:13:36 | [rl2_trainer] epoch #122 | Computing KL before +2025-03-29 20:13:36 | [rl2_trainer] epoch #122 | Optimizing +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | Computing KL after +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | Computing loss after +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | Saving snapshot... +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | Saved +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | Time 2419.56 s +2025-03-29 20:13:38 | [rl2_trainer] epoch #122 | EpochTime 18.93 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -62.1672 +Average/AverageReturn -94.115 +Average/Iteration 122 +Average/MaxReturn -48.1401 +Average/MinReturn -164.1 +Average/NumEpisodes 8 +Average/StdReturn 35.3723 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.948676 +TotalEnvSteps 98400 +__unnamed_task__/AverageDiscountedReturn -62.1672 +__unnamed_task__/AverageReturn -94.115 +__unnamed_task__/Iteration 122 +__unnamed_task__/MaxReturn -48.1401 +__unnamed_task__/MinReturn -164.1 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.3723 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.45813 +policy/KL 0.020978 +policy/KLBefore 0 +policy/LossAfter -0.259526 +policy/LossBefore -0.0260011 +policy/dLoss 0.233525 +---------------------------------------- ------------- +2025-03-29 20:13:57 | [rl2_trainer] epoch #123 | Optimizing policy... +2025-03-29 20:13:57 | [rl2_trainer] epoch #123 | Fitting baseline... +2025-03-29 20:13:57 | [rl2_trainer] epoch #123 | Computing loss before +2025-03-29 20:13:57 | [rl2_trainer] epoch #123 | Computing KL before +2025-03-29 20:13:57 | [rl2_trainer] epoch #123 | Optimizing +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | Computing KL after +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | Computing loss after +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | Saving snapshot... +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | Saved +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | Time 2441.37 s +2025-03-29 20:14:00 | [rl2_trainer] epoch #123 | EpochTime 21.81 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -56.2888 +Average/AverageReturn -82.879 +Average/Iteration 123 +Average/MaxReturn -47.2772 +Average/MinReturn -108.482 +Average/NumEpisodes 8 +Average/StdReturn 20.8362 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981637 +TotalEnvSteps 99200 +__unnamed_task__/AverageDiscountedReturn -56.2888 +__unnamed_task__/AverageReturn -82.879 +__unnamed_task__/Iteration 123 +__unnamed_task__/MaxReturn -47.2772 +__unnamed_task__/MinReturn -108.482 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.8362 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.45221 +policy/KL 0.0151034 +policy/KLBefore 0 +policy/LossAfter -0.126014 +policy/LossBefore 0.00441326 +policy/dLoss 0.130427 +---------------------------------------- -------------- +2025-03-29 20:14:20 | [rl2_trainer] epoch #124 | Optimizing policy... +2025-03-29 20:14:20 | [rl2_trainer] epoch #124 | Fitting baseline... +2025-03-29 20:14:20 | [rl2_trainer] epoch #124 | Computing loss before +2025-03-29 20:14:20 | [rl2_trainer] epoch #124 | Computing KL before +2025-03-29 20:14:20 | [rl2_trainer] epoch #124 | Optimizing +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | Computing KL after +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | Computing loss after +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | Saving snapshot... +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | Saved +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | Time 2464.56 s +2025-03-29 20:14:23 | [rl2_trainer] epoch #124 | EpochTime 23.19 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -55.3191 +Average/AverageReturn -84.643 +Average/Iteration 124 +Average/MaxReturn -58.0679 +Average/MinReturn -116.259 +Average/NumEpisodes 8 +Average/StdReturn 18.1535 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975021 +TotalEnvSteps 100000 +__unnamed_task__/AverageDiscountedReturn -55.3191 +__unnamed_task__/AverageReturn -84.643 +__unnamed_task__/Iteration 124 +__unnamed_task__/MaxReturn -58.0679 +__unnamed_task__/MinReturn -116.259 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 18.1535 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.4641 +policy/KL 0.0230975 +policy/KLBefore 0 +policy/LossAfter -0.124848 +policy/LossBefore -0.00627365 +policy/dLoss 0.118574 +---------------------------------------- --------------- +2025-03-29 20:14:43 | [rl2_trainer] epoch #125 | Optimizing policy... +2025-03-29 20:14:43 | [rl2_trainer] epoch #125 | Fitting baseline... +2025-03-29 20:14:43 | [rl2_trainer] epoch #125 | Computing loss before +2025-03-29 20:14:43 | [rl2_trainer] epoch #125 | Computing KL before +2025-03-29 20:14:43 | [rl2_trainer] epoch #125 | Optimizing +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | Computing KL after +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | Computing loss after +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | Saving snapshot... +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | Saved +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | Time 2487.36 s +2025-03-29 20:14:46 | [rl2_trainer] epoch #125 | EpochTime 22.79 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -59.6031 +Average/AverageReturn -92.6012 +Average/Iteration 125 +Average/MaxReturn -71.2832 +Average/MinReturn -129.898 +Average/NumEpisodes 8 +Average/StdReturn 15.8789 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.95541 +TotalEnvSteps 100800 +__unnamed_task__/AverageDiscountedReturn -59.6031 +__unnamed_task__/AverageReturn -92.6012 +__unnamed_task__/Iteration 125 +__unnamed_task__/MaxReturn -71.2832 +__unnamed_task__/MinReturn -129.898 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.8789 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47795 +policy/KL 0.0274014 +policy/KLBefore 0 +policy/LossAfter -0.210902 +policy/LossBefore 0.00030746 +policy/dLoss 0.211209 +---------------------------------------- --------------- +2025-03-29 20:15:06 | [rl2_trainer] epoch #126 | Optimizing policy... +2025-03-29 20:15:06 | [rl2_trainer] epoch #126 | Fitting baseline... +2025-03-29 20:15:06 | [rl2_trainer] epoch #126 | Computing loss before +2025-03-29 20:15:06 | [rl2_trainer] epoch #126 | Computing KL before +2025-03-29 20:15:06 | [rl2_trainer] epoch #126 | Optimizing +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | Computing KL after +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | Computing loss after +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | Saving snapshot... +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | Saved +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | Time 2510.31 s +2025-03-29 20:15:09 | [rl2_trainer] epoch #126 | EpochTime 22.95 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -57.2191 +Average/AverageReturn -87.016 +Average/Iteration 126 +Average/MaxReturn -58.9728 +Average/MinReturn -106.074 +Average/NumEpisodes 8 +Average/StdReturn 16.3244 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975568 +TotalEnvSteps 101600 +__unnamed_task__/AverageDiscountedReturn -57.2191 +__unnamed_task__/AverageReturn -87.016 +__unnamed_task__/Iteration 126 +__unnamed_task__/MaxReturn -58.9728 +__unnamed_task__/MinReturn -106.074 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.3244 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47025 +policy/KL 0.0181434 +policy/KLBefore 0 +policy/LossAfter -0.156844 +policy/LossBefore -0.0189182 +policy/dLoss 0.137926 +---------------------------------------- -------------- +2025-03-29 20:15:29 | [rl2_trainer] epoch #127 | Optimizing policy... +2025-03-29 20:15:29 | [rl2_trainer] epoch #127 | Fitting baseline... +2025-03-29 20:15:29 | [rl2_trainer] epoch #127 | Computing loss before +2025-03-29 20:15:29 | [rl2_trainer] epoch #127 | Computing KL before +2025-03-29 20:15:29 | [rl2_trainer] epoch #127 | Optimizing +2025-03-29 20:15:31 | [rl2_trainer] epoch #127 | Computing KL after +2025-03-29 20:15:31 | [rl2_trainer] epoch #127 | Computing loss after +2025-03-29 20:15:32 | [rl2_trainer] epoch #127 | Saving snapshot... +2025-03-29 20:15:32 | [rl2_trainer] epoch #127 | Saved +2025-03-29 20:15:32 | [rl2_trainer] epoch #127 | Time 2533.07 s +2025-03-29 20:15:32 | [rl2_trainer] epoch #127 | EpochTime 22.76 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.5702 +Average/AverageReturn -104.947 +Average/Iteration 127 +Average/MaxReturn -47.6027 +Average/MinReturn -181.013 +Average/NumEpisodes 8 +Average/StdReturn 38.828 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977767 +TotalEnvSteps 102400 +__unnamed_task__/AverageDiscountedReturn -63.5702 +__unnamed_task__/AverageReturn -104.947 +__unnamed_task__/Iteration 127 +__unnamed_task__/MaxReturn -47.6027 +__unnamed_task__/MinReturn -181.013 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.828 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.45868 +policy/KL 0.0292094 +policy/KLBefore 0 +policy/LossAfter -0.334349 +policy/LossBefore -0.0186366 +policy/dLoss 0.315713 +---------------------------------------- -------------- +2025-03-29 20:15:52 | [rl2_trainer] epoch #128 | Optimizing policy... +2025-03-29 20:15:52 | [rl2_trainer] epoch #128 | Fitting baseline... +2025-03-29 20:15:52 | [rl2_trainer] epoch #128 | Computing loss before +2025-03-29 20:15:52 | [rl2_trainer] epoch #128 | Computing KL before +2025-03-29 20:15:52 | [rl2_trainer] epoch #128 | Optimizing +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | Computing KL after +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | Computing loss after +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | Saving snapshot... +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | Saved +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | Time 2556.10 s +2025-03-29 20:15:55 | [rl2_trainer] epoch #128 | EpochTime 23.03 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -55.4995 +Average/AverageReturn -83.3086 +Average/Iteration 128 +Average/MaxReturn -53.337 +Average/MinReturn -130.796 +Average/NumEpisodes 8 +Average/StdReturn 21.6123 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966165 +TotalEnvSteps 103200 +__unnamed_task__/AverageDiscountedReturn -55.4995 +__unnamed_task__/AverageReturn -83.3086 +__unnamed_task__/Iteration 128 +__unnamed_task__/MaxReturn -53.337 +__unnamed_task__/MinReturn -130.796 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.6123 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.43681 +policy/KL 0.0174842 +policy/KLBefore 0 +policy/LossAfter -0.208772 +policy/LossBefore 0.00495287 +policy/dLoss 0.213725 +---------------------------------------- --------------- +2025-03-29 20:16:15 | [rl2_trainer] epoch #129 | Optimizing policy... +2025-03-29 20:16:15 | [rl2_trainer] epoch #129 | Fitting baseline... +2025-03-29 20:16:15 | [rl2_trainer] epoch #129 | Computing loss before +2025-03-29 20:16:15 | [rl2_trainer] epoch #129 | Computing KL before +2025-03-29 20:16:15 | [rl2_trainer] epoch #129 | Optimizing +2025-03-29 20:16:17 | [rl2_trainer] epoch #129 | Computing KL after +2025-03-29 20:16:17 | [rl2_trainer] epoch #129 | Computing loss after +2025-03-29 20:16:18 | [rl2_trainer] epoch #129 | Saving snapshot... +2025-03-29 20:16:18 | [rl2_trainer] epoch #129 | Saved +2025-03-29 20:16:18 | [rl2_trainer] epoch #129 | Time 2579.05 s +2025-03-29 20:16:18 | [rl2_trainer] epoch #129 | EpochTime 22.94 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -70.0176 +Average/AverageReturn -112.505 +Average/Iteration 129 +Average/MaxReturn -89.5515 +Average/MinReturn -171.402 +Average/NumEpisodes 8 +Average/StdReturn 25.9531 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981235 +TotalEnvSteps 104000 +__unnamed_task__/AverageDiscountedReturn -70.0176 +__unnamed_task__/AverageReturn -112.505 +__unnamed_task__/Iteration 129 +__unnamed_task__/MaxReturn -89.5515 +__unnamed_task__/MinReturn -171.402 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.9531 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.40222 +policy/KL 0.0237927 +policy/KLBefore 0 +policy/LossAfter -0.194374 +policy/LossBefore -0.00761056 +policy/dLoss 0.186764 +---------------------------------------- --------------- +2025-03-29 20:16:37 | [rl2_trainer] epoch #130 | Optimizing policy... +2025-03-29 20:16:37 | [rl2_trainer] epoch #130 | Fitting baseline... +2025-03-29 20:16:37 | [rl2_trainer] epoch #130 | Computing loss before +2025-03-29 20:16:37 | [rl2_trainer] epoch #130 | Computing KL before +2025-03-29 20:16:37 | [rl2_trainer] epoch #130 | Optimizing +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | Computing KL after +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | Computing loss after +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | Saving snapshot... +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | Saved +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | Time 2601.37 s +2025-03-29 20:16:40 | [rl2_trainer] epoch #130 | EpochTime 22.32 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.9437 +Average/AverageReturn -92.8291 +Average/Iteration 130 +Average/MaxReturn -55.8631 +Average/MinReturn -158.686 +Average/NumEpisodes 8 +Average/StdReturn 30.2719 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980063 +TotalEnvSteps 104800 +__unnamed_task__/AverageDiscountedReturn -60.9437 +__unnamed_task__/AverageReturn -92.8291 +__unnamed_task__/Iteration 130 +__unnamed_task__/MaxReturn -55.8631 +__unnamed_task__/MinReturn -158.686 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.2719 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.38464 +policy/KL 0.0268945 +policy/KLBefore 0 +policy/LossAfter -0.212586 +policy/LossBefore 0.0147007 +policy/dLoss 0.227287 +---------------------------------------- -------------- +2025-03-29 20:16:59 | [rl2_trainer] epoch #131 | Optimizing policy... +2025-03-29 20:16:59 | [rl2_trainer] epoch #131 | Fitting baseline... +2025-03-29 20:16:59 | [rl2_trainer] epoch #131 | Computing loss before +2025-03-29 20:16:59 | [rl2_trainer] epoch #131 | Computing KL before +2025-03-29 20:16:59 | [rl2_trainer] epoch #131 | Optimizing +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | Computing KL after +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | Computing loss after +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | Saving snapshot... +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | Saved +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | Time 2623.25 s +2025-03-29 20:17:02 | [rl2_trainer] epoch #131 | EpochTime 21.88 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -77.4599 +Average/AverageReturn -124.5 +Average/Iteration 131 +Average/MaxReturn -73.6955 +Average/MinReturn -215.754 +Average/NumEpisodes 8 +Average/StdReturn 52.8203 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969299 +TotalEnvSteps 105600 +__unnamed_task__/AverageDiscountedReturn -77.4599 +__unnamed_task__/AverageReturn -124.5 +__unnamed_task__/Iteration 131 +__unnamed_task__/MaxReturn -73.6955 +__unnamed_task__/MinReturn -215.754 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 52.8203 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.3648 +policy/KL 0.028104 +policy/KLBefore 0 +policy/LossAfter -0.430037 +policy/LossBefore 0.0161302 +policy/dLoss 0.446167 +---------------------------------------- -------------- +2025-03-29 20:17:22 | [rl2_trainer] epoch #132 | Optimizing policy... +2025-03-29 20:17:22 | [rl2_trainer] epoch #132 | Fitting baseline... +2025-03-29 20:17:22 | [rl2_trainer] epoch #132 | Computing loss before +2025-03-29 20:17:22 | [rl2_trainer] epoch #132 | Computing KL before +2025-03-29 20:17:22 | [rl2_trainer] epoch #132 | Optimizing +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | Computing KL after +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | Computing loss after +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | Saving snapshot... +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | Saved +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | Time 2646.79 s +2025-03-29 20:17:25 | [rl2_trainer] epoch #132 | EpochTime 23.54 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.5626 +Average/AverageReturn -97.2782 +Average/Iteration 132 +Average/MaxReturn -48.8213 +Average/MinReturn -194.919 +Average/NumEpisodes 8 +Average/StdReturn 44.8081 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978798 +TotalEnvSteps 106400 +__unnamed_task__/AverageDiscountedReturn -60.5626 +__unnamed_task__/AverageReturn -97.2782 +__unnamed_task__/Iteration 132 +__unnamed_task__/MaxReturn -48.8213 +__unnamed_task__/MinReturn -194.919 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.8081 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.35632 +policy/KL 0.0190402 +policy/KLBefore 0 +policy/LossAfter -0.283382 +policy/LossBefore 0.0245832 +policy/dLoss 0.307965 +---------------------------------------- -------------- +2025-03-29 20:17:45 | [rl2_trainer] epoch #133 | Optimizing policy... +2025-03-29 20:17:45 | [rl2_trainer] epoch #133 | Fitting baseline... +2025-03-29 20:17:45 | [rl2_trainer] epoch #133 | Computing loss before +2025-03-29 20:17:45 | [rl2_trainer] epoch #133 | Computing KL before +2025-03-29 20:17:45 | [rl2_trainer] epoch #133 | Optimizing +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | Computing KL after +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | Computing loss after +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | Saving snapshot... +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | Saved +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | Time 2669.76 s +2025-03-29 20:17:48 | [rl2_trainer] epoch #133 | EpochTime 22.96 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -71.2136 +Average/AverageReturn -115.308 +Average/Iteration 133 +Average/MaxReturn -72.3227 +Average/MinReturn -186.583 +Average/NumEpisodes 8 +Average/StdReturn 33.054 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986086 +TotalEnvSteps 107200 +__unnamed_task__/AverageDiscountedReturn -71.2136 +__unnamed_task__/AverageReturn -115.308 +__unnamed_task__/Iteration 133 +__unnamed_task__/MaxReturn -72.3227 +__unnamed_task__/MinReturn -186.583 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.054 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.36591 +policy/KL 0.0270038 +policy/KLBefore 0 +policy/LossAfter -0.225646 +policy/LossBefore -0.00307165 +policy/dLoss 0.222574 +---------------------------------------- --------------- +2025-03-29 20:18:08 | [rl2_trainer] epoch #134 | Optimizing policy... +2025-03-29 20:18:08 | [rl2_trainer] epoch #134 | Fitting baseline... +2025-03-29 20:18:08 | [rl2_trainer] epoch #134 | Computing loss before +2025-03-29 20:18:08 | [rl2_trainer] epoch #134 | Computing KL before +2025-03-29 20:18:08 | [rl2_trainer] epoch #134 | Optimizing +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | Computing KL after +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | Computing loss after +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | Saving snapshot... +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | Saved +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | Time 2692.58 s +2025-03-29 20:18:11 | [rl2_trainer] epoch #134 | EpochTime 22.83 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.3741 +Average/AverageReturn -92.184 +Average/Iteration 134 +Average/MaxReturn -42.1301 +Average/MinReturn -180.25 +Average/NumEpisodes 8 +Average/StdReturn 37.4102 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.956261 +TotalEnvSteps 108000 +__unnamed_task__/AverageDiscountedReturn -61.3741 +__unnamed_task__/AverageReturn -92.184 +__unnamed_task__/Iteration 134 +__unnamed_task__/MaxReturn -42.1301 +__unnamed_task__/MinReturn -180.25 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.4102 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.36094 +policy/KL 0.0143196 +policy/KLBefore 0 +policy/LossAfter -0.226807 +policy/LossBefore -0.0205612 +policy/dLoss 0.206246 +---------------------------------------- -------------- +2025-03-29 20:18:32 | [rl2_trainer] epoch #135 | Optimizing policy... +2025-03-29 20:18:32 | [rl2_trainer] epoch #135 | Fitting baseline... +2025-03-29 20:18:32 | [rl2_trainer] epoch #135 | Computing loss before +2025-03-29 20:18:32 | [rl2_trainer] epoch #135 | Computing KL before +2025-03-29 20:18:32 | [rl2_trainer] epoch #135 | Optimizing +2025-03-29 20:18:34 | [rl2_trainer] epoch #135 | Computing KL after +2025-03-29 20:18:35 | [rl2_trainer] epoch #135 | Computing loss after +2025-03-29 20:18:35 | [rl2_trainer] epoch #135 | Saving snapshot... +2025-03-29 20:18:35 | [rl2_trainer] epoch #135 | Saved +2025-03-29 20:18:35 | [rl2_trainer] epoch #135 | Time 2716.09 s +2025-03-29 20:18:35 | [rl2_trainer] epoch #135 | EpochTime 23.50 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -58.2279 +Average/AverageReturn -87.4827 +Average/Iteration 135 +Average/MaxReturn -49.5893 +Average/MinReturn -128.768 +Average/NumEpisodes 8 +Average/StdReturn 22.7776 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982486 +TotalEnvSteps 108800 +__unnamed_task__/AverageDiscountedReturn -58.2279 +__unnamed_task__/AverageReturn -87.4827 +__unnamed_task__/Iteration 135 +__unnamed_task__/MaxReturn -49.5893 +__unnamed_task__/MinReturn -128.768 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.7776 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.34651 +policy/KL 0.0314631 +policy/KLBefore 0 +policy/LossAfter -0.18994 +policy/LossBefore -0.0316223 +policy/dLoss 0.158318 +---------------------------------------- -------------- +2025-03-29 20:18:54 | [rl2_trainer] epoch #136 | Optimizing policy... +2025-03-29 20:18:54 | [rl2_trainer] epoch #136 | Fitting baseline... +2025-03-29 20:18:54 | [rl2_trainer] epoch #136 | Computing loss before +2025-03-29 20:18:54 | [rl2_trainer] epoch #136 | Computing KL before +2025-03-29 20:18:54 | [rl2_trainer] epoch #136 | Optimizing +2025-03-29 20:18:56 | [rl2_trainer] epoch #136 | Computing KL after +2025-03-29 20:18:57 | [rl2_trainer] epoch #136 | Computing loss after +2025-03-29 20:18:57 | [rl2_trainer] epoch #136 | Saving snapshot... +2025-03-29 20:18:57 | [rl2_trainer] epoch #136 | Saved +2025-03-29 20:18:57 | [rl2_trainer] epoch #136 | Time 2738.08 s +2025-03-29 20:18:57 | [rl2_trainer] epoch #136 | EpochTime 21.99 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -65.8349 +Average/AverageReturn -110.285 +Average/Iteration 136 +Average/MaxReturn -23.9895 +Average/MinReturn -165.067 +Average/NumEpisodes 8 +Average/StdReturn 47.8573 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96688 +TotalEnvSteps 109600 +__unnamed_task__/AverageDiscountedReturn -65.8349 +__unnamed_task__/AverageReturn -110.285 +__unnamed_task__/Iteration 136 +__unnamed_task__/MaxReturn -23.9895 +__unnamed_task__/MinReturn -165.067 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 47.8573 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.35865 +policy/KL 0.035229 +policy/KLBefore 0 +policy/LossAfter -0.488197 +policy/LossBefore -0.0204872 +policy/dLoss 0.46771 +---------------------------------------- -------------- +2025-03-29 20:19:16 | [rl2_trainer] epoch #137 | Optimizing policy... +2025-03-29 20:19:16 | [rl2_trainer] epoch #137 | Fitting baseline... +2025-03-29 20:19:16 | [rl2_trainer] epoch #137 | Computing loss before +2025-03-29 20:19:16 | [rl2_trainer] epoch #137 | Computing KL before +2025-03-29 20:19:16 | [rl2_trainer] epoch #137 | Optimizing +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | Computing KL after +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | Computing loss after +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | Saving snapshot... +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | Saved +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | Time 2760.52 s +2025-03-29 20:19:19 | [rl2_trainer] epoch #137 | EpochTime 22.43 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8346 +Average/AverageReturn -114.932 +Average/Iteration 137 +Average/MaxReturn -71.6174 +Average/MinReturn -171.611 +Average/NumEpisodes 8 +Average/StdReturn 40.193 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.949901 +TotalEnvSteps 110400 +__unnamed_task__/AverageDiscountedReturn -71.8346 +__unnamed_task__/AverageReturn -114.932 +__unnamed_task__/Iteration 137 +__unnamed_task__/MaxReturn -71.6174 +__unnamed_task__/MinReturn -171.611 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 40.193 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.37409 +policy/KL 0.0432548 +policy/KLBefore 0 +policy/LossAfter -0.535287 +policy/LossBefore -0.0273559 +policy/dLoss 0.507931 +---------------------------------------- -------------- +2025-03-29 20:19:38 | [rl2_trainer] epoch #138 | Optimizing policy... +2025-03-29 20:19:38 | [rl2_trainer] epoch #138 | Fitting baseline... +2025-03-29 20:19:38 | [rl2_trainer] epoch #138 | Computing loss before +2025-03-29 20:19:38 | [rl2_trainer] epoch #138 | Computing KL before +2025-03-29 20:19:38 | [rl2_trainer] epoch #138 | Optimizing +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | Computing KL after +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | Computing loss after +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | Saving snapshot... +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | Saved +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | Time 2781.75 s +2025-03-29 20:19:40 | [rl2_trainer] epoch #138 | EpochTime 21.22 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3448 +Average/AverageReturn -117.89 +Average/Iteration 138 +Average/MaxReturn -89.7591 +Average/MinReturn -194.475 +Average/NumEpisodes 8 +Average/StdReturn 35.5186 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.964659 +TotalEnvSteps 111200 +__unnamed_task__/AverageDiscountedReturn -73.3448 +__unnamed_task__/AverageReturn -117.89 +__unnamed_task__/Iteration 138 +__unnamed_task__/MaxReturn -89.7591 +__unnamed_task__/MinReturn -194.475 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.5186 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.39616 +policy/KL 0.038952 +policy/KLBefore 0 +policy/LossAfter -0.470726 +policy/LossBefore -0.0261488 +policy/dLoss 0.444577 +---------------------------------------- -------------- +2025-03-29 20:20:00 | [rl2_trainer] epoch #139 | Optimizing policy... +2025-03-29 20:20:00 | [rl2_trainer] epoch #139 | Fitting baseline... +2025-03-29 20:20:00 | [rl2_trainer] epoch #139 | Computing loss before +2025-03-29 20:20:00 | [rl2_trainer] epoch #139 | Computing KL before +2025-03-29 20:20:00 | [rl2_trainer] epoch #139 | Optimizing +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | Computing KL after +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | Computing loss after +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | Saving snapshot... +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | Saved +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | Time 2804.21 s +2025-03-29 20:20:03 | [rl2_trainer] epoch #139 | EpochTime 22.46 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -67.0307 +Average/AverageReturn -108.836 +Average/Iteration 139 +Average/MaxReturn -54.0782 +Average/MinReturn -167.878 +Average/NumEpisodes 8 +Average/StdReturn 32.8364 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.919581 +TotalEnvSteps 112000 +__unnamed_task__/AverageDiscountedReturn -67.0307 +__unnamed_task__/AverageReturn -108.836 +__unnamed_task__/Iteration 139 +__unnamed_task__/MaxReturn -54.0782 +__unnamed_task__/MinReturn -167.878 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.8364 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.42151 +policy/KL 0.0166534 +policy/KLBefore 0 +policy/LossAfter -0.45495 +policy/LossBefore -0.0382988 +policy/dLoss 0.416651 +---------------------------------------- -------------- +2025-03-29 20:20:20 | [rl2_trainer] epoch #140 | Optimizing policy... +2025-03-29 20:20:20 | [rl2_trainer] epoch #140 | Fitting baseline... +2025-03-29 20:20:20 | [rl2_trainer] epoch #140 | Computing loss before +2025-03-29 20:20:20 | [rl2_trainer] epoch #140 | Computing KL before +2025-03-29 20:20:20 | [rl2_trainer] epoch #140 | Optimizing +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | Computing KL after +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | Computing loss after +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | Saving snapshot... +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | Saved +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | Time 2824.11 s +2025-03-29 20:20:23 | [rl2_trainer] epoch #140 | EpochTime 19.90 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -79.5542 +Average/AverageReturn -127.015 +Average/Iteration 140 +Average/MaxReturn -74.4189 +Average/MinReturn -192.632 +Average/NumEpisodes 8 +Average/StdReturn 46.2635 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973513 +TotalEnvSteps 112800 +__unnamed_task__/AverageDiscountedReturn -79.5542 +__unnamed_task__/AverageReturn -127.015 +__unnamed_task__/Iteration 140 +__unnamed_task__/MaxReturn -74.4189 +__unnamed_task__/MinReturn -192.632 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 46.2635 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.41549 +policy/KL 0.0320779 +policy/KLBefore 0 +policy/LossAfter -0.54862 +policy/LossBefore -0.0502436 +policy/dLoss 0.498376 +---------------------------------------- -------------- +2025-03-29 20:20:41 | [rl2_trainer] epoch #141 | Optimizing policy... +2025-03-29 20:20:41 | [rl2_trainer] epoch #141 | Fitting baseline... +2025-03-29 20:20:41 | [rl2_trainer] epoch #141 | Computing loss before +2025-03-29 20:20:41 | [rl2_trainer] epoch #141 | Computing KL before +2025-03-29 20:20:41 | [rl2_trainer] epoch #141 | Optimizing +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | Computing KL after +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | Computing loss after +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | Saving snapshot... +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | Saved +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | Time 2845.49 s +2025-03-29 20:20:44 | [rl2_trainer] epoch #141 | EpochTime 21.38 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -79.2789 +Average/AverageReturn -127.045 +Average/Iteration 141 +Average/MaxReturn -80.4707 +Average/MinReturn -189.214 +Average/NumEpisodes 8 +Average/StdReturn 38.9655 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.962637 +TotalEnvSteps 113600 +__unnamed_task__/AverageDiscountedReturn -79.2789 +__unnamed_task__/AverageReturn -127.045 +__unnamed_task__/Iteration 141 +__unnamed_task__/MaxReturn -80.4707 +__unnamed_task__/MinReturn -189.214 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.9655 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.39224 +policy/KL 0.0315558 +policy/KLBefore 0 +policy/LossAfter -0.535261 +policy/LossBefore -0.0441625 +policy/dLoss 0.491098 +---------------------------------------- -------------- +2025-03-29 20:21:03 | [rl2_trainer] epoch #142 | Optimizing policy... +2025-03-29 20:21:03 | [rl2_trainer] epoch #142 | Fitting baseline... +2025-03-29 20:21:03 | [rl2_trainer] epoch #142 | Computing loss before +2025-03-29 20:21:03 | [rl2_trainer] epoch #142 | Computing KL before +2025-03-29 20:21:03 | [rl2_trainer] epoch #142 | Optimizing +2025-03-29 20:21:05 | [rl2_trainer] epoch #142 | Computing KL after +2025-03-29 20:21:05 | [rl2_trainer] epoch #142 | Computing loss after +2025-03-29 20:21:06 | [rl2_trainer] epoch #142 | Saving snapshot... +2025-03-29 20:21:06 | [rl2_trainer] epoch #142 | Saved +2025-03-29 20:21:06 | [rl2_trainer] epoch #142 | Time 2867.04 s +2025-03-29 20:21:06 | [rl2_trainer] epoch #142 | EpochTime 21.54 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -76.0754 +Average/AverageReturn -122.82 +Average/Iteration 142 +Average/MaxReturn -69.6757 +Average/MinReturn -190.922 +Average/NumEpisodes 8 +Average/StdReturn 50.2453 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967344 +TotalEnvSteps 114400 +__unnamed_task__/AverageDiscountedReturn -76.0754 +__unnamed_task__/AverageReturn -122.82 +__unnamed_task__/Iteration 142 +__unnamed_task__/MaxReturn -69.6757 +__unnamed_task__/MinReturn -190.922 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 50.2453 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.38006 +policy/KL 0.0403801 +policy/KLBefore 0 +policy/LossAfter -0.567202 +policy/LossBefore 0.0180877 +policy/dLoss 0.58529 +---------------------------------------- -------------- +2025-03-29 20:21:23 | [rl2_trainer] epoch #143 | Optimizing policy... +2025-03-29 20:21:23 | [rl2_trainer] epoch #143 | Fitting baseline... +2025-03-29 20:21:23 | [rl2_trainer] epoch #143 | Computing loss before +2025-03-29 20:21:23 | [rl2_trainer] epoch #143 | Computing KL before +2025-03-29 20:21:23 | [rl2_trainer] epoch #143 | Optimizing +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | Computing KL after +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | Computing loss after +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | Saving snapshot... +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | Saved +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | Time 2887.26 s +2025-03-29 20:21:26 | [rl2_trainer] epoch #143 | EpochTime 20.22 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -80.4071 +Average/AverageReturn -125.068 +Average/Iteration 143 +Average/MaxReturn -67.1931 +Average/MinReturn -184.336 +Average/NumEpisodes 8 +Average/StdReturn 44.3014 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976323 +TotalEnvSteps 115200 +__unnamed_task__/AverageDiscountedReturn -80.4071 +__unnamed_task__/AverageReturn -125.068 +__unnamed_task__/Iteration 143 +__unnamed_task__/MaxReturn -67.1931 +__unnamed_task__/MinReturn -184.336 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.3014 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.35451 +policy/KL 0.0294761 +policy/KLBefore 0 +policy/LossAfter -0.475422 +policy/LossBefore -0.00528055 +policy/dLoss 0.470141 +---------------------------------------- --------------- +2025-03-29 20:21:44 | [rl2_trainer] epoch #144 | Optimizing policy... +2025-03-29 20:21:44 | [rl2_trainer] epoch #144 | Fitting baseline... +2025-03-29 20:21:44 | [rl2_trainer] epoch #144 | Computing loss before +2025-03-29 20:21:44 | [rl2_trainer] epoch #144 | Computing KL before +2025-03-29 20:21:44 | [rl2_trainer] epoch #144 | Optimizing +2025-03-29 20:21:46 | [rl2_trainer] epoch #144 | Computing KL after +2025-03-29 20:21:46 | [rl2_trainer] epoch #144 | Computing loss after +2025-03-29 20:21:47 | [rl2_trainer] epoch #144 | Saving snapshot... +2025-03-29 20:21:47 | [rl2_trainer] epoch #144 | Saved +2025-03-29 20:21:47 | [rl2_trainer] epoch #144 | Time 2908.05 s +2025-03-29 20:21:47 | [rl2_trainer] epoch #144 | EpochTime 20.79 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -72.0981 +Average/AverageReturn -114.948 +Average/Iteration 144 +Average/MaxReturn -53.1919 +Average/MinReturn -188.467 +Average/NumEpisodes 8 +Average/StdReturn 43.7577 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.965145 +TotalEnvSteps 116000 +__unnamed_task__/AverageDiscountedReturn -72.0981 +__unnamed_task__/AverageReturn -114.948 +__unnamed_task__/Iteration 144 +__unnamed_task__/MaxReturn -53.1919 +__unnamed_task__/MinReturn -188.467 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 43.7577 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.33127 +policy/KL 0.0336387 +policy/KLBefore 0 +policy/LossAfter -0.453977 +policy/LossBefore 0.0175658 +policy/dLoss 0.471543 +---------------------------------------- -------------- +2025-03-29 20:22:05 | [rl2_trainer] epoch #145 | Optimizing policy... +2025-03-29 20:22:05 | [rl2_trainer] epoch #145 | Fitting baseline... +2025-03-29 20:22:05 | [rl2_trainer] epoch #145 | Computing loss before +2025-03-29 20:22:05 | [rl2_trainer] epoch #145 | Computing KL before +2025-03-29 20:22:05 | [rl2_trainer] epoch #145 | Optimizing +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | Computing KL after +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | Computing loss after +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | Saving snapshot... +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | Saved +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | Time 2929.94 s +2025-03-29 20:22:08 | [rl2_trainer] epoch #145 | EpochTime 21.89 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -87.0037 +Average/AverageReturn -136.976 +Average/Iteration 145 +Average/MaxReturn -69.6707 +Average/MinReturn -221.339 +Average/NumEpisodes 8 +Average/StdReturn 54.9773 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.971662 +TotalEnvSteps 116800 +__unnamed_task__/AverageDiscountedReturn -87.0037 +__unnamed_task__/AverageReturn -136.976 +__unnamed_task__/Iteration 145 +__unnamed_task__/MaxReturn -69.6707 +__unnamed_task__/MinReturn -221.339 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 54.9773 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.30832 +policy/KL 0.0322063 +policy/KLBefore 0 +policy/LossAfter -0.581376 +policy/LossBefore -0.00907335 +policy/dLoss 0.572302 +---------------------------------------- --------------- +2025-03-29 20:22:27 | [rl2_trainer] epoch #146 | Optimizing policy... +2025-03-29 20:22:27 | [rl2_trainer] epoch #146 | Fitting baseline... +2025-03-29 20:22:27 | [rl2_trainer] epoch #146 | Computing loss before +2025-03-29 20:22:27 | [rl2_trainer] epoch #146 | Computing KL before +2025-03-29 20:22:27 | [rl2_trainer] epoch #146 | Optimizing +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | Computing KL after +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | Computing loss after +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | Saving snapshot... +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | Saved +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | Time 2951.34 s +2025-03-29 20:22:30 | [rl2_trainer] epoch #146 | EpochTime 21.40 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -82.4849 +Average/AverageReturn -133.504 +Average/Iteration 146 +Average/MaxReturn -84.1122 +Average/MinReturn -184.902 +Average/NumEpisodes 8 +Average/StdReturn 39.6229 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.95466 +TotalEnvSteps 117600 +__unnamed_task__/AverageDiscountedReturn -82.4849 +__unnamed_task__/AverageReturn -133.504 +__unnamed_task__/Iteration 146 +__unnamed_task__/MaxReturn -84.1122 +__unnamed_task__/MinReturn -184.902 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 39.6229 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.28609 +policy/KL 0.0291061 +policy/KLBefore 0 +policy/LossAfter -0.595704 +policy/LossBefore -0.0117218 +policy/dLoss 0.583982 +---------------------------------------- -------------- +2025-03-29 20:22:49 | [rl2_trainer] epoch #147 | Optimizing policy... +2025-03-29 20:22:49 | [rl2_trainer] epoch #147 | Fitting baseline... +2025-03-29 20:22:49 | [rl2_trainer] epoch #147 | Computing loss before +2025-03-29 20:22:49 | [rl2_trainer] epoch #147 | Computing KL before +2025-03-29 20:22:49 | [rl2_trainer] epoch #147 | Optimizing +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | Computing KL after +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | Computing loss after +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | Saving snapshot... +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | Saved +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | Time 2973.54 s +2025-03-29 20:22:52 | [rl2_trainer] epoch #147 | EpochTime 22.19 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -79.1049 +Average/AverageReturn -127.202 +Average/Iteration 147 +Average/MaxReturn -66.4449 +Average/MinReturn -192.124 +Average/NumEpisodes 8 +Average/StdReturn 47.6212 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957414 +TotalEnvSteps 118400 +__unnamed_task__/AverageDiscountedReturn -79.1049 +__unnamed_task__/AverageReturn -127.202 +__unnamed_task__/Iteration 147 +__unnamed_task__/MaxReturn -66.4449 +__unnamed_task__/MinReturn -192.124 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 47.6212 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.30031 +policy/KL 0.0358708 +policy/KLBefore 0 +policy/LossAfter -0.638993 +policy/LossBefore -0.00973309 +policy/dLoss 0.62926 +---------------------------------------- --------------- +2025-03-29 20:23:10 | [rl2_trainer] epoch #148 | Optimizing policy... +2025-03-29 20:23:10 | [rl2_trainer] epoch #148 | Fitting baseline... +2025-03-29 20:23:10 | [rl2_trainer] epoch #148 | Computing loss before +2025-03-29 20:23:10 | [rl2_trainer] epoch #148 | Computing KL before +2025-03-29 20:23:11 | [rl2_trainer] epoch #148 | Optimizing +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | Computing KL after +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | Computing loss after +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | Saving snapshot... +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | Saved +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | Time 2995.22 s +2025-03-29 20:23:14 | [rl2_trainer] epoch #148 | EpochTime 21.68 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -84.6706 +Average/AverageReturn -135.228 +Average/Iteration 148 +Average/MaxReturn -80.7688 +Average/MinReturn -183.313 +Average/NumEpisodes 8 +Average/StdReturn 40.3121 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.944633 +TotalEnvSteps 119200 +__unnamed_task__/AverageDiscountedReturn -84.6706 +__unnamed_task__/AverageReturn -135.228 +__unnamed_task__/Iteration 148 +__unnamed_task__/MaxReturn -80.7688 +__unnamed_task__/MinReturn -183.313 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 40.3121 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.34045 +policy/KL 0.0366644 +policy/KLBefore 0 +policy/LossAfter -0.713858 +policy/LossBefore 0.00349064 +policy/dLoss 0.717349 +---------------------------------------- --------------- +2025-03-29 20:23:32 | [rl2_trainer] epoch #149 | Optimizing policy... +2025-03-29 20:23:32 | [rl2_trainer] epoch #149 | Fitting baseline... +2025-03-29 20:23:32 | [rl2_trainer] epoch #149 | Computing loss before +2025-03-29 20:23:32 | [rl2_trainer] epoch #149 | Computing KL before +2025-03-29 20:23:32 | [rl2_trainer] epoch #149 | Optimizing +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | Computing KL after +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | Computing loss after +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | Saving snapshot... +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | Saved +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | Time 3016.53 s +2025-03-29 20:23:35 | [rl2_trainer] epoch #149 | EpochTime 21.32 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -81.1372 +Average/AverageReturn -132.835 +Average/Iteration 149 +Average/MaxReturn -73.2314 +Average/MinReturn -199.381 +Average/NumEpisodes 8 +Average/StdReturn 44.5646 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.950558 +TotalEnvSteps 120000 +__unnamed_task__/AverageDiscountedReturn -81.1372 +__unnamed_task__/AverageReturn -132.835 +__unnamed_task__/Iteration 149 +__unnamed_task__/MaxReturn -73.2314 +__unnamed_task__/MinReturn -199.381 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.5646 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.31749 +policy/KL 0.0391456 +policy/KLBefore 0 +policy/LossAfter -0.736282 +policy/LossBefore -0.0567748 +policy/dLoss 0.679507 +---------------------------------------- -------------- +2025-03-29 20:23:53 | [rl2_trainer] epoch #150 | Optimizing policy... +2025-03-29 20:23:53 | [rl2_trainer] epoch #150 | Fitting baseline... +2025-03-29 20:23:53 | [rl2_trainer] epoch #150 | Computing loss before +2025-03-29 20:23:53 | [rl2_trainer] epoch #150 | Computing KL before +2025-03-29 20:23:53 | [rl2_trainer] epoch #150 | Optimizing +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | Computing KL after +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | Computing loss after +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | Saving snapshot... +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | Saved +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | Time 3036.81 s +2025-03-29 20:23:55 | [rl2_trainer] epoch #150 | EpochTime 20.27 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8062 +Average/AverageReturn -118.425 +Average/Iteration 150 +Average/MaxReturn 24.3791 +Average/MinReturn -185.802 +Average/NumEpisodes 8 +Average/StdReturn 66.3062 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967971 +TotalEnvSteps 120800 +__unnamed_task__/AverageDiscountedReturn -72.8062 +__unnamed_task__/AverageReturn -118.425 +__unnamed_task__/Iteration 150 +__unnamed_task__/MaxReturn 24.3791 +__unnamed_task__/MinReturn -185.802 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 66.3062 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.3176 +policy/KL 0.0527192 +policy/KLBefore 0 +policy/LossAfter -0.654572 +policy/LossBefore -0.0393092 +policy/dLoss 0.615263 +---------------------------------------- -------------- +2025-03-29 20:24:11 | [rl2_trainer] epoch #151 | Optimizing policy... +2025-03-29 20:24:11 | [rl2_trainer] epoch #151 | Fitting baseline... +2025-03-29 20:24:11 | [rl2_trainer] epoch #151 | Computing loss before +2025-03-29 20:24:11 | [rl2_trainer] epoch #151 | Computing KL before +2025-03-29 20:24:11 | [rl2_trainer] epoch #151 | Optimizing +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | Computing KL after +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | Computing loss after +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | Saving snapshot... +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | Saved +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | Time 3054.70 s +2025-03-29 20:24:13 | [rl2_trainer] epoch #151 | EpochTime 17.89 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -81.4215 +Average/AverageReturn -134.122 +Average/Iteration 151 +Average/MaxReturn -73.7902 +Average/MinReturn -195.46 +Average/NumEpisodes 8 +Average/StdReturn 49.0261 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967598 +TotalEnvSteps 121600 +__unnamed_task__/AverageDiscountedReturn -81.4215 +__unnamed_task__/AverageReturn -134.122 +__unnamed_task__/Iteration 151 +__unnamed_task__/MaxReturn -73.7902 +__unnamed_task__/MinReturn -195.46 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 49.0261 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.32614 +policy/KL 0.0371698 +policy/KLBefore 0 +policy/LossAfter -0.603103 +policy/LossBefore -0.0246284 +policy/dLoss 0.578474 +---------------------------------------- -------------- +2025-03-29 20:24:29 | [rl2_trainer] epoch #152 | Optimizing policy... +2025-03-29 20:24:29 | [rl2_trainer] epoch #152 | Fitting baseline... +2025-03-29 20:24:29 | [rl2_trainer] epoch #152 | Computing loss before +2025-03-29 20:24:29 | [rl2_trainer] epoch #152 | Computing KL before +2025-03-29 20:24:29 | [rl2_trainer] epoch #152 | Optimizing +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | Computing KL after +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | Computing loss after +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | Saving snapshot... +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | Saved +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | Time 3072.51 s +2025-03-29 20:24:31 | [rl2_trainer] epoch #152 | EpochTime 17.80 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -85.9592 +Average/AverageReturn -136.914 +Average/Iteration 152 +Average/MaxReturn -71.7799 +Average/MinReturn -202.86 +Average/NumEpisodes 8 +Average/StdReturn 56.8169 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.970216 +TotalEnvSteps 122400 +__unnamed_task__/AverageDiscountedReturn -85.9592 +__unnamed_task__/AverageReturn -136.914 +__unnamed_task__/Iteration 152 +__unnamed_task__/MaxReturn -71.7799 +__unnamed_task__/MinReturn -202.86 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 56.8169 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.28947 +policy/KL 0.0358345 +policy/KLBefore 0 +policy/LossAfter -0.700014 +policy/LossBefore -0.0484389 +policy/dLoss 0.651575 +---------------------------------------- -------------- +2025-03-29 20:24:46 | [rl2_trainer] epoch #153 | Optimizing policy... +2025-03-29 20:24:46 | [rl2_trainer] epoch #153 | Fitting baseline... +2025-03-29 20:24:46 | [rl2_trainer] epoch #153 | Computing loss before +2025-03-29 20:24:46 | [rl2_trainer] epoch #153 | Computing KL before +2025-03-29 20:24:47 | [rl2_trainer] epoch #153 | Optimizing +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | Computing KL after +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | Computing loss after +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | Saving snapshot... +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | Saved +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | Time 3090.31 s +2025-03-29 20:24:49 | [rl2_trainer] epoch #153 | EpochTime 17.80 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -84.6173 +Average/AverageReturn -136.337 +Average/Iteration 153 +Average/MaxReturn -74.4182 +Average/MinReturn -205.584 +Average/NumEpisodes 8 +Average/StdReturn 48.3257 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969842 +TotalEnvSteps 123200 +__unnamed_task__/AverageDiscountedReturn -84.6173 +__unnamed_task__/AverageReturn -136.337 +__unnamed_task__/Iteration 153 +__unnamed_task__/MaxReturn -74.4182 +__unnamed_task__/MinReturn -205.584 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 48.3257 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.27525 +policy/KL 0.0393862 +policy/KLBefore 0 +policy/LossAfter -0.476818 +policy/LossBefore 0.0325434 +policy/dLoss 0.509361 +---------------------------------------- -------------- +2025-03-29 20:25:05 | [rl2_trainer] epoch #154 | Optimizing policy... +2025-03-29 20:25:05 | [rl2_trainer] epoch #154 | Fitting baseline... +2025-03-29 20:25:05 | [rl2_trainer] epoch #154 | Computing loss before +2025-03-29 20:25:05 | [rl2_trainer] epoch #154 | Computing KL before +2025-03-29 20:25:05 | [rl2_trainer] epoch #154 | Optimizing +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | Computing KL after +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | Computing loss after +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | Saving snapshot... +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | Saved +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | Time 3108.94 s +2025-03-29 20:25:07 | [rl2_trainer] epoch #154 | EpochTime 18.63 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -82.0051 +Average/AverageReturn -135.627 +Average/Iteration 154 +Average/MaxReturn -71.1609 +Average/MinReturn -209.96 +Average/NumEpisodes 8 +Average/StdReturn 57.1719 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978546 +TotalEnvSteps 124000 +__unnamed_task__/AverageDiscountedReturn -82.0051 +__unnamed_task__/AverageReturn -135.627 +__unnamed_task__/Iteration 154 +__unnamed_task__/MaxReturn -71.1609 +__unnamed_task__/MinReturn -209.96 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 57.1719 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.28494 +policy/KL 0.0516272 +policy/KLBefore 0 +policy/LossAfter -0.619674 +policy/LossBefore -0.023982 +policy/dLoss 0.595692 +---------------------------------------- -------------- +2025-03-29 20:25:23 | [rl2_trainer] epoch #155 | Optimizing policy... +2025-03-29 20:25:23 | [rl2_trainer] epoch #155 | Fitting baseline... +2025-03-29 20:25:23 | [rl2_trainer] epoch #155 | Computing loss before +2025-03-29 20:25:23 | [rl2_trainer] epoch #155 | Computing KL before +2025-03-29 20:25:24 | [rl2_trainer] epoch #155 | Optimizing +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | Computing KL after +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | Computing loss after +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | Saving snapshot... +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | Saved +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | Time 3127.30 s +2025-03-29 20:25:26 | [rl2_trainer] epoch #155 | EpochTime 18.35 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -86.478 +Average/AverageReturn -141.77 +Average/Iteration 155 +Average/MaxReturn -88.3096 +Average/MinReturn -212.318 +Average/NumEpisodes 8 +Average/StdReturn 49.8863 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959782 +TotalEnvSteps 124800 +__unnamed_task__/AverageDiscountedReturn -86.478 +__unnamed_task__/AverageReturn -141.77 +__unnamed_task__/Iteration 155 +__unnamed_task__/MaxReturn -88.3096 +__unnamed_task__/MinReturn -212.318 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 49.8863 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.30304 +policy/KL 0.0371387 +policy/KLBefore 0 +policy/LossAfter -0.7125 +policy/LossBefore -0.0204732 +policy/dLoss 0.692027 +---------------------------------------- -------------- +2025-03-29 20:25:42 | [rl2_trainer] epoch #156 | Optimizing policy... +2025-03-29 20:25:42 | [rl2_trainer] epoch #156 | Fitting baseline... +2025-03-29 20:25:42 | [rl2_trainer] epoch #156 | Computing loss before +2025-03-29 20:25:42 | [rl2_trainer] epoch #156 | Computing KL before +2025-03-29 20:25:42 | [rl2_trainer] epoch #156 | Optimizing +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | Computing KL after +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | Computing loss after +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | Saving snapshot... +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | Saved +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | Time 3145.95 s +2025-03-29 20:25:44 | [rl2_trainer] epoch #156 | EpochTime 18.65 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -84.2447 +Average/AverageReturn -142.058 +Average/Iteration 156 +Average/MaxReturn -85.9525 +Average/MinReturn -210.924 +Average/NumEpisodes 8 +Average/StdReturn 49.2476 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.964888 +TotalEnvSteps 125600 +__unnamed_task__/AverageDiscountedReturn -84.2447 +__unnamed_task__/AverageReturn -142.058 +__unnamed_task__/Iteration 156 +__unnamed_task__/MaxReturn -85.9525 +__unnamed_task__/MinReturn -210.924 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 49.2476 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.3112 +policy/KL 0.0420905 +policy/KLBefore 0 +policy/LossAfter -0.614361 +policy/LossBefore 0.00129333 +policy/dLoss 0.615655 +---------------------------------------- --------------- +2025-03-29 20:26:00 | [rl2_trainer] epoch #157 | Optimizing policy... +2025-03-29 20:26:00 | [rl2_trainer] epoch #157 | Fitting baseline... +2025-03-29 20:26:00 | [rl2_trainer] epoch #157 | Computing loss before +2025-03-29 20:26:00 | [rl2_trainer] epoch #157 | Computing KL before +2025-03-29 20:26:01 | [rl2_trainer] epoch #157 | Optimizing +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | Computing KL after +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | Computing loss after +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | Saving snapshot... +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | Saved +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | Time 3164.35 s +2025-03-29 20:26:03 | [rl2_trainer] epoch #157 | EpochTime 18.40 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -77.3062 +Average/AverageReturn -126.067 +Average/Iteration 157 +Average/MaxReturn -88.909 +Average/MinReturn -202.136 +Average/NumEpisodes 8 +Average/StdReturn 42.4511 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.928529 +TotalEnvSteps 126400 +__unnamed_task__/AverageDiscountedReturn -77.3062 +__unnamed_task__/AverageReturn -126.067 +__unnamed_task__/Iteration 157 +__unnamed_task__/MaxReturn -88.909 +__unnamed_task__/MinReturn -202.136 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 42.4511 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.31647 +policy/KL 0.0300727 +policy/KLBefore 0 +policy/LossAfter -0.612092 +policy/LossBefore 0.00753147 +policy/dLoss 0.619624 +---------------------------------------- --------------- +2025-03-29 20:26:20 | [rl2_trainer] epoch #158 | Optimizing policy... +2025-03-29 20:26:20 | [rl2_trainer] epoch #158 | Fitting baseline... +2025-03-29 20:26:20 | [rl2_trainer] epoch #158 | Computing loss before +2025-03-29 20:26:21 | [rl2_trainer] epoch #158 | Computing KL before +2025-03-29 20:26:21 | [rl2_trainer] epoch #158 | Optimizing +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | Computing KL after +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | Computing loss after +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | Saving snapshot... +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | Saved +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | Time 3184.81 s +2025-03-29 20:26:23 | [rl2_trainer] epoch #158 | EpochTime 20.46 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -76.4247 +Average/AverageReturn -123.992 +Average/Iteration 158 +Average/MaxReturn -59.7162 +Average/MinReturn -198.859 +Average/NumEpisodes 8 +Average/StdReturn 49.6341 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.927853 +TotalEnvSteps 127200 +__unnamed_task__/AverageDiscountedReturn -76.4247 +__unnamed_task__/AverageReturn -123.992 +__unnamed_task__/Iteration 158 +__unnamed_task__/MaxReturn -59.7162 +__unnamed_task__/MinReturn -198.859 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 49.6341 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.29304 +policy/KL 0.0392132 +policy/KLBefore 0 +policy/LossAfter -0.593953 +policy/LossBefore 0.00247734 +policy/dLoss 0.59643 +---------------------------------------- --------------- +2025-03-29 20:26:42 | [rl2_trainer] epoch #159 | Optimizing policy... +2025-03-29 20:26:42 | [rl2_trainer] epoch #159 | Fitting baseline... +2025-03-29 20:26:42 | [rl2_trainer] epoch #159 | Computing loss before +2025-03-29 20:26:42 | [rl2_trainer] epoch #159 | Computing KL before +2025-03-29 20:26:42 | [rl2_trainer] epoch #159 | Optimizing +2025-03-29 20:26:44 | [rl2_trainer] epoch #159 | Computing KL after +2025-03-29 20:26:45 | [rl2_trainer] epoch #159 | Computing loss after +2025-03-29 20:26:45 | [rl2_trainer] epoch #159 | Saving snapshot... +2025-03-29 20:26:45 | [rl2_trainer] epoch #159 | Saved +2025-03-29 20:26:45 | [rl2_trainer] epoch #159 | Time 3206.05 s +2025-03-29 20:26:45 | [rl2_trainer] epoch #159 | EpochTime 21.24 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -76.0314 +Average/AverageReturn -126.854 +Average/Iteration 159 +Average/MaxReturn -79.0648 +Average/MinReturn -186.531 +Average/NumEpisodes 8 +Average/StdReturn 37.07 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.97653 +TotalEnvSteps 128000 +__unnamed_task__/AverageDiscountedReturn -76.0314 +__unnamed_task__/AverageReturn -126.854 +__unnamed_task__/Iteration 159 +__unnamed_task__/MaxReturn -79.0648 +__unnamed_task__/MinReturn -186.531 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.07 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.27188 +policy/KL 0.0227305 +policy/KLBefore 0 +policy/LossAfter -0.329753 +policy/LossBefore -0.0172548 +policy/dLoss 0.312498 +---------------------------------------- -------------- +2025-03-29 20:27:01 | [rl2_trainer] epoch #160 | Optimizing policy... +2025-03-29 20:27:01 | [rl2_trainer] epoch #160 | Fitting baseline... +2025-03-29 20:27:01 | [rl2_trainer] epoch #160 | Computing loss before +2025-03-29 20:27:01 | [rl2_trainer] epoch #160 | Computing KL before +2025-03-29 20:27:01 | [rl2_trainer] epoch #160 | Optimizing +2025-03-29 20:27:03 | [rl2_trainer] epoch #160 | Computing KL after +2025-03-29 20:27:03 | [rl2_trainer] epoch #160 | Computing loss after +2025-03-29 20:27:04 | [rl2_trainer] epoch #160 | Saving snapshot... +2025-03-29 20:27:04 | [rl2_trainer] epoch #160 | Saved +2025-03-29 20:27:04 | [rl2_trainer] epoch #160 | Time 3225.00 s +2025-03-29 20:27:04 | [rl2_trainer] epoch #160 | EpochTime 18.94 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -79.7587 +Average/AverageReturn -129.451 +Average/Iteration 160 +Average/MaxReturn -91.3938 +Average/MinReturn -191.161 +Average/NumEpisodes 8 +Average/StdReturn 37.65 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.945157 +TotalEnvSteps 128800 +__unnamed_task__/AverageDiscountedReturn -79.7587 +__unnamed_task__/AverageReturn -129.451 +__unnamed_task__/Iteration 160 +__unnamed_task__/MaxReturn -91.3938 +__unnamed_task__/MinReturn -191.161 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.65 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.23987 +policy/KL 0.0229402 +policy/KLBefore 0 +policy/LossAfter -0.554716 +policy/LossBefore -0.0178036 +policy/dLoss 0.536913 +---------------------------------------- -------------- +2025-03-29 20:27:20 | [rl2_trainer] epoch #161 | Optimizing policy... +2025-03-29 20:27:20 | [rl2_trainer] epoch #161 | Fitting baseline... +2025-03-29 20:27:20 | [rl2_trainer] epoch #161 | Computing loss before +2025-03-29 20:27:20 | [rl2_trainer] epoch #161 | Computing KL before +2025-03-29 20:27:20 | [rl2_trainer] epoch #161 | Optimizing +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | Computing KL after +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | Computing loss after +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | Saving snapshot... +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | Saved +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | Time 3244.18 s +2025-03-29 20:27:23 | [rl2_trainer] epoch #161 | EpochTime 19.18 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -80.1414 +Average/AverageReturn -135.255 +Average/Iteration 161 +Average/MaxReturn -80.0628 +Average/MinReturn -185.556 +Average/NumEpisodes 8 +Average/StdReturn 44.7229 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.962745 +TotalEnvSteps 129600 +__unnamed_task__/AverageDiscountedReturn -80.1414 +__unnamed_task__/AverageReturn -135.255 +__unnamed_task__/Iteration 161 +__unnamed_task__/MaxReturn -80.0628 +__unnamed_task__/MinReturn -185.556 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 44.7229 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.18591 +policy/KL 0.0260076 +policy/KLBefore 0 +policy/LossAfter -0.529642 +policy/LossBefore -0.0468398 +policy/dLoss 0.482802 +---------------------------------------- -------------- +2025-03-29 20:27:41 | [rl2_trainer] epoch #162 | Optimizing policy... +2025-03-29 20:27:41 | [rl2_trainer] epoch #162 | Fitting baseline... +2025-03-29 20:27:41 | [rl2_trainer] epoch #162 | Computing loss before +2025-03-29 20:27:41 | [rl2_trainer] epoch #162 | Computing KL before +2025-03-29 20:27:41 | [rl2_trainer] epoch #162 | Optimizing +2025-03-29 20:27:43 | [rl2_trainer] epoch #162 | Computing KL after +2025-03-29 20:27:43 | [rl2_trainer] epoch #162 | Computing loss after +2025-03-29 20:27:44 | [rl2_trainer] epoch #162 | Saving snapshot... +2025-03-29 20:27:44 | [rl2_trainer] epoch #162 | Saved +2025-03-29 20:27:44 | [rl2_trainer] epoch #162 | Time 3265.02 s +2025-03-29 20:27:44 | [rl2_trainer] epoch #162 | EpochTime 20.84 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -82.5016 +Average/AverageReturn -138.653 +Average/Iteration 162 +Average/MaxReturn -91.8014 +Average/MinReturn -213.934 +Average/NumEpisodes 8 +Average/StdReturn 48.9458 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.954141 +TotalEnvSteps 130400 +__unnamed_task__/AverageDiscountedReturn -82.5016 +__unnamed_task__/AverageReturn -138.653 +__unnamed_task__/Iteration 162 +__unnamed_task__/MaxReturn -91.8014 +__unnamed_task__/MinReturn -213.934 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 48.9458 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.18697 +policy/KL 0.0287683 +policy/KLBefore 0 +policy/LossAfter -0.573599 +policy/LossBefore 0.0339288 +policy/dLoss 0.607528 +---------------------------------------- -------------- +2025-03-29 20:28:02 | [rl2_trainer] epoch #163 | Optimizing policy... +2025-03-29 20:28:02 | [rl2_trainer] epoch #163 | Fitting baseline... +2025-03-29 20:28:02 | [rl2_trainer] epoch #163 | Computing loss before +2025-03-29 20:28:02 | [rl2_trainer] epoch #163 | Computing KL before +2025-03-29 20:28:02 | [rl2_trainer] epoch #163 | Optimizing +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | Computing KL after +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | Computing loss after +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | Saving snapshot... +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | Saved +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | Time 3285.93 s +2025-03-29 20:28:04 | [rl2_trainer] epoch #163 | EpochTime 20.91 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -77.7751 +Average/AverageReturn -129.558 +Average/Iteration 163 +Average/MaxReturn -62.1082 +Average/MinReturn -204.098 +Average/NumEpisodes 8 +Average/StdReturn 50.0081 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.952996 +TotalEnvSteps 131200 +__unnamed_task__/AverageDiscountedReturn -77.7751 +__unnamed_task__/AverageReturn -129.558 +__unnamed_task__/Iteration 163 +__unnamed_task__/MaxReturn -62.1082 +__unnamed_task__/MinReturn -204.098 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 50.0081 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.18762 +policy/KL 0.0255648 +policy/KLBefore 0 +policy/LossAfter -0.664709 +policy/LossBefore -0.0905495 +policy/dLoss 0.57416 +---------------------------------------- -------------- +2025-03-29 20:28:21 | [rl2_trainer] epoch #164 | Optimizing policy... +2025-03-29 20:28:21 | [rl2_trainer] epoch #164 | Fitting baseline... +2025-03-29 20:28:21 | [rl2_trainer] epoch #164 | Computing loss before +2025-03-29 20:28:21 | [rl2_trainer] epoch #164 | Computing KL before +2025-03-29 20:28:21 | [rl2_trainer] epoch #164 | Optimizing +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | Computing KL after +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | Computing loss after +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | Saving snapshot... +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | Saved +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | Time 3304.76 s +2025-03-29 20:28:23 | [rl2_trainer] epoch #164 | EpochTime 18.82 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -77.1059 +Average/AverageReturn -125.528 +Average/Iteration 164 +Average/MaxReturn -85.9175 +Average/MinReturn -177.375 +Average/NumEpisodes 8 +Average/StdReturn 34.2478 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.926967 +TotalEnvSteps 132000 +__unnamed_task__/AverageDiscountedReturn -77.1059 +__unnamed_task__/AverageReturn -125.528 +__unnamed_task__/Iteration 164 +__unnamed_task__/MaxReturn -85.9175 +__unnamed_task__/MinReturn -177.375 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.2478 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.15985 +policy/KL 0.0286517 +policy/KLBefore 0 +policy/LossAfter -0.568187 +policy/LossBefore 0.041007 +policy/dLoss 0.609194 +---------------------------------------- -------------- +2025-03-29 20:28:41 | [rl2_trainer] epoch #165 | Optimizing policy... +2025-03-29 20:28:41 | [rl2_trainer] epoch #165 | Fitting baseline... +2025-03-29 20:28:41 | [rl2_trainer] epoch #165 | Computing loss before +2025-03-29 20:28:41 | [rl2_trainer] epoch #165 | Computing KL before +2025-03-29 20:28:41 | [rl2_trainer] epoch #165 | Optimizing +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | Computing KL after +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | Computing loss after +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | Saving snapshot... +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | Saved +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | Time 3324.68 s +2025-03-29 20:28:43 | [rl2_trainer] epoch #165 | EpochTime 19.92 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -75.9722 +Average/AverageReturn -124.164 +Average/Iteration 165 +Average/MaxReturn -90.4275 +Average/MinReturn -202.414 +Average/NumEpisodes 8 +Average/StdReturn 43.247 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958213 +TotalEnvSteps 132800 +__unnamed_task__/AverageDiscountedReturn -75.9722 +__unnamed_task__/AverageReturn -124.164 +__unnamed_task__/Iteration 165 +__unnamed_task__/MaxReturn -90.4275 +__unnamed_task__/MinReturn -202.414 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 43.247 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.13355 +policy/KL 0.0213311 +policy/KLBefore 0 +policy/LossAfter -0.4561 +policy/LossBefore -0.00257637 +policy/dLoss 0.453524 +---------------------------------------- --------------- +2025-03-29 20:28:59 | [rl2_trainer] epoch #166 | Optimizing policy... +2025-03-29 20:28:59 | [rl2_trainer] epoch #166 | Fitting baseline... +2025-03-29 20:28:59 | [rl2_trainer] epoch #166 | Computing loss before +2025-03-29 20:28:59 | [rl2_trainer] epoch #166 | Computing KL before +2025-03-29 20:28:59 | [rl2_trainer] epoch #166 | Optimizing +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | Computing KL after +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | Computing loss after +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | Saving snapshot... +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | Saved +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | Time 3342.75 s +2025-03-29 20:29:01 | [rl2_trainer] epoch #166 | EpochTime 18.07 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -76.1671 +Average/AverageReturn -126.97 +Average/Iteration 166 +Average/MaxReturn -77.3092 +Average/MinReturn -180.765 +Average/NumEpisodes 8 +Average/StdReturn 39.3416 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.953033 +TotalEnvSteps 133600 +__unnamed_task__/AverageDiscountedReturn -76.1671 +__unnamed_task__/AverageReturn -126.97 +__unnamed_task__/Iteration 166 +__unnamed_task__/MaxReturn -77.3092 +__unnamed_task__/MinReturn -180.765 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 39.3416 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.11536 +policy/KL 0.023623 +policy/KLBefore 0 +policy/LossAfter -0.522515 +policy/LossBefore -0.0117423 +policy/dLoss 0.510773 +---------------------------------------- -------------- +2025-03-29 20:29:17 | [rl2_trainer] epoch #167 | Optimizing policy... +2025-03-29 20:29:17 | [rl2_trainer] epoch #167 | Fitting baseline... +2025-03-29 20:29:17 | [rl2_trainer] epoch #167 | Computing loss before +2025-03-29 20:29:17 | [rl2_trainer] epoch #167 | Computing KL before +2025-03-29 20:29:18 | [rl2_trainer] epoch #167 | Optimizing +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | Computing KL after +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | Computing loss after +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | Saving snapshot... +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | Saved +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | Time 3361.09 s +2025-03-29 20:29:20 | [rl2_trainer] epoch #167 | EpochTime 18.34 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -75.2515 +Average/AverageReturn -122.334 +Average/Iteration 167 +Average/MaxReturn -82.5781 +Average/MinReturn -194.664 +Average/NumEpisodes 8 +Average/StdReturn 39.7864 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.952488 +TotalEnvSteps 134400 +__unnamed_task__/AverageDiscountedReturn -75.2515 +__unnamed_task__/AverageReturn -122.334 +__unnamed_task__/Iteration 167 +__unnamed_task__/MaxReturn -82.5781 +__unnamed_task__/MinReturn -194.664 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 39.7864 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.0981 +policy/KL 0.0192674 +policy/KLBefore 0 +policy/LossAfter -0.423878 +policy/LossBefore -0.012745 +policy/dLoss 0.411133 +---------------------------------------- -------------- +2025-03-29 20:29:35 | [rl2_trainer] epoch #168 | Optimizing policy... +2025-03-29 20:29:35 | [rl2_trainer] epoch #168 | Fitting baseline... +2025-03-29 20:29:35 | [rl2_trainer] epoch #168 | Computing loss before +2025-03-29 20:29:35 | [rl2_trainer] epoch #168 | Computing KL before +2025-03-29 20:29:35 | [rl2_trainer] epoch #168 | Optimizing +2025-03-29 20:29:37 | [rl2_trainer] epoch #168 | Computing KL after +2025-03-29 20:29:38 | [rl2_trainer] epoch #168 | Computing loss after +2025-03-29 20:29:38 | [rl2_trainer] epoch #168 | Saving snapshot... +2025-03-29 20:29:38 | [rl2_trainer] epoch #168 | Saved +2025-03-29 20:29:38 | [rl2_trainer] epoch #168 | Time 3379.07 s +2025-03-29 20:29:38 | [rl2_trainer] epoch #168 | EpochTime 17.97 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -75.0397 +Average/AverageReturn -121.689 +Average/Iteration 168 +Average/MaxReturn -80.8681 +Average/MinReturn -159.078 +Average/NumEpisodes 8 +Average/StdReturn 28.6409 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978532 +TotalEnvSteps 135200 +__unnamed_task__/AverageDiscountedReturn -75.0397 +__unnamed_task__/AverageReturn -121.689 +__unnamed_task__/Iteration 168 +__unnamed_task__/MaxReturn -80.8681 +__unnamed_task__/MinReturn -159.078 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.6409 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.0845 +policy/KL 0.0231377 +policy/KLBefore 0 +policy/LossAfter -0.245158 +policy/LossBefore 0.0381137 +policy/dLoss 0.283272 +---------------------------------------- -------------- +2025-03-29 20:29:54 | [rl2_trainer] epoch #169 | Optimizing policy... +2025-03-29 20:29:54 | [rl2_trainer] epoch #169 | Fitting baseline... +2025-03-29 20:29:54 | [rl2_trainer] epoch #169 | Computing loss before +2025-03-29 20:29:54 | [rl2_trainer] epoch #169 | Computing KL before +2025-03-29 20:29:54 | [rl2_trainer] epoch #169 | Optimizing +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | Computing KL after +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | Computing loss after +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | Saving snapshot... +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | Saved +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | Time 3397.81 s +2025-03-29 20:29:56 | [rl2_trainer] epoch #169 | EpochTime 18.74 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -70.2189 +Average/AverageReturn -113.143 +Average/Iteration 169 +Average/MaxReturn -72.4821 +Average/MinReturn -177.534 +Average/NumEpisodes 8 +Average/StdReturn 30.0076 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.955783 +TotalEnvSteps 136000 +__unnamed_task__/AverageDiscountedReturn -70.2189 +__unnamed_task__/AverageReturn -113.143 +__unnamed_task__/Iteration 169 +__unnamed_task__/MaxReturn -72.4821 +__unnamed_task__/MinReturn -177.534 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.0076 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.08484 +policy/KL 0.0284216 +policy/KLBefore 0 +policy/LossAfter -0.382262 +policy/LossBefore 0.0145877 +policy/dLoss 0.396849 +---------------------------------------- -------------- +2025-03-29 20:30:12 | [rl2_trainer] epoch #170 | Optimizing policy... +2025-03-29 20:30:12 | [rl2_trainer] epoch #170 | Fitting baseline... +2025-03-29 20:30:12 | [rl2_trainer] epoch #170 | Computing loss before +2025-03-29 20:30:12 | [rl2_trainer] epoch #170 | Computing KL before +2025-03-29 20:30:12 | [rl2_trainer] epoch #170 | Optimizing +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | Computing KL after +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | Computing loss after +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | Saving snapshot... +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | Saved +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | Time 3416.17 s +2025-03-29 20:30:15 | [rl2_trainer] epoch #170 | EpochTime 18.35 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9797 +Average/AverageReturn -114.671 +Average/Iteration 170 +Average/MaxReturn -66.0465 +Average/MinReturn -172.43 +Average/NumEpisodes 8 +Average/StdReturn 36.7935 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96253 +TotalEnvSteps 136800 +__unnamed_task__/AverageDiscountedReturn -70.9797 +__unnamed_task__/AverageReturn -114.671 +__unnamed_task__/Iteration 170 +__unnamed_task__/MaxReturn -66.0465 +__unnamed_task__/MinReturn -172.43 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.7935 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.07201 +policy/KL 0.0342155 +policy/KLBefore 0 +policy/LossAfter -0.334673 +policy/LossBefore 0.0675656 +policy/dLoss 0.402238 +---------------------------------------- -------------- +2025-03-29 20:30:31 | [rl2_trainer] epoch #171 | Optimizing policy... +2025-03-29 20:30:31 | [rl2_trainer] epoch #171 | Fitting baseline... +2025-03-29 20:30:31 | [rl2_trainer] epoch #171 | Computing loss before +2025-03-29 20:30:31 | [rl2_trainer] epoch #171 | Computing KL before +2025-03-29 20:30:31 | [rl2_trainer] epoch #171 | Optimizing +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | Computing KL after +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | Computing loss after +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | Saving snapshot... +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | Saved +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | Time 3434.66 s +2025-03-29 20:30:33 | [rl2_trainer] epoch #171 | EpochTime 18.50 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -78.457 +Average/AverageReturn -131.089 +Average/Iteration 171 +Average/MaxReturn -98.2035 +Average/MinReturn -203.908 +Average/NumEpisodes 8 +Average/StdReturn 37.062 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967041 +TotalEnvSteps 137600 +__unnamed_task__/AverageDiscountedReturn -78.457 +__unnamed_task__/AverageReturn -131.089 +__unnamed_task__/Iteration 171 +__unnamed_task__/MaxReturn -98.2035 +__unnamed_task__/MinReturn -203.908 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.062 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.05898 +policy/KL 0.0251494 +policy/KLBefore 0 +policy/LossAfter -0.396078 +policy/LossBefore -0.0625788 +policy/dLoss 0.3335 +---------------------------------------- -------------- +2025-03-29 20:30:50 | [rl2_trainer] epoch #172 | Optimizing policy... +2025-03-29 20:30:50 | [rl2_trainer] epoch #172 | Fitting baseline... +2025-03-29 20:30:50 | [rl2_trainer] epoch #172 | Computing loss before +2025-03-29 20:30:50 | [rl2_trainer] epoch #172 | Computing KL before +2025-03-29 20:30:50 | [rl2_trainer] epoch #172 | Optimizing +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | Computing KL after +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | Computing loss after +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | Saving snapshot... +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | Saved +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | Time 3453.83 s +2025-03-29 20:30:52 | [rl2_trainer] epoch #172 | EpochTime 19.17 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.8223 +Average/AverageReturn -95.6565 +Average/Iteration 172 +Average/MaxReturn -55.8631 +Average/MinReturn -172.661 +Average/NumEpisodes 8 +Average/StdReturn 33.921 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.932363 +TotalEnvSteps 138400 +__unnamed_task__/AverageDiscountedReturn -63.8223 +__unnamed_task__/AverageReturn -95.6565 +__unnamed_task__/Iteration 172 +__unnamed_task__/MaxReturn -55.8631 +__unnamed_task__/MinReturn -172.661 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.921 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.04214 +policy/KL 0.0215694 +policy/KLBefore 0 +policy/LossAfter -0.379742 +policy/LossBefore -0.00610859 +policy/dLoss 0.373634 +---------------------------------------- --------------- +2025-03-29 20:31:09 | [rl2_trainer] epoch #173 | Optimizing policy... +2025-03-29 20:31:09 | [rl2_trainer] epoch #173 | Fitting baseline... +2025-03-29 20:31:09 | [rl2_trainer] epoch #173 | Computing loss before +2025-03-29 20:31:09 | [rl2_trainer] epoch #173 | Computing KL before +2025-03-29 20:31:09 | [rl2_trainer] epoch #173 | Optimizing +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | Computing KL after +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | Computing loss after +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | Saving snapshot... +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | Saved +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | Time 3472.78 s +2025-03-29 20:31:11 | [rl2_trainer] epoch #173 | EpochTime 18.95 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -78.2621 +Average/AverageReturn -128.23 +Average/Iteration 173 +Average/MaxReturn -74.3599 +Average/MinReturn -210.036 +Average/NumEpisodes 8 +Average/StdReturn 46.5197 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.937328 +TotalEnvSteps 139200 +__unnamed_task__/AverageDiscountedReturn -78.2621 +__unnamed_task__/AverageReturn -128.23 +__unnamed_task__/Iteration 173 +__unnamed_task__/MaxReturn -74.3599 +__unnamed_task__/MinReturn -210.036 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 46.5197 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.03642 +policy/KL 0.0371112 +policy/KLBefore 0 +policy/LossAfter -0.730992 +policy/LossBefore -0.0579421 +policy/dLoss 0.67305 +---------------------------------------- -------------- +2025-03-29 20:31:27 | [rl2_trainer] epoch #174 | Optimizing policy... +2025-03-29 20:31:27 | [rl2_trainer] epoch #174 | Fitting baseline... +2025-03-29 20:31:27 | [rl2_trainer] epoch #174 | Computing loss before +2025-03-29 20:31:27 | [rl2_trainer] epoch #174 | Computing KL before +2025-03-29 20:31:27 | [rl2_trainer] epoch #174 | Optimizing +2025-03-29 20:31:29 | [rl2_trainer] epoch #174 | Computing KL after +2025-03-29 20:31:29 | [rl2_trainer] epoch #174 | Computing loss after +2025-03-29 20:31:30 | [rl2_trainer] epoch #174 | Saving snapshot... +2025-03-29 20:31:30 | [rl2_trainer] epoch #174 | Saved +2025-03-29 20:31:30 | [rl2_trainer] epoch #174 | Time 3491.01 s +2025-03-29 20:31:30 | [rl2_trainer] epoch #174 | EpochTime 18.23 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -81.4687 +Average/AverageReturn -125.353 +Average/Iteration 174 +Average/MaxReturn -62.6299 +Average/MinReturn -193.693 +Average/NumEpisodes 8 +Average/StdReturn 42.6612 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973266 +TotalEnvSteps 140000 +__unnamed_task__/AverageDiscountedReturn -81.4687 +__unnamed_task__/AverageReturn -125.353 +__unnamed_task__/Iteration 174 +__unnamed_task__/MaxReturn -62.6299 +__unnamed_task__/MinReturn -193.693 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 42.6612 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.03334 +policy/KL 0.0257871 +policy/KLBefore 0 +policy/LossAfter -0.483349 +policy/LossBefore -0.0440722 +policy/dLoss 0.439277 +---------------------------------------- -------------- +2025-03-29 20:31:45 | [rl2_trainer] epoch #175 | Optimizing policy... +2025-03-29 20:31:45 | [rl2_trainer] epoch #175 | Fitting baseline... +2025-03-29 20:31:45 | [rl2_trainer] epoch #175 | Computing loss before +2025-03-29 20:31:45 | [rl2_trainer] epoch #175 | Computing KL before +2025-03-29 20:31:45 | [rl2_trainer] epoch #175 | Optimizing +2025-03-29 20:31:47 | [rl2_trainer] epoch #175 | Computing KL after +2025-03-29 20:31:47 | [rl2_trainer] epoch #175 | Computing loss after +2025-03-29 20:31:48 | [rl2_trainer] epoch #175 | Saving snapshot... +2025-03-29 20:31:48 | [rl2_trainer] epoch #175 | Saved +2025-03-29 20:31:48 | [rl2_trainer] epoch #175 | Time 3509.03 s +2025-03-29 20:31:48 | [rl2_trainer] epoch #175 | EpochTime 18.02 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -75.4986 +Average/AverageReturn -119.453 +Average/Iteration 175 +Average/MaxReturn -88.2941 +Average/MinReturn -184.389 +Average/NumEpisodes 8 +Average/StdReturn 32.8282 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.962724 +TotalEnvSteps 140800 +__unnamed_task__/AverageDiscountedReturn -75.4986 +__unnamed_task__/AverageReturn -119.453 +__unnamed_task__/Iteration 175 +__unnamed_task__/MaxReturn -88.2941 +__unnamed_task__/MinReturn -184.389 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.8282 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.0144 +policy/KL 0.0199252 +policy/KLBefore 0 +policy/LossAfter -0.319637 +policy/LossBefore 0.0523393 +policy/dLoss 0.371976 +---------------------------------------- -------------- +2025-03-29 20:32:03 | [rl2_trainer] epoch #176 | Optimizing policy... +2025-03-29 20:32:03 | [rl2_trainer] epoch #176 | Fitting baseline... +2025-03-29 20:32:03 | [rl2_trainer] epoch #176 | Computing loss before +2025-03-29 20:32:03 | [rl2_trainer] epoch #176 | Computing KL before +2025-03-29 20:32:03 | [rl2_trainer] epoch #176 | Optimizing +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | Computing KL after +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | Computing loss after +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | Saving snapshot... +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | Saved +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | Time 3527.25 s +2025-03-29 20:32:06 | [rl2_trainer] epoch #176 | EpochTime 18.21 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -78.1899 +Average/AverageReturn -123.009 +Average/Iteration 176 +Average/MaxReturn -95.5593 +Average/MinReturn -168.136 +Average/NumEpisodes 8 +Average/StdReturn 31.1296 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966308 +TotalEnvSteps 141600 +__unnamed_task__/AverageDiscountedReturn -78.1899 +__unnamed_task__/AverageReturn -123.009 +__unnamed_task__/Iteration 176 +__unnamed_task__/MaxReturn -95.5593 +__unnamed_task__/MinReturn -168.136 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.1296 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.99523 +policy/KL 0.0258291 +policy/KLBefore 0 +policy/LossAfter -0.348894 +policy/LossBefore 0.0187559 +policy/dLoss 0.36765 +---------------------------------------- -------------- +2025-03-29 20:32:21 | [rl2_trainer] epoch #177 | Optimizing policy... +2025-03-29 20:32:21 | [rl2_trainer] epoch #177 | Fitting baseline... +2025-03-29 20:32:21 | [rl2_trainer] epoch #177 | Computing loss before +2025-03-29 20:32:21 | [rl2_trainer] epoch #177 | Computing KL before +2025-03-29 20:32:21 | [rl2_trainer] epoch #177 | Optimizing +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | Computing KL after +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | Computing loss after +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | Saving snapshot... +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | Saved +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | Time 3545.17 s +2025-03-29 20:32:24 | [rl2_trainer] epoch #177 | EpochTime 17.92 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -73.3101 +Average/AverageReturn -114.511 +Average/Iteration 177 +Average/MaxReturn -76.969 +Average/MinReturn -171.376 +Average/NumEpisodes 8 +Average/StdReturn 33.79 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975021 +TotalEnvSteps 142400 +__unnamed_task__/AverageDiscountedReturn -73.3101 +__unnamed_task__/AverageReturn -114.511 +__unnamed_task__/Iteration 177 +__unnamed_task__/MaxReturn -76.969 +__unnamed_task__/MinReturn -171.376 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.79 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.99689 +policy/KL 0.0274557 +policy/KLBefore 0 +policy/LossAfter -0.350482 +policy/LossBefore 0.00389814 +policy/dLoss 0.35438 +---------------------------------------- --------------- +2025-03-29 20:32:40 | [rl2_trainer] epoch #178 | Optimizing policy... +2025-03-29 20:32:40 | [rl2_trainer] epoch #178 | Fitting baseline... +2025-03-29 20:32:40 | [rl2_trainer] epoch #178 | Computing loss before +2025-03-29 20:32:40 | [rl2_trainer] epoch #178 | Computing KL before +2025-03-29 20:32:40 | [rl2_trainer] epoch #178 | Optimizing +2025-03-29 20:32:42 | [rl2_trainer] epoch #178 | Computing KL after +2025-03-29 20:32:43 | [rl2_trainer] epoch #178 | Computing loss after +2025-03-29 20:32:43 | [rl2_trainer] epoch #178 | Saving snapshot... +2025-03-29 20:32:43 | [rl2_trainer] epoch #178 | Saved +2025-03-29 20:32:43 | [rl2_trainer] epoch #178 | Time 3564.04 s +2025-03-29 20:32:43 | [rl2_trainer] epoch #178 | EpochTime 18.87 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.7242 +Average/AverageReturn -95.6195 +Average/Iteration 178 +Average/MaxReturn -58.7377 +Average/MinReturn -149.571 +Average/NumEpisodes 8 +Average/StdReturn 23.7163 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979713 +TotalEnvSteps 143200 +__unnamed_task__/AverageDiscountedReturn -63.7242 +__unnamed_task__/AverageReturn -95.6195 +__unnamed_task__/Iteration 178 +__unnamed_task__/MaxReturn -58.7377 +__unnamed_task__/MinReturn -149.571 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.7163 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.99312 +policy/KL 0.018262 +policy/KLBefore 0 +policy/LossAfter -0.131355 +policy/LossBefore 0.00087862 +policy/dLoss 0.132234 +---------------------------------------- --------------- +2025-03-29 20:32:59 | [rl2_trainer] epoch #179 | Optimizing policy... +2025-03-29 20:32:59 | [rl2_trainer] epoch #179 | Fitting baseline... +2025-03-29 20:32:59 | [rl2_trainer] epoch #179 | Computing loss before +2025-03-29 20:32:59 | [rl2_trainer] epoch #179 | Computing KL before +2025-03-29 20:32:59 | [rl2_trainer] epoch #179 | Optimizing +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | Computing KL after +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | Computing loss after +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | Saving snapshot... +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | Saved +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | Time 3582.78 s +2025-03-29 20:33:01 | [rl2_trainer] epoch #179 | EpochTime 18.74 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -67.5069 +Average/AverageReturn -104.583 +Average/Iteration 179 +Average/MaxReturn -62.8671 +Average/MinReturn -183.174 +Average/NumEpisodes 8 +Average/StdReturn 39.1367 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.941726 +TotalEnvSteps 144000 +__unnamed_task__/AverageDiscountedReturn -67.5069 +__unnamed_task__/AverageReturn -104.583 +__unnamed_task__/Iteration 179 +__unnamed_task__/MaxReturn -62.8671 +__unnamed_task__/MinReturn -183.174 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 39.1367 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.00101 +policy/KL 0.0200751 +policy/KLBefore 0 +policy/LossAfter -0.466373 +policy/LossBefore -0.0437383 +policy/dLoss 0.422634 +---------------------------------------- -------------- +2025-03-29 20:33:18 | [rl2_trainer] epoch #180 | Optimizing policy... +2025-03-29 20:33:18 | [rl2_trainer] epoch #180 | Fitting baseline... +2025-03-29 20:33:18 | [rl2_trainer] epoch #180 | Computing loss before +2025-03-29 20:33:18 | [rl2_trainer] epoch #180 | Computing KL before +2025-03-29 20:33:18 | [rl2_trainer] epoch #180 | Optimizing +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | Computing KL after +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | Computing loss after +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | Saving snapshot... +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | Saved +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | Time 3601.49 s +2025-03-29 20:33:20 | [rl2_trainer] epoch #180 | EpochTime 18.71 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.5724 +Average/AverageReturn -98.6772 +Average/Iteration 180 +Average/MaxReturn -70.0915 +Average/MinReturn -157.552 +Average/NumEpisodes 8 +Average/StdReturn 24.6734 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967092 +TotalEnvSteps 144800 +__unnamed_task__/AverageDiscountedReturn -64.5724 +__unnamed_task__/AverageReturn -98.6772 +__unnamed_task__/Iteration 180 +__unnamed_task__/MaxReturn -70.0915 +__unnamed_task__/MinReturn -157.552 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.6734 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.9972 +policy/KL 0.0251634 +policy/KLBefore 0 +policy/LossAfter -0.208254 +policy/LossBefore -0.0181828 +policy/dLoss 0.190071 +---------------------------------------- -------------- +2025-03-29 20:33:37 | [rl2_trainer] epoch #181 | Optimizing policy... +2025-03-29 20:33:37 | [rl2_trainer] epoch #181 | Fitting baseline... +2025-03-29 20:33:37 | [rl2_trainer] epoch #181 | Computing loss before +2025-03-29 20:33:37 | [rl2_trainer] epoch #181 | Computing KL before +2025-03-29 20:33:37 | [rl2_trainer] epoch #181 | Optimizing +2025-03-29 20:33:39 | [rl2_trainer] epoch #181 | Computing KL after +2025-03-29 20:33:39 | [rl2_trainer] epoch #181 | Computing loss after +2025-03-29 20:33:40 | [rl2_trainer] epoch #181 | Saving snapshot... +2025-03-29 20:33:40 | [rl2_trainer] epoch #181 | Saved +2025-03-29 20:33:40 | [rl2_trainer] epoch #181 | Time 3620.98 s +2025-03-29 20:33:40 | [rl2_trainer] epoch #181 | EpochTime 19.48 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.4412 +Average/AverageReturn -107.434 +Average/Iteration 181 +Average/MaxReturn -83.2863 +Average/MinReturn -150.082 +Average/NumEpisodes 8 +Average/StdReturn 24.1106 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963104 +TotalEnvSteps 145600 +__unnamed_task__/AverageDiscountedReturn -68.4412 +__unnamed_task__/AverageReturn -107.434 +__unnamed_task__/Iteration 181 +__unnamed_task__/MaxReturn -83.2863 +__unnamed_task__/MinReturn -150.082 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.1106 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.9854 +policy/KL 0.024908 +policy/KLBefore 0 +policy/LossAfter -0.241778 +policy/LossBefore 0.0100153 +policy/dLoss 0.251794 +---------------------------------------- -------------- +2025-03-29 20:33:56 | [rl2_trainer] epoch #182 | Optimizing policy... +2025-03-29 20:33:56 | [rl2_trainer] epoch #182 | Fitting baseline... +2025-03-29 20:33:56 | [rl2_trainer] epoch #182 | Computing loss before +2025-03-29 20:33:56 | [rl2_trainer] epoch #182 | Computing KL before +2025-03-29 20:33:56 | [rl2_trainer] epoch #182 | Optimizing +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | Computing KL after +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | Computing loss after +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | Saving snapshot... +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | Saved +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | Time 3640.12 s +2025-03-29 20:33:59 | [rl2_trainer] epoch #182 | EpochTime 19.14 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -66.5282 +Average/AverageReturn -104.132 +Average/Iteration 182 +Average/MaxReturn -86.6524 +Average/MinReturn -157.226 +Average/NumEpisodes 8 +Average/StdReturn 22.0489 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973286 +TotalEnvSteps 146400 +__unnamed_task__/AverageDiscountedReturn -66.5282 +__unnamed_task__/AverageReturn -104.132 +__unnamed_task__/Iteration 182 +__unnamed_task__/MaxReturn -86.6524 +__unnamed_task__/MinReturn -157.226 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.0489 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.98484 +policy/KL 0.0278216 +policy/KLBefore 0 +policy/LossAfter -0.162564 +policy/LossBefore 0.00175182 +policy/dLoss 0.164316 +---------------------------------------- --------------- +2025-03-29 20:34:14 | [rl2_trainer] epoch #183 | Optimizing policy... +2025-03-29 20:34:14 | [rl2_trainer] epoch #183 | Fitting baseline... +2025-03-29 20:34:14 | [rl2_trainer] epoch #183 | Computing loss before +2025-03-29 20:34:14 | [rl2_trainer] epoch #183 | Computing KL before +2025-03-29 20:34:14 | [rl2_trainer] epoch #183 | Optimizing +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | Computing KL after +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | Computing loss after +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | Saving snapshot... +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | Saved +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | Time 3658.18 s +2025-03-29 20:34:17 | [rl2_trainer] epoch #183 | EpochTime 18.06 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -75.1837 +Average/AverageReturn -117.298 +Average/Iteration 183 +Average/MaxReturn -56.7556 +Average/MinReturn -192.849 +Average/NumEpisodes 8 +Average/StdReturn 46.0905 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.950632 +TotalEnvSteps 147200 +__unnamed_task__/AverageDiscountedReturn -75.1837 +__unnamed_task__/AverageReturn -117.298 +__unnamed_task__/Iteration 183 +__unnamed_task__/MaxReturn -56.7556 +__unnamed_task__/MinReturn -192.849 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 46.0905 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.9708 +policy/KL 0.0274653 +policy/KLBefore 0 +policy/LossAfter -0.521762 +policy/LossBefore -0.0404147 +policy/dLoss 0.481347 +---------------------------------------- -------------- +2025-03-29 20:34:34 | [rl2_trainer] epoch #184 | Optimizing policy... +2025-03-29 20:34:34 | [rl2_trainer] epoch #184 | Fitting baseline... +2025-03-29 20:34:34 | [rl2_trainer] epoch #184 | Computing loss before +2025-03-29 20:34:34 | [rl2_trainer] epoch #184 | Computing KL before +2025-03-29 20:34:34 | [rl2_trainer] epoch #184 | Optimizing +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | Computing KL after +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | Computing loss after +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | Saving snapshot... +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | Saved +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | Time 3677.49 s +2025-03-29 20:34:36 | [rl2_trainer] epoch #184 | EpochTime 19.30 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.3071 +Average/AverageReturn -98.9186 +Average/Iteration 184 +Average/MaxReturn -42.0418 +Average/MinReturn -182.868 +Average/NumEpisodes 8 +Average/StdReturn 36.174 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967736 +TotalEnvSteps 148000 +__unnamed_task__/AverageDiscountedReturn -63.3071 +__unnamed_task__/AverageReturn -98.9186 +__unnamed_task__/Iteration 184 +__unnamed_task__/MaxReturn -42.0418 +__unnamed_task__/MinReturn -182.868 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.174 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.96912 +policy/KL 0.0279573 +policy/KLBefore 0 +policy/LossAfter -0.237098 +policy/LossBefore 0.00490928 +policy/dLoss 0.242007 +---------------------------------------- --------------- +2025-03-29 20:34:51 | [rl2_trainer] epoch #185 | Optimizing policy... +2025-03-29 20:34:51 | [rl2_trainer] epoch #185 | Fitting baseline... +2025-03-29 20:34:51 | [rl2_trainer] epoch #185 | Computing loss before +2025-03-29 20:34:51 | [rl2_trainer] epoch #185 | Computing KL before +2025-03-29 20:34:51 | [rl2_trainer] epoch #185 | Optimizing +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | Computing KL after +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | Computing loss after +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | Saving snapshot... +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | Saved +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | Time 3695.27 s +2025-03-29 20:34:54 | [rl2_trainer] epoch #185 | EpochTime 17.78 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -74.4993 +Average/AverageReturn -115.182 +Average/Iteration 185 +Average/MaxReturn -85.1948 +Average/MinReturn -157.244 +Average/NumEpisodes 8 +Average/StdReturn 23.8916 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975885 +TotalEnvSteps 148800 +__unnamed_task__/AverageDiscountedReturn -74.4993 +__unnamed_task__/AverageReturn -115.182 +__unnamed_task__/Iteration 185 +__unnamed_task__/MaxReturn -85.1948 +__unnamed_task__/MinReturn -157.244 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.8916 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.96574 +policy/KL 0.0301192 +policy/KLBefore 0 +policy/LossAfter -0.255767 +policy/LossBefore 0.00402359 +policy/dLoss 0.259791 +---------------------------------------- --------------- +2025-03-29 20:35:11 | [rl2_trainer] epoch #186 | Optimizing policy... +2025-03-29 20:35:11 | [rl2_trainer] epoch #186 | Fitting baseline... +2025-03-29 20:35:11 | [rl2_trainer] epoch #186 | Computing loss before +2025-03-29 20:35:11 | [rl2_trainer] epoch #186 | Computing KL before +2025-03-29 20:35:11 | [rl2_trainer] epoch #186 | Optimizing +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | Computing KL after +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | Computing loss after +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | Saving snapshot... +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | Saved +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | Time 3714.66 s +2025-03-29 20:35:13 | [rl2_trainer] epoch #186 | EpochTime 19.39 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.345 +Average/AverageReturn -96.6801 +Average/Iteration 186 +Average/MaxReturn -65.9591 +Average/MinReturn -152.603 +Average/NumEpisodes 8 +Average/StdReturn 25.8832 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968001 +TotalEnvSteps 149600 +__unnamed_task__/AverageDiscountedReturn -63.345 +__unnamed_task__/AverageReturn -96.6801 +__unnamed_task__/Iteration 186 +__unnamed_task__/MaxReturn -65.9591 +__unnamed_task__/MinReturn -152.603 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.8832 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.95191 +policy/KL 0.0126861 +policy/KLBefore 0 +policy/LossAfter -0.173208 +policy/LossBefore -0.00275222 +policy/dLoss 0.170455 +---------------------------------------- --------------- +2025-03-29 20:35:31 | [rl2_trainer] epoch #187 | Optimizing policy... +2025-03-29 20:35:31 | [rl2_trainer] epoch #187 | Fitting baseline... +2025-03-29 20:35:31 | [rl2_trainer] epoch #187 | Computing loss before +2025-03-29 20:35:31 | [rl2_trainer] epoch #187 | Computing KL before +2025-03-29 20:35:31 | [rl2_trainer] epoch #187 | Optimizing +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | Computing KL after +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | Computing loss after +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | Saving snapshot... +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | Saved +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | Time 3734.58 s +2025-03-29 20:35:33 | [rl2_trainer] epoch #187 | EpochTime 19.92 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -51.1508 +Average/AverageReturn -74.5031 +Average/Iteration 187 +Average/MaxReturn -50.3125 +Average/MinReturn -97.7624 +Average/NumEpisodes 8 +Average/StdReturn 16.1032 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985484 +TotalEnvSteps 150400 +__unnamed_task__/AverageDiscountedReturn -51.1508 +__unnamed_task__/AverageReturn -74.5031 +__unnamed_task__/Iteration 187 +__unnamed_task__/MaxReturn -50.3125 +__unnamed_task__/MinReturn -97.7624 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.1032 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.94501 +policy/KL 0.0151537 +policy/KLBefore 0 +policy/LossAfter -0.0534474 +policy/LossBefore 0.00478467 +policy/dLoss 0.0582321 +---------------------------------------- --------------- +2025-03-29 20:35:49 | [rl2_trainer] epoch #188 | Optimizing policy... +2025-03-29 20:35:49 | [rl2_trainer] epoch #188 | Fitting baseline... +2025-03-29 20:35:49 | [rl2_trainer] epoch #188 | Computing loss before +2025-03-29 20:35:49 | [rl2_trainer] epoch #188 | Computing KL before +2025-03-29 20:35:49 | [rl2_trainer] epoch #188 | Optimizing +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | Computing KL after +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | Computing loss after +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | Saving snapshot... +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | Saved +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | Time 3752.49 s +2025-03-29 20:35:51 | [rl2_trainer] epoch #188 | EpochTime 17.91 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -75.9441 +Average/AverageReturn -121.11 +Average/Iteration 188 +Average/MaxReturn -79.1902 +Average/MinReturn -180.482 +Average/NumEpisodes 8 +Average/StdReturn 43.8678 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.951692 +TotalEnvSteps 151200 +__unnamed_task__/AverageDiscountedReturn -75.9441 +__unnamed_task__/AverageReturn -121.11 +__unnamed_task__/Iteration 188 +__unnamed_task__/MaxReturn -79.1902 +__unnamed_task__/MinReturn -180.482 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 43.8678 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.92817 +policy/KL 0.0222161 +policy/KLBefore 0 +policy/LossAfter -0.525618 +policy/LossBefore 0.00136852 +policy/dLoss 0.526986 +---------------------------------------- --------------- +2025-03-29 20:36:06 | [rl2_trainer] epoch #189 | Optimizing policy... +2025-03-29 20:36:06 | [rl2_trainer] epoch #189 | Fitting baseline... +2025-03-29 20:36:06 | [rl2_trainer] epoch #189 | Computing loss before +2025-03-29 20:36:07 | [rl2_trainer] epoch #189 | Computing KL before +2025-03-29 20:36:07 | [rl2_trainer] epoch #189 | Optimizing +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | Computing KL after +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | Computing loss after +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | Saving snapshot... +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | Saved +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | Time 3770.40 s +2025-03-29 20:36:09 | [rl2_trainer] epoch #189 | EpochTime 17.91 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -72.9686 +Average/AverageReturn -116.224 +Average/Iteration 189 +Average/MaxReturn -87.2541 +Average/MinReturn -167.986 +Average/NumEpisodes 8 +Average/StdReturn 34.7063 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982859 +TotalEnvSteps 152000 +__unnamed_task__/AverageDiscountedReturn -72.9686 +__unnamed_task__/AverageReturn -116.224 +__unnamed_task__/Iteration 189 +__unnamed_task__/MaxReturn -87.2541 +__unnamed_task__/MinReturn -167.986 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.7063 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.90167 +policy/KL 0.025415 +policy/KLBefore 0 +policy/LossAfter -0.220358 +policy/LossBefore 0.021263 +policy/dLoss 0.241621 +---------------------------------------- ------------- +2025-03-29 20:36:25 | [rl2_trainer] epoch #190 | Optimizing policy... +2025-03-29 20:36:25 | [rl2_trainer] epoch #190 | Fitting baseline... +2025-03-29 20:36:25 | [rl2_trainer] epoch #190 | Computing loss before +2025-03-29 20:36:25 | [rl2_trainer] epoch #190 | Computing KL before +2025-03-29 20:36:25 | [rl2_trainer] epoch #190 | Optimizing +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | Computing KL after +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | Computing loss after +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | Saving snapshot... +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | Saved +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | Time 3788.83 s +2025-03-29 20:36:27 | [rl2_trainer] epoch #190 | EpochTime 18.43 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -70.231 +Average/AverageReturn -107.667 +Average/Iteration 190 +Average/MaxReturn -49.8223 +Average/MinReturn -198.792 +Average/NumEpisodes 8 +Average/StdReturn 40.5191 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.955939 +TotalEnvSteps 152800 +__unnamed_task__/AverageDiscountedReturn -70.231 +__unnamed_task__/AverageReturn -107.667 +__unnamed_task__/Iteration 190 +__unnamed_task__/MaxReturn -49.8223 +__unnamed_task__/MinReturn -198.792 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 40.5191 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.89099 +policy/KL 0.0283761 +policy/KLBefore 0 +policy/LossAfter -0.358972 +policy/LossBefore 0.0394121 +policy/dLoss 0.398384 +---------------------------------------- -------------- +2025-03-29 20:36:44 | [rl2_trainer] epoch #191 | Optimizing policy... +2025-03-29 20:36:44 | [rl2_trainer] epoch #191 | Fitting baseline... +2025-03-29 20:36:44 | [rl2_trainer] epoch #191 | Computing loss before +2025-03-29 20:36:44 | [rl2_trainer] epoch #191 | Computing KL before +2025-03-29 20:36:44 | [rl2_trainer] epoch #191 | Optimizing +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | Computing KL after +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | Computing loss after +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | Saving snapshot... +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | Saved +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | Time 3807.68 s +2025-03-29 20:36:46 | [rl2_trainer] epoch #191 | EpochTime 18.84 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -60.6952 +Average/AverageReturn -94.6368 +Average/Iteration 191 +Average/MaxReturn -52.0442 +Average/MinReturn -155.387 +Average/NumEpisodes 8 +Average/StdReturn 30.4798 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966784 +TotalEnvSteps 153600 +__unnamed_task__/AverageDiscountedReturn -60.6952 +__unnamed_task__/AverageReturn -94.6368 +__unnamed_task__/Iteration 191 +__unnamed_task__/MaxReturn -52.0442 +__unnamed_task__/MinReturn -155.387 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.4798 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.88062 +policy/KL 0.017991 +policy/KLBefore 0 +policy/LossAfter -0.158086 +policy/LossBefore -0.00177928 +policy/dLoss 0.156307 +---------------------------------------- --------------- +2025-03-29 20:37:02 | [rl2_trainer] epoch #192 | Optimizing policy... +2025-03-29 20:37:02 | [rl2_trainer] epoch #192 | Fitting baseline... +2025-03-29 20:37:02 | [rl2_trainer] epoch #192 | Computing loss before +2025-03-29 20:37:02 | [rl2_trainer] epoch #192 | Computing KL before +2025-03-29 20:37:02 | [rl2_trainer] epoch #192 | Optimizing +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | Computing KL after +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | Computing loss after +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | Saving snapshot... +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | Saved +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | Time 3825.86 s +2025-03-29 20:37:04 | [rl2_trainer] epoch #192 | EpochTime 18.18 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -67.3077 +Average/AverageReturn -101.405 +Average/Iteration 192 +Average/MaxReturn -71.4205 +Average/MinReturn -165.477 +Average/NumEpisodes 8 +Average/StdReturn 31.2925 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977516 +TotalEnvSteps 154400 +__unnamed_task__/AverageDiscountedReturn -67.3077 +__unnamed_task__/AverageReturn -101.405 +__unnamed_task__/Iteration 192 +__unnamed_task__/MaxReturn -71.4205 +__unnamed_task__/MinReturn -165.477 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.2925 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.87142 +policy/KL 0.0193917 +policy/KLBefore 0 +policy/LossAfter -0.260964 +policy/LossBefore 0.00684573 +policy/dLoss 0.26781 +---------------------------------------- --------------- +2025-03-29 20:37:21 | [rl2_trainer] epoch #193 | Optimizing policy... +2025-03-29 20:37:21 | [rl2_trainer] epoch #193 | Fitting baseline... +2025-03-29 20:37:21 | [rl2_trainer] epoch #193 | Computing loss before +2025-03-29 20:37:22 | [rl2_trainer] epoch #193 | Computing KL before +2025-03-29 20:37:22 | [rl2_trainer] epoch #193 | Optimizing +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | Computing KL after +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | Computing loss after +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | Saving snapshot... +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | Saved +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | Time 3845.32 s +2025-03-29 20:37:24 | [rl2_trainer] epoch #193 | EpochTime 19.46 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -51.8617 +Average/AverageReturn -83.9131 +Average/Iteration 193 +Average/MaxReturn -48.3692 +Average/MinReturn -150.408 +Average/NumEpisodes 8 +Average/StdReturn 28.014 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973375 +TotalEnvSteps 155200 +__unnamed_task__/AverageDiscountedReturn -51.8617 +__unnamed_task__/AverageReturn -83.9131 +__unnamed_task__/Iteration 193 +__unnamed_task__/MaxReturn -48.3692 +__unnamed_task__/MinReturn -150.408 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.014 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.86568 +policy/KL 0.0135727 +policy/KLBefore 0 +policy/LossAfter -0.154007 +policy/LossBefore -0.011305 +policy/dLoss 0.142702 +---------------------------------------- -------------- +2025-03-29 20:37:40 | [rl2_trainer] epoch #194 | Optimizing policy... +2025-03-29 20:37:40 | [rl2_trainer] epoch #194 | Fitting baseline... +2025-03-29 20:37:40 | [rl2_trainer] epoch #194 | Computing loss before +2025-03-29 20:37:40 | [rl2_trainer] epoch #194 | Computing KL before +2025-03-29 20:37:40 | [rl2_trainer] epoch #194 | Optimizing +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | Computing KL after +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | Computing loss after +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | Saving snapshot... +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | Saved +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | Time 3864.23 s +2025-03-29 20:37:43 | [rl2_trainer] epoch #194 | EpochTime 18.90 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -59.4179 +Average/AverageReturn -95.792 +Average/Iteration 194 +Average/MaxReturn -52.8304 +Average/MinReturn -164.784 +Average/NumEpisodes 8 +Average/StdReturn 34.4738 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.961377 +TotalEnvSteps 156000 +__unnamed_task__/AverageDiscountedReturn -59.4179 +__unnamed_task__/AverageReturn -95.792 +__unnamed_task__/Iteration 194 +__unnamed_task__/MaxReturn -52.8304 +__unnamed_task__/MinReturn -164.784 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.4738 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.86491 +policy/KL 0.0183591 +policy/KLBefore 0 +policy/LossAfter -0.215132 +policy/LossBefore 0.0120651 +policy/dLoss 0.227197 +---------------------------------------- -------------- +2025-03-29 20:37:59 | [rl2_trainer] epoch #195 | Optimizing policy... +2025-03-29 20:37:59 | [rl2_trainer] epoch #195 | Fitting baseline... +2025-03-29 20:37:59 | [rl2_trainer] epoch #195 | Computing loss before +2025-03-29 20:37:59 | [rl2_trainer] epoch #195 | Computing KL before +2025-03-29 20:37:59 | [rl2_trainer] epoch #195 | Optimizing +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | Computing KL after +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | Computing loss after +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | Saving snapshot... +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | Saved +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | Time 3882.79 s +2025-03-29 20:38:01 | [rl2_trainer] epoch #195 | EpochTime 18.56 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -59.7746 +Average/AverageReturn -91.5211 +Average/Iteration 195 +Average/MaxReturn -63.8894 +Average/MinReturn -161.256 +Average/NumEpisodes 8 +Average/StdReturn 28.9174 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969093 +TotalEnvSteps 156800 +__unnamed_task__/AverageDiscountedReturn -59.7746 +__unnamed_task__/AverageReturn -91.5211 +__unnamed_task__/Iteration 195 +__unnamed_task__/MaxReturn -63.8894 +__unnamed_task__/MinReturn -161.256 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.9174 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.86342 +policy/KL 0.0165499 +policy/KLBefore 0 +policy/LossAfter -0.202507 +policy/LossBefore -0.00724542 +policy/dLoss 0.195262 +---------------------------------------- --------------- +2025-03-29 20:38:18 | [rl2_trainer] epoch #196 | Optimizing policy... +2025-03-29 20:38:18 | [rl2_trainer] epoch #196 | Fitting baseline... +2025-03-29 20:38:18 | [rl2_trainer] epoch #196 | Computing loss before +2025-03-29 20:38:18 | [rl2_trainer] epoch #196 | Computing KL before +2025-03-29 20:38:18 | [rl2_trainer] epoch #196 | Optimizing +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | Computing KL after +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | Computing loss after +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | Saving snapshot... +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | Saved +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | Time 3901.73 s +2025-03-29 20:38:20 | [rl2_trainer] epoch #196 | EpochTime 18.94 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.4131 +Average/AverageReturn -102.195 +Average/Iteration 196 +Average/MaxReturn -68.3172 +Average/MinReturn -177.756 +Average/NumEpisodes 8 +Average/StdReturn 35.0666 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969319 +TotalEnvSteps 157600 +__unnamed_task__/AverageDiscountedReturn -63.4131 +__unnamed_task__/AverageReturn -102.195 +__unnamed_task__/Iteration 196 +__unnamed_task__/MaxReturn -68.3172 +__unnamed_task__/MinReturn -177.756 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 35.0666 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.85382 +policy/KL 0.0206771 +policy/KLBefore 0 +policy/LossAfter -0.154277 +policy/LossBefore -0.00143147 +policy/dLoss 0.152845 +---------------------------------------- --------------- +2025-03-29 20:38:37 | [rl2_trainer] epoch #197 | Optimizing policy... +2025-03-29 20:38:37 | [rl2_trainer] epoch #197 | Fitting baseline... +2025-03-29 20:38:37 | [rl2_trainer] epoch #197 | Computing loss before +2025-03-29 20:38:37 | [rl2_trainer] epoch #197 | Computing KL before +2025-03-29 20:38:37 | [rl2_trainer] epoch #197 | Optimizing +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | Computing KL after +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | Computing loss after +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | Saving snapshot... +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | Saved +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | Time 3920.71 s +2025-03-29 20:38:39 | [rl2_trainer] epoch #197 | EpochTime 18.97 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -67.3284 +Average/AverageReturn -99.0031 +Average/Iteration 197 +Average/MaxReturn -59.9678 +Average/MinReturn -155.565 +Average/NumEpisodes 8 +Average/StdReturn 26.4307 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.969915 +TotalEnvSteps 158400 +__unnamed_task__/AverageDiscountedReturn -67.3284 +__unnamed_task__/AverageReturn -99.0031 +__unnamed_task__/Iteration 197 +__unnamed_task__/MaxReturn -59.9678 +__unnamed_task__/MinReturn -155.565 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.4307 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.83869 +policy/KL 0.0257327 +policy/KLBefore 0 +policy/LossAfter -0.243715 +policy/LossBefore -0.00246308 +policy/dLoss 0.241252 +---------------------------------------- --------------- +2025-03-29 20:38:55 | [rl2_trainer] epoch #198 | Optimizing policy... +2025-03-29 20:38:55 | [rl2_trainer] epoch #198 | Fitting baseline... +2025-03-29 20:38:55 | [rl2_trainer] epoch #198 | Computing loss before +2025-03-29 20:38:55 | [rl2_trainer] epoch #198 | Computing KL before +2025-03-29 20:38:55 | [rl2_trainer] epoch #198 | Optimizing +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | Computing KL after +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | Computing loss after +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | Saving snapshot... +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | Saved +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | Time 3938.39 s +2025-03-29 20:38:57 | [rl2_trainer] epoch #198 | EpochTime 17.68 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -77.3896 +Average/AverageReturn -122.298 +Average/Iteration 198 +Average/MaxReturn -93.7889 +Average/MinReturn -178.371 +Average/NumEpisodes 8 +Average/StdReturn 32.2784 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.953218 +TotalEnvSteps 159200 +__unnamed_task__/AverageDiscountedReturn -77.3896 +__unnamed_task__/AverageReturn -122.298 +__unnamed_task__/Iteration 198 +__unnamed_task__/MaxReturn -93.7889 +__unnamed_task__/MinReturn -178.371 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.2784 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.82608 +policy/KL 0.0246226 +policy/KLBefore 0 +policy/LossAfter -0.468283 +policy/LossBefore 0.0420211 +policy/dLoss 0.510304 +---------------------------------------- -------------- +2025-03-29 20:39:14 | [rl2_trainer] epoch #199 | Optimizing policy... +2025-03-29 20:39:14 | [rl2_trainer] epoch #199 | Fitting baseline... +2025-03-29 20:39:14 | [rl2_trainer] epoch #199 | Computing loss before +2025-03-29 20:39:14 | [rl2_trainer] epoch #199 | Computing KL before +2025-03-29 20:39:14 | [rl2_trainer] epoch #199 | Optimizing +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | Computing KL after +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | Computing loss after +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | Saving snapshot... +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | Saved +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | Time 3957.92 s +2025-03-29 20:39:16 | [rl2_trainer] epoch #199 | EpochTime 19.53 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -51.9304 +Average/AverageReturn -78.5306 +Average/Iteration 199 +Average/MaxReturn -58.7158 +Average/MinReturn -96.2236 +Average/NumEpisodes 8 +Average/StdReturn 12.3499 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.987021 +TotalEnvSteps 160000 +__unnamed_task__/AverageDiscountedReturn -51.9304 +__unnamed_task__/AverageReturn -78.5306 +__unnamed_task__/Iteration 199 +__unnamed_task__/MaxReturn -58.7158 +__unnamed_task__/MinReturn -96.2236 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.3499 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.82081 +policy/KL 0.0200096 +policy/KLBefore 0 +policy/LossAfter -0.0816101 +policy/LossBefore 0.00891152 +policy/dLoss 0.0905216 +---------------------------------------- --------------- +2025-03-29 20:39:31 | [rl2_trainer] epoch #200 | Optimizing policy... +2025-03-29 20:39:31 | [rl2_trainer] epoch #200 | Fitting baseline... +2025-03-29 20:39:31 | [rl2_trainer] epoch #200 | Computing loss before +2025-03-29 20:39:31 | [rl2_trainer] epoch #200 | Computing KL before +2025-03-29 20:39:31 | [rl2_trainer] epoch #200 | Optimizing +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | Computing KL after +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | Computing loss after +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | Saving snapshot... +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | Saved +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | Time 3975.27 s +2025-03-29 20:39:34 | [rl2_trainer] epoch #200 | EpochTime 17.35 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -74.3452 +Average/AverageReturn -109.99 +Average/Iteration 200 +Average/MaxReturn -53.1589 +Average/MinReturn -154.557 +Average/NumEpisodes 8 +Average/StdReturn 30.4618 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.960992 +TotalEnvSteps 160800 +__unnamed_task__/AverageDiscountedReturn -74.3452 +__unnamed_task__/AverageReturn -109.99 +__unnamed_task__/Iteration 200 +__unnamed_task__/MaxReturn -53.1589 +__unnamed_task__/MinReturn -154.557 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.4618 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.83237 +policy/KL 0.0186331 +policy/KLBefore 0 +policy/LossAfter -0.343284 +policy/LossBefore -0.00490887 +policy/dLoss 0.338375 +---------------------------------------- --------------- +2025-03-29 20:39:50 | [rl2_trainer] epoch #201 | Optimizing policy... +2025-03-29 20:39:50 | [rl2_trainer] epoch #201 | Fitting baseline... +2025-03-29 20:39:50 | [rl2_trainer] epoch #201 | Computing loss before +2025-03-29 20:39:50 | [rl2_trainer] epoch #201 | Computing KL before +2025-03-29 20:39:50 | [rl2_trainer] epoch #201 | Optimizing +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | Computing KL after +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | Computing loss after +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | Saving snapshot... +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | Saved +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | Time 3993.60 s +2025-03-29 20:39:52 | [rl2_trainer] epoch #201 | EpochTime 18.33 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -70.8898 +Average/AverageReturn -110.976 +Average/Iteration 201 +Average/MaxReturn -66.8551 +Average/MinReturn -169.35 +Average/NumEpisodes 8 +Average/StdReturn 30.7241 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.962288 +TotalEnvSteps 161600 +__unnamed_task__/AverageDiscountedReturn -70.8898 +__unnamed_task__/AverageReturn -110.976 +__unnamed_task__/Iteration 201 +__unnamed_task__/MaxReturn -66.8551 +__unnamed_task__/MinReturn -169.35 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.7241 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.82516 +policy/KL 0.0319565 +policy/KLBefore 0 +policy/LossAfter -0.364175 +policy/LossBefore -0.030476 +policy/dLoss 0.333699 +---------------------------------------- -------------- +2025-03-29 20:40:09 | [rl2_trainer] epoch #202 | Optimizing policy... +2025-03-29 20:40:09 | [rl2_trainer] epoch #202 | Fitting baseline... +2025-03-29 20:40:09 | [rl2_trainer] epoch #202 | Computing loss before +2025-03-29 20:40:09 | [rl2_trainer] epoch #202 | Computing KL before +2025-03-29 20:40:09 | [rl2_trainer] epoch #202 | Optimizing +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | Computing KL after +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | Computing loss after +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | Saving snapshot... +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | Saved +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | Time 4012.95 s +2025-03-29 20:40:11 | [rl2_trainer] epoch #202 | EpochTime 19.35 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -57.5782 +Average/AverageReturn -88.5791 +Average/Iteration 202 +Average/MaxReturn -58.5493 +Average/MinReturn -124.884 +Average/NumEpisodes 8 +Average/StdReturn 17.7855 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.970016 +TotalEnvSteps 162400 +__unnamed_task__/AverageDiscountedReturn -57.5782 +__unnamed_task__/AverageReturn -88.5791 +__unnamed_task__/Iteration 202 +__unnamed_task__/MaxReturn -58.5493 +__unnamed_task__/MinReturn -124.884 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.7855 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.81336 +policy/KL 0.0125197 +policy/KLBefore 0 +policy/LossAfter -0.129356 +policy/LossBefore -0.0131174 +policy/dLoss 0.116238 +---------------------------------------- -------------- +2025-03-29 20:40:28 | [rl2_trainer] epoch #203 | Optimizing policy... +2025-03-29 20:40:28 | [rl2_trainer] epoch #203 | Fitting baseline... +2025-03-29 20:40:28 | [rl2_trainer] epoch #203 | Computing loss before +2025-03-29 20:40:29 | [rl2_trainer] epoch #203 | Computing KL before +2025-03-29 20:40:29 | [rl2_trainer] epoch #203 | Optimizing +2025-03-29 20:40:31 | [rl2_trainer] epoch #203 | Computing KL after +2025-03-29 20:40:31 | [rl2_trainer] epoch #203 | Computing loss after +2025-03-29 20:40:32 | [rl2_trainer] epoch #203 | Saving snapshot... +2025-03-29 20:40:32 | [rl2_trainer] epoch #203 | Saved +2025-03-29 20:40:32 | [rl2_trainer] epoch #203 | Time 4032.98 s +2025-03-29 20:40:32 | [rl2_trainer] epoch #203 | EpochTime 20.03 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -66.3263 +Average/AverageReturn -103.11 +Average/Iteration 203 +Average/MaxReturn -57.3749 +Average/MinReturn -174.34 +Average/NumEpisodes 8 +Average/StdReturn 37.5012 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958631 +TotalEnvSteps 163200 +__unnamed_task__/AverageDiscountedReturn -66.3263 +__unnamed_task__/AverageReturn -103.11 +__unnamed_task__/Iteration 203 +__unnamed_task__/MaxReturn -57.3749 +__unnamed_task__/MinReturn -174.34 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.5012 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.7991 +policy/KL 0.0267488 +policy/KLBefore 0 +policy/LossAfter -0.417028 +policy/LossBefore -0.0131578 +policy/dLoss 0.40387 +---------------------------------------- -------------- +2025-03-29 20:40:49 | [rl2_trainer] epoch #204 | Optimizing policy... +2025-03-29 20:40:49 | [rl2_trainer] epoch #204 | Fitting baseline... +2025-03-29 20:40:49 | [rl2_trainer] epoch #204 | Computing loss before +2025-03-29 20:40:49 | [rl2_trainer] epoch #204 | Computing KL before +2025-03-29 20:40:49 | [rl2_trainer] epoch #204 | Optimizing +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | Computing KL after +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | Computing loss after +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | Saving snapshot... +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | Saved +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | Time 4052.74 s +2025-03-29 20:40:51 | [rl2_trainer] epoch #204 | EpochTime 19.76 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -73.9229 +Average/AverageReturn -114.21 +Average/Iteration 204 +Average/MaxReturn -85.9812 +Average/MinReturn -165.602 +Average/NumEpisodes 8 +Average/StdReturn 25.5142 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974049 +TotalEnvSteps 164000 +__unnamed_task__/AverageDiscountedReturn -73.9229 +__unnamed_task__/AverageReturn -114.21 +__unnamed_task__/Iteration 204 +__unnamed_task__/MaxReturn -85.9812 +__unnamed_task__/MinReturn -165.602 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.5142 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.80329 +policy/KL 0.0146844 +policy/KLBefore 0 +policy/LossAfter -0.263121 +policy/LossBefore -0.017957 +policy/dLoss 0.245164 +---------------------------------------- -------------- +2025-03-29 20:41:08 | [rl2_trainer] epoch #205 | Optimizing policy... +2025-03-29 20:41:08 | [rl2_trainer] epoch #205 | Fitting baseline... +2025-03-29 20:41:08 | [rl2_trainer] epoch #205 | Computing loss before +2025-03-29 20:41:08 | [rl2_trainer] epoch #205 | Computing KL before +2025-03-29 20:41:08 | [rl2_trainer] epoch #205 | Optimizing +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | Computing KL after +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | Computing loss after +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | Saving snapshot... +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | Saved +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | Time 4072.15 s +2025-03-29 20:41:11 | [rl2_trainer] epoch #205 | EpochTime 19.41 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.9691 +Average/AverageReturn -94.1523 +Average/Iteration 205 +Average/MaxReturn -56.0682 +Average/MinReturn -168.107 +Average/NumEpisodes 8 +Average/StdReturn 33.7716 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.973829 +TotalEnvSteps 164800 +__unnamed_task__/AverageDiscountedReturn -63.9691 +__unnamed_task__/AverageReturn -94.1523 +__unnamed_task__/Iteration 205 +__unnamed_task__/MaxReturn -56.0682 +__unnamed_task__/MinReturn -168.107 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.7716 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.81859 +policy/KL 0.0339184 +policy/KLBefore 0 +policy/LossAfter -0.230913 +policy/LossBefore -0.00458822 +policy/dLoss 0.226324 +---------------------------------------- --------------- +2025-03-29 20:41:27 | [rl2_trainer] epoch #206 | Optimizing policy... +2025-03-29 20:41:27 | [rl2_trainer] epoch #206 | Fitting baseline... +2025-03-29 20:41:27 | [rl2_trainer] epoch #206 | Computing loss before +2025-03-29 20:41:27 | [rl2_trainer] epoch #206 | Computing KL before +2025-03-29 20:41:27 | [rl2_trainer] epoch #206 | Optimizing +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | Computing KL after +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | Computing loss after +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | Saving snapshot... +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | Saved +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | Time 4091.14 s +2025-03-29 20:41:30 | [rl2_trainer] epoch #206 | EpochTime 18.99 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -68.55 +Average/AverageReturn -107.496 +Average/Iteration 206 +Average/MaxReturn -64.3628 +Average/MinReturn -164.994 +Average/NumEpisodes 8 +Average/StdReturn 30.9901 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977109 +TotalEnvSteps 165600 +__unnamed_task__/AverageDiscountedReturn -68.55 +__unnamed_task__/AverageReturn -107.496 +__unnamed_task__/Iteration 206 +__unnamed_task__/MaxReturn -64.3628 +__unnamed_task__/MinReturn -164.994 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.9901 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.83541 +policy/KL 0.0164737 +policy/KLBefore 0 +policy/LossAfter -0.273672 +policy/LossBefore -0.00320471 +policy/dLoss 0.270468 +---------------------------------------- --------------- +2025-03-29 20:41:46 | [rl2_trainer] epoch #207 | Optimizing policy... +2025-03-29 20:41:46 | [rl2_trainer] epoch #207 | Fitting baseline... +2025-03-29 20:41:46 | [rl2_trainer] epoch #207 | Computing loss before +2025-03-29 20:41:46 | [rl2_trainer] epoch #207 | Computing KL before +2025-03-29 20:41:46 | [rl2_trainer] epoch #207 | Optimizing +2025-03-29 20:41:48 | [rl2_trainer] epoch #207 | Computing KL after +2025-03-29 20:41:49 | [rl2_trainer] epoch #207 | Computing loss after +2025-03-29 20:41:49 | [rl2_trainer] epoch #207 | Saving snapshot... +2025-03-29 20:41:49 | [rl2_trainer] epoch #207 | Saved +2025-03-29 20:41:49 | [rl2_trainer] epoch #207 | Time 4110.05 s +2025-03-29 20:41:49 | [rl2_trainer] epoch #207 | EpochTime 18.91 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -57.8684 +Average/AverageReturn -86.9107 +Average/Iteration 207 +Average/MaxReturn -60.4242 +Average/MinReturn -119.83 +Average/NumEpisodes 8 +Average/StdReturn 20.0577 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972628 +TotalEnvSteps 166400 +__unnamed_task__/AverageDiscountedReturn -57.8684 +__unnamed_task__/AverageReturn -86.9107 +__unnamed_task__/Iteration 207 +__unnamed_task__/MaxReturn -60.4242 +__unnamed_task__/MinReturn -119.83 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.0577 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.83706 +policy/KL 0.0208931 +policy/KLBefore 0 +policy/LossAfter -0.153012 +policy/LossBefore -0.00911882 +policy/dLoss 0.143893 +---------------------------------------- --------------- +2025-03-29 20:42:05 | [rl2_trainer] epoch #208 | Optimizing policy... +2025-03-29 20:42:05 | [rl2_trainer] epoch #208 | Fitting baseline... +2025-03-29 20:42:05 | [rl2_trainer] epoch #208 | Computing loss before +2025-03-29 20:42:05 | [rl2_trainer] epoch #208 | Computing KL before +2025-03-29 20:42:05 | [rl2_trainer] epoch #208 | Optimizing +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | Computing KL after +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | Computing loss after +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | Saving snapshot... +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | Saved +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | Time 4129.10 s +2025-03-29 20:42:08 | [rl2_trainer] epoch #208 | EpochTime 19.04 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.0318 +Average/AverageReturn -90.7557 +Average/Iteration 208 +Average/MaxReturn -64.0916 +Average/MinReturn -155.685 +Average/NumEpisodes 8 +Average/StdReturn 26.6166 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.971689 +TotalEnvSteps 167200 +__unnamed_task__/AverageDiscountedReturn -61.0318 +__unnamed_task__/AverageReturn -90.7557 +__unnamed_task__/Iteration 208 +__unnamed_task__/MaxReturn -64.0916 +__unnamed_task__/MinReturn -155.685 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.6166 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.8384 +policy/KL 0.0187568 +policy/KLBefore 0 +policy/LossAfter -0.180349 +policy/LossBefore 0.0156458 +policy/dLoss 0.195994 +---------------------------------------- -------------- +2025-03-29 20:42:24 | [rl2_trainer] epoch #209 | Optimizing policy... +2025-03-29 20:42:24 | [rl2_trainer] epoch #209 | Fitting baseline... +2025-03-29 20:42:24 | [rl2_trainer] epoch #209 | Computing loss before +2025-03-29 20:42:24 | [rl2_trainer] epoch #209 | Computing KL before +2025-03-29 20:42:24 | [rl2_trainer] epoch #209 | Optimizing +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | Computing KL after +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | Computing loss after +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | Saving snapshot... +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | Saved +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | Time 4147.46 s +2025-03-29 20:42:26 | [rl2_trainer] epoch #209 | EpochTime 18.36 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.3946 +Average/AverageReturn -96.766 +Average/Iteration 209 +Average/MaxReturn -64.0647 +Average/MinReturn -149.615 +Average/NumEpisodes 8 +Average/StdReturn 22.9065 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980106 +TotalEnvSteps 168000 +__unnamed_task__/AverageDiscountedReturn -64.3946 +__unnamed_task__/AverageReturn -96.766 +__unnamed_task__/Iteration 209 +__unnamed_task__/MaxReturn -64.0647 +__unnamed_task__/MinReturn -149.615 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.9065 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.84118 +policy/KL 0.0169933 +policy/KLBefore 0 +policy/LossAfter -0.161719 +policy/LossBefore 0.0101709 +policy/dLoss 0.17189 +---------------------------------------- -------------- +2025-03-29 20:42:41 | [rl2_trainer] epoch #210 | Optimizing policy... +2025-03-29 20:42:41 | [rl2_trainer] epoch #210 | Fitting baseline... +2025-03-29 20:42:41 | [rl2_trainer] epoch #210 | Computing loss before +2025-03-29 20:42:41 | [rl2_trainer] epoch #210 | Computing KL before +2025-03-29 20:42:41 | [rl2_trainer] epoch #210 | Optimizing +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | Computing KL after +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | Computing loss after +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | Saving snapshot... +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | Saved +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | Time 4165.12 s +2025-03-29 20:42:44 | [rl2_trainer] epoch #210 | EpochTime 17.65 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -69.4772 +Average/AverageReturn -107.177 +Average/Iteration 210 +Average/MaxReturn -83.2404 +Average/MinReturn -163.344 +Average/NumEpisodes 8 +Average/StdReturn 25.8481 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.933179 +TotalEnvSteps 168800 +__unnamed_task__/AverageDiscountedReturn -69.4772 +__unnamed_task__/AverageReturn -107.177 +__unnamed_task__/Iteration 210 +__unnamed_task__/MaxReturn -83.2404 +__unnamed_task__/MinReturn -163.344 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.8481 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.83337 +policy/KL 0.0226375 +policy/KLBefore 0 +policy/LossAfter -0.440935 +policy/LossBefore 0.00840187 +policy/dLoss 0.449337 +---------------------------------------- --------------- +2025-03-29 20:43:00 | [rl2_trainer] epoch #211 | Optimizing policy... +2025-03-29 20:43:00 | [rl2_trainer] epoch #211 | Fitting baseline... +2025-03-29 20:43:00 | [rl2_trainer] epoch #211 | Computing loss before +2025-03-29 20:43:00 | [rl2_trainer] epoch #211 | Computing KL before +2025-03-29 20:43:00 | [rl2_trainer] epoch #211 | Optimizing +2025-03-29 20:43:02 | [rl2_trainer] epoch #211 | Computing KL after +2025-03-29 20:43:02 | [rl2_trainer] epoch #211 | Computing loss after +2025-03-29 20:43:03 | [rl2_trainer] epoch #211 | Saving snapshot... +2025-03-29 20:43:03 | [rl2_trainer] epoch #211 | Saved +2025-03-29 20:43:03 | [rl2_trainer] epoch #211 | Time 4184.03 s +2025-03-29 20:43:03 | [rl2_trainer] epoch #211 | EpochTime 18.91 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.3098 +Average/AverageReturn -91.6761 +Average/Iteration 211 +Average/MaxReturn -41.9124 +Average/MinReturn -141.3 +Average/NumEpisodes 8 +Average/StdReturn 28.1623 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979293 +TotalEnvSteps 169600 +__unnamed_task__/AverageDiscountedReturn -60.3098 +__unnamed_task__/AverageReturn -91.6761 +__unnamed_task__/Iteration 211 +__unnamed_task__/MaxReturn -41.9124 +__unnamed_task__/MinReturn -141.3 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.1623 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.812 +policy/KL 0.0285643 +policy/KLBefore 0 +policy/LossAfter -0.168488 +policy/LossBefore -0.0115858 +policy/dLoss 0.156902 +---------------------------------------- -------------- +2025-03-29 20:43:18 | [rl2_trainer] epoch #212 | Optimizing policy... +2025-03-29 20:43:18 | [rl2_trainer] epoch #212 | Fitting baseline... +2025-03-29 20:43:18 | [rl2_trainer] epoch #212 | Computing loss before +2025-03-29 20:43:18 | [rl2_trainer] epoch #212 | Computing KL before +2025-03-29 20:43:18 | [rl2_trainer] epoch #212 | Optimizing +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | Computing KL after +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | Computing loss after +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | Saving snapshot... +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | Saved +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | Time 4202.22 s +2025-03-29 20:43:21 | [rl2_trainer] epoch #212 | EpochTime 18.19 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1635 +Average/AverageReturn -112.016 +Average/Iteration 212 +Average/MaxReturn -61.5849 +Average/MinReturn -183.315 +Average/NumEpisodes 8 +Average/StdReturn 36.8415 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972509 +TotalEnvSteps 170400 +__unnamed_task__/AverageDiscountedReturn -71.1635 +__unnamed_task__/AverageReturn -112.016 +__unnamed_task__/Iteration 212 +__unnamed_task__/MaxReturn -61.5849 +__unnamed_task__/MinReturn -183.315 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.8415 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.7839 +policy/KL 0.0142955 +policy/KLBefore 0 +policy/LossAfter -0.337633 +policy/LossBefore -0.0224802 +policy/dLoss 0.315153 +---------------------------------------- -------------- +2025-03-29 20:43:37 | [rl2_trainer] epoch #213 | Optimizing policy... +2025-03-29 20:43:37 | [rl2_trainer] epoch #213 | Fitting baseline... +2025-03-29 20:43:37 | [rl2_trainer] epoch #213 | Computing loss before +2025-03-29 20:43:37 | [rl2_trainer] epoch #213 | Computing KL before +2025-03-29 20:43:37 | [rl2_trainer] epoch #213 | Optimizing +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | Computing KL after +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | Computing loss after +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | Saving snapshot... +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | Saved +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | Time 4220.93 s +2025-03-29 20:43:39 | [rl2_trainer] epoch #213 | EpochTime 18.70 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -68.009 +Average/AverageReturn -102.625 +Average/Iteration 213 +Average/MaxReturn -42.3093 +Average/MinReturn -155.06 +Average/NumEpisodes 8 +Average/StdReturn 29.7566 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.961413 +TotalEnvSteps 171200 +__unnamed_task__/AverageDiscountedReturn -68.009 +__unnamed_task__/AverageReturn -102.625 +__unnamed_task__/Iteration 213 +__unnamed_task__/MaxReturn -42.3093 +__unnamed_task__/MinReturn -155.06 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.7566 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.75574 +policy/KL 0.019589 +policy/KLBefore 0 +policy/LossAfter -0.236469 +policy/LossBefore 0.00601059 +policy/dLoss 0.242479 +---------------------------------------- --------------- +2025-03-29 20:43:56 | [rl2_trainer] epoch #214 | Optimizing policy... +2025-03-29 20:43:56 | [rl2_trainer] epoch #214 | Fitting baseline... +2025-03-29 20:43:56 | [rl2_trainer] epoch #214 | Computing loss before +2025-03-29 20:43:56 | [rl2_trainer] epoch #214 | Computing KL before +2025-03-29 20:43:56 | [rl2_trainer] epoch #214 | Optimizing +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | Computing KL after +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | Computing loss after +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | Saving snapshot... +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | Saved +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | Time 4239.92 s +2025-03-29 20:43:58 | [rl2_trainer] epoch #214 | EpochTime 18.99 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -61.8787 +Average/AverageReturn -92.2102 +Average/Iteration 214 +Average/MaxReturn -60.9524 +Average/MinReturn -143.897 +Average/NumEpisodes 8 +Average/StdReturn 27.3947 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983167 +TotalEnvSteps 172000 +__unnamed_task__/AverageDiscountedReturn -61.8787 +__unnamed_task__/AverageReturn -92.2102 +__unnamed_task__/Iteration 214 +__unnamed_task__/MaxReturn -60.9524 +__unnamed_task__/MinReturn -143.897 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.3947 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.74184 +policy/KL 0.0120511 +policy/KLBefore 0 +policy/LossAfter -0.125451 +policy/LossBefore -0.00537568 +policy/dLoss 0.120075 +---------------------------------------- --------------- +2025-03-29 20:44:14 | [rl2_trainer] epoch #215 | Optimizing policy... +2025-03-29 20:44:14 | [rl2_trainer] epoch #215 | Fitting baseline... +2025-03-29 20:44:14 | [rl2_trainer] epoch #215 | Computing loss before +2025-03-29 20:44:14 | [rl2_trainer] epoch #215 | Computing KL before +2025-03-29 20:44:14 | [rl2_trainer] epoch #215 | Optimizing +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | Computing KL after +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | Computing loss after +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | Saving snapshot... +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | Saved +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | Time 4258.35 s +2025-03-29 20:44:17 | [rl2_trainer] epoch #215 | EpochTime 18.43 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5573 +Average/AverageReturn -108.82 +Average/Iteration 215 +Average/MaxReturn -54.3207 +Average/MinReturn -169.119 +Average/NumEpisodes 8 +Average/StdReturn 36.9552 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966084 +TotalEnvSteps 172800 +__unnamed_task__/AverageDiscountedReturn -71.5573 +__unnamed_task__/AverageReturn -108.82 +__unnamed_task__/Iteration 215 +__unnamed_task__/MaxReturn -54.3207 +__unnamed_task__/MinReturn -169.119 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.9552 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.72224 +policy/KL 0.0266751 +policy/KLBefore 0 +policy/LossAfter -0.309205 +policy/LossBefore 0.0213285 +policy/dLoss 0.330534 +---------------------------------------- -------------- +2025-03-29 20:44:33 | [rl2_trainer] epoch #216 | Optimizing policy... +2025-03-29 20:44:34 | [rl2_trainer] epoch #216 | Fitting baseline... +2025-03-29 20:44:34 | [rl2_trainer] epoch #216 | Computing loss before +2025-03-29 20:44:34 | [rl2_trainer] epoch #216 | Computing KL before +2025-03-29 20:44:34 | [rl2_trainer] epoch #216 | Optimizing +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | Computing KL after +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | Computing loss after +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | Saving snapshot... +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | Saved +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | Time 4277.35 s +2025-03-29 20:44:36 | [rl2_trainer] epoch #216 | EpochTime 18.99 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -69.6392 +Average/AverageReturn -105.577 +Average/Iteration 216 +Average/MaxReturn -76.2457 +Average/MinReturn -184.678 +Average/NumEpisodes 8 +Average/StdReturn 38.9265 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979156 +TotalEnvSteps 173600 +__unnamed_task__/AverageDiscountedReturn -69.6392 +__unnamed_task__/AverageReturn -105.577 +__unnamed_task__/Iteration 216 +__unnamed_task__/MaxReturn -76.2457 +__unnamed_task__/MinReturn -184.678 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.9265 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.70547 +policy/KL 0.0163036 +policy/KLBefore 0 +policy/LossAfter -0.277992 +policy/LossBefore -0.0291891 +policy/dLoss 0.248803 +---------------------------------------- -------------- +2025-03-29 20:44:52 | [rl2_trainer] epoch #217 | Optimizing policy... +2025-03-29 20:44:52 | [rl2_trainer] epoch #217 | Fitting baseline... +2025-03-29 20:44:52 | [rl2_trainer] epoch #217 | Computing loss before +2025-03-29 20:44:52 | [rl2_trainer] epoch #217 | Computing KL before +2025-03-29 20:44:52 | [rl2_trainer] epoch #217 | Optimizing +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | Computing KL after +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | Computing loss after +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | Saving snapshot... +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | Saved +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | Time 4295.59 s +2025-03-29 20:44:54 | [rl2_trainer] epoch #217 | EpochTime 18.24 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2892 +Average/AverageReturn -111.553 +Average/Iteration 217 +Average/MaxReturn -67.8512 +Average/MinReturn -168.588 +Average/NumEpisodes 8 +Average/StdReturn 34.1084 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.971906 +TotalEnvSteps 174400 +__unnamed_task__/AverageDiscountedReturn -71.2892 +__unnamed_task__/AverageReturn -111.553 +__unnamed_task__/Iteration 217 +__unnamed_task__/MaxReturn -67.8512 +__unnamed_task__/MinReturn -168.588 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.1084 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.68236 +policy/KL 0.0201695 +policy/KLBefore 0 +policy/LossAfter -0.314191 +policy/LossBefore 0.0186683 +policy/dLoss 0.33286 +---------------------------------------- -------------- +2025-03-29 20:45:09 | [rl2_trainer] epoch #218 | Optimizing policy... +2025-03-29 20:45:09 | [rl2_trainer] epoch #218 | Fitting baseline... +2025-03-29 20:45:09 | [rl2_trainer] epoch #218 | Computing loss before +2025-03-29 20:45:09 | [rl2_trainer] epoch #218 | Computing KL before +2025-03-29 20:45:09 | [rl2_trainer] epoch #218 | Optimizing +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | Computing KL after +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | Computing loss after +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | Saving snapshot... +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | Saved +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | Time 4312.97 s +2025-03-29 20:45:11 | [rl2_trainer] epoch #218 | EpochTime 17.37 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -80.5264 +Average/AverageReturn -125.359 +Average/Iteration 218 +Average/MaxReturn -92.7773 +Average/MinReturn -190.285 +Average/NumEpisodes 8 +Average/StdReturn 32.6377 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966656 +TotalEnvSteps 175200 +__unnamed_task__/AverageDiscountedReturn -80.5264 +__unnamed_task__/AverageReturn -125.359 +__unnamed_task__/Iteration 218 +__unnamed_task__/MaxReturn -92.7773 +__unnamed_task__/MinReturn -190.285 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.6377 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.67181 +policy/KL 0.0213613 +policy/KLBefore 0 +policy/LossAfter -0.447719 +policy/LossBefore -0.00669445 +policy/dLoss 0.441025 +---------------------------------------- --------------- +2025-03-29 20:45:28 | [rl2_trainer] epoch #219 | Optimizing policy... +2025-03-29 20:45:28 | [rl2_trainer] epoch #219 | Fitting baseline... +2025-03-29 20:45:28 | [rl2_trainer] epoch #219 | Computing loss before +2025-03-29 20:45:28 | [rl2_trainer] epoch #219 | Computing KL before +2025-03-29 20:45:28 | [rl2_trainer] epoch #219 | Optimizing +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | Computing KL after +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | Computing loss after +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | Saving snapshot... +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | Saved +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | Time 4331.55 s +2025-03-29 20:45:30 | [rl2_trainer] epoch #219 | EpochTime 18.57 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -66.0123 +Average/AverageReturn -96.4197 +Average/Iteration 219 +Average/MaxReturn -57.1459 +Average/MinReturn -148.297 +Average/NumEpisodes 8 +Average/StdReturn 30.6058 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.948398 +TotalEnvSteps 176000 +__unnamed_task__/AverageDiscountedReturn -66.0123 +__unnamed_task__/AverageReturn -96.4197 +__unnamed_task__/Iteration 219 +__unnamed_task__/MaxReturn -57.1459 +__unnamed_task__/MinReturn -148.297 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.6058 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.66201 +policy/KL 0.025827 +policy/KLBefore 0 +policy/LossAfter -0.34684 +policy/LossBefore -0.0342628 +policy/dLoss 0.312577 +---------------------------------------- -------------- +2025-03-29 20:45:46 | [rl2_trainer] epoch #220 | Optimizing policy... +2025-03-29 20:45:46 | [rl2_trainer] epoch #220 | Fitting baseline... +2025-03-29 20:45:46 | [rl2_trainer] epoch #220 | Computing loss before +2025-03-29 20:45:46 | [rl2_trainer] epoch #220 | Computing KL before +2025-03-29 20:45:46 | [rl2_trainer] epoch #220 | Optimizing +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | Computing KL after +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | Computing loss after +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | Saving snapshot... +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | Saved +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | Time 4349.63 s +2025-03-29 20:45:48 | [rl2_trainer] epoch #220 | EpochTime 18.08 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -69.3213 +Average/AverageReturn -101.359 +Average/Iteration 220 +Average/MaxReturn -62.2596 +Average/MinReturn -149.146 +Average/NumEpisodes 8 +Average/StdReturn 30.0147 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96561 +TotalEnvSteps 176800 +__unnamed_task__/AverageDiscountedReturn -69.3213 +__unnamed_task__/AverageReturn -101.359 +__unnamed_task__/Iteration 220 +__unnamed_task__/MaxReturn -62.2596 +__unnamed_task__/MinReturn -149.146 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.0147 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.65706 +policy/KL 0.0211067 +policy/KLBefore 0 +policy/LossAfter -0.292931 +policy/LossBefore 0.00509956 +policy/dLoss 0.298031 +---------------------------------------- --------------- +2025-03-29 20:46:03 | [rl2_trainer] epoch #221 | Optimizing policy... +2025-03-29 20:46:03 | [rl2_trainer] epoch #221 | Fitting baseline... +2025-03-29 20:46:03 | [rl2_trainer] epoch #221 | Computing loss before +2025-03-29 20:46:03 | [rl2_trainer] epoch #221 | Computing KL before +2025-03-29 20:46:03 | [rl2_trainer] epoch #221 | Optimizing +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | Computing KL after +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | Computing loss after +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | Saving snapshot... +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | Saved +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | Time 4367.12 s +2025-03-29 20:46:06 | [rl2_trainer] epoch #221 | EpochTime 17.49 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -73.6579 +Average/AverageReturn -113.08 +Average/Iteration 221 +Average/MaxReturn -79.2137 +Average/MinReturn -158.961 +Average/NumEpisodes 8 +Average/StdReturn 29.15 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974341 +TotalEnvSteps 177600 +__unnamed_task__/AverageDiscountedReturn -73.6579 +__unnamed_task__/AverageReturn -113.08 +__unnamed_task__/Iteration 221 +__unnamed_task__/MaxReturn -79.2137 +__unnamed_task__/MinReturn -158.961 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.15 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.64674 +policy/KL 0.0149373 +policy/KLBefore 0 +policy/LossAfter -0.280669 +policy/LossBefore -0.00176039 +policy/dLoss 0.278909 +---------------------------------------- --------------- +2025-03-29 20:46:21 | [rl2_trainer] epoch #222 | Optimizing policy... +2025-03-29 20:46:21 | [rl2_trainer] epoch #222 | Fitting baseline... +2025-03-29 20:46:21 | [rl2_trainer] epoch #222 | Computing loss before +2025-03-29 20:46:21 | [rl2_trainer] epoch #222 | Computing KL before +2025-03-29 20:46:22 | [rl2_trainer] epoch #222 | Optimizing +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | Computing KL after +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | Computing loss after +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | Saving snapshot... +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | Saved +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | Time 4385.34 s +2025-03-29 20:46:24 | [rl2_trainer] epoch #222 | EpochTime 18.22 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.7957 +Average/AverageReturn -103.003 +Average/Iteration 222 +Average/MaxReturn -60.0523 +Average/MinReturn -179.819 +Average/NumEpisodes 8 +Average/StdReturn 36.3745 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.941666 +TotalEnvSteps 178400 +__unnamed_task__/AverageDiscountedReturn -68.7957 +__unnamed_task__/AverageReturn -103.003 +__unnamed_task__/Iteration 222 +__unnamed_task__/MaxReturn -60.0523 +__unnamed_task__/MinReturn -179.819 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 36.3745 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.6195 +policy/KL 0.0423796 +policy/KLBefore 0 +policy/LossAfter -0.371611 +policy/LossBefore -0.0343357 +policy/dLoss 0.337276 +---------------------------------------- -------------- +2025-03-29 20:46:40 | [rl2_trainer] epoch #223 | Optimizing policy... +2025-03-29 20:46:40 | [rl2_trainer] epoch #223 | Fitting baseline... +2025-03-29 20:46:40 | [rl2_trainer] epoch #223 | Computing loss before +2025-03-29 20:46:40 | [rl2_trainer] epoch #223 | Computing KL before +2025-03-29 20:46:40 | [rl2_trainer] epoch #223 | Optimizing +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | Computing KL after +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | Computing loss after +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | Saving snapshot... +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | Saved +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | Time 4403.73 s +2025-03-29 20:46:42 | [rl2_trainer] epoch #223 | EpochTime 18.39 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -70.0569 +Average/AverageReturn -112.859 +Average/Iteration 223 +Average/MaxReturn -74.3003 +Average/MinReturn -153.967 +Average/NumEpisodes 8 +Average/StdReturn 29.2769 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983251 +TotalEnvSteps 179200 +__unnamed_task__/AverageDiscountedReturn -70.0569 +__unnamed_task__/AverageReturn -112.859 +__unnamed_task__/Iteration 223 +__unnamed_task__/MaxReturn -74.3003 +__unnamed_task__/MinReturn -153.967 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.2769 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.60003 +policy/KL 0.0312792 +policy/KLBefore 0 +policy/LossAfter -0.253987 +policy/LossBefore -0.0296664 +policy/dLoss 0.224321 +---------------------------------------- -------------- +2025-03-29 20:46:58 | [rl2_trainer] epoch #224 | Optimizing policy... +2025-03-29 20:46:58 | [rl2_trainer] epoch #224 | Fitting baseline... +2025-03-29 20:46:58 | [rl2_trainer] epoch #224 | Computing loss before +2025-03-29 20:46:58 | [rl2_trainer] epoch #224 | Computing KL before +2025-03-29 20:46:58 | [rl2_trainer] epoch #224 | Optimizing +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | Computing KL after +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | Computing loss after +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | Saving snapshot... +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | Saved +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | Time 4421.47 s +2025-03-29 20:47:00 | [rl2_trainer] epoch #224 | EpochTime 17.74 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4665 +Average/AverageReturn -111.626 +Average/Iteration 224 +Average/MaxReturn -76.3032 +Average/MinReturn -166.703 +Average/NumEpisodes 8 +Average/StdReturn 33.1991 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.956168 +TotalEnvSteps 180000 +__unnamed_task__/AverageDiscountedReturn -71.4665 +__unnamed_task__/AverageReturn -111.626 +__unnamed_task__/Iteration 224 +__unnamed_task__/MaxReturn -76.3032 +__unnamed_task__/MinReturn -166.703 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.1991 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.5791 +policy/KL 0.0326367 +policy/KLBefore 0 +policy/LossAfter -0.416952 +policy/LossBefore 0.0251177 +policy/dLoss 0.44207 +---------------------------------------- -------------- +2025-03-29 20:47:15 | [rl2_trainer] epoch #225 | Optimizing policy... +2025-03-29 20:47:15 | [rl2_trainer] epoch #225 | Fitting baseline... +2025-03-29 20:47:15 | [rl2_trainer] epoch #225 | Computing loss before +2025-03-29 20:47:15 | [rl2_trainer] epoch #225 | Computing KL before +2025-03-29 20:47:15 | [rl2_trainer] epoch #225 | Optimizing +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | Computing KL after +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | Computing loss after +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | Saving snapshot... +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | Saved +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | Time 4438.96 s +2025-03-29 20:47:17 | [rl2_trainer] epoch #225 | EpochTime 17.49 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -71.972 +Average/AverageReturn -112.561 +Average/Iteration 225 +Average/MaxReturn -84.9644 +Average/MinReturn -148.128 +Average/NumEpisodes 8 +Average/StdReturn 23.9835 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966655 +TotalEnvSteps 180800 +__unnamed_task__/AverageDiscountedReturn -71.972 +__unnamed_task__/AverageReturn -112.561 +__unnamed_task__/Iteration 225 +__unnamed_task__/MaxReturn -84.9644 +__unnamed_task__/MinReturn -148.128 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.9835 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57325 +policy/KL 0.0237262 +policy/KLBefore 0 +policy/LossAfter -0.335477 +policy/LossBefore -0.00225647 +policy/dLoss 0.333221 +---------------------------------------- --------------- +2025-03-29 20:47:34 | [rl2_trainer] epoch #226 | Optimizing policy... +2025-03-29 20:47:34 | [rl2_trainer] epoch #226 | Fitting baseline... +2025-03-29 20:47:34 | [rl2_trainer] epoch #226 | Computing loss before +2025-03-29 20:47:34 | [rl2_trainer] epoch #226 | Computing KL before +2025-03-29 20:47:34 | [rl2_trainer] epoch #226 | Optimizing +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | Computing KL after +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | Computing loss after +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | Saving snapshot... +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | Saved +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | Time 4457.64 s +2025-03-29 20:47:36 | [rl2_trainer] epoch #226 | EpochTime 18.67 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -67.1051 +Average/AverageReturn -103.315 +Average/Iteration 226 +Average/MaxReturn -46.0403 +Average/MinReturn -166.276 +Average/NumEpisodes 8 +Average/StdReturn 37.0521 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.976647 +TotalEnvSteps 181600 +__unnamed_task__/AverageDiscountedReturn -67.1051 +__unnamed_task__/AverageReturn -103.315 +__unnamed_task__/Iteration 226 +__unnamed_task__/MaxReturn -46.0403 +__unnamed_task__/MinReturn -166.276 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.0521 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57156 +policy/KL 0.0169358 +policy/KLBefore 0 +policy/LossAfter -0.250153 +policy/LossBefore 0.00866082 +policy/dLoss 0.258814 +---------------------------------------- --------------- +2025-03-29 20:47:51 | [rl2_trainer] epoch #227 | Optimizing policy... +2025-03-29 20:47:51 | [rl2_trainer] epoch #227 | Fitting baseline... +2025-03-29 20:47:51 | [rl2_trainer] epoch #227 | Computing loss before +2025-03-29 20:47:51 | [rl2_trainer] epoch #227 | Computing KL before +2025-03-29 20:47:52 | [rl2_trainer] epoch #227 | Optimizing +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | Computing KL after +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | Computing loss after +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | Saving snapshot... +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | Saved +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | Time 4475.30 s +2025-03-29 20:47:54 | [rl2_trainer] epoch #227 | EpochTime 17.66 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -79.3797 +Average/AverageReturn -124.761 +Average/Iteration 227 +Average/MaxReturn -79.0864 +Average/MinReturn -190.167 +Average/NumEpisodes 8 +Average/StdReturn 34.8783 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957516 +TotalEnvSteps 182400 +__unnamed_task__/AverageDiscountedReturn -79.3797 +__unnamed_task__/AverageReturn -124.761 +__unnamed_task__/Iteration 227 +__unnamed_task__/MaxReturn -79.0864 +__unnamed_task__/MinReturn -190.167 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.8783 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57437 +policy/KL 0.0270517 +policy/KLBefore 0 +policy/LossAfter -0.429121 +policy/LossBefore 0.0178791 +policy/dLoss 0.447 +---------------------------------------- -------------- +2025-03-29 20:48:10 | [rl2_trainer] epoch #228 | Optimizing policy... +2025-03-29 20:48:10 | [rl2_trainer] epoch #228 | Fitting baseline... +2025-03-29 20:48:10 | [rl2_trainer] epoch #228 | Computing loss before +2025-03-29 20:48:10 | [rl2_trainer] epoch #228 | Computing KL before +2025-03-29 20:48:10 | [rl2_trainer] epoch #228 | Optimizing +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | Computing KL after +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | Computing loss after +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | Saving snapshot... +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | Saved +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | Time 4494.15 s +2025-03-29 20:48:13 | [rl2_trainer] epoch #228 | EpochTime 18.85 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -66.1594 +Average/AverageReturn -103.254 +Average/Iteration 228 +Average/MaxReturn -40.972 +Average/MinReturn -175.18 +Average/NumEpisodes 8 +Average/StdReturn 37.3915 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959908 +TotalEnvSteps 183200 +__unnamed_task__/AverageDiscountedReturn -66.1594 +__unnamed_task__/AverageReturn -103.254 +__unnamed_task__/Iteration 228 +__unnamed_task__/MaxReturn -40.972 +__unnamed_task__/MinReturn -175.18 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.3915 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56648 +policy/KL 0.0158399 +policy/KLBefore 0 +policy/LossAfter -0.31243 +policy/LossBefore 0.00227759 +policy/dLoss 0.314707 +---------------------------------------- --------------- +2025-03-29 20:48:29 | [rl2_trainer] epoch #229 | Optimizing policy... +2025-03-29 20:48:29 | [rl2_trainer] epoch #229 | Fitting baseline... +2025-03-29 20:48:29 | [rl2_trainer] epoch #229 | Computing loss before +2025-03-29 20:48:29 | [rl2_trainer] epoch #229 | Computing KL before +2025-03-29 20:48:29 | [rl2_trainer] epoch #229 | Optimizing +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | Computing KL after +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | Computing loss after +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | Saving snapshot... +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | Saved +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | Time 4512.96 s +2025-03-29 20:48:31 | [rl2_trainer] epoch #229 | EpochTime 18.80 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -65.7206 +Average/AverageReturn -103.661 +Average/Iteration 229 +Average/MaxReturn -50.1584 +Average/MinReturn -169.546 +Average/NumEpisodes 8 +Average/StdReturn 33.9141 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.909227 +TotalEnvSteps 184000 +__unnamed_task__/AverageDiscountedReturn -65.7206 +__unnamed_task__/AverageReturn -103.661 +__unnamed_task__/Iteration 229 +__unnamed_task__/MaxReturn -50.1584 +__unnamed_task__/MinReturn -169.546 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.9141 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.54429 +policy/KL 0.0375655 +policy/KLBefore 0 +policy/LossAfter -0.455705 +policy/LossBefore -0.00535 +policy/dLoss 0.450355 +---------------------------------------- -------------- +2025-03-29 20:48:48 | [rl2_trainer] epoch #230 | Optimizing policy... +2025-03-29 20:48:48 | [rl2_trainer] epoch #230 | Fitting baseline... +2025-03-29 20:48:48 | [rl2_trainer] epoch #230 | Computing loss before +2025-03-29 20:48:48 | [rl2_trainer] epoch #230 | Computing KL before +2025-03-29 20:48:48 | [rl2_trainer] epoch #230 | Optimizing +2025-03-29 20:48:50 | [rl2_trainer] epoch #230 | Computing KL after +2025-03-29 20:48:50 | [rl2_trainer] epoch #230 | Computing loss after +2025-03-29 20:48:51 | [rl2_trainer] epoch #230 | Saving snapshot... +2025-03-29 20:48:51 | [rl2_trainer] epoch #230 | Saved +2025-03-29 20:48:51 | [rl2_trainer] epoch #230 | Time 4532.01 s +2025-03-29 20:48:51 | [rl2_trainer] epoch #230 | EpochTime 19.05 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -59.1632 +Average/AverageReturn -88.8906 +Average/Iteration 230 +Average/MaxReturn -43.0507 +Average/MinReturn -135.837 +Average/NumEpisodes 8 +Average/StdReturn 28.3418 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972861 +TotalEnvSteps 184800 +__unnamed_task__/AverageDiscountedReturn -59.1632 +__unnamed_task__/AverageReturn -88.8906 +__unnamed_task__/Iteration 230 +__unnamed_task__/MaxReturn -43.0507 +__unnamed_task__/MinReturn -135.837 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.3418 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.51673 +policy/KL 0.014047 +policy/KLBefore 0 +policy/LossAfter -0.216286 +policy/LossBefore -0.0241366 +policy/dLoss 0.192149 +---------------------------------------- -------------- +2025-03-29 20:49:08 | [rl2_trainer] epoch #231 | Optimizing policy... +2025-03-29 20:49:08 | [rl2_trainer] epoch #231 | Fitting baseline... +2025-03-29 20:49:08 | [rl2_trainer] epoch #231 | Computing loss before +2025-03-29 20:49:08 | [rl2_trainer] epoch #231 | Computing KL before +2025-03-29 20:49:08 | [rl2_trainer] epoch #231 | Optimizing +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | Computing KL after +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | Computing loss after +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | Saving snapshot... +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | Saved +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | Time 4551.68 s +2025-03-29 20:49:10 | [rl2_trainer] epoch #231 | EpochTime 19.66 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.9638 +Average/AverageReturn -90.9337 +Average/Iteration 231 +Average/MaxReturn -57.5523 +Average/MinReturn -146.004 +Average/NumEpisodes 8 +Average/StdReturn 25.0568 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96087 +TotalEnvSteps 185600 +__unnamed_task__/AverageDiscountedReturn -60.9638 +__unnamed_task__/AverageReturn -90.9337 +__unnamed_task__/Iteration 231 +__unnamed_task__/MaxReturn -57.5523 +__unnamed_task__/MinReturn -146.004 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.0568 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50469 +policy/KL 0.0189194 +policy/KLBefore 0 +policy/LossAfter -0.20339 +policy/LossBefore -0.0107998 +policy/dLoss 0.19259 +---------------------------------------- -------------- +2025-03-29 20:49:26 | [rl2_trainer] epoch #232 | Optimizing policy... +2025-03-29 20:49:26 | [rl2_trainer] epoch #232 | Fitting baseline... +2025-03-29 20:49:26 | [rl2_trainer] epoch #232 | Computing loss before +2025-03-29 20:49:26 | [rl2_trainer] epoch #232 | Computing KL before +2025-03-29 20:49:27 | [rl2_trainer] epoch #232 | Optimizing +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | Computing KL after +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | Computing loss after +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | Saving snapshot... +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | Saved +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | Time 4570.31 s +2025-03-29 20:49:29 | [rl2_trainer] epoch #232 | EpochTime 18.63 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.3369 +Average/AverageReturn -109.143 +Average/Iteration 232 +Average/MaxReturn -68.2935 +Average/MinReturn -158.837 +Average/NumEpisodes 8 +Average/StdReturn 32.6366 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.958559 +TotalEnvSteps 186400 +__unnamed_task__/AverageDiscountedReturn -68.3369 +__unnamed_task__/AverageReturn -109.143 +__unnamed_task__/Iteration 232 +__unnamed_task__/MaxReturn -68.2935 +__unnamed_task__/MinReturn -158.837 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.6366 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50115 +policy/KL 0.0306213 +policy/KLBefore 0 +policy/LossAfter -0.463289 +policy/LossBefore -0.0416667 +policy/dLoss 0.421623 +---------------------------------------- -------------- +2025-03-29 20:49:45 | [rl2_trainer] epoch #233 | Optimizing policy... +2025-03-29 20:49:45 | [rl2_trainer] epoch #233 | Fitting baseline... +2025-03-29 20:49:45 | [rl2_trainer] epoch #233 | Computing loss before +2025-03-29 20:49:45 | [rl2_trainer] epoch #233 | Computing KL before +2025-03-29 20:49:45 | [rl2_trainer] epoch #233 | Optimizing +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | Computing KL after +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | Computing loss after +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | Saving snapshot... +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | Saved +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | Time 4589.22 s +2025-03-29 20:49:48 | [rl2_trainer] epoch #233 | EpochTime 18.91 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.8607 +Average/AverageReturn -99.407 +Average/Iteration 233 +Average/MaxReturn -41.9493 +Average/MinReturn -143.325 +Average/NumEpisodes 8 +Average/StdReturn 30.1649 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.950917 +TotalEnvSteps 187200 +__unnamed_task__/AverageDiscountedReturn -63.8607 +__unnamed_task__/AverageReturn -99.407 +__unnamed_task__/Iteration 233 +__unnamed_task__/MaxReturn -41.9493 +__unnamed_task__/MinReturn -143.325 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.1649 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50735 +policy/KL 0.0188983 +policy/KLBefore 0 +policy/LossAfter -0.320272 +policy/LossBefore 0.0109441 +policy/dLoss 0.331216 +---------------------------------------- -------------- +2025-03-29 20:50:03 | [rl2_trainer] epoch #234 | Optimizing policy... +2025-03-29 20:50:03 | [rl2_trainer] epoch #234 | Fitting baseline... +2025-03-29 20:50:03 | [rl2_trainer] epoch #234 | Computing loss before +2025-03-29 20:50:03 | [rl2_trainer] epoch #234 | Computing KL before +2025-03-29 20:50:03 | [rl2_trainer] epoch #234 | Optimizing +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | Computing KL after +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | Computing loss after +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | Saving snapshot... +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | Saved +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | Time 4606.96 s +2025-03-29 20:50:05 | [rl2_trainer] epoch #234 | EpochTime 17.74 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -75.73 +Average/AverageReturn -119.661 +Average/Iteration 234 +Average/MaxReturn -82.6578 +Average/MinReturn -166.856 +Average/NumEpisodes 8 +Average/StdReturn 29.506 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.955404 +TotalEnvSteps 188000 +__unnamed_task__/AverageDiscountedReturn -75.73 +__unnamed_task__/AverageReturn -119.661 +__unnamed_task__/Iteration 234 +__unnamed_task__/MaxReturn -82.6578 +__unnamed_task__/MinReturn -166.856 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.506 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50867 +policy/KL 0.0247302 +policy/KLBefore 0 +policy/LossAfter -0.477331 +policy/LossBefore 0.000596862 +policy/dLoss 0.477928 +---------------------------------------- ---------------- +2025-03-29 20:50:21 | [rl2_trainer] epoch #235 | Optimizing policy... +2025-03-29 20:50:21 | [rl2_trainer] epoch #235 | Fitting baseline... +2025-03-29 20:50:21 | [rl2_trainer] epoch #235 | Computing loss before +2025-03-29 20:50:21 | [rl2_trainer] epoch #235 | Computing KL before +2025-03-29 20:50:21 | [rl2_trainer] epoch #235 | Optimizing +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | Computing KL after +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | Computing loss after +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | Saving snapshot... +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | Saved +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | Time 4624.60 s +2025-03-29 20:50:23 | [rl2_trainer] epoch #235 | EpochTime 17.64 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -77.5103 +Average/AverageReturn -124.672 +Average/Iteration 235 +Average/MaxReturn -88.5014 +Average/MinReturn -178.232 +Average/NumEpisodes 8 +Average/StdReturn 32.1025 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975042 +TotalEnvSteps 188800 +__unnamed_task__/AverageDiscountedReturn -77.5103 +__unnamed_task__/AverageReturn -124.672 +__unnamed_task__/Iteration 235 +__unnamed_task__/MaxReturn -88.5014 +__unnamed_task__/MinReturn -178.232 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.1025 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.52744 +policy/KL 0.0225033 +policy/KLBefore 0 +policy/LossAfter -0.358663 +policy/LossBefore -0.00661616 +policy/dLoss 0.352046 +---------------------------------------- --------------- +2025-03-29 20:50:38 | [rl2_trainer] epoch #236 | Optimizing policy... +2025-03-29 20:50:38 | [rl2_trainer] epoch #236 | Fitting baseline... +2025-03-29 20:50:38 | [rl2_trainer] epoch #236 | Computing loss before +2025-03-29 20:50:38 | [rl2_trainer] epoch #236 | Computing KL before +2025-03-29 20:50:38 | [rl2_trainer] epoch #236 | Optimizing +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | Computing KL after +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | Computing loss after +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | Saving snapshot... +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | Saved +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | Time 4642.28 s +2025-03-29 20:50:41 | [rl2_trainer] epoch #236 | EpochTime 17.67 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5672 +Average/AverageReturn -110.753 +Average/Iteration 236 +Average/MaxReturn -84.4747 +Average/MinReturn -144.998 +Average/NumEpisodes 8 +Average/StdReturn 23.1555 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.956442 +TotalEnvSteps 189600 +__unnamed_task__/AverageDiscountedReturn -71.5672 +__unnamed_task__/AverageReturn -110.753 +__unnamed_task__/Iteration 236 +__unnamed_task__/MaxReturn -84.4747 +__unnamed_task__/MinReturn -144.998 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.1555 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.54663 +policy/KL 0.022418 +policy/KLBefore 0 +policy/LossAfter -0.427486 +policy/LossBefore -0.0427298 +policy/dLoss 0.384756 +---------------------------------------- -------------- +2025-03-29 20:50:57 | [rl2_trainer] epoch #237 | Optimizing policy... +2025-03-29 20:50:57 | [rl2_trainer] epoch #237 | Fitting baseline... +2025-03-29 20:50:57 | [rl2_trainer] epoch #237 | Computing loss before +2025-03-29 20:50:57 | [rl2_trainer] epoch #237 | Computing KL before +2025-03-29 20:50:57 | [rl2_trainer] epoch #237 | Optimizing +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | Computing KL after +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | Computing loss after +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | Saving snapshot... +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | Saved +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | Time 4660.58 s +2025-03-29 20:50:59 | [rl2_trainer] epoch #237 | EpochTime 18.30 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -67.1666 +Average/AverageReturn -103.528 +Average/Iteration 237 +Average/MaxReturn -59.8852 +Average/MinReturn -143.181 +Average/NumEpisodes 8 +Average/StdReturn 28.2164 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982388 +TotalEnvSteps 190400 +__unnamed_task__/AverageDiscountedReturn -67.1666 +__unnamed_task__/AverageReturn -103.528 +__unnamed_task__/Iteration 237 +__unnamed_task__/MaxReturn -59.8852 +__unnamed_task__/MinReturn -143.181 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.2164 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56248 +policy/KL 0.018996 +policy/KLBefore 0 +policy/LossAfter -0.224418 +policy/LossBefore 0.00507903 +policy/dLoss 0.229497 +---------------------------------------- --------------- +2025-03-29 20:51:15 | [rl2_trainer] epoch #238 | Optimizing policy... +2025-03-29 20:51:15 | [rl2_trainer] epoch #238 | Fitting baseline... +2025-03-29 20:51:15 | [rl2_trainer] epoch #238 | Computing loss before +2025-03-29 20:51:15 | [rl2_trainer] epoch #238 | Computing KL before +2025-03-29 20:51:15 | [rl2_trainer] epoch #238 | Optimizing +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | Computing KL after +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | Computing loss after +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | Saving snapshot... +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | Saved +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | Time 4678.95 s +2025-03-29 20:51:17 | [rl2_trainer] epoch #238 | EpochTime 18.37 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -72.434 +Average/AverageReturn -114.097 +Average/Iteration 238 +Average/MaxReturn -74.8957 +Average/MinReturn -167.678 +Average/NumEpisodes 8 +Average/StdReturn 29.9275 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.962323 +TotalEnvSteps 191200 +__unnamed_task__/AverageDiscountedReturn -72.434 +__unnamed_task__/AverageReturn -114.097 +__unnamed_task__/Iteration 238 +__unnamed_task__/MaxReturn -74.8957 +__unnamed_task__/MinReturn -167.678 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.9275 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57841 +policy/KL 0.0248431 +policy/KLBefore 0 +policy/LossAfter -0.320008 +policy/LossBefore -0.00028481 +policy/dLoss 0.319724 +---------------------------------------- --------------- +2025-03-29 20:51:34 | [rl2_trainer] epoch #239 | Optimizing policy... +2025-03-29 20:51:34 | [rl2_trainer] epoch #239 | Fitting baseline... +2025-03-29 20:51:34 | [rl2_trainer] epoch #239 | Computing loss before +2025-03-29 20:51:34 | [rl2_trainer] epoch #239 | Computing KL before +2025-03-29 20:51:34 | [rl2_trainer] epoch #239 | Optimizing +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | Computing KL after +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | Computing loss after +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | Saving snapshot... +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | Saved +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | Time 4697.88 s +2025-03-29 20:51:36 | [rl2_trainer] epoch #239 | EpochTime 18.94 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -65.4777 +Average/AverageReturn -100.919 +Average/Iteration 239 +Average/MaxReturn -61.6587 +Average/MinReturn -162.81 +Average/NumEpisodes 8 +Average/StdReturn 31.4048 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979789 +TotalEnvSteps 192000 +__unnamed_task__/AverageDiscountedReturn -65.4777 +__unnamed_task__/AverageReturn -100.919 +__unnamed_task__/Iteration 239 +__unnamed_task__/MaxReturn -61.6587 +__unnamed_task__/MinReturn -162.81 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.4048 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57729 +policy/KL 0.0179254 +policy/KLBefore 0 +policy/LossAfter -0.210049 +policy/LossBefore -0.00368467 +policy/dLoss 0.206364 +---------------------------------------- --------------- +2025-03-29 20:51:51 | [rl2_trainer] epoch #240 | Optimizing policy... +2025-03-29 20:51:51 | [rl2_trainer] epoch #240 | Fitting baseline... +2025-03-29 20:51:51 | [rl2_trainer] epoch #240 | Computing loss before +2025-03-29 20:51:52 | [rl2_trainer] epoch #240 | Computing KL before +2025-03-29 20:51:52 | [rl2_trainer] epoch #240 | Optimizing +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | Computing KL after +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | Computing loss after +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | Saving snapshot... +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | Saved +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | Time 4715.38 s +2025-03-29 20:51:54 | [rl2_trainer] epoch #240 | EpochTime 17.50 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -73.0028 +Average/AverageReturn -116.319 +Average/Iteration 240 +Average/MaxReturn -90.9826 +Average/MinReturn -146.741 +Average/NumEpisodes 8 +Average/StdReturn 22.6736 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967328 +TotalEnvSteps 192800 +__unnamed_task__/AverageDiscountedReturn -73.0028 +__unnamed_task__/AverageReturn -116.319 +__unnamed_task__/Iteration 240 +__unnamed_task__/MaxReturn -90.9826 +__unnamed_task__/MinReturn -146.741 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.6736 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57318 +policy/KL 0.0208293 +policy/KLBefore 0 +policy/LossAfter -0.37733 +policy/LossBefore -0.0455216 +policy/dLoss 0.331808 +---------------------------------------- -------------- +2025-03-29 20:52:09 | [rl2_trainer] epoch #241 | Optimizing policy... +2025-03-29 20:52:09 | [rl2_trainer] epoch #241 | Fitting baseline... +2025-03-29 20:52:09 | [rl2_trainer] epoch #241 | Computing loss before +2025-03-29 20:52:09 | [rl2_trainer] epoch #241 | Computing KL before +2025-03-29 20:52:09 | [rl2_trainer] epoch #241 | Optimizing +2025-03-29 20:52:11 | [rl2_trainer] epoch #241 | Computing KL after +2025-03-29 20:52:11 | [rl2_trainer] epoch #241 | Computing loss after +2025-03-29 20:52:12 | [rl2_trainer] epoch #241 | Saving snapshot... +2025-03-29 20:52:12 | [rl2_trainer] epoch #241 | Saved +2025-03-29 20:52:12 | [rl2_trainer] epoch #241 | Time 4732.99 s +2025-03-29 20:52:12 | [rl2_trainer] epoch #241 | EpochTime 17.60 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -74.1871 +Average/AverageReturn -117.86 +Average/Iteration 241 +Average/MaxReturn -82.546 +Average/MinReturn -159.006 +Average/NumEpisodes 8 +Average/StdReturn 30.6155 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.943746 +TotalEnvSteps 193600 +__unnamed_task__/AverageDiscountedReturn -74.1871 +__unnamed_task__/AverageReturn -117.86 +__unnamed_task__/Iteration 241 +__unnamed_task__/MaxReturn -82.546 +__unnamed_task__/MinReturn -159.006 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.6155 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57755 +policy/KL 0.0271009 +policy/KLBefore 0 +policy/LossAfter -0.46652 +policy/LossBefore 0.0107145 +policy/dLoss 0.477234 +---------------------------------------- -------------- +2025-03-29 20:52:27 | [rl2_trainer] epoch #242 | Optimizing policy... +2025-03-29 20:52:27 | [rl2_trainer] epoch #242 | Fitting baseline... +2025-03-29 20:52:27 | [rl2_trainer] epoch #242 | Computing loss before +2025-03-29 20:52:27 | [rl2_trainer] epoch #242 | Computing KL before +2025-03-29 20:52:27 | [rl2_trainer] epoch #242 | Optimizing +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | Computing KL after +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | Computing loss after +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | Saving snapshot... +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | Saved +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | Time 4750.89 s +2025-03-29 20:52:29 | [rl2_trainer] epoch #242 | EpochTime 17.90 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -72.213 +Average/AverageReturn -112.14 +Average/Iteration 242 +Average/MaxReturn -55.618 +Average/MinReturn -171.933 +Average/NumEpisodes 8 +Average/StdReturn 38.621 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.952409 +TotalEnvSteps 194400 +__unnamed_task__/AverageDiscountedReturn -72.213 +__unnamed_task__/AverageReturn -112.14 +__unnamed_task__/Iteration 242 +__unnamed_task__/MaxReturn -55.618 +__unnamed_task__/MinReturn -171.933 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 38.621 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.58984 +policy/KL 0.0218103 +policy/KLBefore 0 +policy/LossAfter -0.488602 +policy/LossBefore -0.0522681 +policy/dLoss 0.436334 +---------------------------------------- -------------- +2025-03-29 20:52:46 | [rl2_trainer] epoch #243 | Optimizing policy... +2025-03-29 20:52:46 | [rl2_trainer] epoch #243 | Fitting baseline... +2025-03-29 20:52:46 | [rl2_trainer] epoch #243 | Computing loss before +2025-03-29 20:52:46 | [rl2_trainer] epoch #243 | Computing KL before +2025-03-29 20:52:46 | [rl2_trainer] epoch #243 | Optimizing +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | Computing KL after +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | Computing loss after +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | Saving snapshot... +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | Saved +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | Time 4769.84 s +2025-03-29 20:52:48 | [rl2_trainer] epoch #243 | EpochTime 18.95 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -64.4055 +Average/AverageReturn -97.4447 +Average/Iteration 243 +Average/MaxReturn -59.9784 +Average/MinReturn -189.007 +Average/NumEpisodes 8 +Average/StdReturn 37.2082 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966625 +TotalEnvSteps 195200 +__unnamed_task__/AverageDiscountedReturn -64.4055 +__unnamed_task__/AverageReturn -97.4447 +__unnamed_task__/Iteration 243 +__unnamed_task__/MaxReturn -59.9784 +__unnamed_task__/MinReturn -189.007 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.2082 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.59006 +policy/KL 0.0138747 +policy/KLBefore 0 +policy/LossAfter -0.212872 +policy/LossBefore 0.00225948 +policy/dLoss 0.215132 +---------------------------------------- --------------- +2025-03-29 20:53:05 | [rl2_trainer] epoch #244 | Optimizing policy... +2025-03-29 20:53:05 | [rl2_trainer] epoch #244 | Fitting baseline... +2025-03-29 20:53:05 | [rl2_trainer] epoch #244 | Computing loss before +2025-03-29 20:53:05 | [rl2_trainer] epoch #244 | Computing KL before +2025-03-29 20:53:05 | [rl2_trainer] epoch #244 | Optimizing +2025-03-29 20:53:07 | [rl2_trainer] epoch #244 | Computing KL after +2025-03-29 20:53:08 | [rl2_trainer] epoch #244 | Computing loss after +2025-03-29 20:53:08 | [rl2_trainer] epoch #244 | Saving snapshot... +2025-03-29 20:53:08 | [rl2_trainer] epoch #244 | Saved +2025-03-29 20:53:08 | [rl2_trainer] epoch #244 | Time 4789.08 s +2025-03-29 20:53:08 | [rl2_trainer] epoch #244 | EpochTime 19.24 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -78.3332 +Average/AverageReturn -124.374 +Average/Iteration 244 +Average/MaxReturn -89.7435 +Average/MinReturn -179.368 +Average/NumEpisodes 8 +Average/StdReturn 32.0227 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977439 +TotalEnvSteps 196000 +__unnamed_task__/AverageDiscountedReturn -78.3332 +__unnamed_task__/AverageReturn -124.374 +__unnamed_task__/Iteration 244 +__unnamed_task__/MaxReturn -89.7435 +__unnamed_task__/MinReturn -179.368 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 32.0227 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.59032 +policy/KL 0.0223152 +policy/KLBefore 0 +policy/LossAfter -0.357947 +policy/LossBefore -0.00433359 +policy/dLoss 0.353614 +---------------------------------------- --------------- +2025-03-29 20:53:24 | [rl2_trainer] epoch #245 | Optimizing policy... +2025-03-29 20:53:25 | [rl2_trainer] epoch #245 | Fitting baseline... +2025-03-29 20:53:25 | [rl2_trainer] epoch #245 | Computing loss before +2025-03-29 20:53:25 | [rl2_trainer] epoch #245 | Computing KL before +2025-03-29 20:53:25 | [rl2_trainer] epoch #245 | Optimizing +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | Computing KL after +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | Computing loss after +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | Saving snapshot... +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | Saved +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | Time 4808.34 s +2025-03-29 20:53:27 | [rl2_trainer] epoch #245 | EpochTime 19.25 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -69.4975 +Average/AverageReturn -111.136 +Average/Iteration 245 +Average/MaxReturn -90.6573 +Average/MinReturn -138.236 +Average/NumEpisodes 8 +Average/StdReturn 17.6827 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.956835 +TotalEnvSteps 196800 +__unnamed_task__/AverageDiscountedReturn -69.4975 +__unnamed_task__/AverageReturn -111.136 +__unnamed_task__/Iteration 245 +__unnamed_task__/MaxReturn -90.6573 +__unnamed_task__/MinReturn -138.236 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.6827 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.58527 +policy/KL 0.0175129 +policy/KLBefore 0 +policy/LossAfter -0.371351 +policy/LossBefore -0.033003 +policy/dLoss 0.338348 +---------------------------------------- -------------- +2025-03-29 20:53:43 | [rl2_trainer] epoch #246 | Optimizing policy... +2025-03-29 20:53:43 | [rl2_trainer] epoch #246 | Fitting baseline... +2025-03-29 20:53:43 | [rl2_trainer] epoch #246 | Computing loss before +2025-03-29 20:53:43 | [rl2_trainer] epoch #246 | Computing KL before +2025-03-29 20:53:43 | [rl2_trainer] epoch #246 | Optimizing +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | Computing KL after +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | Computing loss after +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | Saving snapshot... +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | Saved +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | Time 4827.56 s +2025-03-29 20:53:46 | [rl2_trainer] epoch #246 | EpochTime 19.22 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.4327 +Average/AverageReturn -98.8703 +Average/Iteration 246 +Average/MaxReturn -65.4031 +Average/MinReturn -135.322 +Average/NumEpisodes 8 +Average/StdReturn 19.785 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963045 +TotalEnvSteps 197600 +__unnamed_task__/AverageDiscountedReturn -64.4327 +__unnamed_task__/AverageReturn -98.8703 +__unnamed_task__/Iteration 246 +__unnamed_task__/MaxReturn -65.4031 +__unnamed_task__/MinReturn -135.322 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.785 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.58241 +policy/KL 0.0214159 +policy/KLBefore 0 +policy/LossAfter -0.330494 +policy/LossBefore -0.0419784 +policy/dLoss 0.288516 +---------------------------------------- -------------- +2025-03-29 20:54:03 | [rl2_trainer] epoch #247 | Optimizing policy... +2025-03-29 20:54:03 | [rl2_trainer] epoch #247 | Fitting baseline... +2025-03-29 20:54:03 | [rl2_trainer] epoch #247 | Computing loss before +2025-03-29 20:54:03 | [rl2_trainer] epoch #247 | Computing KL before +2025-03-29 20:54:03 | [rl2_trainer] epoch #247 | Optimizing +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | Computing KL after +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | Computing loss after +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | Saving snapshot... +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | Saved +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | Time 4846.90 s +2025-03-29 20:54:05 | [rl2_trainer] epoch #247 | EpochTime 19.34 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -61.2074 +Average/AverageReturn -92.5011 +Average/Iteration 247 +Average/MaxReturn -51.6618 +Average/MinReturn -135.907 +Average/NumEpisodes 8 +Average/StdReturn 24.8244 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.963028 +TotalEnvSteps 198400 +__unnamed_task__/AverageDiscountedReturn -61.2074 +__unnamed_task__/AverageReturn -92.5011 +__unnamed_task__/Iteration 247 +__unnamed_task__/MaxReturn -51.6618 +__unnamed_task__/MinReturn -135.907 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.8244 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57724 +policy/KL 0.0174639 +policy/KLBefore 0 +policy/LossAfter -0.298086 +policy/LossBefore -0.0197682 +policy/dLoss 0.278317 +---------------------------------------- -------------- +2025-03-29 20:54:23 | [rl2_trainer] epoch #248 | Optimizing policy... +2025-03-29 20:54:23 | [rl2_trainer] epoch #248 | Fitting baseline... +2025-03-29 20:54:23 | [rl2_trainer] epoch #248 | Computing loss before +2025-03-29 20:54:23 | [rl2_trainer] epoch #248 | Computing KL before +2025-03-29 20:54:24 | [rl2_trainer] epoch #248 | Optimizing +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | Computing KL after +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | Computing loss after +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | Saving snapshot... +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | Saved +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | Time 4867.14 s +2025-03-29 20:54:26 | [rl2_trainer] epoch #248 | EpochTime 20.23 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -61.4526 +Average/AverageReturn -94.9331 +Average/Iteration 248 +Average/MaxReturn -47.2225 +Average/MinReturn -154.189 +Average/NumEpisodes 8 +Average/StdReturn 29.6461 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.961347 +TotalEnvSteps 199200 +__unnamed_task__/AverageDiscountedReturn -61.4526 +__unnamed_task__/AverageReturn -94.9331 +__unnamed_task__/Iteration 248 +__unnamed_task__/MaxReturn -47.2225 +__unnamed_task__/MinReturn -154.189 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.6461 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56662 +policy/KL 0.0184452 +policy/KLBefore 0 +policy/LossAfter -0.271285 +policy/LossBefore -0.00767544 +policy/dLoss 0.263609 +---------------------------------------- --------------- +2025-03-29 20:54:41 | [rl2_trainer] epoch #249 | Optimizing policy... +2025-03-29 20:54:41 | [rl2_trainer] epoch #249 | Fitting baseline... +2025-03-29 20:54:41 | [rl2_trainer] epoch #249 | Computing loss before +2025-03-29 20:54:41 | [rl2_trainer] epoch #249 | Computing KL before +2025-03-29 20:54:41 | [rl2_trainer] epoch #249 | Optimizing +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | Computing KL after +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | Computing loss after +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | Saving snapshot... +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | Saved +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | Time 4885.23 s +2025-03-29 20:54:44 | [rl2_trainer] epoch #249 | EpochTime 18.09 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0501 +Average/AverageReturn -111.602 +Average/Iteration 249 +Average/MaxReturn -67.0935 +Average/MinReturn -159.626 +Average/NumEpisodes 8 +Average/StdReturn 30.3929 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972839 +TotalEnvSteps 200000 +__unnamed_task__/AverageDiscountedReturn -71.0501 +__unnamed_task__/AverageReturn -111.602 +__unnamed_task__/Iteration 249 +__unnamed_task__/MaxReturn -67.0935 +__unnamed_task__/MinReturn -159.626 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.3929 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56617 +policy/KL 0.0192356 +policy/KLBefore 0 +policy/LossAfter -0.27149 +policy/LossBefore -0.012079 +policy/dLoss 0.259411 +---------------------------------------- -------------- +2025-03-29 20:55:03 | [rl2_trainer] epoch #250 | Optimizing policy... +2025-03-29 20:55:03 | [rl2_trainer] epoch #250 | Fitting baseline... +2025-03-29 20:55:03 | [rl2_trainer] epoch #250 | Computing loss before +2025-03-29 20:55:03 | [rl2_trainer] epoch #250 | Computing KL before +2025-03-29 20:55:03 | [rl2_trainer] epoch #250 | Optimizing +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | Computing KL after +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | Computing loss after +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | Saving snapshot... +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | Saved +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | Time 4907.39 s +2025-03-29 20:55:06 | [rl2_trainer] epoch #250 | EpochTime 22.16 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -65.9929 +Average/AverageReturn -103.802 +Average/Iteration 250 +Average/MaxReturn -68.9399 +Average/MinReturn -156.14 +Average/NumEpisodes 8 +Average/StdReturn 28.4739 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.984148 +TotalEnvSteps 200800 +__unnamed_task__/AverageDiscountedReturn -65.9929 +__unnamed_task__/AverageReturn -103.802 +__unnamed_task__/Iteration 250 +__unnamed_task__/MaxReturn -68.9399 +__unnamed_task__/MinReturn -156.14 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 28.4739 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57824 +policy/KL 0.016681 +policy/KLBefore 0 +policy/LossAfter -0.226162 +policy/LossBefore -0.0219702 +policy/dLoss 0.204192 +---------------------------------------- -------------- +2025-03-29 20:55:24 | [rl2_trainer] epoch #251 | Optimizing policy... +2025-03-29 20:55:24 | [rl2_trainer] epoch #251 | Fitting baseline... +2025-03-29 20:55:24 | [rl2_trainer] epoch #251 | Computing loss before +2025-03-29 20:55:24 | [rl2_trainer] epoch #251 | Computing KL before +2025-03-29 20:55:24 | [rl2_trainer] epoch #251 | Optimizing +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | Computing KL after +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | Computing loss after +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | Saving snapshot... +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | Saved +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | Time 4928.65 s +2025-03-29 20:55:27 | [rl2_trainer] epoch #251 | EpochTime 21.25 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.1462 +Average/AverageReturn -107.219 +Average/Iteration 251 +Average/MaxReturn -75.6515 +Average/MinReturn -162.322 +Average/NumEpisodes 8 +Average/StdReturn 31.1899 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.955226 +TotalEnvSteps 201600 +__unnamed_task__/AverageDiscountedReturn -68.1462 +__unnamed_task__/AverageReturn -107.219 +__unnamed_task__/Iteration 251 +__unnamed_task__/MaxReturn -75.6515 +__unnamed_task__/MinReturn -162.322 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 31.1899 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.60082 +policy/KL 0.0282594 +policy/KLBefore 0 +policy/LossAfter -0.418786 +policy/LossBefore -0.0300969 +policy/dLoss 0.38869 +---------------------------------------- -------------- +2025-03-29 20:55:44 | [rl2_trainer] epoch #252 | Optimizing policy... +2025-03-29 20:55:44 | [rl2_trainer] epoch #252 | Fitting baseline... +2025-03-29 20:55:44 | [rl2_trainer] epoch #252 | Computing loss before +2025-03-29 20:55:44 | [rl2_trainer] epoch #252 | Computing KL before +2025-03-29 20:55:44 | [rl2_trainer] epoch #252 | Optimizing +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | Computing KL after +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | Computing loss after +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | Saving snapshot... +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | Saved +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | Time 4947.83 s +2025-03-29 20:55:46 | [rl2_trainer] epoch #252 | EpochTime 19.18 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -67.6875 +Average/AverageReturn -106.368 +Average/Iteration 252 +Average/MaxReturn -51.5035 +Average/MinReturn -154.953 +Average/NumEpisodes 8 +Average/StdReturn 26.8525 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.96364 +TotalEnvSteps 202400 +__unnamed_task__/AverageDiscountedReturn -67.6875 +__unnamed_task__/AverageReturn -106.368 +__unnamed_task__/Iteration 252 +__unnamed_task__/MaxReturn -51.5035 +__unnamed_task__/MinReturn -154.953 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.8525 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.59698 +policy/KL 0.023346 +policy/KLBefore 0 +policy/LossAfter -0.279409 +policy/LossBefore 0.0263893 +policy/dLoss 0.305798 +---------------------------------------- -------------- +2025-03-29 20:56:02 | [rl2_trainer] epoch #253 | Optimizing policy... +2025-03-29 20:56:02 | [rl2_trainer] epoch #253 | Fitting baseline... +2025-03-29 20:56:02 | [rl2_trainer] epoch #253 | Computing loss before +2025-03-29 20:56:03 | [rl2_trainer] epoch #253 | Computing KL before +2025-03-29 20:56:03 | [rl2_trainer] epoch #253 | Optimizing +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | Computing KL after +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | Computing loss after +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | Saving snapshot... +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | Saved +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | Time 4966.22 s +2025-03-29 20:56:05 | [rl2_trainer] epoch #253 | EpochTime 18.39 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.3268 +Average/AverageReturn -102.481 +Average/Iteration 253 +Average/MaxReturn -65.044 +Average/MinReturn -146.969 +Average/NumEpisodes 8 +Average/StdReturn 25.1218 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.965949 +TotalEnvSteps 203200 +__unnamed_task__/AverageDiscountedReturn -64.3268 +__unnamed_task__/AverageReturn -102.481 +__unnamed_task__/Iteration 253 +__unnamed_task__/MaxReturn -65.044 +__unnamed_task__/MinReturn -146.969 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.1218 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57718 +policy/KL 0.0162757 +policy/KLBefore 0 +policy/LossAfter -0.266471 +policy/LossBefore -0.0196701 +policy/dLoss 0.2468 +---------------------------------------- -------------- +2025-03-29 20:56:21 | [rl2_trainer] epoch #254 | Optimizing policy... +2025-03-29 20:56:21 | [rl2_trainer] epoch #254 | Fitting baseline... +2025-03-29 20:56:21 | [rl2_trainer] epoch #254 | Computing loss before +2025-03-29 20:56:21 | [rl2_trainer] epoch #254 | Computing KL before +2025-03-29 20:56:21 | [rl2_trainer] epoch #254 | Optimizing +2025-03-29 20:56:23 | [rl2_trainer] epoch #254 | Computing KL after +2025-03-29 20:56:23 | [rl2_trainer] epoch #254 | Computing loss after +2025-03-29 20:56:24 | [rl2_trainer] epoch #254 | Saving snapshot... +2025-03-29 20:56:24 | [rl2_trainer] epoch #254 | Saved +2025-03-29 20:56:24 | [rl2_trainer] epoch #254 | Time 4985.03 s +2025-03-29 20:56:24 | [rl2_trainer] epoch #254 | EpochTime 18.81 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -63.4434 +Average/AverageReturn -99.2054 +Average/Iteration 254 +Average/MaxReturn -55.7248 +Average/MinReturn -173.974 +Average/NumEpisodes 8 +Average/StdReturn 34.999 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957372 +TotalEnvSteps 204000 +__unnamed_task__/AverageDiscountedReturn -63.4434 +__unnamed_task__/AverageReturn -99.2054 +__unnamed_task__/Iteration 254 +__unnamed_task__/MaxReturn -55.7248 +__unnamed_task__/MinReturn -173.974 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.999 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.58851 +policy/KL 0.0186939 +policy/KLBefore 0 +policy/LossAfter -0.311743 +policy/LossBefore 8.44955e-06 +policy/dLoss 0.311752 +---------------------------------------- ---------------- +2025-03-29 20:56:40 | [rl2_trainer] epoch #255 | Optimizing policy... +2025-03-29 20:56:40 | [rl2_trainer] epoch #255 | Fitting baseline... +2025-03-29 20:56:40 | [rl2_trainer] epoch #255 | Computing loss before +2025-03-29 20:56:40 | [rl2_trainer] epoch #255 | Computing KL before +2025-03-29 20:56:40 | [rl2_trainer] epoch #255 | Optimizing +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | Computing KL after +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | Computing loss after +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | Saving snapshot... +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | Saved +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | Time 5003.91 s +2025-03-29 20:56:42 | [rl2_trainer] epoch #255 | EpochTime 18.87 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -66.5798 +Average/AverageReturn -109.04 +Average/Iteration 255 +Average/MaxReturn -57.9292 +Average/MinReturn -174.444 +Average/NumEpisodes 8 +Average/StdReturn 37.7249 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.967083 +TotalEnvSteps 204800 +__unnamed_task__/AverageDiscountedReturn -66.5798 +__unnamed_task__/AverageReturn -109.04 +__unnamed_task__/Iteration 255 +__unnamed_task__/MaxReturn -57.9292 +__unnamed_task__/MinReturn -174.444 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 37.7249 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.57779 +policy/KL 0.0237316 +policy/KLBefore 0 +policy/LossAfter -0.40103 +policy/LossBefore 0.0299329 +policy/dLoss 0.430963 +---------------------------------------- -------------- +2025-03-29 20:56:59 | [rl2_trainer] epoch #256 | Optimizing policy... +2025-03-29 20:56:59 | [rl2_trainer] epoch #256 | Fitting baseline... +2025-03-29 20:56:59 | [rl2_trainer] epoch #256 | Computing loss before +2025-03-29 20:56:59 | [rl2_trainer] epoch #256 | Computing KL before +2025-03-29 20:56:59 | [rl2_trainer] epoch #256 | Optimizing +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | Computing KL after +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | Computing loss after +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | Saving snapshot... +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | Saved +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | Time 5022.88 s +2025-03-29 20:57:01 | [rl2_trainer] epoch #256 | EpochTime 18.96 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -67.948 +Average/AverageReturn -111.967 +Average/Iteration 256 +Average/MaxReturn -57.5525 +Average/MinReturn -172.192 +Average/NumEpisodes 8 +Average/StdReturn 34.7643 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959959 +TotalEnvSteps 205600 +__unnamed_task__/AverageDiscountedReturn -67.948 +__unnamed_task__/AverageReturn -111.967 +__unnamed_task__/Iteration 256 +__unnamed_task__/MaxReturn -57.5525 +__unnamed_task__/MinReturn -172.192 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 34.7643 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56658 +policy/KL 0.0160413 +policy/KLBefore 0 +policy/LossAfter -0.28116 +policy/LossBefore 0.0247784 +policy/dLoss 0.305938 +---------------------------------------- -------------- +2025-03-29 20:57:18 | [rl2_trainer] epoch #257 | Optimizing policy... +2025-03-29 20:57:18 | [rl2_trainer] epoch #257 | Fitting baseline... +2025-03-29 20:57:18 | [rl2_trainer] epoch #257 | Computing loss before +2025-03-29 20:57:18 | [rl2_trainer] epoch #257 | Computing KL before +2025-03-29 20:57:18 | [rl2_trainer] epoch #257 | Optimizing +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | Computing KL after +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | Computing loss after +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | Saving snapshot... +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | Saved +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | Time 5041.74 s +2025-03-29 20:57:20 | [rl2_trainer] epoch #257 | EpochTime 18.86 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -60.9732 +Average/AverageReturn -96.3774 +Average/Iteration 257 +Average/MaxReturn -63.0688 +Average/MinReturn -141.906 +Average/NumEpisodes 8 +Average/StdReturn 23.0996 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.959728 +TotalEnvSteps 206400 +__unnamed_task__/AverageDiscountedReturn -60.9732 +__unnamed_task__/AverageReturn -96.3774 +__unnamed_task__/Iteration 257 +__unnamed_task__/MaxReturn -63.0688 +__unnamed_task__/MinReturn -141.906 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.0996 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.55515 +policy/KL 0.0132551 +policy/KLBefore 0 +policy/LossAfter -0.216468 +policy/LossBefore -0.0154307 +policy/dLoss 0.201037 +---------------------------------------- -------------- +2025-03-29 20:57:37 | [rl2_trainer] epoch #258 | Optimizing policy... +2025-03-29 20:57:37 | [rl2_trainer] epoch #258 | Fitting baseline... +2025-03-29 20:57:37 | [rl2_trainer] epoch #258 | Computing loss before +2025-03-29 20:57:37 | [rl2_trainer] epoch #258 | Computing KL before +2025-03-29 20:57:37 | [rl2_trainer] epoch #258 | Optimizing +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | Computing KL after +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | Computing loss after +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | Saving snapshot... +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | Saved +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | Time 5060.89 s +2025-03-29 20:57:39 | [rl2_trainer] epoch #258 | EpochTime 19.15 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -55.5332 +Average/AverageReturn -83.1845 +Average/Iteration 258 +Average/MaxReturn -50.2139 +Average/MinReturn -130.022 +Average/NumEpisodes 8 +Average/StdReturn 22.8569 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.977754 +TotalEnvSteps 207200 +__unnamed_task__/AverageDiscountedReturn -55.5332 +__unnamed_task__/AverageReturn -83.1845 +__unnamed_task__/Iteration 258 +__unnamed_task__/MaxReturn -50.2139 +__unnamed_task__/MinReturn -130.022 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.8569 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.54932 +policy/KL 0.0168842 +policy/KLBefore 0 +policy/LossAfter -0.140213 +policy/LossBefore -0.0152044 +policy/dLoss 0.125009 +---------------------------------------- -------------- +2025-03-29 20:57:56 | [rl2_trainer] epoch #259 | Optimizing policy... +2025-03-29 20:57:56 | [rl2_trainer] epoch #259 | Fitting baseline... +2025-03-29 20:57:56 | [rl2_trainer] epoch #259 | Computing loss before +2025-03-29 20:57:56 | [rl2_trainer] epoch #259 | Computing KL before +2025-03-29 20:57:56 | [rl2_trainer] epoch #259 | Optimizing +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | Computing KL after +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | Computing loss after +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | Saving snapshot... +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | Saved +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | Time 5079.48 s +2025-03-29 20:57:58 | [rl2_trainer] epoch #259 | EpochTime 18.59 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -63.1138 +Average/AverageReturn -102.63 +Average/Iteration 259 +Average/MaxReturn -90.7162 +Average/MinReturn -119.489 +Average/NumEpisodes 8 +Average/StdReturn 11.9278 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.95725 +TotalEnvSteps 208000 +__unnamed_task__/AverageDiscountedReturn -63.1138 +__unnamed_task__/AverageReturn -102.63 +__unnamed_task__/Iteration 259 +__unnamed_task__/MaxReturn -90.7162 +__unnamed_task__/MinReturn -119.489 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 11.9278 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.55084 +policy/KL 0.0173209 +policy/KLBefore 0 +policy/LossAfter -0.289636 +policy/LossBefore 0.0127471 +policy/dLoss 0.302383 +---------------------------------------- -------------- +2025-03-29 20:58:15 | [rl2_trainer] epoch #260 | Optimizing policy... +2025-03-29 20:58:15 | [rl2_trainer] epoch #260 | Fitting baseline... +2025-03-29 20:58:15 | [rl2_trainer] epoch #260 | Computing loss before +2025-03-29 20:58:15 | [rl2_trainer] epoch #260 | Computing KL before +2025-03-29 20:58:15 | [rl2_trainer] epoch #260 | Optimizing +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | Computing KL after +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | Computing loss after +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | Saving snapshot... +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | Saved +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | Time 5099.58 s +2025-03-29 20:58:18 | [rl2_trainer] epoch #260 | EpochTime 20.10 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -52.9848 +Average/AverageReturn -79.8584 +Average/Iteration 260 +Average/MaxReturn -39.3303 +Average/MinReturn -125.797 +Average/NumEpisodes 8 +Average/StdReturn 26.4877 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.972227 +TotalEnvSteps 208800 +__unnamed_task__/AverageDiscountedReturn -52.9848 +__unnamed_task__/AverageReturn -79.8584 +__unnamed_task__/Iteration 260 +__unnamed_task__/MaxReturn -39.3303 +__unnamed_task__/MinReturn -125.797 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.4877 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.54513 +policy/KL 0.0178303 +policy/KLBefore 0 +policy/LossAfter -0.176383 +policy/LossBefore -0.0133872 +policy/dLoss 0.162996 +---------------------------------------- -------------- +2025-03-29 20:58:36 | [rl2_trainer] epoch #261 | Optimizing policy... +2025-03-29 20:58:36 | [rl2_trainer] epoch #261 | Fitting baseline... +2025-03-29 20:58:36 | [rl2_trainer] epoch #261 | Computing loss before +2025-03-29 20:58:36 | [rl2_trainer] epoch #261 | Computing KL before +2025-03-29 20:58:36 | [rl2_trainer] epoch #261 | Optimizing +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | Computing KL after +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | Computing loss after +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | Saving snapshot... +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | Saved +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | Time 5120.54 s +2025-03-29 20:58:39 | [rl2_trainer] epoch #261 | EpochTime 20.95 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -63.6672 +Average/AverageReturn -105.463 +Average/Iteration 261 +Average/MaxReturn -71.3786 +Average/MinReturn -155.138 +Average/NumEpisodes 8 +Average/StdReturn 25.1304 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974545 +TotalEnvSteps 209600 +__unnamed_task__/AverageDiscountedReturn -63.6672 +__unnamed_task__/AverageReturn -105.463 +__unnamed_task__/Iteration 261 +__unnamed_task__/MaxReturn -71.3786 +__unnamed_task__/MinReturn -155.138 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 25.1304 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.52966 +policy/KL 0.0173083 +policy/KLBefore 0 +policy/LossAfter -0.281285 +policy/LossBefore 0.00684289 +policy/dLoss 0.288128 +---------------------------------------- --------------- +2025-03-29 20:58:55 | [rl2_trainer] epoch #262 | Optimizing policy... +2025-03-29 20:58:55 | [rl2_trainer] epoch #262 | Fitting baseline... +2025-03-29 20:58:55 | [rl2_trainer] epoch #262 | Computing loss before +2025-03-29 20:58:55 | [rl2_trainer] epoch #262 | Computing KL before +2025-03-29 20:58:56 | [rl2_trainer] epoch #262 | Optimizing +2025-03-29 20:58:58 | [rl2_trainer] epoch #262 | Computing KL after +2025-03-29 20:58:58 | [rl2_trainer] epoch #262 | Computing loss after +2025-03-29 20:58:59 | [rl2_trainer] epoch #262 | Saving snapshot... +2025-03-29 20:58:59 | [rl2_trainer] epoch #262 | Saved +2025-03-29 20:58:59 | [rl2_trainer] epoch #262 | Time 5140.03 s +2025-03-29 20:58:59 | [rl2_trainer] epoch #262 | EpochTime 19.50 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -68.8802 +Average/AverageReturn -112.937 +Average/Iteration 262 +Average/MaxReturn -90.3347 +Average/MinReturn -132.639 +Average/NumEpisodes 8 +Average/StdReturn 15.6149 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.966359 +TotalEnvSteps 210400 +__unnamed_task__/AverageDiscountedReturn -68.8802 +__unnamed_task__/AverageReturn -112.937 +__unnamed_task__/Iteration 262 +__unnamed_task__/MaxReturn -90.3347 +__unnamed_task__/MinReturn -132.639 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.6149 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50373 +policy/KL 0.0271292 +policy/KLBefore 0 +policy/LossAfter -0.359239 +policy/LossBefore -0.0379956 +policy/dLoss 0.321243 +---------------------------------------- -------------- +2025-03-29 20:59:17 | [rl2_trainer] epoch #263 | Optimizing policy... +2025-03-29 20:59:17 | [rl2_trainer] epoch #263 | Fitting baseline... +2025-03-29 20:59:17 | [rl2_trainer] epoch #263 | Computing loss before +2025-03-29 20:59:17 | [rl2_trainer] epoch #263 | Computing KL before +2025-03-29 20:59:17 | [rl2_trainer] epoch #263 | Optimizing +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | Computing KL after +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | Computing loss after +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | Saving snapshot... +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | Saved +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | Time 5160.67 s +2025-03-29 20:59:19 | [rl2_trainer] epoch #263 | EpochTime 20.63 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -54.2702 +Average/AverageReturn -80.733 +Average/Iteration 263 +Average/MaxReturn -57.3218 +Average/MinReturn -103.19 +Average/NumEpisodes 8 +Average/StdReturn 15.4102 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.981792 +TotalEnvSteps 211200 +__unnamed_task__/AverageDiscountedReturn -54.2702 +__unnamed_task__/AverageReturn -80.733 +__unnamed_task__/Iteration 263 +__unnamed_task__/MaxReturn -57.3218 +__unnamed_task__/MinReturn -103.19 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 15.4102 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.48822 +policy/KL 0.0185406 +policy/KLBefore 0 +policy/LossAfter -0.101184 +policy/LossBefore -0.000360532 +policy/dLoss 0.100823 +---------------------------------------- ---------------- +2025-03-29 20:59:36 | [rl2_trainer] epoch #264 | Optimizing policy... +2025-03-29 20:59:36 | [rl2_trainer] epoch #264 | Fitting baseline... +2025-03-29 20:59:36 | [rl2_trainer] epoch #264 | Computing loss before +2025-03-29 20:59:36 | [rl2_trainer] epoch #264 | Computing KL before +2025-03-29 20:59:36 | [rl2_trainer] epoch #264 | Optimizing +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | Computing KL after +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | Computing loss after +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | Saving snapshot... +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | Saved +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | Time 5180.22 s +2025-03-29 20:59:39 | [rl2_trainer] epoch #264 | EpochTime 19.55 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -55.3916 +Average/AverageReturn -83.7496 +Average/Iteration 264 +Average/MaxReturn -52.3551 +Average/MinReturn -104.653 +Average/NumEpisodes 8 +Average/StdReturn 19.0717 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.989192 +TotalEnvSteps 212000 +__unnamed_task__/AverageDiscountedReturn -55.3916 +__unnamed_task__/AverageReturn -83.7496 +__unnamed_task__/Iteration 264 +__unnamed_task__/MaxReturn -52.3551 +__unnamed_task__/MinReturn -104.653 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.0717 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.4812 +policy/KL 0.0158437 +policy/KLBefore 0 +policy/LossAfter -0.116479 +policy/LossBefore -0.00110741 +policy/dLoss 0.115372 +---------------------------------------- --------------- +2025-03-29 20:59:56 | [rl2_trainer] epoch #265 | Optimizing policy... +2025-03-29 20:59:56 | [rl2_trainer] epoch #265 | Fitting baseline... +2025-03-29 20:59:56 | [rl2_trainer] epoch #265 | Computing loss before +2025-03-29 20:59:56 | [rl2_trainer] epoch #265 | Computing KL before +2025-03-29 20:59:56 | [rl2_trainer] epoch #265 | Optimizing +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | Computing KL after +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | Computing loss after +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | Saving snapshot... +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | Saved +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | Time 5199.49 s +2025-03-29 20:59:58 | [rl2_trainer] epoch #265 | EpochTime 19.27 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -62.5706 +Average/AverageReturn -96.8314 +Average/Iteration 265 +Average/MaxReturn -67.4837 +Average/MinReturn -167.081 +Average/NumEpisodes 8 +Average/StdReturn 30.0029 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974205 +TotalEnvSteps 212800 +__unnamed_task__/AverageDiscountedReturn -62.5706 +__unnamed_task__/AverageReturn -96.8314 +__unnamed_task__/Iteration 265 +__unnamed_task__/MaxReturn -67.4837 +__unnamed_task__/MinReturn -167.081 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 30.0029 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.47365 +policy/KL 0.022148 +policy/KLBefore 0 +policy/LossAfter -0.28577 +policy/LossBefore -0.0153948 +policy/dLoss 0.270376 +---------------------------------------- -------------- +2025-03-29 21:00:16 | [rl2_trainer] epoch #266 | Optimizing policy... +2025-03-29 21:00:16 | [rl2_trainer] epoch #266 | Fitting baseline... +2025-03-29 21:00:16 | [rl2_trainer] epoch #266 | Computing loss before +2025-03-29 21:00:16 | [rl2_trainer] epoch #266 | Computing KL before +2025-03-29 21:00:16 | [rl2_trainer] epoch #266 | Optimizing +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | Computing KL after +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | Computing loss after +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | Saving snapshot... +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | Saved +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | Time 5219.80 s +2025-03-29 21:00:18 | [rl2_trainer] epoch #266 | EpochTime 20.31 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -57.92 +Average/AverageReturn -90.973 +Average/Iteration 266 +Average/MaxReturn -53.361 +Average/MinReturn -118.054 +Average/NumEpisodes 8 +Average/StdReturn 22.9921 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.922457 +TotalEnvSteps 213600 +__unnamed_task__/AverageDiscountedReturn -57.92 +__unnamed_task__/AverageReturn -90.973 +__unnamed_task__/Iteration 266 +__unnamed_task__/MaxReturn -53.361 +__unnamed_task__/MinReturn -118.054 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.9921 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.47781 +policy/KL 0.0100701 +policy/KLBefore 0 +policy/LossAfter -0.261497 +policy/LossBefore -0.00147387 +policy/dLoss 0.260023 +---------------------------------------- --------------- +2025-03-29 21:00:34 | [rl2_trainer] epoch #267 | Optimizing policy... +2025-03-29 21:00:34 | [rl2_trainer] epoch #267 | Fitting baseline... +2025-03-29 21:00:34 | [rl2_trainer] epoch #267 | Computing loss before +2025-03-29 21:00:34 | [rl2_trainer] epoch #267 | Computing KL before +2025-03-29 21:00:34 | [rl2_trainer] epoch #267 | Optimizing +2025-03-29 21:00:36 | [rl2_trainer] epoch #267 | Computing KL after +2025-03-29 21:00:36 | [rl2_trainer] epoch #267 | Computing loss after +2025-03-29 21:00:37 | [rl2_trainer] epoch #267 | Saving snapshot... +2025-03-29 21:00:37 | [rl2_trainer] epoch #267 | Saved +2025-03-29 21:00:37 | [rl2_trainer] epoch #267 | Time 5238.01 s +2025-03-29 21:00:37 | [rl2_trainer] epoch #267 | EpochTime 18.21 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -64.3772 +Average/AverageReturn -103.448 +Average/Iteration 267 +Average/MaxReturn -58.0958 +Average/MinReturn -140.111 +Average/NumEpisodes 8 +Average/StdReturn 24.8709 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979717 +TotalEnvSteps 214400 +__unnamed_task__/AverageDiscountedReturn -64.3772 +__unnamed_task__/AverageReturn -103.448 +__unnamed_task__/Iteration 267 +__unnamed_task__/MaxReturn -58.0958 +__unnamed_task__/MinReturn -140.111 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.8709 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.46355 +policy/KL 0.0188432 +policy/KLBefore 0 +policy/LossAfter -0.202087 +policy/LossBefore 0.0196307 +policy/dLoss 0.221718 +---------------------------------------- -------------- +2025-03-29 21:00:52 | [rl2_trainer] epoch #268 | Optimizing policy... +2025-03-29 21:00:52 | [rl2_trainer] epoch #268 | Fitting baseline... +2025-03-29 21:00:52 | [rl2_trainer] epoch #268 | Computing loss before +2025-03-29 21:00:52 | [rl2_trainer] epoch #268 | Computing KL before +2025-03-29 21:00:52 | [rl2_trainer] epoch #268 | Optimizing +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | Computing KL after +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | Computing loss after +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | Saving snapshot... +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | Saved +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | Time 5255.60 s +2025-03-29 21:00:54 | [rl2_trainer] epoch #268 | EpochTime 17.58 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5171 +Average/AverageReturn -115.308 +Average/Iteration 268 +Average/MaxReturn -88.4487 +Average/MinReturn -166.664 +Average/NumEpisodes 8 +Average/StdReturn 23.4579 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.968453 +TotalEnvSteps 215200 +__unnamed_task__/AverageDiscountedReturn -71.5171 +__unnamed_task__/AverageReturn -115.308 +__unnamed_task__/Iteration 268 +__unnamed_task__/MaxReturn -88.4487 +__unnamed_task__/MinReturn -166.664 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.4579 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.45492 +policy/KL 0.0260966 +policy/KLBefore 0 +policy/LossAfter -0.336682 +policy/LossBefore 0.0247817 +policy/dLoss 0.361464 +---------------------------------------- -------------- +2025-03-29 21:01:10 | [rl2_trainer] epoch #269 | Optimizing policy... +2025-03-29 21:01:10 | [rl2_trainer] epoch #269 | Fitting baseline... +2025-03-29 21:01:10 | [rl2_trainer] epoch #269 | Computing loss before +2025-03-29 21:01:10 | [rl2_trainer] epoch #269 | Computing KL before +2025-03-29 21:01:10 | [rl2_trainer] epoch #269 | Optimizing +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | Computing KL after +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | Computing loss after +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | Saving snapshot... +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | Saved +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | Time 5273.96 s +2025-03-29 21:01:12 | [rl2_trainer] epoch #269 | EpochTime 18.36 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -62.5345 +Average/AverageReturn -100.377 +Average/Iteration 269 +Average/MaxReturn -52.597 +Average/MinReturn -138.91 +Average/NumEpisodes 8 +Average/StdReturn 24.05 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979654 +TotalEnvSteps 216000 +__unnamed_task__/AverageDiscountedReturn -62.5345 +__unnamed_task__/AverageReturn -100.377 +__unnamed_task__/Iteration 269 +__unnamed_task__/MaxReturn -52.597 +__unnamed_task__/MinReturn -138.91 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.05 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.46054 +policy/KL 0.0212567 +policy/KLBefore 0 +policy/LossAfter -0.246701 +policy/LossBefore 0.00267488 +policy/dLoss 0.249376 +---------------------------------------- --------------- +2025-03-29 21:01:31 | [rl2_trainer] epoch #270 | Optimizing policy... +2025-03-29 21:01:31 | [rl2_trainer] epoch #270 | Fitting baseline... +2025-03-29 21:01:31 | [rl2_trainer] epoch #270 | Computing loss before +2025-03-29 21:01:31 | [rl2_trainer] epoch #270 | Computing KL before +2025-03-29 21:01:32 | [rl2_trainer] epoch #270 | Optimizing +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | Computing KL after +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | Computing loss after +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | Saving snapshot... +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | Saved +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | Time 5295.60 s +2025-03-29 21:01:34 | [rl2_trainer] epoch #270 | EpochTime 21.64 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -57.2376 +Average/AverageReturn -88.1565 +Average/Iteration 270 +Average/MaxReturn -55.0372 +Average/MinReturn -132.847 +Average/NumEpisodes 8 +Average/StdReturn 24.5126 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.983622 +TotalEnvSteps 216800 +__unnamed_task__/AverageDiscountedReturn -57.2376 +__unnamed_task__/AverageReturn -88.1565 +__unnamed_task__/Iteration 270 +__unnamed_task__/MaxReturn -55.0372 +__unnamed_task__/MinReturn -132.847 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.5126 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.46543 +policy/KL 0.0197239 +policy/KLBefore 0 +policy/LossAfter -0.169349 +policy/LossBefore -0.000144114 +policy/dLoss 0.169205 +---------------------------------------- ---------------- +2025-03-29 21:01:53 | [rl2_trainer] epoch #271 | Optimizing policy... +2025-03-29 21:01:53 | [rl2_trainer] epoch #271 | Fitting baseline... +2025-03-29 21:01:53 | [rl2_trainer] epoch #271 | Computing loss before +2025-03-29 21:01:53 | [rl2_trainer] epoch #271 | Computing KL before +2025-03-29 21:01:53 | [rl2_trainer] epoch #271 | Optimizing +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | Computing KL after +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | Computing loss after +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | Saving snapshot... +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | Saved +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | Time 5316.50 s +2025-03-29 21:01:55 | [rl2_trainer] epoch #271 | EpochTime 20.89 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -50.7695 +Average/AverageReturn -74.409 +Average/Iteration 271 +Average/MaxReturn -53.0653 +Average/MinReturn -105.798 +Average/NumEpisodes 8 +Average/StdReturn 19.8574 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.986926 +TotalEnvSteps 217600 +__unnamed_task__/AverageDiscountedReturn -50.7695 +__unnamed_task__/AverageReturn -74.409 +__unnamed_task__/Iteration 271 +__unnamed_task__/MaxReturn -53.0653 +__unnamed_task__/MinReturn -105.798 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.8574 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.46074 +policy/KL 0.0280295 +policy/KLBefore 0 +policy/LossAfter -0.0987441 +policy/LossBefore -0.00613443 +policy/dLoss 0.0926097 +---------------------------------------- --------------- +2025-03-29 21:02:12 | [rl2_trainer] epoch #272 | Optimizing policy... +2025-03-29 21:02:12 | [rl2_trainer] epoch #272 | Fitting baseline... +2025-03-29 21:02:12 | [rl2_trainer] epoch #272 | Computing loss before +2025-03-29 21:02:12 | [rl2_trainer] epoch #272 | Computing KL before +2025-03-29 21:02:12 | [rl2_trainer] epoch #272 | Optimizing +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | Computing KL after +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | Computing loss after +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | Saving snapshot... +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | Saved +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | Time 5335.57 s +2025-03-29 21:02:14 | [rl2_trainer] epoch #272 | EpochTime 19.07 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -60.1758 +Average/AverageReturn -98.5774 +Average/Iteration 272 +Average/MaxReturn -53.7181 +Average/MinReturn -125.371 +Average/NumEpisodes 8 +Average/StdReturn 20.3846 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978522 +TotalEnvSteps 218400 +__unnamed_task__/AverageDiscountedReturn -60.1758 +__unnamed_task__/AverageReturn -98.5774 +__unnamed_task__/Iteration 272 +__unnamed_task__/MaxReturn -53.7181 +__unnamed_task__/MinReturn -125.371 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.3846 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44542 +policy/KL 0.020057 +policy/KLBefore 0 +policy/LossAfter -0.210624 +policy/LossBefore 0.00774221 +policy/dLoss 0.218366 +---------------------------------------- --------------- +2025-03-29 21:02:30 | [rl2_trainer] epoch #273 | Optimizing policy... +2025-03-29 21:02:30 | [rl2_trainer] epoch #273 | Fitting baseline... +2025-03-29 21:02:30 | [rl2_trainer] epoch #273 | Computing loss before +2025-03-29 21:02:30 | [rl2_trainer] epoch #273 | Computing KL before +2025-03-29 21:02:30 | [rl2_trainer] epoch #273 | Optimizing +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | Computing KL after +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | Computing loss after +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | Saving snapshot... +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | Saved +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | Time 5354.22 s +2025-03-29 21:02:33 | [rl2_trainer] epoch #273 | EpochTime 18.65 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -62.8791 +Average/AverageReturn -101.604 +Average/Iteration 273 +Average/MaxReturn -57.3424 +Average/MinReturn -137.523 +Average/NumEpisodes 8 +Average/StdReturn 22.411 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979365 +TotalEnvSteps 219200 +__unnamed_task__/AverageDiscountedReturn -62.8791 +__unnamed_task__/AverageReturn -101.604 +__unnamed_task__/Iteration 273 +__unnamed_task__/MaxReturn -57.3424 +__unnamed_task__/MinReturn -137.523 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.411 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44167 +policy/KL 0.0121678 +policy/KLBefore 0 +policy/LossAfter -0.175482 +policy/LossBefore -0.00167072 +policy/dLoss 0.173812 +---------------------------------------- --------------- +2025-03-29 21:02:50 | [rl2_trainer] epoch #274 | Optimizing policy... +2025-03-29 21:02:50 | [rl2_trainer] epoch #274 | Fitting baseline... +2025-03-29 21:02:50 | [rl2_trainer] epoch #274 | Computing loss before +2025-03-29 21:02:50 | [rl2_trainer] epoch #274 | Computing KL before +2025-03-29 21:02:50 | [rl2_trainer] epoch #274 | Optimizing +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | Computing KL after +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | Computing loss after +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | Saving snapshot... +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | Saved +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | Time 5373.97 s +2025-03-29 21:02:52 | [rl2_trainer] epoch #274 | EpochTime 19.74 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -57.0563 +Average/AverageReturn -91.368 +Average/Iteration 274 +Average/MaxReturn -49.8739 +Average/MinReturn -120.783 +Average/NumEpisodes 8 +Average/StdReturn 24.9927 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957995 +TotalEnvSteps 220000 +__unnamed_task__/AverageDiscountedReturn -57.0563 +__unnamed_task__/AverageReturn -91.368 +__unnamed_task__/Iteration 274 +__unnamed_task__/MaxReturn -49.8739 +__unnamed_task__/MinReturn -120.783 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.9927 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43868 +policy/KL 0.0178595 +policy/KLBefore 0 +policy/LossAfter -0.22773 +policy/LossBefore -0.0128194 +policy/dLoss 0.21491 +---------------------------------------- -------------- +2025-03-29 21:03:12 | [rl2_trainer] epoch #275 | Optimizing policy... +2025-03-29 21:03:12 | [rl2_trainer] epoch #275 | Fitting baseline... +2025-03-29 21:03:12 | [rl2_trainer] epoch #275 | Computing loss before +2025-03-29 21:03:12 | [rl2_trainer] epoch #275 | Computing KL before +2025-03-29 21:03:12 | [rl2_trainer] epoch #275 | Optimizing +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | Computing KL after +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | Computing loss after +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | Saving snapshot... +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | Saved +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | Time 5395.82 s +2025-03-29 21:03:14 | [rl2_trainer] epoch #275 | EpochTime 21.85 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -55.0882 +Average/AverageReturn -89.6779 +Average/Iteration 275 +Average/MaxReturn -53.0183 +Average/MinReturn -120.494 +Average/NumEpisodes 8 +Average/StdReturn 21.886 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.975825 +TotalEnvSteps 220800 +__unnamed_task__/AverageDiscountedReturn -55.0882 +__unnamed_task__/AverageReturn -89.6779 +__unnamed_task__/Iteration 275 +__unnamed_task__/MaxReturn -53.0183 +__unnamed_task__/MinReturn -120.494 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.886 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43867 +policy/KL 0.00947499 +policy/KLBefore 0 +policy/LossAfter -0.142512 +policy/LossBefore -0.00596121 +policy/dLoss 0.136551 +---------------------------------------- --------------- +2025-03-29 21:03:32 | [rl2_trainer] epoch #276 | Optimizing policy... +2025-03-29 21:03:32 | [rl2_trainer] epoch #276 | Fitting baseline... +2025-03-29 21:03:32 | [rl2_trainer] epoch #276 | Computing loss before +2025-03-29 21:03:32 | [rl2_trainer] epoch #276 | Computing KL before +2025-03-29 21:03:32 | [rl2_trainer] epoch #276 | Optimizing +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | Computing KL after +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | Computing loss after +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | Saving snapshot... +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | Saved +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | Time 5415.72 s +2025-03-29 21:03:34 | [rl2_trainer] epoch #276 | EpochTime 19.90 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -52.9696 +Average/AverageReturn -78.2418 +Average/Iteration 276 +Average/MaxReturn -51.9239 +Average/MinReturn -104.314 +Average/NumEpisodes 8 +Average/StdReturn 22.8678 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.988734 +TotalEnvSteps 221600 +__unnamed_task__/AverageDiscountedReturn -52.9696 +__unnamed_task__/AverageReturn -78.2418 +__unnamed_task__/Iteration 276 +__unnamed_task__/MaxReturn -51.9239 +__unnamed_task__/MinReturn -104.314 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.8678 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44418 +policy/KL 0.0175631 +policy/KLBefore 0 +policy/LossAfter -0.117379 +policy/LossBefore -0.0063935 +policy/dLoss 0.110986 +---------------------------------------- -------------- +2025-03-29 21:03:51 | [rl2_trainer] epoch #277 | Optimizing policy... +2025-03-29 21:03:51 | [rl2_trainer] epoch #277 | Fitting baseline... +2025-03-29 21:03:51 | [rl2_trainer] epoch #277 | Computing loss before +2025-03-29 21:03:51 | [rl2_trainer] epoch #277 | Computing KL before +2025-03-29 21:03:51 | [rl2_trainer] epoch #277 | Optimizing +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | Computing KL after +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | Computing loss after +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | Saving snapshot... +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | Saved +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | Time 5434.79 s +2025-03-29 21:03:53 | [rl2_trainer] epoch #277 | EpochTime 19.06 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -58.6649 +Average/AverageReturn -94.5023 +Average/Iteration 277 +Average/MaxReturn -44.7938 +Average/MinReturn -134.52 +Average/NumEpisodes 8 +Average/StdReturn 29.3558 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978146 +TotalEnvSteps 222400 +__unnamed_task__/AverageDiscountedReturn -58.6649 +__unnamed_task__/AverageReturn -94.5023 +__unnamed_task__/Iteration 277 +__unnamed_task__/MaxReturn -44.7938 +__unnamed_task__/MinReturn -134.52 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 29.3558 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44156 +policy/KL 0.0192668 +policy/KLBefore 0 +policy/LossAfter -0.205361 +policy/LossBefore -0.00391088 +policy/dLoss 0.20145 +---------------------------------------- --------------- +2025-03-29 21:04:12 | [rl2_trainer] epoch #278 | Optimizing policy... +2025-03-29 21:04:12 | [rl2_trainer] epoch #278 | Fitting baseline... +2025-03-29 21:04:12 | [rl2_trainer] epoch #278 | Computing loss before +2025-03-29 21:04:12 | [rl2_trainer] epoch #278 | Computing KL before +2025-03-29 21:04:12 | [rl2_trainer] epoch #278 | Optimizing +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | Computing KL after +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | Computing loss after +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | Saving snapshot... +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | Saved +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | Time 5456.57 s +2025-03-29 21:04:15 | [rl2_trainer] epoch #278 | EpochTime 21.78 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -53.7479 +Average/AverageReturn -86.4116 +Average/Iteration 278 +Average/MaxReturn -66.0731 +Average/MinReturn -101.04 +Average/NumEpisodes 8 +Average/StdReturn 12.5562 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957668 +TotalEnvSteps 223200 +__unnamed_task__/AverageDiscountedReturn -53.7479 +__unnamed_task__/AverageReturn -86.4116 +__unnamed_task__/Iteration 278 +__unnamed_task__/MaxReturn -66.0731 +__unnamed_task__/MinReturn -101.04 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.5562 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.432 +policy/KL 0.0133938 +policy/KLBefore 0 +policy/LossAfter -0.207853 +policy/LossBefore -0.0200235 +policy/dLoss 0.187829 +---------------------------------------- -------------- +2025-03-29 21:04:35 | [rl2_trainer] epoch #279 | Optimizing policy... +2025-03-29 21:04:35 | [rl2_trainer] epoch #279 | Fitting baseline... +2025-03-29 21:04:35 | [rl2_trainer] epoch #279 | Computing loss before +2025-03-29 21:04:35 | [rl2_trainer] epoch #279 | Computing KL before +2025-03-29 21:04:35 | [rl2_trainer] epoch #279 | Optimizing +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | Computing KL after +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | Computing loss after +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | Saving snapshot... +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | Saved +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | Time 5479.35 s +2025-03-29 21:04:38 | [rl2_trainer] epoch #279 | EpochTime 22.78 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -54.1051 +Average/AverageReturn -88.2335 +Average/Iteration 279 +Average/MaxReturn -62.0482 +Average/MinReturn -114.615 +Average/NumEpisodes 8 +Average/StdReturn 17.442 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.957038 +TotalEnvSteps 224000 +__unnamed_task__/AverageDiscountedReturn -54.1051 +__unnamed_task__/AverageReturn -88.2335 +__unnamed_task__/Iteration 279 +__unnamed_task__/MaxReturn -62.0482 +__unnamed_task__/MinReturn -114.615 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 17.442 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.4319 +policy/KL 0.0130427 +policy/KLBefore 0 +policy/LossAfter -0.213363 +policy/LossBefore -0.017688 +policy/dLoss 0.195675 +---------------------------------------- -------------- +2025-03-29 21:04:55 | [rl2_trainer] epoch #280 | Optimizing policy... +2025-03-29 21:04:55 | [rl2_trainer] epoch #280 | Fitting baseline... +2025-03-29 21:04:55 | [rl2_trainer] epoch #280 | Computing loss before +2025-03-29 21:04:55 | [rl2_trainer] epoch #280 | Computing KL before +2025-03-29 21:04:55 | [rl2_trainer] epoch #280 | Optimizing +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | Computing KL after +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | Computing loss after +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | Saving snapshot... +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | Saved +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | Time 5498.57 s +2025-03-29 21:04:57 | [rl2_trainer] epoch #280 | EpochTime 19.22 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -58.969 +Average/AverageReturn -94.8165 +Average/Iteration 280 +Average/MaxReturn -53.193 +Average/MinReturn -127.633 +Average/NumEpisodes 8 +Average/StdReturn 19.7563 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.989419 +TotalEnvSteps 224800 +__unnamed_task__/AverageDiscountedReturn -58.969 +__unnamed_task__/AverageReturn -94.8165 +__unnamed_task__/Iteration 280 +__unnamed_task__/MaxReturn -53.193 +__unnamed_task__/MinReturn -127.633 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.7563 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44194 +policy/KL 0.0201674 +policy/KLBefore 0 +policy/LossAfter -0.0930412 +policy/LossBefore 0.00642578 +policy/dLoss 0.099467 +---------------------------------------- --------------- +2025-03-29 21:05:14 | [rl2_trainer] epoch #281 | Optimizing policy... +2025-03-29 21:05:14 | [rl2_trainer] epoch #281 | Fitting baseline... +2025-03-29 21:05:14 | [rl2_trainer] epoch #281 | Computing loss before +2025-03-29 21:05:14 | [rl2_trainer] epoch #281 | Computing KL before +2025-03-29 21:05:14 | [rl2_trainer] epoch #281 | Optimizing +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | Computing KL after +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | Computing loss after +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | Saving snapshot... +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | Saved +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | Time 5518.16 s +2025-03-29 21:05:17 | [rl2_trainer] epoch #281 | EpochTime 19.58 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -53.14 +Average/AverageReturn -86.6459 +Average/Iteration 281 +Average/MaxReturn -47.1663 +Average/MinReturn -110.101 +Average/NumEpisodes 8 +Average/StdReturn 21.0303 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.971304 +TotalEnvSteps 225600 +__unnamed_task__/AverageDiscountedReturn -53.14 +__unnamed_task__/AverageReturn -86.6459 +__unnamed_task__/Iteration 281 +__unnamed_task__/MaxReturn -47.1663 +__unnamed_task__/MinReturn -110.101 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.0303 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.44279 +policy/KL 0.0130957 +policy/KLBefore 0 +policy/LossAfter -0.146036 +policy/LossBefore -0.00691456 +policy/dLoss 0.139121 +---------------------------------------- --------------- +2025-03-29 21:05:34 | [rl2_trainer] epoch #282 | Optimizing policy... +2025-03-29 21:05:34 | [rl2_trainer] epoch #282 | Fitting baseline... +2025-03-29 21:05:34 | [rl2_trainer] epoch #282 | Computing loss before +2025-03-29 21:05:34 | [rl2_trainer] epoch #282 | Computing KL before +2025-03-29 21:05:34 | [rl2_trainer] epoch #282 | Optimizing +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | Computing KL after +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | Computing loss after +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | Saving snapshot... +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | Saved +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | Time 5537.58 s +2025-03-29 21:05:36 | [rl2_trainer] epoch #282 | EpochTime 19.42 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -51.4918 +Average/AverageReturn -77.3746 +Average/Iteration 282 +Average/MaxReturn -49.5365 +Average/MinReturn -100.619 +Average/NumEpisodes 8 +Average/StdReturn 20.845 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.990345 +TotalEnvSteps 226400 +__unnamed_task__/AverageDiscountedReturn -51.4918 +__unnamed_task__/AverageReturn -77.3746 +__unnamed_task__/Iteration 282 +__unnamed_task__/MaxReturn -49.5365 +__unnamed_task__/MinReturn -100.619 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.845 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43568 +policy/KL 0.0177203 +policy/KLBefore 0 +policy/LossAfter -0.101487 +policy/LossBefore 0.000856695 +policy/dLoss 0.102344 +---------------------------------------- ---------------- +2025-03-29 21:05:53 | [rl2_trainer] epoch #283 | Optimizing policy... +2025-03-29 21:05:53 | [rl2_trainer] epoch #283 | Fitting baseline... +2025-03-29 21:05:53 | [rl2_trainer] epoch #283 | Computing loss before +2025-03-29 21:05:53 | [rl2_trainer] epoch #283 | Computing KL before +2025-03-29 21:05:53 | [rl2_trainer] epoch #283 | Optimizing +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | Computing KL after +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | Computing loss after +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | Saving snapshot... +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | Saved +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | Time 5557.20 s +2025-03-29 21:05:56 | [rl2_trainer] epoch #283 | EpochTime 19.62 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -49.3045 +Average/AverageReturn -75.5575 +Average/Iteration 283 +Average/MaxReturn -46.7861 +Average/MinReturn -125.813 +Average/NumEpisodes 8 +Average/StdReturn 26.5698 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.993216 +TotalEnvSteps 227200 +__unnamed_task__/AverageDiscountedReturn -49.3045 +__unnamed_task__/AverageReturn -75.5575 +__unnamed_task__/Iteration 283 +__unnamed_task__/MaxReturn -46.7861 +__unnamed_task__/MinReturn -125.813 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.5698 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43903 +policy/KL 0.0139082 +policy/KLBefore 0 +policy/LossAfter -0.0920618 +policy/LossBefore -0.00161482 +policy/dLoss 0.0904469 +---------------------------------------- --------------- +2025-03-29 21:06:13 | [rl2_trainer] epoch #284 | Optimizing policy... +2025-03-29 21:06:13 | [rl2_trainer] epoch #284 | Fitting baseline... +2025-03-29 21:06:13 | [rl2_trainer] epoch #284 | Computing loss before +2025-03-29 21:06:13 | [rl2_trainer] epoch #284 | Computing KL before +2025-03-29 21:06:13 | [rl2_trainer] epoch #284 | Optimizing +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | Computing KL after +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | Computing loss after +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | Saving snapshot... +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | Saved +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | Time 5576.58 s +2025-03-29 21:06:15 | [rl2_trainer] epoch #284 | EpochTime 19.37 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -51.6425 +Average/AverageReturn -77.6052 +Average/Iteration 284 +Average/MaxReturn -52.4974 +Average/MinReturn -103.809 +Average/NumEpisodes 8 +Average/StdReturn 19.8739 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.989802 +TotalEnvSteps 228000 +__unnamed_task__/AverageDiscountedReturn -51.6425 +__unnamed_task__/AverageReturn -77.6052 +__unnamed_task__/Iteration 284 +__unnamed_task__/MaxReturn -52.4974 +__unnamed_task__/MinReturn -103.809 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 19.8739 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.4441 +policy/KL 0.019906 +policy/KLBefore 0 +policy/LossAfter -0.108531 +policy/LossBefore -0.00240013 +policy/dLoss 0.106131 +---------------------------------------- --------------- +2025-03-29 21:06:32 | [rl2_trainer] epoch #285 | Optimizing policy... +2025-03-29 21:06:32 | [rl2_trainer] epoch #285 | Fitting baseline... +2025-03-29 21:06:32 | [rl2_trainer] epoch #285 | Computing loss before +2025-03-29 21:06:32 | [rl2_trainer] epoch #285 | Computing KL before +2025-03-29 21:06:32 | [rl2_trainer] epoch #285 | Optimizing +2025-03-29 21:06:34 | [rl2_trainer] epoch #285 | Computing KL after +2025-03-29 21:06:34 | [rl2_trainer] epoch #285 | Computing loss after +2025-03-29 21:06:35 | [rl2_trainer] epoch #285 | Saving snapshot... +2025-03-29 21:06:35 | [rl2_trainer] epoch #285 | Saved +2025-03-29 21:06:35 | [rl2_trainer] epoch #285 | Time 5596.00 s +2025-03-29 21:06:35 | [rl2_trainer] epoch #285 | EpochTime 19.42 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -53.3215 +Average/AverageReturn -87.6127 +Average/Iteration 285 +Average/MaxReturn -46.384 +Average/MinReturn -115.5 +Average/NumEpisodes 8 +Average/StdReturn 21.7597 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.978293 +TotalEnvSteps 228800 +__unnamed_task__/AverageDiscountedReturn -53.3215 +__unnamed_task__/AverageReturn -87.6127 +__unnamed_task__/Iteration 285 +__unnamed_task__/MaxReturn -46.384 +__unnamed_task__/MinReturn -115.5 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.7597 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43702 +policy/KL 0.0218087 +policy/KLBefore 0 +policy/LossAfter -0.138479 +policy/LossBefore -0.00557075 +policy/dLoss 0.132908 +---------------------------------------- --------------- +2025-03-29 21:06:52 | [rl2_trainer] epoch #286 | Optimizing policy... +2025-03-29 21:06:52 | [rl2_trainer] epoch #286 | Fitting baseline... +2025-03-29 21:06:52 | [rl2_trainer] epoch #286 | Computing loss before +2025-03-29 21:06:52 | [rl2_trainer] epoch #286 | Computing KL before +2025-03-29 21:06:52 | [rl2_trainer] epoch #286 | Optimizing +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | Computing KL after +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | Computing loss after +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | Saving snapshot... +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | Saved +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | Time 5615.51 s +2025-03-29 21:06:54 | [rl2_trainer] epoch #286 | EpochTime 19.51 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -49.0244 +Average/AverageReturn -75.6743 +Average/Iteration 286 +Average/MaxReturn -46.21 +Average/MinReturn -98.6445 +Average/NumEpisodes 8 +Average/StdReturn 20.4473 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.993726 +TotalEnvSteps 229600 +__unnamed_task__/AverageDiscountedReturn -49.0244 +__unnamed_task__/AverageReturn -75.6743 +__unnamed_task__/Iteration 286 +__unnamed_task__/MaxReturn -46.21 +__unnamed_task__/MinReturn -98.6445 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 20.4473 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.43111 +policy/KL 0.0298751 +policy/KLBefore 0 +policy/LossAfter -0.0661254 +policy/LossBefore -0.0090337 +policy/dLoss 0.0570917 +---------------------------------------- -------------- +2025-03-29 21:07:11 | [rl2_trainer] epoch #287 | Optimizing policy... +2025-03-29 21:07:11 | [rl2_trainer] epoch #287 | Fitting baseline... +2025-03-29 21:07:11 | [rl2_trainer] epoch #287 | Computing loss before +2025-03-29 21:07:11 | [rl2_trainer] epoch #287 | Computing KL before +2025-03-29 21:07:12 | [rl2_trainer] epoch #287 | Optimizing +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | Computing KL after +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | Computing loss after +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | Saving snapshot... +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | Saved +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | Time 5635.24 s +2025-03-29 21:07:14 | [rl2_trainer] epoch #287 | EpochTime 19.73 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -47.4743 +Average/AverageReturn -74.4674 +Average/Iteration 287 +Average/MaxReturn -38.0506 +Average/MinReturn -100.08 +Average/NumEpisodes 8 +Average/StdReturn 23.0561 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985256 +TotalEnvSteps 230400 +__unnamed_task__/AverageDiscountedReturn -47.4743 +__unnamed_task__/AverageReturn -74.4674 +__unnamed_task__/Iteration 287 +__unnamed_task__/MaxReturn -38.0506 +__unnamed_task__/MinReturn -100.08 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.0561 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.42799 +policy/KL 0.019673 +policy/KLBefore 0 +policy/LossAfter -0.122997 +policy/LossBefore 0.00327451 +policy/dLoss 0.126271 +---------------------------------------- --------------- +2025-03-29 21:07:33 | [rl2_trainer] epoch #288 | Optimizing policy... +2025-03-29 21:07:33 | [rl2_trainer] epoch #288 | Fitting baseline... +2025-03-29 21:07:33 | [rl2_trainer] epoch #288 | Computing loss before +2025-03-29 21:07:33 | [rl2_trainer] epoch #288 | Computing KL before +2025-03-29 21:07:33 | [rl2_trainer] epoch #288 | Optimizing +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | Computing KL after +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | Computing loss after +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | Saving snapshot... +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | Saved +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | Time 5657.19 s +2025-03-29 21:07:36 | [rl2_trainer] epoch #288 | EpochTime 21.95 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -45.1645 +Average/AverageReturn -68.801 +Average/Iteration 288 +Average/MaxReturn -54.7458 +Average/MinReturn -85.5455 +Average/NumEpisodes 8 +Average/StdReturn 12.4981 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982575 +TotalEnvSteps 231200 +__unnamed_task__/AverageDiscountedReturn -45.1645 +__unnamed_task__/AverageReturn -68.801 +__unnamed_task__/Iteration 288 +__unnamed_task__/MaxReturn -54.7458 +__unnamed_task__/MinReturn -85.5455 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 12.4981 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.42396 +policy/KL 0.0109772 +policy/KLBefore 0 +policy/LossAfter -0.109141 +policy/LossBefore -0.012017 +policy/dLoss 0.0971241 +---------------------------------------- -------------- +2025-03-29 21:07:56 | [rl2_trainer] epoch #289 | Optimizing policy... +2025-03-29 21:07:56 | [rl2_trainer] epoch #289 | Fitting baseline... +2025-03-29 21:07:56 | [rl2_trainer] epoch #289 | Computing loss before +2025-03-29 21:07:56 | [rl2_trainer] epoch #289 | Computing KL before +2025-03-29 21:07:56 | [rl2_trainer] epoch #289 | Optimizing +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | Computing KL after +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | Computing loss after +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | Saving snapshot... +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | Saved +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | Time 5680.37 s +2025-03-29 21:07:59 | [rl2_trainer] epoch #289 | EpochTime 23.18 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -53.6285 +Average/AverageReturn -82.2042 +Average/Iteration 289 +Average/MaxReturn -49.2013 +Average/MinReturn -123.398 +Average/NumEpisodes 8 +Average/StdReturn 24.2509 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.985875 +TotalEnvSteps 232000 +__unnamed_task__/AverageDiscountedReturn -53.6285 +__unnamed_task__/AverageReturn -82.2042 +__unnamed_task__/Iteration 289 +__unnamed_task__/MaxReturn -49.2013 +__unnamed_task__/MinReturn -123.398 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 24.2509 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.42647 +policy/KL 0.0243705 +policy/KLBefore 0 +policy/LossAfter -0.0958382 +policy/LossBefore -0.00057178 +policy/dLoss 0.0952664 +---------------------------------------- --------------- +2025-03-29 21:08:19 | [rl2_trainer] epoch #290 | Optimizing policy... +2025-03-29 21:08:19 | [rl2_trainer] epoch #290 | Fitting baseline... +2025-03-29 21:08:19 | [rl2_trainer] epoch #290 | Computing loss before +2025-03-29 21:08:19 | [rl2_trainer] epoch #290 | Computing KL before +2025-03-29 21:08:19 | [rl2_trainer] epoch #290 | Optimizing +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | Computing KL after +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | Computing loss after +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | Saving snapshot... +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | Saved +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | Time 5703.89 s +2025-03-29 21:08:22 | [rl2_trainer] epoch #290 | EpochTime 23.51 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -46.3058 +Average/AverageReturn -77.558 +Average/Iteration 290 +Average/MaxReturn -46.518 +Average/MinReturn -142.638 +Average/NumEpisodes 8 +Average/StdReturn 33.778 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.979677 +TotalEnvSteps 232800 +__unnamed_task__/AverageDiscountedReturn -46.3058 +__unnamed_task__/AverageReturn -77.558 +__unnamed_task__/Iteration 290 +__unnamed_task__/MaxReturn -46.518 +__unnamed_task__/MinReturn -142.638 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 33.778 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.42661 +policy/KL 0.0100815 +policy/KLBefore 0 +policy/LossAfter -0.136579 +policy/LossBefore -0.0202757 +policy/dLoss 0.116303 +---------------------------------------- -------------- +2025-03-29 21:08:42 | [rl2_trainer] epoch #291 | Optimizing policy... +2025-03-29 21:08:42 | [rl2_trainer] epoch #291 | Fitting baseline... +2025-03-29 21:08:42 | [rl2_trainer] epoch #291 | Computing loss before +2025-03-29 21:08:43 | [rl2_trainer] epoch #291 | Computing KL before +2025-03-29 21:08:43 | [rl2_trainer] epoch #291 | Optimizing +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | Computing KL after +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | Computing loss after +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | Saving snapshot... +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | Saved +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | Time 5726.74 s +2025-03-29 21:08:45 | [rl2_trainer] epoch #291 | EpochTime 22.84 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -49.8099 +Average/AverageReturn -80.7842 +Average/Iteration 291 +Average/MaxReturn -50.4598 +Average/MinReturn -127.153 +Average/NumEpisodes 8 +Average/StdReturn 23.2657 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.980751 +TotalEnvSteps 233600 +__unnamed_task__/AverageDiscountedReturn -49.8099 +__unnamed_task__/AverageReturn -80.7842 +__unnamed_task__/Iteration 291 +__unnamed_task__/MaxReturn -50.4598 +__unnamed_task__/MinReturn -127.153 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 23.2657 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.4186 +policy/KL 0.0237311 +policy/KLBefore 0 +policy/LossAfter -0.148186 +policy/LossBefore 0.013967 +policy/dLoss 0.162153 +---------------------------------------- -------------- +2025-03-29 21:09:06 | [rl2_trainer] epoch #292 | Optimizing policy... +2025-03-29 21:09:06 | [rl2_trainer] epoch #292 | Fitting baseline... +2025-03-29 21:09:06 | [rl2_trainer] epoch #292 | Computing loss before +2025-03-29 21:09:06 | [rl2_trainer] epoch #292 | Computing KL before +2025-03-29 21:09:06 | [rl2_trainer] epoch #292 | Optimizing +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | Computing KL after +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | Computing loss after +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | Saving snapshot... +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | Saved +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | Time 5750.51 s +2025-03-29 21:09:09 | [rl2_trainer] epoch #292 | EpochTime 23.77 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -48.8303 +Average/AverageReturn -78.7796 +Average/Iteration 292 +Average/MaxReturn -50.5865 +Average/MinReturn -129.045 +Average/NumEpisodes 8 +Average/StdReturn 26.1581 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.992396 +TotalEnvSteps 234400 +__unnamed_task__/AverageDiscountedReturn -48.8303 +__unnamed_task__/AverageReturn -78.7796 +__unnamed_task__/Iteration 292 +__unnamed_task__/MaxReturn -50.5865 +__unnamed_task__/MinReturn -129.045 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.1581 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.41008 +policy/KL 0.0265311 +policy/KLBefore 0 +policy/LossAfter -0.115185 +policy/LossBefore -0.00944188 +policy/dLoss 0.105743 +---------------------------------------- --------------- +2025-03-29 21:09:29 | [rl2_trainer] epoch #293 | Optimizing policy... +2025-03-29 21:09:29 | [rl2_trainer] epoch #293 | Fitting baseline... +2025-03-29 21:09:29 | [rl2_trainer] epoch #293 | Computing loss before +2025-03-29 21:09:29 | [rl2_trainer] epoch #293 | Computing KL before +2025-03-29 21:09:30 | [rl2_trainer] epoch #293 | Optimizing +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | Computing KL after +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | Computing loss after +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | Saving snapshot... +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | Saved +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | Time 5773.93 s +2025-03-29 21:09:32 | [rl2_trainer] epoch #293 | EpochTime 23.42 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -48.5985 +Average/AverageReturn -76.5328 +Average/Iteration 293 +Average/MaxReturn -48.4377 +Average/MinReturn -128.901 +Average/NumEpisodes 8 +Average/StdReturn 22.6333 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.994144 +TotalEnvSteps 235200 +__unnamed_task__/AverageDiscountedReturn -48.5985 +__unnamed_task__/AverageReturn -76.5328 +__unnamed_task__/Iteration 293 +__unnamed_task__/MaxReturn -48.4377 +__unnamed_task__/MinReturn -128.901 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 22.6333 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.4013 +policy/KL 0.0101069 +policy/KLBefore 0 +policy/LossAfter -0.0379007 +policy/LossBefore 0.0125368 +policy/dLoss 0.0504375 +---------------------------------------- -------------- +2025-03-29 21:09:53 | [rl2_trainer] epoch #294 | Optimizing policy... +2025-03-29 21:09:53 | [rl2_trainer] epoch #294 | Fitting baseline... +2025-03-29 21:09:53 | [rl2_trainer] epoch #294 | Computing loss before +2025-03-29 21:09:53 | [rl2_trainer] epoch #294 | Computing KL before +2025-03-29 21:09:53 | [rl2_trainer] epoch #294 | Optimizing +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | Computing KL after +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | Computing loss after +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | Saving snapshot... +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | Saved +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | Time 5797.46 s +2025-03-29 21:09:56 | [rl2_trainer] epoch #294 | EpochTime 23.53 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -43.9127 +Average/AverageReturn -72.0561 +Average/Iteration 294 +Average/MaxReturn -53.1076 +Average/MinReturn -138.702 +Average/NumEpisodes 8 +Average/StdReturn 26.4017 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.989003 +TotalEnvSteps 236000 +__unnamed_task__/AverageDiscountedReturn -43.9127 +__unnamed_task__/AverageReturn -72.0561 +__unnamed_task__/Iteration 294 +__unnamed_task__/MaxReturn -53.1076 +__unnamed_task__/MinReturn -138.702 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 26.4017 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.39463 +policy/KL 0.0138103 +policy/KLBefore 0 +policy/LossAfter -0.0625358 +policy/LossBefore 0.00386542 +policy/dLoss 0.0664012 +---------------------------------------- --------------- +2025-03-29 21:10:17 | [rl2_trainer] epoch #295 | Optimizing policy... +2025-03-29 21:10:17 | [rl2_trainer] epoch #295 | Fitting baseline... +2025-03-29 21:10:17 | [rl2_trainer] epoch #295 | Computing loss before +2025-03-29 21:10:17 | [rl2_trainer] epoch #295 | Computing KL before +2025-03-29 21:10:17 | [rl2_trainer] epoch #295 | Optimizing +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | Computing KL after +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | Computing loss after +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | Saving snapshot... +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | Saved +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | Time 5820.73 s +2025-03-29 21:10:19 | [rl2_trainer] epoch #295 | EpochTime 23.27 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -45.1866 +Average/AverageReturn -70.698 +Average/Iteration 295 +Average/MaxReturn -36.2989 +Average/MinReturn -111.97 +Average/NumEpisodes 8 +Average/StdReturn 21.7299 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.982085 +TotalEnvSteps 236800 +__unnamed_task__/AverageDiscountedReturn -45.1866 +__unnamed_task__/AverageReturn -70.698 +__unnamed_task__/Iteration 295 +__unnamed_task__/MaxReturn -36.2989 +__unnamed_task__/MinReturn -111.97 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.7299 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.38542 +policy/KL 0.0130063 +policy/KLBefore 0 +policy/LossAfter -0.0883233 +policy/LossBefore -0.0100918 +policy/dLoss 0.0782315 +---------------------------------------- -------------- +2025-03-29 21:10:40 | [rl2_trainer] epoch #296 | Optimizing policy... +2025-03-29 21:10:40 | [rl2_trainer] epoch #296 | Fitting baseline... +2025-03-29 21:10:40 | [rl2_trainer] epoch #296 | Computing loss before +2025-03-29 21:10:40 | [rl2_trainer] epoch #296 | Computing KL before +2025-03-29 21:10:40 | [rl2_trainer] epoch #296 | Optimizing +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | Computing KL after +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | Computing loss after +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | Saving snapshot... +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | Saved +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | Time 5843.95 s +2025-03-29 21:10:42 | [rl2_trainer] epoch #296 | EpochTime 23.21 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -46.849 +Average/AverageReturn -74.7977 +Average/Iteration 296 +Average/MaxReturn -49.0078 +Average/MinReturn -130.948 +Average/NumEpisodes 8 +Average/StdReturn 27.4062 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.993427 +TotalEnvSteps 237600 +__unnamed_task__/AverageDiscountedReturn -46.849 +__unnamed_task__/AverageReturn -74.7977 +__unnamed_task__/Iteration 296 +__unnamed_task__/MaxReturn -49.0078 +__unnamed_task__/MinReturn -130.948 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 27.4062 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.37292 +policy/KL 0.0354572 +policy/KLBefore 0 +policy/LossAfter -0.0750272 +policy/LossBefore -0.00344178 +policy/dLoss 0.0715854 +---------------------------------------- --------------- +2025-03-29 21:11:03 | [rl2_trainer] epoch #297 | Optimizing policy... +2025-03-29 21:11:03 | [rl2_trainer] epoch #297 | Fitting baseline... +2025-03-29 21:11:03 | [rl2_trainer] epoch #297 | Computing loss before +2025-03-29 21:11:03 | [rl2_trainer] epoch #297 | Computing KL before +2025-03-29 21:11:04 | [rl2_trainer] epoch #297 | Optimizing +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | Computing KL after +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | Computing loss after +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | Saving snapshot... +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | Saved +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | Time 5867.95 s +2025-03-29 21:11:06 | [rl2_trainer] epoch #297 | EpochTime 24.00 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -46.2054 +Average/AverageReturn -71.2346 +Average/Iteration 297 +Average/MaxReturn -45.1007 +Average/MinReturn -111.956 +Average/NumEpisodes 8 +Average/StdReturn 21.7121 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.992333 +TotalEnvSteps 238400 +__unnamed_task__/AverageDiscountedReturn -46.2054 +__unnamed_task__/AverageReturn -71.2346 +__unnamed_task__/Iteration 297 +__unnamed_task__/MaxReturn -45.1007 +__unnamed_task__/MinReturn -111.956 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 21.7121 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.36421 +policy/KL 0.0396697 +policy/KLBefore 0 +policy/LossAfter -0.0854244 +policy/LossBefore 0.000469222 +policy/dLoss 0.0858937 +---------------------------------------- ---------------- +2025-03-29 21:11:27 | [rl2_trainer] epoch #298 | Optimizing policy... +2025-03-29 21:11:27 | [rl2_trainer] epoch #298 | Fitting baseline... +2025-03-29 21:11:27 | [rl2_trainer] epoch #298 | Computing loss before +2025-03-29 21:11:27 | [rl2_trainer] epoch #298 | Computing KL before +2025-03-29 21:11:27 | [rl2_trainer] epoch #298 | Optimizing +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | Computing KL after +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | Computing loss after +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | Saving snapshot... +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | Saved +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | Time 5891.68 s +2025-03-29 21:11:30 | [rl2_trainer] epoch #298 | EpochTime 23.73 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -45.7281 +Average/AverageReturn -72.6962 +Average/Iteration 298 +Average/MaxReturn -53.9774 +Average/MinReturn -97.5107 +Average/NumEpisodes 8 +Average/StdReturn 14.6044 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.984863 +TotalEnvSteps 239200 +__unnamed_task__/AverageDiscountedReturn -45.7281 +__unnamed_task__/AverageReturn -72.6962 +__unnamed_task__/Iteration 298 +__unnamed_task__/MaxReturn -53.9774 +__unnamed_task__/MinReturn -97.5107 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 14.6044 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.35726 +policy/KL 0.0236894 +policy/KLBefore 0 +policy/LossAfter -0.0919068 +policy/LossBefore 0.00205247 +policy/dLoss 0.0939592 +---------------------------------------- --------------- +2025-03-29 21:11:49 | [rl2_trainer] epoch #299 | Optimizing policy... +2025-03-29 21:11:49 | [rl2_trainer] epoch #299 | Fitting baseline... +2025-03-29 21:11:49 | [rl2_trainer] epoch #299 | Computing loss before +2025-03-29 21:11:49 | [rl2_trainer] epoch #299 | Computing KL before +2025-03-29 21:11:49 | [rl2_trainer] epoch #299 | Optimizing +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | Computing KL after +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | Computing loss after +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | Saving snapshot... +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | Saved +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | Time 5913.87 s +2025-03-29 21:11:52 | [rl2_trainer] epoch #299 | EpochTime 22.18 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -46.0835 +Average/AverageReturn -74.1415 +Average/Iteration 299 +Average/MaxReturn -52.1784 +Average/MinReturn -104.449 +Average/NumEpisodes 8 +Average/StdReturn 16.3541 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.974594 +TotalEnvSteps 240000 +__unnamed_task__/AverageDiscountedReturn -46.0835 +__unnamed_task__/AverageReturn -74.1415 +__unnamed_task__/Iteration 299 +__unnamed_task__/MaxReturn -52.1784 +__unnamed_task__/MinReturn -104.449 +__unnamed_task__/NumEpisodes 8 +__unnamed_task__/StdReturn 16.3541 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.34914 +policy/KL 0.0169961 +policy/KLBefore 0 +policy/LossAfter -0.0862707 +policy/LossBefore -0.000752625 +policy/dLoss 0.085518 +---------------------------------------- ----------------