diff --git "a/experiment/rl2_trainer_2/debug.log" "b/experiment/rl2_trainer_2/debug.log" new file mode 100644--- /dev/null +++ "b/experiment/rl2_trainer_2/debug.log" @@ -0,0 +1,18502 @@ +2025-04-02 13:51:58 | [rl2_trainer] Logging to /home/h2khalil/MetaRL-Assistive-Robotics/data/local/experiment/rl2_trainer_2 +2025-04-02 13:51:59 | [rl2_trainer] Obtaining samples... +2025-04-02 13:54:00 | [rl2_trainer] epoch #0 | Optimizing policy... +2025-04-02 13:54:02 | [rl2_trainer] epoch #0 | Fitting baseline... +2025-04-02 13:54:02 | [rl2_trainer] epoch #0 | Computing loss before +2025-04-02 13:54:02 | [rl2_trainer] epoch #0 | Computing KL before +2025-04-02 13:54:03 | [rl2_trainer] epoch #0 | Optimizing +2025-04-02 13:54:37 | [rl2_trainer] epoch #0 | Computing KL after +2025-04-02 13:54:38 | [rl2_trainer] epoch #0 | Computing loss after +2025-04-02 13:54:39 | [rl2_trainer] epoch #0 | Saving snapshot... +2025-04-02 13:54:39 | [rl2_trainer] epoch #0 | Saved +2025-04-02 13:54:39 | [rl2_trainer] epoch #0 | Time 159.83 s +2025-04-02 13:54:39 | [rl2_trainer] epoch #0 | EpochTime 159.83 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -50.4922 +Average/AverageReturn -87.0395 +Average/Iteration 0 +Average/MaxReturn -31.5352 +Average/MinReturn -134.654 +Average/NumEpisodes 100 +Average/StdReturn 24.7474 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.829598 +TotalEnvSteps 10000 +__unnamed_task__/AverageDiscountedReturn -50.4922 +__unnamed_task__/AverageReturn -87.0395 +__unnamed_task__/Iteration 0 +__unnamed_task__/MaxReturn -31.5352 +__unnamed_task__/MinReturn -134.654 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 24.7474 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.89092 +policy/KL 0.0178519 +policy/KLBefore 0 +policy/LossAfter -0.0991226 +policy/LossBefore -0.0231915 +policy/dLoss 0.0759312 +---------------------------------------- ------------- +2025-04-02 13:56:50 | [rl2_trainer] epoch #1 | Optimizing policy... +2025-04-02 13:56:50 | [rl2_trainer] epoch #1 | Fitting baseline... +2025-04-02 13:56:50 | [rl2_trainer] epoch #1 | Computing loss before +2025-04-02 13:56:51 | [rl2_trainer] epoch #1 | Computing KL before +2025-04-02 13:56:51 | [rl2_trainer] epoch #1 | Optimizing +2025-04-02 13:57:25 | [rl2_trainer] epoch #1 | Computing KL after +2025-04-02 13:57:26 | [rl2_trainer] epoch #1 | Computing loss after +2025-04-02 13:57:27 | [rl2_trainer] epoch #1 | Saving snapshot... +2025-04-02 13:57:27 | [rl2_trainer] epoch #1 | Saved +2025-04-02 13:57:27 | [rl2_trainer] epoch #1 | Time 327.83 s +2025-04-02 13:57:27 | [rl2_trainer] epoch #1 | EpochTime 168.00 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -41.989 +Average/AverageReturn -70.7663 +Average/Iteration 1 +Average/MaxReturn -30.9451 +Average/MinReturn -129.346 +Average/NumEpisodes 100 +Average/StdReturn 23.5706 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.847657 +TotalEnvSteps 20000 +__unnamed_task__/AverageDiscountedReturn -41.989 +__unnamed_task__/AverageReturn -70.7663 +__unnamed_task__/Iteration 1 +__unnamed_task__/MaxReturn -30.9451 +__unnamed_task__/MinReturn -129.346 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 23.5706 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.85506 +policy/KL 0.0148879 +policy/KLBefore 0 +policy/LossAfter -0.0632297 +policy/LossBefore -0.00391337 +policy/dLoss 0.0593163 +---------------------------------------- -------------- +2025-04-02 13:59:26 | [rl2_trainer] epoch #2 | Optimizing policy... +2025-04-02 13:59:26 | [rl2_trainer] epoch #2 | Fitting baseline... +2025-04-02 13:59:26 | [rl2_trainer] epoch #2 | Computing loss before +2025-04-02 13:59:27 | [rl2_trainer] epoch #2 | Computing KL before +2025-04-02 13:59:27 | [rl2_trainer] epoch #2 | Optimizing +2025-04-02 14:00:01 | [rl2_trainer] epoch #2 | Computing KL after +2025-04-02 14:00:01 | [rl2_trainer] epoch #2 | Computing loss after +2025-04-02 14:00:02 | [rl2_trainer] epoch #2 | Saving snapshot... +2025-04-02 14:00:02 | [rl2_trainer] epoch #2 | Saved +2025-04-02 14:00:02 | [rl2_trainer] epoch #2 | Time 483.12 s +2025-04-02 14:00:02 | [rl2_trainer] epoch #2 | EpochTime 155.28 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -37.9035 +Average/AverageReturn -64.5875 +Average/Iteration 2 +Average/MaxReturn -32.703 +Average/MinReturn -109.19 +Average/NumEpisodes 100 +Average/StdReturn 16.7161 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.705294 +TotalEnvSteps 30000 +__unnamed_task__/AverageDiscountedReturn -37.9035 +__unnamed_task__/AverageReturn -64.5875 +__unnamed_task__/Iteration 2 +__unnamed_task__/MaxReturn -32.703 +__unnamed_task__/MinReturn -109.19 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.7161 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.8307 +policy/KL 0.00763483 +policy/KLBefore 0 +policy/LossAfter -0.0469767 +policy/LossBefore 0.00697974 +policy/dLoss 0.0539565 +---------------------------------------- -------------- +2025-04-02 14:02:04 | [rl2_trainer] epoch #3 | Optimizing policy... +2025-04-02 14:02:04 | [rl2_trainer] epoch #3 | Fitting baseline... +2025-04-02 14:02:04 | [rl2_trainer] epoch #3 | Computing loss before +2025-04-02 14:02:05 | [rl2_trainer] epoch #3 | Computing KL before +2025-04-02 14:02:06 | [rl2_trainer] epoch #3 | Optimizing +2025-04-02 14:02:40 | [rl2_trainer] epoch #3 | Computing KL after +2025-04-02 14:02:40 | [rl2_trainer] epoch #3 | Computing loss after +2025-04-02 14:02:41 | [rl2_trainer] epoch #3 | Saving snapshot... +2025-04-02 14:02:41 | [rl2_trainer] epoch #3 | Saved +2025-04-02 14:02:41 | [rl2_trainer] epoch #3 | Time 642.13 s +2025-04-02 14:02:41 | [rl2_trainer] epoch #3 | EpochTime 159.01 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -36.1391 +Average/AverageReturn -61.1312 +Average/Iteration 3 +Average/MaxReturn -34.4226 +Average/MinReturn -106.36 +Average/NumEpisodes 100 +Average/StdReturn 15.653 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.720215 +TotalEnvSteps 40000 +__unnamed_task__/AverageDiscountedReturn -36.1391 +__unnamed_task__/AverageReturn -61.1312 +__unnamed_task__/Iteration 3 +__unnamed_task__/MaxReturn -34.4226 +__unnamed_task__/MinReturn -106.36 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.653 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.80713 +policy/KL 0.0120939 +policy/KLBefore 0 +policy/LossAfter -0.0623463 +policy/LossBefore -0.0218599 +policy/dLoss 0.0404864 +---------------------------------------- ------------- +2025-04-02 14:05:41 | [rl2_trainer] epoch #4 | Optimizing policy... +2025-04-02 14:05:42 | [rl2_trainer] epoch #4 | Fitting baseline... +2025-04-02 14:05:42 | [rl2_trainer] epoch #4 | Computing loss before +2025-04-02 14:05:42 | [rl2_trainer] epoch #4 | Computing KL before +2025-04-02 14:05:43 | [rl2_trainer] epoch #4 | Optimizing +2025-04-02 14:06:17 | [rl2_trainer] epoch #4 | Computing KL after +2025-04-02 14:06:18 | [rl2_trainer] epoch #4 | Computing loss after +2025-04-02 14:06:18 | [rl2_trainer] epoch #4 | Saving snapshot... +2025-04-02 14:06:18 | [rl2_trainer] epoch #4 | Saved +2025-04-02 14:06:18 | [rl2_trainer] epoch #4 | Time 859.51 s +2025-04-02 14:06:18 | [rl2_trainer] epoch #4 | EpochTime 217.37 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -28.0708 +Average/AverageReturn -47.4134 +Average/Iteration 4 +Average/MaxReturn -1.74535 +Average/MinReturn -77.453 +Average/NumEpisodes 100 +Average/StdReturn 13.1107 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.568133 +TotalEnvSteps 50000 +__unnamed_task__/AverageDiscountedReturn -28.0708 +__unnamed_task__/AverageReturn -47.4134 +__unnamed_task__/Iteration 4 +__unnamed_task__/MaxReturn -1.74535 +__unnamed_task__/MinReturn -77.453 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1107 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.79053 +policy/KL 0.00634195 +policy/KLBefore 0 +policy/LossAfter -0.0215956 +policy/LossBefore 0.0167064 +policy/dLoss 0.038302 +---------------------------------------- -------------- +2025-04-02 14:08:20 | [rl2_trainer] epoch #5 | Optimizing policy... +2025-04-02 14:08:21 | [rl2_trainer] epoch #5 | Fitting baseline... +2025-04-02 14:08:21 | [rl2_trainer] epoch #5 | Computing loss before +2025-04-02 14:08:21 | [rl2_trainer] epoch #5 | Computing KL before +2025-04-02 14:08:22 | [rl2_trainer] epoch #5 | Optimizing +2025-04-02 14:08:56 | [rl2_trainer] epoch #5 | Computing KL after +2025-04-02 14:08:57 | [rl2_trainer] epoch #5 | Computing loss after +2025-04-02 14:08:58 | [rl2_trainer] epoch #5 | Saving snapshot... +2025-04-02 14:08:58 | [rl2_trainer] epoch #5 | Saved +2025-04-02 14:08:58 | [rl2_trainer] epoch #5 | Time 1018.83 s +2025-04-02 14:08:58 | [rl2_trainer] epoch #5 | EpochTime 159.32 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -32.2691 +Average/AverageReturn -53.6916 +Average/Iteration 5 +Average/MaxReturn -28.7725 +Average/MinReturn -93.8008 +Average/NumEpisodes 100 +Average/StdReturn 12.8352 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.72636 +TotalEnvSteps 60000 +__unnamed_task__/AverageDiscountedReturn -32.2691 +__unnamed_task__/AverageReturn -53.6916 +__unnamed_task__/Iteration 5 +__unnamed_task__/MaxReturn -28.7725 +__unnamed_task__/MinReturn -93.8008 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.8352 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.75693 +policy/KL 0.00758754 +policy/KLBefore 0 +policy/LossAfter -0.0498452 +policy/LossBefore -0.0199467 +policy/dLoss 0.0298984 +---------------------------------------- -------------- +2025-04-02 14:11:11 | [rl2_trainer] epoch #6 | Optimizing policy... +2025-04-02 14:11:11 | [rl2_trainer] epoch #6 | Fitting baseline... +2025-04-02 14:11:11 | [rl2_trainer] epoch #6 | Computing loss before +2025-04-02 14:11:12 | [rl2_trainer] epoch #6 | Computing KL before +2025-04-02 14:11:12 | [rl2_trainer] epoch #6 | Optimizing +2025-04-02 14:11:45 | [rl2_trainer] epoch #6 | Computing KL after +2025-04-02 14:11:45 | [rl2_trainer] epoch #6 | Computing loss after +2025-04-02 14:11:46 | [rl2_trainer] epoch #6 | Saving snapshot... +2025-04-02 14:11:46 | [rl2_trainer] epoch #6 | Saved +2025-04-02 14:11:46 | [rl2_trainer] epoch #6 | Time 1187.17 s +2025-04-02 14:11:46 | [rl2_trainer] epoch #6 | EpochTime 168.34 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -32.4455 +Average/AverageReturn -53.5345 +Average/Iteration 6 +Average/MaxReturn -37.1624 +Average/MinReturn -78.423 +Average/NumEpisodes 100 +Average/StdReturn 9.3759 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.499799 +TotalEnvSteps 70000 +__unnamed_task__/AverageDiscountedReturn -32.4455 +__unnamed_task__/AverageReturn -53.5345 +__unnamed_task__/Iteration 6 +__unnamed_task__/MaxReturn -37.1624 +__unnamed_task__/MinReturn -78.423 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.3759 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.74106 +policy/KL 0.00662705 +policy/KLBefore 0 +policy/LossAfter -0.0220091 +policy/LossBefore -0.000335352 +policy/dLoss 0.0216738 +---------------------------------------- --------------- +2025-04-02 14:14:01 | [rl2_trainer] epoch #7 | Optimizing policy... +2025-04-02 14:14:01 | [rl2_trainer] epoch #7 | Fitting baseline... +2025-04-02 14:14:01 | [rl2_trainer] epoch #7 | Computing loss before +2025-04-02 14:14:02 | [rl2_trainer] epoch #7 | Computing KL before +2025-04-02 14:14:03 | [rl2_trainer] epoch #7 | Optimizing +2025-04-02 14:14:37 | [rl2_trainer] epoch #7 | Computing KL after +2025-04-02 14:14:37 | [rl2_trainer] epoch #7 | Computing loss after +2025-04-02 14:14:38 | [rl2_trainer] epoch #7 | Saving snapshot... +2025-04-02 14:14:38 | [rl2_trainer] epoch #7 | Saved +2025-04-02 14:14:38 | [rl2_trainer] epoch #7 | Time 1359.31 s +2025-04-02 14:14:38 | [rl2_trainer] epoch #7 | EpochTime 172.14 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -31.5487 +Average/AverageReturn -51.8696 +Average/Iteration 7 +Average/MaxReturn -27.6599 +Average/MinReturn -73.682 +Average/NumEpisodes 100 +Average/StdReturn 10.4826 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.650773 +TotalEnvSteps 80000 +__unnamed_task__/AverageDiscountedReturn -31.5487 +__unnamed_task__/AverageReturn -51.8696 +__unnamed_task__/Iteration 7 +__unnamed_task__/MaxReturn -27.6599 +__unnamed_task__/MinReturn -73.682 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4826 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.73287 +policy/KL 0.0105379 +policy/KLBefore 0 +policy/LossAfter -0.014197 +policy/LossBefore 0.00960274 +policy/dLoss 0.0237997 +---------------------------------------- -------------- +2025-04-02 14:16:39 | [rl2_trainer] epoch #8 | Optimizing policy... +2025-04-02 14:16:40 | [rl2_trainer] epoch #8 | Fitting baseline... +2025-04-02 14:16:40 | [rl2_trainer] epoch #8 | Computing loss before +2025-04-02 14:16:40 | [rl2_trainer] epoch #8 | Computing KL before +2025-04-02 14:16:41 | [rl2_trainer] epoch #8 | Optimizing +2025-04-02 14:17:14 | [rl2_trainer] epoch #8 | Computing KL after +2025-04-02 14:17:15 | [rl2_trainer] epoch #8 | Computing loss after +2025-04-02 14:17:16 | [rl2_trainer] epoch #8 | Saving snapshot... +2025-04-02 14:17:16 | [rl2_trainer] epoch #8 | Saved +2025-04-02 14:17:16 | [rl2_trainer] epoch #8 | Time 1517.05 s +2025-04-02 14:17:16 | [rl2_trainer] epoch #8 | EpochTime 157.73 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -30.5161 +Average/AverageReturn -50.4696 +Average/Iteration 8 +Average/MaxReturn -24.1737 +Average/MinReturn -80.4745 +Average/NumEpisodes 100 +Average/StdReturn 11.9948 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.789331 +TotalEnvSteps 90000 +__unnamed_task__/AverageDiscountedReturn -30.5161 +__unnamed_task__/AverageReturn -50.4696 +__unnamed_task__/Iteration 8 +__unnamed_task__/MaxReturn -24.1737 +__unnamed_task__/MinReturn -80.4745 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.9948 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.70647 +policy/KL 0.00668891 +policy/KLBefore 0 +policy/LossAfter -0.0383259 +policy/LossBefore -0.0170772 +policy/dLoss 0.0212488 +---------------------------------------- -------------- +2025-04-02 14:19:58 | [rl2_trainer] epoch #9 | Optimizing policy... +2025-04-02 14:19:59 | [rl2_trainer] epoch #9 | Fitting baseline... +2025-04-02 14:19:59 | [rl2_trainer] epoch #9 | Computing loss before +2025-04-02 14:19:59 | [rl2_trainer] epoch #9 | Computing KL before +2025-04-02 14:20:00 | [rl2_trainer] epoch #9 | Optimizing +2025-04-02 14:20:35 | [rl2_trainer] epoch #9 | Computing KL after +2025-04-02 14:20:36 | [rl2_trainer] epoch #9 | Computing loss after +2025-04-02 14:20:37 | [rl2_trainer] epoch #9 | Saving snapshot... +2025-04-02 14:20:37 | [rl2_trainer] epoch #9 | Saved +2025-04-02 14:20:37 | [rl2_trainer] epoch #9 | Time 1717.56 s +2025-04-02 14:20:37 | [rl2_trainer] epoch #9 | EpochTime 200.51 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -24.0229 +Average/AverageReturn -39.7223 +Average/Iteration 9 +Average/MaxReturn -12.1275 +Average/MinReturn -67.0958 +Average/NumEpisodes 100 +Average/StdReturn 10.1543 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.592973 +TotalEnvSteps 100000 +__unnamed_task__/AverageDiscountedReturn -24.0229 +__unnamed_task__/AverageReturn -39.7223 +__unnamed_task__/Iteration 9 +__unnamed_task__/MaxReturn -12.1275 +__unnamed_task__/MinReturn -67.0958 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.1543 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.68526 +policy/KL 0.00863496 +policy/KLBefore 0 +policy/LossAfter -0.0294216 +policy/LossBefore -0.000832434 +policy/dLoss 0.0285892 +---------------------------------------- ---------------- +2025-04-02 14:23:54 | [rl2_trainer] epoch #10 | Optimizing policy... +2025-04-02 14:23:54 | [rl2_trainer] epoch #10 | Fitting baseline... +2025-04-02 14:23:54 | [rl2_trainer] epoch #10 | Computing loss before +2025-04-02 14:23:55 | [rl2_trainer] epoch #10 | Computing KL before +2025-04-02 14:23:55 | [rl2_trainer] epoch #10 | Optimizing +2025-04-02 14:24:30 | [rl2_trainer] epoch #10 | Computing KL after +2025-04-02 14:24:30 | [rl2_trainer] epoch #10 | Computing loss after +2025-04-02 14:24:31 | [rl2_trainer] epoch #10 | Saving snapshot... +2025-04-02 14:24:31 | [rl2_trainer] epoch #10 | Saved +2025-04-02 14:24:31 | [rl2_trainer] epoch #10 | Time 1952.29 s +2025-04-02 14:24:31 | [rl2_trainer] epoch #10 | EpochTime 234.73 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.4664 +Average/AverageReturn -38.4336 +Average/Iteration 10 +Average/MaxReturn 32.1652 +Average/MinReturn -70.043 +Average/NumEpisodes 100 +Average/StdReturn 12.7167 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.654998 +TotalEnvSteps 110000 +__unnamed_task__/AverageDiscountedReturn -23.4664 +__unnamed_task__/AverageReturn -38.4336 +__unnamed_task__/Iteration 10 +__unnamed_task__/MaxReturn 32.1652 +__unnamed_task__/MinReturn -70.043 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.7167 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.68268 +policy/KL 0.00921529 +policy/KLBefore 0 +policy/LossAfter -0.03696 +policy/LossBefore 0.00500403 +policy/dLoss 0.041964 +---------------------------------------- --------------- +2025-04-02 14:28:01 | [rl2_trainer] epoch #11 | Optimizing policy... +2025-04-02 14:28:02 | [rl2_trainer] epoch #11 | Fitting baseline... +2025-04-02 14:28:02 | [rl2_trainer] epoch #11 | Computing loss before +2025-04-02 14:28:02 | [rl2_trainer] epoch #11 | Computing KL before +2025-04-02 14:28:03 | [rl2_trainer] epoch #11 | Optimizing +2025-04-02 14:28:37 | [rl2_trainer] epoch #11 | Computing KL after +2025-04-02 14:28:37 | [rl2_trainer] epoch #11 | Computing loss after +2025-04-02 14:28:38 | [rl2_trainer] epoch #11 | Saving snapshot... +2025-04-02 14:28:38 | [rl2_trainer] epoch #11 | Saved +2025-04-02 14:28:38 | [rl2_trainer] epoch #11 | Time 2199.16 s +2025-04-02 14:28:38 | [rl2_trainer] epoch #11 | EpochTime 246.86 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.4849 +Average/AverageReturn -38.5172 +Average/Iteration 11 +Average/MaxReturn -11.837 +Average/MinReturn -72.3027 +Average/NumEpisodes 100 +Average/StdReturn 9.7652 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.40994 +TotalEnvSteps 120000 +__unnamed_task__/AverageDiscountedReturn -23.4849 +__unnamed_task__/AverageReturn -38.5172 +__unnamed_task__/Iteration 11 +__unnamed_task__/MaxReturn -11.837 +__unnamed_task__/MinReturn -72.3027 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.7652 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.66913 +policy/KL 0.00767209 +policy/KLBefore 0 +policy/LossAfter -0.0457221 +policy/LossBefore -0.00322298 +policy/dLoss 0.0424991 +---------------------------------------- --------------- +2025-04-02 14:30:39 | [rl2_trainer] epoch #12 | Optimizing policy... +2025-04-02 14:30:39 | [rl2_trainer] epoch #12 | Fitting baseline... +2025-04-02 14:30:39 | [rl2_trainer] epoch #12 | Computing loss before +2025-04-02 14:30:39 | [rl2_trainer] epoch #12 | Computing KL before +2025-04-02 14:30:40 | [rl2_trainer] epoch #12 | Optimizing +2025-04-02 14:31:14 | [rl2_trainer] epoch #12 | Computing KL after +2025-04-02 14:31:14 | [rl2_trainer] epoch #12 | Computing loss after +2025-04-02 14:31:15 | [rl2_trainer] epoch #12 | Saving snapshot... +2025-04-02 14:31:15 | [rl2_trainer] epoch #12 | Saved +2025-04-02 14:31:15 | [rl2_trainer] epoch #12 | Time 2356.07 s +2025-04-02 14:31:15 | [rl2_trainer] epoch #12 | EpochTime 156.90 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -28.6855 +Average/AverageReturn -47.1301 +Average/Iteration 12 +Average/MaxReturn -24.606 +Average/MinReturn -127.517 +Average/NumEpisodes 100 +Average/StdReturn 12.5527 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.711794 +TotalEnvSteps 130000 +__unnamed_task__/AverageDiscountedReturn -28.6855 +__unnamed_task__/AverageReturn -47.1301 +__unnamed_task__/Iteration 12 +__unnamed_task__/MaxReturn -24.606 +__unnamed_task__/MinReturn -127.517 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.5527 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.65095 +policy/KL 0.00610315 +policy/KLBefore 0 +policy/LossAfter -0.0212851 +policy/LossBefore 0.00146461 +policy/dLoss 0.0227497 +---------------------------------------- --------------- +2025-04-02 14:33:27 | [rl2_trainer] epoch #13 | Optimizing policy... +2025-04-02 14:33:27 | [rl2_trainer] epoch #13 | Fitting baseline... +2025-04-02 14:33:27 | [rl2_trainer] epoch #13 | Computing loss before +2025-04-02 14:33:27 | [rl2_trainer] epoch #13 | Computing KL before +2025-04-02 14:33:28 | [rl2_trainer] epoch #13 | Optimizing +2025-04-02 14:34:00 | [rl2_trainer] epoch #13 | Computing KL after +2025-04-02 14:34:00 | [rl2_trainer] epoch #13 | Computing loss after +2025-04-02 14:34:01 | [rl2_trainer] epoch #13 | Saving snapshot... +2025-04-02 14:34:01 | [rl2_trainer] epoch #13 | Saved +2025-04-02 14:34:01 | [rl2_trainer] epoch #13 | Time 2522.13 s +2025-04-02 14:34:01 | [rl2_trainer] epoch #13 | EpochTime 166.06 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -29.8447 +Average/AverageReturn -49.3096 +Average/Iteration 13 +Average/MaxReturn -27.7207 +Average/MinReturn -99.8893 +Average/NumEpisodes 100 +Average/StdReturn 10.0485 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.605208 +TotalEnvSteps 140000 +__unnamed_task__/AverageDiscountedReturn -29.8447 +__unnamed_task__/AverageReturn -49.3096 +__unnamed_task__/Iteration 13 +__unnamed_task__/MaxReturn -27.7207 +__unnamed_task__/MinReturn -99.8893 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0485 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.63362 +policy/KL 0.00817799 +policy/KLBefore 0 +policy/LossAfter -0.0313205 +policy/LossBefore -0.0061756 +policy/dLoss 0.0251449 +---------------------------------------- --------------- +2025-04-02 14:37:35 | [rl2_trainer] epoch #14 | Optimizing policy... +2025-04-02 14:37:35 | [rl2_trainer] epoch #14 | Fitting baseline... +2025-04-02 14:37:35 | [rl2_trainer] epoch #14 | Computing loss before +2025-04-02 14:37:36 | [rl2_trainer] epoch #14 | Computing KL before +2025-04-02 14:37:36 | [rl2_trainer] epoch #14 | Optimizing +2025-04-02 14:38:10 | [rl2_trainer] epoch #14 | Computing KL after +2025-04-02 14:38:11 | [rl2_trainer] epoch #14 | Computing loss after +2025-04-02 14:38:11 | [rl2_trainer] epoch #14 | Saving snapshot... +2025-04-02 14:38:11 | [rl2_trainer] epoch #14 | Saved +2025-04-02 14:38:11 | [rl2_trainer] epoch #14 | Time 2772.46 s +2025-04-02 14:38:11 | [rl2_trainer] epoch #14 | EpochTime 250.33 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.9231 +Average/AverageReturn -37.7077 +Average/Iteration 14 +Average/MaxReturn 12.9196 +Average/MinReturn -66.3906 +Average/NumEpisodes 100 +Average/StdReturn 11.6212 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.58578 +TotalEnvSteps 150000 +__unnamed_task__/AverageDiscountedReturn -22.9231 +__unnamed_task__/AverageReturn -37.7077 +__unnamed_task__/Iteration 14 +__unnamed_task__/MaxReturn 12.9196 +__unnamed_task__/MinReturn -66.3906 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.6212 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.60035 +policy/KL 0.00671369 +policy/KLBefore 0 +policy/LossAfter -0.0454045 +policy/LossBefore -0.00958425 +policy/dLoss 0.0358203 +---------------------------------------- --------------- +2025-04-02 14:39:40 | [rl2_trainer] epoch #15 | Optimizing policy... +2025-04-02 14:39:40 | [rl2_trainer] epoch #15 | Fitting baseline... +2025-04-02 14:39:40 | [rl2_trainer] epoch #15 | Computing loss before +2025-04-02 14:39:41 | [rl2_trainer] epoch #15 | Computing KL before +2025-04-02 14:39:41 | [rl2_trainer] epoch #15 | Optimizing +2025-04-02 14:40:15 | [rl2_trainer] epoch #15 | Computing KL after +2025-04-02 14:40:16 | [rl2_trainer] epoch #15 | Computing loss after +2025-04-02 14:40:17 | [rl2_trainer] epoch #15 | Saving snapshot... +2025-04-02 14:40:17 | [rl2_trainer] epoch #15 | Saved +2025-04-02 14:40:17 | [rl2_trainer] epoch #15 | Time 2897.83 s +2025-04-02 14:40:17 | [rl2_trainer] epoch #15 | EpochTime 125.36 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -28.3022 +Average/AverageReturn -46.8349 +Average/Iteration 15 +Average/MaxReturn -30.626 +Average/MinReturn -75.819 +Average/NumEpisodes 100 +Average/StdReturn 9.19841 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.486094 +TotalEnvSteps 160000 +__unnamed_task__/AverageDiscountedReturn -28.3022 +__unnamed_task__/AverageReturn -46.8349 +__unnamed_task__/Iteration 15 +__unnamed_task__/MaxReturn -30.626 +__unnamed_task__/MinReturn -75.819 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.19841 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.58096 +policy/KL 0.0060012 +policy/KLBefore 0 +policy/LossAfter -0.0295743 +policy/LossBefore -0.0111703 +policy/dLoss 0.0184039 +---------------------------------------- -------------- +2025-04-02 14:42:20 | [rl2_trainer] epoch #16 | Optimizing policy... +2025-04-02 14:42:20 | [rl2_trainer] epoch #16 | Fitting baseline... +2025-04-02 14:42:20 | [rl2_trainer] epoch #16 | Computing loss before +2025-04-02 14:42:21 | [rl2_trainer] epoch #16 | Computing KL before +2025-04-02 14:42:22 | [rl2_trainer] epoch #16 | Optimizing +2025-04-02 14:42:55 | [rl2_trainer] epoch #16 | Computing KL after +2025-04-02 14:42:56 | [rl2_trainer] epoch #16 | Computing loss after +2025-04-02 14:42:57 | [rl2_trainer] epoch #16 | Saving snapshot... +2025-04-02 14:42:57 | [rl2_trainer] epoch #16 | Saved +2025-04-02 14:42:57 | [rl2_trainer] epoch #16 | Time 3057.78 s +2025-04-02 14:42:57 | [rl2_trainer] epoch #16 | EpochTime 159.95 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -26.413 +Average/AverageReturn -43.2171 +Average/Iteration 16 +Average/MaxReturn -24.838 +Average/MinReturn -83.2904 +Average/NumEpisodes 100 +Average/StdReturn 10.2076 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.561835 +TotalEnvSteps 170000 +__unnamed_task__/AverageDiscountedReturn -26.413 +__unnamed_task__/AverageReturn -43.2171 +__unnamed_task__/Iteration 16 +__unnamed_task__/MaxReturn -24.838 +__unnamed_task__/MinReturn -83.2904 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.2076 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.5583 +policy/KL 0.00776547 +policy/KLBefore 0 +policy/LossAfter -0.0222984 +policy/LossBefore -0.00148195 +policy/dLoss 0.0208164 +---------------------------------------- --------------- +2025-04-02 14:45:01 | [rl2_trainer] epoch #17 | Optimizing policy... +2025-04-02 14:45:01 | [rl2_trainer] epoch #17 | Fitting baseline... +2025-04-02 14:45:01 | [rl2_trainer] epoch #17 | Computing loss before +2025-04-02 14:45:02 | [rl2_trainer] epoch #17 | Computing KL before +2025-04-02 14:45:02 | [rl2_trainer] epoch #17 | Optimizing +2025-04-02 14:45:37 | [rl2_trainer] epoch #17 | Computing KL after +2025-04-02 14:45:37 | [rl2_trainer] epoch #17 | Computing loss after +2025-04-02 14:45:38 | [rl2_trainer] epoch #17 | Saving snapshot... +2025-04-02 14:45:38 | [rl2_trainer] epoch #17 | Saved +2025-04-02 14:45:38 | [rl2_trainer] epoch #17 | Time 3219.41 s +2025-04-02 14:45:38 | [rl2_trainer] epoch #17 | EpochTime 161.63 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -25.1055 +Average/AverageReturn -40.6307 +Average/Iteration 17 +Average/MaxReturn -25.552 +Average/MinReturn -59.0916 +Average/NumEpisodes 100 +Average/StdReturn 7.51609 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.396105 +TotalEnvSteps 180000 +__unnamed_task__/AverageDiscountedReturn -25.1055 +__unnamed_task__/AverageReturn -40.6307 +__unnamed_task__/Iteration 17 +__unnamed_task__/MaxReturn -25.552 +__unnamed_task__/MinReturn -59.0916 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.51609 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.52626 +policy/KL 0.0101081 +policy/KLBefore 0 +policy/LossAfter -0.0307362 +policy/LossBefore -0.010739 +policy/dLoss 0.0199971 +---------------------------------------- -------------- +2025-04-02 14:47:14 | [rl2_trainer] epoch #18 | Optimizing policy... +2025-04-02 14:47:15 | [rl2_trainer] epoch #18 | Fitting baseline... +2025-04-02 14:47:15 | [rl2_trainer] epoch #18 | Computing loss before +2025-04-02 14:47:15 | [rl2_trainer] epoch #18 | Computing KL before +2025-04-02 14:47:16 | [rl2_trainer] epoch #18 | Optimizing +2025-04-02 14:47:49 | [rl2_trainer] epoch #18 | Computing KL after +2025-04-02 14:47:50 | [rl2_trainer] epoch #18 | Computing loss after +2025-04-02 14:47:51 | [rl2_trainer] epoch #18 | Saving snapshot... +2025-04-02 14:47:51 | [rl2_trainer] epoch #18 | Saved +2025-04-02 14:47:51 | [rl2_trainer] epoch #18 | Time 3351.76 s +2025-04-02 14:47:51 | [rl2_trainer] epoch #18 | EpochTime 132.35 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -26.9784 +Average/AverageReturn -44.4437 +Average/Iteration 18 +Average/MaxReturn -26.8942 +Average/MinReturn -80.3933 +Average/NumEpisodes 100 +Average/StdReturn 10.5206 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.685697 +TotalEnvSteps 190000 +__unnamed_task__/AverageDiscountedReturn -26.9784 +__unnamed_task__/AverageReturn -44.4437 +__unnamed_task__/Iteration 18 +__unnamed_task__/MaxReturn -26.8942 +__unnamed_task__/MinReturn -80.3933 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.5206 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.51272 +policy/KL 0.00546531 +policy/KLBefore 0 +policy/LossAfter -0.0177607 +policy/LossBefore 0.00659564 +policy/dLoss 0.0243563 +---------------------------------------- --------------- +2025-04-02 14:50:38 | [rl2_trainer] epoch #19 | Optimizing policy... +2025-04-02 14:50:38 | [rl2_trainer] epoch #19 | Fitting baseline... +2025-04-02 14:50:38 | [rl2_trainer] epoch #19 | Computing loss before +2025-04-02 14:50:39 | [rl2_trainer] epoch #19 | Computing KL before +2025-04-02 14:50:39 | [rl2_trainer] epoch #19 | Optimizing +2025-04-02 14:51:13 | [rl2_trainer] epoch #19 | Computing KL after +2025-04-02 14:51:13 | [rl2_trainer] epoch #19 | Computing loss after +2025-04-02 14:51:14 | [rl2_trainer] epoch #19 | Saving snapshot... +2025-04-02 14:51:14 | [rl2_trainer] epoch #19 | Saved +2025-04-02 14:51:14 | [rl2_trainer] epoch #19 | Time 3555.10 s +2025-04-02 14:51:14 | [rl2_trainer] epoch #19 | EpochTime 203.34 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.4076 +Average/AverageReturn -36.2859 +Average/Iteration 19 +Average/MaxReturn -18.7813 +Average/MinReturn -58.713 +Average/NumEpisodes 100 +Average/StdReturn 8.37964 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.582989 +TotalEnvSteps 200000 +__unnamed_task__/AverageDiscountedReturn -22.4076 +__unnamed_task__/AverageReturn -36.2859 +__unnamed_task__/Iteration 19 +__unnamed_task__/MaxReturn -18.7813 +__unnamed_task__/MinReturn -58.713 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.37964 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.50489 +policy/KL 0.00948896 +policy/KLBefore 0 +policy/LossAfter -0.0262183 +policy/LossBefore 0.00217735 +policy/dLoss 0.0283956 +---------------------------------------- --------------- +2025-04-02 14:54:35 | [rl2_trainer] epoch #20 | Optimizing policy... +2025-04-02 14:54:36 | [rl2_trainer] epoch #20 | Fitting baseline... +2025-04-02 14:54:36 | [rl2_trainer] epoch #20 | Computing loss before +2025-04-02 14:54:36 | [rl2_trainer] epoch #20 | Computing KL before +2025-04-02 14:54:37 | [rl2_trainer] epoch #20 | Optimizing +2025-04-02 14:55:10 | [rl2_trainer] epoch #20 | Computing KL after +2025-04-02 14:55:10 | [rl2_trainer] epoch #20 | Computing loss after +2025-04-02 14:55:11 | [rl2_trainer] epoch #20 | Saving snapshot... +2025-04-02 14:55:11 | [rl2_trainer] epoch #20 | Saved +2025-04-02 14:55:11 | [rl2_trainer] epoch #20 | Time 3792.23 s +2025-04-02 14:55:11 | [rl2_trainer] epoch #20 | EpochTime 237.12 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -22.4199 +Average/AverageReturn -36.4736 +Average/Iteration 20 +Average/MaxReturn 10.8748 +Average/MinReturn -58.6644 +Average/NumEpisodes 100 +Average/StdReturn 9.81001 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.643003 +TotalEnvSteps 210000 +__unnamed_task__/AverageDiscountedReturn -22.4199 +__unnamed_task__/AverageReturn -36.4736 +__unnamed_task__/Iteration 20 +__unnamed_task__/MaxReturn 10.8748 +__unnamed_task__/MinReturn -58.6644 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.81001 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.47497 +policy/KL 0.00724772 +policy/KLBefore 0 +policy/LossAfter -0.0262413 +policy/LossBefore -0.000288123 +policy/dLoss 0.0259532 +---------------------------------------- ---------------- +2025-04-02 14:57:58 | [rl2_trainer] epoch #21 | Optimizing policy... +2025-04-02 14:57:59 | [rl2_trainer] epoch #21 | Fitting baseline... +2025-04-02 14:57:59 | [rl2_trainer] epoch #21 | Computing loss before +2025-04-02 14:57:59 | [rl2_trainer] epoch #21 | Computing KL before +2025-04-02 14:58:00 | [rl2_trainer] epoch #21 | Optimizing +2025-04-02 14:58:34 | [rl2_trainer] epoch #21 | Computing KL after +2025-04-02 14:58:34 | [rl2_trainer] epoch #21 | Computing loss after +2025-04-02 14:58:35 | [rl2_trainer] epoch #21 | Saving snapshot... +2025-04-02 14:58:35 | [rl2_trainer] epoch #21 | Saved +2025-04-02 14:58:35 | [rl2_trainer] epoch #21 | Time 3996.30 s +2025-04-02 14:58:35 | [rl2_trainer] epoch #21 | EpochTime 204.07 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -24.2006 +Average/AverageReturn -39.5749 +Average/Iteration 21 +Average/MaxReturn -11.2383 +Average/MinReturn -65.8664 +Average/NumEpisodes 100 +Average/StdReturn 8.46646 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.429813 +TotalEnvSteps 220000 +__unnamed_task__/AverageDiscountedReturn -24.2006 +__unnamed_task__/AverageReturn -39.5749 +__unnamed_task__/Iteration 21 +__unnamed_task__/MaxReturn -11.2383 +__unnamed_task__/MinReturn -65.8664 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.46646 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.44339 +policy/KL 0.00674126 +policy/KLBefore 0 +policy/LossAfter -0.0163876 +policy/LossBefore -0.00319566 +policy/dLoss 0.0131919 +---------------------------------------- --------------- +2025-04-02 14:59:55 | [rl2_trainer] epoch #22 | Optimizing policy... +2025-04-02 14:59:55 | [rl2_trainer] epoch #22 | Fitting baseline... +2025-04-02 14:59:55 | [rl2_trainer] epoch #22 | Computing loss before +2025-04-02 14:59:56 | [rl2_trainer] epoch #22 | Computing KL before +2025-04-02 14:59:56 | [rl2_trainer] epoch #22 | Optimizing +2025-04-02 15:00:30 | [rl2_trainer] epoch #22 | Computing KL after +2025-04-02 15:00:31 | [rl2_trainer] epoch #22 | Computing loss after +2025-04-02 15:00:31 | [rl2_trainer] epoch #22 | Saving snapshot... +2025-04-02 15:00:31 | [rl2_trainer] epoch #22 | Saved +2025-04-02 15:00:31 | [rl2_trainer] epoch #22 | Time 4112.51 s +2025-04-02 15:00:31 | [rl2_trainer] epoch #22 | EpochTime 116.20 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -26.0409 +Average/AverageReturn -42.4021 +Average/Iteration 22 +Average/MaxReturn -24.8895 +Average/MinReturn -66.5793 +Average/NumEpisodes 100 +Average/StdReturn 8.55257 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.654446 +TotalEnvSteps 230000 +__unnamed_task__/AverageDiscountedReturn -26.0409 +__unnamed_task__/AverageReturn -42.4021 +__unnamed_task__/Iteration 22 +__unnamed_task__/MaxReturn -24.8895 +__unnamed_task__/MinReturn -66.5793 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.55257 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.42966 +policy/KL 0.00684236 +policy/KLBefore 0 +policy/LossAfter -0.0223427 +policy/LossBefore -0.00481087 +policy/dLoss 0.0175319 +---------------------------------------- --------------- +2025-04-02 15:03:41 | [rl2_trainer] epoch #23 | Optimizing policy... +2025-04-02 15:03:41 | [rl2_trainer] epoch #23 | Fitting baseline... +2025-04-02 15:03:41 | [rl2_trainer] epoch #23 | Computing loss before +2025-04-02 15:03:42 | [rl2_trainer] epoch #23 | Computing KL before +2025-04-02 15:03:43 | [rl2_trainer] epoch #23 | Optimizing +2025-04-02 15:04:19 | [rl2_trainer] epoch #23 | Computing KL after +2025-04-02 15:04:20 | [rl2_trainer] epoch #23 | Computing loss after +2025-04-02 15:04:21 | [rl2_trainer] epoch #23 | Saving snapshot... +2025-04-02 15:04:21 | [rl2_trainer] epoch #23 | Saved +2025-04-02 15:04:21 | [rl2_trainer] epoch #23 | Time 4341.72 s +2025-04-02 15:04:21 | [rl2_trainer] epoch #23 | EpochTime 229.21 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.7479 +Average/AverageReturn -35.6639 +Average/Iteration 23 +Average/MaxReturn -1.08257 +Average/MinReturn -80.9149 +Average/NumEpisodes 100 +Average/StdReturn 11.8024 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.454688 +TotalEnvSteps 240000 +__unnamed_task__/AverageDiscountedReturn -21.7479 +__unnamed_task__/AverageReturn -35.6639 +__unnamed_task__/Iteration 23 +__unnamed_task__/MaxReturn -1.08257 +__unnamed_task__/MinReturn -80.9149 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.8024 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.42477 +policy/KL 0.0108617 +policy/KLBefore 0 +policy/LossAfter -0.0590277 +policy/LossBefore -0.00844553 +policy/dLoss 0.0505821 +---------------------------------------- --------------- +2025-04-02 15:05:47 | [rl2_trainer] epoch #24 | Optimizing policy... +2025-04-02 15:05:48 | [rl2_trainer] epoch #24 | Fitting baseline... +2025-04-02 15:05:48 | [rl2_trainer] epoch #24 | Computing loss before +2025-04-02 15:05:48 | [rl2_trainer] epoch #24 | Computing KL before +2025-04-02 15:05:49 | [rl2_trainer] epoch #24 | Optimizing +2025-04-02 15:06:25 | [rl2_trainer] epoch #24 | Computing KL after +2025-04-02 15:06:25 | [rl2_trainer] epoch #24 | Computing loss after +2025-04-02 15:06:26 | [rl2_trainer] epoch #24 | Saving snapshot... +2025-04-02 15:06:26 | [rl2_trainer] epoch #24 | Saved +2025-04-02 15:06:26 | [rl2_trainer] epoch #24 | Time 4467.17 s +2025-04-02 15:06:26 | [rl2_trainer] epoch #24 | EpochTime 125.45 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -26.5276 +Average/AverageReturn -43.367 +Average/Iteration 24 +Average/MaxReturn -17.6346 +Average/MinReturn -91.3772 +Average/NumEpisodes 100 +Average/StdReturn 10.7482 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.72307 +TotalEnvSteps 250000 +__unnamed_task__/AverageDiscountedReturn -26.5276 +__unnamed_task__/AverageReturn -43.367 +__unnamed_task__/Iteration 24 +__unnamed_task__/MaxReturn -17.6346 +__unnamed_task__/MinReturn -91.3772 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.7482 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.42528 +policy/KL 0.0065928 +policy/KLBefore 0 +policy/LossAfter -0.0301787 +policy/LossBefore -0.00538608 +policy/dLoss 0.0247926 +---------------------------------------- --------------- +2025-04-02 15:08:36 | [rl2_trainer] epoch #25 | Optimizing policy... +2025-04-02 15:08:36 | [rl2_trainer] epoch #25 | Fitting baseline... +2025-04-02 15:08:36 | [rl2_trainer] epoch #25 | Computing loss before +2025-04-02 15:08:37 | [rl2_trainer] epoch #25 | Computing KL before +2025-04-02 15:08:38 | [rl2_trainer] epoch #25 | Optimizing +2025-04-02 15:09:13 | [rl2_trainer] epoch #25 | Computing KL after +2025-04-02 15:09:14 | [rl2_trainer] epoch #25 | Computing loss after +2025-04-02 15:09:15 | [rl2_trainer] epoch #25 | Saving snapshot... +2025-04-02 15:09:15 | [rl2_trainer] epoch #25 | Saved +2025-04-02 15:09:15 | [rl2_trainer] epoch #25 | Time 4636.09 s +2025-04-02 15:09:15 | [rl2_trainer] epoch #25 | EpochTime 168.92 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.2811 +Average/AverageReturn -38.0307 +Average/Iteration 25 +Average/MaxReturn -22.8977 +Average/MinReturn -63.8816 +Average/NumEpisodes 100 +Average/StdReturn 7.15353 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.339911 +TotalEnvSteps 260000 +__unnamed_task__/AverageDiscountedReturn -23.2811 +__unnamed_task__/AverageReturn -38.0307 +__unnamed_task__/Iteration 25 +__unnamed_task__/MaxReturn -22.8977 +__unnamed_task__/MinReturn -63.8816 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.15353 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.40396 +policy/KL 0.00574222 +policy/KLBefore 0 +policy/LossAfter -0.0282824 +policy/LossBefore -0.00789727 +policy/dLoss 0.0203851 +---------------------------------------- --------------- +2025-04-02 15:11:25 | [rl2_trainer] epoch #26 | Optimizing policy... +2025-04-02 15:11:25 | [rl2_trainer] epoch #26 | Fitting baseline... +2025-04-02 15:11:25 | [rl2_trainer] epoch #26 | Computing loss before +2025-04-02 15:11:26 | [rl2_trainer] epoch #26 | Computing KL before +2025-04-02 15:11:27 | [rl2_trainer] epoch #26 | Optimizing +2025-04-02 15:12:02 | [rl2_trainer] epoch #26 | Computing KL after +2025-04-02 15:12:03 | [rl2_trainer] epoch #26 | Computing loss after +2025-04-02 15:12:04 | [rl2_trainer] epoch #26 | Saving snapshot... +2025-04-02 15:12:04 | [rl2_trainer] epoch #26 | Saved +2025-04-02 15:12:04 | [rl2_trainer] epoch #26 | Time 4804.67 s +2025-04-02 15:12:04 | [rl2_trainer] epoch #26 | EpochTime 168.57 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -26.2733 +Average/AverageReturn -43.2476 +Average/Iteration 26 +Average/MaxReturn -25.2908 +Average/MinReturn -71.5794 +Average/NumEpisodes 100 +Average/StdReturn 8.8849 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.609174 +TotalEnvSteps 270000 +__unnamed_task__/AverageDiscountedReturn -26.2733 +__unnamed_task__/AverageReturn -43.2476 +__unnamed_task__/Iteration 26 +__unnamed_task__/MaxReturn -25.2908 +__unnamed_task__/MinReturn -71.5794 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.8849 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.39067 +policy/KL 0.00675703 +policy/KLBefore 0 +policy/LossAfter -0.0234823 +policy/LossBefore -0.00747332 +policy/dLoss 0.016009 +---------------------------------------- --------------- +2025-04-02 15:14:03 | [rl2_trainer] epoch #27 | Optimizing policy... +2025-04-02 15:14:04 | [rl2_trainer] epoch #27 | Fitting baseline... +2025-04-02 15:14:04 | [rl2_trainer] epoch #27 | Computing loss before +2025-04-02 15:14:04 | [rl2_trainer] epoch #27 | Computing KL before +2025-04-02 15:14:05 | [rl2_trainer] epoch #27 | Optimizing +2025-04-02 15:14:42 | [rl2_trainer] epoch #27 | Computing KL after +2025-04-02 15:14:42 | [rl2_trainer] epoch #27 | Computing loss after +2025-04-02 15:14:43 | [rl2_trainer] epoch #27 | Saving snapshot... +2025-04-02 15:14:43 | [rl2_trainer] epoch #27 | Saved +2025-04-02 15:14:43 | [rl2_trainer] epoch #27 | Time 4964.20 s +2025-04-02 15:14:43 | [rl2_trainer] epoch #27 | EpochTime 159.52 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.6716 +Average/AverageReturn -37.8464 +Average/Iteration 27 +Average/MaxReturn -21.1313 +Average/MinReturn -51.7164 +Average/NumEpisodes 100 +Average/StdReturn 6.4073 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.525444 +TotalEnvSteps 280000 +__unnamed_task__/AverageDiscountedReturn -23.6716 +__unnamed_task__/AverageReturn -37.8464 +__unnamed_task__/Iteration 27 +__unnamed_task__/MaxReturn -21.1313 +__unnamed_task__/MinReturn -51.7164 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.4073 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.37842 +policy/KL 0.00986823 +policy/KLBefore 0 +policy/LossAfter -0.0153769 +policy/LossBefore -0.00291173 +policy/dLoss 0.0124652 +---------------------------------------- --------------- +2025-04-02 15:17:21 | [rl2_trainer] epoch #28 | Optimizing policy... +2025-04-02 15:17:22 | [rl2_trainer] epoch #28 | Fitting baseline... +2025-04-02 15:17:22 | [rl2_trainer] epoch #28 | Computing loss before +2025-04-02 15:17:22 | [rl2_trainer] epoch #28 | Computing KL before +2025-04-02 15:17:23 | [rl2_trainer] epoch #28 | Optimizing +2025-04-02 15:17:59 | [rl2_trainer] epoch #28 | Computing KL after +2025-04-02 15:17:59 | [rl2_trainer] epoch #28 | Computing loss after +2025-04-02 15:18:00 | [rl2_trainer] epoch #28 | Saving snapshot... +2025-04-02 15:18:00 | [rl2_trainer] epoch #28 | Saved +2025-04-02 15:18:00 | [rl2_trainer] epoch #28 | Time 5161.26 s +2025-04-02 15:18:00 | [rl2_trainer] epoch #28 | EpochTime 197.06 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.9727 +Average/AverageReturn -39.6488 +Average/Iteration 28 +Average/MaxReturn -13.3356 +Average/MinReturn -103.13 +Average/NumEpisodes 100 +Average/StdReturn 12.509 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.687524 +TotalEnvSteps 290000 +__unnamed_task__/AverageDiscountedReturn -23.9727 +__unnamed_task__/AverageReturn -39.6488 +__unnamed_task__/Iteration 28 +__unnamed_task__/MaxReturn -13.3356 +__unnamed_task__/MinReturn -103.13 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.509 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.36217 +policy/KL 0.00759181 +policy/KLBefore 0 +policy/LossAfter -0.0483738 +policy/LossBefore -0.00768642 +policy/dLoss 0.0406873 +---------------------------------------- --------------- +2025-04-02 15:20:55 | [rl2_trainer] epoch #29 | Optimizing policy... +2025-04-02 15:20:55 | [rl2_trainer] epoch #29 | Fitting baseline... +2025-04-02 15:20:55 | [rl2_trainer] epoch #29 | Computing loss before +2025-04-02 15:20:56 | [rl2_trainer] epoch #29 | Computing KL before +2025-04-02 15:20:56 | [rl2_trainer] epoch #29 | Optimizing +2025-04-02 15:21:32 | [rl2_trainer] epoch #29 | Computing KL after +2025-04-02 15:21:32 | [rl2_trainer] epoch #29 | Computing loss after +2025-04-02 15:21:33 | [rl2_trainer] epoch #29 | Saving snapshot... +2025-04-02 15:21:33 | [rl2_trainer] epoch #29 | Saved +2025-04-02 15:21:33 | [rl2_trainer] epoch #29 | Time 5374.36 s +2025-04-02 15:21:33 | [rl2_trainer] epoch #29 | EpochTime 213.09 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.2723 +Average/AverageReturn -37.8738 +Average/Iteration 29 +Average/MaxReturn -16.5568 +Average/MinReturn -58.4465 +Average/NumEpisodes 100 +Average/StdReturn 7.42276 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.410202 +TotalEnvSteps 300000 +__unnamed_task__/AverageDiscountedReturn -23.2723 +__unnamed_task__/AverageReturn -37.8738 +__unnamed_task__/Iteration 29 +__unnamed_task__/MaxReturn -16.5568 +__unnamed_task__/MinReturn -58.4465 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.42276 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.356 +policy/KL 0.00911842 +policy/KLBefore 0 +policy/LossAfter -0.0169873 +policy/LossBefore 0.00925489 +policy/dLoss 0.0262422 +---------------------------------------- --------------- +2025-04-02 15:23:45 | [rl2_trainer] epoch #30 | Optimizing policy... +2025-04-02 15:23:45 | [rl2_trainer] epoch #30 | Fitting baseline... +2025-04-02 15:23:45 | [rl2_trainer] epoch #30 | Computing loss before +2025-04-02 15:23:46 | [rl2_trainer] epoch #30 | Computing KL before +2025-04-02 15:23:46 | [rl2_trainer] epoch #30 | Optimizing +2025-04-02 15:24:21 | [rl2_trainer] epoch #30 | Computing KL after +2025-04-02 15:24:22 | [rl2_trainer] epoch #30 | Computing loss after +2025-04-02 15:24:23 | [rl2_trainer] epoch #30 | Saving snapshot... +2025-04-02 15:24:23 | [rl2_trainer] epoch #30 | Saved +2025-04-02 15:24:23 | [rl2_trainer] epoch #30 | Time 5543.92 s +2025-04-02 15:24:23 | [rl2_trainer] epoch #30 | EpochTime 169.56 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.2548 +Average/AverageReturn -34.2766 +Average/Iteration 30 +Average/MaxReturn -6.584 +Average/MinReturn -56.7853 +Average/NumEpisodes 100 +Average/StdReturn 8.70768 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.500943 +TotalEnvSteps 310000 +__unnamed_task__/AverageDiscountedReturn -21.2548 +__unnamed_task__/AverageReturn -34.2766 +__unnamed_task__/Iteration 30 +__unnamed_task__/MaxReturn -6.584 +__unnamed_task__/MinReturn -56.7853 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.70768 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.35655 +policy/KL 0.0060208 +policy/KLBefore 0 +policy/LossAfter -0.026028 +policy/LossBefore 0.000737781 +policy/dLoss 0.0267658 +---------------------------------------- ---------------- +2025-04-02 15:26:40 | [rl2_trainer] epoch #31 | Optimizing policy... +2025-04-02 15:26:40 | [rl2_trainer] epoch #31 | Fitting baseline... +2025-04-02 15:26:40 | [rl2_trainer] epoch #31 | Computing loss before +2025-04-02 15:26:41 | [rl2_trainer] epoch #31 | Computing KL before +2025-04-02 15:26:41 | [rl2_trainer] epoch #31 | Optimizing +2025-04-02 15:27:15 | [rl2_trainer] epoch #31 | Computing KL after +2025-04-02 15:27:15 | [rl2_trainer] epoch #31 | Computing loss after +2025-04-02 15:27:16 | [rl2_trainer] epoch #31 | Saving snapshot... +2025-04-02 15:27:16 | [rl2_trainer] epoch #31 | Saved +2025-04-02 15:27:16 | [rl2_trainer] epoch #31 | Time 5717.27 s +2025-04-02 15:27:16 | [rl2_trainer] epoch #31 | EpochTime 173.35 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -24.031 +Average/AverageReturn -39.4002 +Average/Iteration 31 +Average/MaxReturn -15.3182 +Average/MinReturn -111.577 +Average/NumEpisodes 100 +Average/StdReturn 12.4285 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.557705 +TotalEnvSteps 320000 +__unnamed_task__/AverageDiscountedReturn -24.031 +__unnamed_task__/AverageReturn -39.4002 +__unnamed_task__/Iteration 31 +__unnamed_task__/MaxReturn -15.3182 +__unnamed_task__/MinReturn -111.577 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.4285 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.33832 +policy/KL 0.0071043 +policy/KLBefore 0 +policy/LossAfter -0.0278998 +policy/LossBefore 0.00275769 +policy/dLoss 0.0306575 +---------------------------------------- --------------- +2025-04-02 15:31:38 | [rl2_trainer] epoch #32 | Optimizing policy... +2025-04-02 15:31:38 | [rl2_trainer] epoch #32 | Fitting baseline... +2025-04-02 15:31:38 | [rl2_trainer] epoch #32 | Computing loss before +2025-04-02 15:31:38 | [rl2_trainer] epoch #32 | Computing KL before +2025-04-02 15:31:39 | [rl2_trainer] epoch #32 | Optimizing +2025-04-02 15:32:15 | [rl2_trainer] epoch #32 | Computing KL after +2025-04-02 15:32:16 | [rl2_trainer] epoch #32 | Computing loss after +2025-04-02 15:32:16 | [rl2_trainer] epoch #32 | Saving snapshot... +2025-04-02 15:32:16 | [rl2_trainer] epoch #32 | Saved +2025-04-02 15:32:16 | [rl2_trainer] epoch #32 | Time 6017.44 s +2025-04-02 15:32:16 | [rl2_trainer] epoch #32 | EpochTime 300.16 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.0858 +Average/AverageReturn -36.2014 +Average/Iteration 32 +Average/MaxReturn -16.8098 +Average/MinReturn -88.4467 +Average/NumEpisodes 100 +Average/StdReturn 10.4837 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.500094 +TotalEnvSteps 330000 +__unnamed_task__/AverageDiscountedReturn -22.0858 +__unnamed_task__/AverageReturn -36.2014 +__unnamed_task__/Iteration 32 +__unnamed_task__/MaxReturn -16.8098 +__unnamed_task__/MinReturn -88.4467 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4837 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.31966 +policy/KL 0.0122125 +policy/KLBefore 0 +policy/LossAfter -0.0407789 +policy/LossBefore -0.00676465 +policy/dLoss 0.0340142 +---------------------------------------- --------------- +2025-04-02 15:35:12 | [rl2_trainer] epoch #33 | Optimizing policy... +2025-04-02 15:35:12 | [rl2_trainer] epoch #33 | Fitting baseline... +2025-04-02 15:35:12 | [rl2_trainer] epoch #33 | Computing loss before +2025-04-02 15:35:13 | [rl2_trainer] epoch #33 | Computing KL before +2025-04-02 15:35:14 | [rl2_trainer] epoch #33 | Optimizing +2025-04-02 15:35:50 | [rl2_trainer] epoch #33 | Computing KL after +2025-04-02 15:35:50 | [rl2_trainer] epoch #33 | Computing loss after +2025-04-02 15:35:51 | [rl2_trainer] epoch #33 | Saving snapshot... +2025-04-02 15:35:51 | [rl2_trainer] epoch #33 | Saved +2025-04-02 15:35:51 | [rl2_trainer] epoch #33 | Time 6232.37 s +2025-04-02 15:35:51 | [rl2_trainer] epoch #33 | EpochTime 214.93 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.5411 +Average/AverageReturn -34.9727 +Average/Iteration 33 +Average/MaxReturn -19.7934 +Average/MinReturn -55.4603 +Average/NumEpisodes 100 +Average/StdReturn 7.21257 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.468388 +TotalEnvSteps 340000 +__unnamed_task__/AverageDiscountedReturn -21.5411 +__unnamed_task__/AverageReturn -34.9727 +__unnamed_task__/Iteration 33 +__unnamed_task__/MaxReturn -19.7934 +__unnamed_task__/MinReturn -55.4603 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.21257 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.2967 +policy/KL 0.00493688 +policy/KLBefore 0 +policy/LossAfter -0.0144095 +policy/LossBefore -0.00509355 +policy/dLoss 0.00931593 +---------------------------------------- --------------- +2025-04-02 15:38:20 | [rl2_trainer] epoch #34 | Optimizing policy... +2025-04-02 15:38:21 | [rl2_trainer] epoch #34 | Fitting baseline... +2025-04-02 15:38:21 | [rl2_trainer] epoch #34 | Computing loss before +2025-04-02 15:38:21 | [rl2_trainer] epoch #34 | Computing KL before +2025-04-02 15:38:22 | [rl2_trainer] epoch #34 | Optimizing +2025-04-02 15:38:58 | [rl2_trainer] epoch #34 | Computing KL after +2025-04-02 15:38:59 | [rl2_trainer] epoch #34 | Computing loss after +2025-04-02 15:39:00 | [rl2_trainer] epoch #34 | Saving snapshot... +2025-04-02 15:39:00 | [rl2_trainer] epoch #34 | Saved +2025-04-02 15:39:00 | [rl2_trainer] epoch #34 | Time 6420.60 s +2025-04-02 15:39:00 | [rl2_trainer] epoch #34 | EpochTime 188.22 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.9418 +Average/AverageReturn -33.6795 +Average/Iteration 34 +Average/MaxReturn -17.832 +Average/MinReturn -57.3806 +Average/NumEpisodes 100 +Average/StdReturn 7.50676 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.610695 +TotalEnvSteps 350000 +__unnamed_task__/AverageDiscountedReturn -20.9418 +__unnamed_task__/AverageReturn -33.6795 +__unnamed_task__/Iteration 34 +__unnamed_task__/MaxReturn -17.832 +__unnamed_task__/MinReturn -57.3806 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.50676 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.27127 +policy/KL 0.00948179 +policy/KLBefore 0 +policy/LossAfter -0.0205738 +policy/LossBefore -0.00179242 +policy/dLoss 0.0187814 +---------------------------------------- --------------- +2025-04-02 15:41:09 | [rl2_trainer] epoch #35 | Optimizing policy... +2025-04-02 15:41:09 | [rl2_trainer] epoch #35 | Fitting baseline... +2025-04-02 15:41:09 | [rl2_trainer] epoch #35 | Computing loss before +2025-04-02 15:41:10 | [rl2_trainer] epoch #35 | Computing KL before +2025-04-02 15:41:11 | [rl2_trainer] epoch #35 | Optimizing +2025-04-02 15:41:47 | [rl2_trainer] epoch #35 | Computing KL after +2025-04-02 15:41:48 | [rl2_trainer] epoch #35 | Computing loss after +2025-04-02 15:41:49 | [rl2_trainer] epoch #35 | Saving snapshot... +2025-04-02 15:41:49 | [rl2_trainer] epoch #35 | Saved +2025-04-02 15:41:49 | [rl2_trainer] epoch #35 | Time 6589.60 s +2025-04-02 15:41:49 | [rl2_trainer] epoch #35 | EpochTime 169.01 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -23.4459 +Average/AverageReturn -38.2362 +Average/Iteration 35 +Average/MaxReturn -23.7455 +Average/MinReturn -70.3099 +Average/NumEpisodes 100 +Average/StdReturn 9.2089 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.550229 +TotalEnvSteps 360000 +__unnamed_task__/AverageDiscountedReturn -23.4459 +__unnamed_task__/AverageReturn -38.2362 +__unnamed_task__/Iteration 35 +__unnamed_task__/MaxReturn -23.7455 +__unnamed_task__/MinReturn -70.3099 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.2089 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.25627 +policy/KL 0.0107916 +policy/KLBefore 0 +policy/LossAfter -0.0254097 +policy/LossBefore 0.00085625 +policy/dLoss 0.0262659 +---------------------------------------- --------------- +2025-04-02 15:45:19 | [rl2_trainer] epoch #36 | Optimizing policy... +2025-04-02 15:45:19 | [rl2_trainer] epoch #36 | Fitting baseline... +2025-04-02 15:45:19 | [rl2_trainer] epoch #36 | Computing loss before +2025-04-02 15:45:20 | [rl2_trainer] epoch #36 | Computing KL before +2025-04-02 15:45:20 | [rl2_trainer] epoch #36 | Optimizing +2025-04-02 15:45:54 | [rl2_trainer] epoch #36 | Computing KL after +2025-04-02 15:45:54 | [rl2_trainer] epoch #36 | Computing loss after +2025-04-02 15:45:55 | [rl2_trainer] epoch #36 | Saving snapshot... +2025-04-02 15:45:55 | [rl2_trainer] epoch #36 | Saved +2025-04-02 15:45:55 | [rl2_trainer] epoch #36 | Time 6836.13 s +2025-04-02 15:45:55 | [rl2_trainer] epoch #36 | EpochTime 246.52 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.3832 +Average/AverageReturn -36.3905 +Average/Iteration 36 +Average/MaxReturn -20.7774 +Average/MinReturn -55.9657 +Average/NumEpisodes 100 +Average/StdReturn 8.62114 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.619407 +TotalEnvSteps 370000 +__unnamed_task__/AverageDiscountedReturn -22.3832 +__unnamed_task__/AverageReturn -36.3905 +__unnamed_task__/Iteration 36 +__unnamed_task__/MaxReturn -20.7774 +__unnamed_task__/MinReturn -55.9657 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.62114 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.2441 +policy/KL 0.00874318 +policy/KLBefore 0 +policy/LossAfter -0.0492848 +policy/LossBefore -0.0152499 +policy/dLoss 0.0340348 +---------------------------------------- --------------- +2025-04-02 15:48:02 | [rl2_trainer] epoch #37 | Optimizing policy... +2025-04-02 15:48:02 | [rl2_trainer] epoch #37 | Fitting baseline... +2025-04-02 15:48:02 | [rl2_trainer] epoch #37 | Computing loss before +2025-04-02 15:48:03 | [rl2_trainer] epoch #37 | Computing KL before +2025-04-02 15:48:03 | [rl2_trainer] epoch #37 | Optimizing +2025-04-02 15:48:41 | [rl2_trainer] epoch #37 | Computing KL after +2025-04-02 15:48:41 | [rl2_trainer] epoch #37 | Computing loss after +2025-04-02 15:48:42 | [rl2_trainer] epoch #37 | Saving snapshot... +2025-04-02 15:48:42 | [rl2_trainer] epoch #37 | Saved +2025-04-02 15:48:42 | [rl2_trainer] epoch #37 | Time 7003.28 s +2025-04-02 15:48:42 | [rl2_trainer] epoch #37 | EpochTime 167.15 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.4343 +Average/AverageReturn -34.1446 +Average/Iteration 37 +Average/MaxReturn -16.8109 +Average/MinReturn -60.4424 +Average/NumEpisodes 100 +Average/StdReturn 8.4874 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.717673 +TotalEnvSteps 380000 +__unnamed_task__/AverageDiscountedReturn -21.4343 +__unnamed_task__/AverageReturn -34.1446 +__unnamed_task__/Iteration 37 +__unnamed_task__/MaxReturn -16.8109 +__unnamed_task__/MinReturn -60.4424 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.4874 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.24124 +policy/KL 0.00690851 +policy/KLBefore 0 +policy/LossAfter -0.019335 +policy/LossBefore 0.00158842 +policy/dLoss 0.0209235 +---------------------------------------- --------------- +2025-04-02 15:50:41 | [rl2_trainer] epoch #38 | Optimizing policy... +2025-04-02 15:50:41 | [rl2_trainer] epoch #38 | Fitting baseline... +2025-04-02 15:50:41 | [rl2_trainer] epoch #38 | Computing loss before +2025-04-02 15:50:42 | [rl2_trainer] epoch #38 | Computing KL before +2025-04-02 15:50:42 | [rl2_trainer] epoch #38 | Optimizing +2025-04-02 15:51:18 | [rl2_trainer] epoch #38 | Computing KL after +2025-04-02 15:51:19 | [rl2_trainer] epoch #38 | Computing loss after +2025-04-02 15:51:20 | [rl2_trainer] epoch #38 | Saving snapshot... +2025-04-02 15:51:20 | [rl2_trainer] epoch #38 | Saved +2025-04-02 15:51:20 | [rl2_trainer] epoch #38 | Time 7161.08 s +2025-04-02 15:51:20 | [rl2_trainer] epoch #38 | EpochTime 157.80 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.8164 +Average/AverageReturn -36.4722 +Average/Iteration 38 +Average/MaxReturn -21.2703 +Average/MinReturn -63.2258 +Average/NumEpisodes 100 +Average/StdReturn 7.85908 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.209517 +TotalEnvSteps 390000 +__unnamed_task__/AverageDiscountedReturn -22.8164 +__unnamed_task__/AverageReturn -36.4722 +__unnamed_task__/Iteration 38 +__unnamed_task__/MaxReturn -21.2703 +__unnamed_task__/MinReturn -63.2258 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.85908 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.22615 +policy/KL 0.00642292 +policy/KLBefore 0 +policy/LossAfter -0.0287954 +policy/LossBefore -0.00162 +policy/dLoss 0.0271754 +---------------------------------------- --------------- +2025-04-02 15:53:58 | [rl2_trainer] epoch #39 | Optimizing policy... +2025-04-02 15:53:58 | [rl2_trainer] epoch #39 | Fitting baseline... +2025-04-02 15:53:58 | [rl2_trainer] epoch #39 | Computing loss before +2025-04-02 15:53:59 | [rl2_trainer] epoch #39 | Computing KL before +2025-04-02 15:53:59 | [rl2_trainer] epoch #39 | Optimizing +2025-04-02 15:54:36 | [rl2_trainer] epoch #39 | Computing KL after +2025-04-02 15:54:36 | [rl2_trainer] epoch #39 | Computing loss after +2025-04-02 15:54:37 | [rl2_trainer] epoch #39 | Saving snapshot... +2025-04-02 15:54:37 | [rl2_trainer] epoch #39 | Saved +2025-04-02 15:54:37 | [rl2_trainer] epoch #39 | Time 7358.11 s +2025-04-02 15:54:37 | [rl2_trainer] epoch #39 | EpochTime 197.03 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.0946 +Average/AverageReturn -35.9648 +Average/Iteration 39 +Average/MaxReturn -13.2903 +Average/MinReturn -71.8184 +Average/NumEpisodes 100 +Average/StdReturn 9.58589 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.485208 +TotalEnvSteps 400000 +__unnamed_task__/AverageDiscountedReturn -22.0946 +__unnamed_task__/AverageReturn -35.9648 +__unnamed_task__/Iteration 39 +__unnamed_task__/MaxReturn -13.2903 +__unnamed_task__/MinReturn -71.8184 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.58589 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.20596 +policy/KL 0.00923267 +policy/KLBefore 0 +policy/LossAfter -0.0355275 +policy/LossBefore -0.00210495 +policy/dLoss 0.0334226 +---------------------------------------- --------------- +2025-04-02 15:57:18 | [rl2_trainer] epoch #40 | Optimizing policy... +2025-04-02 15:57:18 | [rl2_trainer] epoch #40 | Fitting baseline... +2025-04-02 15:57:18 | [rl2_trainer] epoch #40 | Computing loss before +2025-04-02 15:57:19 | [rl2_trainer] epoch #40 | Computing KL before +2025-04-02 15:57:20 | [rl2_trainer] epoch #40 | Optimizing +2025-04-02 15:57:56 | [rl2_trainer] epoch #40 | Computing KL after +2025-04-02 15:57:56 | [rl2_trainer] epoch #40 | Computing loss after +2025-04-02 15:57:57 | [rl2_trainer] epoch #40 | Saving snapshot... +2025-04-02 15:57:57 | [rl2_trainer] epoch #40 | Saved +2025-04-02 15:57:57 | [rl2_trainer] epoch #40 | Time 7558.09 s +2025-04-02 15:57:57 | [rl2_trainer] epoch #40 | EpochTime 199.98 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.1108 +Average/AverageReturn -35.7815 +Average/Iteration 40 +Average/MaxReturn -17.5306 +Average/MinReturn -55.9392 +Average/NumEpisodes 100 +Average/StdReturn 8.7126 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.542939 +TotalEnvSteps 410000 +__unnamed_task__/AverageDiscountedReturn -22.1108 +__unnamed_task__/AverageReturn -35.7815 +__unnamed_task__/Iteration 40 +__unnamed_task__/MaxReturn -17.5306 +__unnamed_task__/MinReturn -55.9392 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.7126 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.1881 +policy/KL 0.00711199 +policy/KLBefore 0 +policy/LossAfter -0.0208128 +policy/LossBefore 0.00443793 +policy/dLoss 0.0252508 +---------------------------------------- --------------- +2025-04-02 16:00:08 | [rl2_trainer] epoch #41 | Optimizing policy... +2025-04-02 16:00:08 | [rl2_trainer] epoch #41 | Fitting baseline... +2025-04-02 16:00:08 | [rl2_trainer] epoch #41 | Computing loss before +2025-04-02 16:00:09 | [rl2_trainer] epoch #41 | Computing KL before +2025-04-02 16:00:09 | [rl2_trainer] epoch #41 | Optimizing +2025-04-02 16:00:44 | [rl2_trainer] epoch #41 | Computing KL after +2025-04-02 16:00:45 | [rl2_trainer] epoch #41 | Computing loss after +2025-04-02 16:00:45 | [rl2_trainer] epoch #41 | Saving snapshot... +2025-04-02 16:00:45 | [rl2_trainer] epoch #41 | Saved +2025-04-02 16:00:45 | [rl2_trainer] epoch #41 | Time 7726.41 s +2025-04-02 16:00:45 | [rl2_trainer] epoch #41 | EpochTime 168.31 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -22.0245 +Average/AverageReturn -35.2514 +Average/Iteration 41 +Average/MaxReturn -3.25668 +Average/MinReturn -58.5169 +Average/NumEpisodes 100 +Average/StdReturn 8.25972 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.553227 +TotalEnvSteps 420000 +__unnamed_task__/AverageDiscountedReturn -22.0245 +__unnamed_task__/AverageReturn -35.2514 +__unnamed_task__/Iteration 41 +__unnamed_task__/MaxReturn -3.25668 +__unnamed_task__/MinReturn -58.5169 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.25972 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.18056 +policy/KL 0.01038 +policy/KLBefore 0 +policy/LossAfter -0.0303251 +policy/LossBefore -0.000674927 +policy/dLoss 0.0296502 +---------------------------------------- ---------------- +2025-04-02 16:03:55 | [rl2_trainer] epoch #42 | Optimizing policy... +2025-04-02 16:03:55 | [rl2_trainer] epoch #42 | Fitting baseline... +2025-04-02 16:03:55 | [rl2_trainer] epoch #42 | Computing loss before +2025-04-02 16:03:55 | [rl2_trainer] epoch #42 | Computing KL before +2025-04-02 16:03:56 | [rl2_trainer] epoch #42 | Optimizing +2025-04-02 16:04:28 | [rl2_trainer] epoch #42 | Computing KL after +2025-04-02 16:04:28 | [rl2_trainer] epoch #42 | Computing loss after +2025-04-02 16:04:29 | [rl2_trainer] epoch #42 | Saving snapshot... +2025-04-02 16:04:29 | [rl2_trainer] epoch #42 | Saved +2025-04-02 16:04:29 | [rl2_trainer] epoch #42 | Time 7950.14 s +2025-04-02 16:04:29 | [rl2_trainer] epoch #42 | EpochTime 223.73 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.2536 +Average/AverageReturn -32.6201 +Average/Iteration 42 +Average/MaxReturn -16.5254 +Average/MinReturn -50.069 +Average/NumEpisodes 100 +Average/StdReturn 6.55966 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.643794 +TotalEnvSteps 430000 +__unnamed_task__/AverageDiscountedReturn -20.2536 +__unnamed_task__/AverageReturn -32.6201 +__unnamed_task__/Iteration 42 +__unnamed_task__/MaxReturn -16.5254 +__unnamed_task__/MinReturn -50.069 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.55966 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.1602 +policy/KL 0.0086571 +policy/KLBefore 0 +policy/LossAfter -0.0143498 +policy/LossBefore -0.00178462 +policy/dLoss 0.0125652 +---------------------------------------- --------------- +2025-04-02 16:05:51 | [rl2_trainer] epoch #43 | Optimizing policy... +2025-04-02 16:05:51 | [rl2_trainer] epoch #43 | Fitting baseline... +2025-04-02 16:05:51 | [rl2_trainer] epoch #43 | Computing loss before +2025-04-02 16:05:52 | [rl2_trainer] epoch #43 | Computing KL before +2025-04-02 16:05:52 | [rl2_trainer] epoch #43 | Optimizing +2025-04-02 16:06:25 | [rl2_trainer] epoch #43 | Computing KL after +2025-04-02 16:06:25 | [rl2_trainer] epoch #43 | Computing loss after +2025-04-02 16:06:26 | [rl2_trainer] epoch #43 | Saving snapshot... +2025-04-02 16:06:26 | [rl2_trainer] epoch #43 | Saved +2025-04-02 16:06:26 | [rl2_trainer] epoch #43 | Time 8067.11 s +2025-04-02 16:06:26 | [rl2_trainer] epoch #43 | EpochTime 116.97 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -22.2032 +Average/AverageReturn -35.3831 +Average/Iteration 43 +Average/MaxReturn -12.4323 +Average/MinReturn -94.1863 +Average/NumEpisodes 100 +Average/StdReturn 9.59502 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.506093 +TotalEnvSteps 440000 +__unnamed_task__/AverageDiscountedReturn -22.2032 +__unnamed_task__/AverageReturn -35.3831 +__unnamed_task__/Iteration 43 +__unnamed_task__/MaxReturn -12.4323 +__unnamed_task__/MinReturn -94.1863 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.59502 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.12356 +policy/KL 0.00645735 +policy/KLBefore 0 +policy/LossAfter -0.0220101 +policy/LossBefore 0.000243257 +policy/dLoss 0.0222533 +---------------------------------------- ---------------- +2025-04-02 16:09:14 | [rl2_trainer] epoch #44 | Optimizing policy... +2025-04-02 16:09:15 | [rl2_trainer] epoch #44 | Fitting baseline... +2025-04-02 16:09:15 | [rl2_trainer] epoch #44 | Computing loss before +2025-04-02 16:09:15 | [rl2_trainer] epoch #44 | Computing KL before +2025-04-02 16:09:16 | [rl2_trainer] epoch #44 | Optimizing +2025-04-02 16:09:49 | [rl2_trainer] epoch #44 | Computing KL after +2025-04-02 16:09:49 | [rl2_trainer] epoch #44 | Computing loss after +2025-04-02 16:09:50 | [rl2_trainer] epoch #44 | Saving snapshot... +2025-04-02 16:09:50 | [rl2_trainer] epoch #44 | Saved +2025-04-02 16:09:50 | [rl2_trainer] epoch #44 | Time 8271.21 s +2025-04-02 16:09:50 | [rl2_trainer] epoch #44 | EpochTime 204.09 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.7249 +Average/AverageReturn -32.9647 +Average/Iteration 44 +Average/MaxReturn -19.9709 +Average/MinReturn -60.1459 +Average/NumEpisodes 100 +Average/StdReturn 7.83369 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.730356 +TotalEnvSteps 450000 +__unnamed_task__/AverageDiscountedReturn -20.7249 +__unnamed_task__/AverageReturn -32.9647 +__unnamed_task__/Iteration 44 +__unnamed_task__/MaxReturn -19.9709 +__unnamed_task__/MinReturn -60.1459 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.83369 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.09584 +policy/KL 0.00558401 +policy/KLBefore 0 +policy/LossAfter -0.0148741 +policy/LossBefore -0.00607455 +policy/dLoss 0.00879955 +---------------------------------------- --------------- +2025-04-02 16:12:09 | [rl2_trainer] epoch #45 | Optimizing policy... +2025-04-02 16:12:09 | [rl2_trainer] epoch #45 | Fitting baseline... +2025-04-02 16:12:09 | [rl2_trainer] epoch #45 | Computing loss before +2025-04-02 16:12:09 | [rl2_trainer] epoch #45 | Computing KL before +2025-04-02 16:12:10 | [rl2_trainer] epoch #45 | Optimizing +2025-04-02 16:12:44 | [rl2_trainer] epoch #45 | Computing KL after +2025-04-02 16:12:45 | [rl2_trainer] epoch #45 | Computing loss after +2025-04-02 16:12:46 | [rl2_trainer] epoch #45 | Saving snapshot... +2025-04-02 16:12:46 | [rl2_trainer] epoch #45 | Saved +2025-04-02 16:12:46 | [rl2_trainer] epoch #45 | Time 8446.68 s +2025-04-02 16:12:46 | [rl2_trainer] epoch #45 | EpochTime 175.46 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.063 +Average/AverageReturn -30.3855 +Average/Iteration 45 +Average/MaxReturn -14.5945 +Average/MinReturn -73.0616 +Average/NumEpisodes 100 +Average/StdReturn 8.71846 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.564455 +TotalEnvSteps 460000 +__unnamed_task__/AverageDiscountedReturn -19.063 +__unnamed_task__/AverageReturn -30.3855 +__unnamed_task__/Iteration 45 +__unnamed_task__/MaxReturn -14.5945 +__unnamed_task__/MinReturn -73.0616 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.71846 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.06833 +policy/KL 0.00950865 +policy/KLBefore 0 +policy/LossAfter -0.00769306 +policy/LossBefore 0.00961084 +policy/dLoss 0.0173039 +---------------------------------------- --------------- +2025-04-02 16:15:19 | [rl2_trainer] epoch #46 | Optimizing policy... +2025-04-02 16:15:20 | [rl2_trainer] epoch #46 | Fitting baseline... +2025-04-02 16:15:20 | [rl2_trainer] epoch #46 | Computing loss before +2025-04-02 16:15:20 | [rl2_trainer] epoch #46 | Computing KL before +2025-04-02 16:15:21 | [rl2_trainer] epoch #46 | Optimizing +2025-04-02 16:15:54 | [rl2_trainer] epoch #46 | Computing KL after +2025-04-02 16:15:54 | [rl2_trainer] epoch #46 | Computing loss after +2025-04-02 16:15:55 | [rl2_trainer] epoch #46 | Saving snapshot... +2025-04-02 16:15:55 | [rl2_trainer] epoch #46 | Saved +2025-04-02 16:15:55 | [rl2_trainer] epoch #46 | Time 8636.10 s +2025-04-02 16:15:55 | [rl2_trainer] epoch #46 | EpochTime 189.42 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.832 +Average/AverageReturn -35.5598 +Average/Iteration 46 +Average/MaxReturn -7.73067 +Average/MinReturn -55.4965 +Average/NumEpisodes 100 +Average/StdReturn 9.16179 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.593874 +TotalEnvSteps 470000 +__unnamed_task__/AverageDiscountedReturn -21.832 +__unnamed_task__/AverageReturn -35.5598 +__unnamed_task__/Iteration 46 +__unnamed_task__/MaxReturn -7.73067 +__unnamed_task__/MinReturn -55.4965 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.16179 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.04515 +policy/KL 0.00729048 +policy/KLBefore 0 +policy/LossAfter -0.0329573 +policy/LossBefore -0.00946688 +policy/dLoss 0.0234904 +---------------------------------------- --------------- +2025-04-02 16:18:44 | [rl2_trainer] epoch #47 | Optimizing policy... +2025-04-02 16:18:44 | [rl2_trainer] epoch #47 | Fitting baseline... +2025-04-02 16:18:44 | [rl2_trainer] epoch #47 | Computing loss before +2025-04-02 16:18:44 | [rl2_trainer] epoch #47 | Computing KL before +2025-04-02 16:18:45 | [rl2_trainer] epoch #47 | Optimizing +2025-04-02 16:19:18 | [rl2_trainer] epoch #47 | Computing KL after +2025-04-02 16:19:19 | [rl2_trainer] epoch #47 | Computing loss after +2025-04-02 16:19:20 | [rl2_trainer] epoch #47 | Saving snapshot... +2025-04-02 16:19:20 | [rl2_trainer] epoch #47 | Saved +2025-04-02 16:19:20 | [rl2_trainer] epoch #47 | Time 8840.79 s +2025-04-02 16:19:20 | [rl2_trainer] epoch #47 | EpochTime 204.69 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.7864 +Average/AverageReturn -29.669 +Average/Iteration 47 +Average/MaxReturn -5.88537 +Average/MinReturn -54.0225 +Average/NumEpisodes 100 +Average/StdReturn 8.47639 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.715987 +TotalEnvSteps 480000 +__unnamed_task__/AverageDiscountedReturn -18.7864 +__unnamed_task__/AverageReturn -29.669 +__unnamed_task__/Iteration 47 +__unnamed_task__/MaxReturn -5.88537 +__unnamed_task__/MinReturn -54.0225 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.47639 +__unnamed_task__/TerminationRate 0 +policy/Entropy 9.02152 +policy/KL 0.00798457 +policy/KLBefore 0 +policy/LossAfter -0.0085194 +policy/LossBefore 0.00502077 +policy/dLoss 0.0135402 +---------------------------------------- --------------- +2025-04-02 16:22:26 | [rl2_trainer] epoch #48 | Optimizing policy... +2025-04-02 16:22:27 | [rl2_trainer] epoch #48 | Fitting baseline... +2025-04-02 16:22:27 | [rl2_trainer] epoch #48 | Computing loss before +2025-04-02 16:22:27 | [rl2_trainer] epoch #48 | Computing KL before +2025-04-02 16:22:28 | [rl2_trainer] epoch #48 | Optimizing +2025-04-02 16:23:01 | [rl2_trainer] epoch #48 | Computing KL after +2025-04-02 16:23:02 | [rl2_trainer] epoch #48 | Computing loss after +2025-04-02 16:23:02 | [rl2_trainer] epoch #48 | Saving snapshot... +2025-04-02 16:23:02 | [rl2_trainer] epoch #48 | Saved +2025-04-02 16:23:02 | [rl2_trainer] epoch #48 | Time 9063.53 s +2025-04-02 16:23:02 | [rl2_trainer] epoch #48 | EpochTime 222.73 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.6169 +Average/AverageReturn -33.1845 +Average/Iteration 48 +Average/MaxReturn -2.30629 +Average/MinReturn -56.0413 +Average/NumEpisodes 100 +Average/StdReturn 8.3649 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.510364 +TotalEnvSteps 490000 +__unnamed_task__/AverageDiscountedReturn -20.6169 +__unnamed_task__/AverageReturn -33.1845 +__unnamed_task__/Iteration 48 +__unnamed_task__/MaxReturn -2.30629 +__unnamed_task__/MinReturn -56.0413 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.3649 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.99516 +policy/KL 0.00789699 +policy/KLBefore 0 +policy/LossAfter -0.0209053 +policy/LossBefore -0.00115854 +policy/dLoss 0.0197468 +---------------------------------------- --------------- +2025-04-02 16:25:22 | [rl2_trainer] epoch #49 | Optimizing policy... +2025-04-02 16:25:22 | [rl2_trainer] epoch #49 | Fitting baseline... +2025-04-02 16:25:22 | [rl2_trainer] epoch #49 | Computing loss before +2025-04-02 16:25:22 | [rl2_trainer] epoch #49 | Computing KL before +2025-04-02 16:25:23 | [rl2_trainer] epoch #49 | Optimizing +2025-04-02 16:25:57 | [rl2_trainer] epoch #49 | Computing KL after +2025-04-02 16:25:57 | [rl2_trainer] epoch #49 | Computing loss after +2025-04-02 16:25:58 | [rl2_trainer] epoch #49 | Saving snapshot... +2025-04-02 16:25:58 | [rl2_trainer] epoch #49 | Saved +2025-04-02 16:25:58 | [rl2_trainer] epoch #49 | Time 9238.99 s +2025-04-02 16:25:58 | [rl2_trainer] epoch #49 | EpochTime 175.46 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -21.7442 +Average/AverageReturn -34.8681 +Average/Iteration 49 +Average/MaxReturn -9.8988 +Average/MinReturn -78.9281 +Average/NumEpisodes 100 +Average/StdReturn 10.6699 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.845796 +TotalEnvSteps 500000 +__unnamed_task__/AverageDiscountedReturn -21.7442 +__unnamed_task__/AverageReturn -34.8681 +__unnamed_task__/Iteration 49 +__unnamed_task__/MaxReturn -9.8988 +__unnamed_task__/MinReturn -78.9281 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.6699 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.97393 +policy/KL 0.0090054 +policy/KLBefore 0 +policy/LossAfter -0.0284811 +policy/LossBefore -0.0106733 +policy/dLoss 0.0178078 +---------------------------------------- -------------- +2025-04-02 16:28:39 | [rl2_trainer] epoch #50 | Optimizing policy... +2025-04-02 16:28:39 | [rl2_trainer] epoch #50 | Fitting baseline... +2025-04-02 16:28:39 | [rl2_trainer] epoch #50 | Computing loss before +2025-04-02 16:28:40 | [rl2_trainer] epoch #50 | Computing KL before +2025-04-02 16:28:41 | [rl2_trainer] epoch #50 | Optimizing +2025-04-02 16:29:35 | [rl2_trainer] epoch #50 | Computing KL after +2025-04-02 16:29:36 | [rl2_trainer] epoch #50 | Computing loss after +2025-04-02 16:29:37 | [rl2_trainer] epoch #50 | Saving snapshot... +2025-04-02 16:29:37 | [rl2_trainer] epoch #50 | Saved +2025-04-02 16:29:37 | [rl2_trainer] epoch #50 | Time 9458.47 s +2025-04-02 16:29:37 | [rl2_trainer] epoch #50 | EpochTime 219.48 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.2619 +Average/AverageReturn -33.7635 +Average/Iteration 50 +Average/MaxReturn -17.5448 +Average/MinReturn -128.419 +Average/NumEpisodes 100 +Average/StdReturn 13.4672 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.551726 +TotalEnvSteps 510000 +__unnamed_task__/AverageDiscountedReturn -21.2619 +__unnamed_task__/AverageReturn -33.7635 +__unnamed_task__/Iteration 50 +__unnamed_task__/MaxReturn -17.5448 +__unnamed_task__/MinReturn -128.419 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.4672 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.95133 +policy/KL 0.0127989 +policy/KLBefore 0 +policy/LossAfter -0.0469733 +policy/LossBefore -0.00813562 +policy/dLoss 0.0388377 +---------------------------------------- --------------- +2025-04-02 16:32:02 | [rl2_trainer] epoch #51 | Optimizing policy... +2025-04-02 16:32:02 | [rl2_trainer] epoch #51 | Fitting baseline... +2025-04-02 16:32:02 | [rl2_trainer] epoch #51 | Computing loss before +2025-04-02 16:32:03 | [rl2_trainer] epoch #51 | Computing KL before +2025-04-02 16:32:04 | [rl2_trainer] epoch #51 | Optimizing +2025-04-02 16:32:59 | [rl2_trainer] epoch #51 | Computing KL after +2025-04-02 16:32:59 | [rl2_trainer] epoch #51 | Computing loss after +2025-04-02 16:33:00 | [rl2_trainer] epoch #51 | Saving snapshot... +2025-04-02 16:33:00 | [rl2_trainer] epoch #51 | Saved +2025-04-02 16:33:00 | [rl2_trainer] epoch #51 | Time 9661.14 s +2025-04-02 16:33:00 | [rl2_trainer] epoch #51 | EpochTime 202.67 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.3747 +Average/AverageReturn -36.0527 +Average/Iteration 51 +Average/MaxReturn -17.0785 +Average/MinReturn -51.7235 +Average/NumEpisodes 100 +Average/StdReturn 8.0169 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.618391 +TotalEnvSteps 520000 +__unnamed_task__/AverageDiscountedReturn -22.3747 +__unnamed_task__/AverageReturn -36.0527 +__unnamed_task__/Iteration 51 +__unnamed_task__/MaxReturn -17.0785 +__unnamed_task__/MinReturn -51.7235 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.0169 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.93976 +policy/KL 0.00696894 +policy/KLBefore 0 +policy/LossAfter -0.0217105 +policy/LossBefore -0.0011002 +policy/dLoss 0.0206103 +---------------------------------------- --------------- +2025-04-02 16:35:37 | [rl2_trainer] epoch #52 | Optimizing policy... +2025-04-02 16:35:38 | [rl2_trainer] epoch #52 | Fitting baseline... +2025-04-02 16:35:38 | [rl2_trainer] epoch #52 | Computing loss before +2025-04-02 16:35:38 | [rl2_trainer] epoch #52 | Computing KL before +2025-04-02 16:35:39 | [rl2_trainer] epoch #52 | Optimizing +2025-04-02 16:36:16 | [rl2_trainer] epoch #52 | Computing KL after +2025-04-02 16:36:17 | [rl2_trainer] epoch #52 | Computing loss after +2025-04-02 16:36:18 | [rl2_trainer] epoch #52 | Saving snapshot... +2025-04-02 16:36:18 | [rl2_trainer] epoch #52 | Saved +2025-04-02 16:36:18 | [rl2_trainer] epoch #52 | Time 9858.60 s +2025-04-02 16:36:18 | [rl2_trainer] epoch #52 | EpochTime 197.46 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -20.2917 +Average/AverageReturn -32.4668 +Average/Iteration 52 +Average/MaxReturn -16.1679 +Average/MinReturn -116.838 +Average/NumEpisodes 100 +Average/StdReturn 13.4765 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.48551 +TotalEnvSteps 530000 +__unnamed_task__/AverageDiscountedReturn -20.2917 +__unnamed_task__/AverageReturn -32.4668 +__unnamed_task__/Iteration 52 +__unnamed_task__/MaxReturn -16.1679 +__unnamed_task__/MinReturn -116.838 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.4765 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.93587 +policy/KL 0.0076699 +policy/KLBefore 0 +policy/LossAfter -0.0773133 +policy/LossBefore -0.0241693 +policy/dLoss 0.053144 +---------------------------------------- -------------- +2025-04-02 16:38:27 | [rl2_trainer] epoch #53 | Optimizing policy... +2025-04-02 16:38:28 | [rl2_trainer] epoch #53 | Fitting baseline... +2025-04-02 16:38:28 | [rl2_trainer] epoch #53 | Computing loss before +2025-04-02 16:38:28 | [rl2_trainer] epoch #53 | Computing KL before +2025-04-02 16:38:29 | [rl2_trainer] epoch #53 | Optimizing +2025-04-02 16:39:04 | [rl2_trainer] epoch #53 | Computing KL after +2025-04-02 16:39:05 | [rl2_trainer] epoch #53 | Computing loss after +2025-04-02 16:39:06 | [rl2_trainer] epoch #53 | Saving snapshot... +2025-04-02 16:39:06 | [rl2_trainer] epoch #53 | Saved +2025-04-02 16:39:06 | [rl2_trainer] epoch #53 | Time 10026.75 s +2025-04-02 16:39:06 | [rl2_trainer] epoch #53 | EpochTime 168.15 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.1216 +Average/AverageReturn -32.0326 +Average/Iteration 53 +Average/MaxReturn -16.3669 +Average/MinReturn -57.5895 +Average/NumEpisodes 100 +Average/StdReturn 6.56069 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.44894 +TotalEnvSteps 540000 +__unnamed_task__/AverageDiscountedReturn -20.1216 +__unnamed_task__/AverageReturn -32.0326 +__unnamed_task__/Iteration 53 +__unnamed_task__/MaxReturn -16.3669 +__unnamed_task__/MinReturn -57.5895 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.56069 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.92341 +policy/KL 0.00912862 +policy/KLBefore 0 +policy/LossAfter -0.019505 +policy/LossBefore -0.00553963 +policy/dLoss 0.0139654 +---------------------------------------- --------------- +2025-04-02 16:41:56 | [rl2_trainer] epoch #54 | Optimizing policy... +2025-04-02 16:41:57 | [rl2_trainer] epoch #54 | Fitting baseline... +2025-04-02 16:41:57 | [rl2_trainer] epoch #54 | Computing loss before +2025-04-02 16:41:57 | [rl2_trainer] epoch #54 | Computing KL before +2025-04-02 16:41:58 | [rl2_trainer] epoch #54 | Optimizing +2025-04-02 16:42:33 | [rl2_trainer] epoch #54 | Computing KL after +2025-04-02 16:42:34 | [rl2_trainer] epoch #54 | Computing loss after +2025-04-02 16:42:35 | [rl2_trainer] epoch #54 | Saving snapshot... +2025-04-02 16:42:35 | [rl2_trainer] epoch #54 | Saved +2025-04-02 16:42:35 | [rl2_trainer] epoch #54 | Time 10235.93 s +2025-04-02 16:42:35 | [rl2_trainer] epoch #54 | EpochTime 209.18 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.658 +Average/AverageReturn -29.822 +Average/Iteration 54 +Average/MaxReturn -12.9239 +Average/MinReturn -65.7933 +Average/NumEpisodes 100 +Average/StdReturn 8.60786 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.52565 +TotalEnvSteps 550000 +__unnamed_task__/AverageDiscountedReturn -18.658 +__unnamed_task__/AverageReturn -29.822 +__unnamed_task__/Iteration 54 +__unnamed_task__/MaxReturn -12.9239 +__unnamed_task__/MinReturn -65.7933 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.60786 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.89343 +policy/KL 0.00790403 +policy/KLBefore 0 +policy/LossAfter -0.0198817 +policy/LossBefore 0.00086424 +policy/dLoss 0.020746 +---------------------------------------- --------------- +2025-04-02 16:44:48 | [rl2_trainer] epoch #55 | Optimizing policy... +2025-04-02 16:44:48 | [rl2_trainer] epoch #55 | Fitting baseline... +2025-04-02 16:44:48 | [rl2_trainer] epoch #55 | Computing loss before +2025-04-02 16:44:49 | [rl2_trainer] epoch #55 | Computing KL before +2025-04-02 16:44:49 | [rl2_trainer] epoch #55 | Optimizing +2025-04-02 16:45:26 | [rl2_trainer] epoch #55 | Computing KL after +2025-04-02 16:45:27 | [rl2_trainer] epoch #55 | Computing loss after +2025-04-02 16:45:28 | [rl2_trainer] epoch #55 | Saving snapshot... +2025-04-02 16:45:28 | [rl2_trainer] epoch #55 | Saved +2025-04-02 16:45:28 | [rl2_trainer] epoch #55 | Time 10408.77 s +2025-04-02 16:45:28 | [rl2_trainer] epoch #55 | EpochTime 172.84 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.9892 +Average/AverageReturn -33.5099 +Average/Iteration 55 +Average/MaxReturn -17.9884 +Average/MinReturn -56.0661 +Average/NumEpisodes 100 +Average/StdReturn 8.44971 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.770996 +TotalEnvSteps 560000 +__unnamed_task__/AverageDiscountedReturn -20.9892 +__unnamed_task__/AverageReturn -33.5099 +__unnamed_task__/Iteration 55 +__unnamed_task__/MaxReturn -17.9884 +__unnamed_task__/MinReturn -56.0661 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.44971 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.87251 +policy/KL 0.00829308 +policy/KLBefore 0 +policy/LossAfter -0.0142319 +policy/LossBefore 0.00352098 +policy/dLoss 0.0177528 +---------------------------------------- --------------- +2025-04-02 16:47:57 | [rl2_trainer] epoch #56 | Optimizing policy... +2025-04-02 16:47:57 | [rl2_trainer] epoch #56 | Fitting baseline... +2025-04-02 16:47:57 | [rl2_trainer] epoch #56 | Computing loss before +2025-04-02 16:47:58 | [rl2_trainer] epoch #56 | Computing KL before +2025-04-02 16:47:59 | [rl2_trainer] epoch #56 | Optimizing +2025-04-02 16:48:35 | [rl2_trainer] epoch #56 | Computing KL after +2025-04-02 16:48:35 | [rl2_trainer] epoch #56 | Computing loss after +2025-04-02 16:48:36 | [rl2_trainer] epoch #56 | Saving snapshot... +2025-04-02 16:48:36 | [rl2_trainer] epoch #56 | Saved +2025-04-02 16:48:36 | [rl2_trainer] epoch #56 | Time 10597.34 s +2025-04-02 16:48:36 | [rl2_trainer] epoch #56 | EpochTime 188.57 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.7127 +Average/AverageReturn -29.548 +Average/Iteration 56 +Average/MaxReturn -5.2361 +Average/MinReturn -55.2471 +Average/NumEpisodes 100 +Average/StdReturn 7.84962 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.540963 +TotalEnvSteps 570000 +__unnamed_task__/AverageDiscountedReturn -18.7127 +__unnamed_task__/AverageReturn -29.548 +__unnamed_task__/Iteration 56 +__unnamed_task__/MaxReturn -5.2361 +__unnamed_task__/MinReturn -55.2471 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.84962 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.85014 +policy/KL 0.0106551 +policy/KLBefore 0 +policy/LossAfter -0.0236673 +policy/LossBefore -0.00110388 +policy/dLoss 0.0225634 +---------------------------------------- --------------- +2025-04-02 16:50:49 | [rl2_trainer] epoch #57 | Optimizing policy... +2025-04-02 16:50:49 | [rl2_trainer] epoch #57 | Fitting baseline... +2025-04-02 16:50:49 | [rl2_trainer] epoch #57 | Computing loss before +2025-04-02 16:50:50 | [rl2_trainer] epoch #57 | Computing KL before +2025-04-02 16:50:50 | [rl2_trainer] epoch #57 | Optimizing +2025-04-02 16:51:27 | [rl2_trainer] epoch #57 | Computing KL after +2025-04-02 16:51:27 | [rl2_trainer] epoch #57 | Computing loss after +2025-04-02 16:51:28 | [rl2_trainer] epoch #57 | Saving snapshot... +2025-04-02 16:51:28 | [rl2_trainer] epoch #57 | Saved +2025-04-02 16:51:28 | [rl2_trainer] epoch #57 | Time 10769.46 s +2025-04-02 16:51:28 | [rl2_trainer] epoch #57 | EpochTime 172.11 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.8349 +Average/AverageReturn -33.347 +Average/Iteration 57 +Average/MaxReturn -1.37432 +Average/MinReturn -65.6849 +Average/NumEpisodes 100 +Average/StdReturn 9.15828 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.643444 +TotalEnvSteps 580000 +__unnamed_task__/AverageDiscountedReturn -20.8349 +__unnamed_task__/AverageReturn -33.347 +__unnamed_task__/Iteration 57 +__unnamed_task__/MaxReturn -1.37432 +__unnamed_task__/MinReturn -65.6849 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.15828 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.8362 +policy/KL 0.00756163 +policy/KLBefore 0 +policy/LossAfter -0.0312189 +policy/LossBefore -0.00653902 +policy/dLoss 0.0246799 +---------------------------------------- --------------- +2025-04-02 16:53:29 | [rl2_trainer] epoch #58 | Optimizing policy... +2025-04-02 16:53:29 | [rl2_trainer] epoch #58 | Fitting baseline... +2025-04-02 16:53:29 | [rl2_trainer] epoch #58 | Computing loss before +2025-04-02 16:53:29 | [rl2_trainer] epoch #58 | Computing KL before +2025-04-02 16:53:30 | [rl2_trainer] epoch #58 | Optimizing +2025-04-02 16:54:07 | [rl2_trainer] epoch #58 | Computing KL after +2025-04-02 16:54:07 | [rl2_trainer] epoch #58 | Computing loss after +2025-04-02 16:54:08 | [rl2_trainer] epoch #58 | Saving snapshot... +2025-04-02 16:54:08 | [rl2_trainer] epoch #58 | Saved +2025-04-02 16:54:08 | [rl2_trainer] epoch #58 | Time 10929.31 s +2025-04-02 16:54:08 | [rl2_trainer] epoch #58 | EpochTime 159.85 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -22.0875 +Average/AverageReturn -35.3391 +Average/Iteration 58 +Average/MaxReturn -18.2012 +Average/MinReturn -58.3934 +Average/NumEpisodes 100 +Average/StdReturn 7.39697 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.521148 +TotalEnvSteps 590000 +__unnamed_task__/AverageDiscountedReturn -22.0875 +__unnamed_task__/AverageReturn -35.3391 +__unnamed_task__/Iteration 58 +__unnamed_task__/MaxReturn -18.2012 +__unnamed_task__/MinReturn -58.3934 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.39697 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.80903 +policy/KL 0.00879134 +policy/KLBefore 0 +policy/LossAfter -0.0131995 +policy/LossBefore 0.00278304 +policy/dLoss 0.0159825 +---------------------------------------- --------------- +2025-04-02 16:56:32 | [rl2_trainer] epoch #59 | Optimizing policy... +2025-04-02 16:56:32 | [rl2_trainer] epoch #59 | Fitting baseline... +2025-04-02 16:56:32 | [rl2_trainer] epoch #59 | Computing loss before +2025-04-02 16:56:32 | [rl2_trainer] epoch #59 | Computing KL before +2025-04-02 16:56:33 | [rl2_trainer] epoch #59 | Optimizing +2025-04-02 16:57:07 | [rl2_trainer] epoch #59 | Computing KL after +2025-04-02 16:57:08 | [rl2_trainer] epoch #59 | Computing loss after +2025-04-02 16:57:09 | [rl2_trainer] epoch #59 | Saving snapshot... +2025-04-02 16:57:09 | [rl2_trainer] epoch #59 | Saved +2025-04-02 16:57:09 | [rl2_trainer] epoch #59 | Time 11109.59 s +2025-04-02 16:57:09 | [rl2_trainer] epoch #59 | EpochTime 180.27 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -18.8211 +Average/AverageReturn -29.364 +Average/Iteration 59 +Average/MaxReturn -3.92344 +Average/MinReturn -119.896 +Average/NumEpisodes 100 +Average/StdReturn 11.612 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.613141 +TotalEnvSteps 600000 +__unnamed_task__/AverageDiscountedReturn -18.8211 +__unnamed_task__/AverageReturn -29.364 +__unnamed_task__/Iteration 59 +__unnamed_task__/MaxReturn -3.92344 +__unnamed_task__/MinReturn -119.896 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.612 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.78754 +policy/KL 0.0088165 +policy/KLBefore 0 +policy/LossAfter -0.0274895 +policy/LossBefore -0.000932056 +policy/dLoss 0.0265575 +---------------------------------------- ---------------- +2025-04-02 16:58:36 | [rl2_trainer] epoch #60 | Optimizing policy... +2025-04-02 16:58:36 | [rl2_trainer] epoch #60 | Fitting baseline... +2025-04-02 16:58:36 | [rl2_trainer] epoch #60 | Computing loss before +2025-04-02 16:58:37 | [rl2_trainer] epoch #60 | Computing KL before +2025-04-02 16:58:38 | [rl2_trainer] epoch #60 | Optimizing +2025-04-02 16:59:15 | [rl2_trainer] epoch #60 | Computing KL after +2025-04-02 16:59:15 | [rl2_trainer] epoch #60 | Computing loss after +2025-04-02 16:59:16 | [rl2_trainer] epoch #60 | Saving snapshot... +2025-04-02 16:59:16 | [rl2_trainer] epoch #60 | Saved +2025-04-02 16:59:16 | [rl2_trainer] epoch #60 | Time 11237.12 s +2025-04-02 16:59:16 | [rl2_trainer] epoch #60 | EpochTime 127.53 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.2685 +Average/AverageReturn -33.7593 +Average/Iteration 60 +Average/MaxReturn -16.0001 +Average/MinReturn -50.1409 +Average/NumEpisodes 100 +Average/StdReturn 7.09862 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.747962 +TotalEnvSteps 610000 +__unnamed_task__/AverageDiscountedReturn -21.2685 +__unnamed_task__/AverageReturn -33.7593 +__unnamed_task__/Iteration 60 +__unnamed_task__/MaxReturn -16.0001 +__unnamed_task__/MinReturn -50.1409 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.09862 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.77367 +policy/KL 0.0059244 +policy/KLBefore 0 +policy/LossAfter -0.0195335 +policy/LossBefore -0.00575612 +policy/dLoss 0.0137774 +---------------------------------------- --------------- +2025-04-02 17:01:40 | [rl2_trainer] epoch #61 | Optimizing policy... +2025-04-02 17:01:41 | [rl2_trainer] epoch #61 | Fitting baseline... +2025-04-02 17:01:41 | [rl2_trainer] epoch #61 | Computing loss before +2025-04-02 17:01:41 | [rl2_trainer] epoch #61 | Computing KL before +2025-04-02 17:01:42 | [rl2_trainer] epoch #61 | Optimizing +2025-04-02 17:02:18 | [rl2_trainer] epoch #61 | Computing KL after +2025-04-02 17:02:19 | [rl2_trainer] epoch #61 | Computing loss after +2025-04-02 17:02:20 | [rl2_trainer] epoch #61 | Saving snapshot... +2025-04-02 17:02:20 | [rl2_trainer] epoch #61 | Saved +2025-04-02 17:02:20 | [rl2_trainer] epoch #61 | Time 11421.07 s +2025-04-02 17:02:20 | [rl2_trainer] epoch #61 | EpochTime 183.95 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.7001 +Average/AverageReturn -26.0693 +Average/Iteration 61 +Average/MaxReturn -6.43316 +Average/MinReturn -54.5875 +Average/NumEpisodes 100 +Average/StdReturn 6.23999 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.406876 +TotalEnvSteps 620000 +__unnamed_task__/AverageDiscountedReturn -16.7001 +__unnamed_task__/AverageReturn -26.0693 +__unnamed_task__/Iteration 61 +__unnamed_task__/MaxReturn -6.43316 +__unnamed_task__/MinReturn -54.5875 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.23999 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.74092 +policy/KL 0.0118063 +policy/KLBefore 0 +policy/LossAfter -0.00224084 +policy/LossBefore 0.00637992 +policy/dLoss 0.00862076 +---------------------------------------- --------------- +2025-04-02 17:04:18 | [rl2_trainer] epoch #62 | Optimizing policy... +2025-04-02 17:04:18 | [rl2_trainer] epoch #62 | Fitting baseline... +2025-04-02 17:04:18 | [rl2_trainer] epoch #62 | Computing loss before +2025-04-02 17:04:19 | [rl2_trainer] epoch #62 | Computing KL before +2025-04-02 17:04:20 | [rl2_trainer] epoch #62 | Optimizing +2025-04-02 17:04:54 | [rl2_trainer] epoch #62 | Computing KL after +2025-04-02 17:04:55 | [rl2_trainer] epoch #62 | Computing loss after +2025-04-02 17:04:55 | [rl2_trainer] epoch #62 | Saving snapshot... +2025-04-02 17:04:55 | [rl2_trainer] epoch #62 | Saved +2025-04-02 17:04:55 | [rl2_trainer] epoch #62 | Time 11576.45 s +2025-04-02 17:04:55 | [rl2_trainer] epoch #62 | EpochTime 155.37 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -21.8793 +Average/AverageReturn -34.7923 +Average/Iteration 62 +Average/MaxReturn -16.7298 +Average/MinReturn -63.7527 +Average/NumEpisodes 100 +Average/StdReturn 7.96606 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.579239 +TotalEnvSteps 630000 +__unnamed_task__/AverageDiscountedReturn -21.8793 +__unnamed_task__/AverageReturn -34.7923 +__unnamed_task__/Iteration 62 +__unnamed_task__/MaxReturn -16.7298 +__unnamed_task__/MinReturn -63.7527 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.96606 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.70829 +policy/KL 0.00796104 +policy/KLBefore 0 +policy/LossAfter -0.0342756 +policy/LossBefore -0.0199404 +policy/dLoss 0.0143352 +---------------------------------------- --------------- +2025-04-02 17:06:57 | [rl2_trainer] epoch #63 | Optimizing policy... +2025-04-02 17:06:57 | [rl2_trainer] epoch #63 | Fitting baseline... +2025-04-02 17:06:57 | [rl2_trainer] epoch #63 | Computing loss before +2025-04-02 17:06:58 | [rl2_trainer] epoch #63 | Computing KL before +2025-04-02 17:06:58 | [rl2_trainer] epoch #63 | Optimizing +2025-04-02 17:07:31 | [rl2_trainer] epoch #63 | Computing KL after +2025-04-02 17:07:32 | [rl2_trainer] epoch #63 | Computing loss after +2025-04-02 17:07:33 | [rl2_trainer] epoch #63 | Saving snapshot... +2025-04-02 17:07:33 | [rl2_trainer] epoch #63 | Saved +2025-04-02 17:07:33 | [rl2_trainer] epoch #63 | Time 11733.78 s +2025-04-02 17:07:33 | [rl2_trainer] epoch #63 | EpochTime 157.33 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -21.617 +Average/AverageReturn -34.4945 +Average/Iteration 63 +Average/MaxReturn -19.475 +Average/MinReturn -57.7843 +Average/NumEpisodes 100 +Average/StdReturn 7.92493 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.553119 +TotalEnvSteps 640000 +__unnamed_task__/AverageDiscountedReturn -21.617 +__unnamed_task__/AverageReturn -34.4945 +__unnamed_task__/Iteration 63 +__unnamed_task__/MaxReturn -19.475 +__unnamed_task__/MinReturn -57.7843 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.92493 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.68952 +policy/KL 0.0133732 +policy/KLBefore 0 +policy/LossAfter -0.0150976 +policy/LossBefore 0.0041983 +policy/dLoss 0.0192959 +---------------------------------------- -------------- +2025-04-02 17:09:51 | [rl2_trainer] epoch #64 | Optimizing policy... +2025-04-02 17:09:51 | [rl2_trainer] epoch #64 | Fitting baseline... +2025-04-02 17:09:51 | [rl2_trainer] epoch #64 | Computing loss before +2025-04-02 17:09:52 | [rl2_trainer] epoch #64 | Computing KL before +2025-04-02 17:09:52 | [rl2_trainer] epoch #64 | Optimizing +2025-04-02 17:10:25 | [rl2_trainer] epoch #64 | Computing KL after +2025-04-02 17:10:26 | [rl2_trainer] epoch #64 | Computing loss after +2025-04-02 17:10:27 | [rl2_trainer] epoch #64 | Saving snapshot... +2025-04-02 17:10:27 | [rl2_trainer] epoch #64 | Saved +2025-04-02 17:10:27 | [rl2_trainer] epoch #64 | Time 11907.89 s +2025-04-02 17:10:27 | [rl2_trainer] epoch #64 | EpochTime 174.11 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -17.527 +Average/AverageReturn -27.2251 +Average/Iteration 64 +Average/MaxReturn -6.26573 +Average/MinReturn -119.984 +Average/NumEpisodes 100 +Average/StdReturn 11.6282 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.50876 +TotalEnvSteps 650000 +__unnamed_task__/AverageDiscountedReturn -17.527 +__unnamed_task__/AverageReturn -27.2251 +__unnamed_task__/Iteration 64 +__unnamed_task__/MaxReturn -6.26573 +__unnamed_task__/MinReturn -119.984 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.6282 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.6534 +policy/KL 0.0126138 +policy/KLBefore 0 +policy/LossAfter -0.0331193 +policy/LossBefore -0.00935867 +policy/dLoss 0.0237606 +---------------------------------------- --------------- +2025-04-02 17:12:22 | [rl2_trainer] epoch #65 | Optimizing policy... +2025-04-02 17:12:23 | [rl2_trainer] epoch #65 | Fitting baseline... +2025-04-02 17:12:23 | [rl2_trainer] epoch #65 | Computing loss before +2025-04-02 17:12:23 | [rl2_trainer] epoch #65 | Computing KL before +2025-04-02 17:12:24 | [rl2_trainer] epoch #65 | Optimizing +2025-04-02 17:13:00 | [rl2_trainer] epoch #65 | Computing KL after +2025-04-02 17:13:00 | [rl2_trainer] epoch #65 | Computing loss after +2025-04-02 17:13:01 | [rl2_trainer] epoch #65 | Saving snapshot... +2025-04-02 17:13:01 | [rl2_trainer] epoch #65 | Saved +2025-04-02 17:13:01 | [rl2_trainer] epoch #65 | Time 12062.09 s +2025-04-02 17:13:01 | [rl2_trainer] epoch #65 | EpochTime 154.20 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.6073 +Average/AverageReturn -34.3929 +Average/Iteration 65 +Average/MaxReturn -12.1597 +Average/MinReturn -64.8175 +Average/NumEpisodes 100 +Average/StdReturn 8.06208 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.504918 +TotalEnvSteps 660000 +__unnamed_task__/AverageDiscountedReturn -21.6073 +__unnamed_task__/AverageReturn -34.3929 +__unnamed_task__/Iteration 65 +__unnamed_task__/MaxReturn -12.1597 +__unnamed_task__/MinReturn -64.8175 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.06208 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.61941 +policy/KL 0.0128658 +policy/KLBefore 0 +policy/LossAfter -0.0200526 +policy/LossBefore 0.000431287 +policy/dLoss 0.0204839 +---------------------------------------- ---------------- +2025-04-02 17:16:44 | [rl2_trainer] epoch #66 | Optimizing policy... +2025-04-02 17:16:45 | [rl2_trainer] epoch #66 | Fitting baseline... +2025-04-02 17:16:45 | [rl2_trainer] epoch #66 | Computing loss before +2025-04-02 17:16:45 | [rl2_trainer] epoch #66 | Computing KL before +2025-04-02 17:16:46 | [rl2_trainer] epoch #66 | Optimizing +2025-04-02 17:17:23 | [rl2_trainer] epoch #66 | Computing KL after +2025-04-02 17:17:23 | [rl2_trainer] epoch #66 | Computing loss after +2025-04-02 17:17:24 | [rl2_trainer] epoch #66 | Saving snapshot... +2025-04-02 17:17:24 | [rl2_trainer] epoch #66 | Saved +2025-04-02 17:17:24 | [rl2_trainer] epoch #66 | Time 12325.28 s +2025-04-02 17:17:24 | [rl2_trainer] epoch #66 | EpochTime 263.19 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -19.7186 +Average/AverageReturn -31.6136 +Average/Iteration 66 +Average/MaxReturn 2.64321 +Average/MinReturn -77.5506 +Average/NumEpisodes 100 +Average/StdReturn 10.4296 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance -1.48067 +TotalEnvSteps 670000 +__unnamed_task__/AverageDiscountedReturn -19.7186 +__unnamed_task__/AverageReturn -31.6136 +__unnamed_task__/Iteration 66 +__unnamed_task__/MaxReturn 2.64321 +__unnamed_task__/MinReturn -77.5506 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4296 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.56751 +policy/KL 0.0158801 +policy/KLBefore 0 +policy/LossAfter -0.10466 +policy/LossBefore 0.0137479 +policy/dLoss 0.118408 +---------------------------------------- -------------- +2025-04-02 17:18:59 | [rl2_trainer] epoch #67 | Optimizing policy... +2025-04-02 17:19:00 | [rl2_trainer] epoch #67 | Fitting baseline... +2025-04-02 17:19:00 | [rl2_trainer] epoch #67 | Computing loss before +2025-04-02 17:19:00 | [rl2_trainer] epoch #67 | Computing KL before +2025-04-02 17:19:01 | [rl2_trainer] epoch #67 | Optimizing +2025-04-02 17:19:35 | [rl2_trainer] epoch #67 | Computing KL after +2025-04-02 17:19:35 | [rl2_trainer] epoch #67 | Computing loss after +2025-04-02 17:19:37 | [rl2_trainer] epoch #67 | Saving snapshot... +2025-04-02 17:19:37 | [rl2_trainer] epoch #67 | Saved +2025-04-02 17:19:37 | [rl2_trainer] epoch #67 | Time 12457.92 s +2025-04-02 17:19:37 | [rl2_trainer] epoch #67 | EpochTime 132.64 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.8305 +Average/AverageReturn -31.0862 +Average/Iteration 67 +Average/MaxReturn -17.3939 +Average/MinReturn -70.3912 +Average/NumEpisodes 100 +Average/StdReturn 6.47901 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.394212 +TotalEnvSteps 680000 +__unnamed_task__/AverageDiscountedReturn -19.8305 +__unnamed_task__/AverageReturn -31.0862 +__unnamed_task__/Iteration 67 +__unnamed_task__/MaxReturn -17.3939 +__unnamed_task__/MinReturn -70.3912 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.47901 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.53476 +policy/KL 0.0081072 +policy/KLBefore 0 +policy/LossAfter -0.00519544 +policy/LossBefore 0.00409046 +policy/dLoss 0.00928591 +---------------------------------------- --------------- +2025-04-02 17:23:40 | [rl2_trainer] epoch #68 | Optimizing policy... +2025-04-02 17:23:41 | [rl2_trainer] epoch #68 | Fitting baseline... +2025-04-02 17:23:41 | [rl2_trainer] epoch #68 | Computing loss before +2025-04-02 17:23:41 | [rl2_trainer] epoch #68 | Computing KL before +2025-04-02 17:23:42 | [rl2_trainer] epoch #68 | Optimizing +2025-04-02 17:24:17 | [rl2_trainer] epoch #68 | Computing KL after +2025-04-02 17:24:18 | [rl2_trainer] epoch #68 | Computing loss after +2025-04-02 17:24:19 | [rl2_trainer] epoch #68 | Saving snapshot... +2025-04-02 17:24:19 | [rl2_trainer] epoch #68 | Saved +2025-04-02 17:24:19 | [rl2_trainer] epoch #68 | Time 12739.58 s +2025-04-02 17:24:19 | [rl2_trainer] epoch #68 | EpochTime 281.65 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.6783 +Average/AverageReturn -31.8856 +Average/Iteration 68 +Average/MaxReturn -13.382 +Average/MinReturn -79.8428 +Average/NumEpisodes 100 +Average/StdReturn 9.25318 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.657206 +TotalEnvSteps 690000 +__unnamed_task__/AverageDiscountedReturn -19.6783 +__unnamed_task__/AverageReturn -31.8856 +__unnamed_task__/Iteration 68 +__unnamed_task__/MaxReturn -13.382 +__unnamed_task__/MinReturn -79.8428 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.25318 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.52831 +policy/KL 0.0160795 +policy/KLBefore 0 +policy/LossAfter -0.0217415 +policy/LossBefore 0.00883037 +policy/dLoss 0.0305719 +---------------------------------------- --------------- +2025-04-02 17:25:46 | [rl2_trainer] epoch #69 | Optimizing policy... +2025-04-02 17:25:46 | [rl2_trainer] epoch #69 | Fitting baseline... +2025-04-02 17:25:46 | [rl2_trainer] epoch #69 | Computing loss before +2025-04-02 17:25:47 | [rl2_trainer] epoch #69 | Computing KL before +2025-04-02 17:25:47 | [rl2_trainer] epoch #69 | Optimizing +2025-04-02 17:26:22 | [rl2_trainer] epoch #69 | Computing KL after +2025-04-02 17:26:23 | [rl2_trainer] epoch #69 | Computing loss after +2025-04-02 17:26:24 | [rl2_trainer] epoch #69 | Saving snapshot... +2025-04-02 17:26:24 | [rl2_trainer] epoch #69 | Saved +2025-04-02 17:26:24 | [rl2_trainer] epoch #69 | Time 12865.02 s +2025-04-02 17:26:24 | [rl2_trainer] epoch #69 | EpochTime 125.44 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.4014 +Average/AverageReturn -30.2535 +Average/Iteration 69 +Average/MaxReturn -12.5301 +Average/MinReturn -58.2306 +Average/NumEpisodes 100 +Average/StdReturn 6.4524 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.425714 +TotalEnvSteps 700000 +__unnamed_task__/AverageDiscountedReturn -19.4014 +__unnamed_task__/AverageReturn -30.2535 +__unnamed_task__/Iteration 69 +__unnamed_task__/MaxReturn -12.5301 +__unnamed_task__/MinReturn -58.2306 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.4524 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.50789 +policy/KL 0.00709639 +policy/KLBefore 0 +policy/LossAfter -0.0264526 +policy/LossBefore -0.00710552 +policy/dLoss 0.0193471 +---------------------------------------- --------------- +2025-04-02 17:28:50 | [rl2_trainer] epoch #70 | Optimizing policy... +2025-04-02 17:28:51 | [rl2_trainer] epoch #70 | Fitting baseline... +2025-04-02 17:28:51 | [rl2_trainer] epoch #70 | Computing loss before +2025-04-02 17:28:51 | [rl2_trainer] epoch #70 | Computing KL before +2025-04-02 17:28:52 | [rl2_trainer] epoch #70 | Optimizing +2025-04-02 17:29:29 | [rl2_trainer] epoch #70 | Computing KL after +2025-04-02 17:29:30 | [rl2_trainer] epoch #70 | Computing loss after +2025-04-02 17:29:31 | [rl2_trainer] epoch #70 | Saving snapshot... +2025-04-02 17:29:31 | [rl2_trainer] epoch #70 | Saved +2025-04-02 17:29:31 | [rl2_trainer] epoch #70 | Time 13051.54 s +2025-04-02 17:29:31 | [rl2_trainer] epoch #70 | EpochTime 186.51 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.2694 +Average/AverageReturn -29.0326 +Average/Iteration 70 +Average/MaxReturn -3.08938 +Average/MinReturn -75.421 +Average/NumEpisodes 100 +Average/StdReturn 9.32323 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.486487 +TotalEnvSteps 710000 +__unnamed_task__/AverageDiscountedReturn -18.2694 +__unnamed_task__/AverageReturn -29.0326 +__unnamed_task__/Iteration 70 +__unnamed_task__/MaxReturn -3.08938 +__unnamed_task__/MinReturn -75.421 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.32323 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.46176 +policy/KL 0.00954889 +policy/KLBefore 0 +policy/LossAfter -0.0245641 +policy/LossBefore 0.00130126 +policy/dLoss 0.0258654 +---------------------------------------- --------------- +2025-04-02 17:31:38 | [rl2_trainer] epoch #71 | Optimizing policy... +2025-04-02 17:31:38 | [rl2_trainer] epoch #71 | Fitting baseline... +2025-04-02 17:31:38 | [rl2_trainer] epoch #71 | Computing loss before +2025-04-02 17:31:39 | [rl2_trainer] epoch #71 | Computing KL before +2025-04-02 17:31:39 | [rl2_trainer] epoch #71 | Optimizing +2025-04-02 17:32:15 | [rl2_trainer] epoch #71 | Computing KL after +2025-04-02 17:32:16 | [rl2_trainer] epoch #71 | Computing loss after +2025-04-02 17:32:17 | [rl2_trainer] epoch #71 | Saving snapshot... +2025-04-02 17:32:17 | [rl2_trainer] epoch #71 | Saved +2025-04-02 17:32:17 | [rl2_trainer] epoch #71 | Time 13218.14 s +2025-04-02 17:32:17 | [rl2_trainer] epoch #71 | EpochTime 166.60 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.7301 +Average/AverageReturn -31.4064 +Average/Iteration 71 +Average/MaxReturn 4.67825 +Average/MinReturn -66.8113 +Average/NumEpisodes 100 +Average/StdReturn 9.30279 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.67267 +TotalEnvSteps 720000 +__unnamed_task__/AverageDiscountedReturn -19.7301 +__unnamed_task__/AverageReturn -31.4064 +__unnamed_task__/Iteration 71 +__unnamed_task__/MaxReturn 4.67825 +__unnamed_task__/MinReturn -66.8113 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.30279 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.42646 +policy/KL 0.0154049 +policy/KLBefore 0 +policy/LossAfter -0.0180328 +policy/LossBefore -0.00352194 +policy/dLoss 0.0145108 +---------------------------------------- --------------- +2025-04-02 17:34:30 | [rl2_trainer] epoch #72 | Optimizing policy... +2025-04-02 17:34:30 | [rl2_trainer] epoch #72 | Fitting baseline... +2025-04-02 17:34:30 | [rl2_trainer] epoch #72 | Computing loss before +2025-04-02 17:34:31 | [rl2_trainer] epoch #72 | Computing KL before +2025-04-02 17:34:31 | [rl2_trainer] epoch #72 | Optimizing +2025-04-02 17:35:05 | [rl2_trainer] epoch #72 | Computing KL after +2025-04-02 17:35:06 | [rl2_trainer] epoch #72 | Computing loss after +2025-04-02 17:35:06 | [rl2_trainer] epoch #72 | Saving snapshot... +2025-04-02 17:35:06 | [rl2_trainer] epoch #72 | Saved +2025-04-02 17:35:06 | [rl2_trainer] epoch #72 | Time 13387.38 s +2025-04-02 17:35:06 | [rl2_trainer] epoch #72 | EpochTime 169.25 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.3205 +Average/AverageReturn -28.8476 +Average/Iteration 72 +Average/MaxReturn -16.7907 +Average/MinReturn -48.758 +Average/NumEpisodes 100 +Average/StdReturn 7.12233 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.824156 +TotalEnvSteps 730000 +__unnamed_task__/AverageDiscountedReturn -18.3205 +__unnamed_task__/AverageReturn -28.8476 +__unnamed_task__/Iteration 72 +__unnamed_task__/MaxReturn -16.7907 +__unnamed_task__/MinReturn -48.758 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.12233 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.40385 +policy/KL 0.00875085 +policy/KLBefore 0 +policy/LossAfter -0.00755148 +policy/LossBefore 0.00234323 +policy/dLoss 0.00989471 +---------------------------------------- --------------- +2025-04-02 17:38:55 | [rl2_trainer] epoch #73 | Optimizing policy... +2025-04-02 17:38:55 | [rl2_trainer] epoch #73 | Fitting baseline... +2025-04-02 17:38:55 | [rl2_trainer] epoch #73 | Computing loss before +2025-04-02 17:38:56 | [rl2_trainer] epoch #73 | Computing KL before +2025-04-02 17:38:56 | [rl2_trainer] epoch #73 | Optimizing +2025-04-02 17:39:32 | [rl2_trainer] epoch #73 | Computing KL after +2025-04-02 17:39:32 | [rl2_trainer] epoch #73 | Computing loss after +2025-04-02 17:39:33 | [rl2_trainer] epoch #73 | Saving snapshot... +2025-04-02 17:39:33 | [rl2_trainer] epoch #73 | Saved +2025-04-02 17:39:33 | [rl2_trainer] epoch #73 | Time 13654.21 s +2025-04-02 17:39:33 | [rl2_trainer] epoch #73 | EpochTime 266.82 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.2673 +Average/AverageReturn -25.9476 +Average/Iteration 73 +Average/MaxReturn 0.811113 +Average/MinReturn -63.9558 +Average/NumEpisodes 100 +Average/StdReturn 8.96829 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.555954 +TotalEnvSteps 740000 +__unnamed_task__/AverageDiscountedReturn -16.2673 +__unnamed_task__/AverageReturn -25.9476 +__unnamed_task__/Iteration 73 +__unnamed_task__/MaxReturn 0.811113 +__unnamed_task__/MinReturn -63.9558 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.96829 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.37519 +policy/KL 0.00693271 +policy/KLBefore 0 +policy/LossAfter -0.0452516 +policy/LossBefore -0.0218726 +policy/dLoss 0.0233789 +---------------------------------------- --------------- +2025-04-02 17:42:06 | [rl2_trainer] epoch #74 | Optimizing policy... +2025-04-02 17:42:06 | [rl2_trainer] epoch #74 | Fitting baseline... +2025-04-02 17:42:06 | [rl2_trainer] epoch #74 | Computing loss before +2025-04-02 17:42:07 | [rl2_trainer] epoch #74 | Computing KL before +2025-04-02 17:42:07 | [rl2_trainer] epoch #74 | Optimizing +2025-04-02 17:42:42 | [rl2_trainer] epoch #74 | Computing KL after +2025-04-02 17:42:43 | [rl2_trainer] epoch #74 | Computing loss after +2025-04-02 17:42:44 | [rl2_trainer] epoch #74 | Saving snapshot... +2025-04-02 17:42:44 | [rl2_trainer] epoch #74 | Saved +2025-04-02 17:42:44 | [rl2_trainer] epoch #74 | Time 13844.91 s +2025-04-02 17:42:44 | [rl2_trainer] epoch #74 | EpochTime 190.71 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -19.8039 +Average/AverageReturn -31.3026 +Average/Iteration 74 +Average/MaxReturn -8.91498 +Average/MinReturn -65.1832 +Average/NumEpisodes 100 +Average/StdReturn 8.97218 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.527093 +TotalEnvSteps 750000 +__unnamed_task__/AverageDiscountedReturn -19.8039 +__unnamed_task__/AverageReturn -31.3026 +__unnamed_task__/Iteration 74 +__unnamed_task__/MaxReturn -8.91498 +__unnamed_task__/MinReturn -65.1832 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.97218 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.33487 +policy/KL 0.0093933 +policy/KLBefore 0 +policy/LossAfter -0.0345887 +policy/LossBefore -0.0148107 +policy/dLoss 0.0197779 +---------------------------------------- -------------- +2025-04-02 17:45:41 | [rl2_trainer] epoch #75 | Optimizing policy... +2025-04-02 17:45:42 | [rl2_trainer] epoch #75 | Fitting baseline... +2025-04-02 17:45:42 | [rl2_trainer] epoch #75 | Computing loss before +2025-04-02 17:45:42 | [rl2_trainer] epoch #75 | Computing KL before +2025-04-02 17:45:43 | [rl2_trainer] epoch #75 | Optimizing +2025-04-02 17:46:18 | [rl2_trainer] epoch #75 | Computing KL after +2025-04-02 17:46:19 | [rl2_trainer] epoch #75 | Computing loss after +2025-04-02 17:46:20 | [rl2_trainer] epoch #75 | Saving snapshot... +2025-04-02 17:46:20 | [rl2_trainer] epoch #75 | Saved +2025-04-02 17:46:20 | [rl2_trainer] epoch #75 | Time 14060.81 s +2025-04-02 17:46:20 | [rl2_trainer] epoch #75 | EpochTime 215.89 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.5484 +Average/AverageReturn -31.3283 +Average/Iteration 75 +Average/MaxReturn -14.2839 +Average/MinReturn -64.3903 +Average/NumEpisodes 100 +Average/StdReturn 9.81454 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.797536 +TotalEnvSteps 760000 +__unnamed_task__/AverageDiscountedReturn -19.5484 +__unnamed_task__/AverageReturn -31.3283 +__unnamed_task__/Iteration 75 +__unnamed_task__/MaxReturn -14.2839 +__unnamed_task__/MinReturn -64.3903 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.81454 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.31301 +policy/KL 0.00851202 +policy/KLBefore 0 +policy/LossAfter -0.0205902 +policy/LossBefore -0.00234408 +policy/dLoss 0.0182461 +---------------------------------------- --------------- +2025-04-02 17:49:17 | [rl2_trainer] epoch #76 | Optimizing policy... +2025-04-02 17:49:17 | [rl2_trainer] epoch #76 | Fitting baseline... +2025-04-02 17:49:17 | [rl2_trainer] epoch #76 | Computing loss before +2025-04-02 17:49:18 | [rl2_trainer] epoch #76 | Computing KL before +2025-04-02 17:49:19 | [rl2_trainer] epoch #76 | Optimizing +2025-04-02 17:49:55 | [rl2_trainer] epoch #76 | Computing KL after +2025-04-02 17:49:55 | [rl2_trainer] epoch #76 | Computing loss after +2025-04-02 17:49:56 | [rl2_trainer] epoch #76 | Saving snapshot... +2025-04-02 17:49:56 | [rl2_trainer] epoch #76 | Saved +2025-04-02 17:49:56 | [rl2_trainer] epoch #76 | Time 14277.26 s +2025-04-02 17:49:56 | [rl2_trainer] epoch #76 | EpochTime 216.45 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -17.629 +Average/AverageReturn -27.7511 +Average/Iteration 76 +Average/MaxReturn -13.7569 +Average/MinReturn -47.3795 +Average/NumEpisodes 100 +Average/StdReturn 6.85974 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.730557 +TotalEnvSteps 770000 +__unnamed_task__/AverageDiscountedReturn -17.629 +__unnamed_task__/AverageReturn -27.7511 +__unnamed_task__/Iteration 76 +__unnamed_task__/MaxReturn -13.7569 +__unnamed_task__/MinReturn -47.3795 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.85974 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.28994 +policy/KL 0.0107779 +policy/KLBefore 0 +policy/LossAfter -0.00279228 +policy/LossBefore 0.00532511 +policy/dLoss 0.00811739 +---------------------------------------- --------------- +2025-04-02 17:53:15 | [rl2_trainer] epoch #77 | Optimizing policy... +2025-04-02 17:53:15 | [rl2_trainer] epoch #77 | Fitting baseline... +2025-04-02 17:53:15 | [rl2_trainer] epoch #77 | Computing loss before +2025-04-02 17:53:16 | [rl2_trainer] epoch #77 | Computing KL before +2025-04-02 17:53:16 | [rl2_trainer] epoch #77 | Optimizing +2025-04-02 17:53:53 | [rl2_trainer] epoch #77 | Computing KL after +2025-04-02 17:53:54 | [rl2_trainer] epoch #77 | Computing loss after +2025-04-02 17:53:55 | [rl2_trainer] epoch #77 | Saving snapshot... +2025-04-02 17:53:55 | [rl2_trainer] epoch #77 | Saved +2025-04-02 17:53:55 | [rl2_trainer] epoch #77 | Time 14515.84 s +2025-04-02 17:53:55 | [rl2_trainer] epoch #77 | EpochTime 238.59 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.3568 +Average/AverageReturn -30.9346 +Average/Iteration 77 +Average/MaxReturn -16.9499 +Average/MinReturn -71.4912 +Average/NumEpisodes 100 +Average/StdReturn 8.62531 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.406561 +TotalEnvSteps 780000 +__unnamed_task__/AverageDiscountedReturn -19.3568 +__unnamed_task__/AverageReturn -30.9346 +__unnamed_task__/Iteration 77 +__unnamed_task__/MaxReturn -16.9499 +__unnamed_task__/MinReturn -71.4912 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.62531 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.27715 +policy/KL 0.00783026 +policy/KLBefore 0 +policy/LossAfter -0.0166646 +policy/LossBefore 0.011186 +policy/dLoss 0.0278507 +---------------------------------------- --------------- +2025-04-02 17:56:51 | [rl2_trainer] epoch #78 | Optimizing policy... +2025-04-02 17:56:51 | [rl2_trainer] epoch #78 | Fitting baseline... +2025-04-02 17:56:51 | [rl2_trainer] epoch #78 | Computing loss before +2025-04-02 17:56:52 | [rl2_trainer] epoch #78 | Computing KL before +2025-04-02 17:56:52 | [rl2_trainer] epoch #78 | Optimizing +2025-04-02 17:57:27 | [rl2_trainer] epoch #78 | Computing KL after +2025-04-02 17:57:28 | [rl2_trainer] epoch #78 | Computing loss after +2025-04-02 17:57:29 | [rl2_trainer] epoch #78 | Saving snapshot... +2025-04-02 17:57:29 | [rl2_trainer] epoch #78 | Saved +2025-04-02 17:57:29 | [rl2_trainer] epoch #78 | Time 14729.95 s +2025-04-02 17:57:29 | [rl2_trainer] epoch #78 | EpochTime 214.10 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -17.0579 +Average/AverageReturn -26.6664 +Average/Iteration 78 +Average/MaxReturn -2.9626 +Average/MinReturn -44.3455 +Average/NumEpisodes 100 +Average/StdReturn 6.79531 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.740731 +TotalEnvSteps 790000 +__unnamed_task__/AverageDiscountedReturn -17.0579 +__unnamed_task__/AverageReturn -26.6664 +__unnamed_task__/Iteration 78 +__unnamed_task__/MaxReturn -2.9626 +__unnamed_task__/MinReturn -44.3455 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.79531 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.25715 +policy/KL 0.00790452 +policy/KLBefore 0 +policy/LossAfter -0.0126213 +policy/LossBefore -0.00517856 +policy/dLoss 0.00744277 +---------------------------------------- --------------- +2025-04-02 17:59:30 | [rl2_trainer] epoch #79 | Optimizing policy... +2025-04-02 17:59:30 | [rl2_trainer] epoch #79 | Fitting baseline... +2025-04-02 17:59:30 | [rl2_trainer] epoch #79 | Computing loss before +2025-04-02 17:59:31 | [rl2_trainer] epoch #79 | Computing KL before +2025-04-02 17:59:31 | [rl2_trainer] epoch #79 | Optimizing +2025-04-02 18:00:07 | [rl2_trainer] epoch #79 | Computing KL after +2025-04-02 18:00:07 | [rl2_trainer] epoch #79 | Computing loss after +2025-04-02 18:00:08 | [rl2_trainer] epoch #79 | Saving snapshot... +2025-04-02 18:00:08 | [rl2_trainer] epoch #79 | Saved +2025-04-02 18:00:08 | [rl2_trainer] epoch #79 | Time 14889.26 s +2025-04-02 18:00:08 | [rl2_trainer] epoch #79 | EpochTime 159.31 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -20.0497 +Average/AverageReturn -31.7677 +Average/Iteration 79 +Average/MaxReturn -15.7192 +Average/MinReturn -60.9404 +Average/NumEpisodes 100 +Average/StdReturn 7.10974 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.521358 +TotalEnvSteps 800000 +__unnamed_task__/AverageDiscountedReturn -20.0497 +__unnamed_task__/AverageReturn -31.7677 +__unnamed_task__/Iteration 79 +__unnamed_task__/MaxReturn -15.7192 +__unnamed_task__/MinReturn -60.9404 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.10974 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.23604 +policy/KL 0.00594133 +policy/KLBefore 0 +policy/LossAfter -0.0116964 +policy/LossBefore 0.0011264 +policy/dLoss 0.0128228 +---------------------------------------- --------------- +2025-04-02 18:02:38 | [rl2_trainer] epoch #80 | Optimizing policy... +2025-04-02 18:02:39 | [rl2_trainer] epoch #80 | Fitting baseline... +2025-04-02 18:02:39 | [rl2_trainer] epoch #80 | Computing loss before +2025-04-02 18:02:39 | [rl2_trainer] epoch #80 | Computing KL before +2025-04-02 18:02:40 | [rl2_trainer] epoch #80 | Optimizing +2025-04-02 18:03:16 | [rl2_trainer] epoch #80 | Computing KL after +2025-04-02 18:03:16 | [rl2_trainer] epoch #80 | Computing loss after +2025-04-02 18:03:17 | [rl2_trainer] epoch #80 | Saving snapshot... +2025-04-02 18:03:17 | [rl2_trainer] epoch #80 | Saved +2025-04-02 18:03:17 | [rl2_trainer] epoch #80 | Time 15078.38 s +2025-04-02 18:03:17 | [rl2_trainer] epoch #80 | EpochTime 189.12 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -19.1847 +Average/AverageReturn -30.5959 +Average/Iteration 80 +Average/MaxReturn -12.7569 +Average/MinReturn -72.7648 +Average/NumEpisodes 100 +Average/StdReturn 8.30789 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.477923 +TotalEnvSteps 810000 +__unnamed_task__/AverageDiscountedReturn -19.1847 +__unnamed_task__/AverageReturn -30.5959 +__unnamed_task__/Iteration 80 +__unnamed_task__/MaxReturn -12.7569 +__unnamed_task__/MinReturn -72.7648 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.30789 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.21693 +policy/KL 0.00691013 +policy/KLBefore 0 +policy/LossAfter -0.0289628 +policy/LossBefore -0.00390946 +policy/dLoss 0.0250534 +---------------------------------------- --------------- +2025-04-02 18:04:44 | [rl2_trainer] epoch #81 | Optimizing policy... +2025-04-02 18:04:44 | [rl2_trainer] epoch #81 | Fitting baseline... +2025-04-02 18:04:44 | [rl2_trainer] epoch #81 | Computing loss before +2025-04-02 18:04:45 | [rl2_trainer] epoch #81 | Computing KL before +2025-04-02 18:04:45 | [rl2_trainer] epoch #81 | Optimizing +2025-04-02 18:05:21 | [rl2_trainer] epoch #81 | Computing KL after +2025-04-02 18:05:21 | [rl2_trainer] epoch #81 | Computing loss after +2025-04-02 18:05:22 | [rl2_trainer] epoch #81 | Saving snapshot... +2025-04-02 18:05:22 | [rl2_trainer] epoch #81 | Saved +2025-04-02 18:05:22 | [rl2_trainer] epoch #81 | Time 15203.43 s +2025-04-02 18:05:22 | [rl2_trainer] epoch #81 | EpochTime 125.05 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.0643 +Average/AverageReturn -28.2448 +Average/Iteration 81 +Average/MaxReturn -10.0434 +Average/MinReturn -40.768 +Average/NumEpisodes 100 +Average/StdReturn 5.37256 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.542537 +TotalEnvSteps 820000 +__unnamed_task__/AverageDiscountedReturn -18.0643 +__unnamed_task__/AverageReturn -28.2448 +__unnamed_task__/Iteration 81 +__unnamed_task__/MaxReturn -10.0434 +__unnamed_task__/MinReturn -40.768 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.37256 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.19244 +policy/KL 0.00670743 +policy/KLBefore 0 +policy/LossAfter -0.00622792 +policy/LossBefore 0.00188489 +policy/dLoss 0.00811282 +---------------------------------------- --------------- +2025-04-02 18:06:52 | [rl2_trainer] epoch #82 | Optimizing policy... +2025-04-02 18:06:53 | [rl2_trainer] epoch #82 | Fitting baseline... +2025-04-02 18:06:53 | [rl2_trainer] epoch #82 | Computing loss before +2025-04-02 18:06:53 | [rl2_trainer] epoch #82 | Computing KL before +2025-04-02 18:06:54 | [rl2_trainer] epoch #82 | Optimizing +2025-04-02 18:07:30 | [rl2_trainer] epoch #82 | Computing KL after +2025-04-02 18:07:30 | [rl2_trainer] epoch #82 | Computing loss after +2025-04-02 18:07:31 | [rl2_trainer] epoch #82 | Saving snapshot... +2025-04-02 18:07:31 | [rl2_trainer] epoch #82 | Saved +2025-04-02 18:07:31 | [rl2_trainer] epoch #82 | Time 15332.18 s +2025-04-02 18:07:31 | [rl2_trainer] epoch #82 | EpochTime 128.75 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.2701 +Average/AverageReturn -28.5465 +Average/Iteration 82 +Average/MaxReturn -8.68775 +Average/MinReturn -130.186 +Average/NumEpisodes 100 +Average/StdReturn 11.9367 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.812086 +TotalEnvSteps 830000 +__unnamed_task__/AverageDiscountedReturn -18.2701 +__unnamed_task__/AverageReturn -28.5465 +__unnamed_task__/Iteration 82 +__unnamed_task__/MaxReturn -8.68775 +__unnamed_task__/MinReturn -130.186 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.9367 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.16193 +policy/KL 0.00719706 +policy/KLBefore 0 +policy/LossAfter -0.00305459 +policy/LossBefore 0.0111834 +policy/dLoss 0.014238 +---------------------------------------- --------------- +2025-04-02 18:11:26 | [rl2_trainer] epoch #83 | Optimizing policy... +2025-04-02 18:11:26 | [rl2_trainer] epoch #83 | Fitting baseline... +2025-04-02 18:11:26 | [rl2_trainer] epoch #83 | Computing loss before +2025-04-02 18:11:27 | [rl2_trainer] epoch #83 | Computing KL before +2025-04-02 18:11:27 | [rl2_trainer] epoch #83 | Optimizing +2025-04-02 18:12:03 | [rl2_trainer] epoch #83 | Computing KL after +2025-04-02 18:12:04 | [rl2_trainer] epoch #83 | Computing loss after +2025-04-02 18:12:05 | [rl2_trainer] epoch #83 | Saving snapshot... +2025-04-02 18:12:05 | [rl2_trainer] epoch #83 | Saved +2025-04-02 18:12:05 | [rl2_trainer] epoch #83 | Time 15605.88 s +2025-04-02 18:12:05 | [rl2_trainer] epoch #83 | EpochTime 273.70 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -17.6743 +Average/AverageReturn -28.0285 +Average/Iteration 83 +Average/MaxReturn -7.87462 +Average/MinReturn -46.9258 +Average/NumEpisodes 100 +Average/StdReturn 7.50959 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.521961 +TotalEnvSteps 840000 +__unnamed_task__/AverageDiscountedReturn -17.6743 +__unnamed_task__/AverageReturn -28.0285 +__unnamed_task__/Iteration 83 +__unnamed_task__/MaxReturn -7.87462 +__unnamed_task__/MinReturn -46.9258 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.50959 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.14423 +policy/KL 0.00742841 +policy/KLBefore 0 +policy/LossAfter -0.0220547 +policy/LossBefore -0.00194652 +policy/dLoss 0.0201082 +---------------------------------------- --------------- +2025-04-02 18:14:29 | [rl2_trainer] epoch #84 | Optimizing policy... +2025-04-02 18:14:29 | [rl2_trainer] epoch #84 | Fitting baseline... +2025-04-02 18:14:29 | [rl2_trainer] epoch #84 | Computing loss before +2025-04-02 18:14:30 | [rl2_trainer] epoch #84 | Computing KL before +2025-04-02 18:14:31 | [rl2_trainer] epoch #84 | Optimizing +2025-04-02 18:15:05 | [rl2_trainer] epoch #84 | Computing KL after +2025-04-02 18:15:06 | [rl2_trainer] epoch #84 | Computing loss after +2025-04-02 18:15:06 | [rl2_trainer] epoch #84 | Saving snapshot... +2025-04-02 18:15:06 | [rl2_trainer] epoch #84 | Saved +2025-04-02 18:15:06 | [rl2_trainer] epoch #84 | Time 15787.47 s +2025-04-02 18:15:06 | [rl2_trainer] epoch #84 | EpochTime 181.59 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -15.2253 +Average/AverageReturn -23.6823 +Average/Iteration 84 +Average/MaxReturn -11.5551 +Average/MinReturn -40.4078 +Average/NumEpisodes 100 +Average/StdReturn 4.94142 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.415435 +TotalEnvSteps 850000 +__unnamed_task__/AverageDiscountedReturn -15.2253 +__unnamed_task__/AverageReturn -23.6823 +__unnamed_task__/Iteration 84 +__unnamed_task__/MaxReturn -11.5551 +__unnamed_task__/MinReturn -40.4078 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.94142 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.13115 +policy/KL 0.00723157 +policy/KLBefore 0 +policy/LossAfter -0.0151895 +policy/LossBefore -0.00213713 +policy/dLoss 0.0130523 +---------------------------------------- --------------- +2025-04-02 18:16:33 | [rl2_trainer] epoch #85 | Optimizing policy... +2025-04-02 18:16:34 | [rl2_trainer] epoch #85 | Fitting baseline... +2025-04-02 18:16:34 | [rl2_trainer] epoch #85 | Computing loss before +2025-04-02 18:16:34 | [rl2_trainer] epoch #85 | Computing KL before +2025-04-02 18:16:35 | [rl2_trainer] epoch #85 | Optimizing +2025-04-02 18:17:09 | [rl2_trainer] epoch #85 | Computing KL after +2025-04-02 18:17:10 | [rl2_trainer] epoch #85 | Computing loss after +2025-04-02 18:17:11 | [rl2_trainer] epoch #85 | Saving snapshot... +2025-04-02 18:17:11 | [rl2_trainer] epoch #85 | Saved +2025-04-02 18:17:11 | [rl2_trainer] epoch #85 | Time 15911.69 s +2025-04-02 18:17:11 | [rl2_trainer] epoch #85 | EpochTime 124.22 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -17.3714 +Average/AverageReturn -26.9134 +Average/Iteration 85 +Average/MaxReturn -14.0474 +Average/MinReturn -41.9807 +Average/NumEpisodes 100 +Average/StdReturn 5.75238 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.660034 +TotalEnvSteps 860000 +__unnamed_task__/AverageDiscountedReturn -17.3714 +__unnamed_task__/AverageReturn -26.9134 +__unnamed_task__/Iteration 85 +__unnamed_task__/MaxReturn -14.0474 +__unnamed_task__/MinReturn -41.9807 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.75238 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.11408 +policy/KL 0.00836392 +policy/KLBefore 0 +policy/LossAfter -0.0198876 +policy/LossBefore -0.00816547 +policy/dLoss 0.0117221 +---------------------------------------- --------------- +2025-04-02 18:20:47 | [rl2_trainer] epoch #86 | Optimizing policy... +2025-04-02 18:20:48 | [rl2_trainer] epoch #86 | Fitting baseline... +2025-04-02 18:20:48 | [rl2_trainer] epoch #86 | Computing loss before +2025-04-02 18:20:48 | [rl2_trainer] epoch #86 | Computing KL before +2025-04-02 18:20:49 | [rl2_trainer] epoch #86 | Optimizing +2025-04-02 18:21:24 | [rl2_trainer] epoch #86 | Computing KL after +2025-04-02 18:21:25 | [rl2_trainer] epoch #86 | Computing loss after +2025-04-02 18:21:25 | [rl2_trainer] epoch #86 | Saving snapshot... +2025-04-02 18:21:25 | [rl2_trainer] epoch #86 | Saved +2025-04-02 18:21:25 | [rl2_trainer] epoch #86 | Time 16166.48 s +2025-04-02 18:21:25 | [rl2_trainer] epoch #86 | EpochTime 254.78 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -16.7836 +Average/AverageReturn -26.6281 +Average/Iteration 86 +Average/MaxReturn 14.8031 +Average/MinReturn -81.4763 +Average/NumEpisodes 100 +Average/StdReturn 11.1089 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.570631 +TotalEnvSteps 870000 +__unnamed_task__/AverageDiscountedReturn -16.7836 +__unnamed_task__/AverageReturn -26.6281 +__unnamed_task__/Iteration 86 +__unnamed_task__/MaxReturn 14.8031 +__unnamed_task__/MinReturn -81.4763 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.1089 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.0956 +policy/KL 0.0130083 +policy/KLBefore 0 +policy/LossAfter -0.0519966 +policy/LossBefore -0.0138958 +policy/dLoss 0.0381008 +---------------------------------------- -------------- +2025-04-02 18:23:46 | [rl2_trainer] epoch #87 | Optimizing policy... +2025-04-02 18:23:46 | [rl2_trainer] epoch #87 | Fitting baseline... +2025-04-02 18:23:46 | [rl2_trainer] epoch #87 | Computing loss before +2025-04-02 18:23:47 | [rl2_trainer] epoch #87 | Computing KL before +2025-04-02 18:23:48 | [rl2_trainer] epoch #87 | Optimizing +2025-04-02 18:24:24 | [rl2_trainer] epoch #87 | Computing KL after +2025-04-02 18:24:25 | [rl2_trainer] epoch #87 | Computing loss after +2025-04-02 18:24:25 | [rl2_trainer] epoch #87 | Saving snapshot... +2025-04-02 18:24:25 | [rl2_trainer] epoch #87 | Saved +2025-04-02 18:24:25 | [rl2_trainer] epoch #87 | Time 16346.52 s +2025-04-02 18:24:25 | [rl2_trainer] epoch #87 | EpochTime 180.04 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.7449 +Average/AverageReturn -26.1829 +Average/Iteration 87 +Average/MaxReturn -2.5768 +Average/MinReturn -41.7796 +Average/NumEpisodes 100 +Average/StdReturn 6.69212 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.692642 +TotalEnvSteps 880000 +__unnamed_task__/AverageDiscountedReturn -16.7449 +__unnamed_task__/AverageReturn -26.1829 +__unnamed_task__/Iteration 87 +__unnamed_task__/MaxReturn -2.5768 +__unnamed_task__/MinReturn -41.7796 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.69212 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.08517 +policy/KL 0.00698796 +policy/KLBefore 0 +policy/LossAfter -0.0199065 +policy/LossBefore -0.00278168 +policy/dLoss 0.0171249 +---------------------------------------- --------------- +2025-04-02 18:27:19 | [rl2_trainer] epoch #88 | Optimizing policy... +2025-04-02 18:27:19 | [rl2_trainer] epoch #88 | Fitting baseline... +2025-04-02 18:27:19 | [rl2_trainer] epoch #88 | Computing loss before +2025-04-02 18:27:20 | [rl2_trainer] epoch #88 | Computing KL before +2025-04-02 18:27:20 | [rl2_trainer] epoch #88 | Optimizing +2025-04-02 18:27:56 | [rl2_trainer] epoch #88 | Computing KL after +2025-04-02 18:27:57 | [rl2_trainer] epoch #88 | Computing loss after +2025-04-02 18:27:58 | [rl2_trainer] epoch #88 | Saving snapshot... +2025-04-02 18:27:58 | [rl2_trainer] epoch #88 | Saved +2025-04-02 18:27:58 | [rl2_trainer] epoch #88 | Time 16558.69 s +2025-04-02 18:27:58 | [rl2_trainer] epoch #88 | EpochTime 212.16 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -17.7861 +Average/AverageReturn -28.1213 +Average/Iteration 88 +Average/MaxReturn 3.32067 +Average/MinReturn -61.0914 +Average/NumEpisodes 100 +Average/StdReturn 8.58698 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.476159 +TotalEnvSteps 890000 +__unnamed_task__/AverageDiscountedReturn -17.7861 +__unnamed_task__/AverageReturn -28.1213 +__unnamed_task__/Iteration 88 +__unnamed_task__/MaxReturn 3.32067 +__unnamed_task__/MinReturn -61.0914 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.58698 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.08334 +policy/KL 0.0109168 +policy/KLBefore 0 +policy/LossAfter -0.0592105 +policy/LossBefore -0.0261853 +policy/dLoss 0.0330252 +---------------------------------------- -------------- +2025-04-02 18:30:14 | [rl2_trainer] epoch #89 | Optimizing policy... +2025-04-02 18:30:15 | [rl2_trainer] epoch #89 | Fitting baseline... +2025-04-02 18:30:15 | [rl2_trainer] epoch #89 | Computing loss before +2025-04-02 18:30:15 | [rl2_trainer] epoch #89 | Computing KL before +2025-04-02 18:30:16 | [rl2_trainer] epoch #89 | Optimizing +2025-04-02 18:30:52 | [rl2_trainer] epoch #89 | Computing KL after +2025-04-02 18:30:53 | [rl2_trainer] epoch #89 | Computing loss after +2025-04-02 18:30:54 | [rl2_trainer] epoch #89 | Saving snapshot... +2025-04-02 18:30:54 | [rl2_trainer] epoch #89 | Saved +2025-04-02 18:30:54 | [rl2_trainer] epoch #89 | Time 16734.67 s +2025-04-02 18:30:54 | [rl2_trainer] epoch #89 | EpochTime 175.97 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -15.8284 +Average/AverageReturn -24.4036 +Average/Iteration 89 +Average/MaxReturn 15.1521 +Average/MinReturn -40.2524 +Average/NumEpisodes 100 +Average/StdReturn 7.45971 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.568718 +TotalEnvSteps 900000 +__unnamed_task__/AverageDiscountedReturn -15.8284 +__unnamed_task__/AverageReturn -24.4036 +__unnamed_task__/Iteration 89 +__unnamed_task__/MaxReturn 15.1521 +__unnamed_task__/MinReturn -40.2524 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.45971 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.05627 +policy/KL 0.00758687 +policy/KLBefore 0 +policy/LossAfter -0.019978 +policy/LossBefore 0.00158783 +policy/dLoss 0.0215658 +---------------------------------------- --------------- +2025-04-02 18:34:56 | [rl2_trainer] epoch #90 | Optimizing policy... +2025-04-02 18:34:56 | [rl2_trainer] epoch #90 | Fitting baseline... +2025-04-02 18:34:56 | [rl2_trainer] epoch #90 | Computing loss before +2025-04-02 18:34:57 | [rl2_trainer] epoch #90 | Computing KL before +2025-04-02 18:34:58 | [rl2_trainer] epoch #90 | Optimizing +2025-04-02 18:35:32 | [rl2_trainer] epoch #90 | Computing KL after +2025-04-02 18:35:33 | [rl2_trainer] epoch #90 | Computing loss after +2025-04-02 18:35:34 | [rl2_trainer] epoch #90 | Saving snapshot... +2025-04-02 18:35:34 | [rl2_trainer] epoch #90 | Saved +2025-04-02 18:35:34 | [rl2_trainer] epoch #90 | Time 17014.82 s +2025-04-02 18:35:34 | [rl2_trainer] epoch #90 | EpochTime 280.15 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.2011 +Average/AverageReturn -28.946 +Average/Iteration 90 +Average/MaxReturn -10.3595 +Average/MinReturn -45.3235 +Average/NumEpisodes 100 +Average/StdReturn 7.23395 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.544292 +TotalEnvSteps 910000 +__unnamed_task__/AverageDiscountedReturn -18.2011 +__unnamed_task__/AverageReturn -28.946 +__unnamed_task__/Iteration 90 +__unnamed_task__/MaxReturn -10.3595 +__unnamed_task__/MinReturn -45.3235 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.23395 +__unnamed_task__/TerminationRate 0 +policy/Entropy 8.02809 +policy/KL 0.00874927 +policy/KLBefore 0 +policy/LossAfter -0.0112226 +policy/LossBefore 0.00650398 +policy/dLoss 0.0177266 +---------------------------------------- --------------- +2025-04-02 18:38:32 | [rl2_trainer] epoch #91 | Optimizing policy... +2025-04-02 18:38:32 | [rl2_trainer] epoch #91 | Fitting baseline... +2025-04-02 18:38:32 | [rl2_trainer] epoch #91 | Computing loss before +2025-04-02 18:38:33 | [rl2_trainer] epoch #91 | Computing KL before +2025-04-02 18:38:34 | [rl2_trainer] epoch #91 | Optimizing +2025-04-02 18:39:06 | [rl2_trainer] epoch #91 | Computing KL after +2025-04-02 18:39:07 | [rl2_trainer] epoch #91 | Computing loss after +2025-04-02 18:39:08 | [rl2_trainer] epoch #91 | Saving snapshot... +2025-04-02 18:39:08 | [rl2_trainer] epoch #91 | Saved +2025-04-02 18:39:08 | [rl2_trainer] epoch #91 | Time 17228.56 s +2025-04-02 18:39:08 | [rl2_trainer] epoch #91 | EpochTime 213.73 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -17.7224 +Average/AverageReturn -28.0901 +Average/Iteration 91 +Average/MaxReturn -0.738187 +Average/MinReturn -135.546 +Average/NumEpisodes 100 +Average/StdReturn 15.3596 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.588928 +TotalEnvSteps 920000 +__unnamed_task__/AverageDiscountedReturn -17.7224 +__unnamed_task__/AverageReturn -28.0901 +__unnamed_task__/Iteration 91 +__unnamed_task__/MaxReturn -0.738187 +__unnamed_task__/MinReturn -135.546 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.3596 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.9865 +policy/KL 0.0113879 +policy/KLBefore 0 +policy/LossAfter -0.0603134 +policy/LossBefore -0.0087794 +policy/dLoss 0.051534 +---------------------------------------- -------------- +2025-04-02 18:41:59 | [rl2_trainer] epoch #92 | Optimizing policy... +2025-04-02 18:41:59 | [rl2_trainer] epoch #92 | Fitting baseline... +2025-04-02 18:41:59 | [rl2_trainer] epoch #92 | Computing loss before +2025-04-02 18:42:00 | [rl2_trainer] epoch #92 | Computing KL before +2025-04-02 18:42:00 | [rl2_trainer] epoch #92 | Optimizing +2025-04-02 18:42:37 | [rl2_trainer] epoch #92 | Computing KL after +2025-04-02 18:42:37 | [rl2_trainer] epoch #92 | Computing loss after +2025-04-02 18:42:38 | [rl2_trainer] epoch #92 | Saving snapshot... +2025-04-02 18:42:38 | [rl2_trainer] epoch #92 | Saved +2025-04-02 18:42:38 | [rl2_trainer] epoch #92 | Time 17439.41 s +2025-04-02 18:42:38 | [rl2_trainer] epoch #92 | EpochTime 210.85 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.4711 +Average/AverageReturn -26.0391 +Average/Iteration 92 +Average/MaxReturn -5.23487 +Average/MinReturn -45.6739 +Average/NumEpisodes 100 +Average/StdReturn 6.69394 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.531686 +TotalEnvSteps 930000 +__unnamed_task__/AverageDiscountedReturn -16.4711 +__unnamed_task__/AverageReturn -26.0391 +__unnamed_task__/Iteration 92 +__unnamed_task__/MaxReturn -5.23487 +__unnamed_task__/MinReturn -45.6739 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.69394 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.95034 +policy/KL 0.00704272 +policy/KLBefore 0 +policy/LossAfter -0.00934268 +policy/LossBefore 0.00490316 +policy/dLoss 0.0142458 +---------------------------------------- --------------- +2025-04-02 18:47:11 | [rl2_trainer] epoch #93 | Optimizing policy... +2025-04-02 18:47:11 | [rl2_trainer] epoch #93 | Fitting baseline... +2025-04-02 18:47:11 | [rl2_trainer] epoch #93 | Computing loss before +2025-04-02 18:47:12 | [rl2_trainer] epoch #93 | Computing KL before +2025-04-02 18:47:12 | [rl2_trainer] epoch #93 | Optimizing +2025-04-02 18:47:48 | [rl2_trainer] epoch #93 | Computing KL after +2025-04-02 18:47:49 | [rl2_trainer] epoch #93 | Computing loss after +2025-04-02 18:47:50 | [rl2_trainer] epoch #93 | Saving snapshot... +2025-04-02 18:47:50 | [rl2_trainer] epoch #93 | Saved +2025-04-02 18:47:50 | [rl2_trainer] epoch #93 | Time 17750.83 s +2025-04-02 18:47:50 | [rl2_trainer] epoch #93 | EpochTime 311.42 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.0023 +Average/AverageReturn -25.0705 +Average/Iteration 93 +Average/MaxReturn -8.02995 +Average/MinReturn -53.477 +Average/NumEpisodes 100 +Average/StdReturn 7.14242 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.45873 +TotalEnvSteps 940000 +__unnamed_task__/AverageDiscountedReturn -16.0023 +__unnamed_task__/AverageReturn -25.0705 +__unnamed_task__/Iteration 93 +__unnamed_task__/MaxReturn -8.02995 +__unnamed_task__/MinReturn -53.477 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.14242 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.92779 +policy/KL 0.00778215 +policy/KLBefore 0 +policy/LossAfter -0.0191474 +policy/LossBefore -0.0037338 +policy/dLoss 0.0154136 +---------------------------------------- --------------- +2025-04-02 18:49:47 | [rl2_trainer] epoch #94 | Optimizing policy... +2025-04-02 18:49:47 | [rl2_trainer] epoch #94 | Fitting baseline... +2025-04-02 18:49:47 | [rl2_trainer] epoch #94 | Computing loss before +2025-04-02 18:49:48 | [rl2_trainer] epoch #94 | Computing KL before +2025-04-02 18:49:48 | [rl2_trainer] epoch #94 | Optimizing +2025-04-02 18:50:24 | [rl2_trainer] epoch #94 | Computing KL after +2025-04-02 18:50:24 | [rl2_trainer] epoch #94 | Computing loss after +2025-04-02 18:50:25 | [rl2_trainer] epoch #94 | Saving snapshot... +2025-04-02 18:50:25 | [rl2_trainer] epoch #94 | Saved +2025-04-02 18:50:25 | [rl2_trainer] epoch #94 | Time 17906.21 s +2025-04-02 18:50:25 | [rl2_trainer] epoch #94 | EpochTime 155.37 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.3959 +Average/AverageReturn -25.9836 +Average/Iteration 94 +Average/MaxReturn -1.44195 +Average/MinReturn -61.1993 +Average/NumEpisodes 100 +Average/StdReturn 8.44535 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.52235 +TotalEnvSteps 950000 +__unnamed_task__/AverageDiscountedReturn -16.3959 +__unnamed_task__/AverageReturn -25.9836 +__unnamed_task__/Iteration 94 +__unnamed_task__/MaxReturn -1.44195 +__unnamed_task__/MinReturn -61.1993 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.44535 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.91885 +policy/KL 0.0110303 +policy/KLBefore 0 +policy/LossAfter -0.0282145 +policy/LossBefore -0.00329757 +policy/dLoss 0.0249169 +---------------------------------------- --------------- +2025-04-02 18:53:22 | [rl2_trainer] epoch #95 | Optimizing policy... +2025-04-02 18:53:22 | [rl2_trainer] epoch #95 | Fitting baseline... +2025-04-02 18:53:22 | [rl2_trainer] epoch #95 | Computing loss before +2025-04-02 18:53:23 | [rl2_trainer] epoch #95 | Computing KL before +2025-04-02 18:53:24 | [rl2_trainer] epoch #95 | Optimizing +2025-04-02 18:53:56 | [rl2_trainer] epoch #95 | Computing KL after +2025-04-02 18:53:57 | [rl2_trainer] epoch #95 | Computing loss after +2025-04-02 18:53:58 | [rl2_trainer] epoch #95 | Saving snapshot... +2025-04-02 18:53:58 | [rl2_trainer] epoch #95 | Saved +2025-04-02 18:53:58 | [rl2_trainer] epoch #95 | Time 18118.76 s +2025-04-02 18:53:58 | [rl2_trainer] epoch #95 | EpochTime 212.55 s +---------------------------------------- ---------------- +Average/AverageDiscountedReturn -16.7036 +Average/AverageReturn -25.9273 +Average/Iteration 95 +Average/MaxReturn -5.95484 +Average/MinReturn -42.4591 +Average/NumEpisodes 100 +Average/StdReturn 6.35964 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.622433 +TotalEnvSteps 960000 +__unnamed_task__/AverageDiscountedReturn -16.7036 +__unnamed_task__/AverageReturn -25.9273 +__unnamed_task__/Iteration 95 +__unnamed_task__/MaxReturn -5.95484 +__unnamed_task__/MinReturn -42.4591 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.35964 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.89886 +policy/KL 0.00590856 +policy/KLBefore 0 +policy/LossAfter -0.011925 +policy/LossBefore -1.75003e-05 +policy/dLoss 0.0119075 +---------------------------------------- ---------------- +2025-04-02 18:56:44 | [rl2_trainer] epoch #96 | Optimizing policy... +2025-04-02 18:56:44 | [rl2_trainer] epoch #96 | Fitting baseline... +2025-04-02 18:56:44 | [rl2_trainer] epoch #96 | Computing loss before +2025-04-02 18:56:45 | [rl2_trainer] epoch #96 | Computing KL before +2025-04-02 18:56:46 | [rl2_trainer] epoch #96 | Optimizing +2025-04-02 18:57:21 | [rl2_trainer] epoch #96 | Computing KL after +2025-04-02 18:57:22 | [rl2_trainer] epoch #96 | Computing loss after +2025-04-02 18:57:23 | [rl2_trainer] epoch #96 | Saving snapshot... +2025-04-02 18:57:23 | [rl2_trainer] epoch #96 | Saved +2025-04-02 18:57:23 | [rl2_trainer] epoch #96 | Time 18323.76 s +2025-04-02 18:57:23 | [rl2_trainer] epoch #96 | EpochTime 205.00 s +---------------------------------------- -------------- +Average/AverageDiscountedReturn -16.5465 +Average/AverageReturn -26.0797 +Average/Iteration 96 +Average/MaxReturn -2.35081 +Average/MinReturn -101.193 +Average/NumEpisodes 100 +Average/StdReturn 11.3375 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.465991 +TotalEnvSteps 970000 +__unnamed_task__/AverageDiscountedReturn -16.5465 +__unnamed_task__/AverageReturn -26.0797 +__unnamed_task__/Iteration 96 +__unnamed_task__/MaxReturn -2.35081 +__unnamed_task__/MinReturn -101.193 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.3375 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.88085 +policy/KL 0.0115735 +policy/KLBefore 0 +policy/LossAfter -0.0539997 +policy/LossBefore -0.017418 +policy/dLoss 0.0365817 +---------------------------------------- -------------- +2025-04-02 18:59:28 | [rl2_trainer] epoch #97 | Optimizing policy... +2025-04-02 18:59:28 | [rl2_trainer] epoch #97 | Fitting baseline... +2025-04-02 18:59:28 | [rl2_trainer] epoch #97 | Computing loss before +2025-04-02 18:59:29 | [rl2_trainer] epoch #97 | Computing KL before +2025-04-02 18:59:29 | [rl2_trainer] epoch #97 | Optimizing +2025-04-02 19:00:06 | [rl2_trainer] epoch #97 | Computing KL after +2025-04-02 19:00:06 | [rl2_trainer] epoch #97 | Computing loss after +2025-04-02 19:00:07 | [rl2_trainer] epoch #97 | Saving snapshot... +2025-04-02 19:00:07 | [rl2_trainer] epoch #97 | Saved +2025-04-02 19:00:07 | [rl2_trainer] epoch #97 | Time 18488.51 s +2025-04-02 19:00:07 | [rl2_trainer] epoch #97 | EpochTime 164.74 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -18.5671 +Average/AverageReturn -29.2539 +Average/Iteration 97 +Average/MaxReturn -17.0735 +Average/MinReturn -59.6581 +Average/NumEpisodes 100 +Average/StdReturn 6.2734 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.48912 +TotalEnvSteps 980000 +__unnamed_task__/AverageDiscountedReturn -18.5671 +__unnamed_task__/AverageReturn -29.2539 +__unnamed_task__/Iteration 97 +__unnamed_task__/MaxReturn -17.0735 +__unnamed_task__/MinReturn -59.6581 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.2734 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.87558 +policy/KL 0.00970136 +policy/KLBefore 0 +policy/LossAfter -0.0143333 +policy/LossBefore 0.00412517 +policy/dLoss 0.0184585 +---------------------------------------- --------------- +2025-04-02 19:04:01 | [rl2_trainer] epoch #98 | Optimizing policy... +2025-04-02 19:04:02 | [rl2_trainer] epoch #98 | Fitting baseline... +2025-04-02 19:04:02 | [rl2_trainer] epoch #98 | Computing loss before +2025-04-02 19:04:02 | [rl2_trainer] epoch #98 | Computing KL before +2025-04-02 19:04:03 | [rl2_trainer] epoch #98 | Optimizing +2025-04-02 19:04:39 | [rl2_trainer] epoch #98 | Computing KL after +2025-04-02 19:04:40 | [rl2_trainer] epoch #98 | Computing loss after +2025-04-02 19:04:41 | [rl2_trainer] epoch #98 | Saving snapshot... +2025-04-02 19:04:41 | [rl2_trainer] epoch #98 | Saved +2025-04-02 19:04:41 | [rl2_trainer] epoch #98 | Time 18761.91 s +2025-04-02 19:04:41 | [rl2_trainer] epoch #98 | EpochTime 273.40 s +---------------------------------------- --------------- +Average/AverageDiscountedReturn -16.7809 +Average/AverageReturn -26.6846 +Average/Iteration 98 +Average/MaxReturn -0.935712 +Average/MinReturn -62.0507 +Average/NumEpisodes 100 +Average/StdReturn 8.76258 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.494755 +TotalEnvSteps 990000 +__unnamed_task__/AverageDiscountedReturn -16.7809 +__unnamed_task__/AverageReturn -26.6846 +__unnamed_task__/Iteration 98 +__unnamed_task__/MaxReturn -0.935712 +__unnamed_task__/MinReturn -62.0507 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.76258 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.84269 +policy/KL 0.0107056 +policy/KLBefore 0 +policy/LossAfter -0.021185 +policy/LossBefore 0.00203768 +policy/dLoss 0.0232226 +---------------------------------------- --------------- +2025-04-02 19:06:40 | [rl2_trainer] epoch #99 | Optimizing policy... +2025-04-02 19:06:41 | [rl2_trainer] epoch #99 | Fitting baseline... +2025-04-02 19:06:41 | [rl2_trainer] epoch #99 | Computing loss before +2025-04-02 19:06:41 | [rl2_trainer] epoch #99 | Computing KL before +2025-04-02 19:06:42 | [rl2_trainer] epoch #99 | Optimizing +2025-04-02 19:07:16 | [rl2_trainer] epoch #99 | Computing KL after +2025-04-02 19:07:17 | [rl2_trainer] epoch #99 | Computing loss after +2025-04-02 19:07:17 | [rl2_trainer] epoch #99 | Saving snapshot... +2025-04-02 19:07:17 | [rl2_trainer] epoch #99 | Saved +2025-04-02 19:07:17 | [rl2_trainer] epoch #99 | Time 18918.52 s +2025-04-02 19:07:17 | [rl2_trainer] epoch #99 | EpochTime 156.61 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -19.3551 +Average/AverageReturn -30.2973 +Average/Iteration 99 +Average/MaxReturn -17.5174 +Average/MinReturn -52.568 +Average/NumEpisodes 100 +Average/StdReturn 6.62782 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.487172 +TotalEnvSteps 1e+06 +__unnamed_task__/AverageDiscountedReturn -19.3551 +__unnamed_task__/AverageReturn -30.2973 +__unnamed_task__/Iteration 99 +__unnamed_task__/MaxReturn -17.5174 +__unnamed_task__/MinReturn -52.568 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.62782 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.81402 +policy/KL 0.00865924 +policy/KLBefore 0 +policy/LossAfter -0.00895551 +policy/LossBefore 0.00135599 +policy/dLoss 0.0103115 +---------------------------------------- ------------ +2025-04-02 19:09:51 | [rl2_trainer] epoch #100 | Optimizing policy... +2025-04-02 19:09:51 | [rl2_trainer] epoch #100 | Fitting baseline... +2025-04-02 19:09:51 | [rl2_trainer] epoch #100 | Computing loss before +2025-04-02 19:09:52 | [rl2_trainer] epoch #100 | Computing KL before +2025-04-02 19:09:52 | [rl2_trainer] epoch #100 | Optimizing +2025-04-02 19:10:25 | [rl2_trainer] epoch #100 | Computing KL after +2025-04-02 19:10:26 | [rl2_trainer] epoch #100 | Computing loss after +2025-04-02 19:10:27 | [rl2_trainer] epoch #100 | Saving snapshot... +2025-04-02 19:10:27 | [rl2_trainer] epoch #100 | Saved +2025-04-02 19:10:27 | [rl2_trainer] epoch #100 | Time 19107.63 s +2025-04-02 19:10:27 | [rl2_trainer] epoch #100 | EpochTime 189.11 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.9117 +Average/AverageReturn -25.239 +Average/Iteration 100 +Average/MaxReturn 1.2102 +Average/MinReturn -75.4078 +Average/NumEpisodes 100 +Average/StdReturn 8.31372 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.446369 +TotalEnvSteps 1.01e+06 +__unnamed_task__/AverageDiscountedReturn -15.9117 +__unnamed_task__/AverageReturn -25.239 +__unnamed_task__/Iteration 100 +__unnamed_task__/MaxReturn 1.2102 +__unnamed_task__/MinReturn -75.4078 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.31372 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.78772 +policy/KL 0.00833455 +policy/KLBefore 0 +policy/LossAfter -0.0528949 +policy/LossBefore -0.0246386 +policy/dLoss 0.0282562 +---------------------------------------- ------------ +2025-04-02 19:12:42 | [rl2_trainer] epoch #101 | Optimizing policy... +2025-04-02 19:12:42 | [rl2_trainer] epoch #101 | Fitting baseline... +2025-04-02 19:12:42 | [rl2_trainer] epoch #101 | Computing loss before +2025-04-02 19:12:43 | [rl2_trainer] epoch #101 | Computing KL before +2025-04-02 19:12:43 | [rl2_trainer] epoch #101 | Optimizing +2025-04-02 19:13:15 | [rl2_trainer] epoch #101 | Computing KL after +2025-04-02 19:13:16 | [rl2_trainer] epoch #101 | Computing loss after +2025-04-02 19:13:17 | [rl2_trainer] epoch #101 | Saving snapshot... +2025-04-02 19:13:17 | [rl2_trainer] epoch #101 | Saved +2025-04-02 19:13:17 | [rl2_trainer] epoch #101 | Time 19277.74 s +2025-04-02 19:13:17 | [rl2_trainer] epoch #101 | EpochTime 170.10 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8828 +Average/AverageReturn -25.0314 +Average/Iteration 101 +Average/MaxReturn -7.6361 +Average/MinReturn -76.5431 +Average/NumEpisodes 100 +Average/StdReturn 8.61649 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.59824 +TotalEnvSteps 1.02e+06 +__unnamed_task__/AverageDiscountedReturn -15.8828 +__unnamed_task__/AverageReturn -25.0314 +__unnamed_task__/Iteration 101 +__unnamed_task__/MaxReturn -7.6361 +__unnamed_task__/MinReturn -76.5431 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.61649 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.77006 +policy/KL 0.0115254 +policy/KLBefore 0 +policy/LossAfter -0.0202946 +policy/LossBefore -0.000435596 +policy/dLoss 0.019859 +---------------------------------------- ------------- +2025-04-02 19:15:36 | [rl2_trainer] epoch #102 | Optimizing policy... +2025-04-02 19:15:37 | [rl2_trainer] epoch #102 | Fitting baseline... +2025-04-02 19:15:37 | [rl2_trainer] epoch #102 | Computing loss before +2025-04-02 19:15:37 | [rl2_trainer] epoch #102 | Computing KL before +2025-04-02 19:15:38 | [rl2_trainer] epoch #102 | Optimizing +2025-04-02 19:16:12 | [rl2_trainer] epoch #102 | Computing KL after +2025-04-02 19:16:12 | [rl2_trainer] epoch #102 | Computing loss after +2025-04-02 19:16:13 | [rl2_trainer] epoch #102 | Saving snapshot... +2025-04-02 19:16:13 | [rl2_trainer] epoch #102 | Saved +2025-04-02 19:16:13 | [rl2_trainer] epoch #102 | Time 19454.30 s +2025-04-02 19:16:13 | [rl2_trainer] epoch #102 | EpochTime 176.56 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.4502 +Average/AverageReturn -22.5275 +Average/Iteration 102 +Average/MaxReturn -2.56088 +Average/MinReturn -44.2208 +Average/NumEpisodes 100 +Average/StdReturn 5.53913 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.379603 +TotalEnvSteps 1.03e+06 +__unnamed_task__/AverageDiscountedReturn -14.4502 +__unnamed_task__/AverageReturn -22.5275 +__unnamed_task__/Iteration 102 +__unnamed_task__/MaxReturn -2.56088 +__unnamed_task__/MinReturn -44.2208 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.53913 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.7516 +policy/KL 0.00764304 +policy/KLBefore 0 +policy/LossAfter -0.0106112 +policy/LossBefore 0.00055129 +policy/dLoss 0.0111625 +---------------------------------------- ------------ +2025-04-02 19:19:28 | [rl2_trainer] epoch #103 | Optimizing policy... +2025-04-02 19:19:28 | [rl2_trainer] epoch #103 | Fitting baseline... +2025-04-02 19:19:28 | [rl2_trainer] epoch #103 | Computing loss before +2025-04-02 19:19:29 | [rl2_trainer] epoch #103 | Computing KL before +2025-04-02 19:19:29 | [rl2_trainer] epoch #103 | Optimizing +2025-04-02 19:20:06 | [rl2_trainer] epoch #103 | Computing KL after +2025-04-02 19:20:06 | [rl2_trainer] epoch #103 | Computing loss after +2025-04-02 19:20:07 | [rl2_trainer] epoch #103 | Saving snapshot... +2025-04-02 19:20:07 | [rl2_trainer] epoch #103 | Saved +2025-04-02 19:20:07 | [rl2_trainer] epoch #103 | Time 19688.21 s +2025-04-02 19:20:07 | [rl2_trainer] epoch #103 | EpochTime 233.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1794 +Average/AverageReturn -23.8484 +Average/Iteration 103 +Average/MaxReturn -1.41208 +Average/MinReturn -66.4928 +Average/NumEpisodes 100 +Average/StdReturn 8.45554 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.461962 +TotalEnvSteps 1.04e+06 +__unnamed_task__/AverageDiscountedReturn -15.1794 +__unnamed_task__/AverageReturn -23.8484 +__unnamed_task__/Iteration 103 +__unnamed_task__/MaxReturn -1.41208 +__unnamed_task__/MinReturn -66.4928 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.45554 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.72153 +policy/KL 0.0129074 +policy/KLBefore 0 +policy/LossAfter -0.0146912 +policy/LossBefore 0.00769071 +policy/dLoss 0.0223819 +---------------------------------------- ------------ +2025-04-02 19:22:22 | [rl2_trainer] epoch #104 | Optimizing policy... +2025-04-02 19:22:22 | [rl2_trainer] epoch #104 | Fitting baseline... +2025-04-02 19:22:22 | [rl2_trainer] epoch #104 | Computing loss before +2025-04-02 19:22:23 | [rl2_trainer] epoch #104 | Computing KL before +2025-04-02 19:22:24 | [rl2_trainer] epoch #104 | Optimizing +2025-04-02 19:23:00 | [rl2_trainer] epoch #104 | Computing KL after +2025-04-02 19:23:01 | [rl2_trainer] epoch #104 | Computing loss after +2025-04-02 19:23:02 | [rl2_trainer] epoch #104 | Saving snapshot... +2025-04-02 19:23:02 | [rl2_trainer] epoch #104 | Saved +2025-04-02 19:23:02 | [rl2_trainer] epoch #104 | Time 19862.58 s +2025-04-02 19:23:02 | [rl2_trainer] epoch #104 | EpochTime 174.37 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.3179 +Average/AverageReturn -26.9673 +Average/Iteration 104 +Average/MaxReturn -9.94184 +Average/MinReturn -47.7876 +Average/NumEpisodes 100 +Average/StdReturn 7.03472 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.816095 +TotalEnvSteps 1.05e+06 +__unnamed_task__/AverageDiscountedReturn -17.3179 +__unnamed_task__/AverageReturn -26.9673 +__unnamed_task__/Iteration 104 +__unnamed_task__/MaxReturn -9.94184 +__unnamed_task__/MinReturn -47.7876 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.03472 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.68506 +policy/KL 0.00662874 +policy/KLBefore 0 +policy/LossAfter 0.00344585 +policy/LossBefore 0.00511384 +policy/dLoss 0.00166799 +---------------------------------------- ------------ +2025-04-02 19:25:09 | [rl2_trainer] epoch #105 | Optimizing policy... +2025-04-02 19:25:09 | [rl2_trainer] epoch #105 | Fitting baseline... +2025-04-02 19:25:09 | [rl2_trainer] epoch #105 | Computing loss before +2025-04-02 19:25:10 | [rl2_trainer] epoch #105 | Computing KL before +2025-04-02 19:25:10 | [rl2_trainer] epoch #105 | Optimizing +2025-04-02 19:25:47 | [rl2_trainer] epoch #105 | Computing KL after +2025-04-02 19:25:48 | [rl2_trainer] epoch #105 | Computing loss after +2025-04-02 19:25:49 | [rl2_trainer] epoch #105 | Saving snapshot... +2025-04-02 19:25:49 | [rl2_trainer] epoch #105 | Saved +2025-04-02 19:25:49 | [rl2_trainer] epoch #105 | Time 20030.01 s +2025-04-02 19:25:49 | [rl2_trainer] epoch #105 | EpochTime 167.42 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.1685 +Average/AverageReturn -28.7531 +Average/Iteration 105 +Average/MaxReturn -12.133 +Average/MinReturn -77.4584 +Average/NumEpisodes 100 +Average/StdReturn 9.12057 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.704925 +TotalEnvSteps 1.06e+06 +__unnamed_task__/AverageDiscountedReturn -18.1685 +__unnamed_task__/AverageReturn -28.7531 +__unnamed_task__/Iteration 105 +__unnamed_task__/MaxReturn -12.133 +__unnamed_task__/MinReturn -77.4584 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.12057 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.65912 +policy/KL 0.0177004 +policy/KLBefore 0 +policy/LossAfter -0.0265444 +policy/LossBefore -0.00720857 +policy/dLoss 0.0193358 +---------------------------------------- ------------ +2025-04-02 19:27:33 | [rl2_trainer] epoch #106 | Optimizing policy... +2025-04-02 19:27:34 | [rl2_trainer] epoch #106 | Fitting baseline... +2025-04-02 19:27:34 | [rl2_trainer] epoch #106 | Computing loss before +2025-04-02 19:27:34 | [rl2_trainer] epoch #106 | Computing KL before +2025-04-02 19:27:35 | [rl2_trainer] epoch #106 | Optimizing +2025-04-02 19:28:12 | [rl2_trainer] epoch #106 | Computing KL after +2025-04-02 19:28:13 | [rl2_trainer] epoch #106 | Computing loss after +2025-04-02 19:28:14 | [rl2_trainer] epoch #106 | Saving snapshot... +2025-04-02 19:28:14 | [rl2_trainer] epoch #106 | Saved +2025-04-02 19:28:14 | [rl2_trainer] epoch #106 | Time 20174.58 s +2025-04-02 19:28:14 | [rl2_trainer] epoch #106 | EpochTime 144.57 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -17.5848 +Average/AverageReturn -27.4683 +Average/Iteration 106 +Average/MaxReturn -9.68824 +Average/MinReturn -52.9663 +Average/NumEpisodes 100 +Average/StdReturn 7.23235 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.670774 +TotalEnvSteps 1.07e+06 +__unnamed_task__/AverageDiscountedReturn -17.5848 +__unnamed_task__/AverageReturn -27.4683 +__unnamed_task__/Iteration 106 +__unnamed_task__/MaxReturn -9.68824 +__unnamed_task__/MinReturn -52.9663 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.23235 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.63918 +policy/KL 0.010416 +policy/KLBefore 0 +policy/LossAfter -0.0170018 +policy/LossBefore 0.0048689 +policy/dLoss 0.0218707 +---------------------------------------- ----------- +2025-04-02 19:30:22 | [rl2_trainer] epoch #107 | Optimizing policy... +2025-04-02 19:30:22 | [rl2_trainer] epoch #107 | Fitting baseline... +2025-04-02 19:30:22 | [rl2_trainer] epoch #107 | Computing loss before +2025-04-02 19:30:23 | [rl2_trainer] epoch #107 | Computing KL before +2025-04-02 19:30:23 | [rl2_trainer] epoch #107 | Optimizing +2025-04-02 19:30:57 | [rl2_trainer] epoch #107 | Computing KL after +2025-04-02 19:30:57 | [rl2_trainer] epoch #107 | Computing loss after +2025-04-02 19:30:58 | [rl2_trainer] epoch #107 | Saving snapshot... +2025-04-02 19:30:58 | [rl2_trainer] epoch #107 | Saved +2025-04-02 19:30:58 | [rl2_trainer] epoch #107 | Time 20339.43 s +2025-04-02 19:30:58 | [rl2_trainer] epoch #107 | EpochTime 164.85 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.0578 +Average/AverageReturn -26.7479 +Average/Iteration 107 +Average/MaxReturn -10.8552 +Average/MinReturn -52.4794 +Average/NumEpisodes 100 +Average/StdReturn 7.59418 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.790616 +TotalEnvSteps 1.08e+06 +__unnamed_task__/AverageDiscountedReturn -17.0578 +__unnamed_task__/AverageReturn -26.7479 +__unnamed_task__/Iteration 107 +__unnamed_task__/MaxReturn -10.8552 +__unnamed_task__/MinReturn -52.4794 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.59418 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.61573 +policy/KL 0.0101625 +policy/KLBefore 0 +policy/LossAfter -0.0109861 +policy/LossBefore -0.00295264 +policy/dLoss 0.00803344 +---------------------------------------- ------------ +2025-04-02 19:34:32 | [rl2_trainer] epoch #108 | Optimizing policy... +2025-04-02 19:34:32 | [rl2_trainer] epoch #108 | Fitting baseline... +2025-04-02 19:34:32 | [rl2_trainer] epoch #108 | Computing loss before +2025-04-02 19:34:33 | [rl2_trainer] epoch #108 | Computing KL before +2025-04-02 19:34:34 | [rl2_trainer] epoch #108 | Optimizing +2025-04-02 19:35:10 | [rl2_trainer] epoch #108 | Computing KL after +2025-04-02 19:35:10 | [rl2_trainer] epoch #108 | Computing loss after +2025-04-02 19:35:11 | [rl2_trainer] epoch #108 | Saving snapshot... +2025-04-02 19:35:11 | [rl2_trainer] epoch #108 | Saved +2025-04-02 19:35:11 | [rl2_trainer] epoch #108 | Time 20592.39 s +2025-04-02 19:35:11 | [rl2_trainer] epoch #108 | EpochTime 252.96 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -18.5309 +Average/AverageReturn -29.0445 +Average/Iteration 108 +Average/MaxReturn -3.43923 +Average/MinReturn -67.3349 +Average/NumEpisodes 100 +Average/StdReturn 8.04534 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.440406 +TotalEnvSteps 1.09e+06 +__unnamed_task__/AverageDiscountedReturn -18.5309 +__unnamed_task__/AverageReturn -29.0445 +__unnamed_task__/Iteration 108 +__unnamed_task__/MaxReturn -3.43923 +__unnamed_task__/MinReturn -67.3349 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.04534 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.57806 +policy/KL 0.0114923 +policy/KLBefore 0 +policy/LossAfter -0.0287019 +policy/LossBefore -0.0109479 +policy/dLoss 0.0177541 +---------------------------------------- ----------- +2025-04-02 19:37:44 | [rl2_trainer] epoch #109 | Optimizing policy... +2025-04-02 19:37:45 | [rl2_trainer] epoch #109 | Fitting baseline... +2025-04-02 19:37:45 | [rl2_trainer] epoch #109 | Computing loss before +2025-04-02 19:37:45 | [rl2_trainer] epoch #109 | Computing KL before +2025-04-02 19:37:46 | [rl2_trainer] epoch #109 | Optimizing +2025-04-02 19:38:22 | [rl2_trainer] epoch #109 | Computing KL after +2025-04-02 19:38:23 | [rl2_trainer] epoch #109 | Computing loss after +2025-04-02 19:38:24 | [rl2_trainer] epoch #109 | Saving snapshot... +2025-04-02 19:38:24 | [rl2_trainer] epoch #109 | Saved +2025-04-02 19:38:24 | [rl2_trainer] epoch #109 | Time 20785.03 s +2025-04-02 19:38:24 | [rl2_trainer] epoch #109 | EpochTime 192.63 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.0181 +Average/AverageReturn -24.9143 +Average/Iteration 109 +Average/MaxReturn -7.77257 +Average/MinReturn -107.374 +Average/NumEpisodes 100 +Average/StdReturn 10.836 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.445741 +TotalEnvSteps 1.1e+06 +__unnamed_task__/AverageDiscountedReturn -16.0181 +__unnamed_task__/AverageReturn -24.9143 +__unnamed_task__/Iteration 109 +__unnamed_task__/MaxReturn -7.77257 +__unnamed_task__/MinReturn -107.374 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.836 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.55098 +policy/KL 0.0110781 +policy/KLBefore 0 +policy/LossAfter -0.0688803 +policy/LossBefore -0.019239 +policy/dLoss 0.0496413 +---------------------------------------- ------------ +2025-04-02 19:40:34 | [rl2_trainer] epoch #110 | Optimizing policy... +2025-04-02 19:40:34 | [rl2_trainer] epoch #110 | Fitting baseline... +2025-04-02 19:40:34 | [rl2_trainer] epoch #110 | Computing loss before +2025-04-02 19:40:35 | [rl2_trainer] epoch #110 | Computing KL before +2025-04-02 19:40:35 | [rl2_trainer] epoch #110 | Optimizing +2025-04-02 19:41:11 | [rl2_trainer] epoch #110 | Computing KL after +2025-04-02 19:41:11 | [rl2_trainer] epoch #110 | Computing loss after +2025-04-02 19:41:12 | [rl2_trainer] epoch #110 | Saving snapshot... +2025-04-02 19:41:12 | [rl2_trainer] epoch #110 | Saved +2025-04-02 19:41:12 | [rl2_trainer] epoch #110 | Time 20953.24 s +2025-04-02 19:41:12 | [rl2_trainer] epoch #110 | EpochTime 168.21 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.9499 +Average/AverageReturn -26.5943 +Average/Iteration 110 +Average/MaxReturn -6.60843 +Average/MinReturn -50.3904 +Average/NumEpisodes 100 +Average/StdReturn 8.34299 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.813711 +TotalEnvSteps 1.11e+06 +__unnamed_task__/AverageDiscountedReturn -16.9499 +__unnamed_task__/AverageReturn -26.5943 +__unnamed_task__/Iteration 110 +__unnamed_task__/MaxReturn -6.60843 +__unnamed_task__/MinReturn -50.3904 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.34299 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.53166 +policy/KL 0.00901029 +policy/KLBefore 0 +policy/LossAfter -0.018526 +policy/LossBefore -0.00454785 +policy/dLoss 0.0139781 +---------------------------------------- ------------ +2025-04-02 19:43:19 | [rl2_trainer] epoch #111 | Optimizing policy... +2025-04-02 19:43:20 | [rl2_trainer] epoch #111 | Fitting baseline... +2025-04-02 19:43:20 | [rl2_trainer] epoch #111 | Computing loss before +2025-04-02 19:43:20 | [rl2_trainer] epoch #111 | Computing KL before +2025-04-02 19:43:21 | [rl2_trainer] epoch #111 | Optimizing +2025-04-02 19:43:55 | [rl2_trainer] epoch #111 | Computing KL after +2025-04-02 19:43:56 | [rl2_trainer] epoch #111 | Computing loss after +2025-04-02 19:43:57 | [rl2_trainer] epoch #111 | Saving snapshot... +2025-04-02 19:43:57 | [rl2_trainer] epoch #111 | Saved +2025-04-02 19:43:57 | [rl2_trainer] epoch #111 | Time 21117.95 s +2025-04-02 19:43:57 | [rl2_trainer] epoch #111 | EpochTime 164.72 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.5929 +Average/AverageReturn -26.0825 +Average/Iteration 111 +Average/MaxReturn 2.93668 +Average/MinReturn -55.3788 +Average/NumEpisodes 100 +Average/StdReturn 8.15172 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.805461 +TotalEnvSteps 1.12e+06 +__unnamed_task__/AverageDiscountedReturn -16.5929 +__unnamed_task__/AverageReturn -26.0825 +__unnamed_task__/Iteration 111 +__unnamed_task__/MaxReturn 2.93668 +__unnamed_task__/MinReturn -55.3788 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.15172 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.5116 +policy/KL 0.00853559 +policy/KLBefore 0 +policy/LossAfter -0.0128445 +policy/LossBefore -0.0024843 +policy/dLoss 0.0103602 +---------------------------------------- ------------ +2025-04-02 19:46:50 | [rl2_trainer] epoch #112 | Optimizing policy... +2025-04-02 19:46:51 | [rl2_trainer] epoch #112 | Fitting baseline... +2025-04-02 19:46:51 | [rl2_trainer] epoch #112 | Computing loss before +2025-04-02 19:46:51 | [rl2_trainer] epoch #112 | Computing KL before +2025-04-02 19:46:52 | [rl2_trainer] epoch #112 | Optimizing +2025-04-02 19:47:28 | [rl2_trainer] epoch #112 | Computing KL after +2025-04-02 19:47:28 | [rl2_trainer] epoch #112 | Computing loss after +2025-04-02 19:47:29 | [rl2_trainer] epoch #112 | Saving snapshot... +2025-04-02 19:47:29 | [rl2_trainer] epoch #112 | Saved +2025-04-02 19:47:29 | [rl2_trainer] epoch #112 | Time 21330.22 s +2025-04-02 19:47:29 | [rl2_trainer] epoch #112 | EpochTime 212.26 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.0137 +Average/AverageReturn -25.0446 +Average/Iteration 112 +Average/MaxReturn 5.9522 +Average/MinReturn -37.836 +Average/NumEpisodes 100 +Average/StdReturn 6.52705 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.636823 +TotalEnvSteps 1.13e+06 +__unnamed_task__/AverageDiscountedReturn -16.0137 +__unnamed_task__/AverageReturn -25.0446 +__unnamed_task__/Iteration 112 +__unnamed_task__/MaxReturn 5.9522 +__unnamed_task__/MinReturn -37.836 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.52705 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.48407 +policy/KL 0.00759347 +policy/KLBefore 0 +policy/LossAfter -0.0160036 +policy/LossBefore -0.00412343 +policy/dLoss 0.0118801 +---------------------------------------- ------------ +2025-04-02 19:50:37 | [rl2_trainer] epoch #113 | Optimizing policy... +2025-04-02 19:50:37 | [rl2_trainer] epoch #113 | Fitting baseline... +2025-04-02 19:50:37 | [rl2_trainer] epoch #113 | Computing loss before +2025-04-02 19:50:37 | [rl2_trainer] epoch #113 | Computing KL before +2025-04-02 19:50:38 | [rl2_trainer] epoch #113 | Optimizing +2025-04-02 19:51:14 | [rl2_trainer] epoch #113 | Computing KL after +2025-04-02 19:51:14 | [rl2_trainer] epoch #113 | Computing loss after +2025-04-02 19:51:15 | [rl2_trainer] epoch #113 | Saving snapshot... +2025-04-02 19:51:15 | [rl2_trainer] epoch #113 | Saved +2025-04-02 19:51:15 | [rl2_trainer] epoch #113 | Time 21556.44 s +2025-04-02 19:51:15 | [rl2_trainer] epoch #113 | EpochTime 226.22 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.5425 +Average/AverageReturn -27.8808 +Average/Iteration 113 +Average/MaxReturn -10.4593 +Average/MinReturn -62.6878 +Average/NumEpisodes 100 +Average/StdReturn 7.1513 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.597796 +TotalEnvSteps 1.14e+06 +__unnamed_task__/AverageDiscountedReturn -17.5425 +__unnamed_task__/AverageReturn -27.8808 +__unnamed_task__/Iteration 113 +__unnamed_task__/MaxReturn -10.4593 +__unnamed_task__/MinReturn -62.6878 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.1513 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.4571 +policy/KL 0.0130838 +policy/KLBefore 0 +policy/LossAfter -0.016799 +policy/LossBefore -0.00392556 +policy/dLoss 0.0128734 +---------------------------------------- ------------ +2025-04-02 19:53:42 | [rl2_trainer] epoch #114 | Optimizing policy... +2025-04-02 19:53:42 | [rl2_trainer] epoch #114 | Fitting baseline... +2025-04-02 19:53:42 | [rl2_trainer] epoch #114 | Computing loss before +2025-04-02 19:53:43 | [rl2_trainer] epoch #114 | Computing KL before +2025-04-02 19:53:44 | [rl2_trainer] epoch #114 | Optimizing +2025-04-02 19:54:21 | [rl2_trainer] epoch #114 | Computing KL after +2025-04-02 19:54:21 | [rl2_trainer] epoch #114 | Computing loss after +2025-04-02 19:54:22 | [rl2_trainer] epoch #114 | Saving snapshot... +2025-04-02 19:54:22 | [rl2_trainer] epoch #114 | Saved +2025-04-02 19:54:22 | [rl2_trainer] epoch #114 | Time 21743.21 s +2025-04-02 19:54:22 | [rl2_trainer] epoch #114 | EpochTime 186.76 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.7728 +Average/AverageReturn -21.6416 +Average/Iteration 114 +Average/MaxReturn -4.65397 +Average/MinReturn -39.3355 +Average/NumEpisodes 100 +Average/StdReturn 4.70143 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.363901 +TotalEnvSteps 1.15e+06 +__unnamed_task__/AverageDiscountedReturn -13.7728 +__unnamed_task__/AverageReturn -21.6416 +__unnamed_task__/Iteration 114 +__unnamed_task__/MaxReturn -4.65397 +__unnamed_task__/MinReturn -39.3355 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.70143 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.42203 +policy/KL 0.0104305 +policy/KLBefore 0 +policy/LossAfter -0.00914409 +policy/LossBefore -0.00030774 +policy/dLoss 0.00883636 +---------------------------------------- ------------ +2025-04-02 19:56:23 | [rl2_trainer] epoch #115 | Optimizing policy... +2025-04-02 19:56:23 | [rl2_trainer] epoch #115 | Fitting baseline... +2025-04-02 19:56:23 | [rl2_trainer] epoch #115 | Computing loss before +2025-04-02 19:56:24 | [rl2_trainer] epoch #115 | Computing KL before +2025-04-02 19:56:24 | [rl2_trainer] epoch #115 | Optimizing +2025-04-02 19:57:01 | [rl2_trainer] epoch #115 | Computing KL after +2025-04-02 19:57:01 | [rl2_trainer] epoch #115 | Computing loss after +2025-04-02 19:57:02 | [rl2_trainer] epoch #115 | Saving snapshot... +2025-04-02 19:57:02 | [rl2_trainer] epoch #115 | Saved +2025-04-02 19:57:02 | [rl2_trainer] epoch #115 | Time 21903.12 s +2025-04-02 19:57:02 | [rl2_trainer] epoch #115 | EpochTime 159.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.2947 +Average/AverageReturn -28.4012 +Average/Iteration 115 +Average/MaxReturn -10.6628 +Average/MinReturn -40.9774 +Average/NumEpisodes 100 +Average/StdReturn 6.25304 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.616678 +TotalEnvSteps 1.16e+06 +__unnamed_task__/AverageDiscountedReturn -18.2947 +__unnamed_task__/AverageReturn -28.4012 +__unnamed_task__/Iteration 115 +__unnamed_task__/MaxReturn -10.6628 +__unnamed_task__/MinReturn -40.9774 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.25304 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.40337 +policy/KL 0.0100329 +policy/KLBefore 0 +policy/LossAfter -0.0123566 +policy/LossBefore 0.00131714 +policy/dLoss 0.0136737 +---------------------------------------- ------------ +2025-04-02 20:00:05 | [rl2_trainer] epoch #116 | Optimizing policy... +2025-04-02 20:00:06 | [rl2_trainer] epoch #116 | Fitting baseline... +2025-04-02 20:00:06 | [rl2_trainer] epoch #116 | Computing loss before +2025-04-02 20:00:06 | [rl2_trainer] epoch #116 | Computing KL before +2025-04-02 20:00:07 | [rl2_trainer] epoch #116 | Optimizing +2025-04-02 20:00:39 | [rl2_trainer] epoch #116 | Computing KL after +2025-04-02 20:00:40 | [rl2_trainer] epoch #116 | Computing loss after +2025-04-02 20:00:41 | [rl2_trainer] epoch #116 | Saving snapshot... +2025-04-02 20:00:41 | [rl2_trainer] epoch #116 | Saved +2025-04-02 20:00:41 | [rl2_trainer] epoch #116 | Time 22121.75 s +2025-04-02 20:00:41 | [rl2_trainer] epoch #116 | EpochTime 218.62 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1932 +Average/AverageReturn -23.6002 +Average/Iteration 116 +Average/MaxReturn 15.0004 +Average/MinReturn -42.8137 +Average/NumEpisodes 100 +Average/StdReturn 6.52258 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.473746 +TotalEnvSteps 1.17e+06 +__unnamed_task__/AverageDiscountedReturn -15.1932 +__unnamed_task__/AverageReturn -23.6002 +__unnamed_task__/Iteration 116 +__unnamed_task__/MaxReturn 15.0004 +__unnamed_task__/MinReturn -42.8137 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.52258 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.38077 +policy/KL 0.00808259 +policy/KLBefore 0 +policy/LossAfter -0.00581318 +policy/LossBefore 0.00600898 +policy/dLoss 0.0118222 +---------------------------------------- ------------ +2025-04-02 20:04:31 | [rl2_trainer] epoch #117 | Optimizing policy... +2025-04-02 20:04:32 | [rl2_trainer] epoch #117 | Fitting baseline... +2025-04-02 20:04:32 | [rl2_trainer] epoch #117 | Computing loss before +2025-04-02 20:04:32 | [rl2_trainer] epoch #117 | Computing KL before +2025-04-02 20:04:33 | [rl2_trainer] epoch #117 | Optimizing +2025-04-02 20:05:09 | [rl2_trainer] epoch #117 | Computing KL after +2025-04-02 20:05:10 | [rl2_trainer] epoch #117 | Computing loss after +2025-04-02 20:05:11 | [rl2_trainer] epoch #117 | Saving snapshot... +2025-04-02 20:05:11 | [rl2_trainer] epoch #117 | Saved +2025-04-02 20:05:11 | [rl2_trainer] epoch #117 | Time 22391.80 s +2025-04-02 20:05:11 | [rl2_trainer] epoch #117 | EpochTime 270.05 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.71 +Average/AverageReturn -26.2288 +Average/Iteration 117 +Average/MaxReturn 17.2115 +Average/MinReturn -62.3998 +Average/NumEpisodes 100 +Average/StdReturn 10.0464 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.308918 +TotalEnvSteps 1.18e+06 +__unnamed_task__/AverageDiscountedReturn -16.71 +__unnamed_task__/AverageReturn -26.2288 +__unnamed_task__/Iteration 117 +__unnamed_task__/MaxReturn 17.2115 +__unnamed_task__/MinReturn -62.3998 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0464 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.36192 +policy/KL 0.0118832 +policy/KLBefore 0 +policy/LossAfter -0.0423513 +policy/LossBefore 0.00074169 +policy/dLoss 0.043093 +---------------------------------------- ------------ +2025-04-02 20:08:11 | [rl2_trainer] epoch #118 | Optimizing policy... +2025-04-02 20:08:11 | [rl2_trainer] epoch #118 | Fitting baseline... +2025-04-02 20:08:11 | [rl2_trainer] epoch #118 | Computing loss before +2025-04-02 20:08:12 | [rl2_trainer] epoch #118 | Computing KL before +2025-04-02 20:08:12 | [rl2_trainer] epoch #118 | Optimizing +2025-04-02 20:08:48 | [rl2_trainer] epoch #118 | Computing KL after +2025-04-02 20:08:49 | [rl2_trainer] epoch #118 | Computing loss after +2025-04-02 20:08:50 | [rl2_trainer] epoch #118 | Saving snapshot... +2025-04-02 20:08:50 | [rl2_trainer] epoch #118 | Saved +2025-04-02 20:08:50 | [rl2_trainer] epoch #118 | Time 22610.85 s +2025-04-02 20:08:50 | [rl2_trainer] epoch #118 | EpochTime 219.05 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7698 +Average/AverageReturn -24.7468 +Average/Iteration 118 +Average/MaxReturn 3.53947 +Average/MinReturn -56.814 +Average/NumEpisodes 100 +Average/StdReturn 8.39611 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance -0.557819 +TotalEnvSteps 1.19e+06 +__unnamed_task__/AverageDiscountedReturn -15.7698 +__unnamed_task__/AverageReturn -24.7468 +__unnamed_task__/Iteration 118 +__unnamed_task__/MaxReturn 3.53947 +__unnamed_task__/MinReturn -56.814 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.39611 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.33507 +policy/KL 0.0145354 +policy/KLBefore 0 +policy/LossAfter -0.0647009 +policy/LossBefore -0.000766303 +policy/dLoss 0.0639346 +---------------------------------------- ------------- +2025-04-02 20:11:09 | [rl2_trainer] epoch #119 | Optimizing policy... +2025-04-02 20:11:10 | [rl2_trainer] epoch #119 | Fitting baseline... +2025-04-02 20:11:10 | [rl2_trainer] epoch #119 | Computing loss before +2025-04-02 20:11:10 | [rl2_trainer] epoch #119 | Computing KL before +2025-04-02 20:11:11 | [rl2_trainer] epoch #119 | Optimizing +2025-04-02 20:11:46 | [rl2_trainer] epoch #119 | Computing KL after +2025-04-02 20:11:46 | [rl2_trainer] epoch #119 | Computing loss after +2025-04-02 20:11:47 | [rl2_trainer] epoch #119 | Saving snapshot... +2025-04-02 20:11:47 | [rl2_trainer] epoch #119 | Saved +2025-04-02 20:11:47 | [rl2_trainer] epoch #119 | Time 22788.11 s +2025-04-02 20:11:47 | [rl2_trainer] epoch #119 | EpochTime 177.25 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.5433 +Average/AverageReturn -25.7551 +Average/Iteration 119 +Average/MaxReturn -4.60226 +Average/MinReturn -38.9226 +Average/NumEpisodes 100 +Average/StdReturn 5.46197 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.653934 +TotalEnvSteps 1.2e+06 +__unnamed_task__/AverageDiscountedReturn -16.5433 +__unnamed_task__/AverageReturn -25.7551 +__unnamed_task__/Iteration 119 +__unnamed_task__/MaxReturn -4.60226 +__unnamed_task__/MinReturn -38.9226 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.46197 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.30796 +policy/KL 0.0138711 +policy/KLBefore 0 +policy/LossAfter -0.00914424 +policy/LossBefore -0.00307605 +policy/dLoss 0.00606819 +---------------------------------------- ------------ +2025-04-02 20:14:41 | [rl2_trainer] epoch #120 | Optimizing policy... +2025-04-02 20:14:41 | [rl2_trainer] epoch #120 | Fitting baseline... +2025-04-02 20:14:41 | [rl2_trainer] epoch #120 | Computing loss before +2025-04-02 20:14:41 | [rl2_trainer] epoch #120 | Computing KL before +2025-04-02 20:14:42 | [rl2_trainer] epoch #120 | Optimizing +2025-04-02 20:15:14 | [rl2_trainer] epoch #120 | Computing KL after +2025-04-02 20:15:15 | [rl2_trainer] epoch #120 | Computing loss after +2025-04-02 20:15:16 | [rl2_trainer] epoch #120 | Saving snapshot... +2025-04-02 20:15:16 | [rl2_trainer] epoch #120 | Saved +2025-04-02 20:15:16 | [rl2_trainer] epoch #120 | Time 22997.01 s +2025-04-02 20:15:16 | [rl2_trainer] epoch #120 | EpochTime 208.90 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.3446 +Average/AverageReturn -25.7474 +Average/Iteration 120 +Average/MaxReturn 1.10075 +Average/MinReturn -55.2088 +Average/NumEpisodes 100 +Average/StdReturn 8.40131 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.0687835 +TotalEnvSteps 1.21e+06 +__unnamed_task__/AverageDiscountedReturn -16.3446 +__unnamed_task__/AverageReturn -25.7474 +__unnamed_task__/Iteration 120 +__unnamed_task__/MaxReturn 1.10075 +__unnamed_task__/MinReturn -55.2088 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.40131 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.28162 +policy/KL 0.0145202 +policy/KLBefore 0 +policy/LossAfter -0.0503328 +policy/LossBefore 0.00494951 +policy/dLoss 0.0552823 +---------------------------------------- ------------ +2025-04-02 20:18:29 | [rl2_trainer] epoch #121 | Optimizing policy... +2025-04-02 20:18:30 | [rl2_trainer] epoch #121 | Fitting baseline... +2025-04-02 20:18:30 | [rl2_trainer] epoch #121 | Computing loss before +2025-04-02 20:18:30 | [rl2_trainer] epoch #121 | Computing KL before +2025-04-02 20:18:31 | [rl2_trainer] epoch #121 | Optimizing +2025-04-02 20:19:08 | [rl2_trainer] epoch #121 | Computing KL after +2025-04-02 20:19:09 | [rl2_trainer] epoch #121 | Computing loss after +2025-04-02 20:19:10 | [rl2_trainer] epoch #121 | Saving snapshot... +2025-04-02 20:19:10 | [rl2_trainer] epoch #121 | Saved +2025-04-02 20:19:10 | [rl2_trainer] epoch #121 | Time 23230.74 s +2025-04-02 20:19:10 | [rl2_trainer] epoch #121 | EpochTime 233.73 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.193 +Average/AverageReturn -29.0349 +Average/Iteration 121 +Average/MaxReturn -15.0442 +Average/MinReturn -73.821 +Average/NumEpisodes 100 +Average/StdReturn 10.0368 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.512339 +TotalEnvSteps 1.22e+06 +__unnamed_task__/AverageDiscountedReturn -18.193 +__unnamed_task__/AverageReturn -29.0349 +__unnamed_task__/Iteration 121 +__unnamed_task__/MaxReturn -15.0442 +__unnamed_task__/MinReturn -73.821 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0368 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.26553 +policy/KL 0.0116465 +policy/KLBefore 0 +policy/LossAfter -0.0334585 +policy/LossBefore 0.00108235 +policy/dLoss 0.0345409 +---------------------------------------- ------------ +2025-04-02 20:20:36 | [rl2_trainer] epoch #122 | Optimizing policy... +2025-04-02 20:20:37 | [rl2_trainer] epoch #122 | Fitting baseline... +2025-04-02 20:20:37 | [rl2_trainer] epoch #122 | Computing loss before +2025-04-02 20:20:37 | [rl2_trainer] epoch #122 | Computing KL before +2025-04-02 20:20:38 | [rl2_trainer] epoch #122 | Optimizing +2025-04-02 20:21:13 | [rl2_trainer] epoch #122 | Computing KL after +2025-04-02 20:21:14 | [rl2_trainer] epoch #122 | Computing loss after +2025-04-02 20:21:15 | [rl2_trainer] epoch #122 | Saving snapshot... +2025-04-02 20:21:15 | [rl2_trainer] epoch #122 | Saved +2025-04-02 20:21:15 | [rl2_trainer] epoch #122 | Time 23356.10 s +2025-04-02 20:21:15 | [rl2_trainer] epoch #122 | EpochTime 125.35 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3588 +Average/AverageReturn -23.7301 +Average/Iteration 122 +Average/MaxReturn 2.23713 +Average/MinReturn -38.1226 +Average/NumEpisodes 100 +Average/StdReturn 6.37273 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.699989 +TotalEnvSteps 1.23e+06 +__unnamed_task__/AverageDiscountedReturn -15.3588 +__unnamed_task__/AverageReturn -23.7301 +__unnamed_task__/Iteration 122 +__unnamed_task__/MaxReturn 2.23713 +__unnamed_task__/MinReturn -38.1226 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.37273 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.26374 +policy/KL 0.00963202 +policy/KLBefore 0 +policy/LossAfter -0.0270947 +policy/LossBefore -0.00945664 +policy/dLoss 0.0176381 +---------------------------------------- ------------ +2025-04-02 20:22:44 | [rl2_trainer] epoch #123 | Optimizing policy... +2025-04-02 20:22:44 | [rl2_trainer] epoch #123 | Fitting baseline... +2025-04-02 20:22:44 | [rl2_trainer] epoch #123 | Computing loss before +2025-04-02 20:22:45 | [rl2_trainer] epoch #123 | Computing KL before +2025-04-02 20:22:46 | [rl2_trainer] epoch #123 | Optimizing +2025-04-02 20:23:21 | [rl2_trainer] epoch #123 | Computing KL after +2025-04-02 20:23:21 | [rl2_trainer] epoch #123 | Computing loss after +2025-04-02 20:23:22 | [rl2_trainer] epoch #123 | Saving snapshot... +2025-04-02 20:23:22 | [rl2_trainer] epoch #123 | Saved +2025-04-02 20:23:22 | [rl2_trainer] epoch #123 | Time 23483.08 s +2025-04-02 20:23:22 | [rl2_trainer] epoch #123 | EpochTime 126.99 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.5625 +Average/AverageReturn -24.133 +Average/Iteration 123 +Average/MaxReturn -4.54811 +Average/MinReturn -37.687 +Average/NumEpisodes 100 +Average/StdReturn 5.82351 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.719313 +TotalEnvSteps 1.24e+06 +__unnamed_task__/AverageDiscountedReturn -15.5625 +__unnamed_task__/AverageReturn -24.133 +__unnamed_task__/Iteration 123 +__unnamed_task__/MaxReturn -4.54811 +__unnamed_task__/MinReturn -37.687 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.82351 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.25437 +policy/KL 0.0104516 +policy/KLBefore 0 +policy/LossAfter -0.0146336 +policy/LossBefore -0.0015748 +policy/dLoss 0.0130588 +---------------------------------------- ----------- +2025-04-02 20:26:20 | [rl2_trainer] epoch #124 | Optimizing policy... +2025-04-02 20:26:20 | [rl2_trainer] epoch #124 | Fitting baseline... +2025-04-02 20:26:20 | [rl2_trainer] epoch #124 | Computing loss before +2025-04-02 20:26:20 | [rl2_trainer] epoch #124 | Computing KL before +2025-04-02 20:26:21 | [rl2_trainer] epoch #124 | Optimizing +2025-04-02 20:26:55 | [rl2_trainer] epoch #124 | Computing KL after +2025-04-02 20:26:56 | [rl2_trainer] epoch #124 | Computing loss after +2025-04-02 20:26:57 | [rl2_trainer] epoch #124 | Saving snapshot... +2025-04-02 20:26:57 | [rl2_trainer] epoch #124 | Saved +2025-04-02 20:26:57 | [rl2_trainer] epoch #124 | Time 23697.61 s +2025-04-02 20:26:57 | [rl2_trainer] epoch #124 | EpochTime 214.52 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.309 +Average/AverageReturn -23.9812 +Average/Iteration 124 +Average/MaxReturn 5.66316 +Average/MinReturn -63.7877 +Average/NumEpisodes 100 +Average/StdReturn 8.87889 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.576236 +TotalEnvSteps 1.25e+06 +__unnamed_task__/AverageDiscountedReturn -15.309 +__unnamed_task__/AverageReturn -23.9812 +__unnamed_task__/Iteration 124 +__unnamed_task__/MaxReturn 5.66316 +__unnamed_task__/MinReturn -63.7877 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.87889 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.23202 +policy/KL 0.0111383 +policy/KLBefore 0 +policy/LossAfter -0.0248302 +policy/LossBefore -0.00255135 +policy/dLoss 0.0222788 +---------------------------------------- ------------ +2025-04-02 20:28:51 | [rl2_trainer] epoch #125 | Optimizing policy... +2025-04-02 20:28:52 | [rl2_trainer] epoch #125 | Fitting baseline... +2025-04-02 20:28:52 | [rl2_trainer] epoch #125 | Computing loss before +2025-04-02 20:28:52 | [rl2_trainer] epoch #125 | Computing KL before +2025-04-02 20:28:53 | [rl2_trainer] epoch #125 | Optimizing +2025-04-02 20:29:27 | [rl2_trainer] epoch #125 | Computing KL after +2025-04-02 20:29:27 | [rl2_trainer] epoch #125 | Computing loss after +2025-04-02 20:29:28 | [rl2_trainer] epoch #125 | Saving snapshot... +2025-04-02 20:29:28 | [rl2_trainer] epoch #125 | Saved +2025-04-02 20:29:28 | [rl2_trainer] epoch #125 | Time 23849.21 s +2025-04-02 20:29:28 | [rl2_trainer] epoch #125 | EpochTime 151.60 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -18.0394 +Average/AverageReturn -28.1246 +Average/Iteration 125 +Average/MaxReturn -16.0437 +Average/MinReturn -50.9756 +Average/NumEpisodes 100 +Average/StdReturn 5.84741 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.524924 +TotalEnvSteps 1.26e+06 +__unnamed_task__/AverageDiscountedReturn -18.0394 +__unnamed_task__/AverageReturn -28.1246 +__unnamed_task__/Iteration 125 +__unnamed_task__/MaxReturn -16.0437 +__unnamed_task__/MinReturn -50.9756 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.84741 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.21567 +policy/KL 0.0109792 +policy/KLBefore 0 +policy/LossAfter -0.0154429 +policy/LossBefore 0.000799324 +policy/dLoss 0.0162422 +---------------------------------------- ------------- +2025-04-02 20:32:21 | [rl2_trainer] epoch #126 | Optimizing policy... +2025-04-02 20:32:22 | [rl2_trainer] epoch #126 | Fitting baseline... +2025-04-02 20:32:22 | [rl2_trainer] epoch #126 | Computing loss before +2025-04-02 20:32:22 | [rl2_trainer] epoch #126 | Computing KL before +2025-04-02 20:32:23 | [rl2_trainer] epoch #126 | Optimizing +2025-04-02 20:32:56 | [rl2_trainer] epoch #126 | Computing KL after +2025-04-02 20:32:57 | [rl2_trainer] epoch #126 | Computing loss after +2025-04-02 20:32:57 | [rl2_trainer] epoch #126 | Saving snapshot... +2025-04-02 20:32:57 | [rl2_trainer] epoch #126 | Saved +2025-04-02 20:32:57 | [rl2_trainer] epoch #126 | Time 24058.45 s +2025-04-02 20:32:57 | [rl2_trainer] epoch #126 | EpochTime 209.24 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.4284 +Average/AverageReturn -29.2003 +Average/Iteration 126 +Average/MaxReturn -13.1371 +Average/MinReturn -79.4277 +Average/NumEpisodes 100 +Average/StdReturn 11.0898 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.571433 +TotalEnvSteps 1.27e+06 +__unnamed_task__/AverageDiscountedReturn -18.4284 +__unnamed_task__/AverageReturn -29.2003 +__unnamed_task__/Iteration 126 +__unnamed_task__/MaxReturn -13.1371 +__unnamed_task__/MinReturn -79.4277 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.0898 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.2106 +policy/KL 0.0110208 +policy/KLBefore 0 +policy/LossAfter -0.032824 +policy/LossBefore 0.00397563 +policy/dLoss 0.0367997 +---------------------------------------- ------------ +2025-04-02 20:35:26 | [rl2_trainer] epoch #127 | Optimizing policy... +2025-04-02 20:35:26 | [rl2_trainer] epoch #127 | Fitting baseline... +2025-04-02 20:35:26 | [rl2_trainer] epoch #127 | Computing loss before +2025-04-02 20:35:27 | [rl2_trainer] epoch #127 | Computing KL before +2025-04-02 20:35:27 | [rl2_trainer] epoch #127 | Optimizing +2025-04-02 20:36:04 | [rl2_trainer] epoch #127 | Computing KL after +2025-04-02 20:36:05 | [rl2_trainer] epoch #127 | Computing loss after +2025-04-02 20:36:06 | [rl2_trainer] epoch #127 | Saving snapshot... +2025-04-02 20:36:06 | [rl2_trainer] epoch #127 | Saved +2025-04-02 20:36:06 | [rl2_trainer] epoch #127 | Time 24246.85 s +2025-04-02 20:36:06 | [rl2_trainer] epoch #127 | EpochTime 188.39 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1475 +Average/AverageReturn -21.7279 +Average/Iteration 127 +Average/MaxReturn -7.1038 +Average/MinReturn -33.1458 +Average/NumEpisodes 100 +Average/StdReturn 4.51307 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.586887 +TotalEnvSteps 1.28e+06 +__unnamed_task__/AverageDiscountedReturn -14.1475 +__unnamed_task__/AverageReturn -21.7279 +__unnamed_task__/Iteration 127 +__unnamed_task__/MaxReturn -7.1038 +__unnamed_task__/MinReturn -33.1458 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.51307 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.19846 +policy/KL 0.00934099 +policy/KLBefore 0 +policy/LossAfter -0.014151 +policy/LossBefore -0.00207928 +policy/dLoss 0.0120718 +---------------------------------------- ------------ +2025-04-02 20:38:18 | [rl2_trainer] epoch #128 | Optimizing policy... +2025-04-02 20:38:18 | [rl2_trainer] epoch #128 | Fitting baseline... +2025-04-02 20:38:18 | [rl2_trainer] epoch #128 | Computing loss before +2025-04-02 20:38:19 | [rl2_trainer] epoch #128 | Computing KL before +2025-04-02 20:38:19 | [rl2_trainer] epoch #128 | Optimizing +2025-04-02 20:38:56 | [rl2_trainer] epoch #128 | Computing KL after +2025-04-02 20:38:56 | [rl2_trainer] epoch #128 | Computing loss after +2025-04-02 20:38:57 | [rl2_trainer] epoch #128 | Saving snapshot... +2025-04-02 20:38:57 | [rl2_trainer] epoch #128 | Saved +2025-04-02 20:38:57 | [rl2_trainer] epoch #128 | Time 24418.07 s +2025-04-02 20:38:57 | [rl2_trainer] epoch #128 | EpochTime 171.22 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7881 +Average/AverageReturn -24.3861 +Average/Iteration 128 +Average/MaxReturn -11.0474 +Average/MinReturn -89.144 +Average/NumEpisodes 100 +Average/StdReturn 8.96147 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.560581 +TotalEnvSteps 1.29e+06 +__unnamed_task__/AverageDiscountedReturn -15.7881 +__unnamed_task__/AverageReturn -24.3861 +__unnamed_task__/Iteration 128 +__unnamed_task__/MaxReturn -11.0474 +__unnamed_task__/MinReturn -89.144 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.96147 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.17104 +policy/KL 0.0106066 +policy/KLBefore 0 +policy/LossAfter -0.0148348 +policy/LossBefore -0.000216226 +policy/dLoss 0.0146186 +---------------------------------------- ------------- +2025-04-02 20:40:53 | [rl2_trainer] epoch #129 | Optimizing policy... +2025-04-02 20:40:53 | [rl2_trainer] epoch #129 | Fitting baseline... +2025-04-02 20:40:53 | [rl2_trainer] epoch #129 | Computing loss before +2025-04-02 20:40:54 | [rl2_trainer] epoch #129 | Computing KL before +2025-04-02 20:40:55 | [rl2_trainer] epoch #129 | Optimizing +2025-04-02 20:41:31 | [rl2_trainer] epoch #129 | Computing KL after +2025-04-02 20:41:32 | [rl2_trainer] epoch #129 | Computing loss after +2025-04-02 20:41:33 | [rl2_trainer] epoch #129 | Saving snapshot... +2025-04-02 20:41:33 | [rl2_trainer] epoch #129 | Saved +2025-04-02 20:41:33 | [rl2_trainer] epoch #129 | Time 24573.97 s +2025-04-02 20:41:33 | [rl2_trainer] epoch #129 | EpochTime 155.90 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5404 +Average/AverageReturn -27.1728 +Average/Iteration 129 +Average/MaxReturn 0.559833 +Average/MinReturn -52.7879 +Average/NumEpisodes 100 +Average/StdReturn 6.62181 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.517958 +TotalEnvSteps 1.3e+06 +__unnamed_task__/AverageDiscountedReturn -17.5404 +__unnamed_task__/AverageReturn -27.1728 +__unnamed_task__/Iteration 129 +__unnamed_task__/MaxReturn 0.559833 +__unnamed_task__/MinReturn -52.7879 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.62181 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.1459 +policy/KL 0.00884506 +policy/KLBefore 0 +policy/LossAfter -0.00617694 +policy/LossBefore 0.000790179 +policy/dLoss 0.00696711 +---------------------------------------- ------------- +2025-04-02 20:44:57 | [rl2_trainer] epoch #130 | Optimizing policy... +2025-04-02 20:44:58 | [rl2_trainer] epoch #130 | Fitting baseline... +2025-04-02 20:44:58 | [rl2_trainer] epoch #130 | Computing loss before +2025-04-02 20:44:58 | [rl2_trainer] epoch #130 | Computing KL before +2025-04-02 20:44:59 | [rl2_trainer] epoch #130 | Optimizing +2025-04-02 20:45:34 | [rl2_trainer] epoch #130 | Computing KL after +2025-04-02 20:45:35 | [rl2_trainer] epoch #130 | Computing loss after +2025-04-02 20:45:36 | [rl2_trainer] epoch #130 | Saving snapshot... +2025-04-02 20:45:36 | [rl2_trainer] epoch #130 | Saved +2025-04-02 20:45:36 | [rl2_trainer] epoch #130 | Time 24816.63 s +2025-04-02 20:45:36 | [rl2_trainer] epoch #130 | EpochTime 242.66 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.6823 +Average/AverageReturn -26.6189 +Average/Iteration 130 +Average/MaxReturn 4.63873 +Average/MinReturn -70.0582 +Average/NumEpisodes 100 +Average/StdReturn 11.6473 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.677613 +TotalEnvSteps 1.31e+06 +__unnamed_task__/AverageDiscountedReturn -16.6823 +__unnamed_task__/AverageReturn -26.6189 +__unnamed_task__/Iteration 130 +__unnamed_task__/MaxReturn 4.63873 +__unnamed_task__/MinReturn -70.0582 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.6473 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.1429 +policy/KL 0.0135149 +policy/KLBefore 0 +policy/LossAfter -0.0457814 +policy/LossBefore -0.00673262 +policy/dLoss 0.0390488 +---------------------------------------- ------------ +2025-04-02 20:48:40 | [rl2_trainer] epoch #131 | Optimizing policy... +2025-04-02 20:48:40 | [rl2_trainer] epoch #131 | Fitting baseline... +2025-04-02 20:48:40 | [rl2_trainer] epoch #131 | Computing loss before +2025-04-02 20:48:41 | [rl2_trainer] epoch #131 | Computing KL before +2025-04-02 20:48:41 | [rl2_trainer] epoch #131 | Optimizing +2025-04-02 20:49:17 | [rl2_trainer] epoch #131 | Computing KL after +2025-04-02 20:49:18 | [rl2_trainer] epoch #131 | Computing loss after +2025-04-02 20:49:18 | [rl2_trainer] epoch #131 | Saving snapshot... +2025-04-02 20:49:18 | [rl2_trainer] epoch #131 | Saved +2025-04-02 20:49:18 | [rl2_trainer] epoch #131 | Time 25039.44 s +2025-04-02 20:49:18 | [rl2_trainer] epoch #131 | EpochTime 222.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3825 +Average/AverageReturn -23.9443 +Average/Iteration 131 +Average/MaxReturn -1.39639 +Average/MinReturn -63.8172 +Average/NumEpisodes 100 +Average/StdReturn 7.29028 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.640824 +TotalEnvSteps 1.32e+06 +__unnamed_task__/AverageDiscountedReturn -15.3825 +__unnamed_task__/AverageReturn -23.9443 +__unnamed_task__/Iteration 131 +__unnamed_task__/MaxReturn -1.39639 +__unnamed_task__/MinReturn -63.8172 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.29028 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.13086 +policy/KL 0.00918889 +policy/KLBefore 0 +policy/LossAfter -0.0121096 +policy/LossBefore 0.00438955 +policy/dLoss 0.0164991 +---------------------------------------- ------------ +2025-04-02 20:52:00 | [rl2_trainer] epoch #132 | Optimizing policy... +2025-04-02 20:52:00 | [rl2_trainer] epoch #132 | Fitting baseline... +2025-04-02 20:52:00 | [rl2_trainer] epoch #132 | Computing loss before +2025-04-02 20:52:01 | [rl2_trainer] epoch #132 | Computing KL before +2025-04-02 20:52:01 | [rl2_trainer] epoch #132 | Optimizing +2025-04-02 20:52:34 | [rl2_trainer] epoch #132 | Computing KL after +2025-04-02 20:52:35 | [rl2_trainer] epoch #132 | Computing loss after +2025-04-02 20:52:35 | [rl2_trainer] epoch #132 | Saving snapshot... +2025-04-02 20:52:35 | [rl2_trainer] epoch #132 | Saved +2025-04-02 20:52:35 | [rl2_trainer] epoch #132 | Time 25236.43 s +2025-04-02 20:52:35 | [rl2_trainer] epoch #132 | EpochTime 196.99 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.2967 +Average/AverageReturn -27.5268 +Average/Iteration 132 +Average/MaxReturn -2.43233 +Average/MinReturn -67.7127 +Average/NumEpisodes 100 +Average/StdReturn 12.8638 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.702356 +TotalEnvSteps 1.33e+06 +__unnamed_task__/AverageDiscountedReturn -17.2967 +__unnamed_task__/AverageReturn -27.5268 +__unnamed_task__/Iteration 132 +__unnamed_task__/MaxReturn -2.43233 +__unnamed_task__/MinReturn -67.7127 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.8638 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.121 +policy/KL 0.0143323 +policy/KLBefore 0 +policy/LossAfter -0.0436772 +policy/LossBefore 0.00365718 +policy/dLoss 0.0473344 +---------------------------------------- ------------ +2025-04-02 20:54:45 | [rl2_trainer] epoch #133 | Optimizing policy... +2025-04-02 20:54:45 | [rl2_trainer] epoch #133 | Fitting baseline... +2025-04-02 20:54:45 | [rl2_trainer] epoch #133 | Computing loss before +2025-04-02 20:54:46 | [rl2_trainer] epoch #133 | Computing KL before +2025-04-02 20:54:46 | [rl2_trainer] epoch #133 | Optimizing +2025-04-02 20:55:23 | [rl2_trainer] epoch #133 | Computing KL after +2025-04-02 20:55:24 | [rl2_trainer] epoch #133 | Computing loss after +2025-04-02 20:55:25 | [rl2_trainer] epoch #133 | Saving snapshot... +2025-04-02 20:55:25 | [rl2_trainer] epoch #133 | Saved +2025-04-02 20:55:25 | [rl2_trainer] epoch #133 | Time 25406.08 s +2025-04-02 20:55:25 | [rl2_trainer] epoch #133 | EpochTime 169.65 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.828 +Average/AverageReturn -23.0369 +Average/Iteration 133 +Average/MaxReturn 6.10349 +Average/MinReturn -60.9251 +Average/NumEpisodes 100 +Average/StdReturn 8.81674 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.780476 +TotalEnvSteps 1.34e+06 +__unnamed_task__/AverageDiscountedReturn -14.828 +__unnamed_task__/AverageReturn -23.0369 +__unnamed_task__/Iteration 133 +__unnamed_task__/MaxReturn 6.10349 +__unnamed_task__/MinReturn -60.9251 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.81674 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.10116 +policy/KL 0.0127045 +policy/KLBefore 0 +policy/LossAfter -0.0199293 +policy/LossBefore -0.0024252 +policy/dLoss 0.0175041 +---------------------------------------- ----------- +2025-04-02 20:59:05 | [rl2_trainer] epoch #134 | Optimizing policy... +2025-04-02 20:59:05 | [rl2_trainer] epoch #134 | Fitting baseline... +2025-04-02 20:59:05 | [rl2_trainer] epoch #134 | Computing loss before +2025-04-02 20:59:06 | [rl2_trainer] epoch #134 | Computing KL before +2025-04-02 20:59:06 | [rl2_trainer] epoch #134 | Optimizing +2025-04-02 20:59:40 | [rl2_trainer] epoch #134 | Computing KL after +2025-04-02 20:59:41 | [rl2_trainer] epoch #134 | Computing loss after +2025-04-02 20:59:42 | [rl2_trainer] epoch #134 | Saving snapshot... +2025-04-02 20:59:42 | [rl2_trainer] epoch #134 | Saved +2025-04-02 20:59:42 | [rl2_trainer] epoch #134 | Time 25662.55 s +2025-04-02 20:59:42 | [rl2_trainer] epoch #134 | EpochTime 256.46 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.981 +Average/AverageReturn -28.6155 +Average/Iteration 134 +Average/MaxReturn -3.50766 +Average/MinReturn -57.5357 +Average/NumEpisodes 100 +Average/StdReturn 9.26224 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.587701 +TotalEnvSteps 1.35e+06 +__unnamed_task__/AverageDiscountedReturn -17.981 +__unnamed_task__/AverageReturn -28.6155 +__unnamed_task__/Iteration 134 +__unnamed_task__/MaxReturn -3.50766 +__unnamed_task__/MinReturn -57.5357 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.26224 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.08937 +policy/KL 0.0132228 +policy/KLBefore 0 +policy/LossAfter -0.0318085 +policy/LossBefore 0.00249149 +policy/dLoss 0.0343 +---------------------------------------- ------------ +2025-04-02 21:03:39 | [rl2_trainer] epoch #135 | Optimizing policy... +2025-04-02 21:03:39 | [rl2_trainer] epoch #135 | Fitting baseline... +2025-04-02 21:03:39 | [rl2_trainer] epoch #135 | Computing loss before +2025-04-02 21:03:40 | [rl2_trainer] epoch #135 | Computing KL before +2025-04-02 21:03:41 | [rl2_trainer] epoch #135 | Optimizing +2025-04-02 21:04:16 | [rl2_trainer] epoch #135 | Computing KL after +2025-04-02 21:04:16 | [rl2_trainer] epoch #135 | Computing loss after +2025-04-02 21:04:17 | [rl2_trainer] epoch #135 | Saving snapshot... +2025-04-02 21:04:17 | [rl2_trainer] epoch #135 | Saved +2025-04-02 21:04:17 | [rl2_trainer] epoch #135 | Time 25938.26 s +2025-04-02 21:04:17 | [rl2_trainer] epoch #135 | EpochTime 275.71 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.2863 +Average/AverageReturn -29.0662 +Average/Iteration 135 +Average/MaxReturn -12.3476 +Average/MinReturn -65.4995 +Average/NumEpisodes 100 +Average/StdReturn 9.84594 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.656285 +TotalEnvSteps 1.36e+06 +__unnamed_task__/AverageDiscountedReturn -18.2863 +__unnamed_task__/AverageReturn -29.0662 +__unnamed_task__/Iteration 135 +__unnamed_task__/MaxReturn -12.3476 +__unnamed_task__/MinReturn -65.4995 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.84594 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.08482 +policy/KL 0.0124862 +policy/KLBefore 0 +policy/LossAfter -0.0340126 +policy/LossBefore -0.00272663 +policy/dLoss 0.031286 +---------------------------------------- ------------ +2025-04-02 21:06:47 | [rl2_trainer] epoch #136 | Optimizing policy... +2025-04-02 21:06:47 | [rl2_trainer] epoch #136 | Fitting baseline... +2025-04-02 21:06:47 | [rl2_trainer] epoch #136 | Computing loss before +2025-04-02 21:06:48 | [rl2_trainer] epoch #136 | Computing KL before +2025-04-02 21:06:48 | [rl2_trainer] epoch #136 | Optimizing +2025-04-02 21:07:24 | [rl2_trainer] epoch #136 | Computing KL after +2025-04-02 21:07:25 | [rl2_trainer] epoch #136 | Computing loss after +2025-04-02 21:07:26 | [rl2_trainer] epoch #136 | Saving snapshot... +2025-04-02 21:07:26 | [rl2_trainer] epoch #136 | Saved +2025-04-02 21:07:26 | [rl2_trainer] epoch #136 | Time 26126.63 s +2025-04-02 21:07:26 | [rl2_trainer] epoch #136 | EpochTime 188.37 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.718 +Average/AverageReturn -26.3692 +Average/Iteration 136 +Average/MaxReturn 6.30832 +Average/MinReturn -54.497 +Average/NumEpisodes 100 +Average/StdReturn 9.1294 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.433632 +TotalEnvSteps 1.37e+06 +__unnamed_task__/AverageDiscountedReturn -16.718 +__unnamed_task__/AverageReturn -26.3692 +__unnamed_task__/Iteration 136 +__unnamed_task__/MaxReturn 6.30832 +__unnamed_task__/MinReturn -54.497 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.1294 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.07232 +policy/KL 0.0132864 +policy/KLBefore 0 +policy/LossAfter -0.0289048 +policy/LossBefore 0.00665172 +policy/dLoss 0.0355566 +---------------------------------------- ------------ +2025-04-02 21:08:52 | [rl2_trainer] epoch #137 | Optimizing policy... +2025-04-02 21:08:52 | [rl2_trainer] epoch #137 | Fitting baseline... +2025-04-02 21:08:52 | [rl2_trainer] epoch #137 | Computing loss before +2025-04-02 21:08:53 | [rl2_trainer] epoch #137 | Computing KL before +2025-04-02 21:08:53 | [rl2_trainer] epoch #137 | Optimizing +2025-04-02 21:09:27 | [rl2_trainer] epoch #137 | Computing KL after +2025-04-02 21:09:28 | [rl2_trainer] epoch #137 | Computing loss after +2025-04-02 21:09:28 | [rl2_trainer] epoch #137 | Saving snapshot... +2025-04-02 21:09:28 | [rl2_trainer] epoch #137 | Saved +2025-04-02 21:09:28 | [rl2_trainer] epoch #137 | Time 26249.51 s +2025-04-02 21:09:28 | [rl2_trainer] epoch #137 | EpochTime 122.88 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4566 +Average/AverageReturn -23.6711 +Average/Iteration 137 +Average/MaxReturn -5.07184 +Average/MinReturn -40.0613 +Average/NumEpisodes 100 +Average/StdReturn 6.73441 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.591747 +TotalEnvSteps 1.38e+06 +__unnamed_task__/AverageDiscountedReturn -15.4566 +__unnamed_task__/AverageReturn -23.6711 +__unnamed_task__/Iteration 137 +__unnamed_task__/MaxReturn -5.07184 +__unnamed_task__/MinReturn -40.0613 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.73441 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.0545 +policy/KL 0.0109158 +policy/KLBefore 0 +policy/LossAfter -0.00792106 +policy/LossBefore 0.00678676 +policy/dLoss 0.0147078 +---------------------------------------- ------------ +2025-04-02 21:11:28 | [rl2_trainer] epoch #138 | Optimizing policy... +2025-04-02 21:11:28 | [rl2_trainer] epoch #138 | Fitting baseline... +2025-04-02 21:11:28 | [rl2_trainer] epoch #138 | Computing loss before +2025-04-02 21:11:29 | [rl2_trainer] epoch #138 | Computing KL before +2025-04-02 21:11:29 | [rl2_trainer] epoch #138 | Optimizing +2025-04-02 21:12:06 | [rl2_trainer] epoch #138 | Computing KL after +2025-04-02 21:12:06 | [rl2_trainer] epoch #138 | Computing loss after +2025-04-02 21:12:07 | [rl2_trainer] epoch #138 | Saving snapshot... +2025-04-02 21:12:07 | [rl2_trainer] epoch #138 | Saved +2025-04-02 21:12:07 | [rl2_trainer] epoch #138 | Time 26408.08 s +2025-04-02 21:12:07 | [rl2_trainer] epoch #138 | EpochTime 158.57 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.822 +Average/AverageReturn -25.8734 +Average/Iteration 138 +Average/MaxReturn -8.93172 +Average/MinReturn -48.4874 +Average/NumEpisodes 100 +Average/StdReturn 6.67383 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.42806 +TotalEnvSteps 1.39e+06 +__unnamed_task__/AverageDiscountedReturn -16.822 +__unnamed_task__/AverageReturn -25.8734 +__unnamed_task__/Iteration 138 +__unnamed_task__/MaxReturn -8.93172 +__unnamed_task__/MinReturn -48.4874 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.67383 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.04124 +policy/KL 0.0087196 +policy/KLBefore 0 +policy/LossAfter -0.00972331 +policy/LossBefore 0.00636574 +policy/dLoss 0.016089 +---------------------------------------- ------------ +2025-04-02 21:14:27 | [rl2_trainer] epoch #139 | Optimizing policy... +2025-04-02 21:14:27 | [rl2_trainer] epoch #139 | Fitting baseline... +2025-04-02 21:14:27 | [rl2_trainer] epoch #139 | Computing loss before +2025-04-02 21:14:28 | [rl2_trainer] epoch #139 | Computing KL before +2025-04-02 21:14:28 | [rl2_trainer] epoch #139 | Optimizing +2025-04-02 21:15:05 | [rl2_trainer] epoch #139 | Computing KL after +2025-04-02 21:15:05 | [rl2_trainer] epoch #139 | Computing loss after +2025-04-02 21:15:06 | [rl2_trainer] epoch #139 | Saving snapshot... +2025-04-02 21:15:06 | [rl2_trainer] epoch #139 | Saved +2025-04-02 21:15:06 | [rl2_trainer] epoch #139 | Time 26587.31 s +2025-04-02 21:15:06 | [rl2_trainer] epoch #139 | EpochTime 179.23 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.9046 +Average/AverageReturn -23.2209 +Average/Iteration 139 +Average/MaxReturn 10.2107 +Average/MinReturn -38.0162 +Average/NumEpisodes 100 +Average/StdReturn 7.50498 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.473856 +TotalEnvSteps 1.4e+06 +__unnamed_task__/AverageDiscountedReturn -14.9046 +__unnamed_task__/AverageReturn -23.2209 +__unnamed_task__/Iteration 139 +__unnamed_task__/MaxReturn 10.2107 +__unnamed_task__/MinReturn -38.0162 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.50498 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.03512 +policy/KL 0.00933831 +policy/KLBefore 0 +policy/LossAfter -0.0299987 +policy/LossBefore -0.00498974 +policy/dLoss 0.0250089 +---------------------------------------- ------------ +2025-04-02 21:18:04 | [rl2_trainer] epoch #140 | Optimizing policy... +2025-04-02 21:18:05 | [rl2_trainer] epoch #140 | Fitting baseline... +2025-04-02 21:18:05 | [rl2_trainer] epoch #140 | Computing loss before +2025-04-02 21:18:05 | [rl2_trainer] epoch #140 | Computing KL before +2025-04-02 21:18:06 | [rl2_trainer] epoch #140 | Optimizing +2025-04-02 21:18:42 | [rl2_trainer] epoch #140 | Computing KL after +2025-04-02 21:18:43 | [rl2_trainer] epoch #140 | Computing loss after +2025-04-02 21:18:44 | [rl2_trainer] epoch #140 | Saving snapshot... +2025-04-02 21:18:44 | [rl2_trainer] epoch #140 | Saved +2025-04-02 21:18:44 | [rl2_trainer] epoch #140 | Time 26804.79 s +2025-04-02 21:18:44 | [rl2_trainer] epoch #140 | EpochTime 217.48 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.5225 +Average/AverageReturn -22.1448 +Average/Iteration 140 +Average/MaxReturn 3.12261 +Average/MinReturn -57.7684 +Average/NumEpisodes 100 +Average/StdReturn 6.28005 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.515413 +TotalEnvSteps 1.41e+06 +__unnamed_task__/AverageDiscountedReturn -14.5225 +__unnamed_task__/AverageReturn -22.1448 +__unnamed_task__/Iteration 140 +__unnamed_task__/MaxReturn 3.12261 +__unnamed_task__/MinReturn -57.7684 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.28005 +__unnamed_task__/TerminationRate 0 +policy/Entropy 7.02321 +policy/KL 0.00743167 +policy/KLBefore 0 +policy/LossAfter -0.00162558 +policy/LossBefore 0.00987108 +policy/dLoss 0.0114967 +---------------------------------------- ------------ +2025-04-02 21:21:37 | [rl2_trainer] epoch #141 | Optimizing policy... +2025-04-02 21:21:38 | [rl2_trainer] epoch #141 | Fitting baseline... +2025-04-02 21:21:38 | [rl2_trainer] epoch #141 | Computing loss before +2025-04-02 21:21:38 | [rl2_trainer] epoch #141 | Computing KL before +2025-04-02 21:21:39 | [rl2_trainer] epoch #141 | Optimizing +2025-04-02 21:22:14 | [rl2_trainer] epoch #141 | Computing KL after +2025-04-02 21:22:15 | [rl2_trainer] epoch #141 | Computing loss after +2025-04-02 21:22:16 | [rl2_trainer] epoch #141 | Saving snapshot... +2025-04-02 21:22:16 | [rl2_trainer] epoch #141 | Saved +2025-04-02 21:22:16 | [rl2_trainer] epoch #141 | Time 27017.01 s +2025-04-02 21:22:16 | [rl2_trainer] epoch #141 | EpochTime 212.22 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2083 +Average/AverageReturn -22.2029 +Average/Iteration 141 +Average/MaxReturn 4.22794 +Average/MinReturn -50.8229 +Average/NumEpisodes 100 +Average/StdReturn 8.19609 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.542036 +TotalEnvSteps 1.42e+06 +__unnamed_task__/AverageDiscountedReturn -14.2083 +__unnamed_task__/AverageReturn -22.2029 +__unnamed_task__/Iteration 141 +__unnamed_task__/MaxReturn 4.22794 +__unnamed_task__/MinReturn -50.8229 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.19609 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.99931 +policy/KL 0.0114676 +policy/KLBefore 0 +policy/LossAfter -0.0240528 +policy/LossBefore -0.00121947 +policy/dLoss 0.0228334 +---------------------------------------- ------------ +2025-04-02 21:25:16 | [rl2_trainer] epoch #142 | Optimizing policy... +2025-04-02 21:25:16 | [rl2_trainer] epoch #142 | Fitting baseline... +2025-04-02 21:25:16 | [rl2_trainer] epoch #142 | Computing loss before +2025-04-02 21:25:16 | [rl2_trainer] epoch #142 | Computing KL before +2025-04-02 21:25:17 | [rl2_trainer] epoch #142 | Optimizing +2025-04-02 21:25:53 | [rl2_trainer] epoch #142 | Computing KL after +2025-04-02 21:25:54 | [rl2_trainer] epoch #142 | Computing loss after +2025-04-02 21:25:55 | [rl2_trainer] epoch #142 | Saving snapshot... +2025-04-02 21:25:55 | [rl2_trainer] epoch #142 | Saved +2025-04-02 21:25:55 | [rl2_trainer] epoch #142 | Time 27235.74 s +2025-04-02 21:25:55 | [rl2_trainer] epoch #142 | EpochTime 218.72 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3699 +Average/AverageReturn -24.5356 +Average/Iteration 142 +Average/MaxReturn 0.456681 +Average/MinReturn -64.6754 +Average/NumEpisodes 100 +Average/StdReturn 9.9661 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.66859 +TotalEnvSteps 1.43e+06 +__unnamed_task__/AverageDiscountedReturn -15.3699 +__unnamed_task__/AverageReturn -24.5356 +__unnamed_task__/Iteration 142 +__unnamed_task__/MaxReturn 0.456681 +__unnamed_task__/MinReturn -64.6754 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.9661 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.96594 +policy/KL 0.0108593 +policy/KLBefore 0 +policy/LossAfter -0.0176932 +policy/LossBefore 0.000462085 +policy/dLoss 0.0181552 +---------------------------------------- ------------- +2025-04-02 21:27:55 | [rl2_trainer] epoch #143 | Optimizing policy... +2025-04-02 21:27:56 | [rl2_trainer] epoch #143 | Fitting baseline... +2025-04-02 21:27:56 | [rl2_trainer] epoch #143 | Computing loss before +2025-04-02 21:27:56 | [rl2_trainer] epoch #143 | Computing KL before +2025-04-02 21:27:57 | [rl2_trainer] epoch #143 | Optimizing +2025-04-02 21:28:32 | [rl2_trainer] epoch #143 | Computing KL after +2025-04-02 21:28:33 | [rl2_trainer] epoch #143 | Computing loss after +2025-04-02 21:28:34 | [rl2_trainer] epoch #143 | Saving snapshot... +2025-04-02 21:28:34 | [rl2_trainer] epoch #143 | Saved +2025-04-02 21:28:34 | [rl2_trainer] epoch #143 | Time 27394.95 s +2025-04-02 21:28:34 | [rl2_trainer] epoch #143 | EpochTime 159.21 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.0905 +Average/AverageReturn -24.6631 +Average/Iteration 143 +Average/MaxReturn 3.47225 +Average/MinReturn -44.3446 +Average/NumEpisodes 100 +Average/StdReturn 8.03704 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.658908 +TotalEnvSteps 1.44e+06 +__unnamed_task__/AverageDiscountedReturn -16.0905 +__unnamed_task__/AverageReturn -24.6631 +__unnamed_task__/Iteration 143 +__unnamed_task__/MaxReturn 3.47225 +__unnamed_task__/MinReturn -44.3446 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.03704 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.95355 +policy/KL 0.00891887 +policy/KLBefore 0 +policy/LossAfter -0.0295673 +policy/LossBefore -0.00800341 +policy/dLoss 0.0215639 +---------------------------------------- ------------ +2025-04-02 21:31:38 | [rl2_trainer] epoch #144 | Optimizing policy... +2025-04-02 21:31:39 | [rl2_trainer] epoch #144 | Fitting baseline... +2025-04-02 21:31:39 | [rl2_trainer] epoch #144 | Computing loss before +2025-04-02 21:31:39 | [rl2_trainer] epoch #144 | Computing KL before +2025-04-02 21:31:40 | [rl2_trainer] epoch #144 | Optimizing +2025-04-02 21:32:14 | [rl2_trainer] epoch #144 | Computing KL after +2025-04-02 21:32:14 | [rl2_trainer] epoch #144 | Computing loss after +2025-04-02 21:32:15 | [rl2_trainer] epoch #144 | Saving snapshot... +2025-04-02 21:32:15 | [rl2_trainer] epoch #144 | Saved +2025-04-02 21:32:15 | [rl2_trainer] epoch #144 | Time 27616.21 s +2025-04-02 21:32:15 | [rl2_trainer] epoch #144 | EpochTime 221.26 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.8936 +Average/AverageReturn -26.4602 +Average/Iteration 144 +Average/MaxReturn -1.80125 +Average/MinReturn -57.2823 +Average/NumEpisodes 100 +Average/StdReturn 8.39297 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.5367 +TotalEnvSteps 1.45e+06 +__unnamed_task__/AverageDiscountedReturn -16.8936 +__unnamed_task__/AverageReturn -26.4602 +__unnamed_task__/Iteration 144 +__unnamed_task__/MaxReturn -1.80125 +__unnamed_task__/MinReturn -57.2823 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.39297 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.94963 +policy/KL 0.0113582 +policy/KLBefore 0 +policy/LossAfter -0.017847 +policy/LossBefore 0.0102502 +policy/dLoss 0.0280972 +---------------------------------------- ----------- +2025-04-02 21:35:11 | [rl2_trainer] epoch #145 | Optimizing policy... +2025-04-02 21:35:12 | [rl2_trainer] epoch #145 | Fitting baseline... +2025-04-02 21:35:12 | [rl2_trainer] epoch #145 | Computing loss before +2025-04-02 21:35:12 | [rl2_trainer] epoch #145 | Computing KL before +2025-04-02 21:35:13 | [rl2_trainer] epoch #145 | Optimizing +2025-04-02 21:35:47 | [rl2_trainer] epoch #145 | Computing KL after +2025-04-02 21:35:48 | [rl2_trainer] epoch #145 | Computing loss after +2025-04-02 21:35:48 | [rl2_trainer] epoch #145 | Saving snapshot... +2025-04-02 21:35:48 | [rl2_trainer] epoch #145 | Saved +2025-04-02 21:35:48 | [rl2_trainer] epoch #145 | Time 27829.36 s +2025-04-02 21:35:48 | [rl2_trainer] epoch #145 | EpochTime 213.15 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.8949 +Average/AverageReturn -22.9742 +Average/Iteration 145 +Average/MaxReturn 0.780455 +Average/MinReturn -36.9897 +Average/NumEpisodes 100 +Average/StdReturn 5.41227 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.507322 +TotalEnvSteps 1.46e+06 +__unnamed_task__/AverageDiscountedReturn -14.8949 +__unnamed_task__/AverageReturn -22.9742 +__unnamed_task__/Iteration 145 +__unnamed_task__/MaxReturn 0.780455 +__unnamed_task__/MinReturn -36.9897 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.41227 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.93448 +policy/KL 0.00594864 +policy/KLBefore 0 +policy/LossAfter -0.019023 +policy/LossBefore -0.00821026 +policy/dLoss 0.0108128 +---------------------------------------- ------------ +2025-04-02 21:38:15 | [rl2_trainer] epoch #146 | Optimizing policy... +2025-04-02 21:38:15 | [rl2_trainer] epoch #146 | Fitting baseline... +2025-04-02 21:38:15 | [rl2_trainer] epoch #146 | Computing loss before +2025-04-02 21:38:16 | [rl2_trainer] epoch #146 | Computing KL before +2025-04-02 21:38:16 | [rl2_trainer] epoch #146 | Optimizing +2025-04-02 21:38:49 | [rl2_trainer] epoch #146 | Computing KL after +2025-04-02 21:38:50 | [rl2_trainer] epoch #146 | Computing loss after +2025-04-02 21:38:51 | [rl2_trainer] epoch #146 | Saving snapshot... +2025-04-02 21:38:51 | [rl2_trainer] epoch #146 | Saved +2025-04-02 21:38:51 | [rl2_trainer] epoch #146 | Time 28011.65 s +2025-04-02 21:38:51 | [rl2_trainer] epoch #146 | EpochTime 182.28 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.4673 +Average/AverageReturn -22.5682 +Average/Iteration 146 +Average/MaxReturn 4.559 +Average/MinReturn -37.1129 +Average/NumEpisodes 100 +Average/StdReturn 7.1211 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.645418 +TotalEnvSteps 1.47e+06 +__unnamed_task__/AverageDiscountedReturn -14.4673 +__unnamed_task__/AverageReturn -22.5682 +__unnamed_task__/Iteration 146 +__unnamed_task__/MaxReturn 4.559 +__unnamed_task__/MinReturn -37.1129 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.1211 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.91845 +policy/KL 0.00952178 +policy/KLBefore 0 +policy/LossAfter -0.0097334 +policy/LossBefore 0.00871468 +policy/dLoss 0.0184481 +---------------------------------------- ------------ +2025-04-02 21:40:58 | [rl2_trainer] epoch #147 | Optimizing policy... +2025-04-02 21:40:58 | [rl2_trainer] epoch #147 | Fitting baseline... +2025-04-02 21:40:58 | [rl2_trainer] epoch #147 | Computing loss before +2025-04-02 21:40:59 | [rl2_trainer] epoch #147 | Computing KL before +2025-04-02 21:41:00 | [rl2_trainer] epoch #147 | Optimizing +2025-04-02 21:41:33 | [rl2_trainer] epoch #147 | Computing KL after +2025-04-02 21:41:34 | [rl2_trainer] epoch #147 | Computing loss after +2025-04-02 21:41:35 | [rl2_trainer] epoch #147 | Saving snapshot... +2025-04-02 21:41:35 | [rl2_trainer] epoch #147 | Saved +2025-04-02 21:41:35 | [rl2_trainer] epoch #147 | Time 28175.80 s +2025-04-02 21:41:35 | [rl2_trainer] epoch #147 | EpochTime 164.15 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.1819 +Average/AverageReturn -26.4172 +Average/Iteration 147 +Average/MaxReturn -17.4322 +Average/MinReturn -41.1414 +Average/NumEpisodes 100 +Average/StdReturn 5.6133 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.219821 +TotalEnvSteps 1.48e+06 +__unnamed_task__/AverageDiscountedReturn -17.1819 +__unnamed_task__/AverageReturn -26.4172 +__unnamed_task__/Iteration 147 +__unnamed_task__/MaxReturn -17.4322 +__unnamed_task__/MinReturn -41.1414 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.6133 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.89545 +policy/KL 0.0108079 +policy/KLBefore 0 +policy/LossAfter -0.0114932 +policy/LossBefore 0.00416604 +policy/dLoss 0.0156592 +---------------------------------------- ------------ +2025-04-02 21:43:02 | [rl2_trainer] epoch #148 | Optimizing policy... +2025-04-02 21:43:02 | [rl2_trainer] epoch #148 | Fitting baseline... +2025-04-02 21:43:02 | [rl2_trainer] epoch #148 | Computing loss before +2025-04-02 21:43:03 | [rl2_trainer] epoch #148 | Computing KL before +2025-04-02 21:43:03 | [rl2_trainer] epoch #148 | Optimizing +2025-04-02 21:43:37 | [rl2_trainer] epoch #148 | Computing KL after +2025-04-02 21:43:38 | [rl2_trainer] epoch #148 | Computing loss after +2025-04-02 21:43:39 | [rl2_trainer] epoch #148 | Saving snapshot... +2025-04-02 21:43:39 | [rl2_trainer] epoch #148 | Saved +2025-04-02 21:43:39 | [rl2_trainer] epoch #148 | Time 28299.73 s +2025-04-02 21:43:39 | [rl2_trainer] epoch #148 | EpochTime 123.93 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.264 +Average/AverageReturn -21.9451 +Average/Iteration 148 +Average/MaxReturn 20.2879 +Average/MinReturn -37.6534 +Average/NumEpisodes 100 +Average/StdReturn 6.69559 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.563702 +TotalEnvSteps 1.49e+06 +__unnamed_task__/AverageDiscountedReturn -14.264 +__unnamed_task__/AverageReturn -21.9451 +__unnamed_task__/Iteration 148 +__unnamed_task__/MaxReturn 20.2879 +__unnamed_task__/MinReturn -37.6534 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.69559 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.8632 +policy/KL 0.0105587 +policy/KLBefore 0 +policy/LossAfter -0.0191883 +policy/LossBefore -0.000545752 +policy/dLoss 0.0186426 +---------------------------------------- ------------- +2025-04-02 21:45:37 | [rl2_trainer] epoch #149 | Optimizing policy... +2025-04-02 21:45:37 | [rl2_trainer] epoch #149 | Fitting baseline... +2025-04-02 21:45:37 | [rl2_trainer] epoch #149 | Computing loss before +2025-04-02 21:45:38 | [rl2_trainer] epoch #149 | Computing KL before +2025-04-02 21:45:38 | [rl2_trainer] epoch #149 | Optimizing +2025-04-02 21:46:13 | [rl2_trainer] epoch #149 | Computing KL after +2025-04-02 21:46:13 | [rl2_trainer] epoch #149 | Computing loss after +2025-04-02 21:46:14 | [rl2_trainer] epoch #149 | Saving snapshot... +2025-04-02 21:46:14 | [rl2_trainer] epoch #149 | Saved +2025-04-02 21:46:14 | [rl2_trainer] epoch #149 | Time 28455.09 s +2025-04-02 21:46:14 | [rl2_trainer] epoch #149 | EpochTime 155.36 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8534 +Average/AverageReturn -25.9791 +Average/Iteration 149 +Average/MaxReturn -9.30411 +Average/MinReturn -44.5067 +Average/NumEpisodes 100 +Average/StdReturn 5.56173 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.57233 +TotalEnvSteps 1.5e+06 +__unnamed_task__/AverageDiscountedReturn -16.8534 +__unnamed_task__/AverageReturn -25.9791 +__unnamed_task__/Iteration 149 +__unnamed_task__/MaxReturn -9.30411 +__unnamed_task__/MinReturn -44.5067 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.56173 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.8476 +policy/KL 0.0103815 +policy/KLBefore 0 +policy/LossAfter -0.0108049 +policy/LossBefore 0.000924839 +policy/dLoss 0.0117297 +---------------------------------------- ------------- +2025-04-02 21:48:09 | [rl2_trainer] epoch #150 | Optimizing policy... +2025-04-02 21:48:09 | [rl2_trainer] epoch #150 | Fitting baseline... +2025-04-02 21:48:09 | [rl2_trainer] epoch #150 | Computing loss before +2025-04-02 21:48:09 | [rl2_trainer] epoch #150 | Computing KL before +2025-04-02 21:48:10 | [rl2_trainer] epoch #150 | Optimizing +2025-04-02 21:48:43 | [rl2_trainer] epoch #150 | Computing KL after +2025-04-02 21:48:44 | [rl2_trainer] epoch #150 | Computing loss after +2025-04-02 21:48:44 | [rl2_trainer] epoch #150 | Saving snapshot... +2025-04-02 21:48:44 | [rl2_trainer] epoch #150 | Saved +2025-04-02 21:48:44 | [rl2_trainer] epoch #150 | Time 28605.52 s +2025-04-02 21:48:44 | [rl2_trainer] epoch #150 | EpochTime 150.42 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.4767 +Average/AverageReturn -26.0886 +Average/Iteration 150 +Average/MaxReturn -0.162843 +Average/MinReturn -50.6158 +Average/NumEpisodes 100 +Average/StdReturn 8.2786 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.497239 +TotalEnvSteps 1.51e+06 +__unnamed_task__/AverageDiscountedReturn -16.4767 +__unnamed_task__/AverageReturn -26.0886 +__unnamed_task__/Iteration 150 +__unnamed_task__/MaxReturn -0.162843 +__unnamed_task__/MinReturn -50.6158 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.2786 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.848 +policy/KL 0.012837 +policy/KLBefore 0 +policy/LossAfter -0.0472008 +policy/LossBefore -0.0109925 +policy/dLoss 0.0362083 +---------------------------------------- ----------- +2025-04-02 21:50:50 | [rl2_trainer] epoch #151 | Optimizing policy... +2025-04-02 21:50:50 | [rl2_trainer] epoch #151 | Fitting baseline... +2025-04-02 21:50:50 | [rl2_trainer] epoch #151 | Computing loss before +2025-04-02 21:50:51 | [rl2_trainer] epoch #151 | Computing KL before +2025-04-02 21:50:52 | [rl2_trainer] epoch #151 | Optimizing +2025-04-02 21:51:26 | [rl2_trainer] epoch #151 | Computing KL after +2025-04-02 21:51:26 | [rl2_trainer] epoch #151 | Computing loss after +2025-04-02 21:51:27 | [rl2_trainer] epoch #151 | Saving snapshot... +2025-04-02 21:51:27 | [rl2_trainer] epoch #151 | Saved +2025-04-02 21:51:27 | [rl2_trainer] epoch #151 | Time 28768.10 s +2025-04-02 21:51:27 | [rl2_trainer] epoch #151 | EpochTime 162.57 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2527 +Average/AverageReturn -23.2684 +Average/Iteration 151 +Average/MaxReturn 15.4374 +Average/MinReturn -36.2817 +Average/NumEpisodes 100 +Average/StdReturn 6.145 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.58177 +TotalEnvSteps 1.52e+06 +__unnamed_task__/AverageDiscountedReturn -15.2527 +__unnamed_task__/AverageReturn -23.2684 +__unnamed_task__/Iteration 151 +__unnamed_task__/MaxReturn 15.4374 +__unnamed_task__/MinReturn -36.2817 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.145 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.82891 +policy/KL 0.0110725 +policy/KLBefore 0 +policy/LossAfter -0.0155637 +policy/LossBefore 0.000614169 +policy/dLoss 0.0161779 +---------------------------------------- ------------- +2025-04-02 21:53:34 | [rl2_trainer] epoch #152 | Optimizing policy... +2025-04-02 21:53:34 | [rl2_trainer] epoch #152 | Fitting baseline... +2025-04-02 21:53:34 | [rl2_trainer] epoch #152 | Computing loss before +2025-04-02 21:53:35 | [rl2_trainer] epoch #152 | Computing KL before +2025-04-02 21:53:35 | [rl2_trainer] epoch #152 | Optimizing +2025-04-02 21:54:08 | [rl2_trainer] epoch #152 | Computing KL after +2025-04-02 21:54:08 | [rl2_trainer] epoch #152 | Computing loss after +2025-04-02 21:54:09 | [rl2_trainer] epoch #152 | Saving snapshot... +2025-04-02 21:54:09 | [rl2_trainer] epoch #152 | Saved +2025-04-02 21:54:09 | [rl2_trainer] epoch #152 | Time 28930.19 s +2025-04-02 21:54:09 | [rl2_trainer] epoch #152 | EpochTime 162.09 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1387 +Average/AverageReturn -23.1956 +Average/Iteration 152 +Average/MaxReturn 16.719 +Average/MinReturn -33.8113 +Average/NumEpisodes 100 +Average/StdReturn 6.32109 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.556776 +TotalEnvSteps 1.53e+06 +__unnamed_task__/AverageDiscountedReturn -15.1387 +__unnamed_task__/AverageReturn -23.1956 +__unnamed_task__/Iteration 152 +__unnamed_task__/MaxReturn 16.719 +__unnamed_task__/MinReturn -33.8113 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.32109 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.78733 +policy/KL 0.0177705 +policy/KLBefore 0 +policy/LossAfter -0.00860973 +policy/LossBefore 0.00163811 +policy/dLoss 0.0102478 +---------------------------------------- ------------ +2025-04-02 21:56:03 | [rl2_trainer] epoch #153 | Optimizing policy... +2025-04-02 21:56:04 | [rl2_trainer] epoch #153 | Fitting baseline... +2025-04-02 21:56:04 | [rl2_trainer] epoch #153 | Computing loss before +2025-04-02 21:56:04 | [rl2_trainer] epoch #153 | Computing KL before +2025-04-02 21:56:05 | [rl2_trainer] epoch #153 | Optimizing +2025-04-02 21:56:37 | [rl2_trainer] epoch #153 | Computing KL after +2025-04-02 21:56:37 | [rl2_trainer] epoch #153 | Computing loss after +2025-04-02 21:56:38 | [rl2_trainer] epoch #153 | Saving snapshot... +2025-04-02 21:56:38 | [rl2_trainer] epoch #153 | Saved +2025-04-02 21:56:38 | [rl2_trainer] epoch #153 | Time 29078.96 s +2025-04-02 21:56:38 | [rl2_trainer] epoch #153 | EpochTime 148.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.468 +Average/AverageReturn -23.6122 +Average/Iteration 153 +Average/MaxReturn 9.10202 +Average/MinReturn -39.9701 +Average/NumEpisodes 100 +Average/StdReturn 6.44343 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.398211 +TotalEnvSteps 1.54e+06 +__unnamed_task__/AverageDiscountedReturn -15.468 +__unnamed_task__/AverageReturn -23.6122 +__unnamed_task__/Iteration 153 +__unnamed_task__/MaxReturn 9.10202 +__unnamed_task__/MinReturn -39.9701 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.44343 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.76663 +policy/KL 0.00788699 +policy/KLBefore 0 +policy/LossAfter -0.0145013 +policy/LossBefore -0.00176373 +policy/dLoss 0.0127376 +---------------------------------------- ------------ +2025-04-02 21:59:00 | [rl2_trainer] epoch #154 | Optimizing policy... +2025-04-02 21:59:00 | [rl2_trainer] epoch #154 | Fitting baseline... +2025-04-02 21:59:01 | [rl2_trainer] epoch #154 | Computing loss before +2025-04-02 21:59:01 | [rl2_trainer] epoch #154 | Computing KL before +2025-04-02 21:59:02 | [rl2_trainer] epoch #154 | Optimizing +2025-04-02 21:59:35 | [rl2_trainer] epoch #154 | Computing KL after +2025-04-02 21:59:36 | [rl2_trainer] epoch #154 | Computing loss after +2025-04-02 21:59:36 | [rl2_trainer] epoch #154 | Saving snapshot... +2025-04-02 21:59:36 | [rl2_trainer] epoch #154 | Saved +2025-04-02 21:59:36 | [rl2_trainer] epoch #154 | Time 29257.50 s +2025-04-02 21:59:36 | [rl2_trainer] epoch #154 | EpochTime 178.53 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.1845 +Average/AverageReturn -20.6717 +Average/Iteration 154 +Average/MaxReturn -1.43183 +Average/MinReturn -31.5369 +Average/NumEpisodes 100 +Average/StdReturn 4.66136 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.333464 +TotalEnvSteps 1.55e+06 +__unnamed_task__/AverageDiscountedReturn -13.1845 +__unnamed_task__/AverageReturn -20.6717 +__unnamed_task__/Iteration 154 +__unnamed_task__/MaxReturn -1.43183 +__unnamed_task__/MinReturn -31.5369 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.66136 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.74882 +policy/KL 0.0121513 +policy/KLBefore 0 +policy/LossAfter -0.00726459 +policy/LossBefore 0.00358746 +policy/dLoss 0.010852 +---------------------------------------- ------------ +2025-04-02 22:01:44 | [rl2_trainer] epoch #155 | Optimizing policy... +2025-04-02 22:01:44 | [rl2_trainer] epoch #155 | Fitting baseline... +2025-04-02 22:01:44 | [rl2_trainer] epoch #155 | Computing loss before +2025-04-02 22:01:45 | [rl2_trainer] epoch #155 | Computing KL before +2025-04-02 22:01:45 | [rl2_trainer] epoch #155 | Optimizing +2025-04-02 22:02:18 | [rl2_trainer] epoch #155 | Computing KL after +2025-04-02 22:02:19 | [rl2_trainer] epoch #155 | Computing loss after +2025-04-02 22:02:19 | [rl2_trainer] epoch #155 | Saving snapshot... +2025-04-02 22:02:19 | [rl2_trainer] epoch #155 | Saved +2025-04-02 22:02:19 | [rl2_trainer] epoch #155 | Time 29420.51 s +2025-04-02 22:02:19 | [rl2_trainer] epoch #155 | EpochTime 163.01 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1242 +Average/AverageReturn -23.373 +Average/Iteration 155 +Average/MaxReturn -4.97553 +Average/MinReturn -34.7314 +Average/NumEpisodes 100 +Average/StdReturn 5.14384 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.541286 +TotalEnvSteps 1.56e+06 +__unnamed_task__/AverageDiscountedReturn -15.1242 +__unnamed_task__/AverageReturn -23.373 +__unnamed_task__/Iteration 155 +__unnamed_task__/MaxReturn -4.97553 +__unnamed_task__/MinReturn -34.7314 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.14384 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.71921 +policy/KL 0.00925028 +policy/KLBefore 0 +policy/LossAfter -0.0125239 +policy/LossBefore -0.00245012 +policy/dLoss 0.0100738 +---------------------------------------- ------------ +2025-04-02 22:04:18 | [rl2_trainer] epoch #156 | Optimizing policy... +2025-04-02 22:04:19 | [rl2_trainer] epoch #156 | Fitting baseline... +2025-04-02 22:04:19 | [rl2_trainer] epoch #156 | Computing loss before +2025-04-02 22:04:20 | [rl2_trainer] epoch #156 | Computing KL before +2025-04-02 22:04:20 | [rl2_trainer] epoch #156 | Optimizing +2025-04-02 22:05:07 | [rl2_trainer] epoch #156 | Computing KL after +2025-04-02 22:05:08 | [rl2_trainer] epoch #156 | Computing loss after +2025-04-02 22:05:09 | [rl2_trainer] epoch #156 | Saving snapshot... +2025-04-02 22:05:09 | [rl2_trainer] epoch #156 | Saved +2025-04-02 22:05:09 | [rl2_trainer] epoch #156 | Time 29590.37 s +2025-04-02 22:05:09 | [rl2_trainer] epoch #156 | EpochTime 169.86 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.4094 +Average/AverageReturn -25.5185 +Average/Iteration 156 +Average/MaxReturn -5.16892 +Average/MinReturn -39.842 +Average/NumEpisodes 100 +Average/StdReturn 5.87972 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.466421 +TotalEnvSteps 1.57e+06 +__unnamed_task__/AverageDiscountedReturn -16.4094 +__unnamed_task__/AverageReturn -25.5185 +__unnamed_task__/Iteration 156 +__unnamed_task__/MaxReturn -5.16892 +__unnamed_task__/MinReturn -39.842 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.87972 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.69263 +policy/KL 0.0100702 +policy/KLBefore 0 +policy/LossAfter -0.0164885 +policy/LossBefore -0.00252617 +policy/dLoss 0.0139623 +---------------------------------------- ------------ +2025-04-02 22:07:40 | [rl2_trainer] epoch #157 | Optimizing policy... +2025-04-02 22:07:40 | [rl2_trainer] epoch #157 | Fitting baseline... +2025-04-02 22:07:40 | [rl2_trainer] epoch #157 | Computing loss before +2025-04-02 22:07:41 | [rl2_trainer] epoch #157 | Computing KL before +2025-04-02 22:07:41 | [rl2_trainer] epoch #157 | Optimizing +2025-04-02 22:08:15 | [rl2_trainer] epoch #157 | Computing KL after +2025-04-02 22:08:16 | [rl2_trainer] epoch #157 | Computing loss after +2025-04-02 22:08:17 | [rl2_trainer] epoch #157 | Saving snapshot... +2025-04-02 22:08:17 | [rl2_trainer] epoch #157 | Saved +2025-04-02 22:08:17 | [rl2_trainer] epoch #157 | Time 29777.54 s +2025-04-02 22:08:17 | [rl2_trainer] epoch #157 | EpochTime 187.16 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.8384 +Average/AverageReturn -22.4288 +Average/Iteration 157 +Average/MaxReturn -4.26095 +Average/MinReturn -33.8907 +Average/NumEpisodes 100 +Average/StdReturn 5.29423 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.730412 +TotalEnvSteps 1.58e+06 +__unnamed_task__/AverageDiscountedReturn -14.8384 +__unnamed_task__/AverageReturn -22.4288 +__unnamed_task__/Iteration 157 +__unnamed_task__/MaxReturn -4.26095 +__unnamed_task__/MinReturn -33.8907 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.29423 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.66156 +policy/KL 0.0125774 +policy/KLBefore 0 +policy/LossAfter -0.00639741 +policy/LossBefore -0.00300017 +policy/dLoss 0.00339725 +---------------------------------------- ------------ +2025-04-02 22:09:46 | [rl2_trainer] epoch #158 | Optimizing policy... +2025-04-02 22:09:46 | [rl2_trainer] epoch #158 | Fitting baseline... +2025-04-02 22:09:46 | [rl2_trainer] epoch #158 | Computing loss before +2025-04-02 22:09:47 | [rl2_trainer] epoch #158 | Computing KL before +2025-04-02 22:09:47 | [rl2_trainer] epoch #158 | Optimizing +2025-04-02 22:10:20 | [rl2_trainer] epoch #158 | Computing KL after +2025-04-02 22:10:21 | [rl2_trainer] epoch #158 | Computing loss after +2025-04-02 22:10:22 | [rl2_trainer] epoch #158 | Saving snapshot... +2025-04-02 22:10:22 | [rl2_trainer] epoch #158 | Saved +2025-04-02 22:10:22 | [rl2_trainer] epoch #158 | Time 29902.73 s +2025-04-02 22:10:22 | [rl2_trainer] epoch #158 | EpochTime 125.19 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.3225 +Average/AverageReturn -21.9037 +Average/Iteration 158 +Average/MaxReturn 8.73406 +Average/MinReturn -100.624 +Average/NumEpisodes 100 +Average/StdReturn 11.333 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.599836 +TotalEnvSteps 1.59e+06 +__unnamed_task__/AverageDiscountedReturn -14.3225 +__unnamed_task__/AverageReturn -21.9037 +__unnamed_task__/Iteration 158 +__unnamed_task__/MaxReturn 8.73406 +__unnamed_task__/MinReturn -100.624 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.333 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.63983 +policy/KL 0.0117624 +policy/KLBefore 0 +policy/LossAfter -0.0317316 +policy/LossBefore 0.00595837 +policy/dLoss 0.03769 +---------------------------------------- ------------- +2025-04-02 22:12:17 | [rl2_trainer] epoch #159 | Optimizing policy... +2025-04-02 22:12:17 | [rl2_trainer] epoch #159 | Fitting baseline... +2025-04-02 22:12:17 | [rl2_trainer] epoch #159 | Computing loss before +2025-04-02 22:12:18 | [rl2_trainer] epoch #159 | Computing KL before +2025-04-02 22:12:18 | [rl2_trainer] epoch #159 | Optimizing +2025-04-02 22:12:52 | [rl2_trainer] epoch #159 | Computing KL after +2025-04-02 22:12:52 | [rl2_trainer] epoch #159 | Computing loss after +2025-04-02 22:12:53 | [rl2_trainer] epoch #159 | Saving snapshot... +2025-04-02 22:12:53 | [rl2_trainer] epoch #159 | Saved +2025-04-02 22:12:53 | [rl2_trainer] epoch #159 | Time 30054.40 s +2025-04-02 22:12:53 | [rl2_trainer] epoch #159 | EpochTime 151.67 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.7951 +Average/AverageReturn -24.3572 +Average/Iteration 159 +Average/MaxReturn -3.07116 +Average/MinReturn -38.1678 +Average/NumEpisodes 100 +Average/StdReturn 5.44352 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.473764 +TotalEnvSteps 1.6e+06 +__unnamed_task__/AverageDiscountedReturn -15.7951 +__unnamed_task__/AverageReturn -24.3572 +__unnamed_task__/Iteration 159 +__unnamed_task__/MaxReturn -3.07116 +__unnamed_task__/MinReturn -38.1678 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.44352 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.62169 +policy/KL 0.0108456 +policy/KLBefore 0 +policy/LossAfter -0.0067713 +policy/LossBefore 0.00277 +policy/dLoss 0.00954129 +---------------------------------------- ------------ +2025-04-02 22:14:50 | [rl2_trainer] epoch #160 | Optimizing policy... +2025-04-02 22:14:51 | [rl2_trainer] epoch #160 | Fitting baseline... +2025-04-02 22:14:51 | [rl2_trainer] epoch #160 | Computing loss before +2025-04-02 22:14:51 | [rl2_trainer] epoch #160 | Computing KL before +2025-04-02 22:14:52 | [rl2_trainer] epoch #160 | Optimizing +2025-04-02 22:15:27 | [rl2_trainer] epoch #160 | Computing KL after +2025-04-02 22:15:28 | [rl2_trainer] epoch #160 | Computing loss after +2025-04-02 22:15:29 | [rl2_trainer] epoch #160 | Saving snapshot... +2025-04-02 22:15:29 | [rl2_trainer] epoch #160 | Saved +2025-04-02 22:15:29 | [rl2_trainer] epoch #160 | Time 30209.94 s +2025-04-02 22:15:29 | [rl2_trainer] epoch #160 | EpochTime 155.53 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.5506 +Average/AverageReturn -23.8845 +Average/Iteration 160 +Average/MaxReturn -1.69025 +Average/MinReturn -37.5854 +Average/NumEpisodes 100 +Average/StdReturn 5.03243 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.594508 +TotalEnvSteps 1.61e+06 +__unnamed_task__/AverageDiscountedReturn -15.5506 +__unnamed_task__/AverageReturn -23.8845 +__unnamed_task__/Iteration 160 +__unnamed_task__/MaxReturn -1.69025 +__unnamed_task__/MinReturn -37.5854 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.03243 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.60459 +policy/KL 0.0118321 +policy/KLBefore 0 +policy/LossAfter -0.00497221 +policy/LossBefore 0.00380841 +policy/dLoss 0.00878062 +---------------------------------------- ------------ +2025-04-02 22:18:22 | [rl2_trainer] epoch #161 | Optimizing policy... +2025-04-02 22:18:22 | [rl2_trainer] epoch #161 | Fitting baseline... +2025-04-02 22:18:22 | [rl2_trainer] epoch #161 | Computing loss before +2025-04-02 22:18:22 | [rl2_trainer] epoch #161 | Computing KL before +2025-04-02 22:18:23 | [rl2_trainer] epoch #161 | Optimizing +2025-04-02 22:18:57 | [rl2_trainer] epoch #161 | Computing KL after +2025-04-02 22:18:57 | [rl2_trainer] epoch #161 | Computing loss after +2025-04-02 22:18:58 | [rl2_trainer] epoch #161 | Saving snapshot... +2025-04-02 22:18:58 | [rl2_trainer] epoch #161 | Saved +2025-04-02 22:18:58 | [rl2_trainer] epoch #161 | Time 30418.94 s +2025-04-02 22:18:58 | [rl2_trainer] epoch #161 | EpochTime 209.00 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.0479 +Average/AverageReturn -21.5829 +Average/Iteration 161 +Average/MaxReturn -7.53723 +Average/MinReturn -33.192 +Average/NumEpisodes 100 +Average/StdReturn 4.91731 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.436371 +TotalEnvSteps 1.62e+06 +__unnamed_task__/AverageDiscountedReturn -14.0479 +__unnamed_task__/AverageReturn -21.5829 +__unnamed_task__/Iteration 161 +__unnamed_task__/MaxReturn -7.53723 +__unnamed_task__/MinReturn -33.192 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.91731 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.58894 +policy/KL 0.00751433 +policy/KLBefore 0 +policy/LossAfter -0.0133743 +policy/LossBefore 0.00159324 +policy/dLoss 0.0149676 +---------------------------------------- ------------ +2025-04-02 22:21:51 | [rl2_trainer] epoch #162 | Optimizing policy... +2025-04-02 22:21:51 | [rl2_trainer] epoch #162 | Fitting baseline... +2025-04-02 22:21:51 | [rl2_trainer] epoch #162 | Computing loss before +2025-04-02 22:21:52 | [rl2_trainer] epoch #162 | Computing KL before +2025-04-02 22:21:52 | [rl2_trainer] epoch #162 | Optimizing +2025-04-02 22:22:27 | [rl2_trainer] epoch #162 | Computing KL after +2025-04-02 22:22:28 | [rl2_trainer] epoch #162 | Computing loss after +2025-04-02 22:22:29 | [rl2_trainer] epoch #162 | Saving snapshot... +2025-04-02 22:22:29 | [rl2_trainer] epoch #162 | Saved +2025-04-02 22:22:29 | [rl2_trainer] epoch #162 | Time 30629.98 s +2025-04-02 22:22:29 | [rl2_trainer] epoch #162 | EpochTime 211.03 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.6125 +Average/AverageReturn -22.4611 +Average/Iteration 162 +Average/MaxReturn 2.73002 +Average/MinReturn -34.6647 +Average/NumEpisodes 100 +Average/StdReturn 5.47083 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.419506 +TotalEnvSteps 1.63e+06 +__unnamed_task__/AverageDiscountedReturn -14.6125 +__unnamed_task__/AverageReturn -22.4611 +__unnamed_task__/Iteration 162 +__unnamed_task__/MaxReturn 2.73002 +__unnamed_task__/MinReturn -34.6647 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.47083 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.55758 +policy/KL 0.0139515 +policy/KLBefore 0 +policy/LossAfter -0.0136862 +policy/LossBefore -0.00124723 +policy/dLoss 0.012439 +---------------------------------------- ------------ +2025-04-02 22:24:42 | [rl2_trainer] epoch #163 | Optimizing policy... +2025-04-02 22:24:43 | [rl2_trainer] epoch #163 | Fitting baseline... +2025-04-02 22:24:43 | [rl2_trainer] epoch #163 | Computing loss before +2025-04-02 22:24:43 | [rl2_trainer] epoch #163 | Computing KL before +2025-04-02 22:24:44 | [rl2_trainer] epoch #163 | Optimizing +2025-04-02 22:25:17 | [rl2_trainer] epoch #163 | Computing KL after +2025-04-02 22:25:18 | [rl2_trainer] epoch #163 | Computing loss after +2025-04-02 22:25:19 | [rl2_trainer] epoch #163 | Saving snapshot... +2025-04-02 22:25:19 | [rl2_trainer] epoch #163 | Saved +2025-04-02 22:25:19 | [rl2_trainer] epoch #163 | Time 30799.87 s +2025-04-02 22:25:19 | [rl2_trainer] epoch #163 | EpochTime 169.89 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -18.0282 +Average/AverageReturn -28.9846 +Average/Iteration 163 +Average/MaxReturn -11.5831 +Average/MinReturn -63.8741 +Average/NumEpisodes 100 +Average/StdReturn 9.89643 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.675171 +TotalEnvSteps 1.64e+06 +__unnamed_task__/AverageDiscountedReturn -18.0282 +__unnamed_task__/AverageReturn -28.9846 +__unnamed_task__/Iteration 163 +__unnamed_task__/MaxReturn -11.5831 +__unnamed_task__/MinReturn -63.8741 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.89643 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.52149 +policy/KL 0.0117727 +policy/KLBefore 0 +policy/LossAfter -0.0428413 +policy/LossBefore -0.0117901 +policy/dLoss 0.0310512 +---------------------------------------- ----------- +2025-04-02 22:26:46 | [rl2_trainer] epoch #164 | Optimizing policy... +2025-04-02 22:26:46 | [rl2_trainer] epoch #164 | Fitting baseline... +2025-04-02 22:26:46 | [rl2_trainer] epoch #164 | Computing loss before +2025-04-02 22:26:47 | [rl2_trainer] epoch #164 | Computing KL before +2025-04-02 22:26:48 | [rl2_trainer] epoch #164 | Optimizing +2025-04-02 22:27:23 | [rl2_trainer] epoch #164 | Computing KL after +2025-04-02 22:27:24 | [rl2_trainer] epoch #164 | Computing loss after +2025-04-02 22:27:25 | [rl2_trainer] epoch #164 | Saving snapshot... +2025-04-02 22:27:25 | [rl2_trainer] epoch #164 | Saved +2025-04-02 22:27:25 | [rl2_trainer] epoch #164 | Time 30925.89 s +2025-04-02 22:27:25 | [rl2_trainer] epoch #164 | EpochTime 126.01 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.5191 +Average/AverageReturn -20.9683 +Average/Iteration 164 +Average/MaxReturn 7.65859 +Average/MinReturn -33.7306 +Average/NumEpisodes 100 +Average/StdReturn 6.91047 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.39918 +TotalEnvSteps 1.65e+06 +__unnamed_task__/AverageDiscountedReturn -13.5191 +__unnamed_task__/AverageReturn -20.9683 +__unnamed_task__/Iteration 164 +__unnamed_task__/MaxReturn 7.65859 +__unnamed_task__/MinReturn -33.7306 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.91047 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.49749 +policy/KL 0.01151 +policy/KLBefore 0 +policy/LossAfter -0.0358266 +policy/LossBefore -0.0106014 +policy/dLoss 0.0252252 +---------------------------------------- ----------- +2025-04-02 22:30:24 | [rl2_trainer] epoch #165 | Optimizing policy... +2025-04-02 22:30:24 | [rl2_trainer] epoch #165 | Fitting baseline... +2025-04-02 22:30:24 | [rl2_trainer] epoch #165 | Computing loss before +2025-04-02 22:30:25 | [rl2_trainer] epoch #165 | Computing KL before +2025-04-02 22:30:25 | [rl2_trainer] epoch #165 | Optimizing +2025-04-02 22:31:00 | [rl2_trainer] epoch #165 | Computing KL after +2025-04-02 22:31:01 | [rl2_trainer] epoch #165 | Computing loss after +2025-04-02 22:31:02 | [rl2_trainer] epoch #165 | Saving snapshot... +2025-04-02 22:31:02 | [rl2_trainer] epoch #165 | Saved +2025-04-02 22:31:02 | [rl2_trainer] epoch #165 | Time 31142.95 s +2025-04-02 22:31:02 | [rl2_trainer] epoch #165 | EpochTime 217.06 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.5052 +Average/AverageReturn -20.5928 +Average/Iteration 165 +Average/MaxReturn -6.97224 +Average/MinReturn -33.522 +Average/NumEpisodes 100 +Average/StdReturn 4.9553 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.428387 +TotalEnvSteps 1.66e+06 +__unnamed_task__/AverageDiscountedReturn -13.5052 +__unnamed_task__/AverageReturn -20.5928 +__unnamed_task__/Iteration 165 +__unnamed_task__/MaxReturn -6.97224 +__unnamed_task__/MinReturn -33.522 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.9553 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.47757 +policy/KL 0.0111303 +policy/KLBefore 0 +policy/LossAfter -0.0129599 +policy/LossBefore -0.00217221 +policy/dLoss 0.0107877 +---------------------------------------- ------------ +2025-04-02 22:33:29 | [rl2_trainer] epoch #166 | Optimizing policy... +2025-04-02 22:33:29 | [rl2_trainer] epoch #166 | Fitting baseline... +2025-04-02 22:33:29 | [rl2_trainer] epoch #166 | Computing loss before +2025-04-02 22:33:30 | [rl2_trainer] epoch #166 | Computing KL before +2025-04-02 22:33:30 | [rl2_trainer] epoch #166 | Optimizing +2025-04-02 22:34:06 | [rl2_trainer] epoch #166 | Computing KL after +2025-04-02 22:34:07 | [rl2_trainer] epoch #166 | Computing loss after +2025-04-02 22:34:08 | [rl2_trainer] epoch #166 | Saving snapshot... +2025-04-02 22:34:08 | [rl2_trainer] epoch #166 | Saved +2025-04-02 22:34:08 | [rl2_trainer] epoch #166 | Time 31328.55 s +2025-04-02 22:34:08 | [rl2_trainer] epoch #166 | EpochTime 185.60 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.7994 +Average/AverageReturn -18.2339 +Average/Iteration 166 +Average/MaxReturn 9.94788 +Average/MinReturn -28.4003 +Average/NumEpisodes 100 +Average/StdReturn 4.81984 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.277707 +TotalEnvSteps 1.67e+06 +__unnamed_task__/AverageDiscountedReturn -11.7994 +__unnamed_task__/AverageReturn -18.2339 +__unnamed_task__/Iteration 166 +__unnamed_task__/MaxReturn 9.94788 +__unnamed_task__/MinReturn -28.4003 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.81984 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.43897 +policy/KL 0.00860806 +policy/KLBefore 0 +policy/LossAfter 0.000970511 +policy/LossBefore 0.00734663 +policy/dLoss 0.00637612 +---------------------------------------- ------------- +2025-04-02 22:37:15 | [rl2_trainer] epoch #167 | Optimizing policy... +2025-04-02 22:37:16 | [rl2_trainer] epoch #167 | Fitting baseline... +2025-04-02 22:37:16 | [rl2_trainer] epoch #167 | Computing loss before +2025-04-02 22:37:16 | [rl2_trainer] epoch #167 | Computing KL before +2025-04-02 22:37:17 | [rl2_trainer] epoch #167 | Optimizing +2025-04-02 22:37:50 | [rl2_trainer] epoch #167 | Computing KL after +2025-04-02 22:37:51 | [rl2_trainer] epoch #167 | Computing loss after +2025-04-02 22:37:51 | [rl2_trainer] epoch #167 | Saving snapshot... +2025-04-02 22:37:51 | [rl2_trainer] epoch #167 | Saved +2025-04-02 22:37:51 | [rl2_trainer] epoch #167 | Time 31552.44 s +2025-04-02 22:37:51 | [rl2_trainer] epoch #167 | EpochTime 223.89 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.0029 +Average/AverageReturn -24.1136 +Average/Iteration 167 +Average/MaxReturn 2.93674 +Average/MinReturn -60.2654 +Average/NumEpisodes 100 +Average/StdReturn 12.0042 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.696941 +TotalEnvSteps 1.68e+06 +__unnamed_task__/AverageDiscountedReturn -15.0029 +__unnamed_task__/AverageReturn -24.1136 +__unnamed_task__/Iteration 167 +__unnamed_task__/MaxReturn 2.93674 +__unnamed_task__/MinReturn -60.2654 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.0042 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.3966 +policy/KL 0.0122142 +policy/KLBefore 0 +policy/LossAfter -0.0442119 +policy/LossBefore 0.00259683 +policy/dLoss 0.0468087 +---------------------------------------- ------------ +2025-04-02 22:42:18 | [rl2_trainer] epoch #168 | Optimizing policy... +2025-04-02 22:42:19 | [rl2_trainer] epoch #168 | Fitting baseline... +2025-04-02 22:42:19 | [rl2_trainer] epoch #168 | Computing loss before +2025-04-02 22:42:19 | [rl2_trainer] epoch #168 | Computing KL before +2025-04-02 22:42:20 | [rl2_trainer] epoch #168 | Optimizing +2025-04-02 22:42:54 | [rl2_trainer] epoch #168 | Computing KL after +2025-04-02 22:42:55 | [rl2_trainer] epoch #168 | Computing loss after +2025-04-02 22:42:55 | [rl2_trainer] epoch #168 | Saving snapshot... +2025-04-02 22:42:55 | [rl2_trainer] epoch #168 | Saved +2025-04-02 22:42:55 | [rl2_trainer] epoch #168 | Time 31856.36 s +2025-04-02 22:42:55 | [rl2_trainer] epoch #168 | EpochTime 303.91 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.4811 +Average/AverageReturn -23.4813 +Average/Iteration 168 +Average/MaxReturn 5.7549 +Average/MinReturn -54.4215 +Average/NumEpisodes 100 +Average/StdReturn 9.56864 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.691004 +TotalEnvSteps 1.69e+06 +__unnamed_task__/AverageDiscountedReturn -14.4811 +__unnamed_task__/AverageReturn -23.4813 +__unnamed_task__/Iteration 168 +__unnamed_task__/MaxReturn 5.7549 +__unnamed_task__/MinReturn -54.4215 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.56864 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.36114 +policy/KL 0.0147712 +policy/KLBefore 0 +policy/LossAfter -0.04358 +policy/LossBefore -0.0148212 +policy/dLoss 0.0287588 +---------------------------------------- ----------- +2025-04-02 22:45:01 | [rl2_trainer] epoch #169 | Optimizing policy... +2025-04-02 22:45:01 | [rl2_trainer] epoch #169 | Fitting baseline... +2025-04-02 22:45:01 | [rl2_trainer] epoch #169 | Computing loss before +2025-04-02 22:45:02 | [rl2_trainer] epoch #169 | Computing KL before +2025-04-02 22:45:02 | [rl2_trainer] epoch #169 | Optimizing +2025-04-02 22:45:36 | [rl2_trainer] epoch #169 | Computing KL after +2025-04-02 22:45:36 | [rl2_trainer] epoch #169 | Computing loss after +2025-04-02 22:45:37 | [rl2_trainer] epoch #169 | Saving snapshot... +2025-04-02 22:45:37 | [rl2_trainer] epoch #169 | Saved +2025-04-02 22:45:37 | [rl2_trainer] epoch #169 | Time 32018.20 s +2025-04-02 22:45:37 | [rl2_trainer] epoch #169 | EpochTime 161.84 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.3033 +Average/AverageReturn -19.832 +Average/Iteration 169 +Average/MaxReturn -0.871448 +Average/MinReturn -37.9352 +Average/NumEpisodes 100 +Average/StdReturn 6.43924 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.683627 +TotalEnvSteps 1.7e+06 +__unnamed_task__/AverageDiscountedReturn -13.3033 +__unnamed_task__/AverageReturn -19.832 +__unnamed_task__/Iteration 169 +__unnamed_task__/MaxReturn -0.871448 +__unnamed_task__/MinReturn -37.9352 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.43924 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.33128 +policy/KL 0.00882052 +policy/KLBefore 0 +policy/LossAfter -0.00566459 +policy/LossBefore 0.00060695 +policy/dLoss 0.00627154 +---------------------------------------- ------------ +2025-04-02 22:47:49 | [rl2_trainer] epoch #170 | Optimizing policy... +2025-04-02 22:47:50 | [rl2_trainer] epoch #170 | Fitting baseline... +2025-04-02 22:47:50 | [rl2_trainer] epoch #170 | Computing loss before +2025-04-02 22:47:50 | [rl2_trainer] epoch #170 | Computing KL before +2025-04-02 22:47:51 | [rl2_trainer] epoch #170 | Optimizing +2025-04-02 22:48:25 | [rl2_trainer] epoch #170 | Computing KL after +2025-04-02 22:48:26 | [rl2_trainer] epoch #170 | Computing loss after +2025-04-02 22:48:27 | [rl2_trainer] epoch #170 | Saving snapshot... +2025-04-02 22:48:27 | [rl2_trainer] epoch #170 | Saved +2025-04-02 22:48:27 | [rl2_trainer] epoch #170 | Time 32187.90 s +2025-04-02 22:48:27 | [rl2_trainer] epoch #170 | EpochTime 169.70 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1175 +Average/AverageReturn -21.8678 +Average/Iteration 170 +Average/MaxReturn -11.1493 +Average/MinReturn -36.0558 +Average/NumEpisodes 100 +Average/StdReturn 5.12405 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.6588 +TotalEnvSteps 1.71e+06 +__unnamed_task__/AverageDiscountedReturn -14.1175 +__unnamed_task__/AverageReturn -21.8678 +__unnamed_task__/Iteration 170 +__unnamed_task__/MaxReturn -11.1493 +__unnamed_task__/MinReturn -36.0558 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.12405 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.3186 +policy/KL 0.010206 +policy/KLBefore 0 +policy/LossAfter -0.00701617 +policy/LossBefore 0.00433613 +policy/dLoss 0.0113523 +---------------------------------------- ------------ +2025-04-02 22:50:55 | [rl2_trainer] epoch #171 | Optimizing policy... +2025-04-02 22:50:55 | [rl2_trainer] epoch #171 | Fitting baseline... +2025-04-02 22:50:55 | [rl2_trainer] epoch #171 | Computing loss before +2025-04-02 22:50:56 | [rl2_trainer] epoch #171 | Computing KL before +2025-04-02 22:50:56 | [rl2_trainer] epoch #171 | Optimizing +2025-04-02 22:51:32 | [rl2_trainer] epoch #171 | Computing KL after +2025-04-02 22:51:33 | [rl2_trainer] epoch #171 | Computing loss after +2025-04-02 22:51:34 | [rl2_trainer] epoch #171 | Saving snapshot... +2025-04-02 22:51:34 | [rl2_trainer] epoch #171 | Saved +2025-04-02 22:51:34 | [rl2_trainer] epoch #171 | Time 32374.66 s +2025-04-02 22:51:34 | [rl2_trainer] epoch #171 | EpochTime 186.76 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1593 +Average/AverageReturn -21.9429 +Average/Iteration 171 +Average/MaxReturn -3.51255 +Average/MinReturn -34.1653 +Average/NumEpisodes 100 +Average/StdReturn 5.73461 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.614972 +TotalEnvSteps 1.72e+06 +__unnamed_task__/AverageDiscountedReturn -14.1593 +__unnamed_task__/AverageReturn -21.9429 +__unnamed_task__/Iteration 171 +__unnamed_task__/MaxReturn -3.51255 +__unnamed_task__/MinReturn -34.1653 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.73461 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.31385 +policy/KL 0.0122985 +policy/KLBefore 0 +policy/LossAfter -0.0087124 +policy/LossBefore 0.00672863 +policy/dLoss 0.015441 +---------------------------------------- ------------ +2025-04-02 22:54:41 | [rl2_trainer] epoch #172 | Optimizing policy... +2025-04-02 22:54:41 | [rl2_trainer] epoch #172 | Fitting baseline... +2025-04-02 22:54:41 | [rl2_trainer] epoch #172 | Computing loss before +2025-04-02 22:54:42 | [rl2_trainer] epoch #172 | Computing KL before +2025-04-02 22:54:42 | [rl2_trainer] epoch #172 | Optimizing +2025-04-02 22:55:17 | [rl2_trainer] epoch #172 | Computing KL after +2025-04-02 22:55:17 | [rl2_trainer] epoch #172 | Computing loss after +2025-04-02 22:55:18 | [rl2_trainer] epoch #172 | Saving snapshot... +2025-04-02 22:55:18 | [rl2_trainer] epoch #172 | Saved +2025-04-02 22:55:18 | [rl2_trainer] epoch #172 | Time 32599.41 s +2025-04-02 22:55:18 | [rl2_trainer] epoch #172 | EpochTime 224.74 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.4053 +Average/AverageReturn -21.4323 +Average/Iteration 172 +Average/MaxReturn 6.76933 +Average/MinReturn -57.4492 +Average/NumEpisodes 100 +Average/StdReturn 9.55426 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.611498 +TotalEnvSteps 1.73e+06 +__unnamed_task__/AverageDiscountedReturn -13.4053 +__unnamed_task__/AverageReturn -21.4323 +__unnamed_task__/Iteration 172 +__unnamed_task__/MaxReturn 6.76933 +__unnamed_task__/MinReturn -57.4492 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.55426 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.31895 +policy/KL 0.0125884 +policy/KLBefore 0 +policy/LossAfter -0.0353606 +policy/LossBefore 0.00585131 +policy/dLoss 0.0412119 +---------------------------------------- ------------ +2025-04-02 22:58:08 | [rl2_trainer] epoch #173 | Optimizing policy... +2025-04-02 22:58:09 | [rl2_trainer] epoch #173 | Fitting baseline... +2025-04-02 22:58:09 | [rl2_trainer] epoch #173 | Computing loss before +2025-04-02 22:58:09 | [rl2_trainer] epoch #173 | Computing KL before +2025-04-02 22:58:10 | [rl2_trainer] epoch #173 | Optimizing +2025-04-02 22:58:44 | [rl2_trainer] epoch #173 | Computing KL after +2025-04-02 22:58:45 | [rl2_trainer] epoch #173 | Computing loss after +2025-04-02 22:58:46 | [rl2_trainer] epoch #173 | Saving snapshot... +2025-04-02 22:58:46 | [rl2_trainer] epoch #173 | Saved +2025-04-02 22:58:46 | [rl2_trainer] epoch #173 | Time 32806.88 s +2025-04-02 22:58:46 | [rl2_trainer] epoch #173 | EpochTime 207.48 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.386 +Average/AverageReturn -22.9112 +Average/Iteration 173 +Average/MaxReturn 4.47805 +Average/MinReturn -54.279 +Average/NumEpisodes 100 +Average/StdReturn 8.67069 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.780945 +TotalEnvSteps 1.74e+06 +__unnamed_task__/AverageDiscountedReturn -14.386 +__unnamed_task__/AverageReturn -22.9112 +__unnamed_task__/Iteration 173 +__unnamed_task__/MaxReturn 4.47805 +__unnamed_task__/MinReturn -54.279 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.67069 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.31478 +policy/KL 0.0112802 +policy/KLBefore 0 +policy/LossAfter -0.0206334 +policy/LossBefore 0.00705699 +policy/dLoss 0.0276904 +---------------------------------------- ------------ +2025-04-02 23:01:28 | [rl2_trainer] epoch #174 | Optimizing policy... +2025-04-02 23:01:28 | [rl2_trainer] epoch #174 | Fitting baseline... +2025-04-02 23:01:28 | [rl2_trainer] epoch #174 | Computing loss before +2025-04-02 23:01:29 | [rl2_trainer] epoch #174 | Computing KL before +2025-04-02 23:01:30 | [rl2_trainer] epoch #174 | Optimizing +2025-04-02 23:02:03 | [rl2_trainer] epoch #174 | Computing KL after +2025-04-02 23:02:04 | [rl2_trainer] epoch #174 | Computing loss after +2025-04-02 23:02:05 | [rl2_trainer] epoch #174 | Saving snapshot... +2025-04-02 23:02:05 | [rl2_trainer] epoch #174 | Saved +2025-04-02 23:02:05 | [rl2_trainer] epoch #174 | Time 33005.60 s +2025-04-02 23:02:05 | [rl2_trainer] epoch #174 | EpochTime 198.71 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.5257 +Average/AverageReturn -27.7603 +Average/Iteration 174 +Average/MaxReturn -10.0792 +Average/MinReturn -50.3924 +Average/NumEpisodes 100 +Average/StdReturn 7.75478 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.508119 +TotalEnvSteps 1.75e+06 +__unnamed_task__/AverageDiscountedReturn -17.5257 +__unnamed_task__/AverageReturn -27.7603 +__unnamed_task__/Iteration 174 +__unnamed_task__/MaxReturn -10.0792 +__unnamed_task__/MinReturn -50.3924 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.75478 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.31215 +policy/KL 0.0111257 +policy/KLBefore 0 +policy/LossAfter -0.0249044 +policy/LossBefore 0.00654143 +policy/dLoss 0.0314458 +---------------------------------------- ------------ +2025-04-02 23:04:00 | [rl2_trainer] epoch #175 | Optimizing policy... +2025-04-02 23:04:00 | [rl2_trainer] epoch #175 | Fitting baseline... +2025-04-02 23:04:00 | [rl2_trainer] epoch #175 | Computing loss before +2025-04-02 23:04:01 | [rl2_trainer] epoch #175 | Computing KL before +2025-04-02 23:04:01 | [rl2_trainer] epoch #175 | Optimizing +2025-04-02 23:04:36 | [rl2_trainer] epoch #175 | Computing KL after +2025-04-02 23:04:37 | [rl2_trainer] epoch #175 | Computing loss after +2025-04-02 23:04:38 | [rl2_trainer] epoch #175 | Saving snapshot... +2025-04-02 23:04:38 | [rl2_trainer] epoch #175 | Saved +2025-04-02 23:04:38 | [rl2_trainer] epoch #175 | Time 33158.54 s +2025-04-02 23:04:38 | [rl2_trainer] epoch #175 | EpochTime 152.94 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.5801 +Average/AverageReturn -24.012 +Average/Iteration 175 +Average/MaxReturn -1.65193 +Average/MinReturn -34.9112 +Average/NumEpisodes 100 +Average/StdReturn 5.22193 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.547156 +TotalEnvSteps 1.76e+06 +__unnamed_task__/AverageDiscountedReturn -15.5801 +__unnamed_task__/AverageReturn -24.012 +__unnamed_task__/Iteration 175 +__unnamed_task__/MaxReturn -1.65193 +__unnamed_task__/MinReturn -34.9112 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.22193 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.30849 +policy/KL 0.00814855 +policy/KLBefore 0 +policy/LossAfter -0.0196307 +policy/LossBefore -0.00554777 +policy/dLoss 0.014083 +---------------------------------------- ------------ +2025-04-02 23:08:33 | [rl2_trainer] epoch #176 | Optimizing policy... +2025-04-02 23:08:33 | [rl2_trainer] epoch #176 | Fitting baseline... +2025-04-02 23:08:33 | [rl2_trainer] epoch #176 | Computing loss before +2025-04-02 23:08:34 | [rl2_trainer] epoch #176 | Computing KL before +2025-04-02 23:08:34 | [rl2_trainer] epoch #176 | Optimizing +2025-04-02 23:09:10 | [rl2_trainer] epoch #176 | Computing KL after +2025-04-02 23:09:11 | [rl2_trainer] epoch #176 | Computing loss after +2025-04-02 23:09:11 | [rl2_trainer] epoch #176 | Saving snapshot... +2025-04-02 23:09:11 | [rl2_trainer] epoch #176 | Saved +2025-04-02 23:09:11 | [rl2_trainer] epoch #176 | Time 33432.51 s +2025-04-02 23:09:11 | [rl2_trainer] epoch #176 | EpochTime 273.97 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3263 +Average/AverageReturn -24.4288 +Average/Iteration 176 +Average/MaxReturn 0.956715 +Average/MinReturn -65.3274 +Average/NumEpisodes 100 +Average/StdReturn 10.6376 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.659967 +TotalEnvSteps 1.77e+06 +__unnamed_task__/AverageDiscountedReturn -15.3263 +__unnamed_task__/AverageReturn -24.4288 +__unnamed_task__/Iteration 176 +__unnamed_task__/MaxReturn 0.956715 +__unnamed_task__/MinReturn -65.3274 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.6376 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.28513 +policy/KL 0.0124446 +policy/KLBefore 0 +policy/LossAfter -0.0496878 +policy/LossBefore -0.00344041 +policy/dLoss 0.0462474 +---------------------------------------- ------------ +2025-04-02 23:11:10 | [rl2_trainer] epoch #177 | Optimizing policy... +2025-04-02 23:11:10 | [rl2_trainer] epoch #177 | Fitting baseline... +2025-04-02 23:11:10 | [rl2_trainer] epoch #177 | Computing loss before +2025-04-02 23:11:11 | [rl2_trainer] epoch #177 | Computing KL before +2025-04-02 23:11:12 | [rl2_trainer] epoch #177 | Optimizing +2025-04-02 23:11:46 | [rl2_trainer] epoch #177 | Computing KL after +2025-04-02 23:11:46 | [rl2_trainer] epoch #177 | Computing loss after +2025-04-02 23:11:47 | [rl2_trainer] epoch #177 | Saving snapshot... +2025-04-02 23:11:47 | [rl2_trainer] epoch #177 | Saved +2025-04-02 23:11:47 | [rl2_trainer] epoch #177 | Time 33588.31 s +2025-04-02 23:11:47 | [rl2_trainer] epoch #177 | EpochTime 155.80 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.085 +Average/AverageReturn -24.9529 +Average/Iteration 177 +Average/MaxReturn -3.58136 +Average/MinReturn -41.659 +Average/NumEpisodes 100 +Average/StdReturn 6.2497 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.665008 +TotalEnvSteps 1.78e+06 +__unnamed_task__/AverageDiscountedReturn -16.085 +__unnamed_task__/AverageReturn -24.9529 +__unnamed_task__/Iteration 177 +__unnamed_task__/MaxReturn -3.58136 +__unnamed_task__/MinReturn -41.659 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.2497 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.25784 +policy/KL 0.00866885 +policy/KLBefore 0 +policy/LossAfter -0.0160555 +policy/LossBefore -0.00436556 +policy/dLoss 0.0116899 +---------------------------------------- ------------ +2025-04-02 23:14:23 | [rl2_trainer] epoch #178 | Optimizing policy... +2025-04-02 23:14:23 | [rl2_trainer] epoch #178 | Fitting baseline... +2025-04-02 23:14:23 | [rl2_trainer] epoch #178 | Computing loss before +2025-04-02 23:14:24 | [rl2_trainer] epoch #178 | Computing KL before +2025-04-02 23:14:25 | [rl2_trainer] epoch #178 | Optimizing +2025-04-02 23:14:58 | [rl2_trainer] epoch #178 | Computing KL after +2025-04-02 23:14:58 | [rl2_trainer] epoch #178 | Computing loss after +2025-04-02 23:14:59 | [rl2_trainer] epoch #178 | Saving snapshot... +2025-04-02 23:14:59 | [rl2_trainer] epoch #178 | Saved +2025-04-02 23:14:59 | [rl2_trainer] epoch #178 | Time 33779.96 s +2025-04-02 23:14:59 | [rl2_trainer] epoch #178 | EpochTime 191.65 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.2693 +Average/AverageReturn -24.0137 +Average/Iteration 178 +Average/MaxReturn 7.4379 +Average/MinReturn -62.3541 +Average/NumEpisodes 100 +Average/StdReturn 10.2149 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.42905 +TotalEnvSteps 1.79e+06 +__unnamed_task__/AverageDiscountedReturn -15.2693 +__unnamed_task__/AverageReturn -24.0137 +__unnamed_task__/Iteration 178 +__unnamed_task__/MaxReturn 7.4379 +__unnamed_task__/MinReturn -62.3541 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.2149 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.23638 +policy/KL 0.011699 +policy/KLBefore 0 +policy/LossAfter -0.0479517 +policy/LossBefore -0.00481606 +policy/dLoss 0.0431356 +---------------------------------------- ------------ +2025-04-02 23:18:52 | [rl2_trainer] epoch #179 | Optimizing policy... +2025-04-02 23:18:52 | [rl2_trainer] epoch #179 | Fitting baseline... +2025-04-02 23:18:52 | [rl2_trainer] epoch #179 | Computing loss before +2025-04-02 23:18:53 | [rl2_trainer] epoch #179 | Computing KL before +2025-04-02 23:18:53 | [rl2_trainer] epoch #179 | Optimizing +2025-04-02 23:19:28 | [rl2_trainer] epoch #179 | Computing KL after +2025-04-02 23:19:29 | [rl2_trainer] epoch #179 | Computing loss after +2025-04-02 23:19:29 | [rl2_trainer] epoch #179 | Saving snapshot... +2025-04-02 23:19:29 | [rl2_trainer] epoch #179 | Saved +2025-04-02 23:19:29 | [rl2_trainer] epoch #179 | Time 34050.45 s +2025-04-02 23:19:29 | [rl2_trainer] epoch #179 | EpochTime 270.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.4237 +Average/AverageReturn -22.7988 +Average/Iteration 179 +Average/MaxReturn 4.48665 +Average/MinReturn -55.5408 +Average/NumEpisodes 100 +Average/StdReturn 9.39343 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.699023 +TotalEnvSteps 1.8e+06 +__unnamed_task__/AverageDiscountedReturn -14.4237 +__unnamed_task__/AverageReturn -22.7988 +__unnamed_task__/Iteration 179 +__unnamed_task__/MaxReturn 4.48665 +__unnamed_task__/MinReturn -55.5408 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.39343 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.20806 +policy/KL 0.0139753 +policy/KLBefore 0 +policy/LossAfter -0.0339095 +policy/LossBefore -0.00737877 +policy/dLoss 0.0265308 +---------------------------------------- ------------ +2025-04-02 23:21:44 | [rl2_trainer] epoch #180 | Optimizing policy... +2025-04-02 23:21:45 | [rl2_trainer] epoch #180 | Fitting baseline... +2025-04-02 23:21:45 | [rl2_trainer] epoch #180 | Computing loss before +2025-04-02 23:21:45 | [rl2_trainer] epoch #180 | Computing KL before +2025-04-02 23:21:46 | [rl2_trainer] epoch #180 | Optimizing +2025-04-02 23:22:20 | [rl2_trainer] epoch #180 | Computing KL after +2025-04-02 23:22:20 | [rl2_trainer] epoch #180 | Computing loss after +2025-04-02 23:22:21 | [rl2_trainer] epoch #180 | Saving snapshot... +2025-04-02 23:22:21 | [rl2_trainer] epoch #180 | Saved +2025-04-02 23:22:21 | [rl2_trainer] epoch #180 | Time 34222.01 s +2025-04-02 23:22:21 | [rl2_trainer] epoch #180 | EpochTime 171.56 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3658 +Average/AverageReturn -24.2948 +Average/Iteration 180 +Average/MaxReturn -13.5995 +Average/MinReturn -48.8346 +Average/NumEpisodes 100 +Average/StdReturn 6.60185 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.450567 +TotalEnvSteps 1.81e+06 +__unnamed_task__/AverageDiscountedReturn -15.3658 +__unnamed_task__/AverageReturn -24.2948 +__unnamed_task__/Iteration 180 +__unnamed_task__/MaxReturn -13.5995 +__unnamed_task__/MinReturn -48.8346 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.60185 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.18794 +policy/KL 0.0121468 +policy/KLBefore 0 +policy/LossAfter -0.0261222 +policy/LossBefore -0.00850994 +policy/dLoss 0.0176123 +---------------------------------------- ------------ +2025-04-02 23:23:49 | [rl2_trainer] epoch #181 | Optimizing policy... +2025-04-02 23:23:49 | [rl2_trainer] epoch #181 | Fitting baseline... +2025-04-02 23:23:49 | [rl2_trainer] epoch #181 | Computing loss before +2025-04-02 23:23:50 | [rl2_trainer] epoch #181 | Computing KL before +2025-04-02 23:23:51 | [rl2_trainer] epoch #181 | Optimizing +2025-04-02 23:24:27 | [rl2_trainer] epoch #181 | Computing KL after +2025-04-02 23:24:28 | [rl2_trainer] epoch #181 | Computing loss after +2025-04-02 23:24:28 | [rl2_trainer] epoch #181 | Saving snapshot... +2025-04-02 23:24:28 | [rl2_trainer] epoch #181 | Saved +2025-04-02 23:24:28 | [rl2_trainer] epoch #181 | Time 34349.46 s +2025-04-02 23:24:28 | [rl2_trainer] epoch #181 | EpochTime 127.45 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.0399 +Average/AverageReturn -21.484 +Average/Iteration 181 +Average/MaxReturn -1.31227 +Average/MinReturn -34.9475 +Average/NumEpisodes 100 +Average/StdReturn 5.13854 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.552429 +TotalEnvSteps 1.82e+06 +__unnamed_task__/AverageDiscountedReturn -14.0399 +__unnamed_task__/AverageReturn -21.484 +__unnamed_task__/Iteration 181 +__unnamed_task__/MaxReturn -1.31227 +__unnamed_task__/MinReturn -34.9475 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.13854 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.18084 +policy/KL 0.00783199 +policy/KLBefore 0 +policy/LossAfter -0.0174227 +policy/LossBefore -0.00175756 +policy/dLoss 0.0156651 +---------------------------------------- ------------ +2025-04-02 23:26:11 | [rl2_trainer] epoch #182 | Optimizing policy... +2025-04-02 23:26:11 | [rl2_trainer] epoch #182 | Fitting baseline... +2025-04-02 23:26:11 | [rl2_trainer] epoch #182 | Computing loss before +2025-04-02 23:26:12 | [rl2_trainer] epoch #182 | Computing KL before +2025-04-02 23:26:13 | [rl2_trainer] epoch #182 | Optimizing +2025-04-02 23:26:49 | [rl2_trainer] epoch #182 | Computing KL after +2025-04-02 23:26:49 | [rl2_trainer] epoch #182 | Computing loss after +2025-04-02 23:26:50 | [rl2_trainer] epoch #182 | Saving snapshot... +2025-04-02 23:26:50 | [rl2_trainer] epoch #182 | Saved +2025-04-02 23:26:50 | [rl2_trainer] epoch #182 | Time 34491.50 s +2025-04-02 23:26:50 | [rl2_trainer] epoch #182 | EpochTime 142.04 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.8973 +Average/AverageReturn -22.9231 +Average/Iteration 182 +Average/MaxReturn -8.64624 +Average/MinReturn -84.6908 +Average/NumEpisodes 100 +Average/StdReturn 9.79335 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.549815 +TotalEnvSteps 1.83e+06 +__unnamed_task__/AverageDiscountedReturn -14.8973 +__unnamed_task__/AverageReturn -22.9231 +__unnamed_task__/Iteration 182 +__unnamed_task__/MaxReturn -8.64624 +__unnamed_task__/MinReturn -84.6908 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.79335 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.1701 +policy/KL 0.0123534 +policy/KLBefore 0 +policy/LossAfter -0.0368878 +policy/LossBefore 0.00155336 +policy/dLoss 0.0384411 +---------------------------------------- ------------ +2025-04-02 23:28:19 | [rl2_trainer] epoch #183 | Optimizing policy... +2025-04-02 23:28:19 | [rl2_trainer] epoch #183 | Fitting baseline... +2025-04-02 23:28:19 | [rl2_trainer] epoch #183 | Computing loss before +2025-04-02 23:28:20 | [rl2_trainer] epoch #183 | Computing KL before +2025-04-02 23:28:20 | [rl2_trainer] epoch #183 | Optimizing +2025-04-02 23:28:55 | [rl2_trainer] epoch #183 | Computing KL after +2025-04-02 23:28:56 | [rl2_trainer] epoch #183 | Computing loss after +2025-04-02 23:28:57 | [rl2_trainer] epoch #183 | Saving snapshot... +2025-04-02 23:28:57 | [rl2_trainer] epoch #183 | Saved +2025-04-02 23:28:57 | [rl2_trainer] epoch #183 | Time 34617.79 s +2025-04-02 23:28:57 | [rl2_trainer] epoch #183 | EpochTime 126.28 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.09 +Average/AverageReturn -21.503 +Average/Iteration 183 +Average/MaxReturn 5.03232 +Average/MinReturn -33.6511 +Average/NumEpisodes 100 +Average/StdReturn 4.71656 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.425724 +TotalEnvSteps 1.84e+06 +__unnamed_task__/AverageDiscountedReturn -14.09 +__unnamed_task__/AverageReturn -21.503 +__unnamed_task__/Iteration 183 +__unnamed_task__/MaxReturn 5.03232 +__unnamed_task__/MinReturn -33.6511 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.71656 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.14999 +policy/KL 0.012453 +policy/KLBefore 0 +policy/LossAfter -0.0121756 +policy/LossBefore -0.00350574 +policy/dLoss 0.00866988 +---------------------------------------- ------------ +2025-04-02 23:30:55 | [rl2_trainer] epoch #184 | Optimizing policy... +2025-04-02 23:30:55 | [rl2_trainer] epoch #184 | Fitting baseline... +2025-04-02 23:30:55 | [rl2_trainer] epoch #184 | Computing loss before +2025-04-02 23:30:56 | [rl2_trainer] epoch #184 | Computing KL before +2025-04-02 23:30:56 | [rl2_trainer] epoch #184 | Optimizing +2025-04-02 23:31:31 | [rl2_trainer] epoch #184 | Computing KL after +2025-04-02 23:31:31 | [rl2_trainer] epoch #184 | Computing loss after +2025-04-02 23:31:32 | [rl2_trainer] epoch #184 | Saving snapshot... +2025-04-02 23:31:32 | [rl2_trainer] epoch #184 | Saved +2025-04-02 23:31:32 | [rl2_trainer] epoch #184 | Time 34773.37 s +2025-04-02 23:31:32 | [rl2_trainer] epoch #184 | EpochTime 155.58 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.767 +Average/AverageReturn -24.2872 +Average/Iteration 184 +Average/MaxReturn 5.42652 +Average/MinReturn -42.2935 +Average/NumEpisodes 100 +Average/StdReturn 8.58123 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.575175 +TotalEnvSteps 1.85e+06 +__unnamed_task__/AverageDiscountedReturn -15.767 +__unnamed_task__/AverageReturn -24.2872 +__unnamed_task__/Iteration 184 +__unnamed_task__/MaxReturn 5.42652 +__unnamed_task__/MinReturn -42.2935 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.58123 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.1248 +policy/KL 0.0136821 +policy/KLBefore 0 +policy/LossAfter -0.0248326 +policy/LossBefore 0.000387531 +policy/dLoss 0.0252201 +---------------------------------------- ------------- +2025-04-02 23:33:34 | [rl2_trainer] epoch #185 | Optimizing policy... +2025-04-02 23:33:34 | [rl2_trainer] epoch #185 | Fitting baseline... +2025-04-02 23:33:34 | [rl2_trainer] epoch #185 | Computing loss before +2025-04-02 23:33:35 | [rl2_trainer] epoch #185 | Computing KL before +2025-04-02 23:33:35 | [rl2_trainer] epoch #185 | Optimizing +2025-04-02 23:34:10 | [rl2_trainer] epoch #185 | Computing KL after +2025-04-02 23:34:10 | [rl2_trainer] epoch #185 | Computing loss after +2025-04-02 23:34:11 | [rl2_trainer] epoch #185 | Saving snapshot... +2025-04-02 23:34:11 | [rl2_trainer] epoch #185 | Saved +2025-04-02 23:34:11 | [rl2_trainer] epoch #185 | Time 34932.28 s +2025-04-02 23:34:11 | [rl2_trainer] epoch #185 | EpochTime 158.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1033 +Average/AverageReturn -23.3696 +Average/Iteration 185 +Average/MaxReturn 1.3226 +Average/MinReturn -43.8365 +Average/NumEpisodes 100 +Average/StdReturn 6.02946 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.588772 +TotalEnvSteps 1.86e+06 +__unnamed_task__/AverageDiscountedReturn -15.1033 +__unnamed_task__/AverageReturn -23.3696 +__unnamed_task__/Iteration 185 +__unnamed_task__/MaxReturn 1.3226 +__unnamed_task__/MinReturn -43.8365 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.02946 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.11201 +policy/KL 0.0105881 +policy/KLBefore 0 +policy/LossAfter -0.0154911 +policy/LossBefore -0.00251462 +policy/dLoss 0.0129765 +---------------------------------------- ------------ +2025-04-02 23:37:53 | [rl2_trainer] epoch #186 | Optimizing policy... +2025-04-02 23:37:54 | [rl2_trainer] epoch #186 | Fitting baseline... +2025-04-02 23:37:54 | [rl2_trainer] epoch #186 | Computing loss before +2025-04-02 23:37:54 | [rl2_trainer] epoch #186 | Computing KL before +2025-04-02 23:37:55 | [rl2_trainer] epoch #186 | Optimizing +2025-04-02 23:38:29 | [rl2_trainer] epoch #186 | Computing KL after +2025-04-02 23:38:30 | [rl2_trainer] epoch #186 | Computing loss after +2025-04-02 23:38:31 | [rl2_trainer] epoch #186 | Saving snapshot... +2025-04-02 23:38:31 | [rl2_trainer] epoch #186 | Saved +2025-04-02 23:38:31 | [rl2_trainer] epoch #186 | Time 35191.80 s +2025-04-02 23:38:31 | [rl2_trainer] epoch #186 | EpochTime 259.52 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.9703 +Average/AverageReturn -26.9363 +Average/Iteration 186 +Average/MaxReturn 5.41528 +Average/MinReturn -54.5057 +Average/NumEpisodes 100 +Average/StdReturn 11.9312 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.752923 +TotalEnvSteps 1.87e+06 +__unnamed_task__/AverageDiscountedReturn -16.9703 +__unnamed_task__/AverageReturn -26.9363 +__unnamed_task__/Iteration 186 +__unnamed_task__/MaxReturn 5.41528 +__unnamed_task__/MinReturn -54.5057 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.9312 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.11437 +policy/KL 0.0100779 +policy/KLBefore 0 +policy/LossAfter -0.0692158 +policy/LossBefore -0.0218691 +policy/dLoss 0.0473467 +---------------------------------------- ----------- +2025-04-02 23:39:59 | [rl2_trainer] epoch #187 | Optimizing policy... +2025-04-02 23:39:59 | [rl2_trainer] epoch #187 | Fitting baseline... +2025-04-02 23:39:59 | [rl2_trainer] epoch #187 | Computing loss before +2025-04-02 23:39:59 | [rl2_trainer] epoch #187 | Computing KL before +2025-04-02 23:40:00 | [rl2_trainer] epoch #187 | Optimizing +2025-04-02 23:40:35 | [rl2_trainer] epoch #187 | Computing KL after +2025-04-02 23:40:36 | [rl2_trainer] epoch #187 | Computing loss after +2025-04-02 23:40:37 | [rl2_trainer] epoch #187 | Saving snapshot... +2025-04-02 23:40:37 | [rl2_trainer] epoch #187 | Saved +2025-04-02 23:40:37 | [rl2_trainer] epoch #187 | Time 35317.84 s +2025-04-02 23:40:37 | [rl2_trainer] epoch #187 | EpochTime 126.04 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.5179 +Average/AverageReturn -22.1007 +Average/Iteration 187 +Average/MaxReturn 9.1671 +Average/MinReturn -120.341 +Average/NumEpisodes 100 +Average/StdReturn 12.1363 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.503894 +TotalEnvSteps 1.88e+06 +__unnamed_task__/AverageDiscountedReturn -14.5179 +__unnamed_task__/AverageReturn -22.1007 +__unnamed_task__/Iteration 187 +__unnamed_task__/MaxReturn 9.1671 +__unnamed_task__/MinReturn -120.341 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.1363 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.09277 +policy/KL 0.0126543 +policy/KLBefore 0 +policy/LossAfter -0.0388835 +policy/LossBefore 0.00218372 +policy/dLoss 0.0410673 +---------------------------------------- ------------- +2025-04-02 23:44:28 | [rl2_trainer] epoch #188 | Optimizing policy... +2025-04-02 23:44:29 | [rl2_trainer] epoch #188 | Fitting baseline... +2025-04-02 23:44:29 | [rl2_trainer] epoch #188 | Computing loss before +2025-04-02 23:44:29 | [rl2_trainer] epoch #188 | Computing KL before +2025-04-02 23:44:30 | [rl2_trainer] epoch #188 | Optimizing +2025-04-02 23:45:03 | [rl2_trainer] epoch #188 | Computing KL after +2025-04-02 23:45:04 | [rl2_trainer] epoch #188 | Computing loss after +2025-04-02 23:45:05 | [rl2_trainer] epoch #188 | Saving snapshot... +2025-04-02 23:45:05 | [rl2_trainer] epoch #188 | Saved +2025-04-02 23:45:05 | [rl2_trainer] epoch #188 | Time 35585.86 s +2025-04-02 23:45:05 | [rl2_trainer] epoch #188 | EpochTime 268.02 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.0973 +Average/AverageReturn -23.8596 +Average/Iteration 188 +Average/MaxReturn -4.12526 +Average/MinReturn -64.9467 +Average/NumEpisodes 100 +Average/StdReturn 11.6338 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.760052 +TotalEnvSteps 1.89e+06 +__unnamed_task__/AverageDiscountedReturn -15.0973 +__unnamed_task__/AverageReturn -23.8596 +__unnamed_task__/Iteration 188 +__unnamed_task__/MaxReturn -4.12526 +__unnamed_task__/MinReturn -64.9467 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.6338 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.05727 +policy/KL 0.0127406 +policy/KLBefore 0 +policy/LossAfter -0.0262304 +policy/LossBefore -0.00133266 +policy/dLoss 0.0248977 +---------------------------------------- ------------ +2025-04-02 23:47:18 | [rl2_trainer] epoch #189 | Optimizing policy... +2025-04-02 23:47:18 | [rl2_trainer] epoch #189 | Fitting baseline... +2025-04-02 23:47:18 | [rl2_trainer] epoch #189 | Computing loss before +2025-04-02 23:47:19 | [rl2_trainer] epoch #189 | Computing KL before +2025-04-02 23:47:20 | [rl2_trainer] epoch #189 | Optimizing +2025-04-02 23:47:55 | [rl2_trainer] epoch #189 | Computing KL after +2025-04-02 23:47:55 | [rl2_trainer] epoch #189 | Computing loss after +2025-04-02 23:47:56 | [rl2_trainer] epoch #189 | Saving snapshot... +2025-04-02 23:47:56 | [rl2_trainer] epoch #189 | Saved +2025-04-02 23:47:56 | [rl2_trainer] epoch #189 | Time 35757.30 s +2025-04-02 23:47:56 | [rl2_trainer] epoch #189 | EpochTime 171.43 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.7045 +Average/AverageReturn -19.2809 +Average/Iteration 189 +Average/MaxReturn 6.98168 +Average/MinReturn -32.5603 +Average/NumEpisodes 100 +Average/StdReturn 5.86379 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.529671 +TotalEnvSteps 1.9e+06 +__unnamed_task__/AverageDiscountedReturn -12.7045 +__unnamed_task__/AverageReturn -19.2809 +__unnamed_task__/Iteration 189 +__unnamed_task__/MaxReturn 6.98168 +__unnamed_task__/MinReturn -32.5603 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.86379 +__unnamed_task__/TerminationRate 0 +policy/Entropy 6.03102 +policy/KL 0.0109886 +policy/KLBefore 0 +policy/LossAfter -0.0161232 +policy/LossBefore -0.00288016 +policy/dLoss 0.013243 +---------------------------------------- ------------ +2025-04-02 23:50:11 | [rl2_trainer] epoch #190 | Optimizing policy... +2025-04-02 23:50:12 | [rl2_trainer] epoch #190 | Fitting baseline... +2025-04-02 23:50:12 | [rl2_trainer] epoch #190 | Computing loss before +2025-04-02 23:50:12 | [rl2_trainer] epoch #190 | Computing KL before +2025-04-02 23:50:13 | [rl2_trainer] epoch #190 | Optimizing +2025-04-02 23:50:48 | [rl2_trainer] epoch #190 | Computing KL after +2025-04-02 23:50:49 | [rl2_trainer] epoch #190 | Computing loss after +2025-04-02 23:50:50 | [rl2_trainer] epoch #190 | Saving snapshot... +2025-04-02 23:50:50 | [rl2_trainer] epoch #190 | Saved +2025-04-02 23:50:50 | [rl2_trainer] epoch #190 | Time 35930.75 s +2025-04-02 23:50:50 | [rl2_trainer] epoch #190 | EpochTime 173.45 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9925 +Average/AverageReturn -19.7494 +Average/Iteration 190 +Average/MaxReturn 5.01642 +Average/MinReturn -32.5048 +Average/NumEpisodes 100 +Average/StdReturn 5.79016 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.395371 +TotalEnvSteps 1.91e+06 +__unnamed_task__/AverageDiscountedReturn -12.9925 +__unnamed_task__/AverageReturn -19.7494 +__unnamed_task__/Iteration 190 +__unnamed_task__/MaxReturn 5.01642 +__unnamed_task__/MinReturn -32.5048 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.79016 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.99156 +policy/KL 0.00831622 +policy/KLBefore 0 +policy/LossAfter -0.0230497 +policy/LossBefore -0.00814872 +policy/dLoss 0.014901 +---------------------------------------- ------------ +2025-04-02 23:53:15 | [rl2_trainer] epoch #191 | Optimizing policy... +2025-04-02 23:53:16 | [rl2_trainer] epoch #191 | Fitting baseline... +2025-04-02 23:53:16 | [rl2_trainer] epoch #191 | Computing loss before +2025-04-02 23:53:16 | [rl2_trainer] epoch #191 | Computing KL before +2025-04-02 23:53:17 | [rl2_trainer] epoch #191 | Optimizing +2025-04-02 23:53:53 | [rl2_trainer] epoch #191 | Computing KL after +2025-04-02 23:53:53 | [rl2_trainer] epoch #191 | Computing loss after +2025-04-02 23:53:54 | [rl2_trainer] epoch #191 | Saving snapshot... +2025-04-02 23:53:54 | [rl2_trainer] epoch #191 | Saved +2025-04-02 23:53:54 | [rl2_trainer] epoch #191 | Time 36115.16 s +2025-04-02 23:53:54 | [rl2_trainer] epoch #191 | EpochTime 184.40 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4995 +Average/AverageReturn -23.6997 +Average/Iteration 191 +Average/MaxReturn -11.9431 +Average/MinReturn -39.1894 +Average/NumEpisodes 100 +Average/StdReturn 6.03049 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.767131 +TotalEnvSteps 1.92e+06 +__unnamed_task__/AverageDiscountedReturn -15.4995 +__unnamed_task__/AverageReturn -23.6997 +__unnamed_task__/Iteration 191 +__unnamed_task__/MaxReturn -11.9431 +__unnamed_task__/MinReturn -39.1894 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.03049 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.95397 +policy/KL 0.0101866 +policy/KLBefore 0 +policy/LossAfter -0.00640935 +policy/LossBefore -0.00408538 +policy/dLoss 0.00232397 +---------------------------------------- ------------ +2025-04-02 23:56:00 | [rl2_trainer] epoch #192 | Optimizing policy... +2025-04-02 23:56:01 | [rl2_trainer] epoch #192 | Fitting baseline... +2025-04-02 23:56:01 | [rl2_trainer] epoch #192 | Computing loss before +2025-04-02 23:56:01 | [rl2_trainer] epoch #192 | Computing KL before +2025-04-02 23:56:02 | [rl2_trainer] epoch #192 | Optimizing +2025-04-02 23:56:38 | [rl2_trainer] epoch #192 | Computing KL after +2025-04-02 23:56:39 | [rl2_trainer] epoch #192 | Computing loss after +2025-04-02 23:56:40 | [rl2_trainer] epoch #192 | Saving snapshot... +2025-04-02 23:56:40 | [rl2_trainer] epoch #192 | Saved +2025-04-02 23:56:40 | [rl2_trainer] epoch #192 | Time 36280.60 s +2025-04-02 23:56:40 | [rl2_trainer] epoch #192 | EpochTime 165.44 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.5799 +Average/AverageReturn -23.8141 +Average/Iteration 192 +Average/MaxReturn -6.48798 +Average/MinReturn -40.7879 +Average/NumEpisodes 100 +Average/StdReturn 6.50279 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.691226 +TotalEnvSteps 1.93e+06 +__unnamed_task__/AverageDiscountedReturn -15.5799 +__unnamed_task__/AverageReturn -23.8141 +__unnamed_task__/Iteration 192 +__unnamed_task__/MaxReturn -6.48798 +__unnamed_task__/MinReturn -40.7879 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.50279 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.93025 +policy/KL 0.0120001 +policy/KLBefore 0 +policy/LossAfter -0.0223603 +policy/LossBefore -0.00670703 +policy/dLoss 0.0156533 +---------------------------------------- ------------ +2025-04-03 00:00:33 | [rl2_trainer] epoch #193 | Optimizing policy... +2025-04-03 00:00:33 | [rl2_trainer] epoch #193 | Fitting baseline... +2025-04-03 00:00:33 | [rl2_trainer] epoch #193 | Computing loss before +2025-04-03 00:00:34 | [rl2_trainer] epoch #193 | Computing KL before +2025-04-03 00:00:34 | [rl2_trainer] epoch #193 | Optimizing +2025-04-03 00:01:10 | [rl2_trainer] epoch #193 | Computing KL after +2025-04-03 00:01:10 | [rl2_trainer] epoch #193 | Computing loss after +2025-04-03 00:01:11 | [rl2_trainer] epoch #193 | Saving snapshot... +2025-04-03 00:01:11 | [rl2_trainer] epoch #193 | Saved +2025-04-03 00:01:11 | [rl2_trainer] epoch #193 | Time 36552.45 s +2025-04-03 00:01:11 | [rl2_trainer] epoch #193 | EpochTime 271.85 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.2526 +Average/AverageReturn -24.1918 +Average/Iteration 193 +Average/MaxReturn 6.73599 +Average/MinReturn -66.9009 +Average/NumEpisodes 100 +Average/StdReturn 12.3607 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.736069 +TotalEnvSteps 1.94e+06 +__unnamed_task__/AverageDiscountedReturn -15.2526 +__unnamed_task__/AverageReturn -24.1918 +__unnamed_task__/Iteration 193 +__unnamed_task__/MaxReturn 6.73599 +__unnamed_task__/MinReturn -66.9009 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.3607 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.87526 +policy/KL 0.0151461 +policy/KLBefore 0 +policy/LossAfter -0.0247137 +policy/LossBefore 0.00447526 +policy/dLoss 0.0291889 +---------------------------------------- ------------ +2025-04-03 00:04:13 | [rl2_trainer] epoch #194 | Optimizing policy... +2025-04-03 00:04:13 | [rl2_trainer] epoch #194 | Fitting baseline... +2025-04-03 00:04:13 | [rl2_trainer] epoch #194 | Computing loss before +2025-04-03 00:04:13 | [rl2_trainer] epoch #194 | Computing KL before +2025-04-03 00:04:14 | [rl2_trainer] epoch #194 | Optimizing +2025-04-03 00:04:47 | [rl2_trainer] epoch #194 | Computing KL after +2025-04-03 00:04:48 | [rl2_trainer] epoch #194 | Computing loss after +2025-04-03 00:04:49 | [rl2_trainer] epoch #194 | Saving snapshot... +2025-04-03 00:04:49 | [rl2_trainer] epoch #194 | Saved +2025-04-03 00:04:49 | [rl2_trainer] epoch #194 | Time 36769.90 s +2025-04-03 00:04:49 | [rl2_trainer] epoch #194 | EpochTime 217.45 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.1638 +Average/AverageReturn -25.5741 +Average/Iteration 194 +Average/MaxReturn -5.22742 +Average/MinReturn -65.2075 +Average/NumEpisodes 100 +Average/StdReturn 12.3872 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.6431 +TotalEnvSteps 1.95e+06 +__unnamed_task__/AverageDiscountedReturn -16.1638 +__unnamed_task__/AverageReturn -25.5741 +__unnamed_task__/Iteration 194 +__unnamed_task__/MaxReturn -5.22742 +__unnamed_task__/MinReturn -65.2075 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.3872 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.84271 +policy/KL 0.0110146 +policy/KLBefore 0 +policy/LossAfter -0.0456932 +policy/LossBefore -0.010518 +policy/dLoss 0.0351752 +---------------------------------------- ----------- +2025-04-03 00:07:39 | [rl2_trainer] epoch #195 | Optimizing policy... +2025-04-03 00:07:39 | [rl2_trainer] epoch #195 | Fitting baseline... +2025-04-03 00:07:39 | [rl2_trainer] epoch #195 | Computing loss before +2025-04-03 00:07:40 | [rl2_trainer] epoch #195 | Computing KL before +2025-04-03 00:07:41 | [rl2_trainer] epoch #195 | Optimizing +2025-04-03 00:08:17 | [rl2_trainer] epoch #195 | Computing KL after +2025-04-03 00:08:18 | [rl2_trainer] epoch #195 | Computing loss after +2025-04-03 00:08:18 | [rl2_trainer] epoch #195 | Saving snapshot... +2025-04-03 00:08:18 | [rl2_trainer] epoch #195 | Saved +2025-04-03 00:08:18 | [rl2_trainer] epoch #195 | Time 36979.47 s +2025-04-03 00:08:18 | [rl2_trainer] epoch #195 | EpochTime 209.57 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1696 +Average/AverageReturn -23.865 +Average/Iteration 195 +Average/MaxReturn -3.937 +Average/MinReturn -60.4794 +Average/NumEpisodes 100 +Average/StdReturn 9.52373 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.631025 +TotalEnvSteps 1.96e+06 +__unnamed_task__/AverageDiscountedReturn -15.1696 +__unnamed_task__/AverageReturn -23.865 +__unnamed_task__/Iteration 195 +__unnamed_task__/MaxReturn -3.937 +__unnamed_task__/MinReturn -60.4794 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.52373 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.82585 +policy/KL 0.00999881 +policy/KLBefore 0 +policy/LossAfter -0.041317 +policy/LossBefore -0.00670995 +policy/dLoss 0.034607 +---------------------------------------- ------------ +2025-04-03 00:10:43 | [rl2_trainer] epoch #196 | Optimizing policy... +2025-04-03 00:10:43 | [rl2_trainer] epoch #196 | Fitting baseline... +2025-04-03 00:10:43 | [rl2_trainer] epoch #196 | Computing loss before +2025-04-03 00:10:44 | [rl2_trainer] epoch #196 | Computing KL before +2025-04-03 00:10:44 | [rl2_trainer] epoch #196 | Optimizing +2025-04-03 00:11:19 | [rl2_trainer] epoch #196 | Computing KL after +2025-04-03 00:11:19 | [rl2_trainer] epoch #196 | Computing loss after +2025-04-03 00:11:20 | [rl2_trainer] epoch #196 | Saving snapshot... +2025-04-03 00:11:20 | [rl2_trainer] epoch #196 | Saved +2025-04-03 00:11:20 | [rl2_trainer] epoch #196 | Time 37161.24 s +2025-04-03 00:11:20 | [rl2_trainer] epoch #196 | EpochTime 181.76 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.4631 +Average/AverageReturn -18.6761 +Average/Iteration 196 +Average/MaxReturn 8.03319 +Average/MinReturn -69.3932 +Average/NumEpisodes 100 +Average/StdReturn 7.10566 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.45999 +TotalEnvSteps 1.97e+06 +__unnamed_task__/AverageDiscountedReturn -12.4631 +__unnamed_task__/AverageReturn -18.6761 +__unnamed_task__/Iteration 196 +__unnamed_task__/MaxReturn 8.03319 +__unnamed_task__/MinReturn -69.3932 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.10566 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.80302 +policy/KL 0.00799422 +policy/KLBefore 0 +policy/LossAfter -0.0230824 +policy/LossBefore -0.00177503 +policy/dLoss 0.0213074 +---------------------------------------- ------------ +2025-04-03 00:14:53 | [rl2_trainer] epoch #197 | Optimizing policy... +2025-04-03 00:14:53 | [rl2_trainer] epoch #197 | Fitting baseline... +2025-04-03 00:14:53 | [rl2_trainer] epoch #197 | Computing loss before +2025-04-03 00:14:54 | [rl2_trainer] epoch #197 | Computing KL before +2025-04-03 00:14:55 | [rl2_trainer] epoch #197 | Optimizing +2025-04-03 00:15:32 | [rl2_trainer] epoch #197 | Computing KL after +2025-04-03 00:15:33 | [rl2_trainer] epoch #197 | Computing loss after +2025-04-03 00:15:34 | [rl2_trainer] epoch #197 | Saving snapshot... +2025-04-03 00:15:34 | [rl2_trainer] epoch #197 | Saved +2025-04-03 00:15:34 | [rl2_trainer] epoch #197 | Time 37414.81 s +2025-04-03 00:15:34 | [rl2_trainer] epoch #197 | EpochTime 253.57 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.5338 +Average/AverageReturn -22.8798 +Average/Iteration 197 +Average/MaxReturn -0.185914 +Average/MinReturn -56.902 +Average/NumEpisodes 100 +Average/StdReturn 12.0203 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.696239 +TotalEnvSteps 1.98e+06 +__unnamed_task__/AverageDiscountedReturn -14.5338 +__unnamed_task__/AverageReturn -22.8798 +__unnamed_task__/Iteration 197 +__unnamed_task__/MaxReturn -0.185914 +__unnamed_task__/MinReturn -56.902 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.0203 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.78081 +policy/KL 0.0157582 +policy/KLBefore 0 +policy/LossAfter -0.0476577 +policy/LossBefore -0.00714805 +policy/dLoss 0.0405097 +---------------------------------------- ------------ +2025-04-03 00:18:35 | [rl2_trainer] epoch #198 | Optimizing policy... +2025-04-03 00:18:35 | [rl2_trainer] epoch #198 | Fitting baseline... +2025-04-03 00:18:35 | [rl2_trainer] epoch #198 | Computing loss before +2025-04-03 00:18:36 | [rl2_trainer] epoch #198 | Computing KL before +2025-04-03 00:18:37 | [rl2_trainer] epoch #198 | Optimizing +2025-04-03 00:19:13 | [rl2_trainer] epoch #198 | Computing KL after +2025-04-03 00:19:14 | [rl2_trainer] epoch #198 | Computing loss after +2025-04-03 00:19:15 | [rl2_trainer] epoch #198 | Saving snapshot... +2025-04-03 00:19:15 | [rl2_trainer] epoch #198 | Saved +2025-04-03 00:19:15 | [rl2_trainer] epoch #198 | Time 37635.55 s +2025-04-03 00:19:15 | [rl2_trainer] epoch #198 | EpochTime 220.73 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.5506 +Average/AverageReturn -22.8726 +Average/Iteration 198 +Average/MaxReturn -5.57159 +Average/MinReturn -51.2125 +Average/NumEpisodes 100 +Average/StdReturn 7.99084 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.786267 +TotalEnvSteps 1.99e+06 +__unnamed_task__/AverageDiscountedReturn -14.5506 +__unnamed_task__/AverageReturn -22.8726 +__unnamed_task__/Iteration 198 +__unnamed_task__/MaxReturn -5.57159 +__unnamed_task__/MinReturn -51.2125 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.99084 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.77181 +policy/KL 0.0104251 +policy/KLBefore 0 +policy/LossAfter -0.0257272 +policy/LossBefore -0.0055649 +policy/dLoss 0.0201623 +---------------------------------------- ----------- +2025-04-03 00:22:19 | [rl2_trainer] epoch #199 | Optimizing policy... +2025-04-03 00:22:19 | [rl2_trainer] epoch #199 | Fitting baseline... +2025-04-03 00:22:19 | [rl2_trainer] epoch #199 | Computing loss before +2025-04-03 00:22:20 | [rl2_trainer] epoch #199 | Computing KL before +2025-04-03 00:22:21 | [rl2_trainer] epoch #199 | Optimizing +2025-04-03 00:22:55 | [rl2_trainer] epoch #199 | Computing KL after +2025-04-03 00:22:56 | [rl2_trainer] epoch #199 | Computing loss after +2025-04-03 00:22:57 | [rl2_trainer] epoch #199 | Saving snapshot... +2025-04-03 00:22:57 | [rl2_trainer] epoch #199 | Saved +2025-04-03 00:22:57 | [rl2_trainer] epoch #199 | Time 37857.78 s +2025-04-03 00:22:57 | [rl2_trainer] epoch #199 | EpochTime 222.23 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.2995 +Average/AverageReturn -26.0258 +Average/Iteration 199 +Average/MaxReturn 8.64199 +Average/MinReturn -60.8096 +Average/NumEpisodes 100 +Average/StdReturn 10.0597 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.603589 +TotalEnvSteps 2e+06 +__unnamed_task__/AverageDiscountedReturn -16.2995 +__unnamed_task__/AverageReturn -26.0258 +__unnamed_task__/Iteration 199 +__unnamed_task__/MaxReturn 8.64199 +__unnamed_task__/MinReturn -60.8096 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0597 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.75764 +policy/KL 0.0116027 +policy/KLBefore 0 +policy/LossAfter -0.0402596 +policy/LossBefore -0.00614734 +policy/dLoss 0.0341123 +---------------------------------------- ------------ +2025-04-03 00:25:58 | [rl2_trainer] epoch #200 | Optimizing policy... +2025-04-03 00:25:58 | [rl2_trainer] epoch #200 | Fitting baseline... +2025-04-03 00:25:58 | [rl2_trainer] epoch #200 | Computing loss before +2025-04-03 00:25:59 | [rl2_trainer] epoch #200 | Computing KL before +2025-04-03 00:26:00 | [rl2_trainer] epoch #200 | Optimizing +2025-04-03 00:26:33 | [rl2_trainer] epoch #200 | Computing KL after +2025-04-03 00:26:34 | [rl2_trainer] epoch #200 | Computing loss after +2025-04-03 00:26:35 | [rl2_trainer] epoch #200 | Saving snapshot... +2025-04-03 00:26:35 | [rl2_trainer] epoch #200 | Saved +2025-04-03 00:26:35 | [rl2_trainer] epoch #200 | Time 38075.67 s +2025-04-03 00:26:35 | [rl2_trainer] epoch #200 | EpochTime 217.89 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.3789 +Average/AverageReturn -25.9259 +Average/Iteration 200 +Average/MaxReturn -9.00222 +Average/MinReturn -50.7708 +Average/NumEpisodes 100 +Average/StdReturn 8.24267 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.687756 +TotalEnvSteps 2.01e+06 +__unnamed_task__/AverageDiscountedReturn -16.3789 +__unnamed_task__/AverageReturn -25.9259 +__unnamed_task__/Iteration 200 +__unnamed_task__/MaxReturn -9.00222 +__unnamed_task__/MinReturn -50.7708 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.24267 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.74978 +policy/KL 0.0143652 +policy/KLBefore 0 +policy/LossAfter -0.0353561 +policy/LossBefore -0.00595884 +policy/dLoss 0.0293973 +---------------------------------------- ------------ +2025-04-03 00:28:44 | [rl2_trainer] epoch #201 | Optimizing policy... +2025-04-03 00:28:45 | [rl2_trainer] epoch #201 | Fitting baseline... +2025-04-03 00:28:45 | [rl2_trainer] epoch #201 | Computing loss before +2025-04-03 00:28:45 | [rl2_trainer] epoch #201 | Computing KL before +2025-04-03 00:28:46 | [rl2_trainer] epoch #201 | Optimizing +2025-04-03 00:29:21 | [rl2_trainer] epoch #201 | Computing KL after +2025-04-03 00:29:21 | [rl2_trainer] epoch #201 | Computing loss after +2025-04-03 00:29:22 | [rl2_trainer] epoch #201 | Saving snapshot... +2025-04-03 00:29:22 | [rl2_trainer] epoch #201 | Saved +2025-04-03 00:29:22 | [rl2_trainer] epoch #201 | Time 38243.32 s +2025-04-03 00:29:22 | [rl2_trainer] epoch #201 | EpochTime 167.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4826 +Average/AverageReturn -23.8692 +Average/Iteration 201 +Average/MaxReturn -7.26762 +Average/MinReturn -59.3163 +Average/NumEpisodes 100 +Average/StdReturn 6.47319 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.707308 +TotalEnvSteps 2.02e+06 +__unnamed_task__/AverageDiscountedReturn -15.4826 +__unnamed_task__/AverageReturn -23.8692 +__unnamed_task__/Iteration 201 +__unnamed_task__/MaxReturn -7.26762 +__unnamed_task__/MinReturn -59.3163 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.47319 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.74244 +policy/KL 0.0112884 +policy/KLBefore 0 +policy/LossAfter -0.0202712 +policy/LossBefore -0.00146399 +policy/dLoss 0.0188072 +---------------------------------------- ------------ +2025-04-03 00:32:34 | [rl2_trainer] epoch #202 | Optimizing policy... +2025-04-03 00:32:35 | [rl2_trainer] epoch #202 | Fitting baseline... +2025-04-03 00:32:35 | [rl2_trainer] epoch #202 | Computing loss before +2025-04-03 00:32:35 | [rl2_trainer] epoch #202 | Computing KL before +2025-04-03 00:32:36 | [rl2_trainer] epoch #202 | Optimizing +2025-04-03 00:33:13 | [rl2_trainer] epoch #202 | Computing KL after +2025-04-03 00:33:13 | [rl2_trainer] epoch #202 | Computing loss after +2025-04-03 00:33:14 | [rl2_trainer] epoch #202 | Saving snapshot... +2025-04-03 00:33:14 | [rl2_trainer] epoch #202 | Saved +2025-04-03 00:33:14 | [rl2_trainer] epoch #202 | Time 38475.18 s +2025-04-03 00:33:14 | [rl2_trainer] epoch #202 | EpochTime 231.86 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.1037 +Average/AverageReturn -25.2905 +Average/Iteration 202 +Average/MaxReturn 0.0974665 +Average/MinReturn -87.2367 +Average/NumEpisodes 100 +Average/StdReturn 13.2926 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.696521 +TotalEnvSteps 2.03e+06 +__unnamed_task__/AverageDiscountedReturn -16.1037 +__unnamed_task__/AverageReturn -25.2905 +__unnamed_task__/Iteration 202 +__unnamed_task__/MaxReturn 0.0974665 +__unnamed_task__/MinReturn -87.2367 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.2926 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.7326 +policy/KL 0.0129266 +policy/KLBefore 0 +policy/LossAfter -0.0606175 +policy/LossBefore 0.00190442 +policy/dLoss 0.0625219 +---------------------------------------- ------------ +2025-04-03 00:36:07 | [rl2_trainer] epoch #203 | Optimizing policy... +2025-04-03 00:36:08 | [rl2_trainer] epoch #203 | Fitting baseline... +2025-04-03 00:36:08 | [rl2_trainer] epoch #203 | Computing loss before +2025-04-03 00:36:08 | [rl2_trainer] epoch #203 | Computing KL before +2025-04-03 00:36:09 | [rl2_trainer] epoch #203 | Optimizing +2025-04-03 00:36:43 | [rl2_trainer] epoch #203 | Computing KL after +2025-04-03 00:36:44 | [rl2_trainer] epoch #203 | Computing loss after +2025-04-03 00:36:44 | [rl2_trainer] epoch #203 | Saving snapshot... +2025-04-03 00:36:44 | [rl2_trainer] epoch #203 | Saved +2025-04-03 00:36:44 | [rl2_trainer] epoch #203 | Time 38685.48 s +2025-04-03 00:36:44 | [rl2_trainer] epoch #203 | EpochTime 210.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.5309 +Average/AverageReturn -22.2186 +Average/Iteration 203 +Average/MaxReturn -7.97927 +Average/MinReturn -39.4075 +Average/NumEpisodes 100 +Average/StdReturn 5.94691 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.826533 +TotalEnvSteps 2.04e+06 +__unnamed_task__/AverageDiscountedReturn -14.5309 +__unnamed_task__/AverageReturn -22.2186 +__unnamed_task__/Iteration 203 +__unnamed_task__/MaxReturn -7.97927 +__unnamed_task__/MinReturn -39.4075 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.94691 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.717 +policy/KL 0.00785757 +policy/KLBefore 0 +policy/LossAfter -0.0168355 +policy/LossBefore -0.00807161 +policy/dLoss 0.00876386 +---------------------------------------- ------------ +2025-04-03 00:38:49 | [rl2_trainer] epoch #204 | Optimizing policy... +2025-04-03 00:38:50 | [rl2_trainer] epoch #204 | Fitting baseline... +2025-04-03 00:38:50 | [rl2_trainer] epoch #204 | Computing loss before +2025-04-03 00:38:50 | [rl2_trainer] epoch #204 | Computing KL before +2025-04-03 00:38:51 | [rl2_trainer] epoch #204 | Optimizing +2025-04-03 00:39:27 | [rl2_trainer] epoch #204 | Computing KL after +2025-04-03 00:39:27 | [rl2_trainer] epoch #204 | Computing loss after +2025-04-03 00:39:28 | [rl2_trainer] epoch #204 | Saving snapshot... +2025-04-03 00:39:28 | [rl2_trainer] epoch #204 | Saved +2025-04-03 00:39:28 | [rl2_trainer] epoch #204 | Time 38849.22 s +2025-04-03 00:39:28 | [rl2_trainer] epoch #204 | EpochTime 163.74 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -16.663 +Average/AverageReturn -25.4412 +Average/Iteration 204 +Average/MaxReturn -11.6518 +Average/MinReturn -82.0086 +Average/NumEpisodes 100 +Average/StdReturn 8.74045 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.271354 +TotalEnvSteps 2.05e+06 +__unnamed_task__/AverageDiscountedReturn -16.663 +__unnamed_task__/AverageReturn -25.4412 +__unnamed_task__/Iteration 204 +__unnamed_task__/MaxReturn -11.6518 +__unnamed_task__/MinReturn -82.0086 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.74045 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.71474 +policy/KL 0.0137058 +policy/KLBefore 0 +policy/LossAfter -0.0393476 +policy/LossBefore -0.000101701 +policy/dLoss 0.0392459 +---------------------------------------- ------------- +2025-04-03 00:42:22 | [rl2_trainer] epoch #205 | Optimizing policy... +2025-04-03 00:42:22 | [rl2_trainer] epoch #205 | Fitting baseline... +2025-04-03 00:42:22 | [rl2_trainer] epoch #205 | Computing loss before +2025-04-03 00:42:23 | [rl2_trainer] epoch #205 | Computing KL before +2025-04-03 00:42:24 | [rl2_trainer] epoch #205 | Optimizing +2025-04-03 00:42:58 | [rl2_trainer] epoch #205 | Computing KL after +2025-04-03 00:42:58 | [rl2_trainer] epoch #205 | Computing loss after +2025-04-03 00:42:59 | [rl2_trainer] epoch #205 | Saving snapshot... +2025-04-03 00:42:59 | [rl2_trainer] epoch #205 | Saved +2025-04-03 00:42:59 | [rl2_trainer] epoch #205 | Time 39060.32 s +2025-04-03 00:42:59 | [rl2_trainer] epoch #205 | EpochTime 211.10 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.4154 +Average/AverageReturn -22.282 +Average/Iteration 205 +Average/MaxReturn -5.93251 +Average/MinReturn -65.3227 +Average/NumEpisodes 100 +Average/StdReturn 6.70875 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.479763 +TotalEnvSteps 2.06e+06 +__unnamed_task__/AverageDiscountedReturn -14.4154 +__unnamed_task__/AverageReturn -22.282 +__unnamed_task__/Iteration 205 +__unnamed_task__/MaxReturn -5.93251 +__unnamed_task__/MinReturn -65.3227 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.70875 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.70853 +policy/KL 0.0114296 +policy/KLBefore 0 +policy/LossAfter -0.0235902 +policy/LossBefore -0.0027553 +policy/dLoss 0.0208349 +---------------------------------------- ----------- +2025-04-03 00:45:53 | [rl2_trainer] epoch #206 | Optimizing policy... +2025-04-03 00:45:54 | [rl2_trainer] epoch #206 | Fitting baseline... +2025-04-03 00:45:54 | [rl2_trainer] epoch #206 | Computing loss before +2025-04-03 00:45:54 | [rl2_trainer] epoch #206 | Computing KL before +2025-04-03 00:45:55 | [rl2_trainer] epoch #206 | Optimizing +2025-04-03 00:46:29 | [rl2_trainer] epoch #206 | Computing KL after +2025-04-03 00:46:30 | [rl2_trainer] epoch #206 | Computing loss after +2025-04-03 00:46:31 | [rl2_trainer] epoch #206 | Saving snapshot... +2025-04-03 00:46:31 | [rl2_trainer] epoch #206 | Saved +2025-04-03 00:46:31 | [rl2_trainer] epoch #206 | Time 39271.69 s +2025-04-03 00:46:31 | [rl2_trainer] epoch #206 | EpochTime 211.37 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2338 +Average/AverageReturn -21.8622 +Average/Iteration 206 +Average/MaxReturn 6.58011 +Average/MinReturn -43.0843 +Average/NumEpisodes 100 +Average/StdReturn 7.4387 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.753108 +TotalEnvSteps 2.07e+06 +__unnamed_task__/AverageDiscountedReturn -14.2338 +__unnamed_task__/AverageReturn -21.8622 +__unnamed_task__/Iteration 206 +__unnamed_task__/MaxReturn 6.58011 +__unnamed_task__/MinReturn -43.0843 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.4387 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.67733 +policy/KL 0.0108618 +policy/KLBefore 0 +policy/LossAfter -0.00824706 +policy/LossBefore 0.00269555 +policy/dLoss 0.0109426 +---------------------------------------- ------------ +2025-04-03 00:49:13 | [rl2_trainer] epoch #207 | Optimizing policy... +2025-04-03 00:49:13 | [rl2_trainer] epoch #207 | Fitting baseline... +2025-04-03 00:49:13 | [rl2_trainer] epoch #207 | Computing loss before +2025-04-03 00:49:13 | [rl2_trainer] epoch #207 | Computing KL before +2025-04-03 00:49:14 | [rl2_trainer] epoch #207 | Optimizing +2025-04-03 00:49:50 | [rl2_trainer] epoch #207 | Computing KL after +2025-04-03 00:49:50 | [rl2_trainer] epoch #207 | Computing loss after +2025-04-03 00:49:51 | [rl2_trainer] epoch #207 | Saving snapshot... +2025-04-03 00:49:51 | [rl2_trainer] epoch #207 | Saved +2025-04-03 00:49:51 | [rl2_trainer] epoch #207 | Time 39472.24 s +2025-04-03 00:49:51 | [rl2_trainer] epoch #207 | EpochTime 200.55 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.7631 +Average/AverageReturn -26.1536 +Average/Iteration 207 +Average/MaxReturn -7.79386 +Average/MinReturn -151.666 +Average/NumEpisodes 100 +Average/StdReturn 16.0706 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.644435 +TotalEnvSteps 2.08e+06 +__unnamed_task__/AverageDiscountedReturn -16.7631 +__unnamed_task__/AverageReturn -26.1536 +__unnamed_task__/Iteration 207 +__unnamed_task__/MaxReturn -7.79386 +__unnamed_task__/MinReturn -151.666 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.0706 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.63889 +policy/KL 0.0138905 +policy/KLBefore 0 +policy/LossAfter -0.0637067 +policy/LossBefore -0.0022199 +policy/dLoss 0.0614868 +---------------------------------------- ------------ +2025-04-03 00:51:23 | [rl2_trainer] epoch #208 | Optimizing policy... +2025-04-03 00:51:24 | [rl2_trainer] epoch #208 | Fitting baseline... +2025-04-03 00:51:24 | [rl2_trainer] epoch #208 | Computing loss before +2025-04-03 00:51:24 | [rl2_trainer] epoch #208 | Computing KL before +2025-04-03 00:51:25 | [rl2_trainer] epoch #208 | Optimizing +2025-04-03 00:52:01 | [rl2_trainer] epoch #208 | Computing KL after +2025-04-03 00:52:01 | [rl2_trainer] epoch #208 | Computing loss after +2025-04-03 00:52:02 | [rl2_trainer] epoch #208 | Saving snapshot... +2025-04-03 00:52:02 | [rl2_trainer] epoch #208 | Saved +2025-04-03 00:52:02 | [rl2_trainer] epoch #208 | Time 39603.20 s +2025-04-03 00:52:02 | [rl2_trainer] epoch #208 | EpochTime 130.96 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.324 +Average/AverageReturn -23.7462 +Average/Iteration 208 +Average/MaxReturn -12.0348 +Average/MinReturn -49.2572 +Average/NumEpisodes 100 +Average/StdReturn 4.96752 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.631703 +TotalEnvSteps 2.09e+06 +__unnamed_task__/AverageDiscountedReturn -15.324 +__unnamed_task__/AverageReturn -23.7462 +__unnamed_task__/Iteration 208 +__unnamed_task__/MaxReturn -12.0348 +__unnamed_task__/MinReturn -49.2572 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.96752 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.61986 +policy/KL 0.0125825 +policy/KLBefore 0 +policy/LossAfter -0.0083684 +policy/LossBefore -0.00353969 +policy/dLoss 0.0048287 +---------------------------------------- ------------ +2025-04-03 00:54:59 | [rl2_trainer] epoch #209 | Optimizing policy... +2025-04-03 00:55:00 | [rl2_trainer] epoch #209 | Fitting baseline... +2025-04-03 00:55:00 | [rl2_trainer] epoch #209 | Computing loss before +2025-04-03 00:55:00 | [rl2_trainer] epoch #209 | Computing KL before +2025-04-03 00:55:01 | [rl2_trainer] epoch #209 | Optimizing +2025-04-03 00:55:37 | [rl2_trainer] epoch #209 | Computing KL after +2025-04-03 00:55:37 | [rl2_trainer] epoch #209 | Computing loss after +2025-04-03 00:55:38 | [rl2_trainer] epoch #209 | Saving snapshot... +2025-04-03 00:55:38 | [rl2_trainer] epoch #209 | Saved +2025-04-03 00:55:38 | [rl2_trainer] epoch #209 | Time 39819.30 s +2025-04-03 00:55:38 | [rl2_trainer] epoch #209 | EpochTime 216.10 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.0955 +Average/AverageReturn -21.4961 +Average/Iteration 209 +Average/MaxReturn -2.19123 +Average/MinReturn -41.5548 +Average/NumEpisodes 100 +Average/StdReturn 7.3068 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.765217 +TotalEnvSteps 2.1e+06 +__unnamed_task__/AverageDiscountedReturn -14.0955 +__unnamed_task__/AverageReturn -21.4961 +__unnamed_task__/Iteration 209 +__unnamed_task__/MaxReturn -2.19123 +__unnamed_task__/MinReturn -41.5548 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.3068 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.61044 +policy/KL 0.00907715 +policy/KLBefore 0 +policy/LossAfter -0.0181117 +policy/LossBefore -0.000940375 +policy/dLoss 0.0171713 +---------------------------------------- ------------- +2025-04-03 00:57:48 | [rl2_trainer] epoch #210 | Optimizing policy... +2025-04-03 00:57:48 | [rl2_trainer] epoch #210 | Fitting baseline... +2025-04-03 00:57:48 | [rl2_trainer] epoch #210 | Computing loss before +2025-04-03 00:57:48 | [rl2_trainer] epoch #210 | Computing KL before +2025-04-03 00:57:49 | [rl2_trainer] epoch #210 | Optimizing +2025-04-03 00:58:25 | [rl2_trainer] epoch #210 | Computing KL after +2025-04-03 00:58:25 | [rl2_trainer] epoch #210 | Computing loss after +2025-04-03 00:58:26 | [rl2_trainer] epoch #210 | Saving snapshot... +2025-04-03 00:58:26 | [rl2_trainer] epoch #210 | Saved +2025-04-03 00:58:26 | [rl2_trainer] epoch #210 | Time 39987.42 s +2025-04-03 00:58:26 | [rl2_trainer] epoch #210 | EpochTime 168.11 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.4585 +Average/AverageReturn -22.659 +Average/Iteration 210 +Average/MaxReturn 0.90809 +Average/MinReturn -68.2491 +Average/NumEpisodes 100 +Average/StdReturn 7.48349 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.514202 +TotalEnvSteps 2.11e+06 +__unnamed_task__/AverageDiscountedReturn -14.4585 +__unnamed_task__/AverageReturn -22.659 +__unnamed_task__/Iteration 210 +__unnamed_task__/MaxReturn 0.90809 +__unnamed_task__/MinReturn -68.2491 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.48349 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.604 +policy/KL 0.0116609 +policy/KLBefore 0 +policy/LossAfter -0.0261175 +policy/LossBefore 0.00302317 +policy/dLoss 0.0291407 +---------------------------------------- ------------ +2025-04-03 01:00:53 | [rl2_trainer] epoch #211 | Optimizing policy... +2025-04-03 01:00:53 | [rl2_trainer] epoch #211 | Fitting baseline... +2025-04-03 01:00:53 | [rl2_trainer] epoch #211 | Computing loss before +2025-04-03 01:00:54 | [rl2_trainer] epoch #211 | Computing KL before +2025-04-03 01:00:54 | [rl2_trainer] epoch #211 | Optimizing +2025-04-03 01:01:30 | [rl2_trainer] epoch #211 | Computing KL after +2025-04-03 01:01:31 | [rl2_trainer] epoch #211 | Computing loss after +2025-04-03 01:01:32 | [rl2_trainer] epoch #211 | Saving snapshot... +2025-04-03 01:01:32 | [rl2_trainer] epoch #211 | Saved +2025-04-03 01:01:32 | [rl2_trainer] epoch #211 | Time 40172.65 s +2025-04-03 01:01:32 | [rl2_trainer] epoch #211 | EpochTime 185.23 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.9508 +Average/AverageReturn -21.381 +Average/Iteration 211 +Average/MaxReturn 6.5656 +Average/MinReturn -66.3938 +Average/NumEpisodes 100 +Average/StdReturn 7.00418 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.723879 +TotalEnvSteps 2.12e+06 +__unnamed_task__/AverageDiscountedReturn -13.9508 +__unnamed_task__/AverageReturn -21.381 +__unnamed_task__/Iteration 211 +__unnamed_task__/MaxReturn 6.5656 +__unnamed_task__/MinReturn -66.3938 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.00418 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.59714 +policy/KL 0.0110743 +policy/KLBefore 0 +policy/LossAfter -0.0259945 +policy/LossBefore -0.00597781 +policy/dLoss 0.0200167 +---------------------------------------- ------------ +2025-04-03 01:05:09 | [rl2_trainer] epoch #212 | Optimizing policy... +2025-04-03 01:05:10 | [rl2_trainer] epoch #212 | Fitting baseline... +2025-04-03 01:05:10 | [rl2_trainer] epoch #212 | Computing loss before +2025-04-03 01:05:10 | [rl2_trainer] epoch #212 | Computing KL before +2025-04-03 01:05:11 | [rl2_trainer] epoch #212 | Optimizing +2025-04-03 01:05:46 | [rl2_trainer] epoch #212 | Computing KL after +2025-04-03 01:05:47 | [rl2_trainer] epoch #212 | Computing loss after +2025-04-03 01:05:48 | [rl2_trainer] epoch #212 | Saving snapshot... +2025-04-03 01:05:48 | [rl2_trainer] epoch #212 | Saved +2025-04-03 01:05:48 | [rl2_trainer] epoch #212 | Time 40428.68 s +2025-04-03 01:05:48 | [rl2_trainer] epoch #212 | EpochTime 256.03 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3304 +Average/AverageReturn -23.6657 +Average/Iteration 212 +Average/MaxReturn -8.29901 +Average/MinReturn -46.8015 +Average/NumEpisodes 100 +Average/StdReturn 6.5347 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.648659 +TotalEnvSteps 2.13e+06 +__unnamed_task__/AverageDiscountedReturn -15.3304 +__unnamed_task__/AverageReturn -23.6657 +__unnamed_task__/Iteration 212 +__unnamed_task__/MaxReturn -8.29901 +__unnamed_task__/MinReturn -46.8015 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.5347 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.58437 +policy/KL 0.0105631 +policy/KLBefore 0 +policy/LossAfter -0.00514887 +policy/LossBefore 0.00727698 +policy/dLoss 0.0124258 +---------------------------------------- ------------ +2025-04-03 01:08:45 | [rl2_trainer] epoch #213 | Optimizing policy... +2025-04-03 01:08:45 | [rl2_trainer] epoch #213 | Fitting baseline... +2025-04-03 01:08:45 | [rl2_trainer] epoch #213 | Computing loss before +2025-04-03 01:08:46 | [rl2_trainer] epoch #213 | Computing KL before +2025-04-03 01:08:46 | [rl2_trainer] epoch #213 | Optimizing +2025-04-03 01:09:22 | [rl2_trainer] epoch #213 | Computing KL after +2025-04-03 01:09:23 | [rl2_trainer] epoch #213 | Computing loss after +2025-04-03 01:09:24 | [rl2_trainer] epoch #213 | Saving snapshot... +2025-04-03 01:09:24 | [rl2_trainer] epoch #213 | Saved +2025-04-03 01:09:24 | [rl2_trainer] epoch #213 | Time 40644.79 s +2025-04-03 01:09:24 | [rl2_trainer] epoch #213 | EpochTime 216.10 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.6656 +Average/AverageReturn -22.3438 +Average/Iteration 213 +Average/MaxReturn -6.21962 +Average/MinReturn -37.9297 +Average/NumEpisodes 100 +Average/StdReturn 5.73529 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.777984 +TotalEnvSteps 2.14e+06 +__unnamed_task__/AverageDiscountedReturn -14.6656 +__unnamed_task__/AverageReturn -22.3438 +__unnamed_task__/Iteration 213 +__unnamed_task__/MaxReturn -6.21962 +__unnamed_task__/MinReturn -37.9297 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.73529 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.5594 +policy/KL 0.00852356 +policy/KLBefore 0 +policy/LossAfter -0.0032606 +policy/LossBefore 0.00236027 +policy/dLoss 0.00562087 +---------------------------------------- ------------ +2025-04-03 01:11:39 | [rl2_trainer] epoch #214 | Optimizing policy... +2025-04-03 01:11:39 | [rl2_trainer] epoch #214 | Fitting baseline... +2025-04-03 01:11:39 | [rl2_trainer] epoch #214 | Computing loss before +2025-04-03 01:11:39 | [rl2_trainer] epoch #214 | Computing KL before +2025-04-03 01:11:40 | [rl2_trainer] epoch #214 | Optimizing +2025-04-03 01:12:16 | [rl2_trainer] epoch #214 | Computing KL after +2025-04-03 01:12:16 | [rl2_trainer] epoch #214 | Computing loss after +2025-04-03 01:12:17 | [rl2_trainer] epoch #214 | Saving snapshot... +2025-04-03 01:12:17 | [rl2_trainer] epoch #214 | Saved +2025-04-03 01:12:17 | [rl2_trainer] epoch #214 | Time 40818.23 s +2025-04-03 01:12:17 | [rl2_trainer] epoch #214 | EpochTime 173.43 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.6652 +Average/AverageReturn -19.2609 +Average/Iteration 214 +Average/MaxReturn -2.96728 +Average/MinReturn -33.1992 +Average/NumEpisodes 100 +Average/StdReturn 4.92603 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.518862 +TotalEnvSteps 2.15e+06 +__unnamed_task__/AverageDiscountedReturn -12.6652 +__unnamed_task__/AverageReturn -19.2609 +__unnamed_task__/Iteration 214 +__unnamed_task__/MaxReturn -2.96728 +__unnamed_task__/MinReturn -33.1992 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.92603 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.53621 +policy/KL 0.0102528 +policy/KLBefore 0 +policy/LossAfter -0.00767897 +policy/LossBefore 0.00422813 +policy/dLoss 0.0119071 +---------------------------------------- ------------ +2025-04-03 01:15:02 | [rl2_trainer] epoch #215 | Optimizing policy... +2025-04-03 01:15:02 | [rl2_trainer] epoch #215 | Fitting baseline... +2025-04-03 01:15:02 | [rl2_trainer] epoch #215 | Computing loss before +2025-04-03 01:15:03 | [rl2_trainer] epoch #215 | Computing KL before +2025-04-03 01:15:04 | [rl2_trainer] epoch #215 | Optimizing +2025-04-03 01:15:38 | [rl2_trainer] epoch #215 | Computing KL after +2025-04-03 01:15:38 | [rl2_trainer] epoch #215 | Computing loss after +2025-04-03 01:15:39 | [rl2_trainer] epoch #215 | Saving snapshot... +2025-04-03 01:15:39 | [rl2_trainer] epoch #215 | Saved +2025-04-03 01:15:39 | [rl2_trainer] epoch #215 | Time 41020.15 s +2025-04-03 01:15:39 | [rl2_trainer] epoch #215 | EpochTime 201.92 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1461 +Average/AverageReturn -23.53 +Average/Iteration 215 +Average/MaxReturn -0.406309 +Average/MinReturn -69.9635 +Average/NumEpisodes 100 +Average/StdReturn 8.47387 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.587805 +TotalEnvSteps 2.16e+06 +__unnamed_task__/AverageDiscountedReturn -15.1461 +__unnamed_task__/AverageReturn -23.53 +__unnamed_task__/Iteration 215 +__unnamed_task__/MaxReturn -0.406309 +__unnamed_task__/MinReturn -69.9635 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.47387 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.51874 +policy/KL 0.0127146 +policy/KLBefore 0 +policy/LossAfter -0.0200536 +policy/LossBefore 0.00284265 +policy/dLoss 0.0228963 +---------------------------------------- ------------ +2025-04-03 01:18:25 | [rl2_trainer] epoch #216 | Optimizing policy... +2025-04-03 01:18:25 | [rl2_trainer] epoch #216 | Fitting baseline... +2025-04-03 01:18:25 | [rl2_trainer] epoch #216 | Computing loss before +2025-04-03 01:18:26 | [rl2_trainer] epoch #216 | Computing KL before +2025-04-03 01:18:26 | [rl2_trainer] epoch #216 | Optimizing +2025-04-03 01:19:02 | [rl2_trainer] epoch #216 | Computing KL after +2025-04-03 01:19:02 | [rl2_trainer] epoch #216 | Computing loss after +2025-04-03 01:19:03 | [rl2_trainer] epoch #216 | Saving snapshot... +2025-04-03 01:19:03 | [rl2_trainer] epoch #216 | Saved +2025-04-03 01:19:03 | [rl2_trainer] epoch #216 | Time 41224.24 s +2025-04-03 01:19:03 | [rl2_trainer] epoch #216 | EpochTime 204.09 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.8063 +Average/AverageReturn -22.8998 +Average/Iteration 216 +Average/MaxReturn -3.59318 +Average/MinReturn -58.3351 +Average/NumEpisodes 100 +Average/StdReturn 6.75314 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.631798 +TotalEnvSteps 2.17e+06 +__unnamed_task__/AverageDiscountedReturn -14.8063 +__unnamed_task__/AverageReturn -22.8998 +__unnamed_task__/Iteration 216 +__unnamed_task__/MaxReturn -3.59318 +__unnamed_task__/MinReturn -58.3351 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.75314 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.50324 +policy/KL 0.0111064 +policy/KLBefore 0 +policy/LossAfter -0.0215279 +policy/LossBefore -0.0059135 +policy/dLoss 0.0156144 +---------------------------------------- ----------- +2025-04-03 01:22:02 | [rl2_trainer] epoch #217 | Optimizing policy... +2025-04-03 01:22:02 | [rl2_trainer] epoch #217 | Fitting baseline... +2025-04-03 01:22:02 | [rl2_trainer] epoch #217 | Computing loss before +2025-04-03 01:22:03 | [rl2_trainer] epoch #217 | Computing KL before +2025-04-03 01:22:03 | [rl2_trainer] epoch #217 | Optimizing +2025-04-03 01:22:40 | [rl2_trainer] epoch #217 | Computing KL after +2025-04-03 01:22:40 | [rl2_trainer] epoch #217 | Computing loss after +2025-04-03 01:22:41 | [rl2_trainer] epoch #217 | Saving snapshot... +2025-04-03 01:22:41 | [rl2_trainer] epoch #217 | Saved +2025-04-03 01:22:41 | [rl2_trainer] epoch #217 | Time 41442.18 s +2025-04-03 01:22:41 | [rl2_trainer] epoch #217 | EpochTime 217.93 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.3722 +Average/AverageReturn -21.887 +Average/Iteration 217 +Average/MaxReturn -5.32514 +Average/MinReturn -35.5951 +Average/NumEpisodes 100 +Average/StdReturn 5.601 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.747858 +TotalEnvSteps 2.18e+06 +__unnamed_task__/AverageDiscountedReturn -14.3722 +__unnamed_task__/AverageReturn -21.887 +__unnamed_task__/Iteration 217 +__unnamed_task__/MaxReturn -5.32514 +__unnamed_task__/MinReturn -35.5951 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.601 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.4828 +policy/KL 0.00907158 +policy/KLBefore 0 +policy/LossAfter -0.00990055 +policy/LossBefore -0.00185183 +policy/dLoss 0.00804872 +---------------------------------------- ------------ +2025-04-03 01:25:13 | [rl2_trainer] epoch #218 | Optimizing policy... +2025-04-03 01:25:13 | [rl2_trainer] epoch #218 | Fitting baseline... +2025-04-03 01:25:13 | [rl2_trainer] epoch #218 | Computing loss before +2025-04-03 01:25:14 | [rl2_trainer] epoch #218 | Computing KL before +2025-04-03 01:25:15 | [rl2_trainer] epoch #218 | Optimizing +2025-04-03 01:25:50 | [rl2_trainer] epoch #218 | Computing KL after +2025-04-03 01:25:51 | [rl2_trainer] epoch #218 | Computing loss after +2025-04-03 01:25:51 | [rl2_trainer] epoch #218 | Saving snapshot... +2025-04-03 01:25:51 | [rl2_trainer] epoch #218 | Saved +2025-04-03 01:25:51 | [rl2_trainer] epoch #218 | Time 41632.42 s +2025-04-03 01:25:51 | [rl2_trainer] epoch #218 | EpochTime 190.24 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.203 +Average/AverageReturn -23.6054 +Average/Iteration 218 +Average/MaxReturn -14.0415 +Average/MinReturn -65.5437 +Average/NumEpisodes 100 +Average/StdReturn 8.65053 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.673642 +TotalEnvSteps 2.19e+06 +__unnamed_task__/AverageDiscountedReturn -15.203 +__unnamed_task__/AverageReturn -23.6054 +__unnamed_task__/Iteration 218 +__unnamed_task__/MaxReturn -14.0415 +__unnamed_task__/MinReturn -65.5437 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.65053 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.46559 +policy/KL 0.0127724 +policy/KLBefore 0 +policy/LossAfter -0.0245028 +policy/LossBefore -0.00930034 +policy/dLoss 0.0152024 +---------------------------------------- ------------ +2025-04-03 01:27:50 | [rl2_trainer] epoch #219 | Optimizing policy... +2025-04-03 01:27:51 | [rl2_trainer] epoch #219 | Fitting baseline... +2025-04-03 01:27:51 | [rl2_trainer] epoch #219 | Computing loss before +2025-04-03 01:27:51 | [rl2_trainer] epoch #219 | Computing KL before +2025-04-03 01:27:52 | [rl2_trainer] epoch #219 | Optimizing +2025-04-03 01:28:28 | [rl2_trainer] epoch #219 | Computing KL after +2025-04-03 01:28:29 | [rl2_trainer] epoch #219 | Computing loss after +2025-04-03 01:28:30 | [rl2_trainer] epoch #219 | Saving snapshot... +2025-04-03 01:28:30 | [rl2_trainer] epoch #219 | Saved +2025-04-03 01:28:30 | [rl2_trainer] epoch #219 | Time 41790.63 s +2025-04-03 01:28:30 | [rl2_trainer] epoch #219 | EpochTime 158.21 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.158 +Average/AverageReturn -23.5175 +Average/Iteration 219 +Average/MaxReturn 5.07174 +Average/MinReturn -36.755 +Average/NumEpisodes 100 +Average/StdReturn 6.07067 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.492462 +TotalEnvSteps 2.2e+06 +__unnamed_task__/AverageDiscountedReturn -15.158 +__unnamed_task__/AverageReturn -23.5175 +__unnamed_task__/Iteration 219 +__unnamed_task__/MaxReturn 5.07174 +__unnamed_task__/MinReturn -36.755 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.07067 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.44554 +policy/KL 0.0112913 +policy/KLBefore 0 +policy/LossAfter -0.0134808 +policy/LossBefore 0.00176136 +policy/dLoss 0.0152421 +---------------------------------------- ------------ +2025-04-03 01:31:28 | [rl2_trainer] epoch #220 | Optimizing policy... +2025-04-03 01:31:28 | [rl2_trainer] epoch #220 | Fitting baseline... +2025-04-03 01:31:28 | [rl2_trainer] epoch #220 | Computing loss before +2025-04-03 01:31:29 | [rl2_trainer] epoch #220 | Computing KL before +2025-04-03 01:31:29 | [rl2_trainer] epoch #220 | Optimizing +2025-04-03 01:32:05 | [rl2_trainer] epoch #220 | Computing KL after +2025-04-03 01:32:06 | [rl2_trainer] epoch #220 | Computing loss after +2025-04-03 01:32:07 | [rl2_trainer] epoch #220 | Saving snapshot... +2025-04-03 01:32:07 | [rl2_trainer] epoch #220 | Saved +2025-04-03 01:32:07 | [rl2_trainer] epoch #220 | Time 42007.61 s +2025-04-03 01:32:07 | [rl2_trainer] epoch #220 | EpochTime 216.98 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.9323 +Average/AverageReturn -23.0838 +Average/Iteration 220 +Average/MaxReturn -12.536 +Average/MinReturn -78.7241 +Average/NumEpisodes 100 +Average/StdReturn 8.60131 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.659747 +TotalEnvSteps 2.21e+06 +__unnamed_task__/AverageDiscountedReturn -14.9323 +__unnamed_task__/AverageReturn -23.0838 +__unnamed_task__/Iteration 220 +__unnamed_task__/MaxReturn -12.536 +__unnamed_task__/MinReturn -78.7241 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.60131 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.43224 +policy/KL 0.0121982 +policy/KLBefore 0 +policy/LossAfter -0.0221403 +policy/LossBefore -0.0048801 +policy/dLoss 0.0172602 +---------------------------------------- ----------- +2025-04-03 01:35:20 | [rl2_trainer] epoch #221 | Optimizing policy... +2025-04-03 01:35:20 | [rl2_trainer] epoch #221 | Fitting baseline... +2025-04-03 01:35:20 | [rl2_trainer] epoch #221 | Computing loss before +2025-04-03 01:35:21 | [rl2_trainer] epoch #221 | Computing KL before +2025-04-03 01:35:21 | [rl2_trainer] epoch #221 | Optimizing +2025-04-03 01:35:57 | [rl2_trainer] epoch #221 | Computing KL after +2025-04-03 01:35:57 | [rl2_trainer] epoch #221 | Computing loss after +2025-04-03 01:35:58 | [rl2_trainer] epoch #221 | Saving snapshot... +2025-04-03 01:35:58 | [rl2_trainer] epoch #221 | Saved +2025-04-03 01:35:58 | [rl2_trainer] epoch #221 | Time 42239.01 s +2025-04-03 01:35:58 | [rl2_trainer] epoch #221 | EpochTime 231.40 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9657 +Average/AverageReturn -20.3392 +Average/Iteration 221 +Average/MaxReturn -2.01371 +Average/MinReturn -38.1189 +Average/NumEpisodes 100 +Average/StdReturn 6.41201 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.582603 +TotalEnvSteps 2.22e+06 +__unnamed_task__/AverageDiscountedReturn -12.9657 +__unnamed_task__/AverageReturn -20.3392 +__unnamed_task__/Iteration 221 +__unnamed_task__/MaxReturn -2.01371 +__unnamed_task__/MinReturn -38.1189 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.41201 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.40881 +policy/KL 0.0127367 +policy/KLBefore 0 +policy/LossAfter -0.0164055 +policy/LossBefore 0.00137134 +policy/dLoss 0.0177769 +---------------------------------------- ------------ +2025-04-03 01:38:23 | [rl2_trainer] epoch #222 | Optimizing policy... +2025-04-03 01:38:23 | [rl2_trainer] epoch #222 | Fitting baseline... +2025-04-03 01:38:23 | [rl2_trainer] epoch #222 | Computing loss before +2025-04-03 01:38:24 | [rl2_trainer] epoch #222 | Computing KL before +2025-04-03 01:38:24 | [rl2_trainer] epoch #222 | Optimizing +2025-04-03 01:39:00 | [rl2_trainer] epoch #222 | Computing KL after +2025-04-03 01:39:01 | [rl2_trainer] epoch #222 | Computing loss after +2025-04-03 01:39:01 | [rl2_trainer] epoch #222 | Saving snapshot... +2025-04-03 01:39:01 | [rl2_trainer] epoch #222 | Saved +2025-04-03 01:39:01 | [rl2_trainer] epoch #222 | Time 42422.50 s +2025-04-03 01:39:01 | [rl2_trainer] epoch #222 | EpochTime 183.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.6292 +Average/AverageReturn -23.0032 +Average/Iteration 222 +Average/MaxReturn 7.06527 +Average/MinReturn -69.7194 +Average/NumEpisodes 100 +Average/StdReturn 9.83674 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.546489 +TotalEnvSteps 2.23e+06 +__unnamed_task__/AverageDiscountedReturn -14.6292 +__unnamed_task__/AverageReturn -23.0032 +__unnamed_task__/Iteration 222 +__unnamed_task__/MaxReturn 7.06527 +__unnamed_task__/MinReturn -69.7194 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.83674 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.38675 +policy/KL 0.0105302 +policy/KLBefore 0 +policy/LossAfter -0.0398239 +policy/LossBefore -0.00395833 +policy/dLoss 0.0358655 +---------------------------------------- ------------ +2025-04-03 01:41:39 | [rl2_trainer] epoch #223 | Optimizing policy... +2025-04-03 01:41:40 | [rl2_trainer] epoch #223 | Fitting baseline... +2025-04-03 01:41:40 | [rl2_trainer] epoch #223 | Computing loss before +2025-04-03 01:41:40 | [rl2_trainer] epoch #223 | Computing KL before +2025-04-03 01:41:41 | [rl2_trainer] epoch #223 | Optimizing +2025-04-03 01:42:17 | [rl2_trainer] epoch #223 | Computing KL after +2025-04-03 01:42:18 | [rl2_trainer] epoch #223 | Computing loss after +2025-04-03 01:42:19 | [rl2_trainer] epoch #223 | Saving snapshot... +2025-04-03 01:42:19 | [rl2_trainer] epoch #223 | Saved +2025-04-03 01:42:19 | [rl2_trainer] epoch #223 | Time 42619.59 s +2025-04-03 01:42:19 | [rl2_trainer] epoch #223 | EpochTime 197.09 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.498 +Average/AverageReturn -22.7253 +Average/Iteration 223 +Average/MaxReturn 8.43489 +Average/MinReturn -67.4827 +Average/NumEpisodes 100 +Average/StdReturn 10.0525 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.443927 +TotalEnvSteps 2.24e+06 +__unnamed_task__/AverageDiscountedReturn -14.498 +__unnamed_task__/AverageReturn -22.7253 +__unnamed_task__/Iteration 223 +__unnamed_task__/MaxReturn 8.43489 +__unnamed_task__/MinReturn -67.4827 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0525 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.36274 +policy/KL 0.0127332 +policy/KLBefore 0 +policy/LossAfter -0.0452912 +policy/LossBefore -0.00618987 +policy/dLoss 0.0391013 +---------------------------------------- ------------ +2025-04-03 01:44:17 | [rl2_trainer] epoch #224 | Optimizing policy... +2025-04-03 01:44:18 | [rl2_trainer] epoch #224 | Fitting baseline... +2025-04-03 01:44:18 | [rl2_trainer] epoch #224 | Computing loss before +2025-04-03 01:44:18 | [rl2_trainer] epoch #224 | Computing KL before +2025-04-03 01:44:19 | [rl2_trainer] epoch #224 | Optimizing +2025-04-03 01:44:55 | [rl2_trainer] epoch #224 | Computing KL after +2025-04-03 01:44:56 | [rl2_trainer] epoch #224 | Computing loss after +2025-04-03 01:44:57 | [rl2_trainer] epoch #224 | Saving snapshot... +2025-04-03 01:44:57 | [rl2_trainer] epoch #224 | Saved +2025-04-03 01:44:57 | [rl2_trainer] epoch #224 | Time 42777.88 s +2025-04-03 01:44:57 | [rl2_trainer] epoch #224 | EpochTime 158.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4996 +Average/AverageReturn -24.2186 +Average/Iteration 224 +Average/MaxReturn -14.968 +Average/MinReturn -41.2927 +Average/NumEpisodes 100 +Average/StdReturn 5.03483 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.54436 +TotalEnvSteps 2.25e+06 +__unnamed_task__/AverageDiscountedReturn -15.4996 +__unnamed_task__/AverageReturn -24.2186 +__unnamed_task__/Iteration 224 +__unnamed_task__/MaxReturn -14.968 +__unnamed_task__/MinReturn -41.2927 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.03483 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.33829 +policy/KL 0.00904455 +policy/KLBefore 0 +policy/LossAfter -0.0112474 +policy/LossBefore -0.00354639 +policy/dLoss 0.00770099 +---------------------------------------- ------------ +2025-04-03 01:46:56 | [rl2_trainer] epoch #225 | Optimizing policy... +2025-04-03 01:46:57 | [rl2_trainer] epoch #225 | Fitting baseline... +2025-04-03 01:46:57 | [rl2_trainer] epoch #225 | Computing loss before +2025-04-03 01:46:57 | [rl2_trainer] epoch #225 | Computing KL before +2025-04-03 01:46:58 | [rl2_trainer] epoch #225 | Optimizing +2025-04-03 01:47:33 | [rl2_trainer] epoch #225 | Computing KL after +2025-04-03 01:47:33 | [rl2_trainer] epoch #225 | Computing loss after +2025-04-03 01:47:34 | [rl2_trainer] epoch #225 | Saving snapshot... +2025-04-03 01:47:34 | [rl2_trainer] epoch #225 | Saved +2025-04-03 01:47:34 | [rl2_trainer] epoch #225 | Time 42935.40 s +2025-04-03 01:47:34 | [rl2_trainer] epoch #225 | EpochTime 157.52 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1947 +Average/AverageReturn -23.5505 +Average/Iteration 225 +Average/MaxReturn -1.51625 +Average/MinReturn -40.2788 +Average/NumEpisodes 100 +Average/StdReturn 5.53136 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.651344 +TotalEnvSteps 2.26e+06 +__unnamed_task__/AverageDiscountedReturn -15.1947 +__unnamed_task__/AverageReturn -23.5505 +__unnamed_task__/Iteration 225 +__unnamed_task__/MaxReturn -1.51625 +__unnamed_task__/MinReturn -40.2788 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.53136 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.31155 +policy/KL 0.0103482 +policy/KLBefore 0 +policy/LossAfter -0.0106718 +policy/LossBefore -0.00168595 +policy/dLoss 0.00898584 +---------------------------------------- ------------ +2025-04-03 01:49:44 | [rl2_trainer] epoch #226 | Optimizing policy... +2025-04-03 01:49:45 | [rl2_trainer] epoch #226 | Fitting baseline... +2025-04-03 01:49:45 | [rl2_trainer] epoch #226 | Computing loss before +2025-04-03 01:49:45 | [rl2_trainer] epoch #226 | Computing KL before +2025-04-03 01:49:46 | [rl2_trainer] epoch #226 | Optimizing +2025-04-03 01:50:24 | [rl2_trainer] epoch #226 | Computing KL after +2025-04-03 01:50:25 | [rl2_trainer] epoch #226 | Computing loss after +2025-04-03 01:50:26 | [rl2_trainer] epoch #226 | Saving snapshot... +2025-04-03 01:50:26 | [rl2_trainer] epoch #226 | Saved +2025-04-03 01:50:26 | [rl2_trainer] epoch #226 | Time 43106.88 s +2025-04-03 01:50:26 | [rl2_trainer] epoch #226 | EpochTime 171.48 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.2763 +Average/AverageReturn -23.3233 +Average/Iteration 226 +Average/MaxReturn -11.8229 +Average/MinReturn -72.1208 +Average/NumEpisodes 100 +Average/StdReturn 6.72813 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.53024 +TotalEnvSteps 2.27e+06 +__unnamed_task__/AverageDiscountedReturn -15.2763 +__unnamed_task__/AverageReturn -23.3233 +__unnamed_task__/Iteration 226 +__unnamed_task__/MaxReturn -11.8229 +__unnamed_task__/MinReturn -72.1208 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.72813 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.29797 +policy/KL 0.0107504 +policy/KLBefore 0 +policy/LossAfter -0.029451 +policy/LossBefore -0.00453282 +policy/dLoss 0.0249182 +---------------------------------------- ------------ +2025-04-03 01:52:26 | [rl2_trainer] epoch #227 | Optimizing policy... +2025-04-03 01:52:27 | [rl2_trainer] epoch #227 | Fitting baseline... +2025-04-03 01:52:27 | [rl2_trainer] epoch #227 | Computing loss before +2025-04-03 01:52:27 | [rl2_trainer] epoch #227 | Computing KL before +2025-04-03 01:52:28 | [rl2_trainer] epoch #227 | Optimizing +2025-04-03 01:53:03 | [rl2_trainer] epoch #227 | Computing KL after +2025-04-03 01:53:04 | [rl2_trainer] epoch #227 | Computing loss after +2025-04-03 01:53:05 | [rl2_trainer] epoch #227 | Saving snapshot... +2025-04-03 01:53:05 | [rl2_trainer] epoch #227 | Saved +2025-04-03 01:53:05 | [rl2_trainer] epoch #227 | Time 43265.88 s +2025-04-03 01:53:05 | [rl2_trainer] epoch #227 | EpochTime 159.00 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.6952 +Average/AverageReturn -22.6594 +Average/Iteration 227 +Average/MaxReturn 8.44478 +Average/MinReturn -33.4874 +Average/NumEpisodes 100 +Average/StdReturn 5.2341 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.625083 +TotalEnvSteps 2.28e+06 +__unnamed_task__/AverageDiscountedReturn -14.6952 +__unnamed_task__/AverageReturn -22.6594 +__unnamed_task__/Iteration 227 +__unnamed_task__/MaxReturn 8.44478 +__unnamed_task__/MinReturn -33.4874 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.2341 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.28673 +policy/KL 0.0101943 +policy/KLBefore 0 +policy/LossAfter -0.00935241 +policy/LossBefore 0.00144728 +policy/dLoss 0.0107997 +---------------------------------------- ------------ +2025-04-03 01:55:24 | [rl2_trainer] epoch #228 | Optimizing policy... +2025-04-03 01:55:25 | [rl2_trainer] epoch #228 | Fitting baseline... +2025-04-03 01:55:25 | [rl2_trainer] epoch #228 | Computing loss before +2025-04-03 01:55:25 | [rl2_trainer] epoch #228 | Computing KL before +2025-04-03 01:55:26 | [rl2_trainer] epoch #228 | Optimizing +2025-04-03 01:55:59 | [rl2_trainer] epoch #228 | Computing KL after +2025-04-03 01:56:00 | [rl2_trainer] epoch #228 | Computing loss after +2025-04-03 01:56:01 | [rl2_trainer] epoch #228 | Saving snapshot... +2025-04-03 01:56:01 | [rl2_trainer] epoch #228 | Saved +2025-04-03 01:56:01 | [rl2_trainer] epoch #228 | Time 43441.90 s +2025-04-03 01:56:01 | [rl2_trainer] epoch #228 | EpochTime 176.01 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.0742 +Average/AverageReturn -20.3943 +Average/Iteration 228 +Average/MaxReturn -1.96937 +Average/MinReturn -33.4322 +Average/NumEpisodes 100 +Average/StdReturn 5.18171 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.353953 +TotalEnvSteps 2.29e+06 +__unnamed_task__/AverageDiscountedReturn -13.0742 +__unnamed_task__/AverageReturn -20.3943 +__unnamed_task__/Iteration 228 +__unnamed_task__/MaxReturn -1.96937 +__unnamed_task__/MinReturn -33.4322 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.18171 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.26506 +policy/KL 0.00857668 +policy/KLBefore 0 +policy/LossAfter -0.0104442 +policy/LossBefore 0.00368513 +policy/dLoss 0.0141294 +---------------------------------------- ------------ +2025-04-03 01:58:23 | [rl2_trainer] epoch #229 | Optimizing policy... +2025-04-03 01:58:24 | [rl2_trainer] epoch #229 | Fitting baseline... +2025-04-03 01:58:24 | [rl2_trainer] epoch #229 | Computing loss before +2025-04-03 01:58:24 | [rl2_trainer] epoch #229 | Computing KL before +2025-04-03 01:58:25 | [rl2_trainer] epoch #229 | Optimizing +2025-04-03 01:59:01 | [rl2_trainer] epoch #229 | Computing KL after +2025-04-03 01:59:02 | [rl2_trainer] epoch #229 | Computing loss after +2025-04-03 01:59:03 | [rl2_trainer] epoch #229 | Saving snapshot... +2025-04-03 01:59:03 | [rl2_trainer] epoch #229 | Saved +2025-04-03 01:59:03 | [rl2_trainer] epoch #229 | Time 43623.71 s +2025-04-03 01:59:03 | [rl2_trainer] epoch #229 | EpochTime 181.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.5494 +Average/AverageReturn -22.7576 +Average/Iteration 229 +Average/MaxReturn -0.158499 +Average/MinReturn -58.1974 +Average/NumEpisodes 100 +Average/StdReturn 7.31905 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.517715 +TotalEnvSteps 2.3e+06 +__unnamed_task__/AverageDiscountedReturn -14.5494 +__unnamed_task__/AverageReturn -22.7576 +__unnamed_task__/Iteration 229 +__unnamed_task__/MaxReturn -0.158499 +__unnamed_task__/MinReturn -58.1974 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.31905 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.23967 +policy/KL 0.00962889 +policy/KLBefore 0 +policy/LossAfter -0.0292389 +policy/LossBefore -0.0120778 +policy/dLoss 0.0171611 +---------------------------------------- ------------ +2025-04-03 02:02:46 | [rl2_trainer] epoch #230 | Optimizing policy... +2025-04-03 02:02:46 | [rl2_trainer] epoch #230 | Fitting baseline... +2025-04-03 02:02:46 | [rl2_trainer] epoch #230 | Computing loss before +2025-04-03 02:02:47 | [rl2_trainer] epoch #230 | Computing KL before +2025-04-03 02:02:47 | [rl2_trainer] epoch #230 | Optimizing +2025-04-03 02:03:23 | [rl2_trainer] epoch #230 | Computing KL after +2025-04-03 02:03:24 | [rl2_trainer] epoch #230 | Computing loss after +2025-04-03 02:03:25 | [rl2_trainer] epoch #230 | Saving snapshot... +2025-04-03 02:03:25 | [rl2_trainer] epoch #230 | Saved +2025-04-03 02:03:25 | [rl2_trainer] epoch #230 | Time 43886.00 s +2025-04-03 02:03:25 | [rl2_trainer] epoch #230 | EpochTime 262.28 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1466 +Average/AverageReturn -23.2863 +Average/Iteration 230 +Average/MaxReturn -9.43619 +Average/MinReturn -61.5091 +Average/NumEpisodes 100 +Average/StdReturn 7.78021 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.570342 +TotalEnvSteps 2.31e+06 +__unnamed_task__/AverageDiscountedReturn -15.1466 +__unnamed_task__/AverageReturn -23.2863 +__unnamed_task__/Iteration 230 +__unnamed_task__/MaxReturn -9.43619 +__unnamed_task__/MinReturn -61.5091 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.78021 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.22075 +policy/KL 0.00751726 +policy/KLBefore 0 +policy/LossAfter -0.0199196 +policy/LossBefore 0.00140917 +policy/dLoss 0.0213288 +---------------------------------------- ------------ +2025-04-03 02:04:55 | [rl2_trainer] epoch #231 | Optimizing policy... +2025-04-03 02:04:55 | [rl2_trainer] epoch #231 | Fitting baseline... +2025-04-03 02:04:55 | [rl2_trainer] epoch #231 | Computing loss before +2025-04-03 02:04:55 | [rl2_trainer] epoch #231 | Computing KL before +2025-04-03 02:04:56 | [rl2_trainer] epoch #231 | Optimizing +2025-04-03 02:05:29 | [rl2_trainer] epoch #231 | Computing KL after +2025-04-03 02:05:29 | [rl2_trainer] epoch #231 | Computing loss after +2025-04-03 02:05:30 | [rl2_trainer] epoch #231 | Saving snapshot... +2025-04-03 02:05:30 | [rl2_trainer] epoch #231 | Saved +2025-04-03 02:05:30 | [rl2_trainer] epoch #231 | Time 44011.31 s +2025-04-03 02:05:30 | [rl2_trainer] epoch #231 | EpochTime 125.31 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.9975 +Average/AverageReturn -21.8407 +Average/Iteration 231 +Average/MaxReturn 11.1869 +Average/MinReturn -38.2145 +Average/NumEpisodes 100 +Average/StdReturn 6.2316 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.599123 +TotalEnvSteps 2.32e+06 +__unnamed_task__/AverageDiscountedReturn -13.9975 +__unnamed_task__/AverageReturn -21.8407 +__unnamed_task__/Iteration 231 +__unnamed_task__/MaxReturn 11.1869 +__unnamed_task__/MinReturn -38.2145 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.2316 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.20634 +policy/KL 0.00854611 +policy/KLBefore 0 +policy/LossAfter -0.0214309 +policy/LossBefore -0.00240218 +policy/dLoss 0.0190287 +---------------------------------------- ------------ +2025-04-03 02:08:31 | [rl2_trainer] epoch #232 | Optimizing policy... +2025-04-03 02:08:31 | [rl2_trainer] epoch #232 | Fitting baseline... +2025-04-03 02:08:31 | [rl2_trainer] epoch #232 | Computing loss before +2025-04-03 02:08:32 | [rl2_trainer] epoch #232 | Computing KL before +2025-04-03 02:08:32 | [rl2_trainer] epoch #232 | Optimizing +2025-04-03 02:09:08 | [rl2_trainer] epoch #232 | Computing KL after +2025-04-03 02:09:08 | [rl2_trainer] epoch #232 | Computing loss after +2025-04-03 02:09:09 | [rl2_trainer] epoch #232 | Saving snapshot... +2025-04-03 02:09:09 | [rl2_trainer] epoch #232 | Saved +2025-04-03 02:09:09 | [rl2_trainer] epoch #232 | Time 44230.15 s +2025-04-03 02:09:09 | [rl2_trainer] epoch #232 | EpochTime 218.84 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.3163 +Average/AverageReturn -22.0089 +Average/Iteration 232 +Average/MaxReturn -8.32919 +Average/MinReturn -56.1136 +Average/NumEpisodes 100 +Average/StdReturn 5.56774 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.442658 +TotalEnvSteps 2.33e+06 +__unnamed_task__/AverageDiscountedReturn -14.3163 +__unnamed_task__/AverageReturn -22.0089 +__unnamed_task__/Iteration 232 +__unnamed_task__/MaxReturn -8.32919 +__unnamed_task__/MinReturn -56.1136 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.56774 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.20155 +policy/KL 0.0112717 +policy/KLBefore 0 +policy/LossAfter -0.0184616 +policy/LossBefore 0.00262036 +policy/dLoss 0.021082 +---------------------------------------- ------------ +2025-04-03 02:12:08 | [rl2_trainer] epoch #233 | Optimizing policy... +2025-04-03 02:12:08 | [rl2_trainer] epoch #233 | Fitting baseline... +2025-04-03 02:12:08 | [rl2_trainer] epoch #233 | Computing loss before +2025-04-03 02:12:09 | [rl2_trainer] epoch #233 | Computing KL before +2025-04-03 02:12:09 | [rl2_trainer] epoch #233 | Optimizing +2025-04-03 02:12:45 | [rl2_trainer] epoch #233 | Computing KL after +2025-04-03 02:12:46 | [rl2_trainer] epoch #233 | Computing loss after +2025-04-03 02:12:47 | [rl2_trainer] epoch #233 | Saving snapshot... +2025-04-03 02:12:47 | [rl2_trainer] epoch #233 | Saved +2025-04-03 02:12:47 | [rl2_trainer] epoch #233 | Time 44447.91 s +2025-04-03 02:12:47 | [rl2_trainer] epoch #233 | EpochTime 217.76 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.4382 +Average/AverageReturn -20.613 +Average/Iteration 233 +Average/MaxReturn -3.27973 +Average/MinReturn -30.1048 +Average/NumEpisodes 100 +Average/StdReturn 4.30659 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.426347 +TotalEnvSteps 2.34e+06 +__unnamed_task__/AverageDiscountedReturn -13.4382 +__unnamed_task__/AverageReturn -20.613 +__unnamed_task__/Iteration 233 +__unnamed_task__/MaxReturn -3.27973 +__unnamed_task__/MinReturn -30.1048 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.30659 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.19334 +policy/KL 0.0108145 +policy/KLBefore 0 +policy/LossAfter -0.0200338 +policy/LossBefore -0.00194642 +policy/dLoss 0.0180874 +---------------------------------------- ------------ +2025-04-03 02:15:28 | [rl2_trainer] epoch #234 | Optimizing policy... +2025-04-03 02:15:28 | [rl2_trainer] epoch #234 | Fitting baseline... +2025-04-03 02:15:28 | [rl2_trainer] epoch #234 | Computing loss before +2025-04-03 02:15:29 | [rl2_trainer] epoch #234 | Computing KL before +2025-04-03 02:15:29 | [rl2_trainer] epoch #234 | Optimizing +2025-04-03 02:16:05 | [rl2_trainer] epoch #234 | Computing KL after +2025-04-03 02:16:06 | [rl2_trainer] epoch #234 | Computing loss after +2025-04-03 02:16:07 | [rl2_trainer] epoch #234 | Saving snapshot... +2025-04-03 02:16:07 | [rl2_trainer] epoch #234 | Saved +2025-04-03 02:16:07 | [rl2_trainer] epoch #234 | Time 44647.92 s +2025-04-03 02:16:07 | [rl2_trainer] epoch #234 | EpochTime 200.01 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.7882 +Average/AverageReturn -22.8772 +Average/Iteration 234 +Average/MaxReturn 4.06913 +Average/MinReturn -70.7266 +Average/NumEpisodes 100 +Average/StdReturn 8.26438 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.573342 +TotalEnvSteps 2.35e+06 +__unnamed_task__/AverageDiscountedReturn -14.7882 +__unnamed_task__/AverageReturn -22.8772 +__unnamed_task__/Iteration 234 +__unnamed_task__/MaxReturn 4.06913 +__unnamed_task__/MinReturn -70.7266 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.26438 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.18524 +policy/KL 0.0116077 +policy/KLBefore 0 +policy/LossAfter -0.0215346 +policy/LossBefore 0.00779623 +policy/dLoss 0.0293308 +---------------------------------------- ------------ +2025-04-03 02:19:03 | [rl2_trainer] epoch #235 | Optimizing policy... +2025-04-03 02:19:04 | [rl2_trainer] epoch #235 | Fitting baseline... +2025-04-03 02:19:04 | [rl2_trainer] epoch #235 | Computing loss before +2025-04-03 02:19:04 | [rl2_trainer] epoch #235 | Computing KL before +2025-04-03 02:19:05 | [rl2_trainer] epoch #235 | Optimizing +2025-04-03 02:19:43 | [rl2_trainer] epoch #235 | Computing KL after +2025-04-03 02:19:43 | [rl2_trainer] epoch #235 | Computing loss after +2025-04-03 02:19:44 | [rl2_trainer] epoch #235 | Saving snapshot... +2025-04-03 02:19:44 | [rl2_trainer] epoch #235 | Saved +2025-04-03 02:19:44 | [rl2_trainer] epoch #235 | Time 44865.40 s +2025-04-03 02:19:44 | [rl2_trainer] epoch #235 | EpochTime 217.48 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.7658 +Average/AverageReturn -21.9616 +Average/Iteration 235 +Average/MaxReturn 42.9261 +Average/MinReturn -81.7731 +Average/NumEpisodes 100 +Average/StdReturn 13.5388 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.588294 +TotalEnvSteps 2.36e+06 +__unnamed_task__/AverageDiscountedReturn -13.7658 +__unnamed_task__/AverageReturn -21.9616 +__unnamed_task__/Iteration 235 +__unnamed_task__/MaxReturn 42.9261 +__unnamed_task__/MinReturn -81.7731 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.5388 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.19312 +policy/KL 0.01439 +policy/KLBefore 0 +policy/LossAfter -0.0467099 +policy/LossBefore 0.0119498 +policy/dLoss 0.0586597 +---------------------------------------- ----------- +2025-04-03 02:23:29 | [rl2_trainer] epoch #236 | Optimizing policy... +2025-04-03 02:23:30 | [rl2_trainer] epoch #236 | Fitting baseline... +2025-04-03 02:23:30 | [rl2_trainer] epoch #236 | Computing loss before +2025-04-03 02:23:30 | [rl2_trainer] epoch #236 | Computing KL before +2025-04-03 02:23:31 | [rl2_trainer] epoch #236 | Optimizing +2025-04-03 02:24:07 | [rl2_trainer] epoch #236 | Computing KL after +2025-04-03 02:24:07 | [rl2_trainer] epoch #236 | Computing loss after +2025-04-03 02:24:08 | [rl2_trainer] epoch #236 | Saving snapshot... +2025-04-03 02:24:08 | [rl2_trainer] epoch #236 | Saved +2025-04-03 02:24:08 | [rl2_trainer] epoch #236 | Time 45129.07 s +2025-04-03 02:24:08 | [rl2_trainer] epoch #236 | EpochTime 263.67 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2268 +Average/AverageReturn -22.347 +Average/Iteration 236 +Average/MaxReturn 4.11883 +Average/MinReturn -59.2905 +Average/NumEpisodes 100 +Average/StdReturn 7.79232 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.479557 +TotalEnvSteps 2.37e+06 +__unnamed_task__/AverageDiscountedReturn -14.2268 +__unnamed_task__/AverageReturn -22.347 +__unnamed_task__/Iteration 236 +__unnamed_task__/MaxReturn 4.11883 +__unnamed_task__/MinReturn -59.2905 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.79232 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.18645 +policy/KL 0.0106192 +policy/KLBefore 0 +policy/LossAfter -0.0277067 +policy/LossBefore 0.00740237 +policy/dLoss 0.0351091 +---------------------------------------- ------------ +2025-04-03 02:28:32 | [rl2_trainer] epoch #237 | Optimizing policy... +2025-04-03 02:28:33 | [rl2_trainer] epoch #237 | Fitting baseline... +2025-04-03 02:28:33 | [rl2_trainer] epoch #237 | Computing loss before +2025-04-03 02:28:33 | [rl2_trainer] epoch #237 | Computing KL before +2025-04-03 02:28:34 | [rl2_trainer] epoch #237 | Optimizing +2025-04-03 02:29:08 | [rl2_trainer] epoch #237 | Computing KL after +2025-04-03 02:29:09 | [rl2_trainer] epoch #237 | Computing loss after +2025-04-03 02:29:10 | [rl2_trainer] epoch #237 | Saving snapshot... +2025-04-03 02:29:10 | [rl2_trainer] epoch #237 | Saved +2025-04-03 02:29:10 | [rl2_trainer] epoch #237 | Time 45430.93 s +2025-04-03 02:29:10 | [rl2_trainer] epoch #237 | EpochTime 301.85 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.9497 +Average/AverageReturn -26.8588 +Average/Iteration 237 +Average/MaxReturn -14.4664 +Average/MinReturn -92.6481 +Average/NumEpisodes 100 +Average/StdReturn 10.2321 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.62671 +TotalEnvSteps 2.38e+06 +__unnamed_task__/AverageDiscountedReturn -16.9497 +__unnamed_task__/AverageReturn -26.8588 +__unnamed_task__/Iteration 237 +__unnamed_task__/MaxReturn -14.4664 +__unnamed_task__/MinReturn -92.6481 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.2321 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.15629 +policy/KL 0.0177477 +policy/KLBefore 0 +policy/LossAfter -0.0261059 +policy/LossBefore -0.00369713 +policy/dLoss 0.0224088 +---------------------------------------- ------------ +2025-04-03 02:32:46 | [rl2_trainer] epoch #238 | Optimizing policy... +2025-04-03 02:32:47 | [rl2_trainer] epoch #238 | Fitting baseline... +2025-04-03 02:32:47 | [rl2_trainer] epoch #238 | Computing loss before +2025-04-03 02:32:47 | [rl2_trainer] epoch #238 | Computing KL before +2025-04-03 02:32:48 | [rl2_trainer] epoch #238 | Optimizing +2025-04-03 02:33:22 | [rl2_trainer] epoch #238 | Computing KL after +2025-04-03 02:33:23 | [rl2_trainer] epoch #238 | Computing loss after +2025-04-03 02:33:24 | [rl2_trainer] epoch #238 | Saving snapshot... +2025-04-03 02:33:24 | [rl2_trainer] epoch #238 | Saved +2025-04-03 02:33:24 | [rl2_trainer] epoch #238 | Time 45684.92 s +2025-04-03 02:33:24 | [rl2_trainer] epoch #238 | EpochTime 254.00 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.1646 +Average/AverageReturn -20.507 +Average/Iteration 238 +Average/MaxReturn 6.78842 +Average/MinReturn -76.0829 +Average/NumEpisodes 100 +Average/StdReturn 9.4567 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.639693 +TotalEnvSteps 2.39e+06 +__unnamed_task__/AverageDiscountedReturn -13.1646 +__unnamed_task__/AverageReturn -20.507 +__unnamed_task__/Iteration 238 +__unnamed_task__/MaxReturn 6.78842 +__unnamed_task__/MinReturn -76.0829 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.4567 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.13346 +policy/KL 0.0108143 +policy/KLBefore 0 +policy/LossAfter -0.0111525 +policy/LossBefore 0.0100608 +policy/dLoss 0.0212133 +---------------------------------------- ----------- +2025-04-03 02:36:20 | [rl2_trainer] epoch #239 | Optimizing policy... +2025-04-03 02:36:20 | [rl2_trainer] epoch #239 | Fitting baseline... +2025-04-03 02:36:20 | [rl2_trainer] epoch #239 | Computing loss before +2025-04-03 02:36:21 | [rl2_trainer] epoch #239 | Computing KL before +2025-04-03 02:36:22 | [rl2_trainer] epoch #239 | Optimizing +2025-04-03 02:36:57 | [rl2_trainer] epoch #239 | Computing KL after +2025-04-03 02:36:58 | [rl2_trainer] epoch #239 | Computing loss after +2025-04-03 02:36:59 | [rl2_trainer] epoch #239 | Saving snapshot... +2025-04-03 02:36:59 | [rl2_trainer] epoch #239 | Saved +2025-04-03 02:36:59 | [rl2_trainer] epoch #239 | Time 45899.87 s +2025-04-03 02:36:59 | [rl2_trainer] epoch #239 | EpochTime 214.94 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.549 +Average/AverageReturn -20.8255 +Average/Iteration 239 +Average/MaxReturn -0.215745 +Average/MinReturn -32.102 +Average/NumEpisodes 100 +Average/StdReturn 5.38238 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.42268 +TotalEnvSteps 2.4e+06 +__unnamed_task__/AverageDiscountedReturn -13.549 +__unnamed_task__/AverageReturn -20.8255 +__unnamed_task__/Iteration 239 +__unnamed_task__/MaxReturn -0.215745 +__unnamed_task__/MinReturn -32.102 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.38238 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.11778 +policy/KL 0.0114741 +policy/KLBefore 0 +policy/LossAfter -0.015241 +policy/LossBefore 0.0020579 +policy/dLoss 0.0172989 +---------------------------------------- ----------- +2025-04-03 02:38:28 | [rl2_trainer] epoch #240 | Optimizing policy... +2025-04-03 02:38:29 | [rl2_trainer] epoch #240 | Fitting baseline... +2025-04-03 02:38:29 | [rl2_trainer] epoch #240 | Computing loss before +2025-04-03 02:38:29 | [rl2_trainer] epoch #240 | Computing KL before +2025-04-03 02:38:30 | [rl2_trainer] epoch #240 | Optimizing +2025-04-03 02:39:05 | [rl2_trainer] epoch #240 | Computing KL after +2025-04-03 02:39:05 | [rl2_trainer] epoch #240 | Computing loss after +2025-04-03 02:39:06 | [rl2_trainer] epoch #240 | Saving snapshot... +2025-04-03 02:39:06 | [rl2_trainer] epoch #240 | Saved +2025-04-03 02:39:06 | [rl2_trainer] epoch #240 | Time 46026.99 s +2025-04-03 02:39:06 | [rl2_trainer] epoch #240 | EpochTime 127.12 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.9819 +Average/AverageReturn -21.5958 +Average/Iteration 240 +Average/MaxReturn 9.22065 +Average/MinReturn -33.4826 +Average/NumEpisodes 100 +Average/StdReturn 5.54939 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.389266 +TotalEnvSteps 2.41e+06 +__unnamed_task__/AverageDiscountedReturn -13.9819 +__unnamed_task__/AverageReturn -21.5958 +__unnamed_task__/Iteration 240 +__unnamed_task__/MaxReturn 9.22065 +__unnamed_task__/MinReturn -33.4826 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.54939 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.1007 +policy/KL 0.0101785 +policy/KLBefore 0 +policy/LossAfter -0.0201936 +policy/LossBefore 0.000101503 +policy/dLoss 0.0202951 +---------------------------------------- ------------- +2025-04-03 02:40:31 | [rl2_trainer] epoch #241 | Optimizing policy... +2025-04-03 02:40:31 | [rl2_trainer] epoch #241 | Fitting baseline... +2025-04-03 02:40:31 | [rl2_trainer] epoch #241 | Computing loss before +2025-04-03 02:40:32 | [rl2_trainer] epoch #241 | Computing KL before +2025-04-03 02:40:32 | [rl2_trainer] epoch #241 | Optimizing +2025-04-03 02:41:08 | [rl2_trainer] epoch #241 | Computing KL after +2025-04-03 02:41:09 | [rl2_trainer] epoch #241 | Computing loss after +2025-04-03 02:41:10 | [rl2_trainer] epoch #241 | Saving snapshot... +2025-04-03 02:41:10 | [rl2_trainer] epoch #241 | Saved +2025-04-03 02:41:10 | [rl2_trainer] epoch #241 | Time 46150.72 s +2025-04-03 02:41:10 | [rl2_trainer] epoch #241 | EpochTime 123.73 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6352 +Average/AverageReturn -21.2127 +Average/Iteration 241 +Average/MaxReturn 5.19655 +Average/MinReturn -43.9428 +Average/NumEpisodes 100 +Average/StdReturn 6.01951 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.471357 +TotalEnvSteps 2.42e+06 +__unnamed_task__/AverageDiscountedReturn -13.6352 +__unnamed_task__/AverageReturn -21.2127 +__unnamed_task__/Iteration 241 +__unnamed_task__/MaxReturn 5.19655 +__unnamed_task__/MinReturn -43.9428 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.01951 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.06969 +policy/KL 0.0135855 +policy/KLBefore 0 +policy/LossAfter -0.0041217 +policy/LossBefore 0.00704927 +policy/dLoss 0.011171 +---------------------------------------- ------------ +2025-04-03 02:43:19 | [rl2_trainer] epoch #242 | Optimizing policy... +2025-04-03 02:43:19 | [rl2_trainer] epoch #242 | Fitting baseline... +2025-04-03 02:43:19 | [rl2_trainer] epoch #242 | Computing loss before +2025-04-03 02:43:20 | [rl2_trainer] epoch #242 | Computing KL before +2025-04-03 02:43:20 | [rl2_trainer] epoch #242 | Optimizing +2025-04-03 02:43:55 | [rl2_trainer] epoch #242 | Computing KL after +2025-04-03 02:43:56 | [rl2_trainer] epoch #242 | Computing loss after +2025-04-03 02:43:57 | [rl2_trainer] epoch #242 | Saving snapshot... +2025-04-03 02:43:57 | [rl2_trainer] epoch #242 | Saved +2025-04-03 02:43:57 | [rl2_trainer] epoch #242 | Time 46317.93 s +2025-04-03 02:43:57 | [rl2_trainer] epoch #242 | EpochTime 167.21 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.0255 +Average/AverageReturn -22.951 +Average/Iteration 242 +Average/MaxReturn -11.4257 +Average/MinReturn -38.7787 +Average/NumEpisodes 100 +Average/StdReturn 5.18449 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.487084 +TotalEnvSteps 2.43e+06 +__unnamed_task__/AverageDiscountedReturn -15.0255 +__unnamed_task__/AverageReturn -22.951 +__unnamed_task__/Iteration 242 +__unnamed_task__/MaxReturn -11.4257 +__unnamed_task__/MinReturn -38.7787 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.18449 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.04256 +policy/KL 0.0105909 +policy/KLBefore 0 +policy/LossAfter -0.00604395 +policy/LossBefore 0.00164595 +policy/dLoss 0.00768989 +---------------------------------------- ------------ +2025-04-03 02:47:19 | [rl2_trainer] epoch #243 | Optimizing policy... +2025-04-03 02:47:19 | [rl2_trainer] epoch #243 | Fitting baseline... +2025-04-03 02:47:19 | [rl2_trainer] epoch #243 | Computing loss before +2025-04-03 02:47:19 | [rl2_trainer] epoch #243 | Computing KL before +2025-04-03 02:47:20 | [rl2_trainer] epoch #243 | Optimizing +2025-04-03 02:47:55 | [rl2_trainer] epoch #243 | Computing KL after +2025-04-03 02:47:55 | [rl2_trainer] epoch #243 | Computing loss after +2025-04-03 02:47:56 | [rl2_trainer] epoch #243 | Saving snapshot... +2025-04-03 02:47:56 | [rl2_trainer] epoch #243 | Saved +2025-04-03 02:47:56 | [rl2_trainer] epoch #243 | Time 46557.34 s +2025-04-03 02:47:56 | [rl2_trainer] epoch #243 | EpochTime 239.41 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.7342 +Average/AverageReturn -21.2956 +Average/Iteration 243 +Average/MaxReturn 0.108983 +Average/MinReturn -73.6807 +Average/NumEpisodes 100 +Average/StdReturn 9.57823 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.637303 +TotalEnvSteps 2.44e+06 +__unnamed_task__/AverageDiscountedReturn -13.7342 +__unnamed_task__/AverageReturn -21.2956 +__unnamed_task__/Iteration 243 +__unnamed_task__/MaxReturn 0.108983 +__unnamed_task__/MinReturn -73.6807 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.57823 +__unnamed_task__/TerminationRate 0 +policy/Entropy 5.02557 +policy/KL 0.0126977 +policy/KLBefore 0 +policy/LossAfter -0.0218603 +policy/LossBefore 0.00394659 +policy/dLoss 0.0258069 +---------------------------------------- ------------ +2025-04-03 02:50:27 | [rl2_trainer] epoch #244 | Optimizing policy... +2025-04-03 02:50:28 | [rl2_trainer] epoch #244 | Fitting baseline... +2025-04-03 02:50:28 | [rl2_trainer] epoch #244 | Computing loss before +2025-04-03 02:50:28 | [rl2_trainer] epoch #244 | Computing KL before +2025-04-03 02:50:29 | [rl2_trainer] epoch #244 | Optimizing +2025-04-03 02:51:06 | [rl2_trainer] epoch #244 | Computing KL after +2025-04-03 02:51:07 | [rl2_trainer] epoch #244 | Computing loss after +2025-04-03 02:51:08 | [rl2_trainer] epoch #244 | Saving snapshot... +2025-04-03 02:51:08 | [rl2_trainer] epoch #244 | Saved +2025-04-03 02:51:08 | [rl2_trainer] epoch #244 | Time 46748.57 s +2025-04-03 02:51:08 | [rl2_trainer] epoch #244 | EpochTime 191.22 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.4747 +Average/AverageReturn -20.6072 +Average/Iteration 244 +Average/MaxReturn -8.02213 +Average/MinReturn -56.907 +Average/NumEpisodes 100 +Average/StdReturn 5.87617 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.349194 +TotalEnvSteps 2.45e+06 +__unnamed_task__/AverageDiscountedReturn -13.4747 +__unnamed_task__/AverageReturn -20.6072 +__unnamed_task__/Iteration 244 +__unnamed_task__/MaxReturn -8.02213 +__unnamed_task__/MinReturn -56.907 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.87617 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.99712 +policy/KL 0.0129417 +policy/KLBefore 0 +policy/LossAfter -0.0192625 +policy/LossBefore -0.0060078 +policy/dLoss 0.0132547 +---------------------------------------- ----------- +2025-04-03 02:52:35 | [rl2_trainer] epoch #245 | Optimizing policy... +2025-04-03 02:52:35 | [rl2_trainer] epoch #245 | Fitting baseline... +2025-04-03 02:52:35 | [rl2_trainer] epoch #245 | Computing loss before +2025-04-03 02:52:36 | [rl2_trainer] epoch #245 | Computing KL before +2025-04-03 02:52:36 | [rl2_trainer] epoch #245 | Optimizing +2025-04-03 02:53:12 | [rl2_trainer] epoch #245 | Computing KL after +2025-04-03 02:53:12 | [rl2_trainer] epoch #245 | Computing loss after +2025-04-03 02:53:13 | [rl2_trainer] epoch #245 | Saving snapshot... +2025-04-03 02:53:13 | [rl2_trainer] epoch #245 | Saved +2025-04-03 02:53:13 | [rl2_trainer] epoch #245 | Time 46874.23 s +2025-04-03 02:53:13 | [rl2_trainer] epoch #245 | EpochTime 125.66 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1769 +Average/AverageReturn -21.6859 +Average/Iteration 245 +Average/MaxReturn -3.4103 +Average/MinReturn -66.8779 +Average/NumEpisodes 100 +Average/StdReturn 6.22111 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.557956 +TotalEnvSteps 2.46e+06 +__unnamed_task__/AverageDiscountedReturn -14.1769 +__unnamed_task__/AverageReturn -21.6859 +__unnamed_task__/Iteration 245 +__unnamed_task__/MaxReturn -3.4103 +__unnamed_task__/MinReturn -66.8779 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.22111 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.96773 +policy/KL 0.0116314 +policy/KLBefore 0 +policy/LossAfter -0.0152163 +policy/LossBefore -0.00090188 +policy/dLoss 0.0143144 +---------------------------------------- ------------ +2025-04-03 02:55:24 | [rl2_trainer] epoch #246 | Optimizing policy... +2025-04-03 02:55:24 | [rl2_trainer] epoch #246 | Fitting baseline... +2025-04-03 02:55:24 | [rl2_trainer] epoch #246 | Computing loss before +2025-04-03 02:55:25 | [rl2_trainer] epoch #246 | Computing KL before +2025-04-03 02:55:26 | [rl2_trainer] epoch #246 | Optimizing +2025-04-03 02:56:02 | [rl2_trainer] epoch #246 | Computing KL after +2025-04-03 02:56:03 | [rl2_trainer] epoch #246 | Computing loss after +2025-04-03 02:56:04 | [rl2_trainer] epoch #246 | Saving snapshot... +2025-04-03 02:56:04 | [rl2_trainer] epoch #246 | Saved +2025-04-03 02:56:04 | [rl2_trainer] epoch #246 | Time 47044.53 s +2025-04-03 02:56:04 | [rl2_trainer] epoch #246 | EpochTime 170.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.9993 +Average/AverageReturn -21.3784 +Average/Iteration 246 +Average/MaxReturn -1.68943 +Average/MinReturn -39.2086 +Average/NumEpisodes 100 +Average/StdReturn 5.49995 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.536236 +TotalEnvSteps 2.47e+06 +__unnamed_task__/AverageDiscountedReturn -13.9993 +__unnamed_task__/AverageReturn -21.3784 +__unnamed_task__/Iteration 246 +__unnamed_task__/MaxReturn -1.68943 +__unnamed_task__/MinReturn -39.2086 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.49995 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.9583 +policy/KL 0.0103288 +policy/KLBefore 0 +policy/LossAfter -0.0203252 +policy/LossBefore -0.00381372 +policy/dLoss 0.0165115 +---------------------------------------- ------------ +2025-04-03 02:58:31 | [rl2_trainer] epoch #247 | Optimizing policy... +2025-04-03 02:58:31 | [rl2_trainer] epoch #247 | Fitting baseline... +2025-04-03 02:58:31 | [rl2_trainer] epoch #247 | Computing loss before +2025-04-03 02:58:32 | [rl2_trainer] epoch #247 | Computing KL before +2025-04-03 02:58:32 | [rl2_trainer] epoch #247 | Optimizing +2025-04-03 02:59:09 | [rl2_trainer] epoch #247 | Computing KL after +2025-04-03 02:59:10 | [rl2_trainer] epoch #247 | Computing loss after +2025-04-03 02:59:11 | [rl2_trainer] epoch #247 | Saving snapshot... +2025-04-03 02:59:11 | [rl2_trainer] epoch #247 | Saved +2025-04-03 02:59:11 | [rl2_trainer] epoch #247 | Time 47231.69 s +2025-04-03 02:59:11 | [rl2_trainer] epoch #247 | EpochTime 187.16 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.1726 +Average/AverageReturn -18.4313 +Average/Iteration 247 +Average/MaxReturn 6.37819 +Average/MinReturn -43.145 +Average/NumEpisodes 100 +Average/StdReturn 5.82031 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.338315 +TotalEnvSteps 2.48e+06 +__unnamed_task__/AverageDiscountedReturn -12.1726 +__unnamed_task__/AverageReturn -18.4313 +__unnamed_task__/Iteration 247 +__unnamed_task__/MaxReturn 6.37819 +__unnamed_task__/MinReturn -43.145 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.82031 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.96031 +policy/KL 0.0130356 +policy/KLBefore 0 +policy/LossAfter -0.0302169 +policy/LossBefore -0.00502671 +policy/dLoss 0.0251902 +---------------------------------------- ------------ +2025-04-03 03:00:38 | [rl2_trainer] epoch #248 | Optimizing policy... +2025-04-03 03:00:39 | [rl2_trainer] epoch #248 | Fitting baseline... +2025-04-03 03:00:39 | [rl2_trainer] epoch #248 | Computing loss before +2025-04-03 03:00:39 | [rl2_trainer] epoch #248 | Computing KL before +2025-04-03 03:00:40 | [rl2_trainer] epoch #248 | Optimizing +2025-04-03 03:01:15 | [rl2_trainer] epoch #248 | Computing KL after +2025-04-03 03:01:16 | [rl2_trainer] epoch #248 | Computing loss after +2025-04-03 03:01:17 | [rl2_trainer] epoch #248 | Saving snapshot... +2025-04-03 03:01:17 | [rl2_trainer] epoch #248 | Saved +2025-04-03 03:01:17 | [rl2_trainer] epoch #248 | Time 47357.71 s +2025-04-03 03:01:17 | [rl2_trainer] epoch #248 | EpochTime 126.02 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.7402 +Average/AverageReturn -19.4928 +Average/Iteration 248 +Average/MaxReturn 0.3712 +Average/MinReturn -31.4182 +Average/NumEpisodes 100 +Average/StdReturn 4.44247 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.460678 +TotalEnvSteps 2.49e+06 +__unnamed_task__/AverageDiscountedReturn -12.7402 +__unnamed_task__/AverageReturn -19.4928 +__unnamed_task__/Iteration 248 +__unnamed_task__/MaxReturn 0.3712 +__unnamed_task__/MinReturn -31.4182 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.44247 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.96369 +policy/KL 0.0125175 +policy/KLBefore 0 +policy/LossAfter -0.0151985 +policy/LossBefore 0.000372705 +policy/dLoss 0.0155712 +---------------------------------------- ------------- +2025-04-03 03:03:53 | [rl2_trainer] epoch #249 | Optimizing policy... +2025-04-03 03:03:53 | [rl2_trainer] epoch #249 | Fitting baseline... +2025-04-03 03:03:53 | [rl2_trainer] epoch #249 | Computing loss before +2025-04-03 03:03:53 | [rl2_trainer] epoch #249 | Computing KL before +2025-04-03 03:03:54 | [rl2_trainer] epoch #249 | Optimizing +2025-04-03 03:04:30 | [rl2_trainer] epoch #249 | Computing KL after +2025-04-03 03:04:31 | [rl2_trainer] epoch #249 | Computing loss after +2025-04-03 03:04:32 | [rl2_trainer] epoch #249 | Saving snapshot... +2025-04-03 03:04:32 | [rl2_trainer] epoch #249 | Saved +2025-04-03 03:04:32 | [rl2_trainer] epoch #249 | Time 47552.77 s +2025-04-03 03:04:32 | [rl2_trainer] epoch #249 | EpochTime 195.05 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.9363 +Average/AverageReturn -23.3613 +Average/Iteration 249 +Average/MaxReturn -7.54711 +Average/MinReturn -90.634 +Average/NumEpisodes 100 +Average/StdReturn 12.8953 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.699566 +TotalEnvSteps 2.5e+06 +__unnamed_task__/AverageDiscountedReturn -14.9363 +__unnamed_task__/AverageReturn -23.3613 +__unnamed_task__/Iteration 249 +__unnamed_task__/MaxReturn -7.54711 +__unnamed_task__/MinReturn -90.634 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.8953 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.96783 +policy/KL 0.0136476 +policy/KLBefore 0 +policy/LossAfter -0.036276 +policy/LossBefore 0.00339534 +policy/dLoss 0.0396713 +---------------------------------------- ------------ +2025-04-03 03:06:41 | [rl2_trainer] epoch #250 | Optimizing policy... +2025-04-03 03:06:41 | [rl2_trainer] epoch #250 | Fitting baseline... +2025-04-03 03:06:41 | [rl2_trainer] epoch #250 | Computing loss before +2025-04-03 03:06:42 | [rl2_trainer] epoch #250 | Computing KL before +2025-04-03 03:06:42 | [rl2_trainer] epoch #250 | Optimizing +2025-04-03 03:07:17 | [rl2_trainer] epoch #250 | Computing KL after +2025-04-03 03:07:18 | [rl2_trainer] epoch #250 | Computing loss after +2025-04-03 03:07:19 | [rl2_trainer] epoch #250 | Saving snapshot... +2025-04-03 03:07:19 | [rl2_trainer] epoch #250 | Saved +2025-04-03 03:07:19 | [rl2_trainer] epoch #250 | Time 47719.73 s +2025-04-03 03:07:19 | [rl2_trainer] epoch #250 | EpochTime 166.96 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2809 +Average/AverageReturn -21.7292 +Average/Iteration 250 +Average/MaxReturn -8.34173 +Average/MinReturn -63.6613 +Average/NumEpisodes 100 +Average/StdReturn 6.48806 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.606818 +TotalEnvSteps 2.51e+06 +__unnamed_task__/AverageDiscountedReturn -14.2809 +__unnamed_task__/AverageReturn -21.7292 +__unnamed_task__/Iteration 250 +__unnamed_task__/MaxReturn -8.34173 +__unnamed_task__/MinReturn -63.6613 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.48806 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.94602 +policy/KL 0.0134027 +policy/KLBefore 0 +policy/LossAfter -0.0189548 +policy/LossBefore -0.00367491 +policy/dLoss 0.0152799 +---------------------------------------- ------------ +2025-04-03 03:10:19 | [rl2_trainer] epoch #251 | Optimizing policy... +2025-04-03 03:10:19 | [rl2_trainer] epoch #251 | Fitting baseline... +2025-04-03 03:10:19 | [rl2_trainer] epoch #251 | Computing loss before +2025-04-03 03:10:20 | [rl2_trainer] epoch #251 | Computing KL before +2025-04-03 03:10:20 | [rl2_trainer] epoch #251 | Optimizing +2025-04-03 03:10:57 | [rl2_trainer] epoch #251 | Computing KL after +2025-04-03 03:10:57 | [rl2_trainer] epoch #251 | Computing loss after +2025-04-03 03:10:58 | [rl2_trainer] epoch #251 | Saving snapshot... +2025-04-03 03:10:58 | [rl2_trainer] epoch #251 | Saved +2025-04-03 03:10:58 | [rl2_trainer] epoch #251 | Time 47939.31 s +2025-04-03 03:10:58 | [rl2_trainer] epoch #251 | EpochTime 219.58 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.6418 +Average/AverageReturn -19.2091 +Average/Iteration 251 +Average/MaxReturn 7.96462 +Average/MinReturn -33.17 +Average/NumEpisodes 100 +Average/StdReturn 5.87176 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.59078 +TotalEnvSteps 2.52e+06 +__unnamed_task__/AverageDiscountedReturn -12.6418 +__unnamed_task__/AverageReturn -19.2091 +__unnamed_task__/Iteration 251 +__unnamed_task__/MaxReturn 7.96462 +__unnamed_task__/MinReturn -33.17 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.87176 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.9161 +policy/KL 0.00811247 +policy/KLBefore 0 +policy/LossAfter -0.0123292 +policy/LossBefore -0.000611406 +policy/dLoss 0.0117178 +---------------------------------------- ------------- +2025-04-03 03:13:26 | [rl2_trainer] epoch #252 | Optimizing policy... +2025-04-03 03:13:26 | [rl2_trainer] epoch #252 | Fitting baseline... +2025-04-03 03:13:26 | [rl2_trainer] epoch #252 | Computing loss before +2025-04-03 03:13:27 | [rl2_trainer] epoch #252 | Computing KL before +2025-04-03 03:13:28 | [rl2_trainer] epoch #252 | Optimizing +2025-04-03 03:14:03 | [rl2_trainer] epoch #252 | Computing KL after +2025-04-03 03:14:04 | [rl2_trainer] epoch #252 | Computing loss after +2025-04-03 03:14:05 | [rl2_trainer] epoch #252 | Saving snapshot... +2025-04-03 03:14:05 | [rl2_trainer] epoch #252 | Saved +2025-04-03 03:14:05 | [rl2_trainer] epoch #252 | Time 48126.08 s +2025-04-03 03:14:05 | [rl2_trainer] epoch #252 | EpochTime 186.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.0545 +Average/AverageReturn -18.1691 +Average/Iteration 252 +Average/MaxReturn 3.87294 +Average/MinReturn -24.631 +Average/NumEpisodes 100 +Average/StdReturn 3.83812 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.13562 +TotalEnvSteps 2.53e+06 +__unnamed_task__/AverageDiscountedReturn -12.0545 +__unnamed_task__/AverageReturn -18.1691 +__unnamed_task__/Iteration 252 +__unnamed_task__/MaxReturn 3.87294 +__unnamed_task__/MinReturn -24.631 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 3.83812 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.87666 +policy/KL 0.0121329 +policy/KLBefore 0 +policy/LossAfter -0.0144015 +policy/LossBefore -0.00548184 +policy/dLoss 0.0089197 +---------------------------------------- ------------ +2025-04-03 03:17:38 | [rl2_trainer] epoch #253 | Optimizing policy... +2025-04-03 03:17:39 | [rl2_trainer] epoch #253 | Fitting baseline... +2025-04-03 03:17:39 | [rl2_trainer] epoch #253 | Computing loss before +2025-04-03 03:17:39 | [rl2_trainer] epoch #253 | Computing KL before +2025-04-03 03:17:40 | [rl2_trainer] epoch #253 | Optimizing +2025-04-03 03:18:17 | [rl2_trainer] epoch #253 | Computing KL after +2025-04-03 03:18:17 | [rl2_trainer] epoch #253 | Computing loss after +2025-04-03 03:18:19 | [rl2_trainer] epoch #253 | Saving snapshot... +2025-04-03 03:18:19 | [rl2_trainer] epoch #253 | Saved +2025-04-03 03:18:19 | [rl2_trainer] epoch #253 | Time 48379.63 s +2025-04-03 03:18:19 | [rl2_trainer] epoch #253 | EpochTime 253.55 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6505 +Average/AverageReturn -21.536 +Average/Iteration 253 +Average/MaxReturn -3.6787 +Average/MinReturn -64.0722 +Average/NumEpisodes 100 +Average/StdReturn 9.63284 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.751978 +TotalEnvSteps 2.54e+06 +__unnamed_task__/AverageDiscountedReturn -13.6505 +__unnamed_task__/AverageReturn -21.536 +__unnamed_task__/Iteration 253 +__unnamed_task__/MaxReturn -3.6787 +__unnamed_task__/MinReturn -64.0722 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.63284 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.85804 +policy/KL 0.0111008 +policy/KLBefore 0 +policy/LossAfter -0.0228841 +policy/LossBefore 0.00236413 +policy/dLoss 0.0252482 +---------------------------------------- ------------ +2025-04-03 03:20:43 | [rl2_trainer] epoch #254 | Optimizing policy... +2025-04-03 03:20:44 | [rl2_trainer] epoch #254 | Fitting baseline... +2025-04-03 03:20:44 | [rl2_trainer] epoch #254 | Computing loss before +2025-04-03 03:20:44 | [rl2_trainer] epoch #254 | Computing KL before +2025-04-03 03:20:45 | [rl2_trainer] epoch #254 | Optimizing +2025-04-03 03:21:21 | [rl2_trainer] epoch #254 | Computing KL after +2025-04-03 03:21:22 | [rl2_trainer] epoch #254 | Computing loss after +2025-04-03 03:21:23 | [rl2_trainer] epoch #254 | Saving snapshot... +2025-04-03 03:21:23 | [rl2_trainer] epoch #254 | Saved +2025-04-03 03:21:23 | [rl2_trainer] epoch #254 | Time 48564.11 s +2025-04-03 03:21:23 | [rl2_trainer] epoch #254 | EpochTime 184.48 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.5607 +Average/AverageReturn -17.6463 +Average/Iteration 254 +Average/MaxReturn 14.8977 +Average/MinReturn -29.7429 +Average/NumEpisodes 100 +Average/StdReturn 6.95911 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.598677 +TotalEnvSteps 2.55e+06 +__unnamed_task__/AverageDiscountedReturn -11.5607 +__unnamed_task__/AverageReturn -17.6463 +__unnamed_task__/Iteration 254 +__unnamed_task__/MaxReturn 14.8977 +__unnamed_task__/MinReturn -29.7429 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.95911 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.82674 +policy/KL 0.0121244 +policy/KLBefore 0 +policy/LossAfter -0.0185977 +policy/LossBefore -0.00090878 +policy/dLoss 0.0176889 +---------------------------------------- ------------ +2025-04-03 03:22:53 | [rl2_trainer] epoch #255 | Optimizing policy... +2025-04-03 03:22:53 | [rl2_trainer] epoch #255 | Fitting baseline... +2025-04-03 03:22:53 | [rl2_trainer] epoch #255 | Computing loss before +2025-04-03 03:22:54 | [rl2_trainer] epoch #255 | Computing KL before +2025-04-03 03:22:55 | [rl2_trainer] epoch #255 | Optimizing +2025-04-03 03:23:31 | [rl2_trainer] epoch #255 | Computing KL after +2025-04-03 03:23:31 | [rl2_trainer] epoch #255 | Computing loss after +2025-04-03 03:23:32 | [rl2_trainer] epoch #255 | Saving snapshot... +2025-04-03 03:23:32 | [rl2_trainer] epoch #255 | Saved +2025-04-03 03:23:32 | [rl2_trainer] epoch #255 | Time 48693.20 s +2025-04-03 03:23:32 | [rl2_trainer] epoch #255 | EpochTime 129.08 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9613 +Average/AverageReturn -19.8833 +Average/Iteration 255 +Average/MaxReturn -2.90807 +Average/MinReturn -94.1303 +Average/NumEpisodes 100 +Average/StdReturn 8.72356 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.465477 +TotalEnvSteps 2.56e+06 +__unnamed_task__/AverageDiscountedReturn -12.9613 +__unnamed_task__/AverageReturn -19.8833 +__unnamed_task__/Iteration 255 +__unnamed_task__/MaxReturn -2.90807 +__unnamed_task__/MinReturn -94.1303 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.72356 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.78645 +policy/KL 0.0129118 +policy/KLBefore 0 +policy/LossAfter -0.016143 +policy/LossBefore 0.00316049 +policy/dLoss 0.0193034 +---------------------------------------- ------------ +2025-04-03 03:24:59 | [rl2_trainer] epoch #256 | Optimizing policy... +2025-04-03 03:25:00 | [rl2_trainer] epoch #256 | Fitting baseline... +2025-04-03 03:25:00 | [rl2_trainer] epoch #256 | Computing loss before +2025-04-03 03:25:00 | [rl2_trainer] epoch #256 | Computing KL before +2025-04-03 03:25:01 | [rl2_trainer] epoch #256 | Optimizing +2025-04-03 03:25:36 | [rl2_trainer] epoch #256 | Computing KL after +2025-04-03 03:25:36 | [rl2_trainer] epoch #256 | Computing loss after +2025-04-03 03:25:37 | [rl2_trainer] epoch #256 | Saving snapshot... +2025-04-03 03:25:37 | [rl2_trainer] epoch #256 | Saved +2025-04-03 03:25:37 | [rl2_trainer] epoch #256 | Time 48818.41 s +2025-04-03 03:25:37 | [rl2_trainer] epoch #256 | EpochTime 125.22 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.6839 +Average/AverageReturn -19.413 +Average/Iteration 256 +Average/MaxReturn -13.7399 +Average/MinReturn -28.3826 +Average/NumEpisodes 100 +Average/StdReturn 3.22095 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.421732 +TotalEnvSteps 2.57e+06 +__unnamed_task__/AverageDiscountedReturn -12.6839 +__unnamed_task__/AverageReturn -19.413 +__unnamed_task__/Iteration 256 +__unnamed_task__/MaxReturn -13.7399 +__unnamed_task__/MinReturn -28.3826 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 3.22095 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.76297 +policy/KL 0.0129975 +policy/KLBefore 0 +policy/LossAfter 0.00126531 +policy/LossBefore 0.00171387 +policy/dLoss 0.000448557 +---------------------------------------- ------------- +2025-04-03 03:28:36 | [rl2_trainer] epoch #257 | Optimizing policy... +2025-04-03 03:28:36 | [rl2_trainer] epoch #257 | Fitting baseline... +2025-04-03 03:28:36 | [rl2_trainer] epoch #257 | Computing loss before +2025-04-03 03:28:37 | [rl2_trainer] epoch #257 | Computing KL before +2025-04-03 03:28:38 | [rl2_trainer] epoch #257 | Optimizing +2025-04-03 03:29:11 | [rl2_trainer] epoch #257 | Computing KL after +2025-04-03 03:29:12 | [rl2_trainer] epoch #257 | Computing loss after +2025-04-03 03:29:13 | [rl2_trainer] epoch #257 | Saving snapshot... +2025-04-03 03:29:13 | [rl2_trainer] epoch #257 | Saved +2025-04-03 03:29:13 | [rl2_trainer] epoch #257 | Time 49034.03 s +2025-04-03 03:29:13 | [rl2_trainer] epoch #257 | EpochTime 215.62 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.3316 +Average/AverageReturn -18.5344 +Average/Iteration 257 +Average/MaxReturn -2.69614 +Average/MinReturn -35.0961 +Average/NumEpisodes 100 +Average/StdReturn 5.33609 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.384719 +TotalEnvSteps 2.58e+06 +__unnamed_task__/AverageDiscountedReturn -12.3316 +__unnamed_task__/AverageReturn -18.5344 +__unnamed_task__/Iteration 257 +__unnamed_task__/MaxReturn -2.69614 +__unnamed_task__/MinReturn -35.0961 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.33609 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.74625 +policy/KL 0.00979021 +policy/KLBefore 0 +policy/LossAfter -0.0234731 +policy/LossBefore -0.00742402 +policy/dLoss 0.0160491 +---------------------------------------- ------------ +2025-04-03 03:31:33 | [rl2_trainer] epoch #258 | Optimizing policy... +2025-04-03 03:31:33 | [rl2_trainer] epoch #258 | Fitting baseline... +2025-04-03 03:31:33 | [rl2_trainer] epoch #258 | Computing loss before +2025-04-03 03:31:34 | [rl2_trainer] epoch #258 | Computing KL before +2025-04-03 03:31:34 | [rl2_trainer] epoch #258 | Optimizing +2025-04-03 03:32:11 | [rl2_trainer] epoch #258 | Computing KL after +2025-04-03 03:32:11 | [rl2_trainer] epoch #258 | Computing loss after +2025-04-03 03:32:12 | [rl2_trainer] epoch #258 | Saving snapshot... +2025-04-03 03:32:12 | [rl2_trainer] epoch #258 | Saved +2025-04-03 03:32:12 | [rl2_trainer] epoch #258 | Time 49213.16 s +2025-04-03 03:32:12 | [rl2_trainer] epoch #258 | EpochTime 179.13 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.9529 +Average/AverageReturn -18.1416 +Average/Iteration 258 +Average/MaxReturn 3.88601 +Average/MinReturn -33.1772 +Average/NumEpisodes 100 +Average/StdReturn 6.58767 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.53886 +TotalEnvSteps 2.59e+06 +__unnamed_task__/AverageDiscountedReturn -11.9529 +__unnamed_task__/AverageReturn -18.1416 +__unnamed_task__/Iteration 258 +__unnamed_task__/MaxReturn 3.88601 +__unnamed_task__/MinReturn -33.1772 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.58767 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.71519 +policy/KL 0.0128684 +policy/KLBefore 0 +policy/LossAfter -0.0209785 +policy/LossBefore -0.00715002 +policy/dLoss 0.0138285 +---------------------------------------- ------------ +2025-04-03 03:34:20 | [rl2_trainer] epoch #259 | Optimizing policy... +2025-04-03 03:34:21 | [rl2_trainer] epoch #259 | Fitting baseline... +2025-04-03 03:34:21 | [rl2_trainer] epoch #259 | Computing loss before +2025-04-03 03:34:21 | [rl2_trainer] epoch #259 | Computing KL before +2025-04-03 03:34:22 | [rl2_trainer] epoch #259 | Optimizing +2025-04-03 03:34:57 | [rl2_trainer] epoch #259 | Computing KL after +2025-04-03 03:34:58 | [rl2_trainer] epoch #259 | Computing loss after +2025-04-03 03:34:59 | [rl2_trainer] epoch #259 | Saving snapshot... +2025-04-03 03:34:59 | [rl2_trainer] epoch #259 | Saved +2025-04-03 03:34:59 | [rl2_trainer] epoch #259 | Time 49379.69 s +2025-04-03 03:34:59 | [rl2_trainer] epoch #259 | EpochTime 166.52 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.9732 +Average/AverageReturn -21.1771 +Average/Iteration 259 +Average/MaxReturn -2.79886 +Average/MinReturn -119.273 +Average/NumEpisodes 100 +Average/StdReturn 14.2922 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.359538 +TotalEnvSteps 2.6e+06 +__unnamed_task__/AverageDiscountedReturn -13.9732 +__unnamed_task__/AverageReturn -21.1771 +__unnamed_task__/Iteration 259 +__unnamed_task__/MaxReturn -2.79886 +__unnamed_task__/MinReturn -119.273 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.2922 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.68398 +policy/KL 0.0271259 +policy/KLBefore 0 +policy/LossAfter -0.00717272 +policy/LossBefore 0.0109804 +policy/dLoss 0.0181531 +---------------------------------------- ------------- +2025-04-03 03:36:57 | [rl2_trainer] epoch #260 | Optimizing policy... +2025-04-03 03:36:57 | [rl2_trainer] epoch #260 | Fitting baseline... +2025-04-03 03:36:57 | [rl2_trainer] epoch #260 | Computing loss before +2025-04-03 03:36:57 | [rl2_trainer] epoch #260 | Computing KL before +2025-04-03 03:36:58 | [rl2_trainer] epoch #260 | Optimizing +2025-04-03 03:37:34 | [rl2_trainer] epoch #260 | Computing KL after +2025-04-03 03:37:34 | [rl2_trainer] epoch #260 | Computing loss after +2025-04-03 03:37:35 | [rl2_trainer] epoch #260 | Saving snapshot... +2025-04-03 03:37:35 | [rl2_trainer] epoch #260 | Saved +2025-04-03 03:37:35 | [rl2_trainer] epoch #260 | Time 49536.25 s +2025-04-03 03:37:35 | [rl2_trainer] epoch #260 | EpochTime 156.56 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6734 +Average/AverageReturn -21.0327 +Average/Iteration 260 +Average/MaxReturn -6.59652 +Average/MinReturn -36.6566 +Average/NumEpisodes 100 +Average/StdReturn 4.99818 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.516189 +TotalEnvSteps 2.61e+06 +__unnamed_task__/AverageDiscountedReturn -13.6734 +__unnamed_task__/AverageReturn -21.0327 +__unnamed_task__/Iteration 260 +__unnamed_task__/MaxReturn -6.59652 +__unnamed_task__/MinReturn -36.6566 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.99818 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.66645 +policy/KL 0.0147851 +policy/KLBefore 0 +policy/LossAfter -0.00802927 +policy/LossBefore -0.00474507 +policy/dLoss 0.0032842 +---------------------------------------- ------------ +2025-04-03 03:40:33 | [rl2_trainer] epoch #261 | Optimizing policy... +2025-04-03 03:40:34 | [rl2_trainer] epoch #261 | Fitting baseline... +2025-04-03 03:40:34 | [rl2_trainer] epoch #261 | Computing loss before +2025-04-03 03:40:34 | [rl2_trainer] epoch #261 | Computing KL before +2025-04-03 03:40:35 | [rl2_trainer] epoch #261 | Optimizing +2025-04-03 03:41:10 | [rl2_trainer] epoch #261 | Computing KL after +2025-04-03 03:41:11 | [rl2_trainer] epoch #261 | Computing loss after +2025-04-03 03:41:11 | [rl2_trainer] epoch #261 | Saving snapshot... +2025-04-03 03:41:11 | [rl2_trainer] epoch #261 | Saved +2025-04-03 03:41:11 | [rl2_trainer] epoch #261 | Time 49752.48 s +2025-04-03 03:41:11 | [rl2_trainer] epoch #261 | EpochTime 216.23 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.4314 +Average/AverageReturn -19.1498 +Average/Iteration 261 +Average/MaxReturn 0.465381 +Average/MinReturn -31.6092 +Average/NumEpisodes 100 +Average/StdReturn 5.51715 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.594719 +TotalEnvSteps 2.62e+06 +__unnamed_task__/AverageDiscountedReturn -12.4314 +__unnamed_task__/AverageReturn -19.1498 +__unnamed_task__/Iteration 261 +__unnamed_task__/MaxReturn 0.465381 +__unnamed_task__/MinReturn -31.6092 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.51715 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.64028 +policy/KL 0.0100263 +policy/KLBefore 0 +policy/LossAfter -0.0127625 +policy/LossBefore -0.000951683 +policy/dLoss 0.0118108 +---------------------------------------- ------------- +2025-04-03 03:43:20 | [rl2_trainer] epoch #262 | Optimizing policy... +2025-04-03 03:43:21 | [rl2_trainer] epoch #262 | Fitting baseline... +2025-04-03 03:43:21 | [rl2_trainer] epoch #262 | Computing loss before +2025-04-03 03:43:21 | [rl2_trainer] epoch #262 | Computing KL before +2025-04-03 03:43:22 | [rl2_trainer] epoch #262 | Optimizing +2025-04-03 03:43:58 | [rl2_trainer] epoch #262 | Computing KL after +2025-04-03 03:43:58 | [rl2_trainer] epoch #262 | Computing loss after +2025-04-03 03:43:59 | [rl2_trainer] epoch #262 | Saving snapshot... +2025-04-03 03:43:59 | [rl2_trainer] epoch #262 | Saved +2025-04-03 03:43:59 | [rl2_trainer] epoch #262 | Time 49920.07 s +2025-04-03 03:43:59 | [rl2_trainer] epoch #262 | EpochTime 167.59 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.1129 +Average/AverageReturn -19.8671 +Average/Iteration 262 +Average/MaxReturn 5.0481 +Average/MinReturn -40.4285 +Average/NumEpisodes 100 +Average/StdReturn 5.348 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.418325 +TotalEnvSteps 2.63e+06 +__unnamed_task__/AverageDiscountedReturn -13.1129 +__unnamed_task__/AverageReturn -19.8671 +__unnamed_task__/Iteration 262 +__unnamed_task__/MaxReturn 5.0481 +__unnamed_task__/MinReturn -40.4285 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.348 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.61845 +policy/KL 0.0104906 +policy/KLBefore 0 +policy/LossAfter -0.0218929 +policy/LossBefore -0.00686109 +policy/dLoss 0.0150318 +---------------------------------------- ------------ +2025-04-03 03:46:16 | [rl2_trainer] epoch #263 | Optimizing policy... +2025-04-03 03:46:16 | [rl2_trainer] epoch #263 | Fitting baseline... +2025-04-03 03:46:16 | [rl2_trainer] epoch #263 | Computing loss before +2025-04-03 03:46:17 | [rl2_trainer] epoch #263 | Computing KL before +2025-04-03 03:46:18 | [rl2_trainer] epoch #263 | Optimizing +2025-04-03 03:46:54 | [rl2_trainer] epoch #263 | Computing KL after +2025-04-03 03:46:54 | [rl2_trainer] epoch #263 | Computing loss after +2025-04-03 03:46:55 | [rl2_trainer] epoch #263 | Saving snapshot... +2025-04-03 03:46:55 | [rl2_trainer] epoch #263 | Saved +2025-04-03 03:46:55 | [rl2_trainer] epoch #263 | Time 50096.40 s +2025-04-03 03:46:55 | [rl2_trainer] epoch #263 | EpochTime 176.33 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.1652 +Average/AverageReturn -19.0865 +Average/Iteration 263 +Average/MaxReturn 0.720777 +Average/MinReturn -30.7934 +Average/NumEpisodes 100 +Average/StdReturn 5.7018 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.569617 +TotalEnvSteps 2.64e+06 +__unnamed_task__/AverageDiscountedReturn -12.1652 +__unnamed_task__/AverageReturn -19.0865 +__unnamed_task__/Iteration 263 +__unnamed_task__/MaxReturn 0.720777 +__unnamed_task__/MinReturn -30.7934 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.7018 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.59136 +policy/KL 0.00807992 +policy/KLBefore 0 +policy/LossAfter -0.0095732 +policy/LossBefore 0.00153416 +policy/dLoss 0.0111074 +---------------------------------------- ------------ +2025-04-03 03:48:23 | [rl2_trainer] epoch #264 | Optimizing policy... +2025-04-03 03:48:23 | [rl2_trainer] epoch #264 | Fitting baseline... +2025-04-03 03:48:23 | [rl2_trainer] epoch #264 | Computing loss before +2025-04-03 03:48:24 | [rl2_trainer] epoch #264 | Computing KL before +2025-04-03 03:48:24 | [rl2_trainer] epoch #264 | Optimizing +2025-04-03 03:49:00 | [rl2_trainer] epoch #264 | Computing KL after +2025-04-03 03:49:01 | [rl2_trainer] epoch #264 | Computing loss after +2025-04-03 03:49:02 | [rl2_trainer] epoch #264 | Saving snapshot... +2025-04-03 03:49:02 | [rl2_trainer] epoch #264 | Saved +2025-04-03 03:49:02 | [rl2_trainer] epoch #264 | Time 50222.94 s +2025-04-03 03:49:02 | [rl2_trainer] epoch #264 | EpochTime 126.54 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.9114 +Average/AverageReturn -18.425 +Average/Iteration 264 +Average/MaxReturn 6.0458 +Average/MinReturn -34.0431 +Average/NumEpisodes 100 +Average/StdReturn 5.86043 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.257333 +TotalEnvSteps 2.65e+06 +__unnamed_task__/AverageDiscountedReturn -11.9114 +__unnamed_task__/AverageReturn -18.425 +__unnamed_task__/Iteration 264 +__unnamed_task__/MaxReturn 6.0458 +__unnamed_task__/MinReturn -34.0431 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.86043 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.5577 +policy/KL 0.0118539 +policy/KLBefore 0 +policy/LossAfter -0.0102941 +policy/LossBefore 0.00373122 +policy/dLoss 0.0140254 +---------------------------------------- ------------ +2025-04-03 03:51:25 | [rl2_trainer] epoch #265 | Optimizing policy... +2025-04-03 03:51:25 | [rl2_trainer] epoch #265 | Fitting baseline... +2025-04-03 03:51:25 | [rl2_trainer] epoch #265 | Computing loss before +2025-04-03 03:51:26 | [rl2_trainer] epoch #265 | Computing KL before +2025-04-03 03:51:27 | [rl2_trainer] epoch #265 | Optimizing +2025-04-03 03:52:02 | [rl2_trainer] epoch #265 | Computing KL after +2025-04-03 03:52:02 | [rl2_trainer] epoch #265 | Computing loss after +2025-04-03 03:52:03 | [rl2_trainer] epoch #265 | Saving snapshot... +2025-04-03 03:52:03 | [rl2_trainer] epoch #265 | Saved +2025-04-03 03:52:03 | [rl2_trainer] epoch #265 | Time 50404.24 s +2025-04-03 03:52:03 | [rl2_trainer] epoch #265 | EpochTime 181.30 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.4857 +Average/AverageReturn -17.7321 +Average/Iteration 265 +Average/MaxReturn -0.499633 +Average/MinReturn -28.8048 +Average/NumEpisodes 100 +Average/StdReturn 5.48882 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.598993 +TotalEnvSteps 2.66e+06 +__unnamed_task__/AverageDiscountedReturn -11.4857 +__unnamed_task__/AverageReturn -17.7321 +__unnamed_task__/Iteration 265 +__unnamed_task__/MaxReturn -0.499633 +__unnamed_task__/MinReturn -28.8048 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.48882 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.54223 +policy/KL 0.00899946 +policy/KLBefore 0 +policy/LossAfter -0.021668 +policy/LossBefore -0.00612284 +policy/dLoss 0.0155452 +---------------------------------------- ------------ +2025-04-03 03:55:58 | [rl2_trainer] epoch #266 | Optimizing policy... +2025-04-03 03:55:58 | [rl2_trainer] epoch #266 | Fitting baseline... +2025-04-03 03:55:58 | [rl2_trainer] epoch #266 | Computing loss before +2025-04-03 03:55:59 | [rl2_trainer] epoch #266 | Computing KL before +2025-04-03 03:55:59 | [rl2_trainer] epoch #266 | Optimizing +2025-04-03 03:56:35 | [rl2_trainer] epoch #266 | Computing KL after +2025-04-03 03:56:35 | [rl2_trainer] epoch #266 | Computing loss after +2025-04-03 03:56:36 | [rl2_trainer] epoch #266 | Saving snapshot... +2025-04-03 03:56:36 | [rl2_trainer] epoch #266 | Saved +2025-04-03 03:56:36 | [rl2_trainer] epoch #266 | Time 50677.22 s +2025-04-03 03:56:36 | [rl2_trainer] epoch #266 | EpochTime 272.98 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.9866 +Average/AverageReturn -26.8433 +Average/Iteration 266 +Average/MaxReturn -11.8367 +Average/MinReturn -55.0437 +Average/NumEpisodes 100 +Average/StdReturn 8.73391 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.816 +TotalEnvSteps 2.67e+06 +__unnamed_task__/AverageDiscountedReturn -16.9866 +__unnamed_task__/AverageReturn -26.8433 +__unnamed_task__/Iteration 266 +__unnamed_task__/MaxReturn -11.8367 +__unnamed_task__/MinReturn -55.0437 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.73391 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.54214 +policy/KL 0.0112884 +policy/KLBefore 0 +policy/LossAfter -0.0325854 +policy/LossBefore -0.00650079 +policy/dLoss 0.0260847 +---------------------------------------- ------------ +2025-04-03 03:58:17 | [rl2_trainer] epoch #267 | Optimizing policy... +2025-04-03 03:58:17 | [rl2_trainer] epoch #267 | Fitting baseline... +2025-04-03 03:58:17 | [rl2_trainer] epoch #267 | Computing loss before +2025-04-03 03:58:18 | [rl2_trainer] epoch #267 | Computing KL before +2025-04-03 03:58:18 | [rl2_trainer] epoch #267 | Optimizing +2025-04-03 03:58:53 | [rl2_trainer] epoch #267 | Computing KL after +2025-04-03 03:58:54 | [rl2_trainer] epoch #267 | Computing loss after +2025-04-03 03:58:55 | [rl2_trainer] epoch #267 | Saving snapshot... +2025-04-03 03:58:55 | [rl2_trainer] epoch #267 | Saved +2025-04-03 03:58:55 | [rl2_trainer] epoch #267 | Time 50816.13 s +2025-04-03 03:58:55 | [rl2_trainer] epoch #267 | EpochTime 138.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.4992 +Average/AverageReturn -19.1796 +Average/Iteration 267 +Average/MaxReturn -0.942722 +Average/MinReturn -29.6172 +Average/NumEpisodes 100 +Average/StdReturn 4.05971 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.461193 +TotalEnvSteps 2.68e+06 +__unnamed_task__/AverageDiscountedReturn -12.4992 +__unnamed_task__/AverageReturn -19.1796 +__unnamed_task__/Iteration 267 +__unnamed_task__/MaxReturn -0.942722 +__unnamed_task__/MinReturn -29.6172 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.05971 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.53205 +policy/KL 0.00960769 +policy/KLBefore 0 +policy/LossAfter -0.0169734 +policy/LossBefore -0.00442086 +policy/dLoss 0.0125526 +---------------------------------------- ------------ +2025-04-03 04:01:03 | [rl2_trainer] epoch #268 | Optimizing policy... +2025-04-03 04:01:03 | [rl2_trainer] epoch #268 | Fitting baseline... +2025-04-03 04:01:03 | [rl2_trainer] epoch #268 | Computing loss before +2025-04-03 04:01:04 | [rl2_trainer] epoch #268 | Computing KL before +2025-04-03 04:01:04 | [rl2_trainer] epoch #268 | Optimizing +2025-04-03 04:01:39 | [rl2_trainer] epoch #268 | Computing KL after +2025-04-03 04:01:39 | [rl2_trainer] epoch #268 | Computing loss after +2025-04-03 04:01:40 | [rl2_trainer] epoch #268 | Saving snapshot... +2025-04-03 04:01:40 | [rl2_trainer] epoch #268 | Saved +2025-04-03 04:01:40 | [rl2_trainer] epoch #268 | Time 50981.27 s +2025-04-03 04:01:40 | [rl2_trainer] epoch #268 | EpochTime 165.15 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.5459 +Average/AverageReturn -19.1244 +Average/Iteration 268 +Average/MaxReturn 7.50896 +Average/MinReturn -31.9457 +Average/NumEpisodes 100 +Average/StdReturn 5.7237 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.681404 +TotalEnvSteps 2.69e+06 +__unnamed_task__/AverageDiscountedReturn -12.5459 +__unnamed_task__/AverageReturn -19.1244 +__unnamed_task__/Iteration 268 +__unnamed_task__/MaxReturn 7.50896 +__unnamed_task__/MinReturn -31.9457 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.7237 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.51639 +policy/KL 0.00733659 +policy/KLBefore 0 +policy/LossAfter -0.0220934 +policy/LossBefore -0.00955774 +policy/dLoss 0.0125356 +---------------------------------------- ------------ +2025-04-03 04:03:40 | [rl2_trainer] epoch #269 | Optimizing policy... +2025-04-03 04:03:41 | [rl2_trainer] epoch #269 | Fitting baseline... +2025-04-03 04:03:41 | [rl2_trainer] epoch #269 | Computing loss before +2025-04-03 04:03:41 | [rl2_trainer] epoch #269 | Computing KL before +2025-04-03 04:03:42 | [rl2_trainer] epoch #269 | Optimizing +2025-04-03 04:04:14 | [rl2_trainer] epoch #269 | Computing KL after +2025-04-03 04:04:14 | [rl2_trainer] epoch #269 | Computing loss after +2025-04-03 04:04:15 | [rl2_trainer] epoch #269 | Saving snapshot... +2025-04-03 04:04:15 | [rl2_trainer] epoch #269 | Saved +2025-04-03 04:04:15 | [rl2_trainer] epoch #269 | Time 51136.24 s +2025-04-03 04:04:15 | [rl2_trainer] epoch #269 | EpochTime 154.96 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6863 +Average/AverageReturn -21.0807 +Average/Iteration 269 +Average/MaxReturn -8.80672 +Average/MinReturn -34.3342 +Average/NumEpisodes 100 +Average/StdReturn 4.63992 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.544426 +TotalEnvSteps 2.7e+06 +__unnamed_task__/AverageDiscountedReturn -13.6863 +__unnamed_task__/AverageReturn -21.0807 +__unnamed_task__/Iteration 269 +__unnamed_task__/MaxReturn -8.80672 +__unnamed_task__/MinReturn -34.3342 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.63992 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.50266 +policy/KL 0.00821178 +policy/KLBefore 0 +policy/LossAfter -0.0120117 +policy/LossBefore -0.00326091 +policy/dLoss 0.00875075 +---------------------------------------- ------------ +2025-04-03 04:07:15 | [rl2_trainer] epoch #270 | Optimizing policy... +2025-04-03 04:07:16 | [rl2_trainer] epoch #270 | Fitting baseline... +2025-04-03 04:07:16 | [rl2_trainer] epoch #270 | Computing loss before +2025-04-03 04:07:16 | [rl2_trainer] epoch #270 | Computing KL before +2025-04-03 04:07:17 | [rl2_trainer] epoch #270 | Optimizing +2025-04-03 04:07:52 | [rl2_trainer] epoch #270 | Computing KL after +2025-04-03 04:07:52 | [rl2_trainer] epoch #270 | Computing loss after +2025-04-03 04:07:53 | [rl2_trainer] epoch #270 | Saving snapshot... +2025-04-03 04:07:53 | [rl2_trainer] epoch #270 | Saved +2025-04-03 04:07:53 | [rl2_trainer] epoch #270 | Time 51354.34 s +2025-04-03 04:07:53 | [rl2_trainer] epoch #270 | EpochTime 218.10 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.3752 +Average/AverageReturn -18.7877 +Average/Iteration 270 +Average/MaxReturn 15.8361 +Average/MinReturn -45.1832 +Average/NumEpisodes 100 +Average/StdReturn 7.60137 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.7394 +TotalEnvSteps 2.71e+06 +__unnamed_task__/AverageDiscountedReturn -12.3752 +__unnamed_task__/AverageReturn -18.7877 +__unnamed_task__/Iteration 270 +__unnamed_task__/MaxReturn 15.8361 +__unnamed_task__/MinReturn -45.1832 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.60137 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.4719 +policy/KL 0.0092911 +policy/KLBefore 0 +policy/LossAfter -0.0158536 +policy/LossBefore 0.000831949 +policy/dLoss 0.0166856 +---------------------------------------- ------------- +2025-04-03 04:09:19 | [rl2_trainer] epoch #271 | Optimizing policy... +2025-04-03 04:09:19 | [rl2_trainer] epoch #271 | Fitting baseline... +2025-04-03 04:09:19 | [rl2_trainer] epoch #271 | Computing loss before +2025-04-03 04:09:20 | [rl2_trainer] epoch #271 | Computing KL before +2025-04-03 04:09:20 | [rl2_trainer] epoch #271 | Optimizing +2025-04-03 04:09:56 | [rl2_trainer] epoch #271 | Computing KL after +2025-04-03 04:09:57 | [rl2_trainer] epoch #271 | Computing loss after +2025-04-03 04:09:58 | [rl2_trainer] epoch #271 | Saving snapshot... +2025-04-03 04:09:58 | [rl2_trainer] epoch #271 | Saved +2025-04-03 04:09:58 | [rl2_trainer] epoch #271 | Time 51478.86 s +2025-04-03 04:09:58 | [rl2_trainer] epoch #271 | EpochTime 124.52 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.9753 +Average/AverageReturn -20.0089 +Average/Iteration 271 +Average/MaxReturn -3.90282 +Average/MinReturn -100.532 +Average/NumEpisodes 100 +Average/StdReturn 9.77208 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.524567 +TotalEnvSteps 2.72e+06 +__unnamed_task__/AverageDiscountedReturn -12.9753 +__unnamed_task__/AverageReturn -20.0089 +__unnamed_task__/Iteration 271 +__unnamed_task__/MaxReturn -3.90282 +__unnamed_task__/MinReturn -100.532 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.77208 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.43838 +policy/KL 0.0121416 +policy/KLBefore 0 +policy/LossAfter -0.0266582 +policy/LossBefore 0.00130305 +policy/dLoss 0.0279613 +---------------------------------------- ------------- +2025-04-03 04:12:56 | [rl2_trainer] epoch #272 | Optimizing policy... +2025-04-03 04:12:56 | [rl2_trainer] epoch #272 | Fitting baseline... +2025-04-03 04:12:56 | [rl2_trainer] epoch #272 | Computing loss before +2025-04-03 04:12:57 | [rl2_trainer] epoch #272 | Computing KL before +2025-04-03 04:12:57 | [rl2_trainer] epoch #272 | Optimizing +2025-04-03 04:13:34 | [rl2_trainer] epoch #272 | Computing KL after +2025-04-03 04:13:34 | [rl2_trainer] epoch #272 | Computing loss after +2025-04-03 04:13:35 | [rl2_trainer] epoch #272 | Saving snapshot... +2025-04-03 04:13:35 | [rl2_trainer] epoch #272 | Saved +2025-04-03 04:13:35 | [rl2_trainer] epoch #272 | Time 51696.28 s +2025-04-03 04:13:35 | [rl2_trainer] epoch #272 | EpochTime 217.41 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.0814 +Average/AverageReturn -18.1382 +Average/Iteration 272 +Average/MaxReturn 0.0894818 +Average/MinReturn -32.0966 +Average/NumEpisodes 100 +Average/StdReturn 5.3646 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.626535 +TotalEnvSteps 2.73e+06 +__unnamed_task__/AverageDiscountedReturn -12.0814 +__unnamed_task__/AverageReturn -18.1382 +__unnamed_task__/Iteration 272 +__unnamed_task__/MaxReturn 0.0894818 +__unnamed_task__/MinReturn -32.0966 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.3646 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.42318 +policy/KL 0.00921083 +policy/KLBefore 0 +policy/LossAfter -0.0127565 +policy/LossBefore -0.00170573 +policy/dLoss 0.0110508 +---------------------------------------- ------------ +2025-04-03 04:15:31 | [rl2_trainer] epoch #273 | Optimizing policy... +2025-04-03 04:15:32 | [rl2_trainer] epoch #273 | Fitting baseline... +2025-04-03 04:15:32 | [rl2_trainer] epoch #273 | Computing loss before +2025-04-03 04:15:32 | [rl2_trainer] epoch #273 | Computing KL before +2025-04-03 04:15:33 | [rl2_trainer] epoch #273 | Optimizing +2025-04-03 04:16:08 | [rl2_trainer] epoch #273 | Computing KL after +2025-04-03 04:16:08 | [rl2_trainer] epoch #273 | Computing loss after +2025-04-03 04:16:09 | [rl2_trainer] epoch #273 | Saving snapshot... +2025-04-03 04:16:09 | [rl2_trainer] epoch #273 | Saved +2025-04-03 04:16:09 | [rl2_trainer] epoch #273 | Time 51850.07 s +2025-04-03 04:16:09 | [rl2_trainer] epoch #273 | EpochTime 153.79 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.2165 +Average/AverageReturn -18.7864 +Average/Iteration 273 +Average/MaxReturn 3.0222 +Average/MinReturn -31.6089 +Average/NumEpisodes 100 +Average/StdReturn 4.88864 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.627209 +TotalEnvSteps 2.74e+06 +__unnamed_task__/AverageDiscountedReturn -12.2165 +__unnamed_task__/AverageReturn -18.7864 +__unnamed_task__/Iteration 273 +__unnamed_task__/MaxReturn 3.0222 +__unnamed_task__/MinReturn -31.6089 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.88864 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.40757 +policy/KL 0.0110818 +policy/KLBefore 0 +policy/LossAfter -0.0148538 +policy/LossBefore -0.00363322 +policy/dLoss 0.0112206 +---------------------------------------- ------------ +2025-04-03 04:19:11 | [rl2_trainer] epoch #274 | Optimizing policy... +2025-04-03 04:19:11 | [rl2_trainer] epoch #274 | Fitting baseline... +2025-04-03 04:19:11 | [rl2_trainer] epoch #274 | Computing loss before +2025-04-03 04:19:12 | [rl2_trainer] epoch #274 | Computing KL before +2025-04-03 04:19:12 | [rl2_trainer] epoch #274 | Optimizing +2025-04-03 04:19:48 | [rl2_trainer] epoch #274 | Computing KL after +2025-04-03 04:19:48 | [rl2_trainer] epoch #274 | Computing loss after +2025-04-03 04:19:49 | [rl2_trainer] epoch #274 | Saving snapshot... +2025-04-03 04:19:49 | [rl2_trainer] epoch #274 | Saved +2025-04-03 04:19:49 | [rl2_trainer] epoch #274 | Time 52070.35 s +2025-04-03 04:19:49 | [rl2_trainer] epoch #274 | EpochTime 220.28 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.3778 +Average/AverageReturn -17.4708 +Average/Iteration 274 +Average/MaxReturn -1.57062 +Average/MinReturn -32.6554 +Average/NumEpisodes 100 +Average/StdReturn 4.87793 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.554996 +TotalEnvSteps 2.75e+06 +__unnamed_task__/AverageDiscountedReturn -11.3778 +__unnamed_task__/AverageReturn -17.4708 +__unnamed_task__/Iteration 274 +__unnamed_task__/MaxReturn -1.57062 +__unnamed_task__/MinReturn -32.6554 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.87793 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.38677 +policy/KL 0.00931518 +policy/KLBefore 0 +policy/LossAfter -0.00929668 +policy/LossBefore 0.000562379 +policy/dLoss 0.00985906 +---------------------------------------- ------------- +2025-04-03 04:24:02 | [rl2_trainer] epoch #275 | Optimizing policy... +2025-04-03 04:24:03 | [rl2_trainer] epoch #275 | Fitting baseline... +2025-04-03 04:24:03 | [rl2_trainer] epoch #275 | Computing loss before +2025-04-03 04:24:03 | [rl2_trainer] epoch #275 | Computing KL before +2025-04-03 04:24:04 | [rl2_trainer] epoch #275 | Optimizing +2025-04-03 04:24:40 | [rl2_trainer] epoch #275 | Computing KL after +2025-04-03 04:24:41 | [rl2_trainer] epoch #275 | Computing loss after +2025-04-03 04:24:42 | [rl2_trainer] epoch #275 | Saving snapshot... +2025-04-03 04:24:42 | [rl2_trainer] epoch #275 | Saved +2025-04-03 04:24:42 | [rl2_trainer] epoch #275 | Time 52362.79 s +2025-04-03 04:24:42 | [rl2_trainer] epoch #275 | EpochTime 292.43 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.4485 +Average/AverageReturn -21.2088 +Average/Iteration 275 +Average/MaxReturn 12.5862 +Average/MinReturn -62.0273 +Average/NumEpisodes 100 +Average/StdReturn 12.5919 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.805413 +TotalEnvSteps 2.76e+06 +__unnamed_task__/AverageDiscountedReturn -13.4485 +__unnamed_task__/AverageReturn -21.2088 +__unnamed_task__/Iteration 275 +__unnamed_task__/MaxReturn 12.5862 +__unnamed_task__/MinReturn -62.0273 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.5919 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.36637 +policy/KL 0.0149999 +policy/KLBefore 0 +policy/LossAfter -0.0388677 +policy/LossBefore -0.00674991 +policy/dLoss 0.0321177 +---------------------------------------- ------------ +2025-04-03 04:27:13 | [rl2_trainer] epoch #276 | Optimizing policy... +2025-04-03 04:27:13 | [rl2_trainer] epoch #276 | Fitting baseline... +2025-04-03 04:27:13 | [rl2_trainer] epoch #276 | Computing loss before +2025-04-03 04:27:14 | [rl2_trainer] epoch #276 | Computing KL before +2025-04-03 04:27:14 | [rl2_trainer] epoch #276 | Optimizing +2025-04-03 04:27:50 | [rl2_trainer] epoch #276 | Computing KL after +2025-04-03 04:27:51 | [rl2_trainer] epoch #276 | Computing loss after +2025-04-03 04:27:52 | [rl2_trainer] epoch #276 | Saving snapshot... +2025-04-03 04:27:52 | [rl2_trainer] epoch #276 | Saved +2025-04-03 04:27:52 | [rl2_trainer] epoch #276 | Time 52552.91 s +2025-04-03 04:27:52 | [rl2_trainer] epoch #276 | EpochTime 190.12 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.1489 +Average/AverageReturn -15.612 +Average/Iteration 276 +Average/MaxReturn 5.35715 +Average/MinReturn -28.3817 +Average/NumEpisodes 100 +Average/StdReturn 5.39606 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.403534 +TotalEnvSteps 2.77e+06 +__unnamed_task__/AverageDiscountedReturn -10.1489 +__unnamed_task__/AverageReturn -15.612 +__unnamed_task__/Iteration 276 +__unnamed_task__/MaxReturn 5.35715 +__unnamed_task__/MinReturn -28.3817 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.39606 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.35947 +policy/KL 0.0131636 +policy/KLBefore 0 +policy/LossAfter -0.0219249 +policy/LossBefore -0.00437714 +policy/dLoss 0.0175478 +---------------------------------------- ------------ +2025-04-03 04:32:11 | [rl2_trainer] epoch #277 | Optimizing policy... +2025-04-03 04:32:12 | [rl2_trainer] epoch #277 | Fitting baseline... +2025-04-03 04:32:12 | [rl2_trainer] epoch #277 | Computing loss before +2025-04-03 04:32:12 | [rl2_trainer] epoch #277 | Computing KL before +2025-04-03 04:32:13 | [rl2_trainer] epoch #277 | Optimizing +2025-04-03 04:32:49 | [rl2_trainer] epoch #277 | Computing KL after +2025-04-03 04:32:50 | [rl2_trainer] epoch #277 | Computing loss after +2025-04-03 04:32:51 | [rl2_trainer] epoch #277 | Saving snapshot... +2025-04-03 04:32:51 | [rl2_trainer] epoch #277 | Saved +2025-04-03 04:32:51 | [rl2_trainer] epoch #277 | Time 52851.56 s +2025-04-03 04:32:51 | [rl2_trainer] epoch #277 | EpochTime 298.65 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6096 +Average/AverageReturn -21.6235 +Average/Iteration 277 +Average/MaxReturn 23.4637 +Average/MinReturn -60.1246 +Average/NumEpisodes 100 +Average/StdReturn 12.3402 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.802978 +TotalEnvSteps 2.78e+06 +__unnamed_task__/AverageDiscountedReturn -13.6096 +__unnamed_task__/AverageReturn -21.6235 +__unnamed_task__/Iteration 277 +__unnamed_task__/MaxReturn 23.4637 +__unnamed_task__/MinReturn -60.1246 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.3402 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.34984 +policy/KL 0.0119892 +policy/KLBefore 0 +policy/LossAfter -0.0538291 +policy/LossBefore -0.00641887 +policy/dLoss 0.0474102 +---------------------------------------- ------------ +2025-04-03 04:35:52 | [rl2_trainer] epoch #278 | Optimizing policy... +2025-04-03 04:35:53 | [rl2_trainer] epoch #278 | Fitting baseline... +2025-04-03 04:35:53 | [rl2_trainer] epoch #278 | Computing loss before +2025-04-03 04:35:53 | [rl2_trainer] epoch #278 | Computing KL before +2025-04-03 04:35:54 | [rl2_trainer] epoch #278 | Optimizing +2025-04-03 04:36:30 | [rl2_trainer] epoch #278 | Computing KL after +2025-04-03 04:36:31 | [rl2_trainer] epoch #278 | Computing loss after +2025-04-03 04:36:32 | [rl2_trainer] epoch #278 | Saving snapshot... +2025-04-03 04:36:32 | [rl2_trainer] epoch #278 | Saved +2025-04-03 04:36:32 | [rl2_trainer] epoch #278 | Time 53072.85 s +2025-04-03 04:36:32 | [rl2_trainer] epoch #278 | EpochTime 221.28 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.8599 +Average/AverageReturn -16.3064 +Average/Iteration 278 +Average/MaxReturn 10.3163 +Average/MinReturn -37.2996 +Average/NumEpisodes 100 +Average/StdReturn 5.69902 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.484039 +TotalEnvSteps 2.79e+06 +__unnamed_task__/AverageDiscountedReturn -10.8599 +__unnamed_task__/AverageReturn -16.3064 +__unnamed_task__/Iteration 278 +__unnamed_task__/MaxReturn 10.3163 +__unnamed_task__/MinReturn -37.2996 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.69902 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.32378 +policy/KL 0.00964138 +policy/KLBefore 0 +policy/LossAfter -0.00627281 +policy/LossBefore 0.0050497 +policy/dLoss 0.0113225 +---------------------------------------- ------------ +2025-04-03 04:38:57 | [rl2_trainer] epoch #279 | Optimizing policy... +2025-04-03 04:38:58 | [rl2_trainer] epoch #279 | Fitting baseline... +2025-04-03 04:38:58 | [rl2_trainer] epoch #279 | Computing loss before +2025-04-03 04:38:58 | [rl2_trainer] epoch #279 | Computing KL before +2025-04-03 04:38:59 | [rl2_trainer] epoch #279 | Optimizing +2025-04-03 04:39:35 | [rl2_trainer] epoch #279 | Computing KL after +2025-04-03 04:39:36 | [rl2_trainer] epoch #279 | Computing loss after +2025-04-03 04:39:36 | [rl2_trainer] epoch #279 | Saving snapshot... +2025-04-03 04:39:36 | [rl2_trainer] epoch #279 | Saved +2025-04-03 04:39:36 | [rl2_trainer] epoch #279 | Time 53257.43 s +2025-04-03 04:39:36 | [rl2_trainer] epoch #279 | EpochTime 184.58 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2397 +Average/AverageReturn -23.8388 +Average/Iteration 279 +Average/MaxReturn 3.35848 +Average/MinReturn -100.473 +Average/NumEpisodes 100 +Average/StdReturn 13.9577 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.774561 +TotalEnvSteps 2.8e+06 +__unnamed_task__/AverageDiscountedReturn -15.2397 +__unnamed_task__/AverageReturn -23.8388 +__unnamed_task__/Iteration 279 +__unnamed_task__/MaxReturn 3.35848 +__unnamed_task__/MinReturn -100.473 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.9577 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.32598 +policy/KL 0.0142345 +policy/KLBefore 0 +policy/LossAfter -0.0465131 +policy/LossBefore 0.00236031 +policy/dLoss 0.0488734 +---------------------------------------- ------------- +2025-04-03 04:42:30 | [rl2_trainer] epoch #280 | Optimizing policy... +2025-04-03 04:42:30 | [rl2_trainer] epoch #280 | Fitting baseline... +2025-04-03 04:42:30 | [rl2_trainer] epoch #280 | Computing loss before +2025-04-03 04:42:31 | [rl2_trainer] epoch #280 | Computing KL before +2025-04-03 04:42:31 | [rl2_trainer] epoch #280 | Optimizing +2025-04-03 04:43:08 | [rl2_trainer] epoch #280 | Computing KL after +2025-04-03 04:43:08 | [rl2_trainer] epoch #280 | Computing loss after +2025-04-03 04:43:09 | [rl2_trainer] epoch #280 | Saving snapshot... +2025-04-03 04:43:09 | [rl2_trainer] epoch #280 | Saved +2025-04-03 04:43:09 | [rl2_trainer] epoch #280 | Time 53470.36 s +2025-04-03 04:43:09 | [rl2_trainer] epoch #280 | EpochTime 212.92 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.6451 +Average/AverageReturn -26.5955 +Average/Iteration 280 +Average/MaxReturn -14.8369 +Average/MinReturn -60.865 +Average/NumEpisodes 100 +Average/StdReturn 7.8333 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.771622 +TotalEnvSteps 2.81e+06 +__unnamed_task__/AverageDiscountedReturn -16.6451 +__unnamed_task__/AverageReturn -26.5955 +__unnamed_task__/Iteration 280 +__unnamed_task__/MaxReturn -14.8369 +__unnamed_task__/MinReturn -60.865 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.8333 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.3342 +policy/KL 0.0124622 +policy/KLBefore 0 +policy/LossAfter -0.025292 +policy/LossBefore 0.00584109 +policy/dLoss 0.0311331 +---------------------------------------- ------------ +2025-04-03 04:45:06 | [rl2_trainer] epoch #281 | Optimizing policy... +2025-04-03 04:45:07 | [rl2_trainer] epoch #281 | Fitting baseline... +2025-04-03 04:45:07 | [rl2_trainer] epoch #281 | Computing loss before +2025-04-03 04:45:07 | [rl2_trainer] epoch #281 | Computing KL before +2025-04-03 04:45:08 | [rl2_trainer] epoch #281 | Optimizing +2025-04-03 04:45:43 | [rl2_trainer] epoch #281 | Computing KL after +2025-04-03 04:45:44 | [rl2_trainer] epoch #281 | Computing loss after +2025-04-03 04:45:45 | [rl2_trainer] epoch #281 | Saving snapshot... +2025-04-03 04:45:45 | [rl2_trainer] epoch #281 | Saved +2025-04-03 04:45:45 | [rl2_trainer] epoch #281 | Time 53625.82 s +2025-04-03 04:45:45 | [rl2_trainer] epoch #281 | EpochTime 155.45 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.2314 +Average/AverageReturn -18.6378 +Average/Iteration 281 +Average/MaxReturn 9.64734 +Average/MinReturn -32.5916 +Average/NumEpisodes 100 +Average/StdReturn 6.0151 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.544703 +TotalEnvSteps 2.82e+06 +__unnamed_task__/AverageDiscountedReturn -12.2314 +__unnamed_task__/AverageReturn -18.6378 +__unnamed_task__/Iteration 281 +__unnamed_task__/MaxReturn 9.64734 +__unnamed_task__/MinReturn -32.5916 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.0151 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.32539 +policy/KL 0.0107281 +policy/KLBefore 0 +policy/LossAfter -0.0216688 +policy/LossBefore -0.00473996 +policy/dLoss 0.0169289 +---------------------------------------- ------------ +2025-04-03 04:48:41 | [rl2_trainer] epoch #282 | Optimizing policy... +2025-04-03 04:48:41 | [rl2_trainer] epoch #282 | Fitting baseline... +2025-04-03 04:48:41 | [rl2_trainer] epoch #282 | Computing loss before +2025-04-03 04:48:42 | [rl2_trainer] epoch #282 | Computing KL before +2025-04-03 04:48:42 | [rl2_trainer] epoch #282 | Optimizing +2025-04-03 04:49:17 | [rl2_trainer] epoch #282 | Computing KL after +2025-04-03 04:49:18 | [rl2_trainer] epoch #282 | Computing loss after +2025-04-03 04:49:19 | [rl2_trainer] epoch #282 | Saving snapshot... +2025-04-03 04:49:19 | [rl2_trainer] epoch #282 | Saved +2025-04-03 04:49:19 | [rl2_trainer] epoch #282 | Time 53839.64 s +2025-04-03 04:49:19 | [rl2_trainer] epoch #282 | EpochTime 213.82 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.2435 +Average/AverageReturn -25.9825 +Average/Iteration 282 +Average/MaxReturn -5.79522 +Average/MinReturn -46.401 +Average/NumEpisodes 100 +Average/StdReturn 8.43216 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.590484 +TotalEnvSteps 2.83e+06 +__unnamed_task__/AverageDiscountedReturn -16.2435 +__unnamed_task__/AverageReturn -25.9825 +__unnamed_task__/Iteration 282 +__unnamed_task__/MaxReturn -5.79522 +__unnamed_task__/MinReturn -46.401 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.43216 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.32833 +policy/KL 0.0118602 +policy/KLBefore 0 +policy/LossAfter -0.0567107 +policy/LossBefore -0.00343568 +policy/dLoss 0.053275 +---------------------------------------- ------------ +2025-04-03 04:51:33 | [rl2_trainer] epoch #283 | Optimizing policy... +2025-04-03 04:51:33 | [rl2_trainer] epoch #283 | Fitting baseline... +2025-04-03 04:51:33 | [rl2_trainer] epoch #283 | Computing loss before +2025-04-03 04:51:33 | [rl2_trainer] epoch #283 | Computing KL before +2025-04-03 04:51:34 | [rl2_trainer] epoch #283 | Optimizing +2025-04-03 04:52:11 | [rl2_trainer] epoch #283 | Computing KL after +2025-04-03 04:52:11 | [rl2_trainer] epoch #283 | Computing loss after +2025-04-03 04:52:12 | [rl2_trainer] epoch #283 | Saving snapshot... +2025-04-03 04:52:12 | [rl2_trainer] epoch #283 | Saved +2025-04-03 04:52:12 | [rl2_trainer] epoch #283 | Time 54013.20 s +2025-04-03 04:52:12 | [rl2_trainer] epoch #283 | EpochTime 173.56 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.2939 +Average/AverageReturn -20.1707 +Average/Iteration 283 +Average/MaxReturn -8.76647 +Average/MinReturn -34.0971 +Average/NumEpisodes 100 +Average/StdReturn 4.73652 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.550013 +TotalEnvSteps 2.84e+06 +__unnamed_task__/AverageDiscountedReturn -13.2939 +__unnamed_task__/AverageReturn -20.1707 +__unnamed_task__/Iteration 283 +__unnamed_task__/MaxReturn -8.76647 +__unnamed_task__/MinReturn -34.0971 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.73652 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.33045 +policy/KL 0.0100129 +policy/KLBefore 0 +policy/LossAfter -0.019563 +policy/LossBefore -0.0063035 +policy/dLoss 0.0132595 +---------------------------------------- ----------- +2025-04-03 04:53:41 | [rl2_trainer] epoch #284 | Optimizing policy... +2025-04-03 04:53:41 | [rl2_trainer] epoch #284 | Fitting baseline... +2025-04-03 04:53:41 | [rl2_trainer] epoch #284 | Computing loss before +2025-04-03 04:53:42 | [rl2_trainer] epoch #284 | Computing KL before +2025-04-03 04:53:42 | [rl2_trainer] epoch #284 | Optimizing +2025-04-03 04:54:18 | [rl2_trainer] epoch #284 | Computing KL after +2025-04-03 04:54:19 | [rl2_trainer] epoch #284 | Computing loss after +2025-04-03 04:54:19 | [rl2_trainer] epoch #284 | Saving snapshot... +2025-04-03 04:54:19 | [rl2_trainer] epoch #284 | Saved +2025-04-03 04:54:19 | [rl2_trainer] epoch #284 | Time 54140.47 s +2025-04-03 04:54:19 | [rl2_trainer] epoch #284 | EpochTime 127.27 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.2314 +Average/AverageReturn -17.2004 +Average/Iteration 284 +Average/MaxReturn 20.9645 +Average/MinReturn -52.7551 +Average/NumEpisodes 100 +Average/StdReturn 8.29792 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.526494 +TotalEnvSteps 2.85e+06 +__unnamed_task__/AverageDiscountedReturn -11.2314 +__unnamed_task__/AverageReturn -17.2004 +__unnamed_task__/Iteration 284 +__unnamed_task__/MaxReturn 20.9645 +__unnamed_task__/MinReturn -52.7551 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.29792 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.30814 +policy/KL 0.013195 +policy/KLBefore 0 +policy/LossAfter -0.0428409 +policy/LossBefore -0.0134755 +policy/dLoss 0.0293654 +---------------------------------------- ----------- +2025-04-03 04:58:03 | [rl2_trainer] epoch #285 | Optimizing policy... +2025-04-03 04:58:03 | [rl2_trainer] epoch #285 | Fitting baseline... +2025-04-03 04:58:03 | [rl2_trainer] epoch #285 | Computing loss before +2025-04-03 04:58:04 | [rl2_trainer] epoch #285 | Computing KL before +2025-04-03 04:58:05 | [rl2_trainer] epoch #285 | Optimizing +2025-04-03 04:58:41 | [rl2_trainer] epoch #285 | Computing KL after +2025-04-03 04:58:42 | [rl2_trainer] epoch #285 | Computing loss after +2025-04-03 04:58:42 | [rl2_trainer] epoch #285 | Saving snapshot... +2025-04-03 04:58:42 | [rl2_trainer] epoch #285 | Saved +2025-04-03 04:58:42 | [rl2_trainer] epoch #285 | Time 54403.50 s +2025-04-03 04:58:42 | [rl2_trainer] epoch #285 | EpochTime 263.03 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1294 +Average/AverageReturn -22.0122 +Average/Iteration 285 +Average/MaxReturn 5.20203 +Average/MinReturn -85.4294 +Average/NumEpisodes 100 +Average/StdReturn 14.0135 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.791212 +TotalEnvSteps 2.86e+06 +__unnamed_task__/AverageDiscountedReturn -14.1294 +__unnamed_task__/AverageReturn -22.0122 +__unnamed_task__/Iteration 285 +__unnamed_task__/MaxReturn 5.20203 +__unnamed_task__/MinReturn -85.4294 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.0135 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.26473 +policy/KL 0.0165958 +policy/KLBefore 0 +policy/LossAfter -0.0418866 +policy/LossBefore -0.00366367 +policy/dLoss 0.038223 +---------------------------------------- ------------ +2025-04-03 05:01:41 | [rl2_trainer] epoch #286 | Optimizing policy... +2025-04-03 05:01:41 | [rl2_trainer] epoch #286 | Fitting baseline... +2025-04-03 05:01:41 | [rl2_trainer] epoch #286 | Computing loss before +2025-04-03 05:01:42 | [rl2_trainer] epoch #286 | Computing KL before +2025-04-03 05:01:42 | [rl2_trainer] epoch #286 | Optimizing +2025-04-03 05:02:18 | [rl2_trainer] epoch #286 | Computing KL after +2025-04-03 05:02:19 | [rl2_trainer] epoch #286 | Computing loss after +2025-04-03 05:02:20 | [rl2_trainer] epoch #286 | Saving snapshot... +2025-04-03 05:02:20 | [rl2_trainer] epoch #286 | Saved +2025-04-03 05:02:20 | [rl2_trainer] epoch #286 | Time 54620.55 s +2025-04-03 05:02:20 | [rl2_trainer] epoch #286 | EpochTime 217.05 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.3149 +Average/AverageReturn -16.8428 +Average/Iteration 286 +Average/MaxReturn 5.94491 +Average/MinReturn -43.1214 +Average/NumEpisodes 100 +Average/StdReturn 7.11001 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.478504 +TotalEnvSteps 2.87e+06 +__unnamed_task__/AverageDiscountedReturn -11.3149 +__unnamed_task__/AverageReturn -16.8428 +__unnamed_task__/Iteration 286 +__unnamed_task__/MaxReturn 5.94491 +__unnamed_task__/MinReturn -43.1214 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.11001 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.221 +policy/KL 0.0120421 +policy/KLBefore 0 +policy/LossAfter -0.0269588 +policy/LossBefore -0.00975812 +policy/dLoss 0.0172007 +---------------------------------------- ------------ +2025-04-03 05:03:46 | [rl2_trainer] epoch #287 | Optimizing policy... +2025-04-03 05:03:46 | [rl2_trainer] epoch #287 | Fitting baseline... +2025-04-03 05:03:46 | [rl2_trainer] epoch #287 | Computing loss before +2025-04-03 05:03:47 | [rl2_trainer] epoch #287 | Computing KL before +2025-04-03 05:03:47 | [rl2_trainer] epoch #287 | Optimizing +2025-04-03 05:04:22 | [rl2_trainer] epoch #287 | Computing KL after +2025-04-03 05:04:23 | [rl2_trainer] epoch #287 | Computing loss after +2025-04-03 05:04:24 | [rl2_trainer] epoch #287 | Saving snapshot... +2025-04-03 05:04:24 | [rl2_trainer] epoch #287 | Saved +2025-04-03 05:04:24 | [rl2_trainer] epoch #287 | Time 54745.03 s +2025-04-03 05:04:24 | [rl2_trainer] epoch #287 | EpochTime 124.47 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.1588 +Average/AverageReturn -16.878 +Average/Iteration 287 +Average/MaxReturn 4.13933 +Average/MinReturn -29.8201 +Average/NumEpisodes 100 +Average/StdReturn 6.02725 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.708028 +TotalEnvSteps 2.88e+06 +__unnamed_task__/AverageDiscountedReturn -11.1588 +__unnamed_task__/AverageReturn -16.878 +__unnamed_task__/Iteration 287 +__unnamed_task__/MaxReturn 4.13933 +__unnamed_task__/MinReturn -29.8201 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.02725 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.18539 +policy/KL 0.0129155 +policy/KLBefore 0 +policy/LossAfter -0.00767672 +policy/LossBefore -0.00130688 +policy/dLoss 0.00636984 +---------------------------------------- ------------ +2025-04-03 05:07:15 | [rl2_trainer] epoch #288 | Optimizing policy... +2025-04-03 05:07:15 | [rl2_trainer] epoch #288 | Fitting baseline... +2025-04-03 05:07:15 | [rl2_trainer] epoch #288 | Computing loss before +2025-04-03 05:07:16 | [rl2_trainer] epoch #288 | Computing KL before +2025-04-03 05:07:16 | [rl2_trainer] epoch #288 | Optimizing +2025-04-03 05:07:49 | [rl2_trainer] epoch #288 | Computing KL after +2025-04-03 05:07:49 | [rl2_trainer] epoch #288 | Computing loss after +2025-04-03 05:07:50 | [rl2_trainer] epoch #288 | Saving snapshot... +2025-04-03 05:07:50 | [rl2_trainer] epoch #288 | Saved +2025-04-03 05:07:50 | [rl2_trainer] epoch #288 | Time 54951.33 s +2025-04-03 05:07:50 | [rl2_trainer] epoch #288 | EpochTime 206.30 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.696 +Average/AverageReturn -25.035 +Average/Iteration 288 +Average/MaxReturn 17.8321 +Average/MinReturn -47.6497 +Average/NumEpisodes 100 +Average/StdReturn 10.5419 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.768141 +TotalEnvSteps 2.89e+06 +__unnamed_task__/AverageDiscountedReturn -15.696 +__unnamed_task__/AverageReturn -25.035 +__unnamed_task__/Iteration 288 +__unnamed_task__/MaxReturn 17.8321 +__unnamed_task__/MinReturn -47.6497 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.5419 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.14145 +policy/KL 0.0113981 +policy/KLBefore 0 +policy/LossAfter -0.0146038 +policy/LossBefore 0.00544518 +policy/dLoss 0.0200489 +---------------------------------------- ------------ +2025-04-03 05:11:26 | [rl2_trainer] epoch #289 | Optimizing policy... +2025-04-03 05:11:27 | [rl2_trainer] epoch #289 | Fitting baseline... +2025-04-03 05:11:27 | [rl2_trainer] epoch #289 | Computing loss before +2025-04-03 05:11:27 | [rl2_trainer] epoch #289 | Computing KL before +2025-04-03 05:11:28 | [rl2_trainer] epoch #289 | Optimizing +2025-04-03 05:12:04 | [rl2_trainer] epoch #289 | Computing KL after +2025-04-03 05:12:05 | [rl2_trainer] epoch #289 | Computing loss after +2025-04-03 05:12:06 | [rl2_trainer] epoch #289 | Saving snapshot... +2025-04-03 05:12:06 | [rl2_trainer] epoch #289 | Saved +2025-04-03 05:12:06 | [rl2_trainer] epoch #289 | Time 55206.54 s +2025-04-03 05:12:06 | [rl2_trainer] epoch #289 | EpochTime 255.20 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4918 +Average/AverageReturn -24.1319 +Average/Iteration 289 +Average/MaxReturn 8.69168 +Average/MinReturn -70.9132 +Average/NumEpisodes 100 +Average/StdReturn 10.0286 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.793019 +TotalEnvSteps 2.9e+06 +__unnamed_task__/AverageDiscountedReturn -15.4918 +__unnamed_task__/AverageReturn -24.1319 +__unnamed_task__/Iteration 289 +__unnamed_task__/MaxReturn 8.69168 +__unnamed_task__/MinReturn -70.9132 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0286 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.10759 +policy/KL 0.0119755 +policy/KLBefore 0 +policy/LossAfter -0.02778 +policy/LossBefore -0.00407551 +policy/dLoss 0.0237045 +---------------------------------------- ------------ +2025-04-03 05:14:34 | [rl2_trainer] epoch #290 | Optimizing policy... +2025-04-03 05:14:35 | [rl2_trainer] epoch #290 | Fitting baseline... +2025-04-03 05:14:35 | [rl2_trainer] epoch #290 | Computing loss before +2025-04-03 05:14:35 | [rl2_trainer] epoch #290 | Computing KL before +2025-04-03 05:14:36 | [rl2_trainer] epoch #290 | Optimizing +2025-04-03 05:15:11 | [rl2_trainer] epoch #290 | Computing KL after +2025-04-03 05:15:11 | [rl2_trainer] epoch #290 | Computing loss after +2025-04-03 05:15:12 | [rl2_trainer] epoch #290 | Saving snapshot... +2025-04-03 05:15:12 | [rl2_trainer] epoch #290 | Saved +2025-04-03 05:15:12 | [rl2_trainer] epoch #290 | Time 55393.14 s +2025-04-03 05:15:12 | [rl2_trainer] epoch #290 | EpochTime 186.60 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.4202 +Average/AverageReturn -13.929 +Average/Iteration 290 +Average/MaxReturn 28.562 +Average/MinReturn -25.9745 +Average/NumEpisodes 100 +Average/StdReturn 7.18746 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.440738 +TotalEnvSteps 2.91e+06 +__unnamed_task__/AverageDiscountedReturn -9.4202 +__unnamed_task__/AverageReturn -13.929 +__unnamed_task__/Iteration 290 +__unnamed_task__/MaxReturn 28.562 +__unnamed_task__/MinReturn -25.9745 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.18746 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.07665 +policy/KL 0.0126477 +policy/KLBefore 0 +policy/LossAfter -0.01804 +policy/LossBefore 0.00715746 +policy/dLoss 0.0251975 +---------------------------------------- ------------ +2025-04-03 05:17:40 | [rl2_trainer] epoch #291 | Optimizing policy... +2025-04-03 05:17:40 | [rl2_trainer] epoch #291 | Fitting baseline... +2025-04-03 05:17:40 | [rl2_trainer] epoch #291 | Computing loss before +2025-04-03 05:17:41 | [rl2_trainer] epoch #291 | Computing KL before +2025-04-03 05:17:41 | [rl2_trainer] epoch #291 | Optimizing +2025-04-03 05:18:18 | [rl2_trainer] epoch #291 | Computing KL after +2025-04-03 05:18:18 | [rl2_trainer] epoch #291 | Computing loss after +2025-04-03 05:18:19 | [rl2_trainer] epoch #291 | Saving snapshot... +2025-04-03 05:18:19 | [rl2_trainer] epoch #291 | Saved +2025-04-03 05:18:19 | [rl2_trainer] epoch #291 | Time 55580.10 s +2025-04-03 05:18:19 | [rl2_trainer] epoch #291 | EpochTime 186.95 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.7282 +Average/AverageReturn -16.3608 +Average/Iteration 291 +Average/MaxReturn 7.60615 +Average/MinReturn -29.9097 +Average/NumEpisodes 100 +Average/StdReturn 6.18585 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.389977 +TotalEnvSteps 2.92e+06 +__unnamed_task__/AverageDiscountedReturn -10.7282 +__unnamed_task__/AverageReturn -16.3608 +__unnamed_task__/Iteration 291 +__unnamed_task__/MaxReturn 7.60615 +__unnamed_task__/MinReturn -29.9097 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.18585 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.06861 +policy/KL 0.0119049 +policy/KLBefore 0 +policy/LossAfter -0.0245447 +policy/LossBefore 0.00178886 +policy/dLoss 0.0263336 +---------------------------------------- ------------ +2025-04-03 05:20:17 | [rl2_trainer] epoch #292 | Optimizing policy... +2025-04-03 05:20:17 | [rl2_trainer] epoch #292 | Fitting baseline... +2025-04-03 05:20:17 | [rl2_trainer] epoch #292 | Computing loss before +2025-04-03 05:20:18 | [rl2_trainer] epoch #292 | Computing KL before +2025-04-03 05:20:18 | [rl2_trainer] epoch #292 | Optimizing +2025-04-03 05:20:53 | [rl2_trainer] epoch #292 | Computing KL after +2025-04-03 05:20:54 | [rl2_trainer] epoch #292 | Computing loss after +2025-04-03 05:20:55 | [rl2_trainer] epoch #292 | Saving snapshot... +2025-04-03 05:20:55 | [rl2_trainer] epoch #292 | Saved +2025-04-03 05:20:55 | [rl2_trainer] epoch #292 | Time 55735.67 s +2025-04-03 05:20:55 | [rl2_trainer] epoch #292 | EpochTime 155.57 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9666 +Average/AverageReturn -19.599 +Average/Iteration 292 +Average/MaxReturn -4.03983 +Average/MinReturn -39.1045 +Average/NumEpisodes 100 +Average/StdReturn 5.85573 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.49776 +TotalEnvSteps 2.93e+06 +__unnamed_task__/AverageDiscountedReturn -12.9666 +__unnamed_task__/AverageReturn -19.599 +__unnamed_task__/Iteration 292 +__unnamed_task__/MaxReturn -4.03983 +__unnamed_task__/MinReturn -39.1045 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.85573 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.06309 +policy/KL 0.0135873 +policy/KLBefore 0 +policy/LossAfter -0.0205301 +policy/LossBefore 0.00192239 +policy/dLoss 0.0224525 +---------------------------------------- ------------ +2025-04-03 05:23:22 | [rl2_trainer] epoch #293 | Optimizing policy... +2025-04-03 05:23:23 | [rl2_trainer] epoch #293 | Fitting baseline... +2025-04-03 05:23:23 | [rl2_trainer] epoch #293 | Computing loss before +2025-04-03 05:23:23 | [rl2_trainer] epoch #293 | Computing KL before +2025-04-03 05:23:24 | [rl2_trainer] epoch #293 | Optimizing +2025-04-03 05:24:00 | [rl2_trainer] epoch #293 | Computing KL after +2025-04-03 05:24:01 | [rl2_trainer] epoch #293 | Computing loss after +2025-04-03 05:24:02 | [rl2_trainer] epoch #293 | Saving snapshot... +2025-04-03 05:24:02 | [rl2_trainer] epoch #293 | Saved +2025-04-03 05:24:02 | [rl2_trainer] epoch #293 | Time 55922.78 s +2025-04-03 05:24:02 | [rl2_trainer] epoch #293 | EpochTime 187.10 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.2188 +Average/AverageReturn -16.5006 +Average/Iteration 293 +Average/MaxReturn 10.4997 +Average/MinReturn -149.451 +Average/NumEpisodes 100 +Average/StdReturn 14.5732 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.408542 +TotalEnvSteps 2.94e+06 +__unnamed_task__/AverageDiscountedReturn -11.2188 +__unnamed_task__/AverageReturn -16.5006 +__unnamed_task__/Iteration 293 +__unnamed_task__/MaxReturn 10.4997 +__unnamed_task__/MinReturn -149.451 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.5732 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.0696 +policy/KL 0.0250837 +policy/KLBefore 0 +policy/LossAfter -0.0623918 +policy/LossBefore -0.00700516 +policy/dLoss 0.0553866 +---------------------------------------- ------------- +2025-04-03 05:27:20 | [rl2_trainer] epoch #294 | Optimizing policy... +2025-04-03 05:27:20 | [rl2_trainer] epoch #294 | Fitting baseline... +2025-04-03 05:27:20 | [rl2_trainer] epoch #294 | Computing loss before +2025-04-03 05:27:21 | [rl2_trainer] epoch #294 | Computing KL before +2025-04-03 05:27:21 | [rl2_trainer] epoch #294 | Optimizing +2025-04-03 05:27:58 | [rl2_trainer] epoch #294 | Computing KL after +2025-04-03 05:27:58 | [rl2_trainer] epoch #294 | Computing loss after +2025-04-03 05:27:59 | [rl2_trainer] epoch #294 | Saving snapshot... +2025-04-03 05:27:59 | [rl2_trainer] epoch #294 | Saved +2025-04-03 05:27:59 | [rl2_trainer] epoch #294 | Time 56160.08 s +2025-04-03 05:27:59 | [rl2_trainer] epoch #294 | EpochTime 237.30 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.7447 +Average/AverageReturn -23.3615 +Average/Iteration 294 +Average/MaxReturn -5.63188 +Average/MinReturn -46.7988 +Average/NumEpisodes 100 +Average/StdReturn 8.77412 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.860615 +TotalEnvSteps 2.95e+06 +__unnamed_task__/AverageDiscountedReturn -14.7447 +__unnamed_task__/AverageReturn -23.3615 +__unnamed_task__/Iteration 294 +__unnamed_task__/MaxReturn -5.63188 +__unnamed_task__/MinReturn -46.7988 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.77412 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.06078 +policy/KL 0.013956 +policy/KLBefore 0 +policy/LossAfter -0.013484 +policy/LossBefore 0.00320574 +policy/dLoss 0.0166897 +---------------------------------------- ------------ +2025-04-03 05:30:38 | [rl2_trainer] epoch #295 | Optimizing policy... +2025-04-03 05:30:39 | [rl2_trainer] epoch #295 | Fitting baseline... +2025-04-03 05:30:39 | [rl2_trainer] epoch #295 | Computing loss before +2025-04-03 05:30:39 | [rl2_trainer] epoch #295 | Computing KL before +2025-04-03 05:30:40 | [rl2_trainer] epoch #295 | Optimizing +2025-04-03 05:31:16 | [rl2_trainer] epoch #295 | Computing KL after +2025-04-03 05:31:16 | [rl2_trainer] epoch #295 | Computing loss after +2025-04-03 05:31:17 | [rl2_trainer] epoch #295 | Saving snapshot... +2025-04-03 05:31:17 | [rl2_trainer] epoch #295 | Saved +2025-04-03 05:31:17 | [rl2_trainer] epoch #295 | Time 56358.32 s +2025-04-03 05:31:17 | [rl2_trainer] epoch #295 | EpochTime 198.24 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.8094 +Average/AverageReturn -23.3121 +Average/Iteration 295 +Average/MaxReturn -6.04568 +Average/MinReturn -42.8088 +Average/NumEpisodes 100 +Average/StdReturn 7.61188 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.785388 +TotalEnvSteps 2.96e+06 +__unnamed_task__/AverageDiscountedReturn -14.8094 +__unnamed_task__/AverageReturn -23.3121 +__unnamed_task__/Iteration 295 +__unnamed_task__/MaxReturn -6.04568 +__unnamed_task__/MinReturn -42.8088 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.61188 +__unnamed_task__/TerminationRate 0 +policy/Entropy 4.03565 +policy/KL 0.0102683 +policy/KLBefore 0 +policy/LossAfter -0.017171 +policy/LossBefore -0.00436453 +policy/dLoss 0.0128065 +---------------------------------------- ------------ +2025-04-03 05:33:25 | [rl2_trainer] epoch #296 | Optimizing policy... +2025-04-03 05:33:26 | [rl2_trainer] epoch #296 | Fitting baseline... +2025-04-03 05:33:26 | [rl2_trainer] epoch #296 | Computing loss before +2025-04-03 05:33:26 | [rl2_trainer] epoch #296 | Computing KL before +2025-04-03 05:33:27 | [rl2_trainer] epoch #296 | Optimizing +2025-04-03 05:34:02 | [rl2_trainer] epoch #296 | Computing KL after +2025-04-03 05:34:03 | [rl2_trainer] epoch #296 | Computing loss after +2025-04-03 05:34:04 | [rl2_trainer] epoch #296 | Saving snapshot... +2025-04-03 05:34:04 | [rl2_trainer] epoch #296 | Saved +2025-04-03 05:34:04 | [rl2_trainer] epoch #296 | Time 56524.74 s +2025-04-03 05:34:04 | [rl2_trainer] epoch #296 | EpochTime 166.42 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.016 +Average/AverageReturn -16.443 +Average/Iteration 296 +Average/MaxReturn 54.0878 +Average/MinReturn -42.9384 +Average/NumEpisodes 100 +Average/StdReturn 9.20705 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.339028 +TotalEnvSteps 2.97e+06 +__unnamed_task__/AverageDiscountedReturn -11.016 +__unnamed_task__/AverageReturn -16.443 +__unnamed_task__/Iteration 296 +__unnamed_task__/MaxReturn 54.0878 +__unnamed_task__/MinReturn -42.9384 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.20705 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.99917 +policy/KL 0.0117495 +policy/KLBefore 0 +policy/LossAfter -0.0259498 +policy/LossBefore 0.00137385 +policy/dLoss 0.0273236 +---------------------------------------- ------------ +2025-04-03 05:36:14 | [rl2_trainer] epoch #297 | Optimizing policy... +2025-04-03 05:36:15 | [rl2_trainer] epoch #297 | Fitting baseline... +2025-04-03 05:36:15 | [rl2_trainer] epoch #297 | Computing loss before +2025-04-03 05:36:15 | [rl2_trainer] epoch #297 | Computing KL before +2025-04-03 05:36:16 | [rl2_trainer] epoch #297 | Optimizing +2025-04-03 05:36:52 | [rl2_trainer] epoch #297 | Computing KL after +2025-04-03 05:36:53 | [rl2_trainer] epoch #297 | Computing loss after +2025-04-03 05:36:53 | [rl2_trainer] epoch #297 | Saving snapshot... +2025-04-03 05:36:53 | [rl2_trainer] epoch #297 | Saved +2025-04-03 05:36:53 | [rl2_trainer] epoch #297 | Time 56694.49 s +2025-04-03 05:36:53 | [rl2_trainer] epoch #297 | EpochTime 169.75 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.4651 +Average/AverageReturn -17.0417 +Average/Iteration 297 +Average/MaxReturn 7.57285 +Average/MinReturn -33.9049 +Average/NumEpisodes 100 +Average/StdReturn 5.57231 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.588689 +TotalEnvSteps 2.98e+06 +__unnamed_task__/AverageDiscountedReturn -11.4651 +__unnamed_task__/AverageReturn -17.0417 +__unnamed_task__/Iteration 297 +__unnamed_task__/MaxReturn 7.57285 +__unnamed_task__/MinReturn -33.9049 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.57231 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.97345 +policy/KL 0.0145453 +policy/KLBefore 0 +policy/LossAfter -0.00955648 +policy/LossBefore 0.00131356 +policy/dLoss 0.01087 +---------------------------------------- ------------ +2025-04-03 05:38:37 | [rl2_trainer] epoch #298 | Optimizing policy... +2025-04-03 05:38:37 | [rl2_trainer] epoch #298 | Fitting baseline... +2025-04-03 05:38:37 | [rl2_trainer] epoch #298 | Computing loss before +2025-04-03 05:38:38 | [rl2_trainer] epoch #298 | Computing KL before +2025-04-03 05:38:38 | [rl2_trainer] epoch #298 | Optimizing +2025-04-03 05:39:12 | [rl2_trainer] epoch #298 | Computing KL after +2025-04-03 05:39:13 | [rl2_trainer] epoch #298 | Computing loss after +2025-04-03 05:39:14 | [rl2_trainer] epoch #298 | Saving snapshot... +2025-04-03 05:39:14 | [rl2_trainer] epoch #298 | Saved +2025-04-03 05:39:14 | [rl2_trainer] epoch #298 | Time 56834.76 s +2025-04-03 05:39:14 | [rl2_trainer] epoch #298 | EpochTime 140.27 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.3672 +Average/AverageReturn -17.0828 +Average/Iteration 298 +Average/MaxReturn 7.73183 +Average/MinReturn -29.7329 +Average/NumEpisodes 100 +Average/StdReturn 6.5739 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.472828 +TotalEnvSteps 2.99e+06 +__unnamed_task__/AverageDiscountedReturn -11.3672 +__unnamed_task__/AverageReturn -17.0828 +__unnamed_task__/Iteration 298 +__unnamed_task__/MaxReturn 7.73183 +__unnamed_task__/MinReturn -29.7329 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.5739 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.95113 +policy/KL 0.010771 +policy/KLBefore 0 +policy/LossAfter -0.0257514 +policy/LossBefore -0.0024961 +policy/dLoss 0.0232553 +---------------------------------------- ----------- +2025-04-03 05:42:11 | [rl2_trainer] epoch #299 | Optimizing policy... +2025-04-03 05:42:11 | [rl2_trainer] epoch #299 | Fitting baseline... +2025-04-03 05:42:11 | [rl2_trainer] epoch #299 | Computing loss before +2025-04-03 05:42:12 | [rl2_trainer] epoch #299 | Computing KL before +2025-04-03 05:42:13 | [rl2_trainer] epoch #299 | Optimizing +2025-04-03 05:42:50 | [rl2_trainer] epoch #299 | Computing KL after +2025-04-03 05:42:50 | [rl2_trainer] epoch #299 | Computing loss after +2025-04-03 05:42:51 | [rl2_trainer] epoch #299 | Saving snapshot... +2025-04-03 05:42:51 | [rl2_trainer] epoch #299 | Saved +2025-04-03 05:42:51 | [rl2_trainer] epoch #299 | Time 57052.36 s +2025-04-03 05:42:51 | [rl2_trainer] epoch #299 | EpochTime 217.59 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.368 +Average/AverageReturn -15.707 +Average/Iteration 299 +Average/MaxReturn 7.25679 +Average/MinReturn -38.9842 +Average/NumEpisodes 100 +Average/StdReturn 6.18331 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.523392 +TotalEnvSteps 3e+06 +__unnamed_task__/AverageDiscountedReturn -10.368 +__unnamed_task__/AverageReturn -15.707 +__unnamed_task__/Iteration 299 +__unnamed_task__/MaxReturn 7.25679 +__unnamed_task__/MinReturn -38.9842 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.18331 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.9311 +policy/KL 0.010998 +policy/KLBefore 0 +policy/LossAfter -0.013487 +policy/LossBefore 0.00338786 +policy/dLoss 0.0168748 +---------------------------------------- ------------ +2025-04-03 05:45:48 | [rl2_trainer] epoch #300 | Optimizing policy... +2025-04-03 05:45:48 | [rl2_trainer] epoch #300 | Fitting baseline... +2025-04-03 05:45:48 | [rl2_trainer] epoch #300 | Computing loss before +2025-04-03 05:45:49 | [rl2_trainer] epoch #300 | Computing KL before +2025-04-03 05:45:49 | [rl2_trainer] epoch #300 | Optimizing +2025-04-03 05:46:24 | [rl2_trainer] epoch #300 | Computing KL after +2025-04-03 05:46:25 | [rl2_trainer] epoch #300 | Computing loss after +2025-04-03 05:46:26 | [rl2_trainer] epoch #300 | Saving snapshot... +2025-04-03 05:46:26 | [rl2_trainer] epoch #300 | Saved +2025-04-03 05:46:26 | [rl2_trainer] epoch #300 | Time 57266.95 s +2025-04-03 05:46:26 | [rl2_trainer] epoch #300 | EpochTime 214.59 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.4322 +Average/AverageReturn -21.2933 +Average/Iteration 300 +Average/MaxReturn 10.3593 +Average/MinReturn -50.6538 +Average/NumEpisodes 100 +Average/StdReturn 10.284 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.825099 +TotalEnvSteps 3.01e+06 +__unnamed_task__/AverageDiscountedReturn -13.4322 +__unnamed_task__/AverageReturn -21.2933 +__unnamed_task__/Iteration 300 +__unnamed_task__/MaxReturn 10.3593 +__unnamed_task__/MinReturn -50.6538 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.284 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.91999 +policy/KL 0.0127665 +policy/KLBefore 0 +policy/LossAfter -0.0328714 +policy/LossBefore -0.000531158 +policy/dLoss 0.0323402 +---------------------------------------- ------------- +2025-04-03 05:49:27 | [rl2_trainer] epoch #301 | Optimizing policy... +2025-04-03 05:49:27 | [rl2_trainer] epoch #301 | Fitting baseline... +2025-04-03 05:49:27 | [rl2_trainer] epoch #301 | Computing loss before +2025-04-03 05:49:28 | [rl2_trainer] epoch #301 | Computing KL before +2025-04-03 05:49:28 | [rl2_trainer] epoch #301 | Optimizing +2025-04-03 05:50:05 | [rl2_trainer] epoch #301 | Computing KL after +2025-04-03 05:50:06 | [rl2_trainer] epoch #301 | Computing loss after +2025-04-03 05:50:07 | [rl2_trainer] epoch #301 | Saving snapshot... +2025-04-03 05:50:07 | [rl2_trainer] epoch #301 | Saved +2025-04-03 05:50:07 | [rl2_trainer] epoch #301 | Time 57487.88 s +2025-04-03 05:50:07 | [rl2_trainer] epoch #301 | EpochTime 220.93 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.4587 +Average/AverageReturn -16.9411 +Average/Iteration 301 +Average/MaxReturn -2.32671 +Average/MinReturn -31.6885 +Average/NumEpisodes 100 +Average/StdReturn 4.82364 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.598189 +TotalEnvSteps 3.02e+06 +__unnamed_task__/AverageDiscountedReturn -11.4587 +__unnamed_task__/AverageReturn -16.9411 +__unnamed_task__/Iteration 301 +__unnamed_task__/MaxReturn -2.32671 +__unnamed_task__/MinReturn -31.6885 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.82364 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.90435 +policy/KL 0.00873378 +policy/KLBefore 0 +policy/LossAfter -0.0190575 +policy/LossBefore -0.00713703 +policy/dLoss 0.0119205 +---------------------------------------- ------------ +2025-04-03 05:52:16 | [rl2_trainer] epoch #302 | Optimizing policy... +2025-04-03 05:52:16 | [rl2_trainer] epoch #302 | Fitting baseline... +2025-04-03 05:52:16 | [rl2_trainer] epoch #302 | Computing loss before +2025-04-03 05:52:17 | [rl2_trainer] epoch #302 | Computing KL before +2025-04-03 05:52:17 | [rl2_trainer] epoch #302 | Optimizing +2025-04-03 05:52:54 | [rl2_trainer] epoch #302 | Computing KL after +2025-04-03 05:52:54 | [rl2_trainer] epoch #302 | Computing loss after +2025-04-03 05:52:55 | [rl2_trainer] epoch #302 | Saving snapshot... +2025-04-03 05:52:55 | [rl2_trainer] epoch #302 | Saved +2025-04-03 05:52:55 | [rl2_trainer] epoch #302 | Time 57656.04 s +2025-04-03 05:52:55 | [rl2_trainer] epoch #302 | EpochTime 168.15 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.6438 +Average/AverageReturn -15.8129 +Average/Iteration 302 +Average/MaxReturn 0.14752 +Average/MinReturn -30.402 +Average/NumEpisodes 100 +Average/StdReturn 5.92888 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.547921 +TotalEnvSteps 3.03e+06 +__unnamed_task__/AverageDiscountedReturn -10.6438 +__unnamed_task__/AverageReturn -15.8129 +__unnamed_task__/Iteration 302 +__unnamed_task__/MaxReturn 0.14752 +__unnamed_task__/MinReturn -30.402 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.92888 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.8972 +policy/KL 0.0136553 +policy/KLBefore 0 +policy/LossAfter -0.0313717 +policy/LossBefore -0.00945823 +policy/dLoss 0.0219135 +---------------------------------------- ------------ +2025-04-03 05:55:14 | [rl2_trainer] epoch #303 | Optimizing policy... +2025-04-03 05:55:15 | [rl2_trainer] epoch #303 | Fitting baseline... +2025-04-03 05:55:15 | [rl2_trainer] epoch #303 | Computing loss before +2025-04-03 05:55:15 | [rl2_trainer] epoch #303 | Computing KL before +2025-04-03 05:55:16 | [rl2_trainer] epoch #303 | Optimizing +2025-04-03 05:55:50 | [rl2_trainer] epoch #303 | Computing KL after +2025-04-03 05:55:51 | [rl2_trainer] epoch #303 | Computing loss after +2025-04-03 05:55:52 | [rl2_trainer] epoch #303 | Saving snapshot... +2025-04-03 05:55:52 | [rl2_trainer] epoch #303 | Saved +2025-04-03 05:55:52 | [rl2_trainer] epoch #303 | Time 57832.94 s +2025-04-03 05:55:52 | [rl2_trainer] epoch #303 | EpochTime 176.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.6716 +Average/AverageReturn -16.1408 +Average/Iteration 303 +Average/MaxReturn 11.2767 +Average/MinReturn -27.0283 +Average/NumEpisodes 100 +Average/StdReturn 6.54584 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.249799 +TotalEnvSteps 3.04e+06 +__unnamed_task__/AverageDiscountedReturn -10.6716 +__unnamed_task__/AverageReturn -16.1408 +__unnamed_task__/Iteration 303 +__unnamed_task__/MaxReturn 11.2767 +__unnamed_task__/MinReturn -27.0283 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.54584 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.883 +policy/KL 0.010392 +policy/KLBefore 0 +policy/LossAfter -0.0370494 +policy/LossBefore -0.00245091 +policy/dLoss 0.0345984 +---------------------------------------- ------------ +2025-04-03 05:59:05 | [rl2_trainer] epoch #304 | Optimizing policy... +2025-04-03 05:59:06 | [rl2_trainer] epoch #304 | Fitting baseline... +2025-04-03 05:59:06 | [rl2_trainer] epoch #304 | Computing loss before +2025-04-03 05:59:06 | [rl2_trainer] epoch #304 | Computing KL before +2025-04-03 05:59:07 | [rl2_trainer] epoch #304 | Optimizing +2025-04-03 05:59:43 | [rl2_trainer] epoch #304 | Computing KL after +2025-04-03 05:59:43 | [rl2_trainer] epoch #304 | Computing loss after +2025-04-03 05:59:44 | [rl2_trainer] epoch #304 | Saving snapshot... +2025-04-03 05:59:44 | [rl2_trainer] epoch #304 | Saved +2025-04-03 05:59:44 | [rl2_trainer] epoch #304 | Time 58065.11 s +2025-04-03 05:59:44 | [rl2_trainer] epoch #304 | EpochTime 232.16 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.9214 +Average/AverageReturn -23.4582 +Average/Iteration 304 +Average/MaxReturn 1.68523 +Average/MinReturn -103.242 +Average/NumEpisodes 100 +Average/StdReturn 13.2324 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.775252 +TotalEnvSteps 3.05e+06 +__unnamed_task__/AverageDiscountedReturn -14.9214 +__unnamed_task__/AverageReturn -23.4582 +__unnamed_task__/Iteration 304 +__unnamed_task__/MaxReturn 1.68523 +__unnamed_task__/MinReturn -103.242 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.2324 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.86526 +policy/KL 0.0148927 +policy/KLBefore 0 +policy/LossAfter -0.0462913 +policy/LossBefore -0.00850398 +policy/dLoss 0.0377873 +---------------------------------------- ------------- +2025-04-03 06:02:58 | [rl2_trainer] epoch #305 | Optimizing policy... +2025-04-03 06:02:58 | [rl2_trainer] epoch #305 | Fitting baseline... +2025-04-03 06:02:58 | [rl2_trainer] epoch #305 | Computing loss before +2025-04-03 06:02:59 | [rl2_trainer] epoch #305 | Computing KL before +2025-04-03 06:03:00 | [rl2_trainer] epoch #305 | Optimizing +2025-04-03 06:03:35 | [rl2_trainer] epoch #305 | Computing KL after +2025-04-03 06:03:36 | [rl2_trainer] epoch #305 | Computing loss after +2025-04-03 06:03:37 | [rl2_trainer] epoch #305 | Saving snapshot... +2025-04-03 06:03:37 | [rl2_trainer] epoch #305 | Saved +2025-04-03 06:03:37 | [rl2_trainer] epoch #305 | Time 58297.58 s +2025-04-03 06:03:37 | [rl2_trainer] epoch #305 | EpochTime 232.46 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.1246 +Average/AverageReturn -23.6128 +Average/Iteration 305 +Average/MaxReturn -4.60253 +Average/MinReturn -44.0813 +Average/NumEpisodes 100 +Average/StdReturn 8.09704 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.800287 +TotalEnvSteps 3.06e+06 +__unnamed_task__/AverageDiscountedReturn -15.1246 +__unnamed_task__/AverageReturn -23.6128 +__unnamed_task__/Iteration 305 +__unnamed_task__/MaxReturn -4.60253 +__unnamed_task__/MinReturn -44.0813 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.09704 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.85473 +policy/KL 0.0100941 +policy/KLBefore 0 +policy/LossAfter -0.0258332 +policy/LossBefore -0.003124 +policy/dLoss 0.0227092 +---------------------------------------- ----------- +2025-04-03 06:06:06 | [rl2_trainer] epoch #306 | Optimizing policy... +2025-04-03 06:06:06 | [rl2_trainer] epoch #306 | Fitting baseline... +2025-04-03 06:06:06 | [rl2_trainer] epoch #306 | Computing loss before +2025-04-03 06:06:07 | [rl2_trainer] epoch #306 | Computing KL before +2025-04-03 06:06:07 | [rl2_trainer] epoch #306 | Optimizing +2025-04-03 06:06:44 | [rl2_trainer] epoch #306 | Computing KL after +2025-04-03 06:06:44 | [rl2_trainer] epoch #306 | Computing loss after +2025-04-03 06:06:45 | [rl2_trainer] epoch #306 | Saving snapshot... +2025-04-03 06:06:45 | [rl2_trainer] epoch #306 | Saved +2025-04-03 06:06:45 | [rl2_trainer] epoch #306 | Time 58486.22 s +2025-04-03 06:06:45 | [rl2_trainer] epoch #306 | EpochTime 188.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.03467 +Average/AverageReturn -13.354 +Average/Iteration 306 +Average/MaxReturn 27.4037 +Average/MinReturn -24.2279 +Average/NumEpisodes 100 +Average/StdReturn 6.95042 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.480694 +TotalEnvSteps 3.07e+06 +__unnamed_task__/AverageDiscountedReturn -9.03467 +__unnamed_task__/AverageReturn -13.354 +__unnamed_task__/Iteration 306 +__unnamed_task__/MaxReturn 27.4037 +__unnamed_task__/MinReturn -24.2279 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.95042 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.84966 +policy/KL 0.0136482 +policy/KLBefore 0 +policy/LossAfter -0.0325942 +policy/LossBefore -0.00763791 +policy/dLoss 0.0249563 +---------------------------------------- ------------ +2025-04-03 06:09:09 | [rl2_trainer] epoch #307 | Optimizing policy... +2025-04-03 06:09:10 | [rl2_trainer] epoch #307 | Fitting baseline... +2025-04-03 06:09:10 | [rl2_trainer] epoch #307 | Computing loss before +2025-04-03 06:09:10 | [rl2_trainer] epoch #307 | Computing KL before +2025-04-03 06:09:11 | [rl2_trainer] epoch #307 | Optimizing +2025-04-03 06:09:47 | [rl2_trainer] epoch #307 | Computing KL after +2025-04-03 06:09:48 | [rl2_trainer] epoch #307 | Computing loss after +2025-04-03 06:09:49 | [rl2_trainer] epoch #307 | Saving snapshot... +2025-04-03 06:09:49 | [rl2_trainer] epoch #307 | Saved +2025-04-03 06:09:49 | [rl2_trainer] epoch #307 | Time 58669.60 s +2025-04-03 06:09:49 | [rl2_trainer] epoch #307 | EpochTime 183.37 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.1404 +Average/AverageReturn -20.772 +Average/Iteration 307 +Average/MaxReturn 12.2612 +Average/MinReturn -43.143 +Average/NumEpisodes 100 +Average/StdReturn 10.9553 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.839956 +TotalEnvSteps 3.08e+06 +__unnamed_task__/AverageDiscountedReturn -13.1404 +__unnamed_task__/AverageReturn -20.772 +__unnamed_task__/Iteration 307 +__unnamed_task__/MaxReturn 12.2612 +__unnamed_task__/MinReturn -43.143 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.9553 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.82181 +policy/KL 0.0117523 +policy/KLBefore 0 +policy/LossAfter -0.0232464 +policy/LossBefore 8.79211e-05 +policy/dLoss 0.0233343 +---------------------------------------- ------------- +2025-04-03 06:12:36 | [rl2_trainer] epoch #308 | Optimizing policy... +2025-04-03 06:12:37 | [rl2_trainer] epoch #308 | Fitting baseline... +2025-04-03 06:12:37 | [rl2_trainer] epoch #308 | Computing loss before +2025-04-03 06:12:37 | [rl2_trainer] epoch #308 | Computing KL before +2025-04-03 06:12:38 | [rl2_trainer] epoch #308 | Optimizing +2025-04-03 06:13:12 | [rl2_trainer] epoch #308 | Computing KL after +2025-04-03 06:13:13 | [rl2_trainer] epoch #308 | Computing loss after +2025-04-03 06:13:13 | [rl2_trainer] epoch #308 | Saving snapshot... +2025-04-03 06:13:13 | [rl2_trainer] epoch #308 | Saved +2025-04-03 06:13:13 | [rl2_trainer] epoch #308 | Time 58874.52 s +2025-04-03 06:13:13 | [rl2_trainer] epoch #308 | EpochTime 204.92 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.2316 +Average/AverageReturn -23.8881 +Average/Iteration 308 +Average/MaxReturn -1.66513 +Average/MinReturn -49.5182 +Average/NumEpisodes 100 +Average/StdReturn 9.58612 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.826959 +TotalEnvSteps 3.09e+06 +__unnamed_task__/AverageDiscountedReturn -15.2316 +__unnamed_task__/AverageReturn -23.8881 +__unnamed_task__/Iteration 308 +__unnamed_task__/MaxReturn -1.66513 +__unnamed_task__/MinReturn -49.5182 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.58612 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.80996 +policy/KL 0.0106085 +policy/KLBefore 0 +policy/LossAfter -0.0274135 +policy/LossBefore -0.0024794 +policy/dLoss 0.0249341 +---------------------------------------- ----------- +2025-04-03 06:16:14 | [rl2_trainer] epoch #309 | Optimizing policy... +2025-04-03 06:16:14 | [rl2_trainer] epoch #309 | Fitting baseline... +2025-04-03 06:16:14 | [rl2_trainer] epoch #309 | Computing loss before +2025-04-03 06:16:15 | [rl2_trainer] epoch #309 | Computing KL before +2025-04-03 06:16:16 | [rl2_trainer] epoch #309 | Optimizing +2025-04-03 06:16:51 | [rl2_trainer] epoch #309 | Computing KL after +2025-04-03 06:16:52 | [rl2_trainer] epoch #309 | Computing loss after +2025-04-03 06:16:53 | [rl2_trainer] epoch #309 | Saving snapshot... +2025-04-03 06:16:53 | [rl2_trainer] epoch #309 | Saved +2025-04-03 06:16:53 | [rl2_trainer] epoch #309 | Time 59093.53 s +2025-04-03 06:16:53 | [rl2_trainer] epoch #309 | EpochTime 219.01 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.5706 +Average/AverageReturn -17.3354 +Average/Iteration 309 +Average/MaxReturn 2.79171 +Average/MinReturn -29.2331 +Average/NumEpisodes 100 +Average/StdReturn 4.75921 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.444813 +TotalEnvSteps 3.1e+06 +__unnamed_task__/AverageDiscountedReturn -11.5706 +__unnamed_task__/AverageReturn -17.3354 +__unnamed_task__/Iteration 309 +__unnamed_task__/MaxReturn 2.79171 +__unnamed_task__/MinReturn -29.2331 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.75921 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.78847 +policy/KL 0.00918665 +policy/KLBefore 0 +policy/LossAfter -0.00914509 +policy/LossBefore -0.0015125 +policy/dLoss 0.00763259 +---------------------------------------- ------------ +2025-04-03 06:20:01 | [rl2_trainer] epoch #310 | Optimizing policy... +2025-04-03 06:20:02 | [rl2_trainer] epoch #310 | Fitting baseline... +2025-04-03 06:20:02 | [rl2_trainer] epoch #310 | Computing loss before +2025-04-03 06:20:02 | [rl2_trainer] epoch #310 | Computing KL before +2025-04-03 06:20:03 | [rl2_trainer] epoch #310 | Optimizing +2025-04-03 06:20:39 | [rl2_trainer] epoch #310 | Computing KL after +2025-04-03 06:20:40 | [rl2_trainer] epoch #310 | Computing loss after +2025-04-03 06:20:41 | [rl2_trainer] epoch #310 | Saving snapshot... +2025-04-03 06:20:41 | [rl2_trainer] epoch #310 | Saved +2025-04-03 06:20:41 | [rl2_trainer] epoch #310 | Time 59321.90 s +2025-04-03 06:20:41 | [rl2_trainer] epoch #310 | EpochTime 228.36 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1653 +Average/AverageReturn -22.5609 +Average/Iteration 310 +Average/MaxReturn 4.54869 +Average/MinReturn -46.5496 +Average/NumEpisodes 100 +Average/StdReturn 9.32609 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.853796 +TotalEnvSteps 3.11e+06 +__unnamed_task__/AverageDiscountedReturn -14.1653 +__unnamed_task__/AverageReturn -22.5609 +__unnamed_task__/Iteration 310 +__unnamed_task__/MaxReturn 4.54869 +__unnamed_task__/MinReturn -46.5496 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.32609 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.74516 +policy/KL 0.0117768 +policy/KLBefore 0 +policy/LossAfter -0.0148758 +policy/LossBefore -0.00325625 +policy/dLoss 0.0116196 +---------------------------------------- ------------ +2025-04-03 06:22:09 | [rl2_trainer] epoch #311 | Optimizing policy... +2025-04-03 06:22:09 | [rl2_trainer] epoch #311 | Fitting baseline... +2025-04-03 06:22:09 | [rl2_trainer] epoch #311 | Computing loss before +2025-04-03 06:22:10 | [rl2_trainer] epoch #311 | Computing KL before +2025-04-03 06:22:11 | [rl2_trainer] epoch #311 | Optimizing +2025-04-03 06:22:45 | [rl2_trainer] epoch #311 | Computing KL after +2025-04-03 06:22:46 | [rl2_trainer] epoch #311 | Computing loss after +2025-04-03 06:22:47 | [rl2_trainer] epoch #311 | Saving snapshot... +2025-04-03 06:22:47 | [rl2_trainer] epoch #311 | Saved +2025-04-03 06:22:47 | [rl2_trainer] epoch #311 | Time 59447.95 s +2025-04-03 06:22:47 | [rl2_trainer] epoch #311 | EpochTime 126.05 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.665 +Average/AverageReturn -17.5583 +Average/Iteration 311 +Average/MaxReturn 5.4017 +Average/MinReturn -64.4707 +Average/NumEpisodes 100 +Average/StdReturn 9.99697 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.544962 +TotalEnvSteps 3.12e+06 +__unnamed_task__/AverageDiscountedReturn -11.665 +__unnamed_task__/AverageReturn -17.5583 +__unnamed_task__/Iteration 311 +__unnamed_task__/MaxReturn 5.4017 +__unnamed_task__/MinReturn -64.4707 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.99697 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.69403 +policy/KL 0.0119738 +policy/KLBefore 0 +policy/LossAfter -0.0247606 +policy/LossBefore 0.0055724 +policy/dLoss 0.030333 +---------------------------------------- ----------- +2025-04-03 06:24:14 | [rl2_trainer] epoch #312 | Optimizing policy... +2025-04-03 06:24:15 | [rl2_trainer] epoch #312 | Fitting baseline... +2025-04-03 06:24:15 | [rl2_trainer] epoch #312 | Computing loss before +2025-04-03 06:24:15 | [rl2_trainer] epoch #312 | Computing KL before +2025-04-03 06:24:16 | [rl2_trainer] epoch #312 | Optimizing +2025-04-03 06:24:51 | [rl2_trainer] epoch #312 | Computing KL after +2025-04-03 06:24:51 | [rl2_trainer] epoch #312 | Computing loss after +2025-04-03 06:24:52 | [rl2_trainer] epoch #312 | Saving snapshot... +2025-04-03 06:24:52 | [rl2_trainer] epoch #312 | Saved +2025-04-03 06:24:52 | [rl2_trainer] epoch #312 | Time 59573.05 s +2025-04-03 06:24:52 | [rl2_trainer] epoch #312 | EpochTime 125.10 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.80107 +Average/AverageReturn -14.5985 +Average/Iteration 312 +Average/MaxReturn 11.4986 +Average/MinReturn -28.5982 +Average/NumEpisodes 100 +Average/StdReturn 7.21033 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.6901 +TotalEnvSteps 3.13e+06 +__unnamed_task__/AverageDiscountedReturn -9.80107 +__unnamed_task__/AverageReturn -14.5985 +__unnamed_task__/Iteration 312 +__unnamed_task__/MaxReturn 11.4986 +__unnamed_task__/MinReturn -28.5982 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.21033 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.65809 +policy/KL 0.0111148 +policy/KLBefore 0 +policy/LossAfter -0.0206043 +policy/LossBefore -0.00591301 +policy/dLoss 0.0146913 +---------------------------------------- ------------ +2025-04-03 06:27:53 | [rl2_trainer] epoch #313 | Optimizing policy... +2025-04-03 06:27:53 | [rl2_trainer] epoch #313 | Fitting baseline... +2025-04-03 06:27:53 | [rl2_trainer] epoch #313 | Computing loss before +2025-04-03 06:27:53 | [rl2_trainer] epoch #313 | Computing KL before +2025-04-03 06:27:54 | [rl2_trainer] epoch #313 | Optimizing +2025-04-03 06:28:30 | [rl2_trainer] epoch #313 | Computing KL after +2025-04-03 06:28:31 | [rl2_trainer] epoch #313 | Computing loss after +2025-04-03 06:28:31 | [rl2_trainer] epoch #313 | Saving snapshot... +2025-04-03 06:28:31 | [rl2_trainer] epoch #313 | Saved +2025-04-03 06:28:31 | [rl2_trainer] epoch #313 | Time 59792.39 s +2025-04-03 06:28:31 | [rl2_trainer] epoch #313 | EpochTime 219.33 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.8833 +Average/AverageReturn -25.0982 +Average/Iteration 313 +Average/MaxReturn -1.24341 +Average/MinReturn -68.2487 +Average/NumEpisodes 100 +Average/StdReturn 9.55689 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.769233 +TotalEnvSteps 3.14e+06 +__unnamed_task__/AverageDiscountedReturn -15.8833 +__unnamed_task__/AverageReturn -25.0982 +__unnamed_task__/Iteration 313 +__unnamed_task__/MaxReturn -1.24341 +__unnamed_task__/MinReturn -68.2487 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.55689 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.63266 +policy/KL 0.0122365 +policy/KLBefore 0 +policy/LossAfter -0.0160386 +policy/LossBefore 0.00319758 +policy/dLoss 0.0192362 +---------------------------------------- ------------ +2025-04-03 06:32:20 | [rl2_trainer] epoch #314 | Optimizing policy... +2025-04-03 06:32:20 | [rl2_trainer] epoch #314 | Fitting baseline... +2025-04-03 06:32:20 | [rl2_trainer] epoch #314 | Computing loss before +2025-04-03 06:32:21 | [rl2_trainer] epoch #314 | Computing KL before +2025-04-03 06:32:22 | [rl2_trainer] epoch #314 | Optimizing +2025-04-03 06:32:58 | [rl2_trainer] epoch #314 | Computing KL after +2025-04-03 06:32:59 | [rl2_trainer] epoch #314 | Computing loss after +2025-04-03 06:33:00 | [rl2_trainer] epoch #314 | Saving snapshot... +2025-04-03 06:33:00 | [rl2_trainer] epoch #314 | Saved +2025-04-03 06:33:00 | [rl2_trainer] epoch #314 | Time 60060.86 s +2025-04-03 06:33:00 | [rl2_trainer] epoch #314 | EpochTime 268.48 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.6539 +Average/AverageReturn -21.7025 +Average/Iteration 314 +Average/MaxReturn -3.50118 +Average/MinReturn -50.8907 +Average/NumEpisodes 100 +Average/StdReturn 9.37413 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.871869 +TotalEnvSteps 3.15e+06 +__unnamed_task__/AverageDiscountedReturn -13.6539 +__unnamed_task__/AverageReturn -21.7025 +__unnamed_task__/Iteration 314 +__unnamed_task__/MaxReturn -3.50118 +__unnamed_task__/MinReturn -50.8907 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.37413 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.60446 +policy/KL 0.0125997 +policy/KLBefore 0 +policy/LossAfter -0.0196727 +policy/LossBefore -0.000744662 +policy/dLoss 0.0189281 +---------------------------------------- ------------- +2025-04-03 06:35:14 | [rl2_trainer] epoch #315 | Optimizing policy... +2025-04-03 06:35:14 | [rl2_trainer] epoch #315 | Fitting baseline... +2025-04-03 06:35:14 | [rl2_trainer] epoch #315 | Computing loss before +2025-04-03 06:35:15 | [rl2_trainer] epoch #315 | Computing KL before +2025-04-03 06:35:15 | [rl2_trainer] epoch #315 | Optimizing +2025-04-03 06:35:50 | [rl2_trainer] epoch #315 | Computing KL after +2025-04-03 06:35:50 | [rl2_trainer] epoch #315 | Computing loss after +2025-04-03 06:35:51 | [rl2_trainer] epoch #315 | Saving snapshot... +2025-04-03 06:35:51 | [rl2_trainer] epoch #315 | Saved +2025-04-03 06:35:51 | [rl2_trainer] epoch #315 | Time 60232.06 s +2025-04-03 06:35:51 | [rl2_trainer] epoch #315 | EpochTime 171.20 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2945 +Average/AverageReturn -24.272 +Average/Iteration 315 +Average/MaxReturn 6.70423 +Average/MinReturn -55.1472 +Average/NumEpisodes 100 +Average/StdReturn 8.80271 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.795109 +TotalEnvSteps 3.16e+06 +__unnamed_task__/AverageDiscountedReturn -15.2945 +__unnamed_task__/AverageReturn -24.272 +__unnamed_task__/Iteration 315 +__unnamed_task__/MaxReturn 6.70423 +__unnamed_task__/MinReturn -55.1472 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.80271 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.57835 +policy/KL 0.0110438 +policy/KLBefore 0 +policy/LossAfter -0.0149562 +policy/LossBefore 0.000674251 +policy/dLoss 0.0156305 +---------------------------------------- ------------- +2025-04-03 06:37:59 | [rl2_trainer] epoch #316 | Optimizing policy... +2025-04-03 06:37:59 | [rl2_trainer] epoch #316 | Fitting baseline... +2025-04-03 06:37:59 | [rl2_trainer] epoch #316 | Computing loss before +2025-04-03 06:38:00 | [rl2_trainer] epoch #316 | Computing KL before +2025-04-03 06:38:00 | [rl2_trainer] epoch #316 | Optimizing +2025-04-03 06:38:37 | [rl2_trainer] epoch #316 | Computing KL after +2025-04-03 06:38:37 | [rl2_trainer] epoch #316 | Computing loss after +2025-04-03 06:38:38 | [rl2_trainer] epoch #316 | Saving snapshot... +2025-04-03 06:38:38 | [rl2_trainer] epoch #316 | Saved +2025-04-03 06:38:38 | [rl2_trainer] epoch #316 | Time 60399.23 s +2025-04-03 06:38:38 | [rl2_trainer] epoch #316 | EpochTime 167.16 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.7081 +Average/AverageReturn -17.7049 +Average/Iteration 316 +Average/MaxReturn 5.69885 +Average/MinReturn -31.9126 +Average/NumEpisodes 100 +Average/StdReturn 6.63176 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.439959 +TotalEnvSteps 3.17e+06 +__unnamed_task__/AverageDiscountedReturn -11.7081 +__unnamed_task__/AverageReturn -17.7049 +__unnamed_task__/Iteration 316 +__unnamed_task__/MaxReturn 5.69885 +__unnamed_task__/MinReturn -31.9126 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.63176 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.56904 +policy/KL 0.0263789 +policy/KLBefore 0 +policy/LossAfter -0.0134474 +policy/LossBefore -0.0016946 +policy/dLoss 0.0117528 +---------------------------------------- ----------- +2025-04-03 06:41:46 | [rl2_trainer] epoch #317 | Optimizing policy... +2025-04-03 06:41:46 | [rl2_trainer] epoch #317 | Fitting baseline... +2025-04-03 06:41:46 | [rl2_trainer] epoch #317 | Computing loss before +2025-04-03 06:41:47 | [rl2_trainer] epoch #317 | Computing KL before +2025-04-03 06:41:47 | [rl2_trainer] epoch #317 | Optimizing +2025-04-03 06:42:24 | [rl2_trainer] epoch #317 | Computing KL after +2025-04-03 06:42:25 | [rl2_trainer] epoch #317 | Computing loss after +2025-04-03 06:42:26 | [rl2_trainer] epoch #317 | Saving snapshot... +2025-04-03 06:42:26 | [rl2_trainer] epoch #317 | Saved +2025-04-03 06:42:26 | [rl2_trainer] epoch #317 | Time 60626.88 s +2025-04-03 06:42:26 | [rl2_trainer] epoch #317 | EpochTime 227.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3446 +Average/AverageReturn -24.2125 +Average/Iteration 317 +Average/MaxReturn -1.20905 +Average/MinReturn -49.6356 +Average/NumEpisodes 100 +Average/StdReturn 8.86605 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.734145 +TotalEnvSteps 3.18e+06 +__unnamed_task__/AverageDiscountedReturn -15.3446 +__unnamed_task__/AverageReturn -24.2125 +__unnamed_task__/Iteration 317 +__unnamed_task__/MaxReturn -1.20905 +__unnamed_task__/MinReturn -49.6356 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.86605 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.57321 +policy/KL 0.0127746 +policy/KLBefore 0 +policy/LossAfter -0.0348579 +policy/LossBefore -0.00253091 +policy/dLoss 0.032327 +---------------------------------------- ------------ +2025-04-03 06:45:30 | [rl2_trainer] epoch #318 | Optimizing policy... +2025-04-03 06:45:30 | [rl2_trainer] epoch #318 | Fitting baseline... +2025-04-03 06:45:30 | [rl2_trainer] epoch #318 | Computing loss before +2025-04-03 06:45:30 | [rl2_trainer] epoch #318 | Computing KL before +2025-04-03 06:45:31 | [rl2_trainer] epoch #318 | Optimizing +2025-04-03 06:46:06 | [rl2_trainer] epoch #318 | Computing KL after +2025-04-03 06:46:06 | [rl2_trainer] epoch #318 | Computing loss after +2025-04-03 06:46:07 | [rl2_trainer] epoch #318 | Saving snapshot... +2025-04-03 06:46:07 | [rl2_trainer] epoch #318 | Saved +2025-04-03 06:46:07 | [rl2_trainer] epoch #318 | Time 60848.42 s +2025-04-03 06:46:07 | [rl2_trainer] epoch #318 | EpochTime 221.54 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.5068 +Average/AverageReturn -25.9991 +Average/Iteration 318 +Average/MaxReturn -15.1287 +Average/MinReturn -43.0159 +Average/NumEpisodes 100 +Average/StdReturn 6.64997 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.79388 +TotalEnvSteps 3.19e+06 +__unnamed_task__/AverageDiscountedReturn -16.5068 +__unnamed_task__/AverageReturn -25.9991 +__unnamed_task__/Iteration 318 +__unnamed_task__/MaxReturn -15.1287 +__unnamed_task__/MinReturn -43.0159 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.64997 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.54662 +policy/KL 0.0113409 +policy/KLBefore 0 +policy/LossAfter -0.0164593 +policy/LossBefore -0.00786132 +policy/dLoss 0.00859799 +---------------------------------------- ------------ +2025-04-03 06:50:00 | [rl2_trainer] epoch #319 | Optimizing policy... +2025-04-03 06:50:00 | [rl2_trainer] epoch #319 | Fitting baseline... +2025-04-03 06:50:00 | [rl2_trainer] epoch #319 | Computing loss before +2025-04-03 06:50:01 | [rl2_trainer] epoch #319 | Computing KL before +2025-04-03 06:50:01 | [rl2_trainer] epoch #319 | Optimizing +2025-04-03 06:50:38 | [rl2_trainer] epoch #319 | Computing KL after +2025-04-03 06:50:39 | [rl2_trainer] epoch #319 | Computing loss after +2025-04-03 06:50:40 | [rl2_trainer] epoch #319 | Saving snapshot... +2025-04-03 06:50:40 | [rl2_trainer] epoch #319 | Saved +2025-04-03 06:50:40 | [rl2_trainer] epoch #319 | Time 61120.68 s +2025-04-03 06:50:40 | [rl2_trainer] epoch #319 | EpochTime 272.26 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.9118 +Average/AverageReturn -25.3974 +Average/Iteration 319 +Average/MaxReturn -6.6667 +Average/MinReturn -54.6085 +Average/NumEpisodes 100 +Average/StdReturn 9.50698 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.686662 +TotalEnvSteps 3.2e+06 +__unnamed_task__/AverageDiscountedReturn -15.9118 +__unnamed_task__/AverageReturn -25.3974 +__unnamed_task__/Iteration 319 +__unnamed_task__/MaxReturn -6.6667 +__unnamed_task__/MinReturn -54.6085 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.50698 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.53786 +policy/KL 0.012672 +policy/KLBefore 0 +policy/LossAfter -0.0573645 +policy/LossBefore -0.00816775 +policy/dLoss 0.0491968 +---------------------------------------- ------------ +2025-04-03 06:52:49 | [rl2_trainer] epoch #320 | Optimizing policy... +2025-04-03 06:52:50 | [rl2_trainer] epoch #320 | Fitting baseline... +2025-04-03 06:52:50 | [rl2_trainer] epoch #320 | Computing loss before +2025-04-03 06:52:50 | [rl2_trainer] epoch #320 | Computing KL before +2025-04-03 06:52:51 | [rl2_trainer] epoch #320 | Optimizing +2025-04-03 06:53:27 | [rl2_trainer] epoch #320 | Computing KL after +2025-04-03 06:53:27 | [rl2_trainer] epoch #320 | Computing loss after +2025-04-03 06:53:28 | [rl2_trainer] epoch #320 | Saving snapshot... +2025-04-03 06:53:28 | [rl2_trainer] epoch #320 | Saved +2025-04-03 06:53:28 | [rl2_trainer] epoch #320 | Time 61289.38 s +2025-04-03 06:53:28 | [rl2_trainer] epoch #320 | EpochTime 168.69 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.9357 +Average/AverageReturn -21.0822 +Average/Iteration 320 +Average/MaxReturn -2.77281 +Average/MinReturn -36.1659 +Average/NumEpisodes 100 +Average/StdReturn 5.28979 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.713198 +TotalEnvSteps 3.21e+06 +__unnamed_task__/AverageDiscountedReturn -13.9357 +__unnamed_task__/AverageReturn -21.0822 +__unnamed_task__/Iteration 320 +__unnamed_task__/MaxReturn -2.77281 +__unnamed_task__/MinReturn -36.1659 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.28979 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.53107 +policy/KL 0.010708 +policy/KLBefore 0 +policy/LossAfter -0.00823793 +policy/LossBefore 0.00190915 +policy/dLoss 0.0101471 +---------------------------------------- ------------ +2025-04-03 06:55:38 | [rl2_trainer] epoch #321 | Optimizing policy... +2025-04-03 06:55:38 | [rl2_trainer] epoch #321 | Fitting baseline... +2025-04-03 06:55:38 | [rl2_trainer] epoch #321 | Computing loss before +2025-04-03 06:55:39 | [rl2_trainer] epoch #321 | Computing KL before +2025-04-03 06:55:40 | [rl2_trainer] epoch #321 | Optimizing +2025-04-03 06:56:15 | [rl2_trainer] epoch #321 | Computing KL after +2025-04-03 06:56:16 | [rl2_trainer] epoch #321 | Computing loss after +2025-04-03 06:56:17 | [rl2_trainer] epoch #321 | Saving snapshot... +2025-04-03 06:56:17 | [rl2_trainer] epoch #321 | Saved +2025-04-03 06:56:17 | [rl2_trainer] epoch #321 | Time 61457.76 s +2025-04-03 06:56:17 | [rl2_trainer] epoch #321 | EpochTime 168.38 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6042 +Average/AverageReturn -20.6925 +Average/Iteration 321 +Average/MaxReturn 7.05919 +Average/MinReturn -34.9563 +Average/NumEpisodes 100 +Average/StdReturn 5.72618 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.551279 +TotalEnvSteps 3.22e+06 +__unnamed_task__/AverageDiscountedReturn -13.6042 +__unnamed_task__/AverageReturn -20.6925 +__unnamed_task__/Iteration 321 +__unnamed_task__/MaxReturn 7.05919 +__unnamed_task__/MinReturn -34.9563 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.72618 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.52125 +policy/KL 0.01163 +policy/KLBefore 0 +policy/LossAfter -0.0262061 +policy/LossBefore -0.00956942 +policy/dLoss 0.0166367 +---------------------------------------- ------------ +2025-04-03 06:58:26 | [rl2_trainer] epoch #322 | Optimizing policy... +2025-04-03 06:58:26 | [rl2_trainer] epoch #322 | Fitting baseline... +2025-04-03 06:58:26 | [rl2_trainer] epoch #322 | Computing loss before +2025-04-03 06:58:27 | [rl2_trainer] epoch #322 | Computing KL before +2025-04-03 06:58:27 | [rl2_trainer] epoch #322 | Optimizing +2025-04-03 06:59:03 | [rl2_trainer] epoch #322 | Computing KL after +2025-04-03 06:59:03 | [rl2_trainer] epoch #322 | Computing loss after +2025-04-03 06:59:04 | [rl2_trainer] epoch #322 | Saving snapshot... +2025-04-03 06:59:04 | [rl2_trainer] epoch #322 | Saved +2025-04-03 06:59:04 | [rl2_trainer] epoch #322 | Time 61625.12 s +2025-04-03 06:59:04 | [rl2_trainer] epoch #322 | EpochTime 167.35 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.2243 +Average/AverageReturn -19.8898 +Average/Iteration 322 +Average/MaxReturn -8.37153 +Average/MinReturn -34.9309 +Average/NumEpisodes 100 +Average/StdReturn 4.59572 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.607513 +TotalEnvSteps 3.23e+06 +__unnamed_task__/AverageDiscountedReturn -13.2243 +__unnamed_task__/AverageReturn -19.8898 +__unnamed_task__/Iteration 322 +__unnamed_task__/MaxReturn -8.37153 +__unnamed_task__/MinReturn -34.9309 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.59572 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.51976 +policy/KL 0.013366 +policy/KLBefore 0 +policy/LossAfter -0.015066 +policy/LossBefore -0.000797681 +policy/dLoss 0.0142683 +---------------------------------------- ------------- +2025-04-03 07:03:01 | [rl2_trainer] epoch #323 | Optimizing policy... +2025-04-03 07:03:02 | [rl2_trainer] epoch #323 | Fitting baseline... +2025-04-03 07:03:02 | [rl2_trainer] epoch #323 | Computing loss before +2025-04-03 07:03:02 | [rl2_trainer] epoch #323 | Computing KL before +2025-04-03 07:03:03 | [rl2_trainer] epoch #323 | Optimizing +2025-04-03 07:03:40 | [rl2_trainer] epoch #323 | Computing KL after +2025-04-03 07:03:40 | [rl2_trainer] epoch #323 | Computing loss after +2025-04-03 07:03:41 | [rl2_trainer] epoch #323 | Saving snapshot... +2025-04-03 07:03:41 | [rl2_trainer] epoch #323 | Saved +2025-04-03 07:03:41 | [rl2_trainer] epoch #323 | Time 61902.37 s +2025-04-03 07:03:41 | [rl2_trainer] epoch #323 | EpochTime 277.25 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.2389 +Average/AverageReturn -25.8482 +Average/Iteration 323 +Average/MaxReturn -13.3558 +Average/MinReturn -56.1768 +Average/NumEpisodes 100 +Average/StdReturn 8.94234 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.832765 +TotalEnvSteps 3.24e+06 +__unnamed_task__/AverageDiscountedReturn -16.2389 +__unnamed_task__/AverageReturn -25.8482 +__unnamed_task__/Iteration 323 +__unnamed_task__/MaxReturn -13.3558 +__unnamed_task__/MinReturn -56.1768 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.94234 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.50577 +policy/KL 0.00896097 +policy/KLBefore 0 +policy/LossAfter -0.0102823 +policy/LossBefore 0.00950587 +policy/dLoss 0.0197882 +---------------------------------------- ------------ +2025-04-03 07:05:39 | [rl2_trainer] epoch #324 | Optimizing policy... +2025-04-03 07:05:40 | [rl2_trainer] epoch #324 | Fitting baseline... +2025-04-03 07:05:40 | [rl2_trainer] epoch #324 | Computing loss before +2025-04-03 07:05:40 | [rl2_trainer] epoch #324 | Computing KL before +2025-04-03 07:05:41 | [rl2_trainer] epoch #324 | Optimizing +2025-04-03 07:06:17 | [rl2_trainer] epoch #324 | Computing KL after +2025-04-03 07:06:17 | [rl2_trainer] epoch #324 | Computing loss after +2025-04-03 07:06:18 | [rl2_trainer] epoch #324 | Saving snapshot... +2025-04-03 07:06:18 | [rl2_trainer] epoch #324 | Saved +2025-04-03 07:06:18 | [rl2_trainer] epoch #324 | Time 62059.29 s +2025-04-03 07:06:18 | [rl2_trainer] epoch #324 | EpochTime 156.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.1536 +Average/AverageReturn -20.0153 +Average/Iteration 324 +Average/MaxReturn 24.2438 +Average/MinReturn -35.3668 +Average/NumEpisodes 100 +Average/StdReturn 6.85358 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.530151 +TotalEnvSteps 3.25e+06 +__unnamed_task__/AverageDiscountedReturn -13.1536 +__unnamed_task__/AverageReturn -20.0153 +__unnamed_task__/Iteration 324 +__unnamed_task__/MaxReturn 24.2438 +__unnamed_task__/MinReturn -35.3668 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.85358 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.48542 +policy/KL 0.0141361 +policy/KLBefore 0 +policy/LossAfter -0.0259538 +policy/LossBefore -0.00596214 +policy/dLoss 0.0199916 +---------------------------------------- ------------ +2025-04-03 07:09:34 | [rl2_trainer] epoch #325 | Optimizing policy... +2025-04-03 07:09:34 | [rl2_trainer] epoch #325 | Fitting baseline... +2025-04-03 07:09:34 | [rl2_trainer] epoch #325 | Computing loss before +2025-04-03 07:09:34 | [rl2_trainer] epoch #325 | Computing KL before +2025-04-03 07:09:35 | [rl2_trainer] epoch #325 | Optimizing +2025-04-03 07:10:12 | [rl2_trainer] epoch #325 | Computing KL after +2025-04-03 07:10:12 | [rl2_trainer] epoch #325 | Computing loss after +2025-04-03 07:10:13 | [rl2_trainer] epoch #325 | Saving snapshot... +2025-04-03 07:10:13 | [rl2_trainer] epoch #325 | Saved +2025-04-03 07:10:13 | [rl2_trainer] epoch #325 | Time 62294.02 s +2025-04-03 07:10:13 | [rl2_trainer] epoch #325 | EpochTime 234.72 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.6552 +Average/AverageReturn -26.2685 +Average/Iteration 325 +Average/MaxReturn -13.6338 +Average/MinReturn -46.026 +Average/NumEpisodes 100 +Average/StdReturn 8.26598 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.677669 +TotalEnvSteps 3.26e+06 +__unnamed_task__/AverageDiscountedReturn -16.6552 +__unnamed_task__/AverageReturn -26.2685 +__unnamed_task__/Iteration 325 +__unnamed_task__/MaxReturn -13.6338 +__unnamed_task__/MinReturn -46.026 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.26598 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.46279 +policy/KL 0.0145526 +policy/KLBefore 0 +policy/LossAfter -0.0339889 +policy/LossBefore 0.00152154 +policy/dLoss 0.0355105 +---------------------------------------- ------------ +2025-04-03 07:12:41 | [rl2_trainer] epoch #326 | Optimizing policy... +2025-04-03 07:12:41 | [rl2_trainer] epoch #326 | Fitting baseline... +2025-04-03 07:12:41 | [rl2_trainer] epoch #326 | Computing loss before +2025-04-03 07:12:41 | [rl2_trainer] epoch #326 | Computing KL before +2025-04-03 07:12:42 | [rl2_trainer] epoch #326 | Optimizing +2025-04-03 07:13:17 | [rl2_trainer] epoch #326 | Computing KL after +2025-04-03 07:13:18 | [rl2_trainer] epoch #326 | Computing loss after +2025-04-03 07:13:19 | [rl2_trainer] epoch #326 | Saving snapshot... +2025-04-03 07:13:19 | [rl2_trainer] epoch #326 | Saved +2025-04-03 07:13:19 | [rl2_trainer] epoch #326 | Time 62479.85 s +2025-04-03 07:13:19 | [rl2_trainer] epoch #326 | EpochTime 185.83 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.8175 +Average/AverageReturn -17.8434 +Average/Iteration 326 +Average/MaxReturn 2.64314 +Average/MinReturn -41.1271 +Average/NumEpisodes 100 +Average/StdReturn 6.17605 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.571613 +TotalEnvSteps 3.27e+06 +__unnamed_task__/AverageDiscountedReturn -11.8175 +__unnamed_task__/AverageReturn -17.8434 +__unnamed_task__/Iteration 326 +__unnamed_task__/MaxReturn 2.64314 +__unnamed_task__/MinReturn -41.1271 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.17605 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.43602 +policy/KL 0.0108643 +policy/KLBefore 0 +policy/LossAfter -0.0133238 +policy/LossBefore 0.000174516 +policy/dLoss 0.0134983 +---------------------------------------- ------------- +2025-04-03 07:16:07 | [rl2_trainer] epoch #327 | Optimizing policy... +2025-04-03 07:16:08 | [rl2_trainer] epoch #327 | Fitting baseline... +2025-04-03 07:16:08 | [rl2_trainer] epoch #327 | Computing loss before +2025-04-03 07:16:08 | [rl2_trainer] epoch #327 | Computing KL before +2025-04-03 07:16:09 | [rl2_trainer] epoch #327 | Optimizing +2025-04-03 07:16:45 | [rl2_trainer] epoch #327 | Computing KL after +2025-04-03 07:16:46 | [rl2_trainer] epoch #327 | Computing loss after +2025-04-03 07:16:47 | [rl2_trainer] epoch #327 | Saving snapshot... +2025-04-03 07:16:47 | [rl2_trainer] epoch #327 | Saved +2025-04-03 07:16:47 | [rl2_trainer] epoch #327 | Time 62687.73 s +2025-04-03 07:16:47 | [rl2_trainer] epoch #327 | EpochTime 207.88 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2848 +Average/AverageReturn -22.5208 +Average/Iteration 327 +Average/MaxReturn 6.4208 +Average/MinReturn -55.9829 +Average/NumEpisodes 100 +Average/StdReturn 11.758 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.873861 +TotalEnvSteps 3.28e+06 +__unnamed_task__/AverageDiscountedReturn -14.2848 +__unnamed_task__/AverageReturn -22.5208 +__unnamed_task__/Iteration 327 +__unnamed_task__/MaxReturn 6.4208 +__unnamed_task__/MinReturn -55.9829 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.758 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.42686 +policy/KL 0.0170574 +policy/KLBefore 0 +policy/LossAfter -0.0304636 +policy/LossBefore 0.00179663 +policy/dLoss 0.0322602 +---------------------------------------- ------------ +2025-04-03 07:19:05 | [rl2_trainer] epoch #328 | Optimizing policy... +2025-04-03 07:19:05 | [rl2_trainer] epoch #328 | Fitting baseline... +2025-04-03 07:19:05 | [rl2_trainer] epoch #328 | Computing loss before +2025-04-03 07:19:06 | [rl2_trainer] epoch #328 | Computing KL before +2025-04-03 07:19:07 | [rl2_trainer] epoch #328 | Optimizing +2025-04-03 07:19:42 | [rl2_trainer] epoch #328 | Computing KL after +2025-04-03 07:19:43 | [rl2_trainer] epoch #328 | Computing loss after +2025-04-03 07:19:44 | [rl2_trainer] epoch #328 | Saving snapshot... +2025-04-03 07:19:44 | [rl2_trainer] epoch #328 | Saved +2025-04-03 07:19:44 | [rl2_trainer] epoch #328 | Time 62864.74 s +2025-04-03 07:19:44 | [rl2_trainer] epoch #328 | EpochTime 177.00 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.4255 +Average/AverageReturn -19.2771 +Average/Iteration 328 +Average/MaxReturn -0.475886 +Average/MinReturn -28.6361 +Average/NumEpisodes 100 +Average/StdReturn 4.36519 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.442277 +TotalEnvSteps 3.29e+06 +__unnamed_task__/AverageDiscountedReturn -12.4255 +__unnamed_task__/AverageReturn -19.2771 +__unnamed_task__/Iteration 328 +__unnamed_task__/MaxReturn -0.475886 +__unnamed_task__/MinReturn -28.6361 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.36519 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.42284 +policy/KL 0.0124743 +policy/KLBefore 0 +policy/LossAfter -0.0173043 +policy/LossBefore 0.000668765 +policy/dLoss 0.017973 +---------------------------------------- ------------- +2025-04-03 07:22:38 | [rl2_trainer] epoch #329 | Optimizing policy... +2025-04-03 07:22:38 | [rl2_trainer] epoch #329 | Fitting baseline... +2025-04-03 07:22:38 | [rl2_trainer] epoch #329 | Computing loss before +2025-04-03 07:22:39 | [rl2_trainer] epoch #329 | Computing KL before +2025-04-03 07:22:40 | [rl2_trainer] epoch #329 | Optimizing +2025-04-03 07:23:16 | [rl2_trainer] epoch #329 | Computing KL after +2025-04-03 07:23:16 | [rl2_trainer] epoch #329 | Computing loss after +2025-04-03 07:23:17 | [rl2_trainer] epoch #329 | Saving snapshot... +2025-04-03 07:23:17 | [rl2_trainer] epoch #329 | Saved +2025-04-03 07:23:17 | [rl2_trainer] epoch #329 | Time 63078.11 s +2025-04-03 07:23:17 | [rl2_trainer] epoch #329 | EpochTime 213.38 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.4878 +Average/AverageReturn -22.486 +Average/Iteration 329 +Average/MaxReturn 3.14345 +Average/MinReturn -53.2212 +Average/NumEpisodes 100 +Average/StdReturn 13.2435 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.894282 +TotalEnvSteps 3.3e+06 +__unnamed_task__/AverageDiscountedReturn -14.4878 +__unnamed_task__/AverageReturn -22.486 +__unnamed_task__/Iteration 329 +__unnamed_task__/MaxReturn 3.14345 +__unnamed_task__/MinReturn -53.2212 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.2435 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.40694 +policy/KL 0.0144546 +policy/KLBefore 0 +policy/LossAfter -0.0410819 +policy/LossBefore -0.0061281 +policy/dLoss 0.0349538 +---------------------------------------- ----------- +2025-04-03 07:25:15 | [rl2_trainer] epoch #330 | Optimizing policy... +2025-04-03 07:25:16 | [rl2_trainer] epoch #330 | Fitting baseline... +2025-04-03 07:25:16 | [rl2_trainer] epoch #330 | Computing loss before +2025-04-03 07:25:16 | [rl2_trainer] epoch #330 | Computing KL before +2025-04-03 07:25:17 | [rl2_trainer] epoch #330 | Optimizing +2025-04-03 07:25:53 | [rl2_trainer] epoch #330 | Computing KL after +2025-04-03 07:25:53 | [rl2_trainer] epoch #330 | Computing loss after +2025-04-03 07:25:54 | [rl2_trainer] epoch #330 | Saving snapshot... +2025-04-03 07:25:54 | [rl2_trainer] epoch #330 | Saved +2025-04-03 07:25:54 | [rl2_trainer] epoch #330 | Time 63235.28 s +2025-04-03 07:25:54 | [rl2_trainer] epoch #330 | EpochTime 157.17 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.0011 +Average/AverageReturn -19.5497 +Average/Iteration 330 +Average/MaxReturn 9.8085 +Average/MinReturn -42.8158 +Average/NumEpisodes 100 +Average/StdReturn 6.18977 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.554811 +TotalEnvSteps 3.31e+06 +__unnamed_task__/AverageDiscountedReturn -13.0011 +__unnamed_task__/AverageReturn -19.5497 +__unnamed_task__/Iteration 330 +__unnamed_task__/MaxReturn 9.8085 +__unnamed_task__/MinReturn -42.8158 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.18977 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.40496 +policy/KL 0.0122243 +policy/KLBefore 0 +policy/LossAfter -0.0293152 +policy/LossBefore -0.00658745 +policy/dLoss 0.0227277 +---------------------------------------- ------------ +2025-04-03 07:28:07 | [rl2_trainer] epoch #331 | Optimizing policy... +2025-04-03 07:28:07 | [rl2_trainer] epoch #331 | Fitting baseline... +2025-04-03 07:28:07 | [rl2_trainer] epoch #331 | Computing loss before +2025-04-03 07:28:08 | [rl2_trainer] epoch #331 | Computing KL before +2025-04-03 07:28:08 | [rl2_trainer] epoch #331 | Optimizing +2025-04-03 07:28:45 | [rl2_trainer] epoch #331 | Computing KL after +2025-04-03 07:28:45 | [rl2_trainer] epoch #331 | Computing loss after +2025-04-03 07:28:46 | [rl2_trainer] epoch #331 | Saving snapshot... +2025-04-03 07:28:46 | [rl2_trainer] epoch #331 | Saved +2025-04-03 07:28:46 | [rl2_trainer] epoch #331 | Time 63407.18 s +2025-04-03 07:28:46 | [rl2_trainer] epoch #331 | EpochTime 171.89 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.918 +Average/AverageReturn -15.9197 +Average/Iteration 331 +Average/MaxReturn 28.7653 +Average/MinReturn -60.1436 +Average/NumEpisodes 100 +Average/StdReturn 10.7122 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.679253 +TotalEnvSteps 3.32e+06 +__unnamed_task__/AverageDiscountedReturn -10.918 +__unnamed_task__/AverageReturn -15.9197 +__unnamed_task__/Iteration 331 +__unnamed_task__/MaxReturn 28.7653 +__unnamed_task__/MinReturn -60.1436 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.7122 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.39232 +policy/KL 0.0147797 +policy/KLBefore 0 +policy/LossAfter -0.0432352 +policy/LossBefore -0.00616597 +policy/dLoss 0.0370692 +---------------------------------------- ------------ +2025-04-03 07:30:13 | [rl2_trainer] epoch #332 | Optimizing policy... +2025-04-03 07:30:13 | [rl2_trainer] epoch #332 | Fitting baseline... +2025-04-03 07:30:13 | [rl2_trainer] epoch #332 | Computing loss before +2025-04-03 07:30:14 | [rl2_trainer] epoch #332 | Computing KL before +2025-04-03 07:30:15 | [rl2_trainer] epoch #332 | Optimizing +2025-04-03 07:30:49 | [rl2_trainer] epoch #332 | Computing KL after +2025-04-03 07:30:50 | [rl2_trainer] epoch #332 | Computing loss after +2025-04-03 07:30:51 | [rl2_trainer] epoch #332 | Saving snapshot... +2025-04-03 07:30:51 | [rl2_trainer] epoch #332 | Saved +2025-04-03 07:30:51 | [rl2_trainer] epoch #332 | Time 63531.97 s +2025-04-03 07:30:51 | [rl2_trainer] epoch #332 | EpochTime 124.79 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.4898 +Average/AverageReturn -15.2877 +Average/Iteration 332 +Average/MaxReturn 7.7969 +Average/MinReturn -59.375 +Average/NumEpisodes 100 +Average/StdReturn 8.45892 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.603896 +TotalEnvSteps 3.33e+06 +__unnamed_task__/AverageDiscountedReturn -10.4898 +__unnamed_task__/AverageReturn -15.2877 +__unnamed_task__/Iteration 332 +__unnamed_task__/MaxReturn 7.7969 +__unnamed_task__/MinReturn -59.375 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.45892 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.36744 +policy/KL 0.0136003 +policy/KLBefore 0 +policy/LossAfter -0.0254091 +policy/LossBefore 0.00183932 +policy/dLoss 0.0272485 +---------------------------------------- ------------ +2025-04-03 07:34:41 | [rl2_trainer] epoch #333 | Optimizing policy... +2025-04-03 07:34:42 | [rl2_trainer] epoch #333 | Fitting baseline... +2025-04-03 07:34:42 | [rl2_trainer] epoch #333 | Computing loss before +2025-04-03 07:34:42 | [rl2_trainer] epoch #333 | Computing KL before +2025-04-03 07:34:43 | [rl2_trainer] epoch #333 | Optimizing +2025-04-03 07:35:19 | [rl2_trainer] epoch #333 | Computing KL after +2025-04-03 07:35:20 | [rl2_trainer] epoch #333 | Computing loss after +2025-04-03 07:35:21 | [rl2_trainer] epoch #333 | Saving snapshot... +2025-04-03 07:35:21 | [rl2_trainer] epoch #333 | Saved +2025-04-03 07:35:21 | [rl2_trainer] epoch #333 | Time 63801.88 s +2025-04-03 07:35:21 | [rl2_trainer] epoch #333 | EpochTime 269.90 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2641 +Average/AverageReturn -24.2484 +Average/Iteration 333 +Average/MaxReturn -11.3974 +Average/MinReturn -50.7482 +Average/NumEpisodes 100 +Average/StdReturn 8.93411 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.864511 +TotalEnvSteps 3.34e+06 +__unnamed_task__/AverageDiscountedReturn -15.2641 +__unnamed_task__/AverageReturn -24.2484 +__unnamed_task__/Iteration 333 +__unnamed_task__/MaxReturn -11.3974 +__unnamed_task__/MinReturn -50.7482 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.93411 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.33887 +policy/KL 0.0135351 +policy/KLBefore 0 +policy/LossAfter -0.0134436 +policy/LossBefore 0.000541792 +policy/dLoss 0.0139854 +---------------------------------------- ------------- +2025-04-03 07:37:45 | [rl2_trainer] epoch #334 | Optimizing policy... +2025-04-03 07:37:45 | [rl2_trainer] epoch #334 | Fitting baseline... +2025-04-03 07:37:45 | [rl2_trainer] epoch #334 | Computing loss before +2025-04-03 07:37:45 | [rl2_trainer] epoch #334 | Computing KL before +2025-04-03 07:37:46 | [rl2_trainer] epoch #334 | Optimizing +2025-04-03 07:38:23 | [rl2_trainer] epoch #334 | Computing KL after +2025-04-03 07:38:23 | [rl2_trainer] epoch #334 | Computing loss after +2025-04-03 07:38:24 | [rl2_trainer] epoch #334 | Saving snapshot... +2025-04-03 07:38:24 | [rl2_trainer] epoch #334 | Saved +2025-04-03 07:38:24 | [rl2_trainer] epoch #334 | Time 63985.30 s +2025-04-03 07:38:24 | [rl2_trainer] epoch #334 | EpochTime 183.41 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0409 +Average/AverageReturn -23.8316 +Average/Iteration 334 +Average/MaxReturn -6.57067 +Average/MinReturn -56.9656 +Average/NumEpisodes 100 +Average/StdReturn 8.27279 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.721016 +TotalEnvSteps 3.35e+06 +__unnamed_task__/AverageDiscountedReturn -15.0409 +__unnamed_task__/AverageReturn -23.8316 +__unnamed_task__/Iteration 334 +__unnamed_task__/MaxReturn -6.57067 +__unnamed_task__/MinReturn -56.9656 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.27279 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.32434 +policy/KL 0.0114294 +policy/KLBefore 0 +policy/LossAfter -0.0299382 +policy/LossBefore -0.000785925 +policy/dLoss 0.0291523 +---------------------------------------- ------------- +2025-04-03 07:40:52 | [rl2_trainer] epoch #335 | Optimizing policy... +2025-04-03 07:40:52 | [rl2_trainer] epoch #335 | Fitting baseline... +2025-04-03 07:40:52 | [rl2_trainer] epoch #335 | Computing loss before +2025-04-03 07:40:53 | [rl2_trainer] epoch #335 | Computing KL before +2025-04-03 07:40:53 | [rl2_trainer] epoch #335 | Optimizing +2025-04-03 07:41:29 | [rl2_trainer] epoch #335 | Computing KL after +2025-04-03 07:41:30 | [rl2_trainer] epoch #335 | Computing loss after +2025-04-03 07:41:31 | [rl2_trainer] epoch #335 | Saving snapshot... +2025-04-03 07:41:31 | [rl2_trainer] epoch #335 | Saved +2025-04-03 07:41:31 | [rl2_trainer] epoch #335 | Time 64171.62 s +2025-04-03 07:41:31 | [rl2_trainer] epoch #335 | EpochTime 186.32 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.07688 +Average/AverageReturn -13.5124 +Average/Iteration 335 +Average/MaxReturn 10.1921 +Average/MinReturn -23.5716 +Average/NumEpisodes 100 +Average/StdReturn 7.08953 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.521599 +TotalEnvSteps 3.36e+06 +__unnamed_task__/AverageDiscountedReturn -9.07688 +__unnamed_task__/AverageReturn -13.5124 +__unnamed_task__/Iteration 335 +__unnamed_task__/MaxReturn 10.1921 +__unnamed_task__/MinReturn -23.5716 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.08953 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.28937 +policy/KL 0.0175429 +policy/KLBefore 0 +policy/LossAfter -0.00300879 +policy/LossBefore 0.00621317 +policy/dLoss 0.00922196 +---------------------------------------- ------------ +2025-04-03 07:44:33 | [rl2_trainer] epoch #336 | Optimizing policy... +2025-04-03 07:44:33 | [rl2_trainer] epoch #336 | Fitting baseline... +2025-04-03 07:44:33 | [rl2_trainer] epoch #336 | Computing loss before +2025-04-03 07:44:34 | [rl2_trainer] epoch #336 | Computing KL before +2025-04-03 07:44:34 | [rl2_trainer] epoch #336 | Optimizing +2025-04-03 07:45:10 | [rl2_trainer] epoch #336 | Computing KL after +2025-04-03 07:45:10 | [rl2_trainer] epoch #336 | Computing loss after +2025-04-03 07:45:11 | [rl2_trainer] epoch #336 | Saving snapshot... +2025-04-03 07:45:11 | [rl2_trainer] epoch #336 | Saved +2025-04-03 07:45:11 | [rl2_trainer] epoch #336 | Time 64392.09 s +2025-04-03 07:45:11 | [rl2_trainer] epoch #336 | EpochTime 220.47 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.3577 +Average/AverageReturn -19.4775 +Average/Iteration 336 +Average/MaxReturn 6.30271 +Average/MinReturn -47.0904 +Average/NumEpisodes 100 +Average/StdReturn 11.3855 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.855003 +TotalEnvSteps 3.37e+06 +__unnamed_task__/AverageDiscountedReturn -12.3577 +__unnamed_task__/AverageReturn -19.4775 +__unnamed_task__/Iteration 336 +__unnamed_task__/MaxReturn 6.30271 +__unnamed_task__/MinReturn -47.0904 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.3855 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.25667 +policy/KL 0.0118552 +policy/KLBefore 0 +policy/LossAfter -0.0296028 +policy/LossBefore -0.00844706 +policy/dLoss 0.0211557 +---------------------------------------- ------------ +2025-04-03 07:48:21 | [rl2_trainer] epoch #337 | Optimizing policy... +2025-04-03 07:48:21 | [rl2_trainer] epoch #337 | Fitting baseline... +2025-04-03 07:48:21 | [rl2_trainer] epoch #337 | Computing loss before +2025-04-03 07:48:22 | [rl2_trainer] epoch #337 | Computing KL before +2025-04-03 07:48:23 | [rl2_trainer] epoch #337 | Optimizing +2025-04-03 07:48:57 | [rl2_trainer] epoch #337 | Computing KL after +2025-04-03 07:48:58 | [rl2_trainer] epoch #337 | Computing loss after +2025-04-03 07:48:59 | [rl2_trainer] epoch #337 | Saving snapshot... +2025-04-03 07:48:59 | [rl2_trainer] epoch #337 | Saved +2025-04-03 07:48:59 | [rl2_trainer] epoch #337 | Time 64619.61 s +2025-04-03 07:48:59 | [rl2_trainer] epoch #337 | EpochTime 227.51 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -14.8769 +Average/AverageReturn -23.5175 +Average/Iteration 337 +Average/MaxReturn 9.04447 +Average/MinReturn -52.5139 +Average/NumEpisodes 100 +Average/StdReturn 9.03443 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.764055 +TotalEnvSteps 3.38e+06 +__unnamed_task__/AverageDiscountedReturn -14.8769 +__unnamed_task__/AverageReturn -23.5175 +__unnamed_task__/Iteration 337 +__unnamed_task__/MaxReturn 9.04447 +__unnamed_task__/MinReturn -52.5139 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.03443 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.23327 +policy/KL 0.0131185 +policy/KLBefore 0 +policy/LossAfter -0.0362824 +policy/LossBefore -0.0133059 +policy/dLoss 0.0229766 +---------------------------------------- ----------- +2025-04-03 07:51:45 | [rl2_trainer] epoch #338 | Optimizing policy... +2025-04-03 07:51:46 | [rl2_trainer] epoch #338 | Fitting baseline... +2025-04-03 07:51:46 | [rl2_trainer] epoch #338 | Computing loss before +2025-04-03 07:51:46 | [rl2_trainer] epoch #338 | Computing KL before +2025-04-03 07:51:47 | [rl2_trainer] epoch #338 | Optimizing +2025-04-03 07:52:22 | [rl2_trainer] epoch #338 | Computing KL after +2025-04-03 07:52:23 | [rl2_trainer] epoch #338 | Computing loss after +2025-04-03 07:52:24 | [rl2_trainer] epoch #338 | Saving snapshot... +2025-04-03 07:52:24 | [rl2_trainer] epoch #338 | Saved +2025-04-03 07:52:24 | [rl2_trainer] epoch #338 | Time 64824.96 s +2025-04-03 07:52:24 | [rl2_trainer] epoch #338 | EpochTime 205.35 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.8733 +Average/AverageReturn -21.8764 +Average/Iteration 338 +Average/MaxReturn 6.29952 +Average/MinReturn -53.2766 +Average/NumEpisodes 100 +Average/StdReturn 13.1301 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.880649 +TotalEnvSteps 3.39e+06 +__unnamed_task__/AverageDiscountedReturn -13.8733 +__unnamed_task__/AverageReturn -21.8764 +__unnamed_task__/Iteration 338 +__unnamed_task__/MaxReturn 6.29952 +__unnamed_task__/MinReturn -53.2766 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1301 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.19292 +policy/KL 0.0158232 +policy/KLBefore 0 +policy/LossAfter -0.0277968 +policy/LossBefore 0.00460162 +policy/dLoss 0.0323984 +---------------------------------------- ------------ +2025-04-03 07:54:48 | [rl2_trainer] epoch #339 | Optimizing policy... +2025-04-03 07:54:48 | [rl2_trainer] epoch #339 | Fitting baseline... +2025-04-03 07:54:48 | [rl2_trainer] epoch #339 | Computing loss before +2025-04-03 07:54:49 | [rl2_trainer] epoch #339 | Computing KL before +2025-04-03 07:54:49 | [rl2_trainer] epoch #339 | Optimizing +2025-04-03 07:55:24 | [rl2_trainer] epoch #339 | Computing KL after +2025-04-03 07:55:25 | [rl2_trainer] epoch #339 | Computing loss after +2025-04-03 07:55:25 | [rl2_trainer] epoch #339 | Saving snapshot... +2025-04-03 07:55:25 | [rl2_trainer] epoch #339 | Saved +2025-04-03 07:55:25 | [rl2_trainer] epoch #339 | Time 65006.41 s +2025-04-03 07:55:25 | [rl2_trainer] epoch #339 | EpochTime 181.44 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4246 +Average/AverageReturn -24.4659 +Average/Iteration 339 +Average/MaxReturn 6.61796 +Average/MinReturn -59.2023 +Average/NumEpisodes 100 +Average/StdReturn 10.4641 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.784327 +TotalEnvSteps 3.4e+06 +__unnamed_task__/AverageDiscountedReturn -15.4246 +__unnamed_task__/AverageReturn -24.4659 +__unnamed_task__/Iteration 339 +__unnamed_task__/MaxReturn 6.61796 +__unnamed_task__/MinReturn -59.2023 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4641 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.14772 +policy/KL 0.01194 +policy/KLBefore 0 +policy/LossAfter -0.0193497 +policy/LossBefore -0.00083219 +policy/dLoss 0.0185175 +---------------------------------------- ------------ +2025-04-03 07:57:22 | [rl2_trainer] epoch #340 | Optimizing policy... +2025-04-03 07:57:22 | [rl2_trainer] epoch #340 | Fitting baseline... +2025-04-03 07:57:22 | [rl2_trainer] epoch #340 | Computing loss before +2025-04-03 07:57:23 | [rl2_trainer] epoch #340 | Computing KL before +2025-04-03 07:57:23 | [rl2_trainer] epoch #340 | Optimizing +2025-04-03 07:58:00 | [rl2_trainer] epoch #340 | Computing KL after +2025-04-03 07:58:01 | [rl2_trainer] epoch #340 | Computing loss after +2025-04-03 07:58:02 | [rl2_trainer] epoch #340 | Saving snapshot... +2025-04-03 07:58:02 | [rl2_trainer] epoch #340 | Saved +2025-04-03 07:58:02 | [rl2_trainer] epoch #340 | Time 65162.78 s +2025-04-03 07:58:02 | [rl2_trainer] epoch #340 | EpochTime 156.37 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.6898 +Average/AverageReturn -20.8341 +Average/Iteration 340 +Average/MaxReturn -1.01815 +Average/MinReturn -52.5496 +Average/NumEpisodes 100 +Average/StdReturn 7.2321 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.736894 +TotalEnvSteps 3.41e+06 +__unnamed_task__/AverageDiscountedReturn -13.6898 +__unnamed_task__/AverageReturn -20.8341 +__unnamed_task__/Iteration 340 +__unnamed_task__/MaxReturn -1.01815 +__unnamed_task__/MinReturn -52.5496 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.2321 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.11459 +policy/KL 0.0105658 +policy/KLBefore 0 +policy/LossAfter -0.0185708 +policy/LossBefore -0.00123499 +policy/dLoss 0.0173358 +---------------------------------------- ------------ +2025-04-03 08:00:55 | [rl2_trainer] epoch #341 | Optimizing policy... +2025-04-03 08:00:56 | [rl2_trainer] epoch #341 | Fitting baseline... +2025-04-03 08:00:56 | [rl2_trainer] epoch #341 | Computing loss before +2025-04-03 08:00:56 | [rl2_trainer] epoch #341 | Computing KL before +2025-04-03 08:00:57 | [rl2_trainer] epoch #341 | Optimizing +2025-04-03 08:01:32 | [rl2_trainer] epoch #341 | Computing KL after +2025-04-03 08:01:33 | [rl2_trainer] epoch #341 | Computing loss after +2025-04-03 08:01:34 | [rl2_trainer] epoch #341 | Saving snapshot... +2025-04-03 08:01:34 | [rl2_trainer] epoch #341 | Saved +2025-04-03 08:01:34 | [rl2_trainer] epoch #341 | Time 65374.56 s +2025-04-03 08:01:34 | [rl2_trainer] epoch #341 | EpochTime 211.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.5807 +Average/AverageReturn -24.5613 +Average/Iteration 341 +Average/MaxReturn 17.1452 +Average/MinReturn -49.7972 +Average/NumEpisodes 100 +Average/StdReturn 10.8785 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.770452 +TotalEnvSteps 3.42e+06 +__unnamed_task__/AverageDiscountedReturn -15.5807 +__unnamed_task__/AverageReturn -24.5613 +__unnamed_task__/Iteration 341 +__unnamed_task__/MaxReturn 17.1452 +__unnamed_task__/MinReturn -49.7972 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.8785 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.07275 +policy/KL 0.0157511 +policy/KLBefore 0 +policy/LossAfter -0.0214146 +policy/LossBefore 0.00750662 +policy/dLoss 0.0289212 +---------------------------------------- ------------ +2025-04-03 08:04:28 | [rl2_trainer] epoch #342 | Optimizing policy... +2025-04-03 08:04:28 | [rl2_trainer] epoch #342 | Fitting baseline... +2025-04-03 08:04:28 | [rl2_trainer] epoch #342 | Computing loss before +2025-04-03 08:04:29 | [rl2_trainer] epoch #342 | Computing KL before +2025-04-03 08:04:29 | [rl2_trainer] epoch #342 | Optimizing +2025-04-03 08:05:04 | [rl2_trainer] epoch #342 | Computing KL after +2025-04-03 08:05:05 | [rl2_trainer] epoch #342 | Computing loss after +2025-04-03 08:05:06 | [rl2_trainer] epoch #342 | Saving snapshot... +2025-04-03 08:05:06 | [rl2_trainer] epoch #342 | Saved +2025-04-03 08:05:06 | [rl2_trainer] epoch #342 | Time 65586.99 s +2025-04-03 08:05:06 | [rl2_trainer] epoch #342 | EpochTime 212.43 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1977 +Average/AverageReturn -24.1403 +Average/Iteration 342 +Average/MaxReturn 1.46303 +Average/MinReturn -61.2584 +Average/NumEpisodes 100 +Average/StdReturn 9.188 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.846976 +TotalEnvSteps 3.43e+06 +__unnamed_task__/AverageDiscountedReturn -15.1977 +__unnamed_task__/AverageReturn -24.1403 +__unnamed_task__/Iteration 342 +__unnamed_task__/MaxReturn 1.46303 +__unnamed_task__/MinReturn -61.2584 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.188 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.04469 +policy/KL 0.010527 +policy/KLBefore 0 +policy/LossAfter -0.0135131 +policy/LossBefore -0.000732803 +policy/dLoss 0.0127803 +---------------------------------------- ------------- +2025-04-03 08:07:16 | [rl2_trainer] epoch #343 | Optimizing policy... +2025-04-03 08:07:16 | [rl2_trainer] epoch #343 | Fitting baseline... +2025-04-03 08:07:16 | [rl2_trainer] epoch #343 | Computing loss before +2025-04-03 08:07:17 | [rl2_trainer] epoch #343 | Computing KL before +2025-04-03 08:07:18 | [rl2_trainer] epoch #343 | Optimizing +2025-04-03 08:07:52 | [rl2_trainer] epoch #343 | Computing KL after +2025-04-03 08:07:53 | [rl2_trainer] epoch #343 | Computing loss after +2025-04-03 08:07:54 | [rl2_trainer] epoch #343 | Saving snapshot... +2025-04-03 08:07:54 | [rl2_trainer] epoch #343 | Saved +2025-04-03 08:07:54 | [rl2_trainer] epoch #343 | Time 65754.78 s +2025-04-03 08:07:54 | [rl2_trainer] epoch #343 | EpochTime 167.79 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.1224 +Average/AverageReturn -17.9984 +Average/Iteration 343 +Average/MaxReturn 6.99453 +Average/MinReturn -31.4647 +Average/NumEpisodes 100 +Average/StdReturn 7.16026 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.705441 +TotalEnvSteps 3.44e+06 +__unnamed_task__/AverageDiscountedReturn -12.1224 +__unnamed_task__/AverageReturn -17.9984 +__unnamed_task__/Iteration 343 +__unnamed_task__/MaxReturn 6.99453 +__unnamed_task__/MinReturn -31.4647 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.16026 +__unnamed_task__/TerminationRate 0 +policy/Entropy 3.01742 +policy/KL 0.0106675 +policy/KLBefore 0 +policy/LossAfter -0.0147549 +policy/LossBefore -0.00130536 +policy/dLoss 0.0134496 +---------------------------------------- ------------ +2025-04-03 08:11:05 | [rl2_trainer] epoch #344 | Optimizing policy... +2025-04-03 08:11:05 | [rl2_trainer] epoch #344 | Fitting baseline... +2025-04-03 08:11:05 | [rl2_trainer] epoch #344 | Computing loss before +2025-04-03 08:11:06 | [rl2_trainer] epoch #344 | Computing KL before +2025-04-03 08:11:06 | [rl2_trainer] epoch #344 | Optimizing +2025-04-03 08:11:43 | [rl2_trainer] epoch #344 | Computing KL after +2025-04-03 08:11:43 | [rl2_trainer] epoch #344 | Computing loss after +2025-04-03 08:11:44 | [rl2_trainer] epoch #344 | Saving snapshot... +2025-04-03 08:11:44 | [rl2_trainer] epoch #344 | Saved +2025-04-03 08:11:44 | [rl2_trainer] epoch #344 | Time 65985.24 s +2025-04-03 08:11:44 | [rl2_trainer] epoch #344 | EpochTime 230.46 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.2349 +Average/AverageReturn -26.0143 +Average/Iteration 344 +Average/MaxReturn -0.226343 +Average/MinReturn -58.7011 +Average/NumEpisodes 100 +Average/StdReturn 10.9205 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.823939 +TotalEnvSteps 3.45e+06 +__unnamed_task__/AverageDiscountedReturn -16.2349 +__unnamed_task__/AverageReturn -26.0143 +__unnamed_task__/Iteration 344 +__unnamed_task__/MaxReturn -0.226343 +__unnamed_task__/MinReturn -58.7011 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.9205 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.99905 +policy/KL 0.013368 +policy/KLBefore 0 +policy/LossAfter -0.0300498 +policy/LossBefore 0.00147923 +policy/dLoss 0.0315291 +---------------------------------------- ------------ +2025-04-03 08:13:43 | [rl2_trainer] epoch #345 | Optimizing policy... +2025-04-03 08:13:43 | [rl2_trainer] epoch #345 | Fitting baseline... +2025-04-03 08:13:43 | [rl2_trainer] epoch #345 | Computing loss before +2025-04-03 08:13:44 | [rl2_trainer] epoch #345 | Computing KL before +2025-04-03 08:13:44 | [rl2_trainer] epoch #345 | Optimizing +2025-04-03 08:14:21 | [rl2_trainer] epoch #345 | Computing KL after +2025-04-03 08:14:22 | [rl2_trainer] epoch #345 | Computing loss after +2025-04-03 08:14:23 | [rl2_trainer] epoch #345 | Saving snapshot... +2025-04-03 08:14:23 | [rl2_trainer] epoch #345 | Saved +2025-04-03 08:14:23 | [rl2_trainer] epoch #345 | Time 66143.76 s +2025-04-03 08:14:23 | [rl2_trainer] epoch #345 | EpochTime 158.51 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.4271 +Average/AverageReturn -20.4247 +Average/Iteration 345 +Average/MaxReturn 5.04227 +Average/MinReturn -38.6449 +Average/NumEpisodes 100 +Average/StdReturn 6.22586 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.688578 +TotalEnvSteps 3.46e+06 +__unnamed_task__/AverageDiscountedReturn -13.4271 +__unnamed_task__/AverageReturn -20.4247 +__unnamed_task__/Iteration 345 +__unnamed_task__/MaxReturn 5.04227 +__unnamed_task__/MinReturn -38.6449 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.22586 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.97771 +policy/KL 0.0113254 +policy/KLBefore 0 +policy/LossAfter -0.00680392 +policy/LossBefore 0.0030472 +policy/dLoss 0.00985112 +---------------------------------------- ------------ +2025-04-03 08:16:40 | [rl2_trainer] epoch #346 | Optimizing policy... +2025-04-03 08:16:40 | [rl2_trainer] epoch #346 | Fitting baseline... +2025-04-03 08:16:40 | [rl2_trainer] epoch #346 | Computing loss before +2025-04-03 08:16:41 | [rl2_trainer] epoch #346 | Computing KL before +2025-04-03 08:16:41 | [rl2_trainer] epoch #346 | Optimizing +2025-04-03 08:17:15 | [rl2_trainer] epoch #346 | Computing KL after +2025-04-03 08:17:16 | [rl2_trainer] epoch #346 | Computing loss after +2025-04-03 08:17:17 | [rl2_trainer] epoch #346 | Saving snapshot... +2025-04-03 08:17:17 | [rl2_trainer] epoch #346 | Saved +2025-04-03 08:17:17 | [rl2_trainer] epoch #346 | Time 66317.55 s +2025-04-03 08:17:17 | [rl2_trainer] epoch #346 | EpochTime 173.79 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.1044 +Average/AverageReturn -16.7109 +Average/Iteration 346 +Average/MaxReturn 15.6382 +Average/MinReturn -26.9688 +Average/NumEpisodes 100 +Average/StdReturn 6.72407 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.478215 +TotalEnvSteps 3.47e+06 +__unnamed_task__/AverageDiscountedReturn -11.1044 +__unnamed_task__/AverageReturn -16.7109 +__unnamed_task__/Iteration 346 +__unnamed_task__/MaxReturn 15.6382 +__unnamed_task__/MinReturn -26.9688 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.72407 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.96334 +policy/KL 0.0092783 +policy/KLBefore 0 +policy/LossAfter -0.0358574 +policy/LossBefore -0.00793542 +policy/dLoss 0.027922 +---------------------------------------- ------------ +2025-04-03 08:19:16 | [rl2_trainer] epoch #347 | Optimizing policy... +2025-04-03 08:19:16 | [rl2_trainer] epoch #347 | Fitting baseline... +2025-04-03 08:19:16 | [rl2_trainer] epoch #347 | Computing loss before +2025-04-03 08:19:17 | [rl2_trainer] epoch #347 | Computing KL before +2025-04-03 08:19:18 | [rl2_trainer] epoch #347 | Optimizing +2025-04-03 08:19:53 | [rl2_trainer] epoch #347 | Computing KL after +2025-04-03 08:19:53 | [rl2_trainer] epoch #347 | Computing loss after +2025-04-03 08:19:54 | [rl2_trainer] epoch #347 | Saving snapshot... +2025-04-03 08:19:54 | [rl2_trainer] epoch #347 | Saved +2025-04-03 08:19:54 | [rl2_trainer] epoch #347 | Time 66475.25 s +2025-04-03 08:19:54 | [rl2_trainer] epoch #347 | EpochTime 157.70 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9282 +Average/AverageReturn -19.5351 +Average/Iteration 347 +Average/MaxReturn -0.0725934 +Average/MinReturn -42.8864 +Average/NumEpisodes 100 +Average/StdReturn 6.90295 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.60177 +TotalEnvSteps 3.48e+06 +__unnamed_task__/AverageDiscountedReturn -12.9282 +__unnamed_task__/AverageReturn -19.5351 +__unnamed_task__/Iteration 347 +__unnamed_task__/MaxReturn -0.0725934 +__unnamed_task__/MinReturn -42.8864 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.90295 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.94457 +policy/KL 0.0122189 +policy/KLBefore 0 +policy/LossAfter -0.016141 +policy/LossBefore -0.00275099 +policy/dLoss 0.01339 +---------------------------------------- ------------ +2025-04-03 08:22:44 | [rl2_trainer] epoch #348 | Optimizing policy... +2025-04-03 08:22:45 | [rl2_trainer] epoch #348 | Fitting baseline... +2025-04-03 08:22:45 | [rl2_trainer] epoch #348 | Computing loss before +2025-04-03 08:22:45 | [rl2_trainer] epoch #348 | Computing KL before +2025-04-03 08:22:46 | [rl2_trainer] epoch #348 | Optimizing +2025-04-03 08:23:21 | [rl2_trainer] epoch #348 | Computing KL after +2025-04-03 08:23:21 | [rl2_trainer] epoch #348 | Computing loss after +2025-04-03 08:23:22 | [rl2_trainer] epoch #348 | Saving snapshot... +2025-04-03 08:23:22 | [rl2_trainer] epoch #348 | Saved +2025-04-03 08:23:22 | [rl2_trainer] epoch #348 | Time 66683.17 s +2025-04-03 08:23:22 | [rl2_trainer] epoch #348 | EpochTime 207.92 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -14.6135 +Average/AverageReturn -22.8027 +Average/Iteration 348 +Average/MaxReturn 1.40617 +Average/MinReturn -109.347 +Average/NumEpisodes 100 +Average/StdReturn 15.1103 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.720212 +TotalEnvSteps 3.49e+06 +__unnamed_task__/AverageDiscountedReturn -14.6135 +__unnamed_task__/AverageReturn -22.8027 +__unnamed_task__/Iteration 348 +__unnamed_task__/MaxReturn 1.40617 +__unnamed_task__/MinReturn -109.347 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.1103 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.90941 +policy/KL 0.0170743 +policy/KLBefore 0 +policy/LossAfter -0.044609 +policy/LossBefore 0.00789438 +policy/dLoss 0.0525034 +---------------------------------------- ------------- +2025-04-03 08:25:49 | [rl2_trainer] epoch #349 | Optimizing policy... +2025-04-03 08:25:50 | [rl2_trainer] epoch #349 | Fitting baseline... +2025-04-03 08:25:50 | [rl2_trainer] epoch #349 | Computing loss before +2025-04-03 08:25:50 | [rl2_trainer] epoch #349 | Computing KL before +2025-04-03 08:25:51 | [rl2_trainer] epoch #349 | Optimizing +2025-04-03 08:26:26 | [rl2_trainer] epoch #349 | Computing KL after +2025-04-03 08:26:26 | [rl2_trainer] epoch #349 | Computing loss after +2025-04-03 08:26:27 | [rl2_trainer] epoch #349 | Saving snapshot... +2025-04-03 08:26:27 | [rl2_trainer] epoch #349 | Saved +2025-04-03 08:26:27 | [rl2_trainer] epoch #349 | Time 66868.18 s +2025-04-03 08:26:27 | [rl2_trainer] epoch #349 | EpochTime 185.00 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -9.43192 +Average/AverageReturn -13.8272 +Average/Iteration 349 +Average/MaxReturn 12.5426 +Average/MinReturn -25.4256 +Average/NumEpisodes 100 +Average/StdReturn 8.29461 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.594822 +TotalEnvSteps 3.5e+06 +__unnamed_task__/AverageDiscountedReturn -9.43192 +__unnamed_task__/AverageReturn -13.8272 +__unnamed_task__/Iteration 349 +__unnamed_task__/MaxReturn 12.5426 +__unnamed_task__/MinReturn -25.4256 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.29461 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.86593 +policy/KL 0.0156763 +policy/KLBefore 0 +policy/LossAfter -0.0230002 +policy/LossBefore 0.000813357 +policy/dLoss 0.0238135 +---------------------------------------- ------------- +2025-04-03 08:29:23 | [rl2_trainer] epoch #350 | Optimizing policy... +2025-04-03 08:29:23 | [rl2_trainer] epoch #350 | Fitting baseline... +2025-04-03 08:29:23 | [rl2_trainer] epoch #350 | Computing loss before +2025-04-03 08:29:24 | [rl2_trainer] epoch #350 | Computing KL before +2025-04-03 08:29:25 | [rl2_trainer] epoch #350 | Optimizing +2025-04-03 08:30:01 | [rl2_trainer] epoch #350 | Computing KL after +2025-04-03 08:30:02 | [rl2_trainer] epoch #350 | Computing loss after +2025-04-03 08:30:03 | [rl2_trainer] epoch #350 | Saving snapshot... +2025-04-03 08:30:03 | [rl2_trainer] epoch #350 | Saved +2025-04-03 08:30:03 | [rl2_trainer] epoch #350 | Time 67083.92 s +2025-04-03 08:30:03 | [rl2_trainer] epoch #350 | EpochTime 215.74 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.1716 +Average/AverageReturn -21.852 +Average/Iteration 350 +Average/MaxReturn 18.0754 +Average/MinReturn -50.5464 +Average/NumEpisodes 100 +Average/StdReturn 12.8073 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.830232 +TotalEnvSteps 3.51e+06 +__unnamed_task__/AverageDiscountedReturn -14.1716 +__unnamed_task__/AverageReturn -21.852 +__unnamed_task__/Iteration 350 +__unnamed_task__/MaxReturn 18.0754 +__unnamed_task__/MinReturn -50.5464 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.8073 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.83661 +policy/KL 0.0142912 +policy/KLBefore 0 +policy/LossAfter -0.0293297 +policy/LossBefore 0.00224423 +policy/dLoss 0.031574 +---------------------------------------- ------------ +2025-04-03 08:34:08 | [rl2_trainer] epoch #351 | Optimizing policy... +2025-04-03 08:34:08 | [rl2_trainer] epoch #351 | Fitting baseline... +2025-04-03 08:34:08 | [rl2_trainer] epoch #351 | Computing loss before +2025-04-03 08:34:09 | [rl2_trainer] epoch #351 | Computing KL before +2025-04-03 08:34:09 | [rl2_trainer] epoch #351 | Optimizing +2025-04-03 08:34:44 | [rl2_trainer] epoch #351 | Computing KL after +2025-04-03 08:34:45 | [rl2_trainer] epoch #351 | Computing loss after +2025-04-03 08:34:46 | [rl2_trainer] epoch #351 | Saving snapshot... +2025-04-03 08:34:46 | [rl2_trainer] epoch #351 | Saved +2025-04-03 08:34:46 | [rl2_trainer] epoch #351 | Time 67366.55 s +2025-04-03 08:34:46 | [rl2_trainer] epoch #351 | EpochTime 282.62 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -17.2327 +Average/AverageReturn -27.1621 +Average/Iteration 351 +Average/MaxReturn -10.8496 +Average/MinReturn -63.4702 +Average/NumEpisodes 100 +Average/StdReturn 10.1354 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.800084 +TotalEnvSteps 3.52e+06 +__unnamed_task__/AverageDiscountedReturn -17.2327 +__unnamed_task__/AverageReturn -27.1621 +__unnamed_task__/Iteration 351 +__unnamed_task__/MaxReturn -10.8496 +__unnamed_task__/MinReturn -63.4702 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.1354 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.80281 +policy/KL 0.0165474 +policy/KLBefore 0 +policy/LossAfter -0.037162 +policy/LossBefore -0.0111369 +policy/dLoss 0.0260251 +---------------------------------------- ----------- +2025-04-03 08:37:20 | [rl2_trainer] epoch #352 | Optimizing policy... +2025-04-03 08:37:20 | [rl2_trainer] epoch #352 | Fitting baseline... +2025-04-03 08:37:20 | [rl2_trainer] epoch #352 | Computing loss before +2025-04-03 08:37:20 | [rl2_trainer] epoch #352 | Computing KL before +2025-04-03 08:37:21 | [rl2_trainer] epoch #352 | Optimizing +2025-04-03 08:37:57 | [rl2_trainer] epoch #352 | Computing KL after +2025-04-03 08:37:57 | [rl2_trainer] epoch #352 | Computing loss after +2025-04-03 08:37:58 | [rl2_trainer] epoch #352 | Saving snapshot... +2025-04-03 08:37:58 | [rl2_trainer] epoch #352 | Saved +2025-04-03 08:37:58 | [rl2_trainer] epoch #352 | Time 67559.28 s +2025-04-03 08:37:58 | [rl2_trainer] epoch #352 | EpochTime 192.73 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.693 +Average/AverageReturn -26.3011 +Average/Iteration 352 +Average/MaxReturn -13.2106 +Average/MinReturn -55.5715 +Average/NumEpisodes 100 +Average/StdReturn 8.95852 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.763499 +TotalEnvSteps 3.53e+06 +__unnamed_task__/AverageDiscountedReturn -16.693 +__unnamed_task__/AverageReturn -26.3011 +__unnamed_task__/Iteration 352 +__unnamed_task__/MaxReturn -13.2106 +__unnamed_task__/MinReturn -55.5715 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.95852 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.78063 +policy/KL 0.0138015 +policy/KLBefore 0 +policy/LossAfter -0.0230218 +policy/LossBefore 0.0023777 +policy/dLoss 0.0253995 +---------------------------------------- ----------- +2025-04-03 08:40:59 | [rl2_trainer] epoch #353 | Optimizing policy... +2025-04-03 08:40:59 | [rl2_trainer] epoch #353 | Fitting baseline... +2025-04-03 08:40:59 | [rl2_trainer] epoch #353 | Computing loss before +2025-04-03 08:41:00 | [rl2_trainer] epoch #353 | Computing KL before +2025-04-03 08:41:00 | [rl2_trainer] epoch #353 | Optimizing +2025-04-03 08:41:36 | [rl2_trainer] epoch #353 | Computing KL after +2025-04-03 08:41:37 | [rl2_trainer] epoch #353 | Computing loss after +2025-04-03 08:41:38 | [rl2_trainer] epoch #353 | Saving snapshot... +2025-04-03 08:41:38 | [rl2_trainer] epoch #353 | Saved +2025-04-03 08:41:38 | [rl2_trainer] epoch #353 | Time 67778.77 s +2025-04-03 08:41:38 | [rl2_trainer] epoch #353 | EpochTime 219.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.2234 +Average/AverageReturn -20.0299 +Average/Iteration 353 +Average/MaxReturn -8.2022 +Average/MinReturn -34.0852 +Average/NumEpisodes 100 +Average/StdReturn 4.75327 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.632946 +TotalEnvSteps 3.54e+06 +__unnamed_task__/AverageDiscountedReturn -13.2234 +__unnamed_task__/AverageReturn -20.0299 +__unnamed_task__/Iteration 353 +__unnamed_task__/MaxReturn -8.2022 +__unnamed_task__/MinReturn -34.0852 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.75327 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.76131 +policy/KL 0.00989511 +policy/KLBefore 0 +policy/LossAfter -0.00467195 +policy/LossBefore 0.00177578 +policy/dLoss 0.00644773 +---------------------------------------- ------------ +2025-04-03 08:43:39 | [rl2_trainer] epoch #354 | Optimizing policy... +2025-04-03 08:43:39 | [rl2_trainer] epoch #354 | Fitting baseline... +2025-04-03 08:43:39 | [rl2_trainer] epoch #354 | Computing loss before +2025-04-03 08:43:40 | [rl2_trainer] epoch #354 | Computing KL before +2025-04-03 08:43:41 | [rl2_trainer] epoch #354 | Optimizing +2025-04-03 08:44:17 | [rl2_trainer] epoch #354 | Computing KL after +2025-04-03 08:44:17 | [rl2_trainer] epoch #354 | Computing loss after +2025-04-03 08:44:18 | [rl2_trainer] epoch #354 | Saving snapshot... +2025-04-03 08:44:18 | [rl2_trainer] epoch #354 | Saved +2025-04-03 08:44:18 | [rl2_trainer] epoch #354 | Time 67939.30 s +2025-04-03 08:44:18 | [rl2_trainer] epoch #354 | EpochTime 160.53 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.0145 +Average/AverageReturn -18.0825 +Average/Iteration 354 +Average/MaxReturn 16.6469 +Average/MinReturn -35.2808 +Average/NumEpisodes 100 +Average/StdReturn 8.08219 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.56478 +TotalEnvSteps 3.55e+06 +__unnamed_task__/AverageDiscountedReturn -12.0145 +__unnamed_task__/AverageReturn -18.0825 +__unnamed_task__/Iteration 354 +__unnamed_task__/MaxReturn 16.6469 +__unnamed_task__/MinReturn -35.2808 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.08219 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.73482 +policy/KL 0.010997 +policy/KLBefore 0 +policy/LossAfter -0.0282148 +policy/LossBefore -0.00232139 +policy/dLoss 0.0258934 +---------------------------------------- ------------ +2025-04-03 08:48:11 | [rl2_trainer] epoch #355 | Optimizing policy... +2025-04-03 08:48:11 | [rl2_trainer] epoch #355 | Fitting baseline... +2025-04-03 08:48:11 | [rl2_trainer] epoch #355 | Computing loss before +2025-04-03 08:48:12 | [rl2_trainer] epoch #355 | Computing KL before +2025-04-03 08:48:13 | [rl2_trainer] epoch #355 | Optimizing +2025-04-03 08:48:48 | [rl2_trainer] epoch #355 | Computing KL after +2025-04-03 08:48:49 | [rl2_trainer] epoch #355 | Computing loss after +2025-04-03 08:48:50 | [rl2_trainer] epoch #355 | Saving snapshot... +2025-04-03 08:48:50 | [rl2_trainer] epoch #355 | Saved +2025-04-03 08:48:50 | [rl2_trainer] epoch #355 | Time 68210.68 s +2025-04-03 08:48:50 | [rl2_trainer] epoch #355 | EpochTime 271.38 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1432 +Average/AverageReturn -24.0392 +Average/Iteration 355 +Average/MaxReturn -4.17518 +Average/MinReturn -46.7309 +Average/NumEpisodes 100 +Average/StdReturn 8.52171 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.840084 +TotalEnvSteps 3.56e+06 +__unnamed_task__/AverageDiscountedReturn -15.1432 +__unnamed_task__/AverageReturn -24.0392 +__unnamed_task__/Iteration 355 +__unnamed_task__/MaxReturn -4.17518 +__unnamed_task__/MinReturn -46.7309 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.52171 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.71205 +policy/KL 0.0125761 +policy/KLBefore 0 +policy/LossAfter -0.0219872 +policy/LossBefore -0.00380499 +policy/dLoss 0.0181822 +---------------------------------------- ------------ +2025-04-03 08:51:18 | [rl2_trainer] epoch #356 | Optimizing policy... +2025-04-03 08:51:19 | [rl2_trainer] epoch #356 | Fitting baseline... +2025-04-03 08:51:19 | [rl2_trainer] epoch #356 | Computing loss before +2025-04-03 08:51:19 | [rl2_trainer] epoch #356 | Computing KL before +2025-04-03 08:51:20 | [rl2_trainer] epoch #356 | Optimizing +2025-04-03 08:51:55 | [rl2_trainer] epoch #356 | Computing KL after +2025-04-03 08:51:56 | [rl2_trainer] epoch #356 | Computing loss after +2025-04-03 08:51:57 | [rl2_trainer] epoch #356 | Saving snapshot... +2025-04-03 08:51:57 | [rl2_trainer] epoch #356 | Saved +2025-04-03 08:51:57 | [rl2_trainer] epoch #356 | Time 68397.86 s +2025-04-03 08:51:57 | [rl2_trainer] epoch #356 | EpochTime 187.17 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.3328 +Average/AverageReturn -15.1594 +Average/Iteration 356 +Average/MaxReturn 16.4312 +Average/MinReturn -26.2326 +Average/NumEpisodes 100 +Average/StdReturn 7.34968 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.55478 +TotalEnvSteps 3.57e+06 +__unnamed_task__/AverageDiscountedReturn -10.3328 +__unnamed_task__/AverageReturn -15.1594 +__unnamed_task__/Iteration 356 +__unnamed_task__/MaxReturn 16.4312 +__unnamed_task__/MinReturn -26.2326 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.34968 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.7005 +policy/KL 0.0143138 +policy/KLBefore 0 +policy/LossAfter -0.0426939 +policy/LossBefore -0.00618516 +policy/dLoss 0.0365087 +---------------------------------------- ------------ +2025-04-03 08:54:54 | [rl2_trainer] epoch #357 | Optimizing policy... +2025-04-03 08:54:54 | [rl2_trainer] epoch #357 | Fitting baseline... +2025-04-03 08:54:54 | [rl2_trainer] epoch #357 | Computing loss before +2025-04-03 08:54:55 | [rl2_trainer] epoch #357 | Computing KL before +2025-04-03 08:54:55 | [rl2_trainer] epoch #357 | Optimizing +2025-04-03 08:55:30 | [rl2_trainer] epoch #357 | Computing KL after +2025-04-03 08:55:30 | [rl2_trainer] epoch #357 | Computing loss after +2025-04-03 08:55:31 | [rl2_trainer] epoch #357 | Saving snapshot... +2025-04-03 08:55:31 | [rl2_trainer] epoch #357 | Saved +2025-04-03 08:55:31 | [rl2_trainer] epoch #357 | Time 68612.40 s +2025-04-03 08:55:31 | [rl2_trainer] epoch #357 | EpochTime 214.54 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.4448 +Average/AverageReturn -24.608 +Average/Iteration 357 +Average/MaxReturn -1.82066 +Average/MinReturn -54.1724 +Average/NumEpisodes 100 +Average/StdReturn 9.88447 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.840286 +TotalEnvSteps 3.58e+06 +__unnamed_task__/AverageDiscountedReturn -15.4448 +__unnamed_task__/AverageReturn -24.608 +__unnamed_task__/Iteration 357 +__unnamed_task__/MaxReturn -1.82066 +__unnamed_task__/MinReturn -54.1724 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.88447 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.67846 +policy/KL 0.0123898 +policy/KLBefore 0 +policy/LossAfter -0.0310305 +policy/LossBefore -0.00710476 +policy/dLoss 0.0239257 +---------------------------------------- ------------ +2025-04-03 08:57:54 | [rl2_trainer] epoch #358 | Optimizing policy... +2025-04-03 08:57:55 | [rl2_trainer] epoch #358 | Fitting baseline... +2025-04-03 08:57:55 | [rl2_trainer] epoch #358 | Computing loss before +2025-04-03 08:57:55 | [rl2_trainer] epoch #358 | Computing KL before +2025-04-03 08:57:56 | [rl2_trainer] epoch #358 | Optimizing +2025-04-03 08:58:30 | [rl2_trainer] epoch #358 | Computing KL after +2025-04-03 08:58:30 | [rl2_trainer] epoch #358 | Computing loss after +2025-04-03 08:58:31 | [rl2_trainer] epoch #358 | Saving snapshot... +2025-04-03 08:58:31 | [rl2_trainer] epoch #358 | Saved +2025-04-03 08:58:31 | [rl2_trainer] epoch #358 | Time 68792.22 s +2025-04-03 08:58:31 | [rl2_trainer] epoch #358 | EpochTime 179.82 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.693 +Average/AverageReturn -17.3121 +Average/Iteration 358 +Average/MaxReturn 0.125015 +Average/MinReturn -30.8927 +Average/NumEpisodes 100 +Average/StdReturn 4.11727 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.433406 +TotalEnvSteps 3.59e+06 +__unnamed_task__/AverageDiscountedReturn -11.693 +__unnamed_task__/AverageReturn -17.3121 +__unnamed_task__/Iteration 358 +__unnamed_task__/MaxReturn 0.125015 +__unnamed_task__/MinReturn -30.8927 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.11727 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.66436 +policy/KL 0.00859361 +policy/KLBefore 0 +policy/LossAfter -0.0225281 +policy/LossBefore -0.00885345 +policy/dLoss 0.0136747 +---------------------------------------- ------------ +2025-04-03 09:00:26 | [rl2_trainer] epoch #359 | Optimizing policy... +2025-04-03 09:00:26 | [rl2_trainer] epoch #359 | Fitting baseline... +2025-04-03 09:00:26 | [rl2_trainer] epoch #359 | Computing loss before +2025-04-03 09:00:27 | [rl2_trainer] epoch #359 | Computing KL before +2025-04-03 09:00:27 | [rl2_trainer] epoch #359 | Optimizing +2025-04-03 09:01:01 | [rl2_trainer] epoch #359 | Computing KL after +2025-04-03 09:01:02 | [rl2_trainer] epoch #359 | Computing loss after +2025-04-03 09:01:03 | [rl2_trainer] epoch #359 | Saving snapshot... +2025-04-03 09:01:03 | [rl2_trainer] epoch #359 | Saved +2025-04-03 09:01:03 | [rl2_trainer] epoch #359 | Time 68943.53 s +2025-04-03 09:01:03 | [rl2_trainer] epoch #359 | EpochTime 151.31 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -12.8334 +Average/AverageReturn -19.309 +Average/Iteration 359 +Average/MaxReturn 8.29644 +Average/MinReturn -40.2057 +Average/NumEpisodes 100 +Average/StdReturn 6.76247 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.671155 +TotalEnvSteps 3.6e+06 +__unnamed_task__/AverageDiscountedReturn -12.8334 +__unnamed_task__/AverageReturn -19.309 +__unnamed_task__/Iteration 359 +__unnamed_task__/MaxReturn 8.29644 +__unnamed_task__/MinReturn -40.2057 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.76247 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.65541 +policy/KL 0.0135253 +policy/KLBefore 0 +policy/LossAfter -0.0174934 +policy/LossBefore 0.000300385 +policy/dLoss 0.0177937 +---------------------------------------- ------------- +2025-04-03 09:03:13 | [rl2_trainer] epoch #360 | Optimizing policy... +2025-04-03 09:03:13 | [rl2_trainer] epoch #360 | Fitting baseline... +2025-04-03 09:03:13 | [rl2_trainer] epoch #360 | Computing loss before +2025-04-03 09:03:14 | [rl2_trainer] epoch #360 | Computing KL before +2025-04-03 09:03:15 | [rl2_trainer] epoch #360 | Optimizing +2025-04-03 09:03:48 | [rl2_trainer] epoch #360 | Computing KL after +2025-04-03 09:03:49 | [rl2_trainer] epoch #360 | Computing loss after +2025-04-03 09:03:49 | [rl2_trainer] epoch #360 | Saving snapshot... +2025-04-03 09:03:49 | [rl2_trainer] epoch #360 | Saved +2025-04-03 09:03:49 | [rl2_trainer] epoch #360 | Time 69110.51 s +2025-04-03 09:03:49 | [rl2_trainer] epoch #360 | EpochTime 166.97 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.2261 +Average/AverageReturn -23.9758 +Average/Iteration 360 +Average/MaxReturn 10.1502 +Average/MinReturn -48.356 +Average/NumEpisodes 100 +Average/StdReturn 10.1729 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.814408 +TotalEnvSteps 3.61e+06 +__unnamed_task__/AverageDiscountedReturn -15.2261 +__unnamed_task__/AverageReturn -23.9758 +__unnamed_task__/Iteration 360 +__unnamed_task__/MaxReturn 10.1502 +__unnamed_task__/MinReturn -48.356 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.1729 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.64398 +policy/KL 0.0132177 +policy/KLBefore 0 +policy/LossAfter -0.0335193 +policy/LossBefore -0.00316524 +policy/dLoss 0.0303541 +---------------------------------------- ------------ +2025-04-03 09:07:18 | [rl2_trainer] epoch #361 | Optimizing policy... +2025-04-03 09:07:19 | [rl2_trainer] epoch #361 | Fitting baseline... +2025-04-03 09:07:19 | [rl2_trainer] epoch #361 | Computing loss before +2025-04-03 09:07:19 | [rl2_trainer] epoch #361 | Computing KL before +2025-04-03 09:07:20 | [rl2_trainer] epoch #361 | Optimizing +2025-04-03 09:07:54 | [rl2_trainer] epoch #361 | Computing KL after +2025-04-03 09:07:54 | [rl2_trainer] epoch #361 | Computing loss after +2025-04-03 09:07:55 | [rl2_trainer] epoch #361 | Saving snapshot... +2025-04-03 09:07:55 | [rl2_trainer] epoch #361 | Saved +2025-04-03 09:07:55 | [rl2_trainer] epoch #361 | Time 69355.90 s +2025-04-03 09:07:55 | [rl2_trainer] epoch #361 | EpochTime 245.39 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.8344 +Average/AverageReturn -23.5637 +Average/Iteration 361 +Average/MaxReturn -6.80571 +Average/MinReturn -46.1839 +Average/NumEpisodes 100 +Average/StdReturn 7.37423 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.793047 +TotalEnvSteps 3.62e+06 +__unnamed_task__/AverageDiscountedReturn -14.8344 +__unnamed_task__/AverageReturn -23.5637 +__unnamed_task__/Iteration 361 +__unnamed_task__/MaxReturn -6.80571 +__unnamed_task__/MinReturn -46.1839 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.37423 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.62299 +policy/KL 0.0128325 +policy/KLBefore 0 +policy/LossAfter -0.0159374 +policy/LossBefore 0.00168808 +policy/dLoss 0.0176255 +---------------------------------------- ------------ +2025-04-03 09:10:50 | [rl2_trainer] epoch #362 | Optimizing policy... +2025-04-03 09:10:50 | [rl2_trainer] epoch #362 | Fitting baseline... +2025-04-03 09:10:50 | [rl2_trainer] epoch #362 | Computing loss before +2025-04-03 09:10:51 | [rl2_trainer] epoch #362 | Computing KL before +2025-04-03 09:10:51 | [rl2_trainer] epoch #362 | Optimizing +2025-04-03 09:11:25 | [rl2_trainer] epoch #362 | Computing KL after +2025-04-03 09:11:26 | [rl2_trainer] epoch #362 | Computing loss after +2025-04-03 09:11:27 | [rl2_trainer] epoch #362 | Saving snapshot... +2025-04-03 09:11:27 | [rl2_trainer] epoch #362 | Saved +2025-04-03 09:11:27 | [rl2_trainer] epoch #362 | Time 69567.53 s +2025-04-03 09:11:27 | [rl2_trainer] epoch #362 | EpochTime 211.62 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.7102 +Average/AverageReturn -21.0815 +Average/Iteration 362 +Average/MaxReturn 8.02 +Average/MinReturn -53.1476 +Average/NumEpisodes 100 +Average/StdReturn 11.2107 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.810647 +TotalEnvSteps 3.63e+06 +__unnamed_task__/AverageDiscountedReturn -13.7102 +__unnamed_task__/AverageReturn -21.0815 +__unnamed_task__/Iteration 362 +__unnamed_task__/MaxReturn 8.02 +__unnamed_task__/MinReturn -53.1476 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.2107 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.59396 +policy/KL 0.0164486 +policy/KLBefore 0 +policy/LossAfter -0.0327394 +policy/LossBefore -0.000319684 +policy/dLoss 0.0324198 +---------------------------------------- ------------- +2025-04-03 09:13:24 | [rl2_trainer] epoch #363 | Optimizing policy... +2025-04-03 09:13:24 | [rl2_trainer] epoch #363 | Fitting baseline... +2025-04-03 09:13:24 | [rl2_trainer] epoch #363 | Computing loss before +2025-04-03 09:13:25 | [rl2_trainer] epoch #363 | Computing KL before +2025-04-03 09:13:25 | [rl2_trainer] epoch #363 | Optimizing +2025-04-03 09:13:59 | [rl2_trainer] epoch #363 | Computing KL after +2025-04-03 09:14:00 | [rl2_trainer] epoch #363 | Computing loss after +2025-04-03 09:14:00 | [rl2_trainer] epoch #363 | Saving snapshot... +2025-04-03 09:14:00 | [rl2_trainer] epoch #363 | Saved +2025-04-03 09:14:00 | [rl2_trainer] epoch #363 | Time 69721.36 s +2025-04-03 09:14:00 | [rl2_trainer] epoch #363 | EpochTime 153.83 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.9689 +Average/AverageReturn -19.6685 +Average/Iteration 363 +Average/MaxReturn 4.60307 +Average/MinReturn -36.1921 +Average/NumEpisodes 100 +Average/StdReturn 5.87942 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.491555 +TotalEnvSteps 3.64e+06 +__unnamed_task__/AverageDiscountedReturn -12.9689 +__unnamed_task__/AverageReturn -19.6685 +__unnamed_task__/Iteration 363 +__unnamed_task__/MaxReturn 4.60307 +__unnamed_task__/MinReturn -36.1921 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.87942 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.55991 +policy/KL 0.0120441 +policy/KLBefore 0 +policy/LossAfter -0.0232914 +policy/LossBefore -0.00484254 +policy/dLoss 0.0184489 +---------------------------------------- ------------ +2025-04-03 09:17:25 | [rl2_trainer] epoch #364 | Optimizing policy... +2025-04-03 09:17:26 | [rl2_trainer] epoch #364 | Fitting baseline... +2025-04-03 09:17:26 | [rl2_trainer] epoch #364 | Computing loss before +2025-04-03 09:17:26 | [rl2_trainer] epoch #364 | Computing KL before +2025-04-03 09:17:27 | [rl2_trainer] epoch #364 | Optimizing +2025-04-03 09:18:00 | [rl2_trainer] epoch #364 | Computing KL after +2025-04-03 09:18:01 | [rl2_trainer] epoch #364 | Computing loss after +2025-04-03 09:18:02 | [rl2_trainer] epoch #364 | Saving snapshot... +2025-04-03 09:18:02 | [rl2_trainer] epoch #364 | Saved +2025-04-03 09:18:02 | [rl2_trainer] epoch #364 | Time 69962.83 s +2025-04-03 09:18:02 | [rl2_trainer] epoch #364 | EpochTime 241.47 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3148 +Average/AverageReturn -24.3797 +Average/Iteration 364 +Average/MaxReturn -14.0527 +Average/MinReturn -69.702 +Average/NumEpisodes 100 +Average/StdReturn 9.02739 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.850166 +TotalEnvSteps 3.65e+06 +__unnamed_task__/AverageDiscountedReturn -15.3148 +__unnamed_task__/AverageReturn -24.3797 +__unnamed_task__/Iteration 364 +__unnamed_task__/MaxReturn -14.0527 +__unnamed_task__/MinReturn -69.702 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.02739 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.52219 +policy/KL 0.0125122 +policy/KLBefore 0 +policy/LossAfter -0.0204662 +policy/LossBefore -0.00654091 +policy/dLoss 0.0139253 +---------------------------------------- ------------ +2025-04-03 09:19:31 | [rl2_trainer] epoch #365 | Optimizing policy... +2025-04-03 09:19:31 | [rl2_trainer] epoch #365 | Fitting baseline... +2025-04-03 09:19:31 | [rl2_trainer] epoch #365 | Computing loss before +2025-04-03 09:19:32 | [rl2_trainer] epoch #365 | Computing KL before +2025-04-03 09:19:33 | [rl2_trainer] epoch #365 | Optimizing +2025-04-03 09:20:07 | [rl2_trainer] epoch #365 | Computing KL after +2025-04-03 09:20:07 | [rl2_trainer] epoch #365 | Computing loss after +2025-04-03 09:20:08 | [rl2_trainer] epoch #365 | Saving snapshot... +2025-04-03 09:20:08 | [rl2_trainer] epoch #365 | Saved +2025-04-03 09:20:08 | [rl2_trainer] epoch #365 | Time 70089.23 s +2025-04-03 09:20:08 | [rl2_trainer] epoch #365 | EpochTime 126.39 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.0141 +Average/AverageReturn -16.2037 +Average/Iteration 365 +Average/MaxReturn 36.6086 +Average/MinReturn -25.2139 +Average/NumEpisodes 100 +Average/StdReturn 7.72722 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.27563 +TotalEnvSteps 3.66e+06 +__unnamed_task__/AverageDiscountedReturn -11.0141 +__unnamed_task__/AverageReturn -16.2037 +__unnamed_task__/Iteration 365 +__unnamed_task__/MaxReturn 36.6086 +__unnamed_task__/MinReturn -25.2139 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.72722 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.47624 +policy/KL 0.013212 +policy/KLBefore 0 +policy/LossAfter -0.0309144 +policy/LossBefore -0.0103479 +policy/dLoss 0.0205665 +---------------------------------------- ----------- +2025-04-03 09:22:05 | [rl2_trainer] epoch #366 | Optimizing policy... +2025-04-03 09:22:06 | [rl2_trainer] epoch #366 | Fitting baseline... +2025-04-03 09:22:06 | [rl2_trainer] epoch #366 | Computing loss before +2025-04-03 09:22:06 | [rl2_trainer] epoch #366 | Computing KL before +2025-04-03 09:22:07 | [rl2_trainer] epoch #366 | Optimizing +2025-04-03 09:22:41 | [rl2_trainer] epoch #366 | Computing KL after +2025-04-03 09:22:41 | [rl2_trainer] epoch #366 | Computing loss after +2025-04-03 09:22:42 | [rl2_trainer] epoch #366 | Saving snapshot... +2025-04-03 09:22:42 | [rl2_trainer] epoch #366 | Saved +2025-04-03 09:22:42 | [rl2_trainer] epoch #366 | Time 70243.19 s +2025-04-03 09:22:42 | [rl2_trainer] epoch #366 | EpochTime 153.97 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.8847 +Average/AverageReturn -21.3366 +Average/Iteration 366 +Average/MaxReturn 9.56671 +Average/MinReturn -51.0343 +Average/NumEpisodes 100 +Average/StdReturn 8.18481 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.798406 +TotalEnvSteps 3.67e+06 +__unnamed_task__/AverageDiscountedReturn -13.8847 +__unnamed_task__/AverageReturn -21.3366 +__unnamed_task__/Iteration 366 +__unnamed_task__/MaxReturn 9.56671 +__unnamed_task__/MinReturn -51.0343 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.18481 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.45719 +policy/KL 0.016105 +policy/KLBefore 0 +policy/LossAfter -0.0301087 +policy/LossBefore -0.0123886 +policy/dLoss 0.0177201 +---------------------------------------- ----------- +2025-04-03 09:25:34 | [rl2_trainer] epoch #367 | Optimizing policy... +2025-04-03 09:25:35 | [rl2_trainer] epoch #367 | Fitting baseline... +2025-04-03 09:25:35 | [rl2_trainer] epoch #367 | Computing loss before +2025-04-03 09:25:35 | [rl2_trainer] epoch #367 | Computing KL before +2025-04-03 09:25:36 | [rl2_trainer] epoch #367 | Optimizing +2025-04-03 09:26:11 | [rl2_trainer] epoch #367 | Computing KL after +2025-04-03 09:26:11 | [rl2_trainer] epoch #367 | Computing loss after +2025-04-03 09:26:12 | [rl2_trainer] epoch #367 | Saving snapshot... +2025-04-03 09:26:12 | [rl2_trainer] epoch #367 | Saved +2025-04-03 09:26:12 | [rl2_trainer] epoch #367 | Time 70453.07 s +2025-04-03 09:26:12 | [rl2_trainer] epoch #367 | EpochTime 209.87 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.3437 +Average/AverageReturn -22.0333 +Average/Iteration 367 +Average/MaxReturn -11.0531 +Average/MinReturn -63.2844 +Average/NumEpisodes 100 +Average/StdReturn 6.30402 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.737258 +TotalEnvSteps 3.68e+06 +__unnamed_task__/AverageDiscountedReturn -14.3437 +__unnamed_task__/AverageReturn -22.0333 +__unnamed_task__/Iteration 367 +__unnamed_task__/MaxReturn -11.0531 +__unnamed_task__/MinReturn -63.2844 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.30402 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.4498 +policy/KL 0.0102295 +policy/KLBefore 0 +policy/LossAfter -0.0175067 +policy/LossBefore -0.00197767 +policy/dLoss 0.015529 +---------------------------------------- ------------ +2025-04-03 09:28:27 | [rl2_trainer] epoch #368 | Optimizing policy... +2025-04-03 09:28:27 | [rl2_trainer] epoch #368 | Fitting baseline... +2025-04-03 09:28:27 | [rl2_trainer] epoch #368 | Computing loss before +2025-04-03 09:28:28 | [rl2_trainer] epoch #368 | Computing KL before +2025-04-03 09:28:28 | [rl2_trainer] epoch #368 | Optimizing +2025-04-03 09:29:02 | [rl2_trainer] epoch #368 | Computing KL after +2025-04-03 09:29:03 | [rl2_trainer] epoch #368 | Computing loss after +2025-04-03 09:29:04 | [rl2_trainer] epoch #368 | Saving snapshot... +2025-04-03 09:29:04 | [rl2_trainer] epoch #368 | Saved +2025-04-03 09:29:04 | [rl2_trainer] epoch #368 | Time 70624.79 s +2025-04-03 09:29:04 | [rl2_trainer] epoch #368 | EpochTime 171.71 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.9573 +Average/AverageReturn -18.2666 +Average/Iteration 368 +Average/MaxReturn -6.04645 +Average/MinReturn -32.8803 +Average/NumEpisodes 100 +Average/StdReturn 4.22547 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.582059 +TotalEnvSteps 3.69e+06 +__unnamed_task__/AverageDiscountedReturn -11.9573 +__unnamed_task__/AverageReturn -18.2666 +__unnamed_task__/Iteration 368 +__unnamed_task__/MaxReturn -6.04645 +__unnamed_task__/MinReturn -32.8803 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.22547 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.44238 +policy/KL 0.00946655 +policy/KLBefore 0 +policy/LossAfter -0.0148191 +policy/LossBefore -0.000749455 +policy/dLoss 0.0140697 +---------------------------------------- ------------- +2025-04-03 09:31:58 | [rl2_trainer] epoch #369 | Optimizing policy... +2025-04-03 09:31:58 | [rl2_trainer] epoch #369 | Fitting baseline... +2025-04-03 09:31:58 | [rl2_trainer] epoch #369 | Computing loss before +2025-04-03 09:31:58 | [rl2_trainer] epoch #369 | Computing KL before +2025-04-03 09:31:59 | [rl2_trainer] epoch #369 | Optimizing +2025-04-03 09:32:33 | [rl2_trainer] epoch #369 | Computing KL after +2025-04-03 09:32:33 | [rl2_trainer] epoch #369 | Computing loss after +2025-04-03 09:32:34 | [rl2_trainer] epoch #369 | Saving snapshot... +2025-04-03 09:32:34 | [rl2_trainer] epoch #369 | Saved +2025-04-03 09:32:34 | [rl2_trainer] epoch #369 | Time 70835.20 s +2025-04-03 09:32:34 | [rl2_trainer] epoch #369 | EpochTime 210.41 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.0569 +Average/AverageReturn -21.5574 +Average/Iteration 369 +Average/MaxReturn -1.01546 +Average/MinReturn -47.6985 +Average/NumEpisodes 100 +Average/StdReturn 6.66008 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.754291 +TotalEnvSteps 3.7e+06 +__unnamed_task__/AverageDiscountedReturn -14.0569 +__unnamed_task__/AverageReturn -21.5574 +__unnamed_task__/Iteration 369 +__unnamed_task__/MaxReturn -1.01546 +__unnamed_task__/MinReturn -47.6985 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.66008 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.42489 +policy/KL 0.0125537 +policy/KLBefore 0 +policy/LossAfter -0.0231894 +policy/LossBefore -0.00784003 +policy/dLoss 0.0153494 +---------------------------------------- ------------ +2025-04-03 09:34:30 | [rl2_trainer] epoch #370 | Optimizing policy... +2025-04-03 09:34:30 | [rl2_trainer] epoch #370 | Fitting baseline... +2025-04-03 09:34:30 | [rl2_trainer] epoch #370 | Computing loss before +2025-04-03 09:34:30 | [rl2_trainer] epoch #370 | Computing KL before +2025-04-03 09:34:31 | [rl2_trainer] epoch #370 | Optimizing +2025-04-03 09:35:05 | [rl2_trainer] epoch #370 | Computing KL after +2025-04-03 09:35:05 | [rl2_trainer] epoch #370 | Computing loss after +2025-04-03 09:35:06 | [rl2_trainer] epoch #370 | Saving snapshot... +2025-04-03 09:35:06 | [rl2_trainer] epoch #370 | Saved +2025-04-03 09:35:06 | [rl2_trainer] epoch #370 | Time 70987.14 s +2025-04-03 09:35:06 | [rl2_trainer] epoch #370 | EpochTime 151.94 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.2071 +Average/AverageReturn -20.1409 +Average/Iteration 370 +Average/MaxReturn 0.683935 +Average/MinReturn -39.2827 +Average/NumEpisodes 100 +Average/StdReturn 6.61292 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.748499 +TotalEnvSteps 3.71e+06 +__unnamed_task__/AverageDiscountedReturn -13.2071 +__unnamed_task__/AverageReturn -20.1409 +__unnamed_task__/Iteration 370 +__unnamed_task__/MaxReturn 0.683935 +__unnamed_task__/MinReturn -39.2827 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.61292 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.40708 +policy/KL 0.0105134 +policy/KLBefore 0 +policy/LossAfter -0.0217088 +policy/LossBefore -0.0103561 +policy/dLoss 0.0113528 +---------------------------------------- ----------- +2025-04-03 09:38:03 | [rl2_trainer] epoch #371 | Optimizing policy... +2025-04-03 09:38:03 | [rl2_trainer] epoch #371 | Fitting baseline... +2025-04-03 09:38:03 | [rl2_trainer] epoch #371 | Computing loss before +2025-04-03 09:38:03 | [rl2_trainer] epoch #371 | Computing KL before +2025-04-03 09:38:04 | [rl2_trainer] epoch #371 | Optimizing +2025-04-03 09:38:37 | [rl2_trainer] epoch #371 | Computing KL after +2025-04-03 09:38:38 | [rl2_trainer] epoch #371 | Computing loss after +2025-04-03 09:38:39 | [rl2_trainer] epoch #371 | Saving snapshot... +2025-04-03 09:38:39 | [rl2_trainer] epoch #371 | Saved +2025-04-03 09:38:39 | [rl2_trainer] epoch #371 | Time 71199.65 s +2025-04-03 09:38:39 | [rl2_trainer] epoch #371 | EpochTime 212.51 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.7507 +Average/AverageReturn -20.8956 +Average/Iteration 371 +Average/MaxReturn -14.5248 +Average/MinReturn -37.4085 +Average/NumEpisodes 100 +Average/StdReturn 4.81584 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.688987 +TotalEnvSteps 3.72e+06 +__unnamed_task__/AverageDiscountedReturn -13.7507 +__unnamed_task__/AverageReturn -20.8956 +__unnamed_task__/Iteration 371 +__unnamed_task__/MaxReturn -14.5248 +__unnamed_task__/MinReturn -37.4085 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.81584 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.376 +policy/KL 0.00964428 +policy/KLBefore 0 +policy/LossAfter -0.00681148 +policy/LossBefore -0.00283546 +policy/dLoss 0.00397601 +---------------------------------------- ------------ +2025-04-03 09:40:44 | [rl2_trainer] epoch #372 | Optimizing policy... +2025-04-03 09:40:44 | [rl2_trainer] epoch #372 | Fitting baseline... +2025-04-03 09:40:44 | [rl2_trainer] epoch #372 | Computing loss before +2025-04-03 09:40:45 | [rl2_trainer] epoch #372 | Computing KL before +2025-04-03 09:40:45 | [rl2_trainer] epoch #372 | Optimizing +2025-04-03 09:41:19 | [rl2_trainer] epoch #372 | Computing KL after +2025-04-03 09:41:20 | [rl2_trainer] epoch #372 | Computing loss after +2025-04-03 09:41:20 | [rl2_trainer] epoch #372 | Saving snapshot... +2025-04-03 09:41:20 | [rl2_trainer] epoch #372 | Saved +2025-04-03 09:41:20 | [rl2_trainer] epoch #372 | Time 71361.43 s +2025-04-03 09:41:20 | [rl2_trainer] epoch #372 | EpochTime 161.78 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.7097 +Average/AverageReturn -18.8591 +Average/Iteration 372 +Average/MaxReturn 1.87203 +Average/MinReturn -35.204 +Average/NumEpisodes 100 +Average/StdReturn 6.24576 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.697212 +TotalEnvSteps 3.73e+06 +__unnamed_task__/AverageDiscountedReturn -12.7097 +__unnamed_task__/AverageReturn -18.8591 +__unnamed_task__/Iteration 372 +__unnamed_task__/MaxReturn 1.87203 +__unnamed_task__/MinReturn -35.204 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.24576 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.34503 +policy/KL 0.012865 +policy/KLBefore 0 +policy/LossAfter -0.0156268 +policy/LossBefore -0.00581262 +policy/dLoss 0.00981419 +---------------------------------------- ------------ +2025-04-03 09:44:15 | [rl2_trainer] epoch #373 | Optimizing policy... +2025-04-03 09:44:15 | [rl2_trainer] epoch #373 | Fitting baseline... +2025-04-03 09:44:15 | [rl2_trainer] epoch #373 | Computing loss before +2025-04-03 09:44:16 | [rl2_trainer] epoch #373 | Computing KL before +2025-04-03 09:44:16 | [rl2_trainer] epoch #373 | Optimizing +2025-04-03 09:44:49 | [rl2_trainer] epoch #373 | Computing KL after +2025-04-03 09:44:50 | [rl2_trainer] epoch #373 | Computing loss after +2025-04-03 09:44:51 | [rl2_trainer] epoch #373 | Saving snapshot... +2025-04-03 09:44:51 | [rl2_trainer] epoch #373 | Saved +2025-04-03 09:44:51 | [rl2_trainer] epoch #373 | Time 71571.54 s +2025-04-03 09:44:51 | [rl2_trainer] epoch #373 | EpochTime 210.11 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.439 +Average/AverageReturn -20.4785 +Average/Iteration 373 +Average/MaxReturn -9.61547 +Average/MinReturn -41.4748 +Average/NumEpisodes 100 +Average/StdReturn 5.13097 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.762592 +TotalEnvSteps 3.74e+06 +__unnamed_task__/AverageDiscountedReturn -13.439 +__unnamed_task__/AverageReturn -20.4785 +__unnamed_task__/Iteration 373 +__unnamed_task__/MaxReturn -9.61547 +__unnamed_task__/MinReturn -41.4748 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.13097 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.33599 +policy/KL 0.00844091 +policy/KLBefore 0 +policy/LossAfter -0.0110885 +policy/LossBefore -0.00234891 +policy/dLoss 0.00873958 +---------------------------------------- ------------ +2025-04-03 09:47:00 | [rl2_trainer] epoch #374 | Optimizing policy... +2025-04-03 09:47:01 | [rl2_trainer] epoch #374 | Fitting baseline... +2025-04-03 09:47:01 | [rl2_trainer] epoch #374 | Computing loss before +2025-04-03 09:47:01 | [rl2_trainer] epoch #374 | Computing KL before +2025-04-03 09:47:02 | [rl2_trainer] epoch #374 | Optimizing +2025-04-03 09:47:36 | [rl2_trainer] epoch #374 | Computing KL after +2025-04-03 09:47:37 | [rl2_trainer] epoch #374 | Computing loss after +2025-04-03 09:47:37 | [rl2_trainer] epoch #374 | Saving snapshot... +2025-04-03 09:47:37 | [rl2_trainer] epoch #374 | Saved +2025-04-03 09:47:37 | [rl2_trainer] epoch #374 | Time 71738.41 s +2025-04-03 09:47:37 | [rl2_trainer] epoch #374 | EpochTime 166.86 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.8286 +Average/AverageReturn -19.8116 +Average/Iteration 374 +Average/MaxReturn 4.51895 +Average/MinReturn -38.5504 +Average/NumEpisodes 100 +Average/StdReturn 6.47388 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.688038 +TotalEnvSteps 3.75e+06 +__unnamed_task__/AverageDiscountedReturn -12.8286 +__unnamed_task__/AverageReturn -19.8116 +__unnamed_task__/Iteration 374 +__unnamed_task__/MaxReturn 4.51895 +__unnamed_task__/MinReturn -38.5504 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.47388 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.34779 +policy/KL 0.0122246 +policy/KLBefore 0 +policy/LossAfter -0.0208597 +policy/LossBefore 0.00451888 +policy/dLoss 0.0253786 +---------------------------------------- ------------ +2025-04-03 09:49:43 | [rl2_trainer] epoch #375 | Optimizing policy... +2025-04-03 09:49:44 | [rl2_trainer] epoch #375 | Fitting baseline... +2025-04-03 09:49:44 | [rl2_trainer] epoch #375 | Computing loss before +2025-04-03 09:49:44 | [rl2_trainer] epoch #375 | Computing KL before +2025-04-03 09:49:45 | [rl2_trainer] epoch #375 | Optimizing +2025-04-03 09:50:18 | [rl2_trainer] epoch #375 | Computing KL after +2025-04-03 09:50:19 | [rl2_trainer] epoch #375 | Computing loss after +2025-04-03 09:50:20 | [rl2_trainer] epoch #375 | Saving snapshot... +2025-04-03 09:50:20 | [rl2_trainer] epoch #375 | Saved +2025-04-03 09:50:20 | [rl2_trainer] epoch #375 | Time 71900.82 s +2025-04-03 09:50:20 | [rl2_trainer] epoch #375 | EpochTime 162.41 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.1285 +Average/AverageReturn -16.2998 +Average/Iteration 375 +Average/MaxReturn 8.25147 +Average/MinReturn -34.907 +Average/NumEpisodes 100 +Average/StdReturn 8.40295 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.752655 +TotalEnvSteps 3.76e+06 +__unnamed_task__/AverageDiscountedReturn -11.1285 +__unnamed_task__/AverageReturn -16.2998 +__unnamed_task__/Iteration 375 +__unnamed_task__/MaxReturn 8.25147 +__unnamed_task__/MinReturn -34.907 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.40295 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.33128 +policy/KL 0.0152761 +policy/KLBefore 0 +policy/LossAfter -0.0275222 +policy/LossBefore -0.00420734 +policy/dLoss 0.0233149 +---------------------------------------- ------------ +2025-04-03 09:52:46 | [rl2_trainer] epoch #376 | Optimizing policy... +2025-04-03 09:52:47 | [rl2_trainer] epoch #376 | Fitting baseline... +2025-04-03 09:52:47 | [rl2_trainer] epoch #376 | Computing loss before +2025-04-03 09:52:47 | [rl2_trainer] epoch #376 | Computing KL before +2025-04-03 09:52:48 | [rl2_trainer] epoch #376 | Optimizing +2025-04-03 09:53:23 | [rl2_trainer] epoch #376 | Computing KL after +2025-04-03 09:53:24 | [rl2_trainer] epoch #376 | Computing loss after +2025-04-03 09:53:25 | [rl2_trainer] epoch #376 | Saving snapshot... +2025-04-03 09:53:25 | [rl2_trainer] epoch #376 | Saved +2025-04-03 09:53:25 | [rl2_trainer] epoch #376 | Time 72085.82 s +2025-04-03 09:53:25 | [rl2_trainer] epoch #376 | EpochTime 185.00 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -9.72036 +Average/AverageReturn -14.2123 +Average/Iteration 376 +Average/MaxReturn 12.157 +Average/MinReturn -25.1561 +Average/NumEpisodes 100 +Average/StdReturn 6.59743 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.649045 +TotalEnvSteps 3.77e+06 +__unnamed_task__/AverageDiscountedReturn -9.72036 +__unnamed_task__/AverageReturn -14.2123 +__unnamed_task__/Iteration 376 +__unnamed_task__/MaxReturn 12.157 +__unnamed_task__/MinReturn -25.1561 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.59743 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.28827 +policy/KL 0.0113566 +policy/KLBefore 0 +policy/LossAfter -0.0258159 +policy/LossBefore -0.0130927 +policy/dLoss 0.0127233 +---------------------------------------- ----------- +2025-04-03 09:56:15 | [rl2_trainer] epoch #377 | Optimizing policy... +2025-04-03 09:56:15 | [rl2_trainer] epoch #377 | Fitting baseline... +2025-04-03 09:56:15 | [rl2_trainer] epoch #377 | Computing loss before +2025-04-03 09:56:16 | [rl2_trainer] epoch #377 | Computing KL before +2025-04-03 09:56:16 | [rl2_trainer] epoch #377 | Optimizing +2025-04-03 09:56:53 | [rl2_trainer] epoch #377 | Computing KL after +2025-04-03 09:56:54 | [rl2_trainer] epoch #377 | Computing loss after +2025-04-03 09:56:55 | [rl2_trainer] epoch #377 | Saving snapshot... +2025-04-03 09:56:55 | [rl2_trainer] epoch #377 | Saved +2025-04-03 09:56:55 | [rl2_trainer] epoch #377 | Time 72295.53 s +2025-04-03 09:56:55 | [rl2_trainer] epoch #377 | EpochTime 209.71 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.8865 +Average/AverageReturn -26.884 +Average/Iteration 377 +Average/MaxReturn 1.44636 +Average/MinReturn -62.4023 +Average/NumEpisodes 100 +Average/StdReturn 11.8173 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.836857 +TotalEnvSteps 3.78e+06 +__unnamed_task__/AverageDiscountedReturn -16.8865 +__unnamed_task__/AverageReturn -26.884 +__unnamed_task__/Iteration 377 +__unnamed_task__/MaxReturn 1.44636 +__unnamed_task__/MinReturn -62.4023 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.8173 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.24674 +policy/KL 0.0139795 +policy/KLBefore 0 +policy/LossAfter -0.0293585 +policy/LossBefore -0.00345708 +policy/dLoss 0.0259014 +---------------------------------------- ------------ +2025-04-03 09:59:07 | [rl2_trainer] epoch #378 | Optimizing policy... +2025-04-03 09:59:07 | [rl2_trainer] epoch #378 | Fitting baseline... +2025-04-03 09:59:07 | [rl2_trainer] epoch #378 | Computing loss before +2025-04-03 09:59:08 | [rl2_trainer] epoch #378 | Computing KL before +2025-04-03 09:59:08 | [rl2_trainer] epoch #378 | Optimizing +2025-04-03 09:59:44 | [rl2_trainer] epoch #378 | Computing KL after +2025-04-03 09:59:44 | [rl2_trainer] epoch #378 | Computing loss after +2025-04-03 09:59:45 | [rl2_trainer] epoch #378 | Saving snapshot... +2025-04-03 09:59:45 | [rl2_trainer] epoch #378 | Saved +2025-04-03 09:59:45 | [rl2_trainer] epoch #378 | Time 72466.07 s +2025-04-03 09:59:45 | [rl2_trainer] epoch #378 | EpochTime 170.54 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.2236 +Average/AverageReturn -20.0317 +Average/Iteration 378 +Average/MaxReturn 32.0803 +Average/MinReturn -41.2211 +Average/NumEpisodes 100 +Average/StdReturn 7.41419 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.511475 +TotalEnvSteps 3.79e+06 +__unnamed_task__/AverageDiscountedReturn -13.2236 +__unnamed_task__/AverageReturn -20.0317 +__unnamed_task__/Iteration 378 +__unnamed_task__/MaxReturn 32.0803 +__unnamed_task__/MinReturn -41.2211 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.41419 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.2173 +policy/KL 0.0135854 +policy/KLBefore 0 +policy/LossAfter -0.0282384 +policy/LossBefore -0.00427514 +policy/dLoss 0.0239633 +---------------------------------------- ------------ +2025-04-03 10:02:54 | [rl2_trainer] epoch #379 | Optimizing policy... +2025-04-03 10:02:54 | [rl2_trainer] epoch #379 | Fitting baseline... +2025-04-03 10:02:54 | [rl2_trainer] epoch #379 | Computing loss before +2025-04-03 10:02:55 | [rl2_trainer] epoch #379 | Computing KL before +2025-04-03 10:02:55 | [rl2_trainer] epoch #379 | Optimizing +2025-04-03 10:03:31 | [rl2_trainer] epoch #379 | Computing KL after +2025-04-03 10:03:32 | [rl2_trainer] epoch #379 | Computing loss after +2025-04-03 10:03:33 | [rl2_trainer] epoch #379 | Saving snapshot... +2025-04-03 10:03:33 | [rl2_trainer] epoch #379 | Saved +2025-04-03 10:03:33 | [rl2_trainer] epoch #379 | Time 72693.68 s +2025-04-03 10:03:33 | [rl2_trainer] epoch #379 | EpochTime 227.60 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -16.1138 +Average/AverageReturn -25.8178 +Average/Iteration 379 +Average/MaxReturn 8.86073 +Average/MinReturn -54.715 +Average/NumEpisodes 100 +Average/StdReturn 11.744 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.877013 +TotalEnvSteps 3.8e+06 +__unnamed_task__/AverageDiscountedReturn -16.1138 +__unnamed_task__/AverageReturn -25.8178 +__unnamed_task__/Iteration 379 +__unnamed_task__/MaxReturn 8.86073 +__unnamed_task__/MinReturn -54.715 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.744 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.19866 +policy/KL 0.0138624 +policy/KLBefore 0 +policy/LossAfter -0.0260134 +policy/LossBefore -0.000434824 +policy/dLoss 0.0255786 +---------------------------------------- ------------- +2025-04-03 10:05:52 | [rl2_trainer] epoch #380 | Optimizing policy... +2025-04-03 10:05:52 | [rl2_trainer] epoch #380 | Fitting baseline... +2025-04-03 10:05:52 | [rl2_trainer] epoch #380 | Computing loss before +2025-04-03 10:05:53 | [rl2_trainer] epoch #380 | Computing KL before +2025-04-03 10:05:53 | [rl2_trainer] epoch #380 | Optimizing +2025-04-03 10:06:28 | [rl2_trainer] epoch #380 | Computing KL after +2025-04-03 10:06:28 | [rl2_trainer] epoch #380 | Computing loss after +2025-04-03 10:06:29 | [rl2_trainer] epoch #380 | Saving snapshot... +2025-04-03 10:06:29 | [rl2_trainer] epoch #380 | Saved +2025-04-03 10:06:29 | [rl2_trainer] epoch #380 | Time 72870.14 s +2025-04-03 10:06:29 | [rl2_trainer] epoch #380 | EpochTime 176.46 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -10.7358 +Average/AverageReturn -16.3758 +Average/Iteration 380 +Average/MaxReturn 11.3124 +Average/MinReturn -24.4445 +Average/NumEpisodes 100 +Average/StdReturn 4.36326 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.502577 +TotalEnvSteps 3.81e+06 +__unnamed_task__/AverageDiscountedReturn -10.7358 +__unnamed_task__/AverageReturn -16.3758 +__unnamed_task__/Iteration 380 +__unnamed_task__/MaxReturn 11.3124 +__unnamed_task__/MinReturn -24.4445 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.36326 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.18279 +policy/KL 0.0105938 +policy/KLBefore 0 +policy/LossAfter -0.0143216 +policy/LossBefore -0.000325423 +policy/dLoss 0.0139962 +---------------------------------------- ------------- +2025-04-03 10:09:44 | [rl2_trainer] epoch #381 | Optimizing policy... +2025-04-03 10:09:45 | [rl2_trainer] epoch #381 | Fitting baseline... +2025-04-03 10:09:45 | [rl2_trainer] epoch #381 | Computing loss before +2025-04-03 10:09:45 | [rl2_trainer] epoch #381 | Computing KL before +2025-04-03 10:09:46 | [rl2_trainer] epoch #381 | Optimizing +2025-04-03 10:10:20 | [rl2_trainer] epoch #381 | Computing KL after +2025-04-03 10:10:21 | [rl2_trainer] epoch #381 | Computing loss after +2025-04-03 10:10:22 | [rl2_trainer] epoch #381 | Saving snapshot... +2025-04-03 10:10:22 | [rl2_trainer] epoch #381 | Saved +2025-04-03 10:10:22 | [rl2_trainer] epoch #381 | Time 73102.96 s +2025-04-03 10:10:22 | [rl2_trainer] epoch #381 | EpochTime 232.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.7852 +Average/AverageReturn -26.635 +Average/Iteration 381 +Average/MaxReturn -11.1482 +Average/MinReturn -56.1634 +Average/NumEpisodes 100 +Average/StdReturn 9.22785 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.781176 +TotalEnvSteps 3.82e+06 +__unnamed_task__/AverageDiscountedReturn -16.7852 +__unnamed_task__/AverageReturn -26.635 +__unnamed_task__/Iteration 381 +__unnamed_task__/MaxReturn -11.1482 +__unnamed_task__/MinReturn -56.1634 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.22785 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.15673 +policy/KL 0.0151956 +policy/KLBefore 0 +policy/LossAfter -0.0248081 +policy/LossBefore 0.00129545 +policy/dLoss 0.0261035 +---------------------------------------- ------------ +2025-04-03 10:12:17 | [rl2_trainer] epoch #382 | Optimizing policy... +2025-04-03 10:12:17 | [rl2_trainer] epoch #382 | Fitting baseline... +2025-04-03 10:12:17 | [rl2_trainer] epoch #382 | Computing loss before +2025-04-03 10:12:18 | [rl2_trainer] epoch #382 | Computing KL before +2025-04-03 10:12:18 | [rl2_trainer] epoch #382 | Optimizing +2025-04-03 10:12:55 | [rl2_trainer] epoch #382 | Computing KL after +2025-04-03 10:12:55 | [rl2_trainer] epoch #382 | Computing loss after +2025-04-03 10:12:56 | [rl2_trainer] epoch #382 | Saving snapshot... +2025-04-03 10:12:56 | [rl2_trainer] epoch #382 | Saved +2025-04-03 10:12:56 | [rl2_trainer] epoch #382 | Time 73257.21 s +2025-04-03 10:12:56 | [rl2_trainer] epoch #382 | EpochTime 154.25 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.6489 +Average/AverageReturn -17.6237 +Average/Iteration 382 +Average/MaxReturn 0.361426 +Average/MinReturn -43.5319 +Average/NumEpisodes 100 +Average/StdReturn 5.31073 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.533281 +TotalEnvSteps 3.83e+06 +__unnamed_task__/AverageDiscountedReturn -11.6489 +__unnamed_task__/AverageReturn -17.6237 +__unnamed_task__/Iteration 382 +__unnamed_task__/MaxReturn 0.361426 +__unnamed_task__/MinReturn -43.5319 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.31073 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.12342 +policy/KL 0.0149372 +policy/KLBefore 0 +policy/LossAfter -0.00706459 +policy/LossBefore -0.000222877 +policy/dLoss 0.00684171 +---------------------------------------- ------------- +2025-04-03 10:16:44 | [rl2_trainer] epoch #383 | Optimizing policy... +2025-04-03 10:16:44 | [rl2_trainer] epoch #383 | Fitting baseline... +2025-04-03 10:16:44 | [rl2_trainer] epoch #383 | Computing loss before +2025-04-03 10:16:45 | [rl2_trainer] epoch #383 | Computing KL before +2025-04-03 10:16:45 | [rl2_trainer] epoch #383 | Optimizing +2025-04-03 10:17:21 | [rl2_trainer] epoch #383 | Computing KL after +2025-04-03 10:17:22 | [rl2_trainer] epoch #383 | Computing loss after +2025-04-03 10:17:23 | [rl2_trainer] epoch #383 | Saving snapshot... +2025-04-03 10:17:23 | [rl2_trainer] epoch #383 | Saved +2025-04-03 10:17:23 | [rl2_trainer] epoch #383 | Time 73523.90 s +2025-04-03 10:17:23 | [rl2_trainer] epoch #383 | EpochTime 266.69 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.4229 +Average/AverageReturn -26.0745 +Average/Iteration 383 +Average/MaxReturn 3.64826 +Average/MinReturn -57.691 +Average/NumEpisodes 100 +Average/StdReturn 11.6843 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.815548 +TotalEnvSteps 3.84e+06 +__unnamed_task__/AverageDiscountedReturn -16.4229 +__unnamed_task__/AverageReturn -26.0745 +__unnamed_task__/Iteration 383 +__unnamed_task__/MaxReturn 3.64826 +__unnamed_task__/MinReturn -57.691 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.6843 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.10869 +policy/KL 0.01639 +policy/KLBefore 0 +policy/LossAfter -0.043408 +policy/LossBefore -0.00438814 +policy/dLoss 0.0390198 +---------------------------------------- ------------ +2025-04-03 10:19:20 | [rl2_trainer] epoch #384 | Optimizing policy... +2025-04-03 10:19:20 | [rl2_trainer] epoch #384 | Fitting baseline... +2025-04-03 10:19:20 | [rl2_trainer] epoch #384 | Computing loss before +2025-04-03 10:19:20 | [rl2_trainer] epoch #384 | Computing KL before +2025-04-03 10:19:21 | [rl2_trainer] epoch #384 | Optimizing +2025-04-03 10:19:56 | [rl2_trainer] epoch #384 | Computing KL after +2025-04-03 10:19:57 | [rl2_trainer] epoch #384 | Computing loss after +2025-04-03 10:19:58 | [rl2_trainer] epoch #384 | Saving snapshot... +2025-04-03 10:19:58 | [rl2_trainer] epoch #384 | Saved +2025-04-03 10:19:58 | [rl2_trainer] epoch #384 | Time 73678.67 s +2025-04-03 10:19:58 | [rl2_trainer] epoch #384 | EpochTime 154.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -12.1048 +Average/AverageReturn -18.2593 +Average/Iteration 384 +Average/MaxReturn 8.60265 +Average/MinReturn -30.497 +Average/NumEpisodes 100 +Average/StdReturn 5.89936 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.483134 +TotalEnvSteps 3.85e+06 +__unnamed_task__/AverageDiscountedReturn -12.1048 +__unnamed_task__/AverageReturn -18.2593 +__unnamed_task__/Iteration 384 +__unnamed_task__/MaxReturn 8.60265 +__unnamed_task__/MinReturn -30.497 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.89936 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.10592 +policy/KL 0.0138024 +policy/KLBefore 0 +policy/LossAfter -0.0286999 +policy/LossBefore -0.00712759 +policy/dLoss 0.0215723 +---------------------------------------- ------------ +2025-04-03 10:23:32 | [rl2_trainer] epoch #385 | Optimizing policy... +2025-04-03 10:23:32 | [rl2_trainer] epoch #385 | Fitting baseline... +2025-04-03 10:23:33 | [rl2_trainer] epoch #385 | Computing loss before +2025-04-03 10:23:33 | [rl2_trainer] epoch #385 | Computing KL before +2025-04-03 10:23:34 | [rl2_trainer] epoch #385 | Optimizing +2025-04-03 10:24:09 | [rl2_trainer] epoch #385 | Computing KL after +2025-04-03 10:24:10 | [rl2_trainer] epoch #385 | Computing loss after +2025-04-03 10:24:11 | [rl2_trainer] epoch #385 | Saving snapshot... +2025-04-03 10:24:11 | [rl2_trainer] epoch #385 | Saved +2025-04-03 10:24:11 | [rl2_trainer] epoch #385 | Time 73931.60 s +2025-04-03 10:24:11 | [rl2_trainer] epoch #385 | EpochTime 252.93 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.941 +Average/AverageReturn -25.679 +Average/Iteration 385 +Average/MaxReturn -0.585049 +Average/MinReturn -65.9409 +Average/NumEpisodes 100 +Average/StdReturn 12.585 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.848104 +TotalEnvSteps 3.86e+06 +__unnamed_task__/AverageDiscountedReturn -15.941 +__unnamed_task__/AverageReturn -25.679 +__unnamed_task__/Iteration 385 +__unnamed_task__/MaxReturn -0.585049 +__unnamed_task__/MinReturn -65.9409 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.585 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.09007 +policy/KL 0.0146282 +policy/KLBefore 0 +policy/LossAfter -0.0375607 +policy/LossBefore -0.00391136 +policy/dLoss 0.0336494 +---------------------------------------- ------------ +2025-04-03 10:27:10 | [rl2_trainer] epoch #386 | Optimizing policy... +2025-04-03 10:27:10 | [rl2_trainer] epoch #386 | Fitting baseline... +2025-04-03 10:27:10 | [rl2_trainer] epoch #386 | Computing loss before +2025-04-03 10:27:11 | [rl2_trainer] epoch #386 | Computing KL before +2025-04-03 10:27:11 | [rl2_trainer] epoch #386 | Optimizing +2025-04-03 10:27:47 | [rl2_trainer] epoch #386 | Computing KL after +2025-04-03 10:27:48 | [rl2_trainer] epoch #386 | Computing loss after +2025-04-03 10:27:49 | [rl2_trainer] epoch #386 | Saving snapshot... +2025-04-03 10:27:49 | [rl2_trainer] epoch #386 | Saved +2025-04-03 10:27:49 | [rl2_trainer] epoch #386 | Time 74149.82 s +2025-04-03 10:27:49 | [rl2_trainer] epoch #386 | EpochTime 218.22 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.2536 +Average/AverageReturn -17.0711 +Average/Iteration 386 +Average/MaxReturn 25.1582 +Average/MinReturn -31.8249 +Average/NumEpisodes 100 +Average/StdReturn 7.42504 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.556162 +TotalEnvSteps 3.87e+06 +__unnamed_task__/AverageDiscountedReturn -11.2536 +__unnamed_task__/AverageReturn -17.0711 +__unnamed_task__/Iteration 386 +__unnamed_task__/MaxReturn 25.1582 +__unnamed_task__/MinReturn -31.8249 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.42504 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.05338 +policy/KL 0.010611 +policy/KLBefore 0 +policy/LossAfter -0.028051 +policy/LossBefore -0.01463 +policy/dLoss 0.0134209 +---------------------------------------- ----------- +2025-04-03 10:32:14 | [rl2_trainer] epoch #387 | Optimizing policy... +2025-04-03 10:32:14 | [rl2_trainer] epoch #387 | Fitting baseline... +2025-04-03 10:32:14 | [rl2_trainer] epoch #387 | Computing loss before +2025-04-03 10:32:15 | [rl2_trainer] epoch #387 | Computing KL before +2025-04-03 10:32:16 | [rl2_trainer] epoch #387 | Optimizing +2025-04-03 10:32:51 | [rl2_trainer] epoch #387 | Computing KL after +2025-04-03 10:32:52 | [rl2_trainer] epoch #387 | Computing loss after +2025-04-03 10:32:53 | [rl2_trainer] epoch #387 | Saving snapshot... +2025-04-03 10:32:53 | [rl2_trainer] epoch #387 | Saved +2025-04-03 10:32:53 | [rl2_trainer] epoch #387 | Time 74453.80 s +2025-04-03 10:32:53 | [rl2_trainer] epoch #387 | EpochTime 303.97 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.1238 +Average/AverageReturn -24.2126 +Average/Iteration 387 +Average/MaxReturn 3.9374 +Average/MinReturn -67.9715 +Average/NumEpisodes 100 +Average/StdReturn 12.0154 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.869556 +TotalEnvSteps 3.88e+06 +__unnamed_task__/AverageDiscountedReturn -15.1238 +__unnamed_task__/AverageReturn -24.2126 +__unnamed_task__/Iteration 387 +__unnamed_task__/MaxReturn 3.9374 +__unnamed_task__/MinReturn -67.9715 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.0154 +__unnamed_task__/TerminationRate 0 +policy/Entropy 2.0106 +policy/KL 0.0146513 +policy/KLBefore 0 +policy/LossAfter -0.0249093 +policy/LossBefore -0.00333443 +policy/dLoss 0.0215749 +---------------------------------------- ------------ +2025-04-03 10:35:13 | [rl2_trainer] epoch #388 | Optimizing policy... +2025-04-03 10:35:14 | [rl2_trainer] epoch #388 | Fitting baseline... +2025-04-03 10:35:14 | [rl2_trainer] epoch #388 | Computing loss before +2025-04-03 10:35:14 | [rl2_trainer] epoch #388 | Computing KL before +2025-04-03 10:35:15 | [rl2_trainer] epoch #388 | Optimizing +2025-04-03 10:35:49 | [rl2_trainer] epoch #388 | Computing KL after +2025-04-03 10:35:49 | [rl2_trainer] epoch #388 | Computing loss after +2025-04-03 10:35:50 | [rl2_trainer] epoch #388 | Saving snapshot... +2025-04-03 10:35:50 | [rl2_trainer] epoch #388 | Saved +2025-04-03 10:35:50 | [rl2_trainer] epoch #388 | Time 74631.29 s +2025-04-03 10:35:50 | [rl2_trainer] epoch #388 | EpochTime 177.50 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -10.5675 +Average/AverageReturn -16.0174 +Average/Iteration 388 +Average/MaxReturn 11.2506 +Average/MinReturn -24.4599 +Average/NumEpisodes 100 +Average/StdReturn 5.35293 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.4157 +TotalEnvSteps 3.89e+06 +__unnamed_task__/AverageDiscountedReturn -10.5675 +__unnamed_task__/AverageReturn -16.0174 +__unnamed_task__/Iteration 388 +__unnamed_task__/MaxReturn 11.2506 +__unnamed_task__/MinReturn -24.4599 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.35293 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.9725 +policy/KL 0.012158 +policy/KLBefore 0 +policy/LossAfter -0.0155662 +policy/LossBefore 0.000373626 +policy/dLoss 0.0159399 +---------------------------------------- ------------- +2025-04-03 10:38:20 | [rl2_trainer] epoch #389 | Optimizing policy... +2025-04-03 10:38:20 | [rl2_trainer] epoch #389 | Fitting baseline... +2025-04-03 10:38:20 | [rl2_trainer] epoch #389 | Computing loss before +2025-04-03 10:38:21 | [rl2_trainer] epoch #389 | Computing KL before +2025-04-03 10:38:21 | [rl2_trainer] epoch #389 | Optimizing +2025-04-03 10:38:56 | [rl2_trainer] epoch #389 | Computing KL after +2025-04-03 10:38:57 | [rl2_trainer] epoch #389 | Computing loss after +2025-04-03 10:38:58 | [rl2_trainer] epoch #389 | Saving snapshot... +2025-04-03 10:38:58 | [rl2_trainer] epoch #389 | Saved +2025-04-03 10:38:58 | [rl2_trainer] epoch #389 | Time 74818.62 s +2025-04-03 10:38:58 | [rl2_trainer] epoch #389 | EpochTime 187.32 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.0003 +Average/AverageReturn -12.797 +Average/Iteration 389 +Average/MaxReturn 12.9776 +Average/MinReturn -53.7092 +Average/NumEpisodes 100 +Average/StdReturn 8.01001 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.361 +TotalEnvSteps 3.9e+06 +__unnamed_task__/AverageDiscountedReturn -9.0003 +__unnamed_task__/AverageReturn -12.797 +__unnamed_task__/Iteration 389 +__unnamed_task__/MaxReturn 12.9776 +__unnamed_task__/MinReturn -53.7092 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.01001 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.94282 +policy/KL 0.0152899 +policy/KLBefore 0 +policy/LossAfter -0.0191729 +policy/LossBefore 0.00620069 +policy/dLoss 0.0253736 +---------------------------------------- ------------ +2025-04-03 10:40:26 | [rl2_trainer] epoch #390 | Optimizing policy... +2025-04-03 10:40:26 | [rl2_trainer] epoch #390 | Fitting baseline... +2025-04-03 10:40:26 | [rl2_trainer] epoch #390 | Computing loss before +2025-04-03 10:40:27 | [rl2_trainer] epoch #390 | Computing KL before +2025-04-03 10:40:27 | [rl2_trainer] epoch #390 | Optimizing +2025-04-03 10:41:03 | [rl2_trainer] epoch #390 | Computing KL after +2025-04-03 10:41:04 | [rl2_trainer] epoch #390 | Computing loss after +2025-04-03 10:41:05 | [rl2_trainer] epoch #390 | Saving snapshot... +2025-04-03 10:41:05 | [rl2_trainer] epoch #390 | Saved +2025-04-03 10:41:05 | [rl2_trainer] epoch #390 | Time 74945.71 s +2025-04-03 10:41:05 | [rl2_trainer] epoch #390 | EpochTime 127.09 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.90323 +Average/AverageReturn -13.2596 +Average/Iteration 390 +Average/MaxReturn 18.1914 +Average/MinReturn -25.1098 +Average/NumEpisodes 100 +Average/StdReturn 8.14611 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.480593 +TotalEnvSteps 3.91e+06 +__unnamed_task__/AverageDiscountedReturn -8.90323 +__unnamed_task__/AverageReturn -13.2596 +__unnamed_task__/Iteration 390 +__unnamed_task__/MaxReturn 18.1914 +__unnamed_task__/MinReturn -25.1098 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.14611 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.89369 +policy/KL 0.0178738 +policy/KLBefore 0 +policy/LossAfter -0.0277069 +policy/LossBefore 0.00694978 +policy/dLoss 0.0346566 +---------------------------------------- ------------ +2025-04-03 10:42:32 | [rl2_trainer] epoch #391 | Optimizing policy... +2025-04-03 10:42:32 | [rl2_trainer] epoch #391 | Fitting baseline... +2025-04-03 10:42:32 | [rl2_trainer] epoch #391 | Computing loss before +2025-04-03 10:42:33 | [rl2_trainer] epoch #391 | Computing KL before +2025-04-03 10:42:33 | [rl2_trainer] epoch #391 | Optimizing +2025-04-03 10:43:09 | [rl2_trainer] epoch #391 | Computing KL after +2025-04-03 10:43:09 | [rl2_trainer] epoch #391 | Computing loss after +2025-04-03 10:43:10 | [rl2_trainer] epoch #391 | Saving snapshot... +2025-04-03 10:43:10 | [rl2_trainer] epoch #391 | Saved +2025-04-03 10:43:10 | [rl2_trainer] epoch #391 | Time 75070.97 s +2025-04-03 10:43:10 | [rl2_trainer] epoch #391 | EpochTime 125.25 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.49187 +Average/AverageReturn -13.6558 +Average/Iteration 391 +Average/MaxReturn 22.6705 +Average/MinReturn -35.9058 +Average/NumEpisodes 100 +Average/StdReturn 9.29893 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.607148 +TotalEnvSteps 3.92e+06 +__unnamed_task__/AverageDiscountedReturn -9.49187 +__unnamed_task__/AverageReturn -13.6558 +__unnamed_task__/Iteration 391 +__unnamed_task__/MaxReturn 22.6705 +__unnamed_task__/MinReturn -35.9058 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.29893 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.84981 +policy/KL 0.0196096 +policy/KLBefore 0 +policy/LossAfter -0.0438093 +policy/LossBefore -0.00873435 +policy/dLoss 0.035075 +---------------------------------------- ------------ +2025-04-03 10:46:10 | [rl2_trainer] epoch #392 | Optimizing policy... +2025-04-03 10:46:10 | [rl2_trainer] epoch #392 | Fitting baseline... +2025-04-03 10:46:10 | [rl2_trainer] epoch #392 | Computing loss before +2025-04-03 10:46:11 | [rl2_trainer] epoch #392 | Computing KL before +2025-04-03 10:46:11 | [rl2_trainer] epoch #392 | Optimizing +2025-04-03 10:46:46 | [rl2_trainer] epoch #392 | Computing KL after +2025-04-03 10:46:47 | [rl2_trainer] epoch #392 | Computing loss after +2025-04-03 10:46:48 | [rl2_trainer] epoch #392 | Saving snapshot... +2025-04-03 10:46:48 | [rl2_trainer] epoch #392 | Saved +2025-04-03 10:46:48 | [rl2_trainer] epoch #392 | Time 75288.67 s +2025-04-03 10:46:48 | [rl2_trainer] epoch #392 | EpochTime 217.69 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.1432 +Average/AverageReturn -26.07 +Average/Iteration 392 +Average/MaxReturn -5.38366 +Average/MinReturn -52.8162 +Average/NumEpisodes 100 +Average/StdReturn 10.6631 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.84911 +TotalEnvSteps 3.93e+06 +__unnamed_task__/AverageDiscountedReturn -16.1432 +__unnamed_task__/AverageReturn -26.07 +__unnamed_task__/Iteration 392 +__unnamed_task__/MaxReturn -5.38366 +__unnamed_task__/MinReturn -52.8162 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.6631 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.83147 +policy/KL 0.0149537 +policy/KLBefore 0 +policy/LossAfter -0.0197349 +policy/LossBefore 0.00761649 +policy/dLoss 0.0273514 +---------------------------------------- ------------ +2025-04-03 10:49:44 | [rl2_trainer] epoch #393 | Optimizing policy... +2025-04-03 10:49:44 | [rl2_trainer] epoch #393 | Fitting baseline... +2025-04-03 10:49:44 | [rl2_trainer] epoch #393 | Computing loss before +2025-04-03 10:49:45 | [rl2_trainer] epoch #393 | Computing KL before +2025-04-03 10:49:45 | [rl2_trainer] epoch #393 | Optimizing +2025-04-03 10:50:20 | [rl2_trainer] epoch #393 | Computing KL after +2025-04-03 10:50:21 | [rl2_trainer] epoch #393 | Computing loss after +2025-04-03 10:50:22 | [rl2_trainer] epoch #393 | Saving snapshot... +2025-04-03 10:50:22 | [rl2_trainer] epoch #393 | Saved +2025-04-03 10:50:22 | [rl2_trainer] epoch #393 | Time 75502.63 s +2025-04-03 10:50:22 | [rl2_trainer] epoch #393 | EpochTime 213.96 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.5404 +Average/AverageReturn -26.5254 +Average/Iteration 393 +Average/MaxReturn 1.87239 +Average/MinReturn -94.6691 +Average/NumEpisodes 100 +Average/StdReturn 13.657 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.821788 +TotalEnvSteps 3.94e+06 +__unnamed_task__/AverageDiscountedReturn -16.5404 +__unnamed_task__/AverageReturn -26.5254 +__unnamed_task__/Iteration 393 +__unnamed_task__/MaxReturn 1.87239 +__unnamed_task__/MinReturn -94.6691 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.657 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.83089 +policy/KL 0.0159249 +policy/KLBefore 0 +policy/LossAfter -0.0562303 +policy/LossBefore -0.00218244 +policy/dLoss 0.0540479 +---------------------------------------- ------------ +2025-04-03 10:51:49 | [rl2_trainer] epoch #394 | Optimizing policy... +2025-04-03 10:51:49 | [rl2_trainer] epoch #394 | Fitting baseline... +2025-04-03 10:51:49 | [rl2_trainer] epoch #394 | Computing loss before +2025-04-03 10:51:50 | [rl2_trainer] epoch #394 | Computing KL before +2025-04-03 10:51:51 | [rl2_trainer] epoch #394 | Optimizing +2025-04-03 10:52:25 | [rl2_trainer] epoch #394 | Computing KL after +2025-04-03 10:52:26 | [rl2_trainer] epoch #394 | Computing loss after +2025-04-03 10:52:27 | [rl2_trainer] epoch #394 | Saving snapshot... +2025-04-03 10:52:27 | [rl2_trainer] epoch #394 | Saved +2025-04-03 10:52:27 | [rl2_trainer] epoch #394 | Time 75627.95 s +2025-04-03 10:52:27 | [rl2_trainer] epoch #394 | EpochTime 125.32 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -7.62268 +Average/AverageReturn -10.9557 +Average/Iteration 394 +Average/MaxReturn 49.3752 +Average/MinReturn -110.563 +Average/NumEpisodes 100 +Average/StdReturn 16.9669 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.550554 +TotalEnvSteps 3.95e+06 +__unnamed_task__/AverageDiscountedReturn -7.62268 +__unnamed_task__/AverageReturn -10.9557 +__unnamed_task__/Iteration 394 +__unnamed_task__/MaxReturn 49.3752 +__unnamed_task__/MinReturn -110.563 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.9669 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.81372 +policy/KL 0.033349 +policy/KLBefore 0 +policy/LossAfter -0.0603301 +policy/LossBefore -0.0179039 +policy/dLoss 0.0424262 +---------------------------------------- ------------ +2025-04-03 10:55:28 | [rl2_trainer] epoch #395 | Optimizing policy... +2025-04-03 10:55:29 | [rl2_trainer] epoch #395 | Fitting baseline... +2025-04-03 10:55:29 | [rl2_trainer] epoch #395 | Computing loss before +2025-04-03 10:55:29 | [rl2_trainer] epoch #395 | Computing KL before +2025-04-03 10:55:30 | [rl2_trainer] epoch #395 | Optimizing +2025-04-03 10:56:06 | [rl2_trainer] epoch #395 | Computing KL after +2025-04-03 10:56:06 | [rl2_trainer] epoch #395 | Computing loss after +2025-04-03 10:56:07 | [rl2_trainer] epoch #395 | Saving snapshot... +2025-04-03 10:56:07 | [rl2_trainer] epoch #395 | Saved +2025-04-03 10:56:07 | [rl2_trainer] epoch #395 | Time 75848.06 s +2025-04-03 10:56:07 | [rl2_trainer] epoch #395 | EpochTime 220.11 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.6406 +Average/AverageReturn -17.4282 +Average/Iteration 395 +Average/MaxReturn 2.34576 +Average/MinReturn -40.2929 +Average/NumEpisodes 100 +Average/StdReturn 5.3829 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.505078 +TotalEnvSteps 3.96e+06 +__unnamed_task__/AverageDiscountedReturn -11.6406 +__unnamed_task__/AverageReturn -17.4282 +__unnamed_task__/Iteration 395 +__unnamed_task__/MaxReturn 2.34576 +__unnamed_task__/MinReturn -40.2929 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.3829 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.80563 +policy/KL 0.0288106 +policy/KLBefore 0 +policy/LossAfter -0.00255087 +policy/LossBefore -0.00434414 +policy/dLoss -0.00179327 +---------------------------------------- ------------ +2025-04-03 10:59:08 | [rl2_trainer] epoch #396 | Optimizing policy... +2025-04-03 10:59:08 | [rl2_trainer] epoch #396 | Fitting baseline... +2025-04-03 10:59:08 | [rl2_trainer] epoch #396 | Computing loss before +2025-04-03 10:59:09 | [rl2_trainer] epoch #396 | Computing KL before +2025-04-03 10:59:09 | [rl2_trainer] epoch #396 | Optimizing +2025-04-03 10:59:45 | [rl2_trainer] epoch #396 | Computing KL after +2025-04-03 10:59:45 | [rl2_trainer] epoch #396 | Computing loss after +2025-04-03 10:59:46 | [rl2_trainer] epoch #396 | Saving snapshot... +2025-04-03 10:59:46 | [rl2_trainer] epoch #396 | Saved +2025-04-03 10:59:46 | [rl2_trainer] epoch #396 | Time 76067.32 s +2025-04-03 10:59:46 | [rl2_trainer] epoch #396 | EpochTime 219.25 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.5411 +Average/AverageReturn -17.3328 +Average/Iteration 396 +Average/MaxReturn -4.59937 +Average/MinReturn -32.3851 +Average/NumEpisodes 100 +Average/StdReturn 4.91511 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.544723 +TotalEnvSteps 3.97e+06 +__unnamed_task__/AverageDiscountedReturn -11.5411 +__unnamed_task__/AverageReturn -17.3328 +__unnamed_task__/Iteration 396 +__unnamed_task__/MaxReturn -4.59937 +__unnamed_task__/MinReturn -32.3851 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.91511 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.79098 +policy/KL 0.0112573 +policy/KLBefore 0 +policy/LossAfter -0.00843922 +policy/LossBefore 0.00140165 +policy/dLoss 0.00984088 +---------------------------------------- ------------ +2025-04-03 11:02:51 | [rl2_trainer] epoch #397 | Optimizing policy... +2025-04-03 11:02:51 | [rl2_trainer] epoch #397 | Fitting baseline... +2025-04-03 11:02:51 | [rl2_trainer] epoch #397 | Computing loss before +2025-04-03 11:02:52 | [rl2_trainer] epoch #397 | Computing KL before +2025-04-03 11:02:53 | [rl2_trainer] epoch #397 | Optimizing +2025-04-03 11:03:26 | [rl2_trainer] epoch #397 | Computing KL after +2025-04-03 11:03:27 | [rl2_trainer] epoch #397 | Computing loss after +2025-04-03 11:03:28 | [rl2_trainer] epoch #397 | Saving snapshot... +2025-04-03 11:03:28 | [rl2_trainer] epoch #397 | Saved +2025-04-03 11:03:28 | [rl2_trainer] epoch #397 | Time 76289.00 s +2025-04-03 11:03:28 | [rl2_trainer] epoch #397 | EpochTime 221.68 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -10.7667 +Average/AverageReturn -16.0622 +Average/Iteration 397 +Average/MaxReturn 19.5014 +Average/MinReturn -32.8047 +Average/NumEpisodes 100 +Average/StdReturn 6.36567 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.570616 +TotalEnvSteps 3.98e+06 +__unnamed_task__/AverageDiscountedReturn -10.7667 +__unnamed_task__/AverageReturn -16.0622 +__unnamed_task__/Iteration 397 +__unnamed_task__/MaxReturn 19.5014 +__unnamed_task__/MinReturn -32.8047 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.36567 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.77301 +policy/KL 0.0100502 +policy/KLBefore 0 +policy/LossAfter -0.0174616 +policy/LossBefore -0.000659288 +policy/dLoss 0.0168023 +---------------------------------------- ------------- +2025-04-03 11:05:43 | [rl2_trainer] epoch #398 | Optimizing policy... +2025-04-03 11:05:43 | [rl2_trainer] epoch #398 | Fitting baseline... +2025-04-03 11:05:43 | [rl2_trainer] epoch #398 | Computing loss before +2025-04-03 11:05:44 | [rl2_trainer] epoch #398 | Computing KL before +2025-04-03 11:05:44 | [rl2_trainer] epoch #398 | Optimizing +2025-04-03 11:06:21 | [rl2_trainer] epoch #398 | Computing KL after +2025-04-03 11:06:21 | [rl2_trainer] epoch #398 | Computing loss after +2025-04-03 11:06:22 | [rl2_trainer] epoch #398 | Saving snapshot... +2025-04-03 11:06:22 | [rl2_trainer] epoch #398 | Saved +2025-04-03 11:06:22 | [rl2_trainer] epoch #398 | Time 76463.05 s +2025-04-03 11:06:22 | [rl2_trainer] epoch #398 | EpochTime 174.04 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8711 +Average/AverageReturn -26.6649 +Average/Iteration 398 +Average/MaxReturn -4.14524 +Average/MinReturn -58.1819 +Average/NumEpisodes 100 +Average/StdReturn 12.2929 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.854715 +TotalEnvSteps 3.99e+06 +__unnamed_task__/AverageDiscountedReturn -16.8711 +__unnamed_task__/AverageReturn -26.6649 +__unnamed_task__/Iteration 398 +__unnamed_task__/MaxReturn -4.14524 +__unnamed_task__/MinReturn -58.1819 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.2929 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.76193 +policy/KL 0.0176427 +policy/KLBefore 0 +policy/LossAfter -0.043503 +policy/LossBefore -0.000803784 +policy/dLoss 0.0426992 +---------------------------------------- ------------- +2025-04-03 11:09:24 | [rl2_trainer] epoch #399 | Optimizing policy... +2025-04-03 11:09:25 | [rl2_trainer] epoch #399 | Fitting baseline... +2025-04-03 11:09:25 | [rl2_trainer] epoch #399 | Computing loss before +2025-04-03 11:09:25 | [rl2_trainer] epoch #399 | Computing KL before +2025-04-03 11:09:26 | [rl2_trainer] epoch #399 | Optimizing +2025-04-03 11:10:02 | [rl2_trainer] epoch #399 | Computing KL after +2025-04-03 11:10:03 | [rl2_trainer] epoch #399 | Computing loss after +2025-04-03 11:10:04 | [rl2_trainer] epoch #399 | Saving snapshot... +2025-04-03 11:10:04 | [rl2_trainer] epoch #399 | Saved +2025-04-03 11:10:04 | [rl2_trainer] epoch #399 | Time 76684.54 s +2025-04-03 11:10:04 | [rl2_trainer] epoch #399 | EpochTime 221.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.5839 +Average/AverageReturn -15.6138 +Average/Iteration 399 +Average/MaxReturn 16.0281 +Average/MinReturn -32.7488 +Average/NumEpisodes 100 +Average/StdReturn 7.44593 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.559777 +TotalEnvSteps 4e+06 +__unnamed_task__/AverageDiscountedReturn -10.5839 +__unnamed_task__/AverageReturn -15.6138 +__unnamed_task__/Iteration 399 +__unnamed_task__/MaxReturn 16.0281 +__unnamed_task__/MinReturn -32.7488 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.44593 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.74467 +policy/KL 0.0146333 +policy/KLBefore 0 +policy/LossAfter -0.0195674 +policy/LossBefore 0.00297692 +policy/dLoss 0.0225443 +---------------------------------------- ------------ +2025-04-03 11:13:03 | [rl2_trainer] epoch #400 | Optimizing policy... +2025-04-03 11:13:04 | [rl2_trainer] epoch #400 | Fitting baseline... +2025-04-03 11:13:04 | [rl2_trainer] epoch #400 | Computing loss before +2025-04-03 11:13:04 | [rl2_trainer] epoch #400 | Computing KL before +2025-04-03 11:13:05 | [rl2_trainer] epoch #400 | Optimizing +2025-04-03 11:13:40 | [rl2_trainer] epoch #400 | Computing KL after +2025-04-03 11:13:41 | [rl2_trainer] epoch #400 | Computing loss after +2025-04-03 11:13:41 | [rl2_trainer] epoch #400 | Saving snapshot... +2025-04-03 11:13:41 | [rl2_trainer] epoch #400 | Saved +2025-04-03 11:13:41 | [rl2_trainer] epoch #400 | Time 76902.50 s +2025-04-03 11:13:41 | [rl2_trainer] epoch #400 | EpochTime 217.95 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.4912 +Average/AverageReturn -15.5858 +Average/Iteration 400 +Average/MaxReturn -0.161631 +Average/MinReturn -30.48 +Average/NumEpisodes 100 +Average/StdReturn 4.93062 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.559298 +TotalEnvSteps 4.01e+06 +__unnamed_task__/AverageDiscountedReturn -10.4912 +__unnamed_task__/AverageReturn -15.5858 +__unnamed_task__/Iteration 400 +__unnamed_task__/MaxReturn -0.161631 +__unnamed_task__/MinReturn -30.48 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.93062 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.72928 +policy/KL 0.0109234 +policy/KLBefore 0 +policy/LossAfter -0.0136506 +policy/LossBefore -0.00315544 +policy/dLoss 0.0104951 +---------------------------------------- ------------ +2025-04-03 11:17:42 | [rl2_trainer] epoch #401 | Optimizing policy... +2025-04-03 11:17:42 | [rl2_trainer] epoch #401 | Fitting baseline... +2025-04-03 11:17:42 | [rl2_trainer] epoch #401 | Computing loss before +2025-04-03 11:17:43 | [rl2_trainer] epoch #401 | Computing KL before +2025-04-03 11:17:43 | [rl2_trainer] epoch #401 | Optimizing +2025-04-03 11:18:19 | [rl2_trainer] epoch #401 | Computing KL after +2025-04-03 11:18:19 | [rl2_trainer] epoch #401 | Computing loss after +2025-04-03 11:18:20 | [rl2_trainer] epoch #401 | Saving snapshot... +2025-04-03 11:18:20 | [rl2_trainer] epoch #401 | Saved +2025-04-03 11:18:20 | [rl2_trainer] epoch #401 | Time 77181.31 s +2025-04-03 11:18:20 | [rl2_trainer] epoch #401 | EpochTime 278.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3033 +Average/AverageReturn -24.4986 +Average/Iteration 401 +Average/MaxReturn 38.7467 +Average/MinReturn -78.6652 +Average/NumEpisodes 100 +Average/StdReturn 16.1878 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.835796 +TotalEnvSteps 4.02e+06 +__unnamed_task__/AverageDiscountedReturn -15.3033 +__unnamed_task__/AverageReturn -24.4986 +__unnamed_task__/Iteration 401 +__unnamed_task__/MaxReturn 38.7467 +__unnamed_task__/MinReturn -78.6652 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.1878 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.71025 +policy/KL 0.0197092 +policy/KLBefore 0 +policy/LossAfter -0.0491892 +policy/LossBefore 0.00225047 +policy/dLoss 0.0514397 +---------------------------------------- ------------ +2025-04-03 11:21:35 | [rl2_trainer] epoch #402 | Optimizing policy... +2025-04-03 11:21:35 | [rl2_trainer] epoch #402 | Fitting baseline... +2025-04-03 11:21:35 | [rl2_trainer] epoch #402 | Computing loss before +2025-04-03 11:21:36 | [rl2_trainer] epoch #402 | Computing KL before +2025-04-03 11:21:37 | [rl2_trainer] epoch #402 | Optimizing +2025-04-03 11:22:12 | [rl2_trainer] epoch #402 | Computing KL after +2025-04-03 11:22:13 | [rl2_trainer] epoch #402 | Computing loss after +2025-04-03 11:22:14 | [rl2_trainer] epoch #402 | Saving snapshot... +2025-04-03 11:22:14 | [rl2_trainer] epoch #402 | Saved +2025-04-03 11:22:14 | [rl2_trainer] epoch #402 | Time 77414.81 s +2025-04-03 11:22:14 | [rl2_trainer] epoch #402 | EpochTime 233.50 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -17.4045 +Average/AverageReturn -27.4185 +Average/Iteration 402 +Average/MaxReturn 4.65033 +Average/MinReturn -57.9376 +Average/NumEpisodes 100 +Average/StdReturn 13.1561 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.848749 +TotalEnvSteps 4.03e+06 +__unnamed_task__/AverageDiscountedReturn -17.4045 +__unnamed_task__/AverageReturn -27.4185 +__unnamed_task__/Iteration 402 +__unnamed_task__/MaxReturn 4.65033 +__unnamed_task__/MinReturn -57.9376 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1561 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.68372 +policy/KL 0.017721 +policy/KLBefore 0 +policy/LossAfter -0.0412278 +policy/LossBefore -0.0128922 +policy/dLoss 0.0283356 +---------------------------------------- ----------- +2025-04-03 11:24:43 | [rl2_trainer] epoch #403 | Optimizing policy... +2025-04-03 11:24:44 | [rl2_trainer] epoch #403 | Fitting baseline... +2025-04-03 11:24:44 | [rl2_trainer] epoch #403 | Computing loss before +2025-04-03 11:24:44 | [rl2_trainer] epoch #403 | Computing KL before +2025-04-03 11:24:45 | [rl2_trainer] epoch #403 | Optimizing +2025-04-03 11:25:21 | [rl2_trainer] epoch #403 | Computing KL after +2025-04-03 11:25:21 | [rl2_trainer] epoch #403 | Computing loss after +2025-04-03 11:25:22 | [rl2_trainer] epoch #403 | Saving snapshot... +2025-04-03 11:25:22 | [rl2_trainer] epoch #403 | Saved +2025-04-03 11:25:22 | [rl2_trainer] epoch #403 | Time 77603.45 s +2025-04-03 11:25:22 | [rl2_trainer] epoch #403 | EpochTime 188.63 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.7823 +Average/AverageReturn -16.1931 +Average/Iteration 403 +Average/MaxReturn 3.07802 +Average/MinReturn -23.0523 +Average/NumEpisodes 100 +Average/StdReturn 3.69485 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.372903 +TotalEnvSteps 4.04e+06 +__unnamed_task__/AverageDiscountedReturn -10.7823 +__unnamed_task__/AverageReturn -16.1931 +__unnamed_task__/Iteration 403 +__unnamed_task__/MaxReturn 3.07802 +__unnamed_task__/MinReturn -23.0523 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 3.69485 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.65569 +policy/KL 0.0128294 +policy/KLBefore 0 +policy/LossAfter -0.00885698 +policy/LossBefore 0.00126636 +policy/dLoss 0.0101233 +---------------------------------------- ------------ +2025-04-03 11:27:26 | [rl2_trainer] epoch #404 | Optimizing policy... +2025-04-03 11:27:26 | [rl2_trainer] epoch #404 | Fitting baseline... +2025-04-03 11:27:26 | [rl2_trainer] epoch #404 | Computing loss before +2025-04-03 11:27:26 | [rl2_trainer] epoch #404 | Computing KL before +2025-04-03 11:27:27 | [rl2_trainer] epoch #404 | Optimizing +2025-04-03 11:28:02 | [rl2_trainer] epoch #404 | Computing KL after +2025-04-03 11:28:03 | [rl2_trainer] epoch #404 | Computing loss after +2025-04-03 11:28:04 | [rl2_trainer] epoch #404 | Saving snapshot... +2025-04-03 11:28:04 | [rl2_trainer] epoch #404 | Saved +2025-04-03 11:28:04 | [rl2_trainer] epoch #404 | Time 77764.84 s +2025-04-03 11:28:04 | [rl2_trainer] epoch #404 | EpochTime 161.39 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.2789 +Average/AverageReturn -26.1253 +Average/Iteration 404 +Average/MaxReturn 0.616953 +Average/MinReturn -55.8575 +Average/NumEpisodes 100 +Average/StdReturn 11.4259 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.822114 +TotalEnvSteps 4.05e+06 +__unnamed_task__/AverageDiscountedReturn -16.2789 +__unnamed_task__/AverageReturn -26.1253 +__unnamed_task__/Iteration 404 +__unnamed_task__/MaxReturn 0.616953 +__unnamed_task__/MinReturn -55.8575 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.4259 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.63293 +policy/KL 0.0133077 +policy/KLBefore 0 +policy/LossAfter -0.0341183 +policy/LossBefore 0.00392036 +policy/dLoss 0.0380386 +---------------------------------------- ------------ +2025-04-03 11:31:05 | [rl2_trainer] epoch #405 | Optimizing policy... +2025-04-03 11:31:05 | [rl2_trainer] epoch #405 | Fitting baseline... +2025-04-03 11:31:05 | [rl2_trainer] epoch #405 | Computing loss before +2025-04-03 11:31:06 | [rl2_trainer] epoch #405 | Computing KL before +2025-04-03 11:31:06 | [rl2_trainer] epoch #405 | Optimizing +2025-04-03 11:31:42 | [rl2_trainer] epoch #405 | Computing KL after +2025-04-03 11:31:43 | [rl2_trainer] epoch #405 | Computing loss after +2025-04-03 11:31:44 | [rl2_trainer] epoch #405 | Saving snapshot... +2025-04-03 11:31:44 | [rl2_trainer] epoch #405 | Saved +2025-04-03 11:31:44 | [rl2_trainer] epoch #405 | Time 77984.99 s +2025-04-03 11:31:44 | [rl2_trainer] epoch #405 | EpochTime 220.14 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.0879 +Average/AverageReturn -16.4883 +Average/Iteration 405 +Average/MaxReturn 7.66169 +Average/MinReturn -33.382 +Average/NumEpisodes 100 +Average/StdReturn 6.0137 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.372299 +TotalEnvSteps 4.06e+06 +__unnamed_task__/AverageDiscountedReturn -11.0879 +__unnamed_task__/AverageReturn -16.4883 +__unnamed_task__/Iteration 405 +__unnamed_task__/MaxReturn 7.66169 +__unnamed_task__/MinReturn -33.382 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.0137 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.59958 +policy/KL 0.0177077 +policy/KLBefore 0 +policy/LossAfter -0.0115009 +policy/LossBefore -0.000963008 +policy/dLoss 0.0105379 +---------------------------------------- ------------- +2025-04-03 11:35:33 | [rl2_trainer] epoch #406 | Optimizing policy... +2025-04-03 11:35:33 | [rl2_trainer] epoch #406 | Fitting baseline... +2025-04-03 11:35:33 | [rl2_trainer] epoch #406 | Computing loss before +2025-04-03 11:35:34 | [rl2_trainer] epoch #406 | Computing KL before +2025-04-03 11:35:34 | [rl2_trainer] epoch #406 | Optimizing +2025-04-03 11:36:10 | [rl2_trainer] epoch #406 | Computing KL after +2025-04-03 11:36:10 | [rl2_trainer] epoch #406 | Computing loss after +2025-04-03 11:36:11 | [rl2_trainer] epoch #406 | Saving snapshot... +2025-04-03 11:36:11 | [rl2_trainer] epoch #406 | Saved +2025-04-03 11:36:11 | [rl2_trainer] epoch #406 | Time 78252.40 s +2025-04-03 11:36:11 | [rl2_trainer] epoch #406 | EpochTime 267.41 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.0842 +Average/AverageReturn -25.8217 +Average/Iteration 406 +Average/MaxReturn 1.75796 +Average/MinReturn -65.7484 +Average/NumEpisodes 100 +Average/StdReturn 13.4177 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.888758 +TotalEnvSteps 4.07e+06 +__unnamed_task__/AverageDiscountedReturn -16.0842 +__unnamed_task__/AverageReturn -25.8217 +__unnamed_task__/Iteration 406 +__unnamed_task__/MaxReturn 1.75796 +__unnamed_task__/MinReturn -65.7484 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.4177 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.57863 +policy/KL 0.0184415 +policy/KLBefore 0 +policy/LossAfter -0.0220014 +policy/LossBefore 0.00243566 +policy/dLoss 0.0244371 +---------------------------------------- ------------ +2025-04-03 11:39:12 | [rl2_trainer] epoch #407 | Optimizing policy... +2025-04-03 11:39:12 | [rl2_trainer] epoch #407 | Fitting baseline... +2025-04-03 11:39:12 | [rl2_trainer] epoch #407 | Computing loss before +2025-04-03 11:39:13 | [rl2_trainer] epoch #407 | Computing KL before +2025-04-03 11:39:14 | [rl2_trainer] epoch #407 | Optimizing +2025-04-03 11:39:49 | [rl2_trainer] epoch #407 | Computing KL after +2025-04-03 11:39:50 | [rl2_trainer] epoch #407 | Computing loss after +2025-04-03 11:39:51 | [rl2_trainer] epoch #407 | Saving snapshot... +2025-04-03 11:39:51 | [rl2_trainer] epoch #407 | Saved +2025-04-03 11:39:51 | [rl2_trainer] epoch #407 | Time 78472.00 s +2025-04-03 11:39:51 | [rl2_trainer] epoch #407 | EpochTime 219.59 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.87449 +Average/AverageReturn -14.3097 +Average/Iteration 407 +Average/MaxReturn 9.86376 +Average/MinReturn -34.5204 +Average/NumEpisodes 100 +Average/StdReturn 6.3229 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.468297 +TotalEnvSteps 4.08e+06 +__unnamed_task__/AverageDiscountedReturn -9.87449 +__unnamed_task__/AverageReturn -14.3097 +__unnamed_task__/Iteration 407 +__unnamed_task__/MaxReturn 9.86376 +__unnamed_task__/MinReturn -34.5204 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.3229 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.5559 +policy/KL 0.0141749 +policy/KLBefore 0 +policy/LossAfter -0.0123367 +policy/LossBefore 0.00214543 +policy/dLoss 0.0144822 +---------------------------------------- ------------ +2025-04-03 11:42:46 | [rl2_trainer] epoch #408 | Optimizing policy... +2025-04-03 11:42:47 | [rl2_trainer] epoch #408 | Fitting baseline... +2025-04-03 11:42:47 | [rl2_trainer] epoch #408 | Computing loss before +2025-04-03 11:42:47 | [rl2_trainer] epoch #408 | Computing KL before +2025-04-03 11:42:48 | [rl2_trainer] epoch #408 | Optimizing +2025-04-03 11:43:23 | [rl2_trainer] epoch #408 | Computing KL after +2025-04-03 11:43:24 | [rl2_trainer] epoch #408 | Computing loss after +2025-04-03 11:43:25 | [rl2_trainer] epoch #408 | Saving snapshot... +2025-04-03 11:43:25 | [rl2_trainer] epoch #408 | Saved +2025-04-03 11:43:25 | [rl2_trainer] epoch #408 | Time 78685.83 s +2025-04-03 11:43:25 | [rl2_trainer] epoch #408 | EpochTime 213.83 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.8169 +Average/AverageReturn -25.4417 +Average/Iteration 408 +Average/MaxReturn 2.59772 +Average/MinReturn -60.9402 +Average/NumEpisodes 100 +Average/StdReturn 11.34 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.826689 +TotalEnvSteps 4.09e+06 +__unnamed_task__/AverageDiscountedReturn -15.8169 +__unnamed_task__/AverageReturn -25.4417 +__unnamed_task__/Iteration 408 +__unnamed_task__/MaxReturn 2.59772 +__unnamed_task__/MinReturn -60.9402 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.34 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.52934 +policy/KL 0.0130433 +policy/KLBefore 0 +policy/LossAfter -0.0402629 +policy/LossBefore -0.006226 +policy/dLoss 0.0340369 +---------------------------------------- ----------- +2025-04-03 11:45:44 | [rl2_trainer] epoch #409 | Optimizing policy... +2025-04-03 11:45:44 | [rl2_trainer] epoch #409 | Fitting baseline... +2025-04-03 11:45:44 | [rl2_trainer] epoch #409 | Computing loss before +2025-04-03 11:45:45 | [rl2_trainer] epoch #409 | Computing KL before +2025-04-03 11:45:45 | [rl2_trainer] epoch #409 | Optimizing +2025-04-03 11:46:21 | [rl2_trainer] epoch #409 | Computing KL after +2025-04-03 11:46:22 | [rl2_trainer] epoch #409 | Computing loss after +2025-04-03 11:46:23 | [rl2_trainer] epoch #409 | Saving snapshot... +2025-04-03 11:46:23 | [rl2_trainer] epoch #409 | Saved +2025-04-03 11:46:23 | [rl2_trainer] epoch #409 | Time 78863.65 s +2025-04-03 11:46:23 | [rl2_trainer] epoch #409 | EpochTime 177.82 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.1841 +Average/AverageReturn -27.6034 +Average/Iteration 409 +Average/MaxReturn 9.30954 +Average/MinReturn -55.5549 +Average/NumEpisodes 100 +Average/StdReturn 12.944 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.859247 +TotalEnvSteps 4.1e+06 +__unnamed_task__/AverageDiscountedReturn -17.1841 +__unnamed_task__/AverageReturn -27.6034 +__unnamed_task__/Iteration 409 +__unnamed_task__/MaxReturn 9.30954 +__unnamed_task__/MinReturn -55.5549 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.944 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.51034 +policy/KL 0.0159279 +policy/KLBefore 0 +policy/LossAfter -0.0468461 +policy/LossBefore -0.00120651 +policy/dLoss 0.0456396 +---------------------------------------- ------------ +2025-04-03 11:48:08 | [rl2_trainer] epoch #410 | Optimizing policy... +2025-04-03 11:48:08 | [rl2_trainer] epoch #410 | Fitting baseline... +2025-04-03 11:48:08 | [rl2_trainer] epoch #410 | Computing loss before +2025-04-03 11:48:09 | [rl2_trainer] epoch #410 | Computing KL before +2025-04-03 11:48:09 | [rl2_trainer] epoch #410 | Optimizing +2025-04-03 11:48:44 | [rl2_trainer] epoch #410 | Computing KL after +2025-04-03 11:48:45 | [rl2_trainer] epoch #410 | Computing loss after +2025-04-03 11:48:45 | [rl2_trainer] epoch #410 | Saving snapshot... +2025-04-03 11:48:45 | [rl2_trainer] epoch #410 | Saved +2025-04-03 11:48:45 | [rl2_trainer] epoch #410 | Time 79006.51 s +2025-04-03 11:48:45 | [rl2_trainer] epoch #410 | EpochTime 142.86 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7611 +Average/AverageReturn -25.1029 +Average/Iteration 410 +Average/MaxReturn 29.8944 +Average/MinReturn -51.417 +Average/NumEpisodes 100 +Average/StdReturn 12.5225 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.782007 +TotalEnvSteps 4.11e+06 +__unnamed_task__/AverageDiscountedReturn -15.7611 +__unnamed_task__/AverageReturn -25.1029 +__unnamed_task__/Iteration 410 +__unnamed_task__/MaxReturn 29.8944 +__unnamed_task__/MinReturn -51.417 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.5225 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.48573 +policy/KL 0.0175876 +policy/KLBefore 0 +policy/LossAfter -0.0369278 +policy/LossBefore -0.000662251 +policy/dLoss 0.0362655 +---------------------------------------- ------------- +2025-04-03 11:51:51 | [rl2_trainer] epoch #411 | Optimizing policy... +2025-04-03 11:51:51 | [rl2_trainer] epoch #411 | Fitting baseline... +2025-04-03 11:51:51 | [rl2_trainer] epoch #411 | Computing loss before +2025-04-03 11:51:51 | [rl2_trainer] epoch #411 | Computing KL before +2025-04-03 11:51:52 | [rl2_trainer] epoch #411 | Optimizing +2025-04-03 11:52:29 | [rl2_trainer] epoch #411 | Computing KL after +2025-04-03 11:52:29 | [rl2_trainer] epoch #411 | Computing loss after +2025-04-03 11:52:30 | [rl2_trainer] epoch #411 | Saving snapshot... +2025-04-03 11:52:30 | [rl2_trainer] epoch #411 | Saved +2025-04-03 11:52:30 | [rl2_trainer] epoch #411 | Time 79230.97 s +2025-04-03 11:52:30 | [rl2_trainer] epoch #411 | EpochTime 224.46 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.8196 +Average/AverageReturn -25.2353 +Average/Iteration 411 +Average/MaxReturn -9.84361 +Average/MinReturn -55.2464 +Average/NumEpisodes 100 +Average/StdReturn 11.9064 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.89644 +TotalEnvSteps 4.12e+06 +__unnamed_task__/AverageDiscountedReturn -15.8196 +__unnamed_task__/AverageReturn -25.2353 +__unnamed_task__/Iteration 411 +__unnamed_task__/MaxReturn -9.84361 +__unnamed_task__/MinReturn -55.2464 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.9064 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.45953 +policy/KL 0.0152198 +policy/KLBefore 0 +policy/LossAfter -0.0286173 +policy/LossBefore -0.00626997 +policy/dLoss 0.0223474 +---------------------------------------- ------------ +2025-04-03 11:54:48 | [rl2_trainer] epoch #412 | Optimizing policy... +2025-04-03 11:54:49 | [rl2_trainer] epoch #412 | Fitting baseline... +2025-04-03 11:54:49 | [rl2_trainer] epoch #412 | Computing loss before +2025-04-03 11:54:49 | [rl2_trainer] epoch #412 | Computing KL before +2025-04-03 11:54:50 | [rl2_trainer] epoch #412 | Optimizing +2025-04-03 11:55:24 | [rl2_trainer] epoch #412 | Computing KL after +2025-04-03 11:55:25 | [rl2_trainer] epoch #412 | Computing loss after +2025-04-03 11:55:26 | [rl2_trainer] epoch #412 | Saving snapshot... +2025-04-03 11:55:26 | [rl2_trainer] epoch #412 | Saved +2025-04-03 11:55:26 | [rl2_trainer] epoch #412 | Time 79406.80 s +2025-04-03 11:55:26 | [rl2_trainer] epoch #412 | EpochTime 175.82 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.4486 +Average/AverageReturn -15.7966 +Average/Iteration 412 +Average/MaxReturn 24.5044 +Average/MinReturn -29.0993 +Average/NumEpisodes 100 +Average/StdReturn 7.15117 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.357344 +TotalEnvSteps 4.13e+06 +__unnamed_task__/AverageDiscountedReturn -10.4486 +__unnamed_task__/AverageReturn -15.7966 +__unnamed_task__/Iteration 412 +__unnamed_task__/MaxReturn 24.5044 +__unnamed_task__/MinReturn -29.0993 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.15117 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.42834 +policy/KL 0.0133883 +policy/KLBefore 0 +policy/LossAfter -0.0302439 +policy/LossBefore -0.00489998 +policy/dLoss 0.0253439 +---------------------------------------- ------------ +2025-04-03 11:59:09 | [rl2_trainer] epoch #413 | Optimizing policy... +2025-04-03 11:59:09 | [rl2_trainer] epoch #413 | Fitting baseline... +2025-04-03 11:59:09 | [rl2_trainer] epoch #413 | Computing loss before +2025-04-03 11:59:10 | [rl2_trainer] epoch #413 | Computing KL before +2025-04-03 11:59:11 | [rl2_trainer] epoch #413 | Optimizing +2025-04-03 11:59:47 | [rl2_trainer] epoch #413 | Computing KL after +2025-04-03 11:59:47 | [rl2_trainer] epoch #413 | Computing loss after +2025-04-03 11:59:48 | [rl2_trainer] epoch #413 | Saving snapshot... +2025-04-03 11:59:48 | [rl2_trainer] epoch #413 | Saved +2025-04-03 11:59:48 | [rl2_trainer] epoch #413 | Time 79669.21 s +2025-04-03 11:59:48 | [rl2_trainer] epoch #413 | EpochTime 262.41 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.0553 +Average/AverageReturn -23.3348 +Average/Iteration 413 +Average/MaxReturn 21.3605 +Average/MinReturn -64.3531 +Average/NumEpisodes 100 +Average/StdReturn 13.548 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.783244 +TotalEnvSteps 4.14e+06 +__unnamed_task__/AverageDiscountedReturn -15.0553 +__unnamed_task__/AverageReturn -23.3348 +__unnamed_task__/Iteration 413 +__unnamed_task__/MaxReturn 21.3605 +__unnamed_task__/MinReturn -64.3531 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.548 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.42775 +policy/KL 0.0211517 +policy/KLBefore 0 +policy/LossAfter -0.0344004 +policy/LossBefore 0.0187157 +policy/dLoss 0.0531161 +---------------------------------------- ----------- +2025-04-03 12:01:55 | [rl2_trainer] epoch #414 | Optimizing policy... +2025-04-03 12:01:56 | [rl2_trainer] epoch #414 | Fitting baseline... +2025-04-03 12:01:56 | [rl2_trainer] epoch #414 | Computing loss before +2025-04-03 12:01:56 | [rl2_trainer] epoch #414 | Computing KL before +2025-04-03 12:01:57 | [rl2_trainer] epoch #414 | Optimizing +2025-04-03 12:02:33 | [rl2_trainer] epoch #414 | Computing KL after +2025-04-03 12:02:34 | [rl2_trainer] epoch #414 | Computing loss after +2025-04-03 12:02:34 | [rl2_trainer] epoch #414 | Saving snapshot... +2025-04-03 12:02:34 | [rl2_trainer] epoch #414 | Saved +2025-04-03 12:02:34 | [rl2_trainer] epoch #414 | Time 79835.51 s +2025-04-03 12:02:34 | [rl2_trainer] epoch #414 | EpochTime 166.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.4301 +Average/AverageReturn -16.466 +Average/Iteration 414 +Average/MaxReturn 16.0188 +Average/MinReturn -34.933 +Average/NumEpisodes 100 +Average/StdReturn 5.83532 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.414717 +TotalEnvSteps 4.15e+06 +__unnamed_task__/AverageDiscountedReturn -11.4301 +__unnamed_task__/AverageReturn -16.466 +__unnamed_task__/Iteration 414 +__unnamed_task__/MaxReturn 16.0188 +__unnamed_task__/MinReturn -34.933 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.83532 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.42019 +policy/KL 0.0116794 +policy/KLBefore 0 +policy/LossAfter -0.026067 +policy/LossBefore -0.00014689 +policy/dLoss 0.0259201 +---------------------------------------- ------------ +2025-04-03 12:05:18 | [rl2_trainer] epoch #415 | Optimizing policy... +2025-04-03 12:05:18 | [rl2_trainer] epoch #415 | Fitting baseline... +2025-04-03 12:05:18 | [rl2_trainer] epoch #415 | Computing loss before +2025-04-03 12:05:19 | [rl2_trainer] epoch #415 | Computing KL before +2025-04-03 12:05:19 | [rl2_trainer] epoch #415 | Optimizing +2025-04-03 12:05:55 | [rl2_trainer] epoch #415 | Computing KL after +2025-04-03 12:05:56 | [rl2_trainer] epoch #415 | Computing loss after +2025-04-03 12:05:57 | [rl2_trainer] epoch #415 | Saving snapshot... +2025-04-03 12:05:57 | [rl2_trainer] epoch #415 | Saved +2025-04-03 12:05:57 | [rl2_trainer] epoch #415 | Time 80038.08 s +2025-04-03 12:05:57 | [rl2_trainer] epoch #415 | EpochTime 202.56 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.5868 +Average/AverageReturn -24.6473 +Average/Iteration 415 +Average/MaxReturn -7.38227 +Average/MinReturn -53.9744 +Average/NumEpisodes 100 +Average/StdReturn 10.8449 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.871047 +TotalEnvSteps 4.16e+06 +__unnamed_task__/AverageDiscountedReturn -15.5868 +__unnamed_task__/AverageReturn -24.6473 +__unnamed_task__/Iteration 415 +__unnamed_task__/MaxReturn -7.38227 +__unnamed_task__/MinReturn -53.9744 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.8449 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.40749 +policy/KL 0.0116247 +policy/KLBefore 0 +policy/LossAfter -0.0341335 +policy/LossBefore -0.00304935 +policy/dLoss 0.0310841 +---------------------------------------- ------------ +2025-04-03 12:07:54 | [rl2_trainer] epoch #416 | Optimizing policy... +2025-04-03 12:07:55 | [rl2_trainer] epoch #416 | Fitting baseline... +2025-04-03 12:07:55 | [rl2_trainer] epoch #416 | Computing loss before +2025-04-03 12:07:55 | [rl2_trainer] epoch #416 | Computing KL before +2025-04-03 12:07:56 | [rl2_trainer] epoch #416 | Optimizing +2025-04-03 12:08:32 | [rl2_trainer] epoch #416 | Computing KL after +2025-04-03 12:08:32 | [rl2_trainer] epoch #416 | Computing loss after +2025-04-03 12:08:33 | [rl2_trainer] epoch #416 | Saving snapshot... +2025-04-03 12:08:33 | [rl2_trainer] epoch #416 | Saved +2025-04-03 12:08:33 | [rl2_trainer] epoch #416 | Time 80194.37 s +2025-04-03 12:08:33 | [rl2_trainer] epoch #416 | EpochTime 156.29 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -9.6365 +Average/AverageReturn -13.8894 +Average/Iteration 416 +Average/MaxReturn 31.6493 +Average/MinReturn -32.7051 +Average/NumEpisodes 100 +Average/StdReturn 9.85396 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.364622 +TotalEnvSteps 4.17e+06 +__unnamed_task__/AverageDiscountedReturn -9.6365 +__unnamed_task__/AverageReturn -13.8894 +__unnamed_task__/Iteration 416 +__unnamed_task__/MaxReturn 31.6493 +__unnamed_task__/MinReturn -32.7051 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.85396 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.39898 +policy/KL 0.0160433 +policy/KLBefore 0 +policy/LossAfter -0.0248624 +policy/LossBefore 0.0141815 +policy/dLoss 0.0390439 +---------------------------------------- ----------- +2025-04-03 12:10:00 | [rl2_trainer] epoch #417 | Optimizing policy... +2025-04-03 12:10:00 | [rl2_trainer] epoch #417 | Fitting baseline... +2025-04-03 12:10:00 | [rl2_trainer] epoch #417 | Computing loss before +2025-04-03 12:10:01 | [rl2_trainer] epoch #417 | Computing KL before +2025-04-03 12:10:01 | [rl2_trainer] epoch #417 | Optimizing +2025-04-03 12:10:38 | [rl2_trainer] epoch #417 | Computing KL after +2025-04-03 12:10:39 | [rl2_trainer] epoch #417 | Computing loss after +2025-04-03 12:10:40 | [rl2_trainer] epoch #417 | Saving snapshot... +2025-04-03 12:10:40 | [rl2_trainer] epoch #417 | Saved +2025-04-03 12:10:40 | [rl2_trainer] epoch #417 | Time 80320.64 s +2025-04-03 12:10:40 | [rl2_trainer] epoch #417 | EpochTime 126.27 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.8572 +Average/AverageReturn -16.2288 +Average/Iteration 417 +Average/MaxReturn 5.76996 +Average/MinReturn -23.6832 +Average/NumEpisodes 100 +Average/StdReturn 3.96186 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.47856 +TotalEnvSteps 4.18e+06 +__unnamed_task__/AverageDiscountedReturn -10.8572 +__unnamed_task__/AverageReturn -16.2288 +__unnamed_task__/Iteration 417 +__unnamed_task__/MaxReturn 5.76996 +__unnamed_task__/MinReturn -23.6832 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 3.96186 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.39061 +policy/KL 0.0142049 +policy/KLBefore 0 +policy/LossAfter -0.0161126 +policy/LossBefore -0.00215207 +policy/dLoss 0.0139606 +---------------------------------------- ------------ +2025-04-03 12:14:24 | [rl2_trainer] epoch #418 | Optimizing policy... +2025-04-03 12:14:24 | [rl2_trainer] epoch #418 | Fitting baseline... +2025-04-03 12:14:24 | [rl2_trainer] epoch #418 | Computing loss before +2025-04-03 12:14:25 | [rl2_trainer] epoch #418 | Computing KL before +2025-04-03 12:14:25 | [rl2_trainer] epoch #418 | Optimizing +2025-04-03 12:15:01 | [rl2_trainer] epoch #418 | Computing KL after +2025-04-03 12:15:02 | [rl2_trainer] epoch #418 | Computing loss after +2025-04-03 12:15:03 | [rl2_trainer] epoch #418 | Saving snapshot... +2025-04-03 12:15:03 | [rl2_trainer] epoch #418 | Saved +2025-04-03 12:15:03 | [rl2_trainer] epoch #418 | Time 80583.83 s +2025-04-03 12:15:03 | [rl2_trainer] epoch #418 | EpochTime 263.19 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.5481 +Average/AverageReturn -26.243 +Average/Iteration 418 +Average/MaxReturn -11.1322 +Average/MinReturn -55.151 +Average/NumEpisodes 100 +Average/StdReturn 10.8981 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.883193 +TotalEnvSteps 4.19e+06 +__unnamed_task__/AverageDiscountedReturn -16.5481 +__unnamed_task__/AverageReturn -26.243 +__unnamed_task__/Iteration 418 +__unnamed_task__/MaxReturn -11.1322 +__unnamed_task__/MinReturn -55.151 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.8981 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.37849 +policy/KL 0.01345 +policy/KLBefore 0 +policy/LossAfter -0.0295516 +policy/LossBefore -0.00613619 +policy/dLoss 0.0234154 +---------------------------------------- ------------ +2025-04-03 12:18:01 | [rl2_trainer] epoch #419 | Optimizing policy... +2025-04-03 12:18:01 | [rl2_trainer] epoch #419 | Fitting baseline... +2025-04-03 12:18:01 | [rl2_trainer] epoch #419 | Computing loss before +2025-04-03 12:18:02 | [rl2_trainer] epoch #419 | Computing KL before +2025-04-03 12:18:03 | [rl2_trainer] epoch #419 | Optimizing +2025-04-03 12:18:38 | [rl2_trainer] epoch #419 | Computing KL after +2025-04-03 12:18:38 | [rl2_trainer] epoch #419 | Computing loss after +2025-04-03 12:18:39 | [rl2_trainer] epoch #419 | Saving snapshot... +2025-04-03 12:18:39 | [rl2_trainer] epoch #419 | Saved +2025-04-03 12:18:39 | [rl2_trainer] epoch #419 | Time 80800.13 s +2025-04-03 12:18:39 | [rl2_trainer] epoch #419 | EpochTime 216.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.9452 +Average/AverageReturn -16.1116 +Average/Iteration 419 +Average/MaxReturn 5.47186 +Average/MinReturn -42.2425 +Average/NumEpisodes 100 +Average/StdReturn 5.3772 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.575021 +TotalEnvSteps 4.2e+06 +__unnamed_task__/AverageDiscountedReturn -10.9452 +__unnamed_task__/AverageReturn -16.1116 +__unnamed_task__/Iteration 419 +__unnamed_task__/MaxReturn 5.47186 +__unnamed_task__/MinReturn -42.2425 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.3772 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.3539 +policy/KL 0.0136889 +policy/KLBefore 0 +policy/LossAfter -0.0088574 +policy/LossBefore 0.00523721 +policy/dLoss 0.0140946 +---------------------------------------- ------------ +2025-04-03 12:21:57 | [rl2_trainer] epoch #420 | Optimizing policy... +2025-04-03 12:21:58 | [rl2_trainer] epoch #420 | Fitting baseline... +2025-04-03 12:21:58 | [rl2_trainer] epoch #420 | Computing loss before +2025-04-03 12:21:58 | [rl2_trainer] epoch #420 | Computing KL before +2025-04-03 12:21:59 | [rl2_trainer] epoch #420 | Optimizing +2025-04-03 12:22:34 | [rl2_trainer] epoch #420 | Computing KL after +2025-04-03 12:22:35 | [rl2_trainer] epoch #420 | Computing loss after +2025-04-03 12:22:36 | [rl2_trainer] epoch #420 | Saving snapshot... +2025-04-03 12:22:36 | [rl2_trainer] epoch #420 | Saved +2025-04-03 12:22:36 | [rl2_trainer] epoch #420 | Time 81036.75 s +2025-04-03 12:22:36 | [rl2_trainer] epoch #420 | EpochTime 236.62 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4088 +Average/AverageReturn -25.9621 +Average/Iteration 420 +Average/MaxReturn -5.04454 +Average/MinReturn -84.2347 +Average/NumEpisodes 100 +Average/StdReturn 13.1904 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.906338 +TotalEnvSteps 4.21e+06 +__unnamed_task__/AverageDiscountedReturn -16.4088 +__unnamed_task__/AverageReturn -25.9621 +__unnamed_task__/Iteration 420 +__unnamed_task__/MaxReturn -5.04454 +__unnamed_task__/MinReturn -84.2347 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1904 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.33448 +policy/KL 0.0152121 +policy/KLBefore 0 +policy/LossAfter -0.0325064 +policy/LossBefore 0.000305183 +policy/dLoss 0.0328116 +---------------------------------------- ------------- +2025-04-03 12:25:04 | [rl2_trainer] epoch #421 | Optimizing policy... +2025-04-03 12:25:04 | [rl2_trainer] epoch #421 | Fitting baseline... +2025-04-03 12:25:04 | [rl2_trainer] epoch #421 | Computing loss before +2025-04-03 12:25:05 | [rl2_trainer] epoch #421 | Computing KL before +2025-04-03 12:25:05 | [rl2_trainer] epoch #421 | Optimizing +2025-04-03 12:25:42 | [rl2_trainer] epoch #421 | Computing KL after +2025-04-03 12:25:42 | [rl2_trainer] epoch #421 | Computing loss after +2025-04-03 12:25:43 | [rl2_trainer] epoch #421 | Saving snapshot... +2025-04-03 12:25:43 | [rl2_trainer] epoch #421 | Saved +2025-04-03 12:25:43 | [rl2_trainer] epoch #421 | Time 81224.07 s +2025-04-03 12:25:43 | [rl2_trainer] epoch #421 | EpochTime 187.32 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.3913 +Average/AverageReturn -15.3377 +Average/Iteration 421 +Average/MaxReturn 5.4778 +Average/MinReturn -22.4828 +Average/NumEpisodes 100 +Average/StdReturn 4.16018 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.419154 +TotalEnvSteps 4.22e+06 +__unnamed_task__/AverageDiscountedReturn -10.3913 +__unnamed_task__/AverageReturn -15.3377 +__unnamed_task__/Iteration 421 +__unnamed_task__/MaxReturn 5.4778 +__unnamed_task__/MinReturn -22.4828 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.16018 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.32552 +policy/KL 0.0121808 +policy/KLBefore 0 +policy/LossAfter -0.0188396 +policy/LossBefore -0.00231292 +policy/dLoss 0.0165266 +---------------------------------------- ------------ +2025-04-03 12:28:16 | [rl2_trainer] epoch #422 | Optimizing policy... +2025-04-03 12:28:17 | [rl2_trainer] epoch #422 | Fitting baseline... +2025-04-03 12:28:17 | [rl2_trainer] epoch #422 | Computing loss before +2025-04-03 12:28:17 | [rl2_trainer] epoch #422 | Computing KL before +2025-04-03 12:28:18 | [rl2_trainer] epoch #422 | Optimizing +2025-04-03 12:28:54 | [rl2_trainer] epoch #422 | Computing KL after +2025-04-03 12:28:55 | [rl2_trainer] epoch #422 | Computing loss after +2025-04-03 12:28:56 | [rl2_trainer] epoch #422 | Saving snapshot... +2025-04-03 12:28:56 | [rl2_trainer] epoch #422 | Saved +2025-04-03 12:28:56 | [rl2_trainer] epoch #422 | Time 81416.94 s +2025-04-03 12:28:56 | [rl2_trainer] epoch #422 | EpochTime 192.87 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.173 +Average/AverageReturn -24.2655 +Average/Iteration 422 +Average/MaxReturn 4.74031 +Average/MinReturn -55.8566 +Average/NumEpisodes 100 +Average/StdReturn 12.081 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.819641 +TotalEnvSteps 4.23e+06 +__unnamed_task__/AverageDiscountedReturn -15.173 +__unnamed_task__/AverageReturn -24.2655 +__unnamed_task__/Iteration 422 +__unnamed_task__/MaxReturn 4.74031 +__unnamed_task__/MinReturn -55.8566 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.081 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.33175 +policy/KL 0.0211495 +policy/KLBefore 0 +policy/LossAfter -0.0474543 +policy/LossBefore 0.00180556 +policy/dLoss 0.0492599 +---------------------------------------- ------------ +2025-04-03 12:32:57 | [rl2_trainer] epoch #423 | Optimizing policy... +2025-04-03 12:32:57 | [rl2_trainer] epoch #423 | Fitting baseline... +2025-04-03 12:32:57 | [rl2_trainer] epoch #423 | Computing loss before +2025-04-03 12:32:58 | [rl2_trainer] epoch #423 | Computing KL before +2025-04-03 12:32:59 | [rl2_trainer] epoch #423 | Optimizing +2025-04-03 12:33:33 | [rl2_trainer] epoch #423 | Computing KL after +2025-04-03 12:33:33 | [rl2_trainer] epoch #423 | Computing loss after +2025-04-03 12:33:34 | [rl2_trainer] epoch #423 | Saving snapshot... +2025-04-03 12:33:34 | [rl2_trainer] epoch #423 | Saved +2025-04-03 12:33:34 | [rl2_trainer] epoch #423 | Time 81695.29 s +2025-04-03 12:33:34 | [rl2_trainer] epoch #423 | EpochTime 278.35 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -15.9659 +Average/AverageReturn -25.4023 +Average/Iteration 423 +Average/MaxReturn -8.09956 +Average/MinReturn -59.9942 +Average/NumEpisodes 100 +Average/StdReturn 11.9047 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.874605 +TotalEnvSteps 4.24e+06 +__unnamed_task__/AverageDiscountedReturn -15.9659 +__unnamed_task__/AverageReturn -25.4023 +__unnamed_task__/Iteration 423 +__unnamed_task__/MaxReturn -8.09956 +__unnamed_task__/MinReturn -59.9942 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.9047 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.34315 +policy/KL 0.0155926 +policy/KLBefore 0 +policy/LossAfter -0.0433847 +policy/LossBefore -0.000778847 +policy/dLoss 0.0426058 +---------------------------------------- ------------- +2025-04-03 12:35:42 | [rl2_trainer] epoch #424 | Optimizing policy... +2025-04-03 12:35:42 | [rl2_trainer] epoch #424 | Fitting baseline... +2025-04-03 12:35:42 | [rl2_trainer] epoch #424 | Computing loss before +2025-04-03 12:35:43 | [rl2_trainer] epoch #424 | Computing KL before +2025-04-03 12:35:43 | [rl2_trainer] epoch #424 | Optimizing +2025-04-03 12:36:19 | [rl2_trainer] epoch #424 | Computing KL after +2025-04-03 12:36:20 | [rl2_trainer] epoch #424 | Computing loss after +2025-04-03 12:36:21 | [rl2_trainer] epoch #424 | Saving snapshot... +2025-04-03 12:36:21 | [rl2_trainer] epoch #424 | Saved +2025-04-03 12:36:21 | [rl2_trainer] epoch #424 | Time 81861.96 s +2025-04-03 12:36:21 | [rl2_trainer] epoch #424 | EpochTime 166.67 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -9.20549 +Average/AverageReturn -12.7385 +Average/Iteration 424 +Average/MaxReturn 31.5773 +Average/MinReturn -72.7726 +Average/NumEpisodes 100 +Average/StdReturn 10.0315 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.418572 +TotalEnvSteps 4.25e+06 +__unnamed_task__/AverageDiscountedReturn -9.20549 +__unnamed_task__/AverageReturn -12.7385 +__unnamed_task__/Iteration 424 +__unnamed_task__/MaxReturn 31.5773 +__unnamed_task__/MinReturn -72.7726 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0315 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.32969 +policy/KL 0.0181457 +policy/KLBefore 0 +policy/LossAfter -0.0358359 +policy/LossBefore 0.000851478 +policy/dLoss 0.0366874 +---------------------------------------- ------------- +2025-04-03 12:40:03 | [rl2_trainer] epoch #425 | Optimizing policy... +2025-04-03 12:40:03 | [rl2_trainer] epoch #425 | Fitting baseline... +2025-04-03 12:40:03 | [rl2_trainer] epoch #425 | Computing loss before +2025-04-03 12:40:04 | [rl2_trainer] epoch #425 | Computing KL before +2025-04-03 12:40:04 | [rl2_trainer] epoch #425 | Optimizing +2025-04-03 12:40:40 | [rl2_trainer] epoch #425 | Computing KL after +2025-04-03 12:40:41 | [rl2_trainer] epoch #425 | Computing loss after +2025-04-03 12:40:42 | [rl2_trainer] epoch #425 | Saving snapshot... +2025-04-03 12:40:42 | [rl2_trainer] epoch #425 | Saved +2025-04-03 12:40:42 | [rl2_trainer] epoch #425 | Time 82122.60 s +2025-04-03 12:40:42 | [rl2_trainer] epoch #425 | EpochTime 260.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.6403 +Average/AverageReturn -24.554 +Average/Iteration 425 +Average/MaxReturn 10.7199 +Average/MinReturn -67.8633 +Average/NumEpisodes 100 +Average/StdReturn 14.6092 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.896007 +TotalEnvSteps 4.26e+06 +__unnamed_task__/AverageDiscountedReturn -15.6403 +__unnamed_task__/AverageReturn -24.554 +__unnamed_task__/Iteration 425 +__unnamed_task__/MaxReturn 10.7199 +__unnamed_task__/MinReturn -67.8633 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.6092 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.31722 +policy/KL 0.0166908 +policy/KLBefore 0 +policy/LossAfter -0.0455059 +policy/LossBefore -0.00731333 +policy/dLoss 0.0381926 +---------------------------------------- ------------ +2025-04-03 12:44:35 | [rl2_trainer] epoch #426 | Optimizing policy... +2025-04-03 12:44:35 | [rl2_trainer] epoch #426 | Fitting baseline... +2025-04-03 12:44:35 | [rl2_trainer] epoch #426 | Computing loss before +2025-04-03 12:44:36 | [rl2_trainer] epoch #426 | Computing KL before +2025-04-03 12:44:37 | [rl2_trainer] epoch #426 | Optimizing +2025-04-03 12:45:11 | [rl2_trainer] epoch #426 | Computing KL after +2025-04-03 12:45:12 | [rl2_trainer] epoch #426 | Computing loss after +2025-04-03 12:45:13 | [rl2_trainer] epoch #426 | Saving snapshot... +2025-04-03 12:45:13 | [rl2_trainer] epoch #426 | Saved +2025-04-03 12:45:13 | [rl2_trainer] epoch #426 | Time 82393.96 s +2025-04-03 12:45:13 | [rl2_trainer] epoch #426 | EpochTime 271.36 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.3177 +Average/AverageReturn -23.7346 +Average/Iteration 426 +Average/MaxReturn 30.4664 +Average/MinReturn -59.6743 +Average/NumEpisodes 100 +Average/StdReturn 15.5816 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.858018 +TotalEnvSteps 4.27e+06 +__unnamed_task__/AverageDiscountedReturn -15.3177 +__unnamed_task__/AverageReturn -23.7346 +__unnamed_task__/Iteration 426 +__unnamed_task__/MaxReturn 30.4664 +__unnamed_task__/MinReturn -59.6743 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.5816 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.31972 +policy/KL 0.018018 +policy/KLBefore 0 +policy/LossAfter -0.049209 +policy/LossBefore 0.00844348 +policy/dLoss 0.0576525 +---------------------------------------- ------------ +2025-04-03 12:47:32 | [rl2_trainer] epoch #427 | Optimizing policy... +2025-04-03 12:47:32 | [rl2_trainer] epoch #427 | Fitting baseline... +2025-04-03 12:47:32 | [rl2_trainer] epoch #427 | Computing loss before +2025-04-03 12:47:32 | [rl2_trainer] epoch #427 | Computing KL before +2025-04-03 12:47:33 | [rl2_trainer] epoch #427 | Optimizing +2025-04-03 12:48:09 | [rl2_trainer] epoch #427 | Computing KL after +2025-04-03 12:48:09 | [rl2_trainer] epoch #427 | Computing loss after +2025-04-03 12:48:10 | [rl2_trainer] epoch #427 | Saving snapshot... +2025-04-03 12:48:10 | [rl2_trainer] epoch #427 | Saved +2025-04-03 12:48:10 | [rl2_trainer] epoch #427 | Time 82571.25 s +2025-04-03 12:48:10 | [rl2_trainer] epoch #427 | EpochTime 177.29 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.844 +Average/AverageReturn -25.2225 +Average/Iteration 427 +Average/MaxReturn 8.30007 +Average/MinReturn -54.8584 +Average/NumEpisodes 100 +Average/StdReturn 11.5505 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.848697 +TotalEnvSteps 4.28e+06 +__unnamed_task__/AverageDiscountedReturn -15.844 +__unnamed_task__/AverageReturn -25.2225 +__unnamed_task__/Iteration 427 +__unnamed_task__/MaxReturn 8.30007 +__unnamed_task__/MinReturn -54.8584 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.5505 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.3052 +policy/KL 0.0152721 +policy/KLBefore 0 +policy/LossAfter -0.046003 +policy/LossBefore -0.00534883 +policy/dLoss 0.0406542 +---------------------------------------- ------------ +2025-04-03 12:51:42 | [rl2_trainer] epoch #428 | Optimizing policy... +2025-04-03 12:51:42 | [rl2_trainer] epoch #428 | Fitting baseline... +2025-04-03 12:51:42 | [rl2_trainer] epoch #428 | Computing loss before +2025-04-03 12:51:42 | [rl2_trainer] epoch #428 | Computing KL before +2025-04-03 12:51:43 | [rl2_trainer] epoch #428 | Optimizing +2025-04-03 12:52:18 | [rl2_trainer] epoch #428 | Computing KL after +2025-04-03 12:52:19 | [rl2_trainer] epoch #428 | Computing loss after +2025-04-03 12:52:20 | [rl2_trainer] epoch #428 | Saving snapshot... +2025-04-03 12:52:20 | [rl2_trainer] epoch #428 | Saved +2025-04-03 12:52:20 | [rl2_trainer] epoch #428 | Time 82820.72 s +2025-04-03 12:52:20 | [rl2_trainer] epoch #428 | EpochTime 249.46 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.5197 +Average/AverageReturn -24.4897 +Average/Iteration 428 +Average/MaxReturn 40.5327 +Average/MinReturn -62.1192 +Average/NumEpisodes 100 +Average/StdReturn 13.3695 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.845184 +TotalEnvSteps 4.29e+06 +__unnamed_task__/AverageDiscountedReturn -15.5197 +__unnamed_task__/AverageReturn -24.4897 +__unnamed_task__/Iteration 428 +__unnamed_task__/MaxReturn 40.5327 +__unnamed_task__/MinReturn -62.1192 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.3695 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.2937 +policy/KL 0.0177771 +policy/KLBefore 0 +policy/LossAfter -0.0619607 +policy/LossBefore -0.0102201 +policy/dLoss 0.0517405 +---------------------------------------- ----------- +2025-04-03 12:54:17 | [rl2_trainer] epoch #429 | Optimizing policy... +2025-04-03 12:54:17 | [rl2_trainer] epoch #429 | Fitting baseline... +2025-04-03 12:54:17 | [rl2_trainer] epoch #429 | Computing loss before +2025-04-03 12:54:18 | [rl2_trainer] epoch #429 | Computing KL before +2025-04-03 12:54:18 | [rl2_trainer] epoch #429 | Optimizing +2025-04-03 12:54:54 | [rl2_trainer] epoch #429 | Computing KL after +2025-04-03 12:54:55 | [rl2_trainer] epoch #429 | Computing loss after +2025-04-03 12:54:56 | [rl2_trainer] epoch #429 | Saving snapshot... +2025-04-03 12:54:56 | [rl2_trainer] epoch #429 | Saved +2025-04-03 12:54:56 | [rl2_trainer] epoch #429 | Time 82976.62 s +2025-04-03 12:54:56 | [rl2_trainer] epoch #429 | EpochTime 155.91 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.055 +Average/AverageReturn -14.7194 +Average/Iteration 429 +Average/MaxReturn 21.1163 +Average/MinReturn -30.1032 +Average/NumEpisodes 100 +Average/StdReturn 7.9637 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.433838 +TotalEnvSteps 4.3e+06 +__unnamed_task__/AverageDiscountedReturn -10.055 +__unnamed_task__/AverageReturn -14.7194 +__unnamed_task__/Iteration 429 +__unnamed_task__/MaxReturn 21.1163 +__unnamed_task__/MinReturn -30.1032 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.9637 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.2932 +policy/KL 0.0130618 +policy/KLBefore 0 +policy/LossAfter -0.0272396 +policy/LossBefore 0.00953324 +policy/dLoss 0.0367728 +---------------------------------------- ------------ +2025-04-03 12:56:22 | [rl2_trainer] epoch #430 | Optimizing policy... +2025-04-03 12:56:22 | [rl2_trainer] epoch #430 | Fitting baseline... +2025-04-03 12:56:22 | [rl2_trainer] epoch #430 | Computing loss before +2025-04-03 12:56:23 | [rl2_trainer] epoch #430 | Computing KL before +2025-04-03 12:56:23 | [rl2_trainer] epoch #430 | Optimizing +2025-04-03 12:56:59 | [rl2_trainer] epoch #430 | Computing KL after +2025-04-03 12:56:59 | [rl2_trainer] epoch #430 | Computing loss after +2025-04-03 12:57:00 | [rl2_trainer] epoch #430 | Saving snapshot... +2025-04-03 12:57:00 | [rl2_trainer] epoch #430 | Saved +2025-04-03 12:57:00 | [rl2_trainer] epoch #430 | Time 83101.13 s +2025-04-03 12:57:00 | [rl2_trainer] epoch #430 | EpochTime 124.51 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -8.65512 +Average/AverageReturn -12.9865 +Average/Iteration 430 +Average/MaxReturn 38.1435 +Average/MinReturn -25.287 +Average/NumEpisodes 100 +Average/StdReturn 10.3617 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.357415 +TotalEnvSteps 4.31e+06 +__unnamed_task__/AverageDiscountedReturn -8.65512 +__unnamed_task__/AverageReturn -12.9865 +__unnamed_task__/Iteration 430 +__unnamed_task__/MaxReturn 38.1435 +__unnamed_task__/MinReturn -25.287 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.3617 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.25315 +policy/KL 0.0243233 +policy/KLBefore 0 +policy/LossAfter -0.0587116 +policy/LossBefore -0.0108696 +policy/dLoss 0.047842 +---------------------------------------- ----------- +2025-04-03 13:00:01 | [rl2_trainer] epoch #431 | Optimizing policy... +2025-04-03 13:00:01 | [rl2_trainer] epoch #431 | Fitting baseline... +2025-04-03 13:00:01 | [rl2_trainer] epoch #431 | Computing loss before +2025-04-03 13:00:02 | [rl2_trainer] epoch #431 | Computing KL before +2025-04-03 13:00:02 | [rl2_trainer] epoch #431 | Optimizing +2025-04-03 13:00:38 | [rl2_trainer] epoch #431 | Computing KL after +2025-04-03 13:00:38 | [rl2_trainer] epoch #431 | Computing loss after +2025-04-03 13:00:39 | [rl2_trainer] epoch #431 | Saving snapshot... +2025-04-03 13:00:39 | [rl2_trainer] epoch #431 | Saved +2025-04-03 13:00:39 | [rl2_trainer] epoch #431 | Time 83320.11 s +2025-04-03 13:00:39 | [rl2_trainer] epoch #431 | EpochTime 218.98 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -10.8648 +Average/AverageReturn -16.1776 +Average/Iteration 431 +Average/MaxReturn 7.50893 +Average/MinReturn -31.9985 +Average/NumEpisodes 100 +Average/StdReturn 4.99765 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.294105 +TotalEnvSteps 4.32e+06 +__unnamed_task__/AverageDiscountedReturn -10.8648 +__unnamed_task__/AverageReturn -16.1776 +__unnamed_task__/Iteration 431 +__unnamed_task__/MaxReturn 7.50893 +__unnamed_task__/MinReturn -31.9985 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.99765 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.23042 +policy/KL 0.0149525 +policy/KLBefore 0 +policy/LossAfter -0.0187212 +policy/LossBefore -0.0061084 +policy/dLoss 0.0126128 +---------------------------------------- ----------- +2025-04-03 13:04:05 | [rl2_trainer] epoch #432 | Optimizing policy... +2025-04-03 13:04:06 | [rl2_trainer] epoch #432 | Fitting baseline... +2025-04-03 13:04:06 | [rl2_trainer] epoch #432 | Computing loss before +2025-04-03 13:04:06 | [rl2_trainer] epoch #432 | Computing KL before +2025-04-03 13:04:07 | [rl2_trainer] epoch #432 | Optimizing +2025-04-03 13:04:41 | [rl2_trainer] epoch #432 | Computing KL after +2025-04-03 13:04:41 | [rl2_trainer] epoch #432 | Computing loss after +2025-04-03 13:04:42 | [rl2_trainer] epoch #432 | Saving snapshot... +2025-04-03 13:04:42 | [rl2_trainer] epoch #432 | Saved +2025-04-03 13:04:42 | [rl2_trainer] epoch #432 | Time 83563.01 s +2025-04-03 13:04:42 | [rl2_trainer] epoch #432 | EpochTime 242.90 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.8459 +Average/AverageReturn -25.5847 +Average/Iteration 432 +Average/MaxReturn 4.08694 +Average/MinReturn -60.7323 +Average/NumEpisodes 100 +Average/StdReturn 12.4063 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.864192 +TotalEnvSteps 4.33e+06 +__unnamed_task__/AverageDiscountedReturn -15.8459 +__unnamed_task__/AverageReturn -25.5847 +__unnamed_task__/Iteration 432 +__unnamed_task__/MaxReturn 4.08694 +__unnamed_task__/MinReturn -60.7323 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.4063 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.21098 +policy/KL 0.0154369 +policy/KLBefore 0 +policy/LossAfter -0.0442628 +policy/LossBefore -0.00538268 +policy/dLoss 0.0388801 +---------------------------------------- ------------ +2025-04-03 13:06:59 | [rl2_trainer] epoch #433 | Optimizing policy... +2025-04-03 13:07:00 | [rl2_trainer] epoch #433 | Fitting baseline... +2025-04-03 13:07:00 | [rl2_trainer] epoch #433 | Computing loss before +2025-04-03 13:07:00 | [rl2_trainer] epoch #433 | Computing KL before +2025-04-03 13:07:01 | [rl2_trainer] epoch #433 | Optimizing +2025-04-03 13:07:36 | [rl2_trainer] epoch #433 | Computing KL after +2025-04-03 13:07:37 | [rl2_trainer] epoch #433 | Computing loss after +2025-04-03 13:07:38 | [rl2_trainer] epoch #433 | Saving snapshot... +2025-04-03 13:07:38 | [rl2_trainer] epoch #433 | Saved +2025-04-03 13:07:38 | [rl2_trainer] epoch #433 | Time 83738.55 s +2025-04-03 13:07:38 | [rl2_trainer] epoch #433 | EpochTime 175.53 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -9.29621 +Average/AverageReturn -14.202 +Average/Iteration 433 +Average/MaxReturn 21.4886 +Average/MinReturn -25.4953 +Average/NumEpisodes 100 +Average/StdReturn 8.39646 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.408503 +TotalEnvSteps 4.34e+06 +__unnamed_task__/AverageDiscountedReturn -9.29621 +__unnamed_task__/AverageReturn -14.202 +__unnamed_task__/Iteration 433 +__unnamed_task__/MaxReturn 21.4886 +__unnamed_task__/MinReturn -25.4953 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.39646 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.19772 +policy/KL 0.0168702 +policy/KLBefore 0 +policy/LossAfter -0.0240076 +policy/LossBefore 0.0126324 +policy/dLoss 0.0366399 +---------------------------------------- ----------- +2025-04-03 13:11:26 | [rl2_trainer] epoch #434 | Optimizing policy... +2025-04-03 13:11:27 | [rl2_trainer] epoch #434 | Fitting baseline... +2025-04-03 13:11:27 | [rl2_trainer] epoch #434 | Computing loss before +2025-04-03 13:11:27 | [rl2_trainer] epoch #434 | Computing KL before +2025-04-03 13:11:28 | [rl2_trainer] epoch #434 | Optimizing +2025-04-03 13:12:04 | [rl2_trainer] epoch #434 | Computing KL after +2025-04-03 13:12:04 | [rl2_trainer] epoch #434 | Computing loss after +2025-04-03 13:12:05 | [rl2_trainer] epoch #434 | Saving snapshot... +2025-04-03 13:12:05 | [rl2_trainer] epoch #434 | Saved +2025-04-03 13:12:05 | [rl2_trainer] epoch #434 | Time 84006.36 s +2025-04-03 13:12:05 | [rl2_trainer] epoch #434 | EpochTime 267.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.891 +Average/AverageReturn -23.361 +Average/Iteration 434 +Average/MaxReturn 3.75757 +Average/MinReturn -82.6869 +Average/NumEpisodes 100 +Average/StdReturn 12.0777 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.854286 +TotalEnvSteps 4.35e+06 +__unnamed_task__/AverageDiscountedReturn -14.891 +__unnamed_task__/AverageReturn -23.361 +__unnamed_task__/Iteration 434 +__unnamed_task__/MaxReturn 3.75757 +__unnamed_task__/MinReturn -82.6869 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.0777 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.2001 +policy/KL 0.0148485 +policy/KLBefore 0 +policy/LossAfter -0.0378268 +policy/LossBefore -0.00182355 +policy/dLoss 0.0360032 +---------------------------------------- ------------ +2025-04-03 13:14:01 | [rl2_trainer] epoch #435 | Optimizing policy... +2025-04-03 13:14:02 | [rl2_trainer] epoch #435 | Fitting baseline... +2025-04-03 13:14:02 | [rl2_trainer] epoch #435 | Computing loss before +2025-04-03 13:14:02 | [rl2_trainer] epoch #435 | Computing KL before +2025-04-03 13:14:03 | [rl2_trainer] epoch #435 | Optimizing +2025-04-03 13:14:39 | [rl2_trainer] epoch #435 | Computing KL after +2025-04-03 13:14:40 | [rl2_trainer] epoch #435 | Computing loss after +2025-04-03 13:14:41 | [rl2_trainer] epoch #435 | Saving snapshot... +2025-04-03 13:14:41 | [rl2_trainer] epoch #435 | Saved +2025-04-03 13:14:41 | [rl2_trainer] epoch #435 | Time 84161.95 s +2025-04-03 13:14:41 | [rl2_trainer] epoch #435 | EpochTime 155.59 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.6516 +Average/AverageReturn -15.4833 +Average/Iteration 435 +Average/MaxReturn 20.706 +Average/MinReturn -29.019 +Average/NumEpisodes 100 +Average/StdReturn 7.03809 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.481875 +TotalEnvSteps 4.36e+06 +__unnamed_task__/AverageDiscountedReturn -10.6516 +__unnamed_task__/AverageReturn -15.4833 +__unnamed_task__/Iteration 435 +__unnamed_task__/MaxReturn 20.706 +__unnamed_task__/MinReturn -29.019 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.03809 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.18855 +policy/KL 0.0138128 +policy/KLBefore 0 +policy/LossAfter -0.0367789 +policy/LossBefore -0.00690446 +policy/dLoss 0.0298745 +---------------------------------------- ------------ +2025-04-03 13:18:21 | [rl2_trainer] epoch #436 | Optimizing policy... +2025-04-03 13:18:22 | [rl2_trainer] epoch #436 | Fitting baseline... +2025-04-03 13:18:22 | [rl2_trainer] epoch #436 | Computing loss before +2025-04-03 13:18:22 | [rl2_trainer] epoch #436 | Computing KL before +2025-04-03 13:18:23 | [rl2_trainer] epoch #436 | Optimizing +2025-04-03 13:18:59 | [rl2_trainer] epoch #436 | Computing KL after +2025-04-03 13:18:59 | [rl2_trainer] epoch #436 | Computing loss after +2025-04-03 13:19:00 | [rl2_trainer] epoch #436 | Saving snapshot... +2025-04-03 13:19:00 | [rl2_trainer] epoch #436 | Saved +2025-04-03 13:19:00 | [rl2_trainer] epoch #436 | Time 84421.21 s +2025-04-03 13:19:00 | [rl2_trainer] epoch #436 | EpochTime 259.26 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.6807 +Average/AverageReturn -24.6872 +Average/Iteration 436 +Average/MaxReturn -10.5491 +Average/MinReturn -64.4271 +Average/NumEpisodes 100 +Average/StdReturn 11.5977 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.850467 +TotalEnvSteps 4.37e+06 +__unnamed_task__/AverageDiscountedReturn -15.6807 +__unnamed_task__/AverageReturn -24.6872 +__unnamed_task__/Iteration 436 +__unnamed_task__/MaxReturn -10.5491 +__unnamed_task__/MinReturn -64.4271 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.5977 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.17382 +policy/KL 0.0158367 +policy/KLBefore 0 +policy/LossAfter -0.0405286 +policy/LossBefore -0.00584642 +policy/dLoss 0.0346822 +---------------------------------------- ------------ +2025-04-03 13:22:56 | [rl2_trainer] epoch #437 | Optimizing policy... +2025-04-03 13:22:56 | [rl2_trainer] epoch #437 | Fitting baseline... +2025-04-03 13:22:56 | [rl2_trainer] epoch #437 | Computing loss before +2025-04-03 13:22:57 | [rl2_trainer] epoch #437 | Computing KL before +2025-04-03 13:22:58 | [rl2_trainer] epoch #437 | Optimizing +2025-04-03 13:23:34 | [rl2_trainer] epoch #437 | Computing KL after +2025-04-03 13:23:35 | [rl2_trainer] epoch #437 | Computing loss after +2025-04-03 13:23:36 | [rl2_trainer] epoch #437 | Saving snapshot... +2025-04-03 13:23:36 | [rl2_trainer] epoch #437 | Saved +2025-04-03 13:23:36 | [rl2_trainer] epoch #437 | Time 84696.93 s +2025-04-03 13:23:36 | [rl2_trainer] epoch #437 | EpochTime 275.72 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.5094 +Average/AverageReturn -24.9171 +Average/Iteration 437 +Average/MaxReturn 57.0026 +Average/MinReturn -74.2596 +Average/NumEpisodes 100 +Average/StdReturn 13.1089 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.705625 +TotalEnvSteps 4.38e+06 +__unnamed_task__/AverageDiscountedReturn -15.5094 +__unnamed_task__/AverageReturn -24.9171 +__unnamed_task__/Iteration 437 +__unnamed_task__/MaxReturn 57.0026 +__unnamed_task__/MinReturn -74.2596 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1089 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.15915 +policy/KL 0.0154966 +policy/KLBefore 0 +policy/LossAfter -0.0474062 +policy/LossBefore 0.0105275 +policy/dLoss 0.0579338 +---------------------------------------- ----------- +2025-04-03 13:26:21 | [rl2_trainer] epoch #438 | Optimizing policy... +2025-04-03 13:26:22 | [rl2_trainer] epoch #438 | Fitting baseline... +2025-04-03 13:26:22 | [rl2_trainer] epoch #438 | Computing loss before +2025-04-03 13:26:22 | [rl2_trainer] epoch #438 | Computing KL before +2025-04-03 13:26:23 | [rl2_trainer] epoch #438 | Optimizing +2025-04-03 13:26:57 | [rl2_trainer] epoch #438 | Computing KL after +2025-04-03 13:26:58 | [rl2_trainer] epoch #438 | Computing loss after +2025-04-03 13:26:59 | [rl2_trainer] epoch #438 | Saving snapshot... +2025-04-03 13:26:59 | [rl2_trainer] epoch #438 | Saved +2025-04-03 13:26:59 | [rl2_trainer] epoch #438 | Time 84899.84 s +2025-04-03 13:26:59 | [rl2_trainer] epoch #438 | EpochTime 202.90 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.933 +Average/AverageReturn -16.4953 +Average/Iteration 438 +Average/MaxReturn 20.0814 +Average/MinReturn -28.7647 +Average/NumEpisodes 100 +Average/StdReturn 8.1744 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.552442 +TotalEnvSteps 4.39e+06 +__unnamed_task__/AverageDiscountedReturn -10.933 +__unnamed_task__/AverageReturn -16.4953 +__unnamed_task__/Iteration 438 +__unnamed_task__/MaxReturn 20.0814 +__unnamed_task__/MinReturn -28.7647 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.1744 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.13566 +policy/KL 0.0144981 +policy/KLBefore 0 +policy/LossAfter -0.0278297 +policy/LossBefore 0.00168217 +policy/dLoss 0.0295118 +---------------------------------------- ------------ +2025-04-03 13:29:18 | [rl2_trainer] epoch #439 | Optimizing policy... +2025-04-03 13:29:18 | [rl2_trainer] epoch #439 | Fitting baseline... +2025-04-03 13:29:18 | [rl2_trainer] epoch #439 | Computing loss before +2025-04-03 13:29:19 | [rl2_trainer] epoch #439 | Computing KL before +2025-04-03 13:29:20 | [rl2_trainer] epoch #439 | Optimizing +2025-04-03 13:29:56 | [rl2_trainer] epoch #439 | Computing KL after +2025-04-03 13:29:56 | [rl2_trainer] epoch #439 | Computing loss after +2025-04-03 13:29:57 | [rl2_trainer] epoch #439 | Saving snapshot... +2025-04-03 13:29:57 | [rl2_trainer] epoch #439 | Saved +2025-04-03 13:29:57 | [rl2_trainer] epoch #439 | Time 85078.19 s +2025-04-03 13:29:57 | [rl2_trainer] epoch #439 | EpochTime 178.36 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -11.5455 +Average/AverageReturn -17.4444 +Average/Iteration 439 +Average/MaxReturn 4.92784 +Average/MinReturn -24.6158 +Average/NumEpisodes 100 +Average/StdReturn 4.72573 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.497058 +TotalEnvSteps 4.4e+06 +__unnamed_task__/AverageDiscountedReturn -11.5455 +__unnamed_task__/AverageReturn -17.4444 +__unnamed_task__/Iteration 439 +__unnamed_task__/MaxReturn 4.92784 +__unnamed_task__/MinReturn -24.6158 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 4.72573 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.11553 +policy/KL 0.0120482 +policy/KLBefore 0 +policy/LossAfter -0.0115872 +policy/LossBefore -0.000473232 +policy/dLoss 0.011114 +---------------------------------------- ------------- +2025-04-03 13:33:37 | [rl2_trainer] epoch #440 | Optimizing policy... +2025-04-03 13:33:38 | [rl2_trainer] epoch #440 | Fitting baseline... +2025-04-03 13:33:38 | [rl2_trainer] epoch #440 | Computing loss before +2025-04-03 13:33:38 | [rl2_trainer] epoch #440 | Computing KL before +2025-04-03 13:33:39 | [rl2_trainer] epoch #440 | Optimizing +2025-04-03 13:34:15 | [rl2_trainer] epoch #440 | Computing KL after +2025-04-03 13:34:16 | [rl2_trainer] epoch #440 | Computing loss after +2025-04-03 13:34:17 | [rl2_trainer] epoch #440 | Saving snapshot... +2025-04-03 13:34:17 | [rl2_trainer] epoch #440 | Saved +2025-04-03 13:34:17 | [rl2_trainer] epoch #440 | Time 85338.00 s +2025-04-03 13:34:17 | [rl2_trainer] epoch #440 | EpochTime 259.80 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.8469 +Average/AverageReturn -24.8396 +Average/Iteration 440 +Average/MaxReturn 5.03775 +Average/MinReturn -76.1 +Average/NumEpisodes 100 +Average/StdReturn 12.8842 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.817883 +TotalEnvSteps 4.41e+06 +__unnamed_task__/AverageDiscountedReturn -15.8469 +__unnamed_task__/AverageReturn -24.8396 +__unnamed_task__/Iteration 440 +__unnamed_task__/MaxReturn 5.03775 +__unnamed_task__/MinReturn -76.1 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.8842 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.07794 +policy/KL 0.0170605 +policy/KLBefore 0 +policy/LossAfter -0.0456745 +policy/LossBefore -0.0042038 +policy/dLoss 0.0414707 +---------------------------------------- ----------- +2025-04-03 13:35:44 | [rl2_trainer] epoch #441 | Optimizing policy... +2025-04-03 13:35:44 | [rl2_trainer] epoch #441 | Fitting baseline... +2025-04-03 13:35:45 | [rl2_trainer] epoch #441 | Computing loss before +2025-04-03 13:35:45 | [rl2_trainer] epoch #441 | Computing KL before +2025-04-03 13:35:46 | [rl2_trainer] epoch #441 | Optimizing +2025-04-03 13:36:23 | [rl2_trainer] epoch #441 | Computing KL after +2025-04-03 13:36:23 | [rl2_trainer] epoch #441 | Computing loss after +2025-04-03 13:36:24 | [rl2_trainer] epoch #441 | Saving snapshot... +2025-04-03 13:36:24 | [rl2_trainer] epoch #441 | Saved +2025-04-03 13:36:24 | [rl2_trainer] epoch #441 | Time 85465.12 s +2025-04-03 13:36:24 | [rl2_trainer] epoch #441 | EpochTime 127.12 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.65213 +Average/AverageReturn -12.7512 +Average/Iteration 441 +Average/MaxReturn 31.3746 +Average/MinReturn -26.476 +Average/NumEpisodes 100 +Average/StdReturn 9.05635 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.391453 +TotalEnvSteps 4.42e+06 +__unnamed_task__/AverageDiscountedReturn -8.65213 +__unnamed_task__/AverageReturn -12.7512 +__unnamed_task__/Iteration 441 +__unnamed_task__/MaxReturn 31.3746 +__unnamed_task__/MinReturn -26.476 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.05635 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.03457 +policy/KL 0.015834 +policy/KLBefore 0 +policy/LossAfter -0.0378345 +policy/LossBefore 0.00818983 +policy/dLoss 0.0460243 +---------------------------------------- ------------ +2025-04-03 13:39:25 | [rl2_trainer] epoch #442 | Optimizing policy... +2025-04-03 13:39:25 | [rl2_trainer] epoch #442 | Fitting baseline... +2025-04-03 13:39:25 | [rl2_trainer] epoch #442 | Computing loss before +2025-04-03 13:39:26 | [rl2_trainer] epoch #442 | Computing KL before +2025-04-03 13:39:26 | [rl2_trainer] epoch #442 | Optimizing +2025-04-03 13:40:02 | [rl2_trainer] epoch #442 | Computing KL after +2025-04-03 13:40:02 | [rl2_trainer] epoch #442 | Computing loss after +2025-04-03 13:40:03 | [rl2_trainer] epoch #442 | Saving snapshot... +2025-04-03 13:40:03 | [rl2_trainer] epoch #442 | Saved +2025-04-03 13:40:03 | [rl2_trainer] epoch #442 | Time 85684.39 s +2025-04-03 13:40:03 | [rl2_trainer] epoch #442 | EpochTime 219.27 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.8024 +Average/AverageReturn -17.5223 +Average/Iteration 442 +Average/MaxReturn 18.5487 +Average/MinReturn -50.484 +Average/NumEpisodes 100 +Average/StdReturn 7.44946 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.598902 +TotalEnvSteps 4.43e+06 +__unnamed_task__/AverageDiscountedReturn -11.8024 +__unnamed_task__/AverageReturn -17.5223 +__unnamed_task__/Iteration 442 +__unnamed_task__/MaxReturn 18.5487 +__unnamed_task__/MinReturn -50.484 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.44946 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.01488 +policy/KL 0.0132199 +policy/KLBefore 0 +policy/LossAfter -0.0294917 +policy/LossBefore -0.00142553 +policy/dLoss 0.0280662 +---------------------------------------- ------------ +2025-04-03 13:43:02 | [rl2_trainer] epoch #443 | Optimizing policy... +2025-04-03 13:43:02 | [rl2_trainer] epoch #443 | Fitting baseline... +2025-04-03 13:43:02 | [rl2_trainer] epoch #443 | Computing loss before +2025-04-03 13:43:02 | [rl2_trainer] epoch #443 | Computing KL before +2025-04-03 13:43:03 | [rl2_trainer] epoch #443 | Optimizing +2025-04-03 13:43:39 | [rl2_trainer] epoch #443 | Computing KL after +2025-04-03 13:43:39 | [rl2_trainer] epoch #443 | Computing loss after +2025-04-03 13:43:40 | [rl2_trainer] epoch #443 | Saving snapshot... +2025-04-03 13:43:40 | [rl2_trainer] epoch #443 | Saved +2025-04-03 13:43:40 | [rl2_trainer] epoch #443 | Time 85901.25 s +2025-04-03 13:43:40 | [rl2_trainer] epoch #443 | EpochTime 216.85 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.9999 +Average/AverageReturn -16.3215 +Average/Iteration 443 +Average/MaxReturn 9.74548 +Average/MinReturn -26.5989 +Average/NumEpisodes 100 +Average/StdReturn 6.18385 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.547833 +TotalEnvSteps 4.44e+06 +__unnamed_task__/AverageDiscountedReturn -10.9999 +__unnamed_task__/AverageReturn -16.3215 +__unnamed_task__/Iteration 443 +__unnamed_task__/MaxReturn 9.74548 +__unnamed_task__/MinReturn -26.5989 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.18385 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.01809 +policy/KL 0.0130485 +policy/KLBefore 0 +policy/LossAfter -0.0233815 +policy/LossBefore 0.00298272 +policy/dLoss 0.0263642 +---------------------------------------- ------------ +2025-04-03 13:47:24 | [rl2_trainer] epoch #444 | Optimizing policy... +2025-04-03 13:47:24 | [rl2_trainer] epoch #444 | Fitting baseline... +2025-04-03 13:47:24 | [rl2_trainer] epoch #444 | Computing loss before +2025-04-03 13:47:25 | [rl2_trainer] epoch #444 | Computing KL before +2025-04-03 13:47:25 | [rl2_trainer] epoch #444 | Optimizing +2025-04-03 13:48:00 | [rl2_trainer] epoch #444 | Computing KL after +2025-04-03 13:48:01 | [rl2_trainer] epoch #444 | Computing loss after +2025-04-03 13:48:01 | [rl2_trainer] epoch #444 | Saving snapshot... +2025-04-03 13:48:01 | [rl2_trainer] epoch #444 | Saved +2025-04-03 13:48:01 | [rl2_trainer] epoch #444 | Time 86162.43 s +2025-04-03 13:48:01 | [rl2_trainer] epoch #444 | EpochTime 261.18 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.4018 +Average/AverageReturn -26.2678 +Average/Iteration 444 +Average/MaxReturn -9.69948 +Average/MinReturn -63.2614 +Average/NumEpisodes 100 +Average/StdReturn 10.7834 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.871888 +TotalEnvSteps 4.45e+06 +__unnamed_task__/AverageDiscountedReturn -16.4018 +__unnamed_task__/AverageReturn -26.2678 +__unnamed_task__/Iteration 444 +__unnamed_task__/MaxReturn -9.69948 +__unnamed_task__/MinReturn -63.2614 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.7834 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.02867 +policy/KL 0.0122722 +policy/KLBefore 0 +policy/LossAfter -0.0426671 +policy/LossBefore -0.00420683 +policy/dLoss 0.0384603 +---------------------------------------- ------------ +2025-04-03 13:50:50 | [rl2_trainer] epoch #445 | Optimizing policy... +2025-04-03 13:50:51 | [rl2_trainer] epoch #445 | Fitting baseline... +2025-04-03 13:50:51 | [rl2_trainer] epoch #445 | Computing loss before +2025-04-03 13:50:51 | [rl2_trainer] epoch #445 | Computing KL before +2025-04-03 13:50:52 | [rl2_trainer] epoch #445 | Optimizing +2025-04-03 13:51:28 | [rl2_trainer] epoch #445 | Computing KL after +2025-04-03 13:51:29 | [rl2_trainer] epoch #445 | Computing loss after +2025-04-03 13:51:30 | [rl2_trainer] epoch #445 | Saving snapshot... +2025-04-03 13:51:30 | [rl2_trainer] epoch #445 | Saved +2025-04-03 13:51:30 | [rl2_trainer] epoch #445 | Time 86370.61 s +2025-04-03 13:51:30 | [rl2_trainer] epoch #445 | EpochTime 208.17 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -16.1025 +Average/AverageReturn -25.6618 +Average/Iteration 445 +Average/MaxReturn 6.74944 +Average/MinReturn -59.4049 +Average/NumEpisodes 100 +Average/StdReturn 12.636 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.863935 +TotalEnvSteps 4.46e+06 +__unnamed_task__/AverageDiscountedReturn -16.1025 +__unnamed_task__/AverageReturn -25.6618 +__unnamed_task__/Iteration 445 +__unnamed_task__/MaxReturn 6.74944 +__unnamed_task__/MinReturn -59.4049 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.636 +__unnamed_task__/TerminationRate 0 +policy/Entropy 1.01721 +policy/KL 0.0173802 +policy/KLBefore 0 +policy/LossAfter -0.0465984 +policy/LossBefore -0.00329599 +policy/dLoss 0.0433024 +---------------------------------------- ------------ +2025-04-03 13:53:36 | [rl2_trainer] epoch #446 | Optimizing policy... +2025-04-03 13:53:36 | [rl2_trainer] epoch #446 | Fitting baseline... +2025-04-03 13:53:36 | [rl2_trainer] epoch #446 | Computing loss before +2025-04-03 13:53:37 | [rl2_trainer] epoch #446 | Computing KL before +2025-04-03 13:53:37 | [rl2_trainer] epoch #446 | Optimizing +2025-04-03 13:54:14 | [rl2_trainer] epoch #446 | Computing KL after +2025-04-03 13:54:14 | [rl2_trainer] epoch #446 | Computing loss after +2025-04-03 13:54:15 | [rl2_trainer] epoch #446 | Saving snapshot... +2025-04-03 13:54:15 | [rl2_trainer] epoch #446 | Saved +2025-04-03 13:54:15 | [rl2_trainer] epoch #446 | Time 86536.07 s +2025-04-03 13:54:15 | [rl2_trainer] epoch #446 | EpochTime 165.45 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.64346 +Average/AverageReturn -13.582 +Average/Iteration 446 +Average/MaxReturn 31.9629 +Average/MinReturn -65.2172 +Average/NumEpisodes 100 +Average/StdReturn 10.7017 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.339737 +TotalEnvSteps 4.47e+06 +__unnamed_task__/AverageDiscountedReturn -9.64346 +__unnamed_task__/AverageReturn -13.582 +__unnamed_task__/Iteration 446 +__unnamed_task__/MaxReturn 31.9629 +__unnamed_task__/MinReturn -65.2172 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.7017 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.971899 +policy/KL 0.0425895 +policy/KLBefore 0 +policy/LossAfter 0.0144877 +policy/LossBefore 0.00768091 +policy/dLoss -0.00680683 +---------------------------------------- ------------ +2025-04-03 13:56:11 | [rl2_trainer] epoch #447 | Optimizing policy... +2025-04-03 13:56:11 | [rl2_trainer] epoch #447 | Fitting baseline... +2025-04-03 13:56:11 | [rl2_trainer] epoch #447 | Computing loss before +2025-04-03 13:56:12 | [rl2_trainer] epoch #447 | Computing KL before +2025-04-03 13:56:13 | [rl2_trainer] epoch #447 | Optimizing +2025-04-03 13:56:45 | [rl2_trainer] epoch #447 | Computing KL after +2025-04-03 13:56:46 | [rl2_trainer] epoch #447 | Computing loss after +2025-04-03 13:56:47 | [rl2_trainer] epoch #447 | Saving snapshot... +2025-04-03 13:56:47 | [rl2_trainer] epoch #447 | Saved +2025-04-03 13:56:47 | [rl2_trainer] epoch #447 | Time 86687.64 s +2025-04-03 13:56:47 | [rl2_trainer] epoch #447 | EpochTime 151.58 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.90305 +Average/AverageReturn -14.1537 +Average/Iteration 447 +Average/MaxReturn 24.0608 +Average/MinReturn -27.728 +Average/NumEpisodes 100 +Average/StdReturn 7.98014 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.49534 +TotalEnvSteps 4.48e+06 +__unnamed_task__/AverageDiscountedReturn -9.90305 +__unnamed_task__/AverageReturn -14.1537 +__unnamed_task__/Iteration 447 +__unnamed_task__/MaxReturn 24.0608 +__unnamed_task__/MinReturn -27.728 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.98014 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.943323 +policy/KL 0.016935 +policy/KLBefore 0 +policy/LossAfter -0.029086 +policy/LossBefore -0.00327256 +policy/dLoss 0.0258135 +---------------------------------------- ------------ +2025-04-03 13:59:15 | [rl2_trainer] epoch #448 | Optimizing policy... +2025-04-03 13:59:15 | [rl2_trainer] epoch #448 | Fitting baseline... +2025-04-03 13:59:15 | [rl2_trainer] epoch #448 | Computing loss before +2025-04-03 13:59:16 | [rl2_trainer] epoch #448 | Computing KL before +2025-04-03 13:59:16 | [rl2_trainer] epoch #448 | Optimizing +2025-04-03 13:59:51 | [rl2_trainer] epoch #448 | Computing KL after +2025-04-03 13:59:52 | [rl2_trainer] epoch #448 | Computing loss after +2025-04-03 13:59:53 | [rl2_trainer] epoch #448 | Saving snapshot... +2025-04-03 13:59:53 | [rl2_trainer] epoch #448 | Saved +2025-04-03 13:59:53 | [rl2_trainer] epoch #448 | Time 86873.92 s +2025-04-03 13:59:53 | [rl2_trainer] epoch #448 | EpochTime 186.27 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.0824 +Average/AverageReturn -14.8441 +Average/Iteration 448 +Average/MaxReturn 20.0115 +Average/MinReturn -44.7414 +Average/NumEpisodes 100 +Average/StdReturn 10.4312 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.579941 +TotalEnvSteps 4.49e+06 +__unnamed_task__/AverageDiscountedReturn -10.0824 +__unnamed_task__/AverageReturn -14.8441 +__unnamed_task__/Iteration 448 +__unnamed_task__/MaxReturn 20.0115 +__unnamed_task__/MinReturn -44.7414 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4312 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.901097 +policy/KL 0.0213884 +policy/KLBefore 0 +policy/LossAfter -0.0277007 +policy/LossBefore 0.00726528 +policy/dLoss 0.0349659 +---------------------------------------- ------------ +2025-04-03 14:02:11 | [rl2_trainer] epoch #449 | Optimizing policy... +2025-04-03 14:02:11 | [rl2_trainer] epoch #449 | Fitting baseline... +2025-04-03 14:02:11 | [rl2_trainer] epoch #449 | Computing loss before +2025-04-03 14:02:12 | [rl2_trainer] epoch #449 | Computing KL before +2025-04-03 14:02:13 | [rl2_trainer] epoch #449 | Optimizing +2025-04-03 14:02:48 | [rl2_trainer] epoch #449 | Computing KL after +2025-04-03 14:02:48 | [rl2_trainer] epoch #449 | Computing loss after +2025-04-03 14:02:49 | [rl2_trainer] epoch #449 | Saving snapshot... +2025-04-03 14:02:49 | [rl2_trainer] epoch #449 | Saved +2025-04-03 14:02:49 | [rl2_trainer] epoch #449 | Time 87050.27 s +2025-04-03 14:02:49 | [rl2_trainer] epoch #449 | EpochTime 176.36 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.60932 +Average/AverageReturn -12.694 +Average/Iteration 449 +Average/MaxReturn 43.4843 +Average/MinReturn -30.19 +Average/NumEpisodes 100 +Average/StdReturn 12.1008 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.470281 +TotalEnvSteps 4.5e+06 +__unnamed_task__/AverageDiscountedReturn -8.60932 +__unnamed_task__/AverageReturn -12.694 +__unnamed_task__/Iteration 449 +__unnamed_task__/MaxReturn 43.4843 +__unnamed_task__/MinReturn -30.19 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.1008 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.900639 +policy/KL 0.0185981 +policy/KLBefore 0 +policy/LossAfter -0.0509395 +policy/LossBefore 0.00460949 +policy/dLoss 0.0555489 +---------------------------------------- ------------ +2025-04-03 14:04:48 | [rl2_trainer] epoch #450 | Optimizing policy... +2025-04-03 14:04:49 | [rl2_trainer] epoch #450 | Fitting baseline... +2025-04-03 14:04:49 | [rl2_trainer] epoch #450 | Computing loss before +2025-04-03 14:04:49 | [rl2_trainer] epoch #450 | Computing KL before +2025-04-03 14:04:50 | [rl2_trainer] epoch #450 | Optimizing +2025-04-03 14:05:24 | [rl2_trainer] epoch #450 | Computing KL after +2025-04-03 14:05:25 | [rl2_trainer] epoch #450 | Computing loss after +2025-04-03 14:05:26 | [rl2_trainer] epoch #450 | Saving snapshot... +2025-04-03 14:05:26 | [rl2_trainer] epoch #450 | Saved +2025-04-03 14:05:26 | [rl2_trainer] epoch #450 | Time 87206.95 s +2025-04-03 14:05:26 | [rl2_trainer] epoch #450 | EpochTime 156.67 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.61491 +Average/AverageReturn -12.1393 +Average/Iteration 450 +Average/MaxReturn 30.5701 +Average/MinReturn -29.6014 +Average/NumEpisodes 100 +Average/StdReturn 11.2494 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.397487 +TotalEnvSteps 4.51e+06 +__unnamed_task__/AverageDiscountedReturn -8.61491 +__unnamed_task__/AverageReturn -12.1393 +__unnamed_task__/Iteration 450 +__unnamed_task__/MaxReturn 30.5701 +__unnamed_task__/MinReturn -29.6014 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.2494 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.88006 +policy/KL 0.0206984 +policy/KLBefore 0 +policy/LossAfter -0.0404302 +policy/LossBefore 0.00654049 +policy/dLoss 0.0469707 +---------------------------------------- ------------ +2025-04-03 14:07:36 | [rl2_trainer] epoch #451 | Optimizing policy... +2025-04-03 14:07:36 | [rl2_trainer] epoch #451 | Fitting baseline... +2025-04-03 14:07:36 | [rl2_trainer] epoch #451 | Computing loss before +2025-04-03 14:07:37 | [rl2_trainer] epoch #451 | Computing KL before +2025-04-03 14:07:37 | [rl2_trainer] epoch #451 | Optimizing +2025-04-03 14:08:13 | [rl2_trainer] epoch #451 | Computing KL after +2025-04-03 14:08:14 | [rl2_trainer] epoch #451 | Computing loss after +2025-04-03 14:08:15 | [rl2_trainer] epoch #451 | Saving snapshot... +2025-04-03 14:08:15 | [rl2_trainer] epoch #451 | Saved +2025-04-03 14:08:15 | [rl2_trainer] epoch #451 | Time 87375.85 s +2025-04-03 14:08:15 | [rl2_trainer] epoch #451 | EpochTime 168.90 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -8.9261 +Average/AverageReturn -12.687 +Average/Iteration 451 +Average/MaxReturn 29.5784 +Average/MinReturn -97.7683 +Average/NumEpisodes 100 +Average/StdReturn 13.1007 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.564584 +TotalEnvSteps 4.52e+06 +__unnamed_task__/AverageDiscountedReturn -8.9261 +__unnamed_task__/AverageReturn -12.687 +__unnamed_task__/Iteration 451 +__unnamed_task__/MaxReturn 29.5784 +__unnamed_task__/MinReturn -97.7683 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1007 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.856179 +policy/KL 0.0229119 +policy/KLBefore 0 +policy/LossAfter -0.0419301 +policy/LossBefore -0.0106515 +policy/dLoss 0.0312786 +---------------------------------------- ----------- +2025-04-03 14:10:12 | [rl2_trainer] epoch #452 | Optimizing policy... +2025-04-03 14:10:12 | [rl2_trainer] epoch #452 | Fitting baseline... +2025-04-03 14:10:12 | [rl2_trainer] epoch #452 | Computing loss before +2025-04-03 14:10:13 | [rl2_trainer] epoch #452 | Computing KL before +2025-04-03 14:10:13 | [rl2_trainer] epoch #452 | Optimizing +2025-04-03 14:10:49 | [rl2_trainer] epoch #452 | Computing KL after +2025-04-03 14:10:50 | [rl2_trainer] epoch #452 | Computing loss after +2025-04-03 14:10:51 | [rl2_trainer] epoch #452 | Saving snapshot... +2025-04-03 14:10:51 | [rl2_trainer] epoch #452 | Saved +2025-04-03 14:10:51 | [rl2_trainer] epoch #452 | Time 87531.57 s +2025-04-03 14:10:51 | [rl2_trainer] epoch #452 | EpochTime 155.72 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.92088 +Average/AverageReturn -12.9065 +Average/Iteration 452 +Average/MaxReturn 32.6207 +Average/MinReturn -24.7912 +Average/NumEpisodes 100 +Average/StdReturn 10.3548 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.30135 +TotalEnvSteps 4.53e+06 +__unnamed_task__/AverageDiscountedReturn -8.92088 +__unnamed_task__/AverageReturn -12.9065 +__unnamed_task__/Iteration 452 +__unnamed_task__/MaxReturn 32.6207 +__unnamed_task__/MinReturn -24.7912 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.3548 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.832579 +policy/KL 0.0229205 +policy/KLBefore 0 +policy/LossAfter -0.0306129 +policy/LossBefore 0.00487325 +policy/dLoss 0.0354861 +---------------------------------------- ------------ +2025-04-03 14:14:08 | [rl2_trainer] epoch #453 | Optimizing policy... +2025-04-03 14:14:08 | [rl2_trainer] epoch #453 | Fitting baseline... +2025-04-03 14:14:08 | [rl2_trainer] epoch #453 | Computing loss before +2025-04-03 14:14:09 | [rl2_trainer] epoch #453 | Computing KL before +2025-04-03 14:14:09 | [rl2_trainer] epoch #453 | Optimizing +2025-04-03 14:14:43 | [rl2_trainer] epoch #453 | Computing KL after +2025-04-03 14:14:44 | [rl2_trainer] epoch #453 | Computing loss after +2025-04-03 14:14:45 | [rl2_trainer] epoch #453 | Saving snapshot... +2025-04-03 14:14:45 | [rl2_trainer] epoch #453 | Saved +2025-04-03 14:14:45 | [rl2_trainer] epoch #453 | Time 87765.58 s +2025-04-03 14:14:45 | [rl2_trainer] epoch #453 | EpochTime 234.01 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.1538 +Average/AverageReturn -26.9355 +Average/Iteration 453 +Average/MaxReturn 24.6159 +Average/MinReturn -61.3258 +Average/NumEpisodes 100 +Average/StdReturn 15.0739 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.85347 +TotalEnvSteps 4.54e+06 +__unnamed_task__/AverageDiscountedReturn -17.1538 +__unnamed_task__/AverageReturn -26.9355 +__unnamed_task__/Iteration 453 +__unnamed_task__/MaxReturn 24.6159 +__unnamed_task__/MinReturn -61.3258 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.0739 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.801145 +policy/KL 0.0223526 +policy/KLBefore 0 +policy/LossAfter -0.0580881 +policy/LossBefore -0.00356431 +policy/dLoss 0.0545238 +---------------------------------------- ------------ +2025-04-03 14:17:22 | [rl2_trainer] epoch #454 | Optimizing policy... +2025-04-03 14:17:23 | [rl2_trainer] epoch #454 | Fitting baseline... +2025-04-03 14:17:23 | [rl2_trainer] epoch #454 | Computing loss before +2025-04-03 14:17:23 | [rl2_trainer] epoch #454 | Computing KL before +2025-04-03 14:17:24 | [rl2_trainer] epoch #454 | Optimizing +2025-04-03 14:18:01 | [rl2_trainer] epoch #454 | Computing KL after +2025-04-03 14:18:01 | [rl2_trainer] epoch #454 | Computing loss after +2025-04-03 14:18:02 | [rl2_trainer] epoch #454 | Saving snapshot... +2025-04-03 14:18:02 | [rl2_trainer] epoch #454 | Saved +2025-04-03 14:18:02 | [rl2_trainer] epoch #454 | Time 87963.08 s +2025-04-03 14:18:02 | [rl2_trainer] epoch #454 | EpochTime 197.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2049 +Average/AverageReturn -22.4067 +Average/Iteration 454 +Average/MaxReturn 28.7002 +Average/MinReturn -56.6779 +Average/NumEpisodes 100 +Average/StdReturn 16.7842 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.86508 +TotalEnvSteps 4.55e+06 +__unnamed_task__/AverageDiscountedReturn -14.2049 +__unnamed_task__/AverageReturn -22.4067 +__unnamed_task__/Iteration 454 +__unnamed_task__/MaxReturn 28.7002 +__unnamed_task__/MinReturn -56.6779 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.7842 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.791002 +policy/KL 0.0153484 +policy/KLBefore 0 +policy/LossAfter -0.0678832 +policy/LossBefore -0.00865588 +policy/dLoss 0.0592273 +---------------------------------------- ------------ +2025-04-03 14:21:25 | [rl2_trainer] epoch #455 | Optimizing policy... +2025-04-03 14:21:25 | [rl2_trainer] epoch #455 | Fitting baseline... +2025-04-03 14:21:25 | [rl2_trainer] epoch #455 | Computing loss before +2025-04-03 14:21:26 | [rl2_trainer] epoch #455 | Computing KL before +2025-04-03 14:21:26 | [rl2_trainer] epoch #455 | Optimizing +2025-04-03 14:22:02 | [rl2_trainer] epoch #455 | Computing KL after +2025-04-03 14:22:03 | [rl2_trainer] epoch #455 | Computing loss after +2025-04-03 14:22:04 | [rl2_trainer] epoch #455 | Saving snapshot... +2025-04-03 14:22:04 | [rl2_trainer] epoch #455 | Saved +2025-04-03 14:22:04 | [rl2_trainer] epoch #455 | Time 88204.62 s +2025-04-03 14:22:04 | [rl2_trainer] epoch #455 | EpochTime 241.54 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.3534 +Average/AverageReturn -21.1609 +Average/Iteration 455 +Average/MaxReturn 28.3409 +Average/MinReturn -62.2221 +Average/NumEpisodes 100 +Average/StdReturn 18.9439 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.799938 +TotalEnvSteps 4.56e+06 +__unnamed_task__/AverageDiscountedReturn -13.3534 +__unnamed_task__/AverageReturn -21.1609 +__unnamed_task__/Iteration 455 +__unnamed_task__/MaxReturn 28.3409 +__unnamed_task__/MinReturn -62.2221 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 18.9439 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.780712 +policy/KL 0.0262701 +policy/KLBefore 0 +policy/LossAfter -0.101466 +policy/LossBefore 0.00128279 +policy/dLoss 0.102748 +---------------------------------------- ------------ +2025-04-03 14:25:35 | [rl2_trainer] epoch #456 | Optimizing policy... +2025-04-03 14:25:36 | [rl2_trainer] epoch #456 | Fitting baseline... +2025-04-03 14:25:36 | [rl2_trainer] epoch #456 | Computing loss before +2025-04-03 14:25:36 | [rl2_trainer] epoch #456 | Computing KL before +2025-04-03 14:25:37 | [rl2_trainer] epoch #456 | Optimizing +2025-04-03 14:26:12 | [rl2_trainer] epoch #456 | Computing KL after +2025-04-03 14:26:12 | [rl2_trainer] epoch #456 | Computing loss after +2025-04-03 14:26:13 | [rl2_trainer] epoch #456 | Saving snapshot... +2025-04-03 14:26:13 | [rl2_trainer] epoch #456 | Saved +2025-04-03 14:26:13 | [rl2_trainer] epoch #456 | Time 88454.07 s +2025-04-03 14:26:13 | [rl2_trainer] epoch #456 | EpochTime 249.44 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -13.9042 +Average/AverageReturn -22.0421 +Average/Iteration 456 +Average/MaxReturn 28.6161 +Average/MinReturn -83.4162 +Average/NumEpisodes 100 +Average/StdReturn 16.5511 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.827023 +TotalEnvSteps 4.57e+06 +__unnamed_task__/AverageDiscountedReturn -13.9042 +__unnamed_task__/AverageReturn -22.0421 +__unnamed_task__/Iteration 456 +__unnamed_task__/MaxReturn 28.6161 +__unnamed_task__/MinReturn -83.4162 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.5511 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.758593 +policy/KL 0.0198591 +policy/KLBefore 0 +policy/LossAfter -0.0705625 +policy/LossBefore -0.000352838 +policy/dLoss 0.0702096 +---------------------------------------- ------------- +2025-04-03 14:28:59 | [rl2_trainer] epoch #457 | Optimizing policy... +2025-04-03 14:28:59 | [rl2_trainer] epoch #457 | Fitting baseline... +2025-04-03 14:28:59 | [rl2_trainer] epoch #457 | Computing loss before +2025-04-03 14:29:00 | [rl2_trainer] epoch #457 | Computing KL before +2025-04-03 14:29:00 | [rl2_trainer] epoch #457 | Optimizing +2025-04-03 14:29:36 | [rl2_trainer] epoch #457 | Computing KL after +2025-04-03 14:29:37 | [rl2_trainer] epoch #457 | Computing loss after +2025-04-03 14:29:38 | [rl2_trainer] epoch #457 | Saving snapshot... +2025-04-03 14:29:38 | [rl2_trainer] epoch #457 | Saved +2025-04-03 14:29:38 | [rl2_trainer] epoch #457 | Time 88658.76 s +2025-04-03 14:29:38 | [rl2_trainer] epoch #457 | EpochTime 204.69 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -16.0903 +Average/AverageReturn -25.2872 +Average/Iteration 457 +Average/MaxReturn 6.72127 +Average/MinReturn -54.9086 +Average/NumEpisodes 100 +Average/StdReturn 12.1876 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.795738 +TotalEnvSteps 4.58e+06 +__unnamed_task__/AverageDiscountedReturn -16.0903 +__unnamed_task__/AverageReturn -25.2872 +__unnamed_task__/Iteration 457 +__unnamed_task__/MaxReturn 6.72127 +__unnamed_task__/MinReturn -54.9086 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.1876 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.746378 +policy/KL 0.0181598 +policy/KLBefore 0 +policy/LossAfter -0.0629402 +policy/LossBefore -0.0131785 +policy/dLoss 0.0497618 +---------------------------------------- ----------- +2025-04-03 14:32:04 | [rl2_trainer] epoch #458 | Optimizing policy... +2025-04-03 14:32:05 | [rl2_trainer] epoch #458 | Fitting baseline... +2025-04-03 14:32:05 | [rl2_trainer] epoch #458 | Computing loss before +2025-04-03 14:32:05 | [rl2_trainer] epoch #458 | Computing KL before +2025-04-03 14:32:06 | [rl2_trainer] epoch #458 | Optimizing +2025-04-03 14:32:43 | [rl2_trainer] epoch #458 | Computing KL after +2025-04-03 14:32:43 | [rl2_trainer] epoch #458 | Computing loss after +2025-04-03 14:32:44 | [rl2_trainer] epoch #458 | Saving snapshot... +2025-04-03 14:32:44 | [rl2_trainer] epoch #458 | Saved +2025-04-03 14:32:44 | [rl2_trainer] epoch #458 | Time 88845.45 s +2025-04-03 14:32:44 | [rl2_trainer] epoch #458 | EpochTime 186.69 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -10.2478 +Average/AverageReturn -15.1024 +Average/Iteration 458 +Average/MaxReturn 27.2903 +Average/MinReturn -27.1732 +Average/NumEpisodes 100 +Average/StdReturn 10.107 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.684049 +TotalEnvSteps 4.59e+06 +__unnamed_task__/AverageDiscountedReturn -10.2478 +__unnamed_task__/AverageReturn -15.1024 +__unnamed_task__/Iteration 458 +__unnamed_task__/MaxReturn 27.2903 +__unnamed_task__/MinReturn -27.1732 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.107 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.71549 +policy/KL 0.019866 +policy/KLBefore 0 +policy/LossAfter -0.0395032 +policy/LossBefore -0.0015268 +policy/dLoss 0.0379764 +---------------------------------------- ----------- +2025-04-03 14:34:52 | [rl2_trainer] epoch #459 | Optimizing policy... +2025-04-03 14:34:52 | [rl2_trainer] epoch #459 | Fitting baseline... +2025-04-03 14:34:52 | [rl2_trainer] epoch #459 | Computing loss before +2025-04-03 14:34:52 | [rl2_trainer] epoch #459 | Computing KL before +2025-04-03 14:34:53 | [rl2_trainer] epoch #459 | Optimizing +2025-04-03 14:35:28 | [rl2_trainer] epoch #459 | Computing KL after +2025-04-03 14:35:29 | [rl2_trainer] epoch #459 | Computing loss after +2025-04-03 14:35:30 | [rl2_trainer] epoch #459 | Saving snapshot... +2025-04-03 14:35:30 | [rl2_trainer] epoch #459 | Saved +2025-04-03 14:35:30 | [rl2_trainer] epoch #459 | Time 89010.93 s +2025-04-03 14:35:30 | [rl2_trainer] epoch #459 | EpochTime 165.48 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -8.64634 +Average/AverageReturn -12.2254 +Average/Iteration 459 +Average/MaxReturn 64.7143 +Average/MinReturn -79.6642 +Average/NumEpisodes 100 +Average/StdReturn 13.3519 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.470645 +TotalEnvSteps 4.6e+06 +__unnamed_task__/AverageDiscountedReturn -8.64634 +__unnamed_task__/AverageReturn -12.2254 +__unnamed_task__/Iteration 459 +__unnamed_task__/MaxReturn 64.7143 +__unnamed_task__/MinReturn -79.6642 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.3519 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.693556 +policy/KL 0.0177023 +policy/KLBefore 0 +policy/LossAfter -0.0868211 +policy/LossBefore -0.040873 +policy/dLoss 0.0459482 +---------------------------------------- ----------- +2025-04-03 14:37:45 | [rl2_trainer] epoch #460 | Optimizing policy... +2025-04-03 14:37:45 | [rl2_trainer] epoch #460 | Fitting baseline... +2025-04-03 14:37:45 | [rl2_trainer] epoch #460 | Computing loss before +2025-04-03 14:37:46 | [rl2_trainer] epoch #460 | Computing KL before +2025-04-03 14:37:46 | [rl2_trainer] epoch #460 | Optimizing +2025-04-03 14:38:21 | [rl2_trainer] epoch #460 | Computing KL after +2025-04-03 14:38:22 | [rl2_trainer] epoch #460 | Computing loss after +2025-04-03 14:38:23 | [rl2_trainer] epoch #460 | Saving snapshot... +2025-04-03 14:38:23 | [rl2_trainer] epoch #460 | Saved +2025-04-03 14:38:23 | [rl2_trainer] epoch #460 | Time 89183.71 s +2025-04-03 14:38:23 | [rl2_trainer] epoch #460 | EpochTime 172.78 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -15.0465 +Average/AverageReturn -24.0964 +Average/Iteration 460 +Average/MaxReturn 14.988 +Average/MinReturn -56.714 +Average/NumEpisodes 100 +Average/StdReturn 14.6069 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.849727 +TotalEnvSteps 4.61e+06 +__unnamed_task__/AverageDiscountedReturn -15.0465 +__unnamed_task__/AverageReturn -24.0964 +__unnamed_task__/Iteration 460 +__unnamed_task__/MaxReturn 14.988 +__unnamed_task__/MinReturn -56.714 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.6069 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.679792 +policy/KL 0.0160479 +policy/KLBefore 0 +policy/LossAfter -0.0453393 +policy/LossBefore -0.00330938 +policy/dLoss 0.04203 +---------------------------------------- ------------ +2025-04-03 14:39:50 | [rl2_trainer] epoch #461 | Optimizing policy... +2025-04-03 14:39:50 | [rl2_trainer] epoch #461 | Fitting baseline... +2025-04-03 14:39:50 | [rl2_trainer] epoch #461 | Computing loss before +2025-04-03 14:39:51 | [rl2_trainer] epoch #461 | Computing KL before +2025-04-03 14:39:52 | [rl2_trainer] epoch #461 | Optimizing +2025-04-03 14:40:26 | [rl2_trainer] epoch #461 | Computing KL after +2025-04-03 14:40:26 | [rl2_trainer] epoch #461 | Computing loss after +2025-04-03 14:40:27 | [rl2_trainer] epoch #461 | Saving snapshot... +2025-04-03 14:40:27 | [rl2_trainer] epoch #461 | Saved +2025-04-03 14:40:27 | [rl2_trainer] epoch #461 | Time 89308.23 s +2025-04-03 14:40:27 | [rl2_trainer] epoch #461 | EpochTime 124.52 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.14214 +Average/AverageReturn -11.8251 +Average/Iteration 461 +Average/MaxReturn 21.4114 +Average/MinReturn -26.261 +Average/NumEpisodes 100 +Average/StdReturn 8.49253 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.473332 +TotalEnvSteps 4.62e+06 +__unnamed_task__/AverageDiscountedReturn -8.14214 +__unnamed_task__/AverageReturn -11.8251 +__unnamed_task__/Iteration 461 +__unnamed_task__/MaxReturn 21.4114 +__unnamed_task__/MinReturn -26.261 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.49253 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.650518 +policy/KL 0.0157182 +policy/KLBefore 0 +policy/LossAfter -0.0493723 +policy/LossBefore -0.00967238 +policy/dLoss 0.0396999 +---------------------------------------- ------------ +2025-04-03 14:42:54 | [rl2_trainer] epoch #462 | Optimizing policy... +2025-04-03 14:42:54 | [rl2_trainer] epoch #462 | Fitting baseline... +2025-04-03 14:42:54 | [rl2_trainer] epoch #462 | Computing loss before +2025-04-03 14:42:55 | [rl2_trainer] epoch #462 | Computing KL before +2025-04-03 14:42:56 | [rl2_trainer] epoch #462 | Optimizing +2025-04-03 14:43:32 | [rl2_trainer] epoch #462 | Computing KL after +2025-04-03 14:43:32 | [rl2_trainer] epoch #462 | Computing loss after +2025-04-03 14:43:33 | [rl2_trainer] epoch #462 | Saving snapshot... +2025-04-03 14:43:33 | [rl2_trainer] epoch #462 | Saved +2025-04-03 14:43:33 | [rl2_trainer] epoch #462 | Time 89494.09 s +2025-04-03 14:43:33 | [rl2_trainer] epoch #462 | EpochTime 185.85 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.51229 +Average/AverageReturn -13.6583 +Average/Iteration 462 +Average/MaxReturn 67.2762 +Average/MinReturn -83.8753 +Average/NumEpisodes 100 +Average/StdReturn 14.7058 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.571023 +TotalEnvSteps 4.63e+06 +__unnamed_task__/AverageDiscountedReturn -9.51229 +__unnamed_task__/AverageReturn -13.6583 +__unnamed_task__/Iteration 462 +__unnamed_task__/MaxReturn 67.2762 +__unnamed_task__/MinReturn -83.8753 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 14.7058 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.603086 +policy/KL 0.0201104 +policy/KLBefore 0 +policy/LossAfter -0.0515652 +policy/LossBefore -0.00838981 +policy/dLoss 0.0431754 +---------------------------------------- ------------ +2025-04-03 14:44:58 | [rl2_trainer] epoch #463 | Optimizing policy... +2025-04-03 14:44:58 | [rl2_trainer] epoch #463 | Fitting baseline... +2025-04-03 14:44:58 | [rl2_trainer] epoch #463 | Computing loss before +2025-04-03 14:44:59 | [rl2_trainer] epoch #463 | Computing KL before +2025-04-03 14:45:00 | [rl2_trainer] epoch #463 | Optimizing +2025-04-03 14:45:36 | [rl2_trainer] epoch #463 | Computing KL after +2025-04-03 14:45:36 | [rl2_trainer] epoch #463 | Computing loss after +2025-04-03 14:45:37 | [rl2_trainer] epoch #463 | Saving snapshot... +2025-04-03 14:45:37 | [rl2_trainer] epoch #463 | Saved +2025-04-03 14:45:37 | [rl2_trainer] epoch #463 | Time 89618.31 s +2025-04-03 14:45:37 | [rl2_trainer] epoch #463 | EpochTime 124.22 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.73667 +Average/AverageReturn -12.8594 +Average/Iteration 463 +Average/MaxReturn 18.0919 +Average/MinReturn -39.272 +Average/NumEpisodes 100 +Average/StdReturn 8.3997 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.472768 +TotalEnvSteps 4.64e+06 +__unnamed_task__/AverageDiscountedReturn -8.73667 +__unnamed_task__/AverageReturn -12.8594 +__unnamed_task__/Iteration 463 +__unnamed_task__/MaxReturn 18.0919 +__unnamed_task__/MinReturn -39.272 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.3997 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.574764 +policy/KL 0.0186986 +policy/KLBefore 0 +policy/LossAfter -0.0387157 +policy/LossBefore 0.00329444 +policy/dLoss 0.0420102 +---------------------------------------- ------------ +2025-04-03 14:48:05 | [rl2_trainer] epoch #464 | Optimizing policy... +2025-04-03 14:48:05 | [rl2_trainer] epoch #464 | Fitting baseline... +2025-04-03 14:48:05 | [rl2_trainer] epoch #464 | Computing loss before +2025-04-03 14:48:06 | [rl2_trainer] epoch #464 | Computing KL before +2025-04-03 14:48:07 | [rl2_trainer] epoch #464 | Optimizing +2025-04-03 14:48:43 | [rl2_trainer] epoch #464 | Computing KL after +2025-04-03 14:48:43 | [rl2_trainer] epoch #464 | Computing loss after +2025-04-03 14:48:44 | [rl2_trainer] epoch #464 | Saving snapshot... +2025-04-03 14:48:44 | [rl2_trainer] epoch #464 | Saved +2025-04-03 14:48:44 | [rl2_trainer] epoch #464 | Time 89805.36 s +2025-04-03 14:48:44 | [rl2_trainer] epoch #464 | EpochTime 187.05 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.8944 +Average/AverageReturn -16.2355 +Average/Iteration 464 +Average/MaxReturn 8.9552 +Average/MinReturn -28.826 +Average/NumEpisodes 100 +Average/StdReturn 7.54259 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.786258 +TotalEnvSteps 4.65e+06 +__unnamed_task__/AverageDiscountedReturn -10.8944 +__unnamed_task__/AverageReturn -16.2355 +__unnamed_task__/Iteration 464 +__unnamed_task__/MaxReturn 8.9552 +__unnamed_task__/MinReturn -28.826 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.54259 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.557981 +policy/KL 0.0138638 +policy/KLBefore 0 +policy/LossAfter -0.0213694 +policy/LossBefore -0.00172366 +policy/dLoss 0.0196458 +---------------------------------------- ------------ +2025-04-03 14:52:21 | [rl2_trainer] epoch #465 | Optimizing policy... +2025-04-03 14:52:21 | [rl2_trainer] epoch #465 | Fitting baseline... +2025-04-03 14:52:21 | [rl2_trainer] epoch #465 | Computing loss before +2025-04-03 14:52:21 | [rl2_trainer] epoch #465 | Computing KL before +2025-04-03 14:52:22 | [rl2_trainer] epoch #465 | Optimizing +2025-04-03 14:52:54 | [rl2_trainer] epoch #465 | Computing KL after +2025-04-03 14:52:55 | [rl2_trainer] epoch #465 | Computing loss after +2025-04-03 14:52:56 | [rl2_trainer] epoch #465 | Saving snapshot... +2025-04-03 14:52:56 | [rl2_trainer] epoch #465 | Saved +2025-04-03 14:52:56 | [rl2_trainer] epoch #465 | Time 90056.83 s +2025-04-03 14:52:56 | [rl2_trainer] epoch #465 | EpochTime 251.47 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.6695 +Average/AverageReturn -30.0047 +Average/Iteration 465 +Average/MaxReturn -16.2106 +Average/MinReturn -57.0696 +Average/NumEpisodes 100 +Average/StdReturn 11.4024 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.877032 +TotalEnvSteps 4.66e+06 +__unnamed_task__/AverageDiscountedReturn -18.6695 +__unnamed_task__/AverageReturn -30.0047 +__unnamed_task__/Iteration 465 +__unnamed_task__/MaxReturn -16.2106 +__unnamed_task__/MinReturn -57.0696 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.4024 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.543291 +policy/KL 0.0133392 +policy/KLBefore 0 +policy/LossAfter -0.0334469 +policy/LossBefore -0.00569571 +policy/dLoss 0.0277512 +---------------------------------------- ------------ +2025-04-03 14:56:20 | [rl2_trainer] epoch #466 | Optimizing policy... +2025-04-03 14:56:20 | [rl2_trainer] epoch #466 | Fitting baseline... +2025-04-03 14:56:20 | [rl2_trainer] epoch #466 | Computing loss before +2025-04-03 14:56:21 | [rl2_trainer] epoch #466 | Computing KL before +2025-04-03 14:56:21 | [rl2_trainer] epoch #466 | Optimizing +2025-04-03 14:56:55 | [rl2_trainer] epoch #466 | Computing KL after +2025-04-03 14:56:55 | [rl2_trainer] epoch #466 | Computing loss after +2025-04-03 14:56:56 | [rl2_trainer] epoch #466 | Saving snapshot... +2025-04-03 14:56:56 | [rl2_trainer] epoch #466 | Saved +2025-04-03 14:56:56 | [rl2_trainer] epoch #466 | Time 90297.02 s +2025-04-03 14:56:56 | [rl2_trainer] epoch #466 | EpochTime 240.18 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.1074 +Average/AverageReturn -29.0937 +Average/Iteration 466 +Average/MaxReturn -15.9551 +Average/MinReturn -71.0741 +Average/NumEpisodes 100 +Average/StdReturn 11.8139 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.873652 +TotalEnvSteps 4.67e+06 +__unnamed_task__/AverageDiscountedReturn -18.1074 +__unnamed_task__/AverageReturn -29.0937 +__unnamed_task__/Iteration 466 +__unnamed_task__/MaxReturn -15.9551 +__unnamed_task__/MinReturn -71.0741 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.8139 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.52657 +policy/KL 0.0158911 +policy/KLBefore 0 +policy/LossAfter -0.0334716 +policy/LossBefore 0.00343669 +policy/dLoss 0.0369083 +---------------------------------------- ------------ +2025-04-03 14:59:05 | [rl2_trainer] epoch #467 | Optimizing policy... +2025-04-03 14:59:05 | [rl2_trainer] epoch #467 | Fitting baseline... +2025-04-03 14:59:05 | [rl2_trainer] epoch #467 | Computing loss before +2025-04-03 14:59:06 | [rl2_trainer] epoch #467 | Computing KL before +2025-04-03 14:59:06 | [rl2_trainer] epoch #467 | Optimizing +2025-04-03 14:59:42 | [rl2_trainer] epoch #467 | Computing KL after +2025-04-03 14:59:42 | [rl2_trainer] epoch #467 | Computing loss after +2025-04-03 14:59:43 | [rl2_trainer] epoch #467 | Saving snapshot... +2025-04-03 14:59:43 | [rl2_trainer] epoch #467 | Saved +2025-04-03 14:59:43 | [rl2_trainer] epoch #467 | Time 90464.37 s +2025-04-03 14:59:43 | [rl2_trainer] epoch #467 | EpochTime 167.36 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -9.33778 +Average/AverageReturn -12.9693 +Average/Iteration 467 +Average/MaxReturn 27.3322 +Average/MinReturn -60.7553 +Average/NumEpisodes 100 +Average/StdReturn 11.5088 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.226417 +TotalEnvSteps 4.68e+06 +__unnamed_task__/AverageDiscountedReturn -9.33778 +__unnamed_task__/AverageReturn -12.9693 +__unnamed_task__/Iteration 467 +__unnamed_task__/MaxReturn 27.3322 +__unnamed_task__/MinReturn -60.7553 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.5088 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.527647 +policy/KL 0.0293443 +policy/KLBefore 0 +policy/LossAfter -0.0646315 +policy/LossBefore -0.000633282 +policy/dLoss 0.0639982 +---------------------------------------- ------------- +2025-04-03 15:01:51 | [rl2_trainer] epoch #468 | Optimizing policy... +2025-04-03 15:01:51 | [rl2_trainer] epoch #468 | Fitting baseline... +2025-04-03 15:01:51 | [rl2_trainer] epoch #468 | Computing loss before +2025-04-03 15:01:52 | [rl2_trainer] epoch #468 | Computing KL before +2025-04-03 15:01:53 | [rl2_trainer] epoch #468 | Optimizing +2025-04-03 15:02:29 | [rl2_trainer] epoch #468 | Computing KL after +2025-04-03 15:02:29 | [rl2_trainer] epoch #468 | Computing loss after +2025-04-03 15:02:30 | [rl2_trainer] epoch #468 | Saving snapshot... +2025-04-03 15:02:30 | [rl2_trainer] epoch #468 | Saved +2025-04-03 15:02:30 | [rl2_trainer] epoch #468 | Time 90631.19 s +2025-04-03 15:02:30 | [rl2_trainer] epoch #468 | EpochTime 166.81 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.11647 +Average/AverageReturn -12.6876 +Average/Iteration 468 +Average/MaxReturn 32.9389 +Average/MinReturn -91.1579 +Average/NumEpisodes 100 +Average/StdReturn 13.359 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.49734 +TotalEnvSteps 4.69e+06 +__unnamed_task__/AverageDiscountedReturn -9.11647 +__unnamed_task__/AverageReturn -12.6876 +__unnamed_task__/Iteration 468 +__unnamed_task__/MaxReturn 32.9389 +__unnamed_task__/MinReturn -91.1579 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.359 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.52409 +policy/KL 0.0169945 +policy/KLBefore 0 +policy/LossAfter -0.0593846 +policy/LossBefore -0.00200397 +policy/dLoss 0.0573807 +---------------------------------------- ------------ +2025-04-03 15:04:46 | [rl2_trainer] epoch #469 | Optimizing policy... +2025-04-03 15:04:47 | [rl2_trainer] epoch #469 | Fitting baseline... +2025-04-03 15:04:47 | [rl2_trainer] epoch #469 | Computing loss before +2025-04-03 15:04:47 | [rl2_trainer] epoch #469 | Computing KL before +2025-04-03 15:04:48 | [rl2_trainer] epoch #469 | Optimizing +2025-04-03 15:05:23 | [rl2_trainer] epoch #469 | Computing KL after +2025-04-03 15:05:23 | [rl2_trainer] epoch #469 | Computing loss after +2025-04-03 15:05:24 | [rl2_trainer] epoch #469 | Saving snapshot... +2025-04-03 15:05:24 | [rl2_trainer] epoch #469 | Saved +2025-04-03 15:05:24 | [rl2_trainer] epoch #469 | Time 90805.27 s +2025-04-03 15:05:24 | [rl2_trainer] epoch #469 | EpochTime 174.08 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.1428 +Average/AverageReturn -16.9574 +Average/Iteration 469 +Average/MaxReturn 19.5772 +Average/MinReturn -31.5022 +Average/NumEpisodes 100 +Average/StdReturn 9.41931 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.574297 +TotalEnvSteps 4.7e+06 +__unnamed_task__/AverageDiscountedReturn -11.1428 +__unnamed_task__/AverageReturn -16.9574 +__unnamed_task__/Iteration 469 +__unnamed_task__/MaxReturn 19.5772 +__unnamed_task__/MinReturn -31.5022 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.41931 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.508266 +policy/KL 0.0121669 +policy/KLBefore 0 +policy/LossAfter -0.0356815 +policy/LossBefore -0.00204622 +policy/dLoss 0.0336353 +---------------------------------------- ------------ +2025-04-03 15:06:51 | [rl2_trainer] epoch #470 | Optimizing policy... +2025-04-03 15:06:51 | [rl2_trainer] epoch #470 | Fitting baseline... +2025-04-03 15:06:51 | [rl2_trainer] epoch #470 | Computing loss before +2025-04-03 15:06:52 | [rl2_trainer] epoch #470 | Computing KL before +2025-04-03 15:06:53 | [rl2_trainer] epoch #470 | Optimizing +2025-04-03 15:07:30 | [rl2_trainer] epoch #470 | Computing KL after +2025-04-03 15:07:31 | [rl2_trainer] epoch #470 | Computing loss after +2025-04-03 15:07:31 | [rl2_trainer] epoch #470 | Saving snapshot... +2025-04-03 15:07:31 | [rl2_trainer] epoch #470 | Saved +2025-04-03 15:07:31 | [rl2_trainer] epoch #470 | Time 90932.44 s +2025-04-03 15:07:31 | [rl2_trainer] epoch #470 | EpochTime 127.17 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -6.64971 +Average/AverageReturn -9.37041 +Average/Iteration 470 +Average/MaxReturn 74.1957 +Average/MinReturn -50.185 +Average/NumEpisodes 100 +Average/StdReturn 15.2362 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.551523 +TotalEnvSteps 4.71e+06 +__unnamed_task__/AverageDiscountedReturn -6.64971 +__unnamed_task__/AverageReturn -9.37041 +__unnamed_task__/Iteration 470 +__unnamed_task__/MaxReturn 74.1957 +__unnamed_task__/MinReturn -50.185 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.2362 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.501457 +policy/KL 0.0197557 +policy/KLBefore 0 +policy/LossAfter -0.116093 +policy/LossBefore -0.0276406 +policy/dLoss 0.0884524 +---------------------------------------- ----------- +2025-04-03 15:09:30 | [rl2_trainer] epoch #471 | Optimizing policy... +2025-04-03 15:09:30 | [rl2_trainer] epoch #471 | Fitting baseline... +2025-04-03 15:09:30 | [rl2_trainer] epoch #471 | Computing loss before +2025-04-03 15:09:31 | [rl2_trainer] epoch #471 | Computing KL before +2025-04-03 15:09:32 | [rl2_trainer] epoch #471 | Optimizing +2025-04-03 15:10:07 | [rl2_trainer] epoch #471 | Computing KL after +2025-04-03 15:10:08 | [rl2_trainer] epoch #471 | Computing loss after +2025-04-03 15:10:09 | [rl2_trainer] epoch #471 | Saving snapshot... +2025-04-03 15:10:09 | [rl2_trainer] epoch #471 | Saved +2025-04-03 15:10:09 | [rl2_trainer] epoch #471 | Time 91089.65 s +2025-04-03 15:10:09 | [rl2_trainer] epoch #471 | EpochTime 157.21 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -10.8217 +Average/AverageReturn -15.8475 +Average/Iteration 471 +Average/MaxReturn 9.03208 +Average/MinReturn -30.0861 +Average/NumEpisodes 100 +Average/StdReturn 5.89474 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.46939 +TotalEnvSteps 4.72e+06 +__unnamed_task__/AverageDiscountedReturn -10.8217 +__unnamed_task__/AverageReturn -15.8475 +__unnamed_task__/Iteration 471 +__unnamed_task__/MaxReturn 9.03208 +__unnamed_task__/MinReturn -30.0861 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 5.89474 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.496937 +policy/KL 0.0139082 +policy/KLBefore 0 +policy/LossAfter -0.0207056 +policy/LossBefore -0.00132485 +policy/dLoss 0.0193807 +---------------------------------------- ------------ +2025-04-03 15:12:54 | [rl2_trainer] epoch #472 | Optimizing policy... +2025-04-03 15:12:54 | [rl2_trainer] epoch #472 | Fitting baseline... +2025-04-03 15:12:54 | [rl2_trainer] epoch #472 | Computing loss before +2025-04-03 15:12:55 | [rl2_trainer] epoch #472 | Computing KL before +2025-04-03 15:12:55 | [rl2_trainer] epoch #472 | Optimizing +2025-04-03 15:13:32 | [rl2_trainer] epoch #472 | Computing KL after +2025-04-03 15:13:32 | [rl2_trainer] epoch #472 | Computing loss after +2025-04-03 15:13:33 | [rl2_trainer] epoch #472 | Saving snapshot... +2025-04-03 15:13:33 | [rl2_trainer] epoch #472 | Saved +2025-04-03 15:13:33 | [rl2_trainer] epoch #472 | Time 91294.35 s +2025-04-03 15:13:33 | [rl2_trainer] epoch #472 | EpochTime 204.70 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -13.5844 +Average/AverageReturn -21.384 +Average/Iteration 472 +Average/MaxReturn 27.9309 +Average/MinReturn -62.1811 +Average/NumEpisodes 100 +Average/StdReturn 18.6214 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.896583 +TotalEnvSteps 4.73e+06 +__unnamed_task__/AverageDiscountedReturn -13.5844 +__unnamed_task__/AverageReturn -21.384 +__unnamed_task__/Iteration 472 +__unnamed_task__/MaxReturn 27.9309 +__unnamed_task__/MinReturn -62.1811 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 18.6214 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.487203 +policy/KL 0.0176711 +policy/KLBefore 0 +policy/LossAfter -0.0724549 +policy/LossBefore -0.0099126 +policy/dLoss 0.0625423 +---------------------------------------- ----------- +2025-04-03 15:17:02 | [rl2_trainer] epoch #473 | Optimizing policy... +2025-04-03 15:17:02 | [rl2_trainer] epoch #473 | Fitting baseline... +2025-04-03 15:17:02 | [rl2_trainer] epoch #473 | Computing loss before +2025-04-03 15:17:03 | [rl2_trainer] epoch #473 | Computing KL before +2025-04-03 15:17:03 | [rl2_trainer] epoch #473 | Optimizing +2025-04-03 15:17:38 | [rl2_trainer] epoch #473 | Computing KL after +2025-04-03 15:17:38 | [rl2_trainer] epoch #473 | Computing loss after +2025-04-03 15:17:39 | [rl2_trainer] epoch #473 | Saving snapshot... +2025-04-03 15:17:39 | [rl2_trainer] epoch #473 | Saved +2025-04-03 15:17:39 | [rl2_trainer] epoch #473 | Time 91540.04 s +2025-04-03 15:17:39 | [rl2_trainer] epoch #473 | EpochTime 245.68 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.0219 +Average/AverageReturn -20.6125 +Average/Iteration 473 +Average/MaxReturn 32.0359 +Average/MinReturn -60.3525 +Average/NumEpisodes 100 +Average/StdReturn 16.6307 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.850854 +TotalEnvSteps 4.74e+06 +__unnamed_task__/AverageDiscountedReturn -13.0219 +__unnamed_task__/AverageReturn -20.6125 +__unnamed_task__/Iteration 473 +__unnamed_task__/MaxReturn 32.0359 +__unnamed_task__/MinReturn -60.3525 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 16.6307 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.453373 +policy/KL 0.0196518 +policy/KLBefore 0 +policy/LossAfter -0.0524054 +policy/LossBefore 0.00694569 +policy/dLoss 0.0593511 +---------------------------------------- ------------ +2025-04-03 15:19:04 | [rl2_trainer] epoch #474 | Optimizing policy... +2025-04-03 15:19:05 | [rl2_trainer] epoch #474 | Fitting baseline... +2025-04-03 15:19:05 | [rl2_trainer] epoch #474 | Computing loss before +2025-04-03 15:19:05 | [rl2_trainer] epoch #474 | Computing KL before +2025-04-03 15:19:06 | [rl2_trainer] epoch #474 | Optimizing +2025-04-03 15:19:43 | [rl2_trainer] epoch #474 | Computing KL after +2025-04-03 15:19:43 | [rl2_trainer] epoch #474 | Computing loss after +2025-04-03 15:19:44 | [rl2_trainer] epoch #474 | Saving snapshot... +2025-04-03 15:19:44 | [rl2_trainer] epoch #474 | Saved +2025-04-03 15:19:44 | [rl2_trainer] epoch #474 | Time 91665.16 s +2025-04-03 15:19:44 | [rl2_trainer] epoch #474 | EpochTime 125.11 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -7.09292 +Average/AverageReturn -9.79849 +Average/Iteration 474 +Average/MaxReturn 38.4679 +Average/MinReturn -42.4487 +Average/NumEpisodes 100 +Average/StdReturn 13.1441 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.650873 +TotalEnvSteps 4.75e+06 +__unnamed_task__/AverageDiscountedReturn -7.09292 +__unnamed_task__/AverageReturn -9.79849 +__unnamed_task__/Iteration 474 +__unnamed_task__/MaxReturn 38.4679 +__unnamed_task__/MinReturn -42.4487 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1441 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.40906 +policy/KL 0.0183932 +policy/KLBefore 0 +policy/LossAfter -0.0738582 +policy/LossBefore -0.0242922 +policy/dLoss 0.0495659 +---------------------------------------- ----------- +2025-04-03 15:21:41 | [rl2_trainer] epoch #475 | Optimizing policy... +2025-04-03 15:21:42 | [rl2_trainer] epoch #475 | Fitting baseline... +2025-04-03 15:21:42 | [rl2_trainer] epoch #475 | Computing loss before +2025-04-03 15:21:42 | [rl2_trainer] epoch #475 | Computing KL before +2025-04-03 15:21:43 | [rl2_trainer] epoch #475 | Optimizing +2025-04-03 15:22:18 | [rl2_trainer] epoch #475 | Computing KL after +2025-04-03 15:22:19 | [rl2_trainer] epoch #475 | Computing loss after +2025-04-03 15:22:19 | [rl2_trainer] epoch #475 | Saving snapshot... +2025-04-03 15:22:19 | [rl2_trainer] epoch #475 | Saved +2025-04-03 15:22:19 | [rl2_trainer] epoch #475 | Time 91820.47 s +2025-04-03 15:22:19 | [rl2_trainer] epoch #475 | EpochTime 155.31 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -9.78834 +Average/AverageReturn -14.0668 +Average/Iteration 475 +Average/MaxReturn 34.9047 +Average/MinReturn -31.5773 +Average/NumEpisodes 100 +Average/StdReturn 10.4421 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.465035 +TotalEnvSteps 4.76e+06 +__unnamed_task__/AverageDiscountedReturn -9.78834 +__unnamed_task__/AverageReturn -14.0668 +__unnamed_task__/Iteration 475 +__unnamed_task__/MaxReturn 34.9047 +__unnamed_task__/MinReturn -31.5773 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.4421 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.368624 +policy/KL 0.016597 +policy/KLBefore 0 +policy/LossAfter -0.0546827 +policy/LossBefore -0.0144497 +policy/dLoss 0.040233 +---------------------------------------- ----------- +2025-04-03 15:24:18 | [rl2_trainer] epoch #476 | Optimizing policy... +2025-04-03 15:24:18 | [rl2_trainer] epoch #476 | Fitting baseline... +2025-04-03 15:24:18 | [rl2_trainer] epoch #476 | Computing loss before +2025-04-03 15:24:19 | [rl2_trainer] epoch #476 | Computing KL before +2025-04-03 15:24:20 | [rl2_trainer] epoch #476 | Optimizing +2025-04-03 15:24:55 | [rl2_trainer] epoch #476 | Computing KL after +2025-04-03 15:24:56 | [rl2_trainer] epoch #476 | Computing loss after +2025-04-03 15:24:57 | [rl2_trainer] epoch #476 | Saving snapshot... +2025-04-03 15:24:57 | [rl2_trainer] epoch #476 | Saved +2025-04-03 15:24:57 | [rl2_trainer] epoch #476 | Time 91977.58 s +2025-04-03 15:24:57 | [rl2_trainer] epoch #476 | EpochTime 157.11 s +---------------------------------------- ------------- +Average/AverageDiscountedReturn -10.8661 +Average/AverageReturn -15.7643 +Average/Iteration 476 +Average/MaxReturn 24.7837 +Average/MinReturn -33.583 +Average/NumEpisodes 100 +Average/StdReturn 8.11018 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.339319 +TotalEnvSteps 4.77e+06 +__unnamed_task__/AverageDiscountedReturn -10.8661 +__unnamed_task__/AverageReturn -15.7643 +__unnamed_task__/Iteration 476 +__unnamed_task__/MaxReturn 24.7837 +__unnamed_task__/MinReturn -33.583 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.11018 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.335124 +policy/KL 0.0144787 +policy/KLBefore 0 +policy/LossAfter -0.0291119 +policy/LossBefore -0.000639203 +policy/dLoss 0.0284727 +---------------------------------------- ------------- +2025-04-03 15:27:25 | [rl2_trainer] epoch #477 | Optimizing policy... +2025-04-03 15:27:25 | [rl2_trainer] epoch #477 | Fitting baseline... +2025-04-03 15:27:25 | [rl2_trainer] epoch #477 | Computing loss before +2025-04-03 15:27:26 | [rl2_trainer] epoch #477 | Computing KL before +2025-04-03 15:27:26 | [rl2_trainer] epoch #477 | Optimizing +2025-04-03 15:28:03 | [rl2_trainer] epoch #477 | Computing KL after +2025-04-03 15:28:03 | [rl2_trainer] epoch #477 | Computing loss after +2025-04-03 15:28:04 | [rl2_trainer] epoch #477 | Saving snapshot... +2025-04-03 15:28:04 | [rl2_trainer] epoch #477 | Saved +2025-04-03 15:28:04 | [rl2_trainer] epoch #477 | Time 92165.11 s +2025-04-03 15:28:04 | [rl2_trainer] epoch #477 | EpochTime 187.53 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -8.83731 +Average/AverageReturn -13.0622 +Average/Iteration 477 +Average/MaxReturn 33.4467 +Average/MinReturn -25.4723 +Average/NumEpisodes 100 +Average/StdReturn 12.3284 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.69979 +TotalEnvSteps 4.78e+06 +__unnamed_task__/AverageDiscountedReturn -8.83731 +__unnamed_task__/AverageReturn -13.0622 +__unnamed_task__/Iteration 477 +__unnamed_task__/MaxReturn 33.4467 +__unnamed_task__/MinReturn -25.4723 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.3284 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.3166 +policy/KL 0.015744 +policy/KLBefore 0 +policy/LossAfter -0.0255962 +policy/LossBefore 0.0129715 +policy/dLoss 0.0385677 +---------------------------------------- ----------- +2025-04-03 15:30:31 | [rl2_trainer] epoch #478 | Optimizing policy... +2025-04-03 15:30:31 | [rl2_trainer] epoch #478 | Fitting baseline... +2025-04-03 15:30:31 | [rl2_trainer] epoch #478 | Computing loss before +2025-04-03 15:30:32 | [rl2_trainer] epoch #478 | Computing KL before +2025-04-03 15:30:32 | [rl2_trainer] epoch #478 | Optimizing +2025-04-03 15:31:07 | [rl2_trainer] epoch #478 | Computing KL after +2025-04-03 15:31:07 | [rl2_trainer] epoch #478 | Computing loss after +2025-04-03 15:31:08 | [rl2_trainer] epoch #478 | Saving snapshot... +2025-04-03 15:31:08 | [rl2_trainer] epoch #478 | Saved +2025-04-03 15:31:08 | [rl2_trainer] epoch #478 | Time 92349.15 s +2025-04-03 15:31:08 | [rl2_trainer] epoch #478 | EpochTime 184.04 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -7.39284 +Average/AverageReturn -10.4987 +Average/Iteration 478 +Average/MaxReturn 48.3674 +Average/MinReturn -29.0694 +Average/NumEpisodes 100 +Average/StdReturn 15.2994 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.762508 +TotalEnvSteps 4.79e+06 +__unnamed_task__/AverageDiscountedReturn -7.39284 +__unnamed_task__/AverageReturn -10.4987 +__unnamed_task__/Iteration 478 +__unnamed_task__/MaxReturn 48.3674 +__unnamed_task__/MinReturn -29.0694 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 15.2994 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.312658 +policy/KL 0.0148744 +policy/KLBefore 0 +policy/LossAfter -0.0673734 +policy/LossBefore 0.00447937 +policy/dLoss 0.0718528 +---------------------------------------- ------------ +2025-04-03 15:34:08 | [rl2_trainer] epoch #479 | Optimizing policy... +2025-04-03 15:34:08 | [rl2_trainer] epoch #479 | Fitting baseline... +2025-04-03 15:34:08 | [rl2_trainer] epoch #479 | Computing loss before +2025-04-03 15:34:08 | [rl2_trainer] epoch #479 | Computing KL before +2025-04-03 15:34:09 | [rl2_trainer] epoch #479 | Optimizing +2025-04-03 15:34:42 | [rl2_trainer] epoch #479 | Computing KL after +2025-04-03 15:34:43 | [rl2_trainer] epoch #479 | Computing loss after +2025-04-03 15:34:44 | [rl2_trainer] epoch #479 | Saving snapshot... +2025-04-03 15:34:44 | [rl2_trainer] epoch #479 | Saved +2025-04-03 15:34:44 | [rl2_trainer] epoch #479 | Time 92564.64 s +2025-04-03 15:34:44 | [rl2_trainer] epoch #479 | EpochTime 215.49 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.361 +Average/AverageReturn -16.5894 +Average/Iteration 479 +Average/MaxReturn 19.1229 +Average/MinReturn -34.9599 +Average/NumEpisodes 100 +Average/StdReturn 8.57327 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.575904 +TotalEnvSteps 4.8e+06 +__unnamed_task__/AverageDiscountedReturn -11.361 +__unnamed_task__/AverageReturn -16.5894 +__unnamed_task__/Iteration 479 +__unnamed_task__/MaxReturn 19.1229 +__unnamed_task__/MinReturn -34.9599 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.57327 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.300295 +policy/KL 0.0157922 +policy/KLBefore 0 +policy/LossAfter -0.0389757 +policy/LossBefore -0.00349732 +policy/dLoss 0.0354784 +---------------------------------------- ------------ +2025-04-03 15:37:09 | [rl2_trainer] epoch #480 | Optimizing policy... +2025-04-03 15:37:10 | [rl2_trainer] epoch #480 | Fitting baseline... +2025-04-03 15:37:10 | [rl2_trainer] epoch #480 | Computing loss before +2025-04-03 15:37:10 | [rl2_trainer] epoch #480 | Computing KL before +2025-04-03 15:37:11 | [rl2_trainer] epoch #480 | Optimizing +2025-04-03 15:37:45 | [rl2_trainer] epoch #480 | Computing KL after +2025-04-03 15:37:46 | [rl2_trainer] epoch #480 | Computing loss after +2025-04-03 15:37:46 | [rl2_trainer] epoch #480 | Saving snapshot... +2025-04-03 15:37:46 | [rl2_trainer] epoch #480 | Saved +2025-04-03 15:37:46 | [rl2_trainer] epoch #480 | Time 92747.42 s +2025-04-03 15:37:46 | [rl2_trainer] epoch #480 | EpochTime 182.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.2402 +Average/AverageReturn -16.4125 +Average/Iteration 480 +Average/MaxReturn 9.95464 +Average/MinReturn -30.8376 +Average/NumEpisodes 100 +Average/StdReturn 7.4106 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.645172 +TotalEnvSteps 4.81e+06 +__unnamed_task__/AverageDiscountedReturn -11.2402 +__unnamed_task__/AverageReturn -16.4125 +__unnamed_task__/Iteration 480 +__unnamed_task__/MaxReturn 9.95464 +__unnamed_task__/MinReturn -30.8376 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.4106 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.284465 +policy/KL 0.0147417 +policy/KLBefore 0 +policy/LossAfter -0.0325798 +policy/LossBefore -0.00696936 +policy/dLoss 0.0256105 +---------------------------------------- ------------ +2025-04-03 15:39:54 | [rl2_trainer] epoch #481 | Optimizing policy... +2025-04-03 15:39:54 | [rl2_trainer] epoch #481 | Fitting baseline... +2025-04-03 15:39:54 | [rl2_trainer] epoch #481 | Computing loss before +2025-04-03 15:39:55 | [rl2_trainer] epoch #481 | Computing KL before +2025-04-03 15:39:55 | [rl2_trainer] epoch #481 | Optimizing +2025-04-03 15:40:31 | [rl2_trainer] epoch #481 | Computing KL after +2025-04-03 15:40:32 | [rl2_trainer] epoch #481 | Computing loss after +2025-04-03 15:40:33 | [rl2_trainer] epoch #481 | Saving snapshot... +2025-04-03 15:40:33 | [rl2_trainer] epoch #481 | Saved +2025-04-03 15:40:33 | [rl2_trainer] epoch #481 | Time 92913.88 s +2025-04-03 15:40:33 | [rl2_trainer] epoch #481 | EpochTime 166.46 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -7.27183 +Average/AverageReturn -9.58932 +Average/Iteration 481 +Average/MaxReturn 48.7884 +Average/MinReturn -29.0447 +Average/NumEpisodes 100 +Average/StdReturn 12.6443 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.512367 +TotalEnvSteps 4.82e+06 +__unnamed_task__/AverageDiscountedReturn -7.27183 +__unnamed_task__/AverageReturn -9.58932 +__unnamed_task__/Iteration 481 +__unnamed_task__/MaxReturn 48.7884 +__unnamed_task__/MinReturn -29.0447 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.6443 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.262653 +policy/KL 0.0182046 +policy/KLBefore 0 +policy/LossAfter -0.0391409 +policy/LossBefore 0.0178479 +policy/dLoss 0.0569888 +---------------------------------------- ----------- +2025-04-03 15:43:30 | [rl2_trainer] epoch #482 | Optimizing policy... +2025-04-03 15:43:30 | [rl2_trainer] epoch #482 | Fitting baseline... +2025-04-03 15:43:30 | [rl2_trainer] epoch #482 | Computing loss before +2025-04-03 15:43:31 | [rl2_trainer] epoch #482 | Computing KL before +2025-04-03 15:43:31 | [rl2_trainer] epoch #482 | Optimizing +2025-04-03 15:44:08 | [rl2_trainer] epoch #482 | Computing KL after +2025-04-03 15:44:09 | [rl2_trainer] epoch #482 | Computing loss after +2025-04-03 15:44:10 | [rl2_trainer] epoch #482 | Saving snapshot... +2025-04-03 15:44:10 | [rl2_trainer] epoch #482 | Saved +2025-04-03 15:44:10 | [rl2_trainer] epoch #482 | Time 93130.57 s +2025-04-03 15:44:10 | [rl2_trainer] epoch #482 | EpochTime 216.69 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -11.5631 +Average/AverageReturn -17.0756 +Average/Iteration 482 +Average/MaxReturn 8.68553 +Average/MinReturn -31.1288 +Average/NumEpisodes 100 +Average/StdReturn 6.3976 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.603943 +TotalEnvSteps 4.83e+06 +__unnamed_task__/AverageDiscountedReturn -11.5631 +__unnamed_task__/AverageReturn -17.0756 +__unnamed_task__/Iteration 482 +__unnamed_task__/MaxReturn 8.68553 +__unnamed_task__/MinReturn -31.1288 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 6.3976 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.234637 +policy/KL 0.0122253 +policy/KLBefore 0 +policy/LossAfter -0.02593 +policy/LossBefore -0.0121969 +policy/dLoss 0.0137332 +---------------------------------------- ----------- +2025-04-03 15:47:26 | [rl2_trainer] epoch #483 | Optimizing policy... +2025-04-03 15:47:26 | [rl2_trainer] epoch #483 | Fitting baseline... +2025-04-03 15:47:26 | [rl2_trainer] epoch #483 | Computing loss before +2025-04-03 15:47:27 | [rl2_trainer] epoch #483 | Computing KL before +2025-04-03 15:47:27 | [rl2_trainer] epoch #483 | Optimizing +2025-04-03 15:48:03 | [rl2_trainer] epoch #483 | Computing KL after +2025-04-03 15:48:03 | [rl2_trainer] epoch #483 | Computing loss after +2025-04-03 15:48:04 | [rl2_trainer] epoch #483 | Saving snapshot... +2025-04-03 15:48:04 | [rl2_trainer] epoch #483 | Saved +2025-04-03 15:48:04 | [rl2_trainer] epoch #483 | Time 93365.24 s +2025-04-03 15:48:04 | [rl2_trainer] epoch #483 | EpochTime 234.67 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.2305 +Average/AverageReturn -27.7013 +Average/Iteration 483 +Average/MaxReturn 7.91899 +Average/MinReturn -57.1773 +Average/NumEpisodes 100 +Average/StdReturn 13.3555 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.812739 +TotalEnvSteps 4.84e+06 +__unnamed_task__/AverageDiscountedReturn -17.2305 +__unnamed_task__/AverageReturn -27.7013 +__unnamed_task__/Iteration 483 +__unnamed_task__/MaxReturn 7.91899 +__unnamed_task__/MinReturn -57.1773 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.3555 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.219143 +policy/KL 0.0217437 +policy/KLBefore 0 +policy/LossAfter -0.0436236 +policy/LossBefore -0.00184396 +policy/dLoss 0.0417796 +---------------------------------------- ------------ +2025-04-03 15:50:42 | [rl2_trainer] epoch #484 | Optimizing policy... +2025-04-03 15:50:43 | [rl2_trainer] epoch #484 | Fitting baseline... +2025-04-03 15:50:43 | [rl2_trainer] epoch #484 | Computing loss before +2025-04-03 15:50:43 | [rl2_trainer] epoch #484 | Computing KL before +2025-04-03 15:50:44 | [rl2_trainer] epoch #484 | Optimizing +2025-04-03 15:51:21 | [rl2_trainer] epoch #484 | Computing KL after +2025-04-03 15:51:21 | [rl2_trainer] epoch #484 | Computing loss after +2025-04-03 15:51:22 | [rl2_trainer] epoch #484 | Saving snapshot... +2025-04-03 15:51:22 | [rl2_trainer] epoch #484 | Saved +2025-04-03 15:51:22 | [rl2_trainer] epoch #484 | Time 93563.11 s +2025-04-03 15:51:22 | [rl2_trainer] epoch #484 | EpochTime 197.87 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -14.2529 +Average/AverageReturn -22.6002 +Average/Iteration 484 +Average/MaxReturn 38.4137 +Average/MinReturn -66.2902 +Average/NumEpisodes 100 +Average/StdReturn 24.1733 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.906915 +TotalEnvSteps 4.85e+06 +__unnamed_task__/AverageDiscountedReturn -14.2529 +__unnamed_task__/AverageReturn -22.6002 +__unnamed_task__/Iteration 484 +__unnamed_task__/MaxReturn 38.4137 +__unnamed_task__/MinReturn -66.2902 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 24.1733 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.225833 +policy/KL 0.0194892 +policy/KLBefore 0 +policy/LossAfter -0.100601 +policy/LossBefore -0.00441959 +policy/dLoss 0.0961809 +---------------------------------------- ------------ +2025-04-03 15:55:22 | [rl2_trainer] epoch #485 | Optimizing policy... +2025-04-03 15:55:23 | [rl2_trainer] epoch #485 | Fitting baseline... +2025-04-03 15:55:23 | [rl2_trainer] epoch #485 | Computing loss before +2025-04-03 15:55:23 | [rl2_trainer] epoch #485 | Computing KL before +2025-04-03 15:55:24 | [rl2_trainer] epoch #485 | Optimizing +2025-04-03 15:55:58 | [rl2_trainer] epoch #485 | Computing KL after +2025-04-03 15:55:59 | [rl2_trainer] epoch #485 | Computing loss after +2025-04-03 15:56:00 | [rl2_trainer] epoch #485 | Saving snapshot... +2025-04-03 15:56:00 | [rl2_trainer] epoch #485 | Saved +2025-04-03 15:56:00 | [rl2_trainer] epoch #485 | Time 93840.73 s +2025-04-03 15:56:00 | [rl2_trainer] epoch #485 | EpochTime 277.61 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -18.402 +Average/AverageReturn -29.6723 +Average/Iteration 485 +Average/MaxReturn -17.2357 +Average/MinReturn -72.8097 +Average/NumEpisodes 100 +Average/StdReturn 11.3671 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.852489 +TotalEnvSteps 4.86e+06 +__unnamed_task__/AverageDiscountedReturn -18.402 +__unnamed_task__/AverageReturn -29.6723 +__unnamed_task__/Iteration 485 +__unnamed_task__/MaxReturn -17.2357 +__unnamed_task__/MinReturn -72.8097 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.3671 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.229574 +policy/KL 0.0152173 +policy/KLBefore 0 +policy/LossAfter -0.0416219 +policy/LossBefore -0.0105578 +policy/dLoss 0.0310641 +---------------------------------------- ----------- +2025-04-03 15:59:49 | [rl2_trainer] epoch #486 | Optimizing policy... +2025-04-03 15:59:49 | [rl2_trainer] epoch #486 | Fitting baseline... +2025-04-03 15:59:49 | [rl2_trainer] epoch #486 | Computing loss before +2025-04-03 15:59:50 | [rl2_trainer] epoch #486 | Computing KL before +2025-04-03 15:59:51 | [rl2_trainer] epoch #486 | Optimizing +2025-04-03 16:00:27 | [rl2_trainer] epoch #486 | Computing KL after +2025-04-03 16:00:27 | [rl2_trainer] epoch #486 | Computing loss after +2025-04-03 16:00:28 | [rl2_trainer] epoch #486 | Saving snapshot... +2025-04-03 16:00:28 | [rl2_trainer] epoch #486 | Saved +2025-04-03 16:00:28 | [rl2_trainer] epoch #486 | Time 94109.37 s +2025-04-03 16:00:28 | [rl2_trainer] epoch #486 | EpochTime 268.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.495 +Average/AverageReturn -29.7962 +Average/Iteration 486 +Average/MaxReturn -17.2154 +Average/MinReturn -71.1731 +Average/NumEpisodes 100 +Average/StdReturn 11.7054 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.853324 +TotalEnvSteps 4.87e+06 +__unnamed_task__/AverageDiscountedReturn -18.495 +__unnamed_task__/AverageReturn -29.7962 +__unnamed_task__/Iteration 486 +__unnamed_task__/MaxReturn -17.2154 +__unnamed_task__/MinReturn -71.1731 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 11.7054 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.224794 +policy/KL 0.0135518 +policy/KLBefore 0 +policy/LossAfter -0.0528933 +policy/LossBefore -0.00559243 +policy/dLoss 0.0473009 +---------------------------------------- ------------ +2025-04-03 16:01:57 | [rl2_trainer] epoch #487 | Optimizing policy... +2025-04-03 16:01:58 | [rl2_trainer] epoch #487 | Fitting baseline... +2025-04-03 16:01:58 | [rl2_trainer] epoch #487 | Computing loss before +2025-04-03 16:01:58 | [rl2_trainer] epoch #487 | Computing KL before +2025-04-03 16:01:59 | [rl2_trainer] epoch #487 | Optimizing +2025-04-03 16:02:32 | [rl2_trainer] epoch #487 | Computing KL after +2025-04-03 16:02:33 | [rl2_trainer] epoch #487 | Computing loss after +2025-04-03 16:02:34 | [rl2_trainer] epoch #487 | Saving snapshot... +2025-04-03 16:02:34 | [rl2_trainer] epoch #487 | Saved +2025-04-03 16:02:34 | [rl2_trainer] epoch #487 | Time 94234.85 s +2025-04-03 16:02:34 | [rl2_trainer] epoch #487 | EpochTime 125.47 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.44075 +Average/AverageReturn -13.6405 +Average/Iteration 487 +Average/MaxReturn 30.1855 +Average/MinReturn -74.9349 +Average/NumEpisodes 100 +Average/StdReturn 13.2796 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.703316 +TotalEnvSteps 4.88e+06 +__unnamed_task__/AverageDiscountedReturn -9.44075 +__unnamed_task__/AverageReturn -13.6405 +__unnamed_task__/Iteration 487 +__unnamed_task__/MaxReturn 30.1855 +__unnamed_task__/MinReturn -74.9349 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.2796 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.198469 +policy/KL 0.0172292 +policy/KLBefore 0 +policy/LossAfter -0.0655397 +policy/LossBefore -0.00158597 +policy/dLoss 0.0639537 +---------------------------------------- ------------ +2025-04-03 16:04:06 | [rl2_trainer] epoch #488 | Optimizing policy... +2025-04-03 16:04:07 | [rl2_trainer] epoch #488 | Fitting baseline... +2025-04-03 16:04:07 | [rl2_trainer] epoch #488 | Computing loss before +2025-04-03 16:04:07 | [rl2_trainer] epoch #488 | Computing KL before +2025-04-03 16:04:08 | [rl2_trainer] epoch #488 | Optimizing +2025-04-03 16:04:44 | [rl2_trainer] epoch #488 | Computing KL after +2025-04-03 16:04:44 | [rl2_trainer] epoch #488 | Computing loss after +2025-04-03 16:04:45 | [rl2_trainer] epoch #488 | Saving snapshot... +2025-04-03 16:04:45 | [rl2_trainer] epoch #488 | Saved +2025-04-03 16:04:45 | [rl2_trainer] epoch #488 | Time 94366.19 s +2025-04-03 16:04:45 | [rl2_trainer] epoch #488 | EpochTime 131.34 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -8.00258 +Average/AverageReturn -11.2819 +Average/Iteration 488 +Average/MaxReturn 32.1611 +Average/MinReturn -38.0946 +Average/NumEpisodes 100 +Average/StdReturn 13.7996 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.742995 +TotalEnvSteps 4.89e+06 +__unnamed_task__/AverageDiscountedReturn -8.00258 +__unnamed_task__/AverageReturn -11.2819 +__unnamed_task__/Iteration 488 +__unnamed_task__/MaxReturn 32.1611 +__unnamed_task__/MinReturn -38.0946 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.7996 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.188789 +policy/KL 0.0195719 +policy/KLBefore 0 +policy/LossAfter -0.0828933 +policy/LossBefore 0.00209549 +policy/dLoss 0.0849888 +---------------------------------------- ------------ +2025-04-03 16:08:15 | [rl2_trainer] epoch #489 | Optimizing policy... +2025-04-03 16:08:15 | [rl2_trainer] epoch #489 | Fitting baseline... +2025-04-03 16:08:15 | [rl2_trainer] epoch #489 | Computing loss before +2025-04-03 16:08:16 | [rl2_trainer] epoch #489 | Computing KL before +2025-04-03 16:08:16 | [rl2_trainer] epoch #489 | Optimizing +2025-04-03 16:08:52 | [rl2_trainer] epoch #489 | Computing KL after +2025-04-03 16:08:53 | [rl2_trainer] epoch #489 | Computing loss after +2025-04-03 16:08:54 | [rl2_trainer] epoch #489 | Saving snapshot... +2025-04-03 16:08:54 | [rl2_trainer] epoch #489 | Saved +2025-04-03 16:08:54 | [rl2_trainer] epoch #489 | Time 94614.96 s +2025-04-03 16:08:54 | [rl2_trainer] epoch #489 | EpochTime 248.77 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -18.3532 +Average/AverageReturn -29.4842 +Average/Iteration 489 +Average/MaxReturn -15.7377 +Average/MinReturn -58.9039 +Average/NumEpisodes 100 +Average/StdReturn 10.0953 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.876715 +TotalEnvSteps 4.9e+06 +__unnamed_task__/AverageDiscountedReturn -18.3532 +__unnamed_task__/AverageReturn -29.4842 +__unnamed_task__/Iteration 489 +__unnamed_task__/MaxReturn -15.7377 +__unnamed_task__/MinReturn -58.9039 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 10.0953 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.184267 +policy/KL 0.0145873 +policy/KLBefore 0 +policy/LossAfter -0.023307 +policy/LossBefore -0.00400089 +policy/dLoss 0.0193061 +---------------------------------------- ------------ +2025-04-03 16:10:21 | [rl2_trainer] epoch #490 | Optimizing policy... +2025-04-03 16:10:21 | [rl2_trainer] epoch #490 | Fitting baseline... +2025-04-03 16:10:21 | [rl2_trainer] epoch #490 | Computing loss before +2025-04-03 16:10:22 | [rl2_trainer] epoch #490 | Computing KL before +2025-04-03 16:10:23 | [rl2_trainer] epoch #490 | Optimizing +2025-04-03 16:10:57 | [rl2_trainer] epoch #490 | Computing KL after +2025-04-03 16:10:58 | [rl2_trainer] epoch #490 | Computing loss after +2025-04-03 16:10:58 | [rl2_trainer] epoch #490 | Saving snapshot... +2025-04-03 16:10:58 | [rl2_trainer] epoch #490 | Saved +2025-04-03 16:10:58 | [rl2_trainer] epoch #490 | Time 94739.44 s +2025-04-03 16:10:58 | [rl2_trainer] epoch #490 | EpochTime 124.48 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.30157 +Average/AverageReturn -13.6077 +Average/Iteration 490 +Average/MaxReturn 17.2568 +Average/MinReturn -29.971 +Average/NumEpisodes 100 +Average/StdReturn 9.33236 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.663181 +TotalEnvSteps 4.91e+06 +__unnamed_task__/AverageDiscountedReturn -9.30157 +__unnamed_task__/AverageReturn -13.6077 +__unnamed_task__/Iteration 490 +__unnamed_task__/MaxReturn 17.2568 +__unnamed_task__/MinReturn -29.971 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 9.33236 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.151344 +policy/KL 0.0140354 +policy/KLBefore 0 +policy/LossAfter -0.029824 +policy/LossBefore 0.00381513 +policy/dLoss 0.0336392 +---------------------------------------- ------------ +2025-04-03 16:13:04 | [rl2_trainer] epoch #491 | Optimizing policy... +2025-04-03 16:13:04 | [rl2_trainer] epoch #491 | Fitting baseline... +2025-04-03 16:13:04 | [rl2_trainer] epoch #491 | Computing loss before +2025-04-03 16:13:05 | [rl2_trainer] epoch #491 | Computing KL before +2025-04-03 16:13:05 | [rl2_trainer] epoch #491 | Optimizing +2025-04-03 16:13:41 | [rl2_trainer] epoch #491 | Computing KL after +2025-04-03 16:13:42 | [rl2_trainer] epoch #491 | Computing loss after +2025-04-03 16:13:43 | [rl2_trainer] epoch #491 | Saving snapshot... +2025-04-03 16:13:43 | [rl2_trainer] epoch #491 | Saved +2025-04-03 16:13:43 | [rl2_trainer] epoch #491 | Time 94903.55 s +2025-04-03 16:13:43 | [rl2_trainer] epoch #491 | EpochTime 164.11 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -11.9683 +Average/AverageReturn -17.8345 +Average/Iteration 491 +Average/MaxReturn 19.1096 +Average/MinReturn -41.5012 +Average/NumEpisodes 100 +Average/StdReturn 7.68946 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.506248 +TotalEnvSteps 4.92e+06 +__unnamed_task__/AverageDiscountedReturn -11.9683 +__unnamed_task__/AverageReturn -17.8345 +__unnamed_task__/Iteration 491 +__unnamed_task__/MaxReturn 19.1096 +__unnamed_task__/MinReturn -41.5012 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.68946 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.128221 +policy/KL 0.0162671 +policy/KLBefore 0 +policy/LossAfter -0.0333453 +policy/LossBefore -0.00469599 +policy/dLoss 0.0286493 +---------------------------------------- ------------ +2025-04-03 16:16:05 | [rl2_trainer] epoch #492 | Optimizing policy... +2025-04-03 16:16:05 | [rl2_trainer] epoch #492 | Fitting baseline... +2025-04-03 16:16:05 | [rl2_trainer] epoch #492 | Computing loss before +2025-04-03 16:16:06 | [rl2_trainer] epoch #492 | Computing KL before +2025-04-03 16:16:06 | [rl2_trainer] epoch #492 | Optimizing +2025-04-03 16:16:42 | [rl2_trainer] epoch #492 | Computing KL after +2025-04-03 16:16:42 | [rl2_trainer] epoch #492 | Computing loss after +2025-04-03 16:16:43 | [rl2_trainer] epoch #492 | Saving snapshot... +2025-04-03 16:16:43 | [rl2_trainer] epoch #492 | Saved +2025-04-03 16:16:43 | [rl2_trainer] epoch #492 | Time 95084.13 s +2025-04-03 16:16:43 | [rl2_trainer] epoch #492 | EpochTime 180.57 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -15.8368 +Average/AverageReturn -24.6668 +Average/Iteration 492 +Average/MaxReturn 39.8114 +Average/MinReturn -64.522 +Average/NumEpisodes 100 +Average/StdReturn 19.6242 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.89308 +TotalEnvSteps 4.93e+06 +__unnamed_task__/AverageDiscountedReturn -15.8368 +__unnamed_task__/AverageReturn -24.6668 +__unnamed_task__/Iteration 492 +__unnamed_task__/MaxReturn 39.8114 +__unnamed_task__/MinReturn -64.522 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 19.6242 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.111092 +policy/KL 0.0194218 +policy/KLBefore 0 +policy/LossAfter -0.08009 +policy/LossBefore -0.0148205 +policy/dLoss 0.0652695 +---------------------------------------- ----------- +2025-04-03 16:19:36 | [rl2_trainer] epoch #493 | Optimizing policy... +2025-04-03 16:19:37 | [rl2_trainer] epoch #493 | Fitting baseline... +2025-04-03 16:19:37 | [rl2_trainer] epoch #493 | Computing loss before +2025-04-03 16:19:37 | [rl2_trainer] epoch #493 | Computing KL before +2025-04-03 16:19:38 | [rl2_trainer] epoch #493 | Optimizing +2025-04-03 16:20:13 | [rl2_trainer] epoch #493 | Computing KL after +2025-04-03 16:20:13 | [rl2_trainer] epoch #493 | Computing loss after +2025-04-03 16:20:14 | [rl2_trainer] epoch #493 | Saving snapshot... +2025-04-03 16:20:14 | [rl2_trainer] epoch #493 | Saved +2025-04-03 16:20:14 | [rl2_trainer] epoch #493 | Time 95295.17 s +2025-04-03 16:20:14 | [rl2_trainer] epoch #493 | EpochTime 211.04 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -17.7592 +Average/AverageReturn -28.4956 +Average/Iteration 493 +Average/MaxReturn 5.00064 +Average/MinReturn -57.5411 +Average/NumEpisodes 100 +Average/StdReturn 13.1221 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.804025 +TotalEnvSteps 4.94e+06 +__unnamed_task__/AverageDiscountedReturn -17.7592 +__unnamed_task__/AverageReturn -28.4956 +__unnamed_task__/Iteration 493 +__unnamed_task__/MaxReturn 5.00064 +__unnamed_task__/MinReturn -57.5411 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.1221 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0981007 +policy/KL 0.0197341 +policy/KLBefore 0 +policy/LossAfter -0.0647372 +policy/LossBefore -0.00602634 +policy/dLoss 0.0587108 +---------------------------------------- ------------ +2025-04-03 16:21:44 | [rl2_trainer] epoch #494 | Optimizing policy... +2025-04-03 16:21:44 | [rl2_trainer] epoch #494 | Fitting baseline... +2025-04-03 16:21:44 | [rl2_trainer] epoch #494 | Computing loss before +2025-04-03 16:21:45 | [rl2_trainer] epoch #494 | Computing KL before +2025-04-03 16:21:45 | [rl2_trainer] epoch #494 | Optimizing +2025-04-03 16:22:21 | [rl2_trainer] epoch #494 | Computing KL after +2025-04-03 16:22:22 | [rl2_trainer] epoch #494 | Computing loss after +2025-04-03 16:22:23 | [rl2_trainer] epoch #494 | Saving snapshot... +2025-04-03 16:22:23 | [rl2_trainer] epoch #494 | Saved +2025-04-03 16:22:23 | [rl2_trainer] epoch #494 | Time 95424.04 s +2025-04-03 16:22:23 | [rl2_trainer] epoch #494 | EpochTime 128.86 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -8.0085 +Average/AverageReturn -11.4725 +Average/Iteration 494 +Average/MaxReturn 34.9437 +Average/MinReturn -37.9521 +Average/NumEpisodes 100 +Average/StdReturn 12.7353 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.709315 +TotalEnvSteps 4.95e+06 +__unnamed_task__/AverageDiscountedReturn -8.0085 +__unnamed_task__/AverageReturn -11.4725 +__unnamed_task__/Iteration 494 +__unnamed_task__/MaxReturn 34.9437 +__unnamed_task__/MinReturn -37.9521 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.7353 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0754328 +policy/KL 0.0175998 +policy/KLBefore 0 +policy/LossAfter -0.0791201 +policy/LossBefore -0.0172026 +policy/dLoss 0.0619174 +---------------------------------------- ----------- +2025-04-03 16:25:22 | [rl2_trainer] epoch #495 | Optimizing policy... +2025-04-03 16:25:22 | [rl2_trainer] epoch #495 | Fitting baseline... +2025-04-03 16:25:22 | [rl2_trainer] epoch #495 | Computing loss before +2025-04-03 16:25:22 | [rl2_trainer] epoch #495 | Computing KL before +2025-04-03 16:25:23 | [rl2_trainer] epoch #495 | Optimizing +2025-04-03 16:25:58 | [rl2_trainer] epoch #495 | Computing KL after +2025-04-03 16:25:59 | [rl2_trainer] epoch #495 | Computing loss after +2025-04-03 16:26:00 | [rl2_trainer] epoch #495 | Saving snapshot... +2025-04-03 16:26:00 | [rl2_trainer] epoch #495 | Saved +2025-04-03 16:26:00 | [rl2_trainer] epoch #495 | Time 95640.95 s +2025-04-03 16:26:00 | [rl2_trainer] epoch #495 | EpochTime 216.91 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -12.4672 +Average/AverageReturn -18.5415 +Average/Iteration 495 +Average/MaxReturn 27.8838 +Average/MinReturn -33.5217 +Average/NumEpisodes 100 +Average/StdReturn 7.96274 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.660796 +TotalEnvSteps 4.96e+06 +__unnamed_task__/AverageDiscountedReturn -12.4672 +__unnamed_task__/AverageReturn -18.5415 +__unnamed_task__/Iteration 495 +__unnamed_task__/MaxReturn 27.8838 +__unnamed_task__/MinReturn -33.5217 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 7.96274 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0614643 +policy/KL 0.0121617 +policy/KLBefore 0 +policy/LossAfter -0.0179054 +policy/LossBefore 0.0068149 +policy/dLoss 0.0247203 +---------------------------------------- ----------- +2025-04-03 16:28:28 | [rl2_trainer] epoch #496 | Optimizing policy... +2025-04-03 16:28:28 | [rl2_trainer] epoch #496 | Fitting baseline... +2025-04-03 16:28:28 | [rl2_trainer] epoch #496 | Computing loss before +2025-04-03 16:28:28 | [rl2_trainer] epoch #496 | Computing KL before +2025-04-03 16:28:29 | [rl2_trainer] epoch #496 | Optimizing +2025-04-03 16:29:05 | [rl2_trainer] epoch #496 | Computing KL after +2025-04-03 16:29:05 | [rl2_trainer] epoch #496 | Computing loss after +2025-04-03 16:29:06 | [rl2_trainer] epoch #496 | Saving snapshot... +2025-04-03 16:29:06 | [rl2_trainer] epoch #496 | Saved +2025-04-03 16:29:06 | [rl2_trainer] epoch #496 | Time 95827.38 s +2025-04-03 16:29:06 | [rl2_trainer] epoch #496 | EpochTime 186.42 s +---------------------------------------- ----------- +Average/AverageDiscountedReturn -9.77933 +Average/AverageReturn -14.3749 +Average/Iteration 496 +Average/MaxReturn 30.069 +Average/MinReturn -36.6658 +Average/NumEpisodes 100 +Average/StdReturn 12.1088 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.703331 +TotalEnvSteps 4.97e+06 +__unnamed_task__/AverageDiscountedReturn -9.77933 +__unnamed_task__/AverageReturn -14.3749 +__unnamed_task__/Iteration 496 +__unnamed_task__/MaxReturn 30.069 +__unnamed_task__/MinReturn -36.6658 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 12.1088 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0547228 +policy/KL 0.0144121 +policy/KLBefore 0 +policy/LossAfter -0.0575565 +policy/LossBefore -0.0102652 +policy/dLoss 0.0472913 +---------------------------------------- ----------- +2025-04-03 16:30:37 | [rl2_trainer] epoch #497 | Optimizing policy... +2025-04-03 16:30:37 | [rl2_trainer] epoch #497 | Fitting baseline... +2025-04-03 16:30:37 | [rl2_trainer] epoch #497 | Computing loss before +2025-04-03 16:30:38 | [rl2_trainer] epoch #497 | Computing KL before +2025-04-03 16:30:38 | [rl2_trainer] epoch #497 | Optimizing +2025-04-03 16:31:15 | [rl2_trainer] epoch #497 | Computing KL after +2025-04-03 16:31:16 | [rl2_trainer] epoch #497 | Computing loss after +2025-04-03 16:31:16 | [rl2_trainer] epoch #497 | Saving snapshot... +2025-04-03 16:31:16 | [rl2_trainer] epoch #497 | Saved +2025-04-03 16:31:16 | [rl2_trainer] epoch #497 | Time 95957.43 s +2025-04-03 16:31:16 | [rl2_trainer] epoch #497 | EpochTime 130.05 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -7.85239 +Average/AverageReturn -10.992 +Average/Iteration 497 +Average/MaxReturn 33.9706 +Average/MinReturn -31.4259 +Average/NumEpisodes 100 +Average/StdReturn 13.3298 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.811664 +TotalEnvSteps 4.98e+06 +__unnamed_task__/AverageDiscountedReturn -7.85239 +__unnamed_task__/AverageReturn -10.992 +__unnamed_task__/Iteration 497 +__unnamed_task__/MaxReturn 33.9706 +__unnamed_task__/MinReturn -31.4259 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.3298 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0282335 +policy/KL 0.0190111 +policy/KLBefore 0 +policy/LossAfter -0.0348932 +policy/LossBefore 0.00833141 +policy/dLoss 0.0432247 +---------------------------------------- ------------ +2025-04-03 16:33:46 | [rl2_trainer] epoch #498 | Optimizing policy... +2025-04-03 16:33:47 | [rl2_trainer] epoch #498 | Fitting baseline... +2025-04-03 16:33:47 | [rl2_trainer] epoch #498 | Computing loss before +2025-04-03 16:33:47 | [rl2_trainer] epoch #498 | Computing KL before +2025-04-03 16:33:48 | [rl2_trainer] epoch #498 | Optimizing +2025-04-03 16:34:24 | [rl2_trainer] epoch #498 | Computing KL after +2025-04-03 16:34:24 | [rl2_trainer] epoch #498 | Computing loss after +2025-04-03 16:34:25 | [rl2_trainer] epoch #498 | Saving snapshot... +2025-04-03 16:34:25 | [rl2_trainer] epoch #498 | Saved +2025-04-03 16:34:25 | [rl2_trainer] epoch #498 | Time 96146.08 s +2025-04-03 16:34:25 | [rl2_trainer] epoch #498 | EpochTime 188.64 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -9.65494 +Average/AverageReturn -14.1278 +Average/Iteration 498 +Average/MaxReturn 37.0385 +Average/MinReturn -34.3282 +Average/NumEpisodes 100 +Average/StdReturn 13.8127 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.726644 +TotalEnvSteps 4.99e+06 +__unnamed_task__/AverageDiscountedReturn -9.65494 +__unnamed_task__/AverageReturn -14.1278 +__unnamed_task__/Iteration 498 +__unnamed_task__/MaxReturn 37.0385 +__unnamed_task__/MinReturn -34.3282 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 13.8127 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0227718 +policy/KL 0.0161568 +policy/KLBefore 0 +policy/LossAfter -0.0621175 +policy/LossBefore 0.00194205 +policy/dLoss 0.0640596 +---------------------------------------- ------------ +2025-04-03 16:37:24 | [rl2_trainer] epoch #499 | Optimizing policy... +2025-04-03 16:37:25 | [rl2_trainer] epoch #499 | Fitting baseline... +2025-04-03 16:37:25 | [rl2_trainer] epoch #499 | Computing loss before +2025-04-03 16:37:25 | [rl2_trainer] epoch #499 | Computing KL before +2025-04-03 16:37:26 | [rl2_trainer] epoch #499 | Optimizing +2025-04-03 16:38:02 | [rl2_trainer] epoch #499 | Computing KL after +2025-04-03 16:38:03 | [rl2_trainer] epoch #499 | Computing loss after +2025-04-03 16:38:04 | [rl2_trainer] epoch #499 | Saving snapshot... +2025-04-03 16:38:04 | [rl2_trainer] epoch #499 | Saved +2025-04-03 16:38:04 | [rl2_trainer] epoch #499 | Time 96364.83 s +2025-04-03 16:38:04 | [rl2_trainer] epoch #499 | EpochTime 218.74 s +---------------------------------------- ------------ +Average/AverageDiscountedReturn -13.1221 +Average/AverageReturn -19.8396 +Average/Iteration 499 +Average/MaxReturn 25.6103 +Average/MinReturn -33.7966 +Average/NumEpisodes 100 +Average/StdReturn 8.31253 +Average/TerminationRate 0 +LinearFeatureBaseline/ExplainedVariance 0.671096 +TotalEnvSteps 5e+06 +__unnamed_task__/AverageDiscountedReturn -13.1221 +__unnamed_task__/AverageReturn -19.8396 +__unnamed_task__/Iteration 499 +__unnamed_task__/MaxReturn 25.6103 +__unnamed_task__/MinReturn -33.7966 +__unnamed_task__/NumEpisodes 100 +__unnamed_task__/StdReturn 8.31253 +__unnamed_task__/TerminationRate 0 +policy/Entropy 0.0137997 +policy/KL 0.0133564 +policy/KLBefore 0 +policy/LossAfter -0.0267404 +policy/LossBefore -0.00467234 +policy/dLoss 0.022068 +---------------------------------------- ------------