diff --git "a/experiment/maml_trainer/debug.log" "b/experiment/maml_trainer/debug.log" new file mode 100644--- /dev/null +++ "b/experiment/maml_trainer/debug.log" @@ -0,0 +1,14402 @@ +2025-03-29 14:04:30 | [maml_trainer] Logging to /home/h2khalil/MetaRL-Assistive-Robotics/data/local/experiment/maml_trainer +2025-03-29 14:08:18 | [maml_trainer] Obtaining samples... +2025-03-29 14:45:41 | [maml_trainer] epoch #0 | Sampling for adapation and meta-testing... +2025-03-29 14:52:14 | [maml_trainer] epoch #0 | Finished meta-testing... +2025-03-29 14:52:14 | [maml_trainer] epoch #0 | Saving snapshot... +2025-03-29 14:53:13 | [maml_trainer] epoch #0 | Saved +2025-03-29 14:53:13 | [maml_trainer] epoch #0 | Time 2694.32 s +2025-03-29 14:53:13 | [maml_trainer] epoch #0 | EpochTime 2694.32 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -81.6842 +Average/AverageReturn -192.31 +Average/Iteration 0 +Average/MaxReturn -131.135 +Average/MinReturn -216.702 +Average/NumEpisodes 80 +Average/StdReturn 18.1135 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92564 +GaussianMLPPolicy/KLAfter 0.0257486 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.36371e-05 +GaussianMLPPolicy/LossBefore 6.16908e-09 +GaussianMLPPolicy/dLoss -8.36309e-05 +Iteration 0 +MetaTest/Average/AverageDiscountedReturn -186.585 +MetaTest/Average/AverageReturn -186.585 +MetaTest/Average/Iteration 0 +MetaTest/Average/MaxReturn -136.911 +MetaTest/Average/MinReturn -203.413 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.1941 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -186.585 +MetaTest/__unnamed_task__/AverageReturn -186.585 +MetaTest/__unnamed_task__/Iteration 0 +MetaTest/__unnamed_task__/MaxReturn -136.911 +MetaTest/__unnamed_task__/MinReturn -203.413 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.1941 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 32000 +__unnamed_task__/AverageDiscountedReturn -81.6842 +__unnamed_task__/AverageReturn -192.31 +__unnamed_task__/Iteration 0 +__unnamed_task__/MaxReturn -131.135 +__unnamed_task__/MinReturn -216.702 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.1135 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-03-29 15:26:06 | [maml_trainer] epoch #1 | Sampling for adapation and meta-testing... +2025-03-29 15:34:42 | [maml_trainer] epoch #1 | Finished meta-testing... +2025-03-29 15:34:42 | [maml_trainer] epoch #1 | Saving snapshot... +2025-03-29 15:35:01 | [maml_trainer] epoch #1 | Saved +2025-03-29 15:35:01 | [maml_trainer] epoch #1 | Time 5202.54 s +2025-03-29 15:35:01 | [maml_trainer] epoch #1 | EpochTime 2508.21 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -77.739 +Average/AverageReturn -181.998 +Average/Iteration 1 +Average/MaxReturn -114.121 +Average/MinReturn -208.155 +Average/NumEpisodes 80 +Average/StdReturn 21.1785 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92885 +GaussianMLPPolicy/KLAfter 0.0167083 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.83799e-05 +GaussianMLPPolicy/LossBefore 3.23355e-09 +GaussianMLPPolicy/dLoss -1.83766e-05 +Iteration 1 +MetaTest/Average/AverageDiscountedReturn -182.093 +MetaTest/Average/AverageReturn -182.093 +MetaTest/Average/Iteration 1 +MetaTest/Average/MaxReturn -135.252 +MetaTest/Average/MinReturn -201.663 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.923 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -182.093 +MetaTest/__unnamed_task__/AverageReturn -182.093 +MetaTest/__unnamed_task__/Iteration 1 +MetaTest/__unnamed_task__/MaxReturn -135.252 +MetaTest/__unnamed_task__/MinReturn -201.663 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.923 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 64000 +__unnamed_task__/AverageDiscountedReturn -77.739 +__unnamed_task__/AverageReturn -181.998 +__unnamed_task__/Iteration 1 +__unnamed_task__/MaxReturn -114.121 +__unnamed_task__/MinReturn -208.155 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.1785 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-03-29 15:49:54 | [maml_trainer] epoch #2 | Sampling for adapation and meta-testing... +2025-03-29 15:53:33 | [maml_trainer] epoch #2 | Finished meta-testing... +2025-03-29 15:53:33 | [maml_trainer] epoch #2 | Saving snapshot... +2025-03-29 15:53:58 | [maml_trainer] epoch #2 | Saved +2025-03-29 15:53:58 | [maml_trainer] epoch #2 | Time 6339.89 s +2025-03-29 15:53:58 | [maml_trainer] epoch #2 | EpochTime 1137.34 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -74.4718 +Average/AverageReturn -172.667 +Average/Iteration 2 +Average/MaxReturn -109.883 +Average/MinReturn -201.5 +Average/NumEpisodes 80 +Average/StdReturn 23.0811 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93337 +GaussianMLPPolicy/KLAfter 0.0174124 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.11334e-05 +GaussianMLPPolicy/LossBefore 3.14415e-09 +GaussianMLPPolicy/dLoss 7.11365e-05 +Iteration 2 +MetaTest/Average/AverageDiscountedReturn -151.168 +MetaTest/Average/AverageReturn -151.168 +MetaTest/Average/Iteration 2 +MetaTest/Average/MaxReturn -113.581 +MetaTest/Average/MinReturn -196.478 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.6446 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -151.168 +MetaTest/__unnamed_task__/AverageReturn -151.168 +MetaTest/__unnamed_task__/Iteration 2 +MetaTest/__unnamed_task__/MaxReturn -113.581 +MetaTest/__unnamed_task__/MinReturn -196.478 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.6446 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 96000 +__unnamed_task__/AverageDiscountedReturn -74.4718 +__unnamed_task__/AverageReturn -172.667 +__unnamed_task__/Iteration 2 +__unnamed_task__/MaxReturn -109.883 +__unnamed_task__/MinReturn -201.5 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.0811 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-03-29 16:08:35 | [maml_trainer] epoch #3 | Sampling for adapation and meta-testing... +2025-03-29 16:12:12 | [maml_trainer] epoch #3 | Finished meta-testing... +2025-03-29 16:12:12 | [maml_trainer] epoch #3 | Saving snapshot... +2025-03-29 16:12:30 | [maml_trainer] epoch #3 | Saved +2025-03-29 16:12:30 | [maml_trainer] epoch #3 | Time 7451.28 s +2025-03-29 16:12:30 | [maml_trainer] epoch #3 | EpochTime 1111.39 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -68.066 +Average/AverageReturn -153.78 +Average/Iteration 3 +Average/MaxReturn -107.395 +Average/MinReturn -203.582 +Average/NumEpisodes 80 +Average/StdReturn 27.6886 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93872 +GaussianMLPPolicy/KLAfter 0.0151199 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000121231 +GaussianMLPPolicy/LossBefore 5.57303e-09 +GaussianMLPPolicy/dLoss 0.000121237 +Iteration 3 +MetaTest/Average/AverageDiscountedReturn -137.086 +MetaTest/Average/AverageReturn -137.086 +MetaTest/Average/Iteration 3 +MetaTest/Average/MaxReturn -112.305 +MetaTest/Average/MinReturn -205.435 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.3853 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -137.086 +MetaTest/__unnamed_task__/AverageReturn -137.086 +MetaTest/__unnamed_task__/Iteration 3 +MetaTest/__unnamed_task__/MaxReturn -112.305 +MetaTest/__unnamed_task__/MinReturn -205.435 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.3853 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 128000 +__unnamed_task__/AverageDiscountedReturn -68.066 +__unnamed_task__/AverageReturn -153.78 +__unnamed_task__/Iteration 3 +__unnamed_task__/MaxReturn -107.395 +__unnamed_task__/MinReturn -203.582 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 27.6886 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 16:27:27 | [maml_trainer] epoch #4 | Sampling for adapation and meta-testing... +2025-03-29 16:30:59 | [maml_trainer] epoch #4 | Finished meta-testing... +2025-03-29 16:30:59 | [maml_trainer] epoch #4 | Saving snapshot... +2025-03-29 16:31:17 | [maml_trainer] epoch #4 | Saved +2025-03-29 16:31:17 | [maml_trainer] epoch #4 | Time 8578.73 s +2025-03-29 16:31:17 | [maml_trainer] epoch #4 | EpochTime 1127.45 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -61.5336 +Average/AverageReturn -137.048 +Average/Iteration 4 +Average/MaxReturn -100.233 +Average/MinReturn -190.829 +Average/NumEpisodes 80 +Average/StdReturn 24.0293 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94401 +GaussianMLPPolicy/KLAfter 0.0170275 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000103669 +GaussianMLPPolicy/LossBefore 1.43051e-09 +GaussianMLPPolicy/dLoss 0.000103671 +Iteration 4 +MetaTest/Average/AverageDiscountedReturn -117.73 +MetaTest/Average/AverageReturn -117.73 +MetaTest/Average/Iteration 4 +MetaTest/Average/MaxReturn -95.9234 +MetaTest/Average/MinReturn -139.476 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.6439 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -117.73 +MetaTest/__unnamed_task__/AverageReturn -117.73 +MetaTest/__unnamed_task__/Iteration 4 +MetaTest/__unnamed_task__/MaxReturn -95.9234 +MetaTest/__unnamed_task__/MinReturn -139.476 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.6439 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 160000 +__unnamed_task__/AverageDiscountedReturn -61.5336 +__unnamed_task__/AverageReturn -137.048 +__unnamed_task__/Iteration 4 +__unnamed_task__/MaxReturn -100.233 +__unnamed_task__/MinReturn -190.829 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.0293 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 16:45:56 | [maml_trainer] epoch #5 | Sampling for adapation and meta-testing... +2025-03-29 16:49:29 | [maml_trainer] epoch #5 | Finished meta-testing... +2025-03-29 16:49:29 | [maml_trainer] epoch #5 | Saving snapshot... +2025-03-29 16:49:48 | [maml_trainer] epoch #5 | Saved +2025-03-29 16:49:48 | [maml_trainer] epoch #5 | Time 9689.42 s +2025-03-29 16:49:48 | [maml_trainer] epoch #5 | EpochTime 1110.69 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -54.6357 +Average/AverageReturn -118.954 +Average/Iteration 5 +Average/MaxReturn -96.6705 +Average/MinReturn -173.076 +Average/NumEpisodes 80 +Average/StdReturn 14.651 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94919 +GaussianMLPPolicy/KLAfter 0.0155094 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.49818e-05 +GaussianMLPPolicy/LossBefore -1.69873e-09 +GaussianMLPPolicy/dLoss 8.49801e-05 +Iteration 5 +MetaTest/Average/AverageDiscountedReturn -113.253 +MetaTest/Average/AverageReturn -113.253 +MetaTest/Average/Iteration 5 +MetaTest/Average/MaxReturn -93.6043 +MetaTest/Average/MinReturn -127.387 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.6004 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -113.253 +MetaTest/__unnamed_task__/AverageReturn -113.253 +MetaTest/__unnamed_task__/Iteration 5 +MetaTest/__unnamed_task__/MaxReturn -93.6043 +MetaTest/__unnamed_task__/MinReturn -127.387 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.6004 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 192000 +__unnamed_task__/AverageDiscountedReturn -54.6357 +__unnamed_task__/AverageReturn -118.954 +__unnamed_task__/Iteration 5 +__unnamed_task__/MaxReturn -96.6705 +__unnamed_task__/MinReturn -173.076 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.651 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 17:04:03 | [maml_trainer] epoch #6 | Sampling for adapation and meta-testing... +2025-03-29 17:07:37 | [maml_trainer] epoch #6 | Finished meta-testing... +2025-03-29 17:07:37 | [maml_trainer] epoch #6 | Saving snapshot... +2025-03-29 17:07:56 | [maml_trainer] epoch #6 | Saved +2025-03-29 17:07:56 | [maml_trainer] epoch #6 | Time 10777.62 s +2025-03-29 17:07:56 | [maml_trainer] epoch #6 | EpochTime 1088.20 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -51.8901 +Average/AverageReturn -112.45 +Average/Iteration 6 +Average/MaxReturn -93.8763 +Average/MinReturn -165.547 +Average/NumEpisodes 80 +Average/StdReturn 9.29064 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95488 +GaussianMLPPolicy/KLAfter 0.0127908 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.05952e-05 +GaussianMLPPolicy/LossBefore -4.73857e-09 +GaussianMLPPolicy/dLoss 4.05904e-05 +Iteration 6 +MetaTest/Average/AverageDiscountedReturn -106.046 +MetaTest/Average/AverageReturn -106.046 +MetaTest/Average/Iteration 6 +MetaTest/Average/MaxReturn -91.1024 +MetaTest/Average/MinReturn -123.493 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.57805 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -106.046 +MetaTest/__unnamed_task__/AverageReturn -106.046 +MetaTest/__unnamed_task__/Iteration 6 +MetaTest/__unnamed_task__/MaxReturn -91.1024 +MetaTest/__unnamed_task__/MinReturn -123.493 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.57805 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 224000 +__unnamed_task__/AverageDiscountedReturn -51.8901 +__unnamed_task__/AverageReturn -112.45 +__unnamed_task__/Iteration 6 +__unnamed_task__/MaxReturn -93.8763 +__unnamed_task__/MinReturn -165.547 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.29064 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 17:22:14 | [maml_trainer] epoch #7 | Sampling for adapation and meta-testing... +2025-03-29 17:25:50 | [maml_trainer] epoch #7 | Finished meta-testing... +2025-03-29 17:25:50 | [maml_trainer] epoch #7 | Saving snapshot... +2025-03-29 17:26:08 | [maml_trainer] epoch #7 | Saved +2025-03-29 17:26:08 | [maml_trainer] epoch #7 | Time 11869.61 s +2025-03-29 17:26:08 | [maml_trainer] epoch #7 | EpochTime 1091.99 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -49.7706 +Average/AverageReturn -106.593 +Average/Iteration 7 +Average/MaxReturn -84.0775 +Average/MinReturn -152.182 +Average/NumEpisodes 80 +Average/StdReturn 9.30248 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96089 +GaussianMLPPolicy/KLAfter 0.00990651 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000145924 +GaussianMLPPolicy/LossBefore 2.80142e-09 +GaussianMLPPolicy/dLoss 0.000145927 +Iteration 7 +MetaTest/Average/AverageDiscountedReturn -101.288 +MetaTest/Average/AverageReturn -101.288 +MetaTest/Average/Iteration 7 +MetaTest/Average/MaxReturn -84.6947 +MetaTest/Average/MinReturn -112.773 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.69814 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -101.288 +MetaTest/__unnamed_task__/AverageReturn -101.288 +MetaTest/__unnamed_task__/Iteration 7 +MetaTest/__unnamed_task__/MaxReturn -84.6947 +MetaTest/__unnamed_task__/MinReturn -112.773 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.69814 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 256000 +__unnamed_task__/AverageDiscountedReturn -49.7706 +__unnamed_task__/AverageReturn -106.593 +__unnamed_task__/Iteration 7 +__unnamed_task__/MaxReturn -84.0775 +__unnamed_task__/MinReturn -152.182 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.30248 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 17:40:33 | [maml_trainer] epoch #8 | Sampling for adapation and meta-testing... +2025-03-29 17:44:02 | [maml_trainer] epoch #8 | Finished meta-testing... +2025-03-29 17:44:02 | [maml_trainer] epoch #8 | Saving snapshot... +2025-03-29 17:44:20 | [maml_trainer] epoch #8 | Saved +2025-03-29 17:44:20 | [maml_trainer] epoch #8 | Time 12962.01 s +2025-03-29 17:44:20 | [maml_trainer] epoch #8 | EpochTime 1092.40 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -48.0997 +Average/AverageReturn -101.737 +Average/Iteration 8 +Average/MaxReturn -86.0529 +Average/MinReturn -122.63 +Average/NumEpisodes 80 +Average/StdReturn 7.35582 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96613 +GaussianMLPPolicy/KLAfter 0.00941518 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000107881 +GaussianMLPPolicy/LossBefore 1.78814e-10 +GaussianMLPPolicy/dLoss -0.000107881 +Iteration 8 +MetaTest/Average/AverageDiscountedReturn -96.4115 +MetaTest/Average/AverageReturn -96.4115 +MetaTest/Average/Iteration 8 +MetaTest/Average/MaxReturn -84.5984 +MetaTest/Average/MinReturn -113.651 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.14611 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -96.4115 +MetaTest/__unnamed_task__/AverageReturn -96.4115 +MetaTest/__unnamed_task__/Iteration 8 +MetaTest/__unnamed_task__/MaxReturn -84.5984 +MetaTest/__unnamed_task__/MinReturn -113.651 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.14611 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 288000 +__unnamed_task__/AverageDiscountedReturn -48.0997 +__unnamed_task__/AverageReturn -101.737 +__unnamed_task__/Iteration 8 +__unnamed_task__/MaxReturn -86.0529 +__unnamed_task__/MinReturn -122.63 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.35582 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 17:58:24 | [maml_trainer] epoch #9 | Sampling for adapation and meta-testing... +2025-03-29 18:02:02 | [maml_trainer] epoch #9 | Finished meta-testing... +2025-03-29 18:02:02 | [maml_trainer] epoch #9 | Saving snapshot... +2025-03-29 18:02:21 | [maml_trainer] epoch #9 | Saved +2025-03-29 18:02:21 | [maml_trainer] epoch #9 | Time 14042.66 s +2025-03-29 18:02:21 | [maml_trainer] epoch #9 | EpochTime 1080.64 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -47.5626 +Average/AverageReturn -99.9979 +Average/Iteration 9 +Average/MaxReturn -77.0455 +Average/MinReturn -119.184 +Average/NumEpisodes 80 +Average/StdReturn 8.46917 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97179 +GaussianMLPPolicy/KLAfter 0.0122861 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000130503 +GaussianMLPPolicy/LossBefore 1.2815e-09 +GaussianMLPPolicy/dLoss 0.000130504 +Iteration 9 +MetaTest/Average/AverageDiscountedReturn -97.5832 +MetaTest/Average/AverageReturn -97.5832 +MetaTest/Average/Iteration 9 +MetaTest/Average/MaxReturn -75.1328 +MetaTest/Average/MinReturn -111.216 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.13949 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -97.5832 +MetaTest/__unnamed_task__/AverageReturn -97.5832 +MetaTest/__unnamed_task__/Iteration 9 +MetaTest/__unnamed_task__/MaxReturn -75.1328 +MetaTest/__unnamed_task__/MinReturn -111.216 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.13949 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 320000 +__unnamed_task__/AverageDiscountedReturn -47.5626 +__unnamed_task__/AverageReturn -99.9979 +__unnamed_task__/Iteration 9 +__unnamed_task__/MaxReturn -77.0455 +__unnamed_task__/MinReturn -119.184 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.46917 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 18:16:45 | [maml_trainer] epoch #10 | Sampling for adapation and meta-testing... +2025-03-29 18:20:22 | [maml_trainer] epoch #10 | Finished meta-testing... +2025-03-29 18:20:22 | [maml_trainer] epoch #10 | Saving snapshot... +2025-03-29 18:20:41 | [maml_trainer] epoch #10 | Saved +2025-03-29 18:20:41 | [maml_trainer] epoch #10 | Time 15142.11 s +2025-03-29 18:20:41 | [maml_trainer] epoch #10 | EpochTime 1099.45 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -46.0745 +Average/AverageReturn -96.256 +Average/Iteration 10 +Average/MaxReturn -80.0254 +Average/MinReturn -106.699 +Average/NumEpisodes 80 +Average/StdReturn 6.48753 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97581 +GaussianMLPPolicy/KLAfter 0.00679781 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.39427e-05 +GaussianMLPPolicy/LossBefore 1.77026e-08 +GaussianMLPPolicy/dLoss -3.3925e-05 +Iteration 10 +MetaTest/Average/AverageDiscountedReturn -94.5052 +MetaTest/Average/AverageReturn -94.5052 +MetaTest/Average/Iteration 10 +MetaTest/Average/MaxReturn -84.0868 +MetaTest/Average/MinReturn -105.103 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.40841 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -94.5052 +MetaTest/__unnamed_task__/AverageReturn -94.5052 +MetaTest/__unnamed_task__/Iteration 10 +MetaTest/__unnamed_task__/MaxReturn -84.0868 +MetaTest/__unnamed_task__/MinReturn -105.103 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.40841 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 352000 +__unnamed_task__/AverageDiscountedReturn -46.0745 +__unnamed_task__/AverageReturn -96.256 +__unnamed_task__/Iteration 10 +__unnamed_task__/MaxReturn -80.0254 +__unnamed_task__/MinReturn -106.699 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.48753 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 18:35:06 | [maml_trainer] epoch #11 | Sampling for adapation and meta-testing... +2025-03-29 18:38:45 | [maml_trainer] epoch #11 | Finished meta-testing... +2025-03-29 18:38:45 | [maml_trainer] epoch #11 | Saving snapshot... +2025-03-29 18:39:03 | [maml_trainer] epoch #11 | Saved +2025-03-29 18:39:03 | [maml_trainer] epoch #11 | Time 16244.92 s +2025-03-29 18:39:03 | [maml_trainer] epoch #11 | EpochTime 1102.81 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -45.5352 +Average/AverageReturn -94.5383 +Average/Iteration 11 +Average/MaxReturn -73.6762 +Average/MinReturn -109.491 +Average/NumEpisodes 80 +Average/StdReturn 8.25946 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9773 +GaussianMLPPolicy/KLAfter 0.0047538 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.09733e-05 +GaussianMLPPolicy/LossBefore -1.01626e-08 +GaussianMLPPolicy/dLoss 7.09632e-05 +Iteration 11 +MetaTest/Average/AverageDiscountedReturn -93.5671 +MetaTest/Average/AverageReturn -93.5671 +MetaTest/Average/Iteration 11 +MetaTest/Average/MaxReturn -77.8634 +MetaTest/Average/MinReturn -111.295 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.69372 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -93.5671 +MetaTest/__unnamed_task__/AverageReturn -93.5671 +MetaTest/__unnamed_task__/Iteration 11 +MetaTest/__unnamed_task__/MaxReturn -77.8634 +MetaTest/__unnamed_task__/MinReturn -111.295 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.69372 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 384000 +__unnamed_task__/AverageDiscountedReturn -45.5352 +__unnamed_task__/AverageReturn -94.5383 +__unnamed_task__/Iteration 11 +__unnamed_task__/MaxReturn -73.6762 +__unnamed_task__/MinReturn -109.491 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.25946 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 18:53:30 | [maml_trainer] epoch #12 | Sampling for adapation and meta-testing... +2025-03-29 18:57:07 | [maml_trainer] epoch #12 | Finished meta-testing... +2025-03-29 18:57:07 | [maml_trainer] epoch #12 | Saving snapshot... +2025-03-29 18:57:26 | [maml_trainer] epoch #12 | Saved +2025-03-29 18:57:26 | [maml_trainer] epoch #12 | Time 17347.50 s +2025-03-29 18:57:26 | [maml_trainer] epoch #12 | EpochTime 1102.58 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -44.2988 +Average/AverageReturn -91.641 +Average/Iteration 12 +Average/MaxReturn -74.2556 +Average/MinReturn -107.967 +Average/NumEpisodes 80 +Average/StdReturn 7.70799 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97989 +GaussianMLPPolicy/KLAfter 0.00380214 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.29006e-05 +GaussianMLPPolicy/LossBefore -6.3777e-09 +GaussianMLPPolicy/dLoss 7.28943e-05 +Iteration 12 +MetaTest/Average/AverageDiscountedReturn -90.1427 +MetaTest/Average/AverageReturn -90.1427 +MetaTest/Average/Iteration 12 +MetaTest/Average/MaxReturn -75.1154 +MetaTest/Average/MinReturn -105.122 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.42643 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -90.1427 +MetaTest/__unnamed_task__/AverageReturn -90.1427 +MetaTest/__unnamed_task__/Iteration 12 +MetaTest/__unnamed_task__/MaxReturn -75.1154 +MetaTest/__unnamed_task__/MinReturn -105.122 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.42643 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 416000 +__unnamed_task__/AverageDiscountedReturn -44.2988 +__unnamed_task__/AverageReturn -91.641 +__unnamed_task__/Iteration 12 +__unnamed_task__/MaxReturn -74.2556 +__unnamed_task__/MinReturn -107.967 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.70799 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 19:11:51 | [maml_trainer] epoch #13 | Sampling for adapation and meta-testing... +2025-03-29 19:15:27 | [maml_trainer] epoch #13 | Finished meta-testing... +2025-03-29 19:15:27 | [maml_trainer] epoch #13 | Saving snapshot... +2025-03-29 19:15:46 | [maml_trainer] epoch #13 | Saved +2025-03-29 19:15:46 | [maml_trainer] epoch #13 | Time 18447.05 s +2025-03-29 19:15:46 | [maml_trainer] epoch #13 | EpochTime 1099.55 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -44.4211 +Average/AverageReturn -91.7099 +Average/Iteration 13 +Average/MaxReturn -77.8717 +Average/MinReturn -110.913 +Average/NumEpisodes 80 +Average/StdReturn 7.10949 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98314 +GaussianMLPPolicy/KLAfter 0.00589673 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.93649e-05 +GaussianMLPPolicy/LossBefore -2.68221e-10 +GaussianMLPPolicy/dLoss -6.93651e-05 +Iteration 13 +MetaTest/Average/AverageDiscountedReturn -89.5117 +MetaTest/Average/AverageReturn -89.5117 +MetaTest/Average/Iteration 13 +MetaTest/Average/MaxReturn -76.6584 +MetaTest/Average/MinReturn -111.179 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.78343 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -89.5117 +MetaTest/__unnamed_task__/AverageReturn -89.5117 +MetaTest/__unnamed_task__/Iteration 13 +MetaTest/__unnamed_task__/MaxReturn -76.6584 +MetaTest/__unnamed_task__/MinReturn -111.179 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.78343 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 448000 +__unnamed_task__/AverageDiscountedReturn -44.4211 +__unnamed_task__/AverageReturn -91.7099 +__unnamed_task__/Iteration 13 +__unnamed_task__/MaxReturn -77.8717 +__unnamed_task__/MinReturn -110.913 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.10949 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 19:30:00 | [maml_trainer] epoch #14 | Sampling for adapation and meta-testing... +2025-03-29 19:33:38 | [maml_trainer] epoch #14 | Finished meta-testing... +2025-03-29 19:33:38 | [maml_trainer] epoch #14 | Saving snapshot... +2025-03-29 19:33:58 | [maml_trainer] epoch #14 | Saved +2025-03-29 19:33:58 | [maml_trainer] epoch #14 | Time 19539.05 s +2025-03-29 19:33:58 | [maml_trainer] epoch #14 | EpochTime 1091.99 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -44.184 +Average/AverageReturn -91.3549 +Average/Iteration 14 +Average/MaxReturn -74.7981 +Average/MinReturn -112.451 +Average/NumEpisodes 80 +Average/StdReturn 7.34246 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98566 +GaussianMLPPolicy/KLAfter 0.00554289 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000135135 +GaussianMLPPolicy/LossBefore 7.83801e-09 +GaussianMLPPolicy/dLoss 0.000135143 +Iteration 14 +MetaTest/Average/AverageDiscountedReturn -85.9367 +MetaTest/Average/AverageReturn -85.9367 +MetaTest/Average/Iteration 14 +MetaTest/Average/MaxReturn -76.0258 +MetaTest/Average/MinReturn -98.301 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.71631 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -85.9367 +MetaTest/__unnamed_task__/AverageReturn -85.9367 +MetaTest/__unnamed_task__/Iteration 14 +MetaTest/__unnamed_task__/MaxReturn -76.0258 +MetaTest/__unnamed_task__/MinReturn -98.301 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.71631 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 480000 +__unnamed_task__/AverageDiscountedReturn -44.184 +__unnamed_task__/AverageReturn -91.3549 +__unnamed_task__/Iteration 14 +__unnamed_task__/MaxReturn -74.7981 +__unnamed_task__/MinReturn -112.451 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.34246 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 19:48:35 | [maml_trainer] epoch #15 | Sampling for adapation and meta-testing... +2025-03-29 19:52:25 | [maml_trainer] epoch #15 | Finished meta-testing... +2025-03-29 19:52:25 | [maml_trainer] epoch #15 | Saving snapshot... +2025-03-29 19:52:43 | [maml_trainer] epoch #15 | Saved +2025-03-29 19:52:43 | [maml_trainer] epoch #15 | Time 20664.63 s +2025-03-29 19:52:43 | [maml_trainer] epoch #15 | EpochTime 1125.58 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -42.9182 +Average/AverageReturn -88.7597 +Average/Iteration 15 +Average/MaxReturn -68.5596 +Average/MinReturn -106.585 +Average/NumEpisodes 80 +Average/StdReturn 7.74858 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98738 +GaussianMLPPolicy/KLAfter 0.00657281 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.89444e-05 +GaussianMLPPolicy/LossBefore -4.73857e-09 +GaussianMLPPolicy/dLoss 3.89396e-05 +Iteration 15 +MetaTest/Average/AverageDiscountedReturn -87.4191 +MetaTest/Average/AverageReturn -87.4191 +MetaTest/Average/Iteration 15 +MetaTest/Average/MaxReturn -76.7098 +MetaTest/Average/MinReturn -96.9215 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.4876 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -87.4191 +MetaTest/__unnamed_task__/AverageReturn -87.4191 +MetaTest/__unnamed_task__/Iteration 15 +MetaTest/__unnamed_task__/MaxReturn -76.7098 +MetaTest/__unnamed_task__/MinReturn -96.9215 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.4876 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 512000 +__unnamed_task__/AverageDiscountedReturn -42.9182 +__unnamed_task__/AverageReturn -88.7597 +__unnamed_task__/Iteration 15 +__unnamed_task__/MaxReturn -68.5596 +__unnamed_task__/MinReturn -106.585 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.74858 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 20:08:03 | [maml_trainer] epoch #16 | Sampling for adapation and meta-testing... +2025-03-29 20:12:07 | [maml_trainer] epoch #16 | Finished meta-testing... +2025-03-29 20:12:07 | [maml_trainer] epoch #16 | Saving snapshot... +2025-03-29 20:12:26 | [maml_trainer] epoch #16 | Saved +2025-03-29 20:12:26 | [maml_trainer] epoch #16 | Time 21847.45 s +2025-03-29 20:12:26 | [maml_trainer] epoch #16 | EpochTime 1182.81 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -42.1616 +Average/AverageReturn -86.8405 +Average/Iteration 16 +Average/MaxReturn -68.4834 +Average/MinReturn -106.56 +Average/NumEpisodes 80 +Average/StdReturn 8.69316 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98808 +GaussianMLPPolicy/KLAfter 0.00443145 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.56231e-05 +GaussianMLPPolicy/LossBefore 5.66244e-10 +GaussianMLPPolicy/dLoss 8.56237e-05 +Iteration 16 +MetaTest/Average/AverageDiscountedReturn -85.452 +MetaTest/Average/AverageReturn -85.452 +MetaTest/Average/Iteration 16 +MetaTest/Average/MaxReturn -72.4457 +MetaTest/Average/MinReturn -97.156 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.8053 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -85.452 +MetaTest/__unnamed_task__/AverageReturn -85.452 +MetaTest/__unnamed_task__/Iteration 16 +MetaTest/__unnamed_task__/MaxReturn -72.4457 +MetaTest/__unnamed_task__/MinReturn -97.156 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.8053 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 544000 +__unnamed_task__/AverageDiscountedReturn -42.1616 +__unnamed_task__/AverageReturn -86.8405 +__unnamed_task__/Iteration 16 +__unnamed_task__/MaxReturn -68.4834 +__unnamed_task__/MinReturn -106.56 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.69316 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 20:29:05 | [maml_trainer] epoch #17 | Sampling for adapation and meta-testing... +2025-03-29 20:32:47 | [maml_trainer] epoch #17 | Finished meta-testing... +2025-03-29 20:32:47 | [maml_trainer] epoch #17 | Saving snapshot... +2025-03-29 20:33:06 | [maml_trainer] epoch #17 | Saved +2025-03-29 20:33:06 | [maml_trainer] epoch #17 | Time 23087.69 s +2025-03-29 20:33:06 | [maml_trainer] epoch #17 | EpochTime 1240.24 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -42.0064 +Average/AverageReturn -86.4679 +Average/Iteration 17 +Average/MaxReturn -64.9759 +Average/MinReturn -106.147 +Average/NumEpisodes 80 +Average/StdReturn 7.44741 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98852 +GaussianMLPPolicy/KLAfter 0.00423174 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.23616e-05 +GaussianMLPPolicy/LossBefore 4.52995e-09 +GaussianMLPPolicy/dLoss -1.2357e-05 +Iteration 17 +MetaTest/Average/AverageDiscountedReturn -85.5974 +MetaTest/Average/AverageReturn -85.5974 +MetaTest/Average/Iteration 17 +MetaTest/Average/MaxReturn -76.2361 +MetaTest/Average/MinReturn -94.6891 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.06291 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -85.5974 +MetaTest/__unnamed_task__/AverageReturn -85.5974 +MetaTest/__unnamed_task__/Iteration 17 +MetaTest/__unnamed_task__/MaxReturn -76.2361 +MetaTest/__unnamed_task__/MinReturn -94.6891 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.06291 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 576000 +__unnamed_task__/AverageDiscountedReturn -42.0064 +__unnamed_task__/AverageReturn -86.4679 +__unnamed_task__/Iteration 17 +__unnamed_task__/MaxReturn -64.9759 +__unnamed_task__/MinReturn -106.147 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.44741 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 20:48:04 | [maml_trainer] epoch #18 | Sampling for adapation and meta-testing... +2025-03-29 20:51:48 | [maml_trainer] epoch #18 | Finished meta-testing... +2025-03-29 20:51:48 | [maml_trainer] epoch #18 | Saving snapshot... +2025-03-29 20:52:08 | [maml_trainer] epoch #18 | Saved +2025-03-29 20:52:08 | [maml_trainer] epoch #18 | Time 24229.44 s +2025-03-29 20:52:08 | [maml_trainer] epoch #18 | EpochTime 1141.75 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -40.5412 +Average/AverageReturn -83.3442 +Average/Iteration 18 +Average/MaxReturn -67.843 +Average/MinReturn -104.212 +Average/NumEpisodes 80 +Average/StdReturn 8.31696 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98739 +GaussianMLPPolicy/KLAfter 0.00504685 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.7662e-05 +GaussianMLPPolicy/LossBefore 2.01762e-08 +GaussianMLPPolicy/dLoss -2.76419e-05 +Iteration 18 +MetaTest/Average/AverageDiscountedReturn -83.9848 +MetaTest/Average/AverageReturn -83.9848 +MetaTest/Average/Iteration 18 +MetaTest/Average/MaxReturn -67.131 +MetaTest/Average/MinReturn -97.3472 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.33707 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -83.9848 +MetaTest/__unnamed_task__/AverageReturn -83.9848 +MetaTest/__unnamed_task__/Iteration 18 +MetaTest/__unnamed_task__/MaxReturn -67.131 +MetaTest/__unnamed_task__/MinReturn -97.3472 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.33707 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 608000 +__unnamed_task__/AverageDiscountedReturn -40.5412 +__unnamed_task__/AverageReturn -83.3442 +__unnamed_task__/Iteration 18 +__unnamed_task__/MaxReturn -67.843 +__unnamed_task__/MinReturn -104.212 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.31696 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 21:07:42 | [maml_trainer] epoch #19 | Sampling for adapation and meta-testing... +2025-03-29 21:12:14 | [maml_trainer] epoch #19 | Finished meta-testing... +2025-03-29 21:12:14 | [maml_trainer] epoch #19 | Saving snapshot... +2025-03-29 21:12:40 | [maml_trainer] epoch #19 | Saved +2025-03-29 21:12:40 | [maml_trainer] epoch #19 | Time 25461.56 s +2025-03-29 21:12:40 | [maml_trainer] epoch #19 | EpochTime 1232.12 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -40.293 +Average/AverageReturn -81.9016 +Average/Iteration 19 +Average/MaxReturn -62.9009 +Average/MinReturn -98.1728 +Average/NumEpisodes 80 +Average/StdReturn 8.32451 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98773 +GaussianMLPPolicy/KLAfter 0.00653278 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.03172e-05 +GaussianMLPPolicy/LossBefore -4.20213e-09 +GaussianMLPPolicy/dLoss -5.03214e-05 +Iteration 19 +MetaTest/Average/AverageDiscountedReturn -77.915 +MetaTest/Average/AverageReturn -77.915 +MetaTest/Average/Iteration 19 +MetaTest/Average/MaxReturn -62.651 +MetaTest/Average/MinReturn -94.5604 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.52851 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -77.915 +MetaTest/__unnamed_task__/AverageReturn -77.915 +MetaTest/__unnamed_task__/Iteration 19 +MetaTest/__unnamed_task__/MaxReturn -62.651 +MetaTest/__unnamed_task__/MinReturn -94.5604 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.52851 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 640000 +__unnamed_task__/AverageDiscountedReturn -40.293 +__unnamed_task__/AverageReturn -81.9016 +__unnamed_task__/Iteration 19 +__unnamed_task__/MaxReturn -62.9009 +__unnamed_task__/MinReturn -98.1728 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.32451 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 21:29:07 | [maml_trainer] epoch #20 | Sampling for adapation and meta-testing... +2025-03-29 21:33:28 | [maml_trainer] epoch #20 | Finished meta-testing... +2025-03-29 21:33:28 | [maml_trainer] epoch #20 | Saving snapshot... +2025-03-29 21:33:49 | [maml_trainer] epoch #20 | Saved +2025-03-29 21:33:49 | [maml_trainer] epoch #20 | Time 26730.45 s +2025-03-29 21:33:49 | [maml_trainer] epoch #20 | EpochTime 1268.88 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -40.5124 +Average/AverageReturn -82.7279 +Average/Iteration 20 +Average/MaxReturn -64.2478 +Average/MinReturn -104.283 +Average/NumEpisodes 80 +Average/StdReturn 7.62047 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98808 +GaussianMLPPolicy/KLAfter 0.00588516 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.023e-05 +GaussianMLPPolicy/LossBefore -2.44379e-09 +GaussianMLPPolicy/dLoss 4.02276e-05 +Iteration 20 +MetaTest/Average/AverageDiscountedReturn -78.9486 +MetaTest/Average/AverageReturn -78.9486 +MetaTest/Average/Iteration 20 +MetaTest/Average/MaxReturn -64.2495 +MetaTest/Average/MinReturn -97.0389 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.77431 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -78.9486 +MetaTest/__unnamed_task__/AverageReturn -78.9486 +MetaTest/__unnamed_task__/Iteration 20 +MetaTest/__unnamed_task__/MaxReturn -64.2495 +MetaTest/__unnamed_task__/MinReturn -97.0389 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.77431 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 672000 +__unnamed_task__/AverageDiscountedReturn -40.5124 +__unnamed_task__/AverageReturn -82.7279 +__unnamed_task__/Iteration 20 +__unnamed_task__/MaxReturn -64.2478 +__unnamed_task__/MinReturn -104.283 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.62047 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 21:50:05 | [maml_trainer] epoch #21 | Sampling for adapation and meta-testing... +2025-03-29 21:53:55 | [maml_trainer] epoch #21 | Finished meta-testing... +2025-03-29 21:53:55 | [maml_trainer] epoch #21 | Saving snapshot... +2025-03-29 21:54:14 | [maml_trainer] epoch #21 | Saved +2025-03-29 21:54:14 | [maml_trainer] epoch #21 | Time 27955.67 s +2025-03-29 21:54:14 | [maml_trainer] epoch #21 | EpochTime 1225.22 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -39.1371 +Average/AverageReturn -79.3492 +Average/Iteration 21 +Average/MaxReturn -64.0934 +Average/MinReturn -97.2364 +Average/NumEpisodes 80 +Average/StdReturn 7.46332 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98958 +GaussianMLPPolicy/KLAfter 0.00621178 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000169701 +GaussianMLPPolicy/LossBefore 1.78814e-09 +GaussianMLPPolicy/dLoss 0.000169702 +Iteration 21 +MetaTest/Average/AverageDiscountedReturn -81.293 +MetaTest/Average/AverageReturn -81.293 +MetaTest/Average/Iteration 21 +MetaTest/Average/MaxReturn -61.4477 +MetaTest/Average/MinReturn -97.6531 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.27894 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -81.293 +MetaTest/__unnamed_task__/AverageReturn -81.293 +MetaTest/__unnamed_task__/Iteration 21 +MetaTest/__unnamed_task__/MaxReturn -61.4477 +MetaTest/__unnamed_task__/MinReturn -97.6531 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.27894 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 704000 +__unnamed_task__/AverageDiscountedReturn -39.1371 +__unnamed_task__/AverageReturn -79.3492 +__unnamed_task__/Iteration 21 +__unnamed_task__/MaxReturn -64.0934 +__unnamed_task__/MinReturn -97.2364 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.46332 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 22:09:47 | [maml_trainer] epoch #22 | Sampling for adapation and meta-testing... +2025-03-29 22:13:37 | [maml_trainer] epoch #22 | Finished meta-testing... +2025-03-29 22:13:37 | [maml_trainer] epoch #22 | Saving snapshot... +2025-03-29 22:13:57 | [maml_trainer] epoch #22 | Saved +2025-03-29 22:13:57 | [maml_trainer] epoch #22 | Time 29138.42 s +2025-03-29 22:13:57 | [maml_trainer] epoch #22 | EpochTime 1182.75 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -39.0393 +Average/AverageReturn -78.1868 +Average/Iteration 22 +Average/MaxReturn -62.4644 +Average/MinReturn -100.727 +Average/NumEpisodes 80 +Average/StdReturn 8.25478 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99013 +GaussianMLPPolicy/KLAfter 0.00648733 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.47659e-06 +GaussianMLPPolicy/LossBefore 7.86781e-09 +GaussianMLPPolicy/dLoss -5.46873e-06 +Iteration 22 +MetaTest/Average/AverageDiscountedReturn -77.5203 +MetaTest/Average/AverageReturn -77.5203 +MetaTest/Average/Iteration 22 +MetaTest/Average/MaxReturn -66.799 +MetaTest/Average/MinReturn -93.7182 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.05823 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -77.5203 +MetaTest/__unnamed_task__/AverageReturn -77.5203 +MetaTest/__unnamed_task__/Iteration 22 +MetaTest/__unnamed_task__/MaxReturn -66.799 +MetaTest/__unnamed_task__/MinReturn -93.7182 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.05823 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 736000 +__unnamed_task__/AverageDiscountedReturn -39.0393 +__unnamed_task__/AverageReturn -78.1868 +__unnamed_task__/Iteration 22 +__unnamed_task__/MaxReturn -62.4644 +__unnamed_task__/MinReturn -100.727 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.25478 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 22:29:32 | [maml_trainer] epoch #23 | Sampling for adapation and meta-testing... +2025-03-29 22:33:19 | [maml_trainer] epoch #23 | Finished meta-testing... +2025-03-29 22:33:19 | [maml_trainer] epoch #23 | Saving snapshot... +2025-03-29 22:33:44 | [maml_trainer] epoch #23 | Saved +2025-03-29 22:33:44 | [maml_trainer] epoch #23 | Time 30325.91 s +2025-03-29 22:33:44 | [maml_trainer] epoch #23 | EpochTime 1187.49 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -38.6514 +Average/AverageReturn -77.2321 +Average/Iteration 23 +Average/MaxReturn -60.5929 +Average/MinReturn -99.1665 +Average/NumEpisodes 80 +Average/StdReturn 8.54462 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99009 +GaussianMLPPolicy/KLAfter 0.00495013 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.9822e-05 +GaussianMLPPolicy/LossBefore -6.34789e-09 +GaussianMLPPolicy/dLoss 3.98157e-05 +Iteration 23 +MetaTest/Average/AverageDiscountedReturn -81.0673 +MetaTest/Average/AverageReturn -81.0673 +MetaTest/Average/Iteration 23 +MetaTest/Average/MaxReturn -66.4252 +MetaTest/Average/MinReturn -95.045 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.70191 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -81.0673 +MetaTest/__unnamed_task__/AverageReturn -81.0673 +MetaTest/__unnamed_task__/Iteration 23 +MetaTest/__unnamed_task__/MaxReturn -66.4252 +MetaTest/__unnamed_task__/MinReturn -95.045 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.70191 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 768000 +__unnamed_task__/AverageDiscountedReturn -38.6514 +__unnamed_task__/AverageReturn -77.2321 +__unnamed_task__/Iteration 23 +__unnamed_task__/MaxReturn -60.5929 +__unnamed_task__/MinReturn -99.1665 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.54462 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 22:49:27 | [maml_trainer] epoch #24 | Sampling for adapation and meta-testing... +2025-03-29 22:53:17 | [maml_trainer] epoch #24 | Finished meta-testing... +2025-03-29 22:53:17 | [maml_trainer] epoch #24 | Saving snapshot... +2025-03-29 22:53:35 | [maml_trainer] epoch #24 | Saved +2025-03-29 22:53:35 | [maml_trainer] epoch #24 | Time 31517.02 s +2025-03-29 22:53:35 | [maml_trainer] epoch #24 | EpochTime 1191.10 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -39.2607 +Average/AverageReturn -78.2703 +Average/Iteration 24 +Average/MaxReturn -54.8975 +Average/MinReturn -99.8802 +Average/NumEpisodes 80 +Average/StdReturn 8.38224 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99185 +GaussianMLPPolicy/KLAfter 0.00455429 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.12408e-06 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss 6.12801e-06 +Iteration 24 +MetaTest/Average/AverageDiscountedReturn -77.3183 +MetaTest/Average/AverageReturn -77.3183 +MetaTest/Average/Iteration 24 +MetaTest/Average/MaxReturn -62.821 +MetaTest/Average/MinReturn -91.9925 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.90627 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -77.3183 +MetaTest/__unnamed_task__/AverageReturn -77.3183 +MetaTest/__unnamed_task__/Iteration 24 +MetaTest/__unnamed_task__/MaxReturn -62.821 +MetaTest/__unnamed_task__/MinReturn -91.9925 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.90627 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 800000 +__unnamed_task__/AverageDiscountedReturn -39.2607 +__unnamed_task__/AverageReturn -78.2703 +__unnamed_task__/Iteration 24 +__unnamed_task__/MaxReturn -54.8975 +__unnamed_task__/MinReturn -99.8802 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.38224 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 23:08:52 | [maml_trainer] epoch #25 | Sampling for adapation and meta-testing... +2025-03-29 23:12:45 | [maml_trainer] epoch #25 | Finished meta-testing... +2025-03-29 23:12:45 | [maml_trainer] epoch #25 | Saving snapshot... +2025-03-29 23:13:04 | [maml_trainer] epoch #25 | Saved +2025-03-29 23:13:04 | [maml_trainer] epoch #25 | Time 32685.10 s +2025-03-29 23:13:04 | [maml_trainer] epoch #25 | EpochTime 1168.08 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -38.9653 +Average/AverageReturn -77.5143 +Average/Iteration 25 +Average/MaxReturn -56.8329 +Average/MinReturn -102.339 +Average/NumEpisodes 80 +Average/StdReturn 9.2522 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99353 +GaussianMLPPolicy/KLAfter 0.00390212 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.11071e-05 +GaussianMLPPolicy/LossBefore 3.09944e-09 +GaussianMLPPolicy/dLoss 4.11102e-05 +Iteration 25 +MetaTest/Average/AverageDiscountedReturn -76.4429 +MetaTest/Average/AverageReturn -76.4429 +MetaTest/Average/Iteration 25 +MetaTest/Average/MaxReturn -64.9046 +MetaTest/Average/MinReturn -90.3075 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.10372 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -76.4429 +MetaTest/__unnamed_task__/AverageReturn -76.4429 +MetaTest/__unnamed_task__/Iteration 25 +MetaTest/__unnamed_task__/MaxReturn -64.9046 +MetaTest/__unnamed_task__/MinReturn -90.3075 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.10372 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 832000 +__unnamed_task__/AverageDiscountedReturn -38.9653 +__unnamed_task__/AverageReturn -77.5143 +__unnamed_task__/Iteration 25 +__unnamed_task__/MaxReturn -56.8329 +__unnamed_task__/MinReturn -102.339 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.2522 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 23:28:16 | [maml_trainer] epoch #26 | Sampling for adapation and meta-testing... +2025-03-29 23:32:02 | [maml_trainer] epoch #26 | Finished meta-testing... +2025-03-29 23:32:02 | [maml_trainer] epoch #26 | Saving snapshot... +2025-03-29 23:32:21 | [maml_trainer] epoch #26 | Saved +2025-03-29 23:32:21 | [maml_trainer] epoch #26 | Time 33842.07 s +2025-03-29 23:32:21 | [maml_trainer] epoch #26 | EpochTime 1156.97 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -38.4893 +Average/AverageReturn -75.8672 +Average/Iteration 26 +Average/MaxReturn -56.3208 +Average/MinReturn -106.57 +Average/NumEpisodes 80 +Average/StdReturn 7.88509 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99464 +GaussianMLPPolicy/KLAfter 0.00439373 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.36744e-05 +GaussianMLPPolicy/LossBefore 9.65595e-09 +GaussianMLPPolicy/dLoss 3.3684e-05 +Iteration 26 +MetaTest/Average/AverageDiscountedReturn -73.5904 +MetaTest/Average/AverageReturn -73.5904 +MetaTest/Average/Iteration 26 +MetaTest/Average/MaxReturn -58.4429 +MetaTest/Average/MinReturn -92.3816 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.97084 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.5904 +MetaTest/__unnamed_task__/AverageReturn -73.5904 +MetaTest/__unnamed_task__/Iteration 26 +MetaTest/__unnamed_task__/MaxReturn -58.4429 +MetaTest/__unnamed_task__/MinReturn -92.3816 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.97084 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 864000 +__unnamed_task__/AverageDiscountedReturn -38.4893 +__unnamed_task__/AverageReturn -75.8672 +__unnamed_task__/Iteration 26 +__unnamed_task__/MaxReturn -56.3208 +__unnamed_task__/MinReturn -106.57 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.88509 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-29 23:47:29 | [maml_trainer] epoch #27 | Sampling for adapation and meta-testing... +2025-03-29 23:51:17 | [maml_trainer] epoch #27 | Finished meta-testing... +2025-03-29 23:51:17 | [maml_trainer] epoch #27 | Saving snapshot... +2025-03-29 23:51:36 | [maml_trainer] epoch #27 | Saved +2025-03-29 23:51:36 | [maml_trainer] epoch #27 | Time 34997.57 s +2025-03-29 23:51:36 | [maml_trainer] epoch #27 | EpochTime 1155.49 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -37.619 +Average/AverageReturn -74.0338 +Average/Iteration 27 +Average/MaxReturn -57.3566 +Average/MinReturn -93.3387 +Average/NumEpisodes 80 +Average/StdReturn 8.25711 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99539 +GaussianMLPPolicy/KLAfter 0.00423975 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.14696e-05 +GaussianMLPPolicy/LossBefore -8.04663e-10 +GaussianMLPPolicy/dLoss 5.14688e-05 +Iteration 27 +MetaTest/Average/AverageDiscountedReturn -73.2912 +MetaTest/Average/AverageReturn -73.2912 +MetaTest/Average/Iteration 27 +MetaTest/Average/MaxReturn -61.0363 +MetaTest/Average/MinReturn -85.9363 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.6946 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.2912 +MetaTest/__unnamed_task__/AverageReturn -73.2912 +MetaTest/__unnamed_task__/Iteration 27 +MetaTest/__unnamed_task__/MaxReturn -61.0363 +MetaTest/__unnamed_task__/MinReturn -85.9363 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.6946 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 896000 +__unnamed_task__/AverageDiscountedReturn -37.619 +__unnamed_task__/AverageReturn -74.0338 +__unnamed_task__/Iteration 27 +__unnamed_task__/MaxReturn -57.3566 +__unnamed_task__/MinReturn -93.3387 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.25711 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-30 00:07:01 | [maml_trainer] epoch #28 | Sampling for adapation and meta-testing... +2025-03-30 00:10:54 | [maml_trainer] epoch #28 | Finished meta-testing... +2025-03-30 00:10:54 | [maml_trainer] epoch #28 | Saving snapshot... +2025-03-30 00:11:14 | [maml_trainer] epoch #28 | Saved +2025-03-30 00:11:14 | [maml_trainer] epoch #28 | Time 36175.25 s +2025-03-30 00:11:14 | [maml_trainer] epoch #28 | EpochTime 1177.68 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -37.5926 +Average/AverageReturn -73.2493 +Average/Iteration 28 +Average/MaxReturn -51.5353 +Average/MinReturn -103.984 +Average/NumEpisodes 80 +Average/StdReturn 8.62127 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99619 +GaussianMLPPolicy/KLAfter 0.00540624 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.6349e-05 +GaussianMLPPolicy/LossBefore -8.34465e-10 +GaussianMLPPolicy/dLoss -8.63498e-05 +Iteration 28 +MetaTest/Average/AverageDiscountedReturn -69.8318 +MetaTest/Average/AverageReturn -69.8318 +MetaTest/Average/Iteration 28 +MetaTest/Average/MaxReturn -59.1333 +MetaTest/Average/MinReturn -85.8919 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.84116 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -69.8318 +MetaTest/__unnamed_task__/AverageReturn -69.8318 +MetaTest/__unnamed_task__/Iteration 28 +MetaTest/__unnamed_task__/MaxReturn -59.1333 +MetaTest/__unnamed_task__/MinReturn -85.8919 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.84116 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 928000 +__unnamed_task__/AverageDiscountedReturn -37.5926 +__unnamed_task__/AverageReturn -73.2493 +__unnamed_task__/Iteration 28 +__unnamed_task__/MaxReturn -51.5353 +__unnamed_task__/MinReturn -103.984 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.62127 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-30 00:26:35 | [maml_trainer] epoch #29 | Sampling for adapation and meta-testing... +2025-03-30 00:30:27 | [maml_trainer] epoch #29 | Finished meta-testing... +2025-03-30 00:30:27 | [maml_trainer] epoch #29 | Saving snapshot... +2025-03-30 00:30:45 | [maml_trainer] epoch #29 | Saved +2025-03-30 00:30:45 | [maml_trainer] epoch #29 | Time 37346.37 s +2025-03-30 00:30:45 | [maml_trainer] epoch #29 | EpochTime 1171.11 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -37.2795 +Average/AverageReturn -73.0523 +Average/Iteration 29 +Average/MaxReturn -58.5436 +Average/MinReturn -107.782 +Average/NumEpisodes 80 +Average/StdReturn 8.99926 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99548 +GaussianMLPPolicy/KLAfter 0.00820142 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.05774e-05 +GaussianMLPPolicy/LossBefore 1.2815e-08 +GaussianMLPPolicy/dLoss 4.05903e-05 +Iteration 29 +MetaTest/Average/AverageDiscountedReturn -73.5466 +MetaTest/Average/AverageReturn -73.5466 +MetaTest/Average/Iteration 29 +MetaTest/Average/MaxReturn -61.929 +MetaTest/Average/MinReturn -93.6044 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.32256 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.5466 +MetaTest/__unnamed_task__/AverageReturn -73.5466 +MetaTest/__unnamed_task__/Iteration 29 +MetaTest/__unnamed_task__/MaxReturn -61.929 +MetaTest/__unnamed_task__/MinReturn -93.6044 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.32256 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 960000 +__unnamed_task__/AverageDiscountedReturn -37.2795 +__unnamed_task__/AverageReturn -73.0523 +__unnamed_task__/Iteration 29 +__unnamed_task__/MaxReturn -58.5436 +__unnamed_task__/MinReturn -107.782 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.99926 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-30 00:46:00 | [maml_trainer] epoch #30 | Sampling for adapation and meta-testing... +2025-03-30 00:49:47 | [maml_trainer] epoch #30 | Finished meta-testing... +2025-03-30 00:49:47 | [maml_trainer] epoch #30 | Saving snapshot... +2025-03-30 00:50:06 | [maml_trainer] epoch #30 | Saved +2025-03-30 00:50:06 | [maml_trainer] epoch #30 | Time 38507.83 s +2025-03-30 00:50:06 | [maml_trainer] epoch #30 | EpochTime 1161.45 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -37.4665 +Average/AverageReturn -74.243 +Average/Iteration 30 +Average/MaxReturn -56.8646 +Average/MinReturn -105.34 +Average/NumEpisodes 80 +Average/StdReturn 10.4928 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99394 +GaussianMLPPolicy/KLAfter 0.00501623 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.50298e-05 +GaussianMLPPolicy/LossBefore 8.94067e-11 +GaussianMLPPolicy/dLoss 4.50298e-05 +Iteration 30 +MetaTest/Average/AverageDiscountedReturn -71.6487 +MetaTest/Average/AverageReturn -71.6487 +MetaTest/Average/Iteration 30 +MetaTest/Average/MaxReturn -55.5973 +MetaTest/Average/MinReturn -88.3613 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.66005 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -71.6487 +MetaTest/__unnamed_task__/AverageReturn -71.6487 +MetaTest/__unnamed_task__/Iteration 30 +MetaTest/__unnamed_task__/MaxReturn -55.5973 +MetaTest/__unnamed_task__/MinReturn -88.3613 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.66005 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 992000 +__unnamed_task__/AverageDiscountedReturn -37.4665 +__unnamed_task__/AverageReturn -74.243 +__unnamed_task__/Iteration 30 +__unnamed_task__/MaxReturn -56.8646 +__unnamed_task__/MinReturn -105.34 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.4928 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-03-30 01:05:15 | [maml_trainer] epoch #31 | Sampling for adapation and meta-testing... +2025-03-30 01:09:03 | [maml_trainer] epoch #31 | Finished meta-testing... +2025-03-30 01:09:03 | [maml_trainer] epoch #31 | Saving snapshot... +2025-03-30 01:09:22 | [maml_trainer] epoch #31 | Saved +2025-03-30 01:09:22 | [maml_trainer] epoch #31 | Time 39663.66 s +2025-03-30 01:09:22 | [maml_trainer] epoch #31 | EpochTime 1155.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.0946 +Average/AverageReturn -72.7077 +Average/Iteration 31 +Average/MaxReturn -54.9062 +Average/MinReturn -105.692 +Average/NumEpisodes 80 +Average/StdReturn 8.95361 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99306 +GaussianMLPPolicy/KLAfter 0.00564931 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.5789e-05 +GaussianMLPPolicy/LossBefore 3.57628e-09 +GaussianMLPPolicy/dLoss 1.57926e-05 +Iteration 31 +MetaTest/Average/AverageDiscountedReturn -72.3601 +MetaTest/Average/AverageReturn -72.3601 +MetaTest/Average/Iteration 31 +MetaTest/Average/MaxReturn -57.8592 +MetaTest/Average/MinReturn -91.2836 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.27259 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.3601 +MetaTest/__unnamed_task__/AverageReturn -72.3601 +MetaTest/__unnamed_task__/Iteration 31 +MetaTest/__unnamed_task__/MaxReturn -57.8592 +MetaTest/__unnamed_task__/MinReturn -91.2836 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.27259 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.024e+06 +__unnamed_task__/AverageDiscountedReturn -37.0946 +__unnamed_task__/AverageReturn -72.7077 +__unnamed_task__/Iteration 31 +__unnamed_task__/MaxReturn -54.9062 +__unnamed_task__/MinReturn -105.692 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.95361 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 01:24:31 | [maml_trainer] epoch #32 | Sampling for adapation and meta-testing... +2025-03-30 01:28:20 | [maml_trainer] epoch #32 | Finished meta-testing... +2025-03-30 01:28:20 | [maml_trainer] epoch #32 | Saving snapshot... +2025-03-30 01:28:39 | [maml_trainer] epoch #32 | Saved +2025-03-30 01:28:39 | [maml_trainer] epoch #32 | Time 40820.69 s +2025-03-30 01:28:39 | [maml_trainer] epoch #32 | EpochTime 1157.03 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.3497 +Average/AverageReturn -73.5079 +Average/Iteration 32 +Average/MaxReturn -52.5385 +Average/MinReturn -103.177 +Average/NumEpisodes 80 +Average/StdReturn 9.60238 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99214 +GaussianMLPPolicy/KLAfter 0.00590476 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.10662e-05 +GaussianMLPPolicy/LossBefore -2.05636e-09 +GaussianMLPPolicy/dLoss -2.10683e-05 +Iteration 32 +MetaTest/Average/AverageDiscountedReturn -72.7471 +MetaTest/Average/AverageReturn -72.7471 +MetaTest/Average/Iteration 32 +MetaTest/Average/MaxReturn -56.8577 +MetaTest/Average/MinReturn -87.1797 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.84465 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.7471 +MetaTest/__unnamed_task__/AverageReturn -72.7471 +MetaTest/__unnamed_task__/Iteration 32 +MetaTest/__unnamed_task__/MaxReturn -56.8577 +MetaTest/__unnamed_task__/MinReturn -87.1797 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.84465 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.056e+06 +__unnamed_task__/AverageDiscountedReturn -37.3497 +__unnamed_task__/AverageReturn -73.5079 +__unnamed_task__/Iteration 32 +__unnamed_task__/MaxReturn -52.5385 +__unnamed_task__/MinReturn -103.177 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.60238 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 01:44:03 | [maml_trainer] epoch #33 | Sampling for adapation and meta-testing... +2025-03-30 01:47:56 | [maml_trainer] epoch #33 | Finished meta-testing... +2025-03-30 01:47:56 | [maml_trainer] epoch #33 | Saving snapshot... +2025-03-30 01:48:14 | [maml_trainer] epoch #33 | Saved +2025-03-30 01:48:14 | [maml_trainer] epoch #33 | Time 41995.79 s +2025-03-30 01:48:14 | [maml_trainer] epoch #33 | EpochTime 1175.09 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.8805 +Average/AverageReturn -71.8276 +Average/Iteration 33 +Average/MaxReturn -55.1094 +Average/MinReturn -98.2301 +Average/NumEpisodes 80 +Average/StdReturn 7.78342 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9909 +GaussianMLPPolicy/KLAfter 0.00394269 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.31437e-05 +GaussianMLPPolicy/LossBefore -2.38419e-10 +GaussianMLPPolicy/dLoss -9.31439e-05 +Iteration 33 +MetaTest/Average/AverageDiscountedReturn -75.5403 +MetaTest/Average/AverageReturn -75.5403 +MetaTest/Average/Iteration 33 +MetaTest/Average/MaxReturn -59.5254 +MetaTest/Average/MinReturn -103.37 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1374 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -75.5403 +MetaTest/__unnamed_task__/AverageReturn -75.5403 +MetaTest/__unnamed_task__/Iteration 33 +MetaTest/__unnamed_task__/MaxReturn -59.5254 +MetaTest/__unnamed_task__/MinReturn -103.37 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1374 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.088e+06 +__unnamed_task__/AverageDiscountedReturn -36.8805 +__unnamed_task__/AverageReturn -71.8276 +__unnamed_task__/Iteration 33 +__unnamed_task__/MaxReturn -55.1094 +__unnamed_task__/MinReturn -98.2301 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.78342 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 02:03:22 | [maml_trainer] epoch #34 | Sampling for adapation and meta-testing... +2025-03-30 02:07:12 | [maml_trainer] epoch #34 | Finished meta-testing... +2025-03-30 02:07:12 | [maml_trainer] epoch #34 | Saving snapshot... +2025-03-30 02:07:31 | [maml_trainer] epoch #34 | Saved +2025-03-30 02:07:31 | [maml_trainer] epoch #34 | Time 43152.13 s +2025-03-30 02:07:31 | [maml_trainer] epoch #34 | EpochTime 1156.34 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.4516 +Average/AverageReturn -71.333 +Average/Iteration 34 +Average/MaxReturn -49.2488 +Average/MinReturn -99.0932 +Average/NumEpisodes 80 +Average/StdReturn 8.8994 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98876 +GaussianMLPPolicy/KLAfter 0.00424328 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.68705e-05 +GaussianMLPPolicy/LossBefore -6.4075e-09 +GaussianMLPPolicy/dLoss -2.68769e-05 +Iteration 34 +MetaTest/Average/AverageDiscountedReturn -72.4147 +MetaTest/Average/AverageReturn -72.4147 +MetaTest/Average/Iteration 34 +MetaTest/Average/MaxReturn -53.7246 +MetaTest/Average/MinReturn -102.553 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.3654 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.4147 +MetaTest/__unnamed_task__/AverageReturn -72.4147 +MetaTest/__unnamed_task__/Iteration 34 +MetaTest/__unnamed_task__/MaxReturn -53.7246 +MetaTest/__unnamed_task__/MinReturn -102.553 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.3654 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.12e+06 +__unnamed_task__/AverageDiscountedReturn -36.4516 +__unnamed_task__/AverageReturn -71.333 +__unnamed_task__/Iteration 34 +__unnamed_task__/MaxReturn -49.2488 +__unnamed_task__/MinReturn -99.0932 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.8994 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 02:22:42 | [maml_trainer] epoch #35 | Sampling for adapation and meta-testing... +2025-03-30 02:26:26 | [maml_trainer] epoch #35 | Finished meta-testing... +2025-03-30 02:26:26 | [maml_trainer] epoch #35 | Saving snapshot... +2025-03-30 02:26:45 | [maml_trainer] epoch #35 | Saved +2025-03-30 02:26:45 | [maml_trainer] epoch #35 | Time 44306.87 s +2025-03-30 02:26:45 | [maml_trainer] epoch #35 | EpochTime 1154.74 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.7406 +Average/AverageReturn -72.8103 +Average/Iteration 35 +Average/MaxReturn -56.1665 +Average/MinReturn -102.853 +Average/NumEpisodes 80 +Average/StdReturn 8.92532 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98709 +GaussianMLPPolicy/KLAfter 0.00340089 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.66524e-05 +GaussianMLPPolicy/LossBefore -1.12653e-08 +GaussianMLPPolicy/dLoss 1.66412e-05 +Iteration 35 +MetaTest/Average/AverageDiscountedReturn -75.8542 +MetaTest/Average/AverageReturn -75.8542 +MetaTest/Average/Iteration 35 +MetaTest/Average/MaxReturn -60.8955 +MetaTest/Average/MinReturn -100.432 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.311 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -75.8542 +MetaTest/__unnamed_task__/AverageReturn -75.8542 +MetaTest/__unnamed_task__/Iteration 35 +MetaTest/__unnamed_task__/MaxReturn -60.8955 +MetaTest/__unnamed_task__/MinReturn -100.432 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.311 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.152e+06 +__unnamed_task__/AverageDiscountedReturn -36.7406 +__unnamed_task__/AverageReturn -72.8103 +__unnamed_task__/Iteration 35 +__unnamed_task__/MaxReturn -56.1665 +__unnamed_task__/MinReturn -102.853 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.92532 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 02:41:50 | [maml_trainer] epoch #36 | Sampling for adapation and meta-testing... +2025-03-30 02:45:39 | [maml_trainer] epoch #36 | Finished meta-testing... +2025-03-30 02:45:39 | [maml_trainer] epoch #36 | Saving snapshot... +2025-03-30 02:45:58 | [maml_trainer] epoch #36 | Saved +2025-03-30 02:45:58 | [maml_trainer] epoch #36 | Time 45459.74 s +2025-03-30 02:45:58 | [maml_trainer] epoch #36 | EpochTime 1152.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.0912 +Average/AverageReturn -73.5921 +Average/Iteration 36 +Average/MaxReturn -56.5694 +Average/MinReturn -101.049 +Average/NumEpisodes 80 +Average/StdReturn 9.41712 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98615 +GaussianMLPPolicy/KLAfter 0.00316686 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.02998e-05 +GaussianMLPPolicy/LossBefore -9.23872e-10 +GaussianMLPPolicy/dLoss 9.02989e-05 +Iteration 36 +MetaTest/Average/AverageDiscountedReturn -73.155 +MetaTest/Average/AverageReturn -73.155 +MetaTest/Average/Iteration 36 +MetaTest/Average/MaxReturn -56.6504 +MetaTest/Average/MinReturn -91.8004 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.94149 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.155 +MetaTest/__unnamed_task__/AverageReturn -73.155 +MetaTest/__unnamed_task__/Iteration 36 +MetaTest/__unnamed_task__/MaxReturn -56.6504 +MetaTest/__unnamed_task__/MinReturn -91.8004 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.94149 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.184e+06 +__unnamed_task__/AverageDiscountedReturn -37.0912 +__unnamed_task__/AverageReturn -73.5921 +__unnamed_task__/Iteration 36 +__unnamed_task__/MaxReturn -56.5694 +__unnamed_task__/MinReturn -101.049 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.41712 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 03:01:05 | [maml_trainer] epoch #37 | Sampling for adapation and meta-testing... +2025-03-30 03:04:52 | [maml_trainer] epoch #37 | Finished meta-testing... +2025-03-30 03:04:52 | [maml_trainer] epoch #37 | Saving snapshot... +2025-03-30 03:05:11 | [maml_trainer] epoch #37 | Saved +2025-03-30 03:05:11 | [maml_trainer] epoch #37 | Time 46612.75 s +2025-03-30 03:05:11 | [maml_trainer] epoch #37 | EpochTime 1153.00 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.2924 +Average/AverageReturn -74.4608 +Average/Iteration 37 +Average/MaxReturn -52.543 +Average/MinReturn -108.952 +Average/NumEpisodes 80 +Average/StdReturn 10.3376 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98455 +GaussianMLPPolicy/KLAfter 0.00291497 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.37182e-05 +GaussianMLPPolicy/LossBefore 1.57952e-09 +GaussianMLPPolicy/dLoss 8.37197e-05 +Iteration 37 +MetaTest/Average/AverageDiscountedReturn -74.7993 +MetaTest/Average/AverageReturn -74.7993 +MetaTest/Average/Iteration 37 +MetaTest/Average/MaxReturn -55.0935 +MetaTest/Average/MinReturn -91.6709 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.13376 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -74.7993 +MetaTest/__unnamed_task__/AverageReturn -74.7993 +MetaTest/__unnamed_task__/Iteration 37 +MetaTest/__unnamed_task__/MaxReturn -55.0935 +MetaTest/__unnamed_task__/MinReturn -91.6709 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.13376 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.216e+06 +__unnamed_task__/AverageDiscountedReturn -37.2924 +__unnamed_task__/AverageReturn -74.4608 +__unnamed_task__/Iteration 37 +__unnamed_task__/MaxReturn -52.543 +__unnamed_task__/MinReturn -108.952 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3376 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 03:20:26 | [maml_trainer] epoch #38 | Sampling for adapation and meta-testing... +2025-03-30 03:24:13 | [maml_trainer] epoch #38 | Finished meta-testing... +2025-03-30 03:24:13 | [maml_trainer] epoch #38 | Saving snapshot... +2025-03-30 03:24:31 | [maml_trainer] epoch #38 | Saved +2025-03-30 03:24:31 | [maml_trainer] epoch #38 | Time 47772.51 s +2025-03-30 03:24:31 | [maml_trainer] epoch #38 | EpochTime 1159.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -37.5391 +Average/AverageReturn -75.2278 +Average/Iteration 38 +Average/MaxReturn -56.7293 +Average/MinReturn -96.3083 +Average/NumEpisodes 80 +Average/StdReturn 9.34174 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98249 +GaussianMLPPolicy/KLAfter 0.0013104 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.68165e-05 +GaussianMLPPolicy/LossBefore -1.27554e-08 +GaussianMLPPolicy/dLoss -2.68292e-05 +Iteration 38 +MetaTest/Average/AverageDiscountedReturn -77.0793 +MetaTest/Average/AverageReturn -77.0793 +MetaTest/Average/Iteration 38 +MetaTest/Average/MaxReturn -59.8339 +MetaTest/Average/MinReturn -88.8127 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.75957 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -77.0793 +MetaTest/__unnamed_task__/AverageReturn -77.0793 +MetaTest/__unnamed_task__/Iteration 38 +MetaTest/__unnamed_task__/MaxReturn -59.8339 +MetaTest/__unnamed_task__/MinReturn -88.8127 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.75957 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.248e+06 +__unnamed_task__/AverageDiscountedReturn -37.5391 +__unnamed_task__/AverageReturn -75.2278 +__unnamed_task__/Iteration 38 +__unnamed_task__/MaxReturn -56.7293 +__unnamed_task__/MinReturn -96.3083 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.34174 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 03:39:39 | [maml_trainer] epoch #39 | Sampling for adapation and meta-testing... +2025-03-30 03:43:27 | [maml_trainer] epoch #39 | Finished meta-testing... +2025-03-30 03:43:27 | [maml_trainer] epoch #39 | Saving snapshot... +2025-03-30 03:43:46 | [maml_trainer] epoch #39 | Saved +2025-03-30 03:43:46 | [maml_trainer] epoch #39 | Time 48927.35 s +2025-03-30 03:43:46 | [maml_trainer] epoch #39 | EpochTime 1154.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.4193 +Average/AverageReturn -72.5379 +Average/Iteration 39 +Average/MaxReturn -49.8306 +Average/MinReturn -102.671 +Average/NumEpisodes 80 +Average/StdReturn 10.8982 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98024 +GaussianMLPPolicy/KLAfter 0.00100474 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.42951e-05 +GaussianMLPPolicy/LossBefore 3.09944e-09 +GaussianMLPPolicy/dLoss 3.42982e-05 +Iteration 39 +MetaTest/Average/AverageDiscountedReturn -75.2167 +MetaTest/Average/AverageReturn -75.2167 +MetaTest/Average/Iteration 39 +MetaTest/Average/MaxReturn -60.536 +MetaTest/Average/MinReturn -92.4341 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.44411 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -75.2167 +MetaTest/__unnamed_task__/AverageReturn -75.2167 +MetaTest/__unnamed_task__/Iteration 39 +MetaTest/__unnamed_task__/MaxReturn -60.536 +MetaTest/__unnamed_task__/MinReturn -92.4341 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.44411 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.28e+06 +__unnamed_task__/AverageDiscountedReturn -36.4193 +__unnamed_task__/AverageReturn -72.5379 +__unnamed_task__/Iteration 39 +__unnamed_task__/MaxReturn -49.8306 +__unnamed_task__/MinReturn -102.671 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8982 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 03:58:58 | [maml_trainer] epoch #40 | Sampling for adapation and meta-testing... +2025-03-30 04:02:47 | [maml_trainer] epoch #40 | Finished meta-testing... +2025-03-30 04:02:47 | [maml_trainer] epoch #40 | Saving snapshot... +2025-03-30 04:03:06 | [maml_trainer] epoch #40 | Saved +2025-03-30 04:03:06 | [maml_trainer] epoch #40 | Time 50087.20 s +2025-03-30 04:03:06 | [maml_trainer] epoch #40 | EpochTime 1159.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.2816 +Average/AverageReturn -74.6616 +Average/Iteration 40 +Average/MaxReturn -55.3021 +Average/MinReturn -101.099 +Average/NumEpisodes 80 +Average/StdReturn 10.5999 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97764 +GaussianMLPPolicy/KLAfter 0.00123003 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.02526e-05 +GaussianMLPPolicy/LossBefore 8.88109e-09 +GaussianMLPPolicy/dLoss 9.02615e-05 +Iteration 40 +MetaTest/Average/AverageDiscountedReturn -74.6646 +MetaTest/Average/AverageReturn -74.6646 +MetaTest/Average/Iteration 40 +MetaTest/Average/MaxReturn -58.8318 +MetaTest/Average/MinReturn -94.8908 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4725 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -74.6646 +MetaTest/__unnamed_task__/AverageReturn -74.6646 +MetaTest/__unnamed_task__/Iteration 40 +MetaTest/__unnamed_task__/MaxReturn -58.8318 +MetaTest/__unnamed_task__/MinReturn -94.8908 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4725 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.312e+06 +__unnamed_task__/AverageDiscountedReturn -37.2816 +__unnamed_task__/AverageReturn -74.6616 +__unnamed_task__/Iteration 40 +__unnamed_task__/MaxReturn -55.3021 +__unnamed_task__/MinReturn -101.099 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5999 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 04:18:11 | [maml_trainer] epoch #41 | Sampling for adapation and meta-testing... +2025-03-30 04:21:58 | [maml_trainer] epoch #41 | Finished meta-testing... +2025-03-30 04:21:58 | [maml_trainer] epoch #41 | Saving snapshot... +2025-03-30 04:22:17 | [maml_trainer] epoch #41 | Saved +2025-03-30 04:22:17 | [maml_trainer] epoch #41 | Time 51238.02 s +2025-03-30 04:22:17 | [maml_trainer] epoch #41 | EpochTime 1150.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -37.3154 +Average/AverageReturn -74.4014 +Average/Iteration 41 +Average/MaxReturn -52.8987 +Average/MinReturn -99.1047 +Average/NumEpisodes 80 +Average/StdReturn 10.2155 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97562 +GaussianMLPPolicy/KLAfter 0.00183982 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.75738e-06 +GaussianMLPPolicy/LossBefore -4.64916e-09 +GaussianMLPPolicy/dLoss -4.76202e-06 +Iteration 41 +MetaTest/Average/AverageDiscountedReturn -73.1431 +MetaTest/Average/AverageReturn -73.1431 +MetaTest/Average/Iteration 41 +MetaTest/Average/MaxReturn -50.2254 +MetaTest/Average/MinReturn -107.159 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.968 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.1431 +MetaTest/__unnamed_task__/AverageReturn -73.1431 +MetaTest/__unnamed_task__/Iteration 41 +MetaTest/__unnamed_task__/MaxReturn -50.2254 +MetaTest/__unnamed_task__/MinReturn -107.159 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.968 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.344e+06 +__unnamed_task__/AverageDiscountedReturn -37.3154 +__unnamed_task__/AverageReturn -74.4014 +__unnamed_task__/Iteration 41 +__unnamed_task__/MaxReturn -52.8987 +__unnamed_task__/MinReturn -99.1047 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2155 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 04:37:30 | [maml_trainer] epoch #42 | Sampling for adapation and meta-testing... +2025-03-30 04:41:18 | [maml_trainer] epoch #42 | Finished meta-testing... +2025-03-30 04:41:18 | [maml_trainer] epoch #42 | Saving snapshot... +2025-03-30 04:41:36 | [maml_trainer] epoch #42 | Saved +2025-03-30 04:41:36 | [maml_trainer] epoch #42 | Time 52397.57 s +2025-03-30 04:41:36 | [maml_trainer] epoch #42 | EpochTime 1159.54 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.7731 +Average/AverageReturn -73.3162 +Average/Iteration 42 +Average/MaxReturn -51.6155 +Average/MinReturn -95.3423 +Average/NumEpisodes 80 +Average/StdReturn 10.5624 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97442 +GaussianMLPPolicy/KLAfter 0.00115957 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.39981e-05 +GaussianMLPPolicy/LossBefore 6.13928e-09 +GaussianMLPPolicy/dLoss -5.39919e-05 +Iteration 42 +MetaTest/Average/AverageDiscountedReturn -74.7366 +MetaTest/Average/AverageReturn -74.7366 +MetaTest/Average/Iteration 42 +MetaTest/Average/MaxReturn -52.4039 +MetaTest/Average/MinReturn -103.73 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.1375 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -74.7366 +MetaTest/__unnamed_task__/AverageReturn -74.7366 +MetaTest/__unnamed_task__/Iteration 42 +MetaTest/__unnamed_task__/MaxReturn -52.4039 +MetaTest/__unnamed_task__/MinReturn -103.73 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.1375 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.376e+06 +__unnamed_task__/AverageDiscountedReturn -36.7731 +__unnamed_task__/AverageReturn -73.3162 +__unnamed_task__/Iteration 42 +__unnamed_task__/MaxReturn -51.6155 +__unnamed_task__/MinReturn -95.3423 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5624 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 04:56:46 | [maml_trainer] epoch #43 | Sampling for adapation and meta-testing... +2025-03-30 05:00:35 | [maml_trainer] epoch #43 | Finished meta-testing... +2025-03-30 05:00:35 | [maml_trainer] epoch #43 | Saving snapshot... +2025-03-30 05:00:54 | [maml_trainer] epoch #43 | Saved +2025-03-30 05:00:54 | [maml_trainer] epoch #43 | Time 53555.96 s +2025-03-30 05:00:54 | [maml_trainer] epoch #43 | EpochTime 1158.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.954 +Average/AverageReturn -74.5294 +Average/Iteration 43 +Average/MaxReturn -57.0835 +Average/MinReturn -102.206 +Average/NumEpisodes 80 +Average/StdReturn 11.4103 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9732 +GaussianMLPPolicy/KLAfter 0.0014267 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.87965e-06 +GaussianMLPPolicy/LossBefore 5.24521e-09 +GaussianMLPPolicy/dLoss 9.8849e-06 +Iteration 43 +MetaTest/Average/AverageDiscountedReturn -72.8948 +MetaTest/Average/AverageReturn -72.8948 +MetaTest/Average/Iteration 43 +MetaTest/Average/MaxReturn -57.7517 +MetaTest/Average/MinReturn -95.9168 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.40209 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.8948 +MetaTest/__unnamed_task__/AverageReturn -72.8948 +MetaTest/__unnamed_task__/Iteration 43 +MetaTest/__unnamed_task__/MaxReturn -57.7517 +MetaTest/__unnamed_task__/MinReturn -95.9168 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.40209 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.408e+06 +__unnamed_task__/AverageDiscountedReturn -36.954 +__unnamed_task__/AverageReturn -74.5294 +__unnamed_task__/Iteration 43 +__unnamed_task__/MaxReturn -57.0835 +__unnamed_task__/MinReturn -102.206 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4103 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 05:16:17 | [maml_trainer] epoch #44 | Sampling for adapation and meta-testing... +2025-03-30 05:20:05 | [maml_trainer] epoch #44 | Finished meta-testing... +2025-03-30 05:20:05 | [maml_trainer] epoch #44 | Saving snapshot... +2025-03-30 05:20:25 | [maml_trainer] epoch #44 | Saved +2025-03-30 05:20:25 | [maml_trainer] epoch #44 | Time 54726.26 s +2025-03-30 05:20:25 | [maml_trainer] epoch #44 | EpochTime 1170.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -37.0479 +Average/AverageReturn -74.1627 +Average/Iteration 44 +Average/MaxReturn -56.4595 +Average/MinReturn -94.4407 +Average/NumEpisodes 80 +Average/StdReturn 9.89138 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97147 +GaussianMLPPolicy/KLAfter 0.00203974 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.16829e-06 +GaussianMLPPolicy/LossBefore 7.33137e-09 +GaussianMLPPolicy/dLoss 5.17562e-06 +Iteration 44 +MetaTest/Average/AverageDiscountedReturn -73.5263 +MetaTest/Average/AverageReturn -73.5263 +MetaTest/Average/Iteration 44 +MetaTest/Average/MaxReturn -60.3453 +MetaTest/Average/MinReturn -93.2349 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.34565 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.5263 +MetaTest/__unnamed_task__/AverageReturn -73.5263 +MetaTest/__unnamed_task__/Iteration 44 +MetaTest/__unnamed_task__/MaxReturn -60.3453 +MetaTest/__unnamed_task__/MinReturn -93.2349 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.34565 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.44e+06 +__unnamed_task__/AverageDiscountedReturn -37.0479 +__unnamed_task__/AverageReturn -74.1627 +__unnamed_task__/Iteration 44 +__unnamed_task__/MaxReturn -56.4595 +__unnamed_task__/MinReturn -94.4407 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.89138 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 05:35:42 | [maml_trainer] epoch #45 | Sampling for adapation and meta-testing... +2025-03-30 05:39:30 | [maml_trainer] epoch #45 | Finished meta-testing... +2025-03-30 05:39:30 | [maml_trainer] epoch #45 | Saving snapshot... +2025-03-30 05:39:49 | [maml_trainer] epoch #45 | Saved +2025-03-30 05:39:49 | [maml_trainer] epoch #45 | Time 55890.51 s +2025-03-30 05:39:49 | [maml_trainer] epoch #45 | EpochTime 1164.25 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.7074 +Average/AverageReturn -74.2219 +Average/Iteration 45 +Average/MaxReturn -53.189 +Average/MinReturn -106.012 +Average/NumEpisodes 80 +Average/StdReturn 12.0614 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97127 +GaussianMLPPolicy/KLAfter 0.00203822 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000134953 +GaussianMLPPolicy/LossBefore -6.58631e-09 +GaussianMLPPolicy/dLoss 0.000134946 +Iteration 45 +MetaTest/Average/AverageDiscountedReturn -70.5354 +MetaTest/Average/AverageReturn -70.5354 +MetaTest/Average/Iteration 45 +MetaTest/Average/MaxReturn -55.408 +MetaTest/Average/MinReturn -93.4584 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.54395 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -70.5354 +MetaTest/__unnamed_task__/AverageReturn -70.5354 +MetaTest/__unnamed_task__/Iteration 45 +MetaTest/__unnamed_task__/MaxReturn -55.408 +MetaTest/__unnamed_task__/MinReturn -93.4584 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.54395 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.472e+06 +__unnamed_task__/AverageDiscountedReturn -36.7074 +__unnamed_task__/AverageReturn -74.2219 +__unnamed_task__/Iteration 45 +__unnamed_task__/MaxReturn -53.189 +__unnamed_task__/MinReturn -106.012 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.0614 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 05:55:04 | [maml_trainer] epoch #46 | Sampling for adapation and meta-testing... +2025-03-30 05:58:52 | [maml_trainer] epoch #46 | Finished meta-testing... +2025-03-30 05:58:52 | [maml_trainer] epoch #46 | Saving snapshot... +2025-03-30 05:59:11 | [maml_trainer] epoch #46 | Saved +2025-03-30 05:59:11 | [maml_trainer] epoch #46 | Time 57052.15 s +2025-03-30 05:59:11 | [maml_trainer] epoch #46 | EpochTime 1161.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -35.308 +Average/AverageReturn -70.0999 +Average/Iteration 46 +Average/MaxReturn -50.7085 +Average/MinReturn -96.7041 +Average/NumEpisodes 80 +Average/StdReturn 9.48822 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97037 +GaussianMLPPolicy/KLAfter 0.00242521 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.9179e-05 +GaussianMLPPolicy/LossBefore 4.44055e-09 +GaussianMLPPolicy/dLoss 1.91835e-05 +Iteration 46 +MetaTest/Average/AverageDiscountedReturn -75.755 +MetaTest/Average/AverageReturn -75.755 +MetaTest/Average/Iteration 46 +MetaTest/Average/MaxReturn -52.5598 +MetaTest/Average/MinReturn -108.069 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0543 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -75.755 +MetaTest/__unnamed_task__/AverageReturn -75.755 +MetaTest/__unnamed_task__/Iteration 46 +MetaTest/__unnamed_task__/MaxReturn -52.5598 +MetaTest/__unnamed_task__/MinReturn -108.069 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0543 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.504e+06 +__unnamed_task__/AverageDiscountedReturn -35.308 +__unnamed_task__/AverageReturn -70.0999 +__unnamed_task__/Iteration 46 +__unnamed_task__/MaxReturn -50.7085 +__unnamed_task__/MinReturn -96.7041 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.48822 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 06:14:30 | [maml_trainer] epoch #47 | Sampling for adapation and meta-testing... +2025-03-30 06:18:20 | [maml_trainer] epoch #47 | Finished meta-testing... +2025-03-30 06:18:20 | [maml_trainer] epoch #47 | Saving snapshot... +2025-03-30 06:18:38 | [maml_trainer] epoch #47 | Saved +2025-03-30 06:18:38 | [maml_trainer] epoch #47 | Time 58219.61 s +2025-03-30 06:18:38 | [maml_trainer] epoch #47 | EpochTime 1167.45 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -35.2398 +Average/AverageReturn -70.2842 +Average/Iteration 47 +Average/MaxReturn -51.4477 +Average/MinReturn -94.4066 +Average/NumEpisodes 80 +Average/StdReturn 10.2812 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96887 +GaussianMLPPolicy/KLAfter 0.0028483 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.68726e-05 +GaussianMLPPolicy/LossBefore 1.00434e-08 +GaussianMLPPolicy/dLoss -6.68626e-05 +Iteration 47 +MetaTest/Average/AverageDiscountedReturn -73.602 +MetaTest/Average/AverageReturn -73.602 +MetaTest/Average/Iteration 47 +MetaTest/Average/MaxReturn -55.2488 +MetaTest/Average/MinReturn -102.634 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.463 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -73.602 +MetaTest/__unnamed_task__/AverageReturn -73.602 +MetaTest/__unnamed_task__/Iteration 47 +MetaTest/__unnamed_task__/MaxReturn -55.2488 +MetaTest/__unnamed_task__/MinReturn -102.634 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.463 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.536e+06 +__unnamed_task__/AverageDiscountedReturn -35.2398 +__unnamed_task__/AverageReturn -70.2842 +__unnamed_task__/Iteration 47 +__unnamed_task__/MaxReturn -51.4477 +__unnamed_task__/MinReturn -94.4066 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2812 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 06:33:49 | [maml_trainer] epoch #48 | Sampling for adapation and meta-testing... +2025-03-30 06:37:44 | [maml_trainer] epoch #48 | Finished meta-testing... +2025-03-30 06:37:44 | [maml_trainer] epoch #48 | Saving snapshot... +2025-03-30 06:38:03 | [maml_trainer] epoch #48 | Saved +2025-03-30 06:38:03 | [maml_trainer] epoch #48 | Time 59384.80 s +2025-03-30 06:38:03 | [maml_trainer] epoch #48 | EpochTime 1165.19 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -35.501 +Average/AverageReturn -70.7312 +Average/Iteration 48 +Average/MaxReturn -53.1059 +Average/MinReturn -99.2901 +Average/NumEpisodes 80 +Average/StdReturn 9.63416 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96774 +GaussianMLPPolicy/KLAfter 0.00265233 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.39433e-05 +GaussianMLPPolicy/LossBefore -8.91089e-09 +GaussianMLPPolicy/dLoss -5.39522e-05 +Iteration 48 +MetaTest/Average/AverageDiscountedReturn -71.1436 +MetaTest/Average/AverageReturn -71.1436 +MetaTest/Average/Iteration 48 +MetaTest/Average/MaxReturn -59.1449 +MetaTest/Average/MinReturn -100.102 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1807 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -71.1436 +MetaTest/__unnamed_task__/AverageReturn -71.1436 +MetaTest/__unnamed_task__/Iteration 48 +MetaTest/__unnamed_task__/MaxReturn -59.1449 +MetaTest/__unnamed_task__/MinReturn -100.102 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1807 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.568e+06 +__unnamed_task__/AverageDiscountedReturn -35.501 +__unnamed_task__/AverageReturn -70.7312 +__unnamed_task__/Iteration 48 +__unnamed_task__/MaxReturn -53.1059 +__unnamed_task__/MinReturn -99.2901 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.63416 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 06:53:36 | [maml_trainer] epoch #49 | Sampling for adapation and meta-testing... +2025-03-30 06:57:27 | [maml_trainer] epoch #49 | Finished meta-testing... +2025-03-30 06:57:27 | [maml_trainer] epoch #49 | Saving snapshot... +2025-03-30 06:57:46 | [maml_trainer] epoch #49 | Saved +2025-03-30 06:57:46 | [maml_trainer] epoch #49 | Time 60567.90 s +2025-03-30 06:57:46 | [maml_trainer] epoch #49 | EpochTime 1183.10 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.782 +Average/AverageReturn -68.587 +Average/Iteration 49 +Average/MaxReturn -51.5807 +Average/MinReturn -91.5508 +Average/NumEpisodes 80 +Average/StdReturn 8.7624 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96567 +GaussianMLPPolicy/KLAfter 0.00242913 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000106155 +GaussianMLPPolicy/LossBefore -1.51992e-08 +GaussianMLPPolicy/dLoss 0.00010614 +Iteration 49 +MetaTest/Average/AverageDiscountedReturn -71.6926 +MetaTest/Average/AverageReturn -71.6926 +MetaTest/Average/Iteration 49 +MetaTest/Average/MaxReturn -57.3365 +MetaTest/Average/MinReturn -87.2391 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.89822 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -71.6926 +MetaTest/__unnamed_task__/AverageReturn -71.6926 +MetaTest/__unnamed_task__/Iteration 49 +MetaTest/__unnamed_task__/MaxReturn -57.3365 +MetaTest/__unnamed_task__/MinReturn -87.2391 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.89822 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.6e+06 +__unnamed_task__/AverageDiscountedReturn -34.782 +__unnamed_task__/AverageReturn -68.587 +__unnamed_task__/Iteration 49 +__unnamed_task__/MaxReturn -51.5807 +__unnamed_task__/MinReturn -91.5508 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.7624 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 07:13:15 | [maml_trainer] epoch #50 | Sampling for adapation and meta-testing... +2025-03-30 07:17:07 | [maml_trainer] epoch #50 | Finished meta-testing... +2025-03-30 07:17:07 | [maml_trainer] epoch #50 | Saving snapshot... +2025-03-30 07:17:27 | [maml_trainer] epoch #50 | Saved +2025-03-30 07:17:27 | [maml_trainer] epoch #50 | Time 61748.14 s +2025-03-30 07:17:27 | [maml_trainer] epoch #50 | EpochTime 1180.23 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -36.1261 +Average/AverageReturn -72.3799 +Average/Iteration 50 +Average/MaxReturn -51.9743 +Average/MinReturn -103.821 +Average/NumEpisodes 80 +Average/StdReturn 11.0117 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96313 +GaussianMLPPolicy/KLAfter 0.00378725 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000103202 +GaussianMLPPolicy/LossBefore 7.30157e-09 +GaussianMLPPolicy/dLoss 0.000103209 +Iteration 50 +MetaTest/Average/AverageDiscountedReturn -66.6026 +MetaTest/Average/AverageReturn -66.6026 +MetaTest/Average/Iteration 50 +MetaTest/Average/MaxReturn -50.3326 +MetaTest/Average/MinReturn -94.1973 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0838 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -66.6026 +MetaTest/__unnamed_task__/AverageReturn -66.6026 +MetaTest/__unnamed_task__/Iteration 50 +MetaTest/__unnamed_task__/MaxReturn -50.3326 +MetaTest/__unnamed_task__/MinReturn -94.1973 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0838 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.632e+06 +__unnamed_task__/AverageDiscountedReturn -36.1261 +__unnamed_task__/AverageReturn -72.3799 +__unnamed_task__/Iteration 50 +__unnamed_task__/MaxReturn -51.9743 +__unnamed_task__/MinReturn -103.821 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0117 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 07:33:06 | [maml_trainer] epoch #51 | Sampling for adapation and meta-testing... +2025-03-30 07:36:58 | [maml_trainer] epoch #51 | Finished meta-testing... +2025-03-30 07:36:58 | [maml_trainer] epoch #51 | Saving snapshot... +2025-03-30 07:37:16 | [maml_trainer] epoch #51 | Saved +2025-03-30 07:37:16 | [maml_trainer] epoch #51 | Time 62937.84 s +2025-03-30 07:37:16 | [maml_trainer] epoch #51 | EpochTime 1189.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.5078 +Average/AverageReturn -67.9598 +Average/Iteration 51 +Average/MaxReturn -53.6169 +Average/MinReturn -99.0024 +Average/NumEpisodes 80 +Average/StdReturn 9.87471 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96079 +GaussianMLPPolicy/KLAfter 0.00303802 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000123951 +GaussianMLPPolicy/LossBefore -7.48038e-09 +GaussianMLPPolicy/dLoss 0.000123944 +Iteration 51 +MetaTest/Average/AverageDiscountedReturn -68.354 +MetaTest/Average/AverageReturn -68.354 +MetaTest/Average/Iteration 51 +MetaTest/Average/MaxReturn -55.8222 +MetaTest/Average/MinReturn -92.862 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7064 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -68.354 +MetaTest/__unnamed_task__/AverageReturn -68.354 +MetaTest/__unnamed_task__/Iteration 51 +MetaTest/__unnamed_task__/MaxReturn -55.8222 +MetaTest/__unnamed_task__/MinReturn -92.862 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7064 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.664e+06 +__unnamed_task__/AverageDiscountedReturn -34.5078 +__unnamed_task__/AverageReturn -67.9598 +__unnamed_task__/Iteration 51 +__unnamed_task__/MaxReturn -53.6169 +__unnamed_task__/MinReturn -99.0024 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.87471 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 07:52:40 | [maml_trainer] epoch #52 | Sampling for adapation and meta-testing... +2025-03-30 07:56:33 | [maml_trainer] epoch #52 | Finished meta-testing... +2025-03-30 07:56:33 | [maml_trainer] epoch #52 | Saving snapshot... +2025-03-30 07:56:52 | [maml_trainer] epoch #52 | Saved +2025-03-30 07:56:52 | [maml_trainer] epoch #52 | Time 64113.72 s +2025-03-30 07:56:52 | [maml_trainer] epoch #52 | EpochTime 1175.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.7591 +Average/AverageReturn -68.4752 +Average/Iteration 52 +Average/MaxReturn -52.0886 +Average/MinReturn -104.947 +Average/NumEpisodes 80 +Average/StdReturn 10.0482 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95934 +GaussianMLPPolicy/KLAfter 0.00231181 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000119713 +GaussianMLPPolicy/LossBefore 1.10269e-09 +GaussianMLPPolicy/dLoss -0.000119712 +Iteration 52 +MetaTest/Average/AverageDiscountedReturn -65.3499 +MetaTest/Average/AverageReturn -65.3499 +MetaTest/Average/Iteration 52 +MetaTest/Average/MaxReturn -53.6891 +MetaTest/Average/MinReturn -95.1861 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.53845 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -65.3499 +MetaTest/__unnamed_task__/AverageReturn -65.3499 +MetaTest/__unnamed_task__/Iteration 52 +MetaTest/__unnamed_task__/MaxReturn -53.6891 +MetaTest/__unnamed_task__/MinReturn -95.1861 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.53845 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.696e+06 +__unnamed_task__/AverageDiscountedReturn -34.7591 +__unnamed_task__/AverageReturn -68.4752 +__unnamed_task__/Iteration 52 +__unnamed_task__/MaxReturn -52.0886 +__unnamed_task__/MinReturn -104.947 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.0482 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 08:12:21 | [maml_trainer] epoch #53 | Sampling for adapation and meta-testing... +2025-03-30 08:16:13 | [maml_trainer] epoch #53 | Finished meta-testing... +2025-03-30 08:16:13 | [maml_trainer] epoch #53 | Saving snapshot... +2025-03-30 08:16:32 | [maml_trainer] epoch #53 | Saved +2025-03-30 08:16:32 | [maml_trainer] epoch #53 | Time 65293.91 s +2025-03-30 08:16:32 | [maml_trainer] epoch #53 | EpochTime 1180.19 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.0202 +Average/AverageReturn -66.4909 +Average/Iteration 53 +Average/MaxReturn -50.67 +Average/MinReturn -89.5265 +Average/NumEpisodes 80 +Average/StdReturn 9.12984 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95614 +GaussianMLPPolicy/KLAfter 0.00212966 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.2078e-05 +GaussianMLPPolicy/LossBefore 1.38581e-08 +GaussianMLPPolicy/dLoss -3.20641e-05 +Iteration 53 +MetaTest/Average/AverageDiscountedReturn -64.8484 +MetaTest/Average/AverageReturn -64.8484 +MetaTest/Average/Iteration 53 +MetaTest/Average/MaxReturn -55.4042 +MetaTest/Average/MinReturn -89.6816 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.08264 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.8484 +MetaTest/__unnamed_task__/AverageReturn -64.8484 +MetaTest/__unnamed_task__/Iteration 53 +MetaTest/__unnamed_task__/MaxReturn -55.4042 +MetaTest/__unnamed_task__/MinReturn -89.6816 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.08264 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.728e+06 +__unnamed_task__/AverageDiscountedReturn -34.0202 +__unnamed_task__/AverageReturn -66.4909 +__unnamed_task__/Iteration 53 +__unnamed_task__/MaxReturn -50.67 +__unnamed_task__/MinReturn -89.5265 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.12984 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 08:32:00 | [maml_trainer] epoch #54 | Sampling for adapation and meta-testing... +2025-03-30 08:35:53 | [maml_trainer] epoch #54 | Finished meta-testing... +2025-03-30 08:35:53 | [maml_trainer] epoch #54 | Saving snapshot... +2025-03-30 08:36:12 | [maml_trainer] epoch #54 | Saved +2025-03-30 08:36:12 | [maml_trainer] epoch #54 | Time 66473.46 s +2025-03-30 08:36:12 | [maml_trainer] epoch #54 | EpochTime 1179.55 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.5405 +Average/AverageReturn -67.1072 +Average/Iteration 54 +Average/MaxReturn -50.0964 +Average/MinReturn -96.8493 +Average/NumEpisodes 80 +Average/StdReturn 8.31513 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95319 +GaussianMLPPolicy/KLAfter 0.00190791 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.6339e-05 +GaussianMLPPolicy/LossBefore 8.55327e-09 +GaussianMLPPolicy/dLoss -2.63304e-05 +Iteration 54 +MetaTest/Average/AverageDiscountedReturn -72.1158 +MetaTest/Average/AverageReturn -72.1158 +MetaTest/Average/Iteration 54 +MetaTest/Average/MaxReturn -54.6623 +MetaTest/Average/MinReturn -96.1283 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3021 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.1158 +MetaTest/__unnamed_task__/AverageReturn -72.1158 +MetaTest/__unnamed_task__/Iteration 54 +MetaTest/__unnamed_task__/MaxReturn -54.6623 +MetaTest/__unnamed_task__/MinReturn -96.1283 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3021 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.76e+06 +__unnamed_task__/AverageDiscountedReturn -34.5405 +__unnamed_task__/AverageReturn -67.1072 +__unnamed_task__/Iteration 54 +__unnamed_task__/MaxReturn -50.0964 +__unnamed_task__/MinReturn -96.8493 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.31513 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 08:51:43 | [maml_trainer] epoch #55 | Sampling for adapation and meta-testing... +2025-03-30 08:55:34 | [maml_trainer] epoch #55 | Finished meta-testing... +2025-03-30 08:55:34 | [maml_trainer] epoch #55 | Saving snapshot... +2025-03-30 08:55:52 | [maml_trainer] epoch #55 | Saved +2025-03-30 08:55:52 | [maml_trainer] epoch #55 | Time 67653.97 s +2025-03-30 08:55:52 | [maml_trainer] epoch #55 | EpochTime 1180.50 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.8009 +Average/AverageReturn -68.5233 +Average/Iteration 55 +Average/MaxReturn -52.6985 +Average/MinReturn -102.17 +Average/NumEpisodes 80 +Average/StdReturn 10.6403 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95057 +GaussianMLPPolicy/KLAfter 0.00149358 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.25732e-05 +GaussianMLPPolicy/LossBefore 4.91738e-09 +GaussianMLPPolicy/dLoss 1.25781e-05 +Iteration 55 +MetaTest/Average/AverageDiscountedReturn -68.9306 +MetaTest/Average/AverageReturn -68.9306 +MetaTest/Average/Iteration 55 +MetaTest/Average/MaxReturn -52.5642 +MetaTest/Average/MinReturn -86.9834 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7156 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -68.9306 +MetaTest/__unnamed_task__/AverageReturn -68.9306 +MetaTest/__unnamed_task__/Iteration 55 +MetaTest/__unnamed_task__/MaxReturn -52.5642 +MetaTest/__unnamed_task__/MinReturn -86.9834 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7156 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.792e+06 +__unnamed_task__/AverageDiscountedReturn -34.8009 +__unnamed_task__/AverageReturn -68.5233 +__unnamed_task__/Iteration 55 +__unnamed_task__/MaxReturn -52.6985 +__unnamed_task__/MinReturn -102.17 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.6403 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 09:11:21 | [maml_trainer] epoch #56 | Sampling for adapation and meta-testing... +2025-03-30 09:15:13 | [maml_trainer] epoch #56 | Finished meta-testing... +2025-03-30 09:15:13 | [maml_trainer] epoch #56 | Saving snapshot... +2025-03-30 09:15:32 | [maml_trainer] epoch #56 | Saved +2025-03-30 09:15:32 | [maml_trainer] epoch #56 | Time 68833.73 s +2025-03-30 09:15:32 | [maml_trainer] epoch #56 | EpochTime 1179.76 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.0952 +Average/AverageReturn -66.2796 +Average/Iteration 56 +Average/MaxReturn -49.2654 +Average/MinReturn -100.227 +Average/NumEpisodes 80 +Average/StdReturn 9.77627 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94756 +GaussianMLPPolicy/KLAfter 0.00106588 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.04902e-05 +GaussianMLPPolicy/LossBefore -2.5034e-09 +GaussianMLPPolicy/dLoss -1.04927e-05 +Iteration 56 +MetaTest/Average/AverageDiscountedReturn -69.4817 +MetaTest/Average/AverageReturn -69.4817 +MetaTest/Average/Iteration 56 +MetaTest/Average/MaxReturn -55.7282 +MetaTest/Average/MinReturn -103.015 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.2461 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -69.4817 +MetaTest/__unnamed_task__/AverageReturn -69.4817 +MetaTest/__unnamed_task__/Iteration 56 +MetaTest/__unnamed_task__/MaxReturn -55.7282 +MetaTest/__unnamed_task__/MinReturn -103.015 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.2461 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.824e+06 +__unnamed_task__/AverageDiscountedReturn -34.0952 +__unnamed_task__/AverageReturn -66.2796 +__unnamed_task__/Iteration 56 +__unnamed_task__/MaxReturn -49.2654 +__unnamed_task__/MinReturn -100.227 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.77627 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 09:31:07 | [maml_trainer] epoch #57 | Sampling for adapation and meta-testing... +2025-03-30 09:35:01 | [maml_trainer] epoch #57 | Finished meta-testing... +2025-03-30 09:35:01 | [maml_trainer] epoch #57 | Saving snapshot... +2025-03-30 09:35:21 | [maml_trainer] epoch #57 | Saved +2025-03-30 09:35:21 | [maml_trainer] epoch #57 | Time 70022.54 s +2025-03-30 09:35:21 | [maml_trainer] epoch #57 | EpochTime 1188.81 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.2165 +Average/AverageReturn -66.8332 +Average/Iteration 57 +Average/MaxReturn -50.6638 +Average/MinReturn -103.892 +Average/NumEpisodes 80 +Average/StdReturn 10.9889 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94402 +GaussianMLPPolicy/KLAfter 0.000960415 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.55056e-05 +GaussianMLPPolicy/LossBefore -1.09076e-08 +GaussianMLPPolicy/dLoss 1.54947e-05 +Iteration 57 +MetaTest/Average/AverageDiscountedReturn -66.1783 +MetaTest/Average/AverageReturn -66.1783 +MetaTest/Average/Iteration 57 +MetaTest/Average/MaxReturn -53.2826 +MetaTest/Average/MinReturn -90.8523 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.7308 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -66.1783 +MetaTest/__unnamed_task__/AverageReturn -66.1783 +MetaTest/__unnamed_task__/Iteration 57 +MetaTest/__unnamed_task__/MaxReturn -53.2826 +MetaTest/__unnamed_task__/MinReturn -90.8523 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.7308 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.856e+06 +__unnamed_task__/AverageDiscountedReturn -34.2165 +__unnamed_task__/AverageReturn -66.8332 +__unnamed_task__/Iteration 57 +__unnamed_task__/MaxReturn -50.6638 +__unnamed_task__/MinReturn -103.892 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9889 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 09:51:15 | [maml_trainer] epoch #58 | Sampling for adapation and meta-testing... +2025-03-30 09:55:20 | [maml_trainer] epoch #58 | Finished meta-testing... +2025-03-30 09:55:20 | [maml_trainer] epoch #58 | Saving snapshot... +2025-03-30 09:55:40 | [maml_trainer] epoch #58 | Saved +2025-03-30 09:55:40 | [maml_trainer] epoch #58 | Time 71241.26 s +2025-03-30 09:55:40 | [maml_trainer] epoch #58 | EpochTime 1218.71 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.9315 +Average/AverageReturn -65.9189 +Average/Iteration 58 +Average/MaxReturn -48.8424 +Average/MinReturn -107.534 +Average/NumEpisodes 80 +Average/StdReturn 11.631 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94289 +GaussianMLPPolicy/KLAfter 0.00122573 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.73295e-05 +GaussianMLPPolicy/LossBefore -5.06639e-10 +GaussianMLPPolicy/dLoss -6.733e-05 +Iteration 58 +MetaTest/Average/AverageDiscountedReturn -67.4854 +MetaTest/Average/AverageReturn -67.4854 +MetaTest/Average/Iteration 58 +MetaTest/Average/MaxReturn -52.5224 +MetaTest/Average/MinReturn -86.0433 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7243 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -67.4854 +MetaTest/__unnamed_task__/AverageReturn -67.4854 +MetaTest/__unnamed_task__/Iteration 58 +MetaTest/__unnamed_task__/MaxReturn -52.5224 +MetaTest/__unnamed_task__/MinReturn -86.0433 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7243 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.888e+06 +__unnamed_task__/AverageDiscountedReturn -33.9315 +__unnamed_task__/AverageReturn -65.9189 +__unnamed_task__/Iteration 58 +__unnamed_task__/MaxReturn -48.8424 +__unnamed_task__/MinReturn -107.534 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.631 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 10:11:32 | [maml_trainer] epoch #59 | Sampling for adapation and meta-testing... +2025-03-30 10:15:30 | [maml_trainer] epoch #59 | Finished meta-testing... +2025-03-30 10:15:30 | [maml_trainer] epoch #59 | Saving snapshot... +2025-03-30 10:15:49 | [maml_trainer] epoch #59 | Saved +2025-03-30 10:15:49 | [maml_trainer] epoch #59 | Time 72450.72 s +2025-03-30 10:15:49 | [maml_trainer] epoch #59 | EpochTime 1209.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.0166 +Average/AverageReturn -65.806 +Average/Iteration 59 +Average/MaxReturn -50.3163 +Average/MinReturn -86.7859 +Average/NumEpisodes 80 +Average/StdReturn 8.7453 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94205 +GaussianMLPPolicy/KLAfter 0.00126563 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.43983e-06 +GaussianMLPPolicy/LossBefore 3.01003e-09 +GaussianMLPPolicy/dLoss -4.43682e-06 +Iteration 59 +MetaTest/Average/AverageDiscountedReturn -68.4232 +MetaTest/Average/AverageReturn -68.4232 +MetaTest/Average/Iteration 59 +MetaTest/Average/MaxReturn -51.6871 +MetaTest/Average/MinReturn -101.553 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.6166 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -68.4232 +MetaTest/__unnamed_task__/AverageReturn -68.4232 +MetaTest/__unnamed_task__/Iteration 59 +MetaTest/__unnamed_task__/MaxReturn -51.6871 +MetaTest/__unnamed_task__/MinReturn -101.553 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.6166 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.92e+06 +__unnamed_task__/AverageDiscountedReturn -34.0166 +__unnamed_task__/AverageReturn -65.806 +__unnamed_task__/Iteration 59 +__unnamed_task__/MaxReturn -50.3163 +__unnamed_task__/MinReturn -86.7859 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.7453 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 10:31:56 | [maml_trainer] epoch #60 | Sampling for adapation and meta-testing... +2025-03-30 10:35:50 | [maml_trainer] epoch #60 | Finished meta-testing... +2025-03-30 10:35:50 | [maml_trainer] epoch #60 | Saving snapshot... +2025-03-30 10:36:09 | [maml_trainer] epoch #60 | Saved +2025-03-30 10:36:09 | [maml_trainer] epoch #60 | Time 73670.70 s +2025-03-30 10:36:09 | [maml_trainer] epoch #60 | EpochTime 1219.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.9335 +Average/AverageReturn -64.9959 +Average/Iteration 60 +Average/MaxReturn -49.6029 +Average/MinReturn -91.1613 +Average/NumEpisodes 80 +Average/StdReturn 8.50574 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94134 +GaussianMLPPolicy/KLAfter 0.00176932 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.53141e-06 +GaussianMLPPolicy/LossBefore -4.41074e-09 +GaussianMLPPolicy/dLoss 9.527e-06 +Iteration 60 +MetaTest/Average/AverageDiscountedReturn -67.9696 +MetaTest/Average/AverageReturn -67.9696 +MetaTest/Average/Iteration 60 +MetaTest/Average/MaxReturn -50.5714 +MetaTest/Average/MinReturn -88.4776 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.22608 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -67.9696 +MetaTest/__unnamed_task__/AverageReturn -67.9696 +MetaTest/__unnamed_task__/Iteration 60 +MetaTest/__unnamed_task__/MaxReturn -50.5714 +MetaTest/__unnamed_task__/MinReturn -88.4776 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.22608 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.952e+06 +__unnamed_task__/AverageDiscountedReturn -33.9335 +__unnamed_task__/AverageReturn -64.9959 +__unnamed_task__/Iteration 60 +__unnamed_task__/MaxReturn -49.6029 +__unnamed_task__/MinReturn -91.1613 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.50574 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 10:52:07 | [maml_trainer] epoch #61 | Sampling for adapation and meta-testing... +2025-03-30 10:56:05 | [maml_trainer] epoch #61 | Finished meta-testing... +2025-03-30 10:56:05 | [maml_trainer] epoch #61 | Saving snapshot... +2025-03-30 10:56:25 | [maml_trainer] epoch #61 | Saved +2025-03-30 10:56:25 | [maml_trainer] epoch #61 | Time 74886.24 s +2025-03-30 10:56:25 | [maml_trainer] epoch #61 | EpochTime 1215.54 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.7933 +Average/AverageReturn -65.4313 +Average/Iteration 61 +Average/MaxReturn -50.1676 +Average/MinReturn -100.306 +Average/NumEpisodes 80 +Average/StdReturn 9.71002 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9402 +GaussianMLPPolicy/KLAfter 0.00342025 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000139571 +GaussianMLPPolicy/LossBefore 4.64916e-09 +GaussianMLPPolicy/dLoss -0.000139567 +Iteration 61 +MetaTest/Average/AverageDiscountedReturn -62.2821 +MetaTest/Average/AverageReturn -62.2821 +MetaTest/Average/Iteration 61 +MetaTest/Average/MaxReturn -51.2635 +MetaTest/Average/MinReturn -93.7327 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.76574 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.2821 +MetaTest/__unnamed_task__/AverageReturn -62.2821 +MetaTest/__unnamed_task__/Iteration 61 +MetaTest/__unnamed_task__/MaxReturn -51.2635 +MetaTest/__unnamed_task__/MinReturn -93.7327 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.76574 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.984e+06 +__unnamed_task__/AverageDiscountedReturn -33.7933 +__unnamed_task__/AverageReturn -65.4313 +__unnamed_task__/Iteration 61 +__unnamed_task__/MaxReturn -50.1676 +__unnamed_task__/MinReturn -100.306 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.71002 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 11:12:06 | [maml_trainer] epoch #62 | Sampling for adapation and meta-testing... +2025-03-30 11:16:02 | [maml_trainer] epoch #62 | Finished meta-testing... +2025-03-30 11:16:02 | [maml_trainer] epoch #62 | Saving snapshot... +2025-03-30 11:16:22 | [maml_trainer] epoch #62 | Saved +2025-03-30 11:16:22 | [maml_trainer] epoch #62 | Time 76083.09 s +2025-03-30 11:16:22 | [maml_trainer] epoch #62 | EpochTime 1196.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.2571 +Average/AverageReturn -66.2137 +Average/Iteration 62 +Average/MaxReturn -47.4374 +Average/MinReturn -105.496 +Average/NumEpisodes 80 +Average/StdReturn 9.26708 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93945 +GaussianMLPPolicy/KLAfter 0.00308344 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.43708e-05 +GaussianMLPPolicy/LossBefore 8.85129e-09 +GaussianMLPPolicy/dLoss 9.43797e-05 +Iteration 62 +MetaTest/Average/AverageDiscountedReturn -64.1902 +MetaTest/Average/AverageReturn -64.1902 +MetaTest/Average/Iteration 62 +MetaTest/Average/MaxReturn -55.0323 +MetaTest/Average/MinReturn -88.1229 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.65618 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.1902 +MetaTest/__unnamed_task__/AverageReturn -64.1902 +MetaTest/__unnamed_task__/Iteration 62 +MetaTest/__unnamed_task__/MaxReturn -55.0323 +MetaTest/__unnamed_task__/MinReturn -88.1229 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.65618 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.016e+06 +__unnamed_task__/AverageDiscountedReturn -34.2571 +__unnamed_task__/AverageReturn -66.2137 +__unnamed_task__/Iteration 62 +__unnamed_task__/MaxReturn -47.4374 +__unnamed_task__/MinReturn -105.496 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.26708 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 11:32:11 | [maml_trainer] epoch #63 | Sampling for adapation and meta-testing... +2025-03-30 11:36:08 | [maml_trainer] epoch #63 | Finished meta-testing... +2025-03-30 11:36:08 | [maml_trainer] epoch #63 | Saving snapshot... +2025-03-30 11:36:27 | [maml_trainer] epoch #63 | Saved +2025-03-30 11:36:27 | [maml_trainer] epoch #63 | Time 77288.89 s +2025-03-30 11:36:27 | [maml_trainer] epoch #63 | EpochTime 1205.80 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.1044 +Average/AverageReturn -65.1778 +Average/Iteration 63 +Average/MaxReturn -51.8389 +Average/MinReturn -97.6694 +Average/NumEpisodes 80 +Average/StdReturn 9.05329 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93816 +GaussianMLPPolicy/KLAfter 0.00198464 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.60297e-05 +GaussianMLPPolicy/LossBefore 1.3113e-09 +GaussianMLPPolicy/dLoss 7.6031e-05 +Iteration 63 +MetaTest/Average/AverageDiscountedReturn -65.7784 +MetaTest/Average/AverageReturn -65.7784 +MetaTest/Average/Iteration 63 +MetaTest/Average/MaxReturn -50.5306 +MetaTest/Average/MinReturn -84.9034 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.28841 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -65.7784 +MetaTest/__unnamed_task__/AverageReturn -65.7784 +MetaTest/__unnamed_task__/Iteration 63 +MetaTest/__unnamed_task__/MaxReturn -50.5306 +MetaTest/__unnamed_task__/MinReturn -84.9034 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.28841 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.048e+06 +__unnamed_task__/AverageDiscountedReturn -34.1044 +__unnamed_task__/AverageReturn -65.1778 +__unnamed_task__/Iteration 63 +__unnamed_task__/MaxReturn -51.8389 +__unnamed_task__/MinReturn -97.6694 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.05329 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 11:52:08 | [maml_trainer] epoch #64 | Sampling for adapation and meta-testing... +2025-03-30 11:56:02 | [maml_trainer] epoch #64 | Finished meta-testing... +2025-03-30 11:56:02 | [maml_trainer] epoch #64 | Saving snapshot... +2025-03-30 11:56:21 | [maml_trainer] epoch #64 | Saved +2025-03-30 11:56:21 | [maml_trainer] epoch #64 | Time 78482.36 s +2025-03-30 11:56:21 | [maml_trainer] epoch #64 | EpochTime 1193.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.3195 +Average/AverageReturn -63.4258 +Average/Iteration 64 +Average/MaxReturn -48.0061 +Average/MinReturn -97.6364 +Average/NumEpisodes 80 +Average/StdReturn 8.72762 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93663 +GaussianMLPPolicy/KLAfter 0.000971522 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.30036e-05 +GaussianMLPPolicy/LossBefore 7.689e-09 +GaussianMLPPolicy/dLoss 2.30113e-05 +Iteration 64 +MetaTest/Average/AverageDiscountedReturn -64.6023 +MetaTest/Average/AverageReturn -64.6023 +MetaTest/Average/Iteration 64 +MetaTest/Average/MaxReturn -54.3463 +MetaTest/Average/MinReturn -83.31 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.32806 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.6023 +MetaTest/__unnamed_task__/AverageReturn -64.6023 +MetaTest/__unnamed_task__/Iteration 64 +MetaTest/__unnamed_task__/MaxReturn -54.3463 +MetaTest/__unnamed_task__/MinReturn -83.31 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.32806 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.08e+06 +__unnamed_task__/AverageDiscountedReturn -33.3195 +__unnamed_task__/AverageReturn -63.4258 +__unnamed_task__/Iteration 64 +__unnamed_task__/MaxReturn -48.0061 +__unnamed_task__/MinReturn -97.6364 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.72762 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 12:11:52 | [maml_trainer] epoch #65 | Sampling for adapation and meta-testing... +2025-03-30 12:15:46 | [maml_trainer] epoch #65 | Finished meta-testing... +2025-03-30 12:15:46 | [maml_trainer] epoch #65 | Saving snapshot... +2025-03-30 12:16:06 | [maml_trainer] epoch #65 | Saved +2025-03-30 12:16:06 | [maml_trainer] epoch #65 | Time 79667.50 s +2025-03-30 12:16:06 | [maml_trainer] epoch #65 | EpochTime 1185.14 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.4383 +Average/AverageReturn -66.0166 +Average/Iteration 65 +Average/MaxReturn -51.111 +Average/MinReturn -102.261 +Average/NumEpisodes 80 +Average/StdReturn 9.23279 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93547 +GaussianMLPPolicy/KLAfter 0.00170985 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.23287e-05 +GaussianMLPPolicy/LossBefore 3.39746e-09 +GaussianMLPPolicy/dLoss 9.23321e-05 +Iteration 65 +MetaTest/Average/AverageDiscountedReturn -61.4679 +MetaTest/Average/AverageReturn -61.4679 +MetaTest/Average/Iteration 65 +MetaTest/Average/MaxReturn -50.9164 +MetaTest/Average/MinReturn -83.765 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.59918 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.4679 +MetaTest/__unnamed_task__/AverageReturn -61.4679 +MetaTest/__unnamed_task__/Iteration 65 +MetaTest/__unnamed_task__/MaxReturn -50.9164 +MetaTest/__unnamed_task__/MinReturn -83.765 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.59918 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.112e+06 +__unnamed_task__/AverageDiscountedReturn -34.4383 +__unnamed_task__/AverageReturn -66.0166 +__unnamed_task__/Iteration 65 +__unnamed_task__/MaxReturn -51.111 +__unnamed_task__/MinReturn -102.261 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.23279 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 12:32:06 | [maml_trainer] epoch #66 | Sampling for adapation and meta-testing... +2025-03-30 12:36:05 | [maml_trainer] epoch #66 | Finished meta-testing... +2025-03-30 12:36:05 | [maml_trainer] epoch #66 | Saving snapshot... +2025-03-30 12:36:25 | [maml_trainer] epoch #66 | Saved +2025-03-30 12:36:25 | [maml_trainer] epoch #66 | Time 80886.15 s +2025-03-30 12:36:25 | [maml_trainer] epoch #66 | EpochTime 1218.65 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.0147 +Average/AverageReturn -65.6605 +Average/Iteration 66 +Average/MaxReturn -51.0911 +Average/MinReturn -103.053 +Average/NumEpisodes 80 +Average/StdReturn 9.92892 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93535 +GaussianMLPPolicy/KLAfter 0.00190514 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.23688e-05 +GaussianMLPPolicy/LossBefore 3.27826e-10 +GaussianMLPPolicy/dLoss -4.23685e-05 +Iteration 66 +MetaTest/Average/AverageDiscountedReturn -64.5053 +MetaTest/Average/AverageReturn -64.5053 +MetaTest/Average/Iteration 66 +MetaTest/Average/MaxReturn -52.6175 +MetaTest/Average/MinReturn -78.0003 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.21188 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.5053 +MetaTest/__unnamed_task__/AverageReturn -64.5053 +MetaTest/__unnamed_task__/Iteration 66 +MetaTest/__unnamed_task__/MaxReturn -52.6175 +MetaTest/__unnamed_task__/MinReturn -78.0003 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.21188 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.144e+06 +__unnamed_task__/AverageDiscountedReturn -34.0147 +__unnamed_task__/AverageReturn -65.6605 +__unnamed_task__/Iteration 66 +__unnamed_task__/MaxReturn -51.0911 +__unnamed_task__/MinReturn -103.053 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.92892 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 12:52:22 | [maml_trainer] epoch #67 | Sampling for adapation and meta-testing... +2025-03-30 12:56:17 | [maml_trainer] epoch #67 | Finished meta-testing... +2025-03-30 12:56:17 | [maml_trainer] epoch #67 | Saving snapshot... +2025-03-30 12:56:37 | [maml_trainer] epoch #67 | Saved +2025-03-30 12:56:37 | [maml_trainer] epoch #67 | Time 82098.58 s +2025-03-30 12:56:37 | [maml_trainer] epoch #67 | EpochTime 1212.43 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.4342 +Average/AverageReturn -63.3142 +Average/Iteration 67 +Average/MaxReturn -50.9793 +Average/MinReturn -89.9246 +Average/NumEpisodes 80 +Average/StdReturn 7.63025 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93606 +GaussianMLPPolicy/KLAfter 0.00226789 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.13591e-05 +GaussianMLPPolicy/LossBefore -2.71201e-09 +GaussianMLPPolicy/dLoss -8.13618e-05 +Iteration 67 +MetaTest/Average/AverageDiscountedReturn -64.0518 +MetaTest/Average/AverageReturn -64.0518 +MetaTest/Average/Iteration 67 +MetaTest/Average/MaxReturn -55.3486 +MetaTest/Average/MinReturn -77.3847 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.20589 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.0518 +MetaTest/__unnamed_task__/AverageReturn -64.0518 +MetaTest/__unnamed_task__/Iteration 67 +MetaTest/__unnamed_task__/MaxReturn -55.3486 +MetaTest/__unnamed_task__/MinReturn -77.3847 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.20589 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.176e+06 +__unnamed_task__/AverageDiscountedReturn -33.4342 +__unnamed_task__/AverageReturn -63.3142 +__unnamed_task__/Iteration 67 +__unnamed_task__/MaxReturn -50.9793 +__unnamed_task__/MinReturn -89.9246 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.63025 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 13:12:39 | [maml_trainer] epoch #68 | Sampling for adapation and meta-testing... +2025-03-30 13:16:45 | [maml_trainer] epoch #68 | Finished meta-testing... +2025-03-30 13:16:45 | [maml_trainer] epoch #68 | Saving snapshot... +2025-03-30 13:17:06 | [maml_trainer] epoch #68 | Saved +2025-03-30 13:17:06 | [maml_trainer] epoch #68 | Time 83327.35 s +2025-03-30 13:17:06 | [maml_trainer] epoch #68 | EpochTime 1228.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.1264 +Average/AverageReturn -65.0363 +Average/Iteration 68 +Average/MaxReturn -51.2984 +Average/MinReturn -87.7544 +Average/NumEpisodes 80 +Average/StdReturn 7.41963 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93672 +GaussianMLPPolicy/KLAfter 0.00334071 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.5789e-05 +GaussianMLPPolicy/LossBefore 4.47035e-10 +GaussianMLPPolicy/dLoss 2.57895e-05 +Iteration 68 +MetaTest/Average/AverageDiscountedReturn -62.8084 +MetaTest/Average/AverageReturn -62.8084 +MetaTest/Average/Iteration 68 +MetaTest/Average/MaxReturn -49.2811 +MetaTest/Average/MinReturn -81.4928 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.35533 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.8084 +MetaTest/__unnamed_task__/AverageReturn -62.8084 +MetaTest/__unnamed_task__/Iteration 68 +MetaTest/__unnamed_task__/MaxReturn -49.2811 +MetaTest/__unnamed_task__/MinReturn -81.4928 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.35533 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.208e+06 +__unnamed_task__/AverageDiscountedReturn -34.1264 +__unnamed_task__/AverageReturn -65.0363 +__unnamed_task__/Iteration 68 +__unnamed_task__/MaxReturn -51.2984 +__unnamed_task__/MinReturn -87.7544 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.41963 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 13:33:17 | [maml_trainer] epoch #69 | Sampling for adapation and meta-testing... +2025-03-30 13:37:26 | [maml_trainer] epoch #69 | Finished meta-testing... +2025-03-30 13:37:26 | [maml_trainer] epoch #69 | Saving snapshot... +2025-03-30 13:37:46 | [maml_trainer] epoch #69 | Saved +2025-03-30 13:37:46 | [maml_trainer] epoch #69 | Time 84567.55 s +2025-03-30 13:37:46 | [maml_trainer] epoch #69 | EpochTime 1240.20 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.2248 +Average/AverageReturn -65.1432 +Average/Iteration 69 +Average/MaxReturn -50.1863 +Average/MinReturn -89.5186 +Average/NumEpisodes 80 +Average/StdReturn 8.73927 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93695 +GaussianMLPPolicy/KLAfter 0.00330125 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000103825 +GaussianMLPPolicy/LossBefore 8.04663e-10 +GaussianMLPPolicy/dLoss 0.000103826 +Iteration 69 +MetaTest/Average/AverageDiscountedReturn -64.7955 +MetaTest/Average/AverageReturn -64.7955 +MetaTest/Average/Iteration 69 +MetaTest/Average/MaxReturn -54.049 +MetaTest/Average/MinReturn -74.3904 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.4348 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.7955 +MetaTest/__unnamed_task__/AverageReturn -64.7955 +MetaTest/__unnamed_task__/Iteration 69 +MetaTest/__unnamed_task__/MaxReturn -54.049 +MetaTest/__unnamed_task__/MinReturn -74.3904 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.4348 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.24e+06 +__unnamed_task__/AverageDiscountedReturn -34.2248 +__unnamed_task__/AverageReturn -65.1432 +__unnamed_task__/Iteration 69 +__unnamed_task__/MaxReturn -50.1863 +__unnamed_task__/MinReturn -89.5186 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.73927 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 13:54:11 | [maml_trainer] epoch #70 | Sampling for adapation and meta-testing... +2025-03-30 13:58:20 | [maml_trainer] epoch #70 | Finished meta-testing... +2025-03-30 13:58:20 | [maml_trainer] epoch #70 | Saving snapshot... +2025-03-30 13:58:41 | [maml_trainer] epoch #70 | Saved +2025-03-30 13:58:41 | [maml_trainer] epoch #70 | Time 85822.05 s +2025-03-30 13:58:41 | [maml_trainer] epoch #70 | EpochTime 1254.49 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -34.1954 +Average/AverageReturn -64.6576 +Average/Iteration 70 +Average/MaxReturn -50.4498 +Average/MinReturn -95.1104 +Average/NumEpisodes 80 +Average/StdReturn 9.37763 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93732 +GaussianMLPPolicy/KLAfter 0.00492415 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.90012e-05 +GaussianMLPPolicy/LossBefore -6.25849e-09 +GaussianMLPPolicy/dLoss -9.90074e-05 +Iteration 70 +MetaTest/Average/AverageDiscountedReturn -63.7613 +MetaTest/Average/AverageReturn -63.7613 +MetaTest/Average/Iteration 70 +MetaTest/Average/MaxReturn -46.34 +MetaTest/Average/MinReturn -83.7508 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.86334 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.7613 +MetaTest/__unnamed_task__/AverageReturn -63.7613 +MetaTest/__unnamed_task__/Iteration 70 +MetaTest/__unnamed_task__/MaxReturn -46.34 +MetaTest/__unnamed_task__/MinReturn -83.7508 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.86334 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.272e+06 +__unnamed_task__/AverageDiscountedReturn -34.1954 +__unnamed_task__/AverageReturn -64.6576 +__unnamed_task__/Iteration 70 +__unnamed_task__/MaxReturn -50.4498 +__unnamed_task__/MinReturn -95.1104 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.37763 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 14:14:47 | [maml_trainer] epoch #71 | Sampling for adapation and meta-testing... +2025-03-30 14:18:42 | [maml_trainer] epoch #71 | Finished meta-testing... +2025-03-30 14:18:42 | [maml_trainer] epoch #71 | Saving snapshot... +2025-03-30 14:19:02 | [maml_trainer] epoch #71 | Saved +2025-03-30 14:19:02 | [maml_trainer] epoch #71 | Time 87043.21 s +2025-03-30 14:19:02 | [maml_trainer] epoch #71 | EpochTime 1221.16 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.1689 +Average/AverageReturn -61.9819 +Average/Iteration 71 +Average/MaxReturn -49.1487 +Average/MinReturn -82.5284 +Average/NumEpisodes 80 +Average/StdReturn 6.9291 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93799 +GaussianMLPPolicy/KLAfter 0.00438343 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000176979 +GaussianMLPPolicy/LossBefore 5.42402e-09 +GaussianMLPPolicy/dLoss 0.000176984 +Iteration 71 +MetaTest/Average/AverageDiscountedReturn -66.6126 +MetaTest/Average/AverageReturn -66.6126 +MetaTest/Average/Iteration 71 +MetaTest/Average/MaxReturn -49.9948 +MetaTest/Average/MinReturn -93.0298 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.01812 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -66.6126 +MetaTest/__unnamed_task__/AverageReturn -66.6126 +MetaTest/__unnamed_task__/Iteration 71 +MetaTest/__unnamed_task__/MaxReturn -49.9948 +MetaTest/__unnamed_task__/MinReturn -93.0298 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.01812 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.304e+06 +__unnamed_task__/AverageDiscountedReturn -33.1689 +__unnamed_task__/AverageReturn -61.9819 +__unnamed_task__/Iteration 71 +__unnamed_task__/MaxReturn -49.1487 +__unnamed_task__/MinReturn -82.5284 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.9291 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 14:34:42 | [maml_trainer] epoch #72 | Sampling for adapation and meta-testing... +2025-03-30 14:38:42 | [maml_trainer] epoch #72 | Finished meta-testing... +2025-03-30 14:38:42 | [maml_trainer] epoch #72 | Saving snapshot... +2025-03-30 14:39:03 | [maml_trainer] epoch #72 | Saved +2025-03-30 14:39:03 | [maml_trainer] epoch #72 | Time 88244.90 s +2025-03-30 14:39:03 | [maml_trainer] epoch #72 | EpochTime 1201.68 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.8457 +Average/AverageReturn -63.4193 +Average/Iteration 72 +Average/MaxReturn -49.5346 +Average/MinReturn -83.2256 +Average/NumEpisodes 80 +Average/StdReturn 7.91745 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94029 +GaussianMLPPolicy/KLAfter 0.00359485 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.7039e-05 +GaussianMLPPolicy/LossBefore -5.54323e-09 +GaussianMLPPolicy/dLoss 6.70334e-05 +Iteration 72 +MetaTest/Average/AverageDiscountedReturn -61.7612 +MetaTest/Average/AverageReturn -61.7612 +MetaTest/Average/Iteration 72 +MetaTest/Average/MaxReturn -55.0847 +MetaTest/Average/MinReturn -72.0559 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.94522 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.7612 +MetaTest/__unnamed_task__/AverageReturn -61.7612 +MetaTest/__unnamed_task__/Iteration 72 +MetaTest/__unnamed_task__/MaxReturn -55.0847 +MetaTest/__unnamed_task__/MinReturn -72.0559 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.94522 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.336e+06 +__unnamed_task__/AverageDiscountedReturn -33.8457 +__unnamed_task__/AverageReturn -63.4193 +__unnamed_task__/Iteration 72 +__unnamed_task__/MaxReturn -49.5346 +__unnamed_task__/MinReturn -83.2256 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.91745 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 14:55:19 | [maml_trainer] epoch #73 | Sampling for adapation and meta-testing... +2025-03-30 14:59:30 | [maml_trainer] epoch #73 | Finished meta-testing... +2025-03-30 14:59:30 | [maml_trainer] epoch #73 | Saving snapshot... +2025-03-30 14:59:51 | [maml_trainer] epoch #73 | Saved +2025-03-30 14:59:51 | [maml_trainer] epoch #73 | Time 89492.32 s +2025-03-30 14:59:51 | [maml_trainer] epoch #73 | EpochTime 1247.42 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.9808 +Average/AverageReturn -64.3584 +Average/Iteration 73 +Average/MaxReturn -50.1435 +Average/MinReturn -82.0638 +Average/NumEpisodes 80 +Average/StdReturn 7.73637 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94258 +GaussianMLPPolicy/KLAfter 0.00340006 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000127603 +GaussianMLPPolicy/LossBefore 2.5034e-09 +GaussianMLPPolicy/dLoss -0.0001276 +Iteration 73 +MetaTest/Average/AverageDiscountedReturn -61.5196 +MetaTest/Average/AverageReturn -61.5196 +MetaTest/Average/Iteration 73 +MetaTest/Average/MaxReturn -52.8053 +MetaTest/Average/MinReturn -78.7406 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.51418 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.5196 +MetaTest/__unnamed_task__/AverageReturn -61.5196 +MetaTest/__unnamed_task__/Iteration 73 +MetaTest/__unnamed_task__/MaxReturn -52.8053 +MetaTest/__unnamed_task__/MinReturn -78.7406 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.51418 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.368e+06 +__unnamed_task__/AverageDiscountedReturn -33.9808 +__unnamed_task__/AverageReturn -64.3584 +__unnamed_task__/Iteration 73 +__unnamed_task__/MaxReturn -50.1435 +__unnamed_task__/MinReturn -82.0638 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.73637 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 15:16:20 | [maml_trainer] epoch #74 | Sampling for adapation and meta-testing... +2025-03-30 15:20:32 | [maml_trainer] epoch #74 | Finished meta-testing... +2025-03-30 15:20:32 | [maml_trainer] epoch #74 | Saving snapshot... +2025-03-30 15:20:54 | [maml_trainer] epoch #74 | Saved +2025-03-30 15:20:54 | [maml_trainer] epoch #74 | Time 90755.03 s +2025-03-30 15:20:54 | [maml_trainer] epoch #74 | EpochTime 1262.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.6857 +Average/AverageReturn -60.9396 +Average/Iteration 74 +Average/MaxReturn -48.9403 +Average/MinReturn -76.2946 +Average/NumEpisodes 80 +Average/StdReturn 6.42624 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94528 +GaussianMLPPolicy/KLAfter 0.00527487 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000125901 +GaussianMLPPolicy/LossBefore -6.4671e-09 +GaussianMLPPolicy/dLoss 0.000125895 +Iteration 74 +MetaTest/Average/AverageDiscountedReturn -63.5884 +MetaTest/Average/AverageReturn -63.5884 +MetaTest/Average/Iteration 74 +MetaTest/Average/MaxReturn -50.6763 +MetaTest/Average/MinReturn -83.4865 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.75213 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.5884 +MetaTest/__unnamed_task__/AverageReturn -63.5884 +MetaTest/__unnamed_task__/Iteration 74 +MetaTest/__unnamed_task__/MaxReturn -50.6763 +MetaTest/__unnamed_task__/MinReturn -83.4865 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.75213 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.4e+06 +__unnamed_task__/AverageDiscountedReturn -32.6857 +__unnamed_task__/AverageReturn -60.9396 +__unnamed_task__/Iteration 74 +__unnamed_task__/MaxReturn -48.9403 +__unnamed_task__/MinReturn -76.2946 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.42624 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 15:37:41 | [maml_trainer] epoch #75 | Sampling for adapation and meta-testing... +2025-03-30 15:41:36 | [maml_trainer] epoch #75 | Finished meta-testing... +2025-03-30 15:41:36 | [maml_trainer] epoch #75 | Saving snapshot... +2025-03-30 15:41:56 | [maml_trainer] epoch #75 | Saved +2025-03-30 15:41:56 | [maml_trainer] epoch #75 | Time 92017.90 s +2025-03-30 15:41:56 | [maml_trainer] epoch #75 | EpochTime 1262.87 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.1432 +Average/AverageReturn -61.8696 +Average/Iteration 75 +Average/MaxReturn -49.543 +Average/MinReturn -83.5658 +Average/NumEpisodes 80 +Average/StdReturn 7.23785 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9489 +GaussianMLPPolicy/KLAfter 0.00469652 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.93866e-05 +GaussianMLPPolicy/LossBefore -6.28829e-09 +GaussianMLPPolicy/dLoss 3.93803e-05 +Iteration 75 +MetaTest/Average/AverageDiscountedReturn -61.9759 +MetaTest/Average/AverageReturn -61.9759 +MetaTest/Average/Iteration 75 +MetaTest/Average/MaxReturn -51.1832 +MetaTest/Average/MinReturn -79.6995 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.65275 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.9759 +MetaTest/__unnamed_task__/AverageReturn -61.9759 +MetaTest/__unnamed_task__/Iteration 75 +MetaTest/__unnamed_task__/MaxReturn -51.1832 +MetaTest/__unnamed_task__/MinReturn -79.6995 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.65275 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.432e+06 +__unnamed_task__/AverageDiscountedReturn -33.1432 +__unnamed_task__/AverageReturn -61.8696 +__unnamed_task__/Iteration 75 +__unnamed_task__/MaxReturn -49.543 +__unnamed_task__/MinReturn -83.5658 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.23785 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 15:58:20 | [maml_trainer] epoch #76 | Sampling for adapation and meta-testing... +2025-03-30 16:02:32 | [maml_trainer] epoch #76 | Finished meta-testing... +2025-03-30 16:02:32 | [maml_trainer] epoch #76 | Saving snapshot... +2025-03-30 16:02:53 | [maml_trainer] epoch #76 | Saved +2025-03-30 16:02:53 | [maml_trainer] epoch #76 | Time 93274.72 s +2025-03-30 16:02:53 | [maml_trainer] epoch #76 | EpochTime 1256.81 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.0204 +Average/AverageReturn -61.4188 +Average/Iteration 76 +Average/MaxReturn -47.174 +Average/MinReturn -83.6685 +Average/NumEpisodes 80 +Average/StdReturn 6.24937 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95326 +GaussianMLPPolicy/KLAfter 0.00414861 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.85423e-05 +GaussianMLPPolicy/LossBefore -4.17232e-10 +GaussianMLPPolicy/dLoss 1.85418e-05 +Iteration 76 +MetaTest/Average/AverageDiscountedReturn -61.6905 +MetaTest/Average/AverageReturn -61.6905 +MetaTest/Average/Iteration 76 +MetaTest/Average/MaxReturn -51.3162 +MetaTest/Average/MinReturn -73.6799 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.27246 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.6905 +MetaTest/__unnamed_task__/AverageReturn -61.6905 +MetaTest/__unnamed_task__/Iteration 76 +MetaTest/__unnamed_task__/MaxReturn -51.3162 +MetaTest/__unnamed_task__/MinReturn -73.6799 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.27246 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.464e+06 +__unnamed_task__/AverageDiscountedReturn -33.0204 +__unnamed_task__/AverageReturn -61.4188 +__unnamed_task__/Iteration 76 +__unnamed_task__/MaxReturn -47.174 +__unnamed_task__/MinReturn -83.6685 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.24937 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 16:19:37 | [maml_trainer] epoch #77 | Sampling for adapation and meta-testing... +2025-03-30 16:23:44 | [maml_trainer] epoch #77 | Finished meta-testing... +2025-03-30 16:23:44 | [maml_trainer] epoch #77 | Saving snapshot... +2025-03-30 16:24:03 | [maml_trainer] epoch #77 | Saved +2025-03-30 16:24:03 | [maml_trainer] epoch #77 | Time 94544.78 s +2025-03-30 16:24:03 | [maml_trainer] epoch #77 | EpochTime 1270.06 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.1265 +Average/AverageReturn -61.5727 +Average/Iteration 77 +Average/MaxReturn -50.6495 +Average/MinReturn -99.6623 +Average/NumEpisodes 80 +Average/StdReturn 8.17349 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95794 +GaussianMLPPolicy/KLAfter 0.00274956 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.88505e-05 +GaussianMLPPolicy/LossBefore -6.61612e-09 +GaussianMLPPolicy/dLoss -3.88572e-05 +Iteration 77 +MetaTest/Average/AverageDiscountedReturn -59.6188 +MetaTest/Average/AverageReturn -59.6188 +MetaTest/Average/Iteration 77 +MetaTest/Average/MaxReturn -49.7942 +MetaTest/Average/MinReturn -69.9023 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.60876 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.6188 +MetaTest/__unnamed_task__/AverageReturn -59.6188 +MetaTest/__unnamed_task__/Iteration 77 +MetaTest/__unnamed_task__/MaxReturn -49.7942 +MetaTest/__unnamed_task__/MinReturn -69.9023 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.60876 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.496e+06 +__unnamed_task__/AverageDiscountedReturn -33.1265 +__unnamed_task__/AverageReturn -61.5727 +__unnamed_task__/Iteration 77 +__unnamed_task__/MaxReturn -50.6495 +__unnamed_task__/MinReturn -99.6623 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.17349 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 16:40:53 | [maml_trainer] epoch #78 | Sampling for adapation and meta-testing... +2025-03-30 16:45:06 | [maml_trainer] epoch #78 | Finished meta-testing... +2025-03-30 16:45:06 | [maml_trainer] epoch #78 | Saving snapshot... +2025-03-30 16:45:27 | [maml_trainer] epoch #78 | Saved +2025-03-30 16:45:27 | [maml_trainer] epoch #78 | Time 95828.48 s +2025-03-30 16:45:27 | [maml_trainer] epoch #78 | EpochTime 1283.69 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.6466 +Average/AverageReturn -60.2834 +Average/Iteration 78 +Average/MaxReturn -49.1154 +Average/MinReturn -81.8633 +Average/NumEpisodes 80 +Average/StdReturn 7.3201 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96257 +GaussianMLPPolicy/KLAfter 0.00293399 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.20849e-05 +GaussianMLPPolicy/LossBefore -7.74861e-10 +GaussianMLPPolicy/dLoss 5.20841e-05 +Iteration 78 +MetaTest/Average/AverageDiscountedReturn -62.9608 +MetaTest/Average/AverageReturn -62.9608 +MetaTest/Average/Iteration 78 +MetaTest/Average/MaxReturn -49.9984 +MetaTest/Average/MinReturn -75.3052 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.07686 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.9608 +MetaTest/__unnamed_task__/AverageReturn -62.9608 +MetaTest/__unnamed_task__/Iteration 78 +MetaTest/__unnamed_task__/MaxReturn -49.9984 +MetaTest/__unnamed_task__/MinReturn -75.3052 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.07686 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.528e+06 +__unnamed_task__/AverageDiscountedReturn -32.6466 +__unnamed_task__/AverageReturn -60.2834 +__unnamed_task__/Iteration 78 +__unnamed_task__/MaxReturn -49.1154 +__unnamed_task__/MinReturn -81.8633 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.3201 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 17:02:18 | [maml_trainer] epoch #79 | Sampling for adapation and meta-testing... +2025-03-30 17:06:34 | [maml_trainer] epoch #79 | Finished meta-testing... +2025-03-30 17:06:34 | [maml_trainer] epoch #79 | Saving snapshot... +2025-03-30 17:06:56 | [maml_trainer] epoch #79 | Saved +2025-03-30 17:06:56 | [maml_trainer] epoch #79 | Time 97117.46 s +2025-03-30 17:06:56 | [maml_trainer] epoch #79 | EpochTime 1288.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.7902 +Average/AverageReturn -60.5687 +Average/Iteration 79 +Average/MaxReturn -47.7739 +Average/MinReturn -77.1387 +Average/NumEpisodes 80 +Average/StdReturn 5.69763 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96649 +GaussianMLPPolicy/KLAfter 0.00257219 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.38006e-05 +GaussianMLPPolicy/LossBefore -2.6226e-09 +GaussianMLPPolicy/dLoss 2.3798e-05 +Iteration 79 +MetaTest/Average/AverageDiscountedReturn -58.477 +MetaTest/Average/AverageReturn -58.477 +MetaTest/Average/Iteration 79 +MetaTest/Average/MaxReturn -50.0316 +MetaTest/Average/MinReturn -69.832 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.21383 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.477 +MetaTest/__unnamed_task__/AverageReturn -58.477 +MetaTest/__unnamed_task__/Iteration 79 +MetaTest/__unnamed_task__/MaxReturn -50.0316 +MetaTest/__unnamed_task__/MinReturn -69.832 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.21383 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.56e+06 +__unnamed_task__/AverageDiscountedReturn -32.7902 +__unnamed_task__/AverageReturn -60.5687 +__unnamed_task__/Iteration 79 +__unnamed_task__/MaxReturn -47.7739 +__unnamed_task__/MinReturn -77.1387 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.69763 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 17:24:12 | [maml_trainer] epoch #80 | Sampling for adapation and meta-testing... +2025-03-30 17:28:32 | [maml_trainer] epoch #80 | Finished meta-testing... +2025-03-30 17:28:32 | [maml_trainer] epoch #80 | Saving snapshot... +2025-03-30 17:28:54 | [maml_trainer] epoch #80 | Saved +2025-03-30 17:28:54 | [maml_trainer] epoch #80 | Time 98435.35 s +2025-03-30 17:28:54 | [maml_trainer] epoch #80 | EpochTime 1317.89 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -32.9693 +Average/AverageReturn -61.9965 +Average/Iteration 80 +Average/MaxReturn -50.8572 +Average/MinReturn -152.702 +Average/NumEpisodes 80 +Average/StdReturn 12.0068 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97064 +GaussianMLPPolicy/KLAfter 0.0012685 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.31344e-06 +GaussianMLPPolicy/LossBefore -1.23084e-08 +GaussianMLPPolicy/dLoss -4.32575e-06 +Iteration 80 +MetaTest/Average/AverageDiscountedReturn -61.7784 +MetaTest/Average/AverageReturn -61.7784 +MetaTest/Average/Iteration 80 +MetaTest/Average/MaxReturn -52.2634 +MetaTest/Average/MinReturn -70.3841 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.45006 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.7784 +MetaTest/__unnamed_task__/AverageReturn -61.7784 +MetaTest/__unnamed_task__/Iteration 80 +MetaTest/__unnamed_task__/MaxReturn -52.2634 +MetaTest/__unnamed_task__/MinReturn -70.3841 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.45006 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.592e+06 +__unnamed_task__/AverageDiscountedReturn -32.9693 +__unnamed_task__/AverageReturn -61.9965 +__unnamed_task__/Iteration 80 +__unnamed_task__/MaxReturn -50.8572 +__unnamed_task__/MinReturn -152.702 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.0068 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 17:45:28 | [maml_trainer] epoch #81 | Sampling for adapation and meta-testing... +2025-03-30 17:49:26 | [maml_trainer] epoch #81 | Finished meta-testing... +2025-03-30 17:49:26 | [maml_trainer] epoch #81 | Saving snapshot... +2025-03-30 17:49:46 | [maml_trainer] epoch #81 | Saved +2025-03-30 17:49:46 | [maml_trainer] epoch #81 | Time 99687.21 s +2025-03-30 17:49:46 | [maml_trainer] epoch #81 | EpochTime 1251.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -32.205 +Average/AverageReturn -60.7222 +Average/Iteration 81 +Average/MaxReturn -47.4544 +Average/MinReturn -191.529 +Average/NumEpisodes 80 +Average/StdReturn 16.8669 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97521 +GaussianMLPPolicy/KLAfter 0.00140527 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.1204e-05 +GaussianMLPPolicy/LossBefore 6.04987e-09 +GaussianMLPPolicy/dLoss 3.121e-05 +Iteration 81 +MetaTest/Average/AverageDiscountedReturn -57.4348 +MetaTest/Average/AverageReturn -57.4348 +MetaTest/Average/Iteration 81 +MetaTest/Average/MaxReturn -49.1327 +MetaTest/Average/MinReturn -67.9705 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.77576 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.4348 +MetaTest/__unnamed_task__/AverageReturn -57.4348 +MetaTest/__unnamed_task__/Iteration 81 +MetaTest/__unnamed_task__/MaxReturn -49.1327 +MetaTest/__unnamed_task__/MinReturn -67.9705 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.77576 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.624e+06 +__unnamed_task__/AverageDiscountedReturn -32.205 +__unnamed_task__/AverageReturn -60.7222 +__unnamed_task__/Iteration 81 +__unnamed_task__/MaxReturn -47.4544 +__unnamed_task__/MinReturn -191.529 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.8669 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 18:06:27 | [maml_trainer] epoch #82 | Sampling for adapation and meta-testing... +2025-03-30 18:10:28 | [maml_trainer] epoch #82 | Finished meta-testing... +2025-03-30 18:10:28 | [maml_trainer] epoch #82 | Saving snapshot... +2025-03-30 18:10:48 | [maml_trainer] epoch #82 | Saved +2025-03-30 18:10:48 | [maml_trainer] epoch #82 | Time 100949.67 s +2025-03-30 18:10:48 | [maml_trainer] epoch #82 | EpochTime 1262.46 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.002 +Average/AverageReturn -58.4809 +Average/Iteration 82 +Average/MaxReturn -48.1626 +Average/MinReturn -73.3478 +Average/NumEpisodes 80 +Average/StdReturn 5.23677 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98002 +GaussianMLPPolicy/KLAfter 0.00142698 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.35533e-05 +GaussianMLPPolicy/LossBefore -3.21865e-09 +GaussianMLPPolicy/dLoss 2.35501e-05 +Iteration 82 +MetaTest/Average/AverageDiscountedReturn -60.544 +MetaTest/Average/AverageReturn -60.544 +MetaTest/Average/Iteration 82 +MetaTest/Average/MaxReturn -47.9726 +MetaTest/Average/MinReturn -74.5236 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.318 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.544 +MetaTest/__unnamed_task__/AverageReturn -60.544 +MetaTest/__unnamed_task__/Iteration 82 +MetaTest/__unnamed_task__/MaxReturn -47.9726 +MetaTest/__unnamed_task__/MinReturn -74.5236 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.318 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.656e+06 +__unnamed_task__/AverageDiscountedReturn -32.002 +__unnamed_task__/AverageReturn -58.4809 +__unnamed_task__/Iteration 82 +__unnamed_task__/MaxReturn -48.1626 +__unnamed_task__/MinReturn -73.3478 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.23677 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 18:27:48 | [maml_trainer] epoch #83 | Sampling for adapation and meta-testing... +2025-03-30 18:32:02 | [maml_trainer] epoch #83 | Finished meta-testing... +2025-03-30 18:32:02 | [maml_trainer] epoch #83 | Saving snapshot... +2025-03-30 18:32:24 | [maml_trainer] epoch #83 | Saved +2025-03-30 18:32:24 | [maml_trainer] epoch #83 | Time 102245.60 s +2025-03-30 18:32:24 | [maml_trainer] epoch #83 | EpochTime 1295.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -32.9434 +Average/AverageReturn -61.6929 +Average/Iteration 83 +Average/MaxReturn -47.8508 +Average/MinReturn -116.948 +Average/NumEpisodes 80 +Average/StdReturn 10.0472 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98392 +GaussianMLPPolicy/KLAfter 0.00221333 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.02207e-06 +GaussianMLPPolicy/LossBefore 2.41399e-09 +GaussianMLPPolicy/dLoss -6.01965e-06 +Iteration 83 +MetaTest/Average/AverageDiscountedReturn -57.0571 +MetaTest/Average/AverageReturn -57.0571 +MetaTest/Average/Iteration 83 +MetaTest/Average/MaxReturn -48.0341 +MetaTest/Average/MinReturn -67.7459 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.02884 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.0571 +MetaTest/__unnamed_task__/AverageReturn -57.0571 +MetaTest/__unnamed_task__/Iteration 83 +MetaTest/__unnamed_task__/MaxReturn -48.0341 +MetaTest/__unnamed_task__/MinReturn -67.7459 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.02884 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.688e+06 +__unnamed_task__/AverageDiscountedReturn -32.9434 +__unnamed_task__/AverageReturn -61.6929 +__unnamed_task__/Iteration 83 +__unnamed_task__/MaxReturn -47.8508 +__unnamed_task__/MinReturn -116.948 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.0472 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 18:49:00 | [maml_trainer] epoch #84 | Sampling for adapation and meta-testing... +2025-03-30 18:53:15 | [maml_trainer] epoch #84 | Finished meta-testing... +2025-03-30 18:53:15 | [maml_trainer] epoch #84 | Saving snapshot... +2025-03-30 18:53:36 | [maml_trainer] epoch #84 | Saved +2025-03-30 18:53:36 | [maml_trainer] epoch #84 | Time 103517.68 s +2025-03-30 18:53:36 | [maml_trainer] epoch #84 | EpochTime 1272.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.9231 +Average/AverageReturn -58.9367 +Average/Iteration 84 +Average/MaxReturn -47.0715 +Average/MinReturn -100.457 +Average/NumEpisodes 80 +Average/StdReturn 7.9777 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9865 +GaussianMLPPolicy/KLAfter 0.00169647 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.97684e-05 +GaussianMLPPolicy/LossBefore 6.67572e-09 +GaussianMLPPolicy/dLoss 4.97751e-05 +Iteration 84 +MetaTest/Average/AverageDiscountedReturn -70.4637 +MetaTest/Average/AverageReturn -70.4637 +MetaTest/Average/Iteration 84 +MetaTest/Average/MaxReturn -48.9245 +MetaTest/Average/MinReturn -135.352 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.0467 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -70.4637 +MetaTest/__unnamed_task__/AverageReturn -70.4637 +MetaTest/__unnamed_task__/Iteration 84 +MetaTest/__unnamed_task__/MaxReturn -48.9245 +MetaTest/__unnamed_task__/MinReturn -135.352 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.0467 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.72e+06 +__unnamed_task__/AverageDiscountedReturn -31.9231 +__unnamed_task__/AverageReturn -58.9367 +__unnamed_task__/Iteration 84 +__unnamed_task__/MaxReturn -47.0715 +__unnamed_task__/MinReturn -100.457 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.9777 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 19:10:33 | [maml_trainer] epoch #85 | Sampling for adapation and meta-testing... +2025-03-30 19:14:39 | [maml_trainer] epoch #85 | Finished meta-testing... +2025-03-30 19:14:39 | [maml_trainer] epoch #85 | Saving snapshot... +2025-03-30 19:14:59 | [maml_trainer] epoch #85 | Saved +2025-03-30 19:14:59 | [maml_trainer] epoch #85 | Time 104800.37 s +2025-03-30 19:14:59 | [maml_trainer] epoch #85 | EpochTime 1282.68 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -32.6483 +Average/AverageReturn -63.2898 +Average/Iteration 85 +Average/MaxReturn -46.2128 +Average/MinReturn -120.302 +Average/NumEpisodes 80 +Average/StdReturn 15.0633 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98844 +GaussianMLPPolicy/KLAfter 0.00111315 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.42917e-06 +GaussianMLPPolicy/LossBefore 2.05636e-09 +GaussianMLPPolicy/dLoss 8.43123e-06 +Iteration 85 +MetaTest/Average/AverageDiscountedReturn -63.8157 +MetaTest/Average/AverageReturn -63.8157 +MetaTest/Average/Iteration 85 +MetaTest/Average/MaxReturn -48.0983 +MetaTest/Average/MinReturn -143.11 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.3101 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.8157 +MetaTest/__unnamed_task__/AverageReturn -63.8157 +MetaTest/__unnamed_task__/Iteration 85 +MetaTest/__unnamed_task__/MaxReturn -48.0983 +MetaTest/__unnamed_task__/MinReturn -143.11 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.3101 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.752e+06 +__unnamed_task__/AverageDiscountedReturn -32.6483 +__unnamed_task__/AverageReturn -63.2898 +__unnamed_task__/Iteration 85 +__unnamed_task__/MaxReturn -46.2128 +__unnamed_task__/MinReturn -120.302 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.0633 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 19:31:26 | [maml_trainer] epoch #86 | Sampling for adapation and meta-testing... +2025-03-30 19:35:29 | [maml_trainer] epoch #86 | Finished meta-testing... +2025-03-30 19:35:29 | [maml_trainer] epoch #86 | Saving snapshot... +2025-03-30 19:35:50 | [maml_trainer] epoch #86 | Saved +2025-03-30 19:35:50 | [maml_trainer] epoch #86 | Time 106051.24 s +2025-03-30 19:35:50 | [maml_trainer] epoch #86 | EpochTime 1250.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.2819 +Average/AverageReturn -65.372 +Average/Iteration 86 +Average/MaxReturn -46.4866 +Average/MinReturn -166.869 +Average/NumEpisodes 80 +Average/StdReturn 20.0159 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99138 +GaussianMLPPolicy/KLAfter 0.00246301 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.68141e-05 +GaussianMLPPolicy/LossBefore -2.92063e-09 +GaussianMLPPolicy/dLoss 5.68112e-05 +Iteration 86 +MetaTest/Average/AverageDiscountedReturn -60.0029 +MetaTest/Average/AverageReturn -60.0029 +MetaTest/Average/Iteration 86 +MetaTest/Average/MaxReturn -50.0357 +MetaTest/Average/MinReturn -80.9241 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.00107 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.0029 +MetaTest/__unnamed_task__/AverageReturn -60.0029 +MetaTest/__unnamed_task__/Iteration 86 +MetaTest/__unnamed_task__/MaxReturn -50.0357 +MetaTest/__unnamed_task__/MinReturn -80.9241 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.00107 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.784e+06 +__unnamed_task__/AverageDiscountedReturn -33.2819 +__unnamed_task__/AverageReturn -65.372 +__unnamed_task__/Iteration 86 +__unnamed_task__/MaxReturn -46.4866 +__unnamed_task__/MinReturn -166.869 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0159 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 19:52:42 | [maml_trainer] epoch #87 | Sampling for adapation and meta-testing... +2025-03-30 19:56:47 | [maml_trainer] epoch #87 | Finished meta-testing... +2025-03-30 19:56:47 | [maml_trainer] epoch #87 | Saving snapshot... +2025-03-30 19:57:07 | [maml_trainer] epoch #87 | Saved +2025-03-30 19:57:07 | [maml_trainer] epoch #87 | Time 107328.41 s +2025-03-30 19:57:07 | [maml_trainer] epoch #87 | EpochTime 1277.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -34.5858 +Average/AverageReturn -70.9394 +Average/Iteration 87 +Average/MaxReturn -49.6997 +Average/MinReturn -160.068 +Average/NumEpisodes 80 +Average/StdReturn 26.2002 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99314 +GaussianMLPPolicy/KLAfter 0.00292993 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.0321e-05 +GaussianMLPPolicy/LossBefore 9.53674e-10 +GaussianMLPPolicy/dLoss -7.032e-05 +Iteration 87 +MetaTest/Average/AverageDiscountedReturn -65.9769 +MetaTest/Average/AverageReturn -65.9769 +MetaTest/Average/Iteration 87 +MetaTest/Average/MaxReturn -52.8243 +MetaTest/Average/MinReturn -107.228 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.6411 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -65.9769 +MetaTest/__unnamed_task__/AverageReturn -65.9769 +MetaTest/__unnamed_task__/Iteration 87 +MetaTest/__unnamed_task__/MaxReturn -52.8243 +MetaTest/__unnamed_task__/MinReturn -107.228 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.6411 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.816e+06 +__unnamed_task__/AverageDiscountedReturn -34.5858 +__unnamed_task__/AverageReturn -70.9394 +__unnamed_task__/Iteration 87 +__unnamed_task__/MaxReturn -49.6997 +__unnamed_task__/MinReturn -160.068 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.2002 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 20:13:48 | [maml_trainer] epoch #88 | Sampling for adapation and meta-testing... +2025-03-30 20:17:59 | [maml_trainer] epoch #88 | Finished meta-testing... +2025-03-30 20:17:59 | [maml_trainer] epoch #88 | Saving snapshot... +2025-03-30 20:18:20 | [maml_trainer] epoch #88 | Saved +2025-03-30 20:18:20 | [maml_trainer] epoch #88 | Time 108601.85 s +2025-03-30 20:18:20 | [maml_trainer] epoch #88 | EpochTime 1273.44 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.1352 +Average/AverageReturn -66.0887 +Average/Iteration 88 +Average/MaxReturn -46.5394 +Average/MinReturn -134.249 +Average/NumEpisodes 80 +Average/StdReturn 18.5318 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99426 +GaussianMLPPolicy/KLAfter 0.00377027 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.16556e-05 +GaussianMLPPolicy/LossBefore 3.27826e-10 +GaussianMLPPolicy/dLoss 5.1656e-05 +Iteration 88 +MetaTest/Average/AverageDiscountedReturn -61.5005 +MetaTest/Average/AverageReturn -61.5005 +MetaTest/Average/Iteration 88 +MetaTest/Average/MaxReturn -50.4571 +MetaTest/Average/MinReturn -88.969 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.18976 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.5005 +MetaTest/__unnamed_task__/AverageReturn -61.5005 +MetaTest/__unnamed_task__/Iteration 88 +MetaTest/__unnamed_task__/MaxReturn -50.4571 +MetaTest/__unnamed_task__/MinReturn -88.969 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.18976 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.848e+06 +__unnamed_task__/AverageDiscountedReturn -33.1352 +__unnamed_task__/AverageReturn -66.0887 +__unnamed_task__/Iteration 88 +__unnamed_task__/MaxReturn -46.5394 +__unnamed_task__/MinReturn -134.249 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5318 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 20:34:54 | [maml_trainer] epoch #89 | Sampling for adapation and meta-testing... +2025-03-30 20:39:01 | [maml_trainer] epoch #89 | Finished meta-testing... +2025-03-30 20:39:01 | [maml_trainer] epoch #89 | Saving snapshot... +2025-03-30 20:39:22 | [maml_trainer] epoch #89 | Saved +2025-03-30 20:39:22 | [maml_trainer] epoch #89 | Time 109863.98 s +2025-03-30 20:39:22 | [maml_trainer] epoch #89 | EpochTime 1262.13 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.5394 +Average/AverageReturn -64.8683 +Average/Iteration 89 +Average/MaxReturn -47.7871 +Average/MinReturn -137.678 +Average/NumEpisodes 80 +Average/StdReturn 16.5197 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99498 +GaussianMLPPolicy/KLAfter 0.00374035 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.34201e-05 +GaussianMLPPolicy/LossBefore -3.33786e-09 +GaussianMLPPolicy/dLoss 3.34167e-05 +Iteration 89 +MetaTest/Average/AverageDiscountedReturn -59.9501 +MetaTest/Average/AverageReturn -59.9501 +MetaTest/Average/Iteration 89 +MetaTest/Average/MaxReturn -48.6437 +MetaTest/Average/MinReturn -75.2688 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.80408 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.9501 +MetaTest/__unnamed_task__/AverageReturn -59.9501 +MetaTest/__unnamed_task__/Iteration 89 +MetaTest/__unnamed_task__/MaxReturn -48.6437 +MetaTest/__unnamed_task__/MinReturn -75.2688 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.80408 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.88e+06 +__unnamed_task__/AverageDiscountedReturn -33.5394 +__unnamed_task__/AverageReturn -64.8683 +__unnamed_task__/Iteration 89 +__unnamed_task__/MaxReturn -47.7871 +__unnamed_task__/MinReturn -137.678 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.5197 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 20:55:40 | [maml_trainer] epoch #90 | Sampling for adapation and meta-testing... +2025-03-30 20:59:48 | [maml_trainer] epoch #90 | Finished meta-testing... +2025-03-30 20:59:48 | [maml_trainer] epoch #90 | Saving snapshot... +2025-03-30 21:00:09 | [maml_trainer] epoch #90 | Saved +2025-03-30 21:00:09 | [maml_trainer] epoch #90 | Time 111110.28 s +2025-03-30 21:00:09 | [maml_trainer] epoch #90 | EpochTime 1246.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.8698 +Average/AverageReturn -64.9173 +Average/Iteration 90 +Average/MaxReturn -48.1625 +Average/MinReturn -156.449 +Average/NumEpisodes 80 +Average/StdReturn 17.0611 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99648 +GaussianMLPPolicy/KLAfter 0.00533717 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.02373e-05 +GaussianMLPPolicy/LossBefore -8.16584e-09 +GaussianMLPPolicy/dLoss -2.02454e-05 +Iteration 90 +MetaTest/Average/AverageDiscountedReturn -63.1649 +MetaTest/Average/AverageReturn -63.1649 +MetaTest/Average/Iteration 90 +MetaTest/Average/MaxReturn -47.984 +MetaTest/Average/MinReturn -148.942 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.2514 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.1649 +MetaTest/__unnamed_task__/AverageReturn -63.1649 +MetaTest/__unnamed_task__/Iteration 90 +MetaTest/__unnamed_task__/MaxReturn -47.984 +MetaTest/__unnamed_task__/MinReturn -148.942 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.2514 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.912e+06 +__unnamed_task__/AverageDiscountedReturn -33.8698 +__unnamed_task__/AverageReturn -64.9173 +__unnamed_task__/Iteration 90 +__unnamed_task__/MaxReturn -48.1625 +__unnamed_task__/MinReturn -156.449 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0611 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 21:16:59 | [maml_trainer] epoch #91 | Sampling for adapation and meta-testing... +2025-03-30 21:21:06 | [maml_trainer] epoch #91 | Finished meta-testing... +2025-03-30 21:21:06 | [maml_trainer] epoch #91 | Saving snapshot... +2025-03-30 21:21:25 | [maml_trainer] epoch #91 | Saved +2025-03-30 21:21:25 | [maml_trainer] epoch #91 | Time 112386.99 s +2025-03-30 21:21:25 | [maml_trainer] epoch #91 | EpochTime 1276.71 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.2643 +Average/AverageReturn -62.8693 +Average/Iteration 91 +Average/MaxReturn -46.8029 +Average/MinReturn -149.349 +Average/NumEpisodes 80 +Average/StdReturn 13.9933 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99891 +GaussianMLPPolicy/KLAfter 0.00567162 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.02651e-05 +GaussianMLPPolicy/LossBefore 9.0003e-09 +GaussianMLPPolicy/dLoss 5.02741e-05 +Iteration 91 +MetaTest/Average/AverageDiscountedReturn -61.2505 +MetaTest/Average/AverageReturn -61.2505 +MetaTest/Average/Iteration 91 +MetaTest/Average/MaxReturn -48.1212 +MetaTest/Average/MinReturn -121.517 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.3515 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.2505 +MetaTest/__unnamed_task__/AverageReturn -61.2505 +MetaTest/__unnamed_task__/Iteration 91 +MetaTest/__unnamed_task__/MaxReturn -48.1212 +MetaTest/__unnamed_task__/MinReturn -121.517 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.3515 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.944e+06 +__unnamed_task__/AverageDiscountedReturn -33.2643 +__unnamed_task__/AverageReturn -62.8693 +__unnamed_task__/Iteration 91 +__unnamed_task__/MaxReturn -46.8029 +__unnamed_task__/MinReturn -149.349 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9933 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 21:37:07 | [maml_trainer] epoch #92 | Sampling for adapation and meta-testing... +2025-03-30 21:41:00 | [maml_trainer] epoch #92 | Finished meta-testing... +2025-03-30 21:41:00 | [maml_trainer] epoch #92 | Saving snapshot... +2025-03-30 21:41:21 | [maml_trainer] epoch #92 | Saved +2025-03-30 21:41:21 | [maml_trainer] epoch #92 | Time 113582.50 s +2025-03-30 21:41:21 | [maml_trainer] epoch #92 | EpochTime 1195.50 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -32.743 +Average/AverageReturn -60.3513 +Average/Iteration 92 +Average/MaxReturn -48.61 +Average/MinReturn -127.803 +Average/NumEpisodes 80 +Average/StdReturn 10.5106 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0019 +GaussianMLPPolicy/KLAfter 0.00654462 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.97858e-05 +GaussianMLPPolicy/LossBefore 2.29478e-09 +GaussianMLPPolicy/dLoss 8.97881e-05 +Iteration 92 +MetaTest/Average/AverageDiscountedReturn -63.53 +MetaTest/Average/AverageReturn -63.53 +MetaTest/Average/Iteration 92 +MetaTest/Average/MaxReturn -50.8272 +MetaTest/Average/MinReturn -112.538 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6552 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.53 +MetaTest/__unnamed_task__/AverageReturn -63.53 +MetaTest/__unnamed_task__/Iteration 92 +MetaTest/__unnamed_task__/MaxReturn -50.8272 +MetaTest/__unnamed_task__/MinReturn -112.538 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6552 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.976e+06 +__unnamed_task__/AverageDiscountedReturn -32.743 +__unnamed_task__/AverageReturn -60.3513 +__unnamed_task__/Iteration 92 +__unnamed_task__/MaxReturn -48.61 +__unnamed_task__/MinReturn -127.803 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5106 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 21:57:50 | [maml_trainer] epoch #93 | Sampling for adapation and meta-testing... +2025-03-30 22:01:51 | [maml_trainer] epoch #93 | Finished meta-testing... +2025-03-30 22:01:51 | [maml_trainer] epoch #93 | Saving snapshot... +2025-03-30 22:02:13 | [maml_trainer] epoch #93 | Saved +2025-03-30 22:02:13 | [maml_trainer] epoch #93 | Time 114834.24 s +2025-03-30 22:02:13 | [maml_trainer] epoch #93 | EpochTime 1251.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.8443 +Average/AverageReturn -59.9444 +Average/Iteration 93 +Average/MaxReturn -48.6933 +Average/MinReturn -98.1658 +Average/NumEpisodes 80 +Average/StdReturn 8.11988 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0031 +GaussianMLPPolicy/KLAfter 0.0055757 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.60056e-05 +GaussianMLPPolicy/LossBefore 1.90735e-09 +GaussianMLPPolicy/dLoss 7.60075e-05 +Iteration 93 +MetaTest/Average/AverageDiscountedReturn -61.5513 +MetaTest/Average/AverageReturn -61.5513 +MetaTest/Average/Iteration 93 +MetaTest/Average/MaxReturn -50.4304 +MetaTest/Average/MinReturn -74.9423 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.92278 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.5513 +MetaTest/__unnamed_task__/AverageReturn -61.5513 +MetaTest/__unnamed_task__/Iteration 93 +MetaTest/__unnamed_task__/MaxReturn -50.4304 +MetaTest/__unnamed_task__/MinReturn -74.9423 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.92278 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.008e+06 +__unnamed_task__/AverageDiscountedReturn -32.8443 +__unnamed_task__/AverageReturn -59.9444 +__unnamed_task__/Iteration 93 +__unnamed_task__/MaxReturn -48.6933 +__unnamed_task__/MinReturn -98.1658 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.11988 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 22:17:58 | [maml_trainer] epoch #94 | Sampling for adapation and meta-testing... +2025-03-30 22:21:53 | [maml_trainer] epoch #94 | Finished meta-testing... +2025-03-30 22:21:53 | [maml_trainer] epoch #94 | Saving snapshot... +2025-03-30 22:22:13 | [maml_trainer] epoch #94 | Saved +2025-03-30 22:22:13 | [maml_trainer] epoch #94 | Time 116034.97 s +2025-03-30 22:22:13 | [maml_trainer] epoch #94 | EpochTime 1200.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.0831 +Average/AverageReturn -59.9715 +Average/Iteration 94 +Average/MaxReturn -47.611 +Average/MinReturn -76.6539 +Average/NumEpisodes 80 +Average/StdReturn 6.57087 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0039 +GaussianMLPPolicy/KLAfter 0.00516804 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.76329e-05 +GaussianMLPPolicy/LossBefore -2.25306e-08 +GaussianMLPPolicy/dLoss 5.76104e-05 +Iteration 94 +MetaTest/Average/AverageDiscountedReturn -59.8715 +MetaTest/Average/AverageReturn -59.8715 +MetaTest/Average/Iteration 94 +MetaTest/Average/MaxReturn -50.7538 +MetaTest/Average/MinReturn -68.1227 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.85746 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.8715 +MetaTest/__unnamed_task__/AverageReturn -59.8715 +MetaTest/__unnamed_task__/Iteration 94 +MetaTest/__unnamed_task__/MaxReturn -50.7538 +MetaTest/__unnamed_task__/MinReturn -68.1227 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.85746 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.04e+06 +__unnamed_task__/AverageDiscountedReturn -33.0831 +__unnamed_task__/AverageReturn -59.9715 +__unnamed_task__/Iteration 94 +__unnamed_task__/MaxReturn -47.611 +__unnamed_task__/MinReturn -76.6539 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.57087 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 22:37:55 | [maml_trainer] epoch #95 | Sampling for adapation and meta-testing... +2025-03-30 22:41:45 | [maml_trainer] epoch #95 | Finished meta-testing... +2025-03-30 22:41:45 | [maml_trainer] epoch #95 | Saving snapshot... +2025-03-30 22:42:05 | [maml_trainer] epoch #95 | Saved +2025-03-30 22:42:05 | [maml_trainer] epoch #95 | Time 117226.30 s +2025-03-30 22:42:05 | [maml_trainer] epoch #95 | EpochTime 1191.33 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -33.289 +Average/AverageReturn -60.5218 +Average/Iteration 95 +Average/MaxReturn -48.4199 +Average/MinReturn -114.438 +Average/NumEpisodes 80 +Average/StdReturn 8.67208 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0048 +GaussianMLPPolicy/KLAfter 0.00450262 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.79842e-05 +GaussianMLPPolicy/LossBefore -4.35114e-09 +GaussianMLPPolicy/dLoss -2.79886e-05 +Iteration 95 +MetaTest/Average/AverageDiscountedReturn -56.898 +MetaTest/Average/AverageReturn -56.898 +MetaTest/Average/Iteration 95 +MetaTest/Average/MaxReturn -48.6497 +MetaTest/Average/MinReturn -65.3212 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.18784 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.898 +MetaTest/__unnamed_task__/AverageReturn -56.898 +MetaTest/__unnamed_task__/Iteration 95 +MetaTest/__unnamed_task__/MaxReturn -48.6497 +MetaTest/__unnamed_task__/MinReturn -65.3212 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.18784 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.072e+06 +__unnamed_task__/AverageDiscountedReturn -33.289 +__unnamed_task__/AverageReturn -60.5218 +__unnamed_task__/Iteration 95 +__unnamed_task__/MaxReturn -48.4199 +__unnamed_task__/MinReturn -114.438 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.67208 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-30 22:57:38 | [maml_trainer] epoch #96 | Sampling for adapation and meta-testing... +2025-03-30 23:01:39 | [maml_trainer] epoch #96 | Finished meta-testing... +2025-03-30 23:01:39 | [maml_trainer] epoch #96 | Saving snapshot... +2025-03-30 23:02:00 | [maml_trainer] epoch #96 | Saved +2025-03-30 23:02:00 | [maml_trainer] epoch #96 | Time 118421.53 s +2025-03-30 23:02:00 | [maml_trainer] epoch #96 | EpochTime 1195.23 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.3962 +Average/AverageReturn -59.811 +Average/Iteration 96 +Average/MaxReturn -49.5033 +Average/MinReturn -74.5205 +Average/NumEpisodes 80 +Average/StdReturn 5.09586 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.005 +GaussianMLPPolicy/KLAfter 0.00556933 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.02635e-09 +GaussianMLPPolicy/LossBefore -7.30157e-09 +GaussianMLPPolicy/dLoss -5.27521e-09 +Iteration 96 +MetaTest/Average/AverageDiscountedReturn -58.3035 +MetaTest/Average/AverageReturn -58.3035 +MetaTest/Average/Iteration 96 +MetaTest/Average/MaxReturn -50.7544 +MetaTest/Average/MinReturn -67.2571 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.5005 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.3035 +MetaTest/__unnamed_task__/AverageReturn -58.3035 +MetaTest/__unnamed_task__/Iteration 96 +MetaTest/__unnamed_task__/MaxReturn -50.7544 +MetaTest/__unnamed_task__/MinReturn -67.2571 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.5005 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.104e+06 +__unnamed_task__/AverageDiscountedReturn -33.3962 +__unnamed_task__/AverageReturn -59.811 +__unnamed_task__/Iteration 96 +__unnamed_task__/MaxReturn -49.5033 +__unnamed_task__/MinReturn -74.5205 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.09586 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 23:17:42 | [maml_trainer] epoch #97 | Sampling for adapation and meta-testing... +2025-03-30 23:21:44 | [maml_trainer] epoch #97 | Finished meta-testing... +2025-03-30 23:21:44 | [maml_trainer] epoch #97 | Saving snapshot... +2025-03-30 23:22:06 | [maml_trainer] epoch #97 | Saved +2025-03-30 23:22:06 | [maml_trainer] epoch #97 | Time 119627.70 s +2025-03-30 23:22:06 | [maml_trainer] epoch #97 | EpochTime 1206.17 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.0911 +Average/AverageReturn -59.2275 +Average/Iteration 97 +Average/MaxReturn -48.5314 +Average/MinReturn -72.8643 +Average/NumEpisodes 80 +Average/StdReturn 5.03644 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0043 +GaussianMLPPolicy/KLAfter 0.00483779 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.59798e-05 +GaussianMLPPolicy/LossBefore 0 +GaussianMLPPolicy/dLoss 3.59798e-05 +Iteration 97 +MetaTest/Average/AverageDiscountedReturn -57.8807 +MetaTest/Average/AverageReturn -57.8807 +MetaTest/Average/Iteration 97 +MetaTest/Average/MaxReturn -49.1117 +MetaTest/Average/MinReturn -65.6147 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.56347 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.8807 +MetaTest/__unnamed_task__/AverageReturn -57.8807 +MetaTest/__unnamed_task__/Iteration 97 +MetaTest/__unnamed_task__/MaxReturn -49.1117 +MetaTest/__unnamed_task__/MinReturn -65.6147 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.56347 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.136e+06 +__unnamed_task__/AverageDiscountedReturn -33.0911 +__unnamed_task__/AverageReturn -59.2275 +__unnamed_task__/Iteration 97 +__unnamed_task__/MaxReturn -48.5314 +__unnamed_task__/MinReturn -72.8643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.03644 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 23:38:09 | [maml_trainer] epoch #98 | Sampling for adapation and meta-testing... +2025-03-30 23:42:07 | [maml_trainer] epoch #98 | Finished meta-testing... +2025-03-30 23:42:08 | [maml_trainer] epoch #98 | Saving snapshot... +2025-03-30 23:42:28 | [maml_trainer] epoch #98 | Saved +2025-03-30 23:42:28 | [maml_trainer] epoch #98 | Time 120849.37 s +2025-03-30 23:42:28 | [maml_trainer] epoch #98 | EpochTime 1221.66 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.6942 +Average/AverageReturn -60.415 +Average/Iteration 98 +Average/MaxReturn -50.8753 +Average/MinReturn -76.603 +Average/NumEpisodes 80 +Average/StdReturn 5.45924 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0029 +GaussianMLPPolicy/KLAfter 0.0037817 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.85218e-05 +GaussianMLPPolicy/LossBefore 4.23193e-09 +GaussianMLPPolicy/dLoss 4.85261e-05 +Iteration 98 +MetaTest/Average/AverageDiscountedReturn -59.0688 +MetaTest/Average/AverageReturn -59.0688 +MetaTest/Average/Iteration 98 +MetaTest/Average/MaxReturn -50.1187 +MetaTest/Average/MinReturn -64.8993 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.89567 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.0688 +MetaTest/__unnamed_task__/AverageReturn -59.0688 +MetaTest/__unnamed_task__/Iteration 98 +MetaTest/__unnamed_task__/MaxReturn -50.1187 +MetaTest/__unnamed_task__/MinReturn -64.8993 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.89567 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.168e+06 +__unnamed_task__/AverageDiscountedReturn -33.6942 +__unnamed_task__/AverageReturn -60.415 +__unnamed_task__/Iteration 98 +__unnamed_task__/MaxReturn -50.8753 +__unnamed_task__/MinReturn -76.603 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.45924 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-30 23:58:42 | [maml_trainer] epoch #99 | Sampling for adapation and meta-testing... +2025-03-31 00:02:33 | [maml_trainer] epoch #99 | Finished meta-testing... +2025-03-31 00:02:33 | [maml_trainer] epoch #99 | Saving snapshot... +2025-03-31 00:02:53 | [maml_trainer] epoch #99 | Saved +2025-03-31 00:02:53 | [maml_trainer] epoch #99 | Time 122074.16 s +2025-03-31 00:02:53 | [maml_trainer] epoch #99 | EpochTime 1224.79 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.9079 +Average/AverageReturn -61.1038 +Average/Iteration 99 +Average/MaxReturn -50.614 +Average/MinReturn -76.679 +Average/NumEpisodes 80 +Average/StdReturn 5.61037 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.001 +GaussianMLPPolicy/KLAfter 0.0040729 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000158534 +GaussianMLPPolicy/LossBefore -1.72853e-09 +GaussianMLPPolicy/dLoss -0.000158536 +Iteration 99 +MetaTest/Average/AverageDiscountedReturn -59.9088 +MetaTest/Average/AverageReturn -59.9088 +MetaTest/Average/Iteration 99 +MetaTest/Average/MaxReturn -51.1586 +MetaTest/Average/MinReturn -72.7763 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.80584 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.9088 +MetaTest/__unnamed_task__/AverageReturn -59.9088 +MetaTest/__unnamed_task__/Iteration 99 +MetaTest/__unnamed_task__/MaxReturn -51.1586 +MetaTest/__unnamed_task__/MinReturn -72.7763 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.80584 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.2e+06 +__unnamed_task__/AverageDiscountedReturn -33.9079 +__unnamed_task__/AverageReturn -61.1038 +__unnamed_task__/Iteration 99 +__unnamed_task__/MaxReturn -50.614 +__unnamed_task__/MinReturn -76.679 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.61037 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 00:18:04 | [maml_trainer] epoch #100 | Sampling for adapation and meta-testing... +2025-03-31 00:21:56 | [maml_trainer] epoch #100 | Finished meta-testing... +2025-03-31 00:21:56 | [maml_trainer] epoch #100 | Saving snapshot... +2025-03-31 00:22:15 | [maml_trainer] epoch #100 | Saved +2025-03-31 00:22:15 | [maml_trainer] epoch #100 | Time 123236.83 s +2025-03-31 00:22:15 | [maml_trainer] epoch #100 | EpochTime 1162.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.7014 +Average/AverageReturn -60.8137 +Average/Iteration 100 +Average/MaxReturn -49.4321 +Average/MinReturn -76.0383 +Average/NumEpisodes 80 +Average/StdReturn 5.41829 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99951 +GaussianMLPPolicy/KLAfter 0.0027839 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.63873e-06 +GaussianMLPPolicy/LossBefore 6.49691e-09 +GaussianMLPPolicy/dLoss -4.63223e-06 +Iteration 100 +MetaTest/Average/AverageDiscountedReturn -62.2424 +MetaTest/Average/AverageReturn -62.2424 +MetaTest/Average/Iteration 100 +MetaTest/Average/MaxReturn -51.9816 +MetaTest/Average/MinReturn -78.5556 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.71901 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.2424 +MetaTest/__unnamed_task__/AverageReturn -62.2424 +MetaTest/__unnamed_task__/Iteration 100 +MetaTest/__unnamed_task__/MaxReturn -51.9816 +MetaTest/__unnamed_task__/MinReturn -78.5556 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.71901 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.232e+06 +__unnamed_task__/AverageDiscountedReturn -33.7014 +__unnamed_task__/AverageReturn -60.8137 +__unnamed_task__/Iteration 100 +__unnamed_task__/MaxReturn -49.4321 +__unnamed_task__/MinReturn -76.0383 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.41829 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 00:38:15 | [maml_trainer] epoch #101 | Sampling for adapation and meta-testing... +2025-03-31 00:42:12 | [maml_trainer] epoch #101 | Finished meta-testing... +2025-03-31 00:42:12 | [maml_trainer] epoch #101 | Saving snapshot... +2025-03-31 00:42:33 | [maml_trainer] epoch #101 | Saved +2025-03-31 00:42:33 | [maml_trainer] epoch #101 | Time 124454.50 s +2025-03-31 00:42:33 | [maml_trainer] epoch #101 | EpochTime 1217.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.6795 +Average/AverageReturn -60.5684 +Average/Iteration 101 +Average/MaxReturn -48.6668 +Average/MinReturn -74.9351 +Average/NumEpisodes 80 +Average/StdReturn 5.4781 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99819 +GaussianMLPPolicy/KLAfter 0.00214203 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.73421e-05 +GaussianMLPPolicy/LossBefore 2.5332e-09 +GaussianMLPPolicy/dLoss -5.73396e-05 +Iteration 101 +MetaTest/Average/AverageDiscountedReturn -59.163 +MetaTest/Average/AverageReturn -59.163 +MetaTest/Average/Iteration 101 +MetaTest/Average/MaxReturn -50.0384 +MetaTest/Average/MinReturn -68.7049 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.33282 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.163 +MetaTest/__unnamed_task__/AverageReturn -59.163 +MetaTest/__unnamed_task__/Iteration 101 +MetaTest/__unnamed_task__/MaxReturn -50.0384 +MetaTest/__unnamed_task__/MinReturn -68.7049 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.33282 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.264e+06 +__unnamed_task__/AverageDiscountedReturn -33.6795 +__unnamed_task__/AverageReturn -60.5684 +__unnamed_task__/Iteration 101 +__unnamed_task__/MaxReturn -48.6668 +__unnamed_task__/MinReturn -74.9351 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.4781 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 00:58:31 | [maml_trainer] epoch #102 | Sampling for adapation and meta-testing... +2025-03-31 01:02:34 | [maml_trainer] epoch #102 | Finished meta-testing... +2025-03-31 01:02:34 | [maml_trainer] epoch #102 | Saving snapshot... +2025-03-31 01:02:56 | [maml_trainer] epoch #102 | Saved +2025-03-31 01:02:56 | [maml_trainer] epoch #102 | Time 125677.06 s +2025-03-31 01:02:56 | [maml_trainer] epoch #102 | EpochTime 1222.55 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.6925 +Average/AverageReturn -60.7592 +Average/Iteration 102 +Average/MaxReturn -46.1892 +Average/MinReturn -77.4712 +Average/NumEpisodes 80 +Average/StdReturn 5.52341 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99641 +GaussianMLPPolicy/KLAfter 0.00254313 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.2582e-05 +GaussianMLPPolicy/LossBefore -1.72853e-09 +GaussianMLPPolicy/dLoss 5.25803e-05 +Iteration 102 +MetaTest/Average/AverageDiscountedReturn -61.0978 +MetaTest/Average/AverageReturn -61.0978 +MetaTest/Average/Iteration 102 +MetaTest/Average/MaxReturn -51.6704 +MetaTest/Average/MinReturn -71.5775 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.50088 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.0978 +MetaTest/__unnamed_task__/AverageReturn -61.0978 +MetaTest/__unnamed_task__/Iteration 102 +MetaTest/__unnamed_task__/MaxReturn -51.6704 +MetaTest/__unnamed_task__/MinReturn -71.5775 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.50088 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.296e+06 +__unnamed_task__/AverageDiscountedReturn -33.6925 +__unnamed_task__/AverageReturn -60.7592 +__unnamed_task__/Iteration 102 +__unnamed_task__/MaxReturn -46.1892 +__unnamed_task__/MinReturn -77.4712 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.52341 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 01:19:10 | [maml_trainer] epoch #103 | Sampling for adapation and meta-testing... +2025-03-31 01:23:09 | [maml_trainer] epoch #103 | Finished meta-testing... +2025-03-31 01:23:09 | [maml_trainer] epoch #103 | Saving snapshot... +2025-03-31 01:23:30 | [maml_trainer] epoch #103 | Saved +2025-03-31 01:23:30 | [maml_trainer] epoch #103 | Time 126911.90 s +2025-03-31 01:23:30 | [maml_trainer] epoch #103 | EpochTime 1234.83 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.0613 +Average/AverageReturn -59.7481 +Average/Iteration 103 +Average/MaxReturn -48.1886 +Average/MinReturn -77.2302 +Average/NumEpisodes 80 +Average/StdReturn 6.32568 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99645 +GaussianMLPPolicy/KLAfter 0.00234109 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.45281e-07 +GaussianMLPPolicy/LossBefore -6.07967e-09 +GaussianMLPPolicy/dLoss 8.39201e-07 +Iteration 103 +MetaTest/Average/AverageDiscountedReturn -57.9208 +MetaTest/Average/AverageReturn -57.9208 +MetaTest/Average/Iteration 103 +MetaTest/Average/MaxReturn -48.9487 +MetaTest/Average/MinReturn -79.785 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.54186 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.9208 +MetaTest/__unnamed_task__/AverageReturn -57.9208 +MetaTest/__unnamed_task__/Iteration 103 +MetaTest/__unnamed_task__/MaxReturn -48.9487 +MetaTest/__unnamed_task__/MinReturn -79.785 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.54186 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.328e+06 +__unnamed_task__/AverageDiscountedReturn -33.0613 +__unnamed_task__/AverageReturn -59.7481 +__unnamed_task__/Iteration 103 +__unnamed_task__/MaxReturn -48.1886 +__unnamed_task__/MinReturn -77.2302 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.32568 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 01:39:40 | [maml_trainer] epoch #104 | Sampling for adapation and meta-testing... +2025-03-31 01:43:32 | [maml_trainer] epoch #104 | Finished meta-testing... +2025-03-31 01:43:32 | [maml_trainer] epoch #104 | Saving snapshot... +2025-03-31 01:43:51 | [maml_trainer] epoch #104 | Saved +2025-03-31 01:43:51 | [maml_trainer] epoch #104 | Time 128133.00 s +2025-03-31 01:43:51 | [maml_trainer] epoch #104 | EpochTime 1221.10 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -33.3618 +Average/AverageReturn -60.6831 +Average/Iteration 104 +Average/MaxReturn -47.5005 +Average/MinReturn -75.635 +Average/NumEpisodes 80 +Average/StdReturn 5.7912 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99726 +GaussianMLPPolicy/KLAfter 0.00338631 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.13871e-05 +GaussianMLPPolicy/LossBefore -1.96695e-09 +GaussianMLPPolicy/dLoss -7.1389e-05 +Iteration 104 +MetaTest/Average/AverageDiscountedReturn -57.6292 +MetaTest/Average/AverageReturn -57.6292 +MetaTest/Average/Iteration 104 +MetaTest/Average/MaxReturn -49.3253 +MetaTest/Average/MinReturn -68.7617 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.08416 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.6292 +MetaTest/__unnamed_task__/AverageReturn -57.6292 +MetaTest/__unnamed_task__/Iteration 104 +MetaTest/__unnamed_task__/MaxReturn -49.3253 +MetaTest/__unnamed_task__/MinReturn -68.7617 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.08416 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.36e+06 +__unnamed_task__/AverageDiscountedReturn -33.3618 +__unnamed_task__/AverageReturn -60.6831 +__unnamed_task__/Iteration 104 +__unnamed_task__/MaxReturn -47.5005 +__unnamed_task__/MinReturn -75.635 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.7912 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 01:59:16 | [maml_trainer] epoch #105 | Sampling for adapation and meta-testing... +2025-03-31 02:03:07 | [maml_trainer] epoch #105 | Finished meta-testing... +2025-03-31 02:03:07 | [maml_trainer] epoch #105 | Saving snapshot... +2025-03-31 02:03:27 | [maml_trainer] epoch #105 | Saved +2025-03-31 02:03:27 | [maml_trainer] epoch #105 | Time 129308.66 s +2025-03-31 02:03:27 | [maml_trainer] epoch #105 | EpochTime 1175.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.7144 +Average/AverageReturn -58.7893 +Average/Iteration 105 +Average/MaxReturn -49.3036 +Average/MinReturn -77.1573 +Average/NumEpisodes 80 +Average/StdReturn 5.50073 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99846 +GaussianMLPPolicy/KLAfter 0.00363071 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.67721e-05 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss -5.67682e-05 +Iteration 105 +MetaTest/Average/AverageDiscountedReturn -60.7138 +MetaTest/Average/AverageReturn -60.7138 +MetaTest/Average/Iteration 105 +MetaTest/Average/MaxReturn -47.9613 +MetaTest/Average/MinReturn -77.3246 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.45983 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.7138 +MetaTest/__unnamed_task__/AverageReturn -60.7138 +MetaTest/__unnamed_task__/Iteration 105 +MetaTest/__unnamed_task__/MaxReturn -47.9613 +MetaTest/__unnamed_task__/MinReturn -77.3246 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.45983 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.392e+06 +__unnamed_task__/AverageDiscountedReturn -32.7144 +__unnamed_task__/AverageReturn -58.7893 +__unnamed_task__/Iteration 105 +__unnamed_task__/MaxReturn -49.3036 +__unnamed_task__/MinReturn -77.1573 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.50073 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 02:18:52 | [maml_trainer] epoch #106 | Sampling for adapation and meta-testing... +2025-03-31 02:22:41 | [maml_trainer] epoch #106 | Finished meta-testing... +2025-03-31 02:22:41 | [maml_trainer] epoch #106 | Saving snapshot... +2025-03-31 02:23:01 | [maml_trainer] epoch #106 | Saved +2025-03-31 02:23:01 | [maml_trainer] epoch #106 | Time 130483.00 s +2025-03-31 02:23:01 | [maml_trainer] epoch #106 | EpochTime 1174.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.4919 +Average/AverageReturn -58.6453 +Average/Iteration 106 +Average/MaxReturn -48.7291 +Average/MinReturn -73.511 +Average/NumEpisodes 80 +Average/StdReturn 4.95747 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99918 +GaussianMLPPolicy/KLAfter 0.00362975 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.46854e-05 +GaussianMLPPolicy/LossBefore 2.98024e-11 +GaussianMLPPolicy/dLoss 3.46854e-05 +Iteration 106 +MetaTest/Average/AverageDiscountedReturn -56.691 +MetaTest/Average/AverageReturn -56.691 +MetaTest/Average/Iteration 106 +MetaTest/Average/MaxReturn -44.8234 +MetaTest/Average/MinReturn -68.2875 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.63638 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.691 +MetaTest/__unnamed_task__/AverageReturn -56.691 +MetaTest/__unnamed_task__/Iteration 106 +MetaTest/__unnamed_task__/MaxReturn -44.8234 +MetaTest/__unnamed_task__/MinReturn -68.2875 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.63638 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.424e+06 +__unnamed_task__/AverageDiscountedReturn -32.4919 +__unnamed_task__/AverageReturn -58.6453 +__unnamed_task__/Iteration 106 +__unnamed_task__/MaxReturn -48.7291 +__unnamed_task__/MinReturn -73.511 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.95747 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 02:38:29 | [maml_trainer] epoch #107 | Sampling for adapation and meta-testing... +2025-03-31 02:42:23 | [maml_trainer] epoch #107 | Finished meta-testing... +2025-03-31 02:42:23 | [maml_trainer] epoch #107 | Saving snapshot... +2025-03-31 02:42:44 | [maml_trainer] epoch #107 | Saved +2025-03-31 02:42:44 | [maml_trainer] epoch #107 | Time 131665.65 s +2025-03-31 02:42:44 | [maml_trainer] epoch #107 | EpochTime 1182.64 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.8945 +Average/AverageReturn -57.9532 +Average/Iteration 107 +Average/MaxReturn -48.7458 +Average/MinReturn -75.8799 +Average/NumEpisodes 80 +Average/StdReturn 5.66345 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99966 +GaussianMLPPolicy/KLAfter 0.0038089 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.00098e-05 +GaussianMLPPolicy/LossBefore 4.70877e-09 +GaussianMLPPolicy/dLoss 9.00145e-05 +Iteration 107 +MetaTest/Average/AverageDiscountedReturn -57.0124 +MetaTest/Average/AverageReturn -57.0124 +MetaTest/Average/Iteration 107 +MetaTest/Average/MaxReturn -50.181 +MetaTest/Average/MinReturn -65.6047 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.42772 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.0124 +MetaTest/__unnamed_task__/AverageReturn -57.0124 +MetaTest/__unnamed_task__/Iteration 107 +MetaTest/__unnamed_task__/MaxReturn -50.181 +MetaTest/__unnamed_task__/MinReturn -65.6047 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.42772 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.456e+06 +__unnamed_task__/AverageDiscountedReturn -31.8945 +__unnamed_task__/AverageReturn -57.9532 +__unnamed_task__/Iteration 107 +__unnamed_task__/MaxReturn -48.7458 +__unnamed_task__/MinReturn -75.8799 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.66345 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 02:58:14 | [maml_trainer] epoch #108 | Sampling for adapation and meta-testing... +2025-03-31 03:02:06 | [maml_trainer] epoch #108 | Finished meta-testing... +2025-03-31 03:02:06 | [maml_trainer] epoch #108 | Saving snapshot... +2025-03-31 03:02:25 | [maml_trainer] epoch #108 | Saved +2025-03-31 03:02:25 | [maml_trainer] epoch #108 | Time 132846.72 s +2025-03-31 03:02:25 | [maml_trainer] epoch #108 | EpochTime 1181.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -32.2743 +Average/AverageReturn -58.7632 +Average/Iteration 108 +Average/MaxReturn -47.9669 +Average/MinReturn -70.3042 +Average/NumEpisodes 80 +Average/StdReturn 5.7748 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99863 +GaussianMLPPolicy/KLAfter 0.00312719 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.40975e-05 +GaussianMLPPolicy/LossBefore 8.16584e-09 +GaussianMLPPolicy/dLoss 2.41056e-05 +Iteration 108 +MetaTest/Average/AverageDiscountedReturn -57.2583 +MetaTest/Average/AverageReturn -57.2583 +MetaTest/Average/Iteration 108 +MetaTest/Average/MaxReturn -49.4222 +MetaTest/Average/MinReturn -68.2349 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.71808 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.2583 +MetaTest/__unnamed_task__/AverageReturn -57.2583 +MetaTest/__unnamed_task__/Iteration 108 +MetaTest/__unnamed_task__/MaxReturn -49.4222 +MetaTest/__unnamed_task__/MinReturn -68.2349 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.71808 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.488e+06 +__unnamed_task__/AverageDiscountedReturn -32.2743 +__unnamed_task__/AverageReturn -58.7632 +__unnamed_task__/Iteration 108 +__unnamed_task__/MaxReturn -47.9669 +__unnamed_task__/MinReturn -70.3042 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.7748 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 03:18:00 | [maml_trainer] epoch #109 | Sampling for adapation and meta-testing... +2025-03-31 03:21:52 | [maml_trainer] epoch #109 | Finished meta-testing... +2025-03-31 03:21:52 | [maml_trainer] epoch #109 | Saving snapshot... +2025-03-31 03:22:11 | [maml_trainer] epoch #109 | Saved +2025-03-31 03:22:11 | [maml_trainer] epoch #109 | Time 134032.59 s +2025-03-31 03:22:11 | [maml_trainer] epoch #109 | EpochTime 1185.87 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.3909 +Average/AverageReturn -56.6542 +Average/Iteration 109 +Average/MaxReturn -45.8041 +Average/MinReturn -73.5472 +Average/NumEpisodes 80 +Average/StdReturn 5.93844 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9977 +GaussianMLPPolicy/KLAfter 0.00314899 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.11373e-06 +GaussianMLPPolicy/LossBefore 6.25849e-09 +GaussianMLPPolicy/dLoss -2.10747e-06 +Iteration 109 +MetaTest/Average/AverageDiscountedReturn -57.7754 +MetaTest/Average/AverageReturn -57.7754 +MetaTest/Average/Iteration 109 +MetaTest/Average/MaxReturn -49.0403 +MetaTest/Average/MinReturn -73.6763 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.07114 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.7754 +MetaTest/__unnamed_task__/AverageReturn -57.7754 +MetaTest/__unnamed_task__/Iteration 109 +MetaTest/__unnamed_task__/MaxReturn -49.0403 +MetaTest/__unnamed_task__/MinReturn -73.6763 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.07114 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.52e+06 +__unnamed_task__/AverageDiscountedReturn -31.3909 +__unnamed_task__/AverageReturn -56.6542 +__unnamed_task__/Iteration 109 +__unnamed_task__/MaxReturn -45.8041 +__unnamed_task__/MinReturn -73.5472 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.93844 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 03:37:42 | [maml_trainer] epoch #110 | Sampling for adapation and meta-testing... +2025-03-31 03:41:35 | [maml_trainer] epoch #110 | Finished meta-testing... +2025-03-31 03:41:35 | [maml_trainer] epoch #110 | Saving snapshot... +2025-03-31 03:41:55 | [maml_trainer] epoch #110 | Saved +2025-03-31 03:41:55 | [maml_trainer] epoch #110 | Time 135216.42 s +2025-03-31 03:41:55 | [maml_trainer] epoch #110 | EpochTime 1183.83 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.103 +Average/AverageReturn -56.0484 +Average/Iteration 110 +Average/MaxReturn -43.023 +Average/MinReturn -74.3782 +Average/NumEpisodes 80 +Average/StdReturn 5.57314 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9959 +GaussianMLPPolicy/KLAfter 0.00304184 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.53524e-06 +GaussianMLPPolicy/LossBefore -6.85453e-09 +GaussianMLPPolicy/dLoss -5.5421e-06 +Iteration 110 +MetaTest/Average/AverageDiscountedReturn -55.1345 +MetaTest/Average/AverageReturn -55.1345 +MetaTest/Average/Iteration 110 +MetaTest/Average/MaxReturn -47.1278 +MetaTest/Average/MinReturn -67.7522 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.33502 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.1345 +MetaTest/__unnamed_task__/AverageReturn -55.1345 +MetaTest/__unnamed_task__/Iteration 110 +MetaTest/__unnamed_task__/MaxReturn -47.1278 +MetaTest/__unnamed_task__/MinReturn -67.7522 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.33502 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.552e+06 +__unnamed_task__/AverageDiscountedReturn -31.103 +__unnamed_task__/AverageReturn -56.0484 +__unnamed_task__/Iteration 110 +__unnamed_task__/MaxReturn -43.023 +__unnamed_task__/MinReturn -74.3782 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.57314 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 03:57:30 | [maml_trainer] epoch #111 | Sampling for adapation and meta-testing... +2025-03-31 04:01:23 | [maml_trainer] epoch #111 | Finished meta-testing... +2025-03-31 04:01:23 | [maml_trainer] epoch #111 | Saving snapshot... +2025-03-31 04:01:43 | [maml_trainer] epoch #111 | Saved +2025-03-31 04:01:43 | [maml_trainer] epoch #111 | Time 136404.74 s +2025-03-31 04:01:43 | [maml_trainer] epoch #111 | EpochTime 1188.32 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.3798 +Average/AverageReturn -56.8278 +Average/Iteration 111 +Average/MaxReturn -47.2578 +Average/MinReturn -69.5726 +Average/NumEpisodes 80 +Average/StdReturn 5.73885 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99379 +GaussianMLPPolicy/KLAfter 0.00226838 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.06611e-06 +GaussianMLPPolicy/LossBefore -1.51992e-09 +GaussianMLPPolicy/dLoss 4.06459e-06 +Iteration 111 +MetaTest/Average/AverageDiscountedReturn -57.166 +MetaTest/Average/AverageReturn -57.166 +MetaTest/Average/Iteration 111 +MetaTest/Average/MaxReturn -48.72 +MetaTest/Average/MinReturn -65.9552 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.64011 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.166 +MetaTest/__unnamed_task__/AverageReturn -57.166 +MetaTest/__unnamed_task__/Iteration 111 +MetaTest/__unnamed_task__/MaxReturn -48.72 +MetaTest/__unnamed_task__/MinReturn -65.9552 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.64011 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.584e+06 +__unnamed_task__/AverageDiscountedReturn -31.3798 +__unnamed_task__/AverageReturn -56.8278 +__unnamed_task__/Iteration 111 +__unnamed_task__/MaxReturn -47.2578 +__unnamed_task__/MinReturn -69.5726 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.73885 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 04:17:21 | [maml_trainer] epoch #112 | Sampling for adapation and meta-testing... +2025-03-31 04:21:14 | [maml_trainer] epoch #112 | Finished meta-testing... +2025-03-31 04:21:14 | [maml_trainer] epoch #112 | Saving snapshot... +2025-03-31 04:21:34 | [maml_trainer] epoch #112 | Saved +2025-03-31 04:21:34 | [maml_trainer] epoch #112 | Time 137595.68 s +2025-03-31 04:21:34 | [maml_trainer] epoch #112 | EpochTime 1190.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.2815 +Average/AverageReturn -56.7889 +Average/Iteration 112 +Average/MaxReturn -47.3338 +Average/MinReturn -76.186 +Average/NumEpisodes 80 +Average/StdReturn 5.92595 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99141 +GaussianMLPPolicy/KLAfter 0.00244361 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.29131e-05 +GaussianMLPPolicy/LossBefore 3.57628e-10 +GaussianMLPPolicy/dLoss -3.29127e-05 +Iteration 112 +MetaTest/Average/AverageDiscountedReturn -56.3317 +MetaTest/Average/AverageReturn -56.3317 +MetaTest/Average/Iteration 112 +MetaTest/Average/MaxReturn -45.6918 +MetaTest/Average/MinReturn -65.58 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.02312 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.3317 +MetaTest/__unnamed_task__/AverageReturn -56.3317 +MetaTest/__unnamed_task__/Iteration 112 +MetaTest/__unnamed_task__/MaxReturn -45.6918 +MetaTest/__unnamed_task__/MinReturn -65.58 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.02312 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.616e+06 +__unnamed_task__/AverageDiscountedReturn -31.2815 +__unnamed_task__/AverageReturn -56.7889 +__unnamed_task__/Iteration 112 +__unnamed_task__/MaxReturn -47.3338 +__unnamed_task__/MinReturn -76.186 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.92595 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 04:37:13 | [maml_trainer] epoch #113 | Sampling for adapation and meta-testing... +2025-03-31 04:41:08 | [maml_trainer] epoch #113 | Finished meta-testing... +2025-03-31 04:41:08 | [maml_trainer] epoch #113 | Saving snapshot... +2025-03-31 04:41:27 | [maml_trainer] epoch #113 | Saved +2025-03-31 04:41:27 | [maml_trainer] epoch #113 | Time 138788.82 s +2025-03-31 04:41:27 | [maml_trainer] epoch #113 | EpochTime 1193.13 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.8106 +Average/AverageReturn -55.3793 +Average/Iteration 113 +Average/MaxReturn -44.0285 +Average/MinReturn -71.7904 +Average/NumEpisodes 80 +Average/StdReturn 5.92676 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98803 +GaussianMLPPolicy/KLAfter 0.00255879 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000179373 +GaussianMLPPolicy/LossBefore -6.67572e-09 +GaussianMLPPolicy/dLoss 0.000179366 +Iteration 113 +MetaTest/Average/AverageDiscountedReturn -58.6012 +MetaTest/Average/AverageReturn -58.6012 +MetaTest/Average/Iteration 113 +MetaTest/Average/MaxReturn -45.9849 +MetaTest/Average/MinReturn -96.561 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3492 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.6012 +MetaTest/__unnamed_task__/AverageReturn -58.6012 +MetaTest/__unnamed_task__/Iteration 113 +MetaTest/__unnamed_task__/MaxReturn -45.9849 +MetaTest/__unnamed_task__/MinReturn -96.561 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3492 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.648e+06 +__unnamed_task__/AverageDiscountedReturn -30.8106 +__unnamed_task__/AverageReturn -55.3793 +__unnamed_task__/Iteration 113 +__unnamed_task__/MaxReturn -44.0285 +__unnamed_task__/MinReturn -71.7904 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.92676 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 04:57:02 | [maml_trainer] epoch #114 | Sampling for adapation and meta-testing... +2025-03-31 05:00:56 | [maml_trainer] epoch #114 | Finished meta-testing... +2025-03-31 05:00:56 | [maml_trainer] epoch #114 | Saving snapshot... +2025-03-31 05:01:16 | [maml_trainer] epoch #114 | Saved +2025-03-31 05:01:16 | [maml_trainer] epoch #114 | Time 139977.50 s +2025-03-31 05:01:16 | [maml_trainer] epoch #114 | EpochTime 1188.68 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.4744 +Average/AverageReturn -55.0577 +Average/Iteration 114 +Average/MaxReturn -44.668 +Average/MinReturn -68.4873 +Average/NumEpisodes 80 +Average/StdReturn 5.86901 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98661 +GaussianMLPPolicy/KLAfter 0.00389353 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000125983 +GaussianMLPPolicy/LossBefore -3.75509e-09 +GaussianMLPPolicy/dLoss 0.000125979 +Iteration 114 +MetaTest/Average/AverageDiscountedReturn -53.3399 +MetaTest/Average/AverageReturn -53.3399 +MetaTest/Average/Iteration 114 +MetaTest/Average/MaxReturn -43.7244 +MetaTest/Average/MinReturn -64.2744 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.00237 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.3399 +MetaTest/__unnamed_task__/AverageReturn -53.3399 +MetaTest/__unnamed_task__/Iteration 114 +MetaTest/__unnamed_task__/MaxReturn -43.7244 +MetaTest/__unnamed_task__/MinReturn -64.2744 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.00237 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.68e+06 +__unnamed_task__/AverageDiscountedReturn -30.4744 +__unnamed_task__/AverageReturn -55.0577 +__unnamed_task__/Iteration 114 +__unnamed_task__/MaxReturn -44.668 +__unnamed_task__/MinReturn -68.4873 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.86901 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 05:16:47 | [maml_trainer] epoch #115 | Sampling for adapation and meta-testing... +2025-03-31 05:20:39 | [maml_trainer] epoch #115 | Finished meta-testing... +2025-03-31 05:20:39 | [maml_trainer] epoch #115 | Saving snapshot... +2025-03-31 05:20:59 | [maml_trainer] epoch #115 | Saved +2025-03-31 05:20:59 | [maml_trainer] epoch #115 | Time 141161.00 s +2025-03-31 05:20:59 | [maml_trainer] epoch #115 | EpochTime 1183.49 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.8196 +Average/AverageReturn -52.9255 +Average/Iteration 115 +Average/MaxReturn -9.84989 +Average/MinReturn -88.7872 +Average/NumEpisodes 80 +Average/StdReturn 8.66277 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98554 +GaussianMLPPolicy/KLAfter 0.00332509 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000106326 +GaussianMLPPolicy/LossBefore -7.89762e-09 +GaussianMLPPolicy/dLoss 0.000106318 +Iteration 115 +MetaTest/Average/AverageDiscountedReturn -47.3155 +MetaTest/Average/AverageReturn -47.3155 +MetaTest/Average/Iteration 115 +MetaTest/Average/MaxReturn 200.335 +MetaTest/Average/MinReturn -140.043 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 61.4836 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.3155 +MetaTest/__unnamed_task__/AverageReturn -47.3155 +MetaTest/__unnamed_task__/Iteration 115 +MetaTest/__unnamed_task__/MaxReturn 200.335 +MetaTest/__unnamed_task__/MinReturn -140.043 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 61.4836 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.712e+06 +__unnamed_task__/AverageDiscountedReturn -29.8196 +__unnamed_task__/AverageReturn -52.9255 +__unnamed_task__/Iteration 115 +__unnamed_task__/MaxReturn -9.84989 +__unnamed_task__/MinReturn -88.7872 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.66277 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 05:36:27 | [maml_trainer] epoch #116 | Sampling for adapation and meta-testing... +2025-03-31 05:40:20 | [maml_trainer] epoch #116 | Finished meta-testing... +2025-03-31 05:40:20 | [maml_trainer] epoch #116 | Saving snapshot... +2025-03-31 05:40:40 | [maml_trainer] epoch #116 | Saved +2025-03-31 05:40:40 | [maml_trainer] epoch #116 | Time 142341.95 s +2025-03-31 05:40:40 | [maml_trainer] epoch #116 | EpochTime 1180.95 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.8571 +Average/AverageReturn -54.4146 +Average/Iteration 116 +Average/MaxReturn -42.7041 +Average/MinReturn -105.429 +Average/NumEpisodes 80 +Average/StdReturn 9.3702 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98323 +GaussianMLPPolicy/KLAfter 0.00284398 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000100237 +GaussianMLPPolicy/LossBefore -1.37091e-09 +GaussianMLPPolicy/dLoss 0.000100236 +Iteration 116 +MetaTest/Average/AverageDiscountedReturn -58.2101 +MetaTest/Average/AverageReturn -58.2101 +MetaTest/Average/Iteration 116 +MetaTest/Average/MaxReturn -43.3344 +MetaTest/Average/MinReturn -138.385 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.8702 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.2101 +MetaTest/__unnamed_task__/AverageReturn -58.2101 +MetaTest/__unnamed_task__/Iteration 116 +MetaTest/__unnamed_task__/MaxReturn -43.3344 +MetaTest/__unnamed_task__/MinReturn -138.385 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.8702 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.744e+06 +__unnamed_task__/AverageDiscountedReturn -29.8571 +__unnamed_task__/AverageReturn -54.4146 +__unnamed_task__/Iteration 116 +__unnamed_task__/MaxReturn -42.7041 +__unnamed_task__/MinReturn -105.429 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.3702 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 05:56:29 | [maml_trainer] epoch #117 | Sampling for adapation and meta-testing... +2025-03-31 06:00:29 | [maml_trainer] epoch #117 | Finished meta-testing... +2025-03-31 06:00:29 | [maml_trainer] epoch #117 | Saving snapshot... +2025-03-31 06:00:48 | [maml_trainer] epoch #117 | Saved +2025-03-31 06:00:48 | [maml_trainer] epoch #117 | Time 143549.91 s +2025-03-31 06:00:48 | [maml_trainer] epoch #117 | EpochTime 1207.96 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.1649 +Average/AverageReturn -57.4228 +Average/Iteration 117 +Average/MaxReturn -43.0728 +Average/MinReturn -136.417 +Average/NumEpisodes 80 +Average/StdReturn 15.7214 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98327 +GaussianMLPPolicy/KLAfter 0.00213764 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000100912 +GaussianMLPPolicy/LossBefore -2.74181e-09 +GaussianMLPPolicy/dLoss -0.000100915 +Iteration 117 +MetaTest/Average/AverageDiscountedReturn -59.3084 +MetaTest/Average/AverageReturn -59.3084 +MetaTest/Average/Iteration 117 +MetaTest/Average/MaxReturn -44.6451 +MetaTest/Average/MinReturn -120.232 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.9727 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.3084 +MetaTest/__unnamed_task__/AverageReturn -59.3084 +MetaTest/__unnamed_task__/Iteration 117 +MetaTest/__unnamed_task__/MaxReturn -44.6451 +MetaTest/__unnamed_task__/MinReturn -120.232 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.9727 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.776e+06 +__unnamed_task__/AverageDiscountedReturn -30.1649 +__unnamed_task__/AverageReturn -57.4228 +__unnamed_task__/Iteration 117 +__unnamed_task__/MaxReturn -43.0728 +__unnamed_task__/MinReturn -136.417 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.7214 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 06:16:38 | [maml_trainer] epoch #118 | Sampling for adapation and meta-testing... +2025-03-31 06:20:34 | [maml_trainer] epoch #118 | Finished meta-testing... +2025-03-31 06:20:34 | [maml_trainer] epoch #118 | Saving snapshot... +2025-03-31 06:20:53 | [maml_trainer] epoch #118 | Saved +2025-03-31 06:20:53 | [maml_trainer] epoch #118 | Time 144754.93 s +2025-03-31 06:20:53 | [maml_trainer] epoch #118 | EpochTime 1205.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.2783 +Average/AverageReturn -65.5746 +Average/Iteration 118 +Average/MaxReturn -9.14816 +Average/MinReturn -169.065 +Average/NumEpisodes 80 +Average/StdReturn 28.3323 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98255 +GaussianMLPPolicy/KLAfter 0.00163972 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.08152e-05 +GaussianMLPPolicy/LossBefore 7.82311e-09 +GaussianMLPPolicy/dLoss -2.08074e-05 +Iteration 118 +MetaTest/Average/AverageDiscountedReturn -67.1822 +MetaTest/Average/AverageReturn -67.1822 +MetaTest/Average/Iteration 118 +MetaTest/Average/MaxReturn -42.3109 +MetaTest/Average/MinReturn -147.828 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 29.8596 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -67.1822 +MetaTest/__unnamed_task__/AverageReturn -67.1822 +MetaTest/__unnamed_task__/Iteration 118 +MetaTest/__unnamed_task__/MaxReturn -42.3109 +MetaTest/__unnamed_task__/MinReturn -147.828 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 29.8596 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.808e+06 +__unnamed_task__/AverageDiscountedReturn -31.2783 +__unnamed_task__/AverageReturn -65.5746 +__unnamed_task__/Iteration 118 +__unnamed_task__/MaxReturn -9.14816 +__unnamed_task__/MinReturn -169.065 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 28.3323 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 06:36:37 | [maml_trainer] epoch #119 | Sampling for adapation and meta-testing... +2025-03-31 06:40:36 | [maml_trainer] epoch #119 | Finished meta-testing... +2025-03-31 06:40:36 | [maml_trainer] epoch #119 | Saving snapshot... +2025-03-31 06:40:57 | [maml_trainer] epoch #119 | Saved +2025-03-31 06:40:57 | [maml_trainer] epoch #119 | Time 145958.57 s +2025-03-31 06:40:57 | [maml_trainer] epoch #119 | EpochTime 1203.63 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.5451 +Average/AverageReturn -59.5985 +Average/Iteration 119 +Average/MaxReturn -43.8182 +Average/MinReturn -143.531 +Average/NumEpisodes 80 +Average/StdReturn 18.3729 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98174 +GaussianMLPPolicy/KLAfter 0.00139101 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.151e-06 +GaussianMLPPolicy/LossBefore 5.24521e-09 +GaussianMLPPolicy/dLoss 3.15624e-06 +Iteration 119 +MetaTest/Average/AverageDiscountedReturn -69.2156 +MetaTest/Average/AverageReturn -69.2156 +MetaTest/Average/Iteration 119 +MetaTest/Average/MaxReturn -44.6082 +MetaTest/Average/MinReturn -163.069 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 33.5967 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -69.2156 +MetaTest/__unnamed_task__/AverageReturn -69.2156 +MetaTest/__unnamed_task__/Iteration 119 +MetaTest/__unnamed_task__/MaxReturn -44.6082 +MetaTest/__unnamed_task__/MinReturn -163.069 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 33.5967 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.84e+06 +__unnamed_task__/AverageDiscountedReturn -30.5451 +__unnamed_task__/AverageReturn -59.5985 +__unnamed_task__/Iteration 119 +__unnamed_task__/MaxReturn -43.8182 +__unnamed_task__/MinReturn -143.531 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.3729 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 06:57:02 | [maml_trainer] epoch #120 | Sampling for adapation and meta-testing... +2025-03-31 07:01:00 | [maml_trainer] epoch #120 | Finished meta-testing... +2025-03-31 07:01:00 | [maml_trainer] epoch #120 | Saving snapshot... +2025-03-31 07:01:20 | [maml_trainer] epoch #120 | Saved +2025-03-31 07:01:20 | [maml_trainer] epoch #120 | Time 147181.67 s +2025-03-31 07:01:20 | [maml_trainer] epoch #120 | EpochTime 1223.10 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.3201 +Average/AverageReturn -60.0864 +Average/Iteration 120 +Average/MaxReturn 220.486 +Average/MinReturn -166.273 +Average/NumEpisodes 80 +Average/StdReturn 41.6817 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98166 +GaussianMLPPolicy/KLAfter 0.00180094 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.00246e-05 +GaussianMLPPolicy/LossBefore 1.06543e-09 +GaussianMLPPolicy/dLoss 1.00257e-05 +Iteration 120 +MetaTest/Average/AverageDiscountedReturn -80.3433 +MetaTest/Average/AverageReturn -80.3433 +MetaTest/Average/Iteration 120 +MetaTest/Average/MaxReturn -43.5447 +MetaTest/Average/MinReturn -159.158 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 40.0463 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -80.3433 +MetaTest/__unnamed_task__/AverageReturn -80.3433 +MetaTest/__unnamed_task__/Iteration 120 +MetaTest/__unnamed_task__/MaxReturn -43.5447 +MetaTest/__unnamed_task__/MinReturn -159.158 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 40.0463 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.872e+06 +__unnamed_task__/AverageDiscountedReturn -30.3201 +__unnamed_task__/AverageReturn -60.0864 +__unnamed_task__/Iteration 120 +__unnamed_task__/MaxReturn 220.486 +__unnamed_task__/MinReturn -166.273 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 41.6817 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 07:17:01 | [maml_trainer] epoch #121 | Sampling for adapation and meta-testing... +2025-03-31 07:20:53 | [maml_trainer] epoch #121 | Finished meta-testing... +2025-03-31 07:20:53 | [maml_trainer] epoch #121 | Saving snapshot... +2025-03-31 07:21:13 | [maml_trainer] epoch #121 | Saved +2025-03-31 07:21:13 | [maml_trainer] epoch #121 | Time 148374.95 s +2025-03-31 07:21:13 | [maml_trainer] epoch #121 | EpochTime 1193.28 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.7503 +Average/AverageReturn -62.0271 +Average/Iteration 121 +Average/MaxReturn 14.9394 +Average/MinReturn -152.324 +Average/NumEpisodes 80 +Average/StdReturn 25.5698 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98162 +GaussianMLPPolicy/KLAfter 0.00118199 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.30071e-05 +GaussianMLPPolicy/LossBefore 4.96209e-09 +GaussianMLPPolicy/dLoss 5.30121e-05 +Iteration 121 +MetaTest/Average/AverageDiscountedReturn -79.4441 +MetaTest/Average/AverageReturn -79.4441 +MetaTest/Average/Iteration 121 +MetaTest/Average/MaxReturn -43.5404 +MetaTest/Average/MinReturn -163.342 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 46.2436 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -79.4441 +MetaTest/__unnamed_task__/AverageReturn -79.4441 +MetaTest/__unnamed_task__/Iteration 121 +MetaTest/__unnamed_task__/MaxReturn -43.5404 +MetaTest/__unnamed_task__/MinReturn -163.342 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 46.2436 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.904e+06 +__unnamed_task__/AverageDiscountedReturn -30.7503 +__unnamed_task__/AverageReturn -62.0271 +__unnamed_task__/Iteration 121 +__unnamed_task__/MaxReturn 14.9394 +__unnamed_task__/MinReturn -152.324 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.5698 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 07:36:54 | [maml_trainer] epoch #122 | Sampling for adapation and meta-testing... +2025-03-31 07:40:50 | [maml_trainer] epoch #122 | Finished meta-testing... +2025-03-31 07:40:50 | [maml_trainer] epoch #122 | Saving snapshot... +2025-03-31 07:41:10 | [maml_trainer] epoch #122 | Saved +2025-03-31 07:41:10 | [maml_trainer] epoch #122 | Time 149571.02 s +2025-03-31 07:41:10 | [maml_trainer] epoch #122 | EpochTime 1196.07 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.7524 +Average/AverageReturn -68.5954 +Average/Iteration 122 +Average/MaxReturn -42.3746 +Average/MinReturn -167.514 +Average/NumEpisodes 80 +Average/StdReturn 31.5689 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98069 +GaussianMLPPolicy/KLAfter 0.0015793 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.36951e-05 +GaussianMLPPolicy/LossBefore -6.85453e-09 +GaussianMLPPolicy/dLoss 2.36883e-05 +Iteration 122 +MetaTest/Average/AverageDiscountedReturn -56.7805 +MetaTest/Average/AverageReturn -56.7805 +MetaTest/Average/Iteration 122 +MetaTest/Average/MaxReturn -41.9141 +MetaTest/Average/MinReturn -73.7327 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.83677 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.7805 +MetaTest/__unnamed_task__/AverageReturn -56.7805 +MetaTest/__unnamed_task__/Iteration 122 +MetaTest/__unnamed_task__/MaxReturn -41.9141 +MetaTest/__unnamed_task__/MinReturn -73.7327 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.83677 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.936e+06 +__unnamed_task__/AverageDiscountedReturn -31.7524 +__unnamed_task__/AverageReturn -68.5954 +__unnamed_task__/Iteration 122 +__unnamed_task__/MaxReturn -42.3746 +__unnamed_task__/MinReturn -167.514 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 31.5689 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 07:57:07 | [maml_trainer] epoch #123 | Sampling for adapation and meta-testing... +2025-03-31 08:01:04 | [maml_trainer] epoch #123 | Finished meta-testing... +2025-03-31 08:01:04 | [maml_trainer] epoch #123 | Saving snapshot... +2025-03-31 08:01:25 | [maml_trainer] epoch #123 | Saved +2025-03-31 08:01:25 | [maml_trainer] epoch #123 | Time 150786.39 s +2025-03-31 08:01:25 | [maml_trainer] epoch #123 | EpochTime 1215.37 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.7952 +Average/AverageReturn -61.9762 +Average/Iteration 123 +Average/MaxReturn -42.2467 +Average/MinReturn -174.447 +Average/NumEpisodes 80 +Average/StdReturn 22.7125 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97981 +GaussianMLPPolicy/KLAfter 0.00134545 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.18604e-05 +GaussianMLPPolicy/LossBefore 5.2005e-09 +GaussianMLPPolicy/dLoss 3.18656e-05 +Iteration 123 +MetaTest/Average/AverageDiscountedReturn -63.233 +MetaTest/Average/AverageReturn -63.233 +MetaTest/Average/Iteration 123 +MetaTest/Average/MaxReturn -48.0512 +MetaTest/Average/MinReturn -138.286 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.5712 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.233 +MetaTest/__unnamed_task__/AverageReturn -63.233 +MetaTest/__unnamed_task__/Iteration 123 +MetaTest/__unnamed_task__/MaxReturn -48.0512 +MetaTest/__unnamed_task__/MinReturn -138.286 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.5712 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.968e+06 +__unnamed_task__/AverageDiscountedReturn -30.7952 +__unnamed_task__/AverageReturn -61.9762 +__unnamed_task__/Iteration 123 +__unnamed_task__/MaxReturn -42.2467 +__unnamed_task__/MinReturn -174.447 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.7125 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 08:17:01 | [maml_trainer] epoch #124 | Sampling for adapation and meta-testing... +2025-03-31 08:20:57 | [maml_trainer] epoch #124 | Finished meta-testing... +2025-03-31 08:20:57 | [maml_trainer] epoch #124 | Saving snapshot... +2025-03-31 08:21:18 | [maml_trainer] epoch #124 | Saved +2025-03-31 08:21:18 | [maml_trainer] epoch #124 | Time 151979.17 s +2025-03-31 08:21:18 | [maml_trainer] epoch #124 | EpochTime 1192.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.0511 +Average/AverageReturn -61.9413 +Average/Iteration 124 +Average/MaxReturn -45.5598 +Average/MinReturn -144.409 +Average/NumEpisodes 80 +Average/StdReturn 20.9883 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97966 +GaussianMLPPolicy/KLAfter 0.00108865 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.22206e-05 +GaussianMLPPolicy/LossBefore 3.57628e-09 +GaussianMLPPolicy/dLoss 9.22241e-05 +Iteration 124 +MetaTest/Average/AverageDiscountedReturn -54.9094 +MetaTest/Average/AverageReturn -54.9094 +MetaTest/Average/Iteration 124 +MetaTest/Average/MaxReturn -42.3374 +MetaTest/Average/MinReturn -97.7727 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.3829 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.9094 +MetaTest/__unnamed_task__/AverageReturn -54.9094 +MetaTest/__unnamed_task__/Iteration 124 +MetaTest/__unnamed_task__/MaxReturn -42.3374 +MetaTest/__unnamed_task__/MinReturn -97.7727 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.3829 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4e+06 +__unnamed_task__/AverageDiscountedReturn -31.0511 +__unnamed_task__/AverageReturn -61.9413 +__unnamed_task__/Iteration 124 +__unnamed_task__/MaxReturn -45.5598 +__unnamed_task__/MinReturn -144.409 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.9883 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 08:36:58 | [maml_trainer] epoch #125 | Sampling for adapation and meta-testing... +2025-03-31 08:40:50 | [maml_trainer] epoch #125 | Finished meta-testing... +2025-03-31 08:40:50 | [maml_trainer] epoch #125 | Saving snapshot... +2025-03-31 08:41:11 | [maml_trainer] epoch #125 | Saved +2025-03-31 08:41:11 | [maml_trainer] epoch #125 | Time 153172.23 s +2025-03-31 08:41:11 | [maml_trainer] epoch #125 | EpochTime 1193.05 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.6731 +Average/AverageReturn -53.3879 +Average/Iteration 125 +Average/MaxReturn 199.308 +Average/MinReturn -104.649 +Average/NumEpisodes 80 +Average/StdReturn 30.7855 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9792 +GaussianMLPPolicy/KLAfter 0.000983339 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.33164e-05 +GaussianMLPPolicy/LossBefore 8.41916e-09 +GaussianMLPPolicy/dLoss -8.3308e-05 +Iteration 125 +MetaTest/Average/AverageDiscountedReturn -54.0034 +MetaTest/Average/AverageReturn -54.0034 +MetaTest/Average/Iteration 125 +MetaTest/Average/MaxReturn -44.415 +MetaTest/Average/MinReturn -66.1736 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.96843 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.0034 +MetaTest/__unnamed_task__/AverageReturn -54.0034 +MetaTest/__unnamed_task__/Iteration 125 +MetaTest/__unnamed_task__/MaxReturn -44.415 +MetaTest/__unnamed_task__/MinReturn -66.1736 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.96843 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.032e+06 +__unnamed_task__/AverageDiscountedReturn -29.6731 +__unnamed_task__/AverageReturn -53.3879 +__unnamed_task__/Iteration 125 +__unnamed_task__/MaxReturn 199.308 +__unnamed_task__/MinReturn -104.649 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 30.7855 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 08:56:55 | [maml_trainer] epoch #126 | Sampling for adapation and meta-testing... +2025-03-31 09:00:48 | [maml_trainer] epoch #126 | Finished meta-testing... +2025-03-31 09:00:48 | [maml_trainer] epoch #126 | Saving snapshot... +2025-03-31 09:01:08 | [maml_trainer] epoch #126 | Saved +2025-03-31 09:01:08 | [maml_trainer] epoch #126 | Time 154369.07 s +2025-03-31 09:01:08 | [maml_trainer] epoch #126 | EpochTime 1196.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.3521 +Average/AverageReturn -58.0427 +Average/Iteration 126 +Average/MaxReturn -42.6793 +Average/MinReturn -112.12 +Average/NumEpisodes 80 +Average/StdReturn 13.2248 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97904 +GaussianMLPPolicy/KLAfter 0.00115785 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.74439e-05 +GaussianMLPPolicy/LossBefore -2.83122e-09 +GaussianMLPPolicy/dLoss -5.74467e-05 +Iteration 126 +MetaTest/Average/AverageDiscountedReturn -55.2901 +MetaTest/Average/AverageReturn -55.2901 +MetaTest/Average/Iteration 126 +MetaTest/Average/MaxReturn -44.6718 +MetaTest/Average/MinReturn -71.1036 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.02065 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.2901 +MetaTest/__unnamed_task__/AverageReturn -55.2901 +MetaTest/__unnamed_task__/Iteration 126 +MetaTest/__unnamed_task__/MaxReturn -44.6718 +MetaTest/__unnamed_task__/MinReturn -71.1036 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.02065 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.064e+06 +__unnamed_task__/AverageDiscountedReturn -30.3521 +__unnamed_task__/AverageReturn -58.0427 +__unnamed_task__/Iteration 126 +__unnamed_task__/MaxReturn -42.6793 +__unnamed_task__/MinReturn -112.12 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2248 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 09:16:41 | [maml_trainer] epoch #127 | Sampling for adapation and meta-testing... +2025-03-31 09:20:37 | [maml_trainer] epoch #127 | Finished meta-testing... +2025-03-31 09:20:37 | [maml_trainer] epoch #127 | Saving snapshot... +2025-03-31 09:20:58 | [maml_trainer] epoch #127 | Saved +2025-03-31 09:20:58 | [maml_trainer] epoch #127 | Time 155559.15 s +2025-03-31 09:20:58 | [maml_trainer] epoch #127 | EpochTime 1190.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.4531 +Average/AverageReturn -56.5341 +Average/Iteration 127 +Average/MaxReturn -41.8727 +Average/MinReturn -110.381 +Average/NumEpisodes 80 +Average/StdReturn 10.0282 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97844 +GaussianMLPPolicy/KLAfter 0.00126258 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.13294e-06 +GaussianMLPPolicy/LossBefore 6.88434e-09 +GaussianMLPPolicy/dLoss 8.13982e-06 +Iteration 127 +MetaTest/Average/AverageDiscountedReturn -56.6858 +MetaTest/Average/AverageReturn -56.6858 +MetaTest/Average/Iteration 127 +MetaTest/Average/MaxReturn -46.1969 +MetaTest/Average/MinReturn -72.8412 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.80377 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.6858 +MetaTest/__unnamed_task__/AverageReturn -56.6858 +MetaTest/__unnamed_task__/Iteration 127 +MetaTest/__unnamed_task__/MaxReturn -46.1969 +MetaTest/__unnamed_task__/MinReturn -72.8412 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.80377 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.096e+06 +__unnamed_task__/AverageDiscountedReturn -30.4531 +__unnamed_task__/AverageReturn -56.5341 +__unnamed_task__/Iteration 127 +__unnamed_task__/MaxReturn -41.8727 +__unnamed_task__/MinReturn -110.381 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.0282 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 09:36:38 | [maml_trainer] epoch #128 | Sampling for adapation and meta-testing... +2025-03-31 09:40:30 | [maml_trainer] epoch #128 | Finished meta-testing... +2025-03-31 09:40:30 | [maml_trainer] epoch #128 | Saving snapshot... +2025-03-31 09:40:51 | [maml_trainer] epoch #128 | Saved +2025-03-31 09:40:51 | [maml_trainer] epoch #128 | Time 156752.10 s +2025-03-31 09:40:51 | [maml_trainer] epoch #128 | EpochTime 1192.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.5382 +Average/AverageReturn -56.3445 +Average/Iteration 128 +Average/MaxReturn -41.5494 +Average/MinReturn -82.7473 +Average/NumEpisodes 80 +Average/StdReturn 9.40918 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97974 +GaussianMLPPolicy/KLAfter 0.00238186 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.58707e-05 +GaussianMLPPolicy/LossBefore 2.17557e-09 +GaussianMLPPolicy/dLoss 1.58729e-05 +Iteration 128 +MetaTest/Average/AverageDiscountedReturn -55.1845 +MetaTest/Average/AverageReturn -55.1845 +MetaTest/Average/Iteration 128 +MetaTest/Average/MaxReturn -46.0313 +MetaTest/Average/MinReturn -75.9331 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.18228 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.1845 +MetaTest/__unnamed_task__/AverageReturn -55.1845 +MetaTest/__unnamed_task__/Iteration 128 +MetaTest/__unnamed_task__/MaxReturn -46.0313 +MetaTest/__unnamed_task__/MinReturn -75.9331 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.18228 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.128e+06 +__unnamed_task__/AverageDiscountedReturn -30.5382 +__unnamed_task__/AverageReturn -56.3445 +__unnamed_task__/Iteration 128 +__unnamed_task__/MaxReturn -41.5494 +__unnamed_task__/MinReturn -82.7473 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.40918 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 09:56:24 | [maml_trainer] epoch #129 | Sampling for adapation and meta-testing... +2025-03-31 10:00:19 | [maml_trainer] epoch #129 | Finished meta-testing... +2025-03-31 10:00:19 | [maml_trainer] epoch #129 | Saving snapshot... +2025-03-31 10:00:40 | [maml_trainer] epoch #129 | Saved +2025-03-31 10:00:40 | [maml_trainer] epoch #129 | Time 157941.58 s +2025-03-31 10:00:40 | [maml_trainer] epoch #129 | EpochTime 1189.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.2141 +Average/AverageReturn -55.5359 +Average/Iteration 129 +Average/MaxReturn -43.7317 +Average/MinReturn -78.6447 +Average/NumEpisodes 80 +Average/StdReturn 7.22188 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98115 +GaussianMLPPolicy/KLAfter 0.00236486 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.78683e-05 +GaussianMLPPolicy/LossBefore 1.07288e-08 +GaussianMLPPolicy/dLoss 2.78791e-05 +Iteration 129 +MetaTest/Average/AverageDiscountedReturn -56.9728 +MetaTest/Average/AverageReturn -56.9728 +MetaTest/Average/Iteration 129 +MetaTest/Average/MaxReturn -46.8602 +MetaTest/Average/MinReturn -71.7306 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.93662 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.9728 +MetaTest/__unnamed_task__/AverageReturn -56.9728 +MetaTest/__unnamed_task__/Iteration 129 +MetaTest/__unnamed_task__/MaxReturn -46.8602 +MetaTest/__unnamed_task__/MinReturn -71.7306 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.93662 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.16e+06 +__unnamed_task__/AverageDiscountedReturn -30.2141 +__unnamed_task__/AverageReturn -55.5359 +__unnamed_task__/Iteration 129 +__unnamed_task__/MaxReturn -43.7317 +__unnamed_task__/MinReturn -78.6447 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.22188 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 10:16:17 | [maml_trainer] epoch #130 | Sampling for adapation and meta-testing... +2025-03-31 10:20:11 | [maml_trainer] epoch #130 | Finished meta-testing... +2025-03-31 10:20:11 | [maml_trainer] epoch #130 | Saving snapshot... +2025-03-31 10:20:31 | [maml_trainer] epoch #130 | Saved +2025-03-31 10:20:31 | [maml_trainer] epoch #130 | Time 159132.49 s +2025-03-31 10:20:31 | [maml_trainer] epoch #130 | EpochTime 1190.91 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.8996 +Average/AverageReturn -57.1669 +Average/Iteration 130 +Average/MaxReturn -45.273 +Average/MinReturn -79.4622 +Average/NumEpisodes 80 +Average/StdReturn 8.66978 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98382 +GaussianMLPPolicy/KLAfter 0.00258327 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.80748e-05 +GaussianMLPPolicy/LossBefore 1.34706e-08 +GaussianMLPPolicy/dLoss -3.80614e-05 +Iteration 130 +MetaTest/Average/AverageDiscountedReturn -56.3021 +MetaTest/Average/AverageReturn -56.3021 +MetaTest/Average/Iteration 130 +MetaTest/Average/MaxReturn -44.5081 +MetaTest/Average/MinReturn -75.9675 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.20776 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.3021 +MetaTest/__unnamed_task__/AverageReturn -56.3021 +MetaTest/__unnamed_task__/Iteration 130 +MetaTest/__unnamed_task__/MaxReturn -44.5081 +MetaTest/__unnamed_task__/MinReturn -75.9675 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.20776 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.192e+06 +__unnamed_task__/AverageDiscountedReturn -30.8996 +__unnamed_task__/AverageReturn -57.1669 +__unnamed_task__/Iteration 130 +__unnamed_task__/MaxReturn -45.273 +__unnamed_task__/MinReturn -79.4622 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.66978 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 10:36:19 | [maml_trainer] epoch #131 | Sampling for adapation and meta-testing... +2025-03-31 10:40:13 | [maml_trainer] epoch #131 | Finished meta-testing... +2025-03-31 10:40:13 | [maml_trainer] epoch #131 | Saving snapshot... +2025-03-31 10:40:33 | [maml_trainer] epoch #131 | Saved +2025-03-31 10:40:33 | [maml_trainer] epoch #131 | Time 160334.45 s +2025-03-31 10:40:33 | [maml_trainer] epoch #131 | EpochTime 1201.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -31.0427 +Average/AverageReturn -57.7813 +Average/Iteration 131 +Average/MaxReturn -44.709 +Average/MinReturn -75.0056 +Average/NumEpisodes 80 +Average/StdReturn 7.69824 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98659 +GaussianMLPPolicy/KLAfter 0.00240422 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.36844e-05 +GaussianMLPPolicy/LossBefore 1.48416e-08 +GaussianMLPPolicy/dLoss 4.36993e-05 +Iteration 131 +MetaTest/Average/AverageDiscountedReturn -57.564 +MetaTest/Average/AverageReturn -57.564 +MetaTest/Average/Iteration 131 +MetaTest/Average/MaxReturn -49.8674 +MetaTest/Average/MinReturn -76.889 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.1348 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.564 +MetaTest/__unnamed_task__/AverageReturn -57.564 +MetaTest/__unnamed_task__/Iteration 131 +MetaTest/__unnamed_task__/MaxReturn -49.8674 +MetaTest/__unnamed_task__/MinReturn -76.889 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.1348 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.224e+06 +__unnamed_task__/AverageDiscountedReturn -31.0427 +__unnamed_task__/AverageReturn -57.7813 +__unnamed_task__/Iteration 131 +__unnamed_task__/MaxReturn -44.709 +__unnamed_task__/MinReturn -75.0056 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.69824 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 10:57:08 | [maml_trainer] epoch #132 | Sampling for adapation and meta-testing... +2025-03-31 11:01:11 | [maml_trainer] epoch #132 | Finished meta-testing... +2025-03-31 11:01:11 | [maml_trainer] epoch #132 | Saving snapshot... +2025-03-31 11:01:31 | [maml_trainer] epoch #132 | Saved +2025-03-31 11:01:31 | [maml_trainer] epoch #132 | Time 161592.99 s +2025-03-31 11:01:31 | [maml_trainer] epoch #132 | EpochTime 1258.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.8012 +Average/AverageReturn -56.4995 +Average/Iteration 132 +Average/MaxReturn -44.2868 +Average/MinReturn -78.3547 +Average/NumEpisodes 80 +Average/StdReturn 6.20999 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98947 +GaussianMLPPolicy/KLAfter 0.00340467 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.04474e-05 +GaussianMLPPolicy/LossBefore -9.17912e-09 +GaussianMLPPolicy/dLoss 8.04382e-05 +Iteration 132 +MetaTest/Average/AverageDiscountedReturn -56.212 +MetaTest/Average/AverageReturn -56.212 +MetaTest/Average/Iteration 132 +MetaTest/Average/MaxReturn -46.7243 +MetaTest/Average/MinReturn -63.5036 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.23357 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.212 +MetaTest/__unnamed_task__/AverageReturn -56.212 +MetaTest/__unnamed_task__/Iteration 132 +MetaTest/__unnamed_task__/MaxReturn -46.7243 +MetaTest/__unnamed_task__/MinReturn -63.5036 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.23357 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.256e+06 +__unnamed_task__/AverageDiscountedReturn -30.8012 +__unnamed_task__/AverageReturn -56.4995 +__unnamed_task__/Iteration 132 +__unnamed_task__/MaxReturn -44.2868 +__unnamed_task__/MinReturn -78.3547 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.20999 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 11:17:44 | [maml_trainer] epoch #133 | Sampling for adapation and meta-testing... +2025-03-31 11:21:53 | [maml_trainer] epoch #133 | Finished meta-testing... +2025-03-31 11:21:53 | [maml_trainer] epoch #133 | Saving snapshot... +2025-03-31 11:22:14 | [maml_trainer] epoch #133 | Saved +2025-03-31 11:22:14 | [maml_trainer] epoch #133 | Time 162835.88 s +2025-03-31 11:22:14 | [maml_trainer] epoch #133 | EpochTime 1242.88 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.3461 +Average/AverageReturn -55.1036 +Average/Iteration 133 +Average/MaxReturn -45.7137 +Average/MinReturn -69.8459 +Average/NumEpisodes 80 +Average/StdReturn 5.58086 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99185 +GaussianMLPPolicy/KLAfter 0.0026655 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000138067 +GaussianMLPPolicy/LossBefore -7.09295e-09 +GaussianMLPPolicy/dLoss 0.00013806 +Iteration 133 +MetaTest/Average/AverageDiscountedReturn -55.4968 +MetaTest/Average/AverageReturn -55.4968 +MetaTest/Average/Iteration 133 +MetaTest/Average/MaxReturn -48.3581 +MetaTest/Average/MinReturn -63.969 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.25868 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.4968 +MetaTest/__unnamed_task__/AverageReturn -55.4968 +MetaTest/__unnamed_task__/Iteration 133 +MetaTest/__unnamed_task__/MaxReturn -48.3581 +MetaTest/__unnamed_task__/MinReturn -63.969 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.25868 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.288e+06 +__unnamed_task__/AverageDiscountedReturn -30.3461 +__unnamed_task__/AverageReturn -55.1036 +__unnamed_task__/Iteration 133 +__unnamed_task__/MaxReturn -45.7137 +__unnamed_task__/MinReturn -69.8459 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.58086 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 11:38:39 | [maml_trainer] epoch #134 | Sampling for adapation and meta-testing... +2025-03-31 11:42:44 | [maml_trainer] epoch #134 | Finished meta-testing... +2025-03-31 11:42:44 | [maml_trainer] epoch #134 | Saving snapshot... +2025-03-31 11:43:05 | [maml_trainer] epoch #134 | Saved +2025-03-31 11:43:05 | [maml_trainer] epoch #134 | Time 164086.24 s +2025-03-31 11:43:05 | [maml_trainer] epoch #134 | EpochTime 1250.36 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.3853 +Average/AverageReturn -55.5234 +Average/Iteration 134 +Average/MaxReturn -44.8553 +Average/MinReturn -76.7504 +Average/NumEpisodes 80 +Average/StdReturn 5.86414 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99387 +GaussianMLPPolicy/KLAfter 0.00200854 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.09762e-08 +GaussianMLPPolicy/LossBefore 5.96046e-09 +GaussianMLPPolicy/dLoss -5.50157e-08 +Iteration 134 +MetaTest/Average/AverageDiscountedReturn -55.5751 +MetaTest/Average/AverageReturn -55.5751 +MetaTest/Average/Iteration 134 +MetaTest/Average/MaxReturn -43.9624 +MetaTest/Average/MinReturn -69.8897 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.17789 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.5751 +MetaTest/__unnamed_task__/AverageReturn -55.5751 +MetaTest/__unnamed_task__/Iteration 134 +MetaTest/__unnamed_task__/MaxReturn -43.9624 +MetaTest/__unnamed_task__/MinReturn -69.8897 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.17789 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.32e+06 +__unnamed_task__/AverageDiscountedReturn -30.3853 +__unnamed_task__/AverageReturn -55.5234 +__unnamed_task__/Iteration 134 +__unnamed_task__/MaxReturn -44.8553 +__unnamed_task__/MinReturn -76.7504 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.86414 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 11:59:35 | [maml_trainer] epoch #135 | Sampling for adapation and meta-testing... +2025-03-31 12:03:46 | [maml_trainer] epoch #135 | Finished meta-testing... +2025-03-31 12:03:46 | [maml_trainer] epoch #135 | Saving snapshot... +2025-03-31 12:04:07 | [maml_trainer] epoch #135 | Saved +2025-03-31 12:04:07 | [maml_trainer] epoch #135 | Time 165348.25 s +2025-03-31 12:04:07 | [maml_trainer] epoch #135 | EpochTime 1262.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.6629 +Average/AverageReturn -56.1984 +Average/Iteration 135 +Average/MaxReturn -45.9453 +Average/MinReturn -74.0996 +Average/NumEpisodes 80 +Average/StdReturn 6.0864 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99689 +GaussianMLPPolicy/KLAfter 0.00132602 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.51695e-05 +GaussianMLPPolicy/LossBefore 2.98023e-10 +GaussianMLPPolicy/dLoss 5.51698e-05 +Iteration 135 +MetaTest/Average/AverageDiscountedReturn -55.5416 +MetaTest/Average/AverageReturn -55.5416 +MetaTest/Average/Iteration 135 +MetaTest/Average/MaxReturn -47.4994 +MetaTest/Average/MinReturn -63.8326 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.82702 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.5416 +MetaTest/__unnamed_task__/AverageReturn -55.5416 +MetaTest/__unnamed_task__/Iteration 135 +MetaTest/__unnamed_task__/MaxReturn -47.4994 +MetaTest/__unnamed_task__/MinReturn -63.8326 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.82702 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.352e+06 +__unnamed_task__/AverageDiscountedReturn -30.6629 +__unnamed_task__/AverageReturn -56.1984 +__unnamed_task__/Iteration 135 +__unnamed_task__/MaxReturn -45.9453 +__unnamed_task__/MinReturn -74.0996 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.0864 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 12:20:48 | [maml_trainer] epoch #136 | Sampling for adapation and meta-testing... +2025-03-31 12:25:00 | [maml_trainer] epoch #136 | Finished meta-testing... +2025-03-31 12:25:00 | [maml_trainer] epoch #136 | Saving snapshot... +2025-03-31 12:25:21 | [maml_trainer] epoch #136 | Saved +2025-03-31 12:25:21 | [maml_trainer] epoch #136 | Time 166622.15 s +2025-03-31 12:25:21 | [maml_trainer] epoch #136 | EpochTime 1273.90 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.131 +Average/AverageReturn -54.5284 +Average/Iteration 136 +Average/MaxReturn -46.1178 +Average/MinReturn -65.7533 +Average/NumEpisodes 80 +Average/StdReturn 4.8794 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0009 +GaussianMLPPolicy/KLAfter 0.00184021 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.96563e-06 +GaussianMLPPolicy/LossBefore 1.18613e-08 +GaussianMLPPolicy/dLoss -2.95377e-06 +Iteration 136 +MetaTest/Average/AverageDiscountedReturn -55.2044 +MetaTest/Average/AverageReturn -55.2044 +MetaTest/Average/Iteration 136 +MetaTest/Average/MaxReturn -45.9629 +MetaTest/Average/MinReturn -68.8696 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.84897 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.2044 +MetaTest/__unnamed_task__/AverageReturn -55.2044 +MetaTest/__unnamed_task__/Iteration 136 +MetaTest/__unnamed_task__/MaxReturn -45.9629 +MetaTest/__unnamed_task__/MinReturn -68.8696 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.84897 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.384e+06 +__unnamed_task__/AverageDiscountedReturn -30.131 +__unnamed_task__/AverageReturn -54.5284 +__unnamed_task__/Iteration 136 +__unnamed_task__/MaxReturn -46.1178 +__unnamed_task__/MinReturn -65.7533 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.8794 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 12:42:13 | [maml_trainer] epoch #137 | Sampling for adapation and meta-testing... +2025-03-31 12:46:26 | [maml_trainer] epoch #137 | Finished meta-testing... +2025-03-31 12:46:26 | [maml_trainer] epoch #137 | Saving snapshot... +2025-03-31 12:46:48 | [maml_trainer] epoch #137 | Saved +2025-03-31 12:46:48 | [maml_trainer] epoch #137 | Time 167909.83 s +2025-03-31 12:46:48 | [maml_trainer] epoch #137 | EpochTime 1287.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.584 +Average/AverageReturn -53.732 +Average/Iteration 137 +Average/MaxReturn -44.2093 +Average/MinReturn -74.9944 +Average/NumEpisodes 80 +Average/StdReturn 5.66253 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0027 +GaussianMLPPolicy/KLAfter 0.00424394 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.0133e-05 +GaussianMLPPolicy/LossBefore -1.32918e-08 +GaussianMLPPolicy/dLoss 8.01197e-05 +Iteration 137 +MetaTest/Average/AverageDiscountedReturn -52.5807 +MetaTest/Average/AverageReturn -52.5807 +MetaTest/Average/Iteration 137 +MetaTest/Average/MaxReturn -41.9142 +MetaTest/Average/MinReturn -66.9019 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.52918 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.5807 +MetaTest/__unnamed_task__/AverageReturn -52.5807 +MetaTest/__unnamed_task__/Iteration 137 +MetaTest/__unnamed_task__/MaxReturn -41.9142 +MetaTest/__unnamed_task__/MinReturn -66.9019 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.52918 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.416e+06 +__unnamed_task__/AverageDiscountedReturn -29.584 +__unnamed_task__/AverageReturn -53.732 +__unnamed_task__/Iteration 137 +__unnamed_task__/MaxReturn -44.2093 +__unnamed_task__/MinReturn -74.9944 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.66253 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 13:03:45 | [maml_trainer] epoch #138 | Sampling for adapation and meta-testing... +2025-03-31 13:08:00 | [maml_trainer] epoch #138 | Finished meta-testing... +2025-03-31 13:08:00 | [maml_trainer] epoch #138 | Saving snapshot... +2025-03-31 13:08:22 | [maml_trainer] epoch #138 | Saved +2025-03-31 13:08:22 | [maml_trainer] epoch #138 | Time 169203.29 s +2025-03-31 13:08:22 | [maml_trainer] epoch #138 | EpochTime 1293.46 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.8211 +Average/AverageReturn -53.7042 +Average/Iteration 138 +Average/MaxReturn -42.8562 +Average/MinReturn -68.4297 +Average/NumEpisodes 80 +Average/StdReturn 5.38064 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0036 +GaussianMLPPolicy/KLAfter 0.00379348 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.53909e-05 +GaussianMLPPolicy/LossBefore -2.02656e-09 +GaussianMLPPolicy/dLoss 9.53889e-05 +Iteration 138 +MetaTest/Average/AverageDiscountedReturn -53.506 +MetaTest/Average/AverageReturn -53.506 +MetaTest/Average/Iteration 138 +MetaTest/Average/MaxReturn -44.6166 +MetaTest/Average/MinReturn -63.1568 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.12775 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.506 +MetaTest/__unnamed_task__/AverageReturn -53.506 +MetaTest/__unnamed_task__/Iteration 138 +MetaTest/__unnamed_task__/MaxReturn -44.6166 +MetaTest/__unnamed_task__/MinReturn -63.1568 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.12775 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.448e+06 +__unnamed_task__/AverageDiscountedReturn -29.8211 +__unnamed_task__/AverageReturn -53.7042 +__unnamed_task__/Iteration 138 +__unnamed_task__/MaxReturn -42.8562 +__unnamed_task__/MinReturn -68.4297 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.38064 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 13:25:09 | [maml_trainer] epoch #139 | Sampling for adapation and meta-testing... +2025-03-31 13:29:24 | [maml_trainer] epoch #139 | Finished meta-testing... +2025-03-31 13:29:24 | [maml_trainer] epoch #139 | Saving snapshot... +2025-03-31 13:29:46 | [maml_trainer] epoch #139 | Saved +2025-03-31 13:29:46 | [maml_trainer] epoch #139 | Time 170487.38 s +2025-03-31 13:29:46 | [maml_trainer] epoch #139 | EpochTime 1284.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.8036 +Average/AverageReturn -54.0624 +Average/Iteration 139 +Average/MaxReturn -46.9613 +Average/MinReturn -71.8368 +Average/NumEpisodes 80 +Average/StdReturn 4.8546 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0042 +GaussianMLPPolicy/KLAfter 0.00258849 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.30501e-06 +GaussianMLPPolicy/LossBefore -1.44243e-08 +GaussianMLPPolicy/dLoss -6.31943e-06 +Iteration 139 +MetaTest/Average/AverageDiscountedReturn -54.8498 +MetaTest/Average/AverageReturn -54.8498 +MetaTest/Average/Iteration 139 +MetaTest/Average/MaxReturn -46.7028 +MetaTest/Average/MinReturn -62.2744 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.95894 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.8498 +MetaTest/__unnamed_task__/AverageReturn -54.8498 +MetaTest/__unnamed_task__/Iteration 139 +MetaTest/__unnamed_task__/MaxReturn -46.7028 +MetaTest/__unnamed_task__/MinReturn -62.2744 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.95894 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.48e+06 +__unnamed_task__/AverageDiscountedReturn -29.8036 +__unnamed_task__/AverageReturn -54.0624 +__unnamed_task__/Iteration 139 +__unnamed_task__/MaxReturn -46.9613 +__unnamed_task__/MinReturn -71.8368 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.8546 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 13:46:24 | [maml_trainer] epoch #140 | Sampling for adapation and meta-testing... +2025-03-31 13:50:31 | [maml_trainer] epoch #140 | Finished meta-testing... +2025-03-31 13:50:31 | [maml_trainer] epoch #140 | Saving snapshot... +2025-03-31 13:50:51 | [maml_trainer] epoch #140 | Saved +2025-03-31 13:50:51 | [maml_trainer] epoch #140 | Time 171752.39 s +2025-03-31 13:50:51 | [maml_trainer] epoch #140 | EpochTime 1265.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.7557 +Average/AverageReturn -54.8166 +Average/Iteration 140 +Average/MaxReturn -44.1742 +Average/MinReturn -153.152 +Average/NumEpisodes 80 +Average/StdReturn 12.3342 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0047 +GaussianMLPPolicy/KLAfter 0.00452673 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.17381e-05 +GaussianMLPPolicy/LossBefore -1.30832e-08 +GaussianMLPPolicy/dLoss 1.1725e-05 +Iteration 140 +MetaTest/Average/AverageDiscountedReturn -54.8738 +MetaTest/Average/AverageReturn -54.8738 +MetaTest/Average/Iteration 140 +MetaTest/Average/MaxReturn -46.6272 +MetaTest/Average/MinReturn -66.0891 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.81834 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.8738 +MetaTest/__unnamed_task__/AverageReturn -54.8738 +MetaTest/__unnamed_task__/Iteration 140 +MetaTest/__unnamed_task__/MaxReturn -46.6272 +MetaTest/__unnamed_task__/MinReturn -66.0891 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.81834 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.512e+06 +__unnamed_task__/AverageDiscountedReturn -29.7557 +__unnamed_task__/AverageReturn -54.8166 +__unnamed_task__/Iteration 140 +__unnamed_task__/MaxReturn -44.1742 +__unnamed_task__/MinReturn -153.152 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3342 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 14:07:07 | [maml_trainer] epoch #141 | Sampling for adapation and meta-testing... +2025-03-31 14:11:03 | [maml_trainer] epoch #141 | Finished meta-testing... +2025-03-31 14:11:03 | [maml_trainer] epoch #141 | Saving snapshot... +2025-03-31 14:11:24 | [maml_trainer] epoch #141 | Saved +2025-03-31 14:11:24 | [maml_trainer] epoch #141 | Time 172985.59 s +2025-03-31 14:11:24 | [maml_trainer] epoch #141 | EpochTime 1233.19 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.0074 +Average/AverageReturn -55.9807 +Average/Iteration 141 +Average/MaxReturn -44.7899 +Average/MinReturn -121.363 +Average/NumEpisodes 80 +Average/StdReturn 11.4744 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.004 +GaussianMLPPolicy/KLAfter 0.00222979 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.32005e-05 +GaussianMLPPolicy/LossBefore -4.79817e-09 +GaussianMLPPolicy/dLoss -2.32053e-05 +Iteration 141 +MetaTest/Average/AverageDiscountedReturn -59.6494 +MetaTest/Average/AverageReturn -59.6494 +MetaTest/Average/Iteration 141 +MetaTest/Average/MaxReturn -45.6615 +MetaTest/Average/MinReturn -81.3912 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.8379 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.6494 +MetaTest/__unnamed_task__/AverageReturn -59.6494 +MetaTest/__unnamed_task__/Iteration 141 +MetaTest/__unnamed_task__/MaxReturn -45.6615 +MetaTest/__unnamed_task__/MinReturn -81.3912 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.8379 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.544e+06 +__unnamed_task__/AverageDiscountedReturn -30.0074 +__unnamed_task__/AverageReturn -55.9807 +__unnamed_task__/Iteration 141 +__unnamed_task__/MaxReturn -44.7899 +__unnamed_task__/MinReturn -121.363 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4744 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 14:27:51 | [maml_trainer] epoch #142 | Sampling for adapation and meta-testing... +2025-03-31 14:31:59 | [maml_trainer] epoch #142 | Finished meta-testing... +2025-03-31 14:31:59 | [maml_trainer] epoch #142 | Saving snapshot... +2025-03-31 14:32:21 | [maml_trainer] epoch #142 | Saved +2025-03-31 14:32:21 | [maml_trainer] epoch #142 | Time 174242.06 s +2025-03-31 14:32:21 | [maml_trainer] epoch #142 | EpochTime 1256.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.3288 +Average/AverageReturn -56.6117 +Average/Iteration 142 +Average/MaxReturn -43.3121 +Average/MinReturn -162.318 +Average/NumEpisodes 80 +Average/StdReturn 15.083 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0035 +GaussianMLPPolicy/KLAfter 0.00127895 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.48068e-05 +GaussianMLPPolicy/LossBefore 1.3113e-09 +GaussianMLPPolicy/dLoss 1.48081e-05 +Iteration 142 +MetaTest/Average/AverageDiscountedReturn -58.3207 +MetaTest/Average/AverageReturn -58.3207 +MetaTest/Average/Iteration 142 +MetaTest/Average/MaxReturn -47.2678 +MetaTest/Average/MinReturn -82.0205 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.6635 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.3207 +MetaTest/__unnamed_task__/AverageReturn -58.3207 +MetaTest/__unnamed_task__/Iteration 142 +MetaTest/__unnamed_task__/MaxReturn -47.2678 +MetaTest/__unnamed_task__/MinReturn -82.0205 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.6635 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.576e+06 +__unnamed_task__/AverageDiscountedReturn -30.3288 +__unnamed_task__/AverageReturn -56.6117 +__unnamed_task__/Iteration 142 +__unnamed_task__/MaxReturn -43.3121 +__unnamed_task__/MinReturn -162.318 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.083 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 14:48:58 | [maml_trainer] epoch #143 | Sampling for adapation and meta-testing... +2025-03-31 14:53:08 | [maml_trainer] epoch #143 | Finished meta-testing... +2025-03-31 14:53:08 | [maml_trainer] epoch #143 | Saving snapshot... +2025-03-31 14:53:30 | [maml_trainer] epoch #143 | Saved +2025-03-31 14:53:30 | [maml_trainer] epoch #143 | Time 175511.61 s +2025-03-31 14:53:30 | [maml_trainer] epoch #143 | EpochTime 1269.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.1595 +Average/AverageReturn -60.1134 +Average/Iteration 143 +Average/MaxReturn -46.7118 +Average/MinReturn -149.931 +Average/NumEpisodes 80 +Average/StdReturn 13.8988 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0029 +GaussianMLPPolicy/KLAfter 0.00187098 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.05787e-05 +GaussianMLPPolicy/LossBefore -8.37445e-09 +GaussianMLPPolicy/dLoss 4.05703e-05 +Iteration 143 +MetaTest/Average/AverageDiscountedReturn -55.3508 +MetaTest/Average/AverageReturn -55.3508 +MetaTest/Average/Iteration 143 +MetaTest/Average/MaxReturn -44.9229 +MetaTest/Average/MinReturn -68.5838 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.41303 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.3508 +MetaTest/__unnamed_task__/AverageReturn -55.3508 +MetaTest/__unnamed_task__/Iteration 143 +MetaTest/__unnamed_task__/MaxReturn -44.9229 +MetaTest/__unnamed_task__/MinReturn -68.5838 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.41303 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.608e+06 +__unnamed_task__/AverageDiscountedReturn -31.1595 +__unnamed_task__/AverageReturn -60.1134 +__unnamed_task__/Iteration 143 +__unnamed_task__/MaxReturn -46.7118 +__unnamed_task__/MinReturn -149.931 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8988 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 15:10:05 | [maml_trainer] epoch #144 | Sampling for adapation and meta-testing... +2025-03-31 15:14:11 | [maml_trainer] epoch #144 | Finished meta-testing... +2025-03-31 15:14:11 | [maml_trainer] epoch #144 | Saving snapshot... +2025-03-31 15:14:31 | [maml_trainer] epoch #144 | Saved +2025-03-31 15:14:31 | [maml_trainer] epoch #144 | Time 176772.70 s +2025-03-31 15:14:31 | [maml_trainer] epoch #144 | EpochTime 1261.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.1916 +Average/AverageReturn -56.1338 +Average/Iteration 144 +Average/MaxReturn -42.1121 +Average/MinReturn -116.882 +Average/NumEpisodes 80 +Average/StdReturn 11.7776 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0026 +GaussianMLPPolicy/KLAfter 0.00184954 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.0946e-05 +GaussianMLPPolicy/LossBefore 7.27177e-09 +GaussianMLPPolicy/dLoss -4.09387e-05 +Iteration 144 +MetaTest/Average/AverageDiscountedReturn -58.0884 +MetaTest/Average/AverageReturn -58.0884 +MetaTest/Average/Iteration 144 +MetaTest/Average/MaxReturn -45.114 +MetaTest/Average/MinReturn -83.829 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3012 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.0884 +MetaTest/__unnamed_task__/AverageReturn -58.0884 +MetaTest/__unnamed_task__/Iteration 144 +MetaTest/__unnamed_task__/MaxReturn -45.114 +MetaTest/__unnamed_task__/MinReturn -83.829 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3012 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.64e+06 +__unnamed_task__/AverageDiscountedReturn -30.1916 +__unnamed_task__/AverageReturn -56.1338 +__unnamed_task__/Iteration 144 +__unnamed_task__/MaxReturn -42.1121 +__unnamed_task__/MinReturn -116.882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.7776 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 15:31:01 | [maml_trainer] epoch #145 | Sampling for adapation and meta-testing... +2025-03-31 15:35:01 | [maml_trainer] epoch #145 | Finished meta-testing... +2025-03-31 15:35:01 | [maml_trainer] epoch #145 | Saving snapshot... +2025-03-31 15:35:23 | [maml_trainer] epoch #145 | Saved +2025-03-31 15:35:23 | [maml_trainer] epoch #145 | Time 178024.81 s +2025-03-31 15:35:23 | [maml_trainer] epoch #145 | EpochTime 1252.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.7392 +Average/AverageReturn -59.2622 +Average/Iteration 145 +Average/MaxReturn -42.8037 +Average/MinReturn -168.463 +Average/NumEpisodes 80 +Average/StdReturn 18.7362 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0021 +GaussianMLPPolicy/KLAfter 0.00150201 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.18027e-05 +GaussianMLPPolicy/LossBefore -1.07288e-09 +GaussianMLPPolicy/dLoss -3.18038e-05 +Iteration 145 +MetaTest/Average/AverageDiscountedReturn -55.786 +MetaTest/Average/AverageReturn -55.786 +MetaTest/Average/Iteration 145 +MetaTest/Average/MaxReturn -46.3813 +MetaTest/Average/MinReturn -74.9396 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.51438 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.786 +MetaTest/__unnamed_task__/AverageReturn -55.786 +MetaTest/__unnamed_task__/Iteration 145 +MetaTest/__unnamed_task__/MaxReturn -46.3813 +MetaTest/__unnamed_task__/MinReturn -74.9396 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.51438 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.672e+06 +__unnamed_task__/AverageDiscountedReturn -30.7392 +__unnamed_task__/AverageReturn -59.2622 +__unnamed_task__/Iteration 145 +__unnamed_task__/MaxReturn -42.8037 +__unnamed_task__/MinReturn -168.463 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.7362 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 15:51:53 | [maml_trainer] epoch #146 | Sampling for adapation and meta-testing... +2025-03-31 15:56:02 | [maml_trainer] epoch #146 | Finished meta-testing... +2025-03-31 15:56:02 | [maml_trainer] epoch #146 | Saving snapshot... +2025-03-31 15:56:24 | [maml_trainer] epoch #146 | Saved +2025-03-31 15:56:24 | [maml_trainer] epoch #146 | Time 179285.49 s +2025-03-31 15:56:24 | [maml_trainer] epoch #146 | EpochTime 1260.68 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.3603 +Average/AverageReturn -57.5738 +Average/Iteration 146 +Average/MaxReturn -44.4484 +Average/MinReturn -136.304 +Average/NumEpisodes 80 +Average/StdReturn 13.0972 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0019 +GaussianMLPPolicy/KLAfter 0.00183678 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.67074e-05 +GaussianMLPPolicy/LossBefore 5.45382e-09 +GaussianMLPPolicy/dLoss -3.6702e-05 +Iteration 146 +MetaTest/Average/AverageDiscountedReturn -57.0871 +MetaTest/Average/AverageReturn -57.0871 +MetaTest/Average/Iteration 146 +MetaTest/Average/MaxReturn -47.6321 +MetaTest/Average/MinReturn -74.5345 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.31645 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.0871 +MetaTest/__unnamed_task__/AverageReturn -57.0871 +MetaTest/__unnamed_task__/Iteration 146 +MetaTest/__unnamed_task__/MaxReturn -47.6321 +MetaTest/__unnamed_task__/MinReturn -74.5345 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.31645 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.704e+06 +__unnamed_task__/AverageDiscountedReturn -30.3603 +__unnamed_task__/AverageReturn -57.5738 +__unnamed_task__/Iteration 146 +__unnamed_task__/MaxReturn -44.4484 +__unnamed_task__/MinReturn -136.304 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.0972 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 16:12:47 | [maml_trainer] epoch #147 | Sampling for adapation and meta-testing... +2025-03-31 16:16:51 | [maml_trainer] epoch #147 | Finished meta-testing... +2025-03-31 16:16:51 | [maml_trainer] epoch #147 | Saving snapshot... +2025-03-31 16:17:12 | [maml_trainer] epoch #147 | Saved +2025-03-31 16:17:12 | [maml_trainer] epoch #147 | Time 180533.80 s +2025-03-31 16:17:12 | [maml_trainer] epoch #147 | EpochTime 1248.30 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.2096 +Average/AverageReturn -56.5039 +Average/Iteration 147 +Average/MaxReturn -44.8826 +Average/MinReturn -95.127 +Average/NumEpisodes 80 +Average/StdReturn 7.9659 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0008 +GaussianMLPPolicy/KLAfter 0.00227442 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.60549e-05 +GaussianMLPPolicy/LossBefore 8.9407e-10 +GaussianMLPPolicy/dLoss 5.60558e-05 +Iteration 147 +MetaTest/Average/AverageDiscountedReturn -56.8524 +MetaTest/Average/AverageReturn -56.8524 +MetaTest/Average/Iteration 147 +MetaTest/Average/MaxReturn -47.4025 +MetaTest/Average/MinReturn -77.0528 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.85346 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.8524 +MetaTest/__unnamed_task__/AverageReturn -56.8524 +MetaTest/__unnamed_task__/Iteration 147 +MetaTest/__unnamed_task__/MaxReturn -47.4025 +MetaTest/__unnamed_task__/MinReturn -77.0528 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.85346 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.736e+06 +__unnamed_task__/AverageDiscountedReturn -30.2096 +__unnamed_task__/AverageReturn -56.5039 +__unnamed_task__/Iteration 147 +__unnamed_task__/MaxReturn -44.8826 +__unnamed_task__/MinReturn -95.127 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.9659 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 16:33:43 | [maml_trainer] epoch #148 | Sampling for adapation and meta-testing... +2025-03-31 16:37:51 | [maml_trainer] epoch #148 | Finished meta-testing... +2025-03-31 16:37:51 | [maml_trainer] epoch #148 | Saving snapshot... +2025-03-31 16:38:12 | [maml_trainer] epoch #148 | Saved +2025-03-31 16:38:12 | [maml_trainer] epoch #148 | Time 181793.37 s +2025-03-31 16:38:12 | [maml_trainer] epoch #148 | EpochTime 1259.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.9129 +Average/AverageReturn -55.8069 +Average/Iteration 148 +Average/MaxReturn -42.3303 +Average/MinReturn -89.901 +Average/NumEpisodes 80 +Average/StdReturn 9.28299 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0009 +GaussianMLPPolicy/KLAfter 0.00191894 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.02041e-05 +GaussianMLPPolicy/LossBefore 3.09944e-09 +GaussianMLPPolicy/dLoss -8.0201e-05 +Iteration 148 +MetaTest/Average/AverageDiscountedReturn -60.87 +MetaTest/Average/AverageReturn -60.87 +MetaTest/Average/Iteration 148 +MetaTest/Average/MaxReturn -45.5503 +MetaTest/Average/MinReturn -97.305 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6042 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.87 +MetaTest/__unnamed_task__/AverageReturn -60.87 +MetaTest/__unnamed_task__/Iteration 148 +MetaTest/__unnamed_task__/MaxReturn -45.5503 +MetaTest/__unnamed_task__/MinReturn -97.305 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6042 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.768e+06 +__unnamed_task__/AverageDiscountedReturn -29.9129 +__unnamed_task__/AverageReturn -55.8069 +__unnamed_task__/Iteration 148 +__unnamed_task__/MaxReturn -42.3303 +__unnamed_task__/MinReturn -89.901 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.28299 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 16:54:35 | [maml_trainer] epoch #149 | Sampling for adapation and meta-testing... +2025-03-31 16:58:42 | [maml_trainer] epoch #149 | Finished meta-testing... +2025-03-31 16:58:42 | [maml_trainer] epoch #149 | Saving snapshot... +2025-03-31 16:59:04 | [maml_trainer] epoch #149 | Saved +2025-03-31 16:59:04 | [maml_trainer] epoch #149 | Time 183045.12 s +2025-03-31 16:59:04 | [maml_trainer] epoch #149 | EpochTime 1251.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.9953 +Average/AverageReturn -55.7488 +Average/Iteration 149 +Average/MaxReturn -44.9725 +Average/MinReturn -91.825 +Average/NumEpisodes 80 +Average/StdReturn 8.91751 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99998 +GaussianMLPPolicy/KLAfter 0.00151725 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.01274e-06 +GaussianMLPPolicy/LossBefore -5.60284e-09 +GaussianMLPPolicy/dLoss 6.00714e-06 +Iteration 149 +MetaTest/Average/AverageDiscountedReturn -57.887 +MetaTest/Average/AverageReturn -57.887 +MetaTest/Average/Iteration 149 +MetaTest/Average/MaxReturn -45.1582 +MetaTest/Average/MinReturn -94.2217 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.5437 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.887 +MetaTest/__unnamed_task__/AverageReturn -57.887 +MetaTest/__unnamed_task__/Iteration 149 +MetaTest/__unnamed_task__/MaxReturn -45.1582 +MetaTest/__unnamed_task__/MinReturn -94.2217 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.5437 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.8e+06 +__unnamed_task__/AverageDiscountedReturn -29.9953 +__unnamed_task__/AverageReturn -55.7488 +__unnamed_task__/Iteration 149 +__unnamed_task__/MaxReturn -44.9725 +__unnamed_task__/MinReturn -91.825 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.91751 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 17:15:29 | [maml_trainer] epoch #150 | Sampling for adapation and meta-testing... +2025-03-31 17:19:41 | [maml_trainer] epoch #150 | Finished meta-testing... +2025-03-31 17:19:41 | [maml_trainer] epoch #150 | Saving snapshot... +2025-03-31 17:20:04 | [maml_trainer] epoch #150 | Saved +2025-03-31 17:20:04 | [maml_trainer] epoch #150 | Time 184305.04 s +2025-03-31 17:20:04 | [maml_trainer] epoch #150 | EpochTime 1259.91 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.6617 +Average/AverageReturn -58.7106 +Average/Iteration 150 +Average/MaxReturn -44.503 +Average/MinReturn -180.056 +Average/NumEpisodes 80 +Average/StdReturn 15.6979 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99967 +GaussianMLPPolicy/KLAfter 0.000978984 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.17238e-05 +GaussianMLPPolicy/LossBefore 3.57628e-10 +GaussianMLPPolicy/dLoss 1.17241e-05 +Iteration 150 +MetaTest/Average/AverageDiscountedReturn -56.3186 +MetaTest/Average/AverageReturn -56.3186 +MetaTest/Average/Iteration 150 +MetaTest/Average/MaxReturn -45.7396 +MetaTest/Average/MinReturn -70.6992 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.1462 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.3186 +MetaTest/__unnamed_task__/AverageReturn -56.3186 +MetaTest/__unnamed_task__/Iteration 150 +MetaTest/__unnamed_task__/MaxReturn -45.7396 +MetaTest/__unnamed_task__/MinReturn -70.6992 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.1462 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.832e+06 +__unnamed_task__/AverageDiscountedReturn -30.6617 +__unnamed_task__/AverageReturn -58.7106 +__unnamed_task__/Iteration 150 +__unnamed_task__/MaxReturn -44.503 +__unnamed_task__/MinReturn -180.056 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.6979 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 17:35:58 | [maml_trainer] epoch #151 | Sampling for adapation and meta-testing... +2025-03-31 17:40:02 | [maml_trainer] epoch #151 | Finished meta-testing... +2025-03-31 17:40:02 | [maml_trainer] epoch #151 | Saving snapshot... +2025-03-31 17:40:23 | [maml_trainer] epoch #151 | Saved +2025-03-31 17:40:23 | [maml_trainer] epoch #151 | Time 185524.13 s +2025-03-31 17:40:23 | [maml_trainer] epoch #151 | EpochTime 1219.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.782 +Average/AverageReturn -55.9642 +Average/Iteration 151 +Average/MaxReturn -43.8696 +Average/MinReturn -84.7452 +Average/NumEpisodes 80 +Average/StdReturn 8.64659 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0009 +GaussianMLPPolicy/KLAfter 0.0021671 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.06652e-05 +GaussianMLPPolicy/LossBefore -7.689e-09 +GaussianMLPPolicy/dLoss -1.06729e-05 +Iteration 151 +MetaTest/Average/AverageDiscountedReturn -57.2618 +MetaTest/Average/AverageReturn -57.2618 +MetaTest/Average/Iteration 151 +MetaTest/Average/MaxReturn -42.7871 +MetaTest/Average/MinReturn -78.5821 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.0829 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.2618 +MetaTest/__unnamed_task__/AverageReturn -57.2618 +MetaTest/__unnamed_task__/Iteration 151 +MetaTest/__unnamed_task__/MaxReturn -42.7871 +MetaTest/__unnamed_task__/MinReturn -78.5821 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.0829 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.864e+06 +__unnamed_task__/AverageDiscountedReturn -29.782 +__unnamed_task__/AverageReturn -55.9642 +__unnamed_task__/Iteration 151 +__unnamed_task__/MaxReturn -43.8696 +__unnamed_task__/MinReturn -84.7452 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.64659 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 17:56:35 | [maml_trainer] epoch #152 | Sampling for adapation and meta-testing... +2025-03-31 18:00:44 | [maml_trainer] epoch #152 | Finished meta-testing... +2025-03-31 18:00:44 | [maml_trainer] epoch #152 | Saving snapshot... +2025-03-31 18:01:05 | [maml_trainer] epoch #152 | Saved +2025-03-31 18:01:05 | [maml_trainer] epoch #152 | Time 186767.02 s +2025-03-31 18:01:05 | [maml_trainer] epoch #152 | EpochTime 1242.89 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.5563 +Average/AverageReturn -58.3877 +Average/Iteration 152 +Average/MaxReturn -44.8451 +Average/MinReturn -92.1785 +Average/NumEpisodes 80 +Average/StdReturn 9.72725 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.002 +GaussianMLPPolicy/KLAfter 0.0020299 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.36855e-05 +GaussianMLPPolicy/LossBefore -1.54078e-08 +GaussianMLPPolicy/dLoss 4.36701e-05 +Iteration 152 +MetaTest/Average/AverageDiscountedReturn -58.3895 +MetaTest/Average/AverageReturn -58.3895 +MetaTest/Average/Iteration 152 +MetaTest/Average/MaxReturn -45.6877 +MetaTest/Average/MinReturn -75.762 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.16092 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.3895 +MetaTest/__unnamed_task__/AverageReturn -58.3895 +MetaTest/__unnamed_task__/Iteration 152 +MetaTest/__unnamed_task__/MaxReturn -45.6877 +MetaTest/__unnamed_task__/MinReturn -75.762 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.16092 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.896e+06 +__unnamed_task__/AverageDiscountedReturn -30.5563 +__unnamed_task__/AverageReturn -58.3877 +__unnamed_task__/Iteration 152 +__unnamed_task__/MaxReturn -44.8451 +__unnamed_task__/MinReturn -92.1785 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.72725 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 18:17:21 | [maml_trainer] epoch #153 | Sampling for adapation and meta-testing... +2025-03-31 18:21:31 | [maml_trainer] epoch #153 | Finished meta-testing... +2025-03-31 18:21:31 | [maml_trainer] epoch #153 | Saving snapshot... +2025-03-31 18:21:53 | [maml_trainer] epoch #153 | Saved +2025-03-31 18:21:53 | [maml_trainer] epoch #153 | Time 188014.16 s +2025-03-31 18:21:53 | [maml_trainer] epoch #153 | EpochTime 1247.14 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.5955 +Average/AverageReturn -57.7686 +Average/Iteration 153 +Average/MaxReturn -43.0356 +Average/MinReturn -87.5091 +Average/NumEpisodes 80 +Average/StdReturn 9.4078 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0027 +GaussianMLPPolicy/KLAfter 0.00156781 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.91784e-05 +GaussianMLPPolicy/LossBefore -5.63264e-09 +GaussianMLPPolicy/dLoss -4.9184e-05 +Iteration 153 +MetaTest/Average/AverageDiscountedReturn -53.16 +MetaTest/Average/AverageReturn -53.16 +MetaTest/Average/Iteration 153 +MetaTest/Average/MaxReturn -46.1446 +MetaTest/Average/MinReturn -69.4524 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.58988 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.16 +MetaTest/__unnamed_task__/AverageReturn -53.16 +MetaTest/__unnamed_task__/Iteration 153 +MetaTest/__unnamed_task__/MaxReturn -46.1446 +MetaTest/__unnamed_task__/MinReturn -69.4524 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.58988 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.928e+06 +__unnamed_task__/AverageDiscountedReturn -30.5955 +__unnamed_task__/AverageReturn -57.7686 +__unnamed_task__/Iteration 153 +__unnamed_task__/MaxReturn -43.0356 +__unnamed_task__/MinReturn -87.5091 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.4078 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 18:38:00 | [maml_trainer] epoch #154 | Sampling for adapation and meta-testing... +2025-03-31 18:42:01 | [maml_trainer] epoch #154 | Finished meta-testing... +2025-03-31 18:42:01 | [maml_trainer] epoch #154 | Saving snapshot... +2025-03-31 18:42:22 | [maml_trainer] epoch #154 | Saved +2025-03-31 18:42:22 | [maml_trainer] epoch #154 | Time 189243.50 s +2025-03-31 18:42:22 | [maml_trainer] epoch #154 | EpochTime 1229.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.1203 +Average/AverageReturn -56.5156 +Average/Iteration 154 +Average/MaxReturn -44.9096 +Average/MinReturn -78.15 +Average/NumEpisodes 80 +Average/StdReturn 7.7975 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0041 +GaussianMLPPolicy/KLAfter 0.00271995 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.5396e-05 +GaussianMLPPolicy/LossBefore 5.78165e-09 +GaussianMLPPolicy/dLoss -9.53903e-05 +Iteration 154 +MetaTest/Average/AverageDiscountedReturn -54.7778 +MetaTest/Average/AverageReturn -54.7778 +MetaTest/Average/Iteration 154 +MetaTest/Average/MaxReturn -46.3168 +MetaTest/Average/MinReturn -70.9584 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.71971 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.7778 +MetaTest/__unnamed_task__/AverageReturn -54.7778 +MetaTest/__unnamed_task__/Iteration 154 +MetaTest/__unnamed_task__/MaxReturn -46.3168 +MetaTest/__unnamed_task__/MinReturn -70.9584 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.71971 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.96e+06 +__unnamed_task__/AverageDiscountedReturn -30.1203 +__unnamed_task__/AverageReturn -56.5156 +__unnamed_task__/Iteration 154 +__unnamed_task__/MaxReturn -44.9096 +__unnamed_task__/MinReturn -78.15 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.7975 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 18:58:10 | [maml_trainer] epoch #155 | Sampling for adapation and meta-testing... +2025-03-31 19:02:07 | [maml_trainer] epoch #155 | Finished meta-testing... +2025-03-31 19:02:07 | [maml_trainer] epoch #155 | Saving snapshot... +2025-03-31 19:02:28 | [maml_trainer] epoch #155 | Saved +2025-03-31 19:02:28 | [maml_trainer] epoch #155 | Time 190449.63 s +2025-03-31 19:02:28 | [maml_trainer] epoch #155 | EpochTime 1206.13 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.7968 +Average/AverageReturn -58.5163 +Average/Iteration 155 +Average/MaxReturn -45.1409 +Average/MinReturn -88.7193 +Average/NumEpisodes 80 +Average/StdReturn 8.9566 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0062 +GaussianMLPPolicy/KLAfter 0.00330607 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.49981e-05 +GaussianMLPPolicy/LossBefore -2.98024e-11 +GaussianMLPPolicy/dLoss 8.4998e-05 +Iteration 155 +MetaTest/Average/AverageDiscountedReturn -58.6227 +MetaTest/Average/AverageReturn -58.6227 +MetaTest/Average/Iteration 155 +MetaTest/Average/MaxReturn -48.9226 +MetaTest/Average/MinReturn -79.6395 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.52775 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.6227 +MetaTest/__unnamed_task__/AverageReturn -58.6227 +MetaTest/__unnamed_task__/Iteration 155 +MetaTest/__unnamed_task__/MaxReturn -48.9226 +MetaTest/__unnamed_task__/MinReturn -79.6395 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.52775 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.992e+06 +__unnamed_task__/AverageDiscountedReturn -30.7968 +__unnamed_task__/AverageReturn -58.5163 +__unnamed_task__/Iteration 155 +__unnamed_task__/MaxReturn -45.1409 +__unnamed_task__/MinReturn -88.7193 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.9566 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 19:18:48 | [maml_trainer] epoch #156 | Sampling for adapation and meta-testing... +2025-03-31 19:22:56 | [maml_trainer] epoch #156 | Finished meta-testing... +2025-03-31 19:22:56 | [maml_trainer] epoch #156 | Saving snapshot... +2025-03-31 19:23:17 | [maml_trainer] epoch #156 | Saved +2025-03-31 19:23:17 | [maml_trainer] epoch #156 | Time 191698.79 s +2025-03-31 19:23:17 | [maml_trainer] epoch #156 | EpochTime 1249.16 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.1756 +Average/AverageReturn -58.3361 +Average/Iteration 156 +Average/MaxReturn -43.3323 +Average/MinReturn -123.926 +Average/NumEpisodes 80 +Average/StdReturn 12.0731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0083 +GaussianMLPPolicy/KLAfter 0.0028259 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.20564e-05 +GaussianMLPPolicy/LossBefore 1.99676e-09 +GaussianMLPPolicy/dLoss -5.20544e-05 +Iteration 156 +MetaTest/Average/AverageDiscountedReturn -58.2049 +MetaTest/Average/AverageReturn -58.2049 +MetaTest/Average/Iteration 156 +MetaTest/Average/MaxReturn -48.0671 +MetaTest/Average/MinReturn -73.0003 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.60909 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.2049 +MetaTest/__unnamed_task__/AverageReturn -58.2049 +MetaTest/__unnamed_task__/Iteration 156 +MetaTest/__unnamed_task__/MaxReturn -48.0671 +MetaTest/__unnamed_task__/MinReturn -73.0003 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.60909 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.024e+06 +__unnamed_task__/AverageDiscountedReturn -30.1756 +__unnamed_task__/AverageReturn -58.3361 +__unnamed_task__/Iteration 156 +__unnamed_task__/MaxReturn -43.3323 +__unnamed_task__/MinReturn -123.926 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.0731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 19:39:41 | [maml_trainer] epoch #157 | Sampling for adapation and meta-testing... +2025-03-31 19:43:52 | [maml_trainer] epoch #157 | Finished meta-testing... +2025-03-31 19:43:52 | [maml_trainer] epoch #157 | Saving snapshot... +2025-03-31 19:44:14 | [maml_trainer] epoch #157 | Saved +2025-03-31 19:44:14 | [maml_trainer] epoch #157 | Time 192955.34 s +2025-03-31 19:44:14 | [maml_trainer] epoch #157 | EpochTime 1256.54 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.9151 +Average/AverageReturn -56.8199 +Average/Iteration 157 +Average/MaxReturn -44.6608 +Average/MinReturn -97.5582 +Average/NumEpisodes 80 +Average/StdReturn 8.82719 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0108 +GaussianMLPPolicy/KLAfter 0.00267995 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000101681 +GaussianMLPPolicy/LossBefore -8.46386e-09 +GaussianMLPPolicy/dLoss 0.000101672 +Iteration 157 +MetaTest/Average/AverageDiscountedReturn -57.823 +MetaTest/Average/AverageReturn -57.823 +MetaTest/Average/Iteration 157 +MetaTest/Average/MaxReturn -43.9207 +MetaTest/Average/MinReturn -89.1133 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.7657 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.823 +MetaTest/__unnamed_task__/AverageReturn -57.823 +MetaTest/__unnamed_task__/Iteration 157 +MetaTest/__unnamed_task__/MaxReturn -43.9207 +MetaTest/__unnamed_task__/MinReturn -89.1133 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.7657 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.056e+06 +__unnamed_task__/AverageDiscountedReturn -29.9151 +__unnamed_task__/AverageReturn -56.8199 +__unnamed_task__/Iteration 157 +__unnamed_task__/MaxReturn -44.6608 +__unnamed_task__/MinReturn -97.5582 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.82719 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 20:00:48 | [maml_trainer] epoch #158 | Sampling for adapation and meta-testing... +2025-03-31 20:04:57 | [maml_trainer] epoch #158 | Finished meta-testing... +2025-03-31 20:04:57 | [maml_trainer] epoch #158 | Saving snapshot... +2025-03-31 20:05:22 | [maml_trainer] epoch #158 | Saved +2025-03-31 20:05:22 | [maml_trainer] epoch #158 | Time 194223.05 s +2025-03-31 20:05:22 | [maml_trainer] epoch #158 | EpochTime 1267.71 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.0799 +Average/AverageReturn -57.3758 +Average/Iteration 158 +Average/MaxReturn -43.9675 +Average/MinReturn -81.0494 +Average/NumEpisodes 80 +Average/StdReturn 8.85258 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0137 +GaussianMLPPolicy/KLAfter 0.00175047 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.06378e-05 +GaussianMLPPolicy/LossBefore 4.52995e-09 +GaussianMLPPolicy/dLoss -4.06333e-05 +Iteration 158 +MetaTest/Average/AverageDiscountedReturn -56.7606 +MetaTest/Average/AverageReturn -56.7606 +MetaTest/Average/Iteration 158 +MetaTest/Average/MaxReturn -42.6009 +MetaTest/Average/MinReturn -76.1708 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.05777 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.7606 +MetaTest/__unnamed_task__/AverageReturn -56.7606 +MetaTest/__unnamed_task__/Iteration 158 +MetaTest/__unnamed_task__/MaxReturn -42.6009 +MetaTest/__unnamed_task__/MinReturn -76.1708 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.05777 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.088e+06 +__unnamed_task__/AverageDiscountedReturn -30.0799 +__unnamed_task__/AverageReturn -57.3758 +__unnamed_task__/Iteration 158 +__unnamed_task__/MaxReturn -43.9675 +__unnamed_task__/MinReturn -81.0494 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.85258 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 20:22:59 | [maml_trainer] epoch #159 | Sampling for adapation and meta-testing... +2025-03-31 20:27:20 | [maml_trainer] epoch #159 | Finished meta-testing... +2025-03-31 20:27:20 | [maml_trainer] epoch #159 | Saving snapshot... +2025-03-31 20:27:41 | [maml_trainer] epoch #159 | Saved +2025-03-31 20:27:41 | [maml_trainer] epoch #159 | Time 195562.61 s +2025-03-31 20:27:41 | [maml_trainer] epoch #159 | EpochTime 1339.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.9538 +Average/AverageReturn -58.1711 +Average/Iteration 159 +Average/MaxReturn -44.6238 +Average/MinReturn -149.083 +Average/NumEpisodes 80 +Average/StdReturn 12.9818 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0157 +GaussianMLPPolicy/KLAfter 0.000952404 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.85565e-05 +GaussianMLPPolicy/LossBefore -1.54972e-09 +GaussianMLPPolicy/dLoss -1.85581e-05 +Iteration 159 +MetaTest/Average/AverageDiscountedReturn -61.2124 +MetaTest/Average/AverageReturn -61.2124 +MetaTest/Average/Iteration 159 +MetaTest/Average/MaxReturn -48.5356 +MetaTest/Average/MinReturn -115.379 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.4335 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.2124 +MetaTest/__unnamed_task__/AverageReturn -61.2124 +MetaTest/__unnamed_task__/Iteration 159 +MetaTest/__unnamed_task__/MaxReturn -48.5356 +MetaTest/__unnamed_task__/MinReturn -115.379 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.4335 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.12e+06 +__unnamed_task__/AverageDiscountedReturn -29.9538 +__unnamed_task__/AverageReturn -58.1711 +__unnamed_task__/Iteration 159 +__unnamed_task__/MaxReturn -44.6238 +__unnamed_task__/MinReturn -149.083 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.9818 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 20:45:21 | [maml_trainer] epoch #160 | Sampling for adapation and meta-testing... +2025-03-31 20:49:32 | [maml_trainer] epoch #160 | Finished meta-testing... +2025-03-31 20:49:32 | [maml_trainer] epoch #160 | Saving snapshot... +2025-03-31 20:49:54 | [maml_trainer] epoch #160 | Saved +2025-03-31 20:49:54 | [maml_trainer] epoch #160 | Time 196895.08 s +2025-03-31 20:49:54 | [maml_trainer] epoch #160 | EpochTime 1332.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.2323 +Average/AverageReturn -58.8543 +Average/Iteration 160 +Average/MaxReturn -44.199 +Average/MinReturn -167.389 +Average/NumEpisodes 80 +Average/StdReturn 15.2007 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0186 +GaussianMLPPolicy/KLAfter 0.00245615 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.34488e-05 +GaussianMLPPolicy/LossBefore -2.5928e-09 +GaussianMLPPolicy/dLoss 4.34462e-05 +Iteration 160 +MetaTest/Average/AverageDiscountedReturn -58.6019 +MetaTest/Average/AverageReturn -58.6019 +MetaTest/Average/Iteration 160 +MetaTest/Average/MaxReturn -47.38 +MetaTest/Average/MinReturn -81.729 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.23259 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.6019 +MetaTest/__unnamed_task__/AverageReturn -58.6019 +MetaTest/__unnamed_task__/Iteration 160 +MetaTest/__unnamed_task__/MaxReturn -47.38 +MetaTest/__unnamed_task__/MinReturn -81.729 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.23259 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.152e+06 +__unnamed_task__/AverageDiscountedReturn -30.2323 +__unnamed_task__/AverageReturn -58.8543 +__unnamed_task__/Iteration 160 +__unnamed_task__/MaxReturn -44.199 +__unnamed_task__/MinReturn -167.389 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.2007 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 21:06:29 | [maml_trainer] epoch #161 | Sampling for adapation and meta-testing... +2025-03-31 21:10:38 | [maml_trainer] epoch #161 | Finished meta-testing... +2025-03-31 21:10:38 | [maml_trainer] epoch #161 | Saving snapshot... +2025-03-31 21:10:59 | [maml_trainer] epoch #161 | Saved +2025-03-31 21:10:59 | [maml_trainer] epoch #161 | Time 198160.21 s +2025-03-31 21:10:59 | [maml_trainer] epoch #161 | EpochTime 1265.13 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.1873 +Average/AverageReturn -58.0196 +Average/Iteration 161 +Average/MaxReturn -46.9473 +Average/MinReturn -117.13 +Average/NumEpisodes 80 +Average/StdReturn 12.4444 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0221 +GaussianMLPPolicy/KLAfter 0.00426771 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.35599e-05 +GaussianMLPPolicy/LossBefore -2.98023e-10 +GaussianMLPPolicy/dLoss 3.35596e-05 +Iteration 161 +MetaTest/Average/AverageDiscountedReturn -54.9828 +MetaTest/Average/AverageReturn -54.9828 +MetaTest/Average/Iteration 161 +MetaTest/Average/MaxReturn -43.3815 +MetaTest/Average/MinReturn -65.1769 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.39847 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.9828 +MetaTest/__unnamed_task__/AverageReturn -54.9828 +MetaTest/__unnamed_task__/Iteration 161 +MetaTest/__unnamed_task__/MaxReturn -43.3815 +MetaTest/__unnamed_task__/MinReturn -65.1769 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.39847 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.184e+06 +__unnamed_task__/AverageDiscountedReturn -30.1873 +__unnamed_task__/AverageReturn -58.0196 +__unnamed_task__/Iteration 161 +__unnamed_task__/MaxReturn -46.9473 +__unnamed_task__/MinReturn -117.13 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4444 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 21:27:30 | [maml_trainer] epoch #162 | Sampling for adapation and meta-testing... +2025-03-31 21:31:39 | [maml_trainer] epoch #162 | Finished meta-testing... +2025-03-31 21:31:39 | [maml_trainer] epoch #162 | Saving snapshot... +2025-03-31 21:32:00 | [maml_trainer] epoch #162 | Saved +2025-03-31 21:32:00 | [maml_trainer] epoch #162 | Time 199421.16 s +2025-03-31 21:32:00 | [maml_trainer] epoch #162 | EpochTime 1260.94 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.7141 +Average/AverageReturn -56.0226 +Average/Iteration 162 +Average/MaxReturn -44.5928 +Average/MinReturn -133.956 +Average/NumEpisodes 80 +Average/StdReturn 10.697 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0261 +GaussianMLPPolicy/KLAfter 0.00340498 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.34919e-05 +GaussianMLPPolicy/LossBefore -1.04308e-09 +GaussianMLPPolicy/dLoss -8.34929e-05 +Iteration 162 +MetaTest/Average/AverageDiscountedReturn -54.8744 +MetaTest/Average/AverageReturn -54.8744 +MetaTest/Average/Iteration 162 +MetaTest/Average/MaxReturn -47.5677 +MetaTest/Average/MinReturn -70.3938 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.19286 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.8744 +MetaTest/__unnamed_task__/AverageReturn -54.8744 +MetaTest/__unnamed_task__/Iteration 162 +MetaTest/__unnamed_task__/MaxReturn -47.5677 +MetaTest/__unnamed_task__/MinReturn -70.3938 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.19286 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.216e+06 +__unnamed_task__/AverageDiscountedReturn -29.7141 +__unnamed_task__/AverageReturn -56.0226 +__unnamed_task__/Iteration 162 +__unnamed_task__/MaxReturn -44.5928 +__unnamed_task__/MinReturn -133.956 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.697 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 21:48:21 | [maml_trainer] epoch #163 | Sampling for adapation and meta-testing... +2025-03-31 21:52:19 | [maml_trainer] epoch #163 | Finished meta-testing... +2025-03-31 21:52:19 | [maml_trainer] epoch #163 | Saving snapshot... +2025-03-31 21:52:40 | [maml_trainer] epoch #163 | Saved +2025-03-31 21:52:40 | [maml_trainer] epoch #163 | Time 200661.35 s +2025-03-31 21:52:40 | [maml_trainer] epoch #163 | EpochTime 1240.19 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.3245 +Average/AverageReturn -55.7627 +Average/Iteration 163 +Average/MaxReturn -43.7278 +Average/MinReturn -94.29 +Average/NumEpisodes 80 +Average/StdReturn 8.01543 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0292 +GaussianMLPPolicy/KLAfter 0.00208566 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000144961 +GaussianMLPPolicy/LossBefore -7.15256e-10 +GaussianMLPPolicy/dLoss 0.00014496 +Iteration 163 +MetaTest/Average/AverageDiscountedReturn -53.2153 +MetaTest/Average/AverageReturn -53.2153 +MetaTest/Average/Iteration 163 +MetaTest/Average/MaxReturn -43.0135 +MetaTest/Average/MinReturn -60.7055 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.5526 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.2153 +MetaTest/__unnamed_task__/AverageReturn -53.2153 +MetaTest/__unnamed_task__/Iteration 163 +MetaTest/__unnamed_task__/MaxReturn -43.0135 +MetaTest/__unnamed_task__/MinReturn -60.7055 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.5526 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.248e+06 +__unnamed_task__/AverageDiscountedReturn -29.3245 +__unnamed_task__/AverageReturn -55.7627 +__unnamed_task__/Iteration 163 +__unnamed_task__/MaxReturn -43.7278 +__unnamed_task__/MinReturn -94.29 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.01543 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 22:09:50 | [maml_trainer] epoch #164 | Sampling for adapation and meta-testing... +2025-03-31 22:14:08 | [maml_trainer] epoch #164 | Finished meta-testing... +2025-03-31 22:14:08 | [maml_trainer] epoch #164 | Saving snapshot... +2025-03-31 22:14:30 | [maml_trainer] epoch #164 | Saved +2025-03-31 22:14:30 | [maml_trainer] epoch #164 | Time 201971.17 s +2025-03-31 22:14:30 | [maml_trainer] epoch #164 | EpochTime 1309.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.271 +Average/AverageReturn -54.9792 +Average/Iteration 164 +Average/MaxReturn -45.4473 +Average/MinReturn -84.5784 +Average/NumEpisodes 80 +Average/StdReturn 7.24314 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0306 +GaussianMLPPolicy/KLAfter 0.00296408 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.43213e-05 +GaussianMLPPolicy/LossBefore 4.11272e-09 +GaussianMLPPolicy/dLoss 1.43254e-05 +Iteration 164 +MetaTest/Average/AverageDiscountedReturn -61.3146 +MetaTest/Average/AverageReturn -61.3146 +MetaTest/Average/Iteration 164 +MetaTest/Average/MaxReturn -49.3103 +MetaTest/Average/MinReturn -104.087 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.7919 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.3146 +MetaTest/__unnamed_task__/AverageReturn -61.3146 +MetaTest/__unnamed_task__/Iteration 164 +MetaTest/__unnamed_task__/MaxReturn -49.3103 +MetaTest/__unnamed_task__/MinReturn -104.087 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.7919 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.28e+06 +__unnamed_task__/AverageDiscountedReturn -29.271 +__unnamed_task__/AverageReturn -54.9792 +__unnamed_task__/Iteration 164 +__unnamed_task__/MaxReturn -45.4473 +__unnamed_task__/MinReturn -84.5784 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.24314 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 22:31:05 | [maml_trainer] epoch #165 | Sampling for adapation and meta-testing... +2025-03-31 22:35:30 | [maml_trainer] epoch #165 | Finished meta-testing... +2025-03-31 22:35:30 | [maml_trainer] epoch #165 | Saving snapshot... +2025-03-31 22:35:51 | [maml_trainer] epoch #165 | Saved +2025-03-31 22:35:51 | [maml_trainer] epoch #165 | Time 203252.38 s +2025-03-31 22:35:51 | [maml_trainer] epoch #165 | EpochTime 1281.21 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.0663 +Average/AverageReturn -55.6254 +Average/Iteration 165 +Average/MaxReturn -42.7305 +Average/MinReturn -132.187 +Average/NumEpisodes 80 +Average/StdReturn 10.5201 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0292 +GaussianMLPPolicy/KLAfter 0.00261323 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000150755 +GaussianMLPPolicy/LossBefore 5.21541e-09 +GaussianMLPPolicy/dLoss 0.00015076 +Iteration 165 +MetaTest/Average/AverageDiscountedReturn -55.2146 +MetaTest/Average/AverageReturn -55.2146 +MetaTest/Average/Iteration 165 +MetaTest/Average/MaxReturn -48.0077 +MetaTest/Average/MinReturn -71.0919 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.97114 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.2146 +MetaTest/__unnamed_task__/AverageReturn -55.2146 +MetaTest/__unnamed_task__/Iteration 165 +MetaTest/__unnamed_task__/MaxReturn -48.0077 +MetaTest/__unnamed_task__/MinReturn -71.0919 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.97114 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.312e+06 +__unnamed_task__/AverageDiscountedReturn -29.0663 +__unnamed_task__/AverageReturn -55.6254 +__unnamed_task__/Iteration 165 +__unnamed_task__/MaxReturn -42.7305 +__unnamed_task__/MinReturn -132.187 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5201 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 22:52:40 | [maml_trainer] epoch #166 | Sampling for adapation and meta-testing... +2025-03-31 22:56:40 | [maml_trainer] epoch #166 | Finished meta-testing... +2025-03-31 22:56:40 | [maml_trainer] epoch #166 | Saving snapshot... +2025-03-31 22:57:01 | [maml_trainer] epoch #166 | Saved +2025-03-31 22:57:01 | [maml_trainer] epoch #166 | Time 204522.25 s +2025-03-31 22:57:01 | [maml_trainer] epoch #166 | EpochTime 1269.86 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.1016 +Average/AverageReturn -54.8418 +Average/Iteration 166 +Average/MaxReturn -44.2151 +Average/MinReturn -109.529 +Average/NumEpisodes 80 +Average/StdReturn 8.5926 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0297 +GaussianMLPPolicy/KLAfter 0.00363963 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.20415e-06 +GaussianMLPPolicy/LossBefore 6.97374e-09 +GaussianMLPPolicy/dLoss -4.19718e-06 +Iteration 166 +MetaTest/Average/AverageDiscountedReturn -53.4605 +MetaTest/Average/AverageReturn -53.4605 +MetaTest/Average/Iteration 166 +MetaTest/Average/MaxReturn -47.2877 +MetaTest/Average/MinReturn -60.6549 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.54993 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.4605 +MetaTest/__unnamed_task__/AverageReturn -53.4605 +MetaTest/__unnamed_task__/Iteration 166 +MetaTest/__unnamed_task__/MaxReturn -47.2877 +MetaTest/__unnamed_task__/MinReturn -60.6549 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.54993 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.344e+06 +__unnamed_task__/AverageDiscountedReturn -29.1016 +__unnamed_task__/AverageReturn -54.8418 +__unnamed_task__/Iteration 166 +__unnamed_task__/MaxReturn -44.2151 +__unnamed_task__/MinReturn -109.529 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.5926 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 23:13:58 | [maml_trainer] epoch #167 | Sampling for adapation and meta-testing... +2025-03-31 23:18:15 | [maml_trainer] epoch #167 | Finished meta-testing... +2025-03-31 23:18:15 | [maml_trainer] epoch #167 | Saving snapshot... +2025-03-31 23:18:34 | [maml_trainer] epoch #167 | Saved +2025-03-31 23:18:34 | [maml_trainer] epoch #167 | Time 205816.01 s +2025-03-31 23:18:34 | [maml_trainer] epoch #167 | EpochTime 1293.76 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.9621 +Average/AverageReturn -54.6901 +Average/Iteration 167 +Average/MaxReturn -46.2991 +Average/MinReturn -85.3617 +Average/NumEpisodes 80 +Average/StdReturn 6.19337 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0309 +GaussianMLPPolicy/KLAfter 0.00364906 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.6012e-05 +GaussianMLPPolicy/LossBefore 3.75509e-09 +GaussianMLPPolicy/dLoss -9.60082e-05 +Iteration 167 +MetaTest/Average/AverageDiscountedReturn -58.6115 +MetaTest/Average/AverageReturn -58.6115 +MetaTest/Average/Iteration 167 +MetaTest/Average/MaxReturn -48.0617 +MetaTest/Average/MinReturn -107.493 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.9504 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.6115 +MetaTest/__unnamed_task__/AverageReturn -58.6115 +MetaTest/__unnamed_task__/Iteration 167 +MetaTest/__unnamed_task__/MaxReturn -48.0617 +MetaTest/__unnamed_task__/MinReturn -107.493 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.9504 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.376e+06 +__unnamed_task__/AverageDiscountedReturn -28.9621 +__unnamed_task__/AverageReturn -54.6901 +__unnamed_task__/Iteration 167 +__unnamed_task__/MaxReturn -46.2991 +__unnamed_task__/MinReturn -85.3617 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.19337 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-03-31 23:34:56 | [maml_trainer] epoch #168 | Sampling for adapation and meta-testing... +2025-03-31 23:39:10 | [maml_trainer] epoch #168 | Finished meta-testing... +2025-03-31 23:39:10 | [maml_trainer] epoch #168 | Saving snapshot... +2025-03-31 23:39:33 | [maml_trainer] epoch #168 | Saved +2025-03-31 23:39:33 | [maml_trainer] epoch #168 | Time 207074.36 s +2025-03-31 23:39:33 | [maml_trainer] epoch #168 | EpochTime 1258.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.2344 +Average/AverageReturn -54.3826 +Average/Iteration 168 +Average/MaxReturn -44.9383 +Average/MinReturn -73.3169 +Average/NumEpisodes 80 +Average/StdReturn 4.90963 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0312 +GaussianMLPPolicy/KLAfter 0.00522236 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.26799e-05 +GaussianMLPPolicy/LossBefore -7.15256e-10 +GaussianMLPPolicy/dLoss -5.26806e-05 +Iteration 168 +MetaTest/Average/AverageDiscountedReturn -55.0298 +MetaTest/Average/AverageReturn -55.0298 +MetaTest/Average/Iteration 168 +MetaTest/Average/MaxReturn -46.8838 +MetaTest/Average/MinReturn -64.6599 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.72086 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.0298 +MetaTest/__unnamed_task__/AverageReturn -55.0298 +MetaTest/__unnamed_task__/Iteration 168 +MetaTest/__unnamed_task__/MaxReturn -46.8838 +MetaTest/__unnamed_task__/MinReturn -64.6599 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.72086 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.408e+06 +__unnamed_task__/AverageDiscountedReturn -29.2344 +__unnamed_task__/AverageReturn -54.3826 +__unnamed_task__/Iteration 168 +__unnamed_task__/MaxReturn -44.9383 +__unnamed_task__/MinReturn -73.3169 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.90963 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-03-31 23:56:39 | [maml_trainer] epoch #169 | Sampling for adapation and meta-testing... +2025-04-01 00:00:54 | [maml_trainer] epoch #169 | Finished meta-testing... +2025-04-01 00:00:54 | [maml_trainer] epoch #169 | Saving snapshot... +2025-04-01 00:01:16 | [maml_trainer] epoch #169 | Saved +2025-04-01 00:01:16 | [maml_trainer] epoch #169 | Time 208377.07 s +2025-04-01 00:01:16 | [maml_trainer] epoch #169 | EpochTime 1302.71 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.6022 +Average/AverageReturn -57.2476 +Average/Iteration 169 +Average/MaxReturn -47.146 +Average/MinReturn -91.0797 +Average/NumEpisodes 80 +Average/StdReturn 8.2313 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0313 +GaussianMLPPolicy/KLAfter 0.00407404 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.91874e-05 +GaussianMLPPolicy/LossBefore -5.48363e-09 +GaussianMLPPolicy/dLoss -8.91929e-05 +Iteration 169 +MetaTest/Average/AverageDiscountedReturn -57.7978 +MetaTest/Average/AverageReturn -57.7978 +MetaTest/Average/Iteration 169 +MetaTest/Average/MaxReturn -46.9585 +MetaTest/Average/MinReturn -116.278 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8214 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.7978 +MetaTest/__unnamed_task__/AverageReturn -57.7978 +MetaTest/__unnamed_task__/Iteration 169 +MetaTest/__unnamed_task__/MaxReturn -46.9585 +MetaTest/__unnamed_task__/MinReturn -116.278 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8214 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.44e+06 +__unnamed_task__/AverageDiscountedReturn -29.6022 +__unnamed_task__/AverageReturn -57.2476 +__unnamed_task__/Iteration 169 +__unnamed_task__/MaxReturn -47.146 +__unnamed_task__/MinReturn -91.0797 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.2313 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 00:19:00 | [maml_trainer] epoch #170 | Sampling for adapation and meta-testing... +2025-04-01 00:23:16 | [maml_trainer] epoch #170 | Finished meta-testing... +2025-04-01 00:23:16 | [maml_trainer] epoch #170 | Saving snapshot... +2025-04-01 00:23:37 | [maml_trainer] epoch #170 | Saved +2025-04-01 00:23:37 | [maml_trainer] epoch #170 | Time 209718.71 s +2025-04-01 00:23:37 | [maml_trainer] epoch #170 | EpochTime 1341.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.0408 +Average/AverageReturn -58.4174 +Average/Iteration 170 +Average/MaxReturn -48.3974 +Average/MinReturn -109.147 +Average/NumEpisodes 80 +Average/StdReturn 10.256 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0327 +GaussianMLPPolicy/KLAfter 0.00384137 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000124837 +GaussianMLPPolicy/LossBefore -6.73532e-09 +GaussianMLPPolicy/dLoss -0.000124844 +Iteration 170 +MetaTest/Average/AverageDiscountedReturn -61.3687 +MetaTest/Average/AverageReturn -61.3687 +MetaTest/Average/Iteration 170 +MetaTest/Average/MaxReturn -52.5355 +MetaTest/Average/MinReturn -84.9304 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.9024 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.3687 +MetaTest/__unnamed_task__/AverageReturn -61.3687 +MetaTest/__unnamed_task__/Iteration 170 +MetaTest/__unnamed_task__/MaxReturn -52.5355 +MetaTest/__unnamed_task__/MinReturn -84.9304 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.9024 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.472e+06 +__unnamed_task__/AverageDiscountedReturn -30.0408 +__unnamed_task__/AverageReturn -58.4174 +__unnamed_task__/Iteration 170 +__unnamed_task__/MaxReturn -48.3974 +__unnamed_task__/MinReturn -109.147 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.256 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 00:40:04 | [maml_trainer] epoch #171 | Sampling for adapation and meta-testing... +2025-04-01 00:44:03 | [maml_trainer] epoch #171 | Finished meta-testing... +2025-04-01 00:44:03 | [maml_trainer] epoch #171 | Saving snapshot... +2025-04-01 00:44:23 | [maml_trainer] epoch #171 | Saved +2025-04-01 00:44:23 | [maml_trainer] epoch #171 | Time 210964.52 s +2025-04-01 00:44:23 | [maml_trainer] epoch #171 | EpochTime 1245.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.2561 +Average/AverageReturn -58.7714 +Average/Iteration 171 +Average/MaxReturn -46.3737 +Average/MinReturn -156.948 +Average/NumEpisodes 80 +Average/StdReturn 12.5001 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0314 +GaussianMLPPolicy/KLAfter 0.00450374 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.90306e-05 +GaussianMLPPolicy/LossBefore 2.02656e-09 +GaussianMLPPolicy/dLoss 2.90326e-05 +Iteration 171 +MetaTest/Average/AverageDiscountedReturn -62.2151 +MetaTest/Average/AverageReturn -62.2151 +MetaTest/Average/Iteration 171 +MetaTest/Average/MaxReturn -50.5829 +MetaTest/Average/MinReturn -82.8796 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.08447 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.2151 +MetaTest/__unnamed_task__/AverageReturn -62.2151 +MetaTest/__unnamed_task__/Iteration 171 +MetaTest/__unnamed_task__/MaxReturn -50.5829 +MetaTest/__unnamed_task__/MinReturn -82.8796 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.08447 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.504e+06 +__unnamed_task__/AverageDiscountedReturn -30.2561 +__unnamed_task__/AverageReturn -58.7714 +__unnamed_task__/Iteration 171 +__unnamed_task__/MaxReturn -46.3737 +__unnamed_task__/MinReturn -156.948 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.5001 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 01:00:14 | [maml_trainer] epoch #172 | Sampling for adapation and meta-testing... +2025-04-01 01:04:25 | [maml_trainer] epoch #172 | Finished meta-testing... +2025-04-01 01:04:25 | [maml_trainer] epoch #172 | Saving snapshot... +2025-04-01 01:04:47 | [maml_trainer] epoch #172 | Saved +2025-04-01 01:04:47 | [maml_trainer] epoch #172 | Time 212188.73 s +2025-04-01 01:04:47 | [maml_trainer] epoch #172 | EpochTime 1224.21 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.5802 +Average/AverageReturn -59.5675 +Average/Iteration 172 +Average/MaxReturn -47.9568 +Average/MinReturn -75.2239 +Average/NumEpisodes 80 +Average/StdReturn 6.80185 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0287 +GaussianMLPPolicy/KLAfter 0.00273185 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.69013e-05 +GaussianMLPPolicy/LossBefore -1.66893e-08 +GaussianMLPPolicy/dLoss -2.6918e-05 +Iteration 172 +MetaTest/Average/AverageDiscountedReturn -60.4465 +MetaTest/Average/AverageReturn -60.4465 +MetaTest/Average/Iteration 172 +MetaTest/Average/MaxReturn -52.1798 +MetaTest/Average/MinReturn -81.1998 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.20595 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.4465 +MetaTest/__unnamed_task__/AverageReturn -60.4465 +MetaTest/__unnamed_task__/Iteration 172 +MetaTest/__unnamed_task__/MaxReturn -52.1798 +MetaTest/__unnamed_task__/MinReturn -81.1998 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.20595 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.536e+06 +__unnamed_task__/AverageDiscountedReturn -30.5802 +__unnamed_task__/AverageReturn -59.5675 +__unnamed_task__/Iteration 172 +__unnamed_task__/MaxReturn -47.9568 +__unnamed_task__/MinReturn -75.2239 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.80185 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 01:21:30 | [maml_trainer] epoch #173 | Sampling for adapation and meta-testing... +2025-04-01 01:25:38 | [maml_trainer] epoch #173 | Finished meta-testing... +2025-04-01 01:25:38 | [maml_trainer] epoch #173 | Saving snapshot... +2025-04-01 01:25:59 | [maml_trainer] epoch #173 | Saved +2025-04-01 01:25:59 | [maml_trainer] epoch #173 | Time 213460.09 s +2025-04-01 01:25:59 | [maml_trainer] epoch #173 | EpochTime 1271.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.108 +Average/AverageReturn -62.3749 +Average/Iteration 173 +Average/MaxReturn -49.0378 +Average/MinReturn -96.7658 +Average/NumEpisodes 80 +Average/StdReturn 9.11171 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.025 +GaussianMLPPolicy/KLAfter 0.00477671 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.01812e-05 +GaussianMLPPolicy/LossBefore -2.26498e-09 +GaussianMLPPolicy/dLoss -2.01835e-05 +Iteration 173 +MetaTest/Average/AverageDiscountedReturn -66.4876 +MetaTest/Average/AverageReturn -66.4876 +MetaTest/Average/Iteration 173 +MetaTest/Average/MaxReturn -51.2182 +MetaTest/Average/MinReturn -104.003 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.4558 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -66.4876 +MetaTest/__unnamed_task__/AverageReturn -66.4876 +MetaTest/__unnamed_task__/Iteration 173 +MetaTest/__unnamed_task__/MaxReturn -51.2182 +MetaTest/__unnamed_task__/MinReturn -104.003 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.4558 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.568e+06 +__unnamed_task__/AverageDiscountedReturn -31.108 +__unnamed_task__/AverageReturn -62.3749 +__unnamed_task__/Iteration 173 +__unnamed_task__/MaxReturn -49.0378 +__unnamed_task__/MinReturn -96.7658 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.11171 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 01:41:50 | [maml_trainer] epoch #174 | Sampling for adapation and meta-testing... +2025-04-01 01:45:48 | [maml_trainer] epoch #174 | Finished meta-testing... +2025-04-01 01:45:48 | [maml_trainer] epoch #174 | Saving snapshot... +2025-04-01 01:46:09 | [maml_trainer] epoch #174 | Saved +2025-04-01 01:46:09 | [maml_trainer] epoch #174 | Time 214670.39 s +2025-04-01 01:46:09 | [maml_trainer] epoch #174 | EpochTime 1210.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.5 +Average/AverageReturn -62.6695 +Average/Iteration 174 +Average/MaxReturn -49.7792 +Average/MinReturn -79.209 +Average/NumEpisodes 80 +Average/StdReturn 7.18348 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0216 +GaussianMLPPolicy/KLAfter 0.00291938 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.18009e-05 +GaussianMLPPolicy/LossBefore -3.27826e-09 +GaussianMLPPolicy/dLoss -6.18042e-05 +Iteration 174 +MetaTest/Average/AverageDiscountedReturn -63.7065 +MetaTest/Average/AverageReturn -63.7065 +MetaTest/Average/Iteration 174 +MetaTest/Average/MaxReturn -51.7967 +MetaTest/Average/MinReturn -128.422 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1201 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.7065 +MetaTest/__unnamed_task__/AverageReturn -63.7065 +MetaTest/__unnamed_task__/Iteration 174 +MetaTest/__unnamed_task__/MaxReturn -51.7967 +MetaTest/__unnamed_task__/MinReturn -128.422 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1201 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.6e+06 +__unnamed_task__/AverageDiscountedReturn -31.5 +__unnamed_task__/AverageReturn -62.6695 +__unnamed_task__/Iteration 174 +__unnamed_task__/MaxReturn -49.7792 +__unnamed_task__/MinReturn -79.209 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.18348 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 02:02:06 | [maml_trainer] epoch #175 | Sampling for adapation and meta-testing... +2025-04-01 02:06:03 | [maml_trainer] epoch #175 | Finished meta-testing... +2025-04-01 02:06:03 | [maml_trainer] epoch #175 | Saving snapshot... +2025-04-01 02:06:23 | [maml_trainer] epoch #175 | Saved +2025-04-01 02:06:23 | [maml_trainer] epoch #175 | Time 215884.22 s +2025-04-01 02:06:23 | [maml_trainer] epoch #175 | EpochTime 1213.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.1182 +Average/AverageReturn -61.5495 +Average/Iteration 175 +Average/MaxReturn -47.0552 +Average/MinReturn -135.364 +Average/NumEpisodes 80 +Average/StdReturn 11.6341 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0189 +GaussianMLPPolicy/KLAfter 0.00203766 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.74716e-05 +GaussianMLPPolicy/LossBefore 4.17232e-10 +GaussianMLPPolicy/dLoss 1.7472e-05 +Iteration 175 +MetaTest/Average/AverageDiscountedReturn -62.5932 +MetaTest/Average/AverageReturn -62.5932 +MetaTest/Average/Iteration 175 +MetaTest/Average/MaxReturn -52.9407 +MetaTest/Average/MinReturn -72.111 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.45626 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.5932 +MetaTest/__unnamed_task__/AverageReturn -62.5932 +MetaTest/__unnamed_task__/Iteration 175 +MetaTest/__unnamed_task__/MaxReturn -52.9407 +MetaTest/__unnamed_task__/MinReturn -72.111 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.45626 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.632e+06 +__unnamed_task__/AverageDiscountedReturn -31.1182 +__unnamed_task__/AverageReturn -61.5495 +__unnamed_task__/Iteration 175 +__unnamed_task__/MaxReturn -47.0552 +__unnamed_task__/MinReturn -135.364 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6341 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 02:22:10 | [maml_trainer] epoch #176 | Sampling for adapation and meta-testing... +2025-04-01 02:26:12 | [maml_trainer] epoch #176 | Finished meta-testing... +2025-04-01 02:26:12 | [maml_trainer] epoch #176 | Saving snapshot... +2025-04-01 02:26:33 | [maml_trainer] epoch #176 | Saved +2025-04-01 02:26:33 | [maml_trainer] epoch #176 | Time 217094.55 s +2025-04-01 02:26:33 | [maml_trainer] epoch #176 | EpochTime 1210.33 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -30.7525 +Average/AverageReturn -61.026 +Average/Iteration 176 +Average/MaxReturn -45.9188 +Average/MinReturn -88.1076 +Average/NumEpisodes 80 +Average/StdReturn 7.40869 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0169 +GaussianMLPPolicy/KLAfter 0.00155842 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.91698e-05 +GaussianMLPPolicy/LossBefore 9.53674e-10 +GaussianMLPPolicy/dLoss -3.91689e-05 +Iteration 176 +MetaTest/Average/AverageDiscountedReturn -61.2007 +MetaTest/Average/AverageReturn -61.2007 +MetaTest/Average/Iteration 176 +MetaTest/Average/MaxReturn -52.1201 +MetaTest/Average/MinReturn -76.1733 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.15041 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.2007 +MetaTest/__unnamed_task__/AverageReturn -61.2007 +MetaTest/__unnamed_task__/Iteration 176 +MetaTest/__unnamed_task__/MaxReturn -52.1201 +MetaTest/__unnamed_task__/MinReturn -76.1733 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.15041 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.664e+06 +__unnamed_task__/AverageDiscountedReturn -30.7525 +__unnamed_task__/AverageReturn -61.026 +__unnamed_task__/Iteration 176 +__unnamed_task__/MaxReturn -45.9188 +__unnamed_task__/MinReturn -88.1076 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.40869 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 02:42:20 | [maml_trainer] epoch #177 | Sampling for adapation and meta-testing... +2025-04-01 02:46:20 | [maml_trainer] epoch #177 | Finished meta-testing... +2025-04-01 02:46:20 | [maml_trainer] epoch #177 | Saving snapshot... +2025-04-01 02:46:41 | [maml_trainer] epoch #177 | Saved +2025-04-01 02:46:41 | [maml_trainer] epoch #177 | Time 218302.25 s +2025-04-01 02:46:41 | [maml_trainer] epoch #177 | EpochTime 1207.70 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.4933 +Average/AverageReturn -64.2011 +Average/Iteration 177 +Average/MaxReturn -50.1548 +Average/MinReturn -137.62 +Average/NumEpisodes 80 +Average/StdReturn 12.0977 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0157 +GaussianMLPPolicy/KLAfter 0.00186481 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.54863e-05 +GaussianMLPPolicy/LossBefore -6.79493e-09 +GaussianMLPPolicy/dLoss -9.54931e-05 +Iteration 177 +MetaTest/Average/AverageDiscountedReturn -62.5483 +MetaTest/Average/AverageReturn -62.5483 +MetaTest/Average/Iteration 177 +MetaTest/Average/MaxReturn -48.5671 +MetaTest/Average/MinReturn -83.2139 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.74959 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.5483 +MetaTest/__unnamed_task__/AverageReturn -62.5483 +MetaTest/__unnamed_task__/Iteration 177 +MetaTest/__unnamed_task__/MaxReturn -48.5671 +MetaTest/__unnamed_task__/MinReturn -83.2139 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.74959 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.696e+06 +__unnamed_task__/AverageDiscountedReturn -31.4933 +__unnamed_task__/AverageReturn -64.2011 +__unnamed_task__/Iteration 177 +__unnamed_task__/MaxReturn -50.1548 +__unnamed_task__/MinReturn -137.62 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.0977 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 03:02:45 | [maml_trainer] epoch #178 | Sampling for adapation and meta-testing... +2025-04-01 03:06:45 | [maml_trainer] epoch #178 | Finished meta-testing... +2025-04-01 03:06:45 | [maml_trainer] epoch #178 | Saving snapshot... +2025-04-01 03:07:06 | [maml_trainer] epoch #178 | Saved +2025-04-01 03:07:06 | [maml_trainer] epoch #178 | Time 219527.81 s +2025-04-01 03:07:06 | [maml_trainer] epoch #178 | EpochTime 1225.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.76 +Average/AverageReturn -65.1411 +Average/Iteration 178 +Average/MaxReturn -51.7555 +Average/MinReturn -122.431 +Average/NumEpisodes 80 +Average/StdReturn 11.0339 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0137 +GaussianMLPPolicy/KLAfter 0.00262959 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.8784e-05 +GaussianMLPPolicy/LossBefore -9.38773e-09 +GaussianMLPPolicy/dLoss 5.87746e-05 +Iteration 178 +MetaTest/Average/AverageDiscountedReturn -62.6185 +MetaTest/Average/AverageReturn -62.6185 +MetaTest/Average/Iteration 178 +MetaTest/Average/MaxReturn -51.0373 +MetaTest/Average/MinReturn -91.5659 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.0927 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.6185 +MetaTest/__unnamed_task__/AverageReturn -62.6185 +MetaTest/__unnamed_task__/Iteration 178 +MetaTest/__unnamed_task__/MaxReturn -51.0373 +MetaTest/__unnamed_task__/MinReturn -91.5659 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.0927 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.728e+06 +__unnamed_task__/AverageDiscountedReturn -31.76 +__unnamed_task__/AverageReturn -65.1411 +__unnamed_task__/Iteration 178 +__unnamed_task__/MaxReturn -51.7555 +__unnamed_task__/MinReturn -122.431 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0339 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 03:23:17 | [maml_trainer] epoch #179 | Sampling for adapation and meta-testing... +2025-04-01 03:27:19 | [maml_trainer] epoch #179 | Finished meta-testing... +2025-04-01 03:27:19 | [maml_trainer] epoch #179 | Saving snapshot... +2025-04-01 03:27:39 | [maml_trainer] epoch #179 | Saved +2025-04-01 03:27:39 | [maml_trainer] epoch #179 | Time 220761.01 s +2025-04-01 03:27:39 | [maml_trainer] epoch #179 | EpochTime 1233.19 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.9879 +Average/AverageReturn -62.3898 +Average/Iteration 179 +Average/MaxReturn -46.6122 +Average/MinReturn -116.745 +Average/NumEpisodes 80 +Average/StdReturn 11.6279 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0119 +GaussianMLPPolicy/KLAfter 0.0024938 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.18121e-05 +GaussianMLPPolicy/LossBefore -1.3411e-08 +GaussianMLPPolicy/dLoss 2.17987e-05 +Iteration 179 +MetaTest/Average/AverageDiscountedReturn -66.5371 +MetaTest/Average/AverageReturn -66.5371 +MetaTest/Average/Iteration 179 +MetaTest/Average/MaxReturn -52.3285 +MetaTest/Average/MinReturn -148.069 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.627 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -66.5371 +MetaTest/__unnamed_task__/AverageReturn -66.5371 +MetaTest/__unnamed_task__/Iteration 179 +MetaTest/__unnamed_task__/MaxReturn -52.3285 +MetaTest/__unnamed_task__/MinReturn -148.069 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.627 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.76e+06 +__unnamed_task__/AverageDiscountedReturn -30.9879 +__unnamed_task__/AverageReturn -62.3898 +__unnamed_task__/Iteration 179 +__unnamed_task__/MaxReturn -46.6122 +__unnamed_task__/MinReturn -116.745 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6279 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 03:43:58 | [maml_trainer] epoch #180 | Sampling for adapation and meta-testing... +2025-04-01 03:48:00 | [maml_trainer] epoch #180 | Finished meta-testing... +2025-04-01 03:48:00 | [maml_trainer] epoch #180 | Saving snapshot... +2025-04-01 03:48:20 | [maml_trainer] epoch #180 | Saved +2025-04-01 03:48:20 | [maml_trainer] epoch #180 | Time 222001.55 s +2025-04-01 03:48:20 | [maml_trainer] epoch #180 | EpochTime 1240.54 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.9294 +Average/AverageReturn -61.8016 +Average/Iteration 180 +Average/MaxReturn -48.0692 +Average/MinReturn -122.297 +Average/NumEpisodes 80 +Average/StdReturn 10.1394 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0098 +GaussianMLPPolicy/KLAfter 0.00305026 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.96008e-05 +GaussianMLPPolicy/LossBefore -1.40369e-08 +GaussianMLPPolicy/dLoss 6.95868e-05 +Iteration 180 +MetaTest/Average/AverageDiscountedReturn -57.7082 +MetaTest/Average/AverageReturn -57.7082 +MetaTest/Average/Iteration 180 +MetaTest/Average/MaxReturn -48.6365 +MetaTest/Average/MinReturn -76.3925 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.03204 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.7082 +MetaTest/__unnamed_task__/AverageReturn -57.7082 +MetaTest/__unnamed_task__/Iteration 180 +MetaTest/__unnamed_task__/MaxReturn -48.6365 +MetaTest/__unnamed_task__/MinReturn -76.3925 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.03204 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.792e+06 +__unnamed_task__/AverageDiscountedReturn -30.9294 +__unnamed_task__/AverageReturn -61.8016 +__unnamed_task__/Iteration 180 +__unnamed_task__/MaxReturn -48.0692 +__unnamed_task__/MinReturn -122.297 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1394 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 04:04:16 | [maml_trainer] epoch #181 | Sampling for adapation and meta-testing... +2025-04-01 04:08:24 | [maml_trainer] epoch #181 | Finished meta-testing... +2025-04-01 04:08:24 | [maml_trainer] epoch #181 | Saving snapshot... +2025-04-01 04:08:45 | [maml_trainer] epoch #181 | Saved +2025-04-01 04:08:45 | [maml_trainer] epoch #181 | Time 223226.35 s +2025-04-01 04:08:45 | [maml_trainer] epoch #181 | EpochTime 1224.79 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.0793 +Average/AverageReturn -60.3627 +Average/Iteration 181 +Average/MaxReturn -46.6374 +Average/MinReturn -123.667 +Average/NumEpisodes 80 +Average/StdReturn 13.8827 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0086 +GaussianMLPPolicy/KLAfter 0.00341136 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.24131e-05 +GaussianMLPPolicy/LossBefore 9.05991e-09 +GaussianMLPPolicy/dLoss 9.24222e-05 +Iteration 181 +MetaTest/Average/AverageDiscountedReturn -59.0942 +MetaTest/Average/AverageReturn -59.0942 +MetaTest/Average/Iteration 181 +MetaTest/Average/MaxReturn -48.7204 +MetaTest/Average/MinReturn -94.1468 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.58277 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.0942 +MetaTest/__unnamed_task__/AverageReturn -59.0942 +MetaTest/__unnamed_task__/Iteration 181 +MetaTest/__unnamed_task__/MaxReturn -48.7204 +MetaTest/__unnamed_task__/MinReturn -94.1468 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.58277 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.824e+06 +__unnamed_task__/AverageDiscountedReturn -30.0793 +__unnamed_task__/AverageReturn -60.3627 +__unnamed_task__/Iteration 181 +__unnamed_task__/MaxReturn -46.6374 +__unnamed_task__/MinReturn -123.667 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8827 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 04:25:19 | [maml_trainer] epoch #182 | Sampling for adapation and meta-testing... +2025-04-01 04:29:25 | [maml_trainer] epoch #182 | Finished meta-testing... +2025-04-01 04:29:25 | [maml_trainer] epoch #182 | Saving snapshot... +2025-04-01 04:29:47 | [maml_trainer] epoch #182 | Saved +2025-04-01 04:29:47 | [maml_trainer] epoch #182 | Time 224488.09 s +2025-04-01 04:29:47 | [maml_trainer] epoch #182 | EpochTime 1261.74 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.3686 +Average/AverageReturn -60.6701 +Average/Iteration 182 +Average/MaxReturn -48.7335 +Average/MinReturn -126.483 +Average/NumEpisodes 80 +Average/StdReturn 11.4259 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0081 +GaussianMLPPolicy/KLAfter 0.0034527 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000184458 +GaussianMLPPolicy/LossBefore -7.15256e-10 +GaussianMLPPolicy/dLoss 0.000184457 +Iteration 182 +MetaTest/Average/AverageDiscountedReturn -56.7075 +MetaTest/Average/AverageReturn -56.7075 +MetaTest/Average/Iteration 182 +MetaTest/Average/MaxReturn -49.0416 +MetaTest/Average/MinReturn -71.354 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.08741 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.7075 +MetaTest/__unnamed_task__/AverageReturn -56.7075 +MetaTest/__unnamed_task__/Iteration 182 +MetaTest/__unnamed_task__/MaxReturn -49.0416 +MetaTest/__unnamed_task__/MinReturn -71.354 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.08741 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.856e+06 +__unnamed_task__/AverageDiscountedReturn -30.3686 +__unnamed_task__/AverageReturn -60.6701 +__unnamed_task__/Iteration 182 +__unnamed_task__/MaxReturn -48.7335 +__unnamed_task__/MinReturn -126.483 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4259 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 04:46:19 | [maml_trainer] epoch #183 | Sampling for adapation and meta-testing... +2025-04-01 04:50:23 | [maml_trainer] epoch #183 | Finished meta-testing... +2025-04-01 04:50:23 | [maml_trainer] epoch #183 | Saving snapshot... +2025-04-01 04:50:44 | [maml_trainer] epoch #183 | Saved +2025-04-01 04:50:44 | [maml_trainer] epoch #183 | Time 225745.94 s +2025-04-01 04:50:44 | [maml_trainer] epoch #183 | EpochTime 1257.85 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.6848 +Average/AverageReturn -58.2793 +Average/Iteration 183 +Average/MaxReturn -47.0487 +Average/MinReturn -88.7282 +Average/NumEpisodes 80 +Average/StdReturn 7.23316 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0077 +GaussianMLPPolicy/KLAfter 0.00155691 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.23558e-05 +GaussianMLPPolicy/LossBefore -1.13845e-08 +GaussianMLPPolicy/dLoss -2.23672e-05 +Iteration 183 +MetaTest/Average/AverageDiscountedReturn -56.9138 +MetaTest/Average/AverageReturn -56.9138 +MetaTest/Average/Iteration 183 +MetaTest/Average/MaxReturn -47.1198 +MetaTest/Average/MinReturn -90.0634 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.81784 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.9138 +MetaTest/__unnamed_task__/AverageReturn -56.9138 +MetaTest/__unnamed_task__/Iteration 183 +MetaTest/__unnamed_task__/MaxReturn -47.1198 +MetaTest/__unnamed_task__/MinReturn -90.0634 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.81784 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.888e+06 +__unnamed_task__/AverageDiscountedReturn -29.6848 +__unnamed_task__/AverageReturn -58.2793 +__unnamed_task__/Iteration 183 +__unnamed_task__/MaxReturn -47.0487 +__unnamed_task__/MinReturn -88.7282 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.23316 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 05:07:01 | [maml_trainer] epoch #184 | Sampling for adapation and meta-testing... +2025-04-01 05:11:04 | [maml_trainer] epoch #184 | Finished meta-testing... +2025-04-01 05:11:04 | [maml_trainer] epoch #184 | Saving snapshot... +2025-04-01 05:11:25 | [maml_trainer] epoch #184 | Saved +2025-04-01 05:11:25 | [maml_trainer] epoch #184 | Time 226986.08 s +2025-04-01 05:11:25 | [maml_trainer] epoch #184 | EpochTime 1240.14 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.9071 +Average/AverageReturn -58.9792 +Average/Iteration 184 +Average/MaxReturn -47.5711 +Average/MinReturn -89.8853 +Average/NumEpisodes 80 +Average/StdReturn 8.27952 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.006 +GaussianMLPPolicy/KLAfter 0.00159618 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.25699e-05 +GaussianMLPPolicy/LossBefore -8.52346e-09 +GaussianMLPPolicy/dLoss 7.25614e-05 +Iteration 184 +MetaTest/Average/AverageDiscountedReturn -57.1077 +MetaTest/Average/AverageReturn -57.1077 +MetaTest/Average/Iteration 184 +MetaTest/Average/MaxReturn -46.7959 +MetaTest/Average/MinReturn -73.547 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.41537 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.1077 +MetaTest/__unnamed_task__/AverageReturn -57.1077 +MetaTest/__unnamed_task__/Iteration 184 +MetaTest/__unnamed_task__/MaxReturn -46.7959 +MetaTest/__unnamed_task__/MinReturn -73.547 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.41537 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.92e+06 +__unnamed_task__/AverageDiscountedReturn -29.9071 +__unnamed_task__/AverageReturn -58.9792 +__unnamed_task__/Iteration 184 +__unnamed_task__/MaxReturn -47.5711 +__unnamed_task__/MinReturn -89.8853 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.27952 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 05:27:44 | [maml_trainer] epoch #185 | Sampling for adapation and meta-testing... +2025-04-01 05:31:48 | [maml_trainer] epoch #185 | Finished meta-testing... +2025-04-01 05:31:48 | [maml_trainer] epoch #185 | Saving snapshot... +2025-04-01 05:32:09 | [maml_trainer] epoch #185 | Saved +2025-04-01 05:32:09 | [maml_trainer] epoch #185 | Time 228230.92 s +2025-04-01 05:32:09 | [maml_trainer] epoch #185 | EpochTime 1244.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.5532 +Average/AverageReturn -58.2197 +Average/Iteration 185 +Average/MaxReturn -46.3806 +Average/MinReturn -175.983 +Average/NumEpisodes 80 +Average/StdReturn 14.1802 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0024 +GaussianMLPPolicy/KLAfter 0.00204392 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.32677e-05 +GaussianMLPPolicy/LossBefore -2.20537e-09 +GaussianMLPPolicy/dLoss 5.32655e-05 +Iteration 185 +MetaTest/Average/AverageDiscountedReturn -59.242 +MetaTest/Average/AverageReturn -59.242 +MetaTest/Average/Iteration 185 +MetaTest/Average/MaxReturn -49.6767 +MetaTest/Average/MinReturn -95.0625 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.10203 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.242 +MetaTest/__unnamed_task__/AverageReturn -59.242 +MetaTest/__unnamed_task__/Iteration 185 +MetaTest/__unnamed_task__/MaxReturn -49.6767 +MetaTest/__unnamed_task__/MinReturn -95.0625 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.10203 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.952e+06 +__unnamed_task__/AverageDiscountedReturn -29.5532 +__unnamed_task__/AverageReturn -58.2197 +__unnamed_task__/Iteration 185 +__unnamed_task__/MaxReturn -46.3806 +__unnamed_task__/MinReturn -175.983 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.1802 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 05:48:38 | [maml_trainer] epoch #186 | Sampling for adapation and meta-testing... +2025-04-01 05:52:43 | [maml_trainer] epoch #186 | Finished meta-testing... +2025-04-01 05:52:43 | [maml_trainer] epoch #186 | Saving snapshot... +2025-04-01 05:53:04 | [maml_trainer] epoch #186 | Saved +2025-04-01 05:53:04 | [maml_trainer] epoch #186 | Time 229485.70 s +2025-04-01 05:53:04 | [maml_trainer] epoch #186 | EpochTime 1254.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.3728 +Average/AverageReturn -57.6667 +Average/Iteration 186 +Average/MaxReturn -47.5499 +Average/MinReturn -147.604 +Average/NumEpisodes 80 +Average/StdReturn 11.8859 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0003 +GaussianMLPPolicy/KLAfter 0.00176311 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.30216e-05 +GaussianMLPPolicy/LossBefore -2.86102e-09 +GaussianMLPPolicy/dLoss -2.30245e-05 +Iteration 186 +MetaTest/Average/AverageDiscountedReturn -57.1271 +MetaTest/Average/AverageReturn -57.1271 +MetaTest/Average/Iteration 186 +MetaTest/Average/MaxReturn -46.8952 +MetaTest/Average/MinReturn -77.0862 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.60649 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.1271 +MetaTest/__unnamed_task__/AverageReturn -57.1271 +MetaTest/__unnamed_task__/Iteration 186 +MetaTest/__unnamed_task__/MaxReturn -46.8952 +MetaTest/__unnamed_task__/MinReturn -77.0862 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.60649 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.984e+06 +__unnamed_task__/AverageDiscountedReturn -29.3728 +__unnamed_task__/AverageReturn -57.6667 +__unnamed_task__/Iteration 186 +__unnamed_task__/MaxReturn -47.5499 +__unnamed_task__/MinReturn -147.604 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8859 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 06:09:21 | [maml_trainer] epoch #187 | Sampling for adapation and meta-testing... +2025-04-01 06:13:26 | [maml_trainer] epoch #187 | Finished meta-testing... +2025-04-01 06:13:26 | [maml_trainer] epoch #187 | Saving snapshot... +2025-04-01 06:13:48 | [maml_trainer] epoch #187 | Saved +2025-04-01 06:13:48 | [maml_trainer] epoch #187 | Time 230729.24 s +2025-04-01 06:13:48 | [maml_trainer] epoch #187 | EpochTime 1243.53 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.5975 +Average/AverageReturn -57.9983 +Average/Iteration 187 +Average/MaxReturn -45.5275 +Average/MinReturn -168.806 +Average/NumEpisodes 80 +Average/StdReturn 13.9327 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99903 +GaussianMLPPolicy/KLAfter 0.00202319 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.08475e-05 +GaussianMLPPolicy/LossBefore 8.70228e-09 +GaussianMLPPolicy/dLoss -7.08388e-05 +Iteration 187 +MetaTest/Average/AverageDiscountedReturn -54.6102 +MetaTest/Average/AverageReturn -54.6102 +MetaTest/Average/Iteration 187 +MetaTest/Average/MaxReturn -43.9652 +MetaTest/Average/MinReturn -62.2569 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.69887 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.6102 +MetaTest/__unnamed_task__/AverageReturn -54.6102 +MetaTest/__unnamed_task__/Iteration 187 +MetaTest/__unnamed_task__/MaxReturn -43.9652 +MetaTest/__unnamed_task__/MinReturn -62.2569 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.69887 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.016e+06 +__unnamed_task__/AverageDiscountedReturn -29.5975 +__unnamed_task__/AverageReturn -57.9983 +__unnamed_task__/Iteration 187 +__unnamed_task__/MaxReturn -45.5275 +__unnamed_task__/MinReturn -168.806 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9327 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 06:30:05 | [maml_trainer] epoch #188 | Sampling for adapation and meta-testing... +2025-04-01 06:34:12 | [maml_trainer] epoch #188 | Finished meta-testing... +2025-04-01 06:34:12 | [maml_trainer] epoch #188 | Saving snapshot... +2025-04-01 06:34:33 | [maml_trainer] epoch #188 | Saved +2025-04-01 06:34:33 | [maml_trainer] epoch #188 | Time 231974.50 s +2025-04-01 06:34:33 | [maml_trainer] epoch #188 | EpochTime 1245.26 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.8287 +Average/AverageReturn -55.9649 +Average/Iteration 188 +Average/MaxReturn -44.5501 +Average/MinReturn -98.1058 +Average/NumEpisodes 80 +Average/StdReturn 7.17893 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99686 +GaussianMLPPolicy/KLAfter 0.00247053 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.00011231 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss 0.000112314 +Iteration 188 +MetaTest/Average/AverageDiscountedReturn -55.3438 +MetaTest/Average/AverageReturn -55.3438 +MetaTest/Average/Iteration 188 +MetaTest/Average/MaxReturn -43.7743 +MetaTest/Average/MinReturn -69.11 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.29651 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.3438 +MetaTest/__unnamed_task__/AverageReturn -55.3438 +MetaTest/__unnamed_task__/Iteration 188 +MetaTest/__unnamed_task__/MaxReturn -43.7743 +MetaTest/__unnamed_task__/MinReturn -69.11 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.29651 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.048e+06 +__unnamed_task__/AverageDiscountedReturn -28.8287 +__unnamed_task__/AverageReturn -55.9649 +__unnamed_task__/Iteration 188 +__unnamed_task__/MaxReturn -44.5501 +__unnamed_task__/MinReturn -98.1058 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.17893 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 06:51:07 | [maml_trainer] epoch #189 | Sampling for adapation and meta-testing... +2025-04-01 06:55:15 | [maml_trainer] epoch #189 | Finished meta-testing... +2025-04-01 06:55:15 | [maml_trainer] epoch #189 | Saving snapshot... +2025-04-01 06:55:36 | [maml_trainer] epoch #189 | Saved +2025-04-01 06:55:36 | [maml_trainer] epoch #189 | Time 233237.09 s +2025-04-01 06:55:36 | [maml_trainer] epoch #189 | EpochTime 1262.58 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.0025 +Average/AverageReturn -55.964 +Average/Iteration 189 +Average/MaxReturn -44.2866 +Average/MinReturn -87.8233 +Average/NumEpisodes 80 +Average/StdReturn 6.8849 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99457 +GaussianMLPPolicy/KLAfter 0.00321499 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000107627 +GaussianMLPPolicy/LossBefore 2.68221e-09 +GaussianMLPPolicy/dLoss 0.000107629 +Iteration 189 +MetaTest/Average/AverageDiscountedReturn -54.3258 +MetaTest/Average/AverageReturn -54.3258 +MetaTest/Average/Iteration 189 +MetaTest/Average/MaxReturn -47.9803 +MetaTest/Average/MinReturn -67.7375 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.47988 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.3258 +MetaTest/__unnamed_task__/AverageReturn -54.3258 +MetaTest/__unnamed_task__/Iteration 189 +MetaTest/__unnamed_task__/MaxReturn -47.9803 +MetaTest/__unnamed_task__/MinReturn -67.7375 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.47988 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.08e+06 +__unnamed_task__/AverageDiscountedReturn -29.0025 +__unnamed_task__/AverageReturn -55.964 +__unnamed_task__/Iteration 189 +__unnamed_task__/MaxReturn -44.2866 +__unnamed_task__/MinReturn -87.8233 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.8849 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 07:11:57 | [maml_trainer] epoch #190 | Sampling for adapation and meta-testing... +2025-04-01 07:16:01 | [maml_trainer] epoch #190 | Finished meta-testing... +2025-04-01 07:16:01 | [maml_trainer] epoch #190 | Saving snapshot... +2025-04-01 07:16:23 | [maml_trainer] epoch #190 | Saved +2025-04-01 07:16:23 | [maml_trainer] epoch #190 | Time 234484.09 s +2025-04-01 07:16:23 | [maml_trainer] epoch #190 | EpochTime 1247.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.8604 +Average/AverageReturn -55.6621 +Average/Iteration 190 +Average/MaxReturn -46.9065 +Average/MinReturn -89.7828 +Average/NumEpisodes 80 +Average/StdReturn 6.65079 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99077 +GaussianMLPPolicy/KLAfter 0.00469829 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000127282 +GaussianMLPPolicy/LossBefore 7.21216e-09 +GaussianMLPPolicy/dLoss 0.000127289 +Iteration 190 +MetaTest/Average/AverageDiscountedReturn -54.4569 +MetaTest/Average/AverageReturn -54.4569 +MetaTest/Average/Iteration 190 +MetaTest/Average/MaxReturn -47.463 +MetaTest/Average/MinReturn -63.9692 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.62377 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.4569 +MetaTest/__unnamed_task__/AverageReturn -54.4569 +MetaTest/__unnamed_task__/Iteration 190 +MetaTest/__unnamed_task__/MaxReturn -47.463 +MetaTest/__unnamed_task__/MinReturn -63.9692 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.62377 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.112e+06 +__unnamed_task__/AverageDiscountedReturn -28.8604 +__unnamed_task__/AverageReturn -55.6621 +__unnamed_task__/Iteration 190 +__unnamed_task__/MaxReturn -46.9065 +__unnamed_task__/MinReturn -89.7828 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.65079 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 07:32:42 | [maml_trainer] epoch #191 | Sampling for adapation and meta-testing... +2025-04-01 07:36:43 | [maml_trainer] epoch #191 | Finished meta-testing... +2025-04-01 07:36:43 | [maml_trainer] epoch #191 | Saving snapshot... +2025-04-01 07:37:04 | [maml_trainer] epoch #191 | Saved +2025-04-01 07:37:04 | [maml_trainer] epoch #191 | Time 235725.82 s +2025-04-01 07:37:04 | [maml_trainer] epoch #191 | EpochTime 1241.72 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.7334 +Average/AverageReturn -56.5027 +Average/Iteration 191 +Average/MaxReturn -45.308 +Average/MinReturn -170.769 +Average/NumEpisodes 80 +Average/StdReturn 13.9234 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9883 +GaussianMLPPolicy/KLAfter 0.00571116 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.33753e-05 +GaussianMLPPolicy/LossBefore -4.76837e-09 +GaussianMLPPolicy/dLoss -4.338e-05 +Iteration 191 +MetaTest/Average/AverageDiscountedReturn -56.0123 +MetaTest/Average/AverageReturn -56.0123 +MetaTest/Average/Iteration 191 +MetaTest/Average/MaxReturn -47.8198 +MetaTest/Average/MinReturn -66.3437 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.8617 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.0123 +MetaTest/__unnamed_task__/AverageReturn -56.0123 +MetaTest/__unnamed_task__/Iteration 191 +MetaTest/__unnamed_task__/MaxReturn -47.8198 +MetaTest/__unnamed_task__/MinReturn -66.3437 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.8617 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.144e+06 +__unnamed_task__/AverageDiscountedReturn -28.7334 +__unnamed_task__/AverageReturn -56.5027 +__unnamed_task__/Iteration 191 +__unnamed_task__/MaxReturn -45.308 +__unnamed_task__/MinReturn -170.769 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9234 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 07:53:12 | [maml_trainer] epoch #192 | Sampling for adapation and meta-testing... +2025-04-01 07:57:18 | [maml_trainer] epoch #192 | Finished meta-testing... +2025-04-01 07:57:18 | [maml_trainer] epoch #192 | Saving snapshot... +2025-04-01 07:57:40 | [maml_trainer] epoch #192 | Saved +2025-04-01 07:57:40 | [maml_trainer] epoch #192 | Time 236961.13 s +2025-04-01 07:57:40 | [maml_trainer] epoch #192 | EpochTime 1235.30 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.6714 +Average/AverageReturn -54.275 +Average/Iteration 192 +Average/MaxReturn -46.5907 +Average/MinReturn -64.3683 +Average/NumEpisodes 80 +Average/StdReturn 4.33991 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98594 +GaussianMLPPolicy/KLAfter 0.0023006 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.27834e-05 +GaussianMLPPolicy/LossBefore -5.48363e-09 +GaussianMLPPolicy/dLoss -9.27889e-05 +Iteration 192 +MetaTest/Average/AverageDiscountedReturn -51.7506 +MetaTest/Average/AverageReturn -51.7506 +MetaTest/Average/Iteration 192 +MetaTest/Average/MaxReturn -46.9679 +MetaTest/Average/MinReturn -59.7209 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.08294 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.7506 +MetaTest/__unnamed_task__/AverageReturn -51.7506 +MetaTest/__unnamed_task__/Iteration 192 +MetaTest/__unnamed_task__/MaxReturn -46.9679 +MetaTest/__unnamed_task__/MinReturn -59.7209 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.08294 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.176e+06 +__unnamed_task__/AverageDiscountedReturn -28.6714 +__unnamed_task__/AverageReturn -54.275 +__unnamed_task__/Iteration 192 +__unnamed_task__/MaxReturn -46.5907 +__unnamed_task__/MinReturn -64.3683 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.33991 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 08:13:55 | [maml_trainer] epoch #193 | Sampling for adapation and meta-testing... +2025-04-01 08:17:59 | [maml_trainer] epoch #193 | Finished meta-testing... +2025-04-01 08:17:59 | [maml_trainer] epoch #193 | Saving snapshot... +2025-04-01 08:18:19 | [maml_trainer] epoch #193 | Saved +2025-04-01 08:18:19 | [maml_trainer] epoch #193 | Time 238200.71 s +2025-04-01 08:18:19 | [maml_trainer] epoch #193 | EpochTime 1239.58 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.3748 +Average/AverageReturn -54.2739 +Average/Iteration 193 +Average/MaxReturn -45.4134 +Average/MinReturn -69.7349 +Average/NumEpisodes 80 +Average/StdReturn 5.65486 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98315 +GaussianMLPPolicy/KLAfter 0.0025972 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.86135e-05 +GaussianMLPPolicy/LossBefore -8.10623e-09 +GaussianMLPPolicy/dLoss 1.86054e-05 +Iteration 193 +MetaTest/Average/AverageDiscountedReturn -54.7584 +MetaTest/Average/AverageReturn -54.7584 +MetaTest/Average/Iteration 193 +MetaTest/Average/MaxReturn -48.3779 +MetaTest/Average/MinReturn -66.4331 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.88082 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.7584 +MetaTest/__unnamed_task__/AverageReturn -54.7584 +MetaTest/__unnamed_task__/Iteration 193 +MetaTest/__unnamed_task__/MaxReturn -48.3779 +MetaTest/__unnamed_task__/MinReturn -66.4331 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.88082 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.208e+06 +__unnamed_task__/AverageDiscountedReturn -28.3748 +__unnamed_task__/AverageReturn -54.2739 +__unnamed_task__/Iteration 193 +__unnamed_task__/MaxReturn -45.4134 +__unnamed_task__/MinReturn -69.7349 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.65486 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 08:34:35 | [maml_trainer] epoch #194 | Sampling for adapation and meta-testing... +2025-04-01 08:38:38 | [maml_trainer] epoch #194 | Finished meta-testing... +2025-04-01 08:38:38 | [maml_trainer] epoch #194 | Saving snapshot... +2025-04-01 08:38:59 | [maml_trainer] epoch #194 | Saved +2025-04-01 08:38:59 | [maml_trainer] epoch #194 | Time 239440.71 s +2025-04-01 08:38:59 | [maml_trainer] epoch #194 | EpochTime 1240.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.7294 +Average/AverageReturn -54.7769 +Average/Iteration 194 +Average/MaxReturn -46.8609 +Average/MinReturn -73.0301 +Average/NumEpisodes 80 +Average/StdReturn 5.04766 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97984 +GaussianMLPPolicy/KLAfter 0.00312598 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.03723e-05 +GaussianMLPPolicy/LossBefore -1.29938e-08 +GaussianMLPPolicy/dLoss 5.03593e-05 +Iteration 194 +MetaTest/Average/AverageDiscountedReturn -53.8697 +MetaTest/Average/AverageReturn -53.8697 +MetaTest/Average/Iteration 194 +MetaTest/Average/MaxReturn -46.5476 +MetaTest/Average/MinReturn -63.072 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.69944 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.8697 +MetaTest/__unnamed_task__/AverageReturn -53.8697 +MetaTest/__unnamed_task__/Iteration 194 +MetaTest/__unnamed_task__/MaxReturn -46.5476 +MetaTest/__unnamed_task__/MinReturn -63.072 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.69944 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.24e+06 +__unnamed_task__/AverageDiscountedReturn -28.7294 +__unnamed_task__/AverageReturn -54.7769 +__unnamed_task__/Iteration 194 +__unnamed_task__/MaxReturn -46.8609 +__unnamed_task__/MinReturn -73.0301 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.04766 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 08:55:11 | [maml_trainer] epoch #195 | Sampling for adapation and meta-testing... +2025-04-01 08:59:14 | [maml_trainer] epoch #195 | Finished meta-testing... +2025-04-01 08:59:14 | [maml_trainer] epoch #195 | Saving snapshot... +2025-04-01 08:59:35 | [maml_trainer] epoch #195 | Saved +2025-04-01 08:59:35 | [maml_trainer] epoch #195 | Time 240676.32 s +2025-04-01 08:59:35 | [maml_trainer] epoch #195 | EpochTime 1235.61 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.3062 +Average/AverageReturn -53.621 +Average/Iteration 195 +Average/MaxReturn -44.4827 +Average/MinReturn -66.593 +Average/NumEpisodes 80 +Average/StdReturn 4.51269 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9762 +GaussianMLPPolicy/KLAfter 0.00746373 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000243321 +GaussianMLPPolicy/LossBefore 6.61612e-09 +GaussianMLPPolicy/dLoss -0.000243315 +Iteration 195 +MetaTest/Average/AverageDiscountedReturn -53.49 +MetaTest/Average/AverageReturn -53.49 +MetaTest/Average/Iteration 195 +MetaTest/Average/MaxReturn -47.0778 +MetaTest/Average/MinReturn -63.3016 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.63655 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.49 +MetaTest/__unnamed_task__/AverageReturn -53.49 +MetaTest/__unnamed_task__/Iteration 195 +MetaTest/__unnamed_task__/MaxReturn -47.0778 +MetaTest/__unnamed_task__/MinReturn -63.3016 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.63655 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.272e+06 +__unnamed_task__/AverageDiscountedReturn -28.3062 +__unnamed_task__/AverageReturn -53.621 +__unnamed_task__/Iteration 195 +__unnamed_task__/MaxReturn -44.4827 +__unnamed_task__/MinReturn -66.593 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.51269 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 09:15:42 | [maml_trainer] epoch #196 | Sampling for adapation and meta-testing... +2025-04-01 09:19:45 | [maml_trainer] epoch #196 | Finished meta-testing... +2025-04-01 09:19:45 | [maml_trainer] epoch #196 | Saving snapshot... +2025-04-01 09:20:07 | [maml_trainer] epoch #196 | Saved +2025-04-01 09:20:07 | [maml_trainer] epoch #196 | Time 241908.07 s +2025-04-01 09:20:07 | [maml_trainer] epoch #196 | EpochTime 1231.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.9342 +Average/AverageReturn -52.7077 +Average/Iteration 196 +Average/MaxReturn -44.7201 +Average/MinReturn -63.5657 +Average/NumEpisodes 80 +Average/StdReturn 4.14001 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97208 +GaussianMLPPolicy/KLAfter 0.00758979 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.87487e-06 +GaussianMLPPolicy/LossBefore -7.98702e-09 +GaussianMLPPolicy/dLoss 5.86688e-06 +Iteration 196 +MetaTest/Average/AverageDiscountedReturn -51.9153 +MetaTest/Average/AverageReturn -51.9153 +MetaTest/Average/Iteration 196 +MetaTest/Average/MaxReturn -44.8035 +MetaTest/Average/MinReturn -59.078 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.69805 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.9153 +MetaTest/__unnamed_task__/AverageReturn -51.9153 +MetaTest/__unnamed_task__/Iteration 196 +MetaTest/__unnamed_task__/MaxReturn -44.8035 +MetaTest/__unnamed_task__/MinReturn -59.078 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.69805 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.304e+06 +__unnamed_task__/AverageDiscountedReturn -27.9342 +__unnamed_task__/AverageReturn -52.7077 +__unnamed_task__/Iteration 196 +__unnamed_task__/MaxReturn -44.7201 +__unnamed_task__/MinReturn -63.5657 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.14001 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 09:36:27 | [maml_trainer] epoch #197 | Sampling for adapation and meta-testing... +2025-04-01 09:40:45 | [maml_trainer] epoch #197 | Finished meta-testing... +2025-04-01 09:40:45 | [maml_trainer] epoch #197 | Saving snapshot... +2025-04-01 09:41:07 | [maml_trainer] epoch #197 | Saved +2025-04-01 09:41:07 | [maml_trainer] epoch #197 | Time 243168.05 s +2025-04-01 09:41:07 | [maml_trainer] epoch #197 | EpochTime 1259.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.0965 +Average/AverageReturn -52.7083 +Average/Iteration 197 +Average/MaxReturn -44.7886 +Average/MinReturn -63.6899 +Average/NumEpisodes 80 +Average/StdReturn 4.05351 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96952 +GaussianMLPPolicy/KLAfter 0.00647424 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000161309 +GaussianMLPPolicy/LossBefore -2.5034e-09 +GaussianMLPPolicy/dLoss -0.000161311 +Iteration 197 +MetaTest/Average/AverageDiscountedReturn -54.833 +MetaTest/Average/AverageReturn -54.833 +MetaTest/Average/Iteration 197 +MetaTest/Average/MaxReturn -46.5785 +MetaTest/Average/MinReturn -84.4943 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.94834 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.833 +MetaTest/__unnamed_task__/AverageReturn -54.833 +MetaTest/__unnamed_task__/Iteration 197 +MetaTest/__unnamed_task__/MaxReturn -46.5785 +MetaTest/__unnamed_task__/MinReturn -84.4943 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.94834 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.336e+06 +__unnamed_task__/AverageDiscountedReturn -28.0965 +__unnamed_task__/AverageReturn -52.7083 +__unnamed_task__/Iteration 197 +__unnamed_task__/MaxReturn -44.7886 +__unnamed_task__/MinReturn -63.6899 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.05351 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 09:58:17 | [maml_trainer] epoch #198 | Sampling for adapation and meta-testing... +2025-04-01 10:02:40 | [maml_trainer] epoch #198 | Finished meta-testing... +2025-04-01 10:02:40 | [maml_trainer] epoch #198 | Saving snapshot... +2025-04-01 10:03:02 | [maml_trainer] epoch #198 | Saved +2025-04-01 10:03:02 | [maml_trainer] epoch #198 | Time 244483.28 s +2025-04-01 10:03:02 | [maml_trainer] epoch #198 | EpochTime 1315.23 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.447 +Average/AverageReturn -53.2557 +Average/Iteration 198 +Average/MaxReturn -45.8119 +Average/MinReturn -75.3553 +Average/NumEpisodes 80 +Average/StdReturn 5.62493 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96698 +GaussianMLPPolicy/KLAfter 0.00732838 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000284694 +GaussianMLPPolicy/LossBefore 1.19209e-10 +GaussianMLPPolicy/dLoss 0.000284694 +Iteration 198 +MetaTest/Average/AverageDiscountedReturn -49.6482 +MetaTest/Average/AverageReturn -49.6482 +MetaTest/Average/Iteration 198 +MetaTest/Average/MaxReturn -44.8267 +MetaTest/Average/MinReturn -53.4638 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 2.57936 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.6482 +MetaTest/__unnamed_task__/AverageReturn -49.6482 +MetaTest/__unnamed_task__/Iteration 198 +MetaTest/__unnamed_task__/MaxReturn -44.8267 +MetaTest/__unnamed_task__/MinReturn -53.4638 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 2.57936 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.368e+06 +__unnamed_task__/AverageDiscountedReturn -28.447 +__unnamed_task__/AverageReturn -53.2557 +__unnamed_task__/Iteration 198 +__unnamed_task__/MaxReturn -45.8119 +__unnamed_task__/MinReturn -75.3553 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.62493 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 10:20:30 | [maml_trainer] epoch #199 | Sampling for adapation and meta-testing... +2025-04-01 10:25:10 | [maml_trainer] epoch #199 | Finished meta-testing... +2025-04-01 10:25:10 | [maml_trainer] epoch #199 | Saving snapshot... +2025-04-01 10:25:33 | [maml_trainer] epoch #199 | Saved +2025-04-01 10:25:33 | [maml_trainer] epoch #199 | Time 245835.00 s +2025-04-01 10:25:33 | [maml_trainer] epoch #199 | EpochTime 1351.71 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.8927 +Average/AverageReturn -52.0013 +Average/Iteration 199 +Average/MaxReturn -45.0658 +Average/MinReturn -63.2566 +Average/NumEpisodes 80 +Average/StdReturn 3.9676 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96501 +GaussianMLPPolicy/KLAfter 0.00795169 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000229979 +GaussianMLPPolicy/LossBefore 3.03984e-09 +GaussianMLPPolicy/dLoss -0.000229976 +Iteration 199 +MetaTest/Average/AverageDiscountedReturn -52.3986 +MetaTest/Average/AverageReturn -52.3986 +MetaTest/Average/Iteration 199 +MetaTest/Average/MaxReturn -46.0858 +MetaTest/Average/MinReturn -61.8572 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.45659 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.3986 +MetaTest/__unnamed_task__/AverageReturn -52.3986 +MetaTest/__unnamed_task__/Iteration 199 +MetaTest/__unnamed_task__/MaxReturn -46.0858 +MetaTest/__unnamed_task__/MinReturn -61.8572 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.45659 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.4e+06 +__unnamed_task__/AverageDiscountedReturn -27.8927 +__unnamed_task__/AverageReturn -52.0013 +__unnamed_task__/Iteration 199 +__unnamed_task__/MaxReturn -45.0658 +__unnamed_task__/MinReturn -63.2566 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.9676 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 10:42:59 | [maml_trainer] epoch #200 | Sampling for adapation and meta-testing... +2025-04-01 10:47:19 | [maml_trainer] epoch #200 | Finished meta-testing... +2025-04-01 10:47:19 | [maml_trainer] epoch #200 | Saving snapshot... +2025-04-01 10:47:42 | [maml_trainer] epoch #200 | Saved +2025-04-01 10:47:42 | [maml_trainer] epoch #200 | Time 247163.47 s +2025-04-01 10:47:42 | [maml_trainer] epoch #200 | EpochTime 1328.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.6874 +Average/AverageReturn -50.9233 +Average/Iteration 200 +Average/MaxReturn -43.143 +Average/MinReturn -66.1218 +Average/NumEpisodes 80 +Average/StdReturn 3.92363 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96216 +GaussianMLPPolicy/KLAfter 0.00852344 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.93358e-05 +GaussianMLPPolicy/LossBefore 9.0003e-09 +GaussianMLPPolicy/dLoss -5.93268e-05 +Iteration 200 +MetaTest/Average/AverageDiscountedReturn -52.035 +MetaTest/Average/AverageReturn -52.035 +MetaTest/Average/Iteration 200 +MetaTest/Average/MaxReturn -45.5324 +MetaTest/Average/MinReturn -70.3847 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.34178 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.035 +MetaTest/__unnamed_task__/AverageReturn -52.035 +MetaTest/__unnamed_task__/Iteration 200 +MetaTest/__unnamed_task__/MaxReturn -45.5324 +MetaTest/__unnamed_task__/MinReturn -70.3847 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.34178 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.432e+06 +__unnamed_task__/AverageDiscountedReturn -27.6874 +__unnamed_task__/AverageReturn -50.9233 +__unnamed_task__/Iteration 200 +__unnamed_task__/MaxReturn -43.143 +__unnamed_task__/MinReturn -66.1218 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.92363 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 11:05:06 | [maml_trainer] epoch #201 | Sampling for adapation and meta-testing... +2025-04-01 11:09:24 | [maml_trainer] epoch #201 | Finished meta-testing... +2025-04-01 11:09:24 | [maml_trainer] epoch #201 | Saving snapshot... +2025-04-01 11:09:47 | [maml_trainer] epoch #201 | Saved +2025-04-01 11:09:47 | [maml_trainer] epoch #201 | Time 248488.51 s +2025-04-01 11:09:47 | [maml_trainer] epoch #201 | EpochTime 1325.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.2924 +Average/AverageReturn -50.9075 +Average/Iteration 201 +Average/MaxReturn -41.1596 +Average/MinReturn -67.6407 +Average/NumEpisodes 80 +Average/StdReturn 5.08183 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95861 +GaussianMLPPolicy/KLAfter 0.00590206 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.13205e-06 +GaussianMLPPolicy/LossBefore -1.09076e-08 +GaussianMLPPolicy/dLoss -3.14295e-06 +Iteration 201 +MetaTest/Average/AverageDiscountedReturn -50.5541 +MetaTest/Average/AverageReturn -50.5541 +MetaTest/Average/Iteration 201 +MetaTest/Average/MaxReturn -42.6253 +MetaTest/Average/MinReturn -60.3128 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.524 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.5541 +MetaTest/__unnamed_task__/AverageReturn -50.5541 +MetaTest/__unnamed_task__/Iteration 201 +MetaTest/__unnamed_task__/MaxReturn -42.6253 +MetaTest/__unnamed_task__/MinReturn -60.3128 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.524 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.464e+06 +__unnamed_task__/AverageDiscountedReturn -27.2924 +__unnamed_task__/AverageReturn -50.9075 +__unnamed_task__/Iteration 201 +__unnamed_task__/MaxReturn -41.1596 +__unnamed_task__/MinReturn -67.6407 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.08183 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 11:26:48 | [maml_trainer] epoch #202 | Sampling for adapation and meta-testing... +2025-04-01 11:31:02 | [maml_trainer] epoch #202 | Finished meta-testing... +2025-04-01 11:31:02 | [maml_trainer] epoch #202 | Saving snapshot... +2025-04-01 11:31:23 | [maml_trainer] epoch #202 | Saved +2025-04-01 11:31:23 | [maml_trainer] epoch #202 | Time 249784.75 s +2025-04-01 11:31:23 | [maml_trainer] epoch #202 | EpochTime 1296.24 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.5168 +Average/AverageReturn -50.9692 +Average/Iteration 202 +Average/MaxReturn -42.027 +Average/MinReturn -78.8863 +Average/NumEpisodes 80 +Average/StdReturn 6.26867 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95663 +GaussianMLPPolicy/KLAfter 0.00555279 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000122701 +GaussianMLPPolicy/LossBefore 2.98023e-10 +GaussianMLPPolicy/dLoss -0.000122701 +Iteration 202 +MetaTest/Average/AverageDiscountedReturn -54.0254 +MetaTest/Average/AverageReturn -54.0254 +MetaTest/Average/Iteration 202 +MetaTest/Average/MaxReturn -44.8574 +MetaTest/Average/MinReturn -67.6277 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.24414 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.0254 +MetaTest/__unnamed_task__/AverageReturn -54.0254 +MetaTest/__unnamed_task__/Iteration 202 +MetaTest/__unnamed_task__/MaxReturn -44.8574 +MetaTest/__unnamed_task__/MinReturn -67.6277 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.24414 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.496e+06 +__unnamed_task__/AverageDiscountedReturn -27.5168 +__unnamed_task__/AverageReturn -50.9692 +__unnamed_task__/Iteration 202 +__unnamed_task__/MaxReturn -42.027 +__unnamed_task__/MinReturn -78.8863 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.26867 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 11:48:24 | [maml_trainer] epoch #203 | Sampling for adapation and meta-testing... +2025-04-01 11:52:41 | [maml_trainer] epoch #203 | Finished meta-testing... +2025-04-01 11:52:41 | [maml_trainer] epoch #203 | Saving snapshot... +2025-04-01 11:53:03 | [maml_trainer] epoch #203 | Saved +2025-04-01 11:53:03 | [maml_trainer] epoch #203 | Time 251084.85 s +2025-04-01 11:53:03 | [maml_trainer] epoch #203 | EpochTime 1300.10 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.9794 +Average/AverageReturn -50.1729 +Average/Iteration 203 +Average/MaxReturn -40.7453 +Average/MinReturn -75.8694 +Average/NumEpisodes 80 +Average/StdReturn 6.02234 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95489 +GaussianMLPPolicy/KLAfter 0.0054653 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.25526e-05 +GaussianMLPPolicy/LossBefore -7.27177e-09 +GaussianMLPPolicy/dLoss 1.25453e-05 +Iteration 203 +MetaTest/Average/AverageDiscountedReturn -51.8975 +MetaTest/Average/AverageReturn -51.8975 +MetaTest/Average/Iteration 203 +MetaTest/Average/MaxReturn -44.3124 +MetaTest/Average/MinReturn -68.7865 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.20072 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.8975 +MetaTest/__unnamed_task__/AverageReturn -51.8975 +MetaTest/__unnamed_task__/Iteration 203 +MetaTest/__unnamed_task__/MaxReturn -44.3124 +MetaTest/__unnamed_task__/MinReturn -68.7865 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.20072 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.528e+06 +__unnamed_task__/AverageDiscountedReturn -26.9794 +__unnamed_task__/AverageReturn -50.1729 +__unnamed_task__/Iteration 203 +__unnamed_task__/MaxReturn -40.7453 +__unnamed_task__/MinReturn -75.8694 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.02234 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 12:09:58 | [maml_trainer] epoch #204 | Sampling for adapation and meta-testing... +2025-04-01 12:14:10 | [maml_trainer] epoch #204 | Finished meta-testing... +2025-04-01 12:14:10 | [maml_trainer] epoch #204 | Saving snapshot... +2025-04-01 12:14:32 | [maml_trainer] epoch #204 | Saved +2025-04-01 12:14:32 | [maml_trainer] epoch #204 | Time 252373.62 s +2025-04-01 12:14:32 | [maml_trainer] epoch #204 | EpochTime 1288.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.0977 +Average/AverageReturn -53.2232 +Average/Iteration 204 +Average/MaxReturn -41.5307 +Average/MinReturn -77.2347 +Average/NumEpisodes 80 +Average/StdReturn 8.23514 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95331 +GaussianMLPPolicy/KLAfter 0.00328158 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.08407e-05 +GaussianMLPPolicy/LossBefore -3.03984e-09 +GaussianMLPPolicy/dLoss -8.08437e-05 +Iteration 204 +MetaTest/Average/AverageDiscountedReturn -57.3313 +MetaTest/Average/AverageReturn -57.3313 +MetaTest/Average/Iteration 204 +MetaTest/Average/MaxReturn -43.6029 +MetaTest/Average/MinReturn -92.76 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.4136 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.3313 +MetaTest/__unnamed_task__/AverageReturn -57.3313 +MetaTest/__unnamed_task__/Iteration 204 +MetaTest/__unnamed_task__/MaxReturn -43.6029 +MetaTest/__unnamed_task__/MinReturn -92.76 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.4136 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.56e+06 +__unnamed_task__/AverageDiscountedReturn -28.0977 +__unnamed_task__/AverageReturn -53.2232 +__unnamed_task__/Iteration 204 +__unnamed_task__/MaxReturn -41.5307 +__unnamed_task__/MinReturn -77.2347 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.23514 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 12:31:20 | [maml_trainer] epoch #205 | Sampling for adapation and meta-testing... +2025-04-01 12:35:34 | [maml_trainer] epoch #205 | Finished meta-testing... +2025-04-01 12:35:34 | [maml_trainer] epoch #205 | Saving snapshot... +2025-04-01 12:35:56 | [maml_trainer] epoch #205 | Saved +2025-04-01 12:35:56 | [maml_trainer] epoch #205 | Time 253657.69 s +2025-04-01 12:35:56 | [maml_trainer] epoch #205 | EpochTime 1284.07 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.519 +Average/AverageReturn -56.8674 +Average/Iteration 205 +Average/MaxReturn -40.0521 +Average/MinReturn -142.966 +Average/NumEpisodes 80 +Average/StdReturn 14.15 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95154 +GaussianMLPPolicy/KLAfter 0.00174658 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.71055e-05 +GaussianMLPPolicy/LossBefore -9.83477e-09 +GaussianMLPPolicy/dLoss -5.71153e-05 +Iteration 205 +MetaTest/Average/AverageDiscountedReturn -50.8419 +MetaTest/Average/AverageReturn -50.8419 +MetaTest/Average/Iteration 205 +MetaTest/Average/MaxReturn -41.1061 +MetaTest/Average/MinReturn -65.9486 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.49085 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.8419 +MetaTest/__unnamed_task__/AverageReturn -50.8419 +MetaTest/__unnamed_task__/Iteration 205 +MetaTest/__unnamed_task__/MaxReturn -41.1061 +MetaTest/__unnamed_task__/MinReturn -65.9486 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.49085 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.592e+06 +__unnamed_task__/AverageDiscountedReturn -28.519 +__unnamed_task__/AverageReturn -56.8674 +__unnamed_task__/Iteration 205 +__unnamed_task__/MaxReturn -40.0521 +__unnamed_task__/MinReturn -142.966 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.15 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 12:52:13 | [maml_trainer] epoch #206 | Sampling for adapation and meta-testing... +2025-04-01 12:56:13 | [maml_trainer] epoch #206 | Finished meta-testing... +2025-04-01 12:56:13 | [maml_trainer] epoch #206 | Saving snapshot... +2025-04-01 12:56:33 | [maml_trainer] epoch #206 | Saved +2025-04-01 12:56:33 | [maml_trainer] epoch #206 | Time 254894.64 s +2025-04-01 12:56:33 | [maml_trainer] epoch #206 | EpochTime 1236.95 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.3734 +Average/AverageReturn -55.2697 +Average/Iteration 206 +Average/MaxReturn -38.7419 +Average/MinReturn -130.48 +Average/NumEpisodes 80 +Average/StdReturn 14.444 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95014 +GaussianMLPPolicy/KLAfter 0.000958298 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.91595e-05 +GaussianMLPPolicy/LossBefore -4.58956e-09 +GaussianMLPPolicy/dLoss -1.91641e-05 +Iteration 206 +MetaTest/Average/AverageDiscountedReturn -52.2461 +MetaTest/Average/AverageReturn -52.2461 +MetaTest/Average/Iteration 206 +MetaTest/Average/MaxReturn -44.3045 +MetaTest/Average/MinReturn -74.1699 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.08996 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.2461 +MetaTest/__unnamed_task__/AverageReturn -52.2461 +MetaTest/__unnamed_task__/Iteration 206 +MetaTest/__unnamed_task__/MaxReturn -44.3045 +MetaTest/__unnamed_task__/MinReturn -74.1699 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.08996 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.624e+06 +__unnamed_task__/AverageDiscountedReturn -28.3734 +__unnamed_task__/AverageReturn -55.2697 +__unnamed_task__/Iteration 206 +__unnamed_task__/MaxReturn -38.7419 +__unnamed_task__/MinReturn -130.48 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.444 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 13:12:24 | [maml_trainer] epoch #207 | Sampling for adapation and meta-testing... +2025-04-01 13:16:28 | [maml_trainer] epoch #207 | Finished meta-testing... +2025-04-01 13:16:28 | [maml_trainer] epoch #207 | Saving snapshot... +2025-04-01 13:16:49 | [maml_trainer] epoch #207 | Saved +2025-04-01 13:16:49 | [maml_trainer] epoch #207 | Time 256110.34 s +2025-04-01 13:16:49 | [maml_trainer] epoch #207 | EpochTime 1215.69 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.4646 +Average/AverageReturn -59.7259 +Average/Iteration 207 +Average/MaxReturn -41.1065 +Average/MinReturn -173.891 +Average/NumEpisodes 80 +Average/StdReturn 22.033 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94837 +GaussianMLPPolicy/KLAfter 0.000988037 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.83666e-06 +GaussianMLPPolicy/LossBefore 9.23872e-10 +GaussianMLPPolicy/dLoss 4.83759e-06 +Iteration 207 +MetaTest/Average/AverageDiscountedReturn -54.5092 +MetaTest/Average/AverageReturn -54.5092 +MetaTest/Average/Iteration 207 +MetaTest/Average/MaxReturn -44.4124 +MetaTest/Average/MinReturn -87.6552 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.9866 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.5092 +MetaTest/__unnamed_task__/AverageReturn -54.5092 +MetaTest/__unnamed_task__/Iteration 207 +MetaTest/__unnamed_task__/MaxReturn -44.4124 +MetaTest/__unnamed_task__/MinReturn -87.6552 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.9866 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.656e+06 +__unnamed_task__/AverageDiscountedReturn -29.4646 +__unnamed_task__/AverageReturn -59.7259 +__unnamed_task__/Iteration 207 +__unnamed_task__/MaxReturn -41.1065 +__unnamed_task__/MinReturn -173.891 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.033 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 13:32:58 | [maml_trainer] epoch #208 | Sampling for adapation and meta-testing... +2025-04-01 13:36:54 | [maml_trainer] epoch #208 | Finished meta-testing... +2025-04-01 13:36:54 | [maml_trainer] epoch #208 | Saving snapshot... +2025-04-01 13:37:15 | [maml_trainer] epoch #208 | Saved +2025-04-01 13:37:15 | [maml_trainer] epoch #208 | Time 257336.86 s +2025-04-01 13:37:15 | [maml_trainer] epoch #208 | EpochTime 1226.52 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.7629 +Average/AverageReturn -56.4568 +Average/Iteration 208 +Average/MaxReturn -43.3448 +Average/MinReturn -141.178 +Average/NumEpisodes 80 +Average/StdReturn 14.1166 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94817 +GaussianMLPPolicy/KLAfter 0.00163609 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.49133e-06 +GaussianMLPPolicy/LossBefore -2.05636e-09 +GaussianMLPPolicy/dLoss -6.49339e-06 +Iteration 208 +MetaTest/Average/AverageDiscountedReturn -56.3629 +MetaTest/Average/AverageReturn -56.3629 +MetaTest/Average/Iteration 208 +MetaTest/Average/MaxReturn -45.6215 +MetaTest/Average/MinReturn -73.9142 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.36037 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.3629 +MetaTest/__unnamed_task__/AverageReturn -56.3629 +MetaTest/__unnamed_task__/Iteration 208 +MetaTest/__unnamed_task__/MaxReturn -45.6215 +MetaTest/__unnamed_task__/MinReturn -73.9142 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.36037 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.688e+06 +__unnamed_task__/AverageDiscountedReturn -28.7629 +__unnamed_task__/AverageReturn -56.4568 +__unnamed_task__/Iteration 208 +__unnamed_task__/MaxReturn -43.3448 +__unnamed_task__/MinReturn -141.178 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.1166 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 13:52:58 | [maml_trainer] epoch #209 | Sampling for adapation and meta-testing... +2025-04-01 13:56:56 | [maml_trainer] epoch #209 | Finished meta-testing... +2025-04-01 13:56:56 | [maml_trainer] epoch #209 | Saving snapshot... +2025-04-01 13:57:18 | [maml_trainer] epoch #209 | Saved +2025-04-01 13:57:18 | [maml_trainer] epoch #209 | Time 258539.23 s +2025-04-01 13:57:18 | [maml_trainer] epoch #209 | EpochTime 1202.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.1947 +Average/AverageReturn -59.2112 +Average/Iteration 209 +Average/MaxReturn -42.304 +Average/MinReturn -113.882 +Average/NumEpisodes 80 +Average/StdReturn 14.6535 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94892 +GaussianMLPPolicy/KLAfter 0.00134228 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000107756 +GaussianMLPPolicy/LossBefore -2.65241e-09 +GaussianMLPPolicy/dLoss -0.000107758 +Iteration 209 +MetaTest/Average/AverageDiscountedReturn -54.9498 +MetaTest/Average/AverageReturn -54.9498 +MetaTest/Average/Iteration 209 +MetaTest/Average/MaxReturn -43.8734 +MetaTest/Average/MinReturn -81.699 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.654 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.9498 +MetaTest/__unnamed_task__/AverageReturn -54.9498 +MetaTest/__unnamed_task__/Iteration 209 +MetaTest/__unnamed_task__/MaxReturn -43.8734 +MetaTest/__unnamed_task__/MinReturn -81.699 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.654 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.72e+06 +__unnamed_task__/AverageDiscountedReturn -29.1947 +__unnamed_task__/AverageReturn -59.2112 +__unnamed_task__/Iteration 209 +__unnamed_task__/MaxReturn -42.304 +__unnamed_task__/MinReturn -113.882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6535 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 14:13:22 | [maml_trainer] epoch #210 | Sampling for adapation and meta-testing... +2025-04-01 14:17:25 | [maml_trainer] epoch #210 | Finished meta-testing... +2025-04-01 14:17:25 | [maml_trainer] epoch #210 | Saving snapshot... +2025-04-01 14:17:48 | [maml_trainer] epoch #210 | Saved +2025-04-01 14:17:48 | [maml_trainer] epoch #210 | Time 259769.68 s +2025-04-01 14:17:48 | [maml_trainer] epoch #210 | EpochTime 1230.45 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.446 +Average/AverageReturn -55.8366 +Average/Iteration 210 +Average/MaxReturn -40.4533 +Average/MinReturn -140.494 +Average/NumEpisodes 80 +Average/StdReturn 13.8913 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95042 +GaussianMLPPolicy/KLAfter 0.00113918 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.26316e-05 +GaussianMLPPolicy/LossBefore -5.96048e-11 +GaussianMLPPolicy/dLoss 3.26315e-05 +Iteration 210 +MetaTest/Average/AverageDiscountedReturn -62.4213 +MetaTest/Average/AverageReturn -62.4213 +MetaTest/Average/Iteration 210 +MetaTest/Average/MaxReturn -41.7178 +MetaTest/Average/MinReturn -121.1 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.7101 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.4213 +MetaTest/__unnamed_task__/AverageReturn -62.4213 +MetaTest/__unnamed_task__/Iteration 210 +MetaTest/__unnamed_task__/MaxReturn -41.7178 +MetaTest/__unnamed_task__/MinReturn -121.1 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.7101 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.752e+06 +__unnamed_task__/AverageDiscountedReturn -28.446 +__unnamed_task__/AverageReturn -55.8366 +__unnamed_task__/Iteration 210 +__unnamed_task__/MaxReturn -40.4533 +__unnamed_task__/MinReturn -140.494 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8913 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 14:34:39 | [maml_trainer] epoch #211 | Sampling for adapation and meta-testing... +2025-04-01 14:38:51 | [maml_trainer] epoch #211 | Finished meta-testing... +2025-04-01 14:38:51 | [maml_trainer] epoch #211 | Saving snapshot... +2025-04-01 14:39:11 | [maml_trainer] epoch #211 | Saved +2025-04-01 14:39:11 | [maml_trainer] epoch #211 | Time 261052.32 s +2025-04-01 14:39:11 | [maml_trainer] epoch #211 | EpochTime 1282.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.4998 +Average/AverageReturn -60.7445 +Average/Iteration 211 +Average/MaxReturn -39.7907 +Average/MinReturn -165.668 +Average/NumEpisodes 80 +Average/StdReturn 21.0377 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95227 +GaussianMLPPolicy/KLAfter 0.00116814 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000123993 +GaussianMLPPolicy/LossBefore 5.57303e-09 +GaussianMLPPolicy/dLoss 0.000123999 +Iteration 211 +MetaTest/Average/AverageDiscountedReturn -62.0213 +MetaTest/Average/AverageReturn -62.0213 +MetaTest/Average/Iteration 211 +MetaTest/Average/MaxReturn -41.909 +MetaTest/Average/MinReturn -118.401 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.46 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.0213 +MetaTest/__unnamed_task__/AverageReturn -62.0213 +MetaTest/__unnamed_task__/Iteration 211 +MetaTest/__unnamed_task__/MaxReturn -41.909 +MetaTest/__unnamed_task__/MinReturn -118.401 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.46 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.784e+06 +__unnamed_task__/AverageDiscountedReturn -29.4998 +__unnamed_task__/AverageReturn -60.7445 +__unnamed_task__/Iteration 211 +__unnamed_task__/MaxReturn -39.7907 +__unnamed_task__/MinReturn -165.668 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.0377 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 14:55:42 | [maml_trainer] epoch #212 | Sampling for adapation and meta-testing... +2025-04-01 14:59:43 | [maml_trainer] epoch #212 | Finished meta-testing... +2025-04-01 14:59:43 | [maml_trainer] epoch #212 | Saving snapshot... +2025-04-01 15:00:04 | [maml_trainer] epoch #212 | Saved +2025-04-01 15:00:04 | [maml_trainer] epoch #212 | Time 262305.53 s +2025-04-01 15:00:04 | [maml_trainer] epoch #212 | EpochTime 1253.21 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.3874 +Average/AverageReturn -60.6342 +Average/Iteration 212 +Average/MaxReturn -41.223 +Average/MinReturn -146.282 +Average/NumEpisodes 80 +Average/StdReturn 19.7113 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95375 +GaussianMLPPolicy/KLAfter 0.00111899 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.89638e-05 +GaussianMLPPolicy/LossBefore 5.36442e-09 +GaussianMLPPolicy/dLoss -1.89584e-05 +Iteration 212 +MetaTest/Average/AverageDiscountedReturn -56.9791 +MetaTest/Average/AverageReturn -56.9791 +MetaTest/Average/Iteration 212 +MetaTest/Average/MaxReturn -43.6334 +MetaTest/Average/MinReturn -89.4573 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8238 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.9791 +MetaTest/__unnamed_task__/AverageReturn -56.9791 +MetaTest/__unnamed_task__/Iteration 212 +MetaTest/__unnamed_task__/MaxReturn -43.6334 +MetaTest/__unnamed_task__/MinReturn -89.4573 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8238 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.816e+06 +__unnamed_task__/AverageDiscountedReturn -29.3874 +__unnamed_task__/AverageReturn -60.6342 +__unnamed_task__/Iteration 212 +__unnamed_task__/MaxReturn -41.223 +__unnamed_task__/MinReturn -146.282 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.7113 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 15:16:14 | [maml_trainer] epoch #213 | Sampling for adapation and meta-testing... +2025-04-01 15:20:25 | [maml_trainer] epoch #213 | Finished meta-testing... +2025-04-01 15:20:25 | [maml_trainer] epoch #213 | Saving snapshot... +2025-04-01 15:20:47 | [maml_trainer] epoch #213 | Saved +2025-04-01 15:20:47 | [maml_trainer] epoch #213 | Time 263548.93 s +2025-04-01 15:20:47 | [maml_trainer] epoch #213 | EpochTime 1243.40 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.5246 +Average/AverageReturn -60.226 +Average/Iteration 213 +Average/MaxReturn -36.3934 +Average/MinReturn -144.557 +Average/NumEpisodes 80 +Average/StdReturn 17.9479 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95535 +GaussianMLPPolicy/KLAfter 0.00134356 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.69142e-05 +GaussianMLPPolicy/LossBefore 4.41074e-09 +GaussianMLPPolicy/dLoss 7.69186e-05 +Iteration 213 +MetaTest/Average/AverageDiscountedReturn -58.1109 +MetaTest/Average/AverageReturn -58.1109 +MetaTest/Average/Iteration 213 +MetaTest/Average/MaxReturn -42.2011 +MetaTest/Average/MinReturn -84.244 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9772 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.1109 +MetaTest/__unnamed_task__/AverageReturn -58.1109 +MetaTest/__unnamed_task__/Iteration 213 +MetaTest/__unnamed_task__/MaxReturn -42.2011 +MetaTest/__unnamed_task__/MinReturn -84.244 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9772 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.848e+06 +__unnamed_task__/AverageDiscountedReturn -29.5246 +__unnamed_task__/AverageReturn -60.226 +__unnamed_task__/Iteration 213 +__unnamed_task__/MaxReturn -36.3934 +__unnamed_task__/MinReturn -144.557 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9479 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 15:37:39 | [maml_trainer] epoch #214 | Sampling for adapation and meta-testing... +2025-04-01 15:41:52 | [maml_trainer] epoch #214 | Finished meta-testing... +2025-04-01 15:41:52 | [maml_trainer] epoch #214 | Saving snapshot... +2025-04-01 15:42:15 | [maml_trainer] epoch #214 | Saved +2025-04-01 15:42:15 | [maml_trainer] epoch #214 | Time 264836.25 s +2025-04-01 15:42:15 | [maml_trainer] epoch #214 | EpochTime 1287.32 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.5942 +Average/AverageReturn -57.3412 +Average/Iteration 214 +Average/MaxReturn -40.8056 +Average/MinReturn -146.3 +Average/NumEpisodes 80 +Average/StdReturn 15.745 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95543 +GaussianMLPPolicy/KLAfter 0.00151693 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.83251e-05 +GaussianMLPPolicy/LossBefore -1.78814e-10 +GaussianMLPPolicy/dLoss 2.83249e-05 +Iteration 214 +MetaTest/Average/AverageDiscountedReturn -64.6312 +MetaTest/Average/AverageReturn -64.6312 +MetaTest/Average/Iteration 214 +MetaTest/Average/MaxReturn -43.5461 +MetaTest/Average/MinReturn -128.462 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.9722 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.6312 +MetaTest/__unnamed_task__/AverageReturn -64.6312 +MetaTest/__unnamed_task__/Iteration 214 +MetaTest/__unnamed_task__/MaxReturn -43.5461 +MetaTest/__unnamed_task__/MinReturn -128.462 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.9722 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.88e+06 +__unnamed_task__/AverageDiscountedReturn -28.5942 +__unnamed_task__/AverageReturn -57.3412 +__unnamed_task__/Iteration 214 +__unnamed_task__/MaxReturn -40.8056 +__unnamed_task__/MinReturn -146.3 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.745 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 15:59:13 | [maml_trainer] epoch #215 | Sampling for adapation and meta-testing... +2025-04-01 16:03:30 | [maml_trainer] epoch #215 | Finished meta-testing... +2025-04-01 16:03:30 | [maml_trainer] epoch #215 | Saving snapshot... +2025-04-01 16:03:51 | [maml_trainer] epoch #215 | Saved +2025-04-01 16:03:51 | [maml_trainer] epoch #215 | Time 266132.94 s +2025-04-01 16:03:51 | [maml_trainer] epoch #215 | EpochTime 1296.68 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.3474 +Average/AverageReturn -59.7827 +Average/Iteration 215 +Average/MaxReturn -40.3279 +Average/MinReturn -163.248 +Average/NumEpisodes 80 +Average/StdReturn 17.419 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95493 +GaussianMLPPolicy/KLAfter 0.00142055 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.01192e-05 +GaussianMLPPolicy/LossBefore 8.76188e-09 +GaussianMLPPolicy/dLoss 3.01279e-05 +Iteration 215 +MetaTest/Average/AverageDiscountedReturn -60.7866 +MetaTest/Average/AverageReturn -60.7866 +MetaTest/Average/Iteration 215 +MetaTest/Average/MaxReturn -41.9661 +MetaTest/Average/MinReturn -169.13 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.1563 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.7866 +MetaTest/__unnamed_task__/AverageReturn -60.7866 +MetaTest/__unnamed_task__/Iteration 215 +MetaTest/__unnamed_task__/MaxReturn -41.9661 +MetaTest/__unnamed_task__/MinReturn -169.13 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.1563 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.912e+06 +__unnamed_task__/AverageDiscountedReturn -29.3474 +__unnamed_task__/AverageReturn -59.7827 +__unnamed_task__/Iteration 215 +__unnamed_task__/MaxReturn -40.3279 +__unnamed_task__/MinReturn -163.248 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.419 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 16:20:46 | [maml_trainer] epoch #216 | Sampling for adapation and meta-testing... +2025-04-01 16:24:56 | [maml_trainer] epoch #216 | Finished meta-testing... +2025-04-01 16:24:56 | [maml_trainer] epoch #216 | Saving snapshot... +2025-04-01 16:25:19 | [maml_trainer] epoch #216 | Saved +2025-04-01 16:25:19 | [maml_trainer] epoch #216 | Time 267420.06 s +2025-04-01 16:25:19 | [maml_trainer] epoch #216 | EpochTime 1287.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.8995 +Average/AverageReturn -60.6695 +Average/Iteration 216 +Average/MaxReturn -39.9387 +Average/MinReturn -126.282 +Average/NumEpisodes 80 +Average/StdReturn 16.6059 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95391 +GaussianMLPPolicy/KLAfter 0.00134332 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.30621e-05 +GaussianMLPPolicy/LossBefore -1.10269e-09 +GaussianMLPPolicy/dLoss -1.30633e-05 +Iteration 216 +MetaTest/Average/AverageDiscountedReturn -54.6595 +MetaTest/Average/AverageReturn -54.6595 +MetaTest/Average/Iteration 216 +MetaTest/Average/MaxReturn -44.8731 +MetaTest/Average/MinReturn -80.0829 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.76017 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.6595 +MetaTest/__unnamed_task__/AverageReturn -54.6595 +MetaTest/__unnamed_task__/Iteration 216 +MetaTest/__unnamed_task__/MaxReturn -44.8731 +MetaTest/__unnamed_task__/MinReturn -80.0829 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.76017 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.944e+06 +__unnamed_task__/AverageDiscountedReturn -29.8995 +__unnamed_task__/AverageReturn -60.6695 +__unnamed_task__/Iteration 216 +__unnamed_task__/MaxReturn -39.9387 +__unnamed_task__/MinReturn -126.282 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6059 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 16:42:22 | [maml_trainer] epoch #217 | Sampling for adapation and meta-testing... +2025-04-01 16:46:34 | [maml_trainer] epoch #217 | Finished meta-testing... +2025-04-01 16:46:34 | [maml_trainer] epoch #217 | Saving snapshot... +2025-04-01 16:46:58 | [maml_trainer] epoch #217 | Saved +2025-04-01 16:46:58 | [maml_trainer] epoch #217 | Time 268719.14 s +2025-04-01 16:46:58 | [maml_trainer] epoch #217 | EpochTime 1299.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.2632 +Average/AverageReturn -59.8967 +Average/Iteration 217 +Average/MaxReturn -40.3991 +Average/MinReturn -145.691 +Average/NumEpisodes 80 +Average/StdReturn 16.3875 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95338 +GaussianMLPPolicy/KLAfter 0.00125536 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.00010306 +GaussianMLPPolicy/LossBefore 1.63913e-09 +GaussianMLPPolicy/dLoss 0.000103061 +Iteration 217 +MetaTest/Average/AverageDiscountedReturn -58.9484 +MetaTest/Average/AverageReturn -58.9484 +MetaTest/Average/Iteration 217 +MetaTest/Average/MaxReturn -39.3426 +MetaTest/Average/MinReturn -110.595 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.7639 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -58.9484 +MetaTest/__unnamed_task__/AverageReturn -58.9484 +MetaTest/__unnamed_task__/Iteration 217 +MetaTest/__unnamed_task__/MaxReturn -39.3426 +MetaTest/__unnamed_task__/MinReturn -110.595 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.7639 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.976e+06 +__unnamed_task__/AverageDiscountedReturn -29.2632 +__unnamed_task__/AverageReturn -59.8967 +__unnamed_task__/Iteration 217 +__unnamed_task__/MaxReturn -40.3991 +__unnamed_task__/MinReturn -145.691 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.3875 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 17:03:06 | [maml_trainer] epoch #218 | Sampling for adapation and meta-testing... +2025-04-01 17:07:02 | [maml_trainer] epoch #218 | Finished meta-testing... +2025-04-01 17:07:02 | [maml_trainer] epoch #218 | Saving snapshot... +2025-04-01 17:07:24 | [maml_trainer] epoch #218 | Saved +2025-04-01 17:07:24 | [maml_trainer] epoch #218 | Time 269945.21 s +2025-04-01 17:07:24 | [maml_trainer] epoch #218 | EpochTime 1226.06 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.5131 +Average/AverageReturn -59.7717 +Average/Iteration 218 +Average/MaxReturn -39.1226 +Average/MinReturn -169.532 +Average/NumEpisodes 80 +Average/StdReturn 20.0767 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95336 +GaussianMLPPolicy/KLAfter 0.00151707 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.32023e-05 +GaussianMLPPolicy/LossBefore 3.06964e-09 +GaussianMLPPolicy/dLoss -1.31992e-05 +Iteration 218 +MetaTest/Average/AverageDiscountedReturn -61.0529 +MetaTest/Average/AverageReturn -61.0529 +MetaTest/Average/Iteration 218 +MetaTest/Average/MaxReturn -40.8253 +MetaTest/Average/MinReturn -91.9374 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.8655 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -61.0529 +MetaTest/__unnamed_task__/AverageReturn -61.0529 +MetaTest/__unnamed_task__/Iteration 218 +MetaTest/__unnamed_task__/MaxReturn -40.8253 +MetaTest/__unnamed_task__/MinReturn -91.9374 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.8655 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.008e+06 +__unnamed_task__/AverageDiscountedReturn -29.5131 +__unnamed_task__/AverageReturn -59.7717 +__unnamed_task__/Iteration 218 +__unnamed_task__/MaxReturn -39.1226 +__unnamed_task__/MinReturn -169.532 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0767 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 17:23:19 | [maml_trainer] epoch #219 | Sampling for adapation and meta-testing... +2025-04-01 17:27:20 | [maml_trainer] epoch #219 | Finished meta-testing... +2025-04-01 17:27:20 | [maml_trainer] epoch #219 | Saving snapshot... +2025-04-01 17:27:41 | [maml_trainer] epoch #219 | Saved +2025-04-01 17:27:41 | [maml_trainer] epoch #219 | Time 271162.50 s +2025-04-01 17:27:41 | [maml_trainer] epoch #219 | EpochTime 1217.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.0074 +Average/AverageReturn -58.7238 +Average/Iteration 219 +Average/MaxReturn -41.001 +Average/MinReturn -126.648 +Average/NumEpisodes 80 +Average/StdReturn 14.3082 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95326 +GaussianMLPPolicy/KLAfter 0.00130345 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.37594e-05 +GaussianMLPPolicy/LossBefore -3.18885e-09 +GaussianMLPPolicy/dLoss -7.37626e-05 +Iteration 219 +MetaTest/Average/AverageDiscountedReturn -60.7882 +MetaTest/Average/AverageReturn -60.7882 +MetaTest/Average/Iteration 219 +MetaTest/Average/MaxReturn -45.3926 +MetaTest/Average/MinReturn -103.251 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.4249 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.7882 +MetaTest/__unnamed_task__/AverageReturn -60.7882 +MetaTest/__unnamed_task__/Iteration 219 +MetaTest/__unnamed_task__/MaxReturn -45.3926 +MetaTest/__unnamed_task__/MinReturn -103.251 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.4249 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.04e+06 +__unnamed_task__/AverageDiscountedReturn -29.0074 +__unnamed_task__/AverageReturn -58.7238 +__unnamed_task__/Iteration 219 +__unnamed_task__/MaxReturn -41.001 +__unnamed_task__/MinReturn -126.648 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.3082 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 17:43:45 | [maml_trainer] epoch #220 | Sampling for adapation and meta-testing... +2025-04-01 17:47:49 | [maml_trainer] epoch #220 | Finished meta-testing... +2025-04-01 17:47:49 | [maml_trainer] epoch #220 | Saving snapshot... +2025-04-01 17:48:09 | [maml_trainer] epoch #220 | Saved +2025-04-01 17:48:09 | [maml_trainer] epoch #220 | Time 272390.68 s +2025-04-01 17:48:09 | [maml_trainer] epoch #220 | EpochTime 1228.18 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.9283 +Average/AverageReturn -62.5462 +Average/Iteration 220 +Average/MaxReturn -41.0618 +Average/MinReturn -181.433 +Average/NumEpisodes 80 +Average/StdReturn 22.1879 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95356 +GaussianMLPPolicy/KLAfter 0.00172858 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.42247e-05 +GaussianMLPPolicy/LossBefore 6.75023e-09 +GaussianMLPPolicy/dLoss 4.42315e-05 +Iteration 220 +MetaTest/Average/AverageDiscountedReturn -57.0691 +MetaTest/Average/AverageReturn -57.0691 +MetaTest/Average/Iteration 220 +MetaTest/Average/MaxReturn -43.3695 +MetaTest/Average/MinReturn -90.204 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.6892 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.0691 +MetaTest/__unnamed_task__/AverageReturn -57.0691 +MetaTest/__unnamed_task__/Iteration 220 +MetaTest/__unnamed_task__/MaxReturn -43.3695 +MetaTest/__unnamed_task__/MinReturn -90.204 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.6892 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.072e+06 +__unnamed_task__/AverageDiscountedReturn -29.9283 +__unnamed_task__/AverageReturn -62.5462 +__unnamed_task__/Iteration 220 +__unnamed_task__/MaxReturn -41.0618 +__unnamed_task__/MinReturn -181.433 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.1879 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 18:04:13 | [maml_trainer] epoch #221 | Sampling for adapation and meta-testing... +2025-04-01 18:08:10 | [maml_trainer] epoch #221 | Finished meta-testing... +2025-04-01 18:08:10 | [maml_trainer] epoch #221 | Saving snapshot... +2025-04-01 18:08:31 | [maml_trainer] epoch #221 | Saved +2025-04-01 18:08:31 | [maml_trainer] epoch #221 | Time 273612.53 s +2025-04-01 18:08:31 | [maml_trainer] epoch #221 | EpochTime 1221.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.7081 +Average/AverageReturn -64.6008 +Average/Iteration 221 +Average/MaxReturn -40.6212 +Average/MinReturn -143.004 +Average/NumEpisodes 80 +Average/StdReturn 17.7765 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95263 +GaussianMLPPolicy/KLAfter 0.000559091 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.61418e-05 +GaussianMLPPolicy/LossBefore 2.20537e-09 +GaussianMLPPolicy/dLoss -5.61396e-05 +Iteration 221 +MetaTest/Average/AverageDiscountedReturn -67.1405 +MetaTest/Average/AverageReturn -67.1405 +MetaTest/Average/Iteration 221 +MetaTest/Average/MaxReturn -44.7101 +MetaTest/Average/MinReturn -115.8 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.2974 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -67.1405 +MetaTest/__unnamed_task__/AverageReturn -67.1405 +MetaTest/__unnamed_task__/Iteration 221 +MetaTest/__unnamed_task__/MaxReturn -44.7101 +MetaTest/__unnamed_task__/MinReturn -115.8 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.2974 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.104e+06 +__unnamed_task__/AverageDiscountedReturn -30.7081 +__unnamed_task__/AverageReturn -64.6008 +__unnamed_task__/Iteration 221 +__unnamed_task__/MaxReturn -40.6212 +__unnamed_task__/MinReturn -143.004 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.7765 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 18:24:39 | [maml_trainer] epoch #222 | Sampling for adapation and meta-testing... +2025-04-01 18:28:37 | [maml_trainer] epoch #222 | Finished meta-testing... +2025-04-01 18:28:37 | [maml_trainer] epoch #222 | Saving snapshot... +2025-04-01 18:28:59 | [maml_trainer] epoch #222 | Saved +2025-04-01 18:28:59 | [maml_trainer] epoch #222 | Time 274840.28 s +2025-04-01 18:28:59 | [maml_trainer] epoch #222 | EpochTime 1227.75 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.1527 +Average/AverageReturn -63.553 +Average/Iteration 222 +Average/MaxReturn -38.6175 +Average/MinReturn -178.63 +Average/NumEpisodes 80 +Average/StdReturn 24.1403 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95231 +GaussianMLPPolicy/KLAfter 0.000405224 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.01754e-05 +GaussianMLPPolicy/LossBefore -6.16908e-09 +GaussianMLPPolicy/dLoss 4.01692e-05 +Iteration 222 +MetaTest/Average/AverageDiscountedReturn -64.0891 +MetaTest/Average/AverageReturn -64.0891 +MetaTest/Average/Iteration 222 +MetaTest/Average/MaxReturn -43.2202 +MetaTest/Average/MinReturn -104.848 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5352 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.0891 +MetaTest/__unnamed_task__/AverageReturn -64.0891 +MetaTest/__unnamed_task__/Iteration 222 +MetaTest/__unnamed_task__/MaxReturn -43.2202 +MetaTest/__unnamed_task__/MinReturn -104.848 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5352 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.136e+06 +__unnamed_task__/AverageDiscountedReturn -30.1527 +__unnamed_task__/AverageReturn -63.553 +__unnamed_task__/Iteration 222 +__unnamed_task__/MaxReturn -38.6175 +__unnamed_task__/MinReturn -178.63 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.1403 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 18:45:00 | [maml_trainer] epoch #223 | Sampling for adapation and meta-testing... +2025-04-01 18:49:00 | [maml_trainer] epoch #223 | Finished meta-testing... +2025-04-01 18:49:00 | [maml_trainer] epoch #223 | Saving snapshot... +2025-04-01 18:49:22 | [maml_trainer] epoch #223 | Saved +2025-04-01 18:49:22 | [maml_trainer] epoch #223 | Time 276063.08 s +2025-04-01 18:49:22 | [maml_trainer] epoch #223 | EpochTime 1222.79 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -30.631 +Average/AverageReturn -63.742 +Average/Iteration 223 +Average/MaxReturn -40.8908 +Average/MinReturn -137.259 +Average/NumEpisodes 80 +Average/StdReturn 18.0266 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95219 +GaussianMLPPolicy/KLAfter 0.000691688 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.83395e-05 +GaussianMLPPolicy/LossBefore -3.8147e-09 +GaussianMLPPolicy/dLoss -2.83433e-05 +Iteration 223 +MetaTest/Average/AverageDiscountedReturn -60.2212 +MetaTest/Average/AverageReturn -60.2212 +MetaTest/Average/Iteration 223 +MetaTest/Average/MaxReturn -41.4618 +MetaTest/Average/MinReturn -108.724 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.6006 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -60.2212 +MetaTest/__unnamed_task__/AverageReturn -60.2212 +MetaTest/__unnamed_task__/Iteration 223 +MetaTest/__unnamed_task__/MaxReturn -41.4618 +MetaTest/__unnamed_task__/MinReturn -108.724 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.6006 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.168e+06 +__unnamed_task__/AverageDiscountedReturn -30.631 +__unnamed_task__/AverageReturn -63.742 +__unnamed_task__/Iteration 223 +__unnamed_task__/MaxReturn -40.8908 +__unnamed_task__/MinReturn -137.259 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0266 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 19:05:14 | [maml_trainer] epoch #224 | Sampling for adapation and meta-testing... +2025-04-01 19:09:12 | [maml_trainer] epoch #224 | Finished meta-testing... +2025-04-01 19:09:12 | [maml_trainer] epoch #224 | Saving snapshot... +2025-04-01 19:09:33 | [maml_trainer] epoch #224 | Saved +2025-04-01 19:09:33 | [maml_trainer] epoch #224 | Time 277274.71 s +2025-04-01 19:09:33 | [maml_trainer] epoch #224 | EpochTime 1211.63 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -31.0826 +Average/AverageReturn -66.3327 +Average/Iteration 224 +Average/MaxReturn -41.5343 +Average/MinReturn -152.777 +Average/NumEpisodes 80 +Average/StdReturn 20.2861 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95228 +GaussianMLPPolicy/KLAfter 0.000621854 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.66867e-05 +GaussianMLPPolicy/LossBefore -8.01682e-09 +GaussianMLPPolicy/dLoss 3.66787e-05 +Iteration 224 +MetaTest/Average/AverageDiscountedReturn -64.3237 +MetaTest/Average/AverageReturn -64.3237 +MetaTest/Average/Iteration 224 +MetaTest/Average/MaxReturn -42.9015 +MetaTest/Average/MinReturn -122.034 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.4784 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -64.3237 +MetaTest/__unnamed_task__/AverageReturn -64.3237 +MetaTest/__unnamed_task__/Iteration 224 +MetaTest/__unnamed_task__/MaxReturn -42.9015 +MetaTest/__unnamed_task__/MinReturn -122.034 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.4784 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.2e+06 +__unnamed_task__/AverageDiscountedReturn -31.0826 +__unnamed_task__/AverageReturn -66.3327 +__unnamed_task__/Iteration 224 +__unnamed_task__/MaxReturn -41.5343 +__unnamed_task__/MinReturn -152.777 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.2861 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 19:25:27 | [maml_trainer] epoch #225 | Sampling for adapation and meta-testing... +2025-04-01 19:29:30 | [maml_trainer] epoch #225 | Finished meta-testing... +2025-04-01 19:29:30 | [maml_trainer] epoch #225 | Saving snapshot... +2025-04-01 19:29:51 | [maml_trainer] epoch #225 | Saved +2025-04-01 19:29:51 | [maml_trainer] epoch #225 | Time 278492.76 s +2025-04-01 19:29:51 | [maml_trainer] epoch #225 | EpochTime 1218.05 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.7606 +Average/AverageReturn -60.4381 +Average/Iteration 225 +Average/MaxReturn -40.9843 +Average/MinReturn -101.823 +Average/NumEpisodes 80 +Average/StdReturn 13.5664 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95216 +GaussianMLPPolicy/KLAfter 0.00127431 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.22008e-05 +GaussianMLPPolicy/LossBefore -8.25524e-09 +GaussianMLPPolicy/dLoss 9.21925e-05 +Iteration 225 +MetaTest/Average/AverageDiscountedReturn -59.8797 +MetaTest/Average/AverageReturn -59.8797 +MetaTest/Average/Iteration 225 +MetaTest/Average/MaxReturn -42.5913 +MetaTest/Average/MinReturn -115.814 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.8107 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -59.8797 +MetaTest/__unnamed_task__/AverageReturn -59.8797 +MetaTest/__unnamed_task__/Iteration 225 +MetaTest/__unnamed_task__/MaxReturn -42.5913 +MetaTest/__unnamed_task__/MinReturn -115.814 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.8107 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.232e+06 +__unnamed_task__/AverageDiscountedReturn -29.7606 +__unnamed_task__/AverageReturn -60.4381 +__unnamed_task__/Iteration 225 +__unnamed_task__/MaxReturn -40.9843 +__unnamed_task__/MinReturn -101.823 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.5664 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 19:45:42 | [maml_trainer] epoch #226 | Sampling for adapation and meta-testing... +2025-04-01 19:49:39 | [maml_trainer] epoch #226 | Finished meta-testing... +2025-04-01 19:49:39 | [maml_trainer] epoch #226 | Saving snapshot... +2025-04-01 19:50:01 | [maml_trainer] epoch #226 | Saved +2025-04-01 19:50:01 | [maml_trainer] epoch #226 | Time 279702.25 s +2025-04-01 19:50:01 | [maml_trainer] epoch #226 | EpochTime 1209.48 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -29.9599 +Average/AverageReturn -61.2007 +Average/Iteration 226 +Average/MaxReturn -39.8321 +Average/MinReturn -186.076 +Average/NumEpisodes 80 +Average/StdReturn 20.0038 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95333 +GaussianMLPPolicy/KLAfter 0.00180305 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.08623e-05 +GaussianMLPPolicy/LossBefore -2.66731e-09 +GaussianMLPPolicy/dLoss 3.08596e-05 +Iteration 226 +MetaTest/Average/AverageDiscountedReturn -53.2917 +MetaTest/Average/AverageReturn -53.2917 +MetaTest/Average/Iteration 226 +MetaTest/Average/MaxReturn -41.2371 +MetaTest/Average/MinReturn -71.0958 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.95838 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.2917 +MetaTest/__unnamed_task__/AverageReturn -53.2917 +MetaTest/__unnamed_task__/Iteration 226 +MetaTest/__unnamed_task__/MaxReturn -41.2371 +MetaTest/__unnamed_task__/MinReturn -71.0958 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.95838 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.264e+06 +__unnamed_task__/AverageDiscountedReturn -29.9599 +__unnamed_task__/AverageReturn -61.2007 +__unnamed_task__/Iteration 226 +__unnamed_task__/MaxReturn -39.8321 +__unnamed_task__/MinReturn -186.076 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0038 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 20:05:55 | [maml_trainer] epoch #227 | Sampling for adapation and meta-testing... +2025-04-01 20:09:54 | [maml_trainer] epoch #227 | Finished meta-testing... +2025-04-01 20:09:54 | [maml_trainer] epoch #227 | Saving snapshot... +2025-04-01 20:10:16 | [maml_trainer] epoch #227 | Saved +2025-04-01 20:10:16 | [maml_trainer] epoch #227 | Time 280917.37 s +2025-04-01 20:10:16 | [maml_trainer] epoch #227 | EpochTime 1215.12 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -28.9097 +Average/AverageReturn -57.4468 +Average/Iteration 227 +Average/MaxReturn -39.5265 +Average/MinReturn -145.193 +Average/NumEpisodes 80 +Average/StdReturn 15.228 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95323 +GaussianMLPPolicy/KLAfter 0.00219729 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.0556e-05 +GaussianMLPPolicy/LossBefore -3.93391e-09 +GaussianMLPPolicy/dLoss 7.05521e-05 +Iteration 227 +MetaTest/Average/AverageDiscountedReturn -54.2149 +MetaTest/Average/AverageReturn -54.2149 +MetaTest/Average/Iteration 227 +MetaTest/Average/MaxReturn -41.4429 +MetaTest/Average/MinReturn -84.0026 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5782 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.2149 +MetaTest/__unnamed_task__/AverageReturn -54.2149 +MetaTest/__unnamed_task__/Iteration 227 +MetaTest/__unnamed_task__/MaxReturn -41.4429 +MetaTest/__unnamed_task__/MinReturn -84.0026 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5782 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.296e+06 +__unnamed_task__/AverageDiscountedReturn -28.9097 +__unnamed_task__/AverageReturn -57.4468 +__unnamed_task__/Iteration 227 +__unnamed_task__/MaxReturn -39.5265 +__unnamed_task__/MinReturn -145.193 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.228 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-01 20:26:24 | [maml_trainer] epoch #228 | Sampling for adapation and meta-testing... +2025-04-01 20:30:23 | [maml_trainer] epoch #228 | Finished meta-testing... +2025-04-01 20:30:23 | [maml_trainer] epoch #228 | Saving snapshot... +2025-04-01 20:30:44 | [maml_trainer] epoch #228 | Saved +2025-04-01 20:30:44 | [maml_trainer] epoch #228 | Time 282145.12 s +2025-04-01 20:30:44 | [maml_trainer] epoch #228 | EpochTime 1227.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -29.1678 +Average/AverageReturn -56.2536 +Average/Iteration 228 +Average/MaxReturn 24.3872 +Average/MinReturn -90.9681 +Average/NumEpisodes 80 +Average/StdReturn 14.7645 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95224 +GaussianMLPPolicy/KLAfter 0.00104123 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.39441e-05 +GaussianMLPPolicy/LossBefore 7.24196e-09 +GaussianMLPPolicy/dLoss 3.39513e-05 +Iteration 228 +MetaTest/Average/AverageDiscountedReturn -45.4774 +MetaTest/Average/AverageReturn -45.4774 +MetaTest/Average/Iteration 228 +MetaTest/Average/MaxReturn 88.9481 +MetaTest/Average/MinReturn -71.4604 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 31.7677 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.4774 +MetaTest/__unnamed_task__/AverageReturn -45.4774 +MetaTest/__unnamed_task__/Iteration 228 +MetaTest/__unnamed_task__/MaxReturn 88.9481 +MetaTest/__unnamed_task__/MinReturn -71.4604 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 31.7677 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.328e+06 +__unnamed_task__/AverageDiscountedReturn -29.1678 +__unnamed_task__/AverageReturn -56.2536 +__unnamed_task__/Iteration 228 +__unnamed_task__/MaxReturn 24.3872 +__unnamed_task__/MinReturn -90.9681 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.7645 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 20:46:41 | [maml_trainer] epoch #229 | Sampling for adapation and meta-testing... +2025-04-01 20:50:48 | [maml_trainer] epoch #229 | Finished meta-testing... +2025-04-01 20:50:48 | [maml_trainer] epoch #229 | Saving snapshot... +2025-04-01 20:51:11 | [maml_trainer] epoch #229 | Saved +2025-04-01 20:51:11 | [maml_trainer] epoch #229 | Time 283372.56 s +2025-04-01 20:51:11 | [maml_trainer] epoch #229 | EpochTime 1227.43 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.5701 +Average/AverageReturn -55.3511 +Average/Iteration 229 +Average/MaxReturn -40.7992 +Average/MinReturn -83.4228 +Average/NumEpisodes 80 +Average/StdReturn 10.5604 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95088 +GaussianMLPPolicy/KLAfter 0.00254199 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000119399 +GaussianMLPPolicy/LossBefore -6.02007e-09 +GaussianMLPPolicy/dLoss 0.000119393 +Iteration 229 +MetaTest/Average/AverageDiscountedReturn -54.4706 +MetaTest/Average/AverageReturn -54.4706 +MetaTest/Average/Iteration 229 +MetaTest/Average/MaxReturn -44.4829 +MetaTest/Average/MinReturn -76.3939 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.6173 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.4706 +MetaTest/__unnamed_task__/AverageReturn -54.4706 +MetaTest/__unnamed_task__/Iteration 229 +MetaTest/__unnamed_task__/MaxReturn -44.4829 +MetaTest/__unnamed_task__/MinReturn -76.3939 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.6173 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.36e+06 +__unnamed_task__/AverageDiscountedReturn -28.5701 +__unnamed_task__/AverageReturn -55.3511 +__unnamed_task__/Iteration 229 +__unnamed_task__/MaxReturn -40.7992 +__unnamed_task__/MinReturn -83.4228 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5604 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 21:08:02 | [maml_trainer] epoch #230 | Sampling for adapation and meta-testing... +2025-04-01 21:12:13 | [maml_trainer] epoch #230 | Finished meta-testing... +2025-04-01 21:12:13 | [maml_trainer] epoch #230 | Saving snapshot... +2025-04-01 21:12:35 | [maml_trainer] epoch #230 | Saved +2025-04-01 21:12:35 | [maml_trainer] epoch #230 | Time 284656.96 s +2025-04-01 21:12:35 | [maml_trainer] epoch #230 | EpochTime 1284.40 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.7223 +Average/AverageReturn -51.334 +Average/Iteration 230 +Average/MaxReturn -39.6765 +Average/MinReturn -74.9368 +Average/NumEpisodes 80 +Average/StdReturn 7.64695 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94931 +GaussianMLPPolicy/KLAfter 0.00260824 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.36295e-05 +GaussianMLPPolicy/LossBefore -1.01328e-09 +GaussianMLPPolicy/dLoss 5.36285e-05 +Iteration 230 +MetaTest/Average/AverageDiscountedReturn -56.1934 +MetaTest/Average/AverageReturn -56.1934 +MetaTest/Average/Iteration 230 +MetaTest/Average/MaxReturn -40.9612 +MetaTest/Average/MinReturn -80.2339 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.9156 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.1934 +MetaTest/__unnamed_task__/AverageReturn -56.1934 +MetaTest/__unnamed_task__/Iteration 230 +MetaTest/__unnamed_task__/MaxReturn -40.9612 +MetaTest/__unnamed_task__/MinReturn -80.2339 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.9156 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.392e+06 +__unnamed_task__/AverageDiscountedReturn -27.7223 +__unnamed_task__/AverageReturn -51.334 +__unnamed_task__/Iteration 230 +__unnamed_task__/MaxReturn -39.6765 +__unnamed_task__/MinReturn -74.9368 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.64695 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 21:29:28 | [maml_trainer] epoch #231 | Sampling for adapation and meta-testing... +2025-04-01 21:33:41 | [maml_trainer] epoch #231 | Finished meta-testing... +2025-04-01 21:33:41 | [maml_trainer] epoch #231 | Saving snapshot... +2025-04-01 21:34:04 | [maml_trainer] epoch #231 | Saved +2025-04-01 21:34:04 | [maml_trainer] epoch #231 | Time 285945.45 s +2025-04-01 21:34:04 | [maml_trainer] epoch #231 | EpochTime 1288.49 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -28.4765 +Average/AverageReturn -54.6097 +Average/Iteration 231 +Average/MaxReturn -41.3022 +Average/MinReturn -86.3969 +Average/NumEpisodes 80 +Average/StdReturn 10.8106 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94881 +GaussianMLPPolicy/KLAfter 0.00188031 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.2794e-05 +GaussianMLPPolicy/LossBefore -7.39098e-09 +GaussianMLPPolicy/dLoss 6.27866e-05 +Iteration 231 +MetaTest/Average/AverageDiscountedReturn -50.6968 +MetaTest/Average/AverageReturn -50.6968 +MetaTest/Average/Iteration 231 +MetaTest/Average/MaxReturn -43.0984 +MetaTest/Average/MinReturn -76.3966 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.78469 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.6968 +MetaTest/__unnamed_task__/AverageReturn -50.6968 +MetaTest/__unnamed_task__/Iteration 231 +MetaTest/__unnamed_task__/MaxReturn -43.0984 +MetaTest/__unnamed_task__/MinReturn -76.3966 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.78469 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.424e+06 +__unnamed_task__/AverageDiscountedReturn -28.4765 +__unnamed_task__/AverageReturn -54.6097 +__unnamed_task__/Iteration 231 +__unnamed_task__/MaxReturn -41.3022 +__unnamed_task__/MinReturn -86.3969 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8106 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 21:51:08 | [maml_trainer] epoch #232 | Sampling for adapation and meta-testing... +2025-04-01 21:55:24 | [maml_trainer] epoch #232 | Finished meta-testing... +2025-04-01 21:55:24 | [maml_trainer] epoch #232 | Saving snapshot... +2025-04-01 21:55:47 | [maml_trainer] epoch #232 | Saved +2025-04-01 21:55:47 | [maml_trainer] epoch #232 | Time 287248.76 s +2025-04-01 21:55:47 | [maml_trainer] epoch #232 | EpochTime 1303.30 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.69 +Average/AverageReturn -51.6284 +Average/Iteration 232 +Average/MaxReturn -42.6228 +Average/MinReturn -74.9602 +Average/NumEpisodes 80 +Average/StdReturn 6.66382 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94762 +GaussianMLPPolicy/KLAfter 0.00176964 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.44559e-05 +GaussianMLPPolicy/LossBefore -1.54972e-08 +GaussianMLPPolicy/dLoss 2.44404e-05 +Iteration 232 +MetaTest/Average/AverageDiscountedReturn -52.4178 +MetaTest/Average/AverageReturn -52.4178 +MetaTest/Average/Iteration 232 +MetaTest/Average/MaxReturn -44.0578 +MetaTest/Average/MinReturn -77.9857 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.45133 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.4178 +MetaTest/__unnamed_task__/AverageReturn -52.4178 +MetaTest/__unnamed_task__/Iteration 232 +MetaTest/__unnamed_task__/MaxReturn -44.0578 +MetaTest/__unnamed_task__/MinReturn -77.9857 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.45133 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.456e+06 +__unnamed_task__/AverageDiscountedReturn -27.69 +__unnamed_task__/AverageReturn -51.6284 +__unnamed_task__/Iteration 232 +__unnamed_task__/MaxReturn -42.6228 +__unnamed_task__/MinReturn -74.9602 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.66382 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 22:12:56 | [maml_trainer] epoch #233 | Sampling for adapation and meta-testing... +2025-04-01 22:17:16 | [maml_trainer] epoch #233 | Finished meta-testing... +2025-04-01 22:17:16 | [maml_trainer] epoch #233 | Saving snapshot... +2025-04-01 22:17:38 | [maml_trainer] epoch #233 | Saved +2025-04-01 22:17:38 | [maml_trainer] epoch #233 | Time 288559.10 s +2025-04-01 22:17:38 | [maml_trainer] epoch #233 | EpochTime 1310.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.903 +Average/AverageReturn -51.5947 +Average/Iteration 233 +Average/MaxReturn -40.7258 +Average/MinReturn -80.3938 +Average/NumEpisodes 80 +Average/StdReturn 7.31757 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94633 +GaussianMLPPolicy/KLAfter 0.00242864 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.13658e-05 +GaussianMLPPolicy/LossBefore -1.2815e-08 +GaussianMLPPolicy/dLoss 6.1353e-05 +Iteration 233 +MetaTest/Average/AverageDiscountedReturn -51.6322 +MetaTest/Average/AverageReturn -51.6322 +MetaTest/Average/Iteration 233 +MetaTest/Average/MaxReturn -43.726 +MetaTest/Average/MinReturn -71.9067 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.01804 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.6322 +MetaTest/__unnamed_task__/AverageReturn -51.6322 +MetaTest/__unnamed_task__/Iteration 233 +MetaTest/__unnamed_task__/MaxReturn -43.726 +MetaTest/__unnamed_task__/MinReturn -71.9067 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.01804 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.488e+06 +__unnamed_task__/AverageDiscountedReturn -27.903 +__unnamed_task__/AverageReturn -51.5947 +__unnamed_task__/Iteration 233 +__unnamed_task__/MaxReturn -40.7258 +__unnamed_task__/MinReturn -80.3938 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.31757 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 22:35:02 | [maml_trainer] epoch #234 | Sampling for adapation and meta-testing... +2025-04-01 22:39:20 | [maml_trainer] epoch #234 | Finished meta-testing... +2025-04-01 22:39:20 | [maml_trainer] epoch #234 | Saving snapshot... +2025-04-01 22:39:43 | [maml_trainer] epoch #234 | Saved +2025-04-01 22:39:43 | [maml_trainer] epoch #234 | Time 289884.38 s +2025-04-01 22:39:43 | [maml_trainer] epoch #234 | EpochTime 1325.28 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.4796 +Average/AverageReturn -50.8213 +Average/Iteration 234 +Average/MaxReturn -42.6073 +Average/MinReturn -76.3643 +Average/NumEpisodes 80 +Average/StdReturn 6.26445 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9443 +GaussianMLPPolicy/KLAfter 0.00144548 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.99056e-05 +GaussianMLPPolicy/LossBefore 1.37091e-09 +GaussianMLPPolicy/dLoss -1.99042e-05 +Iteration 234 +MetaTest/Average/AverageDiscountedReturn -51.2445 +MetaTest/Average/AverageReturn -51.2445 +MetaTest/Average/Iteration 234 +MetaTest/Average/MaxReturn -44.6866 +MetaTest/Average/MinReturn -73.8342 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.19308 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.2445 +MetaTest/__unnamed_task__/AverageReturn -51.2445 +MetaTest/__unnamed_task__/Iteration 234 +MetaTest/__unnamed_task__/MaxReturn -44.6866 +MetaTest/__unnamed_task__/MinReturn -73.8342 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.19308 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.52e+06 +__unnamed_task__/AverageDiscountedReturn -27.4796 +__unnamed_task__/AverageReturn -50.8213 +__unnamed_task__/Iteration 234 +__unnamed_task__/MaxReturn -42.6073 +__unnamed_task__/MinReturn -76.3643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.26445 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 22:56:55 | [maml_trainer] epoch #235 | Sampling for adapation and meta-testing... +2025-04-01 23:01:13 | [maml_trainer] epoch #235 | Finished meta-testing... +2025-04-01 23:01:13 | [maml_trainer] epoch #235 | Saving snapshot... +2025-04-01 23:01:36 | [maml_trainer] epoch #235 | Saved +2025-04-01 23:01:36 | [maml_trainer] epoch #235 | Time 291198.00 s +2025-04-01 23:01:36 | [maml_trainer] epoch #235 | EpochTime 1313.61 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.0465 +Average/AverageReturn -50.3442 +Average/Iteration 235 +Average/MaxReturn -40.3661 +Average/MinReturn -68.3778 +Average/NumEpisodes 80 +Average/StdReturn 5.26833 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94166 +GaussianMLPPolicy/KLAfter 0.00163146 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.18006e-05 +GaussianMLPPolicy/LossBefore -9.83477e-09 +GaussianMLPPolicy/dLoss -7.18104e-05 +Iteration 235 +MetaTest/Average/AverageDiscountedReturn -52.6586 +MetaTest/Average/AverageReturn -52.6586 +MetaTest/Average/Iteration 235 +MetaTest/Average/MaxReturn -43.5997 +MetaTest/Average/MinReturn -69.5793 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.80969 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.6586 +MetaTest/__unnamed_task__/AverageReturn -52.6586 +MetaTest/__unnamed_task__/Iteration 235 +MetaTest/__unnamed_task__/MaxReturn -43.5997 +MetaTest/__unnamed_task__/MinReturn -69.5793 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.80969 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.552e+06 +__unnamed_task__/AverageDiscountedReturn -27.0465 +__unnamed_task__/AverageReturn -50.3442 +__unnamed_task__/Iteration 235 +__unnamed_task__/MaxReturn -40.3661 +__unnamed_task__/MinReturn -68.3778 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.26833 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 23:18:48 | [maml_trainer] epoch #236 | Sampling for adapation and meta-testing... +2025-04-01 23:23:04 | [maml_trainer] epoch #236 | Finished meta-testing... +2025-04-01 23:23:04 | [maml_trainer] epoch #236 | Saving snapshot... +2025-04-01 23:23:27 | [maml_trainer] epoch #236 | Saved +2025-04-01 23:23:27 | [maml_trainer] epoch #236 | Time 292508.18 s +2025-04-01 23:23:27 | [maml_trainer] epoch #236 | EpochTime 1310.18 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.1652 +Average/AverageReturn -50.0848 +Average/Iteration 236 +Average/MaxReturn -41.6745 +Average/MinReturn -73.1352 +Average/NumEpisodes 80 +Average/StdReturn 5.97698 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93755 +GaussianMLPPolicy/KLAfter 0.00391106 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.34783e-05 +GaussianMLPPolicy/LossBefore 4.35114e-09 +GaussianMLPPolicy/dLoss -1.34739e-05 +Iteration 236 +MetaTest/Average/AverageDiscountedReturn -49.9637 +MetaTest/Average/AverageReturn -49.9637 +MetaTest/Average/Iteration 236 +MetaTest/Average/MaxReturn -42.8958 +MetaTest/Average/MinReturn -56.8817 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.32018 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.9637 +MetaTest/__unnamed_task__/AverageReturn -49.9637 +MetaTest/__unnamed_task__/Iteration 236 +MetaTest/__unnamed_task__/MaxReturn -42.8958 +MetaTest/__unnamed_task__/MinReturn -56.8817 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.32018 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.584e+06 +__unnamed_task__/AverageDiscountedReturn -27.1652 +__unnamed_task__/AverageReturn -50.0848 +__unnamed_task__/Iteration 236 +__unnamed_task__/MaxReturn -41.6745 +__unnamed_task__/MinReturn -73.1352 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.97698 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-01 23:40:49 | [maml_trainer] epoch #237 | Sampling for adapation and meta-testing... +2025-04-01 23:44:59 | [maml_trainer] epoch #237 | Finished meta-testing... +2025-04-01 23:44:59 | [maml_trainer] epoch #237 | Saving snapshot... +2025-04-01 23:45:20 | [maml_trainer] epoch #237 | Saved +2025-04-01 23:45:20 | [maml_trainer] epoch #237 | Time 293821.41 s +2025-04-01 23:45:20 | [maml_trainer] epoch #237 | EpochTime 1313.22 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.2507 +Average/AverageReturn -50.9423 +Average/Iteration 237 +Average/MaxReturn -40.6031 +Average/MinReturn -80.7078 +Average/NumEpisodes 80 +Average/StdReturn 6.5751 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93548 +GaussianMLPPolicy/KLAfter 0.00287824 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.55979e-06 +GaussianMLPPolicy/LossBefore -7.15256e-10 +GaussianMLPPolicy/dLoss -1.56051e-06 +Iteration 237 +MetaTest/Average/AverageDiscountedReturn -48.6198 +MetaTest/Average/AverageReturn -48.6198 +MetaTest/Average/Iteration 237 +MetaTest/Average/MaxReturn -39.0922 +MetaTest/Average/MinReturn -55.3364 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.75505 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.6198 +MetaTest/__unnamed_task__/AverageReturn -48.6198 +MetaTest/__unnamed_task__/Iteration 237 +MetaTest/__unnamed_task__/MaxReturn -39.0922 +MetaTest/__unnamed_task__/MinReturn -55.3364 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.75505 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.616e+06 +__unnamed_task__/AverageDiscountedReturn -27.2507 +__unnamed_task__/AverageReturn -50.9423 +__unnamed_task__/Iteration 237 +__unnamed_task__/MaxReturn -40.6031 +__unnamed_task__/MinReturn -80.7078 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.5751 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 00:02:23 | [maml_trainer] epoch #238 | Sampling for adapation and meta-testing... +2025-04-02 00:06:43 | [maml_trainer] epoch #238 | Finished meta-testing... +2025-04-02 00:06:43 | [maml_trainer] epoch #238 | Saving snapshot... +2025-04-02 00:07:05 | [maml_trainer] epoch #238 | Saved +2025-04-02 00:07:05 | [maml_trainer] epoch #238 | Time 295126.94 s +2025-04-02 00:07:05 | [maml_trainer] epoch #238 | EpochTime 1305.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6131 +Average/AverageReturn -48.9902 +Average/Iteration 238 +Average/MaxReturn -38.934 +Average/MinReturn -63.5328 +Average/NumEpisodes 80 +Average/StdReturn 4.4941 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93164 +GaussianMLPPolicy/KLAfter 0.00484362 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000135787 +GaussianMLPPolicy/LossBefore 1.52588e-08 +GaussianMLPPolicy/dLoss -0.000135772 +Iteration 238 +MetaTest/Average/AverageDiscountedReturn -50.3323 +MetaTest/Average/AverageReturn -50.3323 +MetaTest/Average/Iteration 238 +MetaTest/Average/MaxReturn -43.4522 +MetaTest/Average/MinReturn -60.7062 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.14478 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.3323 +MetaTest/__unnamed_task__/AverageReturn -50.3323 +MetaTest/__unnamed_task__/Iteration 238 +MetaTest/__unnamed_task__/MaxReturn -43.4522 +MetaTest/__unnamed_task__/MinReturn -60.7062 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.14478 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.648e+06 +__unnamed_task__/AverageDiscountedReturn -26.6131 +__unnamed_task__/AverageReturn -48.9902 +__unnamed_task__/Iteration 238 +__unnamed_task__/MaxReturn -38.934 +__unnamed_task__/MinReturn -63.5328 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.4941 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 00:24:17 | [maml_trainer] epoch #239 | Sampling for adapation and meta-testing... +2025-04-02 00:28:35 | [maml_trainer] epoch #239 | Finished meta-testing... +2025-04-02 00:28:35 | [maml_trainer] epoch #239 | Saving snapshot... +2025-04-02 00:28:58 | [maml_trainer] epoch #239 | Saved +2025-04-02 00:28:58 | [maml_trainer] epoch #239 | Time 296439.58 s +2025-04-02 00:28:58 | [maml_trainer] epoch #239 | EpochTime 1312.63 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -27.5306 +Average/AverageReturn -50.9876 +Average/Iteration 239 +Average/MaxReturn -41.1665 +Average/MinReturn -65.9911 +Average/NumEpisodes 80 +Average/StdReturn 5.1108 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92862 +GaussianMLPPolicy/KLAfter 0.0061105 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000108094 +GaussianMLPPolicy/LossBefore -6.02007e-09 +GaussianMLPPolicy/dLoss -0.0001081 +Iteration 239 +MetaTest/Average/AverageDiscountedReturn -49.5212 +MetaTest/Average/AverageReturn -49.5212 +MetaTest/Average/Iteration 239 +MetaTest/Average/MaxReturn -42.1173 +MetaTest/Average/MinReturn -58.299 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.42281 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.5212 +MetaTest/__unnamed_task__/AverageReturn -49.5212 +MetaTest/__unnamed_task__/Iteration 239 +MetaTest/__unnamed_task__/MaxReturn -42.1173 +MetaTest/__unnamed_task__/MinReturn -58.299 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.42281 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.68e+06 +__unnamed_task__/AverageDiscountedReturn -27.5306 +__unnamed_task__/AverageReturn -50.9876 +__unnamed_task__/Iteration 239 +__unnamed_task__/MaxReturn -41.1665 +__unnamed_task__/MinReturn -65.9911 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.1108 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 00:46:23 | [maml_trainer] epoch #240 | Sampling for adapation and meta-testing... +2025-04-02 00:50:44 | [maml_trainer] epoch #240 | Finished meta-testing... +2025-04-02 00:50:44 | [maml_trainer] epoch #240 | Saving snapshot... +2025-04-02 00:51:07 | [maml_trainer] epoch #240 | Saved +2025-04-02 00:51:07 | [maml_trainer] epoch #240 | Time 297768.20 s +2025-04-02 00:51:07 | [maml_trainer] epoch #240 | EpochTime 1328.62 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.7213 +Average/AverageReturn -49.777 +Average/Iteration 240 +Average/MaxReturn -41.1057 +Average/MinReturn -113.689 +Average/NumEpisodes 80 +Average/StdReturn 8.21183 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92559 +GaussianMLPPolicy/KLAfter 0.00484518 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000110975 +GaussianMLPPolicy/LossBefore 7.33137e-09 +GaussianMLPPolicy/dLoss -0.000110968 +Iteration 240 +MetaTest/Average/AverageDiscountedReturn -48.6025 +MetaTest/Average/AverageReturn -48.6025 +MetaTest/Average/Iteration 240 +MetaTest/Average/MaxReturn -43.3188 +MetaTest/Average/MinReturn -56.565 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.06976 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.6025 +MetaTest/__unnamed_task__/AverageReturn -48.6025 +MetaTest/__unnamed_task__/Iteration 240 +MetaTest/__unnamed_task__/MaxReturn -43.3188 +MetaTest/__unnamed_task__/MinReturn -56.565 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.06976 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.712e+06 +__unnamed_task__/AverageDiscountedReturn -26.7213 +__unnamed_task__/AverageReturn -49.777 +__unnamed_task__/Iteration 240 +__unnamed_task__/MaxReturn -41.1057 +__unnamed_task__/MinReturn -113.689 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.21183 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 01:08:30 | [maml_trainer] epoch #241 | Sampling for adapation and meta-testing... +2025-04-02 01:12:49 | [maml_trainer] epoch #241 | Finished meta-testing... +2025-04-02 01:12:49 | [maml_trainer] epoch #241 | Saving snapshot... +2025-04-02 01:13:11 | [maml_trainer] epoch #241 | Saved +2025-04-02 01:13:11 | [maml_trainer] epoch #241 | Time 299092.96 s +2025-04-02 01:13:11 | [maml_trainer] epoch #241 | EpochTime 1324.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.4164 +Average/AverageReturn -49.2257 +Average/Iteration 241 +Average/MaxReturn -42.0582 +Average/MinReturn -58.6083 +Average/NumEpisodes 80 +Average/StdReturn 3.81022 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92199 +GaussianMLPPolicy/KLAfter 0.00605187 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000166464 +GaussianMLPPolicy/LossBefore -8.9407e-10 +GaussianMLPPolicy/dLoss 0.000166463 +Iteration 241 +MetaTest/Average/AverageDiscountedReturn -49.3434 +MetaTest/Average/AverageReturn -49.3434 +MetaTest/Average/Iteration 241 +MetaTest/Average/MaxReturn -44.4378 +MetaTest/Average/MinReturn -79.0388 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.23578 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.3434 +MetaTest/__unnamed_task__/AverageReturn -49.3434 +MetaTest/__unnamed_task__/Iteration 241 +MetaTest/__unnamed_task__/MaxReturn -44.4378 +MetaTest/__unnamed_task__/MinReturn -79.0388 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.23578 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.744e+06 +__unnamed_task__/AverageDiscountedReturn -26.4164 +__unnamed_task__/AverageReturn -49.2257 +__unnamed_task__/Iteration 241 +__unnamed_task__/MaxReturn -42.0582 +__unnamed_task__/MinReturn -58.6083 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.81022 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 01:30:47 | [maml_trainer] epoch #242 | Sampling for adapation and meta-testing... +2025-04-02 01:35:07 | [maml_trainer] epoch #242 | Finished meta-testing... +2025-04-02 01:35:07 | [maml_trainer] epoch #242 | Saving snapshot... +2025-04-02 01:35:30 | [maml_trainer] epoch #242 | Saved +2025-04-02 01:35:30 | [maml_trainer] epoch #242 | Time 300431.20 s +2025-04-02 01:35:30 | [maml_trainer] epoch #242 | EpochTime 1338.24 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.769 +Average/AverageReturn -49.7509 +Average/Iteration 242 +Average/MaxReturn -43.1764 +Average/MinReturn -61.3728 +Average/NumEpisodes 80 +Average/StdReturn 4.15657 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91973 +GaussianMLPPolicy/KLAfter 0.00350552 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.6082e-05 +GaussianMLPPolicy/LossBefore 6.67572e-09 +GaussianMLPPolicy/dLoss -3.60754e-05 +Iteration 242 +MetaTest/Average/AverageDiscountedReturn -51.0225 +MetaTest/Average/AverageReturn -51.0225 +MetaTest/Average/Iteration 242 +MetaTest/Average/MaxReturn -45.0464 +MetaTest/Average/MinReturn -64.0342 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.49028 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.0225 +MetaTest/__unnamed_task__/AverageReturn -51.0225 +MetaTest/__unnamed_task__/Iteration 242 +MetaTest/__unnamed_task__/MaxReturn -45.0464 +MetaTest/__unnamed_task__/MinReturn -64.0342 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.49028 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.776e+06 +__unnamed_task__/AverageDiscountedReturn -26.769 +__unnamed_task__/AverageReturn -49.7509 +__unnamed_task__/Iteration 242 +__unnamed_task__/MaxReturn -43.1764 +__unnamed_task__/MinReturn -61.3728 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.15657 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 01:52:58 | [maml_trainer] epoch #243 | Sampling for adapation and meta-testing... +2025-04-02 01:57:12 | [maml_trainer] epoch #243 | Finished meta-testing... +2025-04-02 01:57:12 | [maml_trainer] epoch #243 | Saving snapshot... +2025-04-02 01:57:34 | [maml_trainer] epoch #243 | Saved +2025-04-02 01:57:34 | [maml_trainer] epoch #243 | Time 301755.22 s +2025-04-02 01:57:34 | [maml_trainer] epoch #243 | EpochTime 1324.02 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.5626 +Average/AverageReturn -49.436 +Average/Iteration 243 +Average/MaxReturn -42.7879 +Average/MinReturn -60.7807 +Average/NumEpisodes 80 +Average/StdReturn 3.67537 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91826 +GaussianMLPPolicy/KLAfter 0.00152158 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.21303e-05 +GaussianMLPPolicy/LossBefore 5.66244e-09 +GaussianMLPPolicy/dLoss -3.21246e-05 +Iteration 243 +MetaTest/Average/AverageDiscountedReturn -50.4167 +MetaTest/Average/AverageReturn -50.4167 +MetaTest/Average/Iteration 243 +MetaTest/Average/MaxReturn -44.9258 +MetaTest/Average/MinReturn -58.264 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.63759 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.4167 +MetaTest/__unnamed_task__/AverageReturn -50.4167 +MetaTest/__unnamed_task__/Iteration 243 +MetaTest/__unnamed_task__/MaxReturn -44.9258 +MetaTest/__unnamed_task__/MinReturn -58.264 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.63759 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.808e+06 +__unnamed_task__/AverageDiscountedReturn -26.5626 +__unnamed_task__/AverageReturn -49.436 +__unnamed_task__/Iteration 243 +__unnamed_task__/MaxReturn -42.7879 +__unnamed_task__/MinReturn -60.7807 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.67537 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 02:13:54 | [maml_trainer] epoch #244 | Sampling for adapation and meta-testing... +2025-04-02 02:17:57 | [maml_trainer] epoch #244 | Finished meta-testing... +2025-04-02 02:17:57 | [maml_trainer] epoch #244 | Saving snapshot... +2025-04-02 02:18:18 | [maml_trainer] epoch #244 | Saved +2025-04-02 02:18:18 | [maml_trainer] epoch #244 | Time 302999.93 s +2025-04-02 02:18:18 | [maml_trainer] epoch #244 | EpochTime 1244.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.8705 +Average/AverageReturn -49.8378 +Average/Iteration 244 +Average/MaxReturn -43.1918 +Average/MinReturn -60.0846 +Average/NumEpisodes 80 +Average/StdReturn 3.99979 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91721 +GaussianMLPPolicy/KLAfter 0.00144648 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.9587e-05 +GaussianMLPPolicy/LossBefore 4.94719e-09 +GaussianMLPPolicy/dLoss 3.9592e-05 +Iteration 244 +MetaTest/Average/AverageDiscountedReturn -49.6127 +MetaTest/Average/AverageReturn -49.6127 +MetaTest/Average/Iteration 244 +MetaTest/Average/MaxReturn -44.3756 +MetaTest/Average/MinReturn -58.2956 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.77137 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.6127 +MetaTest/__unnamed_task__/AverageReturn -49.6127 +MetaTest/__unnamed_task__/Iteration 244 +MetaTest/__unnamed_task__/MaxReturn -44.3756 +MetaTest/__unnamed_task__/MinReturn -58.2956 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.77137 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.84e+06 +__unnamed_task__/AverageDiscountedReturn -26.8705 +__unnamed_task__/AverageReturn -49.8378 +__unnamed_task__/Iteration 244 +__unnamed_task__/MaxReturn -43.1918 +__unnamed_task__/MinReturn -60.0846 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.99979 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 02:34:39 | [maml_trainer] epoch #245 | Sampling for adapation and meta-testing... +2025-04-02 02:38:42 | [maml_trainer] epoch #245 | Finished meta-testing... +2025-04-02 02:38:42 | [maml_trainer] epoch #245 | Saving snapshot... +2025-04-02 02:39:03 | [maml_trainer] epoch #245 | Saved +2025-04-02 02:39:03 | [maml_trainer] epoch #245 | Time 304244.48 s +2025-04-02 02:39:03 | [maml_trainer] epoch #245 | EpochTime 1244.55 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.676 +Average/AverageReturn -49.7997 +Average/Iteration 245 +Average/MaxReturn -42.6971 +Average/MinReturn -58.6533 +Average/NumEpisodes 80 +Average/StdReturn 3.67853 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91761 +GaussianMLPPolicy/KLAfter 0.00285965 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.44574e-05 +GaussianMLPPolicy/LossBefore -1.508e-08 +GaussianMLPPolicy/dLoss 2.44423e-05 +Iteration 245 +MetaTest/Average/AverageDiscountedReturn -49.2091 +MetaTest/Average/AverageReturn -49.2091 +MetaTest/Average/Iteration 245 +MetaTest/Average/MaxReturn -44.2353 +MetaTest/Average/MinReturn -62.234 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.02995 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.2091 +MetaTest/__unnamed_task__/AverageReturn -49.2091 +MetaTest/__unnamed_task__/Iteration 245 +MetaTest/__unnamed_task__/MaxReturn -44.2353 +MetaTest/__unnamed_task__/MinReturn -62.234 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.02995 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.872e+06 +__unnamed_task__/AverageDiscountedReturn -26.676 +__unnamed_task__/AverageReturn -49.7997 +__unnamed_task__/Iteration 245 +__unnamed_task__/MaxReturn -42.6971 +__unnamed_task__/MinReturn -58.6533 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.67853 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 02:55:45 | [maml_trainer] epoch #246 | Sampling for adapation and meta-testing... +2025-04-02 02:59:53 | [maml_trainer] epoch #246 | Finished meta-testing... +2025-04-02 02:59:53 | [maml_trainer] epoch #246 | Saving snapshot... +2025-04-02 03:00:13 | [maml_trainer] epoch #246 | Saved +2025-04-02 03:00:13 | [maml_trainer] epoch #246 | Time 305514.65 s +2025-04-02 03:00:13 | [maml_trainer] epoch #246 | EpochTime 1270.17 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6287 +Average/AverageReturn -49.5581 +Average/Iteration 246 +Average/MaxReturn -41.9844 +Average/MinReturn -57.5213 +Average/NumEpisodes 80 +Average/StdReturn 3.39268 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91747 +GaussianMLPPolicy/KLAfter 0.00310112 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000271369 +GaussianMLPPolicy/LossBefore -2.19345e-08 +GaussianMLPPolicy/dLoss -0.000271391 +Iteration 246 +MetaTest/Average/AverageDiscountedReturn -48.1811 +MetaTest/Average/AverageReturn -48.1811 +MetaTest/Average/Iteration 246 +MetaTest/Average/MaxReturn -43.9779 +MetaTest/Average/MinReturn -54.8985 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 2.34847 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.1811 +MetaTest/__unnamed_task__/AverageReturn -48.1811 +MetaTest/__unnamed_task__/Iteration 246 +MetaTest/__unnamed_task__/MaxReturn -43.9779 +MetaTest/__unnamed_task__/MinReturn -54.8985 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 2.34847 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.904e+06 +__unnamed_task__/AverageDiscountedReturn -26.6287 +__unnamed_task__/AverageReturn -49.5581 +__unnamed_task__/Iteration 246 +__unnamed_task__/MaxReturn -41.9844 +__unnamed_task__/MinReturn -57.5213 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.39268 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 03:16:36 | [maml_trainer] epoch #247 | Sampling for adapation and meta-testing... +2025-04-02 03:20:45 | [maml_trainer] epoch #247 | Finished meta-testing... +2025-04-02 03:20:45 | [maml_trainer] epoch #247 | Saving snapshot... +2025-04-02 03:21:07 | [maml_trainer] epoch #247 | Saved +2025-04-02 03:21:07 | [maml_trainer] epoch #247 | Time 306768.56 s +2025-04-02 03:21:07 | [maml_trainer] epoch #247 | EpochTime 1253.90 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6479 +Average/AverageReturn -49.748 +Average/Iteration 247 +Average/MaxReturn -44.0882 +Average/MinReturn -61.5038 +Average/NumEpisodes 80 +Average/StdReturn 3.43647 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91804 +GaussianMLPPolicy/KLAfter 0.00118152 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.44112e-05 +GaussianMLPPolicy/LossBefore 3.45707e-09 +GaussianMLPPolicy/dLoss 7.44147e-05 +Iteration 247 +MetaTest/Average/AverageDiscountedReturn -51.7682 +MetaTest/Average/AverageReturn -51.7682 +MetaTest/Average/Iteration 247 +MetaTest/Average/MaxReturn -45.8627 +MetaTest/Average/MinReturn -63.5562 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.56963 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.7682 +MetaTest/__unnamed_task__/AverageReturn -51.7682 +MetaTest/__unnamed_task__/Iteration 247 +MetaTest/__unnamed_task__/MaxReturn -45.8627 +MetaTest/__unnamed_task__/MinReturn -63.5562 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.56963 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.936e+06 +__unnamed_task__/AverageDiscountedReturn -26.6479 +__unnamed_task__/AverageReturn -49.748 +__unnamed_task__/Iteration 247 +__unnamed_task__/MaxReturn -44.0882 +__unnamed_task__/MinReturn -61.5038 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.43647 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 03:37:37 | [maml_trainer] epoch #248 | Sampling for adapation and meta-testing... +2025-04-02 03:41:47 | [maml_trainer] epoch #248 | Finished meta-testing... +2025-04-02 03:41:47 | [maml_trainer] epoch #248 | Saving snapshot... +2025-04-02 03:42:09 | [maml_trainer] epoch #248 | Saved +2025-04-02 03:42:09 | [maml_trainer] epoch #248 | Time 308030.27 s +2025-04-02 03:42:09 | [maml_trainer] epoch #248 | EpochTime 1261.71 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6157 +Average/AverageReturn -49.8812 +Average/Iteration 248 +Average/MaxReturn -39.046 +Average/MinReturn -58.0094 +Average/NumEpisodes 80 +Average/StdReturn 3.83731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91731 +GaussianMLPPolicy/KLAfter 0.000417107 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.57416e-06 +GaussianMLPPolicy/LossBefore 2.80142e-09 +GaussianMLPPolicy/dLoss 1.57696e-06 +Iteration 248 +MetaTest/Average/AverageDiscountedReturn -48.6309 +MetaTest/Average/AverageReturn -48.6309 +MetaTest/Average/Iteration 248 +MetaTest/Average/MaxReturn -43.7788 +MetaTest/Average/MinReturn -54.7661 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 2.89093 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.6309 +MetaTest/__unnamed_task__/AverageReturn -48.6309 +MetaTest/__unnamed_task__/Iteration 248 +MetaTest/__unnamed_task__/MaxReturn -43.7788 +MetaTest/__unnamed_task__/MinReturn -54.7661 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 2.89093 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.968e+06 +__unnamed_task__/AverageDiscountedReturn -26.6157 +__unnamed_task__/AverageReturn -49.8812 +__unnamed_task__/Iteration 248 +__unnamed_task__/MaxReturn -39.046 +__unnamed_task__/MinReturn -58.0094 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 3.83731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 03:58:39 | [maml_trainer] epoch #249 | Sampling for adapation and meta-testing... +2025-04-02 04:02:48 | [maml_trainer] epoch #249 | Finished meta-testing... +2025-04-02 04:02:48 | [maml_trainer] epoch #249 | Saving snapshot... +2025-04-02 04:03:09 | [maml_trainer] epoch #249 | Saved +2025-04-02 04:03:09 | [maml_trainer] epoch #249 | Time 309290.98 s +2025-04-02 04:03:09 | [maml_trainer] epoch #249 | EpochTime 1260.70 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -27.0377 +Average/AverageReturn -50.7799 +Average/Iteration 249 +Average/MaxReturn -43.0221 +Average/MinReturn -100.729 +Average/NumEpisodes 80 +Average/StdReturn 7.06586 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91565 +GaussianMLPPolicy/KLAfter 0.000922354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.9924e-05 +GaussianMLPPolicy/LossBefore 1.2219e-08 +GaussianMLPPolicy/dLoss 1.99363e-05 +Iteration 249 +MetaTest/Average/AverageDiscountedReturn -50.6395 +MetaTest/Average/AverageReturn -50.6395 +MetaTest/Average/Iteration 249 +MetaTest/Average/MaxReturn -45.2767 +MetaTest/Average/MinReturn -59.1544 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.7464 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.6395 +MetaTest/__unnamed_task__/AverageReturn -50.6395 +MetaTest/__unnamed_task__/Iteration 249 +MetaTest/__unnamed_task__/MaxReturn -45.2767 +MetaTest/__unnamed_task__/MinReturn -59.1544 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.7464 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8e+06 +__unnamed_task__/AverageDiscountedReturn -27.0377 +__unnamed_task__/AverageReturn -50.7799 +__unnamed_task__/Iteration 249 +__unnamed_task__/MaxReturn -43.0221 +__unnamed_task__/MinReturn -100.729 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.06586 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 04:19:32 | [maml_trainer] epoch #250 | Sampling for adapation and meta-testing... +2025-04-02 04:23:35 | [maml_trainer] epoch #250 | Finished meta-testing... +2025-04-02 04:23:35 | [maml_trainer] epoch #250 | Saving snapshot... +2025-04-02 04:23:56 | [maml_trainer] epoch #250 | Saved +2025-04-02 04:23:56 | [maml_trainer] epoch #250 | Time 310537.67 s +2025-04-02 04:23:56 | [maml_trainer] epoch #250 | EpochTime 1246.69 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.8865 +Average/AverageReturn -50.9457 +Average/Iteration 250 +Average/MaxReturn -42.2579 +Average/MinReturn -115.834 +Average/NumEpisodes 80 +Average/StdReturn 9.27359 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91415 +GaussianMLPPolicy/KLAfter 0.00179861 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.27261e-05 +GaussianMLPPolicy/LossBefore 2.68221e-09 +GaussianMLPPolicy/dLoss 8.27288e-05 +Iteration 250 +MetaTest/Average/AverageDiscountedReturn -52.3587 +MetaTest/Average/AverageReturn -52.3587 +MetaTest/Average/Iteration 250 +MetaTest/Average/MaxReturn -45.7712 +MetaTest/Average/MinReturn -63.5985 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.27544 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.3587 +MetaTest/__unnamed_task__/AverageReturn -52.3587 +MetaTest/__unnamed_task__/Iteration 250 +MetaTest/__unnamed_task__/MaxReturn -45.7712 +MetaTest/__unnamed_task__/MinReturn -63.5985 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.27544 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.032e+06 +__unnamed_task__/AverageDiscountedReturn -26.8865 +__unnamed_task__/AverageReturn -50.9457 +__unnamed_task__/Iteration 250 +__unnamed_task__/MaxReturn -42.2579 +__unnamed_task__/MinReturn -115.834 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.27359 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 04:40:20 | [maml_trainer] epoch #251 | Sampling for adapation and meta-testing... +2025-04-02 04:44:28 | [maml_trainer] epoch #251 | Finished meta-testing... +2025-04-02 04:44:28 | [maml_trainer] epoch #251 | Saving snapshot... +2025-04-02 04:44:48 | [maml_trainer] epoch #251 | Saved +2025-04-02 04:44:48 | [maml_trainer] epoch #251 | Time 311789.73 s +2025-04-02 04:44:48 | [maml_trainer] epoch #251 | EpochTime 1252.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.9399 +Average/AverageReturn -50.4839 +Average/Iteration 251 +Average/MaxReturn -42.6563 +Average/MinReturn -87.1828 +Average/NumEpisodes 80 +Average/StdReturn 6.04835 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91386 +GaussianMLPPolicy/KLAfter 0.00357354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.02356e-05 +GaussianMLPPolicy/LossBefore -8.82149e-09 +GaussianMLPPolicy/dLoss 3.02268e-05 +Iteration 251 +MetaTest/Average/AverageDiscountedReturn -50.5461 +MetaTest/Average/AverageReturn -50.5461 +MetaTest/Average/Iteration 251 +MetaTest/Average/MaxReturn -42.9345 +MetaTest/Average/MinReturn -60.2023 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.41349 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.5461 +MetaTest/__unnamed_task__/AverageReturn -50.5461 +MetaTest/__unnamed_task__/Iteration 251 +MetaTest/__unnamed_task__/MaxReturn -42.9345 +MetaTest/__unnamed_task__/MinReturn -60.2023 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.41349 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.064e+06 +__unnamed_task__/AverageDiscountedReturn -26.9399 +__unnamed_task__/AverageReturn -50.4839 +__unnamed_task__/Iteration 251 +__unnamed_task__/MaxReturn -42.6563 +__unnamed_task__/MinReturn -87.1828 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.04835 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 05:01:05 | [maml_trainer] epoch #252 | Sampling for adapation and meta-testing... +2025-04-02 05:05:11 | [maml_trainer] epoch #252 | Finished meta-testing... +2025-04-02 05:05:11 | [maml_trainer] epoch #252 | Saving snapshot... +2025-04-02 05:05:33 | [maml_trainer] epoch #252 | Saved +2025-04-02 05:05:33 | [maml_trainer] epoch #252 | Time 313034.46 s +2025-04-02 05:05:33 | [maml_trainer] epoch #252 | EpochTime 1244.73 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.5869 +Average/AverageReturn -49.6708 +Average/Iteration 252 +Average/MaxReturn -41.8079 +Average/MinReturn -58.7105 +Average/NumEpisodes 80 +Average/StdReturn 4.21808 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91252 +GaussianMLPPolicy/KLAfter 0.00602618 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.42007e-05 +GaussianMLPPolicy/LossBefore 5.54323e-09 +GaussianMLPPolicy/dLoss 2.42062e-05 +Iteration 252 +MetaTest/Average/AverageDiscountedReturn -50.4591 +MetaTest/Average/AverageReturn -50.4591 +MetaTest/Average/Iteration 252 +MetaTest/Average/MaxReturn -44.2483 +MetaTest/Average/MinReturn -72.6721 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.57237 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.4591 +MetaTest/__unnamed_task__/AverageReturn -50.4591 +MetaTest/__unnamed_task__/Iteration 252 +MetaTest/__unnamed_task__/MaxReturn -44.2483 +MetaTest/__unnamed_task__/MinReturn -72.6721 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.57237 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.096e+06 +__unnamed_task__/AverageDiscountedReturn -26.5869 +__unnamed_task__/AverageReturn -49.6708 +__unnamed_task__/Iteration 252 +__unnamed_task__/MaxReturn -41.8079 +__unnamed_task__/MinReturn -58.7105 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.21808 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 05:21:48 | [maml_trainer] epoch #253 | Sampling for adapation and meta-testing... +2025-04-02 05:25:55 | [maml_trainer] epoch #253 | Finished meta-testing... +2025-04-02 05:25:55 | [maml_trainer] epoch #253 | Saving snapshot... +2025-04-02 05:26:16 | [maml_trainer] epoch #253 | Saved +2025-04-02 05:26:16 | [maml_trainer] epoch #253 | Time 314277.88 s +2025-04-02 05:26:16 | [maml_trainer] epoch #253 | EpochTime 1243.42 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -27.234 +Average/AverageReturn -51.2885 +Average/Iteration 253 +Average/MaxReturn -43.4976 +Average/MinReturn -116.491 +Average/NumEpisodes 80 +Average/StdReturn 8.83303 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91198 +GaussianMLPPolicy/KLAfter 0.00584098 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.56286e-05 +GaussianMLPPolicy/LossBefore -1.01924e-08 +GaussianMLPPolicy/dLoss -8.56388e-05 +Iteration 253 +MetaTest/Average/AverageDiscountedReturn -53.1529 +MetaTest/Average/AverageReturn -53.1529 +MetaTest/Average/Iteration 253 +MetaTest/Average/MaxReturn -44.079 +MetaTest/Average/MinReturn -120.025 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.8815 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.1529 +MetaTest/__unnamed_task__/AverageReturn -53.1529 +MetaTest/__unnamed_task__/Iteration 253 +MetaTest/__unnamed_task__/MaxReturn -44.079 +MetaTest/__unnamed_task__/MinReturn -120.025 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.8815 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.128e+06 +__unnamed_task__/AverageDiscountedReturn -27.234 +__unnamed_task__/AverageReturn -51.2885 +__unnamed_task__/Iteration 253 +__unnamed_task__/MaxReturn -43.4976 +__unnamed_task__/MinReturn -116.491 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.83303 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 05:42:33 | [maml_trainer] epoch #254 | Sampling for adapation and meta-testing... +2025-04-02 05:46:36 | [maml_trainer] epoch #254 | Finished meta-testing... +2025-04-02 05:46:36 | [maml_trainer] epoch #254 | Saving snapshot... +2025-04-02 05:46:57 | [maml_trainer] epoch #254 | Saved +2025-04-02 05:46:57 | [maml_trainer] epoch #254 | Time 315518.49 s +2025-04-02 05:46:57 | [maml_trainer] epoch #254 | EpochTime 1240.60 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6383 +Average/AverageReturn -49.7681 +Average/Iteration 254 +Average/MaxReturn -42.3283 +Average/MinReturn -62.2242 +Average/NumEpisodes 80 +Average/StdReturn 4.30757 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91165 +GaussianMLPPolicy/KLAfter 0.0051888 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.1771e-05 +GaussianMLPPolicy/LossBefore -9.53674e-10 +GaussianMLPPolicy/dLoss -3.1772e-05 +Iteration 254 +MetaTest/Average/AverageDiscountedReturn -50.7339 +MetaTest/Average/AverageReturn -50.7339 +MetaTest/Average/Iteration 254 +MetaTest/Average/MaxReturn -43.9758 +MetaTest/Average/MinReturn -68.4231 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.84679 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.7339 +MetaTest/__unnamed_task__/AverageReturn -50.7339 +MetaTest/__unnamed_task__/Iteration 254 +MetaTest/__unnamed_task__/MaxReturn -43.9758 +MetaTest/__unnamed_task__/MinReturn -68.4231 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.84679 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.16e+06 +__unnamed_task__/AverageDiscountedReturn -26.6383 +__unnamed_task__/AverageReturn -49.7681 +__unnamed_task__/Iteration 254 +__unnamed_task__/MaxReturn -42.3283 +__unnamed_task__/MinReturn -62.2242 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.30757 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 06:03:15 | [maml_trainer] epoch #255 | Sampling for adapation and meta-testing... +2025-04-02 06:07:19 | [maml_trainer] epoch #255 | Finished meta-testing... +2025-04-02 06:07:19 | [maml_trainer] epoch #255 | Saving snapshot... +2025-04-02 06:07:40 | [maml_trainer] epoch #255 | Saved +2025-04-02 06:07:40 | [maml_trainer] epoch #255 | Time 316761.45 s +2025-04-02 06:07:40 | [maml_trainer] epoch #255 | EpochTime 1242.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.6518 +Average/AverageReturn -49.4393 +Average/Iteration 255 +Average/MaxReturn -41.151 +Average/MinReturn -69.3074 +Average/NumEpisodes 80 +Average/StdReturn 5.09481 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91238 +GaussianMLPPolicy/KLAfter 0.00506535 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.2687e-06 +GaussianMLPPolicy/LossBefore -7.7486e-10 +GaussianMLPPolicy/dLoss 9.26793e-06 +Iteration 255 +MetaTest/Average/AverageDiscountedReturn -49.5188 +MetaTest/Average/AverageReturn -49.5188 +MetaTest/Average/Iteration 255 +MetaTest/Average/MaxReturn -43.6144 +MetaTest/Average/MinReturn -61.042 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.41737 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.5188 +MetaTest/__unnamed_task__/AverageReturn -49.5188 +MetaTest/__unnamed_task__/Iteration 255 +MetaTest/__unnamed_task__/MaxReturn -43.6144 +MetaTest/__unnamed_task__/MinReturn -61.042 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.41737 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.192e+06 +__unnamed_task__/AverageDiscountedReturn -26.6518 +__unnamed_task__/AverageReturn -49.4393 +__unnamed_task__/Iteration 255 +__unnamed_task__/MaxReturn -41.151 +__unnamed_task__/MinReturn -69.3074 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.09481 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 06:24:04 | [maml_trainer] epoch #256 | Sampling for adapation and meta-testing... +2025-04-02 06:28:08 | [maml_trainer] epoch #256 | Finished meta-testing... +2025-04-02 06:28:08 | [maml_trainer] epoch #256 | Saving snapshot... +2025-04-02 06:28:30 | [maml_trainer] epoch #256 | Saved +2025-04-02 06:28:30 | [maml_trainer] epoch #256 | Time 318011.84 s +2025-04-02 06:28:30 | [maml_trainer] epoch #256 | EpochTime 1250.39 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.4523 +Average/AverageReturn -49.2619 +Average/Iteration 256 +Average/MaxReturn -40.4189 +Average/MinReturn -60.9947 +Average/NumEpisodes 80 +Average/StdReturn 4.21468 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91101 +GaussianMLPPolicy/KLAfter 0.00364224 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.41291e-05 +GaussianMLPPolicy/LossBefore -5.24521e-09 +GaussianMLPPolicy/dLoss 6.41239e-05 +Iteration 256 +MetaTest/Average/AverageDiscountedReturn -48.7821 +MetaTest/Average/AverageReturn -48.7821 +MetaTest/Average/Iteration 256 +MetaTest/Average/MaxReturn -40.8842 +MetaTest/Average/MinReturn -57.6159 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.78722 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.7821 +MetaTest/__unnamed_task__/AverageReturn -48.7821 +MetaTest/__unnamed_task__/Iteration 256 +MetaTest/__unnamed_task__/MaxReturn -40.8842 +MetaTest/__unnamed_task__/MinReturn -57.6159 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.78722 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.224e+06 +__unnamed_task__/AverageDiscountedReturn -26.4523 +__unnamed_task__/AverageReturn -49.2619 +__unnamed_task__/Iteration 256 +__unnamed_task__/MaxReturn -40.4189 +__unnamed_task__/MinReturn -60.9947 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.21468 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 06:45:01 | [maml_trainer] epoch #257 | Sampling for adapation and meta-testing... +2025-04-02 06:49:04 | [maml_trainer] epoch #257 | Finished meta-testing... +2025-04-02 06:49:04 | [maml_trainer] epoch #257 | Saving snapshot... +2025-04-02 06:49:25 | [maml_trainer] epoch #257 | Saved +2025-04-02 06:49:25 | [maml_trainer] epoch #257 | Time 319266.89 s +2025-04-02 06:49:25 | [maml_trainer] epoch #257 | EpochTime 1255.05 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.9827 +Average/AverageReturn -50.7904 +Average/Iteration 257 +Average/MaxReturn -42.8398 +Average/MinReturn -123.016 +Average/NumEpisodes 80 +Average/StdReturn 9.47746 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90906 +GaussianMLPPolicy/KLAfter 0.00195476 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.15632e-05 +GaussianMLPPolicy/LossBefore -2.01762e-08 +GaussianMLPPolicy/dLoss -2.15834e-05 +Iteration 257 +MetaTest/Average/AverageDiscountedReturn -51.4174 +MetaTest/Average/AverageReturn -51.4174 +MetaTest/Average/Iteration 257 +MetaTest/Average/MaxReturn -42.6541 +MetaTest/Average/MinReturn -68.7823 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.94364 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.4174 +MetaTest/__unnamed_task__/AverageReturn -51.4174 +MetaTest/__unnamed_task__/Iteration 257 +MetaTest/__unnamed_task__/MaxReturn -42.6541 +MetaTest/__unnamed_task__/MinReturn -68.7823 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.94364 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.256e+06 +__unnamed_task__/AverageDiscountedReturn -26.9827 +__unnamed_task__/AverageReturn -50.7904 +__unnamed_task__/Iteration 257 +__unnamed_task__/MaxReturn -42.8398 +__unnamed_task__/MinReturn -123.016 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.47746 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 07:05:36 | [maml_trainer] epoch #258 | Sampling for adapation and meta-testing... +2025-04-02 07:09:44 | [maml_trainer] epoch #258 | Finished meta-testing... +2025-04-02 07:09:44 | [maml_trainer] epoch #258 | Saving snapshot... +2025-04-02 07:10:05 | [maml_trainer] epoch #258 | Saved +2025-04-02 07:10:05 | [maml_trainer] epoch #258 | Time 320507.02 s +2025-04-02 07:10:05 | [maml_trainer] epoch #258 | EpochTime 1240.13 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1316 +Average/AverageReturn -48.537 +Average/Iteration 258 +Average/MaxReturn -40.0443 +Average/MinReturn -68.8626 +Average/NumEpisodes 80 +Average/StdReturn 4.88704 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9077 +GaussianMLPPolicy/KLAfter 0.0017138 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.33015e-05 +GaussianMLPPolicy/LossBefore 1.19209e-08 +GaussianMLPPolicy/dLoss -6.32896e-05 +Iteration 258 +MetaTest/Average/AverageDiscountedReturn -48.4274 +MetaTest/Average/AverageReturn -48.4274 +MetaTest/Average/Iteration 258 +MetaTest/Average/MaxReturn -39.7755 +MetaTest/Average/MinReturn -66.9879 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.04726 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.4274 +MetaTest/__unnamed_task__/AverageReturn -48.4274 +MetaTest/__unnamed_task__/Iteration 258 +MetaTest/__unnamed_task__/MaxReturn -39.7755 +MetaTest/__unnamed_task__/MinReturn -66.9879 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.04726 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.288e+06 +__unnamed_task__/AverageDiscountedReturn -26.1316 +__unnamed_task__/AverageReturn -48.537 +__unnamed_task__/Iteration 258 +__unnamed_task__/MaxReturn -40.0443 +__unnamed_task__/MinReturn -68.8626 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.88704 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 07:26:34 | [maml_trainer] epoch #259 | Sampling for adapation and meta-testing... +2025-04-02 07:30:38 | [maml_trainer] epoch #259 | Finished meta-testing... +2025-04-02 07:30:38 | [maml_trainer] epoch #259 | Saving snapshot... +2025-04-02 07:30:59 | [maml_trainer] epoch #259 | Saved +2025-04-02 07:30:59 | [maml_trainer] epoch #259 | Time 321760.38 s +2025-04-02 07:30:59 | [maml_trainer] epoch #259 | EpochTime 1253.36 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.9736 +Average/AverageReturn -48.3783 +Average/Iteration 259 +Average/MaxReturn -38.5471 +Average/MinReturn -64.2364 +Average/NumEpisodes 80 +Average/StdReturn 5.48258 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90661 +GaussianMLPPolicy/KLAfter 0.000586095 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.76547e-05 +GaussianMLPPolicy/LossBefore -1.84774e-09 +GaussianMLPPolicy/dLoss -8.76565e-05 +Iteration 259 +MetaTest/Average/AverageDiscountedReturn -48.8292 +MetaTest/Average/AverageReturn -48.8292 +MetaTest/Average/Iteration 259 +MetaTest/Average/MaxReturn -44.1039 +MetaTest/Average/MinReturn -59.0082 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.15143 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.8292 +MetaTest/__unnamed_task__/AverageReturn -48.8292 +MetaTest/__unnamed_task__/Iteration 259 +MetaTest/__unnamed_task__/MaxReturn -44.1039 +MetaTest/__unnamed_task__/MinReturn -59.0082 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.15143 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.32e+06 +__unnamed_task__/AverageDiscountedReturn -25.9736 +__unnamed_task__/AverageReturn -48.3783 +__unnamed_task__/Iteration 259 +__unnamed_task__/MaxReturn -38.5471 +__unnamed_task__/MinReturn -64.2364 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.48258 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 07:47:23 | [maml_trainer] epoch #260 | Sampling for adapation and meta-testing... +2025-04-02 07:51:30 | [maml_trainer] epoch #260 | Finished meta-testing... +2025-04-02 07:51:30 | [maml_trainer] epoch #260 | Saving snapshot... +2025-04-02 07:51:51 | [maml_trainer] epoch #260 | Saved +2025-04-02 07:51:51 | [maml_trainer] epoch #260 | Time 323012.41 s +2025-04-02 07:51:51 | [maml_trainer] epoch #260 | EpochTime 1252.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1173 +Average/AverageReturn -48.3479 +Average/Iteration 260 +Average/MaxReturn -39.702 +Average/MinReturn -65.1445 +Average/NumEpisodes 80 +Average/StdReturn 5.50774 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90636 +GaussianMLPPolicy/KLAfter 0.00088628 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000104469 +GaussianMLPPolicy/LossBefore -9.59635e-09 +GaussianMLPPolicy/dLoss 0.000104459 +Iteration 260 +MetaTest/Average/AverageDiscountedReturn -49.4947 +MetaTest/Average/AverageReturn -49.4947 +MetaTest/Average/Iteration 260 +MetaTest/Average/MaxReturn -43.631 +MetaTest/Average/MinReturn -64.0202 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.8931 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.4947 +MetaTest/__unnamed_task__/AverageReturn -49.4947 +MetaTest/__unnamed_task__/Iteration 260 +MetaTest/__unnamed_task__/MaxReturn -43.631 +MetaTest/__unnamed_task__/MinReturn -64.0202 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.8931 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.352e+06 +__unnamed_task__/AverageDiscountedReturn -26.1173 +__unnamed_task__/AverageReturn -48.3479 +__unnamed_task__/Iteration 260 +__unnamed_task__/MaxReturn -39.702 +__unnamed_task__/MinReturn -65.1445 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.50774 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 08:08:27 | [maml_trainer] epoch #261 | Sampling for adapation and meta-testing... +2025-04-02 08:12:28 | [maml_trainer] epoch #261 | Finished meta-testing... +2025-04-02 08:12:28 | [maml_trainer] epoch #261 | Saving snapshot... +2025-04-02 08:12:50 | [maml_trainer] epoch #261 | Saved +2025-04-02 08:12:50 | [maml_trainer] epoch #261 | Time 324271.51 s +2025-04-02 08:12:50 | [maml_trainer] epoch #261 | EpochTime 1259.09 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -27.2639 +Average/AverageReturn -52.363 +Average/Iteration 261 +Average/MaxReturn -40.446 +Average/MinReturn -143.219 +Average/NumEpisodes 80 +Average/StdReturn 14.7277 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90694 +GaussianMLPPolicy/KLAfter 0.000808422 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.01348e-05 +GaussianMLPPolicy/LossBefore 1.54972e-09 +GaussianMLPPolicy/dLoss 1.01363e-05 +Iteration 261 +MetaTest/Average/AverageDiscountedReturn -50.5273 +MetaTest/Average/AverageReturn -50.5273 +MetaTest/Average/Iteration 261 +MetaTest/Average/MaxReturn -42.2506 +MetaTest/Average/MinReturn -59.3995 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.08023 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.5273 +MetaTest/__unnamed_task__/AverageReturn -50.5273 +MetaTest/__unnamed_task__/Iteration 261 +MetaTest/__unnamed_task__/MaxReturn -42.2506 +MetaTest/__unnamed_task__/MinReturn -59.3995 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.08023 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.384e+06 +__unnamed_task__/AverageDiscountedReturn -27.2639 +__unnamed_task__/AverageReturn -52.363 +__unnamed_task__/Iteration 261 +__unnamed_task__/MaxReturn -40.446 +__unnamed_task__/MinReturn -143.219 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.7277 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 08:29:26 | [maml_trainer] epoch #262 | Sampling for adapation and meta-testing... +2025-04-02 08:33:32 | [maml_trainer] epoch #262 | Finished meta-testing... +2025-04-02 08:33:32 | [maml_trainer] epoch #262 | Saving snapshot... +2025-04-02 08:33:53 | [maml_trainer] epoch #262 | Saved +2025-04-02 08:33:53 | [maml_trainer] epoch #262 | Time 325535.00 s +2025-04-02 08:33:53 | [maml_trainer] epoch #262 | EpochTime 1263.49 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.532 +Average/AverageReturn -49.7735 +Average/Iteration 262 +Average/MaxReturn -39.5732 +Average/MinReturn -65.9936 +Average/NumEpisodes 80 +Average/StdReturn 5.65148 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90693 +GaussianMLPPolicy/KLAfter 0.00221917 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000122077 +GaussianMLPPolicy/LossBefore 3.99351e-09 +GaussianMLPPolicy/dLoss 0.000122081 +Iteration 262 +MetaTest/Average/AverageDiscountedReturn -50.4775 +MetaTest/Average/AverageReturn -50.4775 +MetaTest/Average/Iteration 262 +MetaTest/Average/MaxReturn -42.4098 +MetaTest/Average/MinReturn -73.0194 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.39867 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.4775 +MetaTest/__unnamed_task__/AverageReturn -50.4775 +MetaTest/__unnamed_task__/Iteration 262 +MetaTest/__unnamed_task__/MaxReturn -42.4098 +MetaTest/__unnamed_task__/MinReturn -73.0194 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.39867 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.416e+06 +__unnamed_task__/AverageDiscountedReturn -26.532 +__unnamed_task__/AverageReturn -49.7735 +__unnamed_task__/Iteration 262 +__unnamed_task__/MaxReturn -39.5732 +__unnamed_task__/MinReturn -65.9936 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.65148 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 08:50:16 | [maml_trainer] epoch #263 | Sampling for adapation and meta-testing... +2025-04-02 08:54:22 | [maml_trainer] epoch #263 | Finished meta-testing... +2025-04-02 08:54:22 | [maml_trainer] epoch #263 | Saving snapshot... +2025-04-02 08:54:43 | [maml_trainer] epoch #263 | Saved +2025-04-02 08:54:43 | [maml_trainer] epoch #263 | Time 326784.74 s +2025-04-02 08:54:43 | [maml_trainer] epoch #263 | EpochTime 1249.74 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.3425 +Average/AverageReturn -49.7014 +Average/Iteration 263 +Average/MaxReturn -40.5699 +Average/MinReturn -129.763 +Average/NumEpisodes 80 +Average/StdReturn 10.3858 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90642 +GaussianMLPPolicy/KLAfter 0.000684454 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.67341e-06 +GaussianMLPPolicy/LossBefore -1.08778e-08 +GaussianMLPPolicy/dLoss 5.66253e-06 +Iteration 263 +MetaTest/Average/AverageDiscountedReturn -54.2138 +MetaTest/Average/AverageReturn -54.2138 +MetaTest/Average/Iteration 263 +MetaTest/Average/MaxReturn -42.1517 +MetaTest/Average/MinReturn -115.221 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.7517 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.2138 +MetaTest/__unnamed_task__/AverageReturn -54.2138 +MetaTest/__unnamed_task__/Iteration 263 +MetaTest/__unnamed_task__/MaxReturn -42.1517 +MetaTest/__unnamed_task__/MinReturn -115.221 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.7517 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.448e+06 +__unnamed_task__/AverageDiscountedReturn -26.3425 +__unnamed_task__/AverageReturn -49.7014 +__unnamed_task__/Iteration 263 +__unnamed_task__/MaxReturn -40.5699 +__unnamed_task__/MinReturn -129.763 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3858 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 09:11:24 | [maml_trainer] epoch #264 | Sampling for adapation and meta-testing... +2025-04-02 09:15:24 | [maml_trainer] epoch #264 | Finished meta-testing... +2025-04-02 09:15:24 | [maml_trainer] epoch #264 | Saving snapshot... +2025-04-02 09:15:45 | [maml_trainer] epoch #264 | Saved +2025-04-02 09:15:45 | [maml_trainer] epoch #264 | Time 328046.72 s +2025-04-02 09:15:45 | [maml_trainer] epoch #264 | EpochTime 1261.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1862 +Average/AverageReturn -48.6115 +Average/Iteration 264 +Average/MaxReturn -40.4601 +Average/MinReturn -63.3609 +Average/NumEpisodes 80 +Average/StdReturn 4.68423 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90683 +GaussianMLPPolicy/KLAfter 0.00115954 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.84788e-05 +GaussianMLPPolicy/LossBefore -1.0848e-08 +GaussianMLPPolicy/dLoss -4.84896e-05 +Iteration 264 +MetaTest/Average/AverageDiscountedReturn -48.433 +MetaTest/Average/AverageReturn -48.433 +MetaTest/Average/Iteration 264 +MetaTest/Average/MaxReturn -41.3998 +MetaTest/Average/MinReturn -62.1513 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.29993 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.433 +MetaTest/__unnamed_task__/AverageReturn -48.433 +MetaTest/__unnamed_task__/Iteration 264 +MetaTest/__unnamed_task__/MaxReturn -41.3998 +MetaTest/__unnamed_task__/MinReturn -62.1513 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.29993 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.48e+06 +__unnamed_task__/AverageDiscountedReturn -26.1862 +__unnamed_task__/AverageReturn -48.6115 +__unnamed_task__/Iteration 264 +__unnamed_task__/MaxReturn -40.4601 +__unnamed_task__/MinReturn -63.3609 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.68423 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 09:32:26 | [maml_trainer] epoch #265 | Sampling for adapation and meta-testing... +2025-04-02 09:36:34 | [maml_trainer] epoch #265 | Finished meta-testing... +2025-04-02 09:36:34 | [maml_trainer] epoch #265 | Saving snapshot... +2025-04-02 09:36:56 | [maml_trainer] epoch #265 | Saved +2025-04-02 09:36:56 | [maml_trainer] epoch #265 | Time 329317.28 s +2025-04-02 09:36:56 | [maml_trainer] epoch #265 | EpochTime 1270.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1195 +Average/AverageReturn -48.5729 +Average/Iteration 265 +Average/MaxReturn -38.8465 +Average/MinReturn -71.2622 +Average/NumEpisodes 80 +Average/StdReturn 5.12778 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90571 +GaussianMLPPolicy/KLAfter 0.00215932 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.38367e-05 +GaussianMLPPolicy/LossBefore 5.126e-09 +GaussianMLPPolicy/dLoss 2.38419e-05 +Iteration 265 +MetaTest/Average/AverageDiscountedReturn -48.762 +MetaTest/Average/AverageReturn -48.762 +MetaTest/Average/Iteration 265 +MetaTest/Average/MaxReturn -41.5341 +MetaTest/Average/MinReturn -56.3969 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.56325 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.762 +MetaTest/__unnamed_task__/AverageReturn -48.762 +MetaTest/__unnamed_task__/Iteration 265 +MetaTest/__unnamed_task__/MaxReturn -41.5341 +MetaTest/__unnamed_task__/MinReturn -56.3969 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.56325 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.512e+06 +__unnamed_task__/AverageDiscountedReturn -26.1195 +__unnamed_task__/AverageReturn -48.5729 +__unnamed_task__/Iteration 265 +__unnamed_task__/MaxReturn -38.8465 +__unnamed_task__/MinReturn -71.2622 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.12778 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 09:53:22 | [maml_trainer] epoch #266 | Sampling for adapation and meta-testing... +2025-04-02 09:57:25 | [maml_trainer] epoch #266 | Finished meta-testing... +2025-04-02 09:57:25 | [maml_trainer] epoch #266 | Saving snapshot... +2025-04-02 09:57:47 | [maml_trainer] epoch #266 | Saved +2025-04-02 09:57:47 | [maml_trainer] epoch #266 | Time 330568.34 s +2025-04-02 09:57:47 | [maml_trainer] epoch #266 | EpochTime 1251.06 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.7617 +Average/AverageReturn -50.7105 +Average/Iteration 266 +Average/MaxReturn -41.486 +Average/MinReturn -122.793 +Average/NumEpisodes 80 +Average/StdReturn 9.98713 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90437 +GaussianMLPPolicy/KLAfter 0.0026121 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.85165e-05 +GaussianMLPPolicy/LossBefore -1.49012e-08 +GaussianMLPPolicy/dLoss -6.85314e-05 +Iteration 266 +MetaTest/Average/AverageDiscountedReturn -48.6478 +MetaTest/Average/AverageReturn -48.6478 +MetaTest/Average/Iteration 266 +MetaTest/Average/MaxReturn -41.4419 +MetaTest/Average/MinReturn -62.8852 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.60724 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.6478 +MetaTest/__unnamed_task__/AverageReturn -48.6478 +MetaTest/__unnamed_task__/Iteration 266 +MetaTest/__unnamed_task__/MaxReturn -41.4419 +MetaTest/__unnamed_task__/MinReturn -62.8852 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.60724 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.544e+06 +__unnamed_task__/AverageDiscountedReturn -26.7617 +__unnamed_task__/AverageReturn -50.7105 +__unnamed_task__/Iteration 266 +__unnamed_task__/MaxReturn -41.486 +__unnamed_task__/MinReturn -122.793 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.98713 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 10:14:06 | [maml_trainer] epoch #267 | Sampling for adapation and meta-testing... +2025-04-02 10:18:17 | [maml_trainer] epoch #267 | Finished meta-testing... +2025-04-02 10:18:17 | [maml_trainer] epoch #267 | Saving snapshot... +2025-04-02 10:18:39 | [maml_trainer] epoch #267 | Saved +2025-04-02 10:18:39 | [maml_trainer] epoch #267 | Time 331820.38 s +2025-04-02 10:18:39 | [maml_trainer] epoch #267 | EpochTime 1252.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.9843 +Average/AverageReturn -48.4083 +Average/Iteration 267 +Average/MaxReturn -40.3788 +Average/MinReturn -64.7928 +Average/NumEpisodes 80 +Average/StdReturn 5.53165 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90245 +GaussianMLPPolicy/KLAfter 0.00287146 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.10727e-05 +GaussianMLPPolicy/LossBefore 7.689e-09 +GaussianMLPPolicy/dLoss -1.1065e-05 +Iteration 267 +MetaTest/Average/AverageDiscountedReturn -48.165 +MetaTest/Average/AverageReturn -48.165 +MetaTest/Average/Iteration 267 +MetaTest/Average/MaxReturn -40.0171 +MetaTest/Average/MinReturn -60.985 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.26471 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.165 +MetaTest/__unnamed_task__/AverageReturn -48.165 +MetaTest/__unnamed_task__/Iteration 267 +MetaTest/__unnamed_task__/MaxReturn -40.0171 +MetaTest/__unnamed_task__/MinReturn -60.985 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.26471 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.576e+06 +__unnamed_task__/AverageDiscountedReturn -25.9843 +__unnamed_task__/AverageReturn -48.4083 +__unnamed_task__/Iteration 267 +__unnamed_task__/MaxReturn -40.3788 +__unnamed_task__/MinReturn -64.7928 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.53165 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 10:35:44 | [maml_trainer] epoch #268 | Sampling for adapation and meta-testing... +2025-04-02 10:40:07 | [maml_trainer] epoch #268 | Finished meta-testing... +2025-04-02 10:40:07 | [maml_trainer] epoch #268 | Saving snapshot... +2025-04-02 10:40:29 | [maml_trainer] epoch #268 | Saved +2025-04-02 10:40:29 | [maml_trainer] epoch #268 | Time 333130.65 s +2025-04-02 10:40:29 | [maml_trainer] epoch #268 | EpochTime 1310.27 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.4264 +Average/AverageReturn -49.969 +Average/Iteration 268 +Average/MaxReturn -41.0933 +Average/MinReturn -116.936 +Average/NumEpisodes 80 +Average/StdReturn 9.10434 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9016 +GaussianMLPPolicy/KLAfter 0.00305371 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.35587e-05 +GaussianMLPPolicy/LossBefore 1.68383e-08 +GaussianMLPPolicy/dLoss 7.35756e-05 +Iteration 268 +MetaTest/Average/AverageDiscountedReturn -47.7347 +MetaTest/Average/AverageReturn -47.7347 +MetaTest/Average/Iteration 268 +MetaTest/Average/MaxReturn -40.6219 +MetaTest/Average/MinReturn -59.2945 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.14921 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.7347 +MetaTest/__unnamed_task__/AverageReturn -47.7347 +MetaTest/__unnamed_task__/Iteration 268 +MetaTest/__unnamed_task__/MaxReturn -40.6219 +MetaTest/__unnamed_task__/MinReturn -59.2945 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.14921 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.608e+06 +__unnamed_task__/AverageDiscountedReturn -26.4264 +__unnamed_task__/AverageReturn -49.969 +__unnamed_task__/Iteration 268 +__unnamed_task__/MaxReturn -41.0933 +__unnamed_task__/MinReturn -116.936 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.10434 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 10:57:48 | [maml_trainer] epoch #269 | Sampling for adapation and meta-testing... +2025-04-02 11:02:08 | [maml_trainer] epoch #269 | Finished meta-testing... +2025-04-02 11:02:08 | [maml_trainer] epoch #269 | Saving snapshot... +2025-04-02 11:02:31 | [maml_trainer] epoch #269 | Saved +2025-04-02 11:02:31 | [maml_trainer] epoch #269 | Time 334452.44 s +2025-04-02 11:02:31 | [maml_trainer] epoch #269 | EpochTime 1321.79 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.4716 +Average/AverageReturn -49.3 +Average/Iteration 269 +Average/MaxReturn -38.8952 +Average/MinReturn -64.407 +Average/NumEpisodes 80 +Average/StdReturn 5.13584 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90072 +GaussianMLPPolicy/KLAfter 0.00496245 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000102776 +GaussianMLPPolicy/LossBefore 8.58307e-09 +GaussianMLPPolicy/dLoss 0.000102784 +Iteration 269 +MetaTest/Average/AverageDiscountedReturn -51.8882 +MetaTest/Average/AverageReturn -51.8882 +MetaTest/Average/Iteration 269 +MetaTest/Average/MaxReturn -40.712 +MetaTest/Average/MinReturn -125.946 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5782 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.8882 +MetaTest/__unnamed_task__/AverageReturn -51.8882 +MetaTest/__unnamed_task__/Iteration 269 +MetaTest/__unnamed_task__/MaxReturn -40.712 +MetaTest/__unnamed_task__/MinReturn -125.946 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5782 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.64e+06 +__unnamed_task__/AverageDiscountedReturn -26.4716 +__unnamed_task__/AverageReturn -49.3 +__unnamed_task__/Iteration 269 +__unnamed_task__/MaxReturn -38.8952 +__unnamed_task__/MinReturn -64.407 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.13584 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 11:19:50 | [maml_trainer] epoch #270 | Sampling for adapation and meta-testing... +2025-04-02 11:24:09 | [maml_trainer] epoch #270 | Finished meta-testing... +2025-04-02 11:24:09 | [maml_trainer] epoch #270 | Saving snapshot... +2025-04-02 11:24:33 | [maml_trainer] epoch #270 | Saved +2025-04-02 11:24:33 | [maml_trainer] epoch #270 | Time 335774.37 s +2025-04-02 11:24:33 | [maml_trainer] epoch #270 | EpochTime 1321.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.0998 +Average/AverageReturn -48.4501 +Average/Iteration 270 +Average/MaxReturn -39.7187 +Average/MinReturn -62.2074 +Average/NumEpisodes 80 +Average/StdReturn 4.71038 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90057 +GaussianMLPPolicy/KLAfter 0.00530785 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.56852e-05 +GaussianMLPPolicy/LossBefore 2.09808e-08 +GaussianMLPPolicy/dLoss 6.57061e-05 +Iteration 270 +MetaTest/Average/AverageDiscountedReturn -51.9127 +MetaTest/Average/AverageReturn -51.9127 +MetaTest/Average/Iteration 270 +MetaTest/Average/MaxReturn -42.2752 +MetaTest/Average/MinReturn -112.401 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.2616 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.9127 +MetaTest/__unnamed_task__/AverageReturn -51.9127 +MetaTest/__unnamed_task__/Iteration 270 +MetaTest/__unnamed_task__/MaxReturn -42.2752 +MetaTest/__unnamed_task__/MinReturn -112.401 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.2616 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.672e+06 +__unnamed_task__/AverageDiscountedReturn -26.0998 +__unnamed_task__/AverageReturn -48.4501 +__unnamed_task__/Iteration 270 +__unnamed_task__/MaxReturn -39.7187 +__unnamed_task__/MinReturn -62.2074 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.71038 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 11:42:00 | [maml_trainer] epoch #271 | Sampling for adapation and meta-testing... +2025-04-02 11:46:21 | [maml_trainer] epoch #271 | Finished meta-testing... +2025-04-02 11:46:21 | [maml_trainer] epoch #271 | Saving snapshot... +2025-04-02 11:46:45 | [maml_trainer] epoch #271 | Saved +2025-04-02 11:46:45 | [maml_trainer] epoch #271 | Time 337106.34 s +2025-04-02 11:46:45 | [maml_trainer] epoch #271 | EpochTime 1331.97 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.3342 +Average/AverageReturn -48.9336 +Average/Iteration 271 +Average/MaxReturn -39.6383 +Average/MinReturn -69.0627 +Average/NumEpisodes 80 +Average/StdReturn 5.81737 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90072 +GaussianMLPPolicy/KLAfter 0.00346048 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.57243e-05 +GaussianMLPPolicy/LossBefore -6.73533e-09 +GaussianMLPPolicy/dLoss -1.5731e-05 +Iteration 271 +MetaTest/Average/AverageDiscountedReturn -52.7635 +MetaTest/Average/AverageReturn -52.7635 +MetaTest/Average/Iteration 271 +MetaTest/Average/MaxReturn -40.8771 +MetaTest/Average/MinReturn -133.64 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.0977 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.7635 +MetaTest/__unnamed_task__/AverageReturn -52.7635 +MetaTest/__unnamed_task__/Iteration 271 +MetaTest/__unnamed_task__/MaxReturn -40.8771 +MetaTest/__unnamed_task__/MinReturn -133.64 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.0977 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.704e+06 +__unnamed_task__/AverageDiscountedReturn -26.3342 +__unnamed_task__/AverageReturn -48.9336 +__unnamed_task__/Iteration 271 +__unnamed_task__/MaxReturn -39.6383 +__unnamed_task__/MinReturn -69.0627 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.81737 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 12:03:54 | [maml_trainer] epoch #272 | Sampling for adapation and meta-testing... +2025-04-02 12:08:20 | [maml_trainer] epoch #272 | Finished meta-testing... +2025-04-02 12:08:20 | [maml_trainer] epoch #272 | Saving snapshot... +2025-04-02 12:08:43 | [maml_trainer] epoch #272 | Saved +2025-04-02 12:08:43 | [maml_trainer] epoch #272 | Time 338424.12 s +2025-04-02 12:08:43 | [maml_trainer] epoch #272 | EpochTime 1317.77 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.4301 +Average/AverageReturn -49.6274 +Average/Iteration 272 +Average/MaxReturn -39.0874 +Average/MinReturn -72.6432 +Average/NumEpisodes 80 +Average/StdReturn 5.54994 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90099 +GaussianMLPPolicy/KLAfter 0.00384465 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.91239e-05 +GaussianMLPPolicy/LossBefore 1.43051e-09 +GaussianMLPPolicy/dLoss -5.91225e-05 +Iteration 272 +MetaTest/Average/AverageDiscountedReturn -49.3503 +MetaTest/Average/AverageReturn -49.3503 +MetaTest/Average/Iteration 272 +MetaTest/Average/MaxReturn -41.9412 +MetaTest/Average/MinReturn -66.476 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.93576 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.3503 +MetaTest/__unnamed_task__/AverageReturn -49.3503 +MetaTest/__unnamed_task__/Iteration 272 +MetaTest/__unnamed_task__/MaxReturn -41.9412 +MetaTest/__unnamed_task__/MinReturn -66.476 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.93576 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.736e+06 +__unnamed_task__/AverageDiscountedReturn -26.4301 +__unnamed_task__/AverageReturn -49.6274 +__unnamed_task__/Iteration 272 +__unnamed_task__/MaxReturn -39.0874 +__unnamed_task__/MinReturn -72.6432 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.54994 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 12:25:22 | [maml_trainer] epoch #273 | Sampling for adapation and meta-testing... +2025-04-02 12:29:29 | [maml_trainer] epoch #273 | Finished meta-testing... +2025-04-02 12:29:29 | [maml_trainer] epoch #273 | Saving snapshot... +2025-04-02 12:29:49 | [maml_trainer] epoch #273 | Saved +2025-04-02 12:29:49 | [maml_trainer] epoch #273 | Time 339690.92 s +2025-04-02 12:29:49 | [maml_trainer] epoch #273 | EpochTime 1266.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.4897 +Average/AverageReturn -49.8064 +Average/Iteration 273 +Average/MaxReturn -42.1777 +Average/MinReturn -111.917 +Average/NumEpisodes 80 +Average/StdReturn 8.20475 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90212 +GaussianMLPPolicy/KLAfter 0.00243315 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.6491e-06 +GaussianMLPPolicy/LossBefore 3.99351e-09 +GaussianMLPPolicy/dLoss 7.65309e-06 +Iteration 273 +MetaTest/Average/AverageDiscountedReturn -47.9319 +MetaTest/Average/AverageReturn -47.9319 +MetaTest/Average/Iteration 273 +MetaTest/Average/MaxReturn -38.1539 +MetaTest/Average/MinReturn -54.0466 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.63891 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.9319 +MetaTest/__unnamed_task__/AverageReturn -47.9319 +MetaTest/__unnamed_task__/Iteration 273 +MetaTest/__unnamed_task__/MaxReturn -38.1539 +MetaTest/__unnamed_task__/MinReturn -54.0466 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.63891 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.768e+06 +__unnamed_task__/AverageDiscountedReturn -26.4897 +__unnamed_task__/AverageReturn -49.8064 +__unnamed_task__/Iteration 273 +__unnamed_task__/MaxReturn -42.1777 +__unnamed_task__/MinReturn -111.917 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.20475 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 12:46:15 | [maml_trainer] epoch #274 | Sampling for adapation and meta-testing... +2025-04-02 12:50:20 | [maml_trainer] epoch #274 | Finished meta-testing... +2025-04-02 12:50:20 | [maml_trainer] epoch #274 | Saving snapshot... +2025-04-02 12:50:41 | [maml_trainer] epoch #274 | Saved +2025-04-02 12:50:41 | [maml_trainer] epoch #274 | Time 340942.97 s +2025-04-02 12:50:41 | [maml_trainer] epoch #274 | EpochTime 1252.04 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.3836 +Average/AverageReturn -49.3289 +Average/Iteration 274 +Average/MaxReturn -40.8925 +Average/MinReturn -62.3649 +Average/NumEpisodes 80 +Average/StdReturn 4.50485 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90347 +GaussianMLPPolicy/KLAfter 0.00262916 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.81505e-05 +GaussianMLPPolicy/LossBefore -2.2769e-08 +GaussianMLPPolicy/dLoss -3.81733e-05 +Iteration 274 +MetaTest/Average/AverageDiscountedReturn -49.7128 +MetaTest/Average/AverageReturn -49.7128 +MetaTest/Average/Iteration 274 +MetaTest/Average/MaxReturn -43.259 +MetaTest/Average/MinReturn -57.652 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.32232 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.7128 +MetaTest/__unnamed_task__/AverageReturn -49.7128 +MetaTest/__unnamed_task__/Iteration 274 +MetaTest/__unnamed_task__/MaxReturn -43.259 +MetaTest/__unnamed_task__/MinReturn -57.652 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.32232 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.8e+06 +__unnamed_task__/AverageDiscountedReturn -26.3836 +__unnamed_task__/AverageReturn -49.3289 +__unnamed_task__/Iteration 274 +__unnamed_task__/MaxReturn -40.8925 +__unnamed_task__/MinReturn -62.3649 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.50485 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 13:06:53 | [maml_trainer] epoch #275 | Sampling for adapation and meta-testing... +2025-04-02 13:11:07 | [maml_trainer] epoch #275 | Finished meta-testing... +2025-04-02 13:11:07 | [maml_trainer] epoch #275 | Saving snapshot... +2025-04-02 13:11:30 | [maml_trainer] epoch #275 | Saved +2025-04-02 13:11:30 | [maml_trainer] epoch #275 | Time 342191.53 s +2025-04-02 13:11:30 | [maml_trainer] epoch #275 | EpochTime 1248.55 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.5062 +Average/AverageReturn -49.5379 +Average/Iteration 275 +Average/MaxReturn -40.5538 +Average/MinReturn -61.2091 +Average/NumEpisodes 80 +Average/StdReturn 4.52083 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90357 +GaussianMLPPolicy/KLAfter 0.00479817 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.34368e-05 +GaussianMLPPolicy/LossBefore -8.70228e-09 +GaussianMLPPolicy/dLoss -7.34455e-05 +Iteration 275 +MetaTest/Average/AverageDiscountedReturn -49.5208 +MetaTest/Average/AverageReturn -49.5208 +MetaTest/Average/Iteration 275 +MetaTest/Average/MaxReturn -41.8437 +MetaTest/Average/MinReturn -66.9236 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.35792 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.5208 +MetaTest/__unnamed_task__/AverageReturn -49.5208 +MetaTest/__unnamed_task__/Iteration 275 +MetaTest/__unnamed_task__/MaxReturn -41.8437 +MetaTest/__unnamed_task__/MinReturn -66.9236 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.35792 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.832e+06 +__unnamed_task__/AverageDiscountedReturn -26.5062 +__unnamed_task__/AverageReturn -49.5379 +__unnamed_task__/Iteration 275 +__unnamed_task__/MaxReturn -40.5538 +__unnamed_task__/MinReturn -61.2091 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.52083 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 13:28:40 | [maml_trainer] epoch #276 | Sampling for adapation and meta-testing... +2025-04-02 13:32:58 | [maml_trainer] epoch #276 | Finished meta-testing... +2025-04-02 13:32:58 | [maml_trainer] epoch #276 | Saving snapshot... +2025-04-02 13:33:22 | [maml_trainer] epoch #276 | Saved +2025-04-02 13:33:22 | [maml_trainer] epoch #276 | Time 343503.21 s +2025-04-02 13:33:22 | [maml_trainer] epoch #276 | EpochTime 1311.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.2308 +Average/AverageReturn -48.7297 +Average/Iteration 276 +Average/MaxReturn -40.6259 +Average/MinReturn -63.9514 +Average/NumEpisodes 80 +Average/StdReturn 5.1861 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90388 +GaussianMLPPolicy/KLAfter 0.00482589 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000174431 +GaussianMLPPolicy/LossBefore 6.07967e-09 +GaussianMLPPolicy/dLoss -0.000174425 +Iteration 276 +MetaTest/Average/AverageDiscountedReturn -50.8891 +MetaTest/Average/AverageReturn -50.8891 +MetaTest/Average/Iteration 276 +MetaTest/Average/MaxReturn -42.2249 +MetaTest/Average/MinReturn -64.4055 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.21143 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.8891 +MetaTest/__unnamed_task__/AverageReturn -50.8891 +MetaTest/__unnamed_task__/Iteration 276 +MetaTest/__unnamed_task__/MaxReturn -42.2249 +MetaTest/__unnamed_task__/MinReturn -64.4055 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.21143 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.864e+06 +__unnamed_task__/AverageDiscountedReturn -26.2308 +__unnamed_task__/AverageReturn -48.7297 +__unnamed_task__/Iteration 276 +__unnamed_task__/MaxReturn -40.6259 +__unnamed_task__/MinReturn -63.9514 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.1861 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 13:50:44 | [maml_trainer] epoch #277 | Sampling for adapation and meta-testing... +2025-04-02 13:55:02 | [maml_trainer] epoch #277 | Finished meta-testing... +2025-04-02 13:55:02 | [maml_trainer] epoch #277 | Saving snapshot... +2025-04-02 13:55:24 | [maml_trainer] epoch #277 | Saved +2025-04-02 13:55:24 | [maml_trainer] epoch #277 | Time 344825.26 s +2025-04-02 13:55:24 | [maml_trainer] epoch #277 | EpochTime 1322.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1895 +Average/AverageReturn -48.5739 +Average/Iteration 277 +Average/MaxReturn -41.0699 +Average/MinReturn -62.6201 +Average/NumEpisodes 80 +Average/StdReturn 4.34476 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90488 +GaussianMLPPolicy/KLAfter 0.00202145 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.16199e-05 +GaussianMLPPolicy/LossBefore -5.30481e-09 +GaussianMLPPolicy/dLoss 3.16146e-05 +Iteration 277 +MetaTest/Average/AverageDiscountedReturn -48.4783 +MetaTest/Average/AverageReturn -48.4783 +MetaTest/Average/Iteration 277 +MetaTest/Average/MaxReturn -40.8097 +MetaTest/Average/MinReturn -56.8619 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.37991 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.4783 +MetaTest/__unnamed_task__/AverageReturn -48.4783 +MetaTest/__unnamed_task__/Iteration 277 +MetaTest/__unnamed_task__/MaxReturn -40.8097 +MetaTest/__unnamed_task__/MinReturn -56.8619 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.37991 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.896e+06 +__unnamed_task__/AverageDiscountedReturn -26.1895 +__unnamed_task__/AverageReturn -48.5739 +__unnamed_task__/Iteration 277 +__unnamed_task__/MaxReturn -41.0699 +__unnamed_task__/MinReturn -62.6201 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.34476 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 14:12:46 | [maml_trainer] epoch #278 | Sampling for adapation and meta-testing... +2025-04-02 14:17:07 | [maml_trainer] epoch #278 | Finished meta-testing... +2025-04-02 14:17:07 | [maml_trainer] epoch #278 | Saving snapshot... +2025-04-02 14:17:30 | [maml_trainer] epoch #278 | Saved +2025-04-02 14:17:30 | [maml_trainer] epoch #278 | Time 346151.96 s +2025-04-02 14:17:30 | [maml_trainer] epoch #278 | EpochTime 1326.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.7733 +Average/AverageReturn -49.9802 +Average/Iteration 278 +Average/MaxReturn -41.0076 +Average/MinReturn -70.6885 +Average/NumEpisodes 80 +Average/StdReturn 5.39043 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90508 +GaussianMLPPolicy/KLAfter 0.00193747 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.15001e-05 +GaussianMLPPolicy/LossBefore -9.23872e-09 +GaussianMLPPolicy/dLoss -7.15093e-05 +Iteration 278 +MetaTest/Average/AverageDiscountedReturn -50.8262 +MetaTest/Average/AverageReturn -50.8262 +MetaTest/Average/Iteration 278 +MetaTest/Average/MaxReturn -41.0376 +MetaTest/Average/MinReturn -62.016 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.64102 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.8262 +MetaTest/__unnamed_task__/AverageReturn -50.8262 +MetaTest/__unnamed_task__/Iteration 278 +MetaTest/__unnamed_task__/MaxReturn -41.0376 +MetaTest/__unnamed_task__/MinReturn -62.016 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.64102 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.928e+06 +__unnamed_task__/AverageDiscountedReturn -26.7733 +__unnamed_task__/AverageReturn -49.9802 +__unnamed_task__/Iteration 278 +__unnamed_task__/MaxReturn -41.0076 +__unnamed_task__/MinReturn -70.6885 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.39043 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 14:34:50 | [maml_trainer] epoch #279 | Sampling for adapation and meta-testing... +2025-04-02 14:39:09 | [maml_trainer] epoch #279 | Finished meta-testing... +2025-04-02 14:39:09 | [maml_trainer] epoch #279 | Saving snapshot... +2025-04-02 14:39:33 | [maml_trainer] epoch #279 | Saved +2025-04-02 14:39:33 | [maml_trainer] epoch #279 | Time 347474.23 s +2025-04-02 14:39:33 | [maml_trainer] epoch #279 | EpochTime 1322.26 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.4356 +Average/AverageReturn -49.5131 +Average/Iteration 279 +Average/MaxReturn -40.4128 +Average/MinReturn -69.4664 +Average/NumEpisodes 80 +Average/StdReturn 5.48627 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90595 +GaussianMLPPolicy/KLAfter 0.00234529 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.64441e-06 +GaussianMLPPolicy/LossBefore 4.11272e-09 +GaussianMLPPolicy/dLoss 7.64852e-06 +Iteration 279 +MetaTest/Average/AverageDiscountedReturn -48.8925 +MetaTest/Average/AverageReturn -48.8925 +MetaTest/Average/Iteration 279 +MetaTest/Average/MaxReturn -42.8482 +MetaTest/Average/MinReturn -64.678 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.51739 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.8925 +MetaTest/__unnamed_task__/AverageReturn -48.8925 +MetaTest/__unnamed_task__/Iteration 279 +MetaTest/__unnamed_task__/MaxReturn -42.8482 +MetaTest/__unnamed_task__/MinReturn -64.678 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.51739 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.96e+06 +__unnamed_task__/AverageDiscountedReturn -26.4356 +__unnamed_task__/AverageReturn -49.5131 +__unnamed_task__/Iteration 279 +__unnamed_task__/MaxReturn -40.4128 +__unnamed_task__/MinReturn -69.4664 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.48627 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 14:56:51 | [maml_trainer] epoch #280 | Sampling for adapation and meta-testing... +2025-04-02 15:01:10 | [maml_trainer] epoch #280 | Finished meta-testing... +2025-04-02 15:01:10 | [maml_trainer] epoch #280 | Saving snapshot... +2025-04-02 15:01:33 | [maml_trainer] epoch #280 | Saved +2025-04-02 15:01:33 | [maml_trainer] epoch #280 | Time 348794.88 s +2025-04-02 15:01:33 | [maml_trainer] epoch #280 | EpochTime 1320.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.5169 +Average/AverageReturn -49.2464 +Average/Iteration 280 +Average/MaxReturn -39.8391 +Average/MinReturn -70.0249 +Average/NumEpisodes 80 +Average/StdReturn 5.91072 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90711 +GaussianMLPPolicy/KLAfter 0.00185878 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000152559 +GaussianMLPPolicy/LossBefore -9.17912e-09 +GaussianMLPPolicy/dLoss 0.00015255 +Iteration 280 +MetaTest/Average/AverageDiscountedReturn -50.8607 +MetaTest/Average/AverageReturn -50.8607 +MetaTest/Average/Iteration 280 +MetaTest/Average/MaxReturn -41.9185 +MetaTest/Average/MinReturn -72.7243 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.03265 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.8607 +MetaTest/__unnamed_task__/AverageReturn -50.8607 +MetaTest/__unnamed_task__/Iteration 280 +MetaTest/__unnamed_task__/MaxReturn -41.9185 +MetaTest/__unnamed_task__/MinReturn -72.7243 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.03265 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.992e+06 +__unnamed_task__/AverageDiscountedReturn -26.5169 +__unnamed_task__/AverageReturn -49.2464 +__unnamed_task__/Iteration 280 +__unnamed_task__/MaxReturn -39.8391 +__unnamed_task__/MinReturn -70.0249 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.91072 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 15:19:33 | [maml_trainer] epoch #281 | Sampling for adapation and meta-testing... +2025-04-02 15:24:06 | [maml_trainer] epoch #281 | Finished meta-testing... +2025-04-02 15:24:06 | [maml_trainer] epoch #281 | Saving snapshot... +2025-04-02 15:24:30 | [maml_trainer] epoch #281 | Saved +2025-04-02 15:24:30 | [maml_trainer] epoch #281 | Time 350171.80 s +2025-04-02 15:24:30 | [maml_trainer] epoch #281 | EpochTime 1376.92 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.2123 +Average/AverageReturn -48.8811 +Average/Iteration 281 +Average/MaxReturn -39.6329 +Average/MinReturn -66.8028 +Average/NumEpisodes 80 +Average/StdReturn 6.1411 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90807 +GaussianMLPPolicy/KLAfter 0.00149685 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000168929 +GaussianMLPPolicy/LossBefore -1.32918e-08 +GaussianMLPPolicy/dLoss 0.000168915 +Iteration 281 +MetaTest/Average/AverageDiscountedReturn -52.8898 +MetaTest/Average/AverageReturn -52.8898 +MetaTest/Average/Iteration 281 +MetaTest/Average/MaxReturn -42.6931 +MetaTest/Average/MinReturn -133.132 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.9595 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.8898 +MetaTest/__unnamed_task__/AverageReturn -52.8898 +MetaTest/__unnamed_task__/Iteration 281 +MetaTest/__unnamed_task__/MaxReturn -42.6931 +MetaTest/__unnamed_task__/MinReturn -133.132 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.9595 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.024e+06 +__unnamed_task__/AverageDiscountedReturn -26.2123 +__unnamed_task__/AverageReturn -48.8811 +__unnamed_task__/Iteration 281 +__unnamed_task__/MaxReturn -39.6329 +__unnamed_task__/MinReturn -66.8028 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.1411 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 15:42:38 | [maml_trainer] epoch #282 | Sampling for adapation and meta-testing... +2025-04-02 15:46:57 | [maml_trainer] epoch #282 | Finished meta-testing... +2025-04-02 15:46:57 | [maml_trainer] epoch #282 | Saving snapshot... +2025-04-02 15:47:19 | [maml_trainer] epoch #282 | Saved +2025-04-02 15:47:19 | [maml_trainer] epoch #282 | Time 351540.36 s +2025-04-02 15:47:19 | [maml_trainer] epoch #282 | EpochTime 1368.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.2305 +Average/AverageReturn -48.7054 +Average/Iteration 282 +Average/MaxReturn -40.7783 +Average/MinReturn -67.8972 +Average/NumEpisodes 80 +Average/StdReturn 5.23126 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90862 +GaussianMLPPolicy/KLAfter 0.00182642 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.23339e-05 +GaussianMLPPolicy/LossBefore -8.34465e-10 +GaussianMLPPolicy/dLoss -9.23347e-05 +Iteration 282 +MetaTest/Average/AverageDiscountedReturn -50.0439 +MetaTest/Average/AverageReturn -50.0439 +MetaTest/Average/Iteration 282 +MetaTest/Average/MaxReturn -39.3647 +MetaTest/Average/MinReturn -69.2753 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.15007 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.0439 +MetaTest/__unnamed_task__/AverageReturn -50.0439 +MetaTest/__unnamed_task__/Iteration 282 +MetaTest/__unnamed_task__/MaxReturn -39.3647 +MetaTest/__unnamed_task__/MinReturn -69.2753 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.15007 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.056e+06 +__unnamed_task__/AverageDiscountedReturn -26.2305 +__unnamed_task__/AverageReturn -48.7054 +__unnamed_task__/Iteration 282 +__unnamed_task__/MaxReturn -40.7783 +__unnamed_task__/MinReturn -67.8972 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.23126 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 16:05:12 | [maml_trainer] epoch #283 | Sampling for adapation and meta-testing... +2025-04-02 16:09:27 | [maml_trainer] epoch #283 | Finished meta-testing... +2025-04-02 16:09:27 | [maml_trainer] epoch #283 | Saving snapshot... +2025-04-02 16:09:51 | [maml_trainer] epoch #283 | Saved +2025-04-02 16:09:51 | [maml_trainer] epoch #283 | Time 352892.13 s +2025-04-02 16:09:51 | [maml_trainer] epoch #283 | EpochTime 1351.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -26.1196 +Average/AverageReturn -48.1672 +Average/Iteration 283 +Average/MaxReturn -38.4905 +Average/MinReturn -62.5557 +Average/NumEpisodes 80 +Average/StdReturn 4.31167 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91032 +GaussianMLPPolicy/KLAfter 0.00154456 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.59533e-05 +GaussianMLPPolicy/LossBefore 1.57952e-08 +GaussianMLPPolicy/dLoss -4.59375e-05 +Iteration 283 +MetaTest/Average/AverageDiscountedReturn -47.5351 +MetaTest/Average/AverageReturn -47.5351 +MetaTest/Average/Iteration 283 +MetaTest/Average/MaxReturn -42.1724 +MetaTest/Average/MinReturn -56.7521 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.43449 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.5351 +MetaTest/__unnamed_task__/AverageReturn -47.5351 +MetaTest/__unnamed_task__/Iteration 283 +MetaTest/__unnamed_task__/MaxReturn -42.1724 +MetaTest/__unnamed_task__/MinReturn -56.7521 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.43449 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.088e+06 +__unnamed_task__/AverageDiscountedReturn -26.1196 +__unnamed_task__/AverageReturn -48.1672 +__unnamed_task__/Iteration 283 +__unnamed_task__/MaxReturn -38.4905 +__unnamed_task__/MinReturn -62.5557 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.31167 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 16:27:29 | [maml_trainer] epoch #284 | Sampling for adapation and meta-testing... +2025-04-02 16:33:17 | [maml_trainer] epoch #284 | Finished meta-testing... +2025-04-02 16:33:17 | [maml_trainer] epoch #284 | Saving snapshot... +2025-04-02 16:33:41 | [maml_trainer] epoch #284 | Saved +2025-04-02 16:33:41 | [maml_trainer] epoch #284 | Time 354322.20 s +2025-04-02 16:33:41 | [maml_trainer] epoch #284 | EpochTime 1430.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.8156 +Average/AverageReturn -48.0001 +Average/Iteration 284 +Average/MaxReturn -39.0615 +Average/MinReturn -62.9321 +Average/NumEpisodes 80 +Average/StdReturn 4.922 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9137 +GaussianMLPPolicy/KLAfter 0.0010886 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.62014e-05 +GaussianMLPPolicy/LossBefore 8.40425e-09 +GaussianMLPPolicy/dLoss 3.62098e-05 +Iteration 284 +MetaTest/Average/AverageDiscountedReturn -50.5495 +MetaTest/Average/AverageReturn -50.5495 +MetaTest/Average/Iteration 284 +MetaTest/Average/MaxReturn -41.9135 +MetaTest/Average/MinReturn -67.9562 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.47691 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.5495 +MetaTest/__unnamed_task__/AverageReturn -50.5495 +MetaTest/__unnamed_task__/Iteration 284 +MetaTest/__unnamed_task__/MaxReturn -41.9135 +MetaTest/__unnamed_task__/MinReturn -67.9562 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.47691 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.12e+06 +__unnamed_task__/AverageDiscountedReturn -25.8156 +__unnamed_task__/AverageReturn -48.0001 +__unnamed_task__/Iteration 284 +__unnamed_task__/MaxReturn -39.0615 +__unnamed_task__/MinReturn -62.9321 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 4.922 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 16:51:53 | [maml_trainer] epoch #285 | Sampling for adapation and meta-testing... +2025-04-02 16:56:26 | [maml_trainer] epoch #285 | Finished meta-testing... +2025-04-02 16:56:26 | [maml_trainer] epoch #285 | Saving snapshot... +2025-04-02 16:56:50 | [maml_trainer] epoch #285 | Saved +2025-04-02 16:56:50 | [maml_trainer] epoch #285 | Time 355711.54 s +2025-04-02 16:56:50 | [maml_trainer] epoch #285 | EpochTime 1389.33 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.473 +Average/AverageReturn -50.0189 +Average/Iteration 285 +Average/MaxReturn -37.336 +Average/MinReturn -142.088 +Average/NumEpisodes 80 +Average/StdReturn 12.1044 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91708 +GaussianMLPPolicy/KLAfter 0.0014966 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.03779e-06 +GaussianMLPPolicy/LossBefore -7.7486e-10 +GaussianMLPPolicy/dLoss 2.03702e-06 +Iteration 285 +MetaTest/Average/AverageDiscountedReturn -46.0714 +MetaTest/Average/AverageReturn -46.0714 +MetaTest/Average/Iteration 285 +MetaTest/Average/MaxReturn -39.4447 +MetaTest/Average/MinReturn -50.5485 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 3.3109 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -46.0714 +MetaTest/__unnamed_task__/AverageReturn -46.0714 +MetaTest/__unnamed_task__/Iteration 285 +MetaTest/__unnamed_task__/MaxReturn -39.4447 +MetaTest/__unnamed_task__/MinReturn -50.5485 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 3.3109 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.152e+06 +__unnamed_task__/AverageDiscountedReturn -26.473 +__unnamed_task__/AverageReturn -50.0189 +__unnamed_task__/Iteration 285 +__unnamed_task__/MaxReturn -37.336 +__unnamed_task__/MinReturn -142.088 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1044 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:14:49 | [maml_trainer] epoch #286 | Sampling for adapation and meta-testing... +2025-04-02 17:19:22 | [maml_trainer] epoch #286 | Finished meta-testing... +2025-04-02 17:19:22 | [maml_trainer] epoch #286 | Saving snapshot... +2025-04-02 17:19:49 | [maml_trainer] epoch #286 | Saved +2025-04-02 17:19:49 | [maml_trainer] epoch #286 | Time 357090.38 s +2025-04-02 17:19:49 | [maml_trainer] epoch #286 | EpochTime 1378.84 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.6658 +Average/AverageReturn -47.6917 +Average/Iteration 286 +Average/MaxReturn -39.5878 +Average/MinReturn -64.7366 +Average/NumEpisodes 80 +Average/StdReturn 5.18402 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9208 +GaussianMLPPolicy/KLAfter 0.00183224 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.06365e-05 +GaussianMLPPolicy/LossBefore 1.23978e-08 +GaussianMLPPolicy/dLoss -5.06241e-05 +Iteration 286 +MetaTest/Average/AverageDiscountedReturn -46.4445 +MetaTest/Average/AverageReturn -46.4445 +MetaTest/Average/Iteration 286 +MetaTest/Average/MaxReturn -39.2334 +MetaTest/Average/MinReturn -56.7942 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.15157 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -46.4445 +MetaTest/__unnamed_task__/AverageReturn -46.4445 +MetaTest/__unnamed_task__/Iteration 286 +MetaTest/__unnamed_task__/MaxReturn -39.2334 +MetaTest/__unnamed_task__/MinReturn -56.7942 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.15157 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.184e+06 +__unnamed_task__/AverageDiscountedReturn -25.6658 +__unnamed_task__/AverageReturn -47.6917 +__unnamed_task__/Iteration 286 +__unnamed_task__/MaxReturn -39.5878 +__unnamed_task__/MinReturn -64.7366 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 5.18402 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 17:37:58 | [maml_trainer] epoch #287 | Sampling for adapation and meta-testing... +2025-04-02 17:42:31 | [maml_trainer] epoch #287 | Finished meta-testing... +2025-04-02 17:42:31 | [maml_trainer] epoch #287 | Saving snapshot... +2025-04-02 17:42:56 | [maml_trainer] epoch #287 | Saved +2025-04-02 17:42:56 | [maml_trainer] epoch #287 | Time 358477.53 s +2025-04-02 17:42:56 | [maml_trainer] epoch #287 | EpochTime 1387.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.0419 +Average/AverageReturn -48.9799 +Average/Iteration 287 +Average/MaxReturn -39.1297 +Average/MinReturn -103.325 +Average/NumEpisodes 80 +Average/StdReturn 7.94207 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92395 +GaussianMLPPolicy/KLAfter 0.00222344 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.32061e-05 +GaussianMLPPolicy/LossBefore -2.26498e-09 +GaussianMLPPolicy/dLoss -6.32083e-05 +Iteration 287 +MetaTest/Average/AverageDiscountedReturn -45.6884 +MetaTest/Average/AverageReturn -45.6884 +MetaTest/Average/Iteration 287 +MetaTest/Average/MaxReturn -39.1987 +MetaTest/Average/MinReturn -55.0774 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.01793 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.6884 +MetaTest/__unnamed_task__/AverageReturn -45.6884 +MetaTest/__unnamed_task__/Iteration 287 +MetaTest/__unnamed_task__/MaxReturn -39.1987 +MetaTest/__unnamed_task__/MinReturn -55.0774 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.01793 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.216e+06 +__unnamed_task__/AverageDiscountedReturn -26.0419 +__unnamed_task__/AverageReturn -48.9799 +__unnamed_task__/Iteration 287 +__unnamed_task__/MaxReturn -39.1297 +__unnamed_task__/MinReturn -103.325 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.94207 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:01:03 | [maml_trainer] epoch #288 | Sampling for adapation and meta-testing... +2025-04-02 18:05:38 | [maml_trainer] epoch #288 | Finished meta-testing... +2025-04-02 18:05:38 | [maml_trainer] epoch #288 | Saving snapshot... +2025-04-02 18:06:02 | [maml_trainer] epoch #288 | Saved +2025-04-02 18:06:02 | [maml_trainer] epoch #288 | Time 359863.62 s +2025-04-02 18:06:02 | [maml_trainer] epoch #288 | EpochTime 1386.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -25.9789 +Average/AverageReturn -48.0169 +Average/Iteration 288 +Average/MaxReturn -39.3356 +Average/MinReturn -88.601 +Average/NumEpisodes 80 +Average/StdReturn 6.99627 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92908 +GaussianMLPPolicy/KLAfter 0.0024695 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.37029e-05 +GaussianMLPPolicy/LossBefore -1.14441e-08 +GaussianMLPPolicy/dLoss -3.37143e-05 +Iteration 288 +MetaTest/Average/AverageDiscountedReturn -52.5435 +MetaTest/Average/AverageReturn -52.5435 +MetaTest/Average/Iteration 288 +MetaTest/Average/MaxReturn -43.1211 +MetaTest/Average/MinReturn -118.104 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5236 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.5435 +MetaTest/__unnamed_task__/AverageReturn -52.5435 +MetaTest/__unnamed_task__/Iteration 288 +MetaTest/__unnamed_task__/MaxReturn -43.1211 +MetaTest/__unnamed_task__/MinReturn -118.104 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5236 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.248e+06 +__unnamed_task__/AverageDiscountedReturn -25.9789 +__unnamed_task__/AverageReturn -48.0169 +__unnamed_task__/Iteration 288 +__unnamed_task__/MaxReturn -39.3356 +__unnamed_task__/MinReturn -88.601 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.99627 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:24:16 | [maml_trainer] epoch #289 | Sampling for adapation and meta-testing... +2025-04-02 18:28:45 | [maml_trainer] epoch #289 | Finished meta-testing... +2025-04-02 18:28:45 | [maml_trainer] epoch #289 | Saving snapshot... +2025-04-02 18:29:09 | [maml_trainer] epoch #289 | Saved +2025-04-02 18:29:09 | [maml_trainer] epoch #289 | Time 361250.72 s +2025-04-02 18:29:09 | [maml_trainer] epoch #289 | EpochTime 1387.10 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.4067 +Average/AverageReturn -49.7548 +Average/Iteration 289 +Average/MaxReturn -39.6498 +Average/MinReturn -107.704 +Average/NumEpisodes 80 +Average/StdReturn 8.74835 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9328 +GaussianMLPPolicy/KLAfter 0.0062735 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.69911e-06 +GaussianMLPPolicy/LossBefore -4.76837e-10 +GaussianMLPPolicy/dLoss -2.69958e-06 +Iteration 289 +MetaTest/Average/AverageDiscountedReturn -48.2155 +MetaTest/Average/AverageReturn -48.2155 +MetaTest/Average/Iteration 289 +MetaTest/Average/MaxReturn -41.5586 +MetaTest/Average/MinReturn -57.3633 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.71046 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.2155 +MetaTest/__unnamed_task__/AverageReturn -48.2155 +MetaTest/__unnamed_task__/Iteration 289 +MetaTest/__unnamed_task__/MaxReturn -41.5586 +MetaTest/__unnamed_task__/MinReturn -57.3633 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.71046 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.28e+06 +__unnamed_task__/AverageDiscountedReturn -26.4067 +__unnamed_task__/AverageReturn -49.7548 +__unnamed_task__/Iteration 289 +__unnamed_task__/MaxReturn -39.6498 +__unnamed_task__/MinReturn -107.704 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.74835 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:47:10 | [maml_trainer] epoch #290 | Sampling for adapation and meta-testing... +2025-04-02 18:51:41 | [maml_trainer] epoch #290 | Finished meta-testing... +2025-04-02 18:51:41 | [maml_trainer] epoch #290 | Saving snapshot... +2025-04-02 18:52:04 | [maml_trainer] epoch #290 | Saved +2025-04-02 18:52:04 | [maml_trainer] epoch #290 | Time 362625.90 s +2025-04-02 18:52:04 | [maml_trainer] epoch #290 | EpochTime 1375.18 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.5525 +Average/AverageReturn -51.1386 +Average/Iteration 290 +Average/MaxReturn -38.3447 +Average/MinReturn -173.825 +Average/NumEpisodes 80 +Average/StdReturn 17.439 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93623 +GaussianMLPPolicy/KLAfter 0.00366376 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000131047 +GaussianMLPPolicy/LossBefore -2.98023e-11 +GaussianMLPPolicy/dLoss 0.000131047 +Iteration 290 +MetaTest/Average/AverageDiscountedReturn -47.2773 +MetaTest/Average/AverageReturn -47.2773 +MetaTest/Average/Iteration 290 +MetaTest/Average/MaxReturn -38.4339 +MetaTest/Average/MinReturn -61.9194 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.51573 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.2773 +MetaTest/__unnamed_task__/AverageReturn -47.2773 +MetaTest/__unnamed_task__/Iteration 290 +MetaTest/__unnamed_task__/MaxReturn -38.4339 +MetaTest/__unnamed_task__/MinReturn -61.9194 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.51573 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.312e+06 +__unnamed_task__/AverageDiscountedReturn -26.5525 +__unnamed_task__/AverageReturn -51.1386 +__unnamed_task__/Iteration 290 +__unnamed_task__/MaxReturn -38.3447 +__unnamed_task__/MinReturn -173.825 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.439 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:09:58 | [maml_trainer] epoch #291 | Sampling for adapation and meta-testing... +2025-04-02 19:14:19 | [maml_trainer] epoch #291 | Finished meta-testing... +2025-04-02 19:14:19 | [maml_trainer] epoch #291 | Saving snapshot... +2025-04-02 19:14:42 | [maml_trainer] epoch #291 | Saved +2025-04-02 19:14:42 | [maml_trainer] epoch #291 | Time 363983.62 s +2025-04-02 19:14:42 | [maml_trainer] epoch #291 | EpochTime 1357.72 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -25.4922 +Average/AverageReturn -47.3055 +Average/Iteration 291 +Average/MaxReturn -37.5354 +Average/MinReturn -109.983 +Average/NumEpisodes 80 +Average/StdReturn 8.45124 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93633 +GaussianMLPPolicy/KLAfter 0.00245433 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.39911e-05 +GaussianMLPPolicy/LossBefore -4.52995e-09 +GaussianMLPPolicy/dLoss 5.39866e-05 +Iteration 291 +MetaTest/Average/AverageDiscountedReturn -53.745 +MetaTest/Average/AverageReturn -53.745 +MetaTest/Average/Iteration 291 +MetaTest/Average/MaxReturn -41.1172 +MetaTest/Average/MinReturn -138.176 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.1354 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.745 +MetaTest/__unnamed_task__/AverageReturn -53.745 +MetaTest/__unnamed_task__/Iteration 291 +MetaTest/__unnamed_task__/MaxReturn -41.1172 +MetaTest/__unnamed_task__/MinReturn -138.176 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.1354 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.344e+06 +__unnamed_task__/AverageDiscountedReturn -25.4922 +__unnamed_task__/AverageReturn -47.3055 +__unnamed_task__/Iteration 291 +__unnamed_task__/MaxReturn -37.5354 +__unnamed_task__/MinReturn -109.983 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.45124 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:32:50 | [maml_trainer] epoch #292 | Sampling for adapation and meta-testing... +2025-04-02 19:37:23 | [maml_trainer] epoch #292 | Finished meta-testing... +2025-04-02 19:37:23 | [maml_trainer] epoch #292 | Saving snapshot... +2025-04-02 19:37:47 | [maml_trainer] epoch #292 | Saved +2025-04-02 19:37:47 | [maml_trainer] epoch #292 | Time 365368.97 s +2025-04-02 19:37:47 | [maml_trainer] epoch #292 | EpochTime 1385.35 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.7045 +Average/AverageReturn -51.7397 +Average/Iteration 292 +Average/MaxReturn -34.5054 +Average/MinReturn -146.289 +Average/NumEpisodes 80 +Average/StdReturn 18.8845 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93646 +GaussianMLPPolicy/KLAfter 0.0011406 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.17672e-06 +GaussianMLPPolicy/LossBefore 1.07586e-08 +GaussianMLPPolicy/dLoss -3.16596e-06 +Iteration 292 +MetaTest/Average/AverageDiscountedReturn -47.8679 +MetaTest/Average/AverageReturn -47.8679 +MetaTest/Average/Iteration 292 +MetaTest/Average/MaxReturn -38.18 +MetaTest/Average/MinReturn -62.3045 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.06281 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.8679 +MetaTest/__unnamed_task__/AverageReturn -47.8679 +MetaTest/__unnamed_task__/Iteration 292 +MetaTest/__unnamed_task__/MaxReturn -38.18 +MetaTest/__unnamed_task__/MinReturn -62.3045 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.06281 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.376e+06 +__unnamed_task__/AverageDiscountedReturn -26.7045 +__unnamed_task__/AverageReturn -51.7397 +__unnamed_task__/Iteration 292 +__unnamed_task__/MaxReturn -34.5054 +__unnamed_task__/MinReturn -146.289 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.8845 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:55:51 | [maml_trainer] epoch #293 | Sampling for adapation and meta-testing... +2025-04-02 20:00:22 | [maml_trainer] epoch #293 | Finished meta-testing... +2025-04-02 20:00:22 | [maml_trainer] epoch #293 | Saving snapshot... +2025-04-02 20:00:46 | [maml_trainer] epoch #293 | Saved +2025-04-02 20:00:46 | [maml_trainer] epoch #293 | Time 366747.73 s +2025-04-02 20:00:46 | [maml_trainer] epoch #293 | EpochTime 1378.76 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.0364 +Average/AverageReturn -49.1346 +Average/Iteration 293 +Average/MaxReturn -36.0713 +Average/MinReturn -143.645 +Average/NumEpisodes 80 +Average/StdReturn 13.2531 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93599 +GaussianMLPPolicy/KLAfter 0.00111793 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.88499e-06 +GaussianMLPPolicy/LossBefore 3.06964e-09 +GaussianMLPPolicy/dLoss -4.88192e-06 +Iteration 293 +MetaTest/Average/AverageDiscountedReturn -47.6989 +MetaTest/Average/AverageReturn -47.6989 +MetaTest/Average/Iteration 293 +MetaTest/Average/MaxReturn -37.4278 +MetaTest/Average/MinReturn -63.9098 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.76431 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.6989 +MetaTest/__unnamed_task__/AverageReturn -47.6989 +MetaTest/__unnamed_task__/Iteration 293 +MetaTest/__unnamed_task__/MaxReturn -37.4278 +MetaTest/__unnamed_task__/MinReturn -63.9098 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.76431 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.408e+06 +__unnamed_task__/AverageDiscountedReturn -26.0364 +__unnamed_task__/AverageReturn -49.1346 +__unnamed_task__/Iteration 293 +__unnamed_task__/MaxReturn -36.0713 +__unnamed_task__/MinReturn -143.645 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2531 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:18:48 | [maml_trainer] epoch #294 | Sampling for adapation and meta-testing... +2025-04-02 20:23:20 | [maml_trainer] epoch #294 | Finished meta-testing... +2025-04-02 20:23:20 | [maml_trainer] epoch #294 | Saving snapshot... +2025-04-02 20:23:42 | [maml_trainer] epoch #294 | Saved +2025-04-02 20:23:42 | [maml_trainer] epoch #294 | Time 368123.74 s +2025-04-02 20:23:42 | [maml_trainer] epoch #294 | EpochTime 1376.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.6417 +Average/AverageReturn -48.123 +Average/Iteration 294 +Average/MaxReturn -36.6812 +Average/MinReturn -68.639 +Average/NumEpisodes 80 +Average/StdReturn 6.58556 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93552 +GaussianMLPPolicy/KLAfter 0.0015276 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.90462e-05 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss -8.90423e-05 +Iteration 294 +MetaTest/Average/AverageDiscountedReturn -51.1748 +MetaTest/Average/AverageReturn -51.1748 +MetaTest/Average/Iteration 294 +MetaTest/Average/MaxReturn -40.2085 +MetaTest/Average/MinReturn -73.783 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.98806 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -51.1748 +MetaTest/__unnamed_task__/AverageReturn -51.1748 +MetaTest/__unnamed_task__/Iteration 294 +MetaTest/__unnamed_task__/MaxReturn -40.2085 +MetaTest/__unnamed_task__/MinReturn -73.783 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.98806 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.44e+06 +__unnamed_task__/AverageDiscountedReturn -25.6417 +__unnamed_task__/AverageReturn -48.123 +__unnamed_task__/Iteration 294 +__unnamed_task__/MaxReturn -36.6812 +__unnamed_task__/MinReturn -68.639 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.58556 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:41:25 | [maml_trainer] epoch #295 | Sampling for adapation and meta-testing... +2025-04-02 20:45:55 | [maml_trainer] epoch #295 | Finished meta-testing... +2025-04-02 20:45:55 | [maml_trainer] epoch #295 | Saving snapshot... +2025-04-02 20:46:18 | [maml_trainer] epoch #295 | Saved +2025-04-02 20:46:18 | [maml_trainer] epoch #295 | Time 369479.81 s +2025-04-02 20:46:18 | [maml_trainer] epoch #295 | EpochTime 1356.07 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -25.743 +Average/AverageReturn -48.7895 +Average/Iteration 295 +Average/MaxReturn -38.3491 +Average/MinReturn -124.28 +Average/NumEpisodes 80 +Average/StdReturn 10.6129 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93439 +GaussianMLPPolicy/KLAfter 0.00235352 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.65149e-05 +GaussianMLPPolicy/LossBefore -5.24521e-09 +GaussianMLPPolicy/dLoss -1.65201e-05 +Iteration 295 +MetaTest/Average/AverageDiscountedReturn -45.2038 +MetaTest/Average/AverageReturn -45.2038 +MetaTest/Average/Iteration 295 +MetaTest/Average/MaxReturn -37.089 +MetaTest/Average/MinReturn -54.2342 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.49292 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.2038 +MetaTest/__unnamed_task__/AverageReturn -45.2038 +MetaTest/__unnamed_task__/Iteration 295 +MetaTest/__unnamed_task__/MaxReturn -37.089 +MetaTest/__unnamed_task__/MinReturn -54.2342 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.49292 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.472e+06 +__unnamed_task__/AverageDiscountedReturn -25.743 +__unnamed_task__/AverageReturn -48.7895 +__unnamed_task__/Iteration 295 +__unnamed_task__/MaxReturn -38.3491 +__unnamed_task__/MinReturn -124.28 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.6129 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:04:23 | [maml_trainer] epoch #296 | Sampling for adapation and meta-testing... +2025-04-02 21:08:55 | [maml_trainer] epoch #296 | Finished meta-testing... +2025-04-02 21:08:55 | [maml_trainer] epoch #296 | Saving snapshot... +2025-04-02 21:09:20 | [maml_trainer] epoch #296 | Saved +2025-04-02 21:09:20 | [maml_trainer] epoch #296 | Time 370861.45 s +2025-04-02 21:09:20 | [maml_trainer] epoch #296 | EpochTime 1381.63 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -26.0854 +Average/AverageReturn -49.3701 +Average/Iteration 296 +Average/MaxReturn -38.7899 +Average/MinReturn -134.903 +Average/NumEpisodes 80 +Average/StdReturn 11.8654 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93593 +GaussianMLPPolicy/KLAfter 0.00308092 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.97328e-05 +GaussianMLPPolicy/LossBefore -9.53674e-10 +GaussianMLPPolicy/dLoss -1.97338e-05 +Iteration 296 +MetaTest/Average/AverageDiscountedReturn -44.7956 +MetaTest/Average/AverageReturn -44.7956 +MetaTest/Average/Iteration 296 +MetaTest/Average/MaxReturn -39.1654 +MetaTest/Average/MinReturn -63.4034 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.61701 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -44.7956 +MetaTest/__unnamed_task__/AverageReturn -44.7956 +MetaTest/__unnamed_task__/Iteration 296 +MetaTest/__unnamed_task__/MaxReturn -39.1654 +MetaTest/__unnamed_task__/MinReturn -63.4034 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.61701 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.504e+06 +__unnamed_task__/AverageDiscountedReturn -26.0854 +__unnamed_task__/AverageReturn -49.3701 +__unnamed_task__/Iteration 296 +__unnamed_task__/MaxReturn -38.7899 +__unnamed_task__/MinReturn -134.903 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8654 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:27:29 | [maml_trainer] epoch #297 | Sampling for adapation and meta-testing... +2025-04-02 21:32:00 | [maml_trainer] epoch #297 | Finished meta-testing... +2025-04-02 21:32:00 | [maml_trainer] epoch #297 | Saving snapshot... +2025-04-02 21:32:23 | [maml_trainer] epoch #297 | Saved +2025-04-02 21:32:23 | [maml_trainer] epoch #297 | Time 372244.92 s +2025-04-02 21:32:23 | [maml_trainer] epoch #297 | EpochTime 1383.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -25.7364 +Average/AverageReturn -48.8253 +Average/Iteration 297 +Average/MaxReturn -37.6423 +Average/MinReturn -132.916 +Average/NumEpisodes 80 +Average/StdReturn 14.9537 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93707 +GaussianMLPPolicy/KLAfter 0.0031288 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.19094e-05 +GaussianMLPPolicy/LossBefore 5.66244e-10 +GaussianMLPPolicy/dLoss 2.191e-05 +Iteration 297 +MetaTest/Average/AverageDiscountedReturn -47.9253 +MetaTest/Average/AverageReturn -47.9253 +MetaTest/Average/Iteration 297 +MetaTest/Average/MaxReturn -38.5407 +MetaTest/Average/MinReturn -72.7897 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.75781 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.9253 +MetaTest/__unnamed_task__/AverageReturn -47.9253 +MetaTest/__unnamed_task__/Iteration 297 +MetaTest/__unnamed_task__/MaxReturn -38.5407 +MetaTest/__unnamed_task__/MinReturn -72.7897 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.75781 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.536e+06 +__unnamed_task__/AverageDiscountedReturn -25.7364 +__unnamed_task__/AverageReturn -48.8253 +__unnamed_task__/Iteration 297 +__unnamed_task__/MaxReturn -37.6423 +__unnamed_task__/MinReturn -132.916 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.9537 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:49:36 | [maml_trainer] epoch #298 | Sampling for adapation and meta-testing... +2025-04-02 21:53:54 | [maml_trainer] epoch #298 | Finished meta-testing... +2025-04-02 21:53:54 | [maml_trainer] epoch #298 | Saving snapshot... +2025-04-02 21:54:17 | [maml_trainer] epoch #298 | Saved +2025-04-02 21:54:17 | [maml_trainer] epoch #298 | Time 373558.66 s +2025-04-02 21:54:17 | [maml_trainer] epoch #298 | EpochTime 1313.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.1623 +Average/AverageReturn -46.9454 +Average/Iteration 298 +Average/MaxReturn -37.3561 +Average/MinReturn -78.9948 +Average/NumEpisodes 80 +Average/StdReturn 7.00752 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93768 +GaussianMLPPolicy/KLAfter 0.0032865 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.79315e-05 +GaussianMLPPolicy/LossBefore -1.37091e-09 +GaussianMLPPolicy/dLoss -3.79329e-05 +Iteration 298 +MetaTest/Average/AverageDiscountedReturn -45.5467 +MetaTest/Average/AverageReturn -45.5467 +MetaTest/Average/Iteration 298 +MetaTest/Average/MaxReturn -36.9254 +MetaTest/Average/MinReturn -61.482 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.34609 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.5467 +MetaTest/__unnamed_task__/AverageReturn -45.5467 +MetaTest/__unnamed_task__/Iteration 298 +MetaTest/__unnamed_task__/MaxReturn -36.9254 +MetaTest/__unnamed_task__/MinReturn -61.482 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.34609 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.568e+06 +__unnamed_task__/AverageDiscountedReturn -25.1623 +__unnamed_task__/AverageReturn -46.9454 +__unnamed_task__/Iteration 298 +__unnamed_task__/MaxReturn -37.3561 +__unnamed_task__/MinReturn -78.9948 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.00752 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:12:07 | [maml_trainer] epoch #299 | Sampling for adapation and meta-testing... +2025-04-02 22:16:28 | [maml_trainer] epoch #299 | Finished meta-testing... +2025-04-02 22:16:28 | [maml_trainer] epoch #299 | Saving snapshot... +2025-04-02 22:16:50 | [maml_trainer] epoch #299 | Saved +2025-04-02 22:16:50 | [maml_trainer] epoch #299 | Time 374911.22 s +2025-04-02 22:16:50 | [maml_trainer] epoch #299 | EpochTime 1352.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -25.3927 +Average/AverageReturn -47.4834 +Average/Iteration 299 +Average/MaxReturn -36.4694 +Average/MinReturn -74.2167 +Average/NumEpisodes 80 +Average/StdReturn 6.85447 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93769 +GaussianMLPPolicy/KLAfter 0.00222253 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.20724e-05 +GaussianMLPPolicy/LossBefore -2.46763e-08 +GaussianMLPPolicy/dLoss 2.20478e-05 +Iteration 299 +MetaTest/Average/AverageDiscountedReturn -47.1999 +MetaTest/Average/AverageReturn -47.1999 +MetaTest/Average/Iteration 299 +MetaTest/Average/MaxReturn -38.2831 +MetaTest/Average/MinReturn -71.922 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.787 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.1999 +MetaTest/__unnamed_task__/AverageReturn -47.1999 +MetaTest/__unnamed_task__/Iteration 299 +MetaTest/__unnamed_task__/MaxReturn -38.2831 +MetaTest/__unnamed_task__/MinReturn -71.922 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.787 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.6e+06 +__unnamed_task__/AverageDiscountedReturn -25.3927 +__unnamed_task__/AverageReturn -47.4834 +__unnamed_task__/Iteration 299 +__unnamed_task__/MaxReturn -36.4694 +__unnamed_task__/MinReturn -74.2167 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.85447 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------