diff --git "a/experiment/maml_trainer_2/debug.log" "b/experiment/maml_trainer_2/debug.log" new file mode 100644--- /dev/null +++ "b/experiment/maml_trainer_2/debug.log" @@ -0,0 +1,14402 @@ +2025-04-02 13:45:18 | [maml_trainer] Logging to /home/h2khalil/MetaRL-Assistive-Robotics/data/local/experiment/maml_trainer_2 +2025-04-02 13:52:58 | [maml_trainer] Obtaining samples... +2025-04-02 13:58:24 | [maml_trainer] epoch #0 | Sampling for adapation and meta-testing... +2025-04-02 13:59:44 | [maml_trainer] epoch #0 | Finished meta-testing... +2025-04-02 13:59:44 | [maml_trainer] epoch #0 | Saving snapshot... +2025-04-02 14:00:02 | [maml_trainer] epoch #0 | Saved +2025-04-02 14:00:02 | [maml_trainer] epoch #0 | Time 424.38 s +2025-04-02 14:00:02 | [maml_trainer] epoch #0 | EpochTime 424.38 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -40.2857 +Average/AverageReturn -106.321 +Average/Iteration 0 +Average/MaxReturn -72.5083 +Average/MinReturn -150.002 +Average/NumEpisodes 80 +Average/StdReturn 19.6817 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92576 +GaussianMLPPolicy/KLAfter 0.0352945 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000188627 +GaussianMLPPolicy/LossBefore -6.25849e-10 +GaussianMLPPolicy/dLoss -0.000188627 +Iteration 0 +MetaTest/Average/AverageDiscountedReturn -103.38 +MetaTest/Average/AverageReturn -103.38 +MetaTest/Average/Iteration 0 +MetaTest/Average/MaxReturn -78.3711 +MetaTest/Average/MinReturn -130.707 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.5069 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -103.38 +MetaTest/__unnamed_task__/AverageReturn -103.38 +MetaTest/__unnamed_task__/Iteration 0 +MetaTest/__unnamed_task__/MaxReturn -78.3711 +MetaTest/__unnamed_task__/MinReturn -130.707 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.5069 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 32000 +__unnamed_task__/AverageDiscountedReturn -40.2857 +__unnamed_task__/AverageReturn -106.321 +__unnamed_task__/Iteration 0 +__unnamed_task__/MaxReturn -72.5083 +__unnamed_task__/MinReturn -150.002 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.6817 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 14:05:36 | [maml_trainer] epoch #1 | Sampling for adapation and meta-testing... +2025-04-02 14:06:59 | [maml_trainer] epoch #1 | Finished meta-testing... +2025-04-02 14:06:59 | [maml_trainer] epoch #1 | Saving snapshot... +2025-04-02 14:07:20 | [maml_trainer] epoch #1 | Saved +2025-04-02 14:07:20 | [maml_trainer] epoch #1 | Time 862.38 s +2025-04-02 14:07:20 | [maml_trainer] epoch #1 | EpochTime 438.00 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -39.061 +Average/AverageReturn -101.474 +Average/Iteration 1 +Average/MaxReturn -63.4282 +Average/MinReturn -152.53 +Average/NumEpisodes 80 +Average/StdReturn 18.2648 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92983 +GaussianMLPPolicy/KLAfter 0.0201304 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000104268 +GaussianMLPPolicy/LossBefore -2.26498e-09 +GaussianMLPPolicy/dLoss -0.00010427 +Iteration 1 +MetaTest/Average/AverageDiscountedReturn -98.9382 +MetaTest/Average/AverageReturn -98.9382 +MetaTest/Average/Iteration 1 +MetaTest/Average/MaxReturn -75.0828 +MetaTest/Average/MinReturn -128.008 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5367 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -98.9382 +MetaTest/__unnamed_task__/AverageReturn -98.9382 +MetaTest/__unnamed_task__/Iteration 1 +MetaTest/__unnamed_task__/MaxReturn -75.0828 +MetaTest/__unnamed_task__/MinReturn -128.008 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5367 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 64000 +__unnamed_task__/AverageDiscountedReturn -39.061 +__unnamed_task__/AverageReturn -101.474 +__unnamed_task__/Iteration 1 +__unnamed_task__/MaxReturn -63.4282 +__unnamed_task__/MinReturn -152.53 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.2648 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 14:12:53 | [maml_trainer] epoch #2 | Sampling for adapation and meta-testing... +2025-04-02 14:14:17 | [maml_trainer] epoch #2 | Finished meta-testing... +2025-04-02 14:14:17 | [maml_trainer] epoch #2 | Saving snapshot... +2025-04-02 14:14:37 | [maml_trainer] epoch #2 | Saved +2025-04-02 14:14:37 | [maml_trainer] epoch #2 | Time 1299.60 s +2025-04-02 14:14:37 | [maml_trainer] epoch #2 | EpochTime 437.21 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -37.789 +Average/AverageReturn -98.2822 +Average/Iteration 2 +Average/MaxReturn -62.8766 +Average/MinReturn -134.738 +Average/NumEpisodes 80 +Average/StdReturn 16.6256 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93524 +GaussianMLPPolicy/KLAfter 0.0135207 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.96046e-05 +GaussianMLPPolicy/LossBefore -1.02818e-08 +GaussianMLPPolicy/dLoss -4.96148e-05 +Iteration 2 +MetaTest/Average/AverageDiscountedReturn -94.6955 +MetaTest/Average/AverageReturn -94.6955 +MetaTest/Average/Iteration 2 +MetaTest/Average/MaxReturn -72.3712 +MetaTest/Average/MinReturn -133.005 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.9952 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -94.6955 +MetaTest/__unnamed_task__/AverageReturn -94.6955 +MetaTest/__unnamed_task__/Iteration 2 +MetaTest/__unnamed_task__/MaxReturn -72.3712 +MetaTest/__unnamed_task__/MinReturn -133.005 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.9952 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 96000 +__unnamed_task__/AverageDiscountedReturn -37.789 +__unnamed_task__/AverageReturn -98.2822 +__unnamed_task__/Iteration 2 +__unnamed_task__/MaxReturn -62.8766 +__unnamed_task__/MinReturn -134.738 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6256 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 14:20:09 | [maml_trainer] epoch #3 | Sampling for adapation and meta-testing... +2025-04-02 14:21:32 | [maml_trainer] epoch #3 | Finished meta-testing... +2025-04-02 14:21:32 | [maml_trainer] epoch #3 | Saving snapshot... +2025-04-02 14:21:52 | [maml_trainer] epoch #3 | Saved +2025-04-02 14:21:52 | [maml_trainer] epoch #3 | Time 1734.14 s +2025-04-02 14:21:52 | [maml_trainer] epoch #3 | EpochTime 434.54 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -36.5387 +Average/AverageReturn -94.6952 +Average/Iteration 3 +Average/MaxReturn -53.1432 +Average/MinReturn -133.797 +Average/NumEpisodes 80 +Average/StdReturn 17.8521 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93997 +GaussianMLPPolicy/KLAfter 0.0232095 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.63165e-07 +GaussianMLPPolicy/LossBefore -8.25524e-09 +GaussianMLPPolicy/dLoss 8.5491e-07 +Iteration 3 +MetaTest/Average/AverageDiscountedReturn -90.0885 +MetaTest/Average/AverageReturn -90.0885 +MetaTest/Average/Iteration 3 +MetaTest/Average/MaxReturn -70.1412 +MetaTest/Average/MinReturn -122.009 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.673 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -90.0885 +MetaTest/__unnamed_task__/AverageReturn -90.0885 +MetaTest/__unnamed_task__/Iteration 3 +MetaTest/__unnamed_task__/MaxReturn -70.1412 +MetaTest/__unnamed_task__/MinReturn -122.009 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.673 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 128000 +__unnamed_task__/AverageDiscountedReturn -36.5387 +__unnamed_task__/AverageReturn -94.6952 +__unnamed_task__/Iteration 3 +__unnamed_task__/MaxReturn -53.1432 +__unnamed_task__/MinReturn -133.797 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.8521 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:27:26 | [maml_trainer] epoch #4 | Sampling for adapation and meta-testing... +2025-04-02 14:28:50 | [maml_trainer] epoch #4 | Finished meta-testing... +2025-04-02 14:28:50 | [maml_trainer] epoch #4 | Saving snapshot... +2025-04-02 14:29:10 | [maml_trainer] epoch #4 | Saved +2025-04-02 14:29:10 | [maml_trainer] epoch #4 | Time 2172.34 s +2025-04-02 14:29:10 | [maml_trainer] epoch #4 | EpochTime 438.20 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -35.9369 +Average/AverageReturn -92.5914 +Average/Iteration 4 +Average/MaxReturn -46.1069 +Average/MinReturn -145.418 +Average/NumEpisodes 80 +Average/StdReturn 19.6243 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94542 +GaussianMLPPolicy/KLAfter 0.0319889 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000123046 +GaussianMLPPolicy/LossBefore -4.36604e-09 +GaussianMLPPolicy/dLoss -0.00012305 +Iteration 4 +MetaTest/Average/AverageDiscountedReturn -91.5655 +MetaTest/Average/AverageReturn -91.5655 +MetaTest/Average/Iteration 4 +MetaTest/Average/MaxReturn -65.3909 +MetaTest/Average/MinReturn -124.684 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.0634 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -91.5655 +MetaTest/__unnamed_task__/AverageReturn -91.5655 +MetaTest/__unnamed_task__/Iteration 4 +MetaTest/__unnamed_task__/MaxReturn -65.3909 +MetaTest/__unnamed_task__/MinReturn -124.684 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.0634 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 160000 +__unnamed_task__/AverageDiscountedReturn -35.9369 +__unnamed_task__/AverageReturn -92.5914 +__unnamed_task__/Iteration 4 +__unnamed_task__/MaxReturn -46.1069 +__unnamed_task__/MinReturn -145.418 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.6243 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:34:46 | [maml_trainer] epoch #5 | Sampling for adapation and meta-testing... +2025-04-02 14:36:08 | [maml_trainer] epoch #5 | Finished meta-testing... +2025-04-02 14:36:08 | [maml_trainer] epoch #5 | Saving snapshot... +2025-04-02 14:36:29 | [maml_trainer] epoch #5 | Saved +2025-04-02 14:36:29 | [maml_trainer] epoch #5 | Time 2611.14 s +2025-04-02 14:36:29 | [maml_trainer] epoch #5 | EpochTime 438.79 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -33.8114 +Average/AverageReturn -87.8835 +Average/Iteration 5 +Average/MaxReturn -38.3391 +Average/MinReturn -132.135 +Average/NumEpisodes 80 +Average/StdReturn 16.0984 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95093 +GaussianMLPPolicy/KLAfter 0.0242466 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.85896e-05 +GaussianMLPPolicy/LossBefore 3.45707e-09 +GaussianMLPPolicy/dLoss -6.85861e-05 +Iteration 5 +MetaTest/Average/AverageDiscountedReturn -82.0524 +MetaTest/Average/AverageReturn -82.0524 +MetaTest/Average/Iteration 5 +MetaTest/Average/MaxReturn -52.4803 +MetaTest/Average/MinReturn -125.233 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.2624 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -82.0524 +MetaTest/__unnamed_task__/AverageReturn -82.0524 +MetaTest/__unnamed_task__/Iteration 5 +MetaTest/__unnamed_task__/MaxReturn -52.4803 +MetaTest/__unnamed_task__/MinReturn -125.233 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.2624 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 192000 +__unnamed_task__/AverageDiscountedReturn -33.8114 +__unnamed_task__/AverageReturn -87.8835 +__unnamed_task__/Iteration 5 +__unnamed_task__/MaxReturn -38.3391 +__unnamed_task__/MinReturn -132.135 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.0984 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:42:02 | [maml_trainer] epoch #6 | Sampling for adapation and meta-testing... +2025-04-02 14:43:27 | [maml_trainer] epoch #6 | Finished meta-testing... +2025-04-02 14:43:27 | [maml_trainer] epoch #6 | Saving snapshot... +2025-04-02 14:43:47 | [maml_trainer] epoch #6 | Saved +2025-04-02 14:43:47 | [maml_trainer] epoch #6 | Time 3049.42 s +2025-04-02 14:43:47 | [maml_trainer] epoch #6 | EpochTime 438.28 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -32.1834 +Average/AverageReturn -81.7478 +Average/Iteration 6 +Average/MaxReturn -28.0771 +Average/MinReturn -126.066 +Average/NumEpisodes 80 +Average/StdReturn 17.5887 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95524 +GaussianMLPPolicy/KLAfter 0.0232031 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000240112 +GaussianMLPPolicy/LossBefore 6.91414e-09 +GaussianMLPPolicy/dLoss -0.000240105 +Iteration 6 +MetaTest/Average/AverageDiscountedReturn -71.208 +MetaTest/Average/AverageReturn -71.208 +MetaTest/Average/Iteration 6 +MetaTest/Average/MaxReturn -35.6478 +MetaTest/Average/MinReturn -112.513 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.899 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -71.208 +MetaTest/__unnamed_task__/AverageReturn -71.208 +MetaTest/__unnamed_task__/Iteration 6 +MetaTest/__unnamed_task__/MaxReturn -35.6478 +MetaTest/__unnamed_task__/MinReturn -112.513 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.899 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 224000 +__unnamed_task__/AverageDiscountedReturn -32.1834 +__unnamed_task__/AverageReturn -81.7478 +__unnamed_task__/Iteration 6 +__unnamed_task__/MaxReturn -28.0771 +__unnamed_task__/MinReturn -126.066 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.5887 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:49:23 | [maml_trainer] epoch #7 | Sampling for adapation and meta-testing... +2025-04-02 14:50:46 | [maml_trainer] epoch #7 | Finished meta-testing... +2025-04-02 14:50:46 | [maml_trainer] epoch #7 | Saving snapshot... +2025-04-02 14:51:05 | [maml_trainer] epoch #7 | Saved +2025-04-02 14:51:05 | [maml_trainer] epoch #7 | Time 3487.65 s +2025-04-02 14:51:05 | [maml_trainer] epoch #7 | EpochTime 438.22 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -30.6886 +Average/AverageReturn -76.3881 +Average/Iteration 7 +Average/MaxReturn -29.8284 +Average/MinReturn -115.643 +Average/NumEpisodes 80 +Average/StdReturn 17.1587 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96034 +GaussianMLPPolicy/KLAfter 0.0157956 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.92125e-05 +GaussianMLPPolicy/LossBefore -3.96371e-09 +GaussianMLPPolicy/dLoss -1.92164e-05 +Iteration 7 +MetaTest/Average/AverageDiscountedReturn -74.3218 +MetaTest/Average/AverageReturn -74.3218 +MetaTest/Average/Iteration 7 +MetaTest/Average/MaxReturn -55.395 +MetaTest/Average/MinReturn -102.926 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8749 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -74.3218 +MetaTest/__unnamed_task__/AverageReturn -74.3218 +MetaTest/__unnamed_task__/Iteration 7 +MetaTest/__unnamed_task__/MaxReturn -55.395 +MetaTest/__unnamed_task__/MinReturn -102.926 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8749 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 256000 +__unnamed_task__/AverageDiscountedReturn -30.6886 +__unnamed_task__/AverageReturn -76.3881 +__unnamed_task__/Iteration 7 +__unnamed_task__/MaxReturn -29.8284 +__unnamed_task__/MinReturn -115.643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.1587 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:56:38 | [maml_trainer] epoch #8 | Sampling for adapation and meta-testing... +2025-04-02 14:58:01 | [maml_trainer] epoch #8 | Finished meta-testing... +2025-04-02 14:58:01 | [maml_trainer] epoch #8 | Saving snapshot... +2025-04-02 14:58:21 | [maml_trainer] epoch #8 | Saved +2025-04-02 14:58:21 | [maml_trainer] epoch #8 | Time 3923.65 s +2025-04-02 14:58:21 | [maml_trainer] epoch #8 | EpochTime 435.99 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -30.0909 +Average/AverageReturn -73.6748 +Average/Iteration 8 +Average/MaxReturn -33.2851 +Average/MinReturn -116.034 +Average/NumEpisodes 80 +Average/StdReturn 17.394 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9613 +GaussianMLPPolicy/KLAfter 0.0169462 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.33053e-05 +GaussianMLPPolicy/LossBefore 1.87755e-09 +GaussianMLPPolicy/dLoss 7.33072e-05 +Iteration 8 +MetaTest/Average/AverageDiscountedReturn -72.4223 +MetaTest/Average/AverageReturn -72.4223 +MetaTest/Average/Iteration 8 +MetaTest/Average/MaxReturn -35.1502 +MetaTest/Average/MinReturn -108.248 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.2816 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -72.4223 +MetaTest/__unnamed_task__/AverageReturn -72.4223 +MetaTest/__unnamed_task__/Iteration 8 +MetaTest/__unnamed_task__/MaxReturn -35.1502 +MetaTest/__unnamed_task__/MinReturn -108.248 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.2816 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 288000 +__unnamed_task__/AverageDiscountedReturn -30.0909 +__unnamed_task__/AverageReturn -73.6748 +__unnamed_task__/Iteration 8 +__unnamed_task__/MaxReturn -33.2851 +__unnamed_task__/MinReturn -116.034 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.394 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:04:00 | [maml_trainer] epoch #9 | Sampling for adapation and meta-testing... +2025-04-02 15:05:27 | [maml_trainer] epoch #9 | Finished meta-testing... +2025-04-02 15:05:27 | [maml_trainer] epoch #9 | Saving snapshot... +2025-04-02 15:05:48 | [maml_trainer] epoch #9 | Saved +2025-04-02 15:05:48 | [maml_trainer] epoch #9 | Time 4370.41 s +2025-04-02 15:05:48 | [maml_trainer] epoch #9 | EpochTime 446.76 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -27.9603 +Average/AverageReturn -64.4378 +Average/Iteration 9 +Average/MaxReturn -21.2583 +Average/MinReturn -110.174 +Average/NumEpisodes 80 +Average/StdReturn 17.7726 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96363 +GaussianMLPPolicy/KLAfter 0.0104727 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.19302e-05 +GaussianMLPPolicy/LossBefore -1.2219e-09 +GaussianMLPPolicy/dLoss -9.19314e-05 +Iteration 9 +MetaTest/Average/AverageDiscountedReturn -63.6072 +MetaTest/Average/AverageReturn -63.6072 +MetaTest/Average/Iteration 9 +MetaTest/Average/MaxReturn -45.0368 +MetaTest/Average/MinReturn -88.9944 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8703 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -63.6072 +MetaTest/__unnamed_task__/AverageReturn -63.6072 +MetaTest/__unnamed_task__/Iteration 9 +MetaTest/__unnamed_task__/MaxReturn -45.0368 +MetaTest/__unnamed_task__/MinReturn -88.9944 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8703 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 320000 +__unnamed_task__/AverageDiscountedReturn -27.9603 +__unnamed_task__/AverageReturn -64.4378 +__unnamed_task__/Iteration 9 +__unnamed_task__/MaxReturn -21.2583 +__unnamed_task__/MinReturn -110.174 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.7726 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:11:37 | [maml_trainer] epoch #10 | Sampling for adapation and meta-testing... +2025-04-02 15:13:05 | [maml_trainer] epoch #10 | Finished meta-testing... +2025-04-02 15:13:05 | [maml_trainer] epoch #10 | Saving snapshot... +2025-04-02 15:13:26 | [maml_trainer] epoch #10 | Saved +2025-04-02 15:13:26 | [maml_trainer] epoch #10 | Time 4828.16 s +2025-04-02 15:13:26 | [maml_trainer] epoch #10 | EpochTime 457.74 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -26.0895 +Average/AverageReturn -60.6789 +Average/Iteration 10 +Average/MaxReturn -33.7186 +Average/MinReturn -93.3737 +Average/NumEpisodes 80 +Average/StdReturn 13.3663 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96589 +GaussianMLPPolicy/KLAfter 0.0112622 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.42797e-05 +GaussianMLPPolicy/LossBefore -1.10269e-08 +GaussianMLPPolicy/dLoss 9.42686e-05 +Iteration 10 +MetaTest/Average/AverageDiscountedReturn -54.2535 +MetaTest/Average/AverageReturn -54.2535 +MetaTest/Average/Iteration 10 +MetaTest/Average/MaxReturn -29.9755 +MetaTest/Average/MinReturn -86.9711 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.0625 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -54.2535 +MetaTest/__unnamed_task__/AverageReturn -54.2535 +MetaTest/__unnamed_task__/Iteration 10 +MetaTest/__unnamed_task__/MaxReturn -29.9755 +MetaTest/__unnamed_task__/MinReturn -86.9711 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.0625 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 352000 +__unnamed_task__/AverageDiscountedReturn -26.0895 +__unnamed_task__/AverageReturn -60.6789 +__unnamed_task__/Iteration 10 +__unnamed_task__/MaxReturn -33.7186 +__unnamed_task__/MinReturn -93.3737 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.3663 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:19:13 | [maml_trainer] epoch #11 | Sampling for adapation and meta-testing... +2025-04-02 15:20:41 | [maml_trainer] epoch #11 | Finished meta-testing... +2025-04-02 15:20:41 | [maml_trainer] epoch #11 | Saving snapshot... +2025-04-02 15:21:02 | [maml_trainer] epoch #11 | Saved +2025-04-02 15:21:02 | [maml_trainer] epoch #11 | Time 5284.53 s +2025-04-02 15:21:02 | [maml_trainer] epoch #11 | EpochTime 456.37 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -24.5232 +Average/AverageReturn -56.1248 +Average/Iteration 11 +Average/MaxReturn -26.7654 +Average/MinReturn -86.5341 +Average/NumEpisodes 80 +Average/StdReturn 13.2961 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96678 +GaussianMLPPolicy/KLAfter 0.0120825 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000174274 +GaussianMLPPolicy/LossBefore -9.38773e-09 +GaussianMLPPolicy/dLoss 0.000174264 +Iteration 11 +MetaTest/Average/AverageDiscountedReturn -53.4935 +MetaTest/Average/AverageReturn -53.4935 +MetaTest/Average/Iteration 11 +MetaTest/Average/MaxReturn -32.2421 +MetaTest/Average/MinReturn -80.4587 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.532 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.4935 +MetaTest/__unnamed_task__/AverageReturn -53.4935 +MetaTest/__unnamed_task__/Iteration 11 +MetaTest/__unnamed_task__/MaxReturn -32.2421 +MetaTest/__unnamed_task__/MinReturn -80.4587 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.532 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 384000 +__unnamed_task__/AverageDiscountedReturn -24.5232 +__unnamed_task__/AverageReturn -56.1248 +__unnamed_task__/Iteration 11 +__unnamed_task__/MaxReturn -26.7654 +__unnamed_task__/MinReturn -86.5341 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2961 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:26:46 | [maml_trainer] epoch #12 | Sampling for adapation and meta-testing... +2025-04-02 15:28:12 | [maml_trainer] epoch #12 | Finished meta-testing... +2025-04-02 15:28:12 | [maml_trainer] epoch #12 | Saving snapshot... +2025-04-02 15:28:32 | [maml_trainer] epoch #12 | Saved +2025-04-02 15:28:32 | [maml_trainer] epoch #12 | Time 5734.63 s +2025-04-02 15:28:32 | [maml_trainer] epoch #12 | EpochTime 450.10 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -24.048 +Average/AverageReturn -54.9135 +Average/Iteration 12 +Average/MaxReturn -1.16963 +Average/MinReturn -88.8953 +Average/NumEpisodes 80 +Average/StdReturn 14.4205 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96975 +GaussianMLPPolicy/KLAfter 0.0108115 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000223943 +GaussianMLPPolicy/LossBefore -5.00679e-09 +GaussianMLPPolicy/dLoss 0.000223938 +Iteration 12 +MetaTest/Average/AverageDiscountedReturn -47.8777 +MetaTest/Average/AverageReturn -47.8777 +MetaTest/Average/Iteration 12 +MetaTest/Average/MaxReturn -19.7344 +MetaTest/Average/MinReturn -71.575 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3723 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.8777 +MetaTest/__unnamed_task__/AverageReturn -47.8777 +MetaTest/__unnamed_task__/Iteration 12 +MetaTest/__unnamed_task__/MaxReturn -19.7344 +MetaTest/__unnamed_task__/MinReturn -71.575 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3723 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 416000 +__unnamed_task__/AverageDiscountedReturn -24.048 +__unnamed_task__/AverageReturn -54.9135 +__unnamed_task__/Iteration 12 +__unnamed_task__/MaxReturn -1.16963 +__unnamed_task__/MinReturn -88.8953 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4205 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:34:24 | [maml_trainer] epoch #13 | Sampling for adapation and meta-testing... +2025-04-02 15:35:52 | [maml_trainer] epoch #13 | Finished meta-testing... +2025-04-02 15:35:52 | [maml_trainer] epoch #13 | Saving snapshot... +2025-04-02 15:36:14 | [maml_trainer] epoch #13 | Saved +2025-04-02 15:36:14 | [maml_trainer] epoch #13 | Time 6196.34 s +2025-04-02 15:36:14 | [maml_trainer] epoch #13 | EpochTime 461.71 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -22.242 +Average/AverageReturn -49.7107 +Average/Iteration 13 +Average/MaxReturn -23.5815 +Average/MinReturn -89.7381 +Average/NumEpisodes 80 +Average/StdReturn 12.4535 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97205 +GaussianMLPPolicy/KLAfter 0.0104842 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.08625e-05 +GaussianMLPPolicy/LossBefore 1.74344e-08 +GaussianMLPPolicy/dLoss 4.08799e-05 +Iteration 13 +MetaTest/Average/AverageDiscountedReturn -48.846 +MetaTest/Average/AverageReturn -48.846 +MetaTest/Average/Iteration 13 +MetaTest/Average/MaxReturn -33.0187 +MetaTest/Average/MinReturn -76.9736 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3726 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.846 +MetaTest/__unnamed_task__/AverageReturn -48.846 +MetaTest/__unnamed_task__/Iteration 13 +MetaTest/__unnamed_task__/MaxReturn -33.0187 +MetaTest/__unnamed_task__/MinReturn -76.9736 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3726 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 448000 +__unnamed_task__/AverageDiscountedReturn -22.242 +__unnamed_task__/AverageReturn -49.7107 +__unnamed_task__/Iteration 13 +__unnamed_task__/MaxReturn -23.5815 +__unnamed_task__/MinReturn -89.7381 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4535 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:42:09 | [maml_trainer] epoch #14 | Sampling for adapation and meta-testing... +2025-04-02 15:43:33 | [maml_trainer] epoch #14 | Finished meta-testing... +2025-04-02 15:43:33 | [maml_trainer] epoch #14 | Saving snapshot... +2025-04-02 15:43:53 | [maml_trainer] epoch #14 | Saved +2025-04-02 15:43:53 | [maml_trainer] epoch #14 | Time 6655.02 s +2025-04-02 15:43:53 | [maml_trainer] epoch #14 | EpochTime 458.68 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.5863 +Average/AverageReturn -46.5944 +Average/Iteration 14 +Average/MaxReturn -8.32939 +Average/MinReturn -85.2561 +Average/NumEpisodes 80 +Average/StdReturn 12.287 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97527 +GaussianMLPPolicy/KLAfter 0.00564095 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.37284e-05 +GaussianMLPPolicy/LossBefore 1.12057e-08 +GaussianMLPPolicy/dLoss 6.37396e-05 +Iteration 14 +MetaTest/Average/AverageDiscountedReturn -47.7746 +MetaTest/Average/AverageReturn -47.7746 +MetaTest/Average/Iteration 14 +MetaTest/Average/MaxReturn -27.4652 +MetaTest/Average/MinReturn -68.4596 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.5294 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.7746 +MetaTest/__unnamed_task__/AverageReturn -47.7746 +MetaTest/__unnamed_task__/Iteration 14 +MetaTest/__unnamed_task__/MaxReturn -27.4652 +MetaTest/__unnamed_task__/MinReturn -68.4596 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.5294 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 480000 +__unnamed_task__/AverageDiscountedReturn -20.5863 +__unnamed_task__/AverageReturn -46.5944 +__unnamed_task__/Iteration 14 +__unnamed_task__/MaxReturn -8.32939 +__unnamed_task__/MinReturn -85.2561 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.287 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:49:35 | [maml_trainer] epoch #15 | Sampling for adapation and meta-testing... +2025-04-02 15:51:03 | [maml_trainer] epoch #15 | Finished meta-testing... +2025-04-02 15:51:03 | [maml_trainer] epoch #15 | Saving snapshot... +2025-04-02 15:51:23 | [maml_trainer] epoch #15 | Saved +2025-04-02 15:51:23 | [maml_trainer] epoch #15 | Time 7105.23 s +2025-04-02 15:51:23 | [maml_trainer] epoch #15 | EpochTime 450.20 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.8721 +Average/AverageReturn -47.2043 +Average/Iteration 15 +Average/MaxReturn -19.9091 +Average/MinReturn -83.5795 +Average/NumEpisodes 80 +Average/StdReturn 12.3718 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9786 +GaussianMLPPolicy/KLAfter 0.00788591 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000247161 +GaussianMLPPolicy/LossBefore 1.42455e-08 +GaussianMLPPolicy/dLoss 0.000247175 +Iteration 15 +MetaTest/Average/AverageDiscountedReturn -48.3999 +MetaTest/Average/AverageReturn -48.3999 +MetaTest/Average/Iteration 15 +MetaTest/Average/MaxReturn -31.6868 +MetaTest/Average/MinReturn -65.1542 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.96446 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.3999 +MetaTest/__unnamed_task__/AverageReturn -48.3999 +MetaTest/__unnamed_task__/Iteration 15 +MetaTest/__unnamed_task__/MaxReturn -31.6868 +MetaTest/__unnamed_task__/MinReturn -65.1542 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.96446 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 512000 +__unnamed_task__/AverageDiscountedReturn -20.8721 +__unnamed_task__/AverageReturn -47.2043 +__unnamed_task__/Iteration 15 +__unnamed_task__/MaxReturn -19.9091 +__unnamed_task__/MinReturn -83.5795 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3718 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:57:21 | [maml_trainer] epoch #16 | Sampling for adapation and meta-testing... +2025-04-02 15:58:51 | [maml_trainer] epoch #16 | Finished meta-testing... +2025-04-02 15:58:51 | [maml_trainer] epoch #16 | Saving snapshot... +2025-04-02 15:59:12 | [maml_trainer] epoch #16 | Saved +2025-04-02 15:59:12 | [maml_trainer] epoch #16 | Time 7574.78 s +2025-04-02 15:59:12 | [maml_trainer] epoch #16 | EpochTime 469.55 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.4886 +Average/AverageReturn -47.724 +Average/Iteration 16 +Average/MaxReturn -11.9902 +Average/MinReturn -80.0682 +Average/NumEpisodes 80 +Average/StdReturn 13.8081 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98077 +GaussianMLPPolicy/KLAfter 0.00783644 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.25503e-05 +GaussianMLPPolicy/LossBefore -1.21295e-08 +GaussianMLPPolicy/dLoss 8.25382e-05 +Iteration 16 +MetaTest/Average/AverageDiscountedReturn -53.7156 +MetaTest/Average/AverageReturn -53.7156 +MetaTest/Average/Iteration 16 +MetaTest/Average/MaxReturn -18.2111 +MetaTest/Average/MinReturn -77.4959 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.8902 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -53.7156 +MetaTest/__unnamed_task__/AverageReturn -53.7156 +MetaTest/__unnamed_task__/Iteration 16 +MetaTest/__unnamed_task__/MaxReturn -18.2111 +MetaTest/__unnamed_task__/MinReturn -77.4959 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.8902 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 544000 +__unnamed_task__/AverageDiscountedReturn -20.4886 +__unnamed_task__/AverageReturn -47.724 +__unnamed_task__/Iteration 16 +__unnamed_task__/MaxReturn -11.9902 +__unnamed_task__/MinReturn -80.0682 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8081 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:04:55 | [maml_trainer] epoch #17 | Sampling for adapation and meta-testing... +2025-04-02 16:06:21 | [maml_trainer] epoch #17 | Finished meta-testing... +2025-04-02 16:06:21 | [maml_trainer] epoch #17 | Saving snapshot... +2025-04-02 16:06:40 | [maml_trainer] epoch #17 | Saved +2025-04-02 16:06:40 | [maml_trainer] epoch #17 | Time 8022.96 s +2025-04-02 16:06:40 | [maml_trainer] epoch #17 | EpochTime 448.18 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.4873 +Average/AverageReturn -49.4591 +Average/Iteration 17 +Average/MaxReturn -14.5515 +Average/MinReturn -88.2621 +Average/NumEpisodes 80 +Average/StdReturn 13.7594 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98279 +GaussianMLPPolicy/KLAfter 0.00358154 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.53847e-05 +GaussianMLPPolicy/LossBefore 9.0003e-09 +GaussianMLPPolicy/dLoss -3.53757e-05 +Iteration 17 +MetaTest/Average/AverageDiscountedReturn -48.118 +MetaTest/Average/AverageReturn -48.118 +MetaTest/Average/Iteration 17 +MetaTest/Average/MaxReturn -25.9397 +MetaTest/Average/MinReturn -83.1821 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.3323 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.118 +MetaTest/__unnamed_task__/AverageReturn -48.118 +MetaTest/__unnamed_task__/Iteration 17 +MetaTest/__unnamed_task__/MaxReturn -25.9397 +MetaTest/__unnamed_task__/MinReturn -83.1821 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.3323 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 576000 +__unnamed_task__/AverageDiscountedReturn -21.4873 +__unnamed_task__/AverageReturn -49.4591 +__unnamed_task__/Iteration 17 +__unnamed_task__/MaxReturn -14.5515 +__unnamed_task__/MinReturn -88.2621 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7594 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:12:20 | [maml_trainer] epoch #18 | Sampling for adapation and meta-testing... +2025-04-02 16:13:44 | [maml_trainer] epoch #18 | Finished meta-testing... +2025-04-02 16:13:44 | [maml_trainer] epoch #18 | Saving snapshot... +2025-04-02 16:14:03 | [maml_trainer] epoch #18 | Saved +2025-04-02 16:14:03 | [maml_trainer] epoch #18 | Time 8465.84 s +2025-04-02 16:14:03 | [maml_trainer] epoch #18 | EpochTime 442.88 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -19.9892 +Average/AverageReturn -47.1975 +Average/Iteration 18 +Average/MaxReturn -5.71725 +Average/MinReturn -84.6149 +Average/NumEpisodes 80 +Average/StdReturn 13.5506 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98288 +GaussianMLPPolicy/KLAfter 0.00741562 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.22289e-05 +GaussianMLPPolicy/LossBefore -6.07967e-09 +GaussianMLPPolicy/dLoss -6.2235e-05 +Iteration 18 +MetaTest/Average/AverageDiscountedReturn -50.3331 +MetaTest/Average/AverageReturn -50.3331 +MetaTest/Average/Iteration 18 +MetaTest/Average/MaxReturn -33.7117 +MetaTest/Average/MinReturn -66.9821 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3601 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -50.3331 +MetaTest/__unnamed_task__/AverageReturn -50.3331 +MetaTest/__unnamed_task__/Iteration 18 +MetaTest/__unnamed_task__/MaxReturn -33.7117 +MetaTest/__unnamed_task__/MinReturn -66.9821 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3601 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 608000 +__unnamed_task__/AverageDiscountedReturn -19.9892 +__unnamed_task__/AverageReturn -47.1975 +__unnamed_task__/Iteration 18 +__unnamed_task__/MaxReturn -5.71725 +__unnamed_task__/MinReturn -84.6149 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.5506 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:19:39 | [maml_trainer] epoch #19 | Sampling for adapation and meta-testing... +2025-04-02 16:21:03 | [maml_trainer] epoch #19 | Finished meta-testing... +2025-04-02 16:21:03 | [maml_trainer] epoch #19 | Saving snapshot... +2025-04-02 16:21:24 | [maml_trainer] epoch #19 | Saved +2025-04-02 16:21:24 | [maml_trainer] epoch #19 | Time 8905.98 s +2025-04-02 16:21:24 | [maml_trainer] epoch #19 | EpochTime 440.14 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.1342 +Average/AverageReturn -51.7012 +Average/Iteration 19 +Average/MaxReturn -23.2172 +Average/MinReturn -84.2754 +Average/NumEpisodes 80 +Average/StdReturn 13.8437 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98431 +GaussianMLPPolicy/KLAfter 0.00616102 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.59737e-05 +GaussianMLPPolicy/LossBefore -2.80142e-09 +GaussianMLPPolicy/dLoss -9.59765e-05 +Iteration 19 +MetaTest/Average/AverageDiscountedReturn -49.6763 +MetaTest/Average/AverageReturn -49.6763 +MetaTest/Average/Iteration 19 +MetaTest/Average/MaxReturn -18.3285 +MetaTest/Average/MinReturn -81.7329 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.6254 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -49.6763 +MetaTest/__unnamed_task__/AverageReturn -49.6763 +MetaTest/__unnamed_task__/Iteration 19 +MetaTest/__unnamed_task__/MaxReturn -18.3285 +MetaTest/__unnamed_task__/MinReturn -81.7329 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.6254 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 640000 +__unnamed_task__/AverageDiscountedReturn -21.1342 +__unnamed_task__/AverageReturn -51.7012 +__unnamed_task__/Iteration 19 +__unnamed_task__/MaxReturn -23.2172 +__unnamed_task__/MinReturn -84.2754 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8437 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:27:36 | [maml_trainer] epoch #20 | Sampling for adapation and meta-testing... +2025-04-02 16:29:39 | [maml_trainer] epoch #20 | Finished meta-testing... +2025-04-02 16:29:39 | [maml_trainer] epoch #20 | Saving snapshot... +2025-04-02 16:30:07 | [maml_trainer] epoch #20 | Saved +2025-04-02 16:30:07 | [maml_trainer] epoch #20 | Time 9429.36 s +2025-04-02 16:30:07 | [maml_trainer] epoch #20 | EpochTime 523.37 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.4741 +Average/AverageReturn -49.8087 +Average/Iteration 20 +Average/MaxReturn -14.8797 +Average/MinReturn -79.3515 +Average/NumEpisodes 80 +Average/StdReturn 14.4902 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98573 +GaussianMLPPolicy/KLAfter 0.00586922 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.50178e-05 +GaussianMLPPolicy/LossBefore -2.8491e-08 +GaussianMLPPolicy/dLoss 1.49893e-05 +Iteration 20 +MetaTest/Average/AverageDiscountedReturn -57.8758 +MetaTest/Average/AverageReturn -57.8758 +MetaTest/Average/Iteration 20 +MetaTest/Average/MaxReturn -31.6822 +MetaTest/Average/MinReturn -98.2409 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.233 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -57.8758 +MetaTest/__unnamed_task__/AverageReturn -57.8758 +MetaTest/__unnamed_task__/Iteration 20 +MetaTest/__unnamed_task__/MaxReturn -31.6822 +MetaTest/__unnamed_task__/MinReturn -98.2409 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.233 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 672000 +__unnamed_task__/AverageDiscountedReturn -20.4741 +__unnamed_task__/AverageReturn -49.8087 +__unnamed_task__/Iteration 20 +__unnamed_task__/MaxReturn -14.8797 +__unnamed_task__/MinReturn -79.3515 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4902 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:36:54 | [maml_trainer] epoch #21 | Sampling for adapation and meta-testing... +2025-04-02 16:38:23 | [maml_trainer] epoch #21 | Finished meta-testing... +2025-04-02 16:38:23 | [maml_trainer] epoch #21 | Saving snapshot... +2025-04-02 16:38:44 | [maml_trainer] epoch #21 | Saved +2025-04-02 16:38:44 | [maml_trainer] epoch #21 | Time 9946.20 s +2025-04-02 16:38:44 | [maml_trainer] epoch #21 | EpochTime 516.84 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.8769 +Average/AverageReturn -54.6404 +Average/Iteration 21 +Average/MaxReturn -25.3388 +Average/MinReturn -86.2781 +Average/NumEpisodes 80 +Average/StdReturn 15.1089 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98798 +GaussianMLPPolicy/KLAfter 0.00540618 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000125856 +GaussianMLPPolicy/LossBefore 7.92742e-09 +GaussianMLPPolicy/dLoss -0.000125848 +Iteration 21 +MetaTest/Average/AverageDiscountedReturn -62.6328 +MetaTest/Average/AverageReturn -62.6328 +MetaTest/Average/Iteration 21 +MetaTest/Average/MaxReturn -33.118 +MetaTest/Average/MinReturn -99.7727 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.9384 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -62.6328 +MetaTest/__unnamed_task__/AverageReturn -62.6328 +MetaTest/__unnamed_task__/Iteration 21 +MetaTest/__unnamed_task__/MaxReturn -33.118 +MetaTest/__unnamed_task__/MinReturn -99.7727 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.9384 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 704000 +__unnamed_task__/AverageDiscountedReturn -21.8769 +__unnamed_task__/AverageReturn -54.6404 +__unnamed_task__/Iteration 21 +__unnamed_task__/MaxReturn -25.3388 +__unnamed_task__/MinReturn -86.2781 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1089 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:44:42 | [maml_trainer] epoch #22 | Sampling for adapation and meta-testing... +2025-04-02 16:46:15 | [maml_trainer] epoch #22 | Finished meta-testing... +2025-04-02 16:46:15 | [maml_trainer] epoch #22 | Saving snapshot... +2025-04-02 16:46:36 | [maml_trainer] epoch #22 | Saved +2025-04-02 16:46:36 | [maml_trainer] epoch #22 | Time 10418.41 s +2025-04-02 16:46:36 | [maml_trainer] epoch #22 | EpochTime 472.20 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.0592 +Average/AverageReturn -53.3162 +Average/Iteration 22 +Average/MaxReturn -16.2946 +Average/MinReturn -86.9996 +Average/NumEpisodes 80 +Average/StdReturn 17.0866 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98927 +GaussianMLPPolicy/KLAfter 0.0101861 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.00344e-05 +GaussianMLPPolicy/LossBefore -9.47714e-09 +GaussianMLPPolicy/dLoss -2.00439e-05 +Iteration 22 +MetaTest/Average/AverageDiscountedReturn -52.3263 +MetaTest/Average/AverageReturn -52.3263 +MetaTest/Average/Iteration 22 +MetaTest/Average/MaxReturn -6.16666 +MetaTest/Average/MinReturn -95.7249 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.8507 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -52.3263 +MetaTest/__unnamed_task__/AverageReturn -52.3263 +MetaTest/__unnamed_task__/Iteration 22 +MetaTest/__unnamed_task__/MaxReturn -6.16666 +MetaTest/__unnamed_task__/MinReturn -95.7249 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.8507 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 736000 +__unnamed_task__/AverageDiscountedReturn -21.0592 +__unnamed_task__/AverageReturn -53.3162 +__unnamed_task__/Iteration 22 +__unnamed_task__/MaxReturn -16.2946 +__unnamed_task__/MinReturn -86.9996 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0866 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:52:39 | [maml_trainer] epoch #23 | Sampling for adapation and meta-testing... +2025-04-02 16:54:10 | [maml_trainer] epoch #23 | Finished meta-testing... +2025-04-02 16:54:10 | [maml_trainer] epoch #23 | Saving snapshot... +2025-04-02 16:54:31 | [maml_trainer] epoch #23 | Saved +2025-04-02 16:54:31 | [maml_trainer] epoch #23 | Time 10893.22 s +2025-04-02 16:54:31 | [maml_trainer] epoch #23 | EpochTime 474.81 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.5016 +Average/AverageReturn -55.0622 +Average/Iteration 23 +Average/MaxReturn -5.55675 +Average/MinReturn -95.1882 +Average/NumEpisodes 80 +Average/StdReturn 17.1435 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98991 +GaussianMLPPolicy/KLAfter 0.00793389 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000166197 +GaussianMLPPolicy/LossBefore 4.70877e-09 +GaussianMLPPolicy/dLoss -0.000166192 +Iteration 23 +MetaTest/Average/AverageDiscountedReturn -55.298 +MetaTest/Average/AverageReturn -55.298 +MetaTest/Average/Iteration 23 +MetaTest/Average/MaxReturn -27.2043 +MetaTest/Average/MinReturn -94.044 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.9316 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -55.298 +MetaTest/__unnamed_task__/AverageReturn -55.298 +MetaTest/__unnamed_task__/Iteration 23 +MetaTest/__unnamed_task__/MaxReturn -27.2043 +MetaTest/__unnamed_task__/MinReturn -94.044 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.9316 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 768000 +__unnamed_task__/AverageDiscountedReturn -21.5016 +__unnamed_task__/AverageReturn -55.0622 +__unnamed_task__/Iteration 23 +__unnamed_task__/MaxReturn -5.55675 +__unnamed_task__/MinReturn -95.1882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.1435 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:00:34 | [maml_trainer] epoch #24 | Sampling for adapation and meta-testing... +2025-04-02 17:02:05 | [maml_trainer] epoch #24 | Finished meta-testing... +2025-04-02 17:02:05 | [maml_trainer] epoch #24 | Saving snapshot... +2025-04-02 17:02:25 | [maml_trainer] epoch #24 | Saved +2025-04-02 17:02:25 | [maml_trainer] epoch #24 | Time 11367.39 s +2025-04-02 17:02:25 | [maml_trainer] epoch #24 | EpochTime 474.17 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -21.9132 +Average/AverageReturn -56.6954 +Average/Iteration 24 +Average/MaxReturn 0.517216 +Average/MinReturn -94.4109 +Average/NumEpisodes 80 +Average/StdReturn 18.4242 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99128 +GaussianMLPPolicy/KLAfter 0.00964961 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000113122 +GaussianMLPPolicy/LossBefore -2.31266e-08 +GaussianMLPPolicy/dLoss 0.000113098 +Iteration 24 +MetaTest/Average/AverageDiscountedReturn -56.3885 +MetaTest/Average/AverageReturn -56.3885 +MetaTest/Average/Iteration 24 +MetaTest/Average/MaxReturn -31.4536 +MetaTest/Average/MinReturn -91.8113 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.8316 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -56.3885 +MetaTest/__unnamed_task__/AverageReturn -56.3885 +MetaTest/__unnamed_task__/Iteration 24 +MetaTest/__unnamed_task__/MaxReturn -31.4536 +MetaTest/__unnamed_task__/MinReturn -91.8113 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.8316 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 800000 +__unnamed_task__/AverageDiscountedReturn -21.9132 +__unnamed_task__/AverageReturn -56.6954 +__unnamed_task__/Iteration 24 +__unnamed_task__/MaxReturn 0.517216 +__unnamed_task__/MinReturn -94.4109 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4242 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:08:22 | [maml_trainer] epoch #25 | Sampling for adapation and meta-testing... +2025-04-02 17:09:46 | [maml_trainer] epoch #25 | Finished meta-testing... +2025-04-02 17:09:46 | [maml_trainer] epoch #25 | Saving snapshot... +2025-04-02 17:10:08 | [maml_trainer] epoch #25 | Saved +2025-04-02 17:10:08 | [maml_trainer] epoch #25 | Time 11830.16 s +2025-04-02 17:10:08 | [maml_trainer] epoch #25 | EpochTime 462.77 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.9843 +Average/AverageReturn -54.9349 +Average/Iteration 25 +Average/MaxReturn -21.1848 +Average/MinReturn -95.1646 +Average/NumEpisodes 80 +Average/StdReturn 16.4727 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99405 +GaussianMLPPolicy/KLAfter 0.00842431 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.92002e-05 +GaussianMLPPolicy/LossBefore 7.95722e-09 +GaussianMLPPolicy/dLoss 6.92082e-05 +Iteration 25 +MetaTest/Average/AverageDiscountedReturn -45.4068 +MetaTest/Average/AverageReturn -45.4068 +MetaTest/Average/Iteration 25 +MetaTest/Average/MaxReturn 20.5466 +MetaTest/Average/MinReturn -71.3081 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.4979 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.4068 +MetaTest/__unnamed_task__/AverageReturn -45.4068 +MetaTest/__unnamed_task__/Iteration 25 +MetaTest/__unnamed_task__/MaxReturn 20.5466 +MetaTest/__unnamed_task__/MinReturn -71.3081 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.4979 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 832000 +__unnamed_task__/AverageDiscountedReturn -20.9843 +__unnamed_task__/AverageReturn -54.9349 +__unnamed_task__/Iteration 25 +__unnamed_task__/MaxReturn -21.1848 +__unnamed_task__/MinReturn -95.1646 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.4727 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:16:09 | [maml_trainer] epoch #26 | Sampling for adapation and meta-testing... +2025-04-02 17:17:39 | [maml_trainer] epoch #26 | Finished meta-testing... +2025-04-02 17:17:39 | [maml_trainer] epoch #26 | Saving snapshot... +2025-04-02 17:18:01 | [maml_trainer] epoch #26 | Saved +2025-04-02 17:18:01 | [maml_trainer] epoch #26 | Time 12303.55 s +2025-04-02 17:18:01 | [maml_trainer] epoch #26 | EpochTime 473.38 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -20.978 +Average/AverageReturn -53.6062 +Average/Iteration 26 +Average/MaxReturn 23.3987 +Average/MinReturn -97.5123 +Average/NumEpisodes 80 +Average/StdReturn 18.9487 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99562 +GaussianMLPPolicy/KLAfter 0.00611855 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.00022539 +GaussianMLPPolicy/LossBefore 2.66284e-08 +GaussianMLPPolicy/dLoss 0.000225417 +Iteration 26 +MetaTest/Average/AverageDiscountedReturn -40.4543 +MetaTest/Average/AverageReturn -40.4543 +MetaTest/Average/Iteration 26 +MetaTest/Average/MaxReturn -4.08434 +MetaTest/Average/MinReturn -67.3082 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5428 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.4543 +MetaTest/__unnamed_task__/AverageReturn -40.4543 +MetaTest/__unnamed_task__/Iteration 26 +MetaTest/__unnamed_task__/MaxReturn -4.08434 +MetaTest/__unnamed_task__/MinReturn -67.3082 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5428 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 864000 +__unnamed_task__/AverageDiscountedReturn -20.978 +__unnamed_task__/AverageReturn -53.6062 +__unnamed_task__/Iteration 26 +__unnamed_task__/MaxReturn 23.3987 +__unnamed_task__/MinReturn -97.5123 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.9487 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:24:20 | [maml_trainer] epoch #27 | Sampling for adapation and meta-testing... +2025-04-02 17:25:52 | [maml_trainer] epoch #27 | Finished meta-testing... +2025-04-02 17:25:52 | [maml_trainer] epoch #27 | Saving snapshot... +2025-04-02 17:26:12 | [maml_trainer] epoch #27 | Saved +2025-04-02 17:26:12 | [maml_trainer] epoch #27 | Time 12794.57 s +2025-04-02 17:26:12 | [maml_trainer] epoch #27 | EpochTime 491.01 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -19.8947 +Average/AverageReturn -48.6148 +Average/Iteration 27 +Average/MaxReturn -9.00478 +Average/MinReturn -81.0772 +Average/NumEpisodes 80 +Average/StdReturn 13.8745 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99663 +GaussianMLPPolicy/KLAfter 0.00612761 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.4546e-05 +GaussianMLPPolicy/LossBefore -1.71959e-08 +GaussianMLPPolicy/dLoss 8.45288e-05 +Iteration 27 +MetaTest/Average/AverageDiscountedReturn -48.4645 +MetaTest/Average/AverageReturn -48.4645 +MetaTest/Average/Iteration 27 +MetaTest/Average/MaxReturn 0.254689 +MetaTest/Average/MinReturn -77.7094 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.7466 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.4645 +MetaTest/__unnamed_task__/AverageReturn -48.4645 +MetaTest/__unnamed_task__/Iteration 27 +MetaTest/__unnamed_task__/MaxReturn 0.254689 +MetaTest/__unnamed_task__/MinReturn -77.7094 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.7466 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 896000 +__unnamed_task__/AverageDiscountedReturn -19.8947 +__unnamed_task__/AverageReturn -48.6148 +__unnamed_task__/Iteration 27 +__unnamed_task__/MaxReturn -9.00478 +__unnamed_task__/MinReturn -81.0772 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8745 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:32:16 | [maml_trainer] epoch #28 | Sampling for adapation and meta-testing... +2025-04-02 17:33:45 | [maml_trainer] epoch #28 | Finished meta-testing... +2025-04-02 17:33:45 | [maml_trainer] epoch #28 | Saving snapshot... +2025-04-02 17:34:05 | [maml_trainer] epoch #28 | Saved +2025-04-02 17:34:05 | [maml_trainer] epoch #28 | Time 13267.84 s +2025-04-02 17:34:05 | [maml_trainer] epoch #28 | EpochTime 473.27 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -19.6976 +Average/AverageReturn -46.637 +Average/Iteration 28 +Average/MaxReturn -17.2486 +Average/MinReturn -90.0363 +Average/NumEpisodes 80 +Average/StdReturn 15.0707 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9975 +GaussianMLPPolicy/KLAfter 0.00648991 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.62359e-05 +GaussianMLPPolicy/LossBefore -8.88109e-09 +GaussianMLPPolicy/dLoss -1.62448e-05 +Iteration 28 +MetaTest/Average/AverageDiscountedReturn -45.3535 +MetaTest/Average/AverageReturn -45.3535 +MetaTest/Average/Iteration 28 +MetaTest/Average/MaxReturn -24.0665 +MetaTest/Average/MinReturn -77.7647 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.8217 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.3535 +MetaTest/__unnamed_task__/AverageReturn -45.3535 +MetaTest/__unnamed_task__/Iteration 28 +MetaTest/__unnamed_task__/MaxReturn -24.0665 +MetaTest/__unnamed_task__/MinReturn -77.7647 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.8217 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 928000 +__unnamed_task__/AverageDiscountedReturn -19.6976 +__unnamed_task__/AverageReturn -46.637 +__unnamed_task__/Iteration 28 +__unnamed_task__/MaxReturn -17.2486 +__unnamed_task__/MinReturn -90.0363 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.0707 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:40:02 | [maml_trainer] epoch #29 | Sampling for adapation and meta-testing... +2025-04-02 17:41:33 | [maml_trainer] epoch #29 | Finished meta-testing... +2025-04-02 17:41:33 | [maml_trainer] epoch #29 | Saving snapshot... +2025-04-02 17:41:55 | [maml_trainer] epoch #29 | Saved +2025-04-02 17:41:55 | [maml_trainer] epoch #29 | Time 13737.39 s +2025-04-02 17:41:55 | [maml_trainer] epoch #29 | EpochTime 469.55 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -18.4245 +Average/AverageReturn -42.3334 +Average/Iteration 29 +Average/MaxReturn 2.71932 +Average/MinReturn -81.4739 +Average/NumEpisodes 80 +Average/StdReturn 15.9254 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99869 +GaussianMLPPolicy/KLAfter 0.00747835 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.76563e-06 +GaussianMLPPolicy/LossBefore 8.49366e-09 +GaussianMLPPolicy/dLoss 9.77412e-06 +Iteration 29 +MetaTest/Average/AverageDiscountedReturn -38.0101 +MetaTest/Average/AverageReturn -38.0101 +MetaTest/Average/Iteration 29 +MetaTest/Average/MaxReturn -11.8788 +MetaTest/Average/MinReturn -75.8875 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.5926 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.0101 +MetaTest/__unnamed_task__/AverageReturn -38.0101 +MetaTest/__unnamed_task__/Iteration 29 +MetaTest/__unnamed_task__/MaxReturn -11.8788 +MetaTest/__unnamed_task__/MinReturn -75.8875 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.5926 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 960000 +__unnamed_task__/AverageDiscountedReturn -18.4245 +__unnamed_task__/AverageReturn -42.3334 +__unnamed_task__/Iteration 29 +__unnamed_task__/MaxReturn 2.71932 +__unnamed_task__/MinReturn -81.4739 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.9254 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:48:00 | [maml_trainer] epoch #30 | Sampling for adapation and meta-testing... +2025-04-02 17:49:32 | [maml_trainer] epoch #30 | Finished meta-testing... +2025-04-02 17:49:32 | [maml_trainer] epoch #30 | Saving snapshot... +2025-04-02 17:49:53 | [maml_trainer] epoch #30 | Saved +2025-04-02 17:49:53 | [maml_trainer] epoch #30 | Time 14215.47 s +2025-04-02 17:49:53 | [maml_trainer] epoch #30 | EpochTime 478.07 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -18.1606 +Average/AverageReturn -41.1763 +Average/Iteration 30 +Average/MaxReturn -9.82888 +Average/MinReturn -79.2711 +Average/NumEpisodes 80 +Average/StdReturn 12.3334 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99939 +GaussianMLPPolicy/KLAfter 0.00445089 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.46949e-05 +GaussianMLPPolicy/LossBefore -1.5825e-08 +GaussianMLPPolicy/dLoss -8.47107e-05 +Iteration 30 +MetaTest/Average/AverageDiscountedReturn -41.674 +MetaTest/Average/AverageReturn -41.674 +MetaTest/Average/Iteration 30 +MetaTest/Average/MaxReturn -23.776 +MetaTest/Average/MinReturn -72.3616 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7033 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.674 +MetaTest/__unnamed_task__/AverageReturn -41.674 +MetaTest/__unnamed_task__/Iteration 30 +MetaTest/__unnamed_task__/MaxReturn -23.776 +MetaTest/__unnamed_task__/MinReturn -72.3616 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7033 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 992000 +__unnamed_task__/AverageDiscountedReturn -18.1606 +__unnamed_task__/AverageReturn -41.1763 +__unnamed_task__/Iteration 30 +__unnamed_task__/MaxReturn -9.82888 +__unnamed_task__/MinReturn -79.2711 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3334 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 17:55:58 | [maml_trainer] epoch #31 | Sampling for adapation and meta-testing... +2025-04-02 17:57:29 | [maml_trainer] epoch #31 | Finished meta-testing... +2025-04-02 17:57:29 | [maml_trainer] epoch #31 | Saving snapshot... +2025-04-02 17:57:51 | [maml_trainer] epoch #31 | Saved +2025-04-02 17:57:51 | [maml_trainer] epoch #31 | Time 14693.45 s +2025-04-02 17:57:51 | [maml_trainer] epoch #31 | EpochTime 477.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.8073 +Average/AverageReturn -42.7406 +Average/Iteration 31 +Average/MaxReturn -13.6966 +Average/MinReturn -77.8376 +Average/NumEpisodes 80 +Average/StdReturn 14.3799 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0011 +GaussianMLPPolicy/KLAfter 0.00411553 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.71134e-05 +GaussianMLPPolicy/LossBefore -2.23517e-09 +GaussianMLPPolicy/dLoss -1.71157e-05 +Iteration 31 +MetaTest/Average/AverageDiscountedReturn -42.2444 +MetaTest/Average/AverageReturn -42.2444 +MetaTest/Average/Iteration 31 +MetaTest/Average/MaxReturn -7.00079 +MetaTest/Average/MinReturn -92.4289 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.2952 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.2444 +MetaTest/__unnamed_task__/AverageReturn -42.2444 +MetaTest/__unnamed_task__/Iteration 31 +MetaTest/__unnamed_task__/MaxReturn -7.00079 +MetaTest/__unnamed_task__/MinReturn -92.4289 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.2952 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.024e+06 +__unnamed_task__/AverageDiscountedReturn -18.8073 +__unnamed_task__/AverageReturn -42.7406 +__unnamed_task__/Iteration 31 +__unnamed_task__/MaxReturn -13.6966 +__unnamed_task__/MinReturn -77.8376 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.3799 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:03:59 | [maml_trainer] epoch #32 | Sampling for adapation and meta-testing... +2025-04-02 18:05:32 | [maml_trainer] epoch #32 | Finished meta-testing... +2025-04-02 18:05:32 | [maml_trainer] epoch #32 | Saving snapshot... +2025-04-02 18:05:53 | [maml_trainer] epoch #32 | Saved +2025-04-02 18:05:53 | [maml_trainer] epoch #32 | Time 15175.64 s +2025-04-02 18:05:53 | [maml_trainer] epoch #32 | EpochTime 482.19 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.8578 +Average/AverageReturn -43.0394 +Average/Iteration 32 +Average/MaxReturn -17.8346 +Average/MinReturn -82.1993 +Average/NumEpisodes 80 +Average/StdReturn 12.6271 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0007 +GaussianMLPPolicy/KLAfter 0.00582217 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.38794e-06 +GaussianMLPPolicy/LossBefore 2.11596e-09 +GaussianMLPPolicy/dLoss -7.38582e-06 +Iteration 32 +MetaTest/Average/AverageDiscountedReturn -38.9374 +MetaTest/Average/AverageReturn -38.9374 +MetaTest/Average/Iteration 32 +MetaTest/Average/MaxReturn -18.7136 +MetaTest/Average/MinReturn -52.348 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4078 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.9374 +MetaTest/__unnamed_task__/AverageReturn -38.9374 +MetaTest/__unnamed_task__/Iteration 32 +MetaTest/__unnamed_task__/MaxReturn -18.7136 +MetaTest/__unnamed_task__/MinReturn -52.348 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4078 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.056e+06 +__unnamed_task__/AverageDiscountedReturn -18.8578 +__unnamed_task__/AverageReturn -43.0394 +__unnamed_task__/Iteration 32 +__unnamed_task__/MaxReturn -17.8346 +__unnamed_task__/MinReturn -82.1993 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.6271 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:11:59 | [maml_trainer] epoch #33 | Sampling for adapation and meta-testing... +2025-04-02 18:13:31 | [maml_trainer] epoch #33 | Finished meta-testing... +2025-04-02 18:13:31 | [maml_trainer] epoch #33 | Saving snapshot... +2025-04-02 18:13:51 | [maml_trainer] epoch #33 | Saved +2025-04-02 18:13:51 | [maml_trainer] epoch #33 | Time 15653.70 s +2025-04-02 18:13:51 | [maml_trainer] epoch #33 | EpochTime 478.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.8317 +Average/AverageReturn -41.9216 +Average/Iteration 33 +Average/MaxReturn -7.29547 +Average/MinReturn -68.2222 +Average/NumEpisodes 80 +Average/StdReturn 11.4731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0013 +GaussianMLPPolicy/KLAfter 0.00816652 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000205678 +GaussianMLPPolicy/LossBefore 1.81794e-09 +GaussianMLPPolicy/dLoss 0.00020568 +Iteration 33 +MetaTest/Average/AverageDiscountedReturn -42.8248 +MetaTest/Average/AverageReturn -42.8248 +MetaTest/Average/Iteration 33 +MetaTest/Average/MaxReturn -21.289 +MetaTest/Average/MinReturn -65.3199 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8921 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.8248 +MetaTest/__unnamed_task__/AverageReturn -42.8248 +MetaTest/__unnamed_task__/Iteration 33 +MetaTest/__unnamed_task__/MaxReturn -21.289 +MetaTest/__unnamed_task__/MinReturn -65.3199 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8921 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.088e+06 +__unnamed_task__/AverageDiscountedReturn -18.8317 +__unnamed_task__/AverageReturn -41.9216 +__unnamed_task__/Iteration 33 +__unnamed_task__/MaxReturn -7.29547 +__unnamed_task__/MinReturn -68.2222 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:19:55 | [maml_trainer] epoch #34 | Sampling for adapation and meta-testing... +2025-04-02 18:21:27 | [maml_trainer] epoch #34 | Finished meta-testing... +2025-04-02 18:21:27 | [maml_trainer] epoch #34 | Saving snapshot... +2025-04-02 18:21:49 | [maml_trainer] epoch #34 | Saved +2025-04-02 18:21:49 | [maml_trainer] epoch #34 | Time 16131.30 s +2025-04-02 18:21:49 | [maml_trainer] epoch #34 | EpochTime 477.59 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.2514 +Average/AverageReturn -43.2864 +Average/Iteration 34 +Average/MaxReturn -16.2884 +Average/MinReturn -91.3163 +Average/NumEpisodes 80 +Average/StdReturn 14.4985 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0013 +GaussianMLPPolicy/KLAfter 0.00578484 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.58118e-05 +GaussianMLPPolicy/LossBefore -1.45137e-08 +GaussianMLPPolicy/dLoss -8.58263e-05 +Iteration 34 +MetaTest/Average/AverageDiscountedReturn -33.9359 +MetaTest/Average/AverageReturn -33.9359 +MetaTest/Average/Iteration 34 +MetaTest/Average/MaxReturn -5.12424 +MetaTest/Average/MinReturn -65.3005 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.0781 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.9359 +MetaTest/__unnamed_task__/AverageReturn -33.9359 +MetaTest/__unnamed_task__/Iteration 34 +MetaTest/__unnamed_task__/MaxReturn -5.12424 +MetaTest/__unnamed_task__/MinReturn -65.3005 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.0781 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.12e+06 +__unnamed_task__/AverageDiscountedReturn -19.2514 +__unnamed_task__/AverageReturn -43.2864 +__unnamed_task__/Iteration 34 +__unnamed_task__/MaxReturn -16.2884 +__unnamed_task__/MinReturn -91.3163 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4985 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:27:49 | [maml_trainer] epoch #35 | Sampling for adapation and meta-testing... +2025-04-02 18:29:19 | [maml_trainer] epoch #35 | Finished meta-testing... +2025-04-02 18:29:19 | [maml_trainer] epoch #35 | Saving snapshot... +2025-04-02 18:29:41 | [maml_trainer] epoch #35 | Saved +2025-04-02 18:29:41 | [maml_trainer] epoch #35 | Time 16603.30 s +2025-04-02 18:29:41 | [maml_trainer] epoch #35 | EpochTime 472.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.3884 +Average/AverageReturn -44.8496 +Average/Iteration 35 +Average/MaxReturn 14.2588 +Average/MinReturn -81.3204 +Average/NumEpisodes 80 +Average/StdReturn 15.4006 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0015 +GaussianMLPPolicy/KLAfter 0.00522996 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.33016e-05 +GaussianMLPPolicy/LossBefore 1.19805e-08 +GaussianMLPPolicy/dLoss -5.32896e-05 +Iteration 35 +MetaTest/Average/AverageDiscountedReturn -43.4544 +MetaTest/Average/AverageReturn -43.4544 +MetaTest/Average/Iteration 35 +MetaTest/Average/MaxReturn -12.1476 +MetaTest/Average/MinReturn -79.8924 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1353 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.4544 +MetaTest/__unnamed_task__/AverageReturn -43.4544 +MetaTest/__unnamed_task__/Iteration 35 +MetaTest/__unnamed_task__/MaxReturn -12.1476 +MetaTest/__unnamed_task__/MinReturn -79.8924 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1353 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.152e+06 +__unnamed_task__/AverageDiscountedReturn -19.3884 +__unnamed_task__/AverageReturn -44.8496 +__unnamed_task__/Iteration 35 +__unnamed_task__/MaxReturn 14.2588 +__unnamed_task__/MinReturn -81.3204 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4006 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:35:49 | [maml_trainer] epoch #36 | Sampling for adapation and meta-testing... +2025-04-02 18:37:20 | [maml_trainer] epoch #36 | Finished meta-testing... +2025-04-02 18:37:20 | [maml_trainer] epoch #36 | Saving snapshot... +2025-04-02 18:37:40 | [maml_trainer] epoch #36 | Saved +2025-04-02 18:37:40 | [maml_trainer] epoch #36 | Time 17082.37 s +2025-04-02 18:37:40 | [maml_trainer] epoch #36 | EpochTime 479.06 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.5525 +Average/AverageReturn -44.8012 +Average/Iteration 36 +Average/MaxReturn 6.09308 +Average/MinReturn -81.137 +Average/NumEpisodes 80 +Average/StdReturn 16.6554 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0018 +GaussianMLPPolicy/KLAfter 0.00241309 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000133774 +GaussianMLPPolicy/LossBefore 2.98024e-11 +GaussianMLPPolicy/dLoss 0.000133774 +Iteration 36 +MetaTest/Average/AverageDiscountedReturn -43.6124 +MetaTest/Average/AverageReturn -43.6124 +MetaTest/Average/Iteration 36 +MetaTest/Average/MaxReturn -16.9561 +MetaTest/Average/MinReturn -85.1516 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.2254 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.6124 +MetaTest/__unnamed_task__/AverageReturn -43.6124 +MetaTest/__unnamed_task__/Iteration 36 +MetaTest/__unnamed_task__/MaxReturn -16.9561 +MetaTest/__unnamed_task__/MinReturn -85.1516 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.2254 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.184e+06 +__unnamed_task__/AverageDiscountedReturn -19.5525 +__unnamed_task__/AverageReturn -44.8012 +__unnamed_task__/Iteration 36 +__unnamed_task__/MaxReturn 6.09308 +__unnamed_task__/MinReturn -81.137 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6554 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:43:42 | [maml_trainer] epoch #37 | Sampling for adapation and meta-testing... +2025-04-02 18:45:13 | [maml_trainer] epoch #37 | Finished meta-testing... +2025-04-02 18:45:13 | [maml_trainer] epoch #37 | Saving snapshot... +2025-04-02 18:45:35 | [maml_trainer] epoch #37 | Saved +2025-04-02 18:45:35 | [maml_trainer] epoch #37 | Time 17557.14 s +2025-04-02 18:45:35 | [maml_trainer] epoch #37 | EpochTime 474.78 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.5405 +Average/AverageReturn -45.3016 +Average/Iteration 37 +Average/MaxReturn -15.4621 +Average/MinReturn -87.0647 +Average/NumEpisodes 80 +Average/StdReturn 15.4132 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0018 +GaussianMLPPolicy/KLAfter 0.00192757 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.6965e-05 +GaussianMLPPolicy/LossBefore 7.12276e-09 +GaussianMLPPolicy/dLoss -1.69579e-05 +Iteration 37 +MetaTest/Average/AverageDiscountedReturn -44.646 +MetaTest/Average/AverageReturn -44.646 +MetaTest/Average/Iteration 37 +MetaTest/Average/MaxReturn -13.4186 +MetaTest/Average/MinReturn -70.8636 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.6465 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -44.646 +MetaTest/__unnamed_task__/AverageReturn -44.646 +MetaTest/__unnamed_task__/Iteration 37 +MetaTest/__unnamed_task__/MaxReturn -13.4186 +MetaTest/__unnamed_task__/MinReturn -70.8636 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.6465 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.216e+06 +__unnamed_task__/AverageDiscountedReturn -19.5405 +__unnamed_task__/AverageReturn -45.3016 +__unnamed_task__/Iteration 37 +__unnamed_task__/MaxReturn -15.4621 +__unnamed_task__/MinReturn -87.0647 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4132 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:51:41 | [maml_trainer] epoch #38 | Sampling for adapation and meta-testing... +2025-04-02 18:53:10 | [maml_trainer] epoch #38 | Finished meta-testing... +2025-04-02 18:53:10 | [maml_trainer] epoch #38 | Saving snapshot... +2025-04-02 18:53:30 | [maml_trainer] epoch #38 | Saved +2025-04-02 18:53:30 | [maml_trainer] epoch #38 | Time 18032.43 s +2025-04-02 18:53:30 | [maml_trainer] epoch #38 | EpochTime 475.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.6339 +Average/AverageReturn -46.6624 +Average/Iteration 38 +Average/MaxReturn -20.1083 +Average/MinReturn -84.4959 +Average/NumEpisodes 80 +Average/StdReturn 14.2857 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.002 +GaussianMLPPolicy/KLAfter 0.00203737 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.42455e-06 +GaussianMLPPolicy/LossBefore 1.68085e-08 +GaussianMLPPolicy/dLoss 6.44135e-06 +Iteration 38 +MetaTest/Average/AverageDiscountedReturn -43.4401 +MetaTest/Average/AverageReturn -43.4401 +MetaTest/Average/Iteration 38 +MetaTest/Average/MaxReturn -12.1508 +MetaTest/Average/MinReturn -76.3952 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.8531 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.4401 +MetaTest/__unnamed_task__/AverageReturn -43.4401 +MetaTest/__unnamed_task__/Iteration 38 +MetaTest/__unnamed_task__/MaxReturn -12.1508 +MetaTest/__unnamed_task__/MinReturn -76.3952 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.8531 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.248e+06 +__unnamed_task__/AverageDiscountedReturn -19.6339 +__unnamed_task__/AverageReturn -46.6624 +__unnamed_task__/Iteration 38 +__unnamed_task__/MaxReturn -20.1083 +__unnamed_task__/MinReturn -84.4959 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.2857 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 18:59:34 | [maml_trainer] epoch #39 | Sampling for adapation and meta-testing... +2025-04-02 19:01:06 | [maml_trainer] epoch #39 | Finished meta-testing... +2025-04-02 19:01:06 | [maml_trainer] epoch #39 | Saving snapshot... +2025-04-02 19:01:28 | [maml_trainer] epoch #39 | Saved +2025-04-02 19:01:28 | [maml_trainer] epoch #39 | Time 18510.49 s +2025-04-02 19:01:28 | [maml_trainer] epoch #39 | EpochTime 478.06 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.7644 +Average/AverageReturn -45.7446 +Average/Iteration 39 +Average/MaxReturn -1.00908 +Average/MinReturn -89.8427 +Average/NumEpisodes 80 +Average/StdReturn 14.6905 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0024 +GaussianMLPPolicy/KLAfter 0.00145559 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000161865 +GaussianMLPPolicy/LossBefore -4.85778e-09 +GaussianMLPPolicy/dLoss -0.00016187 +Iteration 39 +MetaTest/Average/AverageDiscountedReturn -39.8745 +MetaTest/Average/AverageReturn -39.8745 +MetaTest/Average/Iteration 39 +MetaTest/Average/MaxReturn -25.6001 +MetaTest/Average/MinReturn -62.8462 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.81777 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.8745 +MetaTest/__unnamed_task__/AverageReturn -39.8745 +MetaTest/__unnamed_task__/Iteration 39 +MetaTest/__unnamed_task__/MaxReturn -25.6001 +MetaTest/__unnamed_task__/MinReturn -62.8462 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.81777 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.28e+06 +__unnamed_task__/AverageDiscountedReturn -19.7644 +__unnamed_task__/AverageReturn -45.7446 +__unnamed_task__/Iteration 39 +__unnamed_task__/MaxReturn -1.00908 +__unnamed_task__/MinReturn -89.8427 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6905 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:07:33 | [maml_trainer] epoch #40 | Sampling for adapation and meta-testing... +2025-04-02 19:09:01 | [maml_trainer] epoch #40 | Finished meta-testing... +2025-04-02 19:09:01 | [maml_trainer] epoch #40 | Saving snapshot... +2025-04-02 19:09:22 | [maml_trainer] epoch #40 | Saved +2025-04-02 19:09:22 | [maml_trainer] epoch #40 | Time 18984.10 s +2025-04-02 19:09:22 | [maml_trainer] epoch #40 | EpochTime 473.60 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.5941 +Average/AverageReturn -45.6751 +Average/Iteration 40 +Average/MaxReturn -12.3956 +Average/MinReturn -83.2399 +Average/NumEpisodes 80 +Average/StdReturn 15.9778 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0032 +GaussianMLPPolicy/KLAfter 0.0016975 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.07845e-05 +GaussianMLPPolicy/LossBefore 2.21431e-08 +GaussianMLPPolicy/dLoss -1.07624e-05 +Iteration 40 +MetaTest/Average/AverageDiscountedReturn -41.2322 +MetaTest/Average/AverageReturn -41.2322 +MetaTest/Average/Iteration 40 +MetaTest/Average/MaxReturn -17.7566 +MetaTest/Average/MinReturn -63.391 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1376 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.2322 +MetaTest/__unnamed_task__/AverageReturn -41.2322 +MetaTest/__unnamed_task__/Iteration 40 +MetaTest/__unnamed_task__/MaxReturn -17.7566 +MetaTest/__unnamed_task__/MinReturn -63.391 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1376 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.312e+06 +__unnamed_task__/AverageDiscountedReturn -19.5941 +__unnamed_task__/AverageReturn -45.6751 +__unnamed_task__/Iteration 40 +__unnamed_task__/MaxReturn -12.3956 +__unnamed_task__/MinReturn -83.2399 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.9778 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:15:08 | [maml_trainer] epoch #41 | Sampling for adapation and meta-testing... +2025-04-02 19:16:35 | [maml_trainer] epoch #41 | Finished meta-testing... +2025-04-02 19:16:35 | [maml_trainer] epoch #41 | Saving snapshot... +2025-04-02 19:16:55 | [maml_trainer] epoch #41 | Saved +2025-04-02 19:16:55 | [maml_trainer] epoch #41 | Time 19437.19 s +2025-04-02 19:16:55 | [maml_trainer] epoch #41 | EpochTime 453.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -20.8102 +Average/AverageReturn -48.8397 +Average/Iteration 41 +Average/MaxReturn 1.58137 +Average/MinReturn -87.9838 +Average/NumEpisodes 80 +Average/StdReturn 17.1146 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.005 +GaussianMLPPolicy/KLAfter 0.00221944 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.13773e-05 +GaussianMLPPolicy/LossBefore -7.00355e-09 +GaussianMLPPolicy/dLoss -8.13843e-05 +Iteration 41 +MetaTest/Average/AverageDiscountedReturn -48.6111 +MetaTest/Average/AverageReturn -48.6111 +MetaTest/Average/Iteration 41 +MetaTest/Average/MaxReturn -33.5807 +MetaTest/Average/MinReturn -68.3198 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.381 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -48.6111 +MetaTest/__unnamed_task__/AverageReturn -48.6111 +MetaTest/__unnamed_task__/Iteration 41 +MetaTest/__unnamed_task__/MaxReturn -33.5807 +MetaTest/__unnamed_task__/MinReturn -68.3198 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.381 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.344e+06 +__unnamed_task__/AverageDiscountedReturn -20.8102 +__unnamed_task__/AverageReturn -48.8397 +__unnamed_task__/Iteration 41 +__unnamed_task__/MaxReturn 1.58137 +__unnamed_task__/MinReturn -87.9838 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.1146 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:22:57 | [maml_trainer] epoch #42 | Sampling for adapation and meta-testing... +2025-04-02 19:24:29 | [maml_trainer] epoch #42 | Finished meta-testing... +2025-04-02 19:24:29 | [maml_trainer] epoch #42 | Saving snapshot... +2025-04-02 19:24:51 | [maml_trainer] epoch #42 | Saved +2025-04-02 19:24:51 | [maml_trainer] epoch #42 | Time 19913.96 s +2025-04-02 19:24:51 | [maml_trainer] epoch #42 | EpochTime 476.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.0073 +Average/AverageReturn -44.8036 +Average/Iteration 42 +Average/MaxReturn 111.789 +Average/MinReturn -83.5409 +Average/NumEpisodes 80 +Average/StdReturn 23.8489 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0074 +GaussianMLPPolicy/KLAfter 0.00225797 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.49142e-06 +GaussianMLPPolicy/LossBefore 1.37091e-09 +GaussianMLPPolicy/dLoss 8.4928e-06 +Iteration 42 +MetaTest/Average/AverageDiscountedReturn -43.9458 +MetaTest/Average/AverageReturn -43.9458 +MetaTest/Average/Iteration 42 +MetaTest/Average/MaxReturn 0.446834 +MetaTest/Average/MinReturn -68.8958 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.538 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.9458 +MetaTest/__unnamed_task__/AverageReturn -43.9458 +MetaTest/__unnamed_task__/Iteration 42 +MetaTest/__unnamed_task__/MaxReturn 0.446834 +MetaTest/__unnamed_task__/MinReturn -68.8958 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.538 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.376e+06 +__unnamed_task__/AverageDiscountedReturn -19.0073 +__unnamed_task__/AverageReturn -44.8036 +__unnamed_task__/Iteration 42 +__unnamed_task__/MaxReturn 111.789 +__unnamed_task__/MinReturn -83.5409 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.8489 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:30:57 | [maml_trainer] epoch #43 | Sampling for adapation and meta-testing... +2025-04-02 19:32:29 | [maml_trainer] epoch #43 | Finished meta-testing... +2025-04-02 19:32:29 | [maml_trainer] epoch #43 | Saving snapshot... +2025-04-02 19:32:51 | [maml_trainer] epoch #43 | Saved +2025-04-02 19:32:51 | [maml_trainer] epoch #43 | Time 20393.43 s +2025-04-02 19:32:51 | [maml_trainer] epoch #43 | EpochTime 479.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -20.4428 +Average/AverageReturn -49.0745 +Average/Iteration 43 +Average/MaxReturn -4.70335 +Average/MinReturn -98.5063 +Average/NumEpisodes 80 +Average/StdReturn 17.2571 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0091 +GaussianMLPPolicy/KLAfter 0.00280714 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000109138 +GaussianMLPPolicy/LossBefore -2.86102e-09 +GaussianMLPPolicy/dLoss 0.000109135 +Iteration 43 +MetaTest/Average/AverageDiscountedReturn -34.2625 +MetaTest/Average/AverageReturn -34.2625 +MetaTest/Average/Iteration 43 +MetaTest/Average/MaxReturn 47.2825 +MetaTest/Average/MinReturn -76.5779 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.9503 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.2625 +MetaTest/__unnamed_task__/AverageReturn -34.2625 +MetaTest/__unnamed_task__/Iteration 43 +MetaTest/__unnamed_task__/MaxReturn 47.2825 +MetaTest/__unnamed_task__/MinReturn -76.5779 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.9503 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.408e+06 +__unnamed_task__/AverageDiscountedReturn -20.4428 +__unnamed_task__/AverageReturn -49.0745 +__unnamed_task__/Iteration 43 +__unnamed_task__/MaxReturn -4.70335 +__unnamed_task__/MinReturn -98.5063 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.2571 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:38:57 | [maml_trainer] epoch #44 | Sampling for adapation and meta-testing... +2025-04-02 19:40:29 | [maml_trainer] epoch #44 | Finished meta-testing... +2025-04-02 19:40:29 | [maml_trainer] epoch #44 | Saving snapshot... +2025-04-02 19:40:50 | [maml_trainer] epoch #44 | Saved +2025-04-02 19:40:50 | [maml_trainer] epoch #44 | Time 20872.48 s +2025-04-02 19:40:50 | [maml_trainer] epoch #44 | EpochTime 479.04 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.1898 +Average/AverageReturn -44.2029 +Average/Iteration 44 +Average/MaxReturn -3.42646 +Average/MinReturn -75.6662 +Average/NumEpisodes 80 +Average/StdReturn 14.6949 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0084 +GaussianMLPPolicy/KLAfter 0.00214961 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.85343e-05 +GaussianMLPPolicy/LossBefore -2.39611e-08 +GaussianMLPPolicy/dLoss -6.85583e-05 +Iteration 44 +MetaTest/Average/AverageDiscountedReturn -38.3354 +MetaTest/Average/AverageReturn -38.3354 +MetaTest/Average/Iteration 44 +MetaTest/Average/MaxReturn 16.3489 +MetaTest/Average/MinReturn -76.3305 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5691 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.3354 +MetaTest/__unnamed_task__/AverageReturn -38.3354 +MetaTest/__unnamed_task__/Iteration 44 +MetaTest/__unnamed_task__/MaxReturn 16.3489 +MetaTest/__unnamed_task__/MinReturn -76.3305 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5691 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.44e+06 +__unnamed_task__/AverageDiscountedReturn -19.1898 +__unnamed_task__/AverageReturn -44.2029 +__unnamed_task__/Iteration 44 +__unnamed_task__/MaxReturn -3.42646 +__unnamed_task__/MinReturn -75.6662 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6949 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:46:45 | [maml_trainer] epoch #45 | Sampling for adapation and meta-testing... +2025-04-02 19:48:16 | [maml_trainer] epoch #45 | Finished meta-testing... +2025-04-02 19:48:16 | [maml_trainer] epoch #45 | Saving snapshot... +2025-04-02 19:48:39 | [maml_trainer] epoch #45 | Saved +2025-04-02 19:48:39 | [maml_trainer] epoch #45 | Time 21341.00 s +2025-04-02 19:48:39 | [maml_trainer] epoch #45 | EpochTime 468.52 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.5305 +Average/AverageReturn -44.0774 +Average/Iteration 45 +Average/MaxReturn -2.64878 +Average/MinReturn -81.6317 +Average/NumEpisodes 80 +Average/StdReturn 14.2659 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0086 +GaussianMLPPolicy/KLAfter 0.00416845 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.00475e-05 +GaussianMLPPolicy/LossBefore 6.94394e-09 +GaussianMLPPolicy/dLoss -7.00405e-05 +Iteration 45 +MetaTest/Average/AverageDiscountedReturn -42.3016 +MetaTest/Average/AverageReturn -42.3016 +MetaTest/Average/Iteration 45 +MetaTest/Average/MaxReturn -10.5363 +MetaTest/Average/MinReturn -71.3769 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.151 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.3016 +MetaTest/__unnamed_task__/AverageReturn -42.3016 +MetaTest/__unnamed_task__/Iteration 45 +MetaTest/__unnamed_task__/MaxReturn -10.5363 +MetaTest/__unnamed_task__/MinReturn -71.3769 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.151 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.472e+06 +__unnamed_task__/AverageDiscountedReturn -18.5305 +__unnamed_task__/AverageReturn -44.0774 +__unnamed_task__/Iteration 45 +__unnamed_task__/MaxReturn -2.64878 +__unnamed_task__/MinReturn -81.6317 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.2659 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 19:54:48 | [maml_trainer] epoch #46 | Sampling for adapation and meta-testing... +2025-04-02 19:56:20 | [maml_trainer] epoch #46 | Finished meta-testing... +2025-04-02 19:56:20 | [maml_trainer] epoch #46 | Saving snapshot... +2025-04-02 19:56:42 | [maml_trainer] epoch #46 | Saved +2025-04-02 19:56:42 | [maml_trainer] epoch #46 | Time 21824.11 s +2025-04-02 19:56:42 | [maml_trainer] epoch #46 | EpochTime 483.11 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.4227 +Average/AverageReturn -42.8413 +Average/Iteration 46 +Average/MaxReturn -8.2572 +Average/MinReturn -99.3025 +Average/NumEpisodes 80 +Average/StdReturn 15.1644 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0078 +GaussianMLPPolicy/KLAfter 0.00525632 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.53786e-05 +GaussianMLPPolicy/LossBefore 3.12924e-09 +GaussianMLPPolicy/dLoss 8.53817e-05 +Iteration 46 +MetaTest/Average/AverageDiscountedReturn -44.0515 +MetaTest/Average/AverageReturn -44.0515 +MetaTest/Average/Iteration 46 +MetaTest/Average/MaxReturn -22.5637 +MetaTest/Average/MinReturn -61.7641 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7156 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -44.0515 +MetaTest/__unnamed_task__/AverageReturn -44.0515 +MetaTest/__unnamed_task__/Iteration 46 +MetaTest/__unnamed_task__/MaxReturn -22.5637 +MetaTest/__unnamed_task__/MinReturn -61.7641 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7156 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.504e+06 +__unnamed_task__/AverageDiscountedReturn -18.4227 +__unnamed_task__/AverageReturn -42.8413 +__unnamed_task__/Iteration 46 +__unnamed_task__/MaxReturn -8.2572 +__unnamed_task__/MinReturn -99.3025 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1644 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:02:50 | [maml_trainer] epoch #47 | Sampling for adapation and meta-testing... +2025-04-02 20:04:22 | [maml_trainer] epoch #47 | Finished meta-testing... +2025-04-02 20:04:22 | [maml_trainer] epoch #47 | Saving snapshot... +2025-04-02 20:04:43 | [maml_trainer] epoch #47 | Saved +2025-04-02 20:04:43 | [maml_trainer] epoch #47 | Time 22305.13 s +2025-04-02 20:04:43 | [maml_trainer] epoch #47 | EpochTime 481.01 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.0385 +Average/AverageReturn -41.7071 +Average/Iteration 47 +Average/MaxReturn 91.4526 +Average/MinReturn -82.7276 +Average/NumEpisodes 80 +Average/StdReturn 21.1976 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0077 +GaussianMLPPolicy/KLAfter 0.00603047 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.48917e-05 +GaussianMLPPolicy/LossBefore 3.44217e-09 +GaussianMLPPolicy/dLoss -4.48883e-05 +Iteration 47 +MetaTest/Average/AverageDiscountedReturn -43.9257 +MetaTest/Average/AverageReturn -43.9257 +MetaTest/Average/Iteration 47 +MetaTest/Average/MaxReturn -20.9292 +MetaTest/Average/MinReturn -67.3976 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.1758 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.9257 +MetaTest/__unnamed_task__/AverageReturn -43.9257 +MetaTest/__unnamed_task__/Iteration 47 +MetaTest/__unnamed_task__/MaxReturn -20.9292 +MetaTest/__unnamed_task__/MinReturn -67.3976 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.1758 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.536e+06 +__unnamed_task__/AverageDiscountedReturn -18.0385 +__unnamed_task__/AverageReturn -41.7071 +__unnamed_task__/Iteration 47 +__unnamed_task__/MaxReturn 91.4526 +__unnamed_task__/MinReturn -82.7276 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.1976 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:10:48 | [maml_trainer] epoch #48 | Sampling for adapation and meta-testing... +2025-04-02 20:12:20 | [maml_trainer] epoch #48 | Finished meta-testing... +2025-04-02 20:12:20 | [maml_trainer] epoch #48 | Saving snapshot... +2025-04-02 20:12:41 | [maml_trainer] epoch #48 | Saved +2025-04-02 20:12:41 | [maml_trainer] epoch #48 | Time 22783.83 s +2025-04-02 20:12:41 | [maml_trainer] epoch #48 | EpochTime 478.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7074 +Average/AverageReturn -40.1016 +Average/Iteration 48 +Average/MaxReturn 38.4037 +Average/MinReturn -76.4653 +Average/NumEpisodes 80 +Average/StdReturn 17.0931 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0075 +GaussianMLPPolicy/KLAfter 0.00574314 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.14418e-05 +GaussianMLPPolicy/LossBefore 1.93715e-08 +GaussianMLPPolicy/dLoss 1.14612e-05 +Iteration 48 +MetaTest/Average/AverageDiscountedReturn -42.4988 +MetaTest/Average/AverageReturn -42.4988 +MetaTest/Average/Iteration 48 +MetaTest/Average/MaxReturn -24.8375 +MetaTest/Average/MinReturn -67.1745 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9251 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.4988 +MetaTest/__unnamed_task__/AverageReturn -42.4988 +MetaTest/__unnamed_task__/Iteration 48 +MetaTest/__unnamed_task__/MaxReturn -24.8375 +MetaTest/__unnamed_task__/MinReturn -67.1745 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9251 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.568e+06 +__unnamed_task__/AverageDiscountedReturn -17.7074 +__unnamed_task__/AverageReturn -40.1016 +__unnamed_task__/Iteration 48 +__unnamed_task__/MaxReturn 38.4037 +__unnamed_task__/MinReturn -76.4653 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0931 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:18:42 | [maml_trainer] epoch #49 | Sampling for adapation and meta-testing... +2025-04-02 20:20:15 | [maml_trainer] epoch #49 | Finished meta-testing... +2025-04-02 20:20:15 | [maml_trainer] epoch #49 | Saving snapshot... +2025-04-02 20:20:35 | [maml_trainer] epoch #49 | Saved +2025-04-02 20:20:35 | [maml_trainer] epoch #49 | Time 23257.82 s +2025-04-02 20:20:35 | [maml_trainer] epoch #49 | EpochTime 473.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.798 +Average/AverageReturn -40.9482 +Average/Iteration 49 +Average/MaxReturn 11.5178 +Average/MinReturn -79.7529 +Average/NumEpisodes 80 +Average/StdReturn 14.5245 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0061 +GaussianMLPPolicy/KLAfter 0.00795525 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000319918 +GaussianMLPPolicy/LossBefore 2.28286e-08 +GaussianMLPPolicy/dLoss 0.000319941 +Iteration 49 +MetaTest/Average/AverageDiscountedReturn -38.2858 +MetaTest/Average/AverageReturn -38.2858 +MetaTest/Average/Iteration 49 +MetaTest/Average/MaxReturn -18.2733 +MetaTest/Average/MinReturn -71.3676 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.673 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.2858 +MetaTest/__unnamed_task__/AverageReturn -38.2858 +MetaTest/__unnamed_task__/Iteration 49 +MetaTest/__unnamed_task__/MaxReturn -18.2733 +MetaTest/__unnamed_task__/MinReturn -71.3676 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.673 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.6e+06 +__unnamed_task__/AverageDiscountedReturn -17.798 +__unnamed_task__/AverageReturn -40.9482 +__unnamed_task__/Iteration 49 +__unnamed_task__/MaxReturn 11.5178 +__unnamed_task__/MinReturn -79.7529 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.5245 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:26:33 | [maml_trainer] epoch #50 | Sampling for adapation and meta-testing... +2025-04-02 20:28:01 | [maml_trainer] epoch #50 | Finished meta-testing... +2025-04-02 20:28:01 | [maml_trainer] epoch #50 | Saving snapshot... +2025-04-02 20:28:22 | [maml_trainer] epoch #50 | Saved +2025-04-02 20:28:22 | [maml_trainer] epoch #50 | Time 23724.94 s +2025-04-02 20:28:22 | [maml_trainer] epoch #50 | EpochTime 467.12 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8195 +Average/AverageReturn -40.4067 +Average/Iteration 50 +Average/MaxReturn -8.6914 +Average/MinReturn -71.2312 +Average/NumEpisodes 80 +Average/StdReturn 14.1414 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0043 +GaussianMLPPolicy/KLAfter 0.00673606 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000120578 +GaussianMLPPolicy/LossBefore -3.24845e-09 +GaussianMLPPolicy/dLoss 0.000120575 +Iteration 50 +MetaTest/Average/AverageDiscountedReturn -39.9869 +MetaTest/Average/AverageReturn -39.9869 +MetaTest/Average/Iteration 50 +MetaTest/Average/MaxReturn -16.5843 +MetaTest/Average/MinReturn -68.198 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8823 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.9869 +MetaTest/__unnamed_task__/AverageReturn -39.9869 +MetaTest/__unnamed_task__/Iteration 50 +MetaTest/__unnamed_task__/MaxReturn -16.5843 +MetaTest/__unnamed_task__/MinReturn -68.198 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8823 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.632e+06 +__unnamed_task__/AverageDiscountedReturn -17.8195 +__unnamed_task__/AverageReturn -40.4067 +__unnamed_task__/Iteration 50 +__unnamed_task__/MaxReturn -8.6914 +__unnamed_task__/MinReturn -71.2312 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.1414 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:34:23 | [maml_trainer] epoch #51 | Sampling for adapation and meta-testing... +2025-04-02 20:35:55 | [maml_trainer] epoch #51 | Finished meta-testing... +2025-04-02 20:35:55 | [maml_trainer] epoch #51 | Saving snapshot... +2025-04-02 20:36:17 | [maml_trainer] epoch #51 | Saved +2025-04-02 20:36:17 | [maml_trainer] epoch #51 | Time 24199.38 s +2025-04-02 20:36:17 | [maml_trainer] epoch #51 | EpochTime 474.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8358 +Average/AverageReturn -40.455 +Average/Iteration 51 +Average/MaxReturn 1.64362 +Average/MinReturn -77.0096 +Average/NumEpisodes 80 +Average/StdReturn 14.408 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0021 +GaussianMLPPolicy/KLAfter 0.00251017 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000146318 +GaussianMLPPolicy/LossBefore 9.0003e-09 +GaussianMLPPolicy/dLoss 0.000146327 +Iteration 51 +MetaTest/Average/AverageDiscountedReturn -40.8186 +MetaTest/Average/AverageReturn -40.8186 +MetaTest/Average/Iteration 51 +MetaTest/Average/MaxReturn -1.49979 +MetaTest/Average/MinReturn -73.4131 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.9214 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.8186 +MetaTest/__unnamed_task__/AverageReturn -40.8186 +MetaTest/__unnamed_task__/Iteration 51 +MetaTest/__unnamed_task__/MaxReturn -1.49979 +MetaTest/__unnamed_task__/MinReturn -73.4131 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.9214 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.664e+06 +__unnamed_task__/AverageDiscountedReturn -17.8358 +__unnamed_task__/AverageReturn -40.455 +__unnamed_task__/Iteration 51 +__unnamed_task__/MaxReturn 1.64362 +__unnamed_task__/MinReturn -77.0096 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.408 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:42:25 | [maml_trainer] epoch #52 | Sampling for adapation and meta-testing... +2025-04-02 20:43:58 | [maml_trainer] epoch #52 | Finished meta-testing... +2025-04-02 20:43:58 | [maml_trainer] epoch #52 | Saving snapshot... +2025-04-02 20:44:19 | [maml_trainer] epoch #52 | Saved +2025-04-02 20:44:19 | [maml_trainer] epoch #52 | Time 24681.52 s +2025-04-02 20:44:19 | [maml_trainer] epoch #52 | EpochTime 482.14 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.8891 +Average/AverageReturn -43.7323 +Average/Iteration 52 +Average/MaxReturn -10.3893 +Average/MinReturn -76.8542 +Average/NumEpisodes 80 +Average/StdReturn 12.8202 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 10.0001 +GaussianMLPPolicy/KLAfter 0.00210334 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.26961e-05 +GaussianMLPPolicy/LossBefore -6.67572e-09 +GaussianMLPPolicy/dLoss 1.26894e-05 +Iteration 52 +MetaTest/Average/AverageDiscountedReturn -42.4598 +MetaTest/Average/AverageReturn -42.4598 +MetaTest/Average/Iteration 52 +MetaTest/Average/MaxReturn -26.4454 +MetaTest/Average/MinReturn -61.1317 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3025 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.4598 +MetaTest/__unnamed_task__/AverageReturn -42.4598 +MetaTest/__unnamed_task__/Iteration 52 +MetaTest/__unnamed_task__/MaxReturn -26.4454 +MetaTest/__unnamed_task__/MinReturn -61.1317 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3025 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.696e+06 +__unnamed_task__/AverageDiscountedReturn -18.8891 +__unnamed_task__/AverageReturn -43.7323 +__unnamed_task__/Iteration 52 +__unnamed_task__/MaxReturn -10.3893 +__unnamed_task__/MinReturn -76.8542 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8202 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:50:34 | [maml_trainer] epoch #53 | Sampling for adapation and meta-testing... +2025-04-02 20:52:07 | [maml_trainer] epoch #53 | Finished meta-testing... +2025-04-02 20:52:07 | [maml_trainer] epoch #53 | Saving snapshot... +2025-04-02 20:52:28 | [maml_trainer] epoch #53 | Saved +2025-04-02 20:52:28 | [maml_trainer] epoch #53 | Time 25170.70 s +2025-04-02 20:52:28 | [maml_trainer] epoch #53 | EpochTime 489.18 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8041 +Average/AverageReturn -41.9028 +Average/Iteration 53 +Average/MaxReturn -20.092 +Average/MinReturn -83.4709 +Average/NumEpisodes 80 +Average/StdReturn 11.6239 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99762 +GaussianMLPPolicy/KLAfter 0.00171712 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000137987 +GaussianMLPPolicy/LossBefore -9.50694e-09 +GaussianMLPPolicy/dLoss 0.000137978 +Iteration 53 +MetaTest/Average/AverageDiscountedReturn -46.937 +MetaTest/Average/AverageReturn -46.937 +MetaTest/Average/Iteration 53 +MetaTest/Average/MaxReturn -27.1601 +MetaTest/Average/MinReturn -77.1411 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.6362 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -46.937 +MetaTest/__unnamed_task__/AverageReturn -46.937 +MetaTest/__unnamed_task__/Iteration 53 +MetaTest/__unnamed_task__/MaxReturn -27.1601 +MetaTest/__unnamed_task__/MinReturn -77.1411 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.6362 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.728e+06 +__unnamed_task__/AverageDiscountedReturn -17.8041 +__unnamed_task__/AverageReturn -41.9028 +__unnamed_task__/Iteration 53 +__unnamed_task__/MaxReturn -20.092 +__unnamed_task__/MinReturn -83.4709 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6239 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 20:58:40 | [maml_trainer] epoch #54 | Sampling for adapation and meta-testing... +2025-04-02 21:00:09 | [maml_trainer] epoch #54 | Finished meta-testing... +2025-04-02 21:00:09 | [maml_trainer] epoch #54 | Saving snapshot... +2025-04-02 21:00:32 | [maml_trainer] epoch #54 | Saved +2025-04-02 21:00:32 | [maml_trainer] epoch #54 | Time 25654.23 s +2025-04-02 21:00:32 | [maml_trainer] epoch #54 | EpochTime 483.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6603 +Average/AverageReturn -40.5722 +Average/Iteration 54 +Average/MaxReturn 15.9293 +Average/MinReturn -64.4573 +Average/NumEpisodes 80 +Average/StdReturn 13.0328 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99431 +GaussianMLPPolicy/KLAfter 0.00119713 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.387e-05 +GaussianMLPPolicy/LossBefore 1.18017e-08 +GaussianMLPPolicy/dLoss 3.38818e-05 +Iteration 54 +MetaTest/Average/AverageDiscountedReturn -37.5142 +MetaTest/Average/AverageReturn -37.5142 +MetaTest/Average/Iteration 54 +MetaTest/Average/MaxReturn -13.7525 +MetaTest/Average/MinReturn -64.5996 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.5512 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.5142 +MetaTest/__unnamed_task__/AverageReturn -37.5142 +MetaTest/__unnamed_task__/Iteration 54 +MetaTest/__unnamed_task__/MaxReturn -13.7525 +MetaTest/__unnamed_task__/MinReturn -64.5996 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.5512 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.76e+06 +__unnamed_task__/AverageDiscountedReturn -17.6603 +__unnamed_task__/AverageReturn -40.5722 +__unnamed_task__/Iteration 54 +__unnamed_task__/MaxReturn 15.9293 +__unnamed_task__/MinReturn -64.4573 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.0328 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:06:39 | [maml_trainer] epoch #55 | Sampling for adapation and meta-testing... +2025-04-02 21:08:13 | [maml_trainer] epoch #55 | Finished meta-testing... +2025-04-02 21:08:13 | [maml_trainer] epoch #55 | Saving snapshot... +2025-04-02 21:08:35 | [maml_trainer] epoch #55 | Saved +2025-04-02 21:08:35 | [maml_trainer] epoch #55 | Time 26137.93 s +2025-04-02 21:08:35 | [maml_trainer] epoch #55 | EpochTime 483.69 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0121 +Average/AverageReturn -39.4175 +Average/Iteration 55 +Average/MaxReturn 8.72458 +Average/MinReturn -73.2583 +Average/NumEpisodes 80 +Average/StdReturn 13.9 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.99134 +GaussianMLPPolicy/KLAfter 0.00126455 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000104351 +GaussianMLPPolicy/LossBefore -1.74642e-08 +GaussianMLPPolicy/dLoss 0.000104333 +Iteration 55 +MetaTest/Average/AverageDiscountedReturn -35.3312 +MetaTest/Average/AverageReturn -35.3312 +MetaTest/Average/Iteration 55 +MetaTest/Average/MaxReturn 38.8443 +MetaTest/Average/MinReturn -63.0523 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.6629 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.3312 +MetaTest/__unnamed_task__/AverageReturn -35.3312 +MetaTest/__unnamed_task__/Iteration 55 +MetaTest/__unnamed_task__/MaxReturn 38.8443 +MetaTest/__unnamed_task__/MinReturn -63.0523 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.6629 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.792e+06 +__unnamed_task__/AverageDiscountedReturn -17.0121 +__unnamed_task__/AverageReturn -39.4175 +__unnamed_task__/Iteration 55 +__unnamed_task__/MaxReturn 8.72458 +__unnamed_task__/MinReturn -73.2583 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:14:49 | [maml_trainer] epoch #56 | Sampling for adapation and meta-testing... +2025-04-02 21:16:21 | [maml_trainer] epoch #56 | Finished meta-testing... +2025-04-02 21:16:21 | [maml_trainer] epoch #56 | Saving snapshot... +2025-04-02 21:16:42 | [maml_trainer] epoch #56 | Saved +2025-04-02 21:16:42 | [maml_trainer] epoch #56 | Time 26624.49 s +2025-04-02 21:16:42 | [maml_trainer] epoch #56 | EpochTime 486.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7748 +Average/AverageReturn -39.3114 +Average/Iteration 56 +Average/MaxReturn 4.61241 +Average/MinReturn -77.203 +Average/NumEpisodes 80 +Average/StdReturn 13.8286 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98951 +GaussianMLPPolicy/KLAfter 0.00198677 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.10429e-05 +GaussianMLPPolicy/LossBefore -1.49012e-09 +GaussianMLPPolicy/dLoss 3.10414e-05 +Iteration 56 +MetaTest/Average/AverageDiscountedReturn -41.0579 +MetaTest/Average/AverageReturn -41.0579 +MetaTest/Average/Iteration 56 +MetaTest/Average/MaxReturn 5.54115 +MetaTest/Average/MinReturn -63.094 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8612 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.0579 +MetaTest/__unnamed_task__/AverageReturn -41.0579 +MetaTest/__unnamed_task__/Iteration 56 +MetaTest/__unnamed_task__/MaxReturn 5.54115 +MetaTest/__unnamed_task__/MinReturn -63.094 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8612 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.824e+06 +__unnamed_task__/AverageDiscountedReturn -16.7748 +__unnamed_task__/AverageReturn -39.3114 +__unnamed_task__/Iteration 56 +__unnamed_task__/MaxReturn 4.61241 +__unnamed_task__/MinReturn -77.203 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8286 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:22:53 | [maml_trainer] epoch #57 | Sampling for adapation and meta-testing... +2025-04-02 21:24:26 | [maml_trainer] epoch #57 | Finished meta-testing... +2025-04-02 21:24:26 | [maml_trainer] epoch #57 | Saving snapshot... +2025-04-02 21:24:49 | [maml_trainer] epoch #57 | Saved +2025-04-02 21:24:49 | [maml_trainer] epoch #57 | Time 27111.19 s +2025-04-02 21:24:49 | [maml_trainer] epoch #57 | EpochTime 486.70 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.55 +Average/AverageReturn -41.0559 +Average/Iteration 57 +Average/MaxReturn 5.77807 +Average/MinReturn -73.2814 +Average/NumEpisodes 80 +Average/StdReturn 14.1116 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98782 +GaussianMLPPolicy/KLAfter 0.00323137 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000116357 +GaussianMLPPolicy/LossBefore -1.19209e-10 +GaussianMLPPolicy/dLoss 0.000116357 +Iteration 57 +MetaTest/Average/AverageDiscountedReturn -41.7192 +MetaTest/Average/AverageReturn -41.7192 +MetaTest/Average/Iteration 57 +MetaTest/Average/MaxReturn -20.5166 +MetaTest/Average/MinReturn -62.0176 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0142 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.7192 +MetaTest/__unnamed_task__/AverageReturn -41.7192 +MetaTest/__unnamed_task__/Iteration 57 +MetaTest/__unnamed_task__/MaxReturn -20.5166 +MetaTest/__unnamed_task__/MinReturn -62.0176 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0142 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.856e+06 +__unnamed_task__/AverageDiscountedReturn -17.55 +__unnamed_task__/AverageReturn -41.0559 +__unnamed_task__/Iteration 57 +__unnamed_task__/MaxReturn 5.77807 +__unnamed_task__/MinReturn -73.2814 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.1116 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:31:04 | [maml_trainer] epoch #58 | Sampling for adapation and meta-testing... +2025-04-02 21:32:34 | [maml_trainer] epoch #58 | Finished meta-testing... +2025-04-02 21:32:34 | [maml_trainer] epoch #58 | Saving snapshot... +2025-04-02 21:32:55 | [maml_trainer] epoch #58 | Saved +2025-04-02 21:32:55 | [maml_trainer] epoch #58 | Time 27597.96 s +2025-04-02 21:32:55 | [maml_trainer] epoch #58 | EpochTime 486.77 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5336 +Average/AverageReturn -40.0304 +Average/Iteration 58 +Average/MaxReturn 2.04219 +Average/MinReturn -72.2847 +Average/NumEpisodes 80 +Average/StdReturn 12.7998 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98719 +GaussianMLPPolicy/KLAfter 0.00498967 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000133069 +GaussianMLPPolicy/LossBefore 1.01328e-09 +GaussianMLPPolicy/dLoss -0.000133068 +Iteration 58 +MetaTest/Average/AverageDiscountedReturn -40.9204 +MetaTest/Average/AverageReturn -40.9204 +MetaTest/Average/Iteration 58 +MetaTest/Average/MaxReturn -24.2372 +MetaTest/Average/MinReturn -82.6656 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.1306 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.9204 +MetaTest/__unnamed_task__/AverageReturn -40.9204 +MetaTest/__unnamed_task__/Iteration 58 +MetaTest/__unnamed_task__/MaxReturn -24.2372 +MetaTest/__unnamed_task__/MinReturn -82.6656 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.1306 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.888e+06 +__unnamed_task__/AverageDiscountedReturn -17.5336 +__unnamed_task__/AverageReturn -40.0304 +__unnamed_task__/Iteration 58 +__unnamed_task__/MaxReturn 2.04219 +__unnamed_task__/MinReturn -72.2847 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.7998 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:38:51 | [maml_trainer] epoch #59 | Sampling for adapation and meta-testing... +2025-04-02 21:40:20 | [maml_trainer] epoch #59 | Finished meta-testing... +2025-04-02 21:40:20 | [maml_trainer] epoch #59 | Saving snapshot... +2025-04-02 21:40:41 | [maml_trainer] epoch #59 | Saved +2025-04-02 21:40:41 | [maml_trainer] epoch #59 | Time 28063.25 s +2025-04-02 21:40:41 | [maml_trainer] epoch #59 | EpochTime 465.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6379 +Average/AverageReturn -37.54 +Average/Iteration 59 +Average/MaxReturn 6.18578 +Average/MinReturn -64.8999 +Average/NumEpisodes 80 +Average/StdReturn 12.7921 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98467 +GaussianMLPPolicy/KLAfter 0.00384119 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.04382e-05 +GaussianMLPPolicy/LossBefore 1.96397e-08 +GaussianMLPPolicy/dLoss 8.04578e-05 +Iteration 59 +MetaTest/Average/AverageDiscountedReturn -39.0978 +MetaTest/Average/AverageReturn -39.0978 +MetaTest/Average/Iteration 59 +MetaTest/Average/MaxReturn 24.9568 +MetaTest/Average/MinReturn -60.0183 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.0399 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.0978 +MetaTest/__unnamed_task__/AverageReturn -39.0978 +MetaTest/__unnamed_task__/Iteration 59 +MetaTest/__unnamed_task__/MaxReturn 24.9568 +MetaTest/__unnamed_task__/MinReturn -60.0183 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.0399 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.92e+06 +__unnamed_task__/AverageDiscountedReturn -16.6379 +__unnamed_task__/AverageReturn -37.54 +__unnamed_task__/Iteration 59 +__unnamed_task__/MaxReturn 6.18578 +__unnamed_task__/MinReturn -64.8999 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.7921 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:46:37 | [maml_trainer] epoch #60 | Sampling for adapation and meta-testing... +2025-04-02 21:48:04 | [maml_trainer] epoch #60 | Finished meta-testing... +2025-04-02 21:48:04 | [maml_trainer] epoch #60 | Saving snapshot... +2025-04-02 21:48:25 | [maml_trainer] epoch #60 | Saved +2025-04-02 21:48:25 | [maml_trainer] epoch #60 | Time 28527.67 s +2025-04-02 21:48:25 | [maml_trainer] epoch #60 | EpochTime 464.42 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2271 +Average/AverageReturn -39.4297 +Average/Iteration 60 +Average/MaxReturn -13.9089 +Average/MinReturn -69.983 +Average/NumEpisodes 80 +Average/StdReturn 10.6151 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.98185 +GaussianMLPPolicy/KLAfter 0.00319016 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000109662 +GaussianMLPPolicy/LossBefore -5.54323e-09 +GaussianMLPPolicy/dLoss 0.000109656 +Iteration 60 +MetaTest/Average/AverageDiscountedReturn -43.4704 +MetaTest/Average/AverageReturn -43.4704 +MetaTest/Average/Iteration 60 +MetaTest/Average/MaxReturn -26.6324 +MetaTest/Average/MinReturn -83.954 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.494 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.4704 +MetaTest/__unnamed_task__/AverageReturn -43.4704 +MetaTest/__unnamed_task__/Iteration 60 +MetaTest/__unnamed_task__/MaxReturn -26.6324 +MetaTest/__unnamed_task__/MinReturn -83.954 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.494 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.952e+06 +__unnamed_task__/AverageDiscountedReturn -17.2271 +__unnamed_task__/AverageReturn -39.4297 +__unnamed_task__/Iteration 60 +__unnamed_task__/MaxReturn -13.9089 +__unnamed_task__/MinReturn -69.983 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.6151 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 21:54:19 | [maml_trainer] epoch #61 | Sampling for adapation and meta-testing... +2025-04-02 21:55:47 | [maml_trainer] epoch #61 | Finished meta-testing... +2025-04-02 21:55:47 | [maml_trainer] epoch #61 | Saving snapshot... +2025-04-02 21:56:07 | [maml_trainer] epoch #61 | Saved +2025-04-02 21:56:07 | [maml_trainer] epoch #61 | Time 28989.21 s +2025-04-02 21:56:07 | [maml_trainer] epoch #61 | EpochTime 461.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6405 +Average/AverageReturn -38.2825 +Average/Iteration 61 +Average/MaxReturn -2.68104 +Average/MinReturn -71.1585 +Average/NumEpisodes 80 +Average/StdReturn 11.6675 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9792 +GaussianMLPPolicy/KLAfter 0.00272085 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.4413e-05 +GaussianMLPPolicy/LossBefore 1.37985e-08 +GaussianMLPPolicy/dLoss -6.43992e-05 +Iteration 61 +MetaTest/Average/AverageDiscountedReturn -40.2001 +MetaTest/Average/AverageReturn -40.2001 +MetaTest/Average/Iteration 61 +MetaTest/Average/MaxReturn -21.653 +MetaTest/Average/MinReturn -59.8502 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.19073 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.2001 +MetaTest/__unnamed_task__/AverageReturn -40.2001 +MetaTest/__unnamed_task__/Iteration 61 +MetaTest/__unnamed_task__/MaxReturn -21.653 +MetaTest/__unnamed_task__/MinReturn -59.8502 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.19073 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.984e+06 +__unnamed_task__/AverageDiscountedReturn -16.6405 +__unnamed_task__/AverageReturn -38.2825 +__unnamed_task__/Iteration 61 +__unnamed_task__/MaxReturn -2.68104 +__unnamed_task__/MinReturn -71.1585 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6675 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:01:59 | [maml_trainer] epoch #62 | Sampling for adapation and meta-testing... +2025-04-02 22:03:28 | [maml_trainer] epoch #62 | Finished meta-testing... +2025-04-02 22:03:28 | [maml_trainer] epoch #62 | Saving snapshot... +2025-04-02 22:03:50 | [maml_trainer] epoch #62 | Saved +2025-04-02 22:03:50 | [maml_trainer] epoch #62 | Time 29452.22 s +2025-04-02 22:03:50 | [maml_trainer] epoch #62 | EpochTime 463.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5615 +Average/AverageReturn -40.3479 +Average/Iteration 62 +Average/MaxReturn -4.81493 +Average/MinReturn -68.7044 +Average/NumEpisodes 80 +Average/StdReturn 12.4399 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97748 +GaussianMLPPolicy/KLAfter 0.00179534 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000100537 +GaussianMLPPolicy/LossBefore -1.24276e-08 +GaussianMLPPolicy/dLoss 0.000100525 +Iteration 62 +MetaTest/Average/AverageDiscountedReturn -40.8026 +MetaTest/Average/AverageReturn -40.8026 +MetaTest/Average/Iteration 62 +MetaTest/Average/MaxReturn -6.20687 +MetaTest/Average/MinReturn -72.5519 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.2169 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.8026 +MetaTest/__unnamed_task__/AverageReturn -40.8026 +MetaTest/__unnamed_task__/Iteration 62 +MetaTest/__unnamed_task__/MaxReturn -6.20687 +MetaTest/__unnamed_task__/MinReturn -72.5519 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.2169 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.016e+06 +__unnamed_task__/AverageDiscountedReturn -17.5615 +__unnamed_task__/AverageReturn -40.3479 +__unnamed_task__/Iteration 62 +__unnamed_task__/MaxReturn -4.81493 +__unnamed_task__/MinReturn -68.7044 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4399 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:10:22 | [maml_trainer] epoch #63 | Sampling for adapation and meta-testing... +2025-04-02 22:11:50 | [maml_trainer] epoch #63 | Finished meta-testing... +2025-04-02 22:11:50 | [maml_trainer] epoch #63 | Saving snapshot... +2025-04-02 22:12:12 | [maml_trainer] epoch #63 | Saved +2025-04-02 22:12:12 | [maml_trainer] epoch #63 | Time 29954.19 s +2025-04-02 22:12:12 | [maml_trainer] epoch #63 | EpochTime 501.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5248 +Average/AverageReturn -37.9621 +Average/Iteration 63 +Average/MaxReturn 45.0245 +Average/MinReturn -64.4337 +Average/NumEpisodes 80 +Average/StdReturn 14.6074 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97436 +GaussianMLPPolicy/KLAfter 0.00259448 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.12332e-05 +GaussianMLPPolicy/LossBefore 1.20699e-08 +GaussianMLPPolicy/dLoss -7.12211e-05 +Iteration 63 +MetaTest/Average/AverageDiscountedReturn -36.4001 +MetaTest/Average/AverageReturn -36.4001 +MetaTest/Average/Iteration 63 +MetaTest/Average/MaxReturn -1.08814 +MetaTest/Average/MinReturn -48.8931 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.2783 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.4001 +MetaTest/__unnamed_task__/AverageReturn -36.4001 +MetaTest/__unnamed_task__/Iteration 63 +MetaTest/__unnamed_task__/MaxReturn -1.08814 +MetaTest/__unnamed_task__/MinReturn -48.8931 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.2783 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.048e+06 +__unnamed_task__/AverageDiscountedReturn -16.5248 +__unnamed_task__/AverageReturn -37.9621 +__unnamed_task__/Iteration 63 +__unnamed_task__/MaxReturn 45.0245 +__unnamed_task__/MinReturn -64.4337 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6074 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:18:11 | [maml_trainer] epoch #64 | Sampling for adapation and meta-testing... +2025-04-02 22:19:39 | [maml_trainer] epoch #64 | Finished meta-testing... +2025-04-02 22:19:39 | [maml_trainer] epoch #64 | Saving snapshot... +2025-04-02 22:19:59 | [maml_trainer] epoch #64 | Saved +2025-04-02 22:19:59 | [maml_trainer] epoch #64 | Time 30421.14 s +2025-04-02 22:19:59 | [maml_trainer] epoch #64 | EpochTime 466.94 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5386 +Average/AverageReturn -37.0669 +Average/Iteration 64 +Average/MaxReturn -1.26593 +Average/MinReturn -64.5418 +Average/NumEpisodes 80 +Average/StdReturn 10.335 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97138 +GaussianMLPPolicy/KLAfter 0.00357038 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000130051 +GaussianMLPPolicy/LossBefore 1.7643e-08 +GaussianMLPPolicy/dLoss 0.000130069 +Iteration 64 +MetaTest/Average/AverageDiscountedReturn -38.5091 +MetaTest/Average/AverageReturn -38.5091 +MetaTest/Average/Iteration 64 +MetaTest/Average/MaxReturn -27.6911 +MetaTest/Average/MinReturn -59.7297 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.07574 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.5091 +MetaTest/__unnamed_task__/AverageReturn -38.5091 +MetaTest/__unnamed_task__/Iteration 64 +MetaTest/__unnamed_task__/MaxReturn -27.6911 +MetaTest/__unnamed_task__/MinReturn -59.7297 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.07574 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.08e+06 +__unnamed_task__/AverageDiscountedReturn -16.5386 +__unnamed_task__/AverageReturn -37.0669 +__unnamed_task__/Iteration 64 +__unnamed_task__/MaxReturn -1.26593 +__unnamed_task__/MinReturn -64.5418 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.335 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:25:52 | [maml_trainer] epoch #65 | Sampling for adapation and meta-testing... +2025-04-02 22:27:22 | [maml_trainer] epoch #65 | Finished meta-testing... +2025-04-02 22:27:22 | [maml_trainer] epoch #65 | Saving snapshot... +2025-04-02 22:27:44 | [maml_trainer] epoch #65 | Saved +2025-04-02 22:27:44 | [maml_trainer] epoch #65 | Time 30886.58 s +2025-04-02 22:27:44 | [maml_trainer] epoch #65 | EpochTime 465.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9135 +Average/AverageReturn -38.7397 +Average/Iteration 65 +Average/MaxReturn 2.06965 +Average/MinReturn -63.5179 +Average/NumEpisodes 80 +Average/StdReturn 11.4032 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96936 +GaussianMLPPolicy/KLAfter 0.00398799 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000103308 +GaussianMLPPolicy/LossBefore -2.77758e-08 +GaussianMLPPolicy/dLoss 0.00010328 +Iteration 65 +MetaTest/Average/AverageDiscountedReturn -39.8771 +MetaTest/Average/AverageReturn -39.8771 +MetaTest/Average/Iteration 65 +MetaTest/Average/MaxReturn -11.0893 +MetaTest/Average/MinReturn -64.5025 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5833 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.8771 +MetaTest/__unnamed_task__/AverageReturn -39.8771 +MetaTest/__unnamed_task__/Iteration 65 +MetaTest/__unnamed_task__/MaxReturn -11.0893 +MetaTest/__unnamed_task__/MinReturn -64.5025 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5833 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.112e+06 +__unnamed_task__/AverageDiscountedReturn -16.9135 +__unnamed_task__/AverageReturn -38.7397 +__unnamed_task__/Iteration 65 +__unnamed_task__/MaxReturn 2.06965 +__unnamed_task__/MinReturn -63.5179 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4032 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:33:52 | [maml_trainer] epoch #66 | Sampling for adapation and meta-testing... +2025-04-02 22:35:21 | [maml_trainer] epoch #66 | Finished meta-testing... +2025-04-02 22:35:21 | [maml_trainer] epoch #66 | Saving snapshot... +2025-04-02 22:35:42 | [maml_trainer] epoch #66 | Saved +2025-04-02 22:35:42 | [maml_trainer] epoch #66 | Time 31364.77 s +2025-04-02 22:35:42 | [maml_trainer] epoch #66 | EpochTime 478.18 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.1951 +Average/AverageReturn -38.9947 +Average/Iteration 66 +Average/MaxReturn 9.86626 +Average/MinReturn -75.6201 +Average/NumEpisodes 80 +Average/StdReturn 13.2836 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96851 +GaussianMLPPolicy/KLAfter 0.00345285 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.98702e-05 +GaussianMLPPolicy/LossBefore -1.13249e-09 +GaussianMLPPolicy/dLoss -4.98713e-05 +Iteration 66 +MetaTest/Average/AverageDiscountedReturn -41.2995 +MetaTest/Average/AverageReturn -41.2995 +MetaTest/Average/Iteration 66 +MetaTest/Average/MaxReturn -20.4457 +MetaTest/Average/MinReturn -54.3312 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.9851 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.2995 +MetaTest/__unnamed_task__/AverageReturn -41.2995 +MetaTest/__unnamed_task__/Iteration 66 +MetaTest/__unnamed_task__/MaxReturn -20.4457 +MetaTest/__unnamed_task__/MinReturn -54.3312 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.9851 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.144e+06 +__unnamed_task__/AverageDiscountedReturn -17.1951 +__unnamed_task__/AverageReturn -38.9947 +__unnamed_task__/Iteration 66 +__unnamed_task__/MaxReturn 9.86626 +__unnamed_task__/MinReturn -75.6201 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2836 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:41:37 | [maml_trainer] epoch #67 | Sampling for adapation and meta-testing... +2025-04-02 22:43:06 | [maml_trainer] epoch #67 | Finished meta-testing... +2025-04-02 22:43:06 | [maml_trainer] epoch #67 | Saving snapshot... +2025-04-02 22:43:28 | [maml_trainer] epoch #67 | Saved +2025-04-02 22:43:28 | [maml_trainer] epoch #67 | Time 31830.03 s +2025-04-02 22:43:28 | [maml_trainer] epoch #67 | EpochTime 465.26 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3391 +Average/AverageReturn -37.3869 +Average/Iteration 67 +Average/MaxReturn 1.06043 +Average/MinReturn -68.7952 +Average/NumEpisodes 80 +Average/StdReturn 10.677 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96683 +GaussianMLPPolicy/KLAfter 0.00204147 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000103619 +GaussianMLPPolicy/LossBefore 2.20537e-09 +GaussianMLPPolicy/dLoss -0.000103617 +Iteration 67 +MetaTest/Average/AverageDiscountedReturn -38.5436 +MetaTest/Average/AverageReturn -38.5436 +MetaTest/Average/Iteration 67 +MetaTest/Average/MaxReturn 0.163547 +MetaTest/Average/MinReturn -65.3543 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.2567 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.5436 +MetaTest/__unnamed_task__/AverageReturn -38.5436 +MetaTest/__unnamed_task__/Iteration 67 +MetaTest/__unnamed_task__/MaxReturn 0.163547 +MetaTest/__unnamed_task__/MinReturn -65.3543 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.2567 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.176e+06 +__unnamed_task__/AverageDiscountedReturn -16.3391 +__unnamed_task__/AverageReturn -37.3869 +__unnamed_task__/Iteration 67 +__unnamed_task__/MaxReturn 1.06043 +__unnamed_task__/MinReturn -68.7952 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.677 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:49:27 | [maml_trainer] epoch #68 | Sampling for adapation and meta-testing... +2025-04-02 22:50:58 | [maml_trainer] epoch #68 | Finished meta-testing... +2025-04-02 22:50:58 | [maml_trainer] epoch #68 | Saving snapshot... +2025-04-02 22:51:20 | [maml_trainer] epoch #68 | Saved +2025-04-02 22:51:20 | [maml_trainer] epoch #68 | Time 32302.94 s +2025-04-02 22:51:20 | [maml_trainer] epoch #68 | EpochTime 472.91 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.1112 +Average/AverageReturn -37.282 +Average/Iteration 68 +Average/MaxReturn -8.17799 +Average/MinReturn -68.0425 +Average/NumEpisodes 80 +Average/StdReturn 11.3885 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96691 +GaussianMLPPolicy/KLAfter 0.00272044 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.48484e-05 +GaussianMLPPolicy/LossBefore -1.10269e-08 +GaussianMLPPolicy/dLoss -6.48594e-05 +Iteration 68 +MetaTest/Average/AverageDiscountedReturn -43.6048 +MetaTest/Average/AverageReturn -43.6048 +MetaTest/Average/Iteration 68 +MetaTest/Average/MaxReturn -27.3714 +MetaTest/Average/MinReturn -68.8329 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.8462 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.6048 +MetaTest/__unnamed_task__/AverageReturn -43.6048 +MetaTest/__unnamed_task__/Iteration 68 +MetaTest/__unnamed_task__/MaxReturn -27.3714 +MetaTest/__unnamed_task__/MinReturn -68.8329 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.8462 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.208e+06 +__unnamed_task__/AverageDiscountedReturn -16.1112 +__unnamed_task__/AverageReturn -37.282 +__unnamed_task__/Iteration 68 +__unnamed_task__/MaxReturn -8.17799 +__unnamed_task__/MinReturn -68.0425 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3885 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 22:57:23 | [maml_trainer] epoch #69 | Sampling for adapation and meta-testing... +2025-04-02 22:58:54 | [maml_trainer] epoch #69 | Finished meta-testing... +2025-04-02 22:58:54 | [maml_trainer] epoch #69 | Saving snapshot... +2025-04-02 22:59:13 | [maml_trainer] epoch #69 | Saved +2025-04-02 22:59:13 | [maml_trainer] epoch #69 | Time 32775.87 s +2025-04-02 22:59:13 | [maml_trainer] epoch #69 | EpochTime 472.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5656 +Average/AverageReturn -39.8788 +Average/Iteration 69 +Average/MaxReturn 5.35179 +Average/MinReturn -66.4829 +Average/NumEpisodes 80 +Average/StdReturn 11.939 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9665 +GaussianMLPPolicy/KLAfter 0.00318406 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.38068e-05 +GaussianMLPPolicy/LossBefore 9.47714e-09 +GaussianMLPPolicy/dLoss 5.38163e-05 +Iteration 69 +MetaTest/Average/AverageDiscountedReturn -42.9057 +MetaTest/Average/AverageReturn -42.9057 +MetaTest/Average/Iteration 69 +MetaTest/Average/MaxReturn -1.89653 +MetaTest/Average/MinReturn -60.8691 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0574 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.9057 +MetaTest/__unnamed_task__/AverageReturn -42.9057 +MetaTest/__unnamed_task__/Iteration 69 +MetaTest/__unnamed_task__/MaxReturn -1.89653 +MetaTest/__unnamed_task__/MinReturn -60.8691 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0574 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.24e+06 +__unnamed_task__/AverageDiscountedReturn -17.5656 +__unnamed_task__/AverageReturn -39.8788 +__unnamed_task__/Iteration 69 +__unnamed_task__/MaxReturn 5.35179 +__unnamed_task__/MinReturn -66.4829 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.939 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:05:09 | [maml_trainer] epoch #70 | Sampling for adapation and meta-testing... +2025-04-02 23:06:40 | [maml_trainer] epoch #70 | Finished meta-testing... +2025-04-02 23:06:40 | [maml_trainer] epoch #70 | Saving snapshot... +2025-04-02 23:07:02 | [maml_trainer] epoch #70 | Saved +2025-04-02 23:07:02 | [maml_trainer] epoch #70 | Time 33244.53 s +2025-04-02 23:07:02 | [maml_trainer] epoch #70 | EpochTime 468.66 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.1904 +Average/AverageReturn -39.2252 +Average/Iteration 70 +Average/MaxReturn -6.04391 +Average/MinReturn -67.8778 +Average/NumEpisodes 80 +Average/StdReturn 11.3896 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96567 +GaussianMLPPolicy/KLAfter 0.00360157 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000173519 +GaussianMLPPolicy/LossBefore -9.11951e-09 +GaussianMLPPolicy/dLoss -0.000173528 +Iteration 70 +MetaTest/Average/AverageDiscountedReturn -35.8258 +MetaTest/Average/AverageReturn -35.8258 +MetaTest/Average/Iteration 70 +MetaTest/Average/MaxReturn 116.32 +MetaTest/Average/MinReturn -73.1053 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 37.4832 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.8258 +MetaTest/__unnamed_task__/AverageReturn -35.8258 +MetaTest/__unnamed_task__/Iteration 70 +MetaTest/__unnamed_task__/MaxReturn 116.32 +MetaTest/__unnamed_task__/MinReturn -73.1053 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 37.4832 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.272e+06 +__unnamed_task__/AverageDiscountedReturn -17.1904 +__unnamed_task__/AverageReturn -39.2252 +__unnamed_task__/Iteration 70 +__unnamed_task__/MaxReturn -6.04391 +__unnamed_task__/MinReturn -67.8778 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3896 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:13:02 | [maml_trainer] epoch #71 | Sampling for adapation and meta-testing... +2025-04-02 23:14:33 | [maml_trainer] epoch #71 | Finished meta-testing... +2025-04-02 23:14:33 | [maml_trainer] epoch #71 | Saving snapshot... +2025-04-02 23:14:54 | [maml_trainer] epoch #71 | Saved +2025-04-02 23:14:54 | [maml_trainer] epoch #71 | Time 33716.89 s +2025-04-02 23:14:54 | [maml_trainer] epoch #71 | EpochTime 472.36 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.481 +Average/AverageReturn -39.6169 +Average/Iteration 71 +Average/MaxReturn -9.52422 +Average/MinReturn -68.4776 +Average/NumEpisodes 80 +Average/StdReturn 10.8253 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96463 +GaussianMLPPolicy/KLAfter 0.00204609 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.93513e-05 +GaussianMLPPolicy/LossBefore 4.11272e-09 +GaussianMLPPolicy/dLoss 9.93554e-05 +Iteration 71 +MetaTest/Average/AverageDiscountedReturn -40.357 +MetaTest/Average/AverageReturn -40.357 +MetaTest/Average/Iteration 71 +MetaTest/Average/MaxReturn -27.1081 +MetaTest/Average/MinReturn -59.6102 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.8354 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.357 +MetaTest/__unnamed_task__/AverageReturn -40.357 +MetaTest/__unnamed_task__/Iteration 71 +MetaTest/__unnamed_task__/MaxReturn -27.1081 +MetaTest/__unnamed_task__/MinReturn -59.6102 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.8354 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.304e+06 +__unnamed_task__/AverageDiscountedReturn -17.481 +__unnamed_task__/AverageReturn -39.6169 +__unnamed_task__/Iteration 71 +__unnamed_task__/MaxReturn -9.52422 +__unnamed_task__/MinReturn -68.4776 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8253 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:20:52 | [maml_trainer] epoch #72 | Sampling for adapation and meta-testing... +2025-04-02 23:22:20 | [maml_trainer] epoch #72 | Finished meta-testing... +2025-04-02 23:22:20 | [maml_trainer] epoch #72 | Saving snapshot... +2025-04-02 23:22:40 | [maml_trainer] epoch #72 | Saved +2025-04-02 23:22:40 | [maml_trainer] epoch #72 | Time 34182.58 s +2025-04-02 23:22:40 | [maml_trainer] epoch #72 | EpochTime 465.68 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8467 +Average/AverageReturn -39.0216 +Average/Iteration 72 +Average/MaxReturn -12.0807 +Average/MinReturn -80.1126 +Average/NumEpisodes 80 +Average/StdReturn 11.467 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96562 +GaussianMLPPolicy/KLAfter 0.00382142 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.76658e-05 +GaussianMLPPolicy/LossBefore -3.69549e-09 +GaussianMLPPolicy/dLoss 3.76621e-05 +Iteration 72 +MetaTest/Average/AverageDiscountedReturn -40.1844 +MetaTest/Average/AverageReturn -40.1844 +MetaTest/Average/Iteration 72 +MetaTest/Average/MaxReturn 10.7804 +MetaTest/Average/MinReturn -76.4069 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1874 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.1844 +MetaTest/__unnamed_task__/AverageReturn -40.1844 +MetaTest/__unnamed_task__/Iteration 72 +MetaTest/__unnamed_task__/MaxReturn 10.7804 +MetaTest/__unnamed_task__/MinReturn -76.4069 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1874 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.336e+06 +__unnamed_task__/AverageDiscountedReturn -16.8467 +__unnamed_task__/AverageReturn -39.0216 +__unnamed_task__/Iteration 72 +__unnamed_task__/MaxReturn -12.0807 +__unnamed_task__/MinReturn -80.1126 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.467 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:28:48 | [maml_trainer] epoch #73 | Sampling for adapation and meta-testing... +2025-04-02 23:30:21 | [maml_trainer] epoch #73 | Finished meta-testing... +2025-04-02 23:30:21 | [maml_trainer] epoch #73 | Saving snapshot... +2025-04-02 23:30:42 | [maml_trainer] epoch #73 | Saved +2025-04-02 23:30:42 | [maml_trainer] epoch #73 | Time 34664.91 s +2025-04-02 23:30:42 | [maml_trainer] epoch #73 | EpochTime 482.33 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.941 +Average/AverageReturn -40.2621 +Average/Iteration 73 +Average/MaxReturn 6.55845 +Average/MinReturn -76.3434 +Average/NumEpisodes 80 +Average/StdReturn 11.1301 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96708 +GaussianMLPPolicy/KLAfter 0.00481048 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000214189 +GaussianMLPPolicy/LossBefore 3.12924e-09 +GaussianMLPPolicy/dLoss -0.000214186 +Iteration 73 +MetaTest/Average/AverageDiscountedReturn -38.9346 +MetaTest/Average/AverageReturn -38.9346 +MetaTest/Average/Iteration 73 +MetaTest/Average/MaxReturn -21.6439 +MetaTest/Average/MinReturn -58.4505 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.64991 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.9346 +MetaTest/__unnamed_task__/AverageReturn -38.9346 +MetaTest/__unnamed_task__/Iteration 73 +MetaTest/__unnamed_task__/MaxReturn -21.6439 +MetaTest/__unnamed_task__/MinReturn -58.4505 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.64991 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.368e+06 +__unnamed_task__/AverageDiscountedReturn -17.941 +__unnamed_task__/AverageReturn -40.2621 +__unnamed_task__/Iteration 73 +__unnamed_task__/MaxReturn 6.55845 +__unnamed_task__/MinReturn -76.3434 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1301 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:36:52 | [maml_trainer] epoch #74 | Sampling for adapation and meta-testing... +2025-04-02 23:38:21 | [maml_trainer] epoch #74 | Finished meta-testing... +2025-04-02 23:38:21 | [maml_trainer] epoch #74 | Saving snapshot... +2025-04-02 23:38:42 | [maml_trainer] epoch #74 | Saved +2025-04-02 23:38:42 | [maml_trainer] epoch #74 | Time 35144.46 s +2025-04-02 23:38:42 | [maml_trainer] epoch #74 | EpochTime 479.54 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7789 +Average/AverageReturn -40.2635 +Average/Iteration 74 +Average/MaxReturn -22.5067 +Average/MinReturn -64.4049 +Average/NumEpisodes 80 +Average/StdReturn 9.72514 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96813 +GaussianMLPPolicy/KLAfter 0.00519627 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.84288e-05 +GaussianMLPPolicy/LossBefore 1.21593e-08 +GaussianMLPPolicy/dLoss 8.8441e-05 +Iteration 74 +MetaTest/Average/AverageDiscountedReturn -38.0654 +MetaTest/Average/AverageReturn -38.0654 +MetaTest/Average/Iteration 74 +MetaTest/Average/MaxReturn -11.5643 +MetaTest/Average/MinReturn -53.8329 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7607 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.0654 +MetaTest/__unnamed_task__/AverageReturn -38.0654 +MetaTest/__unnamed_task__/Iteration 74 +MetaTest/__unnamed_task__/MaxReturn -11.5643 +MetaTest/__unnamed_task__/MinReturn -53.8329 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7607 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.4e+06 +__unnamed_task__/AverageDiscountedReturn -17.7789 +__unnamed_task__/AverageReturn -40.2635 +__unnamed_task__/Iteration 74 +__unnamed_task__/MaxReturn -22.5067 +__unnamed_task__/MinReturn -64.4049 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.72514 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:44:35 | [maml_trainer] epoch #75 | Sampling for adapation and meta-testing... +2025-04-02 23:46:03 | [maml_trainer] epoch #75 | Finished meta-testing... +2025-04-02 23:46:03 | [maml_trainer] epoch #75 | Saving snapshot... +2025-04-02 23:46:23 | [maml_trainer] epoch #75 | Saved +2025-04-02 23:46:23 | [maml_trainer] epoch #75 | Time 35605.76 s +2025-04-02 23:46:23 | [maml_trainer] epoch #75 | EpochTime 461.30 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8487 +Average/AverageReturn -40.3312 +Average/Iteration 75 +Average/MaxReturn -14.8407 +Average/MinReturn -74.7663 +Average/NumEpisodes 80 +Average/StdReturn 10.3171 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96973 +GaussianMLPPolicy/KLAfter 0.00442886 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000139342 +GaussianMLPPolicy/LossBefore -2.0206e-08 +GaussianMLPPolicy/dLoss 0.000139322 +Iteration 75 +MetaTest/Average/AverageDiscountedReturn -42.9911 +MetaTest/Average/AverageReturn -42.9911 +MetaTest/Average/Iteration 75 +MetaTest/Average/MaxReturn -22.5972 +MetaTest/Average/MinReturn -67.5438 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.077 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.9911 +MetaTest/__unnamed_task__/AverageReturn -42.9911 +MetaTest/__unnamed_task__/Iteration 75 +MetaTest/__unnamed_task__/MaxReturn -22.5972 +MetaTest/__unnamed_task__/MinReturn -67.5438 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.077 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.432e+06 +__unnamed_task__/AverageDiscountedReturn -17.8487 +__unnamed_task__/AverageReturn -40.3312 +__unnamed_task__/Iteration 75 +__unnamed_task__/MaxReturn -14.8407 +__unnamed_task__/MinReturn -74.7663 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3171 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-02 23:52:47 | [maml_trainer] epoch #76 | Sampling for adapation and meta-testing... +2025-04-02 23:54:19 | [maml_trainer] epoch #76 | Finished meta-testing... +2025-04-02 23:54:19 | [maml_trainer] epoch #76 | Saving snapshot... +2025-04-02 23:54:41 | [maml_trainer] epoch #76 | Saved +2025-04-02 23:54:41 | [maml_trainer] epoch #76 | Time 36103.74 s +2025-04-02 23:54:41 | [maml_trainer] epoch #76 | EpochTime 497.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.0644 +Average/AverageReturn -43.1444 +Average/Iteration 76 +Average/MaxReturn -0.523522 +Average/MinReturn -73.5562 +Average/NumEpisodes 80 +Average/StdReturn 12.4047 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97084 +GaussianMLPPolicy/KLAfter 0.00370514 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000145807 +GaussianMLPPolicy/LossBefore -8.31485e-09 +GaussianMLPPolicy/dLoss 0.000145798 +Iteration 76 +MetaTest/Average/AverageDiscountedReturn -43.2937 +MetaTest/Average/AverageReturn -43.2937 +MetaTest/Average/Iteration 76 +MetaTest/Average/MaxReturn -27.0972 +MetaTest/Average/MinReturn -71.584 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0394 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.2937 +MetaTest/__unnamed_task__/AverageReturn -43.2937 +MetaTest/__unnamed_task__/Iteration 76 +MetaTest/__unnamed_task__/MaxReturn -27.0972 +MetaTest/__unnamed_task__/MinReturn -71.584 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0394 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.464e+06 +__unnamed_task__/AverageDiscountedReturn -19.0644 +__unnamed_task__/AverageReturn -43.1444 +__unnamed_task__/Iteration 76 +__unnamed_task__/MaxReturn -0.523522 +__unnamed_task__/MinReturn -73.5562 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4047 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:00:47 | [maml_trainer] epoch #77 | Sampling for adapation and meta-testing... +2025-04-03 00:02:19 | [maml_trainer] epoch #77 | Finished meta-testing... +2025-04-03 00:02:19 | [maml_trainer] epoch #77 | Saving snapshot... +2025-04-03 00:02:41 | [maml_trainer] epoch #77 | Saved +2025-04-03 00:02:41 | [maml_trainer] epoch #77 | Time 36583.79 s +2025-04-03 00:02:41 | [maml_trainer] epoch #77 | EpochTime 480.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0319 +Average/AverageReturn -38.334 +Average/Iteration 77 +Average/MaxReturn 17.9983 +Average/MinReturn -70.4688 +Average/NumEpisodes 80 +Average/StdReturn 12.1729 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97165 +GaussianMLPPolicy/KLAfter 0.00254581 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000151719 +GaussianMLPPolicy/LossBefore 3.78489e-09 +GaussianMLPPolicy/dLoss -0.000151715 +Iteration 77 +MetaTest/Average/AverageDiscountedReturn -38.3163 +MetaTest/Average/AverageReturn -38.3163 +MetaTest/Average/Iteration 77 +MetaTest/Average/MaxReturn -24.5134 +MetaTest/Average/MinReturn -69.7602 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1457 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.3163 +MetaTest/__unnamed_task__/AverageReturn -38.3163 +MetaTest/__unnamed_task__/Iteration 77 +MetaTest/__unnamed_task__/MaxReturn -24.5134 +MetaTest/__unnamed_task__/MinReturn -69.7602 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1457 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.496e+06 +__unnamed_task__/AverageDiscountedReturn -17.0319 +__unnamed_task__/AverageReturn -38.334 +__unnamed_task__/Iteration 77 +__unnamed_task__/MaxReturn 17.9983 +__unnamed_task__/MinReturn -70.4688 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1729 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:08:50 | [maml_trainer] epoch #78 | Sampling for adapation and meta-testing... +2025-04-03 00:10:17 | [maml_trainer] epoch #78 | Finished meta-testing... +2025-04-03 00:10:17 | [maml_trainer] epoch #78 | Saving snapshot... +2025-04-03 00:10:37 | [maml_trainer] epoch #78 | Saved +2025-04-03 00:10:37 | [maml_trainer] epoch #78 | Time 37059.33 s +2025-04-03 00:10:37 | [maml_trainer] epoch #78 | EpochTime 475.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8868 +Average/AverageReturn -39.7274 +Average/Iteration 78 +Average/MaxReturn 16.9398 +Average/MinReturn -71.525 +Average/NumEpisodes 80 +Average/StdReturn 14.3776 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97151 +GaussianMLPPolicy/KLAfter 0.00231471 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.85463e-05 +GaussianMLPPolicy/LossBefore -1.36793e-08 +GaussianMLPPolicy/dLoss 4.85326e-05 +Iteration 78 +MetaTest/Average/AverageDiscountedReturn -41.2451 +MetaTest/Average/AverageReturn -41.2451 +MetaTest/Average/Iteration 78 +MetaTest/Average/MaxReturn -8.06256 +MetaTest/Average/MinReturn -73.5816 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.0957 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.2451 +MetaTest/__unnamed_task__/AverageReturn -41.2451 +MetaTest/__unnamed_task__/Iteration 78 +MetaTest/__unnamed_task__/MaxReturn -8.06256 +MetaTest/__unnamed_task__/MinReturn -73.5816 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.0957 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.528e+06 +__unnamed_task__/AverageDiscountedReturn -17.8868 +__unnamed_task__/AverageReturn -39.7274 +__unnamed_task__/Iteration 78 +__unnamed_task__/MaxReturn 16.9398 +__unnamed_task__/MinReturn -71.525 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.3776 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:16:41 | [maml_trainer] epoch #79 | Sampling for adapation and meta-testing... +2025-04-03 00:18:14 | [maml_trainer] epoch #79 | Finished meta-testing... +2025-04-03 00:18:14 | [maml_trainer] epoch #79 | Saving snapshot... +2025-04-03 00:18:37 | [maml_trainer] epoch #79 | Saved +2025-04-03 00:18:37 | [maml_trainer] epoch #79 | Time 37539.14 s +2025-04-03 00:18:37 | [maml_trainer] epoch #79 | EpochTime 479.80 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4206 +Average/AverageReturn -39.2834 +Average/Iteration 79 +Average/MaxReturn -16.3383 +Average/MinReturn -69.9098 +Average/NumEpisodes 80 +Average/StdReturn 10.4071 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.97058 +GaussianMLPPolicy/KLAfter 0.001783 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000148184 +GaussianMLPPolicy/LossBefore -7.21216e-09 +GaussianMLPPolicy/dLoss 0.000148177 +Iteration 79 +MetaTest/Average/AverageDiscountedReturn -39.3117 +MetaTest/Average/AverageReturn -39.3117 +MetaTest/Average/Iteration 79 +MetaTest/Average/MaxReturn -14.0643 +MetaTest/Average/MinReturn -72.8687 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.8489 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.3117 +MetaTest/__unnamed_task__/AverageReturn -39.3117 +MetaTest/__unnamed_task__/Iteration 79 +MetaTest/__unnamed_task__/MaxReturn -14.0643 +MetaTest/__unnamed_task__/MinReturn -72.8687 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.8489 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.56e+06 +__unnamed_task__/AverageDiscountedReturn -17.4206 +__unnamed_task__/AverageReturn -39.2834 +__unnamed_task__/Iteration 79 +__unnamed_task__/MaxReturn -16.3383 +__unnamed_task__/MinReturn -69.9098 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.4071 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:24:44 | [maml_trainer] epoch #80 | Sampling for adapation and meta-testing... +2025-04-03 00:26:11 | [maml_trainer] epoch #80 | Finished meta-testing... +2025-04-03 00:26:11 | [maml_trainer] epoch #80 | Saving snapshot... +2025-04-03 00:26:32 | [maml_trainer] epoch #80 | Saved +2025-04-03 00:26:32 | [maml_trainer] epoch #80 | Time 38014.67 s +2025-04-03 00:26:32 | [maml_trainer] epoch #80 | EpochTime 475.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9742 +Average/AverageReturn -38.2115 +Average/Iteration 80 +Average/MaxReturn -20.5498 +Average/MinReturn -72.1231 +Average/NumEpisodes 80 +Average/StdReturn 8.98315 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9692 +GaussianMLPPolicy/KLAfter 0.0033096 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000116493 +GaussianMLPPolicy/LossBefore -1.52588e-08 +GaussianMLPPolicy/dLoss 0.000116478 +Iteration 80 +MetaTest/Average/AverageDiscountedReturn -36.244 +MetaTest/Average/AverageReturn -36.244 +MetaTest/Average/Iteration 80 +MetaTest/Average/MaxReturn -12.7253 +MetaTest/Average/MinReturn -60.0022 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3907 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.244 +MetaTest/__unnamed_task__/AverageReturn -36.244 +MetaTest/__unnamed_task__/Iteration 80 +MetaTest/__unnamed_task__/MaxReturn -12.7253 +MetaTest/__unnamed_task__/MinReturn -60.0022 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3907 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.592e+06 +__unnamed_task__/AverageDiscountedReturn -16.9742 +__unnamed_task__/AverageReturn -38.2115 +__unnamed_task__/Iteration 80 +__unnamed_task__/MaxReturn -20.5498 +__unnamed_task__/MinReturn -72.1231 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.98315 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:32:33 | [maml_trainer] epoch #81 | Sampling for adapation and meta-testing... +2025-04-03 00:34:04 | [maml_trainer] epoch #81 | Finished meta-testing... +2025-04-03 00:34:04 | [maml_trainer] epoch #81 | Saving snapshot... +2025-04-03 00:34:26 | [maml_trainer] epoch #81 | Saved +2025-04-03 00:34:26 | [maml_trainer] epoch #81 | Time 38488.66 s +2025-04-03 00:34:26 | [maml_trainer] epoch #81 | EpochTime 473.99 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.84 +Average/AverageReturn -39.9705 +Average/Iteration 81 +Average/MaxReturn -1.97083 +Average/MinReturn -75.545 +Average/NumEpisodes 80 +Average/StdReturn 12.8274 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96797 +GaussianMLPPolicy/KLAfter 0.00287687 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000120789 +GaussianMLPPolicy/LossBefore -1.42157e-08 +GaussianMLPPolicy/dLoss 0.000120775 +Iteration 81 +MetaTest/Average/AverageDiscountedReturn -35.2234 +MetaTest/Average/AverageReturn -35.2234 +MetaTest/Average/Iteration 81 +MetaTest/Average/MaxReturn 25.4606 +MetaTest/Average/MinReturn -58.4673 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.0339 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.2234 +MetaTest/__unnamed_task__/AverageReturn -35.2234 +MetaTest/__unnamed_task__/Iteration 81 +MetaTest/__unnamed_task__/MaxReturn 25.4606 +MetaTest/__unnamed_task__/MinReturn -58.4673 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.0339 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.624e+06 +__unnamed_task__/AverageDiscountedReturn -17.84 +__unnamed_task__/AverageReturn -39.9705 +__unnamed_task__/Iteration 81 +__unnamed_task__/MaxReturn -1.97083 +__unnamed_task__/MinReturn -75.545 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8274 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:40:25 | [maml_trainer] epoch #82 | Sampling for adapation and meta-testing... +2025-04-03 00:41:52 | [maml_trainer] epoch #82 | Finished meta-testing... +2025-04-03 00:41:52 | [maml_trainer] epoch #82 | Saving snapshot... +2025-04-03 00:42:13 | [maml_trainer] epoch #82 | Saved +2025-04-03 00:42:13 | [maml_trainer] epoch #82 | Time 38955.54 s +2025-04-03 00:42:13 | [maml_trainer] epoch #82 | EpochTime 466.88 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.737 +Average/AverageReturn -40.6813 +Average/Iteration 82 +Average/MaxReturn -22.4546 +Average/MinReturn -62.1272 +Average/NumEpisodes 80 +Average/StdReturn 10.4495 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96778 +GaussianMLPPolicy/KLAfter 0.00331948 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.30062e-05 +GaussianMLPPolicy/LossBefore 2.09212e-08 +GaussianMLPPolicy/dLoss 6.30271e-05 +Iteration 82 +MetaTest/Average/AverageDiscountedReturn -38.1407 +MetaTest/Average/AverageReturn -38.1407 +MetaTest/Average/Iteration 82 +MetaTest/Average/MaxReturn -27.6707 +MetaTest/Average/MinReturn -51.8502 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.30499 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.1407 +MetaTest/__unnamed_task__/AverageReturn -38.1407 +MetaTest/__unnamed_task__/Iteration 82 +MetaTest/__unnamed_task__/MaxReturn -27.6707 +MetaTest/__unnamed_task__/MinReturn -51.8502 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.30499 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.656e+06 +__unnamed_task__/AverageDiscountedReturn -17.737 +__unnamed_task__/AverageReturn -40.6813 +__unnamed_task__/Iteration 82 +__unnamed_task__/MaxReturn -22.4546 +__unnamed_task__/MinReturn -62.1272 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.4495 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:48:10 | [maml_trainer] epoch #83 | Sampling for adapation and meta-testing... +2025-04-03 00:49:41 | [maml_trainer] epoch #83 | Finished meta-testing... +2025-04-03 00:49:41 | [maml_trainer] epoch #83 | Saving snapshot... +2025-04-03 00:50:04 | [maml_trainer] epoch #83 | Saved +2025-04-03 00:50:04 | [maml_trainer] epoch #83 | Time 39426.67 s +2025-04-03 00:50:04 | [maml_trainer] epoch #83 | EpochTime 471.12 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7169 +Average/AverageReturn -37.6061 +Average/Iteration 83 +Average/MaxReturn -1.38749 +Average/MinReturn -62.4635 +Average/NumEpisodes 80 +Average/StdReturn 10.8496 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96721 +GaussianMLPPolicy/KLAfter 0.00546135 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000156121 +GaussianMLPPolicy/LossBefore 7.03335e-09 +GaussianMLPPolicy/dLoss 0.000156128 +Iteration 83 +MetaTest/Average/AverageDiscountedReturn -36.4941 +MetaTest/Average/AverageReturn -36.4941 +MetaTest/Average/Iteration 83 +MetaTest/Average/MaxReturn -6.0855 +MetaTest/Average/MinReturn -59.7931 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8194 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.4941 +MetaTest/__unnamed_task__/AverageReturn -36.4941 +MetaTest/__unnamed_task__/Iteration 83 +MetaTest/__unnamed_task__/MaxReturn -6.0855 +MetaTest/__unnamed_task__/MinReturn -59.7931 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8194 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.688e+06 +__unnamed_task__/AverageDiscountedReturn -16.7169 +__unnamed_task__/AverageReturn -37.6061 +__unnamed_task__/Iteration 83 +__unnamed_task__/MaxReturn -1.38749 +__unnamed_task__/MinReturn -62.4635 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8496 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 00:56:11 | [maml_trainer] epoch #84 | Sampling for adapation and meta-testing... +2025-04-03 00:57:43 | [maml_trainer] epoch #84 | Finished meta-testing... +2025-04-03 00:57:43 | [maml_trainer] epoch #84 | Saving snapshot... +2025-04-03 00:58:05 | [maml_trainer] epoch #84 | Saved +2025-04-03 00:58:05 | [maml_trainer] epoch #84 | Time 39907.77 s +2025-04-03 00:58:05 | [maml_trainer] epoch #84 | EpochTime 481.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6885 +Average/AverageReturn -40.3584 +Average/Iteration 84 +Average/MaxReturn -21.7419 +Average/MinReturn -60.8742 +Average/NumEpisodes 80 +Average/StdReturn 9.74539 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96617 +GaussianMLPPolicy/KLAfter 0.00498031 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.40185e-05 +GaussianMLPPolicy/LossBefore -1.58548e-08 +GaussianMLPPolicy/dLoss -1.40343e-05 +Iteration 84 +MetaTest/Average/AverageDiscountedReturn -40.3169 +MetaTest/Average/AverageReturn -40.3169 +MetaTest/Average/Iteration 84 +MetaTest/Average/MaxReturn -12.9616 +MetaTest/Average/MinReturn -57.7449 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.05376 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.3169 +MetaTest/__unnamed_task__/AverageReturn -40.3169 +MetaTest/__unnamed_task__/Iteration 84 +MetaTest/__unnamed_task__/MaxReturn -12.9616 +MetaTest/__unnamed_task__/MinReturn -57.7449 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.05376 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.72e+06 +__unnamed_task__/AverageDiscountedReturn -17.6885 +__unnamed_task__/AverageReturn -40.3584 +__unnamed_task__/Iteration 84 +__unnamed_task__/MaxReturn -21.7419 +__unnamed_task__/MinReturn -60.8742 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.74539 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:04:13 | [maml_trainer] epoch #85 | Sampling for adapation and meta-testing... +2025-04-03 01:05:45 | [maml_trainer] epoch #85 | Finished meta-testing... +2025-04-03 01:05:45 | [maml_trainer] epoch #85 | Saving snapshot... +2025-04-03 01:06:06 | [maml_trainer] epoch #85 | Saved +2025-04-03 01:06:06 | [maml_trainer] epoch #85 | Time 40388.72 s +2025-04-03 01:06:06 | [maml_trainer] epoch #85 | EpochTime 480.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.3131 +Average/AverageReturn -38.5108 +Average/Iteration 85 +Average/MaxReturn -15.918 +Average/MinReturn -65.265 +Average/NumEpisodes 80 +Average/StdReturn 10.1669 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96474 +GaussianMLPPolicy/KLAfter 0.00445755 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.24915e-05 +GaussianMLPPolicy/LossBefore 9.23872e-09 +GaussianMLPPolicy/dLoss -2.24822e-05 +Iteration 85 +MetaTest/Average/AverageDiscountedReturn -42.2694 +MetaTest/Average/AverageReturn -42.2694 +MetaTest/Average/Iteration 85 +MetaTest/Average/MaxReturn -25.5113 +MetaTest/Average/MinReturn -60.6078 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.12376 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.2694 +MetaTest/__unnamed_task__/AverageReturn -42.2694 +MetaTest/__unnamed_task__/Iteration 85 +MetaTest/__unnamed_task__/MaxReturn -25.5113 +MetaTest/__unnamed_task__/MinReturn -60.6078 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.12376 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.752e+06 +__unnamed_task__/AverageDiscountedReturn -17.3131 +__unnamed_task__/AverageReturn -38.5108 +__unnamed_task__/Iteration 85 +__unnamed_task__/MaxReturn -15.918 +__unnamed_task__/MinReturn -65.265 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1669 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:12:17 | [maml_trainer] epoch #86 | Sampling for adapation and meta-testing... +2025-04-03 01:13:47 | [maml_trainer] epoch #86 | Finished meta-testing... +2025-04-03 01:13:47 | [maml_trainer] epoch #86 | Saving snapshot... +2025-04-03 01:14:11 | [maml_trainer] epoch #86 | Saved +2025-04-03 01:14:11 | [maml_trainer] epoch #86 | Time 40873.23 s +2025-04-03 01:14:11 | [maml_trainer] epoch #86 | EpochTime 484.51 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9816 +Average/AverageReturn -38.2496 +Average/Iteration 86 +Average/MaxReturn -14.2024 +Average/MinReturn -55.8521 +Average/NumEpisodes 80 +Average/StdReturn 8.98548 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96435 +GaussianMLPPolicy/KLAfter 0.00747435 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000274682 +GaussianMLPPolicy/LossBefore 3.25441e-08 +GaussianMLPPolicy/dLoss 0.000274714 +Iteration 86 +MetaTest/Average/AverageDiscountedReturn -39.0686 +MetaTest/Average/AverageReturn -39.0686 +MetaTest/Average/Iteration 86 +MetaTest/Average/MaxReturn -25.4661 +MetaTest/Average/MinReturn -58.2677 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.72669 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.0686 +MetaTest/__unnamed_task__/AverageReturn -39.0686 +MetaTest/__unnamed_task__/Iteration 86 +MetaTest/__unnamed_task__/MaxReturn -25.4661 +MetaTest/__unnamed_task__/MinReturn -58.2677 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.72669 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.784e+06 +__unnamed_task__/AverageDiscountedReturn -16.9816 +__unnamed_task__/AverageReturn -38.2496 +__unnamed_task__/Iteration 86 +__unnamed_task__/MaxReturn -14.2024 +__unnamed_task__/MinReturn -55.8521 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.98548 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:20:20 | [maml_trainer] epoch #87 | Sampling for adapation and meta-testing... +2025-04-03 01:21:50 | [maml_trainer] epoch #87 | Finished meta-testing... +2025-04-03 01:21:50 | [maml_trainer] epoch #87 | Saving snapshot... +2025-04-03 01:22:13 | [maml_trainer] epoch #87 | Saved +2025-04-03 01:22:13 | [maml_trainer] epoch #87 | Time 41355.57 s +2025-04-03 01:22:13 | [maml_trainer] epoch #87 | EpochTime 482.33 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7179 +Average/AverageReturn -37.5968 +Average/Iteration 87 +Average/MaxReturn 17.3171 +Average/MinReturn -58.7711 +Average/NumEpisodes 80 +Average/StdReturn 10.7176 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96234 +GaussianMLPPolicy/KLAfter 0.00480663 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.88119e-05 +GaussianMLPPolicy/LossBefore -1.40667e-08 +GaussianMLPPolicy/dLoss 7.87978e-05 +Iteration 87 +MetaTest/Average/AverageDiscountedReturn -41.2215 +MetaTest/Average/AverageReturn -41.2215 +MetaTest/Average/Iteration 87 +MetaTest/Average/MaxReturn -26.1683 +MetaTest/Average/MinReturn -56.379 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.62901 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.2215 +MetaTest/__unnamed_task__/AverageReturn -41.2215 +MetaTest/__unnamed_task__/Iteration 87 +MetaTest/__unnamed_task__/MaxReturn -26.1683 +MetaTest/__unnamed_task__/MinReturn -56.379 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.62901 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.816e+06 +__unnamed_task__/AverageDiscountedReturn -16.7179 +__unnamed_task__/AverageReturn -37.5968 +__unnamed_task__/Iteration 87 +__unnamed_task__/MaxReturn 17.3171 +__unnamed_task__/MinReturn -58.7711 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.7176 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:28:26 | [maml_trainer] epoch #88 | Sampling for adapation and meta-testing... +2025-04-03 01:29:58 | [maml_trainer] epoch #88 | Finished meta-testing... +2025-04-03 01:29:58 | [maml_trainer] epoch #88 | Saving snapshot... +2025-04-03 01:30:20 | [maml_trainer] epoch #88 | Saved +2025-04-03 01:30:20 | [maml_trainer] epoch #88 | Time 41842.29 s +2025-04-03 01:30:20 | [maml_trainer] epoch #88 | EpochTime 486.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2335 +Average/AverageReturn -39.0006 +Average/Iteration 88 +Average/MaxReturn -21.1415 +Average/MinReturn -61.1567 +Average/NumEpisodes 80 +Average/StdReturn 7.96403 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.96081 +GaussianMLPPolicy/KLAfter 0.00845532 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.77112e-05 +GaussianMLPPolicy/LossBefore -1.90139e-08 +GaussianMLPPolicy/dLoss 2.76922e-05 +Iteration 88 +MetaTest/Average/AverageDiscountedReturn -38.1042 +MetaTest/Average/AverageReturn -38.1042 +MetaTest/Average/Iteration 88 +MetaTest/Average/MaxReturn -24.6621 +MetaTest/Average/MinReturn -48.9639 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.58489 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.1042 +MetaTest/__unnamed_task__/AverageReturn -38.1042 +MetaTest/__unnamed_task__/Iteration 88 +MetaTest/__unnamed_task__/MaxReturn -24.6621 +MetaTest/__unnamed_task__/MinReturn -48.9639 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.58489 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.848e+06 +__unnamed_task__/AverageDiscountedReturn -17.2335 +__unnamed_task__/AverageReturn -39.0006 +__unnamed_task__/Iteration 88 +__unnamed_task__/MaxReturn -21.1415 +__unnamed_task__/MinReturn -61.1567 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.96403 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:36:30 | [maml_trainer] epoch #89 | Sampling for adapation and meta-testing... +2025-04-03 01:38:03 | [maml_trainer] epoch #89 | Finished meta-testing... +2025-04-03 01:38:03 | [maml_trainer] epoch #89 | Saving snapshot... +2025-04-03 01:38:26 | [maml_trainer] epoch #89 | Saved +2025-04-03 01:38:26 | [maml_trainer] epoch #89 | Time 42328.69 s +2025-04-03 01:38:26 | [maml_trainer] epoch #89 | EpochTime 486.40 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6787 +Average/AverageReturn -38.0401 +Average/Iteration 89 +Average/MaxReturn -6.8845 +Average/MinReturn -55.9867 +Average/NumEpisodes 80 +Average/StdReturn 9.54008 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95934 +GaussianMLPPolicy/KLAfter 0.00637081 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.06994e-05 +GaussianMLPPolicy/LossBefore 8.46386e-09 +GaussianMLPPolicy/dLoss 6.07079e-05 +Iteration 89 +MetaTest/Average/AverageDiscountedReturn -38.0371 +MetaTest/Average/AverageReturn -38.0371 +MetaTest/Average/Iteration 89 +MetaTest/Average/MaxReturn -22.0211 +MetaTest/Average/MinReturn -60.5028 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.67922 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.0371 +MetaTest/__unnamed_task__/AverageReturn -38.0371 +MetaTest/__unnamed_task__/Iteration 89 +MetaTest/__unnamed_task__/MaxReturn -22.0211 +MetaTest/__unnamed_task__/MinReturn -60.5028 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.67922 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.88e+06 +__unnamed_task__/AverageDiscountedReturn -16.6787 +__unnamed_task__/AverageReturn -38.0401 +__unnamed_task__/Iteration 89 +__unnamed_task__/MaxReturn -6.8845 +__unnamed_task__/MinReturn -55.9867 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.54008 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:44:37 | [maml_trainer] epoch #90 | Sampling for adapation and meta-testing... +2025-04-03 01:46:08 | [maml_trainer] epoch #90 | Finished meta-testing... +2025-04-03 01:46:08 | [maml_trainer] epoch #90 | Saving snapshot... +2025-04-03 01:46:31 | [maml_trainer] epoch #90 | Saved +2025-04-03 01:46:31 | [maml_trainer] epoch #90 | Time 42813.42 s +2025-04-03 01:46:31 | [maml_trainer] epoch #90 | EpochTime 484.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6413 +Average/AverageReturn -38.3462 +Average/Iteration 90 +Average/MaxReturn 40.425 +Average/MinReturn -64.5802 +Average/NumEpisodes 80 +Average/StdReturn 13.7486 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95812 +GaussianMLPPolicy/KLAfter 0.00704054 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.07155e-05 +GaussianMLPPolicy/LossBefore -7.03335e-09 +GaussianMLPPolicy/dLoss 4.07085e-05 +Iteration 90 +MetaTest/Average/AverageDiscountedReturn -41.0455 +MetaTest/Average/AverageReturn -41.0455 +MetaTest/Average/Iteration 90 +MetaTest/Average/MaxReturn -28.3289 +MetaTest/Average/MinReturn -59.8348 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.24932 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.0455 +MetaTest/__unnamed_task__/AverageReturn -41.0455 +MetaTest/__unnamed_task__/Iteration 90 +MetaTest/__unnamed_task__/MaxReturn -28.3289 +MetaTest/__unnamed_task__/MinReturn -59.8348 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.24932 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.912e+06 +__unnamed_task__/AverageDiscountedReturn -16.6413 +__unnamed_task__/AverageReturn -38.3462 +__unnamed_task__/Iteration 90 +__unnamed_task__/MaxReturn 40.425 +__unnamed_task__/MinReturn -64.5802 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7486 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 01:52:41 | [maml_trainer] epoch #91 | Sampling for adapation and meta-testing... +2025-04-03 01:54:12 | [maml_trainer] epoch #91 | Finished meta-testing... +2025-04-03 01:54:12 | [maml_trainer] epoch #91 | Saving snapshot... +2025-04-03 01:54:34 | [maml_trainer] epoch #91 | Saved +2025-04-03 01:54:34 | [maml_trainer] epoch #91 | Time 43296.27 s +2025-04-03 01:54:34 | [maml_trainer] epoch #91 | EpochTime 482.85 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9422 +Average/AverageReturn -39.1523 +Average/Iteration 91 +Average/MaxReturn -16.2191 +Average/MinReturn -69.4358 +Average/NumEpisodes 80 +Average/StdReturn 9.72549 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9568 +GaussianMLPPolicy/KLAfter 0.00711864 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.85844e-05 +GaussianMLPPolicy/LossBefore -8.52346e-09 +GaussianMLPPolicy/dLoss -1.85929e-05 +Iteration 91 +MetaTest/Average/AverageDiscountedReturn -38.3089 +MetaTest/Average/AverageReturn -38.3089 +MetaTest/Average/Iteration 91 +MetaTest/Average/MaxReturn -7.39314 +MetaTest/Average/MinReturn -54.8926 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.38041 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.3089 +MetaTest/__unnamed_task__/AverageReturn -38.3089 +MetaTest/__unnamed_task__/Iteration 91 +MetaTest/__unnamed_task__/MaxReturn -7.39314 +MetaTest/__unnamed_task__/MinReturn -54.8926 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.38041 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.944e+06 +__unnamed_task__/AverageDiscountedReturn -16.9422 +__unnamed_task__/AverageReturn -39.1523 +__unnamed_task__/Iteration 91 +__unnamed_task__/MaxReturn -16.2191 +__unnamed_task__/MinReturn -69.4358 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.72549 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:00:46 | [maml_trainer] epoch #92 | Sampling for adapation and meta-testing... +2025-04-03 02:02:18 | [maml_trainer] epoch #92 | Finished meta-testing... +2025-04-03 02:02:18 | [maml_trainer] epoch #92 | Saving snapshot... +2025-04-03 02:02:42 | [maml_trainer] epoch #92 | Saved +2025-04-03 02:02:42 | [maml_trainer] epoch #92 | Time 43784.27 s +2025-04-03 02:02:42 | [maml_trainer] epoch #92 | EpochTime 487.99 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6905 +Average/AverageReturn -41.4997 +Average/Iteration 92 +Average/MaxReturn 0.47988 +Average/MinReturn -77.2686 +Average/NumEpisodes 80 +Average/StdReturn 11.2554 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95646 +GaussianMLPPolicy/KLAfter 0.0094993 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000202847 +GaussianMLPPolicy/LossBefore -3.93391e-09 +GaussianMLPPolicy/dLoss -0.000202851 +Iteration 92 +MetaTest/Average/AverageDiscountedReturn -42.0118 +MetaTest/Average/AverageReturn -42.0118 +MetaTest/Average/Iteration 92 +MetaTest/Average/MaxReturn -22.1932 +MetaTest/Average/MinReturn -59.2805 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.81092 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.0118 +MetaTest/__unnamed_task__/AverageReturn -42.0118 +MetaTest/__unnamed_task__/Iteration 92 +MetaTest/__unnamed_task__/MaxReturn -22.1932 +MetaTest/__unnamed_task__/MinReturn -59.2805 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.81092 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.976e+06 +__unnamed_task__/AverageDiscountedReturn -17.6905 +__unnamed_task__/AverageReturn -41.4997 +__unnamed_task__/Iteration 92 +__unnamed_task__/MaxReturn 0.47988 +__unnamed_task__/MinReturn -77.2686 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.2554 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:08:56 | [maml_trainer] epoch #93 | Sampling for adapation and meta-testing... +2025-04-03 02:10:29 | [maml_trainer] epoch #93 | Finished meta-testing... +2025-04-03 02:10:29 | [maml_trainer] epoch #93 | Saving snapshot... +2025-04-03 02:10:51 | [maml_trainer] epoch #93 | Saved +2025-04-03 02:10:51 | [maml_trainer] epoch #93 | Time 44273.83 s +2025-04-03 02:10:51 | [maml_trainer] epoch #93 | EpochTime 489.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6756 +Average/AverageReturn -40.9556 +Average/Iteration 93 +Average/MaxReturn -3.58566 +Average/MinReturn -74.1306 +Average/NumEpisodes 80 +Average/StdReturn 11.3637 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95634 +GaussianMLPPolicy/KLAfter 0.00812104 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000198876 +GaussianMLPPolicy/LossBefore -1.27554e-08 +GaussianMLPPolicy/dLoss 0.000198864 +Iteration 93 +MetaTest/Average/AverageDiscountedReturn -42.7227 +MetaTest/Average/AverageReturn -42.7227 +MetaTest/Average/Iteration 93 +MetaTest/Average/MaxReturn -13.1534 +MetaTest/Average/MinReturn -58.235 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1203 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.7227 +MetaTest/__unnamed_task__/AverageReturn -42.7227 +MetaTest/__unnamed_task__/Iteration 93 +MetaTest/__unnamed_task__/MaxReturn -13.1534 +MetaTest/__unnamed_task__/MinReturn -58.235 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1203 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.008e+06 +__unnamed_task__/AverageDiscountedReturn -17.6756 +__unnamed_task__/AverageReturn -40.9556 +__unnamed_task__/Iteration 93 +__unnamed_task__/MaxReturn -3.58566 +__unnamed_task__/MinReturn -74.1306 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3637 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:17:05 | [maml_trainer] epoch #94 | Sampling for adapation and meta-testing... +2025-04-03 02:18:36 | [maml_trainer] epoch #94 | Finished meta-testing... +2025-04-03 02:18:36 | [maml_trainer] epoch #94 | Saving snapshot... +2025-04-03 02:18:58 | [maml_trainer] epoch #94 | Saved +2025-04-03 02:18:58 | [maml_trainer] epoch #94 | Time 44760.28 s +2025-04-03 02:18:58 | [maml_trainer] epoch #94 | EpochTime 486.45 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5122 +Average/AverageReturn -40.6333 +Average/Iteration 94 +Average/MaxReturn -14.333 +Average/MinReturn -68.1628 +Average/NumEpisodes 80 +Average/StdReturn 11.5393 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95694 +GaussianMLPPolicy/KLAfter 0.00410401 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000115267 +GaussianMLPPolicy/LossBefore 1.57356e-08 +GaussianMLPPolicy/dLoss 0.000115283 +Iteration 94 +MetaTest/Average/AverageDiscountedReturn -35.4743 +MetaTest/Average/AverageReturn -35.4743 +MetaTest/Average/Iteration 94 +MetaTest/Average/MaxReturn -16.552 +MetaTest/Average/MinReturn -58.9191 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1739 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.4743 +MetaTest/__unnamed_task__/AverageReturn -35.4743 +MetaTest/__unnamed_task__/Iteration 94 +MetaTest/__unnamed_task__/MaxReturn -16.552 +MetaTest/__unnamed_task__/MinReturn -58.9191 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1739 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.04e+06 +__unnamed_task__/AverageDiscountedReturn -17.5122 +__unnamed_task__/AverageReturn -40.6333 +__unnamed_task__/Iteration 94 +__unnamed_task__/MaxReturn -14.333 +__unnamed_task__/MinReturn -68.1628 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.5393 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:25:04 | [maml_trainer] epoch #95 | Sampling for adapation and meta-testing... +2025-04-03 02:26:35 | [maml_trainer] epoch #95 | Finished meta-testing... +2025-04-03 02:26:35 | [maml_trainer] epoch #95 | Saving snapshot... +2025-04-03 02:26:58 | [maml_trainer] epoch #95 | Saved +2025-04-03 02:26:58 | [maml_trainer] epoch #95 | Time 45240.66 s +2025-04-03 02:26:58 | [maml_trainer] epoch #95 | EpochTime 480.37 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.327 +Average/AverageReturn -40.6006 +Average/Iteration 95 +Average/MaxReturn -19.4044 +Average/MinReturn -64.523 +Average/NumEpisodes 80 +Average/StdReturn 8.63808 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95717 +GaussianMLPPolicy/KLAfter 0.00425012 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.36607e-05 +GaussianMLPPolicy/LossBefore -2.02656e-08 +GaussianMLPPolicy/dLoss 2.36405e-05 +Iteration 95 +MetaTest/Average/AverageDiscountedReturn -45.0415 +MetaTest/Average/AverageReturn -45.0415 +MetaTest/Average/Iteration 95 +MetaTest/Average/MaxReturn -30.6776 +MetaTest/Average/MinReturn -74.0447 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7524 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.0415 +MetaTest/__unnamed_task__/AverageReturn -45.0415 +MetaTest/__unnamed_task__/Iteration 95 +MetaTest/__unnamed_task__/MaxReturn -30.6776 +MetaTest/__unnamed_task__/MinReturn -74.0447 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7524 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.072e+06 +__unnamed_task__/AverageDiscountedReturn -17.327 +__unnamed_task__/AverageReturn -40.6006 +__unnamed_task__/Iteration 95 +__unnamed_task__/MaxReturn -19.4044 +__unnamed_task__/MinReturn -64.523 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.63808 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:33:03 | [maml_trainer] epoch #96 | Sampling for adapation and meta-testing... +2025-04-03 02:34:35 | [maml_trainer] epoch #96 | Finished meta-testing... +2025-04-03 02:34:35 | [maml_trainer] epoch #96 | Saving snapshot... +2025-04-03 02:34:58 | [maml_trainer] epoch #96 | Saved +2025-04-03 02:34:58 | [maml_trainer] epoch #96 | Time 45720.17 s +2025-04-03 02:34:58 | [maml_trainer] epoch #96 | EpochTime 479.51 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.2029 +Average/AverageReturn -42.0354 +Average/Iteration 96 +Average/MaxReturn -15.0672 +Average/MinReturn -66.8623 +Average/NumEpisodes 80 +Average/StdReturn 9.28551 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9579 +GaussianMLPPolicy/KLAfter 0.00620141 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.24029e-05 +GaussianMLPPolicy/LossBefore 5.90086e-09 +GaussianMLPPolicy/dLoss -3.2397e-05 +Iteration 96 +MetaTest/Average/AverageDiscountedReturn -39.9153 +MetaTest/Average/AverageReturn -39.9153 +MetaTest/Average/Iteration 96 +MetaTest/Average/MaxReturn -23.9463 +MetaTest/Average/MinReturn -62.0674 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.12874 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.9153 +MetaTest/__unnamed_task__/AverageReturn -39.9153 +MetaTest/__unnamed_task__/Iteration 96 +MetaTest/__unnamed_task__/MaxReturn -23.9463 +MetaTest/__unnamed_task__/MinReturn -62.0674 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.12874 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.104e+06 +__unnamed_task__/AverageDiscountedReturn -18.2029 +__unnamed_task__/AverageReturn -42.0354 +__unnamed_task__/Iteration 96 +__unnamed_task__/MaxReturn -15.0672 +__unnamed_task__/MinReturn -66.8623 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.28551 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:41:13 | [maml_trainer] epoch #97 | Sampling for adapation and meta-testing... +2025-04-03 02:42:46 | [maml_trainer] epoch #97 | Finished meta-testing... +2025-04-03 02:42:46 | [maml_trainer] epoch #97 | Saving snapshot... +2025-04-03 02:43:08 | [maml_trainer] epoch #97 | Saved +2025-04-03 02:43:08 | [maml_trainer] epoch #97 | Time 46210.28 s +2025-04-03 02:43:08 | [maml_trainer] epoch #97 | EpochTime 490.11 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6243 +Average/AverageReturn -40.5101 +Average/Iteration 97 +Average/MaxReturn 16.5683 +Average/MinReturn -78.6654 +Average/NumEpisodes 80 +Average/StdReturn 11.8464 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95824 +GaussianMLPPolicy/KLAfter 0.00639798 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000117659 +GaussianMLPPolicy/LossBefore 8.01682e-09 +GaussianMLPPolicy/dLoss 0.000117667 +Iteration 97 +MetaTest/Average/AverageDiscountedReturn -38.3594 +MetaTest/Average/AverageReturn -38.3594 +MetaTest/Average/Iteration 97 +MetaTest/Average/MaxReturn -13.2402 +MetaTest/Average/MinReturn -51.1648 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.69692 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.3594 +MetaTest/__unnamed_task__/AverageReturn -38.3594 +MetaTest/__unnamed_task__/Iteration 97 +MetaTest/__unnamed_task__/MaxReturn -13.2402 +MetaTest/__unnamed_task__/MinReturn -51.1648 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.69692 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.136e+06 +__unnamed_task__/AverageDiscountedReturn -17.6243 +__unnamed_task__/AverageReturn -40.5101 +__unnamed_task__/Iteration 97 +__unnamed_task__/MaxReturn 16.5683 +__unnamed_task__/MinReturn -78.6654 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8464 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:49:16 | [maml_trainer] epoch #98 | Sampling for adapation and meta-testing... +2025-04-03 02:50:49 | [maml_trainer] epoch #98 | Finished meta-testing... +2025-04-03 02:50:49 | [maml_trainer] epoch #98 | Saving snapshot... +2025-04-03 02:51:14 | [maml_trainer] epoch #98 | Saved +2025-04-03 02:51:14 | [maml_trainer] epoch #98 | Time 46696.03 s +2025-04-03 02:51:14 | [maml_trainer] epoch #98 | EpochTime 485.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.464 +Average/AverageReturn -40.9019 +Average/Iteration 98 +Average/MaxReturn -22.3025 +Average/MinReturn -71.5507 +Average/NumEpisodes 80 +Average/StdReturn 9.70238 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95795 +GaussianMLPPolicy/KLAfter 0.00531835 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.09889e-05 +GaussianMLPPolicy/LossBefore 1.00732e-08 +GaussianMLPPolicy/dLoss 4.09989e-05 +Iteration 98 +MetaTest/Average/AverageDiscountedReturn -35.746 +MetaTest/Average/AverageReturn -35.746 +MetaTest/Average/Iteration 98 +MetaTest/Average/MaxReturn -23.507 +MetaTest/Average/MinReturn -52.6672 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.26188 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.746 +MetaTest/__unnamed_task__/AverageReturn -35.746 +MetaTest/__unnamed_task__/Iteration 98 +MetaTest/__unnamed_task__/MaxReturn -23.507 +MetaTest/__unnamed_task__/MinReturn -52.6672 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.26188 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.168e+06 +__unnamed_task__/AverageDiscountedReturn -17.464 +__unnamed_task__/AverageReturn -40.9019 +__unnamed_task__/Iteration 98 +__unnamed_task__/MaxReturn -22.3025 +__unnamed_task__/MinReturn -71.5507 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.70238 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 02:57:30 | [maml_trainer] epoch #99 | Sampling for adapation and meta-testing... +2025-04-03 02:59:02 | [maml_trainer] epoch #99 | Finished meta-testing... +2025-04-03 02:59:02 | [maml_trainer] epoch #99 | Saving snapshot... +2025-04-03 02:59:26 | [maml_trainer] epoch #99 | Saved +2025-04-03 02:59:26 | [maml_trainer] epoch #99 | Time 47188.04 s +2025-04-03 02:59:26 | [maml_trainer] epoch #99 | EpochTime 492.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7718 +Average/AverageReturn -37.5531 +Average/Iteration 99 +Average/MaxReturn 28.7495 +Average/MinReturn -73.2578 +Average/NumEpisodes 80 +Average/StdReturn 12.8464 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95782 +GaussianMLPPolicy/KLAfter 0.00541757 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.1316e-05 +GaussianMLPPolicy/LossBefore 1.25766e-08 +GaussianMLPPolicy/dLoss -7.13034e-05 +Iteration 99 +MetaTest/Average/AverageDiscountedReturn -43.3502 +MetaTest/Average/AverageReturn -43.3502 +MetaTest/Average/Iteration 99 +MetaTest/Average/MaxReturn -24.5432 +MetaTest/Average/MinReturn -62.7711 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.4714 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.3502 +MetaTest/__unnamed_task__/AverageReturn -43.3502 +MetaTest/__unnamed_task__/Iteration 99 +MetaTest/__unnamed_task__/MaxReturn -24.5432 +MetaTest/__unnamed_task__/MinReturn -62.7711 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.4714 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.2e+06 +__unnamed_task__/AverageDiscountedReturn -16.7718 +__unnamed_task__/AverageReturn -37.5531 +__unnamed_task__/Iteration 99 +__unnamed_task__/MaxReturn 28.7495 +__unnamed_task__/MinReturn -73.2578 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8464 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:05:37 | [maml_trainer] epoch #100 | Sampling for adapation and meta-testing... +2025-04-03 03:07:09 | [maml_trainer] epoch #100 | Finished meta-testing... +2025-04-03 03:07:09 | [maml_trainer] epoch #100 | Saving snapshot... +2025-04-03 03:07:31 | [maml_trainer] epoch #100 | Saved +2025-04-03 03:07:31 | [maml_trainer] epoch #100 | Time 47673.45 s +2025-04-03 03:07:31 | [maml_trainer] epoch #100 | EpochTime 485.41 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7745 +Average/AverageReturn -41.1732 +Average/Iteration 100 +Average/MaxReturn -25.4925 +Average/MinReturn -62.241 +Average/NumEpisodes 80 +Average/StdReturn 8.55025 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95708 +GaussianMLPPolicy/KLAfter 0.00221227 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000115284 +GaussianMLPPolicy/LossBefore -2.26498e-08 +GaussianMLPPolicy/dLoss -0.000115307 +Iteration 100 +MetaTest/Average/AverageDiscountedReturn -34.5186 +MetaTest/Average/AverageReturn -34.5186 +MetaTest/Average/Iteration 100 +MetaTest/Average/MaxReturn -15.3712 +MetaTest/Average/MinReturn -46.8962 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.77272 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.5186 +MetaTest/__unnamed_task__/AverageReturn -34.5186 +MetaTest/__unnamed_task__/Iteration 100 +MetaTest/__unnamed_task__/MaxReturn -15.3712 +MetaTest/__unnamed_task__/MinReturn -46.8962 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.77272 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.232e+06 +__unnamed_task__/AverageDiscountedReturn -17.7745 +__unnamed_task__/AverageReturn -41.1732 +__unnamed_task__/Iteration 100 +__unnamed_task__/MaxReturn -25.4925 +__unnamed_task__/MinReturn -62.241 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.55025 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:13:36 | [maml_trainer] epoch #101 | Sampling for adapation and meta-testing... +2025-04-03 03:15:10 | [maml_trainer] epoch #101 | Finished meta-testing... +2025-04-03 03:15:10 | [maml_trainer] epoch #101 | Saving snapshot... +2025-04-03 03:15:33 | [maml_trainer] epoch #101 | Saved +2025-04-03 03:15:33 | [maml_trainer] epoch #101 | Time 48155.67 s +2025-04-03 03:15:33 | [maml_trainer] epoch #101 | EpochTime 482.22 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.6579 +Average/AverageReturn -40.7877 +Average/Iteration 101 +Average/MaxReturn -7.5641 +Average/MinReturn -77.8875 +Average/NumEpisodes 80 +Average/StdReturn 13.0655 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95488 +GaussianMLPPolicy/KLAfter 0.00330854 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000100518 +GaussianMLPPolicy/LossBefore 2.32458e-09 +GaussianMLPPolicy/dLoss -0.000100516 +Iteration 101 +MetaTest/Average/AverageDiscountedReturn -39.3629 +MetaTest/Average/AverageReturn -39.3629 +MetaTest/Average/Iteration 101 +MetaTest/Average/MaxReturn -21.336 +MetaTest/Average/MinReturn -62.1134 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.2632 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.3629 +MetaTest/__unnamed_task__/AverageReturn -39.3629 +MetaTest/__unnamed_task__/Iteration 101 +MetaTest/__unnamed_task__/MaxReturn -21.336 +MetaTest/__unnamed_task__/MinReturn -62.1134 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.2632 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.264e+06 +__unnamed_task__/AverageDiscountedReturn -17.6579 +__unnamed_task__/AverageReturn -40.7877 +__unnamed_task__/Iteration 101 +__unnamed_task__/MaxReturn -7.5641 +__unnamed_task__/MinReturn -77.8875 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.0655 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:21:42 | [maml_trainer] epoch #102 | Sampling for adapation and meta-testing... +2025-04-03 03:23:15 | [maml_trainer] epoch #102 | Finished meta-testing... +2025-04-03 03:23:15 | [maml_trainer] epoch #102 | Saving snapshot... +2025-04-03 03:23:38 | [maml_trainer] epoch #102 | Saved +2025-04-03 03:23:38 | [maml_trainer] epoch #102 | Time 48640.46 s +2025-04-03 03:23:38 | [maml_trainer] epoch #102 | EpochTime 484.78 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.2264 +Average/AverageReturn -41.9759 +Average/Iteration 102 +Average/MaxReturn -1.61894 +Average/MinReturn -72.5258 +Average/NumEpisodes 80 +Average/StdReturn 11.4979 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95198 +GaussianMLPPolicy/KLAfter 0.00412758 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.22371e-05 +GaussianMLPPolicy/LossBefore -1.21891e-08 +GaussianMLPPolicy/dLoss -5.22493e-05 +Iteration 102 +MetaTest/Average/AverageDiscountedReturn -42.0439 +MetaTest/Average/AverageReturn -42.0439 +MetaTest/Average/Iteration 102 +MetaTest/Average/MaxReturn -23.7473 +MetaTest/Average/MinReturn -73.4332 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.6146 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.0439 +MetaTest/__unnamed_task__/AverageReturn -42.0439 +MetaTest/__unnamed_task__/Iteration 102 +MetaTest/__unnamed_task__/MaxReturn -23.7473 +MetaTest/__unnamed_task__/MinReturn -73.4332 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.6146 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.296e+06 +__unnamed_task__/AverageDiscountedReturn -18.2264 +__unnamed_task__/AverageReturn -41.9759 +__unnamed_task__/Iteration 102 +__unnamed_task__/MaxReturn -1.61894 +__unnamed_task__/MinReturn -72.5258 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4979 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:29:50 | [maml_trainer] epoch #103 | Sampling for adapation and meta-testing... +2025-04-03 03:31:21 | [maml_trainer] epoch #103 | Finished meta-testing... +2025-04-03 03:31:21 | [maml_trainer] epoch #103 | Saving snapshot... +2025-04-03 03:31:43 | [maml_trainer] epoch #103 | Saved +2025-04-03 03:31:43 | [maml_trainer] epoch #103 | Time 49125.09 s +2025-04-03 03:31:43 | [maml_trainer] epoch #103 | EpochTime 484.63 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.3073 +Average/AverageReturn -42.3363 +Average/Iteration 103 +Average/MaxReturn -19.0742 +Average/MinReturn -74.7329 +Average/NumEpisodes 80 +Average/StdReturn 12.2155 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.951 +GaussianMLPPolicy/KLAfter 0.00322561 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000296563 +GaussianMLPPolicy/LossBefore -1.07884e-08 +GaussianMLPPolicy/dLoss 0.000296552 +Iteration 103 +MetaTest/Average/AverageDiscountedReturn -38.8653 +MetaTest/Average/AverageReturn -38.8653 +MetaTest/Average/Iteration 103 +MetaTest/Average/MaxReturn -7.34097 +MetaTest/Average/MinReturn -62.6371 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5137 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.8653 +MetaTest/__unnamed_task__/AverageReturn -38.8653 +MetaTest/__unnamed_task__/Iteration 103 +MetaTest/__unnamed_task__/MaxReturn -7.34097 +MetaTest/__unnamed_task__/MinReturn -62.6371 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5137 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.328e+06 +__unnamed_task__/AverageDiscountedReturn -18.3073 +__unnamed_task__/AverageReturn -42.3363 +__unnamed_task__/Iteration 103 +__unnamed_task__/MaxReturn -19.0742 +__unnamed_task__/MinReturn -74.7329 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.2155 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:37:54 | [maml_trainer] epoch #104 | Sampling for adapation and meta-testing... +2025-04-03 03:39:25 | [maml_trainer] epoch #104 | Finished meta-testing... +2025-04-03 03:39:25 | [maml_trainer] epoch #104 | Saving snapshot... +2025-04-03 03:39:49 | [maml_trainer] epoch #104 | Saved +2025-04-03 03:39:49 | [maml_trainer] epoch #104 | Time 49611.74 s +2025-04-03 03:39:49 | [maml_trainer] epoch #104 | EpochTime 486.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0809 +Average/AverageReturn -38.2423 +Average/Iteration 104 +Average/MaxReturn -15.7197 +Average/MinReturn -61.9313 +Average/NumEpisodes 80 +Average/StdReturn 8.94706 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95071 +GaussianMLPPolicy/KLAfter 0.00389881 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.90841e-05 +GaussianMLPPolicy/LossBefore 1.13249e-09 +GaussianMLPPolicy/dLoss -9.9083e-05 +Iteration 104 +MetaTest/Average/AverageDiscountedReturn -33.2135 +MetaTest/Average/AverageReturn -33.2135 +MetaTest/Average/Iteration 104 +MetaTest/Average/MaxReturn -8.15125 +MetaTest/Average/MinReturn -51.1345 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.06292 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.2135 +MetaTest/__unnamed_task__/AverageReturn -33.2135 +MetaTest/__unnamed_task__/Iteration 104 +MetaTest/__unnamed_task__/MaxReturn -8.15125 +MetaTest/__unnamed_task__/MinReturn -51.1345 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.06292 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.36e+06 +__unnamed_task__/AverageDiscountedReturn -17.0809 +__unnamed_task__/AverageReturn -38.2423 +__unnamed_task__/Iteration 104 +__unnamed_task__/MaxReturn -15.7197 +__unnamed_task__/MinReturn -61.9313 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.94706 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:45:56 | [maml_trainer] epoch #105 | Sampling for adapation and meta-testing... +2025-04-03 03:47:28 | [maml_trainer] epoch #105 | Finished meta-testing... +2025-04-03 03:47:28 | [maml_trainer] epoch #105 | Saving snapshot... +2025-04-03 03:47:51 | [maml_trainer] epoch #105 | Saved +2025-04-03 03:47:51 | [maml_trainer] epoch #105 | Time 50093.54 s +2025-04-03 03:47:51 | [maml_trainer] epoch #105 | EpochTime 481.79 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6572 +Average/AverageReturn -36.4525 +Average/Iteration 105 +Average/MaxReturn 16.7605 +Average/MinReturn -62.6546 +Average/NumEpisodes 80 +Average/StdReturn 11.9457 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95114 +GaussianMLPPolicy/KLAfter 0.00483998 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000241933 +GaussianMLPPolicy/LossBefore 4.41074e-09 +GaussianMLPPolicy/dLoss -0.000241928 +Iteration 105 +MetaTest/Average/AverageDiscountedReturn -39.4823 +MetaTest/Average/AverageReturn -39.4823 +MetaTest/Average/Iteration 105 +MetaTest/Average/MaxReturn 10.9591 +MetaTest/Average/MinReturn -61.0834 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.1697 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.4823 +MetaTest/__unnamed_task__/AverageReturn -39.4823 +MetaTest/__unnamed_task__/Iteration 105 +MetaTest/__unnamed_task__/MaxReturn 10.9591 +MetaTest/__unnamed_task__/MinReturn -61.0834 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.1697 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.392e+06 +__unnamed_task__/AverageDiscountedReturn -16.6572 +__unnamed_task__/AverageReturn -36.4525 +__unnamed_task__/Iteration 105 +__unnamed_task__/MaxReturn 16.7605 +__unnamed_task__/MinReturn -62.6546 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9457 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 03:54:04 | [maml_trainer] epoch #106 | Sampling for adapation and meta-testing... +2025-04-03 03:55:36 | [maml_trainer] epoch #106 | Finished meta-testing... +2025-04-03 03:55:36 | [maml_trainer] epoch #106 | Saving snapshot... +2025-04-03 03:55:58 | [maml_trainer] epoch #106 | Saved +2025-04-03 03:55:58 | [maml_trainer] epoch #106 | Time 50580.71 s +2025-04-03 03:55:58 | [maml_trainer] epoch #106 | EpochTime 487.17 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2814 +Average/AverageReturn -35.5491 +Average/Iteration 106 +Average/MaxReturn -4.19546 +Average/MinReturn -69.94 +Average/NumEpisodes 80 +Average/StdReturn 9.82585 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95102 +GaussianMLPPolicy/KLAfter 0.00390006 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.22361e-06 +GaussianMLPPolicy/LossBefore 1.38879e-08 +GaussianMLPPolicy/dLoss 2.2375e-06 +Iteration 106 +MetaTest/Average/AverageDiscountedReturn -32.4641 +MetaTest/Average/AverageReturn -32.4641 +MetaTest/Average/Iteration 106 +MetaTest/Average/MaxReturn -16.2252 +MetaTest/Average/MinReturn -48.456 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.10761 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.4641 +MetaTest/__unnamed_task__/AverageReturn -32.4641 +MetaTest/__unnamed_task__/Iteration 106 +MetaTest/__unnamed_task__/MaxReturn -16.2252 +MetaTest/__unnamed_task__/MinReturn -48.456 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.10761 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.424e+06 +__unnamed_task__/AverageDiscountedReturn -16.2814 +__unnamed_task__/AverageReturn -35.5491 +__unnamed_task__/Iteration 106 +__unnamed_task__/MaxReturn -4.19546 +__unnamed_task__/MinReturn -69.94 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.82585 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:02:06 | [maml_trainer] epoch #107 | Sampling for adapation and meta-testing... +2025-04-03 04:03:40 | [maml_trainer] epoch #107 | Finished meta-testing... +2025-04-03 04:03:40 | [maml_trainer] epoch #107 | Saving snapshot... +2025-04-03 04:04:03 | [maml_trainer] epoch #107 | Saved +2025-04-03 04:04:03 | [maml_trainer] epoch #107 | Time 51065.55 s +2025-04-03 04:04:03 | [maml_trainer] epoch #107 | EpochTime 484.83 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8679 +Average/AverageReturn -36.3928 +Average/Iteration 107 +Average/MaxReturn 40.2488 +Average/MinReturn -59.8314 +Average/NumEpisodes 80 +Average/StdReturn 12.348 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.95012 +GaussianMLPPolicy/KLAfter 0.00469732 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000105643 +GaussianMLPPolicy/LossBefore 1.83284e-08 +GaussianMLPPolicy/dLoss 0.000105661 +Iteration 107 +MetaTest/Average/AverageDiscountedReturn -33.9245 +MetaTest/Average/AverageReturn -33.9245 +MetaTest/Average/Iteration 107 +MetaTest/Average/MaxReturn 4.95173 +MetaTest/Average/MinReturn -56.8449 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.2356 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.9245 +MetaTest/__unnamed_task__/AverageReturn -33.9245 +MetaTest/__unnamed_task__/Iteration 107 +MetaTest/__unnamed_task__/MaxReturn 4.95173 +MetaTest/__unnamed_task__/MinReturn -56.8449 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.2356 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.456e+06 +__unnamed_task__/AverageDiscountedReturn -16.8679 +__unnamed_task__/AverageReturn -36.3928 +__unnamed_task__/Iteration 107 +__unnamed_task__/MaxReturn 40.2488 +__unnamed_task__/MinReturn -59.8314 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.348 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:10:13 | [maml_trainer] epoch #108 | Sampling for adapation and meta-testing... +2025-04-03 04:11:44 | [maml_trainer] epoch #108 | Finished meta-testing... +2025-04-03 04:11:44 | [maml_trainer] epoch #108 | Saving snapshot... +2025-04-03 04:12:08 | [maml_trainer] epoch #108 | Saved +2025-04-03 04:12:08 | [maml_trainer] epoch #108 | Time 51550.24 s +2025-04-03 04:12:08 | [maml_trainer] epoch #108 | EpochTime 484.69 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8777 +Average/AverageReturn -36.2397 +Average/Iteration 108 +Average/MaxReturn -5.93125 +Average/MinReturn -65.1065 +Average/NumEpisodes 80 +Average/StdReturn 11.5544 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94817 +GaussianMLPPolicy/KLAfter 0.00244155 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.89609e-05 +GaussianMLPPolicy/LossBefore -7.15256e-10 +GaussianMLPPolicy/dLoss 7.89602e-05 +Iteration 108 +MetaTest/Average/AverageDiscountedReturn -36.2153 +MetaTest/Average/AverageReturn -36.2153 +MetaTest/Average/Iteration 108 +MetaTest/Average/MaxReturn -2.16105 +MetaTest/Average/MinReturn -77.5375 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.161 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.2153 +MetaTest/__unnamed_task__/AverageReturn -36.2153 +MetaTest/__unnamed_task__/Iteration 108 +MetaTest/__unnamed_task__/MaxReturn -2.16105 +MetaTest/__unnamed_task__/MinReturn -77.5375 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.161 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.488e+06 +__unnamed_task__/AverageDiscountedReturn -16.8777 +__unnamed_task__/AverageReturn -36.2397 +__unnamed_task__/Iteration 108 +__unnamed_task__/MaxReturn -5.93125 +__unnamed_task__/MinReturn -65.1065 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.5544 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:18:21 | [maml_trainer] epoch #109 | Sampling for adapation and meta-testing... +2025-04-03 04:19:52 | [maml_trainer] epoch #109 | Finished meta-testing... +2025-04-03 04:19:52 | [maml_trainer] epoch #109 | Saving snapshot... +2025-04-03 04:20:16 | [maml_trainer] epoch #109 | Saved +2025-04-03 04:20:16 | [maml_trainer] epoch #109 | Time 52038.04 s +2025-04-03 04:20:16 | [maml_trainer] epoch #109 | EpochTime 487.80 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0746 +Average/AverageReturn -33.4558 +Average/Iteration 109 +Average/MaxReturn 21.6366 +Average/MinReturn -63.2221 +Average/NumEpisodes 80 +Average/StdReturn 15.6505 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94589 +GaussianMLPPolicy/KLAfter 0.00216199 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.34377e-05 +GaussianMLPPolicy/LossBefore -3.18885e-09 +GaussianMLPPolicy/dLoss 9.34345e-05 +Iteration 109 +MetaTest/Average/AverageDiscountedReturn -30.1503 +MetaTest/Average/AverageReturn -30.1503 +MetaTest/Average/Iteration 109 +MetaTest/Average/MaxReturn -13.3589 +MetaTest/Average/MinReturn -44.6463 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.24282 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.1503 +MetaTest/__unnamed_task__/AverageReturn -30.1503 +MetaTest/__unnamed_task__/Iteration 109 +MetaTest/__unnamed_task__/MaxReturn -13.3589 +MetaTest/__unnamed_task__/MinReturn -44.6463 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.24282 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.52e+06 +__unnamed_task__/AverageDiscountedReturn -16.0746 +__unnamed_task__/AverageReturn -33.4558 +__unnamed_task__/Iteration 109 +__unnamed_task__/MaxReturn 21.6366 +__unnamed_task__/MinReturn -63.2221 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.6505 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:26:19 | [maml_trainer] epoch #110 | Sampling for adapation and meta-testing... +2025-04-03 04:27:52 | [maml_trainer] epoch #110 | Finished meta-testing... +2025-04-03 04:27:52 | [maml_trainer] epoch #110 | Saving snapshot... +2025-04-03 04:28:15 | [maml_trainer] epoch #110 | Saved +2025-04-03 04:28:15 | [maml_trainer] epoch #110 | Time 52517.77 s +2025-04-03 04:28:15 | [maml_trainer] epoch #110 | EpochTime 479.73 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0475 +Average/AverageReturn -34.2287 +Average/Iteration 110 +Average/MaxReturn -10.1957 +Average/MinReturn -69.0882 +Average/NumEpisodes 80 +Average/StdReturn 11.9169 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94455 +GaussianMLPPolicy/KLAfter 0.00175638 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.61312e-05 +GaussianMLPPolicy/LossBefore 9.65595e-09 +GaussianMLPPolicy/dLoss -3.61215e-05 +Iteration 110 +MetaTest/Average/AverageDiscountedReturn -33.4653 +MetaTest/Average/AverageReturn -33.4653 +MetaTest/Average/Iteration 110 +MetaTest/Average/MaxReturn -18.7472 +MetaTest/Average/MinReturn -52.0774 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.0227 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.4653 +MetaTest/__unnamed_task__/AverageReturn -33.4653 +MetaTest/__unnamed_task__/Iteration 110 +MetaTest/__unnamed_task__/MaxReturn -18.7472 +MetaTest/__unnamed_task__/MinReturn -52.0774 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.0227 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.552e+06 +__unnamed_task__/AverageDiscountedReturn -16.0475 +__unnamed_task__/AverageReturn -34.2287 +__unnamed_task__/Iteration 110 +__unnamed_task__/MaxReturn -10.1957 +__unnamed_task__/MinReturn -69.0882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9169 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:34:22 | [maml_trainer] epoch #111 | Sampling for adapation and meta-testing... +2025-04-03 04:35:53 | [maml_trainer] epoch #111 | Finished meta-testing... +2025-04-03 04:35:53 | [maml_trainer] epoch #111 | Saving snapshot... +2025-04-03 04:36:15 | [maml_trainer] epoch #111 | Saved +2025-04-03 04:36:15 | [maml_trainer] epoch #111 | Time 52997.95 s +2025-04-03 04:36:15 | [maml_trainer] epoch #111 | EpochTime 480.17 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6452 +Average/AverageReturn -32.9544 +Average/Iteration 111 +Average/MaxReturn 8.86432 +Average/MinReturn -71.8964 +Average/NumEpisodes 80 +Average/StdReturn 13.1403 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94299 +GaussianMLPPolicy/KLAfter 0.00161781 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.81875e-05 +GaussianMLPPolicy/LossBefore 3.18885e-09 +GaussianMLPPolicy/dLoss -5.81843e-05 +Iteration 111 +MetaTest/Average/AverageDiscountedReturn -33.5338 +MetaTest/Average/AverageReturn -33.5338 +MetaTest/Average/Iteration 111 +MetaTest/Average/MaxReturn -4.7911 +MetaTest/Average/MinReturn -60.1726 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.4715 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.5338 +MetaTest/__unnamed_task__/AverageReturn -33.5338 +MetaTest/__unnamed_task__/Iteration 111 +MetaTest/__unnamed_task__/MaxReturn -4.7911 +MetaTest/__unnamed_task__/MinReturn -60.1726 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.4715 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.584e+06 +__unnamed_task__/AverageDiscountedReturn -15.6452 +__unnamed_task__/AverageReturn -32.9544 +__unnamed_task__/Iteration 111 +__unnamed_task__/MaxReturn 8.86432 +__unnamed_task__/MinReturn -71.8964 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1403 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:42:25 | [maml_trainer] epoch #112 | Sampling for adapation and meta-testing... +2025-04-03 04:43:57 | [maml_trainer] epoch #112 | Finished meta-testing... +2025-04-03 04:43:57 | [maml_trainer] epoch #112 | Saving snapshot... +2025-04-03 04:44:22 | [maml_trainer] epoch #112 | Saved +2025-04-03 04:44:22 | [maml_trainer] epoch #112 | Time 53484.21 s +2025-04-03 04:44:22 | [maml_trainer] epoch #112 | EpochTime 486.26 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7218 +Average/AverageReturn -35.4274 +Average/Iteration 112 +Average/MaxReturn 16.6019 +Average/MinReturn -62.584 +Average/NumEpisodes 80 +Average/StdReturn 13.8222 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.94104 +GaussianMLPPolicy/KLAfter 0.00225869 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000127381 +GaussianMLPPolicy/LossBefore 1.0848e-08 +GaussianMLPPolicy/dLoss -0.00012737 +Iteration 112 +MetaTest/Average/AverageDiscountedReturn -36.4849 +MetaTest/Average/AverageReturn -36.4849 +MetaTest/Average/Iteration 112 +MetaTest/Average/MaxReturn -13.9152 +MetaTest/Average/MinReturn -63.5672 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9924 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.4849 +MetaTest/__unnamed_task__/AverageReturn -36.4849 +MetaTest/__unnamed_task__/Iteration 112 +MetaTest/__unnamed_task__/MaxReturn -13.9152 +MetaTest/__unnamed_task__/MinReturn -63.5672 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9924 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.616e+06 +__unnamed_task__/AverageDiscountedReturn -16.7218 +__unnamed_task__/AverageReturn -35.4274 +__unnamed_task__/Iteration 112 +__unnamed_task__/MaxReturn 16.6019 +__unnamed_task__/MinReturn -62.584 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8222 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:50:35 | [maml_trainer] epoch #113 | Sampling for adapation and meta-testing... +2025-04-03 04:52:07 | [maml_trainer] epoch #113 | Finished meta-testing... +2025-04-03 04:52:07 | [maml_trainer] epoch #113 | Saving snapshot... +2025-04-03 04:52:30 | [maml_trainer] epoch #113 | Saved +2025-04-03 04:52:30 | [maml_trainer] epoch #113 | Time 53972.94 s +2025-04-03 04:52:30 | [maml_trainer] epoch #113 | EpochTime 488.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4562 +Average/AverageReturn -38.0378 +Average/Iteration 113 +Average/MaxReturn -13.3221 +Average/MinReturn -68.1951 +Average/NumEpisodes 80 +Average/StdReturn 11.6773 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93825 +GaussianMLPPolicy/KLAfter 0.00241692 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.17562e-05 +GaussianMLPPolicy/LossBefore 6.3777e-09 +GaussianMLPPolicy/dLoss -1.17499e-05 +Iteration 113 +MetaTest/Average/AverageDiscountedReturn -40.8645 +MetaTest/Average/AverageReturn -40.8645 +MetaTest/Average/Iteration 113 +MetaTest/Average/MaxReturn -21.2988 +MetaTest/Average/MinReturn -59.3706 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6652 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.8645 +MetaTest/__unnamed_task__/AverageReturn -40.8645 +MetaTest/__unnamed_task__/Iteration 113 +MetaTest/__unnamed_task__/MaxReturn -21.2988 +MetaTest/__unnamed_task__/MinReturn -59.3706 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6652 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.648e+06 +__unnamed_task__/AverageDiscountedReturn -17.4562 +__unnamed_task__/AverageReturn -38.0378 +__unnamed_task__/Iteration 113 +__unnamed_task__/MaxReturn -13.3221 +__unnamed_task__/MinReturn -68.1951 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6773 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 04:58:38 | [maml_trainer] epoch #114 | Sampling for adapation and meta-testing... +2025-04-03 05:00:10 | [maml_trainer] epoch #114 | Finished meta-testing... +2025-04-03 05:00:10 | [maml_trainer] epoch #114 | Saving snapshot... +2025-04-03 05:00:32 | [maml_trainer] epoch #114 | Saved +2025-04-03 05:00:32 | [maml_trainer] epoch #114 | Time 54454.70 s +2025-04-03 05:00:32 | [maml_trainer] epoch #114 | EpochTime 481.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2584 +Average/AverageReturn -34.4312 +Average/Iteration 114 +Average/MaxReturn 16.5494 +Average/MinReturn -62.9391 +Average/NumEpisodes 80 +Average/StdReturn 13.6154 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93609 +GaussianMLPPolicy/KLAfter 0.00543412 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000104532 +GaussianMLPPolicy/LossBefore -1.71363e-08 +GaussianMLPPolicy/dLoss 0.000104515 +Iteration 114 +MetaTest/Average/AverageDiscountedReturn -36.1241 +MetaTest/Average/AverageReturn -36.1241 +MetaTest/Average/Iteration 114 +MetaTest/Average/MaxReturn -3.27872 +MetaTest/Average/MinReturn -55.6682 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9475 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.1241 +MetaTest/__unnamed_task__/AverageReturn -36.1241 +MetaTest/__unnamed_task__/Iteration 114 +MetaTest/__unnamed_task__/MaxReturn -3.27872 +MetaTest/__unnamed_task__/MinReturn -55.6682 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9475 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.68e+06 +__unnamed_task__/AverageDiscountedReturn -16.2584 +__unnamed_task__/AverageReturn -34.4312 +__unnamed_task__/Iteration 114 +__unnamed_task__/MaxReturn 16.5494 +__unnamed_task__/MinReturn -62.9391 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.6154 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:06:37 | [maml_trainer] epoch #115 | Sampling for adapation and meta-testing... +2025-04-03 05:08:10 | [maml_trainer] epoch #115 | Finished meta-testing... +2025-04-03 05:08:10 | [maml_trainer] epoch #115 | Saving snapshot... +2025-04-03 05:08:33 | [maml_trainer] epoch #115 | Saved +2025-04-03 05:08:33 | [maml_trainer] epoch #115 | Time 54935.45 s +2025-04-03 05:08:33 | [maml_trainer] epoch #115 | EpochTime 480.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8462 +Average/AverageReturn -34.496 +Average/Iteration 115 +Average/MaxReturn -2.40317 +Average/MinReturn -62.3039 +Average/NumEpisodes 80 +Average/StdReturn 10.5717 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93413 +GaussianMLPPolicy/KLAfter 0.00360903 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000149982 +GaussianMLPPolicy/LossBefore 2.14577e-09 +GaussianMLPPolicy/dLoss 0.000149984 +Iteration 115 +MetaTest/Average/AverageDiscountedReturn -30.45 +MetaTest/Average/AverageReturn -30.45 +MetaTest/Average/Iteration 115 +MetaTest/Average/MaxReturn -22.3604 +MetaTest/Average/MinReturn -45.7292 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.66179 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.45 +MetaTest/__unnamed_task__/AverageReturn -30.45 +MetaTest/__unnamed_task__/Iteration 115 +MetaTest/__unnamed_task__/MaxReturn -22.3604 +MetaTest/__unnamed_task__/MinReturn -45.7292 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.66179 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.712e+06 +__unnamed_task__/AverageDiscountedReturn -15.8462 +__unnamed_task__/AverageReturn -34.496 +__unnamed_task__/Iteration 115 +__unnamed_task__/MaxReturn -2.40317 +__unnamed_task__/MinReturn -62.3039 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5717 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:14:41 | [maml_trainer] epoch #116 | Sampling for adapation and meta-testing... +2025-04-03 05:16:13 | [maml_trainer] epoch #116 | Finished meta-testing... +2025-04-03 05:16:13 | [maml_trainer] epoch #116 | Saving snapshot... +2025-04-03 05:16:36 | [maml_trainer] epoch #116 | Saved +2025-04-03 05:16:36 | [maml_trainer] epoch #116 | Time 55418.49 s +2025-04-03 05:16:36 | [maml_trainer] epoch #116 | EpochTime 483.04 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0831 +Average/AverageReturn -37.2079 +Average/Iteration 116 +Average/MaxReturn -1.07445 +Average/MinReturn -67.9573 +Average/NumEpisodes 80 +Average/StdReturn 11.4039 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93105 +GaussianMLPPolicy/KLAfter 0.00303035 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.3573e-05 +GaussianMLPPolicy/LossBefore -6.85453e-09 +GaussianMLPPolicy/dLoss 3.35662e-05 +Iteration 116 +MetaTest/Average/AverageDiscountedReturn -29.8931 +MetaTest/Average/AverageReturn -29.8931 +MetaTest/Average/Iteration 116 +MetaTest/Average/MaxReturn -1.55061 +MetaTest/Average/MinReturn -54.9734 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.0768 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.8931 +MetaTest/__unnamed_task__/AverageReturn -29.8931 +MetaTest/__unnamed_task__/Iteration 116 +MetaTest/__unnamed_task__/MaxReturn -1.55061 +MetaTest/__unnamed_task__/MinReturn -54.9734 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.0768 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.744e+06 +__unnamed_task__/AverageDiscountedReturn -17.0831 +__unnamed_task__/AverageReturn -37.2079 +__unnamed_task__/Iteration 116 +__unnamed_task__/MaxReturn -1.07445 +__unnamed_task__/MinReturn -67.9573 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4039 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:22:43 | [maml_trainer] epoch #117 | Sampling for adapation and meta-testing... +2025-04-03 05:24:15 | [maml_trainer] epoch #117 | Finished meta-testing... +2025-04-03 05:24:15 | [maml_trainer] epoch #117 | Saving snapshot... +2025-04-03 05:24:38 | [maml_trainer] epoch #117 | Saved +2025-04-03 05:24:38 | [maml_trainer] epoch #117 | Time 55900.97 s +2025-04-03 05:24:38 | [maml_trainer] epoch #117 | EpochTime 482.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.1392 +Average/AverageReturn -37.3282 +Average/Iteration 117 +Average/MaxReturn -4.25129 +Average/MinReturn -62.1453 +Average/NumEpisodes 80 +Average/StdReturn 11.3516 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92779 +GaussianMLPPolicy/KLAfter 0.00294106 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000184538 +GaussianMLPPolicy/LossBefore -5.96046e-10 +GaussianMLPPolicy/dLoss 0.000184538 +Iteration 117 +MetaTest/Average/AverageDiscountedReturn -36.482 +MetaTest/Average/AverageReturn -36.482 +MetaTest/Average/Iteration 117 +MetaTest/Average/MaxReturn -18.4848 +MetaTest/Average/MinReturn -57.1253 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.74635 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.482 +MetaTest/__unnamed_task__/AverageReturn -36.482 +MetaTest/__unnamed_task__/Iteration 117 +MetaTest/__unnamed_task__/MaxReturn -18.4848 +MetaTest/__unnamed_task__/MinReturn -57.1253 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.74635 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.776e+06 +__unnamed_task__/AverageDiscountedReturn -17.1392 +__unnamed_task__/AverageReturn -37.3282 +__unnamed_task__/Iteration 117 +__unnamed_task__/MaxReturn -4.25129 +__unnamed_task__/MinReturn -62.1453 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3516 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:30:47 | [maml_trainer] epoch #118 | Sampling for adapation and meta-testing... +2025-04-03 05:32:19 | [maml_trainer] epoch #118 | Finished meta-testing... +2025-04-03 05:32:19 | [maml_trainer] epoch #118 | Saving snapshot... +2025-04-03 05:32:42 | [maml_trainer] epoch #118 | Saved +2025-04-03 05:32:42 | [maml_trainer] epoch #118 | Time 56384.50 s +2025-04-03 05:32:42 | [maml_trainer] epoch #118 | EpochTime 483.52 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6694 +Average/AverageReturn -33.9758 +Average/Iteration 118 +Average/MaxReturn 18.1689 +Average/MinReturn -57.505 +Average/NumEpisodes 80 +Average/StdReturn 13.1042 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9247 +GaussianMLPPolicy/KLAfter 0.00177 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000160755 +GaussianMLPPolicy/LossBefore 1.2517e-09 +GaussianMLPPolicy/dLoss -0.000160754 +Iteration 118 +MetaTest/Average/AverageDiscountedReturn -41.5438 +MetaTest/Average/AverageReturn -41.5438 +MetaTest/Average/Iteration 118 +MetaTest/Average/MaxReturn -22.1755 +MetaTest/Average/MinReturn -60.23 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.21929 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.5438 +MetaTest/__unnamed_task__/AverageReturn -41.5438 +MetaTest/__unnamed_task__/Iteration 118 +MetaTest/__unnamed_task__/MaxReturn -22.1755 +MetaTest/__unnamed_task__/MinReturn -60.23 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.21929 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.808e+06 +__unnamed_task__/AverageDiscountedReturn -15.6694 +__unnamed_task__/AverageReturn -33.9758 +__unnamed_task__/Iteration 118 +__unnamed_task__/MaxReturn 18.1689 +__unnamed_task__/MinReturn -57.505 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1042 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:38:52 | [maml_trainer] epoch #119 | Sampling for adapation and meta-testing... +2025-04-03 05:40:24 | [maml_trainer] epoch #119 | Finished meta-testing... +2025-04-03 05:40:24 | [maml_trainer] epoch #119 | Saving snapshot... +2025-04-03 05:40:47 | [maml_trainer] epoch #119 | Saved +2025-04-03 05:40:47 | [maml_trainer] epoch #119 | Time 56869.39 s +2025-04-03 05:40:47 | [maml_trainer] epoch #119 | EpochTime 484.89 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.648 +Average/AverageReturn -36.8016 +Average/Iteration 119 +Average/MaxReturn -9.39231 +Average/MinReturn -68.6982 +Average/NumEpisodes 80 +Average/StdReturn 11.4507 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92072 +GaussianMLPPolicy/KLAfter 0.00143721 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000197311 +GaussianMLPPolicy/LossBefore -8.22544e-09 +GaussianMLPPolicy/dLoss 0.000197303 +Iteration 119 +MetaTest/Average/AverageDiscountedReturn -32.823 +MetaTest/Average/AverageReturn -32.823 +MetaTest/Average/Iteration 119 +MetaTest/Average/MaxReturn 1.73831 +MetaTest/Average/MinReturn -49.0187 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.102 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.823 +MetaTest/__unnamed_task__/AverageReturn -32.823 +MetaTest/__unnamed_task__/Iteration 119 +MetaTest/__unnamed_task__/MaxReturn 1.73831 +MetaTest/__unnamed_task__/MinReturn -49.0187 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.102 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.84e+06 +__unnamed_task__/AverageDiscountedReturn -16.648 +__unnamed_task__/AverageReturn -36.8016 +__unnamed_task__/Iteration 119 +__unnamed_task__/MaxReturn -9.39231 +__unnamed_task__/MinReturn -68.6982 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4507 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:46:58 | [maml_trainer] epoch #120 | Sampling for adapation and meta-testing... +2025-04-03 05:48:30 | [maml_trainer] epoch #120 | Finished meta-testing... +2025-04-03 05:48:30 | [maml_trainer] epoch #120 | Saving snapshot... +2025-04-03 05:48:54 | [maml_trainer] epoch #120 | Saved +2025-04-03 05:48:54 | [maml_trainer] epoch #120 | Time 57356.87 s +2025-04-03 05:48:54 | [maml_trainer] epoch #120 | EpochTime 487.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4939 +Average/AverageReturn -36.2104 +Average/Iteration 120 +Average/MaxReturn -0.464909 +Average/MinReturn -72.1639 +Average/NumEpisodes 80 +Average/StdReturn 10.5211 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91785 +GaussianMLPPolicy/KLAfter 0.00137747 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000101188 +GaussianMLPPolicy/LossBefore 2.33054e-08 +GaussianMLPPolicy/dLoss 0.000101211 +Iteration 120 +MetaTest/Average/AverageDiscountedReturn -34.2434 +MetaTest/Average/AverageReturn -34.2434 +MetaTest/Average/Iteration 120 +MetaTest/Average/MaxReturn -19.5476 +MetaTest/Average/MinReturn -46.8693 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.5006 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.2434 +MetaTest/__unnamed_task__/AverageReturn -34.2434 +MetaTest/__unnamed_task__/Iteration 120 +MetaTest/__unnamed_task__/MaxReturn -19.5476 +MetaTest/__unnamed_task__/MinReturn -46.8693 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.5006 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.872e+06 +__unnamed_task__/AverageDiscountedReturn -16.4939 +__unnamed_task__/AverageReturn -36.2104 +__unnamed_task__/Iteration 120 +__unnamed_task__/MaxReturn -0.464909 +__unnamed_task__/MinReturn -72.1639 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.5211 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 05:55:07 | [maml_trainer] epoch #121 | Sampling for adapation and meta-testing... +2025-04-03 05:56:39 | [maml_trainer] epoch #121 | Finished meta-testing... +2025-04-03 05:56:39 | [maml_trainer] epoch #121 | Saving snapshot... +2025-04-03 05:57:03 | [maml_trainer] epoch #121 | Saved +2025-04-03 05:57:03 | [maml_trainer] epoch #121 | Time 57845.23 s +2025-04-03 05:57:03 | [maml_trainer] epoch #121 | EpochTime 488.36 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8691 +Average/AverageReturn -33.4633 +Average/Iteration 121 +Average/MaxReturn 5.98832 +Average/MinReturn -63.5717 +Average/NumEpisodes 80 +Average/StdReturn 10.9744 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91526 +GaussianMLPPolicy/KLAfter 0.00105264 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.4622e-05 +GaussianMLPPolicy/LossBefore 2.03848e-08 +GaussianMLPPolicy/dLoss 4.46424e-05 +Iteration 121 +MetaTest/Average/AverageDiscountedReturn -37.9498 +MetaTest/Average/AverageReturn -37.9498 +MetaTest/Average/Iteration 121 +MetaTest/Average/MaxReturn -22.3717 +MetaTest/Average/MinReturn -55.2297 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0578 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.9498 +MetaTest/__unnamed_task__/AverageReturn -37.9498 +MetaTest/__unnamed_task__/Iteration 121 +MetaTest/__unnamed_task__/MaxReturn -22.3717 +MetaTest/__unnamed_task__/MinReturn -55.2297 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0578 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.904e+06 +__unnamed_task__/AverageDiscountedReturn -15.8691 +__unnamed_task__/AverageReturn -33.4633 +__unnamed_task__/Iteration 121 +__unnamed_task__/MaxReturn 5.98832 +__unnamed_task__/MinReturn -63.5717 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9744 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:03:15 | [maml_trainer] epoch #122 | Sampling for adapation and meta-testing... +2025-04-03 06:04:48 | [maml_trainer] epoch #122 | Finished meta-testing... +2025-04-03 06:04:48 | [maml_trainer] epoch #122 | Saving snapshot... +2025-04-03 06:05:10 | [maml_trainer] epoch #122 | Saved +2025-04-03 06:05:10 | [maml_trainer] epoch #122 | Time 58332.64 s +2025-04-03 06:05:10 | [maml_trainer] epoch #122 | EpochTime 487.40 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4154 +Average/AverageReturn -33.7599 +Average/Iteration 122 +Average/MaxReturn 10.6009 +Average/MinReturn -63.1432 +Average/NumEpisodes 80 +Average/StdReturn 13.7645 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91357 +GaussianMLPPolicy/KLAfter 0.00109991 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000114931 +GaussianMLPPolicy/LossBefore -1.72854e-09 +GaussianMLPPolicy/dLoss -0.000114933 +Iteration 122 +MetaTest/Average/AverageDiscountedReturn -38.1669 +MetaTest/Average/AverageReturn -38.1669 +MetaTest/Average/Iteration 122 +MetaTest/Average/MaxReturn -20.3024 +MetaTest/Average/MinReturn -58.5327 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.28891 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.1669 +MetaTest/__unnamed_task__/AverageReturn -38.1669 +MetaTest/__unnamed_task__/Iteration 122 +MetaTest/__unnamed_task__/MaxReturn -20.3024 +MetaTest/__unnamed_task__/MinReturn -58.5327 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.28891 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.936e+06 +__unnamed_task__/AverageDiscountedReturn -15.4154 +__unnamed_task__/AverageReturn -33.7599 +__unnamed_task__/Iteration 122 +__unnamed_task__/MaxReturn 10.6009 +__unnamed_task__/MinReturn -63.1432 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7645 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:11:20 | [maml_trainer] epoch #123 | Sampling for adapation and meta-testing... +2025-04-03 06:12:52 | [maml_trainer] epoch #123 | Finished meta-testing... +2025-04-03 06:12:52 | [maml_trainer] epoch #123 | Saving snapshot... +2025-04-03 06:13:16 | [maml_trainer] epoch #123 | Saved +2025-04-03 06:13:16 | [maml_trainer] epoch #123 | Time 58818.12 s +2025-04-03 06:13:16 | [maml_trainer] epoch #123 | EpochTime 485.49 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3981 +Average/AverageReturn -33.2281 +Average/Iteration 123 +Average/MaxReturn 36.6514 +Average/MinReturn -67.9365 +Average/NumEpisodes 80 +Average/StdReturn 15.8412 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91265 +GaussianMLPPolicy/KLAfter 0.00149946 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.13877e-05 +GaussianMLPPolicy/LossBefore 2.17557e-09 +GaussianMLPPolicy/dLoss 7.13898e-05 +Iteration 123 +MetaTest/Average/AverageDiscountedReturn -39.7769 +MetaTest/Average/AverageReturn -39.7769 +MetaTest/Average/Iteration 123 +MetaTest/Average/MaxReturn -13.338 +MetaTest/Average/MinReturn -65.5742 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.4439 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.7769 +MetaTest/__unnamed_task__/AverageReturn -39.7769 +MetaTest/__unnamed_task__/Iteration 123 +MetaTest/__unnamed_task__/MaxReturn -13.338 +MetaTest/__unnamed_task__/MinReturn -65.5742 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.4439 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.968e+06 +__unnamed_task__/AverageDiscountedReturn -15.3981 +__unnamed_task__/AverageReturn -33.2281 +__unnamed_task__/Iteration 123 +__unnamed_task__/MaxReturn 36.6514 +__unnamed_task__/MinReturn -67.9365 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.8412 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:19:30 | [maml_trainer] epoch #124 | Sampling for adapation and meta-testing... +2025-04-03 06:21:02 | [maml_trainer] epoch #124 | Finished meta-testing... +2025-04-03 06:21:02 | [maml_trainer] epoch #124 | Saving snapshot... +2025-04-03 06:21:25 | [maml_trainer] epoch #124 | Saved +2025-04-03 06:21:25 | [maml_trainer] epoch #124 | Time 59307.78 s +2025-04-03 06:21:25 | [maml_trainer] epoch #124 | EpochTime 489.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0293 +Average/AverageReturn -35.3133 +Average/Iteration 124 +Average/MaxReturn -0.313197 +Average/MinReturn -62.0066 +Average/NumEpisodes 80 +Average/StdReturn 11.3281 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91109 +GaussianMLPPolicy/KLAfter 0.000735985 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.56566e-06 +GaussianMLPPolicy/LossBefore 9.95398e-09 +GaussianMLPPolicy/dLoss -4.5557e-06 +Iteration 124 +MetaTest/Average/AverageDiscountedReturn -36.7851 +MetaTest/Average/AverageReturn -36.7851 +MetaTest/Average/Iteration 124 +MetaTest/Average/MaxReturn -21.7261 +MetaTest/Average/MinReturn -61.0948 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.79097 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.7851 +MetaTest/__unnamed_task__/AverageReturn -36.7851 +MetaTest/__unnamed_task__/Iteration 124 +MetaTest/__unnamed_task__/MaxReturn -21.7261 +MetaTest/__unnamed_task__/MinReturn -61.0948 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.79097 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4e+06 +__unnamed_task__/AverageDiscountedReturn -16.0293 +__unnamed_task__/AverageReturn -35.3133 +__unnamed_task__/Iteration 124 +__unnamed_task__/MaxReturn -0.313197 +__unnamed_task__/MinReturn -62.0066 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3281 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:27:38 | [maml_trainer] epoch #125 | Sampling for adapation and meta-testing... +2025-04-03 06:29:09 | [maml_trainer] epoch #125 | Finished meta-testing... +2025-04-03 06:29:09 | [maml_trainer] epoch #125 | Saving snapshot... +2025-04-03 06:29:33 | [maml_trainer] epoch #125 | Saved +2025-04-03 06:29:33 | [maml_trainer] epoch #125 | Time 59795.51 s +2025-04-03 06:29:33 | [maml_trainer] epoch #125 | EpochTime 487.73 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5951 +Average/AverageReturn -36.4849 +Average/Iteration 125 +Average/MaxReturn 8.87142 +Average/MinReturn -80.0333 +Average/NumEpisodes 80 +Average/StdReturn 13.1911 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9098 +GaussianMLPPolicy/KLAfter 0.0012257 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.96246e-05 +GaussianMLPPolicy/LossBefore 4.52995e-09 +GaussianMLPPolicy/dLoss 3.96291e-05 +Iteration 125 +MetaTest/Average/AverageDiscountedReturn -31.4462 +MetaTest/Average/AverageReturn -31.4462 +MetaTest/Average/Iteration 125 +MetaTest/Average/MaxReturn -7.50671 +MetaTest/Average/MinReturn -43.7383 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.56849 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.4462 +MetaTest/__unnamed_task__/AverageReturn -31.4462 +MetaTest/__unnamed_task__/Iteration 125 +MetaTest/__unnamed_task__/MaxReturn -7.50671 +MetaTest/__unnamed_task__/MinReturn -43.7383 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.56849 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.032e+06 +__unnamed_task__/AverageDiscountedReturn -16.5951 +__unnamed_task__/AverageReturn -36.4849 +__unnamed_task__/Iteration 125 +__unnamed_task__/MaxReturn 8.87142 +__unnamed_task__/MinReturn -80.0333 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1911 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:35:45 | [maml_trainer] epoch #126 | Sampling for adapation and meta-testing... +2025-04-03 06:37:18 | [maml_trainer] epoch #126 | Finished meta-testing... +2025-04-03 06:37:18 | [maml_trainer] epoch #126 | Saving snapshot... +2025-04-03 06:37:41 | [maml_trainer] epoch #126 | Saved +2025-04-03 06:37:41 | [maml_trainer] epoch #126 | Time 60283.68 s +2025-04-03 06:37:41 | [maml_trainer] epoch #126 | EpochTime 488.17 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9606 +Average/AverageReturn -36.8399 +Average/Iteration 126 +Average/MaxReturn 7.52529 +Average/MinReturn -74.0934 +Average/NumEpisodes 80 +Average/StdReturn 13.9731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90993 +GaussianMLPPolicy/KLAfter 0.00116813 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000190467 +GaussianMLPPolicy/LossBefore -2.98023e-10 +GaussianMLPPolicy/dLoss 0.000190466 +Iteration 126 +MetaTest/Average/AverageDiscountedReturn -35.7553 +MetaTest/Average/AverageReturn -35.7553 +MetaTest/Average/Iteration 126 +MetaTest/Average/MaxReturn 27.9093 +MetaTest/Average/MinReturn -66.9114 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5285 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.7553 +MetaTest/__unnamed_task__/AverageReturn -35.7553 +MetaTest/__unnamed_task__/Iteration 126 +MetaTest/__unnamed_task__/MaxReturn 27.9093 +MetaTest/__unnamed_task__/MinReturn -66.9114 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5285 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.064e+06 +__unnamed_task__/AverageDiscountedReturn -16.9606 +__unnamed_task__/AverageReturn -36.8399 +__unnamed_task__/Iteration 126 +__unnamed_task__/MaxReturn 7.52529 +__unnamed_task__/MinReturn -74.0934 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:43:52 | [maml_trainer] epoch #127 | Sampling for adapation and meta-testing... +2025-04-03 06:45:23 | [maml_trainer] epoch #127 | Finished meta-testing... +2025-04-03 06:45:23 | [maml_trainer] epoch #127 | Saving snapshot... +2025-04-03 06:45:45 | [maml_trainer] epoch #127 | Saved +2025-04-03 06:45:45 | [maml_trainer] epoch #127 | Time 60767.88 s +2025-04-03 06:45:45 | [maml_trainer] epoch #127 | EpochTime 484.19 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9374 +Average/AverageReturn -37.2839 +Average/Iteration 127 +Average/MaxReturn -7.03658 +Average/MinReturn -62.4076 +Average/NumEpisodes 80 +Average/StdReturn 10.3053 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90889 +GaussianMLPPolicy/KLAfter 0.00122745 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.59665e-05 +GaussianMLPPolicy/LossBefore 1.69873e-08 +GaussianMLPPolicy/dLoss -4.59495e-05 +Iteration 127 +MetaTest/Average/AverageDiscountedReturn -33.2727 +MetaTest/Average/AverageReturn -33.2727 +MetaTest/Average/Iteration 127 +MetaTest/Average/MaxReturn 3.7412 +MetaTest/Average/MinReturn -77.9756 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.472 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.2727 +MetaTest/__unnamed_task__/AverageReturn -33.2727 +MetaTest/__unnamed_task__/Iteration 127 +MetaTest/__unnamed_task__/MaxReturn 3.7412 +MetaTest/__unnamed_task__/MinReturn -77.9756 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.472 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.096e+06 +__unnamed_task__/AverageDiscountedReturn -16.9374 +__unnamed_task__/AverageReturn -37.2839 +__unnamed_task__/Iteration 127 +__unnamed_task__/MaxReturn -7.03658 +__unnamed_task__/MinReturn -62.4076 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3053 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 06:51:54 | [maml_trainer] epoch #128 | Sampling for adapation and meta-testing... +2025-04-03 06:53:26 | [maml_trainer] epoch #128 | Finished meta-testing... +2025-04-03 06:53:26 | [maml_trainer] epoch #128 | Saving snapshot... +2025-04-03 06:53:51 | [maml_trainer] epoch #128 | Saved +2025-04-03 06:53:51 | [maml_trainer] epoch #128 | Time 61253.26 s +2025-04-03 06:53:51 | [maml_trainer] epoch #128 | EpochTime 485.38 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.1964 +Average/AverageReturn -37.4086 +Average/Iteration 128 +Average/MaxReturn 5.96965 +Average/MinReturn -73.0183 +Average/NumEpisodes 80 +Average/StdReturn 11.8973 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90874 +GaussianMLPPolicy/KLAfter 0.000847002 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.74837e-05 +GaussianMLPPolicy/LossBefore 5.69224e-09 +GaussianMLPPolicy/dLoss 7.74893e-05 +Iteration 128 +MetaTest/Average/AverageDiscountedReturn -33.9622 +MetaTest/Average/AverageReturn -33.9622 +MetaTest/Average/Iteration 128 +MetaTest/Average/MaxReturn 4.37219 +MetaTest/Average/MinReturn -51.711 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6315 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.9622 +MetaTest/__unnamed_task__/AverageReturn -33.9622 +MetaTest/__unnamed_task__/Iteration 128 +MetaTest/__unnamed_task__/MaxReturn 4.37219 +MetaTest/__unnamed_task__/MinReturn -51.711 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6315 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.128e+06 +__unnamed_task__/AverageDiscountedReturn -17.1964 +__unnamed_task__/AverageReturn -37.4086 +__unnamed_task__/Iteration 128 +__unnamed_task__/MaxReturn 5.96965 +__unnamed_task__/MinReturn -73.0183 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8973 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:00:03 | [maml_trainer] epoch #129 | Sampling for adapation and meta-testing... +2025-04-03 07:01:36 | [maml_trainer] epoch #129 | Finished meta-testing... +2025-04-03 07:01:36 | [maml_trainer] epoch #129 | Saving snapshot... +2025-04-03 07:01:59 | [maml_trainer] epoch #129 | Saved +2025-04-03 07:01:59 | [maml_trainer] epoch #129 | Time 61741.40 s +2025-04-03 07:01:59 | [maml_trainer] epoch #129 | EpochTime 488.14 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8072 +Average/AverageReturn -37.312 +Average/Iteration 129 +Average/MaxReturn -5.17311 +Average/MinReturn -68.5167 +Average/NumEpisodes 80 +Average/StdReturn 11.741 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90763 +GaussianMLPPolicy/KLAfter 0.00146666 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.67123e-05 +GaussianMLPPolicy/LossBefore 1.32918e-08 +GaussianMLPPolicy/dLoss -7.6699e-05 +Iteration 129 +MetaTest/Average/AverageDiscountedReturn -37.7226 +MetaTest/Average/AverageReturn -37.7226 +MetaTest/Average/Iteration 129 +MetaTest/Average/MaxReturn -19.1292 +MetaTest/Average/MinReturn -63.0127 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4073 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.7226 +MetaTest/__unnamed_task__/AverageReturn -37.7226 +MetaTest/__unnamed_task__/Iteration 129 +MetaTest/__unnamed_task__/MaxReturn -19.1292 +MetaTest/__unnamed_task__/MinReturn -63.0127 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4073 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.16e+06 +__unnamed_task__/AverageDiscountedReturn -16.8072 +__unnamed_task__/AverageReturn -37.312 +__unnamed_task__/Iteration 129 +__unnamed_task__/MaxReturn -5.17311 +__unnamed_task__/MinReturn -68.5167 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.741 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:08:11 | [maml_trainer] epoch #130 | Sampling for adapation and meta-testing... +2025-04-03 07:09:43 | [maml_trainer] epoch #130 | Finished meta-testing... +2025-04-03 07:09:43 | [maml_trainer] epoch #130 | Saving snapshot... +2025-04-03 07:10:06 | [maml_trainer] epoch #130 | Saved +2025-04-03 07:10:06 | [maml_trainer] epoch #130 | Time 62228.48 s +2025-04-03 07:10:06 | [maml_trainer] epoch #130 | EpochTime 487.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0859 +Average/AverageReturn -35.3087 +Average/Iteration 130 +Average/MaxReturn 2.86177 +Average/MinReturn -66.2422 +Average/NumEpisodes 80 +Average/StdReturn 13.4242 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90617 +GaussianMLPPolicy/KLAfter 0.00101635 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.65823e-05 +GaussianMLPPolicy/LossBefore -1.30534e-08 +GaussianMLPPolicy/dLoss -8.65954e-05 +Iteration 130 +MetaTest/Average/AverageDiscountedReturn -35.8493 +MetaTest/Average/AverageReturn -35.8493 +MetaTest/Average/Iteration 130 +MetaTest/Average/MaxReturn -3.07099 +MetaTest/Average/MinReturn -58.4799 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.0854 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.8493 +MetaTest/__unnamed_task__/AverageReturn -35.8493 +MetaTest/__unnamed_task__/Iteration 130 +MetaTest/__unnamed_task__/MaxReturn -3.07099 +MetaTest/__unnamed_task__/MinReturn -58.4799 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.0854 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.192e+06 +__unnamed_task__/AverageDiscountedReturn -16.0859 +__unnamed_task__/AverageReturn -35.3087 +__unnamed_task__/Iteration 130 +__unnamed_task__/MaxReturn 2.86177 +__unnamed_task__/MinReturn -66.2422 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.4242 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:16:17 | [maml_trainer] epoch #131 | Sampling for adapation and meta-testing... +2025-04-03 07:17:52 | [maml_trainer] epoch #131 | Finished meta-testing... +2025-04-03 07:17:52 | [maml_trainer] epoch #131 | Saving snapshot... +2025-04-03 07:18:15 | [maml_trainer] epoch #131 | Saved +2025-04-03 07:18:15 | [maml_trainer] epoch #131 | Time 62717.91 s +2025-04-03 07:18:15 | [maml_trainer] epoch #131 | EpochTime 489.43 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0858 +Average/AverageReturn -35.2475 +Average/Iteration 131 +Average/MaxReturn 21.385 +Average/MinReturn -66.7704 +Average/NumEpisodes 80 +Average/StdReturn 13.7614 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.904 +GaussianMLPPolicy/KLAfter 0.00105701 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.15312e-05 +GaussianMLPPolicy/LossBefore 3.45707e-09 +GaussianMLPPolicy/dLoss 3.15346e-05 +Iteration 131 +MetaTest/Average/AverageDiscountedReturn -34.9455 +MetaTest/Average/AverageReturn -34.9455 +MetaTest/Average/Iteration 131 +MetaTest/Average/MaxReturn -23.4221 +MetaTest/Average/MinReturn -58.1522 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.6889 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.9455 +MetaTest/__unnamed_task__/AverageReturn -34.9455 +MetaTest/__unnamed_task__/Iteration 131 +MetaTest/__unnamed_task__/MaxReturn -23.4221 +MetaTest/__unnamed_task__/MinReturn -58.1522 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.6889 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.224e+06 +__unnamed_task__/AverageDiscountedReturn -16.0858 +__unnamed_task__/AverageReturn -35.2475 +__unnamed_task__/Iteration 131 +__unnamed_task__/MaxReturn 21.385 +__unnamed_task__/MinReturn -66.7704 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7614 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:24:25 | [maml_trainer] epoch #132 | Sampling for adapation and meta-testing... +2025-04-03 07:25:58 | [maml_trainer] epoch #132 | Finished meta-testing... +2025-04-03 07:25:58 | [maml_trainer] epoch #132 | Saving snapshot... +2025-04-03 07:26:21 | [maml_trainer] epoch #132 | Saved +2025-04-03 07:26:21 | [maml_trainer] epoch #132 | Time 63203.95 s +2025-04-03 07:26:21 | [maml_trainer] epoch #132 | EpochTime 486.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4015 +Average/AverageReturn -35.7721 +Average/Iteration 132 +Average/MaxReturn 15.5134 +Average/MinReturn -71.682 +Average/NumEpisodes 80 +Average/StdReturn 15.4204 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90281 +GaussianMLPPolicy/KLAfter 0.00174642 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.31946e-05 +GaussianMLPPolicy/LossBefore -9.86457e-09 +GaussianMLPPolicy/dLoss 3.31847e-05 +Iteration 132 +MetaTest/Average/AverageDiscountedReturn -26.3321 +MetaTest/Average/AverageReturn -26.3321 +MetaTest/Average/Iteration 132 +MetaTest/Average/MaxReturn 17.3688 +MetaTest/Average/MinReturn -52.4324 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.5295 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -26.3321 +MetaTest/__unnamed_task__/AverageReturn -26.3321 +MetaTest/__unnamed_task__/Iteration 132 +MetaTest/__unnamed_task__/MaxReturn 17.3688 +MetaTest/__unnamed_task__/MinReturn -52.4324 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.5295 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.256e+06 +__unnamed_task__/AverageDiscountedReturn -16.4015 +__unnamed_task__/AverageReturn -35.7721 +__unnamed_task__/Iteration 132 +__unnamed_task__/MaxReturn 15.5134 +__unnamed_task__/MinReturn -71.682 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4204 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:32:34 | [maml_trainer] epoch #133 | Sampling for adapation and meta-testing... +2025-04-03 07:34:05 | [maml_trainer] epoch #133 | Finished meta-testing... +2025-04-03 07:34:05 | [maml_trainer] epoch #133 | Saving snapshot... +2025-04-03 07:34:29 | [maml_trainer] epoch #133 | Saved +2025-04-03 07:34:29 | [maml_trainer] epoch #133 | Time 63691.31 s +2025-04-03 07:34:29 | [maml_trainer] epoch #133 | EpochTime 487.36 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8179 +Average/AverageReturn -37.4878 +Average/Iteration 133 +Average/MaxReturn 34.296 +Average/MinReturn -71.5979 +Average/NumEpisodes 80 +Average/StdReturn 15.3883 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90294 +GaussianMLPPolicy/KLAfter 0.00173514 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.79914e-05 +GaussianMLPPolicy/LossBefore 1.29938e-08 +GaussianMLPPolicy/dLoss 5.80043e-05 +Iteration 133 +MetaTest/Average/AverageDiscountedReturn -36.89 +MetaTest/Average/AverageReturn -36.89 +MetaTest/Average/Iteration 133 +MetaTest/Average/MaxReturn 18.2039 +MetaTest/Average/MinReturn -58.5721 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.3969 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.89 +MetaTest/__unnamed_task__/AverageReturn -36.89 +MetaTest/__unnamed_task__/Iteration 133 +MetaTest/__unnamed_task__/MaxReturn 18.2039 +MetaTest/__unnamed_task__/MinReturn -58.5721 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.3969 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.288e+06 +__unnamed_task__/AverageDiscountedReturn -16.8179 +__unnamed_task__/AverageReturn -37.4878 +__unnamed_task__/Iteration 133 +__unnamed_task__/MaxReturn 34.296 +__unnamed_task__/MinReturn -71.5979 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.3883 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:40:42 | [maml_trainer] epoch #134 | Sampling for adapation and meta-testing... +2025-04-03 07:42:15 | [maml_trainer] epoch #134 | Finished meta-testing... +2025-04-03 07:42:15 | [maml_trainer] epoch #134 | Saving snapshot... +2025-04-03 07:42:39 | [maml_trainer] epoch #134 | Saved +2025-04-03 07:42:39 | [maml_trainer] epoch #134 | Time 64181.01 s +2025-04-03 07:42:39 | [maml_trainer] epoch #134 | EpochTime 489.69 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9001 +Average/AverageReturn -37.4898 +Average/Iteration 134 +Average/MaxReturn 1.31955 +Average/MinReturn -63.9469 +Average/NumEpisodes 80 +Average/StdReturn 12.1654 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90152 +GaussianMLPPolicy/KLAfter 0.00208581 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.63129e-05 +GaussianMLPPolicy/LossBefore 5.96043e-11 +GaussianMLPPolicy/dLoss 1.6313e-05 +Iteration 134 +MetaTest/Average/AverageDiscountedReturn -33.1047 +MetaTest/Average/AverageReturn -33.1047 +MetaTest/Average/Iteration 134 +MetaTest/Average/MaxReturn -20.5089 +MetaTest/Average/MinReturn -48.1884 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.72612 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.1047 +MetaTest/__unnamed_task__/AverageReturn -33.1047 +MetaTest/__unnamed_task__/Iteration 134 +MetaTest/__unnamed_task__/MaxReturn -20.5089 +MetaTest/__unnamed_task__/MinReturn -48.1884 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.72612 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.32e+06 +__unnamed_task__/AverageDiscountedReturn -16.9001 +__unnamed_task__/AverageReturn -37.4898 +__unnamed_task__/Iteration 134 +__unnamed_task__/MaxReturn 1.31955 +__unnamed_task__/MinReturn -63.9469 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1654 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:48:56 | [maml_trainer] epoch #135 | Sampling for adapation and meta-testing... +2025-04-03 07:50:28 | [maml_trainer] epoch #135 | Finished meta-testing... +2025-04-03 07:50:28 | [maml_trainer] epoch #135 | Saving snapshot... +2025-04-03 07:50:50 | [maml_trainer] epoch #135 | Saved +2025-04-03 07:50:50 | [maml_trainer] epoch #135 | Time 64672.97 s +2025-04-03 07:50:50 | [maml_trainer] epoch #135 | EpochTime 491.96 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3612 +Average/AverageReturn -35.8622 +Average/Iteration 135 +Average/MaxReturn 1.34972 +Average/MinReturn -66.4477 +Average/NumEpisodes 80 +Average/StdReturn 10.938 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90127 +GaussianMLPPolicy/KLAfter 0.00155027 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.80986e-05 +GaussianMLPPolicy/LossBefore -2.69413e-08 +GaussianMLPPolicy/dLoss 4.80716e-05 +Iteration 135 +MetaTest/Average/AverageDiscountedReturn -37.2891 +MetaTest/Average/AverageReturn -37.2891 +MetaTest/Average/Iteration 135 +MetaTest/Average/MaxReturn -25.0052 +MetaTest/Average/MinReturn -54.031 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.98879 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.2891 +MetaTest/__unnamed_task__/AverageReturn -37.2891 +MetaTest/__unnamed_task__/Iteration 135 +MetaTest/__unnamed_task__/MaxReturn -25.0052 +MetaTest/__unnamed_task__/MinReturn -54.031 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.98879 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.352e+06 +__unnamed_task__/AverageDiscountedReturn -16.3612 +__unnamed_task__/AverageReturn -35.8622 +__unnamed_task__/Iteration 135 +__unnamed_task__/MaxReturn 1.34972 +__unnamed_task__/MinReturn -66.4477 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.938 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 07:56:59 | [maml_trainer] epoch #136 | Sampling for adapation and meta-testing... +2025-04-03 07:58:32 | [maml_trainer] epoch #136 | Finished meta-testing... +2025-04-03 07:58:32 | [maml_trainer] epoch #136 | Saving snapshot... +2025-04-03 07:58:56 | [maml_trainer] epoch #136 | Saved +2025-04-03 07:58:56 | [maml_trainer] epoch #136 | Time 65158.74 s +2025-04-03 07:58:56 | [maml_trainer] epoch #136 | EpochTime 485.76 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2486 +Average/AverageReturn -36.1445 +Average/Iteration 136 +Average/MaxReturn 1.82041 +Average/MinReturn -70.5955 +Average/NumEpisodes 80 +Average/StdReturn 12.5788 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90199 +GaussianMLPPolicy/KLAfter 0.00170643 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.50785e-05 +GaussianMLPPolicy/LossBefore 2.31862e-08 +GaussianMLPPolicy/dLoss 2.51017e-05 +Iteration 136 +MetaTest/Average/AverageDiscountedReturn -40.4091 +MetaTest/Average/AverageReturn -40.4091 +MetaTest/Average/Iteration 136 +MetaTest/Average/MaxReturn -11.1832 +MetaTest/Average/MinReturn -54.7969 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.2837 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.4091 +MetaTest/__unnamed_task__/AverageReturn -40.4091 +MetaTest/__unnamed_task__/Iteration 136 +MetaTest/__unnamed_task__/MaxReturn -11.1832 +MetaTest/__unnamed_task__/MinReturn -54.7969 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.2837 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.384e+06 +__unnamed_task__/AverageDiscountedReturn -16.2486 +__unnamed_task__/AverageReturn -36.1445 +__unnamed_task__/Iteration 136 +__unnamed_task__/MaxReturn 1.82041 +__unnamed_task__/MinReturn -70.5955 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.5788 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:05:12 | [maml_trainer] epoch #137 | Sampling for adapation and meta-testing... +2025-04-03 08:06:45 | [maml_trainer] epoch #137 | Finished meta-testing... +2025-04-03 08:06:45 | [maml_trainer] epoch #137 | Saving snapshot... +2025-04-03 08:07:09 | [maml_trainer] epoch #137 | Saved +2025-04-03 08:07:09 | [maml_trainer] epoch #137 | Time 65650.99 s +2025-04-03 08:07:09 | [maml_trainer] epoch #137 | EpochTime 492.25 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1746 +Average/AverageReturn -33.5914 +Average/Iteration 137 +Average/MaxReturn 12.2446 +Average/MinReturn -65.817 +Average/NumEpisodes 80 +Average/StdReturn 12.1376 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90218 +GaussianMLPPolicy/KLAfter 0.00177263 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.27038e-05 +GaussianMLPPolicy/LossBefore -9.14931e-09 +GaussianMLPPolicy/dLoss -3.2713e-05 +Iteration 137 +MetaTest/Average/AverageDiscountedReturn -34.6892 +MetaTest/Average/AverageReturn -34.6892 +MetaTest/Average/Iteration 137 +MetaTest/Average/MaxReturn -25.0834 +MetaTest/Average/MinReturn -52.9399 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.28888 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.6892 +MetaTest/__unnamed_task__/AverageReturn -34.6892 +MetaTest/__unnamed_task__/Iteration 137 +MetaTest/__unnamed_task__/MaxReturn -25.0834 +MetaTest/__unnamed_task__/MinReturn -52.9399 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.28888 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.416e+06 +__unnamed_task__/AverageDiscountedReturn -15.1746 +__unnamed_task__/AverageReturn -33.5914 +__unnamed_task__/Iteration 137 +__unnamed_task__/MaxReturn 12.2446 +__unnamed_task__/MinReturn -65.817 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1376 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:13:19 | [maml_trainer] epoch #138 | Sampling for adapation and meta-testing... +2025-04-03 08:14:53 | [maml_trainer] epoch #138 | Finished meta-testing... +2025-04-03 08:14:53 | [maml_trainer] epoch #138 | Saving snapshot... +2025-04-03 08:15:15 | [maml_trainer] epoch #138 | Saved +2025-04-03 08:15:15 | [maml_trainer] epoch #138 | Time 66137.50 s +2025-04-03 08:15:15 | [maml_trainer] epoch #138 | EpochTime 486.51 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.9418 +Average/AverageReturn -32.3662 +Average/Iteration 138 +Average/MaxReturn 31.2366 +Average/MinReturn -55.9038 +Average/NumEpisodes 80 +Average/StdReturn 14.5429 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90183 +GaussianMLPPolicy/KLAfter 0.00266482 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.80042e-05 +GaussianMLPPolicy/LossBefore -2.47359e-09 +GaussianMLPPolicy/dLoss -6.80067e-05 +Iteration 138 +MetaTest/Average/AverageDiscountedReturn -37.0631 +MetaTest/Average/AverageReturn -37.0631 +MetaTest/Average/Iteration 138 +MetaTest/Average/MaxReturn 20.8451 +MetaTest/Average/MinReturn -64.7256 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.6176 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.0631 +MetaTest/__unnamed_task__/AverageReturn -37.0631 +MetaTest/__unnamed_task__/Iteration 138 +MetaTest/__unnamed_task__/MaxReturn 20.8451 +MetaTest/__unnamed_task__/MinReturn -64.7256 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.6176 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.448e+06 +__unnamed_task__/AverageDiscountedReturn -14.9418 +__unnamed_task__/AverageReturn -32.3662 +__unnamed_task__/Iteration 138 +__unnamed_task__/MaxReturn 31.2366 +__unnamed_task__/MinReturn -55.9038 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.5429 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:21:26 | [maml_trainer] epoch #139 | Sampling for adapation and meta-testing... +2025-04-03 08:23:00 | [maml_trainer] epoch #139 | Finished meta-testing... +2025-04-03 08:23:00 | [maml_trainer] epoch #139 | Saving snapshot... +2025-04-03 08:23:24 | [maml_trainer] epoch #139 | Saved +2025-04-03 08:23:24 | [maml_trainer] epoch #139 | Time 66626.79 s +2025-04-03 08:23:24 | [maml_trainer] epoch #139 | EpochTime 489.28 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4837 +Average/AverageReturn -36.2485 +Average/Iteration 139 +Average/MaxReturn -11.0833 +Average/MinReturn -62.4118 +Average/NumEpisodes 80 +Average/StdReturn 10.2019 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9017 +GaussianMLPPolicy/KLAfter 0.00320958 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.19926e-05 +GaussianMLPPolicy/LossBefore -6.55652e-10 +GaussianMLPPolicy/dLoss 5.19919e-05 +Iteration 139 +MetaTest/Average/AverageDiscountedReturn -33.0823 +MetaTest/Average/AverageReturn -33.0823 +MetaTest/Average/Iteration 139 +MetaTest/Average/MaxReturn -21.8188 +MetaTest/Average/MinReturn -50.3178 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.9862 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.0823 +MetaTest/__unnamed_task__/AverageReturn -33.0823 +MetaTest/__unnamed_task__/Iteration 139 +MetaTest/__unnamed_task__/MaxReturn -21.8188 +MetaTest/__unnamed_task__/MinReturn -50.3178 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.9862 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.48e+06 +__unnamed_task__/AverageDiscountedReturn -16.4837 +__unnamed_task__/AverageReturn -36.2485 +__unnamed_task__/Iteration 139 +__unnamed_task__/MaxReturn -11.0833 +__unnamed_task__/MinReturn -62.4118 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2019 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:29:36 | [maml_trainer] epoch #140 | Sampling for adapation and meta-testing... +2025-04-03 08:31:08 | [maml_trainer] epoch #140 | Finished meta-testing... +2025-04-03 08:31:08 | [maml_trainer] epoch #140 | Saving snapshot... +2025-04-03 08:31:32 | [maml_trainer] epoch #140 | Saved +2025-04-03 08:31:32 | [maml_trainer] epoch #140 | Time 67114.17 s +2025-04-03 08:31:32 | [maml_trainer] epoch #140 | EpochTime 487.38 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0132 +Average/AverageReturn -35.8379 +Average/Iteration 140 +Average/MaxReturn 4.46141 +Average/MinReturn -57.6464 +Average/NumEpisodes 80 +Average/StdReturn 10.6307 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90161 +GaussianMLPPolicy/KLAfter 0.00548679 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000135347 +GaussianMLPPolicy/LossBefore 1.10865e-08 +GaussianMLPPolicy/dLoss -0.000135336 +Iteration 140 +MetaTest/Average/AverageDiscountedReturn -38.4381 +MetaTest/Average/AverageReturn -38.4381 +MetaTest/Average/Iteration 140 +MetaTest/Average/MaxReturn -6.60352 +MetaTest/Average/MinReturn -63.37 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6252 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.4381 +MetaTest/__unnamed_task__/AverageReturn -38.4381 +MetaTest/__unnamed_task__/Iteration 140 +MetaTest/__unnamed_task__/MaxReturn -6.60352 +MetaTest/__unnamed_task__/MinReturn -63.37 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6252 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.512e+06 +__unnamed_task__/AverageDiscountedReturn -16.0132 +__unnamed_task__/AverageReturn -35.8379 +__unnamed_task__/Iteration 140 +__unnamed_task__/MaxReturn 4.46141 +__unnamed_task__/MinReturn -57.6464 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.6307 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:37:43 | [maml_trainer] epoch #141 | Sampling for adapation and meta-testing... +2025-04-03 08:39:16 | [maml_trainer] epoch #141 | Finished meta-testing... +2025-04-03 08:39:16 | [maml_trainer] epoch #141 | Saving snapshot... +2025-04-03 08:39:39 | [maml_trainer] epoch #141 | Saved +2025-04-03 08:39:39 | [maml_trainer] epoch #141 | Time 67601.25 s +2025-04-03 08:39:39 | [maml_trainer] epoch #141 | EpochTime 487.08 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3169 +Average/AverageReturn -35.3602 +Average/Iteration 141 +Average/MaxReturn 10.5572 +Average/MinReturn -62.3291 +Average/NumEpisodes 80 +Average/StdReturn 12.3903 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90038 +GaussianMLPPolicy/KLAfter 0.0044484 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.3991e-05 +GaussianMLPPolicy/LossBefore 1.10269e-09 +GaussianMLPPolicy/dLoss 6.39921e-05 +Iteration 141 +MetaTest/Average/AverageDiscountedReturn -36.52 +MetaTest/Average/AverageReturn -36.52 +MetaTest/Average/Iteration 141 +MetaTest/Average/MaxReturn -8.68231 +MetaTest/Average/MinReturn -56.0601 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.0774 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.52 +MetaTest/__unnamed_task__/AverageReturn -36.52 +MetaTest/__unnamed_task__/Iteration 141 +MetaTest/__unnamed_task__/MaxReturn -8.68231 +MetaTest/__unnamed_task__/MinReturn -56.0601 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.0774 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.544e+06 +__unnamed_task__/AverageDiscountedReturn -16.3169 +__unnamed_task__/AverageReturn -35.3602 +__unnamed_task__/Iteration 141 +__unnamed_task__/MaxReturn 10.5572 +__unnamed_task__/MinReturn -62.3291 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3903 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:45:50 | [maml_trainer] epoch #142 | Sampling for adapation and meta-testing... +2025-04-03 08:47:22 | [maml_trainer] epoch #142 | Finished meta-testing... +2025-04-03 08:47:22 | [maml_trainer] epoch #142 | Saving snapshot... +2025-04-03 08:47:47 | [maml_trainer] epoch #142 | Saved +2025-04-03 08:47:47 | [maml_trainer] epoch #142 | Time 68089.27 s +2025-04-03 08:47:47 | [maml_trainer] epoch #142 | EpochTime 488.01 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5495 +Average/AverageReturn -36.1179 +Average/Iteration 142 +Average/MaxReturn 10.7995 +Average/MinReturn -67.9176 +Average/NumEpisodes 80 +Average/StdReturn 13.7419 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89977 +GaussianMLPPolicy/KLAfter 0.00439689 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000154303 +GaussianMLPPolicy/LossBefore -2.5332e-09 +GaussianMLPPolicy/dLoss 0.0001543 +Iteration 142 +MetaTest/Average/AverageDiscountedReturn -35.4908 +MetaTest/Average/AverageReturn -35.4908 +MetaTest/Average/Iteration 142 +MetaTest/Average/MaxReturn -3.1522 +MetaTest/Average/MinReturn -57.8064 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.4518 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.4908 +MetaTest/__unnamed_task__/AverageReturn -35.4908 +MetaTest/__unnamed_task__/Iteration 142 +MetaTest/__unnamed_task__/MaxReturn -3.1522 +MetaTest/__unnamed_task__/MinReturn -57.8064 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.4518 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.576e+06 +__unnamed_task__/AverageDiscountedReturn -16.5495 +__unnamed_task__/AverageReturn -36.1179 +__unnamed_task__/Iteration 142 +__unnamed_task__/MaxReturn 10.7995 +__unnamed_task__/MinReturn -67.9176 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7419 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 08:54:00 | [maml_trainer] epoch #143 | Sampling for adapation and meta-testing... +2025-04-03 08:55:32 | [maml_trainer] epoch #143 | Finished meta-testing... +2025-04-03 08:55:32 | [maml_trainer] epoch #143 | Saving snapshot... +2025-04-03 08:55:55 | [maml_trainer] epoch #143 | Saved +2025-04-03 08:55:55 | [maml_trainer] epoch #143 | Time 68577.18 s +2025-04-03 08:55:55 | [maml_trainer] epoch #143 | EpochTime 487.91 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.3314 +Average/AverageReturn -37.8039 +Average/Iteration 143 +Average/MaxReturn -11.4913 +Average/MinReturn -61.0823 +Average/NumEpisodes 80 +Average/StdReturn 8.79003 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89944 +GaussianMLPPolicy/KLAfter 0.00232859 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.64541e-05 +GaussianMLPPolicy/LossBefore -1.72853e-09 +GaussianMLPPolicy/dLoss 3.64524e-05 +Iteration 143 +MetaTest/Average/AverageDiscountedReturn -35.9802 +MetaTest/Average/AverageReturn -35.9802 +MetaTest/Average/Iteration 143 +MetaTest/Average/MaxReturn -25.4686 +MetaTest/Average/MinReturn -47.579 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.00391 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.9802 +MetaTest/__unnamed_task__/AverageReturn -35.9802 +MetaTest/__unnamed_task__/Iteration 143 +MetaTest/__unnamed_task__/MaxReturn -25.4686 +MetaTest/__unnamed_task__/MinReturn -47.579 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.00391 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.608e+06 +__unnamed_task__/AverageDiscountedReturn -17.3314 +__unnamed_task__/AverageReturn -37.8039 +__unnamed_task__/Iteration 143 +__unnamed_task__/MaxReturn -11.4913 +__unnamed_task__/MinReturn -61.0823 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.79003 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:01:55 | [maml_trainer] epoch #144 | Sampling for adapation and meta-testing... +2025-04-03 09:03:23 | [maml_trainer] epoch #144 | Finished meta-testing... +2025-04-03 09:03:23 | [maml_trainer] epoch #144 | Saving snapshot... +2025-04-03 09:03:45 | [maml_trainer] epoch #144 | Saved +2025-04-03 09:03:45 | [maml_trainer] epoch #144 | Time 69047.66 s +2025-04-03 09:03:45 | [maml_trainer] epoch #144 | EpochTime 470.48 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6427 +Average/AverageReturn -34.682 +Average/Iteration 144 +Average/MaxReturn -1.51511 +Average/MinReturn -63.1737 +Average/NumEpisodes 80 +Average/StdReturn 12.3497 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9001 +GaussianMLPPolicy/KLAfter 0.00179559 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.03099e-05 +GaussianMLPPolicy/LossBefore 5.126e-09 +GaussianMLPPolicy/dLoss -3.03047e-05 +Iteration 144 +MetaTest/Average/AverageDiscountedReturn -35.3643 +MetaTest/Average/AverageReturn -35.3643 +MetaTest/Average/Iteration 144 +MetaTest/Average/MaxReturn -2.33499 +MetaTest/Average/MinReturn -56.148 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.0143 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.3643 +MetaTest/__unnamed_task__/AverageReturn -35.3643 +MetaTest/__unnamed_task__/Iteration 144 +MetaTest/__unnamed_task__/MaxReturn -2.33499 +MetaTest/__unnamed_task__/MinReturn -56.148 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.0143 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.64e+06 +__unnamed_task__/AverageDiscountedReturn -15.6427 +__unnamed_task__/AverageReturn -34.682 +__unnamed_task__/Iteration 144 +__unnamed_task__/MaxReturn -1.51511 +__unnamed_task__/MinReturn -63.1737 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3497 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:09:33 | [maml_trainer] epoch #145 | Sampling for adapation and meta-testing... +2025-04-03 09:11:02 | [maml_trainer] epoch #145 | Finished meta-testing... +2025-04-03 09:11:02 | [maml_trainer] epoch #145 | Saving snapshot... +2025-04-03 09:11:26 | [maml_trainer] epoch #145 | Saved +2025-04-03 09:11:26 | [maml_trainer] epoch #145 | Time 69508.04 s +2025-04-03 09:11:26 | [maml_trainer] epoch #145 | EpochTime 460.38 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.0824 +Average/AverageReturn -40.3634 +Average/Iteration 145 +Average/MaxReturn -1.13605 +Average/MinReturn -64.1173 +Average/NumEpisodes 80 +Average/StdReturn 11.7278 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89991 +GaussianMLPPolicy/KLAfter 0.002573 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.76094e-05 +GaussianMLPPolicy/LossBefore 4.58956e-09 +GaussianMLPPolicy/dLoss -9.76048e-05 +Iteration 145 +MetaTest/Average/AverageDiscountedReturn -40.3031 +MetaTest/Average/AverageReturn -40.3031 +MetaTest/Average/Iteration 145 +MetaTest/Average/MaxReturn -21.8439 +MetaTest/Average/MinReturn -64.3193 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.2911 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.3031 +MetaTest/__unnamed_task__/AverageReturn -40.3031 +MetaTest/__unnamed_task__/Iteration 145 +MetaTest/__unnamed_task__/MaxReturn -21.8439 +MetaTest/__unnamed_task__/MinReturn -64.3193 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.2911 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.672e+06 +__unnamed_task__/AverageDiscountedReturn -18.0824 +__unnamed_task__/AverageReturn -40.3634 +__unnamed_task__/Iteration 145 +__unnamed_task__/MaxReturn -1.13605 +__unnamed_task__/MinReturn -64.1173 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.7278 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:17:17 | [maml_trainer] epoch #146 | Sampling for adapation and meta-testing... +2025-04-03 09:18:46 | [maml_trainer] epoch #146 | Finished meta-testing... +2025-04-03 09:18:46 | [maml_trainer] epoch #146 | Saving snapshot... +2025-04-03 09:19:08 | [maml_trainer] epoch #146 | Saved +2025-04-03 09:19:08 | [maml_trainer] epoch #146 | Time 69970.60 s +2025-04-03 09:19:08 | [maml_trainer] epoch #146 | EpochTime 462.55 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.0166 +Average/AverageReturn -39.7833 +Average/Iteration 146 +Average/MaxReturn -17.462 +Average/MinReturn -63.0808 +Average/NumEpisodes 80 +Average/StdReturn 10.1547 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89936 +GaussianMLPPolicy/KLAfter 0.00206088 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.58101e-05 +GaussianMLPPolicy/LossBefore -9.77516e-09 +GaussianMLPPolicy/dLoss -3.58199e-05 +Iteration 146 +MetaTest/Average/AverageDiscountedReturn -41.1556 +MetaTest/Average/AverageReturn -41.1556 +MetaTest/Average/Iteration 146 +MetaTest/Average/MaxReturn -15.583 +MetaTest/Average/MinReturn -60.1431 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.379 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.1556 +MetaTest/__unnamed_task__/AverageReturn -41.1556 +MetaTest/__unnamed_task__/Iteration 146 +MetaTest/__unnamed_task__/MaxReturn -15.583 +MetaTest/__unnamed_task__/MinReturn -60.1431 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.379 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.704e+06 +__unnamed_task__/AverageDiscountedReturn -18.0166 +__unnamed_task__/AverageReturn -39.7833 +__unnamed_task__/Iteration 146 +__unnamed_task__/MaxReturn -17.462 +__unnamed_task__/MinReturn -63.0808 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1547 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:25:06 | [maml_trainer] epoch #147 | Sampling for adapation and meta-testing... +2025-04-03 09:26:35 | [maml_trainer] epoch #147 | Finished meta-testing... +2025-04-03 09:26:35 | [maml_trainer] epoch #147 | Saving snapshot... +2025-04-03 09:26:56 | [maml_trainer] epoch #147 | Saved +2025-04-03 09:26:56 | [maml_trainer] epoch #147 | Time 70438.89 s +2025-04-03 09:26:56 | [maml_trainer] epoch #147 | EpochTime 468.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.3884 +Average/AverageReturn -38.0938 +Average/Iteration 147 +Average/MaxReturn 1.0344 +Average/MinReturn -71.9844 +Average/NumEpisodes 80 +Average/StdReturn 11.6206 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89934 +GaussianMLPPolicy/KLAfter 0.0025538 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.00012324 +GaussianMLPPolicy/LossBefore 1.05202e-08 +GaussianMLPPolicy/dLoss 0.00012325 +Iteration 147 +MetaTest/Average/AverageDiscountedReturn -38.7483 +MetaTest/Average/AverageReturn -38.7483 +MetaTest/Average/Iteration 147 +MetaTest/Average/MaxReturn -22.0747 +MetaTest/Average/MinReturn -55.1071 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.14639 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.7483 +MetaTest/__unnamed_task__/AverageReturn -38.7483 +MetaTest/__unnamed_task__/Iteration 147 +MetaTest/__unnamed_task__/MaxReturn -22.0747 +MetaTest/__unnamed_task__/MinReturn -55.1071 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.14639 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.736e+06 +__unnamed_task__/AverageDiscountedReturn -17.3884 +__unnamed_task__/AverageReturn -38.0938 +__unnamed_task__/Iteration 147 +__unnamed_task__/MaxReturn 1.0344 +__unnamed_task__/MinReturn -71.9844 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6206 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:32:48 | [maml_trainer] epoch #148 | Sampling for adapation and meta-testing... +2025-04-03 09:34:16 | [maml_trainer] epoch #148 | Finished meta-testing... +2025-04-03 09:34:16 | [maml_trainer] epoch #148 | Saving snapshot... +2025-04-03 09:34:40 | [maml_trainer] epoch #148 | Saved +2025-04-03 09:34:40 | [maml_trainer] epoch #148 | Time 70902.01 s +2025-04-03 09:34:40 | [maml_trainer] epoch #148 | EpochTime 463.12 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.3259 +Average/AverageReturn -41.42 +Average/Iteration 148 +Average/MaxReturn -21.5413 +Average/MinReturn -67.007 +Average/NumEpisodes 80 +Average/StdReturn 10.8273 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90004 +GaussianMLPPolicy/KLAfter 0.00257483 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.68828e-05 +GaussianMLPPolicy/LossBefore -2.26498e-09 +GaussianMLPPolicy/dLoss 8.68805e-05 +Iteration 148 +MetaTest/Average/AverageDiscountedReturn -39.9252 +MetaTest/Average/AverageReturn -39.9252 +MetaTest/Average/Iteration 148 +MetaTest/Average/MaxReturn 33.4344 +MetaTest/Average/MinReturn -63.4607 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.5669 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.9252 +MetaTest/__unnamed_task__/AverageReturn -39.9252 +MetaTest/__unnamed_task__/Iteration 148 +MetaTest/__unnamed_task__/MaxReturn 33.4344 +MetaTest/__unnamed_task__/MinReturn -63.4607 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.5669 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.768e+06 +__unnamed_task__/AverageDiscountedReturn -18.3259 +__unnamed_task__/AverageReturn -41.42 +__unnamed_task__/Iteration 148 +__unnamed_task__/MaxReturn -21.5413 +__unnamed_task__/MinReturn -67.007 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8273 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:40:34 | [maml_trainer] epoch #149 | Sampling for adapation and meta-testing... +2025-04-03 09:42:02 | [maml_trainer] epoch #149 | Finished meta-testing... +2025-04-03 09:42:02 | [maml_trainer] epoch #149 | Saving snapshot... +2025-04-03 09:42:25 | [maml_trainer] epoch #149 | Saved +2025-04-03 09:42:25 | [maml_trainer] epoch #149 | Time 71367.09 s +2025-04-03 09:42:25 | [maml_trainer] epoch #149 | EpochTime 465.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8766 +Average/AverageReturn -38.1326 +Average/Iteration 149 +Average/MaxReturn 20.0403 +Average/MinReturn -75.9014 +Average/NumEpisodes 80 +Average/StdReturn 13.9556 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90061 +GaussianMLPPolicy/KLAfter 0.00301012 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.3587e-05 +GaussianMLPPolicy/LossBefore -1.78814e-08 +GaussianMLPPolicy/dLoss 2.35692e-05 +Iteration 149 +MetaTest/Average/AverageDiscountedReturn -38.7868 +MetaTest/Average/AverageReturn -38.7868 +MetaTest/Average/Iteration 149 +MetaTest/Average/MaxReturn -10.0285 +MetaTest/Average/MinReturn -67.8111 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.2 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.7868 +MetaTest/__unnamed_task__/AverageReturn -38.7868 +MetaTest/__unnamed_task__/Iteration 149 +MetaTest/__unnamed_task__/MaxReturn -10.0285 +MetaTest/__unnamed_task__/MinReturn -67.8111 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.2 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.8e+06 +__unnamed_task__/AverageDiscountedReturn -16.8766 +__unnamed_task__/AverageReturn -38.1326 +__unnamed_task__/Iteration 149 +__unnamed_task__/MaxReturn 20.0403 +__unnamed_task__/MinReturn -75.9014 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.9556 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:48:18 | [maml_trainer] epoch #150 | Sampling for adapation and meta-testing... +2025-04-03 09:49:47 | [maml_trainer] epoch #150 | Finished meta-testing... +2025-04-03 09:49:47 | [maml_trainer] epoch #150 | Saving snapshot... +2025-04-03 09:50:09 | [maml_trainer] epoch #150 | Saved +2025-04-03 09:50:09 | [maml_trainer] epoch #150 | Time 71831.81 s +2025-04-03 09:50:09 | [maml_trainer] epoch #150 | EpochTime 464.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9344 +Average/AverageReturn -36.6461 +Average/Iteration 150 +Average/MaxReturn 21.7414 +Average/MinReturn -65.8758 +Average/NumEpisodes 80 +Average/StdReturn 10.7575 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90144 +GaussianMLPPolicy/KLAfter 0.00367293 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000167696 +GaussianMLPPolicy/LossBefore -5.87106e-09 +GaussianMLPPolicy/dLoss 0.00016769 +Iteration 150 +MetaTest/Average/AverageDiscountedReturn -35.4433 +MetaTest/Average/AverageReturn -35.4433 +MetaTest/Average/Iteration 150 +MetaTest/Average/MaxReturn -21.3892 +MetaTest/Average/MinReturn -52.1148 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.4926 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.4433 +MetaTest/__unnamed_task__/AverageReturn -35.4433 +MetaTest/__unnamed_task__/Iteration 150 +MetaTest/__unnamed_task__/MaxReturn -21.3892 +MetaTest/__unnamed_task__/MinReturn -52.1148 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.4926 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.832e+06 +__unnamed_task__/AverageDiscountedReturn -16.9344 +__unnamed_task__/AverageReturn -36.6461 +__unnamed_task__/Iteration 150 +__unnamed_task__/MaxReturn 21.7414 +__unnamed_task__/MinReturn -65.8758 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.7575 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 09:56:21 | [maml_trainer] epoch #151 | Sampling for adapation and meta-testing... +2025-04-03 09:57:54 | [maml_trainer] epoch #151 | Finished meta-testing... +2025-04-03 09:57:54 | [maml_trainer] epoch #151 | Saving snapshot... +2025-04-03 09:58:18 | [maml_trainer] epoch #151 | Saved +2025-04-03 09:58:18 | [maml_trainer] epoch #151 | Time 72320.15 s +2025-04-03 09:58:18 | [maml_trainer] epoch #151 | EpochTime 488.33 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4997 +Average/AverageReturn -35.8601 +Average/Iteration 151 +Average/MaxReturn 11.542 +Average/MinReturn -66.6681 +Average/NumEpisodes 80 +Average/StdReturn 11.9502 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90188 +GaussianMLPPolicy/KLAfter 0.00521105 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000194951 +GaussianMLPPolicy/LossBefore 7.7188e-09 +GaussianMLPPolicy/dLoss 0.000194958 +Iteration 151 +MetaTest/Average/AverageDiscountedReturn -37.938 +MetaTest/Average/AverageReturn -37.938 +MetaTest/Average/Iteration 151 +MetaTest/Average/MaxReturn -22.3087 +MetaTest/Average/MinReturn -52.4583 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.0219 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.938 +MetaTest/__unnamed_task__/AverageReturn -37.938 +MetaTest/__unnamed_task__/Iteration 151 +MetaTest/__unnamed_task__/MaxReturn -22.3087 +MetaTest/__unnamed_task__/MinReturn -52.4583 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.0219 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.864e+06 +__unnamed_task__/AverageDiscountedReturn -16.4997 +__unnamed_task__/AverageReturn -35.8601 +__unnamed_task__/Iteration 151 +__unnamed_task__/MaxReturn 11.542 +__unnamed_task__/MinReturn -66.6681 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9502 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:04:27 | [maml_trainer] epoch #152 | Sampling for adapation and meta-testing... +2025-04-03 10:05:59 | [maml_trainer] epoch #152 | Finished meta-testing... +2025-04-03 10:05:59 | [maml_trainer] epoch #152 | Saving snapshot... +2025-04-03 10:06:23 | [maml_trainer] epoch #152 | Saved +2025-04-03 10:06:23 | [maml_trainer] epoch #152 | Time 72805.52 s +2025-04-03 10:06:23 | [maml_trainer] epoch #152 | EpochTime 485.37 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4763 +Average/AverageReturn -36.1078 +Average/Iteration 152 +Average/MaxReturn 10.6879 +Average/MinReturn -71.2778 +Average/NumEpisodes 80 +Average/StdReturn 12.9743 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90213 +GaussianMLPPolicy/KLAfter 0.00439413 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.37371e-05 +GaussianMLPPolicy/LossBefore 2.563e-09 +GaussianMLPPolicy/dLoss 7.37397e-05 +Iteration 152 +MetaTest/Average/AverageDiscountedReturn -34.7759 +MetaTest/Average/AverageReturn -34.7759 +MetaTest/Average/Iteration 152 +MetaTest/Average/MaxReturn -4.52053 +MetaTest/Average/MinReturn -53.8905 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.6128 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.7759 +MetaTest/__unnamed_task__/AverageReturn -34.7759 +MetaTest/__unnamed_task__/Iteration 152 +MetaTest/__unnamed_task__/MaxReturn -4.52053 +MetaTest/__unnamed_task__/MinReturn -53.8905 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.6128 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.896e+06 +__unnamed_task__/AverageDiscountedReturn -16.4763 +__unnamed_task__/AverageReturn -36.1078 +__unnamed_task__/Iteration 152 +__unnamed_task__/MaxReturn 10.6879 +__unnamed_task__/MinReturn -71.2778 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.9743 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:12:36 | [maml_trainer] epoch #153 | Sampling for adapation and meta-testing... +2025-04-03 10:14:10 | [maml_trainer] epoch #153 | Finished meta-testing... +2025-04-03 10:14:10 | [maml_trainer] epoch #153 | Saving snapshot... +2025-04-03 10:14:32 | [maml_trainer] epoch #153 | Saved +2025-04-03 10:14:32 | [maml_trainer] epoch #153 | Time 73294.54 s +2025-04-03 10:14:32 | [maml_trainer] epoch #153 | EpochTime 489.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3986 +Average/AverageReturn -35.8217 +Average/Iteration 153 +Average/MaxReturn -14.6199 +Average/MinReturn -54.6383 +Average/NumEpisodes 80 +Average/StdReturn 9.0894 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90332 +GaussianMLPPolicy/KLAfter 0.00502378 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000191094 +GaussianMLPPolicy/LossBefore 6.79493e-09 +GaussianMLPPolicy/dLoss -0.000191087 +Iteration 153 +MetaTest/Average/AverageDiscountedReturn -34.5437 +MetaTest/Average/AverageReturn -34.5437 +MetaTest/Average/Iteration 153 +MetaTest/Average/MaxReturn -13.2537 +MetaTest/Average/MinReturn -70.9012 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.4941 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.5437 +MetaTest/__unnamed_task__/AverageReturn -34.5437 +MetaTest/__unnamed_task__/Iteration 153 +MetaTest/__unnamed_task__/MaxReturn -13.2537 +MetaTest/__unnamed_task__/MinReturn -70.9012 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.4941 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.928e+06 +__unnamed_task__/AverageDiscountedReturn -16.3986 +__unnamed_task__/AverageReturn -35.8217 +__unnamed_task__/Iteration 153 +__unnamed_task__/MaxReturn -14.6199 +__unnamed_task__/MinReturn -54.6383 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.0894 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:20:40 | [maml_trainer] epoch #154 | Sampling for adapation and meta-testing... +2025-04-03 10:22:12 | [maml_trainer] epoch #154 | Finished meta-testing... +2025-04-03 10:22:12 | [maml_trainer] epoch #154 | Saving snapshot... +2025-04-03 10:22:37 | [maml_trainer] epoch #154 | Saved +2025-04-03 10:22:37 | [maml_trainer] epoch #154 | Time 73779.23 s +2025-04-03 10:22:37 | [maml_trainer] epoch #154 | EpochTime 484.68 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.031 +Average/AverageReturn -37.8579 +Average/Iteration 154 +Average/MaxReturn -12.3388 +Average/MinReturn -71.7846 +Average/NumEpisodes 80 +Average/StdReturn 11.1393 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90574 +GaussianMLPPolicy/KLAfter 0.00486539 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00022507 +GaussianMLPPolicy/LossBefore 1.96695e-09 +GaussianMLPPolicy/dLoss -0.000225068 +Iteration 154 +MetaTest/Average/AverageDiscountedReturn -36.0344 +MetaTest/Average/AverageReturn -36.0344 +MetaTest/Average/Iteration 154 +MetaTest/Average/MaxReturn -23.4687 +MetaTest/Average/MinReturn -52.2 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.2918 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.0344 +MetaTest/__unnamed_task__/AverageReturn -36.0344 +MetaTest/__unnamed_task__/Iteration 154 +MetaTest/__unnamed_task__/MaxReturn -23.4687 +MetaTest/__unnamed_task__/MinReturn -52.2 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.2918 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.96e+06 +__unnamed_task__/AverageDiscountedReturn -17.031 +__unnamed_task__/AverageReturn -37.8579 +__unnamed_task__/Iteration 154 +__unnamed_task__/MaxReturn -12.3388 +__unnamed_task__/MinReturn -71.7846 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1393 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:28:48 | [maml_trainer] epoch #155 | Sampling for adapation and meta-testing... +2025-04-03 10:30:21 | [maml_trainer] epoch #155 | Finished meta-testing... +2025-04-03 10:30:21 | [maml_trainer] epoch #155 | Saving snapshot... +2025-04-03 10:30:45 | [maml_trainer] epoch #155 | Saved +2025-04-03 10:30:45 | [maml_trainer] epoch #155 | Time 74267.25 s +2025-04-03 10:30:45 | [maml_trainer] epoch #155 | EpochTime 488.01 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0264 +Average/AverageReturn -38.4375 +Average/Iteration 155 +Average/MaxReturn -0.770585 +Average/MinReturn -74.8192 +Average/NumEpisodes 80 +Average/StdReturn 11.6392 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90858 +GaussianMLPPolicy/KLAfter 0.00423401 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000186862 +GaussianMLPPolicy/LossBefore -6.61612e-09 +GaussianMLPPolicy/dLoss -0.000186868 +Iteration 155 +MetaTest/Average/AverageDiscountedReturn -41.0456 +MetaTest/Average/AverageReturn -41.0456 +MetaTest/Average/Iteration 155 +MetaTest/Average/MaxReturn -25.5137 +MetaTest/Average/MinReturn -63.7262 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1715 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.0456 +MetaTest/__unnamed_task__/AverageReturn -41.0456 +MetaTest/__unnamed_task__/Iteration 155 +MetaTest/__unnamed_task__/MaxReturn -25.5137 +MetaTest/__unnamed_task__/MinReturn -63.7262 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1715 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.992e+06 +__unnamed_task__/AverageDiscountedReturn -17.0264 +__unnamed_task__/AverageReturn -38.4375 +__unnamed_task__/Iteration 155 +__unnamed_task__/MaxReturn -0.770585 +__unnamed_task__/MinReturn -74.8192 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6392 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:36:56 | [maml_trainer] epoch #156 | Sampling for adapation and meta-testing... +2025-04-03 10:38:28 | [maml_trainer] epoch #156 | Finished meta-testing... +2025-04-03 10:38:28 | [maml_trainer] epoch #156 | Saving snapshot... +2025-04-03 10:38:51 | [maml_trainer] epoch #156 | Saved +2025-04-03 10:38:51 | [maml_trainer] epoch #156 | Time 74753.32 s +2025-04-03 10:38:51 | [maml_trainer] epoch #156 | EpochTime 486.06 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8308 +Average/AverageReturn -36.8811 +Average/Iteration 156 +Average/MaxReturn 26.0513 +Average/MinReturn -67.5703 +Average/NumEpisodes 80 +Average/StdReturn 14.1322 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91021 +GaussianMLPPolicy/KLAfter 0.00288956 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.92154e-05 +GaussianMLPPolicy/LossBefore -5.66244e-10 +GaussianMLPPolicy/dLoss -7.9216e-05 +Iteration 156 +MetaTest/Average/AverageDiscountedReturn -41.0948 +MetaTest/Average/AverageReturn -41.0948 +MetaTest/Average/Iteration 156 +MetaTest/Average/MaxReturn -19.8492 +MetaTest/Average/MinReturn -64.2535 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.333 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.0948 +MetaTest/__unnamed_task__/AverageReturn -41.0948 +MetaTest/__unnamed_task__/Iteration 156 +MetaTest/__unnamed_task__/MaxReturn -19.8492 +MetaTest/__unnamed_task__/MinReturn -64.2535 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.333 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.024e+06 +__unnamed_task__/AverageDiscountedReturn -15.8308 +__unnamed_task__/AverageReturn -36.8811 +__unnamed_task__/Iteration 156 +__unnamed_task__/MaxReturn 26.0513 +__unnamed_task__/MinReturn -67.5703 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.1322 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:45:00 | [maml_trainer] epoch #157 | Sampling for adapation and meta-testing... +2025-04-03 10:46:34 | [maml_trainer] epoch #157 | Finished meta-testing... +2025-04-03 10:46:34 | [maml_trainer] epoch #157 | Saving snapshot... +2025-04-03 10:46:58 | [maml_trainer] epoch #157 | Saved +2025-04-03 10:46:58 | [maml_trainer] epoch #157 | Time 75240.04 s +2025-04-03 10:46:58 | [maml_trainer] epoch #157 | EpochTime 486.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7353 +Average/AverageReturn -41.5705 +Average/Iteration 157 +Average/MaxReturn 20.1461 +Average/MinReturn -78.3593 +Average/NumEpisodes 80 +Average/StdReturn 16.1781 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9115 +GaussianMLPPolicy/KLAfter 0.000923282 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.29448e-06 +GaussianMLPPolicy/LossBefore -2.95043e-09 +GaussianMLPPolicy/dLoss 9.29153e-06 +Iteration 157 +MetaTest/Average/AverageDiscountedReturn -47.1381 +MetaTest/Average/AverageReturn -47.1381 +MetaTest/Average/Iteration 157 +MetaTest/Average/MaxReturn -23.5102 +MetaTest/Average/MinReturn -68.0615 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0638 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -47.1381 +MetaTest/__unnamed_task__/AverageReturn -47.1381 +MetaTest/__unnamed_task__/Iteration 157 +MetaTest/__unnamed_task__/MaxReturn -23.5102 +MetaTest/__unnamed_task__/MinReturn -68.0615 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0638 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.056e+06 +__unnamed_task__/AverageDiscountedReturn -17.7353 +__unnamed_task__/AverageReturn -41.5705 +__unnamed_task__/Iteration 157 +__unnamed_task__/MaxReturn 20.1461 +__unnamed_task__/MinReturn -78.3593 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.1781 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 10:53:05 | [maml_trainer] epoch #158 | Sampling for adapation and meta-testing... +2025-04-03 10:54:38 | [maml_trainer] epoch #158 | Finished meta-testing... +2025-04-03 10:54:38 | [maml_trainer] epoch #158 | Saving snapshot... +2025-04-03 10:55:02 | [maml_trainer] epoch #158 | Saved +2025-04-03 10:55:02 | [maml_trainer] epoch #158 | Time 75724.45 s +2025-04-03 10:55:02 | [maml_trainer] epoch #158 | EpochTime 484.41 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.5372 +Average/AverageReturn -44.5114 +Average/Iteration 158 +Average/MaxReturn -11.9803 +Average/MinReturn -93.2434 +Average/NumEpisodes 80 +Average/StdReturn 14.206 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91294 +GaussianMLPPolicy/KLAfter 0.00143092 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.05872e-05 +GaussianMLPPolicy/LossBefore 1.02818e-08 +GaussianMLPPolicy/dLoss 5.05975e-05 +Iteration 158 +MetaTest/Average/AverageDiscountedReturn -40.9302 +MetaTest/Average/AverageReturn -40.9302 +MetaTest/Average/Iteration 158 +MetaTest/Average/MaxReturn -19.3336 +MetaTest/Average/MinReturn -63.1761 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.3966 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.9302 +MetaTest/__unnamed_task__/AverageReturn -40.9302 +MetaTest/__unnamed_task__/Iteration 158 +MetaTest/__unnamed_task__/MaxReturn -19.3336 +MetaTest/__unnamed_task__/MinReturn -63.1761 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.3966 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.088e+06 +__unnamed_task__/AverageDiscountedReturn -18.5372 +__unnamed_task__/AverageReturn -44.5114 +__unnamed_task__/Iteration 158 +__unnamed_task__/MaxReturn -11.9803 +__unnamed_task__/MinReturn -93.2434 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.206 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:01:14 | [maml_trainer] epoch #159 | Sampling for adapation and meta-testing... +2025-04-03 11:02:45 | [maml_trainer] epoch #159 | Finished meta-testing... +2025-04-03 11:02:45 | [maml_trainer] epoch #159 | Saving snapshot... +2025-04-03 11:03:08 | [maml_trainer] epoch #159 | Saved +2025-04-03 11:03:08 | [maml_trainer] epoch #159 | Time 76210.28 s +2025-04-03 11:03:08 | [maml_trainer] epoch #159 | EpochTime 485.82 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.9547 +Average/AverageReturn -41.3284 +Average/Iteration 159 +Average/MaxReturn 9.7705 +Average/MinReturn -79.9277 +Average/NumEpisodes 80 +Average/StdReturn 15.839 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91414 +GaussianMLPPolicy/KLAfter 0.00175995 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.65166e-05 +GaussianMLPPolicy/LossBefore 3.42727e-09 +GaussianMLPPolicy/dLoss 8.652e-05 +Iteration 159 +MetaTest/Average/AverageDiscountedReturn -43.4811 +MetaTest/Average/AverageReturn -43.4811 +MetaTest/Average/Iteration 159 +MetaTest/Average/MaxReturn -22.4935 +MetaTest/Average/MinReturn -69.4047 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0282 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -43.4811 +MetaTest/__unnamed_task__/AverageReturn -43.4811 +MetaTest/__unnamed_task__/Iteration 159 +MetaTest/__unnamed_task__/MaxReturn -22.4935 +MetaTest/__unnamed_task__/MinReturn -69.4047 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0282 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.12e+06 +__unnamed_task__/AverageDiscountedReturn -17.9547 +__unnamed_task__/AverageReturn -41.3284 +__unnamed_task__/Iteration 159 +__unnamed_task__/MaxReturn 9.7705 +__unnamed_task__/MinReturn -79.9277 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.839 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:09:22 | [maml_trainer] epoch #160 | Sampling for adapation and meta-testing... +2025-04-03 11:10:53 | [maml_trainer] epoch #160 | Finished meta-testing... +2025-04-03 11:10:53 | [maml_trainer] epoch #160 | Saving snapshot... +2025-04-03 11:11:18 | [maml_trainer] epoch #160 | Saved +2025-04-03 11:11:18 | [maml_trainer] epoch #160 | Time 76700.15 s +2025-04-03 11:11:18 | [maml_trainer] epoch #160 | EpochTime 489.87 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2088 +Average/AverageReturn -38.9559 +Average/Iteration 160 +Average/MaxReturn 2.22139 +Average/MinReturn -67.7909 +Average/NumEpisodes 80 +Average/StdReturn 14.4896 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91662 +GaussianMLPPolicy/KLAfter 0.00329621 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000188303 +GaussianMLPPolicy/LossBefore -4.35114e-09 +GaussianMLPPolicy/dLoss 0.000188299 +Iteration 160 +MetaTest/Average/AverageDiscountedReturn -40.8347 +MetaTest/Average/AverageReturn -40.8347 +MetaTest/Average/Iteration 160 +MetaTest/Average/MaxReturn -20.0823 +MetaTest/Average/MinReturn -54.287 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.74396 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.8347 +MetaTest/__unnamed_task__/AverageReturn -40.8347 +MetaTest/__unnamed_task__/Iteration 160 +MetaTest/__unnamed_task__/MaxReturn -20.0823 +MetaTest/__unnamed_task__/MinReturn -54.287 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.74396 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.152e+06 +__unnamed_task__/AverageDiscountedReturn -17.2088 +__unnamed_task__/AverageReturn -38.9559 +__unnamed_task__/Iteration 160 +__unnamed_task__/MaxReturn 2.22139 +__unnamed_task__/MinReturn -67.7909 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4896 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:17:28 | [maml_trainer] epoch #161 | Sampling for adapation and meta-testing... +2025-04-03 11:19:01 | [maml_trainer] epoch #161 | Finished meta-testing... +2025-04-03 11:19:01 | [maml_trainer] epoch #161 | Saving snapshot... +2025-04-03 11:19:24 | [maml_trainer] epoch #161 | Saved +2025-04-03 11:19:24 | [maml_trainer] epoch #161 | Time 77186.96 s +2025-04-03 11:19:24 | [maml_trainer] epoch #161 | EpochTime 486.81 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4051 +Average/AverageReturn -39.1382 +Average/Iteration 161 +Average/MaxReturn -2.47376 +Average/MinReturn -76.753 +Average/NumEpisodes 80 +Average/StdReturn 13.7184 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9196 +GaussianMLPPolicy/KLAfter 0.00582592 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.26951e-05 +GaussianMLPPolicy/LossBefore 2.77162e-09 +GaussianMLPPolicy/dLoss 1.26979e-05 +Iteration 161 +MetaTest/Average/AverageDiscountedReturn -35.2247 +MetaTest/Average/AverageReturn -35.2247 +MetaTest/Average/Iteration 161 +MetaTest/Average/MaxReturn -8.31652 +MetaTest/Average/MinReturn -53.4364 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0387 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.2247 +MetaTest/__unnamed_task__/AverageReturn -35.2247 +MetaTest/__unnamed_task__/Iteration 161 +MetaTest/__unnamed_task__/MaxReturn -8.31652 +MetaTest/__unnamed_task__/MinReturn -53.4364 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0387 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.184e+06 +__unnamed_task__/AverageDiscountedReturn -17.4051 +__unnamed_task__/AverageReturn -39.1382 +__unnamed_task__/Iteration 161 +__unnamed_task__/MaxReturn -2.47376 +__unnamed_task__/MinReturn -76.753 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7184 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:25:35 | [maml_trainer] epoch #162 | Sampling for adapation and meta-testing... +2025-04-03 11:27:07 | [maml_trainer] epoch #162 | Finished meta-testing... +2025-04-03 11:27:07 | [maml_trainer] epoch #162 | Saving snapshot... +2025-04-03 11:27:30 | [maml_trainer] epoch #162 | Saved +2025-04-03 11:27:30 | [maml_trainer] epoch #162 | Time 77672.08 s +2025-04-03 11:27:30 | [maml_trainer] epoch #162 | EpochTime 485.11 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2382 +Average/AverageReturn -37.9348 +Average/Iteration 162 +Average/MaxReturn -6.36313 +Average/MinReturn -69.1663 +Average/NumEpisodes 80 +Average/StdReturn 10.9819 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92182 +GaussianMLPPolicy/KLAfter 0.00809483 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000334566 +GaussianMLPPolicy/LossBefore 7.15255e-10 +GaussianMLPPolicy/dLoss -0.000334566 +Iteration 162 +MetaTest/Average/AverageDiscountedReturn -38.1668 +MetaTest/Average/AverageReturn -38.1668 +MetaTest/Average/Iteration 162 +MetaTest/Average/MaxReturn -19.1512 +MetaTest/Average/MinReturn -62.6432 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.0537 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.1668 +MetaTest/__unnamed_task__/AverageReturn -38.1668 +MetaTest/__unnamed_task__/Iteration 162 +MetaTest/__unnamed_task__/MaxReturn -19.1512 +MetaTest/__unnamed_task__/MinReturn -62.6432 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.0537 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.216e+06 +__unnamed_task__/AverageDiscountedReturn -17.2382 +__unnamed_task__/AverageReturn -37.9348 +__unnamed_task__/Iteration 162 +__unnamed_task__/MaxReturn -6.36313 +__unnamed_task__/MinReturn -69.1663 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9819 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:33:39 | [maml_trainer] epoch #163 | Sampling for adapation and meta-testing... +2025-04-03 11:35:13 | [maml_trainer] epoch #163 | Finished meta-testing... +2025-04-03 11:35:13 | [maml_trainer] epoch #163 | Saving snapshot... +2025-04-03 11:35:35 | [maml_trainer] epoch #163 | Saved +2025-04-03 11:35:35 | [maml_trainer] epoch #163 | Time 78157.86 s +2025-04-03 11:35:35 | [maml_trainer] epoch #163 | EpochTime 485.78 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5026 +Average/AverageReturn -37.8803 +Average/Iteration 163 +Average/MaxReturn -0.49756 +Average/MinReturn -70.2524 +Average/NumEpisodes 80 +Average/StdReturn 11.9979 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92372 +GaussianMLPPolicy/KLAfter 0.0074678 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000217917 +GaussianMLPPolicy/LossBefore 1.0252e-08 +GaussianMLPPolicy/dLoss -0.000217906 +Iteration 163 +MetaTest/Average/AverageDiscountedReturn -38.6711 +MetaTest/Average/AverageReturn -38.6711 +MetaTest/Average/Iteration 163 +MetaTest/Average/MaxReturn -27.6689 +MetaTest/Average/MinReturn -53.0731 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.20436 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.6711 +MetaTest/__unnamed_task__/AverageReturn -38.6711 +MetaTest/__unnamed_task__/Iteration 163 +MetaTest/__unnamed_task__/MaxReturn -27.6689 +MetaTest/__unnamed_task__/MinReturn -53.0731 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.20436 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.248e+06 +__unnamed_task__/AverageDiscountedReturn -17.5026 +__unnamed_task__/AverageReturn -37.8803 +__unnamed_task__/Iteration 163 +__unnamed_task__/MaxReturn -0.49756 +__unnamed_task__/MinReturn -70.2524 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9979 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:41:42 | [maml_trainer] epoch #164 | Sampling for adapation and meta-testing... +2025-04-03 11:43:13 | [maml_trainer] epoch #164 | Finished meta-testing... +2025-04-03 11:43:13 | [maml_trainer] epoch #164 | Saving snapshot... +2025-04-03 11:43:37 | [maml_trainer] epoch #164 | Saved +2025-04-03 11:43:37 | [maml_trainer] epoch #164 | Time 78639.79 s +2025-04-03 11:43:37 | [maml_trainer] epoch #164 | EpochTime 481.92 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4508 +Average/AverageReturn -38.0253 +Average/Iteration 164 +Average/MaxReturn -1.41725 +Average/MinReturn -63.4065 +Average/NumEpisodes 80 +Average/StdReturn 10.2358 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9252 +GaussianMLPPolicy/KLAfter 0.00434748 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000101546 +GaussianMLPPolicy/LossBefore -1.54972e-09 +GaussianMLPPolicy/dLoss -0.000101547 +Iteration 164 +MetaTest/Average/AverageDiscountedReturn -39.5852 +MetaTest/Average/AverageReturn -39.5852 +MetaTest/Average/Iteration 164 +MetaTest/Average/MaxReturn -19.8316 +MetaTest/Average/MinReturn -64.2703 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.2958 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.5852 +MetaTest/__unnamed_task__/AverageReturn -39.5852 +MetaTest/__unnamed_task__/Iteration 164 +MetaTest/__unnamed_task__/MaxReturn -19.8316 +MetaTest/__unnamed_task__/MinReturn -64.2703 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.2958 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.28e+06 +__unnamed_task__/AverageDiscountedReturn -17.4508 +__unnamed_task__/AverageReturn -38.0253 +__unnamed_task__/Iteration 164 +__unnamed_task__/MaxReturn -1.41725 +__unnamed_task__/MinReturn -63.4065 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2358 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:49:44 | [maml_trainer] epoch #165 | Sampling for adapation and meta-testing... +2025-04-03 11:51:18 | [maml_trainer] epoch #165 | Finished meta-testing... +2025-04-03 11:51:18 | [maml_trainer] epoch #165 | Saving snapshot... +2025-04-03 11:51:42 | [maml_trainer] epoch #165 | Saved +2025-04-03 11:51:42 | [maml_trainer] epoch #165 | Time 79124.36 s +2025-04-03 11:51:42 | [maml_trainer] epoch #165 | EpochTime 484.58 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.9274 +Average/AverageReturn -41.4508 +Average/Iteration 165 +Average/MaxReturn -18.1973 +Average/MinReturn -63.3739 +Average/NumEpisodes 80 +Average/StdReturn 11.0073 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92644 +GaussianMLPPolicy/KLAfter 0.00398362 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000165316 +GaussianMLPPolicy/LossBefore -1.53184e-08 +GaussianMLPPolicy/dLoss 0.000165301 +Iteration 165 +MetaTest/Average/AverageDiscountedReturn -37.6151 +MetaTest/Average/AverageReturn -37.6151 +MetaTest/Average/Iteration 165 +MetaTest/Average/MaxReturn -19.5734 +MetaTest/Average/MinReturn -56.0731 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.11992 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.6151 +MetaTest/__unnamed_task__/AverageReturn -37.6151 +MetaTest/__unnamed_task__/Iteration 165 +MetaTest/__unnamed_task__/MaxReturn -19.5734 +MetaTest/__unnamed_task__/MinReturn -56.0731 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.11992 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.312e+06 +__unnamed_task__/AverageDiscountedReturn -18.9274 +__unnamed_task__/AverageReturn -41.4508 +__unnamed_task__/Iteration 165 +__unnamed_task__/MaxReturn -18.1973 +__unnamed_task__/MinReturn -63.3739 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0073 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 11:57:56 | [maml_trainer] epoch #166 | Sampling for adapation and meta-testing... +2025-04-03 11:59:27 | [maml_trainer] epoch #166 | Finished meta-testing... +2025-04-03 11:59:27 | [maml_trainer] epoch #166 | Saving snapshot... +2025-04-03 11:59:50 | [maml_trainer] epoch #166 | Saved +2025-04-03 11:59:50 | [maml_trainer] epoch #166 | Time 79612.60 s +2025-04-03 11:59:50 | [maml_trainer] epoch #166 | EpochTime 488.24 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.3766 +Average/AverageReturn -40.2817 +Average/Iteration 166 +Average/MaxReturn -22.2832 +Average/MinReturn -69.1998 +Average/NumEpisodes 80 +Average/StdReturn 11.0683 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92881 +GaussianMLPPolicy/KLAfter 0.00498944 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.03549e-05 +GaussianMLPPolicy/LossBefore 1.40667e-08 +GaussianMLPPolicy/dLoss -1.03408e-05 +Iteration 166 +MetaTest/Average/AverageDiscountedReturn -36.7504 +MetaTest/Average/AverageReturn -36.7504 +MetaTest/Average/Iteration 166 +MetaTest/Average/MaxReturn -15.5731 +MetaTest/Average/MinReturn -62.9034 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.7647 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.7504 +MetaTest/__unnamed_task__/AverageReturn -36.7504 +MetaTest/__unnamed_task__/Iteration 166 +MetaTest/__unnamed_task__/MaxReturn -15.5731 +MetaTest/__unnamed_task__/MinReturn -62.9034 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.7647 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.344e+06 +__unnamed_task__/AverageDiscountedReturn -18.3766 +__unnamed_task__/AverageReturn -40.2817 +__unnamed_task__/Iteration 166 +__unnamed_task__/MaxReturn -22.2832 +__unnamed_task__/MinReturn -69.1998 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0683 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:05:57 | [maml_trainer] epoch #167 | Sampling for adapation and meta-testing... +2025-04-03 12:07:28 | [maml_trainer] epoch #167 | Finished meta-testing... +2025-04-03 12:07:28 | [maml_trainer] epoch #167 | Saving snapshot... +2025-04-03 12:07:53 | [maml_trainer] epoch #167 | Saved +2025-04-03 12:07:53 | [maml_trainer] epoch #167 | Time 80095.27 s +2025-04-03 12:07:53 | [maml_trainer] epoch #167 | EpochTime 482.66 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7825 +Average/AverageReturn -37.7212 +Average/Iteration 167 +Average/MaxReturn -13.5621 +Average/MinReturn -67.3516 +Average/NumEpisodes 80 +Average/StdReturn 11.2073 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93182 +GaussianMLPPolicy/KLAfter 0.00720842 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.86845e-05 +GaussianMLPPolicy/LossBefore -6.55651e-10 +GaussianMLPPolicy/dLoss 9.86839e-05 +Iteration 167 +MetaTest/Average/AverageDiscountedReturn -32.9755 +MetaTest/Average/AverageReturn -32.9755 +MetaTest/Average/Iteration 167 +MetaTest/Average/MaxReturn -3.35965 +MetaTest/Average/MinReturn -47.9338 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.88932 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9755 +MetaTest/__unnamed_task__/AverageReturn -32.9755 +MetaTest/__unnamed_task__/Iteration 167 +MetaTest/__unnamed_task__/MaxReturn -3.35965 +MetaTest/__unnamed_task__/MinReturn -47.9338 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.88932 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.376e+06 +__unnamed_task__/AverageDiscountedReturn -17.7825 +__unnamed_task__/AverageReturn -37.7212 +__unnamed_task__/Iteration 167 +__unnamed_task__/MaxReturn -13.5621 +__unnamed_task__/MinReturn -67.3516 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.2073 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:14:05 | [maml_trainer] epoch #168 | Sampling for adapation and meta-testing... +2025-04-03 12:15:37 | [maml_trainer] epoch #168 | Finished meta-testing... +2025-04-03 12:15:37 | [maml_trainer] epoch #168 | Saving snapshot... +2025-04-03 12:16:00 | [maml_trainer] epoch #168 | Saved +2025-04-03 12:16:00 | [maml_trainer] epoch #168 | Time 80582.74 s +2025-04-03 12:16:00 | [maml_trainer] epoch #168 | EpochTime 487.46 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7216 +Average/AverageReturn -35.8118 +Average/Iteration 168 +Average/MaxReturn 4.20373 +Average/MinReturn -55.5999 +Average/NumEpisodes 80 +Average/StdReturn 11.0962 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93377 +GaussianMLPPolicy/KLAfter 0.00950155 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000235806 +GaussianMLPPolicy/LossBefore 1.43051e-09 +GaussianMLPPolicy/dLoss 0.000235807 +Iteration 168 +MetaTest/Average/AverageDiscountedReturn -38.9756 +MetaTest/Average/AverageReturn -38.9756 +MetaTest/Average/Iteration 168 +MetaTest/Average/MaxReturn -26.0064 +MetaTest/Average/MinReturn -56.9663 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.82099 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.9756 +MetaTest/__unnamed_task__/AverageReturn -38.9756 +MetaTest/__unnamed_task__/Iteration 168 +MetaTest/__unnamed_task__/MaxReturn -26.0064 +MetaTest/__unnamed_task__/MinReturn -56.9663 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.82099 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.408e+06 +__unnamed_task__/AverageDiscountedReturn -16.7216 +__unnamed_task__/AverageReturn -35.8118 +__unnamed_task__/Iteration 168 +__unnamed_task__/MaxReturn 4.20373 +__unnamed_task__/MinReturn -55.5999 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0962 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:22:09 | [maml_trainer] epoch #169 | Sampling for adapation and meta-testing... +2025-04-03 12:23:42 | [maml_trainer] epoch #169 | Finished meta-testing... +2025-04-03 12:23:42 | [maml_trainer] epoch #169 | Saving snapshot... +2025-04-03 12:24:05 | [maml_trainer] epoch #169 | Saved +2025-04-03 12:24:05 | [maml_trainer] epoch #169 | Time 81067.55 s +2025-04-03 12:24:05 | [maml_trainer] epoch #169 | EpochTime 484.81 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8135 +Average/AverageReturn -34.864 +Average/Iteration 169 +Average/MaxReturn 3.96113 +Average/MinReturn -56.588 +Average/NumEpisodes 80 +Average/StdReturn 10.9303 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93647 +GaussianMLPPolicy/KLAfter 0.0127655 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.64948e-05 +GaussianMLPPolicy/LossBefore 3.75509e-09 +GaussianMLPPolicy/dLoss 5.64986e-05 +Iteration 169 +MetaTest/Average/AverageDiscountedReturn -35.6844 +MetaTest/Average/AverageReturn -35.6844 +MetaTest/Average/Iteration 169 +MetaTest/Average/MaxReturn -23.6645 +MetaTest/Average/MinReturn -54.3199 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.49165 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.6844 +MetaTest/__unnamed_task__/AverageReturn -35.6844 +MetaTest/__unnamed_task__/Iteration 169 +MetaTest/__unnamed_task__/MaxReturn -23.6645 +MetaTest/__unnamed_task__/MinReturn -54.3199 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.49165 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.44e+06 +__unnamed_task__/AverageDiscountedReturn -16.8135 +__unnamed_task__/AverageReturn -34.864 +__unnamed_task__/Iteration 169 +__unnamed_task__/MaxReturn 3.96113 +__unnamed_task__/MinReturn -56.588 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9303 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:30:18 | [maml_trainer] epoch #170 | Sampling for adapation and meta-testing... +2025-04-03 12:31:50 | [maml_trainer] epoch #170 | Finished meta-testing... +2025-04-03 12:31:50 | [maml_trainer] epoch #170 | Saving snapshot... +2025-04-03 12:32:14 | [maml_trainer] epoch #170 | Saved +2025-04-03 12:32:14 | [maml_trainer] epoch #170 | Time 81556.27 s +2025-04-03 12:32:14 | [maml_trainer] epoch #170 | EpochTime 488.72 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2696 +Average/AverageReturn -33.725 +Average/Iteration 170 +Average/MaxReturn 8.78846 +Average/MinReturn -60.6232 +Average/NumEpisodes 80 +Average/StdReturn 13.2126 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93923 +GaussianMLPPolicy/KLAfter 0.0134388 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000239196 +GaussianMLPPolicy/LossBefore 2.67327e-08 +GaussianMLPPolicy/dLoss -0.000239169 +Iteration 170 +MetaTest/Average/AverageDiscountedReturn -36.4234 +MetaTest/Average/AverageReturn -36.4234 +MetaTest/Average/Iteration 170 +MetaTest/Average/MaxReturn -18.7264 +MetaTest/Average/MinReturn -67.2359 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.231 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.4234 +MetaTest/__unnamed_task__/AverageReturn -36.4234 +MetaTest/__unnamed_task__/Iteration 170 +MetaTest/__unnamed_task__/MaxReturn -18.7264 +MetaTest/__unnamed_task__/MinReturn -67.2359 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.231 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.472e+06 +__unnamed_task__/AverageDiscountedReturn -16.2696 +__unnamed_task__/AverageReturn -33.725 +__unnamed_task__/Iteration 170 +__unnamed_task__/MaxReturn 8.78846 +__unnamed_task__/MinReturn -60.6232 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2126 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:38:23 | [maml_trainer] epoch #171 | Sampling for adapation and meta-testing... +2025-04-03 12:39:54 | [maml_trainer] epoch #171 | Finished meta-testing... +2025-04-03 12:39:54 | [maml_trainer] epoch #171 | Saving snapshot... +2025-04-03 12:40:18 | [maml_trainer] epoch #171 | Saved +2025-04-03 12:40:18 | [maml_trainer] epoch #171 | Time 82040.05 s +2025-04-03 12:40:18 | [maml_trainer] epoch #171 | EpochTime 483.78 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7615 +Average/AverageReturn -33.3894 +Average/Iteration 171 +Average/MaxReturn 29.0443 +Average/MinReturn -67.675 +Average/NumEpisodes 80 +Average/StdReturn 12.2029 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93922 +GaussianMLPPolicy/KLAfter 0.0137917 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.00127e-05 +GaussianMLPPolicy/LossBefore -2.68221e-09 +GaussianMLPPolicy/dLoss -4.00154e-05 +Iteration 171 +MetaTest/Average/AverageDiscountedReturn -42.6443 +MetaTest/Average/AverageReturn -42.6443 +MetaTest/Average/Iteration 171 +MetaTest/Average/MaxReturn -16.7762 +MetaTest/Average/MinReturn -67.4644 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.7904 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.6443 +MetaTest/__unnamed_task__/AverageReturn -42.6443 +MetaTest/__unnamed_task__/Iteration 171 +MetaTest/__unnamed_task__/MaxReturn -16.7762 +MetaTest/__unnamed_task__/MinReturn -67.4644 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.7904 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.504e+06 +__unnamed_task__/AverageDiscountedReturn -15.7615 +__unnamed_task__/AverageReturn -33.3894 +__unnamed_task__/Iteration 171 +__unnamed_task__/MaxReturn 29.0443 +__unnamed_task__/MinReturn -67.675 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.2029 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:46:25 | [maml_trainer] epoch #172 | Sampling for adapation and meta-testing... +2025-04-03 12:47:57 | [maml_trainer] epoch #172 | Finished meta-testing... +2025-04-03 12:47:57 | [maml_trainer] epoch #172 | Saving snapshot... +2025-04-03 12:48:20 | [maml_trainer] epoch #172 | Saved +2025-04-03 12:48:20 | [maml_trainer] epoch #172 | Time 82522.09 s +2025-04-03 12:48:20 | [maml_trainer] epoch #172 | EpochTime 482.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4595 +Average/AverageReturn -38.1847 +Average/Iteration 172 +Average/MaxReturn -4.66261 +Average/MinReturn -80.6419 +Average/NumEpisodes 80 +Average/StdReturn 13.2492 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93781 +GaussianMLPPolicy/KLAfter 0.00927553 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000111091 +GaussianMLPPolicy/LossBefore 6.19888e-09 +GaussianMLPPolicy/dLoss 0.000111097 +Iteration 172 +MetaTest/Average/AverageDiscountedReturn -40.2493 +MetaTest/Average/AverageReturn -40.2493 +MetaTest/Average/Iteration 172 +MetaTest/Average/MaxReturn -8.26547 +MetaTest/Average/MinReturn -69.5887 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5033 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.2493 +MetaTest/__unnamed_task__/AverageReturn -40.2493 +MetaTest/__unnamed_task__/Iteration 172 +MetaTest/__unnamed_task__/MaxReturn -8.26547 +MetaTest/__unnamed_task__/MinReturn -69.5887 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5033 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.536e+06 +__unnamed_task__/AverageDiscountedReturn -17.4595 +__unnamed_task__/AverageReturn -38.1847 +__unnamed_task__/Iteration 172 +__unnamed_task__/MaxReturn -4.66261 +__unnamed_task__/MinReturn -80.6419 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2492 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 12:54:28 | [maml_trainer] epoch #173 | Sampling for adapation and meta-testing... +2025-04-03 12:56:01 | [maml_trainer] epoch #173 | Finished meta-testing... +2025-04-03 12:56:01 | [maml_trainer] epoch #173 | Saving snapshot... +2025-04-03 12:56:26 | [maml_trainer] epoch #173 | Saved +2025-04-03 12:56:26 | [maml_trainer] epoch #173 | Time 83008.41 s +2025-04-03 12:56:26 | [maml_trainer] epoch #173 | EpochTime 486.32 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.2216 +Average/AverageReturn -43.8303 +Average/Iteration 173 +Average/MaxReturn -21.9266 +Average/MinReturn -72.7534 +Average/NumEpisodes 80 +Average/StdReturn 13.7316 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93558 +GaussianMLPPolicy/KLAfter 0.010392 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000150405 +GaussianMLPPolicy/LossBefore -2.68221e-09 +GaussianMLPPolicy/dLoss -0.000150408 +Iteration 173 +MetaTest/Average/AverageDiscountedReturn -42.0648 +MetaTest/Average/AverageReturn -42.0648 +MetaTest/Average/Iteration 173 +MetaTest/Average/MaxReturn -21.3742 +MetaTest/Average/MinReturn -72.6816 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.016 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -42.0648 +MetaTest/__unnamed_task__/AverageReturn -42.0648 +MetaTest/__unnamed_task__/Iteration 173 +MetaTest/__unnamed_task__/MaxReturn -21.3742 +MetaTest/__unnamed_task__/MinReturn -72.6816 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.016 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.568e+06 +__unnamed_task__/AverageDiscountedReturn -19.2216 +__unnamed_task__/AverageReturn -43.8303 +__unnamed_task__/Iteration 173 +__unnamed_task__/MaxReturn -21.9266 +__unnamed_task__/MinReturn -72.7534 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7316 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:02:38 | [maml_trainer] epoch #174 | Sampling for adapation and meta-testing... +2025-04-03 13:04:08 | [maml_trainer] epoch #174 | Finished meta-testing... +2025-04-03 13:04:08 | [maml_trainer] epoch #174 | Saving snapshot... +2025-04-03 13:04:32 | [maml_trainer] epoch #174 | Saved +2025-04-03 13:04:32 | [maml_trainer] epoch #174 | Time 83494.74 s +2025-04-03 13:04:32 | [maml_trainer] epoch #174 | EpochTime 486.32 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.9168 +Average/AverageReturn -43.5529 +Average/Iteration 174 +Average/MaxReturn -18.4189 +Average/MinReturn -71.5763 +Average/NumEpisodes 80 +Average/StdReturn 13.0359 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9334 +GaussianMLPPolicy/KLAfter 0.00686139 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.81222e-05 +GaussianMLPPolicy/LossBefore -6.22869e-09 +GaussianMLPPolicy/dLoss -7.81284e-05 +Iteration 174 +MetaTest/Average/AverageDiscountedReturn -44.5722 +MetaTest/Average/AverageReturn -44.5722 +MetaTest/Average/Iteration 174 +MetaTest/Average/MaxReturn -24.6291 +MetaTest/Average/MinReturn -75.5275 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6373 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -44.5722 +MetaTest/__unnamed_task__/AverageReturn -44.5722 +MetaTest/__unnamed_task__/Iteration 174 +MetaTest/__unnamed_task__/MaxReturn -24.6291 +MetaTest/__unnamed_task__/MinReturn -75.5275 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6373 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.6e+06 +__unnamed_task__/AverageDiscountedReturn -18.9168 +__unnamed_task__/AverageReturn -43.5529 +__unnamed_task__/Iteration 174 +__unnamed_task__/MaxReturn -18.4189 +__unnamed_task__/MinReturn -71.5763 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.0359 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:10:41 | [maml_trainer] epoch #175 | Sampling for adapation and meta-testing... +2025-04-03 13:12:12 | [maml_trainer] epoch #175 | Finished meta-testing... +2025-04-03 13:12:12 | [maml_trainer] epoch #175 | Saving snapshot... +2025-04-03 13:12:34 | [maml_trainer] epoch #175 | Saved +2025-04-03 13:12:34 | [maml_trainer] epoch #175 | Time 83976.81 s +2025-04-03 13:12:34 | [maml_trainer] epoch #175 | EpochTime 482.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.8875 +Average/AverageReturn -43.4891 +Average/Iteration 175 +Average/MaxReturn -20.2668 +Average/MinReturn -79.5002 +Average/NumEpisodes 80 +Average/StdReturn 12.1537 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.93169 +GaussianMLPPolicy/KLAfter 0.00523219 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.45678e-05 +GaussianMLPPolicy/LossBefore 7.36117e-09 +GaussianMLPPolicy/dLoss 7.45752e-05 +Iteration 175 +MetaTest/Average/AverageDiscountedReturn -45.061 +MetaTest/Average/AverageReturn -45.061 +MetaTest/Average/Iteration 175 +MetaTest/Average/MaxReturn -24.8088 +MetaTest/Average/MinReturn -66.0434 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7787 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -45.061 +MetaTest/__unnamed_task__/AverageReturn -45.061 +MetaTest/__unnamed_task__/Iteration 175 +MetaTest/__unnamed_task__/MaxReturn -24.8088 +MetaTest/__unnamed_task__/MinReturn -66.0434 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7787 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.632e+06 +__unnamed_task__/AverageDiscountedReturn -18.8875 +__unnamed_task__/AverageReturn -43.4891 +__unnamed_task__/Iteration 175 +__unnamed_task__/MaxReturn -20.2668 +__unnamed_task__/MinReturn -79.5002 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1537 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:18:43 | [maml_trainer] epoch #176 | Sampling for adapation and meta-testing... +2025-04-03 13:20:14 | [maml_trainer] epoch #176 | Finished meta-testing... +2025-04-03 13:20:14 | [maml_trainer] epoch #176 | Saving snapshot... +2025-04-03 13:20:38 | [maml_trainer] epoch #176 | Saved +2025-04-03 13:20:38 | [maml_trainer] epoch #176 | Time 84460.39 s +2025-04-03 13:20:38 | [maml_trainer] epoch #176 | EpochTime 483.58 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.5795 +Average/AverageReturn -42.6323 +Average/Iteration 176 +Average/MaxReturn -14.1417 +Average/MinReturn -72.5405 +Average/NumEpisodes 80 +Average/StdReturn 12.8949 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92993 +GaussianMLPPolicy/KLAfter 0.00449011 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000183198 +GaussianMLPPolicy/LossBefore 1.77622e-08 +GaussianMLPPolicy/dLoss 0.000183215 +Iteration 176 +MetaTest/Average/AverageDiscountedReturn -41.3317 +MetaTest/Average/AverageReturn -41.3317 +MetaTest/Average/Iteration 176 +MetaTest/Average/MaxReturn -22.0629 +MetaTest/Average/MinReturn -68.9359 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.2873 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -41.3317 +MetaTest/__unnamed_task__/AverageReturn -41.3317 +MetaTest/__unnamed_task__/Iteration 176 +MetaTest/__unnamed_task__/MaxReturn -22.0629 +MetaTest/__unnamed_task__/MinReturn -68.9359 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.2873 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.664e+06 +__unnamed_task__/AverageDiscountedReturn -18.5795 +__unnamed_task__/AverageReturn -42.6323 +__unnamed_task__/Iteration 176 +__unnamed_task__/MaxReturn -14.1417 +__unnamed_task__/MinReturn -72.5405 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8949 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:26:42 | [maml_trainer] epoch #177 | Sampling for adapation and meta-testing... +2025-04-03 13:28:13 | [maml_trainer] epoch #177 | Finished meta-testing... +2025-04-03 13:28:13 | [maml_trainer] epoch #177 | Saving snapshot... +2025-04-03 13:28:37 | [maml_trainer] epoch #177 | Saved +2025-04-03 13:28:37 | [maml_trainer] epoch #177 | Time 84939.63 s +2025-04-03 13:28:37 | [maml_trainer] epoch #177 | EpochTime 479.24 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -19.1962 +Average/AverageReturn -42.3527 +Average/Iteration 177 +Average/MaxReturn -15.3382 +Average/MinReturn -71.8713 +Average/NumEpisodes 80 +Average/StdReturn 12.4845 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92852 +GaussianMLPPolicy/KLAfter 0.0036776 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000111772 +GaussianMLPPolicy/LossBefore 1.71065e-08 +GaussianMLPPolicy/dLoss 0.000111789 +Iteration 177 +MetaTest/Average/AverageDiscountedReturn -44.1123 +MetaTest/Average/AverageReturn -44.1123 +MetaTest/Average/Iteration 177 +MetaTest/Average/MaxReturn -23.344 +MetaTest/Average/MinReturn -74.8514 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8667 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -44.1123 +MetaTest/__unnamed_task__/AverageReturn -44.1123 +MetaTest/__unnamed_task__/Iteration 177 +MetaTest/__unnamed_task__/MaxReturn -23.344 +MetaTest/__unnamed_task__/MinReturn -74.8514 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8667 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.696e+06 +__unnamed_task__/AverageDiscountedReturn -19.1962 +__unnamed_task__/AverageReturn -42.3527 +__unnamed_task__/Iteration 177 +__unnamed_task__/MaxReturn -15.3382 +__unnamed_task__/MinReturn -71.8713 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4845 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:34:47 | [maml_trainer] epoch #178 | Sampling for adapation and meta-testing... +2025-04-03 13:36:18 | [maml_trainer] epoch #178 | Finished meta-testing... +2025-04-03 13:36:18 | [maml_trainer] epoch #178 | Saving snapshot... +2025-04-03 13:36:41 | [maml_trainer] epoch #178 | Saved +2025-04-03 13:36:41 | [maml_trainer] epoch #178 | Time 85423.57 s +2025-04-03 13:36:41 | [maml_trainer] epoch #178 | EpochTime 483.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.7559 +Average/AverageReturn -38.5987 +Average/Iteration 178 +Average/MaxReturn 0.75531 +Average/MinReturn -72.6703 +Average/NumEpisodes 80 +Average/StdReturn 13.3796 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92651 +GaussianMLPPolicy/KLAfter 0.00410242 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000157476 +GaussianMLPPolicy/LossBefore 7.83801e-09 +GaussianMLPPolicy/dLoss 0.000157484 +Iteration 178 +MetaTest/Average/AverageDiscountedReturn -35.9784 +MetaTest/Average/AverageReturn -35.9784 +MetaTest/Average/Iteration 178 +MetaTest/Average/MaxReturn -13.7658 +MetaTest/Average/MinReturn -62.6854 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5551 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.9784 +MetaTest/__unnamed_task__/AverageReturn -35.9784 +MetaTest/__unnamed_task__/Iteration 178 +MetaTest/__unnamed_task__/MaxReturn -13.7658 +MetaTest/__unnamed_task__/MinReturn -62.6854 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5551 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.728e+06 +__unnamed_task__/AverageDiscountedReturn -17.7559 +__unnamed_task__/AverageReturn -38.5987 +__unnamed_task__/Iteration 178 +__unnamed_task__/MaxReturn 0.75531 +__unnamed_task__/MinReturn -72.6703 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.3796 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:42:50 | [maml_trainer] epoch #179 | Sampling for adapation and meta-testing... +2025-04-03 13:44:21 | [maml_trainer] epoch #179 | Finished meta-testing... +2025-04-03 13:44:21 | [maml_trainer] epoch #179 | Saving snapshot... +2025-04-03 13:44:45 | [maml_trainer] epoch #179 | Saved +2025-04-03 13:44:45 | [maml_trainer] epoch #179 | Time 85907.14 s +2025-04-03 13:44:45 | [maml_trainer] epoch #179 | EpochTime 483.56 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8648 +Average/AverageReturn -35.0683 +Average/Iteration 179 +Average/MaxReturn -10.4077 +Average/MinReturn -67.7046 +Average/NumEpisodes 80 +Average/StdReturn 11.9093 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.92447 +GaussianMLPPolicy/KLAfter 0.00597345 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000310171 +GaussianMLPPolicy/LossBefore -6.02007e-09 +GaussianMLPPolicy/dLoss 0.000310165 +Iteration 179 +MetaTest/Average/AverageDiscountedReturn -36.6653 +MetaTest/Average/AverageReturn -36.6653 +MetaTest/Average/Iteration 179 +MetaTest/Average/MaxReturn -1.22485 +MetaTest/Average/MinReturn -64.927 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.5154 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.6653 +MetaTest/__unnamed_task__/AverageReturn -36.6653 +MetaTest/__unnamed_task__/Iteration 179 +MetaTest/__unnamed_task__/MaxReturn -1.22485 +MetaTest/__unnamed_task__/MinReturn -64.927 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.5154 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.76e+06 +__unnamed_task__/AverageDiscountedReturn -16.8648 +__unnamed_task__/AverageReturn -35.0683 +__unnamed_task__/Iteration 179 +__unnamed_task__/MaxReturn -10.4077 +__unnamed_task__/MinReturn -67.7046 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9093 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:50:51 | [maml_trainer] epoch #180 | Sampling for adapation and meta-testing... +2025-04-03 13:52:23 | [maml_trainer] epoch #180 | Finished meta-testing... +2025-04-03 13:52:23 | [maml_trainer] epoch #180 | Saving snapshot... +2025-04-03 13:52:45 | [maml_trainer] epoch #180 | Saved +2025-04-03 13:52:45 | [maml_trainer] epoch #180 | Time 86387.66 s +2025-04-03 13:52:45 | [maml_trainer] epoch #180 | EpochTime 480.52 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.3611 +Average/AverageReturn -35.9193 +Average/Iteration 180 +Average/MaxReturn -16.6309 +Average/MinReturn -66.529 +Average/NumEpisodes 80 +Average/StdReturn 12.1033 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9214 +GaussianMLPPolicy/KLAfter 0.00413176 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000159483 +GaussianMLPPolicy/LossBefore -2.09212e-08 +GaussianMLPPolicy/dLoss -0.000159504 +Iteration 180 +MetaTest/Average/AverageDiscountedReturn -34.9075 +MetaTest/Average/AverageReturn -34.9075 +MetaTest/Average/Iteration 180 +MetaTest/Average/MaxReturn 5.48008 +MetaTest/Average/MinReturn -55.2213 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.3854 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.9075 +MetaTest/__unnamed_task__/AverageReturn -34.9075 +MetaTest/__unnamed_task__/Iteration 180 +MetaTest/__unnamed_task__/MaxReturn 5.48008 +MetaTest/__unnamed_task__/MinReturn -55.2213 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.3854 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.792e+06 +__unnamed_task__/AverageDiscountedReturn -17.3611 +__unnamed_task__/AverageReturn -35.9193 +__unnamed_task__/Iteration 180 +__unnamed_task__/MaxReturn -16.6309 +__unnamed_task__/MinReturn -66.529 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1033 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 13:58:50 | [maml_trainer] epoch #181 | Sampling for adapation and meta-testing... +2025-04-03 14:00:20 | [maml_trainer] epoch #181 | Finished meta-testing... +2025-04-03 14:00:20 | [maml_trainer] epoch #181 | Saving snapshot... +2025-04-03 14:00:45 | [maml_trainer] epoch #181 | Saved +2025-04-03 14:00:45 | [maml_trainer] epoch #181 | Time 86867.41 s +2025-04-03 14:00:45 | [maml_trainer] epoch #181 | EpochTime 479.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2648 +Average/AverageReturn -33.5734 +Average/Iteration 181 +Average/MaxReturn 5.18389 +Average/MinReturn -65.9218 +Average/NumEpisodes 80 +Average/StdReturn 10.9182 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91978 +GaussianMLPPolicy/KLAfter 0.00764583 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000236951 +GaussianMLPPolicy/LossBefore -1.14441e-08 +GaussianMLPPolicy/dLoss 0.00023694 +Iteration 181 +MetaTest/Average/AverageDiscountedReturn -35.3499 +MetaTest/Average/AverageReturn -35.3499 +MetaTest/Average/Iteration 181 +MetaTest/Average/MaxReturn -1.85999 +MetaTest/Average/MinReturn -67.5439 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.6811 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.3499 +MetaTest/__unnamed_task__/AverageReturn -35.3499 +MetaTest/__unnamed_task__/Iteration 181 +MetaTest/__unnamed_task__/MaxReturn -1.85999 +MetaTest/__unnamed_task__/MinReturn -67.5439 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.6811 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.824e+06 +__unnamed_task__/AverageDiscountedReturn -16.2648 +__unnamed_task__/AverageReturn -33.5734 +__unnamed_task__/Iteration 181 +__unnamed_task__/MaxReturn 5.18389 +__unnamed_task__/MinReturn -65.9218 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9182 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:06:49 | [maml_trainer] epoch #182 | Sampling for adapation and meta-testing... +2025-04-03 14:08:22 | [maml_trainer] epoch #182 | Finished meta-testing... +2025-04-03 14:08:22 | [maml_trainer] epoch #182 | Saving snapshot... +2025-04-03 14:08:46 | [maml_trainer] epoch #182 | Saved +2025-04-03 14:08:46 | [maml_trainer] epoch #182 | Time 87348.25 s +2025-04-03 14:08:46 | [maml_trainer] epoch #182 | EpochTime 480.84 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.6921 +Average/AverageReturn -35.0974 +Average/Iteration 182 +Average/MaxReturn -8.62579 +Average/MinReturn -58.9227 +Average/NumEpisodes 80 +Average/StdReturn 9.15752 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91939 +GaussianMLPPolicy/KLAfter 0.00448337 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.5956e-05 +GaussianMLPPolicy/LossBefore -1.28746e-08 +GaussianMLPPolicy/dLoss 7.59432e-05 +Iteration 182 +MetaTest/Average/AverageDiscountedReturn -38.6345 +MetaTest/Average/AverageReturn -38.6345 +MetaTest/Average/Iteration 182 +MetaTest/Average/MaxReturn -27.5427 +MetaTest/Average/MinReturn -57.9949 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.04562 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.6345 +MetaTest/__unnamed_task__/AverageReturn -38.6345 +MetaTest/__unnamed_task__/Iteration 182 +MetaTest/__unnamed_task__/MaxReturn -27.5427 +MetaTest/__unnamed_task__/MinReturn -57.9949 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.04562 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.856e+06 +__unnamed_task__/AverageDiscountedReturn -16.6921 +__unnamed_task__/AverageReturn -35.0974 +__unnamed_task__/Iteration 182 +__unnamed_task__/MaxReturn -8.62579 +__unnamed_task__/MinReturn -58.9227 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.15752 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:14:56 | [maml_trainer] epoch #183 | Sampling for adapation and meta-testing... +2025-04-03 14:16:28 | [maml_trainer] epoch #183 | Finished meta-testing... +2025-04-03 14:16:28 | [maml_trainer] epoch #183 | Saving snapshot... +2025-04-03 14:16:50 | [maml_trainer] epoch #183 | Saved +2025-04-03 14:16:50 | [maml_trainer] epoch #183 | Time 87832.59 s +2025-04-03 14:16:50 | [maml_trainer] epoch #183 | EpochTime 484.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.525 +Average/AverageReturn -32.6226 +Average/Iteration 183 +Average/MaxReturn -3.50168 +Average/MinReturn -59.8035 +Average/NumEpisodes 80 +Average/StdReturn 11.0025 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9199 +GaussianMLPPolicy/KLAfter 0.00356724 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000121935 +GaussianMLPPolicy/LossBefore -5.00679e-09 +GaussianMLPPolicy/dLoss -0.00012194 +Iteration 183 +MetaTest/Average/AverageDiscountedReturn -35.2413 +MetaTest/Average/AverageReturn -35.2413 +MetaTest/Average/Iteration 183 +MetaTest/Average/MaxReturn -20.6906 +MetaTest/Average/MinReturn -49.9661 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.0296 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.2413 +MetaTest/__unnamed_task__/AverageReturn -35.2413 +MetaTest/__unnamed_task__/Iteration 183 +MetaTest/__unnamed_task__/MaxReturn -20.6906 +MetaTest/__unnamed_task__/MinReturn -49.9661 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.0296 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.888e+06 +__unnamed_task__/AverageDiscountedReturn -15.525 +__unnamed_task__/AverageReturn -32.6226 +__unnamed_task__/Iteration 183 +__unnamed_task__/MaxReturn -3.50168 +__unnamed_task__/MinReturn -59.8035 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0025 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:22:55 | [maml_trainer] epoch #184 | Sampling for adapation and meta-testing... +2025-04-03 14:24:26 | [maml_trainer] epoch #184 | Finished meta-testing... +2025-04-03 14:24:26 | [maml_trainer] epoch #184 | Saving snapshot... +2025-04-03 14:24:49 | [maml_trainer] epoch #184 | Saved +2025-04-03 14:24:49 | [maml_trainer] epoch #184 | Time 88311.69 s +2025-04-03 14:24:49 | [maml_trainer] epoch #184 | EpochTime 479.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2285 +Average/AverageReturn -33.9458 +Average/Iteration 184 +Average/MaxReturn 11.82 +Average/MinReturn -52.7553 +Average/NumEpisodes 80 +Average/StdReturn 10.9905 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91959 +GaussianMLPPolicy/KLAfter 0.00171216 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.55854e-05 +GaussianMLPPolicy/LossBefore 9.05991e-09 +GaussianMLPPolicy/dLoss 2.55945e-05 +Iteration 184 +MetaTest/Average/AverageDiscountedReturn -38.3054 +MetaTest/Average/AverageReturn -38.3054 +MetaTest/Average/Iteration 184 +MetaTest/Average/MaxReturn -23.2586 +MetaTest/Average/MinReturn -58.2121 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.92245 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.3054 +MetaTest/__unnamed_task__/AverageReturn -38.3054 +MetaTest/__unnamed_task__/Iteration 184 +MetaTest/__unnamed_task__/MaxReturn -23.2586 +MetaTest/__unnamed_task__/MinReturn -58.2121 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.92245 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.92e+06 +__unnamed_task__/AverageDiscountedReturn -16.2285 +__unnamed_task__/AverageReturn -33.9458 +__unnamed_task__/Iteration 184 +__unnamed_task__/MaxReturn 11.82 +__unnamed_task__/MinReturn -52.7553 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.9905 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:31:01 | [maml_trainer] epoch #185 | Sampling for adapation and meta-testing... +2025-04-03 14:32:33 | [maml_trainer] epoch #185 | Finished meta-testing... +2025-04-03 14:32:33 | [maml_trainer] epoch #185 | Saving snapshot... +2025-04-03 14:32:57 | [maml_trainer] epoch #185 | Saved +2025-04-03 14:32:57 | [maml_trainer] epoch #185 | Time 88799.72 s +2025-04-03 14:32:57 | [maml_trainer] epoch #185 | EpochTime 488.03 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4322 +Average/AverageReturn -34.7129 +Average/Iteration 185 +Average/MaxReturn -4.96978 +Average/MinReturn -64.0285 +Average/NumEpisodes 80 +Average/StdReturn 10.7791 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91756 +GaussianMLPPolicy/KLAfter 0.00125667 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.51397e-06 +GaussianMLPPolicy/LossBefore 1.0252e-08 +GaussianMLPPolicy/dLoss -5.50371e-06 +Iteration 185 +MetaTest/Average/AverageDiscountedReturn -33.714 +MetaTest/Average/AverageReturn -33.714 +MetaTest/Average/Iteration 185 +MetaTest/Average/MaxReturn -20.854 +MetaTest/Average/MinReturn -54.6127 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.2649 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.714 +MetaTest/__unnamed_task__/AverageReturn -33.714 +MetaTest/__unnamed_task__/Iteration 185 +MetaTest/__unnamed_task__/MaxReturn -20.854 +MetaTest/__unnamed_task__/MinReturn -54.6127 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.2649 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.952e+06 +__unnamed_task__/AverageDiscountedReturn -16.4322 +__unnamed_task__/AverageReturn -34.7129 +__unnamed_task__/Iteration 185 +__unnamed_task__/MaxReturn -4.96978 +__unnamed_task__/MinReturn -64.0285 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.7791 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:39:03 | [maml_trainer] epoch #186 | Sampling for adapation and meta-testing... +2025-04-03 14:40:35 | [maml_trainer] epoch #186 | Finished meta-testing... +2025-04-03 14:40:35 | [maml_trainer] epoch #186 | Saving snapshot... +2025-04-03 14:40:58 | [maml_trainer] epoch #186 | Saved +2025-04-03 14:40:58 | [maml_trainer] epoch #186 | Time 89280.35 s +2025-04-03 14:40:58 | [maml_trainer] epoch #186 | EpochTime 480.62 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7318 +Average/AverageReturn -31.6875 +Average/Iteration 186 +Average/MaxReturn 144.406 +Average/MinReturn -60.0546 +Average/NumEpisodes 80 +Average/StdReturn 26.2468 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91519 +GaussianMLPPolicy/KLAfter 0.000400772 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.0068e-05 +GaussianMLPPolicy/LossBefore -8.9705e-09 +GaussianMLPPolicy/dLoss 3.00591e-05 +Iteration 186 +MetaTest/Average/AverageDiscountedReturn -32.9866 +MetaTest/Average/AverageReturn -32.9866 +MetaTest/Average/Iteration 186 +MetaTest/Average/MaxReturn -4.49864 +MetaTest/Average/MinReturn -51.9469 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7499 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9866 +MetaTest/__unnamed_task__/AverageReturn -32.9866 +MetaTest/__unnamed_task__/Iteration 186 +MetaTest/__unnamed_task__/MaxReturn -4.49864 +MetaTest/__unnamed_task__/MinReturn -51.9469 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7499 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.984e+06 +__unnamed_task__/AverageDiscountedReturn -15.7318 +__unnamed_task__/AverageReturn -31.6875 +__unnamed_task__/Iteration 186 +__unnamed_task__/MaxReturn 144.406 +__unnamed_task__/MinReturn -60.0546 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.2468 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:47:04 | [maml_trainer] epoch #187 | Sampling for adapation and meta-testing... +2025-04-03 14:48:36 | [maml_trainer] epoch #187 | Finished meta-testing... +2025-04-03 14:48:36 | [maml_trainer] epoch #187 | Saving snapshot... +2025-04-03 14:49:00 | [maml_trainer] epoch #187 | Saved +2025-04-03 14:49:00 | [maml_trainer] epoch #187 | Time 89762.66 s +2025-04-03 14:49:00 | [maml_trainer] epoch #187 | EpochTime 482.31 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5048 +Average/AverageReturn -34.7029 +Average/Iteration 187 +Average/MaxReturn 12.6601 +Average/MinReturn -68.4937 +Average/NumEpisodes 80 +Average/StdReturn 10.0412 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91488 +GaussianMLPPolicy/KLAfter 0.001594 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000225681 +GaussianMLPPolicy/LossBefore -1.64509e-08 +GaussianMLPPolicy/dLoss 0.000225665 +Iteration 187 +MetaTest/Average/AverageDiscountedReturn -37.4347 +MetaTest/Average/AverageReturn -37.4347 +MetaTest/Average/Iteration 187 +MetaTest/Average/MaxReturn -24.5925 +MetaTest/Average/MinReturn -58.5368 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1092 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.4347 +MetaTest/__unnamed_task__/AverageReturn -37.4347 +MetaTest/__unnamed_task__/Iteration 187 +MetaTest/__unnamed_task__/MaxReturn -24.5925 +MetaTest/__unnamed_task__/MinReturn -58.5368 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1092 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.016e+06 +__unnamed_task__/AverageDiscountedReturn -16.5048 +__unnamed_task__/AverageReturn -34.7029 +__unnamed_task__/Iteration 187 +__unnamed_task__/MaxReturn 12.6601 +__unnamed_task__/MinReturn -68.4937 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.0412 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 14:55:09 | [maml_trainer] epoch #188 | Sampling for adapation and meta-testing... +2025-04-03 14:56:41 | [maml_trainer] epoch #188 | Finished meta-testing... +2025-04-03 14:56:41 | [maml_trainer] epoch #188 | Saving snapshot... +2025-04-03 14:57:05 | [maml_trainer] epoch #188 | Saved +2025-04-03 14:57:05 | [maml_trainer] epoch #188 | Time 90247.61 s +2025-04-03 14:57:05 | [maml_trainer] epoch #188 | EpochTime 484.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6171 +Average/AverageReturn -32.4346 +Average/Iteration 188 +Average/MaxReturn 14.7812 +Average/MinReturn -54.0135 +Average/NumEpisodes 80 +Average/StdReturn 10.1836 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91382 +GaussianMLPPolicy/KLAfter 0.00420924 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000312834 +GaussianMLPPolicy/LossBefore 3.2872e-08 +GaussianMLPPolicy/dLoss 0.000312867 +Iteration 188 +MetaTest/Average/AverageDiscountedReturn -33.3288 +MetaTest/Average/AverageReturn -33.3288 +MetaTest/Average/Iteration 188 +MetaTest/Average/MaxReturn -9.27994 +MetaTest/Average/MinReturn -56.0384 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.90192 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.3288 +MetaTest/__unnamed_task__/AverageReturn -33.3288 +MetaTest/__unnamed_task__/Iteration 188 +MetaTest/__unnamed_task__/MaxReturn -9.27994 +MetaTest/__unnamed_task__/MinReturn -56.0384 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.90192 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.048e+06 +__unnamed_task__/AverageDiscountedReturn -15.6171 +__unnamed_task__/AverageReturn -32.4346 +__unnamed_task__/Iteration 188 +__unnamed_task__/MaxReturn 14.7812 +__unnamed_task__/MinReturn -54.0135 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1836 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:03:15 | [maml_trainer] epoch #189 | Sampling for adapation and meta-testing... +2025-04-03 15:04:47 | [maml_trainer] epoch #189 | Finished meta-testing... +2025-04-03 15:04:47 | [maml_trainer] epoch #189 | Saving snapshot... +2025-04-03 15:05:09 | [maml_trainer] epoch #189 | Saved +2025-04-03 15:05:09 | [maml_trainer] epoch #189 | Time 90731.94 s +2025-04-03 15:05:09 | [maml_trainer] epoch #189 | EpochTime 484.32 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8901 +Average/AverageReturn -33.1221 +Average/Iteration 189 +Average/MaxReturn -14.2023 +Average/MinReturn -60.3378 +Average/NumEpisodes 80 +Average/StdReturn 7.88524 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.91282 +GaussianMLPPolicy/KLAfter 0.00534996 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.87441e-05 +GaussianMLPPolicy/LossBefore 9.59635e-09 +GaussianMLPPolicy/dLoss 7.87536e-05 +Iteration 189 +MetaTest/Average/AverageDiscountedReturn -27.378 +MetaTest/Average/AverageReturn -27.378 +MetaTest/Average/Iteration 189 +MetaTest/Average/MaxReturn 20.4373 +MetaTest/Average/MinReturn -47.2282 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.1321 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.378 +MetaTest/__unnamed_task__/AverageReturn -27.378 +MetaTest/__unnamed_task__/Iteration 189 +MetaTest/__unnamed_task__/MaxReturn 20.4373 +MetaTest/__unnamed_task__/MinReturn -47.2282 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.1321 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.08e+06 +__unnamed_task__/AverageDiscountedReturn -15.8901 +__unnamed_task__/AverageReturn -33.1221 +__unnamed_task__/Iteration 189 +__unnamed_task__/MaxReturn -14.2023 +__unnamed_task__/MinReturn -60.3378 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.88524 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:11:15 | [maml_trainer] epoch #190 | Sampling for adapation and meta-testing... +2025-04-03 15:12:47 | [maml_trainer] epoch #190 | Finished meta-testing... +2025-04-03 15:12:47 | [maml_trainer] epoch #190 | Saving snapshot... +2025-04-03 15:13:11 | [maml_trainer] epoch #190 | Saved +2025-04-03 15:13:11 | [maml_trainer] epoch #190 | Time 91213.39 s +2025-04-03 15:13:11 | [maml_trainer] epoch #190 | EpochTime 481.45 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2951 +Average/AverageReturn -31.6477 +Average/Iteration 190 +Average/MaxReturn 8.89111 +Average/MinReturn -61.7089 +Average/NumEpisodes 80 +Average/StdReturn 10.2914 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90968 +GaussianMLPPolicy/KLAfter 0.00647318 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.22117e-05 +GaussianMLPPolicy/LossBefore 1.10865e-08 +GaussianMLPPolicy/dLoss -6.22006e-05 +Iteration 190 +MetaTest/Average/AverageDiscountedReturn -33.7303 +MetaTest/Average/AverageReturn -33.7303 +MetaTest/Average/Iteration 190 +MetaTest/Average/MaxReturn -18.4448 +MetaTest/Average/MinReturn -43.8851 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.22163 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.7303 +MetaTest/__unnamed_task__/AverageReturn -33.7303 +MetaTest/__unnamed_task__/Iteration 190 +MetaTest/__unnamed_task__/MaxReturn -18.4448 +MetaTest/__unnamed_task__/MinReturn -43.8851 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.22163 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.112e+06 +__unnamed_task__/AverageDiscountedReturn -15.2951 +__unnamed_task__/AverageReturn -31.6477 +__unnamed_task__/Iteration 190 +__unnamed_task__/MaxReturn 8.89111 +__unnamed_task__/MinReturn -61.7089 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2914 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:19:21 | [maml_trainer] epoch #191 | Sampling for adapation and meta-testing... +2025-04-03 15:20:54 | [maml_trainer] epoch #191 | Finished meta-testing... +2025-04-03 15:20:54 | [maml_trainer] epoch #191 | Saving snapshot... +2025-04-03 15:21:18 | [maml_trainer] epoch #191 | Saved +2025-04-03 15:21:18 | [maml_trainer] epoch #191 | Time 91700.50 s +2025-04-03 15:21:18 | [maml_trainer] epoch #191 | EpochTime 487.11 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4831 +Average/AverageReturn -32.5666 +Average/Iteration 191 +Average/MaxReturn 20.5439 +Average/MinReturn -51.4177 +Average/NumEpisodes 80 +Average/StdReturn 11.6536 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90699 +GaussianMLPPolicy/KLAfter 0.00504903 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000191621 +GaussianMLPPolicy/LossBefore 4.73857e-09 +GaussianMLPPolicy/dLoss 0.000191626 +Iteration 191 +MetaTest/Average/AverageDiscountedReturn -33.3029 +MetaTest/Average/AverageReturn -33.3029 +MetaTest/Average/Iteration 191 +MetaTest/Average/MaxReturn -20.9485 +MetaTest/Average/MinReturn -57.2473 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.25004 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.3029 +MetaTest/__unnamed_task__/AverageReturn -33.3029 +MetaTest/__unnamed_task__/Iteration 191 +MetaTest/__unnamed_task__/MaxReturn -20.9485 +MetaTest/__unnamed_task__/MinReturn -57.2473 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.25004 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.144e+06 +__unnamed_task__/AverageDiscountedReturn -15.4831 +__unnamed_task__/AverageReturn -32.5666 +__unnamed_task__/Iteration 191 +__unnamed_task__/MaxReturn 20.5439 +__unnamed_task__/MinReturn -51.4177 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6536 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:27:29 | [maml_trainer] epoch #192 | Sampling for adapation and meta-testing... +2025-04-03 15:29:01 | [maml_trainer] epoch #192 | Finished meta-testing... +2025-04-03 15:29:01 | [maml_trainer] epoch #192 | Saving snapshot... +2025-04-03 15:29:25 | [maml_trainer] epoch #192 | Saved +2025-04-03 15:29:25 | [maml_trainer] epoch #192 | Time 92187.48 s +2025-04-03 15:29:25 | [maml_trainer] epoch #192 | EpochTime 486.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4559 +Average/AverageReturn -32.066 +Average/Iteration 192 +Average/MaxReturn 42.2026 +Average/MinReturn -53.3542 +Average/NumEpisodes 80 +Average/StdReturn 12.8271 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.9048 +GaussianMLPPolicy/KLAfter 0.00491402 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000234955 +GaussianMLPPolicy/LossBefore 1.52588e-08 +GaussianMLPPolicy/dLoss -0.00023494 +Iteration 192 +MetaTest/Average/AverageDiscountedReturn -34.9151 +MetaTest/Average/AverageReturn -34.9151 +MetaTest/Average/Iteration 192 +MetaTest/Average/MaxReturn -6.60547 +MetaTest/Average/MinReturn -54.2211 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.386 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.9151 +MetaTest/__unnamed_task__/AverageReturn -34.9151 +MetaTest/__unnamed_task__/Iteration 192 +MetaTest/__unnamed_task__/MaxReturn -6.60547 +MetaTest/__unnamed_task__/MinReturn -54.2211 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.386 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.176e+06 +__unnamed_task__/AverageDiscountedReturn -15.4559 +__unnamed_task__/AverageReturn -32.066 +__unnamed_task__/Iteration 192 +__unnamed_task__/MaxReturn 42.2026 +__unnamed_task__/MinReturn -53.3542 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8271 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:35:32 | [maml_trainer] epoch #193 | Sampling for adapation and meta-testing... +2025-04-03 15:37:03 | [maml_trainer] epoch #193 | Finished meta-testing... +2025-04-03 15:37:03 | [maml_trainer] epoch #193 | Saving snapshot... +2025-04-03 15:37:27 | [maml_trainer] epoch #193 | Saved +2025-04-03 15:37:27 | [maml_trainer] epoch #193 | Time 92669.55 s +2025-04-03 15:37:27 | [maml_trainer] epoch #193 | EpochTime 482.07 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.5644 +Average/AverageReturn -33.8369 +Average/Iteration 193 +Average/MaxReturn -2.12092 +Average/MinReturn -51.5724 +Average/NumEpisodes 80 +Average/StdReturn 8.76878 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90293 +GaussianMLPPolicy/KLAfter 0.0039128 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00014205 +GaussianMLPPolicy/LossBefore 9.71556e-09 +GaussianMLPPolicy/dLoss -0.00014204 +Iteration 193 +MetaTest/Average/AverageDiscountedReturn -35.7007 +MetaTest/Average/AverageReturn -35.7007 +MetaTest/Average/Iteration 193 +MetaTest/Average/MaxReturn -21.4273 +MetaTest/Average/MinReturn -57.8176 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.00377 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.7007 +MetaTest/__unnamed_task__/AverageReturn -35.7007 +MetaTest/__unnamed_task__/Iteration 193 +MetaTest/__unnamed_task__/MaxReturn -21.4273 +MetaTest/__unnamed_task__/MinReturn -57.8176 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.00377 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.208e+06 +__unnamed_task__/AverageDiscountedReturn -15.5644 +__unnamed_task__/AverageReturn -33.8369 +__unnamed_task__/Iteration 193 +__unnamed_task__/MaxReturn -2.12092 +__unnamed_task__/MinReturn -51.5724 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.76878 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:43:35 | [maml_trainer] epoch #194 | Sampling for adapation and meta-testing... +2025-04-03 15:45:06 | [maml_trainer] epoch #194 | Finished meta-testing... +2025-04-03 15:45:06 | [maml_trainer] epoch #194 | Saving snapshot... +2025-04-03 15:45:29 | [maml_trainer] epoch #194 | Saved +2025-04-03 15:45:29 | [maml_trainer] epoch #194 | Time 93151.03 s +2025-04-03 15:45:29 | [maml_trainer] epoch #194 | EpochTime 481.48 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9089 +Average/AverageReturn -36.0761 +Average/Iteration 194 +Average/MaxReturn -6.55522 +Average/MinReturn -54.5036 +Average/NumEpisodes 80 +Average/StdReturn 9.38681 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90148 +GaussianMLPPolicy/KLAfter 0.00249881 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.39766e-05 +GaussianMLPPolicy/LossBefore 8.64267e-09 +GaussianMLPPolicy/dLoss -3.39679e-05 +Iteration 194 +MetaTest/Average/AverageDiscountedReturn -34.1532 +MetaTest/Average/AverageReturn -34.1532 +MetaTest/Average/Iteration 194 +MetaTest/Average/MaxReturn -18.2892 +MetaTest/Average/MinReturn -55.4881 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.37526 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.1532 +MetaTest/__unnamed_task__/AverageReturn -34.1532 +MetaTest/__unnamed_task__/Iteration 194 +MetaTest/__unnamed_task__/MaxReturn -18.2892 +MetaTest/__unnamed_task__/MinReturn -55.4881 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.37526 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.24e+06 +__unnamed_task__/AverageDiscountedReturn -16.9089 +__unnamed_task__/AverageReturn -36.0761 +__unnamed_task__/Iteration 194 +__unnamed_task__/MaxReturn -6.55522 +__unnamed_task__/MinReturn -54.5036 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.38681 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:51:35 | [maml_trainer] epoch #195 | Sampling for adapation and meta-testing... +2025-04-03 15:53:05 | [maml_trainer] epoch #195 | Finished meta-testing... +2025-04-03 15:53:05 | [maml_trainer] epoch #195 | Saving snapshot... +2025-04-03 15:53:30 | [maml_trainer] epoch #195 | Saved +2025-04-03 15:53:30 | [maml_trainer] epoch #195 | Time 93632.11 s +2025-04-03 15:53:30 | [maml_trainer] epoch #195 | EpochTime 481.08 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3335 +Average/AverageReturn -35.3011 +Average/Iteration 195 +Average/MaxReturn -19.4369 +Average/MinReturn -58.4086 +Average/NumEpisodes 80 +Average/StdReturn 8.98074 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.90018 +GaussianMLPPolicy/KLAfter 0.0034708 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.73943e-05 +GaussianMLPPolicy/LossBefore -2.6226e-09 +GaussianMLPPolicy/dLoss -1.73969e-05 +Iteration 195 +MetaTest/Average/AverageDiscountedReturn -35.9392 +MetaTest/Average/AverageReturn -35.9392 +MetaTest/Average/Iteration 195 +MetaTest/Average/MaxReturn -17.958 +MetaTest/Average/MinReturn -60.8031 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.26962 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.9392 +MetaTest/__unnamed_task__/AverageReturn -35.9392 +MetaTest/__unnamed_task__/Iteration 195 +MetaTest/__unnamed_task__/MaxReturn -17.958 +MetaTest/__unnamed_task__/MinReturn -60.8031 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.26962 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.272e+06 +__unnamed_task__/AverageDiscountedReturn -16.3335 +__unnamed_task__/AverageReturn -35.3011 +__unnamed_task__/Iteration 195 +__unnamed_task__/MaxReturn -19.4369 +__unnamed_task__/MinReturn -58.4086 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.98074 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 15:59:39 | [maml_trainer] epoch #196 | Sampling for adapation and meta-testing... +2025-04-03 16:01:11 | [maml_trainer] epoch #196 | Finished meta-testing... +2025-04-03 16:01:11 | [maml_trainer] epoch #196 | Saving snapshot... +2025-04-03 16:01:35 | [maml_trainer] epoch #196 | Saved +2025-04-03 16:01:35 | [maml_trainer] epoch #196 | Time 94117.04 s +2025-04-03 16:01:35 | [maml_trainer] epoch #196 | EpochTime 484.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.9424 +Average/AverageReturn -34.1151 +Average/Iteration 196 +Average/MaxReturn 18.0582 +Average/MinReturn -52.1879 +Average/NumEpisodes 80 +Average/StdReturn 9.73235 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89772 +GaussianMLPPolicy/KLAfter 0.0037197 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.00017909 +GaussianMLPPolicy/LossBefore -2.59876e-08 +GaussianMLPPolicy/dLoss 0.000179064 +Iteration 196 +MetaTest/Average/AverageDiscountedReturn -32.7263 +MetaTest/Average/AverageReturn -32.7263 +MetaTest/Average/Iteration 196 +MetaTest/Average/MaxReturn -1.48005 +MetaTest/Average/MinReturn -60.5949 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.2601 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.7263 +MetaTest/__unnamed_task__/AverageReturn -32.7263 +MetaTest/__unnamed_task__/Iteration 196 +MetaTest/__unnamed_task__/MaxReturn -1.48005 +MetaTest/__unnamed_task__/MinReturn -60.5949 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.2601 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.304e+06 +__unnamed_task__/AverageDiscountedReturn -15.9424 +__unnamed_task__/AverageReturn -34.1151 +__unnamed_task__/Iteration 196 +__unnamed_task__/MaxReturn 18.0582 +__unnamed_task__/MinReturn -52.1879 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.73235 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:07:43 | [maml_trainer] epoch #197 | Sampling for adapation and meta-testing... +2025-04-03 16:09:17 | [maml_trainer] epoch #197 | Finished meta-testing... +2025-04-03 16:09:17 | [maml_trainer] epoch #197 | Saving snapshot... +2025-04-03 16:09:40 | [maml_trainer] epoch #197 | Saved +2025-04-03 16:09:40 | [maml_trainer] epoch #197 | Time 94602.30 s +2025-04-03 16:09:40 | [maml_trainer] epoch #197 | EpochTime 485.25 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3272 +Average/AverageReturn -34.1541 +Average/Iteration 197 +Average/MaxReturn 6.60798 +Average/MinReturn -57.1814 +Average/NumEpisodes 80 +Average/StdReturn 10.2441 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89631 +GaussianMLPPolicy/KLAfter 0.00362981 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.48787e-06 +GaussianMLPPolicy/LossBefore 1.65105e-08 +GaussianMLPPolicy/dLoss 4.50438e-06 +Iteration 197 +MetaTest/Average/AverageDiscountedReturn -32.9235 +MetaTest/Average/AverageReturn -32.9235 +MetaTest/Average/Iteration 197 +MetaTest/Average/MaxReturn -23.8559 +MetaTest/Average/MinReturn -47.0401 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.21318 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9235 +MetaTest/__unnamed_task__/AverageReturn -32.9235 +MetaTest/__unnamed_task__/Iteration 197 +MetaTest/__unnamed_task__/MaxReturn -23.8559 +MetaTest/__unnamed_task__/MinReturn -47.0401 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.21318 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.336e+06 +__unnamed_task__/AverageDiscountedReturn -16.3272 +__unnamed_task__/AverageReturn -34.1541 +__unnamed_task__/Iteration 197 +__unnamed_task__/MaxReturn 6.60798 +__unnamed_task__/MinReturn -57.1814 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2441 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:15:47 | [maml_trainer] epoch #198 | Sampling for adapation and meta-testing... +2025-04-03 16:17:20 | [maml_trainer] epoch #198 | Finished meta-testing... +2025-04-03 16:17:20 | [maml_trainer] epoch #198 | Saving snapshot... +2025-04-03 16:17:43 | [maml_trainer] epoch #198 | Saved +2025-04-03 16:17:43 | [maml_trainer] epoch #198 | Time 95085.92 s +2025-04-03 16:17:43 | [maml_trainer] epoch #198 | EpochTime 483.62 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2721 +Average/AverageReturn -32.0077 +Average/Iteration 198 +Average/MaxReturn 25.3472 +Average/MinReturn -53.2422 +Average/NumEpisodes 80 +Average/StdReturn 11.265 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89497 +GaussianMLPPolicy/KLAfter 0.00568444 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000180218 +GaussianMLPPolicy/LossBefore -3.49581e-08 +GaussianMLPPolicy/dLoss 0.000180183 +Iteration 198 +MetaTest/Average/AverageDiscountedReturn -32.2639 +MetaTest/Average/AverageReturn -32.2639 +MetaTest/Average/Iteration 198 +MetaTest/Average/MaxReturn -19.1588 +MetaTest/Average/MinReturn -43.9894 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.76994 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.2639 +MetaTest/__unnamed_task__/AverageReturn -32.2639 +MetaTest/__unnamed_task__/Iteration 198 +MetaTest/__unnamed_task__/MaxReturn -19.1588 +MetaTest/__unnamed_task__/MinReturn -43.9894 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.76994 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.368e+06 +__unnamed_task__/AverageDiscountedReturn -15.2721 +__unnamed_task__/AverageReturn -32.0077 +__unnamed_task__/Iteration 198 +__unnamed_task__/MaxReturn 25.3472 +__unnamed_task__/MinReturn -53.2422 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.265 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:23:52 | [maml_trainer] epoch #199 | Sampling for adapation and meta-testing... +2025-04-03 16:25:23 | [maml_trainer] epoch #199 | Finished meta-testing... +2025-04-03 16:25:23 | [maml_trainer] epoch #199 | Saving snapshot... +2025-04-03 16:25:46 | [maml_trainer] epoch #199 | Saved +2025-04-03 16:25:46 | [maml_trainer] epoch #199 | Time 95568.68 s +2025-04-03 16:25:46 | [maml_trainer] epoch #199 | EpochTime 482.75 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0489 +Average/AverageReturn -31.9949 +Average/Iteration 199 +Average/MaxReturn -4.24506 +Average/MinReturn -49.8063 +Average/NumEpisodes 80 +Average/StdReturn 10.1168 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89372 +GaussianMLPPolicy/KLAfter 0.00580496 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.72071e-05 +GaussianMLPPolicy/LossBefore -1.20401e-08 +GaussianMLPPolicy/dLoss -6.72191e-05 +Iteration 199 +MetaTest/Average/AverageDiscountedReturn -31.9268 +MetaTest/Average/AverageReturn -31.9268 +MetaTest/Average/Iteration 199 +MetaTest/Average/MaxReturn -7.04093 +MetaTest/Average/MinReturn -48.4507 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.8897 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.9268 +MetaTest/__unnamed_task__/AverageReturn -31.9268 +MetaTest/__unnamed_task__/Iteration 199 +MetaTest/__unnamed_task__/MaxReturn -7.04093 +MetaTest/__unnamed_task__/MinReturn -48.4507 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.8897 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.4e+06 +__unnamed_task__/AverageDiscountedReturn -15.0489 +__unnamed_task__/AverageReturn -31.9949 +__unnamed_task__/Iteration 199 +__unnamed_task__/MaxReturn -4.24506 +__unnamed_task__/MinReturn -49.8063 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1168 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:31:58 | [maml_trainer] epoch #200 | Sampling for adapation and meta-testing... +2025-04-03 16:33:31 | [maml_trainer] epoch #200 | Finished meta-testing... +2025-04-03 16:33:31 | [maml_trainer] epoch #200 | Saving snapshot... +2025-04-03 16:33:57 | [maml_trainer] epoch #200 | Saved +2025-04-03 16:33:57 | [maml_trainer] epoch #200 | Time 96059.95 s +2025-04-03 16:33:57 | [maml_trainer] epoch #200 | EpochTime 491.27 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6762 +Average/AverageReturn -33.041 +Average/Iteration 200 +Average/MaxReturn 18.2638 +Average/MinReturn -56.9375 +Average/NumEpisodes 80 +Average/StdReturn 11.4193 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.89108 +GaussianMLPPolicy/KLAfter 0.00566682 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000228491 +GaussianMLPPolicy/LossBefore 2.36034e-08 +GaussianMLPPolicy/dLoss -0.000228467 +Iteration 200 +MetaTest/Average/AverageDiscountedReturn -36.6509 +MetaTest/Average/AverageReturn -36.6509 +MetaTest/Average/Iteration 200 +MetaTest/Average/MaxReturn -27.9346 +MetaTest/Average/MinReturn -49.4351 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.25205 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.6509 +MetaTest/__unnamed_task__/AverageReturn -36.6509 +MetaTest/__unnamed_task__/Iteration 200 +MetaTest/__unnamed_task__/MaxReturn -27.9346 +MetaTest/__unnamed_task__/MinReturn -49.4351 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.25205 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.432e+06 +__unnamed_task__/AverageDiscountedReturn -15.6762 +__unnamed_task__/AverageReturn -33.041 +__unnamed_task__/Iteration 200 +__unnamed_task__/MaxReturn 18.2638 +__unnamed_task__/MinReturn -56.9375 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4193 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:40:16 | [maml_trainer] epoch #201 | Sampling for adapation and meta-testing... +2025-04-03 16:41:49 | [maml_trainer] epoch #201 | Finished meta-testing... +2025-04-03 16:41:49 | [maml_trainer] epoch #201 | Saving snapshot... +2025-04-03 16:42:13 | [maml_trainer] epoch #201 | Saved +2025-04-03 16:42:13 | [maml_trainer] epoch #201 | Time 96555.30 s +2025-04-03 16:42:13 | [maml_trainer] epoch #201 | EpochTime 495.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.703 +Average/AverageReturn -33.9548 +Average/Iteration 201 +Average/MaxReturn 10.5574 +Average/MinReturn -62.9949 +Average/NumEpisodes 80 +Average/StdReturn 9.9432 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.888 +GaussianMLPPolicy/KLAfter 0.00406525 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00020423 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss -0.000204226 +Iteration 201 +MetaTest/Average/AverageDiscountedReturn -34.4525 +MetaTest/Average/AverageReturn -34.4525 +MetaTest/Average/Iteration 201 +MetaTest/Average/MaxReturn -18.4578 +MetaTest/Average/MinReturn -46.5014 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.87809 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.4525 +MetaTest/__unnamed_task__/AverageReturn -34.4525 +MetaTest/__unnamed_task__/Iteration 201 +MetaTest/__unnamed_task__/MaxReturn -18.4578 +MetaTest/__unnamed_task__/MinReturn -46.5014 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.87809 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.464e+06 +__unnamed_task__/AverageDiscountedReturn -15.703 +__unnamed_task__/AverageReturn -33.9548 +__unnamed_task__/Iteration 201 +__unnamed_task__/MaxReturn 10.5574 +__unnamed_task__/MinReturn -62.9949 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.9432 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:48:25 | [maml_trainer] epoch #202 | Sampling for adapation and meta-testing... +2025-04-03 16:50:00 | [maml_trainer] epoch #202 | Finished meta-testing... +2025-04-03 16:50:00 | [maml_trainer] epoch #202 | Saving snapshot... +2025-04-03 16:50:24 | [maml_trainer] epoch #202 | Saved +2025-04-03 16:50:24 | [maml_trainer] epoch #202 | Time 97046.59 s +2025-04-03 16:50:24 | [maml_trainer] epoch #202 | EpochTime 491.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7961 +Average/AverageReturn -36.8455 +Average/Iteration 202 +Average/MaxReturn 8.16796 +Average/MinReturn -56.1606 +Average/NumEpisodes 80 +Average/StdReturn 9.41137 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.88561 +GaussianMLPPolicy/KLAfter 0.000766305 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.52123e-05 +GaussianMLPPolicy/LossBefore -2.5928e-08 +GaussianMLPPolicy/dLoss -8.52382e-05 +Iteration 202 +MetaTest/Average/AverageDiscountedReturn -40.6363 +MetaTest/Average/AverageReturn -40.6363 +MetaTest/Average/Iteration 202 +MetaTest/Average/MaxReturn -26.0081 +MetaTest/Average/MinReturn -56.4306 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.65301 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -40.6363 +MetaTest/__unnamed_task__/AverageReturn -40.6363 +MetaTest/__unnamed_task__/Iteration 202 +MetaTest/__unnamed_task__/MaxReturn -26.0081 +MetaTest/__unnamed_task__/MinReturn -56.4306 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.65301 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.496e+06 +__unnamed_task__/AverageDiscountedReturn -16.7961 +__unnamed_task__/AverageReturn -36.8455 +__unnamed_task__/Iteration 202 +__unnamed_task__/MaxReturn 8.16796 +__unnamed_task__/MinReturn -56.1606 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.41137 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 16:56:35 | [maml_trainer] epoch #203 | Sampling for adapation and meta-testing... +2025-04-03 16:58:07 | [maml_trainer] epoch #203 | Finished meta-testing... +2025-04-03 16:58:07 | [maml_trainer] epoch #203 | Saving snapshot... +2025-04-03 16:58:30 | [maml_trainer] epoch #203 | Saved +2025-04-03 16:58:30 | [maml_trainer] epoch #203 | Time 97532.93 s +2025-04-03 16:58:30 | [maml_trainer] epoch #203 | EpochTime 486.33 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0989 +Average/AverageReturn -35.2196 +Average/Iteration 203 +Average/MaxReturn -19.3645 +Average/MinReturn -61.2164 +Average/NumEpisodes 80 +Average/StdReturn 8.82715 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.88408 +GaussianMLPPolicy/KLAfter 0.00259937 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000130001 +GaussianMLPPolicy/LossBefore -2.78354e-08 +GaussianMLPPolicy/dLoss 0.000129973 +Iteration 203 +MetaTest/Average/AverageDiscountedReturn -32.2582 +MetaTest/Average/AverageReturn -32.2582 +MetaTest/Average/Iteration 203 +MetaTest/Average/MaxReturn -16.5725 +MetaTest/Average/MinReturn -50.5937 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.10004 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.2582 +MetaTest/__unnamed_task__/AverageReturn -32.2582 +MetaTest/__unnamed_task__/Iteration 203 +MetaTest/__unnamed_task__/MaxReturn -16.5725 +MetaTest/__unnamed_task__/MinReturn -50.5937 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.10004 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.528e+06 +__unnamed_task__/AverageDiscountedReturn -16.0989 +__unnamed_task__/AverageReturn -35.2196 +__unnamed_task__/Iteration 203 +__unnamed_task__/MaxReturn -19.3645 +__unnamed_task__/MinReturn -61.2164 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.82715 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:04:45 | [maml_trainer] epoch #204 | Sampling for adapation and meta-testing... +2025-04-03 17:06:17 | [maml_trainer] epoch #204 | Finished meta-testing... +2025-04-03 17:06:17 | [maml_trainer] epoch #204 | Saving snapshot... +2025-04-03 17:06:41 | [maml_trainer] epoch #204 | Saved +2025-04-03 17:06:41 | [maml_trainer] epoch #204 | Time 98023.92 s +2025-04-03 17:06:41 | [maml_trainer] epoch #204 | EpochTime 490.99 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3709 +Average/AverageReturn -35.2765 +Average/Iteration 204 +Average/MaxReturn 22.4725 +Average/MinReturn -69.9091 +Average/NumEpisodes 80 +Average/StdReturn 12.1422 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8817 +GaussianMLPPolicy/KLAfter 0.00410912 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000145511 +GaussianMLPPolicy/LossBefore -3.30806e-09 +GaussianMLPPolicy/dLoss 0.000145508 +Iteration 204 +MetaTest/Average/AverageDiscountedReturn -34.4721 +MetaTest/Average/AverageReturn -34.4721 +MetaTest/Average/Iteration 204 +MetaTest/Average/MaxReturn -21.9169 +MetaTest/Average/MinReturn -53.1341 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.92244 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.4721 +MetaTest/__unnamed_task__/AverageReturn -34.4721 +MetaTest/__unnamed_task__/Iteration 204 +MetaTest/__unnamed_task__/MaxReturn -21.9169 +MetaTest/__unnamed_task__/MinReturn -53.1341 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.92244 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.56e+06 +__unnamed_task__/AverageDiscountedReturn -16.3709 +__unnamed_task__/AverageReturn -35.2765 +__unnamed_task__/Iteration 204 +__unnamed_task__/MaxReturn 22.4725 +__unnamed_task__/MinReturn -69.9091 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1422 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:12:52 | [maml_trainer] epoch #205 | Sampling for adapation and meta-testing... +2025-04-03 17:14:25 | [maml_trainer] epoch #205 | Finished meta-testing... +2025-04-03 17:14:25 | [maml_trainer] epoch #205 | Saving snapshot... +2025-04-03 17:14:49 | [maml_trainer] epoch #205 | Saved +2025-04-03 17:14:49 | [maml_trainer] epoch #205 | Time 98511.90 s +2025-04-03 17:14:49 | [maml_trainer] epoch #205 | EpochTime 487.98 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.7727 +Average/AverageReturn -31.0924 +Average/Iteration 205 +Average/MaxReturn 0.444659 +Average/MinReturn -53.4238 +Average/NumEpisodes 80 +Average/StdReturn 9.496 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87963 +GaussianMLPPolicy/KLAfter 0.00490172 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000220844 +GaussianMLPPolicy/LossBefore -2.25306e-08 +GaussianMLPPolicy/dLoss 0.000220821 +Iteration 205 +MetaTest/Average/AverageDiscountedReturn -32.2745 +MetaTest/Average/AverageReturn -32.2745 +MetaTest/Average/Iteration 205 +MetaTest/Average/MaxReturn -23.6282 +MetaTest/Average/MinReturn -47.5625 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.45766 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.2745 +MetaTest/__unnamed_task__/AverageReturn -32.2745 +MetaTest/__unnamed_task__/Iteration 205 +MetaTest/__unnamed_task__/MaxReturn -23.6282 +MetaTest/__unnamed_task__/MinReturn -47.5625 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.45766 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.592e+06 +__unnamed_task__/AverageDiscountedReturn -14.7727 +__unnamed_task__/AverageReturn -31.0924 +__unnamed_task__/Iteration 205 +__unnamed_task__/MaxReturn 0.444659 +__unnamed_task__/MinReturn -53.4238 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.496 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:21:04 | [maml_trainer] epoch #206 | Sampling for adapation and meta-testing... +2025-04-03 17:22:36 | [maml_trainer] epoch #206 | Finished meta-testing... +2025-04-03 17:22:36 | [maml_trainer] epoch #206 | Saving snapshot... +2025-04-03 17:22:59 | [maml_trainer] epoch #206 | Saved +2025-04-03 17:22:59 | [maml_trainer] epoch #206 | Time 99001.93 s +2025-04-03 17:22:59 | [maml_trainer] epoch #206 | EpochTime 490.02 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.975 +Average/AverageReturn -30.1395 +Average/Iteration 206 +Average/MaxReturn 64.2574 +Average/MinReturn -58.73 +Average/NumEpisodes 80 +Average/StdReturn 16.65 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87715 +GaussianMLPPolicy/KLAfter 0.00498149 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.55355e-05 +GaussianMLPPolicy/LossBefore -1.78814e-09 +GaussianMLPPolicy/dLoss -9.55373e-05 +Iteration 206 +MetaTest/Average/AverageDiscountedReturn -28.9967 +MetaTest/Average/AverageReturn -28.9967 +MetaTest/Average/Iteration 206 +MetaTest/Average/MaxReturn 11.3353 +MetaTest/Average/MinReturn -47.8211 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7714 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.9967 +MetaTest/__unnamed_task__/AverageReturn -28.9967 +MetaTest/__unnamed_task__/Iteration 206 +MetaTest/__unnamed_task__/MaxReturn 11.3353 +MetaTest/__unnamed_task__/MinReturn -47.8211 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7714 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.624e+06 +__unnamed_task__/AverageDiscountedReturn -14.975 +__unnamed_task__/AverageReturn -30.1395 +__unnamed_task__/Iteration 206 +__unnamed_task__/MaxReturn 64.2574 +__unnamed_task__/MinReturn -58.73 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.65 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:29:11 | [maml_trainer] epoch #207 | Sampling for adapation and meta-testing... +2025-04-03 17:30:45 | [maml_trainer] epoch #207 | Finished meta-testing... +2025-04-03 17:30:45 | [maml_trainer] epoch #207 | Saving snapshot... +2025-04-03 17:31:09 | [maml_trainer] epoch #207 | Saved +2025-04-03 17:31:09 | [maml_trainer] epoch #207 | Time 99491.24 s +2025-04-03 17:31:09 | [maml_trainer] epoch #207 | EpochTime 489.31 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2194 +Average/AverageReturn -33.5589 +Average/Iteration 207 +Average/MaxReturn -1.46038 +Average/MinReturn -59.1115 +Average/NumEpisodes 80 +Average/StdReturn 10.1644 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87422 +GaussianMLPPolicy/KLAfter 0.00432361 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.98806e-06 +GaussianMLPPolicy/LossBefore -4.88758e-09 +GaussianMLPPolicy/dLoss 5.98317e-06 +Iteration 207 +MetaTest/Average/AverageDiscountedReturn -29.9926 +MetaTest/Average/AverageReturn -29.9926 +MetaTest/Average/Iteration 207 +MetaTest/Average/MaxReturn -5.67549 +MetaTest/Average/MinReturn -41.9314 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.73321 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.9926 +MetaTest/__unnamed_task__/AverageReturn -29.9926 +MetaTest/__unnamed_task__/Iteration 207 +MetaTest/__unnamed_task__/MaxReturn -5.67549 +MetaTest/__unnamed_task__/MinReturn -41.9314 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.73321 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.656e+06 +__unnamed_task__/AverageDiscountedReturn -16.2194 +__unnamed_task__/AverageReturn -33.5589 +__unnamed_task__/Iteration 207 +__unnamed_task__/MaxReturn -1.46038 +__unnamed_task__/MinReturn -59.1115 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1644 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:37:23 | [maml_trainer] epoch #208 | Sampling for adapation and meta-testing... +2025-04-03 17:38:56 | [maml_trainer] epoch #208 | Finished meta-testing... +2025-04-03 17:38:56 | [maml_trainer] epoch #208 | Saving snapshot... +2025-04-03 17:39:20 | [maml_trainer] epoch #208 | Saved +2025-04-03 17:39:20 | [maml_trainer] epoch #208 | Time 99982.85 s +2025-04-03 17:39:20 | [maml_trainer] epoch #208 | EpochTime 491.61 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4052 +Average/AverageReturn -31.3407 +Average/Iteration 208 +Average/MaxReturn 25.8066 +Average/MinReturn -51.6447 +Average/NumEpisodes 80 +Average/StdReturn 11.3272 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87198 +GaussianMLPPolicy/KLAfter 0.00254273 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.50132e-05 +GaussianMLPPolicy/LossBefore -1.65701e-08 +GaussianMLPPolicy/dLoss 4.49966e-05 +Iteration 208 +MetaTest/Average/AverageDiscountedReturn -30.434 +MetaTest/Average/AverageReturn -30.434 +MetaTest/Average/Iteration 208 +MetaTest/Average/MaxReturn -4.07474 +MetaTest/Average/MinReturn -49.942 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8934 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.434 +MetaTest/__unnamed_task__/AverageReturn -30.434 +MetaTest/__unnamed_task__/Iteration 208 +MetaTest/__unnamed_task__/MaxReturn -4.07474 +MetaTest/__unnamed_task__/MinReturn -49.942 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8934 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.688e+06 +__unnamed_task__/AverageDiscountedReturn -15.4052 +__unnamed_task__/AverageReturn -31.3407 +__unnamed_task__/Iteration 208 +__unnamed_task__/MaxReturn 25.8066 +__unnamed_task__/MinReturn -51.6447 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3272 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:45:31 | [maml_trainer] epoch #209 | Sampling for adapation and meta-testing... +2025-04-03 17:47:04 | [maml_trainer] epoch #209 | Finished meta-testing... +2025-04-03 17:47:04 | [maml_trainer] epoch #209 | Saving snapshot... +2025-04-03 17:47:27 | [maml_trainer] epoch #209 | Saved +2025-04-03 17:47:27 | [maml_trainer] epoch #209 | Time 100469.35 s +2025-04-03 17:47:27 | [maml_trainer] epoch #209 | EpochTime 486.50 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3065 +Average/AverageReturn -32.7683 +Average/Iteration 209 +Average/MaxReturn 46.3412 +Average/MinReturn -54.4511 +Average/NumEpisodes 80 +Average/StdReturn 12.3265 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87103 +GaussianMLPPolicy/KLAfter 0.0015756 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.2516e-05 +GaussianMLPPolicy/LossBefore 8.55327e-09 +GaussianMLPPolicy/dLoss 2.25246e-05 +Iteration 209 +MetaTest/Average/AverageDiscountedReturn -31.1033 +MetaTest/Average/AverageReturn -31.1033 +MetaTest/Average/Iteration 209 +MetaTest/Average/MaxReturn -22.278 +MetaTest/Average/MinReturn -42.6544 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.14617 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.1033 +MetaTest/__unnamed_task__/AverageReturn -31.1033 +MetaTest/__unnamed_task__/Iteration 209 +MetaTest/__unnamed_task__/MaxReturn -22.278 +MetaTest/__unnamed_task__/MinReturn -42.6544 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.14617 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.72e+06 +__unnamed_task__/AverageDiscountedReturn -16.3065 +__unnamed_task__/AverageReturn -32.7683 +__unnamed_task__/Iteration 209 +__unnamed_task__/MaxReturn 46.3412 +__unnamed_task__/MinReturn -54.4511 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3265 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 17:53:36 | [maml_trainer] epoch #210 | Sampling for adapation and meta-testing... +2025-04-03 17:55:07 | [maml_trainer] epoch #210 | Finished meta-testing... +2025-04-03 17:55:07 | [maml_trainer] epoch #210 | Saving snapshot... +2025-04-03 17:55:32 | [maml_trainer] epoch #210 | Saved +2025-04-03 17:55:32 | [maml_trainer] epoch #210 | Time 100954.83 s +2025-04-03 17:55:32 | [maml_trainer] epoch #210 | EpochTime 485.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0636 +Average/AverageReturn -29.6542 +Average/Iteration 210 +Average/MaxReturn 14.2169 +Average/MinReturn -53.3251 +Average/NumEpisodes 80 +Average/StdReturn 12.5488 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87134 +GaussianMLPPolicy/KLAfter 0.00355319 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.10268e-05 +GaussianMLPPolicy/LossBefore 6.88434e-09 +GaussianMLPPolicy/dLoss 3.10337e-05 +Iteration 210 +MetaTest/Average/AverageDiscountedReturn -27.9051 +MetaTest/Average/AverageReturn -27.9051 +MetaTest/Average/Iteration 210 +MetaTest/Average/MaxReturn 18.1526 +MetaTest/Average/MinReturn -48.7511 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.0317 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.9051 +MetaTest/__unnamed_task__/AverageReturn -27.9051 +MetaTest/__unnamed_task__/Iteration 210 +MetaTest/__unnamed_task__/MaxReturn 18.1526 +MetaTest/__unnamed_task__/MinReturn -48.7511 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.0317 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.752e+06 +__unnamed_task__/AverageDiscountedReturn -15.0636 +__unnamed_task__/AverageReturn -29.6542 +__unnamed_task__/Iteration 210 +__unnamed_task__/MaxReturn 14.2169 +__unnamed_task__/MinReturn -53.3251 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.5488 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:01:43 | [maml_trainer] epoch #211 | Sampling for adapation and meta-testing... +2025-04-03 18:03:15 | [maml_trainer] epoch #211 | Finished meta-testing... +2025-04-03 18:03:15 | [maml_trainer] epoch #211 | Saving snapshot... +2025-04-03 18:03:39 | [maml_trainer] epoch #211 | Saved +2025-04-03 18:03:39 | [maml_trainer] epoch #211 | Time 101441.17 s +2025-04-03 18:03:39 | [maml_trainer] epoch #211 | EpochTime 486.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4061 +Average/AverageReturn -30.7068 +Average/Iteration 211 +Average/MaxReturn -0.727871 +Average/MinReturn -49.0511 +Average/NumEpisodes 80 +Average/StdReturn 8.34618 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87212 +GaussianMLPPolicy/KLAfter 0.00242017 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000144649 +GaussianMLPPolicy/LossBefore 1.97887e-08 +GaussianMLPPolicy/dLoss -0.000144629 +Iteration 211 +MetaTest/Average/AverageDiscountedReturn -32.1945 +MetaTest/Average/AverageReturn -32.1945 +MetaTest/Average/Iteration 211 +MetaTest/Average/MaxReturn -12.9996 +MetaTest/Average/MinReturn -51.7684 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.6822 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.1945 +MetaTest/__unnamed_task__/AverageReturn -32.1945 +MetaTest/__unnamed_task__/Iteration 211 +MetaTest/__unnamed_task__/MaxReturn -12.9996 +MetaTest/__unnamed_task__/MinReturn -51.7684 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.6822 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.784e+06 +__unnamed_task__/AverageDiscountedReturn -15.4061 +__unnamed_task__/AverageReturn -30.7068 +__unnamed_task__/Iteration 211 +__unnamed_task__/MaxReturn -0.727871 +__unnamed_task__/MinReturn -49.0511 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.34618 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:09:49 | [maml_trainer] epoch #212 | Sampling for adapation and meta-testing... +2025-04-03 18:11:22 | [maml_trainer] epoch #212 | Finished meta-testing... +2025-04-03 18:11:22 | [maml_trainer] epoch #212 | Saving snapshot... +2025-04-03 18:11:45 | [maml_trainer] epoch #212 | Saved +2025-04-03 18:11:45 | [maml_trainer] epoch #212 | Time 101927.16 s +2025-04-03 18:11:45 | [maml_trainer] epoch #212 | EpochTime 485.99 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.526 +Average/AverageReturn -29.5662 +Average/Iteration 212 +Average/MaxReturn 18.0941 +Average/MinReturn -57.5752 +Average/NumEpisodes 80 +Average/StdReturn 9.52227 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87248 +GaussianMLPPolicy/KLAfter 0.00267848 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.46363e-05 +GaussianMLPPolicy/LossBefore 2.38419e-09 +GaussianMLPPolicy/dLoss 3.46387e-05 +Iteration 212 +MetaTest/Average/AverageDiscountedReturn -27.8975 +MetaTest/Average/AverageReturn -27.8975 +MetaTest/Average/Iteration 212 +MetaTest/Average/MaxReturn -4.45983 +MetaTest/Average/MinReturn -36.5199 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.55869 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.8975 +MetaTest/__unnamed_task__/AverageReturn -27.8975 +MetaTest/__unnamed_task__/Iteration 212 +MetaTest/__unnamed_task__/MaxReturn -4.45983 +MetaTest/__unnamed_task__/MinReturn -36.5199 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.55869 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.816e+06 +__unnamed_task__/AverageDiscountedReturn -14.526 +__unnamed_task__/AverageReturn -29.5662 +__unnamed_task__/Iteration 212 +__unnamed_task__/MaxReturn 18.0941 +__unnamed_task__/MinReturn -57.5752 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.52227 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:17:54 | [maml_trainer] epoch #213 | Sampling for adapation and meta-testing... +2025-04-03 18:19:28 | [maml_trainer] epoch #213 | Finished meta-testing... +2025-04-03 18:19:28 | [maml_trainer] epoch #213 | Saving snapshot... +2025-04-03 18:19:52 | [maml_trainer] epoch #213 | Saved +2025-04-03 18:19:52 | [maml_trainer] epoch #213 | Time 102414.43 s +2025-04-03 18:19:52 | [maml_trainer] epoch #213 | EpochTime 487.26 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3085 +Average/AverageReturn -31.4056 +Average/Iteration 213 +Average/MaxReturn 8.16902 +Average/MinReturn -53.8946 +Average/NumEpisodes 80 +Average/StdReturn 8.6288 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87307 +GaussianMLPPolicy/KLAfter 0.00552239 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000125195 +GaussianMLPPolicy/LossBefore -1.13249e-09 +GaussianMLPPolicy/dLoss -0.000125196 +Iteration 213 +MetaTest/Average/AverageDiscountedReturn -29.2875 +MetaTest/Average/AverageReturn -29.2875 +MetaTest/Average/Iteration 213 +MetaTest/Average/MaxReturn 14.464 +MetaTest/Average/MinReturn -45.408 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.9909 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.2875 +MetaTest/__unnamed_task__/AverageReturn -29.2875 +MetaTest/__unnamed_task__/Iteration 213 +MetaTest/__unnamed_task__/MaxReturn 14.464 +MetaTest/__unnamed_task__/MinReturn -45.408 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.9909 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.848e+06 +__unnamed_task__/AverageDiscountedReturn -15.3085 +__unnamed_task__/AverageReturn -31.4056 +__unnamed_task__/Iteration 213 +__unnamed_task__/MaxReturn 8.16902 +__unnamed_task__/MinReturn -53.8946 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.6288 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:26:04 | [maml_trainer] epoch #214 | Sampling for adapation and meta-testing... +2025-04-03 18:27:36 | [maml_trainer] epoch #214 | Finished meta-testing... +2025-04-03 18:27:36 | [maml_trainer] epoch #214 | Saving snapshot... +2025-04-03 18:28:00 | [maml_trainer] epoch #214 | Saved +2025-04-03 18:28:00 | [maml_trainer] epoch #214 | Time 102902.72 s +2025-04-03 18:28:00 | [maml_trainer] epoch #214 | EpochTime 488.29 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.5053 +Average/AverageReturn -31.5563 +Average/Iteration 214 +Average/MaxReturn 9.38562 +Average/MinReturn -55.8861 +Average/NumEpisodes 80 +Average/StdReturn 9.02206 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87268 +GaussianMLPPolicy/KLAfter 0.0064025 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.72955e-05 +GaussianMLPPolicy/LossBefore -1.01328e-09 +GaussianMLPPolicy/dLoss -7.72965e-05 +Iteration 214 +MetaTest/Average/AverageDiscountedReturn -30.0876 +MetaTest/Average/AverageReturn -30.0876 +MetaTest/Average/Iteration 214 +MetaTest/Average/MaxReturn 5.37967 +MetaTest/Average/MinReturn -43.9203 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1033 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.0876 +MetaTest/__unnamed_task__/AverageReturn -30.0876 +MetaTest/__unnamed_task__/Iteration 214 +MetaTest/__unnamed_task__/MaxReturn 5.37967 +MetaTest/__unnamed_task__/MinReturn -43.9203 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1033 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.88e+06 +__unnamed_task__/AverageDiscountedReturn -15.5053 +__unnamed_task__/AverageReturn -31.5563 +__unnamed_task__/Iteration 214 +__unnamed_task__/MaxReturn 9.38562 +__unnamed_task__/MinReturn -55.8861 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.02206 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:34:12 | [maml_trainer] epoch #215 | Sampling for adapation and meta-testing... +2025-04-03 18:35:45 | [maml_trainer] epoch #215 | Finished meta-testing... +2025-04-03 18:35:45 | [maml_trainer] epoch #215 | Saving snapshot... +2025-04-03 18:36:07 | [maml_trainer] epoch #215 | Saved +2025-04-03 18:36:07 | [maml_trainer] epoch #215 | Time 103389.84 s +2025-04-03 18:36:07 | [maml_trainer] epoch #215 | EpochTime 487.11 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2718 +Average/AverageReturn -31.2047 +Average/Iteration 215 +Average/MaxReturn -11.508 +Average/MinReturn -46.9995 +Average/NumEpisodes 80 +Average/StdReturn 6.87953 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87167 +GaussianMLPPolicy/KLAfter 0.00393195 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000110943 +GaussianMLPPolicy/LossBefore 2.56896e-08 +GaussianMLPPolicy/dLoss 0.000110968 +Iteration 215 +MetaTest/Average/AverageDiscountedReturn -31.2913 +MetaTest/Average/AverageReturn -31.2913 +MetaTest/Average/Iteration 215 +MetaTest/Average/MaxReturn -21.0594 +MetaTest/Average/MinReturn -46.5084 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.56684 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.2913 +MetaTest/__unnamed_task__/AverageReturn -31.2913 +MetaTest/__unnamed_task__/Iteration 215 +MetaTest/__unnamed_task__/MaxReturn -21.0594 +MetaTest/__unnamed_task__/MinReturn -46.5084 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.56684 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.912e+06 +__unnamed_task__/AverageDiscountedReturn -15.2718 +__unnamed_task__/AverageReturn -31.2047 +__unnamed_task__/Iteration 215 +__unnamed_task__/MaxReturn -11.508 +__unnamed_task__/MinReturn -46.9995 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.87953 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:42:16 | [maml_trainer] epoch #216 | Sampling for adapation and meta-testing... +2025-04-03 18:43:47 | [maml_trainer] epoch #216 | Finished meta-testing... +2025-04-03 18:43:47 | [maml_trainer] epoch #216 | Saving snapshot... +2025-04-03 18:44:12 | [maml_trainer] epoch #216 | Saved +2025-04-03 18:44:12 | [maml_trainer] epoch #216 | Time 103874.47 s +2025-04-03 18:44:12 | [maml_trainer] epoch #216 | EpochTime 484.63 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3823 +Average/AverageReturn -32.866 +Average/Iteration 216 +Average/MaxReturn -4.06642 +Average/MinReturn -53.5694 +Average/NumEpisodes 80 +Average/StdReturn 9.73017 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87206 +GaussianMLPPolicy/KLAfter 0.00818862 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000122501 +GaussianMLPPolicy/LossBefore -7.39098e-09 +GaussianMLPPolicy/dLoss 0.000122493 +Iteration 216 +MetaTest/Average/AverageDiscountedReturn -31.2395 +MetaTest/Average/AverageReturn -31.2395 +MetaTest/Average/Iteration 216 +MetaTest/Average/MaxReturn 45.029 +MetaTest/Average/MinReturn -54.9389 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.8014 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.2395 +MetaTest/__unnamed_task__/AverageReturn -31.2395 +MetaTest/__unnamed_task__/Iteration 216 +MetaTest/__unnamed_task__/MaxReturn 45.029 +MetaTest/__unnamed_task__/MinReturn -54.9389 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.8014 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.944e+06 +__unnamed_task__/AverageDiscountedReturn -16.3823 +__unnamed_task__/AverageReturn -32.866 +__unnamed_task__/Iteration 216 +__unnamed_task__/MaxReturn -4.06642 +__unnamed_task__/MinReturn -53.5694 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.73017 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:50:21 | [maml_trainer] epoch #217 | Sampling for adapation and meta-testing... +2025-04-03 18:51:52 | [maml_trainer] epoch #217 | Finished meta-testing... +2025-04-03 18:51:52 | [maml_trainer] epoch #217 | Saving snapshot... +2025-04-03 18:52:17 | [maml_trainer] epoch #217 | Saved +2025-04-03 18:52:17 | [maml_trainer] epoch #217 | Time 104359.11 s +2025-04-03 18:52:17 | [maml_trainer] epoch #217 | EpochTime 484.64 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4008 +Average/AverageReturn -32.5021 +Average/Iteration 217 +Average/MaxReturn -12.2909 +Average/MinReturn -55.883 +Average/NumEpisodes 80 +Average/StdReturn 8.13841 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87216 +GaussianMLPPolicy/KLAfter 0.00711212 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000472457 +GaussianMLPPolicy/LossBefore -1.69873e-08 +GaussianMLPPolicy/dLoss 0.00047244 +Iteration 217 +MetaTest/Average/AverageDiscountedReturn -27.0328 +MetaTest/Average/AverageReturn -27.0328 +MetaTest/Average/Iteration 217 +MetaTest/Average/MaxReturn 12.0509 +MetaTest/Average/MinReturn -43.9717 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3475 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.0328 +MetaTest/__unnamed_task__/AverageReturn -27.0328 +MetaTest/__unnamed_task__/Iteration 217 +MetaTest/__unnamed_task__/MaxReturn 12.0509 +MetaTest/__unnamed_task__/MinReturn -43.9717 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3475 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.976e+06 +__unnamed_task__/AverageDiscountedReturn -16.4008 +__unnamed_task__/AverageReturn -32.5021 +__unnamed_task__/Iteration 217 +__unnamed_task__/MaxReturn -12.2909 +__unnamed_task__/MinReturn -55.883 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.13841 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 18:58:29 | [maml_trainer] epoch #218 | Sampling for adapation and meta-testing... +2025-04-03 19:00:00 | [maml_trainer] epoch #218 | Finished meta-testing... +2025-04-03 19:00:00 | [maml_trainer] epoch #218 | Saving snapshot... +2025-04-03 19:00:23 | [maml_trainer] epoch #218 | Saved +2025-04-03 19:00:23 | [maml_trainer] epoch #218 | Time 104845.78 s +2025-04-03 19:00:23 | [maml_trainer] epoch #218 | EpochTime 486.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.046 +Average/AverageReturn -30.3646 +Average/Iteration 218 +Average/MaxReturn 15.1631 +Average/MinReturn -54.1575 +Average/NumEpisodes 80 +Average/StdReturn 11.1488 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87298 +GaussianMLPPolicy/KLAfter 0.00964648 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000186043 +GaussianMLPPolicy/LossBefore -1.86861e-08 +GaussianMLPPolicy/dLoss -0.000186062 +Iteration 218 +MetaTest/Average/AverageDiscountedReturn -31.0874 +MetaTest/Average/AverageReturn -31.0874 +MetaTest/Average/Iteration 218 +MetaTest/Average/MaxReturn -10.7102 +MetaTest/Average/MinReturn -45.9815 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.99333 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.0874 +MetaTest/__unnamed_task__/AverageReturn -31.0874 +MetaTest/__unnamed_task__/Iteration 218 +MetaTest/__unnamed_task__/MaxReturn -10.7102 +MetaTest/__unnamed_task__/MinReturn -45.9815 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.99333 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.008e+06 +__unnamed_task__/AverageDiscountedReturn -16.046 +__unnamed_task__/AverageReturn -30.3646 +__unnamed_task__/Iteration 218 +__unnamed_task__/MaxReturn 15.1631 +__unnamed_task__/MinReturn -54.1575 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1488 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:06:29 | [maml_trainer] epoch #219 | Sampling for adapation and meta-testing... +2025-04-03 19:07:57 | [maml_trainer] epoch #219 | Finished meta-testing... +2025-04-03 19:07:57 | [maml_trainer] epoch #219 | Saving snapshot... +2025-04-03 19:08:21 | [maml_trainer] epoch #219 | Saved +2025-04-03 19:08:21 | [maml_trainer] epoch #219 | Time 105323.02 s +2025-04-03 19:08:21 | [maml_trainer] epoch #219 | EpochTime 477.24 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.632 +Average/AverageReturn -30.1553 +Average/Iteration 219 +Average/MaxReturn 10.3888 +Average/MinReturn -51.0433 +Average/NumEpisodes 80 +Average/StdReturn 10.021 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87339 +GaussianMLPPolicy/KLAfter 0.00939891 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000253738 +GaussianMLPPolicy/LossBefore 2.98023e-09 +GaussianMLPPolicy/dLoss -0.000253735 +Iteration 219 +MetaTest/Average/AverageDiscountedReturn -31.0297 +MetaTest/Average/AverageReturn -31.0297 +MetaTest/Average/Iteration 219 +MetaTest/Average/MaxReturn -14.8554 +MetaTest/Average/MinReturn -45.3497 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.238 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.0297 +MetaTest/__unnamed_task__/AverageReturn -31.0297 +MetaTest/__unnamed_task__/Iteration 219 +MetaTest/__unnamed_task__/MaxReturn -14.8554 +MetaTest/__unnamed_task__/MinReturn -45.3497 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.238 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.04e+06 +__unnamed_task__/AverageDiscountedReturn -15.632 +__unnamed_task__/AverageReturn -30.1553 +__unnamed_task__/Iteration 219 +__unnamed_task__/MaxReturn 10.3888 +__unnamed_task__/MinReturn -51.0433 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.021 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:14:06 | [maml_trainer] epoch #220 | Sampling for adapation and meta-testing... +2025-04-03 19:15:32 | [maml_trainer] epoch #220 | Finished meta-testing... +2025-04-03 19:15:32 | [maml_trainer] epoch #220 | Saving snapshot... +2025-04-03 19:15:55 | [maml_trainer] epoch #220 | Saved +2025-04-03 19:15:55 | [maml_trainer] epoch #220 | Time 105777.40 s +2025-04-03 19:15:55 | [maml_trainer] epoch #220 | EpochTime 454.37 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5428 +Average/AverageReturn -32.6075 +Average/Iteration 220 +Average/MaxReturn 11.4338 +Average/MinReturn -61.7577 +Average/NumEpisodes 80 +Average/StdReturn 12.3961 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87398 +GaussianMLPPolicy/KLAfter 0.00809536 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.17913e-05 +GaussianMLPPolicy/LossBefore -1.06692e-08 +GaussianMLPPolicy/dLoss -4.1802e-05 +Iteration 220 +MetaTest/Average/AverageDiscountedReturn -35.7866 +MetaTest/Average/AverageReturn -35.7866 +MetaTest/Average/Iteration 220 +MetaTest/Average/MaxReturn -22.3814 +MetaTest/Average/MinReturn -67.6302 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.1545 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.7866 +MetaTest/__unnamed_task__/AverageReturn -35.7866 +MetaTest/__unnamed_task__/Iteration 220 +MetaTest/__unnamed_task__/MaxReturn -22.3814 +MetaTest/__unnamed_task__/MinReturn -67.6302 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.1545 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.072e+06 +__unnamed_task__/AverageDiscountedReturn -16.5428 +__unnamed_task__/AverageReturn -32.6075 +__unnamed_task__/Iteration 220 +__unnamed_task__/MaxReturn 11.4338 +__unnamed_task__/MinReturn -61.7577 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.3961 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:21:42 | [maml_trainer] epoch #221 | Sampling for adapation and meta-testing... +2025-04-03 19:23:09 | [maml_trainer] epoch #221 | Finished meta-testing... +2025-04-03 19:23:09 | [maml_trainer] epoch #221 | Saving snapshot... +2025-04-03 19:23:31 | [maml_trainer] epoch #221 | Saved +2025-04-03 19:23:31 | [maml_trainer] epoch #221 | Time 106233.87 s +2025-04-03 19:23:31 | [maml_trainer] epoch #221 | EpochTime 456.47 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.1557 +Average/AverageReturn -31.7935 +Average/Iteration 221 +Average/MaxReturn 18.6632 +Average/MinReturn -58.7293 +Average/NumEpisodes 80 +Average/StdReturn 11.4792 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87415 +GaussianMLPPolicy/KLAfter 0.00584499 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.35772e-05 +GaussianMLPPolicy/LossBefore -1.77622e-08 +GaussianMLPPolicy/dLoss 3.35595e-05 +Iteration 221 +MetaTest/Average/AverageDiscountedReturn -35.9393 +MetaTest/Average/AverageReturn -35.9393 +MetaTest/Average/Iteration 221 +MetaTest/Average/MaxReturn -24.5374 +MetaTest/Average/MinReturn -58.3235 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.60868 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.9393 +MetaTest/__unnamed_task__/AverageReturn -35.9393 +MetaTest/__unnamed_task__/Iteration 221 +MetaTest/__unnamed_task__/MaxReturn -24.5374 +MetaTest/__unnamed_task__/MinReturn -58.3235 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.60868 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.104e+06 +__unnamed_task__/AverageDiscountedReturn -16.1557 +__unnamed_task__/AverageReturn -31.7935 +__unnamed_task__/Iteration 221 +__unnamed_task__/MaxReturn 18.6632 +__unnamed_task__/MinReturn -58.7293 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4792 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:29:23 | [maml_trainer] epoch #222 | Sampling for adapation and meta-testing... +2025-04-03 19:30:48 | [maml_trainer] epoch #222 | Finished meta-testing... +2025-04-03 19:30:48 | [maml_trainer] epoch #222 | Saving snapshot... +2025-04-03 19:31:11 | [maml_trainer] epoch #222 | Saved +2025-04-03 19:31:11 | [maml_trainer] epoch #222 | Time 106693.95 s +2025-04-03 19:31:11 | [maml_trainer] epoch #222 | EpochTime 460.08 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9585 +Average/AverageReturn -34.3249 +Average/Iteration 222 +Average/MaxReturn -1.4804 +Average/MinReturn -65.5084 +Average/NumEpisodes 80 +Average/StdReturn 10.1691 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87436 +GaussianMLPPolicy/KLAfter 0.00518719 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.37194e-05 +GaussianMLPPolicy/LossBefore 4.17233e-10 +GaussianMLPPolicy/dLoss 2.37198e-05 +Iteration 222 +MetaTest/Average/AverageDiscountedReturn -34.4667 +MetaTest/Average/AverageReturn -34.4667 +MetaTest/Average/Iteration 222 +MetaTest/Average/MaxReturn -2.8891 +MetaTest/Average/MinReturn -61.3189 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.5454 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.4667 +MetaTest/__unnamed_task__/AverageReturn -34.4667 +MetaTest/__unnamed_task__/Iteration 222 +MetaTest/__unnamed_task__/MaxReturn -2.8891 +MetaTest/__unnamed_task__/MinReturn -61.3189 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.5454 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.136e+06 +__unnamed_task__/AverageDiscountedReturn -16.9585 +__unnamed_task__/AverageReturn -34.3249 +__unnamed_task__/Iteration 222 +__unnamed_task__/MaxReturn -1.4804 +__unnamed_task__/MinReturn -65.5084 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1691 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:36:54 | [maml_trainer] epoch #223 | Sampling for adapation and meta-testing... +2025-04-03 19:38:20 | [maml_trainer] epoch #223 | Finished meta-testing... +2025-04-03 19:38:20 | [maml_trainer] epoch #223 | Saving snapshot... +2025-04-03 19:38:43 | [maml_trainer] epoch #223 | Saved +2025-04-03 19:38:43 | [maml_trainer] epoch #223 | Time 107145.41 s +2025-04-03 19:38:43 | [maml_trainer] epoch #223 | EpochTime 451.46 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.8222 +Average/AverageReturn -35.9921 +Average/Iteration 223 +Average/MaxReturn -1.98077 +Average/MinReturn -68.8502 +Average/NumEpisodes 80 +Average/StdReturn 10.3495 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87443 +GaussianMLPPolicy/KLAfter 0.00622569 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.38318e-05 +GaussianMLPPolicy/LossBefore -6.55651e-09 +GaussianMLPPolicy/dLoss 7.38253e-05 +Iteration 223 +MetaTest/Average/AverageDiscountedReturn -38.861 +MetaTest/Average/AverageReturn -38.861 +MetaTest/Average/Iteration 223 +MetaTest/Average/MaxReturn -23.5075 +MetaTest/Average/MinReturn -59.3318 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.70906 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -38.861 +MetaTest/__unnamed_task__/AverageReturn -38.861 +MetaTest/__unnamed_task__/Iteration 223 +MetaTest/__unnamed_task__/MaxReturn -23.5075 +MetaTest/__unnamed_task__/MinReturn -59.3318 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.70906 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.168e+06 +__unnamed_task__/AverageDiscountedReturn -17.8222 +__unnamed_task__/AverageReturn -35.9921 +__unnamed_task__/Iteration 223 +__unnamed_task__/MaxReturn -1.98077 +__unnamed_task__/MinReturn -68.8502 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3495 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:44:44 | [maml_trainer] epoch #224 | Sampling for adapation and meta-testing... +2025-04-03 19:46:16 | [maml_trainer] epoch #224 | Finished meta-testing... +2025-04-03 19:46:16 | [maml_trainer] epoch #224 | Saving snapshot... +2025-04-03 19:46:39 | [maml_trainer] epoch #224 | Saved +2025-04-03 19:46:39 | [maml_trainer] epoch #224 | Time 107621.72 s +2025-04-03 19:46:39 | [maml_trainer] epoch #224 | EpochTime 476.30 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.601 +Average/AverageReturn -34.9604 +Average/Iteration 224 +Average/MaxReturn -3.96904 +Average/MinReturn -73.0513 +Average/NumEpisodes 80 +Average/StdReturn 10.769 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8746 +GaussianMLPPolicy/KLAfter 0.00399106 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000126438 +GaussianMLPPolicy/LossBefore 1.34707e-08 +GaussianMLPPolicy/dLoss 0.000126452 +Iteration 224 +MetaTest/Average/AverageDiscountedReturn -32.9786 +MetaTest/Average/AverageReturn -32.9786 +MetaTest/Average/Iteration 224 +MetaTest/Average/MaxReturn -8.63131 +MetaTest/Average/MinReturn -54.152 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4776 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9786 +MetaTest/__unnamed_task__/AverageReturn -32.9786 +MetaTest/__unnamed_task__/Iteration 224 +MetaTest/__unnamed_task__/MaxReturn -8.63131 +MetaTest/__unnamed_task__/MinReturn -54.152 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4776 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.2e+06 +__unnamed_task__/AverageDiscountedReturn -17.601 +__unnamed_task__/AverageReturn -34.9604 +__unnamed_task__/Iteration 224 +__unnamed_task__/MaxReturn -3.96904 +__unnamed_task__/MinReturn -73.0513 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.769 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 19:52:49 | [maml_trainer] epoch #225 | Sampling for adapation and meta-testing... +2025-04-03 19:54:21 | [maml_trainer] epoch #225 | Finished meta-testing... +2025-04-03 19:54:21 | [maml_trainer] epoch #225 | Saving snapshot... +2025-04-03 19:54:47 | [maml_trainer] epoch #225 | Saved +2025-04-03 19:54:47 | [maml_trainer] epoch #225 | Time 108109.39 s +2025-04-03 19:54:47 | [maml_trainer] epoch #225 | EpochTime 487.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.336 +Average/AverageReturn -34.5331 +Average/Iteration 225 +Average/MaxReturn -17.627 +Average/MinReturn -57.3649 +Average/NumEpisodes 80 +Average/StdReturn 8.17941 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87443 +GaussianMLPPolicy/KLAfter 0.00283235 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000117792 +GaussianMLPPolicy/LossBefore 1.03116e-08 +GaussianMLPPolicy/dLoss -0.000117781 +Iteration 225 +MetaTest/Average/AverageDiscountedReturn -33.1252 +MetaTest/Average/AverageReturn -33.1252 +MetaTest/Average/Iteration 225 +MetaTest/Average/MaxReturn 36.0742 +MetaTest/Average/MinReturn -58.3462 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5639 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.1252 +MetaTest/__unnamed_task__/AverageReturn -33.1252 +MetaTest/__unnamed_task__/Iteration 225 +MetaTest/__unnamed_task__/MaxReturn 36.0742 +MetaTest/__unnamed_task__/MinReturn -58.3462 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5639 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.232e+06 +__unnamed_task__/AverageDiscountedReturn -17.336 +__unnamed_task__/AverageReturn -34.5331 +__unnamed_task__/Iteration 225 +__unnamed_task__/MaxReturn -17.627 +__unnamed_task__/MinReturn -57.3649 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.17941 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:01:13 | [maml_trainer] epoch #226 | Sampling for adapation and meta-testing... +2025-04-03 20:02:45 | [maml_trainer] epoch #226 | Finished meta-testing... +2025-04-03 20:02:45 | [maml_trainer] epoch #226 | Saving snapshot... +2025-04-03 20:03:09 | [maml_trainer] epoch #226 | Saved +2025-04-03 20:03:09 | [maml_trainer] epoch #226 | Time 108611.67 s +2025-04-03 20:03:09 | [maml_trainer] epoch #226 | EpochTime 502.28 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2604 +Average/AverageReturn -33.9029 +Average/Iteration 226 +Average/MaxReturn 2.32998 +Average/MinReturn -60.9104 +Average/NumEpisodes 80 +Average/StdReturn 10.2617 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87546 +GaussianMLPPolicy/KLAfter 0.00119208 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.07024e-05 +GaussianMLPPolicy/LossBefore 2.90871e-08 +GaussianMLPPolicy/dLoss 2.07315e-05 +Iteration 226 +MetaTest/Average/AverageDiscountedReturn -32.1933 +MetaTest/Average/AverageReturn -32.1933 +MetaTest/Average/Iteration 226 +MetaTest/Average/MaxReturn -15.2746 +MetaTest/Average/MinReturn -51.8583 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.12732 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.1933 +MetaTest/__unnamed_task__/AverageReturn -32.1933 +MetaTest/__unnamed_task__/Iteration 226 +MetaTest/__unnamed_task__/MaxReturn -15.2746 +MetaTest/__unnamed_task__/MinReturn -51.8583 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.12732 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.264e+06 +__unnamed_task__/AverageDiscountedReturn -17.2604 +__unnamed_task__/AverageReturn -33.9029 +__unnamed_task__/Iteration 226 +__unnamed_task__/MaxReturn 2.32998 +__unnamed_task__/MinReturn -60.9104 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2617 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:09:23 | [maml_trainer] epoch #227 | Sampling for adapation and meta-testing... +2025-04-03 20:10:57 | [maml_trainer] epoch #227 | Finished meta-testing... +2025-04-03 20:10:57 | [maml_trainer] epoch #227 | Saving snapshot... +2025-04-03 20:11:22 | [maml_trainer] epoch #227 | Saved +2025-04-03 20:11:22 | [maml_trainer] epoch #227 | Time 109104.12 s +2025-04-03 20:11:22 | [maml_trainer] epoch #227 | EpochTime 492.45 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.0557 +Average/AverageReturn -35.5945 +Average/Iteration 227 +Average/MaxReturn -8.18357 +Average/MinReturn -58.747 +Average/NumEpisodes 80 +Average/StdReturn 10.1013 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87554 +GaussianMLPPolicy/KLAfter 0.00121761 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.42265e-05 +GaussianMLPPolicy/LossBefore 2.44379e-09 +GaussianMLPPolicy/dLoss 2.4229e-05 +Iteration 227 +MetaTest/Average/AverageDiscountedReturn -31.6607 +MetaTest/Average/AverageReturn -31.6607 +MetaTest/Average/Iteration 227 +MetaTest/Average/MaxReturn -16.6241 +MetaTest/Average/MinReturn -42.2725 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.0538 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.6607 +MetaTest/__unnamed_task__/AverageReturn -31.6607 +MetaTest/__unnamed_task__/Iteration 227 +MetaTest/__unnamed_task__/MaxReturn -16.6241 +MetaTest/__unnamed_task__/MinReturn -42.2725 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.0538 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.296e+06 +__unnamed_task__/AverageDiscountedReturn -18.0557 +__unnamed_task__/AverageReturn -35.5945 +__unnamed_task__/Iteration 227 +__unnamed_task__/MaxReturn -8.18357 +__unnamed_task__/MinReturn -58.747 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1013 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:17:18 | [maml_trainer] epoch #228 | Sampling for adapation and meta-testing... +2025-04-03 20:19:12 | [maml_trainer] epoch #228 | Finished meta-testing... +2025-04-03 20:19:12 | [maml_trainer] epoch #228 | Saving snapshot... +2025-04-03 20:19:39 | [maml_trainer] epoch #228 | Saved +2025-04-03 20:19:39 | [maml_trainer] epoch #228 | Time 109601.27 s +2025-04-03 20:19:39 | [maml_trainer] epoch #228 | EpochTime 497.15 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5791 +Average/AverageReturn -34.6572 +Average/Iteration 228 +Average/MaxReturn 4.55914 +Average/MinReturn -62.731 +Average/NumEpisodes 80 +Average/StdReturn 11.7714 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87475 +GaussianMLPPolicy/KLAfter 0.00165685 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.70321e-05 +GaussianMLPPolicy/LossBefore 4.17232e-09 +GaussianMLPPolicy/dLoss -8.70279e-05 +Iteration 228 +MetaTest/Average/AverageDiscountedReturn -32.9679 +MetaTest/Average/AverageReturn -32.9679 +MetaTest/Average/Iteration 228 +MetaTest/Average/MaxReturn -21.167 +MetaTest/Average/MinReturn -49.4788 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.68428 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9679 +MetaTest/__unnamed_task__/AverageReturn -32.9679 +MetaTest/__unnamed_task__/Iteration 228 +MetaTest/__unnamed_task__/MaxReturn -21.167 +MetaTest/__unnamed_task__/MinReturn -49.4788 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.68428 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.328e+06 +__unnamed_task__/AverageDiscountedReturn -17.5791 +__unnamed_task__/AverageReturn -34.6572 +__unnamed_task__/Iteration 228 +__unnamed_task__/MaxReturn 4.55914 +__unnamed_task__/MinReturn -62.731 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.7714 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:25:32 | [maml_trainer] epoch #229 | Sampling for adapation and meta-testing... +2025-04-03 20:27:00 | [maml_trainer] epoch #229 | Finished meta-testing... +2025-04-03 20:27:00 | [maml_trainer] epoch #229 | Saving snapshot... +2025-04-03 20:27:22 | [maml_trainer] epoch #229 | Saved +2025-04-03 20:27:22 | [maml_trainer] epoch #229 | Time 110064.62 s +2025-04-03 20:27:22 | [maml_trainer] epoch #229 | EpochTime 463.34 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.0713 +Average/AverageReturn -33.3429 +Average/Iteration 229 +Average/MaxReturn 18.1931 +Average/MinReturn -59.6554 +Average/NumEpisodes 80 +Average/StdReturn 11.6183 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87346 +GaussianMLPPolicy/KLAfter 0.0013369 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000108625 +GaussianMLPPolicy/LossBefore 8.9407e-10 +GaussianMLPPolicy/dLoss 0.000108626 +Iteration 229 +MetaTest/Average/AverageDiscountedReturn -33.6757 +MetaTest/Average/AverageReturn -33.6757 +MetaTest/Average/Iteration 229 +MetaTest/Average/MaxReturn -15.2992 +MetaTest/Average/MinReturn -54.1673 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.47039 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.6757 +MetaTest/__unnamed_task__/AverageReturn -33.6757 +MetaTest/__unnamed_task__/Iteration 229 +MetaTest/__unnamed_task__/MaxReturn -15.2992 +MetaTest/__unnamed_task__/MinReturn -54.1673 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.47039 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.36e+06 +__unnamed_task__/AverageDiscountedReturn -17.0713 +__unnamed_task__/AverageReturn -33.3429 +__unnamed_task__/Iteration 229 +__unnamed_task__/MaxReturn 18.1931 +__unnamed_task__/MinReturn -59.6554 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6183 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:33:16 | [maml_trainer] epoch #230 | Sampling for adapation and meta-testing... +2025-04-03 20:34:46 | [maml_trainer] epoch #230 | Finished meta-testing... +2025-04-03 20:34:46 | [maml_trainer] epoch #230 | Saving snapshot... +2025-04-03 20:35:12 | [maml_trainer] epoch #230 | Saved +2025-04-03 20:35:12 | [maml_trainer] epoch #230 | Time 110534.06 s +2025-04-03 20:35:12 | [maml_trainer] epoch #230 | EpochTime 469.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -18.2204 +Average/AverageReturn -36.0206 +Average/Iteration 230 +Average/MaxReturn -10.1894 +Average/MinReturn -59.8046 +Average/NumEpisodes 80 +Average/StdReturn 9.26776 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87286 +GaussianMLPPolicy/KLAfter 0.00173376 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.10444e-05 +GaussianMLPPolicy/LossBefore -7.92742e-09 +GaussianMLPPolicy/dLoss 1.10365e-05 +Iteration 230 +MetaTest/Average/AverageDiscountedReturn -31.8793 +MetaTest/Average/AverageReturn -31.8793 +MetaTest/Average/Iteration 230 +MetaTest/Average/MaxReturn -17.3862 +MetaTest/Average/MinReturn -58.4897 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.13403 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.8793 +MetaTest/__unnamed_task__/AverageReturn -31.8793 +MetaTest/__unnamed_task__/Iteration 230 +MetaTest/__unnamed_task__/MaxReturn -17.3862 +MetaTest/__unnamed_task__/MinReturn -58.4897 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.13403 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.392e+06 +__unnamed_task__/AverageDiscountedReturn -18.2204 +__unnamed_task__/AverageReturn -36.0206 +__unnamed_task__/Iteration 230 +__unnamed_task__/MaxReturn -10.1894 +__unnamed_task__/MinReturn -59.8046 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.26776 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:41:09 | [maml_trainer] epoch #231 | Sampling for adapation and meta-testing... +2025-04-03 20:42:39 | [maml_trainer] epoch #231 | Finished meta-testing... +2025-04-03 20:42:39 | [maml_trainer] epoch #231 | Saving snapshot... +2025-04-03 20:43:03 | [maml_trainer] epoch #231 | Saved +2025-04-03 20:43:03 | [maml_trainer] epoch #231 | Time 111005.18 s +2025-04-03 20:43:03 | [maml_trainer] epoch #231 | EpochTime 471.12 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4855 +Average/AverageReturn -34.5873 +Average/Iteration 231 +Average/MaxReturn 24.9121 +Average/MinReturn -56.5397 +Average/NumEpisodes 80 +Average/StdReturn 10.2078 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87272 +GaussianMLPPolicy/KLAfter 0.0025754 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000206014 +GaussianMLPPolicy/LossBefore -1.51992e-09 +GaussianMLPPolicy/dLoss 0.000206012 +Iteration 231 +MetaTest/Average/AverageDiscountedReturn -33.2672 +MetaTest/Average/AverageReturn -33.2672 +MetaTest/Average/Iteration 231 +MetaTest/Average/MaxReturn -20.6589 +MetaTest/Average/MinReturn -50.4219 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.7574 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.2672 +MetaTest/__unnamed_task__/AverageReturn -33.2672 +MetaTest/__unnamed_task__/Iteration 231 +MetaTest/__unnamed_task__/MaxReturn -20.6589 +MetaTest/__unnamed_task__/MinReturn -50.4219 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.7574 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.424e+06 +__unnamed_task__/AverageDiscountedReturn -17.4855 +__unnamed_task__/AverageReturn -34.5873 +__unnamed_task__/Iteration 231 +__unnamed_task__/MaxReturn 24.9121 +__unnamed_task__/MinReturn -56.5397 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2078 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:49:01 | [maml_trainer] epoch #232 | Sampling for adapation and meta-testing... +2025-04-03 20:50:33 | [maml_trainer] epoch #232 | Finished meta-testing... +2025-04-03 20:50:33 | [maml_trainer] epoch #232 | Saving snapshot... +2025-04-03 20:50:56 | [maml_trainer] epoch #232 | Saved +2025-04-03 20:50:56 | [maml_trainer] epoch #232 | Time 111478.55 s +2025-04-03 20:50:56 | [maml_trainer] epoch #232 | EpochTime 473.37 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5843 +Average/AverageReturn -32.1775 +Average/Iteration 232 +Average/MaxReturn 26.9667 +Average/MinReturn -60.666 +Average/NumEpisodes 80 +Average/StdReturn 13.2066 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87244 +GaussianMLPPolicy/KLAfter 0.00373923 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000161531 +GaussianMLPPolicy/LossBefore 8.9407e-09 +GaussianMLPPolicy/dLoss 0.00016154 +Iteration 232 +MetaTest/Average/AverageDiscountedReturn -32.9794 +MetaTest/Average/AverageReturn -32.9794 +MetaTest/Average/Iteration 232 +MetaTest/Average/MaxReturn -3.58206 +MetaTest/Average/MinReturn -50.4131 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.7296 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.9794 +MetaTest/__unnamed_task__/AverageReturn -32.9794 +MetaTest/__unnamed_task__/Iteration 232 +MetaTest/__unnamed_task__/MaxReturn -3.58206 +MetaTest/__unnamed_task__/MinReturn -50.4131 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.7296 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.456e+06 +__unnamed_task__/AverageDiscountedReturn -16.5843 +__unnamed_task__/AverageReturn -32.1775 +__unnamed_task__/Iteration 232 +__unnamed_task__/MaxReturn 26.9667 +__unnamed_task__/MinReturn -60.666 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2066 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 20:56:57 | [maml_trainer] epoch #233 | Sampling for adapation and meta-testing... +2025-04-03 20:58:27 | [maml_trainer] epoch #233 | Finished meta-testing... +2025-04-03 20:58:27 | [maml_trainer] epoch #233 | Saving snapshot... +2025-04-03 20:58:50 | [maml_trainer] epoch #233 | Saved +2025-04-03 20:58:50 | [maml_trainer] epoch #233 | Time 111952.14 s +2025-04-03 20:58:50 | [maml_trainer] epoch #233 | EpochTime 473.58 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7773 +Average/AverageReturn -30.3631 +Average/Iteration 233 +Average/MaxReturn 17.2819 +Average/MinReturn -55.7666 +Average/NumEpisodes 80 +Average/StdReturn 11.417 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.87216 +GaussianMLPPolicy/KLAfter 0.00422931 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000114672 +GaussianMLPPolicy/LossBefore 3.09944e-09 +GaussianMLPPolicy/dLoss 0.000114675 +Iteration 233 +MetaTest/Average/AverageDiscountedReturn -33.0271 +MetaTest/Average/AverageReturn -33.0271 +MetaTest/Average/Iteration 233 +MetaTest/Average/MaxReturn -21.9658 +MetaTest/Average/MinReturn -43.8917 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.45293 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.0271 +MetaTest/__unnamed_task__/AverageReturn -33.0271 +MetaTest/__unnamed_task__/Iteration 233 +MetaTest/__unnamed_task__/MaxReturn -21.9658 +MetaTest/__unnamed_task__/MinReturn -43.8917 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.45293 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.488e+06 +__unnamed_task__/AverageDiscountedReturn -15.7773 +__unnamed_task__/AverageReturn -30.3631 +__unnamed_task__/Iteration 233 +__unnamed_task__/MaxReturn 17.2819 +__unnamed_task__/MinReturn -55.7666 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.417 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:04:42 | [maml_trainer] epoch #234 | Sampling for adapation and meta-testing... +2025-04-03 21:06:12 | [maml_trainer] epoch #234 | Finished meta-testing... +2025-04-03 21:06:12 | [maml_trainer] epoch #234 | Saving snapshot... +2025-04-03 21:06:37 | [maml_trainer] epoch #234 | Saved +2025-04-03 21:06:37 | [maml_trainer] epoch #234 | Time 112419.05 s +2025-04-03 21:06:37 | [maml_trainer] epoch #234 | EpochTime 466.91 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7153 +Average/AverageReturn -30.1009 +Average/Iteration 234 +Average/MaxReturn 25.9986 +Average/MinReturn -51.7522 +Average/NumEpisodes 80 +Average/StdReturn 12.5031 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8717 +GaussianMLPPolicy/KLAfter 0.00344896 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000108329 +GaussianMLPPolicy/LossBefore 7.80821e-09 +GaussianMLPPolicy/dLoss -0.000108321 +Iteration 234 +MetaTest/Average/AverageDiscountedReturn -33.6828 +MetaTest/Average/AverageReturn -33.6828 +MetaTest/Average/Iteration 234 +MetaTest/Average/MaxReturn -19.4716 +MetaTest/Average/MinReturn -46.7989 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.17601 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.6828 +MetaTest/__unnamed_task__/AverageReturn -33.6828 +MetaTest/__unnamed_task__/Iteration 234 +MetaTest/__unnamed_task__/MaxReturn -19.4716 +MetaTest/__unnamed_task__/MinReturn -46.7989 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.17601 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.52e+06 +__unnamed_task__/AverageDiscountedReturn -15.7153 +__unnamed_task__/AverageReturn -30.1009 +__unnamed_task__/Iteration 234 +__unnamed_task__/MaxReturn 25.9986 +__unnamed_task__/MinReturn -51.7522 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.5031 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:12:41 | [maml_trainer] epoch #235 | Sampling for adapation and meta-testing... +2025-04-03 21:14:07 | [maml_trainer] epoch #235 | Finished meta-testing... +2025-04-03 21:14:07 | [maml_trainer] epoch #235 | Saving snapshot... +2025-04-03 21:14:29 | [maml_trainer] epoch #235 | Saved +2025-04-03 21:14:29 | [maml_trainer] epoch #235 | Time 112891.37 s +2025-04-03 21:14:29 | [maml_trainer] epoch #235 | EpochTime 472.32 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9523 +Average/AverageReturn -34.3865 +Average/Iteration 235 +Average/MaxReturn -6.32435 +Average/MinReturn -52.632 +Average/NumEpisodes 80 +Average/StdReturn 8.44564 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86975 +GaussianMLPPolicy/KLAfter 0.00390951 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.41703e-05 +GaussianMLPPolicy/LossBefore -6.91414e-09 +GaussianMLPPolicy/dLoss -1.41773e-05 +Iteration 235 +MetaTest/Average/AverageDiscountedReturn -31.3468 +MetaTest/Average/AverageReturn -31.3468 +MetaTest/Average/Iteration 235 +MetaTest/Average/MaxReturn 7.48094 +MetaTest/Average/MinReturn -52.8668 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.6093 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.3468 +MetaTest/__unnamed_task__/AverageReturn -31.3468 +MetaTest/__unnamed_task__/Iteration 235 +MetaTest/__unnamed_task__/MaxReturn 7.48094 +MetaTest/__unnamed_task__/MinReturn -52.8668 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.6093 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.552e+06 +__unnamed_task__/AverageDiscountedReturn -16.9523 +__unnamed_task__/AverageReturn -34.3865 +__unnamed_task__/Iteration 235 +__unnamed_task__/MaxReturn -6.32435 +__unnamed_task__/MinReturn -52.632 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.44564 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:20:43 | [maml_trainer] epoch #236 | Sampling for adapation and meta-testing... +2025-04-03 21:22:12 | [maml_trainer] epoch #236 | Finished meta-testing... +2025-04-03 21:22:12 | [maml_trainer] epoch #236 | Saving snapshot... +2025-04-03 21:22:36 | [maml_trainer] epoch #236 | Saved +2025-04-03 21:22:36 | [maml_trainer] epoch #236 | Time 113378.53 s +2025-04-03 21:22:36 | [maml_trainer] epoch #236 | EpochTime 487.15 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2399 +Average/AverageReturn -32.4307 +Average/Iteration 236 +Average/MaxReturn -3.63031 +Average/MinReturn -52.1637 +Average/NumEpisodes 80 +Average/StdReturn 9.54102 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.868 +GaussianMLPPolicy/KLAfter 0.00430893 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.37355e-05 +GaussianMLPPolicy/LossBefore 1.2517e-09 +GaussianMLPPolicy/dLoss 6.37367e-05 +Iteration 236 +MetaTest/Average/AverageDiscountedReturn -36.6632 +MetaTest/Average/AverageReturn -36.6632 +MetaTest/Average/Iteration 236 +MetaTest/Average/MaxReturn -23.1222 +MetaTest/Average/MinReturn -55.3907 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.51912 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.6632 +MetaTest/__unnamed_task__/AverageReturn -36.6632 +MetaTest/__unnamed_task__/Iteration 236 +MetaTest/__unnamed_task__/MaxReturn -23.1222 +MetaTest/__unnamed_task__/MinReturn -55.3907 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.51912 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.584e+06 +__unnamed_task__/AverageDiscountedReturn -16.2399 +__unnamed_task__/AverageReturn -32.4307 +__unnamed_task__/Iteration 236 +__unnamed_task__/MaxReturn -3.63031 +__unnamed_task__/MinReturn -52.1637 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.54102 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:28:55 | [maml_trainer] epoch #237 | Sampling for adapation and meta-testing... +2025-04-03 21:30:22 | [maml_trainer] epoch #237 | Finished meta-testing... +2025-04-03 21:30:22 | [maml_trainer] epoch #237 | Saving snapshot... +2025-04-03 21:30:45 | [maml_trainer] epoch #237 | Saved +2025-04-03 21:30:45 | [maml_trainer] epoch #237 | Time 113867.84 s +2025-04-03 21:30:45 | [maml_trainer] epoch #237 | EpochTime 489.31 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4274 +Average/AverageReturn -32.5221 +Average/Iteration 237 +Average/MaxReturn -5.7152 +Average/MinReturn -60.0314 +Average/NumEpisodes 80 +Average/StdReturn 8.93074 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86734 +GaussianMLPPolicy/KLAfter 0.00264751 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.58835e-05 +GaussianMLPPolicy/LossBefore -1.91331e-08 +GaussianMLPPolicy/dLoss -4.59027e-05 +Iteration 237 +MetaTest/Average/AverageDiscountedReturn -30.3026 +MetaTest/Average/AverageReturn -30.3026 +MetaTest/Average/Iteration 237 +MetaTest/Average/MaxReturn -11.6164 +MetaTest/Average/MinReturn -50.3382 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1826 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.3026 +MetaTest/__unnamed_task__/AverageReturn -30.3026 +MetaTest/__unnamed_task__/Iteration 237 +MetaTest/__unnamed_task__/MaxReturn -11.6164 +MetaTest/__unnamed_task__/MinReturn -50.3382 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1826 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.616e+06 +__unnamed_task__/AverageDiscountedReturn -16.4274 +__unnamed_task__/AverageReturn -32.5221 +__unnamed_task__/Iteration 237 +__unnamed_task__/MaxReturn -5.7152 +__unnamed_task__/MinReturn -60.0314 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.93074 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:36:32 | [maml_trainer] epoch #238 | Sampling for adapation and meta-testing... +2025-04-03 21:37:59 | [maml_trainer] epoch #238 | Finished meta-testing... +2025-04-03 21:37:59 | [maml_trainer] epoch #238 | Saving snapshot... +2025-04-03 21:38:20 | [maml_trainer] epoch #238 | Saved +2025-04-03 21:38:20 | [maml_trainer] epoch #238 | Time 114322.77 s +2025-04-03 21:38:20 | [maml_trainer] epoch #238 | EpochTime 454.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8633 +Average/AverageReturn -32.607 +Average/Iteration 238 +Average/MaxReturn 5.48344 +Average/MinReturn -51.7166 +Average/NumEpisodes 80 +Average/StdReturn 10.2992 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86495 +GaussianMLPPolicy/KLAfter 0.00316588 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000135541 +GaussianMLPPolicy/LossBefore 5.06639e-09 +GaussianMLPPolicy/dLoss 0.000135546 +Iteration 238 +MetaTest/Average/AverageDiscountedReturn -30.5579 +MetaTest/Average/AverageReturn -30.5579 +MetaTest/Average/Iteration 238 +MetaTest/Average/MaxReturn -19.0215 +MetaTest/Average/MinReturn -47.5033 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.45738 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.5579 +MetaTest/__unnamed_task__/AverageReturn -30.5579 +MetaTest/__unnamed_task__/Iteration 238 +MetaTest/__unnamed_task__/MaxReturn -19.0215 +MetaTest/__unnamed_task__/MinReturn -47.5033 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.45738 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.648e+06 +__unnamed_task__/AverageDiscountedReturn -16.8633 +__unnamed_task__/AverageReturn -32.607 +__unnamed_task__/Iteration 238 +__unnamed_task__/MaxReturn 5.48344 +__unnamed_task__/MinReturn -51.7166 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2992 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:44:01 | [maml_trainer] epoch #239 | Sampling for adapation and meta-testing... +2025-04-03 21:45:28 | [maml_trainer] epoch #239 | Finished meta-testing... +2025-04-03 21:45:28 | [maml_trainer] epoch #239 | Saving snapshot... +2025-04-03 21:45:51 | [maml_trainer] epoch #239 | Saved +2025-04-03 21:45:51 | [maml_trainer] epoch #239 | Time 114773.40 s +2025-04-03 21:45:51 | [maml_trainer] epoch #239 | EpochTime 450.63 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.7726 +Average/AverageReturn -32.7072 +Average/Iteration 239 +Average/MaxReturn -13.6736 +Average/MinReturn -53.2705 +Average/NumEpisodes 80 +Average/StdReturn 7.20469 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86292 +GaussianMLPPolicy/KLAfter 0.00318875 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.41672e-05 +GaussianMLPPolicy/LossBefore 5.00679e-09 +GaussianMLPPolicy/dLoss 2.41722e-05 +Iteration 239 +MetaTest/Average/AverageDiscountedReturn -27.6049 +MetaTest/Average/AverageReturn -27.6049 +MetaTest/Average/Iteration 239 +MetaTest/Average/MaxReturn 12.2661 +MetaTest/Average/MinReturn -49.7924 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.4717 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.6049 +MetaTest/__unnamed_task__/AverageReturn -27.6049 +MetaTest/__unnamed_task__/Iteration 239 +MetaTest/__unnamed_task__/MaxReturn 12.2661 +MetaTest/__unnamed_task__/MinReturn -49.7924 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.4717 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.68e+06 +__unnamed_task__/AverageDiscountedReturn -16.7726 +__unnamed_task__/AverageReturn -32.7072 +__unnamed_task__/Iteration 239 +__unnamed_task__/MaxReturn -13.6736 +__unnamed_task__/MinReturn -53.2705 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.20469 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:51:34 | [maml_trainer] epoch #240 | Sampling for adapation and meta-testing... +2025-04-03 21:53:01 | [maml_trainer] epoch #240 | Finished meta-testing... +2025-04-03 21:53:01 | [maml_trainer] epoch #240 | Saving snapshot... +2025-04-03 21:53:23 | [maml_trainer] epoch #240 | Saved +2025-04-03 21:53:23 | [maml_trainer] epoch #240 | Time 115225.89 s +2025-04-03 21:53:23 | [maml_trainer] epoch #240 | EpochTime 452.49 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.5205 +Average/AverageReturn -33.9265 +Average/Iteration 240 +Average/MaxReturn 11.2735 +Average/MinReturn -63.2428 +Average/NumEpisodes 80 +Average/StdReturn 11.1875 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86142 +GaussianMLPPolicy/KLAfter 0.0027988 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00011136 +GaussianMLPPolicy/LossBefore -1.09375e-08 +GaussianMLPPolicy/dLoss -0.000111371 +Iteration 240 +MetaTest/Average/AverageDiscountedReturn -32.4025 +MetaTest/Average/AverageReturn -32.4025 +MetaTest/Average/Iteration 240 +MetaTest/Average/MaxReturn 0.494428 +MetaTest/Average/MinReturn -64.1431 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.6932 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.4025 +MetaTest/__unnamed_task__/AverageReturn -32.4025 +MetaTest/__unnamed_task__/Iteration 240 +MetaTest/__unnamed_task__/MaxReturn 0.494428 +MetaTest/__unnamed_task__/MinReturn -64.1431 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.6932 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.712e+06 +__unnamed_task__/AverageDiscountedReturn -17.5205 +__unnamed_task__/AverageReturn -33.9265 +__unnamed_task__/Iteration 240 +__unnamed_task__/MaxReturn 11.2735 +__unnamed_task__/MinReturn -63.2428 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1875 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 21:59:09 | [maml_trainer] epoch #241 | Sampling for adapation and meta-testing... +2025-04-03 22:00:36 | [maml_trainer] epoch #241 | Finished meta-testing... +2025-04-03 22:00:36 | [maml_trainer] epoch #241 | Saving snapshot... +2025-04-03 22:00:58 | [maml_trainer] epoch #241 | Saved +2025-04-03 22:00:58 | [maml_trainer] epoch #241 | Time 115680.00 s +2025-04-03 22:00:58 | [maml_trainer] epoch #241 | EpochTime 454.10 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.2154 +Average/AverageReturn -33.2385 +Average/Iteration 241 +Average/MaxReturn 5.80603 +Average/MinReturn -61.4112 +Average/NumEpisodes 80 +Average/StdReturn 11.5307 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85991 +GaussianMLPPolicy/KLAfter 0.00222073 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.2686e-05 +GaussianMLPPolicy/LossBefore -1.5229e-08 +GaussianMLPPolicy/dLoss -8.27013e-05 +Iteration 241 +MetaTest/Average/AverageDiscountedReturn -34.6177 +MetaTest/Average/AverageReturn -34.6177 +MetaTest/Average/Iteration 241 +MetaTest/Average/MaxReturn -7.28354 +MetaTest/Average/MinReturn -52.1718 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.60454 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.6177 +MetaTest/__unnamed_task__/AverageReturn -34.6177 +MetaTest/__unnamed_task__/Iteration 241 +MetaTest/__unnamed_task__/MaxReturn -7.28354 +MetaTest/__unnamed_task__/MinReturn -52.1718 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.60454 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.744e+06 +__unnamed_task__/AverageDiscountedReturn -17.2154 +__unnamed_task__/AverageReturn -33.2385 +__unnamed_task__/Iteration 241 +__unnamed_task__/MaxReturn 5.80603 +__unnamed_task__/MinReturn -61.4112 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.5307 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:06:38 | [maml_trainer] epoch #242 | Sampling for adapation and meta-testing... +2025-04-03 22:08:03 | [maml_trainer] epoch #242 | Finished meta-testing... +2025-04-03 22:08:03 | [maml_trainer] epoch #242 | Saving snapshot... +2025-04-03 22:08:27 | [maml_trainer] epoch #242 | Saved +2025-04-03 22:08:27 | [maml_trainer] epoch #242 | Time 116129.69 s +2025-04-03 22:08:27 | [maml_trainer] epoch #242 | EpochTime 449.68 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.1737 +Average/AverageReturn -33.5931 +Average/Iteration 242 +Average/MaxReturn -11.5778 +Average/MinReturn -60.1661 +Average/NumEpisodes 80 +Average/StdReturn 9.51785 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85966 +GaussianMLPPolicy/KLAfter 0.00131943 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.32019e-05 +GaussianMLPPolicy/LossBefore 5.126e-09 +GaussianMLPPolicy/dLoss 4.3207e-05 +Iteration 242 +MetaTest/Average/AverageDiscountedReturn -37.3092 +MetaTest/Average/AverageReturn -37.3092 +MetaTest/Average/Iteration 242 +MetaTest/Average/MaxReturn -24.2314 +MetaTest/Average/MinReturn -58.7115 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.59853 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.3092 +MetaTest/__unnamed_task__/AverageReturn -37.3092 +MetaTest/__unnamed_task__/Iteration 242 +MetaTest/__unnamed_task__/MaxReturn -24.2314 +MetaTest/__unnamed_task__/MinReturn -58.7115 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.59853 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.776e+06 +__unnamed_task__/AverageDiscountedReturn -17.1737 +__unnamed_task__/AverageReturn -33.5931 +__unnamed_task__/Iteration 242 +__unnamed_task__/MaxReturn -11.5778 +__unnamed_task__/MinReturn -60.1661 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.51785 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:14:19 | [maml_trainer] epoch #243 | Sampling for adapation and meta-testing... +2025-04-03 22:15:52 | [maml_trainer] epoch #243 | Finished meta-testing... +2025-04-03 22:15:52 | [maml_trainer] epoch #243 | Saving snapshot... +2025-04-03 22:16:16 | [maml_trainer] epoch #243 | Saved +2025-04-03 22:16:16 | [maml_trainer] epoch #243 | Time 116598.73 s +2025-04-03 22:16:16 | [maml_trainer] epoch #243 | EpochTime 469.04 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.3925 +Average/AverageReturn -33.9822 +Average/Iteration 243 +Average/MaxReturn -1.54747 +Average/MinReturn -66.0087 +Average/NumEpisodes 80 +Average/StdReturn 12.4184 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85867 +GaussianMLPPolicy/KLAfter 0.00127442 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.46105e-06 +GaussianMLPPolicy/LossBefore 1.97887e-08 +GaussianMLPPolicy/dLoss -6.44127e-06 +Iteration 243 +MetaTest/Average/AverageDiscountedReturn -35.3253 +MetaTest/Average/AverageReturn -35.3253 +MetaTest/Average/Iteration 243 +MetaTest/Average/MaxReturn -16.9336 +MetaTest/Average/MinReturn -59.368 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.68308 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.3253 +MetaTest/__unnamed_task__/AverageReturn -35.3253 +MetaTest/__unnamed_task__/Iteration 243 +MetaTest/__unnamed_task__/MaxReturn -16.9336 +MetaTest/__unnamed_task__/MinReturn -59.368 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.68308 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.808e+06 +__unnamed_task__/AverageDiscountedReturn -17.3925 +__unnamed_task__/AverageReturn -33.9822 +__unnamed_task__/Iteration 243 +__unnamed_task__/MaxReturn -1.54747 +__unnamed_task__/MinReturn -66.0087 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4184 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:22:35 | [maml_trainer] epoch #244 | Sampling for adapation and meta-testing... +2025-04-03 22:24:18 | [maml_trainer] epoch #244 | Finished meta-testing... +2025-04-03 22:24:18 | [maml_trainer] epoch #244 | Saving snapshot... +2025-04-03 22:24:41 | [maml_trainer] epoch #244 | Saved +2025-04-03 22:24:41 | [maml_trainer] epoch #244 | Time 117103.33 s +2025-04-03 22:24:41 | [maml_trainer] epoch #244 | EpochTime 504.60 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5722 +Average/AverageReturn -31.5757 +Average/Iteration 244 +Average/MaxReturn 3.17201 +Average/MinReturn -58.588 +Average/NumEpisodes 80 +Average/StdReturn 11.2161 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85841 +GaussianMLPPolicy/KLAfter 0.00446976 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000167975 +GaussianMLPPolicy/LossBefore -3.63588e-09 +GaussianMLPPolicy/dLoss -0.000167978 +Iteration 244 +MetaTest/Average/AverageDiscountedReturn -30.2438 +MetaTest/Average/AverageReturn -30.2438 +MetaTest/Average/Iteration 244 +MetaTest/Average/MaxReturn -14.4808 +MetaTest/Average/MinReturn -58.1693 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1472 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.2438 +MetaTest/__unnamed_task__/AverageReturn -30.2438 +MetaTest/__unnamed_task__/Iteration 244 +MetaTest/__unnamed_task__/MaxReturn -14.4808 +MetaTest/__unnamed_task__/MinReturn -58.1693 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1472 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.84e+06 +__unnamed_task__/AverageDiscountedReturn -16.5722 +__unnamed_task__/AverageReturn -31.5757 +__unnamed_task__/Iteration 244 +__unnamed_task__/MaxReturn 3.17201 +__unnamed_task__/MinReturn -58.588 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.2161 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:30:59 | [maml_trainer] epoch #245 | Sampling for adapation and meta-testing... +2025-04-03 22:32:31 | [maml_trainer] epoch #245 | Finished meta-testing... +2025-04-03 22:32:31 | [maml_trainer] epoch #245 | Saving snapshot... +2025-04-03 22:32:55 | [maml_trainer] epoch #245 | Saved +2025-04-03 22:32:55 | [maml_trainer] epoch #245 | Time 117597.25 s +2025-04-03 22:32:55 | [maml_trainer] epoch #245 | EpochTime 493.92 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.0828 +Average/AverageReturn -30.0528 +Average/Iteration 245 +Average/MaxReturn 12.4375 +Average/MinReturn -51.5847 +Average/NumEpisodes 80 +Average/StdReturn 10.227 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85896 +GaussianMLPPolicy/KLAfter 0.00579469 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.79062e-06 +GaussianMLPPolicy/LossBefore 6.79493e-09 +GaussianMLPPolicy/dLoss -5.78383e-06 +Iteration 245 +MetaTest/Average/AverageDiscountedReturn -31.5047 +MetaTest/Average/AverageReturn -31.5047 +MetaTest/Average/Iteration 245 +MetaTest/Average/MaxReturn 8.33095 +MetaTest/Average/MinReturn -50.2065 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5126 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.5047 +MetaTest/__unnamed_task__/AverageReturn -31.5047 +MetaTest/__unnamed_task__/Iteration 245 +MetaTest/__unnamed_task__/MaxReturn 8.33095 +MetaTest/__unnamed_task__/MinReturn -50.2065 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5126 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.872e+06 +__unnamed_task__/AverageDiscountedReturn -16.0828 +__unnamed_task__/AverageReturn -30.0528 +__unnamed_task__/Iteration 245 +__unnamed_task__/MaxReturn 12.4375 +__unnamed_task__/MinReturn -51.5847 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.227 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:38:49 | [maml_trainer] epoch #246 | Sampling for adapation and meta-testing... +2025-04-03 22:40:16 | [maml_trainer] epoch #246 | Finished meta-testing... +2025-04-03 22:40:16 | [maml_trainer] epoch #246 | Saving snapshot... +2025-04-03 22:40:38 | [maml_trainer] epoch #246 | Saved +2025-04-03 22:40:38 | [maml_trainer] epoch #246 | Time 118060.61 s +2025-04-03 22:40:38 | [maml_trainer] epoch #246 | EpochTime 463.35 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.8048 +Average/AverageReturn -33.1831 +Average/Iteration 246 +Average/MaxReturn 4.92859 +Average/MinReturn -56.2546 +Average/NumEpisodes 80 +Average/StdReturn 11.8155 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85947 +GaussianMLPPolicy/KLAfter 0.00571666 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000326572 +GaussianMLPPolicy/LossBefore -7.27177e-09 +GaussianMLPPolicy/dLoss -0.000326579 +Iteration 246 +MetaTest/Average/AverageDiscountedReturn -32.7545 +MetaTest/Average/AverageReturn -32.7545 +MetaTest/Average/Iteration 246 +MetaTest/Average/MaxReturn -15.6461 +MetaTest/Average/MinReturn -44.2087 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.26258 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.7545 +MetaTest/__unnamed_task__/AverageReturn -32.7545 +MetaTest/__unnamed_task__/Iteration 246 +MetaTest/__unnamed_task__/MaxReturn -15.6461 +MetaTest/__unnamed_task__/MinReturn -44.2087 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.26258 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.904e+06 +__unnamed_task__/AverageDiscountedReturn -16.8048 +__unnamed_task__/AverageReturn -33.1831 +__unnamed_task__/Iteration 246 +__unnamed_task__/MaxReturn 4.92859 +__unnamed_task__/MinReturn -56.2546 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8155 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:46:26 | [maml_trainer] epoch #247 | Sampling for adapation and meta-testing... +2025-04-03 22:47:55 | [maml_trainer] epoch #247 | Finished meta-testing... +2025-04-03 22:47:55 | [maml_trainer] epoch #247 | Saving snapshot... +2025-04-03 22:48:19 | [maml_trainer] epoch #247 | Saved +2025-04-03 22:48:19 | [maml_trainer] epoch #247 | Time 118521.35 s +2025-04-03 22:48:19 | [maml_trainer] epoch #247 | EpochTime 460.73 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5978 +Average/AverageReturn -32.5997 +Average/Iteration 247 +Average/MaxReturn 3.36004 +Average/MinReturn -49.8894 +Average/NumEpisodes 80 +Average/StdReturn 8.67863 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86093 +GaussianMLPPolicy/KLAfter 0.00542741 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000217984 +GaussianMLPPolicy/LossBefore 7.7486e-10 +GaussianMLPPolicy/dLoss -0.000217983 +Iteration 247 +MetaTest/Average/AverageDiscountedReturn -36.1036 +MetaTest/Average/AverageReturn -36.1036 +MetaTest/Average/Iteration 247 +MetaTest/Average/MaxReturn -24.9563 +MetaTest/Average/MinReturn -55.5918 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.16197 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -36.1036 +MetaTest/__unnamed_task__/AverageReturn -36.1036 +MetaTest/__unnamed_task__/Iteration 247 +MetaTest/__unnamed_task__/MaxReturn -24.9563 +MetaTest/__unnamed_task__/MinReturn -55.5918 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.16197 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.936e+06 +__unnamed_task__/AverageDiscountedReturn -16.5978 +__unnamed_task__/AverageReturn -32.5997 +__unnamed_task__/Iteration 247 +__unnamed_task__/MaxReturn 3.36004 +__unnamed_task__/MinReturn -49.8894 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.67863 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 22:54:05 | [maml_trainer] epoch #248 | Sampling for adapation and meta-testing... +2025-04-03 22:55:35 | [maml_trainer] epoch #248 | Finished meta-testing... +2025-04-03 22:55:35 | [maml_trainer] epoch #248 | Saving snapshot... +2025-04-03 22:55:59 | [maml_trainer] epoch #248 | Saved +2025-04-03 22:55:59 | [maml_trainer] epoch #248 | Time 118981.00 s +2025-04-03 22:55:59 | [maml_trainer] epoch #248 | EpochTime 459.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.2397 +Average/AverageReturn -32.902 +Average/Iteration 248 +Average/MaxReturn -1.62621 +Average/MinReturn -66.4008 +Average/NumEpisodes 80 +Average/StdReturn 10.3683 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.86057 +GaussianMLPPolicy/KLAfter 0.0036658 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000263084 +GaussianMLPPolicy/LossBefore -2.24113e-08 +GaussianMLPPolicy/dLoss 0.000263061 +Iteration 248 +MetaTest/Average/AverageDiscountedReturn -35.5903 +MetaTest/Average/AverageReturn -35.5903 +MetaTest/Average/Iteration 248 +MetaTest/Average/MaxReturn -26.4871 +MetaTest/Average/MinReturn -51.132 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.02003 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.5903 +MetaTest/__unnamed_task__/AverageReturn -35.5903 +MetaTest/__unnamed_task__/Iteration 248 +MetaTest/__unnamed_task__/MaxReturn -26.4871 +MetaTest/__unnamed_task__/MinReturn -51.132 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.02003 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.968e+06 +__unnamed_task__/AverageDiscountedReturn -16.2397 +__unnamed_task__/AverageReturn -32.902 +__unnamed_task__/Iteration 248 +__unnamed_task__/MaxReturn -1.62621 +__unnamed_task__/MinReturn -66.4008 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.3683 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:02:00 | [maml_trainer] epoch #249 | Sampling for adapation and meta-testing... +2025-04-03 23:03:32 | [maml_trainer] epoch #249 | Finished meta-testing... +2025-04-03 23:03:32 | [maml_trainer] epoch #249 | Saving snapshot... +2025-04-03 23:03:55 | [maml_trainer] epoch #249 | Saved +2025-04-03 23:03:55 | [maml_trainer] epoch #249 | Time 119457.05 s +2025-04-03 23:03:55 | [maml_trainer] epoch #249 | EpochTime 476.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.9689 +Average/AverageReturn -34.6457 +Average/Iteration 249 +Average/MaxReturn -20.5539 +Average/MinReturn -69.4793 +Average/NumEpisodes 80 +Average/StdReturn 7.41108 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85839 +GaussianMLPPolicy/KLAfter 0.00422551 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.93154e-05 +GaussianMLPPolicy/LossBefore 3.57629e-10 +GaussianMLPPolicy/dLoss 6.93158e-05 +Iteration 249 +MetaTest/Average/AverageDiscountedReturn -31.5912 +MetaTest/Average/AverageReturn -31.5912 +MetaTest/Average/Iteration 249 +MetaTest/Average/MaxReturn -18.2352 +MetaTest/Average/MinReturn -42.3416 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.10098 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.5912 +MetaTest/__unnamed_task__/AverageReturn -31.5912 +MetaTest/__unnamed_task__/Iteration 249 +MetaTest/__unnamed_task__/MaxReturn -18.2352 +MetaTest/__unnamed_task__/MinReturn -42.3416 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.10098 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8e+06 +__unnamed_task__/AverageDiscountedReturn -16.9689 +__unnamed_task__/AverageReturn -34.6457 +__unnamed_task__/Iteration 249 +__unnamed_task__/MaxReturn -20.5539 +__unnamed_task__/MinReturn -69.4793 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.41108 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:09:58 | [maml_trainer] epoch #250 | Sampling for adapation and meta-testing... +2025-04-03 23:11:28 | [maml_trainer] epoch #250 | Finished meta-testing... +2025-04-03 23:11:28 | [maml_trainer] epoch #250 | Saving snapshot... +2025-04-03 23:11:53 | [maml_trainer] epoch #250 | Saved +2025-04-03 23:11:53 | [maml_trainer] epoch #250 | Time 119935.14 s +2025-04-03 23:11:53 | [maml_trainer] epoch #250 | EpochTime 478.09 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7977 +Average/AverageReturn -32.1161 +Average/Iteration 250 +Average/MaxReturn -15.9373 +Average/MinReturn -56.8944 +Average/NumEpisodes 80 +Average/StdReturn 7.50688 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8567 +GaussianMLPPolicy/KLAfter 0.00394163 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000394507 +GaussianMLPPolicy/LossBefore -2.63453e-08 +GaussianMLPPolicy/dLoss 0.00039448 +Iteration 250 +MetaTest/Average/AverageDiscountedReturn -30.8278 +MetaTest/Average/AverageReturn -30.8278 +MetaTest/Average/Iteration 250 +MetaTest/Average/MaxReturn -6.24935 +MetaTest/Average/MinReturn -45.2931 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.27546 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.8278 +MetaTest/__unnamed_task__/AverageReturn -30.8278 +MetaTest/__unnamed_task__/Iteration 250 +MetaTest/__unnamed_task__/MaxReturn -6.24935 +MetaTest/__unnamed_task__/MinReturn -45.2931 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.27546 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.032e+06 +__unnamed_task__/AverageDiscountedReturn -15.7977 +__unnamed_task__/AverageReturn -32.1161 +__unnamed_task__/Iteration 250 +__unnamed_task__/MaxReturn -15.9373 +__unnamed_task__/MinReturn -56.8944 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.50688 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:17:55 | [maml_trainer] epoch #251 | Sampling for adapation and meta-testing... +2025-04-03 23:19:28 | [maml_trainer] epoch #251 | Finished meta-testing... +2025-04-03 23:19:28 | [maml_trainer] epoch #251 | Saving snapshot... +2025-04-03 23:19:52 | [maml_trainer] epoch #251 | Saved +2025-04-03 23:19:52 | [maml_trainer] epoch #251 | Time 120414.94 s +2025-04-03 23:19:52 | [maml_trainer] epoch #251 | EpochTime 479.80 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3488 +Average/AverageReturn -29.7249 +Average/Iteration 251 +Average/MaxReturn 11.5618 +Average/MinReturn -53.2295 +Average/NumEpisodes 80 +Average/StdReturn 12.7859 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85576 +GaussianMLPPolicy/KLAfter 0.0038123 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000214025 +GaussianMLPPolicy/LossBefore -5.0962e-09 +GaussianMLPPolicy/dLoss 0.00021402 +Iteration 251 +MetaTest/Average/AverageDiscountedReturn -30.3229 +MetaTest/Average/AverageReturn -30.3229 +MetaTest/Average/Iteration 251 +MetaTest/Average/MaxReturn -9.64626 +MetaTest/Average/MinReturn -44.5151 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.81947 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.3229 +MetaTest/__unnamed_task__/AverageReturn -30.3229 +MetaTest/__unnamed_task__/Iteration 251 +MetaTest/__unnamed_task__/MaxReturn -9.64626 +MetaTest/__unnamed_task__/MinReturn -44.5151 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.81947 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.064e+06 +__unnamed_task__/AverageDiscountedReturn -15.3488 +__unnamed_task__/AverageReturn -29.7249 +__unnamed_task__/Iteration 251 +__unnamed_task__/MaxReturn 11.5618 +__unnamed_task__/MinReturn -53.2295 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.7859 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:25:55 | [maml_trainer] epoch #252 | Sampling for adapation and meta-testing... +2025-04-03 23:27:27 | [maml_trainer] epoch #252 | Finished meta-testing... +2025-04-03 23:27:27 | [maml_trainer] epoch #252 | Saving snapshot... +2025-04-03 23:27:50 | [maml_trainer] epoch #252 | Saved +2025-04-03 23:27:50 | [maml_trainer] epoch #252 | Time 120892.69 s +2025-04-03 23:27:50 | [maml_trainer] epoch #252 | EpochTime 477.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.5829 +Average/AverageReturn -30.5136 +Average/Iteration 252 +Average/MaxReturn 7.6847 +Average/MinReturn -58.3803 +Average/NumEpisodes 80 +Average/StdReturn 11.1684 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8542 +GaussianMLPPolicy/KLAfter 0.00639167 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000202269 +GaussianMLPPolicy/LossBefore 5.78165e-09 +GaussianMLPPolicy/dLoss 0.000202275 +Iteration 252 +MetaTest/Average/AverageDiscountedReturn -28.5307 +MetaTest/Average/AverageReturn -28.5307 +MetaTest/Average/Iteration 252 +MetaTest/Average/MaxReturn -2.02714 +MetaTest/Average/MinReturn -45.0272 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.553 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.5307 +MetaTest/__unnamed_task__/AverageReturn -28.5307 +MetaTest/__unnamed_task__/Iteration 252 +MetaTest/__unnamed_task__/MaxReturn -2.02714 +MetaTest/__unnamed_task__/MinReturn -45.0272 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.553 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.096e+06 +__unnamed_task__/AverageDiscountedReturn -15.5829 +__unnamed_task__/AverageReturn -30.5136 +__unnamed_task__/Iteration 252 +__unnamed_task__/MaxReturn 7.6847 +__unnamed_task__/MinReturn -58.3803 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1684 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:33:58 | [maml_trainer] epoch #253 | Sampling for adapation and meta-testing... +2025-04-03 23:35:30 | [maml_trainer] epoch #253 | Finished meta-testing... +2025-04-03 23:35:30 | [maml_trainer] epoch #253 | Saving snapshot... +2025-04-03 23:35:54 | [maml_trainer] epoch #253 | Saved +2025-04-03 23:35:54 | [maml_trainer] epoch #253 | Time 121376.62 s +2025-04-03 23:35:54 | [maml_trainer] epoch #253 | EpochTime 483.92 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7077 +Average/AverageReturn -30.1699 +Average/Iteration 253 +Average/MaxReturn -3.2687 +Average/MinReturn -50.518 +Average/NumEpisodes 80 +Average/StdReturn 8.05908 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85309 +GaussianMLPPolicy/KLAfter 0.00499923 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00031054 +GaussianMLPPolicy/LossBefore -7.51019e-09 +GaussianMLPPolicy/dLoss -0.000310547 +Iteration 253 +MetaTest/Average/AverageDiscountedReturn -32.2345 +MetaTest/Average/AverageReturn -32.2345 +MetaTest/Average/Iteration 253 +MetaTest/Average/MaxReturn -12.1986 +MetaTest/Average/MinReturn -50.9046 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.1203 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.2345 +MetaTest/__unnamed_task__/AverageReturn -32.2345 +MetaTest/__unnamed_task__/Iteration 253 +MetaTest/__unnamed_task__/MaxReturn -12.1986 +MetaTest/__unnamed_task__/MinReturn -50.9046 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.1203 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.128e+06 +__unnamed_task__/AverageDiscountedReturn -15.7077 +__unnamed_task__/AverageReturn -30.1699 +__unnamed_task__/Iteration 253 +__unnamed_task__/MaxReturn -3.2687 +__unnamed_task__/MinReturn -50.518 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.05908 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:42:01 | [maml_trainer] epoch #254 | Sampling for adapation and meta-testing... +2025-04-03 23:43:32 | [maml_trainer] epoch #254 | Finished meta-testing... +2025-04-03 23:43:32 | [maml_trainer] epoch #254 | Saving snapshot... +2025-04-03 23:43:56 | [maml_trainer] epoch #254 | Saved +2025-04-03 23:43:56 | [maml_trainer] epoch #254 | Time 121858.59 s +2025-04-03 23:43:56 | [maml_trainer] epoch #254 | EpochTime 481.97 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.1236 +Average/AverageReturn -32.5369 +Average/Iteration 254 +Average/MaxReturn -0.110867 +Average/MinReturn -53.6195 +Average/NumEpisodes 80 +Average/StdReturn 8.15434 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8524 +GaussianMLPPolicy/KLAfter 0.0023487 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000112468 +GaussianMLPPolicy/LossBefore 1.54972e-09 +GaussianMLPPolicy/dLoss -0.000112467 +Iteration 254 +MetaTest/Average/AverageDiscountedReturn -37.3473 +MetaTest/Average/AverageReturn -37.3473 +MetaTest/Average/Iteration 254 +MetaTest/Average/MaxReturn -25.3506 +MetaTest/Average/MinReturn -51.0367 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.1795 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.3473 +MetaTest/__unnamed_task__/AverageReturn -37.3473 +MetaTest/__unnamed_task__/Iteration 254 +MetaTest/__unnamed_task__/MaxReturn -25.3506 +MetaTest/__unnamed_task__/MinReturn -51.0367 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.1795 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.16e+06 +__unnamed_task__/AverageDiscountedReturn -16.1236 +__unnamed_task__/AverageReturn -32.5369 +__unnamed_task__/Iteration 254 +__unnamed_task__/MaxReturn -0.110867 +__unnamed_task__/MinReturn -53.6195 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.15434 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:50:03 | [maml_trainer] epoch #255 | Sampling for adapation and meta-testing... +2025-04-03 23:51:34 | [maml_trainer] epoch #255 | Finished meta-testing... +2025-04-03 23:51:34 | [maml_trainer] epoch #255 | Saving snapshot... +2025-04-03 23:51:58 | [maml_trainer] epoch #255 | Saved +2025-04-03 23:51:58 | [maml_trainer] epoch #255 | Time 122340.61 s +2025-04-03 23:51:58 | [maml_trainer] epoch #255 | EpochTime 482.02 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4357 +Average/AverageReturn -33.3001 +Average/Iteration 255 +Average/MaxReturn -0.360736 +Average/MinReturn -59.8581 +Average/NumEpisodes 80 +Average/StdReturn 8.25476 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8524 +GaussianMLPPolicy/KLAfter 0.00233318 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000186192 +GaussianMLPPolicy/LossBefore -2.12789e-08 +GaussianMLPPolicy/dLoss -0.000186213 +Iteration 255 +MetaTest/Average/AverageDiscountedReturn -34.8843 +MetaTest/Average/AverageReturn -34.8843 +MetaTest/Average/Iteration 255 +MetaTest/Average/MaxReturn -11.8947 +MetaTest/Average/MinReturn -59.1312 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.70199 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.8843 +MetaTest/__unnamed_task__/AverageReturn -34.8843 +MetaTest/__unnamed_task__/Iteration 255 +MetaTest/__unnamed_task__/MaxReturn -11.8947 +MetaTest/__unnamed_task__/MinReturn -59.1312 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.70199 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.192e+06 +__unnamed_task__/AverageDiscountedReturn -16.4357 +__unnamed_task__/AverageReturn -33.3001 +__unnamed_task__/Iteration 255 +__unnamed_task__/MaxReturn -0.360736 +__unnamed_task__/MinReturn -59.8581 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.25476 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-03 23:58:02 | [maml_trainer] epoch #256 | Sampling for adapation and meta-testing... +2025-04-03 23:59:34 | [maml_trainer] epoch #256 | Finished meta-testing... +2025-04-03 23:59:34 | [maml_trainer] epoch #256 | Saving snapshot... +2025-04-03 23:59:58 | [maml_trainer] epoch #256 | Saved +2025-04-03 23:59:58 | [maml_trainer] epoch #256 | Time 122820.38 s +2025-04-03 23:59:58 | [maml_trainer] epoch #256 | EpochTime 479.77 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -17.4778 +Average/AverageReturn -35.8922 +Average/Iteration 256 +Average/MaxReturn 16.1805 +Average/MinReturn -60.7126 +Average/NumEpisodes 80 +Average/StdReturn 10.7565 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.85076 +GaussianMLPPolicy/KLAfter 0.0022195 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000116427 +GaussianMLPPolicy/LossBefore 3.5286e-08 +GaussianMLPPolicy/dLoss 0.000116462 +Iteration 256 +MetaTest/Average/AverageDiscountedReturn -39.4945 +MetaTest/Average/AverageReturn -39.4945 +MetaTest/Average/Iteration 256 +MetaTest/Average/MaxReturn -25.8393 +MetaTest/Average/MinReturn -59.8067 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3848 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -39.4945 +MetaTest/__unnamed_task__/AverageReturn -39.4945 +MetaTest/__unnamed_task__/Iteration 256 +MetaTest/__unnamed_task__/MaxReturn -25.8393 +MetaTest/__unnamed_task__/MinReturn -59.8067 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3848 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.224e+06 +__unnamed_task__/AverageDiscountedReturn -17.4778 +__unnamed_task__/AverageReturn -35.8922 +__unnamed_task__/Iteration 256 +__unnamed_task__/MaxReturn 16.1805 +__unnamed_task__/MinReturn -60.7126 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.7565 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:06:03 | [maml_trainer] epoch #257 | Sampling for adapation and meta-testing... +2025-04-04 00:07:35 | [maml_trainer] epoch #257 | Finished meta-testing... +2025-04-04 00:07:35 | [maml_trainer] epoch #257 | Saving snapshot... +2025-04-04 00:07:58 | [maml_trainer] epoch #257 | Saved +2025-04-04 00:07:58 | [maml_trainer] epoch #257 | Time 123300.04 s +2025-04-04 00:07:58 | [maml_trainer] epoch #257 | EpochTime 479.65 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.428 +Average/AverageReturn -33.6306 +Average/Iteration 257 +Average/MaxReturn -3.46599 +Average/MinReturn -50.2605 +Average/NumEpisodes 80 +Average/StdReturn 8.43597 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.84913 +GaussianMLPPolicy/KLAfter 0.00530483 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000381086 +GaussianMLPPolicy/LossBefore -3.02792e-08 +GaussianMLPPolicy/dLoss 0.000381056 +Iteration 257 +MetaTest/Average/AverageDiscountedReturn -29.4142 +MetaTest/Average/AverageReturn -29.4142 +MetaTest/Average/Iteration 257 +MetaTest/Average/MaxReturn -1.37287 +MetaTest/Average/MinReturn -42.6494 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.17618 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.4142 +MetaTest/__unnamed_task__/AverageReturn -29.4142 +MetaTest/__unnamed_task__/Iteration 257 +MetaTest/__unnamed_task__/MaxReturn -1.37287 +MetaTest/__unnamed_task__/MinReturn -42.6494 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.17618 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.256e+06 +__unnamed_task__/AverageDiscountedReturn -16.428 +__unnamed_task__/AverageReturn -33.6306 +__unnamed_task__/Iteration 257 +__unnamed_task__/MaxReturn -3.46599 +__unnamed_task__/MinReturn -50.2605 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.43597 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:14:00 | [maml_trainer] epoch #258 | Sampling for adapation and meta-testing... +2025-04-04 00:15:31 | [maml_trainer] epoch #258 | Finished meta-testing... +2025-04-04 00:15:31 | [maml_trainer] epoch #258 | Saving snapshot... +2025-04-04 00:15:56 | [maml_trainer] epoch #258 | Saved +2025-04-04 00:15:56 | [maml_trainer] epoch #258 | Time 123778.93 s +2025-04-04 00:15:56 | [maml_trainer] epoch #258 | EpochTime 478.89 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.4046 +Average/AverageReturn -33.217 +Average/Iteration 258 +Average/MaxReturn 15.4374 +Average/MinReturn -64.1166 +Average/NumEpisodes 80 +Average/StdReturn 11.1004 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8485 +GaussianMLPPolicy/KLAfter 0.00863303 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000308417 +GaussianMLPPolicy/LossBefore -7.7486e-09 +GaussianMLPPolicy/dLoss 0.00030841 +Iteration 258 +MetaTest/Average/AverageDiscountedReturn -30.3235 +MetaTest/Average/AverageReturn -30.3235 +MetaTest/Average/Iteration 258 +MetaTest/Average/MaxReturn -12.5241 +MetaTest/Average/MinReturn -47.9031 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.55115 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.3235 +MetaTest/__unnamed_task__/AverageReturn -30.3235 +MetaTest/__unnamed_task__/Iteration 258 +MetaTest/__unnamed_task__/MaxReturn -12.5241 +MetaTest/__unnamed_task__/MinReturn -47.9031 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.55115 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.288e+06 +__unnamed_task__/AverageDiscountedReturn -16.4046 +__unnamed_task__/AverageReturn -33.217 +__unnamed_task__/Iteration 258 +__unnamed_task__/MaxReturn 15.4374 +__unnamed_task__/MinReturn -64.1166 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1004 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:22:11 | [maml_trainer] epoch #259 | Sampling for adapation and meta-testing... +2025-04-04 00:23:43 | [maml_trainer] epoch #259 | Finished meta-testing... +2025-04-04 00:23:43 | [maml_trainer] epoch #259 | Saving snapshot... +2025-04-04 00:24:07 | [maml_trainer] epoch #259 | Saved +2025-04-04 00:24:07 | [maml_trainer] epoch #259 | Time 124269.89 s +2025-04-04 00:24:07 | [maml_trainer] epoch #259 | EpochTime 490.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.9096 +Average/AverageReturn -29.3779 +Average/Iteration 259 +Average/MaxReturn 15.4253 +Average/MinReturn -59.9211 +Average/NumEpisodes 80 +Average/StdReturn 12.5425 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8471 +GaussianMLPPolicy/KLAfter 0.00480934 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000154482 +GaussianMLPPolicy/LossBefore 1.59144e-08 +GaussianMLPPolicy/dLoss 0.000154498 +Iteration 259 +MetaTest/Average/AverageDiscountedReturn -37.7734 +MetaTest/Average/AverageReturn -37.7734 +MetaTest/Average/Iteration 259 +MetaTest/Average/MaxReturn -16.5359 +MetaTest/Average/MinReturn -62.8311 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.111 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -37.7734 +MetaTest/__unnamed_task__/AverageReturn -37.7734 +MetaTest/__unnamed_task__/Iteration 259 +MetaTest/__unnamed_task__/MaxReturn -16.5359 +MetaTest/__unnamed_task__/MinReturn -62.8311 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.111 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.32e+06 +__unnamed_task__/AverageDiscountedReturn -14.9096 +__unnamed_task__/AverageReturn -29.3779 +__unnamed_task__/Iteration 259 +__unnamed_task__/MaxReturn 15.4253 +__unnamed_task__/MinReturn -59.9211 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.5425 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:30:12 | [maml_trainer] epoch #260 | Sampling for adapation and meta-testing... +2025-04-04 00:31:43 | [maml_trainer] epoch #260 | Finished meta-testing... +2025-04-04 00:31:43 | [maml_trainer] epoch #260 | Saving snapshot... +2025-04-04 00:32:06 | [maml_trainer] epoch #260 | Saved +2025-04-04 00:32:06 | [maml_trainer] epoch #260 | Time 124748.66 s +2025-04-04 00:32:06 | [maml_trainer] epoch #260 | EpochTime 478.77 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1833 +Average/AverageReturn -29.8719 +Average/Iteration 260 +Average/MaxReturn 17.7546 +Average/MinReturn -57.4994 +Average/NumEpisodes 80 +Average/StdReturn 11.1523 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.84657 +GaussianMLPPolicy/KLAfter 0.0035866 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000149142 +GaussianMLPPolicy/LossBefore -2.96831e-08 +GaussianMLPPolicy/dLoss -0.000149172 +Iteration 260 +MetaTest/Average/AverageDiscountedReturn -31.5135 +MetaTest/Average/AverageReturn -31.5135 +MetaTest/Average/Iteration 260 +MetaTest/Average/MaxReturn -11.8205 +MetaTest/Average/MinReturn -59.4852 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0516 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.5135 +MetaTest/__unnamed_task__/AverageReturn -31.5135 +MetaTest/__unnamed_task__/Iteration 260 +MetaTest/__unnamed_task__/MaxReturn -11.8205 +MetaTest/__unnamed_task__/MinReturn -59.4852 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0516 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.352e+06 +__unnamed_task__/AverageDiscountedReturn -15.1833 +__unnamed_task__/AverageReturn -29.8719 +__unnamed_task__/Iteration 260 +__unnamed_task__/MaxReturn 17.7546 +__unnamed_task__/MinReturn -57.4994 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.1523 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:38:07 | [maml_trainer] epoch #261 | Sampling for adapation and meta-testing... +2025-04-04 00:39:38 | [maml_trainer] epoch #261 | Finished meta-testing... +2025-04-04 00:39:38 | [maml_trainer] epoch #261 | Saving snapshot... +2025-04-04 00:40:02 | [maml_trainer] epoch #261 | Saved +2025-04-04 00:40:02 | [maml_trainer] epoch #261 | Time 125224.58 s +2025-04-04 00:40:02 | [maml_trainer] epoch #261 | EpochTime 475.92 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7129 +Average/AverageReturn -31.51 +Average/Iteration 261 +Average/MaxReturn 21.8307 +Average/MinReturn -62.844 +Average/NumEpisodes 80 +Average/StdReturn 10.8067 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.84564 +GaussianMLPPolicy/KLAfter 0.00183941 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.38224e-05 +GaussianMLPPolicy/LossBefore 2.17557e-08 +GaussianMLPPolicy/dLoss 3.38442e-05 +Iteration 261 +MetaTest/Average/AverageDiscountedReturn -33.2467 +MetaTest/Average/AverageReturn -33.2467 +MetaTest/Average/Iteration 261 +MetaTest/Average/MaxReturn -15.9784 +MetaTest/Average/MinReturn -46.4393 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.10796 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.2467 +MetaTest/__unnamed_task__/AverageReturn -33.2467 +MetaTest/__unnamed_task__/Iteration 261 +MetaTest/__unnamed_task__/MaxReturn -15.9784 +MetaTest/__unnamed_task__/MinReturn -46.4393 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.10796 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.384e+06 +__unnamed_task__/AverageDiscountedReturn -15.7129 +__unnamed_task__/AverageReturn -31.51 +__unnamed_task__/Iteration 261 +__unnamed_task__/MaxReturn 21.8307 +__unnamed_task__/MinReturn -62.844 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8067 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:46:07 | [maml_trainer] epoch #262 | Sampling for adapation and meta-testing... +2025-04-04 00:47:39 | [maml_trainer] epoch #262 | Finished meta-testing... +2025-04-04 00:47:39 | [maml_trainer] epoch #262 | Saving snapshot... +2025-04-04 00:48:03 | [maml_trainer] epoch #262 | Saved +2025-04-04 00:48:03 | [maml_trainer] epoch #262 | Time 125705.78 s +2025-04-04 00:48:03 | [maml_trainer] epoch #262 | EpochTime 481.20 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.7717 +Average/AverageReturn -31.8456 +Average/Iteration 262 +Average/MaxReturn -7.63414 +Average/MinReturn -58.8641 +Average/NumEpisodes 80 +Average/StdReturn 9.41271 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.84346 +GaussianMLPPolicy/KLAfter 0.00109424 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.0064e-06 +GaussianMLPPolicy/LossBefore -1.41263e-08 +GaussianMLPPolicy/dLoss 2.99227e-06 +Iteration 262 +MetaTest/Average/AverageDiscountedReturn -29.7806 +MetaTest/Average/AverageReturn -29.7806 +MetaTest/Average/Iteration 262 +MetaTest/Average/MaxReturn 8.7188 +MetaTest/Average/MinReturn -44.046 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7306 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.7806 +MetaTest/__unnamed_task__/AverageReturn -29.7806 +MetaTest/__unnamed_task__/Iteration 262 +MetaTest/__unnamed_task__/MaxReturn 8.7188 +MetaTest/__unnamed_task__/MinReturn -44.046 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7306 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.416e+06 +__unnamed_task__/AverageDiscountedReturn -15.7717 +__unnamed_task__/AverageReturn -31.8456 +__unnamed_task__/Iteration 262 +__unnamed_task__/MaxReturn -7.63414 +__unnamed_task__/MinReturn -58.8641 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.41271 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 00:54:08 | [maml_trainer] epoch #263 | Sampling for adapation and meta-testing... +2025-04-04 00:55:40 | [maml_trainer] epoch #263 | Finished meta-testing... +2025-04-04 00:55:40 | [maml_trainer] epoch #263 | Saving snapshot... +2025-04-04 00:56:02 | [maml_trainer] epoch #263 | Saved +2025-04-04 00:56:02 | [maml_trainer] epoch #263 | Time 126184.96 s +2025-04-04 00:56:02 | [maml_trainer] epoch #263 | EpochTime 479.18 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.5273 +Average/AverageReturn -31.4338 +Average/Iteration 263 +Average/MaxReturn 2.13765 +Average/MinReturn -65.6314 +Average/NumEpisodes 80 +Average/StdReturn 9.98225 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.84116 +GaussianMLPPolicy/KLAfter 0.00141651 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.54676e-05 +GaussianMLPPolicy/LossBefore 2.44379e-09 +GaussianMLPPolicy/dLoss 3.547e-05 +Iteration 263 +MetaTest/Average/AverageDiscountedReturn -31.8475 +MetaTest/Average/AverageReturn -31.8475 +MetaTest/Average/Iteration 263 +MetaTest/Average/MaxReturn -5.5256 +MetaTest/Average/MinReturn -43.5691 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.16203 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.8475 +MetaTest/__unnamed_task__/AverageReturn -31.8475 +MetaTest/__unnamed_task__/Iteration 263 +MetaTest/__unnamed_task__/MaxReturn -5.5256 +MetaTest/__unnamed_task__/MinReturn -43.5691 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.16203 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.448e+06 +__unnamed_task__/AverageDiscountedReturn -15.5273 +__unnamed_task__/AverageReturn -31.4338 +__unnamed_task__/Iteration 263 +__unnamed_task__/MaxReturn 2.13765 +__unnamed_task__/MinReturn -65.6314 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.98225 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:02:12 | [maml_trainer] epoch #264 | Sampling for adapation and meta-testing... +2025-04-04 01:03:42 | [maml_trainer] epoch #264 | Finished meta-testing... +2025-04-04 01:03:42 | [maml_trainer] epoch #264 | Saving snapshot... +2025-04-04 01:04:07 | [maml_trainer] epoch #264 | Saved +2025-04-04 01:04:07 | [maml_trainer] epoch #264 | Time 126669.87 s +2025-04-04 01:04:07 | [maml_trainer] epoch #264 | EpochTime 484.91 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.5778 +Average/AverageReturn -33.7036 +Average/Iteration 264 +Average/MaxReturn -7.18534 +Average/MinReturn -55.053 +Average/NumEpisodes 80 +Average/StdReturn 8.52942 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83985 +GaussianMLPPolicy/KLAfter 0.00191822 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.14746e-05 +GaussianMLPPolicy/LossBefore 1.23978e-08 +GaussianMLPPolicy/dLoss -2.14622e-05 +Iteration 264 +MetaTest/Average/AverageDiscountedReturn -30.8674 +MetaTest/Average/AverageReturn -30.8674 +MetaTest/Average/Iteration 264 +MetaTest/Average/MaxReturn -8.52149 +MetaTest/Average/MinReturn -48.4366 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.18625 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.8674 +MetaTest/__unnamed_task__/AverageReturn -30.8674 +MetaTest/__unnamed_task__/Iteration 264 +MetaTest/__unnamed_task__/MaxReturn -8.52149 +MetaTest/__unnamed_task__/MinReturn -48.4366 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.18625 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.48e+06 +__unnamed_task__/AverageDiscountedReturn -16.5778 +__unnamed_task__/AverageReturn -33.7036 +__unnamed_task__/Iteration 264 +__unnamed_task__/MaxReturn -7.18534 +__unnamed_task__/MinReturn -55.053 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.52942 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:10:11 | [maml_trainer] epoch #265 | Sampling for adapation and meta-testing... +2025-04-04 01:11:42 | [maml_trainer] epoch #265 | Finished meta-testing... +2025-04-04 01:11:42 | [maml_trainer] epoch #265 | Saving snapshot... +2025-04-04 01:12:06 | [maml_trainer] epoch #265 | Saved +2025-04-04 01:12:06 | [maml_trainer] epoch #265 | Time 127148.83 s +2025-04-04 01:12:06 | [maml_trainer] epoch #265 | EpochTime 478.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -16.3526 +Average/AverageReturn -32.7387 +Average/Iteration 265 +Average/MaxReturn 4.29343 +Average/MinReturn -55.2064 +Average/NumEpisodes 80 +Average/StdReturn 9.26369 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83807 +GaussianMLPPolicy/KLAfter 0.00219037 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00015887 +GaussianMLPPolicy/LossBefore 7.15256e-10 +GaussianMLPPolicy/dLoss -0.000158869 +Iteration 265 +MetaTest/Average/AverageDiscountedReturn -28.9252 +MetaTest/Average/AverageReturn -28.9252 +MetaTest/Average/Iteration 265 +MetaTest/Average/MaxReturn 7.32031 +MetaTest/Average/MinReturn -43.5228 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.6154 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.9252 +MetaTest/__unnamed_task__/AverageReturn -28.9252 +MetaTest/__unnamed_task__/Iteration 265 +MetaTest/__unnamed_task__/MaxReturn 7.32031 +MetaTest/__unnamed_task__/MinReturn -43.5228 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.6154 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.512e+06 +__unnamed_task__/AverageDiscountedReturn -16.3526 +__unnamed_task__/AverageReturn -32.7387 +__unnamed_task__/Iteration 265 +__unnamed_task__/MaxReturn 4.29343 +__unnamed_task__/MinReturn -55.2064 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.26369 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:18:13 | [maml_trainer] epoch #266 | Sampling for adapation and meta-testing... +2025-04-04 01:19:45 | [maml_trainer] epoch #266 | Finished meta-testing... +2025-04-04 01:19:45 | [maml_trainer] epoch #266 | Saving snapshot... +2025-04-04 01:20:08 | [maml_trainer] epoch #266 | Saved +2025-04-04 01:20:08 | [maml_trainer] epoch #266 | Time 127630.50 s +2025-04-04 01:20:08 | [maml_trainer] epoch #266 | EpochTime 481.67 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6345 +Average/AverageReturn -30.9817 +Average/Iteration 266 +Average/MaxReturn -0.693005 +Average/MinReturn -45.6915 +Average/NumEpisodes 80 +Average/StdReturn 7.84196 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83582 +GaussianMLPPolicy/KLAfter 0.00184901 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000226197 +GaussianMLPPolicy/LossBefore -3.57628e-09 +GaussianMLPPolicy/dLoss -0.000226201 +Iteration 266 +MetaTest/Average/AverageDiscountedReturn -31.5278 +MetaTest/Average/AverageReturn -31.5278 +MetaTest/Average/Iteration 266 +MetaTest/Average/MaxReturn -15.1378 +MetaTest/Average/MinReturn -43.673 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.15347 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.5278 +MetaTest/__unnamed_task__/AverageReturn -31.5278 +MetaTest/__unnamed_task__/Iteration 266 +MetaTest/__unnamed_task__/MaxReturn -15.1378 +MetaTest/__unnamed_task__/MinReturn -43.673 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.15347 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.544e+06 +__unnamed_task__/AverageDiscountedReturn -15.6345 +__unnamed_task__/AverageReturn -30.9817 +__unnamed_task__/Iteration 266 +__unnamed_task__/MaxReturn -0.693005 +__unnamed_task__/MinReturn -45.6915 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.84196 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:26:08 | [maml_trainer] epoch #267 | Sampling for adapation and meta-testing... +2025-04-04 01:27:36 | [maml_trainer] epoch #267 | Finished meta-testing... +2025-04-04 01:27:36 | [maml_trainer] epoch #267 | Saving snapshot... +2025-04-04 01:28:00 | [maml_trainer] epoch #267 | Saved +2025-04-04 01:28:00 | [maml_trainer] epoch #267 | Time 128102.10 s +2025-04-04 01:28:00 | [maml_trainer] epoch #267 | EpochTime 471.59 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3586 +Average/AverageReturn -30.2305 +Average/Iteration 267 +Average/MaxReturn 27.2513 +Average/MinReturn -48.9457 +Average/NumEpisodes 80 +Average/StdReturn 11.9609 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83295 +GaussianMLPPolicy/KLAfter 0.00347683 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.99129e-05 +GaussianMLPPolicy/LossBefore 1.51992e-09 +GaussianMLPPolicy/dLoss -6.99114e-05 +Iteration 267 +MetaTest/Average/AverageDiscountedReturn -29.2862 +MetaTest/Average/AverageReturn -29.2862 +MetaTest/Average/Iteration 267 +MetaTest/Average/MaxReturn -17.6794 +MetaTest/Average/MinReturn -42.8663 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.07356 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.2862 +MetaTest/__unnamed_task__/AverageReturn -29.2862 +MetaTest/__unnamed_task__/Iteration 267 +MetaTest/__unnamed_task__/MaxReturn -17.6794 +MetaTest/__unnamed_task__/MinReturn -42.8663 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.07356 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.576e+06 +__unnamed_task__/AverageDiscountedReturn -15.3586 +__unnamed_task__/AverageReturn -30.2305 +__unnamed_task__/Iteration 267 +__unnamed_task__/MaxReturn 27.2513 +__unnamed_task__/MinReturn -48.9457 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.9609 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:33:46 | [maml_trainer] epoch #268 | Sampling for adapation and meta-testing... +2025-04-04 01:35:11 | [maml_trainer] epoch #268 | Finished meta-testing... +2025-04-04 01:35:11 | [maml_trainer] epoch #268 | Saving snapshot... +2025-04-04 01:35:33 | [maml_trainer] epoch #268 | Saved +2025-04-04 01:35:33 | [maml_trainer] epoch #268 | Time 128555.72 s +2025-04-04 01:35:33 | [maml_trainer] epoch #268 | EpochTime 453.62 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3998 +Average/AverageReturn -31.1587 +Average/Iteration 268 +Average/MaxReturn -5.12321 +Average/MinReturn -52.9689 +Average/NumEpisodes 80 +Average/StdReturn 7.76223 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83122 +GaussianMLPPolicy/KLAfter 0.00254264 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000125633 +GaussianMLPPolicy/LossBefore 1.56164e-08 +GaussianMLPPolicy/dLoss 0.000125649 +Iteration 268 +MetaTest/Average/AverageDiscountedReturn -32.6817 +MetaTest/Average/AverageReturn -32.6817 +MetaTest/Average/Iteration 268 +MetaTest/Average/MaxReturn -23.8523 +MetaTest/Average/MinReturn -41.2305 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.58996 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.6817 +MetaTest/__unnamed_task__/AverageReturn -32.6817 +MetaTest/__unnamed_task__/Iteration 268 +MetaTest/__unnamed_task__/MaxReturn -23.8523 +MetaTest/__unnamed_task__/MinReturn -41.2305 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.58996 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.608e+06 +__unnamed_task__/AverageDiscountedReturn -15.3998 +__unnamed_task__/AverageReturn -31.1587 +__unnamed_task__/Iteration 268 +__unnamed_task__/MaxReturn -5.12321 +__unnamed_task__/MinReturn -52.9689 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.76223 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:41:17 | [maml_trainer] epoch #269 | Sampling for adapation and meta-testing... +2025-04-04 01:42:45 | [maml_trainer] epoch #269 | Finished meta-testing... +2025-04-04 01:42:45 | [maml_trainer] epoch #269 | Saving snapshot... +2025-04-04 01:43:07 | [maml_trainer] epoch #269 | Saved +2025-04-04 01:43:07 | [maml_trainer] epoch #269 | Time 129009.68 s +2025-04-04 01:43:07 | [maml_trainer] epoch #269 | EpochTime 453.95 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.401 +Average/AverageReturn -31.4863 +Average/Iteration 269 +Average/MaxReturn 5.65537 +Average/MinReturn -51.0605 +Average/NumEpisodes 80 +Average/StdReturn 8.9625 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83047 +GaussianMLPPolicy/KLAfter 0.00518084 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000243511 +GaussianMLPPolicy/LossBefore -7.21216e-09 +GaussianMLPPolicy/dLoss 0.000243504 +Iteration 269 +MetaTest/Average/AverageDiscountedReturn -31.7856 +MetaTest/Average/AverageReturn -31.7856 +MetaTest/Average/Iteration 269 +MetaTest/Average/MaxReturn 4.78192 +MetaTest/Average/MinReturn -54.9269 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.7945 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.7856 +MetaTest/__unnamed_task__/AverageReturn -31.7856 +MetaTest/__unnamed_task__/Iteration 269 +MetaTest/__unnamed_task__/MaxReturn 4.78192 +MetaTest/__unnamed_task__/MinReturn -54.9269 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.7945 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.64e+06 +__unnamed_task__/AverageDiscountedReturn -15.401 +__unnamed_task__/AverageReturn -31.4863 +__unnamed_task__/Iteration 269 +__unnamed_task__/MaxReturn 5.65537 +__unnamed_task__/MinReturn -51.0605 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.9625 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:48:53 | [maml_trainer] epoch #270 | Sampling for adapation and meta-testing... +2025-04-04 01:50:18 | [maml_trainer] epoch #270 | Finished meta-testing... +2025-04-04 01:50:18 | [maml_trainer] epoch #270 | Saving snapshot... +2025-04-04 01:50:41 | [maml_trainer] epoch #270 | Saved +2025-04-04 01:50:41 | [maml_trainer] epoch #270 | Time 129463.67 s +2025-04-04 01:50:41 | [maml_trainer] epoch #270 | EpochTime 453.99 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.8034 +Average/AverageReturn -30.1899 +Average/Iteration 270 +Average/MaxReturn 17.6971 +Average/MinReturn -44.7769 +Average/NumEpisodes 80 +Average/StdReturn 10.7066 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83161 +GaussianMLPPolicy/KLAfter 0.00551129 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.49737e-06 +GaussianMLPPolicy/LossBefore 3.47793e-08 +GaussianMLPPolicy/dLoss -9.46259e-06 +Iteration 270 +MetaTest/Average/AverageDiscountedReturn -33.459 +MetaTest/Average/AverageReturn -33.459 +MetaTest/Average/Iteration 270 +MetaTest/Average/MaxReturn -23.1203 +MetaTest/Average/MinReturn -45.1979 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.77514 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.459 +MetaTest/__unnamed_task__/AverageReturn -33.459 +MetaTest/__unnamed_task__/Iteration 270 +MetaTest/__unnamed_task__/MaxReturn -23.1203 +MetaTest/__unnamed_task__/MinReturn -45.1979 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.77514 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.672e+06 +__unnamed_task__/AverageDiscountedReturn -14.8034 +__unnamed_task__/AverageReturn -30.1899 +__unnamed_task__/Iteration 270 +__unnamed_task__/MaxReturn 17.6971 +__unnamed_task__/MinReturn -44.7769 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.7066 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 01:56:32 | [maml_trainer] epoch #271 | Sampling for adapation and meta-testing... +2025-04-04 01:57:58 | [maml_trainer] epoch #271 | Finished meta-testing... +2025-04-04 01:57:58 | [maml_trainer] epoch #271 | Saving snapshot... +2025-04-04 01:58:20 | [maml_trainer] epoch #271 | Saved +2025-04-04 01:58:20 | [maml_trainer] epoch #271 | Time 129922.12 s +2025-04-04 01:58:20 | [maml_trainer] epoch #271 | EpochTime 458.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.709 +Average/AverageReturn -32.3261 +Average/Iteration 271 +Average/MaxReturn 0.53047 +Average/MinReturn -49.1263 +Average/NumEpisodes 80 +Average/StdReturn 8.18668 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83265 +GaussianMLPPolicy/KLAfter 0.00438798 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.58732e-05 +GaussianMLPPolicy/LossBefore -1.11461e-08 +GaussianMLPPolicy/dLoss 6.58621e-05 +Iteration 271 +MetaTest/Average/AverageDiscountedReturn -32.2568 +MetaTest/Average/AverageReturn -32.2568 +MetaTest/Average/Iteration 271 +MetaTest/Average/MaxReturn -11.6582 +MetaTest/Average/MinReturn -46.7588 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.08697 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.2568 +MetaTest/__unnamed_task__/AverageReturn -32.2568 +MetaTest/__unnamed_task__/Iteration 271 +MetaTest/__unnamed_task__/MaxReturn -11.6582 +MetaTest/__unnamed_task__/MinReturn -46.7588 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.08697 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.704e+06 +__unnamed_task__/AverageDiscountedReturn -15.709 +__unnamed_task__/AverageReturn -32.3261 +__unnamed_task__/Iteration 271 +__unnamed_task__/MaxReturn 0.53047 +__unnamed_task__/MinReturn -49.1263 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.18668 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:04:14 | [maml_trainer] epoch #272 | Sampling for adapation and meta-testing... +2025-04-04 02:05:37 | [maml_trainer] epoch #272 | Finished meta-testing... +2025-04-04 02:05:37 | [maml_trainer] epoch #272 | Saving snapshot... +2025-04-04 02:06:01 | [maml_trainer] epoch #272 | Saved +2025-04-04 02:06:01 | [maml_trainer] epoch #272 | Time 130383.01 s +2025-04-04 02:06:01 | [maml_trainer] epoch #272 | EpochTime 460.89 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.8747 +Average/AverageReturn -30.2588 +Average/Iteration 272 +Average/MaxReturn 9.29846 +Average/MinReturn -57.3247 +Average/NumEpisodes 80 +Average/StdReturn 9.69638 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83257 +GaussianMLPPolicy/KLAfter 0.00416441 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.75368e-05 +GaussianMLPPolicy/LossBefore -1.11461e-08 +GaussianMLPPolicy/dLoss -4.75479e-05 +Iteration 272 +MetaTest/Average/AverageDiscountedReturn -30.4712 +MetaTest/Average/AverageReturn -30.4712 +MetaTest/Average/Iteration 272 +MetaTest/Average/MaxReturn -20.1547 +MetaTest/Average/MinReturn -44.3867 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.28266 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.4712 +MetaTest/__unnamed_task__/AverageReturn -30.4712 +MetaTest/__unnamed_task__/Iteration 272 +MetaTest/__unnamed_task__/MaxReturn -20.1547 +MetaTest/__unnamed_task__/MinReturn -44.3867 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.28266 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.736e+06 +__unnamed_task__/AverageDiscountedReturn -14.8747 +__unnamed_task__/AverageReturn -30.2588 +__unnamed_task__/Iteration 272 +__unnamed_task__/MaxReturn 9.29846 +__unnamed_task__/MinReturn -57.3247 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.69638 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:11:38 | [maml_trainer] epoch #273 | Sampling for adapation and meta-testing... +2025-04-04 02:13:05 | [maml_trainer] epoch #273 | Finished meta-testing... +2025-04-04 02:13:05 | [maml_trainer] epoch #273 | Saving snapshot... +2025-04-04 02:13:28 | [maml_trainer] epoch #273 | Saved +2025-04-04 02:13:28 | [maml_trainer] epoch #273 | Time 130830.18 s +2025-04-04 02:13:28 | [maml_trainer] epoch #273 | EpochTime 447.16 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3813 +Average/AverageReturn -32.0102 +Average/Iteration 273 +Average/MaxReturn -14.4238 +Average/MinReturn -54.2858 +Average/NumEpisodes 80 +Average/StdReturn 7.82044 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83259 +GaussianMLPPolicy/KLAfter 0.00384715 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.40257e-06 +GaussianMLPPolicy/LossBefore 1.4782e-08 +GaussianMLPPolicy/dLoss 5.41735e-06 +Iteration 273 +MetaTest/Average/AverageDiscountedReturn -31.9069 +MetaTest/Average/AverageReturn -31.9069 +MetaTest/Average/Iteration 273 +MetaTest/Average/MaxReturn -19.9663 +MetaTest/Average/MinReturn -44.7171 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.08976 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.9069 +MetaTest/__unnamed_task__/AverageReturn -31.9069 +MetaTest/__unnamed_task__/Iteration 273 +MetaTest/__unnamed_task__/MaxReturn -19.9663 +MetaTest/__unnamed_task__/MinReturn -44.7171 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.08976 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.768e+06 +__unnamed_task__/AverageDiscountedReturn -15.3813 +__unnamed_task__/AverageReturn -32.0102 +__unnamed_task__/Iteration 273 +__unnamed_task__/MaxReturn -14.4238 +__unnamed_task__/MinReturn -54.2858 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.82044 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:19:18 | [maml_trainer] epoch #274 | Sampling for adapation and meta-testing... +2025-04-04 02:20:44 | [maml_trainer] epoch #274 | Finished meta-testing... +2025-04-04 02:20:44 | [maml_trainer] epoch #274 | Saving snapshot... +2025-04-04 02:21:05 | [maml_trainer] epoch #274 | Saved +2025-04-04 02:21:05 | [maml_trainer] epoch #274 | Time 131287.79 s +2025-04-04 02:21:05 | [maml_trainer] epoch #274 | EpochTime 457.61 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3486 +Average/AverageReturn -32.1291 +Average/Iteration 274 +Average/MaxReturn 0.431649 +Average/MinReturn -63.0275 +Average/NumEpisodes 80 +Average/StdReturn 10.6343 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83446 +GaussianMLPPolicy/KLAfter 0.00445865 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.36098e-05 +GaussianMLPPolicy/LossBefore 2.05636e-08 +GaussianMLPPolicy/dLoss -6.35892e-05 +Iteration 274 +MetaTest/Average/AverageDiscountedReturn -35.8173 +MetaTest/Average/AverageReturn -35.8173 +MetaTest/Average/Iteration 274 +MetaTest/Average/MaxReturn -23.2747 +MetaTest/Average/MinReturn -60.6084 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.5187 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.8173 +MetaTest/__unnamed_task__/AverageReturn -35.8173 +MetaTest/__unnamed_task__/Iteration 274 +MetaTest/__unnamed_task__/MaxReturn -23.2747 +MetaTest/__unnamed_task__/MinReturn -60.6084 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.5187 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.8e+06 +__unnamed_task__/AverageDiscountedReturn -15.3486 +__unnamed_task__/AverageReturn -32.1291 +__unnamed_task__/Iteration 274 +__unnamed_task__/MaxReturn 0.431649 +__unnamed_task__/MinReturn -63.0275 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.6343 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:26:48 | [maml_trainer] epoch #275 | Sampling for adapation and meta-testing... +2025-04-04 02:28:16 | [maml_trainer] epoch #275 | Finished meta-testing... +2025-04-04 02:28:16 | [maml_trainer] epoch #275 | Saving snapshot... +2025-04-04 02:28:38 | [maml_trainer] epoch #275 | Saved +2025-04-04 02:28:38 | [maml_trainer] epoch #275 | Time 131740.96 s +2025-04-04 02:28:38 | [maml_trainer] epoch #275 | EpochTime 453.16 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.6162 +Average/AverageReturn -30.47 +Average/Iteration 275 +Average/MaxReturn -0.13661 +Average/MinReturn -62.7281 +Average/NumEpisodes 80 +Average/StdReturn 9.94243 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83564 +GaussianMLPPolicy/KLAfter 0.00298466 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000169791 +GaussianMLPPolicy/LossBefore -5.78165e-09 +GaussianMLPPolicy/dLoss -0.000169796 +Iteration 275 +MetaTest/Average/AverageDiscountedReturn -35.7838 +MetaTest/Average/AverageReturn -35.7838 +MetaTest/Average/Iteration 275 +MetaTest/Average/MaxReturn -13.4004 +MetaTest/Average/MinReturn -59.4059 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4533 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.7838 +MetaTest/__unnamed_task__/AverageReturn -35.7838 +MetaTest/__unnamed_task__/Iteration 275 +MetaTest/__unnamed_task__/MaxReturn -13.4004 +MetaTest/__unnamed_task__/MinReturn -59.4059 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4533 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.832e+06 +__unnamed_task__/AverageDiscountedReturn -14.6162 +__unnamed_task__/AverageReturn -30.47 +__unnamed_task__/Iteration 275 +__unnamed_task__/MaxReturn -0.13661 +__unnamed_task__/MinReturn -62.7281 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.94243 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:34:24 | [maml_trainer] epoch #276 | Sampling for adapation and meta-testing... +2025-04-04 02:35:49 | [maml_trainer] epoch #276 | Finished meta-testing... +2025-04-04 02:35:49 | [maml_trainer] epoch #276 | Saving snapshot... +2025-04-04 02:36:12 | [maml_trainer] epoch #276 | Saved +2025-04-04 02:36:12 | [maml_trainer] epoch #276 | Time 132194.12 s +2025-04-04 02:36:12 | [maml_trainer] epoch #276 | EpochTime 453.16 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.354 +Average/AverageReturn -32.0651 +Average/Iteration 276 +Average/MaxReturn 4.74467 +Average/MinReturn -60.4305 +Average/NumEpisodes 80 +Average/StdReturn 11.8145 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83575 +GaussianMLPPolicy/KLAfter 0.00158171 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000106035 +GaussianMLPPolicy/LossBefore -1.01328e-09 +GaussianMLPPolicy/dLoss -0.000106036 +Iteration 276 +MetaTest/Average/AverageDiscountedReturn -34.3669 +MetaTest/Average/AverageReturn -34.3669 +MetaTest/Average/Iteration 276 +MetaTest/Average/MaxReturn -22.9739 +MetaTest/Average/MinReturn -47.9789 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.81493 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.3669 +MetaTest/__unnamed_task__/AverageReturn -34.3669 +MetaTest/__unnamed_task__/Iteration 276 +MetaTest/__unnamed_task__/MaxReturn -22.9739 +MetaTest/__unnamed_task__/MinReturn -47.9789 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.81493 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.864e+06 +__unnamed_task__/AverageDiscountedReturn -15.354 +__unnamed_task__/AverageReturn -32.0651 +__unnamed_task__/Iteration 276 +__unnamed_task__/MaxReturn 4.74467 +__unnamed_task__/MinReturn -60.4305 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.8145 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:41:56 | [maml_trainer] epoch #277 | Sampling for adapation and meta-testing... +2025-04-04 02:43:22 | [maml_trainer] epoch #277 | Finished meta-testing... +2025-04-04 02:43:22 | [maml_trainer] epoch #277 | Saving snapshot... +2025-04-04 02:43:43 | [maml_trainer] epoch #277 | Saved +2025-04-04 02:43:43 | [maml_trainer] epoch #277 | Time 132645.65 s +2025-04-04 02:43:43 | [maml_trainer] epoch #277 | EpochTime 451.53 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4272 +Average/AverageReturn -32.6619 +Average/Iteration 277 +Average/MaxReturn 8.06306 +Average/MinReturn -62.0861 +Average/NumEpisodes 80 +Average/StdReturn 13.0528 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83591 +GaussianMLPPolicy/KLAfter 0.00122189 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.75235e-06 +GaussianMLPPolicy/LossBefore -1.61231e-08 +GaussianMLPPolicy/dLoss 1.73622e-06 +Iteration 277 +MetaTest/Average/AverageDiscountedReturn -34.0253 +MetaTest/Average/AverageReturn -34.0253 +MetaTest/Average/Iteration 277 +MetaTest/Average/MaxReturn -22.9949 +MetaTest/Average/MinReturn -42.5606 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.80059 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -34.0253 +MetaTest/__unnamed_task__/AverageReturn -34.0253 +MetaTest/__unnamed_task__/Iteration 277 +MetaTest/__unnamed_task__/MaxReturn -22.9949 +MetaTest/__unnamed_task__/MinReturn -42.5606 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.80059 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.896e+06 +__unnamed_task__/AverageDiscountedReturn -15.4272 +__unnamed_task__/AverageReturn -32.6619 +__unnamed_task__/Iteration 277 +__unnamed_task__/MaxReturn 8.06306 +__unnamed_task__/MinReturn -62.0861 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.0528 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:49:27 | [maml_trainer] epoch #278 | Sampling for adapation and meta-testing... +2025-04-04 02:50:54 | [maml_trainer] epoch #278 | Finished meta-testing... +2025-04-04 02:50:54 | [maml_trainer] epoch #278 | Saving snapshot... +2025-04-04 02:51:18 | [maml_trainer] epoch #278 | Saved +2025-04-04 02:51:18 | [maml_trainer] epoch #278 | Time 133100.07 s +2025-04-04 02:51:18 | [maml_trainer] epoch #278 | EpochTime 454.42 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.8637 +Average/AverageReturn -32.4269 +Average/Iteration 278 +Average/MaxReturn -1.82316 +Average/MinReturn -59.8552 +Average/NumEpisodes 80 +Average/StdReturn 11.383 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83559 +GaussianMLPPolicy/KLAfter 0.00160437 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.11494e-05 +GaussianMLPPolicy/LossBefore -1.00136e-08 +GaussianMLPPolicy/dLoss -6.11594e-05 +Iteration 278 +MetaTest/Average/AverageDiscountedReturn -30.6894 +MetaTest/Average/AverageReturn -30.6894 +MetaTest/Average/Iteration 278 +MetaTest/Average/MaxReturn -10.0498 +MetaTest/Average/MinReturn -59.2179 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.05263 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.6894 +MetaTest/__unnamed_task__/AverageReturn -30.6894 +MetaTest/__unnamed_task__/Iteration 278 +MetaTest/__unnamed_task__/MaxReturn -10.0498 +MetaTest/__unnamed_task__/MinReturn -59.2179 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.05263 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.928e+06 +__unnamed_task__/AverageDiscountedReturn -15.8637 +__unnamed_task__/AverageReturn -32.4269 +__unnamed_task__/Iteration 278 +__unnamed_task__/MaxReturn -1.82316 +__unnamed_task__/MinReturn -59.8552 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.383 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 02:57:04 | [maml_trainer] epoch #279 | Sampling for adapation and meta-testing... +2025-04-04 02:58:29 | [maml_trainer] epoch #279 | Finished meta-testing... +2025-04-04 02:58:29 | [maml_trainer] epoch #279 | Saving snapshot... +2025-04-04 02:58:52 | [maml_trainer] epoch #279 | Saved +2025-04-04 02:58:52 | [maml_trainer] epoch #279 | Time 133554.68 s +2025-04-04 02:58:52 | [maml_trainer] epoch #279 | EpochTime 454.61 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1383 +Average/AverageReturn -31.1418 +Average/Iteration 279 +Average/MaxReturn 10.2481 +Average/MinReturn -59.4906 +Average/NumEpisodes 80 +Average/StdReturn 10.4565 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83564 +GaussianMLPPolicy/KLAfter 0.00161106 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.19419e-05 +GaussianMLPPolicy/LossBefore 3.20077e-08 +GaussianMLPPolicy/dLoss -2.19099e-05 +Iteration 279 +MetaTest/Average/AverageDiscountedReturn -31.5589 +MetaTest/Average/AverageReturn -31.5589 +MetaTest/Average/Iteration 279 +MetaTest/Average/MaxReturn -8.21881 +MetaTest/Average/MinReturn -54.2416 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.59297 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -31.5589 +MetaTest/__unnamed_task__/AverageReturn -31.5589 +MetaTest/__unnamed_task__/Iteration 279 +MetaTest/__unnamed_task__/MaxReturn -8.21881 +MetaTest/__unnamed_task__/MinReturn -54.2416 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.59297 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.96e+06 +__unnamed_task__/AverageDiscountedReturn -15.1383 +__unnamed_task__/AverageReturn -31.1418 +__unnamed_task__/Iteration 279 +__unnamed_task__/MaxReturn 10.2481 +__unnamed_task__/MinReturn -59.4906 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.4565 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:04:39 | [maml_trainer] epoch #280 | Sampling for adapation and meta-testing... +2025-04-04 03:06:05 | [maml_trainer] epoch #280 | Finished meta-testing... +2025-04-04 03:06:05 | [maml_trainer] epoch #280 | Saving snapshot... +2025-04-04 03:06:26 | [maml_trainer] epoch #280 | Saved +2025-04-04 03:06:26 | [maml_trainer] epoch #280 | Time 134008.55 s +2025-04-04 03:06:26 | [maml_trainer] epoch #280 | EpochTime 453.86 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0409 +Average/AverageReturn -30.9891 +Average/Iteration 280 +Average/MaxReturn -1.52524 +Average/MinReturn -48.4797 +Average/NumEpisodes 80 +Average/StdReturn 7.95249 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83554 +GaussianMLPPolicy/KLAfter 0.00202826 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.18249e-06 +GaussianMLPPolicy/LossBefore -4.17232e-10 +GaussianMLPPolicy/dLoss 6.18207e-06 +Iteration 280 +MetaTest/Average/AverageDiscountedReturn -35.8646 +MetaTest/Average/AverageReturn -35.8646 +MetaTest/Average/Iteration 280 +MetaTest/Average/MaxReturn -19.3001 +MetaTest/Average/MinReturn -50.0103 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.158 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -35.8646 +MetaTest/__unnamed_task__/AverageReturn -35.8646 +MetaTest/__unnamed_task__/Iteration 280 +MetaTest/__unnamed_task__/MaxReturn -19.3001 +MetaTest/__unnamed_task__/MinReturn -50.0103 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.158 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.992e+06 +__unnamed_task__/AverageDiscountedReturn -15.0409 +__unnamed_task__/AverageReturn -30.9891 +__unnamed_task__/Iteration 280 +__unnamed_task__/MaxReturn -1.52524 +__unnamed_task__/MinReturn -48.4797 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.95249 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:12:09 | [maml_trainer] epoch #281 | Sampling for adapation and meta-testing... +2025-04-04 03:13:34 | [maml_trainer] epoch #281 | Finished meta-testing... +2025-04-04 03:13:34 | [maml_trainer] epoch #281 | Saving snapshot... +2025-04-04 03:13:56 | [maml_trainer] epoch #281 | Saved +2025-04-04 03:13:56 | [maml_trainer] epoch #281 | Time 134458.82 s +2025-04-04 03:13:56 | [maml_trainer] epoch #281 | EpochTime 450.27 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.5613 +Average/AverageReturn -32.5773 +Average/Iteration 281 +Average/MaxReturn -6.70908 +Average/MinReturn -63.7004 +Average/NumEpisodes 80 +Average/StdReturn 9.56076 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83415 +GaussianMLPPolicy/KLAfter 0.00254492 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000106015 +GaussianMLPPolicy/LossBefore 1.61529e-08 +GaussianMLPPolicy/dLoss 0.000106031 +Iteration 281 +MetaTest/Average/AverageDiscountedReturn -30.3855 +MetaTest/Average/AverageReturn -30.3855 +MetaTest/Average/Iteration 281 +MetaTest/Average/MaxReturn -12.1055 +MetaTest/Average/MinReturn -40.2801 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.81792 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.3855 +MetaTest/__unnamed_task__/AverageReturn -30.3855 +MetaTest/__unnamed_task__/Iteration 281 +MetaTest/__unnamed_task__/MaxReturn -12.1055 +MetaTest/__unnamed_task__/MinReturn -40.2801 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.81792 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.024e+06 +__unnamed_task__/AverageDiscountedReturn -15.5613 +__unnamed_task__/AverageReturn -32.5773 +__unnamed_task__/Iteration 281 +__unnamed_task__/MaxReturn -6.70908 +__unnamed_task__/MinReturn -63.7004 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.56076 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:19:36 | [maml_trainer] epoch #282 | Sampling for adapation and meta-testing... +2025-04-04 03:21:02 | [maml_trainer] epoch #282 | Finished meta-testing... +2025-04-04 03:21:02 | [maml_trainer] epoch #282 | Saving snapshot... +2025-04-04 03:21:24 | [maml_trainer] epoch #282 | Saved +2025-04-04 03:21:24 | [maml_trainer] epoch #282 | Time 134906.53 s +2025-04-04 03:21:24 | [maml_trainer] epoch #282 | EpochTime 447.71 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3937 +Average/AverageReturn -30.7175 +Average/Iteration 282 +Average/MaxReturn 28.3031 +Average/MinReturn -48.3517 +Average/NumEpisodes 80 +Average/StdReturn 11.6909 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.83255 +GaussianMLPPolicy/KLAfter 0.00309119 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.42219e-05 +GaussianMLPPolicy/LossBefore 1.12057e-08 +GaussianMLPPolicy/dLoss -4.42107e-05 +Iteration 282 +MetaTest/Average/AverageDiscountedReturn -33.0147 +MetaTest/Average/AverageReturn -33.0147 +MetaTest/Average/Iteration 282 +MetaTest/Average/MaxReturn -20.3397 +MetaTest/Average/MinReturn -47.6009 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.94803 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -33.0147 +MetaTest/__unnamed_task__/AverageReturn -33.0147 +MetaTest/__unnamed_task__/Iteration 282 +MetaTest/__unnamed_task__/MaxReturn -20.3397 +MetaTest/__unnamed_task__/MinReturn -47.6009 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.94803 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.056e+06 +__unnamed_task__/AverageDiscountedReturn -15.3937 +__unnamed_task__/AverageReturn -30.7175 +__unnamed_task__/Iteration 282 +__unnamed_task__/MaxReturn 28.3031 +__unnamed_task__/MinReturn -48.3517 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6909 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:27:07 | [maml_trainer] epoch #283 | Sampling for adapation and meta-testing... +2025-04-04 03:28:32 | [maml_trainer] epoch #283 | Finished meta-testing... +2025-04-04 03:28:32 | [maml_trainer] epoch #283 | Saving snapshot... +2025-04-04 03:28:54 | [maml_trainer] epoch #283 | Saved +2025-04-04 03:28:54 | [maml_trainer] epoch #283 | Time 135356.54 s +2025-04-04 03:28:54 | [maml_trainer] epoch #283 | EpochTime 450.00 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.5575 +Average/AverageReturn -29.1777 +Average/Iteration 283 +Average/MaxReturn 21.0306 +Average/MinReturn -46.9902 +Average/NumEpisodes 80 +Average/StdReturn 11.2313 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.8302 +GaussianMLPPolicy/KLAfter 0.00397535 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.06307e-06 +GaussianMLPPolicy/LossBefore 6.13928e-09 +GaussianMLPPolicy/dLoss -9.05693e-06 +Iteration 283 +MetaTest/Average/AverageDiscountedReturn -30.0073 +MetaTest/Average/AverageReturn -30.0073 +MetaTest/Average/Iteration 283 +MetaTest/Average/MaxReturn -4.66376 +MetaTest/Average/MinReturn -42.08 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.2191 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.0073 +MetaTest/__unnamed_task__/AverageReturn -30.0073 +MetaTest/__unnamed_task__/Iteration 283 +MetaTest/__unnamed_task__/MaxReturn -4.66376 +MetaTest/__unnamed_task__/MinReturn -42.08 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.2191 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.088e+06 +__unnamed_task__/AverageDiscountedReturn -14.5575 +__unnamed_task__/AverageReturn -29.1777 +__unnamed_task__/Iteration 283 +__unnamed_task__/MaxReturn 21.0306 +__unnamed_task__/MinReturn -46.9902 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.2313 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:34:38 | [maml_trainer] epoch #284 | Sampling for adapation and meta-testing... +2025-04-04 03:36:01 | [maml_trainer] epoch #284 | Finished meta-testing... +2025-04-04 03:36:01 | [maml_trainer] epoch #284 | Saving snapshot... +2025-04-04 03:36:24 | [maml_trainer] epoch #284 | Saved +2025-04-04 03:36:25 | [maml_trainer] epoch #284 | Time 135806.98 s +2025-04-04 03:36:25 | [maml_trainer] epoch #284 | EpochTime 450.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.272 +Average/AverageReturn -30.583 +Average/Iteration 284 +Average/MaxReturn -8.35786 +Average/MinReturn -53.6617 +Average/NumEpisodes 80 +Average/StdReturn 7.59319 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82879 +GaussianMLPPolicy/KLAfter 0.00526852 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.44374e-05 +GaussianMLPPolicy/LossBefore 2.63453e-08 +GaussianMLPPolicy/dLoss 9.44638e-05 +Iteration 284 +MetaTest/Average/AverageDiscountedReturn -26.6958 +MetaTest/Average/AverageReturn -26.6958 +MetaTest/Average/Iteration 284 +MetaTest/Average/MaxReturn 3.01377 +MetaTest/Average/MinReturn -45.9007 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.9873 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -26.6958 +MetaTest/__unnamed_task__/AverageReturn -26.6958 +MetaTest/__unnamed_task__/Iteration 284 +MetaTest/__unnamed_task__/MaxReturn 3.01377 +MetaTest/__unnamed_task__/MinReturn -45.9007 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.9873 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.12e+06 +__unnamed_task__/AverageDiscountedReturn -15.272 +__unnamed_task__/AverageReturn -30.583 +__unnamed_task__/Iteration 284 +__unnamed_task__/MaxReturn -8.35786 +__unnamed_task__/MinReturn -53.6617 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.59319 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:42:03 | [maml_trainer] epoch #285 | Sampling for adapation and meta-testing... +2025-04-04 03:43:29 | [maml_trainer] epoch #285 | Finished meta-testing... +2025-04-04 03:43:29 | [maml_trainer] epoch #285 | Saving snapshot... +2025-04-04 03:43:52 | [maml_trainer] epoch #285 | Saved +2025-04-04 03:43:52 | [maml_trainer] epoch #285 | Time 136254.65 s +2025-04-04 03:43:52 | [maml_trainer] epoch #285 | EpochTime 447.66 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.3914 +Average/AverageReturn -27.9103 +Average/Iteration 285 +Average/MaxReturn 14.4876 +Average/MinReturn -48.7384 +Average/NumEpisodes 80 +Average/StdReturn 10.1852 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82815 +GaussianMLPPolicy/KLAfter 0.00574501 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.75908e-05 +GaussianMLPPolicy/LossBefore 1.17421e-08 +GaussianMLPPolicy/dLoss 7.76025e-05 +Iteration 285 +MetaTest/Average/AverageDiscountedReturn -30.4515 +MetaTest/Average/AverageReturn -30.4515 +MetaTest/Average/Iteration 285 +MetaTest/Average/MaxReturn -14.3683 +MetaTest/Average/MinReturn -42.5816 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.2499 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.4515 +MetaTest/__unnamed_task__/AverageReturn -30.4515 +MetaTest/__unnamed_task__/Iteration 285 +MetaTest/__unnamed_task__/MaxReturn -14.3683 +MetaTest/__unnamed_task__/MinReturn -42.5816 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.2499 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.152e+06 +__unnamed_task__/AverageDiscountedReturn -14.3914 +__unnamed_task__/AverageReturn -27.9103 +__unnamed_task__/Iteration 285 +__unnamed_task__/MaxReturn 14.4876 +__unnamed_task__/MinReturn -48.7384 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1852 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:49:37 | [maml_trainer] epoch #286 | Sampling for adapation and meta-testing... +2025-04-04 03:51:03 | [maml_trainer] epoch #286 | Finished meta-testing... +2025-04-04 03:51:03 | [maml_trainer] epoch #286 | Saving snapshot... +2025-04-04 03:51:25 | [maml_trainer] epoch #286 | Saved +2025-04-04 03:51:25 | [maml_trainer] epoch #286 | Time 136707.45 s +2025-04-04 03:51:25 | [maml_trainer] epoch #286 | EpochTime 452.80 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0582 +Average/AverageReturn -28.6619 +Average/Iteration 286 +Average/MaxReturn 18.4117 +Average/MinReturn -46.9877 +Average/NumEpisodes 80 +Average/StdReturn 9.5765 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82789 +GaussianMLPPolicy/KLAfter 0.00518873 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.00036918 +GaussianMLPPolicy/LossBefore 1.96695e-09 +GaussianMLPPolicy/dLoss -0.000369178 +Iteration 286 +MetaTest/Average/AverageDiscountedReturn -28.1758 +MetaTest/Average/AverageReturn -28.1758 +MetaTest/Average/Iteration 286 +MetaTest/Average/MaxReturn -20.1646 +MetaTest/Average/MinReturn -42.0494 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.74727 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.1758 +MetaTest/__unnamed_task__/AverageReturn -28.1758 +MetaTest/__unnamed_task__/Iteration 286 +MetaTest/__unnamed_task__/MaxReturn -20.1646 +MetaTest/__unnamed_task__/MinReturn -42.0494 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.74727 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.184e+06 +__unnamed_task__/AverageDiscountedReturn -15.0582 +__unnamed_task__/AverageReturn -28.6619 +__unnamed_task__/Iteration 286 +__unnamed_task__/MaxReturn 18.4117 +__unnamed_task__/MinReturn -46.9877 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.5765 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 03:57:08 | [maml_trainer] epoch #287 | Sampling for adapation and meta-testing... +2025-04-04 03:58:35 | [maml_trainer] epoch #287 | Finished meta-testing... +2025-04-04 03:58:35 | [maml_trainer] epoch #287 | Saving snapshot... +2025-04-04 03:58:58 | [maml_trainer] epoch #287 | Saved +2025-04-04 03:58:58 | [maml_trainer] epoch #287 | Time 137160.39 s +2025-04-04 03:58:58 | [maml_trainer] epoch #287 | EpochTime 452.93 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.2182 +Average/AverageReturn -28.6811 +Average/Iteration 287 +Average/MaxReturn 9.37092 +Average/MinReturn -57.7594 +Average/NumEpisodes 80 +Average/StdReturn 9.59552 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82797 +GaussianMLPPolicy/KLAfter 0.00450477 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.30803e-05 +GaussianMLPPolicy/LossBefore 2.49147e-08 +GaussianMLPPolicy/dLoss -5.30554e-05 +Iteration 287 +MetaTest/Average/AverageDiscountedReturn -29.4894 +MetaTest/Average/AverageReturn -29.4894 +MetaTest/Average/Iteration 287 +MetaTest/Average/MaxReturn -10.6568 +MetaTest/Average/MinReturn -44.0586 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.75984 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.4894 +MetaTest/__unnamed_task__/AverageReturn -29.4894 +MetaTest/__unnamed_task__/Iteration 287 +MetaTest/__unnamed_task__/MaxReturn -10.6568 +MetaTest/__unnamed_task__/MinReturn -44.0586 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.75984 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.216e+06 +__unnamed_task__/AverageDiscountedReturn -15.2182 +__unnamed_task__/AverageReturn -28.6811 +__unnamed_task__/Iteration 287 +__unnamed_task__/MaxReturn 9.37092 +__unnamed_task__/MinReturn -57.7594 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.59552 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:04:40 | [maml_trainer] epoch #288 | Sampling for adapation and meta-testing... +2025-04-04 04:06:06 | [maml_trainer] epoch #288 | Finished meta-testing... +2025-04-04 04:06:06 | [maml_trainer] epoch #288 | Saving snapshot... +2025-04-04 04:06:29 | [maml_trainer] epoch #288 | Saved +2025-04-04 04:06:29 | [maml_trainer] epoch #288 | Time 137611.17 s +2025-04-04 04:06:29 | [maml_trainer] epoch #288 | EpochTime 450.78 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4786 +Average/AverageReturn -28.854 +Average/Iteration 288 +Average/MaxReturn 21.9789 +Average/MinReturn -57.5887 +Average/NumEpisodes 80 +Average/StdReturn 11.0519 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82853 +GaussianMLPPolicy/KLAfter 0.00356446 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.7938e-05 +GaussianMLPPolicy/LossBefore 4.14252e-09 +GaussianMLPPolicy/dLoss -2.79338e-05 +Iteration 288 +MetaTest/Average/AverageDiscountedReturn -30.7842 +MetaTest/Average/AverageReturn -30.7842 +MetaTest/Average/Iteration 288 +MetaTest/Average/MaxReturn -11.0571 +MetaTest/Average/MinReturn -43.8004 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.96351 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.7842 +MetaTest/__unnamed_task__/AverageReturn -30.7842 +MetaTest/__unnamed_task__/Iteration 288 +MetaTest/__unnamed_task__/MaxReturn -11.0571 +MetaTest/__unnamed_task__/MinReturn -43.8004 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.96351 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.248e+06 +__unnamed_task__/AverageDiscountedReturn -15.4786 +__unnamed_task__/AverageReturn -28.854 +__unnamed_task__/Iteration 288 +__unnamed_task__/MaxReturn 21.9789 +__unnamed_task__/MinReturn -57.5887 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0519 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:12:11 | [maml_trainer] epoch #289 | Sampling for adapation and meta-testing... +2025-04-04 04:13:38 | [maml_trainer] epoch #289 | Finished meta-testing... +2025-04-04 04:13:38 | [maml_trainer] epoch #289 | Saving snapshot... +2025-04-04 04:14:00 | [maml_trainer] epoch #289 | Saved +2025-04-04 04:14:00 | [maml_trainer] epoch #289 | Time 138062.03 s +2025-04-04 04:14:00 | [maml_trainer] epoch #289 | EpochTime 450.86 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1776 +Average/AverageReturn -29.4566 +Average/Iteration 289 +Average/MaxReturn 5.6211 +Average/MinReturn -51.8253 +Average/NumEpisodes 80 +Average/StdReturn 9.43052 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82829 +GaussianMLPPolicy/KLAfter 0.00207213 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.84434e-05 +GaussianMLPPolicy/LossBefore 2.86102e-09 +GaussianMLPPolicy/dLoss 3.84462e-05 +Iteration 289 +MetaTest/Average/AverageDiscountedReturn -29.684 +MetaTest/Average/AverageReturn -29.684 +MetaTest/Average/Iteration 289 +MetaTest/Average/MaxReturn -16.8085 +MetaTest/Average/MinReturn -44.3896 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.81489 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.684 +MetaTest/__unnamed_task__/AverageReturn -29.684 +MetaTest/__unnamed_task__/Iteration 289 +MetaTest/__unnamed_task__/MaxReturn -16.8085 +MetaTest/__unnamed_task__/MinReturn -44.3896 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.81489 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.28e+06 +__unnamed_task__/AverageDiscountedReturn -15.1776 +__unnamed_task__/AverageReturn -29.4566 +__unnamed_task__/Iteration 289 +__unnamed_task__/MaxReturn 5.6211 +__unnamed_task__/MinReturn -51.8253 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.43052 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:19:42 | [maml_trainer] epoch #290 | Sampling for adapation and meta-testing... +2025-04-04 04:21:07 | [maml_trainer] epoch #290 | Finished meta-testing... +2025-04-04 04:21:07 | [maml_trainer] epoch #290 | Saving snapshot... +2025-04-04 04:21:31 | [maml_trainer] epoch #290 | Saved +2025-04-04 04:21:31 | [maml_trainer] epoch #290 | Time 138513.49 s +2025-04-04 04:21:31 | [maml_trainer] epoch #290 | EpochTime 451.45 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.4492 +Average/AverageReturn -29.7401 +Average/Iteration 290 +Average/MaxReturn 36.5495 +Average/MinReturn -58.2966 +Average/NumEpisodes 80 +Average/StdReturn 11.0365 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82765 +GaussianMLPPolicy/KLAfter 0.00220515 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.92155e-05 +GaussianMLPPolicy/LossBefore 1.90437e-08 +GaussianMLPPolicy/dLoss 7.92346e-05 +Iteration 290 +MetaTest/Average/AverageDiscountedReturn -28.5457 +MetaTest/Average/AverageReturn -28.5457 +MetaTest/Average/Iteration 290 +MetaTest/Average/MaxReturn -1.5771 +MetaTest/Average/MinReturn -46.2672 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.3177 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.5457 +MetaTest/__unnamed_task__/AverageReturn -28.5457 +MetaTest/__unnamed_task__/Iteration 290 +MetaTest/__unnamed_task__/MaxReturn -1.5771 +MetaTest/__unnamed_task__/MinReturn -46.2672 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.3177 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.312e+06 +__unnamed_task__/AverageDiscountedReturn -15.4492 +__unnamed_task__/AverageReturn -29.7401 +__unnamed_task__/Iteration 290 +__unnamed_task__/MaxReturn 36.5495 +__unnamed_task__/MinReturn -58.2966 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.0365 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:27:15 | [maml_trainer] epoch #291 | Sampling for adapation and meta-testing... +2025-04-04 04:28:42 | [maml_trainer] epoch #291 | Finished meta-testing... +2025-04-04 04:28:42 | [maml_trainer] epoch #291 | Saving snapshot... +2025-04-04 04:29:05 | [maml_trainer] epoch #291 | Saved +2025-04-04 04:29:05 | [maml_trainer] epoch #291 | Time 138967.23 s +2025-04-04 04:29:05 | [maml_trainer] epoch #291 | EpochTime 453.74 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.7021 +Average/AverageReturn -28.4563 +Average/Iteration 291 +Average/MaxReturn 19.7575 +Average/MinReturn -55.3864 +Average/NumEpisodes 80 +Average/StdReturn 10.8282 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82585 +GaussianMLPPolicy/KLAfter 0.00206384 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.11254e-05 +GaussianMLPPolicy/LossBefore -1.03116e-08 +GaussianMLPPolicy/dLoss 2.11151e-05 +Iteration 291 +MetaTest/Average/AverageDiscountedReturn -29.1093 +MetaTest/Average/AverageReturn -29.1093 +MetaTest/Average/Iteration 291 +MetaTest/Average/MaxReturn -10.6101 +MetaTest/Average/MinReturn -39.5562 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.96817 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.1093 +MetaTest/__unnamed_task__/AverageReturn -29.1093 +MetaTest/__unnamed_task__/Iteration 291 +MetaTest/__unnamed_task__/MaxReturn -10.6101 +MetaTest/__unnamed_task__/MinReturn -39.5562 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.96817 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.344e+06 +__unnamed_task__/AverageDiscountedReturn -14.7021 +__unnamed_task__/AverageReturn -28.4563 +__unnamed_task__/Iteration 291 +__unnamed_task__/MaxReturn 19.7575 +__unnamed_task__/MinReturn -55.3864 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.8282 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:34:53 | [maml_trainer] epoch #292 | Sampling for adapation and meta-testing... +2025-04-04 04:36:18 | [maml_trainer] epoch #292 | Finished meta-testing... +2025-04-04 04:36:18 | [maml_trainer] epoch #292 | Saving snapshot... +2025-04-04 04:36:40 | [maml_trainer] epoch #292 | Saved +2025-04-04 04:36:40 | [maml_trainer] epoch #292 | Time 139422.11 s +2025-04-04 04:36:40 | [maml_trainer] epoch #292 | EpochTime 454.87 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.5367 +Average/AverageReturn -27.5979 +Average/Iteration 292 +Average/MaxReturn 0.197223 +Average/MinReturn -50.3118 +Average/NumEpisodes 80 +Average/StdReturn 8.48651 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82441 +GaussianMLPPolicy/KLAfter 0.00048006 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.55609e-05 +GaussianMLPPolicy/LossBefore -2.53916e-08 +GaussianMLPPolicy/dLoss -5.55863e-05 +Iteration 292 +MetaTest/Average/AverageDiscountedReturn -28.1798 +MetaTest/Average/AverageReturn -28.1798 +MetaTest/Average/Iteration 292 +MetaTest/Average/MaxReturn -13.9901 +MetaTest/Average/MinReturn -41.5498 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.03125 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.1798 +MetaTest/__unnamed_task__/AverageReturn -28.1798 +MetaTest/__unnamed_task__/Iteration 292 +MetaTest/__unnamed_task__/MaxReturn -13.9901 +MetaTest/__unnamed_task__/MinReturn -41.5498 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.03125 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.376e+06 +__unnamed_task__/AverageDiscountedReturn -14.5367 +__unnamed_task__/AverageReturn -27.5979 +__unnamed_task__/Iteration 292 +__unnamed_task__/MaxReturn 0.197223 +__unnamed_task__/MinReturn -50.3118 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.48651 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:42:18 | [maml_trainer] epoch #293 | Sampling for adapation and meta-testing... +2025-04-04 04:43:46 | [maml_trainer] epoch #293 | Finished meta-testing... +2025-04-04 04:43:46 | [maml_trainer] epoch #293 | Saving snapshot... +2025-04-04 04:44:09 | [maml_trainer] epoch #293 | Saved +2025-04-04 04:44:09 | [maml_trainer] epoch #293 | Time 139871.42 s +2025-04-04 04:44:09 | [maml_trainer] epoch #293 | EpochTime 449.31 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.6252 +Average/AverageReturn -29.3756 +Average/Iteration 293 +Average/MaxReturn -2.24804 +Average/MinReturn -46.988 +Average/NumEpisodes 80 +Average/StdReturn 8.80481 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.82224 +GaussianMLPPolicy/KLAfter 0.00137899 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.87531e-05 +GaussianMLPPolicy/LossBefore -6.02007e-09 +GaussianMLPPolicy/dLoss 8.87471e-05 +Iteration 293 +MetaTest/Average/AverageDiscountedReturn -29.3145 +MetaTest/Average/AverageReturn -29.3145 +MetaTest/Average/Iteration 293 +MetaTest/Average/MaxReturn -9.18963 +MetaTest/Average/MinReturn -47.3361 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.96361 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.3145 +MetaTest/__unnamed_task__/AverageReturn -29.3145 +MetaTest/__unnamed_task__/Iteration 293 +MetaTest/__unnamed_task__/MaxReturn -9.18963 +MetaTest/__unnamed_task__/MinReturn -47.3361 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.96361 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.408e+06 +__unnamed_task__/AverageDiscountedReturn -15.6252 +__unnamed_task__/AverageReturn -29.3756 +__unnamed_task__/Iteration 293 +__unnamed_task__/MaxReturn -2.24804 +__unnamed_task__/MinReturn -46.988 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.80481 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:49:50 | [maml_trainer] epoch #294 | Sampling for adapation and meta-testing... +2025-04-04 04:51:17 | [maml_trainer] epoch #294 | Finished meta-testing... +2025-04-04 04:51:17 | [maml_trainer] epoch #294 | Saving snapshot... +2025-04-04 04:51:39 | [maml_trainer] epoch #294 | Saved +2025-04-04 04:51:39 | [maml_trainer] epoch #294 | Time 140321.82 s +2025-04-04 04:51:39 | [maml_trainer] epoch #294 | EpochTime 450.40 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.1731 +Average/AverageReturn -29.4943 +Average/Iteration 294 +Average/MaxReturn 3.78434 +Average/MinReturn -45.9221 +Average/NumEpisodes 80 +Average/StdReturn 7.87367 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.81965 +GaussianMLPPolicy/KLAfter 0.00195311 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000273751 +GaussianMLPPolicy/LossBefore -1.72853e-08 +GaussianMLPPolicy/dLoss -0.000273768 +Iteration 294 +MetaTest/Average/AverageDiscountedReturn -30.834 +MetaTest/Average/AverageReturn -30.834 +MetaTest/Average/Iteration 294 +MetaTest/Average/MaxReturn -4.49831 +MetaTest/Average/MinReturn -52.0834 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3311 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -30.834 +MetaTest/__unnamed_task__/AverageReturn -30.834 +MetaTest/__unnamed_task__/Iteration 294 +MetaTest/__unnamed_task__/MaxReturn -4.49831 +MetaTest/__unnamed_task__/MinReturn -52.0834 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3311 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.44e+06 +__unnamed_task__/AverageDiscountedReturn -15.1731 +__unnamed_task__/AverageReturn -29.4943 +__unnamed_task__/Iteration 294 +__unnamed_task__/MaxReturn 3.78434 +__unnamed_task__/MinReturn -45.9221 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.87367 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 04:57:21 | [maml_trainer] epoch #295 | Sampling for adapation and meta-testing... +2025-04-04 04:58:47 | [maml_trainer] epoch #295 | Finished meta-testing... +2025-04-04 04:58:47 | [maml_trainer] epoch #295 | Saving snapshot... +2025-04-04 04:59:08 | [maml_trainer] epoch #295 | Saved +2025-04-04 04:59:08 | [maml_trainer] epoch #295 | Time 140770.83 s +2025-04-04 04:59:08 | [maml_trainer] epoch #295 | EpochTime 449.01 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.3871 +Average/AverageReturn -29.3465 +Average/Iteration 295 +Average/MaxReturn 31.0527 +Average/MinReturn -64.2736 +Average/NumEpisodes 80 +Average/StdReturn 10.1466 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.81673 +GaussianMLPPolicy/KLAfter 0.00296157 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.14454e-05 +GaussianMLPPolicy/LossBefore 5.33462e-09 +GaussianMLPPolicy/dLoss 2.14507e-05 +Iteration 295 +MetaTest/Average/AverageDiscountedReturn -26.8348 +MetaTest/Average/AverageReturn -26.8348 +MetaTest/Average/Iteration 295 +MetaTest/Average/MaxReturn 5.36312 +MetaTest/Average/MinReturn -48.8608 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1516 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -26.8348 +MetaTest/__unnamed_task__/AverageReturn -26.8348 +MetaTest/__unnamed_task__/Iteration 295 +MetaTest/__unnamed_task__/MaxReturn 5.36312 +MetaTest/__unnamed_task__/MinReturn -48.8608 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1516 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.472e+06 +__unnamed_task__/AverageDiscountedReturn -15.3871 +__unnamed_task__/AverageReturn -29.3465 +__unnamed_task__/Iteration 295 +__unnamed_task__/MaxReturn 31.0527 +__unnamed_task__/MinReturn -64.2736 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.1466 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 05:04:47 | [maml_trainer] epoch #296 | Sampling for adapation and meta-testing... +2025-04-04 05:06:11 | [maml_trainer] epoch #296 | Finished meta-testing... +2025-04-04 05:06:11 | [maml_trainer] epoch #296 | Saving snapshot... +2025-04-04 05:06:34 | [maml_trainer] epoch #296 | Saved +2025-04-04 05:06:34 | [maml_trainer] epoch #296 | Time 141216.89 s +2025-04-04 05:06:34 | [maml_trainer] epoch #296 | EpochTime 446.05 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.808 +Average/AverageReturn -28.1426 +Average/Iteration 296 +Average/MaxReturn 1.03945 +Average/MinReturn -49.7819 +Average/NumEpisodes 80 +Average/StdReturn 8.85452 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.81571 +GaussianMLPPolicy/KLAfter 0.00412172 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000164574 +GaussianMLPPolicy/LossBefore 2.08616e-08 +GaussianMLPPolicy/dLoss 0.000164595 +Iteration 296 +MetaTest/Average/AverageDiscountedReturn -28.5564 +MetaTest/Average/AverageReturn -28.5564 +MetaTest/Average/Iteration 296 +MetaTest/Average/MaxReturn -21.4458 +MetaTest/Average/MinReturn -39.0175 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 4.60842 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -28.5564 +MetaTest/__unnamed_task__/AverageReturn -28.5564 +MetaTest/__unnamed_task__/Iteration 296 +MetaTest/__unnamed_task__/MaxReturn -21.4458 +MetaTest/__unnamed_task__/MinReturn -39.0175 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 4.60842 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.504e+06 +__unnamed_task__/AverageDiscountedReturn -14.808 +__unnamed_task__/AverageReturn -28.1426 +__unnamed_task__/Iteration 296 +__unnamed_task__/MaxReturn 1.03945 +__unnamed_task__/MinReturn -49.7819 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.85452 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 05:12:13 | [maml_trainer] epoch #297 | Sampling for adapation and meta-testing... +2025-04-04 05:13:37 | [maml_trainer] epoch #297 | Finished meta-testing... +2025-04-04 05:13:37 | [maml_trainer] epoch #297 | Saving snapshot... +2025-04-04 05:13:59 | [maml_trainer] epoch #297 | Saved +2025-04-04 05:13:59 | [maml_trainer] epoch #297 | Time 141661.46 s +2025-04-04 05:13:59 | [maml_trainer] epoch #297 | EpochTime 444.57 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -15.0911 +Average/AverageReturn -28.9775 +Average/Iteration 297 +Average/MaxReturn -5.81223 +Average/MinReturn -44.075 +Average/NumEpisodes 80 +Average/StdReturn 6.71785 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.81503 +GaussianMLPPolicy/KLAfter 0.00221584 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.48019e-05 +GaussianMLPPolicy/LossBefore 2.96235e-08 +GaussianMLPPolicy/dLoss 5.48316e-05 +Iteration 297 +MetaTest/Average/AverageDiscountedReturn -32.6012 +MetaTest/Average/AverageReturn -32.6012 +MetaTest/Average/Iteration 297 +MetaTest/Average/MaxReturn -22.5053 +MetaTest/Average/MinReturn -47.8494 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.87239 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -32.6012 +MetaTest/__unnamed_task__/AverageReturn -32.6012 +MetaTest/__unnamed_task__/Iteration 297 +MetaTest/__unnamed_task__/MaxReturn -22.5053 +MetaTest/__unnamed_task__/MinReturn -47.8494 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.87239 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.536e+06 +__unnamed_task__/AverageDiscountedReturn -15.0911 +__unnamed_task__/AverageReturn -28.9775 +__unnamed_task__/Iteration 297 +__unnamed_task__/MaxReturn -5.81223 +__unnamed_task__/MinReturn -44.075 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 6.71785 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 05:19:40 | [maml_trainer] epoch #298 | Sampling for adapation and meta-testing... +2025-04-04 05:21:05 | [maml_trainer] epoch #298 | Finished meta-testing... +2025-04-04 05:21:05 | [maml_trainer] epoch #298 | Saving snapshot... +2025-04-04 05:21:27 | [maml_trainer] epoch #298 | Saved +2025-04-04 05:21:27 | [maml_trainer] epoch #298 | Time 142109.31 s +2025-04-04 05:21:27 | [maml_trainer] epoch #298 | EpochTime 447.85 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.7757 +Average/AverageReturn -28.4348 +Average/Iteration 298 +Average/MaxReturn 3.06664 +Average/MinReturn -46.4034 +Average/NumEpisodes 80 +Average/StdReturn 8.35218 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.81308 +GaussianMLPPolicy/KLAfter 0.00182217 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000173897 +GaussianMLPPolicy/LossBefore -1.055e-08 +GaussianMLPPolicy/dLoss -0.000173908 +Iteration 298 +MetaTest/Average/AverageDiscountedReturn -27.4229 +MetaTest/Average/AverageReturn -27.4229 +MetaTest/Average/Iteration 298 +MetaTest/Average/MaxReturn -7.15364 +MetaTest/Average/MinReturn -37.9788 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.18082 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -27.4229 +MetaTest/__unnamed_task__/AverageReturn -27.4229 +MetaTest/__unnamed_task__/Iteration 298 +MetaTest/__unnamed_task__/MaxReturn -7.15364 +MetaTest/__unnamed_task__/MinReturn -37.9788 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.18082 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.568e+06 +__unnamed_task__/AverageDiscountedReturn -14.7757 +__unnamed_task__/AverageReturn -28.4348 +__unnamed_task__/Iteration 298 +__unnamed_task__/MaxReturn 3.06664 +__unnamed_task__/MinReturn -46.4034 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 8.35218 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ------------- +2025-04-04 05:27:13 | [maml_trainer] epoch #299 | Sampling for adapation and meta-testing... +2025-04-04 05:28:38 | [maml_trainer] epoch #299 | Finished meta-testing... +2025-04-04 05:28:38 | [maml_trainer] epoch #299 | Saving snapshot... +2025-04-04 05:29:00 | [maml_trainer] epoch #299 | Saved +2025-04-04 05:29:00 | [maml_trainer] epoch #299 | Time 142562.75 s +2025-04-04 05:29:00 | [maml_trainer] epoch #299 | EpochTime 453.44 s +------------------------------------------------- ------------- +Average/AverageDiscountedReturn -14.5894 +Average/AverageReturn -29.3589 +Average/Iteration 299 +Average/MaxReturn -1.38713 +Average/MinReturn -42.4645 +Average/NumEpisodes 80 +Average/StdReturn 7.47066 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 9.80979 +GaussianMLPPolicy/KLAfter 0.00144226 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -0.000109842 +GaussianMLPPolicy/LossBefore -8.40426e-09 +GaussianMLPPolicy/dLoss 0.000109834 +Iteration 299 +MetaTest/Average/AverageDiscountedReturn -29.1141 +MetaTest/Average/AverageReturn -29.1141 +MetaTest/Average/Iteration 299 +MetaTest/Average/MaxReturn -1.47222 +MetaTest/Average/MinReturn -46.9089 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.4936 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -29.1141 +MetaTest/__unnamed_task__/AverageReturn -29.1141 +MetaTest/__unnamed_task__/Iteration 299 +MetaTest/__unnamed_task__/MaxReturn -1.47222 +MetaTest/__unnamed_task__/MinReturn -46.9089 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.4936 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.6e+06 +__unnamed_task__/AverageDiscountedReturn -14.5894 +__unnamed_task__/AverageReturn -29.3589 +__unnamed_task__/Iteration 299 +__unnamed_task__/MaxReturn -1.38713 +__unnamed_task__/MinReturn -42.4645 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 7.47066 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------