diff --git "a/experiment/maml_trainer_1/debug.log" "b/experiment/maml_trainer_1/debug.log" new file mode 100644--- /dev/null +++ "b/experiment/maml_trainer_1/debug.log" @@ -0,0 +1,14402 @@ +2025-04-02 13:30:02 | [maml_trainer] Logging to /home/h2khalil/MetaRL-Assistive-Robotics/data/local/experiment/maml_trainer_1 +2025-04-02 13:35:21 | [maml_trainer] Obtaining samples... +2025-04-02 13:39:49 | [maml_trainer] epoch #0 | Sampling for adapation and meta-testing... +2025-04-02 13:40:57 | [maml_trainer] epoch #0 | Finished meta-testing... +2025-04-02 13:40:57 | [maml_trainer] epoch #0 | Saving snapshot... +2025-04-02 13:41:16 | [maml_trainer] epoch #0 | Saved +2025-04-02 13:41:16 | [maml_trainer] epoch #0 | Time 354.93 s +2025-04-02 13:41:16 | [maml_trainer] epoch #0 | EpochTime 354.93 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -75.0138 +Average/AverageReturn -179.122 +Average/Iteration 0 +Average/MaxReturn -142.547 +Average/MinReturn -256.108 +Average/NumEpisodes 80 +Average/StdReturn 13.1643 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.8512 +GaussianMLPPolicy/KLAfter 0.0293924 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000417959 +GaussianMLPPolicy/LossBefore 1.81794e-09 +GaussianMLPPolicy/dLoss -0.000417957 +Iteration 0 +MetaTest/Average/AverageDiscountedReturn -173.938 +MetaTest/Average/AverageReturn -173.938 +MetaTest/Average/Iteration 0 +MetaTest/Average/MaxReturn -149.767 +MetaTest/Average/MinReturn -191.06 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.70461 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.938 +MetaTest/__unnamed_task__/AverageReturn -173.938 +MetaTest/__unnamed_task__/Iteration 0 +MetaTest/__unnamed_task__/MaxReturn -149.767 +MetaTest/__unnamed_task__/MinReturn -191.06 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.70461 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 32000 +__unnamed_task__/AverageDiscountedReturn -75.0138 +__unnamed_task__/AverageReturn -179.122 +__unnamed_task__/Iteration 0 +__unnamed_task__/MaxReturn -142.547 +__unnamed_task__/MinReturn -256.108 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1643 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 13:45:49 | [maml_trainer] epoch #1 | Sampling for adapation and meta-testing... +2025-04-02 13:46:59 | [maml_trainer] epoch #1 | Finished meta-testing... +2025-04-02 13:46:59 | [maml_trainer] epoch #1 | Saving snapshot... +2025-04-02 13:47:18 | [maml_trainer] epoch #1 | Saved +2025-04-02 13:47:18 | [maml_trainer] epoch #1 | Time 716.77 s +2025-04-02 13:47:18 | [maml_trainer] epoch #1 | EpochTime 361.84 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -72.7798 +Average/AverageReturn -174.785 +Average/Iteration 1 +Average/MaxReturn -154.256 +Average/MinReturn -196.673 +Average/NumEpisodes 80 +Average/StdReturn 9.3126 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.8377 +GaussianMLPPolicy/KLAfter 0.0163152 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.91306e-05 +GaussianMLPPolicy/LossBefore -3.05474e-09 +GaussianMLPPolicy/dLoss -3.91337e-05 +Iteration 1 +MetaTest/Average/AverageDiscountedReturn -175.247 +MetaTest/Average/AverageReturn -175.247 +MetaTest/Average/Iteration 1 +MetaTest/Average/MaxReturn -157.058 +MetaTest/Average/MinReturn -206.201 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.2426 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.247 +MetaTest/__unnamed_task__/AverageReturn -175.247 +MetaTest/__unnamed_task__/Iteration 1 +MetaTest/__unnamed_task__/MaxReturn -157.058 +MetaTest/__unnamed_task__/MinReturn -206.201 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.2426 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 64000 +__unnamed_task__/AverageDiscountedReturn -72.7798 +__unnamed_task__/AverageReturn -174.785 +__unnamed_task__/Iteration 1 +__unnamed_task__/MaxReturn -154.256 +__unnamed_task__/MinReturn -196.673 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.3126 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 13:51:53 | [maml_trainer] epoch #2 | Sampling for adapation and meta-testing... +2025-04-02 13:53:01 | [maml_trainer] epoch #2 | Finished meta-testing... +2025-04-02 13:53:01 | [maml_trainer] epoch #2 | Saving snapshot... +2025-04-02 13:53:19 | [maml_trainer] epoch #2 | Saved +2025-04-02 13:53:19 | [maml_trainer] epoch #2 | Time 1077.95 s +2025-04-02 13:53:19 | [maml_trainer] epoch #2 | EpochTime 361.18 s +------------------------------------------------- --------------- +Average/AverageDiscountedReturn -72.9905 +Average/AverageReturn -175.113 +Average/Iteration 2 +Average/MaxReturn -155.559 +Average/MinReturn -211.63 +Average/NumEpisodes 80 +Average/StdReturn 10.338 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.8239 +GaussianMLPPolicy/KLAfter 0.0127655 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.30731e-05 +GaussianMLPPolicy/LossBefore 3.93391e-09 +GaussianMLPPolicy/dLoss -7.30692e-05 +Iteration 2 +MetaTest/Average/AverageDiscountedReturn -177.682 +MetaTest/Average/AverageReturn -177.682 +MetaTest/Average/Iteration 2 +MetaTest/Average/MaxReturn -154.717 +MetaTest/Average/MinReturn -201.867 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.4574 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.682 +MetaTest/__unnamed_task__/AverageReturn -177.682 +MetaTest/__unnamed_task__/Iteration 2 +MetaTest/__unnamed_task__/MaxReturn -154.717 +MetaTest/__unnamed_task__/MinReturn -201.867 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.4574 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 96000 +__unnamed_task__/AverageDiscountedReturn -72.9905 +__unnamed_task__/AverageReturn -175.113 +__unnamed_task__/Iteration 2 +__unnamed_task__/MaxReturn -155.559 +__unnamed_task__/MinReturn -211.63 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.338 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------- +2025-04-02 13:57:49 | [maml_trainer] epoch #3 | Sampling for adapation and meta-testing... +2025-04-02 13:58:56 | [maml_trainer] epoch #3 | Finished meta-testing... +2025-04-02 13:58:56 | [maml_trainer] epoch #3 | Saving snapshot... +2025-04-02 13:59:16 | [maml_trainer] epoch #3 | Saved +2025-04-02 13:59:16 | [maml_trainer] epoch #3 | Time 1434.13 s +2025-04-02 13:59:16 | [maml_trainer] epoch #3 | EpochTime 356.18 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -73.7541 +Average/AverageReturn -177.172 +Average/Iteration 3 +Average/MaxReturn -155.498 +Average/MinReturn -242.1 +Average/NumEpisodes 80 +Average/StdReturn 13.1041 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.8101 +GaussianMLPPolicy/KLAfter 0.0121306 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.52898e-06 +GaussianMLPPolicy/LossBefore 2.45869e-09 +GaussianMLPPolicy/dLoss -2.52652e-06 +Iteration 3 +MetaTest/Average/AverageDiscountedReturn -176.249 +MetaTest/Average/AverageReturn -176.249 +MetaTest/Average/Iteration 3 +MetaTest/Average/MaxReturn -163.921 +MetaTest/Average/MinReturn -206.719 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.52459 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.249 +MetaTest/__unnamed_task__/AverageReturn -176.249 +MetaTest/__unnamed_task__/Iteration 3 +MetaTest/__unnamed_task__/MaxReturn -163.921 +MetaTest/__unnamed_task__/MinReturn -206.719 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.52459 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 128000 +__unnamed_task__/AverageDiscountedReturn -73.7541 +__unnamed_task__/AverageReturn -177.172 +__unnamed_task__/Iteration 3 +__unnamed_task__/MaxReturn -155.498 +__unnamed_task__/MinReturn -242.1 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1041 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:03:52 | [maml_trainer] epoch #4 | Sampling for adapation and meta-testing... +2025-04-02 14:05:02 | [maml_trainer] epoch #4 | Finished meta-testing... +2025-04-02 14:05:02 | [maml_trainer] epoch #4 | Saving snapshot... +2025-04-02 14:05:23 | [maml_trainer] epoch #4 | Saved +2025-04-02 14:05:23 | [maml_trainer] epoch #4 | Time 1801.30 s +2025-04-02 14:05:23 | [maml_trainer] epoch #4 | EpochTime 367.17 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -73.6308 +Average/AverageReturn -176.054 +Average/Iteration 4 +Average/MaxReturn -157.157 +Average/MinReturn -280.311 +Average/NumEpisodes 80 +Average/StdReturn 14.6026 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7979 +GaussianMLPPolicy/KLAfter 0.00842935 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.61326e-05 +GaussianMLPPolicy/LossBefore -3.8743e-09 +GaussianMLPPolicy/dLoss -2.61365e-05 +Iteration 4 +MetaTest/Average/AverageDiscountedReturn -176.47 +MetaTest/Average/AverageReturn -176.47 +MetaTest/Average/Iteration 4 +MetaTest/Average/MaxReturn -159.291 +MetaTest/Average/MinReturn -200.868 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.1801 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.47 +MetaTest/__unnamed_task__/AverageReturn -176.47 +MetaTest/__unnamed_task__/Iteration 4 +MetaTest/__unnamed_task__/MaxReturn -159.291 +MetaTest/__unnamed_task__/MinReturn -200.868 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.1801 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 160000 +__unnamed_task__/AverageDiscountedReturn -73.6308 +__unnamed_task__/AverageReturn -176.054 +__unnamed_task__/Iteration 4 +__unnamed_task__/MaxReturn -157.157 +__unnamed_task__/MinReturn -280.311 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6026 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:09:58 | [maml_trainer] epoch #5 | Sampling for adapation and meta-testing... +2025-04-02 14:11:05 | [maml_trainer] epoch #5 | Finished meta-testing... +2025-04-02 14:11:05 | [maml_trainer] epoch #5 | Saving snapshot... +2025-04-02 14:11:26 | [maml_trainer] epoch #5 | Saved +2025-04-02 14:11:26 | [maml_trainer] epoch #5 | Time 2164.24 s +2025-04-02 14:11:26 | [maml_trainer] epoch #5 | EpochTime 362.94 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.3182 +Average/AverageReturn -173.458 +Average/Iteration 5 +Average/MaxReturn -157.439 +Average/MinReturn -203.94 +Average/NumEpisodes 80 +Average/StdReturn 9.12518 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7881 +GaussianMLPPolicy/KLAfter 0.00673696 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.53467e-05 +GaussianMLPPolicy/LossBefore 5.31971e-09 +GaussianMLPPolicy/dLoss -1.53414e-05 +Iteration 5 +MetaTest/Average/AverageDiscountedReturn -169.398 +MetaTest/Average/AverageReturn -169.398 +MetaTest/Average/Iteration 5 +MetaTest/Average/MaxReturn -156.011 +MetaTest/Average/MinReturn -182.036 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.43762 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.398 +MetaTest/__unnamed_task__/AverageReturn -169.398 +MetaTest/__unnamed_task__/Iteration 5 +MetaTest/__unnamed_task__/MaxReturn -156.011 +MetaTest/__unnamed_task__/MinReturn -182.036 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.43762 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 192000 +__unnamed_task__/AverageDiscountedReturn -72.3182 +__unnamed_task__/AverageReturn -173.458 +__unnamed_task__/Iteration 5 +__unnamed_task__/MaxReturn -157.439 +__unnamed_task__/MinReturn -203.94 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.12518 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:16:02 | [maml_trainer] epoch #6 | Sampling for adapation and meta-testing... +2025-04-02 14:17:12 | [maml_trainer] epoch #6 | Finished meta-testing... +2025-04-02 14:17:12 | [maml_trainer] epoch #6 | Saving snapshot... +2025-04-02 14:17:31 | [maml_trainer] epoch #6 | Saved +2025-04-02 14:17:31 | [maml_trainer] epoch #6 | Time 2529.99 s +2025-04-02 14:17:31 | [maml_trainer] epoch #6 | EpochTime 365.74 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -73.3679 +Average/AverageReturn -175.418 +Average/Iteration 6 +Average/MaxReturn -157.139 +Average/MinReturn -236.461 +Average/NumEpisodes 80 +Average/StdReturn 12.751 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7794 +GaussianMLPPolicy/KLAfter 0.0058501 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.09646e-05 +GaussianMLPPolicy/LossBefore -1.50502e-09 +GaussianMLPPolicy/dLoss 4.09631e-05 +Iteration 6 +MetaTest/Average/AverageDiscountedReturn -169.735 +MetaTest/Average/AverageReturn -169.735 +MetaTest/Average/Iteration 6 +MetaTest/Average/MaxReturn -154.577 +MetaTest/Average/MinReturn -196.266 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.02865 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.735 +MetaTest/__unnamed_task__/AverageReturn -169.735 +MetaTest/__unnamed_task__/Iteration 6 +MetaTest/__unnamed_task__/MaxReturn -154.577 +MetaTest/__unnamed_task__/MinReturn -196.266 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.02865 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 224000 +__unnamed_task__/AverageDiscountedReturn -73.3679 +__unnamed_task__/AverageReturn -175.418 +__unnamed_task__/Iteration 6 +__unnamed_task__/MaxReturn -157.139 +__unnamed_task__/MinReturn -236.461 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.751 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:22:06 | [maml_trainer] epoch #7 | Sampling for adapation and meta-testing... +2025-04-02 14:23:15 | [maml_trainer] epoch #7 | Finished meta-testing... +2025-04-02 14:23:15 | [maml_trainer] epoch #7 | Saving snapshot... +2025-04-02 14:23:35 | [maml_trainer] epoch #7 | Saved +2025-04-02 14:23:35 | [maml_trainer] epoch #7 | Time 2893.28 s +2025-04-02 14:23:35 | [maml_trainer] epoch #7 | EpochTime 363.29 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.3254 +Average/AverageReturn -171.739 +Average/Iteration 7 +Average/MaxReturn -154.284 +Average/MinReturn -274.633 +Average/NumEpisodes 80 +Average/StdReturn 14.5315 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7691 +GaussianMLPPolicy/KLAfter 0.00639265 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.31397e-05 +GaussianMLPPolicy/LossBefore 9.26852e-09 +GaussianMLPPolicy/dLoss 7.31489e-05 +Iteration 7 +MetaTest/Average/AverageDiscountedReturn -173.486 +MetaTest/Average/AverageReturn -173.486 +MetaTest/Average/Iteration 7 +MetaTest/Average/MaxReturn -160.689 +MetaTest/Average/MinReturn -207.277 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.0113 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.486 +MetaTest/__unnamed_task__/AverageReturn -173.486 +MetaTest/__unnamed_task__/Iteration 7 +MetaTest/__unnamed_task__/MaxReturn -160.689 +MetaTest/__unnamed_task__/MinReturn -207.277 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.0113 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 256000 +__unnamed_task__/AverageDiscountedReturn -72.3254 +__unnamed_task__/AverageReturn -171.739 +__unnamed_task__/Iteration 7 +__unnamed_task__/MaxReturn -154.284 +__unnamed_task__/MinReturn -274.633 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.5315 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:28:14 | [maml_trainer] epoch #8 | Sampling for adapation and meta-testing... +2025-04-02 14:29:22 | [maml_trainer] epoch #8 | Finished meta-testing... +2025-04-02 14:29:22 | [maml_trainer] epoch #8 | Saving snapshot... +2025-04-02 14:29:42 | [maml_trainer] epoch #8 | Saved +2025-04-02 14:29:42 | [maml_trainer] epoch #8 | Time 3260.57 s +2025-04-02 14:29:42 | [maml_trainer] epoch #8 | EpochTime 367.28 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.2884 +Average/AverageReturn -169.847 +Average/Iteration 8 +Average/MaxReturn -150.479 +Average/MinReturn -206.957 +Average/NumEpisodes 80 +Average/StdReturn 10.2425 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.76 +GaussianMLPPolicy/KLAfter 0.00523702 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.24274e-05 +GaussianMLPPolicy/LossBefore -2.05636e-09 +GaussianMLPPolicy/dLoss 2.24253e-05 +Iteration 8 +MetaTest/Average/AverageDiscountedReturn -169.176 +MetaTest/Average/AverageReturn -169.176 +MetaTest/Average/Iteration 8 +MetaTest/Average/MaxReturn -159.636 +MetaTest/Average/MinReturn -192.749 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.34766 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.176 +MetaTest/__unnamed_task__/AverageReturn -169.176 +MetaTest/__unnamed_task__/Iteration 8 +MetaTest/__unnamed_task__/MaxReturn -159.636 +MetaTest/__unnamed_task__/MinReturn -192.749 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.34766 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 288000 +__unnamed_task__/AverageDiscountedReturn -71.2884 +__unnamed_task__/AverageReturn -169.847 +__unnamed_task__/Iteration 8 +__unnamed_task__/MaxReturn -150.479 +__unnamed_task__/MinReturn -206.957 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.2425 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:34:17 | [maml_trainer] epoch #9 | Sampling for adapation and meta-testing... +2025-04-02 14:35:26 | [maml_trainer] epoch #9 | Finished meta-testing... +2025-04-02 14:35:26 | [maml_trainer] epoch #9 | Saving snapshot... +2025-04-02 14:35:45 | [maml_trainer] epoch #9 | Saved +2025-04-02 14:35:45 | [maml_trainer] epoch #9 | Time 3623.64 s +2025-04-02 14:35:45 | [maml_trainer] epoch #9 | EpochTime 363.06 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.365 +Average/AverageReturn -170.969 +Average/Iteration 9 +Average/MaxReturn -151.324 +Average/MinReturn -245.646 +Average/NumEpisodes 80 +Average/StdReturn 13.1982 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7538 +GaussianMLPPolicy/KLAfter 0.00426195 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.46094e-06 +GaussianMLPPolicy/LossBefore -9.17912e-09 +GaussianMLPPolicy/dLoss 1.45176e-06 +Iteration 9 +MetaTest/Average/AverageDiscountedReturn -165.123 +MetaTest/Average/AverageReturn -165.123 +MetaTest/Average/Iteration 9 +MetaTest/Average/MaxReturn -151.293 +MetaTest/Average/MinReturn -191.03 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.21081 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.123 +MetaTest/__unnamed_task__/AverageReturn -165.123 +MetaTest/__unnamed_task__/Iteration 9 +MetaTest/__unnamed_task__/MaxReturn -151.293 +MetaTest/__unnamed_task__/MinReturn -191.03 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.21081 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 320000 +__unnamed_task__/AverageDiscountedReturn -72.365 +__unnamed_task__/AverageReturn -170.969 +__unnamed_task__/Iteration 9 +__unnamed_task__/MaxReturn -151.324 +__unnamed_task__/MinReturn -245.646 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.1982 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:40:22 | [maml_trainer] epoch #10 | Sampling for adapation and meta-testing... +2025-04-02 14:41:30 | [maml_trainer] epoch #10 | Finished meta-testing... +2025-04-02 14:41:30 | [maml_trainer] epoch #10 | Saving snapshot... +2025-04-02 14:41:49 | [maml_trainer] epoch #10 | Saved +2025-04-02 14:41:49 | [maml_trainer] epoch #10 | Time 3987.58 s +2025-04-02 14:41:49 | [maml_trainer] epoch #10 | EpochTime 363.94 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.6424 +Average/AverageReturn -169.674 +Average/Iteration 10 +Average/MaxReturn -151.784 +Average/MinReturn -242.653 +Average/NumEpisodes 80 +Average/StdReturn 14.7401 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7511 +GaussianMLPPolicy/KLAfter 0.00465591 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.1722e-05 +GaussianMLPPolicy/LossBefore 1.68383e-09 +GaussianMLPPolicy/dLoss 3.17237e-05 +Iteration 10 +MetaTest/Average/AverageDiscountedReturn -167.534 +MetaTest/Average/AverageReturn -167.534 +MetaTest/Average/Iteration 10 +MetaTest/Average/MaxReturn -153.1 +MetaTest/Average/MinReturn -194.426 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.67205 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.534 +MetaTest/__unnamed_task__/AverageReturn -167.534 +MetaTest/__unnamed_task__/Iteration 10 +MetaTest/__unnamed_task__/MaxReturn -153.1 +MetaTest/__unnamed_task__/MinReturn -194.426 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.67205 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 352000 +__unnamed_task__/AverageDiscountedReturn -71.6424 +__unnamed_task__/AverageReturn -169.674 +__unnamed_task__/Iteration 10 +__unnamed_task__/MaxReturn -151.784 +__unnamed_task__/MinReturn -242.653 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.7401 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:46:24 | [maml_trainer] epoch #11 | Sampling for adapation and meta-testing... +2025-04-02 14:47:32 | [maml_trainer] epoch #11 | Finished meta-testing... +2025-04-02 14:47:32 | [maml_trainer] epoch #11 | Saving snapshot... +2025-04-02 14:47:52 | [maml_trainer] epoch #11 | Saved +2025-04-02 14:47:52 | [maml_trainer] epoch #11 | Time 4350.93 s +2025-04-02 14:47:52 | [maml_trainer] epoch #11 | EpochTime 363.35 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.6056 +Average/AverageReturn -169.223 +Average/Iteration 11 +Average/MaxReturn -150.552 +Average/MinReturn -229.048 +Average/NumEpisodes 80 +Average/StdReturn 13.5238 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7465 +GaussianMLPPolicy/KLAfter 0.00523354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.45049e-06 +GaussianMLPPolicy/LossBefore -5.96046e-10 +GaussianMLPPolicy/dLoss 5.44989e-06 +Iteration 11 +MetaTest/Average/AverageDiscountedReturn -164.941 +MetaTest/Average/AverageReturn -164.941 +MetaTest/Average/Iteration 11 +MetaTest/Average/MaxReturn -155.209 +MetaTest/Average/MinReturn -175.621 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.77966 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.941 +MetaTest/__unnamed_task__/AverageReturn -164.941 +MetaTest/__unnamed_task__/Iteration 11 +MetaTest/__unnamed_task__/MaxReturn -155.209 +MetaTest/__unnamed_task__/MinReturn -175.621 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.77966 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 384000 +__unnamed_task__/AverageDiscountedReturn -71.6056 +__unnamed_task__/AverageReturn -169.223 +__unnamed_task__/Iteration 11 +__unnamed_task__/MaxReturn -150.552 +__unnamed_task__/MinReturn -229.048 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.5238 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:52:25 | [maml_trainer] epoch #12 | Sampling for adapation and meta-testing... +2025-04-02 14:53:33 | [maml_trainer] epoch #12 | Finished meta-testing... +2025-04-02 14:53:33 | [maml_trainer] epoch #12 | Saving snapshot... +2025-04-02 14:53:52 | [maml_trainer] epoch #12 | Saved +2025-04-02 14:53:52 | [maml_trainer] epoch #12 | Time 4711.11 s +2025-04-02 14:53:52 | [maml_trainer] epoch #12 | EpochTime 360.17 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.4268 +Average/AverageReturn -170.532 +Average/Iteration 12 +Average/MaxReturn -143.902 +Average/MinReturn -244.639 +Average/NumEpisodes 80 +Average/StdReturn 18.4226 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7453 +GaussianMLPPolicy/KLAfter 0.00459388 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.18635e-05 +GaussianMLPPolicy/LossBefore 2.68221e-09 +GaussianMLPPolicy/dLoss -1.18609e-05 +Iteration 12 +MetaTest/Average/AverageDiscountedReturn -171.786 +MetaTest/Average/AverageReturn -171.786 +MetaTest/Average/Iteration 12 +MetaTest/Average/MaxReturn -150.097 +MetaTest/Average/MinReturn -236.11 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.9584 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.786 +MetaTest/__unnamed_task__/AverageReturn -171.786 +MetaTest/__unnamed_task__/Iteration 12 +MetaTest/__unnamed_task__/MaxReturn -150.097 +MetaTest/__unnamed_task__/MinReturn -236.11 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.9584 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 416000 +__unnamed_task__/AverageDiscountedReturn -72.4268 +__unnamed_task__/AverageReturn -170.532 +__unnamed_task__/Iteration 12 +__unnamed_task__/MaxReturn -143.902 +__unnamed_task__/MinReturn -244.639 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4226 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 14:58:29 | [maml_trainer] epoch #13 | Sampling for adapation and meta-testing... +2025-04-02 14:59:37 | [maml_trainer] epoch #13 | Finished meta-testing... +2025-04-02 14:59:37 | [maml_trainer] epoch #13 | Saving snapshot... +2025-04-02 14:59:56 | [maml_trainer] epoch #13 | Saved +2025-04-02 14:59:56 | [maml_trainer] epoch #13 | Time 5074.96 s +2025-04-02 14:59:56 | [maml_trainer] epoch #13 | EpochTime 363.84 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.6226 +Average/AverageReturn -168.53 +Average/Iteration 13 +Average/MaxReturn -148.999 +Average/MinReturn -237.727 +Average/NumEpisodes 80 +Average/StdReturn 17.5229 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7436 +GaussianMLPPolicy/KLAfter 0.0039096 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.53291e-05 +GaussianMLPPolicy/LossBefore -4.29153e-09 +GaussianMLPPolicy/dLoss 3.53249e-05 +Iteration 13 +MetaTest/Average/AverageDiscountedReturn -165.461 +MetaTest/Average/AverageReturn -165.461 +MetaTest/Average/Iteration 13 +MetaTest/Average/MaxReturn -156.631 +MetaTest/Average/MinReturn -184.738 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.54982 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.461 +MetaTest/__unnamed_task__/AverageReturn -165.461 +MetaTest/__unnamed_task__/Iteration 13 +MetaTest/__unnamed_task__/MaxReturn -156.631 +MetaTest/__unnamed_task__/MinReturn -184.738 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.54982 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 448000 +__unnamed_task__/AverageDiscountedReturn -71.6226 +__unnamed_task__/AverageReturn -168.53 +__unnamed_task__/Iteration 13 +__unnamed_task__/MaxReturn -148.999 +__unnamed_task__/MinReturn -237.727 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.5229 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:04:39 | [maml_trainer] epoch #14 | Sampling for adapation and meta-testing... +2025-04-02 15:05:52 | [maml_trainer] epoch #14 | Finished meta-testing... +2025-04-02 15:05:52 | [maml_trainer] epoch #14 | Saving snapshot... +2025-04-02 15:06:12 | [maml_trainer] epoch #14 | Saved +2025-04-02 15:06:12 | [maml_trainer] epoch #14 | Time 5450.38 s +2025-04-02 15:06:12 | [maml_trainer] epoch #14 | EpochTime 375.42 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.1152 +Average/AverageReturn -167.072 +Average/Iteration 14 +Average/MaxReturn -150.474 +Average/MinReturn -269.697 +Average/NumEpisodes 80 +Average/StdReturn 16.7935 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7418 +GaussianMLPPolicy/KLAfter 0.00252159 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.86693e-05 +GaussianMLPPolicy/LossBefore -7.01845e-09 +GaussianMLPPolicy/dLoss 2.86623e-05 +Iteration 14 +MetaTest/Average/AverageDiscountedReturn -162.156 +MetaTest/Average/AverageReturn -162.156 +MetaTest/Average/Iteration 14 +MetaTest/Average/MaxReturn -149.54 +MetaTest/Average/MinReturn -175.216 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.62052 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.156 +MetaTest/__unnamed_task__/AverageReturn -162.156 +MetaTest/__unnamed_task__/Iteration 14 +MetaTest/__unnamed_task__/MaxReturn -149.54 +MetaTest/__unnamed_task__/MinReturn -175.216 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.62052 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 480000 +__unnamed_task__/AverageDiscountedReturn -71.1152 +__unnamed_task__/AverageReturn -167.072 +__unnamed_task__/Iteration 14 +__unnamed_task__/MaxReturn -150.474 +__unnamed_task__/MinReturn -269.697 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.7935 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:11:01 | [maml_trainer] epoch #15 | Sampling for adapation and meta-testing... +2025-04-02 15:12:12 | [maml_trainer] epoch #15 | Finished meta-testing... +2025-04-02 15:12:12 | [maml_trainer] epoch #15 | Saving snapshot... +2025-04-02 15:12:33 | [maml_trainer] epoch #15 | Saved +2025-04-02 15:12:33 | [maml_trainer] epoch #15 | Time 5832.00 s +2025-04-02 15:12:33 | [maml_trainer] epoch #15 | EpochTime 381.61 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.556 +Average/AverageReturn -167.271 +Average/Iteration 15 +Average/MaxReturn -145.611 +Average/MinReturn -251.626 +Average/NumEpisodes 80 +Average/StdReturn 17.3989 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7373 +GaussianMLPPolicy/KLAfter 0.00255537 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.34301e-06 +GaussianMLPPolicy/LossBefore -5.57303e-09 +GaussianMLPPolicy/dLoss 6.33743e-06 +Iteration 15 +MetaTest/Average/AverageDiscountedReturn -173.938 +MetaTest/Average/AverageReturn -173.938 +MetaTest/Average/Iteration 15 +MetaTest/Average/MaxReturn -151.943 +MetaTest/Average/MinReturn -247.831 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.4678 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.938 +MetaTest/__unnamed_task__/AverageReturn -173.938 +MetaTest/__unnamed_task__/Iteration 15 +MetaTest/__unnamed_task__/MaxReturn -151.943 +MetaTest/__unnamed_task__/MinReturn -247.831 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.4678 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 512000 +__unnamed_task__/AverageDiscountedReturn -71.556 +__unnamed_task__/AverageReturn -167.271 +__unnamed_task__/Iteration 15 +__unnamed_task__/MaxReturn -145.611 +__unnamed_task__/MinReturn -251.626 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.3989 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:17:19 | [maml_trainer] epoch #16 | Sampling for adapation and meta-testing... +2025-04-02 15:18:31 | [maml_trainer] epoch #16 | Finished meta-testing... +2025-04-02 15:18:31 | [maml_trainer] epoch #16 | Saving snapshot... +2025-04-02 15:18:52 | [maml_trainer] epoch #16 | Saved +2025-04-02 15:18:52 | [maml_trainer] epoch #16 | Time 6210.71 s +2025-04-02 15:18:52 | [maml_trainer] epoch #16 | EpochTime 378.71 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.1988 +Average/AverageReturn -167.621 +Average/Iteration 16 +Average/MaxReturn -149.459 +Average/MinReturn -229.78 +Average/NumEpisodes 80 +Average/StdReturn 15.9137 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7325 +GaussianMLPPolicy/KLAfter 0.00298558 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.0819e-05 +GaussianMLPPolicy/LossBefore -5.51343e-10 +GaussianMLPPolicy/dLoss -1.08195e-05 +Iteration 16 +MetaTest/Average/AverageDiscountedReturn -167.136 +MetaTest/Average/AverageReturn -167.136 +MetaTest/Average/Iteration 16 +MetaTest/Average/MaxReturn -157.873 +MetaTest/Average/MinReturn -195.587 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.8006 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.136 +MetaTest/__unnamed_task__/AverageReturn -167.136 +MetaTest/__unnamed_task__/Iteration 16 +MetaTest/__unnamed_task__/MaxReturn -157.873 +MetaTest/__unnamed_task__/MinReturn -195.587 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.8006 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 544000 +__unnamed_task__/AverageDiscountedReturn -71.1988 +__unnamed_task__/AverageReturn -167.621 +__unnamed_task__/Iteration 16 +__unnamed_task__/MaxReturn -149.459 +__unnamed_task__/MinReturn -229.78 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.9137 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:23:42 | [maml_trainer] epoch #17 | Sampling for adapation and meta-testing... +2025-04-02 15:24:54 | [maml_trainer] epoch #17 | Finished meta-testing... +2025-04-02 15:24:54 | [maml_trainer] epoch #17 | Saving snapshot... +2025-04-02 15:25:15 | [maml_trainer] epoch #17 | Saved +2025-04-02 15:25:15 | [maml_trainer] epoch #17 | Time 6593.60 s +2025-04-02 15:25:15 | [maml_trainer] epoch #17 | EpochTime 382.88 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.0974 +Average/AverageReturn -165.138 +Average/Iteration 17 +Average/MaxReturn -146.544 +Average/MinReturn -216.202 +Average/NumEpisodes 80 +Average/StdReturn 10.4774 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7268 +GaussianMLPPolicy/KLAfter 0.00310437 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.48679e-05 +GaussianMLPPolicy/LossBefore 5.1558e-09 +GaussianMLPPolicy/dLoss 1.4873e-05 +Iteration 17 +MetaTest/Average/AverageDiscountedReturn -164.91 +MetaTest/Average/AverageReturn -164.91 +MetaTest/Average/Iteration 17 +MetaTest/Average/MaxReturn -153.03 +MetaTest/Average/MinReturn -182.641 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.04312 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.91 +MetaTest/__unnamed_task__/AverageReturn -164.91 +MetaTest/__unnamed_task__/Iteration 17 +MetaTest/__unnamed_task__/MaxReturn -153.03 +MetaTest/__unnamed_task__/MinReturn -182.641 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.04312 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 576000 +__unnamed_task__/AverageDiscountedReturn -70.0974 +__unnamed_task__/AverageReturn -165.138 +__unnamed_task__/Iteration 17 +__unnamed_task__/MaxReturn -146.544 +__unnamed_task__/MinReturn -216.202 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.4774 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:29:57 | [maml_trainer] epoch #18 | Sampling for adapation and meta-testing... +2025-04-02 15:31:09 | [maml_trainer] epoch #18 | Finished meta-testing... +2025-04-02 15:31:09 | [maml_trainer] epoch #18 | Saving snapshot... +2025-04-02 15:31:29 | [maml_trainer] epoch #18 | Saved +2025-04-02 15:31:29 | [maml_trainer] epoch #18 | Time 6967.67 s +2025-04-02 15:31:29 | [maml_trainer] epoch #18 | EpochTime 374.07 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.7903 +Average/AverageReturn -167.036 +Average/Iteration 18 +Average/MaxReturn -146.978 +Average/MinReturn -232.639 +Average/NumEpisodes 80 +Average/StdReturn 13.6199 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7191 +GaussianMLPPolicy/KLAfter 0.00477303 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.14285e-05 +GaussianMLPPolicy/LossBefore -3.53158e-09 +GaussianMLPPolicy/dLoss 1.1425e-05 +Iteration 18 +MetaTest/Average/AverageDiscountedReturn -164.221 +MetaTest/Average/AverageReturn -164.221 +MetaTest/Average/Iteration 18 +MetaTest/Average/MaxReturn -151.208 +MetaTest/Average/MinReturn -190.284 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.1563 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.221 +MetaTest/__unnamed_task__/AverageReturn -164.221 +MetaTest/__unnamed_task__/Iteration 18 +MetaTest/__unnamed_task__/MaxReturn -151.208 +MetaTest/__unnamed_task__/MinReturn -190.284 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.1563 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 608000 +__unnamed_task__/AverageDiscountedReturn -70.7903 +__unnamed_task__/AverageReturn -167.036 +__unnamed_task__/Iteration 18 +__unnamed_task__/MaxReturn -146.978 +__unnamed_task__/MinReturn -232.639 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.6199 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:36:16 | [maml_trainer] epoch #19 | Sampling for adapation and meta-testing... +2025-04-02 15:37:28 | [maml_trainer] epoch #19 | Finished meta-testing... +2025-04-02 15:37:28 | [maml_trainer] epoch #19 | Saving snapshot... +2025-04-02 15:37:48 | [maml_trainer] epoch #19 | Saved +2025-04-02 15:37:48 | [maml_trainer] epoch #19 | Time 7346.73 s +2025-04-02 15:37:48 | [maml_trainer] epoch #19 | EpochTime 379.05 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.6742 +Average/AverageReturn -166.933 +Average/Iteration 19 +Average/MaxReturn -147.182 +Average/MinReturn -211.547 +Average/NumEpisodes 80 +Average/StdReturn 13.3413 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.712 +GaussianMLPPolicy/KLAfter 0.00502437 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.24107e-06 +GaussianMLPPolicy/LossBefore -9.07481e-09 +GaussianMLPPolicy/dLoss 7.232e-06 +Iteration 19 +MetaTest/Average/AverageDiscountedReturn -169.149 +MetaTest/Average/AverageReturn -169.149 +MetaTest/Average/Iteration 19 +MetaTest/Average/MaxReturn -154.766 +MetaTest/Average/MinReturn -213.841 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.0812 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.149 +MetaTest/__unnamed_task__/AverageReturn -169.149 +MetaTest/__unnamed_task__/Iteration 19 +MetaTest/__unnamed_task__/MaxReturn -154.766 +MetaTest/__unnamed_task__/MinReturn -213.841 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.0812 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 640000 +__unnamed_task__/AverageDiscountedReturn -70.6742 +__unnamed_task__/AverageReturn -166.933 +__unnamed_task__/Iteration 19 +__unnamed_task__/MaxReturn -147.182 +__unnamed_task__/MinReturn -211.547 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.3413 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:42:35 | [maml_trainer] epoch #20 | Sampling for adapation and meta-testing... +2025-04-02 15:43:43 | [maml_trainer] epoch #20 | Finished meta-testing... +2025-04-02 15:43:43 | [maml_trainer] epoch #20 | Saving snapshot... +2025-04-02 15:44:02 | [maml_trainer] epoch #20 | Saved +2025-04-02 15:44:02 | [maml_trainer] epoch #20 | Time 7720.85 s +2025-04-02 15:44:02 | [maml_trainer] epoch #20 | EpochTime 374.12 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.6256 +Average/AverageReturn -166.056 +Average/Iteration 20 +Average/MaxReturn -146.597 +Average/MinReturn -241.906 +Average/NumEpisodes 80 +Average/StdReturn 13.4134 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7077 +GaussianMLPPolicy/KLAfter 0.00488629 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.15406e-05 +GaussianMLPPolicy/LossBefore 4.38094e-09 +GaussianMLPPolicy/dLoss 7.1545e-05 +Iteration 20 +MetaTest/Average/AverageDiscountedReturn -168.166 +MetaTest/Average/AverageReturn -168.166 +MetaTest/Average/Iteration 20 +MetaTest/Average/MaxReturn -147.433 +MetaTest/Average/MinReturn -253.194 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.956 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.166 +MetaTest/__unnamed_task__/AverageReturn -168.166 +MetaTest/__unnamed_task__/Iteration 20 +MetaTest/__unnamed_task__/MaxReturn -147.433 +MetaTest/__unnamed_task__/MinReturn -253.194 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.956 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 672000 +__unnamed_task__/AverageDiscountedReturn -70.6256 +__unnamed_task__/AverageReturn -166.056 +__unnamed_task__/Iteration 20 +__unnamed_task__/MaxReturn -146.597 +__unnamed_task__/MinReturn -241.906 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.4134 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:48:36 | [maml_trainer] epoch #21 | Sampling for adapation and meta-testing... +2025-04-02 15:49:48 | [maml_trainer] epoch #21 | Finished meta-testing... +2025-04-02 15:49:48 | [maml_trainer] epoch #21 | Saving snapshot... +2025-04-02 15:50:09 | [maml_trainer] epoch #21 | Saved +2025-04-02 15:50:09 | [maml_trainer] epoch #21 | Time 8087.34 s +2025-04-02 15:50:09 | [maml_trainer] epoch #21 | EpochTime 366.49 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.4561 +Average/AverageReturn -167.755 +Average/Iteration 21 +Average/MaxReturn -147.629 +Average/MinReturn -267.346 +Average/NumEpisodes 80 +Average/StdReturn 15.6501 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.7031 +GaussianMLPPolicy/KLAfter 0.00421476 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.68139e-05 +GaussianMLPPolicy/LossBefore -7.30157e-10 +GaussianMLPPolicy/dLoss 2.68132e-05 +Iteration 21 +MetaTest/Average/AverageDiscountedReturn -164.07 +MetaTest/Average/AverageReturn -164.07 +MetaTest/Average/Iteration 21 +MetaTest/Average/MaxReturn -149.213 +MetaTest/Average/MinReturn -195.154 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.44078 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.07 +MetaTest/__unnamed_task__/AverageReturn -164.07 +MetaTest/__unnamed_task__/Iteration 21 +MetaTest/__unnamed_task__/MaxReturn -149.213 +MetaTest/__unnamed_task__/MinReturn -195.154 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.44078 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 704000 +__unnamed_task__/AverageDiscountedReturn -71.4561 +__unnamed_task__/AverageReturn -167.755 +__unnamed_task__/Iteration 21 +__unnamed_task__/MaxReturn -147.629 +__unnamed_task__/MinReturn -267.346 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.6501 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 15:54:58 | [maml_trainer] epoch #22 | Sampling for adapation and meta-testing... +2025-04-02 15:56:09 | [maml_trainer] epoch #22 | Finished meta-testing... +2025-04-02 15:56:09 | [maml_trainer] epoch #22 | Saving snapshot... +2025-04-02 15:56:30 | [maml_trainer] epoch #22 | Saved +2025-04-02 15:56:30 | [maml_trainer] epoch #22 | Time 8469.04 s +2025-04-02 15:56:30 | [maml_trainer] epoch #22 | EpochTime 381.69 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.1372 +Average/AverageReturn -167.523 +Average/Iteration 22 +Average/MaxReturn -150.522 +Average/MinReturn -226.674 +Average/NumEpisodes 80 +Average/StdReturn 14.8474 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6972 +GaussianMLPPolicy/KLAfter 0.00298392 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.15843e-05 +GaussianMLPPolicy/LossBefore 4.61936e-09 +GaussianMLPPolicy/dLoss 4.15889e-05 +Iteration 22 +MetaTest/Average/AverageDiscountedReturn -169.321 +MetaTest/Average/AverageReturn -169.321 +MetaTest/Average/Iteration 22 +MetaTest/Average/MaxReturn -148.959 +MetaTest/Average/MinReturn -283.402 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.8397 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.321 +MetaTest/__unnamed_task__/AverageReturn -169.321 +MetaTest/__unnamed_task__/Iteration 22 +MetaTest/__unnamed_task__/MaxReturn -148.959 +MetaTest/__unnamed_task__/MinReturn -283.402 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.8397 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 736000 +__unnamed_task__/AverageDiscountedReturn -71.1372 +__unnamed_task__/AverageReturn -167.523 +__unnamed_task__/Iteration 22 +__unnamed_task__/MaxReturn -150.522 +__unnamed_task__/MinReturn -226.674 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.8474 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:01:17 | [maml_trainer] epoch #23 | Sampling for adapation and meta-testing... +2025-04-02 16:02:27 | [maml_trainer] epoch #23 | Finished meta-testing... +2025-04-02 16:02:27 | [maml_trainer] epoch #23 | Saving snapshot... +2025-04-02 16:02:46 | [maml_trainer] epoch #23 | Saved +2025-04-02 16:02:46 | [maml_trainer] epoch #23 | Time 8845.08 s +2025-04-02 16:02:46 | [maml_trainer] epoch #23 | EpochTime 376.04 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.8914 +Average/AverageReturn -171.209 +Average/Iteration 23 +Average/MaxReturn -148.132 +Average/MinReturn -234.671 +Average/NumEpisodes 80 +Average/StdReturn 18.5031 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6924 +GaussianMLPPolicy/KLAfter 0.00343269 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.25473e-05 +GaussianMLPPolicy/LossBefore 4.08292e-09 +GaussianMLPPolicy/dLoss 3.25513e-05 +Iteration 23 +MetaTest/Average/AverageDiscountedReturn -164.361 +MetaTest/Average/AverageReturn -164.361 +MetaTest/Average/Iteration 23 +MetaTest/Average/MaxReturn -152.414 +MetaTest/Average/MinReturn -186.653 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.51712 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.361 +MetaTest/__unnamed_task__/AverageReturn -164.361 +MetaTest/__unnamed_task__/Iteration 23 +MetaTest/__unnamed_task__/MaxReturn -152.414 +MetaTest/__unnamed_task__/MinReturn -186.653 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.51712 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 768000 +__unnamed_task__/AverageDiscountedReturn -72.8914 +__unnamed_task__/AverageReturn -171.209 +__unnamed_task__/Iteration 23 +__unnamed_task__/MaxReturn -148.132 +__unnamed_task__/MinReturn -234.671 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5031 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:07:20 | [maml_trainer] epoch #24 | Sampling for adapation and meta-testing... +2025-04-02 16:08:26 | [maml_trainer] epoch #24 | Finished meta-testing... +2025-04-02 16:08:26 | [maml_trainer] epoch #24 | Saving snapshot... +2025-04-02 16:08:45 | [maml_trainer] epoch #24 | Saved +2025-04-02 16:08:45 | [maml_trainer] epoch #24 | Time 9203.78 s +2025-04-02 16:08:45 | [maml_trainer] epoch #24 | EpochTime 358.70 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -68.9849 +Average/AverageReturn -162.373 +Average/Iteration 24 +Average/MaxReturn -133.631 +Average/MinReturn -206.705 +Average/NumEpisodes 80 +Average/StdReturn 9.92864 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6906 +GaussianMLPPolicy/KLAfter 0.00335641 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.23789e-05 +GaussianMLPPolicy/LossBefore -2.48849e-09 +GaussianMLPPolicy/dLoss -4.23814e-05 +Iteration 24 +MetaTest/Average/AverageDiscountedReturn -164.073 +MetaTest/Average/AverageReturn -164.073 +MetaTest/Average/Iteration 24 +MetaTest/Average/MaxReturn -147.185 +MetaTest/Average/MinReturn -222.503 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.7373 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.073 +MetaTest/__unnamed_task__/AverageReturn -164.073 +MetaTest/__unnamed_task__/Iteration 24 +MetaTest/__unnamed_task__/MaxReturn -147.185 +MetaTest/__unnamed_task__/MinReturn -222.503 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.7373 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 800000 +__unnamed_task__/AverageDiscountedReturn -68.9849 +__unnamed_task__/AverageReturn -162.373 +__unnamed_task__/Iteration 24 +__unnamed_task__/MaxReturn -133.631 +__unnamed_task__/MinReturn -206.705 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.92864 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:13:17 | [maml_trainer] epoch #25 | Sampling for adapation and meta-testing... +2025-04-02 16:14:25 | [maml_trainer] epoch #25 | Finished meta-testing... +2025-04-02 16:14:25 | [maml_trainer] epoch #25 | Saving snapshot... +2025-04-02 16:14:45 | [maml_trainer] epoch #25 | Saved +2025-04-02 16:14:45 | [maml_trainer] epoch #25 | Time 9563.53 s +2025-04-02 16:14:45 | [maml_trainer] epoch #25 | EpochTime 359.74 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -72.9309 +Average/AverageReturn -170.843 +Average/Iteration 25 +Average/MaxReturn -142.684 +Average/MinReturn -288.643 +Average/NumEpisodes 80 +Average/StdReturn 21.8927 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6887 +GaussianMLPPolicy/KLAfter 0.00343144 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.97696e-05 +GaussianMLPPolicy/LossBefore -4.94719e-09 +GaussianMLPPolicy/dLoss -3.97746e-05 +Iteration 25 +MetaTest/Average/AverageDiscountedReturn -163.653 +MetaTest/Average/AverageReturn -163.653 +MetaTest/Average/Iteration 25 +MetaTest/Average/MaxReturn -145.693 +MetaTest/Average/MinReturn -202.275 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.2585 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.653 +MetaTest/__unnamed_task__/AverageReturn -163.653 +MetaTest/__unnamed_task__/Iteration 25 +MetaTest/__unnamed_task__/MaxReturn -145.693 +MetaTest/__unnamed_task__/MinReturn -202.275 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.2585 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 832000 +__unnamed_task__/AverageDiscountedReturn -72.9309 +__unnamed_task__/AverageReturn -170.843 +__unnamed_task__/Iteration 25 +__unnamed_task__/MaxReturn -142.684 +__unnamed_task__/MinReturn -288.643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.8927 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:19:17 | [maml_trainer] epoch #26 | Sampling for adapation and meta-testing... +2025-04-02 16:20:25 | [maml_trainer] epoch #26 | Finished meta-testing... +2025-04-02 16:20:25 | [maml_trainer] epoch #26 | Saving snapshot... +2025-04-02 16:20:44 | [maml_trainer] epoch #26 | Saved +2025-04-02 16:20:44 | [maml_trainer] epoch #26 | Time 9923.02 s +2025-04-02 16:20:44 | [maml_trainer] epoch #26 | EpochTime 359.49 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.9707 +Average/AverageReturn -167.953 +Average/Iteration 26 +Average/MaxReturn -148.042 +Average/MinReturn -241.667 +Average/NumEpisodes 80 +Average/StdReturn 18.5947 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.684 +GaussianMLPPolicy/KLAfter 0.00261665 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.29467e-06 +GaussianMLPPolicy/LossBefore 1.92225e-09 +GaussianMLPPolicy/dLoss 7.29659e-06 +Iteration 26 +MetaTest/Average/AverageDiscountedReturn -165.847 +MetaTest/Average/AverageReturn -165.847 +MetaTest/Average/Iteration 26 +MetaTest/Average/MaxReturn -151.825 +MetaTest/Average/MinReturn -207.858 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.2208 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.847 +MetaTest/__unnamed_task__/AverageReturn -165.847 +MetaTest/__unnamed_task__/Iteration 26 +MetaTest/__unnamed_task__/MaxReturn -151.825 +MetaTest/__unnamed_task__/MinReturn -207.858 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.2208 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 864000 +__unnamed_task__/AverageDiscountedReturn -71.9707 +__unnamed_task__/AverageReturn -167.953 +__unnamed_task__/Iteration 26 +__unnamed_task__/MaxReturn -148.042 +__unnamed_task__/MinReturn -241.667 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5947 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:25:18 | [maml_trainer] epoch #27 | Sampling for adapation and meta-testing... +2025-04-02 16:26:28 | [maml_trainer] epoch #27 | Finished meta-testing... +2025-04-02 16:26:28 | [maml_trainer] epoch #27 | Saving snapshot... +2025-04-02 16:26:48 | [maml_trainer] epoch #27 | Saved +2025-04-02 16:26:48 | [maml_trainer] epoch #27 | Time 10286.49 s +2025-04-02 16:26:48 | [maml_trainer] epoch #27 | EpochTime 363.46 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.6908 +Average/AverageReturn -166.569 +Average/Iteration 27 +Average/MaxReturn -144.807 +Average/MinReturn -207.19 +Average/NumEpisodes 80 +Average/StdReturn 12.8088 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6813 +GaussianMLPPolicy/KLAfter 0.00288408 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.03253e-06 +GaussianMLPPolicy/LossBefore -2.84612e-09 +GaussianMLPPolicy/dLoss 6.02968e-06 +Iteration 27 +MetaTest/Average/AverageDiscountedReturn -171.79 +MetaTest/Average/AverageReturn -171.79 +MetaTest/Average/Iteration 27 +MetaTest/Average/MaxReturn -148.905 +MetaTest/Average/MinReturn -283.049 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 31.2218 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.79 +MetaTest/__unnamed_task__/AverageReturn -171.79 +MetaTest/__unnamed_task__/Iteration 27 +MetaTest/__unnamed_task__/MaxReturn -148.905 +MetaTest/__unnamed_task__/MinReturn -283.049 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 31.2218 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 896000 +__unnamed_task__/AverageDiscountedReturn -70.6908 +__unnamed_task__/AverageReturn -166.569 +__unnamed_task__/Iteration 27 +__unnamed_task__/MaxReturn -144.807 +__unnamed_task__/MinReturn -207.19 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8088 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:33:09 | [maml_trainer] epoch #28 | Sampling for adapation and meta-testing... +2025-04-02 16:34:22 | [maml_trainer] epoch #28 | Finished meta-testing... +2025-04-02 16:34:22 | [maml_trainer] epoch #28 | Saving snapshot... +2025-04-02 16:34:43 | [maml_trainer] epoch #28 | Saved +2025-04-02 16:34:43 | [maml_trainer] epoch #28 | Time 10761.97 s +2025-04-02 16:34:43 | [maml_trainer] epoch #28 | EpochTime 475.48 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.8336 +Average/AverageReturn -166.202 +Average/Iteration 28 +Average/MaxReturn -146.999 +Average/MinReturn -247.562 +Average/NumEpisodes 80 +Average/StdReturn 18.0236 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6757 +GaussianMLPPolicy/KLAfter 0.0023898 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.37121e-06 +GaussianMLPPolicy/LossBefore -8.49366e-10 +GaussianMLPPolicy/dLoss 5.37036e-06 +Iteration 28 +MetaTest/Average/AverageDiscountedReturn -164.897 +MetaTest/Average/AverageReturn -164.897 +MetaTest/Average/Iteration 28 +MetaTest/Average/MaxReturn -148.309 +MetaTest/Average/MinReturn -196.775 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.9723 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.897 +MetaTest/__unnamed_task__/AverageReturn -164.897 +MetaTest/__unnamed_task__/Iteration 28 +MetaTest/__unnamed_task__/MaxReturn -148.309 +MetaTest/__unnamed_task__/MinReturn -196.775 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.9723 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 928000 +__unnamed_task__/AverageDiscountedReturn -70.8336 +__unnamed_task__/AverageReturn -166.202 +__unnamed_task__/Iteration 28 +__unnamed_task__/MaxReturn -146.999 +__unnamed_task__/MinReturn -247.562 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0236 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:39:36 | [maml_trainer] epoch #29 | Sampling for adapation and meta-testing... +2025-04-02 16:40:49 | [maml_trainer] epoch #29 | Finished meta-testing... +2025-04-02 16:40:49 | [maml_trainer] epoch #29 | Saving snapshot... +2025-04-02 16:41:09 | [maml_trainer] epoch #29 | Saved +2025-04-02 16:41:09 | [maml_trainer] epoch #29 | Time 11148.02 s +2025-04-02 16:41:09 | [maml_trainer] epoch #29 | EpochTime 386.04 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -71.8309 +Average/AverageReturn -168.413 +Average/Iteration 29 +Average/MaxReturn -143.435 +Average/MinReturn -240.021 +Average/NumEpisodes 80 +Average/StdReturn 18.0539 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6678 +GaussianMLPPolicy/KLAfter 0.00236432 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.80969e-05 +GaussianMLPPolicy/LossBefore 1.69873e-09 +GaussianMLPPolicy/dLoss -2.80952e-05 +Iteration 29 +MetaTest/Average/AverageDiscountedReturn -168.653 +MetaTest/Average/AverageReturn -168.653 +MetaTest/Average/Iteration 29 +MetaTest/Average/MaxReturn -145.533 +MetaTest/Average/MinReturn -241.538 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.2388 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.653 +MetaTest/__unnamed_task__/AverageReturn -168.653 +MetaTest/__unnamed_task__/Iteration 29 +MetaTest/__unnamed_task__/MaxReturn -145.533 +MetaTest/__unnamed_task__/MinReturn -241.538 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.2388 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 960000 +__unnamed_task__/AverageDiscountedReturn -71.8309 +__unnamed_task__/AverageReturn -168.413 +__unnamed_task__/Iteration 29 +__unnamed_task__/MaxReturn -143.435 +__unnamed_task__/MinReturn -240.021 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0539 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:46:00 | [maml_trainer] epoch #30 | Sampling for adapation and meta-testing... +2025-04-02 16:47:13 | [maml_trainer] epoch #30 | Finished meta-testing... +2025-04-02 16:47:13 | [maml_trainer] epoch #30 | Saving snapshot... +2025-04-02 16:47:34 | [maml_trainer] epoch #30 | Saved +2025-04-02 16:47:34 | [maml_trainer] epoch #30 | Time 11532.27 s +2025-04-02 16:47:34 | [maml_trainer] epoch #30 | EpochTime 384.24 s +------------------------------------------------- ---------------- +Average/AverageDiscountedReturn -70.296 +Average/AverageReturn -164.378 +Average/Iteration 30 +Average/MaxReturn -147.289 +Average/MinReturn -233.064 +Average/NumEpisodes 80 +Average/StdReturn 15.1827 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6594 +GaussianMLPPolicy/KLAfter 0.00230621 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.40797e-05 +GaussianMLPPolicy/LossBefore 2.47359e-09 +GaussianMLPPolicy/dLoss -2.40772e-05 +Iteration 30 +MetaTest/Average/AverageDiscountedReturn -170.585 +MetaTest/Average/AverageReturn -170.585 +MetaTest/Average/Iteration 30 +MetaTest/Average/MaxReturn -153.001 +MetaTest/Average/MinReturn -222.044 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.4473 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.585 +MetaTest/__unnamed_task__/AverageReturn -170.585 +MetaTest/__unnamed_task__/Iteration 30 +MetaTest/__unnamed_task__/MaxReturn -153.001 +MetaTest/__unnamed_task__/MinReturn -222.044 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.4473 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 992000 +__unnamed_task__/AverageDiscountedReturn -70.296 +__unnamed_task__/AverageReturn -164.378 +__unnamed_task__/Iteration 30 +__unnamed_task__/MaxReturn -147.289 +__unnamed_task__/MinReturn -233.064 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1827 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- ---------------- +2025-04-02 16:52:27 | [maml_trainer] epoch #31 | Sampling for adapation and meta-testing... +2025-04-02 16:53:40 | [maml_trainer] epoch #31 | Finished meta-testing... +2025-04-02 16:53:40 | [maml_trainer] epoch #31 | Saving snapshot... +2025-04-02 16:54:00 | [maml_trainer] epoch #31 | Saved +2025-04-02 16:54:00 | [maml_trainer] epoch #31 | Time 11919.07 s +2025-04-02 16:54:00 | [maml_trainer] epoch #31 | EpochTime 386.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.1884 +Average/AverageReturn -164.309 +Average/Iteration 31 +Average/MaxReturn -147.137 +Average/MinReturn -245.498 +Average/NumEpisodes 80 +Average/StdReturn 16.6923 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6505 +GaussianMLPPolicy/KLAfter 0.00233092 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.49288e-05 +GaussianMLPPolicy/LossBefore -7.34627e-09 +GaussianMLPPolicy/dLoss 1.49214e-05 +Iteration 31 +MetaTest/Average/AverageDiscountedReturn -165.502 +MetaTest/Average/AverageReturn -165.502 +MetaTest/Average/Iteration 31 +MetaTest/Average/MaxReturn -144.497 +MetaTest/Average/MinReturn -203.316 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.7763 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.502 +MetaTest/__unnamed_task__/AverageReturn -165.502 +MetaTest/__unnamed_task__/Iteration 31 +MetaTest/__unnamed_task__/MaxReturn -144.497 +MetaTest/__unnamed_task__/MinReturn -203.316 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.7763 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.024e+06 +__unnamed_task__/AverageDiscountedReturn -70.1884 +__unnamed_task__/AverageReturn -164.309 +__unnamed_task__/Iteration 31 +__unnamed_task__/MaxReturn -147.137 +__unnamed_task__/MinReturn -245.498 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6923 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 16:58:51 | [maml_trainer] epoch #32 | Sampling for adapation and meta-testing... +2025-04-02 17:00:02 | [maml_trainer] epoch #32 | Finished meta-testing... +2025-04-02 17:00:02 | [maml_trainer] epoch #32 | Saving snapshot... +2025-04-02 17:00:23 | [maml_trainer] epoch #32 | Saved +2025-04-02 17:00:23 | [maml_trainer] epoch #32 | Time 12302.10 s +2025-04-02 17:00:23 | [maml_trainer] epoch #32 | EpochTime 383.03 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.1157 +Average/AverageReturn -163.343 +Average/Iteration 32 +Average/MaxReturn -141.454 +Average/MinReturn -234.343 +Average/NumEpisodes 80 +Average/StdReturn 15.1464 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6426 +GaussianMLPPolicy/KLAfter 0.00396476 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.91252e-06 +GaussianMLPPolicy/LossBefore -5.36442e-10 +GaussianMLPPolicy/dLoss -9.91306e-06 +Iteration 32 +MetaTest/Average/AverageDiscountedReturn -167.501 +MetaTest/Average/AverageReturn -167.501 +MetaTest/Average/Iteration 32 +MetaTest/Average/MaxReturn -144.169 +MetaTest/Average/MinReturn -213.645 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.6715 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.501 +MetaTest/__unnamed_task__/AverageReturn -167.501 +MetaTest/__unnamed_task__/Iteration 32 +MetaTest/__unnamed_task__/MaxReturn -144.169 +MetaTest/__unnamed_task__/MinReturn -213.645 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.6715 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.056e+06 +__unnamed_task__/AverageDiscountedReturn -70.1157 +__unnamed_task__/AverageReturn -163.343 +__unnamed_task__/Iteration 32 +__unnamed_task__/MaxReturn -141.454 +__unnamed_task__/MinReturn -234.343 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1464 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:05:14 | [maml_trainer] epoch #33 | Sampling for adapation and meta-testing... +2025-04-02 17:06:28 | [maml_trainer] epoch #33 | Finished meta-testing... +2025-04-02 17:06:28 | [maml_trainer] epoch #33 | Saving snapshot... +2025-04-02 17:06:49 | [maml_trainer] epoch #33 | Saved +2025-04-02 17:06:49 | [maml_trainer] epoch #33 | Time 12687.99 s +2025-04-02 17:06:49 | [maml_trainer] epoch #33 | EpochTime 385.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.0353 +Average/AverageReturn -160.172 +Average/Iteration 33 +Average/MaxReturn -143.329 +Average/MinReturn -219.284 +Average/NumEpisodes 80 +Average/StdReturn 12.1503 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6329 +GaussianMLPPolicy/KLAfter 0.00392108 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.82422e-05 +GaussianMLPPolicy/LossBefore -1.3262e-09 +GaussianMLPPolicy/dLoss 2.82409e-05 +Iteration 33 +MetaTest/Average/AverageDiscountedReturn -169.789 +MetaTest/Average/AverageReturn -169.789 +MetaTest/Average/Iteration 33 +MetaTest/Average/MaxReturn -147.376 +MetaTest/Average/MinReturn -216.909 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.0451 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.789 +MetaTest/__unnamed_task__/AverageReturn -169.789 +MetaTest/__unnamed_task__/Iteration 33 +MetaTest/__unnamed_task__/MaxReturn -147.376 +MetaTest/__unnamed_task__/MinReturn -216.909 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.0451 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.088e+06 +__unnamed_task__/AverageDiscountedReturn -69.0353 +__unnamed_task__/AverageReturn -160.172 +__unnamed_task__/Iteration 33 +__unnamed_task__/MaxReturn -143.329 +__unnamed_task__/MinReturn -219.284 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1503 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:11:27 | [maml_trainer] epoch #34 | Sampling for adapation and meta-testing... +2025-04-02 17:12:40 | [maml_trainer] epoch #34 | Finished meta-testing... +2025-04-02 17:12:40 | [maml_trainer] epoch #34 | Saving snapshot... +2025-04-02 17:13:02 | [maml_trainer] epoch #34 | Saved +2025-04-02 17:13:02 | [maml_trainer] epoch #34 | Time 13060.23 s +2025-04-02 17:13:02 | [maml_trainer] epoch #34 | EpochTime 372.24 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.4144 +Average/AverageReturn -164.649 +Average/Iteration 34 +Average/MaxReturn -146.467 +Average/MinReturn -211.95 +Average/NumEpisodes 80 +Average/StdReturn 14.4405 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6252 +GaussianMLPPolicy/KLAfter 0.0035162 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.43626e-06 +GaussianMLPPolicy/LossBefore -4.70877e-09 +GaussianMLPPolicy/dLoss 5.43155e-06 +Iteration 34 +MetaTest/Average/AverageDiscountedReturn -173.281 +MetaTest/Average/AverageReturn -173.281 +MetaTest/Average/Iteration 34 +MetaTest/Average/MaxReturn -150.059 +MetaTest/Average/MinReturn -210.134 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.0305 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.281 +MetaTest/__unnamed_task__/AverageReturn -173.281 +MetaTest/__unnamed_task__/Iteration 34 +MetaTest/__unnamed_task__/MaxReturn -150.059 +MetaTest/__unnamed_task__/MinReturn -210.134 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.0305 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.12e+06 +__unnamed_task__/AverageDiscountedReturn -70.4144 +__unnamed_task__/AverageReturn -164.649 +__unnamed_task__/Iteration 34 +__unnamed_task__/MaxReturn -146.467 +__unnamed_task__/MinReturn -211.95 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4405 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:17:54 | [maml_trainer] epoch #35 | Sampling for adapation and meta-testing... +2025-04-02 17:19:07 | [maml_trainer] epoch #35 | Finished meta-testing... +2025-04-02 17:19:07 | [maml_trainer] epoch #35 | Saving snapshot... +2025-04-02 17:19:27 | [maml_trainer] epoch #35 | Saved +2025-04-02 17:19:27 | [maml_trainer] epoch #35 | Time 13445.79 s +2025-04-02 17:19:27 | [maml_trainer] epoch #35 | EpochTime 385.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1967 +Average/AverageReturn -165.288 +Average/Iteration 35 +Average/MaxReturn -144.654 +Average/MinReturn -240.281 +Average/NumEpisodes 80 +Average/StdReturn 14.6969 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6213 +GaussianMLPPolicy/KLAfter 0.00402779 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.1524e-05 +GaussianMLPPolicy/LossBefore 4.44055e-09 +GaussianMLPPolicy/dLoss 2.15284e-05 +Iteration 35 +MetaTest/Average/AverageDiscountedReturn -167.537 +MetaTest/Average/AverageReturn -167.537 +MetaTest/Average/Iteration 35 +MetaTest/Average/MaxReturn -147.149 +MetaTest/Average/MinReturn -204.69 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.2591 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.537 +MetaTest/__unnamed_task__/AverageReturn -167.537 +MetaTest/__unnamed_task__/Iteration 35 +MetaTest/__unnamed_task__/MaxReturn -147.149 +MetaTest/__unnamed_task__/MinReturn -204.69 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.2591 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.152e+06 +__unnamed_task__/AverageDiscountedReturn -71.1967 +__unnamed_task__/AverageReturn -165.288 +__unnamed_task__/Iteration 35 +__unnamed_task__/MaxReturn -144.654 +__unnamed_task__/MinReturn -240.281 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.6969 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:24:30 | [maml_trainer] epoch #36 | Sampling for adapation and meta-testing... +2025-04-02 17:25:43 | [maml_trainer] epoch #36 | Finished meta-testing... +2025-04-02 17:25:43 | [maml_trainer] epoch #36 | Saving snapshot... +2025-04-02 17:26:03 | [maml_trainer] epoch #36 | Saved +2025-04-02 17:26:03 | [maml_trainer] epoch #36 | Time 13841.81 s +2025-04-02 17:26:03 | [maml_trainer] epoch #36 | EpochTime 396.02 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.8919 +Average/AverageReturn -162.706 +Average/Iteration 36 +Average/MaxReturn -146.269 +Average/MinReturn -213.005 +Average/NumEpisodes 80 +Average/StdReturn 9.98592 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6159 +GaussianMLPPolicy/KLAfter 0.00445043 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.03836e-05 +GaussianMLPPolicy/LossBefore 3.05474e-09 +GaussianMLPPolicy/dLoss 2.03867e-05 +Iteration 36 +MetaTest/Average/AverageDiscountedReturn -166.983 +MetaTest/Average/AverageReturn -166.983 +MetaTest/Average/Iteration 36 +MetaTest/Average/MaxReturn -150.482 +MetaTest/Average/MinReturn -213.681 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.6181 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.983 +MetaTest/__unnamed_task__/AverageReturn -166.983 +MetaTest/__unnamed_task__/Iteration 36 +MetaTest/__unnamed_task__/MaxReturn -150.482 +MetaTest/__unnamed_task__/MinReturn -213.681 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.6181 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.184e+06 +__unnamed_task__/AverageDiscountedReturn -69.8919 +__unnamed_task__/AverageReturn -162.706 +__unnamed_task__/Iteration 36 +__unnamed_task__/MaxReturn -146.269 +__unnamed_task__/MinReturn -213.005 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.98592 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:30:53 | [maml_trainer] epoch #37 | Sampling for adapation and meta-testing... +2025-04-02 17:32:05 | [maml_trainer] epoch #37 | Finished meta-testing... +2025-04-02 17:32:05 | [maml_trainer] epoch #37 | Saving snapshot... +2025-04-02 17:32:27 | [maml_trainer] epoch #37 | Saved +2025-04-02 17:32:27 | [maml_trainer] epoch #37 | Time 14225.59 s +2025-04-02 17:32:27 | [maml_trainer] epoch #37 | EpochTime 383.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3258 +Average/AverageReturn -168.362 +Average/Iteration 37 +Average/MaxReturn -141.43 +Average/MinReturn -249.459 +Average/NumEpisodes 80 +Average/StdReturn 19.342 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6098 +GaussianMLPPolicy/KLAfter 0.00359539 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.84439e-05 +GaussianMLPPolicy/LossBefore -6.25849e-10 +GaussianMLPPolicy/dLoss -1.84446e-05 +Iteration 37 +MetaTest/Average/AverageDiscountedReturn -165.787 +MetaTest/Average/AverageReturn -165.787 +MetaTest/Average/Iteration 37 +MetaTest/Average/MaxReturn -148.805 +MetaTest/Average/MinReturn -201.189 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.1041 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.787 +MetaTest/__unnamed_task__/AverageReturn -165.787 +MetaTest/__unnamed_task__/Iteration 37 +MetaTest/__unnamed_task__/MaxReturn -148.805 +MetaTest/__unnamed_task__/MinReturn -201.189 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.1041 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.216e+06 +__unnamed_task__/AverageDiscountedReturn -72.3258 +__unnamed_task__/AverageReturn -168.362 +__unnamed_task__/Iteration 37 +__unnamed_task__/MaxReturn -141.43 +__unnamed_task__/MinReturn -249.459 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.342 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:37:03 | [maml_trainer] epoch #38 | Sampling for adapation and meta-testing... +2025-04-02 17:38:16 | [maml_trainer] epoch #38 | Finished meta-testing... +2025-04-02 17:38:16 | [maml_trainer] epoch #38 | Saving snapshot... +2025-04-02 17:38:37 | [maml_trainer] epoch #38 | Saved +2025-04-02 17:38:37 | [maml_trainer] epoch #38 | Time 14595.60 s +2025-04-02 17:38:37 | [maml_trainer] epoch #38 | EpochTime 370.00 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.3005 +Average/AverageReturn -163.791 +Average/Iteration 38 +Average/MaxReturn -149.764 +Average/MinReturn -202.091 +Average/NumEpisodes 80 +Average/StdReturn 9.85371 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6062 +GaussianMLPPolicy/KLAfter 0.00361439 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.7904e-05 +GaussianMLPPolicy/LossBefore -6.09458e-09 +GaussianMLPPolicy/dLoss 1.78979e-05 +Iteration 38 +MetaTest/Average/AverageDiscountedReturn -168.72 +MetaTest/Average/AverageReturn -168.72 +MetaTest/Average/Iteration 38 +MetaTest/Average/MaxReturn -150.713 +MetaTest/Average/MinReturn -205.652 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.4793 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.72 +MetaTest/__unnamed_task__/AverageReturn -168.72 +MetaTest/__unnamed_task__/Iteration 38 +MetaTest/__unnamed_task__/MaxReturn -150.713 +MetaTest/__unnamed_task__/MinReturn -205.652 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.4793 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.248e+06 +__unnamed_task__/AverageDiscountedReturn -70.3005 +__unnamed_task__/AverageReturn -163.791 +__unnamed_task__/Iteration 38 +__unnamed_task__/MaxReturn -149.764 +__unnamed_task__/MinReturn -202.091 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.85371 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:43:27 | [maml_trainer] epoch #39 | Sampling for adapation and meta-testing... +2025-04-02 17:44:40 | [maml_trainer] epoch #39 | Finished meta-testing... +2025-04-02 17:44:40 | [maml_trainer] epoch #39 | Saving snapshot... +2025-04-02 17:45:01 | [maml_trainer] epoch #39 | Saved +2025-04-02 17:45:01 | [maml_trainer] epoch #39 | Time 14979.80 s +2025-04-02 17:45:01 | [maml_trainer] epoch #39 | EpochTime 384.20 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7768 +Average/AverageReturn -169.524 +Average/Iteration 39 +Average/MaxReturn -149.039 +Average/MinReturn -294.631 +Average/NumEpisodes 80 +Average/StdReturn 20.2351 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6018 +GaussianMLPPolicy/KLAfter 0.00350577 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.42819e-05 +GaussianMLPPolicy/LossBefore -6.49691e-09 +GaussianMLPPolicy/dLoss 3.42754e-05 +Iteration 39 +MetaTest/Average/AverageDiscountedReturn -166.397 +MetaTest/Average/AverageReturn -166.397 +MetaTest/Average/Iteration 39 +MetaTest/Average/MaxReturn -152.799 +MetaTest/Average/MinReturn -202.004 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.5785 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.397 +MetaTest/__unnamed_task__/AverageReturn -166.397 +MetaTest/__unnamed_task__/Iteration 39 +MetaTest/__unnamed_task__/MaxReturn -152.799 +MetaTest/__unnamed_task__/MinReturn -202.004 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.5785 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.28e+06 +__unnamed_task__/AverageDiscountedReturn -72.7768 +__unnamed_task__/AverageReturn -169.524 +__unnamed_task__/Iteration 39 +__unnamed_task__/MaxReturn -149.039 +__unnamed_task__/MinReturn -294.631 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.2351 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:49:52 | [maml_trainer] epoch #40 | Sampling for adapation and meta-testing... +2025-04-02 17:51:04 | [maml_trainer] epoch #40 | Finished meta-testing... +2025-04-02 17:51:04 | [maml_trainer] epoch #40 | Saving snapshot... +2025-04-02 17:51:25 | [maml_trainer] epoch #40 | Saved +2025-04-02 17:51:25 | [maml_trainer] epoch #40 | Time 15363.37 s +2025-04-02 17:51:25 | [maml_trainer] epoch #40 | EpochTime 383.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5583 +Average/AverageReturn -169.385 +Average/Iteration 40 +Average/MaxReturn -149.655 +Average/MinReturn -237.268 +Average/NumEpisodes 80 +Average/StdReturn 17.1973 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5987 +GaussianMLPPolicy/KLAfter 0.002284 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.88184e-07 +GaussianMLPPolicy/LossBefore -9.70066e-09 +GaussianMLPPolicy/dLoss 8.78483e-07 +Iteration 40 +MetaTest/Average/AverageDiscountedReturn -176.082 +MetaTest/Average/AverageReturn -176.082 +MetaTest/Average/Iteration 40 +MetaTest/Average/MaxReturn -156.255 +MetaTest/Average/MinReturn -239.463 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.3135 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.082 +MetaTest/__unnamed_task__/AverageReturn -176.082 +MetaTest/__unnamed_task__/Iteration 40 +MetaTest/__unnamed_task__/MaxReturn -156.255 +MetaTest/__unnamed_task__/MinReturn -239.463 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.3135 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.312e+06 +__unnamed_task__/AverageDiscountedReturn -72.5583 +__unnamed_task__/AverageReturn -169.385 +__unnamed_task__/Iteration 40 +__unnamed_task__/MaxReturn -149.655 +__unnamed_task__/MinReturn -237.268 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.1973 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 17:56:14 | [maml_trainer] epoch #41 | Sampling for adapation and meta-testing... +2025-04-02 17:57:26 | [maml_trainer] epoch #41 | Finished meta-testing... +2025-04-02 17:57:26 | [maml_trainer] epoch #41 | Saving snapshot... +2025-04-02 17:57:47 | [maml_trainer] epoch #41 | Saved +2025-04-02 17:57:47 | [maml_trainer] epoch #41 | Time 15745.76 s +2025-04-02 17:57:47 | [maml_trainer] epoch #41 | EpochTime 382.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8891 +Average/AverageReturn -171.098 +Average/Iteration 41 +Average/MaxReturn -147.558 +Average/MinReturn -233.276 +Average/NumEpisodes 80 +Average/StdReturn 18.7151 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5971 +GaussianMLPPolicy/KLAfter 0.00224349 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.0498e-05 +GaussianMLPPolicy/LossBefore -3.17395e-09 +GaussianMLPPolicy/dLoss -3.05012e-05 +Iteration 41 +MetaTest/Average/AverageDiscountedReturn -167.373 +MetaTest/Average/AverageReturn -167.373 +MetaTest/Average/Iteration 41 +MetaTest/Average/MaxReturn -152.748 +MetaTest/Average/MinReturn -206.917 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9268 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.373 +MetaTest/__unnamed_task__/AverageReturn -167.373 +MetaTest/__unnamed_task__/Iteration 41 +MetaTest/__unnamed_task__/MaxReturn -152.748 +MetaTest/__unnamed_task__/MinReturn -206.917 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9268 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.344e+06 +__unnamed_task__/AverageDiscountedReturn -72.8891 +__unnamed_task__/AverageReturn -171.098 +__unnamed_task__/Iteration 41 +__unnamed_task__/MaxReturn -147.558 +__unnamed_task__/MinReturn -233.276 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.7151 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:02:36 | [maml_trainer] epoch #42 | Sampling for adapation and meta-testing... +2025-04-02 18:03:48 | [maml_trainer] epoch #42 | Finished meta-testing... +2025-04-02 18:03:48 | [maml_trainer] epoch #42 | Saving snapshot... +2025-04-02 18:04:10 | [maml_trainer] epoch #42 | Saved +2025-04-02 18:04:10 | [maml_trainer] epoch #42 | Time 16128.35 s +2025-04-02 18:04:10 | [maml_trainer] epoch #42 | EpochTime 382.59 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5559 +Average/AverageReturn -167.42 +Average/Iteration 42 +Average/MaxReturn -146.848 +Average/MinReturn -216.718 +Average/NumEpisodes 80 +Average/StdReturn 12.4661 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5967 +GaussianMLPPolicy/KLAfter 0.00184167 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.22262e-06 +GaussianMLPPolicy/LossBefore 5.06639e-09 +GaussianMLPPolicy/dLoss -5.21755e-06 +Iteration 42 +MetaTest/Average/AverageDiscountedReturn -171.381 +MetaTest/Average/AverageReturn -171.381 +MetaTest/Average/Iteration 42 +MetaTest/Average/MaxReturn -150.906 +MetaTest/Average/MinReturn -236.774 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.1854 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.381 +MetaTest/__unnamed_task__/AverageReturn -171.381 +MetaTest/__unnamed_task__/Iteration 42 +MetaTest/__unnamed_task__/MaxReturn -150.906 +MetaTest/__unnamed_task__/MinReturn -236.774 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.1854 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.376e+06 +__unnamed_task__/AverageDiscountedReturn -71.5559 +__unnamed_task__/AverageReturn -167.42 +__unnamed_task__/Iteration 42 +__unnamed_task__/MaxReturn -146.848 +__unnamed_task__/MinReturn -216.718 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.4661 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:08:59 | [maml_trainer] epoch #43 | Sampling for adapation and meta-testing... +2025-04-02 18:10:10 | [maml_trainer] epoch #43 | Finished meta-testing... +2025-04-02 18:10:10 | [maml_trainer] epoch #43 | Saving snapshot... +2025-04-02 18:10:31 | [maml_trainer] epoch #43 | Saved +2025-04-02 18:10:31 | [maml_trainer] epoch #43 | Time 16509.87 s +2025-04-02 18:10:31 | [maml_trainer] epoch #43 | EpochTime 381.51 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.9419 +Average/AverageReturn -168.973 +Average/Iteration 43 +Average/MaxReturn -151.856 +Average/MinReturn -213.675 +Average/NumEpisodes 80 +Average/StdReturn 13.7419 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5992 +GaussianMLPPolicy/KLAfter 0.00277554 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.80902e-06 +GaussianMLPPolicy/LossBefore 4.88758e-09 +GaussianMLPPolicy/dLoss -2.80413e-06 +Iteration 43 +MetaTest/Average/AverageDiscountedReturn -173.769 +MetaTest/Average/AverageReturn -173.769 +MetaTest/Average/Iteration 43 +MetaTest/Average/MaxReturn -157.499 +MetaTest/Average/MinReturn -224.425 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.6946 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.769 +MetaTest/__unnamed_task__/AverageReturn -173.769 +MetaTest/__unnamed_task__/Iteration 43 +MetaTest/__unnamed_task__/MaxReturn -157.499 +MetaTest/__unnamed_task__/MinReturn -224.425 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.6946 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.408e+06 +__unnamed_task__/AverageDiscountedReturn -71.9419 +__unnamed_task__/AverageReturn -168.973 +__unnamed_task__/Iteration 43 +__unnamed_task__/MaxReturn -151.856 +__unnamed_task__/MinReturn -213.675 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.7419 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:15:20 | [maml_trainer] epoch #44 | Sampling for adapation and meta-testing... +2025-04-02 18:16:33 | [maml_trainer] epoch #44 | Finished meta-testing... +2025-04-02 18:16:33 | [maml_trainer] epoch #44 | Saving snapshot... +2025-04-02 18:16:53 | [maml_trainer] epoch #44 | Saved +2025-04-02 18:16:53 | [maml_trainer] epoch #44 | Time 16892.03 s +2025-04-02 18:16:53 | [maml_trainer] epoch #44 | EpochTime 382.16 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6661 +Average/AverageReturn -168.263 +Average/Iteration 44 +Average/MaxReturn -151.448 +Average/MinReturn -220.303 +Average/NumEpisodes 80 +Average/StdReturn 11.4389 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6041 +GaussianMLPPolicy/KLAfter 0.00179973 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.00515e-05 +GaussianMLPPolicy/LossBefore 3.51667e-09 +GaussianMLPPolicy/dLoss -2.0048e-05 +Iteration 44 +MetaTest/Average/AverageDiscountedReturn -162.921 +MetaTest/Average/AverageReturn -162.921 +MetaTest/Average/Iteration 44 +MetaTest/Average/MaxReturn -149.682 +MetaTest/Average/MinReturn -172.729 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 5.14014 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.921 +MetaTest/__unnamed_task__/AverageReturn -162.921 +MetaTest/__unnamed_task__/Iteration 44 +MetaTest/__unnamed_task__/MaxReturn -149.682 +MetaTest/__unnamed_task__/MinReturn -172.729 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 5.14014 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.44e+06 +__unnamed_task__/AverageDiscountedReturn -71.6661 +__unnamed_task__/AverageReturn -168.263 +__unnamed_task__/Iteration 44 +__unnamed_task__/MaxReturn -151.448 +__unnamed_task__/MinReturn -220.303 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.4389 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:21:40 | [maml_trainer] epoch #45 | Sampling for adapation and meta-testing... +2025-04-02 18:22:53 | [maml_trainer] epoch #45 | Finished meta-testing... +2025-04-02 18:22:53 | [maml_trainer] epoch #45 | Saving snapshot... +2025-04-02 18:23:14 | [maml_trainer] epoch #45 | Saved +2025-04-02 18:23:14 | [maml_trainer] epoch #45 | Time 17272.91 s +2025-04-02 18:23:14 | [maml_trainer] epoch #45 | EpochTime 380.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.0975 +Average/AverageReturn -171.044 +Average/Iteration 45 +Average/MaxReturn -148.632 +Average/MinReturn -273.276 +Average/NumEpisodes 80 +Average/StdReturn 17.8674 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6094 +GaussianMLPPolicy/KLAfter 0.00160816 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.80491e-05 +GaussianMLPPolicy/LossBefore 4.61936e-09 +GaussianMLPPolicy/dLoss 1.80537e-05 +Iteration 45 +MetaTest/Average/AverageDiscountedReturn -172.159 +MetaTest/Average/AverageReturn -172.159 +MetaTest/Average/Iteration 45 +MetaTest/Average/MaxReturn -155.048 +MetaTest/Average/MinReturn -231.868 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.2064 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.159 +MetaTest/__unnamed_task__/AverageReturn -172.159 +MetaTest/__unnamed_task__/Iteration 45 +MetaTest/__unnamed_task__/MaxReturn -155.048 +MetaTest/__unnamed_task__/MinReturn -231.868 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.2064 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.472e+06 +__unnamed_task__/AverageDiscountedReturn -73.0975 +__unnamed_task__/AverageReturn -171.044 +__unnamed_task__/Iteration 45 +__unnamed_task__/MaxReturn -148.632 +__unnamed_task__/MinReturn -273.276 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.8674 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:28:01 | [maml_trainer] epoch #46 | Sampling for adapation and meta-testing... +2025-04-02 18:29:13 | [maml_trainer] epoch #46 | Finished meta-testing... +2025-04-02 18:29:13 | [maml_trainer] epoch #46 | Saving snapshot... +2025-04-02 18:29:35 | [maml_trainer] epoch #46 | Saved +2025-04-02 18:29:35 | [maml_trainer] epoch #46 | Time 17653.43 s +2025-04-02 18:29:35 | [maml_trainer] epoch #46 | EpochTime 380.52 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.1427 +Average/AverageReturn -168.621 +Average/Iteration 46 +Average/MaxReturn -151.138 +Average/MinReturn -235.955 +Average/NumEpisodes 80 +Average/StdReturn 17.007 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6147 +GaussianMLPPolicy/KLAfter 0.00203657 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.17573e-05 +GaussianMLPPolicy/LossBefore -2.48849e-09 +GaussianMLPPolicy/dLoss -2.17598e-05 +Iteration 46 +MetaTest/Average/AverageDiscountedReturn -169.051 +MetaTest/Average/AverageReturn -169.051 +MetaTest/Average/Iteration 46 +MetaTest/Average/MaxReturn -150.922 +MetaTest/Average/MinReturn -210.151 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9755 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.051 +MetaTest/__unnamed_task__/AverageReturn -169.051 +MetaTest/__unnamed_task__/Iteration 46 +MetaTest/__unnamed_task__/MaxReturn -150.922 +MetaTest/__unnamed_task__/MinReturn -210.151 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9755 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.504e+06 +__unnamed_task__/AverageDiscountedReturn -72.1427 +__unnamed_task__/AverageReturn -168.621 +__unnamed_task__/Iteration 46 +__unnamed_task__/MaxReturn -151.138 +__unnamed_task__/MinReturn -235.955 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.007 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:34:24 | [maml_trainer] epoch #47 | Sampling for adapation and meta-testing... +2025-04-02 18:35:36 | [maml_trainer] epoch #47 | Finished meta-testing... +2025-04-02 18:35:36 | [maml_trainer] epoch #47 | Saving snapshot... +2025-04-02 18:35:58 | [maml_trainer] epoch #47 | Saved +2025-04-02 18:35:58 | [maml_trainer] epoch #47 | Time 18036.75 s +2025-04-02 18:35:58 | [maml_trainer] epoch #47 | EpochTime 383.31 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8236 +Average/AverageReturn -170.964 +Average/Iteration 47 +Average/MaxReturn -151.601 +Average/MinReturn -226.16 +Average/NumEpisodes 80 +Average/StdReturn 15.5126 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6178 +GaussianMLPPolicy/KLAfter 0.00138915 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.96696e-06 +GaussianMLPPolicy/LossBefore -1.80304e-09 +GaussianMLPPolicy/dLoss -2.96876e-06 +Iteration 47 +MetaTest/Average/AverageDiscountedReturn -163.926 +MetaTest/Average/AverageReturn -163.926 +MetaTest/Average/Iteration 47 +MetaTest/Average/MaxReturn -150.645 +MetaTest/Average/MinReturn -178.12 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.71231 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.926 +MetaTest/__unnamed_task__/AverageReturn -163.926 +MetaTest/__unnamed_task__/Iteration 47 +MetaTest/__unnamed_task__/MaxReturn -150.645 +MetaTest/__unnamed_task__/MinReturn -178.12 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.71231 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.536e+06 +__unnamed_task__/AverageDiscountedReturn -72.8236 +__unnamed_task__/AverageReturn -170.964 +__unnamed_task__/Iteration 47 +__unnamed_task__/MaxReturn -151.601 +__unnamed_task__/MinReturn -226.16 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.5126 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:40:40 | [maml_trainer] epoch #48 | Sampling for adapation and meta-testing... +2025-04-02 18:41:52 | [maml_trainer] epoch #48 | Finished meta-testing... +2025-04-02 18:41:52 | [maml_trainer] epoch #48 | Saving snapshot... +2025-04-02 18:42:13 | [maml_trainer] epoch #48 | Saved +2025-04-02 18:42:13 | [maml_trainer] epoch #48 | Time 18411.76 s +2025-04-02 18:42:13 | [maml_trainer] epoch #48 | EpochTime 375.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.9289 +Average/AverageReturn -168.47 +Average/Iteration 48 +Average/MaxReturn -150.346 +Average/MinReturn -238.16 +Average/NumEpisodes 80 +Average/StdReturn 12.931 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.62 +GaussianMLPPolicy/KLAfter 0.00143354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.06499e-06 +GaussianMLPPolicy/LossBefore 6.69062e-09 +GaussianMLPPolicy/dLoss 9.07168e-06 +Iteration 48 +MetaTest/Average/AverageDiscountedReturn -169.054 +MetaTest/Average/AverageReturn -169.054 +MetaTest/Average/Iteration 48 +MetaTest/Average/MaxReturn -158.452 +MetaTest/Average/MinReturn -224.701 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.9219 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.054 +MetaTest/__unnamed_task__/AverageReturn -169.054 +MetaTest/__unnamed_task__/Iteration 48 +MetaTest/__unnamed_task__/MaxReturn -158.452 +MetaTest/__unnamed_task__/MinReturn -224.701 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.9219 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.568e+06 +__unnamed_task__/AverageDiscountedReturn -71.9289 +__unnamed_task__/AverageReturn -168.47 +__unnamed_task__/Iteration 48 +__unnamed_task__/MaxReturn -150.346 +__unnamed_task__/MinReturn -238.16 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.931 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:47:01 | [maml_trainer] epoch #49 | Sampling for adapation and meta-testing... +2025-04-02 18:48:13 | [maml_trainer] epoch #49 | Finished meta-testing... +2025-04-02 18:48:13 | [maml_trainer] epoch #49 | Saving snapshot... +2025-04-02 18:48:34 | [maml_trainer] epoch #49 | Saved +2025-04-02 18:48:34 | [maml_trainer] epoch #49 | Time 18792.88 s +2025-04-02 18:48:34 | [maml_trainer] epoch #49 | EpochTime 381.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6095 +Average/AverageReturn -170.26 +Average/Iteration 49 +Average/MaxReturn -151.903 +Average/MinReturn -233.563 +Average/NumEpisodes 80 +Average/StdReturn 13.2155 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6195 +GaussianMLPPolicy/KLAfter 0.0021001 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.27031e-05 +GaussianMLPPolicy/LossBefore -3.17395e-09 +GaussianMLPPolicy/dLoss 2.27e-05 +Iteration 49 +MetaTest/Average/AverageDiscountedReturn -167.887 +MetaTest/Average/AverageReturn -167.887 +MetaTest/Average/Iteration 49 +MetaTest/Average/MaxReturn -152.682 +MetaTest/Average/MinReturn -206.139 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.3346 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.887 +MetaTest/__unnamed_task__/AverageReturn -167.887 +MetaTest/__unnamed_task__/Iteration 49 +MetaTest/__unnamed_task__/MaxReturn -152.682 +MetaTest/__unnamed_task__/MinReturn -206.139 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.3346 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.6e+06 +__unnamed_task__/AverageDiscountedReturn -72.6095 +__unnamed_task__/AverageReturn -170.26 +__unnamed_task__/Iteration 49 +__unnamed_task__/MaxReturn -151.903 +__unnamed_task__/MinReturn -233.563 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2155 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:53:22 | [maml_trainer] epoch #50 | Sampling for adapation and meta-testing... +2025-04-02 18:54:34 | [maml_trainer] epoch #50 | Finished meta-testing... +2025-04-02 18:54:34 | [maml_trainer] epoch #50 | Saving snapshot... +2025-04-02 18:54:55 | [maml_trainer] epoch #50 | Saved +2025-04-02 18:54:56 | [maml_trainer] epoch #50 | Time 19174.11 s +2025-04-02 18:54:56 | [maml_trainer] epoch #50 | EpochTime 381.23 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7906 +Average/AverageReturn -170.724 +Average/Iteration 50 +Average/MaxReturn -151.975 +Average/MinReturn -244.725 +Average/NumEpisodes 80 +Average/StdReturn 17.0917 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6189 +GaussianMLPPolicy/KLAfter 0.00200618 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.9925e-05 +GaussianMLPPolicy/LossBefore -2.5481e-09 +GaussianMLPPolicy/dLoss 1.99225e-05 +Iteration 50 +MetaTest/Average/AverageDiscountedReturn -169.227 +MetaTest/Average/AverageReturn -169.227 +MetaTest/Average/Iteration 50 +MetaTest/Average/MaxReturn -153.631 +MetaTest/Average/MinReturn -207.631 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.4566 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.227 +MetaTest/__unnamed_task__/AverageReturn -169.227 +MetaTest/__unnamed_task__/Iteration 50 +MetaTest/__unnamed_task__/MaxReturn -153.631 +MetaTest/__unnamed_task__/MinReturn -207.631 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.4566 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.632e+06 +__unnamed_task__/AverageDiscountedReturn -72.7906 +__unnamed_task__/AverageReturn -170.724 +__unnamed_task__/Iteration 50 +__unnamed_task__/MaxReturn -151.975 +__unnamed_task__/MinReturn -244.725 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0917 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 18:59:45 | [maml_trainer] epoch #51 | Sampling for adapation and meta-testing... +2025-04-02 19:00:58 | [maml_trainer] epoch #51 | Finished meta-testing... +2025-04-02 19:00:58 | [maml_trainer] epoch #51 | Saving snapshot... +2025-04-02 19:01:20 | [maml_trainer] epoch #51 | Saved +2025-04-02 19:01:20 | [maml_trainer] epoch #51 | Time 19558.68 s +2025-04-02 19:01:20 | [maml_trainer] epoch #51 | EpochTime 384.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5963 +Average/AverageReturn -169.396 +Average/Iteration 51 +Average/MaxReturn -148.018 +Average/MinReturn -237.722 +Average/NumEpisodes 80 +Average/StdReturn 18.4142 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6186 +GaussianMLPPolicy/KLAfter 0.00198278 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.53742e-05 +GaussianMLPPolicy/LossBefore 1.41561e-09 +GaussianMLPPolicy/dLoss 1.53756e-05 +Iteration 51 +MetaTest/Average/AverageDiscountedReturn -164.63 +MetaTest/Average/AverageReturn -164.63 +MetaTest/Average/Iteration 51 +MetaTest/Average/MaxReturn -149.452 +MetaTest/Average/MinReturn -197.45 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.5693 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.63 +MetaTest/__unnamed_task__/AverageReturn -164.63 +MetaTest/__unnamed_task__/Iteration 51 +MetaTest/__unnamed_task__/MaxReturn -149.452 +MetaTest/__unnamed_task__/MinReturn -197.45 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.5693 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.664e+06 +__unnamed_task__/AverageDiscountedReturn -72.5963 +__unnamed_task__/AverageReturn -169.396 +__unnamed_task__/Iteration 51 +__unnamed_task__/MaxReturn -148.018 +__unnamed_task__/MinReturn -237.722 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4142 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:06:13 | [maml_trainer] epoch #52 | Sampling for adapation and meta-testing... +2025-04-02 19:07:26 | [maml_trainer] epoch #52 | Finished meta-testing... +2025-04-02 19:07:26 | [maml_trainer] epoch #52 | Saving snapshot... +2025-04-02 19:07:45 | [maml_trainer] epoch #52 | Saved +2025-04-02 19:07:45 | [maml_trainer] epoch #52 | Time 19943.69 s +2025-04-02 19:07:45 | [maml_trainer] epoch #52 | EpochTime 385.00 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3229 +Average/AverageReturn -168.669 +Average/Iteration 52 +Average/MaxReturn -146.821 +Average/MinReturn -261.159 +Average/NumEpisodes 80 +Average/StdReturn 17.9493 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6203 +GaussianMLPPolicy/KLAfter 0.00290244 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.5673e-05 +GaussianMLPPolicy/LossBefore -3.32296e-09 +GaussianMLPPolicy/dLoss 2.56697e-05 +Iteration 52 +MetaTest/Average/AverageDiscountedReturn -160.271 +MetaTest/Average/AverageReturn -160.271 +MetaTest/Average/Iteration 52 +MetaTest/Average/MaxReturn -147.87 +MetaTest/Average/MinReturn -171.685 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.0229 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -160.271 +MetaTest/__unnamed_task__/AverageReturn -160.271 +MetaTest/__unnamed_task__/Iteration 52 +MetaTest/__unnamed_task__/MaxReturn -147.87 +MetaTest/__unnamed_task__/MinReturn -171.685 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.0229 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.696e+06 +__unnamed_task__/AverageDiscountedReturn -72.3229 +__unnamed_task__/AverageReturn -168.669 +__unnamed_task__/Iteration 52 +__unnamed_task__/MaxReturn -146.821 +__unnamed_task__/MinReturn -261.159 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9493 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:12:20 | [maml_trainer] epoch #53 | Sampling for adapation and meta-testing... +2025-04-02 19:13:29 | [maml_trainer] epoch #53 | Finished meta-testing... +2025-04-02 19:13:29 | [maml_trainer] epoch #53 | Saving snapshot... +2025-04-02 19:13:49 | [maml_trainer] epoch #53 | Saved +2025-04-02 19:13:49 | [maml_trainer] epoch #53 | Time 20307.53 s +2025-04-02 19:13:49 | [maml_trainer] epoch #53 | EpochTime 363.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3784 +Average/AverageReturn -165.431 +Average/Iteration 53 +Average/MaxReturn -149.019 +Average/MinReturn -245.785 +Average/NumEpisodes 80 +Average/StdReturn 15.1969 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6219 +GaussianMLPPolicy/KLAfter 0.00324446 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.35877e-06 +GaussianMLPPolicy/LossBefore 1.08778e-09 +GaussianMLPPolicy/dLoss 7.35986e-06 +Iteration 53 +MetaTest/Average/AverageDiscountedReturn -170.526 +MetaTest/Average/AverageReturn -170.526 +MetaTest/Average/Iteration 53 +MetaTest/Average/MaxReturn -152.683 +MetaTest/Average/MinReturn -251.454 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.2112 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.526 +MetaTest/__unnamed_task__/AverageReturn -170.526 +MetaTest/__unnamed_task__/Iteration 53 +MetaTest/__unnamed_task__/MaxReturn -152.683 +MetaTest/__unnamed_task__/MinReturn -251.454 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.2112 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.728e+06 +__unnamed_task__/AverageDiscountedReturn -71.3784 +__unnamed_task__/AverageReturn -165.431 +__unnamed_task__/Iteration 53 +__unnamed_task__/MaxReturn -149.019 +__unnamed_task__/MinReturn -245.785 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1969 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:18:28 | [maml_trainer] epoch #54 | Sampling for adapation and meta-testing... +2025-04-02 19:19:39 | [maml_trainer] epoch #54 | Finished meta-testing... +2025-04-02 19:19:39 | [maml_trainer] epoch #54 | Saving snapshot... +2025-04-02 19:20:01 | [maml_trainer] epoch #54 | Saved +2025-04-02 19:20:01 | [maml_trainer] epoch #54 | Time 20679.49 s +2025-04-02 19:20:01 | [maml_trainer] epoch #54 | EpochTime 371.96 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2979 +Average/AverageReturn -165.757 +Average/Iteration 54 +Average/MaxReturn -146.669 +Average/MinReturn -222.975 +Average/NumEpisodes 80 +Average/StdReturn 17.9668 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6243 +GaussianMLPPolicy/KLAfter 0.00224091 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.289e-06 +GaussianMLPPolicy/LossBefore -1.84774e-09 +GaussianMLPPolicy/dLoss -3.29085e-06 +Iteration 54 +MetaTest/Average/AverageDiscountedReturn -161.856 +MetaTest/Average/AverageReturn -161.856 +MetaTest/Average/Iteration 54 +MetaTest/Average/MaxReturn -150.293 +MetaTest/Average/MinReturn -175.767 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.73415 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.856 +MetaTest/__unnamed_task__/AverageReturn -161.856 +MetaTest/__unnamed_task__/Iteration 54 +MetaTest/__unnamed_task__/MaxReturn -150.293 +MetaTest/__unnamed_task__/MinReturn -175.767 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.73415 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.76e+06 +__unnamed_task__/AverageDiscountedReturn -71.2979 +__unnamed_task__/AverageReturn -165.757 +__unnamed_task__/Iteration 54 +__unnamed_task__/MaxReturn -146.669 +__unnamed_task__/MinReturn -222.975 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9668 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:24:52 | [maml_trainer] epoch #55 | Sampling for adapation and meta-testing... +2025-04-02 19:26:03 | [maml_trainer] epoch #55 | Finished meta-testing... +2025-04-02 19:26:03 | [maml_trainer] epoch #55 | Saving snapshot... +2025-04-02 19:26:25 | [maml_trainer] epoch #55 | Saved +2025-04-02 19:26:25 | [maml_trainer] epoch #55 | Time 21063.76 s +2025-04-02 19:26:25 | [maml_trainer] epoch #55 | EpochTime 384.26 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2126 +Average/AverageReturn -165.644 +Average/Iteration 55 +Average/MaxReturn -144.784 +Average/MinReturn -231.458 +Average/NumEpisodes 80 +Average/StdReturn 16.8513 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6271 +GaussianMLPPolicy/KLAfter 0.00243954 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.16466e-05 +GaussianMLPPolicy/LossBefore 4.20213e-09 +GaussianMLPPolicy/dLoss -3.16424e-05 +Iteration 55 +MetaTest/Average/AverageDiscountedReturn -165.463 +MetaTest/Average/AverageReturn -165.463 +MetaTest/Average/Iteration 55 +MetaTest/Average/MaxReturn -148.965 +MetaTest/Average/MinReturn -215.989 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.0392 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.463 +MetaTest/__unnamed_task__/AverageReturn -165.463 +MetaTest/__unnamed_task__/Iteration 55 +MetaTest/__unnamed_task__/MaxReturn -148.965 +MetaTest/__unnamed_task__/MinReturn -215.989 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.0392 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.792e+06 +__unnamed_task__/AverageDiscountedReturn -71.2126 +__unnamed_task__/AverageReturn -165.644 +__unnamed_task__/Iteration 55 +__unnamed_task__/MaxReturn -144.784 +__unnamed_task__/MinReturn -231.458 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.8513 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:31:18 | [maml_trainer] epoch #56 | Sampling for adapation and meta-testing... +2025-04-02 19:32:30 | [maml_trainer] epoch #56 | Finished meta-testing... +2025-04-02 19:32:30 | [maml_trainer] epoch #56 | Saving snapshot... +2025-04-02 19:32:52 | [maml_trainer] epoch #56 | Saved +2025-04-02 19:32:52 | [maml_trainer] epoch #56 | Time 21450.87 s +2025-04-02 19:32:52 | [maml_trainer] epoch #56 | EpochTime 387.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8671 +Average/AverageReturn -168.477 +Average/Iteration 56 +Average/MaxReturn -145.44 +Average/MinReturn -287.914 +Average/NumEpisodes 80 +Average/StdReturn 25.991 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6303 +GaussianMLPPolicy/KLAfter 0.00222239 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.15512e-05 +GaussianMLPPolicy/LossBefore -1.78814e-10 +GaussianMLPPolicy/dLoss -1.15514e-05 +Iteration 56 +MetaTest/Average/AverageDiscountedReturn -162.363 +MetaTest/Average/AverageReturn -162.363 +MetaTest/Average/Iteration 56 +MetaTest/Average/MaxReturn -146.35 +MetaTest/Average/MinReturn -199.648 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5345 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.363 +MetaTest/__unnamed_task__/AverageReturn -162.363 +MetaTest/__unnamed_task__/Iteration 56 +MetaTest/__unnamed_task__/MaxReturn -146.35 +MetaTest/__unnamed_task__/MinReturn -199.648 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5345 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.824e+06 +__unnamed_task__/AverageDiscountedReturn -72.8671 +__unnamed_task__/AverageReturn -168.477 +__unnamed_task__/Iteration 56 +__unnamed_task__/MaxReturn -145.44 +__unnamed_task__/MinReturn -287.914 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.991 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:37:42 | [maml_trainer] epoch #57 | Sampling for adapation and meta-testing... +2025-04-02 19:38:54 | [maml_trainer] epoch #57 | Finished meta-testing... +2025-04-02 19:38:54 | [maml_trainer] epoch #57 | Saving snapshot... +2025-04-02 19:39:15 | [maml_trainer] epoch #57 | Saved +2025-04-02 19:39:15 | [maml_trainer] epoch #57 | Time 21834.09 s +2025-04-02 19:39:15 | [maml_trainer] epoch #57 | EpochTime 383.22 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.1722 +Average/AverageReturn -162.574 +Average/Iteration 57 +Average/MaxReturn -143.975 +Average/MinReturn -217.284 +Average/NumEpisodes 80 +Average/StdReturn 15.8579 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6326 +GaussianMLPPolicy/KLAfter 0.00227881 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.19588e-06 +GaussianMLPPolicy/LossBefore -7.07805e-09 +GaussianMLPPolicy/dLoss 1.1888e-06 +Iteration 57 +MetaTest/Average/AverageDiscountedReturn -167.084 +MetaTest/Average/AverageReturn -167.084 +MetaTest/Average/Iteration 57 +MetaTest/Average/MaxReturn -146.364 +MetaTest/Average/MinReturn -232.416 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.8059 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.084 +MetaTest/__unnamed_task__/AverageReturn -167.084 +MetaTest/__unnamed_task__/Iteration 57 +MetaTest/__unnamed_task__/MaxReturn -146.364 +MetaTest/__unnamed_task__/MinReturn -232.416 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.8059 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.856e+06 +__unnamed_task__/AverageDiscountedReturn -70.1722 +__unnamed_task__/AverageReturn -162.574 +__unnamed_task__/Iteration 57 +__unnamed_task__/MaxReturn -143.975 +__unnamed_task__/MinReturn -217.284 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.8579 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:43:58 | [maml_trainer] epoch #58 | Sampling for adapation and meta-testing... +2025-04-02 19:45:06 | [maml_trainer] epoch #58 | Finished meta-testing... +2025-04-02 19:45:06 | [maml_trainer] epoch #58 | Saving snapshot... +2025-04-02 19:45:28 | [maml_trainer] epoch #58 | Saved +2025-04-02 19:45:28 | [maml_trainer] epoch #58 | Time 22206.19 s +2025-04-02 19:45:28 | [maml_trainer] epoch #58 | EpochTime 372.09 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0276 +Average/AverageReturn -165.267 +Average/Iteration 58 +Average/MaxReturn -147.951 +Average/MinReturn -230.661 +Average/NumEpisodes 80 +Average/StdReturn 16.348 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6334 +GaussianMLPPolicy/KLAfter 0.00206368 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.15061e-05 +GaussianMLPPolicy/LossBefore -1.3262e-09 +GaussianMLPPolicy/dLoss -3.15074e-05 +Iteration 58 +MetaTest/Average/AverageDiscountedReturn -170.574 +MetaTest/Average/AverageReturn -170.574 +MetaTest/Average/Iteration 58 +MetaTest/Average/MaxReturn -138.522 +MetaTest/Average/MinReturn -226.152 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.2611 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.574 +MetaTest/__unnamed_task__/AverageReturn -170.574 +MetaTest/__unnamed_task__/Iteration 58 +MetaTest/__unnamed_task__/MaxReturn -138.522 +MetaTest/__unnamed_task__/MinReturn -226.152 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.2611 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.888e+06 +__unnamed_task__/AverageDiscountedReturn -71.0276 +__unnamed_task__/AverageReturn -165.267 +__unnamed_task__/Iteration 58 +__unnamed_task__/MaxReturn -147.951 +__unnamed_task__/MinReturn -230.661 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.348 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:50:20 | [maml_trainer] epoch #59 | Sampling for adapation and meta-testing... +2025-04-02 19:51:32 | [maml_trainer] epoch #59 | Finished meta-testing... +2025-04-02 19:51:32 | [maml_trainer] epoch #59 | Saving snapshot... +2025-04-02 19:51:54 | [maml_trainer] epoch #59 | Saved +2025-04-02 19:51:54 | [maml_trainer] epoch #59 | Time 22592.96 s +2025-04-02 19:51:54 | [maml_trainer] epoch #59 | EpochTime 386.77 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.2702 +Average/AverageReturn -161.242 +Average/Iteration 59 +Average/MaxReturn -142.851 +Average/MinReturn -209.015 +Average/NumEpisodes 80 +Average/StdReturn 11.3295 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6342 +GaussianMLPPolicy/KLAfter 0.00236786 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.06094e-06 +GaussianMLPPolicy/LossBefore 8.24034e-09 +GaussianMLPPolicy/dLoss 5.06918e-06 +Iteration 59 +MetaTest/Average/AverageDiscountedReturn -168.402 +MetaTest/Average/AverageReturn -168.402 +MetaTest/Average/Iteration 59 +MetaTest/Average/MaxReturn -148.235 +MetaTest/Average/MinReturn -236.561 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.6845 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.402 +MetaTest/__unnamed_task__/AverageReturn -168.402 +MetaTest/__unnamed_task__/Iteration 59 +MetaTest/__unnamed_task__/MaxReturn -148.235 +MetaTest/__unnamed_task__/MinReturn -236.561 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.6845 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.92e+06 +__unnamed_task__/AverageDiscountedReturn -69.2702 +__unnamed_task__/AverageReturn -161.242 +__unnamed_task__/Iteration 59 +__unnamed_task__/MaxReturn -142.851 +__unnamed_task__/MinReturn -209.015 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.3295 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 19:56:46 | [maml_trainer] epoch #60 | Sampling for adapation and meta-testing... +2025-04-02 19:57:59 | [maml_trainer] epoch #60 | Finished meta-testing... +2025-04-02 19:57:59 | [maml_trainer] epoch #60 | Saving snapshot... +2025-04-02 19:58:21 | [maml_trainer] epoch #60 | Saved +2025-04-02 19:58:21 | [maml_trainer] epoch #60 | Time 22979.78 s +2025-04-02 19:58:21 | [maml_trainer] epoch #60 | EpochTime 386.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9368 +Average/AverageReturn -165.039 +Average/Iteration 60 +Average/MaxReturn -142.42 +Average/MinReturn -241.333 +Average/NumEpisodes 80 +Average/StdReturn 20.628 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6346 +GaussianMLPPolicy/KLAfter 0.00237811 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.30315e-05 +GaussianMLPPolicy/LossBefore -5.39422e-09 +GaussianMLPPolicy/dLoss -1.30368e-05 +Iteration 60 +MetaTest/Average/AverageDiscountedReturn -169.413 +MetaTest/Average/AverageReturn -169.413 +MetaTest/Average/Iteration 60 +MetaTest/Average/MaxReturn -147.519 +MetaTest/Average/MinReturn -234.137 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.6143 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.413 +MetaTest/__unnamed_task__/AverageReturn -169.413 +MetaTest/__unnamed_task__/Iteration 60 +MetaTest/__unnamed_task__/MaxReturn -147.519 +MetaTest/__unnamed_task__/MinReturn -234.137 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.6143 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.952e+06 +__unnamed_task__/AverageDiscountedReturn -70.9368 +__unnamed_task__/AverageReturn -165.039 +__unnamed_task__/Iteration 60 +__unnamed_task__/MaxReturn -142.42 +__unnamed_task__/MinReturn -241.333 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.628 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:03:14 | [maml_trainer] epoch #61 | Sampling for adapation and meta-testing... +2025-04-02 20:04:27 | [maml_trainer] epoch #61 | Finished meta-testing... +2025-04-02 20:04:27 | [maml_trainer] epoch #61 | Saving snapshot... +2025-04-02 20:04:48 | [maml_trainer] epoch #61 | Saved +2025-04-02 20:04:48 | [maml_trainer] epoch #61 | Time 23366.48 s +2025-04-02 20:04:48 | [maml_trainer] epoch #61 | EpochTime 386.69 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.3462 +Average/AverageReturn -162.054 +Average/Iteration 61 +Average/MaxReturn -146.936 +Average/MinReturn -195.168 +Average/NumEpisodes 80 +Average/StdReturn 10.917 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6322 +GaussianMLPPolicy/KLAfter 0.00276805 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.77823e-05 +GaussianMLPPolicy/LossBefore 9.96888e-09 +GaussianMLPPolicy/dLoss 1.77923e-05 +Iteration 61 +MetaTest/Average/AverageDiscountedReturn -165.798 +MetaTest/Average/AverageReturn -165.798 +MetaTest/Average/Iteration 61 +MetaTest/Average/MaxReturn -146.477 +MetaTest/Average/MinReturn -221.145 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.6854 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.798 +MetaTest/__unnamed_task__/AverageReturn -165.798 +MetaTest/__unnamed_task__/Iteration 61 +MetaTest/__unnamed_task__/MaxReturn -146.477 +MetaTest/__unnamed_task__/MinReturn -221.145 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.6854 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 1.984e+06 +__unnamed_task__/AverageDiscountedReturn -69.3462 +__unnamed_task__/AverageReturn -162.054 +__unnamed_task__/Iteration 61 +__unnamed_task__/MaxReturn -146.936 +__unnamed_task__/MinReturn -195.168 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 10.917 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:09:40 | [maml_trainer] epoch #62 | Sampling for adapation and meta-testing... +2025-04-02 20:10:51 | [maml_trainer] epoch #62 | Finished meta-testing... +2025-04-02 20:10:51 | [maml_trainer] epoch #62 | Saving snapshot... +2025-04-02 20:11:13 | [maml_trainer] epoch #62 | Saved +2025-04-02 20:11:13 | [maml_trainer] epoch #62 | Time 23751.41 s +2025-04-02 20:11:13 | [maml_trainer] epoch #62 | EpochTime 384.94 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3625 +Average/AverageReturn -166.696 +Average/Iteration 62 +Average/MaxReturn -142.01 +Average/MinReturn -262.872 +Average/NumEpisodes 80 +Average/StdReturn 17.5382 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6308 +GaussianMLPPolicy/KLAfter 0.00271977 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.82832e-05 +GaussianMLPPolicy/LossBefore -1.03563e-08 +GaussianMLPPolicy/dLoss -1.82936e-05 +Iteration 62 +MetaTest/Average/AverageDiscountedReturn -163.534 +MetaTest/Average/AverageReturn -163.534 +MetaTest/Average/Iteration 62 +MetaTest/Average/MaxReturn -153.973 +MetaTest/Average/MinReturn -193.519 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.8588 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.534 +MetaTest/__unnamed_task__/AverageReturn -163.534 +MetaTest/__unnamed_task__/Iteration 62 +MetaTest/__unnamed_task__/MaxReturn -153.973 +MetaTest/__unnamed_task__/MinReturn -193.519 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.8588 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.016e+06 +__unnamed_task__/AverageDiscountedReturn -71.3625 +__unnamed_task__/AverageReturn -166.696 +__unnamed_task__/Iteration 62 +__unnamed_task__/MaxReturn -142.01 +__unnamed_task__/MinReturn -262.872 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.5382 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:15:50 | [maml_trainer] epoch #63 | Sampling for adapation and meta-testing... +2025-04-02 20:17:00 | [maml_trainer] epoch #63 | Finished meta-testing... +2025-04-02 20:17:00 | [maml_trainer] epoch #63 | Saving snapshot... +2025-04-02 20:17:21 | [maml_trainer] epoch #63 | Saved +2025-04-02 20:17:21 | [maml_trainer] epoch #63 | Time 24120.00 s +2025-04-02 20:17:21 | [maml_trainer] epoch #63 | EpochTime 368.58 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.538 +Average/AverageReturn -165.563 +Average/Iteration 63 +Average/MaxReturn -145.652 +Average/MinReturn -225.651 +Average/NumEpisodes 80 +Average/StdReturn 15.3177 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6294 +GaussianMLPPolicy/KLAfter 0.00204281 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.27467e-05 +GaussianMLPPolicy/LossBefore 4.91738e-10 +GaussianMLPPolicy/dLoss -3.27462e-05 +Iteration 63 +MetaTest/Average/AverageDiscountedReturn -167.586 +MetaTest/Average/AverageReturn -167.586 +MetaTest/Average/Iteration 63 +MetaTest/Average/MaxReturn -147.559 +MetaTest/Average/MinReturn -236.611 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5536 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.586 +MetaTest/__unnamed_task__/AverageReturn -167.586 +MetaTest/__unnamed_task__/Iteration 63 +MetaTest/__unnamed_task__/MaxReturn -147.559 +MetaTest/__unnamed_task__/MinReturn -236.611 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5536 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.048e+06 +__unnamed_task__/AverageDiscountedReturn -70.538 +__unnamed_task__/AverageReturn -165.563 +__unnamed_task__/Iteration 63 +__unnamed_task__/MaxReturn -145.652 +__unnamed_task__/MinReturn -225.651 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.3177 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:22:14 | [maml_trainer] epoch #64 | Sampling for adapation and meta-testing... +2025-04-02 20:23:25 | [maml_trainer] epoch #64 | Finished meta-testing... +2025-04-02 20:23:25 | [maml_trainer] epoch #64 | Saving snapshot... +2025-04-02 20:23:46 | [maml_trainer] epoch #64 | Saved +2025-04-02 20:23:46 | [maml_trainer] epoch #64 | Time 24504.73 s +2025-04-02 20:23:46 | [maml_trainer] epoch #64 | EpochTime 384.73 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5651 +Average/AverageReturn -169.641 +Average/Iteration 64 +Average/MaxReturn -143.479 +Average/MinReturn -294.445 +Average/NumEpisodes 80 +Average/StdReturn 21.6612 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6267 +GaussianMLPPolicy/KLAfter 0.00166979 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.53063e-05 +GaussianMLPPolicy/LossBefore -4.91738e-10 +GaussianMLPPolicy/dLoss -1.53068e-05 +Iteration 64 +MetaTest/Average/AverageDiscountedReturn -167.87 +MetaTest/Average/AverageReturn -167.87 +MetaTest/Average/Iteration 64 +MetaTest/Average/MaxReturn -150.064 +MetaTest/Average/MinReturn -218.953 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.071 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.87 +MetaTest/__unnamed_task__/AverageReturn -167.87 +MetaTest/__unnamed_task__/Iteration 64 +MetaTest/__unnamed_task__/MaxReturn -150.064 +MetaTest/__unnamed_task__/MinReturn -218.953 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.071 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.08e+06 +__unnamed_task__/AverageDiscountedReturn -72.5651 +__unnamed_task__/AverageReturn -169.641 +__unnamed_task__/Iteration 64 +__unnamed_task__/MaxReturn -143.479 +__unnamed_task__/MinReturn -294.445 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.6612 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:28:22 | [maml_trainer] epoch #65 | Sampling for adapation and meta-testing... +2025-04-02 20:29:32 | [maml_trainer] epoch #65 | Finished meta-testing... +2025-04-02 20:29:32 | [maml_trainer] epoch #65 | Saving snapshot... +2025-04-02 20:29:52 | [maml_trainer] epoch #65 | Saved +2025-04-02 20:29:52 | [maml_trainer] epoch #65 | Time 24870.61 s +2025-04-02 20:29:52 | [maml_trainer] epoch #65 | EpochTime 365.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3874 +Average/AverageReturn -170.316 +Average/Iteration 65 +Average/MaxReturn -149.504 +Average/MinReturn -241.864 +Average/NumEpisodes 80 +Average/StdReturn 19.5339 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.623 +GaussianMLPPolicy/KLAfter 0.00184619 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.30978e-05 +GaussianMLPPolicy/LossBefore -5.21541e-09 +GaussianMLPPolicy/dLoss -1.3103e-05 +Iteration 65 +MetaTest/Average/AverageDiscountedReturn -166.652 +MetaTest/Average/AverageReturn -166.652 +MetaTest/Average/Iteration 65 +MetaTest/Average/MaxReturn -151.462 +MetaTest/Average/MinReturn -223.344 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.3971 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.652 +MetaTest/__unnamed_task__/AverageReturn -166.652 +MetaTest/__unnamed_task__/Iteration 65 +MetaTest/__unnamed_task__/MaxReturn -151.462 +MetaTest/__unnamed_task__/MinReturn -223.344 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.3971 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.112e+06 +__unnamed_task__/AverageDiscountedReturn -72.3874 +__unnamed_task__/AverageReturn -170.316 +__unnamed_task__/Iteration 65 +__unnamed_task__/MaxReturn -149.504 +__unnamed_task__/MinReturn -241.864 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.5339 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:34:34 | [maml_trainer] epoch #66 | Sampling for adapation and meta-testing... +2025-04-02 20:35:47 | [maml_trainer] epoch #66 | Finished meta-testing... +2025-04-02 20:35:47 | [maml_trainer] epoch #66 | Saving snapshot... +2025-04-02 20:36:09 | [maml_trainer] epoch #66 | Saved +2025-04-02 20:36:09 | [maml_trainer] epoch #66 | Time 25248.00 s +2025-04-02 20:36:09 | [maml_trainer] epoch #66 | EpochTime 377.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4207 +Average/AverageReturn -168.878 +Average/Iteration 66 +Average/MaxReturn -146.966 +Average/MinReturn -225.036 +Average/NumEpisodes 80 +Average/StdReturn 15.7116 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6208 +GaussianMLPPolicy/KLAfter 0.00164325 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.90111e-07 +GaussianMLPPolicy/LossBefore -1.96695e-09 +GaussianMLPPolicy/dLoss 2.88144e-07 +Iteration 66 +MetaTest/Average/AverageDiscountedReturn -167.795 +MetaTest/Average/AverageReturn -167.795 +MetaTest/Average/Iteration 66 +MetaTest/Average/MaxReturn -150.609 +MetaTest/Average/MinReturn -199.238 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.5709 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.795 +MetaTest/__unnamed_task__/AverageReturn -167.795 +MetaTest/__unnamed_task__/Iteration 66 +MetaTest/__unnamed_task__/MaxReturn -150.609 +MetaTest/__unnamed_task__/MinReturn -199.238 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.5709 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.144e+06 +__unnamed_task__/AverageDiscountedReturn -71.4207 +__unnamed_task__/AverageReturn -168.878 +__unnamed_task__/Iteration 66 +__unnamed_task__/MaxReturn -146.966 +__unnamed_task__/MinReturn -225.036 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.7116 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:40:59 | [maml_trainer] epoch #67 | Sampling for adapation and meta-testing... +2025-04-02 20:42:11 | [maml_trainer] epoch #67 | Finished meta-testing... +2025-04-02 20:42:11 | [maml_trainer] epoch #67 | Saving snapshot... +2025-04-02 20:42:34 | [maml_trainer] epoch #67 | Saved +2025-04-02 20:42:34 | [maml_trainer] epoch #67 | Time 25632.29 s +2025-04-02 20:42:34 | [maml_trainer] epoch #67 | EpochTime 384.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3484 +Average/AverageReturn -168.767 +Average/Iteration 67 +Average/MaxReturn -146.102 +Average/MinReturn -219.453 +Average/NumEpisodes 80 +Average/StdReturn 14.7419 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6204 +GaussianMLPPolicy/KLAfter 0.00229178 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.97764e-06 +GaussianMLPPolicy/LossBefore 1.16229e-09 +GaussianMLPPolicy/dLoss 4.97881e-06 +Iteration 67 +MetaTest/Average/AverageDiscountedReturn -166.137 +MetaTest/Average/AverageReturn -166.137 +MetaTest/Average/Iteration 67 +MetaTest/Average/MaxReturn -150.255 +MetaTest/Average/MinReturn -178.423 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.18828 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.137 +MetaTest/__unnamed_task__/AverageReturn -166.137 +MetaTest/__unnamed_task__/Iteration 67 +MetaTest/__unnamed_task__/MaxReturn -150.255 +MetaTest/__unnamed_task__/MinReturn -178.423 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.18828 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.176e+06 +__unnamed_task__/AverageDiscountedReturn -71.3484 +__unnamed_task__/AverageReturn -168.767 +__unnamed_task__/Iteration 67 +__unnamed_task__/MaxReturn -146.102 +__unnamed_task__/MinReturn -219.453 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.7419 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:47:24 | [maml_trainer] epoch #68 | Sampling for adapation and meta-testing... +2025-04-02 20:48:39 | [maml_trainer] epoch #68 | Finished meta-testing... +2025-04-02 20:48:39 | [maml_trainer] epoch #68 | Saving snapshot... +2025-04-02 20:49:02 | [maml_trainer] epoch #68 | Saved +2025-04-02 20:49:02 | [maml_trainer] epoch #68 | Time 26020.42 s +2025-04-02 20:49:02 | [maml_trainer] epoch #68 | EpochTime 388.13 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6654 +Average/AverageReturn -171.452 +Average/Iteration 68 +Average/MaxReturn -143.925 +Average/MinReturn -245.286 +Average/NumEpisodes 80 +Average/StdReturn 19.9555 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6191 +GaussianMLPPolicy/KLAfter 0.00234555 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.31583e-05 +GaussianMLPPolicy/LossBefore 4.76837e-10 +GaussianMLPPolicy/dLoss -1.31578e-05 +Iteration 68 +MetaTest/Average/AverageDiscountedReturn -172.691 +MetaTest/Average/AverageReturn -172.691 +MetaTest/Average/Iteration 68 +MetaTest/Average/MaxReturn -150.057 +MetaTest/Average/MinReturn -233.203 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.3905 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.691 +MetaTest/__unnamed_task__/AverageReturn -172.691 +MetaTest/__unnamed_task__/Iteration 68 +MetaTest/__unnamed_task__/MaxReturn -150.057 +MetaTest/__unnamed_task__/MinReturn -233.203 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.3905 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.208e+06 +__unnamed_task__/AverageDiscountedReturn -72.6654 +__unnamed_task__/AverageReturn -171.452 +__unnamed_task__/Iteration 68 +__unnamed_task__/MaxReturn -143.925 +__unnamed_task__/MinReturn -245.286 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.9555 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 20:53:58 | [maml_trainer] epoch #69 | Sampling for adapation and meta-testing... +2025-04-02 20:55:12 | [maml_trainer] epoch #69 | Finished meta-testing... +2025-04-02 20:55:12 | [maml_trainer] epoch #69 | Saving snapshot... +2025-04-02 20:55:34 | [maml_trainer] epoch #69 | Saved +2025-04-02 20:55:34 | [maml_trainer] epoch #69 | Time 26412.29 s +2025-04-02 20:55:34 | [maml_trainer] epoch #69 | EpochTime 391.86 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6368 +Average/AverageReturn -168.294 +Average/Iteration 69 +Average/MaxReturn -148.036 +Average/MinReturn -284.705 +Average/NumEpisodes 80 +Average/StdReturn 17.8408 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6187 +GaussianMLPPolicy/KLAfter 0.00228197 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.66965e-05 +GaussianMLPPolicy/LossBefore 2.68221e-09 +GaussianMLPPolicy/dLoss -3.66939e-05 +Iteration 69 +MetaTest/Average/AverageDiscountedReturn -172.05 +MetaTest/Average/AverageReturn -172.05 +MetaTest/Average/Iteration 69 +MetaTest/Average/MaxReturn -157.02 +MetaTest/Average/MinReturn -216.634 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8021 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.05 +MetaTest/__unnamed_task__/AverageReturn -172.05 +MetaTest/__unnamed_task__/Iteration 69 +MetaTest/__unnamed_task__/MaxReturn -157.02 +MetaTest/__unnamed_task__/MinReturn -216.634 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8021 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.24e+06 +__unnamed_task__/AverageDiscountedReturn -71.6368 +__unnamed_task__/AverageReturn -168.294 +__unnamed_task__/Iteration 69 +__unnamed_task__/MaxReturn -148.036 +__unnamed_task__/MinReturn -284.705 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.8408 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:00:24 | [maml_trainer] epoch #70 | Sampling for adapation and meta-testing... +2025-04-02 21:01:38 | [maml_trainer] epoch #70 | Finished meta-testing... +2025-04-02 21:01:38 | [maml_trainer] epoch #70 | Saving snapshot... +2025-04-02 21:01:59 | [maml_trainer] epoch #70 | Saved +2025-04-02 21:01:59 | [maml_trainer] epoch #70 | Time 26797.89 s +2025-04-02 21:01:59 | [maml_trainer] epoch #70 | EpochTime 385.60 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0247 +Average/AverageReturn -167.399 +Average/Iteration 70 +Average/MaxReturn -149.343 +Average/MinReturn -247.396 +Average/NumEpisodes 80 +Average/StdReturn 16.1196 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6169 +GaussianMLPPolicy/KLAfter 0.00191537 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.73548e-06 +GaussianMLPPolicy/LossBefore 1.07288e-09 +GaussianMLPPolicy/dLoss 2.73655e-06 +Iteration 70 +MetaTest/Average/AverageDiscountedReturn -169.269 +MetaTest/Average/AverageReturn -169.269 +MetaTest/Average/Iteration 70 +MetaTest/Average/MaxReturn -150.815 +MetaTest/Average/MinReturn -221.621 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5827 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.269 +MetaTest/__unnamed_task__/AverageReturn -169.269 +MetaTest/__unnamed_task__/Iteration 70 +MetaTest/__unnamed_task__/MaxReturn -150.815 +MetaTest/__unnamed_task__/MinReturn -221.621 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5827 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.272e+06 +__unnamed_task__/AverageDiscountedReturn -71.0247 +__unnamed_task__/AverageReturn -167.399 +__unnamed_task__/Iteration 70 +__unnamed_task__/MaxReturn -149.343 +__unnamed_task__/MinReturn -247.396 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.1196 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:06:47 | [maml_trainer] epoch #71 | Sampling for adapation and meta-testing... +2025-04-02 21:08:01 | [maml_trainer] epoch #71 | Finished meta-testing... +2025-04-02 21:08:01 | [maml_trainer] epoch #71 | Saving snapshot... +2025-04-02 21:08:23 | [maml_trainer] epoch #71 | Saved +2025-04-02 21:08:23 | [maml_trainer] epoch #71 | Time 27182.06 s +2025-04-02 21:08:23 | [maml_trainer] epoch #71 | EpochTime 384.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6935 +Average/AverageReturn -168.964 +Average/Iteration 71 +Average/MaxReturn -148.929 +Average/MinReturn -233.732 +Average/NumEpisodes 80 +Average/StdReturn 15.8081 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6157 +GaussianMLPPolicy/KLAfter 0.00264418 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.87955e-05 +GaussianMLPPolicy/LossBefore -6.07967e-09 +GaussianMLPPolicy/dLoss -1.88016e-05 +Iteration 71 +MetaTest/Average/AverageDiscountedReturn -170.185 +MetaTest/Average/AverageReturn -170.185 +MetaTest/Average/Iteration 71 +MetaTest/Average/MaxReturn -146.715 +MetaTest/Average/MinReturn -244.02 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.1384 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.185 +MetaTest/__unnamed_task__/AverageReturn -170.185 +MetaTest/__unnamed_task__/Iteration 71 +MetaTest/__unnamed_task__/MaxReturn -146.715 +MetaTest/__unnamed_task__/MinReturn -244.02 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.1384 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.304e+06 +__unnamed_task__/AverageDiscountedReturn -71.6935 +__unnamed_task__/AverageReturn -168.964 +__unnamed_task__/Iteration 71 +__unnamed_task__/MaxReturn -148.929 +__unnamed_task__/MinReturn -233.732 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.8081 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:13:12 | [maml_trainer] epoch #72 | Sampling for adapation and meta-testing... +2025-04-02 21:14:24 | [maml_trainer] epoch #72 | Finished meta-testing... +2025-04-02 21:14:24 | [maml_trainer] epoch #72 | Saving snapshot... +2025-04-02 21:14:46 | [maml_trainer] epoch #72 | Saved +2025-04-02 21:14:46 | [maml_trainer] epoch #72 | Time 27564.68 s +2025-04-02 21:14:46 | [maml_trainer] epoch #72 | EpochTime 382.62 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.7486 +Average/AverageReturn -166.899 +Average/Iteration 72 +Average/MaxReturn -144.702 +Average/MinReturn -226.86 +Average/NumEpisodes 80 +Average/StdReturn 16.0541 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6159 +GaussianMLPPolicy/KLAfter 0.00377653 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.75879e-05 +GaussianMLPPolicy/LossBefore 7.62939e-09 +GaussianMLPPolicy/dLoss 1.75955e-05 +Iteration 72 +MetaTest/Average/AverageDiscountedReturn -165.201 +MetaTest/Average/AverageReturn -165.201 +MetaTest/Average/Iteration 72 +MetaTest/Average/MaxReturn -147.355 +MetaTest/Average/MinReturn -205.456 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.0771 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.201 +MetaTest/__unnamed_task__/AverageReturn -165.201 +MetaTest/__unnamed_task__/Iteration 72 +MetaTest/__unnamed_task__/MaxReturn -147.355 +MetaTest/__unnamed_task__/MinReturn -205.456 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.0771 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.336e+06 +__unnamed_task__/AverageDiscountedReturn -70.7486 +__unnamed_task__/AverageReturn -166.899 +__unnamed_task__/Iteration 72 +__unnamed_task__/MaxReturn -144.702 +__unnamed_task__/MinReturn -226.86 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.0541 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:19:39 | [maml_trainer] epoch #73 | Sampling for adapation and meta-testing... +2025-04-02 21:20:52 | [maml_trainer] epoch #73 | Finished meta-testing... +2025-04-02 21:20:52 | [maml_trainer] epoch #73 | Saving snapshot... +2025-04-02 21:21:14 | [maml_trainer] epoch #73 | Saved +2025-04-02 21:21:14 | [maml_trainer] epoch #73 | Time 27953.08 s +2025-04-02 21:21:14 | [maml_trainer] epoch #73 | EpochTime 388.40 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.5999 +Average/AverageReturn -165.849 +Average/Iteration 73 +Average/MaxReturn -140.155 +Average/MinReturn -236.826 +Average/NumEpisodes 80 +Average/StdReturn 16.3423 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6145 +GaussianMLPPolicy/KLAfter 0.00348556 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.55233e-05 +GaussianMLPPolicy/LossBefore 2.08616e-10 +GaussianMLPPolicy/dLoss 2.55235e-05 +Iteration 73 +MetaTest/Average/AverageDiscountedReturn -162.502 +MetaTest/Average/AverageReturn -162.502 +MetaTest/Average/Iteration 73 +MetaTest/Average/MaxReturn -149.091 +MetaTest/Average/MinReturn -176.56 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.95084 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.502 +MetaTest/__unnamed_task__/AverageReturn -162.502 +MetaTest/__unnamed_task__/Iteration 73 +MetaTest/__unnamed_task__/MaxReturn -149.091 +MetaTest/__unnamed_task__/MinReturn -176.56 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.95084 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.368e+06 +__unnamed_task__/AverageDiscountedReturn -70.5999 +__unnamed_task__/AverageReturn -165.849 +__unnamed_task__/Iteration 73 +__unnamed_task__/MaxReturn -140.155 +__unnamed_task__/MinReturn -236.826 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.3423 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:26:10 | [maml_trainer] epoch #74 | Sampling for adapation and meta-testing... +2025-04-02 21:27:23 | [maml_trainer] epoch #74 | Finished meta-testing... +2025-04-02 21:27:23 | [maml_trainer] epoch #74 | Saving snapshot... +2025-04-02 21:27:44 | [maml_trainer] epoch #74 | Saved +2025-04-02 21:27:44 | [maml_trainer] epoch #74 | Time 28342.86 s +2025-04-02 21:27:44 | [maml_trainer] epoch #74 | EpochTime 389.77 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9906 +Average/AverageReturn -167.696 +Average/Iteration 74 +Average/MaxReturn -134.251 +Average/MinReturn -219.267 +Average/NumEpisodes 80 +Average/StdReturn 17.0244 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6146 +GaussianMLPPolicy/KLAfter 0.00339943 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.4978e-06 +GaussianMLPPolicy/LossBefore 6.21378e-09 +GaussianMLPPolicy/dLoss -8.49159e-06 +Iteration 74 +MetaTest/Average/AverageDiscountedReturn -165.068 +MetaTest/Average/AverageReturn -165.068 +MetaTest/Average/Iteration 74 +MetaTest/Average/MaxReturn -146.853 +MetaTest/Average/MinReturn -216.516 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.327 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.068 +MetaTest/__unnamed_task__/AverageReturn -165.068 +MetaTest/__unnamed_task__/Iteration 74 +MetaTest/__unnamed_task__/MaxReturn -146.853 +MetaTest/__unnamed_task__/MinReturn -216.516 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.327 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.4e+06 +__unnamed_task__/AverageDiscountedReturn -70.9906 +__unnamed_task__/AverageReturn -167.696 +__unnamed_task__/Iteration 74 +__unnamed_task__/MaxReturn -134.251 +__unnamed_task__/MinReturn -219.267 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0244 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:32:34 | [maml_trainer] epoch #75 | Sampling for adapation and meta-testing... +2025-04-02 21:33:42 | [maml_trainer] epoch #75 | Finished meta-testing... +2025-04-02 21:33:42 | [maml_trainer] epoch #75 | Saving snapshot... +2025-04-02 21:34:04 | [maml_trainer] epoch #75 | Saved +2025-04-02 21:34:04 | [maml_trainer] epoch #75 | Time 28722.32 s +2025-04-02 21:34:04 | [maml_trainer] epoch #75 | EpochTime 379.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.8818 +Average/AverageReturn -166.85 +Average/Iteration 75 +Average/MaxReturn -145.842 +Average/MinReturn -228.329 +Average/NumEpisodes 80 +Average/StdReturn 15.1036 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6152 +GaussianMLPPolicy/KLAfter 0.00251808 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.74248e-05 +GaussianMLPPolicy/LossBefore 6.25849e-09 +GaussianMLPPolicy/dLoss 4.7431e-05 +Iteration 75 +MetaTest/Average/AverageDiscountedReturn -165.43 +MetaTest/Average/AverageReturn -165.43 +MetaTest/Average/Iteration 75 +MetaTest/Average/MaxReturn -146.118 +MetaTest/Average/MinReturn -198.671 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.9166 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.43 +MetaTest/__unnamed_task__/AverageReturn -165.43 +MetaTest/__unnamed_task__/Iteration 75 +MetaTest/__unnamed_task__/MaxReturn -146.118 +MetaTest/__unnamed_task__/MinReturn -198.671 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.9166 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.432e+06 +__unnamed_task__/AverageDiscountedReturn -70.8818 +__unnamed_task__/AverageReturn -166.85 +__unnamed_task__/Iteration 75 +__unnamed_task__/MaxReturn -145.842 +__unnamed_task__/MinReturn -228.329 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1036 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:38:41 | [maml_trainer] epoch #76 | Sampling for adapation and meta-testing... +2025-04-02 21:39:50 | [maml_trainer] epoch #76 | Finished meta-testing... +2025-04-02 21:39:50 | [maml_trainer] epoch #76 | Saving snapshot... +2025-04-02 21:40:11 | [maml_trainer] epoch #76 | Saved +2025-04-02 21:40:11 | [maml_trainer] epoch #76 | Time 29089.23 s +2025-04-02 21:40:11 | [maml_trainer] epoch #76 | EpochTime 366.91 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7167 +Average/AverageReturn -168.439 +Average/Iteration 76 +Average/MaxReturn -144.837 +Average/MinReturn -246.314 +Average/NumEpisodes 80 +Average/StdReturn 21.7999 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6176 +GaussianMLPPolicy/KLAfter 0.00220466 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.39505e-05 +GaussianMLPPolicy/LossBefore -5.91576e-09 +GaussianMLPPolicy/dLoss 1.39446e-05 +Iteration 76 +MetaTest/Average/AverageDiscountedReturn -165.997 +MetaTest/Average/AverageReturn -165.997 +MetaTest/Average/Iteration 76 +MetaTest/Average/MaxReturn -143.593 +MetaTest/Average/MinReturn -234.385 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6875 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.997 +MetaTest/__unnamed_task__/AverageReturn -165.997 +MetaTest/__unnamed_task__/Iteration 76 +MetaTest/__unnamed_task__/MaxReturn -143.593 +MetaTest/__unnamed_task__/MinReturn -234.385 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6875 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.464e+06 +__unnamed_task__/AverageDiscountedReturn -71.7167 +__unnamed_task__/AverageReturn -168.439 +__unnamed_task__/Iteration 76 +__unnamed_task__/MaxReturn -144.837 +__unnamed_task__/MinReturn -246.314 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.7999 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:44:47 | [maml_trainer] epoch #77 | Sampling for adapation and meta-testing... +2025-04-02 21:45:58 | [maml_trainer] epoch #77 | Finished meta-testing... +2025-04-02 21:45:58 | [maml_trainer] epoch #77 | Saving snapshot... +2025-04-02 21:46:19 | [maml_trainer] epoch #77 | Saved +2025-04-02 21:46:19 | [maml_trainer] epoch #77 | Time 29457.99 s +2025-04-02 21:46:19 | [maml_trainer] epoch #77 | EpochTime 368.75 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6794 +Average/AverageReturn -170.593 +Average/Iteration 77 +Average/MaxReturn -144.429 +Average/MinReturn -246.919 +Average/NumEpisodes 80 +Average/StdReturn 22.3038 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6193 +GaussianMLPPolicy/KLAfter 0.00258349 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.50936e-06 +GaussianMLPPolicy/LossBefore -7.27177e-09 +GaussianMLPPolicy/dLoss 6.50209e-06 +Iteration 77 +MetaTest/Average/AverageDiscountedReturn -163.157 +MetaTest/Average/AverageReturn -163.157 +MetaTest/Average/Iteration 77 +MetaTest/Average/MaxReturn -143.953 +MetaTest/Average/MinReturn -180.791 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.78368 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.157 +MetaTest/__unnamed_task__/AverageReturn -163.157 +MetaTest/__unnamed_task__/Iteration 77 +MetaTest/__unnamed_task__/MaxReturn -143.953 +MetaTest/__unnamed_task__/MinReturn -180.791 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.78368 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.496e+06 +__unnamed_task__/AverageDiscountedReturn -72.6794 +__unnamed_task__/AverageReturn -170.593 +__unnamed_task__/Iteration 77 +__unnamed_task__/MaxReturn -144.429 +__unnamed_task__/MinReturn -246.919 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.3038 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:50:59 | [maml_trainer] epoch #78 | Sampling for adapation and meta-testing... +2025-04-02 21:52:08 | [maml_trainer] epoch #78 | Finished meta-testing... +2025-04-02 21:52:08 | [maml_trainer] epoch #78 | Saving snapshot... +2025-04-02 21:52:28 | [maml_trainer] epoch #78 | Saved +2025-04-02 21:52:28 | [maml_trainer] epoch #78 | Time 29826.50 s +2025-04-02 21:52:28 | [maml_trainer] epoch #78 | EpochTime 368.51 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.1443 +Average/AverageReturn -164.582 +Average/Iteration 78 +Average/MaxReturn -142.075 +Average/MinReturn -220.748 +Average/NumEpisodes 80 +Average/StdReturn 14.9133 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6224 +GaussianMLPPolicy/KLAfter 0.00259562 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.15413e-05 +GaussianMLPPolicy/LossBefore -5.93066e-09 +GaussianMLPPolicy/dLoss -4.15472e-05 +Iteration 78 +MetaTest/Average/AverageDiscountedReturn -171.035 +MetaTest/Average/AverageReturn -171.035 +MetaTest/Average/Iteration 78 +MetaTest/Average/MaxReturn -147.326 +MetaTest/Average/MinReturn -249.624 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.5377 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.035 +MetaTest/__unnamed_task__/AverageReturn -171.035 +MetaTest/__unnamed_task__/Iteration 78 +MetaTest/__unnamed_task__/MaxReturn -147.326 +MetaTest/__unnamed_task__/MinReturn -249.624 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.5377 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.528e+06 +__unnamed_task__/AverageDiscountedReturn -70.1443 +__unnamed_task__/AverageReturn -164.582 +__unnamed_task__/Iteration 78 +__unnamed_task__/MaxReturn -142.075 +__unnamed_task__/MinReturn -220.748 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.9133 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 21:57:05 | [maml_trainer] epoch #79 | Sampling for adapation and meta-testing... +2025-04-02 21:58:14 | [maml_trainer] epoch #79 | Finished meta-testing... +2025-04-02 21:58:14 | [maml_trainer] epoch #79 | Saving snapshot... +2025-04-02 21:58:35 | [maml_trainer] epoch #79 | Saved +2025-04-02 21:58:35 | [maml_trainer] epoch #79 | Time 30193.91 s +2025-04-02 21:58:35 | [maml_trainer] epoch #79 | EpochTime 367.40 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.3274 +Average/AverageReturn -161.816 +Average/Iteration 79 +Average/MaxReturn -142.706 +Average/MinReturn -211.506 +Average/NumEpisodes 80 +Average/StdReturn 14.0808 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.625 +GaussianMLPPolicy/KLAfter 0.00226528 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.50397e-06 +GaussianMLPPolicy/LossBefore 4.38094e-09 +GaussianMLPPolicy/dLoss -8.49959e-06 +Iteration 79 +MetaTest/Average/AverageDiscountedReturn -169.262 +MetaTest/Average/AverageReturn -169.262 +MetaTest/Average/Iteration 79 +MetaTest/Average/MaxReturn -145.16 +MetaTest/Average/MinReturn -242.765 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.2661 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.262 +MetaTest/__unnamed_task__/AverageReturn -169.262 +MetaTest/__unnamed_task__/Iteration 79 +MetaTest/__unnamed_task__/MaxReturn -145.16 +MetaTest/__unnamed_task__/MinReturn -242.765 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.2661 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.56e+06 +__unnamed_task__/AverageDiscountedReturn -69.3274 +__unnamed_task__/AverageReturn -161.816 +__unnamed_task__/Iteration 79 +__unnamed_task__/MaxReturn -142.706 +__unnamed_task__/MinReturn -211.506 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.0808 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:03:14 | [maml_trainer] epoch #80 | Sampling for adapation and meta-testing... +2025-04-02 22:04:30 | [maml_trainer] epoch #80 | Finished meta-testing... +2025-04-02 22:04:30 | [maml_trainer] epoch #80 | Saving snapshot... +2025-04-02 22:04:59 | [maml_trainer] epoch #80 | Saved +2025-04-02 22:04:59 | [maml_trainer] epoch #80 | Time 30577.49 s +2025-04-02 22:04:59 | [maml_trainer] epoch #80 | EpochTime 383.58 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.8768 +Average/AverageReturn -162.749 +Average/Iteration 80 +Average/MaxReturn -140.651 +Average/MinReturn -240.716 +Average/NumEpisodes 80 +Average/StdReturn 21.5795 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6288 +GaussianMLPPolicy/KLAfter 0.00196833 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.26365e-06 +GaussianMLPPolicy/LossBefore 5.36442e-10 +GaussianMLPPolicy/dLoss 7.26418e-06 +Iteration 80 +MetaTest/Average/AverageDiscountedReturn -168.515 +MetaTest/Average/AverageReturn -168.515 +MetaTest/Average/Iteration 80 +MetaTest/Average/MaxReturn -143.472 +MetaTest/Average/MinReturn -224.495 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.996 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.515 +MetaTest/__unnamed_task__/AverageReturn -168.515 +MetaTest/__unnamed_task__/Iteration 80 +MetaTest/__unnamed_task__/MaxReturn -143.472 +MetaTest/__unnamed_task__/MinReturn -224.495 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.996 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.592e+06 +__unnamed_task__/AverageDiscountedReturn -69.8768 +__unnamed_task__/AverageReturn -162.749 +__unnamed_task__/Iteration 80 +__unnamed_task__/MaxReturn -140.651 +__unnamed_task__/MinReturn -240.716 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.5795 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:09:57 | [maml_trainer] epoch #81 | Sampling for adapation and meta-testing... +2025-04-02 22:11:07 | [maml_trainer] epoch #81 | Finished meta-testing... +2025-04-02 22:11:07 | [maml_trainer] epoch #81 | Saving snapshot... +2025-04-02 22:11:28 | [maml_trainer] epoch #81 | Saved +2025-04-02 22:11:28 | [maml_trainer] epoch #81 | Time 30966.35 s +2025-04-02 22:11:28 | [maml_trainer] epoch #81 | EpochTime 388.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.0523 +Average/AverageReturn -163.124 +Average/Iteration 81 +Average/MaxReturn -143.174 +Average/MinReturn -240.485 +Average/NumEpisodes 80 +Average/StdReturn 19.1272 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6301 +GaussianMLPPolicy/KLAfter 0.00211687 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.74903e-05 +GaussianMLPPolicy/LossBefore 5.126e-09 +GaussianMLPPolicy/dLoss -3.74851e-05 +Iteration 81 +MetaTest/Average/AverageDiscountedReturn -177.74 +MetaTest/Average/AverageReturn -177.74 +MetaTest/Average/Iteration 81 +MetaTest/Average/MaxReturn -142.601 +MetaTest/Average/MinReturn -254.415 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 32.8162 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.74 +MetaTest/__unnamed_task__/AverageReturn -177.74 +MetaTest/__unnamed_task__/Iteration 81 +MetaTest/__unnamed_task__/MaxReturn -142.601 +MetaTest/__unnamed_task__/MinReturn -254.415 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 32.8162 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.624e+06 +__unnamed_task__/AverageDiscountedReturn -70.0523 +__unnamed_task__/AverageReturn -163.124 +__unnamed_task__/Iteration 81 +__unnamed_task__/MaxReturn -143.174 +__unnamed_task__/MinReturn -240.485 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.1272 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:16:11 | [maml_trainer] epoch #82 | Sampling for adapation and meta-testing... +2025-04-02 22:17:20 | [maml_trainer] epoch #82 | Finished meta-testing... +2025-04-02 22:17:20 | [maml_trainer] epoch #82 | Saving snapshot... +2025-04-02 22:17:40 | [maml_trainer] epoch #82 | Saved +2025-04-02 22:17:40 | [maml_trainer] epoch #82 | Time 31338.22 s +2025-04-02 22:17:40 | [maml_trainer] epoch #82 | EpochTime 371.86 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.0088 +Average/AverageReturn -163.606 +Average/Iteration 82 +Average/MaxReturn -139.113 +Average/MinReturn -235.116 +Average/NumEpisodes 80 +Average/StdReturn 19.6023 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6336 +GaussianMLPPolicy/KLAfter 0.00193306 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.87451e-05 +GaussianMLPPolicy/LossBefore 5.37932e-09 +GaussianMLPPolicy/dLoss 1.87505e-05 +Iteration 82 +MetaTest/Average/AverageDiscountedReturn -166.592 +MetaTest/Average/AverageReturn -166.592 +MetaTest/Average/Iteration 82 +MetaTest/Average/MaxReturn -144.187 +MetaTest/Average/MinReturn -228.431 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.3356 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.592 +MetaTest/__unnamed_task__/AverageReturn -166.592 +MetaTest/__unnamed_task__/Iteration 82 +MetaTest/__unnamed_task__/MaxReturn -144.187 +MetaTest/__unnamed_task__/MinReturn -228.431 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.3356 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.656e+06 +__unnamed_task__/AverageDiscountedReturn -70.0088 +__unnamed_task__/AverageReturn -163.606 +__unnamed_task__/Iteration 82 +__unnamed_task__/MaxReturn -139.113 +__unnamed_task__/MinReturn -235.116 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.6023 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:22:15 | [maml_trainer] epoch #83 | Sampling for adapation and meta-testing... +2025-04-02 22:23:27 | [maml_trainer] epoch #83 | Finished meta-testing... +2025-04-02 22:23:27 | [maml_trainer] epoch #83 | Saving snapshot... +2025-04-02 22:23:49 | [maml_trainer] epoch #83 | Saved +2025-04-02 22:23:49 | [maml_trainer] epoch #83 | Time 31707.14 s +2025-04-02 22:23:49 | [maml_trainer] epoch #83 | EpochTime 368.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2339 +Average/AverageReturn -166.283 +Average/Iteration 83 +Average/MaxReturn -140.72 +Average/MinReturn -239.5 +Average/NumEpisodes 80 +Average/StdReturn 22.45 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6371 +GaussianMLPPolicy/KLAfter 0.00295173 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.18924e-05 +GaussianMLPPolicy/LossBefore -3.65078e-09 +GaussianMLPPolicy/dLoss 3.18888e-05 +Iteration 83 +MetaTest/Average/AverageDiscountedReturn -166.162 +MetaTest/Average/AverageReturn -166.162 +MetaTest/Average/Iteration 83 +MetaTest/Average/MaxReturn -144.772 +MetaTest/Average/MinReturn -203.019 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.5497 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.162 +MetaTest/__unnamed_task__/AverageReturn -166.162 +MetaTest/__unnamed_task__/Iteration 83 +MetaTest/__unnamed_task__/MaxReturn -144.772 +MetaTest/__unnamed_task__/MinReturn -203.019 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.5497 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.688e+06 +__unnamed_task__/AverageDiscountedReturn -71.2339 +__unnamed_task__/AverageReturn -166.283 +__unnamed_task__/Iteration 83 +__unnamed_task__/MaxReturn -140.72 +__unnamed_task__/MinReturn -239.5 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.45 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:28:34 | [maml_trainer] epoch #84 | Sampling for adapation and meta-testing... +2025-04-02 22:29:46 | [maml_trainer] epoch #84 | Finished meta-testing... +2025-04-02 22:29:46 | [maml_trainer] epoch #84 | Saving snapshot... +2025-04-02 22:30:09 | [maml_trainer] epoch #84 | Saved +2025-04-02 22:30:09 | [maml_trainer] epoch #84 | Time 32087.30 s +2025-04-02 22:30:09 | [maml_trainer] epoch #84 | EpochTime 380.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3814 +Average/AverageReturn -166.28 +Average/Iteration 84 +Average/MaxReturn -143.965 +Average/MinReturn -279.576 +Average/NumEpisodes 80 +Average/StdReturn 22.5781 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6398 +GaussianMLPPolicy/KLAfter 0.00265747 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.67415e-05 +GaussianMLPPolicy/LossBefore 5.27501e-09 +GaussianMLPPolicy/dLoss -1.67363e-05 +Iteration 84 +MetaTest/Average/AverageDiscountedReturn -162.877 +MetaTest/Average/AverageReturn -162.877 +MetaTest/Average/Iteration 84 +MetaTest/Average/MaxReturn -145.009 +MetaTest/Average/MinReturn -196.638 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.5678 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.877 +MetaTest/__unnamed_task__/AverageReturn -162.877 +MetaTest/__unnamed_task__/Iteration 84 +MetaTest/__unnamed_task__/MaxReturn -145.009 +MetaTest/__unnamed_task__/MinReturn -196.638 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.5678 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.72e+06 +__unnamed_task__/AverageDiscountedReturn -71.3814 +__unnamed_task__/AverageReturn -166.28 +__unnamed_task__/Iteration 84 +__unnamed_task__/MaxReturn -143.965 +__unnamed_task__/MinReturn -279.576 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.5781 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:34:58 | [maml_trainer] epoch #85 | Sampling for adapation and meta-testing... +2025-04-02 22:36:06 | [maml_trainer] epoch #85 | Finished meta-testing... +2025-04-02 22:36:06 | [maml_trainer] epoch #85 | Saving snapshot... +2025-04-02 22:36:28 | [maml_trainer] epoch #85 | Saved +2025-04-02 22:36:28 | [maml_trainer] epoch #85 | Time 32466.44 s +2025-04-02 22:36:28 | [maml_trainer] epoch #85 | EpochTime 379.14 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1993 +Average/AverageReturn -166.007 +Average/Iteration 85 +Average/MaxReturn -139.307 +Average/MinReturn -242.726 +Average/NumEpisodes 80 +Average/StdReturn 20.8673 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6413 +GaussianMLPPolicy/KLAfter 0.00236659 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.45965e-06 +GaussianMLPPolicy/LossBefore 1.63913e-10 +GaussianMLPPolicy/dLoss 2.45981e-06 +Iteration 85 +MetaTest/Average/AverageDiscountedReturn -165.64 +MetaTest/Average/AverageReturn -165.64 +MetaTest/Average/Iteration 85 +MetaTest/Average/MaxReturn -139.677 +MetaTest/Average/MinReturn -230.099 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.8261 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.64 +MetaTest/__unnamed_task__/AverageReturn -165.64 +MetaTest/__unnamed_task__/Iteration 85 +MetaTest/__unnamed_task__/MaxReturn -139.677 +MetaTest/__unnamed_task__/MinReturn -230.099 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.8261 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.752e+06 +__unnamed_task__/AverageDiscountedReturn -71.1993 +__unnamed_task__/AverageReturn -166.007 +__unnamed_task__/Iteration 85 +__unnamed_task__/MaxReturn -139.307 +__unnamed_task__/MinReturn -242.726 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.8673 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:41:05 | [maml_trainer] epoch #86 | Sampling for adapation and meta-testing... +2025-04-02 22:42:13 | [maml_trainer] epoch #86 | Finished meta-testing... +2025-04-02 22:42:13 | [maml_trainer] epoch #86 | Saving snapshot... +2025-04-02 22:42:35 | [maml_trainer] epoch #86 | Saved +2025-04-02 22:42:35 | [maml_trainer] epoch #86 | Time 32833.27 s +2025-04-02 22:42:35 | [maml_trainer] epoch #86 | EpochTime 366.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.7929 +Average/AverageReturn -162.942 +Average/Iteration 86 +Average/MaxReturn -139.768 +Average/MinReturn -247.189 +Average/NumEpisodes 80 +Average/StdReturn 18.5588 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6437 +GaussianMLPPolicy/KLAfter 0.00270349 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.53639e-06 +GaussianMLPPolicy/LossBefore 1.00136e-08 +GaussianMLPPolicy/dLoss -4.52638e-06 +Iteration 86 +MetaTest/Average/AverageDiscountedReturn -168.276 +MetaTest/Average/AverageReturn -168.276 +MetaTest/Average/Iteration 86 +MetaTest/Average/MaxReturn -142.564 +MetaTest/Average/MinReturn -221.133 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.5675 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.276 +MetaTest/__unnamed_task__/AverageReturn -168.276 +MetaTest/__unnamed_task__/Iteration 86 +MetaTest/__unnamed_task__/MaxReturn -142.564 +MetaTest/__unnamed_task__/MinReturn -221.133 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.5675 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.784e+06 +__unnamed_task__/AverageDiscountedReturn -69.7929 +__unnamed_task__/AverageReturn -162.942 +__unnamed_task__/Iteration 86 +__unnamed_task__/MaxReturn -139.768 +__unnamed_task__/MinReturn -247.189 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5588 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:47:14 | [maml_trainer] epoch #87 | Sampling for adapation and meta-testing... +2025-04-02 22:48:27 | [maml_trainer] epoch #87 | Finished meta-testing... +2025-04-02 22:48:27 | [maml_trainer] epoch #87 | Saving snapshot... +2025-04-02 22:48:48 | [maml_trainer] epoch #87 | Saved +2025-04-02 22:48:48 | [maml_trainer] epoch #87 | Time 33206.85 s +2025-04-02 22:48:48 | [maml_trainer] epoch #87 | EpochTime 373.58 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2323 +Average/AverageReturn -166.206 +Average/Iteration 87 +Average/MaxReturn -138.045 +Average/MinReturn -236.035 +Average/NumEpisodes 80 +Average/StdReturn 21.3971 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6459 +GaussianMLPPolicy/KLAfter 0.00242449 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.98939e-05 +GaussianMLPPolicy/LossBefore -1.2517e-09 +GaussianMLPPolicy/dLoss -1.98951e-05 +Iteration 87 +MetaTest/Average/AverageDiscountedReturn -175.557 +MetaTest/Average/AverageReturn -175.557 +MetaTest/Average/Iteration 87 +MetaTest/Average/MaxReturn -146.772 +MetaTest/Average/MinReturn -258.211 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.9693 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.557 +MetaTest/__unnamed_task__/AverageReturn -175.557 +MetaTest/__unnamed_task__/Iteration 87 +MetaTest/__unnamed_task__/MaxReturn -146.772 +MetaTest/__unnamed_task__/MinReturn -258.211 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.9693 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.816e+06 +__unnamed_task__/AverageDiscountedReturn -71.2323 +__unnamed_task__/AverageReturn -166.206 +__unnamed_task__/Iteration 87 +__unnamed_task__/MaxReturn -138.045 +__unnamed_task__/MinReturn -236.035 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.3971 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:53:31 | [maml_trainer] epoch #88 | Sampling for adapation and meta-testing... +2025-04-02 22:54:40 | [maml_trainer] epoch #88 | Finished meta-testing... +2025-04-02 22:54:40 | [maml_trainer] epoch #88 | Saving snapshot... +2025-04-02 22:55:02 | [maml_trainer] epoch #88 | Saved +2025-04-02 22:55:02 | [maml_trainer] epoch #88 | Time 33580.42 s +2025-04-02 22:55:02 | [maml_trainer] epoch #88 | EpochTime 373.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.8038 +Average/AverageReturn -166.057 +Average/Iteration 88 +Average/MaxReturn -144.85 +Average/MinReturn -222.661 +Average/NumEpisodes 80 +Average/StdReturn 17.7739 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6476 +GaussianMLPPolicy/KLAfter 0.00267529 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.3777e-07 +GaussianMLPPolicy/LossBefore -1.86265e-09 +GaussianMLPPolicy/dLoss -6.39632e-07 +Iteration 88 +MetaTest/Average/AverageDiscountedReturn -177.847 +MetaTest/Average/AverageReturn -177.847 +MetaTest/Average/Iteration 88 +MetaTest/Average/MaxReturn -145.34 +MetaTest/Average/MinReturn -245.373 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.5545 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.847 +MetaTest/__unnamed_task__/AverageReturn -177.847 +MetaTest/__unnamed_task__/Iteration 88 +MetaTest/__unnamed_task__/MaxReturn -145.34 +MetaTest/__unnamed_task__/MinReturn -245.373 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.5545 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.848e+06 +__unnamed_task__/AverageDiscountedReturn -70.8038 +__unnamed_task__/AverageReturn -166.057 +__unnamed_task__/Iteration 88 +__unnamed_task__/MaxReturn -144.85 +__unnamed_task__/MinReturn -222.661 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.7739 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 22:59:43 | [maml_trainer] epoch #89 | Sampling for adapation and meta-testing... +2025-04-02 23:00:51 | [maml_trainer] epoch #89 | Finished meta-testing... +2025-04-02 23:00:51 | [maml_trainer] epoch #89 | Saving snapshot... +2025-04-02 23:01:12 | [maml_trainer] epoch #89 | Saved +2025-04-02 23:01:12 | [maml_trainer] epoch #89 | Time 33950.20 s +2025-04-02 23:01:12 | [maml_trainer] epoch #89 | EpochTime 369.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.12 +Average/AverageReturn -167.496 +Average/Iteration 89 +Average/MaxReturn -142.518 +Average/MinReturn -247.124 +Average/NumEpisodes 80 +Average/StdReturn 20.0232 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6501 +GaussianMLPPolicy/KLAfter 0.00267123 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.0926e-05 +GaussianMLPPolicy/LossBefore 1.65403e-09 +GaussianMLPPolicy/dLoss -2.09243e-05 +Iteration 89 +MetaTest/Average/AverageDiscountedReturn -168.233 +MetaTest/Average/AverageReturn -168.233 +MetaTest/Average/Iteration 89 +MetaTest/Average/MaxReturn -144.818 +MetaTest/Average/MinReturn -218.344 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.8049 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.233 +MetaTest/__unnamed_task__/AverageReturn -168.233 +MetaTest/__unnamed_task__/Iteration 89 +MetaTest/__unnamed_task__/MaxReturn -144.818 +MetaTest/__unnamed_task__/MinReturn -218.344 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.8049 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.88e+06 +__unnamed_task__/AverageDiscountedReturn -71.12 +__unnamed_task__/AverageReturn -167.496 +__unnamed_task__/Iteration 89 +__unnamed_task__/MaxReturn -142.518 +__unnamed_task__/MinReturn -247.124 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0232 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:05:53 | [maml_trainer] epoch #90 | Sampling for adapation and meta-testing... +2025-04-02 23:07:03 | [maml_trainer] epoch #90 | Finished meta-testing... +2025-04-02 23:07:03 | [maml_trainer] epoch #90 | Saving snapshot... +2025-04-02 23:07:24 | [maml_trainer] epoch #90 | Saved +2025-04-02 23:07:24 | [maml_trainer] epoch #90 | Time 34322.49 s +2025-04-02 23:07:24 | [maml_trainer] epoch #90 | EpochTime 372.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.5498 +Average/AverageReturn -165.72 +Average/Iteration 90 +Average/MaxReturn -141.372 +Average/MinReturn -236.522 +Average/NumEpisodes 80 +Average/StdReturn 19.3145 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6498 +GaussianMLPPolicy/KLAfter 0.00210965 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.69524e-05 +GaussianMLPPolicy/LossBefore 4.29153e-09 +GaussianMLPPolicy/dLoss 3.69567e-05 +Iteration 90 +MetaTest/Average/AverageDiscountedReturn -167.964 +MetaTest/Average/AverageReturn -167.964 +MetaTest/Average/Iteration 90 +MetaTest/Average/MaxReturn -147.731 +MetaTest/Average/MinReturn -222.876 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.0496 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.964 +MetaTest/__unnamed_task__/AverageReturn -167.964 +MetaTest/__unnamed_task__/Iteration 90 +MetaTest/__unnamed_task__/MaxReturn -147.731 +MetaTest/__unnamed_task__/MinReturn -222.876 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.0496 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.912e+06 +__unnamed_task__/AverageDiscountedReturn -70.5498 +__unnamed_task__/AverageReturn -165.72 +__unnamed_task__/Iteration 90 +__unnamed_task__/MaxReturn -141.372 +__unnamed_task__/MinReturn -236.522 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.3145 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:12:11 | [maml_trainer] epoch #91 | Sampling for adapation and meta-testing... +2025-04-02 23:13:21 | [maml_trainer] epoch #91 | Finished meta-testing... +2025-04-02 23:13:21 | [maml_trainer] epoch #91 | Saving snapshot... +2025-04-02 23:13:42 | [maml_trainer] epoch #91 | Saved +2025-04-02 23:13:42 | [maml_trainer] epoch #91 | Time 34700.91 s +2025-04-02 23:13:42 | [maml_trainer] epoch #91 | EpochTime 378.42 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8874 +Average/AverageReturn -168.595 +Average/Iteration 91 +Average/MaxReturn -141.954 +Average/MinReturn -243.937 +Average/NumEpisodes 80 +Average/StdReturn 23.137 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6498 +GaussianMLPPolicy/KLAfter 0.00203597 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.29727e-05 +GaussianMLPPolicy/LossBefore -6.52671e-09 +GaussianMLPPolicy/dLoss -4.29792e-05 +Iteration 91 +MetaTest/Average/AverageDiscountedReturn -170.371 +MetaTest/Average/AverageReturn -170.371 +MetaTest/Average/Iteration 91 +MetaTest/Average/MaxReturn -145.335 +MetaTest/Average/MinReturn -226.965 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.2126 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.371 +MetaTest/__unnamed_task__/AverageReturn -170.371 +MetaTest/__unnamed_task__/Iteration 91 +MetaTest/__unnamed_task__/MaxReturn -145.335 +MetaTest/__unnamed_task__/MinReturn -226.965 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.2126 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.944e+06 +__unnamed_task__/AverageDiscountedReturn -71.8874 +__unnamed_task__/AverageReturn -168.595 +__unnamed_task__/Iteration 91 +__unnamed_task__/MaxReturn -141.954 +__unnamed_task__/MinReturn -243.937 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.137 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:18:28 | [maml_trainer] epoch #92 | Sampling for adapation and meta-testing... +2025-04-02 23:19:37 | [maml_trainer] epoch #92 | Finished meta-testing... +2025-04-02 23:19:37 | [maml_trainer] epoch #92 | Saving snapshot... +2025-04-02 23:19:58 | [maml_trainer] epoch #92 | Saved +2025-04-02 23:19:58 | [maml_trainer] epoch #92 | Time 35076.97 s +2025-04-02 23:19:58 | [maml_trainer] epoch #92 | EpochTime 376.05 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1116 +Average/AverageReturn -167.274 +Average/Iteration 92 +Average/MaxReturn -137.116 +Average/MinReturn -244.417 +Average/NumEpisodes 80 +Average/StdReturn 21.4729 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6496 +GaussianMLPPolicy/KLAfter 0.00234504 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.51831e-06 +GaussianMLPPolicy/LossBefore -8.49366e-10 +GaussianMLPPolicy/dLoss 3.51746e-06 +Iteration 92 +MetaTest/Average/AverageDiscountedReturn -162.43 +MetaTest/Average/AverageReturn -162.43 +MetaTest/Average/Iteration 92 +MetaTest/Average/MaxReturn -147.8 +MetaTest/Average/MinReturn -192.621 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.4866 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.43 +MetaTest/__unnamed_task__/AverageReturn -162.43 +MetaTest/__unnamed_task__/Iteration 92 +MetaTest/__unnamed_task__/MaxReturn -147.8 +MetaTest/__unnamed_task__/MinReturn -192.621 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.4866 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 2.976e+06 +__unnamed_task__/AverageDiscountedReturn -71.1116 +__unnamed_task__/AverageReturn -167.274 +__unnamed_task__/Iteration 92 +__unnamed_task__/MaxReturn -137.116 +__unnamed_task__/MinReturn -244.417 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.4729 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:24:42 | [maml_trainer] epoch #93 | Sampling for adapation and meta-testing... +2025-04-02 23:25:54 | [maml_trainer] epoch #93 | Finished meta-testing... +2025-04-02 23:25:54 | [maml_trainer] epoch #93 | Saving snapshot... +2025-04-02 23:26:16 | [maml_trainer] epoch #93 | Saved +2025-04-02 23:26:16 | [maml_trainer] epoch #93 | Time 35455.05 s +2025-04-02 23:26:16 | [maml_trainer] epoch #93 | EpochTime 378.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4897 +Average/AverageReturn -168.285 +Average/Iteration 93 +Average/MaxReturn -140.813 +Average/MinReturn -236.068 +Average/NumEpisodes 80 +Average/StdReturn 19.0464 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6499 +GaussianMLPPolicy/KLAfter 0.00257908 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.91857e-06 +GaussianMLPPolicy/LossBefore -5.39422e-09 +GaussianMLPPolicy/dLoss 2.91318e-06 +Iteration 93 +MetaTest/Average/AverageDiscountedReturn -173.905 +MetaTest/Average/AverageReturn -173.905 +MetaTest/Average/Iteration 93 +MetaTest/Average/MaxReturn -145.114 +MetaTest/Average/MinReturn -250.287 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 29.3861 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.905 +MetaTest/__unnamed_task__/AverageReturn -173.905 +MetaTest/__unnamed_task__/Iteration 93 +MetaTest/__unnamed_task__/MaxReturn -145.114 +MetaTest/__unnamed_task__/MinReturn -250.287 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 29.3861 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.008e+06 +__unnamed_task__/AverageDiscountedReturn -71.4897 +__unnamed_task__/AverageReturn -168.285 +__unnamed_task__/Iteration 93 +__unnamed_task__/MaxReturn -140.813 +__unnamed_task__/MinReturn -236.068 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.0464 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:31:10 | [maml_trainer] epoch #94 | Sampling for adapation and meta-testing... +2025-04-02 23:32:22 | [maml_trainer] epoch #94 | Finished meta-testing... +2025-04-02 23:32:22 | [maml_trainer] epoch #94 | Saving snapshot... +2025-04-02 23:32:44 | [maml_trainer] epoch #94 | Saved +2025-04-02 23:32:44 | [maml_trainer] epoch #94 | Time 35842.85 s +2025-04-02 23:32:44 | [maml_trainer] epoch #94 | EpochTime 387.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.185 +Average/AverageReturn -170.501 +Average/Iteration 94 +Average/MaxReturn -141.111 +Average/MinReturn -234.767 +Average/NumEpisodes 80 +Average/StdReturn 19.4725 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6506 +GaussianMLPPolicy/KLAfter 0.00244182 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.19153e-05 +GaussianMLPPolicy/LossBefore 3.57628e-10 +GaussianMLPPolicy/dLoss 3.19156e-05 +Iteration 94 +MetaTest/Average/AverageDiscountedReturn -162.82 +MetaTest/Average/AverageReturn -162.82 +MetaTest/Average/Iteration 94 +MetaTest/Average/MaxReturn -146.113 +MetaTest/Average/MinReturn -193.06 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3619 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.82 +MetaTest/__unnamed_task__/AverageReturn -162.82 +MetaTest/__unnamed_task__/Iteration 94 +MetaTest/__unnamed_task__/MaxReturn -146.113 +MetaTest/__unnamed_task__/MinReturn -193.06 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3619 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.04e+06 +__unnamed_task__/AverageDiscountedReturn -72.185 +__unnamed_task__/AverageReturn -170.501 +__unnamed_task__/Iteration 94 +__unnamed_task__/MaxReturn -141.111 +__unnamed_task__/MinReturn -234.767 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.4725 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:37:31 | [maml_trainer] epoch #95 | Sampling for adapation and meta-testing... +2025-04-02 23:38:41 | [maml_trainer] epoch #95 | Finished meta-testing... +2025-04-02 23:38:41 | [maml_trainer] epoch #95 | Saving snapshot... +2025-04-02 23:39:00 | [maml_trainer] epoch #95 | Saved +2025-04-02 23:39:00 | [maml_trainer] epoch #95 | Time 36219.09 s +2025-04-02 23:39:00 | [maml_trainer] epoch #95 | EpochTime 376.23 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8697 +Average/AverageReturn -169.753 +Average/Iteration 95 +Average/MaxReturn -145.457 +Average/MinReturn -248.318 +Average/NumEpisodes 80 +Average/StdReturn 21.9509 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6513 +GaussianMLPPolicy/KLAfter 0.00265194 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.11988e-05 +GaussianMLPPolicy/LossBefore -1.19209e-09 +GaussianMLPPolicy/dLoss -5.12e-05 +Iteration 95 +MetaTest/Average/AverageDiscountedReturn -168.115 +MetaTest/Average/AverageReturn -168.115 +MetaTest/Average/Iteration 95 +MetaTest/Average/MaxReturn -141.446 +MetaTest/Average/MinReturn -244.341 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.1494 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.115 +MetaTest/__unnamed_task__/AverageReturn -168.115 +MetaTest/__unnamed_task__/Iteration 95 +MetaTest/__unnamed_task__/MaxReturn -141.446 +MetaTest/__unnamed_task__/MinReturn -244.341 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.1494 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.072e+06 +__unnamed_task__/AverageDiscountedReturn -71.8697 +__unnamed_task__/AverageReturn -169.753 +__unnamed_task__/Iteration 95 +__unnamed_task__/MaxReturn -145.457 +__unnamed_task__/MinReturn -248.318 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.9509 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:43:36 | [maml_trainer] epoch #96 | Sampling for adapation and meta-testing... +2025-04-02 23:44:46 | [maml_trainer] epoch #96 | Finished meta-testing... +2025-04-02 23:44:46 | [maml_trainer] epoch #96 | Saving snapshot... +2025-04-02 23:45:07 | [maml_trainer] epoch #96 | Saved +2025-04-02 23:45:07 | [maml_trainer] epoch #96 | Time 36585.97 s +2025-04-02 23:45:07 | [maml_trainer] epoch #96 | EpochTime 366.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0881 +Average/AverageReturn -167.594 +Average/Iteration 96 +Average/MaxReturn -145.163 +Average/MinReturn -226.649 +Average/NumEpisodes 80 +Average/StdReturn 16.8416 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6507 +GaussianMLPPolicy/KLAfter 0.00309159 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.31993e-06 +GaussianMLPPolicy/LossBefore -9.20892e-09 +GaussianMLPPolicy/dLoss 8.31072e-06 +Iteration 96 +MetaTest/Average/AverageDiscountedReturn -172.455 +MetaTest/Average/AverageReturn -172.455 +MetaTest/Average/Iteration 96 +MetaTest/Average/MaxReturn -145.938 +MetaTest/Average/MinReturn -230.585 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.4821 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.455 +MetaTest/__unnamed_task__/AverageReturn -172.455 +MetaTest/__unnamed_task__/Iteration 96 +MetaTest/__unnamed_task__/MaxReturn -145.938 +MetaTest/__unnamed_task__/MinReturn -230.585 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.4821 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.104e+06 +__unnamed_task__/AverageDiscountedReturn -71.0881 +__unnamed_task__/AverageReturn -167.594 +__unnamed_task__/Iteration 96 +__unnamed_task__/MaxReturn -145.163 +__unnamed_task__/MinReturn -226.649 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.8416 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:49:53 | [maml_trainer] epoch #97 | Sampling for adapation and meta-testing... +2025-04-02 23:51:06 | [maml_trainer] epoch #97 | Finished meta-testing... +2025-04-02 23:51:06 | [maml_trainer] epoch #97 | Saving snapshot... +2025-04-02 23:51:33 | [maml_trainer] epoch #97 | Saved +2025-04-02 23:51:33 | [maml_trainer] epoch #97 | Time 36971.83 s +2025-04-02 23:51:33 | [maml_trainer] epoch #97 | EpochTime 385.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2009 +Average/AverageReturn -169.973 +Average/Iteration 97 +Average/MaxReturn -147.524 +Average/MinReturn -277.524 +Average/NumEpisodes 80 +Average/StdReturn 20.2395 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6509 +GaussianMLPPolicy/KLAfter 0.00244373 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.06619e-05 +GaussianMLPPolicy/LossBefore 4.54485e-09 +GaussianMLPPolicy/dLoss -1.06574e-05 +Iteration 97 +MetaTest/Average/AverageDiscountedReturn -169.523 +MetaTest/Average/AverageReturn -169.523 +MetaTest/Average/Iteration 97 +MetaTest/Average/MaxReturn -147.421 +MetaTest/Average/MinReturn -234.126 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.95 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.523 +MetaTest/__unnamed_task__/AverageReturn -169.523 +MetaTest/__unnamed_task__/Iteration 97 +MetaTest/__unnamed_task__/MaxReturn -147.421 +MetaTest/__unnamed_task__/MinReturn -234.126 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.95 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.136e+06 +__unnamed_task__/AverageDiscountedReturn -72.2009 +__unnamed_task__/AverageReturn -169.973 +__unnamed_task__/Iteration 97 +__unnamed_task__/MaxReturn -147.524 +__unnamed_task__/MinReturn -277.524 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.2395 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-02 23:56:29 | [maml_trainer] epoch #98 | Sampling for adapation and meta-testing... +2025-04-02 23:57:42 | [maml_trainer] epoch #98 | Finished meta-testing... +2025-04-02 23:57:42 | [maml_trainer] epoch #98 | Saving snapshot... +2025-04-02 23:58:05 | [maml_trainer] epoch #98 | Saved +2025-04-02 23:58:05 | [maml_trainer] epoch #98 | Time 37363.20 s +2025-04-02 23:58:05 | [maml_trainer] epoch #98 | EpochTime 391.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.2687 +Average/AverageReturn -168.236 +Average/Iteration 98 +Average/MaxReturn -142.109 +Average/MinReturn -234.093 +Average/NumEpisodes 80 +Average/StdReturn 17.9657 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6508 +GaussianMLPPolicy/KLAfter 0.00248001 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.57345e-05 +GaussianMLPPolicy/LossBefore 1.71363e-09 +GaussianMLPPolicy/dLoss 1.57362e-05 +Iteration 98 +MetaTest/Average/AverageDiscountedReturn -171.098 +MetaTest/Average/AverageReturn -171.098 +MetaTest/Average/Iteration 98 +MetaTest/Average/MaxReturn -142.015 +MetaTest/Average/MinReturn -231.19 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.4192 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.098 +MetaTest/__unnamed_task__/AverageReturn -171.098 +MetaTest/__unnamed_task__/Iteration 98 +MetaTest/__unnamed_task__/MaxReturn -142.015 +MetaTest/__unnamed_task__/MinReturn -231.19 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.4192 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.168e+06 +__unnamed_task__/AverageDiscountedReturn -71.2687 +__unnamed_task__/AverageReturn -168.236 +__unnamed_task__/Iteration 98 +__unnamed_task__/MaxReturn -142.109 +__unnamed_task__/MinReturn -234.093 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9657 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:02:55 | [maml_trainer] epoch #99 | Sampling for adapation and meta-testing... +2025-04-03 00:04:09 | [maml_trainer] epoch #99 | Finished meta-testing... +2025-04-03 00:04:09 | [maml_trainer] epoch #99 | Saving snapshot... +2025-04-03 00:04:31 | [maml_trainer] epoch #99 | Saved +2025-04-03 00:04:31 | [maml_trainer] epoch #99 | Time 37749.67 s +2025-04-03 00:04:31 | [maml_trainer] epoch #99 | EpochTime 386.47 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.1276 +Average/AverageReturn -170.102 +Average/Iteration 99 +Average/MaxReturn -142.234 +Average/MinReturn -235.931 +Average/NumEpisodes 80 +Average/StdReturn 18.8219 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6508 +GaussianMLPPolicy/KLAfter 0.00236899 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.99016e-06 +GaussianMLPPolicy/LossBefore 3.8743e-10 +GaussianMLPPolicy/dLoss -2.98977e-06 +Iteration 99 +MetaTest/Average/AverageDiscountedReturn -174.656 +MetaTest/Average/AverageReturn -174.656 +MetaTest/Average/Iteration 99 +MetaTest/Average/MaxReturn -150.32 +MetaTest/Average/MinReturn -247.659 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.0763 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.656 +MetaTest/__unnamed_task__/AverageReturn -174.656 +MetaTest/__unnamed_task__/Iteration 99 +MetaTest/__unnamed_task__/MaxReturn -150.32 +MetaTest/__unnamed_task__/MinReturn -247.659 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.0763 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.2e+06 +__unnamed_task__/AverageDiscountedReturn -72.1276 +__unnamed_task__/AverageReturn -170.102 +__unnamed_task__/Iteration 99 +__unnamed_task__/MaxReturn -142.234 +__unnamed_task__/MinReturn -235.931 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.8219 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:09:19 | [maml_trainer] epoch #100 | Sampling for adapation and meta-testing... +2025-04-03 00:10:27 | [maml_trainer] epoch #100 | Finished meta-testing... +2025-04-03 00:10:27 | [maml_trainer] epoch #100 | Saving snapshot... +2025-04-03 00:10:48 | [maml_trainer] epoch #100 | Saved +2025-04-03 00:10:48 | [maml_trainer] epoch #100 | Time 38127.05 s +2025-04-03 00:10:48 | [maml_trainer] epoch #100 | EpochTime 377.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.0865 +Average/AverageReturn -174.998 +Average/Iteration 100 +Average/MaxReturn -147.995 +Average/MinReturn -249.959 +Average/NumEpisodes 80 +Average/StdReturn 23.2312 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6487 +GaussianMLPPolicy/KLAfter 0.00229703 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.57e-05 +GaussianMLPPolicy/LossBefore -2.25008e-09 +GaussianMLPPolicy/dLoss 1.56977e-05 +Iteration 100 +MetaTest/Average/AverageDiscountedReturn -163.326 +MetaTest/Average/AverageReturn -163.326 +MetaTest/Average/Iteration 100 +MetaTest/Average/MaxReturn -148.082 +MetaTest/Average/MinReturn -188.003 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3437 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.326 +MetaTest/__unnamed_task__/AverageReturn -163.326 +MetaTest/__unnamed_task__/Iteration 100 +MetaTest/__unnamed_task__/MaxReturn -148.082 +MetaTest/__unnamed_task__/MinReturn -188.003 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3437 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.232e+06 +__unnamed_task__/AverageDiscountedReturn -74.0865 +__unnamed_task__/AverageReturn -174.998 +__unnamed_task__/Iteration 100 +__unnamed_task__/MaxReturn -147.995 +__unnamed_task__/MinReturn -249.959 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.2312 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:15:36 | [maml_trainer] epoch #101 | Sampling for adapation and meta-testing... +2025-04-03 00:16:49 | [maml_trainer] epoch #101 | Finished meta-testing... +2025-04-03 00:16:49 | [maml_trainer] epoch #101 | Saving snapshot... +2025-04-03 00:17:12 | [maml_trainer] epoch #101 | Saved +2025-04-03 00:17:12 | [maml_trainer] epoch #101 | Time 38510.61 s +2025-04-03 00:17:12 | [maml_trainer] epoch #101 | EpochTime 383.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2083 +Average/AverageReturn -170.183 +Average/Iteration 101 +Average/MaxReturn -147.793 +Average/MinReturn -234.805 +Average/NumEpisodes 80 +Average/StdReturn 19.1678 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6463 +GaussianMLPPolicy/KLAfter 0.00200527 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.0146e-05 +GaussianMLPPolicy/LossBefore -9.68575e-09 +GaussianMLPPolicy/dLoss 2.01364e-05 +Iteration 101 +MetaTest/Average/AverageDiscountedReturn -168.061 +MetaTest/Average/AverageReturn -168.061 +MetaTest/Average/Iteration 101 +MetaTest/Average/MaxReturn -143.843 +MetaTest/Average/MinReturn -218.617 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.8678 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.061 +MetaTest/__unnamed_task__/AverageReturn -168.061 +MetaTest/__unnamed_task__/Iteration 101 +MetaTest/__unnamed_task__/MaxReturn -143.843 +MetaTest/__unnamed_task__/MinReturn -218.617 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.8678 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.264e+06 +__unnamed_task__/AverageDiscountedReturn -72.2083 +__unnamed_task__/AverageReturn -170.183 +__unnamed_task__/Iteration 101 +__unnamed_task__/MaxReturn -147.793 +__unnamed_task__/MinReturn -234.805 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.1678 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:22:05 | [maml_trainer] epoch #102 | Sampling for adapation and meta-testing... +2025-04-03 00:23:16 | [maml_trainer] epoch #102 | Finished meta-testing... +2025-04-03 00:23:16 | [maml_trainer] epoch #102 | Saving snapshot... +2025-04-03 00:23:38 | [maml_trainer] epoch #102 | Saved +2025-04-03 00:23:38 | [maml_trainer] epoch #102 | Time 38896.20 s +2025-04-03 00:23:38 | [maml_trainer] epoch #102 | EpochTime 385.59 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.2412 +Average/AverageReturn -165.07 +Average/Iteration 102 +Average/MaxReturn -141.815 +Average/MinReturn -219.558 +Average/NumEpisodes 80 +Average/StdReturn 17.323 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6442 +GaussianMLPPolicy/KLAfter 0.0015532 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.94057e-05 +GaussianMLPPolicy/LossBefore -4.47035e-09 +GaussianMLPPolicy/dLoss -2.94102e-05 +Iteration 102 +MetaTest/Average/AverageDiscountedReturn -167.821 +MetaTest/Average/AverageReturn -167.821 +MetaTest/Average/Iteration 102 +MetaTest/Average/MaxReturn -145.048 +MetaTest/Average/MinReturn -235.488 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.4283 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.821 +MetaTest/__unnamed_task__/AverageReturn -167.821 +MetaTest/__unnamed_task__/Iteration 102 +MetaTest/__unnamed_task__/MaxReturn -145.048 +MetaTest/__unnamed_task__/MinReturn -235.488 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.4283 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.296e+06 +__unnamed_task__/AverageDiscountedReturn -70.2412 +__unnamed_task__/AverageReturn -165.07 +__unnamed_task__/Iteration 102 +__unnamed_task__/MaxReturn -141.815 +__unnamed_task__/MinReturn -219.558 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.323 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:28:18 | [maml_trainer] epoch #103 | Sampling for adapation and meta-testing... +2025-04-03 00:29:30 | [maml_trainer] epoch #103 | Finished meta-testing... +2025-04-03 00:29:30 | [maml_trainer] epoch #103 | Saving snapshot... +2025-04-03 00:29:51 | [maml_trainer] epoch #103 | Saved +2025-04-03 00:29:51 | [maml_trainer] epoch #103 | Time 39269.69 s +2025-04-03 00:29:51 | [maml_trainer] epoch #103 | EpochTime 373.48 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3992 +Average/AverageReturn -168.253 +Average/Iteration 103 +Average/MaxReturn -142.687 +Average/MinReturn -235.461 +Average/NumEpisodes 80 +Average/StdReturn 19.2065 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6425 +GaussianMLPPolicy/KLAfter 0.00145216 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.54094e-06 +GaussianMLPPolicy/LossBefore -9.17912e-09 +GaussianMLPPolicy/dLoss -1.55012e-06 +Iteration 103 +MetaTest/Average/AverageDiscountedReturn -175.382 +MetaTest/Average/AverageReturn -175.382 +MetaTest/Average/Iteration 103 +MetaTest/Average/MaxReturn -143.837 +MetaTest/Average/MinReturn -238.914 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.4743 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.382 +MetaTest/__unnamed_task__/AverageReturn -175.382 +MetaTest/__unnamed_task__/Iteration 103 +MetaTest/__unnamed_task__/MaxReturn -143.837 +MetaTest/__unnamed_task__/MinReturn -238.914 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.4743 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.328e+06 +__unnamed_task__/AverageDiscountedReturn -71.3992 +__unnamed_task__/AverageReturn -168.253 +__unnamed_task__/Iteration 103 +__unnamed_task__/MaxReturn -142.687 +__unnamed_task__/MinReturn -235.461 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.2065 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:34:39 | [maml_trainer] epoch #104 | Sampling for adapation and meta-testing... +2025-04-03 00:35:49 | [maml_trainer] epoch #104 | Finished meta-testing... +2025-04-03 00:35:49 | [maml_trainer] epoch #104 | Saving snapshot... +2025-04-03 00:36:10 | [maml_trainer] epoch #104 | Saved +2025-04-03 00:36:10 | [maml_trainer] epoch #104 | Time 39648.63 s +2025-04-03 00:36:10 | [maml_trainer] epoch #104 | EpochTime 378.94 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1076 +Average/AverageReturn -166.589 +Average/Iteration 104 +Average/MaxReturn -139.96 +Average/MinReturn -248.336 +Average/NumEpisodes 80 +Average/StdReturn 19.0739 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6404 +GaussianMLPPolicy/KLAfter 0.00150799 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.73551e-06 +GaussianMLPPolicy/LossBefore 1.41561e-09 +GaussianMLPPolicy/dLoss -8.73409e-06 +Iteration 104 +MetaTest/Average/AverageDiscountedReturn -164.009 +MetaTest/Average/AverageReturn -164.009 +MetaTest/Average/Iteration 104 +MetaTest/Average/MaxReturn -140.131 +MetaTest/Average/MinReturn -222.961 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.3557 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.009 +MetaTest/__unnamed_task__/AverageReturn -164.009 +MetaTest/__unnamed_task__/Iteration 104 +MetaTest/__unnamed_task__/MaxReturn -140.131 +MetaTest/__unnamed_task__/MinReturn -222.961 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.3557 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.36e+06 +__unnamed_task__/AverageDiscountedReturn -71.1076 +__unnamed_task__/AverageReturn -166.589 +__unnamed_task__/Iteration 104 +__unnamed_task__/MaxReturn -139.96 +__unnamed_task__/MinReturn -248.336 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.0739 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:40:57 | [maml_trainer] epoch #105 | Sampling for adapation and meta-testing... +2025-04-03 00:42:07 | [maml_trainer] epoch #105 | Finished meta-testing... +2025-04-03 00:42:07 | [maml_trainer] epoch #105 | Saving snapshot... +2025-04-03 00:42:29 | [maml_trainer] epoch #105 | Saved +2025-04-03 00:42:29 | [maml_trainer] epoch #105 | Time 40027.92 s +2025-04-03 00:42:29 | [maml_trainer] epoch #105 | EpochTime 379.29 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.8465 +Average/AverageReturn -167.358 +Average/Iteration 105 +Average/MaxReturn -145.225 +Average/MinReturn -230.948 +Average/NumEpisodes 80 +Average/StdReturn 17.3507 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6382 +GaussianMLPPolicy/KLAfter 0.00113225 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.75926e-06 +GaussianMLPPolicy/LossBefore -3.60608e-09 +GaussianMLPPolicy/dLoss -7.76286e-06 +Iteration 105 +MetaTest/Average/AverageDiscountedReturn -159.772 +MetaTest/Average/AverageReturn -159.772 +MetaTest/Average/Iteration 105 +MetaTest/Average/MaxReturn -141.89 +MetaTest/Average/MinReturn -186.638 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7558 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -159.772 +MetaTest/__unnamed_task__/AverageReturn -159.772 +MetaTest/__unnamed_task__/Iteration 105 +MetaTest/__unnamed_task__/MaxReturn -141.89 +MetaTest/__unnamed_task__/MinReturn -186.638 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7558 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.392e+06 +__unnamed_task__/AverageDiscountedReturn -70.8465 +__unnamed_task__/AverageReturn -167.358 +__unnamed_task__/Iteration 105 +__unnamed_task__/MaxReturn -145.225 +__unnamed_task__/MinReturn -230.948 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.3507 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:47:09 | [maml_trainer] epoch #106 | Sampling for adapation and meta-testing... +2025-04-03 00:48:21 | [maml_trainer] epoch #106 | Finished meta-testing... +2025-04-03 00:48:21 | [maml_trainer] epoch #106 | Saving snapshot... +2025-04-03 00:48:43 | [maml_trainer] epoch #106 | Saved +2025-04-03 00:48:43 | [maml_trainer] epoch #106 | Time 40401.82 s +2025-04-03 00:48:43 | [maml_trainer] epoch #106 | EpochTime 373.89 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8528 +Average/AverageReturn -169.211 +Average/Iteration 106 +Average/MaxReturn -142.944 +Average/MinReturn -261.25 +Average/NumEpisodes 80 +Average/StdReturn 24.3109 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6358 +GaussianMLPPolicy/KLAfter 0.00130113 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.77823e-06 +GaussianMLPPolicy/LossBefore -7.19726e-09 +GaussianMLPPolicy/dLoss -1.78543e-06 +Iteration 106 +MetaTest/Average/AverageDiscountedReturn -171.577 +MetaTest/Average/AverageReturn -171.577 +MetaTest/Average/Iteration 106 +MetaTest/Average/MaxReturn -148.075 +MetaTest/Average/MinReturn -225.015 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.0019 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.577 +MetaTest/__unnamed_task__/AverageReturn -171.577 +MetaTest/__unnamed_task__/Iteration 106 +MetaTest/__unnamed_task__/MaxReturn -148.075 +MetaTest/__unnamed_task__/MinReturn -225.015 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.0019 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.424e+06 +__unnamed_task__/AverageDiscountedReturn -71.8528 +__unnamed_task__/AverageReturn -169.211 +__unnamed_task__/Iteration 106 +__unnamed_task__/MaxReturn -142.944 +__unnamed_task__/MinReturn -261.25 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.3109 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:53:32 | [maml_trainer] epoch #107 | Sampling for adapation and meta-testing... +2025-04-03 00:54:44 | [maml_trainer] epoch #107 | Finished meta-testing... +2025-04-03 00:54:44 | [maml_trainer] epoch #107 | Saving snapshot... +2025-04-03 00:55:06 | [maml_trainer] epoch #107 | Saved +2025-04-03 00:55:06 | [maml_trainer] epoch #107 | Time 40784.33 s +2025-04-03 00:55:06 | [maml_trainer] epoch #107 | EpochTime 382.51 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5335 +Average/AverageReturn -170.717 +Average/Iteration 107 +Average/MaxReturn -146.034 +Average/MinReturn -236.203 +Average/NumEpisodes 80 +Average/StdReturn 19.5893 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6337 +GaussianMLPPolicy/KLAfter 0.00153322 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.23494e-06 +GaussianMLPPolicy/LossBefore -3.75509e-09 +GaussianMLPPolicy/dLoss -2.23869e-06 +Iteration 107 +MetaTest/Average/AverageDiscountedReturn -169.858 +MetaTest/Average/AverageReturn -169.858 +MetaTest/Average/Iteration 107 +MetaTest/Average/MaxReturn -140.158 +MetaTest/Average/MinReturn -223.841 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.3668 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.858 +MetaTest/__unnamed_task__/AverageReturn -169.858 +MetaTest/__unnamed_task__/Iteration 107 +MetaTest/__unnamed_task__/MaxReturn -140.158 +MetaTest/__unnamed_task__/MinReturn -223.841 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.3668 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.456e+06 +__unnamed_task__/AverageDiscountedReturn -72.5335 +__unnamed_task__/AverageReturn -170.717 +__unnamed_task__/Iteration 107 +__unnamed_task__/MaxReturn -146.034 +__unnamed_task__/MinReturn -236.203 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.5893 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 00:59:59 | [maml_trainer] epoch #108 | Sampling for adapation and meta-testing... +2025-04-03 01:01:13 | [maml_trainer] epoch #108 | Finished meta-testing... +2025-04-03 01:01:13 | [maml_trainer] epoch #108 | Saving snapshot... +2025-04-03 01:01:35 | [maml_trainer] epoch #108 | Saved +2025-04-03 01:01:35 | [maml_trainer] epoch #108 | Time 41173.48 s +2025-04-03 01:01:35 | [maml_trainer] epoch #108 | EpochTime 389.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.9958 +Average/AverageReturn -168.108 +Average/Iteration 108 +Average/MaxReturn -142.698 +Average/MinReturn -258.4 +Average/NumEpisodes 80 +Average/StdReturn 24.916 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6319 +GaussianMLPPolicy/KLAfter 0.00242614 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.43762e-05 +GaussianMLPPolicy/LossBefore 1.72853e-09 +GaussianMLPPolicy/dLoss -2.43745e-05 +Iteration 108 +MetaTest/Average/AverageDiscountedReturn -182.235 +MetaTest/Average/AverageReturn -182.235 +MetaTest/Average/Iteration 108 +MetaTest/Average/MaxReturn -144.248 +MetaTest/Average/MinReturn -238.773 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.8491 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -182.235 +MetaTest/__unnamed_task__/AverageReturn -182.235 +MetaTest/__unnamed_task__/Iteration 108 +MetaTest/__unnamed_task__/MaxReturn -144.248 +MetaTest/__unnamed_task__/MinReturn -238.773 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.8491 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.488e+06 +__unnamed_task__/AverageDiscountedReturn -71.9958 +__unnamed_task__/AverageReturn -168.108 +__unnamed_task__/Iteration 108 +__unnamed_task__/MaxReturn -142.698 +__unnamed_task__/MinReturn -258.4 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.916 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:06:22 | [maml_trainer] epoch #109 | Sampling for adapation and meta-testing... +2025-04-03 01:07:34 | [maml_trainer] epoch #109 | Finished meta-testing... +2025-04-03 01:07:34 | [maml_trainer] epoch #109 | Saving snapshot... +2025-04-03 01:07:56 | [maml_trainer] epoch #109 | Saved +2025-04-03 01:07:56 | [maml_trainer] epoch #109 | Time 41555.07 s +2025-04-03 01:07:56 | [maml_trainer] epoch #109 | EpochTime 381.58 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.8059 +Average/AverageReturn -163.682 +Average/Iteration 109 +Average/MaxReturn -142.265 +Average/MinReturn -246.01 +Average/NumEpisodes 80 +Average/StdReturn 16.9768 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6322 +GaussianMLPPolicy/KLAfter 0.00245014 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.61102e-05 +GaussianMLPPolicy/LossBefore -1.19209e-09 +GaussianMLPPolicy/dLoss -1.61114e-05 +Iteration 109 +MetaTest/Average/AverageDiscountedReturn -175.7 +MetaTest/Average/AverageReturn -175.7 +MetaTest/Average/Iteration 109 +MetaTest/Average/MaxReturn -153.5 +MetaTest/Average/MinReturn -241.719 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.7084 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.7 +MetaTest/__unnamed_task__/AverageReturn -175.7 +MetaTest/__unnamed_task__/Iteration 109 +MetaTest/__unnamed_task__/MaxReturn -153.5 +MetaTest/__unnamed_task__/MinReturn -241.719 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.7084 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.52e+06 +__unnamed_task__/AverageDiscountedReturn -69.8059 +__unnamed_task__/AverageReturn -163.682 +__unnamed_task__/Iteration 109 +__unnamed_task__/MaxReturn -142.265 +__unnamed_task__/MinReturn -246.01 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.9768 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:12:48 | [maml_trainer] epoch #110 | Sampling for adapation and meta-testing... +2025-04-03 01:13:59 | [maml_trainer] epoch #110 | Finished meta-testing... +2025-04-03 01:13:59 | [maml_trainer] epoch #110 | Saving snapshot... +2025-04-03 01:14:22 | [maml_trainer] epoch #110 | Saved +2025-04-03 01:14:22 | [maml_trainer] epoch #110 | Time 41940.52 s +2025-04-03 01:14:22 | [maml_trainer] epoch #110 | EpochTime 385.45 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7444 +Average/AverageReturn -170.388 +Average/Iteration 110 +Average/MaxReturn -138.113 +Average/MinReturn -266.882 +Average/NumEpisodes 80 +Average/StdReturn 23.8321 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6342 +GaussianMLPPolicy/KLAfter 0.00286785 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.51289e-05 +GaussianMLPPolicy/LossBefore 1.34408e-08 +GaussianMLPPolicy/dLoss 1.51423e-05 +Iteration 110 +MetaTest/Average/AverageDiscountedReturn -176.806 +MetaTest/Average/AverageReturn -176.806 +MetaTest/Average/Iteration 110 +MetaTest/Average/MaxReturn -147.465 +MetaTest/Average/MinReturn -226.103 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.6506 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.806 +MetaTest/__unnamed_task__/AverageReturn -176.806 +MetaTest/__unnamed_task__/Iteration 110 +MetaTest/__unnamed_task__/MaxReturn -147.465 +MetaTest/__unnamed_task__/MinReturn -226.103 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.6506 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.552e+06 +__unnamed_task__/AverageDiscountedReturn -72.7444 +__unnamed_task__/AverageReturn -170.388 +__unnamed_task__/Iteration 110 +__unnamed_task__/MaxReturn -138.113 +__unnamed_task__/MinReturn -266.882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.8321 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:19:11 | [maml_trainer] epoch #111 | Sampling for adapation and meta-testing... +2025-04-03 01:20:24 | [maml_trainer] epoch #111 | Finished meta-testing... +2025-04-03 01:20:24 | [maml_trainer] epoch #111 | Saving snapshot... +2025-04-03 01:20:46 | [maml_trainer] epoch #111 | Saved +2025-04-03 01:20:46 | [maml_trainer] epoch #111 | Time 42324.31 s +2025-04-03 01:20:46 | [maml_trainer] epoch #111 | EpochTime 383.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.5211 +Average/AverageReturn -173.409 +Average/Iteration 111 +Average/MaxReturn -142.011 +Average/MinReturn -272.77 +Average/NumEpisodes 80 +Average/StdReturn 26.1544 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6371 +GaussianMLPPolicy/KLAfter 0.00318482 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.08324e-05 +GaussianMLPPolicy/LossBefore 1.72853e-09 +GaussianMLPPolicy/dLoss -2.08306e-05 +Iteration 111 +MetaTest/Average/AverageDiscountedReturn -175.265 +MetaTest/Average/AverageReturn -175.265 +MetaTest/Average/Iteration 111 +MetaTest/Average/MaxReturn -146.971 +MetaTest/Average/MinReturn -249.076 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 30.9382 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.265 +MetaTest/__unnamed_task__/AverageReturn -175.265 +MetaTest/__unnamed_task__/Iteration 111 +MetaTest/__unnamed_task__/MaxReturn -146.971 +MetaTest/__unnamed_task__/MinReturn -249.076 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 30.9382 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.584e+06 +__unnamed_task__/AverageDiscountedReturn -74.5211 +__unnamed_task__/AverageReturn -173.409 +__unnamed_task__/Iteration 111 +__unnamed_task__/MaxReturn -142.011 +__unnamed_task__/MinReturn -272.77 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.1544 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:25:34 | [maml_trainer] epoch #112 | Sampling for adapation and meta-testing... +2025-04-03 01:26:47 | [maml_trainer] epoch #112 | Finished meta-testing... +2025-04-03 01:26:47 | [maml_trainer] epoch #112 | Saving snapshot... +2025-04-03 01:27:08 | [maml_trainer] epoch #112 | Saved +2025-04-03 01:27:08 | [maml_trainer] epoch #112 | Time 42706.86 s +2025-04-03 01:27:08 | [maml_trainer] epoch #112 | EpochTime 382.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.5407 +Average/AverageReturn -174.086 +Average/Iteration 112 +Average/MaxReturn -144.832 +Average/MinReturn -248.694 +Average/NumEpisodes 80 +Average/StdReturn 23.6731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.64 +GaussianMLPPolicy/KLAfter 0.00319607 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.56347e-05 +GaussianMLPPolicy/LossBefore -5.0962e-09 +GaussianMLPPolicy/dLoss 6.56296e-05 +Iteration 112 +MetaTest/Average/AverageDiscountedReturn -166.072 +MetaTest/Average/AverageReturn -166.072 +MetaTest/Average/Iteration 112 +MetaTest/Average/MaxReturn -143.88 +MetaTest/Average/MinReturn -201.823 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.4988 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.072 +MetaTest/__unnamed_task__/AverageReturn -166.072 +MetaTest/__unnamed_task__/Iteration 112 +MetaTest/__unnamed_task__/MaxReturn -143.88 +MetaTest/__unnamed_task__/MinReturn -201.823 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.4988 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.616e+06 +__unnamed_task__/AverageDiscountedReturn -73.5407 +__unnamed_task__/AverageReturn -174.086 +__unnamed_task__/Iteration 112 +__unnamed_task__/MaxReturn -144.832 +__unnamed_task__/MinReturn -248.694 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.6731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:31:56 | [maml_trainer] epoch #113 | Sampling for adapation and meta-testing... +2025-04-03 01:33:08 | [maml_trainer] epoch #113 | Finished meta-testing... +2025-04-03 01:33:08 | [maml_trainer] epoch #113 | Saving snapshot... +2025-04-03 01:33:31 | [maml_trainer] epoch #113 | Saved +2025-04-03 01:33:31 | [maml_trainer] epoch #113 | Time 43089.22 s +2025-04-03 01:33:31 | [maml_trainer] epoch #113 | EpochTime 382.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7315 +Average/AverageReturn -171.407 +Average/Iteration 113 +Average/MaxReturn -136.631 +Average/MinReturn -242.775 +Average/NumEpisodes 80 +Average/StdReturn 23.1529 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6413 +GaussianMLPPolicy/KLAfter 0.0019425 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.16485e-05 +GaussianMLPPolicy/LossBefore 3.8743e-10 +GaussianMLPPolicy/dLoss 3.16489e-05 +Iteration 113 +MetaTest/Average/AverageDiscountedReturn -170.395 +MetaTest/Average/AverageReturn -170.395 +MetaTest/Average/Iteration 113 +MetaTest/Average/MaxReturn -145.998 +MetaTest/Average/MinReturn -212.71 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.2261 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.395 +MetaTest/__unnamed_task__/AverageReturn -170.395 +MetaTest/__unnamed_task__/Iteration 113 +MetaTest/__unnamed_task__/MaxReturn -145.998 +MetaTest/__unnamed_task__/MinReturn -212.71 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.2261 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.648e+06 +__unnamed_task__/AverageDiscountedReturn -72.7315 +__unnamed_task__/AverageReturn -171.407 +__unnamed_task__/Iteration 113 +__unnamed_task__/MaxReturn -136.631 +__unnamed_task__/MinReturn -242.775 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.1529 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:38:22 | [maml_trainer] epoch #114 | Sampling for adapation and meta-testing... +2025-04-03 01:39:34 | [maml_trainer] epoch #114 | Finished meta-testing... +2025-04-03 01:39:34 | [maml_trainer] epoch #114 | Saving snapshot... +2025-04-03 01:39:57 | [maml_trainer] epoch #114 | Saved +2025-04-03 01:39:57 | [maml_trainer] epoch #114 | Time 43475.93 s +2025-04-03 01:39:57 | [maml_trainer] epoch #114 | EpochTime 386.70 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5111 +Average/AverageReturn -168.484 +Average/Iteration 114 +Average/MaxReturn -140.654 +Average/MinReturn -238.254 +Average/NumEpisodes 80 +Average/StdReturn 18.0328 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6425 +GaussianMLPPolicy/KLAfter 0.00215491 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.9797e-05 +GaussianMLPPolicy/LossBefore 1.2219e-09 +GaussianMLPPolicy/dLoss 2.97982e-05 +Iteration 114 +MetaTest/Average/AverageDiscountedReturn -172.748 +MetaTest/Average/AverageReturn -172.748 +MetaTest/Average/Iteration 114 +MetaTest/Average/MaxReturn -145.624 +MetaTest/Average/MinReturn -232.883 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.789 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.748 +MetaTest/__unnamed_task__/AverageReturn -172.748 +MetaTest/__unnamed_task__/Iteration 114 +MetaTest/__unnamed_task__/MaxReturn -145.624 +MetaTest/__unnamed_task__/MinReturn -232.883 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.789 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.68e+06 +__unnamed_task__/AverageDiscountedReturn -71.5111 +__unnamed_task__/AverageReturn -168.484 +__unnamed_task__/Iteration 114 +__unnamed_task__/MaxReturn -140.654 +__unnamed_task__/MinReturn -238.254 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0328 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:44:48 | [maml_trainer] epoch #115 | Sampling for adapation and meta-testing... +2025-04-03 01:46:00 | [maml_trainer] epoch #115 | Finished meta-testing... +2025-04-03 01:46:00 | [maml_trainer] epoch #115 | Saving snapshot... +2025-04-03 01:46:22 | [maml_trainer] epoch #115 | Saved +2025-04-03 01:46:22 | [maml_trainer] epoch #115 | Time 43861.11 s +2025-04-03 01:46:22 | [maml_trainer] epoch #115 | EpochTime 385.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4117 +Average/AverageReturn -173.974 +Average/Iteration 115 +Average/MaxReturn -150.022 +Average/MinReturn -233.789 +Average/NumEpisodes 80 +Average/StdReturn 19.2364 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6435 +GaussianMLPPolicy/KLAfter 0.00269606 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.64892e-05 +GaussianMLPPolicy/LossBefore 5.94556e-09 +GaussianMLPPolicy/dLoss -1.64833e-05 +Iteration 115 +MetaTest/Average/AverageDiscountedReturn -172.705 +MetaTest/Average/AverageReturn -172.705 +MetaTest/Average/Iteration 115 +MetaTest/Average/MaxReturn -148.741 +MetaTest/Average/MinReturn -219.338 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.4251 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.705 +MetaTest/__unnamed_task__/AverageReturn -172.705 +MetaTest/__unnamed_task__/Iteration 115 +MetaTest/__unnamed_task__/MaxReturn -148.741 +MetaTest/__unnamed_task__/MinReturn -219.338 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.4251 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.712e+06 +__unnamed_task__/AverageDiscountedReturn -73.4117 +__unnamed_task__/AverageReturn -173.974 +__unnamed_task__/Iteration 115 +__unnamed_task__/MaxReturn -150.022 +__unnamed_task__/MinReturn -233.789 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.2364 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:51:16 | [maml_trainer] epoch #116 | Sampling for adapation and meta-testing... +2025-04-03 01:52:27 | [maml_trainer] epoch #116 | Finished meta-testing... +2025-04-03 01:52:27 | [maml_trainer] epoch #116 | Saving snapshot... +2025-04-03 01:52:49 | [maml_trainer] epoch #116 | Saved +2025-04-03 01:52:49 | [maml_trainer] epoch #116 | Time 44247.50 s +2025-04-03 01:52:49 | [maml_trainer] epoch #116 | EpochTime 386.40 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0223 +Average/AverageReturn -167.194 +Average/Iteration 116 +Average/MaxReturn -145.919 +Average/MinReturn -239.882 +Average/NumEpisodes 80 +Average/StdReturn 17.5694 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6458 +GaussianMLPPolicy/KLAfter 0.00197041 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.94056e-05 +GaussianMLPPolicy/LossBefore 5.37932e-09 +GaussianMLPPolicy/dLoss 2.9411e-05 +Iteration 116 +MetaTest/Average/AverageDiscountedReturn -168.665 +MetaTest/Average/AverageReturn -168.665 +MetaTest/Average/Iteration 116 +MetaTest/Average/MaxReturn -150.615 +MetaTest/Average/MinReturn -202.493 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.8235 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.665 +MetaTest/__unnamed_task__/AverageReturn -168.665 +MetaTest/__unnamed_task__/Iteration 116 +MetaTest/__unnamed_task__/MaxReturn -150.615 +MetaTest/__unnamed_task__/MinReturn -202.493 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.8235 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.744e+06 +__unnamed_task__/AverageDiscountedReturn -71.0223 +__unnamed_task__/AverageReturn -167.194 +__unnamed_task__/Iteration 116 +__unnamed_task__/MaxReturn -145.919 +__unnamed_task__/MinReturn -239.882 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.5694 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 01:57:37 | [maml_trainer] epoch #117 | Sampling for adapation and meta-testing... +2025-04-03 01:58:49 | [maml_trainer] epoch #117 | Finished meta-testing... +2025-04-03 01:58:49 | [maml_trainer] epoch #117 | Saving snapshot... +2025-04-03 01:59:12 | [maml_trainer] epoch #117 | Saved +2025-04-03 01:59:12 | [maml_trainer] epoch #117 | Time 44630.13 s +2025-04-03 01:59:12 | [maml_trainer] epoch #117 | EpochTime 382.62 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5691 +Average/AverageReturn -171.201 +Average/Iteration 117 +Average/MaxReturn -145.869 +Average/MinReturn -236.284 +Average/NumEpisodes 80 +Average/StdReturn 20.0076 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6479 +GaussianMLPPolicy/KLAfter 0.00115833 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.19964e-07 +GaussianMLPPolicy/LossBefore -2.33948e-09 +GaussianMLPPolicy/dLoss 7.17625e-07 +Iteration 117 +MetaTest/Average/AverageDiscountedReturn -174.58 +MetaTest/Average/AverageReturn -174.58 +MetaTest/Average/Iteration 117 +MetaTest/Average/MaxReturn -148.172 +MetaTest/Average/MinReturn -233.198 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.254 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.58 +MetaTest/__unnamed_task__/AverageReturn -174.58 +MetaTest/__unnamed_task__/Iteration 117 +MetaTest/__unnamed_task__/MaxReturn -148.172 +MetaTest/__unnamed_task__/MinReturn -233.198 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.254 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.776e+06 +__unnamed_task__/AverageDiscountedReturn -72.5691 +__unnamed_task__/AverageReturn -171.201 +__unnamed_task__/Iteration 117 +__unnamed_task__/MaxReturn -145.869 +__unnamed_task__/MinReturn -236.284 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0076 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:04:02 | [maml_trainer] epoch #118 | Sampling for adapation and meta-testing... +2025-04-03 02:05:14 | [maml_trainer] epoch #118 | Finished meta-testing... +2025-04-03 02:05:14 | [maml_trainer] epoch #118 | Saving snapshot... +2025-04-03 02:05:37 | [maml_trainer] epoch #118 | Saved +2025-04-03 02:05:37 | [maml_trainer] epoch #118 | Time 45015.37 s +2025-04-03 02:05:37 | [maml_trainer] epoch #118 | EpochTime 385.24 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.0422 +Average/AverageReturn -171.723 +Average/Iteration 118 +Average/MaxReturn -141.811 +Average/MinReturn -257.578 +Average/NumEpisodes 80 +Average/StdReturn 23.4244 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6498 +GaussianMLPPolicy/KLAfter 0.00201455 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.93225e-05 +GaussianMLPPolicy/LossBefore -5.76675e-09 +GaussianMLPPolicy/dLoss 3.93168e-05 +Iteration 118 +MetaTest/Average/AverageDiscountedReturn -171.852 +MetaTest/Average/AverageReturn -171.852 +MetaTest/Average/Iteration 118 +MetaTest/Average/MaxReturn -146.392 +MetaTest/Average/MinReturn -243.817 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.9275 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.852 +MetaTest/__unnamed_task__/AverageReturn -171.852 +MetaTest/__unnamed_task__/Iteration 118 +MetaTest/__unnamed_task__/MaxReturn -146.392 +MetaTest/__unnamed_task__/MinReturn -243.817 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.9275 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.808e+06 +__unnamed_task__/AverageDiscountedReturn -73.0422 +__unnamed_task__/AverageReturn -171.723 +__unnamed_task__/Iteration 118 +__unnamed_task__/MaxReturn -141.811 +__unnamed_task__/MinReturn -257.578 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.4244 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:10:28 | [maml_trainer] epoch #119 | Sampling for adapation and meta-testing... +2025-04-03 02:11:40 | [maml_trainer] epoch #119 | Finished meta-testing... +2025-04-03 02:11:40 | [maml_trainer] epoch #119 | Saving snapshot... +2025-04-03 02:12:03 | [maml_trainer] epoch #119 | Saved +2025-04-03 02:12:03 | [maml_trainer] epoch #119 | Time 45401.91 s +2025-04-03 02:12:03 | [maml_trainer] epoch #119 | EpochTime 386.53 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4257 +Average/AverageReturn -168.407 +Average/Iteration 119 +Average/MaxReturn -126.108 +Average/MinReturn -241.241 +Average/NumEpisodes 80 +Average/StdReturn 18.5482 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6509 +GaussianMLPPolicy/KLAfter 0.00162193 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.17579e-06 +GaussianMLPPolicy/LossBefore -1.69873e-09 +GaussianMLPPolicy/dLoss -3.17749e-06 +Iteration 119 +MetaTest/Average/AverageDiscountedReturn -169.13 +MetaTest/Average/AverageReturn -169.13 +MetaTest/Average/Iteration 119 +MetaTest/Average/MaxReturn -143.291 +MetaTest/Average/MinReturn -219.318 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.7314 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.13 +MetaTest/__unnamed_task__/AverageReturn -169.13 +MetaTest/__unnamed_task__/Iteration 119 +MetaTest/__unnamed_task__/MaxReturn -143.291 +MetaTest/__unnamed_task__/MinReturn -219.318 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.7314 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.84e+06 +__unnamed_task__/AverageDiscountedReturn -71.4257 +__unnamed_task__/AverageReturn -168.407 +__unnamed_task__/Iteration 119 +__unnamed_task__/MaxReturn -126.108 +__unnamed_task__/MinReturn -241.241 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5482 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:16:52 | [maml_trainer] epoch #120 | Sampling for adapation and meta-testing... +2025-04-03 02:18:06 | [maml_trainer] epoch #120 | Finished meta-testing... +2025-04-03 02:18:06 | [maml_trainer] epoch #120 | Saving snapshot... +2025-04-03 02:18:27 | [maml_trainer] epoch #120 | Saved +2025-04-03 02:18:27 | [maml_trainer] epoch #120 | Time 45785.97 s +2025-04-03 02:18:27 | [maml_trainer] epoch #120 | EpochTime 384.06 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5246 +Average/AverageReturn -168.862 +Average/Iteration 120 +Average/MaxReturn -146.435 +Average/MinReturn -241.424 +Average/NumEpisodes 80 +Average/StdReturn 16.1487 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6493 +GaussianMLPPolicy/KLAfter 0.00224598 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.64501e-06 +GaussianMLPPolicy/LossBefore -3.76999e-09 +GaussianMLPPolicy/dLoss -7.64878e-06 +Iteration 120 +MetaTest/Average/AverageDiscountedReturn -177.785 +MetaTest/Average/AverageReturn -177.785 +MetaTest/Average/Iteration 120 +MetaTest/Average/MaxReturn -147.142 +MetaTest/Average/MinReturn -257.896 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.7551 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.785 +MetaTest/__unnamed_task__/AverageReturn -177.785 +MetaTest/__unnamed_task__/Iteration 120 +MetaTest/__unnamed_task__/MaxReturn -147.142 +MetaTest/__unnamed_task__/MinReturn -257.896 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.7551 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.872e+06 +__unnamed_task__/AverageDiscountedReturn -71.5246 +__unnamed_task__/AverageReturn -168.862 +__unnamed_task__/Iteration 120 +__unnamed_task__/MaxReturn -146.435 +__unnamed_task__/MinReturn -241.424 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.1487 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:23:15 | [maml_trainer] epoch #121 | Sampling for adapation and meta-testing... +2025-04-03 02:24:29 | [maml_trainer] epoch #121 | Finished meta-testing... +2025-04-03 02:24:29 | [maml_trainer] epoch #121 | Saving snapshot... +2025-04-03 02:24:51 | [maml_trainer] epoch #121 | Saved +2025-04-03 02:24:51 | [maml_trainer] epoch #121 | Time 46169.84 s +2025-04-03 02:24:51 | [maml_trainer] epoch #121 | EpochTime 383.86 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.4156 +Average/AverageReturn -171.351 +Average/Iteration 121 +Average/MaxReturn -147.337 +Average/MinReturn -241.864 +Average/NumEpisodes 80 +Average/StdReturn 20.6355 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6489 +GaussianMLPPolicy/KLAfter 0.00195299 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.21048e-05 +GaussianMLPPolicy/LossBefore -2.16067e-09 +GaussianMLPPolicy/dLoss -3.2107e-05 +Iteration 121 +MetaTest/Average/AverageDiscountedReturn -180.287 +MetaTest/Average/AverageReturn -180.287 +MetaTest/Average/Iteration 121 +MetaTest/Average/MaxReturn -154.41 +MetaTest/Average/MinReturn -237.215 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.856 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -180.287 +MetaTest/__unnamed_task__/AverageReturn -180.287 +MetaTest/__unnamed_task__/Iteration 121 +MetaTest/__unnamed_task__/MaxReturn -154.41 +MetaTest/__unnamed_task__/MinReturn -237.215 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.856 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.904e+06 +__unnamed_task__/AverageDiscountedReturn -72.4156 +__unnamed_task__/AverageReturn -171.351 +__unnamed_task__/Iteration 121 +__unnamed_task__/MaxReturn -147.337 +__unnamed_task__/MinReturn -241.864 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.6355 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:29:39 | [maml_trainer] epoch #122 | Sampling for adapation and meta-testing... +2025-04-03 02:30:52 | [maml_trainer] epoch #122 | Finished meta-testing... +2025-04-03 02:30:52 | [maml_trainer] epoch #122 | Saving snapshot... +2025-04-03 02:31:15 | [maml_trainer] epoch #122 | Saved +2025-04-03 02:31:15 | [maml_trainer] epoch #122 | Time 46553.79 s +2025-04-03 02:31:15 | [maml_trainer] epoch #122 | EpochTime 383.95 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.3625 +Average/AverageReturn -176.011 +Average/Iteration 122 +Average/MaxReturn -149.088 +Average/MinReturn -241.443 +Average/NumEpisodes 80 +Average/StdReturn 22.321 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6489 +GaussianMLPPolicy/KLAfter 0.0020698 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.53075e-06 +GaussianMLPPolicy/LossBefore 1.38581e-09 +GaussianMLPPolicy/dLoss -7.52936e-06 +Iteration 122 +MetaTest/Average/AverageDiscountedReturn -166.518 +MetaTest/Average/AverageReturn -166.518 +MetaTest/Average/Iteration 122 +MetaTest/Average/MaxReturn -154.12 +MetaTest/Average/MinReturn -181.648 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.49088 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.518 +MetaTest/__unnamed_task__/AverageReturn -166.518 +MetaTest/__unnamed_task__/Iteration 122 +MetaTest/__unnamed_task__/MaxReturn -154.12 +MetaTest/__unnamed_task__/MinReturn -181.648 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.49088 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.936e+06 +__unnamed_task__/AverageDiscountedReturn -74.3625 +__unnamed_task__/AverageReturn -176.011 +__unnamed_task__/Iteration 122 +__unnamed_task__/MaxReturn -149.088 +__unnamed_task__/MinReturn -241.443 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.321 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:36:06 | [maml_trainer] epoch #123 | Sampling for adapation and meta-testing... +2025-04-03 02:37:19 | [maml_trainer] epoch #123 | Finished meta-testing... +2025-04-03 02:37:19 | [maml_trainer] epoch #123 | Saving snapshot... +2025-04-03 02:37:42 | [maml_trainer] epoch #123 | Saved +2025-04-03 02:37:42 | [maml_trainer] epoch #123 | Time 46940.88 s +2025-04-03 02:37:42 | [maml_trainer] epoch #123 | EpochTime 387.09 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5661 +Average/AverageReturn -172.501 +Average/Iteration 123 +Average/MaxReturn -150.433 +Average/MinReturn -229.364 +Average/NumEpisodes 80 +Average/StdReturn 14.3932 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6465 +GaussianMLPPolicy/KLAfter 0.00208592 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.14895e-05 +GaussianMLPPolicy/LossBefore -2.5928e-09 +GaussianMLPPolicy/dLoss -3.14921e-05 +Iteration 123 +MetaTest/Average/AverageDiscountedReturn -175.993 +MetaTest/Average/AverageReturn -175.993 +MetaTest/Average/Iteration 123 +MetaTest/Average/MaxReturn -155.263 +MetaTest/Average/MinReturn -246.359 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.5869 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.993 +MetaTest/__unnamed_task__/AverageReturn -175.993 +MetaTest/__unnamed_task__/Iteration 123 +MetaTest/__unnamed_task__/MaxReturn -155.263 +MetaTest/__unnamed_task__/MinReturn -246.359 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.5869 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 3.968e+06 +__unnamed_task__/AverageDiscountedReturn -72.5661 +__unnamed_task__/AverageReturn -172.501 +__unnamed_task__/Iteration 123 +__unnamed_task__/MaxReturn -150.433 +__unnamed_task__/MinReturn -229.364 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.3932 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:42:36 | [maml_trainer] epoch #124 | Sampling for adapation and meta-testing... +2025-04-03 02:43:50 | [maml_trainer] epoch #124 | Finished meta-testing... +2025-04-03 02:43:50 | [maml_trainer] epoch #124 | Saving snapshot... +2025-04-03 02:44:12 | [maml_trainer] epoch #124 | Saved +2025-04-03 02:44:12 | [maml_trainer] epoch #124 | Time 47330.61 s +2025-04-03 02:44:12 | [maml_trainer] epoch #124 | EpochTime 389.72 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6923 +Average/AverageReturn -172.233 +Average/Iteration 124 +Average/MaxReturn -145.377 +Average/MinReturn -241.343 +Average/NumEpisodes 80 +Average/StdReturn 13.8331 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6462 +GaussianMLPPolicy/KLAfter 0.00146036 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.94544e-05 +GaussianMLPPolicy/LossBefore -1.49012e-10 +GaussianMLPPolicy/dLoss -1.94545e-05 +Iteration 124 +MetaTest/Average/AverageDiscountedReturn -176.269 +MetaTest/Average/AverageReturn -176.269 +MetaTest/Average/Iteration 124 +MetaTest/Average/MaxReturn -155.138 +MetaTest/Average/MinReturn -231.025 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.3807 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.269 +MetaTest/__unnamed_task__/AverageReturn -176.269 +MetaTest/__unnamed_task__/Iteration 124 +MetaTest/__unnamed_task__/MaxReturn -155.138 +MetaTest/__unnamed_task__/MinReturn -231.025 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.3807 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4e+06 +__unnamed_task__/AverageDiscountedReturn -72.6923 +__unnamed_task__/AverageReturn -172.233 +__unnamed_task__/Iteration 124 +__unnamed_task__/MaxReturn -145.377 +__unnamed_task__/MinReturn -241.343 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8331 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:49:02 | [maml_trainer] epoch #125 | Sampling for adapation and meta-testing... +2025-04-03 02:50:15 | [maml_trainer] epoch #125 | Finished meta-testing... +2025-04-03 02:50:15 | [maml_trainer] epoch #125 | Saving snapshot... +2025-04-03 02:50:37 | [maml_trainer] epoch #125 | Saved +2025-04-03 02:50:37 | [maml_trainer] epoch #125 | Time 47716.00 s +2025-04-03 02:50:37 | [maml_trainer] epoch #125 | EpochTime 385.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8469 +Average/AverageReturn -174.681 +Average/Iteration 125 +Average/MaxReturn -150.479 +Average/MinReturn -252.851 +Average/NumEpisodes 80 +Average/StdReturn 20.1054 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6482 +GaussianMLPPolicy/KLAfter 0.00119682 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.06486e-05 +GaussianMLPPolicy/LossBefore 5.66244e-10 +GaussianMLPPolicy/dLoss -3.0648e-05 +Iteration 125 +MetaTest/Average/AverageDiscountedReturn -175.986 +MetaTest/Average/AverageReturn -175.986 +MetaTest/Average/Iteration 125 +MetaTest/Average/MaxReturn -155.766 +MetaTest/Average/MinReturn -218.523 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.7248 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.986 +MetaTest/__unnamed_task__/AverageReturn -175.986 +MetaTest/__unnamed_task__/Iteration 125 +MetaTest/__unnamed_task__/MaxReturn -155.766 +MetaTest/__unnamed_task__/MinReturn -218.523 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.7248 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.032e+06 +__unnamed_task__/AverageDiscountedReturn -73.8469 +__unnamed_task__/AverageReturn -174.681 +__unnamed_task__/Iteration 125 +__unnamed_task__/MaxReturn -150.479 +__unnamed_task__/MinReturn -252.851 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.1054 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 02:55:29 | [maml_trainer] epoch #126 | Sampling for adapation and meta-testing... +2025-04-03 02:56:43 | [maml_trainer] epoch #126 | Finished meta-testing... +2025-04-03 02:56:43 | [maml_trainer] epoch #126 | Saving snapshot... +2025-04-03 02:57:06 | [maml_trainer] epoch #126 | Saved +2025-04-03 02:57:06 | [maml_trainer] epoch #126 | Time 48104.45 s +2025-04-03 02:57:06 | [maml_trainer] epoch #126 | EpochTime 388.45 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2142 +Average/AverageReturn -174.117 +Average/Iteration 126 +Average/MaxReturn -145.362 +Average/MinReturn -257.896 +Average/NumEpisodes 80 +Average/StdReturn 20.4819 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6502 +GaussianMLPPolicy/KLAfter 0.0011337 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.3964e-06 +GaussianMLPPolicy/LossBefore -9.59635e-09 +GaussianMLPPolicy/dLoss 5.3868e-06 +Iteration 126 +MetaTest/Average/AverageDiscountedReturn -183.76 +MetaTest/Average/AverageReturn -183.76 +MetaTest/Average/Iteration 126 +MetaTest/Average/MaxReturn -155.355 +MetaTest/Average/MinReturn -249.414 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.8531 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -183.76 +MetaTest/__unnamed_task__/AverageReturn -183.76 +MetaTest/__unnamed_task__/Iteration 126 +MetaTest/__unnamed_task__/MaxReturn -155.355 +MetaTest/__unnamed_task__/MinReturn -249.414 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.8531 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.064e+06 +__unnamed_task__/AverageDiscountedReturn -73.2142 +__unnamed_task__/AverageReturn -174.117 +__unnamed_task__/Iteration 126 +__unnamed_task__/MaxReturn -145.362 +__unnamed_task__/MinReturn -257.896 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.4819 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:01:57 | [maml_trainer] epoch #127 | Sampling for adapation and meta-testing... +2025-04-03 03:03:09 | [maml_trainer] epoch #127 | Finished meta-testing... +2025-04-03 03:03:09 | [maml_trainer] epoch #127 | Saving snapshot... +2025-04-03 03:03:32 | [maml_trainer] epoch #127 | Saved +2025-04-03 03:03:32 | [maml_trainer] epoch #127 | Time 48490.49 s +2025-04-03 03:03:32 | [maml_trainer] epoch #127 | EpochTime 386.03 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3054 +Average/AverageReturn -174.005 +Average/Iteration 127 +Average/MaxReturn -154.667 +Average/MinReturn -219.327 +Average/NumEpisodes 80 +Average/StdReturn 14.0775 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6516 +GaussianMLPPolicy/KLAfter 0.00129186 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.5308e-05 +GaussianMLPPolicy/LossBefore -3.59118e-09 +GaussianMLPPolicy/dLoss 6.53044e-05 +Iteration 127 +MetaTest/Average/AverageDiscountedReturn -170.738 +MetaTest/Average/AverageReturn -170.738 +MetaTest/Average/Iteration 127 +MetaTest/Average/MaxReturn -156.215 +MetaTest/Average/MinReturn -203.257 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.0214 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.738 +MetaTest/__unnamed_task__/AverageReturn -170.738 +MetaTest/__unnamed_task__/Iteration 127 +MetaTest/__unnamed_task__/MaxReturn -156.215 +MetaTest/__unnamed_task__/MinReturn -203.257 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.0214 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.096e+06 +__unnamed_task__/AverageDiscountedReturn -73.3054 +__unnamed_task__/AverageReturn -174.005 +__unnamed_task__/Iteration 127 +__unnamed_task__/MaxReturn -154.667 +__unnamed_task__/MinReturn -219.327 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.0775 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:08:23 | [maml_trainer] epoch #128 | Sampling for adapation and meta-testing... +2025-04-03 03:09:35 | [maml_trainer] epoch #128 | Finished meta-testing... +2025-04-03 03:09:35 | [maml_trainer] epoch #128 | Saving snapshot... +2025-04-03 03:09:59 | [maml_trainer] epoch #128 | Saved +2025-04-03 03:09:59 | [maml_trainer] epoch #128 | Time 48877.13 s +2025-04-03 03:09:59 | [maml_trainer] epoch #128 | EpochTime 386.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.2584 +Average/AverageReturn -175.582 +Average/Iteration 128 +Average/MaxReturn -155.458 +Average/MinReturn -311.307 +Average/NumEpisodes 80 +Average/StdReturn 23.4979 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6544 +GaussianMLPPolicy/KLAfter 0.00149354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.28176e-05 +GaussianMLPPolicy/LossBefore 2.14577e-09 +GaussianMLPPolicy/dLoss 2.28197e-05 +Iteration 128 +MetaTest/Average/AverageDiscountedReturn -174.326 +MetaTest/Average/AverageReturn -174.326 +MetaTest/Average/Iteration 128 +MetaTest/Average/MaxReturn -152.908 +MetaTest/Average/MinReturn -213.823 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.9726 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.326 +MetaTest/__unnamed_task__/AverageReturn -174.326 +MetaTest/__unnamed_task__/Iteration 128 +MetaTest/__unnamed_task__/MaxReturn -152.908 +MetaTest/__unnamed_task__/MinReturn -213.823 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.9726 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.128e+06 +__unnamed_task__/AverageDiscountedReturn -74.2584 +__unnamed_task__/AverageReturn -175.582 +__unnamed_task__/Iteration 128 +__unnamed_task__/MaxReturn -155.458 +__unnamed_task__/MinReturn -311.307 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.4979 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:14:50 | [maml_trainer] epoch #129 | Sampling for adapation and meta-testing... +2025-04-03 03:16:04 | [maml_trainer] epoch #129 | Finished meta-testing... +2025-04-03 03:16:04 | [maml_trainer] epoch #129 | Saving snapshot... +2025-04-03 03:16:25 | [maml_trainer] epoch #129 | Saved +2025-04-03 03:16:25 | [maml_trainer] epoch #129 | Time 49263.63 s +2025-04-03 03:16:25 | [maml_trainer] epoch #129 | EpochTime 386.49 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -76.7156 +Average/AverageReturn -178.381 +Average/Iteration 129 +Average/MaxReturn -152.584 +Average/MinReturn -374.038 +Average/NumEpisodes 80 +Average/StdReturn 29.5027 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6577 +GaussianMLPPolicy/KLAfter 0.00137696 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.35272e-05 +GaussianMLPPolicy/LossBefore -2.5779e-09 +GaussianMLPPolicy/dLoss 1.35246e-05 +Iteration 129 +MetaTest/Average/AverageDiscountedReturn -172.978 +MetaTest/Average/AverageReturn -172.978 +MetaTest/Average/Iteration 129 +MetaTest/Average/MaxReturn -157.583 +MetaTest/Average/MinReturn -227.417 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.6139 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.978 +MetaTest/__unnamed_task__/AverageReturn -172.978 +MetaTest/__unnamed_task__/Iteration 129 +MetaTest/__unnamed_task__/MaxReturn -157.583 +MetaTest/__unnamed_task__/MinReturn -227.417 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.6139 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.16e+06 +__unnamed_task__/AverageDiscountedReturn -76.7156 +__unnamed_task__/AverageReturn -178.381 +__unnamed_task__/Iteration 129 +__unnamed_task__/MaxReturn -152.584 +__unnamed_task__/MinReturn -374.038 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 29.5027 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:21:16 | [maml_trainer] epoch #130 | Sampling for adapation and meta-testing... +2025-04-03 03:22:28 | [maml_trainer] epoch #130 | Finished meta-testing... +2025-04-03 03:22:28 | [maml_trainer] epoch #130 | Saving snapshot... +2025-04-03 03:22:51 | [maml_trainer] epoch #130 | Saved +2025-04-03 03:22:51 | [maml_trainer] epoch #130 | Time 49649.99 s +2025-04-03 03:22:51 | [maml_trainer] epoch #130 | EpochTime 386.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.4063 +Average/AverageReturn -176.071 +Average/Iteration 130 +Average/MaxReturn -141.99 +Average/MinReturn -253.01 +Average/NumEpisodes 80 +Average/StdReturn 21.3981 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6608 +GaussianMLPPolicy/KLAfter 0.0012904 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.46664e-05 +GaussianMLPPolicy/LossBefore 2.90573e-09 +GaussianMLPPolicy/dLoss 1.46693e-05 +Iteration 130 +MetaTest/Average/AverageDiscountedReturn -171.289 +MetaTest/Average/AverageReturn -171.289 +MetaTest/Average/Iteration 130 +MetaTest/Average/MaxReturn -156.922 +MetaTest/Average/MinReturn -191.083 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.30379 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.289 +MetaTest/__unnamed_task__/AverageReturn -171.289 +MetaTest/__unnamed_task__/Iteration 130 +MetaTest/__unnamed_task__/MaxReturn -156.922 +MetaTest/__unnamed_task__/MinReturn -191.083 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.30379 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.192e+06 +__unnamed_task__/AverageDiscountedReturn -74.4063 +__unnamed_task__/AverageReturn -176.071 +__unnamed_task__/Iteration 130 +__unnamed_task__/MaxReturn -141.99 +__unnamed_task__/MinReturn -253.01 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.3981 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:27:45 | [maml_trainer] epoch #131 | Sampling for adapation and meta-testing... +2025-04-03 03:28:58 | [maml_trainer] epoch #131 | Finished meta-testing... +2025-04-03 03:28:58 | [maml_trainer] epoch #131 | Saving snapshot... +2025-04-03 03:29:21 | [maml_trainer] epoch #131 | Saved +2025-04-03 03:29:21 | [maml_trainer] epoch #131 | Time 50039.37 s +2025-04-03 03:29:21 | [maml_trainer] epoch #131 | EpochTime 389.37 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4424 +Average/AverageReturn -174.842 +Average/Iteration 131 +Average/MaxReturn -150.849 +Average/MinReturn -245.771 +Average/NumEpisodes 80 +Average/StdReturn 18.0447 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6636 +GaussianMLPPolicy/KLAfter 0.00188119 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -9.78123e-06 +GaussianMLPPolicy/LossBefore 5.96047e-11 +GaussianMLPPolicy/dLoss 9.78129e-06 +Iteration 131 +MetaTest/Average/AverageDiscountedReturn -176.814 +MetaTest/Average/AverageReturn -176.814 +MetaTest/Average/Iteration 131 +MetaTest/Average/MaxReturn -164.091 +MetaTest/Average/MinReturn -244.683 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.3142 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.814 +MetaTest/__unnamed_task__/AverageReturn -176.814 +MetaTest/__unnamed_task__/Iteration 131 +MetaTest/__unnamed_task__/MaxReturn -164.091 +MetaTest/__unnamed_task__/MinReturn -244.683 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.3142 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.224e+06 +__unnamed_task__/AverageDiscountedReturn -73.4424 +__unnamed_task__/AverageReturn -174.842 +__unnamed_task__/Iteration 131 +__unnamed_task__/MaxReturn -150.849 +__unnamed_task__/MinReturn -245.771 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0447 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:34:15 | [maml_trainer] epoch #132 | Sampling for adapation and meta-testing... +2025-04-03 03:35:28 | [maml_trainer] epoch #132 | Finished meta-testing... +2025-04-03 03:35:28 | [maml_trainer] epoch #132 | Saving snapshot... +2025-04-03 03:35:51 | [maml_trainer] epoch #132 | Saved +2025-04-03 03:35:51 | [maml_trainer] epoch #132 | Time 50429.22 s +2025-04-03 03:35:51 | [maml_trainer] epoch #132 | EpochTime 389.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.7596 +Average/AverageReturn -177.465 +Average/Iteration 132 +Average/MaxReturn -154.986 +Average/MinReturn -244.244 +Average/NumEpisodes 80 +Average/StdReturn 20.746 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6667 +GaussianMLPPolicy/KLAfter 0.00183823 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.52964e-05 +GaussianMLPPolicy/LossBefore 5.31971e-09 +GaussianMLPPolicy/dLoss -4.52911e-05 +Iteration 132 +MetaTest/Average/AverageDiscountedReturn -171.971 +MetaTest/Average/AverageReturn -171.971 +MetaTest/Average/Iteration 132 +MetaTest/Average/MaxReturn -154.53 +MetaTest/Average/MinReturn -227.558 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1948 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.971 +MetaTest/__unnamed_task__/AverageReturn -171.971 +MetaTest/__unnamed_task__/Iteration 132 +MetaTest/__unnamed_task__/MaxReturn -154.53 +MetaTest/__unnamed_task__/MinReturn -227.558 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1948 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.256e+06 +__unnamed_task__/AverageDiscountedReturn -74.7596 +__unnamed_task__/AverageReturn -177.465 +__unnamed_task__/Iteration 132 +__unnamed_task__/MaxReturn -154.986 +__unnamed_task__/MinReturn -244.244 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.746 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:40:43 | [maml_trainer] epoch #133 | Sampling for adapation and meta-testing... +2025-04-03 03:41:55 | [maml_trainer] epoch #133 | Finished meta-testing... +2025-04-03 03:41:55 | [maml_trainer] epoch #133 | Saving snapshot... +2025-04-03 03:42:17 | [maml_trainer] epoch #133 | Saved +2025-04-03 03:42:17 | [maml_trainer] epoch #133 | Time 50815.27 s +2025-04-03 03:42:17 | [maml_trainer] epoch #133 | EpochTime 386.04 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2001 +Average/AverageReturn -174.443 +Average/Iteration 133 +Average/MaxReturn -151.281 +Average/MinReturn -242.541 +Average/NumEpisodes 80 +Average/StdReturn 16.6489 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6686 +GaussianMLPPolicy/KLAfter 0.00178409 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.46806e-05 +GaussianMLPPolicy/LossBefore -4.79817e-09 +GaussianMLPPolicy/dLoss 4.46758e-05 +Iteration 133 +MetaTest/Average/AverageDiscountedReturn -177.702 +MetaTest/Average/AverageReturn -177.702 +MetaTest/Average/Iteration 133 +MetaTest/Average/MaxReturn -155.312 +MetaTest/Average/MinReturn -234.521 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.6909 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.702 +MetaTest/__unnamed_task__/AverageReturn -177.702 +MetaTest/__unnamed_task__/Iteration 133 +MetaTest/__unnamed_task__/MaxReturn -155.312 +MetaTest/__unnamed_task__/MinReturn -234.521 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.6909 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.288e+06 +__unnamed_task__/AverageDiscountedReturn -73.2001 +__unnamed_task__/AverageReturn -174.443 +__unnamed_task__/Iteration 133 +__unnamed_task__/MaxReturn -151.281 +__unnamed_task__/MinReturn -242.541 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6489 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:47:08 | [maml_trainer] epoch #134 | Sampling for adapation and meta-testing... +2025-04-03 03:48:21 | [maml_trainer] epoch #134 | Finished meta-testing... +2025-04-03 03:48:21 | [maml_trainer] epoch #134 | Saving snapshot... +2025-04-03 03:48:44 | [maml_trainer] epoch #134 | Saved +2025-04-03 03:48:44 | [maml_trainer] epoch #134 | Time 51202.45 s +2025-04-03 03:48:44 | [maml_trainer] epoch #134 | EpochTime 387.18 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2025 +Average/AverageReturn -173.989 +Average/Iteration 134 +Average/MaxReturn -147.978 +Average/MinReturn -235.511 +Average/NumEpisodes 80 +Average/StdReturn 16.0477 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6693 +GaussianMLPPolicy/KLAfter 0.00211372 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.67534e-05 +GaussianMLPPolicy/LossBefore -1.71363e-09 +GaussianMLPPolicy/dLoss -3.67552e-05 +Iteration 134 +MetaTest/Average/AverageDiscountedReturn -172.935 +MetaTest/Average/AverageReturn -172.935 +MetaTest/Average/Iteration 134 +MetaTest/Average/MaxReturn -158.188 +MetaTest/Average/MinReturn -196.823 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.9924 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.935 +MetaTest/__unnamed_task__/AverageReturn -172.935 +MetaTest/__unnamed_task__/Iteration 134 +MetaTest/__unnamed_task__/MaxReturn -158.188 +MetaTest/__unnamed_task__/MinReturn -196.823 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.9924 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.32e+06 +__unnamed_task__/AverageDiscountedReturn -73.2025 +__unnamed_task__/AverageReturn -173.989 +__unnamed_task__/Iteration 134 +__unnamed_task__/MaxReturn -147.978 +__unnamed_task__/MinReturn -235.511 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.0477 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 03:53:37 | [maml_trainer] epoch #135 | Sampling for adapation and meta-testing... +2025-04-03 03:54:49 | [maml_trainer] epoch #135 | Finished meta-testing... +2025-04-03 03:54:49 | [maml_trainer] epoch #135 | Saving snapshot... +2025-04-03 03:55:12 | [maml_trainer] epoch #135 | Saved +2025-04-03 03:55:12 | [maml_trainer] epoch #135 | Time 51590.53 s +2025-04-03 03:55:12 | [maml_trainer] epoch #135 | EpochTime 388.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.5556 +Average/AverageReturn -177.48 +Average/Iteration 135 +Average/MaxReturn -154.487 +Average/MinReturn -237.41 +Average/NumEpisodes 80 +Average/StdReturn 19.3708 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6689 +GaussianMLPPolicy/KLAfter 0.00225238 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.91516e-05 +GaussianMLPPolicy/LossBefore 4.38094e-09 +GaussianMLPPolicy/dLoss 2.9156e-05 +Iteration 135 +MetaTest/Average/AverageDiscountedReturn -180.981 +MetaTest/Average/AverageReturn -180.981 +MetaTest/Average/Iteration 135 +MetaTest/Average/MaxReturn -157.448 +MetaTest/Average/MinReturn -240.322 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.8187 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -180.981 +MetaTest/__unnamed_task__/AverageReturn -180.981 +MetaTest/__unnamed_task__/Iteration 135 +MetaTest/__unnamed_task__/MaxReturn -157.448 +MetaTest/__unnamed_task__/MinReturn -240.322 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.8187 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.352e+06 +__unnamed_task__/AverageDiscountedReturn -74.5556 +__unnamed_task__/AverageReturn -177.48 +__unnamed_task__/Iteration 135 +__unnamed_task__/MaxReturn -154.487 +__unnamed_task__/MinReturn -237.41 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.3708 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:00:06 | [maml_trainer] epoch #136 | Sampling for adapation and meta-testing... +2025-04-03 04:01:19 | [maml_trainer] epoch #136 | Finished meta-testing... +2025-04-03 04:01:19 | [maml_trainer] epoch #136 | Saving snapshot... +2025-04-03 04:01:42 | [maml_trainer] epoch #136 | Saved +2025-04-03 04:01:42 | [maml_trainer] epoch #136 | Time 51980.41 s +2025-04-03 04:01:42 | [maml_trainer] epoch #136 | EpochTime 389.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2992 +Average/AverageReturn -174.209 +Average/Iteration 136 +Average/MaxReturn -152.058 +Average/MinReturn -239.275 +Average/NumEpisodes 80 +Average/StdReturn 15.4918 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6652 +GaussianMLPPolicy/KLAfter 0.00270544 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.14834e-05 +GaussianMLPPolicy/LossBefore 3.65078e-09 +GaussianMLPPolicy/dLoss -4.14797e-05 +Iteration 136 +MetaTest/Average/AverageDiscountedReturn -175.039 +MetaTest/Average/AverageReturn -175.039 +MetaTest/Average/Iteration 136 +MetaTest/Average/MaxReturn -156.982 +MetaTest/Average/MinReturn -201.997 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.3568 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.039 +MetaTest/__unnamed_task__/AverageReturn -175.039 +MetaTest/__unnamed_task__/Iteration 136 +MetaTest/__unnamed_task__/MaxReturn -156.982 +MetaTest/__unnamed_task__/MinReturn -201.997 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.3568 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.384e+06 +__unnamed_task__/AverageDiscountedReturn -73.2992 +__unnamed_task__/AverageReturn -174.209 +__unnamed_task__/Iteration 136 +__unnamed_task__/MaxReturn -152.058 +__unnamed_task__/MinReturn -239.275 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4918 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:06:35 | [maml_trainer] epoch #137 | Sampling for adapation and meta-testing... +2025-04-03 04:07:49 | [maml_trainer] epoch #137 | Finished meta-testing... +2025-04-03 04:07:49 | [maml_trainer] epoch #137 | Saving snapshot... +2025-04-03 04:08:11 | [maml_trainer] epoch #137 | Saved +2025-04-03 04:08:11 | [maml_trainer] epoch #137 | Time 52369.42 s +2025-04-03 04:08:11 | [maml_trainer] epoch #137 | EpochTime 389.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.4072 +Average/AverageReturn -179.247 +Average/Iteration 137 +Average/MaxReturn -155.465 +Average/MinReturn -240.59 +Average/NumEpisodes 80 +Average/StdReturn 19.3157 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6619 +GaussianMLPPolicy/KLAfter 0.00259683 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.39312e-06 +GaussianMLPPolicy/LossBefore 3.06964e-09 +GaussianMLPPolicy/dLoss -8.39005e-06 +Iteration 137 +MetaTest/Average/AverageDiscountedReturn -176.027 +MetaTest/Average/AverageReturn -176.027 +MetaTest/Average/Iteration 137 +MetaTest/Average/MaxReturn -155.221 +MetaTest/Average/MinReturn -232.345 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.4928 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.027 +MetaTest/__unnamed_task__/AverageReturn -176.027 +MetaTest/__unnamed_task__/Iteration 137 +MetaTest/__unnamed_task__/MaxReturn -155.221 +MetaTest/__unnamed_task__/MinReturn -232.345 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.4928 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.416e+06 +__unnamed_task__/AverageDiscountedReturn -75.4072 +__unnamed_task__/AverageReturn -179.247 +__unnamed_task__/Iteration 137 +__unnamed_task__/MaxReturn -155.465 +__unnamed_task__/MinReturn -240.59 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.3157 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:13:02 | [maml_trainer] epoch #138 | Sampling for adapation and meta-testing... +2025-04-03 04:14:15 | [maml_trainer] epoch #138 | Finished meta-testing... +2025-04-03 04:14:15 | [maml_trainer] epoch #138 | Saving snapshot... +2025-04-03 04:14:38 | [maml_trainer] epoch #138 | Saved +2025-04-03 04:14:38 | [maml_trainer] epoch #138 | Time 52756.66 s +2025-04-03 04:14:38 | [maml_trainer] epoch #138 | EpochTime 387.23 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.4801 +Average/AverageReturn -177.14 +Average/Iteration 138 +Average/MaxReturn -157.142 +Average/MinReturn -242.864 +Average/NumEpisodes 80 +Average/StdReturn 19.1679 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6564 +GaussianMLPPolicy/KLAfter 0.00256376 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.45661e-05 +GaussianMLPPolicy/LossBefore 4.61936e-10 +GaussianMLPPolicy/dLoss -5.45656e-05 +Iteration 138 +MetaTest/Average/AverageDiscountedReturn -180.034 +MetaTest/Average/AverageReturn -180.034 +MetaTest/Average/Iteration 138 +MetaTest/Average/MaxReturn -158.123 +MetaTest/Average/MinReturn -233.162 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.2312 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -180.034 +MetaTest/__unnamed_task__/AverageReturn -180.034 +MetaTest/__unnamed_task__/Iteration 138 +MetaTest/__unnamed_task__/MaxReturn -158.123 +MetaTest/__unnamed_task__/MinReturn -233.162 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.2312 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.448e+06 +__unnamed_task__/AverageDiscountedReturn -74.4801 +__unnamed_task__/AverageReturn -177.14 +__unnamed_task__/Iteration 138 +__unnamed_task__/MaxReturn -157.142 +__unnamed_task__/MinReturn -242.864 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.1679 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:19:32 | [maml_trainer] epoch #139 | Sampling for adapation and meta-testing... +2025-04-03 04:20:45 | [maml_trainer] epoch #139 | Finished meta-testing... +2025-04-03 04:20:45 | [maml_trainer] epoch #139 | Saving snapshot... +2025-04-03 04:21:08 | [maml_trainer] epoch #139 | Saved +2025-04-03 04:21:08 | [maml_trainer] epoch #139 | Time 53146.82 s +2025-04-03 04:21:08 | [maml_trainer] epoch #139 | EpochTime 390.16 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.9671 +Average/AverageReturn -176.352 +Average/Iteration 139 +Average/MaxReturn -148.725 +Average/MinReturn -288.673 +Average/NumEpisodes 80 +Average/StdReturn 18.514 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6496 +GaussianMLPPolicy/KLAfter 0.00233454 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.69884e-05 +GaussianMLPPolicy/LossBefore -3.12924e-09 +GaussianMLPPolicy/dLoss 1.69853e-05 +Iteration 139 +MetaTest/Average/AverageDiscountedReturn -179.101 +MetaTest/Average/AverageReturn -179.101 +MetaTest/Average/Iteration 139 +MetaTest/Average/MaxReturn -158.98 +MetaTest/Average/MinReturn -248.853 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.937 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -179.101 +MetaTest/__unnamed_task__/AverageReturn -179.101 +MetaTest/__unnamed_task__/Iteration 139 +MetaTest/__unnamed_task__/MaxReturn -158.98 +MetaTest/__unnamed_task__/MinReturn -248.853 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.937 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.48e+06 +__unnamed_task__/AverageDiscountedReturn -74.9671 +__unnamed_task__/AverageReturn -176.352 +__unnamed_task__/Iteration 139 +__unnamed_task__/MaxReturn -148.725 +__unnamed_task__/MinReturn -288.673 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.514 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:26:03 | [maml_trainer] epoch #140 | Sampling for adapation and meta-testing... +2025-04-03 04:27:18 | [maml_trainer] epoch #140 | Finished meta-testing... +2025-04-03 04:27:18 | [maml_trainer] epoch #140 | Saving snapshot... +2025-04-03 04:27:41 | [maml_trainer] epoch #140 | Saved +2025-04-03 04:27:41 | [maml_trainer] epoch #140 | Time 53540.00 s +2025-04-03 04:27:41 | [maml_trainer] epoch #140 | EpochTime 393.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.4424 +Average/AverageReturn -177.22 +Average/Iteration 140 +Average/MaxReturn -156.053 +Average/MinReturn -239.506 +Average/NumEpisodes 80 +Average/StdReturn 18.0228 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6428 +GaussianMLPPolicy/KLAfter 0.00183634 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.34664e-05 +GaussianMLPPolicy/LossBefore -3.8296e-09 +GaussianMLPPolicy/dLoss 3.34626e-05 +Iteration 140 +MetaTest/Average/AverageDiscountedReturn -175.232 +MetaTest/Average/AverageReturn -175.232 +MetaTest/Average/Iteration 140 +MetaTest/Average/MaxReturn -158.846 +MetaTest/Average/MinReturn -195.128 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.2853 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.232 +MetaTest/__unnamed_task__/AverageReturn -175.232 +MetaTest/__unnamed_task__/Iteration 140 +MetaTest/__unnamed_task__/MaxReturn -158.846 +MetaTest/__unnamed_task__/MinReturn -195.128 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.2853 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.512e+06 +__unnamed_task__/AverageDiscountedReturn -74.4424 +__unnamed_task__/AverageReturn -177.22 +__unnamed_task__/Iteration 140 +__unnamed_task__/MaxReturn -156.053 +__unnamed_task__/MinReturn -239.506 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.0228 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:32:36 | [maml_trainer] epoch #141 | Sampling for adapation and meta-testing... +2025-04-03 04:33:49 | [maml_trainer] epoch #141 | Finished meta-testing... +2025-04-03 04:33:49 | [maml_trainer] epoch #141 | Saving snapshot... +2025-04-03 04:34:11 | [maml_trainer] epoch #141 | Saved +2025-04-03 04:34:11 | [maml_trainer] epoch #141 | Time 53929.80 s +2025-04-03 04:34:11 | [maml_trainer] epoch #141 | EpochTime 389.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.4869 +Average/AverageReturn -176.996 +Average/Iteration 141 +Average/MaxReturn -152.867 +Average/MinReturn -237.025 +Average/NumEpisodes 80 +Average/StdReturn 20.1881 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6373 +GaussianMLPPolicy/KLAfter 0.00216882 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.48118e-05 +GaussianMLPPolicy/LossBefore 3.8743e-10 +GaussianMLPPolicy/dLoss 1.48122e-05 +Iteration 141 +MetaTest/Average/AverageDiscountedReturn -171.301 +MetaTest/Average/AverageReturn -171.301 +MetaTest/Average/Iteration 141 +MetaTest/Average/MaxReturn -140.105 +MetaTest/Average/MinReturn -193.558 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.366 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.301 +MetaTest/__unnamed_task__/AverageReturn -171.301 +MetaTest/__unnamed_task__/Iteration 141 +MetaTest/__unnamed_task__/MaxReturn -140.105 +MetaTest/__unnamed_task__/MinReturn -193.558 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.366 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.544e+06 +__unnamed_task__/AverageDiscountedReturn -74.4869 +__unnamed_task__/AverageReturn -176.996 +__unnamed_task__/Iteration 141 +__unnamed_task__/MaxReturn -152.867 +__unnamed_task__/MinReturn -237.025 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.1881 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:39:03 | [maml_trainer] epoch #142 | Sampling for adapation and meta-testing... +2025-04-03 04:40:17 | [maml_trainer] epoch #142 | Finished meta-testing... +2025-04-03 04:40:17 | [maml_trainer] epoch #142 | Saving snapshot... +2025-04-03 04:40:39 | [maml_trainer] epoch #142 | Saved +2025-04-03 04:40:39 | [maml_trainer] epoch #142 | Time 54318.11 s +2025-04-03 04:40:39 | [maml_trainer] epoch #142 | EpochTime 388.31 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7287 +Average/AverageReturn -172.582 +Average/Iteration 142 +Average/MaxReturn -154.948 +Average/MinReturn -234.632 +Average/NumEpisodes 80 +Average/StdReturn 13.3491 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6313 +GaussianMLPPolicy/KLAfter 0.00211673 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.89971e-06 +GaussianMLPPolicy/LossBefore 2.16067e-09 +GaussianMLPPolicy/dLoss -3.89755e-06 +Iteration 142 +MetaTest/Average/AverageDiscountedReturn -174.021 +MetaTest/Average/AverageReturn -174.021 +MetaTest/Average/Iteration 142 +MetaTest/Average/MaxReturn -161.758 +MetaTest/Average/MinReturn -235.492 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.3197 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.021 +MetaTest/__unnamed_task__/AverageReturn -174.021 +MetaTest/__unnamed_task__/Iteration 142 +MetaTest/__unnamed_task__/MaxReturn -161.758 +MetaTest/__unnamed_task__/MinReturn -235.492 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.3197 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.576e+06 +__unnamed_task__/AverageDiscountedReturn -72.7287 +__unnamed_task__/AverageReturn -172.582 +__unnamed_task__/Iteration 142 +__unnamed_task__/MaxReturn -154.948 +__unnamed_task__/MinReturn -234.632 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.3491 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:45:34 | [maml_trainer] epoch #143 | Sampling for adapation and meta-testing... +2025-04-03 04:46:47 | [maml_trainer] epoch #143 | Finished meta-testing... +2025-04-03 04:46:47 | [maml_trainer] epoch #143 | Saving snapshot... +2025-04-03 04:47:10 | [maml_trainer] epoch #143 | Saved +2025-04-03 04:47:10 | [maml_trainer] epoch #143 | Time 54708.81 s +2025-04-03 04:47:10 | [maml_trainer] epoch #143 | EpochTime 390.69 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.5849 +Average/AverageReturn -178.783 +Average/Iteration 143 +Average/MaxReturn -152.715 +Average/MinReturn -259.527 +Average/NumEpisodes 80 +Average/StdReturn 20.6051 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6249 +GaussianMLPPolicy/KLAfter 0.00261572 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.18342e-06 +GaussianMLPPolicy/LossBefore 3.15905e-09 +GaussianMLPPolicy/dLoss 3.18658e-06 +Iteration 143 +MetaTest/Average/AverageDiscountedReturn -177.338 +MetaTest/Average/AverageReturn -177.338 +MetaTest/Average/Iteration 143 +MetaTest/Average/MaxReturn -155.606 +MetaTest/Average/MinReturn -250.469 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.1887 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.338 +MetaTest/__unnamed_task__/AverageReturn -177.338 +MetaTest/__unnamed_task__/Iteration 143 +MetaTest/__unnamed_task__/MaxReturn -155.606 +MetaTest/__unnamed_task__/MinReturn -250.469 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.1887 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.608e+06 +__unnamed_task__/AverageDiscountedReturn -75.5849 +__unnamed_task__/AverageReturn -178.783 +__unnamed_task__/Iteration 143 +__unnamed_task__/MaxReturn -152.715 +__unnamed_task__/MinReturn -259.527 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.6051 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:52:06 | [maml_trainer] epoch #144 | Sampling for adapation and meta-testing... +2025-04-03 04:53:19 | [maml_trainer] epoch #144 | Finished meta-testing... +2025-04-03 04:53:19 | [maml_trainer] epoch #144 | Saving snapshot... +2025-04-03 04:53:42 | [maml_trainer] epoch #144 | Saved +2025-04-03 04:53:42 | [maml_trainer] epoch #144 | Time 55100.82 s +2025-04-03 04:53:42 | [maml_trainer] epoch #144 | EpochTime 392.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.0153 +Average/AverageReturn -177.808 +Average/Iteration 144 +Average/MaxReturn -155.259 +Average/MinReturn -301.834 +Average/NumEpisodes 80 +Average/StdReturn 21.6625 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6189 +GaussianMLPPolicy/KLAfter 0.0022756 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.10432e-06 +GaussianMLPPolicy/LossBefore 5.0813e-09 +GaussianMLPPolicy/dLoss 8.10941e-06 +Iteration 144 +MetaTest/Average/AverageDiscountedReturn -183.271 +MetaTest/Average/AverageReturn -183.271 +MetaTest/Average/Iteration 144 +MetaTest/Average/MaxReturn -152.119 +MetaTest/Average/MinReturn -226.058 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.7415 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -183.271 +MetaTest/__unnamed_task__/AverageReturn -183.271 +MetaTest/__unnamed_task__/Iteration 144 +MetaTest/__unnamed_task__/MaxReturn -152.119 +MetaTest/__unnamed_task__/MinReturn -226.058 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.7415 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.64e+06 +__unnamed_task__/AverageDiscountedReturn -75.0153 +__unnamed_task__/AverageReturn -177.808 +__unnamed_task__/Iteration 144 +__unnamed_task__/MaxReturn -155.259 +__unnamed_task__/MinReturn -301.834 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.6625 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 04:58:40 | [maml_trainer] epoch #145 | Sampling for adapation and meta-testing... +2025-04-03 04:59:53 | [maml_trainer] epoch #145 | Finished meta-testing... +2025-04-03 04:59:53 | [maml_trainer] epoch #145 | Saving snapshot... +2025-04-03 05:00:16 | [maml_trainer] epoch #145 | Saved +2025-04-03 05:00:16 | [maml_trainer] epoch #145 | Time 55494.68 s +2025-04-03 05:00:16 | [maml_trainer] epoch #145 | EpochTime 393.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.6043 +Average/AverageReturn -179.288 +Average/Iteration 145 +Average/MaxReturn -158.914 +Average/MinReturn -324.292 +Average/NumEpisodes 80 +Average/StdReturn 22.9105 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6107 +GaussianMLPPolicy/KLAfter 0.00225173 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.73369e-07 +GaussianMLPPolicy/LossBefore 6.16908e-09 +GaussianMLPPolicy/dLoss -6.67199e-07 +Iteration 145 +MetaTest/Average/AverageDiscountedReturn -177.807 +MetaTest/Average/AverageReturn -177.807 +MetaTest/Average/Iteration 145 +MetaTest/Average/MaxReturn -159.263 +MetaTest/Average/MinReturn -270.886 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.2105 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.807 +MetaTest/__unnamed_task__/AverageReturn -177.807 +MetaTest/__unnamed_task__/Iteration 145 +MetaTest/__unnamed_task__/MaxReturn -159.263 +MetaTest/__unnamed_task__/MinReturn -270.886 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.2105 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.672e+06 +__unnamed_task__/AverageDiscountedReturn -75.6043 +__unnamed_task__/AverageReturn -179.288 +__unnamed_task__/Iteration 145 +__unnamed_task__/MaxReturn -158.914 +__unnamed_task__/MinReturn -324.292 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.9105 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:05:11 | [maml_trainer] epoch #146 | Sampling for adapation and meta-testing... +2025-04-03 05:06:22 | [maml_trainer] epoch #146 | Finished meta-testing... +2025-04-03 05:06:22 | [maml_trainer] epoch #146 | Saving snapshot... +2025-04-03 05:06:44 | [maml_trainer] epoch #146 | Saved +2025-04-03 05:06:44 | [maml_trainer] epoch #146 | Time 55882.83 s +2025-04-03 05:06:44 | [maml_trainer] epoch #146 | EpochTime 388.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8754 +Average/AverageReturn -175.796 +Average/Iteration 146 +Average/MaxReturn -153.744 +Average/MinReturn -236.674 +Average/NumEpisodes 80 +Average/StdReturn 17.9286 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.6024 +GaussianMLPPolicy/KLAfter 0.00210442 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.47727e-05 +GaussianMLPPolicy/LossBefore 3.09944e-09 +GaussianMLPPolicy/dLoss -1.47696e-05 +Iteration 146 +MetaTest/Average/AverageDiscountedReturn -173.77 +MetaTest/Average/AverageReturn -173.77 +MetaTest/Average/Iteration 146 +MetaTest/Average/MaxReturn -158.152 +MetaTest/Average/MinReturn -226.989 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.3486 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.77 +MetaTest/__unnamed_task__/AverageReturn -173.77 +MetaTest/__unnamed_task__/Iteration 146 +MetaTest/__unnamed_task__/MaxReturn -158.152 +MetaTest/__unnamed_task__/MinReturn -226.989 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.3486 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.704e+06 +__unnamed_task__/AverageDiscountedReturn -73.8754 +__unnamed_task__/AverageReturn -175.796 +__unnamed_task__/Iteration 146 +__unnamed_task__/MaxReturn -153.744 +__unnamed_task__/MinReturn -236.674 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9286 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:11:36 | [maml_trainer] epoch #147 | Sampling for adapation and meta-testing... +2025-04-03 05:12:49 | [maml_trainer] epoch #147 | Finished meta-testing... +2025-04-03 05:12:49 | [maml_trainer] epoch #147 | Saving snapshot... +2025-04-03 05:13:12 | [maml_trainer] epoch #147 | Saved +2025-04-03 05:13:12 | [maml_trainer] epoch #147 | Time 56271.08 s +2025-04-03 05:13:12 | [maml_trainer] epoch #147 | EpochTime 388.25 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.8307 +Average/AverageReturn -177.836 +Average/Iteration 147 +Average/MaxReturn -153.028 +Average/MinReturn -236.819 +Average/NumEpisodes 80 +Average/StdReturn 19.6348 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5942 +GaussianMLPPolicy/KLAfter 0.0023999 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.20605e-05 +GaussianMLPPolicy/LossBefore -1.01179e-08 +GaussianMLPPolicy/dLoss -1.20706e-05 +Iteration 147 +MetaTest/Average/AverageDiscountedReturn -176.718 +MetaTest/Average/AverageReturn -176.718 +MetaTest/Average/Iteration 147 +MetaTest/Average/MaxReturn -153.852 +MetaTest/Average/MinReturn -242.058 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.0627 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.718 +MetaTest/__unnamed_task__/AverageReturn -176.718 +MetaTest/__unnamed_task__/Iteration 147 +MetaTest/__unnamed_task__/MaxReturn -153.852 +MetaTest/__unnamed_task__/MinReturn -242.058 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.0627 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.736e+06 +__unnamed_task__/AverageDiscountedReturn -74.8307 +__unnamed_task__/AverageReturn -177.836 +__unnamed_task__/Iteration 147 +__unnamed_task__/MaxReturn -153.028 +__unnamed_task__/MinReturn -236.819 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.6348 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:18:11 | [maml_trainer] epoch #148 | Sampling for adapation and meta-testing... +2025-04-03 05:19:24 | [maml_trainer] epoch #148 | Finished meta-testing... +2025-04-03 05:19:24 | [maml_trainer] epoch #148 | Saving snapshot... +2025-04-03 05:19:47 | [maml_trainer] epoch #148 | Saved +2025-04-03 05:19:47 | [maml_trainer] epoch #148 | Time 56665.47 s +2025-04-03 05:19:47 | [maml_trainer] epoch #148 | EpochTime 394.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3513 +Average/AverageReturn -174.418 +Average/Iteration 148 +Average/MaxReturn -155.047 +Average/MinReturn -254.865 +Average/NumEpisodes 80 +Average/StdReturn 17.2695 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5876 +GaussianMLPPolicy/KLAfter 0.00249329 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.11175e-05 +GaussianMLPPolicy/LossBefore 7.25687e-09 +GaussianMLPPolicy/dLoss 2.11248e-05 +Iteration 148 +MetaTest/Average/AverageDiscountedReturn -178.345 +MetaTest/Average/AverageReturn -178.345 +MetaTest/Average/Iteration 148 +MetaTest/Average/MaxReturn -153.854 +MetaTest/Average/MinReturn -232.372 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.6602 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.345 +MetaTest/__unnamed_task__/AverageReturn -178.345 +MetaTest/__unnamed_task__/Iteration 148 +MetaTest/__unnamed_task__/MaxReturn -153.854 +MetaTest/__unnamed_task__/MinReturn -232.372 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.6602 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.768e+06 +__unnamed_task__/AverageDiscountedReturn -73.3513 +__unnamed_task__/AverageReturn -174.418 +__unnamed_task__/Iteration 148 +__unnamed_task__/MaxReturn -155.047 +__unnamed_task__/MinReturn -254.865 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.2695 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:24:42 | [maml_trainer] epoch #149 | Sampling for adapation and meta-testing... +2025-04-03 05:25:55 | [maml_trainer] epoch #149 | Finished meta-testing... +2025-04-03 05:25:55 | [maml_trainer] epoch #149 | Saving snapshot... +2025-04-03 05:26:18 | [maml_trainer] epoch #149 | Saved +2025-04-03 05:26:18 | [maml_trainer] epoch #149 | Time 57056.55 s +2025-04-03 05:26:18 | [maml_trainer] epoch #149 | EpochTime 391.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.2405 +Average/AverageReturn -176.925 +Average/Iteration 149 +Average/MaxReturn -151.582 +Average/MinReturn -237.732 +Average/NumEpisodes 80 +Average/StdReturn 16.561 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5816 +GaussianMLPPolicy/KLAfter 0.0029615 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.47311e-06 +GaussianMLPPolicy/LossBefore 4.85778e-09 +GaussianMLPPolicy/dLoss -7.46825e-06 +Iteration 149 +MetaTest/Average/AverageDiscountedReturn -180.804 +MetaTest/Average/AverageReturn -180.804 +MetaTest/Average/Iteration 149 +MetaTest/Average/MaxReturn -162.947 +MetaTest/Average/MinReturn -249.872 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5287 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -180.804 +MetaTest/__unnamed_task__/AverageReturn -180.804 +MetaTest/__unnamed_task__/Iteration 149 +MetaTest/__unnamed_task__/MaxReturn -162.947 +MetaTest/__unnamed_task__/MinReturn -249.872 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5287 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.8e+06 +__unnamed_task__/AverageDiscountedReturn -74.2405 +__unnamed_task__/AverageReturn -176.925 +__unnamed_task__/Iteration 149 +__unnamed_task__/MaxReturn -151.582 +__unnamed_task__/MinReturn -237.732 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.561 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:31:13 | [maml_trainer] epoch #150 | Sampling for adapation and meta-testing... +2025-04-03 05:32:26 | [maml_trainer] epoch #150 | Finished meta-testing... +2025-04-03 05:32:26 | [maml_trainer] epoch #150 | Saving snapshot... +2025-04-03 05:32:48 | [maml_trainer] epoch #150 | Saved +2025-04-03 05:32:48 | [maml_trainer] epoch #150 | Time 57446.73 s +2025-04-03 05:32:48 | [maml_trainer] epoch #150 | EpochTime 390.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.2022 +Average/AverageReturn -176.888 +Average/Iteration 150 +Average/MaxReturn -156.09 +Average/MinReturn -242.559 +Average/NumEpisodes 80 +Average/StdReturn 15.8653 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5766 +GaussianMLPPolicy/KLAfter 0.003296 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.23092e-06 +GaussianMLPPolicy/LossBefore 7.30157e-10 +GaussianMLPPolicy/dLoss -4.23019e-06 +Iteration 150 +MetaTest/Average/AverageDiscountedReturn -166.74 +MetaTest/Average/AverageReturn -166.74 +MetaTest/Average/Iteration 150 +MetaTest/Average/MaxReturn -152.876 +MetaTest/Average/MinReturn -225.111 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.3813 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.74 +MetaTest/__unnamed_task__/AverageReturn -166.74 +MetaTest/__unnamed_task__/Iteration 150 +MetaTest/__unnamed_task__/MaxReturn -152.876 +MetaTest/__unnamed_task__/MinReturn -225.111 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.3813 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.832e+06 +__unnamed_task__/AverageDiscountedReturn -74.2022 +__unnamed_task__/AverageReturn -176.888 +__unnamed_task__/Iteration 150 +__unnamed_task__/MaxReturn -156.09 +__unnamed_task__/MinReturn -242.559 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.8653 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:37:42 | [maml_trainer] epoch #151 | Sampling for adapation and meta-testing... +2025-04-03 05:38:55 | [maml_trainer] epoch #151 | Finished meta-testing... +2025-04-03 05:38:55 | [maml_trainer] epoch #151 | Saving snapshot... +2025-04-03 05:39:18 | [maml_trainer] epoch #151 | Saved +2025-04-03 05:39:18 | [maml_trainer] epoch #151 | Time 57836.62 s +2025-04-03 05:39:18 | [maml_trainer] epoch #151 | EpochTime 389.90 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.5636 +Average/AverageReturn -177.246 +Average/Iteration 151 +Average/MaxReturn -149.956 +Average/MinReturn -256.437 +Average/NumEpisodes 80 +Average/StdReturn 19.378 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.571 +GaussianMLPPolicy/KLAfter 0.00282915 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.00839e-05 +GaussianMLPPolicy/LossBefore -8.19564e-10 +GaussianMLPPolicy/dLoss -2.00848e-05 +Iteration 151 +MetaTest/Average/AverageDiscountedReturn -178.522 +MetaTest/Average/AverageReturn -178.522 +MetaTest/Average/Iteration 151 +MetaTest/Average/MaxReturn -154.925 +MetaTest/Average/MinReturn -227.768 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.3114 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.522 +MetaTest/__unnamed_task__/AverageReturn -178.522 +MetaTest/__unnamed_task__/Iteration 151 +MetaTest/__unnamed_task__/MaxReturn -154.925 +MetaTest/__unnamed_task__/MinReturn -227.768 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.3114 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.864e+06 +__unnamed_task__/AverageDiscountedReturn -74.5636 +__unnamed_task__/AverageReturn -177.246 +__unnamed_task__/Iteration 151 +__unnamed_task__/MaxReturn -149.956 +__unnamed_task__/MinReturn -256.437 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.378 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:44:13 | [maml_trainer] epoch #152 | Sampling for adapation and meta-testing... +2025-04-03 05:45:25 | [maml_trainer] epoch #152 | Finished meta-testing... +2025-04-03 05:45:25 | [maml_trainer] epoch #152 | Saving snapshot... +2025-04-03 05:45:49 | [maml_trainer] epoch #152 | Saved +2025-04-03 05:45:49 | [maml_trainer] epoch #152 | Time 58227.13 s +2025-04-03 05:45:49 | [maml_trainer] epoch #152 | EpochTime 390.50 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.0755 +Average/AverageReturn -176.149 +Average/Iteration 152 +Average/MaxReturn -150.73 +Average/MinReturn -268.107 +Average/NumEpisodes 80 +Average/StdReturn 17.9262 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5677 +GaussianMLPPolicy/KLAfter 0.00275873 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.57226e-05 +GaussianMLPPolicy/LossBefore -5.78165e-09 +GaussianMLPPolicy/dLoss 4.57168e-05 +Iteration 152 +MetaTest/Average/AverageDiscountedReturn -177.106 +MetaTest/Average/AverageReturn -177.106 +MetaTest/Average/Iteration 152 +MetaTest/Average/MaxReturn -160.558 +MetaTest/Average/MinReturn -241.371 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.6903 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.106 +MetaTest/__unnamed_task__/AverageReturn -177.106 +MetaTest/__unnamed_task__/Iteration 152 +MetaTest/__unnamed_task__/MaxReturn -160.558 +MetaTest/__unnamed_task__/MinReturn -241.371 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.6903 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.896e+06 +__unnamed_task__/AverageDiscountedReturn -74.0755 +__unnamed_task__/AverageReturn -176.149 +__unnamed_task__/Iteration 152 +__unnamed_task__/MaxReturn -150.73 +__unnamed_task__/MinReturn -268.107 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9262 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:50:45 | [maml_trainer] epoch #153 | Sampling for adapation and meta-testing... +2025-04-03 05:51:58 | [maml_trainer] epoch #153 | Finished meta-testing... +2025-04-03 05:51:58 | [maml_trainer] epoch #153 | Saving snapshot... +2025-04-03 05:52:21 | [maml_trainer] epoch #153 | Saved +2025-04-03 05:52:21 | [maml_trainer] epoch #153 | Time 58619.69 s +2025-04-03 05:52:21 | [maml_trainer] epoch #153 | EpochTime 392.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8495 +Average/AverageReturn -172.798 +Average/Iteration 153 +Average/MaxReturn -156.923 +Average/MinReturn -254.717 +Average/NumEpisodes 80 +Average/StdReturn 15.112 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5656 +GaussianMLPPolicy/KLAfter 0.0023256 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.22256e-05 +GaussianMLPPolicy/LossBefore 4.12762e-09 +GaussianMLPPolicy/dLoss 2.22297e-05 +Iteration 153 +MetaTest/Average/AverageDiscountedReturn -174.72 +MetaTest/Average/AverageReturn -174.72 +MetaTest/Average/Iteration 153 +MetaTest/Average/MaxReturn -158.125 +MetaTest/Average/MinReturn -230.394 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.4099 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.72 +MetaTest/__unnamed_task__/AverageReturn -174.72 +MetaTest/__unnamed_task__/Iteration 153 +MetaTest/__unnamed_task__/MaxReturn -158.125 +MetaTest/__unnamed_task__/MinReturn -230.394 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.4099 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.928e+06 +__unnamed_task__/AverageDiscountedReturn -72.8495 +__unnamed_task__/AverageReturn -172.798 +__unnamed_task__/Iteration 153 +__unnamed_task__/MaxReturn -156.923 +__unnamed_task__/MinReturn -254.717 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.112 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 05:57:16 | [maml_trainer] epoch #154 | Sampling for adapation and meta-testing... +2025-04-03 05:58:29 | [maml_trainer] epoch #154 | Finished meta-testing... +2025-04-03 05:58:29 | [maml_trainer] epoch #154 | Saving snapshot... +2025-04-03 05:58:50 | [maml_trainer] epoch #154 | Saved +2025-04-03 05:58:50 | [maml_trainer] epoch #154 | Time 59008.88 s +2025-04-03 05:58:50 | [maml_trainer] epoch #154 | EpochTime 389.19 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.6608 +Average/AverageReturn -175.273 +Average/Iteration 154 +Average/MaxReturn -154.004 +Average/MinReturn -231.142 +Average/NumEpisodes 80 +Average/StdReturn 17.7522 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5616 +GaussianMLPPolicy/KLAfter 0.00212179 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.16636e-05 +GaussianMLPPolicy/LossBefore 7.15256e-10 +GaussianMLPPolicy/dLoss 2.16643e-05 +Iteration 154 +MetaTest/Average/AverageDiscountedReturn -172.08 +MetaTest/Average/AverageReturn -172.08 +MetaTest/Average/Iteration 154 +MetaTest/Average/MaxReturn -154.29 +MetaTest/Average/MinReturn -242.581 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.0281 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.08 +MetaTest/__unnamed_task__/AverageReturn -172.08 +MetaTest/__unnamed_task__/Iteration 154 +MetaTest/__unnamed_task__/MaxReturn -154.29 +MetaTest/__unnamed_task__/MinReturn -242.581 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.0281 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.96e+06 +__unnamed_task__/AverageDiscountedReturn -73.6608 +__unnamed_task__/AverageReturn -175.273 +__unnamed_task__/Iteration 154 +__unnamed_task__/MaxReturn -154.004 +__unnamed_task__/MinReturn -231.142 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.7522 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:03:42 | [maml_trainer] epoch #155 | Sampling for adapation and meta-testing... +2025-04-03 06:04:56 | [maml_trainer] epoch #155 | Finished meta-testing... +2025-04-03 06:04:56 | [maml_trainer] epoch #155 | Saving snapshot... +2025-04-03 06:05:19 | [maml_trainer] epoch #155 | Saved +2025-04-03 06:05:19 | [maml_trainer] epoch #155 | Time 59397.53 s +2025-04-03 06:05:19 | [maml_trainer] epoch #155 | EpochTime 388.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.3121 +Average/AverageReturn -178.898 +Average/Iteration 155 +Average/MaxReturn -155.531 +Average/MinReturn -240.477 +Average/NumEpisodes 80 +Average/StdReturn 21.9092 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5587 +GaussianMLPPolicy/KLAfter 0.001941 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.59447e-05 +GaussianMLPPolicy/LossBefore -4.52995e-09 +GaussianMLPPolicy/dLoss 1.59402e-05 +Iteration 155 +MetaTest/Average/AverageDiscountedReturn -170.718 +MetaTest/Average/AverageReturn -170.718 +MetaTest/Average/Iteration 155 +MetaTest/Average/MaxReturn -156.411 +MetaTest/Average/MinReturn -210.04 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.2295 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.718 +MetaTest/__unnamed_task__/AverageReturn -170.718 +MetaTest/__unnamed_task__/Iteration 155 +MetaTest/__unnamed_task__/MaxReturn -156.411 +MetaTest/__unnamed_task__/MinReturn -210.04 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.2295 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 4.992e+06 +__unnamed_task__/AverageDiscountedReturn -75.3121 +__unnamed_task__/AverageReturn -178.898 +__unnamed_task__/Iteration 155 +__unnamed_task__/MaxReturn -155.531 +__unnamed_task__/MinReturn -240.477 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.9092 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:10:15 | [maml_trainer] epoch #156 | Sampling for adapation and meta-testing... +2025-04-03 06:11:28 | [maml_trainer] epoch #156 | Finished meta-testing... +2025-04-03 06:11:28 | [maml_trainer] epoch #156 | Saving snapshot... +2025-04-03 06:11:51 | [maml_trainer] epoch #156 | Saved +2025-04-03 06:11:51 | [maml_trainer] epoch #156 | Time 59790.10 s +2025-04-03 06:11:51 | [maml_trainer] epoch #156 | EpochTime 392.57 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.2845 +Average/AverageReturn -178.263 +Average/Iteration 156 +Average/MaxReturn -154.244 +Average/MinReturn -250.947 +Average/NumEpisodes 80 +Average/StdReturn 22.8426 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5555 +GaussianMLPPolicy/KLAfter 0.00287195 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.22024e-05 +GaussianMLPPolicy/LossBefore -5.23031e-09 +GaussianMLPPolicy/dLoss -1.22076e-05 +Iteration 156 +MetaTest/Average/AverageDiscountedReturn -171.386 +MetaTest/Average/AverageReturn -171.386 +MetaTest/Average/Iteration 156 +MetaTest/Average/MaxReturn -154.466 +MetaTest/Average/MinReturn -194.188 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.6025 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.386 +MetaTest/__unnamed_task__/AverageReturn -171.386 +MetaTest/__unnamed_task__/Iteration 156 +MetaTest/__unnamed_task__/MaxReturn -154.466 +MetaTest/__unnamed_task__/MinReturn -194.188 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.6025 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.024e+06 +__unnamed_task__/AverageDiscountedReturn -75.2845 +__unnamed_task__/AverageReturn -178.263 +__unnamed_task__/Iteration 156 +__unnamed_task__/MaxReturn -154.244 +__unnamed_task__/MinReturn -250.947 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.8426 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:16:49 | [maml_trainer] epoch #157 | Sampling for adapation and meta-testing... +2025-04-03 06:18:02 | [maml_trainer] epoch #157 | Finished meta-testing... +2025-04-03 06:18:02 | [maml_trainer] epoch #157 | Saving snapshot... +2025-04-03 06:18:25 | [maml_trainer] epoch #157 | Saved +2025-04-03 06:18:25 | [maml_trainer] epoch #157 | Time 60184.09 s +2025-04-03 06:18:25 | [maml_trainer] epoch #157 | EpochTime 393.98 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.4845 +Average/AverageReturn -176.869 +Average/Iteration 157 +Average/MaxReturn -153.471 +Average/MinReturn -378.738 +Average/NumEpisodes 80 +Average/StdReturn 26.0672 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5532 +GaussianMLPPolicy/KLAfter 0.00186374 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.1491e-05 +GaussianMLPPolicy/LossBefore -2.5928e-09 +GaussianMLPPolicy/dLoss -1.14936e-05 +Iteration 157 +MetaTest/Average/AverageDiscountedReturn -174.171 +MetaTest/Average/AverageReturn -174.171 +MetaTest/Average/Iteration 157 +MetaTest/Average/MaxReturn -157.255 +MetaTest/Average/MinReturn -230.03 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.3599 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.171 +MetaTest/__unnamed_task__/AverageReturn -174.171 +MetaTest/__unnamed_task__/Iteration 157 +MetaTest/__unnamed_task__/MaxReturn -157.255 +MetaTest/__unnamed_task__/MinReturn -230.03 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.3599 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.056e+06 +__unnamed_task__/AverageDiscountedReturn -75.4845 +__unnamed_task__/AverageReturn -176.869 +__unnamed_task__/Iteration 157 +__unnamed_task__/MaxReturn -153.471 +__unnamed_task__/MinReturn -378.738 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.0672 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:23:24 | [maml_trainer] epoch #158 | Sampling for adapation and meta-testing... +2025-04-03 06:24:38 | [maml_trainer] epoch #158 | Finished meta-testing... +2025-04-03 06:24:38 | [maml_trainer] epoch #158 | Saving snapshot... +2025-04-03 06:25:00 | [maml_trainer] epoch #158 | Saved +2025-04-03 06:25:00 | [maml_trainer] epoch #158 | Time 60578.58 s +2025-04-03 06:25:00 | [maml_trainer] epoch #158 | EpochTime 394.49 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.8507 +Average/AverageReturn -177.292 +Average/Iteration 158 +Average/MaxReturn -156.415 +Average/MinReturn -279.88 +Average/NumEpisodes 80 +Average/StdReturn 22.8206 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.551 +GaussianMLPPolicy/KLAfter 0.00165626 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.53861e-05 +GaussianMLPPolicy/LossBefore -2.84612e-09 +GaussianMLPPolicy/dLoss 2.53833e-05 +Iteration 158 +MetaTest/Average/AverageDiscountedReturn -175.596 +MetaTest/Average/AverageReturn -175.596 +MetaTest/Average/Iteration 158 +MetaTest/Average/MaxReturn -153.616 +MetaTest/Average/MinReturn -230.983 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.8321 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.596 +MetaTest/__unnamed_task__/AverageReturn -175.596 +MetaTest/__unnamed_task__/Iteration 158 +MetaTest/__unnamed_task__/MaxReturn -153.616 +MetaTest/__unnamed_task__/MinReturn -230.983 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.8321 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.088e+06 +__unnamed_task__/AverageDiscountedReturn -74.8507 +__unnamed_task__/AverageReturn -177.292 +__unnamed_task__/Iteration 158 +__unnamed_task__/MaxReturn -156.415 +__unnamed_task__/MinReturn -279.88 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.8206 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:29:54 | [maml_trainer] epoch #159 | Sampling for adapation and meta-testing... +2025-04-03 06:31:07 | [maml_trainer] epoch #159 | Finished meta-testing... +2025-04-03 06:31:07 | [maml_trainer] epoch #159 | Saving snapshot... +2025-04-03 06:31:31 | [maml_trainer] epoch #159 | Saved +2025-04-03 06:31:31 | [maml_trainer] epoch #159 | Time 60969.27 s +2025-04-03 06:31:31 | [maml_trainer] epoch #159 | EpochTime 390.69 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2776 +Average/AverageReturn -173.78 +Average/Iteration 159 +Average/MaxReturn -149.988 +Average/MinReturn -241.922 +Average/NumEpisodes 80 +Average/StdReturn 17.6879 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.55 +GaussianMLPPolicy/KLAfter 0.00186658 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.87548e-06 +GaussianMLPPolicy/LossBefore 4.90248e-09 +GaussianMLPPolicy/dLoss 4.88038e-06 +Iteration 159 +MetaTest/Average/AverageDiscountedReturn -169.014 +MetaTest/Average/AverageReturn -169.014 +MetaTest/Average/Iteration 159 +MetaTest/Average/MaxReturn -152.798 +MetaTest/Average/MinReturn -232.244 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.801 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.014 +MetaTest/__unnamed_task__/AverageReturn -169.014 +MetaTest/__unnamed_task__/Iteration 159 +MetaTest/__unnamed_task__/MaxReturn -152.798 +MetaTest/__unnamed_task__/MinReturn -232.244 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.801 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.12e+06 +__unnamed_task__/AverageDiscountedReturn -73.2776 +__unnamed_task__/AverageReturn -173.78 +__unnamed_task__/Iteration 159 +__unnamed_task__/MaxReturn -149.988 +__unnamed_task__/MinReturn -241.922 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.6879 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:36:26 | [maml_trainer] epoch #160 | Sampling for adapation and meta-testing... +2025-04-03 06:37:39 | [maml_trainer] epoch #160 | Finished meta-testing... +2025-04-03 06:37:39 | [maml_trainer] epoch #160 | Saving snapshot... +2025-04-03 06:38:02 | [maml_trainer] epoch #160 | Saved +2025-04-03 06:38:02 | [maml_trainer] epoch #160 | Time 61360.82 s +2025-04-03 06:38:02 | [maml_trainer] epoch #160 | EpochTime 391.54 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.522 +Average/AverageReturn -176.989 +Average/Iteration 160 +Average/MaxReturn -148.695 +Average/MinReturn -242.647 +Average/NumEpisodes 80 +Average/StdReturn 21.2737 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5492 +GaussianMLPPolicy/KLAfter 0.00225742 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.87653e-05 +GaussianMLPPolicy/LossBefore 1.13249e-09 +GaussianMLPPolicy/dLoss -2.87642e-05 +Iteration 160 +MetaTest/Average/AverageDiscountedReturn -177.734 +MetaTest/Average/AverageReturn -177.734 +MetaTest/Average/Iteration 160 +MetaTest/Average/MaxReturn -160.67 +MetaTest/Average/MinReturn -234.731 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.8531 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.734 +MetaTest/__unnamed_task__/AverageReturn -177.734 +MetaTest/__unnamed_task__/Iteration 160 +MetaTest/__unnamed_task__/MaxReturn -160.67 +MetaTest/__unnamed_task__/MinReturn -234.731 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.8531 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.152e+06 +__unnamed_task__/AverageDiscountedReturn -74.522 +__unnamed_task__/AverageReturn -176.989 +__unnamed_task__/Iteration 160 +__unnamed_task__/MaxReturn -148.695 +__unnamed_task__/MinReturn -242.647 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.2737 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:42:58 | [maml_trainer] epoch #161 | Sampling for adapation and meta-testing... +2025-04-03 06:44:10 | [maml_trainer] epoch #161 | Finished meta-testing... +2025-04-03 06:44:10 | [maml_trainer] epoch #161 | Saving snapshot... +2025-04-03 06:44:34 | [maml_trainer] epoch #161 | Saved +2025-04-03 06:44:34 | [maml_trainer] epoch #161 | Time 61752.28 s +2025-04-03 06:44:34 | [maml_trainer] epoch #161 | EpochTime 391.46 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3331 +Average/AverageReturn -171.546 +Average/Iteration 161 +Average/MaxReturn -152.395 +Average/MinReturn -230.081 +Average/NumEpisodes 80 +Average/StdReturn 15.6404 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5477 +GaussianMLPPolicy/KLAfter 0.00188276 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.30603e-05 +GaussianMLPPolicy/LossBefore -6.88434e-09 +GaussianMLPPolicy/dLoss 3.30534e-05 +Iteration 161 +MetaTest/Average/AverageDiscountedReturn -183.905 +MetaTest/Average/AverageReturn -183.905 +MetaTest/Average/Iteration 161 +MetaTest/Average/MaxReturn -156.507 +MetaTest/Average/MinReturn -225.682 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6086 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -183.905 +MetaTest/__unnamed_task__/AverageReturn -183.905 +MetaTest/__unnamed_task__/Iteration 161 +MetaTest/__unnamed_task__/MaxReturn -156.507 +MetaTest/__unnamed_task__/MinReturn -225.682 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6086 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.184e+06 +__unnamed_task__/AverageDiscountedReturn -72.3331 +__unnamed_task__/AverageReturn -171.546 +__unnamed_task__/Iteration 161 +__unnamed_task__/MaxReturn -152.395 +__unnamed_task__/MinReturn -230.081 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.6404 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:49:28 | [maml_trainer] epoch #162 | Sampling for adapation and meta-testing... +2025-04-03 06:50:41 | [maml_trainer] epoch #162 | Finished meta-testing... +2025-04-03 06:50:41 | [maml_trainer] epoch #162 | Saving snapshot... +2025-04-03 06:51:03 | [maml_trainer] epoch #162 | Saved +2025-04-03 06:51:03 | [maml_trainer] epoch #162 | Time 62141.67 s +2025-04-03 06:51:03 | [maml_trainer] epoch #162 | EpochTime 389.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.6289 +Average/AverageReturn -176.919 +Average/Iteration 162 +Average/MaxReturn -152.668 +Average/MinReturn -238.404 +Average/NumEpisodes 80 +Average/StdReturn 20.3495 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5451 +GaussianMLPPolicy/KLAfter 0.00166313 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.62553e-05 +GaussianMLPPolicy/LossBefore -4.93228e-09 +GaussianMLPPolicy/dLoss -2.62602e-05 +Iteration 162 +MetaTest/Average/AverageDiscountedReturn -178.855 +MetaTest/Average/AverageReturn -178.855 +MetaTest/Average/Iteration 162 +MetaTest/Average/MaxReturn -154.415 +MetaTest/Average/MinReturn -229.19 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.0371 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.855 +MetaTest/__unnamed_task__/AverageReturn -178.855 +MetaTest/__unnamed_task__/Iteration 162 +MetaTest/__unnamed_task__/MaxReturn -154.415 +MetaTest/__unnamed_task__/MinReturn -229.19 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.0371 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.216e+06 +__unnamed_task__/AverageDiscountedReturn -74.6289 +__unnamed_task__/AverageReturn -176.919 +__unnamed_task__/Iteration 162 +__unnamed_task__/MaxReturn -152.668 +__unnamed_task__/MinReturn -238.404 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.3495 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 06:56:00 | [maml_trainer] epoch #163 | Sampling for adapation and meta-testing... +2025-04-03 06:57:14 | [maml_trainer] epoch #163 | Finished meta-testing... +2025-04-03 06:57:14 | [maml_trainer] epoch #163 | Saving snapshot... +2025-04-03 06:57:36 | [maml_trainer] epoch #163 | Saved +2025-04-03 06:57:36 | [maml_trainer] epoch #163 | Time 62534.15 s +2025-04-03 06:57:36 | [maml_trainer] epoch #163 | EpochTime 392.48 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4912 +Average/AverageReturn -174.686 +Average/Iteration 163 +Average/MaxReturn -150.693 +Average/MinReturn -241.382 +Average/NumEpisodes 80 +Average/StdReturn 17.9196 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5424 +GaussianMLPPolicy/KLAfter 0.00172704 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.21231e-05 +GaussianMLPPolicy/LossBefore 1.78814e-09 +GaussianMLPPolicy/dLoss -1.21213e-05 +Iteration 163 +MetaTest/Average/AverageDiscountedReturn -168.363 +MetaTest/Average/AverageReturn -168.363 +MetaTest/Average/Iteration 163 +MetaTest/Average/MaxReturn -156.076 +MetaTest/Average/MinReturn -193.236 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.639 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.363 +MetaTest/__unnamed_task__/AverageReturn -168.363 +MetaTest/__unnamed_task__/Iteration 163 +MetaTest/__unnamed_task__/MaxReturn -156.076 +MetaTest/__unnamed_task__/MinReturn -193.236 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.639 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.248e+06 +__unnamed_task__/AverageDiscountedReturn -73.4912 +__unnamed_task__/AverageReturn -174.686 +__unnamed_task__/Iteration 163 +__unnamed_task__/MaxReturn -150.693 +__unnamed_task__/MinReturn -241.382 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9196 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:02:27 | [maml_trainer] epoch #164 | Sampling for adapation and meta-testing... +2025-04-03 07:03:40 | [maml_trainer] epoch #164 | Finished meta-testing... +2025-04-03 07:03:40 | [maml_trainer] epoch #164 | Saving snapshot... +2025-04-03 07:04:03 | [maml_trainer] epoch #164 | Saved +2025-04-03 07:04:03 | [maml_trainer] epoch #164 | Time 62921.83 s +2025-04-03 07:04:03 | [maml_trainer] epoch #164 | EpochTime 387.67 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2915 +Average/AverageReturn -173.701 +Average/Iteration 164 +Average/MaxReturn -151.269 +Average/MinReturn -251.281 +Average/NumEpisodes 80 +Average/StdReturn 18.4208 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5408 +GaussianMLPPolicy/KLAfter 0.00173097 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.00863e-05 +GaussianMLPPolicy/LossBefore -5.88596e-09 +GaussianMLPPolicy/dLoss -2.00921e-05 +Iteration 164 +MetaTest/Average/AverageDiscountedReturn -171.092 +MetaTest/Average/AverageReturn -171.092 +MetaTest/Average/Iteration 164 +MetaTest/Average/MaxReturn -156.978 +MetaTest/Average/MinReturn -217.916 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.3894 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.092 +MetaTest/__unnamed_task__/AverageReturn -171.092 +MetaTest/__unnamed_task__/Iteration 164 +MetaTest/__unnamed_task__/MaxReturn -156.978 +MetaTest/__unnamed_task__/MinReturn -217.916 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.3894 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.28e+06 +__unnamed_task__/AverageDiscountedReturn -73.2915 +__unnamed_task__/AverageReturn -173.701 +__unnamed_task__/Iteration 164 +__unnamed_task__/MaxReturn -151.269 +__unnamed_task__/MinReturn -251.281 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4208 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:08:57 | [maml_trainer] epoch #165 | Sampling for adapation and meta-testing... +2025-04-03 07:10:11 | [maml_trainer] epoch #165 | Finished meta-testing... +2025-04-03 07:10:11 | [maml_trainer] epoch #165 | Saving snapshot... +2025-04-03 07:10:34 | [maml_trainer] epoch #165 | Saved +2025-04-03 07:10:34 | [maml_trainer] epoch #165 | Time 63312.80 s +2025-04-03 07:10:34 | [maml_trainer] epoch #165 | EpochTime 390.97 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3959 +Average/AverageReturn -173.923 +Average/Iteration 165 +Average/MaxReturn -153.445 +Average/MinReturn -256.195 +Average/NumEpisodes 80 +Average/StdReturn 17.7702 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5384 +GaussianMLPPolicy/KLAfter 0.00174377 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.39255e-07 +GaussianMLPPolicy/LossBefore -5.1558e-09 +GaussianMLPPolicy/dLoss -3.44411e-07 +Iteration 165 +MetaTest/Average/AverageDiscountedReturn -170.165 +MetaTest/Average/AverageReturn -170.165 +MetaTest/Average/Iteration 165 +MetaTest/Average/MaxReturn -154.761 +MetaTest/Average/MinReturn -206.604 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.0798 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.165 +MetaTest/__unnamed_task__/AverageReturn -170.165 +MetaTest/__unnamed_task__/Iteration 165 +MetaTest/__unnamed_task__/MaxReturn -154.761 +MetaTest/__unnamed_task__/MinReturn -206.604 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.0798 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.312e+06 +__unnamed_task__/AverageDiscountedReturn -73.3959 +__unnamed_task__/AverageReturn -173.923 +__unnamed_task__/Iteration 165 +__unnamed_task__/MaxReturn -153.445 +__unnamed_task__/MinReturn -256.195 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.7702 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:15:27 | [maml_trainer] epoch #166 | Sampling for adapation and meta-testing... +2025-04-03 07:16:41 | [maml_trainer] epoch #166 | Finished meta-testing... +2025-04-03 07:16:41 | [maml_trainer] epoch #166 | Saving snapshot... +2025-04-03 07:17:03 | [maml_trainer] epoch #166 | Saved +2025-04-03 07:17:03 | [maml_trainer] epoch #166 | Time 63702.08 s +2025-04-03 07:17:03 | [maml_trainer] epoch #166 | EpochTime 389.27 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4248 +Average/AverageReturn -173.825 +Average/Iteration 166 +Average/MaxReturn -151.972 +Average/MinReturn -245.283 +Average/NumEpisodes 80 +Average/StdReturn 21.0152 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5354 +GaussianMLPPolicy/KLAfter 0.00151268 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.93853e-05 +GaussianMLPPolicy/LossBefore 1.86265e-09 +GaussianMLPPolicy/dLoss -1.93834e-05 +Iteration 166 +MetaTest/Average/AverageDiscountedReturn -167.017 +MetaTest/Average/AverageReturn -167.017 +MetaTest/Average/Iteration 166 +MetaTest/Average/MaxReturn -157.995 +MetaTest/Average/MinReturn -189.739 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.99631 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.017 +MetaTest/__unnamed_task__/AverageReturn -167.017 +MetaTest/__unnamed_task__/Iteration 166 +MetaTest/__unnamed_task__/MaxReturn -157.995 +MetaTest/__unnamed_task__/MinReturn -189.739 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.99631 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.344e+06 +__unnamed_task__/AverageDiscountedReturn -73.4248 +__unnamed_task__/AverageReturn -173.825 +__unnamed_task__/Iteration 166 +__unnamed_task__/MaxReturn -151.972 +__unnamed_task__/MinReturn -245.283 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.0152 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:21:56 | [maml_trainer] epoch #167 | Sampling for adapation and meta-testing... +2025-04-03 07:23:10 | [maml_trainer] epoch #167 | Finished meta-testing... +2025-04-03 07:23:10 | [maml_trainer] epoch #167 | Saving snapshot... +2025-04-03 07:23:33 | [maml_trainer] epoch #167 | Saved +2025-04-03 07:23:33 | [maml_trainer] epoch #167 | Time 64091.38 s +2025-04-03 07:23:33 | [maml_trainer] epoch #167 | EpochTime 389.30 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3533 +Average/AverageReturn -172.026 +Average/Iteration 167 +Average/MaxReturn -154.826 +Average/MinReturn -216.333 +Average/NumEpisodes 80 +Average/StdReturn 12.9364 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5298 +GaussianMLPPolicy/KLAfter 0.00150832 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.21639e-06 +GaussianMLPPolicy/LossBefore 5.45382e-09 +GaussianMLPPolicy/dLoss 5.22184e-06 +Iteration 167 +MetaTest/Average/AverageDiscountedReturn -169.664 +MetaTest/Average/AverageReturn -169.664 +MetaTest/Average/Iteration 167 +MetaTest/Average/MaxReturn -154.66 +MetaTest/Average/MinReturn -190.871 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.5899 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.664 +MetaTest/__unnamed_task__/AverageReturn -169.664 +MetaTest/__unnamed_task__/Iteration 167 +MetaTest/__unnamed_task__/MaxReturn -154.66 +MetaTest/__unnamed_task__/MinReturn -190.871 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.5899 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.376e+06 +__unnamed_task__/AverageDiscountedReturn -72.3533 +__unnamed_task__/AverageReturn -172.026 +__unnamed_task__/Iteration 167 +__unnamed_task__/MaxReturn -154.826 +__unnamed_task__/MinReturn -216.333 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.9364 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:28:26 | [maml_trainer] epoch #168 | Sampling for adapation and meta-testing... +2025-04-03 07:29:39 | [maml_trainer] epoch #168 | Finished meta-testing... +2025-04-03 07:29:39 | [maml_trainer] epoch #168 | Saving snapshot... +2025-04-03 07:30:03 | [maml_trainer] epoch #168 | Saved +2025-04-03 07:30:03 | [maml_trainer] epoch #168 | Time 64481.27 s +2025-04-03 07:30:03 | [maml_trainer] epoch #168 | EpochTime 389.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8505 +Average/AverageReturn -172.569 +Average/Iteration 168 +Average/MaxReturn -151.699 +Average/MinReturn -226.949 +Average/NumEpisodes 80 +Average/StdReturn 17.8132 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5248 +GaussianMLPPolicy/KLAfter 0.0014005 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.33716e-07 +GaussianMLPPolicy/LossBefore 1.2964e-09 +GaussianMLPPolicy/dLoss -6.32419e-07 +Iteration 168 +MetaTest/Average/AverageDiscountedReturn -173.159 +MetaTest/Average/AverageReturn -173.159 +MetaTest/Average/Iteration 168 +MetaTest/Average/MaxReturn -153.98 +MetaTest/Average/MinReturn -201.91 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3046 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.159 +MetaTest/__unnamed_task__/AverageReturn -173.159 +MetaTest/__unnamed_task__/Iteration 168 +MetaTest/__unnamed_task__/MaxReturn -153.98 +MetaTest/__unnamed_task__/MinReturn -201.91 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3046 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.408e+06 +__unnamed_task__/AverageDiscountedReturn -72.8505 +__unnamed_task__/AverageReturn -172.569 +__unnamed_task__/Iteration 168 +__unnamed_task__/MaxReturn -151.699 +__unnamed_task__/MinReturn -226.949 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.8132 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:34:57 | [maml_trainer] epoch #169 | Sampling for adapation and meta-testing... +2025-04-03 07:36:10 | [maml_trainer] epoch #169 | Finished meta-testing... +2025-04-03 07:36:10 | [maml_trainer] epoch #169 | Saving snapshot... +2025-04-03 07:36:34 | [maml_trainer] epoch #169 | Saved +2025-04-03 07:36:34 | [maml_trainer] epoch #169 | Time 64872.16 s +2025-04-03 07:36:34 | [maml_trainer] epoch #169 | EpochTime 390.89 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.147 +Average/AverageReturn -170.706 +Average/Iteration 169 +Average/MaxReturn -151.525 +Average/MinReturn -243.656 +Average/NumEpisodes 80 +Average/StdReturn 14.0192 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5202 +GaussianMLPPolicy/KLAfter 0.00151264 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.43144e-05 +GaussianMLPPolicy/LossBefore 9.0152e-09 +GaussianMLPPolicy/dLoss 5.43234e-05 +Iteration 169 +MetaTest/Average/AverageDiscountedReturn -171.348 +MetaTest/Average/AverageReturn -171.348 +MetaTest/Average/Iteration 169 +MetaTest/Average/MaxReturn -158.326 +MetaTest/Average/MinReturn -196.512 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 9.22564 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.348 +MetaTest/__unnamed_task__/AverageReturn -171.348 +MetaTest/__unnamed_task__/Iteration 169 +MetaTest/__unnamed_task__/MaxReturn -158.326 +MetaTest/__unnamed_task__/MinReturn -196.512 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 9.22564 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.44e+06 +__unnamed_task__/AverageDiscountedReturn -72.147 +__unnamed_task__/AverageReturn -170.706 +__unnamed_task__/Iteration 169 +__unnamed_task__/MaxReturn -151.525 +__unnamed_task__/MinReturn -243.656 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.0192 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:41:28 | [maml_trainer] epoch #170 | Sampling for adapation and meta-testing... +2025-04-03 07:42:39 | [maml_trainer] epoch #170 | Finished meta-testing... +2025-04-03 07:42:39 | [maml_trainer] epoch #170 | Saving snapshot... +2025-04-03 07:43:02 | [maml_trainer] epoch #170 | Saved +2025-04-03 07:43:02 | [maml_trainer] epoch #170 | Time 65260.81 s +2025-04-03 07:43:02 | [maml_trainer] epoch #170 | EpochTime 388.65 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.518 +Average/AverageReturn -172.202 +Average/Iteration 170 +Average/MaxReturn -150.618 +Average/MinReturn -234.759 +Average/NumEpisodes 80 +Average/StdReturn 15.2705 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5155 +GaussianMLPPolicy/KLAfter 0.00137873 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.28372e-05 +GaussianMLPPolicy/LossBefore 6.78003e-09 +GaussianMLPPolicy/dLoss -3.28304e-05 +Iteration 170 +MetaTest/Average/AverageDiscountedReturn -171.273 +MetaTest/Average/AverageReturn -171.273 +MetaTest/Average/Iteration 170 +MetaTest/Average/MaxReturn -157.154 +MetaTest/Average/MinReturn -226.254 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.1135 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.273 +MetaTest/__unnamed_task__/AverageReturn -171.273 +MetaTest/__unnamed_task__/Iteration 170 +MetaTest/__unnamed_task__/MaxReturn -157.154 +MetaTest/__unnamed_task__/MinReturn -226.254 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.1135 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.472e+06 +__unnamed_task__/AverageDiscountedReturn -72.518 +__unnamed_task__/AverageReturn -172.202 +__unnamed_task__/Iteration 170 +__unnamed_task__/MaxReturn -150.618 +__unnamed_task__/MinReturn -234.759 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.2705 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:47:54 | [maml_trainer] epoch #171 | Sampling for adapation and meta-testing... +2025-04-03 07:49:08 | [maml_trainer] epoch #171 | Finished meta-testing... +2025-04-03 07:49:08 | [maml_trainer] epoch #171 | Saving snapshot... +2025-04-03 07:49:30 | [maml_trainer] epoch #171 | Saved +2025-04-03 07:49:30 | [maml_trainer] epoch #171 | Time 65648.70 s +2025-04-03 07:49:30 | [maml_trainer] epoch #171 | EpochTime 387.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.0511 +Average/AverageReturn -170.612 +Average/Iteration 171 +Average/MaxReturn -148.212 +Average/MinReturn -242.477 +Average/NumEpisodes 80 +Average/StdReturn 15.505 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5101 +GaussianMLPPolicy/KLAfter 0.00172038 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.3736e-05 +GaussianMLPPolicy/LossBefore 5.31971e-09 +GaussianMLPPolicy/dLoss -1.37307e-05 +Iteration 171 +MetaTest/Average/AverageDiscountedReturn -175.701 +MetaTest/Average/AverageReturn -175.701 +MetaTest/Average/Iteration 171 +MetaTest/Average/MaxReturn -152.285 +MetaTest/Average/MinReturn -234.257 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.1534 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.701 +MetaTest/__unnamed_task__/AverageReturn -175.701 +MetaTest/__unnamed_task__/Iteration 171 +MetaTest/__unnamed_task__/MaxReturn -152.285 +MetaTest/__unnamed_task__/MinReturn -234.257 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.1534 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.504e+06 +__unnamed_task__/AverageDiscountedReturn -72.0511 +__unnamed_task__/AverageReturn -170.612 +__unnamed_task__/Iteration 171 +__unnamed_task__/MaxReturn -148.212 +__unnamed_task__/MinReturn -242.477 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.505 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 07:54:22 | [maml_trainer] epoch #172 | Sampling for adapation and meta-testing... +2025-04-03 07:55:35 | [maml_trainer] epoch #172 | Finished meta-testing... +2025-04-03 07:55:35 | [maml_trainer] epoch #172 | Saving snapshot... +2025-04-03 07:55:59 | [maml_trainer] epoch #172 | Saved +2025-04-03 07:55:59 | [maml_trainer] epoch #172 | Time 66037.36 s +2025-04-03 07:55:59 | [maml_trainer] epoch #172 | EpochTime 388.66 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7541 +Average/AverageReturn -170.069 +Average/Iteration 172 +Average/MaxReturn -154.543 +Average/MinReturn -232.258 +Average/NumEpisodes 80 +Average/StdReturn 12.8163 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5056 +GaussianMLPPolicy/KLAfter 0.00146552 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.6122e-05 +GaussianMLPPolicy/LossBefore 6.64592e-09 +GaussianMLPPolicy/dLoss -2.61154e-05 +Iteration 172 +MetaTest/Average/AverageDiscountedReturn -183.157 +MetaTest/Average/AverageReturn -183.157 +MetaTest/Average/Iteration 172 +MetaTest/Average/MaxReturn -158.461 +MetaTest/Average/MinReturn -243.993 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.0942 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -183.157 +MetaTest/__unnamed_task__/AverageReturn -183.157 +MetaTest/__unnamed_task__/Iteration 172 +MetaTest/__unnamed_task__/MaxReturn -158.461 +MetaTest/__unnamed_task__/MinReturn -243.993 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.0942 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.536e+06 +__unnamed_task__/AverageDiscountedReturn -71.7541 +__unnamed_task__/AverageReturn -170.069 +__unnamed_task__/Iteration 172 +__unnamed_task__/MaxReturn -154.543 +__unnamed_task__/MinReturn -232.258 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.8163 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:00:53 | [maml_trainer] epoch #173 | Sampling for adapation and meta-testing... +2025-04-03 08:02:07 | [maml_trainer] epoch #173 | Finished meta-testing... +2025-04-03 08:02:07 | [maml_trainer] epoch #173 | Saving snapshot... +2025-04-03 08:02:30 | [maml_trainer] epoch #173 | Saved +2025-04-03 08:02:30 | [maml_trainer] epoch #173 | Time 66428.43 s +2025-04-03 08:02:30 | [maml_trainer] epoch #173 | EpochTime 391.07 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.7435 +Average/AverageReturn -174.36 +Average/Iteration 173 +Average/MaxReturn -148.15 +Average/MinReturn -258.353 +Average/NumEpisodes 80 +Average/StdReturn 20.1422 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.5008 +GaussianMLPPolicy/KLAfter 0.00125188 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.02547e-06 +GaussianMLPPolicy/LossBefore 3.8743e-09 +GaussianMLPPolicy/dLoss -2.0216e-06 +Iteration 173 +MetaTest/Average/AverageDiscountedReturn -175.876 +MetaTest/Average/AverageReturn -175.876 +MetaTest/Average/Iteration 173 +MetaTest/Average/MaxReturn -160.353 +MetaTest/Average/MinReturn -215.5 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.9308 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.876 +MetaTest/__unnamed_task__/AverageReturn -175.876 +MetaTest/__unnamed_task__/Iteration 173 +MetaTest/__unnamed_task__/MaxReturn -160.353 +MetaTest/__unnamed_task__/MinReturn -215.5 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.9308 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.568e+06 +__unnamed_task__/AverageDiscountedReturn -73.7435 +__unnamed_task__/AverageReturn -174.36 +__unnamed_task__/Iteration 173 +__unnamed_task__/MaxReturn -148.15 +__unnamed_task__/MinReturn -258.353 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.1422 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:07:27 | [maml_trainer] epoch #174 | Sampling for adapation and meta-testing... +2025-04-03 08:08:40 | [maml_trainer] epoch #174 | Finished meta-testing... +2025-04-03 08:08:40 | [maml_trainer] epoch #174 | Saving snapshot... +2025-04-03 08:09:03 | [maml_trainer] epoch #174 | Saved +2025-04-03 08:09:03 | [maml_trainer] epoch #174 | Time 66821.67 s +2025-04-03 08:09:03 | [maml_trainer] epoch #174 | EpochTime 393.24 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2068 +Average/AverageReturn -170.899 +Average/Iteration 174 +Average/MaxReturn -147.107 +Average/MinReturn -226.985 +Average/NumEpisodes 80 +Average/StdReturn 13.8869 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4987 +GaussianMLPPolicy/KLAfter 0.00231777 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.42853e-07 +GaussianMLPPolicy/LossBefore -1.54972e-09 +GaussianMLPPolicy/dLoss 7.41303e-07 +Iteration 174 +MetaTest/Average/AverageDiscountedReturn -169.274 +MetaTest/Average/AverageReturn -169.274 +MetaTest/Average/Iteration 174 +MetaTest/Average/MaxReturn -154.813 +MetaTest/Average/MinReturn -226.814 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.1204 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.274 +MetaTest/__unnamed_task__/AverageReturn -169.274 +MetaTest/__unnamed_task__/Iteration 174 +MetaTest/__unnamed_task__/MaxReturn -154.813 +MetaTest/__unnamed_task__/MinReturn -226.814 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.1204 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.6e+06 +__unnamed_task__/AverageDiscountedReturn -72.2068 +__unnamed_task__/AverageReturn -170.899 +__unnamed_task__/Iteration 174 +__unnamed_task__/MaxReturn -147.107 +__unnamed_task__/MinReturn -226.985 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.8869 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:13:59 | [maml_trainer] epoch #175 | Sampling for adapation and meta-testing... +2025-04-03 08:15:12 | [maml_trainer] epoch #175 | Finished meta-testing... +2025-04-03 08:15:12 | [maml_trainer] epoch #175 | Saving snapshot... +2025-04-03 08:15:34 | [maml_trainer] epoch #175 | Saved +2025-04-03 08:15:34 | [maml_trainer] epoch #175 | Time 67212.45 s +2025-04-03 08:15:34 | [maml_trainer] epoch #175 | EpochTime 390.78 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.171 +Average/AverageReturn -171.085 +Average/Iteration 175 +Average/MaxReturn -153.963 +Average/MinReturn -240.836 +Average/NumEpisodes 80 +Average/StdReturn 16.6219 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4979 +GaussianMLPPolicy/KLAfter 0.00175387 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.56659e-07 +GaussianMLPPolicy/LossBefore -6.63102e-09 +GaussianMLPPolicy/dLoss -3.6329e-07 +Iteration 175 +MetaTest/Average/AverageDiscountedReturn -169.554 +MetaTest/Average/AverageReturn -169.554 +MetaTest/Average/Iteration 175 +MetaTest/Average/MaxReturn -154.132 +MetaTest/Average/MinReturn -199.359 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.5053 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.554 +MetaTest/__unnamed_task__/AverageReturn -169.554 +MetaTest/__unnamed_task__/Iteration 175 +MetaTest/__unnamed_task__/MaxReturn -154.132 +MetaTest/__unnamed_task__/MinReturn -199.359 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.5053 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.632e+06 +__unnamed_task__/AverageDiscountedReturn -72.171 +__unnamed_task__/AverageReturn -171.085 +__unnamed_task__/Iteration 175 +__unnamed_task__/MaxReturn -153.963 +__unnamed_task__/MinReturn -240.836 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.6219 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:20:27 | [maml_trainer] epoch #176 | Sampling for adapation and meta-testing... +2025-04-03 08:21:41 | [maml_trainer] epoch #176 | Finished meta-testing... +2025-04-03 08:21:41 | [maml_trainer] epoch #176 | Saving snapshot... +2025-04-03 08:22:04 | [maml_trainer] epoch #176 | Saved +2025-04-03 08:22:04 | [maml_trainer] epoch #176 | Time 67602.83 s +2025-04-03 08:22:04 | [maml_trainer] epoch #176 | EpochTime 390.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8834 +Average/AverageReturn -170.664 +Average/Iteration 176 +Average/MaxReturn -151.117 +Average/MinReturn -229.187 +Average/NumEpisodes 80 +Average/StdReturn 14.8016 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4968 +GaussianMLPPolicy/KLAfter 0.00176253 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.46393e-05 +GaussianMLPPolicy/LossBefore -2.23517e-09 +GaussianMLPPolicy/dLoss -1.46416e-05 +Iteration 176 +MetaTest/Average/AverageDiscountedReturn -177.527 +MetaTest/Average/AverageReturn -177.527 +MetaTest/Average/Iteration 176 +MetaTest/Average/MaxReturn -159.755 +MetaTest/Average/MinReturn -228.124 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.9711 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.527 +MetaTest/__unnamed_task__/AverageReturn -177.527 +MetaTest/__unnamed_task__/Iteration 176 +MetaTest/__unnamed_task__/MaxReturn -159.755 +MetaTest/__unnamed_task__/MinReturn -228.124 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.9711 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.664e+06 +__unnamed_task__/AverageDiscountedReturn -71.8834 +__unnamed_task__/AverageReturn -170.664 +__unnamed_task__/Iteration 176 +__unnamed_task__/MaxReturn -151.117 +__unnamed_task__/MinReturn -229.187 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.8016 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:26:55 | [maml_trainer] epoch #177 | Sampling for adapation and meta-testing... +2025-04-03 08:28:07 | [maml_trainer] epoch #177 | Finished meta-testing... +2025-04-03 08:28:07 | [maml_trainer] epoch #177 | Saving snapshot... +2025-04-03 08:28:30 | [maml_trainer] epoch #177 | Saved +2025-04-03 08:28:30 | [maml_trainer] epoch #177 | Time 67988.95 s +2025-04-03 08:28:30 | [maml_trainer] epoch #177 | EpochTime 386.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3954 +Average/AverageReturn -171.205 +Average/Iteration 177 +Average/MaxReturn -153.12 +Average/MinReturn -253.189 +Average/NumEpisodes 80 +Average/StdReturn 18.4679 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4957 +GaussianMLPPolicy/KLAfter 0.00276101 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.91321e-05 +GaussianMLPPolicy/LossBefore 1.07288e-09 +GaussianMLPPolicy/dLoss -2.9131e-05 +Iteration 177 +MetaTest/Average/AverageDiscountedReturn -177.694 +MetaTest/Average/AverageReturn -177.694 +MetaTest/Average/Iteration 177 +MetaTest/Average/MaxReturn -155.041 +MetaTest/Average/MinReturn -243.764 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.2894 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.694 +MetaTest/__unnamed_task__/AverageReturn -177.694 +MetaTest/__unnamed_task__/Iteration 177 +MetaTest/__unnamed_task__/MaxReturn -155.041 +MetaTest/__unnamed_task__/MinReturn -243.764 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.2894 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.696e+06 +__unnamed_task__/AverageDiscountedReturn -72.3954 +__unnamed_task__/AverageReturn -171.205 +__unnamed_task__/Iteration 177 +__unnamed_task__/MaxReturn -153.12 +__unnamed_task__/MinReturn -253.189 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4679 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:33:22 | [maml_trainer] epoch #178 | Sampling for adapation and meta-testing... +2025-04-03 08:34:35 | [maml_trainer] epoch #178 | Finished meta-testing... +2025-04-03 08:34:35 | [maml_trainer] epoch #178 | Saving snapshot... +2025-04-03 08:34:58 | [maml_trainer] epoch #178 | Saved +2025-04-03 08:34:58 | [maml_trainer] epoch #178 | Time 68377.01 s +2025-04-03 08:34:58 | [maml_trainer] epoch #178 | EpochTime 388.06 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5902 +Average/AverageReturn -172.167 +Average/Iteration 178 +Average/MaxReturn -150.959 +Average/MinReturn -236.373 +Average/NumEpisodes 80 +Average/StdReturn 16.1428 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4946 +GaussianMLPPolicy/KLAfter 0.0021574 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.93925e-05 +GaussianMLPPolicy/LossBefore -5.70714e-09 +GaussianMLPPolicy/dLoss -1.93982e-05 +Iteration 178 +MetaTest/Average/AverageDiscountedReturn -171.513 +MetaTest/Average/AverageReturn -171.513 +MetaTest/Average/Iteration 178 +MetaTest/Average/MaxReturn -157.451 +MetaTest/Average/MinReturn -232.141 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.3887 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.513 +MetaTest/__unnamed_task__/AverageReturn -171.513 +MetaTest/__unnamed_task__/Iteration 178 +MetaTest/__unnamed_task__/MaxReturn -157.451 +MetaTest/__unnamed_task__/MinReturn -232.141 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.3887 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.728e+06 +__unnamed_task__/AverageDiscountedReturn -72.5902 +__unnamed_task__/AverageReturn -172.167 +__unnamed_task__/Iteration 178 +__unnamed_task__/MaxReturn -150.959 +__unnamed_task__/MinReturn -236.373 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.1428 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:39:52 | [maml_trainer] epoch #179 | Sampling for adapation and meta-testing... +2025-04-03 08:41:05 | [maml_trainer] epoch #179 | Finished meta-testing... +2025-04-03 08:41:05 | [maml_trainer] epoch #179 | Saving snapshot... +2025-04-03 08:41:27 | [maml_trainer] epoch #179 | Saved +2025-04-03 08:41:27 | [maml_trainer] epoch #179 | Time 68765.55 s +2025-04-03 08:41:27 | [maml_trainer] epoch #179 | EpochTime 388.53 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8009 +Average/AverageReturn -174.381 +Average/Iteration 179 +Average/MaxReturn -152.794 +Average/MinReturn -271.861 +Average/NumEpisodes 80 +Average/StdReturn 21.2488 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4931 +GaussianMLPPolicy/KLAfter 0.00356276 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.97071e-05 +GaussianMLPPolicy/LossBefore -4.47035e-09 +GaussianMLPPolicy/dLoss 6.97027e-05 +Iteration 179 +MetaTest/Average/AverageDiscountedReturn -167.893 +MetaTest/Average/AverageReturn -167.893 +MetaTest/Average/Iteration 179 +MetaTest/Average/MaxReturn -149.93 +MetaTest/Average/MinReturn -194.483 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 10.7541 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.893 +MetaTest/__unnamed_task__/AverageReturn -167.893 +MetaTest/__unnamed_task__/Iteration 179 +MetaTest/__unnamed_task__/MaxReturn -149.93 +MetaTest/__unnamed_task__/MinReturn -194.483 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 10.7541 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.76e+06 +__unnamed_task__/AverageDiscountedReturn -73.8009 +__unnamed_task__/AverageReturn -174.381 +__unnamed_task__/Iteration 179 +__unnamed_task__/MaxReturn -152.794 +__unnamed_task__/MinReturn -271.861 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.2488 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:46:20 | [maml_trainer] epoch #180 | Sampling for adapation and meta-testing... +2025-04-03 08:47:34 | [maml_trainer] epoch #180 | Finished meta-testing... +2025-04-03 08:47:34 | [maml_trainer] epoch #180 | Saving snapshot... +2025-04-03 08:47:57 | [maml_trainer] epoch #180 | Saved +2025-04-03 08:47:57 | [maml_trainer] epoch #180 | Time 69155.68 s +2025-04-03 08:47:57 | [maml_trainer] epoch #180 | EpochTime 390.13 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7091 +Average/AverageReturn -170.086 +Average/Iteration 180 +Average/MaxReturn -149.526 +Average/MinReturn -230.559 +Average/NumEpisodes 80 +Average/StdReturn 15.5262 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4929 +GaussianMLPPolicy/KLAfter 0.00724982 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.02789e-05 +GaussianMLPPolicy/LossBefore -4.58956e-09 +GaussianMLPPolicy/dLoss -1.02835e-05 +Iteration 180 +MetaTest/Average/AverageDiscountedReturn -172.942 +MetaTest/Average/AverageReturn -172.942 +MetaTest/Average/Iteration 180 +MetaTest/Average/MaxReturn -153.956 +MetaTest/Average/MinReturn -231.467 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1353 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.942 +MetaTest/__unnamed_task__/AverageReturn -172.942 +MetaTest/__unnamed_task__/Iteration 180 +MetaTest/__unnamed_task__/MaxReturn -153.956 +MetaTest/__unnamed_task__/MinReturn -231.467 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1353 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.792e+06 +__unnamed_task__/AverageDiscountedReturn -71.7091 +__unnamed_task__/AverageReturn -170.086 +__unnamed_task__/Iteration 180 +__unnamed_task__/MaxReturn -149.526 +__unnamed_task__/MinReturn -230.559 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.5262 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:52:53 | [maml_trainer] epoch #181 | Sampling for adapation and meta-testing... +2025-04-03 08:54:07 | [maml_trainer] epoch #181 | Finished meta-testing... +2025-04-03 08:54:07 | [maml_trainer] epoch #181 | Saving snapshot... +2025-04-03 08:54:31 | [maml_trainer] epoch #181 | Saved +2025-04-03 08:54:31 | [maml_trainer] epoch #181 | Time 69549.73 s +2025-04-03 08:54:31 | [maml_trainer] epoch #181 | EpochTime 394.05 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.6075 +Average/AverageReturn -174.671 +Average/Iteration 181 +Average/MaxReturn -146.738 +Average/MinReturn -249.085 +Average/NumEpisodes 80 +Average/StdReturn 21.4989 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.492 +GaussianMLPPolicy/KLAfter 0.00738738 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.11035e-05 +GaussianMLPPolicy/LossBefore -1.80304e-09 +GaussianMLPPolicy/dLoss 1.11017e-05 +Iteration 181 +MetaTest/Average/AverageDiscountedReturn -171.351 +MetaTest/Average/AverageReturn -171.351 +MetaTest/Average/Iteration 181 +MetaTest/Average/MaxReturn -155.473 +MetaTest/Average/MinReturn -229.96 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.9294 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.351 +MetaTest/__unnamed_task__/AverageReturn -171.351 +MetaTest/__unnamed_task__/Iteration 181 +MetaTest/__unnamed_task__/MaxReturn -155.473 +MetaTest/__unnamed_task__/MinReturn -229.96 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.9294 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.824e+06 +__unnamed_task__/AverageDiscountedReturn -73.6075 +__unnamed_task__/AverageReturn -174.671 +__unnamed_task__/Iteration 181 +__unnamed_task__/MaxReturn -146.738 +__unnamed_task__/MinReturn -249.085 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.4989 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 08:59:15 | [maml_trainer] epoch #182 | Sampling for adapation and meta-testing... +2025-04-03 09:00:24 | [maml_trainer] epoch #182 | Finished meta-testing... +2025-04-03 09:00:24 | [maml_trainer] epoch #182 | Saving snapshot... +2025-04-03 09:00:47 | [maml_trainer] epoch #182 | Saved +2025-04-03 09:00:47 | [maml_trainer] epoch #182 | Time 69925.59 s +2025-04-03 09:00:47 | [maml_trainer] epoch #182 | EpochTime 375.86 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9379 +Average/AverageReturn -168.157 +Average/Iteration 182 +Average/MaxReturn -146.789 +Average/MinReturn -224.853 +Average/NumEpisodes 80 +Average/StdReturn 12.0783 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4897 +GaussianMLPPolicy/KLAfter 0.00855258 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.22132e-05 +GaussianMLPPolicy/LossBefore -7.18236e-09 +GaussianMLPPolicy/dLoss 4.2206e-05 +Iteration 182 +MetaTest/Average/AverageDiscountedReturn -175.199 +MetaTest/Average/AverageReturn -175.199 +MetaTest/Average/Iteration 182 +MetaTest/Average/MaxReturn -156.17 +MetaTest/Average/MinReturn -236.549 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.0463 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.199 +MetaTest/__unnamed_task__/AverageReturn -175.199 +MetaTest/__unnamed_task__/Iteration 182 +MetaTest/__unnamed_task__/MaxReturn -156.17 +MetaTest/__unnamed_task__/MinReturn -236.549 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.0463 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.856e+06 +__unnamed_task__/AverageDiscountedReturn -70.9379 +__unnamed_task__/AverageReturn -168.157 +__unnamed_task__/Iteration 182 +__unnamed_task__/MaxReturn -146.789 +__unnamed_task__/MinReturn -224.853 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.0783 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:05:28 | [maml_trainer] epoch #183 | Sampling for adapation and meta-testing... +2025-04-03 09:06:37 | [maml_trainer] epoch #183 | Finished meta-testing... +2025-04-03 09:06:37 | [maml_trainer] epoch #183 | Saving snapshot... +2025-04-03 09:06:59 | [maml_trainer] epoch #183 | Saved +2025-04-03 09:06:59 | [maml_trainer] epoch #183 | Time 70297.62 s +2025-04-03 09:06:59 | [maml_trainer] epoch #183 | EpochTime 372.03 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.5883 +Average/AverageReturn -174.274 +Average/Iteration 183 +Average/MaxReturn -145.691 +Average/MinReturn -267.17 +Average/NumEpisodes 80 +Average/StdReturn 20.811 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.488 +GaussianMLPPolicy/KLAfter 0.00824412 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.34861e-05 +GaussianMLPPolicy/LossBefore -4.08292e-09 +GaussianMLPPolicy/dLoss 6.3482e-05 +Iteration 183 +MetaTest/Average/AverageDiscountedReturn -168.428 +MetaTest/Average/AverageReturn -168.428 +MetaTest/Average/Iteration 183 +MetaTest/Average/MaxReturn -153.934 +MetaTest/Average/MinReturn -216.036 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.6535 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.428 +MetaTest/__unnamed_task__/AverageReturn -168.428 +MetaTest/__unnamed_task__/Iteration 183 +MetaTest/__unnamed_task__/MaxReturn -153.934 +MetaTest/__unnamed_task__/MinReturn -216.036 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.6535 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.888e+06 +__unnamed_task__/AverageDiscountedReturn -73.5883 +__unnamed_task__/AverageReturn -174.274 +__unnamed_task__/Iteration 183 +__unnamed_task__/MaxReturn -145.691 +__unnamed_task__/MinReturn -267.17 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.811 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:11:39 | [maml_trainer] epoch #184 | Sampling for adapation and meta-testing... +2025-04-03 09:12:48 | [maml_trainer] epoch #184 | Finished meta-testing... +2025-04-03 09:12:48 | [maml_trainer] epoch #184 | Saving snapshot... +2025-04-03 09:13:09 | [maml_trainer] epoch #184 | Saved +2025-04-03 09:13:09 | [maml_trainer] epoch #184 | Time 70667.50 s +2025-04-03 09:13:09 | [maml_trainer] epoch #184 | EpochTime 369.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.3471 +Average/AverageReturn -175.335 +Average/Iteration 184 +Average/MaxReturn -144.44 +Average/MinReturn -277.645 +Average/NumEpisodes 80 +Average/StdReturn 24.6848 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.489 +GaussianMLPPolicy/KLAfter 0.00599446 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.53933e-05 +GaussianMLPPolicy/LossBefore -6.70552e-10 +GaussianMLPPolicy/dLoss 2.53926e-05 +Iteration 184 +MetaTest/Average/AverageDiscountedReturn -169.928 +MetaTest/Average/AverageReturn -169.928 +MetaTest/Average/Iteration 184 +MetaTest/Average/MaxReturn -155.591 +MetaTest/Average/MinReturn -184.684 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 8.1298 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.928 +MetaTest/__unnamed_task__/AverageReturn -169.928 +MetaTest/__unnamed_task__/Iteration 184 +MetaTest/__unnamed_task__/MaxReturn -155.591 +MetaTest/__unnamed_task__/MinReturn -184.684 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 8.1298 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.92e+06 +__unnamed_task__/AverageDiscountedReturn -74.3471 +__unnamed_task__/AverageReturn -175.335 +__unnamed_task__/Iteration 184 +__unnamed_task__/MaxReturn -144.44 +__unnamed_task__/MinReturn -277.645 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.6848 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:17:47 | [maml_trainer] epoch #185 | Sampling for adapation and meta-testing... +2025-04-03 09:18:58 | [maml_trainer] epoch #185 | Finished meta-testing... +2025-04-03 09:18:58 | [maml_trainer] epoch #185 | Saving snapshot... +2025-04-03 09:19:20 | [maml_trainer] epoch #185 | Saved +2025-04-03 09:19:20 | [maml_trainer] epoch #185 | Time 71038.50 s +2025-04-03 09:19:20 | [maml_trainer] epoch #185 | EpochTime 370.99 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.7524 +Average/AverageReturn -175.695 +Average/Iteration 185 +Average/MaxReturn -148.309 +Average/MinReturn -248.643 +Average/NumEpisodes 80 +Average/StdReturn 22.3658 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4888 +GaussianMLPPolicy/KLAfter 0.00366192 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.8629e-05 +GaussianMLPPolicy/LossBefore 1.69873e-09 +GaussianMLPPolicy/dLoss -2.86273e-05 +Iteration 185 +MetaTest/Average/AverageDiscountedReturn -170.45 +MetaTest/Average/AverageReturn -170.45 +MetaTest/Average/Iteration 185 +MetaTest/Average/MaxReturn -156.224 +MetaTest/Average/MinReturn -213.768 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.4197 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.45 +MetaTest/__unnamed_task__/AverageReturn -170.45 +MetaTest/__unnamed_task__/Iteration 185 +MetaTest/__unnamed_task__/MaxReturn -156.224 +MetaTest/__unnamed_task__/MinReturn -213.768 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.4197 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.952e+06 +__unnamed_task__/AverageDiscountedReturn -73.7524 +__unnamed_task__/AverageReturn -175.695 +__unnamed_task__/Iteration 185 +__unnamed_task__/MaxReturn -148.309 +__unnamed_task__/MinReturn -248.643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.3658 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:24:00 | [maml_trainer] epoch #186 | Sampling for adapation and meta-testing... +2025-04-03 09:25:11 | [maml_trainer] epoch #186 | Finished meta-testing... +2025-04-03 09:25:11 | [maml_trainer] epoch #186 | Saving snapshot... +2025-04-03 09:25:33 | [maml_trainer] epoch #186 | Saved +2025-04-03 09:25:33 | [maml_trainer] epoch #186 | Time 71411.65 s +2025-04-03 09:25:33 | [maml_trainer] epoch #186 | EpochTime 373.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.0146 +Average/AverageReturn -173.808 +Average/Iteration 186 +Average/MaxReturn -152.332 +Average/MinReturn -492.943 +Average/NumEpisodes 80 +Average/StdReturn 38.3107 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4881 +GaussianMLPPolicy/KLAfter 0.00335624 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.20332e-06 +GaussianMLPPolicy/LossBefore -7.83801e-09 +GaussianMLPPolicy/dLoss 6.19549e-06 +Iteration 186 +MetaTest/Average/AverageDiscountedReturn -166.918 +MetaTest/Average/AverageReturn -166.918 +MetaTest/Average/Iteration 186 +MetaTest/Average/MaxReturn -143.005 +MetaTest/Average/MinReturn -222.703 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5894 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.918 +MetaTest/__unnamed_task__/AverageReturn -166.918 +MetaTest/__unnamed_task__/Iteration 186 +MetaTest/__unnamed_task__/MaxReturn -143.005 +MetaTest/__unnamed_task__/MinReturn -222.703 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5894 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 5.984e+06 +__unnamed_task__/AverageDiscountedReturn -75.0146 +__unnamed_task__/AverageReturn -173.808 +__unnamed_task__/Iteration 186 +__unnamed_task__/MaxReturn -152.332 +__unnamed_task__/MinReturn -492.943 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 38.3107 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:30:15 | [maml_trainer] epoch #187 | Sampling for adapation and meta-testing... +2025-04-03 09:31:23 | [maml_trainer] epoch #187 | Finished meta-testing... +2025-04-03 09:31:23 | [maml_trainer] epoch #187 | Saving snapshot... +2025-04-03 09:31:45 | [maml_trainer] epoch #187 | Saved +2025-04-03 09:31:45 | [maml_trainer] epoch #187 | Time 71784.02 s +2025-04-03 09:31:45 | [maml_trainer] epoch #187 | EpochTime 372.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4119 +Average/AverageReturn -170.07 +Average/Iteration 187 +Average/MaxReturn -149.881 +Average/MinReturn -238.823 +Average/NumEpisodes 80 +Average/StdReturn 15.5632 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4887 +GaussianMLPPolicy/KLAfter 0.00373477 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.06483e-05 +GaussianMLPPolicy/LossBefore 6.63102e-09 +GaussianMLPPolicy/dLoss -1.06417e-05 +Iteration 187 +MetaTest/Average/AverageDiscountedReturn -176.904 +MetaTest/Average/AverageReturn -176.904 +MetaTest/Average/Iteration 187 +MetaTest/Average/MaxReturn -158.841 +MetaTest/Average/MinReturn -230.662 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.9089 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.904 +MetaTest/__unnamed_task__/AverageReturn -176.904 +MetaTest/__unnamed_task__/Iteration 187 +MetaTest/__unnamed_task__/MaxReturn -158.841 +MetaTest/__unnamed_task__/MinReturn -230.662 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.9089 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.016e+06 +__unnamed_task__/AverageDiscountedReturn -71.4119 +__unnamed_task__/AverageReturn -170.07 +__unnamed_task__/Iteration 187 +__unnamed_task__/MaxReturn -149.881 +__unnamed_task__/MinReturn -238.823 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.5632 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:36:24 | [maml_trainer] epoch #188 | Sampling for adapation and meta-testing... +2025-04-03 09:37:34 | [maml_trainer] epoch #188 | Finished meta-testing... +2025-04-03 09:37:34 | [maml_trainer] epoch #188 | Saving snapshot... +2025-04-03 09:37:55 | [maml_trainer] epoch #188 | Saved +2025-04-03 09:37:55 | [maml_trainer] epoch #188 | Time 72153.85 s +2025-04-03 09:37:55 | [maml_trainer] epoch #188 | EpochTime 369.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.0107 +Average/AverageReturn -172.006 +Average/Iteration 188 +Average/MaxReturn -147.664 +Average/MinReturn -232.018 +Average/NumEpisodes 80 +Average/StdReturn 16.8066 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4867 +GaussianMLPPolicy/KLAfter 0.00311106 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 8.09215e-06 +GaussianMLPPolicy/LossBefore -1.43051e-09 +GaussianMLPPolicy/dLoss -8.09358e-06 +Iteration 188 +MetaTest/Average/AverageDiscountedReturn -170.913 +MetaTest/Average/AverageReturn -170.913 +MetaTest/Average/Iteration 188 +MetaTest/Average/MaxReturn -151.405 +MetaTest/Average/MinReturn -213.929 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1048 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.913 +MetaTest/__unnamed_task__/AverageReturn -170.913 +MetaTest/__unnamed_task__/Iteration 188 +MetaTest/__unnamed_task__/MaxReturn -151.405 +MetaTest/__unnamed_task__/MinReturn -213.929 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1048 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.048e+06 +__unnamed_task__/AverageDiscountedReturn -72.0107 +__unnamed_task__/AverageReturn -172.006 +__unnamed_task__/Iteration 188 +__unnamed_task__/MaxReturn -147.664 +__unnamed_task__/MinReturn -232.018 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.8066 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:42:34 | [maml_trainer] epoch #189 | Sampling for adapation and meta-testing... +2025-04-03 09:43:43 | [maml_trainer] epoch #189 | Finished meta-testing... +2025-04-03 09:43:43 | [maml_trainer] epoch #189 | Saving snapshot... +2025-04-03 09:44:05 | [maml_trainer] epoch #189 | Saved +2025-04-03 09:44:05 | [maml_trainer] epoch #189 | Time 72523.84 s +2025-04-03 09:44:05 | [maml_trainer] epoch #189 | EpochTime 369.99 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3268 +Average/AverageReturn -174.351 +Average/Iteration 189 +Average/MaxReturn -152.691 +Average/MinReturn -233.951 +Average/NumEpisodes 80 +Average/StdReturn 19.0583 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.484 +GaussianMLPPolicy/KLAfter 0.00354673 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.05505e-05 +GaussianMLPPolicy/LossBefore 4.18723e-09 +GaussianMLPPolicy/dLoss -3.05463e-05 +Iteration 189 +MetaTest/Average/AverageDiscountedReturn -176.818 +MetaTest/Average/AverageReturn -176.818 +MetaTest/Average/Iteration 189 +MetaTest/Average/MaxReturn -153.283 +MetaTest/Average/MinReturn -235.618 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.417 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.818 +MetaTest/__unnamed_task__/AverageReturn -176.818 +MetaTest/__unnamed_task__/Iteration 189 +MetaTest/__unnamed_task__/MaxReturn -153.283 +MetaTest/__unnamed_task__/MinReturn -235.618 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.417 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.08e+06 +__unnamed_task__/AverageDiscountedReturn -73.3268 +__unnamed_task__/AverageReturn -174.351 +__unnamed_task__/Iteration 189 +__unnamed_task__/MaxReturn -152.691 +__unnamed_task__/MinReturn -233.951 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.0583 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:48:47 | [maml_trainer] epoch #190 | Sampling for adapation and meta-testing... +2025-04-03 09:49:57 | [maml_trainer] epoch #190 | Finished meta-testing... +2025-04-03 09:49:57 | [maml_trainer] epoch #190 | Saving snapshot... +2025-04-03 09:50:20 | [maml_trainer] epoch #190 | Saved +2025-04-03 09:50:20 | [maml_trainer] epoch #190 | Time 72898.42 s +2025-04-03 09:50:20 | [maml_trainer] epoch #190 | EpochTime 374.57 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.1794 +Average/AverageReturn -171.868 +Average/Iteration 190 +Average/MaxReturn -151.68 +Average/MinReturn -238.28 +Average/NumEpisodes 80 +Average/StdReturn 17.4344 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4804 +GaussianMLPPolicy/KLAfter 0.0034989 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -8.11826e-06 +GaussianMLPPolicy/LossBefore -7.51019e-09 +GaussianMLPPolicy/dLoss 8.11075e-06 +Iteration 190 +MetaTest/Average/AverageDiscountedReturn -165.575 +MetaTest/Average/AverageReturn -165.575 +MetaTest/Average/Iteration 190 +MetaTest/Average/MaxReturn -151.023 +MetaTest/Average/MinReturn -175.477 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.76388 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.575 +MetaTest/__unnamed_task__/AverageReturn -165.575 +MetaTest/__unnamed_task__/Iteration 190 +MetaTest/__unnamed_task__/MaxReturn -151.023 +MetaTest/__unnamed_task__/MinReturn -175.477 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.76388 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.112e+06 +__unnamed_task__/AverageDiscountedReturn -72.1794 +__unnamed_task__/AverageReturn -171.868 +__unnamed_task__/Iteration 190 +__unnamed_task__/MaxReturn -151.68 +__unnamed_task__/MinReturn -238.28 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.4344 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 09:55:11 | [maml_trainer] epoch #191 | Sampling for adapation and meta-testing... +2025-04-03 09:56:24 | [maml_trainer] epoch #191 | Finished meta-testing... +2025-04-03 09:56:24 | [maml_trainer] epoch #191 | Saving snapshot... +2025-04-03 09:56:47 | [maml_trainer] epoch #191 | Saved +2025-04-03 09:56:47 | [maml_trainer] epoch #191 | Time 73285.62 s +2025-04-03 09:56:47 | [maml_trainer] epoch #191 | EpochTime 387.19 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2354 +Average/AverageReturn -174.645 +Average/Iteration 191 +Average/MaxReturn -151.594 +Average/MinReturn -270.829 +Average/NumEpisodes 80 +Average/StdReturn 19.7564 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4757 +GaussianMLPPolicy/KLAfter 0.00394861 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 4.74253e-05 +GaussianMLPPolicy/LossBefore -1.56462e-09 +GaussianMLPPolicy/dLoss -4.74269e-05 +Iteration 191 +MetaTest/Average/AverageDiscountedReturn -169.466 +MetaTest/Average/AverageReturn -169.466 +MetaTest/Average/Iteration 191 +MetaTest/Average/MaxReturn -155.407 +MetaTest/Average/MinReturn -220.426 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.9617 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.466 +MetaTest/__unnamed_task__/AverageReturn -169.466 +MetaTest/__unnamed_task__/Iteration 191 +MetaTest/__unnamed_task__/MaxReturn -155.407 +MetaTest/__unnamed_task__/MinReturn -220.426 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.9617 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.144e+06 +__unnamed_task__/AverageDiscountedReturn -73.2354 +__unnamed_task__/AverageReturn -174.645 +__unnamed_task__/Iteration 191 +__unnamed_task__/MaxReturn -151.594 +__unnamed_task__/MinReturn -270.829 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.7564 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:01:41 | [maml_trainer] epoch #192 | Sampling for adapation and meta-testing... +2025-04-03 10:02:53 | [maml_trainer] epoch #192 | Finished meta-testing... +2025-04-03 10:02:53 | [maml_trainer] epoch #192 | Saving snapshot... +2025-04-03 10:03:15 | [maml_trainer] epoch #192 | Saved +2025-04-03 10:03:15 | [maml_trainer] epoch #192 | Time 73673.93 s +2025-04-03 10:03:15 | [maml_trainer] epoch #192 | EpochTime 388.31 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6543 +Average/AverageReturn -173.293 +Average/Iteration 192 +Average/MaxReturn -143.065 +Average/MinReturn -226.143 +Average/NumEpisodes 80 +Average/StdReturn 16.9801 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4693 +GaussianMLPPolicy/KLAfter 0.00611636 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.54909e-05 +GaussianMLPPolicy/LossBefore -1.56462e-09 +GaussianMLPPolicy/dLoss 3.54894e-05 +Iteration 192 +MetaTest/Average/AverageDiscountedReturn -173.424 +MetaTest/Average/AverageReturn -173.424 +MetaTest/Average/Iteration 192 +MetaTest/Average/MaxReturn -150.616 +MetaTest/Average/MinReturn -255.521 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.5029 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.424 +MetaTest/__unnamed_task__/AverageReturn -173.424 +MetaTest/__unnamed_task__/Iteration 192 +MetaTest/__unnamed_task__/MaxReturn -150.616 +MetaTest/__unnamed_task__/MinReturn -255.521 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.5029 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.176e+06 +__unnamed_task__/AverageDiscountedReturn -72.6543 +__unnamed_task__/AverageReturn -173.293 +__unnamed_task__/Iteration 192 +__unnamed_task__/MaxReturn -143.065 +__unnamed_task__/MinReturn -226.143 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.9801 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:08:06 | [maml_trainer] epoch #193 | Sampling for adapation and meta-testing... +2025-04-03 10:09:18 | [maml_trainer] epoch #193 | Finished meta-testing... +2025-04-03 10:09:18 | [maml_trainer] epoch #193 | Saving snapshot... +2025-04-03 10:09:42 | [maml_trainer] epoch #193 | Saved +2025-04-03 10:09:42 | [maml_trainer] epoch #193 | Time 74060.54 s +2025-04-03 10:09:42 | [maml_trainer] epoch #193 | EpochTime 386.60 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2135 +Average/AverageReturn -172.01 +Average/Iteration 193 +Average/MaxReturn -152.824 +Average/MinReturn -222.3 +Average/NumEpisodes 80 +Average/StdReturn 15.4261 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4634 +GaussianMLPPolicy/KLAfter 0.00441854 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.57083e-05 +GaussianMLPPolicy/LossBefore 1.89245e-09 +GaussianMLPPolicy/dLoss 1.57102e-05 +Iteration 193 +MetaTest/Average/AverageDiscountedReturn -169.785 +MetaTest/Average/AverageReturn -169.785 +MetaTest/Average/Iteration 193 +MetaTest/Average/MaxReturn -149.735 +MetaTest/Average/MinReturn -220.124 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.9628 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.785 +MetaTest/__unnamed_task__/AverageReturn -169.785 +MetaTest/__unnamed_task__/Iteration 193 +MetaTest/__unnamed_task__/MaxReturn -149.735 +MetaTest/__unnamed_task__/MinReturn -220.124 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.9628 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.208e+06 +__unnamed_task__/AverageDiscountedReturn -72.2135 +__unnamed_task__/AverageReturn -172.01 +__unnamed_task__/Iteration 193 +__unnamed_task__/MaxReturn -152.824 +__unnamed_task__/MinReturn -222.3 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4261 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:14:35 | [maml_trainer] epoch #194 | Sampling for adapation and meta-testing... +2025-04-03 10:15:48 | [maml_trainer] epoch #194 | Finished meta-testing... +2025-04-03 10:15:48 | [maml_trainer] epoch #194 | Saving snapshot... +2025-04-03 10:16:12 | [maml_trainer] epoch #194 | Saved +2025-04-03 10:16:12 | [maml_trainer] epoch #194 | Time 74450.15 s +2025-04-03 10:16:12 | [maml_trainer] epoch #194 | EpochTime 389.61 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4604 +Average/AverageReturn -170.759 +Average/Iteration 194 +Average/MaxReturn -148.644 +Average/MinReturn -229.209 +Average/NumEpisodes 80 +Average/StdReturn 15.4885 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4569 +GaussianMLPPolicy/KLAfter 0.00460288 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.22281e-06 +GaussianMLPPolicy/LossBefore 8.10623e-09 +GaussianMLPPolicy/dLoss 6.23092e-06 +Iteration 194 +MetaTest/Average/AverageDiscountedReturn -175.066 +MetaTest/Average/AverageReturn -175.066 +MetaTest/Average/Iteration 194 +MetaTest/Average/MaxReturn -146.831 +MetaTest/Average/MinReturn -249.595 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.4845 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.066 +MetaTest/__unnamed_task__/AverageReturn -175.066 +MetaTest/__unnamed_task__/Iteration 194 +MetaTest/__unnamed_task__/MaxReturn -146.831 +MetaTest/__unnamed_task__/MinReturn -249.595 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.4845 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.24e+06 +__unnamed_task__/AverageDiscountedReturn -71.4604 +__unnamed_task__/AverageReturn -170.759 +__unnamed_task__/Iteration 194 +__unnamed_task__/MaxReturn -148.644 +__unnamed_task__/MinReturn -229.209 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4885 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:21:06 | [maml_trainer] epoch #195 | Sampling for adapation and meta-testing... +2025-04-03 10:22:19 | [maml_trainer] epoch #195 | Finished meta-testing... +2025-04-03 10:22:19 | [maml_trainer] epoch #195 | Saving snapshot... +2025-04-03 10:22:43 | [maml_trainer] epoch #195 | Saved +2025-04-03 10:22:43 | [maml_trainer] epoch #195 | Time 74841.23 s +2025-04-03 10:22:43 | [maml_trainer] epoch #195 | EpochTime 391.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.569 +Average/AverageReturn -173.541 +Average/Iteration 195 +Average/MaxReturn -148.751 +Average/MinReturn -229.753 +Average/NumEpisodes 80 +Average/StdReturn 14.4051 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4514 +GaussianMLPPolicy/KLAfter 0.00334807 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.41809e-05 +GaussianMLPPolicy/LossBefore -8.19564e-09 +GaussianMLPPolicy/dLoss 2.41727e-05 +Iteration 195 +MetaTest/Average/AverageDiscountedReturn -174.684 +MetaTest/Average/AverageReturn -174.684 +MetaTest/Average/Iteration 195 +MetaTest/Average/MaxReturn -153.499 +MetaTest/Average/MinReturn -219.68 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5744 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.684 +MetaTest/__unnamed_task__/AverageReturn -174.684 +MetaTest/__unnamed_task__/Iteration 195 +MetaTest/__unnamed_task__/MaxReturn -153.499 +MetaTest/__unnamed_task__/MinReturn -219.68 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5744 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.272e+06 +__unnamed_task__/AverageDiscountedReturn -72.569 +__unnamed_task__/AverageReturn -173.541 +__unnamed_task__/Iteration 195 +__unnamed_task__/MaxReturn -148.751 +__unnamed_task__/MinReturn -229.753 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.4051 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:27:39 | [maml_trainer] epoch #196 | Sampling for adapation and meta-testing... +2025-04-03 10:28:51 | [maml_trainer] epoch #196 | Finished meta-testing... +2025-04-03 10:28:51 | [maml_trainer] epoch #196 | Saving snapshot... +2025-04-03 10:29:14 | [maml_trainer] epoch #196 | Saved +2025-04-03 10:29:14 | [maml_trainer] epoch #196 | Time 75232.46 s +2025-04-03 10:29:14 | [maml_trainer] epoch #196 | EpochTime 391.23 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.0535 +Average/AverageReturn -169.019 +Average/Iteration 196 +Average/MaxReturn -149.711 +Average/MinReturn -230.572 +Average/NumEpisodes 80 +Average/StdReturn 12.9821 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4464 +GaussianMLPPolicy/KLAfter 0.00261421 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.97768e-06 +GaussianMLPPolicy/LossBefore -2.83122e-09 +GaussianMLPPolicy/dLoss 2.97485e-06 +Iteration 196 +MetaTest/Average/AverageDiscountedReturn -171.325 +MetaTest/Average/AverageReturn -171.325 +MetaTest/Average/Iteration 196 +MetaTest/Average/MaxReturn -155.191 +MetaTest/Average/MinReturn -216.619 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.3194 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.325 +MetaTest/__unnamed_task__/AverageReturn -171.325 +MetaTest/__unnamed_task__/Iteration 196 +MetaTest/__unnamed_task__/MaxReturn -155.191 +MetaTest/__unnamed_task__/MinReturn -216.619 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.3194 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.304e+06 +__unnamed_task__/AverageDiscountedReturn -71.0535 +__unnamed_task__/AverageReturn -169.019 +__unnamed_task__/Iteration 196 +__unnamed_task__/MaxReturn -149.711 +__unnamed_task__/MinReturn -230.572 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.9821 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:34:02 | [maml_trainer] epoch #197 | Sampling for adapation and meta-testing... +2025-04-03 10:35:15 | [maml_trainer] epoch #197 | Finished meta-testing... +2025-04-03 10:35:15 | [maml_trainer] epoch #197 | Saving snapshot... +2025-04-03 10:35:38 | [maml_trainer] epoch #197 | Saved +2025-04-03 10:35:38 | [maml_trainer] epoch #197 | Time 75617.00 s +2025-04-03 10:35:38 | [maml_trainer] epoch #197 | EpochTime 384.54 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7298 +Average/AverageReturn -170.644 +Average/Iteration 197 +Average/MaxReturn -148.536 +Average/MinReturn -235.622 +Average/NumEpisodes 80 +Average/StdReturn 16.0787 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.44 +GaussianMLPPolicy/KLAfter 0.00250695 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.22303e-05 +GaussianMLPPolicy/LossBefore -1.19209e-10 +GaussianMLPPolicy/dLoss 3.22302e-05 +Iteration 197 +MetaTest/Average/AverageDiscountedReturn -178.853 +MetaTest/Average/AverageReturn -178.853 +MetaTest/Average/Iteration 197 +MetaTest/Average/MaxReturn -152.601 +MetaTest/Average/MinReturn -235.277 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.2293 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.853 +MetaTest/__unnamed_task__/AverageReturn -178.853 +MetaTest/__unnamed_task__/Iteration 197 +MetaTest/__unnamed_task__/MaxReturn -152.601 +MetaTest/__unnamed_task__/MinReturn -235.277 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.2293 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.336e+06 +__unnamed_task__/AverageDiscountedReturn -71.7298 +__unnamed_task__/AverageReturn -170.644 +__unnamed_task__/Iteration 197 +__unnamed_task__/MaxReturn -148.536 +__unnamed_task__/MinReturn -235.622 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.0787 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:40:32 | [maml_trainer] epoch #198 | Sampling for adapation and meta-testing... +2025-04-03 10:41:44 | [maml_trainer] epoch #198 | Finished meta-testing... +2025-04-03 10:41:44 | [maml_trainer] epoch #198 | Saving snapshot... +2025-04-03 10:42:08 | [maml_trainer] epoch #198 | Saved +2025-04-03 10:42:08 | [maml_trainer] epoch #198 | Time 76006.19 s +2025-04-03 10:42:08 | [maml_trainer] epoch #198 | EpochTime 389.18 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3742 +Average/AverageReturn -170.032 +Average/Iteration 198 +Average/MaxReturn -151.409 +Average/MinReturn -230.068 +Average/NumEpisodes 80 +Average/StdReturn 12.6645 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4327 +GaussianMLPPolicy/KLAfter 0.00380486 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.36153e-06 +GaussianMLPPolicy/LossBefore -1.74344e-09 +GaussianMLPPolicy/dLoss -9.36328e-06 +Iteration 198 +MetaTest/Average/AverageDiscountedReturn -178.505 +MetaTest/Average/AverageReturn -178.505 +MetaTest/Average/Iteration 198 +MetaTest/Average/MaxReturn -160.186 +MetaTest/Average/MinReturn -221.839 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.9681 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.505 +MetaTest/__unnamed_task__/AverageReturn -178.505 +MetaTest/__unnamed_task__/Iteration 198 +MetaTest/__unnamed_task__/MaxReturn -160.186 +MetaTest/__unnamed_task__/MinReturn -221.839 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.9681 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.368e+06 +__unnamed_task__/AverageDiscountedReturn -71.3742 +__unnamed_task__/AverageReturn -170.032 +__unnamed_task__/Iteration 198 +__unnamed_task__/MaxReturn -151.409 +__unnamed_task__/MinReturn -230.068 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.6645 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:47:04 | [maml_trainer] epoch #199 | Sampling for adapation and meta-testing... +2025-04-03 10:48:16 | [maml_trainer] epoch #199 | Finished meta-testing... +2025-04-03 10:48:16 | [maml_trainer] epoch #199 | Saving snapshot... +2025-04-03 10:48:40 | [maml_trainer] epoch #199 | Saved +2025-04-03 10:48:40 | [maml_trainer] epoch #199 | Time 76398.56 s +2025-04-03 10:48:40 | [maml_trainer] epoch #199 | EpochTime 392.37 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3596 +Average/AverageReturn -172.166 +Average/Iteration 199 +Average/MaxReturn -154.617 +Average/MinReturn -250.916 +Average/NumEpisodes 80 +Average/StdReturn 18.3191 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4259 +GaussianMLPPolicy/KLAfter 0.00374439 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.78124e-06 +GaussianMLPPolicy/LossBefore 6.67572e-09 +GaussianMLPPolicy/dLoss 7.78791e-06 +Iteration 199 +MetaTest/Average/AverageDiscountedReturn -170.35 +MetaTest/Average/AverageReturn -170.35 +MetaTest/Average/Iteration 199 +MetaTest/Average/MaxReturn -153.922 +MetaTest/Average/MinReturn -206.514 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.5206 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.35 +MetaTest/__unnamed_task__/AverageReturn -170.35 +MetaTest/__unnamed_task__/Iteration 199 +MetaTest/__unnamed_task__/MaxReturn -153.922 +MetaTest/__unnamed_task__/MinReturn -206.514 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.5206 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.4e+06 +__unnamed_task__/AverageDiscountedReturn -72.3596 +__unnamed_task__/AverageReturn -172.166 +__unnamed_task__/Iteration 199 +__unnamed_task__/MaxReturn -154.617 +__unnamed_task__/MinReturn -250.916 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.3191 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 10:53:36 | [maml_trainer] epoch #200 | Sampling for adapation and meta-testing... +2025-04-03 10:54:48 | [maml_trainer] epoch #200 | Finished meta-testing... +2025-04-03 10:54:48 | [maml_trainer] epoch #200 | Saving snapshot... +2025-04-03 10:55:10 | [maml_trainer] epoch #200 | Saved +2025-04-03 10:55:10 | [maml_trainer] epoch #200 | Time 76789.00 s +2025-04-03 10:55:10 | [maml_trainer] epoch #200 | EpochTime 390.44 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3406 +Average/AverageReturn -172.282 +Average/Iteration 200 +Average/MaxReturn -119.738 +Average/MinReturn -250.411 +Average/NumEpisodes 80 +Average/StdReturn 17.9517 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4205 +GaussianMLPPolicy/KLAfter 0.00425712 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.52458e-06 +GaussianMLPPolicy/LossBefore -4.67896e-09 +GaussianMLPPolicy/dLoss 7.5199e-06 +Iteration 200 +MetaTest/Average/AverageDiscountedReturn -174.059 +MetaTest/Average/AverageReturn -174.059 +MetaTest/Average/Iteration 200 +MetaTest/Average/MaxReturn -157.483 +MetaTest/Average/MinReturn -234.855 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.306 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.059 +MetaTest/__unnamed_task__/AverageReturn -174.059 +MetaTest/__unnamed_task__/Iteration 200 +MetaTest/__unnamed_task__/MaxReturn -157.483 +MetaTest/__unnamed_task__/MinReturn -234.855 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.306 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.432e+06 +__unnamed_task__/AverageDiscountedReturn -72.3406 +__unnamed_task__/AverageReturn -172.282 +__unnamed_task__/Iteration 200 +__unnamed_task__/MaxReturn -119.738 +__unnamed_task__/MinReturn -250.411 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.9517 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:00:05 | [maml_trainer] epoch #201 | Sampling for adapation and meta-testing... +2025-04-03 11:01:18 | [maml_trainer] epoch #201 | Finished meta-testing... +2025-04-03 11:01:18 | [maml_trainer] epoch #201 | Saving snapshot... +2025-04-03 11:01:40 | [maml_trainer] epoch #201 | Saved +2025-04-03 11:01:40 | [maml_trainer] epoch #201 | Time 77178.84 s +2025-04-03 11:01:40 | [maml_trainer] epoch #201 | EpochTime 389.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.7318 +Average/AverageReturn -175.446 +Average/Iteration 201 +Average/MaxReturn -151.226 +Average/MinReturn -254.565 +Average/NumEpisodes 80 +Average/StdReturn 19.4602 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4162 +GaussianMLPPolicy/KLAfter 0.00337832 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 0.000119648 +GaussianMLPPolicy/LossBefore 4.14252e-09 +GaussianMLPPolicy/dLoss -0.000119644 +Iteration 201 +MetaTest/Average/AverageDiscountedReturn -171.998 +MetaTest/Average/AverageReturn -171.998 +MetaTest/Average/Iteration 201 +MetaTest/Average/MaxReturn -153.857 +MetaTest/Average/MinReturn -222.988 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.8082 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.998 +MetaTest/__unnamed_task__/AverageReturn -171.998 +MetaTest/__unnamed_task__/Iteration 201 +MetaTest/__unnamed_task__/MaxReturn -153.857 +MetaTest/__unnamed_task__/MinReturn -222.988 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.8082 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.464e+06 +__unnamed_task__/AverageDiscountedReturn -73.7318 +__unnamed_task__/AverageReturn -175.446 +__unnamed_task__/Iteration 201 +__unnamed_task__/MaxReturn -151.226 +__unnamed_task__/MinReturn -254.565 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.4602 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:06:34 | [maml_trainer] epoch #202 | Sampling for adapation and meta-testing... +2025-04-03 11:07:47 | [maml_trainer] epoch #202 | Finished meta-testing... +2025-04-03 11:07:47 | [maml_trainer] epoch #202 | Saving snapshot... +2025-04-03 11:08:10 | [maml_trainer] epoch #202 | Saved +2025-04-03 11:08:10 | [maml_trainer] epoch #202 | Time 77568.76 s +2025-04-03 11:08:10 | [maml_trainer] epoch #202 | EpochTime 389.92 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3048 +Average/AverageReturn -172.325 +Average/Iteration 202 +Average/MaxReturn -153.732 +Average/MinReturn -234.398 +Average/NumEpisodes 80 +Average/StdReturn 15.4998 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4113 +GaussianMLPPolicy/KLAfter 0.003396 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.29593e-05 +GaussianMLPPolicy/LossBefore 4.93228e-09 +GaussianMLPPolicy/dLoss -2.29543e-05 +Iteration 202 +MetaTest/Average/AverageDiscountedReturn -173.854 +MetaTest/Average/AverageReturn -173.854 +MetaTest/Average/Iteration 202 +MetaTest/Average/MaxReturn -156.38 +MetaTest/Average/MinReturn -229.534 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.0335 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.854 +MetaTest/__unnamed_task__/AverageReturn -173.854 +MetaTest/__unnamed_task__/Iteration 202 +MetaTest/__unnamed_task__/MaxReturn -156.38 +MetaTest/__unnamed_task__/MinReturn -229.534 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.0335 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.496e+06 +__unnamed_task__/AverageDiscountedReturn -72.3048 +__unnamed_task__/AverageReturn -172.325 +__unnamed_task__/Iteration 202 +__unnamed_task__/MaxReturn -153.732 +__unnamed_task__/MinReturn -234.398 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.4998 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:13:02 | [maml_trainer] epoch #203 | Sampling for adapation and meta-testing... +2025-04-03 11:14:15 | [maml_trainer] epoch #203 | Finished meta-testing... +2025-04-03 11:14:15 | [maml_trainer] epoch #203 | Saving snapshot... +2025-04-03 11:14:39 | [maml_trainer] epoch #203 | Saved +2025-04-03 11:14:39 | [maml_trainer] epoch #203 | Time 77957.60 s +2025-04-03 11:14:39 | [maml_trainer] epoch #203 | EpochTime 388.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3215 +Average/AverageReturn -172.099 +Average/Iteration 203 +Average/MaxReturn -149.741 +Average/MinReturn -227.808 +Average/NumEpisodes 80 +Average/StdReturn 13.2421 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.407 +GaussianMLPPolicy/KLAfter 0.00259991 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.35972e-05 +GaussianMLPPolicy/LossBefore 2.83122e-10 +GaussianMLPPolicy/dLoss -3.35969e-05 +Iteration 203 +MetaTest/Average/AverageDiscountedReturn -168.546 +MetaTest/Average/AverageReturn -168.546 +MetaTest/Average/Iteration 203 +MetaTest/Average/MaxReturn -157.366 +MetaTest/Average/MinReturn -183.89 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.28675 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.546 +MetaTest/__unnamed_task__/AverageReturn -168.546 +MetaTest/__unnamed_task__/Iteration 203 +MetaTest/__unnamed_task__/MaxReturn -157.366 +MetaTest/__unnamed_task__/MinReturn -183.89 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.28675 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.528e+06 +__unnamed_task__/AverageDiscountedReturn -72.3215 +__unnamed_task__/AverageReturn -172.099 +__unnamed_task__/Iteration 203 +__unnamed_task__/MaxReturn -149.741 +__unnamed_task__/MinReturn -227.808 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 13.2421 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:19:33 | [maml_trainer] epoch #204 | Sampling for adapation and meta-testing... +2025-04-03 11:20:47 | [maml_trainer] epoch #204 | Finished meta-testing... +2025-04-03 11:20:47 | [maml_trainer] epoch #204 | Saving snapshot... +2025-04-03 11:21:11 | [maml_trainer] epoch #204 | Saved +2025-04-03 11:21:11 | [maml_trainer] epoch #204 | Time 78349.40 s +2025-04-03 11:21:11 | [maml_trainer] epoch #204 | EpochTime 391.80 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7332 +Average/AverageReturn -172.374 +Average/Iteration 204 +Average/MaxReturn -151.481 +Average/MinReturn -240.545 +Average/NumEpisodes 80 +Average/StdReturn 15.496 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4039 +GaussianMLPPolicy/KLAfter 0.00482965 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.47903e-05 +GaussianMLPPolicy/LossBefore 2.5183e-09 +GaussianMLPPolicy/dLoss 1.47929e-05 +Iteration 204 +MetaTest/Average/AverageDiscountedReturn -179.001 +MetaTest/Average/AverageReturn -179.001 +MetaTest/Average/Iteration 204 +MetaTest/Average/MaxReturn -156.937 +MetaTest/Average/MinReturn -247.703 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.1344 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -179.001 +MetaTest/__unnamed_task__/AverageReturn -179.001 +MetaTest/__unnamed_task__/Iteration 204 +MetaTest/__unnamed_task__/MaxReturn -156.937 +MetaTest/__unnamed_task__/MinReturn -247.703 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.1344 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.56e+06 +__unnamed_task__/AverageDiscountedReturn -72.7332 +__unnamed_task__/AverageReturn -172.374 +__unnamed_task__/Iteration 204 +__unnamed_task__/MaxReturn -151.481 +__unnamed_task__/MinReturn -240.545 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.496 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:26:05 | [maml_trainer] epoch #205 | Sampling for adapation and meta-testing... +2025-04-03 11:27:18 | [maml_trainer] epoch #205 | Finished meta-testing... +2025-04-03 11:27:18 | [maml_trainer] epoch #205 | Saving snapshot... +2025-04-03 11:27:41 | [maml_trainer] epoch #205 | Saved +2025-04-03 11:27:41 | [maml_trainer] epoch #205 | Time 78739.23 s +2025-04-03 11:27:41 | [maml_trainer] epoch #205 | EpochTime 389.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4171 +Average/AverageReturn -169.618 +Average/Iteration 205 +Average/MaxReturn -151.861 +Average/MinReturn -229.365 +Average/NumEpisodes 80 +Average/StdReturn 12.1097 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4022 +GaussianMLPPolicy/KLAfter 0.00370394 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.62171e-06 +GaussianMLPPolicy/LossBefore -1.89245e-09 +GaussianMLPPolicy/dLoss 2.61982e-06 +Iteration 205 +MetaTest/Average/AverageDiscountedReturn -176.204 +MetaTest/Average/AverageReturn -176.204 +MetaTest/Average/Iteration 205 +MetaTest/Average/MaxReturn -155.965 +MetaTest/Average/MinReturn -212.274 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.242 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.204 +MetaTest/__unnamed_task__/AverageReturn -176.204 +MetaTest/__unnamed_task__/Iteration 205 +MetaTest/__unnamed_task__/MaxReturn -155.965 +MetaTest/__unnamed_task__/MinReturn -212.274 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.242 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.592e+06 +__unnamed_task__/AverageDiscountedReturn -71.4171 +__unnamed_task__/AverageReturn -169.618 +__unnamed_task__/Iteration 205 +__unnamed_task__/MaxReturn -151.861 +__unnamed_task__/MinReturn -229.365 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.1097 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:32:33 | [maml_trainer] epoch #206 | Sampling for adapation and meta-testing... +2025-04-03 11:33:46 | [maml_trainer] epoch #206 | Finished meta-testing... +2025-04-03 11:33:46 | [maml_trainer] epoch #206 | Saving snapshot... +2025-04-03 11:34:09 | [maml_trainer] epoch #206 | Saved +2025-04-03 11:34:09 | [maml_trainer] epoch #206 | Time 79127.57 s +2025-04-03 11:34:09 | [maml_trainer] epoch #206 | EpochTime 388.34 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8731 +Average/AverageReturn -173.366 +Average/Iteration 206 +Average/MaxReturn -151.412 +Average/MinReturn -240.445 +Average/NumEpisodes 80 +Average/StdReturn 19.0948 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.4012 +GaussianMLPPolicy/KLAfter 0.00340824 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.35096e-05 +GaussianMLPPolicy/LossBefore 4.26173e-09 +GaussianMLPPolicy/dLoss -2.35053e-05 +Iteration 206 +MetaTest/Average/AverageDiscountedReturn -172.852 +MetaTest/Average/AverageReturn -172.852 +MetaTest/Average/Iteration 206 +MetaTest/Average/MaxReturn -149.551 +MetaTest/Average/MinReturn -225.42 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5318 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.852 +MetaTest/__unnamed_task__/AverageReturn -172.852 +MetaTest/__unnamed_task__/Iteration 206 +MetaTest/__unnamed_task__/MaxReturn -149.551 +MetaTest/__unnamed_task__/MinReturn -225.42 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5318 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.624e+06 +__unnamed_task__/AverageDiscountedReturn -72.8731 +__unnamed_task__/AverageReturn -173.366 +__unnamed_task__/Iteration 206 +__unnamed_task__/MaxReturn -151.412 +__unnamed_task__/MinReturn -240.445 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.0948 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:39:00 | [maml_trainer] epoch #207 | Sampling for adapation and meta-testing... +2025-04-03 11:40:13 | [maml_trainer] epoch #207 | Finished meta-testing... +2025-04-03 11:40:13 | [maml_trainer] epoch #207 | Saving snapshot... +2025-04-03 11:40:37 | [maml_trainer] epoch #207 | Saved +2025-04-03 11:40:37 | [maml_trainer] epoch #207 | Time 79515.13 s +2025-04-03 11:40:37 | [maml_trainer] epoch #207 | EpochTime 387.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.411 +Average/AverageReturn -169.181 +Average/Iteration 207 +Average/MaxReturn -145.279 +Average/MinReturn -200.278 +Average/NumEpisodes 80 +Average/StdReturn 9.08822 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3996 +GaussianMLPPolicy/KLAfter 0.00316732 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.83874e-06 +GaussianMLPPolicy/LossBefore 4.11272e-09 +GaussianMLPPolicy/dLoss 5.84285e-06 +Iteration 207 +MetaTest/Average/AverageDiscountedReturn -169.184 +MetaTest/Average/AverageReturn -169.184 +MetaTest/Average/Iteration 207 +MetaTest/Average/MaxReturn -157.929 +MetaTest/Average/MinReturn -185.371 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.10823 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.184 +MetaTest/__unnamed_task__/AverageReturn -169.184 +MetaTest/__unnamed_task__/Iteration 207 +MetaTest/__unnamed_task__/MaxReturn -157.929 +MetaTest/__unnamed_task__/MinReturn -185.371 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.10823 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.656e+06 +__unnamed_task__/AverageDiscountedReturn -71.411 +__unnamed_task__/AverageReturn -169.181 +__unnamed_task__/Iteration 207 +__unnamed_task__/MaxReturn -145.279 +__unnamed_task__/MinReturn -200.278 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 9.08822 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:45:32 | [maml_trainer] epoch #208 | Sampling for adapation and meta-testing... +2025-04-03 11:46:45 | [maml_trainer] epoch #208 | Finished meta-testing... +2025-04-03 11:46:45 | [maml_trainer] epoch #208 | Saving snapshot... +2025-04-03 11:47:08 | [maml_trainer] epoch #208 | Saved +2025-04-03 11:47:08 | [maml_trainer] epoch #208 | Time 79907.10 s +2025-04-03 11:47:08 | [maml_trainer] epoch #208 | EpochTime 391.97 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.0881 +Average/AverageReturn -173.451 +Average/Iteration 208 +Average/MaxReturn -152.508 +Average/MinReturn -254.375 +Average/NumEpisodes 80 +Average/StdReturn 18.1932 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3964 +GaussianMLPPolicy/KLAfter 0.00250068 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.1465e-05 +GaussianMLPPolicy/LossBefore 1.66893e-09 +GaussianMLPPolicy/dLoss -1.14633e-05 +Iteration 208 +MetaTest/Average/AverageDiscountedReturn -174.085 +MetaTest/Average/AverageReturn -174.085 +MetaTest/Average/Iteration 208 +MetaTest/Average/MaxReturn -160.104 +MetaTest/Average/MinReturn -219.369 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.6972 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.085 +MetaTest/__unnamed_task__/AverageReturn -174.085 +MetaTest/__unnamed_task__/Iteration 208 +MetaTest/__unnamed_task__/MaxReturn -160.104 +MetaTest/__unnamed_task__/MinReturn -219.369 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.6972 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.688e+06 +__unnamed_task__/AverageDiscountedReturn -73.0881 +__unnamed_task__/AverageReturn -173.451 +__unnamed_task__/Iteration 208 +__unnamed_task__/MaxReturn -152.508 +__unnamed_task__/MinReturn -254.375 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.1932 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:52:05 | [maml_trainer] epoch #209 | Sampling for adapation and meta-testing... +2025-04-03 11:53:19 | [maml_trainer] epoch #209 | Finished meta-testing... +2025-04-03 11:53:19 | [maml_trainer] epoch #209 | Saving snapshot... +2025-04-03 11:53:42 | [maml_trainer] epoch #209 | Saved +2025-04-03 11:53:42 | [maml_trainer] epoch #209 | Time 80300.38 s +2025-04-03 11:53:42 | [maml_trainer] epoch #209 | EpochTime 393.28 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2161 +Average/AverageReturn -174.095 +Average/Iteration 209 +Average/MaxReturn -153.926 +Average/MinReturn -248.431 +Average/NumEpisodes 80 +Average/StdReturn 15.2016 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3944 +GaussianMLPPolicy/KLAfter 0.00238754 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.39746e-05 +GaussianMLPPolicy/LossBefore 7.15256e-09 +GaussianMLPPolicy/dLoss -1.39674e-05 +Iteration 209 +MetaTest/Average/AverageDiscountedReturn -179.621 +MetaTest/Average/AverageReturn -179.621 +MetaTest/Average/Iteration 209 +MetaTest/Average/MaxReturn -157.865 +MetaTest/Average/MinReturn -236.342 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.9299 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -179.621 +MetaTest/__unnamed_task__/AverageReturn -179.621 +MetaTest/__unnamed_task__/Iteration 209 +MetaTest/__unnamed_task__/MaxReturn -157.865 +MetaTest/__unnamed_task__/MinReturn -236.342 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.9299 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.72e+06 +__unnamed_task__/AverageDiscountedReturn -73.2161 +__unnamed_task__/AverageReturn -174.095 +__unnamed_task__/Iteration 209 +__unnamed_task__/MaxReturn -153.926 +__unnamed_task__/MinReturn -248.431 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.2016 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 11:58:36 | [maml_trainer] epoch #210 | Sampling for adapation and meta-testing... +2025-04-03 11:59:49 | [maml_trainer] epoch #210 | Finished meta-testing... +2025-04-03 11:59:49 | [maml_trainer] epoch #210 | Saving snapshot... +2025-04-03 12:00:13 | [maml_trainer] epoch #210 | Saved +2025-04-03 12:00:13 | [maml_trainer] epoch #210 | Time 80691.23 s +2025-04-03 12:00:13 | [maml_trainer] epoch #210 | EpochTime 390.85 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.1016 +Average/AverageReturn -174.082 +Average/Iteration 210 +Average/MaxReturn -153.492 +Average/MinReturn -235.94 +Average/NumEpisodes 80 +Average/StdReturn 14.5668 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.393 +GaussianMLPPolicy/KLAfter 0.00210559 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.23695e-05 +GaussianMLPPolicy/LossBefore 7.15256e-10 +GaussianMLPPolicy/dLoss 1.23702e-05 +Iteration 210 +MetaTest/Average/AverageDiscountedReturn -174.008 +MetaTest/Average/AverageReturn -174.008 +MetaTest/Average/Iteration 210 +MetaTest/Average/MaxReturn -157.768 +MetaTest/Average/MinReturn -227.339 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.667 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.008 +MetaTest/__unnamed_task__/AverageReturn -174.008 +MetaTest/__unnamed_task__/Iteration 210 +MetaTest/__unnamed_task__/MaxReturn -157.768 +MetaTest/__unnamed_task__/MinReturn -227.339 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.667 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.752e+06 +__unnamed_task__/AverageDiscountedReturn -73.1016 +__unnamed_task__/AverageReturn -174.082 +__unnamed_task__/Iteration 210 +__unnamed_task__/MaxReturn -153.492 +__unnamed_task__/MinReturn -235.94 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.5668 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:05:06 | [maml_trainer] epoch #211 | Sampling for adapation and meta-testing... +2025-04-03 12:06:20 | [maml_trainer] epoch #211 | Finished meta-testing... +2025-04-03 12:06:20 | [maml_trainer] epoch #211 | Saving snapshot... +2025-04-03 12:06:44 | [maml_trainer] epoch #211 | Saved +2025-04-03 12:06:44 | [maml_trainer] epoch #211 | Time 81082.22 s +2025-04-03 12:06:44 | [maml_trainer] epoch #211 | EpochTime 390.98 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.2211 +Average/AverageReturn -178.333 +Average/Iteration 211 +Average/MaxReturn -150.211 +Average/MinReturn -248.349 +Average/NumEpisodes 80 +Average/StdReturn 22.3911 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3913 +GaussianMLPPolicy/KLAfter 0.00196841 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.37948e-06 +GaussianMLPPolicy/LossBefore 1.84774e-09 +GaussianMLPPolicy/dLoss -3.37763e-06 +Iteration 211 +MetaTest/Average/AverageDiscountedReturn -170.131 +MetaTest/Average/AverageReturn -170.131 +MetaTest/Average/Iteration 211 +MetaTest/Average/MaxReturn -159.123 +MetaTest/Average/MinReturn -181.074 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.73866 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.131 +MetaTest/__unnamed_task__/AverageReturn -170.131 +MetaTest/__unnamed_task__/Iteration 211 +MetaTest/__unnamed_task__/MaxReturn -159.123 +MetaTest/__unnamed_task__/MinReturn -181.074 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.73866 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.784e+06 +__unnamed_task__/AverageDiscountedReturn -75.2211 +__unnamed_task__/AverageReturn -178.333 +__unnamed_task__/Iteration 211 +__unnamed_task__/MaxReturn -150.211 +__unnamed_task__/MinReturn -248.349 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.3911 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:11:40 | [maml_trainer] epoch #212 | Sampling for adapation and meta-testing... +2025-04-03 12:12:52 | [maml_trainer] epoch #212 | Finished meta-testing... +2025-04-03 12:12:52 | [maml_trainer] epoch #212 | Saving snapshot... +2025-04-03 12:13:15 | [maml_trainer] epoch #212 | Saved +2025-04-03 12:13:15 | [maml_trainer] epoch #212 | Time 81473.78 s +2025-04-03 12:13:15 | [maml_trainer] epoch #212 | EpochTime 391.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5858 +Average/AverageReturn -172.018 +Average/Iteration 212 +Average/MaxReturn -157.63 +Average/MinReturn -228.184 +Average/NumEpisodes 80 +Average/StdReturn 11.6646 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.39 +GaussianMLPPolicy/KLAfter 0.00190608 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.66599e-05 +GaussianMLPPolicy/LossBefore -3.21865e-09 +GaussianMLPPolicy/dLoss -1.66631e-05 +Iteration 212 +MetaTest/Average/AverageDiscountedReturn -173.889 +MetaTest/Average/AverageReturn -173.889 +MetaTest/Average/Iteration 212 +MetaTest/Average/MaxReturn -162.263 +MetaTest/Average/MinReturn -217.345 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3053 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.889 +MetaTest/__unnamed_task__/AverageReturn -173.889 +MetaTest/__unnamed_task__/Iteration 212 +MetaTest/__unnamed_task__/MaxReturn -162.263 +MetaTest/__unnamed_task__/MinReturn -217.345 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3053 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.816e+06 +__unnamed_task__/AverageDiscountedReturn -72.5858 +__unnamed_task__/AverageReturn -172.018 +__unnamed_task__/Iteration 212 +__unnamed_task__/MaxReturn -157.63 +__unnamed_task__/MinReturn -228.184 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 11.6646 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:18:07 | [maml_trainer] epoch #213 | Sampling for adapation and meta-testing... +2025-04-03 12:19:20 | [maml_trainer] epoch #213 | Finished meta-testing... +2025-04-03 12:19:20 | [maml_trainer] epoch #213 | Saving snapshot... +2025-04-03 12:19:42 | [maml_trainer] epoch #213 | Saved +2025-04-03 12:19:42 | [maml_trainer] epoch #213 | Time 81860.60 s +2025-04-03 12:19:42 | [maml_trainer] epoch #213 | EpochTime 386.82 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4341 +Average/AverageReturn -174.174 +Average/Iteration 213 +Average/MaxReturn -130.586 +Average/MinReturn -232.057 +Average/NumEpisodes 80 +Average/StdReturn 18.6317 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3869 +GaussianMLPPolicy/KLAfter 0.00170844 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.28664e-07 +GaussianMLPPolicy/LossBefore 3.8743e-09 +GaussianMLPPolicy/dLoss 5.32538e-07 +Iteration 213 +MetaTest/Average/AverageDiscountedReturn -173.799 +MetaTest/Average/AverageReturn -173.799 +MetaTest/Average/Iteration 213 +MetaTest/Average/MaxReturn -159.671 +MetaTest/Average/MinReturn -234.617 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5871 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.799 +MetaTest/__unnamed_task__/AverageReturn -173.799 +MetaTest/__unnamed_task__/Iteration 213 +MetaTest/__unnamed_task__/MaxReturn -159.671 +MetaTest/__unnamed_task__/MinReturn -234.617 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5871 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.848e+06 +__unnamed_task__/AverageDiscountedReturn -73.4341 +__unnamed_task__/AverageReturn -174.174 +__unnamed_task__/Iteration 213 +__unnamed_task__/MaxReturn -130.586 +__unnamed_task__/MinReturn -232.057 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.6317 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:24:35 | [maml_trainer] epoch #214 | Sampling for adapation and meta-testing... +2025-04-03 12:25:49 | [maml_trainer] epoch #214 | Finished meta-testing... +2025-04-03 12:25:49 | [maml_trainer] epoch #214 | Saving snapshot... +2025-04-03 12:26:13 | [maml_trainer] epoch #214 | Saved +2025-04-03 12:26:13 | [maml_trainer] epoch #214 | Time 82251.47 s +2025-04-03 12:26:13 | [maml_trainer] epoch #214 | EpochTime 390.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.4778 +Average/AverageReturn -174.206 +Average/Iteration 214 +Average/MaxReturn -152.352 +Average/MinReturn -235.328 +Average/NumEpisodes 80 +Average/StdReturn 16.2969 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3847 +GaussianMLPPolicy/KLAfter 0.0019233 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.52245e-05 +GaussianMLPPolicy/LossBefore -7.52509e-09 +GaussianMLPPolicy/dLoss 2.5217e-05 +Iteration 214 +MetaTest/Average/AverageDiscountedReturn -170.63 +MetaTest/Average/AverageReturn -170.63 +MetaTest/Average/Iteration 214 +MetaTest/Average/MaxReturn -155.797 +MetaTest/Average/MinReturn -182.31 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.06693 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.63 +MetaTest/__unnamed_task__/AverageReturn -170.63 +MetaTest/__unnamed_task__/Iteration 214 +MetaTest/__unnamed_task__/MaxReturn -155.797 +MetaTest/__unnamed_task__/MinReturn -182.31 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.06693 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.88e+06 +__unnamed_task__/AverageDiscountedReturn -73.4778 +__unnamed_task__/AverageReturn -174.206 +__unnamed_task__/Iteration 214 +__unnamed_task__/MaxReturn -152.352 +__unnamed_task__/MinReturn -235.328 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.2969 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:31:07 | [maml_trainer] epoch #215 | Sampling for adapation and meta-testing... +2025-04-03 12:32:19 | [maml_trainer] epoch #215 | Finished meta-testing... +2025-04-03 12:32:19 | [maml_trainer] epoch #215 | Saving snapshot... +2025-04-03 12:32:42 | [maml_trainer] epoch #215 | Saved +2025-04-03 12:32:42 | [maml_trainer] epoch #215 | Time 82641.02 s +2025-04-03 12:32:42 | [maml_trainer] epoch #215 | EpochTime 389.55 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.7351 +Average/AverageReturn -175.191 +Average/Iteration 215 +Average/MaxReturn -153.223 +Average/MinReturn -274.058 +Average/NumEpisodes 80 +Average/StdReturn 18.7181 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.384 +GaussianMLPPolicy/KLAfter 0.00204607 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.95209e-06 +GaussianMLPPolicy/LossBefore 3.05474e-09 +GaussianMLPPolicy/dLoss -7.94904e-06 +Iteration 215 +MetaTest/Average/AverageDiscountedReturn -174.386 +MetaTest/Average/AverageReturn -174.386 +MetaTest/Average/Iteration 215 +MetaTest/Average/MaxReturn -159.198 +MetaTest/Average/MinReturn -223.089 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.5241 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.386 +MetaTest/__unnamed_task__/AverageReturn -174.386 +MetaTest/__unnamed_task__/Iteration 215 +MetaTest/__unnamed_task__/MaxReturn -159.198 +MetaTest/__unnamed_task__/MinReturn -223.089 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.5241 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.912e+06 +__unnamed_task__/AverageDiscountedReturn -73.7351 +__unnamed_task__/AverageReturn -175.191 +__unnamed_task__/Iteration 215 +__unnamed_task__/MaxReturn -153.223 +__unnamed_task__/MinReturn -274.058 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.7181 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:37:33 | [maml_trainer] epoch #216 | Sampling for adapation and meta-testing... +2025-04-03 12:38:47 | [maml_trainer] epoch #216 | Finished meta-testing... +2025-04-03 12:38:47 | [maml_trainer] epoch #216 | Saving snapshot... +2025-04-03 12:39:11 | [maml_trainer] epoch #216 | Saved +2025-04-03 12:39:11 | [maml_trainer] epoch #216 | Time 83029.25 s +2025-04-03 12:39:11 | [maml_trainer] epoch #216 | EpochTime 388.22 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.515 +Average/AverageReturn -175.036 +Average/Iteration 216 +Average/MaxReturn -150.092 +Average/MinReturn -253.681 +Average/NumEpisodes 80 +Average/StdReturn 19.7868 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3818 +GaussianMLPPolicy/KLAfter 0.00167913 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.53935e-05 +GaussianMLPPolicy/LossBefore -7.30157e-10 +GaussianMLPPolicy/dLoss -2.53942e-05 +Iteration 216 +MetaTest/Average/AverageDiscountedReturn -171.736 +MetaTest/Average/AverageReturn -171.736 +MetaTest/Average/Iteration 216 +MetaTest/Average/MaxReturn -153.818 +MetaTest/Average/MinReturn -225.27 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.2821 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.736 +MetaTest/__unnamed_task__/AverageReturn -171.736 +MetaTest/__unnamed_task__/Iteration 216 +MetaTest/__unnamed_task__/MaxReturn -153.818 +MetaTest/__unnamed_task__/MinReturn -225.27 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.2821 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.944e+06 +__unnamed_task__/AverageDiscountedReturn -73.515 +__unnamed_task__/AverageReturn -175.036 +__unnamed_task__/Iteration 216 +__unnamed_task__/MaxReturn -150.092 +__unnamed_task__/MinReturn -253.681 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.7868 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:44:05 | [maml_trainer] epoch #217 | Sampling for adapation and meta-testing... +2025-04-03 12:45:19 | [maml_trainer] epoch #217 | Finished meta-testing... +2025-04-03 12:45:19 | [maml_trainer] epoch #217 | Saving snapshot... +2025-04-03 12:45:42 | [maml_trainer] epoch #217 | Saved +2025-04-03 12:45:42 | [maml_trainer] epoch #217 | Time 83420.43 s +2025-04-03 12:45:42 | [maml_trainer] epoch #217 | EpochTime 391.18 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5474 +Average/AverageReturn -172.19 +Average/Iteration 217 +Average/MaxReturn -152.688 +Average/MinReturn -239.071 +Average/NumEpisodes 80 +Average/StdReturn 17.0115 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3765 +GaussianMLPPolicy/KLAfter 0.00230259 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.45145e-05 +GaussianMLPPolicy/LossBefore -1.14739e-09 +GaussianMLPPolicy/dLoss -2.45156e-05 +Iteration 217 +MetaTest/Average/AverageDiscountedReturn -171.219 +MetaTest/Average/AverageReturn -171.219 +MetaTest/Average/Iteration 217 +MetaTest/Average/MaxReturn -158.927 +MetaTest/Average/MinReturn -185.041 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 6.49434 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.219 +MetaTest/__unnamed_task__/AverageReturn -171.219 +MetaTest/__unnamed_task__/Iteration 217 +MetaTest/__unnamed_task__/MaxReturn -158.927 +MetaTest/__unnamed_task__/MinReturn -185.041 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 6.49434 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 6.976e+06 +__unnamed_task__/AverageDiscountedReturn -72.5474 +__unnamed_task__/AverageReturn -172.19 +__unnamed_task__/Iteration 217 +__unnamed_task__/MaxReturn -152.688 +__unnamed_task__/MinReturn -239.071 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.0115 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:50:33 | [maml_trainer] epoch #218 | Sampling for adapation and meta-testing... +2025-04-03 12:51:45 | [maml_trainer] epoch #218 | Finished meta-testing... +2025-04-03 12:51:45 | [maml_trainer] epoch #218 | Saving snapshot... +2025-04-03 12:52:08 | [maml_trainer] epoch #218 | Saved +2025-04-03 12:52:08 | [maml_trainer] epoch #218 | Time 83806.31 s +2025-04-03 12:52:08 | [maml_trainer] epoch #218 | EpochTime 385.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6632 +Average/AverageReturn -170.837 +Average/Iteration 218 +Average/MaxReturn -151.088 +Average/MinReturn -243.657 +Average/NumEpisodes 80 +Average/StdReturn 16.7336 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3728 +GaussianMLPPolicy/KLAfter 0.00230569 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.10266e-05 +GaussianMLPPolicy/LossBefore -1.93715e-10 +GaussianMLPPolicy/dLoss 1.10264e-05 +Iteration 218 +MetaTest/Average/AverageDiscountedReturn -173.572 +MetaTest/Average/AverageReturn -173.572 +MetaTest/Average/Iteration 218 +MetaTest/Average/MaxReturn -148.549 +MetaTest/Average/MinReturn -235.956 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.8438 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.572 +MetaTest/__unnamed_task__/AverageReturn -173.572 +MetaTest/__unnamed_task__/Iteration 218 +MetaTest/__unnamed_task__/MaxReturn -148.549 +MetaTest/__unnamed_task__/MinReturn -235.956 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.8438 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.008e+06 +__unnamed_task__/AverageDiscountedReturn -71.6632 +__unnamed_task__/AverageReturn -170.837 +__unnamed_task__/Iteration 218 +__unnamed_task__/MaxReturn -151.088 +__unnamed_task__/MinReturn -243.657 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 16.7336 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 12:57:05 | [maml_trainer] epoch #219 | Sampling for adapation and meta-testing... +2025-04-03 12:58:18 | [maml_trainer] epoch #219 | Finished meta-testing... +2025-04-03 12:58:18 | [maml_trainer] epoch #219 | Saving snapshot... +2025-04-03 12:58:42 | [maml_trainer] epoch #219 | Saved +2025-04-03 12:58:42 | [maml_trainer] epoch #219 | Time 84200.44 s +2025-04-03 12:58:42 | [maml_trainer] epoch #219 | EpochTime 394.12 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8079 +Average/AverageReturn -175.626 +Average/Iteration 219 +Average/MaxReturn -155.305 +Average/MinReturn -251.816 +Average/NumEpisodes 80 +Average/StdReturn 21.0872 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3658 +GaussianMLPPolicy/KLAfter 0.00309944 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.66713e-05 +GaussianMLPPolicy/LossBefore 4.23193e-09 +GaussianMLPPolicy/dLoss 1.66755e-05 +Iteration 219 +MetaTest/Average/AverageDiscountedReturn -173.545 +MetaTest/Average/AverageReturn -173.545 +MetaTest/Average/Iteration 219 +MetaTest/Average/MaxReturn -157.512 +MetaTest/Average/MinReturn -229.708 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.1845 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.545 +MetaTest/__unnamed_task__/AverageReturn -173.545 +MetaTest/__unnamed_task__/Iteration 219 +MetaTest/__unnamed_task__/MaxReturn -157.512 +MetaTest/__unnamed_task__/MinReturn -229.708 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.1845 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.04e+06 +__unnamed_task__/AverageDiscountedReturn -73.8079 +__unnamed_task__/AverageReturn -175.626 +__unnamed_task__/Iteration 219 +__unnamed_task__/MaxReturn -155.305 +__unnamed_task__/MinReturn -251.816 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.0872 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:03:36 | [maml_trainer] epoch #220 | Sampling for adapation and meta-testing... +2025-04-03 13:04:48 | [maml_trainer] epoch #220 | Finished meta-testing... +2025-04-03 13:04:48 | [maml_trainer] epoch #220 | Saving snapshot... +2025-04-03 13:05:12 | [maml_trainer] epoch #220 | Saved +2025-04-03 13:05:12 | [maml_trainer] epoch #220 | Time 84590.12 s +2025-04-03 13:05:12 | [maml_trainer] epoch #220 | EpochTime 389.68 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.49 +Average/AverageReturn -172.289 +Average/Iteration 220 +Average/MaxReturn -149.01 +Average/MinReturn -238.363 +Average/NumEpisodes 80 +Average/StdReturn 17.2518 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3577 +GaussianMLPPolicy/KLAfter 0.00177049 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.2015e-06 +GaussianMLPPolicy/LossBefore -1.84774e-09 +GaussianMLPPolicy/dLoss 1.19965e-06 +Iteration 220 +MetaTest/Average/AverageDiscountedReturn -171.378 +MetaTest/Average/AverageReturn -171.378 +MetaTest/Average/Iteration 220 +MetaTest/Average/MaxReturn -157.906 +MetaTest/Average/MinReturn -238.694 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.6815 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.378 +MetaTest/__unnamed_task__/AverageReturn -171.378 +MetaTest/__unnamed_task__/Iteration 220 +MetaTest/__unnamed_task__/MaxReturn -157.906 +MetaTest/__unnamed_task__/MinReturn -238.694 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.6815 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.072e+06 +__unnamed_task__/AverageDiscountedReturn -72.49 +__unnamed_task__/AverageReturn -172.289 +__unnamed_task__/Iteration 220 +__unnamed_task__/MaxReturn -149.01 +__unnamed_task__/MinReturn -238.363 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.2518 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:10:03 | [maml_trainer] epoch #221 | Sampling for adapation and meta-testing... +2025-04-03 13:11:15 | [maml_trainer] epoch #221 | Finished meta-testing... +2025-04-03 13:11:15 | [maml_trainer] epoch #221 | Saving snapshot... +2025-04-03 13:11:39 | [maml_trainer] epoch #221 | Saved +2025-04-03 13:11:39 | [maml_trainer] epoch #221 | Time 84977.61 s +2025-04-03 13:11:39 | [maml_trainer] epoch #221 | EpochTime 387.48 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3647 +Average/AverageReturn -175.206 +Average/Iteration 221 +Average/MaxReturn -153.468 +Average/MinReturn -246.988 +Average/NumEpisodes 80 +Average/StdReturn 20.3203 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3509 +GaussianMLPPolicy/KLAfter 0.00183577 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.9344e-06 +GaussianMLPPolicy/LossBefore -1.50502e-09 +GaussianMLPPolicy/dLoss -6.93591e-06 +Iteration 221 +MetaTest/Average/AverageDiscountedReturn -169.64 +MetaTest/Average/AverageReturn -169.64 +MetaTest/Average/Iteration 221 +MetaTest/Average/MaxReturn -155.77 +MetaTest/Average/MinReturn -224.335 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.4929 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.64 +MetaTest/__unnamed_task__/AverageReturn -169.64 +MetaTest/__unnamed_task__/Iteration 221 +MetaTest/__unnamed_task__/MaxReturn -155.77 +MetaTest/__unnamed_task__/MinReturn -224.335 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.4929 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.104e+06 +__unnamed_task__/AverageDiscountedReturn -73.3647 +__unnamed_task__/AverageReturn -175.206 +__unnamed_task__/Iteration 221 +__unnamed_task__/MaxReturn -153.468 +__unnamed_task__/MinReturn -246.988 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.3203 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:16:32 | [maml_trainer] epoch #222 | Sampling for adapation and meta-testing... +2025-04-03 13:17:46 | [maml_trainer] epoch #222 | Finished meta-testing... +2025-04-03 13:17:46 | [maml_trainer] epoch #222 | Saving snapshot... +2025-04-03 13:18:08 | [maml_trainer] epoch #222 | Saved +2025-04-03 13:18:08 | [maml_trainer] epoch #222 | Time 85366.99 s +2025-04-03 13:18:08 | [maml_trainer] epoch #222 | EpochTime 389.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7508 +Average/AverageReturn -171.214 +Average/Iteration 222 +Average/MaxReturn -148.711 +Average/MinReturn -231.155 +Average/NumEpisodes 80 +Average/StdReturn 14.8429 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3442 +GaussianMLPPolicy/KLAfter 0.00214662 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.00843e-05 +GaussianMLPPolicy/LossBefore -1.90735e-09 +GaussianMLPPolicy/dLoss -2.00862e-05 +Iteration 222 +MetaTest/Average/AverageDiscountedReturn -176.95 +MetaTest/Average/AverageReturn -176.95 +MetaTest/Average/Iteration 222 +MetaTest/Average/MaxReturn -156.496 +MetaTest/Average/MinReturn -232.68 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6845 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.95 +MetaTest/__unnamed_task__/AverageReturn -176.95 +MetaTest/__unnamed_task__/Iteration 222 +MetaTest/__unnamed_task__/MaxReturn -156.496 +MetaTest/__unnamed_task__/MinReturn -232.68 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6845 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.136e+06 +__unnamed_task__/AverageDiscountedReturn -71.7508 +__unnamed_task__/AverageReturn -171.214 +__unnamed_task__/Iteration 222 +__unnamed_task__/MaxReturn -148.711 +__unnamed_task__/MinReturn -231.155 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 14.8429 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:23:00 | [maml_trainer] epoch #223 | Sampling for adapation and meta-testing... +2025-04-03 13:24:13 | [maml_trainer] epoch #223 | Finished meta-testing... +2025-04-03 13:24:13 | [maml_trainer] epoch #223 | Saving snapshot... +2025-04-03 13:24:37 | [maml_trainer] epoch #223 | Saved +2025-04-03 13:24:37 | [maml_trainer] epoch #223 | Time 85755.82 s +2025-04-03 13:24:37 | [maml_trainer] epoch #223 | EpochTime 388.83 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.1056 +Average/AverageReturn -175.886 +Average/Iteration 223 +Average/MaxReturn -152.11 +Average/MinReturn -263.464 +Average/NumEpisodes 80 +Average/StdReturn 20.915 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.339 +GaussianMLPPolicy/KLAfter 0.00277496 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.99052e-05 +GaussianMLPPolicy/LossBefore 4.08292e-09 +GaussianMLPPolicy/dLoss 1.99092e-05 +Iteration 223 +MetaTest/Average/AverageDiscountedReturn -173.86 +MetaTest/Average/AverageReturn -173.86 +MetaTest/Average/Iteration 223 +MetaTest/Average/MaxReturn -159.121 +MetaTest/Average/MinReturn -230.83 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.5878 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.86 +MetaTest/__unnamed_task__/AverageReturn -173.86 +MetaTest/__unnamed_task__/Iteration 223 +MetaTest/__unnamed_task__/MaxReturn -159.121 +MetaTest/__unnamed_task__/MinReturn -230.83 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.5878 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.168e+06 +__unnamed_task__/AverageDiscountedReturn -74.1056 +__unnamed_task__/AverageReturn -175.886 +__unnamed_task__/Iteration 223 +__unnamed_task__/MaxReturn -152.11 +__unnamed_task__/MinReturn -263.464 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.915 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:29:28 | [maml_trainer] epoch #224 | Sampling for adapation and meta-testing... +2025-04-03 13:30:41 | [maml_trainer] epoch #224 | Finished meta-testing... +2025-04-03 13:30:41 | [maml_trainer] epoch #224 | Saving snapshot... +2025-04-03 13:31:05 | [maml_trainer] epoch #224 | Saved +2025-04-03 13:31:05 | [maml_trainer] epoch #224 | Time 86143.19 s +2025-04-03 13:31:05 | [maml_trainer] epoch #224 | EpochTime 387.37 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.2242 +Average/AverageReturn -174.776 +Average/Iteration 224 +Average/MaxReturn -155.21 +Average/MinReturn -239.02 +Average/NumEpisodes 80 +Average/StdReturn 18.5954 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3337 +GaussianMLPPolicy/KLAfter 0.0028601 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.88246e-05 +GaussianMLPPolicy/LossBefore -1.49012e-11 +GaussianMLPPolicy/dLoss 5.88246e-05 +Iteration 224 +MetaTest/Average/AverageDiscountedReturn -174.079 +MetaTest/Average/AverageReturn -174.079 +MetaTest/Average/Iteration 224 +MetaTest/Average/MaxReturn -158.602 +MetaTest/Average/MinReturn -234.779 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.8696 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.079 +MetaTest/__unnamed_task__/AverageReturn -174.079 +MetaTest/__unnamed_task__/Iteration 224 +MetaTest/__unnamed_task__/MaxReturn -158.602 +MetaTest/__unnamed_task__/MinReturn -234.779 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.8696 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.2e+06 +__unnamed_task__/AverageDiscountedReturn -73.2242 +__unnamed_task__/AverageReturn -174.776 +__unnamed_task__/Iteration 224 +__unnamed_task__/MaxReturn -155.21 +__unnamed_task__/MinReturn -239.02 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.5954 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:35:58 | [maml_trainer] epoch #225 | Sampling for adapation and meta-testing... +2025-04-03 13:37:11 | [maml_trainer] epoch #225 | Finished meta-testing... +2025-04-03 13:37:11 | [maml_trainer] epoch #225 | Saving snapshot... +2025-04-03 13:37:34 | [maml_trainer] epoch #225 | Saved +2025-04-03 13:37:34 | [maml_trainer] epoch #225 | Time 86532.63 s +2025-04-03 13:37:34 | [maml_trainer] epoch #225 | EpochTime 389.43 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8777 +Average/AverageReturn -173.732 +Average/Iteration 225 +Average/MaxReturn -154.451 +Average/MinReturn -241.81 +Average/NumEpisodes 80 +Average/StdReturn 18.3725 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3284 +GaussianMLPPolicy/KLAfter 0.00291636 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.67923e-06 +GaussianMLPPolicy/LossBefore -4.67896e-09 +GaussianMLPPolicy/dLoss -9.68391e-06 +Iteration 225 +MetaTest/Average/AverageDiscountedReturn -174.853 +MetaTest/Average/AverageReturn -174.853 +MetaTest/Average/Iteration 225 +MetaTest/Average/MaxReturn -163.843 +MetaTest/Average/MinReturn -252.713 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5198 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.853 +MetaTest/__unnamed_task__/AverageReturn -174.853 +MetaTest/__unnamed_task__/Iteration 225 +MetaTest/__unnamed_task__/MaxReturn -163.843 +MetaTest/__unnamed_task__/MinReturn -252.713 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5198 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.232e+06 +__unnamed_task__/AverageDiscountedReturn -72.8777 +__unnamed_task__/AverageReturn -173.732 +__unnamed_task__/Iteration 225 +__unnamed_task__/MaxReturn -154.451 +__unnamed_task__/MinReturn -241.81 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.3725 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:42:24 | [maml_trainer] epoch #226 | Sampling for adapation and meta-testing... +2025-04-03 13:43:39 | [maml_trainer] epoch #226 | Finished meta-testing... +2025-04-03 13:43:39 | [maml_trainer] epoch #226 | Saving snapshot... +2025-04-03 13:44:01 | [maml_trainer] epoch #226 | Saved +2025-04-03 13:44:01 | [maml_trainer] epoch #226 | Time 86920.06 s +2025-04-03 13:44:01 | [maml_trainer] epoch #226 | EpochTime 387.43 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.9026 +Average/AverageReturn -176.686 +Average/Iteration 226 +Average/MaxReturn -153.361 +Average/MinReturn -242.372 +Average/NumEpisodes 80 +Average/StdReturn 20.0364 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3263 +GaussianMLPPolicy/KLAfter 0.00282891 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.78665e-05 +GaussianMLPPolicy/LossBefore -4.72367e-09 +GaussianMLPPolicy/dLoss 3.78618e-05 +Iteration 226 +MetaTest/Average/AverageDiscountedReturn -175.572 +MetaTest/Average/AverageReturn -175.572 +MetaTest/Average/Iteration 226 +MetaTest/Average/MaxReturn -154.337 +MetaTest/Average/MinReturn -244.831 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.026 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.572 +MetaTest/__unnamed_task__/AverageReturn -175.572 +MetaTest/__unnamed_task__/Iteration 226 +MetaTest/__unnamed_task__/MaxReturn -154.337 +MetaTest/__unnamed_task__/MinReturn -244.831 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.026 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.264e+06 +__unnamed_task__/AverageDiscountedReturn -73.9026 +__unnamed_task__/AverageReturn -176.686 +__unnamed_task__/Iteration 226 +__unnamed_task__/MaxReturn -153.361 +__unnamed_task__/MinReturn -242.372 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.0364 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:48:54 | [maml_trainer] epoch #227 | Sampling for adapation and meta-testing... +2025-04-03 13:50:07 | [maml_trainer] epoch #227 | Finished meta-testing... +2025-04-03 13:50:07 | [maml_trainer] epoch #227 | Saving snapshot... +2025-04-03 13:50:31 | [maml_trainer] epoch #227 | Saved +2025-04-03 13:50:31 | [maml_trainer] epoch #227 | Time 87309.14 s +2025-04-03 13:50:31 | [maml_trainer] epoch #227 | EpochTime 389.08 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6709 +Average/AverageReturn -172.932 +Average/Iteration 227 +Average/MaxReturn -150.345 +Average/MinReturn -248.16 +Average/NumEpisodes 80 +Average/StdReturn 17.5832 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3257 +GaussianMLPPolicy/KLAfter 0.00313983 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.49548e-05 +GaussianMLPPolicy/LossBefore -3.7849e-09 +GaussianMLPPolicy/dLoss -1.49585e-05 +Iteration 227 +MetaTest/Average/AverageDiscountedReturn -174.88 +MetaTest/Average/AverageReturn -174.88 +MetaTest/Average/Iteration 227 +MetaTest/Average/MaxReturn -152.435 +MetaTest/Average/MinReturn -239.815 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.7036 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.88 +MetaTest/__unnamed_task__/AverageReturn -174.88 +MetaTest/__unnamed_task__/Iteration 227 +MetaTest/__unnamed_task__/MaxReturn -152.435 +MetaTest/__unnamed_task__/MinReturn -239.815 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.7036 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.296e+06 +__unnamed_task__/AverageDiscountedReturn -72.6709 +__unnamed_task__/AverageReturn -172.932 +__unnamed_task__/Iteration 227 +__unnamed_task__/MaxReturn -150.345 +__unnamed_task__/MinReturn -248.16 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.5832 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 13:55:21 | [maml_trainer] epoch #228 | Sampling for adapation and meta-testing... +2025-04-03 13:56:34 | [maml_trainer] epoch #228 | Finished meta-testing... +2025-04-03 13:56:34 | [maml_trainer] epoch #228 | Saving snapshot... +2025-04-03 13:56:58 | [maml_trainer] epoch #228 | Saved +2025-04-03 13:56:58 | [maml_trainer] epoch #228 | Time 87696.31 s +2025-04-03 13:56:58 | [maml_trainer] epoch #228 | EpochTime 387.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.3584 +Average/AverageReturn -177.078 +Average/Iteration 228 +Average/MaxReturn -152.438 +Average/MinReturn -247.431 +Average/NumEpisodes 80 +Average/StdReturn 23.6942 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3272 +GaussianMLPPolicy/KLAfter 0.00366544 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.56327e-05 +GaussianMLPPolicy/LossBefore -1.19209e-10 +GaussianMLPPolicy/dLoss 4.56326e-05 +Iteration 228 +MetaTest/Average/AverageDiscountedReturn -176.74 +MetaTest/Average/AverageReturn -176.74 +MetaTest/Average/Iteration 228 +MetaTest/Average/MaxReturn -160.645 +MetaTest/Average/MinReturn -230.892 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.1693 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.74 +MetaTest/__unnamed_task__/AverageReturn -176.74 +MetaTest/__unnamed_task__/Iteration 228 +MetaTest/__unnamed_task__/MaxReturn -160.645 +MetaTest/__unnamed_task__/MinReturn -230.892 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.1693 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.328e+06 +__unnamed_task__/AverageDiscountedReturn -74.3584 +__unnamed_task__/AverageReturn -177.078 +__unnamed_task__/Iteration 228 +__unnamed_task__/MaxReturn -152.438 +__unnamed_task__/MinReturn -247.431 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.6942 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:01:49 | [maml_trainer] epoch #229 | Sampling for adapation and meta-testing... +2025-04-03 14:03:02 | [maml_trainer] epoch #229 | Finished meta-testing... +2025-04-03 14:03:02 | [maml_trainer] epoch #229 | Saving snapshot... +2025-04-03 14:03:25 | [maml_trainer] epoch #229 | Saved +2025-04-03 14:03:25 | [maml_trainer] epoch #229 | Time 88083.97 s +2025-04-03 14:03:25 | [maml_trainer] epoch #229 | EpochTime 387.65 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.4418 +Average/AverageReturn -172.285 +Average/Iteration 229 +Average/MaxReturn -149.832 +Average/MinReturn -233.526 +Average/NumEpisodes 80 +Average/StdReturn 15.1597 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3258 +GaussianMLPPolicy/KLAfter 0.00343934 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.69951e-05 +GaussianMLPPolicy/LossBefore -6.10948e-10 +GaussianMLPPolicy/dLoss 1.69945e-05 +Iteration 229 +MetaTest/Average/AverageDiscountedReturn -168.851 +MetaTest/Average/AverageReturn -168.851 +MetaTest/Average/Iteration 229 +MetaTest/Average/MaxReturn -156.354 +MetaTest/Average/MinReturn -187.479 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 7.64961 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.851 +MetaTest/__unnamed_task__/AverageReturn -168.851 +MetaTest/__unnamed_task__/Iteration 229 +MetaTest/__unnamed_task__/MaxReturn -156.354 +MetaTest/__unnamed_task__/MinReturn -187.479 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 7.64961 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.36e+06 +__unnamed_task__/AverageDiscountedReturn -72.4418 +__unnamed_task__/AverageReturn -172.285 +__unnamed_task__/Iteration 229 +__unnamed_task__/MaxReturn -149.832 +__unnamed_task__/MinReturn -233.526 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.1597 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:08:20 | [maml_trainer] epoch #230 | Sampling for adapation and meta-testing... +2025-04-03 14:09:32 | [maml_trainer] epoch #230 | Finished meta-testing... +2025-04-03 14:09:32 | [maml_trainer] epoch #230 | Saving snapshot... +2025-04-03 14:09:54 | [maml_trainer] epoch #230 | Saved +2025-04-03 14:09:54 | [maml_trainer] epoch #230 | Time 88473.00 s +2025-04-03 14:09:54 | [maml_trainer] epoch #230 | EpochTime 389.02 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5475 +Average/AverageReturn -170.539 +Average/Iteration 230 +Average/MaxReturn -151.639 +Average/MinReturn -251.412 +Average/NumEpisodes 80 +Average/StdReturn 17.4188 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3235 +GaussianMLPPolicy/KLAfter 0.00360288 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.68155e-05 +GaussianMLPPolicy/LossBefore 4.64916e-09 +GaussianMLPPolicy/dLoss -2.68109e-05 +Iteration 230 +MetaTest/Average/AverageDiscountedReturn -172.705 +MetaTest/Average/AverageReturn -172.705 +MetaTest/Average/Iteration 230 +MetaTest/Average/MaxReturn -158.338 +MetaTest/Average/MinReturn -234.222 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.3703 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.705 +MetaTest/__unnamed_task__/AverageReturn -172.705 +MetaTest/__unnamed_task__/Iteration 230 +MetaTest/__unnamed_task__/MaxReturn -158.338 +MetaTest/__unnamed_task__/MinReturn -234.222 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.3703 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.392e+06 +__unnamed_task__/AverageDiscountedReturn -71.5475 +__unnamed_task__/AverageReturn -170.539 +__unnamed_task__/Iteration 230 +__unnamed_task__/MaxReturn -151.639 +__unnamed_task__/MinReturn -251.412 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.4188 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:14:45 | [maml_trainer] epoch #231 | Sampling for adapation and meta-testing... +2025-04-03 14:15:58 | [maml_trainer] epoch #231 | Finished meta-testing... +2025-04-03 14:15:58 | [maml_trainer] epoch #231 | Saving snapshot... +2025-04-03 14:16:22 | [maml_trainer] epoch #231 | Saved +2025-04-03 14:16:22 | [maml_trainer] epoch #231 | Time 88860.38 s +2025-04-03 14:16:22 | [maml_trainer] epoch #231 | EpochTime 387.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -76.3136 +Average/AverageReturn -176.918 +Average/Iteration 231 +Average/MaxReturn -151.545 +Average/MinReturn -495.598 +Average/NumEpisodes 80 +Average/StdReturn 41.4748 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3216 +GaussianMLPPolicy/KLAfter 0.00377302 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.32374e-05 +GaussianMLPPolicy/LossBefore 2.563e-09 +GaussianMLPPolicy/dLoss -2.32348e-05 +Iteration 231 +MetaTest/Average/AverageDiscountedReturn -177.173 +MetaTest/Average/AverageReturn -177.173 +MetaTest/Average/Iteration 231 +MetaTest/Average/MaxReturn -158.942 +MetaTest/Average/MinReturn -233.555 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.638 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.173 +MetaTest/__unnamed_task__/AverageReturn -177.173 +MetaTest/__unnamed_task__/Iteration 231 +MetaTest/__unnamed_task__/MaxReturn -158.942 +MetaTest/__unnamed_task__/MinReturn -233.555 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.638 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.424e+06 +__unnamed_task__/AverageDiscountedReturn -76.3136 +__unnamed_task__/AverageReturn -176.918 +__unnamed_task__/Iteration 231 +__unnamed_task__/MaxReturn -151.545 +__unnamed_task__/MinReturn -495.598 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 41.4748 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:21:14 | [maml_trainer] epoch #232 | Sampling for adapation and meta-testing... +2025-04-03 14:22:26 | [maml_trainer] epoch #232 | Finished meta-testing... +2025-04-03 14:22:26 | [maml_trainer] epoch #232 | Saving snapshot... +2025-04-03 14:22:50 | [maml_trainer] epoch #232 | Saved +2025-04-03 14:22:50 | [maml_trainer] epoch #232 | Time 89248.39 s +2025-04-03 14:22:50 | [maml_trainer] epoch #232 | EpochTime 388.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.258 +Average/AverageReturn -171.733 +Average/Iteration 232 +Average/MaxReturn -150.365 +Average/MinReturn -235.025 +Average/NumEpisodes 80 +Average/StdReturn 20.4245 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3219 +GaussianMLPPolicy/KLAfter 0.00396605 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.25653e-05 +GaussianMLPPolicy/LossBefore 1.11759e-09 +GaussianMLPPolicy/dLoss 4.25664e-05 +Iteration 232 +MetaTest/Average/AverageDiscountedReturn -173.749 +MetaTest/Average/AverageReturn -173.749 +MetaTest/Average/Iteration 232 +MetaTest/Average/MaxReturn -158.557 +MetaTest/Average/MinReturn -233.338 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.1875 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.749 +MetaTest/__unnamed_task__/AverageReturn -173.749 +MetaTest/__unnamed_task__/Iteration 232 +MetaTest/__unnamed_task__/MaxReturn -158.557 +MetaTest/__unnamed_task__/MinReturn -233.338 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.1875 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.456e+06 +__unnamed_task__/AverageDiscountedReturn -72.258 +__unnamed_task__/AverageReturn -171.733 +__unnamed_task__/Iteration 232 +__unnamed_task__/MaxReturn -150.365 +__unnamed_task__/MinReturn -235.025 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.4245 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:27:39 | [maml_trainer] epoch #233 | Sampling for adapation and meta-testing... +2025-04-03 14:28:51 | [maml_trainer] epoch #233 | Finished meta-testing... +2025-04-03 14:28:51 | [maml_trainer] epoch #233 | Saving snapshot... +2025-04-03 14:29:16 | [maml_trainer] epoch #233 | Saved +2025-04-03 14:29:16 | [maml_trainer] epoch #233 | Time 89634.14 s +2025-04-03 14:29:16 | [maml_trainer] epoch #233 | EpochTime 385.75 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6425 +Average/AverageReturn -169.749 +Average/Iteration 233 +Average/MaxReturn -146.185 +Average/MinReturn -256.08 +Average/NumEpisodes 80 +Average/StdReturn 19.7564 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3205 +GaussianMLPPolicy/KLAfter 0.00396996 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.85632e-06 +GaussianMLPPolicy/LossBefore 1.02073e-08 +GaussianMLPPolicy/dLoss -6.84611e-06 +Iteration 233 +MetaTest/Average/AverageDiscountedReturn -169.047 +MetaTest/Average/AverageReturn -169.047 +MetaTest/Average/Iteration 233 +MetaTest/Average/MaxReturn -153.62 +MetaTest/Average/MinReturn -232.417 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.3343 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.047 +MetaTest/__unnamed_task__/AverageReturn -169.047 +MetaTest/__unnamed_task__/Iteration 233 +MetaTest/__unnamed_task__/MaxReturn -153.62 +MetaTest/__unnamed_task__/MinReturn -232.417 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.3343 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.488e+06 +__unnamed_task__/AverageDiscountedReturn -71.6425 +__unnamed_task__/AverageReturn -169.749 +__unnamed_task__/Iteration 233 +__unnamed_task__/MaxReturn -146.185 +__unnamed_task__/MinReturn -256.08 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.7564 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:34:07 | [maml_trainer] epoch #234 | Sampling for adapation and meta-testing... +2025-04-03 14:35:20 | [maml_trainer] epoch #234 | Finished meta-testing... +2025-04-03 14:35:20 | [maml_trainer] epoch #234 | Saving snapshot... +2025-04-03 14:35:42 | [maml_trainer] epoch #234 | Saved +2025-04-03 14:35:42 | [maml_trainer] epoch #234 | Time 90020.57 s +2025-04-03 14:35:42 | [maml_trainer] epoch #234 | EpochTime 386.43 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8691 +Average/AverageReturn -169.32 +Average/Iteration 234 +Average/MaxReturn -147.427 +Average/MinReturn -253.868 +Average/NumEpisodes 80 +Average/StdReturn 22.3561 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3193 +GaussianMLPPolicy/KLAfter 0.00345346 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.27129e-05 +GaussianMLPPolicy/LossBefore -2.87592e-09 +GaussianMLPPolicy/dLoss -1.27157e-05 +Iteration 234 +MetaTest/Average/AverageDiscountedReturn -167.382 +MetaTest/Average/AverageReturn -167.382 +MetaTest/Average/Iteration 234 +MetaTest/Average/MaxReturn -150.631 +MetaTest/Average/MinReturn -226.255 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.1281 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.382 +MetaTest/__unnamed_task__/AverageReturn -167.382 +MetaTest/__unnamed_task__/Iteration 234 +MetaTest/__unnamed_task__/MaxReturn -150.631 +MetaTest/__unnamed_task__/MinReturn -226.255 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.1281 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.52e+06 +__unnamed_task__/AverageDiscountedReturn -71.8691 +__unnamed_task__/AverageReturn -169.32 +__unnamed_task__/Iteration 234 +__unnamed_task__/MaxReturn -147.427 +__unnamed_task__/MinReturn -253.868 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.3561 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:40:31 | [maml_trainer] epoch #235 | Sampling for adapation and meta-testing... +2025-04-03 14:41:43 | [maml_trainer] epoch #235 | Finished meta-testing... +2025-04-03 14:41:43 | [maml_trainer] epoch #235 | Saving snapshot... +2025-04-03 14:42:06 | [maml_trainer] epoch #235 | Saved +2025-04-03 14:42:06 | [maml_trainer] epoch #235 | Time 90404.67 s +2025-04-03 14:42:06 | [maml_trainer] epoch #235 | EpochTime 384.09 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2531 +Average/AverageReturn -170.849 +Average/Iteration 235 +Average/MaxReturn -145.154 +Average/MinReturn -256.953 +Average/NumEpisodes 80 +Average/StdReturn 22.7961 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3179 +GaussianMLPPolicy/KLAfter 0.00325416 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.85759e-05 +GaussianMLPPolicy/LossBefore -1.07288e-09 +GaussianMLPPolicy/dLoss 1.85749e-05 +Iteration 235 +MetaTest/Average/AverageDiscountedReturn -165.578 +MetaTest/Average/AverageReturn -165.578 +MetaTest/Average/Iteration 235 +MetaTest/Average/MaxReturn -147.046 +MetaTest/Average/MinReturn -220.563 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.543 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.578 +MetaTest/__unnamed_task__/AverageReturn -165.578 +MetaTest/__unnamed_task__/Iteration 235 +MetaTest/__unnamed_task__/MaxReturn -147.046 +MetaTest/__unnamed_task__/MinReturn -220.563 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.543 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.552e+06 +__unnamed_task__/AverageDiscountedReturn -72.2531 +__unnamed_task__/AverageReturn -170.849 +__unnamed_task__/Iteration 235 +__unnamed_task__/MaxReturn -145.154 +__unnamed_task__/MinReturn -256.953 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.7961 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:46:57 | [maml_trainer] epoch #236 | Sampling for adapation and meta-testing... +2025-04-03 14:48:09 | [maml_trainer] epoch #236 | Finished meta-testing... +2025-04-03 14:48:09 | [maml_trainer] epoch #236 | Saving snapshot... +2025-04-03 14:48:33 | [maml_trainer] epoch #236 | Saved +2025-04-03 14:48:33 | [maml_trainer] epoch #236 | Time 90791.61 s +2025-04-03 14:48:33 | [maml_trainer] epoch #236 | EpochTime 386.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7802 +Average/AverageReturn -169.58 +Average/Iteration 236 +Average/MaxReturn -145.058 +Average/MinReturn -240.828 +Average/NumEpisodes 80 +Average/StdReturn 18.4851 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3155 +GaussianMLPPolicy/KLAfter 0.00241183 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.82128e-06 +GaussianMLPPolicy/LossBefore 3.42727e-09 +GaussianMLPPolicy/dLoss 2.82471e-06 +Iteration 236 +MetaTest/Average/AverageDiscountedReturn -166.701 +MetaTest/Average/AverageReturn -166.701 +MetaTest/Average/Iteration 236 +MetaTest/Average/MaxReturn -144.174 +MetaTest/Average/MinReturn -229.622 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.5946 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.701 +MetaTest/__unnamed_task__/AverageReturn -166.701 +MetaTest/__unnamed_task__/Iteration 236 +MetaTest/__unnamed_task__/MaxReturn -144.174 +MetaTest/__unnamed_task__/MinReturn -229.622 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.5946 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.584e+06 +__unnamed_task__/AverageDiscountedReturn -71.7802 +__unnamed_task__/AverageReturn -169.58 +__unnamed_task__/Iteration 236 +__unnamed_task__/MaxReturn -145.058 +__unnamed_task__/MinReturn -240.828 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 18.4851 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:53:21 | [maml_trainer] epoch #237 | Sampling for adapation and meta-testing... +2025-04-03 14:54:33 | [maml_trainer] epoch #237 | Finished meta-testing... +2025-04-03 14:54:33 | [maml_trainer] epoch #237 | Saving snapshot... +2025-04-03 14:54:56 | [maml_trainer] epoch #237 | Saved +2025-04-03 14:54:56 | [maml_trainer] epoch #237 | Time 91174.91 s +2025-04-03 14:54:56 | [maml_trainer] epoch #237 | EpochTime 383.30 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.4242 +Average/AverageReturn -170.481 +Average/Iteration 237 +Average/MaxReturn -143.505 +Average/MinReturn -256.615 +Average/NumEpisodes 80 +Average/StdReturn 24.098 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3111 +GaussianMLPPolicy/KLAfter 0.00282974 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.39621e-05 +GaussianMLPPolicy/LossBefore 4.00841e-09 +GaussianMLPPolicy/dLoss -2.39581e-05 +Iteration 237 +MetaTest/Average/AverageDiscountedReturn -161.654 +MetaTest/Average/AverageReturn -161.654 +MetaTest/Average/Iteration 237 +MetaTest/Average/MaxReturn -138.414 +MetaTest/Average/MinReturn -242.669 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.7517 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.654 +MetaTest/__unnamed_task__/AverageReturn -161.654 +MetaTest/__unnamed_task__/Iteration 237 +MetaTest/__unnamed_task__/MaxReturn -138.414 +MetaTest/__unnamed_task__/MinReturn -242.669 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.7517 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.616e+06 +__unnamed_task__/AverageDiscountedReturn -72.4242 +__unnamed_task__/AverageReturn -170.481 +__unnamed_task__/Iteration 237 +__unnamed_task__/MaxReturn -143.505 +__unnamed_task__/MinReturn -256.615 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.098 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 14:59:44 | [maml_trainer] epoch #238 | Sampling for adapation and meta-testing... +2025-04-03 15:00:57 | [maml_trainer] epoch #238 | Finished meta-testing... +2025-04-03 15:00:57 | [maml_trainer] epoch #238 | Saving snapshot... +2025-04-03 15:01:19 | [maml_trainer] epoch #238 | Saved +2025-04-03 15:01:19 | [maml_trainer] epoch #238 | Time 91557.58 s +2025-04-03 15:01:19 | [maml_trainer] epoch #238 | EpochTime 382.66 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.1961 +Average/AverageReturn -164.767 +Average/Iteration 238 +Average/MaxReturn -138.926 +Average/MinReturn -235.32 +Average/NumEpisodes 80 +Average/StdReturn 17.6332 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3047 +GaussianMLPPolicy/KLAfter 0.00292259 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.3436e-05 +GaussianMLPPolicy/LossBefore 3.9041e-09 +GaussianMLPPolicy/dLoss 3.34399e-05 +Iteration 238 +MetaTest/Average/AverageDiscountedReturn -170.476 +MetaTest/Average/AverageReturn -170.476 +MetaTest/Average/Iteration 238 +MetaTest/Average/MaxReturn -146 +MetaTest/Average/MinReturn -226.078 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.9717 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.476 +MetaTest/__unnamed_task__/AverageReturn -170.476 +MetaTest/__unnamed_task__/Iteration 238 +MetaTest/__unnamed_task__/MaxReturn -146 +MetaTest/__unnamed_task__/MinReturn -226.078 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.9717 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.648e+06 +__unnamed_task__/AverageDiscountedReturn -70.1961 +__unnamed_task__/AverageReturn -164.767 +__unnamed_task__/Iteration 238 +__unnamed_task__/MaxReturn -138.926 +__unnamed_task__/MinReturn -235.32 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.6332 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:06:08 | [maml_trainer] epoch #239 | Sampling for adapation and meta-testing... +2025-04-03 15:07:21 | [maml_trainer] epoch #239 | Finished meta-testing... +2025-04-03 15:07:21 | [maml_trainer] epoch #239 | Saving snapshot... +2025-04-03 15:07:44 | [maml_trainer] epoch #239 | Saved +2025-04-03 15:07:44 | [maml_trainer] epoch #239 | Time 91942.19 s +2025-04-03 15:07:44 | [maml_trainer] epoch #239 | EpochTime 384.61 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.3069 +Average/AverageReturn -165.474 +Average/Iteration 239 +Average/MaxReturn -144.45 +Average/MinReturn -224.729 +Average/NumEpisodes 80 +Average/StdReturn 15.792 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2968 +GaussianMLPPolicy/KLAfter 0.00374209 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.11405e-06 +GaussianMLPPolicy/LossBefore -3.8147e-09 +GaussianMLPPolicy/dLoss -7.11787e-06 +Iteration 239 +MetaTest/Average/AverageDiscountedReturn -160.267 +MetaTest/Average/AverageReturn -160.267 +MetaTest/Average/Iteration 239 +MetaTest/Average/MaxReturn -146.795 +MetaTest/Average/MinReturn -222.206 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 15.5696 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -160.267 +MetaTest/__unnamed_task__/AverageReturn -160.267 +MetaTest/__unnamed_task__/Iteration 239 +MetaTest/__unnamed_task__/MaxReturn -146.795 +MetaTest/__unnamed_task__/MinReturn -222.206 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 15.5696 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.68e+06 +__unnamed_task__/AverageDiscountedReturn -70.3069 +__unnamed_task__/AverageReturn -165.474 +__unnamed_task__/Iteration 239 +__unnamed_task__/MaxReturn -144.45 +__unnamed_task__/MinReturn -224.729 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.792 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:12:37 | [maml_trainer] epoch #240 | Sampling for adapation and meta-testing... +2025-04-03 15:13:48 | [maml_trainer] epoch #240 | Finished meta-testing... +2025-04-03 15:13:48 | [maml_trainer] epoch #240 | Saving snapshot... +2025-04-03 15:14:12 | [maml_trainer] epoch #240 | Saved +2025-04-03 15:14:12 | [maml_trainer] epoch #240 | Time 92330.20 s +2025-04-03 15:14:12 | [maml_trainer] epoch #240 | EpochTime 388.01 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.8024 +Average/AverageReturn -171.844 +Average/Iteration 240 +Average/MaxReturn -142.889 +Average/MinReturn -237.602 +Average/NumEpisodes 80 +Average/StdReturn 23.7115 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.289 +GaussianMLPPolicy/KLAfter 0.00360214 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.97449e-06 +GaussianMLPPolicy/LossBefore 4.48525e-09 +GaussianMLPPolicy/dLoss -1.97001e-06 +Iteration 240 +MetaTest/Average/AverageDiscountedReturn -159.9 +MetaTest/Average/AverageReturn -159.9 +MetaTest/Average/Iteration 240 +MetaTest/Average/MaxReturn -143.219 +MetaTest/Average/MinReturn -239.471 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.0745 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -159.9 +MetaTest/__unnamed_task__/AverageReturn -159.9 +MetaTest/__unnamed_task__/Iteration 240 +MetaTest/__unnamed_task__/MaxReturn -143.219 +MetaTest/__unnamed_task__/MinReturn -239.471 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.0745 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.712e+06 +__unnamed_task__/AverageDiscountedReturn -72.8024 +__unnamed_task__/AverageReturn -171.844 +__unnamed_task__/Iteration 240 +__unnamed_task__/MaxReturn -142.889 +__unnamed_task__/MinReturn -237.602 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.7115 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:18:58 | [maml_trainer] epoch #241 | Sampling for adapation and meta-testing... +2025-04-03 15:20:12 | [maml_trainer] epoch #241 | Finished meta-testing... +2025-04-03 15:20:12 | [maml_trainer] epoch #241 | Saving snapshot... +2025-04-03 15:20:35 | [maml_trainer] epoch #241 | Saved +2025-04-03 15:20:35 | [maml_trainer] epoch #241 | Time 92713.49 s +2025-04-03 15:20:35 | [maml_trainer] epoch #241 | EpochTime 383.28 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9834 +Average/AverageReturn -167.306 +Average/Iteration 241 +Average/MaxReturn -145.258 +Average/MinReturn -236.584 +Average/NumEpisodes 80 +Average/StdReturn 20.5891 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2807 +GaussianMLPPolicy/KLAfter 0.00359582 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.9819e-05 +GaussianMLPPolicy/LossBefore 8.62777e-09 +GaussianMLPPolicy/dLoss -2.98103e-05 +Iteration 241 +MetaTest/Average/AverageDiscountedReturn -173.137 +MetaTest/Average/AverageReturn -173.137 +MetaTest/Average/Iteration 241 +MetaTest/Average/MaxReturn -153.215 +MetaTest/Average/MinReturn -253.276 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.1407 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.137 +MetaTest/__unnamed_task__/AverageReturn -173.137 +MetaTest/__unnamed_task__/Iteration 241 +MetaTest/__unnamed_task__/MaxReturn -153.215 +MetaTest/__unnamed_task__/MinReturn -253.276 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.1407 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.744e+06 +__unnamed_task__/AverageDiscountedReturn -70.9834 +__unnamed_task__/AverageReturn -167.306 +__unnamed_task__/Iteration 241 +__unnamed_task__/MaxReturn -145.258 +__unnamed_task__/MinReturn -236.584 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.5891 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:25:27 | [maml_trainer] epoch #242 | Sampling for adapation and meta-testing... +2025-04-03 15:26:39 | [maml_trainer] epoch #242 | Finished meta-testing... +2025-04-03 15:26:39 | [maml_trainer] epoch #242 | Saving snapshot... +2025-04-03 15:27:03 | [maml_trainer] epoch #242 | Saved +2025-04-03 15:27:03 | [maml_trainer] epoch #242 | Time 93101.36 s +2025-04-03 15:27:03 | [maml_trainer] epoch #242 | EpochTime 387.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1017 +Average/AverageReturn -167.002 +Average/Iteration 242 +Average/MaxReturn -146.12 +Average/MinReturn -239.508 +Average/NumEpisodes 80 +Average/StdReturn 21.4314 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.276 +GaussianMLPPolicy/KLAfter 0.00359399 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.2475e-05 +GaussianMLPPolicy/LossBefore -1.93715e-09 +GaussianMLPPolicy/dLoss 1.2473e-05 +Iteration 242 +MetaTest/Average/AverageDiscountedReturn -164.529 +MetaTest/Average/AverageReturn -164.529 +MetaTest/Average/Iteration 242 +MetaTest/Average/MaxReturn -144.368 +MetaTest/Average/MinReturn -253.131 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 22.3531 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.529 +MetaTest/__unnamed_task__/AverageReturn -164.529 +MetaTest/__unnamed_task__/Iteration 242 +MetaTest/__unnamed_task__/MaxReturn -144.368 +MetaTest/__unnamed_task__/MinReturn -253.131 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 22.3531 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.776e+06 +__unnamed_task__/AverageDiscountedReturn -71.1017 +__unnamed_task__/AverageReturn -167.002 +__unnamed_task__/Iteration 242 +__unnamed_task__/MaxReturn -146.12 +__unnamed_task__/MinReturn -239.508 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.4314 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:31:55 | [maml_trainer] epoch #243 | Sampling for adapation and meta-testing... +2025-04-03 15:33:09 | [maml_trainer] epoch #243 | Finished meta-testing... +2025-04-03 15:33:09 | [maml_trainer] epoch #243 | Saving snapshot... +2025-04-03 15:33:31 | [maml_trainer] epoch #243 | Saved +2025-04-03 15:33:31 | [maml_trainer] epoch #243 | Time 93489.87 s +2025-04-03 15:33:31 | [maml_trainer] epoch #243 | EpochTime 388.51 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.2092 +Average/AverageReturn -161.934 +Average/Iteration 243 +Average/MaxReturn -141.09 +Average/MinReturn -237.197 +Average/NumEpisodes 80 +Average/StdReturn 17.3409 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2727 +GaussianMLPPolicy/KLAfter 0.00401826 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.43224e-05 +GaussianMLPPolicy/LossBefore 7.86781e-09 +GaussianMLPPolicy/dLoss -6.43145e-05 +Iteration 243 +MetaTest/Average/AverageDiscountedReturn -161.075 +MetaTest/Average/AverageReturn -161.075 +MetaTest/Average/Iteration 243 +MetaTest/Average/MaxReturn -140.73 +MetaTest/Average/MinReturn -228.774 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.011 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.075 +MetaTest/__unnamed_task__/AverageReturn -161.075 +MetaTest/__unnamed_task__/Iteration 243 +MetaTest/__unnamed_task__/MaxReturn -140.73 +MetaTest/__unnamed_task__/MinReturn -228.774 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.011 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.808e+06 +__unnamed_task__/AverageDiscountedReturn -69.2092 +__unnamed_task__/AverageReturn -161.934 +__unnamed_task__/Iteration 243 +__unnamed_task__/MaxReturn -141.09 +__unnamed_task__/MinReturn -237.197 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.3409 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:38:23 | [maml_trainer] epoch #244 | Sampling for adapation and meta-testing... +2025-04-03 15:39:35 | [maml_trainer] epoch #244 | Finished meta-testing... +2025-04-03 15:39:35 | [maml_trainer] epoch #244 | Saving snapshot... +2025-04-03 15:39:58 | [maml_trainer] epoch #244 | Saved +2025-04-03 15:39:58 | [maml_trainer] epoch #244 | Time 93877.01 s +2025-04-03 15:39:58 | [maml_trainer] epoch #244 | EpochTime 387.14 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.034 +Average/AverageReturn -164.603 +Average/Iteration 244 +Average/MaxReturn -140.375 +Average/MinReturn -224.769 +Average/NumEpisodes 80 +Average/StdReturn 15.5308 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2711 +GaussianMLPPolicy/KLAfter 0.00356923 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.65552e-05 +GaussianMLPPolicy/LossBefore 3.18885e-09 +GaussianMLPPolicy/dLoss -1.6552e-05 +Iteration 244 +MetaTest/Average/AverageDiscountedReturn -163.753 +MetaTest/Average/AverageReturn -163.753 +MetaTest/Average/Iteration 244 +MetaTest/Average/MaxReturn -147.493 +MetaTest/Average/MinReturn -203.534 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.1775 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.753 +MetaTest/__unnamed_task__/AverageReturn -163.753 +MetaTest/__unnamed_task__/Iteration 244 +MetaTest/__unnamed_task__/MaxReturn -147.493 +MetaTest/__unnamed_task__/MinReturn -203.534 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.1775 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.84e+06 +__unnamed_task__/AverageDiscountedReturn -70.034 +__unnamed_task__/AverageReturn -164.603 +__unnamed_task__/Iteration 244 +__unnamed_task__/MaxReturn -140.375 +__unnamed_task__/MinReturn -224.769 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.5308 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:44:52 | [maml_trainer] epoch #245 | Sampling for adapation and meta-testing... +2025-04-03 15:46:04 | [maml_trainer] epoch #245 | Finished meta-testing... +2025-04-03 15:46:04 | [maml_trainer] epoch #245 | Saving snapshot... +2025-04-03 15:46:27 | [maml_trainer] epoch #245 | Saved +2025-04-03 15:46:27 | [maml_trainer] epoch #245 | Time 94265.39 s +2025-04-03 15:46:27 | [maml_trainer] epoch #245 | EpochTime 388.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.7577 +Average/AverageReturn -171.308 +Average/Iteration 245 +Average/MaxReturn -141.952 +Average/MinReturn -243.676 +Average/NumEpisodes 80 +Average/StdReturn 25.7794 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2706 +GaussianMLPPolicy/KLAfter 0.0035214 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.96544e-05 +GaussianMLPPolicy/LossBefore -2.25008e-09 +GaussianMLPPolicy/dLoss -5.96566e-05 +Iteration 245 +MetaTest/Average/AverageDiscountedReturn -167.984 +MetaTest/Average/AverageReturn -167.984 +MetaTest/Average/Iteration 245 +MetaTest/Average/MaxReturn -148.179 +MetaTest/Average/MinReturn -230.882 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.6638 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.984 +MetaTest/__unnamed_task__/AverageReturn -167.984 +MetaTest/__unnamed_task__/Iteration 245 +MetaTest/__unnamed_task__/MaxReturn -148.179 +MetaTest/__unnamed_task__/MinReturn -230.882 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.6638 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.872e+06 +__unnamed_task__/AverageDiscountedReturn -72.7577 +__unnamed_task__/AverageReturn -171.308 +__unnamed_task__/Iteration 245 +__unnamed_task__/MaxReturn -141.952 +__unnamed_task__/MinReturn -243.676 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.7794 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:51:17 | [maml_trainer] epoch #246 | Sampling for adapation and meta-testing... +2025-04-03 15:52:29 | [maml_trainer] epoch #246 | Finished meta-testing... +2025-04-03 15:52:29 | [maml_trainer] epoch #246 | Saving snapshot... +2025-04-03 15:52:53 | [maml_trainer] epoch #246 | Saved +2025-04-03 15:52:53 | [maml_trainer] epoch #246 | Time 94651.56 s +2025-04-03 15:52:53 | [maml_trainer] epoch #246 | EpochTime 386.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.4081 +Average/AverageReturn -167.584 +Average/Iteration 246 +Average/MaxReturn -141.513 +Average/MinReturn -231.955 +Average/NumEpisodes 80 +Average/StdReturn 21.1195 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2702 +GaussianMLPPolicy/KLAfter 0.00374237 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.92507e-05 +GaussianMLPPolicy/LossBefore 2.90573e-09 +GaussianMLPPolicy/dLoss -1.92478e-05 +Iteration 246 +MetaTest/Average/AverageDiscountedReturn -175.43 +MetaTest/Average/AverageReturn -175.43 +MetaTest/Average/Iteration 246 +MetaTest/Average/MaxReturn -146.17 +MetaTest/Average/MinReturn -236.124 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 31.7999 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -175.43 +MetaTest/__unnamed_task__/AverageReturn -175.43 +MetaTest/__unnamed_task__/Iteration 246 +MetaTest/__unnamed_task__/MaxReturn -146.17 +MetaTest/__unnamed_task__/MinReturn -236.124 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 31.7999 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.904e+06 +__unnamed_task__/AverageDiscountedReturn -71.4081 +__unnamed_task__/AverageReturn -167.584 +__unnamed_task__/Iteration 246 +__unnamed_task__/MaxReturn -141.513 +__unnamed_task__/MinReturn -231.955 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.1195 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 15:57:43 | [maml_trainer] epoch #247 | Sampling for adapation and meta-testing... +2025-04-03 15:58:55 | [maml_trainer] epoch #247 | Finished meta-testing... +2025-04-03 15:58:55 | [maml_trainer] epoch #247 | Saving snapshot... +2025-04-03 15:59:17 | [maml_trainer] epoch #247 | Saved +2025-04-03 15:59:17 | [maml_trainer] epoch #247 | Time 95035.44 s +2025-04-03 15:59:17 | [maml_trainer] epoch #247 | EpochTime 383.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.7907 +Average/AverageReturn -166.736 +Average/Iteration 247 +Average/MaxReturn -144.333 +Average/MinReturn -236.071 +Average/NumEpisodes 80 +Average/StdReturn 20.135 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.27 +GaussianMLPPolicy/KLAfter 0.00395351 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.13558e-05 +GaussianMLPPolicy/LossBefore 5.06639e-10 +GaussianMLPPolicy/dLoss -5.13553e-05 +Iteration 247 +MetaTest/Average/AverageDiscountedReturn -167.241 +MetaTest/Average/AverageReturn -167.241 +MetaTest/Average/Iteration 247 +MetaTest/Average/MaxReturn -141.765 +MetaTest/Average/MinReturn -236.798 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.5418 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.241 +MetaTest/__unnamed_task__/AverageReturn -167.241 +MetaTest/__unnamed_task__/Iteration 247 +MetaTest/__unnamed_task__/MaxReturn -141.765 +MetaTest/__unnamed_task__/MinReturn -236.798 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.5418 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.936e+06 +__unnamed_task__/AverageDiscountedReturn -70.7907 +__unnamed_task__/AverageReturn -166.736 +__unnamed_task__/Iteration 247 +__unnamed_task__/MaxReturn -144.333 +__unnamed_task__/MinReturn -236.071 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.135 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:04:06 | [maml_trainer] epoch #248 | Sampling for adapation and meta-testing... +2025-04-03 16:05:17 | [maml_trainer] epoch #248 | Finished meta-testing... +2025-04-03 16:05:17 | [maml_trainer] epoch #248 | Saving snapshot... +2025-04-03 16:05:40 | [maml_trainer] epoch #248 | Saved +2025-04-03 16:05:40 | [maml_trainer] epoch #248 | Time 95418.82 s +2025-04-03 16:05:40 | [maml_trainer] epoch #248 | EpochTime 383.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.5952 +Average/AverageReturn -162.106 +Average/Iteration 248 +Average/MaxReturn -142.119 +Average/MinReturn -226.637 +Average/NumEpisodes 80 +Average/StdReturn 15.0095 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2707 +GaussianMLPPolicy/KLAfter 0.00292049 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.05066e-05 +GaussianMLPPolicy/LossBefore -2.6077e-09 +GaussianMLPPolicy/dLoss -2.05092e-05 +Iteration 248 +MetaTest/Average/AverageDiscountedReturn -164.859 +MetaTest/Average/AverageReturn -164.859 +MetaTest/Average/Iteration 248 +MetaTest/Average/MaxReturn -145.481 +MetaTest/Average/MinReturn -229.692 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6562 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.859 +MetaTest/__unnamed_task__/AverageReturn -164.859 +MetaTest/__unnamed_task__/Iteration 248 +MetaTest/__unnamed_task__/MaxReturn -145.481 +MetaTest/__unnamed_task__/MinReturn -229.692 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6562 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 7.968e+06 +__unnamed_task__/AverageDiscountedReturn -69.5952 +__unnamed_task__/AverageReturn -162.106 +__unnamed_task__/Iteration 248 +__unnamed_task__/MaxReturn -142.119 +__unnamed_task__/MinReturn -226.637 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 15.0095 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:10:30 | [maml_trainer] epoch #249 | Sampling for adapation and meta-testing... +2025-04-03 16:11:43 | [maml_trainer] epoch #249 | Finished meta-testing... +2025-04-03 16:11:43 | [maml_trainer] epoch #249 | Saving snapshot... +2025-04-03 16:12:06 | [maml_trainer] epoch #249 | Saved +2025-04-03 16:12:06 | [maml_trainer] epoch #249 | Time 95804.83 s +2025-04-03 16:12:06 | [maml_trainer] epoch #249 | EpochTime 386.00 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6638 +Average/AverageReturn -167.827 +Average/Iteration 249 +Average/MaxReturn -146.449 +Average/MinReturn -244.603 +Average/NumEpisodes 80 +Average/StdReturn 23.2453 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2734 +GaussianMLPPolicy/KLAfter 0.00304354 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.25151e-06 +GaussianMLPPolicy/LossBefore -4.20213e-09 +GaussianMLPPolicy/dLoss -6.25572e-06 +Iteration 249 +MetaTest/Average/AverageDiscountedReturn -161.297 +MetaTest/Average/AverageReturn -161.297 +MetaTest/Average/Iteration 249 +MetaTest/Average/MaxReturn -144.858 +MetaTest/Average/MinReturn -235.158 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.5653 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.297 +MetaTest/__unnamed_task__/AverageReturn -161.297 +MetaTest/__unnamed_task__/Iteration 249 +MetaTest/__unnamed_task__/MaxReturn -144.858 +MetaTest/__unnamed_task__/MinReturn -235.158 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.5653 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8e+06 +__unnamed_task__/AverageDiscountedReturn -71.6638 +__unnamed_task__/AverageReturn -167.827 +__unnamed_task__/Iteration 249 +__unnamed_task__/MaxReturn -146.449 +__unnamed_task__/MinReturn -244.603 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.2453 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:16:57 | [maml_trainer] epoch #250 | Sampling for adapation and meta-testing... +2025-04-03 16:18:10 | [maml_trainer] epoch #250 | Finished meta-testing... +2025-04-03 16:18:10 | [maml_trainer] epoch #250 | Saving snapshot... +2025-04-03 16:18:33 | [maml_trainer] epoch #250 | Saved +2025-04-03 16:18:33 | [maml_trainer] epoch #250 | Time 96191.76 s +2025-04-03 16:18:33 | [maml_trainer] epoch #250 | EpochTime 386.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.475 +Average/AverageReturn -166.948 +Average/Iteration 250 +Average/MaxReturn -142.263 +Average/MinReturn -230.902 +Average/NumEpisodes 80 +Average/StdReturn 24.5209 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2752 +GaussianMLPPolicy/KLAfter 0.00361342 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.12381e-05 +GaussianMLPPolicy/LossBefore 3.01003e-09 +GaussianMLPPolicy/dLoss -5.12351e-05 +Iteration 250 +MetaTest/Average/AverageDiscountedReturn -165.079 +MetaTest/Average/AverageReturn -165.079 +MetaTest/Average/Iteration 250 +MetaTest/Average/MaxReturn -146.313 +MetaTest/Average/MinReturn -248.935 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.7343 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.079 +MetaTest/__unnamed_task__/AverageReturn -165.079 +MetaTest/__unnamed_task__/Iteration 250 +MetaTest/__unnamed_task__/MaxReturn -146.313 +MetaTest/__unnamed_task__/MinReturn -248.935 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.7343 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.032e+06 +__unnamed_task__/AverageDiscountedReturn -71.475 +__unnamed_task__/AverageReturn -166.948 +__unnamed_task__/Iteration 250 +__unnamed_task__/MaxReturn -142.263 +__unnamed_task__/MinReturn -230.902 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.5209 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:23:23 | [maml_trainer] epoch #251 | Sampling for adapation and meta-testing... +2025-04-03 16:24:35 | [maml_trainer] epoch #251 | Finished meta-testing... +2025-04-03 16:24:35 | [maml_trainer] epoch #251 | Saving snapshot... +2025-04-03 16:24:57 | [maml_trainer] epoch #251 | Saved +2025-04-03 16:24:57 | [maml_trainer] epoch #251 | Time 96575.70 s +2025-04-03 16:24:57 | [maml_trainer] epoch #251 | EpochTime 383.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5872 +Average/AverageReturn -168.958 +Average/Iteration 251 +Average/MaxReturn -143.602 +Average/MinReturn -265.978 +Average/NumEpisodes 80 +Average/StdReturn 25.0989 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2768 +GaussianMLPPolicy/KLAfter 0.00299027 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.43036e-05 +GaussianMLPPolicy/LossBefore 1.08778e-09 +GaussianMLPPolicy/dLoss 3.43047e-05 +Iteration 251 +MetaTest/Average/AverageDiscountedReturn -161.242 +MetaTest/Average/AverageReturn -161.242 +MetaTest/Average/Iteration 251 +MetaTest/Average/MaxReturn -140.749 +MetaTest/Average/MinReturn -230.873 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.1174 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.242 +MetaTest/__unnamed_task__/AverageReturn -161.242 +MetaTest/__unnamed_task__/Iteration 251 +MetaTest/__unnamed_task__/MaxReturn -140.749 +MetaTest/__unnamed_task__/MinReturn -230.873 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.1174 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.064e+06 +__unnamed_task__/AverageDiscountedReturn -72.5872 +__unnamed_task__/AverageReturn -168.958 +__unnamed_task__/Iteration 251 +__unnamed_task__/MaxReturn -143.602 +__unnamed_task__/MinReturn -265.978 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.0989 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:29:45 | [maml_trainer] epoch #252 | Sampling for adapation and meta-testing... +2025-04-03 16:30:58 | [maml_trainer] epoch #252 | Finished meta-testing... +2025-04-03 16:30:58 | [maml_trainer] epoch #252 | Saving snapshot... +2025-04-03 16:31:23 | [maml_trainer] epoch #252 | Saved +2025-04-03 16:31:23 | [maml_trainer] epoch #252 | Time 96961.77 s +2025-04-03 16:31:23 | [maml_trainer] epoch #252 | EpochTime 386.07 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.0512 +Average/AverageReturn -162.919 +Average/Iteration 252 +Average/MaxReturn -143.387 +Average/MinReturn -250.321 +Average/NumEpisodes 80 +Average/StdReturn 22.4906 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2798 +GaussianMLPPolicy/KLAfter 0.00328374 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.82929e-05 +GaussianMLPPolicy/LossBefore -4.60446e-09 +GaussianMLPPolicy/dLoss 5.82882e-05 +Iteration 252 +MetaTest/Average/AverageDiscountedReturn -162.608 +MetaTest/Average/AverageReturn -162.608 +MetaTest/Average/Iteration 252 +MetaTest/Average/MaxReturn -141.049 +MetaTest/Average/MinReturn -227.9 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.5601 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -162.608 +MetaTest/__unnamed_task__/AverageReturn -162.608 +MetaTest/__unnamed_task__/Iteration 252 +MetaTest/__unnamed_task__/MaxReturn -141.049 +MetaTest/__unnamed_task__/MinReturn -227.9 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.5601 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.096e+06 +__unnamed_task__/AverageDiscountedReturn -70.0512 +__unnamed_task__/AverageReturn -162.919 +__unnamed_task__/Iteration 252 +__unnamed_task__/MaxReturn -143.387 +__unnamed_task__/MinReturn -250.321 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.4906 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:36:17 | [maml_trainer] epoch #253 | Sampling for adapation and meta-testing... +2025-04-03 16:37:31 | [maml_trainer] epoch #253 | Finished meta-testing... +2025-04-03 16:37:31 | [maml_trainer] epoch #253 | Saving snapshot... +2025-04-03 16:37:56 | [maml_trainer] epoch #253 | Saved +2025-04-03 16:37:56 | [maml_trainer] epoch #253 | Time 97354.64 s +2025-04-03 16:37:56 | [maml_trainer] epoch #253 | EpochTime 392.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7001 +Average/AverageReturn -167.508 +Average/Iteration 253 +Average/MaxReturn -142.527 +Average/MinReturn -291.942 +Average/NumEpisodes 80 +Average/StdReturn 25.3366 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2857 +GaussianMLPPolicy/KLAfter 0.00282539 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.46068e-05 +GaussianMLPPolicy/LossBefore 3.8147e-09 +GaussianMLPPolicy/dLoss 1.46106e-05 +Iteration 253 +MetaTest/Average/AverageDiscountedReturn -178.678 +MetaTest/Average/AverageReturn -178.678 +MetaTest/Average/Iteration 253 +MetaTest/Average/MaxReturn -146.952 +MetaTest/Average/MinReturn -251.94 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 35.4619 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.678 +MetaTest/__unnamed_task__/AverageReturn -178.678 +MetaTest/__unnamed_task__/Iteration 253 +MetaTest/__unnamed_task__/MaxReturn -146.952 +MetaTest/__unnamed_task__/MinReturn -251.94 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 35.4619 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.128e+06 +__unnamed_task__/AverageDiscountedReturn -71.7001 +__unnamed_task__/AverageReturn -167.508 +__unnamed_task__/Iteration 253 +__unnamed_task__/MaxReturn -142.527 +__unnamed_task__/MinReturn -291.942 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.3366 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:42:50 | [maml_trainer] epoch #254 | Sampling for adapation and meta-testing... +2025-04-03 16:44:02 | [maml_trainer] epoch #254 | Finished meta-testing... +2025-04-03 16:44:02 | [maml_trainer] epoch #254 | Saving snapshot... +2025-04-03 16:44:26 | [maml_trainer] epoch #254 | Saved +2025-04-03 16:44:26 | [maml_trainer] epoch #254 | Time 97744.76 s +2025-04-03 16:44:26 | [maml_trainer] epoch #254 | EpochTime 390.11 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.543 +Average/AverageReturn -166.507 +Average/Iteration 254 +Average/MaxReturn -145.2 +Average/MinReturn -262.238 +Average/NumEpisodes 80 +Average/StdReturn 24.1483 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2921 +GaussianMLPPolicy/KLAfter 0.0037379 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.85901e-05 +GaussianMLPPolicy/LossBefore -1.16825e-08 +GaussianMLPPolicy/dLoss -2.86018e-05 +Iteration 254 +MetaTest/Average/AverageDiscountedReturn -165.341 +MetaTest/Average/AverageReturn -165.341 +MetaTest/Average/Iteration 254 +MetaTest/Average/MaxReturn -139.865 +MetaTest/Average/MinReturn -227.406 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.5914 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -165.341 +MetaTest/__unnamed_task__/AverageReturn -165.341 +MetaTest/__unnamed_task__/Iteration 254 +MetaTest/__unnamed_task__/MaxReturn -139.865 +MetaTest/__unnamed_task__/MinReturn -227.406 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.5914 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.16e+06 +__unnamed_task__/AverageDiscountedReturn -71.543 +__unnamed_task__/AverageReturn -166.507 +__unnamed_task__/Iteration 254 +__unnamed_task__/MaxReturn -145.2 +__unnamed_task__/MinReturn -262.238 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.1483 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:49:22 | [maml_trainer] epoch #255 | Sampling for adapation and meta-testing... +2025-04-03 16:50:35 | [maml_trainer] epoch #255 | Finished meta-testing... +2025-04-03 16:50:35 | [maml_trainer] epoch #255 | Saving snapshot... +2025-04-03 16:50:57 | [maml_trainer] epoch #255 | Saved +2025-04-03 16:50:57 | [maml_trainer] epoch #255 | Time 98135.70 s +2025-04-03 16:50:57 | [maml_trainer] epoch #255 | EpochTime 390.94 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.0608 +Average/AverageReturn -169.704 +Average/Iteration 255 +Average/MaxReturn -143.616 +Average/MinReturn -233.59 +Average/NumEpisodes 80 +Average/StdReturn 22.39 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.2968 +GaussianMLPPolicy/KLAfter 0.00285155 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.41344e-05 +GaussianMLPPolicy/LossBefore 6.27339e-09 +GaussianMLPPolicy/dLoss -3.41281e-05 +Iteration 255 +MetaTest/Average/AverageDiscountedReturn -164.463 +MetaTest/Average/AverageReturn -164.463 +MetaTest/Average/Iteration 255 +MetaTest/Average/MaxReturn -148.153 +MetaTest/Average/MinReturn -214.272 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.4582 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.463 +MetaTest/__unnamed_task__/AverageReturn -164.463 +MetaTest/__unnamed_task__/Iteration 255 +MetaTest/__unnamed_task__/MaxReturn -148.153 +MetaTest/__unnamed_task__/MinReturn -214.272 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.4582 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.192e+06 +__unnamed_task__/AverageDiscountedReturn -72.0608 +__unnamed_task__/AverageReturn -169.704 +__unnamed_task__/Iteration 255 +__unnamed_task__/MaxReturn -143.616 +__unnamed_task__/MinReturn -233.59 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.39 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 16:55:47 | [maml_trainer] epoch #256 | Sampling for adapation and meta-testing... +2025-04-03 16:56:59 | [maml_trainer] epoch #256 | Finished meta-testing... +2025-04-03 16:56:59 | [maml_trainer] epoch #256 | Saving snapshot... +2025-04-03 16:57:21 | [maml_trainer] epoch #256 | Saved +2025-04-03 16:57:21 | [maml_trainer] epoch #256 | Time 98520.11 s +2025-04-03 16:57:21 | [maml_trainer] epoch #256 | EpochTime 384.41 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7815 +Average/AverageReturn -168.814 +Average/Iteration 256 +Average/MaxReturn -126.342 +Average/MinReturn -238.847 +Average/NumEpisodes 80 +Average/StdReturn 25.1841 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3016 +GaussianMLPPolicy/KLAfter 0.00298244 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.8035e-05 +GaussianMLPPolicy/LossBefore 5.33462e-09 +GaussianMLPPolicy/dLoss -1.80296e-05 +Iteration 256 +MetaTest/Average/AverageDiscountedReturn -163.577 +MetaTest/Average/AverageReturn -163.577 +MetaTest/Average/Iteration 256 +MetaTest/Average/MaxReturn -146.023 +MetaTest/Average/MinReturn -184.667 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.3719 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.577 +MetaTest/__unnamed_task__/AverageReturn -163.577 +MetaTest/__unnamed_task__/Iteration 256 +MetaTest/__unnamed_task__/MaxReturn -146.023 +MetaTest/__unnamed_task__/MinReturn -184.667 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.3719 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.224e+06 +__unnamed_task__/AverageDiscountedReturn -71.7815 +__unnamed_task__/AverageReturn -168.814 +__unnamed_task__/Iteration 256 +__unnamed_task__/MaxReturn -126.342 +__unnamed_task__/MinReturn -238.847 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.1841 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:02:08 | [maml_trainer] epoch #257 | Sampling for adapation and meta-testing... +2025-04-03 17:03:22 | [maml_trainer] epoch #257 | Finished meta-testing... +2025-04-03 17:03:22 | [maml_trainer] epoch #257 | Saving snapshot... +2025-04-03 17:03:46 | [maml_trainer] epoch #257 | Saved +2025-04-03 17:03:46 | [maml_trainer] epoch #257 | Time 98904.88 s +2025-04-03 17:03:46 | [maml_trainer] epoch #257 | EpochTime 384.77 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2752 +Average/AverageReturn -169.786 +Average/Iteration 257 +Average/MaxReturn -145.819 +Average/MinReturn -243.259 +Average/NumEpisodes 80 +Average/StdReturn 24.8469 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3058 +GaussianMLPPolicy/KLAfter 0.00304664 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.19351e-06 +GaussianMLPPolicy/LossBefore 4.42564e-09 +GaussianMLPPolicy/dLoss 1.19793e-06 +Iteration 257 +MetaTest/Average/AverageDiscountedReturn -169.003 +MetaTest/Average/AverageReturn -169.003 +MetaTest/Average/Iteration 257 +MetaTest/Average/MaxReturn -143.056 +MetaTest/Average/MinReturn -236.225 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.8232 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.003 +MetaTest/__unnamed_task__/AverageReturn -169.003 +MetaTest/__unnamed_task__/Iteration 257 +MetaTest/__unnamed_task__/MaxReturn -143.056 +MetaTest/__unnamed_task__/MinReturn -236.225 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.8232 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.256e+06 +__unnamed_task__/AverageDiscountedReturn -72.2752 +__unnamed_task__/AverageReturn -169.786 +__unnamed_task__/Iteration 257 +__unnamed_task__/MaxReturn -145.819 +__unnamed_task__/MinReturn -243.259 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.8469 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:08:39 | [maml_trainer] epoch #258 | Sampling for adapation and meta-testing... +2025-04-03 17:09:53 | [maml_trainer] epoch #258 | Finished meta-testing... +2025-04-03 17:09:53 | [maml_trainer] epoch #258 | Saving snapshot... +2025-04-03 17:10:17 | [maml_trainer] epoch #258 | Saved +2025-04-03 17:10:17 | [maml_trainer] epoch #258 | Time 99295.72 s +2025-04-03 17:10:17 | [maml_trainer] epoch #258 | EpochTime 390.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.5602 +Average/AverageReturn -167.181 +Average/Iteration 258 +Average/MaxReturn -138.238 +Average/MinReturn -255.074 +Average/NumEpisodes 80 +Average/StdReturn 26.3244 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3102 +GaussianMLPPolicy/KLAfter 0.00334873 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.30046e-05 +GaussianMLPPolicy/LossBefore 7.59959e-10 +GaussianMLPPolicy/dLoss 1.30054e-05 +Iteration 258 +MetaTest/Average/AverageDiscountedReturn -164.695 +MetaTest/Average/AverageReturn -164.695 +MetaTest/Average/Iteration 258 +MetaTest/Average/MaxReturn -148.562 +MetaTest/Average/MinReturn -209.183 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.9499 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -164.695 +MetaTest/__unnamed_task__/AverageReturn -164.695 +MetaTest/__unnamed_task__/Iteration 258 +MetaTest/__unnamed_task__/MaxReturn -148.562 +MetaTest/__unnamed_task__/MinReturn -209.183 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.9499 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.288e+06 +__unnamed_task__/AverageDiscountedReturn -71.5602 +__unnamed_task__/AverageReturn -167.181 +__unnamed_task__/Iteration 258 +__unnamed_task__/MaxReturn -138.238 +__unnamed_task__/MinReturn -255.074 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.3244 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:15:11 | [maml_trainer] epoch #259 | Sampling for adapation and meta-testing... +2025-04-03 17:16:23 | [maml_trainer] epoch #259 | Finished meta-testing... +2025-04-03 17:16:23 | [maml_trainer] epoch #259 | Saving snapshot... +2025-04-03 17:16:46 | [maml_trainer] epoch #259 | Saved +2025-04-03 17:16:46 | [maml_trainer] epoch #259 | Time 99685.10 s +2025-04-03 17:16:46 | [maml_trainer] epoch #259 | EpochTime 389.38 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.4209 +Average/AverageReturn -164.9 +Average/Iteration 259 +Average/MaxReturn -141.13 +Average/MinReturn -249.769 +Average/NumEpisodes 80 +Average/StdReturn 20.3554 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3134 +GaussianMLPPolicy/KLAfter 0.00196528 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.82319e-05 +GaussianMLPPolicy/LossBefore 2.87592e-09 +GaussianMLPPolicy/dLoss -1.8229e-05 +Iteration 259 +MetaTest/Average/AverageDiscountedReturn -176.118 +MetaTest/Average/AverageReturn -176.118 +MetaTest/Average/Iteration 259 +MetaTest/Average/MaxReturn -147.76 +MetaTest/Average/MinReturn -241.389 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 32.679 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.118 +MetaTest/__unnamed_task__/AverageReturn -176.118 +MetaTest/__unnamed_task__/Iteration 259 +MetaTest/__unnamed_task__/MaxReturn -147.76 +MetaTest/__unnamed_task__/MinReturn -241.389 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 32.679 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.32e+06 +__unnamed_task__/AverageDiscountedReturn -70.4209 +__unnamed_task__/AverageReturn -164.9 +__unnamed_task__/Iteration 259 +__unnamed_task__/MaxReturn -141.13 +__unnamed_task__/MinReturn -249.769 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.3554 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:21:39 | [maml_trainer] epoch #260 | Sampling for adapation and meta-testing... +2025-04-03 17:22:51 | [maml_trainer] epoch #260 | Finished meta-testing... +2025-04-03 17:22:51 | [maml_trainer] epoch #260 | Saving snapshot... +2025-04-03 17:23:13 | [maml_trainer] epoch #260 | Saved +2025-04-03 17:23:13 | [maml_trainer] epoch #260 | Time 100071.88 s +2025-04-03 17:23:13 | [maml_trainer] epoch #260 | EpochTime 386.77 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6544 +Average/AverageReturn -169.917 +Average/Iteration 260 +Average/MaxReturn -145.49 +Average/MinReturn -286.815 +Average/NumEpisodes 80 +Average/StdReturn 24.7724 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3165 +GaussianMLPPolicy/KLAfter 0.00340722 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.68721e-05 +GaussianMLPPolicy/LossBefore 5.43892e-09 +GaussianMLPPolicy/dLoss 1.68775e-05 +Iteration 260 +MetaTest/Average/AverageDiscountedReturn -168.238 +MetaTest/Average/AverageReturn -168.238 +MetaTest/Average/Iteration 260 +MetaTest/Average/MaxReturn -145.552 +MetaTest/Average/MinReturn -238.159 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.4964 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.238 +MetaTest/__unnamed_task__/AverageReturn -168.238 +MetaTest/__unnamed_task__/Iteration 260 +MetaTest/__unnamed_task__/MaxReturn -145.552 +MetaTest/__unnamed_task__/MinReturn -238.159 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.4964 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.352e+06 +__unnamed_task__/AverageDiscountedReturn -72.6544 +__unnamed_task__/AverageReturn -169.917 +__unnamed_task__/Iteration 260 +__unnamed_task__/MaxReturn -145.49 +__unnamed_task__/MinReturn -286.815 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.7724 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:28:06 | [maml_trainer] epoch #261 | Sampling for adapation and meta-testing... +2025-04-03 17:29:19 | [maml_trainer] epoch #261 | Finished meta-testing... +2025-04-03 17:29:19 | [maml_trainer] epoch #261 | Saving snapshot... +2025-04-03 17:29:43 | [maml_trainer] epoch #261 | Saved +2025-04-03 17:29:43 | [maml_trainer] epoch #261 | Time 100461.28 s +2025-04-03 17:29:43 | [maml_trainer] epoch #261 | EpochTime 389.39 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9597 +Average/AverageReturn -167.488 +Average/Iteration 261 +Average/MaxReturn -142.637 +Average/MinReturn -308.889 +Average/NumEpisodes 80 +Average/StdReturn 25.6273 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3186 +GaussianMLPPolicy/KLAfter 0.00243566 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.29048e-05 +GaussianMLPPolicy/LossBefore 3.33786e-09 +GaussianMLPPolicy/dLoss -3.29015e-05 +Iteration 261 +MetaTest/Average/AverageDiscountedReturn -167.564 +MetaTest/Average/AverageReturn -167.564 +MetaTest/Average/Iteration 261 +MetaTest/Average/MaxReturn -148.04 +MetaTest/Average/MinReturn -234.655 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6744 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.564 +MetaTest/__unnamed_task__/AverageReturn -167.564 +MetaTest/__unnamed_task__/Iteration 261 +MetaTest/__unnamed_task__/MaxReturn -148.04 +MetaTest/__unnamed_task__/MinReturn -234.655 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6744 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.384e+06 +__unnamed_task__/AverageDiscountedReturn -70.9597 +__unnamed_task__/AverageReturn -167.488 +__unnamed_task__/Iteration 261 +__unnamed_task__/MaxReturn -142.637 +__unnamed_task__/MinReturn -308.889 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.6273 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:34:35 | [maml_trainer] epoch #262 | Sampling for adapation and meta-testing... +2025-04-03 17:35:48 | [maml_trainer] epoch #262 | Finished meta-testing... +2025-04-03 17:35:48 | [maml_trainer] epoch #262 | Saving snapshot... +2025-04-03 17:36:12 | [maml_trainer] epoch #262 | Saved +2025-04-03 17:36:12 | [maml_trainer] epoch #262 | Time 100850.21 s +2025-04-03 17:36:12 | [maml_trainer] epoch #262 | EpochTime 388.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.6814 +Average/AverageReturn -169.123 +Average/Iteration 262 +Average/MaxReturn -141.713 +Average/MinReturn -281.505 +Average/NumEpisodes 80 +Average/StdReturn 26.2425 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3206 +GaussianMLPPolicy/KLAfter 0.00302939 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.39259e-05 +GaussianMLPPolicy/LossBefore -4.30644e-09 +GaussianMLPPolicy/dLoss -2.39302e-05 +Iteration 262 +MetaTest/Average/AverageDiscountedReturn -163.639 +MetaTest/Average/AverageReturn -163.639 +MetaTest/Average/Iteration 262 +MetaTest/Average/MaxReturn -144.517 +MetaTest/Average/MinReturn -187.485 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.8931 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -163.639 +MetaTest/__unnamed_task__/AverageReturn -163.639 +MetaTest/__unnamed_task__/Iteration 262 +MetaTest/__unnamed_task__/MaxReturn -144.517 +MetaTest/__unnamed_task__/MinReturn -187.485 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.8931 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.416e+06 +__unnamed_task__/AverageDiscountedReturn -71.6814 +__unnamed_task__/AverageReturn -169.123 +__unnamed_task__/Iteration 262 +__unnamed_task__/MaxReturn -141.713 +__unnamed_task__/MinReturn -281.505 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.2425 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:41:04 | [maml_trainer] epoch #263 | Sampling for adapation and meta-testing... +2025-04-03 17:42:16 | [maml_trainer] epoch #263 | Finished meta-testing... +2025-04-03 17:42:16 | [maml_trainer] epoch #263 | Saving snapshot... +2025-04-03 17:42:40 | [maml_trainer] epoch #263 | Saved +2025-04-03 17:42:40 | [maml_trainer] epoch #263 | Time 101238.63 s +2025-04-03 17:42:40 | [maml_trainer] epoch #263 | EpochTime 388.42 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6132 +Average/AverageReturn -172.568 +Average/Iteration 263 +Average/MaxReturn -145.644 +Average/MinReturn -259.58 +Average/NumEpisodes 80 +Average/StdReturn 29.0812 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3221 +GaussianMLPPolicy/KLAfter 0.00307506 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.70885e-05 +GaussianMLPPolicy/LossBefore 2.98024e-11 +GaussianMLPPolicy/dLoss -1.70884e-05 +Iteration 263 +MetaTest/Average/AverageDiscountedReturn -161.94 +MetaTest/Average/AverageReturn -161.94 +MetaTest/Average/Iteration 263 +MetaTest/Average/MaxReturn -143.445 +MetaTest/Average/MinReturn -180.616 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.2205 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.94 +MetaTest/__unnamed_task__/AverageReturn -161.94 +MetaTest/__unnamed_task__/Iteration 263 +MetaTest/__unnamed_task__/MaxReturn -143.445 +MetaTest/__unnamed_task__/MinReturn -180.616 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.2205 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.448e+06 +__unnamed_task__/AverageDiscountedReturn -72.6132 +__unnamed_task__/AverageReturn -172.568 +__unnamed_task__/Iteration 263 +__unnamed_task__/MaxReturn -145.644 +__unnamed_task__/MinReturn -259.58 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 29.0812 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:47:33 | [maml_trainer] epoch #264 | Sampling for adapation and meta-testing... +2025-04-03 17:48:45 | [maml_trainer] epoch #264 | Finished meta-testing... +2025-04-03 17:48:45 | [maml_trainer] epoch #264 | Saving snapshot... +2025-04-03 17:49:07 | [maml_trainer] epoch #264 | Saved +2025-04-03 17:49:07 | [maml_trainer] epoch #264 | Time 101625.78 s +2025-04-03 17:49:07 | [maml_trainer] epoch #264 | EpochTime 387.15 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -69.3785 +Average/AverageReturn -164.791 +Average/Iteration 264 +Average/MaxReturn -147.803 +Average/MinReturn -222.347 +Average/NumEpisodes 80 +Average/StdReturn 12.7448 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3246 +GaussianMLPPolicy/KLAfter 0.00273015 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.15089e-05 +GaussianMLPPolicy/LossBefore -6.55651e-10 +GaussianMLPPolicy/dLoss 2.15082e-05 +Iteration 264 +MetaTest/Average/AverageDiscountedReturn -176.06 +MetaTest/Average/AverageReturn -176.06 +MetaTest/Average/Iteration 264 +MetaTest/Average/MaxReturn -149.659 +MetaTest/Average/MinReturn -238.434 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.815 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.06 +MetaTest/__unnamed_task__/AverageReturn -176.06 +MetaTest/__unnamed_task__/Iteration 264 +MetaTest/__unnamed_task__/MaxReturn -149.659 +MetaTest/__unnamed_task__/MinReturn -238.434 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.815 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.48e+06 +__unnamed_task__/AverageDiscountedReturn -69.3785 +__unnamed_task__/AverageReturn -164.791 +__unnamed_task__/Iteration 264 +__unnamed_task__/MaxReturn -147.803 +__unnamed_task__/MinReturn -222.347 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 12.7448 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 17:54:00 | [maml_trainer] epoch #265 | Sampling for adapation and meta-testing... +2025-04-03 17:55:13 | [maml_trainer] epoch #265 | Finished meta-testing... +2025-04-03 17:55:13 | [maml_trainer] epoch #265 | Saving snapshot... +2025-04-03 17:55:37 | [maml_trainer] epoch #265 | Saved +2025-04-03 17:55:37 | [maml_trainer] epoch #265 | Time 102015.15 s +2025-04-03 17:55:37 | [maml_trainer] epoch #265 | EpochTime 389.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.9679 +Average/AverageReturn -168.859 +Average/Iteration 265 +Average/MaxReturn -140.296 +Average/MinReturn -257.869 +Average/NumEpisodes 80 +Average/StdReturn 21.1185 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3248 +GaussianMLPPolicy/KLAfter 0.00280506 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.55294e-05 +GaussianMLPPolicy/LossBefore 2.23517e-10 +GaussianMLPPolicy/dLoss 2.55296e-05 +Iteration 265 +MetaTest/Average/AverageDiscountedReturn -169.83 +MetaTest/Average/AverageReturn -169.83 +MetaTest/Average/Iteration 265 +MetaTest/Average/MaxReturn -146.748 +MetaTest/Average/MinReturn -223.112 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.5346 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.83 +MetaTest/__unnamed_task__/AverageReturn -169.83 +MetaTest/__unnamed_task__/Iteration 265 +MetaTest/__unnamed_task__/MaxReturn -146.748 +MetaTest/__unnamed_task__/MinReturn -223.112 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.5346 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.512e+06 +__unnamed_task__/AverageDiscountedReturn -70.9679 +__unnamed_task__/AverageReturn -168.859 +__unnamed_task__/Iteration 265 +__unnamed_task__/MaxReturn -140.296 +__unnamed_task__/MinReturn -257.869 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.1185 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:00:29 | [maml_trainer] epoch #266 | Sampling for adapation and meta-testing... +2025-04-03 18:01:41 | [maml_trainer] epoch #266 | Finished meta-testing... +2025-04-03 18:01:41 | [maml_trainer] epoch #266 | Saving snapshot... +2025-04-03 18:02:05 | [maml_trainer] epoch #266 | Saved +2025-04-03 18:02:05 | [maml_trainer] epoch #266 | Time 102403.42 s +2025-04-03 18:02:05 | [maml_trainer] epoch #266 | EpochTime 388.26 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.6353 +Average/AverageReturn -176.891 +Average/Iteration 266 +Average/MaxReturn -147.673 +Average/MinReturn -266.898 +Average/NumEpisodes 80 +Average/StdReturn 25.2123 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3263 +GaussianMLPPolicy/KLAfter 0.00302588 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -5.67077e-06 +GaussianMLPPolicy/LossBefore -3.8296e-09 +GaussianMLPPolicy/dLoss 5.66694e-06 +Iteration 266 +MetaTest/Average/AverageDiscountedReturn -168.116 +MetaTest/Average/AverageReturn -168.116 +MetaTest/Average/Iteration 266 +MetaTest/Average/MaxReturn -144.851 +MetaTest/Average/MinReturn -192.349 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 11.8843 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.116 +MetaTest/__unnamed_task__/AverageReturn -168.116 +MetaTest/__unnamed_task__/Iteration 266 +MetaTest/__unnamed_task__/MaxReturn -144.851 +MetaTest/__unnamed_task__/MinReturn -192.349 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 11.8843 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.544e+06 +__unnamed_task__/AverageDiscountedReturn -73.6353 +__unnamed_task__/AverageReturn -176.891 +__unnamed_task__/Iteration 266 +__unnamed_task__/MaxReturn -147.673 +__unnamed_task__/MinReturn -266.898 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.2123 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:06:56 | [maml_trainer] epoch #267 | Sampling for adapation and meta-testing... +2025-04-03 18:08:08 | [maml_trainer] epoch #267 | Finished meta-testing... +2025-04-03 18:08:08 | [maml_trainer] epoch #267 | Saving snapshot... +2025-04-03 18:08:32 | [maml_trainer] epoch #267 | Saved +2025-04-03 18:08:32 | [maml_trainer] epoch #267 | Time 102790.23 s +2025-04-03 18:08:32 | [maml_trainer] epoch #267 | EpochTime 386.81 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.1168 +Average/AverageReturn -169.438 +Average/Iteration 267 +Average/MaxReturn -145.874 +Average/MinReturn -255.853 +Average/NumEpisodes 80 +Average/StdReturn 23.6953 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3273 +GaussianMLPPolicy/KLAfter 0.00237065 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.14151e-06 +GaussianMLPPolicy/LossBefore -9.59635e-09 +GaussianMLPPolicy/dLoss -2.1511e-06 +Iteration 267 +MetaTest/Average/AverageDiscountedReturn -167.924 +MetaTest/Average/AverageReturn -167.924 +MetaTest/Average/Iteration 267 +MetaTest/Average/MaxReturn -144.137 +MetaTest/Average/MinReturn -237.894 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.7976 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.924 +MetaTest/__unnamed_task__/AverageReturn -167.924 +MetaTest/__unnamed_task__/Iteration 267 +MetaTest/__unnamed_task__/MaxReturn -144.137 +MetaTest/__unnamed_task__/MinReturn -237.894 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.7976 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.576e+06 +__unnamed_task__/AverageDiscountedReturn -71.1168 +__unnamed_task__/AverageReturn -169.438 +__unnamed_task__/Iteration 267 +__unnamed_task__/MaxReturn -145.874 +__unnamed_task__/MinReturn -255.853 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.6953 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:13:22 | [maml_trainer] epoch #268 | Sampling for adapation and meta-testing... +2025-04-03 18:14:36 | [maml_trainer] epoch #268 | Finished meta-testing... +2025-04-03 18:14:36 | [maml_trainer] epoch #268 | Saving snapshot... +2025-04-03 18:14:58 | [maml_trainer] epoch #268 | Saved +2025-04-03 18:14:58 | [maml_trainer] epoch #268 | Time 103176.97 s +2025-04-03 18:14:58 | [maml_trainer] epoch #268 | EpochTime 386.73 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3354 +Average/AverageReturn -174.085 +Average/Iteration 268 +Average/MaxReturn -146.866 +Average/MinReturn -247.71 +Average/NumEpisodes 80 +Average/StdReturn 24.0561 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3278 +GaussianMLPPolicy/KLAfter 0.00195391 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.02356e-06 +GaussianMLPPolicy/LossBefore 2.5779e-09 +GaussianMLPPolicy/dLoss 1.02614e-06 +Iteration 268 +MetaTest/Average/AverageDiscountedReturn -167.433 +MetaTest/Average/AverageReturn -167.433 +MetaTest/Average/Iteration 268 +MetaTest/Average/MaxReturn -143.993 +MetaTest/Average/MinReturn -236.416 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.9635 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.433 +MetaTest/__unnamed_task__/AverageReturn -167.433 +MetaTest/__unnamed_task__/Iteration 268 +MetaTest/__unnamed_task__/MaxReturn -143.993 +MetaTest/__unnamed_task__/MinReturn -236.416 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.9635 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.608e+06 +__unnamed_task__/AverageDiscountedReturn -72.3354 +__unnamed_task__/AverageReturn -174.085 +__unnamed_task__/Iteration 268 +__unnamed_task__/MaxReturn -146.866 +__unnamed_task__/MinReturn -247.71 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.0561 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:19:51 | [maml_trainer] epoch #269 | Sampling for adapation and meta-testing... +2025-04-03 18:21:04 | [maml_trainer] epoch #269 | Finished meta-testing... +2025-04-03 18:21:04 | [maml_trainer] epoch #269 | Saving snapshot... +2025-04-03 18:21:27 | [maml_trainer] epoch #269 | Saved +2025-04-03 18:21:27 | [maml_trainer] epoch #269 | Time 103565.72 s +2025-04-03 18:21:27 | [maml_trainer] epoch #269 | EpochTime 388.75 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3159 +Average/AverageReturn -170.833 +Average/Iteration 269 +Average/MaxReturn -140.835 +Average/MinReturn -254.195 +Average/NumEpisodes 80 +Average/StdReturn 21.471 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3279 +GaussianMLPPolicy/KLAfter 0.00232042 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.30993e-05 +GaussianMLPPolicy/LossBefore -4.26173e-09 +GaussianMLPPolicy/dLoss -1.31036e-05 +Iteration 269 +MetaTest/Average/AverageDiscountedReturn -174.477 +MetaTest/Average/AverageReturn -174.477 +MetaTest/Average/Iteration 269 +MetaTest/Average/MaxReturn -152.903 +MetaTest/Average/MinReturn -248.501 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 27.1958 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.477 +MetaTest/__unnamed_task__/AverageReturn -174.477 +MetaTest/__unnamed_task__/Iteration 269 +MetaTest/__unnamed_task__/MaxReturn -152.903 +MetaTest/__unnamed_task__/MinReturn -248.501 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 27.1958 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.64e+06 +__unnamed_task__/AverageDiscountedReturn -71.3159 +__unnamed_task__/AverageReturn -170.833 +__unnamed_task__/Iteration 269 +__unnamed_task__/MaxReturn -140.835 +__unnamed_task__/MinReturn -254.195 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.471 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:26:20 | [maml_trainer] epoch #270 | Sampling for adapation and meta-testing... +2025-04-03 18:27:33 | [maml_trainer] epoch #270 | Finished meta-testing... +2025-04-03 18:27:33 | [maml_trainer] epoch #270 | Saving snapshot... +2025-04-03 18:27:56 | [maml_trainer] epoch #270 | Saved +2025-04-03 18:27:56 | [maml_trainer] epoch #270 | Time 103954.89 s +2025-04-03 18:27:56 | [maml_trainer] epoch #270 | EpochTime 389.16 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5753 +Average/AverageReturn -174.158 +Average/Iteration 270 +Average/MaxReturn -141.814 +Average/MinReturn -253.399 +Average/NumEpisodes 80 +Average/StdReturn 23.0556 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3284 +GaussianMLPPolicy/KLAfter 0.00154983 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.20944e-06 +GaussianMLPPolicy/LossBefore 3.91901e-09 +GaussianMLPPolicy/dLoss 1.21336e-06 +Iteration 270 +MetaTest/Average/AverageDiscountedReturn -177.121 +MetaTest/Average/AverageReturn -177.121 +MetaTest/Average/Iteration 270 +MetaTest/Average/MaxReturn -147.601 +MetaTest/Average/MinReturn -273.117 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 33.7103 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.121 +MetaTest/__unnamed_task__/AverageReturn -177.121 +MetaTest/__unnamed_task__/Iteration 270 +MetaTest/__unnamed_task__/MaxReturn -147.601 +MetaTest/__unnamed_task__/MinReturn -273.117 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 33.7103 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.672e+06 +__unnamed_task__/AverageDiscountedReturn -72.5753 +__unnamed_task__/AverageReturn -174.158 +__unnamed_task__/Iteration 270 +__unnamed_task__/MaxReturn -141.814 +__unnamed_task__/MinReturn -253.399 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.0556 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:32:48 | [maml_trainer] epoch #271 | Sampling for adapation and meta-testing... +2025-04-03 18:34:01 | [maml_trainer] epoch #271 | Finished meta-testing... +2025-04-03 18:34:01 | [maml_trainer] epoch #271 | Saving snapshot... +2025-04-03 18:34:25 | [maml_trainer] epoch #271 | Saved +2025-04-03 18:34:25 | [maml_trainer] epoch #271 | Time 104343.76 s +2025-04-03 18:34:25 | [maml_trainer] epoch #271 | EpochTime 388.87 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.4005 +Average/AverageReturn -173.52 +Average/Iteration 271 +Average/MaxReturn -144.669 +Average/MinReturn -252.43 +Average/NumEpisodes 80 +Average/StdReturn 24.6608 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3298 +GaussianMLPPolicy/KLAfter 0.00140666 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.1213e-05 +GaussianMLPPolicy/LossBefore -4.72367e-09 +GaussianMLPPolicy/dLoss 1.12083e-05 +Iteration 271 +MetaTest/Average/AverageDiscountedReturn -177.056 +MetaTest/Average/AverageReturn -177.056 +MetaTest/Average/Iteration 271 +MetaTest/Average/MaxReturn -145.909 +MetaTest/Average/MinReturn -253.749 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.06 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.056 +MetaTest/__unnamed_task__/AverageReturn -177.056 +MetaTest/__unnamed_task__/Iteration 271 +MetaTest/__unnamed_task__/MaxReturn -145.909 +MetaTest/__unnamed_task__/MinReturn -253.749 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.06 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.704e+06 +__unnamed_task__/AverageDiscountedReturn -72.4005 +__unnamed_task__/AverageReturn -173.52 +__unnamed_task__/Iteration 271 +__unnamed_task__/MaxReturn -144.669 +__unnamed_task__/MinReturn -252.43 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.6608 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:39:17 | [maml_trainer] epoch #272 | Sampling for adapation and meta-testing... +2025-04-03 18:40:29 | [maml_trainer] epoch #272 | Finished meta-testing... +2025-04-03 18:40:29 | [maml_trainer] epoch #272 | Saving snapshot... +2025-04-03 18:40:53 | [maml_trainer] epoch #272 | Saved +2025-04-03 18:40:53 | [maml_trainer] epoch #272 | Time 104731.69 s +2025-04-03 18:40:53 | [maml_trainer] epoch #272 | EpochTime 387.93 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.1729 +Average/AverageReturn -175.133 +Average/Iteration 272 +Average/MaxReturn -149.818 +Average/MinReturn -255.434 +Average/NumEpisodes 80 +Average/StdReturn 23.2497 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3327 +GaussianMLPPolicy/KLAfter 0.0013076 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.04699e-05 +GaussianMLPPolicy/LossBefore 3.48687e-09 +GaussianMLPPolicy/dLoss 2.04734e-05 +Iteration 272 +MetaTest/Average/AverageDiscountedReturn -167.241 +MetaTest/Average/AverageReturn -167.241 +MetaTest/Average/Iteration 272 +MetaTest/Average/MaxReturn -148.582 +MetaTest/Average/MinReturn -188.748 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 13.5223 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.241 +MetaTest/__unnamed_task__/AverageReturn -167.241 +MetaTest/__unnamed_task__/Iteration 272 +MetaTest/__unnamed_task__/MaxReturn -148.582 +MetaTest/__unnamed_task__/MinReturn -188.748 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 13.5223 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.736e+06 +__unnamed_task__/AverageDiscountedReturn -73.1729 +__unnamed_task__/AverageReturn -175.133 +__unnamed_task__/Iteration 272 +__unnamed_task__/MaxReturn -149.818 +__unnamed_task__/MinReturn -255.434 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 23.2497 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:45:47 | [maml_trainer] epoch #273 | Sampling for adapation and meta-testing... +2025-04-03 18:47:00 | [maml_trainer] epoch #273 | Finished meta-testing... +2025-04-03 18:47:00 | [maml_trainer] epoch #273 | Saving snapshot... +2025-04-03 18:47:23 | [maml_trainer] epoch #273 | Saved +2025-04-03 18:47:23 | [maml_trainer] epoch #273 | Time 105121.34 s +2025-04-03 18:47:23 | [maml_trainer] epoch #273 | EpochTime 389.64 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.5017 +Average/AverageReturn -173.839 +Average/Iteration 273 +Average/MaxReturn -146.49 +Average/MinReturn -251.35 +Average/NumEpisodes 80 +Average/StdReturn 25.408 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3368 +GaussianMLPPolicy/KLAfter 0.00129678 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -6.60382e-06 +GaussianMLPPolicy/LossBefore 6.70552e-10 +GaussianMLPPolicy/dLoss 6.60449e-06 +Iteration 273 +MetaTest/Average/AverageDiscountedReturn -172.037 +MetaTest/Average/AverageReturn -172.037 +MetaTest/Average/Iteration 273 +MetaTest/Average/MaxReturn -147.1 +MetaTest/Average/MinReturn -253.12 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.8316 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.037 +MetaTest/__unnamed_task__/AverageReturn -172.037 +MetaTest/__unnamed_task__/Iteration 273 +MetaTest/__unnamed_task__/MaxReturn -147.1 +MetaTest/__unnamed_task__/MinReturn -253.12 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.8316 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.768e+06 +__unnamed_task__/AverageDiscountedReturn -72.5017 +__unnamed_task__/AverageReturn -173.839 +__unnamed_task__/Iteration 273 +__unnamed_task__/MaxReturn -146.49 +__unnamed_task__/MinReturn -251.35 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.408 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:52:16 | [maml_trainer] epoch #274 | Sampling for adapation and meta-testing... +2025-04-03 18:53:31 | [maml_trainer] epoch #274 | Finished meta-testing... +2025-04-03 18:53:31 | [maml_trainer] epoch #274 | Saving snapshot... +2025-04-03 18:53:55 | [maml_trainer] epoch #274 | Saved +2025-04-03 18:53:55 | [maml_trainer] epoch #274 | Time 105513.31 s +2025-04-03 18:53:55 | [maml_trainer] epoch #274 | EpochTime 391.96 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6526 +Average/AverageReturn -173.318 +Average/Iteration 274 +Average/MaxReturn -142.638 +Average/MinReturn -265.643 +Average/NumEpisodes 80 +Average/StdReturn 28.6753 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3439 +GaussianMLPPolicy/KLAfter 0.00117068 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 6.42762e-07 +GaussianMLPPolicy/LossBefore -2.39909e-09 +GaussianMLPPolicy/dLoss -6.45161e-07 +Iteration 274 +MetaTest/Average/AverageDiscountedReturn -166.732 +MetaTest/Average/AverageReturn -166.732 +MetaTest/Average/Iteration 274 +MetaTest/Average/MaxReturn -148.451 +MetaTest/Average/MinReturn -224.128 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 18.892 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.732 +MetaTest/__unnamed_task__/AverageReturn -166.732 +MetaTest/__unnamed_task__/Iteration 274 +MetaTest/__unnamed_task__/MaxReturn -148.451 +MetaTest/__unnamed_task__/MinReturn -224.128 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 18.892 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.8e+06 +__unnamed_task__/AverageDiscountedReturn -72.6526 +__unnamed_task__/AverageReturn -173.318 +__unnamed_task__/Iteration 274 +__unnamed_task__/MaxReturn -142.638 +__unnamed_task__/MinReturn -265.643 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 28.6753 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 18:58:47 | [maml_trainer] epoch #275 | Sampling for adapation and meta-testing... +2025-04-03 19:00:00 | [maml_trainer] epoch #275 | Finished meta-testing... +2025-04-03 19:00:00 | [maml_trainer] epoch #275 | Saving snapshot... +2025-04-03 19:00:24 | [maml_trainer] epoch #275 | Saved +2025-04-03 19:00:24 | [maml_trainer] epoch #275 | Time 105902.55 s +2025-04-03 19:00:24 | [maml_trainer] epoch #275 | EpochTime 389.24 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.1926 +Average/AverageReturn -172.99 +Average/Iteration 275 +Average/MaxReturn -146.536 +Average/MinReturn -250.675 +Average/NumEpisodes 80 +Average/StdReturn 24.545 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3515 +GaussianMLPPolicy/KLAfter 0.00351081 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.34679e-05 +GaussianMLPPolicy/LossBefore 2.68221e-10 +GaussianMLPPolicy/dLoss 3.34682e-05 +Iteration 275 +MetaTest/Average/AverageDiscountedReturn -176.314 +MetaTest/Average/AverageReturn -176.314 +MetaTest/Average/Iteration 275 +MetaTest/Average/MaxReturn -149.488 +MetaTest/Average/MinReturn -244.335 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.425 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.314 +MetaTest/__unnamed_task__/AverageReturn -176.314 +MetaTest/__unnamed_task__/Iteration 275 +MetaTest/__unnamed_task__/MaxReturn -149.488 +MetaTest/__unnamed_task__/MinReturn -244.335 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.425 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.832e+06 +__unnamed_task__/AverageDiscountedReturn -72.1926 +__unnamed_task__/AverageReturn -172.99 +__unnamed_task__/Iteration 275 +__unnamed_task__/MaxReturn -146.536 +__unnamed_task__/MinReturn -250.675 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.545 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:05:17 | [maml_trainer] epoch #276 | Sampling for adapation and meta-testing... +2025-04-03 19:06:30 | [maml_trainer] epoch #276 | Finished meta-testing... +2025-04-03 19:06:30 | [maml_trainer] epoch #276 | Saving snapshot... +2025-04-03 19:06:52 | [maml_trainer] epoch #276 | Saved +2025-04-03 19:06:52 | [maml_trainer] epoch #276 | Time 106290.86 s +2025-04-03 19:06:52 | [maml_trainer] epoch #276 | EpochTime 388.31 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.1085 +Average/AverageReturn -174.607 +Average/Iteration 276 +Average/MaxReturn -145.371 +Average/MinReturn -261.194 +Average/NumEpisodes 80 +Average/StdReturn 29.5365 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3559 +GaussianMLPPolicy/KLAfter 0.00330584 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -3.68463e-05 +GaussianMLPPolicy/LossBefore -3.60608e-09 +GaussianMLPPolicy/dLoss 3.68427e-05 +Iteration 276 +MetaTest/Average/AverageDiscountedReturn -171.318 +MetaTest/Average/AverageReturn -171.318 +MetaTest/Average/Iteration 276 +MetaTest/Average/MaxReturn -154.313 +MetaTest/Average/MinReturn -237.916 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.4162 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.318 +MetaTest/__unnamed_task__/AverageReturn -171.318 +MetaTest/__unnamed_task__/Iteration 276 +MetaTest/__unnamed_task__/MaxReturn -154.313 +MetaTest/__unnamed_task__/MinReturn -237.916 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.4162 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.864e+06 +__unnamed_task__/AverageDiscountedReturn -73.1085 +__unnamed_task__/AverageReturn -174.607 +__unnamed_task__/Iteration 276 +__unnamed_task__/MaxReturn -145.371 +__unnamed_task__/MinReturn -261.194 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 29.5365 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:11:25 | [maml_trainer] epoch #277 | Sampling for adapation and meta-testing... +2025-04-03 19:12:32 | [maml_trainer] epoch #277 | Finished meta-testing... +2025-04-03 19:12:32 | [maml_trainer] epoch #277 | Saving snapshot... +2025-04-03 19:12:54 | [maml_trainer] epoch #277 | Saved +2025-04-03 19:12:54 | [maml_trainer] epoch #277 | Time 106652.15 s +2025-04-03 19:12:54 | [maml_trainer] epoch #277 | EpochTime 361.28 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.7793 +Average/AverageReturn -168.249 +Average/Iteration 277 +Average/MaxReturn -145.65 +Average/MinReturn -259.136 +Average/NumEpisodes 80 +Average/StdReturn 24.6257 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3594 +GaussianMLPPolicy/KLAfter 0.00279448 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.46429e-05 +GaussianMLPPolicy/LossBefore -9.37283e-09 +GaussianMLPPolicy/dLoss 1.46336e-05 +Iteration 277 +MetaTest/Average/AverageDiscountedReturn -172.549 +MetaTest/Average/AverageReturn -172.549 +MetaTest/Average/Iteration 277 +MetaTest/Average/MaxReturn -143.597 +MetaTest/Average/MinReturn -257.159 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.0402 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.549 +MetaTest/__unnamed_task__/AverageReturn -172.549 +MetaTest/__unnamed_task__/Iteration 277 +MetaTest/__unnamed_task__/MaxReturn -143.597 +MetaTest/__unnamed_task__/MinReturn -257.159 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.0402 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.896e+06 +__unnamed_task__/AverageDiscountedReturn -70.7793 +__unnamed_task__/AverageReturn -168.249 +__unnamed_task__/Iteration 277 +__unnamed_task__/MaxReturn -145.65 +__unnamed_task__/MinReturn -259.136 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.6257 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:17:32 | [maml_trainer] epoch #278 | Sampling for adapation and meta-testing... +2025-04-03 19:18:42 | [maml_trainer] epoch #278 | Finished meta-testing... +2025-04-03 19:18:42 | [maml_trainer] epoch #278 | Saving snapshot... +2025-04-03 19:19:05 | [maml_trainer] epoch #278 | Saved +2025-04-03 19:19:05 | [maml_trainer] epoch #278 | Time 107023.37 s +2025-04-03 19:19:05 | [maml_trainer] epoch #278 | EpochTime 371.22 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.3262 +Average/AverageReturn -169.514 +Average/Iteration 278 +Average/MaxReturn -143.165 +Average/MinReturn -264.681 +Average/NumEpisodes 80 +Average/StdReturn 26.475 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3599 +GaussianMLPPolicy/KLAfter 0.00295095 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.63744e-05 +GaussianMLPPolicy/LossBefore -5.51343e-10 +GaussianMLPPolicy/dLoss -3.6375e-05 +Iteration 278 +MetaTest/Average/AverageDiscountedReturn -178.203 +MetaTest/Average/AverageReturn -178.203 +MetaTest/Average/Iteration 278 +MetaTest/Average/MaxReturn -147.003 +MetaTest/Average/MinReturn -271.268 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 36.3991 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.203 +MetaTest/__unnamed_task__/AverageReturn -178.203 +MetaTest/__unnamed_task__/Iteration 278 +MetaTest/__unnamed_task__/MaxReturn -147.003 +MetaTest/__unnamed_task__/MinReturn -271.268 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 36.3991 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.928e+06 +__unnamed_task__/AverageDiscountedReturn -71.3262 +__unnamed_task__/AverageReturn -169.514 +__unnamed_task__/Iteration 278 +__unnamed_task__/MaxReturn -143.165 +__unnamed_task__/MinReturn -264.681 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.475 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:23:40 | [maml_trainer] epoch #279 | Sampling for adapation and meta-testing... +2025-04-03 19:24:49 | [maml_trainer] epoch #279 | Finished meta-testing... +2025-04-03 19:24:49 | [maml_trainer] epoch #279 | Saving snapshot... +2025-04-03 19:25:11 | [maml_trainer] epoch #279 | Saved +2025-04-03 19:25:11 | [maml_trainer] epoch #279 | Time 107389.74 s +2025-04-03 19:25:11 | [maml_trainer] epoch #279 | EpochTime 366.36 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.7698 +Average/AverageReturn -176.631 +Average/Iteration 279 +Average/MaxReturn -144.74 +Average/MinReturn -248.577 +Average/NumEpisodes 80 +Average/StdReturn 29.4498 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3604 +GaussianMLPPolicy/KLAfter 0.00268586 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.70269e-05 +GaussianMLPPolicy/LossBefore 2.22027e-09 +GaussianMLPPolicy/dLoss 4.70291e-05 +Iteration 279 +MetaTest/Average/AverageDiscountedReturn -178.953 +MetaTest/Average/AverageReturn -178.953 +MetaTest/Average/Iteration 279 +MetaTest/Average/MaxReturn -151.157 +MetaTest/Average/MinReturn -247.039 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 26.4187 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.953 +MetaTest/__unnamed_task__/AverageReturn -178.953 +MetaTest/__unnamed_task__/Iteration 279 +MetaTest/__unnamed_task__/MaxReturn -151.157 +MetaTest/__unnamed_task__/MinReturn -247.039 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 26.4187 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.96e+06 +__unnamed_task__/AverageDiscountedReturn -73.7698 +__unnamed_task__/AverageReturn -176.631 +__unnamed_task__/Iteration 279 +__unnamed_task__/MaxReturn -144.74 +__unnamed_task__/MinReturn -248.577 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 29.4498 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:29:48 | [maml_trainer] epoch #280 | Sampling for adapation and meta-testing... +2025-04-03 19:30:56 | [maml_trainer] epoch #280 | Finished meta-testing... +2025-04-03 19:30:56 | [maml_trainer] epoch #280 | Saving snapshot... +2025-04-03 19:31:18 | [maml_trainer] epoch #280 | Saved +2025-04-03 19:31:18 | [maml_trainer] epoch #280 | Time 107756.71 s +2025-04-03 19:31:18 | [maml_trainer] epoch #280 | EpochTime 366.97 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.3315 +Average/AverageReturn -173.356 +Average/Iteration 280 +Average/MaxReturn -143.848 +Average/MinReturn -334.18 +Average/NumEpisodes 80 +Average/StdReturn 30.7005 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3601 +GaussianMLPPolicy/KLAfter 0.00202791 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.46742e-06 +GaussianMLPPolicy/LossBefore -1.04308e-10 +GaussianMLPPolicy/dLoss 1.46732e-06 +Iteration 280 +MetaTest/Average/AverageDiscountedReturn -170.271 +MetaTest/Average/AverageReturn -170.271 +MetaTest/Average/Iteration 280 +MetaTest/Average/MaxReturn -151.054 +MetaTest/Average/MinReturn -242.084 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.3568 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -170.271 +MetaTest/__unnamed_task__/AverageReturn -170.271 +MetaTest/__unnamed_task__/Iteration 280 +MetaTest/__unnamed_task__/MaxReturn -151.054 +MetaTest/__unnamed_task__/MinReturn -242.084 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.3568 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 8.992e+06 +__unnamed_task__/AverageDiscountedReturn -73.3315 +__unnamed_task__/AverageReturn -173.356 +__unnamed_task__/Iteration 280 +__unnamed_task__/MaxReturn -143.848 +__unnamed_task__/MinReturn -334.18 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 30.7005 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:35:51 | [maml_trainer] epoch #281 | Sampling for adapation and meta-testing... +2025-04-03 19:36:59 | [maml_trainer] epoch #281 | Finished meta-testing... +2025-04-03 19:36:59 | [maml_trainer] epoch #281 | Saving snapshot... +2025-04-03 19:37:21 | [maml_trainer] epoch #281 | Saved +2025-04-03 19:37:21 | [maml_trainer] epoch #281 | Time 108119.39 s +2025-04-03 19:37:21 | [maml_trainer] epoch #281 | EpochTime 362.67 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.7812 +Average/AverageReturn -178.196 +Average/Iteration 281 +Average/MaxReturn -145.362 +Average/MinReturn -268.609 +Average/NumEpisodes 80 +Average/StdReturn 33.9731 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3603 +GaussianMLPPolicy/KLAfter 0.00236851 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.88277e-05 +GaussianMLPPolicy/LossBefore 4.44055e-09 +GaussianMLPPolicy/dLoss -1.88232e-05 +Iteration 281 +MetaTest/Average/AverageDiscountedReturn -172.534 +MetaTest/Average/AverageReturn -172.534 +MetaTest/Average/Iteration 281 +MetaTest/Average/MaxReturn -151.014 +MetaTest/Average/MinReturn -242.149 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 20.3767 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -172.534 +MetaTest/__unnamed_task__/AverageReturn -172.534 +MetaTest/__unnamed_task__/Iteration 281 +MetaTest/__unnamed_task__/MaxReturn -151.014 +MetaTest/__unnamed_task__/MinReturn -242.149 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 20.3767 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.024e+06 +__unnamed_task__/AverageDiscountedReturn -74.7812 +__unnamed_task__/AverageReturn -178.196 +__unnamed_task__/Iteration 281 +__unnamed_task__/MaxReturn -145.362 +__unnamed_task__/MinReturn -268.609 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 33.9731 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:41:59 | [maml_trainer] epoch #282 | Sampling for adapation and meta-testing... +2025-04-03 19:43:12 | [maml_trainer] epoch #282 | Finished meta-testing... +2025-04-03 19:43:12 | [maml_trainer] epoch #282 | Saving snapshot... +2025-04-03 19:43:36 | [maml_trainer] epoch #282 | Saved +2025-04-03 19:43:36 | [maml_trainer] epoch #282 | Time 108494.79 s +2025-04-03 19:43:36 | [maml_trainer] epoch #282 | EpochTime 375.40 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.2435 +Average/AverageReturn -172.927 +Average/Iteration 282 +Average/MaxReturn -147.222 +Average/MinReturn -259.251 +Average/NumEpisodes 80 +Average/StdReturn 27.7328 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3612 +GaussianMLPPolicy/KLAfter 0.00165245 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -7.30056e-06 +GaussianMLPPolicy/LossBefore -3.75509e-09 +GaussianMLPPolicy/dLoss 7.2968e-06 +Iteration 282 +MetaTest/Average/AverageDiscountedReturn -168.067 +MetaTest/Average/AverageReturn -168.067 +MetaTest/Average/Iteration 282 +MetaTest/Average/MaxReturn -144.988 +MetaTest/Average/MinReturn -244.043 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 21.5915 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -168.067 +MetaTest/__unnamed_task__/AverageReturn -168.067 +MetaTest/__unnamed_task__/Iteration 282 +MetaTest/__unnamed_task__/MaxReturn -144.988 +MetaTest/__unnamed_task__/MinReturn -244.043 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 21.5915 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.056e+06 +__unnamed_task__/AverageDiscountedReturn -72.2435 +__unnamed_task__/AverageReturn -172.927 +__unnamed_task__/Iteration 282 +__unnamed_task__/MaxReturn -147.222 +__unnamed_task__/MinReturn -259.251 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 27.7328 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:48:31 | [maml_trainer] epoch #283 | Sampling for adapation and meta-testing... +2025-04-03 19:49:45 | [maml_trainer] epoch #283 | Finished meta-testing... +2025-04-03 19:49:45 | [maml_trainer] epoch #283 | Saving snapshot... +2025-04-03 19:50:10 | [maml_trainer] epoch #283 | Saved +2025-04-03 19:50:10 | [maml_trainer] epoch #283 | Time 108888.14 s +2025-04-03 19:50:10 | [maml_trainer] epoch #283 | EpochTime 393.35 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.8265 +Average/AverageReturn -169.988 +Average/Iteration 283 +Average/MaxReturn -144.965 +Average/MinReturn -268.038 +Average/NumEpisodes 80 +Average/StdReturn 24.9588 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3631 +GaussianMLPPolicy/KLAfter 0.0015496 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.14619e-05 +GaussianMLPPolicy/LossBefore -1.51992e-09 +GaussianMLPPolicy/dLoss -1.14635e-05 +Iteration 283 +MetaTest/Average/AverageDiscountedReturn -177.336 +MetaTest/Average/AverageReturn -177.336 +MetaTest/Average/Iteration 283 +MetaTest/Average/MaxReturn -148.977 +MetaTest/Average/MinReturn -251.481 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.4492 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.336 +MetaTest/__unnamed_task__/AverageReturn -177.336 +MetaTest/__unnamed_task__/Iteration 283 +MetaTest/__unnamed_task__/MaxReturn -148.977 +MetaTest/__unnamed_task__/MinReturn -251.481 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.4492 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.088e+06 +__unnamed_task__/AverageDiscountedReturn -71.8265 +__unnamed_task__/AverageReturn -169.988 +__unnamed_task__/Iteration 283 +__unnamed_task__/MaxReturn -144.965 +__unnamed_task__/MinReturn -268.038 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 24.9588 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 19:55:05 | [maml_trainer] epoch #284 | Sampling for adapation and meta-testing... +2025-04-03 19:56:26 | [maml_trainer] epoch #284 | Finished meta-testing... +2025-04-03 19:56:26 | [maml_trainer] epoch #284 | Saving snapshot... +2025-04-03 19:56:51 | [maml_trainer] epoch #284 | Saved +2025-04-03 19:56:51 | [maml_trainer] epoch #284 | Time 109289.13 s +2025-04-03 19:56:51 | [maml_trainer] epoch #284 | EpochTime 400.98 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.5054 +Average/AverageReturn -166.904 +Average/Iteration 284 +Average/MaxReturn -143.589 +Average/MinReturn -259.306 +Average/NumEpisodes 80 +Average/StdReturn 20.6068 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3652 +GaussianMLPPolicy/KLAfter 0.00154954 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.39075e-05 +GaussianMLPPolicy/LossBefore -5.69224e-09 +GaussianMLPPolicy/dLoss -1.39132e-05 +Iteration 284 +MetaTest/Average/AverageDiscountedReturn -182.78 +MetaTest/Average/AverageReturn -182.78 +MetaTest/Average/Iteration 284 +MetaTest/Average/MaxReturn -144.616 +MetaTest/Average/MinReturn -256.286 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 39.3907 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -182.78 +MetaTest/__unnamed_task__/AverageReturn -182.78 +MetaTest/__unnamed_task__/Iteration 284 +MetaTest/__unnamed_task__/MaxReturn -144.616 +MetaTest/__unnamed_task__/MinReturn -256.286 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 39.3907 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.12e+06 +__unnamed_task__/AverageDiscountedReturn -70.5054 +__unnamed_task__/AverageReturn -166.904 +__unnamed_task__/Iteration 284 +__unnamed_task__/MaxReturn -143.589 +__unnamed_task__/MinReturn -259.306 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 20.6068 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:01:51 | [maml_trainer] epoch #285 | Sampling for adapation and meta-testing... +2025-04-03 20:03:05 | [maml_trainer] epoch #285 | Finished meta-testing... +2025-04-03 20:03:05 | [maml_trainer] epoch #285 | Saving snapshot... +2025-04-03 20:03:28 | [maml_trainer] epoch #285 | Saved +2025-04-03 20:03:28 | [maml_trainer] epoch #285 | Time 109686.47 s +2025-04-03 20:03:28 | [maml_trainer] epoch #285 | EpochTime 397.33 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.4481 +Average/AverageReturn -172.292 +Average/Iteration 285 +Average/MaxReturn -142.21 +Average/MinReturn -263.507 +Average/NumEpisodes 80 +Average/StdReturn 22.9056 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.368 +GaussianMLPPolicy/KLAfter 0.00175447 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.09851e-05 +GaussianMLPPolicy/LossBefore 3.65078e-09 +GaussianMLPPolicy/dLoss -5.09815e-05 +Iteration 285 +MetaTest/Average/AverageDiscountedReturn -178.718 +MetaTest/Average/AverageReturn -178.718 +MetaTest/Average/Iteration 285 +MetaTest/Average/MaxReturn -153.753 +MetaTest/Average/MinReturn -255.305 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 29.4639 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -178.718 +MetaTest/__unnamed_task__/AverageReturn -178.718 +MetaTest/__unnamed_task__/Iteration 285 +MetaTest/__unnamed_task__/MaxReturn -153.753 +MetaTest/__unnamed_task__/MinReturn -255.305 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 29.4639 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.152e+06 +__unnamed_task__/AverageDiscountedReturn -72.4481 +__unnamed_task__/AverageReturn -172.292 +__unnamed_task__/Iteration 285 +__unnamed_task__/MaxReturn -142.21 +__unnamed_task__/MinReturn -263.507 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.9056 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:08:21 | [maml_trainer] epoch #286 | Sampling for adapation and meta-testing... +2025-04-03 20:09:35 | [maml_trainer] epoch #286 | Finished meta-testing... +2025-04-03 20:09:35 | [maml_trainer] epoch #286 | Saving snapshot... +2025-04-03 20:10:00 | [maml_trainer] epoch #286 | Saved +2025-04-03 20:10:00 | [maml_trainer] epoch #286 | Time 110078.28 s +2025-04-03 20:10:00 | [maml_trainer] epoch #286 | EpochTime 391.81 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.6425 +Average/AverageReturn -167.395 +Average/Iteration 286 +Average/MaxReturn -147.78 +Average/MinReturn -250.303 +Average/NumEpisodes 80 +Average/StdReturn 17.2638 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3707 +GaussianMLPPolicy/KLAfter 0.00153312 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.12745e-05 +GaussianMLPPolicy/LossBefore 3.68059e-09 +GaussianMLPPolicy/dLoss -1.12708e-05 +Iteration 286 +MetaTest/Average/AverageDiscountedReturn -177.339 +MetaTest/Average/AverageReturn -177.339 +MetaTest/Average/Iteration 286 +MetaTest/Average/MaxReturn -152.469 +MetaTest/Average/MinReturn -251.969 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 29.3044 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -177.339 +MetaTest/__unnamed_task__/AverageReturn -177.339 +MetaTest/__unnamed_task__/Iteration 286 +MetaTest/__unnamed_task__/MaxReturn -152.469 +MetaTest/__unnamed_task__/MinReturn -251.969 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 29.3044 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.184e+06 +__unnamed_task__/AverageDiscountedReturn -70.6425 +__unnamed_task__/AverageReturn -167.395 +__unnamed_task__/Iteration 286 +__unnamed_task__/MaxReturn -147.78 +__unnamed_task__/MinReturn -250.303 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 17.2638 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:14:50 | [maml_trainer] epoch #287 | Sampling for adapation and meta-testing... +2025-04-03 20:16:00 | [maml_trainer] epoch #287 | Finished meta-testing... +2025-04-03 20:16:00 | [maml_trainer] epoch #287 | Saving snapshot... +2025-04-03 20:16:23 | [maml_trainer] epoch #287 | Saved +2025-04-03 20:16:23 | [maml_trainer] epoch #287 | Time 110461.43 s +2025-04-03 20:16:23 | [maml_trainer] epoch #287 | EpochTime 383.14 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8151 +Average/AverageReturn -175.074 +Average/Iteration 287 +Average/MaxReturn -141.055 +Average/MinReturn -258.669 +Average/NumEpisodes 80 +Average/StdReturn 21.4646 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.372 +GaussianMLPPolicy/KLAfter 0.00144233 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 9.00368e-06 +GaussianMLPPolicy/LossBefore 1.2517e-09 +GaussianMLPPolicy/dLoss -9.00243e-06 +Iteration 287 +MetaTest/Average/AverageDiscountedReturn -167.264 +MetaTest/Average/AverageReturn -167.264 +MetaTest/Average/Iteration 287 +MetaTest/Average/MaxReturn -150.639 +MetaTest/Average/MinReturn -215.142 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 17.7561 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -167.264 +MetaTest/__unnamed_task__/AverageReturn -167.264 +MetaTest/__unnamed_task__/Iteration 287 +MetaTest/__unnamed_task__/MaxReturn -150.639 +MetaTest/__unnamed_task__/MinReturn -215.142 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 17.7561 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.216e+06 +__unnamed_task__/AverageDiscountedReturn -73.8151 +__unnamed_task__/AverageReturn -175.074 +__unnamed_task__/Iteration 287 +__unnamed_task__/MaxReturn -141.055 +__unnamed_task__/MinReturn -258.669 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.4646 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:21:31 | [maml_trainer] epoch #288 | Sampling for adapation and meta-testing... +2025-04-03 20:22:40 | [maml_trainer] epoch #288 | Finished meta-testing... +2025-04-03 20:22:40 | [maml_trainer] epoch #288 | Saving snapshot... +2025-04-03 20:23:03 | [maml_trainer] epoch #288 | Saved +2025-04-03 20:23:03 | [maml_trainer] epoch #288 | Time 110861.32 s +2025-04-03 20:23:03 | [maml_trainer] epoch #288 | EpochTime 399.88 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.6726 +Average/AverageReturn -172.616 +Average/Iteration 288 +Average/MaxReturn -144.362 +Average/MinReturn -265.938 +Average/NumEpisodes 80 +Average/StdReturn 26.0597 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3724 +GaussianMLPPolicy/KLAfter 0.00139169 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.87796e-05 +GaussianMLPPolicy/LossBefore 1.13249e-09 +GaussianMLPPolicy/dLoss 2.87807e-05 +Iteration 288 +MetaTest/Average/AverageDiscountedReturn -179.111 +MetaTest/Average/AverageReturn -179.111 +MetaTest/Average/Iteration 288 +MetaTest/Average/MaxReturn -152.639 +MetaTest/Average/MinReturn -237.278 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 28.5954 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -179.111 +MetaTest/__unnamed_task__/AverageReturn -179.111 +MetaTest/__unnamed_task__/Iteration 288 +MetaTest/__unnamed_task__/MaxReturn -152.639 +MetaTest/__unnamed_task__/MinReturn -237.278 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 28.5954 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.248e+06 +__unnamed_task__/AverageDiscountedReturn -72.6726 +__unnamed_task__/AverageReturn -172.616 +__unnamed_task__/Iteration 288 +__unnamed_task__/MaxReturn -144.362 +__unnamed_task__/MinReturn -265.938 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 26.0597 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:27:40 | [maml_trainer] epoch #289 | Sampling for adapation and meta-testing... +2025-04-03 20:28:50 | [maml_trainer] epoch #289 | Finished meta-testing... +2025-04-03 20:28:50 | [maml_trainer] epoch #289 | Saving snapshot... +2025-04-03 20:29:11 | [maml_trainer] epoch #289 | Saved +2025-04-03 20:29:11 | [maml_trainer] epoch #289 | Time 111229.64 s +2025-04-03 20:29:11 | [maml_trainer] epoch #289 | EpochTime 368.32 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.8265 +Average/AverageReturn -176.324 +Average/Iteration 289 +Average/MaxReturn -149.393 +Average/MinReturn -248.863 +Average/NumEpisodes 80 +Average/StdReturn 28.5598 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3732 +GaussianMLPPolicy/KLAfter 0.00183943 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.31479e-05 +GaussianMLPPolicy/LossBefore 2.5481e-09 +GaussianMLPPolicy/dLoss -1.31454e-05 +Iteration 289 +MetaTest/Average/AverageDiscountedReturn -181.41 +MetaTest/Average/AverageReturn -181.41 +MetaTest/Average/Iteration 289 +MetaTest/Average/MaxReturn -152.661 +MetaTest/Average/MinReturn -252.129 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 29.4646 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -181.41 +MetaTest/__unnamed_task__/AverageReturn -181.41 +MetaTest/__unnamed_task__/Iteration 289 +MetaTest/__unnamed_task__/MaxReturn -152.661 +MetaTest/__unnamed_task__/MinReturn -252.129 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 29.4646 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.28e+06 +__unnamed_task__/AverageDiscountedReturn -73.8265 +__unnamed_task__/AverageReturn -176.324 +__unnamed_task__/Iteration 289 +__unnamed_task__/MaxReturn -149.393 +__unnamed_task__/MinReturn -248.863 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 28.5598 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:33:54 | [maml_trainer] epoch #290 | Sampling for adapation and meta-testing... +2025-04-03 20:35:08 | [maml_trainer] epoch #290 | Finished meta-testing... +2025-04-03 20:35:08 | [maml_trainer] epoch #290 | Saving snapshot... +2025-04-03 20:35:29 | [maml_trainer] epoch #290 | Saved +2025-04-03 20:35:29 | [maml_trainer] epoch #290 | Time 111607.81 s +2025-04-03 20:35:29 | [maml_trainer] epoch #290 | EpochTime 378.17 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.9601 +Average/AverageReturn -173.198 +Average/Iteration 290 +Average/MaxReturn -144.687 +Average/MinReturn -270.644 +Average/NumEpisodes 80 +Average/StdReturn 29.4108 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3726 +GaussianMLPPolicy/KLAfter 0.00178566 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 5.98344e-06 +GaussianMLPPolicy/LossBefore 3.48687e-09 +GaussianMLPPolicy/dLoss -5.97996e-06 +Iteration 290 +MetaTest/Average/AverageDiscountedReturn -174.074 +MetaTest/Average/AverageReturn -174.074 +MetaTest/Average/Iteration 290 +MetaTest/Average/MaxReturn -151.467 +MetaTest/Average/MinReturn -243.599 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.4034 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.074 +MetaTest/__unnamed_task__/AverageReturn -174.074 +MetaTest/__unnamed_task__/Iteration 290 +MetaTest/__unnamed_task__/MaxReturn -151.467 +MetaTest/__unnamed_task__/MinReturn -243.599 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.4034 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.312e+06 +__unnamed_task__/AverageDiscountedReturn -72.9601 +__unnamed_task__/AverageReturn -173.198 +__unnamed_task__/Iteration 290 +__unnamed_task__/MaxReturn -144.687 +__unnamed_task__/MinReturn -270.644 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 29.4108 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:40:09 | [maml_trainer] epoch #291 | Sampling for adapation and meta-testing... +2025-04-03 20:41:20 | [maml_trainer] epoch #291 | Finished meta-testing... +2025-04-03 20:41:20 | [maml_trainer] epoch #291 | Saving snapshot... +2025-04-03 20:41:43 | [maml_trainer] epoch #291 | Saved +2025-04-03 20:41:43 | [maml_trainer] epoch #291 | Time 111982.08 s +2025-04-03 20:41:43 | [maml_trainer] epoch #291 | EpochTime 374.26 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.1696 +Average/AverageReturn -178.957 +Average/Iteration 291 +Average/MaxReturn -147.159 +Average/MinReturn -259.195 +Average/NumEpisodes 80 +Average/StdReturn 30.2531 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3717 +GaussianMLPPolicy/KLAfter 0.00148154 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.5184e-05 +GaussianMLPPolicy/LossBefore -2.68221e-10 +GaussianMLPPolicy/dLoss 1.51838e-05 +Iteration 291 +MetaTest/Average/AverageDiscountedReturn -171.666 +MetaTest/Average/AverageReturn -171.666 +MetaTest/Average/Iteration 291 +MetaTest/Average/MaxReturn -149.081 +MetaTest/Average/MinReturn -274.752 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 25.5837 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -171.666 +MetaTest/__unnamed_task__/AverageReturn -171.666 +MetaTest/__unnamed_task__/Iteration 291 +MetaTest/__unnamed_task__/MaxReturn -149.081 +MetaTest/__unnamed_task__/MinReturn -274.752 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 25.5837 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.344e+06 +__unnamed_task__/AverageDiscountedReturn -75.1696 +__unnamed_task__/AverageReturn -178.957 +__unnamed_task__/Iteration 291 +__unnamed_task__/MaxReturn -147.159 +__unnamed_task__/MinReturn -259.195 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 30.2531 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:46:31 | [maml_trainer] epoch #292 | Sampling for adapation and meta-testing... +2025-04-03 20:47:40 | [maml_trainer] epoch #292 | Finished meta-testing... +2025-04-03 20:47:40 | [maml_trainer] epoch #292 | Saving snapshot... +2025-04-03 20:48:03 | [maml_trainer] epoch #292 | Saved +2025-04-03 20:48:03 | [maml_trainer] epoch #292 | Time 112361.64 s +2025-04-03 20:48:03 | [maml_trainer] epoch #292 | EpochTime 379.56 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.3178 +Average/AverageReturn -171.623 +Average/Iteration 292 +Average/MaxReturn -148.548 +Average/MinReturn -251.052 +Average/NumEpisodes 80 +Average/StdReturn 21.6162 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3692 +GaussianMLPPolicy/KLAfter 0.00192854 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -1.36399e-05 +GaussianMLPPolicy/LossBefore 1.00732e-08 +GaussianMLPPolicy/dLoss 1.365e-05 +Iteration 292 +MetaTest/Average/AverageDiscountedReturn -187.631 +MetaTest/Average/AverageReturn -187.631 +MetaTest/Average/Iteration 292 +MetaTest/Average/MaxReturn -147.157 +MetaTest/Average/MinReturn -284.213 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 39.0996 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -187.631 +MetaTest/__unnamed_task__/AverageReturn -187.631 +MetaTest/__unnamed_task__/Iteration 292 +MetaTest/__unnamed_task__/MaxReturn -147.157 +MetaTest/__unnamed_task__/MinReturn -284.213 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 39.0996 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.376e+06 +__unnamed_task__/AverageDiscountedReturn -72.3178 +__unnamed_task__/AverageReturn -171.623 +__unnamed_task__/Iteration 292 +__unnamed_task__/MaxReturn -148.548 +__unnamed_task__/MinReturn -251.052 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.6162 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:52:53 | [maml_trainer] epoch #293 | Sampling for adapation and meta-testing... +2025-04-03 20:54:05 | [maml_trainer] epoch #293 | Finished meta-testing... +2025-04-03 20:54:05 | [maml_trainer] epoch #293 | Saving snapshot... +2025-04-03 20:54:29 | [maml_trainer] epoch #293 | Saved +2025-04-03 20:54:29 | [maml_trainer] epoch #293 | Time 112747.95 s +2025-04-03 20:54:29 | [maml_trainer] epoch #293 | EpochTime 386.31 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -74.1925 +Average/AverageReturn -177.327 +Average/Iteration 293 +Average/MaxReturn -143.385 +Average/MinReturn -255.553 +Average/NumEpisodes 80 +Average/StdReturn 30.1141 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3662 +GaussianMLPPolicy/KLAfter 0.00225784 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -4.57549e-05 +GaussianMLPPolicy/LossBefore 8.44896e-09 +GaussianMLPPolicy/dLoss 4.57633e-05 +Iteration 293 +MetaTest/Average/AverageDiscountedReturn -169.514 +MetaTest/Average/AverageReturn -169.514 +MetaTest/Average/Iteration 293 +MetaTest/Average/MaxReturn -147.023 +MetaTest/Average/MinReturn -234.796 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 19.6942 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.514 +MetaTest/__unnamed_task__/AverageReturn -169.514 +MetaTest/__unnamed_task__/Iteration 293 +MetaTest/__unnamed_task__/MaxReturn -147.023 +MetaTest/__unnamed_task__/MinReturn -234.796 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 19.6942 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.408e+06 +__unnamed_task__/AverageDiscountedReturn -74.1925 +__unnamed_task__/AverageReturn -177.327 +__unnamed_task__/Iteration 293 +__unnamed_task__/MaxReturn -143.385 +__unnamed_task__/MinReturn -255.553 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 30.1141 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 20:59:12 | [maml_trainer] epoch #294 | Sampling for adapation and meta-testing... +2025-04-03 21:00:22 | [maml_trainer] epoch #294 | Finished meta-testing... +2025-04-03 21:00:22 | [maml_trainer] epoch #294 | Saving snapshot... +2025-04-03 21:00:44 | [maml_trainer] epoch #294 | Saved +2025-04-03 21:00:44 | [maml_trainer] epoch #294 | Time 113122.38 s +2025-04-03 21:00:44 | [maml_trainer] epoch #294 | EpochTime 374.42 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -72.1632 +Average/AverageReturn -171.178 +Average/Iteration 294 +Average/MaxReturn -144.086 +Average/MinReturn -245.256 +Average/NumEpisodes 80 +Average/StdReturn 22.8647 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3641 +GaussianMLPPolicy/KLAfter 0.00147158 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.12132e-05 +GaussianMLPPolicy/LossBefore -6.10948e-09 +GaussianMLPPolicy/dLoss -3.12193e-05 +Iteration 294 +MetaTest/Average/AverageDiscountedReturn -169.536 +MetaTest/Average/AverageReturn -169.536 +MetaTest/Average/Iteration 294 +MetaTest/Average/MaxReturn -152.112 +MetaTest/Average/MinReturn -228.25 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 16.9397 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -169.536 +MetaTest/__unnamed_task__/AverageReturn -169.536 +MetaTest/__unnamed_task__/Iteration 294 +MetaTest/__unnamed_task__/MaxReturn -152.112 +MetaTest/__unnamed_task__/MinReturn -228.25 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 16.9397 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.44e+06 +__unnamed_task__/AverageDiscountedReturn -72.1632 +__unnamed_task__/AverageReturn -171.178 +__unnamed_task__/Iteration 294 +__unnamed_task__/MaxReturn -144.086 +__unnamed_task__/MinReturn -245.256 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 22.8647 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 21:05:27 | [maml_trainer] epoch #295 | Sampling for adapation and meta-testing... +2025-04-03 21:06:40 | [maml_trainer] epoch #295 | Finished meta-testing... +2025-04-03 21:06:40 | [maml_trainer] epoch #295 | Saving snapshot... +2025-04-03 21:07:04 | [maml_trainer] epoch #295 | Saved +2025-04-03 21:07:04 | [maml_trainer] epoch #295 | Time 113502.37 s +2025-04-03 21:07:04 | [maml_trainer] epoch #295 | EpochTime 379.99 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -75.4522 +Average/AverageReturn -181.286 +Average/Iteration 295 +Average/MaxReturn -141.611 +Average/MinReturn -273.381 +Average/NumEpisodes 80 +Average/StdReturn 31.85 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.362 +GaussianMLPPolicy/KLAfter 0.00228395 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 1.1421e-06 +GaussianMLPPolicy/LossBefore 5.96046e-09 +GaussianMLPPolicy/dLoss -1.13614e-06 +Iteration 295 +MetaTest/Average/AverageDiscountedReturn -176.069 +MetaTest/Average/AverageReturn -176.069 +MetaTest/Average/Iteration 295 +MetaTest/Average/MaxReturn -146.699 +MetaTest/Average/MinReturn -234.511 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 24.5037 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -176.069 +MetaTest/__unnamed_task__/AverageReturn -176.069 +MetaTest/__unnamed_task__/Iteration 295 +MetaTest/__unnamed_task__/MaxReturn -146.699 +MetaTest/__unnamed_task__/MinReturn -234.511 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 24.5037 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.472e+06 +__unnamed_task__/AverageDiscountedReturn -75.4522 +__unnamed_task__/AverageReturn -181.286 +__unnamed_task__/Iteration 295 +__unnamed_task__/MaxReturn -141.611 +__unnamed_task__/MinReturn -273.381 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 31.85 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 21:11:54 | [maml_trainer] epoch #296 | Sampling for adapation and meta-testing... +2025-04-03 21:13:05 | [maml_trainer] epoch #296 | Finished meta-testing... +2025-04-03 21:13:05 | [maml_trainer] epoch #296 | Saving snapshot... +2025-04-03 21:13:27 | [maml_trainer] epoch #296 | Saved +2025-04-03 21:13:27 | [maml_trainer] epoch #296 | Time 113886.00 s +2025-04-03 21:13:27 | [maml_trainer] epoch #296 | EpochTime 383.63 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.191 +Average/AverageReturn -168.701 +Average/Iteration 296 +Average/MaxReturn -133.586 +Average/MinReturn -237.8 +Average/NumEpisodes 80 +Average/StdReturn 19.4691 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3598 +GaussianMLPPolicy/KLAfter 0.00333436 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter -2.0586e-05 +GaussianMLPPolicy/LossBefore -3.8743e-09 +GaussianMLPPolicy/dLoss 2.05821e-05 +Iteration 296 +MetaTest/Average/AverageDiscountedReturn -161.099 +MetaTest/Average/AverageReturn -161.099 +MetaTest/Average/Iteration 296 +MetaTest/Average/MaxReturn -144.712 +MetaTest/Average/MinReturn -197.575 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 12.7881 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -161.099 +MetaTest/__unnamed_task__/AverageReturn -161.099 +MetaTest/__unnamed_task__/Iteration 296 +MetaTest/__unnamed_task__/MaxReturn -144.712 +MetaTest/__unnamed_task__/MinReturn -197.575 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 12.7881 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.504e+06 +__unnamed_task__/AverageDiscountedReturn -71.191 +__unnamed_task__/AverageReturn -168.701 +__unnamed_task__/Iteration 296 +__unnamed_task__/MaxReturn -133.586 +__unnamed_task__/MinReturn -237.8 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.4691 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 21:17:59 | [maml_trainer] epoch #297 | Sampling for adapation and meta-testing... +2025-04-03 21:19:08 | [maml_trainer] epoch #297 | Finished meta-testing... +2025-04-03 21:19:08 | [maml_trainer] epoch #297 | Saving snapshot... +2025-04-03 21:19:31 | [maml_trainer] epoch #297 | Saved +2025-04-03 21:19:31 | [maml_trainer] epoch #297 | Time 114249.64 s +2025-04-03 21:19:31 | [maml_trainer] epoch #297 | EpochTime 363.63 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -71.7137 +Average/AverageReturn -169.813 +Average/Iteration 297 +Average/MaxReturn -148.2 +Average/MinReturn -248.355 +Average/NumEpisodes 80 +Average/StdReturn 21.3602 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3578 +GaussianMLPPolicy/KLAfter 0.0033978 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 3.0634e-05 +GaussianMLPPolicy/LossBefore -7.40588e-09 +GaussianMLPPolicy/dLoss -3.06414e-05 +Iteration 297 +MetaTest/Average/AverageDiscountedReturn -173.239 +MetaTest/Average/AverageReturn -173.239 +MetaTest/Average/Iteration 297 +MetaTest/Average/MaxReturn -149.417 +MetaTest/Average/MinReturn -197.427 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 14.4173 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -173.239 +MetaTest/__unnamed_task__/AverageReturn -173.239 +MetaTest/__unnamed_task__/Iteration 297 +MetaTest/__unnamed_task__/MaxReturn -149.417 +MetaTest/__unnamed_task__/MinReturn -197.427 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 14.4173 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.536e+06 +__unnamed_task__/AverageDiscountedReturn -71.7137 +__unnamed_task__/AverageReturn -169.813 +__unnamed_task__/Iteration 297 +__unnamed_task__/MaxReturn -148.2 +__unnamed_task__/MinReturn -248.355 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 21.3602 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 21:24:59 | [maml_trainer] epoch #298 | Sampling for adapation and meta-testing... +2025-04-03 21:26:08 | [maml_trainer] epoch #298 | Finished meta-testing... +2025-04-03 21:26:08 | [maml_trainer] epoch #298 | Saving snapshot... +2025-04-03 21:26:29 | [maml_trainer] epoch #298 | Saved +2025-04-03 21:26:29 | [maml_trainer] epoch #298 | Time 114667.84 s +2025-04-03 21:26:29 | [maml_trainer] epoch #298 | EpochTime 418.20 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -70.2542 +Average/AverageReturn -166.447 +Average/Iteration 298 +Average/MaxReturn -144.305 +Average/MinReturn -242.4 +Average/NumEpisodes 80 +Average/StdReturn 19.7533 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3568 +GaussianMLPPolicy/KLAfter 0.0039289 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 2.65908e-05 +GaussianMLPPolicy/LossBefore 2.5332e-10 +GaussianMLPPolicy/dLoss -2.65905e-05 +Iteration 298 +MetaTest/Average/AverageDiscountedReturn -166.681 +MetaTest/Average/AverageReturn -166.681 +MetaTest/Average/Iteration 298 +MetaTest/Average/MaxReturn -145.206 +MetaTest/Average/MinReturn -246.858 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 23.8889 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -166.681 +MetaTest/__unnamed_task__/AverageReturn -166.681 +MetaTest/__unnamed_task__/Iteration 298 +MetaTest/__unnamed_task__/MaxReturn -145.206 +MetaTest/__unnamed_task__/MinReturn -246.858 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 23.8889 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.568e+06 +__unnamed_task__/AverageDiscountedReturn -70.2542 +__unnamed_task__/AverageReturn -166.447 +__unnamed_task__/Iteration 298 +__unnamed_task__/MaxReturn -144.305 +__unnamed_task__/MinReturn -242.4 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 19.7533 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- -------------- +2025-04-03 21:31:04 | [maml_trainer] epoch #299 | Sampling for adapation and meta-testing... +2025-04-03 21:32:12 | [maml_trainer] epoch #299 | Finished meta-testing... +2025-04-03 21:32:12 | [maml_trainer] epoch #299 | Saving snapshot... +2025-04-03 21:32:34 | [maml_trainer] epoch #299 | Saved +2025-04-03 21:32:34 | [maml_trainer] epoch #299 | Time 115032.69 s +2025-04-03 21:32:34 | [maml_trainer] epoch #299 | EpochTime 364.84 s +------------------------------------------------- -------------- +Average/AverageDiscountedReturn -73.0962 +Average/AverageReturn -173.621 +Average/Iteration 299 +Average/MaxReturn -142.596 +Average/MinReturn -246.929 +Average/NumEpisodes 80 +Average/StdReturn 25.9552 +Average/TerminationRate 0 +GaussianMLPPolicy/Entropy 19.3556 +GaussianMLPPolicy/KLAfter 0.00389229 +GaussianMLPPolicy/KLBefore 0 +GaussianMLPPolicy/LossAfter 7.883e-06 +GaussianMLPPolicy/LossBefore 3.42727e-09 +GaussianMLPPolicy/dLoss -7.87957e-06 +Iteration 299 +MetaTest/Average/AverageDiscountedReturn -174.034 +MetaTest/Average/AverageReturn -174.034 +MetaTest/Average/Iteration 299 +MetaTest/Average/MaxReturn -142.113 +MetaTest/Average/MinReturn -239.768 +MetaTest/Average/NumEpisodes 20 +MetaTest/Average/StdReturn 30.6841 +MetaTest/Average/TerminationRate 0 +MetaTest/__unnamed_task__/AverageDiscountedReturn -174.034 +MetaTest/__unnamed_task__/AverageReturn -174.034 +MetaTest/__unnamed_task__/Iteration 299 +MetaTest/__unnamed_task__/MaxReturn -142.113 +MetaTest/__unnamed_task__/MinReturn -239.768 +MetaTest/__unnamed_task__/NumEpisodes 20 +MetaTest/__unnamed_task__/StdReturn 30.6841 +MetaTest/__unnamed_task__/TerminationRate 0 +TotalEnvSteps 9.6e+06 +__unnamed_task__/AverageDiscountedReturn -73.0962 +__unnamed_task__/AverageReturn -173.621 +__unnamed_task__/Iteration 299 +__unnamed_task__/MaxReturn -142.596 +__unnamed_task__/MinReturn -246.929 +__unnamed_task__/NumEpisodes 80 +__unnamed_task__/StdReturn 25.9552 +__unnamed_task__/TerminationRate 0 +------------------------------------------------- --------------