| [ | |
| { | |
| "step": 200, | |
| "policy_loss": 6.2601, | |
| "value_loss": 0.3127, | |
| "lr": 2.82e-05, | |
| "gnorm": 9.03 | |
| }, | |
| { | |
| "step": 400, | |
| "policy_loss": 4.3384, | |
| "value_loss": 0.2786, | |
| "lr": 5.64e-05, | |
| "gnorm": 6.71 | |
| }, | |
| { | |
| "step": 600, | |
| "policy_loss": 3.9855, | |
| "value_loss": 0.2473, | |
| "lr": 8.46e-05, | |
| "gnorm": 4.17 | |
| }, | |
| { | |
| "step": 800, | |
| "policy_loss": 3.7476, | |
| "value_loss": 0.231, | |
| "lr": 0.000113, | |
| "gnorm": 2.66 | |
| }, | |
| { | |
| "step": 1000, | |
| "policy_loss": 3.5844, | |
| "value_loss": 0.224, | |
| "lr": 0.000141, | |
| "gnorm": 2.37 | |
| }, | |
| { | |
| "step": 1200, | |
| "policy_loss": 3.4408, | |
| "value_loss": 0.2213, | |
| "lr": 0.000169, | |
| "gnorm": 1.9 | |
| }, | |
| { | |
| "step": 1400, | |
| "policy_loss": 3.3147, | |
| "value_loss": 0.2153, | |
| "lr": 0.000197, | |
| "gnorm": 1.73 | |
| }, | |
| { | |
| "step": 1600, | |
| "policy_loss": 3.1981, | |
| "value_loss": 0.2103, | |
| "lr": 0.0002, | |
| "gnorm": 1.48 | |
| }, | |
| { | |
| "step": 1800, | |
| "policy_loss": 3.0781, | |
| "value_loss": 0.2041, | |
| "lr": 0.0002, | |
| "gnorm": 1.52 | |
| }, | |
| { | |
| "step": 2000, | |
| "policy_loss": 2.9918, | |
| "value_loss": 0.2015, | |
| "lr": 0.0002, | |
| "gnorm": 1.26 | |
| }, | |
| { | |
| "step": 2200, | |
| "policy_loss": 2.9199, | |
| "value_loss": 0.1948, | |
| "lr": 0.0002, | |
| "gnorm": 1.24 | |
| }, | |
| { | |
| "step": 2400, | |
| "policy_loss": 2.8578, | |
| "value_loss": 0.1895, | |
| "lr": 0.0002, | |
| "gnorm": 1.14 | |
| }, | |
| { | |
| "step": 2600, | |
| "policy_loss": 2.8174, | |
| "value_loss": 0.1862, | |
| "lr": 0.0002, | |
| "gnorm": 1.1 | |
| }, | |
| { | |
| "step": 2800, | |
| "policy_loss": 2.7771, | |
| "value_loss": 0.1822, | |
| "lr": 0.0002, | |
| "gnorm": 1.24 | |
| }, | |
| { | |
| "step": 3000, | |
| "policy_loss": 2.7415, | |
| "value_loss": 0.1807, | |
| "lr": 0.000199, | |
| "gnorm": 1.14 | |
| }, | |
| { | |
| "step": 3200, | |
| "policy_loss": 2.6978, | |
| "value_loss": 0.1727, | |
| "lr": 0.000199, | |
| "gnorm": 1.1 | |
| } | |
| ] |