| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": null, |
| "global_step": 0, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.999359182892298e-06, |
| "loss": 0.0058, |
| "reward": -0.5258, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.997144223525905e-06, |
| "loss": 0.0053, |
| "reward": -0.5235, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 9.993347911338379e-06, |
| "loss": 0.0063, |
| "reward": -0.5422, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.987971447677674e-06, |
| "loss": 0.0052, |
| "reward": -0.5766, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.981016533932741e-06, |
| "loss": 0.0042, |
| "reward": -0.3694, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.972485370995129e-06, |
| "loss": 0.004, |
| "reward": -0.346, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.962380658562499e-06, |
| "loss": 0.0047, |
| "reward": -0.4197, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.950705594284304e-06, |
| "loss": 0.0043, |
| "reward": -0.1848, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.937463872749902e-06, |
| "loss": 0.0046, |
| "reward": -0.4406, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.922659684319374e-06, |
| "loss": 0.0044, |
| "reward": -0.4997, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.906297713797501e-06, |
| "loss": 0.0048, |
| "reward": -0.4274, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.888383138951242e-06, |
| "loss": 0.0046, |
| "reward": -0.3451, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.868921628871223e-06, |
| "loss": 0.0051, |
| "reward": -0.4131, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.847919342177744e-06, |
| "loss": 0.004, |
| "reward": -0.2998, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.825382925071883e-06, |
| "loss": 0.0045, |
| "reward": -0.4068, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.801319509232287e-06, |
| "loss": 0.0051, |
| "reward": -0.4744, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.775736709558352e-06, |
| "loss": 0.0054, |
| "reward": -0.5511, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.748642621760474e-06, |
| "loss": 0.0055, |
| "reward": -0.2605, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.720045819798151e-06, |
| "loss": 0.0043, |
| "reward": -0.4817, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.68995535316674e-06, |
| "loss": 0.0061, |
| "reward": -0.3717, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.658380744033737e-06, |
| "loss": 0.0041, |
| "reward": -0.3632, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.625331984225475e-06, |
| "loss": 0.0039, |
| "reward": -0.4044, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.590819532065188e-06, |
| "loss": 0.0049, |
| "reward": -0.3763, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.554854309063474e-06, |
| "loss": 0.0053, |
| "reward": -0.3172, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.517447696462163e-06, |
| "loss": 0.0047, |
| "reward": -0.5815, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.478611531632702e-06, |
| "loss": 0.0048, |
| "reward": -0.35, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.438358104330215e-06, |
| "loss": 0.0054, |
| "reward": -0.4544, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.396700152804386e-06, |
| "loss": 0.0039, |
| "reward": -0.3515, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.353650859768423e-06, |
| "loss": 0.0052, |
| "reward": -0.3799, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.309223848227377e-06, |
| "loss": 0.0048, |
| "reward": -0.5378, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.26343317716712e-06, |
| "loss": 0.0057, |
| "reward": -0.4758, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.216293337105358e-06, |
| "loss": 0.0044, |
| "reward": -0.5203, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.167819245506095e-06, |
| "loss": 0.0054, |
| "reward": -0.5237, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.118026242058976e-06, |
| "loss": 0.0058, |
| "reward": -0.2535, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.066930083825025e-06, |
| "loss": 0.0067, |
| "reward": -0.5059, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.014546940250301e-06, |
| "loss": 0.0059, |
| "reward": -0.5255, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 8.960893388049056e-06, |
| "loss": 0.0047, |
| "reward": -0.3411, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 8.905986405958015e-06, |
| "loss": 0.0048, |
| "reward": -0.3859, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.849843369363425e-06, |
| "loss": 0.0058, |
| "reward": -0.3785, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.792482044802602e-06, |
| "loss": 0.0047, |
| "reward": -0.3432, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.733920584341673e-06, |
| "loss": 0.0057, |
| "reward": -0.3958, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.674177519831351e-06, |
| "loss": 0.0049, |
| "reward": -0.4713, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.613271757042483e-06, |
| "loss": 0.0048, |
| "reward": -0.4485, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.551222569683315e-06, |
| "loss": 0.0054, |
| "reward": -0.5861, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.488049593300287e-06, |
| "loss": 0.0052, |
| "reward": -0.292, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 8.423772819064349e-06, |
| "loss": 0.0049, |
| "reward": -0.4702, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.358412587444719e-06, |
| "loss": 0.0053, |
| "reward": -0.5542, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.291989581772126e-06, |
| "loss": 0.0048, |
| "reward": -0.4116, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.224524821693534e-06, |
| "loss": 0.005, |
| "reward": -0.3746, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.156039656520448e-06, |
| "loss": 0.0046, |
| "reward": -0.4481, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.086555758472905e-06, |
| "loss": 0.0043, |
| "reward": -0.4127, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.016095115821258e-06, |
| "loss": 0.0069, |
| "reward": -0.5597, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.944680025927976e-06, |
| "loss": 0.0061, |
| "reward": -0.4318, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.872333088191598e-06, |
| "loss": 0.0045, |
| "reward": -0.3063, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.799077196895131e-06, |
| "loss": 0.0048, |
| "reward": -0.3184, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.724935533961129e-06, |
| "loss": 0.0058, |
| "reward": -0.476, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.649931561615733e-06, |
| "loss": 0.0051, |
| "reward": -0.3959, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.574089014964029e-06, |
| "loss": 0.0051, |
| "reward": -0.4472, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.497431894479034e-06, |
| "loss": 0.0058, |
| "reward": -0.3736, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.419984458406722e-06, |
| "loss": 0.0043, |
| "reward": -0.4911, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.341771215089468e-06, |
| "loss": 0.005, |
| "reward": -0.3037, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.26281691521035e-06, |
| "loss": 0.0062, |
| "reward": -0.4219, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.183146543960761e-06, |
| "loss": 0.0051, |
| "reward": -0.3435, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.102785313133821e-06, |
| "loss": 0.0038, |
| "reward": -0.3745, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.021758653146058e-06, |
| "loss": 0.0053, |
| "reward": -0.3743, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 6.940092204989934e-06, |
| "loss": 0.006, |
| "reward": -0.4696, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 6.8578118121197144e-06, |
| "loss": 0.0058, |
| "reward": -0.4343, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.774943512273267e-06, |
| "loss": 0.0044, |
| "reward": -0.2842, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.691513529232391e-06, |
| "loss": 0.0046, |
| "reward": -0.345, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.607548264524268e-06, |
| "loss": 0.0047, |
| "reward": -0.3973, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.523074289066666e-06, |
| "loss": 0.0039, |
| "reward": -0.3412, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.43811833475953e-06, |
| "loss": 0.0054, |
| "reward": -0.3593, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.352707286025644e-06, |
| "loss": 0.0049, |
| "reward": -0.3673, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 6.266868171303017e-06, |
| "loss": 0.0046, |
| "reward": -0.4224, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 6.1806281544917e-06, |
| "loss": 0.0045, |
| "reward": -0.1521, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 6.094014526357734e-06, |
| "loss": 0.0056, |
| "reward": -0.3997, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 6.007054695896955e-06, |
| "loss": 0.0046, |
| "reward": -0.289, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 5.9197761816613705e-06, |
| "loss": 0.0042, |
| "reward": -0.3431, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 5.8322066030508924e-06, |
| "loss": 0.0049, |
| "reward": -0.3222, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 5.74437367157313e-06, |
| "loss": 0.0055, |
| "reward": -0.4949, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 5.656305182074053e-06, |
| "loss": 0.0065, |
| "reward": -0.4194, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.568029003942271e-06, |
| "loss": 0.005, |
| "reward": -0.5284, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.4795730722897435e-06, |
| "loss": 0.0058, |
| "reward": -0.5156, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.390965379111655e-06, |
| "loss": 0.0043, |
| "reward": -0.3164, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.302233964428329e-06, |
| "loss": 0.004, |
| "reward": -0.1936, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.213406907411921e-06, |
| "loss": 0.0039, |
| "reward": -0.2792, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.124512317500723e-06, |
| "loss": 0.005, |
| "reward": -0.2348, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 5.035578325503908e-06, |
| "loss": 0.0042, |
| "reward": -0.3528, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.946633074699485e-06, |
| "loss": 0.005, |
| "reward": -0.3753, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.857704711928325e-06, |
| "loss": 0.0052, |
| "reward": -0.4383, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.768821378687066e-06, |
| "loss": 0.0045, |
| "reward": -0.3664, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.6800112022226865e-06, |
| "loss": 0.0043, |
| "reward": -0.3584, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.591302286631612e-06, |
| "loss": 0.0048, |
| "reward": -0.455, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.502722703966126e-06, |
| "loss": 0.004, |
| "reward": -0.3906, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.414300485350945e-06, |
| "loss": 0.0046, |
| "reward": -0.4897, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.326063612112721e-06, |
| "loss": 0.0042, |
| "reward": -0.384, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.2380400069253255e-06, |
| "loss": 0.0039, |
| "reward": -0.4159, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.15025752497367e-06, |
| "loss": 0.0048, |
| "reward": -0.4036, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.062743945138906e-06, |
| "loss": 0.0042, |
| "reward": -0.4189, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.975526961207759e-06, |
| "loss": 0.005, |
| "reward": -0.2446, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.88863417310879e-06, |
| "loss": 0.0047, |
| "reward": -0.3705, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.8020930781783704e-06, |
| "loss": 0.0048, |
| "reward": -0.3862, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.7159310624591055e-06, |
| "loss": 0.0041, |
| "reward": -0.4077, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.630175392033488e-06, |
| "loss": 0.0072, |
| "reward": -0.2784, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.544853204395513e-06, |
| "loss": 0.0058, |
| "reward": -0.436, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.45999149986298e-06, |
| "loss": 0.0045, |
| "reward": -0.3979, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.375617133033209e-06, |
| "loss": 0.0055, |
| "reward": -0.4709, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.2917568042848648e-06, |
| "loss": 0.0036, |
| "reward": -0.3748, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.2167439478854534e-06, |
| "loss": 0.0057, |
| "reward": -0.586, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.1339332616713215e-06, |
| "loss": 0.0047, |
| "reward": -0.2301, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.051713094715606e-06, |
| "loss": 0.0052, |
| "reward": -0.4129, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.9701094656976084e-06, |
| "loss": 0.0044, |
| "reward": -0.2389, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.8891481981923897e-06, |
| "loss": 0.0039, |
| "reward": -0.3641, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.8088549124988785e-06, |
| "loss": 0.0052, |
| "reward": -0.3099, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.729255017532277e-06, |
| "loss": 0.0037, |
| "reward": -0.3203, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.6503737027833926e-06, |
| "loss": 0.0052, |
| "reward": -0.4073, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.5722359303473778e-06, |
| "loss": 0.0052, |
| "reward": -0.3275, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.494866427024452e-06, |
| "loss": 0.004, |
| "reward": -0.3811, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.4182896764950726e-06, |
| "loss": 0.0052, |
| "reward": -0.3413, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.3425299115720295e-06, |
| "loss": 0.0052, |
| "reward": -0.5202, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.267611106531961e-06, |
| "loss": 0.0048, |
| "reward": -0.3018, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.193556969528642e-06, |
| "loss": 0.0056, |
| "reward": -0.4154, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.1203909350905215e-06, |
| "loss": 0.0046, |
| "reward": -0.3844, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 2.048136156704833e-06, |
| "loss": 0.0052, |
| "reward": -0.3442, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9768154994906352e-06, |
| "loss": 0.0053, |
| "reward": -0.357, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.9064515329631267e-06, |
| "loss": 0.0043, |
| "reward": -0.2778, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.837066523891483e-06, |
| "loss": 0.0066, |
| "reward": -0.3448, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.768682429252519e-06, |
| "loss": 0.0051, |
| "reward": -0.3787, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.7013208892823618e-06, |
| "loss": 0.004, |
| "reward": -0.4296, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.6350032206283833e-06, |
| "loss": 0.0047, |
| "reward": -0.5144, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.5697504096035116e-06, |
| "loss": 0.0052, |
| "reward": -0.331, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.5055831055450825e-06, |
| "loss": 0.0039, |
| "reward": -0.3365, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.4425216142803395e-06, |
| "loss": 0.0047, |
| "reward": -0.3728, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.3805858917006148e-06, |
| "loss": 0.0048, |
| "reward": -0.3547, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.3197955374462767e-06, |
| "loss": 0.0043, |
| "reward": -0.3398, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.260169788704383e-06, |
| "loss": 0.0056, |
| "reward": -0.2891, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.2017275141210604e-06, |
| "loss": 0.0042, |
| "reward": -0.3165, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.1444872078304865e-06, |
| "loss": 0.0039, |
| "reward": -0.366, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.0884669836024058e-06, |
| "loss": 0.004, |
| "reward": -0.2873, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.033684569109994e-06, |
| "loss": 0.0046, |
| "reward": -0.2922, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.80157300319926e-07, |
| "loss": 0.0037, |
| "reward": -0.3076, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.279021160063712e-07, |
| "loss": 0.0066, |
| "reward": -0.5213, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 8.769355523907086e-07, |
| "loss": 0.0044, |
| "reward": -0.5063, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 8.272737379086088e-07, |
| "loss": 0.0041, |
| "reward": -0.5909, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 7.789323881061777e-07, |
| "loss": 0.0044, |
| "reward": -0.2486, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 7.31926800666744e-07, |
| "loss": 0.0044, |
| "reward": -0.4971, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.862718505698945e-07, |
| "loss": 0.0043, |
| "reward": -0.5182, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.419819853842591e-07, |
| "loss": 0.0046, |
| "reward": -0.2389, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.990712206955674e-07, |
| "loss": 0.0039, |
| "reward": -0.3725, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.575531356713904e-07, |
| "loss": 0.0046, |
| "reward": -0.4709, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.17440868764002e-07, |
| "loss": 0.005, |
| "reward": -0.3545, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.787471135526945e-07, |
| "loss": 0.0041, |
| "reward": -0.2898, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.414841147268811e-07, |
| "loss": 0.0045, |
| "reward": -0.4093, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.056636642112427e-07, |
| "loss": 0.004, |
| "reward": -0.3965, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.7129709743416733e-07, |
| "loss": 0.0045, |
| "reward": -0.3877, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.3839528974062843e-07, |
| "loss": 0.0046, |
| "reward": -0.3655, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.069686529506777e-07, |
| "loss": 0.0045, |
| "reward": -0.5236, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2.770271320646023e-07, |
| "loss": 0.0053, |
| "reward": -0.441, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 2.4858020211582346e-07, |
| "loss": 0.004, |
| "reward": -0.2618, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 2.2163686517250427e-07, |
| "loss": 0.004, |
| "reward": -0.4623, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.962056474888391e-07, |
| "loss": 0.0038, |
| "reward": -0.2542, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7229459680690176e-07, |
| "loss": 0.0051, |
| "reward": -0.377, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.4991127980993503e-07, |
| "loss": 0.0046, |
| "reward": -0.3763, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.290627797278593e-07, |
| "loss": 0.0046, |
| "reward": -0.4469, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.0975569409577668e-07, |
| "loss": 0.0054, |
| "reward": -0.4301, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.199613266616891e-08, |
| "loss": 0.0047, |
| "reward": -0.481, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 7.578971547546554e-08, |
| "loss": 0.0047, |
| "reward": -0.549, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.114157106556773e-08, |
| "loss": 0.0058, |
| "reward": -0.3571, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.8056334860921426e-08, |
| "loss": 0.005, |
| "reward": -0.3539, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 3.653814770162578e-08, |
| "loss": 0.0037, |
| "reward": -0.3161, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2.6590654533062975e-08, |
| "loss": 0.0053, |
| "reward": -0.4499, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.821700325244813e-08, |
| "loss": 0.0047, |
| "reward": -0.3267, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.141984371267335e-08, |
| "loss": 0.0043, |
| "reward": -0.4299, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.2013268837601834e-09, |
| "loss": 0.0051, |
| "reward": -0.4119, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 2.5631041721801976e-09, |
| "loss": 0.0048, |
| "reward": -0.492, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 5.063268982646863e-10, |
| "loss": 0.0053, |
| "reward": -0.5111, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 3.164593187165999e-11, |
| "loss": 0.0057, |
| "reward": -0.5665, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.1392114864122727e-09, |
| "loss": 0.0039, |
| "reward": -0.3717, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 3.828673071319955e-09, |
| "loss": 0.0049, |
| "reward": -0.4831, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.099179602960583e-09, |
| "loss": 0.0052, |
| "reward": -0.4277, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.3949379673960372e-08, |
| "loss": 0.0047, |
| "reward": -0.3236, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.1377421980859903e-08, |
| "loss": 0.0044, |
| "reward": -0.4584, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 3.0380955909962706e-08, |
| "loss": 0.004, |
| "reward": -0.427, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.095713228118803e-08, |
| "loss": 0.0046, |
| "reward": -0.4793, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.310260424970071e-08, |
| "loss": 0.005, |
| "reward": -0.3642, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.681352836502009e-08, |
| "loss": 0.0036, |
| "reward": -0.3235, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.208556578728933e-08, |
| "loss": 0.0055, |
| "reward": -0.4695, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 9.891388366030486e-08, |
| "loss": 0.005, |
| "reward": -0.3998, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.172931566408797e-07, |
| "loss": 0.0046, |
| "reward": -0.3878, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.3721756858405545e-07, |
| "loss": 0.0055, |
| "reward": -0.5067, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.5868081438363213e-07, |
| "loss": 0.0038, |
| "reward": -0.3747, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.8167610196742125e-07, |
| "loss": 0.0052, |
| "reward": -0.2461, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.0619615444661012e-07, |
| "loss": 0.0044, |
| "reward": -0.2565, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 2.3223321241854335e-07, |
| "loss": 0.0045, |
| "reward": -0.4397, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.5977903642219305e-07, |
| "loss": 0.0047, |
| "reward": -0.2828, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.8882490954554976e-07, |
| "loss": 0.0049, |
| "reward": -0.361, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 3.193616401840971e-07, |
| "loss": 0.0054, |
| "reward": -0.2941, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 3.513795649495133e-07, |
| "loss": 0.0046, |
| "reward": -0.3027, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 3.848685517276546e-07, |
| "loss": 0.0037, |
| "reward": -0.391, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.1981800288488416e-07, |
| "loss": 0.0048, |
| "reward": -0.3608, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.562168586217008e-07, |
| "loss": 0.0044, |
| "reward": -0.3958, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.940536004726337e-07, |
| "loss": 0.0072, |
| "reward": -0.5123, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.33316254951266e-07, |
| "loss": 0.0044, |
| "reward": -0.2958, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.739923973392685e-07, |
| "loss": 0.0047, |
| "reward": -0.4311, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 6.160691556182191e-07, |
| "loss": 0.0039, |
| "reward": -0.4388, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 6.595332145429611e-07, |
| "loss": 0.0066, |
| "reward": -0.4963, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.043708198552385e-07, |
| "loss": 0.0051, |
| "reward": -0.3088, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.505677826362434e-07, |
| "loss": 0.0054, |
| "reward": -0.3397, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.981094837967229e-07, |
| "loss": 0.0044, |
| "reward": -0.4052, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.46980878703203e-07, |
| "loss": 0.0049, |
| "reward": -0.3994, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.971665019388887e-07, |
| "loss": 0.0051, |
| "reward": -0.4254, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 9.486504721977097e-07, |
| "loss": 0.0043, |
| "reward": -0.3374, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.0014164973099866e-06, |
| "loss": 0.0058, |
| "reward": -0.3854, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.0554478793981015e-06, |
| "loss": 0.0043, |
| "reward": -0.3606, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.1107275201605728e-06, |
| "loss": 0.0041, |
| "reward": -0.3453, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.1672379262828238e-06, |
| "loss": 0.008, |
| "reward": -0.3853, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.2249612149729779e-06, |
| "loss": 0.0052, |
| "reward": -0.3406, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.2838791196208771e-06, |
| "loss": 0.0043, |
| "reward": -0.3696, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.3439729955785901e-06, |
| "loss": 0.0048, |
| "reward": -0.326, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.40522382606052e-06, |
| "loss": 0.0048, |
| "reward": -0.3875, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.4676122281613041e-06, |
| "loss": 0.0044, |
| "reward": -0.3224, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.5311184589895495e-06, |
| "loss": 0.0054, |
| "reward": -0.4125, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.595722421915507e-06, |
| "loss": 0.0051, |
| "reward": -0.5115, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.6614036729306638e-06, |
| "loss": 0.0042, |
| "reward": -0.5107, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.7281414271172925e-06, |
| "loss": 0.0048, |
| "reward": -0.4813, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.7959145652258408e-06, |
| "loss": 0.0036, |
| "reward": -0.2592, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.8647016403581763e-06, |
| "loss": 0.0045, |
| "reward": -0.431, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.9344808847544632e-06, |
| "loss": 0.0045, |
| "reward": -0.3041, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 2.0052302166816017e-06, |
| "loss": 0.0054, |
| "reward": -0.2746, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.0769272474210374e-06, |
| "loss": 0.0059, |
| "reward": -0.5107, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.149549288353676e-06, |
| "loss": 0.0044, |
| "reward": -0.3638, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.2230733581397427e-06, |
| "loss": 0.0055, |
| "reward": -0.3426, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.297476189991249e-06, |
| "loss": 0.0048, |
| "reward": -0.3561, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.3727342390348134e-06, |
| "loss": 0.0058, |
| "reward": -0.334, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.4488236897624457e-06, |
| "loss": 0.0043, |
| "reward": -0.188, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.525720463568009e-06, |
| "loss": 0.005, |
| "reward": -0.3464, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.603400226366919e-06, |
| "loss": 0.0041, |
| "reward": -0.4215, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.6818383962967134e-06, |
| "loss": 0.0059, |
| "reward": -0.483, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.7610101514959903e-06, |
| "loss": 0.0042, |
| "reward": -0.3055, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.840890437959334e-06, |
| "loss": 0.0047, |
| "reward": -0.2254, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.92145397746568e-06, |
| "loss": 0.0049, |
| "reward": -0.4363, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 3.002675275577659e-06, |
| "loss": 0.0053, |
| "reward": -0.4156, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 3.0845286297093167e-06, |
| "loss": 0.0032, |
| "reward": -0.3561, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.166988137259751e-06, |
| "loss": 0.0039, |
| "reward": -0.4369, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.2500277038100126e-06, |
| "loss": 0.0049, |
| "reward": -0.431, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.3336210513807366e-06, |
| "loss": 0.0047, |
| "reward": -0.4215, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.4177417267478236e-06, |
| "loss": 0.0044, |
| "reward": -0.2004, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.502363109813589e-06, |
| "loss": 0.0039, |
| "reward": -0.3595, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.5874584220307686e-06, |
| "loss": 0.0052, |
| "reward": -0.3384, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 3.673000734876582e-06, |
| "loss": 0.0042, |
| "reward": -0.1973, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 3.7589629783743188e-06, |
| "loss": 0.0044, |
| "reward": -0.4077, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 3.845317949659662e-06, |
| "loss": 0.0046, |
| "reward": -0.5238, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 3.932038321589086e-06, |
| "loss": 0.0039, |
| "reward": -0.3569, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.01909665138754e-06, |
| "loss": 0.005, |
| "reward": -0.4477, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.106465389332768e-06, |
| "loss": 0.0035, |
| "reward": -0.303, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.194116887473445e-06, |
| "loss": 0.0042, |
| "reward": -0.3804, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.282023408378429e-06, |
| "loss": 0.0047, |
| "reward": -0.4259, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.370157133914274e-06, |
| "loss": 0.0049, |
| "reward": -0.3371, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.458490174048339e-06, |
| "loss": 0.0047, |
| "reward": -0.4291, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.5469945756746145e-06, |
| "loss": 0.0044, |
| "reward": -0.4106, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.635642331459522e-06, |
| "loss": 0.0036, |
| "reward": -0.3657, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.724405388704883e-06, |
| "loss": 0.0052, |
| "reward": -0.3857, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.804367507873278e-06, |
| "loss": 0.0047, |
| "reward": -0.3853, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.893272229193558e-06, |
| "loss": 0.0054, |
| "reward": -0.3366, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.982210724656406e-06, |
| "loss": 0.005, |
| "reward": -0.4375, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.071154849558693e-06, |
| "loss": 0.0043, |
| "reward": -0.3342, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.160076457415833e-06, |
| "loss": 0.0047, |
| "reward": -0.4507, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.248947408868792e-06, |
| "loss": 0.005, |
| "reward": -0.4391, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.33773958058882e-06, |
| "loss": 0.0036, |
| "reward": -0.2586, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.426424874177126e-06, |
| "loss": 0.0044, |
| "reward": -0.4199, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.514975225056634e-06, |
| "loss": 0.0044, |
| "reward": -0.4119, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.603362611353067e-06, |
| "loss": 0.0043, |
| "reward": -0.3747, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 5.691559062762498e-06, |
| "loss": 0.0051, |
| "reward": -0.2529, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 5.779536669402608e-06, |
| "loss": 0.0038, |
| "reward": -0.3863, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 5.867267590644781e-06, |
| "loss": 0.0043, |
| "reward": -0.4039, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 5.954724063924317e-06, |
| "loss": 0.0041, |
| "reward": -0.3328, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.041878413525932e-06, |
| "loss": 0.0044, |
| "reward": -0.2204, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.128703059341788e-06, |
| "loss": 0.0056, |
| "reward": -0.4904, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.215170525599225e-06, |
| "loss": 0.0047, |
| "reward": -0.3944, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.3012534495555275e-06, |
| "loss": 0.0053, |
| "reward": -0.5206, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.386924590156891e-06, |
| "loss": 0.0033, |
| "reward": -0.1779, |
| "step": 2809 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.4721568366589e-06, |
| "loss": 0.0045, |
| "reward": -0.4464, |
| "step": 2819 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.556923217205744e-06, |
| "loss": 0.0036, |
| "reward": -0.3361, |
| "step": 2829 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.641196907365499e-06, |
| "loss": 0.0049, |
| "reward": -0.5177, |
| "step": 2839 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.724951238618763e-06, |
| "loss": 0.0048, |
| "reward": -0.4074, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.808159706797949e-06, |
| "loss": 0.0047, |
| "reward": -0.2954, |
| "step": 2859 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.8907959804745294e-06, |
| "loss": 0.0046, |
| "reward": -0.312, |
| "step": 2869 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 6.972833909291694e-06, |
| "loss": 0.0053, |
| "reward": -0.4417, |
| "step": 2879 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.054247532239634e-06, |
| "loss": 0.0036, |
| "reward": -0.2909, |
| "step": 2889 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.135011085870964e-06, |
| "loss": 0.0044, |
| "reward": -0.2276, |
| "step": 2899 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.21509901245357e-06, |
| "loss": 0.0049, |
| "reward": -0.3637, |
| "step": 2909 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.294485968058404e-06, |
| "loss": 0.0067, |
| "reward": -0.3676, |
| "step": 2919 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.373146830579596e-06, |
| "loss": 0.0034, |
| "reward": -0.1601, |
| "step": 2929 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.451056707684396e-06, |
| "loss": 0.0041, |
| "reward": -0.1314, |
| "step": 2939 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.528190944690356e-06, |
| "loss": 0.0035, |
| "reward": -0.2391, |
| "step": 2949 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.6045251323673555e-06, |
| "loss": 0.0052, |
| "reward": -0.4042, |
| "step": 2959 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.680035114661929e-06, |
| "loss": 0.0044, |
| "reward": -0.3035, |
| "step": 2969 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.754696996341486e-06, |
| "loss": 0.0049, |
| "reward": -0.3552, |
| "step": 2979 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.828487150555979e-06, |
| "loss": 0.0049, |
| "reward": -0.2459, |
| "step": 2989 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.901382226314662e-06, |
| "loss": 0.0036, |
| "reward": -0.2989, |
| "step": 2999 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 7.973359155875521e-06, |
| "loss": 0.0057, |
| "reward": -0.5494, |
| "step": 3009 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.044395162045135e-06, |
| "loss": 0.0045, |
| "reward": -0.3344, |
| "step": 3019 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.11446776538649e-06, |
| "loss": 0.0045, |
| "reward": -0.3949, |
| "step": 3029 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.183554791332675e-06, |
| "loss": 0.004, |
| "reward": -0.2997, |
| "step": 3039 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.251634377204023e-06, |
| "loss": 0.0034, |
| "reward": -0.2654, |
| "step": 3049 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.318684979126607e-06, |
| "loss": 0.0034, |
| "reward": -0.4013, |
| "step": 3059 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.384685378849804e-06, |
| "loss": 0.0035, |
| "reward": -0.4777, |
| "step": 3069 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.44961469046086e-06, |
| "loss": 0.0031, |
| "reward": -0.3525, |
| "step": 3079 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.513452366994242e-06, |
| "loss": 0.0042, |
| "reward": -0.1987, |
| "step": 3089 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.576178206933773e-06, |
| "loss": 0.0043, |
| "reward": -0.439, |
| "step": 3099 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.637772360605421e-06, |
| "loss": 0.0048, |
| "reward": -0.2539, |
| "step": 3109 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.698215336458743e-06, |
| "loss": 0.0047, |
| "reward": -0.4092, |
| "step": 3119 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.757488007235031e-06, |
| "loss": 0.0042, |
| "reward": -0.288, |
| "step": 3129 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 8.815571616020147e-06, |
| "loss": 0.0033, |
| "reward": -0.2841, |
| "step": 3139 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 8.872447782180168e-06, |
| "loss": 0.0044, |
| "reward": -0.3696, |
| "step": 3149 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 8.92809850717797e-06, |
| "loss": 0.0034, |
| "reward": -0.1236, |
| "step": 3159 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 8.982506180268895e-06, |
| "loss": 0.0041, |
| "reward": -0.269, |
| "step": 3169 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.035653584073673e-06, |
| "loss": 0.0029, |
| "reward": -0.4326, |
| "step": 3179 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.087523900026907e-06, |
| "loss": 0.0038, |
| "reward": -0.2885, |
| "step": 3189 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.138100713699312e-06, |
| "loss": 0.0038, |
| "reward": -0.3119, |
| "step": 3199 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.187368019992092e-06, |
| "loss": 0.004, |
| "reward": -0.196, |
| "step": 3209 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.235310228201782e-06, |
| "loss": 0.0036, |
| "reward": -0.3683, |
| "step": 3219 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.281912166953929e-06, |
| "loss": 0.0035, |
| "reward": -0.1997, |
| "step": 3229 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.327159089004098e-06, |
| "loss": 0.0042, |
| "reward": -0.403, |
| "step": 3239 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.371036675904667e-06, |
| "loss": 0.0034, |
| "reward": -0.3637, |
| "step": 3249 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.413531042535915e-06, |
| "loss": 0.0049, |
| "reward": -0.3766, |
| "step": 3259 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.454628741499976e-06, |
| "loss": 0.0044, |
| "reward": -0.3386, |
| "step": 3269 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.494316767376295e-06, |
| "loss": 0.0039, |
| "reward": -0.3232, |
| "step": 3279 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.532582560837204e-06, |
| "loss": 0.0032, |
| "reward": -0.3225, |
| "step": 3289 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.569414012622356e-06, |
| "loss": 0.0037, |
| "reward": -0.3366, |
| "step": 3299 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.604799467370689e-06, |
| "loss": 0.0038, |
| "reward": -0.3283, |
| "step": 3309 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.63872772730879e-06, |
| "loss": 0.0033, |
| "reward": -0.2504, |
| "step": 3319 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.671188055794462e-06, |
| "loss": 0.0037, |
| "reward": -0.3496, |
| "step": 3329 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.702170180714328e-06, |
| "loss": 0.0035, |
| "reward": -0.3135, |
| "step": 3339 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.731664297734458e-06, |
| "loss": 0.0036, |
| "reward": -0.3951, |
| "step": 3349 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.75966107340297e-06, |
| "loss": 0.0032, |
| "reward": -0.4631, |
| "step": 3359 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.786151648103613e-06, |
| "loss": 0.0036, |
| "reward": -0.3141, |
| "step": 3369 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.811127638859398e-06, |
| "loss": 0.003, |
| "reward": -0.1992, |
| "step": 3379 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.834581141985404e-06, |
| "loss": 0.0038, |
| "reward": -0.3201, |
| "step": 3389 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.85650473558991e-06, |
| "loss": 0.0031, |
| "reward": -0.1577, |
| "step": 3399 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.876891481923067e-06, |
| "loss": 0.0035, |
| "reward": -0.2635, |
| "step": 3409 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.89573492957235e-06, |
| "loss": 0.0044, |
| "reward": -0.3539, |
| "step": 3419 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.91302911550412e-06, |
| "loss": 0.0031, |
| "reward": -0.3358, |
| "step": 3429 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.928768566950632e-06, |
| "loss": 0.0041, |
| "reward": -0.2885, |
| "step": 3439 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.94294830314191e-06, |
| "loss": 0.0037, |
| "reward": -0.2699, |
| "step": 3449 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.955563836881898e-06, |
| "loss": 0.0053, |
| "reward": -0.317, |
| "step": 3459 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.966611175968454e-06, |
| "loss": 0.0035, |
| "reward": -0.2824, |
| "step": 3469 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.976086824456686e-06, |
| "loss": 0.004, |
| "reward": -0.5046, |
| "step": 3479 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.983987783765243e-06, |
| "loss": 0.0036, |
| "reward": -0.4293, |
| "step": 3489 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.990311553625227e-06, |
| "loss": 0.0034, |
| "reward": -0.1845, |
| "step": 3499 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.995056132871399e-06, |
| "loss": 0.0035, |
| "reward": -0.1239, |
| "step": 3509 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.998220020075455e-06, |
| "loss": 0.0031, |
| "reward": -0.271, |
| "step": 3519 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.999802214021156e-06, |
| "loss": 0.0039, |
| "reward": -0.4751, |
| "step": 3529 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.999802214021156e-06, |
| "loss": 0.0038, |
| "reward": -0.454, |
| "step": 3539 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.998220020075455e-06, |
| "loss": 0.0041, |
| "reward": -0.1722, |
| "step": 3549 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.9950561328714e-06, |
| "loss": 0.0041, |
| "reward": -0.2842, |
| "step": 3559 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.990311553625229e-06, |
| "loss": 0.0042, |
| "reward": -0.3914, |
| "step": 3569 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.983987783765245e-06, |
| "loss": 0.0038, |
| "reward": -0.1993, |
| "step": 3579 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.976086824456686e-06, |
| "loss": 0.0029, |
| "reward": -0.3465, |
| "step": 3589 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.966611175968454e-06, |
| "loss": 0.0034, |
| "reward": -0.209, |
| "step": 3599 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.955563836881898e-06, |
| "loss": 0.0032, |
| "reward": -0.2638, |
| "step": 3609 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.94294830314191e-06, |
| "loss": 0.0032, |
| "reward": -0.2811, |
| "step": 3619 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.928768566950635e-06, |
| "loss": 0.0038, |
| "reward": -0.3794, |
| "step": 3629 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.91302911550412e-06, |
| "loss": 0.0038, |
| "reward": -0.3419, |
| "step": 3639 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.89573492957235e-06, |
| "loss": 0.0032, |
| "reward": -0.31, |
| "step": 3649 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.876891481923067e-06, |
| "loss": 0.0034, |
| "reward": -0.1604, |
| "step": 3659 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.85650473558991e-06, |
| "loss": 0.0035, |
| "reward": -0.1379, |
| "step": 3669 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.834581141985404e-06, |
| "loss": 0.0038, |
| "reward": -0.3531, |
| "step": 3679 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.811127638859398e-06, |
| "loss": 0.0028, |
| "reward": -0.1161, |
| "step": 3689 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.786151648103615e-06, |
| "loss": 0.0038, |
| "reward": -0.3628, |
| "step": 3699 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.759661073402971e-06, |
| "loss": 0.0033, |
| "reward": -0.3243, |
| "step": 3709 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.731664297734458e-06, |
| "loss": 0.0028, |
| "reward": -0.2481, |
| "step": 3719 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.702170180714325e-06, |
| "loss": 0.0038, |
| "reward": -0.3179, |
| "step": 3729 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.671188055794462e-06, |
| "loss": 0.0028, |
| "reward": -0.1725, |
| "step": 3739 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.63872772730879e-06, |
| "loss": 0.003, |
| "reward": -0.1697, |
| "step": 3749 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.604799467370689e-06, |
| "loss": 0.0035, |
| "reward": -0.2575, |
| "step": 3759 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.569414012622358e-06, |
| "loss": 0.0043, |
| "reward": -0.3233, |
| "step": 3769 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.532582560837208e-06, |
| "loss": 0.0031, |
| "reward": -0.0708, |
| "step": 3779 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.494316767376295e-06, |
| "loss": 0.003, |
| "reward": -0.2344, |
| "step": 3789 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.454628741499978e-06, |
| "loss": 0.0036, |
| "reward": -0.0844, |
| "step": 3799 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.413531042535916e-06, |
| "loss": 0.0034, |
| "reward": -0.2369, |
| "step": 3809 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.371036675904671e-06, |
| "loss": 0.0036, |
| "reward": -0.3654, |
| "step": 3819 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.327159089004098e-06, |
| "loss": 0.0036, |
| "reward": -0.2757, |
| "step": 3829 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.281912166953932e-06, |
| "loss": 0.0035, |
| "reward": -0.3181, |
| "step": 3839 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.235310228201784e-06, |
| "loss": 0.0048, |
| "reward": -0.3122, |
| "step": 3849 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.187368019992095e-06, |
| "loss": 0.0033, |
| "reward": -0.2904, |
| "step": 3859 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.138100713699312e-06, |
| "loss": 0.0037, |
| "reward": -0.1489, |
| "step": 3869 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.087523900026905e-06, |
| "loss": 0.0029, |
| "reward": -0.2763, |
| "step": 3879 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.035653584073675e-06, |
| "loss": 0.003, |
| "reward": -0.1622, |
| "step": 3889 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.982506180268893e-06, |
| "loss": 0.0034, |
| "reward": -0.3107, |
| "step": 3899 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.928098507177972e-06, |
| "loss": 0.0033, |
| "reward": -0.1224, |
| "step": 3909 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.872447782180166e-06, |
| "loss": 0.0038, |
| "reward": -0.1105, |
| "step": 3919 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.815571616020149e-06, |
| "loss": 0.0031, |
| "reward": -0.348, |
| "step": 3929 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.757488007235031e-06, |
| "loss": 0.0035, |
| "reward": -0.2409, |
| "step": 3939 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.698215336458744e-06, |
| "loss": 0.0033, |
| "reward": -0.3141, |
| "step": 3949 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.637772360605418e-06, |
| "loss": 0.0034, |
| "reward": -0.313, |
| "step": 3959 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.576178206933775e-06, |
| "loss": 0.0046, |
| "reward": -0.4326, |
| "step": 3969 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.513452366994239e-06, |
| "loss": 0.003, |
| "reward": -0.1198, |
| "step": 3979 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.44961469046086e-06, |
| "loss": 0.0032, |
| "reward": -0.1191, |
| "step": 3989 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.384685378849806e-06, |
| "loss": 0.0041, |
| "reward": -0.3557, |
| "step": 3999 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.318684979126612e-06, |
| "loss": 0.0035, |
| "reward": -0.2939, |
| "step": 4009 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.251634377204026e-06, |
| "loss": 0.0049, |
| "reward": -0.1952, |
| "step": 4019 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.183554791332677e-06, |
| "loss": 0.0029, |
| "reward": -0.1551, |
| "step": 4029 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.114467765386494e-06, |
| "loss": 0.0043, |
| "reward": -0.2539, |
| "step": 4039 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.04439516204514e-06, |
| "loss": 0.0037, |
| "reward": -0.3849, |
| "step": 4049 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 7.973359155875525e-06, |
| "loss": 0.0028, |
| "reward": -0.2593, |
| "step": 4059 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.901382226314662e-06, |
| "loss": 0.0032, |
| "reward": -0.1569, |
| "step": 4069 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.82848715055598e-06, |
| "loss": 0.0035, |
| "reward": -0.4049, |
| "step": 4079 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.75469699634149e-06, |
| "loss": 0.0031, |
| "reward": -0.3131, |
| "step": 4089 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.68003511466193e-06, |
| "loss": 0.0033, |
| "reward": -0.2798, |
| "step": 4099 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.604525132367354e-06, |
| "loss": 0.0034, |
| "reward": -0.2349, |
| "step": 4109 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.528190944690358e-06, |
| "loss": 0.0034, |
| "reward": -0.1358, |
| "step": 4119 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.4510567076843945e-06, |
| "loss": 0.0034, |
| "reward": -0.1241, |
| "step": 4129 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.373146830579598e-06, |
| "loss": 0.0031, |
| "reward": -0.0964, |
| "step": 4139 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.294485968058401e-06, |
| "loss": 0.003, |
| "reward": -0.064, |
| "step": 4149 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.2150990124535726e-06, |
| "loss": 0.0031, |
| "reward": -0.2023, |
| "step": 4159 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.135011085870962e-06, |
| "loss": 0.0035, |
| "reward": -0.3227, |
| "step": 4169 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.054247532239637e-06, |
| "loss": 0.003, |
| "reward": -0.1747, |
| "step": 4179 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 6.9728339092916915e-06, |
| "loss": 0.003, |
| "reward": -0.1391, |
| "step": 4189 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 6.890795980474532e-06, |
| "loss": 0.0029, |
| "reward": -0.1152, |
| "step": 4199 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.808159706797946e-06, |
| "loss": 0.0027, |
| "reward": -0.225, |
| "step": 4209 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.72495123861877e-06, |
| "loss": 0.0035, |
| "reward": -0.107, |
| "step": 4219 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.6411969073655014e-06, |
| "loss": 0.0037, |
| "reward": -0.213, |
| "step": 4229 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.55692321720575e-06, |
| "loss": 0.0029, |
| "reward": -0.2639, |
| "step": 4239 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.472156836658903e-06, |
| "loss": 0.0034, |
| "reward": -0.1981, |
| "step": 4249 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.386924590156898e-06, |
| "loss": 0.0035, |
| "reward": -0.1908, |
| "step": 4259 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.301253449555531e-06, |
| "loss": 0.0028, |
| "reward": -0.2391, |
| "step": 4269 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.215170525599231e-06, |
| "loss": 0.0032, |
| "reward": -0.2446, |
| "step": 4279 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.128703059341789e-06, |
| "loss": 0.0032, |
| "reward": -0.2266, |
| "step": 4289 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.041878413525939e-06, |
| "loss": 0.0032, |
| "reward": -0.1362, |
| "step": 4299 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 5.9547240639243184e-06, |
| "loss": 0.0035, |
| "reward": -0.2071, |
| "step": 4309 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 5.867267590644787e-06, |
| "loss": 0.0035, |
| "reward": -0.2041, |
| "step": 4319 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 5.779536669402611e-06, |
| "loss": 0.0029, |
| "reward": -0.2125, |
| "step": 4329 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 5.6915590627625005e-06, |
| "loss": 0.0033, |
| "reward": -0.3047, |
| "step": 4339 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.60336261135307e-06, |
| "loss": 0.0028, |
| "reward": -0.2644, |
| "step": 4349 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.514975225056633e-06, |
| "loss": 0.0032, |
| "reward": -0.2142, |
| "step": 4359 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.4264248741771295e-06, |
| "loss": 0.0036, |
| "reward": -0.2406, |
| "step": 4369 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.337739580588822e-06, |
| "loss": 0.0035, |
| "reward": -0.4047, |
| "step": 4379 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.248947408868794e-06, |
| "loss": 0.0034, |
| "reward": -0.1619, |
| "step": 4389 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.16007645741583e-06, |
| "loss": 0.0028, |
| "reward": -0.1479, |
| "step": 4399 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 5.071154849558695e-06, |
| "loss": 0.0034, |
| "reward": -0.1027, |
| "step": 4409 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.982210724656409e-06, |
| "loss": 0.0034, |
| "reward": -0.2996, |
| "step": 4419 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.893272229193561e-06, |
| "loss": 0.0028, |
| "reward": -0.0919, |
| "step": 4429 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.804367507873277e-06, |
| "loss": 0.003, |
| "reward": -0.1089, |
| "step": 4439 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.715524694710839e-06, |
| "loss": 0.0031, |
| "reward": -0.1168, |
| "step": 4449 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.626771904130584e-06, |
| "loss": 0.0034, |
| "reward": -0.3128, |
| "step": 4459 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.538137222069105e-06, |
| "loss": 0.0041, |
| "reward": -0.1848, |
| "step": 4469 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.449648697087378e-06, |
| "loss": 0.0032, |
| "reward": -0.1865, |
| "step": 4479 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.361334331494812e-06, |
| "loss": 0.0027, |
| "reward": -0.1697, |
| "step": 4489 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.2732220724878194e-06, |
| "loss": 0.0035, |
| "reward": -0.1667, |
| "step": 4499 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.185339803305934e-06, |
| "loss": 0.0038, |
| "reward": -0.3111, |
| "step": 4509 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.097715334408112e-06, |
| "loss": 0.003, |
| "reward": -0.2045, |
| "step": 4519 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.010376394672062e-06, |
| "loss": 0.0031, |
| "reward": -0.152, |
| "step": 4529 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.9233506226194126e-06, |
| "loss": 0.003, |
| "reward": -0.2064, |
| "step": 4539 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.836665557669496e-06, |
| "loss": 0.0034, |
| "reward": -0.2407, |
| "step": 4549 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.750348631424402e-06, |
| "loss": 0.0029, |
| "reward": -0.1174, |
| "step": 4559 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.66442715898827e-06, |
| "loss": 0.003, |
| "reward": -0.2606, |
| "step": 4569 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.578928330323367e-06, |
| "loss": 0.003, |
| "reward": -0.1689, |
| "step": 4579 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.493879201645759e-06, |
| "loss": 0.0028, |
| "reward": -0.2151, |
| "step": 4589 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.409306686863399e-06, |
| "loss": 0.0029, |
| "reward": -0.1513, |
| "step": 4599 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.3252375490591217e-06, |
| "loss": 0.0026, |
| "reward": -0.0821, |
| "step": 4609 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.24169839202147e-06, |
| "loss": 0.0034, |
| "reward": -0.2608, |
| "step": 4619 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.158715651825871e-06, |
| "loss": 0.0028, |
| "reward": -0.132, |
| "step": 4629 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.076315588468941e-06, |
| "loss": 0.0028, |
| "reward": -0.0714, |
| "step": 4639 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.9945242775584143e-06, |
| "loss": 0.0032, |
| "reward": -0.2108, |
| "step": 4649 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.913367602061552e-06, |
| "loss": 0.0027, |
| "reward": -0.0474, |
| "step": 4659 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.832871244114375e-06, |
| "loss": 0.0033, |
| "reward": -0.287, |
| "step": 4669 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.753060676894588e-06, |
| "loss": 0.0029, |
| "reward": -0.2081, |
| "step": 4679 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.6739611565604947e-06, |
| "loss": 0.0036, |
| "reward": -0.1504, |
| "step": 4689 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5955977142586946e-06, |
| "loss": 0.0033, |
| "reward": -0.2813, |
| "step": 4699 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.5179951482029225e-06, |
| "loss": 0.0029, |
| "reward": -0.3317, |
| "step": 4709 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.4411780158266533e-06, |
| "loss": 0.0029, |
| "reward": -0.1715, |
| "step": 4719 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.3651706260118184e-06, |
| "loss": 0.0029, |
| "reward": -0.1342, |
| "step": 4729 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.289997031396286e-06, |
| "loss": 0.0032, |
| "reward": -0.0324, |
| "step": 4739 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.215681020762313e-06, |
| "loss": 0.0032, |
| "reward": -0.1613, |
| "step": 4749 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.1422461115086167e-06, |
| "loss": 0.0027, |
| "reward": -0.2861, |
| "step": 4759 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.069715542208207e-06, |
| "loss": 0.0026, |
| "reward": -0.181, |
| "step": 4769 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.998112265254541e-06, |
| "loss": 0.0028, |
| "reward": -0.1974, |
| "step": 4779 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.9274589395981937e-06, |
| "loss": 0.0036, |
| "reward": -0.2612, |
| "step": 4789 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.8577779235764249e-06, |
| "loss": 0.0031, |
| "reward": -0.1949, |
| "step": 4799 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.7890912678378392e-06, |
| "loss": 0.0033, |
| "reward": -0.1398, |
| "step": 4809 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.7214207083644098e-06, |
| "loss": 0.0032, |
| "reward": -0.1068, |
| "step": 4819 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 1.6547876595931444e-06, |
| "loss": 0.0028, |
| "reward": -0.2427, |
| "step": 4829 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.5892132076394151e-06, |
| "loss": 0.0031, |
| "reward": -0.0524, |
| "step": 4839 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.524718103624252e-06, |
| "loss": 0.0029, |
| "reward": -0.1146, |
| "step": 4849 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.4613227571076138e-06, |
| "loss": 0.0029, |
| "reward": -0.1041, |
| "step": 4859 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.3990472296297808e-06, |
| "loss": 0.0025, |
| "reward": -0.2616, |
| "step": 4869 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.3379112283628081e-06, |
| "loss": 0.0029, |
| "reward": -0.187, |
| "step": 4879 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.2779340998742185e-06, |
| "loss": 0.0032, |
| "reward": -0.1955, |
| "step": 4889 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.219134824004704e-06, |
| "loss": 0.0028, |
| "reward": -0.106, |
| "step": 4899 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.16153200786198e-06, |
| "loss": 0.0032, |
| "reward": -0.2001, |
| "step": 4909 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.1051438799324999e-06, |
| "loss": 0.0029, |
| "reward": -0.1302, |
| "step": 4919 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.0499882843130487e-06, |
| "loss": 0.0032, |
| "reward": -0.2308, |
| "step": 4929 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.96082675063948e-07, |
| "loss": 0.0028, |
| "reward": -0.2366, |
| "step": 4939 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.43444110685714e-07, |
| "loss": 0.003, |
| "reward": -0.1516, |
| "step": 4949 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 8.920892487208343e-07, |
| "loss": 0.0034, |
| "reward": 0.0169, |
| "step": 4959 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 8.420343404825132e-07, |
| "loss": 0.0026, |
| "reward": -0.0979, |
| "step": 4969 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 7.932952259118776e-07, |
| "loss": 0.003, |
| "reward": -0.1379, |
| "step": 4979 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 7.458873285654489e-07, |
| "loss": 0.0035, |
| "reward": -0.0396, |
| "step": 4989 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.998256507343016e-07, |
| "loss": 0.0027, |
| "reward": 0.0041, |
| "step": 4999 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.551247686965872e-07, |
| "loss": 0.0034, |
| "reward": -0.1662, |
| "step": 5009 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.117988281048626e-07, |
| "loss": 0.0032, |
| "reward": -0.1413, |
| "step": 5019 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.698615395096485e-07, |
| "loss": 0.0029, |
| "reward": -0.2091, |
| "step": 5029 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 5.293261740207456e-07, |
| "loss": 0.0028, |
| "reward": -0.2433, |
| "step": 5039 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.902055591075355e-07, |
| "loss": 0.0031, |
| "reward": -0.2965, |
| "step": 5049 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.525120745397493e-07, |
| "loss": 0.0029, |
| "reward": -0.1276, |
| "step": 5059 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.1625764846984276e-07, |
| "loss": 0.0031, |
| "reward": -0.1481, |
| "step": 5069 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.814537536583318e-07, |
| "loss": 0.0028, |
| "reward": -0.1388, |
| "step": 5079 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.481114038432176e-07, |
| "loss": 0.003, |
| "reward": -0.2624, |
| "step": 5089 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.1624115025468695e-07, |
| "loss": 0.0029, |
| "reward": -0.0321, |
| "step": 5099 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.8585307827613764e-07, |
| "loss": 0.0029, |
| "reward": -0.2161, |
| "step": 5109 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.569568042526721e-07, |
| "loss": 0.0035, |
| "reward": -0.2493, |
| "step": 5119 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.2956147244796946e-07, |
| "loss": 0.0031, |
| "reward": -0.1123, |
| "step": 5129 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.0367575215059222e-07, |
| "loss": 0.0031, |
| "reward": -0.1892, |
| "step": 5139 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7930783493055936e-07, |
| "loss": 0.0031, |
| "reward": -0.3653, |
| "step": 5149 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.5646543204712595e-07, |
| "loss": 0.003, |
| "reward": -0.1532, |
| "step": 5159 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.3515577200853946e-07, |
| "loss": 0.0031, |
| "reward": -0.2271, |
| "step": 5169 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.1538559828457586e-07, |
| "loss": 0.0033, |
| "reward": -0.1938, |
| "step": 5179 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.716116717254698e-08, |
| "loss": 0.003, |
| "reward": -0.2875, |
| "step": 5189 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 8.048824581750325e-08, |
| "loss": 0.0032, |
| "reward": -0.1349, |
| "step": 5199 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 6.537211038719571e-08, |
| "loss": 0.0027, |
| "reward": -0.1755, |
| "step": 5209 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 5.1817544402442686e-08, |
| "loss": 0.0033, |
| "reward": -0.1694, |
| "step": 5219 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.9828837223365166e-08, |
| "loss": 0.0027, |
| "reward": -0.1913, |
| "step": 5229 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.9409782692019218e-08, |
| "loss": 0.0032, |
| "reward": -0.2301, |
| "step": 5239 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.056367793183134e-08, |
| "loss": 0.0032, |
| "reward": -0.3049, |
| "step": 5249 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.3293322304213652e-08, |
| "loss": 0.0034, |
| "reward": -0.2132, |
| "step": 5259 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 7.601016522708616e-09, |
| "loss": 0.0031, |
| "reward": -0.2705, |
| "step": 5269 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.4885619249203086e-09, |
| "loss": 0.0031, |
| "reward": -0.1178, |
| "step": 5279 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.572599024820773e-10, |
| "loss": 0.0031, |
| "reward": -0.1919, |
| "step": 5289 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 7.911489227074853e-12, |
| "loss": 0.0029, |
| "reward": -0.0784, |
| "step": 5299 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 6.408171077015856e-10, |
| "loss": 0.0031, |
| "reward": -0.1285, |
| "step": 5309 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.8557764740955172e-09, |
| "loss": 0.0031, |
| "reward": -0.1184, |
| "step": 5319 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 6.652088661621703e-09, |
| "loss": 0.003, |
| "reward": -0.2125, |
| "step": 5329 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.2028552322327358e-08, |
| "loss": 0.004, |
| "reward": -0.202, |
| "step": 5339 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.898346606725887e-08, |
| "loss": 0.0032, |
| "reward": -0.206, |
| "step": 5349 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.7514629004871673e-08, |
| "loss": 0.0033, |
| "reward": -0.2027, |
| "step": 5359 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.761934143750256e-08, |
| "loss": 0.0044, |
| "reward": -0.2893, |
| "step": 5369 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.9294405715696324e-08, |
| "loss": 0.0032, |
| "reward": -0.1786, |
| "step": 5379 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 6.253612725009962e-08, |
| "loss": 0.0033, |
| "reward": -0.26, |
| "step": 5389 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 7.734031568062683e-08, |
| "loss": 0.0024, |
| "reward": -0.2568, |
| "step": 5399 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.370228620249778e-08, |
| "loss": 0.0037, |
| "reward": -0.1439, |
| "step": 5409 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.116168610487578e-07, |
| "loss": 0.0028, |
| "reward": -0.2807, |
| "step": 5419 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.3107837112877664e-07, |
| "loss": 0.0032, |
| "reward": -0.2143, |
| "step": 5429 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.5208065782225667e-07, |
| "loss": 0.0031, |
| "reward": -0.2846, |
| "step": 5439 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7461707492811786e-07, |
| "loss": 0.0032, |
| "reward": -0.2704, |
| "step": 5449 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.9868049076771478e-07, |
| "loss": 0.0035, |
| "reward": -0.2247, |
| "step": 5459 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.2426329044164808e-07, |
| "loss": 0.0032, |
| "reward": -0.1316, |
| "step": 5469 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5135737823952457e-07, |
| "loss": 0.0033, |
| "reward": -0.1427, |
| "step": 5479 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.7995418020185016e-07, |
| "loss": 0.0029, |
| "reward": -0.2073, |
| "step": 5489 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.100446468332596e-07, |
| "loss": 0.0026, |
| "reward": -0.1949, |
| "step": 5499 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.41619255966264e-07, |
| "loss": 0.0028, |
| "reward": -0.2821, |
| "step": 5509 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.746680157745258e-07, |
| "loss": 0.0029, |
| "reward": -0.1413, |
| "step": 5519 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.091804679348144e-07, |
| "loss": 0.0032, |
| "reward": -0.2282, |
| "step": 5529 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.451456909365265e-07, |
| "loss": 0.003, |
| "reward": -0.1473, |
| "step": 5539 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.825523035378365e-07, |
| "loss": 0.0032, |
| "reward": -0.1949, |
| "step": 5549 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.213884683672954e-07, |
| "loss": 0.0041, |
| "reward": -0.0118, |
| "step": 5559 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 5.61641895669785e-07, |
| "loss": 0.0031, |
| "reward": -0.1958, |
| "step": 5569 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 6.03299847195613e-07, |
| "loss": 0.003, |
| "reward": -0.0907, |
| "step": 5579 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 6.46349140231578e-07, |
| "loss": 0.0032, |
| "reward": -0.1633, |
| "step": 5589 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 6.907761517726225e-07, |
| "loss": 0.0037, |
| "reward": -0.3335, |
| "step": 5599 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 7.365668228328832e-07, |
| "loss": 0.0032, |
| "reward": -0.2729, |
| "step": 5609 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 7.837066628946427e-07, |
| "loss": 0.004, |
| "reward": -0.2201, |
| "step": 5619 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 8.321807544939037e-07, |
| "loss": 0.0035, |
| "reward": -0.1243, |
| "step": 5629 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 8.819737579410198e-07, |
| "loss": 0.003, |
| "reward": -0.1845, |
| "step": 5639 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.33069916174974e-07, |
| "loss": 0.0032, |
| "reward": -0.2037, |
| "step": 5649 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.854530597496953e-07, |
| "loss": 0.0031, |
| "reward": -0.0922, |
| "step": 5659 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.039106611950943e-06, |
| "loss": 0.0025, |
| "reward": -0.2618, |
| "step": 5669 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.0940135940419828e-06, |
| "loss": 0.003, |
| "reward": -0.1229, |
| "step": 5679 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.1501566306365758e-06, |
| "loss": 0.0029, |
| "reward": -0.0647, |
| "step": 5689 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.2075179551973986e-06, |
| "loss": 0.003, |
| "reward": -0.1913, |
| "step": 5699 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.2660794156583233e-06, |
| "loss": 0.0028, |
| "reward": -0.1541, |
| "step": 5709 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.3258224801686503e-06, |
| "loss": 0.0033, |
| "reward": -0.1554, |
| "step": 5719 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.3867282429575152e-06, |
| "loss": 0.0041, |
| "reward": -0.3137, |
| "step": 5729 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.4487774303166875e-06, |
| "loss": 0.0033, |
| "reward": -0.2465, |
| "step": 5739 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.511950406699712e-06, |
| "loss": 0.0038, |
| "reward": -0.2697, |
| "step": 5749 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.576227180935655e-06, |
| "loss": 0.0032, |
| "reward": -0.155, |
| "step": 5759 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.641587412555281e-06, |
| "loss": 0.0026, |
| "reward": -0.128, |
| "step": 5769 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7080104182278716e-06, |
| "loss": 0.0036, |
| "reward": -0.1322, |
| "step": 5779 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7754751783064622e-06, |
| "loss": 0.0036, |
| "reward": -0.2405, |
| "step": 5789 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.8439603434795516e-06, |
| "loss": 0.0032, |
| "reward": -0.1742, |
| "step": 5799 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.9134442415270926e-06, |
| "loss": 0.003, |
| "reward": -0.1415, |
| "step": 5809 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.983904884178742e-06, |
| "loss": 0.0027, |
| "reward": -0.3485, |
| "step": 5819 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.0553199740720214e-06, |
| "loss": 0.0033, |
| "reward": -0.2301, |
| "step": 5829 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.1276669118084043e-06, |
| "loss": 0.0038, |
| "reward": -0.419, |
| "step": 5839 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.200922803104868e-06, |
| "loss": 0.0028, |
| "reward": -0.1832, |
| "step": 5849 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.2750644660388675e-06, |
| "loss": 0.0028, |
| "reward": -0.1299, |
| "step": 5859 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.3500684383842605e-06, |
| "loss": 0.0027, |
| "reward": -0.0986, |
| "step": 5869 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.4259109850359695e-06, |
| "loss": 0.0027, |
| "reward": -0.1309, |
| "step": 5879 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.502568105520961e-06, |
| "loss": 0.0034, |
| "reward": -0.1672, |
| "step": 5889 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.580015541593277e-06, |
| "loss": 0.0031, |
| "reward": -0.1751, |
| "step": 5899 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.6582287849105274e-06, |
| "loss": 0.0031, |
| "reward": -0.2151, |
| "step": 5909 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.737183084789651e-06, |
| "loss": 0.0029, |
| "reward": -0.1455, |
| "step": 5919 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.816853456039244e-06, |
| "loss": 0.0026, |
| "reward": -0.1059, |
| "step": 5929 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.8972146868661828e-06, |
| "loss": 0.0028, |
| "reward": -0.1925, |
| "step": 5939 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 2.9782413468539417e-06, |
| "loss": 0.0028, |
| "reward": -0.3239, |
| "step": 5949 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.0599077950100608e-06, |
| "loss": 0.0039, |
| "reward": -0.2807, |
| "step": 5959 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.1421881878802864e-06, |
| "loss": 0.0028, |
| "reward": -0.2573, |
| "step": 5969 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.225056487726732e-06, |
| "loss": 0.003, |
| "reward": -0.2115, |
| "step": 5979 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.308486470767613e-06, |
| "loss": 0.0028, |
| "reward": -0.1847, |
| "step": 5989 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.3924517354757315e-06, |
| "loss": 0.0027, |
| "reward": -0.1512, |
| "step": 5999 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.476925710933339e-06, |
| "loss": 0.0031, |
| "reward": -0.0941, |
| "step": 6009 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.5618816652404714e-06, |
| "loss": 0.0033, |
| "reward": -0.2708, |
| "step": 6019 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.6472927139743546e-06, |
| "loss": 0.0024, |
| "reward": -0.1351, |
| "step": 6029 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.7331318286969787e-06, |
| "loss": 0.003, |
| "reward": -0.2288, |
| "step": 6039 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.819371845508301e-06, |
| "loss": 0.0025, |
| "reward": -0.1268, |
| "step": 6049 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.9059854736422616e-06, |
| "loss": 0.0035, |
| "reward": -0.1775, |
| "step": 6059 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.992945304103046e-06, |
| "loss": 0.0029, |
| "reward": -0.1164, |
| "step": 6069 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.080223818338627e-06, |
| "loss": 0.003, |
| "reward": -0.3306, |
| "step": 6079 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.16779339694911e-06, |
| "loss": 0.0029, |
| "reward": -0.124, |
| "step": 6089 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.25562632842687e-06, |
| "loss": 0.0029, |
| "reward": -0.1823, |
| "step": 6099 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.343694817925945e-06, |
| "loss": 0.0026, |
| "reward": -0.2144, |
| "step": 6109 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.4319709960577205e-06, |
| "loss": 0.0028, |
| "reward": -0.0664, |
| "step": 6119 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.520426927710255e-06, |
| "loss": 0.0027, |
| "reward": -0.1177, |
| "step": 6129 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.609034620888349e-06, |
| "loss": 0.0034, |
| "reward": -0.135, |
| "step": 6139 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.697766035571671e-06, |
| "loss": 0.0043, |
| "reward": -0.3131, |
| "step": 6149 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.786593092588086e-06, |
| "loss": 0.0024, |
| "reward": -0.234, |
| "step": 6159 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.875487682499278e-06, |
| "loss": 0.0027, |
| "reward": -0.0573, |
| "step": 6169 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.96442167449609e-06, |
| "loss": 0.0034, |
| "reward": -0.1771, |
| "step": 6179 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.053366925300511e-06, |
| "loss": 0.0028, |
| "reward": -0.1481, |
| "step": 6189 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.142295288071675e-06, |
| "loss": 0.0041, |
| "reward": -0.2635, |
| "step": 6199 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.2311786213129315e-06, |
| "loss": 0.003, |
| "reward": -0.1914, |
| "step": 6209 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.319988797777316e-06, |
| "loss": 0.0027, |
| "reward": -0.181, |
| "step": 6219 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.408697713368388e-06, |
| "loss": 0.0028, |
| "reward": -0.1659, |
| "step": 6229 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.4972772960338784e-06, |
| "loss": 0.0024, |
| "reward": -0.0837, |
| "step": 6239 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 5.585699514649057e-06, |
| "loss": 0.003, |
| "reward": -0.1156, |
| "step": 6249 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.6739363878872754e-06, |
| "loss": 0.003, |
| "reward": -0.1918, |
| "step": 6259 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.761959993074669e-06, |
| "loss": 0.0027, |
| "reward": -0.2119, |
| "step": 6269 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.84974247502633e-06, |
| "loss": 0.0031, |
| "reward": -0.1654, |
| "step": 6279 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.93725605486109e-06, |
| "loss": 0.0029, |
| "reward": -0.1048, |
| "step": 6289 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.024473038792242e-06, |
| "loss": 0.0029, |
| "reward": -0.1743, |
| "step": 6299 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.1113658268912065e-06, |
| "loss": 0.0028, |
| "reward": -0.1883, |
| "step": 6309 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.197906921821632e-06, |
| "loss": 0.0027, |
| "reward": -0.1351, |
| "step": 6319 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.284068937540894e-06, |
| "loss": 0.0027, |
| "reward": -0.1687, |
| "step": 6329 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.369824607966508e-06, |
| "loss": 0.0028, |
| "reward": -0.1027, |
| "step": 6339 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.455146795604479e-06, |
| "loss": 0.0028, |
| "reward": -0.0773, |
| "step": 6349 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.5400085001370186e-06, |
| "loss": 0.0025, |
| "reward": -0.2412, |
| "step": 6359 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.624382866966794e-06, |
| "loss": 0.003, |
| "reward": -0.1633, |
| "step": 6369 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.708243195715136e-06, |
| "loss": 0.0026, |
| "reward": -0.2322, |
| "step": 6379 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 6.791562948671414e-06, |
| "loss": 0.0032, |
| "reward": -0.1261, |
| "step": 6389 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.874315759190886e-06, |
| "loss": 0.0025, |
| "reward": -0.1441, |
| "step": 6399 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.956475440038507e-06, |
| "loss": 0.0031, |
| "reward": -0.0489, |
| "step": 6409 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.038015991675885e-06, |
| "loss": 0.0028, |
| "reward": -0.1033, |
| "step": 6419 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.118911610488885e-06, |
| "loss": 0.0027, |
| "reward": -0.2106, |
| "step": 6429 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.199136696953178e-06, |
| "loss": 0.0036, |
| "reward": -0.0478, |
| "step": 6439 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.2786658637353125e-06, |
| "loss": 0.003, |
| "reward": -0.218, |
| "step": 6449 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.357473943726493e-06, |
| "loss": 0.0026, |
| "reward": -0.0883, |
| "step": 6459 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.435535998006814e-06, |
| "loss": 0.0034, |
| "reward": -0.2207, |
| "step": 6469 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.512827323737152e-06, |
| "loss": 0.0033, |
| "reward": -0.192, |
| "step": 6479 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.589323461976461e-06, |
| "loss": 0.0028, |
| "reward": -0.1123, |
| "step": 6489 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.665000205421812e-06, |
| "loss": 0.0029, |
| "reward": -0.2671, |
| "step": 6499 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.739833606068832e-06, |
| "loss": 0.0031, |
| "reward": -0.1165, |
| "step": 6509 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.81379998279006e-06, |
| "loss": 0.0029, |
| "reward": -0.1378, |
| "step": 6519 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 7.88687592882892e-06, |
| "loss": 0.0033, |
| "reward": -0.1337, |
| "step": 6529 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 7.959038319206758e-06, |
| "loss": 0.0027, |
| "reward": -0.2007, |
| "step": 6539 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.03026431804083e-06, |
| "loss": 0.0026, |
| "reward": -0.1243, |
| "step": 6549 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.100531385770696e-06, |
| "loss": 0.0029, |
| "reward": -0.0918, |
| "step": 6559 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.169817286290935e-06, |
| "loss": 0.0029, |
| "reward": -0.0355, |
| "step": 6569 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.238100093987765e-06, |
| "loss": 0.0027, |
| "reward": -0.2479, |
| "step": 6579 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.305358200677449e-06, |
| "loss": 0.0026, |
| "reward": -0.074, |
| "step": 6589 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 8.371570322444209e-06, |
| "loss": 0.0029, |
| "reward": -0.0884, |
| "step": 6599 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.436715506375557e-06, |
| "loss": 0.0028, |
| "reward": -0.1674, |
| "step": 6609 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.500773137192906e-06, |
| "loss": 0.0035, |
| "reward": -0.2794, |
| "step": 6619 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.56372294377524e-06, |
| "loss": 0.0027, |
| "reward": -0.1072, |
| "step": 6629 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.625545005574002e-06, |
| "loss": 0.0023, |
| "reward": -0.0992, |
| "step": 6639 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.686219758916918e-06, |
| "loss": 0.0029, |
| "reward": -0.1256, |
| "step": 6649 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.745728003198991e-06, |
| "loss": 0.0039, |
| "reward": 0.0413, |
| "step": 6659 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 8.804050906958537e-06, |
| "loss": 0.0026, |
| "reward": -0.1593, |
| "step": 6669 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 8.861170013836436e-06, |
| "loss": 0.0031, |
| "reward": -0.0352, |
| "step": 6679 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 8.917067248416647e-06, |
| "loss": 0.003, |
| "reward": -0.0722, |
| "step": 6689 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 8.971724921946235e-06, |
| "loss": 0.0027, |
| "reward": -0.1898, |
| "step": 6699 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.025125737932962e-06, |
| "loss": 0.003, |
| "reward": -0.1166, |
| "step": 6709 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.077252797618818e-06, |
| "loss": 0.0024, |
| "reward": 0.0003, |
| "step": 6719 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.128089605327627e-06, |
| "loss": 0.0028, |
| "reward": -0.0084, |
| "step": 6729 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.177620073685139e-06, |
| "loss": 0.0025, |
| "reward": 0.0006, |
| "step": 6739 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.225828528709911e-06, |
| "loss": 0.0025, |
| "reward": -0.0565, |
| "step": 6749 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.27269971477334e-06, |
| "loss": 0.0027, |
| "reward": -0.2262, |
| "step": 6759 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.318218799427321e-06, |
| "loss": 0.0024, |
| "reward": -0.2918, |
| "step": 6769 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.362371378098033e-06, |
| "loss": 0.0027, |
| "reward": -0.1157, |
| "step": 6779 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.405143478644232e-06, |
| "loss": 0.0031, |
| "reward": -0.1187, |
| "step": 6789 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.446521565778804e-06, |
| "loss": 0.0028, |
| "reward": -0.0971, |
| "step": 6799 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.486492545351985e-06, |
| "loss": 0.0029, |
| "reward": -0.0164, |
| "step": 6809 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.525043768495047e-06, |
| "loss": 0.0029, |
| "reward": -0.2, |
| "step": 6819 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.562163035623038e-06, |
| "loss": 0.0022, |
| "reward": 0.0569, |
| "step": 6829 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.597838600295355e-06, |
| "loss": 0.0023, |
| "reward": -0.163, |
| "step": 6839 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.632059172932935e-06, |
| "loss": 0.0027, |
| "reward": -0.1407, |
| "step": 6849 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.664813924390828e-06, |
| "loss": 0.0026, |
| "reward": -0.1893, |
| "step": 6859 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.696092489385132e-06, |
| "loss": 0.0027, |
| "reward": -0.0518, |
| "step": 6869 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.72588496977306e-06, |
| "loss": 0.0026, |
| "reward": -0.1236, |
| "step": 6879 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.754181937685261e-06, |
| "loss": 0.0026, |
| "reward": -0.2622, |
| "step": 6889 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.780974438509254e-06, |
| "loss": 0.0025, |
| "reward": -0.1576, |
| "step": 6899 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.80625399372313e-06, |
| "loss": 0.0036, |
| "reward": -0.0548, |
| "step": 6909 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.8300126035786e-06, |
| "loss": 0.0037, |
| "reward": -0.219, |
| "step": 6919 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.852242749632524e-06, |
| "loss": 0.0028, |
| "reward": -0.0597, |
| "step": 6929 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.872937397126121e-06, |
| "loss": 0.0024, |
| "reward": -0.133, |
| "step": 6939 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.89208999721115e-06, |
| "loss": 0.0027, |
| "reward": -0.0708, |
| "step": 6949 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.909694489022273e-06, |
| "loss": 0.0026, |
| "reward": -0.0812, |
| "step": 6959 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.92574530159505e-06, |
| "loss": 0.0031, |
| "reward": 0.0002, |
| "step": 6969 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.940237355628861e-06, |
| "loss": 0.003, |
| "reward": 0.0289, |
| "step": 6979 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.95316606509427e-06, |
| "loss": 0.0029, |
| "reward": -0.0353, |
| "step": 6989 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.964527338684262e-06, |
| "loss": 0.0023, |
| "reward": -0.0618, |
| "step": 6999 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.974317581108963e-06, |
| "loss": 0.003, |
| "reward": -0.1512, |
| "step": 7009 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.98253369423336e-06, |
| "loss": 0.0032, |
| "reward": 0.0394, |
| "step": 7019 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.989173078057715e-06, |
| "loss": 0.0027, |
| "reward": -0.081, |
| "step": 7029 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.99423363154034e-06, |
| "loss": 0.0028, |
| "reward": -0.0309, |
| "step": 7039 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.99771375326247e-06, |
| "loss": 0.0025, |
| "reward": -0.1167, |
| "step": 7049 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.999612341935039e-06, |
| "loss": 0.0024, |
| "reward": -0.0893, |
| "step": 7059 |
| } |
| ], |
| "max_steps": 7063, |
| "num_train_epochs": 1.0, |
| "total_flos": 0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|