{ "best_metric": null, "best_model_checkpoint": null, "epoch": null, "global_step": 0, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.999359182892298e-06, "loss": 0.0058, "reward": -0.5258, "step": 9 }, { "epoch": 0.0, "learning_rate": 9.997144223525905e-06, "loss": 0.0053, "reward": -0.5235, "step": 19 }, { "epoch": 0.0, "learning_rate": 9.993347911338379e-06, "loss": 0.0063, "reward": -0.5422, "step": 29 }, { "epoch": 0.01, "learning_rate": 9.987971447677674e-06, "loss": 0.0052, "reward": -0.5766, "step": 39 }, { "epoch": 0.01, "learning_rate": 9.981016533932741e-06, "loss": 0.0042, "reward": -0.3694, "step": 49 }, { "epoch": 0.01, "learning_rate": 9.972485370995129e-06, "loss": 0.004, "reward": -0.346, "step": 59 }, { "epoch": 0.01, "learning_rate": 9.962380658562499e-06, "loss": 0.0047, "reward": -0.4197, "step": 69 }, { "epoch": 0.01, "learning_rate": 9.950705594284304e-06, "loss": 0.0043, "reward": -0.1848, "step": 79 }, { "epoch": 0.01, "learning_rate": 9.937463872749902e-06, "loss": 0.0046, "reward": -0.4406, "step": 89 }, { "epoch": 0.01, "learning_rate": 9.922659684319374e-06, "loss": 0.0044, "reward": -0.4997, "step": 99 }, { "epoch": 0.02, "learning_rate": 9.906297713797501e-06, "loss": 0.0048, "reward": -0.4274, "step": 109 }, { "epoch": 0.02, "learning_rate": 9.888383138951242e-06, "loss": 0.0046, "reward": -0.3451, "step": 119 }, { "epoch": 0.02, "learning_rate": 9.868921628871223e-06, "loss": 0.0051, "reward": -0.4131, "step": 129 }, { "epoch": 0.02, "learning_rate": 9.847919342177744e-06, "loss": 0.004, "reward": -0.2998, "step": 139 }, { "epoch": 0.02, "learning_rate": 9.825382925071883e-06, "loss": 0.0045, "reward": -0.4068, "step": 149 }, { "epoch": 0.02, "learning_rate": 9.801319509232287e-06, "loss": 0.0051, "reward": -0.4744, "step": 159 }, { "epoch": 0.02, "learning_rate": 9.775736709558352e-06, "loss": 0.0054, "reward": -0.5511, "step": 169 }, { "epoch": 0.03, "learning_rate": 9.748642621760474e-06, "loss": 0.0055, "reward": -0.2605, "step": 179 }, { "epoch": 0.03, "learning_rate": 9.720045819798151e-06, "loss": 0.0043, "reward": -0.4817, "step": 189 }, { "epoch": 0.03, "learning_rate": 9.68995535316674e-06, "loss": 0.0061, "reward": -0.3717, "step": 199 }, { "epoch": 0.03, "learning_rate": 9.658380744033737e-06, "loss": 0.0041, "reward": -0.3632, "step": 209 }, { "epoch": 0.03, "learning_rate": 9.625331984225475e-06, "loss": 0.0039, "reward": -0.4044, "step": 219 }, { "epoch": 0.03, "learning_rate": 9.590819532065188e-06, "loss": 0.0049, "reward": -0.3763, "step": 229 }, { "epoch": 0.03, "learning_rate": 9.554854309063474e-06, "loss": 0.0053, "reward": -0.3172, "step": 239 }, { "epoch": 0.04, "learning_rate": 9.517447696462163e-06, "loss": 0.0047, "reward": -0.5815, "step": 249 }, { "epoch": 0.04, "learning_rate": 9.478611531632702e-06, "loss": 0.0048, "reward": -0.35, "step": 259 }, { "epoch": 0.04, "learning_rate": 9.438358104330215e-06, "loss": 0.0054, "reward": -0.4544, "step": 269 }, { "epoch": 0.04, "learning_rate": 9.396700152804386e-06, "loss": 0.0039, "reward": -0.3515, "step": 279 }, { "epoch": 0.04, "learning_rate": 9.353650859768423e-06, "loss": 0.0052, "reward": -0.3799, "step": 289 }, { "epoch": 0.04, "learning_rate": 9.309223848227377e-06, "loss": 0.0048, "reward": -0.5378, "step": 299 }, { "epoch": 0.04, "learning_rate": 9.26343317716712e-06, "loss": 0.0057, "reward": -0.4758, "step": 309 }, { "epoch": 0.05, "learning_rate": 9.216293337105358e-06, "loss": 0.0044, "reward": -0.5203, "step": 319 }, { "epoch": 0.05, "learning_rate": 9.167819245506095e-06, "loss": 0.0054, "reward": -0.5237, "step": 329 }, { "epoch": 0.05, "learning_rate": 9.118026242058976e-06, "loss": 0.0058, "reward": -0.2535, "step": 339 }, { "epoch": 0.05, "learning_rate": 9.066930083825025e-06, "loss": 0.0067, "reward": -0.5059, "step": 349 }, { "epoch": 0.05, "learning_rate": 9.014546940250301e-06, "loss": 0.0059, "reward": -0.5255, "step": 359 }, { "epoch": 0.05, "learning_rate": 8.960893388049056e-06, "loss": 0.0047, "reward": -0.3411, "step": 369 }, { "epoch": 0.05, "learning_rate": 8.905986405958015e-06, "loss": 0.0048, "reward": -0.3859, "step": 379 }, { "epoch": 0.06, "learning_rate": 8.849843369363425e-06, "loss": 0.0058, "reward": -0.3785, "step": 389 }, { "epoch": 0.06, "learning_rate": 8.792482044802602e-06, "loss": 0.0047, "reward": -0.3432, "step": 399 }, { "epoch": 0.06, "learning_rate": 8.733920584341673e-06, "loss": 0.0057, "reward": -0.3958, "step": 409 }, { "epoch": 0.06, "learning_rate": 8.674177519831351e-06, "loss": 0.0049, "reward": -0.4713, "step": 419 }, { "epoch": 0.06, "learning_rate": 8.613271757042483e-06, "loss": 0.0048, "reward": -0.4485, "step": 429 }, { "epoch": 0.06, "learning_rate": 8.551222569683315e-06, "loss": 0.0054, "reward": -0.5861, "step": 439 }, { "epoch": 0.06, "learning_rate": 8.488049593300287e-06, "loss": 0.0052, "reward": -0.292, "step": 449 }, { "epoch": 0.06, "learning_rate": 8.423772819064349e-06, "loss": 0.0049, "reward": -0.4702, "step": 459 }, { "epoch": 0.07, "learning_rate": 8.358412587444719e-06, "loss": 0.0053, "reward": -0.5542, "step": 469 }, { "epoch": 0.07, "learning_rate": 8.291989581772126e-06, "loss": 0.0048, "reward": -0.4116, "step": 479 }, { "epoch": 0.07, "learning_rate": 8.224524821693534e-06, "loss": 0.005, "reward": -0.3746, "step": 489 }, { "epoch": 0.07, "learning_rate": 8.156039656520448e-06, "loss": 0.0046, "reward": -0.4481, "step": 499 }, { "epoch": 0.07, "learning_rate": 8.086555758472905e-06, "loss": 0.0043, "reward": -0.4127, "step": 509 }, { "epoch": 0.07, "learning_rate": 8.016095115821258e-06, "loss": 0.0069, "reward": -0.5597, "step": 519 }, { "epoch": 0.07, "learning_rate": 7.944680025927976e-06, "loss": 0.0061, "reward": -0.4318, "step": 529 }, { "epoch": 0.08, "learning_rate": 7.872333088191598e-06, "loss": 0.0045, "reward": -0.3063, "step": 539 }, { "epoch": 0.08, "learning_rate": 7.799077196895131e-06, "loss": 0.0048, "reward": -0.3184, "step": 549 }, { "epoch": 0.08, "learning_rate": 7.724935533961129e-06, "loss": 0.0058, "reward": -0.476, "step": 559 }, { "epoch": 0.08, "learning_rate": 7.649931561615733e-06, "loss": 0.0051, "reward": -0.3959, "step": 569 }, { "epoch": 0.08, "learning_rate": 7.574089014964029e-06, "loss": 0.0051, "reward": -0.4472, "step": 579 }, { "epoch": 0.08, "learning_rate": 7.497431894479034e-06, "loss": 0.0058, "reward": -0.3736, "step": 589 }, { "epoch": 0.08, "learning_rate": 7.419984458406722e-06, "loss": 0.0043, "reward": -0.4911, "step": 599 }, { "epoch": 0.09, "learning_rate": 7.341771215089468e-06, "loss": 0.005, "reward": -0.3037, "step": 609 }, { "epoch": 0.09, "learning_rate": 7.26281691521035e-06, "loss": 0.0062, "reward": -0.4219, "step": 619 }, { "epoch": 0.09, "learning_rate": 7.183146543960761e-06, "loss": 0.0051, "reward": -0.3435, "step": 629 }, { "epoch": 0.09, "learning_rate": 7.102785313133821e-06, "loss": 0.0038, "reward": -0.3745, "step": 639 }, { "epoch": 0.09, "learning_rate": 7.021758653146058e-06, "loss": 0.0053, "reward": -0.3743, "step": 649 }, { "epoch": 0.09, "learning_rate": 6.940092204989934e-06, "loss": 0.006, "reward": -0.4696, "step": 659 }, { "epoch": 0.09, "learning_rate": 6.8578118121197144e-06, "loss": 0.0058, "reward": -0.4343, "step": 669 }, { "epoch": 0.1, "learning_rate": 6.774943512273267e-06, "loss": 0.0044, "reward": -0.2842, "step": 679 }, { "epoch": 0.1, "learning_rate": 6.691513529232391e-06, "loss": 0.0046, "reward": -0.345, "step": 689 }, { "epoch": 0.1, "learning_rate": 6.607548264524268e-06, "loss": 0.0047, "reward": -0.3973, "step": 699 }, { "epoch": 0.1, "learning_rate": 6.523074289066666e-06, "loss": 0.0039, "reward": -0.3412, "step": 709 }, { "epoch": 0.1, "learning_rate": 6.43811833475953e-06, "loss": 0.0054, "reward": -0.3593, "step": 719 }, { "epoch": 0.1, "learning_rate": 6.352707286025644e-06, "loss": 0.0049, "reward": -0.3673, "step": 729 }, { "epoch": 0.1, "learning_rate": 6.266868171303017e-06, "loss": 0.0046, "reward": -0.4224, "step": 739 }, { "epoch": 0.11, "learning_rate": 6.1806281544917e-06, "loss": 0.0045, "reward": -0.1521, "step": 749 }, { "epoch": 0.11, "learning_rate": 6.094014526357734e-06, "loss": 0.0056, "reward": -0.3997, "step": 759 }, { "epoch": 0.11, "learning_rate": 6.007054695896955e-06, "loss": 0.0046, "reward": -0.289, "step": 769 }, { "epoch": 0.11, "learning_rate": 5.9197761816613705e-06, "loss": 0.0042, "reward": -0.3431, "step": 779 }, { "epoch": 0.11, "learning_rate": 5.8322066030508924e-06, "loss": 0.0049, "reward": -0.3222, "step": 789 }, { "epoch": 0.11, "learning_rate": 5.74437367157313e-06, "loss": 0.0055, "reward": -0.4949, "step": 799 }, { "epoch": 0.11, "learning_rate": 5.656305182074053e-06, "loss": 0.0065, "reward": -0.4194, "step": 809 }, { "epoch": 0.12, "learning_rate": 5.568029003942271e-06, "loss": 0.005, "reward": -0.5284, "step": 819 }, { "epoch": 0.12, "learning_rate": 5.4795730722897435e-06, "loss": 0.0058, "reward": -0.5156, "step": 829 }, { "epoch": 0.12, "learning_rate": 5.390965379111655e-06, "loss": 0.0043, "reward": -0.3164, "step": 839 }, { "epoch": 0.12, "learning_rate": 5.302233964428329e-06, "loss": 0.004, "reward": -0.1936, "step": 849 }, { "epoch": 0.12, "learning_rate": 5.213406907411921e-06, "loss": 0.0039, "reward": -0.2792, "step": 859 }, { "epoch": 0.12, "learning_rate": 5.124512317500723e-06, "loss": 0.005, "reward": -0.2348, "step": 869 }, { "epoch": 0.12, "learning_rate": 5.035578325503908e-06, "loss": 0.0042, "reward": -0.3528, "step": 879 }, { "epoch": 0.13, "learning_rate": 4.946633074699485e-06, "loss": 0.005, "reward": -0.3753, "step": 889 }, { "epoch": 0.13, "learning_rate": 4.857704711928325e-06, "loss": 0.0052, "reward": -0.4383, "step": 899 }, { "epoch": 0.13, "learning_rate": 4.768821378687066e-06, "loss": 0.0045, "reward": -0.3664, "step": 909 }, { "epoch": 0.13, "learning_rate": 4.6800112022226865e-06, "loss": 0.0043, "reward": -0.3584, "step": 919 }, { "epoch": 0.13, "learning_rate": 4.591302286631612e-06, "loss": 0.0048, "reward": -0.455, "step": 929 }, { "epoch": 0.13, "learning_rate": 4.502722703966126e-06, "loss": 0.004, "reward": -0.3906, "step": 939 }, { "epoch": 0.13, "learning_rate": 4.414300485350945e-06, "loss": 0.0046, "reward": -0.4897, "step": 949 }, { "epoch": 0.14, "learning_rate": 4.326063612112721e-06, "loss": 0.0042, "reward": -0.384, "step": 959 }, { "epoch": 0.14, "learning_rate": 4.2380400069253255e-06, "loss": 0.0039, "reward": -0.4159, "step": 969 }, { "epoch": 0.14, "learning_rate": 4.15025752497367e-06, "loss": 0.0048, "reward": -0.4036, "step": 979 }, { "epoch": 0.14, "learning_rate": 4.062743945138906e-06, "loss": 0.0042, "reward": -0.4189, "step": 989 }, { "epoch": 0.14, "learning_rate": 3.975526961207759e-06, "loss": 0.005, "reward": -0.2446, "step": 999 }, { "epoch": 0.14, "learning_rate": 3.88863417310879e-06, "loss": 0.0047, "reward": -0.3705, "step": 1009 }, { "epoch": 0.14, "learning_rate": 3.8020930781783704e-06, "loss": 0.0048, "reward": -0.3862, "step": 1019 }, { "epoch": 0.15, "learning_rate": 3.7159310624591055e-06, "loss": 0.0041, "reward": -0.4077, "step": 1029 }, { "epoch": 0.15, "learning_rate": 3.630175392033488e-06, "loss": 0.0072, "reward": -0.2784, "step": 1039 }, { "epoch": 0.15, "learning_rate": 3.544853204395513e-06, "loss": 0.0058, "reward": -0.436, "step": 1049 }, { "epoch": 0.15, "learning_rate": 3.45999149986298e-06, "loss": 0.0045, "reward": -0.3979, "step": 1059 }, { "epoch": 0.15, "learning_rate": 3.375617133033209e-06, "loss": 0.0055, "reward": -0.4709, "step": 1069 }, { "epoch": 0.15, "learning_rate": 3.2917568042848648e-06, "loss": 0.0036, "reward": -0.3748, "step": 1079 }, { "epoch": 0.15, "learning_rate": 3.2167439478854534e-06, "loss": 0.0057, "reward": -0.586, "step": 1089 }, { "epoch": 0.16, "learning_rate": 3.1339332616713215e-06, "loss": 0.0047, "reward": -0.2301, "step": 1099 }, { "epoch": 0.16, "learning_rate": 3.051713094715606e-06, "loss": 0.0052, "reward": -0.4129, "step": 1109 }, { "epoch": 0.16, "learning_rate": 2.9701094656976084e-06, "loss": 0.0044, "reward": -0.2389, "step": 1119 }, { "epoch": 0.16, "learning_rate": 2.8891481981923897e-06, "loss": 0.0039, "reward": -0.3641, "step": 1129 }, { "epoch": 0.16, "learning_rate": 2.8088549124988785e-06, "loss": 0.0052, "reward": -0.3099, "step": 1139 }, { "epoch": 0.16, "learning_rate": 2.729255017532277e-06, "loss": 0.0037, "reward": -0.3203, "step": 1149 }, { "epoch": 0.16, "learning_rate": 2.6503737027833926e-06, "loss": 0.0052, "reward": -0.4073, "step": 1159 }, { "epoch": 0.17, "learning_rate": 2.5722359303473778e-06, "loss": 0.0052, "reward": -0.3275, "step": 1169 }, { "epoch": 0.17, "learning_rate": 2.494866427024452e-06, "loss": 0.004, "reward": -0.3811, "step": 1179 }, { "epoch": 0.17, "learning_rate": 2.4182896764950726e-06, "loss": 0.0052, "reward": -0.3413, "step": 1189 }, { "epoch": 0.17, "learning_rate": 2.3425299115720295e-06, "loss": 0.0052, "reward": -0.5202, "step": 1199 }, { "epoch": 0.17, "learning_rate": 2.267611106531961e-06, "loss": 0.0048, "reward": -0.3018, "step": 1209 }, { "epoch": 0.17, "learning_rate": 2.193556969528642e-06, "loss": 0.0056, "reward": -0.4154, "step": 1219 }, { "epoch": 0.17, "learning_rate": 2.1203909350905215e-06, "loss": 0.0046, "reward": -0.3844, "step": 1229 }, { "epoch": 0.18, "learning_rate": 2.048136156704833e-06, "loss": 0.0052, "reward": -0.3442, "step": 1239 }, { "epoch": 0.18, "learning_rate": 1.9768154994906352e-06, "loss": 0.0053, "reward": -0.357, "step": 1249 }, { "epoch": 0.18, "learning_rate": 1.9064515329631267e-06, "loss": 0.0043, "reward": -0.2778, "step": 1259 }, { "epoch": 0.18, "learning_rate": 1.837066523891483e-06, "loss": 0.0066, "reward": -0.3448, "step": 1269 }, { "epoch": 0.18, "learning_rate": 1.768682429252519e-06, "loss": 0.0051, "reward": -0.3787, "step": 1279 }, { "epoch": 0.18, "learning_rate": 1.7013208892823618e-06, "loss": 0.004, "reward": -0.4296, "step": 1289 }, { "epoch": 0.18, "learning_rate": 1.6350032206283833e-06, "loss": 0.0047, "reward": -0.5144, "step": 1299 }, { "epoch": 0.19, "learning_rate": 1.5697504096035116e-06, "loss": 0.0052, "reward": -0.331, "step": 1309 }, { "epoch": 0.19, "learning_rate": 1.5055831055450825e-06, "loss": 0.0039, "reward": -0.3365, "step": 1319 }, { "epoch": 0.19, "learning_rate": 1.4425216142803395e-06, "loss": 0.0047, "reward": -0.3728, "step": 1329 }, { "epoch": 0.19, "learning_rate": 1.3805858917006148e-06, "loss": 0.0048, "reward": -0.3547, "step": 1339 }, { "epoch": 0.19, "learning_rate": 1.3197955374462767e-06, "loss": 0.0043, "reward": -0.3398, "step": 1349 }, { "epoch": 0.19, "learning_rate": 1.260169788704383e-06, "loss": 0.0056, "reward": -0.2891, "step": 1359 }, { "epoch": 0.19, "learning_rate": 1.2017275141210604e-06, "loss": 0.0042, "reward": -0.3165, "step": 1369 }, { "epoch": 0.2, "learning_rate": 1.1444872078304865e-06, "loss": 0.0039, "reward": -0.366, "step": 1379 }, { "epoch": 0.2, "learning_rate": 1.0884669836024058e-06, "loss": 0.004, "reward": -0.2873, "step": 1389 }, { "epoch": 0.2, "learning_rate": 1.033684569109994e-06, "loss": 0.0046, "reward": -0.2922, "step": 1399 }, { "epoch": 0.2, "learning_rate": 9.80157300319926e-07, "loss": 0.0037, "reward": -0.3076, "step": 1409 }, { "epoch": 0.2, "learning_rate": 9.279021160063712e-07, "loss": 0.0066, "reward": -0.5213, "step": 1419 }, { "epoch": 0.2, "learning_rate": 8.769355523907086e-07, "loss": 0.0044, "reward": -0.5063, "step": 1429 }, { "epoch": 0.2, "learning_rate": 8.272737379086088e-07, "loss": 0.0041, "reward": -0.5909, "step": 1439 }, { "epoch": 0.21, "learning_rate": 7.789323881061777e-07, "loss": 0.0044, "reward": -0.2486, "step": 1449 }, { "epoch": 0.21, "learning_rate": 7.31926800666744e-07, "loss": 0.0044, "reward": -0.4971, "step": 1459 }, { "epoch": 0.21, "learning_rate": 6.862718505698945e-07, "loss": 0.0043, "reward": -0.5182, "step": 1469 }, { "epoch": 0.21, "learning_rate": 6.419819853842591e-07, "loss": 0.0046, "reward": -0.2389, "step": 1479 }, { "epoch": 0.21, "learning_rate": 5.990712206955674e-07, "loss": 0.0039, "reward": -0.3725, "step": 1489 }, { "epoch": 0.21, "learning_rate": 5.575531356713904e-07, "loss": 0.0046, "reward": -0.4709, "step": 1499 }, { "epoch": 0.21, "learning_rate": 5.17440868764002e-07, "loss": 0.005, "reward": -0.3545, "step": 1509 }, { "epoch": 0.22, "learning_rate": 4.787471135526945e-07, "loss": 0.0041, "reward": -0.2898, "step": 1519 }, { "epoch": 0.22, "learning_rate": 4.414841147268811e-07, "loss": 0.0045, "reward": -0.4093, "step": 1529 }, { "epoch": 0.22, "learning_rate": 4.056636642112427e-07, "loss": 0.004, "reward": -0.3965, "step": 1539 }, { "epoch": 0.22, "learning_rate": 3.7129709743416733e-07, "loss": 0.0045, "reward": -0.3877, "step": 1549 }, { "epoch": 0.22, "learning_rate": 3.3839528974062843e-07, "loss": 0.0046, "reward": -0.3655, "step": 1559 }, { "epoch": 0.22, "learning_rate": 3.069686529506777e-07, "loss": 0.0045, "reward": -0.5236, "step": 1569 }, { "epoch": 0.22, "learning_rate": 2.770271320646023e-07, "loss": 0.0053, "reward": -0.441, "step": 1579 }, { "epoch": 0.22, "learning_rate": 2.4858020211582346e-07, "loss": 0.004, "reward": -0.2618, "step": 1589 }, { "epoch": 0.23, "learning_rate": 2.2163686517250427e-07, "loss": 0.004, "reward": -0.4623, "step": 1599 }, { "epoch": 0.23, "learning_rate": 1.962056474888391e-07, "loss": 0.0038, "reward": -0.2542, "step": 1609 }, { "epoch": 0.23, "learning_rate": 1.7229459680690176e-07, "loss": 0.0051, "reward": -0.377, "step": 1619 }, { "epoch": 0.23, "learning_rate": 1.4991127980993503e-07, "loss": 0.0046, "reward": -0.3763, "step": 1629 }, { "epoch": 0.23, "learning_rate": 1.290627797278593e-07, "loss": 0.0046, "reward": -0.4469, "step": 1639 }, { "epoch": 0.23, "learning_rate": 1.0975569409577668e-07, "loss": 0.0054, "reward": -0.4301, "step": 1649 }, { "epoch": 0.23, "learning_rate": 9.199613266616891e-08, "loss": 0.0047, "reward": -0.481, "step": 1659 }, { "epoch": 0.24, "learning_rate": 7.578971547546554e-08, "loss": 0.0047, "reward": -0.549, "step": 1669 }, { "epoch": 0.24, "learning_rate": 6.114157106556773e-08, "loss": 0.0058, "reward": -0.3571, "step": 1679 }, { "epoch": 0.24, "learning_rate": 4.8056334860921426e-08, "loss": 0.005, "reward": -0.3539, "step": 1689 }, { "epoch": 0.24, "learning_rate": 3.653814770162578e-08, "loss": 0.0037, "reward": -0.3161, "step": 1699 }, { "epoch": 0.24, "learning_rate": 2.6590654533062975e-08, "loss": 0.0053, "reward": -0.4499, "step": 1709 }, { "epoch": 0.24, "learning_rate": 1.821700325244813e-08, "loss": 0.0047, "reward": -0.3267, "step": 1719 }, { "epoch": 0.24, "learning_rate": 1.141984371267335e-08, "loss": 0.0043, "reward": -0.4299, "step": 1729 }, { "epoch": 0.25, "learning_rate": 6.2013268837601834e-09, "loss": 0.0051, "reward": -0.4119, "step": 1739 }, { "epoch": 0.25, "learning_rate": 2.5631041721801976e-09, "loss": 0.0048, "reward": -0.492, "step": 1749 }, { "epoch": 0.25, "learning_rate": 5.063268982646863e-10, "loss": 0.0053, "reward": -0.5111, "step": 1759 }, { "epoch": 0.25, "learning_rate": 3.164593187165999e-11, "loss": 0.0057, "reward": -0.5665, "step": 1769 }, { "epoch": 0.25, "learning_rate": 1.1392114864122727e-09, "loss": 0.0039, "reward": -0.3717, "step": 1779 }, { "epoch": 0.25, "learning_rate": 3.828673071319955e-09, "loss": 0.0049, "reward": -0.4831, "step": 1789 }, { "epoch": 0.25, "learning_rate": 8.099179602960583e-09, "loss": 0.0052, "reward": -0.4277, "step": 1799 }, { "epoch": 0.26, "learning_rate": 1.3949379673960372e-08, "loss": 0.0047, "reward": -0.3236, "step": 1809 }, { "epoch": 0.26, "learning_rate": 2.1377421980859903e-08, "loss": 0.0044, "reward": -0.4584, "step": 1819 }, { "epoch": 0.26, "learning_rate": 3.0380955909962706e-08, "loss": 0.004, "reward": -0.427, "step": 1829 }, { "epoch": 0.26, "learning_rate": 4.095713228118803e-08, "loss": 0.0046, "reward": -0.4793, "step": 1839 }, { "epoch": 0.26, "learning_rate": 5.310260424970071e-08, "loss": 0.005, "reward": -0.3642, "step": 1849 }, { "epoch": 0.26, "learning_rate": 6.681352836502009e-08, "loss": 0.0036, "reward": -0.3235, "step": 1859 }, { "epoch": 0.26, "learning_rate": 8.208556578728933e-08, "loss": 0.0055, "reward": -0.4695, "step": 1869 }, { "epoch": 0.27, "learning_rate": 9.891388366030486e-08, "loss": 0.005, "reward": -0.3998, "step": 1879 }, { "epoch": 0.27, "learning_rate": 1.172931566408797e-07, "loss": 0.0046, "reward": -0.3878, "step": 1889 }, { "epoch": 0.27, "learning_rate": 1.3721756858405545e-07, "loss": 0.0055, "reward": -0.5067, "step": 1899 }, { "epoch": 0.27, "learning_rate": 1.5868081438363213e-07, "loss": 0.0038, "reward": -0.3747, "step": 1909 }, { "epoch": 0.27, "learning_rate": 1.8167610196742125e-07, "loss": 0.0052, "reward": -0.2461, "step": 1919 }, { "epoch": 0.27, "learning_rate": 2.0619615444661012e-07, "loss": 0.0044, "reward": -0.2565, "step": 1929 }, { "epoch": 0.27, "learning_rate": 2.3223321241854335e-07, "loss": 0.0045, "reward": -0.4397, "step": 1939 }, { "epoch": 0.28, "learning_rate": 2.5977903642219305e-07, "loss": 0.0047, "reward": -0.2828, "step": 1949 }, { "epoch": 0.28, "learning_rate": 2.8882490954554976e-07, "loss": 0.0049, "reward": -0.361, "step": 1959 }, { "epoch": 0.28, "learning_rate": 3.193616401840971e-07, "loss": 0.0054, "reward": -0.2941, "step": 1969 }, { "epoch": 0.28, "learning_rate": 3.513795649495133e-07, "loss": 0.0046, "reward": -0.3027, "step": 1979 }, { "epoch": 0.28, "learning_rate": 3.848685517276546e-07, "loss": 0.0037, "reward": -0.391, "step": 1989 }, { "epoch": 0.28, "learning_rate": 4.1981800288488416e-07, "loss": 0.0048, "reward": -0.3608, "step": 1999 }, { "epoch": 0.28, "learning_rate": 4.562168586217008e-07, "loss": 0.0044, "reward": -0.3958, "step": 2009 }, { "epoch": 0.29, "learning_rate": 4.940536004726337e-07, "loss": 0.0072, "reward": -0.5123, "step": 2019 }, { "epoch": 0.29, "learning_rate": 5.33316254951266e-07, "loss": 0.0044, "reward": -0.2958, "step": 2029 }, { "epoch": 0.29, "learning_rate": 5.739923973392685e-07, "loss": 0.0047, "reward": -0.4311, "step": 2039 }, { "epoch": 0.29, "learning_rate": 6.160691556182191e-07, "loss": 0.0039, "reward": -0.4388, "step": 2049 }, { "epoch": 0.29, "learning_rate": 6.595332145429611e-07, "loss": 0.0066, "reward": -0.4963, "step": 2059 }, { "epoch": 0.29, "learning_rate": 7.043708198552385e-07, "loss": 0.0051, "reward": -0.3088, "step": 2069 }, { "epoch": 0.29, "learning_rate": 7.505677826362434e-07, "loss": 0.0054, "reward": -0.3397, "step": 2079 }, { "epoch": 0.3, "learning_rate": 7.981094837967229e-07, "loss": 0.0044, "reward": -0.4052, "step": 2089 }, { "epoch": 0.3, "learning_rate": 8.46980878703203e-07, "loss": 0.0049, "reward": -0.3994, "step": 2099 }, { "epoch": 0.3, "learning_rate": 8.971665019388887e-07, "loss": 0.0051, "reward": -0.4254, "step": 2109 }, { "epoch": 0.3, "learning_rate": 9.486504721977097e-07, "loss": 0.0043, "reward": -0.3374, "step": 2119 }, { "epoch": 0.3, "learning_rate": 1.0014164973099866e-06, "loss": 0.0058, "reward": -0.3854, "step": 2129 }, { "epoch": 0.3, "learning_rate": 1.0554478793981015e-06, "loss": 0.0043, "reward": -0.3606, "step": 2139 }, { "epoch": 0.3, "learning_rate": 1.1107275201605728e-06, "loss": 0.0041, "reward": -0.3453, "step": 2149 }, { "epoch": 0.31, "learning_rate": 1.1672379262828238e-06, "loss": 0.008, "reward": -0.3853, "step": 2159 }, { "epoch": 0.31, "learning_rate": 1.2249612149729779e-06, "loss": 0.0052, "reward": -0.3406, "step": 2169 }, { "epoch": 0.31, "learning_rate": 1.2838791196208771e-06, "loss": 0.0043, "reward": -0.3696, "step": 2179 }, { "epoch": 0.31, "learning_rate": 1.3439729955785901e-06, "loss": 0.0048, "reward": -0.326, "step": 2189 }, { "epoch": 0.31, "learning_rate": 1.40522382606052e-06, "loss": 0.0048, "reward": -0.3875, "step": 2199 }, { "epoch": 0.31, "learning_rate": 1.4676122281613041e-06, "loss": 0.0044, "reward": -0.3224, "step": 2209 }, { "epoch": 0.31, "learning_rate": 1.5311184589895495e-06, "loss": 0.0054, "reward": -0.4125, "step": 2219 }, { "epoch": 0.32, "learning_rate": 1.595722421915507e-06, "loss": 0.0051, "reward": -0.5115, "step": 2229 }, { "epoch": 0.32, "learning_rate": 1.6614036729306638e-06, "loss": 0.0042, "reward": -0.5107, "step": 2239 }, { "epoch": 0.32, "learning_rate": 1.7281414271172925e-06, "loss": 0.0048, "reward": -0.4813, "step": 2249 }, { "epoch": 0.32, "learning_rate": 1.7959145652258408e-06, "loss": 0.0036, "reward": -0.2592, "step": 2259 }, { "epoch": 0.32, "learning_rate": 1.8647016403581763e-06, "loss": 0.0045, "reward": -0.431, "step": 2269 }, { "epoch": 0.32, "learning_rate": 1.9344808847544632e-06, "loss": 0.0045, "reward": -0.3041, "step": 2279 }, { "epoch": 0.32, "learning_rate": 2.0052302166816017e-06, "loss": 0.0054, "reward": -0.2746, "step": 2289 }, { "epoch": 0.33, "learning_rate": 2.0769272474210374e-06, "loss": 0.0059, "reward": -0.5107, "step": 2299 }, { "epoch": 0.33, "learning_rate": 2.149549288353676e-06, "loss": 0.0044, "reward": -0.3638, "step": 2309 }, { "epoch": 0.33, "learning_rate": 2.2230733581397427e-06, "loss": 0.0055, "reward": -0.3426, "step": 2319 }, { "epoch": 0.33, "learning_rate": 2.297476189991249e-06, "loss": 0.0048, "reward": -0.3561, "step": 2329 }, { "epoch": 0.33, "learning_rate": 2.3727342390348134e-06, "loss": 0.0058, "reward": -0.334, "step": 2339 }, { "epoch": 0.33, "learning_rate": 2.4488236897624457e-06, "loss": 0.0043, "reward": -0.188, "step": 2349 }, { "epoch": 0.33, "learning_rate": 2.525720463568009e-06, "loss": 0.005, "reward": -0.3464, "step": 2359 }, { "epoch": 0.34, "learning_rate": 2.603400226366919e-06, "loss": 0.0041, "reward": -0.4215, "step": 2369 }, { "epoch": 0.34, "learning_rate": 2.6818383962967134e-06, "loss": 0.0059, "reward": -0.483, "step": 2379 }, { "epoch": 0.34, "learning_rate": 2.7610101514959903e-06, "loss": 0.0042, "reward": -0.3055, "step": 2389 }, { "epoch": 0.34, "learning_rate": 2.840890437959334e-06, "loss": 0.0047, "reward": -0.2254, "step": 2399 }, { "epoch": 0.34, "learning_rate": 2.92145397746568e-06, "loss": 0.0049, "reward": -0.4363, "step": 2409 }, { "epoch": 0.34, "learning_rate": 3.002675275577659e-06, "loss": 0.0053, "reward": -0.4156, "step": 2419 }, { "epoch": 0.34, "learning_rate": 3.0845286297093167e-06, "loss": 0.0032, "reward": -0.3561, "step": 2429 }, { "epoch": 0.35, "learning_rate": 3.166988137259751e-06, "loss": 0.0039, "reward": -0.4369, "step": 2439 }, { "epoch": 0.35, "learning_rate": 3.2500277038100126e-06, "loss": 0.0049, "reward": -0.431, "step": 2449 }, { "epoch": 0.35, "learning_rate": 3.3336210513807366e-06, "loss": 0.0047, "reward": -0.4215, "step": 2459 }, { "epoch": 0.35, "learning_rate": 3.4177417267478236e-06, "loss": 0.0044, "reward": -0.2004, "step": 2469 }, { "epoch": 0.35, "learning_rate": 3.502363109813589e-06, "loss": 0.0039, "reward": -0.3595, "step": 2479 }, { "epoch": 0.35, "learning_rate": 3.5874584220307686e-06, "loss": 0.0052, "reward": -0.3384, "step": 2489 }, { "epoch": 0.35, "learning_rate": 3.673000734876582e-06, "loss": 0.0042, "reward": -0.1973, "step": 2499 }, { "epoch": 0.36, "learning_rate": 3.7589629783743188e-06, "loss": 0.0044, "reward": -0.4077, "step": 2509 }, { "epoch": 0.36, "learning_rate": 3.845317949659662e-06, "loss": 0.0046, "reward": -0.5238, "step": 2519 }, { "epoch": 0.36, "learning_rate": 3.932038321589086e-06, "loss": 0.0039, "reward": -0.3569, "step": 2529 }, { "epoch": 0.36, "learning_rate": 4.01909665138754e-06, "loss": 0.005, "reward": -0.4477, "step": 2539 }, { "epoch": 0.36, "learning_rate": 4.106465389332768e-06, "loss": 0.0035, "reward": -0.303, "step": 2549 }, { "epoch": 0.36, "learning_rate": 4.194116887473445e-06, "loss": 0.0042, "reward": -0.3804, "step": 2559 }, { "epoch": 0.36, "learning_rate": 4.282023408378429e-06, "loss": 0.0047, "reward": -0.4259, "step": 2569 }, { "epoch": 0.37, "learning_rate": 4.370157133914274e-06, "loss": 0.0049, "reward": -0.3371, "step": 2579 }, { "epoch": 0.37, "learning_rate": 4.458490174048339e-06, "loss": 0.0047, "reward": -0.4291, "step": 2589 }, { "epoch": 0.37, "learning_rate": 4.5469945756746145e-06, "loss": 0.0044, "reward": -0.4106, "step": 2599 }, { "epoch": 0.37, "learning_rate": 4.635642331459522e-06, "loss": 0.0036, "reward": -0.3657, "step": 2609 }, { "epoch": 0.37, "learning_rate": 4.724405388704883e-06, "loss": 0.0052, "reward": -0.3857, "step": 2619 }, { "epoch": 0.37, "learning_rate": 4.804367507873278e-06, "loss": 0.0047, "reward": -0.3853, "step": 2629 }, { "epoch": 0.37, "learning_rate": 4.893272229193558e-06, "loss": 0.0054, "reward": -0.3366, "step": 2639 }, { "epoch": 0.38, "learning_rate": 4.982210724656406e-06, "loss": 0.005, "reward": -0.4375, "step": 2649 }, { "epoch": 0.38, "learning_rate": 5.071154849558693e-06, "loss": 0.0043, "reward": -0.3342, "step": 2659 }, { "epoch": 0.38, "learning_rate": 5.160076457415833e-06, "loss": 0.0047, "reward": -0.4507, "step": 2669 }, { "epoch": 0.38, "learning_rate": 5.248947408868792e-06, "loss": 0.005, "reward": -0.4391, "step": 2679 }, { "epoch": 0.38, "learning_rate": 5.33773958058882e-06, "loss": 0.0036, "reward": -0.2586, "step": 2689 }, { "epoch": 0.38, "learning_rate": 5.426424874177126e-06, "loss": 0.0044, "reward": -0.4199, "step": 2699 }, { "epoch": 0.38, "learning_rate": 5.514975225056634e-06, "loss": 0.0044, "reward": -0.4119, "step": 2709 }, { "epoch": 0.38, "learning_rate": 5.603362611353067e-06, "loss": 0.0043, "reward": -0.3747, "step": 2719 }, { "epoch": 0.39, "learning_rate": 5.691559062762498e-06, "loss": 0.0051, "reward": -0.2529, "step": 2729 }, { "epoch": 0.39, "learning_rate": 5.779536669402608e-06, "loss": 0.0038, "reward": -0.3863, "step": 2739 }, { "epoch": 0.39, "learning_rate": 5.867267590644781e-06, "loss": 0.0043, "reward": -0.4039, "step": 2749 }, { "epoch": 0.39, "learning_rate": 5.954724063924317e-06, "loss": 0.0041, "reward": -0.3328, "step": 2759 }, { "epoch": 0.39, "learning_rate": 6.041878413525932e-06, "loss": 0.0044, "reward": -0.2204, "step": 2769 }, { "epoch": 0.39, "learning_rate": 6.128703059341788e-06, "loss": 0.0056, "reward": -0.4904, "step": 2779 }, { "epoch": 0.39, "learning_rate": 6.215170525599225e-06, "loss": 0.0047, "reward": -0.3944, "step": 2789 }, { "epoch": 0.4, "learning_rate": 6.3012534495555275e-06, "loss": 0.0053, "reward": -0.5206, "step": 2799 }, { "epoch": 0.4, "learning_rate": 6.386924590156891e-06, "loss": 0.0033, "reward": -0.1779, "step": 2809 }, { "epoch": 0.4, "learning_rate": 6.4721568366589e-06, "loss": 0.0045, "reward": -0.4464, "step": 2819 }, { "epoch": 0.4, "learning_rate": 6.556923217205744e-06, "loss": 0.0036, "reward": -0.3361, "step": 2829 }, { "epoch": 0.4, "learning_rate": 6.641196907365499e-06, "loss": 0.0049, "reward": -0.5177, "step": 2839 }, { "epoch": 0.4, "learning_rate": 6.724951238618763e-06, "loss": 0.0048, "reward": -0.4074, "step": 2849 }, { "epoch": 0.4, "learning_rate": 6.808159706797949e-06, "loss": 0.0047, "reward": -0.2954, "step": 2859 }, { "epoch": 0.41, "learning_rate": 6.8907959804745294e-06, "loss": 0.0046, "reward": -0.312, "step": 2869 }, { "epoch": 0.41, "learning_rate": 6.972833909291694e-06, "loss": 0.0053, "reward": -0.4417, "step": 2879 }, { "epoch": 0.41, "learning_rate": 7.054247532239634e-06, "loss": 0.0036, "reward": -0.2909, "step": 2889 }, { "epoch": 0.41, "learning_rate": 7.135011085870964e-06, "loss": 0.0044, "reward": -0.2276, "step": 2899 }, { "epoch": 0.41, "learning_rate": 7.21509901245357e-06, "loss": 0.0049, "reward": -0.3637, "step": 2909 }, { "epoch": 0.41, "learning_rate": 7.294485968058404e-06, "loss": 0.0067, "reward": -0.3676, "step": 2919 }, { "epoch": 0.41, "learning_rate": 7.373146830579596e-06, "loss": 0.0034, "reward": -0.1601, "step": 2929 }, { "epoch": 0.42, "learning_rate": 7.451056707684396e-06, "loss": 0.0041, "reward": -0.1314, "step": 2939 }, { "epoch": 0.42, "learning_rate": 7.528190944690356e-06, "loss": 0.0035, "reward": -0.2391, "step": 2949 }, { "epoch": 0.42, "learning_rate": 7.6045251323673555e-06, "loss": 0.0052, "reward": -0.4042, "step": 2959 }, { "epoch": 0.42, "learning_rate": 7.680035114661929e-06, "loss": 0.0044, "reward": -0.3035, "step": 2969 }, { "epoch": 0.42, "learning_rate": 7.754696996341486e-06, "loss": 0.0049, "reward": -0.3552, "step": 2979 }, { "epoch": 0.42, "learning_rate": 7.828487150555979e-06, "loss": 0.0049, "reward": -0.2459, "step": 2989 }, { "epoch": 0.42, "learning_rate": 7.901382226314662e-06, "loss": 0.0036, "reward": -0.2989, "step": 2999 }, { "epoch": 0.43, "learning_rate": 7.973359155875521e-06, "loss": 0.0057, "reward": -0.5494, "step": 3009 }, { "epoch": 0.43, "learning_rate": 8.044395162045135e-06, "loss": 0.0045, "reward": -0.3344, "step": 3019 }, { "epoch": 0.43, "learning_rate": 8.11446776538649e-06, "loss": 0.0045, "reward": -0.3949, "step": 3029 }, { "epoch": 0.43, "learning_rate": 8.183554791332675e-06, "loss": 0.004, "reward": -0.2997, "step": 3039 }, { "epoch": 0.43, "learning_rate": 8.251634377204023e-06, "loss": 0.0034, "reward": -0.2654, "step": 3049 }, { "epoch": 0.43, "learning_rate": 8.318684979126607e-06, "loss": 0.0034, "reward": -0.4013, "step": 3059 }, { "epoch": 0.43, "learning_rate": 8.384685378849804e-06, "loss": 0.0035, "reward": -0.4777, "step": 3069 }, { "epoch": 0.44, "learning_rate": 8.44961469046086e-06, "loss": 0.0031, "reward": -0.3525, "step": 3079 }, { "epoch": 0.44, "learning_rate": 8.513452366994242e-06, "loss": 0.0042, "reward": -0.1987, "step": 3089 }, { "epoch": 0.44, "learning_rate": 8.576178206933773e-06, "loss": 0.0043, "reward": -0.439, "step": 3099 }, { "epoch": 0.44, "learning_rate": 8.637772360605421e-06, "loss": 0.0048, "reward": -0.2539, "step": 3109 }, { "epoch": 0.44, "learning_rate": 8.698215336458743e-06, "loss": 0.0047, "reward": -0.4092, "step": 3119 }, { "epoch": 0.44, "learning_rate": 8.757488007235031e-06, "loss": 0.0042, "reward": -0.288, "step": 3129 }, { "epoch": 0.44, "learning_rate": 8.815571616020147e-06, "loss": 0.0033, "reward": -0.2841, "step": 3139 }, { "epoch": 0.45, "learning_rate": 8.872447782180168e-06, "loss": 0.0044, "reward": -0.3696, "step": 3149 }, { "epoch": 0.45, "learning_rate": 8.92809850717797e-06, "loss": 0.0034, "reward": -0.1236, "step": 3159 }, { "epoch": 0.45, "learning_rate": 8.982506180268895e-06, "loss": 0.0041, "reward": -0.269, "step": 3169 }, { "epoch": 0.45, "learning_rate": 9.035653584073673e-06, "loss": 0.0029, "reward": -0.4326, "step": 3179 }, { "epoch": 0.45, "learning_rate": 9.087523900026907e-06, "loss": 0.0038, "reward": -0.2885, "step": 3189 }, { "epoch": 0.45, "learning_rate": 9.138100713699312e-06, "loss": 0.0038, "reward": -0.3119, "step": 3199 }, { "epoch": 0.45, "learning_rate": 9.187368019992092e-06, "loss": 0.004, "reward": -0.196, "step": 3209 }, { "epoch": 0.46, "learning_rate": 9.235310228201782e-06, "loss": 0.0036, "reward": -0.3683, "step": 3219 }, { "epoch": 0.46, "learning_rate": 9.281912166953929e-06, "loss": 0.0035, "reward": -0.1997, "step": 3229 }, { "epoch": 0.46, "learning_rate": 9.327159089004098e-06, "loss": 0.0042, "reward": -0.403, "step": 3239 }, { "epoch": 0.46, "learning_rate": 9.371036675904667e-06, "loss": 0.0034, "reward": -0.3637, "step": 3249 }, { "epoch": 0.46, "learning_rate": 9.413531042535915e-06, "loss": 0.0049, "reward": -0.3766, "step": 3259 }, { "epoch": 0.46, "learning_rate": 9.454628741499976e-06, "loss": 0.0044, "reward": -0.3386, "step": 3269 }, { "epoch": 0.46, "learning_rate": 9.494316767376295e-06, "loss": 0.0039, "reward": -0.3232, "step": 3279 }, { "epoch": 0.47, "learning_rate": 9.532582560837204e-06, "loss": 0.0032, "reward": -0.3225, "step": 3289 }, { "epoch": 0.47, "learning_rate": 9.569414012622356e-06, "loss": 0.0037, "reward": -0.3366, "step": 3299 }, { "epoch": 0.47, "learning_rate": 9.604799467370689e-06, "loss": 0.0038, "reward": -0.3283, "step": 3309 }, { "epoch": 0.47, "learning_rate": 9.63872772730879e-06, "loss": 0.0033, "reward": -0.2504, "step": 3319 }, { "epoch": 0.47, "learning_rate": 9.671188055794462e-06, "loss": 0.0037, "reward": -0.3496, "step": 3329 }, { "epoch": 0.47, "learning_rate": 9.702170180714328e-06, "loss": 0.0035, "reward": -0.3135, "step": 3339 }, { "epoch": 0.47, "learning_rate": 9.731664297734458e-06, "loss": 0.0036, "reward": -0.3951, "step": 3349 }, { "epoch": 0.48, "learning_rate": 9.75966107340297e-06, "loss": 0.0032, "reward": -0.4631, "step": 3359 }, { "epoch": 0.48, "learning_rate": 9.786151648103613e-06, "loss": 0.0036, "reward": -0.3141, "step": 3369 }, { "epoch": 0.48, "learning_rate": 9.811127638859398e-06, "loss": 0.003, "reward": -0.1992, "step": 3379 }, { "epoch": 0.48, "learning_rate": 9.834581141985404e-06, "loss": 0.0038, "reward": -0.3201, "step": 3389 }, { "epoch": 0.48, "learning_rate": 9.85650473558991e-06, "loss": 0.0031, "reward": -0.1577, "step": 3399 }, { "epoch": 0.48, "learning_rate": 9.876891481923067e-06, "loss": 0.0035, "reward": -0.2635, "step": 3409 }, { "epoch": 0.48, "learning_rate": 9.89573492957235e-06, "loss": 0.0044, "reward": -0.3539, "step": 3419 }, { "epoch": 0.49, "learning_rate": 9.91302911550412e-06, "loss": 0.0031, "reward": -0.3358, "step": 3429 }, { "epoch": 0.49, "learning_rate": 9.928768566950632e-06, "loss": 0.0041, "reward": -0.2885, "step": 3439 }, { "epoch": 0.49, "learning_rate": 9.94294830314191e-06, "loss": 0.0037, "reward": -0.2699, "step": 3449 }, { "epoch": 0.49, "learning_rate": 9.955563836881898e-06, "loss": 0.0053, "reward": -0.317, "step": 3459 }, { "epoch": 0.49, "learning_rate": 9.966611175968454e-06, "loss": 0.0035, "reward": -0.2824, "step": 3469 }, { "epoch": 0.49, "learning_rate": 9.976086824456686e-06, "loss": 0.004, "reward": -0.5046, "step": 3479 }, { "epoch": 0.49, "learning_rate": 9.983987783765243e-06, "loss": 0.0036, "reward": -0.4293, "step": 3489 }, { "epoch": 0.5, "learning_rate": 9.990311553625227e-06, "loss": 0.0034, "reward": -0.1845, "step": 3499 }, { "epoch": 0.5, "learning_rate": 9.995056132871399e-06, "loss": 0.0035, "reward": -0.1239, "step": 3509 }, { "epoch": 0.5, "learning_rate": 9.998220020075455e-06, "loss": 0.0031, "reward": -0.271, "step": 3519 }, { "epoch": 0.5, "learning_rate": 9.999802214021156e-06, "loss": 0.0039, "reward": -0.4751, "step": 3529 }, { "epoch": 0.5, "learning_rate": 9.999802214021156e-06, "loss": 0.0038, "reward": -0.454, "step": 3539 }, { "epoch": 0.5, "learning_rate": 9.998220020075455e-06, "loss": 0.0041, "reward": -0.1722, "step": 3549 }, { "epoch": 0.5, "learning_rate": 9.9950561328714e-06, "loss": 0.0041, "reward": -0.2842, "step": 3559 }, { "epoch": 0.51, "learning_rate": 9.990311553625229e-06, "loss": 0.0042, "reward": -0.3914, "step": 3569 }, { "epoch": 0.51, "learning_rate": 9.983987783765245e-06, "loss": 0.0038, "reward": -0.1993, "step": 3579 }, { "epoch": 0.51, "learning_rate": 9.976086824456686e-06, "loss": 0.0029, "reward": -0.3465, "step": 3589 }, { "epoch": 0.51, "learning_rate": 9.966611175968454e-06, "loss": 0.0034, "reward": -0.209, "step": 3599 }, { "epoch": 0.51, "learning_rate": 9.955563836881898e-06, "loss": 0.0032, "reward": -0.2638, "step": 3609 }, { "epoch": 0.51, "learning_rate": 9.94294830314191e-06, "loss": 0.0032, "reward": -0.2811, "step": 3619 }, { "epoch": 0.51, "learning_rate": 9.928768566950635e-06, "loss": 0.0038, "reward": -0.3794, "step": 3629 }, { "epoch": 0.52, "learning_rate": 9.91302911550412e-06, "loss": 0.0038, "reward": -0.3419, "step": 3639 }, { "epoch": 0.52, "learning_rate": 9.89573492957235e-06, "loss": 0.0032, "reward": -0.31, "step": 3649 }, { "epoch": 0.52, "learning_rate": 9.876891481923067e-06, "loss": 0.0034, "reward": -0.1604, "step": 3659 }, { "epoch": 0.52, "learning_rate": 9.85650473558991e-06, "loss": 0.0035, "reward": -0.1379, "step": 3669 }, { "epoch": 0.52, "learning_rate": 9.834581141985404e-06, "loss": 0.0038, "reward": -0.3531, "step": 3679 }, { "epoch": 0.52, "learning_rate": 9.811127638859398e-06, "loss": 0.0028, "reward": -0.1161, "step": 3689 }, { "epoch": 0.52, "learning_rate": 9.786151648103615e-06, "loss": 0.0038, "reward": -0.3628, "step": 3699 }, { "epoch": 0.53, "learning_rate": 9.759661073402971e-06, "loss": 0.0033, "reward": -0.3243, "step": 3709 }, { "epoch": 0.53, "learning_rate": 9.731664297734458e-06, "loss": 0.0028, "reward": -0.2481, "step": 3719 }, { "epoch": 0.53, "learning_rate": 9.702170180714325e-06, "loss": 0.0038, "reward": -0.3179, "step": 3729 }, { "epoch": 0.53, "learning_rate": 9.671188055794462e-06, "loss": 0.0028, "reward": -0.1725, "step": 3739 }, { "epoch": 0.53, "learning_rate": 9.63872772730879e-06, "loss": 0.003, "reward": -0.1697, "step": 3749 }, { "epoch": 0.53, "learning_rate": 9.604799467370689e-06, "loss": 0.0035, "reward": -0.2575, "step": 3759 }, { "epoch": 0.53, "learning_rate": 9.569414012622358e-06, "loss": 0.0043, "reward": -0.3233, "step": 3769 }, { "epoch": 0.54, "learning_rate": 9.532582560837208e-06, "loss": 0.0031, "reward": -0.0708, "step": 3779 }, { "epoch": 0.54, "learning_rate": 9.494316767376295e-06, "loss": 0.003, "reward": -0.2344, "step": 3789 }, { "epoch": 0.54, "learning_rate": 9.454628741499978e-06, "loss": 0.0036, "reward": -0.0844, "step": 3799 }, { "epoch": 0.54, "learning_rate": 9.413531042535916e-06, "loss": 0.0034, "reward": -0.2369, "step": 3809 }, { "epoch": 0.54, "learning_rate": 9.371036675904671e-06, "loss": 0.0036, "reward": -0.3654, "step": 3819 }, { "epoch": 0.54, "learning_rate": 9.327159089004098e-06, "loss": 0.0036, "reward": -0.2757, "step": 3829 }, { "epoch": 0.54, "learning_rate": 9.281912166953932e-06, "loss": 0.0035, "reward": -0.3181, "step": 3839 }, { "epoch": 0.54, "learning_rate": 9.235310228201784e-06, "loss": 0.0048, "reward": -0.3122, "step": 3849 }, { "epoch": 0.55, "learning_rate": 9.187368019992095e-06, "loss": 0.0033, "reward": -0.2904, "step": 3859 }, { "epoch": 0.55, "learning_rate": 9.138100713699312e-06, "loss": 0.0037, "reward": -0.1489, "step": 3869 }, { "epoch": 0.55, "learning_rate": 9.087523900026905e-06, "loss": 0.0029, "reward": -0.2763, "step": 3879 }, { "epoch": 0.55, "learning_rate": 9.035653584073675e-06, "loss": 0.003, "reward": -0.1622, "step": 3889 }, { "epoch": 0.55, "learning_rate": 8.982506180268893e-06, "loss": 0.0034, "reward": -0.3107, "step": 3899 }, { "epoch": 0.55, "learning_rate": 8.928098507177972e-06, "loss": 0.0033, "reward": -0.1224, "step": 3909 }, { "epoch": 0.55, "learning_rate": 8.872447782180166e-06, "loss": 0.0038, "reward": -0.1105, "step": 3919 }, { "epoch": 0.56, "learning_rate": 8.815571616020149e-06, "loss": 0.0031, "reward": -0.348, "step": 3929 }, { "epoch": 0.56, "learning_rate": 8.757488007235031e-06, "loss": 0.0035, "reward": -0.2409, "step": 3939 }, { "epoch": 0.56, "learning_rate": 8.698215336458744e-06, "loss": 0.0033, "reward": -0.3141, "step": 3949 }, { "epoch": 0.56, "learning_rate": 8.637772360605418e-06, "loss": 0.0034, "reward": -0.313, "step": 3959 }, { "epoch": 0.56, "learning_rate": 8.576178206933775e-06, "loss": 0.0046, "reward": -0.4326, "step": 3969 }, { "epoch": 0.56, "learning_rate": 8.513452366994239e-06, "loss": 0.003, "reward": -0.1198, "step": 3979 }, { "epoch": 0.56, "learning_rate": 8.44961469046086e-06, "loss": 0.0032, "reward": -0.1191, "step": 3989 }, { "epoch": 0.57, "learning_rate": 8.384685378849806e-06, "loss": 0.0041, "reward": -0.3557, "step": 3999 }, { "epoch": 0.57, "learning_rate": 8.318684979126612e-06, "loss": 0.0035, "reward": -0.2939, "step": 4009 }, { "epoch": 0.57, "learning_rate": 8.251634377204026e-06, "loss": 0.0049, "reward": -0.1952, "step": 4019 }, { "epoch": 0.57, "learning_rate": 8.183554791332677e-06, "loss": 0.0029, "reward": -0.1551, "step": 4029 }, { "epoch": 0.57, "learning_rate": 8.114467765386494e-06, "loss": 0.0043, "reward": -0.2539, "step": 4039 }, { "epoch": 0.57, "learning_rate": 8.04439516204514e-06, "loss": 0.0037, "reward": -0.3849, "step": 4049 }, { "epoch": 0.57, "learning_rate": 7.973359155875525e-06, "loss": 0.0028, "reward": -0.2593, "step": 4059 }, { "epoch": 0.58, "learning_rate": 7.901382226314662e-06, "loss": 0.0032, "reward": -0.1569, "step": 4069 }, { "epoch": 0.58, "learning_rate": 7.82848715055598e-06, "loss": 0.0035, "reward": -0.4049, "step": 4079 }, { "epoch": 0.58, "learning_rate": 7.75469699634149e-06, "loss": 0.0031, "reward": -0.3131, "step": 4089 }, { "epoch": 0.58, "learning_rate": 7.68003511466193e-06, "loss": 0.0033, "reward": -0.2798, "step": 4099 }, { "epoch": 0.58, "learning_rate": 7.604525132367354e-06, "loss": 0.0034, "reward": -0.2349, "step": 4109 }, { "epoch": 0.58, "learning_rate": 7.528190944690358e-06, "loss": 0.0034, "reward": -0.1358, "step": 4119 }, { "epoch": 0.58, "learning_rate": 7.4510567076843945e-06, "loss": 0.0034, "reward": -0.1241, "step": 4129 }, { "epoch": 0.59, "learning_rate": 7.373146830579598e-06, "loss": 0.0031, "reward": -0.0964, "step": 4139 }, { "epoch": 0.59, "learning_rate": 7.294485968058401e-06, "loss": 0.003, "reward": -0.064, "step": 4149 }, { "epoch": 0.59, "learning_rate": 7.2150990124535726e-06, "loss": 0.0031, "reward": -0.2023, "step": 4159 }, { "epoch": 0.59, "learning_rate": 7.135011085870962e-06, "loss": 0.0035, "reward": -0.3227, "step": 4169 }, { "epoch": 0.59, "learning_rate": 7.054247532239637e-06, "loss": 0.003, "reward": -0.1747, "step": 4179 }, { "epoch": 0.59, "learning_rate": 6.9728339092916915e-06, "loss": 0.003, "reward": -0.1391, "step": 4189 }, { "epoch": 0.59, "learning_rate": 6.890795980474532e-06, "loss": 0.0029, "reward": -0.1152, "step": 4199 }, { "epoch": 0.6, "learning_rate": 6.808159706797946e-06, "loss": 0.0027, "reward": -0.225, "step": 4209 }, { "epoch": 0.6, "learning_rate": 6.72495123861877e-06, "loss": 0.0035, "reward": -0.107, "step": 4219 }, { "epoch": 0.6, "learning_rate": 6.6411969073655014e-06, "loss": 0.0037, "reward": -0.213, "step": 4229 }, { "epoch": 0.6, "learning_rate": 6.55692321720575e-06, "loss": 0.0029, "reward": -0.2639, "step": 4239 }, { "epoch": 0.6, "learning_rate": 6.472156836658903e-06, "loss": 0.0034, "reward": -0.1981, "step": 4249 }, { "epoch": 0.6, "learning_rate": 6.386924590156898e-06, "loss": 0.0035, "reward": -0.1908, "step": 4259 }, { "epoch": 0.6, "learning_rate": 6.301253449555531e-06, "loss": 0.0028, "reward": -0.2391, "step": 4269 }, { "epoch": 0.61, "learning_rate": 6.215170525599231e-06, "loss": 0.0032, "reward": -0.2446, "step": 4279 }, { "epoch": 0.61, "learning_rate": 6.128703059341789e-06, "loss": 0.0032, "reward": -0.2266, "step": 4289 }, { "epoch": 0.61, "learning_rate": 6.041878413525939e-06, "loss": 0.0032, "reward": -0.1362, "step": 4299 }, { "epoch": 0.61, "learning_rate": 5.9547240639243184e-06, "loss": 0.0035, "reward": -0.2071, "step": 4309 }, { "epoch": 0.61, "learning_rate": 5.867267590644787e-06, "loss": 0.0035, "reward": -0.2041, "step": 4319 }, { "epoch": 0.61, "learning_rate": 5.779536669402611e-06, "loss": 0.0029, "reward": -0.2125, "step": 4329 }, { "epoch": 0.61, "learning_rate": 5.6915590627625005e-06, "loss": 0.0033, "reward": -0.3047, "step": 4339 }, { "epoch": 0.62, "learning_rate": 5.60336261135307e-06, "loss": 0.0028, "reward": -0.2644, "step": 4349 }, { "epoch": 0.62, "learning_rate": 5.514975225056633e-06, "loss": 0.0032, "reward": -0.2142, "step": 4359 }, { "epoch": 0.62, "learning_rate": 5.4264248741771295e-06, "loss": 0.0036, "reward": -0.2406, "step": 4369 }, { "epoch": 0.62, "learning_rate": 5.337739580588822e-06, "loss": 0.0035, "reward": -0.4047, "step": 4379 }, { "epoch": 0.62, "learning_rate": 5.248947408868794e-06, "loss": 0.0034, "reward": -0.1619, "step": 4389 }, { "epoch": 0.62, "learning_rate": 5.16007645741583e-06, "loss": 0.0028, "reward": -0.1479, "step": 4399 }, { "epoch": 0.62, "learning_rate": 5.071154849558695e-06, "loss": 0.0034, "reward": -0.1027, "step": 4409 }, { "epoch": 0.63, "learning_rate": 4.982210724656409e-06, "loss": 0.0034, "reward": -0.2996, "step": 4419 }, { "epoch": 0.63, "learning_rate": 4.893272229193561e-06, "loss": 0.0028, "reward": -0.0919, "step": 4429 }, { "epoch": 0.63, "learning_rate": 4.804367507873277e-06, "loss": 0.003, "reward": -0.1089, "step": 4439 }, { "epoch": 0.63, "learning_rate": 4.715524694710839e-06, "loss": 0.0031, "reward": -0.1168, "step": 4449 }, { "epoch": 0.63, "learning_rate": 4.626771904130584e-06, "loss": 0.0034, "reward": -0.3128, "step": 4459 }, { "epoch": 0.63, "learning_rate": 4.538137222069105e-06, "loss": 0.0041, "reward": -0.1848, "step": 4469 }, { "epoch": 0.63, "learning_rate": 4.449648697087378e-06, "loss": 0.0032, "reward": -0.1865, "step": 4479 }, { "epoch": 0.64, "learning_rate": 4.361334331494812e-06, "loss": 0.0027, "reward": -0.1697, "step": 4489 }, { "epoch": 0.64, "learning_rate": 4.2732220724878194e-06, "loss": 0.0035, "reward": -0.1667, "step": 4499 }, { "epoch": 0.64, "learning_rate": 4.185339803305934e-06, "loss": 0.0038, "reward": -0.3111, "step": 4509 }, { "epoch": 0.64, "learning_rate": 4.097715334408112e-06, "loss": 0.003, "reward": -0.2045, "step": 4519 }, { "epoch": 0.64, "learning_rate": 4.010376394672062e-06, "loss": 0.0031, "reward": -0.152, "step": 4529 }, { "epoch": 0.64, "learning_rate": 3.9233506226194126e-06, "loss": 0.003, "reward": -0.2064, "step": 4539 }, { "epoch": 0.64, "learning_rate": 3.836665557669496e-06, "loss": 0.0034, "reward": -0.2407, "step": 4549 }, { "epoch": 0.65, "learning_rate": 3.750348631424402e-06, "loss": 0.0029, "reward": -0.1174, "step": 4559 }, { "epoch": 0.65, "learning_rate": 3.66442715898827e-06, "loss": 0.003, "reward": -0.2606, "step": 4569 }, { "epoch": 0.65, "learning_rate": 3.578928330323367e-06, "loss": 0.003, "reward": -0.1689, "step": 4579 }, { "epoch": 0.65, "learning_rate": 3.493879201645759e-06, "loss": 0.0028, "reward": -0.2151, "step": 4589 }, { "epoch": 0.65, "learning_rate": 3.409306686863399e-06, "loss": 0.0029, "reward": -0.1513, "step": 4599 }, { "epoch": 0.65, "learning_rate": 3.3252375490591217e-06, "loss": 0.0026, "reward": -0.0821, "step": 4609 }, { "epoch": 0.65, "learning_rate": 3.24169839202147e-06, "loss": 0.0034, "reward": -0.2608, "step": 4619 }, { "epoch": 0.66, "learning_rate": 3.158715651825871e-06, "loss": 0.0028, "reward": -0.132, "step": 4629 }, { "epoch": 0.66, "learning_rate": 3.076315588468941e-06, "loss": 0.0028, "reward": -0.0714, "step": 4639 }, { "epoch": 0.66, "learning_rate": 2.9945242775584143e-06, "loss": 0.0032, "reward": -0.2108, "step": 4649 }, { "epoch": 0.66, "learning_rate": 2.913367602061552e-06, "loss": 0.0027, "reward": -0.0474, "step": 4659 }, { "epoch": 0.66, "learning_rate": 2.832871244114375e-06, "loss": 0.0033, "reward": -0.287, "step": 4669 }, { "epoch": 0.66, "learning_rate": 2.753060676894588e-06, "loss": 0.0029, "reward": -0.2081, "step": 4679 }, { "epoch": 0.66, "learning_rate": 2.6739611565604947e-06, "loss": 0.0036, "reward": -0.1504, "step": 4689 }, { "epoch": 0.67, "learning_rate": 2.5955977142586946e-06, "loss": 0.0033, "reward": -0.2813, "step": 4699 }, { "epoch": 0.67, "learning_rate": 2.5179951482029225e-06, "loss": 0.0029, "reward": -0.3317, "step": 4709 }, { "epoch": 0.67, "learning_rate": 2.4411780158266533e-06, "loss": 0.0029, "reward": -0.1715, "step": 4719 }, { "epoch": 0.67, "learning_rate": 2.3651706260118184e-06, "loss": 0.0029, "reward": -0.1342, "step": 4729 }, { "epoch": 0.67, "learning_rate": 2.289997031396286e-06, "loss": 0.0032, "reward": -0.0324, "step": 4739 }, { "epoch": 0.67, "learning_rate": 2.215681020762313e-06, "loss": 0.0032, "reward": -0.1613, "step": 4749 }, { "epoch": 0.67, "learning_rate": 2.1422461115086167e-06, "loss": 0.0027, "reward": -0.2861, "step": 4759 }, { "epoch": 0.68, "learning_rate": 2.069715542208207e-06, "loss": 0.0026, "reward": -0.181, "step": 4769 }, { "epoch": 0.68, "learning_rate": 1.998112265254541e-06, "loss": 0.0028, "reward": -0.1974, "step": 4779 }, { "epoch": 0.68, "learning_rate": 1.9274589395981937e-06, "loss": 0.0036, "reward": -0.2612, "step": 4789 }, { "epoch": 0.68, "learning_rate": 1.8577779235764249e-06, "loss": 0.0031, "reward": -0.1949, "step": 4799 }, { "epoch": 0.68, "learning_rate": 1.7890912678378392e-06, "loss": 0.0033, "reward": -0.1398, "step": 4809 }, { "epoch": 0.68, "learning_rate": 1.7214207083644098e-06, "loss": 0.0032, "reward": -0.1068, "step": 4819 }, { "epoch": 0.68, "learning_rate": 1.6547876595931444e-06, "loss": 0.0028, "reward": -0.2427, "step": 4829 }, { "epoch": 0.69, "learning_rate": 1.5892132076394151e-06, "loss": 0.0031, "reward": -0.0524, "step": 4839 }, { "epoch": 0.69, "learning_rate": 1.524718103624252e-06, "loss": 0.0029, "reward": -0.1146, "step": 4849 }, { "epoch": 0.69, "learning_rate": 1.4613227571076138e-06, "loss": 0.0029, "reward": -0.1041, "step": 4859 }, { "epoch": 0.69, "learning_rate": 1.3990472296297808e-06, "loss": 0.0025, "reward": -0.2616, "step": 4869 }, { "epoch": 0.69, "learning_rate": 1.3379112283628081e-06, "loss": 0.0029, "reward": -0.187, "step": 4879 }, { "epoch": 0.69, "learning_rate": 1.2779340998742185e-06, "loss": 0.0032, "reward": -0.1955, "step": 4889 }, { "epoch": 0.69, "learning_rate": 1.219134824004704e-06, "loss": 0.0028, "reward": -0.106, "step": 4899 }, { "epoch": 0.7, "learning_rate": 1.16153200786198e-06, "loss": 0.0032, "reward": -0.2001, "step": 4909 }, { "epoch": 0.7, "learning_rate": 1.1051438799324999e-06, "loss": 0.0029, "reward": -0.1302, "step": 4919 }, { "epoch": 0.7, "learning_rate": 1.0499882843130487e-06, "loss": 0.0032, "reward": -0.2308, "step": 4929 }, { "epoch": 0.7, "learning_rate": 9.96082675063948e-07, "loss": 0.0028, "reward": -0.2366, "step": 4939 }, { "epoch": 0.7, "learning_rate": 9.43444110685714e-07, "loss": 0.003, "reward": -0.1516, "step": 4949 }, { "epoch": 0.7, "learning_rate": 8.920892487208343e-07, "loss": 0.0034, "reward": 0.0169, "step": 4959 }, { "epoch": 0.7, "learning_rate": 8.420343404825132e-07, "loss": 0.0026, "reward": -0.0979, "step": 4969 }, { "epoch": 0.7, "learning_rate": 7.932952259118776e-07, "loss": 0.003, "reward": -0.1379, "step": 4979 }, { "epoch": 0.71, "learning_rate": 7.458873285654489e-07, "loss": 0.0035, "reward": -0.0396, "step": 4989 }, { "epoch": 0.71, "learning_rate": 6.998256507343016e-07, "loss": 0.0027, "reward": 0.0041, "step": 4999 }, { "epoch": 0.71, "learning_rate": 6.551247686965872e-07, "loss": 0.0034, "reward": -0.1662, "step": 5009 }, { "epoch": 0.71, "learning_rate": 6.117988281048626e-07, "loss": 0.0032, "reward": -0.1413, "step": 5019 }, { "epoch": 0.71, "learning_rate": 5.698615395096485e-07, "loss": 0.0029, "reward": -0.2091, "step": 5029 }, { "epoch": 0.71, "learning_rate": 5.293261740207456e-07, "loss": 0.0028, "reward": -0.2433, "step": 5039 }, { "epoch": 0.71, "learning_rate": 4.902055591075355e-07, "loss": 0.0031, "reward": -0.2965, "step": 5049 }, { "epoch": 0.72, "learning_rate": 4.525120745397493e-07, "loss": 0.0029, "reward": -0.1276, "step": 5059 }, { "epoch": 0.72, "learning_rate": 4.1625764846984276e-07, "loss": 0.0031, "reward": -0.1481, "step": 5069 }, { "epoch": 0.72, "learning_rate": 3.814537536583318e-07, "loss": 0.0028, "reward": -0.1388, "step": 5079 }, { "epoch": 0.72, "learning_rate": 3.481114038432176e-07, "loss": 0.003, "reward": -0.2624, "step": 5089 }, { "epoch": 0.72, "learning_rate": 3.1624115025468695e-07, "loss": 0.0029, "reward": -0.0321, "step": 5099 }, { "epoch": 0.72, "learning_rate": 2.8585307827613764e-07, "loss": 0.0029, "reward": -0.2161, "step": 5109 }, { "epoch": 0.72, "learning_rate": 2.569568042526721e-07, "loss": 0.0035, "reward": -0.2493, "step": 5119 }, { "epoch": 0.73, "learning_rate": 2.2956147244796946e-07, "loss": 0.0031, "reward": -0.1123, "step": 5129 }, { "epoch": 0.73, "learning_rate": 2.0367575215059222e-07, "loss": 0.0031, "reward": -0.1892, "step": 5139 }, { "epoch": 0.73, "learning_rate": 1.7930783493055936e-07, "loss": 0.0031, "reward": -0.3653, "step": 5149 }, { "epoch": 0.73, "learning_rate": 1.5646543204712595e-07, "loss": 0.003, "reward": -0.1532, "step": 5159 }, { "epoch": 0.73, "learning_rate": 1.3515577200853946e-07, "loss": 0.0031, "reward": -0.2271, "step": 5169 }, { "epoch": 0.73, "learning_rate": 1.1538559828457586e-07, "loss": 0.0033, "reward": -0.1938, "step": 5179 }, { "epoch": 0.73, "learning_rate": 9.716116717254698e-08, "loss": 0.003, "reward": -0.2875, "step": 5189 }, { "epoch": 0.74, "learning_rate": 8.048824581750325e-08, "loss": 0.0032, "reward": -0.1349, "step": 5199 }, { "epoch": 0.74, "learning_rate": 6.537211038719571e-08, "loss": 0.0027, "reward": -0.1755, "step": 5209 }, { "epoch": 0.74, "learning_rate": 5.1817544402442686e-08, "loss": 0.0033, "reward": -0.1694, "step": 5219 }, { "epoch": 0.74, "learning_rate": 3.9828837223365166e-08, "loss": 0.0027, "reward": -0.1913, "step": 5229 }, { "epoch": 0.74, "learning_rate": 2.9409782692019218e-08, "loss": 0.0032, "reward": -0.2301, "step": 5239 }, { "epoch": 0.74, "learning_rate": 2.056367793183134e-08, "loss": 0.0032, "reward": -0.3049, "step": 5249 }, { "epoch": 0.74, "learning_rate": 1.3293322304213652e-08, "loss": 0.0034, "reward": -0.2132, "step": 5259 }, { "epoch": 0.75, "learning_rate": 7.601016522708616e-09, "loss": 0.0031, "reward": -0.2705, "step": 5269 }, { "epoch": 0.75, "learning_rate": 3.4885619249203086e-09, "loss": 0.0031, "reward": -0.1178, "step": 5279 }, { "epoch": 0.75, "learning_rate": 9.572599024820773e-10, "loss": 0.0031, "reward": -0.1919, "step": 5289 }, { "epoch": 0.75, "learning_rate": 7.911489227074853e-12, "loss": 0.0029, "reward": -0.0784, "step": 5299 }, { "epoch": 0.75, "learning_rate": 6.408171077015856e-10, "loss": 0.0031, "reward": -0.1285, "step": 5309 }, { "epoch": 0.75, "learning_rate": 2.8557764740955172e-09, "loss": 0.0031, "reward": -0.1184, "step": 5319 }, { "epoch": 0.75, "learning_rate": 6.652088661621703e-09, "loss": 0.003, "reward": -0.2125, "step": 5329 }, { "epoch": 0.76, "learning_rate": 1.2028552322327358e-08, "loss": 0.004, "reward": -0.202, "step": 5339 }, { "epoch": 0.76, "learning_rate": 1.898346606725887e-08, "loss": 0.0032, "reward": -0.206, "step": 5349 }, { "epoch": 0.76, "learning_rate": 2.7514629004871673e-08, "loss": 0.0033, "reward": -0.2027, "step": 5359 }, { "epoch": 0.76, "learning_rate": 3.761934143750256e-08, "loss": 0.0044, "reward": -0.2893, "step": 5369 }, { "epoch": 0.76, "learning_rate": 4.9294405715696324e-08, "loss": 0.0032, "reward": -0.1786, "step": 5379 }, { "epoch": 0.76, "learning_rate": 6.253612725009962e-08, "loss": 0.0033, "reward": -0.26, "step": 5389 }, { "epoch": 0.76, "learning_rate": 7.734031568062683e-08, "loss": 0.0024, "reward": -0.2568, "step": 5399 }, { "epoch": 0.77, "learning_rate": 9.370228620249778e-08, "loss": 0.0037, "reward": -0.1439, "step": 5409 }, { "epoch": 0.77, "learning_rate": 1.116168610487578e-07, "loss": 0.0028, "reward": -0.2807, "step": 5419 }, { "epoch": 0.77, "learning_rate": 1.3107837112877664e-07, "loss": 0.0032, "reward": -0.2143, "step": 5429 }, { "epoch": 0.77, "learning_rate": 1.5208065782225667e-07, "loss": 0.0031, "reward": -0.2846, "step": 5439 }, { "epoch": 0.77, "learning_rate": 1.7461707492811786e-07, "loss": 0.0032, "reward": -0.2704, "step": 5449 }, { "epoch": 0.77, "learning_rate": 1.9868049076771478e-07, "loss": 0.0035, "reward": -0.2247, "step": 5459 }, { "epoch": 0.77, "learning_rate": 2.2426329044164808e-07, "loss": 0.0032, "reward": -0.1316, "step": 5469 }, { "epoch": 0.78, "learning_rate": 2.5135737823952457e-07, "loss": 0.0033, "reward": -0.1427, "step": 5479 }, { "epoch": 0.78, "learning_rate": 2.7995418020185016e-07, "loss": 0.0029, "reward": -0.2073, "step": 5489 }, { "epoch": 0.78, "learning_rate": 3.100446468332596e-07, "loss": 0.0026, "reward": -0.1949, "step": 5499 }, { "epoch": 0.78, "learning_rate": 3.41619255966264e-07, "loss": 0.0028, "reward": -0.2821, "step": 5509 }, { "epoch": 0.78, "learning_rate": 3.746680157745258e-07, "loss": 0.0029, "reward": -0.1413, "step": 5519 }, { "epoch": 0.78, "learning_rate": 4.091804679348144e-07, "loss": 0.0032, "reward": -0.2282, "step": 5529 }, { "epoch": 0.78, "learning_rate": 4.451456909365265e-07, "loss": 0.003, "reward": -0.1473, "step": 5539 }, { "epoch": 0.79, "learning_rate": 4.825523035378365e-07, "loss": 0.0032, "reward": -0.1949, "step": 5549 }, { "epoch": 0.79, "learning_rate": 5.213884683672954e-07, "loss": 0.0041, "reward": -0.0118, "step": 5559 }, { "epoch": 0.79, "learning_rate": 5.61641895669785e-07, "loss": 0.0031, "reward": -0.1958, "step": 5569 }, { "epoch": 0.79, "learning_rate": 6.03299847195613e-07, "loss": 0.003, "reward": -0.0907, "step": 5579 }, { "epoch": 0.79, "learning_rate": 6.46349140231578e-07, "loss": 0.0032, "reward": -0.1633, "step": 5589 }, { "epoch": 0.79, "learning_rate": 6.907761517726225e-07, "loss": 0.0037, "reward": -0.3335, "step": 5599 }, { "epoch": 0.79, "learning_rate": 7.365668228328832e-07, "loss": 0.0032, "reward": -0.2729, "step": 5609 }, { "epoch": 0.8, "learning_rate": 7.837066628946427e-07, "loss": 0.004, "reward": -0.2201, "step": 5619 }, { "epoch": 0.8, "learning_rate": 8.321807544939037e-07, "loss": 0.0035, "reward": -0.1243, "step": 5629 }, { "epoch": 0.8, "learning_rate": 8.819737579410198e-07, "loss": 0.003, "reward": -0.1845, "step": 5639 }, { "epoch": 0.8, "learning_rate": 9.33069916174974e-07, "loss": 0.0032, "reward": -0.2037, "step": 5649 }, { "epoch": 0.8, "learning_rate": 9.854530597496953e-07, "loss": 0.0031, "reward": -0.0922, "step": 5659 }, { "epoch": 0.8, "learning_rate": 1.039106611950943e-06, "loss": 0.0025, "reward": -0.2618, "step": 5669 }, { "epoch": 0.8, "learning_rate": 1.0940135940419828e-06, "loss": 0.003, "reward": -0.1229, "step": 5679 }, { "epoch": 0.81, "learning_rate": 1.1501566306365758e-06, "loss": 0.0029, "reward": -0.0647, "step": 5689 }, { "epoch": 0.81, "learning_rate": 1.2075179551973986e-06, "loss": 0.003, "reward": -0.1913, "step": 5699 }, { "epoch": 0.81, "learning_rate": 1.2660794156583233e-06, "loss": 0.0028, "reward": -0.1541, "step": 5709 }, { "epoch": 0.81, "learning_rate": 1.3258224801686503e-06, "loss": 0.0033, "reward": -0.1554, "step": 5719 }, { "epoch": 0.81, "learning_rate": 1.3867282429575152e-06, "loss": 0.0041, "reward": -0.3137, "step": 5729 }, { "epoch": 0.81, "learning_rate": 1.4487774303166875e-06, "loss": 0.0033, "reward": -0.2465, "step": 5739 }, { "epoch": 0.81, "learning_rate": 1.511950406699712e-06, "loss": 0.0038, "reward": -0.2697, "step": 5749 }, { "epoch": 0.82, "learning_rate": 1.576227180935655e-06, "loss": 0.0032, "reward": -0.155, "step": 5759 }, { "epoch": 0.82, "learning_rate": 1.641587412555281e-06, "loss": 0.0026, "reward": -0.128, "step": 5769 }, { "epoch": 0.82, "learning_rate": 1.7080104182278716e-06, "loss": 0.0036, "reward": -0.1322, "step": 5779 }, { "epoch": 0.82, "learning_rate": 1.7754751783064622e-06, "loss": 0.0036, "reward": -0.2405, "step": 5789 }, { "epoch": 0.82, "learning_rate": 1.8439603434795516e-06, "loss": 0.0032, "reward": -0.1742, "step": 5799 }, { "epoch": 0.82, "learning_rate": 1.9134442415270926e-06, "loss": 0.003, "reward": -0.1415, "step": 5809 }, { "epoch": 0.82, "learning_rate": 1.983904884178742e-06, "loss": 0.0027, "reward": -0.3485, "step": 5819 }, { "epoch": 0.83, "learning_rate": 2.0553199740720214e-06, "loss": 0.0033, "reward": -0.2301, "step": 5829 }, { "epoch": 0.83, "learning_rate": 2.1276669118084043e-06, "loss": 0.0038, "reward": -0.419, "step": 5839 }, { "epoch": 0.83, "learning_rate": 2.200922803104868e-06, "loss": 0.0028, "reward": -0.1832, "step": 5849 }, { "epoch": 0.83, "learning_rate": 2.2750644660388675e-06, "loss": 0.0028, "reward": -0.1299, "step": 5859 }, { "epoch": 0.83, "learning_rate": 2.3500684383842605e-06, "loss": 0.0027, "reward": -0.0986, "step": 5869 }, { "epoch": 0.83, "learning_rate": 2.4259109850359695e-06, "loss": 0.0027, "reward": -0.1309, "step": 5879 }, { "epoch": 0.83, "learning_rate": 2.502568105520961e-06, "loss": 0.0034, "reward": -0.1672, "step": 5889 }, { "epoch": 0.84, "learning_rate": 2.580015541593277e-06, "loss": 0.0031, "reward": -0.1751, "step": 5899 }, { "epoch": 0.84, "learning_rate": 2.6582287849105274e-06, "loss": 0.0031, "reward": -0.2151, "step": 5909 }, { "epoch": 0.84, "learning_rate": 2.737183084789651e-06, "loss": 0.0029, "reward": -0.1455, "step": 5919 }, { "epoch": 0.84, "learning_rate": 2.816853456039244e-06, "loss": 0.0026, "reward": -0.1059, "step": 5929 }, { "epoch": 0.84, "learning_rate": 2.8972146868661828e-06, "loss": 0.0028, "reward": -0.1925, "step": 5939 }, { "epoch": 0.84, "learning_rate": 2.9782413468539417e-06, "loss": 0.0028, "reward": -0.3239, "step": 5949 }, { "epoch": 0.84, "learning_rate": 3.0599077950100608e-06, "loss": 0.0039, "reward": -0.2807, "step": 5959 }, { "epoch": 0.85, "learning_rate": 3.1421881878802864e-06, "loss": 0.0028, "reward": -0.2573, "step": 5969 }, { "epoch": 0.85, "learning_rate": 3.225056487726732e-06, "loss": 0.003, "reward": -0.2115, "step": 5979 }, { "epoch": 0.85, "learning_rate": 3.308486470767613e-06, "loss": 0.0028, "reward": -0.1847, "step": 5989 }, { "epoch": 0.85, "learning_rate": 3.3924517354757315e-06, "loss": 0.0027, "reward": -0.1512, "step": 5999 }, { "epoch": 0.85, "learning_rate": 3.476925710933339e-06, "loss": 0.0031, "reward": -0.0941, "step": 6009 }, { "epoch": 0.85, "learning_rate": 3.5618816652404714e-06, "loss": 0.0033, "reward": -0.2708, "step": 6019 }, { "epoch": 0.85, "learning_rate": 3.6472927139743546e-06, "loss": 0.0024, "reward": -0.1351, "step": 6029 }, { "epoch": 0.86, "learning_rate": 3.7331318286969787e-06, "loss": 0.003, "reward": -0.2288, "step": 6039 }, { "epoch": 0.86, "learning_rate": 3.819371845508301e-06, "loss": 0.0025, "reward": -0.1268, "step": 6049 }, { "epoch": 0.86, "learning_rate": 3.9059854736422616e-06, "loss": 0.0035, "reward": -0.1775, "step": 6059 }, { "epoch": 0.86, "learning_rate": 3.992945304103046e-06, "loss": 0.0029, "reward": -0.1164, "step": 6069 }, { "epoch": 0.86, "learning_rate": 4.080223818338627e-06, "loss": 0.003, "reward": -0.3306, "step": 6079 }, { "epoch": 0.86, "learning_rate": 4.16779339694911e-06, "loss": 0.0029, "reward": -0.124, "step": 6089 }, { "epoch": 0.86, "learning_rate": 4.25562632842687e-06, "loss": 0.0029, "reward": -0.1823, "step": 6099 }, { "epoch": 0.86, "learning_rate": 4.343694817925945e-06, "loss": 0.0026, "reward": -0.2144, "step": 6109 }, { "epoch": 0.87, "learning_rate": 4.4319709960577205e-06, "loss": 0.0028, "reward": -0.0664, "step": 6119 }, { "epoch": 0.87, "learning_rate": 4.520426927710255e-06, "loss": 0.0027, "reward": -0.1177, "step": 6129 }, { "epoch": 0.87, "learning_rate": 4.609034620888349e-06, "loss": 0.0034, "reward": -0.135, "step": 6139 }, { "epoch": 0.87, "learning_rate": 4.697766035571671e-06, "loss": 0.0043, "reward": -0.3131, "step": 6149 }, { "epoch": 0.87, "learning_rate": 4.786593092588086e-06, "loss": 0.0024, "reward": -0.234, "step": 6159 }, { "epoch": 0.87, "learning_rate": 4.875487682499278e-06, "loss": 0.0027, "reward": -0.0573, "step": 6169 }, { "epoch": 0.87, "learning_rate": 4.96442167449609e-06, "loss": 0.0034, "reward": -0.1771, "step": 6179 }, { "epoch": 0.88, "learning_rate": 5.053366925300511e-06, "loss": 0.0028, "reward": -0.1481, "step": 6189 }, { "epoch": 0.88, "learning_rate": 5.142295288071675e-06, "loss": 0.0041, "reward": -0.2635, "step": 6199 }, { "epoch": 0.88, "learning_rate": 5.2311786213129315e-06, "loss": 0.003, "reward": -0.1914, "step": 6209 }, { "epoch": 0.88, "learning_rate": 5.319988797777316e-06, "loss": 0.0027, "reward": -0.181, "step": 6219 }, { "epoch": 0.88, "learning_rate": 5.408697713368388e-06, "loss": 0.0028, "reward": -0.1659, "step": 6229 }, { "epoch": 0.88, "learning_rate": 5.4972772960338784e-06, "loss": 0.0024, "reward": -0.0837, "step": 6239 }, { "epoch": 0.88, "learning_rate": 5.585699514649057e-06, "loss": 0.003, "reward": -0.1156, "step": 6249 }, { "epoch": 0.89, "learning_rate": 5.6739363878872754e-06, "loss": 0.003, "reward": -0.1918, "step": 6259 }, { "epoch": 0.89, "learning_rate": 5.761959993074669e-06, "loss": 0.0027, "reward": -0.2119, "step": 6269 }, { "epoch": 0.89, "learning_rate": 5.84974247502633e-06, "loss": 0.0031, "reward": -0.1654, "step": 6279 }, { "epoch": 0.89, "learning_rate": 5.93725605486109e-06, "loss": 0.0029, "reward": -0.1048, "step": 6289 }, { "epoch": 0.89, "learning_rate": 6.024473038792242e-06, "loss": 0.0029, "reward": -0.1743, "step": 6299 }, { "epoch": 0.89, "learning_rate": 6.1113658268912065e-06, "loss": 0.0028, "reward": -0.1883, "step": 6309 }, { "epoch": 0.89, "learning_rate": 6.197906921821632e-06, "loss": 0.0027, "reward": -0.1351, "step": 6319 }, { "epoch": 0.9, "learning_rate": 6.284068937540894e-06, "loss": 0.0027, "reward": -0.1687, "step": 6329 }, { "epoch": 0.9, "learning_rate": 6.369824607966508e-06, "loss": 0.0028, "reward": -0.1027, "step": 6339 }, { "epoch": 0.9, "learning_rate": 6.455146795604479e-06, "loss": 0.0028, "reward": -0.0773, "step": 6349 }, { "epoch": 0.9, "learning_rate": 6.5400085001370186e-06, "loss": 0.0025, "reward": -0.2412, "step": 6359 }, { "epoch": 0.9, "learning_rate": 6.624382866966794e-06, "loss": 0.003, "reward": -0.1633, "step": 6369 }, { "epoch": 0.9, "learning_rate": 6.708243195715136e-06, "loss": 0.0026, "reward": -0.2322, "step": 6379 }, { "epoch": 0.9, "learning_rate": 6.791562948671414e-06, "loss": 0.0032, "reward": -0.1261, "step": 6389 }, { "epoch": 0.91, "learning_rate": 6.874315759190886e-06, "loss": 0.0025, "reward": -0.1441, "step": 6399 }, { "epoch": 0.91, "learning_rate": 6.956475440038507e-06, "loss": 0.0031, "reward": -0.0489, "step": 6409 }, { "epoch": 0.91, "learning_rate": 7.038015991675885e-06, "loss": 0.0028, "reward": -0.1033, "step": 6419 }, { "epoch": 0.91, "learning_rate": 7.118911610488885e-06, "loss": 0.0027, "reward": -0.2106, "step": 6429 }, { "epoch": 0.91, "learning_rate": 7.199136696953178e-06, "loss": 0.0036, "reward": -0.0478, "step": 6439 }, { "epoch": 0.91, "learning_rate": 7.2786658637353125e-06, "loss": 0.003, "reward": -0.218, "step": 6449 }, { "epoch": 0.91, "learning_rate": 7.357473943726493e-06, "loss": 0.0026, "reward": -0.0883, "step": 6459 }, { "epoch": 0.92, "learning_rate": 7.435535998006814e-06, "loss": 0.0034, "reward": -0.2207, "step": 6469 }, { "epoch": 0.92, "learning_rate": 7.512827323737152e-06, "loss": 0.0033, "reward": -0.192, "step": 6479 }, { "epoch": 0.92, "learning_rate": 7.589323461976461e-06, "loss": 0.0028, "reward": -0.1123, "step": 6489 }, { "epoch": 0.92, "learning_rate": 7.665000205421812e-06, "loss": 0.0029, "reward": -0.2671, "step": 6499 }, { "epoch": 0.92, "learning_rate": 7.739833606068832e-06, "loss": 0.0031, "reward": -0.1165, "step": 6509 }, { "epoch": 0.92, "learning_rate": 7.81379998279006e-06, "loss": 0.0029, "reward": -0.1378, "step": 6519 }, { "epoch": 0.92, "learning_rate": 7.88687592882892e-06, "loss": 0.0033, "reward": -0.1337, "step": 6529 }, { "epoch": 0.93, "learning_rate": 7.959038319206758e-06, "loss": 0.0027, "reward": -0.2007, "step": 6539 }, { "epoch": 0.93, "learning_rate": 8.03026431804083e-06, "loss": 0.0026, "reward": -0.1243, "step": 6549 }, { "epoch": 0.93, "learning_rate": 8.100531385770696e-06, "loss": 0.0029, "reward": -0.0918, "step": 6559 }, { "epoch": 0.93, "learning_rate": 8.169817286290935e-06, "loss": 0.0029, "reward": -0.0355, "step": 6569 }, { "epoch": 0.93, "learning_rate": 8.238100093987765e-06, "loss": 0.0027, "reward": -0.2479, "step": 6579 }, { "epoch": 0.93, "learning_rate": 8.305358200677449e-06, "loss": 0.0026, "reward": -0.074, "step": 6589 }, { "epoch": 0.93, "learning_rate": 8.371570322444209e-06, "loss": 0.0029, "reward": -0.0884, "step": 6599 }, { "epoch": 0.94, "learning_rate": 8.436715506375557e-06, "loss": 0.0028, "reward": -0.1674, "step": 6609 }, { "epoch": 0.94, "learning_rate": 8.500773137192906e-06, "loss": 0.0035, "reward": -0.2794, "step": 6619 }, { "epoch": 0.94, "learning_rate": 8.56372294377524e-06, "loss": 0.0027, "reward": -0.1072, "step": 6629 }, { "epoch": 0.94, "learning_rate": 8.625545005574002e-06, "loss": 0.0023, "reward": -0.0992, "step": 6639 }, { "epoch": 0.94, "learning_rate": 8.686219758916918e-06, "loss": 0.0029, "reward": -0.1256, "step": 6649 }, { "epoch": 0.94, "learning_rate": 8.745728003198991e-06, "loss": 0.0039, "reward": 0.0413, "step": 6659 }, { "epoch": 0.94, "learning_rate": 8.804050906958537e-06, "loss": 0.0026, "reward": -0.1593, "step": 6669 }, { "epoch": 0.95, "learning_rate": 8.861170013836436e-06, "loss": 0.0031, "reward": -0.0352, "step": 6679 }, { "epoch": 0.95, "learning_rate": 8.917067248416647e-06, "loss": 0.003, "reward": -0.0722, "step": 6689 }, { "epoch": 0.95, "learning_rate": 8.971724921946235e-06, "loss": 0.0027, "reward": -0.1898, "step": 6699 }, { "epoch": 0.95, "learning_rate": 9.025125737932962e-06, "loss": 0.003, "reward": -0.1166, "step": 6709 }, { "epoch": 0.95, "learning_rate": 9.077252797618818e-06, "loss": 0.0024, "reward": 0.0003, "step": 6719 }, { "epoch": 0.95, "learning_rate": 9.128089605327627e-06, "loss": 0.0028, "reward": -0.0084, "step": 6729 }, { "epoch": 0.95, "learning_rate": 9.177620073685139e-06, "loss": 0.0025, "reward": 0.0006, "step": 6739 }, { "epoch": 0.96, "learning_rate": 9.225828528709911e-06, "loss": 0.0025, "reward": -0.0565, "step": 6749 }, { "epoch": 0.96, "learning_rate": 9.27269971477334e-06, "loss": 0.0027, "reward": -0.2262, "step": 6759 }, { "epoch": 0.96, "learning_rate": 9.318218799427321e-06, "loss": 0.0024, "reward": -0.2918, "step": 6769 }, { "epoch": 0.96, "learning_rate": 9.362371378098033e-06, "loss": 0.0027, "reward": -0.1157, "step": 6779 }, { "epoch": 0.96, "learning_rate": 9.405143478644232e-06, "loss": 0.0031, "reward": -0.1187, "step": 6789 }, { "epoch": 0.96, "learning_rate": 9.446521565778804e-06, "loss": 0.0028, "reward": -0.0971, "step": 6799 }, { "epoch": 0.96, "learning_rate": 9.486492545351985e-06, "loss": 0.0029, "reward": -0.0164, "step": 6809 }, { "epoch": 0.97, "learning_rate": 9.525043768495047e-06, "loss": 0.0029, "reward": -0.2, "step": 6819 }, { "epoch": 0.97, "learning_rate": 9.562163035623038e-06, "loss": 0.0022, "reward": 0.0569, "step": 6829 }, { "epoch": 0.97, "learning_rate": 9.597838600295355e-06, "loss": 0.0023, "reward": -0.163, "step": 6839 }, { "epoch": 0.97, "learning_rate": 9.632059172932935e-06, "loss": 0.0027, "reward": -0.1407, "step": 6849 }, { "epoch": 0.97, "learning_rate": 9.664813924390828e-06, "loss": 0.0026, "reward": -0.1893, "step": 6859 }, { "epoch": 0.97, "learning_rate": 9.696092489385132e-06, "loss": 0.0027, "reward": -0.0518, "step": 6869 }, { "epoch": 0.97, "learning_rate": 9.72588496977306e-06, "loss": 0.0026, "reward": -0.1236, "step": 6879 }, { "epoch": 0.98, "learning_rate": 9.754181937685261e-06, "loss": 0.0026, "reward": -0.2622, "step": 6889 }, { "epoch": 0.98, "learning_rate": 9.780974438509254e-06, "loss": 0.0025, "reward": -0.1576, "step": 6899 }, { "epoch": 0.98, "learning_rate": 9.80625399372313e-06, "loss": 0.0036, "reward": -0.0548, "step": 6909 }, { "epoch": 0.98, "learning_rate": 9.8300126035786e-06, "loss": 0.0037, "reward": -0.219, "step": 6919 }, { "epoch": 0.98, "learning_rate": 9.852242749632524e-06, "loss": 0.0028, "reward": -0.0597, "step": 6929 }, { "epoch": 0.98, "learning_rate": 9.872937397126121e-06, "loss": 0.0024, "reward": -0.133, "step": 6939 }, { "epoch": 0.98, "learning_rate": 9.89208999721115e-06, "loss": 0.0027, "reward": -0.0708, "step": 6949 }, { "epoch": 0.99, "learning_rate": 9.909694489022273e-06, "loss": 0.0026, "reward": -0.0812, "step": 6959 }, { "epoch": 0.99, "learning_rate": 9.92574530159505e-06, "loss": 0.0031, "reward": 0.0002, "step": 6969 }, { "epoch": 0.99, "learning_rate": 9.940237355628861e-06, "loss": 0.003, "reward": 0.0289, "step": 6979 }, { "epoch": 0.99, "learning_rate": 9.95316606509427e-06, "loss": 0.0029, "reward": -0.0353, "step": 6989 }, { "epoch": 0.99, "learning_rate": 9.964527338684262e-06, "loss": 0.0023, "reward": -0.0618, "step": 6999 }, { "epoch": 0.99, "learning_rate": 9.974317581108963e-06, "loss": 0.003, "reward": -0.1512, "step": 7009 }, { "epoch": 0.99, "learning_rate": 9.98253369423336e-06, "loss": 0.0032, "reward": 0.0394, "step": 7019 }, { "epoch": 1.0, "learning_rate": 9.989173078057715e-06, "loss": 0.0027, "reward": -0.081, "step": 7029 }, { "epoch": 1.0, "learning_rate": 9.99423363154034e-06, "loss": 0.0028, "reward": -0.0309, "step": 7039 }, { "epoch": 1.0, "learning_rate": 9.99771375326247e-06, "loss": 0.0025, "reward": -0.1167, "step": 7049 }, { "epoch": 1.0, "learning_rate": 9.999612341935039e-06, "loss": 0.0024, "reward": -0.0893, "step": 7059 } ], "max_steps": 7063, "num_train_epochs": 1.0, "total_flos": 0, "trial_name": null, "trial_params": null }