| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 16517, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.991281709753588e-05, |
| "loss": 1.7472, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.981594720590907e-05, |
| "loss": 1.7753, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.971907731428226e-05, |
| "loss": 1.7184, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 7.962220742265544e-05, |
| "loss": 1.7141, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.952533753102863e-05, |
| "loss": 1.7126, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.942846763940184e-05, |
| "loss": 1.7041, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.933159774777503e-05, |
| "loss": 1.7034, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.923472785614822e-05, |
| "loss": 1.7561, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.913785796452141e-05, |
| "loss": 1.7129, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.90409880728946e-05, |
| "loss": 1.7346, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.894411818126779e-05, |
| "loss": 1.7077, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.884724828964099e-05, |
| "loss": 1.6912, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.875037839801418e-05, |
| "loss": 1.7212, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.865350850638737e-05, |
| "loss": 1.6983, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.855663861476056e-05, |
| "loss": 1.7179, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.845976872313375e-05, |
| "loss": 1.7045, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.836289883150694e-05, |
| "loss": 1.728, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.826602893988014e-05, |
| "loss": 1.7014, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.816915904825333e-05, |
| "loss": 1.653, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.807228915662652e-05, |
| "loss": 1.6554, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.797541926499971e-05, |
| "loss": 1.6843, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.78785493733729e-05, |
| "loss": 1.6688, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.778167948174609e-05, |
| "loss": 1.7169, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.768480959011928e-05, |
| "loss": 1.7075, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.758793969849247e-05, |
| "loss": 1.683, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.749106980686566e-05, |
| "loss": 1.7154, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.739419991523885e-05, |
| "loss": 1.6954, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.729733002361204e-05, |
| "loss": 1.7439, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.720046013198524e-05, |
| "loss": 1.6487, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.710359024035843e-05, |
| "loss": 1.703, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.700672034873162e-05, |
| "loss": 1.6851, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.690985045710481e-05, |
| "loss": 1.6851, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.6812980565478e-05, |
| "loss": 1.7042, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.671611067385119e-05, |
| "loss": 1.7165, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.661924078222438e-05, |
| "loss": 1.6873, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.652237089059757e-05, |
| "loss": 1.6847, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 7.642550099897076e-05, |
| "loss": 1.6251, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.632863110734395e-05, |
| "loss": 1.6964, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.623176121571715e-05, |
| "loss": 1.6821, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.613489132409034e-05, |
| "loss": 1.6484, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.603802143246353e-05, |
| "loss": 1.7012, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.594115154083672e-05, |
| "loss": 1.7086, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.584428164920991e-05, |
| "loss": 1.6811, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.57474117575831e-05, |
| "loss": 1.6587, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.565054186595629e-05, |
| "loss": 1.6945, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.555367197432948e-05, |
| "loss": 1.6781, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.545680208270267e-05, |
| "loss": 1.6977, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.535993219107586e-05, |
| "loss": 1.6935, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.526306229944905e-05, |
| "loss": 1.6759, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.516619240782225e-05, |
| "loss": 1.6708, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.507900950535812e-05, |
| "loss": 1.7009, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.498213961373132e-05, |
| "loss": 1.6572, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.488526972210451e-05, |
| "loss": 1.705, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.47883998304777e-05, |
| "loss": 1.6606, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.469152993885089e-05, |
| "loss": 1.6746, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.459466004722408e-05, |
| "loss": 1.6656, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.449779015559727e-05, |
| "loss": 1.6738, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.440092026397046e-05, |
| "loss": 1.6524, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.430405037234365e-05, |
| "loss": 1.6875, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.420718048071684e-05, |
| "loss": 1.6954, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 7.411031058909003e-05, |
| "loss": 1.6745, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.401344069746322e-05, |
| "loss": 1.6925, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.391657080583642e-05, |
| "loss": 1.6939, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.381970091420961e-05, |
| "loss": 1.6746, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.37228310225828e-05, |
| "loss": 1.707, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.362596113095599e-05, |
| "loss": 1.6802, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.352909123932918e-05, |
| "loss": 1.6477, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.343222134770237e-05, |
| "loss": 1.6885, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.333535145607556e-05, |
| "loss": 1.6761, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 7.323848156444875e-05, |
| "loss": 1.6532, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.314161167282194e-05, |
| "loss": 1.6493, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.304474178119513e-05, |
| "loss": 1.654, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.295271538414967e-05, |
| "loss": 1.7094, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.285584549252286e-05, |
| "loss": 1.6744, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.275897560089605e-05, |
| "loss": 1.6327, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.266210570926924e-05, |
| "loss": 1.7002, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.256523581764243e-05, |
| "loss": 1.6273, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 7.246836592601563e-05, |
| "loss": 1.6569, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.237149603438882e-05, |
| "loss": 1.6679, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.227462614276201e-05, |
| "loss": 1.6805, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.21777562511352e-05, |
| "loss": 1.6357, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.208088635950839e-05, |
| "loss": 1.6735, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.198401646788158e-05, |
| "loss": 1.7039, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.188714657625478e-05, |
| "loss": 1.6771, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.179027668462797e-05, |
| "loss": 1.6594, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.169340679300116e-05, |
| "loss": 1.6508, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.159653690137435e-05, |
| "loss": 1.695, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.149966700974754e-05, |
| "loss": 1.7088, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.140279711812073e-05, |
| "loss": 1.687, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.130592722649393e-05, |
| "loss": 1.6499, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.120905733486712e-05, |
| "loss": 1.6608, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.111218744324031e-05, |
| "loss": 1.7031, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.10153175516135e-05, |
| "loss": 1.6258, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 7.091844765998669e-05, |
| "loss": 1.6773, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.082157776835988e-05, |
| "loss": 1.6606, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.072470787673307e-05, |
| "loss": 1.6672, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.062783798510626e-05, |
| "loss": 1.6583, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.053096809347945e-05, |
| "loss": 1.6742, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.043409820185264e-05, |
| "loss": 1.6509, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.033722831022584e-05, |
| "loss": 1.7019, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.024035841859903e-05, |
| "loss": 1.6296, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.014348852697222e-05, |
| "loss": 1.6784, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 7.004661863534541e-05, |
| "loss": 1.6412, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.99497487437186e-05, |
| "loss": 1.6583, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.985287885209179e-05, |
| "loss": 1.6388, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.975600896046498e-05, |
| "loss": 1.6553, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.965913906883817e-05, |
| "loss": 1.6597, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.956226917721136e-05, |
| "loss": 1.6743, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.946539928558455e-05, |
| "loss": 1.651, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.936852939395774e-05, |
| "loss": 1.6682, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.927165950233094e-05, |
| "loss": 1.6666, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.917478961070413e-05, |
| "loss": 1.681, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.907791971907732e-05, |
| "loss": 1.6866, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.898104982745051e-05, |
| "loss": 1.664, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.88841799358237e-05, |
| "loss": 1.6581, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.878731004419689e-05, |
| "loss": 1.6411, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.869044015257008e-05, |
| "loss": 1.6443, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.859357026094327e-05, |
| "loss": 1.6331, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 6.849670036931646e-05, |
| "loss": 1.6616, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.839983047768965e-05, |
| "loss": 1.6334, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.830296058606285e-05, |
| "loss": 1.6791, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.820609069443604e-05, |
| "loss": 1.6658, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.810922080280923e-05, |
| "loss": 1.648, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.801235091118242e-05, |
| "loss": 1.6717, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.791548101955561e-05, |
| "loss": 1.6567, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.78186111279288e-05, |
| "loss": 1.6754, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.772174123630199e-05, |
| "loss": 1.6728, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.76248713446752e-05, |
| "loss": 1.6699, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.752800145304838e-05, |
| "loss": 1.7137, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.743113156142157e-05, |
| "loss": 1.6644, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.733426166979476e-05, |
| "loss": 1.6794, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.723739177816795e-05, |
| "loss": 1.6934, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.714052188654114e-05, |
| "loss": 1.7115, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.704365199491434e-05, |
| "loss": 1.7005, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.694678210328753e-05, |
| "loss": 1.6568, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 6.684991221166072e-05, |
| "loss": 1.6423, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.675304232003391e-05, |
| "loss": 1.6507, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.66561724284071e-05, |
| "loss": 1.6431, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.65593025367803e-05, |
| "loss": 1.6746, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.646243264515348e-05, |
| "loss": 1.6981, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.636556275352667e-05, |
| "loss": 1.6163, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.626869286189988e-05, |
| "loss": 1.6597, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.617182297027307e-05, |
| "loss": 1.6613, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 6.607495307864626e-05, |
| "loss": 1.6561, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.597808318701944e-05, |
| "loss": 1.7012, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.588121329539263e-05, |
| "loss": 1.6617, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.578434340376582e-05, |
| "loss": 1.6302, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.568747351213901e-05, |
| "loss": 1.6831, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.55906036205122e-05, |
| "loss": 1.6385, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.549373372888539e-05, |
| "loss": 1.6413, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.539686383725858e-05, |
| "loss": 1.6389, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 6.529999394563177e-05, |
| "loss": 1.6309, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.520312405400498e-05, |
| "loss": 1.6157, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.510625416237817e-05, |
| "loss": 1.6425, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.500938427075136e-05, |
| "loss": 1.648, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.491251437912454e-05, |
| "loss": 1.6576, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.481564448749773e-05, |
| "loss": 1.6705, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.471877459587092e-05, |
| "loss": 1.6638, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.462190470424411e-05, |
| "loss": 1.688, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.45250348126173e-05, |
| "loss": 1.6642, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 6.442816492099049e-05, |
| "loss": 1.6906, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.433129502936368e-05, |
| "loss": 1.6478, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.423442513773689e-05, |
| "loss": 1.6413, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.413755524611008e-05, |
| "loss": 1.6741, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.404068535448327e-05, |
| "loss": 1.651, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.394381546285645e-05, |
| "loss": 1.6515, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.384694557122964e-05, |
| "loss": 1.6146, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.375007567960283e-05, |
| "loss": 1.6759, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 6.365320578797602e-05, |
| "loss": 1.6573, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.355633589634921e-05, |
| "loss": 1.6676, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.34594660047224e-05, |
| "loss": 1.6448, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.33625961130956e-05, |
| "loss": 1.6594, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.327541321063148e-05, |
| "loss": 1.6245, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.318338681358601e-05, |
| "loss": 1.67, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.30865169219592e-05, |
| "loss": 1.6262, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.298964703033239e-05, |
| "loss": 1.6462, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 6.289277713870558e-05, |
| "loss": 1.6803, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.279590724707877e-05, |
| "loss": 1.6403, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.269903735545196e-05, |
| "loss": 1.653, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.260216746382515e-05, |
| "loss": 1.6777, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.250529757219834e-05, |
| "loss": 1.6302, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.240842768057154e-05, |
| "loss": 1.7027, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.231155778894473e-05, |
| "loss": 1.636, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.221468789731792e-05, |
| "loss": 1.665, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 6.211781800569111e-05, |
| "loss": 1.6566, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.20209481140643e-05, |
| "loss": 1.6898, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.192407822243749e-05, |
| "loss": 1.6244, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.182720833081068e-05, |
| "loss": 1.6511, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.173033843918387e-05, |
| "loss": 1.6601, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.163346854755706e-05, |
| "loss": 1.6207, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.153659865593025e-05, |
| "loss": 1.6243, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.143972876430344e-05, |
| "loss": 1.6506, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.134285887267664e-05, |
| "loss": 1.6521, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 6.124598898104983e-05, |
| "loss": 1.6568, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.114911908942302e-05, |
| "loss": 1.6896, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.105224919779621e-05, |
| "loss": 1.647, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.095537930616941e-05, |
| "loss": 1.6389, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.08585094145426e-05, |
| "loss": 1.6302, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.076163952291579e-05, |
| "loss": 1.6465, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.066476963128898e-05, |
| "loss": 1.6929, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.056789973966217e-05, |
| "loss": 1.7165, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.047102984803536e-05, |
| "loss": 1.6415, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.037415995640856e-05, |
| "loss": 1.6101, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.027729006478175e-05, |
| "loss": 1.6642, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.018042017315494e-05, |
| "loss": 1.6804, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.008355028152813e-05, |
| "loss": 1.6298, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 5.998668038990132e-05, |
| "loss": 1.6838, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 5.988981049827451e-05, |
| "loss": 1.6595, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 5.97929406066477e-05, |
| "loss": 1.6489, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 5.969607071502089e-05, |
| "loss": 1.6692, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.9599200823394084e-05, |
| "loss": 1.6966, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.9502330931767274e-05, |
| "loss": 1.646, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.9405461040140463e-05, |
| "loss": 1.6557, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.930859114851366e-05, |
| "loss": 1.6043, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.921172125688685e-05, |
| "loss": 1.6096, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.911485136526004e-05, |
| "loss": 1.6354, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.9017981473633236e-05, |
| "loss": 1.6252, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 5.8921111582006425e-05, |
| "loss": 1.6732, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8824241690379615e-05, |
| "loss": 1.6357, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8727371798752805e-05, |
| "loss": 1.6793, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8630501907125994e-05, |
| "loss": 1.6414, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8533632015499184e-05, |
| "loss": 1.6417, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8436762123872374e-05, |
| "loss": 1.6241, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.833989223224558e-05, |
| "loss": 1.6493, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8243022340618767e-05, |
| "loss": 1.6638, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8146152448991956e-05, |
| "loss": 1.6724, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 5.8049282557365146e-05, |
| "loss": 1.6689, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.7952412665738336e-05, |
| "loss": 1.6383, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.7855542774111525e-05, |
| "loss": 1.6477, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.7758672882484715e-05, |
| "loss": 1.6391, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.7661802990857905e-05, |
| "loss": 1.6811, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.7564933099231094e-05, |
| "loss": 1.6605, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.746806320760429e-05, |
| "loss": 1.6345, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.737119331597748e-05, |
| "loss": 1.6144, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 5.727432342435068e-05, |
| "loss": 1.6525, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.7177453532723866e-05, |
| "loss": 1.6995, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.7080583641097056e-05, |
| "loss": 1.6183, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.6983713749470246e-05, |
| "loss": 1.6517, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.688684385784344e-05, |
| "loss": 1.6354, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.678997396621663e-05, |
| "loss": 1.6626, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.669310407458982e-05, |
| "loss": 1.6677, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.660107767754436e-05, |
| "loss": 1.6899, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 5.650420778591755e-05, |
| "loss": 1.6313, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.640733789429074e-05, |
| "loss": 1.6399, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.631046800266393e-05, |
| "loss": 1.6035, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.6213598111037117e-05, |
| "loss": 1.6412, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.6116728219410306e-05, |
| "loss": 1.6152, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.6019858327783496e-05, |
| "loss": 1.6289, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.5922988436156686e-05, |
| "loss": 1.6329, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.582611854452988e-05, |
| "loss": 1.6427, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 5.572924865290308e-05, |
| "loss": 1.6263, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.563237876127627e-05, |
| "loss": 1.628, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.553550886964946e-05, |
| "loss": 1.59, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.543863897802265e-05, |
| "loss": 1.6063, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.534176908639584e-05, |
| "loss": 1.6684, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.5244899194769034e-05, |
| "loss": 1.6565, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.514802930314222e-05, |
| "loss": 1.6514, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.505115941151541e-05, |
| "loss": 1.6782, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.49542895198886e-05, |
| "loss": 1.6394, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 5.485741962826179e-05, |
| "loss": 1.6535, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.476054973663498e-05, |
| "loss": 1.6642, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4663679845008185e-05, |
| "loss": 1.65, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4566809953381375e-05, |
| "loss": 1.653, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4469940061754564e-05, |
| "loss": 1.6605, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4373070170127754e-05, |
| "loss": 1.6758, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4276200278500944e-05, |
| "loss": 1.6901, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.4179330386874133e-05, |
| "loss": 1.678, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 5.408246049524732e-05, |
| "loss": 1.6638, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.398559060362051e-05, |
| "loss": 1.6284, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.38887207119937e-05, |
| "loss": 1.6429, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.379185082036689e-05, |
| "loss": 1.6303, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.3694980928740095e-05, |
| "loss": 1.6324, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.3598111037113285e-05, |
| "loss": 1.6158, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.3501241145486475e-05, |
| "loss": 1.655, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.3404371253859664e-05, |
| "loss": 1.6224, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 5.3307501362232854e-05, |
| "loss": 1.6428, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.3210631470606044e-05, |
| "loss": 1.681, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.311376157897924e-05, |
| "loss": 1.6493, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.301689168735243e-05, |
| "loss": 1.6387, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.292002179572562e-05, |
| "loss": 1.6817, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.282315190409881e-05, |
| "loss": 1.6243, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.2726282012472e-05, |
| "loss": 1.5869, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.2629412120845195e-05, |
| "loss": 1.6693, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 5.253254222921839e-05, |
| "loss": 1.6266, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.243567233759158e-05, |
| "loss": 1.6395, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.233880244596477e-05, |
| "loss": 1.6288, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.2246776048919294e-05, |
| "loss": 1.7032, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.214990615729249e-05, |
| "loss": 1.6419, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.205303626566569e-05, |
| "loss": 1.6131, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.1956166374038876e-05, |
| "loss": 1.6691, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.1859296482412066e-05, |
| "loss": 1.656, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.1762426590785256e-05, |
| "loss": 1.64, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 5.1665556699158445e-05, |
| "loss": 1.6653, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.1568686807531635e-05, |
| "loss": 1.6299, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.147181691590483e-05, |
| "loss": 1.6345, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.137494702427802e-05, |
| "loss": 1.6253, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.127807713265121e-05, |
| "loss": 1.6713, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.11812072410244e-05, |
| "loss": 1.6141, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.108433734939759e-05, |
| "loss": 1.6324, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.0987467457770787e-05, |
| "loss": 1.6248, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 5.089059756614398e-05, |
| "loss": 1.615, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.079372767451717e-05, |
| "loss": 1.6247, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.069685778289036e-05, |
| "loss": 1.6629, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.059998789126355e-05, |
| "loss": 1.6665, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.050311799963674e-05, |
| "loss": 1.6781, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.040624810800993e-05, |
| "loss": 1.7025, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.030937821638312e-05, |
| "loss": 1.6107, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.021250832475631e-05, |
| "loss": 1.6079, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 5.01156384331295e-05, |
| "loss": 1.5942, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 5.0018768541502704e-05, |
| "loss": 1.662, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.992189864987589e-05, |
| "loss": 1.6141, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.982502875824908e-05, |
| "loss": 1.6506, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.972815886662227e-05, |
| "loss": 1.6635, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.963128897499546e-05, |
| "loss": 1.6342, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.953441908336865e-05, |
| "loss": 1.6126, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.943754919174184e-05, |
| "loss": 1.6632, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.934067930011504e-05, |
| "loss": 1.6529, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.924380940848823e-05, |
| "loss": 1.6363, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.914693951686142e-05, |
| "loss": 1.6081, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.905006962523461e-05, |
| "loss": 1.6618, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.8953199733607803e-05, |
| "loss": 1.6469, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.885632984198099e-05, |
| "loss": 1.6323, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.875945995035419e-05, |
| "loss": 1.6043, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.866259005872738e-05, |
| "loss": 1.6382, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.856572016710057e-05, |
| "loss": 1.6343, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.846885027547376e-05, |
| "loss": 1.64, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.837198038384695e-05, |
| "loss": 1.6238, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.827511049222014e-05, |
| "loss": 1.6478, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.817824060059333e-05, |
| "loss": 1.6598, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.808137070896652e-05, |
| "loss": 1.598, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.798450081733972e-05, |
| "loss": 1.6265, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.788763092571291e-05, |
| "loss": 1.662, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.77907610340861e-05, |
| "loss": 1.6587, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.769389114245929e-05, |
| "loss": 1.6199, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.759702125083248e-05, |
| "loss": 1.5937, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.750015135920567e-05, |
| "loss": 1.6235, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.740328146757886e-05, |
| "loss": 1.645, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.730641157595205e-05, |
| "loss": 1.6179, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.7209541684325245e-05, |
| "loss": 1.6384, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.7112671792698434e-05, |
| "loss": 1.6708, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.7015801901071624e-05, |
| "loss": 1.5984, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.691893200944482e-05, |
| "loss": 1.583, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.682206211781801e-05, |
| "loss": 1.6398, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.67251922261912e-05, |
| "loss": 1.6267, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6628322334564396e-05, |
| "loss": 1.6761, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6531452442937586e-05, |
| "loss": 1.6057, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6434582551310776e-05, |
| "loss": 1.6732, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6337712659683965e-05, |
| "loss": 1.6742, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6240842768057155e-05, |
| "loss": 1.634, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.6143972876430345e-05, |
| "loss": 1.6217, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.6047102984803534e-05, |
| "loss": 1.6745, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.595023309317674e-05, |
| "loss": 1.6547, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.585336320154993e-05, |
| "loss": 1.6313, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.575649330992312e-05, |
| "loss": 1.6233, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.5659623418296306e-05, |
| "loss": 1.6077, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.5562753526669496e-05, |
| "loss": 1.6164, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.5465883635042686e-05, |
| "loss": 1.5924, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.5369013743415875e-05, |
| "loss": 1.6472, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.5272143851789065e-05, |
| "loss": 1.602, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.5175273960162255e-05, |
| "loss": 1.6277, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.507840406853545e-05, |
| "loss": 1.6479, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.498153417690864e-05, |
| "loss": 1.6605, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.488950777986318e-05, |
| "loss": 1.654, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.479263788823637e-05, |
| "loss": 1.6573, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.4695767996609557e-05, |
| "loss": 1.6305, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.4598898104982746e-05, |
| "loss": 1.6183, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.4502028213355936e-05, |
| "loss": 1.6172, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.4405158321729126e-05, |
| "loss": 1.6151, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.430828843010233e-05, |
| "loss": 1.6275, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.421141853847552e-05, |
| "loss": 1.6516, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.411454864684871e-05, |
| "loss": 1.6697, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.40176787552219e-05, |
| "loss": 1.6268, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.392080886359509e-05, |
| "loss": 1.6127, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.382393897196828e-05, |
| "loss": 1.6206, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.372706908034147e-05, |
| "loss": 1.6071, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.3635042683296e-05, |
| "loss": 1.6238, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.353817279166919e-05, |
| "loss": 1.6293, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.344130290004239e-05, |
| "loss": 1.6227, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.334443300841558e-05, |
| "loss": 1.6475, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.324756311678877e-05, |
| "loss": 1.6461, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.315069322516196e-05, |
| "loss": 1.6342, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.305382333353515e-05, |
| "loss": 1.6426, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.295695344190834e-05, |
| "loss": 1.6257, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.286008355028153e-05, |
| "loss": 1.6107, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.276321365865472e-05, |
| "loss": 1.6541, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.266634376702792e-05, |
| "loss": 1.6058, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.256947387540111e-05, |
| "loss": 1.6299, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.24726039837743e-05, |
| "loss": 1.6259, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.237573409214749e-05, |
| "loss": 1.5975, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.227886420052068e-05, |
| "loss": 1.6052, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.218199430889387e-05, |
| "loss": 1.6554, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.208512441726706e-05, |
| "loss": 1.6382, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.198825452564025e-05, |
| "loss": 1.6169, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.1891384634013444e-05, |
| "loss": 1.5987, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.1794514742386634e-05, |
| "loss": 1.6301, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.169764485075983e-05, |
| "loss": 1.651, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.160077495913302e-05, |
| "loss": 1.6717, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.150390506750621e-05, |
| "loss": 1.6473, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.14070351758794e-05, |
| "loss": 1.6392, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.1310165284252596e-05, |
| "loss": 1.6427, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.1213295392625785e-05, |
| "loss": 1.6286, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.1116425500998975e-05, |
| "loss": 1.6028, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.1019555609372165e-05, |
| "loss": 1.6379, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.0922685717745354e-05, |
| "loss": 1.6602, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.0825815826118544e-05, |
| "loss": 1.6526, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.0728945934491734e-05, |
| "loss": 1.6584, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.063207604286494e-05, |
| "loss": 1.6524, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.053520615123813e-05, |
| "loss": 1.613, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.0438336259611316e-05, |
| "loss": 1.6253, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.0341466367984506e-05, |
| "loss": 1.6772, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.0244596476357696e-05, |
| "loss": 1.6434, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.0147726584730885e-05, |
| "loss": 1.6362, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.0050856693104075e-05, |
| "loss": 1.6365, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.9953986801477265e-05, |
| "loss": 1.6536, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.985711690985046e-05, |
| "loss": 1.6025, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.976024701822365e-05, |
| "loss": 1.6554, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.966337712659684e-05, |
| "loss": 1.6463, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.956650723497003e-05, |
| "loss": 1.6767, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.9469637343343227e-05, |
| "loss": 1.6266, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.9372767451716416e-05, |
| "loss": 1.6107, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.9275897560089606e-05, |
| "loss": 1.6846, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.91790276684628e-05, |
| "loss": 1.6271, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.908215777683599e-05, |
| "loss": 1.6517, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.898528788520918e-05, |
| "loss": 1.6377, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.888841799358237e-05, |
| "loss": 1.6022, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.879639159653691e-05, |
| "loss": 1.6196, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.86995217049101e-05, |
| "loss": 1.609, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.860265181328329e-05, |
| "loss": 1.5948, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.8505781921656477e-05, |
| "loss": 1.6345, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.8408912030029666e-05, |
| "loss": 1.6538, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.831204213840286e-05, |
| "loss": 1.5958, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.821517224677605e-05, |
| "loss": 1.5788, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.811830235514924e-05, |
| "loss": 1.6435, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.802143246352243e-05, |
| "loss": 1.6344, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.792456257189562e-05, |
| "loss": 1.6415, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.782769268026882e-05, |
| "loss": 1.6518, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.773082278864201e-05, |
| "loss": 1.6454, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.76339528970152e-05, |
| "loss": 1.6205, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.7537083005388394e-05, |
| "loss": 1.6164, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.744021311376158e-05, |
| "loss": 1.6247, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 3.734334322213477e-05, |
| "loss": 1.6439, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.724647333050797e-05, |
| "loss": 1.6466, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.714960343888116e-05, |
| "loss": 1.6438, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.705273354725435e-05, |
| "loss": 1.6367, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.695586365562754e-05, |
| "loss": 1.6494, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.685899376400073e-05, |
| "loss": 1.638, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.6762123872373925e-05, |
| "loss": 1.6599, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.6665253980747114e-05, |
| "loss": 1.6203, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.6568384089120304e-05, |
| "loss": 1.6327, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.6471514197493494e-05, |
| "loss": 1.6339, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.637464430586668e-05, |
| "loss": 1.6199, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.627777441423988e-05, |
| "loss": 1.6392, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.618090452261307e-05, |
| "loss": 1.6041, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.608403463098626e-05, |
| "loss": 1.6333, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.598716473935945e-05, |
| "loss": 1.6227, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.589029484773264e-05, |
| "loss": 1.6119, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.579342495610583e-05, |
| "loss": 1.6078, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 3.5696555064479024e-05, |
| "loss": 1.6142, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.5599685172852214e-05, |
| "loss": 1.6544, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.5502815281225404e-05, |
| "loss": 1.6159, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.54059453895986e-05, |
| "loss": 1.6207, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.530907549797179e-05, |
| "loss": 1.6188, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.521220560634498e-05, |
| "loss": 1.6436, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.5115335714718176e-05, |
| "loss": 1.6105, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.5018465823091366e-05, |
| "loss": 1.587, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 3.4921595931464555e-05, |
| "loss": 1.6655, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.4824726039837745e-05, |
| "loss": 1.599, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.472785614821094e-05, |
| "loss": 1.656, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.463098625658413e-05, |
| "loss": 1.6239, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.453411636495732e-05, |
| "loss": 1.6149, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.443724647333051e-05, |
| "loss": 1.5823, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.43403765817037e-05, |
| "loss": 1.6556, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.4243506690076897e-05, |
| "loss": 1.642, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 3.4146636798450086e-05, |
| "loss": 1.6644, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.4049766906823276e-05, |
| "loss": 1.6018, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.3952897015196466e-05, |
| "loss": 1.6551, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.3856027123569655e-05, |
| "loss": 1.6417, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.3759157231942845e-05, |
| "loss": 1.635, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.366228734031604e-05, |
| "loss": 1.5891, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.356541744868923e-05, |
| "loss": 1.6094, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.346854755706242e-05, |
| "loss": 1.6072, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.337167766543561e-05, |
| "loss": 1.5916, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.327480777380881e-05, |
| "loss": 1.6311, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.3177937882181996e-05, |
| "loss": 1.5902, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.3081067990555186e-05, |
| "loss": 1.6291, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.298419809892838e-05, |
| "loss": 1.6226, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.288732820730157e-05, |
| "loss": 1.6495, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.279045831567476e-05, |
| "loss": 1.6452, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.269358842404796e-05, |
| "loss": 1.6255, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.259671853242115e-05, |
| "loss": 1.6299, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.249984864079434e-05, |
| "loss": 1.6181, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.240297874916753e-05, |
| "loss": 1.5816, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.230610885754072e-05, |
| "loss": 1.6412, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.2209238965913913e-05, |
| "loss": 1.6286, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.21123690742871e-05, |
| "loss": 1.6004, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.201549918266029e-05, |
| "loss": 1.6279, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.191862929103348e-05, |
| "loss": 1.6091, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.182175939940667e-05, |
| "loss": 1.6441, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.172488950777987e-05, |
| "loss": 1.607, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.162801961615306e-05, |
| "loss": 1.6157, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.153114972452625e-05, |
| "loss": 1.5979, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.143427983289944e-05, |
| "loss": 1.6115, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.133740994127263e-05, |
| "loss": 1.6077, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.124054004964582e-05, |
| "loss": 1.6243, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.114367015801901e-05, |
| "loss": 1.6196, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.10468002663922e-05, |
| "loss": 1.617, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.094993037476539e-05, |
| "loss": 1.5994, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.085306048313859e-05, |
| "loss": 1.6345, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.075619059151178e-05, |
| "loss": 1.6041, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.065932069988497e-05, |
| "loss": 1.6216, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.0562450808258165e-05, |
| "loss": 1.662, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.046558091663135e-05, |
| "loss": 1.6519, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.036871102500454e-05, |
| "loss": 1.5814, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.0271841133377734e-05, |
| "loss": 1.5762, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.0174971241750927e-05, |
| "loss": 1.6593, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.0078101350124117e-05, |
| "loss": 1.6166, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.998123145849731e-05, |
| "loss": 1.5812, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.98843615668705e-05, |
| "loss": 1.6252, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.978749167524369e-05, |
| "loss": 1.6418, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.9690621783616886e-05, |
| "loss": 1.5902, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.9593751891990075e-05, |
| "loss": 1.6068, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.9496882000363265e-05, |
| "loss": 1.6119, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.9400012108736455e-05, |
| "loss": 1.6209, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 2.9303142217109644e-05, |
| "loss": 1.6088, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.9206272325482837e-05, |
| "loss": 1.6232, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.910940243385603e-05, |
| "loss": 1.6127, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.901253254222922e-05, |
| "loss": 1.6084, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.8915662650602413e-05, |
| "loss": 1.6199, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.8818792758975603e-05, |
| "loss": 1.621, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.8721922867348792e-05, |
| "loss": 1.5999, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.862505297572199e-05, |
| "loss": 1.6145, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.852818308409518e-05, |
| "loss": 1.6529, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8431313192468368e-05, |
| "loss": 1.6412, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8334443300841558e-05, |
| "loss": 1.6285, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8237573409214747e-05, |
| "loss": 1.6022, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8140703517587944e-05, |
| "loss": 1.6413, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.8043833625961134e-05, |
| "loss": 1.6499, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.7946963734334323e-05, |
| "loss": 1.6409, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.7850093842707516e-05, |
| "loss": 1.6504, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.7753223951080706e-05, |
| "loss": 1.6054, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.76563540594539e-05, |
| "loss": 1.6421, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7559484167827092e-05, |
| "loss": 1.5858, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7462614276200282e-05, |
| "loss": 1.6222, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.736574438457347e-05, |
| "loss": 1.6614, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.726887449294666e-05, |
| "loss": 1.6056, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.717200460131985e-05, |
| "loss": 1.6194, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.7075134709693047e-05, |
| "loss": 1.6388, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.6978264818066237e-05, |
| "loss": 1.6386, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.6881394926439427e-05, |
| "loss": 1.6303, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.678452503481262e-05, |
| "loss": 1.6549, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.668765514318581e-05, |
| "loss": 1.6619, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6590785251559002e-05, |
| "loss": 1.6272, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6493915359932195e-05, |
| "loss": 1.6504, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6397045468305385e-05, |
| "loss": 1.6099, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6300175576678575e-05, |
| "loss": 1.6438, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.6203305685051764e-05, |
| "loss": 1.6325, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.610643579342496e-05, |
| "loss": 1.5917, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.600956590179815e-05, |
| "loss": 1.6323, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.591269601017134e-05, |
| "loss": 1.615, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.581582611854453e-05, |
| "loss": 1.6116, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5718956226917723e-05, |
| "loss": 1.6488, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5622086335290916e-05, |
| "loss": 1.626, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.5525216443664106e-05, |
| "loss": 1.6602, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.54283465520373e-05, |
| "loss": 1.6268, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.533147666041049e-05, |
| "loss": 1.5818, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.5234606768783678e-05, |
| "loss": 1.6234, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.5137736877156868e-05, |
| "loss": 1.6101, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.5040866985530064e-05, |
| "loss": 1.5931, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.4943997093903254e-05, |
| "loss": 1.5965, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.4847127202276443e-05, |
| "loss": 1.6204, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.4750257310649633e-05, |
| "loss": 1.6493, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.4653387419022826e-05, |
| "loss": 1.6024, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.455651752739602e-05, |
| "loss": 1.6381, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.445964763576921e-05, |
| "loss": 1.6501, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.4362777744142402e-05, |
| "loss": 1.6419, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.4275594841678275e-05, |
| "loss": 1.6053, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.4178724950051464e-05, |
| "loss": 1.633, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.4081855058424654e-05, |
| "loss": 1.6474, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.3984985166797844e-05, |
| "loss": 1.6556, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.388811527517104e-05, |
| "loss": 1.574, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.379124538354423e-05, |
| "loss": 1.6143, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.369437549191742e-05, |
| "loss": 1.5776, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3597505600290613e-05, |
| "loss": 1.6133, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3500635708663802e-05, |
| "loss": 1.5879, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3403765817036992e-05, |
| "loss": 1.6376, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.330689592541019e-05, |
| "loss": 1.643, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3210026033783378e-05, |
| "loss": 1.6301, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3113156142156568e-05, |
| "loss": 1.6139, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.3016286250529757e-05, |
| "loss": 1.6043, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.2919416358902947e-05, |
| "loss": 1.6527, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.2822546467276143e-05, |
| "loss": 1.6155, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.2725676575649333e-05, |
| "loss": 1.6381, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.2628806684022523e-05, |
| "loss": 1.6296, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.2531936792395716e-05, |
| "loss": 1.6015, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.2435066900768906e-05, |
| "loss": 1.6142, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.23381970091421e-05, |
| "loss": 1.6117, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.224132711751529e-05, |
| "loss": 1.6364, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 2.214445722588848e-05, |
| "loss": 1.6433, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.204758733426167e-05, |
| "loss": 1.6061, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.195071744263486e-05, |
| "loss": 1.635, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.1853847551008057e-05, |
| "loss": 1.6139, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.1756977659381247e-05, |
| "loss": 1.5924, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.1660107767754436e-05, |
| "loss": 1.6072, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.1563237876127626e-05, |
| "loss": 1.6365, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.146636798450082e-05, |
| "loss": 1.623, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 2.1369498092874012e-05, |
| "loss": 1.5519, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.1272628201247202e-05, |
| "loss": 1.6497, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.1175758309620395e-05, |
| "loss": 1.584, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.1078888417993585e-05, |
| "loss": 1.6577, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.0982018526366774e-05, |
| "loss": 1.6174, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.0885148634739964e-05, |
| "loss": 1.6349, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.078827874311316e-05, |
| "loss": 1.6145, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.069140885148635e-05, |
| "loss": 1.6049, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.059453895985954e-05, |
| "loss": 1.6141, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.049766906823273e-05, |
| "loss": 1.5754, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.0400799176605922e-05, |
| "loss": 1.6248, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.0303929284979115e-05, |
| "loss": 1.6399, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.0207059393352305e-05, |
| "loss": 1.6479, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.0110189501725498e-05, |
| "loss": 1.6281, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.0013319610098688e-05, |
| "loss": 1.6421, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.991644971847188e-05, |
| "loss": 1.6336, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.981957982684507e-05, |
| "loss": 1.6385, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.972270993521826e-05, |
| "loss": 1.6422, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.9625840043591453e-05, |
| "loss": 1.6494, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.9528970151964643e-05, |
| "loss": 1.6346, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.9441787249500516e-05, |
| "loss": 1.6043, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.9344917357873705e-05, |
| "loss": 1.6199, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.92480474662469e-05, |
| "loss": 1.6253, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.915117757462009e-05, |
| "loss": 1.5972, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.905430768299328e-05, |
| "loss": 1.6235, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.8957437791366474e-05, |
| "loss": 1.6214, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8860567899739664e-05, |
| "loss": 1.6023, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8763698008112854e-05, |
| "loss": 1.6415, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8666828116486047e-05, |
| "loss": 1.626, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8569958224859236e-05, |
| "loss": 1.6269, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.847308833323243e-05, |
| "loss": 1.6438, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.837621844160562e-05, |
| "loss": 1.6215, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.827934854997881e-05, |
| "loss": 1.6225, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8182478658352002e-05, |
| "loss": 1.6084, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.8085608766725195e-05, |
| "loss": 1.6473, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7988738875098384e-05, |
| "loss": 1.6381, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7891868983471578e-05, |
| "loss": 1.6372, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7794999091844767e-05, |
| "loss": 1.5853, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.769812920021796e-05, |
| "loss": 1.6291, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.760125930859115e-05, |
| "loss": 1.5902, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.750438941696434e-05, |
| "loss": 1.6082, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7407519525337533e-05, |
| "loss": 1.6108, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.7310649633710722e-05, |
| "loss": 1.5951, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7213779742083915e-05, |
| "loss": 1.618, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7116909850457105e-05, |
| "loss": 1.6123, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.7020039958830298e-05, |
| "loss": 1.6595, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.6923170067203488e-05, |
| "loss": 1.6186, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.682630017557668e-05, |
| "loss": 1.5884, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.672943028394987e-05, |
| "loss": 1.6596, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.6632560392323064e-05, |
| "loss": 1.6358, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.6535690500696253e-05, |
| "loss": 1.5966, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.6438820609069446e-05, |
| "loss": 1.6002, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.6341950717442636e-05, |
| "loss": 1.6349, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.6245080825815826e-05, |
| "loss": 1.633, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.614821093418902e-05, |
| "loss": 1.62, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.605134104256221e-05, |
| "loss": 1.6009, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.59544711509354e-05, |
| "loss": 1.6092, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.585760125930859e-05, |
| "loss": 1.6431, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.5760731367681784e-05, |
| "loss": 1.6354, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.5663861476054977e-05, |
| "loss": 1.6481, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.5566991584428167e-05, |
| "loss": 1.615, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.5470121692801357e-05, |
| "loss": 1.6221, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.537325180117455e-05, |
| "loss": 1.6417, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.527638190954774e-05, |
| "loss": 1.6291, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.5179512017920932e-05, |
| "loss": 1.6075, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.5082642126294122e-05, |
| "loss": 1.6505, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.4985772234667313e-05, |
| "loss": 1.6026, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.4888902343040505e-05, |
| "loss": 1.6148, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4792032451413696e-05, |
| "loss": 1.6282, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4695162559786889e-05, |
| "loss": 1.6298, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.460797965732276e-05, |
| "loss": 1.5895, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.451110976569595e-05, |
| "loss": 1.6124, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4414239874069143e-05, |
| "loss": 1.6071, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4317369982442333e-05, |
| "loss": 1.6016, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4220500090815526e-05, |
| "loss": 1.6199, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.4123630199188715e-05, |
| "loss": 1.6594, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4026760307561907e-05, |
| "loss": 1.5613, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.39298904159351e-05, |
| "loss": 1.6392, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.383302052430829e-05, |
| "loss": 1.6183, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.3736150632681479e-05, |
| "loss": 1.622, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.3639280741054672e-05, |
| "loss": 1.5837, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.3542410849427863e-05, |
| "loss": 1.6705, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.3445540957801055e-05, |
| "loss": 1.6064, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.3348671066174246e-05, |
| "loss": 1.6101, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3251801174547436e-05, |
| "loss": 1.6061, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3154931282920629e-05, |
| "loss": 1.6196, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3058061391293819e-05, |
| "loss": 1.61, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2961191499667012e-05, |
| "loss": 1.592, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2864321608040203e-05, |
| "loss": 1.6764, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2767451716413393e-05, |
| "loss": 1.6044, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2670581824786586e-05, |
| "loss": 1.6059, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2573711933159775e-05, |
| "loss": 1.6053, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2476842041532967e-05, |
| "loss": 1.6275, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2384815644487498e-05, |
| "loss": 1.6191, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2287945752860691e-05, |
| "loss": 1.5719, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.219107586123388e-05, |
| "loss": 1.6111, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2094205969607074e-05, |
| "loss": 1.6068, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1997336077980263e-05, |
| "loss": 1.6219, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1900466186353455e-05, |
| "loss": 1.5711, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1803596294726646e-05, |
| "loss": 1.618, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1706726403099837e-05, |
| "loss": 1.591, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1609856511473027e-05, |
| "loss": 1.6204, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.151298661984622e-05, |
| "loss": 1.5756, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1416116728219412e-05, |
| "loss": 1.6675, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1319246836592603e-05, |
| "loss": 1.6211, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1222376944965794e-05, |
| "loss": 1.59, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1125507053338984e-05, |
| "loss": 1.603, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.1028637161712177e-05, |
| "loss": 1.6458, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0931767270085367e-05, |
| "loss": 1.6501, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.083489737845856e-05, |
| "loss": 1.6259, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.073802748683175e-05, |
| "loss": 1.6043, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.064115759520494e-05, |
| "loss": 1.6215, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.0544287703578134e-05, |
| "loss": 1.629, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.0447417811951323e-05, |
| "loss": 1.5798, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.0350547920324515e-05, |
| "loss": 1.6105, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.0253678028697706e-05, |
| "loss": 1.6394, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.0156808137070898e-05, |
| "loss": 1.6194, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 1.0059938245444089e-05, |
| "loss": 1.613, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.96306835381728e-06, |
| "loss": 1.5793, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.866198462190472e-06, |
| "loss": 1.6265, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.769328570563661e-06, |
| "loss": 1.608, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.672458678936853e-06, |
| "loss": 1.6366, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.575588787310046e-06, |
| "loss": 1.6604, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.478718895683237e-06, |
| "loss": 1.6664, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.381849004056427e-06, |
| "loss": 1.6276, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.284979112429618e-06, |
| "loss": 1.5965, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.18810922080281e-06, |
| "loss": 1.6168, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.091239329176001e-06, |
| "loss": 1.6102, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.994369437549192e-06, |
| "loss": 1.6648, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.897499545922384e-06, |
| "loss": 1.6623, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.800629654295575e-06, |
| "loss": 1.6088, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.703759762668766e-06, |
| "loss": 1.6608, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.606889871041958e-06, |
| "loss": 1.6145, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 8.510019979415149e-06, |
| "loss": 1.6249, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.41315008778834e-06, |
| "loss": 1.6214, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.316280196161532e-06, |
| "loss": 1.6216, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.219410304534723e-06, |
| "loss": 1.6071, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.122540412907913e-06, |
| "loss": 1.6488, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 8.025670521281104e-06, |
| "loss": 1.6402, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 7.928800629654296e-06, |
| "loss": 1.6431, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 7.831930738027489e-06, |
| "loss": 1.6442, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 7.735060846400678e-06, |
| "loss": 1.5912, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.63819095477387e-06, |
| "loss": 1.6202, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.541321063147061e-06, |
| "loss": 1.5843, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.444451171520252e-06, |
| "loss": 1.6148, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.3475812798934446e-06, |
| "loss": 1.577, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.250711388266635e-06, |
| "loss": 1.6337, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.1538414966398264e-06, |
| "loss": 1.5959, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 7.056971605013018e-06, |
| "loss": 1.5869, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 6.960101713386209e-06, |
| "loss": 1.6214, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.8632318217594e-06, |
| "loss": 1.6185, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.766361930132591e-06, |
| "loss": 1.6571, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.669492038505782e-06, |
| "loss": 1.603, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.572622146878974e-06, |
| "loss": 1.6044, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.475752255252164e-06, |
| "loss": 1.6188, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.3788823636253565e-06, |
| "loss": 1.6317, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.282012471998548e-06, |
| "loss": 1.6314, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 6.185142580371739e-06, |
| "loss": 1.6427, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6.08827268874493e-06, |
| "loss": 1.583, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.991402797118121e-06, |
| "loss": 1.6431, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.8945329054913125e-06, |
| "loss": 1.6482, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.797663013864504e-06, |
| "loss": 1.6195, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.700793122237694e-06, |
| "loss": 1.6567, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.603923230610886e-06, |
| "loss": 1.6008, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.507053338984077e-06, |
| "loss": 1.5918, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.410183447357269e-06, |
| "loss": 1.5933, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.313313555730461e-06, |
| "loss": 1.6406, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 5.216443664103651e-06, |
| "loss": 1.6311, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 5.1195737724768425e-06, |
| "loss": 1.5971, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 5.022703880850034e-06, |
| "loss": 1.6802, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.925833989223225e-06, |
| "loss": 1.6229, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.828964097596417e-06, |
| "loss": 1.6126, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.732094205969607e-06, |
| "loss": 1.6453, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.6352243143427985e-06, |
| "loss": 1.6218, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.53835442271599e-06, |
| "loss": 1.6083, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.441484531089181e-06, |
| "loss": 1.6494, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.3446146394623726e-06, |
| "loss": 1.5942, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.247744747835564e-06, |
| "loss": 1.6456, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.1508748562087544e-06, |
| "loss": 1.6403, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 4.054004964581947e-06, |
| "loss": 1.6275, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.957135072955137e-06, |
| "loss": 1.6257, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.8602651813283285e-06, |
| "loss": 1.6009, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.76339528970152e-06, |
| "loss": 1.6072, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.6665253980747112e-06, |
| "loss": 1.629, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.5696555064479026e-06, |
| "loss": 1.6552, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.4727856148210936e-06, |
| "loss": 1.6031, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.3759157231942853e-06, |
| "loss": 1.6107, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.2790458315674763e-06, |
| "loss": 1.6204, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.1821759399406676e-06, |
| "loss": 1.6136, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.0853060483138586e-06, |
| "loss": 1.6245, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 2.98843615668705e-06, |
| "loss": 1.5918, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.891566265060241e-06, |
| "loss": 1.6492, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.7946963734334327e-06, |
| "loss": 1.6135, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.7075134709693046e-06, |
| "loss": 1.6236, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.6106435793424955e-06, |
| "loss": 1.6056, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.5137736877156873e-06, |
| "loss": 1.6172, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.4169037960888782e-06, |
| "loss": 1.6262, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.3200339044620696e-06, |
| "loss": 1.6332, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.223164012835261e-06, |
| "loss": 1.5828, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 2.126294121208452e-06, |
| "loss": 1.5993, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.0294242295816432e-06, |
| "loss": 1.6222, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.9325543379548346e-06, |
| "loss": 1.5744, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.8356844463280258e-06, |
| "loss": 1.5746, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.738814554701217e-06, |
| "loss": 1.6224, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.6419446630744083e-06, |
| "loss": 1.6085, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5450747714475994e-06, |
| "loss": 1.6125, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.448204879820791e-06, |
| "loss": 1.6118, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.3513349881939821e-06, |
| "loss": 1.6274, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.2544650965671735e-06, |
| "loss": 1.6516, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.1575952049403644e-06, |
| "loss": 1.5913, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.0607253133135558e-06, |
| "loss": 1.6084, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.638554216867472e-07, |
| "loss": 1.6105, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 8.669855300599383e-07, |
| "loss": 1.5631, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 7.701156384331296e-07, |
| "loss": 1.592, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.732457468063208e-07, |
| "loss": 1.5951, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.763758551795121e-07, |
| "loss": 1.6118, |
| "step": 16420 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.795059635527032e-07, |
| "loss": 1.5745, |
| "step": 16440 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.826360719258946e-07, |
| "loss": 1.6601, |
| "step": 16460 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.857661802990858e-07, |
| "loss": 1.6232, |
| "step": 16480 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.8889628867227707e-07, |
| "loss": 1.5696, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 16517, |
| "total_flos": 2.161122321010524e+17, |
| "train_loss": 1.638563478888687, |
| "train_runtime": 6501.0445, |
| "train_samples_per_second": 15.244, |
| "train_steps_per_second": 2.541 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 16517, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "total_flos": 2.161122321010524e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|