{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9999977087395546, "global_step": 654660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.996196498946018e-05, "loss": 3.0555, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9923777227874016e-05, "loss": 3.0074, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.988558946628785e-05, "loss": 2.9648, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.984740170470168e-05, "loss": 2.9454, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.980921394311551e-05, "loss": 2.9381, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.9771026181529345e-05, "loss": 2.9264, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.973283841994318e-05, "loss": 2.9214, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.969465065835701e-05, "loss": 2.9099, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.965646289677084e-05, "loss": 2.8927, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.961827513518468e-05, "loss": 2.8839, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.958008737359851e-05, "loss": 2.8857, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.9541899612012344e-05, "loss": 2.898, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.950378822594935e-05, "loss": 2.8848, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.9465600464363184e-05, "loss": 2.879, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.9427412702777016e-05, "loss": 2.8717, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.938922494119085e-05, "loss": 2.8649, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.935103717960468e-05, "loss": 2.8712, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.931284941801851e-05, "loss": 2.8493, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.9274661656432344e-05, "loss": 2.8726, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.923647389484618e-05, "loss": 2.8612, "step": 10000 }, { "epoch": 0.05, "learning_rate": 4.9198286133260015e-05, "loss": 2.8523, "step": 10500 }, { "epoch": 0.05, "learning_rate": 4.916025112272019e-05, "loss": 2.8501, "step": 11000 }, { "epoch": 0.05, "learning_rate": 4.912206336113402e-05, "loss": 2.8377, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.9083875599547855e-05, "loss": 2.8491, "step": 12000 }, { "epoch": 0.06, "learning_rate": 4.9045687837961693e-05, "loss": 2.844, "step": 12500 }, { "epoch": 0.06, "learning_rate": 4.9007500076375526e-05, "loss": 2.8197, "step": 13000 }, { "epoch": 0.06, "learning_rate": 4.896931231478936e-05, "loss": 2.8503, "step": 13500 }, { "epoch": 0.06, "learning_rate": 4.893112455320319e-05, "loss": 2.825, "step": 14000 }, { "epoch": 0.07, "learning_rate": 4.889293679161702e-05, "loss": 2.8277, "step": 14500 }, { "epoch": 0.07, "learning_rate": 4.885474903003086e-05, "loss": 2.8359, "step": 15000 }, { "epoch": 0.07, "learning_rate": 4.881663764396786e-05, "loss": 2.8288, "step": 15500 }, { "epoch": 0.07, "learning_rate": 4.87784498823817e-05, "loss": 2.812, "step": 16000 }, { "epoch": 0.08, "learning_rate": 4.874033849631871e-05, "loss": 2.8276, "step": 16500 }, { "epoch": 0.08, "learning_rate": 4.870215073473254e-05, "loss": 2.823, "step": 17000 }, { "epoch": 0.08, "learning_rate": 4.866396297314637e-05, "loss": 2.8204, "step": 17500 }, { "epoch": 0.08, "learning_rate": 4.86257752115602e-05, "loss": 2.8187, "step": 18000 }, { "epoch": 0.08, "learning_rate": 4.8587587449974036e-05, "loss": 2.8106, "step": 18500 }, { "epoch": 0.09, "learning_rate": 4.854939968838787e-05, "loss": 2.811, "step": 19000 }, { "epoch": 0.09, "learning_rate": 4.85112119268017e-05, "loss": 2.8186, "step": 19500 }, { "epoch": 0.09, "learning_rate": 4.847302416521554e-05, "loss": 2.7965, "step": 20000 }, { "epoch": 0.09, "learning_rate": 4.843483640362937e-05, "loss": 2.8085, "step": 20500 }, { "epoch": 0.1, "learning_rate": 4.83966486420432e-05, "loss": 2.8108, "step": 21000 }, { "epoch": 0.1, "learning_rate": 4.835853725598021e-05, "loss": 2.8031, "step": 21500 }, { "epoch": 0.1, "learning_rate": 4.832034949439404e-05, "loss": 2.8085, "step": 22000 }, { "epoch": 0.1, "learning_rate": 4.8282161732807874e-05, "loss": 2.8044, "step": 22500 }, { "epoch": 0.11, "learning_rate": 4.8243973971221706e-05, "loss": 2.802, "step": 23000 }, { "epoch": 0.11, "learning_rate": 4.820578620963554e-05, "loss": 2.7915, "step": 23500 }, { "epoch": 0.11, "learning_rate": 4.8167674823572545e-05, "loss": 2.7981, "step": 24000 }, { "epoch": 0.11, "learning_rate": 4.812948706198638e-05, "loss": 2.8002, "step": 24500 }, { "epoch": 0.11, "learning_rate": 4.809129930040021e-05, "loss": 2.7929, "step": 25000 }, { "epoch": 0.12, "learning_rate": 4.805311153881404e-05, "loss": 2.7923, "step": 25500 }, { "epoch": 0.12, "learning_rate": 4.8014923777227874e-05, "loss": 2.7845, "step": 26000 }, { "epoch": 0.12, "learning_rate": 4.797681239116488e-05, "loss": 2.792, "step": 26500 }, { "epoch": 0.12, "learning_rate": 4.793862462957871e-05, "loss": 2.7944, "step": 27000 }, { "epoch": 0.13, "learning_rate": 4.7900436867992545e-05, "loss": 2.7897, "step": 27500 }, { "epoch": 0.13, "learning_rate": 4.7862249106406384e-05, "loss": 2.7927, "step": 28000 }, { "epoch": 0.13, "learning_rate": 4.7824061344820216e-05, "loss": 2.7925, "step": 28500 }, { "epoch": 0.13, "learning_rate": 4.778594995875722e-05, "loss": 2.7703, "step": 29000 }, { "epoch": 0.14, "learning_rate": 4.7747762197171055e-05, "loss": 2.7849, "step": 29500 }, { "epoch": 0.14, "learning_rate": 4.770957443558489e-05, "loss": 2.786, "step": 30000 }, { "epoch": 0.14, "learning_rate": 4.767138667399872e-05, "loss": 2.7615, "step": 30500 }, { "epoch": 0.14, "learning_rate": 4.763319891241255e-05, "loss": 2.7704, "step": 31000 }, { "epoch": 0.14, "learning_rate": 4.7595011150826384e-05, "loss": 2.7749, "step": 31500 }, { "epoch": 0.15, "learning_rate": 4.755689976476339e-05, "loss": 2.7844, "step": 32000 }, { "epoch": 0.15, "learning_rate": 4.751871200317722e-05, "loss": 2.7758, "step": 32500 }, { "epoch": 0.15, "learning_rate": 4.7480524241591055e-05, "loss": 2.7723, "step": 33000 }, { "epoch": 0.15, "learning_rate": 4.744233648000489e-05, "loss": 2.7674, "step": 33500 }, { "epoch": 0.16, "learning_rate": 4.740414871841872e-05, "loss": 2.7649, "step": 34000 }, { "epoch": 0.16, "learning_rate": 4.7366037332355726e-05, "loss": 2.7876, "step": 34500 }, { "epoch": 0.16, "learning_rate": 4.732784957076956e-05, "loss": 2.7723, "step": 35000 }, { "epoch": 0.16, "learning_rate": 4.72896618091834e-05, "loss": 2.7732, "step": 35500 }, { "epoch": 0.16, "learning_rate": 4.725147404759723e-05, "loss": 2.7617, "step": 36000 }, { "epoch": 0.17, "learning_rate": 4.721328628601106e-05, "loss": 2.7687, "step": 36500 }, { "epoch": 0.17, "learning_rate": 4.7175098524424894e-05, "loss": 2.7682, "step": 37000 }, { "epoch": 0.17, "learning_rate": 4.71369871383619e-05, "loss": 2.7533, "step": 37500 }, { "epoch": 0.17, "learning_rate": 4.709879937677573e-05, "loss": 2.7571, "step": 38000 }, { "epoch": 0.18, "learning_rate": 4.7060611615189565e-05, "loss": 2.763, "step": 38500 }, { "epoch": 0.18, "learning_rate": 4.70224238536034e-05, "loss": 2.7686, "step": 39000 }, { "epoch": 0.18, "learning_rate": 4.698423609201723e-05, "loss": 2.7591, "step": 39500 }, { "epoch": 0.18, "learning_rate": 4.6946124705954236e-05, "loss": 2.761, "step": 40000 }, { "epoch": 0.19, "learning_rate": 4.690793694436807e-05, "loss": 2.7617, "step": 40500 }, { "epoch": 0.19, "learning_rate": 4.6869825558305075e-05, "loss": 2.7551, "step": 41000 }, { "epoch": 0.19, "learning_rate": 4.683163779671891e-05, "loss": 2.7651, "step": 41500 }, { "epoch": 0.19, "learning_rate": 4.679345003513274e-05, "loss": 2.7581, "step": 42000 }, { "epoch": 0.19, "learning_rate": 4.675526227354657e-05, "loss": 2.7431, "step": 42500 }, { "epoch": 0.2, "learning_rate": 4.6717074511960404e-05, "loss": 2.7553, "step": 43000 }, { "epoch": 0.2, "learning_rate": 4.667888675037424e-05, "loss": 2.7451, "step": 43500 }, { "epoch": 0.2, "learning_rate": 4.6640698988788075e-05, "loss": 2.763, "step": 44000 }, { "epoch": 0.2, "learning_rate": 4.660251122720191e-05, "loss": 2.7454, "step": 44500 }, { "epoch": 0.21, "learning_rate": 4.656432346561574e-05, "loss": 2.7472, "step": 45000 }, { "epoch": 0.21, "learning_rate": 4.6526212079552746e-05, "loss": 2.7321, "step": 45500 }, { "epoch": 0.21, "learning_rate": 4.648802431796658e-05, "loss": 2.7418, "step": 46000 }, { "epoch": 0.21, "learning_rate": 4.644983655638042e-05, "loss": 2.7441, "step": 46500 }, { "epoch": 0.22, "learning_rate": 4.641164879479425e-05, "loss": 2.7342, "step": 47000 }, { "epoch": 0.22, "learning_rate": 4.6373537408731256e-05, "loss": 2.7211, "step": 47500 }, { "epoch": 0.22, "learning_rate": 4.633534964714509e-05, "loss": 2.7377, "step": 48000 }, { "epoch": 0.22, "learning_rate": 4.629716188555892e-05, "loss": 2.7481, "step": 48500 }, { "epoch": 0.22, "learning_rate": 4.625897412397275e-05, "loss": 2.7373, "step": 49000 }, { "epoch": 0.23, "learning_rate": 4.6220786362386585e-05, "loss": 2.7236, "step": 49500 }, { "epoch": 0.23, "learning_rate": 4.618259860080042e-05, "loss": 2.7393, "step": 50000 }, { "epoch": 0.23, "learning_rate": 4.6144410839214256e-05, "loss": 2.7406, "step": 50500 }, { "epoch": 0.23, "learning_rate": 4.610622307762809e-05, "loss": 2.7201, "step": 51000 }, { "epoch": 0.24, "learning_rate": 4.606803531604192e-05, "loss": 2.7447, "step": 51500 }, { "epoch": 0.24, "learning_rate": 4.602984755445575e-05, "loss": 2.7393, "step": 52000 }, { "epoch": 0.24, "learning_rate": 4.5991659792869585e-05, "loss": 2.7194, "step": 52500 }, { "epoch": 0.24, "learning_rate": 4.595347203128342e-05, "loss": 2.7429, "step": 53000 }, { "epoch": 0.25, "learning_rate": 4.5915360645220424e-05, "loss": 2.7358, "step": 53500 }, { "epoch": 0.25, "learning_rate": 4.587724925915743e-05, "loss": 2.7373, "step": 54000 }, { "epoch": 0.25, "learning_rate": 4.583906149757126e-05, "loss": 2.7368, "step": 54500 }, { "epoch": 0.25, "learning_rate": 4.5800873735985095e-05, "loss": 2.7161, "step": 55000 }, { "epoch": 0.25, "learning_rate": 4.576268597439893e-05, "loss": 2.7314, "step": 55500 }, { "epoch": 0.26, "learning_rate": 4.572449821281276e-05, "loss": 2.7221, "step": 56000 }, { "epoch": 0.26, "learning_rate": 4.568631045122659e-05, "loss": 2.7051, "step": 56500 }, { "epoch": 0.26, "learning_rate": 4.5648122689640424e-05, "loss": 2.7213, "step": 57000 }, { "epoch": 0.26, "learning_rate": 4.560993492805426e-05, "loss": 2.7178, "step": 57500 }, { "epoch": 0.27, "learning_rate": 4.557182354199126e-05, "loss": 2.7164, "step": 58000 }, { "epoch": 0.27, "learning_rate": 4.553371215592827e-05, "loss": 2.7223, "step": 58500 }, { "epoch": 0.27, "learning_rate": 4.54955243943421e-05, "loss": 2.7162, "step": 59000 }, { "epoch": 0.27, "learning_rate": 4.5457336632755934e-05, "loss": 2.7074, "step": 59500 }, { "epoch": 0.27, "learning_rate": 4.541914887116977e-05, "loss": 2.7116, "step": 60000 }, { "epoch": 0.28, "learning_rate": 4.5380961109583605e-05, "loss": 2.709, "step": 60500 }, { "epoch": 0.28, "learning_rate": 4.534277334799744e-05, "loss": 2.7131, "step": 61000 }, { "epoch": 0.28, "learning_rate": 4.5304661961934444e-05, "loss": 2.725, "step": 61500 }, { "epoch": 0.28, "learning_rate": 4.5266474200348276e-05, "loss": 2.7129, "step": 62000 }, { "epoch": 0.29, "learning_rate": 4.522828643876211e-05, "loss": 2.7157, "step": 62500 }, { "epoch": 0.29, "learning_rate": 4.519009867717594e-05, "loss": 2.7114, "step": 63000 }, { "epoch": 0.29, "learning_rate": 4.515191091558977e-05, "loss": 2.7101, "step": 63500 }, { "epoch": 0.29, "learning_rate": 4.5113723154003605e-05, "loss": 2.7129, "step": 64000 }, { "epoch": 0.3, "learning_rate": 4.507553539241744e-05, "loss": 2.715, "step": 64500 }, { "epoch": 0.3, "learning_rate": 4.5037347630831276e-05, "loss": 2.7164, "step": 65000 }, { "epoch": 0.3, "learning_rate": 4.499915986924511e-05, "loss": 2.7004, "step": 65500 }, { "epoch": 0.3, "learning_rate": 4.496097210765894e-05, "loss": 2.7114, "step": 66000 }, { "epoch": 0.3, "learning_rate": 4.492286072159595e-05, "loss": 2.7219, "step": 66500 }, { "epoch": 0.31, "learning_rate": 4.488467296000978e-05, "loss": 2.6966, "step": 67000 }, { "epoch": 0.31, "learning_rate": 4.484648519842361e-05, "loss": 2.7097, "step": 67500 }, { "epoch": 0.31, "learning_rate": 4.480829743683744e-05, "loss": 2.72, "step": 68000 }, { "epoch": 0.31, "learning_rate": 4.477018605077445e-05, "loss": 2.7164, "step": 68500 }, { "epoch": 0.32, "learning_rate": 4.473199828918828e-05, "loss": 2.7056, "step": 69000 }, { "epoch": 0.32, "learning_rate": 4.4693810527602115e-05, "loss": 2.7102, "step": 69500 }, { "epoch": 0.32, "learning_rate": 4.465562276601595e-05, "loss": 2.7023, "step": 70000 }, { "epoch": 0.32, "learning_rate": 4.461743500442978e-05, "loss": 2.7024, "step": 70500 }, { "epoch": 0.33, "learning_rate": 4.4579323618366786e-05, "loss": 2.7, "step": 71000 }, { "epoch": 0.33, "learning_rate": 4.454121223230379e-05, "loss": 2.7006, "step": 71500 }, { "epoch": 0.33, "learning_rate": 4.4503024470717625e-05, "loss": 2.6956, "step": 72000 }, { "epoch": 0.33, "learning_rate": 4.446491308465463e-05, "loss": 2.7053, "step": 72500 }, { "epoch": 0.33, "learning_rate": 4.4426725323068464e-05, "loss": 2.7132, "step": 73000 }, { "epoch": 0.34, "learning_rate": 4.4388537561482296e-05, "loss": 2.692, "step": 73500 }, { "epoch": 0.34, "learning_rate": 4.435034979989613e-05, "loss": 2.6934, "step": 74000 }, { "epoch": 0.34, "learning_rate": 4.431216203830996e-05, "loss": 2.6871, "step": 74500 }, { "epoch": 0.34, "learning_rate": 4.427397427672379e-05, "loss": 2.6979, "step": 75000 }, { "epoch": 0.35, "learning_rate": 4.423578651513763e-05, "loss": 2.696, "step": 75500 }, { "epoch": 0.35, "learning_rate": 4.4197598753551464e-05, "loss": 2.6928, "step": 76000 }, { "epoch": 0.35, "learning_rate": 4.41594109919653e-05, "loss": 2.6864, "step": 76500 }, { "epoch": 0.35, "learning_rate": 4.4121223230379135e-05, "loss": 2.6974, "step": 77000 }, { "epoch": 0.36, "learning_rate": 4.408303546879297e-05, "loss": 2.6817, "step": 77500 }, { "epoch": 0.36, "learning_rate": 4.4044924082729974e-05, "loss": 2.6817, "step": 78000 }, { "epoch": 0.36, "learning_rate": 4.4006736321143806e-05, "loss": 2.685, "step": 78500 }, { "epoch": 0.36, "learning_rate": 4.396854855955764e-05, "loss": 2.6829, "step": 79000 }, { "epoch": 0.36, "learning_rate": 4.393036079797147e-05, "loss": 2.6871, "step": 79500 }, { "epoch": 0.37, "learning_rate": 4.38921730363853e-05, "loss": 2.6841, "step": 80000 }, { "epoch": 0.37, "learning_rate": 4.385406165032231e-05, "loss": 2.671, "step": 80500 }, { "epoch": 0.37, "learning_rate": 4.381587388873614e-05, "loss": 2.6906, "step": 81000 }, { "epoch": 0.37, "learning_rate": 4.3777686127149973e-05, "loss": 2.6874, "step": 81500 }, { "epoch": 0.38, "learning_rate": 4.3739498365563806e-05, "loss": 2.6838, "step": 82000 }, { "epoch": 0.38, "learning_rate": 4.370131060397764e-05, "loss": 2.6918, "step": 82500 }, { "epoch": 0.38, "learning_rate": 4.366312284239148e-05, "loss": 2.7004, "step": 83000 }, { "epoch": 0.38, "learning_rate": 4.362501145632848e-05, "loss": 2.6895, "step": 83500 }, { "epoch": 0.38, "learning_rate": 4.3586823694742316e-05, "loss": 2.6846, "step": 84000 }, { "epoch": 0.39, "learning_rate": 4.354863593315615e-05, "loss": 2.6808, "step": 84500 }, { "epoch": 0.39, "learning_rate": 4.351044817156998e-05, "loss": 2.6849, "step": 85000 }, { "epoch": 0.39, "learning_rate": 4.347226040998381e-05, "loss": 2.6967, "step": 85500 }, { "epoch": 0.39, "learning_rate": 4.343414902392082e-05, "loss": 2.6933, "step": 86000 }, { "epoch": 0.4, "learning_rate": 4.339596126233465e-05, "loss": 2.6779, "step": 86500 }, { "epoch": 0.4, "learning_rate": 4.335777350074848e-05, "loss": 2.6937, "step": 87000 }, { "epoch": 0.4, "learning_rate": 4.3319585739162316e-05, "loss": 2.691, "step": 87500 }, { "epoch": 0.4, "learning_rate": 4.328147435309932e-05, "loss": 2.6864, "step": 88000 }, { "epoch": 0.41, "learning_rate": 4.3243286591513155e-05, "loss": 2.6732, "step": 88500 }, { "epoch": 0.41, "learning_rate": 4.320509882992699e-05, "loss": 2.6637, "step": 89000 }, { "epoch": 0.41, "learning_rate": 4.3166987443863994e-05, "loss": 2.6791, "step": 89500 }, { "epoch": 0.41, "learning_rate": 4.3128799682277826e-05, "loss": 2.673, "step": 90000 }, { "epoch": 0.41, "learning_rate": 4.309061192069166e-05, "loss": 2.6774, "step": 90500 }, { "epoch": 0.42, "learning_rate": 4.305242415910549e-05, "loss": 2.671, "step": 91000 }, { "epoch": 0.42, "learning_rate": 4.301423639751932e-05, "loss": 2.6695, "step": 91500 }, { "epoch": 0.42, "learning_rate": 4.297604863593316e-05, "loss": 2.6704, "step": 92000 }, { "epoch": 0.42, "learning_rate": 4.293786087434699e-05, "loss": 2.6632, "step": 92500 }, { "epoch": 0.43, "learning_rate": 4.2899673112760825e-05, "loss": 2.6861, "step": 93000 }, { "epoch": 0.43, "learning_rate": 4.286148535117466e-05, "loss": 2.6649, "step": 93500 }, { "epoch": 0.43, "learning_rate": 4.282329758958849e-05, "loss": 2.6774, "step": 94000 }, { "epoch": 0.43, "learning_rate": 4.278510982800232e-05, "loss": 2.6544, "step": 94500 }, { "epoch": 0.44, "learning_rate": 4.2746922066416154e-05, "loss": 2.6761, "step": 95000 }, { "epoch": 0.44, "learning_rate": 4.270881068035316e-05, "loss": 2.6762, "step": 95500 }, { "epoch": 0.44, "learning_rate": 4.267062291876699e-05, "loss": 2.6762, "step": 96000 }, { "epoch": 0.44, "learning_rate": 4.2632511532704e-05, "loss": 2.6695, "step": 96500 }, { "epoch": 0.44, "learning_rate": 4.259432377111783e-05, "loss": 2.6717, "step": 97000 }, { "epoch": 0.45, "learning_rate": 4.2556136009531664e-05, "loss": 2.6925, "step": 97500 }, { "epoch": 0.45, "learning_rate": 4.2517948247945496e-05, "loss": 2.6566, "step": 98000 }, { "epoch": 0.45, "learning_rate": 4.2479760486359335e-05, "loss": 2.669, "step": 98500 }, { "epoch": 0.45, "learning_rate": 4.244157272477317e-05, "loss": 2.6793, "step": 99000 }, { "epoch": 0.46, "learning_rate": 4.240346133871017e-05, "loss": 2.6654, "step": 99500 }, { "epoch": 0.46, "learning_rate": 4.2365273577124007e-05, "loss": 2.665, "step": 100000 }, { "epoch": 0.46, "learning_rate": 4.232708581553784e-05, "loss": 2.6532, "step": 100500 }, { "epoch": 0.46, "learning_rate": 4.228889805395167e-05, "loss": 2.6686, "step": 101000 }, { "epoch": 0.47, "learning_rate": 4.22507102923655e-05, "loss": 2.6629, "step": 101500 }, { "epoch": 0.47, "learning_rate": 4.2212522530779335e-05, "loss": 2.6792, "step": 102000 }, { "epoch": 0.47, "learning_rate": 4.217433476919317e-05, "loss": 2.6797, "step": 102500 }, { "epoch": 0.47, "learning_rate": 4.2136147007607e-05, "loss": 2.6618, "step": 103000 }, { "epoch": 0.47, "learning_rate": 4.2098035621544006e-05, "loss": 2.6528, "step": 103500 }, { "epoch": 0.48, "learning_rate": 4.205984785995784e-05, "loss": 2.6497, "step": 104000 }, { "epoch": 0.48, "learning_rate": 4.202166009837167e-05, "loss": 2.6629, "step": 104500 }, { "epoch": 0.48, "learning_rate": 4.198347233678551e-05, "loss": 2.656, "step": 105000 }, { "epoch": 0.48, "learning_rate": 4.194528457519934e-05, "loss": 2.6646, "step": 105500 }, { "epoch": 0.49, "learning_rate": 4.1907096813613174e-05, "loss": 2.6467, "step": 106000 }, { "epoch": 0.49, "learning_rate": 4.186890905202701e-05, "loss": 2.6529, "step": 106500 }, { "epoch": 0.49, "learning_rate": 4.183087404148718e-05, "loss": 2.6593, "step": 107000 }, { "epoch": 0.49, "learning_rate": 4.179268627990102e-05, "loss": 2.6556, "step": 107500 }, { "epoch": 0.49, "learning_rate": 4.175449851831485e-05, "loss": 2.661, "step": 108000 }, { "epoch": 0.5, "learning_rate": 4.171631075672869e-05, "loss": 2.6502, "step": 108500 }, { "epoch": 0.5, "learning_rate": 4.167812299514252e-05, "loss": 2.66, "step": 109000 }, { "epoch": 0.5, "learning_rate": 4.164001160907953e-05, "loss": 2.6497, "step": 109500 }, { "epoch": 0.5, "learning_rate": 4.160182384749336e-05, "loss": 2.6573, "step": 110000 }, { "epoch": 0.51, "learning_rate": 4.1563636085907194e-05, "loss": 2.6646, "step": 110500 }, { "epoch": 0.51, "learning_rate": 4.1525448324321026e-05, "loss": 2.6692, "step": 111000 }, { "epoch": 0.51, "learning_rate": 4.148726056273486e-05, "loss": 2.6464, "step": 111500 }, { "epoch": 0.51, "learning_rate": 4.144907280114869e-05, "loss": 2.6656, "step": 112000 }, { "epoch": 0.52, "learning_rate": 4.141088503956252e-05, "loss": 2.6579, "step": 112500 }, { "epoch": 0.52, "learning_rate": 4.137277365349953e-05, "loss": 2.6474, "step": 113000 }, { "epoch": 0.52, "learning_rate": 4.133458589191336e-05, "loss": 2.6526, "step": 113500 }, { "epoch": 0.52, "learning_rate": 4.1296398130327194e-05, "loss": 2.6537, "step": 114000 }, { "epoch": 0.52, "learning_rate": 4.1258210368741026e-05, "loss": 2.6416, "step": 114500 }, { "epoch": 0.53, "learning_rate": 4.1220022607154865e-05, "loss": 2.6602, "step": 115000 }, { "epoch": 0.53, "learning_rate": 4.11818348455687e-05, "loss": 2.6473, "step": 115500 }, { "epoch": 0.53, "learning_rate": 4.114364708398253e-05, "loss": 2.666, "step": 116000 }, { "epoch": 0.53, "learning_rate": 4.110545932239636e-05, "loss": 2.6553, "step": 116500 }, { "epoch": 0.54, "learning_rate": 4.106734793633337e-05, "loss": 2.6489, "step": 117000 }, { "epoch": 0.54, "learning_rate": 4.1029236550270375e-05, "loss": 2.646, "step": 117500 }, { "epoch": 0.54, "learning_rate": 4.0991125164207376e-05, "loss": 2.6448, "step": 118000 }, { "epoch": 0.54, "learning_rate": 4.095293740262121e-05, "loss": 2.6547, "step": 118500 }, { "epoch": 0.55, "learning_rate": 4.0914749641035047e-05, "loss": 2.6467, "step": 119000 }, { "epoch": 0.55, "learning_rate": 4.087656187944888e-05, "loss": 2.6508, "step": 119500 }, { "epoch": 0.55, "learning_rate": 4.083837411786271e-05, "loss": 2.6422, "step": 120000 }, { "epoch": 0.55, "learning_rate": 4.080018635627654e-05, "loss": 2.6576, "step": 120500 }, { "epoch": 0.55, "learning_rate": 4.0761998594690375e-05, "loss": 2.662, "step": 121000 }, { "epoch": 0.56, "learning_rate": 4.072381083310421e-05, "loss": 2.6479, "step": 121500 }, { "epoch": 0.56, "learning_rate": 4.068562307151804e-05, "loss": 2.6385, "step": 122000 }, { "epoch": 0.56, "learning_rate": 4.064743530993187e-05, "loss": 2.6539, "step": 122500 }, { "epoch": 0.56, "learning_rate": 4.060924754834571e-05, "loss": 2.6507, "step": 123000 }, { "epoch": 0.57, "learning_rate": 4.057113616228271e-05, "loss": 2.6231, "step": 123500 }, { "epoch": 0.57, "learning_rate": 4.053294840069655e-05, "loss": 2.6401, "step": 124000 }, { "epoch": 0.57, "learning_rate": 4.049476063911038e-05, "loss": 2.6362, "step": 124500 }, { "epoch": 0.57, "learning_rate": 4.0456572877524214e-05, "loss": 2.6484, "step": 125000 }, { "epoch": 0.58, "learning_rate": 4.0418385115938046e-05, "loss": 2.6523, "step": 125500 }, { "epoch": 0.58, "learning_rate": 4.038019735435188e-05, "loss": 2.6453, "step": 126000 }, { "epoch": 0.58, "learning_rate": 4.0342085968288885e-05, "loss": 2.6572, "step": 126500 }, { "epoch": 0.58, "learning_rate": 4.030389820670272e-05, "loss": 2.6525, "step": 127000 }, { "epoch": 0.58, "learning_rate": 4.026571044511655e-05, "loss": 2.6412, "step": 127500 }, { "epoch": 0.59, "learning_rate": 4.022752268353038e-05, "loss": 2.6408, "step": 128000 }, { "epoch": 0.59, "learning_rate": 4.018941129746739e-05, "loss": 2.6422, "step": 128500 }, { "epoch": 0.59, "learning_rate": 4.015122353588122e-05, "loss": 2.6263, "step": 129000 }, { "epoch": 0.59, "learning_rate": 4.011303577429505e-05, "loss": 2.6372, "step": 129500 }, { "epoch": 0.6, "learning_rate": 4.0074848012708885e-05, "loss": 2.6506, "step": 130000 }, { "epoch": 0.6, "learning_rate": 4.0036660251122724e-05, "loss": 2.6432, "step": 130500 }, { "epoch": 0.6, "learning_rate": 3.9998472489536556e-05, "loss": 2.6233, "step": 131000 }, { "epoch": 0.6, "learning_rate": 3.9960361103473556e-05, "loss": 2.6392, "step": 131500 }, { "epoch": 0.6, "learning_rate": 3.9922173341887395e-05, "loss": 2.6375, "step": 132000 }, { "epoch": 0.61, "learning_rate": 3.988398558030123e-05, "loss": 2.6172, "step": 132500 }, { "epoch": 0.61, "learning_rate": 3.984579781871506e-05, "loss": 2.6359, "step": 133000 }, { "epoch": 0.61, "learning_rate": 3.980761005712889e-05, "loss": 2.6329, "step": 133500 }, { "epoch": 0.61, "learning_rate": 3.9769422295542724e-05, "loss": 2.6304, "step": 134000 }, { "epoch": 0.62, "learning_rate": 3.9731234533956556e-05, "loss": 2.6381, "step": 134500 }, { "epoch": 0.62, "learning_rate": 3.969304677237039e-05, "loss": 2.6382, "step": 135000 }, { "epoch": 0.62, "learning_rate": 3.965485901078422e-05, "loss": 2.6273, "step": 135500 }, { "epoch": 0.62, "learning_rate": 3.961674762472123e-05, "loss": 2.6309, "step": 136000 }, { "epoch": 0.63, "learning_rate": 3.9578636238658234e-05, "loss": 2.6375, "step": 136500 }, { "epoch": 0.63, "learning_rate": 3.954052485259524e-05, "loss": 2.6384, "step": 137000 }, { "epoch": 0.63, "learning_rate": 3.950233709100907e-05, "loss": 2.6379, "step": 137500 }, { "epoch": 0.63, "learning_rate": 3.9464149329422905e-05, "loss": 2.6143, "step": 138000 }, { "epoch": 0.63, "learning_rate": 3.942596156783674e-05, "loss": 2.6299, "step": 138500 }, { "epoch": 0.64, "learning_rate": 3.9387773806250576e-05, "loss": 2.6331, "step": 139000 }, { "epoch": 0.64, "learning_rate": 3.934958604466441e-05, "loss": 2.6242, "step": 139500 }, { "epoch": 0.64, "learning_rate": 3.931139828307824e-05, "loss": 2.6214, "step": 140000 }, { "epoch": 0.64, "learning_rate": 3.927321052149208e-05, "loss": 2.6365, "step": 140500 }, { "epoch": 0.65, "learning_rate": 3.923502275990591e-05, "loss": 2.6411, "step": 141000 }, { "epoch": 0.65, "learning_rate": 3.9196834998319744e-05, "loss": 2.6103, "step": 141500 }, { "epoch": 0.65, "learning_rate": 3.9158647236733576e-05, "loss": 2.6253, "step": 142000 }, { "epoch": 0.65, "learning_rate": 3.912053585067058e-05, "loss": 2.634, "step": 142500 }, { "epoch": 0.66, "learning_rate": 3.9082348089084415e-05, "loss": 2.6405, "step": 143000 }, { "epoch": 0.66, "learning_rate": 3.904416032749825e-05, "loss": 2.6187, "step": 143500 }, { "epoch": 0.66, "learning_rate": 3.900597256591208e-05, "loss": 2.6322, "step": 144000 }, { "epoch": 0.66, "learning_rate": 3.896778480432591e-05, "loss": 2.6313, "step": 144500 }, { "epoch": 0.66, "learning_rate": 3.8929597042739744e-05, "loss": 2.6342, "step": 145000 }, { "epoch": 0.67, "learning_rate": 3.889148565667675e-05, "loss": 2.6221, "step": 145500 }, { "epoch": 0.67, "learning_rate": 3.885329789509058e-05, "loss": 2.6278, "step": 146000 }, { "epoch": 0.67, "learning_rate": 3.8815110133504415e-05, "loss": 2.6224, "step": 146500 }, { "epoch": 0.67, "learning_rate": 3.8776922371918254e-05, "loss": 2.6191, "step": 147000 }, { "epoch": 0.68, "learning_rate": 3.8738734610332086e-05, "loss": 2.6043, "step": 147500 }, { "epoch": 0.68, "learning_rate": 3.8700623224269086e-05, "loss": 2.6286, "step": 148000 }, { "epoch": 0.68, "learning_rate": 3.8662435462682925e-05, "loss": 2.6093, "step": 148500 }, { "epoch": 0.68, "learning_rate": 3.862424770109676e-05, "loss": 2.6345, "step": 149000 }, { "epoch": 0.69, "learning_rate": 3.858605993951059e-05, "loss": 2.6133, "step": 149500 }, { "epoch": 0.69, "learning_rate": 3.854787217792442e-05, "loss": 2.6221, "step": 150000 }, { "epoch": 0.69, "learning_rate": 3.8509684416338253e-05, "loss": 2.6208, "step": 150500 }, { "epoch": 0.69, "learning_rate": 3.847157303027526e-05, "loss": 2.6144, "step": 151000 }, { "epoch": 0.69, "learning_rate": 3.843338526868909e-05, "loss": 2.6165, "step": 151500 }, { "epoch": 0.7, "learning_rate": 3.8395197507102925e-05, "loss": 2.6218, "step": 152000 }, { "epoch": 0.7, "learning_rate": 3.835700974551676e-05, "loss": 2.6376, "step": 152500 }, { "epoch": 0.7, "learning_rate": 3.831882198393059e-05, "loss": 2.637, "step": 153000 }, { "epoch": 0.7, "learning_rate": 3.828063422234443e-05, "loss": 2.6056, "step": 153500 }, { "epoch": 0.71, "learning_rate": 3.824244646075826e-05, "loss": 2.6012, "step": 154000 }, { "epoch": 0.71, "learning_rate": 3.820425869917209e-05, "loss": 2.6156, "step": 154500 }, { "epoch": 0.71, "learning_rate": 3.8166070937585924e-05, "loss": 2.6286, "step": 155000 }, { "epoch": 0.71, "learning_rate": 3.8127883175999756e-05, "loss": 2.6332, "step": 155500 }, { "epoch": 0.71, "learning_rate": 3.808977178993676e-05, "loss": 2.6177, "step": 156000 }, { "epoch": 0.72, "learning_rate": 3.8051584028350596e-05, "loss": 2.613, "step": 156500 }, { "epoch": 0.72, "learning_rate": 3.801339626676443e-05, "loss": 2.6077, "step": 157000 }, { "epoch": 0.72, "learning_rate": 3.797520850517826e-05, "loss": 2.6197, "step": 157500 }, { "epoch": 0.72, "learning_rate": 3.793709711911527e-05, "loss": 2.6205, "step": 158000 }, { "epoch": 0.73, "learning_rate": 3.78989093575291e-05, "loss": 2.6046, "step": 158500 }, { "epoch": 0.73, "learning_rate": 3.786072159594293e-05, "loss": 2.6028, "step": 159000 }, { "epoch": 0.73, "learning_rate": 3.782253383435676e-05, "loss": 2.6369, "step": 159500 }, { "epoch": 0.73, "learning_rate": 3.778442244829377e-05, "loss": 2.6246, "step": 160000 }, { "epoch": 0.74, "learning_rate": 3.774631106223078e-05, "loss": 2.6113, "step": 160500 }, { "epoch": 0.74, "learning_rate": 3.770812330064461e-05, "loss": 2.613, "step": 161000 }, { "epoch": 0.74, "learning_rate": 3.766993553905844e-05, "loss": 2.6249, "step": 161500 }, { "epoch": 0.74, "learning_rate": 3.7631747777472273e-05, "loss": 2.6026, "step": 162000 }, { "epoch": 0.74, "learning_rate": 3.759356001588611e-05, "loss": 2.6113, "step": 162500 }, { "epoch": 0.75, "learning_rate": 3.7555372254299944e-05, "loss": 2.6041, "step": 163000 }, { "epoch": 0.75, "learning_rate": 3.751718449271378e-05, "loss": 2.6257, "step": 163500 }, { "epoch": 0.75, "learning_rate": 3.747899673112761e-05, "loss": 2.6065, "step": 164000 }, { "epoch": 0.75, "learning_rate": 3.744080896954144e-05, "loss": 2.6112, "step": 164500 }, { "epoch": 0.76, "learning_rate": 3.740269758347845e-05, "loss": 2.6151, "step": 165000 }, { "epoch": 0.76, "learning_rate": 3.736450982189228e-05, "loss": 2.6017, "step": 165500 }, { "epoch": 0.76, "learning_rate": 3.732632206030611e-05, "loss": 2.6126, "step": 166000 }, { "epoch": 0.76, "learning_rate": 3.7288134298719944e-05, "loss": 2.6254, "step": 166500 }, { "epoch": 0.77, "learning_rate": 3.725002291265695e-05, "loss": 2.6014, "step": 167000 }, { "epoch": 0.77, "learning_rate": 3.721183515107078e-05, "loss": 2.6046, "step": 167500 }, { "epoch": 0.77, "learning_rate": 3.7173647389484615e-05, "loss": 2.6185, "step": 168000 }, { "epoch": 0.77, "learning_rate": 3.713545962789845e-05, "loss": 2.601, "step": 168500 }, { "epoch": 0.77, "learning_rate": 3.7097271866312287e-05, "loss": 2.6168, "step": 169000 }, { "epoch": 0.78, "learning_rate": 3.7059160480249293e-05, "loss": 2.6054, "step": 169500 }, { "epoch": 0.78, "learning_rate": 3.7020972718663126e-05, "loss": 2.6124, "step": 170000 }, { "epoch": 0.78, "learning_rate": 3.698278495707696e-05, "loss": 2.6031, "step": 170500 }, { "epoch": 0.78, "learning_rate": 3.694459719549079e-05, "loss": 2.6041, "step": 171000 }, { "epoch": 0.79, "learning_rate": 3.690640943390463e-05, "loss": 2.6057, "step": 171500 }, { "epoch": 0.79, "learning_rate": 3.686822167231846e-05, "loss": 2.5905, "step": 172000 }, { "epoch": 0.79, "learning_rate": 3.683003391073229e-05, "loss": 2.601, "step": 172500 }, { "epoch": 0.79, "learning_rate": 3.6791846149146125e-05, "loss": 2.6032, "step": 173000 }, { "epoch": 0.8, "learning_rate": 3.675365838755996e-05, "loss": 2.6097, "step": 173500 }, { "epoch": 0.8, "learning_rate": 3.671547062597379e-05, "loss": 2.6002, "step": 174000 }, { "epoch": 0.8, "learning_rate": 3.6677359239910797e-05, "loss": 2.6165, "step": 174500 }, { "epoch": 0.8, "learning_rate": 3.663917147832463e-05, "loss": 2.6062, "step": 175000 }, { "epoch": 0.8, "learning_rate": 3.660098371673846e-05, "loss": 2.5957, "step": 175500 }, { "epoch": 0.81, "learning_rate": 3.656279595515229e-05, "loss": 2.6121, "step": 176000 }, { "epoch": 0.81, "learning_rate": 3.65246845690893e-05, "loss": 2.5918, "step": 176500 }, { "epoch": 0.81, "learning_rate": 3.648649680750313e-05, "loss": 2.6204, "step": 177000 }, { "epoch": 0.81, "learning_rate": 3.6448309045916964e-05, "loss": 2.6062, "step": 177500 }, { "epoch": 0.82, "learning_rate": 3.64101212843308e-05, "loss": 2.5986, "step": 178000 }, { "epoch": 0.82, "learning_rate": 3.63720098982678e-05, "loss": 2.605, "step": 178500 }, { "epoch": 0.82, "learning_rate": 3.633382213668164e-05, "loss": 2.5993, "step": 179000 }, { "epoch": 0.82, "learning_rate": 3.6295634375095474e-05, "loss": 2.606, "step": 179500 }, { "epoch": 0.82, "learning_rate": 3.6257446613509306e-05, "loss": 2.5927, "step": 180000 }, { "epoch": 0.83, "learning_rate": 3.621933522744631e-05, "loss": 2.6058, "step": 180500 }, { "epoch": 0.83, "learning_rate": 3.6181147465860145e-05, "loss": 2.6094, "step": 181000 }, { "epoch": 0.83, "learning_rate": 3.614295970427398e-05, "loss": 2.5979, "step": 181500 }, { "epoch": 0.83, "learning_rate": 3.610477194268781e-05, "loss": 2.5967, "step": 182000 }, { "epoch": 0.84, "learning_rate": 3.606666055662482e-05, "loss": 2.6083, "step": 182500 }, { "epoch": 0.84, "learning_rate": 3.602847279503865e-05, "loss": 2.6111, "step": 183000 }, { "epoch": 0.84, "learning_rate": 3.599028503345248e-05, "loss": 2.6049, "step": 183500 }, { "epoch": 0.84, "learning_rate": 3.595209727186631e-05, "loss": 2.6157, "step": 184000 }, { "epoch": 0.85, "learning_rate": 3.5913909510280145e-05, "loss": 2.6048, "step": 184500 }, { "epoch": 0.85, "learning_rate": 3.587579812421715e-05, "loss": 2.6068, "step": 185000 }, { "epoch": 0.85, "learning_rate": 3.5837610362630984e-05, "loss": 2.5958, "step": 185500 }, { "epoch": 0.85, "learning_rate": 3.5799422601044816e-05, "loss": 2.5922, "step": 186000 }, { "epoch": 0.85, "learning_rate": 3.576123483945865e-05, "loss": 2.5969, "step": 186500 }, { "epoch": 0.86, "learning_rate": 3.572319982891883e-05, "loss": 2.6064, "step": 187000 }, { "epoch": 0.86, "learning_rate": 3.568501206733266e-05, "loss": 2.5968, "step": 187500 }, { "epoch": 0.86, "learning_rate": 3.5646824305746495e-05, "loss": 2.5958, "step": 188000 }, { "epoch": 0.86, "learning_rate": 3.560863654416033e-05, "loss": 2.5861, "step": 188500 }, { "epoch": 0.87, "learning_rate": 3.557044878257416e-05, "loss": 2.5702, "step": 189000 }, { "epoch": 0.87, "learning_rate": 3.5532261020988e-05, "loss": 2.5778, "step": 189500 }, { "epoch": 0.87, "learning_rate": 3.549407325940183e-05, "loss": 2.5904, "step": 190000 }, { "epoch": 0.87, "learning_rate": 3.545596187333883e-05, "loss": 2.5935, "step": 190500 }, { "epoch": 0.88, "learning_rate": 3.541777411175267e-05, "loss": 2.6034, "step": 191000 }, { "epoch": 0.88, "learning_rate": 3.53795863501665e-05, "loss": 2.5932, "step": 191500 }, { "epoch": 0.88, "learning_rate": 3.534139858858033e-05, "loss": 2.5814, "step": 192000 }, { "epoch": 0.88, "learning_rate": 3.5303210826994165e-05, "loss": 2.5764, "step": 192500 }, { "epoch": 0.88, "learning_rate": 3.5265023065408e-05, "loss": 2.5804, "step": 193000 }, { "epoch": 0.89, "learning_rate": 3.522683530382183e-05, "loss": 2.6004, "step": 193500 }, { "epoch": 0.89, "learning_rate": 3.518864754223566e-05, "loss": 2.5761, "step": 194000 }, { "epoch": 0.89, "learning_rate": 3.5150459780649494e-05, "loss": 2.6058, "step": 194500 }, { "epoch": 0.89, "learning_rate": 3.51123483945865e-05, "loss": 2.6052, "step": 195000 }, { "epoch": 0.9, "learning_rate": 3.507423700852351e-05, "loss": 2.5899, "step": 195500 }, { "epoch": 0.9, "learning_rate": 3.503604924693734e-05, "loss": 2.5873, "step": 196000 }, { "epoch": 0.9, "learning_rate": 3.499786148535117e-05, "loss": 2.5917, "step": 196500 }, { "epoch": 0.9, "learning_rate": 3.4959673723765004e-05, "loss": 2.5979, "step": 197000 }, { "epoch": 0.91, "learning_rate": 3.492148596217884e-05, "loss": 2.5871, "step": 197500 }, { "epoch": 0.91, "learning_rate": 3.4883298200592675e-05, "loss": 2.5734, "step": 198000 }, { "epoch": 0.91, "learning_rate": 3.484511043900651e-05, "loss": 2.5835, "step": 198500 }, { "epoch": 0.91, "learning_rate": 3.480692267742034e-05, "loss": 2.5906, "step": 199000 }, { "epoch": 0.91, "learning_rate": 3.4768811291357347e-05, "loss": 2.5929, "step": 199500 }, { "epoch": 0.92, "learning_rate": 3.473062352977118e-05, "loss": 2.5853, "step": 200000 }, { "epoch": 0.92, "learning_rate": 3.469243576818501e-05, "loss": 2.5786, "step": 200500 }, { "epoch": 0.92, "learning_rate": 3.465424800659885e-05, "loss": 2.5977, "step": 201000 }, { "epoch": 0.92, "learning_rate": 3.461606024501268e-05, "loss": 2.5808, "step": 201500 }, { "epoch": 0.93, "learning_rate": 3.4577872483426514e-05, "loss": 2.5888, "step": 202000 }, { "epoch": 0.93, "learning_rate": 3.453976109736352e-05, "loss": 2.5883, "step": 202500 }, { "epoch": 0.93, "learning_rate": 3.450157333577735e-05, "loss": 2.5927, "step": 203000 }, { "epoch": 0.93, "learning_rate": 3.4463385574191185e-05, "loss": 2.5882, "step": 203500 }, { "epoch": 0.93, "learning_rate": 3.442519781260502e-05, "loss": 2.5923, "step": 204000 }, { "epoch": 0.94, "learning_rate": 3.4387010051018856e-05, "loss": 2.5892, "step": 204500 }, { "epoch": 0.94, "learning_rate": 3.434882228943269e-05, "loss": 2.5886, "step": 205000 }, { "epoch": 0.94, "learning_rate": 3.431063452784652e-05, "loss": 2.5904, "step": 205500 }, { "epoch": 0.94, "learning_rate": 3.427244676626035e-05, "loss": 2.5851, "step": 206000 }, { "epoch": 0.95, "learning_rate": 3.423433538019736e-05, "loss": 2.5894, "step": 206500 }, { "epoch": 0.95, "learning_rate": 3.419614761861119e-05, "loss": 2.5733, "step": 207000 }, { "epoch": 0.95, "learning_rate": 3.4157959857025024e-05, "loss": 2.594, "step": 207500 }, { "epoch": 0.95, "learning_rate": 3.4119772095438856e-05, "loss": 2.5767, "step": 208000 }, { "epoch": 0.96, "learning_rate": 3.408166070937586e-05, "loss": 2.5857, "step": 208500 }, { "epoch": 0.96, "learning_rate": 3.4043472947789695e-05, "loss": 2.5687, "step": 209000 }, { "epoch": 0.96, "learning_rate": 3.400528518620353e-05, "loss": 2.5883, "step": 209500 }, { "epoch": 0.96, "learning_rate": 3.396709742461736e-05, "loss": 2.584, "step": 210000 }, { "epoch": 0.96, "learning_rate": 3.392890966303119e-05, "loss": 2.5787, "step": 210500 }, { "epoch": 0.97, "learning_rate": 3.38907982769682e-05, "loss": 2.5826, "step": 211000 }, { "epoch": 0.97, "learning_rate": 3.385261051538203e-05, "loss": 2.5828, "step": 211500 }, { "epoch": 0.97, "learning_rate": 3.381442275379586e-05, "loss": 2.5865, "step": 212000 }, { "epoch": 0.97, "learning_rate": 3.37762349922097e-05, "loss": 2.5867, "step": 212500 }, { "epoch": 0.98, "learning_rate": 3.3738047230623534e-05, "loss": 2.5799, "step": 213000 }, { "epoch": 0.98, "learning_rate": 3.3699935844560534e-05, "loss": 2.5761, "step": 213500 }, { "epoch": 0.98, "learning_rate": 3.366174808297437e-05, "loss": 2.5771, "step": 214000 }, { "epoch": 0.98, "learning_rate": 3.362363669691137e-05, "loss": 2.5876, "step": 214500 }, { "epoch": 0.99, "learning_rate": 3.358544893532521e-05, "loss": 2.574, "step": 215000 }, { "epoch": 0.99, "learning_rate": 3.3547261173739044e-05, "loss": 2.5878, "step": 215500 }, { "epoch": 0.99, "learning_rate": 3.3509073412152876e-05, "loss": 2.5874, "step": 216000 }, { "epoch": 0.99, "learning_rate": 3.347088565056671e-05, "loss": 2.5752, "step": 216500 }, { "epoch": 0.99, "learning_rate": 3.343269788898054e-05, "loss": 2.5783, "step": 217000 }, { "epoch": 1.0, "learning_rate": 3.339451012739437e-05, "loss": 2.5884, "step": 217500 }, { "epoch": 1.0, "learning_rate": 3.3356322365808205e-05, "loss": 2.5687, "step": 218000 }, { "epoch": 1.0, "learning_rate": 3.331821097974521e-05, "loss": 2.5582, "step": 218500 }, { "epoch": 1.0, "learning_rate": 3.3280023218159044e-05, "loss": 2.5573, "step": 219000 }, { "epoch": 1.01, "learning_rate": 3.3241835456572876e-05, "loss": 2.551, "step": 219500 }, { "epoch": 1.01, "learning_rate": 3.320364769498671e-05, "loss": 2.5366, "step": 220000 }, { "epoch": 1.01, "learning_rate": 3.316545993340055e-05, "loss": 2.5569, "step": 220500 }, { "epoch": 1.01, "learning_rate": 3.312742492286072e-05, "loss": 2.5285, "step": 221000 }, { "epoch": 1.02, "learning_rate": 3.3089237161274554e-05, "loss": 2.5448, "step": 221500 }, { "epoch": 1.02, "learning_rate": 3.3051049399688386e-05, "loss": 2.5567, "step": 222000 }, { "epoch": 1.02, "learning_rate": 3.301286163810222e-05, "loss": 2.5396, "step": 222500 }, { "epoch": 1.02, "learning_rate": 3.297467387651606e-05, "loss": 2.5534, "step": 223000 }, { "epoch": 1.02, "learning_rate": 3.293648611492989e-05, "loss": 2.5584, "step": 223500 }, { "epoch": 1.03, "learning_rate": 3.289829835334372e-05, "loss": 2.5447, "step": 224000 }, { "epoch": 1.03, "learning_rate": 3.286018696728073e-05, "loss": 2.5504, "step": 224500 }, { "epoch": 1.03, "learning_rate": 3.282199920569456e-05, "loss": 2.562, "step": 225000 }, { "epoch": 1.03, "learning_rate": 3.278381144410839e-05, "loss": 2.5367, "step": 225500 }, { "epoch": 1.04, "learning_rate": 3.2745623682522225e-05, "loss": 2.548, "step": 226000 }, { "epoch": 1.04, "learning_rate": 3.270743592093606e-05, "loss": 2.5425, "step": 226500 }, { "epoch": 1.04, "learning_rate": 3.266924815934989e-05, "loss": 2.5467, "step": 227000 }, { "epoch": 1.04, "learning_rate": 3.263106039776372e-05, "loss": 2.5352, "step": 227500 }, { "epoch": 1.04, "learning_rate": 3.259287263617756e-05, "loss": 2.5381, "step": 228000 }, { "epoch": 1.05, "learning_rate": 3.255476125011456e-05, "loss": 2.5468, "step": 228500 }, { "epoch": 1.05, "learning_rate": 3.251657348852839e-05, "loss": 2.523, "step": 229000 }, { "epoch": 1.05, "learning_rate": 3.247838572694223e-05, "loss": 2.5617, "step": 229500 }, { "epoch": 1.05, "learning_rate": 3.244027434087923e-05, "loss": 2.5385, "step": 230000 }, { "epoch": 1.06, "learning_rate": 3.2402086579293064e-05, "loss": 2.5488, "step": 230500 }, { "epoch": 1.06, "learning_rate": 3.23638988177069e-05, "loss": 2.5556, "step": 231000 }, { "epoch": 1.06, "learning_rate": 3.2325711056120735e-05, "loss": 2.545, "step": 231500 }, { "epoch": 1.06, "learning_rate": 3.2287523294534574e-05, "loss": 2.5458, "step": 232000 }, { "epoch": 1.07, "learning_rate": 3.2249335532948406e-05, "loss": 2.5588, "step": 232500 }, { "epoch": 1.07, "learning_rate": 3.221114777136224e-05, "loss": 2.5626, "step": 233000 }, { "epoch": 1.07, "learning_rate": 3.217296000977607e-05, "loss": 2.5346, "step": 233500 }, { "epoch": 1.07, "learning_rate": 3.21347722481899e-05, "loss": 2.5441, "step": 234000 }, { "epoch": 1.07, "learning_rate": 3.209666086212691e-05, "loss": 2.5406, "step": 234500 }, { "epoch": 1.08, "learning_rate": 3.205847310054074e-05, "loss": 2.5426, "step": 235000 }, { "epoch": 1.08, "learning_rate": 3.202036171447775e-05, "loss": 2.5486, "step": 235500 }, { "epoch": 1.08, "learning_rate": 3.198217395289158e-05, "loss": 2.5389, "step": 236000 }, { "epoch": 1.08, "learning_rate": 3.194398619130541e-05, "loss": 2.5307, "step": 236500 }, { "epoch": 1.09, "learning_rate": 3.1905798429719245e-05, "loss": 2.5546, "step": 237000 }, { "epoch": 1.09, "learning_rate": 3.186761066813308e-05, "loss": 2.5433, "step": 237500 }, { "epoch": 1.09, "learning_rate": 3.1829422906546916e-05, "loss": 2.5518, "step": 238000 }, { "epoch": 1.09, "learning_rate": 3.179123514496075e-05, "loss": 2.5476, "step": 238500 }, { "epoch": 1.1, "learning_rate": 3.175304738337458e-05, "loss": 2.5427, "step": 239000 }, { "epoch": 1.1, "learning_rate": 3.171485962178841e-05, "loss": 2.5359, "step": 239500 }, { "epoch": 1.1, "learning_rate": 3.167674823572542e-05, "loss": 2.5404, "step": 240000 }, { "epoch": 1.1, "learning_rate": 3.163856047413925e-05, "loss": 2.537, "step": 240500 }, { "epoch": 1.1, "learning_rate": 3.1600372712553084e-05, "loss": 2.5354, "step": 241000 }, { "epoch": 1.11, "learning_rate": 3.1562184950966916e-05, "loss": 2.5492, "step": 241500 }, { "epoch": 1.11, "learning_rate": 3.152399718938075e-05, "loss": 2.547, "step": 242000 }, { "epoch": 1.11, "learning_rate": 3.148580942779458e-05, "loss": 2.5364, "step": 242500 }, { "epoch": 1.11, "learning_rate": 3.144769804173159e-05, "loss": 2.566, "step": 243000 }, { "epoch": 1.12, "learning_rate": 3.140951028014542e-05, "loss": 2.5474, "step": 243500 }, { "epoch": 1.12, "learning_rate": 3.137132251855925e-05, "loss": 2.5425, "step": 244000 }, { "epoch": 1.12, "learning_rate": 3.133313475697309e-05, "loss": 2.5494, "step": 244500 }, { "epoch": 1.12, "learning_rate": 3.129494699538692e-05, "loss": 2.5373, "step": 245000 }, { "epoch": 1.13, "learning_rate": 3.125683560932392e-05, "loss": 2.5279, "step": 245500 }, { "epoch": 1.13, "learning_rate": 3.121864784773776e-05, "loss": 2.5559, "step": 246000 }, { "epoch": 1.13, "learning_rate": 3.118053646167476e-05, "loss": 2.5413, "step": 246500 }, { "epoch": 1.13, "learning_rate": 3.11423487000886e-05, "loss": 2.5333, "step": 247000 }, { "epoch": 1.13, "learning_rate": 3.110416093850243e-05, "loss": 2.541, "step": 247500 }, { "epoch": 1.14, "learning_rate": 3.1065973176916265e-05, "loss": 2.5568, "step": 248000 }, { "epoch": 1.14, "learning_rate": 3.10277854153301e-05, "loss": 2.5394, "step": 248500 }, { "epoch": 1.14, "learning_rate": 3.098959765374393e-05, "loss": 2.5354, "step": 249000 }, { "epoch": 1.14, "learning_rate": 3.095140989215776e-05, "loss": 2.5219, "step": 249500 }, { "epoch": 1.15, "learning_rate": 3.091329850609477e-05, "loss": 2.5332, "step": 250000 }, { "epoch": 1.15, "learning_rate": 3.08751107445086e-05, "loss": 2.5265, "step": 250500 }, { "epoch": 1.15, "learning_rate": 3.083692298292243e-05, "loss": 2.5466, "step": 251000 }, { "epoch": 1.15, "learning_rate": 3.0798735221336265e-05, "loss": 2.5518, "step": 251500 }, { "epoch": 1.15, "learning_rate": 3.07605474597501e-05, "loss": 2.5508, "step": 252000 }, { "epoch": 1.16, "learning_rate": 3.0722359698163936e-05, "loss": 2.5337, "step": 252500 }, { "epoch": 1.16, "learning_rate": 3.068417193657777e-05, "loss": 2.539, "step": 253000 }, { "epoch": 1.16, "learning_rate": 3.06459841749916e-05, "loss": 2.543, "step": 253500 }, { "epoch": 1.16, "learning_rate": 3.060779641340543e-05, "loss": 2.5212, "step": 254000 }, { "epoch": 1.17, "learning_rate": 3.056968502734244e-05, "loss": 2.5283, "step": 254500 }, { "epoch": 1.17, "learning_rate": 3.053149726575627e-05, "loss": 2.5406, "step": 255000 }, { "epoch": 1.17, "learning_rate": 3.0493385879693275e-05, "loss": 2.5387, "step": 255500 }, { "epoch": 1.17, "learning_rate": 3.045519811810711e-05, "loss": 2.5409, "step": 256000 }, { "epoch": 1.18, "learning_rate": 3.0417010356520942e-05, "loss": 2.5463, "step": 256500 }, { "epoch": 1.18, "learning_rate": 3.0378822594934775e-05, "loss": 2.5373, "step": 257000 }, { "epoch": 1.18, "learning_rate": 3.0340634833348607e-05, "loss": 2.5277, "step": 257500 }, { "epoch": 1.18, "learning_rate": 3.0302447071762442e-05, "loss": 2.5286, "step": 258000 }, { "epoch": 1.18, "learning_rate": 3.0264259310176274e-05, "loss": 2.5371, "step": 258500 }, { "epoch": 1.19, "learning_rate": 3.0226071548590107e-05, "loss": 2.5496, "step": 259000 }, { "epoch": 1.19, "learning_rate": 3.018788378700394e-05, "loss": 2.5281, "step": 259500 }, { "epoch": 1.19, "learning_rate": 3.0149696025417774e-05, "loss": 2.5319, "step": 260000 }, { "epoch": 1.19, "learning_rate": 3.0111584639354778e-05, "loss": 2.5421, "step": 260500 }, { "epoch": 1.2, "learning_rate": 3.007339687776861e-05, "loss": 2.5277, "step": 261000 }, { "epoch": 1.2, "learning_rate": 3.003520911618245e-05, "loss": 2.5327, "step": 261500 }, { "epoch": 1.2, "learning_rate": 2.9997021354596284e-05, "loss": 2.5222, "step": 262000 }, { "epoch": 1.2, "learning_rate": 2.9958909968533288e-05, "loss": 2.5413, "step": 262500 }, { "epoch": 1.21, "learning_rate": 2.992072220694712e-05, "loss": 2.5395, "step": 263000 }, { "epoch": 1.21, "learning_rate": 2.9882610820884127e-05, "loss": 2.5356, "step": 263500 }, { "epoch": 1.21, "learning_rate": 2.984442305929796e-05, "loss": 2.5391, "step": 264000 }, { "epoch": 1.21, "learning_rate": 2.9806235297711795e-05, "loss": 2.5429, "step": 264500 }, { "epoch": 1.21, "learning_rate": 2.9768047536125627e-05, "loss": 2.5278, "step": 265000 }, { "epoch": 1.22, "learning_rate": 2.972985977453946e-05, "loss": 2.5308, "step": 265500 }, { "epoch": 1.22, "learning_rate": 2.969167201295329e-05, "loss": 2.544, "step": 266000 }, { "epoch": 1.22, "learning_rate": 2.9653484251367127e-05, "loss": 2.5335, "step": 266500 }, { "epoch": 1.22, "learning_rate": 2.961529648978096e-05, "loss": 2.5356, "step": 267000 }, { "epoch": 1.23, "learning_rate": 2.9577185103717962e-05, "loss": 2.5371, "step": 267500 }, { "epoch": 1.23, "learning_rate": 2.9538997342131798e-05, "loss": 2.5302, "step": 268000 }, { "epoch": 1.23, "learning_rate": 2.950080958054563e-05, "loss": 2.5275, "step": 268500 }, { "epoch": 1.23, "learning_rate": 2.9462621818959462e-05, "loss": 2.5434, "step": 269000 }, { "epoch": 1.23, "learning_rate": 2.942451043289647e-05, "loss": 2.5332, "step": 269500 }, { "epoch": 1.24, "learning_rate": 2.93863226713103e-05, "loss": 2.5238, "step": 270000 }, { "epoch": 1.24, "learning_rate": 2.9348134909724133e-05, "loss": 2.5383, "step": 270500 }, { "epoch": 1.24, "learning_rate": 2.930994714813797e-05, "loss": 2.5484, "step": 271000 }, { "epoch": 1.24, "learning_rate": 2.92717593865518e-05, "loss": 2.532, "step": 271500 }, { "epoch": 1.25, "learning_rate": 2.9233571624965633e-05, "loss": 2.5318, "step": 272000 }, { "epoch": 1.25, "learning_rate": 2.9195383863379465e-05, "loss": 2.5163, "step": 272500 }, { "epoch": 1.25, "learning_rate": 2.91571961017933e-05, "loss": 2.5415, "step": 273000 }, { "epoch": 1.25, "learning_rate": 2.9119161091253476e-05, "loss": 2.5271, "step": 273500 }, { "epoch": 1.26, "learning_rate": 2.908097332966731e-05, "loss": 2.5285, "step": 274000 }, { "epoch": 1.26, "learning_rate": 2.9042785568081143e-05, "loss": 2.5256, "step": 274500 }, { "epoch": 1.26, "learning_rate": 2.9004597806494976e-05, "loss": 2.5373, "step": 275000 }, { "epoch": 1.26, "learning_rate": 2.896641004490881e-05, "loss": 2.5252, "step": 275500 }, { "epoch": 1.26, "learning_rate": 2.8928222283322643e-05, "loss": 2.5303, "step": 276000 }, { "epoch": 1.27, "learning_rate": 2.8890110897259647e-05, "loss": 2.5321, "step": 276500 }, { "epoch": 1.27, "learning_rate": 2.8851923135673482e-05, "loss": 2.5266, "step": 277000 }, { "epoch": 1.27, "learning_rate": 2.8813735374087314e-05, "loss": 2.5352, "step": 277500 }, { "epoch": 1.27, "learning_rate": 2.8775547612501147e-05, "loss": 2.5234, "step": 278000 }, { "epoch": 1.28, "learning_rate": 2.8737436226438153e-05, "loss": 2.5307, "step": 278500 }, { "epoch": 1.28, "learning_rate": 2.8699248464851986e-05, "loss": 2.5142, "step": 279000 }, { "epoch": 1.28, "learning_rate": 2.8661060703265818e-05, "loss": 2.5325, "step": 279500 }, { "epoch": 1.28, "learning_rate": 2.862287294167965e-05, "loss": 2.5186, "step": 280000 }, { "epoch": 1.29, "learning_rate": 2.8584761555616657e-05, "loss": 2.5356, "step": 280500 }, { "epoch": 1.29, "learning_rate": 2.854657379403049e-05, "loss": 2.5329, "step": 281000 }, { "epoch": 1.29, "learning_rate": 2.8508386032444324e-05, "loss": 2.5316, "step": 281500 }, { "epoch": 1.29, "learning_rate": 2.8470274646381328e-05, "loss": 2.5192, "step": 282000 }, { "epoch": 1.29, "learning_rate": 2.843208688479516e-05, "loss": 2.5469, "step": 282500 }, { "epoch": 1.3, "learning_rate": 2.8393899123208996e-05, "loss": 2.532, "step": 283000 }, { "epoch": 1.3, "learning_rate": 2.8355711361622828e-05, "loss": 2.525, "step": 283500 }, { "epoch": 1.3, "learning_rate": 2.831752360003666e-05, "loss": 2.5439, "step": 284000 }, { "epoch": 1.3, "learning_rate": 2.8279412213973667e-05, "loss": 2.5347, "step": 284500 }, { "epoch": 1.31, "learning_rate": 2.82412244523875e-05, "loss": 2.5168, "step": 285000 }, { "epoch": 1.31, "learning_rate": 2.820303669080133e-05, "loss": 2.5167, "step": 285500 }, { "epoch": 1.31, "learning_rate": 2.8164848929215167e-05, "loss": 2.5167, "step": 286000 }, { "epoch": 1.31, "learning_rate": 2.8126661167629e-05, "loss": 2.5207, "step": 286500 }, { "epoch": 1.32, "learning_rate": 2.808847340604283e-05, "loss": 2.5361, "step": 287000 }, { "epoch": 1.32, "learning_rate": 2.8050285644456663e-05, "loss": 2.529, "step": 287500 }, { "epoch": 1.32, "learning_rate": 2.80120978828705e-05, "loss": 2.5263, "step": 288000 }, { "epoch": 1.32, "learning_rate": 2.7974062872330674e-05, "loss": 2.528, "step": 288500 }, { "epoch": 1.32, "learning_rate": 2.793587511074451e-05, "loss": 2.5292, "step": 289000 }, { "epoch": 1.33, "learning_rate": 2.789768734915834e-05, "loss": 2.5142, "step": 289500 }, { "epoch": 1.33, "learning_rate": 2.7859499587572173e-05, "loss": 2.5312, "step": 290000 }, { "epoch": 1.33, "learning_rate": 2.782131182598601e-05, "loss": 2.5294, "step": 290500 }, { "epoch": 1.33, "learning_rate": 2.778312406439984e-05, "loss": 2.5228, "step": 291000 }, { "epoch": 1.34, "learning_rate": 2.7744936302813673e-05, "loss": 2.5368, "step": 291500 }, { "epoch": 1.34, "learning_rate": 2.7706748541227505e-05, "loss": 2.5152, "step": 292000 }, { "epoch": 1.34, "learning_rate": 2.7668637155164512e-05, "loss": 2.5217, "step": 292500 }, { "epoch": 1.34, "learning_rate": 2.7630449393578344e-05, "loss": 2.532, "step": 293000 }, { "epoch": 1.34, "learning_rate": 2.7592261631992177e-05, "loss": 2.5123, "step": 293500 }, { "epoch": 1.35, "learning_rate": 2.7554073870406012e-05, "loss": 2.5352, "step": 294000 }, { "epoch": 1.35, "learning_rate": 2.7515962484343016e-05, "loss": 2.5273, "step": 294500 }, { "epoch": 1.35, "learning_rate": 2.7477774722756855e-05, "loss": 2.531, "step": 295000 }, { "epoch": 1.35, "learning_rate": 2.7439586961170687e-05, "loss": 2.5236, "step": 295500 }, { "epoch": 1.36, "learning_rate": 2.7401399199584522e-05, "loss": 2.5073, "step": 296000 }, { "epoch": 1.36, "learning_rate": 2.7363287813521526e-05, "loss": 2.5285, "step": 296500 }, { "epoch": 1.36, "learning_rate": 2.7325100051935358e-05, "loss": 2.5331, "step": 297000 }, { "epoch": 1.36, "learning_rate": 2.7286912290349193e-05, "loss": 2.5148, "step": 297500 }, { "epoch": 1.37, "learning_rate": 2.7248724528763026e-05, "loss": 2.511, "step": 298000 }, { "epoch": 1.37, "learning_rate": 2.7210536767176858e-05, "loss": 2.5316, "step": 298500 }, { "epoch": 1.37, "learning_rate": 2.7172425381113865e-05, "loss": 2.5126, "step": 299000 }, { "epoch": 1.37, "learning_rate": 2.7134237619527697e-05, "loss": 2.5141, "step": 299500 }, { "epoch": 1.37, "learning_rate": 2.709604985794153e-05, "loss": 2.5339, "step": 300000 }, { "epoch": 1.38, "learning_rate": 2.7057862096355365e-05, "loss": 2.5261, "step": 300500 }, { "epoch": 1.38, "learning_rate": 2.7019674334769197e-05, "loss": 2.5023, "step": 301000 }, { "epoch": 1.38, "learning_rate": 2.69815629487062e-05, "loss": 2.5203, "step": 301500 }, { "epoch": 1.38, "learning_rate": 2.6943375187120036e-05, "loss": 2.5198, "step": 302000 }, { "epoch": 1.39, "learning_rate": 2.6905187425533868e-05, "loss": 2.5013, "step": 302500 }, { "epoch": 1.39, "learning_rate": 2.68669996639477e-05, "loss": 2.5304, "step": 303000 }, { "epoch": 1.39, "learning_rate": 2.6828811902361532e-05, "loss": 2.5089, "step": 303500 }, { "epoch": 1.39, "learning_rate": 2.6790624140775368e-05, "loss": 2.5109, "step": 304000 }, { "epoch": 1.4, "learning_rate": 2.67524363791892e-05, "loss": 2.512, "step": 304500 }, { "epoch": 1.4, "learning_rate": 2.6714248617603032e-05, "loss": 2.4975, "step": 305000 }, { "epoch": 1.4, "learning_rate": 2.667613723154004e-05, "loss": 2.497, "step": 305500 }, { "epoch": 1.4, "learning_rate": 2.663794946995387e-05, "loss": 2.504, "step": 306000 }, { "epoch": 1.4, "learning_rate": 2.6599761708367703e-05, "loss": 2.5093, "step": 306500 }, { "epoch": 1.41, "learning_rate": 2.656157394678154e-05, "loss": 2.5111, "step": 307000 }, { "epoch": 1.41, "learning_rate": 2.652338618519537e-05, "loss": 2.5143, "step": 307500 }, { "epoch": 1.41, "learning_rate": 2.6485198423609203e-05, "loss": 2.5183, "step": 308000 }, { "epoch": 1.41, "learning_rate": 2.644708703754621e-05, "loss": 2.5143, "step": 308500 }, { "epoch": 1.42, "learning_rate": 2.6408899275960042e-05, "loss": 2.4955, "step": 309000 }, { "epoch": 1.42, "learning_rate": 2.6370711514373874e-05, "loss": 2.5183, "step": 309500 }, { "epoch": 1.42, "learning_rate": 2.633260012831088e-05, "loss": 2.5143, "step": 310000 }, { "epoch": 1.42, "learning_rate": 2.6294412366724713e-05, "loss": 2.5165, "step": 310500 }, { "epoch": 1.43, "learning_rate": 2.6256224605138545e-05, "loss": 2.5055, "step": 311000 }, { "epoch": 1.43, "learning_rate": 2.621803684355238e-05, "loss": 2.5095, "step": 311500 }, { "epoch": 1.43, "learning_rate": 2.6179849081966213e-05, "loss": 2.5243, "step": 312000 }, { "epoch": 1.43, "learning_rate": 2.6141661320380045e-05, "loss": 2.5074, "step": 312500 }, { "epoch": 1.43, "learning_rate": 2.6103549934317052e-05, "loss": 2.5096, "step": 313000 }, { "epoch": 1.44, "learning_rate": 2.6065362172730884e-05, "loss": 2.508, "step": 313500 }, { "epoch": 1.44, "learning_rate": 2.6027174411144716e-05, "loss": 2.5096, "step": 314000 }, { "epoch": 1.44, "learning_rate": 2.598898664955855e-05, "loss": 2.4983, "step": 314500 }, { "epoch": 1.44, "learning_rate": 2.5950798887972384e-05, "loss": 2.5052, "step": 315000 }, { "epoch": 1.45, "learning_rate": 2.5912611126386216e-05, "loss": 2.5174, "step": 315500 }, { "epoch": 1.45, "learning_rate": 2.587442336480005e-05, "loss": 2.5116, "step": 316000 }, { "epoch": 1.45, "learning_rate": 2.583623560321388e-05, "loss": 2.5065, "step": 316500 }, { "epoch": 1.45, "learning_rate": 2.5798047841627716e-05, "loss": 2.52, "step": 317000 }, { "epoch": 1.45, "learning_rate": 2.575993645556472e-05, "loss": 2.5261, "step": 317500 }, { "epoch": 1.46, "learning_rate": 2.5721748693978552e-05, "loss": 2.5244, "step": 318000 }, { "epoch": 1.46, "learning_rate": 2.5683560932392387e-05, "loss": 2.5157, "step": 318500 }, { "epoch": 1.46, "learning_rate": 2.564537317080622e-05, "loss": 2.5092, "step": 319000 }, { "epoch": 1.46, "learning_rate": 2.5607261784743226e-05, "loss": 2.5104, "step": 319500 }, { "epoch": 1.47, "learning_rate": 2.556907402315706e-05, "loss": 2.5138, "step": 320000 }, { "epoch": 1.47, "learning_rate": 2.553088626157089e-05, "loss": 2.5047, "step": 320500 }, { "epoch": 1.47, "learning_rate": 2.5492698499984723e-05, "loss": 2.5195, "step": 321000 }, { "epoch": 1.47, "learning_rate": 2.545458711392173e-05, "loss": 2.5151, "step": 321500 }, { "epoch": 1.48, "learning_rate": 2.5416399352335562e-05, "loss": 2.5174, "step": 322000 }, { "epoch": 1.48, "learning_rate": 2.5378211590749394e-05, "loss": 2.5153, "step": 322500 }, { "epoch": 1.48, "learning_rate": 2.534002382916323e-05, "loss": 2.5133, "step": 323000 }, { "epoch": 1.48, "learning_rate": 2.5301836067577062e-05, "loss": 2.5207, "step": 323500 }, { "epoch": 1.48, "learning_rate": 2.5263648305990894e-05, "loss": 2.521, "step": 324000 }, { "epoch": 1.49, "learning_rate": 2.5225460544404726e-05, "loss": 2.5116, "step": 324500 }, { "epoch": 1.49, "learning_rate": 2.5187272782818565e-05, "loss": 2.5153, "step": 325000 }, { "epoch": 1.49, "learning_rate": 2.51490850212324e-05, "loss": 2.5185, "step": 325500 }, { "epoch": 1.49, "learning_rate": 2.5110973635169404e-05, "loss": 2.5144, "step": 326000 }, { "epoch": 1.5, "learning_rate": 2.5072785873583236e-05, "loss": 2.4997, "step": 326500 }, { "epoch": 1.5, "learning_rate": 2.5034598111997072e-05, "loss": 2.5065, "step": 327000 }, { "epoch": 1.5, "learning_rate": 2.4996486725934072e-05, "loss": 2.4958, "step": 327500 }, { "epoch": 1.5, "learning_rate": 2.4958298964347907e-05, "loss": 2.5255, "step": 328000 }, { "epoch": 1.51, "learning_rate": 2.492011120276174e-05, "loss": 2.5061, "step": 328500 }, { "epoch": 1.51, "learning_rate": 2.488192344117557e-05, "loss": 2.5157, "step": 329000 }, { "epoch": 1.51, "learning_rate": 2.4843735679589404e-05, "loss": 2.5118, "step": 329500 }, { "epoch": 1.51, "learning_rate": 2.480554791800324e-05, "loss": 2.5054, "step": 330000 }, { "epoch": 1.51, "learning_rate": 2.476736015641707e-05, "loss": 2.5111, "step": 330500 }, { "epoch": 1.52, "learning_rate": 2.4729172394830904e-05, "loss": 2.5024, "step": 331000 }, { "epoch": 1.52, "learning_rate": 2.469098463324474e-05, "loss": 2.5045, "step": 331500 }, { "epoch": 1.52, "learning_rate": 2.4652796871658575e-05, "loss": 2.4979, "step": 332000 }, { "epoch": 1.52, "learning_rate": 2.4614685485595578e-05, "loss": 2.5169, "step": 332500 }, { "epoch": 1.53, "learning_rate": 2.457649772400941e-05, "loss": 2.5107, "step": 333000 }, { "epoch": 1.53, "learning_rate": 2.4538309962423246e-05, "loss": 2.507, "step": 333500 }, { "epoch": 1.53, "learning_rate": 2.450019857636025e-05, "loss": 2.5057, "step": 334000 }, { "epoch": 1.53, "learning_rate": 2.4462010814774085e-05, "loss": 2.5156, "step": 334500 }, { "epoch": 1.54, "learning_rate": 2.4423823053187917e-05, "loss": 2.4845, "step": 335000 }, { "epoch": 1.54, "learning_rate": 2.438563529160175e-05, "loss": 2.5032, "step": 335500 }, { "epoch": 1.54, "learning_rate": 2.434744753001558e-05, "loss": 2.4944, "step": 336000 }, { "epoch": 1.54, "learning_rate": 2.4309259768429417e-05, "loss": 2.5036, "step": 336500 }, { "epoch": 1.54, "learning_rate": 2.427107200684325e-05, "loss": 2.5061, "step": 337000 }, { "epoch": 1.55, "learning_rate": 2.423288424525708e-05, "loss": 2.509, "step": 337500 }, { "epoch": 1.55, "learning_rate": 2.4194772859194088e-05, "loss": 2.4925, "step": 338000 }, { "epoch": 1.55, "learning_rate": 2.415658509760792e-05, "loss": 2.5023, "step": 338500 }, { "epoch": 1.55, "learning_rate": 2.4118473711544924e-05, "loss": 2.5172, "step": 339000 }, { "epoch": 1.56, "learning_rate": 2.408028594995876e-05, "loss": 2.5129, "step": 339500 }, { "epoch": 1.56, "learning_rate": 2.404209818837259e-05, "loss": 2.5011, "step": 340000 }, { "epoch": 1.56, "learning_rate": 2.4003910426786424e-05, "loss": 2.4977, "step": 340500 }, { "epoch": 1.56, "learning_rate": 2.396572266520026e-05, "loss": 2.4979, "step": 341000 }, { "epoch": 1.56, "learning_rate": 2.392753490361409e-05, "loss": 2.5248, "step": 341500 }, { "epoch": 1.57, "learning_rate": 2.3889347142027924e-05, "loss": 2.5024, "step": 342000 }, { "epoch": 1.57, "learning_rate": 2.385123575596493e-05, "loss": 2.5032, "step": 342500 }, { "epoch": 1.57, "learning_rate": 2.3813047994378763e-05, "loss": 2.5078, "step": 343000 }, { "epoch": 1.57, "learning_rate": 2.3774860232792595e-05, "loss": 2.514, "step": 343500 }, { "epoch": 1.58, "learning_rate": 2.3736672471206427e-05, "loss": 2.5069, "step": 344000 }, { "epoch": 1.58, "learning_rate": 2.3698484709620262e-05, "loss": 2.5132, "step": 344500 }, { "epoch": 1.58, "learning_rate": 2.3660296948034095e-05, "loss": 2.5165, "step": 345000 }, { "epoch": 1.58, "learning_rate": 2.3622109186447927e-05, "loss": 2.5133, "step": 345500 }, { "epoch": 1.59, "learning_rate": 2.358392142486176e-05, "loss": 2.5142, "step": 346000 }, { "epoch": 1.59, "learning_rate": 2.3545810038798766e-05, "loss": 2.5116, "step": 346500 }, { "epoch": 1.59, "learning_rate": 2.3507698652735773e-05, "loss": 2.506, "step": 347000 }, { "epoch": 1.59, "learning_rate": 2.3469587266672776e-05, "loss": 2.481, "step": 347500 }, { "epoch": 1.59, "learning_rate": 2.3431399505086608e-05, "loss": 2.496, "step": 348000 }, { "epoch": 1.6, "learning_rate": 2.3393211743500444e-05, "loss": 2.5162, "step": 348500 }, { "epoch": 1.6, "learning_rate": 2.335502398191428e-05, "loss": 2.4868, "step": 349000 }, { "epoch": 1.6, "learning_rate": 2.331683622032811e-05, "loss": 2.4927, "step": 349500 }, { "epoch": 1.6, "learning_rate": 2.3278648458741944e-05, "loss": 2.4977, "step": 350000 }, { "epoch": 1.61, "learning_rate": 2.324046069715578e-05, "loss": 2.5081, "step": 350500 }, { "epoch": 1.61, "learning_rate": 2.320227293556961e-05, "loss": 2.5005, "step": 351000 }, { "epoch": 1.61, "learning_rate": 2.3164085173983444e-05, "loss": 2.5109, "step": 351500 }, { "epoch": 1.61, "learning_rate": 2.3125897412397276e-05, "loss": 2.5022, "step": 352000 }, { "epoch": 1.62, "learning_rate": 2.308770965081111e-05, "loss": 2.4936, "step": 352500 }, { "epoch": 1.62, "learning_rate": 2.3049521889224943e-05, "loss": 2.4932, "step": 353000 }, { "epoch": 1.62, "learning_rate": 2.3011410503161947e-05, "loss": 2.4868, "step": 353500 }, { "epoch": 1.62, "learning_rate": 2.2973222741575782e-05, "loss": 2.5014, "step": 354000 }, { "epoch": 1.62, "learning_rate": 2.2935034979989615e-05, "loss": 2.5233, "step": 354500 }, { "epoch": 1.63, "learning_rate": 2.2896847218403447e-05, "loss": 2.4916, "step": 355000 }, { "epoch": 1.63, "learning_rate": 2.285865945681728e-05, "loss": 2.4821, "step": 355500 }, { "epoch": 1.63, "learning_rate": 2.2820548070754286e-05, "loss": 2.51, "step": 356000 }, { "epoch": 1.63, "learning_rate": 2.2782360309168118e-05, "loss": 2.5103, "step": 356500 }, { "epoch": 1.64, "learning_rate": 2.274417254758195e-05, "loss": 2.4884, "step": 357000 }, { "epoch": 1.64, "learning_rate": 2.2705984785995786e-05, "loss": 2.4913, "step": 357500 }, { "epoch": 1.64, "learning_rate": 2.2667797024409618e-05, "loss": 2.4929, "step": 358000 }, { "epoch": 1.64, "learning_rate": 2.262960926282345e-05, "loss": 2.5079, "step": 358500 }, { "epoch": 1.65, "learning_rate": 2.2591421501237282e-05, "loss": 2.5, "step": 359000 }, { "epoch": 1.65, "learning_rate": 2.255331011517429e-05, "loss": 2.4958, "step": 359500 }, { "epoch": 1.65, "learning_rate": 2.251512235358812e-05, "loss": 2.5076, "step": 360000 }, { "epoch": 1.65, "learning_rate": 2.2476934592001957e-05, "loss": 2.4991, "step": 360500 }, { "epoch": 1.65, "learning_rate": 2.243874683041579e-05, "loss": 2.4979, "step": 361000 }, { "epoch": 1.66, "learning_rate": 2.240055906882962e-05, "loss": 2.4877, "step": 361500 }, { "epoch": 1.66, "learning_rate": 2.2362447682766628e-05, "loss": 2.5059, "step": 362000 }, { "epoch": 1.66, "learning_rate": 2.232425992118046e-05, "loss": 2.4843, "step": 362500 }, { "epoch": 1.66, "learning_rate": 2.2286072159594296e-05, "loss": 2.4881, "step": 363000 }, { "epoch": 1.67, "learning_rate": 2.2247884398008128e-05, "loss": 2.5017, "step": 363500 }, { "epoch": 1.67, "learning_rate": 2.2209773011945135e-05, "loss": 2.4981, "step": 364000 }, { "epoch": 1.67, "learning_rate": 2.2171585250358967e-05, "loss": 2.489, "step": 364500 }, { "epoch": 1.67, "learning_rate": 2.21333974887728e-05, "loss": 2.4955, "step": 365000 }, { "epoch": 1.67, "learning_rate": 2.2095209727186634e-05, "loss": 2.4974, "step": 365500 }, { "epoch": 1.68, "learning_rate": 2.2057021965600467e-05, "loss": 2.4976, "step": 366000 }, { "epoch": 1.68, "learning_rate": 2.20188342040143e-05, "loss": 2.4934, "step": 366500 }, { "epoch": 1.68, "learning_rate": 2.198064644242813e-05, "loss": 2.5053, "step": 367000 }, { "epoch": 1.68, "learning_rate": 2.1942535056365138e-05, "loss": 2.4881, "step": 367500 }, { "epoch": 1.69, "learning_rate": 2.190434729477897e-05, "loss": 2.501, "step": 368000 }, { "epoch": 1.69, "learning_rate": 2.1866159533192802e-05, "loss": 2.4958, "step": 368500 }, { "epoch": 1.69, "learning_rate": 2.182804814712981e-05, "loss": 2.4916, "step": 369000 }, { "epoch": 1.69, "learning_rate": 2.178986038554364e-05, "loss": 2.4897, "step": 369500 }, { "epoch": 1.7, "learning_rate": 2.1751672623957477e-05, "loss": 2.4845, "step": 370000 }, { "epoch": 1.7, "learning_rate": 2.171348486237131e-05, "loss": 2.5041, "step": 370500 }, { "epoch": 1.7, "learning_rate": 2.167529710078514e-05, "loss": 2.4962, "step": 371000 }, { "epoch": 1.7, "learning_rate": 2.1637109339198973e-05, "loss": 2.4884, "step": 371500 }, { "epoch": 1.7, "learning_rate": 2.159892157761281e-05, "loss": 2.4941, "step": 372000 }, { "epoch": 1.71, "learning_rate": 2.156073381602664e-05, "loss": 2.5096, "step": 372500 }, { "epoch": 1.71, "learning_rate": 2.1522546054440473e-05, "loss": 2.4854, "step": 373000 }, { "epoch": 1.71, "learning_rate": 2.148443466837748e-05, "loss": 2.4825, "step": 373500 }, { "epoch": 1.71, "learning_rate": 2.1446246906791312e-05, "loss": 2.5111, "step": 374000 }, { "epoch": 1.72, "learning_rate": 2.1408059145205144e-05, "loss": 2.5055, "step": 374500 }, { "epoch": 1.72, "learning_rate": 2.1369871383618976e-05, "loss": 2.5146, "step": 375000 }, { "epoch": 1.72, "learning_rate": 2.1331759997555983e-05, "loss": 2.4959, "step": 375500 }, { "epoch": 1.72, "learning_rate": 2.1293572235969815e-05, "loss": 2.4997, "step": 376000 }, { "epoch": 1.73, "learning_rate": 2.125538447438365e-05, "loss": 2.4885, "step": 376500 }, { "epoch": 1.73, "learning_rate": 2.1217196712797483e-05, "loss": 2.526, "step": 377000 }, { "epoch": 1.73, "learning_rate": 2.1179008951211315e-05, "loss": 2.5, "step": 377500 }, { "epoch": 1.73, "learning_rate": 2.1140897565148322e-05, "loss": 2.4943, "step": 378000 }, { "epoch": 1.73, "learning_rate": 2.1102709803562158e-05, "loss": 2.4872, "step": 378500 }, { "epoch": 1.74, "learning_rate": 2.106452204197599e-05, "loss": 2.4927, "step": 379000 }, { "epoch": 1.74, "learning_rate": 2.1026334280389822e-05, "loss": 2.5075, "step": 379500 }, { "epoch": 1.74, "learning_rate": 2.0988146518803657e-05, "loss": 2.5004, "step": 380000 }, { "epoch": 1.74, "learning_rate": 2.095003513274066e-05, "loss": 2.5027, "step": 380500 }, { "epoch": 1.75, "learning_rate": 2.0911923746677668e-05, "loss": 2.4956, "step": 381000 }, { "epoch": 1.75, "learning_rate": 2.08737359850915e-05, "loss": 2.487, "step": 381500 }, { "epoch": 1.75, "learning_rate": 2.0835548223505332e-05, "loss": 2.4883, "step": 382000 }, { "epoch": 1.75, "learning_rate": 2.0797360461919164e-05, "loss": 2.4898, "step": 382500 }, { "epoch": 1.76, "learning_rate": 2.0759172700333e-05, "loss": 2.4909, "step": 383000 }, { "epoch": 1.76, "learning_rate": 2.0720984938746832e-05, "loss": 2.4797, "step": 383500 }, { "epoch": 1.76, "learning_rate": 2.0682797177160664e-05, "loss": 2.4808, "step": 384000 }, { "epoch": 1.76, "learning_rate": 2.0644609415574496e-05, "loss": 2.4991, "step": 384500 }, { "epoch": 1.76, "learning_rate": 2.0606498029511503e-05, "loss": 2.4897, "step": 385000 }, { "epoch": 1.77, "learning_rate": 2.0568310267925335e-05, "loss": 2.496, "step": 385500 }, { "epoch": 1.77, "learning_rate": 2.0530198881862342e-05, "loss": 2.4874, "step": 386000 }, { "epoch": 1.77, "learning_rate": 2.0492011120276174e-05, "loss": 2.4978, "step": 386500 }, { "epoch": 1.77, "learning_rate": 2.0453823358690007e-05, "loss": 2.4873, "step": 387000 }, { "epoch": 1.78, "learning_rate": 2.0415635597103842e-05, "loss": 2.501, "step": 387500 }, { "epoch": 1.78, "learning_rate": 2.0377447835517674e-05, "loss": 2.4895, "step": 388000 }, { "epoch": 1.78, "learning_rate": 2.0339260073931506e-05, "loss": 2.4798, "step": 388500 }, { "epoch": 1.78, "learning_rate": 2.030107231234534e-05, "loss": 2.4804, "step": 389000 }, { "epoch": 1.78, "learning_rate": 2.0262884550759174e-05, "loss": 2.4919, "step": 389500 }, { "epoch": 1.79, "learning_rate": 2.0224696789173006e-05, "loss": 2.4777, "step": 390000 }, { "epoch": 1.79, "learning_rate": 2.0186585403110013e-05, "loss": 2.4773, "step": 390500 }, { "epoch": 1.79, "learning_rate": 2.0148474017047017e-05, "loss": 2.4739, "step": 391000 }, { "epoch": 1.79, "learning_rate": 2.011028625546085e-05, "loss": 2.5024, "step": 391500 }, { "epoch": 1.8, "learning_rate": 2.0072098493874684e-05, "loss": 2.4834, "step": 392000 }, { "epoch": 1.8, "learning_rate": 2.0033910732288516e-05, "loss": 2.4924, "step": 392500 }, { "epoch": 1.8, "learning_rate": 1.999572297070235e-05, "loss": 2.4797, "step": 393000 }, { "epoch": 1.8, "learning_rate": 1.995753520911618e-05, "loss": 2.484, "step": 393500 }, { "epoch": 1.81, "learning_rate": 1.9919347447530016e-05, "loss": 2.4977, "step": 394000 }, { "epoch": 1.81, "learning_rate": 1.9881159685943852e-05, "loss": 2.4783, "step": 394500 }, { "epoch": 1.81, "learning_rate": 1.9843048299880855e-05, "loss": 2.4962, "step": 395000 }, { "epoch": 1.81, "learning_rate": 1.9804936913817862e-05, "loss": 2.4938, "step": 395500 }, { "epoch": 1.81, "learning_rate": 1.9766749152231694e-05, "loss": 2.4844, "step": 396000 }, { "epoch": 1.82, "learning_rate": 1.9728561390645527e-05, "loss": 2.4872, "step": 396500 }, { "epoch": 1.82, "learning_rate": 1.9690373629059362e-05, "loss": 2.4955, "step": 397000 }, { "epoch": 1.82, "learning_rate": 1.9652185867473194e-05, "loss": 2.4852, "step": 397500 }, { "epoch": 1.82, "learning_rate": 1.9613998105887026e-05, "loss": 2.4799, "step": 398000 }, { "epoch": 1.83, "learning_rate": 1.957581034430086e-05, "loss": 2.4944, "step": 398500 }, { "epoch": 1.83, "learning_rate": 1.9537698958237865e-05, "loss": 2.4988, "step": 399000 }, { "epoch": 1.83, "learning_rate": 1.9499511196651698e-05, "loss": 2.4875, "step": 399500 }, { "epoch": 1.83, "learning_rate": 1.9461323435065533e-05, "loss": 2.4724, "step": 400000 }, { "epoch": 1.84, "learning_rate": 1.9423212049002537e-05, "loss": 2.4987, "step": 400500 }, { "epoch": 1.84, "learning_rate": 1.938502428741637e-05, "loss": 2.4957, "step": 401000 }, { "epoch": 1.84, "learning_rate": 1.9346836525830204e-05, "loss": 2.4821, "step": 401500 }, { "epoch": 1.84, "learning_rate": 1.9308648764244036e-05, "loss": 2.4774, "step": 402000 }, { "epoch": 1.84, "learning_rate": 1.927046100265787e-05, "loss": 2.4921, "step": 402500 }, { "epoch": 1.85, "learning_rate": 1.92322732410717e-05, "loss": 2.4885, "step": 403000 }, { "epoch": 1.85, "learning_rate": 1.9194085479485536e-05, "loss": 2.4955, "step": 403500 }, { "epoch": 1.85, "learning_rate": 1.915589771789937e-05, "loss": 2.4887, "step": 404000 }, { "epoch": 1.85, "learning_rate": 1.91177099563132e-05, "loss": 2.4868, "step": 404500 }, { "epoch": 1.86, "learning_rate": 1.9079598570250208e-05, "loss": 2.4786, "step": 405000 }, { "epoch": 1.86, "learning_rate": 1.904141080866404e-05, "loss": 2.4996, "step": 405500 }, { "epoch": 1.86, "learning_rate": 1.9003223047077872e-05, "loss": 2.4757, "step": 406000 }, { "epoch": 1.86, "learning_rate": 1.8965035285491707e-05, "loss": 2.4956, "step": 406500 }, { "epoch": 1.87, "learning_rate": 1.892684752390554e-05, "loss": 2.4914, "step": 407000 }, { "epoch": 1.87, "learning_rate": 1.8888736137842543e-05, "loss": 2.4867, "step": 407500 }, { "epoch": 1.87, "learning_rate": 1.885054837625638e-05, "loss": 2.4797, "step": 408000 }, { "epoch": 1.87, "learning_rate": 1.881236061467021e-05, "loss": 2.4955, "step": 408500 }, { "epoch": 1.87, "learning_rate": 1.8774172853084043e-05, "loss": 2.4922, "step": 409000 }, { "epoch": 1.88, "learning_rate": 1.8735985091497875e-05, "loss": 2.4875, "step": 409500 }, { "epoch": 1.88, "learning_rate": 1.869779732991171e-05, "loss": 2.4836, "step": 410000 }, { "epoch": 1.88, "learning_rate": 1.8659609568325546e-05, "loss": 2.4803, "step": 410500 }, { "epoch": 1.88, "learning_rate": 1.8621421806739378e-05, "loss": 2.4964, "step": 411000 }, { "epoch": 1.89, "learning_rate": 1.8583310420676385e-05, "loss": 2.4875, "step": 411500 }, { "epoch": 1.89, "learning_rate": 1.8545122659090217e-05, "loss": 2.4915, "step": 412000 }, { "epoch": 1.89, "learning_rate": 1.850693489750405e-05, "loss": 2.4995, "step": 412500 }, { "epoch": 1.89, "learning_rate": 1.846874713591788e-05, "loss": 2.4809, "step": 413000 }, { "epoch": 1.89, "learning_rate": 1.8430559374331717e-05, "loss": 2.4906, "step": 413500 }, { "epoch": 1.9, "learning_rate": 1.839244798826872e-05, "loss": 2.4659, "step": 414000 }, { "epoch": 1.9, "learning_rate": 1.8354336602205728e-05, "loss": 2.4745, "step": 414500 }, { "epoch": 1.9, "learning_rate": 1.831614884061956e-05, "loss": 2.4956, "step": 415000 }, { "epoch": 1.9, "learning_rate": 1.8277961079033392e-05, "loss": 2.4996, "step": 415500 }, { "epoch": 1.91, "learning_rate": 1.8239773317447227e-05, "loss": 2.4669, "step": 416000 }, { "epoch": 1.91, "learning_rate": 1.820158555586106e-05, "loss": 2.4857, "step": 416500 }, { "epoch": 1.91, "learning_rate": 1.8163397794274892e-05, "loss": 2.4912, "step": 417000 }, { "epoch": 1.91, "learning_rate": 1.8125210032688724e-05, "loss": 2.4874, "step": 417500 }, { "epoch": 1.92, "learning_rate": 1.808702227110256e-05, "loss": 2.4717, "step": 418000 }, { "epoch": 1.92, "learning_rate": 1.8048910885039563e-05, "loss": 2.5005, "step": 418500 }, { "epoch": 1.92, "learning_rate": 1.801079949897657e-05, "loss": 2.485, "step": 419000 }, { "epoch": 1.92, "learning_rate": 1.7972611737390402e-05, "loss": 2.4802, "step": 419500 }, { "epoch": 1.92, "learning_rate": 1.7934423975804234e-05, "loss": 2.5059, "step": 420000 }, { "epoch": 1.93, "learning_rate": 1.7896236214218066e-05, "loss": 2.481, "step": 420500 }, { "epoch": 1.93, "learning_rate": 1.7858048452631902e-05, "loss": 2.4955, "step": 421000 }, { "epoch": 1.93, "learning_rate": 1.7819860691045734e-05, "loss": 2.4833, "step": 421500 }, { "epoch": 1.93, "learning_rate": 1.7781672929459566e-05, "loss": 2.4695, "step": 422000 }, { "epoch": 1.94, "learning_rate": 1.7743485167873398e-05, "loss": 2.4649, "step": 422500 }, { "epoch": 1.94, "learning_rate": 1.7705373781810405e-05, "loss": 2.4864, "step": 423000 }, { "epoch": 1.94, "learning_rate": 1.7667186020224237e-05, "loss": 2.4922, "step": 423500 }, { "epoch": 1.94, "learning_rate": 1.7629074634161244e-05, "loss": 2.4674, "step": 424000 }, { "epoch": 1.95, "learning_rate": 1.7590886872575076e-05, "loss": 2.4778, "step": 424500 }, { "epoch": 1.95, "learning_rate": 1.755269911098891e-05, "loss": 2.4762, "step": 425000 }, { "epoch": 1.95, "learning_rate": 1.7514511349402744e-05, "loss": 2.4852, "step": 425500 }, { "epoch": 1.95, "learning_rate": 1.747632358781658e-05, "loss": 2.4944, "step": 426000 }, { "epoch": 1.95, "learning_rate": 1.7438135826230412e-05, "loss": 2.4754, "step": 426500 }, { "epoch": 1.96, "learning_rate": 1.740002444016742e-05, "loss": 2.4842, "step": 427000 }, { "epoch": 1.96, "learning_rate": 1.736183667858125e-05, "loss": 2.4783, "step": 427500 }, { "epoch": 1.96, "learning_rate": 1.7323648916995083e-05, "loss": 2.4826, "step": 428000 }, { "epoch": 1.96, "learning_rate": 1.728553753093209e-05, "loss": 2.4984, "step": 428500 }, { "epoch": 1.97, "learning_rate": 1.7247349769345922e-05, "loss": 2.4978, "step": 429000 }, { "epoch": 1.97, "learning_rate": 1.7209162007759754e-05, "loss": 2.4852, "step": 429500 }, { "epoch": 1.97, "learning_rate": 1.7170974246173586e-05, "loss": 2.4654, "step": 430000 }, { "epoch": 1.97, "learning_rate": 1.7132786484587422e-05, "loss": 2.4961, "step": 430500 }, { "epoch": 1.98, "learning_rate": 1.7094675098524425e-05, "loss": 2.4828, "step": 431000 }, { "epoch": 1.98, "learning_rate": 1.705648733693826e-05, "loss": 2.4824, "step": 431500 }, { "epoch": 1.98, "learning_rate": 1.7018299575352093e-05, "loss": 2.4922, "step": 432000 }, { "epoch": 1.98, "learning_rate": 1.6980111813765925e-05, "loss": 2.4749, "step": 432500 }, { "epoch": 1.98, "learning_rate": 1.6941924052179757e-05, "loss": 2.4765, "step": 433000 }, { "epoch": 1.99, "learning_rate": 1.6903736290593593e-05, "loss": 2.4909, "step": 433500 }, { "epoch": 1.99, "learning_rate": 1.6865548529007425e-05, "loss": 2.4865, "step": 434000 }, { "epoch": 1.99, "learning_rate": 1.6827360767421257e-05, "loss": 2.4848, "step": 434500 }, { "epoch": 1.99, "learning_rate": 1.678917300583509e-05, "loss": 2.4798, "step": 435000 }, { "epoch": 2.0, "learning_rate": 1.6750985244248925e-05, "loss": 2.4661, "step": 435500 }, { "epoch": 2.0, "learning_rate": 1.6712797482662757e-05, "loss": 2.456, "step": 436000 }, { "epoch": 2.0, "learning_rate": 1.667468609659976e-05, "loss": 2.4836, "step": 436500 }, { "epoch": 2.0, "learning_rate": 1.6636498335013596e-05, "loss": 2.4701, "step": 437000 }, { "epoch": 2.0, "learning_rate": 1.6598310573427428e-05, "loss": 2.4511, "step": 437500 }, { "epoch": 2.01, "learning_rate": 1.656012281184126e-05, "loss": 2.4736, "step": 438000 }, { "epoch": 2.01, "learning_rate": 1.6521935050255092e-05, "loss": 2.442, "step": 438500 }, { "epoch": 2.01, "learning_rate": 1.64838236641921e-05, "loss": 2.4498, "step": 439000 }, { "epoch": 2.01, "learning_rate": 1.644563590260593e-05, "loss": 2.4634, "step": 439500 }, { "epoch": 2.02, "learning_rate": 1.6407448141019767e-05, "loss": 2.4437, "step": 440000 }, { "epoch": 2.02, "learning_rate": 1.63692603794336e-05, "loss": 2.4635, "step": 440500 }, { "epoch": 2.02, "learning_rate": 1.6331072617847435e-05, "loss": 2.4361, "step": 441000 }, { "epoch": 2.02, "learning_rate": 1.6292884856261267e-05, "loss": 2.4482, "step": 441500 }, { "epoch": 2.03, "learning_rate": 1.62546970946751e-05, "loss": 2.4501, "step": 442000 }, { "epoch": 2.03, "learning_rate": 1.6216509333088935e-05, "loss": 2.448, "step": 442500 }, { "epoch": 2.03, "learning_rate": 1.6178321571502767e-05, "loss": 2.4459, "step": 443000 }, { "epoch": 2.03, "learning_rate": 1.61401338099166e-05, "loss": 2.4403, "step": 443500 }, { "epoch": 2.03, "learning_rate": 1.610194604833043e-05, "loss": 2.4537, "step": 444000 }, { "epoch": 2.04, "learning_rate": 1.6063758286744267e-05, "loss": 2.4677, "step": 444500 }, { "epoch": 2.04, "learning_rate": 1.602564690068127e-05, "loss": 2.4495, "step": 445000 }, { "epoch": 2.04, "learning_rate": 1.5987459139095106e-05, "loss": 2.4662, "step": 445500 }, { "epoch": 2.04, "learning_rate": 1.594934775303211e-05, "loss": 2.4608, "step": 446000 }, { "epoch": 2.05, "learning_rate": 1.591115999144594e-05, "loss": 2.462, "step": 446500 }, { "epoch": 2.05, "learning_rate": 1.5873048605382948e-05, "loss": 2.4542, "step": 447000 }, { "epoch": 2.05, "learning_rate": 1.583486084379678e-05, "loss": 2.4337, "step": 447500 }, { "epoch": 2.05, "learning_rate": 1.5796673082210612e-05, "loss": 2.4451, "step": 448000 }, { "epoch": 2.06, "learning_rate": 1.5758485320624448e-05, "loss": 2.4314, "step": 448500 }, { "epoch": 2.06, "learning_rate": 1.572029755903828e-05, "loss": 2.4542, "step": 449000 }, { "epoch": 2.06, "learning_rate": 1.5682109797452112e-05, "loss": 2.4507, "step": 449500 }, { "epoch": 2.06, "learning_rate": 1.5643922035865944e-05, "loss": 2.439, "step": 450000 }, { "epoch": 2.06, "learning_rate": 1.560573427427978e-05, "loss": 2.449, "step": 450500 }, { "epoch": 2.07, "learning_rate": 1.5567546512693612e-05, "loss": 2.4579, "step": 451000 }, { "epoch": 2.07, "learning_rate": 1.552943512663062e-05, "loss": 2.4466, "step": 451500 }, { "epoch": 2.07, "learning_rate": 1.549124736504445e-05, "loss": 2.4515, "step": 452000 }, { "epoch": 2.07, "learning_rate": 1.5453135978981455e-05, "loss": 2.4527, "step": 452500 }, { "epoch": 2.08, "learning_rate": 1.541494821739529e-05, "loss": 2.4501, "step": 453000 }, { "epoch": 2.08, "learning_rate": 1.5376760455809122e-05, "loss": 2.4574, "step": 453500 }, { "epoch": 2.08, "learning_rate": 1.5338572694222955e-05, "loss": 2.4663, "step": 454000 }, { "epoch": 2.08, "learning_rate": 1.530046130815996e-05, "loss": 2.4687, "step": 454500 }, { "epoch": 2.09, "learning_rate": 1.5262273546573794e-05, "loss": 2.4509, "step": 455000 }, { "epoch": 2.09, "learning_rate": 1.5224085784987627e-05, "loss": 2.4578, "step": 455500 }, { "epoch": 2.09, "learning_rate": 1.518589802340146e-05, "loss": 2.4494, "step": 456000 }, { "epoch": 2.09, "learning_rate": 1.5147710261815295e-05, "loss": 2.4576, "step": 456500 }, { "epoch": 2.09, "learning_rate": 1.5109598875752299e-05, "loss": 2.4476, "step": 457000 }, { "epoch": 2.1, "learning_rate": 1.5071411114166134e-05, "loss": 2.4516, "step": 457500 }, { "epoch": 2.1, "learning_rate": 1.5033223352579966e-05, "loss": 2.4582, "step": 458000 }, { "epoch": 2.1, "learning_rate": 1.49950355909938e-05, "loss": 2.4447, "step": 458500 }, { "epoch": 2.1, "learning_rate": 1.4956847829407632e-05, "loss": 2.4487, "step": 459000 }, { "epoch": 2.11, "learning_rate": 1.491873644334464e-05, "loss": 2.4449, "step": 459500 }, { "epoch": 2.11, "learning_rate": 1.4880548681758471e-05, "loss": 2.4465, "step": 460000 }, { "epoch": 2.11, "learning_rate": 1.4842360920172305e-05, "loss": 2.4439, "step": 460500 }, { "epoch": 2.11, "learning_rate": 1.4804173158586137e-05, "loss": 2.4527, "step": 461000 }, { "epoch": 2.11, "learning_rate": 1.4766061772523143e-05, "loss": 2.4518, "step": 461500 }, { "epoch": 2.12, "learning_rate": 1.4727874010936976e-05, "loss": 2.4407, "step": 462000 }, { "epoch": 2.12, "learning_rate": 1.4689686249350809e-05, "loss": 2.4485, "step": 462500 }, { "epoch": 2.12, "learning_rate": 1.4651498487764642e-05, "loss": 2.4401, "step": 463000 }, { "epoch": 2.12, "learning_rate": 1.4613310726178475e-05, "loss": 2.4549, "step": 463500 }, { "epoch": 2.13, "learning_rate": 1.4575122964592308e-05, "loss": 2.4468, "step": 464000 }, { "epoch": 2.13, "learning_rate": 1.453693520300614e-05, "loss": 2.4626, "step": 464500 }, { "epoch": 2.13, "learning_rate": 1.4498747441419974e-05, "loss": 2.4491, "step": 465000 }, { "epoch": 2.13, "learning_rate": 1.446063605535698e-05, "loss": 2.4539, "step": 465500 }, { "epoch": 2.14, "learning_rate": 1.4422524669293985e-05, "loss": 2.4641, "step": 466000 }, { "epoch": 2.14, "learning_rate": 1.4384336907707819e-05, "loss": 2.4342, "step": 466500 }, { "epoch": 2.14, "learning_rate": 1.434614914612165e-05, "loss": 2.4475, "step": 467000 }, { "epoch": 2.14, "learning_rate": 1.4307961384535485e-05, "loss": 2.4415, "step": 467500 }, { "epoch": 2.14, "learning_rate": 1.4269773622949317e-05, "loss": 2.4543, "step": 468000 }, { "epoch": 2.15, "learning_rate": 1.423158586136315e-05, "loss": 2.4339, "step": 468500 }, { "epoch": 2.15, "learning_rate": 1.4193398099776983e-05, "loss": 2.4473, "step": 469000 }, { "epoch": 2.15, "learning_rate": 1.4155210338190817e-05, "loss": 2.4478, "step": 469500 }, { "epoch": 2.15, "learning_rate": 1.4117022576604649e-05, "loss": 2.4604, "step": 470000 }, { "epoch": 2.16, "learning_rate": 1.4078834815018483e-05, "loss": 2.4417, "step": 470500 }, { "epoch": 2.16, "learning_rate": 1.4040647053432315e-05, "loss": 2.4471, "step": 471000 }, { "epoch": 2.16, "learning_rate": 1.400245929184615e-05, "loss": 2.4461, "step": 471500 }, { "epoch": 2.16, "learning_rate": 1.3964271530259984e-05, "loss": 2.4557, "step": 472000 }, { "epoch": 2.17, "learning_rate": 1.392616014419699e-05, "loss": 2.4534, "step": 472500 }, { "epoch": 2.17, "learning_rate": 1.3888048758133995e-05, "loss": 2.4506, "step": 473000 }, { "epoch": 2.17, "learning_rate": 1.3849860996547828e-05, "loss": 2.4578, "step": 473500 }, { "epoch": 2.17, "learning_rate": 1.381167323496166e-05, "loss": 2.4611, "step": 474000 }, { "epoch": 2.17, "learning_rate": 1.3773485473375494e-05, "loss": 2.426, "step": 474500 }, { "epoch": 2.18, "learning_rate": 1.3735297711789327e-05, "loss": 2.4545, "step": 475000 }, { "epoch": 2.18, "learning_rate": 1.369710995020316e-05, "loss": 2.4478, "step": 475500 }, { "epoch": 2.18, "learning_rate": 1.3658922188616993e-05, "loss": 2.4582, "step": 476000 }, { "epoch": 2.18, "learning_rate": 1.3620810802554e-05, "loss": 2.4719, "step": 476500 }, { "epoch": 2.19, "learning_rate": 1.3582623040967832e-05, "loss": 2.4513, "step": 477000 }, { "epoch": 2.19, "learning_rate": 1.3544435279381665e-05, "loss": 2.4509, "step": 477500 }, { "epoch": 2.19, "learning_rate": 1.3506247517795498e-05, "loss": 2.4464, "step": 478000 }, { "epoch": 2.19, "learning_rate": 1.3468059756209331e-05, "loss": 2.4385, "step": 478500 }, { "epoch": 2.2, "learning_rate": 1.3429871994623164e-05, "loss": 2.4486, "step": 479000 }, { "epoch": 2.2, "learning_rate": 1.3391684233036997e-05, "loss": 2.4516, "step": 479500 }, { "epoch": 2.2, "learning_rate": 1.335349647145083e-05, "loss": 2.4542, "step": 480000 }, { "epoch": 2.2, "learning_rate": 1.3315385085387835e-05, "loss": 2.4439, "step": 480500 }, { "epoch": 2.2, "learning_rate": 1.3277197323801669e-05, "loss": 2.4697, "step": 481000 }, { "epoch": 2.21, "learning_rate": 1.32390095622155e-05, "loss": 2.4428, "step": 481500 }, { "epoch": 2.21, "learning_rate": 1.3200821800629335e-05, "loss": 2.4372, "step": 482000 }, { "epoch": 2.21, "learning_rate": 1.316271041456634e-05, "loss": 2.4531, "step": 482500 }, { "epoch": 2.21, "learning_rate": 1.3124522652980174e-05, "loss": 2.4374, "step": 483000 }, { "epoch": 2.22, "learning_rate": 1.3086334891394006e-05, "loss": 2.4511, "step": 483500 }, { "epoch": 2.22, "learning_rate": 1.304814712980784e-05, "loss": 2.4434, "step": 484000 }, { "epoch": 2.22, "learning_rate": 1.3009959368221672e-05, "loss": 2.4376, "step": 484500 }, { "epoch": 2.22, "learning_rate": 1.2971771606635506e-05, "loss": 2.4383, "step": 485000 }, { "epoch": 2.22, "learning_rate": 1.2933660220572511e-05, "loss": 2.4472, "step": 485500 }, { "epoch": 2.23, "learning_rate": 1.2895472458986343e-05, "loss": 2.4618, "step": 486000 }, { "epoch": 2.23, "learning_rate": 1.2857284697400177e-05, "loss": 2.4335, "step": 486500 }, { "epoch": 2.23, "learning_rate": 1.2819096935814012e-05, "loss": 2.4536, "step": 487000 }, { "epoch": 2.23, "learning_rate": 1.2781061925274187e-05, "loss": 2.4489, "step": 487500 }, { "epoch": 2.24, "learning_rate": 1.2742874163688021e-05, "loss": 2.4469, "step": 488000 }, { "epoch": 2.24, "learning_rate": 1.2704686402101857e-05, "loss": 2.4408, "step": 488500 }, { "epoch": 2.24, "learning_rate": 1.2666498640515689e-05, "loss": 2.4476, "step": 489000 }, { "epoch": 2.24, "learning_rate": 1.2628310878929523e-05, "loss": 2.4503, "step": 489500 }, { "epoch": 2.25, "learning_rate": 1.2590123117343355e-05, "loss": 2.4336, "step": 490000 }, { "epoch": 2.25, "learning_rate": 1.2551935355757189e-05, "loss": 2.4564, "step": 490500 }, { "epoch": 2.25, "learning_rate": 1.2513823969694194e-05, "loss": 2.431, "step": 491000 }, { "epoch": 2.25, "learning_rate": 1.2475636208108026e-05, "loss": 2.4405, "step": 491500 }, { "epoch": 2.25, "learning_rate": 1.243744844652186e-05, "loss": 2.443, "step": 492000 }, { "epoch": 2.26, "learning_rate": 1.2399260684935694e-05, "loss": 2.4474, "step": 492500 }, { "epoch": 2.26, "learning_rate": 1.2361072923349526e-05, "loss": 2.443, "step": 493000 }, { "epoch": 2.26, "learning_rate": 1.232288516176336e-05, "loss": 2.4471, "step": 493500 }, { "epoch": 2.26, "learning_rate": 1.2284697400177192e-05, "loss": 2.4584, "step": 494000 }, { "epoch": 2.27, "learning_rate": 1.2246509638591026e-05, "loss": 2.4465, "step": 494500 }, { "epoch": 2.27, "learning_rate": 1.2208321877004858e-05, "loss": 2.4397, "step": 495000 }, { "epoch": 2.27, "learning_rate": 1.2170134115418692e-05, "loss": 2.4618, "step": 495500 }, { "epoch": 2.27, "learning_rate": 1.2131946353832524e-05, "loss": 2.4752, "step": 496000 }, { "epoch": 2.28, "learning_rate": 1.2093758592246358e-05, "loss": 2.4567, "step": 496500 }, { "epoch": 2.28, "learning_rate": 1.205557083066019e-05, "loss": 2.4568, "step": 497000 }, { "epoch": 2.28, "learning_rate": 1.2017612195643541e-05, "loss": 2.4374, "step": 497500 }, { "epoch": 2.28, "learning_rate": 1.1979424434057373e-05, "loss": 2.443, "step": 498000 }, { "epoch": 2.28, "learning_rate": 1.1941236672471207e-05, "loss": 2.4478, "step": 498500 }, { "epoch": 2.29, "learning_rate": 1.190304891088504e-05, "loss": 2.4496, "step": 499000 }, { "epoch": 2.29, "learning_rate": 1.1864861149298873e-05, "loss": 2.4571, "step": 499500 }, { "epoch": 2.29, "learning_rate": 1.1826673387712707e-05, "loss": 2.4355, "step": 500000 }, { "epoch": 2.29, "learning_rate": 1.178848562612654e-05, "loss": 2.4499, "step": 500500 }, { "epoch": 2.3, "learning_rate": 1.1750297864540373e-05, "loss": 2.4439, "step": 501000 }, { "epoch": 2.3, "learning_rate": 1.1712110102954207e-05, "loss": 2.4372, "step": 501500 }, { "epoch": 2.3, "learning_rate": 1.1673998716891212e-05, "loss": 2.4406, "step": 502000 }, { "epoch": 2.3, "learning_rate": 1.1635810955305044e-05, "loss": 2.4465, "step": 502500 }, { "epoch": 2.31, "learning_rate": 1.1597623193718878e-05, "loss": 2.4579, "step": 503000 }, { "epoch": 2.31, "learning_rate": 1.155943543213271e-05, "loss": 2.456, "step": 503500 }, { "epoch": 2.31, "learning_rate": 1.1521324046069715e-05, "loss": 2.4521, "step": 504000 }, { "epoch": 2.31, "learning_rate": 1.148313628448355e-05, "loss": 2.4564, "step": 504500 }, { "epoch": 2.31, "learning_rate": 1.1445024898420554e-05, "loss": 2.4622, "step": 505000 }, { "epoch": 2.32, "learning_rate": 1.1406837136834388e-05, "loss": 2.4393, "step": 505500 }, { "epoch": 2.32, "learning_rate": 1.136864937524822e-05, "loss": 2.452, "step": 506000 }, { "epoch": 2.32, "learning_rate": 1.1330461613662054e-05, "loss": 2.4385, "step": 506500 }, { "epoch": 2.32, "learning_rate": 1.1292273852075886e-05, "loss": 2.4319, "step": 507000 }, { "epoch": 2.33, "learning_rate": 1.125408609048972e-05, "loss": 2.4473, "step": 507500 }, { "epoch": 2.33, "learning_rate": 1.1215898328903554e-05, "loss": 2.4442, "step": 508000 }, { "epoch": 2.33, "learning_rate": 1.1177710567317386e-05, "loss": 2.4359, "step": 508500 }, { "epoch": 2.33, "learning_rate": 1.113952280573122e-05, "loss": 2.4457, "step": 509000 }, { "epoch": 2.33, "learning_rate": 1.1101335044145052e-05, "loss": 2.4606, "step": 509500 }, { "epoch": 2.34, "learning_rate": 1.1063223658082059e-05, "loss": 2.4343, "step": 510000 }, { "epoch": 2.34, "learning_rate": 1.1025035896495891e-05, "loss": 2.4368, "step": 510500 }, { "epoch": 2.34, "learning_rate": 1.0986848134909725e-05, "loss": 2.4222, "step": 511000 }, { "epoch": 2.34, "learning_rate": 1.0948660373323557e-05, "loss": 2.442, "step": 511500 }, { "epoch": 2.35, "learning_rate": 1.0910472611737391e-05, "loss": 2.451, "step": 512000 }, { "epoch": 2.35, "learning_rate": 1.0872284850151223e-05, "loss": 2.4269, "step": 512500 }, { "epoch": 2.35, "learning_rate": 1.0834173464088228e-05, "loss": 2.455, "step": 513000 }, { "epoch": 2.35, "learning_rate": 1.0795985702502062e-05, "loss": 2.4436, "step": 513500 }, { "epoch": 2.36, "learning_rate": 1.0757874316439068e-05, "loss": 2.4479, "step": 514000 }, { "epoch": 2.36, "learning_rate": 1.0719686554852901e-05, "loss": 2.448, "step": 514500 }, { "epoch": 2.36, "learning_rate": 1.0681498793266734e-05, "loss": 2.4465, "step": 515000 }, { "epoch": 2.36, "learning_rate": 1.0643311031680567e-05, "loss": 2.4565, "step": 515500 }, { "epoch": 2.36, "learning_rate": 1.0605123270094401e-05, "loss": 2.4629, "step": 516000 }, { "epoch": 2.37, "learning_rate": 1.0566935508508233e-05, "loss": 2.4651, "step": 516500 }, { "epoch": 2.37, "learning_rate": 1.0528747746922067e-05, "loss": 2.4393, "step": 517000 }, { "epoch": 2.37, "learning_rate": 1.04905599853359e-05, "loss": 2.4447, "step": 517500 }, { "epoch": 2.37, "learning_rate": 1.0452372223749733e-05, "loss": 2.4384, "step": 518000 }, { "epoch": 2.38, "learning_rate": 1.0414260837686738e-05, "loss": 2.4547, "step": 518500 }, { "epoch": 2.38, "learning_rate": 1.0376073076100572e-05, "loss": 2.4342, "step": 519000 }, { "epoch": 2.38, "learning_rate": 1.0337885314514404e-05, "loss": 2.4307, "step": 519500 }, { "epoch": 2.38, "learning_rate": 1.0299697552928238e-05, "loss": 2.4439, "step": 520000 }, { "epoch": 2.39, "learning_rate": 1.026150979134207e-05, "loss": 2.4509, "step": 520500 }, { "epoch": 2.39, "learning_rate": 1.0223322029755904e-05, "loss": 2.4485, "step": 521000 }, { "epoch": 2.39, "learning_rate": 1.0185134268169736e-05, "loss": 2.4511, "step": 521500 }, { "epoch": 2.39, "learning_rate": 1.0146946506583572e-05, "loss": 2.4464, "step": 522000 }, { "epoch": 2.39, "learning_rate": 1.0108835120520577e-05, "loss": 2.4437, "step": 522500 }, { "epoch": 2.4, "learning_rate": 1.007064735893441e-05, "loss": 2.4495, "step": 523000 }, { "epoch": 2.4, "learning_rate": 1.0032535972871414e-05, "loss": 2.4382, "step": 523500 }, { "epoch": 2.4, "learning_rate": 9.994348211285248e-06, "loss": 2.443, "step": 524000 }, { "epoch": 2.4, "learning_rate": 9.956236825222254e-06, "loss": 2.4442, "step": 524500 }, { "epoch": 2.41, "learning_rate": 9.918049063636087e-06, "loss": 2.4554, "step": 525000 }, { "epoch": 2.41, "learning_rate": 9.87986130204992e-06, "loss": 2.4317, "step": 525500 }, { "epoch": 2.41, "learning_rate": 9.841673540463753e-06, "loss": 2.4502, "step": 526000 }, { "epoch": 2.41, "learning_rate": 9.803485778877586e-06, "loss": 2.4515, "step": 526500 }, { "epoch": 2.41, "learning_rate": 9.76529801729142e-06, "loss": 2.4367, "step": 527000 }, { "epoch": 2.42, "learning_rate": 9.727110255705252e-06, "loss": 2.4285, "step": 527500 }, { "epoch": 2.42, "learning_rate": 9.688922494119085e-06, "loss": 2.4296, "step": 528000 }, { "epoch": 2.42, "learning_rate": 9.650734732532918e-06, "loss": 2.4533, "step": 528500 }, { "epoch": 2.42, "learning_rate": 9.612623346469923e-06, "loss": 2.427, "step": 529000 }, { "epoch": 2.43, "learning_rate": 9.574435584883757e-06, "loss": 2.4342, "step": 529500 }, { "epoch": 2.43, "learning_rate": 9.536247823297589e-06, "loss": 2.4367, "step": 530000 }, { "epoch": 2.43, "learning_rate": 9.498060061711424e-06, "loss": 2.4302, "step": 530500 }, { "epoch": 2.43, "learning_rate": 9.45994867564843e-06, "loss": 2.4319, "step": 531000 }, { "epoch": 2.44, "learning_rate": 9.421760914062262e-06, "loss": 2.4366, "step": 531500 }, { "epoch": 2.44, "learning_rate": 9.383573152476095e-06, "loss": 2.457, "step": 532000 }, { "epoch": 2.44, "learning_rate": 9.345385390889928e-06, "loss": 2.4379, "step": 532500 }, { "epoch": 2.44, "learning_rate": 9.307197629303761e-06, "loss": 2.4566, "step": 533000 }, { "epoch": 2.44, "learning_rate": 9.269086243240767e-06, "loss": 2.4534, "step": 533500 }, { "epoch": 2.45, "learning_rate": 9.2308984816546e-06, "loss": 2.4507, "step": 534000 }, { "epoch": 2.45, "learning_rate": 9.192710720068433e-06, "loss": 2.4391, "step": 534500 }, { "epoch": 2.45, "learning_rate": 9.154599334005438e-06, "loss": 2.4498, "step": 535000 }, { "epoch": 2.45, "learning_rate": 9.116411572419272e-06, "loss": 2.4481, "step": 535500 }, { "epoch": 2.46, "learning_rate": 9.078223810833104e-06, "loss": 2.4372, "step": 536000 }, { "epoch": 2.46, "learning_rate": 9.040036049246938e-06, "loss": 2.4461, "step": 536500 }, { "epoch": 2.46, "learning_rate": 9.00184828766077e-06, "loss": 2.4341, "step": 537000 }, { "epoch": 2.46, "learning_rate": 8.963660526074604e-06, "loss": 2.4365, "step": 537500 }, { "epoch": 2.47, "learning_rate": 8.925472764488438e-06, "loss": 2.4351, "step": 538000 }, { "epoch": 2.47, "learning_rate": 8.887285002902271e-06, "loss": 2.4408, "step": 538500 }, { "epoch": 2.47, "learning_rate": 8.849173616839277e-06, "loss": 2.4623, "step": 539000 }, { "epoch": 2.47, "learning_rate": 8.810985855253109e-06, "loss": 2.4498, "step": 539500 }, { "epoch": 2.47, "learning_rate": 8.772798093666943e-06, "loss": 2.4255, "step": 540000 }, { "epoch": 2.48, "learning_rate": 8.734610332080775e-06, "loss": 2.4218, "step": 540500 }, { "epoch": 2.48, "learning_rate": 8.696422570494609e-06, "loss": 2.4306, "step": 541000 }, { "epoch": 2.48, "learning_rate": 8.658311184431614e-06, "loss": 2.441, "step": 541500 }, { "epoch": 2.48, "learning_rate": 8.620123422845448e-06, "loss": 2.4526, "step": 542000 }, { "epoch": 2.49, "learning_rate": 8.58193566125928e-06, "loss": 2.4509, "step": 542500 }, { "epoch": 2.49, "learning_rate": 8.543824275196285e-06, "loss": 2.4365, "step": 543000 }, { "epoch": 2.49, "learning_rate": 8.505636513610119e-06, "loss": 2.4411, "step": 543500 }, { "epoch": 2.49, "learning_rate": 8.467448752023951e-06, "loss": 2.4501, "step": 544000 }, { "epoch": 2.5, "learning_rate": 8.429260990437785e-06, "loss": 2.4357, "step": 544500 }, { "epoch": 2.5, "learning_rate": 8.391073228851617e-06, "loss": 2.4511, "step": 545000 }, { "epoch": 2.5, "learning_rate": 8.35288546726545e-06, "loss": 2.4388, "step": 545500 }, { "epoch": 2.5, "learning_rate": 8.314697705679285e-06, "loss": 2.4441, "step": 546000 }, { "epoch": 2.5, "learning_rate": 8.276509944093119e-06, "loss": 2.4441, "step": 546500 }, { "epoch": 2.51, "learning_rate": 8.23832218250695e-06, "loss": 2.4389, "step": 547000 }, { "epoch": 2.51, "learning_rate": 8.200134420920785e-06, "loss": 2.4493, "step": 547500 }, { "epoch": 2.51, "learning_rate": 8.161946659334617e-06, "loss": 2.46, "step": 548000 }, { "epoch": 2.51, "learning_rate": 8.12375889774845e-06, "loss": 2.4401, "step": 548500 }, { "epoch": 2.52, "learning_rate": 8.085571136162283e-06, "loss": 2.4234, "step": 549000 }, { "epoch": 2.52, "learning_rate": 8.047383374576117e-06, "loss": 2.432, "step": 549500 }, { "epoch": 2.52, "learning_rate": 8.009195612989949e-06, "loss": 2.4419, "step": 550000 }, { "epoch": 2.52, "learning_rate": 7.971007851403783e-06, "loss": 2.4433, "step": 550500 }, { "epoch": 2.52, "learning_rate": 7.932896465340788e-06, "loss": 2.4406, "step": 551000 }, { "epoch": 2.53, "learning_rate": 7.89470870375462e-06, "loss": 2.4506, "step": 551500 }, { "epoch": 2.53, "learning_rate": 7.856520942168455e-06, "loss": 2.4455, "step": 552000 }, { "epoch": 2.53, "learning_rate": 7.818409556105459e-06, "loss": 2.426, "step": 552500 }, { "epoch": 2.53, "learning_rate": 7.780221794519293e-06, "loss": 2.4455, "step": 553000 }, { "epoch": 2.54, "learning_rate": 7.742034032933127e-06, "loss": 2.4256, "step": 553500 }, { "epoch": 2.54, "learning_rate": 7.703846271346959e-06, "loss": 2.4406, "step": 554000 }, { "epoch": 2.54, "learning_rate": 7.665734885283966e-06, "loss": 2.4354, "step": 554500 }, { "epoch": 2.54, "learning_rate": 7.627547123697798e-06, "loss": 2.4411, "step": 555000 }, { "epoch": 2.55, "learning_rate": 7.589359362111631e-06, "loss": 2.4288, "step": 555500 }, { "epoch": 2.55, "learning_rate": 7.551171600525464e-06, "loss": 2.4325, "step": 556000 }, { "epoch": 2.55, "learning_rate": 7.512983838939297e-06, "loss": 2.4231, "step": 556500 }, { "epoch": 2.55, "learning_rate": 7.47479607735313e-06, "loss": 2.4492, "step": 557000 }, { "epoch": 2.55, "learning_rate": 7.436608315766963e-06, "loss": 2.4552, "step": 557500 }, { "epoch": 2.56, "learning_rate": 7.398420554180796e-06, "loss": 2.4487, "step": 558000 }, { "epoch": 2.56, "learning_rate": 7.360385543640974e-06, "loss": 2.4361, "step": 558500 }, { "epoch": 2.56, "learning_rate": 7.322197782054807e-06, "loss": 2.4493, "step": 559000 }, { "epoch": 2.56, "learning_rate": 7.28401002046864e-06, "loss": 2.4353, "step": 559500 }, { "epoch": 2.57, "learning_rate": 7.245822258882473e-06, "loss": 2.4285, "step": 560000 }, { "epoch": 2.57, "learning_rate": 7.207634497296306e-06, "loss": 2.4285, "step": 560500 }, { "epoch": 2.57, "learning_rate": 7.169446735710141e-06, "loss": 2.4331, "step": 561000 }, { "epoch": 2.57, "learning_rate": 7.131258974123974e-06, "loss": 2.4412, "step": 561500 }, { "epoch": 2.58, "learning_rate": 7.093071212537807e-06, "loss": 2.4206, "step": 562000 }, { "epoch": 2.58, "learning_rate": 7.05488345095164e-06, "loss": 2.4278, "step": 562500 }, { "epoch": 2.58, "learning_rate": 7.016695689365473e-06, "loss": 2.4425, "step": 563000 }, { "epoch": 2.58, "learning_rate": 6.978507927779306e-06, "loss": 2.4253, "step": 563500 }, { "epoch": 2.58, "learning_rate": 6.940320166193139e-06, "loss": 2.4526, "step": 564000 }, { "epoch": 2.59, "learning_rate": 6.902208780130144e-06, "loss": 2.4563, "step": 564500 }, { "epoch": 2.59, "learning_rate": 6.864021018543977e-06, "loss": 2.4494, "step": 565000 }, { "epoch": 2.59, "learning_rate": 6.82583325695781e-06, "loss": 2.4169, "step": 565500 }, { "epoch": 2.59, "learning_rate": 6.787645495371643e-06, "loss": 2.4331, "step": 566000 }, { "epoch": 2.6, "learning_rate": 6.749457733785476e-06, "loss": 2.4448, "step": 566500 }, { "epoch": 2.6, "learning_rate": 6.711346347722482e-06, "loss": 2.4283, "step": 567000 }, { "epoch": 2.6, "learning_rate": 6.673158586136315e-06, "loss": 2.4315, "step": 567500 }, { "epoch": 2.6, "learning_rate": 6.634970824550149e-06, "loss": 2.4446, "step": 568000 }, { "epoch": 2.61, "learning_rate": 6.596783062963982e-06, "loss": 2.4255, "step": 568500 }, { "epoch": 2.61, "learning_rate": 6.558671676900988e-06, "loss": 2.4402, "step": 569000 }, { "epoch": 2.61, "learning_rate": 6.520560290837993e-06, "loss": 2.4345, "step": 569500 }, { "epoch": 2.61, "learning_rate": 6.482372529251826e-06, "loss": 2.4266, "step": 570000 }, { "epoch": 2.61, "learning_rate": 6.444184767665659e-06, "loss": 2.4297, "step": 570500 }, { "epoch": 2.62, "learning_rate": 6.405997006079492e-06, "loss": 2.4315, "step": 571000 }, { "epoch": 2.62, "learning_rate": 6.367885620016498e-06, "loss": 2.4302, "step": 571500 }, { "epoch": 2.62, "learning_rate": 6.329697858430331e-06, "loss": 2.428, "step": 572000 }, { "epoch": 2.62, "learning_rate": 6.291510096844164e-06, "loss": 2.4299, "step": 572500 }, { "epoch": 2.63, "learning_rate": 6.253322335257997e-06, "loss": 2.4304, "step": 573000 }, { "epoch": 2.63, "learning_rate": 6.21513457367183e-06, "loss": 2.452, "step": 573500 }, { "epoch": 2.63, "learning_rate": 6.176946812085663e-06, "loss": 2.4342, "step": 574000 }, { "epoch": 2.63, "learning_rate": 6.138835426022668e-06, "loss": 2.4262, "step": 574500 }, { "epoch": 2.63, "learning_rate": 6.100647664436502e-06, "loss": 2.446, "step": 575000 }, { "epoch": 2.64, "learning_rate": 6.062536278373507e-06, "loss": 2.4221, "step": 575500 }, { "epoch": 2.64, "learning_rate": 6.02434851678734e-06, "loss": 2.4428, "step": 576000 }, { "epoch": 2.64, "learning_rate": 5.986160755201173e-06, "loss": 2.4236, "step": 576500 }, { "epoch": 2.64, "learning_rate": 5.947972993615006e-06, "loss": 2.4389, "step": 577000 }, { "epoch": 2.65, "learning_rate": 5.909785232028839e-06, "loss": 2.4393, "step": 577500 }, { "epoch": 2.65, "learning_rate": 5.871597470442672e-06, "loss": 2.4377, "step": 578000 }, { "epoch": 2.65, "learning_rate": 5.833409708856506e-06, "loss": 2.4413, "step": 578500 }, { "epoch": 2.65, "learning_rate": 5.795221947270339e-06, "loss": 2.4346, "step": 579000 }, { "epoch": 2.66, "learning_rate": 5.757034185684172e-06, "loss": 2.4069, "step": 579500 }, { "epoch": 2.66, "learning_rate": 5.718846424098005e-06, "loss": 2.4508, "step": 580000 }, { "epoch": 2.66, "learning_rate": 5.680658662511838e-06, "loss": 2.435, "step": 580500 }, { "epoch": 2.66, "learning_rate": 5.642470900925671e-06, "loss": 2.4577, "step": 581000 }, { "epoch": 2.66, "learning_rate": 5.604283139339505e-06, "loss": 2.4099, "step": 581500 }, { "epoch": 2.67, "learning_rate": 5.566095377753338e-06, "loss": 2.436, "step": 582000 }, { "epoch": 2.67, "learning_rate": 5.527983991690343e-06, "loss": 2.4231, "step": 582500 }, { "epoch": 2.67, "learning_rate": 5.489796230104176e-06, "loss": 2.4254, "step": 583000 }, { "epoch": 2.67, "learning_rate": 5.451608468518009e-06, "loss": 2.448, "step": 583500 }, { "epoch": 2.68, "learning_rate": 5.413497082455015e-06, "loss": 2.4402, "step": 584000 }, { "epoch": 2.68, "learning_rate": 5.375309320868848e-06, "loss": 2.4291, "step": 584500 }, { "epoch": 2.68, "learning_rate": 5.337121559282681e-06, "loss": 2.4349, "step": 585000 }, { "epoch": 2.68, "learning_rate": 5.298933797696515e-06, "loss": 2.4311, "step": 585500 }, { "epoch": 2.69, "learning_rate": 5.260746036110348e-06, "loss": 2.4328, "step": 586000 }, { "epoch": 2.69, "learning_rate": 5.222558274524181e-06, "loss": 2.4363, "step": 586500 }, { "epoch": 2.69, "learning_rate": 5.184370512938014e-06, "loss": 2.4224, "step": 587000 }, { "epoch": 2.69, "learning_rate": 5.146182751351847e-06, "loss": 2.437, "step": 587500 }, { "epoch": 2.69, "learning_rate": 5.10799498976568e-06, "loss": 2.4392, "step": 588000 }, { "epoch": 2.7, "learning_rate": 5.069883603702685e-06, "loss": 2.4361, "step": 588500 }, { "epoch": 2.7, "learning_rate": 5.031695842116519e-06, "loss": 2.4345, "step": 589000 }, { "epoch": 2.7, "learning_rate": 4.993508080530352e-06, "loss": 2.4301, "step": 589500 }, { "epoch": 2.7, "learning_rate": 4.955320318944185e-06, "loss": 2.4188, "step": 590000 }, { "epoch": 2.71, "learning_rate": 4.917132557358018e-06, "loss": 2.4286, "step": 590500 }, { "epoch": 2.71, "learning_rate": 4.879021171295023e-06, "loss": 2.4451, "step": 591000 }, { "epoch": 2.71, "learning_rate": 4.840833409708856e-06, "loss": 2.4475, "step": 591500 }, { "epoch": 2.71, "learning_rate": 4.802645648122689e-06, "loss": 2.4494, "step": 592000 }, { "epoch": 2.72, "learning_rate": 4.764457886536523e-06, "loss": 2.4347, "step": 592500 }, { "epoch": 2.72, "learning_rate": 4.726270124950356e-06, "loss": 2.4342, "step": 593000 }, { "epoch": 2.72, "learning_rate": 4.688158738887362e-06, "loss": 2.4301, "step": 593500 }, { "epoch": 2.72, "learning_rate": 4.649970977301195e-06, "loss": 2.435, "step": 594000 }, { "epoch": 2.72, "learning_rate": 4.611783215715028e-06, "loss": 2.4211, "step": 594500 }, { "epoch": 2.73, "learning_rate": 4.573595454128861e-06, "loss": 2.4424, "step": 595000 }, { "epoch": 2.73, "learning_rate": 4.535407692542694e-06, "loss": 2.4131, "step": 595500 }, { "epoch": 2.73, "learning_rate": 4.497219930956527e-06, "loss": 2.4284, "step": 596000 }, { "epoch": 2.73, "learning_rate": 4.459108544893532e-06, "loss": 2.4395, "step": 596500 }, { "epoch": 2.74, "learning_rate": 4.420997158830538e-06, "loss": 2.4276, "step": 597000 }, { "epoch": 2.74, "learning_rate": 4.3828093972443714e-06, "loss": 2.4365, "step": 597500 }, { "epoch": 2.74, "learning_rate": 4.3446216356582044e-06, "loss": 2.4223, "step": 598000 }, { "epoch": 2.74, "learning_rate": 4.3064338740720374e-06, "loss": 2.4116, "step": 598500 }, { "epoch": 2.74, "learning_rate": 4.2682461124858704e-06, "loss": 2.4536, "step": 599000 }, { "epoch": 2.75, "learning_rate": 4.2300583508997034e-06, "loss": 2.4433, "step": 599500 }, { "epoch": 2.75, "learning_rate": 4.1918705893135364e-06, "loss": 2.4403, "step": 600000 }, { "epoch": 2.75, "learning_rate": 4.15368282772737e-06, "loss": 2.4414, "step": 600500 }, { "epoch": 2.75, "learning_rate": 4.115495066141203e-06, "loss": 2.4472, "step": 601000 }, { "epoch": 2.76, "learning_rate": 4.077383680078209e-06, "loss": 2.4324, "step": 601500 }, { "epoch": 2.76, "learning_rate": 4.039195918492042e-06, "loss": 2.4367, "step": 602000 }, { "epoch": 2.76, "learning_rate": 4.001008156905875e-06, "loss": 2.4215, "step": 602500 }, { "epoch": 2.76, "learning_rate": 3.962820395319708e-06, "loss": 2.4327, "step": 603000 }, { "epoch": 2.77, "learning_rate": 3.924632633733541e-06, "loss": 2.4284, "step": 603500 }, { "epoch": 2.77, "learning_rate": 3.886444872147374e-06, "loss": 2.4257, "step": 604000 }, { "epoch": 2.77, "learning_rate": 3.84833348608438e-06, "loss": 2.4366, "step": 604500 }, { "epoch": 2.77, "learning_rate": 3.8101457244982134e-06, "loss": 2.4546, "step": 605000 }, { "epoch": 2.77, "learning_rate": 3.7719579629120464e-06, "loss": 2.4376, "step": 605500 }, { "epoch": 2.78, "learning_rate": 3.7337702013258794e-06, "loss": 2.4267, "step": 606000 }, { "epoch": 2.78, "learning_rate": 3.6956588152628845e-06, "loss": 2.4348, "step": 606500 }, { "epoch": 2.78, "learning_rate": 3.6574710536767175e-06, "loss": 2.4243, "step": 607000 }, { "epoch": 2.78, "learning_rate": 3.6192832920905505e-06, "loss": 2.4352, "step": 607500 }, { "epoch": 2.79, "learning_rate": 3.5810955305043844e-06, "loss": 2.424, "step": 608000 }, { "epoch": 2.79, "learning_rate": 3.542984144441389e-06, "loss": 2.4384, "step": 608500 }, { "epoch": 2.79, "learning_rate": 3.504796382855223e-06, "loss": 2.4168, "step": 609000 }, { "epoch": 2.79, "learning_rate": 3.466608621269056e-06, "loss": 2.4274, "step": 609500 }, { "epoch": 2.8, "learning_rate": 3.4284972352060616e-06, "loss": 2.4322, "step": 610000 }, { "epoch": 2.8, "learning_rate": 3.3903094736198946e-06, "loss": 2.4362, "step": 610500 }, { "epoch": 2.8, "learning_rate": 3.3521217120337276e-06, "loss": 2.4174, "step": 611000 }, { "epoch": 2.8, "learning_rate": 3.3139339504475606e-06, "loss": 2.4281, "step": 611500 }, { "epoch": 2.8, "learning_rate": 3.2757461888613936e-06, "loss": 2.4349, "step": 612000 }, { "epoch": 2.81, "learning_rate": 3.2375584272752275e-06, "loss": 2.4416, "step": 612500 }, { "epoch": 2.81, "learning_rate": 3.1993706656890605e-06, "loss": 2.4361, "step": 613000 }, { "epoch": 2.81, "learning_rate": 3.1611829041028935e-06, "loss": 2.4214, "step": 613500 }, { "epoch": 2.81, "learning_rate": 3.1229951425167265e-06, "loss": 2.4186, "step": 614000 }, { "epoch": 2.82, "learning_rate": 3.0848073809305595e-06, "loss": 2.4362, "step": 614500 }, { "epoch": 2.82, "learning_rate": 3.0466196193443925e-06, "loss": 2.436, "step": 615000 }, { "epoch": 2.82, "learning_rate": 3.0084318577582255e-06, "loss": 2.4341, "step": 615500 }, { "epoch": 2.82, "learning_rate": 2.970244096172059e-06, "loss": 2.4275, "step": 616000 }, { "epoch": 2.83, "learning_rate": 2.9321327101090645e-06, "loss": 2.4447, "step": 616500 }, { "epoch": 2.83, "learning_rate": 2.8939449485228975e-06, "loss": 2.4238, "step": 617000 }, { "epoch": 2.83, "learning_rate": 2.855833562459903e-06, "loss": 2.4418, "step": 617500 }, { "epoch": 2.83, "learning_rate": 2.8177221763969087e-06, "loss": 2.4255, "step": 618000 }, { "epoch": 2.83, "learning_rate": 2.7795344148107417e-06, "loss": 2.4169, "step": 618500 }, { "epoch": 2.84, "learning_rate": 2.7413466532245747e-06, "loss": 2.4201, "step": 619000 }, { "epoch": 2.84, "learning_rate": 2.703158891638408e-06, "loss": 2.4145, "step": 619500 }, { "epoch": 2.84, "learning_rate": 2.664971130052241e-06, "loss": 2.4464, "step": 620000 }, { "epoch": 2.84, "learning_rate": 2.626783368466074e-06, "loss": 2.4336, "step": 620500 }, { "epoch": 2.85, "learning_rate": 2.588595606879907e-06, "loss": 2.4021, "step": 621000 }, { "epoch": 2.85, "learning_rate": 2.55040784529374e-06, "loss": 2.4143, "step": 621500 }, { "epoch": 2.85, "learning_rate": 2.512220083707573e-06, "loss": 2.4113, "step": 622000 }, { "epoch": 2.85, "learning_rate": 2.4740323221214066e-06, "loss": 2.4401, "step": 622500 }, { "epoch": 2.85, "learning_rate": 2.435920936058412e-06, "loss": 2.4253, "step": 623000 }, { "epoch": 2.86, "learning_rate": 2.397733174472245e-06, "loss": 2.4336, "step": 623500 }, { "epoch": 2.86, "learning_rate": 2.359545412886078e-06, "loss": 2.4242, "step": 624000 }, { "epoch": 2.86, "learning_rate": 2.3213576512999116e-06, "loss": 2.4268, "step": 624500 }, { "epoch": 2.86, "learning_rate": 2.2831698897137446e-06, "loss": 2.4487, "step": 625000 }, { "epoch": 2.87, "learning_rate": 2.2449821281275776e-06, "loss": 2.4442, "step": 625500 }, { "epoch": 2.87, "learning_rate": 2.2068707420645833e-06, "loss": 2.4219, "step": 626000 }, { "epoch": 2.87, "learning_rate": 2.1686829804784167e-06, "loss": 2.438, "step": 626500 }, { "epoch": 2.87, "learning_rate": 2.1304952188922497e-06, "loss": 2.4211, "step": 627000 }, { "epoch": 2.88, "learning_rate": 2.0923074573060827e-06, "loss": 2.4224, "step": 627500 }, { "epoch": 2.88, "learning_rate": 2.054196071243088e-06, "loss": 2.4236, "step": 628000 }, { "epoch": 2.88, "learning_rate": 2.0160083096569213e-06, "loss": 2.4088, "step": 628500 }, { "epoch": 2.88, "learning_rate": 1.9778205480707543e-06, "loss": 2.4216, "step": 629000 }, { "epoch": 2.88, "learning_rate": 1.93970916200776e-06, "loss": 2.434, "step": 629500 }, { "epoch": 2.89, "learning_rate": 1.901521400421593e-06, "loss": 2.4511, "step": 630000 }, { "epoch": 2.89, "learning_rate": 1.863333638835426e-06, "loss": 2.4358, "step": 630500 }, { "epoch": 2.89, "learning_rate": 1.8251458772492593e-06, "loss": 2.4467, "step": 631000 }, { "epoch": 2.89, "learning_rate": 1.7869581156630923e-06, "loss": 2.425, "step": 631500 }, { "epoch": 2.9, "learning_rate": 1.7487703540769253e-06, "loss": 2.4136, "step": 632000 }, { "epoch": 2.9, "learning_rate": 1.7105825924907588e-06, "loss": 2.4209, "step": 632500 }, { "epoch": 2.9, "learning_rate": 1.6723948309045918e-06, "loss": 2.4297, "step": 633000 }, { "epoch": 2.9, "learning_rate": 1.634207069318425e-06, "loss": 2.4426, "step": 633500 }, { "epoch": 2.91, "learning_rate": 1.5960956832554302e-06, "loss": 2.4172, "step": 634000 }, { "epoch": 2.91, "learning_rate": 1.5579079216692636e-06, "loss": 2.4378, "step": 634500 }, { "epoch": 2.91, "learning_rate": 1.519796535606269e-06, "loss": 2.4301, "step": 635000 }, { "epoch": 2.91, "learning_rate": 1.4816087740201022e-06, "loss": 2.4116, "step": 635500 }, { "epoch": 2.91, "learning_rate": 1.4434210124339354e-06, "loss": 2.4258, "step": 636000 }, { "epoch": 2.92, "learning_rate": 1.4052332508477684e-06, "loss": 2.4371, "step": 636500 }, { "epoch": 2.92, "learning_rate": 1.3670454892616014e-06, "loss": 2.4201, "step": 637000 }, { "epoch": 2.92, "learning_rate": 1.3288577276754346e-06, "loss": 2.4293, "step": 637500 }, { "epoch": 2.92, "learning_rate": 1.2906699660892676e-06, "loss": 2.4312, "step": 638000 }, { "epoch": 2.93, "learning_rate": 1.2524822045031008e-06, "loss": 2.4226, "step": 638500 }, { "epoch": 2.93, "learning_rate": 1.2143708184401065e-06, "loss": 2.433, "step": 639000 }, { "epoch": 2.93, "learning_rate": 1.1762594323771119e-06, "loss": 2.4163, "step": 639500 }, { "epoch": 2.93, "learning_rate": 1.1380716707909449e-06, "loss": 2.4187, "step": 640000 }, { "epoch": 2.94, "learning_rate": 1.099883909204778e-06, "loss": 2.4133, "step": 640500 }, { "epoch": 2.94, "learning_rate": 1.0616961476186113e-06, "loss": 2.4094, "step": 641000 }, { "epoch": 2.94, "learning_rate": 1.0235083860324443e-06, "loss": 2.4239, "step": 641500 }, { "epoch": 2.94, "learning_rate": 9.853206244462775e-07, "loss": 2.4384, "step": 642000 }, { "epoch": 2.94, "learning_rate": 9.471328628601107e-07, "loss": 2.4324, "step": 642500 }, { "epoch": 2.95, "learning_rate": 9.09021476797116e-07, "loss": 2.4286, "step": 643000 }, { "epoch": 2.95, "learning_rate": 8.708337152109492e-07, "loss": 2.4441, "step": 643500 }, { "epoch": 2.95, "learning_rate": 8.326459536247824e-07, "loss": 2.4167, "step": 644000 }, { "epoch": 2.95, "learning_rate": 7.944581920386155e-07, "loss": 2.4394, "step": 644500 }, { "epoch": 2.96, "learning_rate": 7.562704304524486e-07, "loss": 2.4224, "step": 645000 }, { "epoch": 2.96, "learning_rate": 7.180826688662817e-07, "loss": 2.4448, "step": 645500 }, { "epoch": 2.96, "learning_rate": 6.79894907280115e-07, "loss": 2.4181, "step": 646000 }, { "epoch": 2.96, "learning_rate": 6.417071456939481e-07, "loss": 2.4207, "step": 646500 }, { "epoch": 2.96, "learning_rate": 6.035193841077812e-07, "loss": 2.4257, "step": 647000 }, { "epoch": 2.97, "learning_rate": 5.653316225216143e-07, "loss": 2.4369, "step": 647500 }, { "epoch": 2.97, "learning_rate": 5.272202364586198e-07, "loss": 2.4371, "step": 648000 }, { "epoch": 2.97, "learning_rate": 4.890324748724529e-07, "loss": 2.4236, "step": 648500 }, { "epoch": 2.97, "learning_rate": 4.5084471328628605e-07, "loss": 2.43, "step": 649000 }, { "epoch": 2.98, "learning_rate": 4.126569517001191e-07, "loss": 2.4313, "step": 649500 }, { "epoch": 2.98, "learning_rate": 3.744691901139523e-07, "loss": 2.4409, "step": 650000 }, { "epoch": 2.98, "learning_rate": 3.3635780405095777e-07, "loss": 2.4284, "step": 650500 }, { "epoch": 2.98, "learning_rate": 2.981700424647909e-07, "loss": 2.4234, "step": 651000 }, { "epoch": 2.99, "learning_rate": 2.5998228087862404e-07, "loss": 2.4511, "step": 651500 }, { "epoch": 2.99, "learning_rate": 2.2179451929245717e-07, "loss": 2.4348, "step": 652000 }, { "epoch": 2.99, "learning_rate": 1.8368313322946263e-07, "loss": 2.4319, "step": 652500 }, { "epoch": 2.99, "learning_rate": 1.4549537164329576e-07, "loss": 2.4315, "step": 653000 }, { "epoch": 2.99, "learning_rate": 1.073076100571289e-07, "loss": 2.4327, "step": 653500 }, { "epoch": 3.0, "learning_rate": 6.911984847096204e-08, "loss": 2.4033, "step": 654000 }, { "epoch": 3.0, "learning_rate": 3.093208688479516e-08, "loss": 2.4401, "step": 654500 }, { "epoch": 3.0, "step": 654660, "total_flos": 9.727742291283542e+18, "train_loss": 2.543785205228415, "train_runtime": 2023217.6227, "train_samples_per_second": 2.589, "train_steps_per_second": 0.324 } ], "max_steps": 654660, "num_train_epochs": 3, "total_flos": 9.727742291283542e+18, "trial_name": null, "trial_params": null }