{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9972350230414744, "eval_steps": 500, "global_step": 1626, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018433179723502304, "grad_norm": 8.069196701049805, "learning_rate": 1.8404907975460125e-07, "loss": 1.9595, "step": 1 }, { "epoch": 0.003686635944700461, "grad_norm": 7.9624152183532715, "learning_rate": 3.680981595092025e-07, "loss": 1.9238, "step": 2 }, { "epoch": 0.005529953917050691, "grad_norm": 7.855696201324463, "learning_rate": 5.521472392638038e-07, "loss": 1.9753, "step": 3 }, { "epoch": 0.007373271889400922, "grad_norm": 7.537660121917725, "learning_rate": 7.36196319018405e-07, "loss": 1.9118, "step": 4 }, { "epoch": 0.009216589861751152, "grad_norm": 7.701188087463379, "learning_rate": 9.202453987730061e-07, "loss": 1.9513, "step": 5 }, { "epoch": 0.011059907834101382, "grad_norm": 6.864987373352051, "learning_rate": 1.1042944785276075e-06, "loss": 1.8933, "step": 6 }, { "epoch": 0.012903225806451613, "grad_norm": 6.382201194763184, "learning_rate": 1.2883435582822088e-06, "loss": 1.914, "step": 7 }, { "epoch": 0.014746543778801843, "grad_norm": 5.355184555053711, "learning_rate": 1.47239263803681e-06, "loss": 1.8792, "step": 8 }, { "epoch": 0.016589861751152075, "grad_norm": 4.214444637298584, "learning_rate": 1.656441717791411e-06, "loss": 1.8518, "step": 9 }, { "epoch": 0.018433179723502304, "grad_norm": 3.039278745651245, "learning_rate": 1.8404907975460122e-06, "loss": 1.8212, "step": 10 }, { "epoch": 0.020276497695852536, "grad_norm": 2.639392137527466, "learning_rate": 2.0245398773006137e-06, "loss": 1.8135, "step": 11 }, { "epoch": 0.022119815668202765, "grad_norm": 3.5517666339874268, "learning_rate": 2.208588957055215e-06, "loss": 1.7826, "step": 12 }, { "epoch": 0.023963133640552997, "grad_norm": 4.091638565063477, "learning_rate": 2.392638036809816e-06, "loss": 1.7977, "step": 13 }, { "epoch": 0.025806451612903226, "grad_norm": 4.3311591148376465, "learning_rate": 2.5766871165644175e-06, "loss": 1.8263, "step": 14 }, { "epoch": 0.027649769585253458, "grad_norm": 4.099533557891846, "learning_rate": 2.7607361963190186e-06, "loss": 1.7905, "step": 15 }, { "epoch": 0.029493087557603687, "grad_norm": 3.3490426540374756, "learning_rate": 2.94478527607362e-06, "loss": 1.8255, "step": 16 }, { "epoch": 0.03133640552995392, "grad_norm": 2.6265652179718018, "learning_rate": 3.1288343558282206e-06, "loss": 1.7975, "step": 17 }, { "epoch": 0.03317972350230415, "grad_norm": 2.011012315750122, "learning_rate": 3.312883435582822e-06, "loss": 1.8177, "step": 18 }, { "epoch": 0.035023041474654376, "grad_norm": 1.4565352201461792, "learning_rate": 3.496932515337423e-06, "loss": 1.7638, "step": 19 }, { "epoch": 0.03686635944700461, "grad_norm": 1.5588182210922241, "learning_rate": 3.6809815950920245e-06, "loss": 1.7664, "step": 20 }, { "epoch": 0.03870967741935484, "grad_norm": 1.712164282798767, "learning_rate": 3.865030674846626e-06, "loss": 1.8482, "step": 21 }, { "epoch": 0.04055299539170507, "grad_norm": 1.592856764793396, "learning_rate": 4.049079754601227e-06, "loss": 1.7808, "step": 22 }, { "epoch": 0.0423963133640553, "grad_norm": 1.5242897272109985, "learning_rate": 4.233128834355828e-06, "loss": 1.7608, "step": 23 }, { "epoch": 0.04423963133640553, "grad_norm": 1.4105795621871948, "learning_rate": 4.41717791411043e-06, "loss": 1.751, "step": 24 }, { "epoch": 0.04608294930875576, "grad_norm": 1.3280014991760254, "learning_rate": 4.601226993865031e-06, "loss": 1.6889, "step": 25 }, { "epoch": 0.047926267281105994, "grad_norm": 1.180946707725525, "learning_rate": 4.785276073619632e-06, "loss": 1.7896, "step": 26 }, { "epoch": 0.04976958525345622, "grad_norm": 1.3021069765090942, "learning_rate": 4.969325153374233e-06, "loss": 1.7427, "step": 27 }, { "epoch": 0.05161290322580645, "grad_norm": 1.271753191947937, "learning_rate": 5.153374233128835e-06, "loss": 1.8205, "step": 28 }, { "epoch": 0.053456221198156684, "grad_norm": 1.172203779220581, "learning_rate": 5.337423312883436e-06, "loss": 1.7236, "step": 29 }, { "epoch": 0.055299539170506916, "grad_norm": 1.180019736289978, "learning_rate": 5.521472392638037e-06, "loss": 1.766, "step": 30 }, { "epoch": 0.05714285714285714, "grad_norm": 1.171268105506897, "learning_rate": 5.7055214723926385e-06, "loss": 1.7719, "step": 31 }, { "epoch": 0.05898617511520737, "grad_norm": 1.1073256731033325, "learning_rate": 5.88957055214724e-06, "loss": 1.7332, "step": 32 }, { "epoch": 0.060829493087557605, "grad_norm": 1.1015299558639526, "learning_rate": 6.0736196319018406e-06, "loss": 1.7033, "step": 33 }, { "epoch": 0.06267281105990784, "grad_norm": 1.0643725395202637, "learning_rate": 6.257668711656441e-06, "loss": 1.7381, "step": 34 }, { "epoch": 0.06451612903225806, "grad_norm": 1.108185052871704, "learning_rate": 6.4417177914110434e-06, "loss": 1.6793, "step": 35 }, { "epoch": 0.0663594470046083, "grad_norm": 1.1295511722564697, "learning_rate": 6.625766871165644e-06, "loss": 1.7328, "step": 36 }, { "epoch": 0.06820276497695853, "grad_norm": 1.015434741973877, "learning_rate": 6.8098159509202454e-06, "loss": 1.7344, "step": 37 }, { "epoch": 0.07004608294930875, "grad_norm": 0.9691202640533447, "learning_rate": 6.993865030674846e-06, "loss": 1.6537, "step": 38 }, { "epoch": 0.07188940092165899, "grad_norm": 1.0023833513259888, "learning_rate": 7.177914110429448e-06, "loss": 1.7013, "step": 39 }, { "epoch": 0.07373271889400922, "grad_norm": 1.0794254541397095, "learning_rate": 7.361963190184049e-06, "loss": 1.7252, "step": 40 }, { "epoch": 0.07557603686635944, "grad_norm": 1.00966215133667, "learning_rate": 7.54601226993865e-06, "loss": 1.6653, "step": 41 }, { "epoch": 0.07741935483870968, "grad_norm": 1.0616544485092163, "learning_rate": 7.730061349693252e-06, "loss": 1.6795, "step": 42 }, { "epoch": 0.0792626728110599, "grad_norm": 1.0308103561401367, "learning_rate": 7.914110429447852e-06, "loss": 1.7097, "step": 43 }, { "epoch": 0.08110599078341015, "grad_norm": 0.9981330037117004, "learning_rate": 8.098159509202455e-06, "loss": 1.6653, "step": 44 }, { "epoch": 0.08294930875576037, "grad_norm": 0.9680313467979431, "learning_rate": 8.282208588957055e-06, "loss": 1.6352, "step": 45 }, { "epoch": 0.0847926267281106, "grad_norm": 1.0043315887451172, "learning_rate": 8.466257668711656e-06, "loss": 1.7177, "step": 46 }, { "epoch": 0.08663594470046083, "grad_norm": 1.0794323682785034, "learning_rate": 8.650306748466258e-06, "loss": 1.816, "step": 47 }, { "epoch": 0.08847926267281106, "grad_norm": 1.0179901123046875, "learning_rate": 8.83435582822086e-06, "loss": 1.6787, "step": 48 }, { "epoch": 0.09032258064516129, "grad_norm": 0.9359702467918396, "learning_rate": 9.01840490797546e-06, "loss": 1.68, "step": 49 }, { "epoch": 0.09216589861751152, "grad_norm": 0.9982637166976929, "learning_rate": 9.202453987730062e-06, "loss": 1.659, "step": 50 }, { "epoch": 0.09400921658986175, "grad_norm": 0.9992314577102661, "learning_rate": 9.386503067484664e-06, "loss": 1.6726, "step": 51 }, { "epoch": 0.09585253456221199, "grad_norm": 0.9390056729316711, "learning_rate": 9.570552147239264e-06, "loss": 1.6688, "step": 52 }, { "epoch": 0.09769585253456221, "grad_norm": 0.9960243105888367, "learning_rate": 9.754601226993865e-06, "loss": 1.7382, "step": 53 }, { "epoch": 0.09953917050691244, "grad_norm": 0.9396870732307434, "learning_rate": 9.938650306748466e-06, "loss": 1.6834, "step": 54 }, { "epoch": 0.10138248847926268, "grad_norm": 0.9374381303787231, "learning_rate": 1.0122699386503068e-05, "loss": 1.6393, "step": 55 }, { "epoch": 0.1032258064516129, "grad_norm": 0.9893936514854431, "learning_rate": 1.030674846625767e-05, "loss": 1.6877, "step": 56 }, { "epoch": 0.10506912442396313, "grad_norm": 0.9704030156135559, "learning_rate": 1.0490797546012269e-05, "loss": 1.6502, "step": 57 }, { "epoch": 0.10691244239631337, "grad_norm": 0.9695441126823425, "learning_rate": 1.0674846625766871e-05, "loss": 1.6469, "step": 58 }, { "epoch": 0.10875576036866359, "grad_norm": 0.9624999761581421, "learning_rate": 1.0858895705521474e-05, "loss": 1.7038, "step": 59 }, { "epoch": 0.11059907834101383, "grad_norm": 0.969788134098053, "learning_rate": 1.1042944785276074e-05, "loss": 1.6878, "step": 60 }, { "epoch": 0.11244239631336406, "grad_norm": 0.9609492421150208, "learning_rate": 1.1226993865030675e-05, "loss": 1.7159, "step": 61 }, { "epoch": 0.11428571428571428, "grad_norm": 0.9911996722221375, "learning_rate": 1.1411042944785277e-05, "loss": 1.6655, "step": 62 }, { "epoch": 0.11612903225806452, "grad_norm": 0.959848940372467, "learning_rate": 1.1595092024539878e-05, "loss": 1.6993, "step": 63 }, { "epoch": 0.11797235023041475, "grad_norm": 0.9436675906181335, "learning_rate": 1.177914110429448e-05, "loss": 1.6939, "step": 64 }, { "epoch": 0.11981566820276497, "grad_norm": 0.9513627886772156, "learning_rate": 1.1963190184049079e-05, "loss": 1.7223, "step": 65 }, { "epoch": 0.12165898617511521, "grad_norm": 0.9513629674911499, "learning_rate": 1.2147239263803681e-05, "loss": 1.6323, "step": 66 }, { "epoch": 0.12350230414746544, "grad_norm": 1.0369120836257935, "learning_rate": 1.2331288343558283e-05, "loss": 1.6881, "step": 67 }, { "epoch": 0.12534562211981568, "grad_norm": 0.891363263130188, "learning_rate": 1.2515337423312882e-05, "loss": 1.6377, "step": 68 }, { "epoch": 0.1271889400921659, "grad_norm": 0.9345976114273071, "learning_rate": 1.2699386503067485e-05, "loss": 1.6559, "step": 69 }, { "epoch": 0.12903225806451613, "grad_norm": 0.943684995174408, "learning_rate": 1.2883435582822087e-05, "loss": 1.6979, "step": 70 }, { "epoch": 0.13087557603686636, "grad_norm": 0.9224227666854858, "learning_rate": 1.3067484662576687e-05, "loss": 1.6825, "step": 71 }, { "epoch": 0.1327188940092166, "grad_norm": 0.9455180168151855, "learning_rate": 1.3251533742331288e-05, "loss": 1.6729, "step": 72 }, { "epoch": 0.13456221198156681, "grad_norm": 0.9461382031440735, "learning_rate": 1.343558282208589e-05, "loss": 1.6633, "step": 73 }, { "epoch": 0.13640552995391705, "grad_norm": 1.007354736328125, "learning_rate": 1.3619631901840491e-05, "loss": 1.6883, "step": 74 }, { "epoch": 0.1382488479262673, "grad_norm": 0.960409939289093, "learning_rate": 1.3803680981595093e-05, "loss": 1.6666, "step": 75 }, { "epoch": 0.1400921658986175, "grad_norm": 0.9324995279312134, "learning_rate": 1.3987730061349692e-05, "loss": 1.6486, "step": 76 }, { "epoch": 0.14193548387096774, "grad_norm": 0.960115909576416, "learning_rate": 1.4171779141104294e-05, "loss": 1.6825, "step": 77 }, { "epoch": 0.14377880184331798, "grad_norm": 1.0422765016555786, "learning_rate": 1.4355828220858897e-05, "loss": 1.6711, "step": 78 }, { "epoch": 0.1456221198156682, "grad_norm": 0.979534924030304, "learning_rate": 1.4539877300613497e-05, "loss": 1.7024, "step": 79 }, { "epoch": 0.14746543778801843, "grad_norm": 0.935651421546936, "learning_rate": 1.4723926380368098e-05, "loss": 1.6201, "step": 80 }, { "epoch": 0.14930875576036867, "grad_norm": 0.9288959503173828, "learning_rate": 1.49079754601227e-05, "loss": 1.7174, "step": 81 }, { "epoch": 0.15115207373271888, "grad_norm": 0.9639124870300293, "learning_rate": 1.50920245398773e-05, "loss": 1.6593, "step": 82 }, { "epoch": 0.15299539170506912, "grad_norm": 0.9626865386962891, "learning_rate": 1.52760736196319e-05, "loss": 1.6673, "step": 83 }, { "epoch": 0.15483870967741936, "grad_norm": 0.9012230038642883, "learning_rate": 1.5460122699386504e-05, "loss": 1.6566, "step": 84 }, { "epoch": 0.15668202764976957, "grad_norm": 0.9206567406654358, "learning_rate": 1.5644171779141104e-05, "loss": 1.7307, "step": 85 }, { "epoch": 0.1585253456221198, "grad_norm": 0.9018635749816895, "learning_rate": 1.5828220858895705e-05, "loss": 1.658, "step": 86 }, { "epoch": 0.16036866359447005, "grad_norm": 0.8840540051460266, "learning_rate": 1.601226993865031e-05, "loss": 1.6571, "step": 87 }, { "epoch": 0.1622119815668203, "grad_norm": 0.9978141188621521, "learning_rate": 1.619631901840491e-05, "loss": 1.7104, "step": 88 }, { "epoch": 0.1640552995391705, "grad_norm": 0.9408591985702515, "learning_rate": 1.638036809815951e-05, "loss": 1.6353, "step": 89 }, { "epoch": 0.16589861751152074, "grad_norm": 0.9079807996749878, "learning_rate": 1.656441717791411e-05, "loss": 1.6125, "step": 90 }, { "epoch": 0.16774193548387098, "grad_norm": 0.8854590654373169, "learning_rate": 1.674846625766871e-05, "loss": 1.6634, "step": 91 }, { "epoch": 0.1695852534562212, "grad_norm": 0.8910956978797913, "learning_rate": 1.693251533742331e-05, "loss": 1.6176, "step": 92 }, { "epoch": 0.17142857142857143, "grad_norm": 0.91642165184021, "learning_rate": 1.7116564417177916e-05, "loss": 1.6327, "step": 93 }, { "epoch": 0.17327188940092167, "grad_norm": 0.8759831190109253, "learning_rate": 1.7300613496932516e-05, "loss": 1.6332, "step": 94 }, { "epoch": 0.17511520737327188, "grad_norm": 0.8434805870056152, "learning_rate": 1.7484662576687117e-05, "loss": 1.6224, "step": 95 }, { "epoch": 0.17695852534562212, "grad_norm": 0.888469934463501, "learning_rate": 1.766871165644172e-05, "loss": 1.6915, "step": 96 }, { "epoch": 0.17880184331797236, "grad_norm": 0.9267154335975647, "learning_rate": 1.785276073619632e-05, "loss": 1.7079, "step": 97 }, { "epoch": 0.18064516129032257, "grad_norm": 0.8888590931892395, "learning_rate": 1.803680981595092e-05, "loss": 1.7281, "step": 98 }, { "epoch": 0.1824884792626728, "grad_norm": 0.8610063791275024, "learning_rate": 1.8220858895705523e-05, "loss": 1.6496, "step": 99 }, { "epoch": 0.18433179723502305, "grad_norm": 0.8801419734954834, "learning_rate": 1.8404907975460123e-05, "loss": 1.6686, "step": 100 }, { "epoch": 0.18617511520737326, "grad_norm": 0.8765119910240173, "learning_rate": 1.8588957055214724e-05, "loss": 1.6166, "step": 101 }, { "epoch": 0.1880184331797235, "grad_norm": 0.8312994241714478, "learning_rate": 1.8773006134969328e-05, "loss": 1.6892, "step": 102 }, { "epoch": 0.18986175115207374, "grad_norm": 0.8541645407676697, "learning_rate": 1.8957055214723928e-05, "loss": 1.6141, "step": 103 }, { "epoch": 0.19170506912442398, "grad_norm": 0.8821831941604614, "learning_rate": 1.914110429447853e-05, "loss": 1.6986, "step": 104 }, { "epoch": 0.1935483870967742, "grad_norm": 0.8239928483963013, "learning_rate": 1.9325153374233126e-05, "loss": 1.6453, "step": 105 }, { "epoch": 0.19539170506912443, "grad_norm": 1.0745067596435547, "learning_rate": 1.950920245398773e-05, "loss": 1.6705, "step": 106 }, { "epoch": 0.19723502304147467, "grad_norm": 0.8425748348236084, "learning_rate": 1.969325153374233e-05, "loss": 1.6595, "step": 107 }, { "epoch": 0.19907834101382488, "grad_norm": 0.8728457093238831, "learning_rate": 1.987730061349693e-05, "loss": 1.6565, "step": 108 }, { "epoch": 0.20092165898617512, "grad_norm": 0.8984567523002625, "learning_rate": 2.0061349693251535e-05, "loss": 1.6653, "step": 109 }, { "epoch": 0.20276497695852536, "grad_norm": 0.9448698163032532, "learning_rate": 2.0245398773006136e-05, "loss": 1.6711, "step": 110 }, { "epoch": 0.20460829493087557, "grad_norm": 0.8336430788040161, "learning_rate": 2.0429447852760736e-05, "loss": 1.6438, "step": 111 }, { "epoch": 0.2064516129032258, "grad_norm": 0.822298526763916, "learning_rate": 2.061349693251534e-05, "loss": 1.5347, "step": 112 }, { "epoch": 0.20829493087557605, "grad_norm": 0.8052050471305847, "learning_rate": 2.0797546012269938e-05, "loss": 1.6567, "step": 113 }, { "epoch": 0.21013824884792626, "grad_norm": 0.8422743678092957, "learning_rate": 2.0981595092024538e-05, "loss": 1.6369, "step": 114 }, { "epoch": 0.2119815668202765, "grad_norm": 0.808554470539093, "learning_rate": 2.1165644171779142e-05, "loss": 1.6483, "step": 115 }, { "epoch": 0.21382488479262673, "grad_norm": 0.8404141068458557, "learning_rate": 2.1349693251533743e-05, "loss": 1.6582, "step": 116 }, { "epoch": 0.21566820276497695, "grad_norm": 0.8479593396186829, "learning_rate": 2.1533742331288343e-05, "loss": 1.6392, "step": 117 }, { "epoch": 0.21751152073732719, "grad_norm": 0.782650887966156, "learning_rate": 2.1717791411042947e-05, "loss": 1.591, "step": 118 }, { "epoch": 0.21935483870967742, "grad_norm": 0.8297327160835266, "learning_rate": 2.1901840490797548e-05, "loss": 1.6314, "step": 119 }, { "epoch": 0.22119815668202766, "grad_norm": 0.8564377427101135, "learning_rate": 2.208588957055215e-05, "loss": 1.6073, "step": 120 }, { "epoch": 0.22304147465437787, "grad_norm": 0.8538500666618347, "learning_rate": 2.226993865030675e-05, "loss": 1.7328, "step": 121 }, { "epoch": 0.2248847926267281, "grad_norm": 0.8634692430496216, "learning_rate": 2.245398773006135e-05, "loss": 1.6588, "step": 122 }, { "epoch": 0.22672811059907835, "grad_norm": 0.807186484336853, "learning_rate": 2.263803680981595e-05, "loss": 1.657, "step": 123 }, { "epoch": 0.22857142857142856, "grad_norm": 0.9123692512512207, "learning_rate": 2.2822085889570554e-05, "loss": 1.6532, "step": 124 }, { "epoch": 0.2304147465437788, "grad_norm": 0.8583146929740906, "learning_rate": 2.3006134969325155e-05, "loss": 1.668, "step": 125 }, { "epoch": 0.23225806451612904, "grad_norm": 0.8219059109687805, "learning_rate": 2.3190184049079755e-05, "loss": 1.6746, "step": 126 }, { "epoch": 0.23410138248847925, "grad_norm": 0.9336742162704468, "learning_rate": 2.337423312883436e-05, "loss": 1.6452, "step": 127 }, { "epoch": 0.2359447004608295, "grad_norm": 0.7920992970466614, "learning_rate": 2.355828220858896e-05, "loss": 1.6609, "step": 128 }, { "epoch": 0.23778801843317973, "grad_norm": 0.8894769549369812, "learning_rate": 2.3742331288343557e-05, "loss": 1.6412, "step": 129 }, { "epoch": 0.23963133640552994, "grad_norm": 0.7901114821434021, "learning_rate": 2.3926380368098158e-05, "loss": 1.6449, "step": 130 }, { "epoch": 0.24147465437788018, "grad_norm": 0.9107617735862732, "learning_rate": 2.411042944785276e-05, "loss": 1.677, "step": 131 }, { "epoch": 0.24331797235023042, "grad_norm": 0.7705798745155334, "learning_rate": 2.4294478527607362e-05, "loss": 1.6021, "step": 132 }, { "epoch": 0.24516129032258063, "grad_norm": 0.8251765966415405, "learning_rate": 2.4478527607361963e-05, "loss": 1.622, "step": 133 }, { "epoch": 0.24700460829493087, "grad_norm": 0.8311643600463867, "learning_rate": 2.4662576687116567e-05, "loss": 1.6904, "step": 134 }, { "epoch": 0.2488479262672811, "grad_norm": 0.8353893756866455, "learning_rate": 2.4846625766871167e-05, "loss": 1.6874, "step": 135 }, { "epoch": 0.25069124423963135, "grad_norm": 0.7548201680183411, "learning_rate": 2.5030674846625765e-05, "loss": 1.6085, "step": 136 }, { "epoch": 0.25253456221198156, "grad_norm": 0.8034607768058777, "learning_rate": 2.521472392638037e-05, "loss": 1.7073, "step": 137 }, { "epoch": 0.2543778801843318, "grad_norm": 0.7662417888641357, "learning_rate": 2.539877300613497e-05, "loss": 1.6925, "step": 138 }, { "epoch": 0.25622119815668204, "grad_norm": 0.8063399791717529, "learning_rate": 2.558282208588957e-05, "loss": 1.6682, "step": 139 }, { "epoch": 0.25806451612903225, "grad_norm": 0.7540117502212524, "learning_rate": 2.5766871165644174e-05, "loss": 1.6501, "step": 140 }, { "epoch": 0.25990783410138246, "grad_norm": 0.7647683024406433, "learning_rate": 2.5950920245398774e-05, "loss": 1.6679, "step": 141 }, { "epoch": 0.26175115207373273, "grad_norm": 0.7684164047241211, "learning_rate": 2.6134969325153375e-05, "loss": 1.6454, "step": 142 }, { "epoch": 0.26359447004608294, "grad_norm": 0.7761498689651489, "learning_rate": 2.631901840490798e-05, "loss": 1.601, "step": 143 }, { "epoch": 0.2654377880184332, "grad_norm": 0.7522398233413696, "learning_rate": 2.6503067484662576e-05, "loss": 1.5725, "step": 144 }, { "epoch": 0.2672811059907834, "grad_norm": 0.7951441407203674, "learning_rate": 2.6687116564417177e-05, "loss": 1.7098, "step": 145 }, { "epoch": 0.26912442396313363, "grad_norm": 0.7525542974472046, "learning_rate": 2.687116564417178e-05, "loss": 1.6897, "step": 146 }, { "epoch": 0.2709677419354839, "grad_norm": 0.7537124752998352, "learning_rate": 2.705521472392638e-05, "loss": 1.6175, "step": 147 }, { "epoch": 0.2728110599078341, "grad_norm": 0.7485160231590271, "learning_rate": 2.7239263803680982e-05, "loss": 1.5888, "step": 148 }, { "epoch": 0.2746543778801843, "grad_norm": 0.7668492197990417, "learning_rate": 2.7423312883435586e-05, "loss": 1.6654, "step": 149 }, { "epoch": 0.2764976958525346, "grad_norm": 0.7543622255325317, "learning_rate": 2.7607361963190186e-05, "loss": 1.5829, "step": 150 }, { "epoch": 0.2783410138248848, "grad_norm": 0.7573733329772949, "learning_rate": 2.7791411042944787e-05, "loss": 1.6472, "step": 151 }, { "epoch": 0.280184331797235, "grad_norm": 0.8029415011405945, "learning_rate": 2.7975460122699384e-05, "loss": 1.6164, "step": 152 }, { "epoch": 0.2820276497695853, "grad_norm": 0.726951539516449, "learning_rate": 2.8159509202453988e-05, "loss": 1.6185, "step": 153 }, { "epoch": 0.2838709677419355, "grad_norm": 0.7413868308067322, "learning_rate": 2.834355828220859e-05, "loss": 1.6217, "step": 154 }, { "epoch": 0.2857142857142857, "grad_norm": 0.7553615570068359, "learning_rate": 2.852760736196319e-05, "loss": 1.6231, "step": 155 }, { "epoch": 0.28755760368663597, "grad_norm": 0.7762808799743652, "learning_rate": 2.8711656441717793e-05, "loss": 1.6137, "step": 156 }, { "epoch": 0.2894009216589862, "grad_norm": 0.7329282164573669, "learning_rate": 2.8895705521472394e-05, "loss": 1.6392, "step": 157 }, { "epoch": 0.2912442396313364, "grad_norm": 0.7602019906044006, "learning_rate": 2.9079754601226994e-05, "loss": 1.6757, "step": 158 }, { "epoch": 0.29308755760368665, "grad_norm": 0.7417956590652466, "learning_rate": 2.92638036809816e-05, "loss": 1.6804, "step": 159 }, { "epoch": 0.29493087557603687, "grad_norm": 0.722017765045166, "learning_rate": 2.9447852760736196e-05, "loss": 1.6026, "step": 160 }, { "epoch": 0.2967741935483871, "grad_norm": 0.7312431931495667, "learning_rate": 2.9631901840490796e-05, "loss": 1.6237, "step": 161 }, { "epoch": 0.29861751152073734, "grad_norm": 0.7567271590232849, "learning_rate": 2.98159509202454e-05, "loss": 1.6017, "step": 162 }, { "epoch": 0.30046082949308756, "grad_norm": 0.8126540184020996, "learning_rate": 3e-05, "loss": 1.6855, "step": 163 }, { "epoch": 0.30230414746543777, "grad_norm": 0.7164492607116699, "learning_rate": 2.9999965416241516e-05, "loss": 1.5946, "step": 164 }, { "epoch": 0.30414746543778803, "grad_norm": 0.7384732365608215, "learning_rate": 2.999986166512553e-05, "loss": 1.6838, "step": 165 }, { "epoch": 0.30599078341013825, "grad_norm": 0.7301955223083496, "learning_rate": 2.9999688747130467e-05, "loss": 1.6634, "step": 166 }, { "epoch": 0.30783410138248846, "grad_norm": 0.8040173649787903, "learning_rate": 2.999944666305367e-05, "loss": 1.6432, "step": 167 }, { "epoch": 0.3096774193548387, "grad_norm": 0.7109978199005127, "learning_rate": 2.999913541401143e-05, "loss": 1.5319, "step": 168 }, { "epoch": 0.31152073732718893, "grad_norm": 0.7347285747528076, "learning_rate": 2.9998755001438975e-05, "loss": 1.6154, "step": 169 }, { "epoch": 0.31336405529953915, "grad_norm": 0.7118402123451233, "learning_rate": 2.999830542709045e-05, "loss": 1.5931, "step": 170 }, { "epoch": 0.3152073732718894, "grad_norm": 0.7542247772216797, "learning_rate": 2.9997786693038913e-05, "loss": 1.6105, "step": 171 }, { "epoch": 0.3170506912442396, "grad_norm": 0.7288558483123779, "learning_rate": 2.9997198801676335e-05, "loss": 1.619, "step": 172 }, { "epoch": 0.31889400921658984, "grad_norm": 0.7291957139968872, "learning_rate": 2.9996541755713585e-05, "loss": 1.6477, "step": 173 }, { "epoch": 0.3207373271889401, "grad_norm": 0.7321206331253052, "learning_rate": 2.999581555818041e-05, "loss": 1.6617, "step": 174 }, { "epoch": 0.3225806451612903, "grad_norm": 0.7169788479804993, "learning_rate": 2.9995020212425432e-05, "loss": 1.6317, "step": 175 }, { "epoch": 0.3244239631336406, "grad_norm": 0.7309001088142395, "learning_rate": 2.9994155722116118e-05, "loss": 1.6971, "step": 176 }, { "epoch": 0.3262672811059908, "grad_norm": 0.7466540932655334, "learning_rate": 2.999322209123878e-05, "loss": 1.7202, "step": 177 }, { "epoch": 0.328110599078341, "grad_norm": 0.7057178020477295, "learning_rate": 2.9992219324098545e-05, "loss": 1.5774, "step": 178 }, { "epoch": 0.32995391705069127, "grad_norm": 0.7252513766288757, "learning_rate": 2.9991147425319346e-05, "loss": 1.6047, "step": 179 }, { "epoch": 0.3317972350230415, "grad_norm": 0.7173789739608765, "learning_rate": 2.9990006399843884e-05, "loss": 1.6742, "step": 180 }, { "epoch": 0.3336405529953917, "grad_norm": 0.75115966796875, "learning_rate": 2.998879625293362e-05, "loss": 1.6813, "step": 181 }, { "epoch": 0.33548387096774196, "grad_norm": 0.7187711596488953, "learning_rate": 2.9987516990168743e-05, "loss": 1.5739, "step": 182 }, { "epoch": 0.33732718894009217, "grad_norm": 0.7093032598495483, "learning_rate": 2.9986168617448153e-05, "loss": 1.6014, "step": 183 }, { "epoch": 0.3391705069124424, "grad_norm": 0.7232037782669067, "learning_rate": 2.9984751140989417e-05, "loss": 1.6312, "step": 184 }, { "epoch": 0.34101382488479265, "grad_norm": 0.6996629238128662, "learning_rate": 2.9983264567328756e-05, "loss": 1.6618, "step": 185 }, { "epoch": 0.34285714285714286, "grad_norm": 0.7100509405136108, "learning_rate": 2.9981708903321017e-05, "loss": 1.6206, "step": 186 }, { "epoch": 0.34470046082949307, "grad_norm": 0.6927889585494995, "learning_rate": 2.9980084156139625e-05, "loss": 1.6114, "step": 187 }, { "epoch": 0.34654377880184334, "grad_norm": 0.7183358669281006, "learning_rate": 2.9978390333276565e-05, "loss": 1.6499, "step": 188 }, { "epoch": 0.34838709677419355, "grad_norm": 0.7246880531311035, "learning_rate": 2.9976627442542325e-05, "loss": 1.7149, "step": 189 }, { "epoch": 0.35023041474654376, "grad_norm": 0.6915476322174072, "learning_rate": 2.997479549206591e-05, "loss": 1.6581, "step": 190 }, { "epoch": 0.35207373271889403, "grad_norm": 0.7085281610488892, "learning_rate": 2.9972894490294738e-05, "loss": 1.599, "step": 191 }, { "epoch": 0.35391705069124424, "grad_norm": 0.6912311911582947, "learning_rate": 2.9970924445994645e-05, "loss": 1.6758, "step": 192 }, { "epoch": 0.35576036866359445, "grad_norm": 0.7184807062149048, "learning_rate": 2.9968885368249847e-05, "loss": 1.6618, "step": 193 }, { "epoch": 0.3576036866359447, "grad_norm": 0.667137861251831, "learning_rate": 2.9966777266462863e-05, "loss": 1.6717, "step": 194 }, { "epoch": 0.35944700460829493, "grad_norm": 0.7517136335372925, "learning_rate": 2.9964600150354512e-05, "loss": 1.6093, "step": 195 }, { "epoch": 0.36129032258064514, "grad_norm": 0.6755937933921814, "learning_rate": 2.9962354029963835e-05, "loss": 1.6472, "step": 196 }, { "epoch": 0.3631336405529954, "grad_norm": 0.745136022567749, "learning_rate": 2.9960038915648076e-05, "loss": 1.7045, "step": 197 }, { "epoch": 0.3649769585253456, "grad_norm": 0.7117925882339478, "learning_rate": 2.9957654818082615e-05, "loss": 1.6463, "step": 198 }, { "epoch": 0.36682027649769583, "grad_norm": 0.7289886474609375, "learning_rate": 2.9955201748260923e-05, "loss": 1.6875, "step": 199 }, { "epoch": 0.3686635944700461, "grad_norm": 0.6860397458076477, "learning_rate": 2.9952679717494516e-05, "loss": 1.663, "step": 200 }, { "epoch": 0.3705069124423963, "grad_norm": 0.6836693286895752, "learning_rate": 2.9950088737412898e-05, "loss": 1.623, "step": 201 }, { "epoch": 0.3723502304147465, "grad_norm": 0.7017030119895935, "learning_rate": 2.9947428819963526e-05, "loss": 1.6689, "step": 202 }, { "epoch": 0.3741935483870968, "grad_norm": 0.6897289156913757, "learning_rate": 2.994469997741171e-05, "loss": 1.6149, "step": 203 }, { "epoch": 0.376036866359447, "grad_norm": 0.6873739957809448, "learning_rate": 2.994190222234061e-05, "loss": 1.6619, "step": 204 }, { "epoch": 0.3778801843317972, "grad_norm": 0.7019942402839661, "learning_rate": 2.9939035567651146e-05, "loss": 1.6472, "step": 205 }, { "epoch": 0.3797235023041475, "grad_norm": 0.66780024766922, "learning_rate": 2.9936100026561933e-05, "loss": 1.6537, "step": 206 }, { "epoch": 0.3815668202764977, "grad_norm": 0.692183792591095, "learning_rate": 2.9933095612609253e-05, "loss": 1.6423, "step": 207 }, { "epoch": 0.38341013824884795, "grad_norm": 0.6754509210586548, "learning_rate": 2.993002233964696e-05, "loss": 1.6991, "step": 208 }, { "epoch": 0.38525345622119817, "grad_norm": 0.6930481791496277, "learning_rate": 2.9926880221846435e-05, "loss": 1.6344, "step": 209 }, { "epoch": 0.3870967741935484, "grad_norm": 0.6551826596260071, "learning_rate": 2.9923669273696506e-05, "loss": 1.5657, "step": 210 }, { "epoch": 0.38894009216589864, "grad_norm": 0.6788101196289062, "learning_rate": 2.9920389510003395e-05, "loss": 1.6145, "step": 211 }, { "epoch": 0.39078341013824885, "grad_norm": 0.685989499092102, "learning_rate": 2.9917040945890638e-05, "loss": 1.6968, "step": 212 }, { "epoch": 0.39262672811059907, "grad_norm": 0.6733895540237427, "learning_rate": 2.9913623596799032e-05, "loss": 1.6688, "step": 213 }, { "epoch": 0.39447004608294933, "grad_norm": 0.6755836606025696, "learning_rate": 2.9910137478486545e-05, "loss": 1.6203, "step": 214 }, { "epoch": 0.39631336405529954, "grad_norm": 0.6805539131164551, "learning_rate": 2.990658260702826e-05, "loss": 1.6493, "step": 215 }, { "epoch": 0.39815668202764976, "grad_norm": 0.6727429032325745, "learning_rate": 2.9902958998816274e-05, "loss": 1.6811, "step": 216 }, { "epoch": 0.4, "grad_norm": 0.6738969087600708, "learning_rate": 2.989926667055966e-05, "loss": 1.6544, "step": 217 }, { "epoch": 0.40184331797235023, "grad_norm": 0.6789515018463135, "learning_rate": 2.989550563928436e-05, "loss": 1.6148, "step": 218 }, { "epoch": 0.40368663594470044, "grad_norm": 0.6661167740821838, "learning_rate": 2.9891675922333125e-05, "loss": 1.6403, "step": 219 }, { "epoch": 0.4055299539170507, "grad_norm": 0.6614462733268738, "learning_rate": 2.9887777537365416e-05, "loss": 1.6355, "step": 220 }, { "epoch": 0.4073732718894009, "grad_norm": 0.6668274998664856, "learning_rate": 2.9883810502357346e-05, "loss": 1.6464, "step": 221 }, { "epoch": 0.40921658986175113, "grad_norm": 0.6865269541740417, "learning_rate": 2.9879774835601574e-05, "loss": 1.6259, "step": 222 }, { "epoch": 0.4110599078341014, "grad_norm": 0.6621260643005371, "learning_rate": 2.987567055570724e-05, "loss": 1.669, "step": 223 }, { "epoch": 0.4129032258064516, "grad_norm": 0.6595982313156128, "learning_rate": 2.987149768159987e-05, "loss": 1.5501, "step": 224 }, { "epoch": 0.4147465437788018, "grad_norm": 0.67420893907547, "learning_rate": 2.986725623252128e-05, "loss": 1.6995, "step": 225 }, { "epoch": 0.4165898617511521, "grad_norm": 0.6542205214500427, "learning_rate": 2.9862946228029507e-05, "loss": 1.602, "step": 226 }, { "epoch": 0.4184331797235023, "grad_norm": 0.6605645418167114, "learning_rate": 2.9858567687998702e-05, "loss": 1.588, "step": 227 }, { "epoch": 0.4202764976958525, "grad_norm": 0.6750871539115906, "learning_rate": 2.9854120632619053e-05, "loss": 1.6054, "step": 228 }, { "epoch": 0.4221198156682028, "grad_norm": 0.6753493547439575, "learning_rate": 2.9849605082396678e-05, "loss": 1.691, "step": 229 }, { "epoch": 0.423963133640553, "grad_norm": 0.6629025340080261, "learning_rate": 2.9845021058153532e-05, "loss": 1.6008, "step": 230 }, { "epoch": 0.4258064516129032, "grad_norm": 0.6610414385795593, "learning_rate": 2.984036858102732e-05, "loss": 1.6168, "step": 231 }, { "epoch": 0.42764976958525347, "grad_norm": 0.6786680221557617, "learning_rate": 2.98356476724714e-05, "loss": 1.7059, "step": 232 }, { "epoch": 0.4294930875576037, "grad_norm": 0.6556485891342163, "learning_rate": 2.9830858354254672e-05, "loss": 1.6078, "step": 233 }, { "epoch": 0.4313364055299539, "grad_norm": 0.641831636428833, "learning_rate": 2.9826000648461484e-05, "loss": 1.5981, "step": 234 }, { "epoch": 0.43317972350230416, "grad_norm": 0.6749414205551147, "learning_rate": 2.982107457749153e-05, "loss": 1.6019, "step": 235 }, { "epoch": 0.43502304147465437, "grad_norm": 0.6634622812271118, "learning_rate": 2.9816080164059758e-05, "loss": 1.6097, "step": 236 }, { "epoch": 0.4368663594470046, "grad_norm": 0.6489592790603638, "learning_rate": 2.981101743119624e-05, "loss": 1.5409, "step": 237 }, { "epoch": 0.43870967741935485, "grad_norm": 0.6667509078979492, "learning_rate": 2.9805886402246084e-05, "loss": 1.6628, "step": 238 }, { "epoch": 0.44055299539170506, "grad_norm": 0.6460263729095459, "learning_rate": 2.9800687100869334e-05, "loss": 1.6128, "step": 239 }, { "epoch": 0.4423963133640553, "grad_norm": 0.6403085589408875, "learning_rate": 2.9795419551040836e-05, "loss": 1.6476, "step": 240 }, { "epoch": 0.44423963133640554, "grad_norm": 0.6641395092010498, "learning_rate": 2.9790083777050148e-05, "loss": 1.6254, "step": 241 }, { "epoch": 0.44608294930875575, "grad_norm": 0.6574083566665649, "learning_rate": 2.9784679803501416e-05, "loss": 1.7013, "step": 242 }, { "epoch": 0.447926267281106, "grad_norm": 0.6553590893745422, "learning_rate": 2.977920765531327e-05, "loss": 1.6274, "step": 243 }, { "epoch": 0.4497695852534562, "grad_norm": 0.6555891633033752, "learning_rate": 2.9773667357718706e-05, "loss": 1.6221, "step": 244 }, { "epoch": 0.45161290322580644, "grad_norm": 0.6527219414710999, "learning_rate": 2.9768058936264967e-05, "loss": 1.6531, "step": 245 }, { "epoch": 0.4534562211981567, "grad_norm": 0.6400430202484131, "learning_rate": 2.976238241681342e-05, "loss": 1.6115, "step": 246 }, { "epoch": 0.4552995391705069, "grad_norm": 0.6386510133743286, "learning_rate": 2.9756637825539453e-05, "loss": 1.563, "step": 247 }, { "epoch": 0.45714285714285713, "grad_norm": 0.6651750206947327, "learning_rate": 2.9750825188932334e-05, "loss": 1.5745, "step": 248 }, { "epoch": 0.4589861751152074, "grad_norm": 0.6532901525497437, "learning_rate": 2.9744944533795112e-05, "loss": 1.6333, "step": 249 }, { "epoch": 0.4608294930875576, "grad_norm": 0.6358110308647156, "learning_rate": 2.973899588724448e-05, "loss": 1.5915, "step": 250 }, { "epoch": 0.4626728110599078, "grad_norm": 0.623234212398529, "learning_rate": 2.973297927671063e-05, "loss": 1.6112, "step": 251 }, { "epoch": 0.4645161290322581, "grad_norm": 0.6543712019920349, "learning_rate": 2.9726894729937177e-05, "loss": 1.6007, "step": 252 }, { "epoch": 0.4663594470046083, "grad_norm": 0.6477133631706238, "learning_rate": 2.972074227498098e-05, "loss": 1.6409, "step": 253 }, { "epoch": 0.4682027649769585, "grad_norm": 0.6281903982162476, "learning_rate": 2.971452194021204e-05, "loss": 1.6164, "step": 254 }, { "epoch": 0.4700460829493088, "grad_norm": 0.6434081792831421, "learning_rate": 2.9708233754313365e-05, "loss": 1.6447, "step": 255 }, { "epoch": 0.471889400921659, "grad_norm": 0.6493398547172546, "learning_rate": 2.9701877746280843e-05, "loss": 1.6623, "step": 256 }, { "epoch": 0.4737327188940092, "grad_norm": 0.6439070105552673, "learning_rate": 2.9695453945423087e-05, "loss": 1.6663, "step": 257 }, { "epoch": 0.47557603686635946, "grad_norm": 0.6540718078613281, "learning_rate": 2.9688962381361317e-05, "loss": 1.6317, "step": 258 }, { "epoch": 0.4774193548387097, "grad_norm": 0.6279379725456238, "learning_rate": 2.968240308402923e-05, "loss": 1.6014, "step": 259 }, { "epoch": 0.4792626728110599, "grad_norm": 0.6304627656936646, "learning_rate": 2.967577608367285e-05, "loss": 1.5871, "step": 260 }, { "epoch": 0.48110599078341015, "grad_norm": 0.6160934567451477, "learning_rate": 2.9669081410850378e-05, "loss": 1.5645, "step": 261 }, { "epoch": 0.48294930875576036, "grad_norm": 0.6426898837089539, "learning_rate": 2.966231909643208e-05, "loss": 1.6148, "step": 262 }, { "epoch": 0.4847926267281106, "grad_norm": 0.6386516094207764, "learning_rate": 2.9655489171600118e-05, "loss": 1.5938, "step": 263 }, { "epoch": 0.48663594470046084, "grad_norm": 0.6404203176498413, "learning_rate": 2.9648591667848428e-05, "loss": 1.6709, "step": 264 }, { "epoch": 0.48847926267281105, "grad_norm": 0.6466663479804993, "learning_rate": 2.9641626616982555e-05, "loss": 1.647, "step": 265 }, { "epoch": 0.49032258064516127, "grad_norm": 0.6401766538619995, "learning_rate": 2.9634594051119515e-05, "loss": 1.6588, "step": 266 }, { "epoch": 0.49216589861751153, "grad_norm": 0.6396846771240234, "learning_rate": 2.9627494002687653e-05, "loss": 1.6756, "step": 267 }, { "epoch": 0.49400921658986174, "grad_norm": 0.6392101645469666, "learning_rate": 2.9620326504426476e-05, "loss": 1.6193, "step": 268 }, { "epoch": 0.49585253456221196, "grad_norm": 0.6435759663581848, "learning_rate": 2.9613091589386526e-05, "loss": 1.615, "step": 269 }, { "epoch": 0.4976958525345622, "grad_norm": 0.6373534798622131, "learning_rate": 2.9605789290929214e-05, "loss": 1.6261, "step": 270 }, { "epoch": 0.49953917050691243, "grad_norm": 0.6510592699050903, "learning_rate": 2.9598419642726655e-05, "loss": 1.6202, "step": 271 }, { "epoch": 0.5013824884792627, "grad_norm": 0.6328617334365845, "learning_rate": 2.9590982678761544e-05, "loss": 1.5735, "step": 272 }, { "epoch": 0.5032258064516129, "grad_norm": 0.6566398739814758, "learning_rate": 2.958347843332696e-05, "loss": 1.6297, "step": 273 }, { "epoch": 0.5050691244239631, "grad_norm": 0.6189443469047546, "learning_rate": 2.957590694102624e-05, "loss": 1.5958, "step": 274 }, { "epoch": 0.5069124423963134, "grad_norm": 0.6545754075050354, "learning_rate": 2.9568268236772816e-05, "loss": 1.6404, "step": 275 }, { "epoch": 0.5087557603686635, "grad_norm": 0.6186112761497498, "learning_rate": 2.956056235579002e-05, "loss": 1.6051, "step": 276 }, { "epoch": 0.5105990783410138, "grad_norm": 0.6496976613998413, "learning_rate": 2.955278933361097e-05, "loss": 1.6244, "step": 277 }, { "epoch": 0.5124423963133641, "grad_norm": 0.6337511539459229, "learning_rate": 2.9544949206078372e-05, "loss": 1.6199, "step": 278 }, { "epoch": 0.5142857142857142, "grad_norm": 0.6249182224273682, "learning_rate": 2.9537042009344376e-05, "loss": 1.5762, "step": 279 }, { "epoch": 0.5161290322580645, "grad_norm": 0.6257767677307129, "learning_rate": 2.9529067779870385e-05, "loss": 1.5901, "step": 280 }, { "epoch": 0.5179723502304148, "grad_norm": 0.6280904412269592, "learning_rate": 2.952102655442692e-05, "loss": 1.5902, "step": 281 }, { "epoch": 0.5198156682027649, "grad_norm": 0.6255326271057129, "learning_rate": 2.9512918370093407e-05, "loss": 1.6444, "step": 282 }, { "epoch": 0.5216589861751152, "grad_norm": 0.6127741932868958, "learning_rate": 2.950474326425805e-05, "loss": 1.5508, "step": 283 }, { "epoch": 0.5235023041474655, "grad_norm": 0.6406272053718567, "learning_rate": 2.949650127461764e-05, "loss": 1.6305, "step": 284 }, { "epoch": 0.5253456221198156, "grad_norm": 0.6196235418319702, "learning_rate": 2.948819243917737e-05, "loss": 1.6698, "step": 285 }, { "epoch": 0.5271889400921659, "grad_norm": 0.6351480484008789, "learning_rate": 2.947981679625067e-05, "loss": 1.6892, "step": 286 }, { "epoch": 0.5290322580645161, "grad_norm": 0.6279726028442383, "learning_rate": 2.947137438445904e-05, "loss": 1.6301, "step": 287 }, { "epoch": 0.5308755760368664, "grad_norm": 0.632819414138794, "learning_rate": 2.9462865242731856e-05, "loss": 1.6269, "step": 288 }, { "epoch": 0.5327188940092166, "grad_norm": 0.6105265617370605, "learning_rate": 2.9454289410306202e-05, "loss": 1.5656, "step": 289 }, { "epoch": 0.5345622119815668, "grad_norm": 0.6371070146560669, "learning_rate": 2.944564692672667e-05, "loss": 1.669, "step": 290 }, { "epoch": 0.5364055299539171, "grad_norm": 0.6425980925559998, "learning_rate": 2.9436937831845217e-05, "loss": 1.6442, "step": 291 }, { "epoch": 0.5382488479262673, "grad_norm": 0.6330287456512451, "learning_rate": 2.942816216582093e-05, "loss": 1.6027, "step": 292 }, { "epoch": 0.5400921658986175, "grad_norm": 0.6005653142929077, "learning_rate": 2.9419319969119875e-05, "loss": 1.5609, "step": 293 }, { "epoch": 0.5419354838709678, "grad_norm": 0.6385816335678101, "learning_rate": 2.9410411282514913e-05, "loss": 1.6235, "step": 294 }, { "epoch": 0.543778801843318, "grad_norm": 0.6397689580917358, "learning_rate": 2.940143614708549e-05, "loss": 1.6219, "step": 295 }, { "epoch": 0.5456221198156682, "grad_norm": 0.6536114811897278, "learning_rate": 2.939239460421746e-05, "loss": 1.6438, "step": 296 }, { "epoch": 0.5474654377880185, "grad_norm": 0.6379759311676025, "learning_rate": 2.93832866956029e-05, "loss": 1.5976, "step": 297 }, { "epoch": 0.5493087557603686, "grad_norm": 0.6420155763626099, "learning_rate": 2.9374112463239896e-05, "loss": 1.6805, "step": 298 }, { "epoch": 0.5511520737327189, "grad_norm": 0.6541593074798584, "learning_rate": 2.9364871949432378e-05, "loss": 1.6579, "step": 299 }, { "epoch": 0.5529953917050692, "grad_norm": 0.6222660541534424, "learning_rate": 2.9355565196789906e-05, "loss": 1.6677, "step": 300 }, { "epoch": 0.5548387096774193, "grad_norm": 0.6186485886573792, "learning_rate": 2.9346192248227476e-05, "loss": 1.5948, "step": 301 }, { "epoch": 0.5566820276497696, "grad_norm": 0.6444990634918213, "learning_rate": 2.9336753146965327e-05, "loss": 1.6343, "step": 302 }, { "epoch": 0.5585253456221199, "grad_norm": 0.6367663741111755, "learning_rate": 2.9327247936528742e-05, "loss": 1.5646, "step": 303 }, { "epoch": 0.56036866359447, "grad_norm": 0.6469078660011292, "learning_rate": 2.9317676660747837e-05, "loss": 1.6245, "step": 304 }, { "epoch": 0.5622119815668203, "grad_norm": 0.6145840287208557, "learning_rate": 2.9308039363757372e-05, "loss": 1.6069, "step": 305 }, { "epoch": 0.5640552995391706, "grad_norm": 0.641346275806427, "learning_rate": 2.9298336089996538e-05, "loss": 1.6869, "step": 306 }, { "epoch": 0.5658986175115207, "grad_norm": 0.6414051651954651, "learning_rate": 2.9288566884208766e-05, "loss": 1.6019, "step": 307 }, { "epoch": 0.567741935483871, "grad_norm": 0.6625493764877319, "learning_rate": 2.9278731791441497e-05, "loss": 1.6001, "step": 308 }, { "epoch": 0.5695852534562212, "grad_norm": 0.6369771361351013, "learning_rate": 2.9268830857045997e-05, "loss": 1.5776, "step": 309 }, { "epoch": 0.5714285714285714, "grad_norm": 0.6421422958374023, "learning_rate": 2.9258864126677132e-05, "loss": 1.6161, "step": 310 }, { "epoch": 0.5732718894009217, "grad_norm": 0.6470807790756226, "learning_rate": 2.9248831646293174e-05, "loss": 1.6204, "step": 311 }, { "epoch": 0.5751152073732719, "grad_norm": 0.6122328639030457, "learning_rate": 2.9238733462155564e-05, "loss": 1.6082, "step": 312 }, { "epoch": 0.5769585253456221, "grad_norm": 0.6445775628089905, "learning_rate": 2.9228569620828735e-05, "loss": 1.597, "step": 313 }, { "epoch": 0.5788018433179724, "grad_norm": 0.6022579669952393, "learning_rate": 2.921834016917986e-05, "loss": 1.5485, "step": 314 }, { "epoch": 0.5806451612903226, "grad_norm": 0.6342116594314575, "learning_rate": 2.920804515437865e-05, "loss": 1.5924, "step": 315 }, { "epoch": 0.5824884792626728, "grad_norm": 0.6361576318740845, "learning_rate": 2.9197684623897157e-05, "loss": 1.6063, "step": 316 }, { "epoch": 0.584331797235023, "grad_norm": 0.6282086968421936, "learning_rate": 2.9187258625509518e-05, "loss": 1.6495, "step": 317 }, { "epoch": 0.5861751152073733, "grad_norm": 0.6247148513793945, "learning_rate": 2.917676720729177e-05, "loss": 1.6128, "step": 318 }, { "epoch": 0.5880184331797235, "grad_norm": 0.6350305080413818, "learning_rate": 2.916621041762159e-05, "loss": 1.6036, "step": 319 }, { "epoch": 0.5898617511520737, "grad_norm": 0.6136136651039124, "learning_rate": 2.9155588305178113e-05, "loss": 1.542, "step": 320 }, { "epoch": 0.591705069124424, "grad_norm": 0.6367561221122742, "learning_rate": 2.9144900918941687e-05, "loss": 1.589, "step": 321 }, { "epoch": 0.5935483870967742, "grad_norm": 0.6414176821708679, "learning_rate": 2.9134148308193637e-05, "loss": 1.6858, "step": 322 }, { "epoch": 0.5953917050691244, "grad_norm": 0.6203527450561523, "learning_rate": 2.9123330522516053e-05, "loss": 1.6199, "step": 323 }, { "epoch": 0.5972350230414747, "grad_norm": 0.613697350025177, "learning_rate": 2.9112447611791563e-05, "loss": 1.6074, "step": 324 }, { "epoch": 0.5990783410138248, "grad_norm": 0.6246370077133179, "learning_rate": 2.9101499626203102e-05, "loss": 1.5769, "step": 325 }, { "epoch": 0.6009216589861751, "grad_norm": 0.6141286492347717, "learning_rate": 2.9090486616233654e-05, "loss": 1.5905, "step": 326 }, { "epoch": 0.6027649769585254, "grad_norm": 0.6158791780471802, "learning_rate": 2.907940863266607e-05, "loss": 1.614, "step": 327 }, { "epoch": 0.6046082949308755, "grad_norm": 0.6051566004753113, "learning_rate": 2.906826572658278e-05, "loss": 1.5523, "step": 328 }, { "epoch": 0.6064516129032258, "grad_norm": 0.6224699020385742, "learning_rate": 2.9057057949365602e-05, "loss": 1.5882, "step": 329 }, { "epoch": 0.6082949308755761, "grad_norm": 0.6377244591712952, "learning_rate": 2.904578535269547e-05, "loss": 1.6132, "step": 330 }, { "epoch": 0.6101382488479262, "grad_norm": 0.6425986886024475, "learning_rate": 2.9034447988552227e-05, "loss": 1.6566, "step": 331 }, { "epoch": 0.6119815668202765, "grad_norm": 0.6360111236572266, "learning_rate": 2.902304590921435e-05, "loss": 1.6499, "step": 332 }, { "epoch": 0.6138248847926268, "grad_norm": 0.6156293749809265, "learning_rate": 2.9011579167258756e-05, "loss": 1.6282, "step": 333 }, { "epoch": 0.6156682027649769, "grad_norm": 0.6056556701660156, "learning_rate": 2.90000478155605e-05, "loss": 1.5418, "step": 334 }, { "epoch": 0.6175115207373272, "grad_norm": 0.6212233304977417, "learning_rate": 2.8988451907292594e-05, "loss": 1.6308, "step": 335 }, { "epoch": 0.6193548387096774, "grad_norm": 0.61768639087677, "learning_rate": 2.8976791495925704e-05, "loss": 1.6851, "step": 336 }, { "epoch": 0.6211981566820276, "grad_norm": 0.6400591135025024, "learning_rate": 2.896506663522795e-05, "loss": 1.6341, "step": 337 }, { "epoch": 0.6230414746543779, "grad_norm": 0.6200737953186035, "learning_rate": 2.8953277379264633e-05, "loss": 1.6279, "step": 338 }, { "epoch": 0.6248847926267281, "grad_norm": 0.6162114143371582, "learning_rate": 2.8941423782397987e-05, "loss": 1.6179, "step": 339 }, { "epoch": 0.6267281105990783, "grad_norm": 0.6042874455451965, "learning_rate": 2.892950589928694e-05, "loss": 1.6098, "step": 340 }, { "epoch": 0.6285714285714286, "grad_norm": 0.6231991052627563, "learning_rate": 2.8917523784886846e-05, "loss": 1.6555, "step": 341 }, { "epoch": 0.6304147465437788, "grad_norm": 0.612109899520874, "learning_rate": 2.890547749444925e-05, "loss": 1.6435, "step": 342 }, { "epoch": 0.632258064516129, "grad_norm": 0.6127018928527832, "learning_rate": 2.8893367083521616e-05, "loss": 1.5944, "step": 343 }, { "epoch": 0.6341013824884792, "grad_norm": 0.6134934425354004, "learning_rate": 2.888119260794708e-05, "loss": 1.5785, "step": 344 }, { "epoch": 0.6359447004608295, "grad_norm": 0.6125690340995789, "learning_rate": 2.8868954123864194e-05, "loss": 1.6162, "step": 345 }, { "epoch": 0.6377880184331797, "grad_norm": 0.5980225801467896, "learning_rate": 2.885665168770666e-05, "loss": 1.6162, "step": 346 }, { "epoch": 0.6396313364055299, "grad_norm": 0.606338620185852, "learning_rate": 2.8844285356203074e-05, "loss": 1.6034, "step": 347 }, { "epoch": 0.6414746543778802, "grad_norm": 0.6145090460777283, "learning_rate": 2.8831855186376672e-05, "loss": 1.6568, "step": 348 }, { "epoch": 0.6433179723502304, "grad_norm": 0.6216673851013184, "learning_rate": 2.8819361235545047e-05, "loss": 1.6628, "step": 349 }, { "epoch": 0.6451612903225806, "grad_norm": 0.6149229407310486, "learning_rate": 2.8806803561319903e-05, "loss": 1.6205, "step": 350 }, { "epoch": 0.6470046082949309, "grad_norm": 0.590297520160675, "learning_rate": 2.8794182221606784e-05, "loss": 1.5108, "step": 351 }, { "epoch": 0.6488479262672812, "grad_norm": 0.5937800407409668, "learning_rate": 2.878149727460481e-05, "loss": 1.543, "step": 352 }, { "epoch": 0.6506912442396313, "grad_norm": 0.6195124387741089, "learning_rate": 2.876874877880639e-05, "loss": 1.5507, "step": 353 }, { "epoch": 0.6525345622119816, "grad_norm": 0.6115301847457886, "learning_rate": 2.8755936792996987e-05, "loss": 1.6562, "step": 354 }, { "epoch": 0.6543778801843319, "grad_norm": 0.6090995669364929, "learning_rate": 2.8743061376254813e-05, "loss": 1.6053, "step": 355 }, { "epoch": 0.656221198156682, "grad_norm": 0.6057084798812866, "learning_rate": 2.873012258795057e-05, "loss": 1.6183, "step": 356 }, { "epoch": 0.6580645161290323, "grad_norm": 0.6177878379821777, "learning_rate": 2.8717120487747193e-05, "loss": 1.598, "step": 357 }, { "epoch": 0.6599078341013825, "grad_norm": 0.5839776396751404, "learning_rate": 2.870405513559954e-05, "loss": 1.5645, "step": 358 }, { "epoch": 0.6617511520737327, "grad_norm": 0.6190894246101379, "learning_rate": 2.8690926591754142e-05, "loss": 1.5611, "step": 359 }, { "epoch": 0.663594470046083, "grad_norm": 0.5953087210655212, "learning_rate": 2.8677734916748927e-05, "loss": 1.6132, "step": 360 }, { "epoch": 0.6654377880184332, "grad_norm": 0.6048798561096191, "learning_rate": 2.866448017141291e-05, "loss": 1.5858, "step": 361 }, { "epoch": 0.6672811059907834, "grad_norm": 0.6047928929328918, "learning_rate": 2.865116241686595e-05, "loss": 1.6597, "step": 362 }, { "epoch": 0.6691244239631337, "grad_norm": 0.6029597520828247, "learning_rate": 2.863778171451845e-05, "loss": 1.596, "step": 363 }, { "epoch": 0.6709677419354839, "grad_norm": 0.6115718483924866, "learning_rate": 2.8624338126071073e-05, "loss": 1.5827, "step": 364 }, { "epoch": 0.6728110599078341, "grad_norm": 0.6102750897407532, "learning_rate": 2.861083171351446e-05, "loss": 1.5579, "step": 365 }, { "epoch": 0.6746543778801843, "grad_norm": 0.6236565709114075, "learning_rate": 2.8597262539128947e-05, "loss": 1.6574, "step": 366 }, { "epoch": 0.6764976958525346, "grad_norm": 0.607481837272644, "learning_rate": 2.858363066548427e-05, "loss": 1.6287, "step": 367 }, { "epoch": 0.6783410138248848, "grad_norm": 0.5920954346656799, "learning_rate": 2.856993615543929e-05, "loss": 1.5874, "step": 368 }, { "epoch": 0.680184331797235, "grad_norm": 0.5899093747138977, "learning_rate": 2.8556179072141693e-05, "loss": 1.5885, "step": 369 }, { "epoch": 0.6820276497695853, "grad_norm": 0.6161470413208008, "learning_rate": 2.8542359479027693e-05, "loss": 1.5329, "step": 370 }, { "epoch": 0.6838709677419355, "grad_norm": 0.5827044248580933, "learning_rate": 2.8528477439821753e-05, "loss": 1.6023, "step": 371 }, { "epoch": 0.6857142857142857, "grad_norm": 0.6086022257804871, "learning_rate": 2.8514533018536286e-05, "loss": 1.6226, "step": 372 }, { "epoch": 0.687557603686636, "grad_norm": 0.6146913766860962, "learning_rate": 2.8500526279471362e-05, "loss": 1.5709, "step": 373 }, { "epoch": 0.6894009216589861, "grad_norm": 0.6071475148200989, "learning_rate": 2.8486457287214403e-05, "loss": 1.6433, "step": 374 }, { "epoch": 0.6912442396313364, "grad_norm": 0.6268134117126465, "learning_rate": 2.8472326106639896e-05, "loss": 1.6552, "step": 375 }, { "epoch": 0.6930875576036867, "grad_norm": 0.5837965607643127, "learning_rate": 2.8458132802909075e-05, "loss": 1.5622, "step": 376 }, { "epoch": 0.6949308755760368, "grad_norm": 0.6217747330665588, "learning_rate": 2.8443877441469653e-05, "loss": 1.6059, "step": 377 }, { "epoch": 0.6967741935483871, "grad_norm": 0.5973568558692932, "learning_rate": 2.8429560088055502e-05, "loss": 1.5998, "step": 378 }, { "epoch": 0.6986175115207374, "grad_norm": 0.6079529523849487, "learning_rate": 2.8415180808686326e-05, "loss": 1.577, "step": 379 }, { "epoch": 0.7004608294930875, "grad_norm": 0.5889536142349243, "learning_rate": 2.84007396696674e-05, "loss": 1.6045, "step": 380 }, { "epoch": 0.7023041474654378, "grad_norm": 0.594224750995636, "learning_rate": 2.8386236737589244e-05, "loss": 1.5916, "step": 381 }, { "epoch": 0.7041474654377881, "grad_norm": 0.5908277034759521, "learning_rate": 2.8371672079327304e-05, "loss": 1.5616, "step": 382 }, { "epoch": 0.7059907834101382, "grad_norm": 0.605206310749054, "learning_rate": 2.835704576204167e-05, "loss": 1.6351, "step": 383 }, { "epoch": 0.7078341013824885, "grad_norm": 0.5998572707176208, "learning_rate": 2.8342357853176742e-05, "loss": 1.6206, "step": 384 }, { "epoch": 0.7096774193548387, "grad_norm": 0.6308000683784485, "learning_rate": 2.8327608420460933e-05, "loss": 1.6266, "step": 385 }, { "epoch": 0.7115207373271889, "grad_norm": 0.6080827713012695, "learning_rate": 2.8312797531906346e-05, "loss": 1.6199, "step": 386 }, { "epoch": 0.7133640552995392, "grad_norm": 0.5943049192428589, "learning_rate": 2.8297925255808484e-05, "loss": 1.5509, "step": 387 }, { "epoch": 0.7152073732718894, "grad_norm": 0.5954582095146179, "learning_rate": 2.82829916607459e-05, "loss": 1.5289, "step": 388 }, { "epoch": 0.7170506912442396, "grad_norm": 0.5943238139152527, "learning_rate": 2.826799681557991e-05, "loss": 1.5963, "step": 389 }, { "epoch": 0.7188940092165899, "grad_norm": 0.6012473702430725, "learning_rate": 2.8252940789454268e-05, "loss": 1.6007, "step": 390 }, { "epoch": 0.7207373271889401, "grad_norm": 0.6040669679641724, "learning_rate": 2.823782365179482e-05, "loss": 1.6802, "step": 391 }, { "epoch": 0.7225806451612903, "grad_norm": 0.6036317348480225, "learning_rate": 2.822264547230924e-05, "loss": 1.6206, "step": 392 }, { "epoch": 0.7244239631336405, "grad_norm": 0.5940215587615967, "learning_rate": 2.820740632098665e-05, "loss": 1.5686, "step": 393 }, { "epoch": 0.7262672811059908, "grad_norm": 0.5901139974594116, "learning_rate": 2.8192106268097336e-05, "loss": 1.5891, "step": 394 }, { "epoch": 0.728110599078341, "grad_norm": 0.5984019041061401, "learning_rate": 2.8176745384192417e-05, "loss": 1.5882, "step": 395 }, { "epoch": 0.7299539170506912, "grad_norm": 0.580982506275177, "learning_rate": 2.8161323740103495e-05, "loss": 1.5787, "step": 396 }, { "epoch": 0.7317972350230415, "grad_norm": 0.6106325387954712, "learning_rate": 2.814584140694237e-05, "loss": 1.6653, "step": 397 }, { "epoch": 0.7336405529953917, "grad_norm": 0.6030483245849609, "learning_rate": 2.8130298456100667e-05, "loss": 1.6355, "step": 398 }, { "epoch": 0.7354838709677419, "grad_norm": 0.6000080108642578, "learning_rate": 2.811469495924955e-05, "loss": 1.6143, "step": 399 }, { "epoch": 0.7373271889400922, "grad_norm": 0.5923748016357422, "learning_rate": 2.8099030988339353e-05, "loss": 1.5152, "step": 400 }, { "epoch": 0.7391705069124423, "grad_norm": 0.5920352935791016, "learning_rate": 2.8083306615599283e-05, "loss": 1.6055, "step": 401 }, { "epoch": 0.7410138248847926, "grad_norm": 0.6110079884529114, "learning_rate": 2.8067521913537047e-05, "loss": 1.6085, "step": 402 }, { "epoch": 0.7428571428571429, "grad_norm": 0.6105523109436035, "learning_rate": 2.8051676954938574e-05, "loss": 1.6087, "step": 403 }, { "epoch": 0.744700460829493, "grad_norm": 0.6023123860359192, "learning_rate": 2.8035771812867613e-05, "loss": 1.6005, "step": 404 }, { "epoch": 0.7465437788018433, "grad_norm": 0.631851077079773, "learning_rate": 2.801980656066545e-05, "loss": 1.6622, "step": 405 }, { "epoch": 0.7483870967741936, "grad_norm": 0.6138731837272644, "learning_rate": 2.8003781271950535e-05, "loss": 1.6583, "step": 406 }, { "epoch": 0.7502304147465437, "grad_norm": 0.5950550436973572, "learning_rate": 2.7987696020618163e-05, "loss": 1.5431, "step": 407 }, { "epoch": 0.752073732718894, "grad_norm": 0.5777524709701538, "learning_rate": 2.7971550880840138e-05, "loss": 1.5939, "step": 408 }, { "epoch": 0.7539170506912443, "grad_norm": 0.6048431992530823, "learning_rate": 2.79553459270644e-05, "loss": 1.5933, "step": 409 }, { "epoch": 0.7557603686635944, "grad_norm": 0.59700608253479, "learning_rate": 2.7939081234014708e-05, "loss": 1.6151, "step": 410 }, { "epoch": 0.7576036866359447, "grad_norm": 0.5976991057395935, "learning_rate": 2.7922756876690298e-05, "loss": 1.6591, "step": 411 }, { "epoch": 0.759447004608295, "grad_norm": 0.6004019379615784, "learning_rate": 2.790637293036552e-05, "loss": 1.6231, "step": 412 }, { "epoch": 0.7612903225806451, "grad_norm": 0.614355206489563, "learning_rate": 2.7889929470589494e-05, "loss": 1.6157, "step": 413 }, { "epoch": 0.7631336405529954, "grad_norm": 0.5971264243125916, "learning_rate": 2.7873426573185777e-05, "loss": 1.6251, "step": 414 }, { "epoch": 0.7649769585253456, "grad_norm": 0.5966277122497559, "learning_rate": 2.7856864314251994e-05, "loss": 1.6138, "step": 415 }, { "epoch": 0.7668202764976959, "grad_norm": 0.5930648446083069, "learning_rate": 2.78402427701595e-05, "loss": 1.6057, "step": 416 }, { "epoch": 0.7686635944700461, "grad_norm": 0.6035694479942322, "learning_rate": 2.782356201755303e-05, "loss": 1.6312, "step": 417 }, { "epoch": 0.7705069124423963, "grad_norm": 0.5882773995399475, "learning_rate": 2.780682213335033e-05, "loss": 1.5961, "step": 418 }, { "epoch": 0.7723502304147466, "grad_norm": 0.6116384267807007, "learning_rate": 2.7790023194741812e-05, "loss": 1.6408, "step": 419 }, { "epoch": 0.7741935483870968, "grad_norm": 0.5867911577224731, "learning_rate": 2.7773165279190206e-05, "loss": 1.592, "step": 420 }, { "epoch": 0.776036866359447, "grad_norm": 0.5933607220649719, "learning_rate": 2.7756248464430186e-05, "loss": 1.6015, "step": 421 }, { "epoch": 0.7778801843317973, "grad_norm": 0.5931692719459534, "learning_rate": 2.7739272828468022e-05, "loss": 1.5793, "step": 422 }, { "epoch": 0.7797235023041474, "grad_norm": 0.5770699977874756, "learning_rate": 2.7722238449581227e-05, "loss": 1.5885, "step": 423 }, { "epoch": 0.7815668202764977, "grad_norm": 0.5926551222801208, "learning_rate": 2.7705145406318167e-05, "loss": 1.6792, "step": 424 }, { "epoch": 0.783410138248848, "grad_norm": 0.586753249168396, "learning_rate": 2.7687993777497747e-05, "loss": 1.5396, "step": 425 }, { "epoch": 0.7852534562211981, "grad_norm": 0.5846116542816162, "learning_rate": 2.7670783642208996e-05, "loss": 1.5881, "step": 426 }, { "epoch": 0.7870967741935484, "grad_norm": 0.596070408821106, "learning_rate": 2.7653515079810744e-05, "loss": 1.6602, "step": 427 }, { "epoch": 0.7889400921658987, "grad_norm": 0.5840433835983276, "learning_rate": 2.7636188169931217e-05, "loss": 1.6387, "step": 428 }, { "epoch": 0.7907834101382488, "grad_norm": 0.5922804474830627, "learning_rate": 2.7618802992467718e-05, "loss": 1.6529, "step": 429 }, { "epoch": 0.7926267281105991, "grad_norm": 0.5911166071891785, "learning_rate": 2.760135962758621e-05, "loss": 1.6465, "step": 430 }, { "epoch": 0.7944700460829494, "grad_norm": 0.6027465462684631, "learning_rate": 2.7583858155720977e-05, "loss": 1.599, "step": 431 }, { "epoch": 0.7963133640552995, "grad_norm": 0.5964241027832031, "learning_rate": 2.756629865757424e-05, "loss": 1.6362, "step": 432 }, { "epoch": 0.7981566820276498, "grad_norm": 0.5907885432243347, "learning_rate": 2.7548681214115798e-05, "loss": 1.5406, "step": 433 }, { "epoch": 0.8, "grad_norm": 0.607391893863678, "learning_rate": 2.7531005906582628e-05, "loss": 1.6535, "step": 434 }, { "epoch": 0.8018433179723502, "grad_norm": 0.5928655862808228, "learning_rate": 2.7513272816478554e-05, "loss": 1.5837, "step": 435 }, { "epoch": 0.8036866359447005, "grad_norm": 0.5965401530265808, "learning_rate": 2.7495482025573817e-05, "loss": 1.6395, "step": 436 }, { "epoch": 0.8055299539170507, "grad_norm": 0.5932811498641968, "learning_rate": 2.7477633615904744e-05, "loss": 1.6221, "step": 437 }, { "epoch": 0.8073732718894009, "grad_norm": 0.6015645265579224, "learning_rate": 2.7459727669773344e-05, "loss": 1.6323, "step": 438 }, { "epoch": 0.8092165898617512, "grad_norm": 0.5925923585891724, "learning_rate": 2.7441764269746946e-05, "loss": 1.6063, "step": 439 }, { "epoch": 0.8110599078341014, "grad_norm": 0.6017428636550903, "learning_rate": 2.7423743498657794e-05, "loss": 1.5991, "step": 440 }, { "epoch": 0.8129032258064516, "grad_norm": 0.6025856733322144, "learning_rate": 2.7405665439602695e-05, "loss": 1.6966, "step": 441 }, { "epoch": 0.8147465437788018, "grad_norm": 0.6528899669647217, "learning_rate": 2.7387530175942604e-05, "loss": 1.61, "step": 442 }, { "epoch": 0.8165898617511521, "grad_norm": 0.624174177646637, "learning_rate": 2.7369337791302272e-05, "loss": 1.6135, "step": 443 }, { "epoch": 0.8184331797235023, "grad_norm": 0.5888923406600952, "learning_rate": 2.7351088369569833e-05, "loss": 1.6345, "step": 444 }, { "epoch": 0.8202764976958525, "grad_norm": 0.5956777334213257, "learning_rate": 2.7332781994896438e-05, "loss": 1.6247, "step": 445 }, { "epoch": 0.8221198156682028, "grad_norm": 0.5883004069328308, "learning_rate": 2.7314418751695845e-05, "loss": 1.5897, "step": 446 }, { "epoch": 0.823963133640553, "grad_norm": 0.5987078547477722, "learning_rate": 2.7295998724644058e-05, "loss": 1.5766, "step": 447 }, { "epoch": 0.8258064516129032, "grad_norm": 0.59053635597229, "learning_rate": 2.7277521998678904e-05, "loss": 1.6002, "step": 448 }, { "epoch": 0.8276497695852535, "grad_norm": 0.6095952987670898, "learning_rate": 2.725898865899967e-05, "loss": 1.6435, "step": 449 }, { "epoch": 0.8294930875576036, "grad_norm": 0.5901498794555664, "learning_rate": 2.72403987910667e-05, "loss": 1.5964, "step": 450 }, { "epoch": 0.8313364055299539, "grad_norm": 0.5867377519607544, "learning_rate": 2.722175248060099e-05, "loss": 1.5673, "step": 451 }, { "epoch": 0.8331797235023042, "grad_norm": 0.5956171154975891, "learning_rate": 2.7203049813583803e-05, "loss": 1.5622, "step": 452 }, { "epoch": 0.8350230414746543, "grad_norm": 0.5861604809761047, "learning_rate": 2.7184290876256278e-05, "loss": 1.5743, "step": 453 }, { "epoch": 0.8368663594470046, "grad_norm": 0.5932233929634094, "learning_rate": 2.716547575511903e-05, "loss": 1.6032, "step": 454 }, { "epoch": 0.8387096774193549, "grad_norm": 0.5999817252159119, "learning_rate": 2.714660453693173e-05, "loss": 1.6612, "step": 455 }, { "epoch": 0.840552995391705, "grad_norm": 0.6004263758659363, "learning_rate": 2.7127677308712733e-05, "loss": 1.6251, "step": 456 }, { "epoch": 0.8423963133640553, "grad_norm": 0.5962221622467041, "learning_rate": 2.710869415773867e-05, "loss": 1.5461, "step": 457 }, { "epoch": 0.8442396313364056, "grad_norm": 0.6119489073753357, "learning_rate": 2.7089655171544026e-05, "loss": 1.6611, "step": 458 }, { "epoch": 0.8460829493087557, "grad_norm": 0.5894267559051514, "learning_rate": 2.707056043792077e-05, "loss": 1.5934, "step": 459 }, { "epoch": 0.847926267281106, "grad_norm": 0.5871574282646179, "learning_rate": 2.705141004491792e-05, "loss": 1.652, "step": 460 }, { "epoch": 0.8497695852534562, "grad_norm": 0.6047919392585754, "learning_rate": 2.703220408084115e-05, "loss": 1.6064, "step": 461 }, { "epoch": 0.8516129032258064, "grad_norm": 0.5787231922149658, "learning_rate": 2.7012942634252384e-05, "loss": 1.5154, "step": 462 }, { "epoch": 0.8534562211981567, "grad_norm": 0.5826706886291504, "learning_rate": 2.6993625793969383e-05, "loss": 1.6447, "step": 463 }, { "epoch": 0.8552995391705069, "grad_norm": 0.5927720069885254, "learning_rate": 2.697425364906534e-05, "loss": 1.6006, "step": 464 }, { "epoch": 0.8571428571428571, "grad_norm": 0.5936691761016846, "learning_rate": 2.6954826288868463e-05, "loss": 1.5829, "step": 465 }, { "epoch": 0.8589861751152074, "grad_norm": 0.5819424390792847, "learning_rate": 2.693534380296158e-05, "loss": 1.5587, "step": 466 }, { "epoch": 0.8608294930875576, "grad_norm": 0.6030734181404114, "learning_rate": 2.6915806281181688e-05, "loss": 1.6062, "step": 467 }, { "epoch": 0.8626728110599078, "grad_norm": 0.5921188592910767, "learning_rate": 2.6896213813619592e-05, "loss": 1.6007, "step": 468 }, { "epoch": 0.864516129032258, "grad_norm": 0.5946605205535889, "learning_rate": 2.6876566490619437e-05, "loss": 1.565, "step": 469 }, { "epoch": 0.8663594470046083, "grad_norm": 0.5877719521522522, "learning_rate": 2.685686440277833e-05, "loss": 1.5992, "step": 470 }, { "epoch": 0.8682027649769585, "grad_norm": 0.5992136597633362, "learning_rate": 2.6837107640945904e-05, "loss": 1.6587, "step": 471 }, { "epoch": 0.8700460829493087, "grad_norm": 0.5893521904945374, "learning_rate": 2.681729629622391e-05, "loss": 1.5592, "step": 472 }, { "epoch": 0.871889400921659, "grad_norm": 0.5969487428665161, "learning_rate": 2.6797430459965766e-05, "loss": 1.6201, "step": 473 }, { "epoch": 0.8737327188940092, "grad_norm": 0.582004189491272, "learning_rate": 2.6777510223776187e-05, "loss": 1.5921, "step": 474 }, { "epoch": 0.8755760368663594, "grad_norm": 0.5922407507896423, "learning_rate": 2.6757535679510727e-05, "loss": 1.5698, "step": 475 }, { "epoch": 0.8774193548387097, "grad_norm": 0.5871175527572632, "learning_rate": 2.6737506919275363e-05, "loss": 1.6294, "step": 476 }, { "epoch": 0.8792626728110599, "grad_norm": 0.5947887301445007, "learning_rate": 2.6717424035426054e-05, "loss": 1.599, "step": 477 }, { "epoch": 0.8811059907834101, "grad_norm": 0.6123536229133606, "learning_rate": 2.6697287120568364e-05, "loss": 1.6298, "step": 478 }, { "epoch": 0.8829493087557604, "grad_norm": 0.5811368227005005, "learning_rate": 2.6677096267556984e-05, "loss": 1.565, "step": 479 }, { "epoch": 0.8847926267281107, "grad_norm": 0.604063093662262, "learning_rate": 2.6656851569495316e-05, "loss": 1.6195, "step": 480 }, { "epoch": 0.8866359447004608, "grad_norm": 0.6037458181381226, "learning_rate": 2.6636553119735066e-05, "loss": 1.5856, "step": 481 }, { "epoch": 0.8884792626728111, "grad_norm": 0.5966126322746277, "learning_rate": 2.6616201011875792e-05, "loss": 1.601, "step": 482 }, { "epoch": 0.8903225806451613, "grad_norm": 0.5849453210830688, "learning_rate": 2.6595795339764478e-05, "loss": 1.5757, "step": 483 }, { "epoch": 0.8921658986175115, "grad_norm": 0.5842651128768921, "learning_rate": 2.6575336197495098e-05, "loss": 1.5725, "step": 484 }, { "epoch": 0.8940092165898618, "grad_norm": 0.6374698877334595, "learning_rate": 2.6554823679408195e-05, "loss": 1.6265, "step": 485 }, { "epoch": 0.895852534562212, "grad_norm": 0.5950853228569031, "learning_rate": 2.653425788009043e-05, "loss": 1.5227, "step": 486 }, { "epoch": 0.8976958525345622, "grad_norm": 0.5879155397415161, "learning_rate": 2.6513638894374158e-05, "loss": 1.6458, "step": 487 }, { "epoch": 0.8995391705069125, "grad_norm": 0.6029437780380249, "learning_rate": 2.6492966817336977e-05, "loss": 1.5862, "step": 488 }, { "epoch": 0.9013824884792627, "grad_norm": 0.5757433176040649, "learning_rate": 2.6472241744301304e-05, "loss": 1.5629, "step": 489 }, { "epoch": 0.9032258064516129, "grad_norm": 0.5847199559211731, "learning_rate": 2.645146377083393e-05, "loss": 1.5599, "step": 490 }, { "epoch": 0.9050691244239631, "grad_norm": 0.5835124850273132, "learning_rate": 2.6430632992745577e-05, "loss": 1.5966, "step": 491 }, { "epoch": 0.9069124423963134, "grad_norm": 0.6034891605377197, "learning_rate": 2.6409749506090456e-05, "loss": 1.6021, "step": 492 }, { "epoch": 0.9087557603686636, "grad_norm": 0.593477189540863, "learning_rate": 2.638881340716583e-05, "loss": 1.6751, "step": 493 }, { "epoch": 0.9105990783410138, "grad_norm": 0.5806865096092224, "learning_rate": 2.6367824792511565e-05, "loss": 1.6338, "step": 494 }, { "epoch": 0.9124423963133641, "grad_norm": 0.6020203828811646, "learning_rate": 2.6346783758909683e-05, "loss": 1.6274, "step": 495 }, { "epoch": 0.9142857142857143, "grad_norm": 0.602532684803009, "learning_rate": 2.632569040338392e-05, "loss": 1.5896, "step": 496 }, { "epoch": 0.9161290322580645, "grad_norm": 0.5935066938400269, "learning_rate": 2.6304544823199282e-05, "loss": 1.5898, "step": 497 }, { "epoch": 0.9179723502304148, "grad_norm": 0.6123016476631165, "learning_rate": 2.6283347115861586e-05, "loss": 1.6026, "step": 498 }, { "epoch": 0.919815668202765, "grad_norm": 0.6005870699882507, "learning_rate": 2.6262097379117015e-05, "loss": 1.6302, "step": 499 }, { "epoch": 0.9216589861751152, "grad_norm": 0.5887797474861145, "learning_rate": 2.624079571095167e-05, "loss": 1.6143, "step": 500 }, { "epoch": 0.9235023041474655, "grad_norm": 0.5779260993003845, "learning_rate": 2.6219442209591123e-05, "loss": 1.6581, "step": 501 }, { "epoch": 0.9253456221198156, "grad_norm": 0.5799979567527771, "learning_rate": 2.619803697349994e-05, "loss": 1.5836, "step": 502 }, { "epoch": 0.9271889400921659, "grad_norm": 0.5896419286727905, "learning_rate": 2.6176580101381273e-05, "loss": 1.5648, "step": 503 }, { "epoch": 0.9290322580645162, "grad_norm": 0.5804762840270996, "learning_rate": 2.6155071692176348e-05, "loss": 1.5778, "step": 504 }, { "epoch": 0.9308755760368663, "grad_norm": 0.5797714591026306, "learning_rate": 2.613351184506405e-05, "loss": 1.6142, "step": 505 }, { "epoch": 0.9327188940092166, "grad_norm": 0.6004733443260193, "learning_rate": 2.6111900659460455e-05, "loss": 1.6014, "step": 506 }, { "epoch": 0.9345622119815669, "grad_norm": 0.5803670883178711, "learning_rate": 2.6090238235018365e-05, "loss": 1.5852, "step": 507 }, { "epoch": 0.936405529953917, "grad_norm": 0.5783745050430298, "learning_rate": 2.6068524671626856e-05, "loss": 1.6339, "step": 508 }, { "epoch": 0.9382488479262673, "grad_norm": 0.5670767426490784, "learning_rate": 2.6046760069410806e-05, "loss": 1.5784, "step": 509 }, { "epoch": 0.9400921658986175, "grad_norm": 0.5693550705909729, "learning_rate": 2.6024944528730453e-05, "loss": 1.5551, "step": 510 }, { "epoch": 0.9419354838709677, "grad_norm": 0.5894502997398376, "learning_rate": 2.6003078150180922e-05, "loss": 1.6354, "step": 511 }, { "epoch": 0.943778801843318, "grad_norm": 0.5847663283348083, "learning_rate": 2.598116103459174e-05, "loss": 1.5898, "step": 512 }, { "epoch": 0.9456221198156682, "grad_norm": 0.5859915614128113, "learning_rate": 2.595919328302641e-05, "loss": 1.5634, "step": 513 }, { "epoch": 0.9474654377880184, "grad_norm": 0.5709742307662964, "learning_rate": 2.5937174996781927e-05, "loss": 1.5446, "step": 514 }, { "epoch": 0.9493087557603687, "grad_norm": 0.5835738182067871, "learning_rate": 2.5915106277388293e-05, "loss": 1.552, "step": 515 }, { "epoch": 0.9511520737327189, "grad_norm": 0.5724872350692749, "learning_rate": 2.5892987226608082e-05, "loss": 1.5853, "step": 516 }, { "epoch": 0.9529953917050691, "grad_norm": 0.5722587704658508, "learning_rate": 2.5870817946435953e-05, "loss": 1.5472, "step": 517 }, { "epoch": 0.9548387096774194, "grad_norm": 0.5913331508636475, "learning_rate": 2.5848598539098164e-05, "loss": 1.6198, "step": 518 }, { "epoch": 0.9566820276497696, "grad_norm": 0.5760647654533386, "learning_rate": 2.5826329107052144e-05, "loss": 1.559, "step": 519 }, { "epoch": 0.9585253456221198, "grad_norm": 0.5775203108787537, "learning_rate": 2.5804009752985975e-05, "loss": 1.5772, "step": 520 }, { "epoch": 0.96036866359447, "grad_norm": 0.5921741127967834, "learning_rate": 2.5781640579817946e-05, "loss": 1.5963, "step": 521 }, { "epoch": 0.9622119815668203, "grad_norm": 0.5962147116661072, "learning_rate": 2.5759221690696062e-05, "loss": 1.5971, "step": 522 }, { "epoch": 0.9640552995391705, "grad_norm": 0.5759799480438232, "learning_rate": 2.573675318899759e-05, "loss": 1.6111, "step": 523 }, { "epoch": 0.9658986175115207, "grad_norm": 0.5976126790046692, "learning_rate": 2.5714235178328554e-05, "loss": 1.6293, "step": 524 }, { "epoch": 0.967741935483871, "grad_norm": 0.6145676374435425, "learning_rate": 2.5691667762523284e-05, "loss": 1.5648, "step": 525 }, { "epoch": 0.9695852534562212, "grad_norm": 0.5874765515327454, "learning_rate": 2.566905104564393e-05, "loss": 1.5917, "step": 526 }, { "epoch": 0.9714285714285714, "grad_norm": 0.5827004909515381, "learning_rate": 2.564638513197995e-05, "loss": 1.5413, "step": 527 }, { "epoch": 0.9732718894009217, "grad_norm": 0.5728251338005066, "learning_rate": 2.562367012604769e-05, "loss": 1.5717, "step": 528 }, { "epoch": 0.9751152073732718, "grad_norm": 0.5728136301040649, "learning_rate": 2.5600906132589846e-05, "loss": 1.5968, "step": 529 }, { "epoch": 0.9769585253456221, "grad_norm": 0.5801060199737549, "learning_rate": 2.557809325657501e-05, "loss": 1.568, "step": 530 }, { "epoch": 0.9788018433179724, "grad_norm": 0.5835599899291992, "learning_rate": 2.555523160319719e-05, "loss": 1.5918, "step": 531 }, { "epoch": 0.9806451612903225, "grad_norm": 0.5866450071334839, "learning_rate": 2.5532321277875305e-05, "loss": 1.6424, "step": 532 }, { "epoch": 0.9824884792626728, "grad_norm": 0.5647181868553162, "learning_rate": 2.5509362386252702e-05, "loss": 1.5842, "step": 533 }, { "epoch": 0.9843317972350231, "grad_norm": 0.5813072919845581, "learning_rate": 2.5486355034196686e-05, "loss": 1.6344, "step": 534 }, { "epoch": 0.9861751152073732, "grad_norm": 0.5855548977851868, "learning_rate": 2.5463299327798015e-05, "loss": 1.6395, "step": 535 }, { "epoch": 0.9880184331797235, "grad_norm": 0.5718809962272644, "learning_rate": 2.544019537337043e-05, "loss": 1.5976, "step": 536 }, { "epoch": 0.9898617511520738, "grad_norm": 0.5813997387886047, "learning_rate": 2.541704327745013e-05, "loss": 1.6117, "step": 537 }, { "epoch": 0.9917050691244239, "grad_norm": 0.5889962315559387, "learning_rate": 2.539384314679532e-05, "loss": 1.6184, "step": 538 }, { "epoch": 0.9935483870967742, "grad_norm": 0.5822246670722961, "learning_rate": 2.5370595088385696e-05, "loss": 1.5694, "step": 539 }, { "epoch": 0.9953917050691244, "grad_norm": 0.5867295861244202, "learning_rate": 2.5347299209421955e-05, "loss": 1.5771, "step": 540 }, { "epoch": 0.9972350230414746, "grad_norm": 0.5868269205093384, "learning_rate": 2.53239556173253e-05, "loss": 1.593, "step": 541 }, { "epoch": 0.9990783410138249, "grad_norm": 0.5741949081420898, "learning_rate": 2.530056441973696e-05, "loss": 1.5291, "step": 542 }, { "epoch": 1.0009216589861751, "grad_norm": 0.5708165764808655, "learning_rate": 2.5277125724517665e-05, "loss": 1.5793, "step": 543 }, { "epoch": 1.0027649769585254, "grad_norm": 0.5975103974342346, "learning_rate": 2.525363963974717e-05, "loss": 1.5366, "step": 544 }, { "epoch": 1.0046082949308757, "grad_norm": 0.5615414977073669, "learning_rate": 2.523010627372376e-05, "loss": 1.4192, "step": 545 }, { "epoch": 1.0064516129032257, "grad_norm": 0.6589085459709167, "learning_rate": 2.520652573496373e-05, "loss": 1.5098, "step": 546 }, { "epoch": 1.008294930875576, "grad_norm": 0.6412519812583923, "learning_rate": 2.51828981322009e-05, "loss": 1.5223, "step": 547 }, { "epoch": 1.0101382488479262, "grad_norm": 0.6192975044250488, "learning_rate": 2.5159223574386117e-05, "loss": 1.5095, "step": 548 }, { "epoch": 1.0119815668202765, "grad_norm": 0.6341859698295593, "learning_rate": 2.513550217068673e-05, "loss": 1.5128, "step": 549 }, { "epoch": 1.0138248847926268, "grad_norm": 0.623396635055542, "learning_rate": 2.5111734030486127e-05, "loss": 1.4798, "step": 550 }, { "epoch": 1.015668202764977, "grad_norm": 0.6472728848457336, "learning_rate": 2.508791926338317e-05, "loss": 1.4911, "step": 551 }, { "epoch": 1.017511520737327, "grad_norm": 0.6628040075302124, "learning_rate": 2.5064057979191766e-05, "loss": 1.4964, "step": 552 }, { "epoch": 1.0193548387096774, "grad_norm": 0.6454623341560364, "learning_rate": 2.5040150287940286e-05, "loss": 1.4927, "step": 553 }, { "epoch": 1.0211981566820276, "grad_norm": 0.6401841640472412, "learning_rate": 2.5016196299871115e-05, "loss": 1.4844, "step": 554 }, { "epoch": 1.023041474654378, "grad_norm": 0.6242401599884033, "learning_rate": 2.49921961254401e-05, "loss": 1.5195, "step": 555 }, { "epoch": 1.0248847926267282, "grad_norm": 0.6155444979667664, "learning_rate": 2.496814987531609e-05, "loss": 1.5232, "step": 556 }, { "epoch": 1.0267281105990784, "grad_norm": 0.6328471899032593, "learning_rate": 2.4944057660380363e-05, "loss": 1.5214, "step": 557 }, { "epoch": 1.0285714285714285, "grad_norm": 0.616779625415802, "learning_rate": 2.4919919591726175e-05, "loss": 1.4965, "step": 558 }, { "epoch": 1.0304147465437787, "grad_norm": 0.6296596527099609, "learning_rate": 2.489573578065821e-05, "loss": 1.5033, "step": 559 }, { "epoch": 1.032258064516129, "grad_norm": 0.6397867202758789, "learning_rate": 2.487150633869207e-05, "loss": 1.4613, "step": 560 }, { "epoch": 1.0341013824884793, "grad_norm": 0.6106263995170593, "learning_rate": 2.484723137755379e-05, "loss": 1.4658, "step": 561 }, { "epoch": 1.0359447004608295, "grad_norm": 0.6248694658279419, "learning_rate": 2.482291100917928e-05, "loss": 1.599, "step": 562 }, { "epoch": 1.0377880184331798, "grad_norm": 0.610860288143158, "learning_rate": 2.4798545345713837e-05, "loss": 1.4927, "step": 563 }, { "epoch": 1.0396313364055298, "grad_norm": 0.6477883458137512, "learning_rate": 2.4774134499511636e-05, "loss": 1.5784, "step": 564 }, { "epoch": 1.0414746543778801, "grad_norm": 0.6159871816635132, "learning_rate": 2.4749678583135175e-05, "loss": 1.4319, "step": 565 }, { "epoch": 1.0433179723502304, "grad_norm": 0.6450619697570801, "learning_rate": 2.472517770935479e-05, "loss": 1.5053, "step": 566 }, { "epoch": 1.0451612903225806, "grad_norm": 0.6325564384460449, "learning_rate": 2.4700631991148126e-05, "loss": 1.4503, "step": 567 }, { "epoch": 1.047004608294931, "grad_norm": 0.6441475749015808, "learning_rate": 2.46760415416996e-05, "loss": 1.5312, "step": 568 }, { "epoch": 1.0488479262672812, "grad_norm": 0.608188271522522, "learning_rate": 2.465140647439991e-05, "loss": 1.4875, "step": 569 }, { "epoch": 1.0506912442396312, "grad_norm": 0.655303955078125, "learning_rate": 2.4626726902845477e-05, "loss": 1.5856, "step": 570 }, { "epoch": 1.0525345622119815, "grad_norm": 0.6212196946144104, "learning_rate": 2.4602002940837948e-05, "loss": 1.4501, "step": 571 }, { "epoch": 1.0543778801843318, "grad_norm": 0.5928766131401062, "learning_rate": 2.4577234702383666e-05, "loss": 1.4214, "step": 572 }, { "epoch": 1.056221198156682, "grad_norm": 0.6004055142402649, "learning_rate": 2.4552422301693128e-05, "loss": 1.4529, "step": 573 }, { "epoch": 1.0580645161290323, "grad_norm": 0.6340869069099426, "learning_rate": 2.452756585318048e-05, "loss": 1.4805, "step": 574 }, { "epoch": 1.0599078341013826, "grad_norm": 0.6359026432037354, "learning_rate": 2.4502665471462983e-05, "loss": 1.4811, "step": 575 }, { "epoch": 1.0617511520737328, "grad_norm": 0.6191946864128113, "learning_rate": 2.447772127136046e-05, "loss": 1.4773, "step": 576 }, { "epoch": 1.0635944700460829, "grad_norm": 0.6260502338409424, "learning_rate": 2.4452733367894816e-05, "loss": 1.4597, "step": 577 }, { "epoch": 1.0654377880184331, "grad_norm": 0.6205639839172363, "learning_rate": 2.4427701876289465e-05, "loss": 1.4425, "step": 578 }, { "epoch": 1.0672811059907834, "grad_norm": 0.6401079297065735, "learning_rate": 2.440262691196881e-05, "loss": 1.506, "step": 579 }, { "epoch": 1.0691244239631337, "grad_norm": 0.6314127445220947, "learning_rate": 2.437750859055773e-05, "loss": 1.4969, "step": 580 }, { "epoch": 1.070967741935484, "grad_norm": 0.6094170212745667, "learning_rate": 2.4352347027881003e-05, "loss": 1.4873, "step": 581 }, { "epoch": 1.072811059907834, "grad_norm": 0.6318868398666382, "learning_rate": 2.4327142339962827e-05, "loss": 1.4825, "step": 582 }, { "epoch": 1.0746543778801843, "grad_norm": 0.6387438178062439, "learning_rate": 2.430189464302625e-05, "loss": 1.5198, "step": 583 }, { "epoch": 1.0764976958525345, "grad_norm": 0.6123984456062317, "learning_rate": 2.4276604053492636e-05, "loss": 1.4885, "step": 584 }, { "epoch": 1.0783410138248848, "grad_norm": 0.618430495262146, "learning_rate": 2.425127068798113e-05, "loss": 1.4667, "step": 585 }, { "epoch": 1.080184331797235, "grad_norm": 0.6012323498725891, "learning_rate": 2.422589466330814e-05, "loss": 1.5483, "step": 586 }, { "epoch": 1.0820276497695853, "grad_norm": 0.6048386693000793, "learning_rate": 2.4200476096486774e-05, "loss": 1.4619, "step": 587 }, { "epoch": 1.0838709677419356, "grad_norm": 0.6172820329666138, "learning_rate": 2.4175015104726306e-05, "loss": 1.5118, "step": 588 }, { "epoch": 1.0857142857142856, "grad_norm": 0.636820375919342, "learning_rate": 2.414951180543164e-05, "loss": 1.5904, "step": 589 }, { "epoch": 1.087557603686636, "grad_norm": 0.5951140522956848, "learning_rate": 2.4123966316202768e-05, "loss": 1.4224, "step": 590 }, { "epoch": 1.0894009216589862, "grad_norm": 0.6256266832351685, "learning_rate": 2.4098378754834227e-05, "loss": 1.4852, "step": 591 }, { "epoch": 1.0912442396313364, "grad_norm": 0.6289697885513306, "learning_rate": 2.4072749239314565e-05, "loss": 1.473, "step": 592 }, { "epoch": 1.0930875576036867, "grad_norm": 0.6038630604743958, "learning_rate": 2.4047077887825765e-05, "loss": 1.4583, "step": 593 }, { "epoch": 1.094930875576037, "grad_norm": 0.6234686374664307, "learning_rate": 2.402136481874275e-05, "loss": 1.4952, "step": 594 }, { "epoch": 1.096774193548387, "grad_norm": 0.6135314106941223, "learning_rate": 2.399561015063278e-05, "loss": 1.4453, "step": 595 }, { "epoch": 1.0986175115207373, "grad_norm": 0.6317179203033447, "learning_rate": 2.3969814002254965e-05, "loss": 1.4902, "step": 596 }, { "epoch": 1.1004608294930875, "grad_norm": 0.6388834118843079, "learning_rate": 2.3943976492559675e-05, "loss": 1.4936, "step": 597 }, { "epoch": 1.1023041474654378, "grad_norm": 0.6384425163269043, "learning_rate": 2.3918097740687987e-05, "loss": 1.5069, "step": 598 }, { "epoch": 1.104147465437788, "grad_norm": 0.6023929715156555, "learning_rate": 2.3892177865971183e-05, "loss": 1.4142, "step": 599 }, { "epoch": 1.1059907834101383, "grad_norm": 0.6068909168243408, "learning_rate": 2.386621698793015e-05, "loss": 1.4939, "step": 600 }, { "epoch": 1.1078341013824884, "grad_norm": 0.6102262139320374, "learning_rate": 2.3840215226274847e-05, "loss": 1.4926, "step": 601 }, { "epoch": 1.1096774193548387, "grad_norm": 0.6022539138793945, "learning_rate": 2.3814172700903775e-05, "loss": 1.4765, "step": 602 }, { "epoch": 1.111520737327189, "grad_norm": 0.59644615650177, "learning_rate": 2.3788089531903372e-05, "loss": 1.4122, "step": 603 }, { "epoch": 1.1133640552995392, "grad_norm": 0.6247356534004211, "learning_rate": 2.3761965839547515e-05, "loss": 1.4566, "step": 604 }, { "epoch": 1.1152073732718895, "grad_norm": 0.6365731358528137, "learning_rate": 2.3735801744296934e-05, "loss": 1.5111, "step": 605 }, { "epoch": 1.1170506912442397, "grad_norm": 0.6230877041816711, "learning_rate": 2.3709597366798662e-05, "loss": 1.5082, "step": 606 }, { "epoch": 1.1188940092165898, "grad_norm": 0.6125620007514954, "learning_rate": 2.3683352827885472e-05, "loss": 1.5319, "step": 607 }, { "epoch": 1.12073732718894, "grad_norm": 0.6090733408927917, "learning_rate": 2.365706824857535e-05, "loss": 1.4879, "step": 608 }, { "epoch": 1.1225806451612903, "grad_norm": 0.616420567035675, "learning_rate": 2.3630743750070892e-05, "loss": 1.4831, "step": 609 }, { "epoch": 1.1244239631336406, "grad_norm": 0.6392646431922913, "learning_rate": 2.360437945375878e-05, "loss": 1.5237, "step": 610 }, { "epoch": 1.1262672811059908, "grad_norm": 0.6059160232543945, "learning_rate": 2.3577975481209214e-05, "loss": 1.4769, "step": 611 }, { "epoch": 1.128110599078341, "grad_norm": 0.6207824945449829, "learning_rate": 2.3551531954175335e-05, "loss": 1.5327, "step": 612 }, { "epoch": 1.1299539170506911, "grad_norm": 0.6008784770965576, "learning_rate": 2.3525048994592684e-05, "loss": 1.4142, "step": 613 }, { "epoch": 1.1317972350230414, "grad_norm": 0.613699734210968, "learning_rate": 2.3498526724578637e-05, "loss": 1.4875, "step": 614 }, { "epoch": 1.1336405529953917, "grad_norm": 0.614940345287323, "learning_rate": 2.3471965266431824e-05, "loss": 1.4974, "step": 615 }, { "epoch": 1.135483870967742, "grad_norm": 0.6247910261154175, "learning_rate": 2.3445364742631592e-05, "loss": 1.4483, "step": 616 }, { "epoch": 1.1373271889400922, "grad_norm": 0.6494306325912476, "learning_rate": 2.3418725275837413e-05, "loss": 1.4588, "step": 617 }, { "epoch": 1.1391705069124425, "grad_norm": 0.6100001931190491, "learning_rate": 2.3392046988888345e-05, "loss": 1.4684, "step": 618 }, { "epoch": 1.1410138248847925, "grad_norm": 0.6268426775932312, "learning_rate": 2.3365330004802443e-05, "loss": 1.5188, "step": 619 }, { "epoch": 1.1428571428571428, "grad_norm": 0.6207700371742249, "learning_rate": 2.33385744467762e-05, "loss": 1.4679, "step": 620 }, { "epoch": 1.144700460829493, "grad_norm": 0.6368051767349243, "learning_rate": 2.331178043818399e-05, "loss": 1.5252, "step": 621 }, { "epoch": 1.1465437788018433, "grad_norm": 0.6070255637168884, "learning_rate": 2.328494810257748e-05, "loss": 1.41, "step": 622 }, { "epoch": 1.1483870967741936, "grad_norm": 0.6187101602554321, "learning_rate": 2.3258077563685072e-05, "loss": 1.4959, "step": 623 }, { "epoch": 1.1502304147465439, "grad_norm": 0.6142088174819946, "learning_rate": 2.3231168945411326e-05, "loss": 1.4736, "step": 624 }, { "epoch": 1.1520737327188941, "grad_norm": 0.6197490096092224, "learning_rate": 2.320422237183641e-05, "loss": 1.4959, "step": 625 }, { "epoch": 1.1539170506912442, "grad_norm": 0.5996073484420776, "learning_rate": 2.317723796721547e-05, "loss": 1.4812, "step": 626 }, { "epoch": 1.1557603686635944, "grad_norm": 0.5983907580375671, "learning_rate": 2.315021585597815e-05, "loss": 1.4176, "step": 627 }, { "epoch": 1.1576036866359447, "grad_norm": 0.6288896203041077, "learning_rate": 2.3123156162727923e-05, "loss": 1.4988, "step": 628 }, { "epoch": 1.159447004608295, "grad_norm": 0.6163783073425293, "learning_rate": 2.3096059012241583e-05, "loss": 1.414, "step": 629 }, { "epoch": 1.1612903225806452, "grad_norm": 0.6152181029319763, "learning_rate": 2.3068924529468638e-05, "loss": 1.5384, "step": 630 }, { "epoch": 1.1631336405529953, "grad_norm": 0.6216493844985962, "learning_rate": 2.3041752839530735e-05, "loss": 1.5039, "step": 631 }, { "epoch": 1.1649769585253456, "grad_norm": 0.6075827479362488, "learning_rate": 2.3014544067721096e-05, "loss": 1.418, "step": 632 }, { "epoch": 1.1668202764976958, "grad_norm": 0.6097463965415955, "learning_rate": 2.298729833950394e-05, "loss": 1.4452, "step": 633 }, { "epoch": 1.168663594470046, "grad_norm": 0.6165160536766052, "learning_rate": 2.2960015780513893e-05, "loss": 1.4974, "step": 634 }, { "epoch": 1.1705069124423964, "grad_norm": 0.624851405620575, "learning_rate": 2.2932696516555396e-05, "loss": 1.451, "step": 635 }, { "epoch": 1.1723502304147466, "grad_norm": 0.6457446217536926, "learning_rate": 2.2905340673602184e-05, "loss": 1.4522, "step": 636 }, { "epoch": 1.1741935483870969, "grad_norm": 0.6631104946136475, "learning_rate": 2.287794837779662e-05, "loss": 1.5367, "step": 637 }, { "epoch": 1.176036866359447, "grad_norm": 0.6243942379951477, "learning_rate": 2.2850519755449183e-05, "loss": 1.4571, "step": 638 }, { "epoch": 1.1778801843317972, "grad_norm": 0.6196613311767578, "learning_rate": 2.282305493303785e-05, "loss": 1.5354, "step": 639 }, { "epoch": 1.1797235023041475, "grad_norm": 0.6360357999801636, "learning_rate": 2.2795554037207528e-05, "loss": 1.5752, "step": 640 }, { "epoch": 1.1815668202764977, "grad_norm": 0.6158603429794312, "learning_rate": 2.2768017194769466e-05, "loss": 1.4614, "step": 641 }, { "epoch": 1.183410138248848, "grad_norm": 0.6168328523635864, "learning_rate": 2.2740444532700657e-05, "loss": 1.4945, "step": 642 }, { "epoch": 1.185253456221198, "grad_norm": 0.640100359916687, "learning_rate": 2.271283617814328e-05, "loss": 1.5292, "step": 643 }, { "epoch": 1.1870967741935483, "grad_norm": 0.6270676255226135, "learning_rate": 2.268519225840409e-05, "loss": 1.4589, "step": 644 }, { "epoch": 1.1889400921658986, "grad_norm": 0.6460107564926147, "learning_rate": 2.2657512900953832e-05, "loss": 1.497, "step": 645 }, { "epoch": 1.1907834101382488, "grad_norm": 0.6342102289199829, "learning_rate": 2.2629798233426677e-05, "loss": 1.4977, "step": 646 }, { "epoch": 1.192626728110599, "grad_norm": 0.6048287749290466, "learning_rate": 2.26020483836196e-05, "loss": 1.4563, "step": 647 }, { "epoch": 1.1944700460829494, "grad_norm": 0.6108403205871582, "learning_rate": 2.2574263479491816e-05, "loss": 1.5107, "step": 648 }, { "epoch": 1.1963133640552996, "grad_norm": 0.6178791522979736, "learning_rate": 2.2546443649164186e-05, "loss": 1.4693, "step": 649 }, { "epoch": 1.1981566820276497, "grad_norm": 0.6287988424301147, "learning_rate": 2.2518589020918612e-05, "loss": 1.4909, "step": 650 }, { "epoch": 1.2, "grad_norm": 0.6233773827552795, "learning_rate": 2.2490699723197454e-05, "loss": 1.4906, "step": 651 }, { "epoch": 1.2018433179723502, "grad_norm": 0.6186560988426208, "learning_rate": 2.2462775884602954e-05, "loss": 1.5139, "step": 652 }, { "epoch": 1.2036866359447005, "grad_norm": 0.6272045373916626, "learning_rate": 2.243481763389661e-05, "loss": 1.4744, "step": 653 }, { "epoch": 1.2055299539170508, "grad_norm": 0.6194866299629211, "learning_rate": 2.24068250999986e-05, "loss": 1.4715, "step": 654 }, { "epoch": 1.2073732718894008, "grad_norm": 0.6156409978866577, "learning_rate": 2.2378798411987218e-05, "loss": 1.4826, "step": 655 }, { "epoch": 1.209216589861751, "grad_norm": 0.6119863390922546, "learning_rate": 2.2350737699098203e-05, "loss": 1.5056, "step": 656 }, { "epoch": 1.2110599078341013, "grad_norm": 0.6189553737640381, "learning_rate": 2.2322643090724218e-05, "loss": 1.5198, "step": 657 }, { "epoch": 1.2129032258064516, "grad_norm": 0.6089476943016052, "learning_rate": 2.229451471641422e-05, "loss": 1.5074, "step": 658 }, { "epoch": 1.2147465437788019, "grad_norm": 0.6062411665916443, "learning_rate": 2.226635270587286e-05, "loss": 1.4107, "step": 659 }, { "epoch": 1.2165898617511521, "grad_norm": 0.6161401271820068, "learning_rate": 2.2238157188959893e-05, "loss": 1.4862, "step": 660 }, { "epoch": 1.2184331797235024, "grad_norm": 0.6359903812408447, "learning_rate": 2.2209928295689582e-05, "loss": 1.5561, "step": 661 }, { "epoch": 1.2202764976958524, "grad_norm": 0.6167048811912537, "learning_rate": 2.2181666156230082e-05, "loss": 1.4549, "step": 662 }, { "epoch": 1.2221198156682027, "grad_norm": 0.6125354766845703, "learning_rate": 2.2153370900902872e-05, "loss": 1.4708, "step": 663 }, { "epoch": 1.223963133640553, "grad_norm": 0.6094929575920105, "learning_rate": 2.2125042660182115e-05, "loss": 1.4483, "step": 664 }, { "epoch": 1.2258064516129032, "grad_norm": 0.6325694918632507, "learning_rate": 2.2096681564694087e-05, "loss": 1.5043, "step": 665 }, { "epoch": 1.2276497695852535, "grad_norm": 0.6466827392578125, "learning_rate": 2.2068287745216552e-05, "loss": 1.5032, "step": 666 }, { "epoch": 1.2294930875576038, "grad_norm": 0.6111404895782471, "learning_rate": 2.203986133267818e-05, "loss": 1.4706, "step": 667 }, { "epoch": 1.2313364055299538, "grad_norm": 0.6186912655830383, "learning_rate": 2.2011402458157935e-05, "loss": 1.5304, "step": 668 }, { "epoch": 1.233179723502304, "grad_norm": 0.6083782911300659, "learning_rate": 2.198291125288445e-05, "loss": 1.418, "step": 669 }, { "epoch": 1.2350230414746544, "grad_norm": 0.6159315705299377, "learning_rate": 2.1954387848235455e-05, "loss": 1.4578, "step": 670 }, { "epoch": 1.2368663594470046, "grad_norm": 0.6176555156707764, "learning_rate": 2.1925832375737168e-05, "loss": 1.4715, "step": 671 }, { "epoch": 1.238709677419355, "grad_norm": 0.6314014196395874, "learning_rate": 2.1897244967063653e-05, "loss": 1.494, "step": 672 }, { "epoch": 1.2405529953917052, "grad_norm": 0.6438396573066711, "learning_rate": 2.1868625754036256e-05, "loss": 1.469, "step": 673 }, { "epoch": 1.2423963133640552, "grad_norm": 0.6174535155296326, "learning_rate": 2.1839974868622956e-05, "loss": 1.4753, "step": 674 }, { "epoch": 1.2442396313364055, "grad_norm": 0.6103483438491821, "learning_rate": 2.1811292442937808e-05, "loss": 1.4497, "step": 675 }, { "epoch": 1.2460829493087557, "grad_norm": 0.6072337627410889, "learning_rate": 2.1782578609240286e-05, "loss": 1.4343, "step": 676 }, { "epoch": 1.247926267281106, "grad_norm": 0.61222904920578, "learning_rate": 2.1753833499934694e-05, "loss": 1.453, "step": 677 }, { "epoch": 1.2497695852534563, "grad_norm": 0.6256713271141052, "learning_rate": 2.1725057247569552e-05, "loss": 1.4801, "step": 678 }, { "epoch": 1.2516129032258063, "grad_norm": 0.6409046649932861, "learning_rate": 2.1696249984836993e-05, "loss": 1.5059, "step": 679 }, { "epoch": 1.2534562211981566, "grad_norm": 0.6414708495140076, "learning_rate": 2.166741184457214e-05, "loss": 1.5416, "step": 680 }, { "epoch": 1.2552995391705069, "grad_norm": 0.6176203489303589, "learning_rate": 2.1638542959752485e-05, "loss": 1.4793, "step": 681 }, { "epoch": 1.2571428571428571, "grad_norm": 0.609304666519165, "learning_rate": 2.160964346349731e-05, "loss": 1.523, "step": 682 }, { "epoch": 1.2589861751152074, "grad_norm": 0.6206054091453552, "learning_rate": 2.1580713489067043e-05, "loss": 1.5162, "step": 683 }, { "epoch": 1.2608294930875577, "grad_norm": 0.668765664100647, "learning_rate": 2.155175316986265e-05, "loss": 1.545, "step": 684 }, { "epoch": 1.262672811059908, "grad_norm": 0.6183255314826965, "learning_rate": 2.1522762639425012e-05, "loss": 1.4734, "step": 685 }, { "epoch": 1.2645161290322582, "grad_norm": 0.6271519064903259, "learning_rate": 2.1493742031434343e-05, "loss": 1.4408, "step": 686 }, { "epoch": 1.2663594470046082, "grad_norm": 0.6174322962760925, "learning_rate": 2.1464691479709534e-05, "loss": 1.4592, "step": 687 }, { "epoch": 1.2682027649769585, "grad_norm": 0.6306519508361816, "learning_rate": 2.1435611118207546e-05, "loss": 1.4427, "step": 688 }, { "epoch": 1.2700460829493088, "grad_norm": 0.6183598637580872, "learning_rate": 2.140650108102281e-05, "loss": 1.4525, "step": 689 }, { "epoch": 1.271889400921659, "grad_norm": 0.6512234210968018, "learning_rate": 2.137736150238659e-05, "loss": 1.5067, "step": 690 }, { "epoch": 1.2737327188940093, "grad_norm": 0.636774480342865, "learning_rate": 2.1348192516666376e-05, "loss": 1.4934, "step": 691 }, { "epoch": 1.2755760368663593, "grad_norm": 0.6048219799995422, "learning_rate": 2.1318994258365253e-05, "loss": 1.4666, "step": 692 }, { "epoch": 1.2774193548387096, "grad_norm": 0.6019451022148132, "learning_rate": 2.128976686212129e-05, "loss": 1.4448, "step": 693 }, { "epoch": 1.2792626728110599, "grad_norm": 0.6223950982093811, "learning_rate": 2.1260510462706914e-05, "loss": 1.531, "step": 694 }, { "epoch": 1.2811059907834101, "grad_norm": 0.6406816244125366, "learning_rate": 2.12312251950283e-05, "loss": 1.4607, "step": 695 }, { "epoch": 1.2829493087557604, "grad_norm": 0.6629902124404907, "learning_rate": 2.120191119412472e-05, "loss": 1.4808, "step": 696 }, { "epoch": 1.2847926267281107, "grad_norm": 0.6347109079360962, "learning_rate": 2.117256859516795e-05, "loss": 1.4904, "step": 697 }, { "epoch": 1.286635944700461, "grad_norm": 0.6273084282875061, "learning_rate": 2.1143197533461655e-05, "loss": 1.4774, "step": 698 }, { "epoch": 1.288479262672811, "grad_norm": 0.6307364702224731, "learning_rate": 2.1113798144440712e-05, "loss": 1.48, "step": 699 }, { "epoch": 1.2903225806451613, "grad_norm": 0.6333828568458557, "learning_rate": 2.108437056367064e-05, "loss": 1.5471, "step": 700 }, { "epoch": 1.2921658986175115, "grad_norm": 0.6215311288833618, "learning_rate": 2.1054914926846957e-05, "loss": 1.4295, "step": 701 }, { "epoch": 1.2940092165898618, "grad_norm": 0.6435150504112244, "learning_rate": 2.1025431369794546e-05, "loss": 1.5537, "step": 702 }, { "epoch": 1.295852534562212, "grad_norm": 0.6135848164558411, "learning_rate": 2.0995920028467027e-05, "loss": 1.4887, "step": 703 }, { "epoch": 1.297695852534562, "grad_norm": 0.6267658472061157, "learning_rate": 2.096638103894616e-05, "loss": 1.4782, "step": 704 }, { "epoch": 1.2995391705069124, "grad_norm": 0.609831690788269, "learning_rate": 2.0936814537441173e-05, "loss": 1.4787, "step": 705 }, { "epoch": 1.3013824884792626, "grad_norm": 0.627744197845459, "learning_rate": 2.0907220660288166e-05, "loss": 1.4313, "step": 706 }, { "epoch": 1.303225806451613, "grad_norm": 0.6227003335952759, "learning_rate": 2.087759954394948e-05, "loss": 1.5045, "step": 707 }, { "epoch": 1.3050691244239632, "grad_norm": 0.6434057354927063, "learning_rate": 2.084795132501304e-05, "loss": 1.5217, "step": 708 }, { "epoch": 1.3069124423963134, "grad_norm": 0.6197289228439331, "learning_rate": 2.081827614019177e-05, "loss": 1.4662, "step": 709 }, { "epoch": 1.3087557603686637, "grad_norm": 0.6186530590057373, "learning_rate": 2.0788574126322928e-05, "loss": 1.4821, "step": 710 }, { "epoch": 1.3105990783410137, "grad_norm": 0.6356285810470581, "learning_rate": 2.0758845420367474e-05, "loss": 1.4861, "step": 711 }, { "epoch": 1.312442396313364, "grad_norm": 0.6237668395042419, "learning_rate": 2.0729090159409467e-05, "loss": 1.4519, "step": 712 }, { "epoch": 1.3142857142857143, "grad_norm": 0.6121145486831665, "learning_rate": 2.0699308480655397e-05, "loss": 1.4485, "step": 713 }, { "epoch": 1.3161290322580645, "grad_norm": 0.6238338351249695, "learning_rate": 2.06695005214336e-05, "loss": 1.4947, "step": 714 }, { "epoch": 1.3179723502304148, "grad_norm": 0.642352283000946, "learning_rate": 2.0639666419193565e-05, "loss": 1.5261, "step": 715 }, { "epoch": 1.3198156682027649, "grad_norm": 0.6184538006782532, "learning_rate": 2.0609806311505345e-05, "loss": 1.4861, "step": 716 }, { "epoch": 1.3216589861751151, "grad_norm": 0.6436656713485718, "learning_rate": 2.057992033605891e-05, "loss": 1.5165, "step": 717 }, { "epoch": 1.3235023041474654, "grad_norm": 0.6331259608268738, "learning_rate": 2.0550008630663507e-05, "loss": 1.5454, "step": 718 }, { "epoch": 1.3253456221198157, "grad_norm": 0.6264419555664062, "learning_rate": 2.0520071333247025e-05, "loss": 1.558, "step": 719 }, { "epoch": 1.327188940092166, "grad_norm": 0.6347679495811462, "learning_rate": 2.049010858185537e-05, "loss": 1.5548, "step": 720 }, { "epoch": 1.3290322580645162, "grad_norm": 0.6179953217506409, "learning_rate": 2.0460120514651814e-05, "loss": 1.4423, "step": 721 }, { "epoch": 1.3308755760368665, "grad_norm": 0.6270414590835571, "learning_rate": 2.0430107269916368e-05, "loss": 1.4138, "step": 722 }, { "epoch": 1.3327188940092167, "grad_norm": 0.6328302025794983, "learning_rate": 2.0400068986045142e-05, "loss": 1.5058, "step": 723 }, { "epoch": 1.3345622119815668, "grad_norm": 0.6300492882728577, "learning_rate": 2.03700058015497e-05, "loss": 1.4931, "step": 724 }, { "epoch": 1.336405529953917, "grad_norm": 0.6287707090377808, "learning_rate": 2.0339917855056428e-05, "loss": 1.4846, "step": 725 }, { "epoch": 1.3382488479262673, "grad_norm": 0.6244567632675171, "learning_rate": 2.0309805285305905e-05, "loss": 1.4926, "step": 726 }, { "epoch": 1.3400921658986176, "grad_norm": 0.6375858783721924, "learning_rate": 2.0279668231152233e-05, "loss": 1.475, "step": 727 }, { "epoch": 1.3419354838709676, "grad_norm": 0.622603714466095, "learning_rate": 2.024950683156243e-05, "loss": 1.4917, "step": 728 }, { "epoch": 1.3437788018433179, "grad_norm": 0.6368018984794617, "learning_rate": 2.021932122561577e-05, "loss": 1.4584, "step": 729 }, { "epoch": 1.3456221198156681, "grad_norm": 0.6277997493743896, "learning_rate": 2.0189111552503142e-05, "loss": 1.5165, "step": 730 }, { "epoch": 1.3474654377880184, "grad_norm": 0.6314895153045654, "learning_rate": 2.015887795152643e-05, "loss": 1.4891, "step": 731 }, { "epoch": 1.3493087557603687, "grad_norm": 0.6547248959541321, "learning_rate": 2.0128620562097834e-05, "loss": 1.4912, "step": 732 }, { "epoch": 1.351152073732719, "grad_norm": 0.6388316750526428, "learning_rate": 2.009833952373925e-05, "loss": 1.5732, "step": 733 }, { "epoch": 1.3529953917050692, "grad_norm": 0.6275387406349182, "learning_rate": 2.0068034976081637e-05, "loss": 1.4561, "step": 734 }, { "epoch": 1.3548387096774195, "grad_norm": 0.6477261185646057, "learning_rate": 2.0037707058864343e-05, "loss": 1.4747, "step": 735 }, { "epoch": 1.3566820276497695, "grad_norm": 0.6163749694824219, "learning_rate": 2.0007355911934473e-05, "loss": 1.4715, "step": 736 }, { "epoch": 1.3585253456221198, "grad_norm": 0.6237518191337585, "learning_rate": 1.997698167524628e-05, "loss": 1.5064, "step": 737 }, { "epoch": 1.36036866359447, "grad_norm": 0.6290430426597595, "learning_rate": 1.9946584488860454e-05, "loss": 1.4825, "step": 738 }, { "epoch": 1.3622119815668203, "grad_norm": 0.6422656178474426, "learning_rate": 1.9916164492943518e-05, "loss": 1.5111, "step": 739 }, { "epoch": 1.3640552995391704, "grad_norm": 0.6391083598136902, "learning_rate": 1.9885721827767185e-05, "loss": 1.5618, "step": 740 }, { "epoch": 1.3658986175115206, "grad_norm": 0.624250054359436, "learning_rate": 1.9855256633707692e-05, "loss": 1.5096, "step": 741 }, { "epoch": 1.367741935483871, "grad_norm": 0.6381840109825134, "learning_rate": 1.9824769051245157e-05, "loss": 1.503, "step": 742 }, { "epoch": 1.3695852534562212, "grad_norm": 0.6450709104537964, "learning_rate": 1.979425922096294e-05, "loss": 1.5083, "step": 743 }, { "epoch": 1.3714285714285714, "grad_norm": 0.6425501704216003, "learning_rate": 1.976372728354699e-05, "loss": 1.5271, "step": 744 }, { "epoch": 1.3732718894009217, "grad_norm": 0.630938708782196, "learning_rate": 1.9733173379785188e-05, "loss": 1.4642, "step": 745 }, { "epoch": 1.375115207373272, "grad_norm": 0.6259406805038452, "learning_rate": 1.9702597650566723e-05, "loss": 1.4853, "step": 746 }, { "epoch": 1.3769585253456222, "grad_norm": 0.6241282224655151, "learning_rate": 1.9672000236881397e-05, "loss": 1.5297, "step": 747 }, { "epoch": 1.3788018433179723, "grad_norm": 0.6292351484298706, "learning_rate": 1.9641381279819028e-05, "loss": 1.5481, "step": 748 }, { "epoch": 1.3806451612903226, "grad_norm": 0.6297084093093872, "learning_rate": 1.9610740920568764e-05, "loss": 1.4684, "step": 749 }, { "epoch": 1.3824884792626728, "grad_norm": 0.6219861507415771, "learning_rate": 1.9580079300418444e-05, "loss": 1.5345, "step": 750 }, { "epoch": 1.384331797235023, "grad_norm": 0.6329815983772278, "learning_rate": 1.954939656075394e-05, "loss": 1.5065, "step": 751 }, { "epoch": 1.3861751152073734, "grad_norm": 0.6267799139022827, "learning_rate": 1.9518692843058514e-05, "loss": 1.5126, "step": 752 }, { "epoch": 1.3880184331797234, "grad_norm": 0.6429588794708252, "learning_rate": 1.9487968288912164e-05, "loss": 1.495, "step": 753 }, { "epoch": 1.3898617511520737, "grad_norm": 0.6146539449691772, "learning_rate": 1.9457223039990963e-05, "loss": 1.4306, "step": 754 }, { "epoch": 1.391705069124424, "grad_norm": 0.630659818649292, "learning_rate": 1.942645723806641e-05, "loss": 1.5311, "step": 755 }, { "epoch": 1.3935483870967742, "grad_norm": 0.6152111291885376, "learning_rate": 1.9395671025004777e-05, "loss": 1.4506, "step": 756 }, { "epoch": 1.3953917050691245, "grad_norm": 0.6187599897384644, "learning_rate": 1.936486454276647e-05, "loss": 1.4507, "step": 757 }, { "epoch": 1.3972350230414747, "grad_norm": 0.6392515897750854, "learning_rate": 1.9334037933405337e-05, "loss": 1.4364, "step": 758 }, { "epoch": 1.399078341013825, "grad_norm": 0.6132959723472595, "learning_rate": 1.9303191339068048e-05, "loss": 1.5138, "step": 759 }, { "epoch": 1.400921658986175, "grad_norm": 0.6178831458091736, "learning_rate": 1.9272324901993436e-05, "loss": 1.5203, "step": 760 }, { "epoch": 1.4027649769585253, "grad_norm": 0.616678774356842, "learning_rate": 1.9241438764511805e-05, "loss": 1.5184, "step": 761 }, { "epoch": 1.4046082949308756, "grad_norm": 0.5993118286132812, "learning_rate": 1.9210533069044334e-05, "loss": 1.4921, "step": 762 }, { "epoch": 1.4064516129032258, "grad_norm": 0.6092312335968018, "learning_rate": 1.9179607958102356e-05, "loss": 1.464, "step": 763 }, { "epoch": 1.4082949308755761, "grad_norm": 0.6284788250923157, "learning_rate": 1.9148663574286757e-05, "loss": 1.4234, "step": 764 }, { "epoch": 1.4101382488479262, "grad_norm": 0.6585412621498108, "learning_rate": 1.911770006028728e-05, "loss": 1.4843, "step": 765 }, { "epoch": 1.4119815668202764, "grad_norm": 0.6404136419296265, "learning_rate": 1.908671755888188e-05, "loss": 1.5148, "step": 766 }, { "epoch": 1.4138248847926267, "grad_norm": 0.6336871981620789, "learning_rate": 1.9055716212936075e-05, "loss": 1.5085, "step": 767 }, { "epoch": 1.415668202764977, "grad_norm": 0.6343189477920532, "learning_rate": 1.9024696165402272e-05, "loss": 1.5354, "step": 768 }, { "epoch": 1.4175115207373272, "grad_norm": 0.6193550825119019, "learning_rate": 1.899365755931911e-05, "loss": 1.4791, "step": 769 }, { "epoch": 1.4193548387096775, "grad_norm": 0.6221491694450378, "learning_rate": 1.8962600537810824e-05, "loss": 1.438, "step": 770 }, { "epoch": 1.4211981566820278, "grad_norm": 0.6263977885246277, "learning_rate": 1.893152524408653e-05, "loss": 1.5006, "step": 771 }, { "epoch": 1.4230414746543778, "grad_norm": 0.626641571521759, "learning_rate": 1.8900431821439644e-05, "loss": 1.5402, "step": 772 }, { "epoch": 1.424884792626728, "grad_norm": 0.6259623169898987, "learning_rate": 1.886932041324714e-05, "loss": 1.4768, "step": 773 }, { "epoch": 1.4267281105990783, "grad_norm": 0.624270498752594, "learning_rate": 1.883819116296895e-05, "loss": 1.4634, "step": 774 }, { "epoch": 1.4285714285714286, "grad_norm": 0.6257902383804321, "learning_rate": 1.880704421414726e-05, "loss": 1.4932, "step": 775 }, { "epoch": 1.4304147465437789, "grad_norm": 0.6192927360534668, "learning_rate": 1.8775879710405893e-05, "loss": 1.5394, "step": 776 }, { "epoch": 1.432258064516129, "grad_norm": 0.6155016422271729, "learning_rate": 1.8744697795449588e-05, "loss": 1.4331, "step": 777 }, { "epoch": 1.4341013824884792, "grad_norm": 0.6171903014183044, "learning_rate": 1.8713498613063403e-05, "loss": 1.4521, "step": 778 }, { "epoch": 1.4359447004608294, "grad_norm": 0.6074328422546387, "learning_rate": 1.8682282307111988e-05, "loss": 1.4878, "step": 779 }, { "epoch": 1.4377880184331797, "grad_norm": 0.6176053881645203, "learning_rate": 1.865104902153898e-05, "loss": 1.4668, "step": 780 }, { "epoch": 1.43963133640553, "grad_norm": 0.630382239818573, "learning_rate": 1.8619798900366298e-05, "loss": 1.4885, "step": 781 }, { "epoch": 1.4414746543778802, "grad_norm": 0.6310209035873413, "learning_rate": 1.8588532087693485e-05, "loss": 1.4715, "step": 782 }, { "epoch": 1.4433179723502305, "grad_norm": 0.6357136368751526, "learning_rate": 1.8557248727697068e-05, "loss": 1.5228, "step": 783 }, { "epoch": 1.4451612903225808, "grad_norm": 0.608016312122345, "learning_rate": 1.852594896462987e-05, "loss": 1.4766, "step": 784 }, { "epoch": 1.4470046082949308, "grad_norm": 0.6226021647453308, "learning_rate": 1.849463294282035e-05, "loss": 1.464, "step": 785 }, { "epoch": 1.448847926267281, "grad_norm": 0.6161075234413147, "learning_rate": 1.8463300806671936e-05, "loss": 1.4354, "step": 786 }, { "epoch": 1.4506912442396314, "grad_norm": 0.6273759603500366, "learning_rate": 1.8431952700662375e-05, "loss": 1.5135, "step": 787 }, { "epoch": 1.4525345622119816, "grad_norm": 0.6346685886383057, "learning_rate": 1.840058876934303e-05, "loss": 1.5269, "step": 788 }, { "epoch": 1.4543778801843317, "grad_norm": 0.613421618938446, "learning_rate": 1.8369209157338262e-05, "loss": 1.3808, "step": 789 }, { "epoch": 1.456221198156682, "grad_norm": 0.6439058184623718, "learning_rate": 1.8337814009344716e-05, "loss": 1.4847, "step": 790 }, { "epoch": 1.4580645161290322, "grad_norm": 0.6256642937660217, "learning_rate": 1.83064034701307e-05, "loss": 1.4693, "step": 791 }, { "epoch": 1.4599078341013825, "grad_norm": 0.6350804567337036, "learning_rate": 1.8274977684535478e-05, "loss": 1.4718, "step": 792 }, { "epoch": 1.4617511520737327, "grad_norm": 0.6155893206596375, "learning_rate": 1.824353679746861e-05, "loss": 1.4316, "step": 793 }, { "epoch": 1.463594470046083, "grad_norm": 0.6298454403877258, "learning_rate": 1.821208095390931e-05, "loss": 1.4731, "step": 794 }, { "epoch": 1.4654377880184333, "grad_norm": 0.6269511580467224, "learning_rate": 1.8180610298905758e-05, "loss": 1.5299, "step": 795 }, { "epoch": 1.4672811059907835, "grad_norm": 0.6153545379638672, "learning_rate": 1.8149124977574417e-05, "loss": 1.4951, "step": 796 }, { "epoch": 1.4691244239631336, "grad_norm": 0.6348549127578735, "learning_rate": 1.8117625135099386e-05, "loss": 1.5588, "step": 797 }, { "epoch": 1.4709677419354839, "grad_norm": 0.6142151355743408, "learning_rate": 1.8086110916731724e-05, "loss": 1.4915, "step": 798 }, { "epoch": 1.4728110599078341, "grad_norm": 0.613411009311676, "learning_rate": 1.805458246778878e-05, "loss": 1.4664, "step": 799 }, { "epoch": 1.4746543778801844, "grad_norm": 0.6193826794624329, "learning_rate": 1.802303993365353e-05, "loss": 1.4478, "step": 800 }, { "epoch": 1.4764976958525344, "grad_norm": 0.6402882933616638, "learning_rate": 1.7991483459773887e-05, "loss": 1.5715, "step": 801 }, { "epoch": 1.4783410138248847, "grad_norm": 0.6316819787025452, "learning_rate": 1.795991319166204e-05, "loss": 1.5089, "step": 802 }, { "epoch": 1.480184331797235, "grad_norm": 0.6205444931983948, "learning_rate": 1.79283292748938e-05, "loss": 1.4812, "step": 803 }, { "epoch": 1.4820276497695852, "grad_norm": 0.6215456128120422, "learning_rate": 1.7896731855107908e-05, "loss": 1.4817, "step": 804 }, { "epoch": 1.4838709677419355, "grad_norm": 0.6182077527046204, "learning_rate": 1.7865121078005365e-05, "loss": 1.4729, "step": 805 }, { "epoch": 1.4857142857142858, "grad_norm": 0.6173567771911621, "learning_rate": 1.7833497089348772e-05, "loss": 1.5043, "step": 806 }, { "epoch": 1.487557603686636, "grad_norm": 0.6046410202980042, "learning_rate": 1.780186003496164e-05, "loss": 1.4948, "step": 807 }, { "epoch": 1.4894009216589863, "grad_norm": 0.6256017088890076, "learning_rate": 1.7770210060727748e-05, "loss": 1.4626, "step": 808 }, { "epoch": 1.4912442396313363, "grad_norm": 0.6358310580253601, "learning_rate": 1.7738547312590426e-05, "loss": 1.4998, "step": 809 }, { "epoch": 1.4930875576036866, "grad_norm": 0.6277178525924683, "learning_rate": 1.770687193655192e-05, "loss": 1.5039, "step": 810 }, { "epoch": 1.4949308755760369, "grad_norm": 0.6221585273742676, "learning_rate": 1.7675184078672714e-05, "loss": 1.4923, "step": 811 }, { "epoch": 1.4967741935483871, "grad_norm": 0.6160699129104614, "learning_rate": 1.7643483885070827e-05, "loss": 1.491, "step": 812 }, { "epoch": 1.4986175115207372, "grad_norm": 0.6212751269340515, "learning_rate": 1.7611771501921174e-05, "loss": 1.5417, "step": 813 }, { "epoch": 1.5004608294930875, "grad_norm": 0.633661687374115, "learning_rate": 1.7580047075454877e-05, "loss": 1.5419, "step": 814 }, { "epoch": 1.5023041474654377, "grad_norm": 0.6082422137260437, "learning_rate": 1.7548310751958588e-05, "loss": 1.4325, "step": 815 }, { "epoch": 1.504147465437788, "grad_norm": 0.6264359354972839, "learning_rate": 1.751656267777382e-05, "loss": 1.4923, "step": 816 }, { "epoch": 1.5059907834101383, "grad_norm": 0.6744566559791565, "learning_rate": 1.748480299929627e-05, "loss": 1.5226, "step": 817 }, { "epoch": 1.5078341013824885, "grad_norm": 0.6213410496711731, "learning_rate": 1.7453031862975146e-05, "loss": 1.4748, "step": 818 }, { "epoch": 1.5096774193548388, "grad_norm": 0.6109259724617004, "learning_rate": 1.742124941531249e-05, "loss": 1.4755, "step": 819 }, { "epoch": 1.511520737327189, "grad_norm": 0.6165345311164856, "learning_rate": 1.73894558028625e-05, "loss": 1.4913, "step": 820 }, { "epoch": 1.5133640552995393, "grad_norm": 0.6217798590660095, "learning_rate": 1.7357651172230852e-05, "loss": 1.4408, "step": 821 }, { "epoch": 1.5152073732718894, "grad_norm": 0.6267424821853638, "learning_rate": 1.7325835670074044e-05, "loss": 1.5308, "step": 822 }, { "epoch": 1.5170506912442396, "grad_norm": 0.6253458261489868, "learning_rate": 1.729400944309869e-05, "loss": 1.4383, "step": 823 }, { "epoch": 1.51889400921659, "grad_norm": 0.624306321144104, "learning_rate": 1.7262172638060865e-05, "loss": 1.508, "step": 824 }, { "epoch": 1.52073732718894, "grad_norm": 0.6324599981307983, "learning_rate": 1.7230325401765415e-05, "loss": 1.4796, "step": 825 }, { "epoch": 1.5225806451612902, "grad_norm": 0.6594914197921753, "learning_rate": 1.7198467881065292e-05, "loss": 1.4957, "step": 826 }, { "epoch": 1.5244239631336405, "grad_norm": 0.6249353289604187, "learning_rate": 1.7166600222860876e-05, "loss": 1.5246, "step": 827 }, { "epoch": 1.5262672811059907, "grad_norm": 0.6245707869529724, "learning_rate": 1.713472257409928e-05, "loss": 1.4636, "step": 828 }, { "epoch": 1.528110599078341, "grad_norm": 0.6260251402854919, "learning_rate": 1.7102835081773686e-05, "loss": 1.5292, "step": 829 }, { "epoch": 1.5299539170506913, "grad_norm": 0.6254730820655823, "learning_rate": 1.707093789292269e-05, "loss": 1.5406, "step": 830 }, { "epoch": 1.5317972350230415, "grad_norm": 0.613824725151062, "learning_rate": 1.7039031154629567e-05, "loss": 1.4521, "step": 831 }, { "epoch": 1.5336405529953918, "grad_norm": 0.6051052212715149, "learning_rate": 1.700711501402164e-05, "loss": 1.4265, "step": 832 }, { "epoch": 1.535483870967742, "grad_norm": 0.6232450604438782, "learning_rate": 1.6975189618269592e-05, "loss": 1.4913, "step": 833 }, { "epoch": 1.5373271889400921, "grad_norm": 0.635735273361206, "learning_rate": 1.6943255114586788e-05, "loss": 1.4582, "step": 834 }, { "epoch": 1.5391705069124424, "grad_norm": 0.6376718282699585, "learning_rate": 1.6911311650228574e-05, "loss": 1.4683, "step": 835 }, { "epoch": 1.5410138248847927, "grad_norm": 0.6305147409439087, "learning_rate": 1.687935937249163e-05, "loss": 1.4772, "step": 836 }, { "epoch": 1.5428571428571427, "grad_norm": 0.6170597672462463, "learning_rate": 1.6847398428713256e-05, "loss": 1.4469, "step": 837 }, { "epoch": 1.544700460829493, "grad_norm": 0.6421403884887695, "learning_rate": 1.681542896627075e-05, "loss": 1.5286, "step": 838 }, { "epoch": 1.5465437788018432, "grad_norm": 0.6419578790664673, "learning_rate": 1.678345113258065e-05, "loss": 1.5065, "step": 839 }, { "epoch": 1.5483870967741935, "grad_norm": 0.6169295310974121, "learning_rate": 1.6751465075098115e-05, "loss": 1.4265, "step": 840 }, { "epoch": 1.5502304147465438, "grad_norm": 0.6323404312133789, "learning_rate": 1.6719470941316228e-05, "loss": 1.4971, "step": 841 }, { "epoch": 1.552073732718894, "grad_norm": 0.6140077710151672, "learning_rate": 1.668746887876531e-05, "loss": 1.4799, "step": 842 }, { "epoch": 1.5539170506912443, "grad_norm": 0.612179160118103, "learning_rate": 1.6655459035012237e-05, "loss": 1.4701, "step": 843 }, { "epoch": 1.5557603686635946, "grad_norm": 0.6366963982582092, "learning_rate": 1.662344155765977e-05, "loss": 1.4665, "step": 844 }, { "epoch": 1.5576036866359448, "grad_norm": 0.6345764994621277, "learning_rate": 1.659141659434587e-05, "loss": 1.5177, "step": 845 }, { "epoch": 1.5594470046082949, "grad_norm": 0.6261819005012512, "learning_rate": 1.655938429274302e-05, "loss": 1.4981, "step": 846 }, { "epoch": 1.5612903225806452, "grad_norm": 0.6166211366653442, "learning_rate": 1.6527344800557534e-05, "loss": 1.4655, "step": 847 }, { "epoch": 1.5631336405529954, "grad_norm": 0.613940954208374, "learning_rate": 1.6495298265528883e-05, "loss": 1.5047, "step": 848 }, { "epoch": 1.5649769585253455, "grad_norm": 0.6288052201271057, "learning_rate": 1.646324483542902e-05, "loss": 1.545, "step": 849 }, { "epoch": 1.5668202764976957, "grad_norm": 0.6278014779090881, "learning_rate": 1.64311846580617e-05, "loss": 1.5277, "step": 850 }, { "epoch": 1.568663594470046, "grad_norm": 0.6235432624816895, "learning_rate": 1.639911788126177e-05, "loss": 1.5563, "step": 851 }, { "epoch": 1.5705069124423963, "grad_norm": 0.6206692457199097, "learning_rate": 1.6367044652894515e-05, "loss": 1.4505, "step": 852 }, { "epoch": 1.5723502304147465, "grad_norm": 0.6300826072692871, "learning_rate": 1.6334965120854986e-05, "loss": 1.5347, "step": 853 }, { "epoch": 1.5741935483870968, "grad_norm": 0.6272528767585754, "learning_rate": 1.6302879433067274e-05, "loss": 1.4906, "step": 854 }, { "epoch": 1.576036866359447, "grad_norm": 0.6228590607643127, "learning_rate": 1.6270787737483877e-05, "loss": 1.5235, "step": 855 }, { "epoch": 1.5778801843317973, "grad_norm": 0.6355240941047668, "learning_rate": 1.623869018208499e-05, "loss": 1.4198, "step": 856 }, { "epoch": 1.5797235023041476, "grad_norm": 0.6255508065223694, "learning_rate": 1.6206586914877816e-05, "loss": 1.4651, "step": 857 }, { "epoch": 1.5815668202764976, "grad_norm": 0.6284841895103455, "learning_rate": 1.6174478083895922e-05, "loss": 1.4759, "step": 858 }, { "epoch": 1.583410138248848, "grad_norm": 0.6363735795021057, "learning_rate": 1.6142363837198504e-05, "loss": 1.5598, "step": 859 }, { "epoch": 1.5852534562211982, "grad_norm": 0.6262885928153992, "learning_rate": 1.6110244322869746e-05, "loss": 1.4991, "step": 860 }, { "epoch": 1.5870967741935482, "grad_norm": 0.6338739991188049, "learning_rate": 1.607811968901812e-05, "loss": 1.4834, "step": 861 }, { "epoch": 1.5889400921658985, "grad_norm": 0.6278196573257446, "learning_rate": 1.6045990083775703e-05, "loss": 1.5123, "step": 862 }, { "epoch": 1.5907834101382488, "grad_norm": 0.6288981437683105, "learning_rate": 1.6013855655297498e-05, "loss": 1.4883, "step": 863 }, { "epoch": 1.592626728110599, "grad_norm": 0.6212314367294312, "learning_rate": 1.5981716551760735e-05, "loss": 1.4122, "step": 864 }, { "epoch": 1.5944700460829493, "grad_norm": 0.6280766725540161, "learning_rate": 1.5949572921364226e-05, "loss": 1.4591, "step": 865 }, { "epoch": 1.5963133640552996, "grad_norm": 0.6237391829490662, "learning_rate": 1.5917424912327644e-05, "loss": 1.5029, "step": 866 }, { "epoch": 1.5981566820276498, "grad_norm": 0.6374077200889587, "learning_rate": 1.5885272672890842e-05, "loss": 1.5324, "step": 867 }, { "epoch": 1.6, "grad_norm": 0.6368247866630554, "learning_rate": 1.58531163513132e-05, "loss": 1.491, "step": 868 }, { "epoch": 1.6018433179723504, "grad_norm": 0.6101037859916687, "learning_rate": 1.5820956095872914e-05, "loss": 1.494, "step": 869 }, { "epoch": 1.6036866359447006, "grad_norm": 0.6246112585067749, "learning_rate": 1.5788792054866314e-05, "loss": 1.4569, "step": 870 }, { "epoch": 1.6055299539170507, "grad_norm": 0.6085765957832336, "learning_rate": 1.5756624376607193e-05, "loss": 1.4414, "step": 871 }, { "epoch": 1.607373271889401, "grad_norm": 0.6196484565734863, "learning_rate": 1.5724453209426108e-05, "loss": 1.4684, "step": 872 }, { "epoch": 1.6092165898617512, "grad_norm": 0.6180390119552612, "learning_rate": 1.5692278701669712e-05, "loss": 1.4834, "step": 873 }, { "epoch": 1.6110599078341012, "grad_norm": 0.6186936497688293, "learning_rate": 1.566010100170007e-05, "loss": 1.4631, "step": 874 }, { "epoch": 1.6129032258064515, "grad_norm": 0.624941349029541, "learning_rate": 1.5627920257893934e-05, "loss": 1.4867, "step": 875 }, { "epoch": 1.6147465437788018, "grad_norm": 0.6469988226890564, "learning_rate": 1.5595736618642126e-05, "loss": 1.5073, "step": 876 }, { "epoch": 1.616589861751152, "grad_norm": 0.6222912073135376, "learning_rate": 1.5563550232348813e-05, "loss": 1.4371, "step": 877 }, { "epoch": 1.6184331797235023, "grad_norm": 0.6234318614006042, "learning_rate": 1.553136124743081e-05, "loss": 1.4835, "step": 878 }, { "epoch": 1.6202764976958526, "grad_norm": 0.6003885865211487, "learning_rate": 1.5499169812316937e-05, "loss": 1.4223, "step": 879 }, { "epoch": 1.6221198156682028, "grad_norm": 0.6100941300392151, "learning_rate": 1.5466976075447295e-05, "loss": 1.462, "step": 880 }, { "epoch": 1.6239631336405531, "grad_norm": 0.6085091829299927, "learning_rate": 1.5434780185272616e-05, "loss": 1.4026, "step": 881 }, { "epoch": 1.6258064516129034, "grad_norm": 0.6302891373634338, "learning_rate": 1.5402582290253547e-05, "loss": 1.4636, "step": 882 }, { "epoch": 1.6276497695852534, "grad_norm": 0.6259814500808716, "learning_rate": 1.537038253885998e-05, "loss": 1.4606, "step": 883 }, { "epoch": 1.6294930875576037, "grad_norm": 0.632914125919342, "learning_rate": 1.533818107957038e-05, "loss": 1.4638, "step": 884 }, { "epoch": 1.631336405529954, "grad_norm": 0.6297101974487305, "learning_rate": 1.5305978060871083e-05, "loss": 1.518, "step": 885 }, { "epoch": 1.633179723502304, "grad_norm": 0.6185283064842224, "learning_rate": 1.5273773631255602e-05, "loss": 1.5152, "step": 886 }, { "epoch": 1.6350230414746543, "grad_norm": 0.6100816130638123, "learning_rate": 1.524156793922396e-05, "loss": 1.428, "step": 887 }, { "epoch": 1.6368663594470045, "grad_norm": 0.6179800629615784, "learning_rate": 1.5209361133282022e-05, "loss": 1.4588, "step": 888 }, { "epoch": 1.6387096774193548, "grad_norm": 0.6542914509773254, "learning_rate": 1.517715336194077e-05, "loss": 1.5733, "step": 889 }, { "epoch": 1.640552995391705, "grad_norm": 0.6015356779098511, "learning_rate": 1.5144944773715635e-05, "loss": 1.3893, "step": 890 }, { "epoch": 1.6423963133640553, "grad_norm": 0.6164160966873169, "learning_rate": 1.511273551712583e-05, "loss": 1.4802, "step": 891 }, { "epoch": 1.6442396313364056, "grad_norm": 0.6255238652229309, "learning_rate": 1.5080525740693635e-05, "loss": 1.4327, "step": 892 }, { "epoch": 1.6460829493087559, "grad_norm": 0.6259914636611938, "learning_rate": 1.5048315592943743e-05, "loss": 1.5048, "step": 893 }, { "epoch": 1.6479262672811061, "grad_norm": 0.613569438457489, "learning_rate": 1.5016105222402546e-05, "loss": 1.4628, "step": 894 }, { "epoch": 1.6497695852534562, "grad_norm": 0.6390876173973083, "learning_rate": 1.4983894777597461e-05, "loss": 1.542, "step": 895 }, { "epoch": 1.6516129032258065, "grad_norm": 0.6205943822860718, "learning_rate": 1.495168440705626e-05, "loss": 1.4237, "step": 896 }, { "epoch": 1.6534562211981567, "grad_norm": 0.6205142736434937, "learning_rate": 1.4919474259306362e-05, "loss": 1.4386, "step": 897 }, { "epoch": 1.6552995391705068, "grad_norm": 0.6293962597846985, "learning_rate": 1.4887264482874173e-05, "loss": 1.4495, "step": 898 }, { "epoch": 1.657142857142857, "grad_norm": 0.6244837045669556, "learning_rate": 1.4855055226284367e-05, "loss": 1.4597, "step": 899 }, { "epoch": 1.6589861751152073, "grad_norm": 0.6118645071983337, "learning_rate": 1.4822846638059234e-05, "loss": 1.4753, "step": 900 }, { "epoch": 1.6608294930875576, "grad_norm": 0.6228466629981995, "learning_rate": 1.4790638866717984e-05, "loss": 1.5137, "step": 901 }, { "epoch": 1.6626728110599078, "grad_norm": 0.6241260170936584, "learning_rate": 1.4758432060776044e-05, "loss": 1.4837, "step": 902 }, { "epoch": 1.664516129032258, "grad_norm": 0.6222780346870422, "learning_rate": 1.4726226368744404e-05, "loss": 1.4705, "step": 903 }, { "epoch": 1.6663594470046084, "grad_norm": 0.6234682202339172, "learning_rate": 1.4694021939128925e-05, "loss": 1.4755, "step": 904 }, { "epoch": 1.6682027649769586, "grad_norm": 0.6378889083862305, "learning_rate": 1.466181892042962e-05, "loss": 1.4987, "step": 905 }, { "epoch": 1.670046082949309, "grad_norm": 0.6307604312896729, "learning_rate": 1.462961746114002e-05, "loss": 1.5048, "step": 906 }, { "epoch": 1.671889400921659, "grad_norm": 0.628210186958313, "learning_rate": 1.4597417709746454e-05, "loss": 1.5087, "step": 907 }, { "epoch": 1.6737327188940092, "grad_norm": 0.6338211297988892, "learning_rate": 1.4565219814727388e-05, "loss": 1.4763, "step": 908 }, { "epoch": 1.6755760368663595, "grad_norm": 0.6072210669517517, "learning_rate": 1.4533023924552706e-05, "loss": 1.4619, "step": 909 }, { "epoch": 1.6774193548387095, "grad_norm": 0.6245105862617493, "learning_rate": 1.4500830187683066e-05, "loss": 1.4852, "step": 910 }, { "epoch": 1.6792626728110598, "grad_norm": 0.6249793767929077, "learning_rate": 1.4468638752569193e-05, "loss": 1.5034, "step": 911 }, { "epoch": 1.68110599078341, "grad_norm": 0.609481692314148, "learning_rate": 1.4436449767651191e-05, "loss": 1.4575, "step": 912 }, { "epoch": 1.6829493087557603, "grad_norm": 0.6104646921157837, "learning_rate": 1.4404263381357873e-05, "loss": 1.4809, "step": 913 }, { "epoch": 1.6847926267281106, "grad_norm": 0.6226664185523987, "learning_rate": 1.437207974210607e-05, "loss": 1.4704, "step": 914 }, { "epoch": 1.6866359447004609, "grad_norm": 0.6216410994529724, "learning_rate": 1.4339898998299936e-05, "loss": 1.4869, "step": 915 }, { "epoch": 1.6884792626728111, "grad_norm": 0.6308003664016724, "learning_rate": 1.4307721298330284e-05, "loss": 1.485, "step": 916 }, { "epoch": 1.6903225806451614, "grad_norm": 0.6217168569564819, "learning_rate": 1.4275546790573895e-05, "loss": 1.457, "step": 917 }, { "epoch": 1.6921658986175117, "grad_norm": 0.6104401350021362, "learning_rate": 1.4243375623392808e-05, "loss": 1.4455, "step": 918 }, { "epoch": 1.6940092165898617, "grad_norm": 0.6185252070426941, "learning_rate": 1.4211207945133685e-05, "loss": 1.4859, "step": 919 }, { "epoch": 1.695852534562212, "grad_norm": 0.6091212034225464, "learning_rate": 1.417904390412709e-05, "loss": 1.4455, "step": 920 }, { "epoch": 1.6976958525345622, "grad_norm": 0.6229434609413147, "learning_rate": 1.41468836486868e-05, "loss": 1.4195, "step": 921 }, { "epoch": 1.6995391705069123, "grad_norm": 0.6208975315093994, "learning_rate": 1.411472732710916e-05, "loss": 1.471, "step": 922 }, { "epoch": 1.7013824884792625, "grad_norm": 0.6302065253257751, "learning_rate": 1.4082575087672363e-05, "loss": 1.505, "step": 923 }, { "epoch": 1.7032258064516128, "grad_norm": 0.6388840675354004, "learning_rate": 1.4050427078635777e-05, "loss": 1.4838, "step": 924 }, { "epoch": 1.705069124423963, "grad_norm": 0.6101526021957397, "learning_rate": 1.4018283448239266e-05, "loss": 1.4843, "step": 925 }, { "epoch": 1.7069124423963133, "grad_norm": 0.6074756979942322, "learning_rate": 1.398614434470251e-05, "loss": 1.4603, "step": 926 }, { "epoch": 1.7087557603686636, "grad_norm": 0.622736930847168, "learning_rate": 1.3954009916224299e-05, "loss": 1.4994, "step": 927 }, { "epoch": 1.7105990783410139, "grad_norm": 0.6283549070358276, "learning_rate": 1.3921880310981878e-05, "loss": 1.474, "step": 928 }, { "epoch": 1.7124423963133641, "grad_norm": 0.6322742700576782, "learning_rate": 1.3889755677130253e-05, "loss": 1.5237, "step": 929 }, { "epoch": 1.7142857142857144, "grad_norm": 0.6395312547683716, "learning_rate": 1.3857636162801499e-05, "loss": 1.5169, "step": 930 }, { "epoch": 1.7161290322580647, "grad_norm": 0.6248497366905212, "learning_rate": 1.3825521916104082e-05, "loss": 1.4201, "step": 931 }, { "epoch": 1.7179723502304147, "grad_norm": 0.6242885589599609, "learning_rate": 1.3793413085122183e-05, "loss": 1.5287, "step": 932 }, { "epoch": 1.719815668202765, "grad_norm": 0.6468275189399719, "learning_rate": 1.3761309817915017e-05, "loss": 1.5145, "step": 933 }, { "epoch": 1.7216589861751153, "grad_norm": 0.6262132525444031, "learning_rate": 1.3729212262516124e-05, "loss": 1.5009, "step": 934 }, { "epoch": 1.7235023041474653, "grad_norm": 0.617623507976532, "learning_rate": 1.3697120566932727e-05, "loss": 1.4528, "step": 935 }, { "epoch": 1.7253456221198156, "grad_norm": 0.6099165081977844, "learning_rate": 1.3665034879145022e-05, "loss": 1.5117, "step": 936 }, { "epoch": 1.7271889400921658, "grad_norm": 0.6353684067726135, "learning_rate": 1.3632955347105487e-05, "loss": 1.5144, "step": 937 }, { "epoch": 1.729032258064516, "grad_norm": 0.6231996417045593, "learning_rate": 1.3600882118738232e-05, "loss": 1.5099, "step": 938 }, { "epoch": 1.7308755760368664, "grad_norm": 0.6236693859100342, "learning_rate": 1.3568815341938303e-05, "loss": 1.5045, "step": 939 }, { "epoch": 1.7327188940092166, "grad_norm": 0.6413432955741882, "learning_rate": 1.3536755164570977e-05, "loss": 1.5213, "step": 940 }, { "epoch": 1.734562211981567, "grad_norm": 0.6272238492965698, "learning_rate": 1.3504701734471117e-05, "loss": 1.4846, "step": 941 }, { "epoch": 1.7364055299539172, "grad_norm": 0.6341332793235779, "learning_rate": 1.3472655199442473e-05, "loss": 1.4832, "step": 942 }, { "epoch": 1.7382488479262674, "grad_norm": 0.6423808336257935, "learning_rate": 1.3440615707256984e-05, "loss": 1.4169, "step": 943 }, { "epoch": 1.7400921658986175, "grad_norm": 0.615389883518219, "learning_rate": 1.340858340565413e-05, "loss": 1.4557, "step": 944 }, { "epoch": 1.7419354838709677, "grad_norm": 0.6154221892356873, "learning_rate": 1.3376558442340233e-05, "loss": 1.4476, "step": 945 }, { "epoch": 1.743778801843318, "grad_norm": 0.6158937811851501, "learning_rate": 1.3344540964987766e-05, "loss": 1.4641, "step": 946 }, { "epoch": 1.745622119815668, "grad_norm": 0.639891505241394, "learning_rate": 1.331253112123469e-05, "loss": 1.5348, "step": 947 }, { "epoch": 1.7474654377880183, "grad_norm": 0.6212478876113892, "learning_rate": 1.3280529058683778e-05, "loss": 1.4653, "step": 948 }, { "epoch": 1.7493087557603686, "grad_norm": 0.6336373090744019, "learning_rate": 1.3248534924901887e-05, "loss": 1.4577, "step": 949 }, { "epoch": 1.7511520737327189, "grad_norm": 0.6200190782546997, "learning_rate": 1.3216548867419352e-05, "loss": 1.4874, "step": 950 }, { "epoch": 1.7529953917050691, "grad_norm": 0.6302803754806519, "learning_rate": 1.3184571033729253e-05, "loss": 1.4397, "step": 951 }, { "epoch": 1.7548387096774194, "grad_norm": 0.6245046257972717, "learning_rate": 1.3152601571286746e-05, "loss": 1.4626, "step": 952 }, { "epoch": 1.7566820276497697, "grad_norm": 0.6301631927490234, "learning_rate": 1.3120640627508376e-05, "loss": 1.4651, "step": 953 }, { "epoch": 1.75852534562212, "grad_norm": 0.6286301016807556, "learning_rate": 1.3088688349771425e-05, "loss": 1.5374, "step": 954 }, { "epoch": 1.7603686635944702, "grad_norm": 0.6357449889183044, "learning_rate": 1.3056744885413216e-05, "loss": 1.4855, "step": 955 }, { "epoch": 1.7622119815668202, "grad_norm": 0.6467391848564148, "learning_rate": 1.3024810381730409e-05, "loss": 1.5024, "step": 956 }, { "epoch": 1.7640552995391705, "grad_norm": 0.6342014074325562, "learning_rate": 1.2992884985978363e-05, "loss": 1.536, "step": 957 }, { "epoch": 1.7658986175115208, "grad_norm": 0.6294746398925781, "learning_rate": 1.2960968845370443e-05, "loss": 1.4541, "step": 958 }, { "epoch": 1.7677419354838708, "grad_norm": 0.6208163499832153, "learning_rate": 1.2929062107077315e-05, "loss": 1.431, "step": 959 }, { "epoch": 1.769585253456221, "grad_norm": 0.6172651052474976, "learning_rate": 1.2897164918226311e-05, "loss": 1.4431, "step": 960 }, { "epoch": 1.7714285714285714, "grad_norm": 0.6244322061538696, "learning_rate": 1.2865277425900725e-05, "loss": 1.4716, "step": 961 }, { "epoch": 1.7732718894009216, "grad_norm": 0.6172502040863037, "learning_rate": 1.2833399777139128e-05, "loss": 1.3914, "step": 962 }, { "epoch": 1.7751152073732719, "grad_norm": 0.6318785548210144, "learning_rate": 1.2801532118934708e-05, "loss": 1.4583, "step": 963 }, { "epoch": 1.7769585253456222, "grad_norm": 0.6199814677238464, "learning_rate": 1.276967459823459e-05, "loss": 1.457, "step": 964 }, { "epoch": 1.7788018433179724, "grad_norm": 0.6255317330360413, "learning_rate": 1.273782736193914e-05, "loss": 1.548, "step": 965 }, { "epoch": 1.7806451612903227, "grad_norm": 0.6222008466720581, "learning_rate": 1.2705990556901311e-05, "loss": 1.4477, "step": 966 }, { "epoch": 1.782488479262673, "grad_norm": 0.6105899810791016, "learning_rate": 1.2674164329925961e-05, "loss": 1.4841, "step": 967 }, { "epoch": 1.784331797235023, "grad_norm": 0.6214556097984314, "learning_rate": 1.2642348827769152e-05, "loss": 1.4731, "step": 968 }, { "epoch": 1.7861751152073733, "grad_norm": 0.6140370965003967, "learning_rate": 1.2610544197137502e-05, "loss": 1.443, "step": 969 }, { "epoch": 1.7880184331797235, "grad_norm": 0.6286352276802063, "learning_rate": 1.257875058468751e-05, "loss": 1.5135, "step": 970 }, { "epoch": 1.7898617511520736, "grad_norm": 0.6316712498664856, "learning_rate": 1.2546968137024856e-05, "loss": 1.5117, "step": 971 }, { "epoch": 1.7917050691244238, "grad_norm": 0.625602126121521, "learning_rate": 1.251519700070373e-05, "loss": 1.4391, "step": 972 }, { "epoch": 1.793548387096774, "grad_norm": 0.6154050827026367, "learning_rate": 1.2483437322226178e-05, "loss": 1.3968, "step": 973 }, { "epoch": 1.7953917050691244, "grad_norm": 0.618768036365509, "learning_rate": 1.2451689248041416e-05, "loss": 1.4865, "step": 974 }, { "epoch": 1.7972350230414746, "grad_norm": 0.6276272535324097, "learning_rate": 1.2419952924545125e-05, "loss": 1.4495, "step": 975 }, { "epoch": 1.799078341013825, "grad_norm": 0.6190533638000488, "learning_rate": 1.2388228498078827e-05, "loss": 1.5269, "step": 976 }, { "epoch": 1.8009216589861752, "grad_norm": 0.6257842779159546, "learning_rate": 1.2356516114929176e-05, "loss": 1.4942, "step": 977 }, { "epoch": 1.8027649769585254, "grad_norm": 0.6364653706550598, "learning_rate": 1.2324815921327288e-05, "loss": 1.4926, "step": 978 }, { "epoch": 1.8046082949308757, "grad_norm": 0.6261301040649414, "learning_rate": 1.2293128063448078e-05, "loss": 1.4694, "step": 979 }, { "epoch": 1.8064516129032258, "grad_norm": 0.6205568313598633, "learning_rate": 1.2261452687409576e-05, "loss": 1.4945, "step": 980 }, { "epoch": 1.808294930875576, "grad_norm": 0.6079727411270142, "learning_rate": 1.2229789939272253e-05, "loss": 1.4428, "step": 981 }, { "epoch": 1.8101382488479263, "grad_norm": 0.6270850896835327, "learning_rate": 1.2198139965038356e-05, "loss": 1.5193, "step": 982 }, { "epoch": 1.8119815668202763, "grad_norm": 0.6150006651878357, "learning_rate": 1.2166502910651232e-05, "loss": 1.4954, "step": 983 }, { "epoch": 1.8138248847926266, "grad_norm": 0.6237301826477051, "learning_rate": 1.2134878921994634e-05, "loss": 1.4647, "step": 984 }, { "epoch": 1.8156682027649769, "grad_norm": 0.6088722348213196, "learning_rate": 1.210326814489209e-05, "loss": 1.467, "step": 985 }, { "epoch": 1.8175115207373271, "grad_norm": 0.6163899898529053, "learning_rate": 1.2071670725106203e-05, "loss": 1.4266, "step": 986 }, { "epoch": 1.8193548387096774, "grad_norm": 0.6351820826530457, "learning_rate": 1.2040086808337965e-05, "loss": 1.49, "step": 987 }, { "epoch": 1.8211981566820277, "grad_norm": 0.6266679167747498, "learning_rate": 1.2008516540226115e-05, "loss": 1.4794, "step": 988 }, { "epoch": 1.823041474654378, "grad_norm": 0.6242062449455261, "learning_rate": 1.1976960066346474e-05, "loss": 1.4369, "step": 989 }, { "epoch": 1.8248847926267282, "grad_norm": 0.615892767906189, "learning_rate": 1.194541753221122e-05, "loss": 1.4711, "step": 990 }, { "epoch": 1.8267281105990785, "grad_norm": 0.6082549691200256, "learning_rate": 1.1913889083268278e-05, "loss": 1.4088, "step": 991 }, { "epoch": 1.8285714285714287, "grad_norm": 0.6081535220146179, "learning_rate": 1.1882374864900616e-05, "loss": 1.4086, "step": 992 }, { "epoch": 1.8304147465437788, "grad_norm": 0.6410644054412842, "learning_rate": 1.1850875022425587e-05, "loss": 1.4633, "step": 993 }, { "epoch": 1.832258064516129, "grad_norm": 0.6226089000701904, "learning_rate": 1.1819389701094241e-05, "loss": 1.4892, "step": 994 }, { "epoch": 1.8341013824884793, "grad_norm": 0.6127923130989075, "learning_rate": 1.1787919046090686e-05, "loss": 1.383, "step": 995 }, { "epoch": 1.8359447004608294, "grad_norm": 0.6225177049636841, "learning_rate": 1.1756463202531392e-05, "loss": 1.5134, "step": 996 }, { "epoch": 1.8377880184331796, "grad_norm": 0.6206548810005188, "learning_rate": 1.1725022315464528e-05, "loss": 1.443, "step": 997 }, { "epoch": 1.83963133640553, "grad_norm": 0.6356161236763, "learning_rate": 1.16935965298693e-05, "loss": 1.5307, "step": 998 }, { "epoch": 1.8414746543778802, "grad_norm": 0.6350612044334412, "learning_rate": 1.1662185990655285e-05, "loss": 1.4637, "step": 999 }, { "epoch": 1.8433179723502304, "grad_norm": 0.6142536401748657, "learning_rate": 1.1630790842661742e-05, "loss": 1.4541, "step": 1000 }, { "epoch": 1.8451612903225807, "grad_norm": 0.6386668086051941, "learning_rate": 1.1599411230656971e-05, "loss": 1.5009, "step": 1001 }, { "epoch": 1.847004608294931, "grad_norm": 0.6296034455299377, "learning_rate": 1.1568047299337632e-05, "loss": 1.4722, "step": 1002 }, { "epoch": 1.8488479262672812, "grad_norm": 0.624134361743927, "learning_rate": 1.1536699193328063e-05, "loss": 1.462, "step": 1003 }, { "epoch": 1.8506912442396315, "grad_norm": 0.6345768570899963, "learning_rate": 1.1505367057179648e-05, "loss": 1.4993, "step": 1004 }, { "epoch": 1.8525345622119815, "grad_norm": 0.6273616552352905, "learning_rate": 1.1474051035370133e-05, "loss": 1.4449, "step": 1005 }, { "epoch": 1.8543778801843318, "grad_norm": 0.6293226480484009, "learning_rate": 1.1442751272302933e-05, "loss": 1.4565, "step": 1006 }, { "epoch": 1.856221198156682, "grad_norm": 0.6160385012626648, "learning_rate": 1.1411467912306518e-05, "loss": 1.4354, "step": 1007 }, { "epoch": 1.8580645161290321, "grad_norm": 0.6164947748184204, "learning_rate": 1.138020109963371e-05, "loss": 1.4292, "step": 1008 }, { "epoch": 1.8599078341013824, "grad_norm": 0.6375648379325867, "learning_rate": 1.1348950978461023e-05, "loss": 1.511, "step": 1009 }, { "epoch": 1.8617511520737327, "grad_norm": 0.6110820770263672, "learning_rate": 1.1317717692888014e-05, "loss": 1.4858, "step": 1010 }, { "epoch": 1.863594470046083, "grad_norm": 0.6256279945373535, "learning_rate": 1.1286501386936606e-05, "loss": 1.4766, "step": 1011 }, { "epoch": 1.8654377880184332, "grad_norm": 0.6110477447509766, "learning_rate": 1.1255302204550414e-05, "loss": 1.4321, "step": 1012 }, { "epoch": 1.8672811059907835, "grad_norm": 0.6220497488975525, "learning_rate": 1.1224120289594111e-05, "loss": 1.4841, "step": 1013 }, { "epoch": 1.8691244239631337, "grad_norm": 0.6274992227554321, "learning_rate": 1.119295578585274e-05, "loss": 1.5051, "step": 1014 }, { "epoch": 1.870967741935484, "grad_norm": 0.6161903142929077, "learning_rate": 1.1161808837031056e-05, "loss": 1.401, "step": 1015 }, { "epoch": 1.8728110599078343, "grad_norm": 0.6302148699760437, "learning_rate": 1.1130679586752861e-05, "loss": 1.4638, "step": 1016 }, { "epoch": 1.8746543778801843, "grad_norm": 0.6273512244224548, "learning_rate": 1.1099568178560356e-05, "loss": 1.4947, "step": 1017 }, { "epoch": 1.8764976958525346, "grad_norm": 0.6425865888595581, "learning_rate": 1.1068474755913473e-05, "loss": 1.4952, "step": 1018 }, { "epoch": 1.8783410138248848, "grad_norm": 0.6212720274925232, "learning_rate": 1.1037399462189178e-05, "loss": 1.5055, "step": 1019 }, { "epoch": 1.8801843317972349, "grad_norm": 0.6158389449119568, "learning_rate": 1.1006342440680885e-05, "loss": 1.4511, "step": 1020 }, { "epoch": 1.8820276497695851, "grad_norm": 0.616641640663147, "learning_rate": 1.0975303834597734e-05, "loss": 1.4472, "step": 1021 }, { "epoch": 1.8838709677419354, "grad_norm": 0.6129568815231323, "learning_rate": 1.0944283787063929e-05, "loss": 1.4616, "step": 1022 }, { "epoch": 1.8857142857142857, "grad_norm": 0.6206817030906677, "learning_rate": 1.0913282441118123e-05, "loss": 1.5284, "step": 1023 }, { "epoch": 1.887557603686636, "grad_norm": 0.6341348886489868, "learning_rate": 1.0882299939712727e-05, "loss": 1.499, "step": 1024 }, { "epoch": 1.8894009216589862, "grad_norm": 0.6195560097694397, "learning_rate": 1.0851336425713248e-05, "loss": 1.4975, "step": 1025 }, { "epoch": 1.8912442396313365, "grad_norm": 0.6235442161560059, "learning_rate": 1.0820392041897647e-05, "loss": 1.4391, "step": 1026 }, { "epoch": 1.8930875576036867, "grad_norm": 0.6287155747413635, "learning_rate": 1.0789466930955672e-05, "loss": 1.5189, "step": 1027 }, { "epoch": 1.894930875576037, "grad_norm": 0.6090032458305359, "learning_rate": 1.0758561235488196e-05, "loss": 1.4664, "step": 1028 }, { "epoch": 1.896774193548387, "grad_norm": 0.6327146291732788, "learning_rate": 1.0727675098006568e-05, "loss": 1.4843, "step": 1029 }, { "epoch": 1.8986175115207373, "grad_norm": 0.6122310161590576, "learning_rate": 1.0696808660931953e-05, "loss": 1.4553, "step": 1030 }, { "epoch": 1.9004608294930876, "grad_norm": 0.6261757612228394, "learning_rate": 1.0665962066594666e-05, "loss": 1.4824, "step": 1031 }, { "epoch": 1.9023041474654376, "grad_norm": 0.6274355053901672, "learning_rate": 1.0635135457233533e-05, "loss": 1.4567, "step": 1032 }, { "epoch": 1.904147465437788, "grad_norm": 0.6279624104499817, "learning_rate": 1.0604328974995229e-05, "loss": 1.4678, "step": 1033 }, { "epoch": 1.9059907834101382, "grad_norm": 0.6198338866233826, "learning_rate": 1.0573542761933596e-05, "loss": 1.4485, "step": 1034 }, { "epoch": 1.9078341013824884, "grad_norm": 0.6208695769309998, "learning_rate": 1.054277696000904e-05, "loss": 1.4819, "step": 1035 }, { "epoch": 1.9096774193548387, "grad_norm": 0.6263963580131531, "learning_rate": 1.0512031711087835e-05, "loss": 1.5255, "step": 1036 }, { "epoch": 1.911520737327189, "grad_norm": 0.6205951571464539, "learning_rate": 1.048130715694149e-05, "loss": 1.521, "step": 1037 }, { "epoch": 1.9133640552995392, "grad_norm": 0.6268174052238464, "learning_rate": 1.0450603439246063e-05, "loss": 1.5401, "step": 1038 }, { "epoch": 1.9152073732718895, "grad_norm": 0.6209962964057922, "learning_rate": 1.0419920699581556e-05, "loss": 1.4511, "step": 1039 }, { "epoch": 1.9170506912442398, "grad_norm": 0.6165273785591125, "learning_rate": 1.038925907943124e-05, "loss": 1.3814, "step": 1040 }, { "epoch": 1.9188940092165898, "grad_norm": 0.6441971659660339, "learning_rate": 1.0358618720180975e-05, "loss": 1.5622, "step": 1041 }, { "epoch": 1.92073732718894, "grad_norm": 0.6228102445602417, "learning_rate": 1.0327999763118607e-05, "loss": 1.4774, "step": 1042 }, { "epoch": 1.9225806451612903, "grad_norm": 0.6098095774650574, "learning_rate": 1.0297402349433286e-05, "loss": 1.4682, "step": 1043 }, { "epoch": 1.9244239631336404, "grad_norm": 0.616716206073761, "learning_rate": 1.0266826620214813e-05, "loss": 1.4747, "step": 1044 }, { "epoch": 1.9262672811059907, "grad_norm": 0.7358875274658203, "learning_rate": 1.0236272716453012e-05, "loss": 1.4536, "step": 1045 }, { "epoch": 1.928110599078341, "grad_norm": 0.6337480545043945, "learning_rate": 1.0205740779037065e-05, "loss": 1.4961, "step": 1046 }, { "epoch": 1.9299539170506912, "grad_norm": 0.6171895265579224, "learning_rate": 1.0175230948754846e-05, "loss": 1.4639, "step": 1047 }, { "epoch": 1.9317972350230415, "grad_norm": 0.6095656752586365, "learning_rate": 1.014474336629231e-05, "loss": 1.4531, "step": 1048 }, { "epoch": 1.9336405529953917, "grad_norm": 0.6109644770622253, "learning_rate": 1.011427817223282e-05, "loss": 1.4413, "step": 1049 }, { "epoch": 1.935483870967742, "grad_norm": 0.63837069272995, "learning_rate": 1.0083835507056483e-05, "loss": 1.5052, "step": 1050 }, { "epoch": 1.9373271889400923, "grad_norm": 0.6383674740791321, "learning_rate": 1.0053415511139547e-05, "loss": 1.4862, "step": 1051 }, { "epoch": 1.9391705069124425, "grad_norm": 0.6082439422607422, "learning_rate": 1.0023018324753726e-05, "loss": 1.4977, "step": 1052 }, { "epoch": 1.9410138248847926, "grad_norm": 0.6215192079544067, "learning_rate": 9.992644088065528e-06, "loss": 1.4827, "step": 1053 }, { "epoch": 1.9428571428571428, "grad_norm": 0.6290032267570496, "learning_rate": 9.96229294113566e-06, "loss": 1.4721, "step": 1054 }, { "epoch": 1.944700460829493, "grad_norm": 0.6324096322059631, "learning_rate": 9.931965023918369e-06, "loss": 1.5131, "step": 1055 }, { "epoch": 1.9465437788018434, "grad_norm": 0.6195716857910156, "learning_rate": 9.901660476260752e-06, "loss": 1.4977, "step": 1056 }, { "epoch": 1.9483870967741934, "grad_norm": 0.6200453042984009, "learning_rate": 9.87137943790217e-06, "loss": 1.4831, "step": 1057 }, { "epoch": 1.9502304147465437, "grad_norm": 0.6134088635444641, "learning_rate": 9.84112204847357e-06, "loss": 1.4725, "step": 1058 }, { "epoch": 1.952073732718894, "grad_norm": 0.6237205862998962, "learning_rate": 9.810888447496859e-06, "loss": 1.4163, "step": 1059 }, { "epoch": 1.9539170506912442, "grad_norm": 0.64919114112854, "learning_rate": 9.780678774384235e-06, "loss": 1.4842, "step": 1060 }, { "epoch": 1.9557603686635945, "grad_norm": 0.6203941106796265, "learning_rate": 9.750493168437574e-06, "loss": 1.4534, "step": 1061 }, { "epoch": 1.9576036866359448, "grad_norm": 0.6301693916320801, "learning_rate": 9.720331768847773e-06, "loss": 1.5155, "step": 1062 }, { "epoch": 1.959447004608295, "grad_norm": 0.6484845876693726, "learning_rate": 9.690194714694101e-06, "loss": 1.4503, "step": 1063 }, { "epoch": 1.9612903225806453, "grad_norm": 0.6338369250297546, "learning_rate": 9.660082144943571e-06, "loss": 1.5774, "step": 1064 }, { "epoch": 1.9631336405529956, "grad_norm": 0.6252181529998779, "learning_rate": 9.629994198450305e-06, "loss": 1.4527, "step": 1065 }, { "epoch": 1.9649769585253456, "grad_norm": 0.6208912134170532, "learning_rate": 9.599931013954858e-06, "loss": 1.4861, "step": 1066 }, { "epoch": 1.9668202764976959, "grad_norm": 0.6281872987747192, "learning_rate": 9.569892730083631e-06, "loss": 1.5461, "step": 1067 }, { "epoch": 1.9686635944700461, "grad_norm": 0.6203744411468506, "learning_rate": 9.53987948534819e-06, "loss": 1.462, "step": 1068 }, { "epoch": 1.9705069124423962, "grad_norm": 0.614604115486145, "learning_rate": 9.509891418144635e-06, "loss": 1.4718, "step": 1069 }, { "epoch": 1.9723502304147464, "grad_norm": 0.6283097863197327, "learning_rate": 9.479928666752976e-06, "loss": 1.5551, "step": 1070 }, { "epoch": 1.9741935483870967, "grad_norm": 0.6157671809196472, "learning_rate": 9.449991369336502e-06, "loss": 1.3977, "step": 1071 }, { "epoch": 1.976036866359447, "grad_norm": 0.6210982203483582, "learning_rate": 9.420079663941096e-06, "loss": 1.5346, "step": 1072 }, { "epoch": 1.9778801843317972, "grad_norm": 0.6194225549697876, "learning_rate": 9.390193688494657e-06, "loss": 1.5029, "step": 1073 }, { "epoch": 1.9797235023041475, "grad_norm": 0.6160725951194763, "learning_rate": 9.360333580806439e-06, "loss": 1.4622, "step": 1074 }, { "epoch": 1.9815668202764978, "grad_norm": 0.6205019950866699, "learning_rate": 9.330499478566404e-06, "loss": 1.425, "step": 1075 }, { "epoch": 1.983410138248848, "grad_norm": 0.6261640191078186, "learning_rate": 9.300691519344602e-06, "loss": 1.4338, "step": 1076 }, { "epoch": 1.9852534562211983, "grad_norm": 0.6265663504600525, "learning_rate": 9.270909840590541e-06, "loss": 1.4644, "step": 1077 }, { "epoch": 1.9870967741935484, "grad_norm": 0.6148210763931274, "learning_rate": 9.24115457963253e-06, "loss": 1.4698, "step": 1078 }, { "epoch": 1.9889400921658986, "grad_norm": 0.6195936799049377, "learning_rate": 9.211425873677075e-06, "loss": 1.4701, "step": 1079 }, { "epoch": 1.9907834101382489, "grad_norm": 0.6328420042991638, "learning_rate": 9.181723859808225e-06, "loss": 1.472, "step": 1080 }, { "epoch": 1.992626728110599, "grad_norm": 0.6189725399017334, "learning_rate": 9.152048674986959e-06, "loss": 1.4852, "step": 1081 }, { "epoch": 1.9944700460829492, "grad_norm": 0.62830650806427, "learning_rate": 9.122400456050524e-06, "loss": 1.4511, "step": 1082 }, { "epoch": 1.9963133640552995, "grad_norm": 0.6179220080375671, "learning_rate": 9.092779339711833e-06, "loss": 1.4446, "step": 1083 }, { "epoch": 1.9981566820276497, "grad_norm": 0.6348637342453003, "learning_rate": 9.063185462558835e-06, "loss": 1.4722, "step": 1084 }, { "epoch": 2.0, "grad_norm": 0.6142784357070923, "learning_rate": 9.033618961053842e-06, "loss": 1.5356, "step": 1085 }, { "epoch": 2.0018433179723503, "grad_norm": 0.6448113322257996, "learning_rate": 9.004079971532972e-06, "loss": 1.388, "step": 1086 }, { "epoch": 2.0036866359447005, "grad_norm": 0.6346662640571594, "learning_rate": 8.974568630205462e-06, "loss": 1.4066, "step": 1087 }, { "epoch": 2.005529953917051, "grad_norm": 0.6126033067703247, "learning_rate": 8.945085073153046e-06, "loss": 1.37, "step": 1088 }, { "epoch": 2.007373271889401, "grad_norm": 0.6365305781364441, "learning_rate": 8.915629436329362e-06, "loss": 1.4118, "step": 1089 }, { "epoch": 2.0092165898617513, "grad_norm": 0.6332470774650574, "learning_rate": 8.886201855559294e-06, "loss": 1.2957, "step": 1090 }, { "epoch": 2.0110599078341016, "grad_norm": 0.6868927478790283, "learning_rate": 8.85680246653835e-06, "loss": 1.3095, "step": 1091 }, { "epoch": 2.0129032258064514, "grad_norm": 0.7050022482872009, "learning_rate": 8.827431404832048e-06, "loss": 1.3224, "step": 1092 }, { "epoch": 2.0147465437788017, "grad_norm": 0.7241560220718384, "learning_rate": 8.798088805875286e-06, "loss": 1.3539, "step": 1093 }, { "epoch": 2.016589861751152, "grad_norm": 0.6983562111854553, "learning_rate": 8.768774804971705e-06, "loss": 1.3512, "step": 1094 }, { "epoch": 2.0184331797235022, "grad_norm": 0.6734762787818909, "learning_rate": 8.739489537293087e-06, "loss": 1.2975, "step": 1095 }, { "epoch": 2.0202764976958525, "grad_norm": 0.6734596490859985, "learning_rate": 8.710233137878714e-06, "loss": 1.3413, "step": 1096 }, { "epoch": 2.0221198156682028, "grad_norm": 0.6779927611351013, "learning_rate": 8.681005741634755e-06, "loss": 1.2907, "step": 1097 }, { "epoch": 2.023963133640553, "grad_norm": 0.6977009177207947, "learning_rate": 8.651807483333627e-06, "loss": 1.4131, "step": 1098 }, { "epoch": 2.0258064516129033, "grad_norm": 0.6914709806442261, "learning_rate": 8.622638497613415e-06, "loss": 1.3721, "step": 1099 }, { "epoch": 2.0276497695852536, "grad_norm": 0.6631557941436768, "learning_rate": 8.593498918977196e-06, "loss": 1.3342, "step": 1100 }, { "epoch": 2.029493087557604, "grad_norm": 0.6785610914230347, "learning_rate": 8.564388881792456e-06, "loss": 1.3777, "step": 1101 }, { "epoch": 2.031336405529954, "grad_norm": 0.6817496418952942, "learning_rate": 8.53530852029047e-06, "loss": 1.295, "step": 1102 }, { "epoch": 2.0331797235023044, "grad_norm": 0.7101837396621704, "learning_rate": 8.506257968565659e-06, "loss": 1.373, "step": 1103 }, { "epoch": 2.035023041474654, "grad_norm": 0.6819498538970947, "learning_rate": 8.477237360574984e-06, "loss": 1.357, "step": 1104 }, { "epoch": 2.0368663594470044, "grad_norm": 0.6876365542411804, "learning_rate": 8.448246830137355e-06, "loss": 1.3519, "step": 1105 }, { "epoch": 2.0387096774193547, "grad_norm": 0.6620230674743652, "learning_rate": 8.419286510932961e-06, "loss": 1.323, "step": 1106 }, { "epoch": 2.040552995391705, "grad_norm": 0.6729424595832825, "learning_rate": 8.390356536502688e-06, "loss": 1.3321, "step": 1107 }, { "epoch": 2.0423963133640552, "grad_norm": 0.6731389760971069, "learning_rate": 8.361457040247518e-06, "loss": 1.372, "step": 1108 }, { "epoch": 2.0442396313364055, "grad_norm": 0.6786398887634277, "learning_rate": 8.332588155427869e-06, "loss": 1.3653, "step": 1109 }, { "epoch": 2.046082949308756, "grad_norm": 0.6752107739448547, "learning_rate": 8.303750015163008e-06, "loss": 1.3927, "step": 1110 }, { "epoch": 2.047926267281106, "grad_norm": 0.6660512685775757, "learning_rate": 8.274942752430449e-06, "loss": 1.3693, "step": 1111 }, { "epoch": 2.0497695852534563, "grad_norm": 0.6701961159706116, "learning_rate": 8.24616650006531e-06, "loss": 1.377, "step": 1112 }, { "epoch": 2.0516129032258066, "grad_norm": 0.6647840738296509, "learning_rate": 8.217421390759717e-06, "loss": 1.3477, "step": 1113 }, { "epoch": 2.053456221198157, "grad_norm": 0.6722539067268372, "learning_rate": 8.188707557062191e-06, "loss": 1.3789, "step": 1114 }, { "epoch": 2.055299539170507, "grad_norm": 0.6843180060386658, "learning_rate": 8.160025131377044e-06, "loss": 1.4177, "step": 1115 }, { "epoch": 2.057142857142857, "grad_norm": 0.6881752014160156, "learning_rate": 8.131374245963753e-06, "loss": 1.3698, "step": 1116 }, { "epoch": 2.058986175115207, "grad_norm": 0.6793879270553589, "learning_rate": 8.102755032936346e-06, "loss": 1.3472, "step": 1117 }, { "epoch": 2.0608294930875575, "grad_norm": 0.6853190064430237, "learning_rate": 8.074167624262834e-06, "loss": 1.3436, "step": 1118 }, { "epoch": 2.0626728110599077, "grad_norm": 0.6778216361999512, "learning_rate": 8.045612151764546e-06, "loss": 1.3622, "step": 1119 }, { "epoch": 2.064516129032258, "grad_norm": 0.6706726551055908, "learning_rate": 8.017088747115554e-06, "loss": 1.3257, "step": 1120 }, { "epoch": 2.0663594470046083, "grad_norm": 0.6715297102928162, "learning_rate": 7.98859754184207e-06, "loss": 1.3787, "step": 1121 }, { "epoch": 2.0682027649769585, "grad_norm": 0.6862074136734009, "learning_rate": 7.960138667321822e-06, "loss": 1.4221, "step": 1122 }, { "epoch": 2.070046082949309, "grad_norm": 0.6729874014854431, "learning_rate": 7.931712254783445e-06, "loss": 1.3922, "step": 1123 }, { "epoch": 2.071889400921659, "grad_norm": 0.6673253774642944, "learning_rate": 7.903318435305914e-06, "loss": 1.3538, "step": 1124 }, { "epoch": 2.0737327188940093, "grad_norm": 0.671423614025116, "learning_rate": 7.874957339817886e-06, "loss": 1.3339, "step": 1125 }, { "epoch": 2.0755760368663596, "grad_norm": 0.6981768608093262, "learning_rate": 7.846629099097127e-06, "loss": 1.3927, "step": 1126 }, { "epoch": 2.07741935483871, "grad_norm": 0.689078688621521, "learning_rate": 7.818333843769917e-06, "loss": 1.3312, "step": 1127 }, { "epoch": 2.0792626728110597, "grad_norm": 0.6774093508720398, "learning_rate": 7.790071704310422e-06, "loss": 1.3016, "step": 1128 }, { "epoch": 2.08110599078341, "grad_norm": 0.6872570514678955, "learning_rate": 7.76184281104011e-06, "loss": 1.3391, "step": 1129 }, { "epoch": 2.0829493087557602, "grad_norm": 0.6759765148162842, "learning_rate": 7.73364729412714e-06, "loss": 1.335, "step": 1130 }, { "epoch": 2.0847926267281105, "grad_norm": 0.673454225063324, "learning_rate": 7.70548528358578e-06, "loss": 1.3521, "step": 1131 }, { "epoch": 2.0866359447004608, "grad_norm": 0.6631683707237244, "learning_rate": 7.677356909275784e-06, "loss": 1.3174, "step": 1132 }, { "epoch": 2.088479262672811, "grad_norm": 0.6814154982566833, "learning_rate": 7.649262300901796e-06, "loss": 1.3549, "step": 1133 }, { "epoch": 2.0903225806451613, "grad_norm": 0.6744573712348938, "learning_rate": 7.621201588012786e-06, "loss": 1.3858, "step": 1134 }, { "epoch": 2.0921658986175116, "grad_norm": 0.6801254153251648, "learning_rate": 7.593174900001398e-06, "loss": 1.2982, "step": 1135 }, { "epoch": 2.094009216589862, "grad_norm": 0.6790188550949097, "learning_rate": 7.565182366103391e-06, "loss": 1.3989, "step": 1136 }, { "epoch": 2.095852534562212, "grad_norm": 0.6671974658966064, "learning_rate": 7.537224115397048e-06, "loss": 1.385, "step": 1137 }, { "epoch": 2.0976958525345624, "grad_norm": 0.6681115031242371, "learning_rate": 7.5093002768025485e-06, "loss": 1.4108, "step": 1138 }, { "epoch": 2.0995391705069126, "grad_norm": 0.6833025217056274, "learning_rate": 7.481410979081389e-06, "loss": 1.3597, "step": 1139 }, { "epoch": 2.1013824884792625, "grad_norm": 0.672607421875, "learning_rate": 7.453556350835821e-06, "loss": 1.3701, "step": 1140 }, { "epoch": 2.1032258064516127, "grad_norm": 0.6917509436607361, "learning_rate": 7.425736520508185e-06, "loss": 1.3828, "step": 1141 }, { "epoch": 2.105069124423963, "grad_norm": 0.6587055921554565, "learning_rate": 7.397951616380401e-06, "loss": 1.2994, "step": 1142 }, { "epoch": 2.1069124423963133, "grad_norm": 0.6666359305381775, "learning_rate": 7.370201766573325e-06, "loss": 1.3644, "step": 1143 }, { "epoch": 2.1087557603686635, "grad_norm": 0.6738996505737305, "learning_rate": 7.342487099046169e-06, "loss": 1.3453, "step": 1144 }, { "epoch": 2.110599078341014, "grad_norm": 0.6894683837890625, "learning_rate": 7.31480774159591e-06, "loss": 1.3742, "step": 1145 }, { "epoch": 2.112442396313364, "grad_norm": 0.6891577839851379, "learning_rate": 7.287163821856719e-06, "loss": 1.3593, "step": 1146 }, { "epoch": 2.1142857142857143, "grad_norm": 0.696865975856781, "learning_rate": 7.259555467299342e-06, "loss": 1.4075, "step": 1147 }, { "epoch": 2.1161290322580646, "grad_norm": 0.6975880861282349, "learning_rate": 7.231982805230538e-06, "loss": 1.3377, "step": 1148 }, { "epoch": 2.117972350230415, "grad_norm": 0.6784470081329346, "learning_rate": 7.204445962792471e-06, "loss": 1.3573, "step": 1149 }, { "epoch": 2.119815668202765, "grad_norm": 0.682109534740448, "learning_rate": 7.176945066962152e-06, "loss": 1.3835, "step": 1150 }, { "epoch": 2.1216589861751154, "grad_norm": 0.6714499592781067, "learning_rate": 7.149480244550822e-06, "loss": 1.3236, "step": 1151 }, { "epoch": 2.1235023041474657, "grad_norm": 0.6768394708633423, "learning_rate": 7.12205162220338e-06, "loss": 1.3855, "step": 1152 }, { "epoch": 2.1253456221198155, "grad_norm": 0.6709174513816833, "learning_rate": 7.094659326397818e-06, "loss": 1.3099, "step": 1153 }, { "epoch": 2.1271889400921657, "grad_norm": 0.6815443634986877, "learning_rate": 7.067303483444603e-06, "loss": 1.3125, "step": 1154 }, { "epoch": 2.129032258064516, "grad_norm": 0.685075044631958, "learning_rate": 7.039984219486109e-06, "loss": 1.3781, "step": 1155 }, { "epoch": 2.1308755760368663, "grad_norm": 0.6932164430618286, "learning_rate": 7.012701660496059e-06, "loss": 1.3745, "step": 1156 }, { "epoch": 2.1327188940092165, "grad_norm": 0.6802293658256531, "learning_rate": 6.985455932278904e-06, "loss": 1.3726, "step": 1157 }, { "epoch": 2.134562211981567, "grad_norm": 0.697127640247345, "learning_rate": 6.958247160469266e-06, "loss": 1.3934, "step": 1158 }, { "epoch": 2.136405529953917, "grad_norm": 0.6775503158569336, "learning_rate": 6.931075470531371e-06, "loss": 1.3542, "step": 1159 }, { "epoch": 2.1382488479262673, "grad_norm": 0.6863602995872498, "learning_rate": 6.9039409877584195e-06, "loss": 1.348, "step": 1160 }, { "epoch": 2.1400921658986176, "grad_norm": 0.6919829845428467, "learning_rate": 6.876843837272075e-06, "loss": 1.3627, "step": 1161 }, { "epoch": 2.141935483870968, "grad_norm": 0.684790849685669, "learning_rate": 6.849784144021859e-06, "loss": 1.3776, "step": 1162 }, { "epoch": 2.143778801843318, "grad_norm": 0.6547074317932129, "learning_rate": 6.82276203278453e-06, "loss": 1.3029, "step": 1163 }, { "epoch": 2.145622119815668, "grad_norm": 0.6686533093452454, "learning_rate": 6.795777628163599e-06, "loss": 1.323, "step": 1164 }, { "epoch": 2.1474654377880182, "grad_norm": 0.6911303400993347, "learning_rate": 6.7688310545886715e-06, "loss": 1.365, "step": 1165 }, { "epoch": 2.1493087557603685, "grad_norm": 0.6692765951156616, "learning_rate": 6.741922436314929e-06, "loss": 1.2475, "step": 1166 }, { "epoch": 2.1511520737327188, "grad_norm": 0.6844581961631775, "learning_rate": 6.715051897422523e-06, "loss": 1.3117, "step": 1167 }, { "epoch": 2.152995391705069, "grad_norm": 0.6967774629592896, "learning_rate": 6.688219561816008e-06, "loss": 1.3559, "step": 1168 }, { "epoch": 2.1548387096774193, "grad_norm": 0.7067211866378784, "learning_rate": 6.661425553223799e-06, "loss": 1.3959, "step": 1169 }, { "epoch": 2.1566820276497696, "grad_norm": 0.703859269618988, "learning_rate": 6.634669995197561e-06, "loss": 1.3766, "step": 1170 }, { "epoch": 2.15852534562212, "grad_norm": 0.6874822378158569, "learning_rate": 6.607953011111655e-06, "loss": 1.4068, "step": 1171 }, { "epoch": 2.16036866359447, "grad_norm": 0.6697222590446472, "learning_rate": 6.581274724162587e-06, "loss": 1.3052, "step": 1172 }, { "epoch": 2.1622119815668204, "grad_norm": 0.6919700503349304, "learning_rate": 6.554635257368411e-06, "loss": 1.3583, "step": 1173 }, { "epoch": 2.1640552995391706, "grad_norm": 0.7014095783233643, "learning_rate": 6.528034733568174e-06, "loss": 1.3912, "step": 1174 }, { "epoch": 2.165898617511521, "grad_norm": 0.6784268021583557, "learning_rate": 6.50147327542137e-06, "loss": 1.3509, "step": 1175 }, { "epoch": 2.167741935483871, "grad_norm": 0.6679689288139343, "learning_rate": 6.474951005407317e-06, "loss": 1.3483, "step": 1176 }, { "epoch": 2.169585253456221, "grad_norm": 0.6822939515113831, "learning_rate": 6.448468045824664e-06, "loss": 1.3322, "step": 1177 }, { "epoch": 2.1714285714285713, "grad_norm": 0.6901209354400635, "learning_rate": 6.4220245187907915e-06, "loss": 1.3977, "step": 1178 }, { "epoch": 2.1732718894009215, "grad_norm": 0.6929839253425598, "learning_rate": 6.395620546241221e-06, "loss": 1.4436, "step": 1179 }, { "epoch": 2.175115207373272, "grad_norm": 0.6934595108032227, "learning_rate": 6.369256249929112e-06, "loss": 1.3183, "step": 1180 }, { "epoch": 2.176958525345622, "grad_norm": 0.6910387873649597, "learning_rate": 6.342931751424656e-06, "loss": 1.3364, "step": 1181 }, { "epoch": 2.1788018433179723, "grad_norm": 0.6756082773208618, "learning_rate": 6.316647172114529e-06, "loss": 1.3502, "step": 1182 }, { "epoch": 2.1806451612903226, "grad_norm": 0.6873049736022949, "learning_rate": 6.2904026332013445e-06, "loss": 1.3436, "step": 1183 }, { "epoch": 2.182488479262673, "grad_norm": 0.6835952997207642, "learning_rate": 6.264198255703071e-06, "loss": 1.3504, "step": 1184 }, { "epoch": 2.184331797235023, "grad_norm": 0.7009977698326111, "learning_rate": 6.238034160452486e-06, "loss": 1.3765, "step": 1185 }, { "epoch": 2.1861751152073734, "grad_norm": 0.6782085299491882, "learning_rate": 6.211910468096631e-06, "loss": 1.3535, "step": 1186 }, { "epoch": 2.1880184331797237, "grad_norm": 0.6798295378684998, "learning_rate": 6.185827299096226e-06, "loss": 1.3375, "step": 1187 }, { "epoch": 2.189861751152074, "grad_norm": 0.6961286067962646, "learning_rate": 6.1597847737251504e-06, "loss": 1.3433, "step": 1188 }, { "epoch": 2.191705069124424, "grad_norm": 0.6996325850486755, "learning_rate": 6.133783012069853e-06, "loss": 1.3743, "step": 1189 }, { "epoch": 2.193548387096774, "grad_norm": 0.7019087672233582, "learning_rate": 6.1078221340288155e-06, "loss": 1.3915, "step": 1190 }, { "epoch": 2.1953917050691243, "grad_norm": 0.6897489428520203, "learning_rate": 6.081902259312013e-06, "loss": 1.42, "step": 1191 }, { "epoch": 2.1972350230414746, "grad_norm": 0.6787887811660767, "learning_rate": 6.05602350744033e-06, "loss": 1.3354, "step": 1192 }, { "epoch": 2.199078341013825, "grad_norm": 0.6967423558235168, "learning_rate": 6.030185997745031e-06, "loss": 1.3959, "step": 1193 }, { "epoch": 2.200921658986175, "grad_norm": 0.6939237117767334, "learning_rate": 6.004389849367223e-06, "loss": 1.3981, "step": 1194 }, { "epoch": 2.2027649769585254, "grad_norm": 0.6991174817085266, "learning_rate": 5.978635181257254e-06, "loss": 1.3817, "step": 1195 }, { "epoch": 2.2046082949308756, "grad_norm": 0.7009739279747009, "learning_rate": 5.952922112174231e-06, "loss": 1.3519, "step": 1196 }, { "epoch": 2.206451612903226, "grad_norm": 0.6813941597938538, "learning_rate": 5.927250760685441e-06, "loss": 1.3679, "step": 1197 }, { "epoch": 2.208294930875576, "grad_norm": 0.6973612904548645, "learning_rate": 5.901621245165772e-06, "loss": 1.3585, "step": 1198 }, { "epoch": 2.2101382488479264, "grad_norm": 0.6828699111938477, "learning_rate": 5.8760336837972355e-06, "loss": 1.3165, "step": 1199 }, { "epoch": 2.2119815668202767, "grad_norm": 0.6723896265029907, "learning_rate": 5.850488194568366e-06, "loss": 1.2887, "step": 1200 }, { "epoch": 2.2138248847926265, "grad_norm": 0.690314769744873, "learning_rate": 5.824984895273697e-06, "loss": 1.4159, "step": 1201 }, { "epoch": 2.215668202764977, "grad_norm": 0.6960341334342957, "learning_rate": 5.799523903513228e-06, "loss": 1.3834, "step": 1202 }, { "epoch": 2.217511520737327, "grad_norm": 0.6980242133140564, "learning_rate": 5.774105336691861e-06, "loss": 1.3443, "step": 1203 }, { "epoch": 2.2193548387096773, "grad_norm": 0.6807960867881775, "learning_rate": 5.748729312018869e-06, "loss": 1.3543, "step": 1204 }, { "epoch": 2.2211981566820276, "grad_norm": 0.6814734935760498, "learning_rate": 5.723395946507367e-06, "loss": 1.3776, "step": 1205 }, { "epoch": 2.223041474654378, "grad_norm": 0.6977008581161499, "learning_rate": 5.6981053569737525e-06, "loss": 1.3625, "step": 1206 }, { "epoch": 2.224884792626728, "grad_norm": 0.6968874931335449, "learning_rate": 5.67285766003717e-06, "loss": 1.3899, "step": 1207 }, { "epoch": 2.2267281105990784, "grad_norm": 0.6985254883766174, "learning_rate": 5.647652972118998e-06, "loss": 1.4092, "step": 1208 }, { "epoch": 2.2285714285714286, "grad_norm": 0.675391674041748, "learning_rate": 5.622491409442272e-06, "loss": 1.2546, "step": 1209 }, { "epoch": 2.230414746543779, "grad_norm": 0.696471095085144, "learning_rate": 5.597373088031193e-06, "loss": 1.4267, "step": 1210 }, { "epoch": 2.232258064516129, "grad_norm": 0.675577700138092, "learning_rate": 5.572298123710536e-06, "loss": 1.3478, "step": 1211 }, { "epoch": 2.2341013824884794, "grad_norm": 0.6813495755195618, "learning_rate": 5.54726663210518e-06, "loss": 1.4113, "step": 1212 }, { "epoch": 2.2359447004608297, "grad_norm": 0.6868924498558044, "learning_rate": 5.522278728639544e-06, "loss": 1.3613, "step": 1213 }, { "epoch": 2.2377880184331795, "grad_norm": 0.69166100025177, "learning_rate": 5.497334528537022e-06, "loss": 1.3028, "step": 1214 }, { "epoch": 2.23963133640553, "grad_norm": 0.6837006211280823, "learning_rate": 5.472434146819522e-06, "loss": 1.3908, "step": 1215 }, { "epoch": 2.24147465437788, "grad_norm": 0.688493549823761, "learning_rate": 5.447577698306876e-06, "loss": 1.4164, "step": 1216 }, { "epoch": 2.2433179723502303, "grad_norm": 0.6851277947425842, "learning_rate": 5.422765297616336e-06, "loss": 1.4041, "step": 1217 }, { "epoch": 2.2451612903225806, "grad_norm": 0.6843251585960388, "learning_rate": 5.3979970591620555e-06, "loss": 1.3534, "step": 1218 }, { "epoch": 2.247004608294931, "grad_norm": 0.6896577477455139, "learning_rate": 5.37327309715453e-06, "loss": 1.3825, "step": 1219 }, { "epoch": 2.248847926267281, "grad_norm": 0.6822614669799805, "learning_rate": 5.348593525600093e-06, "loss": 1.3201, "step": 1220 }, { "epoch": 2.2506912442396314, "grad_norm": 0.6849269270896912, "learning_rate": 5.323958458300403e-06, "loss": 1.2938, "step": 1221 }, { "epoch": 2.2525345622119817, "grad_norm": 0.6779518127441406, "learning_rate": 5.29936800885188e-06, "loss": 1.2902, "step": 1222 }, { "epoch": 2.254377880184332, "grad_norm": 0.6803906559944153, "learning_rate": 5.2748222906452105e-06, "loss": 1.3764, "step": 1223 }, { "epoch": 2.256221198156682, "grad_norm": 0.6799220442771912, "learning_rate": 5.250321416864828e-06, "loss": 1.3215, "step": 1224 }, { "epoch": 2.258064516129032, "grad_norm": 0.6993427276611328, "learning_rate": 5.2258655004883696e-06, "loss": 1.357, "step": 1225 }, { "epoch": 2.2599078341013823, "grad_norm": 0.6868489980697632, "learning_rate": 5.201454654286166e-06, "loss": 1.3206, "step": 1226 }, { "epoch": 2.2617511520737326, "grad_norm": 0.7051482200622559, "learning_rate": 5.177088990820725e-06, "loss": 1.401, "step": 1227 }, { "epoch": 2.263594470046083, "grad_norm": 0.6961747407913208, "learning_rate": 5.152768622446211e-06, "loss": 1.3809, "step": 1228 }, { "epoch": 2.265437788018433, "grad_norm": 0.6973814964294434, "learning_rate": 5.128493661307934e-06, "loss": 1.3734, "step": 1229 }, { "epoch": 2.2672811059907834, "grad_norm": 0.6868012547492981, "learning_rate": 5.104264219341793e-06, "loss": 1.4125, "step": 1230 }, { "epoch": 2.2691244239631336, "grad_norm": 0.6908063292503357, "learning_rate": 5.080080408273821e-06, "loss": 1.3699, "step": 1231 }, { "epoch": 2.270967741935484, "grad_norm": 0.7081430554389954, "learning_rate": 5.055942339619639e-06, "loss": 1.3477, "step": 1232 }, { "epoch": 2.272811059907834, "grad_norm": 0.6825849413871765, "learning_rate": 5.031850124683913e-06, "loss": 1.3962, "step": 1233 }, { "epoch": 2.2746543778801844, "grad_norm": 0.686294674873352, "learning_rate": 5.0078038745599e-06, "loss": 1.3794, "step": 1234 }, { "epoch": 2.2764976958525347, "grad_norm": 0.6920833587646484, "learning_rate": 4.983803700128893e-06, "loss": 1.3368, "step": 1235 }, { "epoch": 2.278341013824885, "grad_norm": 0.687598705291748, "learning_rate": 4.959849712059716e-06, "loss": 1.3466, "step": 1236 }, { "epoch": 2.2801843317972352, "grad_norm": 0.7005990147590637, "learning_rate": 4.935942020808239e-06, "loss": 1.413, "step": 1237 }, { "epoch": 2.282027649769585, "grad_norm": 0.6932473182678223, "learning_rate": 4.912080736616833e-06, "loss": 1.3757, "step": 1238 }, { "epoch": 2.2838709677419353, "grad_norm": 0.6995499730110168, "learning_rate": 4.888265969513876e-06, "loss": 1.381, "step": 1239 }, { "epoch": 2.2857142857142856, "grad_norm": 0.7041058540344238, "learning_rate": 4.864497829313269e-06, "loss": 1.4004, "step": 1240 }, { "epoch": 2.287557603686636, "grad_norm": 0.6951724886894226, "learning_rate": 4.840776425613887e-06, "loss": 1.3643, "step": 1241 }, { "epoch": 2.289400921658986, "grad_norm": 0.6973668932914734, "learning_rate": 4.817101867799097e-06, "loss": 1.3655, "step": 1242 }, { "epoch": 2.2912442396313364, "grad_norm": 0.6936375498771667, "learning_rate": 4.793474265036272e-06, "loss": 1.3885, "step": 1243 }, { "epoch": 2.2930875576036867, "grad_norm": 0.6932437419891357, "learning_rate": 4.769893726276243e-06, "loss": 1.4271, "step": 1244 }, { "epoch": 2.294930875576037, "grad_norm": 0.7016628384590149, "learning_rate": 4.746360360252834e-06, "loss": 1.3931, "step": 1245 }, { "epoch": 2.296774193548387, "grad_norm": 0.6992587447166443, "learning_rate": 4.722874275482338e-06, "loss": 1.3968, "step": 1246 }, { "epoch": 2.2986175115207375, "grad_norm": 0.6914000511169434, "learning_rate": 4.699435580263044e-06, "loss": 1.3286, "step": 1247 }, { "epoch": 2.3004608294930877, "grad_norm": 0.6802775859832764, "learning_rate": 4.676044382674702e-06, "loss": 1.2899, "step": 1248 }, { "epoch": 2.3023041474654375, "grad_norm": 0.685889720916748, "learning_rate": 4.652700790578047e-06, "loss": 1.3438, "step": 1249 }, { "epoch": 2.3041474654377883, "grad_norm": 0.6978851556777954, "learning_rate": 4.629404911614306e-06, "loss": 1.3308, "step": 1250 }, { "epoch": 2.305990783410138, "grad_norm": 0.7013998627662659, "learning_rate": 4.606156853204682e-06, "loss": 1.4056, "step": 1251 }, { "epoch": 2.3078341013824883, "grad_norm": 0.6957346796989441, "learning_rate": 4.5829567225498696e-06, "loss": 1.3495, "step": 1252 }, { "epoch": 2.3096774193548386, "grad_norm": 0.700558602809906, "learning_rate": 4.559804626629574e-06, "loss": 1.3925, "step": 1253 }, { "epoch": 2.311520737327189, "grad_norm": 0.6776589751243591, "learning_rate": 4.536700672201987e-06, "loss": 1.3284, "step": 1254 }, { "epoch": 2.313364055299539, "grad_norm": 0.6822309494018555, "learning_rate": 4.513644965803316e-06, "loss": 1.3351, "step": 1255 }, { "epoch": 2.3152073732718894, "grad_norm": 0.6825265884399414, "learning_rate": 4.490637613747301e-06, "loss": 1.3577, "step": 1256 }, { "epoch": 2.3170506912442397, "grad_norm": 0.6912850141525269, "learning_rate": 4.4676787221247e-06, "loss": 1.3797, "step": 1257 }, { "epoch": 2.31889400921659, "grad_norm": 0.7001147270202637, "learning_rate": 4.444768396802808e-06, "loss": 1.3887, "step": 1258 }, { "epoch": 2.32073732718894, "grad_norm": 0.6789734363555908, "learning_rate": 4.421906743424989e-06, "loss": 1.3149, "step": 1259 }, { "epoch": 2.3225806451612905, "grad_norm": 0.7131507992744446, "learning_rate": 4.39909386741016e-06, "loss": 1.3509, "step": 1260 }, { "epoch": 2.3244239631336407, "grad_norm": 0.6933061480522156, "learning_rate": 4.376329873952317e-06, "loss": 1.3676, "step": 1261 }, { "epoch": 2.3262672811059906, "grad_norm": 0.6951211094856262, "learning_rate": 4.353614868020051e-06, "loss": 1.3985, "step": 1262 }, { "epoch": 2.328110599078341, "grad_norm": 0.688729465007782, "learning_rate": 4.330948954356076e-06, "loss": 1.4083, "step": 1263 }, { "epoch": 2.329953917050691, "grad_norm": 0.6910402178764343, "learning_rate": 4.308332237476717e-06, "loss": 1.3192, "step": 1264 }, { "epoch": 2.3317972350230414, "grad_norm": 0.6972960829734802, "learning_rate": 4.285764821671446e-06, "loss": 1.3692, "step": 1265 }, { "epoch": 2.3336405529953916, "grad_norm": 0.701671838760376, "learning_rate": 4.263246811002414e-06, "loss": 1.4218, "step": 1266 }, { "epoch": 2.335483870967742, "grad_norm": 0.696283221244812, "learning_rate": 4.240778309303942e-06, "loss": 1.3779, "step": 1267 }, { "epoch": 2.337327188940092, "grad_norm": 0.6886963248252869, "learning_rate": 4.218359420182055e-06, "loss": 1.3591, "step": 1268 }, { "epoch": 2.3391705069124424, "grad_norm": 0.6748579144477844, "learning_rate": 4.195990247014025e-06, "loss": 1.3033, "step": 1269 }, { "epoch": 2.3410138248847927, "grad_norm": 0.6890683770179749, "learning_rate": 4.173670892947858e-06, "loss": 1.4007, "step": 1270 }, { "epoch": 2.342857142857143, "grad_norm": 0.7063291072845459, "learning_rate": 4.151401460901833e-06, "loss": 1.3436, "step": 1271 }, { "epoch": 2.3447004608294932, "grad_norm": 0.690841019153595, "learning_rate": 4.1291820535640505e-06, "loss": 1.3512, "step": 1272 }, { "epoch": 2.3465437788018435, "grad_norm": 0.677006185054779, "learning_rate": 4.107012773391918e-06, "loss": 1.3228, "step": 1273 }, { "epoch": 2.3483870967741938, "grad_norm": 0.6873452067375183, "learning_rate": 4.084893722611706e-06, "loss": 1.3463, "step": 1274 }, { "epoch": 2.3502304147465436, "grad_norm": 0.6967918872833252, "learning_rate": 4.062825003218075e-06, "loss": 1.303, "step": 1275 }, { "epoch": 2.352073732718894, "grad_norm": 0.6956656575202942, "learning_rate": 4.04080671697359e-06, "loss": 1.3326, "step": 1276 }, { "epoch": 2.353917050691244, "grad_norm": 0.7123549580574036, "learning_rate": 4.018838965408259e-06, "loss": 1.3362, "step": 1277 }, { "epoch": 2.3557603686635944, "grad_norm": 0.6919143795967102, "learning_rate": 3.99692184981908e-06, "loss": 1.2931, "step": 1278 }, { "epoch": 2.3576036866359447, "grad_norm": 0.7013681530952454, "learning_rate": 3.975055471269545e-06, "loss": 1.3301, "step": 1279 }, { "epoch": 2.359447004608295, "grad_norm": 0.7115483283996582, "learning_rate": 3.953239930589196e-06, "loss": 1.4116, "step": 1280 }, { "epoch": 2.361290322580645, "grad_norm": 0.6983519196510315, "learning_rate": 3.931475328373145e-06, "loss": 1.3881, "step": 1281 }, { "epoch": 2.3631336405529955, "grad_norm": 0.6783454418182373, "learning_rate": 3.909761764981637e-06, "loss": 1.3458, "step": 1282 }, { "epoch": 2.3649769585253457, "grad_norm": 0.6939337849617004, "learning_rate": 3.888099340539548e-06, "loss": 1.3673, "step": 1283 }, { "epoch": 2.366820276497696, "grad_norm": 0.6690245866775513, "learning_rate": 3.866488154935951e-06, "loss": 1.3026, "step": 1284 }, { "epoch": 2.3686635944700463, "grad_norm": 0.6817122101783752, "learning_rate": 3.844928307823655e-06, "loss": 1.3882, "step": 1285 }, { "epoch": 2.370506912442396, "grad_norm": 0.7006426453590393, "learning_rate": 3.823419898618733e-06, "loss": 1.3709, "step": 1286 }, { "epoch": 2.3723502304147464, "grad_norm": 0.6815746426582336, "learning_rate": 3.801963026500058e-06, "loss": 1.3332, "step": 1287 }, { "epoch": 2.3741935483870966, "grad_norm": 0.7056257128715515, "learning_rate": 3.7805577904088817e-06, "loss": 1.3765, "step": 1288 }, { "epoch": 2.376036866359447, "grad_norm": 0.6859834790229797, "learning_rate": 3.7592042890483335e-06, "loss": 1.3858, "step": 1289 }, { "epoch": 2.377880184331797, "grad_norm": 0.6959249973297119, "learning_rate": 3.7379026208829865e-06, "loss": 1.4264, "step": 1290 }, { "epoch": 2.3797235023041474, "grad_norm": 0.6897103190422058, "learning_rate": 3.7166528841384197e-06, "loss": 1.3078, "step": 1291 }, { "epoch": 2.3815668202764977, "grad_norm": 0.6846210956573486, "learning_rate": 3.695455176800719e-06, "loss": 1.365, "step": 1292 }, { "epoch": 2.383410138248848, "grad_norm": 0.7128205895423889, "learning_rate": 3.6743095966160773e-06, "loss": 1.4263, "step": 1293 }, { "epoch": 2.385253456221198, "grad_norm": 0.6864677667617798, "learning_rate": 3.6532162410903165e-06, "loss": 1.2979, "step": 1294 }, { "epoch": 2.3870967741935485, "grad_norm": 0.690218448638916, "learning_rate": 3.6321752074884374e-06, "loss": 1.3753, "step": 1295 }, { "epoch": 2.3889400921658988, "grad_norm": 0.6798112392425537, "learning_rate": 3.6111865928341723e-06, "loss": 1.3525, "step": 1296 }, { "epoch": 2.390783410138249, "grad_norm": 0.6902985572814941, "learning_rate": 3.5902504939095444e-06, "loss": 1.3868, "step": 1297 }, { "epoch": 2.3926267281105993, "grad_norm": 0.7128584980964661, "learning_rate": 3.5693670072544253e-06, "loss": 1.3945, "step": 1298 }, { "epoch": 2.394470046082949, "grad_norm": 0.7104604840278625, "learning_rate": 3.5485362291660727e-06, "loss": 1.3806, "step": 1299 }, { "epoch": 2.3963133640552994, "grad_norm": 0.690379798412323, "learning_rate": 3.527758255698696e-06, "loss": 1.3529, "step": 1300 }, { "epoch": 2.3981566820276496, "grad_norm": 0.6911632418632507, "learning_rate": 3.507033182663026e-06, "loss": 1.4435, "step": 1301 }, { "epoch": 2.4, "grad_norm": 0.6938514113426208, "learning_rate": 3.4863611056258456e-06, "loss": 1.3841, "step": 1302 }, { "epoch": 2.40184331797235, "grad_norm": 0.6961570978164673, "learning_rate": 3.465742119909568e-06, "loss": 1.4086, "step": 1303 }, { "epoch": 2.4036866359447004, "grad_norm": 0.69583660364151, "learning_rate": 3.445176320591806e-06, "loss": 1.3643, "step": 1304 }, { "epoch": 2.4055299539170507, "grad_norm": 0.6878751516342163, "learning_rate": 3.4246638025049043e-06, "loss": 1.318, "step": 1305 }, { "epoch": 2.407373271889401, "grad_norm": 0.6974665522575378, "learning_rate": 3.4042046602355238e-06, "loss": 1.3839, "step": 1306 }, { "epoch": 2.4092165898617512, "grad_norm": 0.6880777478218079, "learning_rate": 3.3837989881242142e-06, "loss": 1.3411, "step": 1307 }, { "epoch": 2.4110599078341015, "grad_norm": 0.6920519471168518, "learning_rate": 3.363446880264937e-06, "loss": 1.4285, "step": 1308 }, { "epoch": 2.412903225806452, "grad_norm": 0.7087485194206238, "learning_rate": 3.3431484305046838e-06, "loss": 1.3789, "step": 1309 }, { "epoch": 2.4147465437788016, "grad_norm": 0.6984785199165344, "learning_rate": 3.3229037324430228e-06, "loss": 1.3256, "step": 1310 }, { "epoch": 2.4165898617511523, "grad_norm": 0.698135495185852, "learning_rate": 3.3027128794316353e-06, "loss": 1.3923, "step": 1311 }, { "epoch": 2.418433179723502, "grad_norm": 0.6933091878890991, "learning_rate": 3.282575964573943e-06, "loss": 1.3683, "step": 1312 }, { "epoch": 2.4202764976958524, "grad_norm": 0.7052818536758423, "learning_rate": 3.2624930807246443e-06, "loss": 1.3845, "step": 1313 }, { "epoch": 2.4221198156682027, "grad_norm": 0.6939657330513, "learning_rate": 3.2424643204892734e-06, "loss": 1.3373, "step": 1314 }, { "epoch": 2.423963133640553, "grad_norm": 0.6912818551063538, "learning_rate": 3.2224897762238143e-06, "loss": 1.3381, "step": 1315 }, { "epoch": 2.425806451612903, "grad_norm": 0.7107348442077637, "learning_rate": 3.2025695400342346e-06, "loss": 1.3834, "step": 1316 }, { "epoch": 2.4276497695852535, "grad_norm": 0.6790490746498108, "learning_rate": 3.1827037037760965e-06, "loss": 1.2755, "step": 1317 }, { "epoch": 2.4294930875576037, "grad_norm": 0.699441134929657, "learning_rate": 3.162892359054098e-06, "loss": 1.4038, "step": 1318 }, { "epoch": 2.431336405529954, "grad_norm": 0.6973543763160706, "learning_rate": 3.1431355972216697e-06, "loss": 1.3676, "step": 1319 }, { "epoch": 2.4331797235023043, "grad_norm": 0.707459568977356, "learning_rate": 3.1234335093805655e-06, "loss": 1.365, "step": 1320 }, { "epoch": 2.4350230414746545, "grad_norm": 0.6954874992370605, "learning_rate": 3.1037861863804117e-06, "loss": 1.3783, "step": 1321 }, { "epoch": 2.436866359447005, "grad_norm": 0.6847036480903625, "learning_rate": 3.08419371881831e-06, "loss": 1.3485, "step": 1322 }, { "epoch": 2.4387096774193546, "grad_norm": 0.7029310464859009, "learning_rate": 3.0646561970384226e-06, "loss": 1.3705, "step": 1323 }, { "epoch": 2.440552995391705, "grad_norm": 0.6984797120094299, "learning_rate": 3.045173711131537e-06, "loss": 1.3549, "step": 1324 }, { "epoch": 2.442396313364055, "grad_norm": 0.6876455545425415, "learning_rate": 3.02574635093466e-06, "loss": 1.3486, "step": 1325 }, { "epoch": 2.4442396313364054, "grad_norm": 0.6995170712471008, "learning_rate": 3.0063742060306227e-06, "loss": 1.3729, "step": 1326 }, { "epoch": 2.4460829493087557, "grad_norm": 0.7218362092971802, "learning_rate": 2.9870573657476196e-06, "loss": 1.4211, "step": 1327 }, { "epoch": 2.447926267281106, "grad_norm": 0.6894773244857788, "learning_rate": 2.96779591915885e-06, "loss": 1.357, "step": 1328 }, { "epoch": 2.4497695852534562, "grad_norm": 0.7010200619697571, "learning_rate": 2.948589955082085e-06, "loss": 1.4049, "step": 1329 }, { "epoch": 2.4516129032258065, "grad_norm": 0.6923837661743164, "learning_rate": 2.9294395620792306e-06, "loss": 1.3203, "step": 1330 }, { "epoch": 2.4534562211981568, "grad_norm": 0.7083666324615479, "learning_rate": 2.910344828455975e-06, "loss": 1.3523, "step": 1331 }, { "epoch": 2.455299539170507, "grad_norm": 0.6948068141937256, "learning_rate": 2.8913058422613363e-06, "loss": 1.3841, "step": 1332 }, { "epoch": 2.4571428571428573, "grad_norm": 0.7008885741233826, "learning_rate": 2.872322691287268e-06, "loss": 1.3892, "step": 1333 }, { "epoch": 2.4589861751152076, "grad_norm": 0.6987984776496887, "learning_rate": 2.8533954630682728e-06, "loss": 1.4086, "step": 1334 }, { "epoch": 2.460829493087558, "grad_norm": 0.7005396485328674, "learning_rate": 2.834524244880974e-06, "loss": 1.404, "step": 1335 }, { "epoch": 2.4626728110599077, "grad_norm": 0.6974455714225769, "learning_rate": 2.81570912374372e-06, "loss": 1.3851, "step": 1336 }, { "epoch": 2.464516129032258, "grad_norm": 0.6951833367347717, "learning_rate": 2.796950186416199e-06, "loss": 1.3632, "step": 1337 }, { "epoch": 2.466359447004608, "grad_norm": 0.690406084060669, "learning_rate": 2.778247519399011e-06, "loss": 1.3805, "step": 1338 }, { "epoch": 2.4682027649769585, "grad_norm": 0.6913447380065918, "learning_rate": 2.7596012089333015e-06, "loss": 1.3446, "step": 1339 }, { "epoch": 2.4700460829493087, "grad_norm": 0.6822007894515991, "learning_rate": 2.74101134100033e-06, "loss": 1.3662, "step": 1340 }, { "epoch": 2.471889400921659, "grad_norm": 0.6963269114494324, "learning_rate": 2.7224780013210965e-06, "loss": 1.3676, "step": 1341 }, { "epoch": 2.4737327188940093, "grad_norm": 0.6866534948348999, "learning_rate": 2.7040012753559477e-06, "loss": 1.3452, "step": 1342 }, { "epoch": 2.4755760368663595, "grad_norm": 0.686664879322052, "learning_rate": 2.6855812483041555e-06, "loss": 1.4047, "step": 1343 }, { "epoch": 2.47741935483871, "grad_norm": 0.6777949929237366, "learning_rate": 2.667218005103562e-06, "loss": 1.3011, "step": 1344 }, { "epoch": 2.47926267281106, "grad_norm": 0.7019817233085632, "learning_rate": 2.6489116304301697e-06, "loss": 1.3816, "step": 1345 }, { "epoch": 2.4811059907834103, "grad_norm": 0.7036236524581909, "learning_rate": 2.6306622086977288e-06, "loss": 1.3411, "step": 1346 }, { "epoch": 2.48294930875576, "grad_norm": 0.6954577565193176, "learning_rate": 2.6124698240573973e-06, "loss": 1.3279, "step": 1347 }, { "epoch": 2.4847926267281104, "grad_norm": 0.6952524185180664, "learning_rate": 2.59433456039731e-06, "loss": 1.3657, "step": 1348 }, { "epoch": 2.4866359447004607, "grad_norm": 0.6980507373809814, "learning_rate": 2.576256501342206e-06, "loss": 1.4212, "step": 1349 }, { "epoch": 2.488479262672811, "grad_norm": 0.680519163608551, "learning_rate": 2.558235730253057e-06, "loss": 1.36, "step": 1350 }, { "epoch": 2.490322580645161, "grad_norm": 0.7142099142074585, "learning_rate": 2.540272330226658e-06, "loss": 1.3548, "step": 1351 }, { "epoch": 2.4921658986175115, "grad_norm": 0.6899405717849731, "learning_rate": 2.5223663840952584e-06, "loss": 1.3705, "step": 1352 }, { "epoch": 2.4940092165898617, "grad_norm": 0.6969538331031799, "learning_rate": 2.5045179744261864e-06, "loss": 1.3582, "step": 1353 }, { "epoch": 2.495852534562212, "grad_norm": 0.6933202147483826, "learning_rate": 2.486727183521451e-06, "loss": 1.3714, "step": 1354 }, { "epoch": 2.4976958525345623, "grad_norm": 0.712708592414856, "learning_rate": 2.46899409341737e-06, "loss": 1.3558, "step": 1355 }, { "epoch": 2.4995391705069125, "grad_norm": 0.6939406394958496, "learning_rate": 2.451318785884205e-06, "loss": 1.3618, "step": 1356 }, { "epoch": 2.501382488479263, "grad_norm": 0.695871114730835, "learning_rate": 2.4337013424257604e-06, "loss": 1.3456, "step": 1357 }, { "epoch": 2.5032258064516126, "grad_norm": 0.6812525391578674, "learning_rate": 2.416141844279023e-06, "loss": 1.3096, "step": 1358 }, { "epoch": 2.5050691244239633, "grad_norm": 0.6910449862480164, "learning_rate": 2.398640372413792e-06, "loss": 1.3358, "step": 1359 }, { "epoch": 2.506912442396313, "grad_norm": 0.6854673624038696, "learning_rate": 2.38119700753228e-06, "loss": 1.3437, "step": 1360 }, { "epoch": 2.5087557603686634, "grad_norm": 0.7046725749969482, "learning_rate": 2.3638118300687842e-06, "loss": 1.3896, "step": 1361 }, { "epoch": 2.5105990783410137, "grad_norm": 0.7020016312599182, "learning_rate": 2.3464849201892596e-06, "loss": 1.3573, "step": 1362 }, { "epoch": 2.512442396313364, "grad_norm": 0.6948372721672058, "learning_rate": 2.329216357791003e-06, "loss": 1.3689, "step": 1363 }, { "epoch": 2.5142857142857142, "grad_norm": 0.7082976698875427, "learning_rate": 2.3120062225022587e-06, "loss": 1.3785, "step": 1364 }, { "epoch": 2.5161290322580645, "grad_norm": 0.6942178010940552, "learning_rate": 2.294854593681834e-06, "loss": 1.3872, "step": 1365 }, { "epoch": 2.5179723502304148, "grad_norm": 0.689118504524231, "learning_rate": 2.2777615504187787e-06, "loss": 1.3156, "step": 1366 }, { "epoch": 2.519815668202765, "grad_norm": 0.6966586112976074, "learning_rate": 2.260727171531982e-06, "loss": 1.4158, "step": 1367 }, { "epoch": 2.5216589861751153, "grad_norm": 0.6985451579093933, "learning_rate": 2.2437515355698157e-06, "loss": 1.36, "step": 1368 }, { "epoch": 2.5235023041474656, "grad_norm": 0.6879088282585144, "learning_rate": 2.2268347208097954e-06, "loss": 1.3443, "step": 1369 }, { "epoch": 2.525345622119816, "grad_norm": 0.6737207174301147, "learning_rate": 2.2099768052581892e-06, "loss": 1.2475, "step": 1370 }, { "epoch": 2.5271889400921657, "grad_norm": 0.6908534169197083, "learning_rate": 2.1931778666496704e-06, "loss": 1.332, "step": 1371 }, { "epoch": 2.5290322580645164, "grad_norm": 0.6995269060134888, "learning_rate": 2.1764379824469704e-06, "loss": 1.401, "step": 1372 }, { "epoch": 2.530875576036866, "grad_norm": 0.7146071195602417, "learning_rate": 2.1597572298405e-06, "loss": 1.3561, "step": 1373 }, { "epoch": 2.5327188940092165, "grad_norm": 0.6886147856712341, "learning_rate": 2.1431356857480076e-06, "loss": 1.3331, "step": 1374 }, { "epoch": 2.5345622119815667, "grad_norm": 0.7007649540901184, "learning_rate": 2.126573426814226e-06, "loss": 1.3344, "step": 1375 }, { "epoch": 2.536405529953917, "grad_norm": 0.688534677028656, "learning_rate": 2.110070529410508e-06, "loss": 1.3246, "step": 1376 }, { "epoch": 2.5382488479262673, "grad_norm": 0.7089985609054565, "learning_rate": 2.093627069634484e-06, "loss": 1.4401, "step": 1377 }, { "epoch": 2.5400921658986175, "grad_norm": 0.6849905252456665, "learning_rate": 2.0772431233097007e-06, "loss": 1.3394, "step": 1378 }, { "epoch": 2.541935483870968, "grad_norm": 0.6877198815345764, "learning_rate": 2.060918765985288e-06, "loss": 1.3083, "step": 1379 }, { "epoch": 2.543778801843318, "grad_norm": 0.6927348375320435, "learning_rate": 2.044654072935603e-06, "loss": 1.3251, "step": 1380 }, { "epoch": 2.5456221198156683, "grad_norm": 0.6884286999702454, "learning_rate": 2.028449119159862e-06, "loss": 1.3462, "step": 1381 }, { "epoch": 2.5474654377880186, "grad_norm": 0.6815638542175293, "learning_rate": 2.012303979381836e-06, "loss": 1.3374, "step": 1382 }, { "epoch": 2.549308755760369, "grad_norm": 0.7098857760429382, "learning_rate": 1.9962187280494708e-06, "loss": 1.3831, "step": 1383 }, { "epoch": 2.5511520737327187, "grad_norm": 0.69313645362854, "learning_rate": 1.980193439334554e-06, "loss": 1.3521, "step": 1384 }, { "epoch": 2.5529953917050694, "grad_norm": 0.6805469393730164, "learning_rate": 1.9642281871323896e-06, "loss": 1.3119, "step": 1385 }, { "epoch": 2.554838709677419, "grad_norm": 0.6937850713729858, "learning_rate": 1.94832304506143e-06, "loss": 1.3877, "step": 1386 }, { "epoch": 2.5566820276497695, "grad_norm": 0.697150707244873, "learning_rate": 1.9324780864629506e-06, "loss": 1.3359, "step": 1387 }, { "epoch": 2.5585253456221198, "grad_norm": 0.697455644607544, "learning_rate": 1.916693384400722e-06, "loss": 1.3664, "step": 1388 }, { "epoch": 2.56036866359447, "grad_norm": 0.6871203780174255, "learning_rate": 1.9009690116606493e-06, "loss": 1.3733, "step": 1389 }, { "epoch": 2.5622119815668203, "grad_norm": 0.6924582123756409, "learning_rate": 1.8853050407504513e-06, "loss": 1.3542, "step": 1390 }, { "epoch": 2.5640552995391706, "grad_norm": 0.68536776304245, "learning_rate": 1.8697015438993337e-06, "loss": 1.3367, "step": 1391 }, { "epoch": 2.565898617511521, "grad_norm": 0.6952401399612427, "learning_rate": 1.8541585930576338e-06, "loss": 1.365, "step": 1392 }, { "epoch": 2.567741935483871, "grad_norm": 0.6826061010360718, "learning_rate": 1.8386762598965073e-06, "loss": 1.3032, "step": 1393 }, { "epoch": 2.5695852534562214, "grad_norm": 0.7107289433479309, "learning_rate": 1.8232546158075853e-06, "loss": 1.4014, "step": 1394 }, { "epoch": 2.571428571428571, "grad_norm": 0.6925123929977417, "learning_rate": 1.8078937319026655e-06, "loss": 1.3717, "step": 1395 }, { "epoch": 2.573271889400922, "grad_norm": 0.7032476663589478, "learning_rate": 1.7925936790133556e-06, "loss": 1.3851, "step": 1396 }, { "epoch": 2.5751152073732717, "grad_norm": 0.7096735835075378, "learning_rate": 1.7773545276907639e-06, "loss": 1.386, "step": 1397 }, { "epoch": 2.576958525345622, "grad_norm": 0.6902509331703186, "learning_rate": 1.7621763482051827e-06, "loss": 1.3836, "step": 1398 }, { "epoch": 2.5788018433179722, "grad_norm": 0.7009189128875732, "learning_rate": 1.747059210545739e-06, "loss": 1.3622, "step": 1399 }, { "epoch": 2.5806451612903225, "grad_norm": 0.690222442150116, "learning_rate": 1.7320031844200883e-06, "loss": 1.3653, "step": 1400 }, { "epoch": 2.5824884792626728, "grad_norm": 0.6907539963722229, "learning_rate": 1.7170083392540998e-06, "loss": 1.3486, "step": 1401 }, { "epoch": 2.584331797235023, "grad_norm": 0.6997573375701904, "learning_rate": 1.7020747441915184e-06, "loss": 1.307, "step": 1402 }, { "epoch": 2.5861751152073733, "grad_norm": 0.7017333507537842, "learning_rate": 1.687202468093655e-06, "loss": 1.4091, "step": 1403 }, { "epoch": 2.5880184331797236, "grad_norm": 0.6974263787269592, "learning_rate": 1.6723915795390733e-06, "loss": 1.3833, "step": 1404 }, { "epoch": 2.589861751152074, "grad_norm": 0.6967101097106934, "learning_rate": 1.6576421468232627e-06, "loss": 1.3608, "step": 1405 }, { "epoch": 2.591705069124424, "grad_norm": 0.691605269908905, "learning_rate": 1.6429542379583313e-06, "loss": 1.393, "step": 1406 }, { "epoch": 2.5935483870967744, "grad_norm": 0.7080404758453369, "learning_rate": 1.6283279206726964e-06, "loss": 1.4, "step": 1407 }, { "epoch": 2.595391705069124, "grad_norm": 0.6864544153213501, "learning_rate": 1.6137632624107602e-06, "loss": 1.3396, "step": 1408 }, { "epoch": 2.597235023041475, "grad_norm": 0.7036056518554688, "learning_rate": 1.5992603303325997e-06, "loss": 1.3618, "step": 1409 }, { "epoch": 2.5990783410138247, "grad_norm": 0.6970285177230835, "learning_rate": 1.5848191913136757e-06, "loss": 1.3164, "step": 1410 }, { "epoch": 2.600921658986175, "grad_norm": 0.6839280724525452, "learning_rate": 1.5704399119445007e-06, "loss": 1.3821, "step": 1411 }, { "epoch": 2.6027649769585253, "grad_norm": 0.698872983455658, "learning_rate": 1.5561225585303463e-06, "loss": 1.3527, "step": 1412 }, { "epoch": 2.6046082949308755, "grad_norm": 0.6838787794113159, "learning_rate": 1.5418671970909253e-06, "loss": 1.3371, "step": 1413 }, { "epoch": 2.606451612903226, "grad_norm": 0.7075074315071106, "learning_rate": 1.527673893360108e-06, "loss": 1.3556, "step": 1414 }, { "epoch": 2.608294930875576, "grad_norm": 0.7030183672904968, "learning_rate": 1.5135427127855982e-06, "loss": 1.3327, "step": 1415 }, { "epoch": 2.6101382488479263, "grad_norm": 0.7043654322624207, "learning_rate": 1.4994737205286375e-06, "loss": 1.3931, "step": 1416 }, { "epoch": 2.6119815668202766, "grad_norm": 0.7082083225250244, "learning_rate": 1.4854669814637145e-06, "loss": 1.3984, "step": 1417 }, { "epoch": 2.613824884792627, "grad_norm": 0.6881368160247803, "learning_rate": 1.47152256017825e-06, "loss": 1.37, "step": 1418 }, { "epoch": 2.6156682027649767, "grad_norm": 0.7025339603424072, "learning_rate": 1.4576405209723092e-06, "loss": 1.361, "step": 1419 }, { "epoch": 2.6175115207373274, "grad_norm": 0.687637209892273, "learning_rate": 1.4438209278583108e-06, "loss": 1.2804, "step": 1420 }, { "epoch": 2.6193548387096772, "grad_norm": 0.7058582305908203, "learning_rate": 1.4300638445607123e-06, "loss": 1.3675, "step": 1421 }, { "epoch": 2.6211981566820275, "grad_norm": 0.6980953812599182, "learning_rate": 1.4163693345157313e-06, "loss": 1.4115, "step": 1422 }, { "epoch": 2.6230414746543778, "grad_norm": 0.699360191822052, "learning_rate": 1.402737460871057e-06, "loss": 1.3778, "step": 1423 }, { "epoch": 2.624884792626728, "grad_norm": 0.7105777859687805, "learning_rate": 1.3891682864855438e-06, "loss": 1.3363, "step": 1424 }, { "epoch": 2.6267281105990783, "grad_norm": 0.6854103207588196, "learning_rate": 1.3756618739289278e-06, "loss": 1.3269, "step": 1425 }, { "epoch": 2.6285714285714286, "grad_norm": 0.684704065322876, "learning_rate": 1.36221828548155e-06, "loss": 1.3374, "step": 1426 }, { "epoch": 2.630414746543779, "grad_norm": 0.7046616673469543, "learning_rate": 1.3488375831340516e-06, "loss": 1.3667, "step": 1427 }, { "epoch": 2.632258064516129, "grad_norm": 0.6912033557891846, "learning_rate": 1.3355198285870935e-06, "loss": 1.3527, "step": 1428 }, { "epoch": 2.6341013824884794, "grad_norm": 0.6907352209091187, "learning_rate": 1.322265083251074e-06, "loss": 1.3638, "step": 1429 }, { "epoch": 2.6359447004608296, "grad_norm": 0.7130386829376221, "learning_rate": 1.3090734082458562e-06, "loss": 1.3651, "step": 1430 }, { "epoch": 2.63778801843318, "grad_norm": 0.6825953125953674, "learning_rate": 1.2959448644004611e-06, "loss": 1.3334, "step": 1431 }, { "epoch": 2.6396313364055297, "grad_norm": 0.6982609629631042, "learning_rate": 1.2828795122528076e-06, "loss": 1.3908, "step": 1432 }, { "epoch": 2.6414746543778804, "grad_norm": 0.692492663860321, "learning_rate": 1.2698774120494294e-06, "loss": 1.3487, "step": 1433 }, { "epoch": 2.6433179723502302, "grad_norm": 0.6895628571510315, "learning_rate": 1.2569386237451912e-06, "loss": 1.3558, "step": 1434 }, { "epoch": 2.6451612903225805, "grad_norm": 0.6980292201042175, "learning_rate": 1.2440632070030145e-06, "loss": 1.3764, "step": 1435 }, { "epoch": 2.647004608294931, "grad_norm": 0.7029381394386292, "learning_rate": 1.2312512211936105e-06, "loss": 1.3534, "step": 1436 }, { "epoch": 2.648847926267281, "grad_norm": 0.6971340775489807, "learning_rate": 1.2185027253951935e-06, "loss": 1.4055, "step": 1437 }, { "epoch": 2.6506912442396313, "grad_norm": 0.7034549713134766, "learning_rate": 1.2058177783932133e-06, "loss": 1.4045, "step": 1438 }, { "epoch": 2.6525345622119816, "grad_norm": 0.6788062453269958, "learning_rate": 1.1931964386800991e-06, "loss": 1.3686, "step": 1439 }, { "epoch": 2.654377880184332, "grad_norm": 0.6912444829940796, "learning_rate": 1.180638764454955e-06, "loss": 1.3333, "step": 1440 }, { "epoch": 2.656221198156682, "grad_norm": 0.7025635838508606, "learning_rate": 1.1681448136233274e-06, "loss": 1.3486, "step": 1441 }, { "epoch": 2.6580645161290324, "grad_norm": 0.6919708847999573, "learning_rate": 1.1557146437969274e-06, "loss": 1.3332, "step": 1442 }, { "epoch": 2.6599078341013827, "grad_norm": 0.6963950395584106, "learning_rate": 1.143348312293342e-06, "loss": 1.3389, "step": 1443 }, { "epoch": 2.661751152073733, "grad_norm": 0.6864328980445862, "learning_rate": 1.1310458761358057e-06, "loss": 1.3162, "step": 1444 }, { "epoch": 2.6635944700460827, "grad_norm": 0.7061904668807983, "learning_rate": 1.1188073920529202e-06, "loss": 1.4415, "step": 1445 }, { "epoch": 2.6654377880184335, "grad_norm": 0.6894161701202393, "learning_rate": 1.106632916478385e-06, "loss": 1.3559, "step": 1446 }, { "epoch": 2.6672811059907833, "grad_norm": 0.7051087021827698, "learning_rate": 1.0945225055507523e-06, "loss": 1.3591, "step": 1447 }, { "epoch": 2.6691244239631335, "grad_norm": 0.7041983008384705, "learning_rate": 1.0824762151131539e-06, "loss": 1.4266, "step": 1448 }, { "epoch": 2.670967741935484, "grad_norm": 0.701495885848999, "learning_rate": 1.0704941007130615e-06, "loss": 1.394, "step": 1449 }, { "epoch": 2.672811059907834, "grad_norm": 0.7057127356529236, "learning_rate": 1.0585762176020148e-06, "loss": 1.3658, "step": 1450 }, { "epoch": 2.6746543778801843, "grad_norm": 0.715721070766449, "learning_rate": 1.0467226207353675e-06, "loss": 1.4245, "step": 1451 }, { "epoch": 2.6764976958525346, "grad_norm": 0.7003916501998901, "learning_rate": 1.0349333647720506e-06, "loss": 1.3663, "step": 1452 }, { "epoch": 2.678341013824885, "grad_norm": 0.7034760117530823, "learning_rate": 1.0232085040742983e-06, "loss": 1.3387, "step": 1453 }, { "epoch": 2.680184331797235, "grad_norm": 0.7046371102333069, "learning_rate": 1.0115480927074084e-06, "loss": 1.3737, "step": 1454 }, { "epoch": 2.6820276497695854, "grad_norm": 0.701204776763916, "learning_rate": 9.999521844394989e-07, "loss": 1.383, "step": 1455 }, { "epoch": 2.6838709677419352, "grad_norm": 0.7004807591438293, "learning_rate": 9.884208327412458e-07, "loss": 1.3361, "step": 1456 }, { "epoch": 2.685714285714286, "grad_norm": 0.6986050605773926, "learning_rate": 9.769540907856472e-07, "loss": 1.3065, "step": 1457 }, { "epoch": 2.6875576036866358, "grad_norm": 0.702582597732544, "learning_rate": 9.655520114477772e-07, "loss": 1.4066, "step": 1458 }, { "epoch": 2.689400921658986, "grad_norm": 0.7070892453193665, "learning_rate": 9.542146473045304e-07, "loss": 1.3791, "step": 1459 }, { "epoch": 2.6912442396313363, "grad_norm": 0.6917366981506348, "learning_rate": 9.429420506343983e-07, "loss": 1.3098, "step": 1460 }, { "epoch": 2.6930875576036866, "grad_norm": 0.7119744420051575, "learning_rate": 9.317342734172213e-07, "loss": 1.3786, "step": 1461 }, { "epoch": 2.694930875576037, "grad_norm": 0.700971245765686, "learning_rate": 9.205913673339322e-07, "loss": 1.4132, "step": 1462 }, { "epoch": 2.696774193548387, "grad_norm": 0.7082599401473999, "learning_rate": 9.09513383766345e-07, "loss": 1.3717, "step": 1463 }, { "epoch": 2.6986175115207374, "grad_norm": 0.7001616358757019, "learning_rate": 8.985003737969011e-07, "loss": 1.4024, "step": 1464 }, { "epoch": 2.7004608294930876, "grad_norm": 0.7055609822273254, "learning_rate": 8.875523882084352e-07, "loss": 1.3241, "step": 1465 }, { "epoch": 2.702304147465438, "grad_norm": 0.6964618563652039, "learning_rate": 8.766694774839484e-07, "loss": 1.3672, "step": 1466 }, { "epoch": 2.704147465437788, "grad_norm": 0.709379255771637, "learning_rate": 8.658516918063652e-07, "loss": 1.3565, "step": 1467 }, { "epoch": 2.7059907834101384, "grad_norm": 0.6808931231498718, "learning_rate": 8.550990810583137e-07, "loss": 1.2817, "step": 1468 }, { "epoch": 2.7078341013824883, "grad_norm": 0.7060760855674744, "learning_rate": 8.444116948218855e-07, "loss": 1.4355, "step": 1469 }, { "epoch": 2.709677419354839, "grad_norm": 0.6960509419441223, "learning_rate": 8.337895823784097e-07, "loss": 1.3455, "step": 1470 }, { "epoch": 2.711520737327189, "grad_norm": 0.700712263584137, "learning_rate": 8.232327927082328e-07, "loss": 1.3898, "step": 1471 }, { "epoch": 2.713364055299539, "grad_norm": 0.7388414740562439, "learning_rate": 8.127413744904805e-07, "loss": 1.4364, "step": 1472 }, { "epoch": 2.7152073732718893, "grad_norm": 0.6935340166091919, "learning_rate": 8.023153761028412e-07, "loss": 1.3523, "step": 1473 }, { "epoch": 2.7170506912442396, "grad_norm": 0.6816375255584717, "learning_rate": 7.919548456213516e-07, "loss": 1.3601, "step": 1474 }, { "epoch": 2.71889400921659, "grad_norm": 0.695892333984375, "learning_rate": 7.816598308201428e-07, "loss": 1.3387, "step": 1475 }, { "epoch": 2.72073732718894, "grad_norm": 0.70052570104599, "learning_rate": 7.714303791712646e-07, "loss": 1.3647, "step": 1476 }, { "epoch": 2.7225806451612904, "grad_norm": 0.6881222128868103, "learning_rate": 7.612665378444367e-07, "loss": 1.3206, "step": 1477 }, { "epoch": 2.7244239631336407, "grad_norm": 0.6922666430473328, "learning_rate": 7.511683537068293e-07, "loss": 1.3773, "step": 1478 }, { "epoch": 2.726267281105991, "grad_norm": 0.6927684545516968, "learning_rate": 7.411358733228679e-07, "loss": 1.3665, "step": 1479 }, { "epoch": 2.7281105990783407, "grad_norm": 0.6982271075248718, "learning_rate": 7.311691429540058e-07, "loss": 1.3143, "step": 1480 }, { "epoch": 2.7299539170506915, "grad_norm": 0.702158510684967, "learning_rate": 7.212682085585032e-07, "loss": 1.3361, "step": 1481 }, { "epoch": 2.7317972350230413, "grad_norm": 0.6927374601364136, "learning_rate": 7.114331157912351e-07, "loss": 1.3222, "step": 1482 }, { "epoch": 2.7336405529953915, "grad_norm": 0.7043153047561646, "learning_rate": 7.016639100034627e-07, "loss": 1.3362, "step": 1483 }, { "epoch": 2.735483870967742, "grad_norm": 0.7088083028793335, "learning_rate": 6.919606362426301e-07, "loss": 1.3584, "step": 1484 }, { "epoch": 2.737327188940092, "grad_norm": 0.6986879706382751, "learning_rate": 6.82323339252166e-07, "loss": 1.3863, "step": 1485 }, { "epoch": 2.7391705069124423, "grad_norm": 0.6907955408096313, "learning_rate": 6.727520634712614e-07, "loss": 1.3572, "step": 1486 }, { "epoch": 2.7410138248847926, "grad_norm": 0.7046393156051636, "learning_rate": 6.632468530346736e-07, "loss": 1.2698, "step": 1487 }, { "epoch": 2.742857142857143, "grad_norm": 0.7117356657981873, "learning_rate": 6.538077517725255e-07, "loss": 1.3471, "step": 1488 }, { "epoch": 2.744700460829493, "grad_norm": 0.7022505402565002, "learning_rate": 6.444348032100955e-07, "loss": 1.3618, "step": 1489 }, { "epoch": 2.7465437788018434, "grad_norm": 0.687411367893219, "learning_rate": 6.351280505676227e-07, "loss": 1.2833, "step": 1490 }, { "epoch": 2.7483870967741937, "grad_norm": 0.688880205154419, "learning_rate": 6.258875367601052e-07, "loss": 1.3006, "step": 1491 }, { "epoch": 2.750230414746544, "grad_norm": 0.7116846442222595, "learning_rate": 6.167133043971024e-07, "loss": 1.3675, "step": 1492 }, { "epoch": 2.7520737327188938, "grad_norm": 0.6968063712120056, "learning_rate": 6.076053957825411e-07, "loss": 1.3476, "step": 1493 }, { "epoch": 2.7539170506912445, "grad_norm": 0.6912758350372314, "learning_rate": 5.985638529145115e-07, "loss": 1.3567, "step": 1494 }, { "epoch": 2.7557603686635943, "grad_norm": 0.691801905632019, "learning_rate": 5.895887174850866e-07, "loss": 1.3547, "step": 1495 }, { "epoch": 2.7576036866359446, "grad_norm": 0.6897842288017273, "learning_rate": 5.80680030880128e-07, "loss": 1.3168, "step": 1496 }, { "epoch": 2.759447004608295, "grad_norm": 0.7044353485107422, "learning_rate": 5.718378341790754e-07, "loss": 1.4228, "step": 1497 }, { "epoch": 2.761290322580645, "grad_norm": 0.684937596321106, "learning_rate": 5.630621681547871e-07, "loss": 1.3786, "step": 1498 }, { "epoch": 2.7631336405529954, "grad_norm": 0.7006590962409973, "learning_rate": 5.543530732733304e-07, "loss": 1.3824, "step": 1499 }, { "epoch": 2.7649769585253456, "grad_norm": 0.6988726854324341, "learning_rate": 5.457105896937997e-07, "loss": 1.3677, "step": 1500 }, { "epoch": 2.766820276497696, "grad_norm": 0.7169519662857056, "learning_rate": 5.371347572681434e-07, "loss": 1.3585, "step": 1501 }, { "epoch": 2.768663594470046, "grad_norm": 0.69769686460495, "learning_rate": 5.286256155409607e-07, "loss": 1.386, "step": 1502 }, { "epoch": 2.7705069124423964, "grad_norm": 0.7092714905738831, "learning_rate": 5.201832037493304e-07, "loss": 1.3351, "step": 1503 }, { "epoch": 2.7723502304147467, "grad_norm": 0.7064914107322693, "learning_rate": 5.118075608226335e-07, "loss": 1.3447, "step": 1504 }, { "epoch": 2.774193548387097, "grad_norm": 0.7054774761199951, "learning_rate": 5.034987253823614e-07, "loss": 1.4279, "step": 1505 }, { "epoch": 2.776036866359447, "grad_norm": 0.7050322890281677, "learning_rate": 4.952567357419496e-07, "loss": 1.314, "step": 1506 }, { "epoch": 2.7778801843317975, "grad_norm": 0.7061877846717834, "learning_rate": 4.870816299065956e-07, "loss": 1.3862, "step": 1507 }, { "epoch": 2.7797235023041473, "grad_norm": 0.6898355484008789, "learning_rate": 4.789734455730848e-07, "loss": 1.3467, "step": 1508 }, { "epoch": 2.7815668202764976, "grad_norm": 0.7052949070930481, "learning_rate": 4.709322201296168e-07, "loss": 1.382, "step": 1509 }, { "epoch": 2.783410138248848, "grad_norm": 0.7071539163589478, "learning_rate": 4.629579906556258e-07, "loss": 1.4003, "step": 1510 }, { "epoch": 2.785253456221198, "grad_norm": 0.6983172297477722, "learning_rate": 4.5505079392162696e-07, "loss": 1.3376, "step": 1511 }, { "epoch": 2.7870967741935484, "grad_norm": 0.7000498175621033, "learning_rate": 4.4721066638903405e-07, "loss": 1.3458, "step": 1512 }, { "epoch": 2.7889400921658987, "grad_norm": 0.6864968538284302, "learning_rate": 4.3943764420998344e-07, "loss": 1.3266, "step": 1513 }, { "epoch": 2.790783410138249, "grad_norm": 0.6955534815788269, "learning_rate": 4.317317632271889e-07, "loss": 1.4015, "step": 1514 }, { "epoch": 2.792626728110599, "grad_norm": 0.7089243531227112, "learning_rate": 4.2409305897376015e-07, "loss": 1.4046, "step": 1515 }, { "epoch": 2.7944700460829495, "grad_norm": 0.6945240497589111, "learning_rate": 4.165215666730415e-07, "loss": 1.3353, "step": 1516 }, { "epoch": 2.7963133640552993, "grad_norm": 0.6899064779281616, "learning_rate": 4.090173212384601e-07, "loss": 1.3701, "step": 1517 }, { "epoch": 2.79815668202765, "grad_norm": 0.6936092376708984, "learning_rate": 4.015803572733462e-07, "loss": 1.339, "step": 1518 }, { "epoch": 2.8, "grad_norm": 0.7209426164627075, "learning_rate": 3.9421070907078973e-07, "loss": 1.4274, "step": 1519 }, { "epoch": 2.80184331797235, "grad_norm": 0.6927912831306458, "learning_rate": 3.869084106134757e-07, "loss": 1.355, "step": 1520 }, { "epoch": 2.8036866359447004, "grad_norm": 0.6925110220909119, "learning_rate": 3.796734955735276e-07, "loss": 1.3308, "step": 1521 }, { "epoch": 2.8055299539170506, "grad_norm": 0.7080537676811218, "learning_rate": 3.725059973123507e-07, "loss": 1.3336, "step": 1522 }, { "epoch": 2.807373271889401, "grad_norm": 0.7040824890136719, "learning_rate": 3.654059488804856e-07, "loss": 1.3605, "step": 1523 }, { "epoch": 2.809216589861751, "grad_norm": 0.702531099319458, "learning_rate": 3.5837338301744516e-07, "loss": 1.3711, "step": 1524 }, { "epoch": 2.8110599078341014, "grad_norm": 0.7056758403778076, "learning_rate": 3.5140833215157097e-07, "loss": 1.3783, "step": 1525 }, { "epoch": 2.8129032258064517, "grad_norm": 0.7163128852844238, "learning_rate": 3.445108283998805e-07, "loss": 1.3565, "step": 1526 }, { "epoch": 2.814746543778802, "grad_norm": 0.7012839913368225, "learning_rate": 3.376809035679218e-07, "loss": 1.3606, "step": 1527 }, { "epoch": 2.8165898617511522, "grad_norm": 0.7015247941017151, "learning_rate": 3.3091858914962415e-07, "loss": 1.4073, "step": 1528 }, { "epoch": 2.8184331797235025, "grad_norm": 0.7001134753227234, "learning_rate": 3.2422391632715265e-07, "loss": 1.3873, "step": 1529 }, { "epoch": 2.8202764976958523, "grad_norm": 0.6959702372550964, "learning_rate": 3.1759691597076865e-07, "loss": 1.3719, "step": 1530 }, { "epoch": 2.822119815668203, "grad_norm": 0.6868043541908264, "learning_rate": 3.1103761863868486e-07, "loss": 1.3608, "step": 1531 }, { "epoch": 2.823963133640553, "grad_norm": 0.7002357840538025, "learning_rate": 3.045460545769152e-07, "loss": 1.3704, "step": 1532 }, { "epoch": 2.825806451612903, "grad_norm": 0.7262450456619263, "learning_rate": 2.981222537191586e-07, "loss": 1.3785, "step": 1533 }, { "epoch": 2.8276497695852534, "grad_norm": 0.6923169493675232, "learning_rate": 2.9176624568663377e-07, "loss": 1.2939, "step": 1534 }, { "epoch": 2.8294930875576036, "grad_norm": 0.6932117938995361, "learning_rate": 2.854780597879614e-07, "loss": 1.3075, "step": 1535 }, { "epoch": 2.831336405529954, "grad_norm": 0.6815235614776611, "learning_rate": 2.792577250190237e-07, "loss": 1.3348, "step": 1536 }, { "epoch": 2.833179723502304, "grad_norm": 0.7099239230155945, "learning_rate": 2.7310527006282505e-07, "loss": 1.3416, "step": 1537 }, { "epoch": 2.8350230414746544, "grad_norm": 0.7015982866287231, "learning_rate": 2.670207232893684e-07, "loss": 1.3327, "step": 1538 }, { "epoch": 2.8368663594470047, "grad_norm": 0.697188675403595, "learning_rate": 2.610041127555224e-07, "loss": 1.3458, "step": 1539 }, { "epoch": 2.838709677419355, "grad_norm": 0.7074711322784424, "learning_rate": 2.5505546620488597e-07, "loss": 1.3631, "step": 1540 }, { "epoch": 2.840552995391705, "grad_norm": 0.7083696126937866, "learning_rate": 2.4917481106766394e-07, "loss": 1.4529, "step": 1541 }, { "epoch": 2.8423963133640555, "grad_norm": 0.68403559923172, "learning_rate": 2.433621744605502e-07, "loss": 1.3884, "step": 1542 }, { "epoch": 2.8442396313364053, "grad_norm": 0.6853194236755371, "learning_rate": 2.3761758318658121e-07, "loss": 1.3071, "step": 1543 }, { "epoch": 2.8460829493087556, "grad_norm": 0.680692732334137, "learning_rate": 2.3194106373503443e-07, "loss": 1.3108, "step": 1544 }, { "epoch": 2.847926267281106, "grad_norm": 0.6971629858016968, "learning_rate": 2.2633264228129336e-07, "loss": 1.4166, "step": 1545 }, { "epoch": 2.849769585253456, "grad_norm": 0.7039859294891357, "learning_rate": 2.2079234468672938e-07, "loss": 1.4244, "step": 1546 }, { "epoch": 2.8516129032258064, "grad_norm": 0.6912335753440857, "learning_rate": 2.1532019649858513e-07, "loss": 1.293, "step": 1547 }, { "epoch": 2.8534562211981567, "grad_norm": 0.6962488889694214, "learning_rate": 2.0991622294985303e-07, "loss": 1.3406, "step": 1548 }, { "epoch": 2.855299539170507, "grad_norm": 0.6975343823432922, "learning_rate": 2.0458044895916516e-07, "loss": 1.3552, "step": 1549 }, { "epoch": 2.857142857142857, "grad_norm": 0.6945282220840454, "learning_rate": 1.9931289913066697e-07, "loss": 1.4044, "step": 1550 }, { "epoch": 2.8589861751152075, "grad_norm": 0.6886034607887268, "learning_rate": 1.9411359775391547e-07, "loss": 1.3211, "step": 1551 }, { "epoch": 2.8608294930875577, "grad_norm": 0.6891710162162781, "learning_rate": 1.8898256880376273e-07, "loss": 1.3942, "step": 1552 }, { "epoch": 2.862672811059908, "grad_norm": 0.7018667459487915, "learning_rate": 1.8391983594024443e-07, "loss": 1.3271, "step": 1553 }, { "epoch": 2.864516129032258, "grad_norm": 0.7080690264701843, "learning_rate": 1.7892542250846966e-07, "loss": 1.4262, "step": 1554 }, { "epoch": 2.8663594470046085, "grad_norm": 0.685600757598877, "learning_rate": 1.7399935153851798e-07, "loss": 1.3658, "step": 1555 }, { "epoch": 2.8682027649769584, "grad_norm": 0.694267213344574, "learning_rate": 1.691416457453293e-07, "loss": 1.4044, "step": 1556 }, { "epoch": 2.8700460829493086, "grad_norm": 0.7029085159301758, "learning_rate": 1.6435232752860074e-07, "loss": 1.3389, "step": 1557 }, { "epoch": 2.871889400921659, "grad_norm": 0.6980891823768616, "learning_rate": 1.5963141897267998e-07, "loss": 1.3445, "step": 1558 }, { "epoch": 2.873732718894009, "grad_norm": 0.7031422853469849, "learning_rate": 1.5497894184647033e-07, "loss": 1.3813, "step": 1559 }, { "epoch": 2.8755760368663594, "grad_norm": 0.6972190737724304, "learning_rate": 1.503949176033259e-07, "loss": 1.3673, "step": 1560 }, { "epoch": 2.8774193548387097, "grad_norm": 0.7183608412742615, "learning_rate": 1.4587936738094665e-07, "loss": 1.3809, "step": 1561 }, { "epoch": 2.87926267281106, "grad_norm": 0.6923873424530029, "learning_rate": 1.4143231200129835e-07, "loss": 1.3715, "step": 1562 }, { "epoch": 2.8811059907834102, "grad_norm": 0.6907538771629333, "learning_rate": 1.3705377197049617e-07, "loss": 1.3458, "step": 1563 }, { "epoch": 2.8829493087557605, "grad_norm": 0.7084501385688782, "learning_rate": 1.327437674787213e-07, "loss": 1.3406, "step": 1564 }, { "epoch": 2.8847926267281108, "grad_norm": 0.70960932970047, "learning_rate": 1.285023184001327e-07, "loss": 1.3623, "step": 1565 }, { "epoch": 2.886635944700461, "grad_norm": 0.7083925604820251, "learning_rate": 1.2432944429275894e-07, "loss": 1.4078, "step": 1566 }, { "epoch": 2.888479262672811, "grad_norm": 0.7167489528656006, "learning_rate": 1.2022516439842478e-07, "loss": 1.3787, "step": 1567 }, { "epoch": 2.8903225806451616, "grad_norm": 0.6991795897483826, "learning_rate": 1.1618949764265474e-07, "loss": 1.35, "step": 1568 }, { "epoch": 2.8921658986175114, "grad_norm": 0.6887746453285217, "learning_rate": 1.1222246263458469e-07, "loss": 1.3871, "step": 1569 }, { "epoch": 2.8940092165898617, "grad_norm": 0.7072399258613586, "learning_rate": 1.0832407766687535e-07, "loss": 1.441, "step": 1570 }, { "epoch": 2.895852534562212, "grad_norm": 0.7020343542098999, "learning_rate": 1.04494360715639e-07, "loss": 1.3935, "step": 1571 }, { "epoch": 2.897695852534562, "grad_norm": 0.6989784836769104, "learning_rate": 1.0073332944034119e-07, "loss": 1.3168, "step": 1572 }, { "epoch": 2.8995391705069125, "grad_norm": 0.6937752962112427, "learning_rate": 9.704100118372583e-08, "loss": 1.3756, "step": 1573 }, { "epoch": 2.9013824884792627, "grad_norm": 0.6995283365249634, "learning_rate": 9.34173929717419e-08, "loss": 1.3757, "step": 1574 }, { "epoch": 2.903225806451613, "grad_norm": 0.7166629433631897, "learning_rate": 8.986252151345353e-08, "loss": 1.4135, "step": 1575 }, { "epoch": 2.9050691244239633, "grad_norm": 0.6960161328315735, "learning_rate": 8.63764032009684e-08, "loss": 1.3557, "step": 1576 }, { "epoch": 2.9069124423963135, "grad_norm": 0.706387996673584, "learning_rate": 8.295905410936277e-08, "loss": 1.361, "step": 1577 }, { "epoch": 2.9087557603686633, "grad_norm": 0.7042820453643799, "learning_rate": 7.961048999660991e-08, "loss": 1.3768, "step": 1578 }, { "epoch": 2.910599078341014, "grad_norm": 0.689640462398529, "learning_rate": 7.63307263034968e-08, "loss": 1.342, "step": 1579 }, { "epoch": 2.912442396313364, "grad_norm": 0.6936720013618469, "learning_rate": 7.311977815356585e-08, "loss": 1.3707, "step": 1580 }, { "epoch": 2.914285714285714, "grad_norm": 0.6979496479034424, "learning_rate": 6.997766035303832e-08, "loss": 1.3504, "step": 1581 }, { "epoch": 2.9161290322580644, "grad_norm": 0.703223705291748, "learning_rate": 6.690438739074767e-08, "loss": 1.3933, "step": 1582 }, { "epoch": 2.9179723502304147, "grad_norm": 0.7049776315689087, "learning_rate": 6.389997343806797e-08, "loss": 1.364, "step": 1583 }, { "epoch": 2.919815668202765, "grad_norm": 0.7012540698051453, "learning_rate": 6.096443234885729e-08, "loss": 1.3237, "step": 1584 }, { "epoch": 2.921658986175115, "grad_norm": 0.7004204988479614, "learning_rate": 5.809777765939106e-08, "loss": 1.323, "step": 1585 }, { "epoch": 2.9235023041474655, "grad_norm": 0.6783429384231567, "learning_rate": 5.530002258829048e-08, "loss": 1.3122, "step": 1586 }, { "epoch": 2.9253456221198157, "grad_norm": 0.6861110329627991, "learning_rate": 5.257118003647754e-08, "loss": 1.305, "step": 1587 }, { "epoch": 2.927188940092166, "grad_norm": 0.688933253288269, "learning_rate": 4.991126258710177e-08, "loss": 1.3666, "step": 1588 }, { "epoch": 2.9290322580645163, "grad_norm": 0.6797176003456116, "learning_rate": 4.732028250548692e-08, "loss": 1.3484, "step": 1589 }, { "epoch": 2.9308755760368665, "grad_norm": 0.6951290965080261, "learning_rate": 4.479825173908103e-08, "loss": 1.3556, "step": 1590 }, { "epoch": 2.9327188940092164, "grad_norm": 0.6862546801567078, "learning_rate": 4.234518191738645e-08, "loss": 1.3306, "step": 1591 }, { "epoch": 2.934562211981567, "grad_norm": 0.7084790468215942, "learning_rate": 3.996108435192325e-08, "loss": 1.3896, "step": 1592 }, { "epoch": 2.936405529953917, "grad_norm": 0.6970670819282532, "learning_rate": 3.764597003616421e-08, "loss": 1.331, "step": 1593 }, { "epoch": 2.938248847926267, "grad_norm": 0.7036964893341064, "learning_rate": 3.539984964548826e-08, "loss": 1.3725, "step": 1594 }, { "epoch": 2.9400921658986174, "grad_norm": 0.7034350037574768, "learning_rate": 3.322273353713712e-08, "loss": 1.3939, "step": 1595 }, { "epoch": 2.9419354838709677, "grad_norm": 0.6966269016265869, "learning_rate": 3.111463175015539e-08, "loss": 1.3982, "step": 1596 }, { "epoch": 2.943778801843318, "grad_norm": 0.7027255296707153, "learning_rate": 2.907555400535389e-08, "loss": 1.3802, "step": 1597 }, { "epoch": 2.9456221198156682, "grad_norm": 0.7093203067779541, "learning_rate": 2.710550970526471e-08, "loss": 1.3544, "step": 1598 }, { "epoch": 2.9474654377880185, "grad_norm": 0.6913772225379944, "learning_rate": 2.5204507934091236e-08, "loss": 1.3289, "step": 1599 }, { "epoch": 2.9493087557603688, "grad_norm": 0.7050530314445496, "learning_rate": 2.3372557457673194e-08, "loss": 1.4242, "step": 1600 }, { "epoch": 2.951152073732719, "grad_norm": 0.6849048733711243, "learning_rate": 2.1609666723438336e-08, "loss": 1.3314, "step": 1601 }, { "epoch": 2.952995391705069, "grad_norm": 0.7082017064094543, "learning_rate": 1.991584386037415e-08, "loss": 1.3737, "step": 1602 }, { "epoch": 2.9548387096774196, "grad_norm": 0.7023243308067322, "learning_rate": 1.8291096678982877e-08, "loss": 1.3456, "step": 1603 }, { "epoch": 2.9566820276497694, "grad_norm": 0.6966132521629333, "learning_rate": 1.6735432671243223e-08, "loss": 1.3684, "step": 1604 }, { "epoch": 2.9585253456221197, "grad_norm": 0.6970471143722534, "learning_rate": 1.524885901058537e-08, "loss": 1.3946, "step": 1605 }, { "epoch": 2.96036866359447, "grad_norm": 0.7002370953559875, "learning_rate": 1.3831382551849348e-08, "loss": 1.3774, "step": 1606 }, { "epoch": 2.96221198156682, "grad_norm": 0.6900573372840881, "learning_rate": 1.248300983125672e-08, "loss": 1.336, "step": 1607 }, { "epoch": 2.9640552995391705, "grad_norm": 0.6997274160385132, "learning_rate": 1.120374706638061e-08, "loss": 1.4003, "step": 1608 }, { "epoch": 2.9658986175115207, "grad_norm": 0.6997906565666199, "learning_rate": 9.993600156117389e-09, "loss": 1.3516, "step": 1609 }, { "epoch": 2.967741935483871, "grad_norm": 0.690923810005188, "learning_rate": 8.852574680655035e-09, "loss": 1.3605, "step": 1610 }, { "epoch": 2.9695852534562213, "grad_norm": 0.7004004120826721, "learning_rate": 7.780675901454815e-09, "loss": 1.382, "step": 1611 }, { "epoch": 2.9714285714285715, "grad_norm": 0.6857348084449768, "learning_rate": 6.7779087612229726e-09, "loss": 1.3393, "step": 1612 }, { "epoch": 2.973271889400922, "grad_norm": 0.6932818293571472, "learning_rate": 5.844277883884086e-09, "loss": 1.361, "step": 1613 }, { "epoch": 2.975115207373272, "grad_norm": 0.687596321105957, "learning_rate": 4.979787574569406e-09, "loss": 1.3328, "step": 1614 }, { "epoch": 2.976958525345622, "grad_norm": 0.6965142488479614, "learning_rate": 4.184441819588547e-09, "loss": 1.3877, "step": 1615 }, { "epoch": 2.9788018433179726, "grad_norm": 0.6901707053184509, "learning_rate": 3.4582442864145026e-09, "loss": 1.3458, "step": 1616 }, { "epoch": 2.9806451612903224, "grad_norm": 0.6885871887207031, "learning_rate": 2.8011983236636563e-09, "loss": 1.3286, "step": 1617 }, { "epoch": 2.9824884792626727, "grad_norm": 0.7070132493972778, "learning_rate": 2.2133069610874577e-09, "loss": 1.41, "step": 1618 }, { "epoch": 2.984331797235023, "grad_norm": 0.689439594745636, "learning_rate": 1.6945729095507733e-09, "loss": 1.3779, "step": 1619 }, { "epoch": 2.986175115207373, "grad_norm": 0.691943883895874, "learning_rate": 1.2449985610235582e-09, "loss": 1.3485, "step": 1620 }, { "epoch": 2.9880184331797235, "grad_norm": 0.7042137980461121, "learning_rate": 8.645859885675345e-10, "loss": 1.3502, "step": 1621 }, { "epoch": 2.9898617511520738, "grad_norm": 0.6836023926734924, "learning_rate": 5.533369463311954e-10, "loss": 1.3233, "step": 1622 }, { "epoch": 2.991705069124424, "grad_norm": 0.7048534154891968, "learning_rate": 3.1125286953481715e-10, "loss": 1.3722, "step": 1623 }, { "epoch": 2.9935483870967743, "grad_norm": 0.7097730040550232, "learning_rate": 1.3833487446712757e-10, "loss": 1.4248, "step": 1624 }, { "epoch": 2.9953917050691246, "grad_norm": 0.708852231502533, "learning_rate": 3.4583758485307036e-11, "loss": 1.3838, "step": 1625 }, { "epoch": 2.9972350230414744, "grad_norm": 0.7059180736541748, "learning_rate": 0.0, "loss": 1.4033, "step": 1626 }, { "epoch": 2.9972350230414744, "step": 1626, "total_flos": 2.0909964885196014e+19, "train_loss": 1.494101381683115, "train_runtime": 42851.9197, "train_samples_per_second": 1.215, "train_steps_per_second": 0.038 } ], "logging_steps": 1.0, "max_steps": 1626, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0909964885196014e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }