{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1182, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008460236886632825, "grad_norm": 4.550535678863525, "learning_rate": 0.0, "loss": 1.2052, "step": 1 }, { "epoch": 0.001692047377326565, "grad_norm": 4.09000301361084, "learning_rate": 1.3888888888888888e-07, "loss": 1.0467, "step": 2 }, { "epoch": 0.0025380710659898475, "grad_norm": 4.39274787902832, "learning_rate": 2.7777777777777776e-07, "loss": 1.1306, "step": 3 }, { "epoch": 0.00338409475465313, "grad_norm": 4.3457722663879395, "learning_rate": 4.1666666666666667e-07, "loss": 1.0388, "step": 4 }, { "epoch": 0.004230118443316413, "grad_norm": 4.247500419616699, "learning_rate": 5.555555555555555e-07, "loss": 1.1666, "step": 5 }, { "epoch": 0.005076142131979695, "grad_norm": 4.16987943649292, "learning_rate": 6.944444444444446e-07, "loss": 1.1084, "step": 6 }, { "epoch": 0.005922165820642978, "grad_norm": 3.504650592803955, "learning_rate": 8.333333333333333e-07, "loss": 0.8725, "step": 7 }, { "epoch": 0.00676818950930626, "grad_norm": 4.1873297691345215, "learning_rate": 9.722222222222224e-07, "loss": 1.129, "step": 8 }, { "epoch": 0.007614213197969543, "grad_norm": 4.281223773956299, "learning_rate": 1.111111111111111e-06, "loss": 1.0329, "step": 9 }, { "epoch": 0.008460236886632826, "grad_norm": 3.8109793663024902, "learning_rate": 1.25e-06, "loss": 1.0024, "step": 10 }, { "epoch": 0.009306260575296108, "grad_norm": 4.020341873168945, "learning_rate": 1.3888888888888892e-06, "loss": 1.054, "step": 11 }, { "epoch": 0.01015228426395939, "grad_norm": 3.5737178325653076, "learning_rate": 1.527777777777778e-06, "loss": 1.0726, "step": 12 }, { "epoch": 0.010998307952622674, "grad_norm": 3.3822622299194336, "learning_rate": 1.6666666666666667e-06, "loss": 0.9914, "step": 13 }, { "epoch": 0.011844331641285956, "grad_norm": 3.4175751209259033, "learning_rate": 1.8055555555555557e-06, "loss": 1.1581, "step": 14 }, { "epoch": 0.012690355329949238, "grad_norm": 3.0857129096984863, "learning_rate": 1.944444444444445e-06, "loss": 0.9684, "step": 15 }, { "epoch": 0.01353637901861252, "grad_norm": 3.017608165740967, "learning_rate": 2.0833333333333334e-06, "loss": 1.0433, "step": 16 }, { "epoch": 0.014382402707275803, "grad_norm": 3.069457530975342, "learning_rate": 2.222222222222222e-06, "loss": 1.0798, "step": 17 }, { "epoch": 0.015228426395939087, "grad_norm": 2.6652209758758545, "learning_rate": 2.361111111111111e-06, "loss": 0.9204, "step": 18 }, { "epoch": 0.016074450084602367, "grad_norm": 2.924373149871826, "learning_rate": 2.5e-06, "loss": 1.0251, "step": 19 }, { "epoch": 0.01692047377326565, "grad_norm": 2.1823043823242188, "learning_rate": 2.6388888888888893e-06, "loss": 0.8016, "step": 20 }, { "epoch": 0.017766497461928935, "grad_norm": 2.238309860229492, "learning_rate": 2.7777777777777783e-06, "loss": 0.8186, "step": 21 }, { "epoch": 0.018612521150592216, "grad_norm": 2.507589817047119, "learning_rate": 2.916666666666667e-06, "loss": 0.9278, "step": 22 }, { "epoch": 0.0194585448392555, "grad_norm": 2.268179416656494, "learning_rate": 3.055555555555556e-06, "loss": 0.8594, "step": 23 }, { "epoch": 0.02030456852791878, "grad_norm": 2.1308953762054443, "learning_rate": 3.1944444444444443e-06, "loss": 0.9286, "step": 24 }, { "epoch": 0.021150592216582064, "grad_norm": 2.1231722831726074, "learning_rate": 3.3333333333333333e-06, "loss": 0.7292, "step": 25 }, { "epoch": 0.021996615905245348, "grad_norm": 2.203334331512451, "learning_rate": 3.4722222222222224e-06, "loss": 0.806, "step": 26 }, { "epoch": 0.02284263959390863, "grad_norm": 2.161112070083618, "learning_rate": 3.6111111111111115e-06, "loss": 0.8608, "step": 27 }, { "epoch": 0.023688663282571912, "grad_norm": 2.1429355144500732, "learning_rate": 3.7500000000000005e-06, "loss": 0.7729, "step": 28 }, { "epoch": 0.024534686971235193, "grad_norm": 2.123563051223755, "learning_rate": 3.88888888888889e-06, "loss": 0.7233, "step": 29 }, { "epoch": 0.025380710659898477, "grad_norm": 2.211416482925415, "learning_rate": 4.027777777777779e-06, "loss": 0.796, "step": 30 }, { "epoch": 0.02622673434856176, "grad_norm": 2.374946355819702, "learning_rate": 4.166666666666667e-06, "loss": 0.7406, "step": 31 }, { "epoch": 0.02707275803722504, "grad_norm": 1.9759607315063477, "learning_rate": 4.305555555555556e-06, "loss": 0.7946, "step": 32 }, { "epoch": 0.027918781725888325, "grad_norm": 2.052825689315796, "learning_rate": 4.444444444444444e-06, "loss": 0.8375, "step": 33 }, { "epoch": 0.028764805414551606, "grad_norm": 1.8392261266708374, "learning_rate": 4.583333333333333e-06, "loss": 0.7907, "step": 34 }, { "epoch": 0.02961082910321489, "grad_norm": 1.8615978956222534, "learning_rate": 4.722222222222222e-06, "loss": 0.7694, "step": 35 }, { "epoch": 0.030456852791878174, "grad_norm": 1.7482495307922363, "learning_rate": 4.861111111111111e-06, "loss": 0.7296, "step": 36 }, { "epoch": 0.031302876480541454, "grad_norm": 1.808103322982788, "learning_rate": 5e-06, "loss": 0.7169, "step": 37 }, { "epoch": 0.032148900169204735, "grad_norm": 1.6232045888900757, "learning_rate": 4.999990606222893e-06, "loss": 0.6382, "step": 38 }, { "epoch": 0.03299492385786802, "grad_norm": 1.79764986038208, "learning_rate": 4.9999624249621655e-06, "loss": 0.7091, "step": 39 }, { "epoch": 0.0338409475465313, "grad_norm": 1.9869894981384277, "learning_rate": 4.999915456429602e-06, "loss": 0.7723, "step": 40 }, { "epoch": 0.03468697123519458, "grad_norm": 1.8739275932312012, "learning_rate": 4.99984970097817e-06, "loss": 0.7096, "step": 41 }, { "epoch": 0.03553299492385787, "grad_norm": 1.8806772232055664, "learning_rate": 4.999765159102025e-06, "loss": 0.7704, "step": 42 }, { "epoch": 0.03637901861252115, "grad_norm": 1.8095641136169434, "learning_rate": 4.999661831436499e-06, "loss": 0.6302, "step": 43 }, { "epoch": 0.03722504230118443, "grad_norm": 1.986670970916748, "learning_rate": 4.9995397187581026e-06, "loss": 0.6971, "step": 44 }, { "epoch": 0.03807106598984772, "grad_norm": 1.7087007761001587, "learning_rate": 4.9993988219845155e-06, "loss": 0.7039, "step": 45 }, { "epoch": 0.038917089678511, "grad_norm": 1.7987544536590576, "learning_rate": 4.999239142174581e-06, "loss": 0.7259, "step": 46 }, { "epoch": 0.03976311336717428, "grad_norm": 2.194416046142578, "learning_rate": 4.999060680528294e-06, "loss": 0.7221, "step": 47 }, { "epoch": 0.04060913705583756, "grad_norm": 1.895753026008606, "learning_rate": 4.9988634383867995e-06, "loss": 0.712, "step": 48 }, { "epoch": 0.04145516074450085, "grad_norm": 1.4732792377471924, "learning_rate": 4.998647417232375e-06, "loss": 0.636, "step": 49 }, { "epoch": 0.04230118443316413, "grad_norm": 1.7982600927352905, "learning_rate": 4.998412618688426e-06, "loss": 0.6754, "step": 50 }, { "epoch": 0.04314720812182741, "grad_norm": 1.6426688432693481, "learning_rate": 4.9981590445194675e-06, "loss": 0.6804, "step": 51 }, { "epoch": 0.043993231810490696, "grad_norm": 1.800573706626892, "learning_rate": 4.997886696631115e-06, "loss": 0.6273, "step": 52 }, { "epoch": 0.04483925549915398, "grad_norm": 1.5397448539733887, "learning_rate": 4.997595577070068e-06, "loss": 0.6667, "step": 53 }, { "epoch": 0.04568527918781726, "grad_norm": 1.6163142919540405, "learning_rate": 4.997285688024097e-06, "loss": 0.6126, "step": 54 }, { "epoch": 0.046531302876480544, "grad_norm": 1.696107268333435, "learning_rate": 4.996957031822026e-06, "loss": 0.6148, "step": 55 }, { "epoch": 0.047377326565143825, "grad_norm": 1.809167742729187, "learning_rate": 4.996609610933713e-06, "loss": 0.6086, "step": 56 }, { "epoch": 0.048223350253807105, "grad_norm": 1.7550158500671387, "learning_rate": 4.996243427970032e-06, "loss": 0.661, "step": 57 }, { "epoch": 0.049069373942470386, "grad_norm": 1.5835374593734741, "learning_rate": 4.995858485682857e-06, "loss": 0.6386, "step": 58 }, { "epoch": 0.04991539763113367, "grad_norm": 1.7450281381607056, "learning_rate": 4.995454786965037e-06, "loss": 0.6046, "step": 59 }, { "epoch": 0.050761421319796954, "grad_norm": 1.586624026298523, "learning_rate": 4.995032334850378e-06, "loss": 0.6807, "step": 60 }, { "epoch": 0.051607445008460234, "grad_norm": 1.6673095226287842, "learning_rate": 4.994591132513616e-06, "loss": 0.6778, "step": 61 }, { "epoch": 0.05245346869712352, "grad_norm": 1.4863988161087036, "learning_rate": 4.994131183270396e-06, "loss": 0.5943, "step": 62 }, { "epoch": 0.0532994923857868, "grad_norm": 1.789526343345642, "learning_rate": 4.9936524905772466e-06, "loss": 0.6049, "step": 63 }, { "epoch": 0.05414551607445008, "grad_norm": 1.6632400751113892, "learning_rate": 4.993155058031554e-06, "loss": 0.7222, "step": 64 }, { "epoch": 0.05499153976311337, "grad_norm": 1.4629849195480347, "learning_rate": 4.992638889371534e-06, "loss": 0.5864, "step": 65 }, { "epoch": 0.05583756345177665, "grad_norm": 1.3855196237564087, "learning_rate": 4.992103988476206e-06, "loss": 0.6227, "step": 66 }, { "epoch": 0.05668358714043993, "grad_norm": 1.7248979806900024, "learning_rate": 4.99155035936536e-06, "loss": 0.6841, "step": 67 }, { "epoch": 0.05752961082910321, "grad_norm": 1.5749865770339966, "learning_rate": 4.990978006199534e-06, "loss": 0.6157, "step": 68 }, { "epoch": 0.0583756345177665, "grad_norm": 1.5531669855117798, "learning_rate": 4.990386933279973e-06, "loss": 0.5916, "step": 69 }, { "epoch": 0.05922165820642978, "grad_norm": 1.590692400932312, "learning_rate": 4.989777145048601e-06, "loss": 0.612, "step": 70 }, { "epoch": 0.06006768189509306, "grad_norm": 1.673030138015747, "learning_rate": 4.989148646087992e-06, "loss": 0.6037, "step": 71 }, { "epoch": 0.06091370558375635, "grad_norm": 1.7553675174713135, "learning_rate": 4.988501441121328e-06, "loss": 0.6356, "step": 72 }, { "epoch": 0.06175972927241963, "grad_norm": 1.5859638452529907, "learning_rate": 4.987835535012371e-06, "loss": 0.5881, "step": 73 }, { "epoch": 0.06260575296108291, "grad_norm": 1.5672540664672852, "learning_rate": 4.987150932765415e-06, "loss": 0.6047, "step": 74 }, { "epoch": 0.06345177664974619, "grad_norm": 1.6025596857070923, "learning_rate": 4.986447639525266e-06, "loss": 0.6815, "step": 75 }, { "epoch": 0.06429780033840947, "grad_norm": 1.5830960273742676, "learning_rate": 4.985725660577184e-06, "loss": 0.6036, "step": 76 }, { "epoch": 0.06514382402707276, "grad_norm": 1.7896863222122192, "learning_rate": 4.984985001346859e-06, "loss": 0.6463, "step": 77 }, { "epoch": 0.06598984771573604, "grad_norm": 1.6958873271942139, "learning_rate": 4.984225667400359e-06, "loss": 0.6724, "step": 78 }, { "epoch": 0.06683587140439932, "grad_norm": 1.622676968574524, "learning_rate": 4.983447664444097e-06, "loss": 0.6548, "step": 79 }, { "epoch": 0.0676818950930626, "grad_norm": 1.4418054819107056, "learning_rate": 4.982650998324781e-06, "loss": 0.5953, "step": 80 }, { "epoch": 0.06852791878172589, "grad_norm": 1.5336499214172363, "learning_rate": 4.981835675029375e-06, "loss": 0.6232, "step": 81 }, { "epoch": 0.06937394247038917, "grad_norm": 1.6581315994262695, "learning_rate": 4.981001700685051e-06, "loss": 0.6987, "step": 82 }, { "epoch": 0.07021996615905245, "grad_norm": 1.5908305644989014, "learning_rate": 4.980149081559142e-06, "loss": 0.5882, "step": 83 }, { "epoch": 0.07106598984771574, "grad_norm": 1.8210628032684326, "learning_rate": 4.979277824059103e-06, "loss": 0.6913, "step": 84 }, { "epoch": 0.07191201353637902, "grad_norm": 1.5042943954467773, "learning_rate": 4.978387934732451e-06, "loss": 0.693, "step": 85 }, { "epoch": 0.0727580372250423, "grad_norm": 1.7726975679397583, "learning_rate": 4.9774794202667236e-06, "loss": 0.7089, "step": 86 }, { "epoch": 0.07360406091370558, "grad_norm": 1.7052823305130005, "learning_rate": 4.976552287489427e-06, "loss": 0.6448, "step": 87 }, { "epoch": 0.07445008460236886, "grad_norm": 1.5747593641281128, "learning_rate": 4.975606543367983e-06, "loss": 0.5367, "step": 88 }, { "epoch": 0.07529610829103214, "grad_norm": 1.4632954597473145, "learning_rate": 4.974642195009681e-06, "loss": 0.5494, "step": 89 }, { "epoch": 0.07614213197969544, "grad_norm": 1.7577087879180908, "learning_rate": 4.97365924966162e-06, "loss": 0.6178, "step": 90 }, { "epoch": 0.07698815566835872, "grad_norm": 1.7583465576171875, "learning_rate": 4.972657714710653e-06, "loss": 0.622, "step": 91 }, { "epoch": 0.077834179357022, "grad_norm": 1.6962776184082031, "learning_rate": 4.9716375976833395e-06, "loss": 0.5397, "step": 92 }, { "epoch": 0.07868020304568528, "grad_norm": 1.7553181648254395, "learning_rate": 4.9705989062458805e-06, "loss": 0.5369, "step": 93 }, { "epoch": 0.07952622673434856, "grad_norm": 1.4741019010543823, "learning_rate": 4.969541648204064e-06, "loss": 0.5877, "step": 94 }, { "epoch": 0.08037225042301184, "grad_norm": 1.5855069160461426, "learning_rate": 4.968465831503207e-06, "loss": 0.7098, "step": 95 }, { "epoch": 0.08121827411167512, "grad_norm": 1.7892258167266846, "learning_rate": 4.967371464228096e-06, "loss": 0.6401, "step": 96 }, { "epoch": 0.08206429780033841, "grad_norm": 1.4679720401763916, "learning_rate": 4.966258554602924e-06, "loss": 0.5463, "step": 97 }, { "epoch": 0.0829103214890017, "grad_norm": 1.7447293996810913, "learning_rate": 4.965127110991232e-06, "loss": 0.6407, "step": 98 }, { "epoch": 0.08375634517766498, "grad_norm": 1.4224369525909424, "learning_rate": 4.9639771418958434e-06, "loss": 0.568, "step": 99 }, { "epoch": 0.08460236886632826, "grad_norm": 1.539178729057312, "learning_rate": 4.9628086559588e-06, "loss": 0.6125, "step": 100 }, { "epoch": 0.08544839255499154, "grad_norm": 1.6318973302841187, "learning_rate": 4.961621661961299e-06, "loss": 0.5793, "step": 101 }, { "epoch": 0.08629441624365482, "grad_norm": 1.7556897401809692, "learning_rate": 4.960416168823626e-06, "loss": 0.5352, "step": 102 }, { "epoch": 0.08714043993231811, "grad_norm": 1.5747413635253906, "learning_rate": 4.959192185605089e-06, "loss": 0.6511, "step": 103 }, { "epoch": 0.08798646362098139, "grad_norm": 1.4685758352279663, "learning_rate": 4.957949721503947e-06, "loss": 0.5377, "step": 104 }, { "epoch": 0.08883248730964467, "grad_norm": 1.4151921272277832, "learning_rate": 4.956688785857345e-06, "loss": 0.5788, "step": 105 }, { "epoch": 0.08967851099830795, "grad_norm": 1.6794856786727905, "learning_rate": 4.955409388141243e-06, "loss": 0.6054, "step": 106 }, { "epoch": 0.09052453468697123, "grad_norm": 1.4817862510681152, "learning_rate": 4.954111537970342e-06, "loss": 0.6027, "step": 107 }, { "epoch": 0.09137055837563451, "grad_norm": 1.728560209274292, "learning_rate": 4.952795245098013e-06, "loss": 0.552, "step": 108 }, { "epoch": 0.0922165820642978, "grad_norm": 1.6162961721420288, "learning_rate": 4.951460519416228e-06, "loss": 0.6239, "step": 109 }, { "epoch": 0.09306260575296109, "grad_norm": 1.5593509674072266, "learning_rate": 4.950107370955477e-06, "loss": 0.6413, "step": 110 }, { "epoch": 0.09390862944162437, "grad_norm": 1.680822491645813, "learning_rate": 4.948735809884701e-06, "loss": 0.5699, "step": 111 }, { "epoch": 0.09475465313028765, "grad_norm": 1.745162010192871, "learning_rate": 4.94734584651121e-06, "loss": 0.5752, "step": 112 }, { "epoch": 0.09560067681895093, "grad_norm": 1.4649637937545776, "learning_rate": 4.945937491280611e-06, "loss": 0.5046, "step": 113 }, { "epoch": 0.09644670050761421, "grad_norm": 1.5797159671783447, "learning_rate": 4.944510754776724e-06, "loss": 0.6037, "step": 114 }, { "epoch": 0.09729272419627749, "grad_norm": 1.8420116901397705, "learning_rate": 4.9430656477215016e-06, "loss": 0.647, "step": 115 }, { "epoch": 0.09813874788494077, "grad_norm": 1.5039803981781006, "learning_rate": 4.941602180974958e-06, "loss": 0.6207, "step": 116 }, { "epoch": 0.09898477157360407, "grad_norm": 1.636516809463501, "learning_rate": 4.940120365535076e-06, "loss": 0.5839, "step": 117 }, { "epoch": 0.09983079526226735, "grad_norm": 1.7785464525222778, "learning_rate": 4.938620212537733e-06, "loss": 0.5822, "step": 118 }, { "epoch": 0.10067681895093063, "grad_norm": 1.5129812955856323, "learning_rate": 4.937101733256608e-06, "loss": 0.6025, "step": 119 }, { "epoch": 0.10152284263959391, "grad_norm": 1.603428602218628, "learning_rate": 4.9355649391031066e-06, "loss": 0.6247, "step": 120 }, { "epoch": 0.10236886632825719, "grad_norm": 1.5037899017333984, "learning_rate": 4.934009841626272e-06, "loss": 0.5521, "step": 121 }, { "epoch": 0.10321489001692047, "grad_norm": 1.5160306692123413, "learning_rate": 4.932436452512693e-06, "loss": 0.6395, "step": 122 }, { "epoch": 0.10406091370558376, "grad_norm": 1.4215550422668457, "learning_rate": 4.930844783586424e-06, "loss": 0.4997, "step": 123 }, { "epoch": 0.10490693739424704, "grad_norm": 1.7557592391967773, "learning_rate": 4.929234846808893e-06, "loss": 0.6924, "step": 124 }, { "epoch": 0.10575296108291032, "grad_norm": 1.5873064994812012, "learning_rate": 4.927606654278809e-06, "loss": 0.5761, "step": 125 }, { "epoch": 0.1065989847715736, "grad_norm": 1.5326778888702393, "learning_rate": 4.925960218232073e-06, "loss": 0.5501, "step": 126 }, { "epoch": 0.10744500846023688, "grad_norm": 1.5582760572433472, "learning_rate": 4.924295551041688e-06, "loss": 0.5711, "step": 127 }, { "epoch": 0.10829103214890017, "grad_norm": 1.5439339876174927, "learning_rate": 4.922612665217664e-06, "loss": 0.5736, "step": 128 }, { "epoch": 0.10913705583756345, "grad_norm": 1.3591135740280151, "learning_rate": 4.920911573406925e-06, "loss": 0.5472, "step": 129 }, { "epoch": 0.10998307952622674, "grad_norm": 1.655671238899231, "learning_rate": 4.919192288393213e-06, "loss": 0.5782, "step": 130 }, { "epoch": 0.11082910321489002, "grad_norm": 1.5855729579925537, "learning_rate": 4.917454823096991e-06, "loss": 0.6764, "step": 131 }, { "epoch": 0.1116751269035533, "grad_norm": 1.4577858448028564, "learning_rate": 4.915699190575349e-06, "loss": 0.4923, "step": 132 }, { "epoch": 0.11252115059221658, "grad_norm": 1.5771598815917969, "learning_rate": 4.913925404021905e-06, "loss": 0.548, "step": 133 }, { "epoch": 0.11336717428087986, "grad_norm": 1.7482346296310425, "learning_rate": 4.912133476766701e-06, "loss": 0.629, "step": 134 }, { "epoch": 0.11421319796954314, "grad_norm": 1.6378353834152222, "learning_rate": 4.91032342227611e-06, "loss": 0.6119, "step": 135 }, { "epoch": 0.11505922165820642, "grad_norm": 1.4870620965957642, "learning_rate": 4.9084952541527315e-06, "loss": 0.5078, "step": 136 }, { "epoch": 0.11590524534686972, "grad_norm": 1.6145250797271729, "learning_rate": 4.9066489861352875e-06, "loss": 0.608, "step": 137 }, { "epoch": 0.116751269035533, "grad_norm": 1.616117000579834, "learning_rate": 4.904784632098523e-06, "loss": 0.5443, "step": 138 }, { "epoch": 0.11759729272419628, "grad_norm": 1.6198755502700806, "learning_rate": 4.902902206053099e-06, "loss": 0.6141, "step": 139 }, { "epoch": 0.11844331641285956, "grad_norm": 1.4478083848953247, "learning_rate": 4.9010017221454875e-06, "loss": 0.5369, "step": 140 }, { "epoch": 0.11928934010152284, "grad_norm": 1.6716082096099854, "learning_rate": 4.899083194657867e-06, "loss": 0.5421, "step": 141 }, { "epoch": 0.12013536379018612, "grad_norm": 1.6240705251693726, "learning_rate": 4.897146638008012e-06, "loss": 0.5594, "step": 142 }, { "epoch": 0.12098138747884941, "grad_norm": 1.4059849977493286, "learning_rate": 4.89519206674919e-06, "loss": 0.5416, "step": 143 }, { "epoch": 0.1218274111675127, "grad_norm": 1.4964284896850586, "learning_rate": 4.893219495570043e-06, "loss": 0.5634, "step": 144 }, { "epoch": 0.12267343485617598, "grad_norm": 1.5191673040390015, "learning_rate": 4.891228939294489e-06, "loss": 0.5912, "step": 145 }, { "epoch": 0.12351945854483926, "grad_norm": 1.652429223060608, "learning_rate": 4.8892204128816e-06, "loss": 0.6106, "step": 146 }, { "epoch": 0.12436548223350254, "grad_norm": 1.706398606300354, "learning_rate": 4.8871939314254965e-06, "loss": 0.6298, "step": 147 }, { "epoch": 0.12521150592216582, "grad_norm": 1.6904054880142212, "learning_rate": 4.88514951015523e-06, "loss": 0.5076, "step": 148 }, { "epoch": 0.1260575296108291, "grad_norm": 1.4144283533096313, "learning_rate": 4.883087164434672e-06, "loss": 0.5625, "step": 149 }, { "epoch": 0.12690355329949238, "grad_norm": 1.6963647603988647, "learning_rate": 4.881006909762394e-06, "loss": 0.7107, "step": 150 }, { "epoch": 0.12774957698815567, "grad_norm": 1.7292715311050415, "learning_rate": 4.878908761771555e-06, "loss": 0.5773, "step": 151 }, { "epoch": 0.12859560067681894, "grad_norm": 1.321929693222046, "learning_rate": 4.876792736229782e-06, "loss": 0.5283, "step": 152 }, { "epoch": 0.12944162436548223, "grad_norm": 1.5928312540054321, "learning_rate": 4.874658849039054e-06, "loss": 0.5278, "step": 153 }, { "epoch": 0.13028764805414553, "grad_norm": 1.5850692987442017, "learning_rate": 4.8725071162355805e-06, "loss": 0.6298, "step": 154 }, { "epoch": 0.1311336717428088, "grad_norm": 1.534232497215271, "learning_rate": 4.870337553989678e-06, "loss": 0.5157, "step": 155 }, { "epoch": 0.1319796954314721, "grad_norm": 1.4662110805511475, "learning_rate": 4.8681501786056545e-06, "loss": 0.4884, "step": 156 }, { "epoch": 0.13282571912013535, "grad_norm": 1.4880340099334717, "learning_rate": 4.865945006521684e-06, "loss": 0.6217, "step": 157 }, { "epoch": 0.13367174280879865, "grad_norm": 1.578230857849121, "learning_rate": 4.863722054309682e-06, "loss": 0.6814, "step": 158 }, { "epoch": 0.13451776649746192, "grad_norm": 1.6861345767974854, "learning_rate": 4.861481338675183e-06, "loss": 0.5715, "step": 159 }, { "epoch": 0.1353637901861252, "grad_norm": 1.561371922492981, "learning_rate": 4.8592228764572135e-06, "loss": 0.5708, "step": 160 }, { "epoch": 0.1362098138747885, "grad_norm": 1.7995845079421997, "learning_rate": 4.856946684628167e-06, "loss": 0.6436, "step": 161 }, { "epoch": 0.13705583756345177, "grad_norm": 1.5640236139297485, "learning_rate": 4.854652780293672e-06, "loss": 0.6295, "step": 162 }, { "epoch": 0.13790186125211507, "grad_norm": 1.3940402269363403, "learning_rate": 4.852341180692471e-06, "loss": 0.5547, "step": 163 }, { "epoch": 0.13874788494077833, "grad_norm": 1.512726068496704, "learning_rate": 4.8500119031962845e-06, "loss": 0.5077, "step": 164 }, { "epoch": 0.13959390862944163, "grad_norm": 1.684700608253479, "learning_rate": 4.847664965309684e-06, "loss": 0.5076, "step": 165 }, { "epoch": 0.1404399323181049, "grad_norm": 1.5767724514007568, "learning_rate": 4.845300384669958e-06, "loss": 0.6229, "step": 166 }, { "epoch": 0.1412859560067682, "grad_norm": 1.6619733572006226, "learning_rate": 4.842918179046982e-06, "loss": 0.555, "step": 167 }, { "epoch": 0.14213197969543148, "grad_norm": 1.633802056312561, "learning_rate": 4.840518366343083e-06, "loss": 0.592, "step": 168 }, { "epoch": 0.14297800338409475, "grad_norm": 1.6347830295562744, "learning_rate": 4.8381009645929044e-06, "loss": 0.6016, "step": 169 }, { "epoch": 0.14382402707275804, "grad_norm": 1.4932504892349243, "learning_rate": 4.835665991963274e-06, "loss": 0.5356, "step": 170 }, { "epoch": 0.1446700507614213, "grad_norm": 1.5529918670654297, "learning_rate": 4.833213466753063e-06, "loss": 0.503, "step": 171 }, { "epoch": 0.1455160744500846, "grad_norm": 1.5487970113754272, "learning_rate": 4.830743407393052e-06, "loss": 0.5763, "step": 172 }, { "epoch": 0.1463620981387479, "grad_norm": 1.521419644355774, "learning_rate": 4.82825583244579e-06, "loss": 0.5377, "step": 173 }, { "epoch": 0.14720812182741116, "grad_norm": 1.516845703125, "learning_rate": 4.825750760605458e-06, "loss": 0.6147, "step": 174 }, { "epoch": 0.14805414551607446, "grad_norm": 1.7243348360061646, "learning_rate": 4.823228210697723e-06, "loss": 0.5545, "step": 175 }, { "epoch": 0.14890016920473773, "grad_norm": 1.5753135681152344, "learning_rate": 4.820688201679605e-06, "loss": 0.5235, "step": 176 }, { "epoch": 0.14974619289340102, "grad_norm": 1.7663754224777222, "learning_rate": 4.818130752639326e-06, "loss": 0.6196, "step": 177 }, { "epoch": 0.1505922165820643, "grad_norm": 1.7618986368179321, "learning_rate": 4.815555882796169e-06, "loss": 0.6838, "step": 178 }, { "epoch": 0.15143824027072758, "grad_norm": 1.4118471145629883, "learning_rate": 4.8129636115003396e-06, "loss": 0.5275, "step": 179 }, { "epoch": 0.15228426395939088, "grad_norm": 1.6629770994186401, "learning_rate": 4.810353958232811e-06, "loss": 0.5783, "step": 180 }, { "epoch": 0.15313028764805414, "grad_norm": 1.4159945249557495, "learning_rate": 4.807726942605184e-06, "loss": 0.508, "step": 181 }, { "epoch": 0.15397631133671744, "grad_norm": 1.6594223976135254, "learning_rate": 4.8050825843595395e-06, "loss": 0.5711, "step": 182 }, { "epoch": 0.1548223350253807, "grad_norm": 1.604934811592102, "learning_rate": 4.802420903368286e-06, "loss": 0.5777, "step": 183 }, { "epoch": 0.155668358714044, "grad_norm": 1.6225334405899048, "learning_rate": 4.7997419196340136e-06, "loss": 0.5079, "step": 184 }, { "epoch": 0.15651438240270726, "grad_norm": 1.7522289752960205, "learning_rate": 4.797045653289343e-06, "loss": 0.587, "step": 185 }, { "epoch": 0.15736040609137056, "grad_norm": 1.5537394285202026, "learning_rate": 4.794332124596775e-06, "loss": 0.5068, "step": 186 }, { "epoch": 0.15820642978003385, "grad_norm": 1.5667427778244019, "learning_rate": 4.791601353948537e-06, "loss": 0.6201, "step": 187 }, { "epoch": 0.15905245346869712, "grad_norm": 1.667194128036499, "learning_rate": 4.788853361866429e-06, "loss": 0.5411, "step": 188 }, { "epoch": 0.1598984771573604, "grad_norm": 1.434585452079773, "learning_rate": 4.786088169001671e-06, "loss": 0.5377, "step": 189 }, { "epoch": 0.16074450084602368, "grad_norm": 1.4391204118728638, "learning_rate": 4.7833057961347476e-06, "loss": 0.5865, "step": 190 }, { "epoch": 0.16159052453468697, "grad_norm": 1.5104649066925049, "learning_rate": 4.78050626417525e-06, "loss": 0.4837, "step": 191 }, { "epoch": 0.16243654822335024, "grad_norm": 1.483412742614746, "learning_rate": 4.777689594161724e-06, "loss": 0.5627, "step": 192 }, { "epoch": 0.16328257191201354, "grad_norm": 1.9065080881118774, "learning_rate": 4.774855807261504e-06, "loss": 0.611, "step": 193 }, { "epoch": 0.16412859560067683, "grad_norm": 1.5537798404693604, "learning_rate": 4.77200492477056e-06, "loss": 0.5078, "step": 194 }, { "epoch": 0.1649746192893401, "grad_norm": 1.5670864582061768, "learning_rate": 4.769136968113337e-06, "loss": 0.5509, "step": 195 }, { "epoch": 0.1658206429780034, "grad_norm": 1.5053881406784058, "learning_rate": 4.766251958842589e-06, "loss": 0.5504, "step": 196 }, { "epoch": 0.16666666666666666, "grad_norm": 1.4908168315887451, "learning_rate": 4.763349918639228e-06, "loss": 0.5645, "step": 197 }, { "epoch": 0.16751269035532995, "grad_norm": 1.4086905717849731, "learning_rate": 4.760430869312144e-06, "loss": 0.4633, "step": 198 }, { "epoch": 0.16835871404399322, "grad_norm": 1.4943495988845825, "learning_rate": 4.757494832798057e-06, "loss": 0.5893, "step": 199 }, { "epoch": 0.1692047377326565, "grad_norm": 1.524116039276123, "learning_rate": 4.7545418311613485e-06, "loss": 0.5761, "step": 200 }, { "epoch": 0.1700507614213198, "grad_norm": 1.6232064962387085, "learning_rate": 4.751571886593886e-06, "loss": 0.5514, "step": 201 }, { "epoch": 0.17089678510998307, "grad_norm": 1.592065453529358, "learning_rate": 4.748585021414869e-06, "loss": 0.586, "step": 202 }, { "epoch": 0.17174280879864637, "grad_norm": 1.39573335647583, "learning_rate": 4.745581258070654e-06, "loss": 0.538, "step": 203 }, { "epoch": 0.17258883248730963, "grad_norm": 1.5501444339752197, "learning_rate": 4.742560619134587e-06, "loss": 0.6041, "step": 204 }, { "epoch": 0.17343485617597293, "grad_norm": 1.645410418510437, "learning_rate": 4.739523127306837e-06, "loss": 0.5364, "step": 205 }, { "epoch": 0.17428087986463622, "grad_norm": 1.623639464378357, "learning_rate": 4.736468805414218e-06, "loss": 0.6014, "step": 206 }, { "epoch": 0.1751269035532995, "grad_norm": 1.521011471748352, "learning_rate": 4.733397676410027e-06, "loss": 0.5821, "step": 207 }, { "epoch": 0.17597292724196278, "grad_norm": 1.590009331703186, "learning_rate": 4.730309763373866e-06, "loss": 0.5419, "step": 208 }, { "epoch": 0.17681895093062605, "grad_norm": 1.5016567707061768, "learning_rate": 4.727205089511466e-06, "loss": 0.5539, "step": 209 }, { "epoch": 0.17766497461928935, "grad_norm": 1.3730658292770386, "learning_rate": 4.7240836781545205e-06, "loss": 0.5356, "step": 210 }, { "epoch": 0.1785109983079526, "grad_norm": 1.5500950813293457, "learning_rate": 4.720945552760503e-06, "loss": 0.5907, "step": 211 }, { "epoch": 0.1793570219966159, "grad_norm": 1.5380445718765259, "learning_rate": 4.717790736912493e-06, "loss": 0.5068, "step": 212 }, { "epoch": 0.1802030456852792, "grad_norm": 1.4724454879760742, "learning_rate": 4.7146192543190005e-06, "loss": 0.5491, "step": 213 }, { "epoch": 0.18104906937394247, "grad_norm": 1.558326244354248, "learning_rate": 4.711431128813787e-06, "loss": 0.5843, "step": 214 }, { "epoch": 0.18189509306260576, "grad_norm": 1.6515816450119019, "learning_rate": 4.708226384355684e-06, "loss": 0.5486, "step": 215 }, { "epoch": 0.18274111675126903, "grad_norm": 1.4587641954421997, "learning_rate": 4.705005045028415e-06, "loss": 0.6305, "step": 216 }, { "epoch": 0.18358714043993232, "grad_norm": 1.5102735757827759, "learning_rate": 4.701767135040415e-06, "loss": 0.4159, "step": 217 }, { "epoch": 0.1844331641285956, "grad_norm": 1.6922459602355957, "learning_rate": 4.698512678724649e-06, "loss": 0.5456, "step": 218 }, { "epoch": 0.18527918781725888, "grad_norm": 1.7045525312423706, "learning_rate": 4.695241700538425e-06, "loss": 0.5584, "step": 219 }, { "epoch": 0.18612521150592218, "grad_norm": 1.6846798658370972, "learning_rate": 4.691954225063218e-06, "loss": 0.5568, "step": 220 }, { "epoch": 0.18697123519458544, "grad_norm": 1.4572404623031616, "learning_rate": 4.688650277004474e-06, "loss": 0.5408, "step": 221 }, { "epoch": 0.18781725888324874, "grad_norm": 1.7677491903305054, "learning_rate": 4.685329881191436e-06, "loss": 0.6165, "step": 222 }, { "epoch": 0.188663282571912, "grad_norm": 1.613861322402954, "learning_rate": 4.68199306257695e-06, "loss": 0.5826, "step": 223 }, { "epoch": 0.1895093062605753, "grad_norm": 1.4331867694854736, "learning_rate": 4.678639846237281e-06, "loss": 0.54, "step": 224 }, { "epoch": 0.19035532994923857, "grad_norm": 1.5444533824920654, "learning_rate": 4.675270257371922e-06, "loss": 0.5222, "step": 225 }, { "epoch": 0.19120135363790186, "grad_norm": 1.3472926616668701, "learning_rate": 4.671884321303407e-06, "loss": 0.5211, "step": 226 }, { "epoch": 0.19204737732656516, "grad_norm": 1.4535293579101562, "learning_rate": 4.668482063477118e-06, "loss": 0.5718, "step": 227 }, { "epoch": 0.19289340101522842, "grad_norm": 1.6504758596420288, "learning_rate": 4.665063509461098e-06, "loss": 0.5433, "step": 228 }, { "epoch": 0.19373942470389172, "grad_norm": 1.2919827699661255, "learning_rate": 4.661628684945851e-06, "loss": 0.4668, "step": 229 }, { "epoch": 0.19458544839255498, "grad_norm": 1.582503318786621, "learning_rate": 4.658177615744162e-06, "loss": 0.5492, "step": 230 }, { "epoch": 0.19543147208121828, "grad_norm": 1.4641120433807373, "learning_rate": 4.654710327790889e-06, "loss": 0.556, "step": 231 }, { "epoch": 0.19627749576988154, "grad_norm": 1.5751663446426392, "learning_rate": 4.651226847142774e-06, "loss": 0.5209, "step": 232 }, { "epoch": 0.19712351945854484, "grad_norm": 1.361720085144043, "learning_rate": 4.647727199978255e-06, "loss": 0.5849, "step": 233 }, { "epoch": 0.19796954314720813, "grad_norm": 1.4193429946899414, "learning_rate": 4.644211412597251e-06, "loss": 0.4808, "step": 234 }, { "epoch": 0.1988155668358714, "grad_norm": 1.641695261001587, "learning_rate": 4.640679511420983e-06, "loss": 0.5782, "step": 235 }, { "epoch": 0.1996615905245347, "grad_norm": 1.509385347366333, "learning_rate": 4.6371315229917644e-06, "loss": 0.5397, "step": 236 }, { "epoch": 0.20050761421319796, "grad_norm": 1.431472897529602, "learning_rate": 4.6335674739728055e-06, "loss": 0.5817, "step": 237 }, { "epoch": 0.20135363790186125, "grad_norm": 1.4169070720672607, "learning_rate": 4.629987391148012e-06, "loss": 0.536, "step": 238 }, { "epoch": 0.20219966159052452, "grad_norm": 1.3376789093017578, "learning_rate": 4.6263913014217826e-06, "loss": 0.4489, "step": 239 }, { "epoch": 0.20304568527918782, "grad_norm": 1.4299821853637695, "learning_rate": 4.622779231818811e-06, "loss": 0.555, "step": 240 }, { "epoch": 0.2038917089678511, "grad_norm": 1.5063016414642334, "learning_rate": 4.619151209483879e-06, "loss": 0.5898, "step": 241 }, { "epoch": 0.20473773265651438, "grad_norm": 1.5440738201141357, "learning_rate": 4.6155072616816515e-06, "loss": 0.5395, "step": 242 }, { "epoch": 0.20558375634517767, "grad_norm": 1.6442499160766602, "learning_rate": 4.611847415796476e-06, "loss": 0.5488, "step": 243 }, { "epoch": 0.20642978003384094, "grad_norm": 1.5802503824234009, "learning_rate": 4.608171699332174e-06, "loss": 0.5221, "step": 244 }, { "epoch": 0.20727580372250423, "grad_norm": 1.5823266506195068, "learning_rate": 4.604480139911836e-06, "loss": 0.5678, "step": 245 }, { "epoch": 0.20812182741116753, "grad_norm": 1.3660752773284912, "learning_rate": 4.600772765277607e-06, "loss": 0.5177, "step": 246 }, { "epoch": 0.2089678510998308, "grad_norm": 1.495895266532898, "learning_rate": 4.597049603290491e-06, "loss": 0.5982, "step": 247 }, { "epoch": 0.2098138747884941, "grad_norm": 1.5751233100891113, "learning_rate": 4.59331068193013e-06, "loss": 0.5461, "step": 248 }, { "epoch": 0.21065989847715735, "grad_norm": 1.4056577682495117, "learning_rate": 4.5895560292946e-06, "loss": 0.5657, "step": 249 }, { "epoch": 0.21150592216582065, "grad_norm": 1.6429760456085205, "learning_rate": 4.585785673600196e-06, "loss": 0.6208, "step": 250 }, { "epoch": 0.21235194585448391, "grad_norm": 1.6389528512954712, "learning_rate": 4.581999643181223e-06, "loss": 0.5263, "step": 251 }, { "epoch": 0.2131979695431472, "grad_norm": 1.4250948429107666, "learning_rate": 4.578197966489782e-06, "loss": 0.514, "step": 252 }, { "epoch": 0.2140439932318105, "grad_norm": 1.6212941408157349, "learning_rate": 4.574380672095555e-06, "loss": 0.4906, "step": 253 }, { "epoch": 0.21489001692047377, "grad_norm": 1.450196385383606, "learning_rate": 4.5705477886855925e-06, "loss": 0.4833, "step": 254 }, { "epoch": 0.21573604060913706, "grad_norm": 1.454309344291687, "learning_rate": 4.566699345064097e-06, "loss": 0.5631, "step": 255 }, { "epoch": 0.21658206429780033, "grad_norm": 1.706693410873413, "learning_rate": 4.562835370152206e-06, "loss": 0.548, "step": 256 }, { "epoch": 0.21742808798646363, "grad_norm": 1.4211560487747192, "learning_rate": 4.558955892987774e-06, "loss": 0.5947, "step": 257 }, { "epoch": 0.2182741116751269, "grad_norm": 1.4968616962432861, "learning_rate": 4.555060942725156e-06, "loss": 0.5693, "step": 258 }, { "epoch": 0.21912013536379019, "grad_norm": 1.4211528301239014, "learning_rate": 4.551150548634987e-06, "loss": 0.5976, "step": 259 }, { "epoch": 0.21996615905245348, "grad_norm": 1.4713115692138672, "learning_rate": 4.547224740103966e-06, "loss": 0.4759, "step": 260 }, { "epoch": 0.22081218274111675, "grad_norm": 1.6762306690216064, "learning_rate": 4.543283546634626e-06, "loss": 0.5732, "step": 261 }, { "epoch": 0.22165820642978004, "grad_norm": 1.266420841217041, "learning_rate": 4.539326997845124e-06, "loss": 0.4456, "step": 262 }, { "epoch": 0.2225042301184433, "grad_norm": 1.568297266960144, "learning_rate": 4.535355123469009e-06, "loss": 0.5853, "step": 263 }, { "epoch": 0.2233502538071066, "grad_norm": 1.5746086835861206, "learning_rate": 4.531367953355002e-06, "loss": 0.5569, "step": 264 }, { "epoch": 0.22419627749576987, "grad_norm": 1.4679572582244873, "learning_rate": 4.527365517466775e-06, "loss": 0.4425, "step": 265 }, { "epoch": 0.22504230118443316, "grad_norm": 1.5745289325714111, "learning_rate": 4.523347845882718e-06, "loss": 0.5316, "step": 266 }, { "epoch": 0.22588832487309646, "grad_norm": 1.5450767278671265, "learning_rate": 4.519314968795722e-06, "loss": 0.5353, "step": 267 }, { "epoch": 0.22673434856175972, "grad_norm": 1.850501537322998, "learning_rate": 4.515266916512945e-06, "loss": 0.6068, "step": 268 }, { "epoch": 0.22758037225042302, "grad_norm": 1.3289093971252441, "learning_rate": 4.511203719455588e-06, "loss": 0.529, "step": 269 }, { "epoch": 0.22842639593908629, "grad_norm": 1.6736395359039307, "learning_rate": 4.507125408158665e-06, "loss": 0.6073, "step": 270 }, { "epoch": 0.22927241962774958, "grad_norm": 1.3271763324737549, "learning_rate": 4.503032013270774e-06, "loss": 0.5165, "step": 271 }, { "epoch": 0.23011844331641285, "grad_norm": 1.4673720598220825, "learning_rate": 4.498923565553866e-06, "loss": 0.495, "step": 272 }, { "epoch": 0.23096446700507614, "grad_norm": 1.5206209421157837, "learning_rate": 4.494800095883014e-06, "loss": 0.5659, "step": 273 }, { "epoch": 0.23181049069373943, "grad_norm": 1.5659880638122559, "learning_rate": 4.490661635246183e-06, "loss": 0.5876, "step": 274 }, { "epoch": 0.2326565143824027, "grad_norm": 1.4979114532470703, "learning_rate": 4.4865082147439945e-06, "loss": 0.5988, "step": 275 }, { "epoch": 0.233502538071066, "grad_norm": 1.5441852807998657, "learning_rate": 4.482339865589492e-06, "loss": 0.5215, "step": 276 }, { "epoch": 0.23434856175972926, "grad_norm": 1.476711630821228, "learning_rate": 4.478156619107912e-06, "loss": 0.5019, "step": 277 }, { "epoch": 0.23519458544839256, "grad_norm": 1.4779571294784546, "learning_rate": 4.4739585067364425e-06, "loss": 0.6033, "step": 278 }, { "epoch": 0.23604060913705585, "grad_norm": 1.5338369607925415, "learning_rate": 4.469745560023987e-06, "loss": 0.5089, "step": 279 }, { "epoch": 0.23688663282571912, "grad_norm": 1.536832332611084, "learning_rate": 4.465517810630933e-06, "loss": 0.4967, "step": 280 }, { "epoch": 0.2377326565143824, "grad_norm": 1.5541664361953735, "learning_rate": 4.461275290328908e-06, "loss": 0.5869, "step": 281 }, { "epoch": 0.23857868020304568, "grad_norm": 1.7338745594024658, "learning_rate": 4.457018031000544e-06, "loss": 0.5288, "step": 282 }, { "epoch": 0.23942470389170897, "grad_norm": 1.345275640487671, "learning_rate": 4.452746064639239e-06, "loss": 0.4971, "step": 283 }, { "epoch": 0.24027072758037224, "grad_norm": 1.4979954957962036, "learning_rate": 4.448459423348911e-06, "loss": 0.5437, "step": 284 }, { "epoch": 0.24111675126903553, "grad_norm": 1.5409454107284546, "learning_rate": 4.444158139343763e-06, "loss": 0.5521, "step": 285 }, { "epoch": 0.24196277495769883, "grad_norm": 1.4069455862045288, "learning_rate": 4.439842244948036e-06, "loss": 0.5609, "step": 286 }, { "epoch": 0.2428087986463621, "grad_norm": 1.5055841207504272, "learning_rate": 4.435511772595773e-06, "loss": 0.5308, "step": 287 }, { "epoch": 0.2436548223350254, "grad_norm": 1.7902872562408447, "learning_rate": 4.4311667548305644e-06, "loss": 0.613, "step": 288 }, { "epoch": 0.24450084602368866, "grad_norm": 1.5679998397827148, "learning_rate": 4.426807224305315e-06, "loss": 0.5521, "step": 289 }, { "epoch": 0.24534686971235195, "grad_norm": 1.5276457071304321, "learning_rate": 4.422433213781991e-06, "loss": 0.5454, "step": 290 }, { "epoch": 0.24619289340101522, "grad_norm": 1.563459873199463, "learning_rate": 4.4180447561313765e-06, "loss": 0.508, "step": 291 }, { "epoch": 0.2470389170896785, "grad_norm": 1.575172781944275, "learning_rate": 4.413641884332825e-06, "loss": 0.5726, "step": 292 }, { "epoch": 0.2478849407783418, "grad_norm": 1.6327418088912964, "learning_rate": 4.409224631474014e-06, "loss": 0.6188, "step": 293 }, { "epoch": 0.24873096446700507, "grad_norm": 1.4867552518844604, "learning_rate": 4.404793030750695e-06, "loss": 0.5364, "step": 294 }, { "epoch": 0.24957698815566837, "grad_norm": 1.512021541595459, "learning_rate": 4.400347115466442e-06, "loss": 0.5083, "step": 295 }, { "epoch": 0.25042301184433163, "grad_norm": 1.3881378173828125, "learning_rate": 4.395886919032406e-06, "loss": 0.5789, "step": 296 }, { "epoch": 0.2512690355329949, "grad_norm": 1.4656717777252197, "learning_rate": 4.39141247496706e-06, "loss": 0.5213, "step": 297 }, { "epoch": 0.2521150592216582, "grad_norm": 1.5013030767440796, "learning_rate": 4.3869238168959485e-06, "loss": 0.5245, "step": 298 }, { "epoch": 0.2529610829103215, "grad_norm": 1.535556435585022, "learning_rate": 4.382420978551433e-06, "loss": 0.5541, "step": 299 }, { "epoch": 0.25380710659898476, "grad_norm": 1.4609181880950928, "learning_rate": 4.377903993772442e-06, "loss": 0.5155, "step": 300 }, { "epoch": 0.2546531302876481, "grad_norm": 1.4020256996154785, "learning_rate": 4.373372896504215e-06, "loss": 0.5553, "step": 301 }, { "epoch": 0.25549915397631134, "grad_norm": 1.5317964553833008, "learning_rate": 4.368827720798044e-06, "loss": 0.5547, "step": 302 }, { "epoch": 0.2563451776649746, "grad_norm": 1.6519298553466797, "learning_rate": 4.364268500811025e-06, "loss": 0.5305, "step": 303 }, { "epoch": 0.2571912013536379, "grad_norm": 1.511772632598877, "learning_rate": 4.359695270805795e-06, "loss": 0.449, "step": 304 }, { "epoch": 0.2580372250423012, "grad_norm": 1.3912087678909302, "learning_rate": 4.3551080651502755e-06, "loss": 0.5184, "step": 305 }, { "epoch": 0.25888324873096447, "grad_norm": 1.5851740837097168, "learning_rate": 4.350506918317416e-06, "loss": 0.5489, "step": 306 }, { "epoch": 0.25972927241962773, "grad_norm": 1.4331227540969849, "learning_rate": 4.345891864884937e-06, "loss": 0.5338, "step": 307 }, { "epoch": 0.26057529610829105, "grad_norm": 1.487821102142334, "learning_rate": 4.341262939535063e-06, "loss": 0.5155, "step": 308 }, { "epoch": 0.2614213197969543, "grad_norm": 1.5234811305999756, "learning_rate": 4.336620177054269e-06, "loss": 0.5026, "step": 309 }, { "epoch": 0.2622673434856176, "grad_norm": 1.3057184219360352, "learning_rate": 4.331963612333017e-06, "loss": 0.5378, "step": 310 }, { "epoch": 0.26311336717428085, "grad_norm": 1.5772360563278198, "learning_rate": 4.327293280365491e-06, "loss": 0.6281, "step": 311 }, { "epoch": 0.2639593908629442, "grad_norm": 1.5020463466644287, "learning_rate": 4.322609216249336e-06, "loss": 0.6181, "step": 312 }, { "epoch": 0.26480541455160744, "grad_norm": 1.4958893060684204, "learning_rate": 4.317911455185396e-06, "loss": 0.5468, "step": 313 }, { "epoch": 0.2656514382402707, "grad_norm": 1.5898452997207642, "learning_rate": 4.3132000324774485e-06, "loss": 0.5702, "step": 314 }, { "epoch": 0.26649746192893403, "grad_norm": 1.8296725749969482, "learning_rate": 4.308474983531936e-06, "loss": 0.7021, "step": 315 }, { "epoch": 0.2673434856175973, "grad_norm": 1.6258792877197266, "learning_rate": 4.303736343857704e-06, "loss": 0.557, "step": 316 }, { "epoch": 0.26818950930626057, "grad_norm": 1.581331729888916, "learning_rate": 4.298984149065732e-06, "loss": 0.5027, "step": 317 }, { "epoch": 0.26903553299492383, "grad_norm": 1.3485808372497559, "learning_rate": 4.294218434868869e-06, "loss": 0.4756, "step": 318 }, { "epoch": 0.26988155668358715, "grad_norm": 1.4658807516098022, "learning_rate": 4.289439237081557e-06, "loss": 0.5321, "step": 319 }, { "epoch": 0.2707275803722504, "grad_norm": 1.616141438484192, "learning_rate": 4.284646591619575e-06, "loss": 0.5233, "step": 320 }, { "epoch": 0.2715736040609137, "grad_norm": 1.5891221761703491, "learning_rate": 4.2798405344997545e-06, "loss": 0.5821, "step": 321 }, { "epoch": 0.272419627749577, "grad_norm": 1.4427226781845093, "learning_rate": 4.2750211018397204e-06, "loss": 0.4998, "step": 322 }, { "epoch": 0.2732656514382403, "grad_norm": 1.6634961366653442, "learning_rate": 4.270188329857613e-06, "loss": 0.6044, "step": 323 }, { "epoch": 0.27411167512690354, "grad_norm": 1.424747109413147, "learning_rate": 4.2653422548718195e-06, "loss": 0.5953, "step": 324 }, { "epoch": 0.2749576988155668, "grad_norm": 1.5241210460662842, "learning_rate": 4.260482913300697e-06, "loss": 0.576, "step": 325 }, { "epoch": 0.27580372250423013, "grad_norm": 1.4235581159591675, "learning_rate": 4.255610341662304e-06, "loss": 0.5074, "step": 326 }, { "epoch": 0.2766497461928934, "grad_norm": 1.5771161317825317, "learning_rate": 4.2507245765741215e-06, "loss": 0.5325, "step": 327 }, { "epoch": 0.27749576988155666, "grad_norm": 1.3609672784805298, "learning_rate": 4.245825654752781e-06, "loss": 0.5146, "step": 328 }, { "epoch": 0.27834179357022, "grad_norm": 1.4064686298370361, "learning_rate": 4.240913613013785e-06, "loss": 0.5279, "step": 329 }, { "epoch": 0.27918781725888325, "grad_norm": 1.3830342292785645, "learning_rate": 4.235988488271235e-06, "loss": 0.492, "step": 330 }, { "epoch": 0.2800338409475465, "grad_norm": 1.4581482410430908, "learning_rate": 4.231050317537548e-06, "loss": 0.5313, "step": 331 }, { "epoch": 0.2808798646362098, "grad_norm": 1.6080012321472168, "learning_rate": 4.226099137923186e-06, "loss": 0.5134, "step": 332 }, { "epoch": 0.2817258883248731, "grad_norm": 1.5522176027297974, "learning_rate": 4.221134986636371e-06, "loss": 0.562, "step": 333 }, { "epoch": 0.2825719120135364, "grad_norm": 1.5215039253234863, "learning_rate": 4.216157900982808e-06, "loss": 0.6292, "step": 334 }, { "epoch": 0.28341793570219964, "grad_norm": 1.5160623788833618, "learning_rate": 4.211167918365402e-06, "loss": 0.5636, "step": 335 }, { "epoch": 0.28426395939086296, "grad_norm": 1.3579323291778564, "learning_rate": 4.206165076283983e-06, "loss": 0.5459, "step": 336 }, { "epoch": 0.28510998307952623, "grad_norm": 1.6921268701553345, "learning_rate": 4.201149412335015e-06, "loss": 0.5202, "step": 337 }, { "epoch": 0.2859560067681895, "grad_norm": 1.5123941898345947, "learning_rate": 4.196120964211322e-06, "loss": 0.6074, "step": 338 }, { "epoch": 0.2868020304568528, "grad_norm": 1.4805959463119507, "learning_rate": 4.1910797697018026e-06, "loss": 0.5621, "step": 339 }, { "epoch": 0.2876480541455161, "grad_norm": 1.5556724071502686, "learning_rate": 4.1860258666911415e-06, "loss": 0.5228, "step": 340 }, { "epoch": 0.28849407783417935, "grad_norm": 1.430757999420166, "learning_rate": 4.180959293159529e-06, "loss": 0.5698, "step": 341 }, { "epoch": 0.2893401015228426, "grad_norm": 1.419783115386963, "learning_rate": 4.175880087182376e-06, "loss": 0.5296, "step": 342 }, { "epoch": 0.29018612521150594, "grad_norm": 1.7747116088867188, "learning_rate": 4.170788286930024e-06, "loss": 0.5129, "step": 343 }, { "epoch": 0.2910321489001692, "grad_norm": 1.5256282091140747, "learning_rate": 4.165683930667464e-06, "loss": 0.6247, "step": 344 }, { "epoch": 0.2918781725888325, "grad_norm": 1.5945425033569336, "learning_rate": 4.160567056754044e-06, "loss": 0.555, "step": 345 }, { "epoch": 0.2927241962774958, "grad_norm": 1.4873846769332886, "learning_rate": 4.155437703643182e-06, "loss": 0.576, "step": 346 }, { "epoch": 0.29357021996615906, "grad_norm": 1.5128968954086304, "learning_rate": 4.1502959098820774e-06, "loss": 0.5328, "step": 347 }, { "epoch": 0.29441624365482233, "grad_norm": 1.6180955171585083, "learning_rate": 4.145141714111421e-06, "loss": 0.568, "step": 348 }, { "epoch": 0.2952622673434856, "grad_norm": 1.605104923248291, "learning_rate": 4.139975155065109e-06, "loss": 0.5578, "step": 349 }, { "epoch": 0.2961082910321489, "grad_norm": 1.6930770874023438, "learning_rate": 4.134796271569942e-06, "loss": 0.4953, "step": 350 }, { "epoch": 0.2969543147208122, "grad_norm": 1.4728012084960938, "learning_rate": 4.129605102545341e-06, "loss": 0.4959, "step": 351 }, { "epoch": 0.29780033840947545, "grad_norm": 1.610328197479248, "learning_rate": 4.124401687003057e-06, "loss": 0.5775, "step": 352 }, { "epoch": 0.2986463620981388, "grad_norm": 1.7682827711105347, "learning_rate": 4.119186064046868e-06, "loss": 0.548, "step": 353 }, { "epoch": 0.29949238578680204, "grad_norm": 1.525848150253296, "learning_rate": 4.113958272872294e-06, "loss": 0.5324, "step": 354 }, { "epoch": 0.3003384094754653, "grad_norm": 1.4608542919158936, "learning_rate": 4.1087183527663e-06, "loss": 0.434, "step": 355 }, { "epoch": 0.3011844331641286, "grad_norm": 1.5706309080123901, "learning_rate": 4.103466343106999e-06, "loss": 0.4819, "step": 356 }, { "epoch": 0.3020304568527919, "grad_norm": 1.5604802370071411, "learning_rate": 4.098202283363356e-06, "loss": 0.6517, "step": 357 }, { "epoch": 0.30287648054145516, "grad_norm": 1.406082272529602, "learning_rate": 4.092926213094897e-06, "loss": 0.4477, "step": 358 }, { "epoch": 0.30372250423011843, "grad_norm": 1.4490677118301392, "learning_rate": 4.087638171951401e-06, "loss": 0.5426, "step": 359 }, { "epoch": 0.30456852791878175, "grad_norm": 1.472643256187439, "learning_rate": 4.082338199672615e-06, "loss": 0.5259, "step": 360 }, { "epoch": 0.305414551607445, "grad_norm": 1.5344111919403076, "learning_rate": 4.077026336087944e-06, "loss": 0.5191, "step": 361 }, { "epoch": 0.3062605752961083, "grad_norm": 1.4391659498214722, "learning_rate": 4.071702621116158e-06, "loss": 0.5136, "step": 362 }, { "epoch": 0.30710659898477155, "grad_norm": 1.6155903339385986, "learning_rate": 4.066367094765091e-06, "loss": 0.5033, "step": 363 }, { "epoch": 0.3079526226734349, "grad_norm": 1.7278281450271606, "learning_rate": 4.0610197971313395e-06, "loss": 0.5509, "step": 364 }, { "epoch": 0.30879864636209814, "grad_norm": 1.4298288822174072, "learning_rate": 4.0556607683999605e-06, "loss": 0.5406, "step": 365 }, { "epoch": 0.3096446700507614, "grad_norm": 1.405069351196289, "learning_rate": 4.050290048844171e-06, "loss": 0.5179, "step": 366 }, { "epoch": 0.31049069373942473, "grad_norm": 1.6136541366577148, "learning_rate": 4.044907678825045e-06, "loss": 0.5519, "step": 367 }, { "epoch": 0.311336717428088, "grad_norm": 1.4015752077102661, "learning_rate": 4.03951369879121e-06, "loss": 0.5596, "step": 368 }, { "epoch": 0.31218274111675126, "grad_norm": 1.5532114505767822, "learning_rate": 4.034108149278544e-06, "loss": 0.5302, "step": 369 }, { "epoch": 0.3130287648054145, "grad_norm": 1.4932410717010498, "learning_rate": 4.028691070909867e-06, "loss": 0.5539, "step": 370 }, { "epoch": 0.31387478849407785, "grad_norm": 1.5183504819869995, "learning_rate": 4.0232625043946416e-06, "loss": 0.5717, "step": 371 }, { "epoch": 0.3147208121827411, "grad_norm": 1.4620509147644043, "learning_rate": 4.017822490528664e-06, "loss": 0.5102, "step": 372 }, { "epoch": 0.3155668358714044, "grad_norm": 1.389592170715332, "learning_rate": 4.012371070193753e-06, "loss": 0.5388, "step": 373 }, { "epoch": 0.3164128595600677, "grad_norm": 1.6649476289749146, "learning_rate": 4.006908284357453e-06, "loss": 0.6184, "step": 374 }, { "epoch": 0.31725888324873097, "grad_norm": 1.5024865865707397, "learning_rate": 4.001434174072718e-06, "loss": 0.5858, "step": 375 }, { "epoch": 0.31810490693739424, "grad_norm": 1.4840333461761475, "learning_rate": 3.995948780477605e-06, "loss": 0.528, "step": 376 }, { "epoch": 0.3189509306260575, "grad_norm": 1.407777190208435, "learning_rate": 3.990452144794966e-06, "loss": 0.5988, "step": 377 }, { "epoch": 0.3197969543147208, "grad_norm": 1.409677505493164, "learning_rate": 3.984944308332138e-06, "loss": 0.5369, "step": 378 }, { "epoch": 0.3206429780033841, "grad_norm": 1.607074499130249, "learning_rate": 3.97942531248063e-06, "loss": 0.5795, "step": 379 }, { "epoch": 0.32148900169204736, "grad_norm": 1.291720986366272, "learning_rate": 3.973895198715816e-06, "loss": 0.5359, "step": 380 }, { "epoch": 0.3223350253807107, "grad_norm": 1.5747040510177612, "learning_rate": 3.968354008596621e-06, "loss": 0.5902, "step": 381 }, { "epoch": 0.32318104906937395, "grad_norm": 1.5991111993789673, "learning_rate": 3.96280178376521e-06, "loss": 0.5414, "step": 382 }, { "epoch": 0.3240270727580372, "grad_norm": 1.3984931707382202, "learning_rate": 3.957238565946672e-06, "loss": 0.4185, "step": 383 }, { "epoch": 0.3248730964467005, "grad_norm": 1.4653754234313965, "learning_rate": 3.951664396948709e-06, "loss": 0.6029, "step": 384 }, { "epoch": 0.3257191201353638, "grad_norm": 1.4449567794799805, "learning_rate": 3.9460793186613235e-06, "loss": 0.4723, "step": 385 }, { "epoch": 0.32656514382402707, "grad_norm": 1.3438575267791748, "learning_rate": 3.9404833730564975e-06, "loss": 0.4138, "step": 386 }, { "epoch": 0.32741116751269034, "grad_norm": 1.423072099685669, "learning_rate": 3.934876602187886e-06, "loss": 0.5623, "step": 387 }, { "epoch": 0.32825719120135366, "grad_norm": 1.52696692943573, "learning_rate": 3.929259048190492e-06, "loss": 0.4729, "step": 388 }, { "epoch": 0.3291032148900169, "grad_norm": 1.3745728731155396, "learning_rate": 3.923630753280358e-06, "loss": 0.5163, "step": 389 }, { "epoch": 0.3299492385786802, "grad_norm": 1.48550546169281, "learning_rate": 3.917991759754239e-06, "loss": 0.5379, "step": 390 }, { "epoch": 0.33079526226734346, "grad_norm": 1.4326492547988892, "learning_rate": 3.9123421099892955e-06, "loss": 0.5736, "step": 391 }, { "epoch": 0.3316412859560068, "grad_norm": 1.6504223346710205, "learning_rate": 3.906681846442768e-06, "loss": 0.5303, "step": 392 }, { "epoch": 0.33248730964467005, "grad_norm": 1.511051893234253, "learning_rate": 3.9010110116516595e-06, "loss": 0.5524, "step": 393 }, { "epoch": 0.3333333333333333, "grad_norm": 1.604653000831604, "learning_rate": 3.895329648232416e-06, "loss": 0.5777, "step": 394 }, { "epoch": 0.33417935702199664, "grad_norm": 1.5326584577560425, "learning_rate": 3.889637798880608e-06, "loss": 0.5853, "step": 395 }, { "epoch": 0.3350253807106599, "grad_norm": 1.4475630521774292, "learning_rate": 3.883935506370605e-06, "loss": 0.4671, "step": 396 }, { "epoch": 0.33587140439932317, "grad_norm": 1.4971731901168823, "learning_rate": 3.8782228135552615e-06, "loss": 0.6189, "step": 397 }, { "epoch": 0.33671742808798644, "grad_norm": 1.8308625221252441, "learning_rate": 3.872499763365585e-06, "loss": 0.518, "step": 398 }, { "epoch": 0.33756345177664976, "grad_norm": 1.6570651531219482, "learning_rate": 3.8667663988104245e-06, "loss": 0.5731, "step": 399 }, { "epoch": 0.338409475465313, "grad_norm": 1.411230444908142, "learning_rate": 3.861022762976136e-06, "loss": 0.5218, "step": 400 }, { "epoch": 0.3392554991539763, "grad_norm": 1.4464528560638428, "learning_rate": 3.85526889902627e-06, "loss": 0.5282, "step": 401 }, { "epoch": 0.3401015228426396, "grad_norm": 1.4024648666381836, "learning_rate": 3.849504850201238e-06, "loss": 0.529, "step": 402 }, { "epoch": 0.3409475465313029, "grad_norm": 1.5347040891647339, "learning_rate": 3.84373065981799e-06, "loss": 0.5267, "step": 403 }, { "epoch": 0.34179357021996615, "grad_norm": 1.5913515090942383, "learning_rate": 3.837946371269696e-06, "loss": 0.5788, "step": 404 }, { "epoch": 0.3426395939086294, "grad_norm": 1.4288476705551147, "learning_rate": 3.832152028025406e-06, "loss": 0.5769, "step": 405 }, { "epoch": 0.34348561759729274, "grad_norm": 1.577329397201538, "learning_rate": 3.826347673629738e-06, "loss": 0.6016, "step": 406 }, { "epoch": 0.344331641285956, "grad_norm": 1.6311994791030884, "learning_rate": 3.820533351702538e-06, "loss": 0.5265, "step": 407 }, { "epoch": 0.34517766497461927, "grad_norm": 1.445512294769287, "learning_rate": 3.8147091059385646e-06, "loss": 0.4741, "step": 408 }, { "epoch": 0.3460236886632826, "grad_norm": 1.338087558746338, "learning_rate": 3.80887498010715e-06, "loss": 0.4751, "step": 409 }, { "epoch": 0.34686971235194586, "grad_norm": 1.413231611251831, "learning_rate": 3.8030310180518748e-06, "loss": 0.4685, "step": 410 }, { "epoch": 0.3477157360406091, "grad_norm": 1.547485113143921, "learning_rate": 3.7971772636902425e-06, "loss": 0.5329, "step": 411 }, { "epoch": 0.34856175972927245, "grad_norm": 1.6648271083831787, "learning_rate": 3.791313761013343e-06, "loss": 0.4467, "step": 412 }, { "epoch": 0.3494077834179357, "grad_norm": 1.4436287879943848, "learning_rate": 3.7854405540855268e-06, "loss": 0.4634, "step": 413 }, { "epoch": 0.350253807106599, "grad_norm": 1.4640777111053467, "learning_rate": 3.77955768704407e-06, "loss": 0.574, "step": 414 }, { "epoch": 0.35109983079526225, "grad_norm": 1.6034146547317505, "learning_rate": 3.7736652040988474e-06, "loss": 0.5958, "step": 415 }, { "epoch": 0.35194585448392557, "grad_norm": 1.3981318473815918, "learning_rate": 3.7677631495319953e-06, "loss": 0.5729, "step": 416 }, { "epoch": 0.35279187817258884, "grad_norm": 1.4371929168701172, "learning_rate": 3.761851567697583e-06, "loss": 0.4701, "step": 417 }, { "epoch": 0.3536379018612521, "grad_norm": 1.4483686685562134, "learning_rate": 3.7559305030212746e-06, "loss": 0.6109, "step": 418 }, { "epoch": 0.3544839255499154, "grad_norm": 1.5899584293365479, "learning_rate": 3.7500000000000005e-06, "loss": 0.5399, "step": 419 }, { "epoch": 0.3553299492385787, "grad_norm": 1.4690099954605103, "learning_rate": 3.744060103201619e-06, "loss": 0.5434, "step": 420 }, { "epoch": 0.35617597292724196, "grad_norm": 1.5741721391677856, "learning_rate": 3.7381108572645836e-06, "loss": 0.5081, "step": 421 }, { "epoch": 0.3570219966159052, "grad_norm": 1.4619410037994385, "learning_rate": 3.7321523068976068e-06, "loss": 0.4984, "step": 422 }, { "epoch": 0.35786802030456855, "grad_norm": 1.5194240808486938, "learning_rate": 3.726184496879323e-06, "loss": 0.5694, "step": 423 }, { "epoch": 0.3587140439932318, "grad_norm": 1.4686346054077148, "learning_rate": 3.7202074720579544e-06, "loss": 0.4949, "step": 424 }, { "epoch": 0.3595600676818951, "grad_norm": 1.7008684873580933, "learning_rate": 3.7142212773509727e-06, "loss": 0.577, "step": 425 }, { "epoch": 0.3604060913705584, "grad_norm": 1.681458592414856, "learning_rate": 3.7082259577447604e-06, "loss": 0.5924, "step": 426 }, { "epoch": 0.36125211505922167, "grad_norm": 1.490136981010437, "learning_rate": 3.702221558294274e-06, "loss": 0.4885, "step": 427 }, { "epoch": 0.36209813874788493, "grad_norm": 1.41923189163208, "learning_rate": 3.696208124122706e-06, "loss": 0.4789, "step": 428 }, { "epoch": 0.3629441624365482, "grad_norm": 1.781841516494751, "learning_rate": 3.690185700421145e-06, "loss": 0.5364, "step": 429 }, { "epoch": 0.3637901861252115, "grad_norm": 1.4928046464920044, "learning_rate": 3.6841543324482356e-06, "loss": 0.4776, "step": 430 }, { "epoch": 0.3646362098138748, "grad_norm": 1.4005343914031982, "learning_rate": 3.6781140655298374e-06, "loss": 0.4997, "step": 431 }, { "epoch": 0.36548223350253806, "grad_norm": 1.7181766033172607, "learning_rate": 3.6720649450586885e-06, "loss": 0.5811, "step": 432 }, { "epoch": 0.3663282571912014, "grad_norm": 1.6100702285766602, "learning_rate": 3.6660070164940614e-06, "loss": 0.571, "step": 433 }, { "epoch": 0.36717428087986465, "grad_norm": 1.5206496715545654, "learning_rate": 3.659940325361419e-06, "loss": 0.4898, "step": 434 }, { "epoch": 0.3680203045685279, "grad_norm": 1.4749397039413452, "learning_rate": 3.6538649172520774e-06, "loss": 0.5013, "step": 435 }, { "epoch": 0.3688663282571912, "grad_norm": 1.568749189376831, "learning_rate": 3.64778083782286e-06, "loss": 0.5309, "step": 436 }, { "epoch": 0.3697123519458545, "grad_norm": 1.6498618125915527, "learning_rate": 3.641688132795757e-06, "loss": 0.6235, "step": 437 }, { "epoch": 0.37055837563451777, "grad_norm": 1.494895577430725, "learning_rate": 3.635586847957577e-06, "loss": 0.6193, "step": 438 }, { "epoch": 0.37140439932318103, "grad_norm": 1.5606284141540527, "learning_rate": 3.6294770291596083e-06, "loss": 0.5419, "step": 439 }, { "epoch": 0.37225042301184436, "grad_norm": 1.5283830165863037, "learning_rate": 3.6233587223172717e-06, "loss": 0.5235, "step": 440 }, { "epoch": 0.3730964467005076, "grad_norm": 1.5230801105499268, "learning_rate": 3.6172319734097764e-06, "loss": 0.6246, "step": 441 }, { "epoch": 0.3739424703891709, "grad_norm": 1.4073126316070557, "learning_rate": 3.611096828479773e-06, "loss": 0.5065, "step": 442 }, { "epoch": 0.37478849407783416, "grad_norm": 1.568429946899414, "learning_rate": 3.604953333633009e-06, "loss": 0.5097, "step": 443 }, { "epoch": 0.3756345177664975, "grad_norm": 1.5261017084121704, "learning_rate": 3.59880153503798e-06, "loss": 0.5877, "step": 444 }, { "epoch": 0.37648054145516074, "grad_norm": 1.5816318988800049, "learning_rate": 3.5926414789255877e-06, "loss": 0.5142, "step": 445 }, { "epoch": 0.377326565143824, "grad_norm": 1.4384475946426392, "learning_rate": 3.586473211588787e-06, "loss": 0.482, "step": 446 }, { "epoch": 0.37817258883248733, "grad_norm": 1.500186800956726, "learning_rate": 3.5802967793822386e-06, "loss": 0.5422, "step": 447 }, { "epoch": 0.3790186125211506, "grad_norm": 1.3965848684310913, "learning_rate": 3.5741122287219665e-06, "loss": 0.4749, "step": 448 }, { "epoch": 0.37986463620981387, "grad_norm": 1.4717751741409302, "learning_rate": 3.567919606085004e-06, "loss": 0.5679, "step": 449 }, { "epoch": 0.38071065989847713, "grad_norm": 1.4586070775985718, "learning_rate": 3.561718958009042e-06, "loss": 0.4985, "step": 450 }, { "epoch": 0.38155668358714045, "grad_norm": 1.437875747680664, "learning_rate": 3.555510331092087e-06, "loss": 0.4932, "step": 451 }, { "epoch": 0.3824027072758037, "grad_norm": 1.5008926391601562, "learning_rate": 3.549293771992104e-06, "loss": 0.5441, "step": 452 }, { "epoch": 0.383248730964467, "grad_norm": 1.5105735063552856, "learning_rate": 3.5430693274266694e-06, "loss": 0.5371, "step": 453 }, { "epoch": 0.3840947546531303, "grad_norm": 1.4934806823730469, "learning_rate": 3.5368370441726197e-06, "loss": 0.5097, "step": 454 }, { "epoch": 0.3849407783417936, "grad_norm": 1.5622752904891968, "learning_rate": 3.5305969690656985e-06, "loss": 0.5208, "step": 455 }, { "epoch": 0.38578680203045684, "grad_norm": 1.5283855199813843, "learning_rate": 3.5243491490002056e-06, "loss": 0.5266, "step": 456 }, { "epoch": 0.3866328257191201, "grad_norm": 1.4231075048446655, "learning_rate": 3.5180936309286444e-06, "loss": 0.5582, "step": 457 }, { "epoch": 0.38747884940778343, "grad_norm": 1.4727346897125244, "learning_rate": 3.5118304618613684e-06, "loss": 0.5029, "step": 458 }, { "epoch": 0.3883248730964467, "grad_norm": 1.4854283332824707, "learning_rate": 3.5055596888662295e-06, "loss": 0.5081, "step": 459 }, { "epoch": 0.38917089678510997, "grad_norm": 1.3666778802871704, "learning_rate": 3.4992813590682225e-06, "loss": 0.4756, "step": 460 }, { "epoch": 0.3900169204737733, "grad_norm": 1.7599936723709106, "learning_rate": 3.4929955196491315e-06, "loss": 0.498, "step": 461 }, { "epoch": 0.39086294416243655, "grad_norm": 1.5538440942764282, "learning_rate": 3.4867022178471764e-06, "loss": 0.6403, "step": 462 }, { "epoch": 0.3917089678510998, "grad_norm": 1.4417282342910767, "learning_rate": 3.4804015009566573e-06, "loss": 0.5727, "step": 463 }, { "epoch": 0.3925549915397631, "grad_norm": 1.512620449066162, "learning_rate": 3.4740934163275974e-06, "loss": 0.6629, "step": 464 }, { "epoch": 0.3934010152284264, "grad_norm": 1.446155071258545, "learning_rate": 3.46777801136539e-06, "loss": 0.5477, "step": 465 }, { "epoch": 0.3942470389170897, "grad_norm": 1.4650105237960815, "learning_rate": 3.4614553335304407e-06, "loss": 0.5874, "step": 466 }, { "epoch": 0.39509306260575294, "grad_norm": 1.7039233446121216, "learning_rate": 3.455125430337809e-06, "loss": 0.5101, "step": 467 }, { "epoch": 0.39593908629441626, "grad_norm": 1.5379666090011597, "learning_rate": 3.4487883493568566e-06, "loss": 0.5835, "step": 468 }, { "epoch": 0.39678510998307953, "grad_norm": 1.4904314279556274, "learning_rate": 3.442444138210883e-06, "loss": 0.5217, "step": 469 }, { "epoch": 0.3976311336717428, "grad_norm": 1.5643348693847656, "learning_rate": 3.436092844576774e-06, "loss": 0.5183, "step": 470 }, { "epoch": 0.39847715736040606, "grad_norm": 1.4762258529663086, "learning_rate": 3.4297345161846373e-06, "loss": 0.5368, "step": 471 }, { "epoch": 0.3993231810490694, "grad_norm": 1.496219277381897, "learning_rate": 3.4233692008174497e-06, "loss": 0.4962, "step": 472 }, { "epoch": 0.40016920473773265, "grad_norm": 1.5690780878067017, "learning_rate": 3.416996946310694e-06, "loss": 0.4984, "step": 473 }, { "epoch": 0.4010152284263959, "grad_norm": 1.4688186645507812, "learning_rate": 3.4106178005520006e-06, "loss": 0.5794, "step": 474 }, { "epoch": 0.40186125211505924, "grad_norm": 1.5053479671478271, "learning_rate": 3.4042318114807893e-06, "loss": 0.4901, "step": 475 }, { "epoch": 0.4027072758037225, "grad_norm": 1.3780537843704224, "learning_rate": 3.3978390270879056e-06, "loss": 0.558, "step": 476 }, { "epoch": 0.4035532994923858, "grad_norm": 1.382036566734314, "learning_rate": 3.3914394954152635e-06, "loss": 0.5202, "step": 477 }, { "epoch": 0.40439932318104904, "grad_norm": 1.469938039779663, "learning_rate": 3.385033264555482e-06, "loss": 0.5, "step": 478 }, { "epoch": 0.40524534686971236, "grad_norm": 1.4906823635101318, "learning_rate": 3.3786203826515235e-06, "loss": 0.4972, "step": 479 }, { "epoch": 0.40609137055837563, "grad_norm": 1.448851466178894, "learning_rate": 3.3722008978963365e-06, "loss": 0.5561, "step": 480 }, { "epoch": 0.4069373942470389, "grad_norm": 1.4323585033416748, "learning_rate": 3.3657748585324874e-06, "loss": 0.5233, "step": 481 }, { "epoch": 0.4077834179357022, "grad_norm": 1.6246329545974731, "learning_rate": 3.3593423128518017e-06, "loss": 0.6112, "step": 482 }, { "epoch": 0.4086294416243655, "grad_norm": 1.393144965171814, "learning_rate": 3.352903309194999e-06, "loss": 0.4907, "step": 483 }, { "epoch": 0.40947546531302875, "grad_norm": 1.4356099367141724, "learning_rate": 3.3464578959513322e-06, "loss": 0.5602, "step": 484 }, { "epoch": 0.4103214890016921, "grad_norm": 1.3799232244491577, "learning_rate": 3.3400061215582213e-06, "loss": 0.4997, "step": 485 }, { "epoch": 0.41116751269035534, "grad_norm": 1.570279836654663, "learning_rate": 3.3335480345008907e-06, "loss": 0.4928, "step": 486 }, { "epoch": 0.4120135363790186, "grad_norm": 1.4619395732879639, "learning_rate": 3.3270836833120047e-06, "loss": 0.4955, "step": 487 }, { "epoch": 0.4128595600676819, "grad_norm": 1.5027363300323486, "learning_rate": 3.3206131165713023e-06, "loss": 0.4928, "step": 488 }, { "epoch": 0.4137055837563452, "grad_norm": 1.5609545707702637, "learning_rate": 3.314136382905234e-06, "loss": 0.5996, "step": 489 }, { "epoch": 0.41455160744500846, "grad_norm": 1.6889272928237915, "learning_rate": 3.3076535309865925e-06, "loss": 0.604, "step": 490 }, { "epoch": 0.41539763113367173, "grad_norm": 1.5524094104766846, "learning_rate": 3.301164609534151e-06, "loss": 0.5188, "step": 491 }, { "epoch": 0.41624365482233505, "grad_norm": 1.5471911430358887, "learning_rate": 3.2946696673122953e-06, "loss": 0.56, "step": 492 }, { "epoch": 0.4170896785109983, "grad_norm": 1.515896201133728, "learning_rate": 3.288168753130657e-06, "loss": 0.5333, "step": 493 }, { "epoch": 0.4179357021996616, "grad_norm": 1.5967165231704712, "learning_rate": 3.2816619158437463e-06, "loss": 0.5538, "step": 494 }, { "epoch": 0.41878172588832485, "grad_norm": 1.4772549867630005, "learning_rate": 3.2751492043505873e-06, "loss": 0.5723, "step": 495 }, { "epoch": 0.4196277495769882, "grad_norm": 1.6248629093170166, "learning_rate": 3.268630667594348e-06, "loss": 0.5796, "step": 496 }, { "epoch": 0.42047377326565144, "grad_norm": 1.639542579650879, "learning_rate": 3.2621063545619734e-06, "loss": 0.5001, "step": 497 }, { "epoch": 0.4213197969543147, "grad_norm": 1.406894564628601, "learning_rate": 3.2555763142838175e-06, "loss": 0.414, "step": 498 }, { "epoch": 0.42216582064297803, "grad_norm": 1.5696587562561035, "learning_rate": 3.2490405958332743e-06, "loss": 0.4809, "step": 499 }, { "epoch": 0.4230118443316413, "grad_norm": 1.6583582162857056, "learning_rate": 3.24249924832641e-06, "loss": 0.4467, "step": 500 }, { "epoch": 0.42385786802030456, "grad_norm": 1.4358595609664917, "learning_rate": 3.2359523209215933e-06, "loss": 0.5417, "step": 501 }, { "epoch": 0.42470389170896783, "grad_norm": 1.497576355934143, "learning_rate": 3.2293998628191246e-06, "loss": 0.5172, "step": 502 }, { "epoch": 0.42554991539763115, "grad_norm": 1.3067649602890015, "learning_rate": 3.2228419232608692e-06, "loss": 0.488, "step": 503 }, { "epoch": 0.4263959390862944, "grad_norm": 1.7320537567138672, "learning_rate": 3.2162785515298854e-06, "loss": 0.5953, "step": 504 }, { "epoch": 0.4272419627749577, "grad_norm": 1.4349291324615479, "learning_rate": 3.2097097969500545e-06, "loss": 0.4994, "step": 505 }, { "epoch": 0.428087986463621, "grad_norm": 1.5768568515777588, "learning_rate": 3.2031357088857083e-06, "loss": 0.6069, "step": 506 }, { "epoch": 0.4289340101522843, "grad_norm": 1.8132436275482178, "learning_rate": 3.196556336741261e-06, "loss": 0.5251, "step": 507 }, { "epoch": 0.42978003384094754, "grad_norm": 1.570563793182373, "learning_rate": 3.1899717299608384e-06, "loss": 0.5625, "step": 508 }, { "epoch": 0.4306260575296108, "grad_norm": 1.51616632938385, "learning_rate": 3.1833819380279028e-06, "loss": 0.5653, "step": 509 }, { "epoch": 0.43147208121827413, "grad_norm": 1.4574007987976074, "learning_rate": 3.1767870104648834e-06, "loss": 0.4707, "step": 510 }, { "epoch": 0.4323181049069374, "grad_norm": 1.5099055767059326, "learning_rate": 3.1701869968328036e-06, "loss": 0.4206, "step": 511 }, { "epoch": 0.43316412859560066, "grad_norm": 1.533036708831787, "learning_rate": 3.1635819467309094e-06, "loss": 0.5782, "step": 512 }, { "epoch": 0.434010152284264, "grad_norm": 1.3681944608688354, "learning_rate": 3.156971909796295e-06, "loss": 0.5213, "step": 513 }, { "epoch": 0.43485617597292725, "grad_norm": 1.522709846496582, "learning_rate": 3.150356935703531e-06, "loss": 0.496, "step": 514 }, { "epoch": 0.4357021996615905, "grad_norm": 1.6741329431533813, "learning_rate": 3.143737074164292e-06, "loss": 0.5972, "step": 515 }, { "epoch": 0.4365482233502538, "grad_norm": 1.6000012159347534, "learning_rate": 3.1371123749269804e-06, "loss": 0.5715, "step": 516 }, { "epoch": 0.4373942470389171, "grad_norm": 1.5590283870697021, "learning_rate": 3.1304828877763567e-06, "loss": 0.5033, "step": 517 }, { "epoch": 0.43824027072758037, "grad_norm": 1.471935749053955, "learning_rate": 3.123848662533157e-06, "loss": 0.4956, "step": 518 }, { "epoch": 0.43908629441624364, "grad_norm": 1.4592459201812744, "learning_rate": 3.1172097490537308e-06, "loss": 0.5009, "step": 519 }, { "epoch": 0.43993231810490696, "grad_norm": 1.4220460653305054, "learning_rate": 3.110566197229655e-06, "loss": 0.5181, "step": 520 }, { "epoch": 0.4407783417935702, "grad_norm": 1.4611165523529053, "learning_rate": 3.1039180569873667e-06, "loss": 0.5199, "step": 521 }, { "epoch": 0.4416243654822335, "grad_norm": 1.3216363191604614, "learning_rate": 3.0972653782877836e-06, "loss": 0.4938, "step": 522 }, { "epoch": 0.44247038917089676, "grad_norm": 1.5299708843231201, "learning_rate": 3.0906082111259313e-06, "loss": 0.6092, "step": 523 }, { "epoch": 0.4433164128595601, "grad_norm": 1.4188978672027588, "learning_rate": 3.083946605530564e-06, "loss": 0.4906, "step": 524 }, { "epoch": 0.44416243654822335, "grad_norm": 1.493012547492981, "learning_rate": 3.0772806115637934e-06, "loss": 0.6105, "step": 525 }, { "epoch": 0.4450084602368866, "grad_norm": 1.3560352325439453, "learning_rate": 3.070610279320708e-06, "loss": 0.465, "step": 526 }, { "epoch": 0.44585448392554994, "grad_norm": 1.5919266939163208, "learning_rate": 3.063935658928998e-06, "loss": 0.4673, "step": 527 }, { "epoch": 0.4467005076142132, "grad_norm": 1.4098035097122192, "learning_rate": 3.0572568005485825e-06, "loss": 0.5447, "step": 528 }, { "epoch": 0.44754653130287647, "grad_norm": 1.4572504758834839, "learning_rate": 3.050573754371228e-06, "loss": 0.5234, "step": 529 }, { "epoch": 0.44839255499153974, "grad_norm": 1.505883812904358, "learning_rate": 3.0438865706201683e-06, "loss": 0.5126, "step": 530 }, { "epoch": 0.44923857868020306, "grad_norm": 1.484840750694275, "learning_rate": 3.0371952995497357e-06, "loss": 0.5136, "step": 531 }, { "epoch": 0.4500846023688663, "grad_norm": 1.516889214515686, "learning_rate": 3.0304999914449774e-06, "loss": 0.5783, "step": 532 }, { "epoch": 0.4509306260575296, "grad_norm": 1.399756669998169, "learning_rate": 3.02380069662128e-06, "loss": 0.501, "step": 533 }, { "epoch": 0.4517766497461929, "grad_norm": 1.6473559141159058, "learning_rate": 3.0170974654239877e-06, "loss": 0.5147, "step": 534 }, { "epoch": 0.4526226734348562, "grad_norm": 1.333022117614746, "learning_rate": 3.0103903482280295e-06, "loss": 0.4848, "step": 535 }, { "epoch": 0.45346869712351945, "grad_norm": 1.3775697946548462, "learning_rate": 3.0036793954375358e-06, "loss": 0.4997, "step": 536 }, { "epoch": 0.4543147208121827, "grad_norm": 1.4264084100723267, "learning_rate": 2.9969646574854632e-06, "loss": 0.4977, "step": 537 }, { "epoch": 0.45516074450084604, "grad_norm": 1.5796583890914917, "learning_rate": 2.9902461848332128e-06, "loss": 0.6589, "step": 538 }, { "epoch": 0.4560067681895093, "grad_norm": 1.5686849355697632, "learning_rate": 2.9835240279702516e-06, "loss": 0.4683, "step": 539 }, { "epoch": 0.45685279187817257, "grad_norm": 1.3858373165130615, "learning_rate": 2.9767982374137344e-06, "loss": 0.5051, "step": 540 }, { "epoch": 0.4576988155668359, "grad_norm": 1.6889417171478271, "learning_rate": 2.9700688637081233e-06, "loss": 0.5072, "step": 541 }, { "epoch": 0.45854483925549916, "grad_norm": 1.6027723550796509, "learning_rate": 2.9633359574248077e-06, "loss": 0.5958, "step": 542 }, { "epoch": 0.4593908629441624, "grad_norm": 1.4570367336273193, "learning_rate": 2.9565995691617242e-06, "loss": 0.5182, "step": 543 }, { "epoch": 0.4602368866328257, "grad_norm": 1.4474014043807983, "learning_rate": 2.9498597495429773e-06, "loss": 0.523, "step": 544 }, { "epoch": 0.461082910321489, "grad_norm": 1.5590665340423584, "learning_rate": 2.943116549218457e-06, "loss": 0.5413, "step": 545 }, { "epoch": 0.4619289340101523, "grad_norm": 1.539737582206726, "learning_rate": 2.9363700188634597e-06, "loss": 0.6038, "step": 546 }, { "epoch": 0.46277495769881555, "grad_norm": 1.543686032295227, "learning_rate": 2.929620209178307e-06, "loss": 0.4771, "step": 547 }, { "epoch": 0.46362098138747887, "grad_norm": 1.4283702373504639, "learning_rate": 2.9228671708879664e-06, "loss": 0.5311, "step": 548 }, { "epoch": 0.46446700507614214, "grad_norm": 1.365286946296692, "learning_rate": 2.916110954741667e-06, "loss": 0.485, "step": 549 }, { "epoch": 0.4653130287648054, "grad_norm": 1.4654829502105713, "learning_rate": 2.909351611512518e-06, "loss": 0.4788, "step": 550 }, { "epoch": 0.46615905245346867, "grad_norm": 1.5056861639022827, "learning_rate": 2.902589191997132e-06, "loss": 0.5171, "step": 551 }, { "epoch": 0.467005076142132, "grad_norm": 1.3876997232437134, "learning_rate": 2.8958237470152374e-06, "loss": 0.5373, "step": 552 }, { "epoch": 0.46785109983079526, "grad_norm": 1.370692253112793, "learning_rate": 2.889055327409301e-06, "loss": 0.4746, "step": 553 }, { "epoch": 0.4686971235194585, "grad_norm": 1.5535578727722168, "learning_rate": 2.882283984044141e-06, "loss": 0.4739, "step": 554 }, { "epoch": 0.46954314720812185, "grad_norm": 1.6460717916488647, "learning_rate": 2.8755097678065513e-06, "loss": 0.5865, "step": 555 }, { "epoch": 0.4703891708967851, "grad_norm": 1.4789174795150757, "learning_rate": 2.8687327296049126e-06, "loss": 0.5395, "step": 556 }, { "epoch": 0.4712351945854484, "grad_norm": 1.4689819812774658, "learning_rate": 2.861952920368816e-06, "loss": 0.592, "step": 557 }, { "epoch": 0.4720812182741117, "grad_norm": 1.719758152961731, "learning_rate": 2.8551703910486735e-06, "loss": 0.5949, "step": 558 }, { "epoch": 0.47292724196277497, "grad_norm": 1.5856834650039673, "learning_rate": 2.8483851926153396e-06, "loss": 0.4885, "step": 559 }, { "epoch": 0.47377326565143824, "grad_norm": 1.5992249250411987, "learning_rate": 2.8415973760597284e-06, "loss": 0.5733, "step": 560 }, { "epoch": 0.4746192893401015, "grad_norm": 1.512696385383606, "learning_rate": 2.8348069923924277e-06, "loss": 0.5093, "step": 561 }, { "epoch": 0.4754653130287648, "grad_norm": 1.4138331413269043, "learning_rate": 2.828014092643319e-06, "loss": 0.4628, "step": 562 }, { "epoch": 0.4763113367174281, "grad_norm": 1.6273956298828125, "learning_rate": 2.8212187278611907e-06, "loss": 0.6473, "step": 563 }, { "epoch": 0.47715736040609136, "grad_norm": 1.4268630743026733, "learning_rate": 2.8144209491133573e-06, "loss": 0.4941, "step": 564 }, { "epoch": 0.4780033840947547, "grad_norm": 1.5218658447265625, "learning_rate": 2.807620807485273e-06, "loss": 0.5629, "step": 565 }, { "epoch": 0.47884940778341795, "grad_norm": 1.5016785860061646, "learning_rate": 2.8008183540801486e-06, "loss": 0.5488, "step": 566 }, { "epoch": 0.4796954314720812, "grad_norm": 1.6292673349380493, "learning_rate": 2.7940136400185697e-06, "loss": 0.4968, "step": 567 }, { "epoch": 0.4805414551607445, "grad_norm": 1.7516484260559082, "learning_rate": 2.7872067164381113e-06, "loss": 0.6068, "step": 568 }, { "epoch": 0.4813874788494078, "grad_norm": 1.6018465757369995, "learning_rate": 2.7803976344929497e-06, "loss": 0.515, "step": 569 }, { "epoch": 0.48223350253807107, "grad_norm": 1.4703809022903442, "learning_rate": 2.7735864453534845e-06, "loss": 0.4804, "step": 570 }, { "epoch": 0.48307952622673433, "grad_norm": 1.6393179893493652, "learning_rate": 2.7667732002059494e-06, "loss": 0.5815, "step": 571 }, { "epoch": 0.48392554991539766, "grad_norm": 1.3294684886932373, "learning_rate": 2.7599579502520295e-06, "loss": 0.4847, "step": 572 }, { "epoch": 0.4847715736040609, "grad_norm": 1.5507575273513794, "learning_rate": 2.753140746708477e-06, "loss": 0.6029, "step": 573 }, { "epoch": 0.4856175972927242, "grad_norm": 1.458669662475586, "learning_rate": 2.746321640806722e-06, "loss": 0.5659, "step": 574 }, { "epoch": 0.48646362098138746, "grad_norm": 1.5378319025039673, "learning_rate": 2.7395006837924953e-06, "loss": 0.5321, "step": 575 }, { "epoch": 0.4873096446700508, "grad_norm": 1.6430675983428955, "learning_rate": 2.7326779269254363e-06, "loss": 0.4837, "step": 576 }, { "epoch": 0.48815566835871405, "grad_norm": 1.6236116886138916, "learning_rate": 2.7258534214787108e-06, "loss": 0.4962, "step": 577 }, { "epoch": 0.4890016920473773, "grad_norm": 1.4811713695526123, "learning_rate": 2.7190272187386246e-06, "loss": 0.4433, "step": 578 }, { "epoch": 0.48984771573604063, "grad_norm": 1.4098522663116455, "learning_rate": 2.7121993700042403e-06, "loss": 0.5793, "step": 579 }, { "epoch": 0.4906937394247039, "grad_norm": 1.4402129650115967, "learning_rate": 2.7053699265869883e-06, "loss": 0.4585, "step": 580 }, { "epoch": 0.49153976311336717, "grad_norm": 1.5286259651184082, "learning_rate": 2.6985389398102844e-06, "loss": 0.502, "step": 581 }, { "epoch": 0.49238578680203043, "grad_norm": 1.4347509145736694, "learning_rate": 2.6917064610091425e-06, "loss": 0.4995, "step": 582 }, { "epoch": 0.49323181049069376, "grad_norm": 1.3990005254745483, "learning_rate": 2.6848725415297888e-06, "loss": 0.4727, "step": 583 }, { "epoch": 0.494077834179357, "grad_norm": 1.4724209308624268, "learning_rate": 2.6780372327292763e-06, "loss": 0.5381, "step": 584 }, { "epoch": 0.4949238578680203, "grad_norm": 1.631493330001831, "learning_rate": 2.6712005859751e-06, "loss": 0.5569, "step": 585 }, { "epoch": 0.4957698815566836, "grad_norm": 1.513969898223877, "learning_rate": 2.6643626526448063e-06, "loss": 0.4989, "step": 586 }, { "epoch": 0.4966159052453469, "grad_norm": 1.542619228363037, "learning_rate": 2.6575234841256137e-06, "loss": 0.5313, "step": 587 }, { "epoch": 0.49746192893401014, "grad_norm": 1.4393746852874756, "learning_rate": 2.6506831318140226e-06, "loss": 0.5492, "step": 588 }, { "epoch": 0.4983079526226734, "grad_norm": 1.3358962535858154, "learning_rate": 2.6438416471154277e-06, "loss": 0.478, "step": 589 }, { "epoch": 0.49915397631133673, "grad_norm": 1.6701815128326416, "learning_rate": 2.636999081443736e-06, "loss": 0.5219, "step": 590 }, { "epoch": 0.5, "grad_norm": 1.4150053262710571, "learning_rate": 2.6301554862209756e-06, "loss": 0.4718, "step": 591 }, { "epoch": 0.5008460236886633, "grad_norm": 1.3618898391723633, "learning_rate": 2.6233109128769134e-06, "loss": 0.5055, "step": 592 }, { "epoch": 0.5016920473773265, "grad_norm": 1.6331762075424194, "learning_rate": 2.6164654128486683e-06, "loss": 0.5177, "step": 593 }, { "epoch": 0.5025380710659898, "grad_norm": 1.5203489065170288, "learning_rate": 2.6096190375803183e-06, "loss": 0.5066, "step": 594 }, { "epoch": 0.5033840947546532, "grad_norm": 1.6266874074935913, "learning_rate": 2.602771838522525e-06, "loss": 0.4494, "step": 595 }, { "epoch": 0.5042301184433164, "grad_norm": 1.3882555961608887, "learning_rate": 2.595923867132136e-06, "loss": 0.5231, "step": 596 }, { "epoch": 0.5050761421319797, "grad_norm": 1.4567385911941528, "learning_rate": 2.5890751748718055e-06, "loss": 0.5295, "step": 597 }, { "epoch": 0.505922165820643, "grad_norm": 1.4776872396469116, "learning_rate": 2.5822258132096038e-06, "loss": 0.505, "step": 598 }, { "epoch": 0.5067681895093062, "grad_norm": 1.404428243637085, "learning_rate": 2.575375833618633e-06, "loss": 0.4914, "step": 599 }, { "epoch": 0.5076142131979695, "grad_norm": 1.4569783210754395, "learning_rate": 2.568525287576638e-06, "loss": 0.4488, "step": 600 }, { "epoch": 0.5084602368866328, "grad_norm": 1.4980030059814453, "learning_rate": 2.561674226565621e-06, "loss": 0.5389, "step": 601 }, { "epoch": 0.5093062605752962, "grad_norm": 1.4138386249542236, "learning_rate": 2.5548227020714532e-06, "loss": 0.5175, "step": 602 }, { "epoch": 0.5101522842639594, "grad_norm": 1.7734383344650269, "learning_rate": 2.547970765583491e-06, "loss": 0.5559, "step": 603 }, { "epoch": 0.5109983079526227, "grad_norm": 1.5217783451080322, "learning_rate": 2.541118468594185e-06, "loss": 0.4747, "step": 604 }, { "epoch": 0.511844331641286, "grad_norm": 1.696345329284668, "learning_rate": 2.5342658625986965e-06, "loss": 0.5078, "step": 605 }, { "epoch": 0.5126903553299492, "grad_norm": 1.5817019939422607, "learning_rate": 2.527412999094507e-06, "loss": 0.5418, "step": 606 }, { "epoch": 0.5135363790186125, "grad_norm": 1.3751314878463745, "learning_rate": 2.520559929581034e-06, "loss": 0.4278, "step": 607 }, { "epoch": 0.5143824027072758, "grad_norm": 1.4618191719055176, "learning_rate": 2.5137067055592457e-06, "loss": 0.5491, "step": 608 }, { "epoch": 0.5152284263959391, "grad_norm": 1.4312200546264648, "learning_rate": 2.5068533785312673e-06, "loss": 0.5052, "step": 609 }, { "epoch": 0.5160744500846024, "grad_norm": 1.6040703058242798, "learning_rate": 2.5e-06, "loss": 0.4421, "step": 610 }, { "epoch": 0.5169204737732657, "grad_norm": 1.7869144678115845, "learning_rate": 2.4931466214687336e-06, "loss": 0.4662, "step": 611 }, { "epoch": 0.5177664974619289, "grad_norm": 1.4954723119735718, "learning_rate": 2.486293294440755e-06, "loss": 0.482, "step": 612 }, { "epoch": 0.5186125211505922, "grad_norm": 1.504907488822937, "learning_rate": 2.479440070418967e-06, "loss": 0.4846, "step": 613 }, { "epoch": 0.5194585448392555, "grad_norm": 1.5628070831298828, "learning_rate": 2.4725870009054944e-06, "loss": 0.5379, "step": 614 }, { "epoch": 0.5203045685279187, "grad_norm": 1.3009322881698608, "learning_rate": 2.4657341374013047e-06, "loss": 0.4173, "step": 615 }, { "epoch": 0.5211505922165821, "grad_norm": 1.4733883142471313, "learning_rate": 2.4588815314058155e-06, "loss": 0.4481, "step": 616 }, { "epoch": 0.5219966159052454, "grad_norm": 1.511818528175354, "learning_rate": 2.4520292344165093e-06, "loss": 0.4529, "step": 617 }, { "epoch": 0.5228426395939086, "grad_norm": 1.5413731336593628, "learning_rate": 2.4451772979285468e-06, "loss": 0.5215, "step": 618 }, { "epoch": 0.5236886632825719, "grad_norm": 1.4342833757400513, "learning_rate": 2.4383257734343795e-06, "loss": 0.4864, "step": 619 }, { "epoch": 0.5245346869712352, "grad_norm": 1.339322566986084, "learning_rate": 2.431474712423363e-06, "loss": 0.4954, "step": 620 }, { "epoch": 0.5253807106598984, "grad_norm": 1.6234588623046875, "learning_rate": 2.4246241663813675e-06, "loss": 0.4753, "step": 621 }, { "epoch": 0.5262267343485617, "grad_norm": 1.3801982402801514, "learning_rate": 2.4177741867903966e-06, "loss": 0.4836, "step": 622 }, { "epoch": 0.5270727580372251, "grad_norm": 1.5722270011901855, "learning_rate": 2.4109248251281953e-06, "loss": 0.5472, "step": 623 }, { "epoch": 0.5279187817258884, "grad_norm": 1.5523285865783691, "learning_rate": 2.4040761328678647e-06, "loss": 0.5056, "step": 624 }, { "epoch": 0.5287648054145516, "grad_norm": 1.481810212135315, "learning_rate": 2.3972281614774764e-06, "loss": 0.5493, "step": 625 }, { "epoch": 0.5296108291032149, "grad_norm": 1.4845212697982788, "learning_rate": 2.3903809624196826e-06, "loss": 0.5125, "step": 626 }, { "epoch": 0.5304568527918782, "grad_norm": 1.3311069011688232, "learning_rate": 2.3835345871513334e-06, "loss": 0.4476, "step": 627 }, { "epoch": 0.5313028764805414, "grad_norm": 1.6216577291488647, "learning_rate": 2.376689087123087e-06, "loss": 0.5174, "step": 628 }, { "epoch": 0.5321489001692047, "grad_norm": 1.6146211624145508, "learning_rate": 2.369844513779026e-06, "loss": 0.5302, "step": 629 }, { "epoch": 0.5329949238578681, "grad_norm": 1.3406670093536377, "learning_rate": 2.3630009185562646e-06, "loss": 0.571, "step": 630 }, { "epoch": 0.5338409475465313, "grad_norm": 1.4292736053466797, "learning_rate": 2.3561583528845723e-06, "loss": 0.5067, "step": 631 }, { "epoch": 0.5346869712351946, "grad_norm": 1.4825644493103027, "learning_rate": 2.3493168681859782e-06, "loss": 0.5024, "step": 632 }, { "epoch": 0.5355329949238579, "grad_norm": 1.6885347366333008, "learning_rate": 2.3424765158743867e-06, "loss": 0.5526, "step": 633 }, { "epoch": 0.5363790186125211, "grad_norm": 1.4368715286254883, "learning_rate": 2.335637347355194e-06, "loss": 0.5817, "step": 634 }, { "epoch": 0.5372250423011844, "grad_norm": 1.5050628185272217, "learning_rate": 2.3287994140249005e-06, "loss": 0.5771, "step": 635 }, { "epoch": 0.5380710659898477, "grad_norm": 1.43318510055542, "learning_rate": 2.321962767270724e-06, "loss": 0.4677, "step": 636 }, { "epoch": 0.538917089678511, "grad_norm": 1.4823578596115112, "learning_rate": 2.315127458470212e-06, "loss": 0.496, "step": 637 }, { "epoch": 0.5397631133671743, "grad_norm": 1.4760611057281494, "learning_rate": 2.308293538990858e-06, "loss": 0.5526, "step": 638 }, { "epoch": 0.5406091370558376, "grad_norm": 1.4542453289031982, "learning_rate": 2.301461060189716e-06, "loss": 0.4325, "step": 639 }, { "epoch": 0.5414551607445008, "grad_norm": 1.4895241260528564, "learning_rate": 2.2946300734130126e-06, "loss": 0.5135, "step": 640 }, { "epoch": 0.5423011844331641, "grad_norm": 1.3324871063232422, "learning_rate": 2.2878006299957613e-06, "loss": 0.4435, "step": 641 }, { "epoch": 0.5431472081218274, "grad_norm": 1.5044846534729004, "learning_rate": 2.2809727812613767e-06, "loss": 0.5371, "step": 642 }, { "epoch": 0.5439932318104906, "grad_norm": 1.3756799697875977, "learning_rate": 2.2741465785212905e-06, "loss": 0.4914, "step": 643 }, { "epoch": 0.544839255499154, "grad_norm": 1.5922890901565552, "learning_rate": 2.267322073074564e-06, "loss": 0.4904, "step": 644 }, { "epoch": 0.5456852791878173, "grad_norm": 1.377279281616211, "learning_rate": 2.260499316207505e-06, "loss": 0.4736, "step": 645 }, { "epoch": 0.5465313028764806, "grad_norm": 1.5589686632156372, "learning_rate": 2.2536783591932786e-06, "loss": 0.5133, "step": 646 }, { "epoch": 0.5473773265651438, "grad_norm": 1.617056965827942, "learning_rate": 2.246859253291524e-06, "loss": 0.4293, "step": 647 }, { "epoch": 0.5482233502538071, "grad_norm": 1.2803261280059814, "learning_rate": 2.2400420497479713e-06, "loss": 0.4212, "step": 648 }, { "epoch": 0.5490693739424704, "grad_norm": 1.705077886581421, "learning_rate": 2.2332267997940514e-06, "loss": 0.5949, "step": 649 }, { "epoch": 0.5499153976311336, "grad_norm": 1.2756019830703735, "learning_rate": 2.2264135546465163e-06, "loss": 0.4538, "step": 650 }, { "epoch": 0.550761421319797, "grad_norm": 1.4529634714126587, "learning_rate": 2.219602365507051e-06, "loss": 0.5181, "step": 651 }, { "epoch": 0.5516074450084603, "grad_norm": 1.4561599493026733, "learning_rate": 2.21279328356189e-06, "loss": 0.5149, "step": 652 }, { "epoch": 0.5524534686971235, "grad_norm": 1.526835560798645, "learning_rate": 2.205986359981431e-06, "loss": 0.4777, "step": 653 }, { "epoch": 0.5532994923857868, "grad_norm": 1.572192907333374, "learning_rate": 2.1991816459198526e-06, "loss": 0.4954, "step": 654 }, { "epoch": 0.5541455160744501, "grad_norm": 1.3265060186386108, "learning_rate": 2.1923791925147287e-06, "loss": 0.5218, "step": 655 }, { "epoch": 0.5549915397631133, "grad_norm": 1.4780526161193848, "learning_rate": 2.1855790508866435e-06, "loss": 0.5365, "step": 656 }, { "epoch": 0.5558375634517766, "grad_norm": 1.5056841373443604, "learning_rate": 2.1787812721388093e-06, "loss": 0.5579, "step": 657 }, { "epoch": 0.55668358714044, "grad_norm": 1.5058797597885132, "learning_rate": 2.1719859073566813e-06, "loss": 0.5154, "step": 658 }, { "epoch": 0.5575296108291032, "grad_norm": 1.3792710304260254, "learning_rate": 2.1651930076075727e-06, "loss": 0.4752, "step": 659 }, { "epoch": 0.5583756345177665, "grad_norm": 1.4559630155563354, "learning_rate": 2.158402623940273e-06, "loss": 0.5478, "step": 660 }, { "epoch": 0.5592216582064298, "grad_norm": 1.4395264387130737, "learning_rate": 2.1516148073846613e-06, "loss": 0.4919, "step": 661 }, { "epoch": 0.560067681895093, "grad_norm": 1.4851564168930054, "learning_rate": 2.1448296089513273e-06, "loss": 0.458, "step": 662 }, { "epoch": 0.5609137055837563, "grad_norm": 1.4913303852081299, "learning_rate": 2.1380470796311843e-06, "loss": 0.539, "step": 663 }, { "epoch": 0.5617597292724196, "grad_norm": 1.4618514776229858, "learning_rate": 2.131267270395088e-06, "loss": 0.4823, "step": 664 }, { "epoch": 0.562605752961083, "grad_norm": 1.4132062196731567, "learning_rate": 2.1244902321934495e-06, "loss": 0.4256, "step": 665 }, { "epoch": 0.5634517766497462, "grad_norm": 1.6368167400360107, "learning_rate": 2.11771601595586e-06, "loss": 0.5763, "step": 666 }, { "epoch": 0.5642978003384095, "grad_norm": 1.5602619647979736, "learning_rate": 2.1109446725907003e-06, "loss": 0.5281, "step": 667 }, { "epoch": 0.5651438240270727, "grad_norm": 1.4556282758712769, "learning_rate": 2.104176252984763e-06, "loss": 0.542, "step": 668 }, { "epoch": 0.565989847715736, "grad_norm": 1.4794425964355469, "learning_rate": 2.097410808002869e-06, "loss": 0.5617, "step": 669 }, { "epoch": 0.5668358714043993, "grad_norm": 1.653266429901123, "learning_rate": 2.0906483884874816e-06, "loss": 0.58, "step": 670 }, { "epoch": 0.5676818950930627, "grad_norm": 1.421467661857605, "learning_rate": 2.0838890452583337e-06, "loss": 0.5255, "step": 671 }, { "epoch": 0.5685279187817259, "grad_norm": 1.327575922012329, "learning_rate": 2.0771328291120336e-06, "loss": 0.4885, "step": 672 }, { "epoch": 0.5693739424703892, "grad_norm": 1.7012816667556763, "learning_rate": 2.070379790821693e-06, "loss": 0.5295, "step": 673 }, { "epoch": 0.5702199661590525, "grad_norm": 1.7111144065856934, "learning_rate": 2.063629981136541e-06, "loss": 0.4796, "step": 674 }, { "epoch": 0.5710659898477157, "grad_norm": 1.4859437942504883, "learning_rate": 2.0568834507815434e-06, "loss": 0.4998, "step": 675 }, { "epoch": 0.571912013536379, "grad_norm": 1.4416285753250122, "learning_rate": 2.050140250457023e-06, "loss": 0.5211, "step": 676 }, { "epoch": 0.5727580372250423, "grad_norm": 1.4626954793930054, "learning_rate": 2.043400430838276e-06, "loss": 0.5127, "step": 677 }, { "epoch": 0.5736040609137056, "grad_norm": 1.4203089475631714, "learning_rate": 2.036664042575193e-06, "loss": 0.4599, "step": 678 }, { "epoch": 0.5744500846023689, "grad_norm": 1.3037195205688477, "learning_rate": 2.0299311362918775e-06, "loss": 0.4848, "step": 679 }, { "epoch": 0.5752961082910322, "grad_norm": 1.6444575786590576, "learning_rate": 2.0232017625862664e-06, "loss": 0.5882, "step": 680 }, { "epoch": 0.5761421319796954, "grad_norm": 1.4801158905029297, "learning_rate": 2.01647597202975e-06, "loss": 0.5177, "step": 681 }, { "epoch": 0.5769881556683587, "grad_norm": 1.3590668439865112, "learning_rate": 2.0097538151667885e-06, "loss": 0.5236, "step": 682 }, { "epoch": 0.577834179357022, "grad_norm": 1.7626949548721313, "learning_rate": 2.0030353425145376e-06, "loss": 0.5392, "step": 683 }, { "epoch": 0.5786802030456852, "grad_norm": 1.493628740310669, "learning_rate": 1.9963206045624647e-06, "loss": 0.5182, "step": 684 }, { "epoch": 0.5795262267343486, "grad_norm": 1.515743374824524, "learning_rate": 1.989609651771971e-06, "loss": 0.5648, "step": 685 }, { "epoch": 0.5803722504230119, "grad_norm": 1.511932134628296, "learning_rate": 1.9829025345760127e-06, "loss": 0.4885, "step": 686 }, { "epoch": 0.5812182741116751, "grad_norm": 1.457202434539795, "learning_rate": 1.9761993033787206e-06, "loss": 0.5903, "step": 687 }, { "epoch": 0.5820642978003384, "grad_norm": 1.4985764026641846, "learning_rate": 1.969500008555023e-06, "loss": 0.5411, "step": 688 }, { "epoch": 0.5829103214890017, "grad_norm": 1.451522707939148, "learning_rate": 1.962804700450265e-06, "loss": 0.5094, "step": 689 }, { "epoch": 0.583756345177665, "grad_norm": 1.3009876012802124, "learning_rate": 1.956113429379833e-06, "loss": 0.4272, "step": 690 }, { "epoch": 0.5846023688663282, "grad_norm": 1.2623531818389893, "learning_rate": 1.9494262456287735e-06, "loss": 0.4011, "step": 691 }, { "epoch": 0.5854483925549916, "grad_norm": 1.3815149068832397, "learning_rate": 1.942743199451418e-06, "loss": 0.5189, "step": 692 }, { "epoch": 0.5862944162436549, "grad_norm": 1.6201854944229126, "learning_rate": 1.9360643410710027e-06, "loss": 0.6014, "step": 693 }, { "epoch": 0.5871404399323181, "grad_norm": 1.3804255723953247, "learning_rate": 1.929389720679294e-06, "loss": 0.4803, "step": 694 }, { "epoch": 0.5879864636209814, "grad_norm": 1.566095232963562, "learning_rate": 1.922719388436208e-06, "loss": 0.5526, "step": 695 }, { "epoch": 0.5888324873096447, "grad_norm": 1.5268288850784302, "learning_rate": 1.916053394469437e-06, "loss": 0.4897, "step": 696 }, { "epoch": 0.5896785109983079, "grad_norm": 1.3224598169326782, "learning_rate": 1.909391788874069e-06, "loss": 0.4971, "step": 697 }, { "epoch": 0.5905245346869712, "grad_norm": 1.6039224863052368, "learning_rate": 1.9027346217122161e-06, "loss": 0.6325, "step": 698 }, { "epoch": 0.5913705583756346, "grad_norm": 1.4165875911712646, "learning_rate": 1.8960819430126337e-06, "loss": 0.49, "step": 699 }, { "epoch": 0.5922165820642978, "grad_norm": 1.480425238609314, "learning_rate": 1.8894338027703456e-06, "loss": 0.5507, "step": 700 }, { "epoch": 0.5930626057529611, "grad_norm": 1.3495324850082397, "learning_rate": 1.88279025094627e-06, "loss": 0.4775, "step": 701 }, { "epoch": 0.5939086294416244, "grad_norm": 1.507717251777649, "learning_rate": 1.8761513374668434e-06, "loss": 0.5575, "step": 702 }, { "epoch": 0.5947546531302876, "grad_norm": 1.415955662727356, "learning_rate": 1.8695171122236443e-06, "loss": 0.4595, "step": 703 }, { "epoch": 0.5956006768189509, "grad_norm": 1.4151737689971924, "learning_rate": 1.8628876250730198e-06, "loss": 0.484, "step": 704 }, { "epoch": 0.5964467005076142, "grad_norm": 1.379846215248108, "learning_rate": 1.8562629258357087e-06, "loss": 0.4786, "step": 705 }, { "epoch": 0.5972927241962775, "grad_norm": 1.6034132242202759, "learning_rate": 1.8496430642964698e-06, "loss": 0.5037, "step": 706 }, { "epoch": 0.5981387478849408, "grad_norm": 1.4743245840072632, "learning_rate": 1.8430280902037061e-06, "loss": 0.4941, "step": 707 }, { "epoch": 0.5989847715736041, "grad_norm": 1.4026728868484497, "learning_rate": 1.8364180532690916e-06, "loss": 0.4978, "step": 708 }, { "epoch": 0.5998307952622673, "grad_norm": 1.5930570363998413, "learning_rate": 1.8298130031671974e-06, "loss": 0.5155, "step": 709 }, { "epoch": 0.6006768189509306, "grad_norm": 1.4963091611862183, "learning_rate": 1.8232129895351164e-06, "loss": 0.4775, "step": 710 }, { "epoch": 0.6015228426395939, "grad_norm": 1.3382537364959717, "learning_rate": 1.8166180619720974e-06, "loss": 0.4759, "step": 711 }, { "epoch": 0.6023688663282571, "grad_norm": 1.5729764699935913, "learning_rate": 1.8100282700391616e-06, "loss": 0.4431, "step": 712 }, { "epoch": 0.6032148900169205, "grad_norm": 1.6764527559280396, "learning_rate": 1.8034436632587394e-06, "loss": 0.4979, "step": 713 }, { "epoch": 0.6040609137055838, "grad_norm": 1.3650676012039185, "learning_rate": 1.7968642911142926e-06, "loss": 0.5107, "step": 714 }, { "epoch": 0.6049069373942471, "grad_norm": 1.4936357736587524, "learning_rate": 1.7902902030499463e-06, "loss": 0.5537, "step": 715 }, { "epoch": 0.6057529610829103, "grad_norm": 1.4027994871139526, "learning_rate": 1.7837214484701154e-06, "loss": 0.487, "step": 716 }, { "epoch": 0.6065989847715736, "grad_norm": 1.433212161064148, "learning_rate": 1.7771580767391314e-06, "loss": 0.5041, "step": 717 }, { "epoch": 0.6074450084602369, "grad_norm": 1.3107362985610962, "learning_rate": 1.7706001371808763e-06, "loss": 0.4696, "step": 718 }, { "epoch": 0.6082910321489001, "grad_norm": 1.5976382493972778, "learning_rate": 1.7640476790784077e-06, "loss": 0.5224, "step": 719 }, { "epoch": 0.6091370558375635, "grad_norm": 1.513655185699463, "learning_rate": 1.7575007516735909e-06, "loss": 0.5048, "step": 720 }, { "epoch": 0.6099830795262268, "grad_norm": 1.4939072132110596, "learning_rate": 1.7509594041667265e-06, "loss": 0.4744, "step": 721 }, { "epoch": 0.61082910321489, "grad_norm": 1.4942210912704468, "learning_rate": 1.7444236857161837e-06, "loss": 0.548, "step": 722 }, { "epoch": 0.6116751269035533, "grad_norm": 1.6308627128601074, "learning_rate": 1.7378936454380277e-06, "loss": 0.555, "step": 723 }, { "epoch": 0.6125211505922166, "grad_norm": 1.5539953708648682, "learning_rate": 1.7313693324056523e-06, "loss": 0.4423, "step": 724 }, { "epoch": 0.6133671742808798, "grad_norm": 1.5067429542541504, "learning_rate": 1.724850795649413e-06, "loss": 0.5053, "step": 725 }, { "epoch": 0.6142131979695431, "grad_norm": 1.4803341627120972, "learning_rate": 1.718338084156254e-06, "loss": 0.5284, "step": 726 }, { "epoch": 0.6150592216582065, "grad_norm": 1.3545798063278198, "learning_rate": 1.7118312468693437e-06, "loss": 0.4296, "step": 727 }, { "epoch": 0.6159052453468697, "grad_norm": 1.3966692686080933, "learning_rate": 1.7053303326877051e-06, "loss": 0.5169, "step": 728 }, { "epoch": 0.616751269035533, "grad_norm": 1.5924454927444458, "learning_rate": 1.6988353904658495e-06, "loss": 0.5, "step": 729 }, { "epoch": 0.6175972927241963, "grad_norm": 1.3930429220199585, "learning_rate": 1.692346469013408e-06, "loss": 0.5098, "step": 730 }, { "epoch": 0.6184433164128595, "grad_norm": 1.2582634687423706, "learning_rate": 1.6858636170947668e-06, "loss": 0.4755, "step": 731 }, { "epoch": 0.6192893401015228, "grad_norm": 1.777503252029419, "learning_rate": 1.6793868834286985e-06, "loss": 0.578, "step": 732 }, { "epoch": 0.6201353637901861, "grad_norm": 1.353458285331726, "learning_rate": 1.6729163166879964e-06, "loss": 0.4851, "step": 733 }, { "epoch": 0.6209813874788495, "grad_norm": 1.5160582065582275, "learning_rate": 1.6664519654991101e-06, "loss": 0.6046, "step": 734 }, { "epoch": 0.6218274111675127, "grad_norm": 1.39556086063385, "learning_rate": 1.6599938784417796e-06, "loss": 0.5351, "step": 735 }, { "epoch": 0.622673434856176, "grad_norm": 1.260372281074524, "learning_rate": 1.6535421040486686e-06, "loss": 0.4827, "step": 736 }, { "epoch": 0.6235194585448393, "grad_norm": 1.6492784023284912, "learning_rate": 1.6470966908050012e-06, "loss": 0.5938, "step": 737 }, { "epoch": 0.6243654822335025, "grad_norm": 1.360202670097351, "learning_rate": 1.6406576871481985e-06, "loss": 0.484, "step": 738 }, { "epoch": 0.6252115059221658, "grad_norm": 1.4238383769989014, "learning_rate": 1.634225141467513e-06, "loss": 0.4169, "step": 739 }, { "epoch": 0.626057529610829, "grad_norm": 1.4238007068634033, "learning_rate": 1.6277991021036644e-06, "loss": 0.5064, "step": 740 }, { "epoch": 0.6269035532994924, "grad_norm": 1.5756981372833252, "learning_rate": 1.6213796173484769e-06, "loss": 0.5694, "step": 741 }, { "epoch": 0.6277495769881557, "grad_norm": 1.5684349536895752, "learning_rate": 1.6149667354445192e-06, "loss": 0.5578, "step": 742 }, { "epoch": 0.628595600676819, "grad_norm": 1.3545986413955688, "learning_rate": 1.608560504584737e-06, "loss": 0.5363, "step": 743 }, { "epoch": 0.6294416243654822, "grad_norm": 1.3728703260421753, "learning_rate": 1.6021609729120948e-06, "loss": 0.4479, "step": 744 }, { "epoch": 0.6302876480541455, "grad_norm": 1.4639620780944824, "learning_rate": 1.5957681885192111e-06, "loss": 0.5478, "step": 745 }, { "epoch": 0.6311336717428088, "grad_norm": 1.3161569833755493, "learning_rate": 1.5893821994479996e-06, "loss": 0.4568, "step": 746 }, { "epoch": 0.631979695431472, "grad_norm": 1.5565990209579468, "learning_rate": 1.5830030536893066e-06, "loss": 0.5132, "step": 747 }, { "epoch": 0.6328257191201354, "grad_norm": 1.4323241710662842, "learning_rate": 1.5766307991825514e-06, "loss": 0.4374, "step": 748 }, { "epoch": 0.6336717428087987, "grad_norm": 1.372560739517212, "learning_rate": 1.5702654838153641e-06, "loss": 0.5318, "step": 749 }, { "epoch": 0.6345177664974619, "grad_norm": 1.3776485919952393, "learning_rate": 1.5639071554232266e-06, "loss": 0.4903, "step": 750 }, { "epoch": 0.6353637901861252, "grad_norm": 1.421043872833252, "learning_rate": 1.5575558617891173e-06, "loss": 0.4828, "step": 751 }, { "epoch": 0.6362098138747885, "grad_norm": 1.4715155363082886, "learning_rate": 1.551211650643144e-06, "loss": 0.5535, "step": 752 }, { "epoch": 0.6370558375634517, "grad_norm": 1.4590644836425781, "learning_rate": 1.5448745696621915e-06, "loss": 0.4879, "step": 753 }, { "epoch": 0.637901861252115, "grad_norm": 1.3986414670944214, "learning_rate": 1.5385446664695603e-06, "loss": 0.4828, "step": 754 }, { "epoch": 0.6387478849407784, "grad_norm": 1.2732402086257935, "learning_rate": 1.53222198863461e-06, "loss": 0.4103, "step": 755 }, { "epoch": 0.6395939086294417, "grad_norm": 1.4938690662384033, "learning_rate": 1.5259065836724035e-06, "loss": 0.4481, "step": 756 }, { "epoch": 0.6404399323181049, "grad_norm": 1.4133765697479248, "learning_rate": 1.5195984990433437e-06, "loss": 0.4322, "step": 757 }, { "epoch": 0.6412859560067682, "grad_norm": 1.4759124517440796, "learning_rate": 1.5132977821528244e-06, "loss": 0.5065, "step": 758 }, { "epoch": 0.6421319796954315, "grad_norm": 1.4921671152114868, "learning_rate": 1.5070044803508693e-06, "loss": 0.5302, "step": 759 }, { "epoch": 0.6429780033840947, "grad_norm": 1.5433109998703003, "learning_rate": 1.500718640931779e-06, "loss": 0.5281, "step": 760 }, { "epoch": 0.643824027072758, "grad_norm": 1.4201653003692627, "learning_rate": 1.494440311133772e-06, "loss": 0.4778, "step": 761 }, { "epoch": 0.6446700507614214, "grad_norm": 1.4562307596206665, "learning_rate": 1.4881695381386324e-06, "loss": 0.5654, "step": 762 }, { "epoch": 0.6455160744500846, "grad_norm": 1.3270257711410522, "learning_rate": 1.4819063690713565e-06, "loss": 0.4588, "step": 763 }, { "epoch": 0.6463620981387479, "grad_norm": 1.3628114461898804, "learning_rate": 1.4756508509997946e-06, "loss": 0.5339, "step": 764 }, { "epoch": 0.6472081218274112, "grad_norm": 1.5659477710723877, "learning_rate": 1.4694030309343015e-06, "loss": 0.4217, "step": 765 }, { "epoch": 0.6480541455160744, "grad_norm": 1.6154391765594482, "learning_rate": 1.4631629558273803e-06, "loss": 0.5273, "step": 766 }, { "epoch": 0.6489001692047377, "grad_norm": 1.5286039113998413, "learning_rate": 1.4569306725733313e-06, "loss": 0.5359, "step": 767 }, { "epoch": 0.649746192893401, "grad_norm": 1.4298804998397827, "learning_rate": 1.450706228007897e-06, "loss": 0.526, "step": 768 }, { "epoch": 0.6505922165820643, "grad_norm": 1.3179171085357666, "learning_rate": 1.4444896689079142e-06, "loss": 0.4854, "step": 769 }, { "epoch": 0.6514382402707276, "grad_norm": 1.4320234060287476, "learning_rate": 1.4382810419909587e-06, "loss": 0.5674, "step": 770 }, { "epoch": 0.6522842639593909, "grad_norm": 1.5675216913223267, "learning_rate": 1.432080393914997e-06, "loss": 0.6243, "step": 771 }, { "epoch": 0.6531302876480541, "grad_norm": 1.4815562963485718, "learning_rate": 1.4258877712780333e-06, "loss": 0.5564, "step": 772 }, { "epoch": 0.6539763113367174, "grad_norm": 1.3841257095336914, "learning_rate": 1.4197032206177618e-06, "loss": 0.481, "step": 773 }, { "epoch": 0.6548223350253807, "grad_norm": 1.3601456880569458, "learning_rate": 1.4135267884112153e-06, "loss": 0.4912, "step": 774 }, { "epoch": 0.6556683587140439, "grad_norm": 1.3832186460494995, "learning_rate": 1.4073585210744136e-06, "loss": 0.5253, "step": 775 }, { "epoch": 0.6565143824027073, "grad_norm": 1.516714096069336, "learning_rate": 1.401198464962021e-06, "loss": 0.4864, "step": 776 }, { "epoch": 0.6573604060913706, "grad_norm": 1.6682307720184326, "learning_rate": 1.3950466663669915e-06, "loss": 0.5815, "step": 777 }, { "epoch": 0.6582064297800339, "grad_norm": 1.3226845264434814, "learning_rate": 1.3889031715202272e-06, "loss": 0.4574, "step": 778 }, { "epoch": 0.6590524534686971, "grad_norm": 1.5030988454818726, "learning_rate": 1.3827680265902235e-06, "loss": 0.5515, "step": 779 }, { "epoch": 0.6598984771573604, "grad_norm": 1.5196452140808105, "learning_rate": 1.3766412776827282e-06, "loss": 0.5655, "step": 780 }, { "epoch": 0.6607445008460237, "grad_norm": 1.392120361328125, "learning_rate": 1.3705229708403928e-06, "loss": 0.5012, "step": 781 }, { "epoch": 0.6615905245346869, "grad_norm": 1.4476877450942993, "learning_rate": 1.3644131520424241e-06, "loss": 0.5739, "step": 782 }, { "epoch": 0.6624365482233503, "grad_norm": 1.4679392576217651, "learning_rate": 1.3583118672042441e-06, "loss": 0.5118, "step": 783 }, { "epoch": 0.6632825719120136, "grad_norm": 1.5056501626968384, "learning_rate": 1.3522191621771402e-06, "loss": 0.5661, "step": 784 }, { "epoch": 0.6641285956006768, "grad_norm": 1.583238124847412, "learning_rate": 1.346135082747923e-06, "loss": 0.6109, "step": 785 }, { "epoch": 0.6649746192893401, "grad_norm": 1.6272413730621338, "learning_rate": 1.3400596746385817e-06, "loss": 0.5755, "step": 786 }, { "epoch": 0.6658206429780034, "grad_norm": 1.630968451499939, "learning_rate": 1.3339929835059393e-06, "loss": 0.5263, "step": 787 }, { "epoch": 0.6666666666666666, "grad_norm": 1.260642647743225, "learning_rate": 1.3279350549413117e-06, "loss": 0.4703, "step": 788 }, { "epoch": 0.6675126903553299, "grad_norm": 1.5122746229171753, "learning_rate": 1.3218859344701634e-06, "loss": 0.5735, "step": 789 }, { "epoch": 0.6683587140439933, "grad_norm": 1.5645116567611694, "learning_rate": 1.3158456675517657e-06, "loss": 0.5894, "step": 790 }, { "epoch": 0.6692047377326565, "grad_norm": 1.6442245244979858, "learning_rate": 1.3098142995788554e-06, "loss": 0.5377, "step": 791 }, { "epoch": 0.6700507614213198, "grad_norm": 1.5986851453781128, "learning_rate": 1.3037918758772944e-06, "loss": 0.5731, "step": 792 }, { "epoch": 0.6708967851099831, "grad_norm": 1.582816243171692, "learning_rate": 1.2977784417057262e-06, "loss": 0.4785, "step": 793 }, { "epoch": 0.6717428087986463, "grad_norm": 1.3253484964370728, "learning_rate": 1.29177404225524e-06, "loss": 0.4108, "step": 794 }, { "epoch": 0.6725888324873096, "grad_norm": 1.4294928312301636, "learning_rate": 1.2857787226490275e-06, "loss": 0.5675, "step": 795 }, { "epoch": 0.6734348561759729, "grad_norm": 1.5926408767700195, "learning_rate": 1.2797925279420454e-06, "loss": 0.5764, "step": 796 }, { "epoch": 0.6742808798646363, "grad_norm": 1.352016568183899, "learning_rate": 1.2738155031206772e-06, "loss": 0.5051, "step": 797 }, { "epoch": 0.6751269035532995, "grad_norm": 1.4352556467056274, "learning_rate": 1.2678476931023947e-06, "loss": 0.4998, "step": 798 }, { "epoch": 0.6759729272419628, "grad_norm": 1.5205786228179932, "learning_rate": 1.2618891427354174e-06, "loss": 0.4852, "step": 799 }, { "epoch": 0.676818950930626, "grad_norm": 1.4403825998306274, "learning_rate": 1.2559398967983821e-06, "loss": 0.5406, "step": 800 }, { "epoch": 0.6776649746192893, "grad_norm": 1.380646824836731, "learning_rate": 1.2500000000000007e-06, "loss": 0.4836, "step": 801 }, { "epoch": 0.6785109983079526, "grad_norm": 1.469948410987854, "learning_rate": 1.2440694969787262e-06, "loss": 0.5521, "step": 802 }, { "epoch": 0.6793570219966159, "grad_norm": 1.4081382751464844, "learning_rate": 1.2381484323024178e-06, "loss": 0.4733, "step": 803 }, { "epoch": 0.6802030456852792, "grad_norm": 1.3682475090026855, "learning_rate": 1.232236850468004e-06, "loss": 0.5307, "step": 804 }, { "epoch": 0.6810490693739425, "grad_norm": 1.5731854438781738, "learning_rate": 1.2263347959011534e-06, "loss": 0.4799, "step": 805 }, { "epoch": 0.6818950930626058, "grad_norm": 1.4115339517593384, "learning_rate": 1.2204423129559306e-06, "loss": 0.4808, "step": 806 }, { "epoch": 0.682741116751269, "grad_norm": 1.5260988473892212, "learning_rate": 1.2145594459144745e-06, "loss": 0.5247, "step": 807 }, { "epoch": 0.6835871404399323, "grad_norm": 1.402204155921936, "learning_rate": 1.2086862389866577e-06, "loss": 0.5507, "step": 808 }, { "epoch": 0.6844331641285956, "grad_norm": 1.574589729309082, "learning_rate": 1.2028227363097583e-06, "loss": 0.4803, "step": 809 }, { "epoch": 0.6852791878172588, "grad_norm": 1.4717187881469727, "learning_rate": 1.1969689819481257e-06, "loss": 0.5736, "step": 810 }, { "epoch": 0.6861252115059222, "grad_norm": 1.4452178478240967, "learning_rate": 1.1911250198928508e-06, "loss": 0.5348, "step": 811 }, { "epoch": 0.6869712351945855, "grad_norm": 1.5130823850631714, "learning_rate": 1.1852908940614354e-06, "loss": 0.4494, "step": 812 }, { "epoch": 0.6878172588832487, "grad_norm": 1.5016757249832153, "learning_rate": 1.1794666482974617e-06, "loss": 0.4589, "step": 813 }, { "epoch": 0.688663282571912, "grad_norm": 1.604846715927124, "learning_rate": 1.1736523263702637e-06, "loss": 0.5153, "step": 814 }, { "epoch": 0.6895093062605753, "grad_norm": 1.5163328647613525, "learning_rate": 1.167847971974595e-06, "loss": 0.5082, "step": 815 }, { "epoch": 0.6903553299492385, "grad_norm": 1.3733899593353271, "learning_rate": 1.1620536287303052e-06, "loss": 0.478, "step": 816 }, { "epoch": 0.6912013536379019, "grad_norm": 1.4143409729003906, "learning_rate": 1.1562693401820094e-06, "loss": 0.497, "step": 817 }, { "epoch": 0.6920473773265652, "grad_norm": 1.385507345199585, "learning_rate": 1.1504951497987626e-06, "loss": 0.5322, "step": 818 }, { "epoch": 0.6928934010152284, "grad_norm": 1.6582002639770508, "learning_rate": 1.14473110097373e-06, "loss": 0.5422, "step": 819 }, { "epoch": 0.6937394247038917, "grad_norm": 1.633652925491333, "learning_rate": 1.1389772370238638e-06, "loss": 0.5311, "step": 820 }, { "epoch": 0.694585448392555, "grad_norm": 1.4567234516143799, "learning_rate": 1.133233601189577e-06, "loss": 0.5541, "step": 821 }, { "epoch": 0.6954314720812182, "grad_norm": 1.4335215091705322, "learning_rate": 1.1275002366344156e-06, "loss": 0.4871, "step": 822 }, { "epoch": 0.6962774957698815, "grad_norm": 1.401573896408081, "learning_rate": 1.1217771864447396e-06, "loss": 0.4619, "step": 823 }, { "epoch": 0.6971235194585449, "grad_norm": 1.5211458206176758, "learning_rate": 1.1160644936293955e-06, "loss": 0.5443, "step": 824 }, { "epoch": 0.6979695431472082, "grad_norm": 1.3945550918579102, "learning_rate": 1.110362201119393e-06, "loss": 0.5781, "step": 825 }, { "epoch": 0.6988155668358714, "grad_norm": 1.577100396156311, "learning_rate": 1.1046703517675848e-06, "loss": 0.5209, "step": 826 }, { "epoch": 0.6996615905245347, "grad_norm": 1.50028657913208, "learning_rate": 1.0989889883483415e-06, "loss": 0.4327, "step": 827 }, { "epoch": 0.700507614213198, "grad_norm": 1.5429868698120117, "learning_rate": 1.093318153557233e-06, "loss": 0.4869, "step": 828 }, { "epoch": 0.7013536379018612, "grad_norm": 1.4478799104690552, "learning_rate": 1.0876578900107053e-06, "loss": 0.5054, "step": 829 }, { "epoch": 0.7021996615905245, "grad_norm": 1.5432162284851074, "learning_rate": 1.0820082402457617e-06, "loss": 0.5047, "step": 830 }, { "epoch": 0.7030456852791879, "grad_norm": 1.5668376684188843, "learning_rate": 1.0763692467196432e-06, "loss": 0.5276, "step": 831 }, { "epoch": 0.7038917089678511, "grad_norm": 1.5857386589050293, "learning_rate": 1.070740951809508e-06, "loss": 0.5437, "step": 832 }, { "epoch": 0.7047377326565144, "grad_norm": 1.518097996711731, "learning_rate": 1.0651233978121145e-06, "loss": 0.5266, "step": 833 }, { "epoch": 0.7055837563451777, "grad_norm": 1.5712751150131226, "learning_rate": 1.0595166269435027e-06, "loss": 0.5185, "step": 834 }, { "epoch": 0.7064297800338409, "grad_norm": 1.4731966257095337, "learning_rate": 1.0539206813386774e-06, "loss": 0.5471, "step": 835 }, { "epoch": 0.7072758037225042, "grad_norm": 1.4393733739852905, "learning_rate": 1.048335603051291e-06, "loss": 0.4901, "step": 836 }, { "epoch": 0.7081218274111675, "grad_norm": 1.6055103540420532, "learning_rate": 1.0427614340533293e-06, "loss": 0.5252, "step": 837 }, { "epoch": 0.7089678510998308, "grad_norm": 1.5624628067016602, "learning_rate": 1.037198216234791e-06, "loss": 0.5123, "step": 838 }, { "epoch": 0.7098138747884941, "grad_norm": 1.5251977443695068, "learning_rate": 1.0316459914033794e-06, "loss": 0.4864, "step": 839 }, { "epoch": 0.7106598984771574, "grad_norm": 1.4280132055282593, "learning_rate": 1.0261048012841848e-06, "loss": 0.4176, "step": 840 }, { "epoch": 0.7115059221658206, "grad_norm": 1.5174310207366943, "learning_rate": 1.0205746875193712e-06, "loss": 0.468, "step": 841 }, { "epoch": 0.7123519458544839, "grad_norm": 1.4498469829559326, "learning_rate": 1.0150556916678634e-06, "loss": 0.4887, "step": 842 }, { "epoch": 0.7131979695431472, "grad_norm": 1.3770853281021118, "learning_rate": 1.0095478552050348e-06, "loss": 0.4825, "step": 843 }, { "epoch": 0.7140439932318104, "grad_norm": 1.3348174095153809, "learning_rate": 1.0040512195223947e-06, "loss": 0.4465, "step": 844 }, { "epoch": 0.7148900169204738, "grad_norm": 1.5871872901916504, "learning_rate": 9.985658259272826e-07, "loss": 0.504, "step": 845 }, { "epoch": 0.7157360406091371, "grad_norm": 1.5275450944900513, "learning_rate": 9.930917156425477e-07, "loss": 0.5283, "step": 846 }, { "epoch": 0.7165820642978004, "grad_norm": 1.7547075748443604, "learning_rate": 9.876289298062478e-07, "loss": 0.5201, "step": 847 }, { "epoch": 0.7174280879864636, "grad_norm": 1.480360507965088, "learning_rate": 9.821775094713376e-07, "loss": 0.5058, "step": 848 }, { "epoch": 0.7182741116751269, "grad_norm": 1.6711392402648926, "learning_rate": 9.767374956053584e-07, "loss": 0.4928, "step": 849 }, { "epoch": 0.7191201353637902, "grad_norm": 1.2926833629608154, "learning_rate": 9.713089290901334e-07, "loss": 0.4889, "step": 850 }, { "epoch": 0.7199661590524534, "grad_norm": 1.6195472478866577, "learning_rate": 9.658918507214567e-07, "loss": 0.5089, "step": 851 }, { "epoch": 0.7208121827411168, "grad_norm": 1.544764757156372, "learning_rate": 9.604863012087904e-07, "loss": 0.5558, "step": 852 }, { "epoch": 0.7216582064297801, "grad_norm": 1.4564974308013916, "learning_rate": 9.550923211749557e-07, "loss": 0.5031, "step": 853 }, { "epoch": 0.7225042301184433, "grad_norm": 1.4476693868637085, "learning_rate": 9.497099511558309e-07, "loss": 0.486, "step": 854 }, { "epoch": 0.7233502538071066, "grad_norm": 1.3963452577590942, "learning_rate": 9.443392316000413e-07, "loss": 0.4551, "step": 855 }, { "epoch": 0.7241962774957699, "grad_norm": 1.6682579517364502, "learning_rate": 9.389802028686617e-07, "loss": 0.5026, "step": 856 }, { "epoch": 0.7250423011844331, "grad_norm": 1.4662981033325195, "learning_rate": 9.336329052349089e-07, "loss": 0.5005, "step": 857 }, { "epoch": 0.7258883248730964, "grad_norm": 1.414525032043457, "learning_rate": 9.28297378883842e-07, "loss": 0.4489, "step": 858 }, { "epoch": 0.7267343485617598, "grad_norm": 1.6276682615280151, "learning_rate": 9.229736639120562e-07, "loss": 0.6037, "step": 859 }, { "epoch": 0.727580372250423, "grad_norm": 1.5041996240615845, "learning_rate": 9.176618003273848e-07, "loss": 0.5154, "step": 860 }, { "epoch": 0.7284263959390863, "grad_norm": 1.3910162448883057, "learning_rate": 9.123618280485993e-07, "loss": 0.3958, "step": 861 }, { "epoch": 0.7292724196277496, "grad_norm": 1.3514389991760254, "learning_rate": 9.070737869051044e-07, "loss": 0.456, "step": 862 }, { "epoch": 0.7301184433164128, "grad_norm": 1.4312387704849243, "learning_rate": 9.017977166366445e-07, "loss": 0.5148, "step": 863 }, { "epoch": 0.7309644670050761, "grad_norm": 1.4258145093917847, "learning_rate": 8.965336568930022e-07, "loss": 0.5091, "step": 864 }, { "epoch": 0.7318104906937394, "grad_norm": 1.468034029006958, "learning_rate": 8.912816472337008e-07, "loss": 0.4846, "step": 865 }, { "epoch": 0.7326565143824028, "grad_norm": 1.375032663345337, "learning_rate": 8.860417271277067e-07, "loss": 0.4945, "step": 866 }, { "epoch": 0.733502538071066, "grad_norm": 1.2760509252548218, "learning_rate": 8.808139359531332e-07, "loss": 0.4549, "step": 867 }, { "epoch": 0.7343485617597293, "grad_norm": 1.5727797746658325, "learning_rate": 8.75598312996944e-07, "loss": 0.4966, "step": 868 }, { "epoch": 0.7351945854483926, "grad_norm": 1.5262360572814941, "learning_rate": 8.703948974546592e-07, "loss": 0.5525, "step": 869 }, { "epoch": 0.7360406091370558, "grad_norm": 1.482978105545044, "learning_rate": 8.65203728430059e-07, "loss": 0.463, "step": 870 }, { "epoch": 0.7368866328257191, "grad_norm": 1.515729546546936, "learning_rate": 8.600248449348916e-07, "loss": 0.5125, "step": 871 }, { "epoch": 0.7377326565143824, "grad_norm": 1.6180707216262817, "learning_rate": 8.548582858885787e-07, "loss": 0.5446, "step": 872 }, { "epoch": 0.7385786802030457, "grad_norm": 1.7022454738616943, "learning_rate": 8.497040901179232e-07, "loss": 0.439, "step": 873 }, { "epoch": 0.739424703891709, "grad_norm": 1.35090172290802, "learning_rate": 8.445622963568184e-07, "loss": 0.4509, "step": 874 }, { "epoch": 0.7402707275803723, "grad_norm": 1.2935441732406616, "learning_rate": 8.394329432459561e-07, "loss": 0.4708, "step": 875 }, { "epoch": 0.7411167512690355, "grad_norm": 1.4397931098937988, "learning_rate": 8.343160693325356e-07, "loss": 0.4763, "step": 876 }, { "epoch": 0.7419627749576988, "grad_norm": 1.4349732398986816, "learning_rate": 8.292117130699767e-07, "loss": 0.4536, "step": 877 }, { "epoch": 0.7428087986463621, "grad_norm": 1.4029067754745483, "learning_rate": 8.241199128176255e-07, "loss": 0.5145, "step": 878 }, { "epoch": 0.7436548223350253, "grad_norm": 1.4133881330490112, "learning_rate": 8.190407068404721e-07, "loss": 0.5282, "step": 879 }, { "epoch": 0.7445008460236887, "grad_norm": 1.4240069389343262, "learning_rate": 8.139741333088597e-07, "loss": 0.4763, "step": 880 }, { "epoch": 0.745346869712352, "grad_norm": 1.5198265314102173, "learning_rate": 8.089202302981983e-07, "loss": 0.5663, "step": 881 }, { "epoch": 0.7461928934010152, "grad_norm": 1.3956340551376343, "learning_rate": 8.038790357886783e-07, "loss": 0.4796, "step": 882 }, { "epoch": 0.7470389170896785, "grad_norm": 1.4687814712524414, "learning_rate": 7.988505876649863e-07, "loss": 0.5539, "step": 883 }, { "epoch": 0.7478849407783418, "grad_norm": 1.3739749193191528, "learning_rate": 7.938349237160184e-07, "loss": 0.4545, "step": 884 }, { "epoch": 0.748730964467005, "grad_norm": 1.3965612649917603, "learning_rate": 7.888320816345984e-07, "loss": 0.5094, "step": 885 }, { "epoch": 0.7495769881556683, "grad_norm": 1.383991003036499, "learning_rate": 7.838420990171927e-07, "loss": 0.3785, "step": 886 }, { "epoch": 0.7504230118443317, "grad_norm": 1.4848946332931519, "learning_rate": 7.788650133636291e-07, "loss": 0.4969, "step": 887 }, { "epoch": 0.751269035532995, "grad_norm": 1.5263686180114746, "learning_rate": 7.739008620768143e-07, "loss": 0.4673, "step": 888 }, { "epoch": 0.7521150592216582, "grad_norm": 1.5392656326293945, "learning_rate": 7.689496824624526e-07, "loss": 0.5333, "step": 889 }, { "epoch": 0.7529610829103215, "grad_norm": 1.4336823225021362, "learning_rate": 7.640115117287661e-07, "loss": 0.5547, "step": 890 }, { "epoch": 0.7538071065989848, "grad_norm": 1.606310486793518, "learning_rate": 7.590863869862155e-07, "loss": 0.5233, "step": 891 }, { "epoch": 0.754653130287648, "grad_norm": 1.4799706935882568, "learning_rate": 7.541743452472194e-07, "loss": 0.5235, "step": 892 }, { "epoch": 0.7554991539763113, "grad_norm": 1.4974623918533325, "learning_rate": 7.492754234258794e-07, "loss": 0.4494, "step": 893 }, { "epoch": 0.7563451776649747, "grad_norm": 1.7052468061447144, "learning_rate": 7.443896583376972e-07, "loss": 0.5512, "step": 894 }, { "epoch": 0.7571912013536379, "grad_norm": 1.6682631969451904, "learning_rate": 7.395170866993043e-07, "loss": 0.5588, "step": 895 }, { "epoch": 0.7580372250423012, "grad_norm": 1.5600894689559937, "learning_rate": 7.346577451281822e-07, "loss": 0.5154, "step": 896 }, { "epoch": 0.7588832487309645, "grad_norm": 1.35463547706604, "learning_rate": 7.298116701423874e-07, "loss": 0.5002, "step": 897 }, { "epoch": 0.7597292724196277, "grad_norm": 1.4675949811935425, "learning_rate": 7.249788981602801e-07, "loss": 0.5376, "step": 898 }, { "epoch": 0.760575296108291, "grad_norm": 1.5439352989196777, "learning_rate": 7.201594655002458e-07, "loss": 0.4828, "step": 899 }, { "epoch": 0.7614213197969543, "grad_norm": 1.5555047988891602, "learning_rate": 7.153534083804253e-07, "loss": 0.5266, "step": 900 }, { "epoch": 0.7622673434856176, "grad_norm": 1.6355152130126953, "learning_rate": 7.105607629184433e-07, "loss": 0.5003, "step": 901 }, { "epoch": 0.7631133671742809, "grad_norm": 1.4930115938186646, "learning_rate": 7.057815651311323e-07, "loss": 0.5193, "step": 902 }, { "epoch": 0.7639593908629442, "grad_norm": 1.5737615823745728, "learning_rate": 7.010158509342682e-07, "loss": 0.5353, "step": 903 }, { "epoch": 0.7648054145516074, "grad_norm": 1.5561256408691406, "learning_rate": 6.962636561422967e-07, "loss": 0.5157, "step": 904 }, { "epoch": 0.7656514382402707, "grad_norm": 1.5765726566314697, "learning_rate": 6.915250164680648e-07, "loss": 0.5358, "step": 905 }, { "epoch": 0.766497461928934, "grad_norm": 1.4768812656402588, "learning_rate": 6.867999675225523e-07, "loss": 0.5021, "step": 906 }, { "epoch": 0.7673434856175972, "grad_norm": 1.4443702697753906, "learning_rate": 6.820885448146041e-07, "loss": 0.5442, "step": 907 }, { "epoch": 0.7681895093062606, "grad_norm": 1.5117536783218384, "learning_rate": 6.773907837506646e-07, "loss": 0.6001, "step": 908 }, { "epoch": 0.7690355329949239, "grad_norm": 1.4011093378067017, "learning_rate": 6.7270671963451e-07, "loss": 0.5374, "step": 909 }, { "epoch": 0.7698815566835872, "grad_norm": 1.523114562034607, "learning_rate": 6.680363876669832e-07, "loss": 0.4964, "step": 910 }, { "epoch": 0.7707275803722504, "grad_norm": 1.3653088808059692, "learning_rate": 6.633798229457309e-07, "loss": 0.4675, "step": 911 }, { "epoch": 0.7715736040609137, "grad_norm": 1.532370686531067, "learning_rate": 6.587370604649373e-07, "loss": 0.5292, "step": 912 }, { "epoch": 0.772419627749577, "grad_norm": 1.5474774837493896, "learning_rate": 6.541081351150638e-07, "loss": 0.5468, "step": 913 }, { "epoch": 0.7732656514382402, "grad_norm": 1.6492289304733276, "learning_rate": 6.494930816825842e-07, "loss": 0.5242, "step": 914 }, { "epoch": 0.7741116751269036, "grad_norm": 1.6536519527435303, "learning_rate": 6.448919348497254e-07, "loss": 0.5071, "step": 915 }, { "epoch": 0.7749576988155669, "grad_norm": 1.7425479888916016, "learning_rate": 6.403047291942057e-07, "loss": 0.5401, "step": 916 }, { "epoch": 0.7758037225042301, "grad_norm": 1.5332328081130981, "learning_rate": 6.357314991889757e-07, "loss": 0.5349, "step": 917 }, { "epoch": 0.7766497461928934, "grad_norm": 1.671473503112793, "learning_rate": 6.311722792019565e-07, "loss": 0.5401, "step": 918 }, { "epoch": 0.7774957698815567, "grad_norm": 1.3117287158966064, "learning_rate": 6.266271034957861e-07, "loss": 0.4603, "step": 919 }, { "epoch": 0.7783417935702199, "grad_norm": 1.5059982538223267, "learning_rate": 6.220960062275583e-07, "loss": 0.5476, "step": 920 }, { "epoch": 0.7791878172588832, "grad_norm": 1.5411993265151978, "learning_rate": 6.175790214485674e-07, "loss": 0.5483, "step": 921 }, { "epoch": 0.7800338409475466, "grad_norm": 1.4125125408172607, "learning_rate": 6.130761831040522e-07, "loss": 0.4734, "step": 922 }, { "epoch": 0.7808798646362098, "grad_norm": 1.3989757299423218, "learning_rate": 6.085875250329401e-07, "loss": 0.4282, "step": 923 }, { "epoch": 0.7817258883248731, "grad_norm": 1.408751368522644, "learning_rate": 6.041130809675944e-07, "loss": 0.4072, "step": 924 }, { "epoch": 0.7825719120135364, "grad_norm": 1.6468225717544556, "learning_rate": 5.996528845335587e-07, "loss": 0.5275, "step": 925 }, { "epoch": 0.7834179357021996, "grad_norm": 1.299722671508789, "learning_rate": 5.952069692493062e-07, "loss": 0.4825, "step": 926 }, { "epoch": 0.7842639593908629, "grad_norm": 1.3989574909210205, "learning_rate": 5.907753685259865e-07, "loss": 0.5353, "step": 927 }, { "epoch": 0.7851099830795262, "grad_norm": 1.632617473602295, "learning_rate": 5.863581156671755e-07, "loss": 0.5604, "step": 928 }, { "epoch": 0.7859560067681896, "grad_norm": 1.3802127838134766, "learning_rate": 5.819552438686238e-07, "loss": 0.5555, "step": 929 }, { "epoch": 0.7868020304568528, "grad_norm": 1.2966598272323608, "learning_rate": 5.775667862180087e-07, "loss": 0.5135, "step": 930 }, { "epoch": 0.7876480541455161, "grad_norm": 1.4955836534500122, "learning_rate": 5.731927756946848e-07, "loss": 0.5242, "step": 931 }, { "epoch": 0.7884940778341794, "grad_norm": 1.486580729484558, "learning_rate": 5.688332451694356e-07, "loss": 0.5137, "step": 932 }, { "epoch": 0.7893401015228426, "grad_norm": 1.5617793798446655, "learning_rate": 5.644882274042285e-07, "loss": 0.5552, "step": 933 }, { "epoch": 0.7901861252115059, "grad_norm": 1.4690701961517334, "learning_rate": 5.601577550519646e-07, "loss": 0.5497, "step": 934 }, { "epoch": 0.7910321489001692, "grad_norm": 1.4751214981079102, "learning_rate": 5.558418606562385e-07, "loss": 0.4816, "step": 935 }, { "epoch": 0.7918781725888325, "grad_norm": 1.4868813753128052, "learning_rate": 5.5154057665109e-07, "loss": 0.511, "step": 936 }, { "epoch": 0.7927241962774958, "grad_norm": 1.3979462385177612, "learning_rate": 5.472539353607612e-07, "loss": 0.4614, "step": 937 }, { "epoch": 0.7935702199661591, "grad_norm": 1.40373694896698, "learning_rate": 5.429819689994556e-07, "loss": 0.5177, "step": 938 }, { "epoch": 0.7944162436548223, "grad_norm": 1.5623489618301392, "learning_rate": 5.387247096710921e-07, "loss": 0.5668, "step": 939 }, { "epoch": 0.7952622673434856, "grad_norm": 1.258279800415039, "learning_rate": 5.344821893690679e-07, "loss": 0.4178, "step": 940 }, { "epoch": 0.7961082910321489, "grad_norm": 1.3184384107589722, "learning_rate": 5.30254439976014e-07, "loss": 0.4387, "step": 941 }, { "epoch": 0.7969543147208121, "grad_norm": 1.380998969078064, "learning_rate": 5.260414932635588e-07, "loss": 0.5116, "step": 942 }, { "epoch": 0.7978003384094755, "grad_norm": 1.447558045387268, "learning_rate": 5.218433808920884e-07, "loss": 0.5289, "step": 943 }, { "epoch": 0.7986463620981388, "grad_norm": 1.6539430618286133, "learning_rate": 5.176601344105084e-07, "loss": 0.5358, "step": 944 }, { "epoch": 0.799492385786802, "grad_norm": 1.6026103496551514, "learning_rate": 5.134917852560067e-07, "loss": 0.5461, "step": 945 }, { "epoch": 0.8003384094754653, "grad_norm": 1.8216614723205566, "learning_rate": 5.09338364753818e-07, "loss": 0.5195, "step": 946 }, { "epoch": 0.8011844331641286, "grad_norm": 1.376305103302002, "learning_rate": 5.051999041169869e-07, "loss": 0.5051, "step": 947 }, { "epoch": 0.8020304568527918, "grad_norm": 1.5061923265457153, "learning_rate": 5.010764344461352e-07, "loss": 0.4656, "step": 948 }, { "epoch": 0.8028764805414551, "grad_norm": 1.6302450895309448, "learning_rate": 4.969679867292276e-07, "loss": 0.5381, "step": 949 }, { "epoch": 0.8037225042301185, "grad_norm": 1.4138633012771606, "learning_rate": 4.928745918413352e-07, "loss": 0.5356, "step": 950 }, { "epoch": 0.8045685279187818, "grad_norm": 1.4501382112503052, "learning_rate": 4.887962805444122e-07, "loss": 0.506, "step": 951 }, { "epoch": 0.805414551607445, "grad_norm": 1.3429620265960693, "learning_rate": 4.847330834870551e-07, "loss": 0.4975, "step": 952 }, { "epoch": 0.8062605752961083, "grad_norm": 1.3932536840438843, "learning_rate": 4.806850312042782e-07, "loss": 0.4677, "step": 953 }, { "epoch": 0.8071065989847716, "grad_norm": 1.4443645477294922, "learning_rate": 4.766521541172822e-07, "loss": 0.5573, "step": 954 }, { "epoch": 0.8079526226734348, "grad_norm": 1.536169409751892, "learning_rate": 4.7263448253322574e-07, "loss": 0.5413, "step": 955 }, { "epoch": 0.8087986463620981, "grad_norm": 1.3791049718856812, "learning_rate": 4.686320466449981e-07, "loss": 0.4739, "step": 956 }, { "epoch": 0.8096446700507615, "grad_norm": 1.523890733718872, "learning_rate": 4.6464487653099216e-07, "loss": 0.5314, "step": 957 }, { "epoch": 0.8104906937394247, "grad_norm": 1.4911043643951416, "learning_rate": 4.6067300215487663e-07, "loss": 0.5903, "step": 958 }, { "epoch": 0.811336717428088, "grad_norm": 1.383557915687561, "learning_rate": 4.5671645336537425e-07, "loss": 0.5124, "step": 959 }, { "epoch": 0.8121827411167513, "grad_norm": 1.5496838092803955, "learning_rate": 4.52775259896035e-07, "loss": 0.5472, "step": 960 }, { "epoch": 0.8130287648054145, "grad_norm": 1.4877400398254395, "learning_rate": 4.4884945136501325e-07, "loss": 0.5108, "step": 961 }, { "epoch": 0.8138747884940778, "grad_norm": 1.4807846546173096, "learning_rate": 4.449390572748449e-07, "loss": 0.4671, "step": 962 }, { "epoch": 0.8147208121827412, "grad_norm": 1.3834859132766724, "learning_rate": 4.4104410701222703e-07, "loss": 0.4498, "step": 963 }, { "epoch": 0.8155668358714044, "grad_norm": 1.4340556859970093, "learning_rate": 4.371646298477947e-07, "loss": 0.4934, "step": 964 }, { "epoch": 0.8164128595600677, "grad_norm": 1.6941485404968262, "learning_rate": 4.333006549359034e-07, "loss": 0.5905, "step": 965 }, { "epoch": 0.817258883248731, "grad_norm": 1.3260046243667603, "learning_rate": 4.2945221131440783e-07, "loss": 0.4841, "step": 966 }, { "epoch": 0.8181049069373942, "grad_norm": 1.5405261516571045, "learning_rate": 4.2561932790444597e-07, "loss": 0.5125, "step": 967 }, { "epoch": 0.8189509306260575, "grad_norm": 1.5137594938278198, "learning_rate": 4.218020335102191e-07, "loss": 0.469, "step": 968 }, { "epoch": 0.8197969543147208, "grad_norm": 1.3630168437957764, "learning_rate": 4.1800035681877765e-07, "loss": 0.5501, "step": 969 }, { "epoch": 0.8206429780033841, "grad_norm": 1.3924651145935059, "learning_rate": 4.142143263998047e-07, "loss": 0.4994, "step": 970 }, { "epoch": 0.8214890016920474, "grad_norm": 1.426595687866211, "learning_rate": 4.104439707054003e-07, "loss": 0.4795, "step": 971 }, { "epoch": 0.8223350253807107, "grad_norm": 1.356465220451355, "learning_rate": 4.0668931806987e-07, "loss": 0.4743, "step": 972 }, { "epoch": 0.823181049069374, "grad_norm": 1.474966049194336, "learning_rate": 4.029503967095097e-07, "loss": 0.5118, "step": 973 }, { "epoch": 0.8240270727580372, "grad_norm": 1.4417613744735718, "learning_rate": 3.9922723472239356e-07, "loss": 0.5565, "step": 974 }, { "epoch": 0.8248730964467005, "grad_norm": 1.5358595848083496, "learning_rate": 3.9551986008816544e-07, "loss": 0.5128, "step": 975 }, { "epoch": 0.8257191201353637, "grad_norm": 1.390822410583496, "learning_rate": 3.9182830066782614e-07, "loss": 0.5552, "step": 976 }, { "epoch": 0.8265651438240271, "grad_norm": 1.3930827379226685, "learning_rate": 3.8815258420352385e-07, "loss": 0.4682, "step": 977 }, { "epoch": 0.8274111675126904, "grad_norm": 1.3697726726531982, "learning_rate": 3.844927383183486e-07, "loss": 0.4713, "step": 978 }, { "epoch": 0.8282571912013537, "grad_norm": 1.5058186054229736, "learning_rate": 3.808487905161215e-07, "loss": 0.5004, "step": 979 }, { "epoch": 0.8291032148900169, "grad_norm": 1.4689873456954956, "learning_rate": 3.772207681811896e-07, "loss": 0.5002, "step": 980 }, { "epoch": 0.8299492385786802, "grad_norm": 1.4826774597167969, "learning_rate": 3.7360869857821805e-07, "loss": 0.4269, "step": 981 }, { "epoch": 0.8307952622673435, "grad_norm": 1.3101059198379517, "learning_rate": 3.7001260885198925e-07, "loss": 0.53, "step": 982 }, { "epoch": 0.8316412859560067, "grad_norm": 1.5160847902297974, "learning_rate": 3.664325260271953e-07, "loss": 0.5218, "step": 983 }, { "epoch": 0.8324873096446701, "grad_norm": 1.3929725885391235, "learning_rate": 3.6286847700823634e-07, "loss": 0.55, "step": 984 }, { "epoch": 0.8333333333333334, "grad_norm": 1.4347172975540161, "learning_rate": 3.5932048857901773e-07, "loss": 0.455, "step": 985 }, { "epoch": 0.8341793570219966, "grad_norm": 1.373490333557129, "learning_rate": 3.5578858740274976e-07, "loss": 0.4914, "step": 986 }, { "epoch": 0.8350253807106599, "grad_norm": 1.4317591190338135, "learning_rate": 3.5227280002174626e-07, "loss": 0.5637, "step": 987 }, { "epoch": 0.8358714043993232, "grad_norm": 1.6726568937301636, "learning_rate": 3.487731528572255e-07, "loss": 0.5096, "step": 988 }, { "epoch": 0.8367174280879864, "grad_norm": 1.3493298292160034, "learning_rate": 3.4528967220911287e-07, "loss": 0.4357, "step": 989 }, { "epoch": 0.8375634517766497, "grad_norm": 1.4160189628601074, "learning_rate": 3.418223842558385e-07, "loss": 0.4529, "step": 990 }, { "epoch": 0.8384094754653131, "grad_norm": 1.5254844427108765, "learning_rate": 3.38371315054149e-07, "loss": 0.4857, "step": 991 }, { "epoch": 0.8392554991539763, "grad_norm": 1.4467828273773193, "learning_rate": 3.3493649053890325e-07, "loss": 0.5402, "step": 992 }, { "epoch": 0.8401015228426396, "grad_norm": 1.5890865325927734, "learning_rate": 3.315179365228824e-07, "loss": 0.4991, "step": 993 }, { "epoch": 0.8409475465313029, "grad_norm": 1.4599320888519287, "learning_rate": 3.281156786965933e-07, "loss": 0.399, "step": 994 }, { "epoch": 0.8417935702199661, "grad_norm": 1.501905918121338, "learning_rate": 3.2472974262807794e-07, "loss": 0.4408, "step": 995 }, { "epoch": 0.8426395939086294, "grad_norm": 1.4133471250534058, "learning_rate": 3.213601537627195e-07, "loss": 0.4854, "step": 996 }, { "epoch": 0.8434856175972927, "grad_norm": 1.474295973777771, "learning_rate": 3.1800693742305074e-07, "loss": 0.4868, "step": 997 }, { "epoch": 0.8443316412859561, "grad_norm": 1.340802788734436, "learning_rate": 3.146701188085649e-07, "loss": 0.5024, "step": 998 }, { "epoch": 0.8451776649746193, "grad_norm": 1.5965441465377808, "learning_rate": 3.11349722995527e-07, "loss": 0.5567, "step": 999 }, { "epoch": 0.8460236886632826, "grad_norm": 1.4308526515960693, "learning_rate": 3.080457749367832e-07, "loss": 0.4896, "step": 1000 }, { "epoch": 0.8468697123519459, "grad_norm": 1.475598931312561, "learning_rate": 3.04758299461575e-07, "loss": 0.4918, "step": 1001 }, { "epoch": 0.8477157360406091, "grad_norm": 1.4925700426101685, "learning_rate": 3.014873212753516e-07, "loss": 0.4652, "step": 1002 }, { "epoch": 0.8485617597292724, "grad_norm": 1.5554040670394897, "learning_rate": 2.982328649595856e-07, "loss": 0.5302, "step": 1003 }, { "epoch": 0.8494077834179357, "grad_norm": 1.666174054145813, "learning_rate": 2.949949549715858e-07, "loss": 0.4715, "step": 1004 }, { "epoch": 0.850253807106599, "grad_norm": 1.4457473754882812, "learning_rate": 2.917736156443171e-07, "loss": 0.4546, "step": 1005 }, { "epoch": 0.8510998307952623, "grad_norm": 1.4290974140167236, "learning_rate": 2.885688711862136e-07, "loss": 0.5035, "step": 1006 }, { "epoch": 0.8519458544839256, "grad_norm": 1.4836621284484863, "learning_rate": 2.8538074568099954e-07, "loss": 0.5912, "step": 1007 }, { "epoch": 0.8527918781725888, "grad_norm": 1.309338092803955, "learning_rate": 2.8220926308750757e-07, "loss": 0.4693, "step": 1008 }, { "epoch": 0.8536379018612521, "grad_norm": 1.3714721202850342, "learning_rate": 2.7905444723949765e-07, "loss": 0.4841, "step": 1009 }, { "epoch": 0.8544839255499154, "grad_norm": 1.3649089336395264, "learning_rate": 2.7591632184547996e-07, "loss": 0.4965, "step": 1010 }, { "epoch": 0.8553299492385786, "grad_norm": 1.4042978286743164, "learning_rate": 2.727949104885341e-07, "loss": 0.4913, "step": 1011 }, { "epoch": 0.856175972927242, "grad_norm": 1.6518502235412598, "learning_rate": 2.6969023662613473e-07, "loss": 0.5123, "step": 1012 }, { "epoch": 0.8570219966159053, "grad_norm": 1.5500681400299072, "learning_rate": 2.666023235899734e-07, "loss": 0.464, "step": 1013 }, { "epoch": 0.8578680203045685, "grad_norm": 1.5040642023086548, "learning_rate": 2.6353119458578297e-07, "loss": 0.4687, "step": 1014 }, { "epoch": 0.8587140439932318, "grad_norm": 1.548913836479187, "learning_rate": 2.604768726931645e-07, "loss": 0.565, "step": 1015 }, { "epoch": 0.8595600676818951, "grad_norm": 1.342336893081665, "learning_rate": 2.5743938086541354e-07, "loss": 0.4772, "step": 1016 }, { "epoch": 0.8604060913705583, "grad_norm": 1.419202446937561, "learning_rate": 2.544187419293462e-07, "loss": 0.5304, "step": 1017 }, { "epoch": 0.8612521150592216, "grad_norm": 1.3937733173370361, "learning_rate": 2.514149785851311e-07, "loss": 0.5183, "step": 1018 }, { "epoch": 0.862098138747885, "grad_norm": 1.44895601272583, "learning_rate": 2.4842811340611423e-07, "loss": 0.497, "step": 1019 }, { "epoch": 0.8629441624365483, "grad_norm": 1.648419976234436, "learning_rate": 2.454581688386523e-07, "loss": 0.4864, "step": 1020 }, { "epoch": 0.8637901861252115, "grad_norm": 1.4835489988327026, "learning_rate": 2.4250516720194267e-07, "loss": 0.5337, "step": 1021 }, { "epoch": 0.8646362098138748, "grad_norm": 1.247205376625061, "learning_rate": 2.3956913068785697e-07, "loss": 0.4464, "step": 1022 }, { "epoch": 0.8654822335025381, "grad_norm": 1.5768427848815918, "learning_rate": 2.3665008136077332e-07, "loss": 0.5662, "step": 1023 }, { "epoch": 0.8663282571912013, "grad_norm": 1.430821180343628, "learning_rate": 2.3374804115741056e-07, "loss": 0.4908, "step": 1024 }, { "epoch": 0.8671742808798646, "grad_norm": 1.5622667074203491, "learning_rate": 2.3086303188666393e-07, "loss": 0.5398, "step": 1025 }, { "epoch": 0.868020304568528, "grad_norm": 1.375335931777954, "learning_rate": 2.2799507522944048e-07, "loss": 0.4289, "step": 1026 }, { "epoch": 0.8688663282571912, "grad_norm": 1.2710477113723755, "learning_rate": 2.2514419273849674e-07, "loss": 0.4489, "step": 1027 }, { "epoch": 0.8697123519458545, "grad_norm": 1.3400541543960571, "learning_rate": 2.223104058382766e-07, "loss": 0.4705, "step": 1028 }, { "epoch": 0.8705583756345178, "grad_norm": 1.3298795223236084, "learning_rate": 2.1949373582475065e-07, "loss": 0.5148, "step": 1029 }, { "epoch": 0.871404399323181, "grad_norm": 1.3770630359649658, "learning_rate": 2.166942038652531e-07, "loss": 0.4446, "step": 1030 }, { "epoch": 0.8722504230118443, "grad_norm": 1.5070509910583496, "learning_rate": 2.1391183099832958e-07, "loss": 0.4768, "step": 1031 }, { "epoch": 0.8730964467005076, "grad_norm": 1.6530050039291382, "learning_rate": 2.111466381335714e-07, "loss": 0.5459, "step": 1032 }, { "epoch": 0.873942470389171, "grad_norm": 1.3462867736816406, "learning_rate": 2.083986460514631e-07, "loss": 0.4644, "step": 1033 }, { "epoch": 0.8747884940778342, "grad_norm": 1.366542100906372, "learning_rate": 2.056678754032246e-07, "loss": 0.4752, "step": 1034 }, { "epoch": 0.8756345177664975, "grad_norm": 1.3380286693572998, "learning_rate": 2.0295434671065706e-07, "loss": 0.4484, "step": 1035 }, { "epoch": 0.8764805414551607, "grad_norm": 1.5602799654006958, "learning_rate": 2.002580803659873e-07, "loss": 0.5123, "step": 1036 }, { "epoch": 0.877326565143824, "grad_norm": 1.4561575651168823, "learning_rate": 1.9757909663171508e-07, "loss": 0.5151, "step": 1037 }, { "epoch": 0.8781725888324873, "grad_norm": 1.4674752950668335, "learning_rate": 1.9491741564046125e-07, "loss": 0.4728, "step": 1038 }, { "epoch": 0.8790186125211505, "grad_norm": 1.5501822233200073, "learning_rate": 1.9227305739481618e-07, "loss": 0.4961, "step": 1039 }, { "epoch": 0.8798646362098139, "grad_norm": 1.3679369688034058, "learning_rate": 1.896460417671897e-07, "loss": 0.5051, "step": 1040 }, { "epoch": 0.8807106598984772, "grad_norm": 1.462009072303772, "learning_rate": 1.8703638849966094e-07, "loss": 0.4909, "step": 1041 }, { "epoch": 0.8815566835871405, "grad_norm": 1.4271647930145264, "learning_rate": 1.844441172038311e-07, "loss": 0.4598, "step": 1042 }, { "epoch": 0.8824027072758037, "grad_norm": 1.3603568077087402, "learning_rate": 1.818692473606748e-07, "loss": 0.446, "step": 1043 }, { "epoch": 0.883248730964467, "grad_norm": 1.4679503440856934, "learning_rate": 1.7931179832039513e-07, "loss": 0.4422, "step": 1044 }, { "epoch": 0.8840947546531303, "grad_norm": 1.4945645332336426, "learning_rate": 1.767717893022769e-07, "loss": 0.5402, "step": 1045 }, { "epoch": 0.8849407783417935, "grad_norm": 1.5339360237121582, "learning_rate": 1.7424923939454274e-07, "loss": 0.5342, "step": 1046 }, { "epoch": 0.8857868020304569, "grad_norm": 1.4887621402740479, "learning_rate": 1.717441675542106e-07, "loss": 0.5502, "step": 1047 }, { "epoch": 0.8866328257191202, "grad_norm": 1.5359052419662476, "learning_rate": 1.6925659260694894e-07, "loss": 0.5523, "step": 1048 }, { "epoch": 0.8874788494077834, "grad_norm": 1.4636895656585693, "learning_rate": 1.667865332469379e-07, "loss": 0.4753, "step": 1049 }, { "epoch": 0.8883248730964467, "grad_norm": 1.4733718633651733, "learning_rate": 1.643340080367267e-07, "loss": 0.4974, "step": 1050 }, { "epoch": 0.88917089678511, "grad_norm": 1.4999078512191772, "learning_rate": 1.6189903540709595e-07, "loss": 0.4722, "step": 1051 }, { "epoch": 0.8900169204737732, "grad_norm": 1.534686803817749, "learning_rate": 1.5948163365691798e-07, "loss": 0.5379, "step": 1052 }, { "epoch": 0.8908629441624365, "grad_norm": 1.3469022512435913, "learning_rate": 1.5708182095301867e-07, "loss": 0.5219, "step": 1053 }, { "epoch": 0.8917089678510999, "grad_norm": 1.2992980480194092, "learning_rate": 1.5469961533004258e-07, "loss": 0.4464, "step": 1054 }, { "epoch": 0.8925549915397631, "grad_norm": 1.1914132833480835, "learning_rate": 1.5233503469031686e-07, "loss": 0.3623, "step": 1055 }, { "epoch": 0.8934010152284264, "grad_norm": 1.5193549394607544, "learning_rate": 1.499880968037165e-07, "loss": 0.4728, "step": 1056 }, { "epoch": 0.8942470389170897, "grad_norm": 1.4608553647994995, "learning_rate": 1.4765881930752983e-07, "loss": 0.4644, "step": 1057 }, { "epoch": 0.8950930626057529, "grad_norm": 1.6205723285675049, "learning_rate": 1.4534721970632882e-07, "loss": 0.5703, "step": 1058 }, { "epoch": 0.8959390862944162, "grad_norm": 1.5539836883544922, "learning_rate": 1.4305331537183387e-07, "loss": 0.6296, "step": 1059 }, { "epoch": 0.8967851099830795, "grad_norm": 1.4455832242965698, "learning_rate": 1.4077712354278683e-07, "loss": 0.5241, "step": 1060 }, { "epoch": 0.8976311336717429, "grad_norm": 1.4322365522384644, "learning_rate": 1.385186613248171e-07, "loss": 0.4872, "step": 1061 }, { "epoch": 0.8984771573604061, "grad_norm": 1.5108438730239868, "learning_rate": 1.362779456903182e-07, "loss": 0.526, "step": 1062 }, { "epoch": 0.8993231810490694, "grad_norm": 1.3449329137802124, "learning_rate": 1.340549934783164e-07, "loss": 0.4537, "step": 1063 }, { "epoch": 0.9001692047377327, "grad_norm": 1.4694099426269531, "learning_rate": 1.3184982139434587e-07, "loss": 0.5095, "step": 1064 }, { "epoch": 0.9010152284263959, "grad_norm": 1.5920803546905518, "learning_rate": 1.2966244601032267e-07, "loss": 0.5452, "step": 1065 }, { "epoch": 0.9018612521150592, "grad_norm": 1.3615165948867798, "learning_rate": 1.2749288376442044e-07, "loss": 0.4741, "step": 1066 }, { "epoch": 0.9027072758037225, "grad_norm": 1.6377193927764893, "learning_rate": 1.253411509609459e-07, "loss": 0.555, "step": 1067 }, { "epoch": 0.9035532994923858, "grad_norm": 1.604526400566101, "learning_rate": 1.2320726377021836e-07, "loss": 0.5634, "step": 1068 }, { "epoch": 0.9043993231810491, "grad_norm": 1.342687964439392, "learning_rate": 1.2109123822844653e-07, "loss": 0.5433, "step": 1069 }, { "epoch": 0.9052453468697124, "grad_norm": 1.5053247213363647, "learning_rate": 1.1899309023760686e-07, "loss": 0.5671, "step": 1070 }, { "epoch": 0.9060913705583756, "grad_norm": 1.361431360244751, "learning_rate": 1.1691283556532846e-07, "loss": 0.5314, "step": 1071 }, { "epoch": 0.9069373942470389, "grad_norm": 1.2656420469284058, "learning_rate": 1.1485048984476998e-07, "loss": 0.4949, "step": 1072 }, { "epoch": 0.9077834179357022, "grad_norm": 1.4055432081222534, "learning_rate": 1.1280606857450387e-07, "loss": 0.4909, "step": 1073 }, { "epoch": 0.9086294416243654, "grad_norm": 1.4172112941741943, "learning_rate": 1.1077958711840032e-07, "loss": 0.4917, "step": 1074 }, { "epoch": 0.9094754653130288, "grad_norm": 1.3536546230316162, "learning_rate": 1.0877106070551175e-07, "loss": 0.4862, "step": 1075 }, { "epoch": 0.9103214890016921, "grad_norm": 1.4950735569000244, "learning_rate": 1.0678050442995802e-07, "loss": 0.479, "step": 1076 }, { "epoch": 0.9111675126903553, "grad_norm": 1.420637607574463, "learning_rate": 1.0480793325081174e-07, "loss": 0.5085, "step": 1077 }, { "epoch": 0.9120135363790186, "grad_norm": 1.3945380449295044, "learning_rate": 1.0285336199198858e-07, "loss": 0.4308, "step": 1078 }, { "epoch": 0.9128595600676819, "grad_norm": 1.4180177450180054, "learning_rate": 1.0091680534213389e-07, "loss": 0.4753, "step": 1079 }, { "epoch": 0.9137055837563451, "grad_norm": 1.3723344802856445, "learning_rate": 9.899827785451288e-08, "loss": 0.4353, "step": 1080 }, { "epoch": 0.9145516074450084, "grad_norm": 1.4225527048110962, "learning_rate": 9.709779394690144e-08, "loss": 0.5055, "step": 1081 }, { "epoch": 0.9153976311336718, "grad_norm": 1.4633773565292358, "learning_rate": 9.521536790147722e-08, "loss": 0.4279, "step": 1082 }, { "epoch": 0.916243654822335, "grad_norm": 1.5522949695587158, "learning_rate": 9.335101386471285e-08, "loss": 0.5099, "step": 1083 }, { "epoch": 0.9170896785109983, "grad_norm": 1.4376224279403687, "learning_rate": 9.150474584726926e-08, "loss": 0.5157, "step": 1084 }, { "epoch": 0.9179357021996616, "grad_norm": 1.551571249961853, "learning_rate": 8.967657772389032e-08, "loss": 0.5229, "step": 1085 }, { "epoch": 0.9187817258883249, "grad_norm": 1.5067143440246582, "learning_rate": 8.78665232332998e-08, "loss": 0.5731, "step": 1086 }, { "epoch": 0.9196277495769881, "grad_norm": 1.6685928106307983, "learning_rate": 8.607459597809565e-08, "loss": 0.5185, "step": 1087 }, { "epoch": 0.9204737732656514, "grad_norm": 1.6518747806549072, "learning_rate": 8.430080942465085e-08, "loss": 0.4492, "step": 1088 }, { "epoch": 0.9213197969543148, "grad_norm": 1.4557331800460815, "learning_rate": 8.254517690300946e-08, "loss": 0.4527, "step": 1089 }, { "epoch": 0.922165820642978, "grad_norm": 1.3865046501159668, "learning_rate": 8.080771160678763e-08, "loss": 0.4718, "step": 1090 }, { "epoch": 0.9230118443316413, "grad_norm": 1.6267703771591187, "learning_rate": 7.908842659307525e-08, "loss": 0.5811, "step": 1091 }, { "epoch": 0.9238578680203046, "grad_norm": 1.4735057353973389, "learning_rate": 7.738733478233673e-08, "loss": 0.5173, "step": 1092 }, { "epoch": 0.9247038917089678, "grad_norm": 1.2759696245193481, "learning_rate": 7.57044489583128e-08, "loss": 0.447, "step": 1093 }, { "epoch": 0.9255499153976311, "grad_norm": 1.3496994972229004, "learning_rate": 7.40397817679278e-08, "loss": 0.5328, "step": 1094 }, { "epoch": 0.9263959390862944, "grad_norm": 1.4382102489471436, "learning_rate": 7.239334572119172e-08, "loss": 0.4431, "step": 1095 }, { "epoch": 0.9272419627749577, "grad_norm": 1.497261643409729, "learning_rate": 7.076515319110688e-08, "loss": 0.4202, "step": 1096 }, { "epoch": 0.928087986463621, "grad_norm": 1.4099481105804443, "learning_rate": 6.915521641357504e-08, "loss": 0.4269, "step": 1097 }, { "epoch": 0.9289340101522843, "grad_norm": 1.5935684442520142, "learning_rate": 6.756354748730709e-08, "loss": 0.4807, "step": 1098 }, { "epoch": 0.9297800338409475, "grad_norm": 1.3946175575256348, "learning_rate": 6.599015837372907e-08, "loss": 0.4243, "step": 1099 }, { "epoch": 0.9306260575296108, "grad_norm": 1.605188012123108, "learning_rate": 6.443506089689411e-08, "loss": 0.4828, "step": 1100 }, { "epoch": 0.9314720812182741, "grad_norm": 1.4577580690383911, "learning_rate": 6.289826674339333e-08, "loss": 0.492, "step": 1101 }, { "epoch": 0.9323181049069373, "grad_norm": 1.3271147012710571, "learning_rate": 6.137978746226848e-08, "loss": 0.4126, "step": 1102 }, { "epoch": 0.9331641285956007, "grad_norm": 1.4398398399353027, "learning_rate": 5.987963446492384e-08, "loss": 0.5249, "step": 1103 }, { "epoch": 0.934010152284264, "grad_norm": 1.4152181148529053, "learning_rate": 5.839781902504227e-08, "loss": 0.4867, "step": 1104 }, { "epoch": 0.9348561759729273, "grad_norm": 1.6749119758605957, "learning_rate": 5.693435227849875e-08, "loss": 0.5718, "step": 1105 }, { "epoch": 0.9357021996615905, "grad_norm": 1.4649239778518677, "learning_rate": 5.548924522327748e-08, "loss": 0.4816, "step": 1106 }, { "epoch": 0.9365482233502538, "grad_norm": 1.4536762237548828, "learning_rate": 5.406250871938912e-08, "loss": 0.4235, "step": 1107 }, { "epoch": 0.937394247038917, "grad_norm": 1.7883013486862183, "learning_rate": 5.265415348879005e-08, "loss": 0.4966, "step": 1108 }, { "epoch": 0.9382402707275804, "grad_norm": 1.671594500541687, "learning_rate": 5.126419011529993e-08, "loss": 0.5989, "step": 1109 }, { "epoch": 0.9390862944162437, "grad_norm": 1.4351028203964233, "learning_rate": 4.989262904452369e-08, "loss": 0.4604, "step": 1110 }, { "epoch": 0.939932318104907, "grad_norm": 1.4522861242294312, "learning_rate": 4.853948058377245e-08, "loss": 0.5147, "step": 1111 }, { "epoch": 0.9407783417935702, "grad_norm": 1.4232293367385864, "learning_rate": 4.720475490198634e-08, "loss": 0.4657, "step": 1112 }, { "epoch": 0.9416243654822335, "grad_norm": 1.4143102169036865, "learning_rate": 4.5888462029658186e-08, "loss": 0.5611, "step": 1113 }, { "epoch": 0.9424703891708968, "grad_norm": 1.4739620685577393, "learning_rate": 4.4590611858756906e-08, "loss": 0.5188, "step": 1114 }, { "epoch": 0.94331641285956, "grad_norm": 1.3989241123199463, "learning_rate": 4.3311214142654766e-08, "loss": 0.5242, "step": 1115 }, { "epoch": 0.9441624365482234, "grad_norm": 1.3451173305511475, "learning_rate": 4.205027849605359e-08, "loss": 0.4451, "step": 1116 }, { "epoch": 0.9450084602368867, "grad_norm": 1.3867361545562744, "learning_rate": 4.0807814394911996e-08, "loss": 0.5151, "step": 1117 }, { "epoch": 0.9458544839255499, "grad_norm": 1.344876766204834, "learning_rate": 3.9583831176374654e-08, "loss": 0.4618, "step": 1118 }, { "epoch": 0.9467005076142132, "grad_norm": 1.4002844095230103, "learning_rate": 3.837833803870178e-08, "loss": 0.4737, "step": 1119 }, { "epoch": 0.9475465313028765, "grad_norm": 1.4744623899459839, "learning_rate": 3.7191344041200836e-08, "loss": 0.5001, "step": 1120 }, { "epoch": 0.9483925549915397, "grad_norm": 1.4199622869491577, "learning_rate": 3.602285810415718e-08, "loss": 0.4648, "step": 1121 }, { "epoch": 0.949238578680203, "grad_norm": 1.4660803079605103, "learning_rate": 3.4872889008767954e-08, "loss": 0.5241, "step": 1122 }, { "epoch": 0.9500846023688664, "grad_norm": 1.4228583574295044, "learning_rate": 3.37414453970758e-08, "loss": 0.4474, "step": 1123 }, { "epoch": 0.9509306260575296, "grad_norm": 1.6068693399429321, "learning_rate": 3.262853577190445e-08, "loss": 0.5052, "step": 1124 }, { "epoch": 0.9517766497461929, "grad_norm": 1.5408366918563843, "learning_rate": 3.153416849679347e-08, "loss": 0.5581, "step": 1125 }, { "epoch": 0.9526226734348562, "grad_norm": 1.6030367612838745, "learning_rate": 3.04583517959367e-08, "loss": 0.5493, "step": 1126 }, { "epoch": 0.9534686971235194, "grad_norm": 1.3581690788269043, "learning_rate": 2.940109375411976e-08, "loss": 0.4723, "step": 1127 }, { "epoch": 0.9543147208121827, "grad_norm": 1.3531447649002075, "learning_rate": 2.8362402316660374e-08, "loss": 0.4892, "step": 1128 }, { "epoch": 0.955160744500846, "grad_norm": 1.4876835346221924, "learning_rate": 2.734228528934679e-08, "loss": 0.5043, "step": 1129 }, { "epoch": 0.9560067681895094, "grad_norm": 1.703354835510254, "learning_rate": 2.634075033838057e-08, "loss": 0.4791, "step": 1130 }, { "epoch": 0.9568527918781726, "grad_norm": 1.5040783882141113, "learning_rate": 2.535780499031887e-08, "loss": 0.4949, "step": 1131 }, { "epoch": 0.9576988155668359, "grad_norm": 1.524038314819336, "learning_rate": 2.4393456632016977e-08, "loss": 0.5314, "step": 1132 }, { "epoch": 0.9585448392554992, "grad_norm": 1.4656578302383423, "learning_rate": 2.3447712510573928e-08, "loss": 0.5389, "step": 1133 }, { "epoch": 0.9593908629441624, "grad_norm": 1.492907166481018, "learning_rate": 2.2520579733277258e-08, "loss": 0.4928, "step": 1134 }, { "epoch": 0.9602368866328257, "grad_norm": 1.4177685976028442, "learning_rate": 2.161206526754972e-08, "loss": 0.4866, "step": 1135 }, { "epoch": 0.961082910321489, "grad_norm": 1.377503514289856, "learning_rate": 2.072217594089765e-08, "loss": 0.5325, "step": 1136 }, { "epoch": 0.9619289340101523, "grad_norm": 1.5548027753829956, "learning_rate": 1.985091844085796e-08, "loss": 0.5236, "step": 1137 }, { "epoch": 0.9627749576988156, "grad_norm": 1.4531538486480713, "learning_rate": 1.899829931495012e-08, "loss": 0.5555, "step": 1138 }, { "epoch": 0.9636209813874789, "grad_norm": 1.5614752769470215, "learning_rate": 1.8164324970625646e-08, "loss": 0.4734, "step": 1139 }, { "epoch": 0.9644670050761421, "grad_norm": 1.4508945941925049, "learning_rate": 1.7349001675219245e-08, "loss": 0.4839, "step": 1140 }, { "epoch": 0.9653130287648054, "grad_norm": 1.348341464996338, "learning_rate": 1.65523355559033e-08, "loss": 0.4769, "step": 1141 }, { "epoch": 0.9661590524534687, "grad_norm": 1.5168187618255615, "learning_rate": 1.5774332599641228e-08, "loss": 0.4738, "step": 1142 }, { "epoch": 0.9670050761421319, "grad_norm": 1.6730620861053467, "learning_rate": 1.501499865314171e-08, "loss": 0.6032, "step": 1143 }, { "epoch": 0.9678510998307953, "grad_norm": 1.559067726135254, "learning_rate": 1.4274339422816197e-08, "loss": 0.4942, "step": 1144 }, { "epoch": 0.9686971235194586, "grad_norm": 1.3421351909637451, "learning_rate": 1.3552360474734794e-08, "loss": 0.4029, "step": 1145 }, { "epoch": 0.9695431472081218, "grad_norm": 1.4648717641830444, "learning_rate": 1.2849067234584623e-08, "loss": 0.49, "step": 1146 }, { "epoch": 0.9703891708967851, "grad_norm": 1.444698691368103, "learning_rate": 1.2164464987630131e-08, "loss": 0.4617, "step": 1147 }, { "epoch": 0.9712351945854484, "grad_norm": 1.3629822731018066, "learning_rate": 1.1498558878672017e-08, "loss": 0.5096, "step": 1148 }, { "epoch": 0.9720812182741116, "grad_norm": 1.6496318578720093, "learning_rate": 1.0851353912008644e-08, "loss": 0.5134, "step": 1149 }, { "epoch": 0.9729272419627749, "grad_norm": 1.529559850692749, "learning_rate": 1.0222854951399408e-08, "loss": 0.5383, "step": 1150 }, { "epoch": 0.9737732656514383, "grad_norm": 1.5004898309707642, "learning_rate": 9.613066720028097e-09, "loss": 0.5168, "step": 1151 }, { "epoch": 0.9746192893401016, "grad_norm": 1.316214919090271, "learning_rate": 9.021993800466256e-09, "loss": 0.439, "step": 1152 }, { "epoch": 0.9754653130287648, "grad_norm": 1.5756371021270752, "learning_rate": 8.449640634639878e-09, "loss": 0.5111, "step": 1153 }, { "epoch": 0.9763113367174281, "grad_norm": 1.5366209745407104, "learning_rate": 7.896011523794989e-09, "loss": 0.5319, "step": 1154 }, { "epoch": 0.9771573604060914, "grad_norm": 1.3801406621932983, "learning_rate": 7.361110628466839e-09, "loss": 0.5529, "step": 1155 }, { "epoch": 0.9780033840947546, "grad_norm": 1.5490000247955322, "learning_rate": 6.84494196844715e-09, "loss": 0.5038, "step": 1156 }, { "epoch": 0.9788494077834179, "grad_norm": 1.5412358045578003, "learning_rate": 6.347509422754139e-09, "loss": 0.4155, "step": 1157 }, { "epoch": 0.9796954314720813, "grad_norm": 1.4816324710845947, "learning_rate": 5.868816729604765e-09, "loss": 0.4967, "step": 1158 }, { "epoch": 0.9805414551607445, "grad_norm": 1.3229267597198486, "learning_rate": 5.408867486384473e-09, "loss": 0.4326, "step": 1159 }, { "epoch": 0.9813874788494078, "grad_norm": 1.5264832973480225, "learning_rate": 4.9676651496222136e-09, "loss": 0.4914, "step": 1160 }, { "epoch": 0.9822335025380711, "grad_norm": 1.4101085662841797, "learning_rate": 4.5452130349629694e-09, "loss": 0.5168, "step": 1161 }, { "epoch": 0.9830795262267343, "grad_norm": 1.574628472328186, "learning_rate": 4.1415143171436026e-09, "loss": 0.504, "step": 1162 }, { "epoch": 0.9839255499153976, "grad_norm": 1.586148738861084, "learning_rate": 3.756572029968708e-09, "loss": 0.4851, "step": 1163 }, { "epoch": 0.9847715736040609, "grad_norm": 1.5436687469482422, "learning_rate": 3.390389066287858e-09, "loss": 0.4857, "step": 1164 }, { "epoch": 0.9856175972927242, "grad_norm": 1.4142553806304932, "learning_rate": 3.0429681779739485e-09, "loss": 0.4534, "step": 1165 }, { "epoch": 0.9864636209813875, "grad_norm": 1.3092448711395264, "learning_rate": 2.7143119759026614e-09, "loss": 0.4619, "step": 1166 }, { "epoch": 0.9873096446700508, "grad_norm": 1.4233113527297974, "learning_rate": 2.404422929932204e-09, "loss": 0.5151, "step": 1167 }, { "epoch": 0.988155668358714, "grad_norm": 1.3810383081436157, "learning_rate": 2.1133033688858217e-09, "loss": 0.4998, "step": 1168 }, { "epoch": 0.9890016920473773, "grad_norm": 1.4071393013000488, "learning_rate": 1.8409554805329243e-09, "loss": 0.4839, "step": 1169 }, { "epoch": 0.9898477157360406, "grad_norm": 1.4692143201828003, "learning_rate": 1.5873813115740989e-09, "loss": 0.4573, "step": 1170 }, { "epoch": 0.9906937394247038, "grad_norm": 1.4754483699798584, "learning_rate": 1.3525827676247327e-09, "loss": 0.5774, "step": 1171 }, { "epoch": 0.9915397631133672, "grad_norm": 1.3921698331832886, "learning_rate": 1.1365616132008595e-09, "loss": 0.488, "step": 1172 }, { "epoch": 0.9923857868020305, "grad_norm": 1.3214542865753174, "learning_rate": 9.393194717061127e-10, "loss": 0.5449, "step": 1173 }, { "epoch": 0.9932318104906938, "grad_norm": 1.474719524383545, "learning_rate": 7.608578254195143e-10, "loss": 0.4718, "step": 1174 }, { "epoch": 0.994077834179357, "grad_norm": 1.3366878032684326, "learning_rate": 6.011780154843716e-10, "loss": 0.5207, "step": 1175 }, { "epoch": 0.9949238578680203, "grad_norm": 1.5625263452529907, "learning_rate": 4.602812418974534e-10, "loss": 0.5025, "step": 1176 }, { "epoch": 0.9957698815566836, "grad_norm": 1.4968297481536865, "learning_rate": 3.3816856350177284e-10, "loss": 0.5409, "step": 1177 }, { "epoch": 0.9966159052453468, "grad_norm": 1.4987884759902954, "learning_rate": 2.348408979760408e-10, "loss": 0.4497, "step": 1178 }, { "epoch": 0.9974619289340102, "grad_norm": 1.578500509262085, "learning_rate": 1.502990218302247e-10, "loss": 0.5731, "step": 1179 }, { "epoch": 0.9983079526226735, "grad_norm": 1.4984197616577148, "learning_rate": 8.454357039860972e-11, "loss": 0.4767, "step": 1180 }, { "epoch": 0.9991539763113367, "grad_norm": 1.5081285238265991, "learning_rate": 3.7575037834247655e-11, "loss": 0.5204, "step": 1181 }, { "epoch": 1.0, "grad_norm": 1.4636836051940918, "learning_rate": 9.393777107291614e-12, "loss": 0.5414, "step": 1182 }, { "epoch": 1.0, "step": 1182, "total_flos": 8.436898797652541e+17, "train_loss": 0.5417148913627147, "train_runtime": 7080.4391, "train_samples_per_second": 4.674, "train_steps_per_second": 0.167 } ], "logging_steps": 1.0, "max_steps": 1182, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.436898797652541e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }