{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5706134094151213, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001426533523537803, "grad_norm": 1.1729735136032104, "learning_rate": 0.0, "loss": 1.583, "step": 1 }, { "epoch": 0.0002853067047075606, "grad_norm": 1.002526879310608, "learning_rate": 1.584786053882726e-08, "loss": 1.5026, "step": 2 }, { "epoch": 0.00042796005706134097, "grad_norm": 0.9373162984848022, "learning_rate": 3.169572107765452e-08, "loss": 1.5626, "step": 3 }, { "epoch": 0.0005706134094151213, "grad_norm": 1.0274591445922852, "learning_rate": 4.754358161648178e-08, "loss": 1.5833, "step": 4 }, { "epoch": 0.0007132667617689016, "grad_norm": 0.790949821472168, "learning_rate": 6.339144215530904e-08, "loss": 1.6648, "step": 5 }, { "epoch": 0.0008559201141226819, "grad_norm": 1.735785961151123, "learning_rate": 7.92393026941363e-08, "loss": 1.5637, "step": 6 }, { "epoch": 0.0009985734664764623, "grad_norm": 0.6724671721458435, "learning_rate": 9.508716323296357e-08, "loss": 1.601, "step": 7 }, { "epoch": 0.0011412268188302425, "grad_norm": 1.1278249025344849, "learning_rate": 1.1093502377179082e-07, "loss": 1.6493, "step": 8 }, { "epoch": 0.0012838801711840227, "grad_norm": 0.800915539264679, "learning_rate": 1.2678288431061808e-07, "loss": 1.5741, "step": 9 }, { "epoch": 0.0014265335235378032, "grad_norm": 1.5563911199569702, "learning_rate": 1.4263074484944535e-07, "loss": 1.6916, "step": 10 }, { "epoch": 0.0015691868758915834, "grad_norm": 0.9552595019340515, "learning_rate": 1.584786053882726e-07, "loss": 1.5376, "step": 11 }, { "epoch": 0.0017118402282453639, "grad_norm": 0.8556690812110901, "learning_rate": 1.7432646592709986e-07, "loss": 1.5753, "step": 12 }, { "epoch": 0.001854493580599144, "grad_norm": 1.0253041982650757, "learning_rate": 1.9017432646592713e-07, "loss": 1.6666, "step": 13 }, { "epoch": 0.0019971469329529245, "grad_norm": 0.8423359394073486, "learning_rate": 2.0602218700475438e-07, "loss": 1.4988, "step": 14 }, { "epoch": 0.0021398002853067048, "grad_norm": 1.2031570672988892, "learning_rate": 2.2187004754358165e-07, "loss": 1.6674, "step": 15 }, { "epoch": 0.002282453637660485, "grad_norm": 0.9490939974784851, "learning_rate": 2.3771790808240892e-07, "loss": 1.4538, "step": 16 }, { "epoch": 0.0024251069900142652, "grad_norm": 0.9502609968185425, "learning_rate": 2.5356576862123616e-07, "loss": 1.5736, "step": 17 }, { "epoch": 0.0025677603423680455, "grad_norm": 0.8738661408424377, "learning_rate": 2.694136291600634e-07, "loss": 1.676, "step": 18 }, { "epoch": 0.002710413694721826, "grad_norm": 0.9073160290718079, "learning_rate": 2.852614896988907e-07, "loss": 1.5503, "step": 19 }, { "epoch": 0.0028530670470756064, "grad_norm": 0.6794393062591553, "learning_rate": 3.0110935023771794e-07, "loss": 1.6698, "step": 20 }, { "epoch": 0.0029957203994293866, "grad_norm": 0.9852645397186279, "learning_rate": 3.169572107765452e-07, "loss": 1.5954, "step": 21 }, { "epoch": 0.003138373751783167, "grad_norm": 1.0426123142242432, "learning_rate": 3.3280507131537243e-07, "loss": 1.633, "step": 22 }, { "epoch": 0.003281027104136947, "grad_norm": 0.5876559615135193, "learning_rate": 3.486529318541997e-07, "loss": 1.5085, "step": 23 }, { "epoch": 0.0034236804564907277, "grad_norm": 1.1310482025146484, "learning_rate": 3.6450079239302697e-07, "loss": 1.7416, "step": 24 }, { "epoch": 0.003566333808844508, "grad_norm": 0.7944432497024536, "learning_rate": 3.8034865293185427e-07, "loss": 1.5737, "step": 25 }, { "epoch": 0.003708987161198288, "grad_norm": 1.0518521070480347, "learning_rate": 3.9619651347068146e-07, "loss": 1.6472, "step": 26 }, { "epoch": 0.0038516405135520684, "grad_norm": 0.7828543782234192, "learning_rate": 4.1204437400950875e-07, "loss": 1.5648, "step": 27 }, { "epoch": 0.003994293865905849, "grad_norm": 1.0826324224472046, "learning_rate": 4.27892234548336e-07, "loss": 1.6081, "step": 28 }, { "epoch": 0.004136947218259629, "grad_norm": 1.753553032875061, "learning_rate": 4.437400950871633e-07, "loss": 1.6047, "step": 29 }, { "epoch": 0.0042796005706134095, "grad_norm": 0.9000786542892456, "learning_rate": 4.5958795562599054e-07, "loss": 1.6023, "step": 30 }, { "epoch": 0.00442225392296719, "grad_norm": 0.694839596748352, "learning_rate": 4.7543581616481783e-07, "loss": 1.6433, "step": 31 }, { "epoch": 0.00456490727532097, "grad_norm": 1.0806282758712769, "learning_rate": 4.91283676703645e-07, "loss": 1.6082, "step": 32 }, { "epoch": 0.00470756062767475, "grad_norm": 0.9509055018424988, "learning_rate": 5.071315372424723e-07, "loss": 1.566, "step": 33 }, { "epoch": 0.0048502139800285305, "grad_norm": 0.7939223647117615, "learning_rate": 5.229793977812995e-07, "loss": 1.4584, "step": 34 }, { "epoch": 0.004992867332382311, "grad_norm": 0.8554040789604187, "learning_rate": 5.388272583201268e-07, "loss": 1.5141, "step": 35 }, { "epoch": 0.005135520684736091, "grad_norm": 1.0103391408920288, "learning_rate": 5.546751188589541e-07, "loss": 1.6457, "step": 36 }, { "epoch": 0.005278174037089872, "grad_norm": 0.9377884864807129, "learning_rate": 5.705229793977814e-07, "loss": 1.535, "step": 37 }, { "epoch": 0.005420827389443652, "grad_norm": 0.9149576425552368, "learning_rate": 5.863708399366086e-07, "loss": 1.5309, "step": 38 }, { "epoch": 0.0055634807417974325, "grad_norm": 0.98841392993927, "learning_rate": 6.022187004754359e-07, "loss": 1.4208, "step": 39 }, { "epoch": 0.005706134094151213, "grad_norm": 1.0452731847763062, "learning_rate": 6.180665610142631e-07, "loss": 1.4497, "step": 40 }, { "epoch": 0.005848787446504993, "grad_norm": 1.0485177040100098, "learning_rate": 6.339144215530904e-07, "loss": 1.6303, "step": 41 }, { "epoch": 0.005991440798858773, "grad_norm": 0.7732070684432983, "learning_rate": 6.497622820919177e-07, "loss": 1.6519, "step": 42 }, { "epoch": 0.006134094151212553, "grad_norm": 1.2481422424316406, "learning_rate": 6.656101426307449e-07, "loss": 1.5816, "step": 43 }, { "epoch": 0.006276747503566334, "grad_norm": 1.115005612373352, "learning_rate": 6.814580031695723e-07, "loss": 1.5048, "step": 44 }, { "epoch": 0.006419400855920114, "grad_norm": 0.9483717083930969, "learning_rate": 6.973058637083995e-07, "loss": 1.6211, "step": 45 }, { "epoch": 0.006562054208273894, "grad_norm": 0.8824179172515869, "learning_rate": 7.131537242472266e-07, "loss": 1.5428, "step": 46 }, { "epoch": 0.006704707560627674, "grad_norm": 1.1314977407455444, "learning_rate": 7.290015847860539e-07, "loss": 1.6307, "step": 47 }, { "epoch": 0.0068473609129814554, "grad_norm": 1.2876425981521606, "learning_rate": 7.448494453248812e-07, "loss": 1.5141, "step": 48 }, { "epoch": 0.006990014265335236, "grad_norm": 0.9325816631317139, "learning_rate": 7.606973058637085e-07, "loss": 1.5708, "step": 49 }, { "epoch": 0.007132667617689016, "grad_norm": 0.9257802367210388, "learning_rate": 7.765451664025357e-07, "loss": 1.5799, "step": 50 }, { "epoch": 0.007275320970042796, "grad_norm": 1.2095764875411987, "learning_rate": 7.923930269413629e-07, "loss": 1.6415, "step": 51 }, { "epoch": 0.007417974322396576, "grad_norm": 0.7863972783088684, "learning_rate": 8.082408874801903e-07, "loss": 1.5792, "step": 52 }, { "epoch": 0.007560627674750357, "grad_norm": 0.914885401725769, "learning_rate": 8.240887480190175e-07, "loss": 1.5083, "step": 53 }, { "epoch": 0.007703281027104137, "grad_norm": 1.314627766609192, "learning_rate": 8.399366085578448e-07, "loss": 1.5692, "step": 54 }, { "epoch": 0.007845934379457917, "grad_norm": 0.7810591459274292, "learning_rate": 8.55784469096672e-07, "loss": 1.6234, "step": 55 }, { "epoch": 0.007988587731811698, "grad_norm": 1.2098760604858398, "learning_rate": 8.716323296354994e-07, "loss": 1.5646, "step": 56 }, { "epoch": 0.008131241084165478, "grad_norm": 1.0744049549102783, "learning_rate": 8.874801901743266e-07, "loss": 1.5639, "step": 57 }, { "epoch": 0.008273894436519259, "grad_norm": 1.0017086267471313, "learning_rate": 9.033280507131538e-07, "loss": 1.6126, "step": 58 }, { "epoch": 0.008416547788873038, "grad_norm": 0.9702041745185852, "learning_rate": 9.191759112519811e-07, "loss": 1.5723, "step": 59 }, { "epoch": 0.008559201141226819, "grad_norm": 0.9367918968200684, "learning_rate": 9.350237717908083e-07, "loss": 1.5393, "step": 60 }, { "epoch": 0.008701854493580598, "grad_norm": 0.6486847996711731, "learning_rate": 9.508716323296357e-07, "loss": 1.4801, "step": 61 }, { "epoch": 0.00884450784593438, "grad_norm": 0.9823785424232483, "learning_rate": 9.667194928684629e-07, "loss": 1.3937, "step": 62 }, { "epoch": 0.008987161198288159, "grad_norm": 0.9961357712745667, "learning_rate": 9.8256735340729e-07, "loss": 1.576, "step": 63 }, { "epoch": 0.00912981455064194, "grad_norm": 0.9098517894744873, "learning_rate": 9.984152139461172e-07, "loss": 1.6427, "step": 64 }, { "epoch": 0.009272467902995721, "grad_norm": 0.8556783199310303, "learning_rate": 1.0142630744849446e-06, "loss": 1.5688, "step": 65 }, { "epoch": 0.0094151212553495, "grad_norm": 0.8681414127349854, "learning_rate": 1.0301109350237718e-06, "loss": 1.5462, "step": 66 }, { "epoch": 0.009557774607703282, "grad_norm": 1.2367175817489624, "learning_rate": 1.045958795562599e-06, "loss": 1.4963, "step": 67 }, { "epoch": 0.009700427960057061, "grad_norm": 1.0021933317184448, "learning_rate": 1.0618066561014264e-06, "loss": 1.495, "step": 68 }, { "epoch": 0.009843081312410842, "grad_norm": 1.1043912172317505, "learning_rate": 1.0776545166402536e-06, "loss": 1.6672, "step": 69 }, { "epoch": 0.009985734664764621, "grad_norm": 1.1770638227462769, "learning_rate": 1.093502377179081e-06, "loss": 1.6216, "step": 70 }, { "epoch": 0.010128388017118403, "grad_norm": 1.0524466037750244, "learning_rate": 1.1093502377179082e-06, "loss": 1.5777, "step": 71 }, { "epoch": 0.010271041369472182, "grad_norm": 0.8070217370986938, "learning_rate": 1.1251980982567354e-06, "loss": 1.4975, "step": 72 }, { "epoch": 0.010413694721825963, "grad_norm": 0.9641265869140625, "learning_rate": 1.1410459587955628e-06, "loss": 1.5548, "step": 73 }, { "epoch": 0.010556348074179744, "grad_norm": 0.6958743333816528, "learning_rate": 1.15689381933439e-06, "loss": 1.7126, "step": 74 }, { "epoch": 0.010699001426533523, "grad_norm": 0.918315589427948, "learning_rate": 1.1727416798732172e-06, "loss": 1.59, "step": 75 }, { "epoch": 0.010841654778887305, "grad_norm": 1.1720390319824219, "learning_rate": 1.1885895404120444e-06, "loss": 1.5478, "step": 76 }, { "epoch": 0.010984308131241084, "grad_norm": 1.5540131330490112, "learning_rate": 1.2044374009508718e-06, "loss": 1.6854, "step": 77 }, { "epoch": 0.011126961483594865, "grad_norm": 0.7053331732749939, "learning_rate": 1.220285261489699e-06, "loss": 1.6028, "step": 78 }, { "epoch": 0.011269614835948644, "grad_norm": 0.7510805130004883, "learning_rate": 1.2361331220285262e-06, "loss": 1.5807, "step": 79 }, { "epoch": 0.011412268188302425, "grad_norm": 0.6899828910827637, "learning_rate": 1.2519809825673536e-06, "loss": 1.4624, "step": 80 }, { "epoch": 0.011554921540656205, "grad_norm": 0.6752126216888428, "learning_rate": 1.2678288431061807e-06, "loss": 1.5698, "step": 81 }, { "epoch": 0.011697574893009986, "grad_norm": 0.8109561800956726, "learning_rate": 1.283676703645008e-06, "loss": 1.633, "step": 82 }, { "epoch": 0.011840228245363765, "grad_norm": 0.9425901770591736, "learning_rate": 1.2995245641838353e-06, "loss": 1.4105, "step": 83 }, { "epoch": 0.011982881597717546, "grad_norm": 1.0923776626586914, "learning_rate": 1.3153724247226625e-06, "loss": 1.5132, "step": 84 }, { "epoch": 0.012125534950071327, "grad_norm": 0.7857595086097717, "learning_rate": 1.3312202852614897e-06, "loss": 1.5801, "step": 85 }, { "epoch": 0.012268188302425107, "grad_norm": 0.9032093286514282, "learning_rate": 1.3470681458003171e-06, "loss": 1.5532, "step": 86 }, { "epoch": 0.012410841654778888, "grad_norm": 0.994260847568512, "learning_rate": 1.3629160063391445e-06, "loss": 1.4339, "step": 87 }, { "epoch": 0.012553495007132667, "grad_norm": 0.6411845088005066, "learning_rate": 1.3787638668779715e-06, "loss": 1.5609, "step": 88 }, { "epoch": 0.012696148359486448, "grad_norm": 0.7005854845046997, "learning_rate": 1.394611727416799e-06, "loss": 1.5113, "step": 89 }, { "epoch": 0.012838801711840228, "grad_norm": 0.9326108694076538, "learning_rate": 1.4104595879556259e-06, "loss": 1.6072, "step": 90 }, { "epoch": 0.012981455064194009, "grad_norm": 0.9695913195610046, "learning_rate": 1.4263074484944533e-06, "loss": 1.3987, "step": 91 }, { "epoch": 0.013124108416547788, "grad_norm": 1.1596570014953613, "learning_rate": 1.4421553090332807e-06, "loss": 1.5134, "step": 92 }, { "epoch": 0.01326676176890157, "grad_norm": 1.1897401809692383, "learning_rate": 1.4580031695721079e-06, "loss": 1.6665, "step": 93 }, { "epoch": 0.013409415121255349, "grad_norm": 0.829107940196991, "learning_rate": 1.473851030110935e-06, "loss": 1.5133, "step": 94 }, { "epoch": 0.01355206847360913, "grad_norm": 0.7455458045005798, "learning_rate": 1.4896988906497625e-06, "loss": 1.585, "step": 95 }, { "epoch": 0.013694721825962911, "grad_norm": 0.758978009223938, "learning_rate": 1.5055467511885897e-06, "loss": 1.45, "step": 96 }, { "epoch": 0.01383737517831669, "grad_norm": 1.5633254051208496, "learning_rate": 1.521394611727417e-06, "loss": 1.5009, "step": 97 }, { "epoch": 0.013980028530670471, "grad_norm": 0.9118133187294006, "learning_rate": 1.537242472266244e-06, "loss": 1.5366, "step": 98 }, { "epoch": 0.01412268188302425, "grad_norm": 0.877566933631897, "learning_rate": 1.5530903328050714e-06, "loss": 1.5132, "step": 99 }, { "epoch": 0.014265335235378032, "grad_norm": 0.9620270133018494, "learning_rate": 1.5689381933438988e-06, "loss": 1.5842, "step": 100 }, { "epoch": 0.014407988587731811, "grad_norm": 1.0387673377990723, "learning_rate": 1.5847860538827258e-06, "loss": 1.4847, "step": 101 }, { "epoch": 0.014550641940085592, "grad_norm": 0.9701688885688782, "learning_rate": 1.6006339144215532e-06, "loss": 1.507, "step": 102 }, { "epoch": 0.014693295292439372, "grad_norm": 0.7710080146789551, "learning_rate": 1.6164817749603806e-06, "loss": 1.4709, "step": 103 }, { "epoch": 0.014835948644793153, "grad_norm": 0.6720123291015625, "learning_rate": 1.6323296354992076e-06, "loss": 1.5219, "step": 104 }, { "epoch": 0.014978601997146932, "grad_norm": 0.9765576124191284, "learning_rate": 1.648177496038035e-06, "loss": 1.4367, "step": 105 }, { "epoch": 0.015121255349500713, "grad_norm": 0.8261208534240723, "learning_rate": 1.6640253565768622e-06, "loss": 1.5229, "step": 106 }, { "epoch": 0.015263908701854494, "grad_norm": 0.6505538821220398, "learning_rate": 1.6798732171156896e-06, "loss": 1.53, "step": 107 }, { "epoch": 0.015406562054208274, "grad_norm": 0.8936634659767151, "learning_rate": 1.6957210776545168e-06, "loss": 1.4057, "step": 108 }, { "epoch": 0.015549215406562055, "grad_norm": 0.6728257536888123, "learning_rate": 1.711568938193344e-06, "loss": 1.5097, "step": 109 }, { "epoch": 0.015691868758915834, "grad_norm": 0.8790159225463867, "learning_rate": 1.7274167987321714e-06, "loss": 1.4864, "step": 110 }, { "epoch": 0.015834522111269615, "grad_norm": 0.7423803806304932, "learning_rate": 1.7432646592709988e-06, "loss": 1.5311, "step": 111 }, { "epoch": 0.015977175463623396, "grad_norm": 0.8326552510261536, "learning_rate": 1.7591125198098258e-06, "loss": 1.4527, "step": 112 }, { "epoch": 0.016119828815977174, "grad_norm": 0.570444643497467, "learning_rate": 1.7749603803486532e-06, "loss": 1.4319, "step": 113 }, { "epoch": 0.016262482168330955, "grad_norm": 1.3569951057434082, "learning_rate": 1.7908082408874801e-06, "loss": 1.2923, "step": 114 }, { "epoch": 0.016405135520684736, "grad_norm": 0.9770404696464539, "learning_rate": 1.8066561014263076e-06, "loss": 1.5535, "step": 115 }, { "epoch": 0.016547788873038517, "grad_norm": 0.5709965825080872, "learning_rate": 1.822503961965135e-06, "loss": 1.5976, "step": 116 }, { "epoch": 0.0166904422253923, "grad_norm": 0.6833900809288025, "learning_rate": 1.8383518225039621e-06, "loss": 1.5812, "step": 117 }, { "epoch": 0.016833095577746076, "grad_norm": 0.5152895450592041, "learning_rate": 1.8541996830427893e-06, "loss": 1.5289, "step": 118 }, { "epoch": 0.016975748930099857, "grad_norm": 1.2101069688796997, "learning_rate": 1.8700475435816165e-06, "loss": 1.401, "step": 119 }, { "epoch": 0.017118402282453638, "grad_norm": 0.7375568747520447, "learning_rate": 1.885895404120444e-06, "loss": 1.4543, "step": 120 }, { "epoch": 0.01726105563480742, "grad_norm": 0.5590512156486511, "learning_rate": 1.9017432646592713e-06, "loss": 1.4852, "step": 121 }, { "epoch": 0.017403708987161197, "grad_norm": 0.8461640477180481, "learning_rate": 1.9175911251980985e-06, "loss": 1.4435, "step": 122 }, { "epoch": 0.017546362339514978, "grad_norm": 0.8407596945762634, "learning_rate": 1.9334389857369257e-06, "loss": 1.3145, "step": 123 }, { "epoch": 0.01768901569186876, "grad_norm": 0.6710478067398071, "learning_rate": 1.949286846275753e-06, "loss": 1.6102, "step": 124 }, { "epoch": 0.01783166904422254, "grad_norm": 0.5850281119346619, "learning_rate": 1.96513470681458e-06, "loss": 1.4688, "step": 125 }, { "epoch": 0.017974322396576318, "grad_norm": 0.7517263293266296, "learning_rate": 1.9809825673534077e-06, "loss": 1.3069, "step": 126 }, { "epoch": 0.0181169757489301, "grad_norm": 0.534304141998291, "learning_rate": 1.9968304278922345e-06, "loss": 1.6399, "step": 127 }, { "epoch": 0.01825962910128388, "grad_norm": 0.6381033658981323, "learning_rate": 2.012678288431062e-06, "loss": 1.4489, "step": 128 }, { "epoch": 0.01840228245363766, "grad_norm": 0.5316737294197083, "learning_rate": 2.0285261489698893e-06, "loss": 1.5069, "step": 129 }, { "epoch": 0.018544935805991442, "grad_norm": 0.5412260293960571, "learning_rate": 2.0443740095087165e-06, "loss": 1.4341, "step": 130 }, { "epoch": 0.01868758915834522, "grad_norm": 0.5599603652954102, "learning_rate": 2.0602218700475437e-06, "loss": 1.5583, "step": 131 }, { "epoch": 0.018830242510699, "grad_norm": 0.7412648797035217, "learning_rate": 2.0760697305863713e-06, "loss": 1.3646, "step": 132 }, { "epoch": 0.018972895863052782, "grad_norm": 0.6201611757278442, "learning_rate": 2.091917591125198e-06, "loss": 1.3277, "step": 133 }, { "epoch": 0.019115549215406563, "grad_norm": 0.6947835683822632, "learning_rate": 2.1077654516640257e-06, "loss": 1.2874, "step": 134 }, { "epoch": 0.01925820256776034, "grad_norm": 0.7464353442192078, "learning_rate": 2.123613312202853e-06, "loss": 1.4791, "step": 135 }, { "epoch": 0.019400855920114122, "grad_norm": 0.9278052449226379, "learning_rate": 2.13946117274168e-06, "loss": 1.3071, "step": 136 }, { "epoch": 0.019543509272467903, "grad_norm": 0.713912844657898, "learning_rate": 2.1553090332805072e-06, "loss": 1.3207, "step": 137 }, { "epoch": 0.019686162624821684, "grad_norm": 0.7252569794654846, "learning_rate": 2.1711568938193344e-06, "loss": 1.5553, "step": 138 }, { "epoch": 0.019828815977175465, "grad_norm": 0.693581759929657, "learning_rate": 2.187004754358162e-06, "loss": 1.5425, "step": 139 }, { "epoch": 0.019971469329529243, "grad_norm": 0.48828667402267456, "learning_rate": 2.2028526148969892e-06, "loss": 1.4735, "step": 140 }, { "epoch": 0.020114122681883024, "grad_norm": 0.5852355360984802, "learning_rate": 2.2187004754358164e-06, "loss": 1.4065, "step": 141 }, { "epoch": 0.020256776034236805, "grad_norm": 0.47599881887435913, "learning_rate": 2.2345483359746436e-06, "loss": 1.3987, "step": 142 }, { "epoch": 0.020399429386590586, "grad_norm": 0.4909083843231201, "learning_rate": 2.250396196513471e-06, "loss": 1.4729, "step": 143 }, { "epoch": 0.020542082738944364, "grad_norm": 0.4750116467475891, "learning_rate": 2.266244057052298e-06, "loss": 1.3937, "step": 144 }, { "epoch": 0.020684736091298145, "grad_norm": 0.48616355657577515, "learning_rate": 2.2820919175911256e-06, "loss": 1.415, "step": 145 }, { "epoch": 0.020827389443651926, "grad_norm": 0.482998788356781, "learning_rate": 2.2979397781299524e-06, "loss": 1.3461, "step": 146 }, { "epoch": 0.020970042796005707, "grad_norm": 0.600152313709259, "learning_rate": 2.31378763866878e-06, "loss": 1.5215, "step": 147 }, { "epoch": 0.021112696148359488, "grad_norm": 0.6215013861656189, "learning_rate": 2.329635499207607e-06, "loss": 1.3436, "step": 148 }, { "epoch": 0.021255349500713266, "grad_norm": 0.5478700399398804, "learning_rate": 2.3454833597464344e-06, "loss": 1.532, "step": 149 }, { "epoch": 0.021398002853067047, "grad_norm": 0.42473381757736206, "learning_rate": 2.3613312202852615e-06, "loss": 1.375, "step": 150 }, { "epoch": 0.021540656205420828, "grad_norm": 0.41281232237815857, "learning_rate": 2.3771790808240887e-06, "loss": 1.5224, "step": 151 }, { "epoch": 0.02168330955777461, "grad_norm": 0.698550820350647, "learning_rate": 2.3930269413629164e-06, "loss": 1.326, "step": 152 }, { "epoch": 0.021825962910128387, "grad_norm": 1.0649795532226562, "learning_rate": 2.4088748019017435e-06, "loss": 1.0123, "step": 153 }, { "epoch": 0.021968616262482168, "grad_norm": 0.36036765575408936, "learning_rate": 2.4247226624405707e-06, "loss": 1.3621, "step": 154 }, { "epoch": 0.02211126961483595, "grad_norm": 0.5121074914932251, "learning_rate": 2.440570522979398e-06, "loss": 1.3121, "step": 155 }, { "epoch": 0.02225392296718973, "grad_norm": 0.5649195909500122, "learning_rate": 2.4564183835182255e-06, "loss": 1.407, "step": 156 }, { "epoch": 0.022396576319543508, "grad_norm": 0.7566866278648376, "learning_rate": 2.4722662440570523e-06, "loss": 1.334, "step": 157 }, { "epoch": 0.02253922967189729, "grad_norm": 0.6937580108642578, "learning_rate": 2.48811410459588e-06, "loss": 1.2586, "step": 158 }, { "epoch": 0.02268188302425107, "grad_norm": 0.5125687122344971, "learning_rate": 2.503961965134707e-06, "loss": 1.2403, "step": 159 }, { "epoch": 0.02282453637660485, "grad_norm": 0.43598663806915283, "learning_rate": 2.5198098256735343e-06, "loss": 1.3609, "step": 160 }, { "epoch": 0.022967189728958632, "grad_norm": 0.4160390794277191, "learning_rate": 2.5356576862123615e-06, "loss": 1.3124, "step": 161 }, { "epoch": 0.02310984308131241, "grad_norm": 0.32933762669563293, "learning_rate": 2.551505546751189e-06, "loss": 1.3765, "step": 162 }, { "epoch": 0.02325249643366619, "grad_norm": 0.3887583017349243, "learning_rate": 2.567353407290016e-06, "loss": 1.3972, "step": 163 }, { "epoch": 0.023395149786019972, "grad_norm": 0.3544297218322754, "learning_rate": 2.583201267828843e-06, "loss": 1.4004, "step": 164 }, { "epoch": 0.023537803138373753, "grad_norm": 0.370754599571228, "learning_rate": 2.5990491283676707e-06, "loss": 1.3913, "step": 165 }, { "epoch": 0.02368045649072753, "grad_norm": 0.3701103925704956, "learning_rate": 2.614896988906498e-06, "loss": 1.4598, "step": 166 }, { "epoch": 0.02382310984308131, "grad_norm": 0.3261151611804962, "learning_rate": 2.630744849445325e-06, "loss": 1.4104, "step": 167 }, { "epoch": 0.023965763195435093, "grad_norm": 0.3154659569263458, "learning_rate": 2.6465927099841527e-06, "loss": 1.3251, "step": 168 }, { "epoch": 0.024108416547788874, "grad_norm": 0.435077965259552, "learning_rate": 2.6624405705229794e-06, "loss": 1.3756, "step": 169 }, { "epoch": 0.024251069900142655, "grad_norm": 0.3625625967979431, "learning_rate": 2.6782884310618066e-06, "loss": 1.5361, "step": 170 }, { "epoch": 0.024393723252496433, "grad_norm": 0.48244497179985046, "learning_rate": 2.6941362916006342e-06, "loss": 1.3244, "step": 171 }, { "epoch": 0.024536376604850214, "grad_norm": 0.7815013527870178, "learning_rate": 2.7099841521394614e-06, "loss": 1.1812, "step": 172 }, { "epoch": 0.024679029957203995, "grad_norm": 0.3500225245952606, "learning_rate": 2.725832012678289e-06, "loss": 1.359, "step": 173 }, { "epoch": 0.024821683309557776, "grad_norm": 0.34128132462501526, "learning_rate": 2.741679873217116e-06, "loss": 1.3115, "step": 174 }, { "epoch": 0.024964336661911554, "grad_norm": 0.3174647092819214, "learning_rate": 2.757527733755943e-06, "loss": 1.3462, "step": 175 }, { "epoch": 0.025106990014265335, "grad_norm": 0.40917524695396423, "learning_rate": 2.7733755942947706e-06, "loss": 1.4537, "step": 176 }, { "epoch": 0.025249643366619116, "grad_norm": 0.3159307837486267, "learning_rate": 2.789223454833598e-06, "loss": 1.3603, "step": 177 }, { "epoch": 0.025392296718972897, "grad_norm": 0.37976720929145813, "learning_rate": 2.805071315372425e-06, "loss": 1.2648, "step": 178 }, { "epoch": 0.025534950071326678, "grad_norm": 0.37940192222595215, "learning_rate": 2.8209191759112518e-06, "loss": 1.2733, "step": 179 }, { "epoch": 0.025677603423680456, "grad_norm": 0.3686927556991577, "learning_rate": 2.8367670364500794e-06, "loss": 1.3113, "step": 180 }, { "epoch": 0.025820256776034237, "grad_norm": 0.35218343138694763, "learning_rate": 2.8526148969889066e-06, "loss": 1.3315, "step": 181 }, { "epoch": 0.025962910128388018, "grad_norm": 0.3224192261695862, "learning_rate": 2.868462757527734e-06, "loss": 1.4326, "step": 182 }, { "epoch": 0.0261055634807418, "grad_norm": 0.3251970112323761, "learning_rate": 2.8843106180665614e-06, "loss": 1.3612, "step": 183 }, { "epoch": 0.026248216833095576, "grad_norm": 0.3309905529022217, "learning_rate": 2.900158478605388e-06, "loss": 1.3877, "step": 184 }, { "epoch": 0.026390870185449358, "grad_norm": 0.4436019957065582, "learning_rate": 2.9160063391442158e-06, "loss": 1.3599, "step": 185 }, { "epoch": 0.02653352353780314, "grad_norm": 0.3721274733543396, "learning_rate": 2.931854199683043e-06, "loss": 1.2321, "step": 186 }, { "epoch": 0.02667617689015692, "grad_norm": 0.3032548725605011, "learning_rate": 2.94770206022187e-06, "loss": 1.4337, "step": 187 }, { "epoch": 0.026818830242510697, "grad_norm": 0.30257293581962585, "learning_rate": 2.9635499207606977e-06, "loss": 1.409, "step": 188 }, { "epoch": 0.02696148359486448, "grad_norm": 0.3350675702095032, "learning_rate": 2.979397781299525e-06, "loss": 1.3598, "step": 189 }, { "epoch": 0.02710413694721826, "grad_norm": 0.2964542806148529, "learning_rate": 2.9952456418383517e-06, "loss": 1.2141, "step": 190 }, { "epoch": 0.02724679029957204, "grad_norm": 0.4170527458190918, "learning_rate": 3.0110935023771793e-06, "loss": 1.3052, "step": 191 }, { "epoch": 0.027389443651925822, "grad_norm": 0.4259490370750427, "learning_rate": 3.0269413629160065e-06, "loss": 1.2928, "step": 192 }, { "epoch": 0.0275320970042796, "grad_norm": 0.35218384861946106, "learning_rate": 3.042789223454834e-06, "loss": 1.386, "step": 193 }, { "epoch": 0.02767475035663338, "grad_norm": 0.3103131055831909, "learning_rate": 3.0586370839936613e-06, "loss": 1.4354, "step": 194 }, { "epoch": 0.02781740370898716, "grad_norm": 0.2982793152332306, "learning_rate": 3.074484944532488e-06, "loss": 1.4955, "step": 195 }, { "epoch": 0.027960057061340943, "grad_norm": 0.3776632249355316, "learning_rate": 3.0903328050713157e-06, "loss": 1.3391, "step": 196 }, { "epoch": 0.02810271041369472, "grad_norm": 0.32819125056266785, "learning_rate": 3.106180665610143e-06, "loss": 1.3595, "step": 197 }, { "epoch": 0.0282453637660485, "grad_norm": 0.332262247800827, "learning_rate": 3.12202852614897e-06, "loss": 1.3278, "step": 198 }, { "epoch": 0.028388017118402283, "grad_norm": 0.3423677682876587, "learning_rate": 3.1378763866877977e-06, "loss": 1.4108, "step": 199 }, { "epoch": 0.028530670470756064, "grad_norm": 0.39997029304504395, "learning_rate": 3.1537242472266245e-06, "loss": 1.1477, "step": 200 }, { "epoch": 0.028673323823109845, "grad_norm": 0.42977041006088257, "learning_rate": 3.1695721077654516e-06, "loss": 1.1345, "step": 201 }, { "epoch": 0.028815977175463622, "grad_norm": 0.31790071725845337, "learning_rate": 3.1854199683042793e-06, "loss": 1.3127, "step": 202 }, { "epoch": 0.028958630527817403, "grad_norm": 0.3349977731704712, "learning_rate": 3.2012678288431065e-06, "loss": 1.3327, "step": 203 }, { "epoch": 0.029101283880171185, "grad_norm": 0.3176394999027252, "learning_rate": 3.217115689381934e-06, "loss": 1.2639, "step": 204 }, { "epoch": 0.029243937232524966, "grad_norm": 0.3080146908760071, "learning_rate": 3.2329635499207613e-06, "loss": 1.2867, "step": 205 }, { "epoch": 0.029386590584878743, "grad_norm": 0.37435632944107056, "learning_rate": 3.248811410459588e-06, "loss": 1.2979, "step": 206 }, { "epoch": 0.029529243937232524, "grad_norm": 0.32686150074005127, "learning_rate": 3.2646592709984152e-06, "loss": 1.3988, "step": 207 }, { "epoch": 0.029671897289586305, "grad_norm": 0.3797704875469208, "learning_rate": 3.280507131537243e-06, "loss": 1.2882, "step": 208 }, { "epoch": 0.029814550641940087, "grad_norm": 0.34049201011657715, "learning_rate": 3.29635499207607e-06, "loss": 1.3673, "step": 209 }, { "epoch": 0.029957203994293864, "grad_norm": 0.3301849365234375, "learning_rate": 3.3122028526148976e-06, "loss": 1.345, "step": 210 }, { "epoch": 0.030099857346647645, "grad_norm": 0.3224700093269348, "learning_rate": 3.3280507131537244e-06, "loss": 1.3089, "step": 211 }, { "epoch": 0.030242510699001426, "grad_norm": 0.3416121006011963, "learning_rate": 3.3438985736925516e-06, "loss": 1.2731, "step": 212 }, { "epoch": 0.030385164051355208, "grad_norm": 0.3295351564884186, "learning_rate": 3.359746434231379e-06, "loss": 1.306, "step": 213 }, { "epoch": 0.03052781740370899, "grad_norm": 0.6265162229537964, "learning_rate": 3.3755942947702064e-06, "loss": 1.1073, "step": 214 }, { "epoch": 0.030670470756062766, "grad_norm": 0.3294594883918762, "learning_rate": 3.3914421553090336e-06, "loss": 1.4362, "step": 215 }, { "epoch": 0.030813124108416547, "grad_norm": 0.2984050512313843, "learning_rate": 3.4072900158478608e-06, "loss": 1.3641, "step": 216 }, { "epoch": 0.03095577746077033, "grad_norm": 0.2959788143634796, "learning_rate": 3.423137876386688e-06, "loss": 1.2501, "step": 217 }, { "epoch": 0.03109843081312411, "grad_norm": 0.2951110601425171, "learning_rate": 3.438985736925515e-06, "loss": 1.3613, "step": 218 }, { "epoch": 0.031241084165477887, "grad_norm": 0.2745145559310913, "learning_rate": 3.4548335974643428e-06, "loss": 1.4284, "step": 219 }, { "epoch": 0.03138373751783167, "grad_norm": 0.3220887780189514, "learning_rate": 3.47068145800317e-06, "loss": 1.3333, "step": 220 }, { "epoch": 0.031526390870185446, "grad_norm": 0.30729013681411743, "learning_rate": 3.4865293185419976e-06, "loss": 1.2198, "step": 221 }, { "epoch": 0.03166904422253923, "grad_norm": 0.360538512468338, "learning_rate": 3.5023771790808243e-06, "loss": 1.3019, "step": 222 }, { "epoch": 0.03181169757489301, "grad_norm": 0.30400270223617554, "learning_rate": 3.5182250396196515e-06, "loss": 1.3406, "step": 223 }, { "epoch": 0.03195435092724679, "grad_norm": 0.37592488527297974, "learning_rate": 3.5340729001584787e-06, "loss": 1.104, "step": 224 }, { "epoch": 0.03209700427960057, "grad_norm": 0.32413458824157715, "learning_rate": 3.5499207606973063e-06, "loss": 1.4291, "step": 225 }, { "epoch": 0.03223965763195435, "grad_norm": 0.335815966129303, "learning_rate": 3.5657686212361335e-06, "loss": 1.3057, "step": 226 }, { "epoch": 0.03238231098430813, "grad_norm": 0.406950980424881, "learning_rate": 3.5816164817749603e-06, "loss": 1.3551, "step": 227 }, { "epoch": 0.03252496433666191, "grad_norm": 0.30737853050231934, "learning_rate": 3.597464342313788e-06, "loss": 1.2572, "step": 228 }, { "epoch": 0.032667617689015695, "grad_norm": 0.3183289170265198, "learning_rate": 3.613312202852615e-06, "loss": 1.2256, "step": 229 }, { "epoch": 0.03281027104136947, "grad_norm": 0.2918608784675598, "learning_rate": 3.6291600633914427e-06, "loss": 1.3548, "step": 230 }, { "epoch": 0.03295292439372325, "grad_norm": 0.38249361515045166, "learning_rate": 3.64500792393027e-06, "loss": 1.3721, "step": 231 }, { "epoch": 0.033095577746077035, "grad_norm": 0.35961297154426575, "learning_rate": 3.6608557844690967e-06, "loss": 1.1631, "step": 232 }, { "epoch": 0.03323823109843081, "grad_norm": 0.32576683163642883, "learning_rate": 3.6767036450079243e-06, "loss": 1.1663, "step": 233 }, { "epoch": 0.0333808844507846, "grad_norm": 0.42287713289260864, "learning_rate": 3.6925515055467515e-06, "loss": 1.1628, "step": 234 }, { "epoch": 0.033523537803138374, "grad_norm": 0.3085416853427887, "learning_rate": 3.7083993660855787e-06, "loss": 1.3782, "step": 235 }, { "epoch": 0.03366619115549215, "grad_norm": 0.43059131503105164, "learning_rate": 3.7242472266244063e-06, "loss": 1.2775, "step": 236 }, { "epoch": 0.03380884450784594, "grad_norm": 0.294572651386261, "learning_rate": 3.740095087163233e-06, "loss": 1.1738, "step": 237 }, { "epoch": 0.033951497860199714, "grad_norm": 0.47916534543037415, "learning_rate": 3.7559429477020602e-06, "loss": 1.2438, "step": 238 }, { "epoch": 0.03409415121255349, "grad_norm": 0.5922914147377014, "learning_rate": 3.771790808240888e-06, "loss": 1.1924, "step": 239 }, { "epoch": 0.034236804564907276, "grad_norm": 0.2903591990470886, "learning_rate": 3.787638668779715e-06, "loss": 1.3334, "step": 240 }, { "epoch": 0.034379457917261054, "grad_norm": 0.3299739360809326, "learning_rate": 3.8034865293185427e-06, "loss": 1.2186, "step": 241 }, { "epoch": 0.03452211126961484, "grad_norm": 0.38692760467529297, "learning_rate": 3.8193343898573694e-06, "loss": 1.2373, "step": 242 }, { "epoch": 0.034664764621968616, "grad_norm": 0.41160643100738525, "learning_rate": 3.835182250396197e-06, "loss": 1.184, "step": 243 }, { "epoch": 0.034807417974322394, "grad_norm": 0.3497057855129242, "learning_rate": 3.851030110935024e-06, "loss": 1.1723, "step": 244 }, { "epoch": 0.03495007132667618, "grad_norm": 0.2992457151412964, "learning_rate": 3.866877971473851e-06, "loss": 1.3942, "step": 245 }, { "epoch": 0.035092724679029956, "grad_norm": 0.37590697407722473, "learning_rate": 3.882725832012679e-06, "loss": 1.259, "step": 246 }, { "epoch": 0.03523537803138374, "grad_norm": 0.387542188167572, "learning_rate": 3.898573692551506e-06, "loss": 1.1846, "step": 247 }, { "epoch": 0.03537803138373752, "grad_norm": 0.2927229702472687, "learning_rate": 3.9144215530903326e-06, "loss": 1.318, "step": 248 }, { "epoch": 0.035520684736091296, "grad_norm": 0.31897106766700745, "learning_rate": 3.93026941362916e-06, "loss": 1.3613, "step": 249 }, { "epoch": 0.03566333808844508, "grad_norm": 0.33956417441368103, "learning_rate": 3.946117274167988e-06, "loss": 1.2053, "step": 250 }, { "epoch": 0.03580599144079886, "grad_norm": 0.46036967635154724, "learning_rate": 3.961965134706815e-06, "loss": 1.2807, "step": 251 }, { "epoch": 0.035948644793152636, "grad_norm": 0.40634241700172424, "learning_rate": 3.977812995245642e-06, "loss": 1.1571, "step": 252 }, { "epoch": 0.03609129814550642, "grad_norm": 0.319195419549942, "learning_rate": 3.993660855784469e-06, "loss": 1.0505, "step": 253 }, { "epoch": 0.0362339514978602, "grad_norm": 0.31796982884407043, "learning_rate": 4.0095087163232966e-06, "loss": 1.3175, "step": 254 }, { "epoch": 0.03637660485021398, "grad_norm": 0.43286457657814026, "learning_rate": 4.025356576862124e-06, "loss": 1.1349, "step": 255 }, { "epoch": 0.03651925820256776, "grad_norm": 0.31193092465400696, "learning_rate": 4.041204437400951e-06, "loss": 1.1305, "step": 256 }, { "epoch": 0.03666191155492154, "grad_norm": 0.28341153264045715, "learning_rate": 4.0570522979397786e-06, "loss": 1.3878, "step": 257 }, { "epoch": 0.03680456490727532, "grad_norm": 0.42940574884414673, "learning_rate": 4.072900158478606e-06, "loss": 1.2368, "step": 258 }, { "epoch": 0.0369472182596291, "grad_norm": 0.31089913845062256, "learning_rate": 4.088748019017433e-06, "loss": 1.2277, "step": 259 }, { "epoch": 0.037089871611982884, "grad_norm": 0.2877831757068634, "learning_rate": 4.1045958795562605e-06, "loss": 1.3913, "step": 260 }, { "epoch": 0.03723252496433666, "grad_norm": 0.297457218170166, "learning_rate": 4.120443740095087e-06, "loss": 1.2163, "step": 261 }, { "epoch": 0.03737517831669044, "grad_norm": 0.3337115943431854, "learning_rate": 4.136291600633915e-06, "loss": 1.275, "step": 262 }, { "epoch": 0.037517831669044224, "grad_norm": 0.33439674973487854, "learning_rate": 4.1521394611727425e-06, "loss": 1.2002, "step": 263 }, { "epoch": 0.037660485021398, "grad_norm": 0.3287540674209595, "learning_rate": 4.167987321711569e-06, "loss": 1.3298, "step": 264 }, { "epoch": 0.037803138373751786, "grad_norm": 0.29057860374450684, "learning_rate": 4.183835182250396e-06, "loss": 1.3222, "step": 265 }, { "epoch": 0.037945791726105564, "grad_norm": 0.30964189767837524, "learning_rate": 4.199683042789224e-06, "loss": 1.352, "step": 266 }, { "epoch": 0.03808844507845934, "grad_norm": 0.2988928258419037, "learning_rate": 4.215530903328051e-06, "loss": 1.3347, "step": 267 }, { "epoch": 0.038231098430813126, "grad_norm": 0.40428733825683594, "learning_rate": 4.231378763866879e-06, "loss": 1.0087, "step": 268 }, { "epoch": 0.038373751783166904, "grad_norm": 0.35499265789985657, "learning_rate": 4.247226624405706e-06, "loss": 1.1082, "step": 269 }, { "epoch": 0.03851640513552068, "grad_norm": 0.3120858073234558, "learning_rate": 4.2630744849445325e-06, "loss": 1.3458, "step": 270 }, { "epoch": 0.038659058487874466, "grad_norm": 0.5083685517311096, "learning_rate": 4.27892234548336e-06, "loss": 1.2777, "step": 271 }, { "epoch": 0.038801711840228244, "grad_norm": 0.5204631686210632, "learning_rate": 4.294770206022188e-06, "loss": 1.2005, "step": 272 }, { "epoch": 0.03894436519258203, "grad_norm": 0.3237505555152893, "learning_rate": 4.3106180665610144e-06, "loss": 1.2321, "step": 273 }, { "epoch": 0.039087018544935806, "grad_norm": 0.2766949236392975, "learning_rate": 4.326465927099842e-06, "loss": 1.2984, "step": 274 }, { "epoch": 0.039229671897289584, "grad_norm": 0.41957545280456543, "learning_rate": 4.342313787638669e-06, "loss": 1.2279, "step": 275 }, { "epoch": 0.03937232524964337, "grad_norm": 0.3055225610733032, "learning_rate": 4.3581616481774964e-06, "loss": 1.2772, "step": 276 }, { "epoch": 0.039514978601997146, "grad_norm": 0.28122588992118835, "learning_rate": 4.374009508716324e-06, "loss": 1.3133, "step": 277 }, { "epoch": 0.03965763195435093, "grad_norm": 0.3901839554309845, "learning_rate": 4.389857369255151e-06, "loss": 1.1696, "step": 278 }, { "epoch": 0.03980028530670471, "grad_norm": 0.35039961338043213, "learning_rate": 4.4057052297939784e-06, "loss": 1.428, "step": 279 }, { "epoch": 0.039942938659058486, "grad_norm": 0.3964681327342987, "learning_rate": 4.421553090332805e-06, "loss": 1.2232, "step": 280 }, { "epoch": 0.04008559201141227, "grad_norm": 0.3448876142501831, "learning_rate": 4.437400950871633e-06, "loss": 1.2975, "step": 281 }, { "epoch": 0.04022824536376605, "grad_norm": 0.3363805413246155, "learning_rate": 4.45324881141046e-06, "loss": 1.3109, "step": 282 }, { "epoch": 0.040370898716119825, "grad_norm": 0.4259723722934723, "learning_rate": 4.469096671949287e-06, "loss": 1.1703, "step": 283 }, { "epoch": 0.04051355206847361, "grad_norm": 0.3295013904571533, "learning_rate": 4.484944532488115e-06, "loss": 1.1904, "step": 284 }, { "epoch": 0.04065620542082739, "grad_norm": 0.323429673910141, "learning_rate": 4.500792393026942e-06, "loss": 1.1965, "step": 285 }, { "epoch": 0.04079885877318117, "grad_norm": 0.30424764752388, "learning_rate": 4.516640253565769e-06, "loss": 1.397, "step": 286 }, { "epoch": 0.04094151212553495, "grad_norm": 0.29817402362823486, "learning_rate": 4.532488114104596e-06, "loss": 1.2242, "step": 287 }, { "epoch": 0.04108416547788873, "grad_norm": 0.3749523460865021, "learning_rate": 4.5483359746434236e-06, "loss": 1.081, "step": 288 }, { "epoch": 0.04122681883024251, "grad_norm": 0.2756361961364746, "learning_rate": 4.564183835182251e-06, "loss": 1.2206, "step": 289 }, { "epoch": 0.04136947218259629, "grad_norm": 0.34127745032310486, "learning_rate": 4.580031695721078e-06, "loss": 1.2318, "step": 290 }, { "epoch": 0.041512125534950074, "grad_norm": 0.39298638701438904, "learning_rate": 4.595879556259905e-06, "loss": 1.2457, "step": 291 }, { "epoch": 0.04165477888730385, "grad_norm": 0.3586486577987671, "learning_rate": 4.611727416798732e-06, "loss": 1.289, "step": 292 }, { "epoch": 0.04179743223965763, "grad_norm": 0.396969735622406, "learning_rate": 4.62757527733756e-06, "loss": 1.0968, "step": 293 }, { "epoch": 0.041940085592011414, "grad_norm": 0.4026542007923126, "learning_rate": 4.6434231378763876e-06, "loss": 1.2031, "step": 294 }, { "epoch": 0.04208273894436519, "grad_norm": 0.4238973557949066, "learning_rate": 4.659270998415214e-06, "loss": 1.1247, "step": 295 }, { "epoch": 0.042225392296718976, "grad_norm": 0.30522915720939636, "learning_rate": 4.675118858954041e-06, "loss": 1.2861, "step": 296 }, { "epoch": 0.042368045649072754, "grad_norm": 0.411531001329422, "learning_rate": 4.690966719492869e-06, "loss": 1.1247, "step": 297 }, { "epoch": 0.04251069900142653, "grad_norm": 0.44647908210754395, "learning_rate": 4.706814580031696e-06, "loss": 1.132, "step": 298 }, { "epoch": 0.042653352353780316, "grad_norm": 0.2880479097366333, "learning_rate": 4.722662440570523e-06, "loss": 1.3464, "step": 299 }, { "epoch": 0.042796005706134094, "grad_norm": 0.29678839445114136, "learning_rate": 4.738510301109351e-06, "loss": 1.1978, "step": 300 }, { "epoch": 0.04293865905848787, "grad_norm": 0.42006635665893555, "learning_rate": 4.7543581616481775e-06, "loss": 1.1341, "step": 301 }, { "epoch": 0.043081312410841656, "grad_norm": 0.43602508306503296, "learning_rate": 4.770206022187005e-06, "loss": 1.1031, "step": 302 }, { "epoch": 0.043223965763195434, "grad_norm": 0.4025443494319916, "learning_rate": 4.786053882725833e-06, "loss": 1.0055, "step": 303 }, { "epoch": 0.04336661911554922, "grad_norm": 0.4227970242500305, "learning_rate": 4.8019017432646595e-06, "loss": 1.1083, "step": 304 }, { "epoch": 0.043509272467902996, "grad_norm": 0.3869151473045349, "learning_rate": 4.817749603803487e-06, "loss": 1.0709, "step": 305 }, { "epoch": 0.04365192582025677, "grad_norm": 0.3243708312511444, "learning_rate": 4.833597464342314e-06, "loss": 1.3049, "step": 306 }, { "epoch": 0.04379457917261056, "grad_norm": 0.46890708804130554, "learning_rate": 4.8494453248811415e-06, "loss": 1.1072, "step": 307 }, { "epoch": 0.043937232524964336, "grad_norm": 0.3239574730396271, "learning_rate": 4.865293185419968e-06, "loss": 1.3027, "step": 308 }, { "epoch": 0.04407988587731812, "grad_norm": 0.37250688672065735, "learning_rate": 4.881141045958796e-06, "loss": 1.0762, "step": 309 }, { "epoch": 0.0442225392296719, "grad_norm": 0.38411664962768555, "learning_rate": 4.8969889064976235e-06, "loss": 1.1813, "step": 310 }, { "epoch": 0.044365192582025675, "grad_norm": 0.35546326637268066, "learning_rate": 4.912836767036451e-06, "loss": 1.1954, "step": 311 }, { "epoch": 0.04450784593437946, "grad_norm": 0.3755302131175995, "learning_rate": 4.928684627575278e-06, "loss": 1.2159, "step": 312 }, { "epoch": 0.04465049928673324, "grad_norm": 0.30921611189842224, "learning_rate": 4.944532488114105e-06, "loss": 1.2202, "step": 313 }, { "epoch": 0.044793152639087015, "grad_norm": 0.3620322644710541, "learning_rate": 4.960380348652932e-06, "loss": 1.2663, "step": 314 }, { "epoch": 0.0449358059914408, "grad_norm": 0.3114851117134094, "learning_rate": 4.97622820919176e-06, "loss": 1.27, "step": 315 }, { "epoch": 0.04507845934379458, "grad_norm": 0.38563475012779236, "learning_rate": 4.992076069730587e-06, "loss": 1.1611, "step": 316 }, { "epoch": 0.04522111269614836, "grad_norm": 0.495448499917984, "learning_rate": 5.007923930269414e-06, "loss": 1.2343, "step": 317 }, { "epoch": 0.04536376604850214, "grad_norm": 0.318773478269577, "learning_rate": 5.023771790808242e-06, "loss": 1.0744, "step": 318 }, { "epoch": 0.04550641940085592, "grad_norm": 0.3277983069419861, "learning_rate": 5.039619651347069e-06, "loss": 1.1822, "step": 319 }, { "epoch": 0.0456490727532097, "grad_norm": 0.39151665568351746, "learning_rate": 5.055467511885895e-06, "loss": 1.0839, "step": 320 }, { "epoch": 0.04579172610556348, "grad_norm": 0.4079418182373047, "learning_rate": 5.071315372424723e-06, "loss": 1.2956, "step": 321 }, { "epoch": 0.045934379457917264, "grad_norm": 0.29466429352760315, "learning_rate": 5.08716323296355e-06, "loss": 1.3297, "step": 322 }, { "epoch": 0.04607703281027104, "grad_norm": 0.7656469941139221, "learning_rate": 5.103011093502378e-06, "loss": 0.8453, "step": 323 }, { "epoch": 0.04621968616262482, "grad_norm": 0.29712972044944763, "learning_rate": 5.118858954041205e-06, "loss": 1.2395, "step": 324 }, { "epoch": 0.046362339514978604, "grad_norm": 0.4239838421344757, "learning_rate": 5.134706814580032e-06, "loss": 1.1287, "step": 325 }, { "epoch": 0.04650499286733238, "grad_norm": 0.346843421459198, "learning_rate": 5.150554675118859e-06, "loss": 1.2082, "step": 326 }, { "epoch": 0.046647646219686166, "grad_norm": 0.4217950403690338, "learning_rate": 5.166402535657686e-06, "loss": 1.1445, "step": 327 }, { "epoch": 0.046790299572039944, "grad_norm": 0.43816739320755005, "learning_rate": 5.182250396196515e-06, "loss": 1.1049, "step": 328 }, { "epoch": 0.04693295292439372, "grad_norm": 0.30863091349601746, "learning_rate": 5.198098256735341e-06, "loss": 1.193, "step": 329 }, { "epoch": 0.047075606276747506, "grad_norm": 0.3303157091140747, "learning_rate": 5.213946117274168e-06, "loss": 1.1326, "step": 330 }, { "epoch": 0.047218259629101283, "grad_norm": 0.5755234956741333, "learning_rate": 5.229793977812996e-06, "loss": 0.9575, "step": 331 }, { "epoch": 0.04736091298145506, "grad_norm": 0.4737313985824585, "learning_rate": 5.2456418383518225e-06, "loss": 1.0288, "step": 332 }, { "epoch": 0.047503566333808846, "grad_norm": 0.29919010400772095, "learning_rate": 5.26148969889065e-06, "loss": 1.2875, "step": 333 }, { "epoch": 0.04764621968616262, "grad_norm": 0.36434921622276306, "learning_rate": 5.277337559429478e-06, "loss": 1.1288, "step": 334 }, { "epoch": 0.04778887303851641, "grad_norm": 0.43472111225128174, "learning_rate": 5.293185419968305e-06, "loss": 1.0283, "step": 335 }, { "epoch": 0.047931526390870186, "grad_norm": 0.4471413791179657, "learning_rate": 5.309033280507132e-06, "loss": 1.1391, "step": 336 }, { "epoch": 0.04807417974322396, "grad_norm": 0.43796202540397644, "learning_rate": 5.324881141045959e-06, "loss": 1.0755, "step": 337 }, { "epoch": 0.04821683309557775, "grad_norm": 0.5343884825706482, "learning_rate": 5.3407290015847865e-06, "loss": 1.067, "step": 338 }, { "epoch": 0.048359486447931525, "grad_norm": 0.34646904468536377, "learning_rate": 5.356576862123613e-06, "loss": 1.1613, "step": 339 }, { "epoch": 0.04850213980028531, "grad_norm": 0.45741909742355347, "learning_rate": 5.372424722662442e-06, "loss": 1.2105, "step": 340 }, { "epoch": 0.04864479315263909, "grad_norm": 0.36324140429496765, "learning_rate": 5.3882725832012685e-06, "loss": 1.175, "step": 341 }, { "epoch": 0.048787446504992865, "grad_norm": 0.33652055263519287, "learning_rate": 5.404120443740095e-06, "loss": 1.2252, "step": 342 }, { "epoch": 0.04893009985734665, "grad_norm": 0.36475199460983276, "learning_rate": 5.419968304278923e-06, "loss": 1.2679, "step": 343 }, { "epoch": 0.04907275320970043, "grad_norm": 0.3909691870212555, "learning_rate": 5.43581616481775e-06, "loss": 1.1804, "step": 344 }, { "epoch": 0.049215406562054205, "grad_norm": 0.3171705901622772, "learning_rate": 5.451664025356578e-06, "loss": 1.1083, "step": 345 }, { "epoch": 0.04935805991440799, "grad_norm": 0.38119977712631226, "learning_rate": 5.467511885895405e-06, "loss": 1.1648, "step": 346 }, { "epoch": 0.04950071326676177, "grad_norm": 0.39248302578926086, "learning_rate": 5.483359746434232e-06, "loss": 1.0853, "step": 347 }, { "epoch": 0.04964336661911555, "grad_norm": 0.5233859419822693, "learning_rate": 5.499207606973059e-06, "loss": 1.1315, "step": 348 }, { "epoch": 0.04978601997146933, "grad_norm": 0.4257233142852783, "learning_rate": 5.515055467511886e-06, "loss": 1.2126, "step": 349 }, { "epoch": 0.04992867332382311, "grad_norm": 0.37368786334991455, "learning_rate": 5.5309033280507145e-06, "loss": 1.2713, "step": 350 }, { "epoch": 0.05007132667617689, "grad_norm": 0.3926977217197418, "learning_rate": 5.546751188589541e-06, "loss": 1.044, "step": 351 }, { "epoch": 0.05021398002853067, "grad_norm": 0.3789297938346863, "learning_rate": 5.562599049128368e-06, "loss": 1.2206, "step": 352 }, { "epoch": 0.050356633380884454, "grad_norm": 0.42507195472717285, "learning_rate": 5.578446909667196e-06, "loss": 1.1481, "step": 353 }, { "epoch": 0.05049928673323823, "grad_norm": 0.40778741240501404, "learning_rate": 5.594294770206022e-06, "loss": 1.2038, "step": 354 }, { "epoch": 0.05064194008559201, "grad_norm": 0.4026924967765808, "learning_rate": 5.61014263074485e-06, "loss": 1.0337, "step": 355 }, { "epoch": 0.050784593437945794, "grad_norm": 0.44218987226486206, "learning_rate": 5.625990491283677e-06, "loss": 1.1486, "step": 356 }, { "epoch": 0.05092724679029957, "grad_norm": 0.4402460753917694, "learning_rate": 5.6418383518225035e-06, "loss": 1.0998, "step": 357 }, { "epoch": 0.051069900142653356, "grad_norm": 0.3994021713733673, "learning_rate": 5.657686212361332e-06, "loss": 1.0053, "step": 358 }, { "epoch": 0.05121255349500713, "grad_norm": 0.3759647309780121, "learning_rate": 5.673534072900159e-06, "loss": 1.1892, "step": 359 }, { "epoch": 0.05135520684736091, "grad_norm": 0.31760871410369873, "learning_rate": 5.689381933438986e-06, "loss": 1.3337, "step": 360 }, { "epoch": 0.051497860199714696, "grad_norm": 0.378974974155426, "learning_rate": 5.705229793977813e-06, "loss": 1.1321, "step": 361 }, { "epoch": 0.05164051355206847, "grad_norm": 0.4521256387233734, "learning_rate": 5.72107765451664e-06, "loss": 1.1835, "step": 362 }, { "epoch": 0.05178316690442225, "grad_norm": 0.330822229385376, "learning_rate": 5.736925515055468e-06, "loss": 1.1133, "step": 363 }, { "epoch": 0.051925820256776035, "grad_norm": 0.37037456035614014, "learning_rate": 5.752773375594295e-06, "loss": 1.1967, "step": 364 }, { "epoch": 0.05206847360912981, "grad_norm": 0.3412037491798401, "learning_rate": 5.768621236133123e-06, "loss": 1.2311, "step": 365 }, { "epoch": 0.0522111269614836, "grad_norm": 0.5195797085762024, "learning_rate": 5.7844690966719495e-06, "loss": 1.0661, "step": 366 }, { "epoch": 0.052353780313837375, "grad_norm": 0.546709418296814, "learning_rate": 5.800316957210776e-06, "loss": 1.138, "step": 367 }, { "epoch": 0.05249643366619115, "grad_norm": 0.3824395537376404, "learning_rate": 5.816164817749605e-06, "loss": 1.1791, "step": 368 }, { "epoch": 0.05263908701854494, "grad_norm": 0.3928437829017639, "learning_rate": 5.8320126782884315e-06, "loss": 1.117, "step": 369 }, { "epoch": 0.052781740370898715, "grad_norm": 0.40824761986732483, "learning_rate": 5.847860538827259e-06, "loss": 1.1497, "step": 370 }, { "epoch": 0.0529243937232525, "grad_norm": 0.8095877766609192, "learning_rate": 5.863708399366086e-06, "loss": 1.0953, "step": 371 }, { "epoch": 0.05306704707560628, "grad_norm": 0.4009456932544708, "learning_rate": 5.8795562599049135e-06, "loss": 1.1634, "step": 372 }, { "epoch": 0.053209700427960055, "grad_norm": 0.4667862057685852, "learning_rate": 5.89540412044374e-06, "loss": 1.0981, "step": 373 }, { "epoch": 0.05335235378031384, "grad_norm": 0.44600826501846313, "learning_rate": 5.911251980982568e-06, "loss": 1.1326, "step": 374 }, { "epoch": 0.05349500713266762, "grad_norm": 0.41945433616638184, "learning_rate": 5.9270998415213955e-06, "loss": 0.9984, "step": 375 }, { "epoch": 0.053637660485021395, "grad_norm": 0.5451641082763672, "learning_rate": 5.942947702060222e-06, "loss": 1.0338, "step": 376 }, { "epoch": 0.05378031383737518, "grad_norm": 0.4329877197742462, "learning_rate": 5.95879556259905e-06, "loss": 1.1208, "step": 377 }, { "epoch": 0.05392296718972896, "grad_norm": 0.4113557040691376, "learning_rate": 5.974643423137877e-06, "loss": 1.1937, "step": 378 }, { "epoch": 0.05406562054208274, "grad_norm": 0.46751633286476135, "learning_rate": 5.990491283676703e-06, "loss": 1.0489, "step": 379 }, { "epoch": 0.05420827389443652, "grad_norm": 0.5643234848976135, "learning_rate": 6.006339144215532e-06, "loss": 1.1551, "step": 380 }, { "epoch": 0.0543509272467903, "grad_norm": 0.547410249710083, "learning_rate": 6.022187004754359e-06, "loss": 1.0762, "step": 381 }, { "epoch": 0.05449358059914408, "grad_norm": 0.5132689476013184, "learning_rate": 6.038034865293186e-06, "loss": 0.9542, "step": 382 }, { "epoch": 0.05463623395149786, "grad_norm": 0.41245096921920776, "learning_rate": 6.053882725832013e-06, "loss": 1.11, "step": 383 }, { "epoch": 0.054778887303851644, "grad_norm": 0.39710038900375366, "learning_rate": 6.06973058637084e-06, "loss": 1.101, "step": 384 }, { "epoch": 0.05492154065620542, "grad_norm": 0.3879498541355133, "learning_rate": 6.085578446909668e-06, "loss": 1.0018, "step": 385 }, { "epoch": 0.0550641940085592, "grad_norm": 0.40068891644477844, "learning_rate": 6.101426307448495e-06, "loss": 0.9925, "step": 386 }, { "epoch": 0.05520684736091298, "grad_norm": 0.3478064239025116, "learning_rate": 6.117274167987323e-06, "loss": 1.2, "step": 387 }, { "epoch": 0.05534950071326676, "grad_norm": 0.44799110293388367, "learning_rate": 6.133122028526149e-06, "loss": 1.0138, "step": 388 }, { "epoch": 0.05549215406562054, "grad_norm": 0.39405953884124756, "learning_rate": 6.148969889064976e-06, "loss": 1.1252, "step": 389 }, { "epoch": 0.05563480741797432, "grad_norm": 0.5006358623504639, "learning_rate": 6.164817749603804e-06, "loss": 1.0465, "step": 390 }, { "epoch": 0.0557774607703281, "grad_norm": 0.4405211806297302, "learning_rate": 6.180665610142631e-06, "loss": 1.1541, "step": 391 }, { "epoch": 0.055920114122681885, "grad_norm": 0.5930861234664917, "learning_rate": 6.196513470681459e-06, "loss": 1.0188, "step": 392 }, { "epoch": 0.05606276747503566, "grad_norm": 0.39982444047927856, "learning_rate": 6.212361331220286e-06, "loss": 1.0072, "step": 393 }, { "epoch": 0.05620542082738944, "grad_norm": 0.35119161009788513, "learning_rate": 6.2282091917591125e-06, "loss": 1.1994, "step": 394 }, { "epoch": 0.056348074179743225, "grad_norm": 0.5308758020401001, "learning_rate": 6.24405705229794e-06, "loss": 1.1496, "step": 395 }, { "epoch": 0.056490727532097, "grad_norm": 0.45965972542762756, "learning_rate": 6.259904912836767e-06, "loss": 1.1157, "step": 396 }, { "epoch": 0.05663338088445079, "grad_norm": 0.3781106472015381, "learning_rate": 6.275752773375595e-06, "loss": 1.1653, "step": 397 }, { "epoch": 0.056776034236804565, "grad_norm": 0.3776165544986725, "learning_rate": 6.291600633914422e-06, "loss": 1.2303, "step": 398 }, { "epoch": 0.05691868758915834, "grad_norm": 0.5492984056472778, "learning_rate": 6.307448494453249e-06, "loss": 1.1351, "step": 399 }, { "epoch": 0.05706134094151213, "grad_norm": 0.4699248671531677, "learning_rate": 6.3232963549920765e-06, "loss": 1.1055, "step": 400 }, { "epoch": 0.057203994293865905, "grad_norm": 0.4486481845378876, "learning_rate": 6.339144215530903e-06, "loss": 1.088, "step": 401 }, { "epoch": 0.05734664764621969, "grad_norm": 0.41519489884376526, "learning_rate": 6.354992076069732e-06, "loss": 1.1259, "step": 402 }, { "epoch": 0.05748930099857347, "grad_norm": 0.44372814893722534, "learning_rate": 6.3708399366085585e-06, "loss": 0.9689, "step": 403 }, { "epoch": 0.057631954350927245, "grad_norm": 0.44161999225616455, "learning_rate": 6.386687797147385e-06, "loss": 1.116, "step": 404 }, { "epoch": 0.05777460770328103, "grad_norm": 0.4586813449859619, "learning_rate": 6.402535657686213e-06, "loss": 1.0441, "step": 405 }, { "epoch": 0.05791726105563481, "grad_norm": 0.4998525381088257, "learning_rate": 6.41838351822504e-06, "loss": 1.0546, "step": 406 }, { "epoch": 0.058059914407988585, "grad_norm": 0.4752885401248932, "learning_rate": 6.434231378763868e-06, "loss": 1.074, "step": 407 }, { "epoch": 0.05820256776034237, "grad_norm": 0.5583359599113464, "learning_rate": 6.450079239302695e-06, "loss": 0.9808, "step": 408 }, { "epoch": 0.05834522111269615, "grad_norm": 0.4140510559082031, "learning_rate": 6.4659270998415225e-06, "loss": 1.1221, "step": 409 }, { "epoch": 0.05848787446504993, "grad_norm": 0.43604913353919983, "learning_rate": 6.481774960380349e-06, "loss": 1.0223, "step": 410 }, { "epoch": 0.05863052781740371, "grad_norm": 0.4573204815387726, "learning_rate": 6.497622820919176e-06, "loss": 0.9948, "step": 411 }, { "epoch": 0.05877318116975749, "grad_norm": 0.48934316635131836, "learning_rate": 6.513470681458004e-06, "loss": 1.0465, "step": 412 }, { "epoch": 0.05891583452211127, "grad_norm": 1.1828285455703735, "learning_rate": 6.5293185419968304e-06, "loss": 0.8108, "step": 413 }, { "epoch": 0.05905848787446505, "grad_norm": 0.39499032497406006, "learning_rate": 6.545166402535659e-06, "loss": 1.199, "step": 414 }, { "epoch": 0.05920114122681883, "grad_norm": 0.4509989321231842, "learning_rate": 6.561014263074486e-06, "loss": 1.0853, "step": 415 }, { "epoch": 0.05934379457917261, "grad_norm": 0.40844517946243286, "learning_rate": 6.5768621236133124e-06, "loss": 1.1005, "step": 416 }, { "epoch": 0.05948644793152639, "grad_norm": 0.3970955014228821, "learning_rate": 6.59270998415214e-06, "loss": 1.1695, "step": 417 }, { "epoch": 0.05962910128388017, "grad_norm": 0.5938369631767273, "learning_rate": 6.608557844690967e-06, "loss": 1.1761, "step": 418 }, { "epoch": 0.05977175463623395, "grad_norm": 0.4537716209888458, "learning_rate": 6.624405705229795e-06, "loss": 1.0383, "step": 419 }, { "epoch": 0.05991440798858773, "grad_norm": 0.46481889486312866, "learning_rate": 6.640253565768622e-06, "loss": 1.0384, "step": 420 }, { "epoch": 0.06005706134094151, "grad_norm": 0.4273732006549835, "learning_rate": 6.656101426307449e-06, "loss": 1.0673, "step": 421 }, { "epoch": 0.06019971469329529, "grad_norm": 0.4316958785057068, "learning_rate": 6.671949286846276e-06, "loss": 1.0772, "step": 422 }, { "epoch": 0.060342368045649075, "grad_norm": 0.44716697931289673, "learning_rate": 6.687797147385103e-06, "loss": 1.1134, "step": 423 }, { "epoch": 0.06048502139800285, "grad_norm": 0.49333369731903076, "learning_rate": 6.703645007923932e-06, "loss": 0.9782, "step": 424 }, { "epoch": 0.06062767475035663, "grad_norm": 0.531298041343689, "learning_rate": 6.719492868462758e-06, "loss": 0.9299, "step": 425 }, { "epoch": 0.060770328102710415, "grad_norm": 0.578718900680542, "learning_rate": 6.735340729001585e-06, "loss": 1.0049, "step": 426 }, { "epoch": 0.06091298145506419, "grad_norm": 0.5718407034873962, "learning_rate": 6.751188589540413e-06, "loss": 1.0456, "step": 427 }, { "epoch": 0.06105563480741798, "grad_norm": 0.40977999567985535, "learning_rate": 6.7670364500792396e-06, "loss": 1.0773, "step": 428 }, { "epoch": 0.061198288159771755, "grad_norm": 0.46405255794525146, "learning_rate": 6.782884310618067e-06, "loss": 1.0508, "step": 429 }, { "epoch": 0.06134094151212553, "grad_norm": 0.5183796286582947, "learning_rate": 6.798732171156894e-06, "loss": 1.0275, "step": 430 }, { "epoch": 0.06148359486447932, "grad_norm": 0.4515964090824127, "learning_rate": 6.8145800316957216e-06, "loss": 1.0331, "step": 431 }, { "epoch": 0.061626248216833095, "grad_norm": 0.503455638885498, "learning_rate": 6.830427892234549e-06, "loss": 1.1121, "step": 432 }, { "epoch": 0.06176890156918688, "grad_norm": 0.5511139035224915, "learning_rate": 6.846275752773376e-06, "loss": 0.9589, "step": 433 }, { "epoch": 0.06191155492154066, "grad_norm": 0.5188602209091187, "learning_rate": 6.8621236133122035e-06, "loss": 1.0141, "step": 434 }, { "epoch": 0.062054208273894434, "grad_norm": 0.46196311712265015, "learning_rate": 6.87797147385103e-06, "loss": 1.0483, "step": 435 }, { "epoch": 0.06219686162624822, "grad_norm": 0.5451324582099915, "learning_rate": 6.893819334389857e-06, "loss": 1.1733, "step": 436 }, { "epoch": 0.062339514978602, "grad_norm": 0.6649710536003113, "learning_rate": 6.9096671949286855e-06, "loss": 1.0655, "step": 437 }, { "epoch": 0.062482168330955774, "grad_norm": 0.6782967448234558, "learning_rate": 6.925515055467512e-06, "loss": 1.0939, "step": 438 }, { "epoch": 0.06262482168330956, "grad_norm": 0.5909539461135864, "learning_rate": 6.94136291600634e-06, "loss": 0.9001, "step": 439 }, { "epoch": 0.06276747503566334, "grad_norm": 0.6379039287567139, "learning_rate": 6.957210776545167e-06, "loss": 1.0132, "step": 440 }, { "epoch": 0.06291012838801711, "grad_norm": 0.5297428965568542, "learning_rate": 6.973058637083995e-06, "loss": 1.0505, "step": 441 }, { "epoch": 0.06305278174037089, "grad_norm": 0.529805600643158, "learning_rate": 6.988906497622822e-06, "loss": 0.9536, "step": 442 }, { "epoch": 0.06319543509272468, "grad_norm": 0.4905278980731964, "learning_rate": 7.004754358161649e-06, "loss": 1.025, "step": 443 }, { "epoch": 0.06333808844507846, "grad_norm": 0.45372721552848816, "learning_rate": 7.020602218700476e-06, "loss": 0.9676, "step": 444 }, { "epoch": 0.06348074179743224, "grad_norm": 0.43510833382606506, "learning_rate": 7.036450079239303e-06, "loss": 1.0614, "step": 445 }, { "epoch": 0.06362339514978602, "grad_norm": 0.4841681718826294, "learning_rate": 7.052297939778131e-06, "loss": 0.9938, "step": 446 }, { "epoch": 0.0637660485021398, "grad_norm": 0.5671361684799194, "learning_rate": 7.0681458003169574e-06, "loss": 1.0196, "step": 447 }, { "epoch": 0.06390870185449359, "grad_norm": 0.5770955085754395, "learning_rate": 7.083993660855785e-06, "loss": 0.9989, "step": 448 }, { "epoch": 0.06405135520684736, "grad_norm": 0.4592321217060089, "learning_rate": 7.099841521394613e-06, "loss": 0.9775, "step": 449 }, { "epoch": 0.06419400855920114, "grad_norm": 0.5942267775535583, "learning_rate": 7.1156893819334394e-06, "loss": 1.0381, "step": 450 }, { "epoch": 0.06433666191155492, "grad_norm": 0.531999409198761, "learning_rate": 7.131537242472267e-06, "loss": 0.9506, "step": 451 }, { "epoch": 0.0644793152639087, "grad_norm": 0.5397354960441589, "learning_rate": 7.147385103011094e-06, "loss": 0.9741, "step": 452 }, { "epoch": 0.06462196861626249, "grad_norm": 0.5519461035728455, "learning_rate": 7.163232963549921e-06, "loss": 1.0911, "step": 453 }, { "epoch": 0.06476462196861626, "grad_norm": 0.5628547668457031, "learning_rate": 7.179080824088749e-06, "loss": 1.169, "step": 454 }, { "epoch": 0.06490727532097004, "grad_norm": 0.44366130232810974, "learning_rate": 7.194928684627576e-06, "loss": 1.0363, "step": 455 }, { "epoch": 0.06504992867332382, "grad_norm": 0.48540574312210083, "learning_rate": 7.2107765451664034e-06, "loss": 0.9253, "step": 456 }, { "epoch": 0.0651925820256776, "grad_norm": 0.39855659008026123, "learning_rate": 7.22662440570523e-06, "loss": 1.0376, "step": 457 }, { "epoch": 0.06533523537803139, "grad_norm": 0.4191644489765167, "learning_rate": 7.242472266244057e-06, "loss": 0.9906, "step": 458 }, { "epoch": 0.06547788873038517, "grad_norm": 0.437701016664505, "learning_rate": 7.2583201267828854e-06, "loss": 1.0432, "step": 459 }, { "epoch": 0.06562054208273894, "grad_norm": 0.5492372512817383, "learning_rate": 7.274167987321712e-06, "loss": 1.0186, "step": 460 }, { "epoch": 0.06576319543509272, "grad_norm": 0.4746062755584717, "learning_rate": 7.29001584786054e-06, "loss": 1.0051, "step": 461 }, { "epoch": 0.0659058487874465, "grad_norm": 0.4426122009754181, "learning_rate": 7.305863708399367e-06, "loss": 0.9692, "step": 462 }, { "epoch": 0.06604850213980029, "grad_norm": 0.5334264039993286, "learning_rate": 7.321711568938193e-06, "loss": 1.0676, "step": 463 }, { "epoch": 0.06619115549215407, "grad_norm": 0.6350341439247131, "learning_rate": 7.337559429477021e-06, "loss": 1.0368, "step": 464 }, { "epoch": 0.06633380884450785, "grad_norm": 0.5225725769996643, "learning_rate": 7.3534072900158486e-06, "loss": 0.9821, "step": 465 }, { "epoch": 0.06647646219686162, "grad_norm": 0.4992254972457886, "learning_rate": 7.369255150554676e-06, "loss": 0.9978, "step": 466 }, { "epoch": 0.0666191155492154, "grad_norm": 0.4909706711769104, "learning_rate": 7.385103011093503e-06, "loss": 1.0252, "step": 467 }, { "epoch": 0.0667617689015692, "grad_norm": 0.48005351424217224, "learning_rate": 7.40095087163233e-06, "loss": 1.0211, "step": 468 }, { "epoch": 0.06690442225392297, "grad_norm": 0.40804851055145264, "learning_rate": 7.416798732171157e-06, "loss": 1.0521, "step": 469 }, { "epoch": 0.06704707560627675, "grad_norm": 0.4457440674304962, "learning_rate": 7.432646592709984e-06, "loss": 0.915, "step": 470 }, { "epoch": 0.06718972895863053, "grad_norm": 0.5630983114242554, "learning_rate": 7.4484944532488126e-06, "loss": 0.9437, "step": 471 }, { "epoch": 0.0673323823109843, "grad_norm": 0.4951111972332001, "learning_rate": 7.464342313787639e-06, "loss": 0.981, "step": 472 }, { "epoch": 0.06747503566333808, "grad_norm": 1.3637419939041138, "learning_rate": 7.480190174326466e-06, "loss": 0.8505, "step": 473 }, { "epoch": 0.06761768901569187, "grad_norm": 0.42957380414009094, "learning_rate": 7.496038034865294e-06, "loss": 1.0522, "step": 474 }, { "epoch": 0.06776034236804565, "grad_norm": 0.4950065016746521, "learning_rate": 7.5118858954041205e-06, "loss": 1.0665, "step": 475 }, { "epoch": 0.06790299572039943, "grad_norm": 0.4553673565387726, "learning_rate": 7.527733755942949e-06, "loss": 0.9747, "step": 476 }, { "epoch": 0.0680456490727532, "grad_norm": 0.6075873970985413, "learning_rate": 7.543581616481776e-06, "loss": 0.9686, "step": 477 }, { "epoch": 0.06818830242510698, "grad_norm": 0.4874385893344879, "learning_rate": 7.559429477020603e-06, "loss": 1.0179, "step": 478 }, { "epoch": 0.06833095577746078, "grad_norm": 0.44387388229370117, "learning_rate": 7.57527733755943e-06, "loss": 1.0731, "step": 479 }, { "epoch": 0.06847360912981455, "grad_norm": 0.5947974324226379, "learning_rate": 7.591125198098257e-06, "loss": 0.985, "step": 480 }, { "epoch": 0.06861626248216833, "grad_norm": 0.6631782650947571, "learning_rate": 7.606973058637085e-06, "loss": 1.0244, "step": 481 }, { "epoch": 0.06875891583452211, "grad_norm": 0.46486252546310425, "learning_rate": 7.622820919175912e-06, "loss": 0.9863, "step": 482 }, { "epoch": 0.06890156918687589, "grad_norm": 0.49008890986442566, "learning_rate": 7.638668779714739e-06, "loss": 0.9831, "step": 483 }, { "epoch": 0.06904422253922968, "grad_norm": 0.6009377837181091, "learning_rate": 7.654516640253566e-06, "loss": 0.9774, "step": 484 }, { "epoch": 0.06918687589158345, "grad_norm": 0.4419824182987213, "learning_rate": 7.670364500792394e-06, "loss": 0.9439, "step": 485 }, { "epoch": 0.06932952924393723, "grad_norm": 0.5547300577163696, "learning_rate": 7.686212361331222e-06, "loss": 0.9258, "step": 486 }, { "epoch": 0.06947218259629101, "grad_norm": 0.5227124094963074, "learning_rate": 7.702060221870048e-06, "loss": 1.0545, "step": 487 }, { "epoch": 0.06961483594864479, "grad_norm": 0.45763492584228516, "learning_rate": 7.717908082408875e-06, "loss": 0.952, "step": 488 }, { "epoch": 0.06975748930099858, "grad_norm": 0.563387393951416, "learning_rate": 7.733755942947703e-06, "loss": 0.9204, "step": 489 }, { "epoch": 0.06990014265335236, "grad_norm": 0.697047233581543, "learning_rate": 7.749603803486529e-06, "loss": 0.968, "step": 490 }, { "epoch": 0.07004279600570613, "grad_norm": 0.5165669322013855, "learning_rate": 7.765451664025358e-06, "loss": 1.0207, "step": 491 }, { "epoch": 0.07018544935805991, "grad_norm": 0.7232952117919922, "learning_rate": 7.781299524564184e-06, "loss": 0.78, "step": 492 }, { "epoch": 0.07032810271041369, "grad_norm": 0.5339188575744629, "learning_rate": 7.797147385103012e-06, "loss": 0.9616, "step": 493 }, { "epoch": 0.07047075606276748, "grad_norm": 0.4808948040008545, "learning_rate": 7.81299524564184e-06, "loss": 0.8511, "step": 494 }, { "epoch": 0.07061340941512126, "grad_norm": 0.5292257070541382, "learning_rate": 7.828843106180665e-06, "loss": 0.9998, "step": 495 }, { "epoch": 0.07075606276747504, "grad_norm": 0.5296969413757324, "learning_rate": 7.844690966719494e-06, "loss": 0.9505, "step": 496 }, { "epoch": 0.07089871611982881, "grad_norm": 0.6419568061828613, "learning_rate": 7.86053882725832e-06, "loss": 0.9637, "step": 497 }, { "epoch": 0.07104136947218259, "grad_norm": 0.900155782699585, "learning_rate": 7.876386687797148e-06, "loss": 0.771, "step": 498 }, { "epoch": 0.07118402282453638, "grad_norm": 0.48271769285202026, "learning_rate": 7.892234548335976e-06, "loss": 0.9403, "step": 499 }, { "epoch": 0.07132667617689016, "grad_norm": 0.5156475901603699, "learning_rate": 7.908082408874802e-06, "loss": 0.8973, "step": 500 }, { "epoch": 0.07146932952924394, "grad_norm": 0.5376582741737366, "learning_rate": 7.92393026941363e-06, "loss": 0.9655, "step": 501 }, { "epoch": 0.07161198288159772, "grad_norm": 0.4439854025840759, "learning_rate": 7.939778129952457e-06, "loss": 0.9807, "step": 502 }, { "epoch": 0.0717546362339515, "grad_norm": 0.537900984287262, "learning_rate": 7.955625990491284e-06, "loss": 1.0537, "step": 503 }, { "epoch": 0.07189728958630527, "grad_norm": 0.5043521523475647, "learning_rate": 7.971473851030112e-06, "loss": 0.9799, "step": 504 }, { "epoch": 0.07203994293865906, "grad_norm": 0.509040892124176, "learning_rate": 7.987321711568938e-06, "loss": 1.0014, "step": 505 }, { "epoch": 0.07218259629101284, "grad_norm": 0.5832608342170715, "learning_rate": 8.003169572107765e-06, "loss": 0.9699, "step": 506 }, { "epoch": 0.07232524964336662, "grad_norm": 0.5468351244926453, "learning_rate": 8.019017432646593e-06, "loss": 1.0328, "step": 507 }, { "epoch": 0.0724679029957204, "grad_norm": 0.6355152726173401, "learning_rate": 8.03486529318542e-06, "loss": 1.0518, "step": 508 }, { "epoch": 0.07261055634807417, "grad_norm": 0.4258562922477722, "learning_rate": 8.050713153724248e-06, "loss": 0.9437, "step": 509 }, { "epoch": 0.07275320970042796, "grad_norm": 0.514599084854126, "learning_rate": 8.066561014263076e-06, "loss": 1.0423, "step": 510 }, { "epoch": 0.07289586305278174, "grad_norm": 0.4540145993232727, "learning_rate": 8.082408874801902e-06, "loss": 0.9921, "step": 511 }, { "epoch": 0.07303851640513552, "grad_norm": 0.8550832867622375, "learning_rate": 8.09825673534073e-06, "loss": 0.8387, "step": 512 }, { "epoch": 0.0731811697574893, "grad_norm": 0.45189788937568665, "learning_rate": 8.114104595879557e-06, "loss": 1.0554, "step": 513 }, { "epoch": 0.07332382310984308, "grad_norm": 0.5321663022041321, "learning_rate": 8.129952456418385e-06, "loss": 0.8751, "step": 514 }, { "epoch": 0.07346647646219687, "grad_norm": 0.5127700567245483, "learning_rate": 8.145800316957212e-06, "loss": 0.972, "step": 515 }, { "epoch": 0.07360912981455064, "grad_norm": 1.4596971273422241, "learning_rate": 8.161648177496038e-06, "loss": 0.7484, "step": 516 }, { "epoch": 0.07375178316690442, "grad_norm": 0.5196806192398071, "learning_rate": 8.177496038034866e-06, "loss": 0.929, "step": 517 }, { "epoch": 0.0738944365192582, "grad_norm": 0.5517879724502563, "learning_rate": 8.193343898573693e-06, "loss": 0.8865, "step": 518 }, { "epoch": 0.07403708987161198, "grad_norm": 0.5994307994842529, "learning_rate": 8.209191759112521e-06, "loss": 0.8375, "step": 519 }, { "epoch": 0.07417974322396577, "grad_norm": 0.6686751246452332, "learning_rate": 8.225039619651349e-06, "loss": 0.9716, "step": 520 }, { "epoch": 0.07432239657631955, "grad_norm": 0.4750771224498749, "learning_rate": 8.240887480190175e-06, "loss": 0.9878, "step": 521 }, { "epoch": 0.07446504992867332, "grad_norm": 0.5097470283508301, "learning_rate": 8.256735340729002e-06, "loss": 1.0066, "step": 522 }, { "epoch": 0.0746077032810271, "grad_norm": 0.43969038128852844, "learning_rate": 8.27258320126783e-06, "loss": 0.9947, "step": 523 }, { "epoch": 0.07475035663338088, "grad_norm": 0.3874143064022064, "learning_rate": 8.288431061806656e-06, "loss": 1.0651, "step": 524 }, { "epoch": 0.07489300998573467, "grad_norm": 0.5227018594741821, "learning_rate": 8.304278922345485e-06, "loss": 0.9543, "step": 525 }, { "epoch": 0.07503566333808845, "grad_norm": 0.7560486197471619, "learning_rate": 8.320126782884311e-06, "loss": 1.0052, "step": 526 }, { "epoch": 0.07517831669044223, "grad_norm": 0.6004654765129089, "learning_rate": 8.335974643423139e-06, "loss": 1.0591, "step": 527 }, { "epoch": 0.075320970042796, "grad_norm": 0.4558890461921692, "learning_rate": 8.351822503961966e-06, "loss": 1.0117, "step": 528 }, { "epoch": 0.07546362339514978, "grad_norm": 0.5768234133720398, "learning_rate": 8.367670364500792e-06, "loss": 0.9102, "step": 529 }, { "epoch": 0.07560627674750357, "grad_norm": 0.5694892406463623, "learning_rate": 8.383518225039621e-06, "loss": 1.0243, "step": 530 }, { "epoch": 0.07574893009985735, "grad_norm": 0.49134907126426697, "learning_rate": 8.399366085578447e-06, "loss": 0.9375, "step": 531 }, { "epoch": 0.07589158345221113, "grad_norm": 0.5538228750228882, "learning_rate": 8.415213946117275e-06, "loss": 0.9334, "step": 532 }, { "epoch": 0.0760342368045649, "grad_norm": 0.48530998826026917, "learning_rate": 8.431061806656103e-06, "loss": 0.9813, "step": 533 }, { "epoch": 0.07617689015691868, "grad_norm": 0.4936850666999817, "learning_rate": 8.446909667194929e-06, "loss": 0.9924, "step": 534 }, { "epoch": 0.07631954350927246, "grad_norm": 0.43529391288757324, "learning_rate": 8.462757527733758e-06, "loss": 0.9429, "step": 535 }, { "epoch": 0.07646219686162625, "grad_norm": 0.48769012093544006, "learning_rate": 8.478605388272584e-06, "loss": 0.9897, "step": 536 }, { "epoch": 0.07660485021398003, "grad_norm": 0.5367408990859985, "learning_rate": 8.494453248811411e-06, "loss": 0.9396, "step": 537 }, { "epoch": 0.07674750356633381, "grad_norm": 0.5971816182136536, "learning_rate": 8.510301109350239e-06, "loss": 0.9168, "step": 538 }, { "epoch": 0.07689015691868759, "grad_norm": 0.7000044584274292, "learning_rate": 8.526148969889065e-06, "loss": 0.949, "step": 539 }, { "epoch": 0.07703281027104136, "grad_norm": 0.43918800354003906, "learning_rate": 8.541996830427893e-06, "loss": 1.0051, "step": 540 }, { "epoch": 0.07717546362339515, "grad_norm": 0.6692291498184204, "learning_rate": 8.55784469096672e-06, "loss": 0.9125, "step": 541 }, { "epoch": 0.07731811697574893, "grad_norm": 0.6336300373077393, "learning_rate": 8.573692551505548e-06, "loss": 1.036, "step": 542 }, { "epoch": 0.07746077032810271, "grad_norm": 0.47780126333236694, "learning_rate": 8.589540412044375e-06, "loss": 0.9203, "step": 543 }, { "epoch": 0.07760342368045649, "grad_norm": 0.5549113154411316, "learning_rate": 8.605388272583201e-06, "loss": 1.0006, "step": 544 }, { "epoch": 0.07774607703281027, "grad_norm": 0.6125926971435547, "learning_rate": 8.621236133122029e-06, "loss": 0.9076, "step": 545 }, { "epoch": 0.07788873038516406, "grad_norm": 0.5332480669021606, "learning_rate": 8.637083993660857e-06, "loss": 0.8981, "step": 546 }, { "epoch": 0.07803138373751783, "grad_norm": 0.5666670203208923, "learning_rate": 8.652931854199684e-06, "loss": 0.9091, "step": 547 }, { "epoch": 0.07817403708987161, "grad_norm": 0.5022792816162109, "learning_rate": 8.668779714738512e-06, "loss": 1.0507, "step": 548 }, { "epoch": 0.07831669044222539, "grad_norm": 0.5982717275619507, "learning_rate": 8.684627575277338e-06, "loss": 0.9029, "step": 549 }, { "epoch": 0.07845934379457917, "grad_norm": 0.6098309755325317, "learning_rate": 8.700475435816165e-06, "loss": 0.9194, "step": 550 }, { "epoch": 0.07860199714693296, "grad_norm": 0.5017668008804321, "learning_rate": 8.716323296354993e-06, "loss": 0.9662, "step": 551 }, { "epoch": 0.07874465049928674, "grad_norm": 0.5837364792823792, "learning_rate": 8.73217115689382e-06, "loss": 0.9717, "step": 552 }, { "epoch": 0.07888730385164051, "grad_norm": 0.4677683115005493, "learning_rate": 8.748019017432648e-06, "loss": 1.0237, "step": 553 }, { "epoch": 0.07902995720399429, "grad_norm": 0.502824604511261, "learning_rate": 8.763866877971474e-06, "loss": 0.9298, "step": 554 }, { "epoch": 0.07917261055634807, "grad_norm": 0.48983949422836304, "learning_rate": 8.779714738510302e-06, "loss": 1.0045, "step": 555 }, { "epoch": 0.07931526390870186, "grad_norm": 0.500971257686615, "learning_rate": 8.79556259904913e-06, "loss": 1.0793, "step": 556 }, { "epoch": 0.07945791726105564, "grad_norm": 0.7364229559898376, "learning_rate": 8.811410459587957e-06, "loss": 0.9338, "step": 557 }, { "epoch": 0.07960057061340942, "grad_norm": 0.5840064287185669, "learning_rate": 8.827258320126783e-06, "loss": 0.9634, "step": 558 }, { "epoch": 0.0797432239657632, "grad_norm": 0.6440963745117188, "learning_rate": 8.84310618066561e-06, "loss": 0.9646, "step": 559 }, { "epoch": 0.07988587731811697, "grad_norm": 0.4333682060241699, "learning_rate": 8.858954041204438e-06, "loss": 0.9591, "step": 560 }, { "epoch": 0.08002853067047076, "grad_norm": 0.7911944389343262, "learning_rate": 8.874801901743266e-06, "loss": 0.9921, "step": 561 }, { "epoch": 0.08017118402282454, "grad_norm": 0.5133646130561829, "learning_rate": 8.890649762282093e-06, "loss": 0.991, "step": 562 }, { "epoch": 0.08031383737517832, "grad_norm": 0.6080657243728638, "learning_rate": 8.90649762282092e-06, "loss": 0.9691, "step": 563 }, { "epoch": 0.0804564907275321, "grad_norm": 0.5872765183448792, "learning_rate": 8.922345483359747e-06, "loss": 0.957, "step": 564 }, { "epoch": 0.08059914407988587, "grad_norm": 0.5557759404182434, "learning_rate": 8.938193343898574e-06, "loss": 0.962, "step": 565 }, { "epoch": 0.08074179743223965, "grad_norm": 0.501661479473114, "learning_rate": 8.954041204437402e-06, "loss": 0.9534, "step": 566 }, { "epoch": 0.08088445078459344, "grad_norm": 0.6546953320503235, "learning_rate": 8.96988906497623e-06, "loss": 0.9015, "step": 567 }, { "epoch": 0.08102710413694722, "grad_norm": 0.601220428943634, "learning_rate": 8.985736925515056e-06, "loss": 0.8817, "step": 568 }, { "epoch": 0.081169757489301, "grad_norm": 0.5301333665847778, "learning_rate": 9.001584786053883e-06, "loss": 0.9456, "step": 569 }, { "epoch": 0.08131241084165478, "grad_norm": 0.457639217376709, "learning_rate": 9.01743264659271e-06, "loss": 0.9902, "step": 570 }, { "epoch": 0.08145506419400855, "grad_norm": 0.5956770777702332, "learning_rate": 9.033280507131538e-06, "loss": 0.9666, "step": 571 }, { "epoch": 0.08159771754636234, "grad_norm": 0.6892085671424866, "learning_rate": 9.049128367670366e-06, "loss": 0.9116, "step": 572 }, { "epoch": 0.08174037089871612, "grad_norm": 0.6338330507278442, "learning_rate": 9.064976228209192e-06, "loss": 0.9662, "step": 573 }, { "epoch": 0.0818830242510699, "grad_norm": 0.5096848011016846, "learning_rate": 9.08082408874802e-06, "loss": 1.0214, "step": 574 }, { "epoch": 0.08202567760342368, "grad_norm": 0.6919476985931396, "learning_rate": 9.096671949286847e-06, "loss": 0.9159, "step": 575 }, { "epoch": 0.08216833095577745, "grad_norm": 0.4773543179035187, "learning_rate": 9.112519809825675e-06, "loss": 0.9714, "step": 576 }, { "epoch": 0.08231098430813125, "grad_norm": 0.6229044198989868, "learning_rate": 9.128367670364502e-06, "loss": 0.9271, "step": 577 }, { "epoch": 0.08245363766048502, "grad_norm": 0.7057109475135803, "learning_rate": 9.144215530903328e-06, "loss": 0.9824, "step": 578 }, { "epoch": 0.0825962910128388, "grad_norm": 0.6915001273155212, "learning_rate": 9.160063391442156e-06, "loss": 0.8592, "step": 579 }, { "epoch": 0.08273894436519258, "grad_norm": 0.5569952130317688, "learning_rate": 9.175911251980984e-06, "loss": 0.9599, "step": 580 }, { "epoch": 0.08288159771754636, "grad_norm": 0.5352381467819214, "learning_rate": 9.19175911251981e-06, "loss": 0.879, "step": 581 }, { "epoch": 0.08302425106990015, "grad_norm": 0.43738898634910583, "learning_rate": 9.207606973058639e-06, "loss": 0.9443, "step": 582 }, { "epoch": 0.08316690442225393, "grad_norm": 1.4304313659667969, "learning_rate": 9.223454833597465e-06, "loss": 0.7674, "step": 583 }, { "epoch": 0.0833095577746077, "grad_norm": 0.5617502331733704, "learning_rate": 9.239302694136292e-06, "loss": 0.875, "step": 584 }, { "epoch": 0.08345221112696148, "grad_norm": 0.6359624266624451, "learning_rate": 9.25515055467512e-06, "loss": 1.0391, "step": 585 }, { "epoch": 0.08359486447931526, "grad_norm": 0.5497215390205383, "learning_rate": 9.270998415213946e-06, "loss": 0.9309, "step": 586 }, { "epoch": 0.08373751783166905, "grad_norm": 0.5523216724395752, "learning_rate": 9.286846275752775e-06, "loss": 0.9156, "step": 587 }, { "epoch": 0.08388017118402283, "grad_norm": 0.5747171640396118, "learning_rate": 9.302694136291601e-06, "loss": 0.8624, "step": 588 }, { "epoch": 0.0840228245363766, "grad_norm": 0.7639681100845337, "learning_rate": 9.318541996830429e-06, "loss": 0.9966, "step": 589 }, { "epoch": 0.08416547788873038, "grad_norm": 0.4723168611526489, "learning_rate": 9.334389857369256e-06, "loss": 1.0159, "step": 590 }, { "epoch": 0.08430813124108416, "grad_norm": 0.46684974431991577, "learning_rate": 9.350237717908082e-06, "loss": 1.0271, "step": 591 }, { "epoch": 0.08445078459343795, "grad_norm": 0.7794578075408936, "learning_rate": 9.366085578446912e-06, "loss": 0.9784, "step": 592 }, { "epoch": 0.08459343794579173, "grad_norm": 0.6551257371902466, "learning_rate": 9.381933438985737e-06, "loss": 1.0404, "step": 593 }, { "epoch": 0.08473609129814551, "grad_norm": 0.41763991117477417, "learning_rate": 9.397781299524565e-06, "loss": 0.9695, "step": 594 }, { "epoch": 0.08487874465049929, "grad_norm": 0.6196808815002441, "learning_rate": 9.413629160063393e-06, "loss": 0.9565, "step": 595 }, { "epoch": 0.08502139800285306, "grad_norm": 0.7163625955581665, "learning_rate": 9.429477020602219e-06, "loss": 0.9781, "step": 596 }, { "epoch": 0.08516405135520684, "grad_norm": 0.5696227550506592, "learning_rate": 9.445324881141046e-06, "loss": 1.0412, "step": 597 }, { "epoch": 0.08530670470756063, "grad_norm": 0.5615867376327515, "learning_rate": 9.461172741679874e-06, "loss": 1.0089, "step": 598 }, { "epoch": 0.08544935805991441, "grad_norm": 0.547003984451294, "learning_rate": 9.477020602218701e-06, "loss": 0.9197, "step": 599 }, { "epoch": 0.08559201141226819, "grad_norm": 0.5029258131980896, "learning_rate": 9.492868462757529e-06, "loss": 1.0293, "step": 600 }, { "epoch": 0.08573466476462197, "grad_norm": 0.5370084047317505, "learning_rate": 9.508716323296355e-06, "loss": 0.9866, "step": 601 }, { "epoch": 0.08587731811697574, "grad_norm": 0.4286113381385803, "learning_rate": 9.524564183835183e-06, "loss": 0.9768, "step": 602 }, { "epoch": 0.08601997146932953, "grad_norm": 0.6612531542778015, "learning_rate": 9.54041204437401e-06, "loss": 0.8579, "step": 603 }, { "epoch": 0.08616262482168331, "grad_norm": 0.5443513989448547, "learning_rate": 9.556259904912838e-06, "loss": 0.9375, "step": 604 }, { "epoch": 0.08630527817403709, "grad_norm": 0.5907576680183411, "learning_rate": 9.572107765451665e-06, "loss": 0.9235, "step": 605 }, { "epoch": 0.08644793152639087, "grad_norm": 0.5967205762863159, "learning_rate": 9.587955625990491e-06, "loss": 0.9973, "step": 606 }, { "epoch": 0.08659058487874464, "grad_norm": 0.8942917585372925, "learning_rate": 9.603803486529319e-06, "loss": 0.8579, "step": 607 }, { "epoch": 0.08673323823109844, "grad_norm": 0.6428115367889404, "learning_rate": 9.619651347068147e-06, "loss": 0.9871, "step": 608 }, { "epoch": 0.08687589158345221, "grad_norm": 0.4508056342601776, "learning_rate": 9.635499207606974e-06, "loss": 0.9676, "step": 609 }, { "epoch": 0.08701854493580599, "grad_norm": 0.7451004981994629, "learning_rate": 9.651347068145802e-06, "loss": 0.9257, "step": 610 }, { "epoch": 0.08716119828815977, "grad_norm": 0.6123510003089905, "learning_rate": 9.667194928684628e-06, "loss": 0.9135, "step": 611 }, { "epoch": 0.08730385164051355, "grad_norm": 0.47386524081230164, "learning_rate": 9.683042789223455e-06, "loss": 0.9185, "step": 612 }, { "epoch": 0.08744650499286734, "grad_norm": 0.6601650714874268, "learning_rate": 9.698890649762283e-06, "loss": 0.9124, "step": 613 }, { "epoch": 0.08758915834522112, "grad_norm": 0.5682607293128967, "learning_rate": 9.71473851030111e-06, "loss": 0.9254, "step": 614 }, { "epoch": 0.0877318116975749, "grad_norm": 0.5714483261108398, "learning_rate": 9.730586370839936e-06, "loss": 0.9514, "step": 615 }, { "epoch": 0.08787446504992867, "grad_norm": 0.7924468517303467, "learning_rate": 9.746434231378766e-06, "loss": 0.887, "step": 616 }, { "epoch": 0.08801711840228245, "grad_norm": 0.7420808672904968, "learning_rate": 9.762282091917592e-06, "loss": 0.9195, "step": 617 }, { "epoch": 0.08815977175463624, "grad_norm": 0.6279465556144714, "learning_rate": 9.77812995245642e-06, "loss": 0.916, "step": 618 }, { "epoch": 0.08830242510699002, "grad_norm": 0.5048345923423767, "learning_rate": 9.793977812995247e-06, "loss": 0.9049, "step": 619 }, { "epoch": 0.0884450784593438, "grad_norm": 0.5561876893043518, "learning_rate": 9.809825673534073e-06, "loss": 0.8861, "step": 620 }, { "epoch": 0.08858773181169757, "grad_norm": 0.598447859287262, "learning_rate": 9.825673534072902e-06, "loss": 0.9366, "step": 621 }, { "epoch": 0.08873038516405135, "grad_norm": 0.6633095741271973, "learning_rate": 9.841521394611728e-06, "loss": 0.887, "step": 622 }, { "epoch": 0.08887303851640514, "grad_norm": 0.7378283739089966, "learning_rate": 9.857369255150556e-06, "loss": 0.8606, "step": 623 }, { "epoch": 0.08901569186875892, "grad_norm": 0.600425660610199, "learning_rate": 9.873217115689383e-06, "loss": 0.8809, "step": 624 }, { "epoch": 0.0891583452211127, "grad_norm": 0.6276618838310242, "learning_rate": 9.88906497622821e-06, "loss": 0.8641, "step": 625 }, { "epoch": 0.08930099857346648, "grad_norm": 0.6094858646392822, "learning_rate": 9.904912836767039e-06, "loss": 0.9641, "step": 626 }, { "epoch": 0.08944365192582025, "grad_norm": 0.5553627610206604, "learning_rate": 9.920760697305864e-06, "loss": 0.8811, "step": 627 }, { "epoch": 0.08958630527817403, "grad_norm": 0.5599763989448547, "learning_rate": 9.936608557844692e-06, "loss": 0.9504, "step": 628 }, { "epoch": 0.08972895863052782, "grad_norm": 0.507016658782959, "learning_rate": 9.95245641838352e-06, "loss": 0.9171, "step": 629 }, { "epoch": 0.0898716119828816, "grad_norm": 0.74651700258255, "learning_rate": 9.968304278922346e-06, "loss": 0.9215, "step": 630 }, { "epoch": 0.09001426533523538, "grad_norm": 0.6789948344230652, "learning_rate": 9.984152139461173e-06, "loss": 0.9879, "step": 631 }, { "epoch": 0.09015691868758915, "grad_norm": 0.6133556962013245, "learning_rate": 1e-05, "loss": 0.9044, "step": 632 }, { "epoch": 0.09029957203994293, "grad_norm": 0.5088140964508057, "learning_rate": 9.999999940704466e-06, "loss": 0.9207, "step": 633 }, { "epoch": 0.09044222539229672, "grad_norm": 0.46542754769325256, "learning_rate": 9.999999762817858e-06, "loss": 1.0222, "step": 634 }, { "epoch": 0.0905848787446505, "grad_norm": 0.7682946920394897, "learning_rate": 9.999999466340187e-06, "loss": 0.8783, "step": 635 }, { "epoch": 0.09072753209700428, "grad_norm": 0.5734198689460754, "learning_rate": 9.999999051271458e-06, "loss": 0.9113, "step": 636 }, { "epoch": 0.09087018544935806, "grad_norm": 0.47362226247787476, "learning_rate": 9.999998517611677e-06, "loss": 0.9572, "step": 637 }, { "epoch": 0.09101283880171183, "grad_norm": 0.6190581321716309, "learning_rate": 9.999997865360862e-06, "loss": 0.7634, "step": 638 }, { "epoch": 0.09115549215406563, "grad_norm": 0.5904908180236816, "learning_rate": 9.999997094519025e-06, "loss": 0.8617, "step": 639 }, { "epoch": 0.0912981455064194, "grad_norm": 0.6041175723075867, "learning_rate": 9.999996205086187e-06, "loss": 0.9171, "step": 640 }, { "epoch": 0.09144079885877318, "grad_norm": 0.575161337852478, "learning_rate": 9.999995197062365e-06, "loss": 1.0264, "step": 641 }, { "epoch": 0.09158345221112696, "grad_norm": 0.5917263627052307, "learning_rate": 9.999994070447587e-06, "loss": 0.9185, "step": 642 }, { "epoch": 0.09172610556348074, "grad_norm": 0.5231802463531494, "learning_rate": 9.999992825241878e-06, "loss": 0.8656, "step": 643 }, { "epoch": 0.09186875891583453, "grad_norm": 0.6748559474945068, "learning_rate": 9.999991461445269e-06, "loss": 0.8496, "step": 644 }, { "epoch": 0.0920114122681883, "grad_norm": 0.5157588720321655, "learning_rate": 9.99998997905779e-06, "loss": 0.9527, "step": 645 }, { "epoch": 0.09215406562054208, "grad_norm": 0.7597405910491943, "learning_rate": 9.999988378079475e-06, "loss": 0.9636, "step": 646 }, { "epoch": 0.09229671897289586, "grad_norm": 0.6512150168418884, "learning_rate": 9.999986658510367e-06, "loss": 0.9696, "step": 647 }, { "epoch": 0.09243937232524964, "grad_norm": 0.5848551392555237, "learning_rate": 9.999984820350503e-06, "loss": 0.963, "step": 648 }, { "epoch": 0.09258202567760343, "grad_norm": 0.6997355818748474, "learning_rate": 9.999982863599927e-06, "loss": 0.9536, "step": 649 }, { "epoch": 0.09272467902995721, "grad_norm": 0.5355712175369263, "learning_rate": 9.999980788258687e-06, "loss": 0.8946, "step": 650 }, { "epoch": 0.09286733238231099, "grad_norm": 0.9124418497085571, "learning_rate": 9.999978594326831e-06, "loss": 0.7364, "step": 651 }, { "epoch": 0.09300998573466476, "grad_norm": 0.6042007803916931, "learning_rate": 9.999976281804411e-06, "loss": 0.8896, "step": 652 }, { "epoch": 0.09315263908701854, "grad_norm": 0.6304923295974731, "learning_rate": 9.999973850691483e-06, "loss": 0.8782, "step": 653 }, { "epoch": 0.09329529243937233, "grad_norm": 0.8566015362739563, "learning_rate": 9.9999713009881e-06, "loss": 0.8402, "step": 654 }, { "epoch": 0.09343794579172611, "grad_norm": 0.6642569303512573, "learning_rate": 9.99996863269433e-06, "loss": 0.9656, "step": 655 }, { "epoch": 0.09358059914407989, "grad_norm": 0.5763037800788879, "learning_rate": 9.999965845810232e-06, "loss": 0.9491, "step": 656 }, { "epoch": 0.09372325249643366, "grad_norm": 0.7415080666542053, "learning_rate": 9.999962940335873e-06, "loss": 0.9594, "step": 657 }, { "epoch": 0.09386590584878744, "grad_norm": 0.5235511064529419, "learning_rate": 9.99995991627132e-06, "loss": 0.9114, "step": 658 }, { "epoch": 0.09400855920114122, "grad_norm": 0.7261329293251038, "learning_rate": 9.999956773616647e-06, "loss": 0.9594, "step": 659 }, { "epoch": 0.09415121255349501, "grad_norm": 0.6547417044639587, "learning_rate": 9.999953512371928e-06, "loss": 0.8819, "step": 660 }, { "epoch": 0.09429386590584879, "grad_norm": 0.7186365723609924, "learning_rate": 9.99995013253724e-06, "loss": 0.9065, "step": 661 }, { "epoch": 0.09443651925820257, "grad_norm": 0.6450744867324829, "learning_rate": 9.999946634112662e-06, "loss": 0.8491, "step": 662 }, { "epoch": 0.09457917261055634, "grad_norm": 0.5700588226318359, "learning_rate": 9.99994301709828e-06, "loss": 0.8689, "step": 663 }, { "epoch": 0.09472182596291012, "grad_norm": 0.6099893450737, "learning_rate": 9.999939281494177e-06, "loss": 0.8701, "step": 664 }, { "epoch": 0.09486447931526391, "grad_norm": 0.4662160575389862, "learning_rate": 9.999935427300444e-06, "loss": 0.9128, "step": 665 }, { "epoch": 0.09500713266761769, "grad_norm": 0.7023544907569885, "learning_rate": 9.999931454517168e-06, "loss": 0.882, "step": 666 }, { "epoch": 0.09514978601997147, "grad_norm": 0.5410977005958557, "learning_rate": 9.99992736314445e-06, "loss": 0.9134, "step": 667 }, { "epoch": 0.09529243937232525, "grad_norm": 0.7053019404411316, "learning_rate": 9.999923153182382e-06, "loss": 0.8521, "step": 668 }, { "epoch": 0.09543509272467902, "grad_norm": 0.4959987998008728, "learning_rate": 9.999918824631065e-06, "loss": 0.9275, "step": 669 }, { "epoch": 0.09557774607703282, "grad_norm": 0.6551489233970642, "learning_rate": 9.999914377490601e-06, "loss": 0.9987, "step": 670 }, { "epoch": 0.0957203994293866, "grad_norm": 0.43556663393974304, "learning_rate": 9.999909811761098e-06, "loss": 1.0103, "step": 671 }, { "epoch": 0.09586305278174037, "grad_norm": 0.5840201377868652, "learning_rate": 9.999905127442662e-06, "loss": 0.927, "step": 672 }, { "epoch": 0.09600570613409415, "grad_norm": 0.6692790985107422, "learning_rate": 9.999900324535404e-06, "loss": 0.9702, "step": 673 }, { "epoch": 0.09614835948644793, "grad_norm": 0.7664057016372681, "learning_rate": 9.999895403039439e-06, "loss": 0.923, "step": 674 }, { "epoch": 0.09629101283880172, "grad_norm": 0.5922096371650696, "learning_rate": 9.999890362954883e-06, "loss": 0.9974, "step": 675 }, { "epoch": 0.0964336661911555, "grad_norm": 0.6685681939125061, "learning_rate": 9.999885204281858e-06, "loss": 0.8513, "step": 676 }, { "epoch": 0.09657631954350927, "grad_norm": 0.7253565192222595, "learning_rate": 9.999879927020482e-06, "loss": 0.7313, "step": 677 }, { "epoch": 0.09671897289586305, "grad_norm": 0.48530927300453186, "learning_rate": 9.999874531170883e-06, "loss": 0.8826, "step": 678 }, { "epoch": 0.09686162624821683, "grad_norm": 0.6055790781974792, "learning_rate": 9.99986901673319e-06, "loss": 0.9327, "step": 679 }, { "epoch": 0.09700427960057062, "grad_norm": 0.6805019974708557, "learning_rate": 9.99986338370753e-06, "loss": 0.9407, "step": 680 }, { "epoch": 0.0971469329529244, "grad_norm": 0.5405174493789673, "learning_rate": 9.999857632094039e-06, "loss": 0.9735, "step": 681 }, { "epoch": 0.09728958630527818, "grad_norm": 0.6433508992195129, "learning_rate": 9.999851761892854e-06, "loss": 0.955, "step": 682 }, { "epoch": 0.09743223965763195, "grad_norm": 0.6980995535850525, "learning_rate": 9.999845773104113e-06, "loss": 0.7192, "step": 683 }, { "epoch": 0.09757489300998573, "grad_norm": 0.5696197152137756, "learning_rate": 9.99983966572796e-06, "loss": 0.917, "step": 684 }, { "epoch": 0.09771754636233952, "grad_norm": 0.7470669150352478, "learning_rate": 9.999833439764536e-06, "loss": 0.9455, "step": 685 }, { "epoch": 0.0978601997146933, "grad_norm": 0.6613849997520447, "learning_rate": 9.999827095213994e-06, "loss": 0.9069, "step": 686 }, { "epoch": 0.09800285306704708, "grad_norm": 0.7304914593696594, "learning_rate": 9.999820632076478e-06, "loss": 0.8804, "step": 687 }, { "epoch": 0.09814550641940085, "grad_norm": 0.47209638357162476, "learning_rate": 9.999814050352148e-06, "loss": 0.991, "step": 688 }, { "epoch": 0.09828815977175463, "grad_norm": 0.5041055083274841, "learning_rate": 9.999807350041156e-06, "loss": 0.9164, "step": 689 }, { "epoch": 0.09843081312410841, "grad_norm": 0.7846484780311584, "learning_rate": 9.99980053114366e-06, "loss": 0.8503, "step": 690 }, { "epoch": 0.0985734664764622, "grad_norm": 0.5664672255516052, "learning_rate": 9.999793593659825e-06, "loss": 0.9555, "step": 691 }, { "epoch": 0.09871611982881598, "grad_norm": 0.5747510194778442, "learning_rate": 9.999786537589815e-06, "loss": 0.892, "step": 692 }, { "epoch": 0.09885877318116976, "grad_norm": 0.7297970056533813, "learning_rate": 9.999779362933795e-06, "loss": 0.896, "step": 693 }, { "epoch": 0.09900142653352353, "grad_norm": 0.7442668676376343, "learning_rate": 9.999772069691938e-06, "loss": 0.8587, "step": 694 }, { "epoch": 0.09914407988587731, "grad_norm": 0.623637318611145, "learning_rate": 9.999764657864415e-06, "loss": 0.9335, "step": 695 }, { "epoch": 0.0992867332382311, "grad_norm": 0.7037668824195862, "learning_rate": 9.999757127451402e-06, "loss": 0.8621, "step": 696 }, { "epoch": 0.09942938659058488, "grad_norm": 0.5125731825828552, "learning_rate": 9.999749478453078e-06, "loss": 0.9325, "step": 697 }, { "epoch": 0.09957203994293866, "grad_norm": 0.8168233036994934, "learning_rate": 9.999741710869623e-06, "loss": 0.9487, "step": 698 }, { "epoch": 0.09971469329529244, "grad_norm": 0.6448724865913391, "learning_rate": 9.999733824701226e-06, "loss": 0.8703, "step": 699 }, { "epoch": 0.09985734664764621, "grad_norm": 0.7140878438949585, "learning_rate": 9.999725819948068e-06, "loss": 0.8607, "step": 700 }, { "epoch": 0.1, "grad_norm": 0.6680331230163574, "learning_rate": 9.999717696610343e-06, "loss": 0.9373, "step": 701 }, { "epoch": 0.10014265335235378, "grad_norm": 0.642981767654419, "learning_rate": 9.999709454688243e-06, "loss": 0.9686, "step": 702 }, { "epoch": 0.10028530670470756, "grad_norm": 0.7402138710021973, "learning_rate": 9.999701094181962e-06, "loss": 0.8575, "step": 703 }, { "epoch": 0.10042796005706134, "grad_norm": 0.6488072872161865, "learning_rate": 9.999692615091699e-06, "loss": 0.892, "step": 704 }, { "epoch": 0.10057061340941512, "grad_norm": 0.5455636978149414, "learning_rate": 9.999684017417653e-06, "loss": 0.8922, "step": 705 }, { "epoch": 0.10071326676176891, "grad_norm": 0.6167206764221191, "learning_rate": 9.999675301160032e-06, "loss": 0.9615, "step": 706 }, { "epoch": 0.10085592011412269, "grad_norm": 0.6116539239883423, "learning_rate": 9.999666466319041e-06, "loss": 0.9432, "step": 707 }, { "epoch": 0.10099857346647646, "grad_norm": 0.6285659074783325, "learning_rate": 9.99965751289489e-06, "loss": 0.8619, "step": 708 }, { "epoch": 0.10114122681883024, "grad_norm": 0.664391815662384, "learning_rate": 9.999648440887789e-06, "loss": 0.8443, "step": 709 }, { "epoch": 0.10128388017118402, "grad_norm": 0.7672937512397766, "learning_rate": 9.999639250297956e-06, "loss": 0.9184, "step": 710 }, { "epoch": 0.10142653352353781, "grad_norm": 0.8006978034973145, "learning_rate": 9.999629941125608e-06, "loss": 0.8876, "step": 711 }, { "epoch": 0.10156918687589159, "grad_norm": 0.6475245356559753, "learning_rate": 9.999620513370965e-06, "loss": 0.9251, "step": 712 }, { "epoch": 0.10171184022824536, "grad_norm": 0.720442533493042, "learning_rate": 9.999610967034252e-06, "loss": 0.9017, "step": 713 }, { "epoch": 0.10185449358059914, "grad_norm": 0.6661159992218018, "learning_rate": 9.999601302115693e-06, "loss": 0.8709, "step": 714 }, { "epoch": 0.10199714693295292, "grad_norm": 0.6637430191040039, "learning_rate": 9.999591518615521e-06, "loss": 0.9481, "step": 715 }, { "epoch": 0.10213980028530671, "grad_norm": 0.5911180377006531, "learning_rate": 9.999581616533964e-06, "loss": 0.9107, "step": 716 }, { "epoch": 0.10228245363766049, "grad_norm": 0.6676122546195984, "learning_rate": 9.99957159587126e-06, "loss": 0.9506, "step": 717 }, { "epoch": 0.10242510699001427, "grad_norm": 0.4627430737018585, "learning_rate": 9.999561456627645e-06, "loss": 0.9372, "step": 718 }, { "epoch": 0.10256776034236804, "grad_norm": 0.7318198680877686, "learning_rate": 9.999551198803362e-06, "loss": 0.8507, "step": 719 }, { "epoch": 0.10271041369472182, "grad_norm": 0.8023821115493774, "learning_rate": 9.99954082239865e-06, "loss": 0.8392, "step": 720 }, { "epoch": 0.1028530670470756, "grad_norm": 0.6835203766822815, "learning_rate": 9.999530327413757e-06, "loss": 0.8794, "step": 721 }, { "epoch": 0.10299572039942939, "grad_norm": 0.47203439474105835, "learning_rate": 9.999519713848934e-06, "loss": 1.0079, "step": 722 }, { "epoch": 0.10313837375178317, "grad_norm": 0.5977882742881775, "learning_rate": 9.999508981704432e-06, "loss": 0.8756, "step": 723 }, { "epoch": 0.10328102710413695, "grad_norm": 0.6931073069572449, "learning_rate": 9.999498130980502e-06, "loss": 0.7312, "step": 724 }, { "epoch": 0.10342368045649072, "grad_norm": 0.7802338004112244, "learning_rate": 9.999487161677404e-06, "loss": 0.8725, "step": 725 }, { "epoch": 0.1035663338088445, "grad_norm": 0.9021378755569458, "learning_rate": 9.9994760737954e-06, "loss": 0.8778, "step": 726 }, { "epoch": 0.1037089871611983, "grad_norm": 0.4828585982322693, "learning_rate": 9.999464867334751e-06, "loss": 0.8971, "step": 727 }, { "epoch": 0.10385164051355207, "grad_norm": 0.4677823781967163, "learning_rate": 9.99945354229572e-06, "loss": 0.9673, "step": 728 }, { "epoch": 0.10399429386590585, "grad_norm": 0.4684864580631256, "learning_rate": 9.999442098678582e-06, "loss": 0.9235, "step": 729 }, { "epoch": 0.10413694721825963, "grad_norm": 0.7469426989555359, "learning_rate": 9.999430536483603e-06, "loss": 0.9443, "step": 730 }, { "epoch": 0.1042796005706134, "grad_norm": 0.6579768061637878, "learning_rate": 9.999418855711061e-06, "loss": 0.9426, "step": 731 }, { "epoch": 0.1044222539229672, "grad_norm": 0.5234336853027344, "learning_rate": 9.99940705636123e-06, "loss": 0.9752, "step": 732 }, { "epoch": 0.10456490727532097, "grad_norm": 0.8590401411056519, "learning_rate": 9.999395138434392e-06, "loss": 0.8162, "step": 733 }, { "epoch": 0.10470756062767475, "grad_norm": 0.8299201130867004, "learning_rate": 9.999383101930826e-06, "loss": 0.8756, "step": 734 }, { "epoch": 0.10485021398002853, "grad_norm": 0.7054989337921143, "learning_rate": 9.999370946850824e-06, "loss": 0.964, "step": 735 }, { "epoch": 0.1049928673323823, "grad_norm": 0.8901962637901306, "learning_rate": 9.999358673194667e-06, "loss": 0.8405, "step": 736 }, { "epoch": 0.1051355206847361, "grad_norm": 0.8690089583396912, "learning_rate": 9.999346280962652e-06, "loss": 0.8672, "step": 737 }, { "epoch": 0.10527817403708987, "grad_norm": 0.7922226190567017, "learning_rate": 9.999333770155071e-06, "loss": 0.8528, "step": 738 }, { "epoch": 0.10542082738944365, "grad_norm": 0.5827857255935669, "learning_rate": 9.999321140772217e-06, "loss": 0.9091, "step": 739 }, { "epoch": 0.10556348074179743, "grad_norm": 0.924829363822937, "learning_rate": 9.999308392814397e-06, "loss": 0.9159, "step": 740 }, { "epoch": 0.10570613409415121, "grad_norm": 0.6799506545066833, "learning_rate": 9.999295526281906e-06, "loss": 0.9056, "step": 741 }, { "epoch": 0.105848787446505, "grad_norm": 0.6216866374015808, "learning_rate": 9.999282541175056e-06, "loss": 0.8959, "step": 742 }, { "epoch": 0.10599144079885878, "grad_norm": 0.5728886723518372, "learning_rate": 9.999269437494148e-06, "loss": 0.9833, "step": 743 }, { "epoch": 0.10613409415121255, "grad_norm": 0.7292998433113098, "learning_rate": 9.999256215239497e-06, "loss": 0.8613, "step": 744 }, { "epoch": 0.10627674750356633, "grad_norm": 0.6396505832672119, "learning_rate": 9.999242874411416e-06, "loss": 0.9325, "step": 745 }, { "epoch": 0.10641940085592011, "grad_norm": 0.7370311617851257, "learning_rate": 9.999229415010222e-06, "loss": 0.8855, "step": 746 }, { "epoch": 0.10656205420827389, "grad_norm": 0.5834444761276245, "learning_rate": 9.99921583703623e-06, "loss": 0.9271, "step": 747 }, { "epoch": 0.10670470756062768, "grad_norm": 0.49565738439559937, "learning_rate": 9.99920214048977e-06, "loss": 0.9065, "step": 748 }, { "epoch": 0.10684736091298146, "grad_norm": 0.7068101763725281, "learning_rate": 9.99918832537116e-06, "loss": 0.8554, "step": 749 }, { "epoch": 0.10699001426533523, "grad_norm": 0.881374716758728, "learning_rate": 9.99917439168073e-06, "loss": 0.9012, "step": 750 }, { "epoch": 0.10713266761768901, "grad_norm": 0.6914541721343994, "learning_rate": 9.999160339418812e-06, "loss": 0.9369, "step": 751 }, { "epoch": 0.10727532097004279, "grad_norm": 0.6502618789672852, "learning_rate": 9.999146168585736e-06, "loss": 0.8951, "step": 752 }, { "epoch": 0.10741797432239658, "grad_norm": 0.48643290996551514, "learning_rate": 9.999131879181841e-06, "loss": 0.886, "step": 753 }, { "epoch": 0.10756062767475036, "grad_norm": 0.5962386727333069, "learning_rate": 9.999117471207465e-06, "loss": 0.8911, "step": 754 }, { "epoch": 0.10770328102710414, "grad_norm": 0.4888778626918793, "learning_rate": 9.999102944662948e-06, "loss": 0.9166, "step": 755 }, { "epoch": 0.10784593437945791, "grad_norm": 0.6880698204040527, "learning_rate": 9.999088299548636e-06, "loss": 0.8551, "step": 756 }, { "epoch": 0.10798858773181169, "grad_norm": 0.8405009508132935, "learning_rate": 9.999073535864879e-06, "loss": 0.8704, "step": 757 }, { "epoch": 0.10813124108416548, "grad_norm": 0.6738948225975037, "learning_rate": 9.999058653612022e-06, "loss": 0.9412, "step": 758 }, { "epoch": 0.10827389443651926, "grad_norm": 0.688207745552063, "learning_rate": 9.999043652790421e-06, "loss": 0.9065, "step": 759 }, { "epoch": 0.10841654778887304, "grad_norm": 0.6640740633010864, "learning_rate": 9.999028533400432e-06, "loss": 0.9356, "step": 760 }, { "epoch": 0.10855920114122682, "grad_norm": 0.6320343017578125, "learning_rate": 9.999013295442413e-06, "loss": 0.8912, "step": 761 }, { "epoch": 0.1087018544935806, "grad_norm": 0.8238005042076111, "learning_rate": 9.998997938916725e-06, "loss": 0.8738, "step": 762 }, { "epoch": 0.10884450784593439, "grad_norm": 0.9503881931304932, "learning_rate": 9.998982463823734e-06, "loss": 0.9157, "step": 763 }, { "epoch": 0.10898716119828816, "grad_norm": 0.7548708319664001, "learning_rate": 9.998966870163805e-06, "loss": 0.8612, "step": 764 }, { "epoch": 0.10912981455064194, "grad_norm": 0.8359969854354858, "learning_rate": 9.998951157937307e-06, "loss": 0.931, "step": 765 }, { "epoch": 0.10927246790299572, "grad_norm": 0.6833460330963135, "learning_rate": 9.998935327144617e-06, "loss": 0.9083, "step": 766 }, { "epoch": 0.1094151212553495, "grad_norm": 0.5777496695518494, "learning_rate": 9.998919377786105e-06, "loss": 0.9209, "step": 767 }, { "epoch": 0.10955777460770329, "grad_norm": 0.5251607894897461, "learning_rate": 9.998903309862153e-06, "loss": 0.9073, "step": 768 }, { "epoch": 0.10970042796005706, "grad_norm": 0.6294717788696289, "learning_rate": 9.99888712337314e-06, "loss": 0.8563, "step": 769 }, { "epoch": 0.10984308131241084, "grad_norm": 0.5857126116752625, "learning_rate": 9.998870818319453e-06, "loss": 0.9388, "step": 770 }, { "epoch": 0.10998573466476462, "grad_norm": 0.8108024001121521, "learning_rate": 9.998854394701476e-06, "loss": 0.9339, "step": 771 }, { "epoch": 0.1101283880171184, "grad_norm": 0.5285236835479736, "learning_rate": 9.9988378525196e-06, "loss": 0.9615, "step": 772 }, { "epoch": 0.11027104136947219, "grad_norm": 0.46636226773262024, "learning_rate": 9.998821191774215e-06, "loss": 0.9249, "step": 773 }, { "epoch": 0.11041369472182597, "grad_norm": 0.8498694896697998, "learning_rate": 9.998804412465717e-06, "loss": 0.8918, "step": 774 }, { "epoch": 0.11055634807417974, "grad_norm": 0.5153661370277405, "learning_rate": 9.998787514594505e-06, "loss": 0.8741, "step": 775 }, { "epoch": 0.11069900142653352, "grad_norm": 0.7411801815032959, "learning_rate": 9.99877049816098e-06, "loss": 0.6312, "step": 776 }, { "epoch": 0.1108416547788873, "grad_norm": 0.6182443499565125, "learning_rate": 9.998753363165547e-06, "loss": 0.9343, "step": 777 }, { "epoch": 0.11098430813124108, "grad_norm": 0.9486439228057861, "learning_rate": 9.99873610960861e-06, "loss": 0.9334, "step": 778 }, { "epoch": 0.11112696148359487, "grad_norm": 0.5551772117614746, "learning_rate": 9.998718737490576e-06, "loss": 0.8739, "step": 779 }, { "epoch": 0.11126961483594865, "grad_norm": 0.7792413234710693, "learning_rate": 9.998701246811863e-06, "loss": 0.8978, "step": 780 }, { "epoch": 0.11141226818830242, "grad_norm": 0.6563642024993896, "learning_rate": 9.998683637572882e-06, "loss": 0.8819, "step": 781 }, { "epoch": 0.1115549215406562, "grad_norm": 0.7804805040359497, "learning_rate": 9.99866590977405e-06, "loss": 0.8101, "step": 782 }, { "epoch": 0.11169757489300998, "grad_norm": 0.4641774594783783, "learning_rate": 9.998648063415787e-06, "loss": 0.9002, "step": 783 }, { "epoch": 0.11184022824536377, "grad_norm": 0.5388142466545105, "learning_rate": 9.998630098498522e-06, "loss": 0.9923, "step": 784 }, { "epoch": 0.11198288159771755, "grad_norm": 0.752376139163971, "learning_rate": 9.998612015022675e-06, "loss": 0.9021, "step": 785 }, { "epoch": 0.11212553495007133, "grad_norm": 0.781582236289978, "learning_rate": 9.99859381298868e-06, "loss": 0.8599, "step": 786 }, { "epoch": 0.1122681883024251, "grad_norm": 0.569164514541626, "learning_rate": 9.998575492396962e-06, "loss": 0.9035, "step": 787 }, { "epoch": 0.11241084165477888, "grad_norm": 0.6883033514022827, "learning_rate": 9.99855705324796e-06, "loss": 1.0016, "step": 788 }, { "epoch": 0.11255349500713267, "grad_norm": 0.5926066637039185, "learning_rate": 9.998538495542111e-06, "loss": 0.8653, "step": 789 }, { "epoch": 0.11269614835948645, "grad_norm": 0.6186153888702393, "learning_rate": 9.998519819279855e-06, "loss": 0.9434, "step": 790 }, { "epoch": 0.11283880171184023, "grad_norm": 0.877582848072052, "learning_rate": 9.998501024461636e-06, "loss": 0.9036, "step": 791 }, { "epoch": 0.112981455064194, "grad_norm": 0.6416645050048828, "learning_rate": 9.998482111087897e-06, "loss": 0.6692, "step": 792 }, { "epoch": 0.11312410841654778, "grad_norm": 0.7448471188545227, "learning_rate": 9.99846307915909e-06, "loss": 0.9356, "step": 793 }, { "epoch": 0.11326676176890157, "grad_norm": 0.6980072259902954, "learning_rate": 9.998443928675661e-06, "loss": 0.8437, "step": 794 }, { "epoch": 0.11340941512125535, "grad_norm": 0.4740276038646698, "learning_rate": 9.998424659638071e-06, "loss": 0.8907, "step": 795 }, { "epoch": 0.11355206847360913, "grad_norm": 0.7384071350097656, "learning_rate": 9.998405272046772e-06, "loss": 0.9034, "step": 796 }, { "epoch": 0.11369472182596291, "grad_norm": 0.7091321349143982, "learning_rate": 9.998385765902225e-06, "loss": 0.8201, "step": 797 }, { "epoch": 0.11383737517831669, "grad_norm": 0.6403769850730896, "learning_rate": 9.998366141204896e-06, "loss": 0.9436, "step": 798 }, { "epoch": 0.11398002853067048, "grad_norm": 0.6469430923461914, "learning_rate": 9.998346397955245e-06, "loss": 0.8494, "step": 799 }, { "epoch": 0.11412268188302425, "grad_norm": 0.8421128392219543, "learning_rate": 9.998326536153746e-06, "loss": 0.8924, "step": 800 }, { "epoch": 0.11426533523537803, "grad_norm": 0.9186739325523376, "learning_rate": 9.998306555800865e-06, "loss": 0.8441, "step": 801 }, { "epoch": 0.11440798858773181, "grad_norm": 0.8992595672607422, "learning_rate": 9.998286456897077e-06, "loss": 0.8433, "step": 802 }, { "epoch": 0.11455064194008559, "grad_norm": 0.7035727500915527, "learning_rate": 9.998266239442862e-06, "loss": 0.8591, "step": 803 }, { "epoch": 0.11469329529243938, "grad_norm": 0.7800397276878357, "learning_rate": 9.998245903438697e-06, "loss": 0.8754, "step": 804 }, { "epoch": 0.11483594864479316, "grad_norm": 0.8508110642433167, "learning_rate": 9.998225448885063e-06, "loss": 0.9149, "step": 805 }, { "epoch": 0.11497860199714693, "grad_norm": 0.6382063031196594, "learning_rate": 9.998204875782448e-06, "loss": 0.9411, "step": 806 }, { "epoch": 0.11512125534950071, "grad_norm": 0.6287840008735657, "learning_rate": 9.998184184131339e-06, "loss": 0.9203, "step": 807 }, { "epoch": 0.11526390870185449, "grad_norm": 0.7639631628990173, "learning_rate": 9.998163373932224e-06, "loss": 0.886, "step": 808 }, { "epoch": 0.11540656205420827, "grad_norm": 0.6414012908935547, "learning_rate": 9.9981424451856e-06, "loss": 0.9347, "step": 809 }, { "epoch": 0.11554921540656206, "grad_norm": 0.5841327905654907, "learning_rate": 9.998121397891964e-06, "loss": 0.8779, "step": 810 }, { "epoch": 0.11569186875891584, "grad_norm": 0.6971140503883362, "learning_rate": 9.998100232051813e-06, "loss": 0.8971, "step": 811 }, { "epoch": 0.11583452211126961, "grad_norm": 0.5263389348983765, "learning_rate": 9.998078947665648e-06, "loss": 0.9519, "step": 812 }, { "epoch": 0.11597717546362339, "grad_norm": 0.5685816407203674, "learning_rate": 9.998057544733979e-06, "loss": 0.9713, "step": 813 }, { "epoch": 0.11611982881597717, "grad_norm": 0.6715201139450073, "learning_rate": 9.998036023257307e-06, "loss": 0.887, "step": 814 }, { "epoch": 0.11626248216833096, "grad_norm": 0.7427307963371277, "learning_rate": 9.998014383236147e-06, "loss": 0.862, "step": 815 }, { "epoch": 0.11640513552068474, "grad_norm": 0.8541010618209839, "learning_rate": 9.99799262467101e-06, "loss": 0.9105, "step": 816 }, { "epoch": 0.11654778887303852, "grad_norm": 0.7574614882469177, "learning_rate": 9.997970747562413e-06, "loss": 0.865, "step": 817 }, { "epoch": 0.1166904422253923, "grad_norm": 0.6815717220306396, "learning_rate": 9.997948751910875e-06, "loss": 0.8968, "step": 818 }, { "epoch": 0.11683309557774607, "grad_norm": 0.8007264137268066, "learning_rate": 9.997926637716917e-06, "loss": 0.9665, "step": 819 }, { "epoch": 0.11697574893009986, "grad_norm": 0.8848460912704468, "learning_rate": 9.997904404981064e-06, "loss": 0.885, "step": 820 }, { "epoch": 0.11711840228245364, "grad_norm": 0.8217048645019531, "learning_rate": 9.997882053703844e-06, "loss": 0.8322, "step": 821 }, { "epoch": 0.11726105563480742, "grad_norm": 0.6951943039894104, "learning_rate": 9.997859583885786e-06, "loss": 0.952, "step": 822 }, { "epoch": 0.1174037089871612, "grad_norm": 0.7835627198219299, "learning_rate": 9.997836995527423e-06, "loss": 0.9116, "step": 823 }, { "epoch": 0.11754636233951497, "grad_norm": 0.8103483319282532, "learning_rate": 9.997814288629293e-06, "loss": 0.8805, "step": 824 }, { "epoch": 0.11768901569186876, "grad_norm": 0.6608843207359314, "learning_rate": 9.99779146319193e-06, "loss": 0.9667, "step": 825 }, { "epoch": 0.11783166904422254, "grad_norm": 0.7242885231971741, "learning_rate": 9.99776851921588e-06, "loss": 0.9672, "step": 826 }, { "epoch": 0.11797432239657632, "grad_norm": 1.1274234056472778, "learning_rate": 9.997745456701684e-06, "loss": 0.9623, "step": 827 }, { "epoch": 0.1181169757489301, "grad_norm": 0.7334691882133484, "learning_rate": 9.99772227564989e-06, "loss": 0.905, "step": 828 }, { "epoch": 0.11825962910128388, "grad_norm": 0.7089143991470337, "learning_rate": 9.99769897606105e-06, "loss": 0.8703, "step": 829 }, { "epoch": 0.11840228245363767, "grad_norm": 0.8237111568450928, "learning_rate": 9.997675557935714e-06, "loss": 0.9014, "step": 830 }, { "epoch": 0.11854493580599144, "grad_norm": 0.5905559659004211, "learning_rate": 9.997652021274438e-06, "loss": 0.9067, "step": 831 }, { "epoch": 0.11868758915834522, "grad_norm": 0.6693427562713623, "learning_rate": 9.997628366077781e-06, "loss": 0.884, "step": 832 }, { "epoch": 0.118830242510699, "grad_norm": 0.6466344594955444, "learning_rate": 9.997604592346301e-06, "loss": 0.9198, "step": 833 }, { "epoch": 0.11897289586305278, "grad_norm": 0.8904848694801331, "learning_rate": 9.997580700080567e-06, "loss": 0.8998, "step": 834 }, { "epoch": 0.11911554921540657, "grad_norm": 0.6356571912765503, "learning_rate": 9.997556689281142e-06, "loss": 0.9558, "step": 835 }, { "epoch": 0.11925820256776035, "grad_norm": 0.8112600445747375, "learning_rate": 9.997532559948597e-06, "loss": 0.819, "step": 836 }, { "epoch": 0.11940085592011412, "grad_norm": 0.5827550292015076, "learning_rate": 9.997508312083503e-06, "loss": 0.9217, "step": 837 }, { "epoch": 0.1195435092724679, "grad_norm": 0.6937353014945984, "learning_rate": 9.997483945686435e-06, "loss": 0.8083, "step": 838 }, { "epoch": 0.11968616262482168, "grad_norm": 0.8962831497192383, "learning_rate": 9.997459460757973e-06, "loss": 0.866, "step": 839 }, { "epoch": 0.11982881597717546, "grad_norm": 0.5882939696311951, "learning_rate": 9.997434857298697e-06, "loss": 0.8634, "step": 840 }, { "epoch": 0.11997146932952925, "grad_norm": 0.9035172462463379, "learning_rate": 9.99741013530919e-06, "loss": 0.8731, "step": 841 }, { "epoch": 0.12011412268188303, "grad_norm": 0.6360054016113281, "learning_rate": 9.997385294790038e-06, "loss": 0.9279, "step": 842 }, { "epoch": 0.1202567760342368, "grad_norm": 0.6242084503173828, "learning_rate": 9.99736033574183e-06, "loss": 0.9538, "step": 843 }, { "epoch": 0.12039942938659058, "grad_norm": 0.7019553780555725, "learning_rate": 9.99733525816516e-06, "loss": 0.8733, "step": 844 }, { "epoch": 0.12054208273894436, "grad_norm": 0.6724885702133179, "learning_rate": 9.99731006206062e-06, "loss": 0.8644, "step": 845 }, { "epoch": 0.12068473609129815, "grad_norm": 0.6924759745597839, "learning_rate": 9.997284747428811e-06, "loss": 0.9115, "step": 846 }, { "epoch": 0.12082738944365193, "grad_norm": 0.7439209818840027, "learning_rate": 9.997259314270331e-06, "loss": 0.8206, "step": 847 }, { "epoch": 0.1209700427960057, "grad_norm": 0.8042154312133789, "learning_rate": 9.997233762585783e-06, "loss": 0.8647, "step": 848 }, { "epoch": 0.12111269614835948, "grad_norm": 0.7187075614929199, "learning_rate": 9.997208092375773e-06, "loss": 0.8607, "step": 849 }, { "epoch": 0.12125534950071326, "grad_norm": 0.8251458406448364, "learning_rate": 9.997182303640912e-06, "loss": 0.8745, "step": 850 }, { "epoch": 0.12139800285306705, "grad_norm": 0.8559502959251404, "learning_rate": 9.99715639638181e-06, "loss": 0.8801, "step": 851 }, { "epoch": 0.12154065620542083, "grad_norm": 0.7346495985984802, "learning_rate": 9.997130370599081e-06, "loss": 0.8799, "step": 852 }, { "epoch": 0.12168330955777461, "grad_norm": 0.5646926760673523, "learning_rate": 9.997104226293343e-06, "loss": 0.897, "step": 853 }, { "epoch": 0.12182596291012839, "grad_norm": 0.7479763031005859, "learning_rate": 9.997077963465219e-06, "loss": 0.8199, "step": 854 }, { "epoch": 0.12196861626248216, "grad_norm": 0.5906651616096497, "learning_rate": 9.997051582115325e-06, "loss": 0.8739, "step": 855 }, { "epoch": 0.12211126961483595, "grad_norm": 1.1264022588729858, "learning_rate": 9.997025082244291e-06, "loss": 0.9145, "step": 856 }, { "epoch": 0.12225392296718973, "grad_norm": 0.6673288941383362, "learning_rate": 9.996998463852747e-06, "loss": 0.9036, "step": 857 }, { "epoch": 0.12239657631954351, "grad_norm": 0.6579235792160034, "learning_rate": 9.996971726941322e-06, "loss": 0.9104, "step": 858 }, { "epoch": 0.12253922967189729, "grad_norm": 0.8649102449417114, "learning_rate": 9.996944871510652e-06, "loss": 0.9032, "step": 859 }, { "epoch": 0.12268188302425106, "grad_norm": 0.6878856420516968, "learning_rate": 9.996917897561372e-06, "loss": 0.8311, "step": 860 }, { "epoch": 0.12282453637660486, "grad_norm": 0.756231963634491, "learning_rate": 9.996890805094122e-06, "loss": 0.8804, "step": 861 }, { "epoch": 0.12296718972895863, "grad_norm": 0.7084113359451294, "learning_rate": 9.996863594109544e-06, "loss": 0.9322, "step": 862 }, { "epoch": 0.12310984308131241, "grad_norm": 0.517266035079956, "learning_rate": 9.996836264608287e-06, "loss": 0.8073, "step": 863 }, { "epoch": 0.12325249643366619, "grad_norm": 0.8390178084373474, "learning_rate": 9.996808816590996e-06, "loss": 0.698, "step": 864 }, { "epoch": 0.12339514978601997, "grad_norm": 0.865898072719574, "learning_rate": 9.996781250058321e-06, "loss": 0.8309, "step": 865 }, { "epoch": 0.12353780313837376, "grad_norm": 0.654003381729126, "learning_rate": 9.996753565010919e-06, "loss": 0.9114, "step": 866 }, { "epoch": 0.12368045649072754, "grad_norm": 0.7689074873924255, "learning_rate": 9.996725761449443e-06, "loss": 0.9499, "step": 867 }, { "epoch": 0.12382310984308131, "grad_norm": 0.7274837493896484, "learning_rate": 9.996697839374559e-06, "loss": 0.8318, "step": 868 }, { "epoch": 0.12396576319543509, "grad_norm": 0.6424981355667114, "learning_rate": 9.996669798786922e-06, "loss": 0.8216, "step": 869 }, { "epoch": 0.12410841654778887, "grad_norm": 0.7490353584289551, "learning_rate": 9.996641639687199e-06, "loss": 0.8917, "step": 870 }, { "epoch": 0.12425106990014265, "grad_norm": 0.544717013835907, "learning_rate": 9.99661336207606e-06, "loss": 0.9394, "step": 871 }, { "epoch": 0.12439372325249644, "grad_norm": 0.6359512805938721, "learning_rate": 9.996584965954176e-06, "loss": 0.8508, "step": 872 }, { "epoch": 0.12453637660485022, "grad_norm": 0.6323925256729126, "learning_rate": 9.996556451322217e-06, "loss": 0.8678, "step": 873 }, { "epoch": 0.124679029957204, "grad_norm": 0.6573577523231506, "learning_rate": 9.99652781818086e-06, "loss": 0.8533, "step": 874 }, { "epoch": 0.12482168330955777, "grad_norm": 0.6162577271461487, "learning_rate": 9.99649906653079e-06, "loss": 0.9112, "step": 875 }, { "epoch": 0.12496433666191155, "grad_norm": 1.4229447841644287, "learning_rate": 9.99647019637268e-06, "loss": 0.6754, "step": 876 }, { "epoch": 0.12510699001426534, "grad_norm": 0.7424920797348022, "learning_rate": 9.996441207707222e-06, "loss": 0.8453, "step": 877 }, { "epoch": 0.12524964336661912, "grad_norm": 0.7387217879295349, "learning_rate": 9.9964121005351e-06, "loss": 0.8343, "step": 878 }, { "epoch": 0.1253922967189729, "grad_norm": 0.9214134812355042, "learning_rate": 9.996382874857004e-06, "loss": 0.8657, "step": 879 }, { "epoch": 0.12553495007132667, "grad_norm": 0.5441204309463501, "learning_rate": 9.99635353067363e-06, "loss": 0.9288, "step": 880 }, { "epoch": 0.12567760342368045, "grad_norm": 0.7430056929588318, "learning_rate": 9.996324067985672e-06, "loss": 0.849, "step": 881 }, { "epoch": 0.12582025677603423, "grad_norm": 0.9778558611869812, "learning_rate": 9.996294486793827e-06, "loss": 0.8932, "step": 882 }, { "epoch": 0.125962910128388, "grad_norm": 0.7526888847351074, "learning_rate": 9.996264787098801e-06, "loss": 0.8981, "step": 883 }, { "epoch": 0.12610556348074178, "grad_norm": 0.7484474182128906, "learning_rate": 9.996234968901295e-06, "loss": 0.9127, "step": 884 }, { "epoch": 0.1262482168330956, "grad_norm": 0.6596251726150513, "learning_rate": 9.996205032202017e-06, "loss": 0.8431, "step": 885 }, { "epoch": 0.12639087018544937, "grad_norm": 0.9939471483230591, "learning_rate": 9.996174977001677e-06, "loss": 0.914, "step": 886 }, { "epoch": 0.12653352353780314, "grad_norm": 0.7965244054794312, "learning_rate": 9.996144803300991e-06, "loss": 0.8923, "step": 887 }, { "epoch": 0.12667617689015692, "grad_norm": 0.807079017162323, "learning_rate": 9.99611451110067e-06, "loss": 0.827, "step": 888 }, { "epoch": 0.1268188302425107, "grad_norm": 0.5425110459327698, "learning_rate": 9.996084100401435e-06, "loss": 0.9275, "step": 889 }, { "epoch": 0.12696148359486448, "grad_norm": 0.5462783575057983, "learning_rate": 9.996053571204005e-06, "loss": 0.882, "step": 890 }, { "epoch": 0.12710413694721825, "grad_norm": 0.7539640665054321, "learning_rate": 9.996022923509108e-06, "loss": 0.8529, "step": 891 }, { "epoch": 0.12724679029957203, "grad_norm": 0.5665767788887024, "learning_rate": 9.995992157317466e-06, "loss": 0.9015, "step": 892 }, { "epoch": 0.1273894436519258, "grad_norm": 0.8361479640007019, "learning_rate": 9.995961272629815e-06, "loss": 0.8317, "step": 893 }, { "epoch": 0.1275320970042796, "grad_norm": 0.8227171897888184, "learning_rate": 9.99593026944688e-06, "loss": 0.7751, "step": 894 }, { "epoch": 0.1276747503566334, "grad_norm": 0.5182986259460449, "learning_rate": 9.995899147769403e-06, "loss": 0.9356, "step": 895 }, { "epoch": 0.12781740370898717, "grad_norm": 1.1745651960372925, "learning_rate": 9.995867907598119e-06, "loss": 0.7538, "step": 896 }, { "epoch": 0.12796005706134095, "grad_norm": 0.7005908489227295, "learning_rate": 9.995836548933768e-06, "loss": 0.8818, "step": 897 }, { "epoch": 0.12810271041369473, "grad_norm": 0.7603804469108582, "learning_rate": 9.995805071777096e-06, "loss": 0.8509, "step": 898 }, { "epoch": 0.1282453637660485, "grad_norm": 0.8213330507278442, "learning_rate": 9.995773476128849e-06, "loss": 0.8355, "step": 899 }, { "epoch": 0.12838801711840228, "grad_norm": 0.7150991559028625, "learning_rate": 9.995741761989778e-06, "loss": 0.8585, "step": 900 }, { "epoch": 0.12853067047075606, "grad_norm": 0.7070020437240601, "learning_rate": 9.995709929360629e-06, "loss": 0.9078, "step": 901 }, { "epoch": 0.12867332382310984, "grad_norm": 0.715682327747345, "learning_rate": 9.995677978242164e-06, "loss": 0.9014, "step": 902 }, { "epoch": 0.12881597717546361, "grad_norm": 0.6890527606010437, "learning_rate": 9.995645908635139e-06, "loss": 0.8958, "step": 903 }, { "epoch": 0.1289586305278174, "grad_norm": 0.6184905767440796, "learning_rate": 9.995613720540312e-06, "loss": 0.9359, "step": 904 }, { "epoch": 0.1291012838801712, "grad_norm": 0.7181702852249146, "learning_rate": 9.995581413958448e-06, "loss": 0.9108, "step": 905 }, { "epoch": 0.12924393723252497, "grad_norm": 0.7546802163124084, "learning_rate": 9.995548988890316e-06, "loss": 0.8612, "step": 906 }, { "epoch": 0.12938659058487875, "grad_norm": 0.6117148995399475, "learning_rate": 9.99551644533668e-06, "loss": 0.8909, "step": 907 }, { "epoch": 0.12952924393723253, "grad_norm": 0.7722803950309753, "learning_rate": 9.995483783298315e-06, "loss": 0.9538, "step": 908 }, { "epoch": 0.1296718972895863, "grad_norm": 0.9300944805145264, "learning_rate": 9.995451002775995e-06, "loss": 0.8414, "step": 909 }, { "epoch": 0.12981455064194009, "grad_norm": 0.7007274031639099, "learning_rate": 9.9954181037705e-06, "loss": 0.8163, "step": 910 }, { "epoch": 0.12995720399429386, "grad_norm": 0.7200602889060974, "learning_rate": 9.995385086282604e-06, "loss": 0.8925, "step": 911 }, { "epoch": 0.13009985734664764, "grad_norm": 0.6431737542152405, "learning_rate": 9.995351950313098e-06, "loss": 0.8477, "step": 912 }, { "epoch": 0.13024251069900142, "grad_norm": 0.7531431317329407, "learning_rate": 9.995318695862762e-06, "loss": 0.8907, "step": 913 }, { "epoch": 0.1303851640513552, "grad_norm": 0.7862675189971924, "learning_rate": 9.995285322932385e-06, "loss": 0.8865, "step": 914 }, { "epoch": 0.13052781740370897, "grad_norm": 0.8877861499786377, "learning_rate": 9.995251831522763e-06, "loss": 0.8969, "step": 915 }, { "epoch": 0.13067047075606278, "grad_norm": 0.5905489325523376, "learning_rate": 9.995218221634686e-06, "loss": 0.8297, "step": 916 }, { "epoch": 0.13081312410841656, "grad_norm": 0.9023591876029968, "learning_rate": 9.995184493268954e-06, "loss": 0.8982, "step": 917 }, { "epoch": 0.13095577746077033, "grad_norm": 0.7605781555175781, "learning_rate": 9.995150646426364e-06, "loss": 0.8962, "step": 918 }, { "epoch": 0.1310984308131241, "grad_norm": 0.7599273920059204, "learning_rate": 9.995116681107721e-06, "loss": 0.8883, "step": 919 }, { "epoch": 0.1312410841654779, "grad_norm": 0.7208825945854187, "learning_rate": 9.995082597313832e-06, "loss": 0.8444, "step": 920 }, { "epoch": 0.13138373751783167, "grad_norm": 1.0031217336654663, "learning_rate": 9.995048395045502e-06, "loss": 0.9012, "step": 921 }, { "epoch": 0.13152639087018544, "grad_norm": 0.8403066992759705, "learning_rate": 9.995014074303544e-06, "loss": 0.8789, "step": 922 }, { "epoch": 0.13166904422253922, "grad_norm": 0.7000038027763367, "learning_rate": 9.994979635088772e-06, "loss": 0.9594, "step": 923 }, { "epoch": 0.131811697574893, "grad_norm": 0.6286900043487549, "learning_rate": 9.994945077402002e-06, "loss": 0.866, "step": 924 }, { "epoch": 0.13195435092724678, "grad_norm": 0.7327531576156616, "learning_rate": 9.994910401244056e-06, "loss": 0.8621, "step": 925 }, { "epoch": 0.13209700427960058, "grad_norm": 0.7145300507545471, "learning_rate": 9.994875606615752e-06, "loss": 0.9447, "step": 926 }, { "epoch": 0.13223965763195436, "grad_norm": 0.7216164469718933, "learning_rate": 9.99484069351792e-06, "loss": 0.8226, "step": 927 }, { "epoch": 0.13238231098430814, "grad_norm": 0.6584069728851318, "learning_rate": 9.994805661951386e-06, "loss": 0.9572, "step": 928 }, { "epoch": 0.13252496433666192, "grad_norm": 0.6539363265037537, "learning_rate": 9.99477051191698e-06, "loss": 0.8985, "step": 929 }, { "epoch": 0.1326676176890157, "grad_norm": 0.9867337346076965, "learning_rate": 9.994735243415536e-06, "loss": 0.8692, "step": 930 }, { "epoch": 0.13281027104136947, "grad_norm": 0.7150183320045471, "learning_rate": 9.994699856447893e-06, "loss": 0.8236, "step": 931 }, { "epoch": 0.13295292439372325, "grad_norm": 0.6724671125411987, "learning_rate": 9.994664351014886e-06, "loss": 0.8764, "step": 932 }, { "epoch": 0.13309557774607703, "grad_norm": 0.7004656791687012, "learning_rate": 9.994628727117362e-06, "loss": 0.87, "step": 933 }, { "epoch": 0.1332382310984308, "grad_norm": 0.8211258053779602, "learning_rate": 9.994592984756163e-06, "loss": 0.8604, "step": 934 }, { "epoch": 0.13338088445078458, "grad_norm": 0.8809522390365601, "learning_rate": 9.994557123932138e-06, "loss": 0.9265, "step": 935 }, { "epoch": 0.1335235378031384, "grad_norm": 0.7160531282424927, "learning_rate": 9.994521144646136e-06, "loss": 0.835, "step": 936 }, { "epoch": 0.13366619115549216, "grad_norm": 0.5444583296775818, "learning_rate": 9.99448504689901e-06, "loss": 0.8967, "step": 937 }, { "epoch": 0.13380884450784594, "grad_norm": 0.8648520112037659, "learning_rate": 9.994448830691618e-06, "loss": 0.8299, "step": 938 }, { "epoch": 0.13395149786019972, "grad_norm": 0.7733383178710938, "learning_rate": 9.99441249602482e-06, "loss": 0.9028, "step": 939 }, { "epoch": 0.1340941512125535, "grad_norm": 0.7461035251617432, "learning_rate": 9.994376042899474e-06, "loss": 0.9103, "step": 940 }, { "epoch": 0.13423680456490727, "grad_norm": 0.6235612630844116, "learning_rate": 9.994339471316447e-06, "loss": 0.8323, "step": 941 }, { "epoch": 0.13437945791726105, "grad_norm": 0.6510120034217834, "learning_rate": 9.994302781276608e-06, "loss": 0.9043, "step": 942 }, { "epoch": 0.13452211126961483, "grad_norm": 0.6366427540779114, "learning_rate": 9.994265972780824e-06, "loss": 0.8177, "step": 943 }, { "epoch": 0.1346647646219686, "grad_norm": 0.8007756471633911, "learning_rate": 9.994229045829969e-06, "loss": 0.8732, "step": 944 }, { "epoch": 0.13480741797432239, "grad_norm": 0.7733418345451355, "learning_rate": 9.99419200042492e-06, "loss": 0.8649, "step": 945 }, { "epoch": 0.13495007132667616, "grad_norm": 0.6091267466545105, "learning_rate": 9.994154836566555e-06, "loss": 0.8779, "step": 946 }, { "epoch": 0.13509272467902997, "grad_norm": 0.7535198926925659, "learning_rate": 9.994117554255756e-06, "loss": 0.9231, "step": 947 }, { "epoch": 0.13523537803138375, "grad_norm": 0.7609281539916992, "learning_rate": 9.994080153493405e-06, "loss": 0.873, "step": 948 }, { "epoch": 0.13537803138373752, "grad_norm": 0.7612059712409973, "learning_rate": 9.994042634280391e-06, "loss": 0.9069, "step": 949 }, { "epoch": 0.1355206847360913, "grad_norm": 0.6364650726318359, "learning_rate": 9.994004996617604e-06, "loss": 0.8902, "step": 950 }, { "epoch": 0.13566333808844508, "grad_norm": 0.6534590721130371, "learning_rate": 9.993967240505938e-06, "loss": 0.9005, "step": 951 }, { "epoch": 0.13580599144079886, "grad_norm": 0.8126868605613708, "learning_rate": 9.993929365946284e-06, "loss": 0.8455, "step": 952 }, { "epoch": 0.13594864479315263, "grad_norm": 0.7154679894447327, "learning_rate": 9.993891372939545e-06, "loss": 0.9016, "step": 953 }, { "epoch": 0.1360912981455064, "grad_norm": 0.740562379360199, "learning_rate": 9.993853261486619e-06, "loss": 0.9093, "step": 954 }, { "epoch": 0.1362339514978602, "grad_norm": 0.9111164808273315, "learning_rate": 9.993815031588412e-06, "loss": 0.8247, "step": 955 }, { "epoch": 0.13637660485021397, "grad_norm": 0.5779091119766235, "learning_rate": 9.993776683245829e-06, "loss": 0.9569, "step": 956 }, { "epoch": 0.13651925820256777, "grad_norm": 0.7303023338317871, "learning_rate": 9.993738216459782e-06, "loss": 0.8136, "step": 957 }, { "epoch": 0.13666191155492155, "grad_norm": 1.7917770147323608, "learning_rate": 9.993699631231181e-06, "loss": 0.7486, "step": 958 }, { "epoch": 0.13680456490727533, "grad_norm": 0.9355977177619934, "learning_rate": 9.993660927560942e-06, "loss": 0.8275, "step": 959 }, { "epoch": 0.1369472182596291, "grad_norm": 0.7837207913398743, "learning_rate": 9.993622105449983e-06, "loss": 0.8468, "step": 960 }, { "epoch": 0.13708987161198288, "grad_norm": 0.595391035079956, "learning_rate": 9.993583164899224e-06, "loss": 0.9133, "step": 961 }, { "epoch": 0.13723252496433666, "grad_norm": 0.687847375869751, "learning_rate": 9.99354410590959e-06, "loss": 0.8575, "step": 962 }, { "epoch": 0.13737517831669044, "grad_norm": 0.6320686340332031, "learning_rate": 9.993504928482007e-06, "loss": 0.9417, "step": 963 }, { "epoch": 0.13751783166904422, "grad_norm": 0.743326723575592, "learning_rate": 9.993465632617405e-06, "loss": 0.8574, "step": 964 }, { "epoch": 0.137660485021398, "grad_norm": 0.6819217801094055, "learning_rate": 9.993426218316714e-06, "loss": 0.9254, "step": 965 }, { "epoch": 0.13780313837375177, "grad_norm": 0.7288452982902527, "learning_rate": 9.99338668558087e-06, "loss": 0.9224, "step": 966 }, { "epoch": 0.13794579172610558, "grad_norm": 0.8626713156700134, "learning_rate": 9.99334703441081e-06, "loss": 0.9685, "step": 967 }, { "epoch": 0.13808844507845935, "grad_norm": 0.7050989270210266, "learning_rate": 9.993307264807475e-06, "loss": 0.8818, "step": 968 }, { "epoch": 0.13823109843081313, "grad_norm": 0.7689258456230164, "learning_rate": 9.99326737677181e-06, "loss": 0.8551, "step": 969 }, { "epoch": 0.1383737517831669, "grad_norm": 0.5554561018943787, "learning_rate": 9.993227370304758e-06, "loss": 0.9251, "step": 970 }, { "epoch": 0.1385164051355207, "grad_norm": 0.6589494943618774, "learning_rate": 9.99318724540727e-06, "loss": 0.8533, "step": 971 }, { "epoch": 0.13865905848787446, "grad_norm": 0.6229296922683716, "learning_rate": 9.993147002080294e-06, "loss": 0.8369, "step": 972 }, { "epoch": 0.13880171184022824, "grad_norm": 0.8061829805374146, "learning_rate": 9.99310664032479e-06, "loss": 0.8777, "step": 973 }, { "epoch": 0.13894436519258202, "grad_norm": 0.7398107051849365, "learning_rate": 9.993066160141714e-06, "loss": 0.9083, "step": 974 }, { "epoch": 0.1390870185449358, "grad_norm": 0.9920467734336853, "learning_rate": 9.993025561532021e-06, "loss": 0.8731, "step": 975 }, { "epoch": 0.13922967189728958, "grad_norm": 0.6123038530349731, "learning_rate": 9.99298484449668e-06, "loss": 0.9163, "step": 976 }, { "epoch": 0.13937232524964335, "grad_norm": 1.1445646286010742, "learning_rate": 9.992944009036657e-06, "loss": 0.9156, "step": 977 }, { "epoch": 0.13951497860199716, "grad_norm": 0.573752224445343, "learning_rate": 9.992903055152914e-06, "loss": 0.8817, "step": 978 }, { "epoch": 0.13965763195435094, "grad_norm": 0.8756287693977356, "learning_rate": 9.992861982846427e-06, "loss": 0.8271, "step": 979 }, { "epoch": 0.1398002853067047, "grad_norm": 0.7713871002197266, "learning_rate": 9.992820792118172e-06, "loss": 0.821, "step": 980 }, { "epoch": 0.1399429386590585, "grad_norm": 0.7339078187942505, "learning_rate": 9.992779482969121e-06, "loss": 0.8016, "step": 981 }, { "epoch": 0.14008559201141227, "grad_norm": 0.691035270690918, "learning_rate": 9.992738055400257e-06, "loss": 0.8973, "step": 982 }, { "epoch": 0.14022824536376605, "grad_norm": 0.6231393814086914, "learning_rate": 9.992696509412563e-06, "loss": 0.8909, "step": 983 }, { "epoch": 0.14037089871611982, "grad_norm": 0.8889601826667786, "learning_rate": 9.992654845007022e-06, "loss": 0.8557, "step": 984 }, { "epoch": 0.1405135520684736, "grad_norm": 0.7168399691581726, "learning_rate": 9.992613062184623e-06, "loss": 0.8655, "step": 985 }, { "epoch": 0.14065620542082738, "grad_norm": 0.7748574018478394, "learning_rate": 9.992571160946359e-06, "loss": 0.816, "step": 986 }, { "epoch": 0.14079885877318116, "grad_norm": 0.7910947799682617, "learning_rate": 9.992529141293221e-06, "loss": 0.7946, "step": 987 }, { "epoch": 0.14094151212553496, "grad_norm": 0.6379979252815247, "learning_rate": 9.992487003226209e-06, "loss": 0.9722, "step": 988 }, { "epoch": 0.14108416547788874, "grad_norm": 0.7884982228279114, "learning_rate": 9.992444746746316e-06, "loss": 0.7898, "step": 989 }, { "epoch": 0.14122681883024252, "grad_norm": 0.8776577115058899, "learning_rate": 9.992402371854553e-06, "loss": 0.8405, "step": 990 }, { "epoch": 0.1413694721825963, "grad_norm": 0.9063935279846191, "learning_rate": 9.99235987855192e-06, "loss": 0.8733, "step": 991 }, { "epoch": 0.14151212553495007, "grad_norm": 0.6572399139404297, "learning_rate": 9.992317266839425e-06, "loss": 0.873, "step": 992 }, { "epoch": 0.14165477888730385, "grad_norm": 0.8871137499809265, "learning_rate": 9.992274536718078e-06, "loss": 0.8343, "step": 993 }, { "epoch": 0.14179743223965763, "grad_norm": 0.8485704064369202, "learning_rate": 9.992231688188895e-06, "loss": 0.8819, "step": 994 }, { "epoch": 0.1419400855920114, "grad_norm": 0.7479352355003357, "learning_rate": 9.992188721252891e-06, "loss": 0.8971, "step": 995 }, { "epoch": 0.14208273894436518, "grad_norm": 1.3619558811187744, "learning_rate": 9.992145635911085e-06, "loss": 0.8704, "step": 996 }, { "epoch": 0.14222539229671896, "grad_norm": 0.6050323247909546, "learning_rate": 9.9921024321645e-06, "loss": 0.9826, "step": 997 }, { "epoch": 0.14236804564907277, "grad_norm": 0.5990591645240784, "learning_rate": 9.992059110014157e-06, "loss": 0.9134, "step": 998 }, { "epoch": 0.14251069900142654, "grad_norm": 0.9036572575569153, "learning_rate": 9.992015669461089e-06, "loss": 0.863, "step": 999 }, { "epoch": 0.14265335235378032, "grad_norm": 0.8414834141731262, "learning_rate": 9.991972110506322e-06, "loss": 0.8551, "step": 1000 }, { "epoch": 0.1427960057061341, "grad_norm": 0.9207097291946411, "learning_rate": 9.99192843315089e-06, "loss": 0.9099, "step": 1001 }, { "epoch": 0.14293865905848788, "grad_norm": 0.6639807820320129, "learning_rate": 9.991884637395833e-06, "loss": 0.9566, "step": 1002 }, { "epoch": 0.14308131241084165, "grad_norm": 0.6359516382217407, "learning_rate": 9.991840723242183e-06, "loss": 0.8945, "step": 1003 }, { "epoch": 0.14322396576319543, "grad_norm": 0.9046689867973328, "learning_rate": 9.991796690690988e-06, "loss": 0.8852, "step": 1004 }, { "epoch": 0.1433666191155492, "grad_norm": 0.7262295484542847, "learning_rate": 9.991752539743287e-06, "loss": 0.8947, "step": 1005 }, { "epoch": 0.143509272467903, "grad_norm": 0.7294202446937561, "learning_rate": 9.991708270400128e-06, "loss": 0.8629, "step": 1006 }, { "epoch": 0.14365192582025677, "grad_norm": 0.6945996284484863, "learning_rate": 9.991663882662565e-06, "loss": 0.8053, "step": 1007 }, { "epoch": 0.14379457917261054, "grad_norm": 0.6925103664398193, "learning_rate": 9.991619376531649e-06, "loss": 0.8881, "step": 1008 }, { "epoch": 0.14393723252496435, "grad_norm": 0.7418951988220215, "learning_rate": 9.991574752008436e-06, "loss": 0.8712, "step": 1009 }, { "epoch": 0.14407988587731813, "grad_norm": 0.6167254447937012, "learning_rate": 9.99153000909398e-06, "loss": 0.8878, "step": 1010 }, { "epoch": 0.1442225392296719, "grad_norm": 0.7526018619537354, "learning_rate": 9.991485147789348e-06, "loss": 0.9019, "step": 1011 }, { "epoch": 0.14436519258202568, "grad_norm": 0.9956358075141907, "learning_rate": 9.9914401680956e-06, "loss": 0.9458, "step": 1012 }, { "epoch": 0.14450784593437946, "grad_norm": 0.837101936340332, "learning_rate": 9.991395070013806e-06, "loss": 0.8684, "step": 1013 }, { "epoch": 0.14465049928673324, "grad_norm": 0.9686935544013977, "learning_rate": 9.99134985354503e-06, "loss": 0.8609, "step": 1014 }, { "epoch": 0.144793152639087, "grad_norm": 0.7613106369972229, "learning_rate": 9.991304518690354e-06, "loss": 0.8575, "step": 1015 }, { "epoch": 0.1449358059914408, "grad_norm": 0.6044681668281555, "learning_rate": 9.991259065450846e-06, "loss": 0.9365, "step": 1016 }, { "epoch": 0.14507845934379457, "grad_norm": 0.8422667384147644, "learning_rate": 9.991213493827585e-06, "loss": 0.8833, "step": 1017 }, { "epoch": 0.14522111269614835, "grad_norm": 0.6289754509925842, "learning_rate": 9.991167803821653e-06, "loss": 0.8492, "step": 1018 }, { "epoch": 0.14536376604850215, "grad_norm": 0.797286331653595, "learning_rate": 9.991121995434135e-06, "loss": 0.911, "step": 1019 }, { "epoch": 0.14550641940085593, "grad_norm": 0.8525795340538025, "learning_rate": 9.991076068666114e-06, "loss": 0.8438, "step": 1020 }, { "epoch": 0.1456490727532097, "grad_norm": 0.840726912021637, "learning_rate": 9.991030023518681e-06, "loss": 0.8567, "step": 1021 }, { "epoch": 0.14579172610556348, "grad_norm": 0.615422248840332, "learning_rate": 9.99098385999293e-06, "loss": 0.8845, "step": 1022 }, { "epoch": 0.14593437945791726, "grad_norm": 0.93955397605896, "learning_rate": 9.990937578089953e-06, "loss": 0.7803, "step": 1023 }, { "epoch": 0.14607703281027104, "grad_norm": 0.8013250827789307, "learning_rate": 9.99089117781085e-06, "loss": 0.8792, "step": 1024 }, { "epoch": 0.14621968616262482, "grad_norm": 0.5773342847824097, "learning_rate": 9.99084465915672e-06, "loss": 0.9155, "step": 1025 }, { "epoch": 0.1463623395149786, "grad_norm": 0.6571844816207886, "learning_rate": 9.990798022128667e-06, "loss": 0.8368, "step": 1026 }, { "epoch": 0.14650499286733237, "grad_norm": 0.9351130723953247, "learning_rate": 9.990751266727795e-06, "loss": 0.897, "step": 1027 }, { "epoch": 0.14664764621968615, "grad_norm": 0.7844663262367249, "learning_rate": 9.990704392955218e-06, "loss": 0.8556, "step": 1028 }, { "epoch": 0.14679029957203996, "grad_norm": 0.7472745776176453, "learning_rate": 9.990657400812043e-06, "loss": 0.8071, "step": 1029 }, { "epoch": 0.14693295292439373, "grad_norm": 0.8757637739181519, "learning_rate": 9.990610290299387e-06, "loss": 0.7922, "step": 1030 }, { "epoch": 0.1470756062767475, "grad_norm": 0.7582662105560303, "learning_rate": 9.990563061418367e-06, "loss": 0.8197, "step": 1031 }, { "epoch": 0.1472182596291013, "grad_norm": 0.6933606863021851, "learning_rate": 9.990515714170103e-06, "loss": 0.9047, "step": 1032 }, { "epoch": 0.14736091298145507, "grad_norm": 0.7019277811050415, "learning_rate": 9.990468248555716e-06, "loss": 0.8288, "step": 1033 }, { "epoch": 0.14750356633380884, "grad_norm": 0.7585964798927307, "learning_rate": 9.990420664576334e-06, "loss": 0.768, "step": 1034 }, { "epoch": 0.14764621968616262, "grad_norm": 0.7656587958335876, "learning_rate": 9.990372962233087e-06, "loss": 0.8458, "step": 1035 }, { "epoch": 0.1477888730385164, "grad_norm": 1.2133119106292725, "learning_rate": 9.990325141527105e-06, "loss": 0.6152, "step": 1036 }, { "epoch": 0.14793152639087018, "grad_norm": 0.7640325427055359, "learning_rate": 9.99027720245952e-06, "loss": 0.9362, "step": 1037 }, { "epoch": 0.14807417974322395, "grad_norm": 0.8579018712043762, "learning_rate": 9.990229145031472e-06, "loss": 0.8926, "step": 1038 }, { "epoch": 0.14821683309557773, "grad_norm": 0.9569811224937439, "learning_rate": 9.9901809692441e-06, "loss": 0.9036, "step": 1039 }, { "epoch": 0.14835948644793154, "grad_norm": 0.637883722782135, "learning_rate": 9.990132675098546e-06, "loss": 0.8715, "step": 1040 }, { "epoch": 0.14850213980028532, "grad_norm": 0.6271453499794006, "learning_rate": 9.990084262595954e-06, "loss": 0.8285, "step": 1041 }, { "epoch": 0.1486447931526391, "grad_norm": 0.6584393978118896, "learning_rate": 9.990035731737477e-06, "loss": 0.9456, "step": 1042 }, { "epoch": 0.14878744650499287, "grad_norm": 0.9535266160964966, "learning_rate": 9.989987082524262e-06, "loss": 0.8898, "step": 1043 }, { "epoch": 0.14893009985734665, "grad_norm": 0.6312132477760315, "learning_rate": 9.989938314957463e-06, "loss": 0.8398, "step": 1044 }, { "epoch": 0.14907275320970043, "grad_norm": 0.8063013553619385, "learning_rate": 9.989889429038238e-06, "loss": 0.5944, "step": 1045 }, { "epoch": 0.1492154065620542, "grad_norm": 0.7557687759399414, "learning_rate": 9.989840424767748e-06, "loss": 0.8615, "step": 1046 }, { "epoch": 0.14935805991440798, "grad_norm": 0.6279237866401672, "learning_rate": 9.989791302147151e-06, "loss": 0.8655, "step": 1047 }, { "epoch": 0.14950071326676176, "grad_norm": 1.0212706327438354, "learning_rate": 9.989742061177617e-06, "loss": 0.7489, "step": 1048 }, { "epoch": 0.14964336661911554, "grad_norm": 0.9332009553909302, "learning_rate": 9.98969270186031e-06, "loss": 0.829, "step": 1049 }, { "epoch": 0.14978601997146934, "grad_norm": 0.7611649036407471, "learning_rate": 9.989643224196403e-06, "loss": 0.8425, "step": 1050 }, { "epoch": 0.14992867332382312, "grad_norm": 0.7248733043670654, "learning_rate": 9.989593628187068e-06, "loss": 0.9409, "step": 1051 }, { "epoch": 0.1500713266761769, "grad_norm": 0.5785107612609863, "learning_rate": 9.989543913833483e-06, "loss": 0.8661, "step": 1052 }, { "epoch": 0.15021398002853067, "grad_norm": 0.8986565470695496, "learning_rate": 9.989494081136825e-06, "loss": 0.899, "step": 1053 }, { "epoch": 0.15035663338088445, "grad_norm": 0.7995362281799316, "learning_rate": 9.989444130098279e-06, "loss": 0.8898, "step": 1054 }, { "epoch": 0.15049928673323823, "grad_norm": 0.915816068649292, "learning_rate": 9.989394060719026e-06, "loss": 0.8574, "step": 1055 }, { "epoch": 0.150641940085592, "grad_norm": 0.7998825907707214, "learning_rate": 9.989343873000257e-06, "loss": 0.9059, "step": 1056 }, { "epoch": 0.15078459343794579, "grad_norm": 0.6868506669998169, "learning_rate": 9.98929356694316e-06, "loss": 0.9177, "step": 1057 }, { "epoch": 0.15092724679029956, "grad_norm": 0.9999756813049316, "learning_rate": 9.98924314254893e-06, "loss": 0.9093, "step": 1058 }, { "epoch": 0.15106990014265334, "grad_norm": 0.7811575531959534, "learning_rate": 9.98919259981876e-06, "loss": 0.8138, "step": 1059 }, { "epoch": 0.15121255349500715, "grad_norm": 0.911644697189331, "learning_rate": 9.989141938753852e-06, "loss": 0.8323, "step": 1060 }, { "epoch": 0.15135520684736092, "grad_norm": 0.6191381812095642, "learning_rate": 9.989091159355406e-06, "loss": 0.8473, "step": 1061 }, { "epoch": 0.1514978601997147, "grad_norm": 0.7394691705703735, "learning_rate": 9.989040261624628e-06, "loss": 0.7634, "step": 1062 }, { "epoch": 0.15164051355206848, "grad_norm": 0.9278765916824341, "learning_rate": 9.988989245562722e-06, "loss": 0.8995, "step": 1063 }, { "epoch": 0.15178316690442226, "grad_norm": 0.7579810619354248, "learning_rate": 9.988938111170902e-06, "loss": 0.8339, "step": 1064 }, { "epoch": 0.15192582025677603, "grad_norm": 0.8946094512939453, "learning_rate": 9.988886858450377e-06, "loss": 0.8315, "step": 1065 }, { "epoch": 0.1520684736091298, "grad_norm": 0.678996205329895, "learning_rate": 9.988835487402366e-06, "loss": 0.8308, "step": 1066 }, { "epoch": 0.1522111269614836, "grad_norm": 0.724155604839325, "learning_rate": 9.988783998028085e-06, "loss": 0.8547, "step": 1067 }, { "epoch": 0.15235378031383737, "grad_norm": 1.0426677465438843, "learning_rate": 9.988732390328754e-06, "loss": 0.9468, "step": 1068 }, { "epoch": 0.15249643366619114, "grad_norm": 0.7471747994422913, "learning_rate": 9.988680664305603e-06, "loss": 0.813, "step": 1069 }, { "epoch": 0.15263908701854492, "grad_norm": 0.9394763112068176, "learning_rate": 9.988628819959853e-06, "loss": 0.8757, "step": 1070 }, { "epoch": 0.15278174037089873, "grad_norm": 0.6399518251419067, "learning_rate": 9.988576857292736e-06, "loss": 0.884, "step": 1071 }, { "epoch": 0.1529243937232525, "grad_norm": 0.8174779415130615, "learning_rate": 9.988524776305483e-06, "loss": 0.8657, "step": 1072 }, { "epoch": 0.15306704707560628, "grad_norm": 0.6559473872184753, "learning_rate": 9.988472576999331e-06, "loss": 0.8526, "step": 1073 }, { "epoch": 0.15320970042796006, "grad_norm": 0.7370951771736145, "learning_rate": 9.988420259375518e-06, "loss": 0.8755, "step": 1074 }, { "epoch": 0.15335235378031384, "grad_norm": 0.8102855682373047, "learning_rate": 9.988367823435282e-06, "loss": 0.8673, "step": 1075 }, { "epoch": 0.15349500713266762, "grad_norm": 0.7813428044319153, "learning_rate": 9.98831526917987e-06, "loss": 0.9025, "step": 1076 }, { "epoch": 0.1536376604850214, "grad_norm": 0.8604279160499573, "learning_rate": 9.98826259661053e-06, "loss": 0.8536, "step": 1077 }, { "epoch": 0.15378031383737517, "grad_norm": 0.8180389404296875, "learning_rate": 9.988209805728505e-06, "loss": 0.8953, "step": 1078 }, { "epoch": 0.15392296718972895, "grad_norm": 1.1215965747833252, "learning_rate": 9.988156896535055e-06, "loss": 0.89, "step": 1079 }, { "epoch": 0.15406562054208273, "grad_norm": 0.8152418732643127, "learning_rate": 9.988103869031428e-06, "loss": 0.8455, "step": 1080 }, { "epoch": 0.15420827389443653, "grad_norm": 0.6798477172851562, "learning_rate": 9.988050723218885e-06, "loss": 0.9168, "step": 1081 }, { "epoch": 0.1543509272467903, "grad_norm": 0.7677843570709229, "learning_rate": 9.987997459098686e-06, "loss": 0.8213, "step": 1082 }, { "epoch": 0.1544935805991441, "grad_norm": 0.7342092990875244, "learning_rate": 9.987944076672096e-06, "loss": 0.9088, "step": 1083 }, { "epoch": 0.15463623395149786, "grad_norm": 0.8838322162628174, "learning_rate": 9.987890575940378e-06, "loss": 0.8295, "step": 1084 }, { "epoch": 0.15477888730385164, "grad_norm": 1.0102605819702148, "learning_rate": 9.987836956904802e-06, "loss": 0.9076, "step": 1085 }, { "epoch": 0.15492154065620542, "grad_norm": 0.6762107610702515, "learning_rate": 9.98778321956664e-06, "loss": 0.7223, "step": 1086 }, { "epoch": 0.1550641940085592, "grad_norm": 0.7655817866325378, "learning_rate": 9.987729363927167e-06, "loss": 0.8291, "step": 1087 }, { "epoch": 0.15520684736091298, "grad_norm": 0.7236725687980652, "learning_rate": 9.987675389987661e-06, "loss": 0.7924, "step": 1088 }, { "epoch": 0.15534950071326675, "grad_norm": 0.7709295749664307, "learning_rate": 9.987621297749401e-06, "loss": 0.7729, "step": 1089 }, { "epoch": 0.15549215406562053, "grad_norm": 0.9965909123420715, "learning_rate": 9.98756708721367e-06, "loss": 0.832, "step": 1090 }, { "epoch": 0.15563480741797434, "grad_norm": 0.7601058483123779, "learning_rate": 9.987512758381754e-06, "loss": 0.8576, "step": 1091 }, { "epoch": 0.1557774607703281, "grad_norm": 0.6895081996917725, "learning_rate": 9.987458311254942e-06, "loss": 0.8564, "step": 1092 }, { "epoch": 0.1559201141226819, "grad_norm": 0.9341307878494263, "learning_rate": 9.987403745834525e-06, "loss": 0.8855, "step": 1093 }, { "epoch": 0.15606276747503567, "grad_norm": 0.9359080791473389, "learning_rate": 9.987349062121797e-06, "loss": 0.9022, "step": 1094 }, { "epoch": 0.15620542082738945, "grad_norm": 0.8535083532333374, "learning_rate": 9.987294260118055e-06, "loss": 0.8289, "step": 1095 }, { "epoch": 0.15634807417974322, "grad_norm": 0.711827278137207, "learning_rate": 9.987239339824599e-06, "loss": 0.8588, "step": 1096 }, { "epoch": 0.156490727532097, "grad_norm": 1.1219594478607178, "learning_rate": 9.98718430124273e-06, "loss": 0.8032, "step": 1097 }, { "epoch": 0.15663338088445078, "grad_norm": 1.6010338068008423, "learning_rate": 9.987129144373758e-06, "loss": 0.7201, "step": 1098 }, { "epoch": 0.15677603423680456, "grad_norm": 0.6423503756523132, "learning_rate": 9.987073869218986e-06, "loss": 0.8264, "step": 1099 }, { "epoch": 0.15691868758915833, "grad_norm": 0.7870016694068909, "learning_rate": 9.987018475779728e-06, "loss": 0.8288, "step": 1100 }, { "epoch": 0.1570613409415121, "grad_norm": 0.9856181144714355, "learning_rate": 9.986962964057297e-06, "loss": 0.7654, "step": 1101 }, { "epoch": 0.15720399429386592, "grad_norm": 0.7096058130264282, "learning_rate": 9.986907334053011e-06, "loss": 0.8829, "step": 1102 }, { "epoch": 0.1573466476462197, "grad_norm": 0.8059448003768921, "learning_rate": 9.986851585768186e-06, "loss": 0.9302, "step": 1103 }, { "epoch": 0.15748930099857347, "grad_norm": 0.7682087421417236, "learning_rate": 9.986795719204148e-06, "loss": 0.9289, "step": 1104 }, { "epoch": 0.15763195435092725, "grad_norm": 0.9392029047012329, "learning_rate": 9.986739734362221e-06, "loss": 0.7944, "step": 1105 }, { "epoch": 0.15777460770328103, "grad_norm": 0.8738077282905579, "learning_rate": 9.98668363124373e-06, "loss": 0.8127, "step": 1106 }, { "epoch": 0.1579172610556348, "grad_norm": 0.8040480613708496, "learning_rate": 9.98662740985001e-06, "loss": 0.8424, "step": 1107 }, { "epoch": 0.15805991440798858, "grad_norm": 0.7068692445755005, "learning_rate": 9.986571070182391e-06, "loss": 0.9402, "step": 1108 }, { "epoch": 0.15820256776034236, "grad_norm": 0.8845683932304382, "learning_rate": 9.986514612242213e-06, "loss": 0.8926, "step": 1109 }, { "epoch": 0.15834522111269614, "grad_norm": 0.6887304186820984, "learning_rate": 9.98645803603081e-06, "loss": 0.907, "step": 1110 }, { "epoch": 0.15848787446504992, "grad_norm": 0.6196120381355286, "learning_rate": 9.986401341549528e-06, "loss": 0.9172, "step": 1111 }, { "epoch": 0.15863052781740372, "grad_norm": 0.7921424508094788, "learning_rate": 9.986344528799711e-06, "loss": 0.9042, "step": 1112 }, { "epoch": 0.1587731811697575, "grad_norm": 0.7048509120941162, "learning_rate": 9.986287597782707e-06, "loss": 0.7891, "step": 1113 }, { "epoch": 0.15891583452211128, "grad_norm": 0.7620043158531189, "learning_rate": 9.986230548499863e-06, "loss": 0.8985, "step": 1114 }, { "epoch": 0.15905848787446505, "grad_norm": 0.9016017913818359, "learning_rate": 9.986173380952535e-06, "loss": 0.8309, "step": 1115 }, { "epoch": 0.15920114122681883, "grad_norm": 0.6812255382537842, "learning_rate": 9.986116095142078e-06, "loss": 0.8712, "step": 1116 }, { "epoch": 0.1593437945791726, "grad_norm": 0.6163586974143982, "learning_rate": 9.98605869106985e-06, "loss": 0.9178, "step": 1117 }, { "epoch": 0.1594864479315264, "grad_norm": 0.5008699297904968, "learning_rate": 9.986001168737217e-06, "loss": 0.8856, "step": 1118 }, { "epoch": 0.15962910128388016, "grad_norm": 0.851180911064148, "learning_rate": 9.985943528145537e-06, "loss": 0.8541, "step": 1119 }, { "epoch": 0.15977175463623394, "grad_norm": 0.7396800518035889, "learning_rate": 9.98588576929618e-06, "loss": 0.8499, "step": 1120 }, { "epoch": 0.15991440798858772, "grad_norm": 0.7640107870101929, "learning_rate": 9.985827892190516e-06, "loss": 0.8381, "step": 1121 }, { "epoch": 0.16005706134094153, "grad_norm": 0.8326088786125183, "learning_rate": 9.985769896829918e-06, "loss": 0.8361, "step": 1122 }, { "epoch": 0.1601997146932953, "grad_norm": 0.8211906552314758, "learning_rate": 9.98571178321576e-06, "loss": 0.8448, "step": 1123 }, { "epoch": 0.16034236804564908, "grad_norm": 0.8785040974617004, "learning_rate": 9.985653551349425e-06, "loss": 0.779, "step": 1124 }, { "epoch": 0.16048502139800286, "grad_norm": 0.8220649361610413, "learning_rate": 9.985595201232288e-06, "loss": 0.9088, "step": 1125 }, { "epoch": 0.16062767475035664, "grad_norm": 0.7736160159111023, "learning_rate": 9.985536732865735e-06, "loss": 0.8144, "step": 1126 }, { "epoch": 0.1607703281027104, "grad_norm": 1.540403962135315, "learning_rate": 9.985478146251156e-06, "loss": 0.7456, "step": 1127 }, { "epoch": 0.1609129814550642, "grad_norm": 0.7961999177932739, "learning_rate": 9.985419441389936e-06, "loss": 0.7858, "step": 1128 }, { "epoch": 0.16105563480741797, "grad_norm": 0.7917633652687073, "learning_rate": 9.985360618283468e-06, "loss": 0.868, "step": 1129 }, { "epoch": 0.16119828815977175, "grad_norm": 0.9145285487174988, "learning_rate": 9.985301676933151e-06, "loss": 0.843, "step": 1130 }, { "epoch": 0.16134094151212552, "grad_norm": 0.6547228097915649, "learning_rate": 9.98524261734038e-06, "loss": 0.8247, "step": 1131 }, { "epoch": 0.1614835948644793, "grad_norm": 0.7892211675643921, "learning_rate": 9.985183439506556e-06, "loss": 0.8616, "step": 1132 }, { "epoch": 0.1616262482168331, "grad_norm": 0.9836748838424683, "learning_rate": 9.985124143433082e-06, "loss": 0.8685, "step": 1133 }, { "epoch": 0.16176890156918688, "grad_norm": 0.8149711489677429, "learning_rate": 9.985064729121367e-06, "loss": 0.8477, "step": 1134 }, { "epoch": 0.16191155492154066, "grad_norm": 0.6759414076805115, "learning_rate": 9.985005196572817e-06, "loss": 0.8915, "step": 1135 }, { "epoch": 0.16205420827389444, "grad_norm": 0.6080157160758972, "learning_rate": 9.984945545788847e-06, "loss": 0.8656, "step": 1136 }, { "epoch": 0.16219686162624822, "grad_norm": 0.7513832449913025, "learning_rate": 9.984885776770869e-06, "loss": 0.9318, "step": 1137 }, { "epoch": 0.162339514978602, "grad_norm": 0.9032502770423889, "learning_rate": 9.984825889520303e-06, "loss": 0.865, "step": 1138 }, { "epoch": 0.16248216833095577, "grad_norm": 0.6458960771560669, "learning_rate": 9.984765884038567e-06, "loss": 0.8007, "step": 1139 }, { "epoch": 0.16262482168330955, "grad_norm": 0.8249204158782959, "learning_rate": 9.984705760327085e-06, "loss": 0.8908, "step": 1140 }, { "epoch": 0.16276747503566333, "grad_norm": 0.8001787662506104, "learning_rate": 9.984645518387287e-06, "loss": 0.8545, "step": 1141 }, { "epoch": 0.1629101283880171, "grad_norm": 0.8849978446960449, "learning_rate": 9.984585158220594e-06, "loss": 0.8599, "step": 1142 }, { "epoch": 0.1630527817403709, "grad_norm": 0.7225953936576843, "learning_rate": 9.984524679828445e-06, "loss": 0.8637, "step": 1143 }, { "epoch": 0.1631954350927247, "grad_norm": 0.5402420163154602, "learning_rate": 9.98446408321227e-06, "loss": 0.8936, "step": 1144 }, { "epoch": 0.16333808844507847, "grad_norm": 0.7665300369262695, "learning_rate": 9.98440336837351e-06, "loss": 0.912, "step": 1145 }, { "epoch": 0.16348074179743224, "grad_norm": 0.9873577356338501, "learning_rate": 9.9843425353136e-06, "loss": 0.8323, "step": 1146 }, { "epoch": 0.16362339514978602, "grad_norm": 0.7201932072639465, "learning_rate": 9.984281584033988e-06, "loss": 0.8669, "step": 1147 }, { "epoch": 0.1637660485021398, "grad_norm": 0.6797975897789001, "learning_rate": 9.984220514536115e-06, "loss": 0.8417, "step": 1148 }, { "epoch": 0.16390870185449358, "grad_norm": 0.8092601895332336, "learning_rate": 9.984159326821433e-06, "loss": 0.8525, "step": 1149 }, { "epoch": 0.16405135520684735, "grad_norm": 0.7208330035209656, "learning_rate": 9.984098020891391e-06, "loss": 0.8427, "step": 1150 }, { "epoch": 0.16419400855920113, "grad_norm": 0.8303775787353516, "learning_rate": 9.984036596747445e-06, "loss": 0.8823, "step": 1151 }, { "epoch": 0.1643366619115549, "grad_norm": 0.7441378831863403, "learning_rate": 9.983975054391051e-06, "loss": 0.8433, "step": 1152 }, { "epoch": 0.16447931526390872, "grad_norm": 0.9276571273803711, "learning_rate": 9.98391339382367e-06, "loss": 0.8364, "step": 1153 }, { "epoch": 0.1646219686162625, "grad_norm": 0.7798731923103333, "learning_rate": 9.98385161504676e-06, "loss": 0.8763, "step": 1154 }, { "epoch": 0.16476462196861627, "grad_norm": 1.1109129190444946, "learning_rate": 9.983789718061792e-06, "loss": 0.8132, "step": 1155 }, { "epoch": 0.16490727532097005, "grad_norm": 1.026613712310791, "learning_rate": 9.98372770287023e-06, "loss": 0.7514, "step": 1156 }, { "epoch": 0.16504992867332383, "grad_norm": 0.674289882183075, "learning_rate": 9.983665569473547e-06, "loss": 0.8774, "step": 1157 }, { "epoch": 0.1651925820256776, "grad_norm": 0.9314978718757629, "learning_rate": 9.983603317873217e-06, "loss": 0.8595, "step": 1158 }, { "epoch": 0.16533523537803138, "grad_norm": 0.7667367458343506, "learning_rate": 9.983540948070713e-06, "loss": 0.7934, "step": 1159 }, { "epoch": 0.16547788873038516, "grad_norm": 0.7203590869903564, "learning_rate": 9.983478460067519e-06, "loss": 0.8936, "step": 1160 }, { "epoch": 0.16562054208273894, "grad_norm": 0.768721878528595, "learning_rate": 9.983415853865113e-06, "loss": 0.8781, "step": 1161 }, { "epoch": 0.16576319543509271, "grad_norm": 0.9745950698852539, "learning_rate": 9.983353129464983e-06, "loss": 0.8604, "step": 1162 }, { "epoch": 0.1659058487874465, "grad_norm": 0.8448209762573242, "learning_rate": 9.983290286868617e-06, "loss": 0.8615, "step": 1163 }, { "epoch": 0.1660485021398003, "grad_norm": 0.893592894077301, "learning_rate": 9.983227326077503e-06, "loss": 0.8517, "step": 1164 }, { "epoch": 0.16619115549215407, "grad_norm": 0.7238365411758423, "learning_rate": 9.983164247093134e-06, "loss": 0.851, "step": 1165 }, { "epoch": 0.16633380884450785, "grad_norm": 0.8795214891433716, "learning_rate": 9.983101049917007e-06, "loss": 0.8641, "step": 1166 }, { "epoch": 0.16647646219686163, "grad_norm": 0.6775270104408264, "learning_rate": 9.983037734550625e-06, "loss": 0.8664, "step": 1167 }, { "epoch": 0.1666191155492154, "grad_norm": 0.6358814239501953, "learning_rate": 9.982974300995483e-06, "loss": 0.8293, "step": 1168 }, { "epoch": 0.16676176890156919, "grad_norm": 0.75082927942276, "learning_rate": 9.982910749253091e-06, "loss": 0.8579, "step": 1169 }, { "epoch": 0.16690442225392296, "grad_norm": 1.2580338716506958, "learning_rate": 9.982847079324953e-06, "loss": 0.8421, "step": 1170 }, { "epoch": 0.16704707560627674, "grad_norm": 0.7623533010482788, "learning_rate": 9.98278329121258e-06, "loss": 0.9265, "step": 1171 }, { "epoch": 0.16718972895863052, "grad_norm": 0.7251095175743103, "learning_rate": 9.982719384917486e-06, "loss": 0.793, "step": 1172 }, { "epoch": 0.1673323823109843, "grad_norm": 0.7233574986457825, "learning_rate": 9.982655360441185e-06, "loss": 0.9149, "step": 1173 }, { "epoch": 0.1674750356633381, "grad_norm": 0.8108822703361511, "learning_rate": 9.982591217785195e-06, "loss": 0.8378, "step": 1174 }, { "epoch": 0.16761768901569188, "grad_norm": 0.8185499906539917, "learning_rate": 9.982526956951041e-06, "loss": 0.6309, "step": 1175 }, { "epoch": 0.16776034236804566, "grad_norm": 0.872106671333313, "learning_rate": 9.982462577940244e-06, "loss": 0.8082, "step": 1176 }, { "epoch": 0.16790299572039943, "grad_norm": 0.8142444491386414, "learning_rate": 9.982398080754331e-06, "loss": 0.8214, "step": 1177 }, { "epoch": 0.1680456490727532, "grad_norm": 0.7037302851676941, "learning_rate": 9.982333465394834e-06, "loss": 0.8348, "step": 1178 }, { "epoch": 0.168188302425107, "grad_norm": 0.7830038666725159, "learning_rate": 9.982268731863285e-06, "loss": 0.9163, "step": 1179 }, { "epoch": 0.16833095577746077, "grad_norm": 0.6541464328765869, "learning_rate": 9.982203880161216e-06, "loss": 0.8602, "step": 1180 }, { "epoch": 0.16847360912981454, "grad_norm": 0.6037623286247253, "learning_rate": 9.98213891029017e-06, "loss": 0.8497, "step": 1181 }, { "epoch": 0.16861626248216832, "grad_norm": 0.9497793316841125, "learning_rate": 9.982073822251685e-06, "loss": 0.8877, "step": 1182 }, { "epoch": 0.1687589158345221, "grad_norm": 0.9584270119667053, "learning_rate": 9.982008616047305e-06, "loss": 0.9903, "step": 1183 }, { "epoch": 0.1689015691868759, "grad_norm": 0.9811310768127441, "learning_rate": 9.981943291678578e-06, "loss": 0.8805, "step": 1184 }, { "epoch": 0.16904422253922968, "grad_norm": 0.7309616208076477, "learning_rate": 9.98187784914705e-06, "loss": 0.8533, "step": 1185 }, { "epoch": 0.16918687589158346, "grad_norm": 0.5927829146385193, "learning_rate": 9.981812288454278e-06, "loss": 0.83, "step": 1186 }, { "epoch": 0.16932952924393724, "grad_norm": 0.7374027371406555, "learning_rate": 9.981746609601815e-06, "loss": 0.8102, "step": 1187 }, { "epoch": 0.16947218259629102, "grad_norm": 0.8296719193458557, "learning_rate": 9.981680812591215e-06, "loss": 0.9044, "step": 1188 }, { "epoch": 0.1696148359486448, "grad_norm": 1.2708629369735718, "learning_rate": 9.981614897424046e-06, "loss": 0.8514, "step": 1189 }, { "epoch": 0.16975748930099857, "grad_norm": 0.6136476397514343, "learning_rate": 9.981548864101864e-06, "loss": 0.8612, "step": 1190 }, { "epoch": 0.16990014265335235, "grad_norm": 0.7049573659896851, "learning_rate": 9.98148271262624e-06, "loss": 0.8801, "step": 1191 }, { "epoch": 0.17004279600570613, "grad_norm": 0.8933731317520142, "learning_rate": 9.98141644299874e-06, "loss": 0.8331, "step": 1192 }, { "epoch": 0.1701854493580599, "grad_norm": 0.5952308177947998, "learning_rate": 9.98135005522094e-06, "loss": 0.8901, "step": 1193 }, { "epoch": 0.17032810271041368, "grad_norm": 1.0774075984954834, "learning_rate": 9.981283549294411e-06, "loss": 0.9354, "step": 1194 }, { "epoch": 0.1704707560627675, "grad_norm": 0.8799782991409302, "learning_rate": 9.981216925220731e-06, "loss": 0.8744, "step": 1195 }, { "epoch": 0.17061340941512126, "grad_norm": 0.7206964492797852, "learning_rate": 9.98115018300148e-06, "loss": 0.8734, "step": 1196 }, { "epoch": 0.17075606276747504, "grad_norm": 0.7009869813919067, "learning_rate": 9.981083322638243e-06, "loss": 0.8943, "step": 1197 }, { "epoch": 0.17089871611982882, "grad_norm": 0.7496351599693298, "learning_rate": 9.981016344132604e-06, "loss": 0.8585, "step": 1198 }, { "epoch": 0.1710413694721826, "grad_norm": 0.9334327578544617, "learning_rate": 9.98094924748615e-06, "loss": 0.8503, "step": 1199 }, { "epoch": 0.17118402282453637, "grad_norm": 0.7665506601333618, "learning_rate": 9.980882032700476e-06, "loss": 0.8383, "step": 1200 }, { "epoch": 0.17132667617689015, "grad_norm": 0.7536643147468567, "learning_rate": 9.980814699777173e-06, "loss": 0.882, "step": 1201 }, { "epoch": 0.17146932952924393, "grad_norm": 0.7478814125061035, "learning_rate": 9.980747248717841e-06, "loss": 0.8675, "step": 1202 }, { "epoch": 0.1716119828815977, "grad_norm": 0.7154534459114075, "learning_rate": 9.98067967952408e-06, "loss": 0.8799, "step": 1203 }, { "epoch": 0.17175463623395149, "grad_norm": 0.6845526695251465, "learning_rate": 9.980611992197488e-06, "loss": 0.8574, "step": 1204 }, { "epoch": 0.1718972895863053, "grad_norm": 0.7099165916442871, "learning_rate": 9.980544186739674e-06, "loss": 0.8832, "step": 1205 }, { "epoch": 0.17203994293865907, "grad_norm": 0.7951145172119141, "learning_rate": 9.980476263152249e-06, "loss": 0.845, "step": 1206 }, { "epoch": 0.17218259629101285, "grad_norm": 0.8383887410163879, "learning_rate": 9.980408221436817e-06, "loss": 0.782, "step": 1207 }, { "epoch": 0.17232524964336662, "grad_norm": 0.7200697660446167, "learning_rate": 9.980340061594995e-06, "loss": 0.8029, "step": 1208 }, { "epoch": 0.1724679029957204, "grad_norm": 0.5835481882095337, "learning_rate": 9.980271783628402e-06, "loss": 0.8718, "step": 1209 }, { "epoch": 0.17261055634807418, "grad_norm": 0.6431670188903809, "learning_rate": 9.980203387538656e-06, "loss": 0.867, "step": 1210 }, { "epoch": 0.17275320970042796, "grad_norm": 1.0457934141159058, "learning_rate": 9.980134873327378e-06, "loss": 0.8991, "step": 1211 }, { "epoch": 0.17289586305278173, "grad_norm": 0.7832580208778381, "learning_rate": 9.980066240996193e-06, "loss": 0.8591, "step": 1212 }, { "epoch": 0.1730385164051355, "grad_norm": 0.7906302809715271, "learning_rate": 9.979997490546732e-06, "loss": 0.8512, "step": 1213 }, { "epoch": 0.1731811697574893, "grad_norm": 1.0404938459396362, "learning_rate": 9.97992862198062e-06, "loss": 0.7457, "step": 1214 }, { "epoch": 0.1733238231098431, "grad_norm": 1.0030831098556519, "learning_rate": 9.979859635299498e-06, "loss": 0.847, "step": 1215 }, { "epoch": 0.17346647646219687, "grad_norm": 0.7839489579200745, "learning_rate": 9.979790530504995e-06, "loss": 0.8379, "step": 1216 }, { "epoch": 0.17360912981455065, "grad_norm": 0.9872682690620422, "learning_rate": 9.979721307598754e-06, "loss": 0.9056, "step": 1217 }, { "epoch": 0.17375178316690443, "grad_norm": 0.8990773558616638, "learning_rate": 9.979651966582415e-06, "loss": 0.7916, "step": 1218 }, { "epoch": 0.1738944365192582, "grad_norm": 0.7062201499938965, "learning_rate": 9.979582507457626e-06, "loss": 0.8749, "step": 1219 }, { "epoch": 0.17403708987161198, "grad_norm": 1.092830777168274, "learning_rate": 9.979512930226029e-06, "loss": 0.7154, "step": 1220 }, { "epoch": 0.17417974322396576, "grad_norm": 0.804801881313324, "learning_rate": 9.97944323488928e-06, "loss": 0.8728, "step": 1221 }, { "epoch": 0.17432239657631954, "grad_norm": 0.8164383172988892, "learning_rate": 9.979373421449026e-06, "loss": 0.8472, "step": 1222 }, { "epoch": 0.17446504992867332, "grad_norm": 1.0955476760864258, "learning_rate": 9.979303489906929e-06, "loss": 0.9077, "step": 1223 }, { "epoch": 0.1746077032810271, "grad_norm": 0.8254255652427673, "learning_rate": 9.979233440264644e-06, "loss": 0.8198, "step": 1224 }, { "epoch": 0.17475035663338087, "grad_norm": 1.022617220878601, "learning_rate": 9.979163272523833e-06, "loss": 0.7953, "step": 1225 }, { "epoch": 0.17489300998573468, "grad_norm": 0.8240652680397034, "learning_rate": 9.97909298668616e-06, "loss": 0.8761, "step": 1226 }, { "epoch": 0.17503566333808845, "grad_norm": 0.6608521342277527, "learning_rate": 9.979022582753293e-06, "loss": 0.89, "step": 1227 }, { "epoch": 0.17517831669044223, "grad_norm": 0.7590407133102417, "learning_rate": 9.978952060726901e-06, "loss": 0.8207, "step": 1228 }, { "epoch": 0.175320970042796, "grad_norm": 1.3209112882614136, "learning_rate": 9.978881420608657e-06, "loss": 0.891, "step": 1229 }, { "epoch": 0.1754636233951498, "grad_norm": 0.5832023620605469, "learning_rate": 9.978810662400238e-06, "loss": 0.8418, "step": 1230 }, { "epoch": 0.17560627674750356, "grad_norm": 0.7879243493080139, "learning_rate": 9.97873978610332e-06, "loss": 0.7728, "step": 1231 }, { "epoch": 0.17574893009985734, "grad_norm": 1.1560922861099243, "learning_rate": 9.978668791719584e-06, "loss": 0.8481, "step": 1232 }, { "epoch": 0.17589158345221112, "grad_norm": 0.8897849321365356, "learning_rate": 9.978597679250716e-06, "loss": 0.7139, "step": 1233 }, { "epoch": 0.1760342368045649, "grad_norm": 0.9120283722877502, "learning_rate": 9.9785264486984e-06, "loss": 0.8597, "step": 1234 }, { "epoch": 0.17617689015691868, "grad_norm": 0.8726806640625, "learning_rate": 9.978455100064328e-06, "loss": 0.8664, "step": 1235 }, { "epoch": 0.17631954350927248, "grad_norm": 0.7392376661300659, "learning_rate": 9.97838363335019e-06, "loss": 0.8109, "step": 1236 }, { "epoch": 0.17646219686162626, "grad_norm": 0.8633730411529541, "learning_rate": 9.978312048557683e-06, "loss": 0.7605, "step": 1237 }, { "epoch": 0.17660485021398004, "grad_norm": 0.8165681958198547, "learning_rate": 9.978240345688503e-06, "loss": 0.9173, "step": 1238 }, { "epoch": 0.1767475035663338, "grad_norm": 0.9438103437423706, "learning_rate": 9.978168524744352e-06, "loss": 0.8134, "step": 1239 }, { "epoch": 0.1768901569186876, "grad_norm": 0.758353054523468, "learning_rate": 9.978096585726935e-06, "loss": 0.8101, "step": 1240 }, { "epoch": 0.17703281027104137, "grad_norm": 0.5737704634666443, "learning_rate": 9.978024528637954e-06, "loss": 0.8293, "step": 1241 }, { "epoch": 0.17717546362339515, "grad_norm": 0.6982561945915222, "learning_rate": 9.977952353479121e-06, "loss": 0.883, "step": 1242 }, { "epoch": 0.17731811697574892, "grad_norm": 0.7997564673423767, "learning_rate": 9.977880060252147e-06, "loss": 0.8539, "step": 1243 }, { "epoch": 0.1774607703281027, "grad_norm": 1.4520130157470703, "learning_rate": 9.977807648958748e-06, "loss": 0.848, "step": 1244 }, { "epoch": 0.17760342368045648, "grad_norm": 0.6465441584587097, "learning_rate": 9.97773511960064e-06, "loss": 0.8627, "step": 1245 }, { "epoch": 0.17774607703281028, "grad_norm": 0.7276219129562378, "learning_rate": 9.977662472179545e-06, "loss": 0.8171, "step": 1246 }, { "epoch": 0.17788873038516406, "grad_norm": 0.6480956673622131, "learning_rate": 9.977589706697183e-06, "loss": 0.8385, "step": 1247 }, { "epoch": 0.17803138373751784, "grad_norm": 0.8526870608329773, "learning_rate": 9.977516823155283e-06, "loss": 0.7951, "step": 1248 }, { "epoch": 0.17817403708987162, "grad_norm": 0.8076682686805725, "learning_rate": 9.977443821555571e-06, "loss": 0.793, "step": 1249 }, { "epoch": 0.1783166904422254, "grad_norm": 0.9861093759536743, "learning_rate": 9.977370701899781e-06, "loss": 0.793, "step": 1250 }, { "epoch": 0.17845934379457917, "grad_norm": 0.989619791507721, "learning_rate": 9.977297464189646e-06, "loss": 0.8507, "step": 1251 }, { "epoch": 0.17860199714693295, "grad_norm": 0.8125431537628174, "learning_rate": 9.977224108426901e-06, "loss": 0.8499, "step": 1252 }, { "epoch": 0.17874465049928673, "grad_norm": 0.8821991086006165, "learning_rate": 9.977150634613291e-06, "loss": 0.8917, "step": 1253 }, { "epoch": 0.1788873038516405, "grad_norm": 0.7301605939865112, "learning_rate": 9.977077042750554e-06, "loss": 0.88, "step": 1254 }, { "epoch": 0.17902995720399428, "grad_norm": 0.7050444483757019, "learning_rate": 9.977003332840437e-06, "loss": 0.8706, "step": 1255 }, { "epoch": 0.17917261055634806, "grad_norm": 0.873283863067627, "learning_rate": 9.97692950488469e-06, "loss": 0.7479, "step": 1256 }, { "epoch": 0.17931526390870187, "grad_norm": 0.8825015425682068, "learning_rate": 9.97685555888506e-06, "loss": 0.727, "step": 1257 }, { "epoch": 0.17945791726105564, "grad_norm": 0.7452796101570129, "learning_rate": 9.976781494843306e-06, "loss": 0.6812, "step": 1258 }, { "epoch": 0.17960057061340942, "grad_norm": 1.0616790056228638, "learning_rate": 9.976707312761181e-06, "loss": 0.8564, "step": 1259 }, { "epoch": 0.1797432239657632, "grad_norm": 0.7580656409263611, "learning_rate": 9.976633012640444e-06, "loss": 0.8373, "step": 1260 }, { "epoch": 0.17988587731811698, "grad_norm": 0.6261967420578003, "learning_rate": 9.97655859448286e-06, "loss": 0.8865, "step": 1261 }, { "epoch": 0.18002853067047075, "grad_norm": 0.9243199825286865, "learning_rate": 9.976484058290191e-06, "loss": 0.825, "step": 1262 }, { "epoch": 0.18017118402282453, "grad_norm": 0.8551122546195984, "learning_rate": 9.97640940406421e-06, "loss": 0.8567, "step": 1263 }, { "epoch": 0.1803138373751783, "grad_norm": 0.8754786252975464, "learning_rate": 9.976334631806684e-06, "loss": 0.8059, "step": 1264 }, { "epoch": 0.1804564907275321, "grad_norm": 0.8937332630157471, "learning_rate": 9.976259741519385e-06, "loss": 0.899, "step": 1265 }, { "epoch": 0.18059914407988586, "grad_norm": 0.9022212624549866, "learning_rate": 9.97618473320409e-06, "loss": 0.8596, "step": 1266 }, { "epoch": 0.18074179743223967, "grad_norm": 0.7678316831588745, "learning_rate": 9.976109606862581e-06, "loss": 0.8117, "step": 1267 }, { "epoch": 0.18088445078459345, "grad_norm": 0.8664880990982056, "learning_rate": 9.976034362496639e-06, "loss": 0.903, "step": 1268 }, { "epoch": 0.18102710413694723, "grad_norm": 0.9873309135437012, "learning_rate": 9.975959000108046e-06, "loss": 0.8767, "step": 1269 }, { "epoch": 0.181169757489301, "grad_norm": 0.983790934085846, "learning_rate": 9.975883519698592e-06, "loss": 0.8354, "step": 1270 }, { "epoch": 0.18131241084165478, "grad_norm": 0.7086114287376404, "learning_rate": 9.975807921270063e-06, "loss": 0.9251, "step": 1271 }, { "epoch": 0.18145506419400856, "grad_norm": 0.8843178153038025, "learning_rate": 9.97573220482426e-06, "loss": 0.8446, "step": 1272 }, { "epoch": 0.18159771754636234, "grad_norm": 0.95987868309021, "learning_rate": 9.975656370362972e-06, "loss": 0.875, "step": 1273 }, { "epoch": 0.1817403708987161, "grad_norm": 0.6299657821655273, "learning_rate": 9.975580417888e-06, "loss": 0.8612, "step": 1274 }, { "epoch": 0.1818830242510699, "grad_norm": 0.8124523758888245, "learning_rate": 9.975504347401147e-06, "loss": 0.7919, "step": 1275 }, { "epoch": 0.18202567760342367, "grad_norm": 0.6986494660377502, "learning_rate": 9.975428158904215e-06, "loss": 0.7638, "step": 1276 }, { "epoch": 0.18216833095577747, "grad_norm": 0.7109574675559998, "learning_rate": 9.975351852399011e-06, "loss": 0.7871, "step": 1277 }, { "epoch": 0.18231098430813125, "grad_norm": 0.7800731062889099, "learning_rate": 9.975275427887346e-06, "loss": 0.7934, "step": 1278 }, { "epoch": 0.18245363766048503, "grad_norm": 0.9133411645889282, "learning_rate": 9.97519888537103e-06, "loss": 0.8887, "step": 1279 }, { "epoch": 0.1825962910128388, "grad_norm": 0.7395647168159485, "learning_rate": 9.975122224851884e-06, "loss": 0.8354, "step": 1280 }, { "epoch": 0.18273894436519258, "grad_norm": 1.1085537672042847, "learning_rate": 9.975045446331723e-06, "loss": 0.8239, "step": 1281 }, { "epoch": 0.18288159771754636, "grad_norm": 0.7853597402572632, "learning_rate": 9.974968549812367e-06, "loss": 0.8743, "step": 1282 }, { "epoch": 0.18302425106990014, "grad_norm": 0.9312048554420471, "learning_rate": 9.97489153529564e-06, "loss": 0.8069, "step": 1283 }, { "epoch": 0.18316690442225392, "grad_norm": 0.7590904235839844, "learning_rate": 9.97481440278337e-06, "loss": 0.7983, "step": 1284 }, { "epoch": 0.1833095577746077, "grad_norm": 0.7187520861625671, "learning_rate": 9.974737152277385e-06, "loss": 0.8311, "step": 1285 }, { "epoch": 0.18345221112696147, "grad_norm": 0.6758371591567993, "learning_rate": 9.97465978377952e-06, "loss": 0.8674, "step": 1286 }, { "epoch": 0.18359486447931525, "grad_norm": 0.8807397484779358, "learning_rate": 9.974582297291607e-06, "loss": 0.941, "step": 1287 }, { "epoch": 0.18373751783166906, "grad_norm": 0.850386381149292, "learning_rate": 9.974504692815485e-06, "loss": 0.7757, "step": 1288 }, { "epoch": 0.18388017118402283, "grad_norm": 0.9141903519630432, "learning_rate": 9.974426970352996e-06, "loss": 0.8307, "step": 1289 }, { "epoch": 0.1840228245363766, "grad_norm": 0.645063579082489, "learning_rate": 9.97434912990598e-06, "loss": 0.937, "step": 1290 }, { "epoch": 0.1841654778887304, "grad_norm": 0.5873847603797913, "learning_rate": 9.974271171476287e-06, "loss": 0.9047, "step": 1291 }, { "epoch": 0.18430813124108417, "grad_norm": 0.9669590592384338, "learning_rate": 9.974193095065763e-06, "loss": 0.9448, "step": 1292 }, { "epoch": 0.18445078459343794, "grad_norm": 0.9505781531333923, "learning_rate": 9.974114900676262e-06, "loss": 0.9263, "step": 1293 }, { "epoch": 0.18459343794579172, "grad_norm": 0.8218867778778076, "learning_rate": 9.974036588309639e-06, "loss": 0.8104, "step": 1294 }, { "epoch": 0.1847360912981455, "grad_norm": 0.7694748044013977, "learning_rate": 9.97395815796775e-06, "loss": 0.8938, "step": 1295 }, { "epoch": 0.18487874465049928, "grad_norm": 0.6638206839561462, "learning_rate": 9.973879609652452e-06, "loss": 0.9507, "step": 1296 }, { "epoch": 0.18502139800285305, "grad_norm": 0.8577884435653687, "learning_rate": 9.973800943365616e-06, "loss": 0.9323, "step": 1297 }, { "epoch": 0.18516405135520686, "grad_norm": 0.6918472647666931, "learning_rate": 9.9737221591091e-06, "loss": 0.8634, "step": 1298 }, { "epoch": 0.18530670470756064, "grad_norm": 1.036751389503479, "learning_rate": 9.97364325688478e-06, "loss": 0.8545, "step": 1299 }, { "epoch": 0.18544935805991442, "grad_norm": 0.6787410378456116, "learning_rate": 9.97356423669452e-06, "loss": 0.7864, "step": 1300 }, { "epoch": 0.1855920114122682, "grad_norm": 0.6573331952095032, "learning_rate": 9.973485098540199e-06, "loss": 0.8529, "step": 1301 }, { "epoch": 0.18573466476462197, "grad_norm": 1.0073238611221313, "learning_rate": 9.973405842423691e-06, "loss": 0.7799, "step": 1302 }, { "epoch": 0.18587731811697575, "grad_norm": 0.9594725370407104, "learning_rate": 9.97332646834688e-06, "loss": 0.8208, "step": 1303 }, { "epoch": 0.18601997146932953, "grad_norm": 0.7488198280334473, "learning_rate": 9.973246976311643e-06, "loss": 0.8644, "step": 1304 }, { "epoch": 0.1861626248216833, "grad_norm": 0.7605316638946533, "learning_rate": 9.973167366319872e-06, "loss": 0.8014, "step": 1305 }, { "epoch": 0.18630527817403708, "grad_norm": 0.9765550494194031, "learning_rate": 9.973087638373449e-06, "loss": 0.8, "step": 1306 }, { "epoch": 0.18644793152639086, "grad_norm": 0.945305347442627, "learning_rate": 9.97300779247427e-06, "loss": 0.8289, "step": 1307 }, { "epoch": 0.18659058487874466, "grad_norm": 1.0048795938491821, "learning_rate": 9.972927828624224e-06, "loss": 0.8447, "step": 1308 }, { "epoch": 0.18673323823109844, "grad_norm": 0.7458885312080383, "learning_rate": 9.972847746825211e-06, "loss": 0.6681, "step": 1309 }, { "epoch": 0.18687589158345222, "grad_norm": 0.7100270986557007, "learning_rate": 9.97276754707913e-06, "loss": 0.8355, "step": 1310 }, { "epoch": 0.187018544935806, "grad_norm": 0.8413822650909424, "learning_rate": 9.972687229387882e-06, "loss": 0.8886, "step": 1311 }, { "epoch": 0.18716119828815977, "grad_norm": 0.8513115048408508, "learning_rate": 9.972606793753374e-06, "loss": 0.7423, "step": 1312 }, { "epoch": 0.18730385164051355, "grad_norm": 0.6329718232154846, "learning_rate": 9.972526240177511e-06, "loss": 0.8155, "step": 1313 }, { "epoch": 0.18744650499286733, "grad_norm": 0.8879488706588745, "learning_rate": 9.972445568662206e-06, "loss": 0.8847, "step": 1314 }, { "epoch": 0.1875891583452211, "grad_norm": 0.7198883891105652, "learning_rate": 9.972364779209372e-06, "loss": 0.6882, "step": 1315 }, { "epoch": 0.18773181169757489, "grad_norm": 0.6940578818321228, "learning_rate": 9.972283871820922e-06, "loss": 0.8243, "step": 1316 }, { "epoch": 0.18787446504992866, "grad_norm": 0.7445089221000671, "learning_rate": 9.972202846498779e-06, "loss": 0.9295, "step": 1317 }, { "epoch": 0.18801711840228244, "grad_norm": 0.7449865937232971, "learning_rate": 9.972121703244864e-06, "loss": 0.8018, "step": 1318 }, { "epoch": 0.18815977175463625, "grad_norm": 0.7003435492515564, "learning_rate": 9.972040442061101e-06, "loss": 0.8368, "step": 1319 }, { "epoch": 0.18830242510699002, "grad_norm": 0.8622342944145203, "learning_rate": 9.971959062949417e-06, "loss": 0.5611, "step": 1320 }, { "epoch": 0.1884450784593438, "grad_norm": 0.7828042507171631, "learning_rate": 9.971877565911742e-06, "loss": 0.8166, "step": 1321 }, { "epoch": 0.18858773181169758, "grad_norm": 0.7658116817474365, "learning_rate": 9.97179595095001e-06, "loss": 0.8256, "step": 1322 }, { "epoch": 0.18873038516405136, "grad_norm": 0.892788290977478, "learning_rate": 9.971714218066155e-06, "loss": 0.7364, "step": 1323 }, { "epoch": 0.18887303851640513, "grad_norm": 0.8484310507774353, "learning_rate": 9.97163236726212e-06, "loss": 0.7845, "step": 1324 }, { "epoch": 0.1890156918687589, "grad_norm": 0.6448402404785156, "learning_rate": 9.97155039853984e-06, "loss": 0.7648, "step": 1325 }, { "epoch": 0.1891583452211127, "grad_norm": 0.6692468523979187, "learning_rate": 9.971468311901262e-06, "loss": 0.8491, "step": 1326 }, { "epoch": 0.18930099857346647, "grad_norm": 0.6772980690002441, "learning_rate": 9.971386107348333e-06, "loss": 0.873, "step": 1327 }, { "epoch": 0.18944365192582024, "grad_norm": 0.790951669216156, "learning_rate": 9.971303784883006e-06, "loss": 0.8977, "step": 1328 }, { "epoch": 0.18958630527817405, "grad_norm": 0.7230355739593506, "learning_rate": 9.971221344507229e-06, "loss": 0.8403, "step": 1329 }, { "epoch": 0.18972895863052783, "grad_norm": 0.8040663003921509, "learning_rate": 9.971138786222959e-06, "loss": 0.8679, "step": 1330 }, { "epoch": 0.1898716119828816, "grad_norm": 0.8062121272087097, "learning_rate": 9.971056110032153e-06, "loss": 0.887, "step": 1331 }, { "epoch": 0.19001426533523538, "grad_norm": 0.7967968583106995, "learning_rate": 9.970973315936773e-06, "loss": 0.8603, "step": 1332 }, { "epoch": 0.19015691868758916, "grad_norm": 0.7940622568130493, "learning_rate": 9.970890403938783e-06, "loss": 0.8229, "step": 1333 }, { "epoch": 0.19029957203994294, "grad_norm": 0.7876176238059998, "learning_rate": 9.97080737404015e-06, "loss": 0.8662, "step": 1334 }, { "epoch": 0.19044222539229672, "grad_norm": 0.7090651988983154, "learning_rate": 9.970724226242842e-06, "loss": 0.7787, "step": 1335 }, { "epoch": 0.1905848787446505, "grad_norm": 0.8402417898178101, "learning_rate": 9.97064096054883e-06, "loss": 0.9188, "step": 1336 }, { "epoch": 0.19072753209700427, "grad_norm": 0.7670844197273254, "learning_rate": 9.970557576960093e-06, "loss": 0.8744, "step": 1337 }, { "epoch": 0.19087018544935805, "grad_norm": 0.8511255383491516, "learning_rate": 9.970474075478604e-06, "loss": 0.7396, "step": 1338 }, { "epoch": 0.19101283880171185, "grad_norm": 0.7267671227455139, "learning_rate": 9.970390456106349e-06, "loss": 0.8837, "step": 1339 }, { "epoch": 0.19115549215406563, "grad_norm": 0.8868377804756165, "learning_rate": 9.970306718845303e-06, "loss": 0.8495, "step": 1340 }, { "epoch": 0.1912981455064194, "grad_norm": 0.7415902018547058, "learning_rate": 9.970222863697462e-06, "loss": 0.8479, "step": 1341 }, { "epoch": 0.1914407988587732, "grad_norm": 0.8380827903747559, "learning_rate": 9.970138890664807e-06, "loss": 0.9269, "step": 1342 }, { "epoch": 0.19158345221112696, "grad_norm": 0.7889651656150818, "learning_rate": 9.970054799749334e-06, "loss": 0.8834, "step": 1343 }, { "epoch": 0.19172610556348074, "grad_norm": 0.8100181818008423, "learning_rate": 9.969970590953036e-06, "loss": 0.7753, "step": 1344 }, { "epoch": 0.19186875891583452, "grad_norm": 0.7502317428588867, "learning_rate": 9.969886264277911e-06, "loss": 0.8223, "step": 1345 }, { "epoch": 0.1920114122681883, "grad_norm": 0.6847222447395325, "learning_rate": 9.969801819725959e-06, "loss": 0.8619, "step": 1346 }, { "epoch": 0.19215406562054207, "grad_norm": 0.7199435830116272, "learning_rate": 9.96971725729918e-06, "loss": 0.8235, "step": 1347 }, { "epoch": 0.19229671897289585, "grad_norm": 0.9098808765411377, "learning_rate": 9.969632576999583e-06, "loss": 0.8578, "step": 1348 }, { "epoch": 0.19243937232524963, "grad_norm": 0.9622191786766052, "learning_rate": 9.969547778829177e-06, "loss": 0.8435, "step": 1349 }, { "epoch": 0.19258202567760344, "grad_norm": 1.048285961151123, "learning_rate": 9.969462862789972e-06, "loss": 0.8485, "step": 1350 }, { "epoch": 0.1927246790299572, "grad_norm": 0.9849112629890442, "learning_rate": 9.969377828883979e-06, "loss": 0.8363, "step": 1351 }, { "epoch": 0.192867332382311, "grad_norm": 0.828313410282135, "learning_rate": 9.96929267711322e-06, "loss": 0.7776, "step": 1352 }, { "epoch": 0.19300998573466477, "grad_norm": 0.676027774810791, "learning_rate": 9.969207407479713e-06, "loss": 0.8087, "step": 1353 }, { "epoch": 0.19315263908701855, "grad_norm": 0.7559381723403931, "learning_rate": 9.969122019985477e-06, "loss": 0.807, "step": 1354 }, { "epoch": 0.19329529243937232, "grad_norm": 0.8531244397163391, "learning_rate": 9.969036514632544e-06, "loss": 0.7793, "step": 1355 }, { "epoch": 0.1934379457917261, "grad_norm": 0.7253252267837524, "learning_rate": 9.968950891422934e-06, "loss": 0.9126, "step": 1356 }, { "epoch": 0.19358059914407988, "grad_norm": 0.6927995085716248, "learning_rate": 9.968865150358685e-06, "loss": 0.897, "step": 1357 }, { "epoch": 0.19372325249643366, "grad_norm": 1.0707788467407227, "learning_rate": 9.968779291441827e-06, "loss": 0.8988, "step": 1358 }, { "epoch": 0.19386590584878743, "grad_norm": 1.0267117023468018, "learning_rate": 9.968693314674396e-06, "loss": 0.8216, "step": 1359 }, { "epoch": 0.19400855920114124, "grad_norm": 0.8915116190910339, "learning_rate": 9.968607220058434e-06, "loss": 0.9453, "step": 1360 }, { "epoch": 0.19415121255349502, "grad_norm": 0.7114331126213074, "learning_rate": 9.96852100759598e-06, "loss": 0.8684, "step": 1361 }, { "epoch": 0.1942938659058488, "grad_norm": 0.5982599854469299, "learning_rate": 9.968434677289078e-06, "loss": 0.9415, "step": 1362 }, { "epoch": 0.19443651925820257, "grad_norm": 0.7128475308418274, "learning_rate": 9.96834822913978e-06, "loss": 0.8216, "step": 1363 }, { "epoch": 0.19457917261055635, "grad_norm": 0.7566629648208618, "learning_rate": 9.968261663150133e-06, "loss": 0.8028, "step": 1364 }, { "epoch": 0.19472182596291013, "grad_norm": 0.9583125710487366, "learning_rate": 9.968174979322193e-06, "loss": 0.8147, "step": 1365 }, { "epoch": 0.1948644793152639, "grad_norm": 0.9913781881332397, "learning_rate": 9.968088177658013e-06, "loss": 0.8716, "step": 1366 }, { "epoch": 0.19500713266761768, "grad_norm": 0.7165933847427368, "learning_rate": 9.968001258159652e-06, "loss": 0.8776, "step": 1367 }, { "epoch": 0.19514978601997146, "grad_norm": 0.5812830328941345, "learning_rate": 9.967914220829173e-06, "loss": 0.8651, "step": 1368 }, { "epoch": 0.19529243937232524, "grad_norm": 0.7699722051620483, "learning_rate": 9.96782706566864e-06, "loss": 0.8094, "step": 1369 }, { "epoch": 0.19543509272467904, "grad_norm": 0.9172202348709106, "learning_rate": 9.967739792680122e-06, "loss": 0.9267, "step": 1370 }, { "epoch": 0.19557774607703282, "grad_norm": 0.6696454882621765, "learning_rate": 9.967652401865685e-06, "loss": 0.8736, "step": 1371 }, { "epoch": 0.1957203994293866, "grad_norm": 0.7624047994613647, "learning_rate": 9.967564893227404e-06, "loss": 0.7729, "step": 1372 }, { "epoch": 0.19586305278174038, "grad_norm": 0.6636433601379395, "learning_rate": 9.967477266767354e-06, "loss": 0.9278, "step": 1373 }, { "epoch": 0.19600570613409415, "grad_norm": 0.969912052154541, "learning_rate": 9.967389522487615e-06, "loss": 0.8405, "step": 1374 }, { "epoch": 0.19614835948644793, "grad_norm": 0.5688678026199341, "learning_rate": 9.967301660390267e-06, "loss": 0.8659, "step": 1375 }, { "epoch": 0.1962910128388017, "grad_norm": 1.0910042524337769, "learning_rate": 9.967213680477392e-06, "loss": 0.915, "step": 1376 }, { "epoch": 0.1964336661911555, "grad_norm": 0.6769224405288696, "learning_rate": 9.967125582751078e-06, "loss": 0.8701, "step": 1377 }, { "epoch": 0.19657631954350926, "grad_norm": 0.7530673146247864, "learning_rate": 9.967037367213417e-06, "loss": 0.8574, "step": 1378 }, { "epoch": 0.19671897289586304, "grad_norm": 0.9435679912567139, "learning_rate": 9.966949033866498e-06, "loss": 0.7807, "step": 1379 }, { "epoch": 0.19686162624821682, "grad_norm": 0.7873449921607971, "learning_rate": 9.966860582712418e-06, "loss": 0.835, "step": 1380 }, { "epoch": 0.19700427960057063, "grad_norm": 0.8070048093795776, "learning_rate": 9.966772013753275e-06, "loss": 0.8098, "step": 1381 }, { "epoch": 0.1971469329529244, "grad_norm": 0.7220003604888916, "learning_rate": 9.96668332699117e-06, "loss": 0.8181, "step": 1382 }, { "epoch": 0.19728958630527818, "grad_norm": 1.0374494791030884, "learning_rate": 9.966594522428203e-06, "loss": 0.8704, "step": 1383 }, { "epoch": 0.19743223965763196, "grad_norm": 0.9598643183708191, "learning_rate": 9.966505600066482e-06, "loss": 0.6479, "step": 1384 }, { "epoch": 0.19757489300998574, "grad_norm": 0.7491325736045837, "learning_rate": 9.96641655990812e-06, "loss": 0.7976, "step": 1385 }, { "epoch": 0.1977175463623395, "grad_norm": 0.8149396777153015, "learning_rate": 9.966327401955225e-06, "loss": 0.6355, "step": 1386 }, { "epoch": 0.1978601997146933, "grad_norm": 1.5392215251922607, "learning_rate": 9.96623812620991e-06, "loss": 0.6817, "step": 1387 }, { "epoch": 0.19800285306704707, "grad_norm": 1.1809272766113281, "learning_rate": 9.966148732674296e-06, "loss": 0.82, "step": 1388 }, { "epoch": 0.19814550641940085, "grad_norm": 0.83845055103302, "learning_rate": 9.966059221350504e-06, "loss": 0.8338, "step": 1389 }, { "epoch": 0.19828815977175462, "grad_norm": 1.3236448764801025, "learning_rate": 9.965969592240654e-06, "loss": 0.6959, "step": 1390 }, { "epoch": 0.19843081312410843, "grad_norm": 0.8994632363319397, "learning_rate": 9.965879845346874e-06, "loss": 0.7831, "step": 1391 }, { "epoch": 0.1985734664764622, "grad_norm": 0.6459245085716248, "learning_rate": 9.96578998067129e-06, "loss": 0.9102, "step": 1392 }, { "epoch": 0.19871611982881598, "grad_norm": 1.321109652519226, "learning_rate": 9.965699998216035e-06, "loss": 0.6065, "step": 1393 }, { "epoch": 0.19885877318116976, "grad_norm": 0.7358494400978088, "learning_rate": 9.965609897983244e-06, "loss": 0.6254, "step": 1394 }, { "epoch": 0.19900142653352354, "grad_norm": 0.7518607974052429, "learning_rate": 9.965519679975053e-06, "loss": 0.8559, "step": 1395 }, { "epoch": 0.19914407988587732, "grad_norm": 0.7181278467178345, "learning_rate": 9.965429344193604e-06, "loss": 0.9398, "step": 1396 }, { "epoch": 0.1992867332382311, "grad_norm": 0.9121167063713074, "learning_rate": 9.965338890641036e-06, "loss": 0.7441, "step": 1397 }, { "epoch": 0.19942938659058487, "grad_norm": 0.9675090312957764, "learning_rate": 9.965248319319495e-06, "loss": 0.8634, "step": 1398 }, { "epoch": 0.19957203994293865, "grad_norm": 0.7653679847717285, "learning_rate": 9.965157630231132e-06, "loss": 0.7931, "step": 1399 }, { "epoch": 0.19971469329529243, "grad_norm": 0.6233118772506714, "learning_rate": 9.965066823378096e-06, "loss": 0.8327, "step": 1400 }, { "epoch": 0.19985734664764623, "grad_norm": 0.9585648775100708, "learning_rate": 9.964975898762542e-06, "loss": 0.8547, "step": 1401 }, { "epoch": 0.2, "grad_norm": 0.9791272878646851, "learning_rate": 9.964884856386625e-06, "loss": 0.9014, "step": 1402 }, { "epoch": 0.2001426533523538, "grad_norm": 0.7262995839118958, "learning_rate": 9.964793696252505e-06, "loss": 0.7906, "step": 1403 }, { "epoch": 0.20028530670470757, "grad_norm": 0.6719980239868164, "learning_rate": 9.964702418362344e-06, "loss": 0.8392, "step": 1404 }, { "epoch": 0.20042796005706134, "grad_norm": 0.7025762796401978, "learning_rate": 9.964611022718307e-06, "loss": 0.866, "step": 1405 }, { "epoch": 0.20057061340941512, "grad_norm": 1.1558644771575928, "learning_rate": 9.964519509322562e-06, "loss": 0.8687, "step": 1406 }, { "epoch": 0.2007132667617689, "grad_norm": 1.01312255859375, "learning_rate": 9.96442787817728e-06, "loss": 0.8252, "step": 1407 }, { "epoch": 0.20085592011412268, "grad_norm": 0.7223882079124451, "learning_rate": 9.964336129284633e-06, "loss": 0.84, "step": 1408 }, { "epoch": 0.20099857346647645, "grad_norm": 0.9957675337791443, "learning_rate": 9.964244262646798e-06, "loss": 0.851, "step": 1409 }, { "epoch": 0.20114122681883023, "grad_norm": 0.6859878301620483, "learning_rate": 9.964152278265953e-06, "loss": 0.8173, "step": 1410 }, { "epoch": 0.201283880171184, "grad_norm": 1.0454378128051758, "learning_rate": 9.964060176144281e-06, "loss": 0.8231, "step": 1411 }, { "epoch": 0.20142653352353782, "grad_norm": 0.7367770671844482, "learning_rate": 9.963967956283966e-06, "loss": 0.788, "step": 1412 }, { "epoch": 0.2015691868758916, "grad_norm": 0.8545896410942078, "learning_rate": 9.963875618687196e-06, "loss": 0.8883, "step": 1413 }, { "epoch": 0.20171184022824537, "grad_norm": 0.8085616827011108, "learning_rate": 9.96378316335616e-06, "loss": 0.8329, "step": 1414 }, { "epoch": 0.20185449358059915, "grad_norm": 0.8312434554100037, "learning_rate": 9.963690590293051e-06, "loss": 0.8483, "step": 1415 }, { "epoch": 0.20199714693295293, "grad_norm": 0.769288182258606, "learning_rate": 9.963597899500066e-06, "loss": 0.8054, "step": 1416 }, { "epoch": 0.2021398002853067, "grad_norm": 0.7340685129165649, "learning_rate": 9.963505090979401e-06, "loss": 0.9286, "step": 1417 }, { "epoch": 0.20228245363766048, "grad_norm": 0.7468265891075134, "learning_rate": 9.96341216473326e-06, "loss": 0.8526, "step": 1418 }, { "epoch": 0.20242510699001426, "grad_norm": 0.8425663113594055, "learning_rate": 9.963319120763844e-06, "loss": 0.8726, "step": 1419 }, { "epoch": 0.20256776034236804, "grad_norm": 0.7844623327255249, "learning_rate": 9.963225959073361e-06, "loss": 0.8478, "step": 1420 }, { "epoch": 0.2027104136947218, "grad_norm": 0.7162002325057983, "learning_rate": 9.963132679664023e-06, "loss": 0.8952, "step": 1421 }, { "epoch": 0.20285306704707562, "grad_norm": 0.9700891971588135, "learning_rate": 9.963039282538039e-06, "loss": 0.8846, "step": 1422 }, { "epoch": 0.2029957203994294, "grad_norm": 0.9550398588180542, "learning_rate": 9.962945767697628e-06, "loss": 0.7338, "step": 1423 }, { "epoch": 0.20313837375178317, "grad_norm": 0.8337945342063904, "learning_rate": 9.962852135145002e-06, "loss": 0.8405, "step": 1424 }, { "epoch": 0.20328102710413695, "grad_norm": 0.7780858278274536, "learning_rate": 9.962758384882388e-06, "loss": 0.9113, "step": 1425 }, { "epoch": 0.20342368045649073, "grad_norm": 0.8404639363288879, "learning_rate": 9.962664516912005e-06, "loss": 0.6913, "step": 1426 }, { "epoch": 0.2035663338088445, "grad_norm": 0.7688642144203186, "learning_rate": 9.962570531236086e-06, "loss": 0.7987, "step": 1427 }, { "epoch": 0.20370898716119828, "grad_norm": 0.5986928939819336, "learning_rate": 9.962476427856853e-06, "loss": 0.8337, "step": 1428 }, { "epoch": 0.20385164051355206, "grad_norm": 0.835395097732544, "learning_rate": 9.96238220677654e-06, "loss": 0.8206, "step": 1429 }, { "epoch": 0.20399429386590584, "grad_norm": 0.6732452511787415, "learning_rate": 9.962287867997383e-06, "loss": 0.8274, "step": 1430 }, { "epoch": 0.20413694721825962, "grad_norm": 0.7343263626098633, "learning_rate": 9.96219341152162e-06, "loss": 0.8371, "step": 1431 }, { "epoch": 0.20427960057061342, "grad_norm": 1.052815318107605, "learning_rate": 9.962098837351488e-06, "loss": 0.8115, "step": 1432 }, { "epoch": 0.2044222539229672, "grad_norm": 0.7229878306388855, "learning_rate": 9.962004145489235e-06, "loss": 0.8195, "step": 1433 }, { "epoch": 0.20456490727532098, "grad_norm": 0.7342201471328735, "learning_rate": 9.961909335937102e-06, "loss": 0.877, "step": 1434 }, { "epoch": 0.20470756062767476, "grad_norm": 0.8799934983253479, "learning_rate": 9.961814408697341e-06, "loss": 0.8691, "step": 1435 }, { "epoch": 0.20485021398002853, "grad_norm": 0.9665185213088989, "learning_rate": 9.961719363772203e-06, "loss": 0.8859, "step": 1436 }, { "epoch": 0.2049928673323823, "grad_norm": 0.7919800281524658, "learning_rate": 9.961624201163942e-06, "loss": 0.8623, "step": 1437 }, { "epoch": 0.2051355206847361, "grad_norm": 0.5769456624984741, "learning_rate": 9.961528920874814e-06, "loss": 0.6311, "step": 1438 }, { "epoch": 0.20527817403708987, "grad_norm": 0.7586618065834045, "learning_rate": 9.96143352290708e-06, "loss": 0.8375, "step": 1439 }, { "epoch": 0.20542082738944364, "grad_norm": 0.8899186253547668, "learning_rate": 9.961338007263004e-06, "loss": 0.851, "step": 1440 }, { "epoch": 0.20556348074179742, "grad_norm": 0.8149307370185852, "learning_rate": 9.961242373944848e-06, "loss": 0.8635, "step": 1441 }, { "epoch": 0.2057061340941512, "grad_norm": 0.8376900553703308, "learning_rate": 9.961146622954883e-06, "loss": 0.8212, "step": 1442 }, { "epoch": 0.205848787446505, "grad_norm": 0.757046639919281, "learning_rate": 9.961050754295379e-06, "loss": 0.9017, "step": 1443 }, { "epoch": 0.20599144079885878, "grad_norm": 0.992950439453125, "learning_rate": 9.960954767968611e-06, "loss": 0.8446, "step": 1444 }, { "epoch": 0.20613409415121256, "grad_norm": 0.8884807229042053, "learning_rate": 9.960858663976854e-06, "loss": 0.7902, "step": 1445 }, { "epoch": 0.20627674750356634, "grad_norm": 1.0772405862808228, "learning_rate": 9.960762442322386e-06, "loss": 0.8405, "step": 1446 }, { "epoch": 0.20641940085592012, "grad_norm": 0.7249343395233154, "learning_rate": 9.960666103007495e-06, "loss": 0.8426, "step": 1447 }, { "epoch": 0.2065620542082739, "grad_norm": 1.0311321020126343, "learning_rate": 9.960569646034461e-06, "loss": 0.8344, "step": 1448 }, { "epoch": 0.20670470756062767, "grad_norm": 0.8216068744659424, "learning_rate": 9.960473071405574e-06, "loss": 0.8188, "step": 1449 }, { "epoch": 0.20684736091298145, "grad_norm": 0.920533299446106, "learning_rate": 9.960376379123123e-06, "loss": 0.8147, "step": 1450 }, { "epoch": 0.20699001426533523, "grad_norm": 0.7541977167129517, "learning_rate": 9.9602795691894e-06, "loss": 0.8744, "step": 1451 }, { "epoch": 0.207132667617689, "grad_norm": 0.9311749339103699, "learning_rate": 9.960182641606704e-06, "loss": 0.831, "step": 1452 }, { "epoch": 0.2072753209700428, "grad_norm": 0.6928288340568542, "learning_rate": 9.960085596377335e-06, "loss": 0.8744, "step": 1453 }, { "epoch": 0.2074179743223966, "grad_norm": 0.8880890607833862, "learning_rate": 9.959988433503592e-06, "loss": 0.8748, "step": 1454 }, { "epoch": 0.20756062767475036, "grad_norm": 0.7945384979248047, "learning_rate": 9.959891152987779e-06, "loss": 0.8836, "step": 1455 }, { "epoch": 0.20770328102710414, "grad_norm": 0.8306334018707275, "learning_rate": 9.959793754832208e-06, "loss": 0.8725, "step": 1456 }, { "epoch": 0.20784593437945792, "grad_norm": 0.8966955542564392, "learning_rate": 9.959696239039182e-06, "loss": 0.8302, "step": 1457 }, { "epoch": 0.2079885877318117, "grad_norm": 0.7303590774536133, "learning_rate": 9.95959860561102e-06, "loss": 0.8507, "step": 1458 }, { "epoch": 0.20813124108416547, "grad_norm": 0.7616184949874878, "learning_rate": 9.959500854550036e-06, "loss": 0.8586, "step": 1459 }, { "epoch": 0.20827389443651925, "grad_norm": 0.6741976141929626, "learning_rate": 9.959402985858548e-06, "loss": 0.9009, "step": 1460 }, { "epoch": 0.20841654778887303, "grad_norm": 0.7569184899330139, "learning_rate": 9.959304999538875e-06, "loss": 0.8256, "step": 1461 }, { "epoch": 0.2085592011412268, "grad_norm": 0.7997366189956665, "learning_rate": 9.959206895593345e-06, "loss": 0.8549, "step": 1462 }, { "epoch": 0.20870185449358059, "grad_norm": 0.6974430084228516, "learning_rate": 9.959108674024282e-06, "loss": 0.8359, "step": 1463 }, { "epoch": 0.2088445078459344, "grad_norm": 0.6249332427978516, "learning_rate": 9.959010334834018e-06, "loss": 0.7799, "step": 1464 }, { "epoch": 0.20898716119828817, "grad_norm": 0.8389965891838074, "learning_rate": 9.958911878024884e-06, "loss": 0.8686, "step": 1465 }, { "epoch": 0.20912981455064195, "grad_norm": 0.9963505268096924, "learning_rate": 9.958813303599215e-06, "loss": 0.8453, "step": 1466 }, { "epoch": 0.20927246790299572, "grad_norm": 0.7172727584838867, "learning_rate": 9.95871461155935e-06, "loss": 0.836, "step": 1467 }, { "epoch": 0.2094151212553495, "grad_norm": 0.9143319129943848, "learning_rate": 9.958615801907629e-06, "loss": 0.8144, "step": 1468 }, { "epoch": 0.20955777460770328, "grad_norm": 0.8972535133361816, "learning_rate": 9.958516874646396e-06, "loss": 0.7733, "step": 1469 }, { "epoch": 0.20970042796005706, "grad_norm": 0.9460464715957642, "learning_rate": 9.958417829777996e-06, "loss": 0.8378, "step": 1470 }, { "epoch": 0.20984308131241083, "grad_norm": 0.8458881974220276, "learning_rate": 9.95831866730478e-06, "loss": 0.8638, "step": 1471 }, { "epoch": 0.2099857346647646, "grad_norm": 0.7687568664550781, "learning_rate": 9.958219387229099e-06, "loss": 0.7991, "step": 1472 }, { "epoch": 0.2101283880171184, "grad_norm": 0.6833989024162292, "learning_rate": 9.958119989553309e-06, "loss": 0.8146, "step": 1473 }, { "epoch": 0.2102710413694722, "grad_norm": 0.9045029282569885, "learning_rate": 9.958020474279766e-06, "loss": 0.7656, "step": 1474 }, { "epoch": 0.21041369472182597, "grad_norm": 0.8134523630142212, "learning_rate": 9.95792084141083e-06, "loss": 0.7935, "step": 1475 }, { "epoch": 0.21055634807417975, "grad_norm": 0.8164616227149963, "learning_rate": 9.957821090948867e-06, "loss": 0.8838, "step": 1476 }, { "epoch": 0.21069900142653353, "grad_norm": 0.8561461567878723, "learning_rate": 9.95772122289624e-06, "loss": 0.7915, "step": 1477 }, { "epoch": 0.2108416547788873, "grad_norm": 0.897421658039093, "learning_rate": 9.957621237255318e-06, "loss": 0.8708, "step": 1478 }, { "epoch": 0.21098430813124108, "grad_norm": 0.9836632609367371, "learning_rate": 9.957521134028474e-06, "loss": 0.8615, "step": 1479 }, { "epoch": 0.21112696148359486, "grad_norm": 0.9291009306907654, "learning_rate": 9.95742091321808e-06, "loss": 0.8834, "step": 1480 }, { "epoch": 0.21126961483594864, "grad_norm": 0.7535540461540222, "learning_rate": 9.957320574826516e-06, "loss": 0.8303, "step": 1481 }, { "epoch": 0.21141226818830242, "grad_norm": 0.8194931745529175, "learning_rate": 9.95722011885616e-06, "loss": 0.7933, "step": 1482 }, { "epoch": 0.2115549215406562, "grad_norm": 1.0039008855819702, "learning_rate": 9.957119545309393e-06, "loss": 0.9125, "step": 1483 }, { "epoch": 0.21169757489301, "grad_norm": 0.6903035640716553, "learning_rate": 9.957018854188604e-06, "loss": 0.8154, "step": 1484 }, { "epoch": 0.21184022824536378, "grad_norm": 0.8587132692337036, "learning_rate": 9.95691804549618e-06, "loss": 0.8082, "step": 1485 }, { "epoch": 0.21198288159771755, "grad_norm": 0.8282580971717834, "learning_rate": 9.956817119234511e-06, "loss": 0.858, "step": 1486 }, { "epoch": 0.21212553495007133, "grad_norm": 0.7019696235656738, "learning_rate": 9.956716075405993e-06, "loss": 0.748, "step": 1487 }, { "epoch": 0.2122681883024251, "grad_norm": 0.5958597660064697, "learning_rate": 9.956614914013018e-06, "loss": 0.8647, "step": 1488 }, { "epoch": 0.2124108416547789, "grad_norm": 1.059517502784729, "learning_rate": 9.95651363505799e-06, "loss": 0.7933, "step": 1489 }, { "epoch": 0.21255349500713266, "grad_norm": 0.877007007598877, "learning_rate": 9.956412238543309e-06, "loss": 0.8232, "step": 1490 }, { "epoch": 0.21269614835948644, "grad_norm": 0.6188616156578064, "learning_rate": 9.95631072447138e-06, "loss": 0.8703, "step": 1491 }, { "epoch": 0.21283880171184022, "grad_norm": 0.9746018648147583, "learning_rate": 9.95620909284461e-06, "loss": 0.802, "step": 1492 }, { "epoch": 0.212981455064194, "grad_norm": 0.7111904621124268, "learning_rate": 9.956107343665412e-06, "loss": 0.8449, "step": 1493 }, { "epoch": 0.21312410841654778, "grad_norm": 0.7560496926307678, "learning_rate": 9.956005476936198e-06, "loss": 0.8304, "step": 1494 }, { "epoch": 0.21326676176890158, "grad_norm": 0.9106425642967224, "learning_rate": 9.955903492659386e-06, "loss": 0.8314, "step": 1495 }, { "epoch": 0.21340941512125536, "grad_norm": 0.7547789216041565, "learning_rate": 9.95580139083739e-06, "loss": 0.7854, "step": 1496 }, { "epoch": 0.21355206847360914, "grad_norm": 0.8750001192092896, "learning_rate": 9.955699171472637e-06, "loss": 0.7986, "step": 1497 }, { "epoch": 0.2136947218259629, "grad_norm": 0.8077685236930847, "learning_rate": 9.955596834567547e-06, "loss": 0.8956, "step": 1498 }, { "epoch": 0.2138373751783167, "grad_norm": 0.8945642113685608, "learning_rate": 9.955494380124551e-06, "loss": 0.827, "step": 1499 }, { "epoch": 0.21398002853067047, "grad_norm": 0.706613302230835, "learning_rate": 9.955391808146075e-06, "loss": 0.7894, "step": 1500 }, { "epoch": 0.21412268188302425, "grad_norm": 0.974545419216156, "learning_rate": 9.955289118634556e-06, "loss": 0.8162, "step": 1501 }, { "epoch": 0.21426533523537802, "grad_norm": 0.7902306914329529, "learning_rate": 9.95518631159243e-06, "loss": 0.879, "step": 1502 }, { "epoch": 0.2144079885877318, "grad_norm": 0.6855301856994629, "learning_rate": 9.95508338702213e-06, "loss": 0.8494, "step": 1503 }, { "epoch": 0.21455064194008558, "grad_norm": 0.7092685699462891, "learning_rate": 9.9549803449261e-06, "loss": 0.8121, "step": 1504 }, { "epoch": 0.21469329529243938, "grad_norm": 0.7201098799705505, "learning_rate": 9.954877185306784e-06, "loss": 0.8713, "step": 1505 }, { "epoch": 0.21483594864479316, "grad_norm": 0.7282015681266785, "learning_rate": 9.954773908166631e-06, "loss": 0.785, "step": 1506 }, { "epoch": 0.21497860199714694, "grad_norm": 1.1726269721984863, "learning_rate": 9.954670513508089e-06, "loss": 0.8062, "step": 1507 }, { "epoch": 0.21512125534950072, "grad_norm": 0.9354681968688965, "learning_rate": 9.954567001333609e-06, "loss": 0.7863, "step": 1508 }, { "epoch": 0.2152639087018545, "grad_norm": 0.7831276059150696, "learning_rate": 9.954463371645646e-06, "loss": 0.8388, "step": 1509 }, { "epoch": 0.21540656205420827, "grad_norm": 0.7156667113304138, "learning_rate": 9.954359624446662e-06, "loss": 0.7901, "step": 1510 }, { "epoch": 0.21554921540656205, "grad_norm": 0.5895144939422607, "learning_rate": 9.954255759739113e-06, "loss": 0.834, "step": 1511 }, { "epoch": 0.21569186875891583, "grad_norm": 0.894581139087677, "learning_rate": 9.954151777525464e-06, "loss": 0.8422, "step": 1512 }, { "epoch": 0.2158345221112696, "grad_norm": 0.8653997778892517, "learning_rate": 9.95404767780818e-06, "loss": 0.7984, "step": 1513 }, { "epoch": 0.21597717546362338, "grad_norm": 0.8264660239219666, "learning_rate": 9.953943460589734e-06, "loss": 0.8289, "step": 1514 }, { "epoch": 0.2161198288159772, "grad_norm": 0.8366053104400635, "learning_rate": 9.953839125872595e-06, "loss": 0.8601, "step": 1515 }, { "epoch": 0.21626248216833097, "grad_norm": 0.8729448914527893, "learning_rate": 9.953734673659239e-06, "loss": 0.8096, "step": 1516 }, { "epoch": 0.21640513552068474, "grad_norm": 0.7833065986633301, "learning_rate": 9.953630103952142e-06, "loss": 0.7769, "step": 1517 }, { "epoch": 0.21654778887303852, "grad_norm": 0.9304572343826294, "learning_rate": 9.953525416753782e-06, "loss": 0.8985, "step": 1518 }, { "epoch": 0.2166904422253923, "grad_norm": 1.3142472505569458, "learning_rate": 9.953420612066646e-06, "loss": 0.8003, "step": 1519 }, { "epoch": 0.21683309557774608, "grad_norm": 0.8854172229766846, "learning_rate": 9.953315689893219e-06, "loss": 0.9071, "step": 1520 }, { "epoch": 0.21697574893009985, "grad_norm": 0.7279800176620483, "learning_rate": 9.95321065023599e-06, "loss": 0.8095, "step": 1521 }, { "epoch": 0.21711840228245363, "grad_norm": 1.1312992572784424, "learning_rate": 9.953105493097449e-06, "loss": 0.8317, "step": 1522 }, { "epoch": 0.2172610556348074, "grad_norm": 0.7317725419998169, "learning_rate": 9.953000218480088e-06, "loss": 0.8973, "step": 1523 }, { "epoch": 0.2174037089871612, "grad_norm": 0.9556543231010437, "learning_rate": 9.952894826386408e-06, "loss": 0.7908, "step": 1524 }, { "epoch": 0.21754636233951496, "grad_norm": 0.9414548873901367, "learning_rate": 9.952789316818907e-06, "loss": 0.8429, "step": 1525 }, { "epoch": 0.21768901569186877, "grad_norm": 0.7839816808700562, "learning_rate": 9.952683689780089e-06, "loss": 0.8439, "step": 1526 }, { "epoch": 0.21783166904422255, "grad_norm": 0.7676011919975281, "learning_rate": 9.952577945272458e-06, "loss": 0.8922, "step": 1527 }, { "epoch": 0.21797432239657633, "grad_norm": 0.738705575466156, "learning_rate": 9.95247208329852e-06, "loss": 0.8221, "step": 1528 }, { "epoch": 0.2181169757489301, "grad_norm": 0.76386958360672, "learning_rate": 9.952366103860788e-06, "loss": 0.8387, "step": 1529 }, { "epoch": 0.21825962910128388, "grad_norm": 0.8681210279464722, "learning_rate": 9.952260006961776e-06, "loss": 0.7862, "step": 1530 }, { "epoch": 0.21840228245363766, "grad_norm": 1.1066902875900269, "learning_rate": 9.952153792604e-06, "loss": 0.8484, "step": 1531 }, { "epoch": 0.21854493580599144, "grad_norm": 1.1809475421905518, "learning_rate": 9.95204746078998e-06, "loss": 0.8923, "step": 1532 }, { "epoch": 0.2186875891583452, "grad_norm": 0.951051652431488, "learning_rate": 9.951941011522234e-06, "loss": 0.7666, "step": 1533 }, { "epoch": 0.218830242510699, "grad_norm": 0.8252032399177551, "learning_rate": 9.951834444803294e-06, "loss": 0.7735, "step": 1534 }, { "epoch": 0.21897289586305277, "grad_norm": 3.1314423084259033, "learning_rate": 9.95172776063568e-06, "loss": 0.7101, "step": 1535 }, { "epoch": 0.21911554921540657, "grad_norm": 0.9142853617668152, "learning_rate": 9.951620959021929e-06, "loss": 0.8895, "step": 1536 }, { "epoch": 0.21925820256776035, "grad_norm": 0.841295063495636, "learning_rate": 9.951514039964569e-06, "loss": 0.7787, "step": 1537 }, { "epoch": 0.21940085592011413, "grad_norm": 0.7550581097602844, "learning_rate": 9.951407003466139e-06, "loss": 0.8533, "step": 1538 }, { "epoch": 0.2195435092724679, "grad_norm": 1.0160503387451172, "learning_rate": 9.951299849529175e-06, "loss": 0.8078, "step": 1539 }, { "epoch": 0.21968616262482168, "grad_norm": 1.1737887859344482, "learning_rate": 9.951192578156221e-06, "loss": 0.7924, "step": 1540 }, { "epoch": 0.21982881597717546, "grad_norm": 1.1103118658065796, "learning_rate": 9.95108518934982e-06, "loss": 0.8444, "step": 1541 }, { "epoch": 0.21997146932952924, "grad_norm": 0.9257894158363342, "learning_rate": 9.950977683112521e-06, "loss": 0.8423, "step": 1542 }, { "epoch": 0.22011412268188302, "grad_norm": 0.7128218412399292, "learning_rate": 9.950870059446872e-06, "loss": 0.8094, "step": 1543 }, { "epoch": 0.2202567760342368, "grad_norm": 0.754286527633667, "learning_rate": 9.950762318355425e-06, "loss": 0.8264, "step": 1544 }, { "epoch": 0.22039942938659057, "grad_norm": 0.8896986246109009, "learning_rate": 9.950654459840736e-06, "loss": 0.7568, "step": 1545 }, { "epoch": 0.22054208273894438, "grad_norm": 0.9886698126792908, "learning_rate": 9.950546483905366e-06, "loss": 0.8754, "step": 1546 }, { "epoch": 0.22068473609129816, "grad_norm": 0.8639637231826782, "learning_rate": 9.95043839055187e-06, "loss": 0.8645, "step": 1547 }, { "epoch": 0.22082738944365193, "grad_norm": 0.5904603004455566, "learning_rate": 9.950330179782818e-06, "loss": 0.9219, "step": 1548 }, { "epoch": 0.2209700427960057, "grad_norm": 0.6694537997245789, "learning_rate": 9.950221851600775e-06, "loss": 0.8739, "step": 1549 }, { "epoch": 0.2211126961483595, "grad_norm": 0.6146013140678406, "learning_rate": 9.950113406008306e-06, "loss": 0.9186, "step": 1550 }, { "epoch": 0.22125534950071327, "grad_norm": 0.778023898601532, "learning_rate": 9.95000484300799e-06, "loss": 0.8382, "step": 1551 }, { "epoch": 0.22139800285306704, "grad_norm": 0.7983495593070984, "learning_rate": 9.949896162602396e-06, "loss": 0.8371, "step": 1552 }, { "epoch": 0.22154065620542082, "grad_norm": 0.8418344259262085, "learning_rate": 9.949787364794104e-06, "loss": 0.81, "step": 1553 }, { "epoch": 0.2216833095577746, "grad_norm": 0.8970357775688171, "learning_rate": 9.949678449585695e-06, "loss": 0.8177, "step": 1554 }, { "epoch": 0.22182596291012838, "grad_norm": 0.8553009033203125, "learning_rate": 9.949569416979753e-06, "loss": 0.7695, "step": 1555 }, { "epoch": 0.22196861626248215, "grad_norm": 1.215833067893982, "learning_rate": 9.949460266978862e-06, "loss": 0.808, "step": 1556 }, { "epoch": 0.22211126961483596, "grad_norm": 0.6654186844825745, "learning_rate": 9.949350999585612e-06, "loss": 0.7478, "step": 1557 }, { "epoch": 0.22225392296718974, "grad_norm": 0.6524320244789124, "learning_rate": 9.949241614802593e-06, "loss": 0.8465, "step": 1558 }, { "epoch": 0.22239657631954352, "grad_norm": 0.9813492298126221, "learning_rate": 9.949132112632402e-06, "loss": 0.81, "step": 1559 }, { "epoch": 0.2225392296718973, "grad_norm": 0.8177356123924255, "learning_rate": 9.949022493077636e-06, "loss": 0.5872, "step": 1560 }, { "epoch": 0.22268188302425107, "grad_norm": 0.9326146245002747, "learning_rate": 9.948912756140891e-06, "loss": 0.8172, "step": 1561 }, { "epoch": 0.22282453637660485, "grad_norm": 0.7023347616195679, "learning_rate": 9.948802901824775e-06, "loss": 0.8085, "step": 1562 }, { "epoch": 0.22296718972895863, "grad_norm": 0.7878661155700684, "learning_rate": 9.948692930131892e-06, "loss": 0.9046, "step": 1563 }, { "epoch": 0.2231098430813124, "grad_norm": 0.7527204751968384, "learning_rate": 9.948582841064847e-06, "loss": 0.7797, "step": 1564 }, { "epoch": 0.22325249643366618, "grad_norm": 0.8323975205421448, "learning_rate": 9.948472634626256e-06, "loss": 0.8538, "step": 1565 }, { "epoch": 0.22339514978601996, "grad_norm": 0.8328871726989746, "learning_rate": 9.948362310818729e-06, "loss": 0.8325, "step": 1566 }, { "epoch": 0.22353780313837376, "grad_norm": 0.8960254788398743, "learning_rate": 9.948251869644885e-06, "loss": 0.7934, "step": 1567 }, { "epoch": 0.22368045649072754, "grad_norm": 0.619554340839386, "learning_rate": 9.948141311107344e-06, "loss": 0.8542, "step": 1568 }, { "epoch": 0.22382310984308132, "grad_norm": 0.8335708379745483, "learning_rate": 9.948030635208726e-06, "loss": 0.8629, "step": 1569 }, { "epoch": 0.2239657631954351, "grad_norm": 0.7417574524879456, "learning_rate": 9.947919841951656e-06, "loss": 0.7854, "step": 1570 }, { "epoch": 0.22410841654778887, "grad_norm": 0.7167844772338867, "learning_rate": 9.947808931338763e-06, "loss": 0.8039, "step": 1571 }, { "epoch": 0.22425106990014265, "grad_norm": 1.0231636762619019, "learning_rate": 9.947697903372678e-06, "loss": 0.8038, "step": 1572 }, { "epoch": 0.22439372325249643, "grad_norm": 0.8088961243629456, "learning_rate": 9.947586758056035e-06, "loss": 0.8621, "step": 1573 }, { "epoch": 0.2245363766048502, "grad_norm": 0.9731947183609009, "learning_rate": 9.947475495391467e-06, "loss": 0.8177, "step": 1574 }, { "epoch": 0.22467902995720399, "grad_norm": 0.7506547570228577, "learning_rate": 9.947364115381617e-06, "loss": 0.7207, "step": 1575 }, { "epoch": 0.22482168330955776, "grad_norm": 0.7788878679275513, "learning_rate": 9.947252618029124e-06, "loss": 0.857, "step": 1576 }, { "epoch": 0.22496433666191157, "grad_norm": 0.6956376433372498, "learning_rate": 9.947141003336633e-06, "loss": 0.8099, "step": 1577 }, { "epoch": 0.22510699001426535, "grad_norm": 0.593755841255188, "learning_rate": 9.947029271306792e-06, "loss": 0.8418, "step": 1578 }, { "epoch": 0.22524964336661912, "grad_norm": 0.7121747732162476, "learning_rate": 9.946917421942249e-06, "loss": 0.8181, "step": 1579 }, { "epoch": 0.2253922967189729, "grad_norm": 0.9468618035316467, "learning_rate": 9.94680545524566e-06, "loss": 0.8404, "step": 1580 }, { "epoch": 0.22553495007132668, "grad_norm": 0.9029089212417603, "learning_rate": 9.946693371219679e-06, "loss": 0.7842, "step": 1581 }, { "epoch": 0.22567760342368046, "grad_norm": 0.9820320010185242, "learning_rate": 9.946581169866964e-06, "loss": 0.8875, "step": 1582 }, { "epoch": 0.22582025677603423, "grad_norm": 0.8099811673164368, "learning_rate": 9.946468851190178e-06, "loss": 0.8181, "step": 1583 }, { "epoch": 0.225962910128388, "grad_norm": 0.773884654045105, "learning_rate": 9.946356415191983e-06, "loss": 0.8079, "step": 1584 }, { "epoch": 0.2261055634807418, "grad_norm": 0.8076888918876648, "learning_rate": 9.946243861875046e-06, "loss": 0.9412, "step": 1585 }, { "epoch": 0.22624821683309557, "grad_norm": 0.7969920635223389, "learning_rate": 9.946131191242038e-06, "loss": 0.8288, "step": 1586 }, { "epoch": 0.22639087018544934, "grad_norm": 0.7188319563865662, "learning_rate": 9.946018403295629e-06, "loss": 0.8562, "step": 1587 }, { "epoch": 0.22653352353780315, "grad_norm": 0.8297858238220215, "learning_rate": 9.945905498038498e-06, "loss": 0.7975, "step": 1588 }, { "epoch": 0.22667617689015693, "grad_norm": 1.1273237466812134, "learning_rate": 9.945792475473319e-06, "loss": 0.8782, "step": 1589 }, { "epoch": 0.2268188302425107, "grad_norm": 0.9453109502792358, "learning_rate": 9.945679335602775e-06, "loss": 0.8654, "step": 1590 }, { "epoch": 0.22696148359486448, "grad_norm": 0.8723772168159485, "learning_rate": 9.945566078429547e-06, "loss": 0.7964, "step": 1591 }, { "epoch": 0.22710413694721826, "grad_norm": 0.9568871855735779, "learning_rate": 9.945452703956325e-06, "loss": 0.8647, "step": 1592 }, { "epoch": 0.22724679029957204, "grad_norm": 0.8982163667678833, "learning_rate": 9.945339212185793e-06, "loss": 0.8167, "step": 1593 }, { "epoch": 0.22738944365192582, "grad_norm": 0.8818013072013855, "learning_rate": 9.94522560312065e-06, "loss": 0.8144, "step": 1594 }, { "epoch": 0.2275320970042796, "grad_norm": 0.9404963850975037, "learning_rate": 9.945111876763584e-06, "loss": 0.8257, "step": 1595 }, { "epoch": 0.22767475035663337, "grad_norm": 0.8494290113449097, "learning_rate": 9.944998033117294e-06, "loss": 0.8062, "step": 1596 }, { "epoch": 0.22781740370898715, "grad_norm": 0.8072165846824646, "learning_rate": 9.944884072184481e-06, "loss": 0.8291, "step": 1597 }, { "epoch": 0.22796005706134095, "grad_norm": 0.7694734334945679, "learning_rate": 9.944769993967851e-06, "loss": 0.7759, "step": 1598 }, { "epoch": 0.22810271041369473, "grad_norm": 0.719517171382904, "learning_rate": 9.944655798470104e-06, "loss": 0.8932, "step": 1599 }, { "epoch": 0.2282453637660485, "grad_norm": 0.9067398905754089, "learning_rate": 9.944541485693952e-06, "loss": 0.8331, "step": 1600 }, { "epoch": 0.2283880171184023, "grad_norm": 0.8119226098060608, "learning_rate": 9.944427055642106e-06, "loss": 0.7957, "step": 1601 }, { "epoch": 0.22853067047075606, "grad_norm": 0.6486713290214539, "learning_rate": 9.944312508317277e-06, "loss": 0.8689, "step": 1602 }, { "epoch": 0.22867332382310984, "grad_norm": 0.8576393127441406, "learning_rate": 9.944197843722186e-06, "loss": 0.7606, "step": 1603 }, { "epoch": 0.22881597717546362, "grad_norm": 0.8671183586120605, "learning_rate": 9.944083061859552e-06, "loss": 0.8407, "step": 1604 }, { "epoch": 0.2289586305278174, "grad_norm": 0.6724821329116821, "learning_rate": 9.943968162732096e-06, "loss": 0.8406, "step": 1605 }, { "epoch": 0.22910128388017117, "grad_norm": 0.8926640748977661, "learning_rate": 9.943853146342543e-06, "loss": 0.6138, "step": 1606 }, { "epoch": 0.22924393723252495, "grad_norm": 0.7364065051078796, "learning_rate": 9.94373801269362e-06, "loss": 0.7925, "step": 1607 }, { "epoch": 0.22938659058487876, "grad_norm": 1.13555109500885, "learning_rate": 9.943622761788062e-06, "loss": 0.8352, "step": 1608 }, { "epoch": 0.22952924393723254, "grad_norm": 0.7744009494781494, "learning_rate": 9.943507393628599e-06, "loss": 0.8385, "step": 1609 }, { "epoch": 0.2296718972895863, "grad_norm": 1.1767977476119995, "learning_rate": 9.94339190821797e-06, "loss": 0.9137, "step": 1610 }, { "epoch": 0.2298145506419401, "grad_norm": 0.6439663767814636, "learning_rate": 9.94327630555891e-06, "loss": 0.8694, "step": 1611 }, { "epoch": 0.22995720399429387, "grad_norm": 0.8223626613616943, "learning_rate": 9.943160585654164e-06, "loss": 0.8267, "step": 1612 }, { "epoch": 0.23009985734664765, "grad_norm": 1.0448628664016724, "learning_rate": 9.943044748506476e-06, "loss": 0.8596, "step": 1613 }, { "epoch": 0.23024251069900142, "grad_norm": 0.8901304006576538, "learning_rate": 9.942928794118593e-06, "loss": 0.8276, "step": 1614 }, { "epoch": 0.2303851640513552, "grad_norm": 1.0466147661209106, "learning_rate": 9.942812722493266e-06, "loss": 0.8693, "step": 1615 }, { "epoch": 0.23052781740370898, "grad_norm": 0.9215507507324219, "learning_rate": 9.942696533633248e-06, "loss": 0.8539, "step": 1616 }, { "epoch": 0.23067047075606276, "grad_norm": 0.9924200177192688, "learning_rate": 9.942580227541294e-06, "loss": 0.767, "step": 1617 }, { "epoch": 0.23081312410841653, "grad_norm": 0.6106994152069092, "learning_rate": 9.942463804220164e-06, "loss": 0.8978, "step": 1618 }, { "epoch": 0.23095577746077034, "grad_norm": 0.9730753302574158, "learning_rate": 9.942347263672617e-06, "loss": 0.849, "step": 1619 }, { "epoch": 0.23109843081312412, "grad_norm": 0.801425039768219, "learning_rate": 9.94223060590142e-06, "loss": 0.8151, "step": 1620 }, { "epoch": 0.2312410841654779, "grad_norm": 0.7202739119529724, "learning_rate": 9.942113830909336e-06, "loss": 0.5747, "step": 1621 }, { "epoch": 0.23138373751783167, "grad_norm": 1.0737241506576538, "learning_rate": 9.941996938699139e-06, "loss": 0.8335, "step": 1622 }, { "epoch": 0.23152639087018545, "grad_norm": 0.8841755390167236, "learning_rate": 9.941879929273598e-06, "loss": 0.8232, "step": 1623 }, { "epoch": 0.23166904422253923, "grad_norm": 0.9682257771492004, "learning_rate": 9.941762802635491e-06, "loss": 0.8213, "step": 1624 }, { "epoch": 0.231811697574893, "grad_norm": 0.8073592185974121, "learning_rate": 9.941645558787594e-06, "loss": 0.8897, "step": 1625 }, { "epoch": 0.23195435092724678, "grad_norm": 0.8076974153518677, "learning_rate": 9.94152819773269e-06, "loss": 0.8805, "step": 1626 }, { "epoch": 0.23209700427960056, "grad_norm": 0.8395135998725891, "learning_rate": 9.941410719473562e-06, "loss": 0.7626, "step": 1627 }, { "epoch": 0.23223965763195434, "grad_norm": 1.0759040117263794, "learning_rate": 9.941293124012993e-06, "loss": 0.8684, "step": 1628 }, { "epoch": 0.23238231098430814, "grad_norm": 0.6490672826766968, "learning_rate": 9.941175411353776e-06, "loss": 0.8351, "step": 1629 }, { "epoch": 0.23252496433666192, "grad_norm": 0.6349145770072937, "learning_rate": 9.9410575814987e-06, "loss": 0.802, "step": 1630 }, { "epoch": 0.2326676176890157, "grad_norm": 0.7153817415237427, "learning_rate": 9.940939634450564e-06, "loss": 0.8002, "step": 1631 }, { "epoch": 0.23281027104136948, "grad_norm": 0.7590317726135254, "learning_rate": 9.940821570212162e-06, "loss": 0.6527, "step": 1632 }, { "epoch": 0.23295292439372325, "grad_norm": 0.8898860812187195, "learning_rate": 9.940703388786296e-06, "loss": 0.8587, "step": 1633 }, { "epoch": 0.23309557774607703, "grad_norm": 0.8333155512809753, "learning_rate": 9.940585090175768e-06, "loss": 0.8109, "step": 1634 }, { "epoch": 0.2332382310984308, "grad_norm": 0.9909689426422119, "learning_rate": 9.940466674383383e-06, "loss": 0.8695, "step": 1635 }, { "epoch": 0.2333808844507846, "grad_norm": 0.8332314491271973, "learning_rate": 9.94034814141195e-06, "loss": 0.779, "step": 1636 }, { "epoch": 0.23352353780313836, "grad_norm": 0.7979727983474731, "learning_rate": 9.940229491264284e-06, "loss": 0.8014, "step": 1637 }, { "epoch": 0.23366619115549214, "grad_norm": 0.7950722575187683, "learning_rate": 9.940110723943196e-06, "loss": 0.8589, "step": 1638 }, { "epoch": 0.23380884450784595, "grad_norm": 0.6917372941970825, "learning_rate": 9.939991839451501e-06, "loss": 0.8161, "step": 1639 }, { "epoch": 0.23395149786019973, "grad_norm": 0.7580235600471497, "learning_rate": 9.939872837792022e-06, "loss": 0.7317, "step": 1640 }, { "epoch": 0.2340941512125535, "grad_norm": 0.8008021116256714, "learning_rate": 9.93975371896758e-06, "loss": 0.7995, "step": 1641 }, { "epoch": 0.23423680456490728, "grad_norm": 0.6910989284515381, "learning_rate": 9.939634482981e-06, "loss": 0.8452, "step": 1642 }, { "epoch": 0.23437945791726106, "grad_norm": 1.1207400560379028, "learning_rate": 9.939515129835112e-06, "loss": 0.9101, "step": 1643 }, { "epoch": 0.23452211126961484, "grad_norm": 0.8197427988052368, "learning_rate": 9.939395659532746e-06, "loss": 0.88, "step": 1644 }, { "epoch": 0.2346647646219686, "grad_norm": 0.7048259377479553, "learning_rate": 9.939276072076733e-06, "loss": 0.8642, "step": 1645 }, { "epoch": 0.2348074179743224, "grad_norm": 0.7667585015296936, "learning_rate": 9.939156367469914e-06, "loss": 0.882, "step": 1646 }, { "epoch": 0.23495007132667617, "grad_norm": 0.9176719188690186, "learning_rate": 9.939036545715126e-06, "loss": 0.7242, "step": 1647 }, { "epoch": 0.23509272467902995, "grad_norm": 0.8437342643737793, "learning_rate": 9.93891660681521e-06, "loss": 0.8303, "step": 1648 }, { "epoch": 0.23523537803138372, "grad_norm": 0.8201102614402771, "learning_rate": 9.93879655077301e-06, "loss": 0.8857, "step": 1649 }, { "epoch": 0.23537803138373753, "grad_norm": 0.7154242396354675, "learning_rate": 9.938676377591377e-06, "loss": 0.8175, "step": 1650 }, { "epoch": 0.2355206847360913, "grad_norm": 1.0373033285140991, "learning_rate": 9.938556087273158e-06, "loss": 0.8078, "step": 1651 }, { "epoch": 0.23566333808844508, "grad_norm": 1.3327196836471558, "learning_rate": 9.93843567982121e-06, "loss": 0.8027, "step": 1652 }, { "epoch": 0.23580599144079886, "grad_norm": 0.9266972541809082, "learning_rate": 9.938315155238385e-06, "loss": 0.7856, "step": 1653 }, { "epoch": 0.23594864479315264, "grad_norm": 0.7412891387939453, "learning_rate": 9.938194513527542e-06, "loss": 0.8634, "step": 1654 }, { "epoch": 0.23609129814550642, "grad_norm": 0.7532863020896912, "learning_rate": 9.938073754691543e-06, "loss": 0.8435, "step": 1655 }, { "epoch": 0.2362339514978602, "grad_norm": 0.7971359491348267, "learning_rate": 9.937952878733254e-06, "loss": 0.8465, "step": 1656 }, { "epoch": 0.23637660485021397, "grad_norm": 1.2778613567352295, "learning_rate": 9.93783188565554e-06, "loss": 0.6068, "step": 1657 }, { "epoch": 0.23651925820256775, "grad_norm": 0.9497107267379761, "learning_rate": 9.93771077546127e-06, "loss": 0.812, "step": 1658 }, { "epoch": 0.23666191155492153, "grad_norm": 1.138228178024292, "learning_rate": 9.937589548153319e-06, "loss": 0.8334, "step": 1659 }, { "epoch": 0.23680456490727533, "grad_norm": 0.8257390260696411, "learning_rate": 9.93746820373456e-06, "loss": 0.8706, "step": 1660 }, { "epoch": 0.2369472182596291, "grad_norm": 0.710342288017273, "learning_rate": 9.937346742207875e-06, "loss": 0.8297, "step": 1661 }, { "epoch": 0.2370898716119829, "grad_norm": 0.6259229779243469, "learning_rate": 9.937225163576138e-06, "loss": 0.8604, "step": 1662 }, { "epoch": 0.23723252496433667, "grad_norm": 0.8494049310684204, "learning_rate": 9.937103467842238e-06, "loss": 0.7666, "step": 1663 }, { "epoch": 0.23737517831669044, "grad_norm": 0.8034436702728271, "learning_rate": 9.93698165500906e-06, "loss": 0.8688, "step": 1664 }, { "epoch": 0.23751783166904422, "grad_norm": 0.8645340204238892, "learning_rate": 9.936859725079492e-06, "loss": 0.8275, "step": 1665 }, { "epoch": 0.237660485021398, "grad_norm": 0.828236997127533, "learning_rate": 9.936737678056429e-06, "loss": 0.8357, "step": 1666 }, { "epoch": 0.23780313837375178, "grad_norm": 0.7412002086639404, "learning_rate": 9.936615513942763e-06, "loss": 0.8065, "step": 1667 }, { "epoch": 0.23794579172610555, "grad_norm": 0.8817216753959656, "learning_rate": 9.936493232741394e-06, "loss": 0.8028, "step": 1668 }, { "epoch": 0.23808844507845933, "grad_norm": 1.2394442558288574, "learning_rate": 9.936370834455218e-06, "loss": 0.625, "step": 1669 }, { "epoch": 0.23823109843081314, "grad_norm": 0.8922938108444214, "learning_rate": 9.936248319087141e-06, "loss": 0.8644, "step": 1670 }, { "epoch": 0.23837375178316692, "grad_norm": 0.7142878174781799, "learning_rate": 9.93612568664007e-06, "loss": 0.8089, "step": 1671 }, { "epoch": 0.2385164051355207, "grad_norm": 0.9219820499420166, "learning_rate": 9.936002937116912e-06, "loss": 0.8657, "step": 1672 }, { "epoch": 0.23865905848787447, "grad_norm": 1.1039023399353027, "learning_rate": 9.935880070520578e-06, "loss": 0.7808, "step": 1673 }, { "epoch": 0.23880171184022825, "grad_norm": 0.8792162537574768, "learning_rate": 9.935757086853984e-06, "loss": 0.8895, "step": 1674 }, { "epoch": 0.23894436519258203, "grad_norm": 0.7179603576660156, "learning_rate": 9.935633986120044e-06, "loss": 0.8454, "step": 1675 }, { "epoch": 0.2390870185449358, "grad_norm": 0.7662439346313477, "learning_rate": 9.93551076832168e-06, "loss": 0.8214, "step": 1676 }, { "epoch": 0.23922967189728958, "grad_norm": 0.9854037761688232, "learning_rate": 9.935387433461814e-06, "loss": 0.8049, "step": 1677 }, { "epoch": 0.23937232524964336, "grad_norm": 0.7705009579658508, "learning_rate": 9.93526398154337e-06, "loss": 0.8203, "step": 1678 }, { "epoch": 0.23951497860199714, "grad_norm": 0.9047114253044128, "learning_rate": 9.93514041256928e-06, "loss": 0.8151, "step": 1679 }, { "epoch": 0.2396576319543509, "grad_norm": 0.9852571487426758, "learning_rate": 9.935016726542471e-06, "loss": 0.7873, "step": 1680 }, { "epoch": 0.23980028530670472, "grad_norm": 0.8604314923286438, "learning_rate": 9.934892923465875e-06, "loss": 0.7685, "step": 1681 }, { "epoch": 0.2399429386590585, "grad_norm": 1.0661243200302124, "learning_rate": 9.934769003342435e-06, "loss": 0.8301, "step": 1682 }, { "epoch": 0.24008559201141227, "grad_norm": 0.8258073925971985, "learning_rate": 9.934644966175084e-06, "loss": 0.8213, "step": 1683 }, { "epoch": 0.24022824536376605, "grad_norm": 1.0695422887802124, "learning_rate": 9.934520811966768e-06, "loss": 0.8046, "step": 1684 }, { "epoch": 0.24037089871611983, "grad_norm": 0.8595890402793884, "learning_rate": 9.934396540720428e-06, "loss": 0.8753, "step": 1685 }, { "epoch": 0.2405135520684736, "grad_norm": 0.8812291622161865, "learning_rate": 9.934272152439014e-06, "loss": 0.7167, "step": 1686 }, { "epoch": 0.24065620542082738, "grad_norm": 1.1692529916763306, "learning_rate": 9.934147647125476e-06, "loss": 0.8211, "step": 1687 }, { "epoch": 0.24079885877318116, "grad_norm": 0.8988098502159119, "learning_rate": 9.934023024782767e-06, "loss": 0.7913, "step": 1688 }, { "epoch": 0.24094151212553494, "grad_norm": 0.8145775198936462, "learning_rate": 9.933898285413843e-06, "loss": 0.8902, "step": 1689 }, { "epoch": 0.24108416547788872, "grad_norm": 0.7871213555335999, "learning_rate": 9.93377342902166e-06, "loss": 0.8396, "step": 1690 }, { "epoch": 0.24122681883024252, "grad_norm": 0.8794150948524475, "learning_rate": 9.933648455609184e-06, "loss": 0.8457, "step": 1691 }, { "epoch": 0.2413694721825963, "grad_norm": 0.8141103982925415, "learning_rate": 9.933523365179377e-06, "loss": 0.8583, "step": 1692 }, { "epoch": 0.24151212553495008, "grad_norm": 0.8373968005180359, "learning_rate": 9.933398157735204e-06, "loss": 0.7936, "step": 1693 }, { "epoch": 0.24165477888730386, "grad_norm": 0.7925145626068115, "learning_rate": 9.933272833279635e-06, "loss": 0.785, "step": 1694 }, { "epoch": 0.24179743223965763, "grad_norm": 0.830041229724884, "learning_rate": 9.933147391815646e-06, "loss": 0.8212, "step": 1695 }, { "epoch": 0.2419400855920114, "grad_norm": 0.9906054139137268, "learning_rate": 9.93302183334621e-06, "loss": 0.7736, "step": 1696 }, { "epoch": 0.2420827389443652, "grad_norm": 0.7971070408821106, "learning_rate": 9.932896157874304e-06, "loss": 0.8356, "step": 1697 }, { "epoch": 0.24222539229671897, "grad_norm": 0.9696570634841919, "learning_rate": 9.932770365402909e-06, "loss": 0.7921, "step": 1698 }, { "epoch": 0.24236804564907274, "grad_norm": 0.7731328010559082, "learning_rate": 9.93264445593501e-06, "loss": 0.8394, "step": 1699 }, { "epoch": 0.24251069900142652, "grad_norm": 0.7872111797332764, "learning_rate": 9.932518429473595e-06, "loss": 0.7934, "step": 1700 }, { "epoch": 0.24265335235378033, "grad_norm": 0.7657778263092041, "learning_rate": 9.932392286021649e-06, "loss": 0.8636, "step": 1701 }, { "epoch": 0.2427960057061341, "grad_norm": 0.8215643167495728, "learning_rate": 9.932266025582165e-06, "loss": 0.773, "step": 1702 }, { "epoch": 0.24293865905848788, "grad_norm": 0.9458875060081482, "learning_rate": 9.932139648158142e-06, "loss": 0.7793, "step": 1703 }, { "epoch": 0.24308131241084166, "grad_norm": 0.8223559856414795, "learning_rate": 9.93201315375257e-06, "loss": 0.7971, "step": 1704 }, { "epoch": 0.24322396576319544, "grad_norm": 1.04073965549469, "learning_rate": 9.931886542368454e-06, "loss": 0.773, "step": 1705 }, { "epoch": 0.24336661911554922, "grad_norm": 0.8274113535881042, "learning_rate": 9.931759814008796e-06, "loss": 0.7855, "step": 1706 }, { "epoch": 0.243509272467903, "grad_norm": 0.8987480401992798, "learning_rate": 9.931632968676603e-06, "loss": 0.8289, "step": 1707 }, { "epoch": 0.24365192582025677, "grad_norm": 0.7026833295822144, "learning_rate": 9.931506006374883e-06, "loss": 0.8352, "step": 1708 }, { "epoch": 0.24379457917261055, "grad_norm": 0.7738713622093201, "learning_rate": 9.931378927106645e-06, "loss": 0.7855, "step": 1709 }, { "epoch": 0.24393723252496433, "grad_norm": 0.9599910974502563, "learning_rate": 9.931251730874905e-06, "loss": 0.7995, "step": 1710 }, { "epoch": 0.2440798858773181, "grad_norm": 0.9438477754592896, "learning_rate": 9.931124417682681e-06, "loss": 0.8559, "step": 1711 }, { "epoch": 0.2442225392296719, "grad_norm": 0.789526104927063, "learning_rate": 9.93099698753299e-06, "loss": 0.7975, "step": 1712 }, { "epoch": 0.2443651925820257, "grad_norm": 0.7984293103218079, "learning_rate": 9.930869440428857e-06, "loss": 0.8323, "step": 1713 }, { "epoch": 0.24450784593437946, "grad_norm": 1.0544899702072144, "learning_rate": 9.930741776373305e-06, "loss": 0.8537, "step": 1714 }, { "epoch": 0.24465049928673324, "grad_norm": 0.9012507796287537, "learning_rate": 9.930613995369362e-06, "loss": 0.7284, "step": 1715 }, { "epoch": 0.24479315263908702, "grad_norm": 1.0507197380065918, "learning_rate": 9.93048609742006e-06, "loss": 0.5911, "step": 1716 }, { "epoch": 0.2449358059914408, "grad_norm": 0.7745269536972046, "learning_rate": 9.930358082528433e-06, "loss": 0.7643, "step": 1717 }, { "epoch": 0.24507845934379457, "grad_norm": 0.8316447734832764, "learning_rate": 9.930229950697518e-06, "loss": 0.8697, "step": 1718 }, { "epoch": 0.24522111269614835, "grad_norm": 0.8305345773696899, "learning_rate": 9.93010170193035e-06, "loss": 0.778, "step": 1719 }, { "epoch": 0.24536376604850213, "grad_norm": 0.9902516007423401, "learning_rate": 9.929973336229975e-06, "loss": 0.8369, "step": 1720 }, { "epoch": 0.2455064194008559, "grad_norm": 0.809398353099823, "learning_rate": 9.929844853599434e-06, "loss": 0.7423, "step": 1721 }, { "epoch": 0.2456490727532097, "grad_norm": 1.2137434482574463, "learning_rate": 9.929716254041779e-06, "loss": 0.8325, "step": 1722 }, { "epoch": 0.2457917261055635, "grad_norm": 0.9980698227882385, "learning_rate": 9.929587537560055e-06, "loss": 0.8153, "step": 1723 }, { "epoch": 0.24593437945791727, "grad_norm": 1.2068803310394287, "learning_rate": 9.929458704157319e-06, "loss": 0.7866, "step": 1724 }, { "epoch": 0.24607703281027105, "grad_norm": 0.8856320977210999, "learning_rate": 9.929329753836624e-06, "loss": 0.8647, "step": 1725 }, { "epoch": 0.24621968616262482, "grad_norm": 0.7726538777351379, "learning_rate": 9.92920068660103e-06, "loss": 0.835, "step": 1726 }, { "epoch": 0.2463623395149786, "grad_norm": 0.9689743518829346, "learning_rate": 9.929071502453599e-06, "loss": 0.847, "step": 1727 }, { "epoch": 0.24650499286733238, "grad_norm": 0.8871773481369019, "learning_rate": 9.928942201397391e-06, "loss": 0.796, "step": 1728 }, { "epoch": 0.24664764621968616, "grad_norm": 0.8159291744232178, "learning_rate": 9.928812783435477e-06, "loss": 0.8607, "step": 1729 }, { "epoch": 0.24679029957203993, "grad_norm": 0.8086974620819092, "learning_rate": 9.928683248570928e-06, "loss": 0.8934, "step": 1730 }, { "epoch": 0.2469329529243937, "grad_norm": 0.9735966324806213, "learning_rate": 9.928553596806809e-06, "loss": 0.848, "step": 1731 }, { "epoch": 0.24707560627674752, "grad_norm": 0.688496470451355, "learning_rate": 9.928423828146202e-06, "loss": 0.7779, "step": 1732 }, { "epoch": 0.2472182596291013, "grad_norm": 0.8273606896400452, "learning_rate": 9.928293942592182e-06, "loss": 0.8101, "step": 1733 }, { "epoch": 0.24736091298145507, "grad_norm": 0.8033018708229065, "learning_rate": 9.92816394014783e-06, "loss": 0.8352, "step": 1734 }, { "epoch": 0.24750356633380885, "grad_norm": 0.8431486487388611, "learning_rate": 9.928033820816231e-06, "loss": 0.7885, "step": 1735 }, { "epoch": 0.24764621968616263, "grad_norm": 1.2040809392929077, "learning_rate": 9.927903584600468e-06, "loss": 0.8404, "step": 1736 }, { "epoch": 0.2477888730385164, "grad_norm": 0.8894022703170776, "learning_rate": 9.927773231503633e-06, "loss": 0.7737, "step": 1737 }, { "epoch": 0.24793152639087018, "grad_norm": 0.703839123249054, "learning_rate": 9.927642761528815e-06, "loss": 0.8407, "step": 1738 }, { "epoch": 0.24807417974322396, "grad_norm": 0.8526538014411926, "learning_rate": 9.927512174679111e-06, "loss": 0.8415, "step": 1739 }, { "epoch": 0.24821683309557774, "grad_norm": 1.0738188028335571, "learning_rate": 9.927381470957617e-06, "loss": 0.8345, "step": 1740 }, { "epoch": 0.24835948644793152, "grad_norm": 1.0603893995285034, "learning_rate": 9.927250650367435e-06, "loss": 0.7458, "step": 1741 }, { "epoch": 0.2485021398002853, "grad_norm": 0.9537172913551331, "learning_rate": 9.927119712911664e-06, "loss": 0.7977, "step": 1742 }, { "epoch": 0.2486447931526391, "grad_norm": 1.0061439275741577, "learning_rate": 9.926988658593413e-06, "loss": 0.746, "step": 1743 }, { "epoch": 0.24878744650499288, "grad_norm": 0.6799601912498474, "learning_rate": 9.92685748741579e-06, "loss": 0.9128, "step": 1744 }, { "epoch": 0.24893009985734665, "grad_norm": 0.8178061246871948, "learning_rate": 9.926726199381902e-06, "loss": 0.7994, "step": 1745 }, { "epoch": 0.24907275320970043, "grad_norm": 0.756705641746521, "learning_rate": 9.926594794494867e-06, "loss": 0.9008, "step": 1746 }, { "epoch": 0.2492154065620542, "grad_norm": 0.9939935803413391, "learning_rate": 9.926463272757805e-06, "loss": 0.7959, "step": 1747 }, { "epoch": 0.249358059914408, "grad_norm": 1.0848501920700073, "learning_rate": 9.926331634173827e-06, "loss": 0.7923, "step": 1748 }, { "epoch": 0.24950071326676176, "grad_norm": 1.0696003437042236, "learning_rate": 9.926199878746061e-06, "loss": 0.8095, "step": 1749 }, { "epoch": 0.24964336661911554, "grad_norm": 0.775341272354126, "learning_rate": 9.92606800647763e-06, "loss": 0.8407, "step": 1750 }, { "epoch": 0.24978601997146932, "grad_norm": 0.8625017404556274, "learning_rate": 9.925936017371664e-06, "loss": 0.8632, "step": 1751 }, { "epoch": 0.2499286733238231, "grad_norm": 1.0839468240737915, "learning_rate": 9.925803911431292e-06, "loss": 0.7061, "step": 1752 }, { "epoch": 0.2500713266761769, "grad_norm": 0.9675854444503784, "learning_rate": 9.925671688659646e-06, "loss": 0.8347, "step": 1753 }, { "epoch": 0.2502139800285307, "grad_norm": 0.6596314907073975, "learning_rate": 9.925539349059864e-06, "loss": 0.7942, "step": 1754 }, { "epoch": 0.25035663338088443, "grad_norm": 0.8776452541351318, "learning_rate": 9.925406892635083e-06, "loss": 0.7596, "step": 1755 }, { "epoch": 0.25049928673323824, "grad_norm": 0.9428473114967346, "learning_rate": 9.925274319388447e-06, "loss": 0.7694, "step": 1756 }, { "epoch": 0.250641940085592, "grad_norm": 1.1374789476394653, "learning_rate": 9.9251416293231e-06, "loss": 0.7954, "step": 1757 }, { "epoch": 0.2507845934379458, "grad_norm": 1.1859643459320068, "learning_rate": 9.925008822442187e-06, "loss": 0.8431, "step": 1758 }, { "epoch": 0.2509272467902996, "grad_norm": 0.7116193771362305, "learning_rate": 9.924875898748861e-06, "loss": 0.8564, "step": 1759 }, { "epoch": 0.25106990014265335, "grad_norm": 0.8621867299079895, "learning_rate": 9.924742858246271e-06, "loss": 0.7682, "step": 1760 }, { "epoch": 0.25121255349500715, "grad_norm": 0.7891177535057068, "learning_rate": 9.924609700937575e-06, "loss": 0.8185, "step": 1761 }, { "epoch": 0.2513552068473609, "grad_norm": 0.8107175827026367, "learning_rate": 9.92447642682593e-06, "loss": 0.7584, "step": 1762 }, { "epoch": 0.2514978601997147, "grad_norm": 0.7248867154121399, "learning_rate": 9.9243430359145e-06, "loss": 0.8432, "step": 1763 }, { "epoch": 0.25164051355206846, "grad_norm": 0.9015504121780396, "learning_rate": 9.924209528206446e-06, "loss": 0.7818, "step": 1764 }, { "epoch": 0.25178316690442226, "grad_norm": 0.8606206774711609, "learning_rate": 9.924075903704935e-06, "loss": 0.821, "step": 1765 }, { "epoch": 0.251925820256776, "grad_norm": 0.7115620374679565, "learning_rate": 9.923942162413136e-06, "loss": 0.8123, "step": 1766 }, { "epoch": 0.2520684736091298, "grad_norm": 0.8173115849494934, "learning_rate": 9.923808304334222e-06, "loss": 0.8171, "step": 1767 }, { "epoch": 0.25221112696148357, "grad_norm": 0.7100493311882019, "learning_rate": 9.923674329471366e-06, "loss": 0.8178, "step": 1768 }, { "epoch": 0.2523537803138374, "grad_norm": 0.7646355032920837, "learning_rate": 9.92354023782775e-06, "loss": 0.815, "step": 1769 }, { "epoch": 0.2524964336661912, "grad_norm": 0.9536492228507996, "learning_rate": 9.92340602940655e-06, "loss": 0.8257, "step": 1770 }, { "epoch": 0.25263908701854493, "grad_norm": 0.9343258142471313, "learning_rate": 9.92327170421095e-06, "loss": 0.7676, "step": 1771 }, { "epoch": 0.25278174037089873, "grad_norm": 1.1464265584945679, "learning_rate": 9.923137262244138e-06, "loss": 0.8456, "step": 1772 }, { "epoch": 0.2529243937232525, "grad_norm": 0.8079214096069336, "learning_rate": 9.923002703509301e-06, "loss": 0.8753, "step": 1773 }, { "epoch": 0.2530670470756063, "grad_norm": 0.778343677520752, "learning_rate": 9.92286802800963e-06, "loss": 0.8405, "step": 1774 }, { "epoch": 0.25320970042796004, "grad_norm": 0.937049925327301, "learning_rate": 9.92273323574832e-06, "loss": 0.8797, "step": 1775 }, { "epoch": 0.25335235378031384, "grad_norm": 1.1116619110107422, "learning_rate": 9.92259832672857e-06, "loss": 0.7696, "step": 1776 }, { "epoch": 0.2534950071326676, "grad_norm": 0.7162361145019531, "learning_rate": 9.922463300953578e-06, "loss": 0.8254, "step": 1777 }, { "epoch": 0.2536376604850214, "grad_norm": 0.7013463973999023, "learning_rate": 9.922328158426545e-06, "loss": 0.7982, "step": 1778 }, { "epoch": 0.2537803138373752, "grad_norm": 1.1140834093093872, "learning_rate": 9.922192899150679e-06, "loss": 0.8328, "step": 1779 }, { "epoch": 0.25392296718972895, "grad_norm": 1.1142244338989258, "learning_rate": 9.922057523129187e-06, "loss": 0.8104, "step": 1780 }, { "epoch": 0.25406562054208276, "grad_norm": 0.9946165680885315, "learning_rate": 9.921922030365278e-06, "loss": 0.7862, "step": 1781 }, { "epoch": 0.2542082738944365, "grad_norm": 0.701593816280365, "learning_rate": 9.92178642086217e-06, "loss": 0.8262, "step": 1782 }, { "epoch": 0.2543509272467903, "grad_norm": 1.4632997512817383, "learning_rate": 9.921650694623077e-06, "loss": 0.6542, "step": 1783 }, { "epoch": 0.25449358059914406, "grad_norm": 0.7976811528205872, "learning_rate": 9.921514851651216e-06, "loss": 0.8723, "step": 1784 }, { "epoch": 0.25463623395149787, "grad_norm": 0.7965141534805298, "learning_rate": 9.921378891949813e-06, "loss": 0.8243, "step": 1785 }, { "epoch": 0.2547788873038516, "grad_norm": 1.0921205282211304, "learning_rate": 9.921242815522089e-06, "loss": 0.7583, "step": 1786 }, { "epoch": 0.2549215406562054, "grad_norm": 0.7497575879096985, "learning_rate": 9.921106622371275e-06, "loss": 0.8354, "step": 1787 }, { "epoch": 0.2550641940085592, "grad_norm": 1.1817249059677124, "learning_rate": 9.9209703125006e-06, "loss": 0.6963, "step": 1788 }, { "epoch": 0.255206847360913, "grad_norm": 0.8144768476486206, "learning_rate": 9.920833885913295e-06, "loss": 0.8296, "step": 1789 }, { "epoch": 0.2553495007132668, "grad_norm": 0.8028969764709473, "learning_rate": 9.920697342612598e-06, "loss": 0.7478, "step": 1790 }, { "epoch": 0.25549215406562054, "grad_norm": 0.7523711919784546, "learning_rate": 9.920560682601746e-06, "loss": 0.7942, "step": 1791 }, { "epoch": 0.25563480741797434, "grad_norm": 0.7429263591766357, "learning_rate": 9.920423905883982e-06, "loss": 0.8674, "step": 1792 }, { "epoch": 0.2557774607703281, "grad_norm": 0.8647870421409607, "learning_rate": 9.92028701246255e-06, "loss": 0.8352, "step": 1793 }, { "epoch": 0.2559201141226819, "grad_norm": 0.9710491895675659, "learning_rate": 9.920150002340696e-06, "loss": 0.8285, "step": 1794 }, { "epoch": 0.25606276747503565, "grad_norm": 0.8176320791244507, "learning_rate": 9.92001287552167e-06, "loss": 0.7461, "step": 1795 }, { "epoch": 0.25620542082738945, "grad_norm": 1.0631942749023438, "learning_rate": 9.919875632008724e-06, "loss": 0.7482, "step": 1796 }, { "epoch": 0.2563480741797432, "grad_norm": 1.1244279146194458, "learning_rate": 9.919738271805112e-06, "loss": 0.7229, "step": 1797 }, { "epoch": 0.256490727532097, "grad_norm": 0.9116760492324829, "learning_rate": 9.919600794914094e-06, "loss": 0.8452, "step": 1798 }, { "epoch": 0.25663338088445076, "grad_norm": 0.9942961931228638, "learning_rate": 9.919463201338933e-06, "loss": 0.7275, "step": 1799 }, { "epoch": 0.25677603423680456, "grad_norm": 0.9490411877632141, "learning_rate": 9.919325491082888e-06, "loss": 0.909, "step": 1800 }, { "epoch": 0.25691868758915837, "grad_norm": 0.9126991033554077, "learning_rate": 9.919187664149226e-06, "loss": 0.8144, "step": 1801 }, { "epoch": 0.2570613409415121, "grad_norm": 0.8840201497077942, "learning_rate": 9.919049720541216e-06, "loss": 0.832, "step": 1802 }, { "epoch": 0.2572039942938659, "grad_norm": 0.9308497905731201, "learning_rate": 9.91891166026213e-06, "loss": 0.8694, "step": 1803 }, { "epoch": 0.2573466476462197, "grad_norm": 0.8584593534469604, "learning_rate": 9.918773483315246e-06, "loss": 0.8431, "step": 1804 }, { "epoch": 0.2574893009985735, "grad_norm": 1.003719449043274, "learning_rate": 9.918635189703838e-06, "loss": 0.8484, "step": 1805 }, { "epoch": 0.25763195435092723, "grad_norm": 0.8581739068031311, "learning_rate": 9.918496779431186e-06, "loss": 0.7958, "step": 1806 }, { "epoch": 0.25777460770328103, "grad_norm": 1.4660345315933228, "learning_rate": 9.918358252500572e-06, "loss": 0.7827, "step": 1807 }, { "epoch": 0.2579172610556348, "grad_norm": 0.6751096248626709, "learning_rate": 9.918219608915285e-06, "loss": 0.7967, "step": 1808 }, { "epoch": 0.2580599144079886, "grad_norm": 0.9812304377555847, "learning_rate": 9.918080848678611e-06, "loss": 0.803, "step": 1809 }, { "epoch": 0.2582025677603424, "grad_norm": 0.9044556617736816, "learning_rate": 9.91794197179384e-06, "loss": 0.8133, "step": 1810 }, { "epoch": 0.25834522111269614, "grad_norm": 0.9294826984405518, "learning_rate": 9.917802978264269e-06, "loss": 0.7915, "step": 1811 }, { "epoch": 0.25848787446504995, "grad_norm": 0.8307023048400879, "learning_rate": 9.917663868093192e-06, "loss": 0.8041, "step": 1812 }, { "epoch": 0.2586305278174037, "grad_norm": 0.684723973274231, "learning_rate": 9.917524641283911e-06, "loss": 0.7893, "step": 1813 }, { "epoch": 0.2587731811697575, "grad_norm": 0.8535851240158081, "learning_rate": 9.917385297839725e-06, "loss": 0.7872, "step": 1814 }, { "epoch": 0.25891583452211125, "grad_norm": 1.284294605255127, "learning_rate": 9.917245837763942e-06, "loss": 0.658, "step": 1815 }, { "epoch": 0.25905848787446506, "grad_norm": 0.8843228816986084, "learning_rate": 9.917106261059867e-06, "loss": 0.8177, "step": 1816 }, { "epoch": 0.2592011412268188, "grad_norm": 0.8427625894546509, "learning_rate": 9.916966567730815e-06, "loss": 0.7868, "step": 1817 }, { "epoch": 0.2593437945791726, "grad_norm": 0.790692925453186, "learning_rate": 9.916826757780094e-06, "loss": 0.7719, "step": 1818 }, { "epoch": 0.25948644793152637, "grad_norm": 0.9854372143745422, "learning_rate": 9.916686831211022e-06, "loss": 0.8206, "step": 1819 }, { "epoch": 0.25962910128388017, "grad_norm": 0.8204353451728821, "learning_rate": 9.91654678802692e-06, "loss": 0.8109, "step": 1820 }, { "epoch": 0.259771754636234, "grad_norm": 0.9865830540657043, "learning_rate": 9.916406628231105e-06, "loss": 0.7548, "step": 1821 }, { "epoch": 0.2599144079885877, "grad_norm": 0.8347112536430359, "learning_rate": 9.916266351826907e-06, "loss": 0.8322, "step": 1822 }, { "epoch": 0.26005706134094153, "grad_norm": 0.9512296915054321, "learning_rate": 9.916125958817649e-06, "loss": 0.7825, "step": 1823 }, { "epoch": 0.2601997146932953, "grad_norm": 1.1798293590545654, "learning_rate": 9.91598544920666e-06, "loss": 0.8193, "step": 1824 }, { "epoch": 0.2603423680456491, "grad_norm": 0.9011582136154175, "learning_rate": 9.915844822997276e-06, "loss": 0.7892, "step": 1825 }, { "epoch": 0.26048502139800284, "grad_norm": 0.6729601621627808, "learning_rate": 9.915704080192832e-06, "loss": 0.8465, "step": 1826 }, { "epoch": 0.26062767475035664, "grad_norm": 0.8563498854637146, "learning_rate": 9.915563220796665e-06, "loss": 0.7662, "step": 1827 }, { "epoch": 0.2607703281027104, "grad_norm": 0.7469445466995239, "learning_rate": 9.915422244812115e-06, "loss": 0.8521, "step": 1828 }, { "epoch": 0.2609129814550642, "grad_norm": 0.9483319520950317, "learning_rate": 9.915281152242528e-06, "loss": 0.8422, "step": 1829 }, { "epoch": 0.26105563480741795, "grad_norm": 0.7566171288490295, "learning_rate": 9.915139943091248e-06, "loss": 0.8238, "step": 1830 }, { "epoch": 0.26119828815977175, "grad_norm": 0.6985406279563904, "learning_rate": 9.914998617361626e-06, "loss": 0.786, "step": 1831 }, { "epoch": 0.26134094151212556, "grad_norm": 0.8736724853515625, "learning_rate": 9.914857175057015e-06, "loss": 0.8642, "step": 1832 }, { "epoch": 0.2614835948644793, "grad_norm": 0.8644083142280579, "learning_rate": 9.914715616180768e-06, "loss": 0.7676, "step": 1833 }, { "epoch": 0.2616262482168331, "grad_norm": 0.7934670448303223, "learning_rate": 9.914573940736241e-06, "loss": 0.8021, "step": 1834 }, { "epoch": 0.26176890156918686, "grad_norm": 0.8259593844413757, "learning_rate": 9.914432148726799e-06, "loss": 0.8874, "step": 1835 }, { "epoch": 0.26191155492154067, "grad_norm": 0.8342422842979431, "learning_rate": 9.9142902401558e-06, "loss": 0.8138, "step": 1836 }, { "epoch": 0.2620542082738944, "grad_norm": 0.6923991441726685, "learning_rate": 9.914148215026612e-06, "loss": 0.8096, "step": 1837 }, { "epoch": 0.2621968616262482, "grad_norm": 0.7407599687576294, "learning_rate": 9.914006073342604e-06, "loss": 0.874, "step": 1838 }, { "epoch": 0.262339514978602, "grad_norm": 0.984772264957428, "learning_rate": 9.913863815107146e-06, "loss": 0.821, "step": 1839 }, { "epoch": 0.2624821683309558, "grad_norm": 1.0290112495422363, "learning_rate": 9.913721440323613e-06, "loss": 0.7882, "step": 1840 }, { "epoch": 0.2626248216833096, "grad_norm": 0.8359882235527039, "learning_rate": 9.913578948995385e-06, "loss": 0.8609, "step": 1841 }, { "epoch": 0.26276747503566333, "grad_norm": 1.0276292562484741, "learning_rate": 9.913436341125834e-06, "loss": 0.8341, "step": 1842 }, { "epoch": 0.26291012838801714, "grad_norm": 0.8084568381309509, "learning_rate": 9.91329361671835e-06, "loss": 0.7654, "step": 1843 }, { "epoch": 0.2630527817403709, "grad_norm": 0.9717106819152832, "learning_rate": 9.913150775776314e-06, "loss": 0.7615, "step": 1844 }, { "epoch": 0.2631954350927247, "grad_norm": 1.142538070678711, "learning_rate": 9.913007818303115e-06, "loss": 0.8357, "step": 1845 }, { "epoch": 0.26333808844507844, "grad_norm": 0.9166866540908813, "learning_rate": 9.912864744302145e-06, "loss": 0.7173, "step": 1846 }, { "epoch": 0.26348074179743225, "grad_norm": 1.0183794498443604, "learning_rate": 9.912721553776793e-06, "loss": 0.7675, "step": 1847 }, { "epoch": 0.263623395149786, "grad_norm": 0.7247642874717712, "learning_rate": 9.912578246730462e-06, "loss": 0.8502, "step": 1848 }, { "epoch": 0.2637660485021398, "grad_norm": 0.8331905603408813, "learning_rate": 9.912434823166545e-06, "loss": 0.7808, "step": 1849 }, { "epoch": 0.26390870185449355, "grad_norm": 0.9375711679458618, "learning_rate": 9.912291283088447e-06, "loss": 0.8247, "step": 1850 }, { "epoch": 0.26405135520684736, "grad_norm": 0.7520945072174072, "learning_rate": 9.912147626499573e-06, "loss": 0.8518, "step": 1851 }, { "epoch": 0.26419400855920117, "grad_norm": 0.8544954061508179, "learning_rate": 9.912003853403327e-06, "loss": 0.7918, "step": 1852 }, { "epoch": 0.2643366619115549, "grad_norm": 0.8004382848739624, "learning_rate": 9.911859963803122e-06, "loss": 0.9079, "step": 1853 }, { "epoch": 0.2644793152639087, "grad_norm": 0.9976580142974854, "learning_rate": 9.91171595770237e-06, "loss": 0.9102, "step": 1854 }, { "epoch": 0.26462196861626247, "grad_norm": 0.8790906667709351, "learning_rate": 9.911571835104485e-06, "loss": 0.8299, "step": 1855 }, { "epoch": 0.2647646219686163, "grad_norm": 0.6801099181175232, "learning_rate": 9.911427596012889e-06, "loss": 0.846, "step": 1856 }, { "epoch": 0.26490727532097, "grad_norm": 1.0817416906356812, "learning_rate": 9.911283240430998e-06, "loss": 0.775, "step": 1857 }, { "epoch": 0.26504992867332383, "grad_norm": 0.8968457579612732, "learning_rate": 9.911138768362241e-06, "loss": 0.7453, "step": 1858 }, { "epoch": 0.2651925820256776, "grad_norm": 0.7213981747627258, "learning_rate": 9.91099417981004e-06, "loss": 0.8386, "step": 1859 }, { "epoch": 0.2653352353780314, "grad_norm": 0.918195903301239, "learning_rate": 9.910849474777828e-06, "loss": 0.8895, "step": 1860 }, { "epoch": 0.26547788873038514, "grad_norm": 0.8049584627151489, "learning_rate": 9.910704653269037e-06, "loss": 0.8433, "step": 1861 }, { "epoch": 0.26562054208273894, "grad_norm": 0.7233972549438477, "learning_rate": 9.910559715287099e-06, "loss": 0.8634, "step": 1862 }, { "epoch": 0.26576319543509275, "grad_norm": 0.7243645191192627, "learning_rate": 9.910414660835453e-06, "loss": 0.8012, "step": 1863 }, { "epoch": 0.2659058487874465, "grad_norm": 0.7203574776649475, "learning_rate": 9.91026948991754e-06, "loss": 0.7966, "step": 1864 }, { "epoch": 0.2660485021398003, "grad_norm": 0.7382854223251343, "learning_rate": 9.910124202536803e-06, "loss": 0.8403, "step": 1865 }, { "epoch": 0.26619115549215405, "grad_norm": 1.0563346147537231, "learning_rate": 9.90997879869669e-06, "loss": 0.9433, "step": 1866 }, { "epoch": 0.26633380884450786, "grad_norm": 0.8045656681060791, "learning_rate": 9.909833278400648e-06, "loss": 0.8359, "step": 1867 }, { "epoch": 0.2664764621968616, "grad_norm": 0.7768740057945251, "learning_rate": 9.909687641652125e-06, "loss": 0.7659, "step": 1868 }, { "epoch": 0.2666191155492154, "grad_norm": 0.7277352213859558, "learning_rate": 9.909541888454579e-06, "loss": 0.8444, "step": 1869 }, { "epoch": 0.26676176890156916, "grad_norm": 0.7429260015487671, "learning_rate": 9.909396018811468e-06, "loss": 0.821, "step": 1870 }, { "epoch": 0.26690442225392297, "grad_norm": 0.8484064340591431, "learning_rate": 9.909250032726248e-06, "loss": 0.7954, "step": 1871 }, { "epoch": 0.2670470756062768, "grad_norm": 0.7717201113700867, "learning_rate": 9.909103930202387e-06, "loss": 0.7197, "step": 1872 }, { "epoch": 0.2671897289586305, "grad_norm": 0.7083487510681152, "learning_rate": 9.908957711243344e-06, "loss": 0.8331, "step": 1873 }, { "epoch": 0.26733238231098433, "grad_norm": 1.0699069499969482, "learning_rate": 9.90881137585259e-06, "loss": 0.7806, "step": 1874 }, { "epoch": 0.2674750356633381, "grad_norm": 0.7462451457977295, "learning_rate": 9.908664924033597e-06, "loss": 0.8331, "step": 1875 }, { "epoch": 0.2676176890156919, "grad_norm": 1.1656798124313354, "learning_rate": 9.908518355789838e-06, "loss": 0.8717, "step": 1876 }, { "epoch": 0.26776034236804563, "grad_norm": 0.9811933040618896, "learning_rate": 9.908371671124788e-06, "loss": 0.7728, "step": 1877 }, { "epoch": 0.26790299572039944, "grad_norm": 0.8236185908317566, "learning_rate": 9.908224870041927e-06, "loss": 0.7903, "step": 1878 }, { "epoch": 0.2680456490727532, "grad_norm": 0.9544426202774048, "learning_rate": 9.908077952544738e-06, "loss": 0.7954, "step": 1879 }, { "epoch": 0.268188302425107, "grad_norm": 0.7393714189529419, "learning_rate": 9.9079309186367e-06, "loss": 0.7989, "step": 1880 }, { "epoch": 0.26833095577746074, "grad_norm": 0.9020375609397888, "learning_rate": 9.907783768321308e-06, "loss": 0.8449, "step": 1881 }, { "epoch": 0.26847360912981455, "grad_norm": 0.8916321992874146, "learning_rate": 9.907636501602049e-06, "loss": 0.8259, "step": 1882 }, { "epoch": 0.26861626248216836, "grad_norm": 0.878184974193573, "learning_rate": 9.907489118482416e-06, "loss": 0.8694, "step": 1883 }, { "epoch": 0.2687589158345221, "grad_norm": 0.8452358841896057, "learning_rate": 9.907341618965904e-06, "loss": 0.8481, "step": 1884 }, { "epoch": 0.2689015691868759, "grad_norm": 0.7845661640167236, "learning_rate": 9.907194003056011e-06, "loss": 0.8263, "step": 1885 }, { "epoch": 0.26904422253922966, "grad_norm": 0.8402097821235657, "learning_rate": 9.90704627075624e-06, "loss": 0.8312, "step": 1886 }, { "epoch": 0.26918687589158347, "grad_norm": 0.8752978444099426, "learning_rate": 9.906898422070095e-06, "loss": 0.7898, "step": 1887 }, { "epoch": 0.2693295292439372, "grad_norm": 0.793667733669281, "learning_rate": 9.90675045700108e-06, "loss": 0.7785, "step": 1888 }, { "epoch": 0.269472182596291, "grad_norm": 0.8000465035438538, "learning_rate": 9.906602375552706e-06, "loss": 0.7866, "step": 1889 }, { "epoch": 0.26961483594864477, "grad_norm": 0.9185377955436707, "learning_rate": 9.906454177728486e-06, "loss": 0.8018, "step": 1890 }, { "epoch": 0.2697574893009986, "grad_norm": 0.8185838460922241, "learning_rate": 9.906305863531933e-06, "loss": 0.8672, "step": 1891 }, { "epoch": 0.2699001426533523, "grad_norm": 0.7808707356452942, "learning_rate": 9.906157432966569e-06, "loss": 0.7791, "step": 1892 }, { "epoch": 0.27004279600570613, "grad_norm": 0.9372024536132812, "learning_rate": 9.906008886035909e-06, "loss": 0.8064, "step": 1893 }, { "epoch": 0.27018544935805994, "grad_norm": 0.8064765930175781, "learning_rate": 9.90586022274348e-06, "loss": 0.8222, "step": 1894 }, { "epoch": 0.2703281027104137, "grad_norm": 0.7457480430603027, "learning_rate": 9.905711443092809e-06, "loss": 0.8121, "step": 1895 }, { "epoch": 0.2704707560627675, "grad_norm": 0.8527641296386719, "learning_rate": 9.90556254708742e-06, "loss": 0.7355, "step": 1896 }, { "epoch": 0.27061340941512124, "grad_norm": 1.4741696119308472, "learning_rate": 9.905413534730848e-06, "loss": 0.6868, "step": 1897 }, { "epoch": 0.27075606276747505, "grad_norm": 0.8204596638679504, "learning_rate": 9.905264406026627e-06, "loss": 0.7794, "step": 1898 }, { "epoch": 0.2708987161198288, "grad_norm": 0.9594339728355408, "learning_rate": 9.905115160978292e-06, "loss": 0.774, "step": 1899 }, { "epoch": 0.2710413694721826, "grad_norm": 1.2117671966552734, "learning_rate": 9.904965799589385e-06, "loss": 0.8822, "step": 1900 }, { "epoch": 0.27118402282453635, "grad_norm": 0.9087314605712891, "learning_rate": 9.904816321863449e-06, "loss": 0.7369, "step": 1901 }, { "epoch": 0.27132667617689016, "grad_norm": 0.9013188481330872, "learning_rate": 9.90466672780403e-06, "loss": 0.8629, "step": 1902 }, { "epoch": 0.27146932952924396, "grad_norm": 0.8729089498519897, "learning_rate": 9.904517017414672e-06, "loss": 0.7628, "step": 1903 }, { "epoch": 0.2716119828815977, "grad_norm": 1.22653329372406, "learning_rate": 9.904367190698929e-06, "loss": 0.8104, "step": 1904 }, { "epoch": 0.2717546362339515, "grad_norm": 0.7489973306655884, "learning_rate": 9.904217247660352e-06, "loss": 0.5759, "step": 1905 }, { "epoch": 0.27189728958630527, "grad_norm": 0.950798511505127, "learning_rate": 9.904067188302502e-06, "loss": 0.8215, "step": 1906 }, { "epoch": 0.2720399429386591, "grad_norm": 1.006636619567871, "learning_rate": 9.903917012628936e-06, "loss": 0.8021, "step": 1907 }, { "epoch": 0.2721825962910128, "grad_norm": 0.9519075751304626, "learning_rate": 9.903766720643213e-06, "loss": 0.805, "step": 1908 }, { "epoch": 0.27232524964336663, "grad_norm": 1.1060423851013184, "learning_rate": 9.903616312348903e-06, "loss": 0.7663, "step": 1909 }, { "epoch": 0.2724679029957204, "grad_norm": 0.922545850276947, "learning_rate": 9.903465787749567e-06, "loss": 0.7222, "step": 1910 }, { "epoch": 0.2726105563480742, "grad_norm": 0.8590786457061768, "learning_rate": 9.903315146848782e-06, "loss": 0.8197, "step": 1911 }, { "epoch": 0.27275320970042793, "grad_norm": 0.8543412089347839, "learning_rate": 9.903164389650115e-06, "loss": 0.7675, "step": 1912 }, { "epoch": 0.27289586305278174, "grad_norm": 0.8302007913589478, "learning_rate": 9.903013516157145e-06, "loss": 0.8347, "step": 1913 }, { "epoch": 0.27303851640513555, "grad_norm": 0.9529709815979004, "learning_rate": 9.902862526373452e-06, "loss": 0.7625, "step": 1914 }, { "epoch": 0.2731811697574893, "grad_norm": 0.6725876927375793, "learning_rate": 9.902711420302611e-06, "loss": 0.815, "step": 1915 }, { "epoch": 0.2733238231098431, "grad_norm": 0.9992535710334778, "learning_rate": 9.902560197948213e-06, "loss": 0.7819, "step": 1916 }, { "epoch": 0.27346647646219685, "grad_norm": 0.8791998624801636, "learning_rate": 9.90240885931384e-06, "loss": 0.786, "step": 1917 }, { "epoch": 0.27360912981455066, "grad_norm": 1.16233229637146, "learning_rate": 9.902257404403084e-06, "loss": 0.8775, "step": 1918 }, { "epoch": 0.2737517831669044, "grad_norm": 0.6956872940063477, "learning_rate": 9.902105833219536e-06, "loss": 0.8513, "step": 1919 }, { "epoch": 0.2738944365192582, "grad_norm": 1.01936936378479, "learning_rate": 9.901954145766791e-06, "loss": 0.7573, "step": 1920 }, { "epoch": 0.27403708987161196, "grad_norm": 0.8860127925872803, "learning_rate": 9.901802342048448e-06, "loss": 0.7638, "step": 1921 }, { "epoch": 0.27417974322396577, "grad_norm": 0.9512662887573242, "learning_rate": 9.901650422068105e-06, "loss": 0.6495, "step": 1922 }, { "epoch": 0.2743223965763195, "grad_norm": 0.6039310693740845, "learning_rate": 9.901498385829368e-06, "loss": 0.8469, "step": 1923 }, { "epoch": 0.2744650499286733, "grad_norm": 1.309650182723999, "learning_rate": 9.901346233335842e-06, "loss": 0.8151, "step": 1924 }, { "epoch": 0.2746077032810271, "grad_norm": 0.9727128744125366, "learning_rate": 9.901193964591136e-06, "loss": 0.7634, "step": 1925 }, { "epoch": 0.2747503566333809, "grad_norm": 0.9597587585449219, "learning_rate": 9.901041579598862e-06, "loss": 0.7804, "step": 1926 }, { "epoch": 0.2748930099857347, "grad_norm": 0.8195070028305054, "learning_rate": 9.900889078362632e-06, "loss": 0.856, "step": 1927 }, { "epoch": 0.27503566333808843, "grad_norm": 0.9469627141952515, "learning_rate": 9.900736460886065e-06, "loss": 0.8192, "step": 1928 }, { "epoch": 0.27517831669044224, "grad_norm": 0.9985857009887695, "learning_rate": 9.90058372717278e-06, "loss": 0.8751, "step": 1929 }, { "epoch": 0.275320970042796, "grad_norm": 0.9491239190101624, "learning_rate": 9.900430877226402e-06, "loss": 0.8496, "step": 1930 }, { "epoch": 0.2754636233951498, "grad_norm": 0.864153265953064, "learning_rate": 9.900277911050552e-06, "loss": 0.7982, "step": 1931 }, { "epoch": 0.27560627674750354, "grad_norm": 0.7806166410446167, "learning_rate": 9.90012482864886e-06, "loss": 0.731, "step": 1932 }, { "epoch": 0.27574893009985735, "grad_norm": 0.7389256954193115, "learning_rate": 9.899971630024959e-06, "loss": 0.8548, "step": 1933 }, { "epoch": 0.27589158345221115, "grad_norm": 0.8455584645271301, "learning_rate": 9.89981831518248e-06, "loss": 0.8258, "step": 1934 }, { "epoch": 0.2760342368045649, "grad_norm": 0.6914425492286682, "learning_rate": 9.89966488412506e-06, "loss": 0.7724, "step": 1935 }, { "epoch": 0.2761768901569187, "grad_norm": 0.7934995889663696, "learning_rate": 9.899511336856339e-06, "loss": 0.8076, "step": 1936 }, { "epoch": 0.27631954350927246, "grad_norm": 0.9315785765647888, "learning_rate": 9.899357673379958e-06, "loss": 0.8478, "step": 1937 }, { "epoch": 0.27646219686162626, "grad_norm": 1.0555291175842285, "learning_rate": 9.899203893699562e-06, "loss": 0.8919, "step": 1938 }, { "epoch": 0.27660485021398, "grad_norm": 0.7730499505996704, "learning_rate": 9.899049997818797e-06, "loss": 0.7527, "step": 1939 }, { "epoch": 0.2767475035663338, "grad_norm": 0.9190470576286316, "learning_rate": 9.898895985741315e-06, "loss": 0.9001, "step": 1940 }, { "epoch": 0.27689015691868757, "grad_norm": 0.8603836297988892, "learning_rate": 9.898741857470768e-06, "loss": 0.7746, "step": 1941 }, { "epoch": 0.2770328102710414, "grad_norm": 1.4602895975112915, "learning_rate": 9.898587613010813e-06, "loss": 0.7827, "step": 1942 }, { "epoch": 0.2771754636233951, "grad_norm": 0.5700140595436096, "learning_rate": 9.898433252365107e-06, "loss": 0.828, "step": 1943 }, { "epoch": 0.27731811697574893, "grad_norm": 0.8412007689476013, "learning_rate": 9.898278775537311e-06, "loss": 0.5979, "step": 1944 }, { "epoch": 0.27746077032810273, "grad_norm": 0.9965437054634094, "learning_rate": 9.898124182531089e-06, "loss": 0.8555, "step": 1945 }, { "epoch": 0.2776034236804565, "grad_norm": 0.7343485951423645, "learning_rate": 9.897969473350108e-06, "loss": 0.8199, "step": 1946 }, { "epoch": 0.2777460770328103, "grad_norm": 0.7718836069107056, "learning_rate": 9.897814647998037e-06, "loss": 0.8313, "step": 1947 }, { "epoch": 0.27788873038516404, "grad_norm": 0.9867026209831238, "learning_rate": 9.89765970647855e-06, "loss": 0.8149, "step": 1948 }, { "epoch": 0.27803138373751785, "grad_norm": 0.8841606974601746, "learning_rate": 9.89750464879532e-06, "loss": 0.8281, "step": 1949 }, { "epoch": 0.2781740370898716, "grad_norm": 1.020338535308838, "learning_rate": 9.897349474952024e-06, "loss": 0.7385, "step": 1950 }, { "epoch": 0.2783166904422254, "grad_norm": 1.148627758026123, "learning_rate": 9.897194184952345e-06, "loss": 0.7497, "step": 1951 }, { "epoch": 0.27845934379457915, "grad_norm": 0.8143110275268555, "learning_rate": 9.897038778799965e-06, "loss": 0.829, "step": 1952 }, { "epoch": 0.27860199714693296, "grad_norm": 0.8986217975616455, "learning_rate": 9.896883256498568e-06, "loss": 0.8975, "step": 1953 }, { "epoch": 0.2787446504992867, "grad_norm": 0.9541431069374084, "learning_rate": 9.896727618051847e-06, "loss": 0.9014, "step": 1954 }, { "epoch": 0.2788873038516405, "grad_norm": 1.1523237228393555, "learning_rate": 9.89657186346349e-06, "loss": 0.8349, "step": 1955 }, { "epoch": 0.2790299572039943, "grad_norm": 0.7698736190795898, "learning_rate": 9.896415992737192e-06, "loss": 0.8688, "step": 1956 }, { "epoch": 0.27917261055634807, "grad_norm": 0.8719567060470581, "learning_rate": 9.89626000587665e-06, "loss": 0.8121, "step": 1957 }, { "epoch": 0.27931526390870187, "grad_norm": 0.6933151483535767, "learning_rate": 9.896103902885566e-06, "loss": 0.7388, "step": 1958 }, { "epoch": 0.2794579172610556, "grad_norm": 1.1415010690689087, "learning_rate": 9.895947683767638e-06, "loss": 0.8419, "step": 1959 }, { "epoch": 0.2796005706134094, "grad_norm": 0.9674553871154785, "learning_rate": 9.895791348526573e-06, "loss": 0.7627, "step": 1960 }, { "epoch": 0.2797432239657632, "grad_norm": 1.1427204608917236, "learning_rate": 9.895634897166082e-06, "loss": 0.7922, "step": 1961 }, { "epoch": 0.279885877318117, "grad_norm": 0.7016680240631104, "learning_rate": 9.895478329689872e-06, "loss": 0.8672, "step": 1962 }, { "epoch": 0.28002853067047073, "grad_norm": 0.9272727966308594, "learning_rate": 9.895321646101659e-06, "loss": 0.8336, "step": 1963 }, { "epoch": 0.28017118402282454, "grad_norm": 0.9848592281341553, "learning_rate": 9.895164846405157e-06, "loss": 0.7641, "step": 1964 }, { "epoch": 0.28031383737517834, "grad_norm": 0.9903753399848938, "learning_rate": 9.895007930604088e-06, "loss": 0.824, "step": 1965 }, { "epoch": 0.2804564907275321, "grad_norm": 0.7130590081214905, "learning_rate": 9.89485089870217e-06, "loss": 0.7927, "step": 1966 }, { "epoch": 0.2805991440798859, "grad_norm": 0.8927415609359741, "learning_rate": 9.894693750703129e-06, "loss": 0.8597, "step": 1967 }, { "epoch": 0.28074179743223965, "grad_norm": 2.3921425342559814, "learning_rate": 9.894536486610694e-06, "loss": 0.8199, "step": 1968 }, { "epoch": 0.28088445078459345, "grad_norm": 0.7472104430198669, "learning_rate": 9.894379106428593e-06, "loss": 0.8225, "step": 1969 }, { "epoch": 0.2810271041369472, "grad_norm": 0.6429457664489746, "learning_rate": 9.89422161016056e-06, "loss": 0.8998, "step": 1970 }, { "epoch": 0.281169757489301, "grad_norm": 0.8767111897468567, "learning_rate": 9.89406399781033e-06, "loss": 0.8298, "step": 1971 }, { "epoch": 0.28131241084165476, "grad_norm": 0.7979419231414795, "learning_rate": 9.893906269381641e-06, "loss": 0.8413, "step": 1972 }, { "epoch": 0.28145506419400856, "grad_norm": 1.0332896709442139, "learning_rate": 9.893748424878236e-06, "loss": 0.7598, "step": 1973 }, { "epoch": 0.2815977175463623, "grad_norm": 0.7872117757797241, "learning_rate": 9.893590464303855e-06, "loss": 0.8619, "step": 1974 }, { "epoch": 0.2817403708987161, "grad_norm": 0.7206314206123352, "learning_rate": 9.893432387662247e-06, "loss": 0.7714, "step": 1975 }, { "epoch": 0.2818830242510699, "grad_norm": 0.8268548250198364, "learning_rate": 9.893274194957163e-06, "loss": 0.7982, "step": 1976 }, { "epoch": 0.2820256776034237, "grad_norm": 0.8085371851921082, "learning_rate": 9.89311588619235e-06, "loss": 0.7757, "step": 1977 }, { "epoch": 0.2821683309557775, "grad_norm": 0.7373887896537781, "learning_rate": 9.892957461371568e-06, "loss": 0.8767, "step": 1978 }, { "epoch": 0.28231098430813123, "grad_norm": 0.8956614136695862, "learning_rate": 9.892798920498572e-06, "loss": 0.8047, "step": 1979 }, { "epoch": 0.28245363766048504, "grad_norm": 1.2321044206619263, "learning_rate": 9.892640263577123e-06, "loss": 0.8781, "step": 1980 }, { "epoch": 0.2825962910128388, "grad_norm": 0.9143946766853333, "learning_rate": 9.892481490610984e-06, "loss": 0.886, "step": 1981 }, { "epoch": 0.2827389443651926, "grad_norm": 0.7610822916030884, "learning_rate": 9.892322601603918e-06, "loss": 0.8833, "step": 1982 }, { "epoch": 0.28288159771754634, "grad_norm": 1.0275628566741943, "learning_rate": 9.892163596559698e-06, "loss": 0.7262, "step": 1983 }, { "epoch": 0.28302425106990015, "grad_norm": 0.9696669578552246, "learning_rate": 9.892004475482094e-06, "loss": 0.8268, "step": 1984 }, { "epoch": 0.2831669044222539, "grad_norm": 0.9769879579544067, "learning_rate": 9.89184523837488e-06, "loss": 0.7619, "step": 1985 }, { "epoch": 0.2833095577746077, "grad_norm": 1.043142318725586, "learning_rate": 9.89168588524183e-06, "loss": 0.7586, "step": 1986 }, { "epoch": 0.2834522111269615, "grad_norm": 0.8820614218711853, "learning_rate": 9.891526416086727e-06, "loss": 0.8276, "step": 1987 }, { "epoch": 0.28359486447931526, "grad_norm": 0.8670064806938171, "learning_rate": 9.891366830913353e-06, "loss": 0.7495, "step": 1988 }, { "epoch": 0.28373751783166906, "grad_norm": 0.752273678779602, "learning_rate": 9.891207129725493e-06, "loss": 0.8117, "step": 1989 }, { "epoch": 0.2838801711840228, "grad_norm": 1.0996286869049072, "learning_rate": 9.891047312526932e-06, "loss": 0.7631, "step": 1990 }, { "epoch": 0.2840228245363766, "grad_norm": 0.7424052953720093, "learning_rate": 9.890887379321464e-06, "loss": 0.7772, "step": 1991 }, { "epoch": 0.28416547788873037, "grad_norm": 0.7052674293518066, "learning_rate": 9.89072733011288e-06, "loss": 0.8142, "step": 1992 }, { "epoch": 0.28430813124108417, "grad_norm": 1.076757550239563, "learning_rate": 9.890567164904978e-06, "loss": 0.8167, "step": 1993 }, { "epoch": 0.2844507845934379, "grad_norm": 0.8586769104003906, "learning_rate": 9.890406883701555e-06, "loss": 0.8052, "step": 1994 }, { "epoch": 0.2845934379457917, "grad_norm": 0.8514905571937561, "learning_rate": 9.890246486506416e-06, "loss": 0.7627, "step": 1995 }, { "epoch": 0.28473609129814553, "grad_norm": 0.7438399791717529, "learning_rate": 9.89008597332336e-06, "loss": 0.8691, "step": 1996 }, { "epoch": 0.2848787446504993, "grad_norm": 1.0306975841522217, "learning_rate": 9.889925344156198e-06, "loss": 0.8418, "step": 1997 }, { "epoch": 0.2850213980028531, "grad_norm": 0.9475598335266113, "learning_rate": 9.88976459900874e-06, "loss": 0.8198, "step": 1998 }, { "epoch": 0.28516405135520684, "grad_norm": 0.9006056785583496, "learning_rate": 9.889603737884796e-06, "loss": 0.5998, "step": 1999 }, { "epoch": 0.28530670470756064, "grad_norm": 1.1654584407806396, "learning_rate": 9.889442760788183e-06, "loss": 0.764, "step": 2000 }, { "epoch": 0.2854493580599144, "grad_norm": 0.7571951150894165, "learning_rate": 9.889281667722718e-06, "loss": 0.7987, "step": 2001 }, { "epoch": 0.2855920114122682, "grad_norm": 0.707734227180481, "learning_rate": 9.889120458692223e-06, "loss": 0.8486, "step": 2002 }, { "epoch": 0.28573466476462195, "grad_norm": 0.9231438040733337, "learning_rate": 9.888959133700523e-06, "loss": 0.8625, "step": 2003 }, { "epoch": 0.28587731811697575, "grad_norm": 0.84060138463974, "learning_rate": 9.888797692751442e-06, "loss": 0.7805, "step": 2004 }, { "epoch": 0.2860199714693295, "grad_norm": 0.8617420196533203, "learning_rate": 9.888636135848808e-06, "loss": 0.7294, "step": 2005 }, { "epoch": 0.2861626248216833, "grad_norm": 0.6491578221321106, "learning_rate": 9.888474462996457e-06, "loss": 0.7926, "step": 2006 }, { "epoch": 0.2863052781740371, "grad_norm": 0.7339638471603394, "learning_rate": 9.888312674198218e-06, "loss": 0.7536, "step": 2007 }, { "epoch": 0.28644793152639086, "grad_norm": 0.7412121295928955, "learning_rate": 9.888150769457934e-06, "loss": 0.8336, "step": 2008 }, { "epoch": 0.28659058487874467, "grad_norm": 0.9829639196395874, "learning_rate": 9.887988748779441e-06, "loss": 0.7488, "step": 2009 }, { "epoch": 0.2867332382310984, "grad_norm": 0.7689664363861084, "learning_rate": 9.887826612166584e-06, "loss": 0.7866, "step": 2010 }, { "epoch": 0.2868758915834522, "grad_norm": 0.8491288423538208, "learning_rate": 9.887664359623208e-06, "loss": 0.8923, "step": 2011 }, { "epoch": 0.287018544935806, "grad_norm": 0.9316936731338501, "learning_rate": 9.887501991153161e-06, "loss": 0.7523, "step": 2012 }, { "epoch": 0.2871611982881598, "grad_norm": 0.9495995044708252, "learning_rate": 9.887339506760294e-06, "loss": 0.821, "step": 2013 }, { "epoch": 0.28730385164051353, "grad_norm": 0.8646326065063477, "learning_rate": 9.887176906448463e-06, "loss": 0.9199, "step": 2014 }, { "epoch": 0.28744650499286734, "grad_norm": 1.0679668188095093, "learning_rate": 9.887014190221522e-06, "loss": 0.889, "step": 2015 }, { "epoch": 0.2875891583452211, "grad_norm": 1.015106439590454, "learning_rate": 9.886851358083332e-06, "loss": 0.8016, "step": 2016 }, { "epoch": 0.2877318116975749, "grad_norm": 0.628006637096405, "learning_rate": 9.886688410037752e-06, "loss": 0.8931, "step": 2017 }, { "epoch": 0.2878744650499287, "grad_norm": 0.9774754643440247, "learning_rate": 9.88652534608865e-06, "loss": 0.8701, "step": 2018 }, { "epoch": 0.28801711840228245, "grad_norm": 0.8394613862037659, "learning_rate": 9.886362166239894e-06, "loss": 0.8705, "step": 2019 }, { "epoch": 0.28815977175463625, "grad_norm": 0.8570374846458435, "learning_rate": 9.886198870495351e-06, "loss": 0.8012, "step": 2020 }, { "epoch": 0.28830242510699, "grad_norm": 0.9115515351295471, "learning_rate": 9.886035458858899e-06, "loss": 0.7363, "step": 2021 }, { "epoch": 0.2884450784593438, "grad_norm": 0.9202782511711121, "learning_rate": 9.885871931334408e-06, "loss": 0.8111, "step": 2022 }, { "epoch": 0.28858773181169756, "grad_norm": 1.2247858047485352, "learning_rate": 9.88570828792576e-06, "loss": 0.8151, "step": 2023 }, { "epoch": 0.28873038516405136, "grad_norm": 0.7569736242294312, "learning_rate": 9.885544528636836e-06, "loss": 0.932, "step": 2024 }, { "epoch": 0.2888730385164051, "grad_norm": 0.7694206237792969, "learning_rate": 9.885380653471521e-06, "loss": 0.6508, "step": 2025 }, { "epoch": 0.2890156918687589, "grad_norm": 0.9679473638534546, "learning_rate": 9.8852166624337e-06, "loss": 0.8412, "step": 2026 }, { "epoch": 0.2891583452211127, "grad_norm": 0.6804773211479187, "learning_rate": 9.885052555527263e-06, "loss": 0.7427, "step": 2027 }, { "epoch": 0.2893009985734665, "grad_norm": 0.8567250967025757, "learning_rate": 9.884888332756104e-06, "loss": 0.8438, "step": 2028 }, { "epoch": 0.2894436519258203, "grad_norm": 0.7586188912391663, "learning_rate": 9.884723994124117e-06, "loss": 0.8462, "step": 2029 }, { "epoch": 0.289586305278174, "grad_norm": 1.1931333541870117, "learning_rate": 9.884559539635197e-06, "loss": 0.8473, "step": 2030 }, { "epoch": 0.28972895863052783, "grad_norm": 0.8863710165023804, "learning_rate": 9.88439496929325e-06, "loss": 0.7864, "step": 2031 }, { "epoch": 0.2898716119828816, "grad_norm": 0.7390526533126831, "learning_rate": 9.884230283102173e-06, "loss": 0.8177, "step": 2032 }, { "epoch": 0.2900142653352354, "grad_norm": 0.8629305958747864, "learning_rate": 9.884065481065878e-06, "loss": 0.811, "step": 2033 }, { "epoch": 0.29015691868758914, "grad_norm": 0.7086455821990967, "learning_rate": 9.88390056318827e-06, "loss": 0.7686, "step": 2034 }, { "epoch": 0.29029957203994294, "grad_norm": 0.7655584216117859, "learning_rate": 9.883735529473262e-06, "loss": 0.8167, "step": 2035 }, { "epoch": 0.2904422253922967, "grad_norm": 0.7016885280609131, "learning_rate": 9.88357037992477e-06, "loss": 0.8214, "step": 2036 }, { "epoch": 0.2905848787446505, "grad_norm": 1.3929604291915894, "learning_rate": 9.883405114546706e-06, "loss": 0.7529, "step": 2037 }, { "epoch": 0.2907275320970043, "grad_norm": 0.8756687641143799, "learning_rate": 9.883239733342995e-06, "loss": 0.7332, "step": 2038 }, { "epoch": 0.29087018544935805, "grad_norm": 0.6817427277565002, "learning_rate": 9.883074236317558e-06, "loss": 0.6934, "step": 2039 }, { "epoch": 0.29101283880171186, "grad_norm": 0.7502633333206177, "learning_rate": 9.882908623474318e-06, "loss": 0.834, "step": 2040 }, { "epoch": 0.2911554921540656, "grad_norm": 0.8368335962295532, "learning_rate": 9.882742894817207e-06, "loss": 0.8301, "step": 2041 }, { "epoch": 0.2912981455064194, "grad_norm": 0.8467952609062195, "learning_rate": 9.882577050350152e-06, "loss": 0.8217, "step": 2042 }, { "epoch": 0.29144079885877316, "grad_norm": 0.9308727979660034, "learning_rate": 9.88241109007709e-06, "loss": 0.8206, "step": 2043 }, { "epoch": 0.29158345221112697, "grad_norm": 1.0542320013046265, "learning_rate": 9.882245014001954e-06, "loss": 0.8099, "step": 2044 }, { "epoch": 0.2917261055634807, "grad_norm": 0.8059763312339783, "learning_rate": 9.882078822128687e-06, "loss": 0.8226, "step": 2045 }, { "epoch": 0.2918687589158345, "grad_norm": 0.7922718524932861, "learning_rate": 9.881912514461224e-06, "loss": 0.8056, "step": 2046 }, { "epoch": 0.2920114122681883, "grad_norm": 0.7705415487289429, "learning_rate": 9.881746091003517e-06, "loss": 0.9385, "step": 2047 }, { "epoch": 0.2921540656205421, "grad_norm": 0.7232141494750977, "learning_rate": 9.88157955175951e-06, "loss": 0.7884, "step": 2048 }, { "epoch": 0.2922967189728959, "grad_norm": 0.9239014983177185, "learning_rate": 9.881412896733152e-06, "loss": 0.7982, "step": 2049 }, { "epoch": 0.29243937232524964, "grad_norm": 0.6806542277336121, "learning_rate": 9.881246125928397e-06, "loss": 0.7991, "step": 2050 }, { "epoch": 0.29258202567760344, "grad_norm": 0.918830156326294, "learning_rate": 9.8810792393492e-06, "loss": 0.7707, "step": 2051 }, { "epoch": 0.2927246790299572, "grad_norm": 0.7483209371566772, "learning_rate": 9.88091223699952e-06, "loss": 0.7601, "step": 2052 }, { "epoch": 0.292867332382311, "grad_norm": 1.057376742362976, "learning_rate": 9.880745118883319e-06, "loss": 0.8039, "step": 2053 }, { "epoch": 0.29300998573466475, "grad_norm": 0.7470461130142212, "learning_rate": 9.880577885004557e-06, "loss": 0.6418, "step": 2054 }, { "epoch": 0.29315263908701855, "grad_norm": 0.7077125310897827, "learning_rate": 9.880410535367205e-06, "loss": 0.7487, "step": 2055 }, { "epoch": 0.2932952924393723, "grad_norm": 0.7707588076591492, "learning_rate": 9.880243069975228e-06, "loss": 0.7815, "step": 2056 }, { "epoch": 0.2934379457917261, "grad_norm": 0.9563851356506348, "learning_rate": 9.880075488832601e-06, "loss": 0.8199, "step": 2057 }, { "epoch": 0.2935805991440799, "grad_norm": 0.7423125505447388, "learning_rate": 9.879907791943297e-06, "loss": 0.8316, "step": 2058 }, { "epoch": 0.29372325249643366, "grad_norm": 0.6795817613601685, "learning_rate": 9.879739979311296e-06, "loss": 0.8132, "step": 2059 }, { "epoch": 0.29386590584878747, "grad_norm": 0.6662577986717224, "learning_rate": 9.879572050940576e-06, "loss": 0.849, "step": 2060 }, { "epoch": 0.2940085592011412, "grad_norm": 0.789661169052124, "learning_rate": 9.879404006835119e-06, "loss": 0.8125, "step": 2061 }, { "epoch": 0.294151212553495, "grad_norm": 0.59214186668396, "learning_rate": 9.879235846998913e-06, "loss": 0.8108, "step": 2062 }, { "epoch": 0.2942938659058488, "grad_norm": 0.7700336575508118, "learning_rate": 9.879067571435946e-06, "loss": 0.7625, "step": 2063 }, { "epoch": 0.2944365192582026, "grad_norm": 0.5752390623092651, "learning_rate": 9.878899180150208e-06, "loss": 0.6208, "step": 2064 }, { "epoch": 0.29457917261055633, "grad_norm": 0.6733284592628479, "learning_rate": 9.878730673145695e-06, "loss": 0.7889, "step": 2065 }, { "epoch": 0.29472182596291013, "grad_norm": 0.8462895750999451, "learning_rate": 9.8785620504264e-06, "loss": 0.8705, "step": 2066 }, { "epoch": 0.2948644793152639, "grad_norm": 0.6242477297782898, "learning_rate": 9.878393311996329e-06, "loss": 0.7492, "step": 2067 }, { "epoch": 0.2950071326676177, "grad_norm": 0.8804550170898438, "learning_rate": 9.878224457859476e-06, "loss": 0.8494, "step": 2068 }, { "epoch": 0.2951497860199715, "grad_norm": 0.8771013617515564, "learning_rate": 9.878055488019852e-06, "loss": 0.8028, "step": 2069 }, { "epoch": 0.29529243937232524, "grad_norm": 0.6493806838989258, "learning_rate": 9.877886402481461e-06, "loss": 0.5711, "step": 2070 }, { "epoch": 0.29543509272467905, "grad_norm": 0.872032105922699, "learning_rate": 9.877717201248317e-06, "loss": 0.8921, "step": 2071 }, { "epoch": 0.2955777460770328, "grad_norm": 0.9797448515892029, "learning_rate": 9.87754788432443e-06, "loss": 0.8849, "step": 2072 }, { "epoch": 0.2957203994293866, "grad_norm": 0.676609992980957, "learning_rate": 9.877378451713817e-06, "loss": 0.7689, "step": 2073 }, { "epoch": 0.29586305278174035, "grad_norm": 0.8016397953033447, "learning_rate": 9.877208903420496e-06, "loss": 0.8762, "step": 2074 }, { "epoch": 0.29600570613409416, "grad_norm": 0.8251256346702576, "learning_rate": 9.87703923944849e-06, "loss": 0.7959, "step": 2075 }, { "epoch": 0.2961483594864479, "grad_norm": 0.9070316553115845, "learning_rate": 9.876869459801822e-06, "loss": 0.7998, "step": 2076 }, { "epoch": 0.2962910128388017, "grad_norm": 0.8603582382202148, "learning_rate": 9.87669956448452e-06, "loss": 0.792, "step": 2077 }, { "epoch": 0.29643366619115546, "grad_norm": 0.7488096356391907, "learning_rate": 9.876529553500612e-06, "loss": 0.8162, "step": 2078 }, { "epoch": 0.29657631954350927, "grad_norm": 0.9013305306434631, "learning_rate": 9.87635942685413e-06, "loss": 0.7686, "step": 2079 }, { "epoch": 0.2967189728958631, "grad_norm": 0.8539659976959229, "learning_rate": 9.876189184549111e-06, "loss": 0.9297, "step": 2080 }, { "epoch": 0.2968616262482168, "grad_norm": 0.807090699672699, "learning_rate": 9.876018826589593e-06, "loss": 0.8406, "step": 2081 }, { "epoch": 0.29700427960057063, "grad_norm": 0.898745059967041, "learning_rate": 9.875848352979613e-06, "loss": 0.8019, "step": 2082 }, { "epoch": 0.2971469329529244, "grad_norm": 1.1248681545257568, "learning_rate": 9.87567776372322e-06, "loss": 0.7014, "step": 2083 }, { "epoch": 0.2972895863052782, "grad_norm": 1.222901463508606, "learning_rate": 9.875507058824454e-06, "loss": 0.8078, "step": 2084 }, { "epoch": 0.29743223965763194, "grad_norm": 0.9312527775764465, "learning_rate": 9.875336238287368e-06, "loss": 0.7726, "step": 2085 }, { "epoch": 0.29757489300998574, "grad_norm": 0.9272825121879578, "learning_rate": 9.875165302116013e-06, "loss": 0.8807, "step": 2086 }, { "epoch": 0.2977175463623395, "grad_norm": 0.8349441885948181, "learning_rate": 9.874994250314443e-06, "loss": 0.7877, "step": 2087 }, { "epoch": 0.2978601997146933, "grad_norm": 1.006554126739502, "learning_rate": 9.874823082886713e-06, "loss": 0.8353, "step": 2088 }, { "epoch": 0.2980028530670471, "grad_norm": 0.79835444688797, "learning_rate": 9.874651799836884e-06, "loss": 0.726, "step": 2089 }, { "epoch": 0.29814550641940085, "grad_norm": 0.8016711473464966, "learning_rate": 9.874480401169022e-06, "loss": 0.7513, "step": 2090 }, { "epoch": 0.29828815977175466, "grad_norm": 0.8496073484420776, "learning_rate": 9.874308886887187e-06, "loss": 0.8491, "step": 2091 }, { "epoch": 0.2984308131241084, "grad_norm": 1.7348815202713013, "learning_rate": 9.874137256995451e-06, "loss": 0.8721, "step": 2092 }, { "epoch": 0.2985734664764622, "grad_norm": 0.922148585319519, "learning_rate": 9.87396551149788e-06, "loss": 0.8141, "step": 2093 }, { "epoch": 0.29871611982881596, "grad_norm": 0.9337180852890015, "learning_rate": 9.873793650398553e-06, "loss": 0.8335, "step": 2094 }, { "epoch": 0.29885877318116977, "grad_norm": 0.7504223585128784, "learning_rate": 9.873621673701543e-06, "loss": 0.8182, "step": 2095 }, { "epoch": 0.2990014265335235, "grad_norm": 0.8032134771347046, "learning_rate": 9.873449581410932e-06, "loss": 0.7608, "step": 2096 }, { "epoch": 0.2991440798858773, "grad_norm": 0.9668902158737183, "learning_rate": 9.873277373530796e-06, "loss": 0.8535, "step": 2097 }, { "epoch": 0.2992867332382311, "grad_norm": 0.9308344721794128, "learning_rate": 9.873105050065225e-06, "loss": 0.8363, "step": 2098 }, { "epoch": 0.2994293865905849, "grad_norm": 0.7573476433753967, "learning_rate": 9.872932611018303e-06, "loss": 0.9092, "step": 2099 }, { "epoch": 0.2995720399429387, "grad_norm": 0.9825876355171204, "learning_rate": 9.872760056394122e-06, "loss": 0.8367, "step": 2100 }, { "epoch": 0.29971469329529243, "grad_norm": 0.8236121535301208, "learning_rate": 9.872587386196773e-06, "loss": 0.7682, "step": 2101 }, { "epoch": 0.29985734664764624, "grad_norm": 0.9628751277923584, "learning_rate": 9.872414600430354e-06, "loss": 0.7783, "step": 2102 }, { "epoch": 0.3, "grad_norm": 1.1155561208724976, "learning_rate": 9.872241699098959e-06, "loss": 0.8202, "step": 2103 }, { "epoch": 0.3001426533523538, "grad_norm": 0.7287495136260986, "learning_rate": 9.872068682206693e-06, "loss": 0.7811, "step": 2104 }, { "epoch": 0.30028530670470754, "grad_norm": 0.8614897727966309, "learning_rate": 9.871895549757655e-06, "loss": 0.8026, "step": 2105 }, { "epoch": 0.30042796005706135, "grad_norm": 0.823110818862915, "learning_rate": 9.871722301755958e-06, "loss": 0.7913, "step": 2106 }, { "epoch": 0.3005706134094151, "grad_norm": 0.8629475235939026, "learning_rate": 9.871548938205707e-06, "loss": 0.8101, "step": 2107 }, { "epoch": 0.3007132667617689, "grad_norm": 1.211033582687378, "learning_rate": 9.871375459111012e-06, "loss": 0.818, "step": 2108 }, { "epoch": 0.30085592011412265, "grad_norm": 0.8554418087005615, "learning_rate": 9.87120186447599e-06, "loss": 0.7734, "step": 2109 }, { "epoch": 0.30099857346647646, "grad_norm": 0.8902543187141418, "learning_rate": 9.87102815430476e-06, "loss": 0.8581, "step": 2110 }, { "epoch": 0.30114122681883027, "grad_norm": 0.727468729019165, "learning_rate": 9.870854328601441e-06, "loss": 0.7895, "step": 2111 }, { "epoch": 0.301283880171184, "grad_norm": 0.7078068256378174, "learning_rate": 9.870680387370153e-06, "loss": 0.8017, "step": 2112 }, { "epoch": 0.3014265335235378, "grad_norm": 0.863304078578949, "learning_rate": 9.870506330615026e-06, "loss": 0.7368, "step": 2113 }, { "epoch": 0.30156918687589157, "grad_norm": 0.9752305746078491, "learning_rate": 9.870332158340184e-06, "loss": 0.8393, "step": 2114 }, { "epoch": 0.3017118402282454, "grad_norm": 0.7903264760971069, "learning_rate": 9.870157870549761e-06, "loss": 0.773, "step": 2115 }, { "epoch": 0.3018544935805991, "grad_norm": 0.6576414108276367, "learning_rate": 9.86998346724789e-06, "loss": 0.7978, "step": 2116 }, { "epoch": 0.30199714693295293, "grad_norm": 0.9541743397712708, "learning_rate": 9.869808948438709e-06, "loss": 0.8005, "step": 2117 }, { "epoch": 0.3021398002853067, "grad_norm": 0.753960371017456, "learning_rate": 9.869634314126355e-06, "loss": 0.8049, "step": 2118 }, { "epoch": 0.3022824536376605, "grad_norm": 1.614227533340454, "learning_rate": 9.86945956431497e-06, "loss": 0.5986, "step": 2119 }, { "epoch": 0.3024251069900143, "grad_norm": 0.9586261510848999, "learning_rate": 9.8692846990087e-06, "loss": 0.7981, "step": 2120 }, { "epoch": 0.30256776034236804, "grad_norm": 0.9887835383415222, "learning_rate": 9.86910971821169e-06, "loss": 0.8577, "step": 2121 }, { "epoch": 0.30271041369472185, "grad_norm": 0.793765127658844, "learning_rate": 9.868934621928096e-06, "loss": 0.8871, "step": 2122 }, { "epoch": 0.3028530670470756, "grad_norm": 0.9992631077766418, "learning_rate": 9.868759410162065e-06, "loss": 0.8226, "step": 2123 }, { "epoch": 0.3029957203994294, "grad_norm": 1.7083792686462402, "learning_rate": 9.868584082917756e-06, "loss": 0.623, "step": 2124 }, { "epoch": 0.30313837375178315, "grad_norm": 0.9900110960006714, "learning_rate": 9.868408640199325e-06, "loss": 0.8066, "step": 2125 }, { "epoch": 0.30328102710413696, "grad_norm": 0.8131747841835022, "learning_rate": 9.868233082010936e-06, "loss": 0.7903, "step": 2126 }, { "epoch": 0.3034236804564907, "grad_norm": 0.9614596366882324, "learning_rate": 9.868057408356751e-06, "loss": 0.7603, "step": 2127 }, { "epoch": 0.3035663338088445, "grad_norm": 0.8352845907211304, "learning_rate": 9.867881619240938e-06, "loss": 0.7485, "step": 2128 }, { "epoch": 0.30370898716119826, "grad_norm": 0.7611213326454163, "learning_rate": 9.867705714667663e-06, "loss": 0.7751, "step": 2129 }, { "epoch": 0.30385164051355207, "grad_norm": 1.0592976808547974, "learning_rate": 9.867529694641104e-06, "loss": 0.8413, "step": 2130 }, { "epoch": 0.3039942938659059, "grad_norm": 1.0875227451324463, "learning_rate": 9.867353559165432e-06, "loss": 0.8408, "step": 2131 }, { "epoch": 0.3041369472182596, "grad_norm": 1.003643274307251, "learning_rate": 9.867177308244823e-06, "loss": 0.7836, "step": 2132 }, { "epoch": 0.30427960057061343, "grad_norm": 0.9505198001861572, "learning_rate": 9.86700094188346e-06, "loss": 0.8237, "step": 2133 }, { "epoch": 0.3044222539229672, "grad_norm": 0.8064021468162537, "learning_rate": 9.866824460085526e-06, "loss": 0.9115, "step": 2134 }, { "epoch": 0.304564907275321, "grad_norm": 1.0296398401260376, "learning_rate": 9.86664786285521e-06, "loss": 0.8384, "step": 2135 }, { "epoch": 0.30470756062767473, "grad_norm": 0.6528288722038269, "learning_rate": 9.866471150196692e-06, "loss": 0.8159, "step": 2136 }, { "epoch": 0.30485021398002854, "grad_norm": 0.863062858581543, "learning_rate": 9.866294322114171e-06, "loss": 0.7495, "step": 2137 }, { "epoch": 0.3049928673323823, "grad_norm": 0.9573654532432556, "learning_rate": 9.866117378611839e-06, "loss": 0.7603, "step": 2138 }, { "epoch": 0.3051355206847361, "grad_norm": 1.066430687904358, "learning_rate": 9.865940319693891e-06, "loss": 0.7633, "step": 2139 }, { "epoch": 0.30527817403708984, "grad_norm": 1.0387983322143555, "learning_rate": 9.865763145364528e-06, "loss": 0.7622, "step": 2140 }, { "epoch": 0.30542082738944365, "grad_norm": 0.6708153486251831, "learning_rate": 9.865585855627951e-06, "loss": 0.8167, "step": 2141 }, { "epoch": 0.30556348074179746, "grad_norm": 0.8030664324760437, "learning_rate": 9.865408450488367e-06, "loss": 0.8367, "step": 2142 }, { "epoch": 0.3057061340941512, "grad_norm": 0.8071599006652832, "learning_rate": 9.865230929949984e-06, "loss": 0.7612, "step": 2143 }, { "epoch": 0.305848787446505, "grad_norm": 0.9778062701225281, "learning_rate": 9.86505329401701e-06, "loss": 0.7897, "step": 2144 }, { "epoch": 0.30599144079885876, "grad_norm": 0.8493444323539734, "learning_rate": 9.86487554269366e-06, "loss": 0.8974, "step": 2145 }, { "epoch": 0.30613409415121257, "grad_norm": 1.0544459819793701, "learning_rate": 9.864697675984148e-06, "loss": 0.7673, "step": 2146 }, { "epoch": 0.3062767475035663, "grad_norm": 1.2514063119888306, "learning_rate": 9.864519693892694e-06, "loss": 0.7424, "step": 2147 }, { "epoch": 0.3064194008559201, "grad_norm": 0.8082605004310608, "learning_rate": 9.864341596423522e-06, "loss": 0.8396, "step": 2148 }, { "epoch": 0.30656205420827387, "grad_norm": 1.1238821744918823, "learning_rate": 9.864163383580852e-06, "loss": 0.7987, "step": 2149 }, { "epoch": 0.3067047075606277, "grad_norm": 0.7748532295227051, "learning_rate": 9.86398505536891e-06, "loss": 0.786, "step": 2150 }, { "epoch": 0.3068473609129815, "grad_norm": 0.7004071474075317, "learning_rate": 9.863806611791932e-06, "loss": 0.738, "step": 2151 }, { "epoch": 0.30699001426533523, "grad_norm": 0.6278806328773499, "learning_rate": 9.863628052854144e-06, "loss": 0.8497, "step": 2152 }, { "epoch": 0.30713266761768904, "grad_norm": 0.8097479343414307, "learning_rate": 9.863449378559785e-06, "loss": 0.8619, "step": 2153 }, { "epoch": 0.3072753209700428, "grad_norm": 0.9160128831863403, "learning_rate": 9.863270588913093e-06, "loss": 0.8547, "step": 2154 }, { "epoch": 0.3074179743223966, "grad_norm": 0.6985733509063721, "learning_rate": 9.863091683918304e-06, "loss": 0.7888, "step": 2155 }, { "epoch": 0.30756062767475034, "grad_norm": 0.727873682975769, "learning_rate": 9.862912663579664e-06, "loss": 0.8047, "step": 2156 }, { "epoch": 0.30770328102710415, "grad_norm": 0.8142025470733643, "learning_rate": 9.86273352790142e-06, "loss": 0.8039, "step": 2157 }, { "epoch": 0.3078459343794579, "grad_norm": 1.0665960311889648, "learning_rate": 9.862554276887822e-06, "loss": 0.7953, "step": 2158 }, { "epoch": 0.3079885877318117, "grad_norm": 1.0293890237808228, "learning_rate": 9.862374910543118e-06, "loss": 0.8261, "step": 2159 }, { "epoch": 0.30813124108416545, "grad_norm": 0.6995319128036499, "learning_rate": 9.862195428871565e-06, "loss": 0.8047, "step": 2160 }, { "epoch": 0.30827389443651926, "grad_norm": 0.8701078295707703, "learning_rate": 9.862015831877418e-06, "loss": 0.8245, "step": 2161 }, { "epoch": 0.30841654778887306, "grad_norm": 1.0178847312927246, "learning_rate": 9.861836119564938e-06, "loss": 0.8391, "step": 2162 }, { "epoch": 0.3085592011412268, "grad_norm": 0.9585567116737366, "learning_rate": 9.861656291938386e-06, "loss": 0.801, "step": 2163 }, { "epoch": 0.3087018544935806, "grad_norm": 0.9947155714035034, "learning_rate": 9.861476349002031e-06, "loss": 0.7549, "step": 2164 }, { "epoch": 0.30884450784593437, "grad_norm": 0.8896161317825317, "learning_rate": 9.861296290760135e-06, "loss": 0.7609, "step": 2165 }, { "epoch": 0.3089871611982882, "grad_norm": 0.9120628237724304, "learning_rate": 9.861116117216973e-06, "loss": 0.7965, "step": 2166 }, { "epoch": 0.3091298145506419, "grad_norm": 0.8187360763549805, "learning_rate": 9.860935828376818e-06, "loss": 0.8245, "step": 2167 }, { "epoch": 0.30927246790299573, "grad_norm": 0.8231696486473083, "learning_rate": 9.860755424243944e-06, "loss": 0.8937, "step": 2168 }, { "epoch": 0.3094151212553495, "grad_norm": 0.7503544092178345, "learning_rate": 9.860574904822631e-06, "loss": 0.7331, "step": 2169 }, { "epoch": 0.3095577746077033, "grad_norm": 1.016889214515686, "learning_rate": 9.860394270117164e-06, "loss": 0.8046, "step": 2170 }, { "epoch": 0.30970042796005703, "grad_norm": 0.9495335817337036, "learning_rate": 9.860213520131821e-06, "loss": 0.82, "step": 2171 }, { "epoch": 0.30984308131241084, "grad_norm": 0.6853278875350952, "learning_rate": 9.860032654870892e-06, "loss": 0.7896, "step": 2172 }, { "epoch": 0.30998573466476464, "grad_norm": 0.9106058478355408, "learning_rate": 9.859851674338668e-06, "loss": 0.7421, "step": 2173 }, { "epoch": 0.3101283880171184, "grad_norm": 0.7071598172187805, "learning_rate": 9.859670578539442e-06, "loss": 0.7729, "step": 2174 }, { "epoch": 0.3102710413694722, "grad_norm": 0.7470977902412415, "learning_rate": 9.859489367477507e-06, "loss": 0.8296, "step": 2175 }, { "epoch": 0.31041369472182595, "grad_norm": 0.8395113945007324, "learning_rate": 9.859308041157159e-06, "loss": 0.6945, "step": 2176 }, { "epoch": 0.31055634807417976, "grad_norm": 0.840278685092926, "learning_rate": 9.859126599582705e-06, "loss": 0.7228, "step": 2177 }, { "epoch": 0.3106990014265335, "grad_norm": 0.9928528070449829, "learning_rate": 9.858945042758443e-06, "loss": 0.7163, "step": 2178 }, { "epoch": 0.3108416547788873, "grad_norm": 0.8427954316139221, "learning_rate": 9.858763370688682e-06, "loss": 0.7255, "step": 2179 }, { "epoch": 0.31098430813124106, "grad_norm": 0.932619035243988, "learning_rate": 9.85858158337773e-06, "loss": 0.7165, "step": 2180 }, { "epoch": 0.31112696148359487, "grad_norm": 1.118070125579834, "learning_rate": 9.8583996808299e-06, "loss": 0.7315, "step": 2181 }, { "epoch": 0.31126961483594867, "grad_norm": 0.8051701784133911, "learning_rate": 9.858217663049503e-06, "loss": 0.789, "step": 2182 }, { "epoch": 0.3114122681883024, "grad_norm": 0.7789623141288757, "learning_rate": 9.85803553004086e-06, "loss": 0.8121, "step": 2183 }, { "epoch": 0.3115549215406562, "grad_norm": 0.7830784916877747, "learning_rate": 9.85785328180829e-06, "loss": 0.6264, "step": 2184 }, { "epoch": 0.31169757489301, "grad_norm": 1.0730844736099243, "learning_rate": 9.857670918356113e-06, "loss": 0.7213, "step": 2185 }, { "epoch": 0.3118402282453638, "grad_norm": 0.7656449675559998, "learning_rate": 9.857488439688657e-06, "loss": 0.7442, "step": 2186 }, { "epoch": 0.31198288159771753, "grad_norm": 0.8415981531143188, "learning_rate": 9.85730584581025e-06, "loss": 0.7284, "step": 2187 }, { "epoch": 0.31212553495007134, "grad_norm": 1.2432276010513306, "learning_rate": 9.85712313672522e-06, "loss": 0.8322, "step": 2188 }, { "epoch": 0.3122681883024251, "grad_norm": 0.7802530527114868, "learning_rate": 9.856940312437904e-06, "loss": 0.8186, "step": 2189 }, { "epoch": 0.3124108416547789, "grad_norm": 0.894239604473114, "learning_rate": 9.856757372952636e-06, "loss": 0.8504, "step": 2190 }, { "epoch": 0.31255349500713264, "grad_norm": 1.0458093881607056, "learning_rate": 9.856574318273757e-06, "loss": 0.7908, "step": 2191 }, { "epoch": 0.31269614835948645, "grad_norm": 1.034009575843811, "learning_rate": 9.856391148405607e-06, "loss": 0.8138, "step": 2192 }, { "epoch": 0.31283880171184025, "grad_norm": 1.2163127660751343, "learning_rate": 9.856207863352531e-06, "loss": 0.7842, "step": 2193 }, { "epoch": 0.312981455064194, "grad_norm": 1.059219479560852, "learning_rate": 9.856024463118876e-06, "loss": 0.8056, "step": 2194 }, { "epoch": 0.3131241084165478, "grad_norm": 0.8998891115188599, "learning_rate": 9.855840947708994e-06, "loss": 0.7759, "step": 2195 }, { "epoch": 0.31326676176890156, "grad_norm": 0.8348072171211243, "learning_rate": 9.855657317127233e-06, "loss": 0.7015, "step": 2196 }, { "epoch": 0.31340941512125536, "grad_norm": 0.8206185102462769, "learning_rate": 9.855473571377954e-06, "loss": 0.8026, "step": 2197 }, { "epoch": 0.3135520684736091, "grad_norm": 0.7508227825164795, "learning_rate": 9.85528971046551e-06, "loss": 0.8614, "step": 2198 }, { "epoch": 0.3136947218259629, "grad_norm": 0.7386098504066467, "learning_rate": 9.855105734394266e-06, "loss": 0.8698, "step": 2199 }, { "epoch": 0.31383737517831667, "grad_norm": 1.059203028678894, "learning_rate": 9.854921643168582e-06, "loss": 0.7743, "step": 2200 }, { "epoch": 0.3139800285306705, "grad_norm": 0.9634552001953125, "learning_rate": 9.854737436792828e-06, "loss": 0.8486, "step": 2201 }, { "epoch": 0.3141226818830242, "grad_norm": 1.0923370122909546, "learning_rate": 9.854553115271369e-06, "loss": 0.8711, "step": 2202 }, { "epoch": 0.31426533523537803, "grad_norm": 0.7884952425956726, "learning_rate": 9.85436867860858e-06, "loss": 0.8044, "step": 2203 }, { "epoch": 0.31440798858773183, "grad_norm": 0.7849683165550232, "learning_rate": 9.854184126808834e-06, "loss": 0.8388, "step": 2204 }, { "epoch": 0.3145506419400856, "grad_norm": 0.9549512267112732, "learning_rate": 9.85399945987651e-06, "loss": 0.7388, "step": 2205 }, { "epoch": 0.3146932952924394, "grad_norm": 0.8654511570930481, "learning_rate": 9.853814677815985e-06, "loss": 0.7575, "step": 2206 }, { "epoch": 0.31483594864479314, "grad_norm": 0.8052588105201721, "learning_rate": 9.853629780631643e-06, "loss": 0.5873, "step": 2207 }, { "epoch": 0.31497860199714695, "grad_norm": 0.7848721146583557, "learning_rate": 9.85344476832787e-06, "loss": 0.8867, "step": 2208 }, { "epoch": 0.3151212553495007, "grad_norm": 1.3158105611801147, "learning_rate": 9.853259640909057e-06, "loss": 0.9041, "step": 2209 }, { "epoch": 0.3152639087018545, "grad_norm": 0.9283496737480164, "learning_rate": 9.85307439837959e-06, "loss": 0.749, "step": 2210 }, { "epoch": 0.31540656205420825, "grad_norm": 0.6851430535316467, "learning_rate": 9.852889040743862e-06, "loss": 0.838, "step": 2211 }, { "epoch": 0.31554921540656206, "grad_norm": 0.7198887467384338, "learning_rate": 9.852703568006274e-06, "loss": 0.7585, "step": 2212 }, { "epoch": 0.31569186875891586, "grad_norm": 0.9755715131759644, "learning_rate": 9.852517980171223e-06, "loss": 0.8576, "step": 2213 }, { "epoch": 0.3158345221112696, "grad_norm": 0.8580317497253418, "learning_rate": 9.852332277243111e-06, "loss": 0.7675, "step": 2214 }, { "epoch": 0.3159771754636234, "grad_norm": 1.0044978857040405, "learning_rate": 9.85214645922634e-06, "loss": 0.7174, "step": 2215 }, { "epoch": 0.31611982881597717, "grad_norm": 0.9367561936378479, "learning_rate": 9.851960526125322e-06, "loss": 0.8766, "step": 2216 }, { "epoch": 0.31626248216833097, "grad_norm": 1.1179993152618408, "learning_rate": 9.851774477944463e-06, "loss": 0.7772, "step": 2217 }, { "epoch": 0.3164051355206847, "grad_norm": 0.7806481719017029, "learning_rate": 9.851588314688177e-06, "loss": 0.7549, "step": 2218 }, { "epoch": 0.3165477888730385, "grad_norm": 0.8973761796951294, "learning_rate": 9.85140203636088e-06, "loss": 0.8134, "step": 2219 }, { "epoch": 0.3166904422253923, "grad_norm": 1.1041462421417236, "learning_rate": 9.851215642966992e-06, "loss": 0.7921, "step": 2220 }, { "epoch": 0.3168330955777461, "grad_norm": 0.960660457611084, "learning_rate": 9.851029134510928e-06, "loss": 0.7575, "step": 2221 }, { "epoch": 0.31697574893009983, "grad_norm": 0.6323800086975098, "learning_rate": 9.850842510997119e-06, "loss": 0.8378, "step": 2222 }, { "epoch": 0.31711840228245364, "grad_norm": 0.9967565536499023, "learning_rate": 9.850655772429984e-06, "loss": 0.7803, "step": 2223 }, { "epoch": 0.31726105563480744, "grad_norm": 0.8638782501220703, "learning_rate": 9.85046891881396e-06, "loss": 0.7756, "step": 2224 }, { "epoch": 0.3174037089871612, "grad_norm": 0.8508341312408447, "learning_rate": 9.850281950153473e-06, "loss": 0.785, "step": 2225 }, { "epoch": 0.317546362339515, "grad_norm": 0.9450670480728149, "learning_rate": 9.850094866452959e-06, "loss": 0.7804, "step": 2226 }, { "epoch": 0.31768901569186875, "grad_norm": 0.8388874530792236, "learning_rate": 9.849907667716856e-06, "loss": 0.744, "step": 2227 }, { "epoch": 0.31783166904422255, "grad_norm": 0.9147560000419617, "learning_rate": 9.849720353949603e-06, "loss": 0.8408, "step": 2228 }, { "epoch": 0.3179743223965763, "grad_norm": 0.7567501068115234, "learning_rate": 9.849532925155643e-06, "loss": 0.8426, "step": 2229 }, { "epoch": 0.3181169757489301, "grad_norm": 0.7739532589912415, "learning_rate": 9.849345381339424e-06, "loss": 0.8377, "step": 2230 }, { "epoch": 0.31825962910128386, "grad_norm": 0.761940062046051, "learning_rate": 9.849157722505388e-06, "loss": 0.8111, "step": 2231 }, { "epoch": 0.31840228245363766, "grad_norm": 0.780726969242096, "learning_rate": 9.848969948657994e-06, "loss": 0.7938, "step": 2232 }, { "epoch": 0.3185449358059914, "grad_norm": 0.7162595987319946, "learning_rate": 9.848782059801692e-06, "loss": 0.7784, "step": 2233 }, { "epoch": 0.3186875891583452, "grad_norm": 0.9884735345840454, "learning_rate": 9.848594055940935e-06, "loss": 0.8687, "step": 2234 }, { "epoch": 0.318830242510699, "grad_norm": 1.2923800945281982, "learning_rate": 9.848405937080187e-06, "loss": 0.7915, "step": 2235 }, { "epoch": 0.3189728958630528, "grad_norm": 0.8164573311805725, "learning_rate": 9.848217703223908e-06, "loss": 0.7613, "step": 2236 }, { "epoch": 0.3191155492154066, "grad_norm": 0.8309138417243958, "learning_rate": 9.848029354376561e-06, "loss": 0.7831, "step": 2237 }, { "epoch": 0.31925820256776033, "grad_norm": 0.8395116925239563, "learning_rate": 9.847840890542618e-06, "loss": 0.8752, "step": 2238 }, { "epoch": 0.31940085592011414, "grad_norm": 0.9086002707481384, "learning_rate": 9.847652311726542e-06, "loss": 0.7986, "step": 2239 }, { "epoch": 0.3195435092724679, "grad_norm": 0.9900307059288025, "learning_rate": 9.847463617932811e-06, "loss": 0.7143, "step": 2240 }, { "epoch": 0.3196861626248217, "grad_norm": 0.9896170496940613, "learning_rate": 9.847274809165901e-06, "loss": 0.7538, "step": 2241 }, { "epoch": 0.31982881597717544, "grad_norm": 0.8203100562095642, "learning_rate": 9.847085885430286e-06, "loss": 0.7835, "step": 2242 }, { "epoch": 0.31997146932952925, "grad_norm": 1.2037122249603271, "learning_rate": 9.846896846730451e-06, "loss": 0.7972, "step": 2243 }, { "epoch": 0.32011412268188305, "grad_norm": 0.7327616810798645, "learning_rate": 9.846707693070876e-06, "loss": 0.837, "step": 2244 }, { "epoch": 0.3202567760342368, "grad_norm": 0.7550442814826965, "learning_rate": 9.84651842445605e-06, "loss": 0.8301, "step": 2245 }, { "epoch": 0.3203994293865906, "grad_norm": 0.841128408908844, "learning_rate": 9.846329040890463e-06, "loss": 0.8046, "step": 2246 }, { "epoch": 0.32054208273894436, "grad_norm": 0.874862790107727, "learning_rate": 9.846139542378604e-06, "loss": 0.7318, "step": 2247 }, { "epoch": 0.32068473609129816, "grad_norm": 0.6857011914253235, "learning_rate": 9.84594992892497e-06, "loss": 0.8781, "step": 2248 }, { "epoch": 0.3208273894436519, "grad_norm": 0.7826842069625854, "learning_rate": 9.845760200534055e-06, "loss": 0.8683, "step": 2249 }, { "epoch": 0.3209700427960057, "grad_norm": 1.3429893255233765, "learning_rate": 9.845570357210362e-06, "loss": 0.6449, "step": 2250 }, { "epoch": 0.32111269614835947, "grad_norm": 0.9500937461853027, "learning_rate": 9.845380398958392e-06, "loss": 0.7185, "step": 2251 }, { "epoch": 0.32125534950071327, "grad_norm": 0.804557740688324, "learning_rate": 9.845190325782654e-06, "loss": 0.7884, "step": 2252 }, { "epoch": 0.321398002853067, "grad_norm": 0.7487125992774963, "learning_rate": 9.845000137687652e-06, "loss": 0.8521, "step": 2253 }, { "epoch": 0.3215406562054208, "grad_norm": 0.8841039538383484, "learning_rate": 9.844809834677898e-06, "loss": 0.7946, "step": 2254 }, { "epoch": 0.32168330955777463, "grad_norm": 0.6417714357376099, "learning_rate": 9.844619416757907e-06, "loss": 0.797, "step": 2255 }, { "epoch": 0.3218259629101284, "grad_norm": 1.0088717937469482, "learning_rate": 9.844428883932193e-06, "loss": 0.8008, "step": 2256 }, { "epoch": 0.3219686162624822, "grad_norm": 1.0375267267227173, "learning_rate": 9.844238236205278e-06, "loss": 0.8408, "step": 2257 }, { "epoch": 0.32211126961483594, "grad_norm": 0.770523726940155, "learning_rate": 9.844047473581682e-06, "loss": 0.7569, "step": 2258 }, { "epoch": 0.32225392296718974, "grad_norm": 1.4878278970718384, "learning_rate": 9.84385659606593e-06, "loss": 0.8058, "step": 2259 }, { "epoch": 0.3223965763195435, "grad_norm": 1.0476981401443481, "learning_rate": 9.84366560366255e-06, "loss": 0.9196, "step": 2260 }, { "epoch": 0.3225392296718973, "grad_norm": 1.2552111148834229, "learning_rate": 9.843474496376071e-06, "loss": 0.7388, "step": 2261 }, { "epoch": 0.32268188302425105, "grad_norm": 0.8368445038795471, "learning_rate": 9.843283274211026e-06, "loss": 0.7715, "step": 2262 }, { "epoch": 0.32282453637660485, "grad_norm": 1.0327579975128174, "learning_rate": 9.84309193717195e-06, "loss": 0.7277, "step": 2263 }, { "epoch": 0.3229671897289586, "grad_norm": 0.6204162836074829, "learning_rate": 9.84290048526338e-06, "loss": 0.8427, "step": 2264 }, { "epoch": 0.3231098430813124, "grad_norm": 1.0847949981689453, "learning_rate": 9.842708918489861e-06, "loss": 0.7236, "step": 2265 }, { "epoch": 0.3232524964336662, "grad_norm": 1.2121959924697876, "learning_rate": 9.842517236855932e-06, "loss": 0.7765, "step": 2266 }, { "epoch": 0.32339514978601996, "grad_norm": 0.678694486618042, "learning_rate": 9.842325440366143e-06, "loss": 0.7895, "step": 2267 }, { "epoch": 0.32353780313837377, "grad_norm": 1.0767388343811035, "learning_rate": 9.842133529025043e-06, "loss": 0.7751, "step": 2268 }, { "epoch": 0.3236804564907275, "grad_norm": 0.8977599143981934, "learning_rate": 9.841941502837177e-06, "loss": 0.7958, "step": 2269 }, { "epoch": 0.3238231098430813, "grad_norm": 1.0656965970993042, "learning_rate": 9.84174936180711e-06, "loss": 0.8319, "step": 2270 }, { "epoch": 0.3239657631954351, "grad_norm": 1.094441294670105, "learning_rate": 9.841557105939391e-06, "loss": 0.775, "step": 2271 }, { "epoch": 0.3241084165477889, "grad_norm": 0.715997576713562, "learning_rate": 9.841364735238586e-06, "loss": 0.7562, "step": 2272 }, { "epoch": 0.32425106990014263, "grad_norm": 0.9680311679840088, "learning_rate": 9.841172249709252e-06, "loss": 0.7118, "step": 2273 }, { "epoch": 0.32439372325249644, "grad_norm": 0.9165420532226562, "learning_rate": 9.84097964935596e-06, "loss": 0.7522, "step": 2274 }, { "epoch": 0.32453637660485024, "grad_norm": 0.9857007265090942, "learning_rate": 9.840786934183273e-06, "loss": 0.6922, "step": 2275 }, { "epoch": 0.324679029957204, "grad_norm": 0.7574713826179504, "learning_rate": 9.840594104195765e-06, "loss": 0.8032, "step": 2276 }, { "epoch": 0.3248216833095578, "grad_norm": 0.7682188153266907, "learning_rate": 9.840401159398007e-06, "loss": 0.8562, "step": 2277 }, { "epoch": 0.32496433666191155, "grad_norm": 0.9269386529922485, "learning_rate": 9.840208099794579e-06, "loss": 0.7957, "step": 2278 }, { "epoch": 0.32510699001426535, "grad_norm": 0.9559633135795593, "learning_rate": 9.840014925390057e-06, "loss": 0.7634, "step": 2279 }, { "epoch": 0.3252496433666191, "grad_norm": 1.1983888149261475, "learning_rate": 9.839821636189024e-06, "loss": 0.7992, "step": 2280 }, { "epoch": 0.3253922967189729, "grad_norm": 0.7660537362098694, "learning_rate": 9.839628232196064e-06, "loss": 0.8107, "step": 2281 }, { "epoch": 0.32553495007132666, "grad_norm": 0.7760874032974243, "learning_rate": 9.839434713415765e-06, "loss": 0.7389, "step": 2282 }, { "epoch": 0.32567760342368046, "grad_norm": 1.4310803413391113, "learning_rate": 9.839241079852718e-06, "loss": 0.7812, "step": 2283 }, { "epoch": 0.3258202567760342, "grad_norm": 1.1486469507217407, "learning_rate": 9.839047331511511e-06, "loss": 0.7388, "step": 2284 }, { "epoch": 0.325962910128388, "grad_norm": 0.944904625415802, "learning_rate": 9.838853468396744e-06, "loss": 0.823, "step": 2285 }, { "epoch": 0.3261055634807418, "grad_norm": 0.762138843536377, "learning_rate": 9.838659490513015e-06, "loss": 0.8101, "step": 2286 }, { "epoch": 0.3262482168330956, "grad_norm": 0.6126900315284729, "learning_rate": 9.838465397864921e-06, "loss": 0.851, "step": 2287 }, { "epoch": 0.3263908701854494, "grad_norm": 0.7258740067481995, "learning_rate": 9.838271190457067e-06, "loss": 0.8152, "step": 2288 }, { "epoch": 0.3265335235378031, "grad_norm": 0.9702327847480774, "learning_rate": 9.838076868294061e-06, "loss": 0.816, "step": 2289 }, { "epoch": 0.32667617689015693, "grad_norm": 0.762395441532135, "learning_rate": 9.837882431380514e-06, "loss": 0.8324, "step": 2290 }, { "epoch": 0.3268188302425107, "grad_norm": 0.8828544020652771, "learning_rate": 9.837687879721031e-06, "loss": 0.7704, "step": 2291 }, { "epoch": 0.3269614835948645, "grad_norm": 0.9014937281608582, "learning_rate": 9.837493213320233e-06, "loss": 0.6042, "step": 2292 }, { "epoch": 0.32710413694721824, "grad_norm": 0.8077760934829712, "learning_rate": 9.837298432182731e-06, "loss": 0.8175, "step": 2293 }, { "epoch": 0.32724679029957204, "grad_norm": 1.1762315034866333, "learning_rate": 9.83710353631315e-06, "loss": 0.9323, "step": 2294 }, { "epoch": 0.3273894436519258, "grad_norm": 1.0175790786743164, "learning_rate": 9.83690852571611e-06, "loss": 0.8173, "step": 2295 }, { "epoch": 0.3275320970042796, "grad_norm": 0.9739888906478882, "learning_rate": 9.836713400396239e-06, "loss": 0.8183, "step": 2296 }, { "epoch": 0.3276747503566334, "grad_norm": 0.8701342940330505, "learning_rate": 9.836518160358162e-06, "loss": 0.8104, "step": 2297 }, { "epoch": 0.32781740370898715, "grad_norm": 0.6412081718444824, "learning_rate": 9.83632280560651e-06, "loss": 0.7953, "step": 2298 }, { "epoch": 0.32796005706134096, "grad_norm": 0.854129433631897, "learning_rate": 9.836127336145919e-06, "loss": 0.7707, "step": 2299 }, { "epoch": 0.3281027104136947, "grad_norm": 0.8388704657554626, "learning_rate": 9.835931751981022e-06, "loss": 0.7845, "step": 2300 }, { "epoch": 0.3282453637660485, "grad_norm": 0.7747339010238647, "learning_rate": 9.83573605311646e-06, "loss": 0.8175, "step": 2301 }, { "epoch": 0.32838801711840226, "grad_norm": 0.6816387176513672, "learning_rate": 9.835540239556876e-06, "loss": 0.7926, "step": 2302 }, { "epoch": 0.32853067047075607, "grad_norm": 0.9259821176528931, "learning_rate": 9.83534431130691e-06, "loss": 0.7238, "step": 2303 }, { "epoch": 0.3286733238231098, "grad_norm": 0.8266289830207825, "learning_rate": 9.83514826837121e-06, "loss": 0.842, "step": 2304 }, { "epoch": 0.3288159771754636, "grad_norm": 0.8679077625274658, "learning_rate": 9.83495211075443e-06, "loss": 0.7786, "step": 2305 }, { "epoch": 0.32895863052781743, "grad_norm": 1.1242318153381348, "learning_rate": 9.834755838461219e-06, "loss": 0.7827, "step": 2306 }, { "epoch": 0.3291012838801712, "grad_norm": 0.7000129222869873, "learning_rate": 9.834559451496234e-06, "loss": 0.8138, "step": 2307 }, { "epoch": 0.329243937232525, "grad_norm": 0.8433248400688171, "learning_rate": 9.834362949864129e-06, "loss": 0.779, "step": 2308 }, { "epoch": 0.32938659058487874, "grad_norm": 1.2266490459442139, "learning_rate": 9.834166333569569e-06, "loss": 0.8196, "step": 2309 }, { "epoch": 0.32952924393723254, "grad_norm": 0.9142070412635803, "learning_rate": 9.833969602617218e-06, "loss": 0.55, "step": 2310 }, { "epoch": 0.3296718972895863, "grad_norm": 0.6996576189994812, "learning_rate": 9.833772757011736e-06, "loss": 0.8136, "step": 2311 }, { "epoch": 0.3298145506419401, "grad_norm": 1.056432843208313, "learning_rate": 9.8335757967578e-06, "loss": 0.7629, "step": 2312 }, { "epoch": 0.32995720399429385, "grad_norm": 0.7544792890548706, "learning_rate": 9.833378721860075e-06, "loss": 0.7414, "step": 2313 }, { "epoch": 0.33009985734664765, "grad_norm": 0.9668774604797363, "learning_rate": 9.833181532323237e-06, "loss": 0.6001, "step": 2314 }, { "epoch": 0.3302425106990014, "grad_norm": 0.7365550398826599, "learning_rate": 9.832984228151965e-06, "loss": 0.7766, "step": 2315 }, { "epoch": 0.3303851640513552, "grad_norm": 0.9288215637207031, "learning_rate": 9.832786809350938e-06, "loss": 0.7828, "step": 2316 }, { "epoch": 0.330527817403709, "grad_norm": 0.7639610767364502, "learning_rate": 9.832589275924838e-06, "loss": 0.7315, "step": 2317 }, { "epoch": 0.33067047075606276, "grad_norm": 1.0120751857757568, "learning_rate": 9.832391627878349e-06, "loss": 0.7244, "step": 2318 }, { "epoch": 0.33081312410841657, "grad_norm": 0.9512212872505188, "learning_rate": 9.832193865216159e-06, "loss": 0.7466, "step": 2319 }, { "epoch": 0.3309557774607703, "grad_norm": 1.036668300628662, "learning_rate": 9.831995987942961e-06, "loss": 0.8128, "step": 2320 }, { "epoch": 0.3310984308131241, "grad_norm": 0.7296528220176697, "learning_rate": 9.831797996063446e-06, "loss": 0.7596, "step": 2321 }, { "epoch": 0.3312410841654779, "grad_norm": 0.8461437821388245, "learning_rate": 9.83159988958231e-06, "loss": 0.732, "step": 2322 }, { "epoch": 0.3313837375178317, "grad_norm": 0.9580225348472595, "learning_rate": 9.831401668504252e-06, "loss": 0.7528, "step": 2323 }, { "epoch": 0.33152639087018543, "grad_norm": 0.9040142893791199, "learning_rate": 9.831203332833977e-06, "loss": 0.7836, "step": 2324 }, { "epoch": 0.33166904422253923, "grad_norm": 0.7880234718322754, "learning_rate": 9.831004882576183e-06, "loss": 0.7709, "step": 2325 }, { "epoch": 0.331811697574893, "grad_norm": 0.8258834481239319, "learning_rate": 9.830806317735581e-06, "loss": 0.7916, "step": 2326 }, { "epoch": 0.3319543509272468, "grad_norm": 0.7732114195823669, "learning_rate": 9.83060763831688e-06, "loss": 0.7807, "step": 2327 }, { "epoch": 0.3320970042796006, "grad_norm": 0.9464761018753052, "learning_rate": 9.83040884432479e-06, "loss": 0.837, "step": 2328 }, { "epoch": 0.33223965763195434, "grad_norm": 0.8679453730583191, "learning_rate": 9.83020993576403e-06, "loss": 0.8117, "step": 2329 }, { "epoch": 0.33238231098430815, "grad_norm": 0.8833606839179993, "learning_rate": 9.830010912639315e-06, "loss": 0.8299, "step": 2330 }, { "epoch": 0.3325249643366619, "grad_norm": 1.0865508317947388, "learning_rate": 9.829811774955367e-06, "loss": 0.7529, "step": 2331 }, { "epoch": 0.3326676176890157, "grad_norm": 0.9653570652008057, "learning_rate": 9.829612522716908e-06, "loss": 0.8046, "step": 2332 }, { "epoch": 0.33281027104136945, "grad_norm": 0.8964740633964539, "learning_rate": 9.829413155928662e-06, "loss": 0.7528, "step": 2333 }, { "epoch": 0.33295292439372326, "grad_norm": 0.8548893928527832, "learning_rate": 9.829213674595362e-06, "loss": 0.7959, "step": 2334 }, { "epoch": 0.333095577746077, "grad_norm": 1.0186488628387451, "learning_rate": 9.829014078721736e-06, "loss": 0.7133, "step": 2335 }, { "epoch": 0.3332382310984308, "grad_norm": 0.8716478943824768, "learning_rate": 9.82881436831252e-06, "loss": 0.7676, "step": 2336 }, { "epoch": 0.3333808844507846, "grad_norm": 0.900202751159668, "learning_rate": 9.82861454337245e-06, "loss": 0.8241, "step": 2337 }, { "epoch": 0.33352353780313837, "grad_norm": 0.6762422919273376, "learning_rate": 9.828414603906265e-06, "loss": 0.7335, "step": 2338 }, { "epoch": 0.3336661911554922, "grad_norm": 0.850338339805603, "learning_rate": 9.82821454991871e-06, "loss": 0.867, "step": 2339 }, { "epoch": 0.3338088445078459, "grad_norm": 1.1082850694656372, "learning_rate": 9.828014381414525e-06, "loss": 0.755, "step": 2340 }, { "epoch": 0.33395149786019973, "grad_norm": 0.6684832572937012, "learning_rate": 9.82781409839846e-06, "loss": 0.8119, "step": 2341 }, { "epoch": 0.3340941512125535, "grad_norm": 0.7720212936401367, "learning_rate": 9.827613700875267e-06, "loss": 0.8271, "step": 2342 }, { "epoch": 0.3342368045649073, "grad_norm": 0.7583309412002563, "learning_rate": 9.827413188849698e-06, "loss": 0.758, "step": 2343 }, { "epoch": 0.33437945791726104, "grad_norm": 0.9926135540008545, "learning_rate": 9.827212562326508e-06, "loss": 0.8526, "step": 2344 }, { "epoch": 0.33452211126961484, "grad_norm": 0.7970448732376099, "learning_rate": 9.827011821310455e-06, "loss": 0.8293, "step": 2345 }, { "epoch": 0.3346647646219686, "grad_norm": 0.8557180762290955, "learning_rate": 9.826810965806302e-06, "loss": 0.7789, "step": 2346 }, { "epoch": 0.3348074179743224, "grad_norm": 0.8372285962104797, "learning_rate": 9.826609995818812e-06, "loss": 0.6468, "step": 2347 }, { "epoch": 0.3349500713266762, "grad_norm": 0.9167457222938538, "learning_rate": 9.826408911352753e-06, "loss": 0.8412, "step": 2348 }, { "epoch": 0.33509272467902995, "grad_norm": 1.0763708353042603, "learning_rate": 9.826207712412891e-06, "loss": 0.7799, "step": 2349 }, { "epoch": 0.33523537803138376, "grad_norm": 0.9208558201789856, "learning_rate": 9.826006399004003e-06, "loss": 0.8056, "step": 2350 }, { "epoch": 0.3353780313837375, "grad_norm": 0.7773422002792358, "learning_rate": 9.82580497113086e-06, "loss": 0.7237, "step": 2351 }, { "epoch": 0.3355206847360913, "grad_norm": 0.7527308464050293, "learning_rate": 9.82560342879824e-06, "loss": 0.8651, "step": 2352 }, { "epoch": 0.33566333808844506, "grad_norm": 0.9323861598968506, "learning_rate": 9.825401772010923e-06, "loss": 0.7686, "step": 2353 }, { "epoch": 0.33580599144079887, "grad_norm": 0.8846133947372437, "learning_rate": 9.825200000773694e-06, "loss": 0.7928, "step": 2354 }, { "epoch": 0.3359486447931526, "grad_norm": 0.8226498961448669, "learning_rate": 9.824998115091336e-06, "loss": 0.7947, "step": 2355 }, { "epoch": 0.3360912981455064, "grad_norm": 0.9621587991714478, "learning_rate": 9.82479611496864e-06, "loss": 0.783, "step": 2356 }, { "epoch": 0.3362339514978602, "grad_norm": 0.9527766108512878, "learning_rate": 9.824594000410395e-06, "loss": 0.8012, "step": 2357 }, { "epoch": 0.336376604850214, "grad_norm": 1.204533576965332, "learning_rate": 9.824391771421398e-06, "loss": 0.8526, "step": 2358 }, { "epoch": 0.3365192582025678, "grad_norm": 0.8632824420928955, "learning_rate": 9.824189428006441e-06, "loss": 0.8141, "step": 2359 }, { "epoch": 0.33666191155492153, "grad_norm": 1.0136874914169312, "learning_rate": 9.823986970170324e-06, "loss": 0.8182, "step": 2360 }, { "epoch": 0.33680456490727534, "grad_norm": 1.1227818727493286, "learning_rate": 9.823784397917852e-06, "loss": 0.7657, "step": 2361 }, { "epoch": 0.3369472182596291, "grad_norm": 0.689673662185669, "learning_rate": 9.823581711253828e-06, "loss": 0.7461, "step": 2362 }, { "epoch": 0.3370898716119829, "grad_norm": 1.087032675743103, "learning_rate": 9.82337891018306e-06, "loss": 0.7345, "step": 2363 }, { "epoch": 0.33723252496433664, "grad_norm": 0.8440565466880798, "learning_rate": 9.823175994710356e-06, "loss": 0.7913, "step": 2364 }, { "epoch": 0.33737517831669045, "grad_norm": 1.036074161529541, "learning_rate": 9.82297296484053e-06, "loss": 0.8048, "step": 2365 }, { "epoch": 0.3375178316690442, "grad_norm": 1.0963290929794312, "learning_rate": 9.8227698205784e-06, "loss": 0.7909, "step": 2366 }, { "epoch": 0.337660485021398, "grad_norm": 0.7091878652572632, "learning_rate": 9.82256656192878e-06, "loss": 0.8131, "step": 2367 }, { "epoch": 0.3378031383737518, "grad_norm": 0.752960741519928, "learning_rate": 9.822363188896492e-06, "loss": 0.7583, "step": 2368 }, { "epoch": 0.33794579172610556, "grad_norm": 0.8184833526611328, "learning_rate": 9.822159701486363e-06, "loss": 0.8417, "step": 2369 }, { "epoch": 0.33808844507845937, "grad_norm": 0.7302771210670471, "learning_rate": 9.821956099703213e-06, "loss": 0.7717, "step": 2370 }, { "epoch": 0.3382310984308131, "grad_norm": 1.1408780813217163, "learning_rate": 9.821752383551879e-06, "loss": 0.7666, "step": 2371 }, { "epoch": 0.3383737517831669, "grad_norm": 1.0522292852401733, "learning_rate": 9.821548553037185e-06, "loss": 0.8067, "step": 2372 }, { "epoch": 0.33851640513552067, "grad_norm": 0.8145163059234619, "learning_rate": 9.821344608163972e-06, "loss": 0.8059, "step": 2373 }, { "epoch": 0.3386590584878745, "grad_norm": 0.6594700813293457, "learning_rate": 9.821140548937073e-06, "loss": 0.7328, "step": 2374 }, { "epoch": 0.3388017118402282, "grad_norm": 0.9611747860908508, "learning_rate": 9.82093637536133e-06, "loss": 0.8003, "step": 2375 }, { "epoch": 0.33894436519258203, "grad_norm": 1.3086268901824951, "learning_rate": 9.820732087441586e-06, "loss": 0.7714, "step": 2376 }, { "epoch": 0.3390870185449358, "grad_norm": 0.7010059356689453, "learning_rate": 9.820527685182684e-06, "loss": 0.8639, "step": 2377 }, { "epoch": 0.3392296718972896, "grad_norm": 0.9378232359886169, "learning_rate": 9.820323168589473e-06, "loss": 0.7348, "step": 2378 }, { "epoch": 0.3393723252496434, "grad_norm": 0.8938297629356384, "learning_rate": 9.820118537666805e-06, "loss": 0.8376, "step": 2379 }, { "epoch": 0.33951497860199714, "grad_norm": 0.7937347292900085, "learning_rate": 9.819913792419532e-06, "loss": 0.8924, "step": 2380 }, { "epoch": 0.33965763195435095, "grad_norm": 0.9030845165252686, "learning_rate": 9.819708932852512e-06, "loss": 0.8003, "step": 2381 }, { "epoch": 0.3398002853067047, "grad_norm": 0.7395643591880798, "learning_rate": 9.8195039589706e-06, "loss": 0.8144, "step": 2382 }, { "epoch": 0.3399429386590585, "grad_norm": 1.0145503282546997, "learning_rate": 9.819298870778663e-06, "loss": 0.77, "step": 2383 }, { "epoch": 0.34008559201141225, "grad_norm": 0.884653627872467, "learning_rate": 9.819093668281561e-06, "loss": 0.8517, "step": 2384 }, { "epoch": 0.34022824536376606, "grad_norm": 0.9484686851501465, "learning_rate": 9.818888351484164e-06, "loss": 0.7931, "step": 2385 }, { "epoch": 0.3403708987161198, "grad_norm": 0.7944343090057373, "learning_rate": 9.818682920391337e-06, "loss": 0.7936, "step": 2386 }, { "epoch": 0.3405135520684736, "grad_norm": 1.1869202852249146, "learning_rate": 9.81847737500796e-06, "loss": 0.7966, "step": 2387 }, { "epoch": 0.34065620542082736, "grad_norm": 0.7878841161727905, "learning_rate": 9.818271715338903e-06, "loss": 0.7684, "step": 2388 }, { "epoch": 0.34079885877318117, "grad_norm": 0.9743000268936157, "learning_rate": 9.818065941389044e-06, "loss": 0.7204, "step": 2389 }, { "epoch": 0.340941512125535, "grad_norm": 0.7342555522918701, "learning_rate": 9.817860053163266e-06, "loss": 0.8814, "step": 2390 }, { "epoch": 0.3410841654778887, "grad_norm": 0.9856871962547302, "learning_rate": 9.81765405066645e-06, "loss": 0.8494, "step": 2391 }, { "epoch": 0.34122681883024253, "grad_norm": 0.9224755764007568, "learning_rate": 9.817447933903481e-06, "loss": 0.7714, "step": 2392 }, { "epoch": 0.3413694721825963, "grad_norm": 0.8648902177810669, "learning_rate": 9.817241702879253e-06, "loss": 0.8198, "step": 2393 }, { "epoch": 0.3415121255349501, "grad_norm": 0.7871795892715454, "learning_rate": 9.817035357598652e-06, "loss": 0.7751, "step": 2394 }, { "epoch": 0.34165477888730383, "grad_norm": 0.8133195042610168, "learning_rate": 9.816828898066574e-06, "loss": 0.8081, "step": 2395 }, { "epoch": 0.34179743223965764, "grad_norm": 0.8734281659126282, "learning_rate": 9.816622324287916e-06, "loss": 0.8192, "step": 2396 }, { "epoch": 0.3419400855920114, "grad_norm": 0.8564184308052063, "learning_rate": 9.816415636267578e-06, "loss": 0.7833, "step": 2397 }, { "epoch": 0.3420827389443652, "grad_norm": 1.0076076984405518, "learning_rate": 9.81620883401046e-06, "loss": 0.8053, "step": 2398 }, { "epoch": 0.342225392296719, "grad_norm": 0.8750330805778503, "learning_rate": 9.816001917521472e-06, "loss": 0.7704, "step": 2399 }, { "epoch": 0.34236804564907275, "grad_norm": 0.8701121807098389, "learning_rate": 9.815794886805517e-06, "loss": 0.7769, "step": 2400 }, { "epoch": 0.34251069900142656, "grad_norm": 0.8542219400405884, "learning_rate": 9.815587741867507e-06, "loss": 0.8211, "step": 2401 }, { "epoch": 0.3426533523537803, "grad_norm": 1.0593914985656738, "learning_rate": 9.815380482712353e-06, "loss": 0.7406, "step": 2402 }, { "epoch": 0.3427960057061341, "grad_norm": 1.03482985496521, "learning_rate": 9.815173109344976e-06, "loss": 0.7539, "step": 2403 }, { "epoch": 0.34293865905848786, "grad_norm": 0.9401360154151917, "learning_rate": 9.814965621770289e-06, "loss": 0.7953, "step": 2404 }, { "epoch": 0.34308131241084167, "grad_norm": 1.158489465713501, "learning_rate": 9.814758019993216e-06, "loss": 0.7696, "step": 2405 }, { "epoch": 0.3432239657631954, "grad_norm": 1.2007076740264893, "learning_rate": 9.814550304018682e-06, "loss": 0.8182, "step": 2406 }, { "epoch": 0.3433666191155492, "grad_norm": 1.037165880203247, "learning_rate": 9.81434247385161e-06, "loss": 0.8141, "step": 2407 }, { "epoch": 0.34350927246790297, "grad_norm": 1.1913652420043945, "learning_rate": 9.814134529496934e-06, "loss": 0.7943, "step": 2408 }, { "epoch": 0.3436519258202568, "grad_norm": 1.300634741783142, "learning_rate": 9.813926470959582e-06, "loss": 0.6194, "step": 2409 }, { "epoch": 0.3437945791726106, "grad_norm": 0.9805374145507812, "learning_rate": 9.81371829824449e-06, "loss": 0.7247, "step": 2410 }, { "epoch": 0.34393723252496433, "grad_norm": 1.0163896083831787, "learning_rate": 9.813510011356597e-06, "loss": 0.8524, "step": 2411 }, { "epoch": 0.34407988587731814, "grad_norm": 0.9004878997802734, "learning_rate": 9.813301610300841e-06, "loss": 0.8224, "step": 2412 }, { "epoch": 0.3442225392296719, "grad_norm": 0.8286635875701904, "learning_rate": 9.813093095082167e-06, "loss": 0.8188, "step": 2413 }, { "epoch": 0.3443651925820257, "grad_norm": 0.9868021011352539, "learning_rate": 9.81288446570552e-06, "loss": 0.8024, "step": 2414 }, { "epoch": 0.34450784593437944, "grad_norm": 0.8571799993515015, "learning_rate": 9.812675722175846e-06, "loss": 0.7445, "step": 2415 }, { "epoch": 0.34465049928673325, "grad_norm": 0.7464162111282349, "learning_rate": 9.812466864498097e-06, "loss": 0.7484, "step": 2416 }, { "epoch": 0.344793152639087, "grad_norm": 1.1445914506912231, "learning_rate": 9.81225789267723e-06, "loss": 0.8077, "step": 2417 }, { "epoch": 0.3449358059914408, "grad_norm": 1.1138544082641602, "learning_rate": 9.812048806718199e-06, "loss": 0.8036, "step": 2418 }, { "epoch": 0.34507845934379455, "grad_norm": 1.2120366096496582, "learning_rate": 9.811839606625962e-06, "loss": 0.7882, "step": 2419 }, { "epoch": 0.34522111269614836, "grad_norm": 0.9743795990943909, "learning_rate": 9.811630292405484e-06, "loss": 0.7773, "step": 2420 }, { "epoch": 0.34536376604850216, "grad_norm": 0.919040322303772, "learning_rate": 9.811420864061725e-06, "loss": 0.7424, "step": 2421 }, { "epoch": 0.3455064194008559, "grad_norm": 0.8817082643508911, "learning_rate": 9.811211321599657e-06, "loss": 0.8003, "step": 2422 }, { "epoch": 0.3456490727532097, "grad_norm": 1.0432809591293335, "learning_rate": 9.811001665024246e-06, "loss": 0.7844, "step": 2423 }, { "epoch": 0.34579172610556347, "grad_norm": 1.0473785400390625, "learning_rate": 9.810791894340467e-06, "loss": 0.7574, "step": 2424 }, { "epoch": 0.3459343794579173, "grad_norm": 0.7565572261810303, "learning_rate": 9.810582009553296e-06, "loss": 0.7903, "step": 2425 }, { "epoch": 0.346077032810271, "grad_norm": 1.2424083948135376, "learning_rate": 9.81037201066771e-06, "loss": 0.8094, "step": 2426 }, { "epoch": 0.34621968616262483, "grad_norm": 0.8809653520584106, "learning_rate": 9.810161897688688e-06, "loss": 0.739, "step": 2427 }, { "epoch": 0.3463623395149786, "grad_norm": 1.0089491605758667, "learning_rate": 9.809951670621218e-06, "loss": 0.751, "step": 2428 }, { "epoch": 0.3465049928673324, "grad_norm": 0.7561847567558289, "learning_rate": 9.809741329470281e-06, "loss": 0.8513, "step": 2429 }, { "epoch": 0.3466476462196862, "grad_norm": 0.7132732272148132, "learning_rate": 9.809530874240869e-06, "loss": 0.7771, "step": 2430 }, { "epoch": 0.34679029957203994, "grad_norm": 1.2511911392211914, "learning_rate": 9.809320304937974e-06, "loss": 0.8489, "step": 2431 }, { "epoch": 0.34693295292439374, "grad_norm": 0.8901185989379883, "learning_rate": 9.809109621566589e-06, "loss": 0.7419, "step": 2432 }, { "epoch": 0.3470756062767475, "grad_norm": 2.0431883335113525, "learning_rate": 9.808898824131712e-06, "loss": 0.6619, "step": 2433 }, { "epoch": 0.3472182596291013, "grad_norm": 0.8219075798988342, "learning_rate": 9.808687912638342e-06, "loss": 0.8378, "step": 2434 }, { "epoch": 0.34736091298145505, "grad_norm": 1.001102328300476, "learning_rate": 9.80847688709148e-06, "loss": 0.8029, "step": 2435 }, { "epoch": 0.34750356633380886, "grad_norm": 0.7215900421142578, "learning_rate": 9.808265747496135e-06, "loss": 0.8174, "step": 2436 }, { "epoch": 0.3476462196861626, "grad_norm": 0.9470601081848145, "learning_rate": 9.808054493857311e-06, "loss": 0.7696, "step": 2437 }, { "epoch": 0.3477888730385164, "grad_norm": 0.902509331703186, "learning_rate": 9.807843126180022e-06, "loss": 0.7846, "step": 2438 }, { "epoch": 0.34793152639087016, "grad_norm": 0.8843024969100952, "learning_rate": 9.807631644469277e-06, "loss": 0.8151, "step": 2439 }, { "epoch": 0.34807417974322397, "grad_norm": 0.8945369124412537, "learning_rate": 9.807420048730095e-06, "loss": 0.8141, "step": 2440 }, { "epoch": 0.34821683309557777, "grad_norm": 0.9173300862312317, "learning_rate": 9.807208338967495e-06, "loss": 0.8004, "step": 2441 }, { "epoch": 0.3483594864479315, "grad_norm": 0.9097233414649963, "learning_rate": 9.806996515186498e-06, "loss": 0.8155, "step": 2442 }, { "epoch": 0.3485021398002853, "grad_norm": 0.7454264760017395, "learning_rate": 9.806784577392125e-06, "loss": 0.8061, "step": 2443 }, { "epoch": 0.3486447931526391, "grad_norm": 0.9167298674583435, "learning_rate": 9.80657252558941e-06, "loss": 0.7576, "step": 2444 }, { "epoch": 0.3487874465049929, "grad_norm": 0.8795915842056274, "learning_rate": 9.806360359783374e-06, "loss": 0.7774, "step": 2445 }, { "epoch": 0.34893009985734663, "grad_norm": 0.7418687343597412, "learning_rate": 9.806148079979052e-06, "loss": 0.7697, "step": 2446 }, { "epoch": 0.34907275320970044, "grad_norm": 1.1968201398849487, "learning_rate": 9.805935686181483e-06, "loss": 0.7797, "step": 2447 }, { "epoch": 0.3492154065620542, "grad_norm": 0.8256352543830872, "learning_rate": 9.805723178395699e-06, "loss": 0.7957, "step": 2448 }, { "epoch": 0.349358059914408, "grad_norm": 0.9797470569610596, "learning_rate": 9.805510556626744e-06, "loss": 0.719, "step": 2449 }, { "epoch": 0.34950071326676174, "grad_norm": 0.8725929856300354, "learning_rate": 9.80529782087966e-06, "loss": 0.8111, "step": 2450 }, { "epoch": 0.34964336661911555, "grad_norm": 0.9257572293281555, "learning_rate": 9.805084971159493e-06, "loss": 0.7767, "step": 2451 }, { "epoch": 0.34978601997146935, "grad_norm": 0.8392630815505981, "learning_rate": 9.80487200747129e-06, "loss": 0.8188, "step": 2452 }, { "epoch": 0.3499286733238231, "grad_norm": 0.9308575987815857, "learning_rate": 9.804658929820104e-06, "loss": 0.748, "step": 2453 }, { "epoch": 0.3500713266761769, "grad_norm": 0.8789147734642029, "learning_rate": 9.804445738210988e-06, "loss": 0.8509, "step": 2454 }, { "epoch": 0.35021398002853066, "grad_norm": 0.9652302861213684, "learning_rate": 9.804232432648997e-06, "loss": 0.7711, "step": 2455 }, { "epoch": 0.35035663338088446, "grad_norm": 1.293696641921997, "learning_rate": 9.804019013139192e-06, "loss": 0.8026, "step": 2456 }, { "epoch": 0.3504992867332382, "grad_norm": 0.6802603602409363, "learning_rate": 9.803805479686635e-06, "loss": 0.7896, "step": 2457 }, { "epoch": 0.350641940085592, "grad_norm": 0.9308311939239502, "learning_rate": 9.80359183229639e-06, "loss": 0.7302, "step": 2458 }, { "epoch": 0.35078459343794577, "grad_norm": 0.8198142647743225, "learning_rate": 9.803378070973524e-06, "loss": 0.7897, "step": 2459 }, { "epoch": 0.3509272467902996, "grad_norm": 1.0778833627700806, "learning_rate": 9.80316419572311e-06, "loss": 0.8393, "step": 2460 }, { "epoch": 0.3510699001426534, "grad_norm": 0.6993482708930969, "learning_rate": 9.802950206550215e-06, "loss": 0.7964, "step": 2461 }, { "epoch": 0.35121255349500713, "grad_norm": 0.9422694444656372, "learning_rate": 9.80273610345992e-06, "loss": 0.6952, "step": 2462 }, { "epoch": 0.35135520684736093, "grad_norm": 1.0927560329437256, "learning_rate": 9.802521886457298e-06, "loss": 0.9076, "step": 2463 }, { "epoch": 0.3514978601997147, "grad_norm": 0.8735848069190979, "learning_rate": 9.802307555547436e-06, "loss": 0.7146, "step": 2464 }, { "epoch": 0.3516405135520685, "grad_norm": 1.0025066137313843, "learning_rate": 9.802093110735414e-06, "loss": 0.7766, "step": 2465 }, { "epoch": 0.35178316690442224, "grad_norm": 0.7589578628540039, "learning_rate": 9.801878552026318e-06, "loss": 0.8558, "step": 2466 }, { "epoch": 0.35192582025677605, "grad_norm": 0.7957066297531128, "learning_rate": 9.801663879425236e-06, "loss": 0.7767, "step": 2467 }, { "epoch": 0.3520684736091298, "grad_norm": 2.2545599937438965, "learning_rate": 9.801449092937263e-06, "loss": 0.644, "step": 2468 }, { "epoch": 0.3522111269614836, "grad_norm": 0.9092872738838196, "learning_rate": 9.80123419256749e-06, "loss": 0.7203, "step": 2469 }, { "epoch": 0.35235378031383735, "grad_norm": 1.0055960416793823, "learning_rate": 9.801019178321017e-06, "loss": 0.7836, "step": 2470 }, { "epoch": 0.35249643366619116, "grad_norm": 3.424903631210327, "learning_rate": 9.800804050202942e-06, "loss": 0.6291, "step": 2471 }, { "epoch": 0.35263908701854496, "grad_norm": 1.0196425914764404, "learning_rate": 9.800588808218368e-06, "loss": 0.7185, "step": 2472 }, { "epoch": 0.3527817403708987, "grad_norm": 0.8520036935806274, "learning_rate": 9.800373452372399e-06, "loss": 0.6775, "step": 2473 }, { "epoch": 0.3529243937232525, "grad_norm": 0.9552729725837708, "learning_rate": 9.800157982670145e-06, "loss": 0.7737, "step": 2474 }, { "epoch": 0.35306704707560627, "grad_norm": 0.7399102449417114, "learning_rate": 9.799942399116713e-06, "loss": 0.7775, "step": 2475 }, { "epoch": 0.35320970042796007, "grad_norm": 1.059720754623413, "learning_rate": 9.79972670171722e-06, "loss": 0.8431, "step": 2476 }, { "epoch": 0.3533523537803138, "grad_norm": 1.0841524600982666, "learning_rate": 9.799510890476782e-06, "loss": 0.7776, "step": 2477 }, { "epoch": 0.3534950071326676, "grad_norm": 0.7432464361190796, "learning_rate": 9.799294965400514e-06, "loss": 0.8011, "step": 2478 }, { "epoch": 0.3536376604850214, "grad_norm": 0.9959025979042053, "learning_rate": 9.799078926493539e-06, "loss": 0.7847, "step": 2479 }, { "epoch": 0.3537803138373752, "grad_norm": 1.0784518718719482, "learning_rate": 9.798862773760984e-06, "loss": 0.7722, "step": 2480 }, { "epoch": 0.35392296718972893, "grad_norm": 0.867393970489502, "learning_rate": 9.798646507207971e-06, "loss": 0.8095, "step": 2481 }, { "epoch": 0.35406562054208274, "grad_norm": 0.7331669330596924, "learning_rate": 9.798430126839633e-06, "loss": 0.805, "step": 2482 }, { "epoch": 0.35420827389443654, "grad_norm": 1.0410361289978027, "learning_rate": 9.798213632661102e-06, "loss": 0.8331, "step": 2483 }, { "epoch": 0.3543509272467903, "grad_norm": 1.0381349325180054, "learning_rate": 9.79799702467751e-06, "loss": 0.8218, "step": 2484 }, { "epoch": 0.3544935805991441, "grad_norm": 0.6517478823661804, "learning_rate": 9.797780302893998e-06, "loss": 0.7942, "step": 2485 }, { "epoch": 0.35463623395149785, "grad_norm": 0.8073909282684326, "learning_rate": 9.797563467315703e-06, "loss": 0.8717, "step": 2486 }, { "epoch": 0.35477888730385165, "grad_norm": 0.8968905806541443, "learning_rate": 9.797346517947772e-06, "loss": 0.8036, "step": 2487 }, { "epoch": 0.3549215406562054, "grad_norm": 0.7728807330131531, "learning_rate": 9.797129454795346e-06, "loss": 0.8009, "step": 2488 }, { "epoch": 0.3550641940085592, "grad_norm": 0.7919692993164062, "learning_rate": 9.796912277863577e-06, "loss": 0.8218, "step": 2489 }, { "epoch": 0.35520684736091296, "grad_norm": 0.9538540244102478, "learning_rate": 9.796694987157614e-06, "loss": 0.8023, "step": 2490 }, { "epoch": 0.35534950071326676, "grad_norm": 0.9211897850036621, "learning_rate": 9.796477582682611e-06, "loss": 0.7562, "step": 2491 }, { "epoch": 0.35549215406562057, "grad_norm": 0.9030013084411621, "learning_rate": 9.796260064443724e-06, "loss": 0.8064, "step": 2492 }, { "epoch": 0.3556348074179743, "grad_norm": 1.1324703693389893, "learning_rate": 9.796042432446114e-06, "loss": 0.8613, "step": 2493 }, { "epoch": 0.3557774607703281, "grad_norm": 0.8738144636154175, "learning_rate": 9.795824686694943e-06, "loss": 0.7492, "step": 2494 }, { "epoch": 0.3559201141226819, "grad_norm": 0.70047926902771, "learning_rate": 9.795606827195373e-06, "loss": 0.5744, "step": 2495 }, { "epoch": 0.3560627674750357, "grad_norm": 0.8143078088760376, "learning_rate": 9.795388853952573e-06, "loss": 0.8359, "step": 2496 }, { "epoch": 0.35620542082738943, "grad_norm": 1.0857017040252686, "learning_rate": 9.795170766971712e-06, "loss": 0.8786, "step": 2497 }, { "epoch": 0.35634807417974323, "grad_norm": 0.7692033648490906, "learning_rate": 9.794952566257963e-06, "loss": 0.7636, "step": 2498 }, { "epoch": 0.356490727532097, "grad_norm": 0.6650281548500061, "learning_rate": 9.794734251816503e-06, "loss": 0.878, "step": 2499 }, { "epoch": 0.3566333808844508, "grad_norm": 0.8415030837059021, "learning_rate": 9.794515823652507e-06, "loss": 0.8108, "step": 2500 }, { "epoch": 0.35677603423680454, "grad_norm": 0.9431477189064026, "learning_rate": 9.794297281771158e-06, "loss": 0.879, "step": 2501 }, { "epoch": 0.35691868758915835, "grad_norm": 0.7415030002593994, "learning_rate": 9.794078626177639e-06, "loss": 0.9159, "step": 2502 }, { "epoch": 0.35706134094151215, "grad_norm": 0.9204234480857849, "learning_rate": 9.793859856877134e-06, "loss": 0.7233, "step": 2503 }, { "epoch": 0.3572039942938659, "grad_norm": 0.7926920652389526, "learning_rate": 9.793640973874837e-06, "loss": 0.7959, "step": 2504 }, { "epoch": 0.3573466476462197, "grad_norm": 0.8344845175743103, "learning_rate": 9.793421977175934e-06, "loss": 0.8314, "step": 2505 }, { "epoch": 0.35748930099857346, "grad_norm": 0.7620161175727844, "learning_rate": 9.79320286678562e-06, "loss": 0.7839, "step": 2506 }, { "epoch": 0.35763195435092726, "grad_norm": 0.9139984250068665, "learning_rate": 9.792983642709096e-06, "loss": 0.7797, "step": 2507 }, { "epoch": 0.357774607703281, "grad_norm": 0.6945261359214783, "learning_rate": 9.792764304951557e-06, "loss": 0.7363, "step": 2508 }, { "epoch": 0.3579172610556348, "grad_norm": 0.8204848170280457, "learning_rate": 9.792544853518206e-06, "loss": 0.7733, "step": 2509 }, { "epoch": 0.35805991440798857, "grad_norm": 0.8774577379226685, "learning_rate": 9.792325288414252e-06, "loss": 0.7407, "step": 2510 }, { "epoch": 0.35820256776034237, "grad_norm": 1.1311724185943604, "learning_rate": 9.792105609644899e-06, "loss": 0.8133, "step": 2511 }, { "epoch": 0.3583452211126961, "grad_norm": 0.7154818177223206, "learning_rate": 9.791885817215357e-06, "loss": 0.777, "step": 2512 }, { "epoch": 0.3584878744650499, "grad_norm": 1.033969521522522, "learning_rate": 9.791665911130841e-06, "loss": 0.8339, "step": 2513 }, { "epoch": 0.35863052781740373, "grad_norm": 1.0158448219299316, "learning_rate": 9.791445891396568e-06, "loss": 0.8523, "step": 2514 }, { "epoch": 0.3587731811697575, "grad_norm": 0.899519145488739, "learning_rate": 9.791225758017752e-06, "loss": 0.7123, "step": 2515 }, { "epoch": 0.3589158345221113, "grad_norm": 0.9491230845451355, "learning_rate": 9.791005510999619e-06, "loss": 0.787, "step": 2516 }, { "epoch": 0.35905848787446504, "grad_norm": 0.6641083359718323, "learning_rate": 9.790785150347388e-06, "loss": 0.7788, "step": 2517 }, { "epoch": 0.35920114122681884, "grad_norm": 0.931577205657959, "learning_rate": 9.79056467606629e-06, "loss": 0.8076, "step": 2518 }, { "epoch": 0.3593437945791726, "grad_norm": 0.8346139192581177, "learning_rate": 9.790344088161551e-06, "loss": 0.7443, "step": 2519 }, { "epoch": 0.3594864479315264, "grad_norm": 1.0931133031845093, "learning_rate": 9.790123386638405e-06, "loss": 0.8247, "step": 2520 }, { "epoch": 0.35962910128388015, "grad_norm": 0.7270719408988953, "learning_rate": 9.789902571502086e-06, "loss": 0.8063, "step": 2521 }, { "epoch": 0.35977175463623395, "grad_norm": 0.8199694752693176, "learning_rate": 9.789681642757832e-06, "loss": 0.8354, "step": 2522 }, { "epoch": 0.35991440798858776, "grad_norm": 1.01858389377594, "learning_rate": 9.789460600410882e-06, "loss": 0.8221, "step": 2523 }, { "epoch": 0.3600570613409415, "grad_norm": 0.8585506081581116, "learning_rate": 9.789239444466478e-06, "loss": 0.8108, "step": 2524 }, { "epoch": 0.3601997146932953, "grad_norm": 1.1112741231918335, "learning_rate": 9.789018174929868e-06, "loss": 0.7597, "step": 2525 }, { "epoch": 0.36034236804564906, "grad_norm": 1.0793445110321045, "learning_rate": 9.788796791806298e-06, "loss": 0.7678, "step": 2526 }, { "epoch": 0.36048502139800287, "grad_norm": 0.8803876042366028, "learning_rate": 9.78857529510102e-06, "loss": 0.8029, "step": 2527 }, { "epoch": 0.3606276747503566, "grad_norm": 0.7571743130683899, "learning_rate": 9.788353684819288e-06, "loss": 0.5882, "step": 2528 }, { "epoch": 0.3607703281027104, "grad_norm": 0.9352931380271912, "learning_rate": 9.788131960966353e-06, "loss": 0.7699, "step": 2529 }, { "epoch": 0.3609129814550642, "grad_norm": 0.7634739875793457, "learning_rate": 9.787910123547481e-06, "loss": 0.841, "step": 2530 }, { "epoch": 0.361055634807418, "grad_norm": 0.8426674604415894, "learning_rate": 9.787688172567932e-06, "loss": 0.8585, "step": 2531 }, { "epoch": 0.36119828815977173, "grad_norm": 0.8924207091331482, "learning_rate": 9.787466108032968e-06, "loss": 0.7654, "step": 2532 }, { "epoch": 0.36134094151212554, "grad_norm": 0.8272600769996643, "learning_rate": 9.787243929947857e-06, "loss": 0.7827, "step": 2533 }, { "epoch": 0.36148359486447934, "grad_norm": 0.8443830013275146, "learning_rate": 9.787021638317868e-06, "loss": 0.775, "step": 2534 }, { "epoch": 0.3616262482168331, "grad_norm": 1.0147038698196411, "learning_rate": 9.786799233148273e-06, "loss": 0.7492, "step": 2535 }, { "epoch": 0.3617689015691869, "grad_norm": 0.9985344409942627, "learning_rate": 9.786576714444349e-06, "loss": 0.8384, "step": 2536 }, { "epoch": 0.36191155492154065, "grad_norm": 0.573411762714386, "learning_rate": 9.786354082211374e-06, "loss": 0.7608, "step": 2537 }, { "epoch": 0.36205420827389445, "grad_norm": 0.9318847060203552, "learning_rate": 9.786131336454625e-06, "loss": 0.7311, "step": 2538 }, { "epoch": 0.3621968616262482, "grad_norm": 0.8152886033058167, "learning_rate": 9.78590847717939e-06, "loss": 0.7587, "step": 2539 }, { "epoch": 0.362339514978602, "grad_norm": 0.9994775056838989, "learning_rate": 9.78568550439095e-06, "loss": 0.8209, "step": 2540 }, { "epoch": 0.36248216833095576, "grad_norm": 0.8203306794166565, "learning_rate": 9.785462418094595e-06, "loss": 0.7314, "step": 2541 }, { "epoch": 0.36262482168330956, "grad_norm": 0.8252012729644775, "learning_rate": 9.785239218295618e-06, "loss": 0.8093, "step": 2542 }, { "epoch": 0.3627674750356633, "grad_norm": 0.8270312547683716, "learning_rate": 9.785015904999311e-06, "loss": 0.8546, "step": 2543 }, { "epoch": 0.3629101283880171, "grad_norm": 1.0600306987762451, "learning_rate": 9.784792478210971e-06, "loss": 0.8662, "step": 2544 }, { "epoch": 0.3630527817403709, "grad_norm": 0.9592757821083069, "learning_rate": 9.7845689379359e-06, "loss": 0.798, "step": 2545 }, { "epoch": 0.36319543509272467, "grad_norm": 0.9203529953956604, "learning_rate": 9.784345284179397e-06, "loss": 0.7938, "step": 2546 }, { "epoch": 0.3633380884450785, "grad_norm": 1.1838785409927368, "learning_rate": 9.784121516946765e-06, "loss": 0.8015, "step": 2547 }, { "epoch": 0.3634807417974322, "grad_norm": 1.0804641246795654, "learning_rate": 9.783897636243316e-06, "loss": 0.851, "step": 2548 }, { "epoch": 0.36362339514978603, "grad_norm": 0.8632106781005859, "learning_rate": 9.783673642074358e-06, "loss": 0.7753, "step": 2549 }, { "epoch": 0.3637660485021398, "grad_norm": 1.0359077453613281, "learning_rate": 9.783449534445202e-06, "loss": 0.7797, "step": 2550 }, { "epoch": 0.3639087018544936, "grad_norm": 0.8755848407745361, "learning_rate": 9.783225313361165e-06, "loss": 0.8103, "step": 2551 }, { "epoch": 0.36405135520684734, "grad_norm": 0.9089139699935913, "learning_rate": 9.783000978827565e-06, "loss": 0.8141, "step": 2552 }, { "epoch": 0.36419400855920114, "grad_norm": 1.0342538356781006, "learning_rate": 9.782776530849722e-06, "loss": 0.8129, "step": 2553 }, { "epoch": 0.36433666191155495, "grad_norm": 0.70041823387146, "learning_rate": 9.782551969432963e-06, "loss": 0.808, "step": 2554 }, { "epoch": 0.3644793152639087, "grad_norm": 0.9141285419464111, "learning_rate": 9.78232729458261e-06, "loss": 0.7329, "step": 2555 }, { "epoch": 0.3646219686162625, "grad_norm": 0.8585652709007263, "learning_rate": 9.782102506303993e-06, "loss": 0.7318, "step": 2556 }, { "epoch": 0.36476462196861625, "grad_norm": 1.0369824171066284, "learning_rate": 9.781877604602444e-06, "loss": 0.8093, "step": 2557 }, { "epoch": 0.36490727532097006, "grad_norm": 0.9943699240684509, "learning_rate": 9.781652589483296e-06, "loss": 0.8641, "step": 2558 }, { "epoch": 0.3650499286733238, "grad_norm": 0.9613951444625854, "learning_rate": 9.78142746095189e-06, "loss": 0.7307, "step": 2559 }, { "epoch": 0.3651925820256776, "grad_norm": 0.8950900435447693, "learning_rate": 9.781202219013561e-06, "loss": 0.8279, "step": 2560 }, { "epoch": 0.36533523537803136, "grad_norm": 1.0290483236312866, "learning_rate": 9.780976863673654e-06, "loss": 0.7668, "step": 2561 }, { "epoch": 0.36547788873038517, "grad_norm": 0.8867672085762024, "learning_rate": 9.780751394937513e-06, "loss": 0.797, "step": 2562 }, { "epoch": 0.3656205420827389, "grad_norm": 0.7999266982078552, "learning_rate": 9.780525812810486e-06, "loss": 0.5798, "step": 2563 }, { "epoch": 0.3657631954350927, "grad_norm": 0.9021796584129333, "learning_rate": 9.780300117297922e-06, "loss": 0.8297, "step": 2564 }, { "epoch": 0.36590584878744653, "grad_norm": 1.075351357460022, "learning_rate": 9.78007430840518e-06, "loss": 0.7587, "step": 2565 }, { "epoch": 0.3660485021398003, "grad_norm": 0.8415443897247314, "learning_rate": 9.779848386137606e-06, "loss": 0.6022, "step": 2566 }, { "epoch": 0.3661911554921541, "grad_norm": 1.243349313735962, "learning_rate": 9.779622350500565e-06, "loss": 0.7213, "step": 2567 }, { "epoch": 0.36633380884450784, "grad_norm": 1.0616198778152466, "learning_rate": 9.779396201499418e-06, "loss": 0.8023, "step": 2568 }, { "epoch": 0.36647646219686164, "grad_norm": 0.7718018889427185, "learning_rate": 9.779169939139526e-06, "loss": 0.7595, "step": 2569 }, { "epoch": 0.3666191155492154, "grad_norm": 0.8942586779594421, "learning_rate": 9.77894356342626e-06, "loss": 0.7716, "step": 2570 }, { "epoch": 0.3667617689015692, "grad_norm": 0.9758250117301941, "learning_rate": 9.778717074364985e-06, "loss": 0.8131, "step": 2571 }, { "epoch": 0.36690442225392295, "grad_norm": 1.077924370765686, "learning_rate": 9.778490471961074e-06, "loss": 0.7641, "step": 2572 }, { "epoch": 0.36704707560627675, "grad_norm": 0.9291079044342041, "learning_rate": 9.778263756219902e-06, "loss": 0.7663, "step": 2573 }, { "epoch": 0.3671897289586305, "grad_norm": 1.079791784286499, "learning_rate": 9.778036927146845e-06, "loss": 0.8218, "step": 2574 }, { "epoch": 0.3673323823109843, "grad_norm": 1.0044819116592407, "learning_rate": 9.777809984747287e-06, "loss": 0.8751, "step": 2575 }, { "epoch": 0.3674750356633381, "grad_norm": 0.775492250919342, "learning_rate": 9.777582929026606e-06, "loss": 0.7893, "step": 2576 }, { "epoch": 0.36761768901569186, "grad_norm": 0.8879029750823975, "learning_rate": 9.77735575999019e-06, "loss": 0.8414, "step": 2577 }, { "epoch": 0.36776034236804567, "grad_norm": 0.9367338418960571, "learning_rate": 9.777128477643426e-06, "loss": 0.6126, "step": 2578 }, { "epoch": 0.3679029957203994, "grad_norm": 0.752149224281311, "learning_rate": 9.776901081991706e-06, "loss": 0.7874, "step": 2579 }, { "epoch": 0.3680456490727532, "grad_norm": 0.821786642074585, "learning_rate": 9.77667357304042e-06, "loss": 0.8036, "step": 2580 }, { "epoch": 0.368188302425107, "grad_norm": 0.8069832921028137, "learning_rate": 9.776445950794968e-06, "loss": 0.7852, "step": 2581 }, { "epoch": 0.3683309557774608, "grad_norm": 0.7493656873703003, "learning_rate": 9.776218215260749e-06, "loss": 0.7966, "step": 2582 }, { "epoch": 0.3684736091298145, "grad_norm": 0.9002495408058167, "learning_rate": 9.775990366443162e-06, "loss": 0.8047, "step": 2583 }, { "epoch": 0.36861626248216833, "grad_norm": 0.8920180201530457, "learning_rate": 9.775762404347611e-06, "loss": 0.815, "step": 2584 }, { "epoch": 0.36875891583452214, "grad_norm": 1.1317116022109985, "learning_rate": 9.775534328979504e-06, "loss": 0.8338, "step": 2585 }, { "epoch": 0.3689015691868759, "grad_norm": 0.9028584957122803, "learning_rate": 9.775306140344251e-06, "loss": 0.8188, "step": 2586 }, { "epoch": 0.3690442225392297, "grad_norm": 0.7854993343353271, "learning_rate": 9.775077838447261e-06, "loss": 0.5499, "step": 2587 }, { "epoch": 0.36918687589158344, "grad_norm": 0.7696592807769775, "learning_rate": 9.774849423293955e-06, "loss": 0.7132, "step": 2588 }, { "epoch": 0.36932952924393725, "grad_norm": 0.849781334400177, "learning_rate": 9.774620894889747e-06, "loss": 0.7801, "step": 2589 }, { "epoch": 0.369472182596291, "grad_norm": 1.1491278409957886, "learning_rate": 9.774392253240056e-06, "loss": 0.7375, "step": 2590 }, { "epoch": 0.3696148359486448, "grad_norm": 1.0900774002075195, "learning_rate": 9.774163498350307e-06, "loss": 0.8474, "step": 2591 }, { "epoch": 0.36975748930099855, "grad_norm": 1.0792137384414673, "learning_rate": 9.773934630225926e-06, "loss": 0.8593, "step": 2592 }, { "epoch": 0.36990014265335236, "grad_norm": 0.7018675804138184, "learning_rate": 9.773705648872338e-06, "loss": 0.817, "step": 2593 }, { "epoch": 0.3700427960057061, "grad_norm": 0.7739388942718506, "learning_rate": 9.773476554294978e-06, "loss": 0.8053, "step": 2594 }, { "epoch": 0.3701854493580599, "grad_norm": 0.8324184417724609, "learning_rate": 9.77324734649928e-06, "loss": 0.8171, "step": 2595 }, { "epoch": 0.3703281027104137, "grad_norm": 0.9940540790557861, "learning_rate": 9.773018025490674e-06, "loss": 0.8009, "step": 2596 }, { "epoch": 0.37047075606276747, "grad_norm": 1.1315734386444092, "learning_rate": 9.772788591274606e-06, "loss": 0.8332, "step": 2597 }, { "epoch": 0.3706134094151213, "grad_norm": 1.3557701110839844, "learning_rate": 9.772559043856518e-06, "loss": 0.7393, "step": 2598 }, { "epoch": 0.370756062767475, "grad_norm": 0.7162796258926392, "learning_rate": 9.772329383241848e-06, "loss": 0.7788, "step": 2599 }, { "epoch": 0.37089871611982883, "grad_norm": 1.10598886013031, "learning_rate": 9.77209960943605e-06, "loss": 0.7347, "step": 2600 }, { "epoch": 0.3710413694721826, "grad_norm": 0.6513818502426147, "learning_rate": 9.771869722444569e-06, "loss": 0.8699, "step": 2601 }, { "epoch": 0.3711840228245364, "grad_norm": 1.0559954643249512, "learning_rate": 9.77163972227286e-06, "loss": 0.7199, "step": 2602 }, { "epoch": 0.37132667617689014, "grad_norm": 1.0768969058990479, "learning_rate": 9.771409608926377e-06, "loss": 0.8428, "step": 2603 }, { "epoch": 0.37146932952924394, "grad_norm": 0.8907505869865417, "learning_rate": 9.771179382410578e-06, "loss": 0.8004, "step": 2604 }, { "epoch": 0.3716119828815977, "grad_norm": 0.9419316649436951, "learning_rate": 9.770949042730924e-06, "loss": 0.7885, "step": 2605 }, { "epoch": 0.3717546362339515, "grad_norm": 1.0965865850448608, "learning_rate": 9.77071858989288e-06, "loss": 0.7895, "step": 2606 }, { "epoch": 0.3718972895863053, "grad_norm": 0.9528594613075256, "learning_rate": 9.770488023901907e-06, "loss": 0.8403, "step": 2607 }, { "epoch": 0.37203994293865905, "grad_norm": 0.8624246716499329, "learning_rate": 9.77025734476348e-06, "loss": 0.9, "step": 2608 }, { "epoch": 0.37218259629101286, "grad_norm": 0.8880956172943115, "learning_rate": 9.770026552483065e-06, "loss": 0.7662, "step": 2609 }, { "epoch": 0.3723252496433666, "grad_norm": 0.949665367603302, "learning_rate": 9.76979564706614e-06, "loss": 0.7712, "step": 2610 }, { "epoch": 0.3724679029957204, "grad_norm": 0.8791137933731079, "learning_rate": 9.769564628518178e-06, "loss": 0.8127, "step": 2611 }, { "epoch": 0.37261055634807416, "grad_norm": 1.0559605360031128, "learning_rate": 9.76933349684466e-06, "loss": 0.8561, "step": 2612 }, { "epoch": 0.37275320970042797, "grad_norm": 0.8810771703720093, "learning_rate": 9.769102252051069e-06, "loss": 0.8494, "step": 2613 }, { "epoch": 0.3728958630527817, "grad_norm": 0.7815802693367004, "learning_rate": 9.768870894142887e-06, "loss": 0.7677, "step": 2614 }, { "epoch": 0.3730385164051355, "grad_norm": 0.8313312530517578, "learning_rate": 9.768639423125605e-06, "loss": 0.7337, "step": 2615 }, { "epoch": 0.37318116975748933, "grad_norm": 0.9385679960250854, "learning_rate": 9.768407839004712e-06, "loss": 0.7887, "step": 2616 }, { "epoch": 0.3733238231098431, "grad_norm": 1.0283102989196777, "learning_rate": 9.768176141785699e-06, "loss": 0.7511, "step": 2617 }, { "epoch": 0.3734664764621969, "grad_norm": 0.753254234790802, "learning_rate": 9.767944331474062e-06, "loss": 0.7493, "step": 2618 }, { "epoch": 0.37360912981455063, "grad_norm": 1.1009119749069214, "learning_rate": 9.7677124080753e-06, "loss": 0.5995, "step": 2619 }, { "epoch": 0.37375178316690444, "grad_norm": 0.7960112690925598, "learning_rate": 9.767480371594914e-06, "loss": 0.8729, "step": 2620 }, { "epoch": 0.3738944365192582, "grad_norm": 0.9416414499282837, "learning_rate": 9.767248222038406e-06, "loss": 0.8489, "step": 2621 }, { "epoch": 0.374037089871612, "grad_norm": 0.7720905542373657, "learning_rate": 9.767015959411283e-06, "loss": 0.8232, "step": 2622 }, { "epoch": 0.37417974322396574, "grad_norm": 0.8114632964134216, "learning_rate": 9.766783583719054e-06, "loss": 0.7892, "step": 2623 }, { "epoch": 0.37432239657631955, "grad_norm": 1.4543877840042114, "learning_rate": 9.76655109496723e-06, "loss": 0.8484, "step": 2624 }, { "epoch": 0.3744650499286733, "grad_norm": 0.9627675414085388, "learning_rate": 9.766318493161326e-06, "loss": 0.7684, "step": 2625 }, { "epoch": 0.3746077032810271, "grad_norm": 0.6586639285087585, "learning_rate": 9.766085778306859e-06, "loss": 0.7958, "step": 2626 }, { "epoch": 0.3747503566333809, "grad_norm": 1.0314810276031494, "learning_rate": 9.765852950409349e-06, "loss": 0.6011, "step": 2627 }, { "epoch": 0.37489300998573466, "grad_norm": 0.8590237498283386, "learning_rate": 9.765620009474317e-06, "loss": 0.8111, "step": 2628 }, { "epoch": 0.37503566333808847, "grad_norm": 0.8130232095718384, "learning_rate": 9.765386955507288e-06, "loss": 0.5973, "step": 2629 }, { "epoch": 0.3751783166904422, "grad_norm": 0.921552300453186, "learning_rate": 9.76515378851379e-06, "loss": 0.7221, "step": 2630 }, { "epoch": 0.375320970042796, "grad_norm": 0.9046935439109802, "learning_rate": 9.764920508499352e-06, "loss": 0.7321, "step": 2631 }, { "epoch": 0.37546362339514977, "grad_norm": 1.291777491569519, "learning_rate": 9.76468711546951e-06, "loss": 0.8003, "step": 2632 }, { "epoch": 0.3756062767475036, "grad_norm": 0.8085049390792847, "learning_rate": 9.764453609429798e-06, "loss": 0.903, "step": 2633 }, { "epoch": 0.3757489300998573, "grad_norm": 0.9415332078933716, "learning_rate": 9.764219990385753e-06, "loss": 0.758, "step": 2634 }, { "epoch": 0.37589158345221113, "grad_norm": 1.023079752922058, "learning_rate": 9.763986258342918e-06, "loss": 0.8447, "step": 2635 }, { "epoch": 0.3760342368045649, "grad_norm": 1.039222240447998, "learning_rate": 9.763752413306836e-06, "loss": 0.7931, "step": 2636 }, { "epoch": 0.3761768901569187, "grad_norm": 1.2192070484161377, "learning_rate": 9.763518455283055e-06, "loss": 0.8556, "step": 2637 }, { "epoch": 0.3763195435092725, "grad_norm": 0.8125724196434021, "learning_rate": 9.76328438427712e-06, "loss": 0.7761, "step": 2638 }, { "epoch": 0.37646219686162624, "grad_norm": 0.7936736345291138, "learning_rate": 9.763050200294586e-06, "loss": 0.7777, "step": 2639 }, { "epoch": 0.37660485021398005, "grad_norm": 0.6694625020027161, "learning_rate": 9.762815903341006e-06, "loss": 0.8426, "step": 2640 }, { "epoch": 0.3767475035663338, "grad_norm": 0.9329331517219543, "learning_rate": 9.762581493421938e-06, "loss": 0.8454, "step": 2641 }, { "epoch": 0.3768901569186876, "grad_norm": 0.8289737105369568, "learning_rate": 9.762346970542943e-06, "loss": 0.7621, "step": 2642 }, { "epoch": 0.37703281027104135, "grad_norm": 1.041685938835144, "learning_rate": 9.76211233470958e-06, "loss": 0.8076, "step": 2643 }, { "epoch": 0.37717546362339516, "grad_norm": 0.8847928643226624, "learning_rate": 9.761877585927419e-06, "loss": 0.8006, "step": 2644 }, { "epoch": 0.3773181169757489, "grad_norm": 0.7176461219787598, "learning_rate": 9.761642724202023e-06, "loss": 0.8136, "step": 2645 }, { "epoch": 0.3774607703281027, "grad_norm": 0.8413593769073486, "learning_rate": 9.761407749538964e-06, "loss": 0.7785, "step": 2646 }, { "epoch": 0.3776034236804565, "grad_norm": 0.7981699705123901, "learning_rate": 9.761172661943816e-06, "loss": 0.5767, "step": 2647 }, { "epoch": 0.37774607703281027, "grad_norm": 0.9135536551475525, "learning_rate": 9.760937461422155e-06, "loss": 0.5791, "step": 2648 }, { "epoch": 0.3778887303851641, "grad_norm": 0.8779171705245972, "learning_rate": 9.76070214797956e-06, "loss": 0.7945, "step": 2649 }, { "epoch": 0.3780313837375178, "grad_norm": 0.8615351319313049, "learning_rate": 9.76046672162161e-06, "loss": 0.783, "step": 2650 }, { "epoch": 0.37817403708987163, "grad_norm": 0.7770572304725647, "learning_rate": 9.76023118235389e-06, "loss": 0.7985, "step": 2651 }, { "epoch": 0.3783166904422254, "grad_norm": 0.7578920722007751, "learning_rate": 9.759995530181987e-06, "loss": 0.823, "step": 2652 }, { "epoch": 0.3784593437945792, "grad_norm": 0.8214627504348755, "learning_rate": 9.75975976511149e-06, "loss": 0.8105, "step": 2653 }, { "epoch": 0.37860199714693293, "grad_norm": 0.8335827589035034, "learning_rate": 9.759523887147992e-06, "loss": 0.7065, "step": 2654 }, { "epoch": 0.37874465049928674, "grad_norm": 0.9503043293952942, "learning_rate": 9.759287896297084e-06, "loss": 0.7974, "step": 2655 }, { "epoch": 0.3788873038516405, "grad_norm": 0.8033483028411865, "learning_rate": 9.759051792564369e-06, "loss": 0.8391, "step": 2656 }, { "epoch": 0.3790299572039943, "grad_norm": 1.2023918628692627, "learning_rate": 9.758815575955441e-06, "loss": 0.7311, "step": 2657 }, { "epoch": 0.3791726105563481, "grad_norm": 0.8341216444969177, "learning_rate": 9.758579246475906e-06, "loss": 0.7213, "step": 2658 }, { "epoch": 0.37931526390870185, "grad_norm": 0.8582043051719666, "learning_rate": 9.758342804131368e-06, "loss": 0.7988, "step": 2659 }, { "epoch": 0.37945791726105566, "grad_norm": 0.7988712787628174, "learning_rate": 9.758106248927439e-06, "loss": 0.8425, "step": 2660 }, { "epoch": 0.3796005706134094, "grad_norm": 0.7101856470108032, "learning_rate": 9.757869580869724e-06, "loss": 0.8193, "step": 2661 }, { "epoch": 0.3797432239657632, "grad_norm": 0.9709107875823975, "learning_rate": 9.757632799963838e-06, "loss": 0.7668, "step": 2662 }, { "epoch": 0.37988587731811696, "grad_norm": 0.7359352707862854, "learning_rate": 9.757395906215399e-06, "loss": 0.7309, "step": 2663 }, { "epoch": 0.38002853067047077, "grad_norm": 0.9795485138893127, "learning_rate": 9.757158899630024e-06, "loss": 0.745, "step": 2664 }, { "epoch": 0.3801711840228245, "grad_norm": 0.9430841207504272, "learning_rate": 9.756921780213334e-06, "loss": 0.7417, "step": 2665 }, { "epoch": 0.3803138373751783, "grad_norm": 0.9501784443855286, "learning_rate": 9.756684547970954e-06, "loss": 0.847, "step": 2666 }, { "epoch": 0.38045649072753207, "grad_norm": 0.9335957765579224, "learning_rate": 9.756447202908512e-06, "loss": 0.7392, "step": 2667 }, { "epoch": 0.3805991440798859, "grad_norm": 1.1261742115020752, "learning_rate": 9.756209745031635e-06, "loss": 0.7525, "step": 2668 }, { "epoch": 0.3807417974322397, "grad_norm": 0.8628441095352173, "learning_rate": 9.755972174345955e-06, "loss": 0.7191, "step": 2669 }, { "epoch": 0.38088445078459343, "grad_norm": 0.9380318522453308, "learning_rate": 9.75573449085711e-06, "loss": 0.7961, "step": 2670 }, { "epoch": 0.38102710413694724, "grad_norm": 0.9635017514228821, "learning_rate": 9.755496694570734e-06, "loss": 0.7904, "step": 2671 }, { "epoch": 0.381169757489301, "grad_norm": 0.7903203368186951, "learning_rate": 9.755258785492468e-06, "loss": 0.8209, "step": 2672 }, { "epoch": 0.3813124108416548, "grad_norm": 0.7948763966560364, "learning_rate": 9.755020763627956e-06, "loss": 0.7637, "step": 2673 }, { "epoch": 0.38145506419400854, "grad_norm": 0.7615258097648621, "learning_rate": 9.754782628982842e-06, "loss": 0.8453, "step": 2674 }, { "epoch": 0.38159771754636235, "grad_norm": 0.7442870736122131, "learning_rate": 9.754544381562774e-06, "loss": 0.7853, "step": 2675 }, { "epoch": 0.3817403708987161, "grad_norm": 0.9709129333496094, "learning_rate": 9.754306021373405e-06, "loss": 0.7171, "step": 2676 }, { "epoch": 0.3818830242510699, "grad_norm": 0.9641637802124023, "learning_rate": 9.754067548420386e-06, "loss": 0.7876, "step": 2677 }, { "epoch": 0.3820256776034237, "grad_norm": 0.933236837387085, "learning_rate": 9.753828962709374e-06, "loss": 0.7901, "step": 2678 }, { "epoch": 0.38216833095577746, "grad_norm": 1.2068079710006714, "learning_rate": 9.753590264246028e-06, "loss": 0.6204, "step": 2679 }, { "epoch": 0.38231098430813126, "grad_norm": 1.1676217317581177, "learning_rate": 9.75335145303601e-06, "loss": 0.7691, "step": 2680 }, { "epoch": 0.382453637660485, "grad_norm": 1.1245152950286865, "learning_rate": 9.753112529084983e-06, "loss": 0.7144, "step": 2681 }, { "epoch": 0.3825962910128388, "grad_norm": 0.9518791437149048, "learning_rate": 9.752873492398615e-06, "loss": 0.7337, "step": 2682 }, { "epoch": 0.38273894436519257, "grad_norm": 0.7627285122871399, "learning_rate": 9.752634342982575e-06, "loss": 0.7944, "step": 2683 }, { "epoch": 0.3828815977175464, "grad_norm": 0.8641947507858276, "learning_rate": 9.752395080842533e-06, "loss": 0.808, "step": 2684 }, { "epoch": 0.3830242510699001, "grad_norm": 0.9172922372817993, "learning_rate": 9.75215570598417e-06, "loss": 0.8174, "step": 2685 }, { "epoch": 0.38316690442225393, "grad_norm": 0.8682686686515808, "learning_rate": 9.751916218413157e-06, "loss": 0.8089, "step": 2686 }, { "epoch": 0.3833095577746077, "grad_norm": 0.7675897479057312, "learning_rate": 9.751676618135178e-06, "loss": 0.849, "step": 2687 }, { "epoch": 0.3834522111269615, "grad_norm": 1.2480727434158325, "learning_rate": 9.751436905155914e-06, "loss": 0.7802, "step": 2688 }, { "epoch": 0.3835948644793153, "grad_norm": 0.8453992605209351, "learning_rate": 9.751197079481051e-06, "loss": 0.7772, "step": 2689 }, { "epoch": 0.38373751783166904, "grad_norm": 0.7082547545433044, "learning_rate": 9.750957141116278e-06, "loss": 0.8644, "step": 2690 }, { "epoch": 0.38388017118402284, "grad_norm": 0.8735644817352295, "learning_rate": 9.750717090067286e-06, "loss": 0.7443, "step": 2691 }, { "epoch": 0.3840228245363766, "grad_norm": 1.0152959823608398, "learning_rate": 9.750476926339767e-06, "loss": 0.7828, "step": 2692 }, { "epoch": 0.3841654778887304, "grad_norm": 0.832862377166748, "learning_rate": 9.750236649939418e-06, "loss": 0.8786, "step": 2693 }, { "epoch": 0.38430813124108415, "grad_norm": 0.797649085521698, "learning_rate": 9.749996260871938e-06, "loss": 0.8077, "step": 2694 }, { "epoch": 0.38445078459343796, "grad_norm": 1.1882747411727905, "learning_rate": 9.74975575914303e-06, "loss": 0.7217, "step": 2695 }, { "epoch": 0.3845934379457917, "grad_norm": 1.260117769241333, "learning_rate": 9.749515144758396e-06, "loss": 0.8008, "step": 2696 }, { "epoch": 0.3847360912981455, "grad_norm": 0.7837929129600525, "learning_rate": 9.749274417723745e-06, "loss": 0.8494, "step": 2697 }, { "epoch": 0.38487874465049926, "grad_norm": 0.8835393190383911, "learning_rate": 9.749033578044785e-06, "loss": 0.8154, "step": 2698 }, { "epoch": 0.38502139800285307, "grad_norm": 1.013067364692688, "learning_rate": 9.74879262572723e-06, "loss": 0.7286, "step": 2699 }, { "epoch": 0.38516405135520687, "grad_norm": 1.0670238733291626, "learning_rate": 9.748551560776792e-06, "loss": 0.8268, "step": 2700 }, { "epoch": 0.3853067047075606, "grad_norm": 0.8018626570701599, "learning_rate": 9.748310383199191e-06, "loss": 0.7929, "step": 2701 }, { "epoch": 0.3854493580599144, "grad_norm": 0.9058294892311096, "learning_rate": 9.748069093000148e-06, "loss": 0.8724, "step": 2702 }, { "epoch": 0.3855920114122682, "grad_norm": 0.7368719577789307, "learning_rate": 9.747827690185384e-06, "loss": 0.8173, "step": 2703 }, { "epoch": 0.385734664764622, "grad_norm": 0.90807044506073, "learning_rate": 9.747586174760627e-06, "loss": 0.7342, "step": 2704 }, { "epoch": 0.38587731811697573, "grad_norm": 0.9778223037719727, "learning_rate": 9.747344546731601e-06, "loss": 0.7719, "step": 2705 }, { "epoch": 0.38601997146932954, "grad_norm": 0.815638542175293, "learning_rate": 9.747102806104042e-06, "loss": 0.7554, "step": 2706 }, { "epoch": 0.3861626248216833, "grad_norm": 1.021000862121582, "learning_rate": 9.746860952883679e-06, "loss": 0.7847, "step": 2707 }, { "epoch": 0.3863052781740371, "grad_norm": 1.0167509317398071, "learning_rate": 9.746618987076252e-06, "loss": 0.7935, "step": 2708 }, { "epoch": 0.3864479315263909, "grad_norm": 0.9476686716079712, "learning_rate": 9.746376908687498e-06, "loss": 0.8703, "step": 2709 }, { "epoch": 0.38659058487874465, "grad_norm": 0.7579607963562012, "learning_rate": 9.74613471772316e-06, "loss": 0.828, "step": 2710 }, { "epoch": 0.38673323823109845, "grad_norm": 1.3265131711959839, "learning_rate": 9.745892414188981e-06, "loss": 0.7895, "step": 2711 }, { "epoch": 0.3868758915834522, "grad_norm": 0.8370159864425659, "learning_rate": 9.745649998090709e-06, "loss": 0.8543, "step": 2712 }, { "epoch": 0.387018544935806, "grad_norm": 0.772868812084198, "learning_rate": 9.745407469434095e-06, "loss": 0.8196, "step": 2713 }, { "epoch": 0.38716119828815976, "grad_norm": 0.8357599973678589, "learning_rate": 9.745164828224888e-06, "loss": 0.7744, "step": 2714 }, { "epoch": 0.38730385164051356, "grad_norm": 0.7198459506034851, "learning_rate": 9.744922074468845e-06, "loss": 0.8051, "step": 2715 }, { "epoch": 0.3874465049928673, "grad_norm": 0.9756195545196533, "learning_rate": 9.744679208171724e-06, "loss": 0.6199, "step": 2716 }, { "epoch": 0.3875891583452211, "grad_norm": 0.8039839863777161, "learning_rate": 9.744436229339284e-06, "loss": 0.7723, "step": 2717 }, { "epoch": 0.38773181169757487, "grad_norm": 0.776809573173523, "learning_rate": 9.74419313797729e-06, "loss": 0.8159, "step": 2718 }, { "epoch": 0.3878744650499287, "grad_norm": 0.7821508049964905, "learning_rate": 9.743949934091506e-06, "loss": 0.8218, "step": 2719 }, { "epoch": 0.3880171184022825, "grad_norm": 0.6585720181465149, "learning_rate": 9.7437066176877e-06, "loss": 0.7731, "step": 2720 }, { "epoch": 0.38815977175463623, "grad_norm": 1.144324541091919, "learning_rate": 9.743463188771644e-06, "loss": 0.8415, "step": 2721 }, { "epoch": 0.38830242510699003, "grad_norm": 1.0549962520599365, "learning_rate": 9.743219647349111e-06, "loss": 0.7864, "step": 2722 }, { "epoch": 0.3884450784593438, "grad_norm": 0.744493842124939, "learning_rate": 9.74297599342588e-06, "loss": 0.7736, "step": 2723 }, { "epoch": 0.3885877318116976, "grad_norm": 1.04491126537323, "learning_rate": 9.742732227007727e-06, "loss": 0.7707, "step": 2724 }, { "epoch": 0.38873038516405134, "grad_norm": 0.9046798348426819, "learning_rate": 9.742488348100435e-06, "loss": 0.7321, "step": 2725 }, { "epoch": 0.38887303851640515, "grad_norm": 0.7387887835502625, "learning_rate": 9.742244356709787e-06, "loss": 0.7697, "step": 2726 }, { "epoch": 0.3890156918687589, "grad_norm": 0.9351281523704529, "learning_rate": 9.742000252841572e-06, "loss": 0.8096, "step": 2727 }, { "epoch": 0.3891583452211127, "grad_norm": 0.7774584889411926, "learning_rate": 9.741756036501579e-06, "loss": 0.8076, "step": 2728 }, { "epoch": 0.38930099857346645, "grad_norm": 0.6976808905601501, "learning_rate": 9.7415117076956e-06, "loss": 0.7464, "step": 2729 }, { "epoch": 0.38944365192582026, "grad_norm": 0.7944491505622864, "learning_rate": 9.74126726642943e-06, "loss": 0.8093, "step": 2730 }, { "epoch": 0.38958630527817406, "grad_norm": 0.9049003720283508, "learning_rate": 9.741022712708867e-06, "loss": 0.7647, "step": 2731 }, { "epoch": 0.3897289586305278, "grad_norm": 0.7585650086402893, "learning_rate": 9.740778046539712e-06, "loss": 0.7726, "step": 2732 }, { "epoch": 0.3898716119828816, "grad_norm": 0.8783090710639954, "learning_rate": 9.740533267927766e-06, "loss": 0.7521, "step": 2733 }, { "epoch": 0.39001426533523537, "grad_norm": 0.8248961567878723, "learning_rate": 9.740288376878837e-06, "loss": 0.7884, "step": 2734 }, { "epoch": 0.39015691868758917, "grad_norm": 0.8012792468070984, "learning_rate": 9.740043373398733e-06, "loss": 0.6581, "step": 2735 }, { "epoch": 0.3902995720399429, "grad_norm": 0.9968196153640747, "learning_rate": 9.739798257493263e-06, "loss": 0.7685, "step": 2736 }, { "epoch": 0.3904422253922967, "grad_norm": 0.7691065073013306, "learning_rate": 9.739553029168242e-06, "loss": 0.6316, "step": 2737 }, { "epoch": 0.3905848787446505, "grad_norm": 0.9571718573570251, "learning_rate": 9.73930768842949e-06, "loss": 0.7773, "step": 2738 }, { "epoch": 0.3907275320970043, "grad_norm": 0.7709513306617737, "learning_rate": 9.73906223528282e-06, "loss": 0.8136, "step": 2739 }, { "epoch": 0.3908701854493581, "grad_norm": 0.6877098083496094, "learning_rate": 9.738816669734055e-06, "loss": 0.834, "step": 2740 }, { "epoch": 0.39101283880171184, "grad_norm": 0.9988148212432861, "learning_rate": 9.738570991789023e-06, "loss": 0.8033, "step": 2741 }, { "epoch": 0.39115549215406564, "grad_norm": 0.8886873722076416, "learning_rate": 9.738325201453547e-06, "loss": 0.6928, "step": 2742 }, { "epoch": 0.3912981455064194, "grad_norm": 0.84345543384552, "learning_rate": 9.73807929873346e-06, "loss": 0.7314, "step": 2743 }, { "epoch": 0.3914407988587732, "grad_norm": 1.0821406841278076, "learning_rate": 9.737833283634592e-06, "loss": 0.7557, "step": 2744 }, { "epoch": 0.39158345221112695, "grad_norm": 0.9236408472061157, "learning_rate": 9.73758715616278e-06, "loss": 0.8094, "step": 2745 }, { "epoch": 0.39172610556348075, "grad_norm": 0.9534152150154114, "learning_rate": 9.73734091632386e-06, "loss": 0.8072, "step": 2746 }, { "epoch": 0.3918687589158345, "grad_norm": 0.7680412530899048, "learning_rate": 9.737094564123672e-06, "loss": 0.7875, "step": 2747 }, { "epoch": 0.3920114122681883, "grad_norm": 0.999618411064148, "learning_rate": 9.736848099568062e-06, "loss": 0.8146, "step": 2748 }, { "epoch": 0.39215406562054206, "grad_norm": 0.8808778524398804, "learning_rate": 9.736601522662871e-06, "loss": 0.819, "step": 2749 }, { "epoch": 0.39229671897289586, "grad_norm": 1.0400785207748413, "learning_rate": 9.736354833413953e-06, "loss": 0.8888, "step": 2750 }, { "epoch": 0.39243937232524967, "grad_norm": 0.7799466252326965, "learning_rate": 9.736108031827155e-06, "loss": 0.777, "step": 2751 }, { "epoch": 0.3925820256776034, "grad_norm": 0.8672760128974915, "learning_rate": 9.735861117908329e-06, "loss": 0.7409, "step": 2752 }, { "epoch": 0.3927246790299572, "grad_norm": 0.9319669604301453, "learning_rate": 9.735614091663336e-06, "loss": 0.8232, "step": 2753 }, { "epoch": 0.392867332382311, "grad_norm": 0.6662679314613342, "learning_rate": 9.735366953098035e-06, "loss": 0.6303, "step": 2754 }, { "epoch": 0.3930099857346648, "grad_norm": 1.0762574672698975, "learning_rate": 9.735119702218285e-06, "loss": 0.8742, "step": 2755 }, { "epoch": 0.39315263908701853, "grad_norm": 1.2987182140350342, "learning_rate": 9.73487233902995e-06, "loss": 0.7481, "step": 2756 }, { "epoch": 0.39329529243937233, "grad_norm": 1.0922690629959106, "learning_rate": 9.7346248635389e-06, "loss": 0.7634, "step": 2757 }, { "epoch": 0.3934379457917261, "grad_norm": 0.8129608035087585, "learning_rate": 9.734377275751002e-06, "loss": 0.7855, "step": 2758 }, { "epoch": 0.3935805991440799, "grad_norm": 0.7861506342887878, "learning_rate": 9.734129575672127e-06, "loss": 0.8174, "step": 2759 }, { "epoch": 0.39372325249643364, "grad_norm": 1.0716252326965332, "learning_rate": 9.733881763308155e-06, "loss": 0.7856, "step": 2760 }, { "epoch": 0.39386590584878745, "grad_norm": 0.7261438965797424, "learning_rate": 9.73363383866496e-06, "loss": 0.7678, "step": 2761 }, { "epoch": 0.39400855920114125, "grad_norm": 0.9121649265289307, "learning_rate": 9.733385801748423e-06, "loss": 0.7824, "step": 2762 }, { "epoch": 0.394151212553495, "grad_norm": 0.8819028735160828, "learning_rate": 9.733137652564428e-06, "loss": 0.7833, "step": 2763 }, { "epoch": 0.3942938659058488, "grad_norm": 0.7690829038619995, "learning_rate": 9.732889391118859e-06, "loss": 0.8084, "step": 2764 }, { "epoch": 0.39443651925820256, "grad_norm": 0.8435876369476318, "learning_rate": 9.732641017417606e-06, "loss": 0.7105, "step": 2765 }, { "epoch": 0.39457917261055636, "grad_norm": 0.7878632545471191, "learning_rate": 9.732392531466559e-06, "loss": 0.7949, "step": 2766 }, { "epoch": 0.3947218259629101, "grad_norm": 0.9608743190765381, "learning_rate": 9.732143933271612e-06, "loss": 0.7771, "step": 2767 }, { "epoch": 0.3948644793152639, "grad_norm": 0.8073573112487793, "learning_rate": 9.731895222838661e-06, "loss": 0.7732, "step": 2768 }, { "epoch": 0.39500713266761767, "grad_norm": 0.9152679443359375, "learning_rate": 9.731646400173605e-06, "loss": 0.7618, "step": 2769 }, { "epoch": 0.39514978601997147, "grad_norm": 0.7204840779304504, "learning_rate": 9.731397465282343e-06, "loss": 0.8244, "step": 2770 }, { "epoch": 0.3952924393723253, "grad_norm": 0.8135253190994263, "learning_rate": 9.731148418170786e-06, "loss": 0.7748, "step": 2771 }, { "epoch": 0.395435092724679, "grad_norm": 0.9012436866760254, "learning_rate": 9.730899258844835e-06, "loss": 0.8807, "step": 2772 }, { "epoch": 0.39557774607703283, "grad_norm": 1.1430065631866455, "learning_rate": 9.730649987310403e-06, "loss": 0.847, "step": 2773 }, { "epoch": 0.3957203994293866, "grad_norm": 0.9346035122871399, "learning_rate": 9.7304006035734e-06, "loss": 0.7522, "step": 2774 }, { "epoch": 0.3958630527817404, "grad_norm": 0.8465891480445862, "learning_rate": 9.73015110763974e-06, "loss": 0.7143, "step": 2775 }, { "epoch": 0.39600570613409414, "grad_norm": 0.7051974534988403, "learning_rate": 9.729901499515345e-06, "loss": 0.728, "step": 2776 }, { "epoch": 0.39614835948644794, "grad_norm": 0.8334681987762451, "learning_rate": 9.729651779206132e-06, "loss": 0.7767, "step": 2777 }, { "epoch": 0.3962910128388017, "grad_norm": 0.7618001103401184, "learning_rate": 9.729401946718025e-06, "loss": 0.8218, "step": 2778 }, { "epoch": 0.3964336661911555, "grad_norm": 0.8659477233886719, "learning_rate": 9.729152002056947e-06, "loss": 0.7719, "step": 2779 }, { "epoch": 0.39657631954350925, "grad_norm": 1.1242305040359497, "learning_rate": 9.72890194522883e-06, "loss": 0.7171, "step": 2780 }, { "epoch": 0.39671897289586305, "grad_norm": 0.9327376484870911, "learning_rate": 9.728651776239604e-06, "loss": 0.796, "step": 2781 }, { "epoch": 0.39686162624821686, "grad_norm": 0.9061946272850037, "learning_rate": 9.7284014950952e-06, "loss": 0.7779, "step": 2782 }, { "epoch": 0.3970042796005706, "grad_norm": 0.9660446047782898, "learning_rate": 9.728151101801558e-06, "loss": 0.8504, "step": 2783 }, { "epoch": 0.3971469329529244, "grad_norm": 0.721226692199707, "learning_rate": 9.727900596364613e-06, "loss": 0.7862, "step": 2784 }, { "epoch": 0.39728958630527816, "grad_norm": 0.7617622017860413, "learning_rate": 9.72764997879031e-06, "loss": 0.7985, "step": 2785 }, { "epoch": 0.39743223965763197, "grad_norm": 0.9045830368995667, "learning_rate": 9.72739924908459e-06, "loss": 0.8475, "step": 2786 }, { "epoch": 0.3975748930099857, "grad_norm": 0.9054057002067566, "learning_rate": 9.727148407253402e-06, "loss": 0.7966, "step": 2787 }, { "epoch": 0.3977175463623395, "grad_norm": 0.901283860206604, "learning_rate": 9.726897453302697e-06, "loss": 0.8035, "step": 2788 }, { "epoch": 0.3978601997146933, "grad_norm": 1.1419939994812012, "learning_rate": 9.726646387238424e-06, "loss": 0.7672, "step": 2789 }, { "epoch": 0.3980028530670471, "grad_norm": 1.0149915218353271, "learning_rate": 9.726395209066539e-06, "loss": 0.7542, "step": 2790 }, { "epoch": 0.39814550641940083, "grad_norm": 0.8135383725166321, "learning_rate": 9.726143918793e-06, "loss": 0.8171, "step": 2791 }, { "epoch": 0.39828815977175464, "grad_norm": 0.847780704498291, "learning_rate": 9.725892516423765e-06, "loss": 0.8331, "step": 2792 }, { "epoch": 0.39843081312410844, "grad_norm": 0.7311589121818542, "learning_rate": 9.7256410019648e-06, "loss": 0.7983, "step": 2793 }, { "epoch": 0.3985734664764622, "grad_norm": 0.7674535512924194, "learning_rate": 9.72538937542207e-06, "loss": 0.8564, "step": 2794 }, { "epoch": 0.398716119828816, "grad_norm": 0.6326112747192383, "learning_rate": 9.72513763680154e-06, "loss": 0.8182, "step": 2795 }, { "epoch": 0.39885877318116975, "grad_norm": 0.7226607203483582, "learning_rate": 9.724885786109186e-06, "loss": 0.8457, "step": 2796 }, { "epoch": 0.39900142653352355, "grad_norm": 1.111894965171814, "learning_rate": 9.724633823350975e-06, "loss": 0.6933, "step": 2797 }, { "epoch": 0.3991440798858773, "grad_norm": 0.9274622201919556, "learning_rate": 9.724381748532888e-06, "loss": 0.7499, "step": 2798 }, { "epoch": 0.3992867332382311, "grad_norm": 0.9712232351303101, "learning_rate": 9.724129561660901e-06, "loss": 0.7916, "step": 2799 }, { "epoch": 0.39942938659058486, "grad_norm": 0.7862420082092285, "learning_rate": 9.723877262741e-06, "loss": 0.7479, "step": 2800 }, { "epoch": 0.39957203994293866, "grad_norm": 1.0056772232055664, "learning_rate": 9.723624851779163e-06, "loss": 0.7359, "step": 2801 }, { "epoch": 0.39971469329529247, "grad_norm": 0.9222816228866577, "learning_rate": 9.72337232878138e-06, "loss": 0.7021, "step": 2802 }, { "epoch": 0.3998573466476462, "grad_norm": 0.8763768672943115, "learning_rate": 9.72311969375364e-06, "loss": 0.775, "step": 2803 }, { "epoch": 0.4, "grad_norm": 0.8317567706108093, "learning_rate": 9.722866946701937e-06, "loss": 0.7815, "step": 2804 }, { "epoch": 0.40014265335235377, "grad_norm": 0.8371714353561401, "learning_rate": 9.72261408763226e-06, "loss": 0.7607, "step": 2805 }, { "epoch": 0.4002853067047076, "grad_norm": 0.9584025144577026, "learning_rate": 9.722361116550614e-06, "loss": 0.7464, "step": 2806 }, { "epoch": 0.4004279600570613, "grad_norm": 0.9503485560417175, "learning_rate": 9.722108033462992e-06, "loss": 0.8436, "step": 2807 }, { "epoch": 0.40057061340941513, "grad_norm": 1.078795075416565, "learning_rate": 9.721854838375402e-06, "loss": 0.7928, "step": 2808 }, { "epoch": 0.4007132667617689, "grad_norm": 1.1382116079330444, "learning_rate": 9.721601531293845e-06, "loss": 0.7984, "step": 2809 }, { "epoch": 0.4008559201141227, "grad_norm": 0.9619671106338501, "learning_rate": 9.721348112224331e-06, "loss": 0.7672, "step": 2810 }, { "epoch": 0.40099857346647644, "grad_norm": 1.0031604766845703, "learning_rate": 9.721094581172872e-06, "loss": 0.7621, "step": 2811 }, { "epoch": 0.40114122681883024, "grad_norm": 0.9520820379257202, "learning_rate": 9.720840938145479e-06, "loss": 0.7674, "step": 2812 }, { "epoch": 0.40128388017118405, "grad_norm": 0.9491026997566223, "learning_rate": 9.720587183148168e-06, "loss": 0.8088, "step": 2813 }, { "epoch": 0.4014265335235378, "grad_norm": 0.7598925828933716, "learning_rate": 9.720333316186961e-06, "loss": 0.6926, "step": 2814 }, { "epoch": 0.4015691868758916, "grad_norm": 0.8415775299072266, "learning_rate": 9.720079337267877e-06, "loss": 0.8191, "step": 2815 }, { "epoch": 0.40171184022824535, "grad_norm": 0.8067095875740051, "learning_rate": 9.719825246396939e-06, "loss": 0.7903, "step": 2816 }, { "epoch": 0.40185449358059916, "grad_norm": 0.6660135388374329, "learning_rate": 9.719571043580174e-06, "loss": 0.834, "step": 2817 }, { "epoch": 0.4019971469329529, "grad_norm": 0.6990180611610413, "learning_rate": 9.719316728823612e-06, "loss": 0.7858, "step": 2818 }, { "epoch": 0.4021398002853067, "grad_norm": 0.8663573265075684, "learning_rate": 9.719062302133284e-06, "loss": 0.7581, "step": 2819 }, { "epoch": 0.40228245363766046, "grad_norm": 1.0680193901062012, "learning_rate": 9.718807763515226e-06, "loss": 0.7723, "step": 2820 }, { "epoch": 0.40242510699001427, "grad_norm": 0.8193547129631042, "learning_rate": 9.718553112975473e-06, "loss": 0.7844, "step": 2821 }, { "epoch": 0.402567760342368, "grad_norm": 1.0157464742660522, "learning_rate": 9.718298350520066e-06, "loss": 0.8367, "step": 2822 }, { "epoch": 0.4027104136947218, "grad_norm": 0.8064296841621399, "learning_rate": 9.718043476155048e-06, "loss": 0.7501, "step": 2823 }, { "epoch": 0.40285306704707563, "grad_norm": 0.8575136065483093, "learning_rate": 9.717788489886464e-06, "loss": 0.7637, "step": 2824 }, { "epoch": 0.4029957203994294, "grad_norm": 0.7274131774902344, "learning_rate": 9.717533391720361e-06, "loss": 0.8096, "step": 2825 }, { "epoch": 0.4031383737517832, "grad_norm": 0.8452036380767822, "learning_rate": 9.717278181662791e-06, "loss": 0.771, "step": 2826 }, { "epoch": 0.40328102710413694, "grad_norm": 0.8290517330169678, "learning_rate": 9.717022859719807e-06, "loss": 0.5842, "step": 2827 }, { "epoch": 0.40342368045649074, "grad_norm": 0.7403361797332764, "learning_rate": 9.716767425897463e-06, "loss": 0.7776, "step": 2828 }, { "epoch": 0.4035663338088445, "grad_norm": 0.9250881671905518, "learning_rate": 9.716511880201818e-06, "loss": 0.7167, "step": 2829 }, { "epoch": 0.4037089871611983, "grad_norm": 0.8735909461975098, "learning_rate": 9.716256222638934e-06, "loss": 0.8424, "step": 2830 }, { "epoch": 0.40385164051355205, "grad_norm": 1.0311628580093384, "learning_rate": 9.716000453214873e-06, "loss": 0.7285, "step": 2831 }, { "epoch": 0.40399429386590585, "grad_norm": 0.7494921088218689, "learning_rate": 9.715744571935704e-06, "loss": 0.7569, "step": 2832 }, { "epoch": 0.40413694721825966, "grad_norm": 0.7012854814529419, "learning_rate": 9.715488578807495e-06, "loss": 0.8128, "step": 2833 }, { "epoch": 0.4042796005706134, "grad_norm": 0.7978854775428772, "learning_rate": 9.715232473836316e-06, "loss": 0.7329, "step": 2834 }, { "epoch": 0.4044222539229672, "grad_norm": 0.7102094292640686, "learning_rate": 9.714976257028243e-06, "loss": 0.7961, "step": 2835 }, { "epoch": 0.40456490727532096, "grad_norm": 1.024623990058899, "learning_rate": 9.714719928389353e-06, "loss": 0.801, "step": 2836 }, { "epoch": 0.40470756062767477, "grad_norm": 0.8503227829933167, "learning_rate": 9.714463487925727e-06, "loss": 0.722, "step": 2837 }, { "epoch": 0.4048502139800285, "grad_norm": 0.7975144386291504, "learning_rate": 9.714206935643445e-06, "loss": 0.8133, "step": 2838 }, { "epoch": 0.4049928673323823, "grad_norm": 1.0284011363983154, "learning_rate": 9.71395027154859e-06, "loss": 0.7676, "step": 2839 }, { "epoch": 0.4051355206847361, "grad_norm": 0.8952131867408752, "learning_rate": 9.713693495647256e-06, "loss": 0.7489, "step": 2840 }, { "epoch": 0.4052781740370899, "grad_norm": 1.1101491451263428, "learning_rate": 9.713436607945527e-06, "loss": 0.7814, "step": 2841 }, { "epoch": 0.4054208273894436, "grad_norm": 0.8903674483299255, "learning_rate": 9.713179608449499e-06, "loss": 0.7911, "step": 2842 }, { "epoch": 0.40556348074179743, "grad_norm": 0.6775222420692444, "learning_rate": 9.712922497165267e-06, "loss": 0.6458, "step": 2843 }, { "epoch": 0.40570613409415124, "grad_norm": 0.799577534198761, "learning_rate": 9.71266527409893e-06, "loss": 0.8535, "step": 2844 }, { "epoch": 0.405848787446505, "grad_norm": 0.7899248600006104, "learning_rate": 9.712407939256588e-06, "loss": 0.8031, "step": 2845 }, { "epoch": 0.4059914407988588, "grad_norm": 0.713304877281189, "learning_rate": 9.712150492644345e-06, "loss": 0.7477, "step": 2846 }, { "epoch": 0.40613409415121254, "grad_norm": 0.9414926767349243, "learning_rate": 9.711892934268305e-06, "loss": 0.7355, "step": 2847 }, { "epoch": 0.40627674750356635, "grad_norm": 0.9517255425453186, "learning_rate": 9.71163526413458e-06, "loss": 0.8135, "step": 2848 }, { "epoch": 0.4064194008559201, "grad_norm": 0.8743422031402588, "learning_rate": 9.71137748224928e-06, "loss": 0.774, "step": 2849 }, { "epoch": 0.4065620542082739, "grad_norm": 0.7215398550033569, "learning_rate": 9.711119588618519e-06, "loss": 0.7994, "step": 2850 }, { "epoch": 0.40670470756062765, "grad_norm": 0.8900471925735474, "learning_rate": 9.710861583248414e-06, "loss": 0.8477, "step": 2851 }, { "epoch": 0.40684736091298146, "grad_norm": 0.9419020414352417, "learning_rate": 9.710603466145085e-06, "loss": 0.7946, "step": 2852 }, { "epoch": 0.4069900142653352, "grad_norm": 0.9247307777404785, "learning_rate": 9.710345237314653e-06, "loss": 0.8304, "step": 2853 }, { "epoch": 0.407132667617689, "grad_norm": 1.071629285812378, "learning_rate": 9.710086896763245e-06, "loss": 0.7594, "step": 2854 }, { "epoch": 0.4072753209700428, "grad_norm": 0.9002863168716431, "learning_rate": 9.709828444496985e-06, "loss": 0.7731, "step": 2855 }, { "epoch": 0.40741797432239657, "grad_norm": 0.7038922309875488, "learning_rate": 9.709569880522004e-06, "loss": 0.741, "step": 2856 }, { "epoch": 0.4075606276747504, "grad_norm": 0.6473375558853149, "learning_rate": 9.709311204844436e-06, "loss": 0.809, "step": 2857 }, { "epoch": 0.4077032810271041, "grad_norm": 0.7838351130485535, "learning_rate": 9.709052417470415e-06, "loss": 0.7669, "step": 2858 }, { "epoch": 0.40784593437945793, "grad_norm": 0.7748342156410217, "learning_rate": 9.708793518406082e-06, "loss": 0.8423, "step": 2859 }, { "epoch": 0.4079885877318117, "grad_norm": 0.7048565745353699, "learning_rate": 9.708534507657572e-06, "loss": 0.7845, "step": 2860 }, { "epoch": 0.4081312410841655, "grad_norm": 0.7788856029510498, "learning_rate": 9.708275385231035e-06, "loss": 0.7807, "step": 2861 }, { "epoch": 0.40827389443651924, "grad_norm": 0.9324522614479065, "learning_rate": 9.70801615113261e-06, "loss": 0.7685, "step": 2862 }, { "epoch": 0.40841654778887304, "grad_norm": 0.9006614089012146, "learning_rate": 9.70775680536845e-06, "loss": 0.7371, "step": 2863 }, { "epoch": 0.40855920114122685, "grad_norm": 1.0853517055511475, "learning_rate": 9.707497347944707e-06, "loss": 0.7871, "step": 2864 }, { "epoch": 0.4087018544935806, "grad_norm": 0.8286964893341064, "learning_rate": 9.707237778867532e-06, "loss": 0.7706, "step": 2865 }, { "epoch": 0.4088445078459344, "grad_norm": 0.9044437408447266, "learning_rate": 9.706978098143082e-06, "loss": 0.8755, "step": 2866 }, { "epoch": 0.40898716119828815, "grad_norm": 0.9929351806640625, "learning_rate": 9.706718305777519e-06, "loss": 0.6942, "step": 2867 }, { "epoch": 0.40912981455064196, "grad_norm": 0.9444536566734314, "learning_rate": 9.706458401777e-06, "loss": 0.7041, "step": 2868 }, { "epoch": 0.4092724679029957, "grad_norm": 0.8999462723731995, "learning_rate": 9.706198386147694e-06, "loss": 0.8094, "step": 2869 }, { "epoch": 0.4094151212553495, "grad_norm": 1.2214356660842896, "learning_rate": 9.705938258895765e-06, "loss": 0.5336, "step": 2870 }, { "epoch": 0.40955777460770326, "grad_norm": 1.0949814319610596, "learning_rate": 9.705678020027385e-06, "loss": 0.7205, "step": 2871 }, { "epoch": 0.40970042796005707, "grad_norm": 1.0050899982452393, "learning_rate": 9.705417669548723e-06, "loss": 0.7636, "step": 2872 }, { "epoch": 0.4098430813124108, "grad_norm": 0.8458533883094788, "learning_rate": 9.705157207465959e-06, "loss": 0.8918, "step": 2873 }, { "epoch": 0.4099857346647646, "grad_norm": 0.9296154379844666, "learning_rate": 9.704896633785266e-06, "loss": 0.745, "step": 2874 }, { "epoch": 0.41012838801711843, "grad_norm": 0.8902323842048645, "learning_rate": 9.704635948512828e-06, "loss": 0.8048, "step": 2875 }, { "epoch": 0.4102710413694722, "grad_norm": 0.9437968134880066, "learning_rate": 9.704375151654825e-06, "loss": 0.8345, "step": 2876 }, { "epoch": 0.410413694721826, "grad_norm": 0.8752991557121277, "learning_rate": 9.704114243217444e-06, "loss": 0.7575, "step": 2877 }, { "epoch": 0.41055634807417973, "grad_norm": 0.9018489122390747, "learning_rate": 9.703853223206875e-06, "loss": 0.7553, "step": 2878 }, { "epoch": 0.41069900142653354, "grad_norm": 0.9929783940315247, "learning_rate": 9.703592091629306e-06, "loss": 0.8109, "step": 2879 }, { "epoch": 0.4108416547788873, "grad_norm": 0.7652303576469421, "learning_rate": 9.703330848490933e-06, "loss": 0.7772, "step": 2880 }, { "epoch": 0.4109843081312411, "grad_norm": 0.8961701393127441, "learning_rate": 9.70306949379795e-06, "loss": 0.7696, "step": 2881 }, { "epoch": 0.41112696148359484, "grad_norm": 0.8824974298477173, "learning_rate": 9.702808027556557e-06, "loss": 0.7934, "step": 2882 }, { "epoch": 0.41126961483594865, "grad_norm": 0.6955432295799255, "learning_rate": 9.702546449772955e-06, "loss": 0.7195, "step": 2883 }, { "epoch": 0.4114122681883024, "grad_norm": 0.7119831442832947, "learning_rate": 9.702284760453349e-06, "loss": 0.7584, "step": 2884 }, { "epoch": 0.4115549215406562, "grad_norm": 0.8682061433792114, "learning_rate": 9.702022959603944e-06, "loss": 0.8205, "step": 2885 }, { "epoch": 0.41169757489301, "grad_norm": 0.9031049609184265, "learning_rate": 9.701761047230953e-06, "loss": 0.8606, "step": 2886 }, { "epoch": 0.41184022824536376, "grad_norm": 0.8140887022018433, "learning_rate": 9.701499023340587e-06, "loss": 0.7225, "step": 2887 }, { "epoch": 0.41198288159771757, "grad_norm": 0.9121723175048828, "learning_rate": 9.701236887939056e-06, "loss": 0.7841, "step": 2888 }, { "epoch": 0.4121255349500713, "grad_norm": 0.8388721942901611, "learning_rate": 9.700974641032583e-06, "loss": 0.7966, "step": 2889 }, { "epoch": 0.4122681883024251, "grad_norm": 0.8396943807601929, "learning_rate": 9.700712282627388e-06, "loss": 0.7714, "step": 2890 }, { "epoch": 0.41241084165477887, "grad_norm": 0.9269974827766418, "learning_rate": 9.700449812729689e-06, "loss": 0.7563, "step": 2891 }, { "epoch": 0.4125534950071327, "grad_norm": 0.7480577826499939, "learning_rate": 9.700187231345715e-06, "loss": 0.7538, "step": 2892 }, { "epoch": 0.4126961483594864, "grad_norm": 1.004860758781433, "learning_rate": 9.699924538481691e-06, "loss": 0.7932, "step": 2893 }, { "epoch": 0.41283880171184023, "grad_norm": 0.9278278350830078, "learning_rate": 9.699661734143853e-06, "loss": 0.7935, "step": 2894 }, { "epoch": 0.41298145506419404, "grad_norm": 0.9548095464706421, "learning_rate": 9.699398818338427e-06, "loss": 0.7767, "step": 2895 }, { "epoch": 0.4131241084165478, "grad_norm": 0.8039991855621338, "learning_rate": 9.699135791071656e-06, "loss": 0.8061, "step": 2896 }, { "epoch": 0.4132667617689016, "grad_norm": 0.9833992123603821, "learning_rate": 9.698872652349775e-06, "loss": 0.694, "step": 2897 }, { "epoch": 0.41340941512125534, "grad_norm": 0.8005412817001343, "learning_rate": 9.698609402179024e-06, "loss": 0.7802, "step": 2898 }, { "epoch": 0.41355206847360915, "grad_norm": 0.7481583952903748, "learning_rate": 9.698346040565649e-06, "loss": 0.7829, "step": 2899 }, { "epoch": 0.4136947218259629, "grad_norm": 1.1110355854034424, "learning_rate": 9.698082567515897e-06, "loss": 0.7476, "step": 2900 }, { "epoch": 0.4138373751783167, "grad_norm": 0.7226589322090149, "learning_rate": 9.697818983036013e-06, "loss": 0.8259, "step": 2901 }, { "epoch": 0.41398002853067045, "grad_norm": 0.8115856647491455, "learning_rate": 9.697555287132253e-06, "loss": 0.612, "step": 2902 }, { "epoch": 0.41412268188302426, "grad_norm": 0.9504525661468506, "learning_rate": 9.697291479810873e-06, "loss": 0.7367, "step": 2903 }, { "epoch": 0.414265335235378, "grad_norm": 0.8218483328819275, "learning_rate": 9.697027561078124e-06, "loss": 0.7726, "step": 2904 }, { "epoch": 0.4144079885877318, "grad_norm": 1.4041001796722412, "learning_rate": 9.696763530940269e-06, "loss": 0.7757, "step": 2905 }, { "epoch": 0.4145506419400856, "grad_norm": 0.8205307722091675, "learning_rate": 9.69649938940357e-06, "loss": 0.7886, "step": 2906 }, { "epoch": 0.41469329529243937, "grad_norm": 0.7615723013877869, "learning_rate": 9.696235136474292e-06, "loss": 0.7337, "step": 2907 }, { "epoch": 0.4148359486447932, "grad_norm": 1.0919002294540405, "learning_rate": 9.695970772158704e-06, "loss": 0.8087, "step": 2908 }, { "epoch": 0.4149786019971469, "grad_norm": 0.8195972442626953, "learning_rate": 9.695706296463074e-06, "loss": 0.7811, "step": 2909 }, { "epoch": 0.41512125534950073, "grad_norm": 0.7052676677703857, "learning_rate": 9.695441709393675e-06, "loss": 0.8215, "step": 2910 }, { "epoch": 0.4152639087018545, "grad_norm": 0.7744655609130859, "learning_rate": 9.695177010956785e-06, "loss": 0.7793, "step": 2911 }, { "epoch": 0.4154065620542083, "grad_norm": 0.7613050937652588, "learning_rate": 9.694912201158679e-06, "loss": 0.7613, "step": 2912 }, { "epoch": 0.41554921540656203, "grad_norm": 0.8056584596633911, "learning_rate": 9.69464728000564e-06, "loss": 0.8121, "step": 2913 }, { "epoch": 0.41569186875891584, "grad_norm": 0.7558019757270813, "learning_rate": 9.694382247503951e-06, "loss": 0.7907, "step": 2914 }, { "epoch": 0.4158345221112696, "grad_norm": 1.5257713794708252, "learning_rate": 9.694117103659896e-06, "loss": 0.7828, "step": 2915 }, { "epoch": 0.4159771754636234, "grad_norm": 1.0058388710021973, "learning_rate": 9.693851848479769e-06, "loss": 0.7927, "step": 2916 }, { "epoch": 0.4161198288159772, "grad_norm": 0.7403674721717834, "learning_rate": 9.693586481969856e-06, "loss": 0.6133, "step": 2917 }, { "epoch": 0.41626248216833095, "grad_norm": 0.8431648015975952, "learning_rate": 9.693321004136451e-06, "loss": 0.7607, "step": 2918 }, { "epoch": 0.41640513552068475, "grad_norm": 1.0459867715835571, "learning_rate": 9.693055414985856e-06, "loss": 0.8597, "step": 2919 }, { "epoch": 0.4165477888730385, "grad_norm": 0.7903814911842346, "learning_rate": 9.692789714524366e-06, "loss": 0.7889, "step": 2920 }, { "epoch": 0.4166904422253923, "grad_norm": 1.0505493879318237, "learning_rate": 9.692523902758285e-06, "loss": 0.7474, "step": 2921 }, { "epoch": 0.41683309557774606, "grad_norm": 0.8948885798454285, "learning_rate": 9.692257979693914e-06, "loss": 0.7673, "step": 2922 }, { "epoch": 0.41697574893009987, "grad_norm": 0.8693767189979553, "learning_rate": 9.691991945337565e-06, "loss": 0.7114, "step": 2923 }, { "epoch": 0.4171184022824536, "grad_norm": 0.9207613468170166, "learning_rate": 9.691725799695544e-06, "loss": 0.7066, "step": 2924 }, { "epoch": 0.4172610556348074, "grad_norm": 0.9942516088485718, "learning_rate": 9.691459542774164e-06, "loss": 0.7637, "step": 2925 }, { "epoch": 0.41740370898716117, "grad_norm": 0.8703949451446533, "learning_rate": 9.691193174579744e-06, "loss": 0.71, "step": 2926 }, { "epoch": 0.417546362339515, "grad_norm": 0.7398990392684937, "learning_rate": 9.690926695118598e-06, "loss": 0.7718, "step": 2927 }, { "epoch": 0.4176890156918688, "grad_norm": 0.8628445863723755, "learning_rate": 9.690660104397047e-06, "loss": 0.7644, "step": 2928 }, { "epoch": 0.41783166904422253, "grad_norm": 0.995631992816925, "learning_rate": 9.690393402421413e-06, "loss": 0.8154, "step": 2929 }, { "epoch": 0.41797432239657634, "grad_norm": 0.7129629850387573, "learning_rate": 9.690126589198026e-06, "loss": 0.7785, "step": 2930 }, { "epoch": 0.4181169757489301, "grad_norm": 0.8479319214820862, "learning_rate": 9.68985966473321e-06, "loss": 0.7697, "step": 2931 }, { "epoch": 0.4182596291012839, "grad_norm": 0.8483423590660095, "learning_rate": 9.689592629033298e-06, "loss": 0.8224, "step": 2932 }, { "epoch": 0.41840228245363764, "grad_norm": 0.6983876824378967, "learning_rate": 9.689325482104623e-06, "loss": 0.7851, "step": 2933 }, { "epoch": 0.41854493580599145, "grad_norm": 0.9249723553657532, "learning_rate": 9.689058223953521e-06, "loss": 0.7409, "step": 2934 }, { "epoch": 0.4186875891583452, "grad_norm": 0.8699818253517151, "learning_rate": 9.688790854586332e-06, "loss": 0.8245, "step": 2935 }, { "epoch": 0.418830242510699, "grad_norm": 0.8217840790748596, "learning_rate": 9.688523374009395e-06, "loss": 0.8005, "step": 2936 }, { "epoch": 0.4189728958630528, "grad_norm": 0.8918729424476624, "learning_rate": 9.688255782229057e-06, "loss": 0.8313, "step": 2937 }, { "epoch": 0.41911554921540656, "grad_norm": 0.8414826393127441, "learning_rate": 9.687988079251664e-06, "loss": 0.8073, "step": 2938 }, { "epoch": 0.41925820256776036, "grad_norm": 1.081383228302002, "learning_rate": 9.687720265083564e-06, "loss": 0.7288, "step": 2939 }, { "epoch": 0.4194008559201141, "grad_norm": 1.0577263832092285, "learning_rate": 9.687452339731112e-06, "loss": 0.8177, "step": 2940 }, { "epoch": 0.4195435092724679, "grad_norm": 0.9822176098823547, "learning_rate": 9.687184303200658e-06, "loss": 0.7495, "step": 2941 }, { "epoch": 0.41968616262482167, "grad_norm": 1.1737984418869019, "learning_rate": 9.686916155498565e-06, "loss": 0.753, "step": 2942 }, { "epoch": 0.4198288159771755, "grad_norm": 1.0080480575561523, "learning_rate": 9.68664789663119e-06, "loss": 0.7267, "step": 2943 }, { "epoch": 0.4199714693295292, "grad_norm": 0.8572803139686584, "learning_rate": 9.686379526604895e-06, "loss": 0.7757, "step": 2944 }, { "epoch": 0.42011412268188303, "grad_norm": 0.9193053841590881, "learning_rate": 9.686111045426045e-06, "loss": 0.7465, "step": 2945 }, { "epoch": 0.4202567760342368, "grad_norm": 0.9724590182304382, "learning_rate": 9.685842453101011e-06, "loss": 0.764, "step": 2946 }, { "epoch": 0.4203994293865906, "grad_norm": 0.866526186466217, "learning_rate": 9.685573749636161e-06, "loss": 0.7693, "step": 2947 }, { "epoch": 0.4205420827389444, "grad_norm": 0.8589038848876953, "learning_rate": 9.68530493503787e-06, "loss": 0.7453, "step": 2948 }, { "epoch": 0.42068473609129814, "grad_norm": 0.7004780173301697, "learning_rate": 9.68503600931251e-06, "loss": 0.8181, "step": 2949 }, { "epoch": 0.42082738944365194, "grad_norm": 0.8782320618629456, "learning_rate": 9.684766972466463e-06, "loss": 0.814, "step": 2950 }, { "epoch": 0.4209700427960057, "grad_norm": 0.9447126388549805, "learning_rate": 9.68449782450611e-06, "loss": 0.791, "step": 2951 }, { "epoch": 0.4211126961483595, "grad_norm": 1.1747344732284546, "learning_rate": 9.684228565437833e-06, "loss": 0.8835, "step": 2952 }, { "epoch": 0.42125534950071325, "grad_norm": 0.7942746877670288, "learning_rate": 9.683959195268018e-06, "loss": 0.8682, "step": 2953 }, { "epoch": 0.42139800285306706, "grad_norm": 0.8650901317596436, "learning_rate": 9.683689714003055e-06, "loss": 0.8234, "step": 2954 }, { "epoch": 0.4215406562054208, "grad_norm": 0.8994263410568237, "learning_rate": 9.683420121649336e-06, "loss": 0.7916, "step": 2955 }, { "epoch": 0.4216833095577746, "grad_norm": 1.0481706857681274, "learning_rate": 9.683150418213257e-06, "loss": 0.6965, "step": 2956 }, { "epoch": 0.42182596291012836, "grad_norm": 0.9313482046127319, "learning_rate": 9.68288060370121e-06, "loss": 0.8502, "step": 2957 }, { "epoch": 0.42196861626248217, "grad_norm": 0.9151977896690369, "learning_rate": 9.682610678119599e-06, "loss": 0.7831, "step": 2958 }, { "epoch": 0.42211126961483597, "grad_norm": 0.6517190337181091, "learning_rate": 9.682340641474824e-06, "loss": 0.7718, "step": 2959 }, { "epoch": 0.4222539229671897, "grad_norm": 0.7682685256004333, "learning_rate": 9.68207049377329e-06, "loss": 0.786, "step": 2960 }, { "epoch": 0.4223965763195435, "grad_norm": 0.8396185040473938, "learning_rate": 9.681800235021407e-06, "loss": 0.7308, "step": 2961 }, { "epoch": 0.4225392296718973, "grad_norm": 1.1531387567520142, "learning_rate": 9.68152986522558e-06, "loss": 0.8158, "step": 2962 }, { "epoch": 0.4226818830242511, "grad_norm": 0.820608377456665, "learning_rate": 9.681259384392225e-06, "loss": 0.787, "step": 2963 }, { "epoch": 0.42282453637660483, "grad_norm": 0.9447684288024902, "learning_rate": 9.680988792527758e-06, "loss": 0.8109, "step": 2964 }, { "epoch": 0.42296718972895864, "grad_norm": 1.0396854877471924, "learning_rate": 9.680718089638594e-06, "loss": 0.8199, "step": 2965 }, { "epoch": 0.4231098430813124, "grad_norm": 0.913512647151947, "learning_rate": 9.680447275731156e-06, "loss": 0.7568, "step": 2966 }, { "epoch": 0.4232524964336662, "grad_norm": 0.9895064830780029, "learning_rate": 9.680176350811867e-06, "loss": 0.8481, "step": 2967 }, { "epoch": 0.42339514978602, "grad_norm": 0.8953961133956909, "learning_rate": 9.679905314887152e-06, "loss": 0.8444, "step": 2968 }, { "epoch": 0.42353780313837375, "grad_norm": 0.88303142786026, "learning_rate": 9.67963416796344e-06, "loss": 0.7658, "step": 2969 }, { "epoch": 0.42368045649072755, "grad_norm": 1.0362825393676758, "learning_rate": 9.67936291004716e-06, "loss": 0.8038, "step": 2970 }, { "epoch": 0.4238231098430813, "grad_norm": 0.9750179052352905, "learning_rate": 9.67909154114475e-06, "loss": 0.8254, "step": 2971 }, { "epoch": 0.4239657631954351, "grad_norm": 1.0256450176239014, "learning_rate": 9.678820061262643e-06, "loss": 0.7433, "step": 2972 }, { "epoch": 0.42410841654778886, "grad_norm": 0.7564718723297119, "learning_rate": 9.678548470407281e-06, "loss": 0.8129, "step": 2973 }, { "epoch": 0.42425106990014266, "grad_norm": 1.1093556880950928, "learning_rate": 9.678276768585102e-06, "loss": 0.7977, "step": 2974 }, { "epoch": 0.4243937232524964, "grad_norm": 0.9291697144508362, "learning_rate": 9.678004955802552e-06, "loss": 0.7628, "step": 2975 }, { "epoch": 0.4245363766048502, "grad_norm": 0.9266040325164795, "learning_rate": 9.67773303206608e-06, "loss": 0.8135, "step": 2976 }, { "epoch": 0.42467902995720397, "grad_norm": 0.8013978600502014, "learning_rate": 9.677460997382131e-06, "loss": 0.7634, "step": 2977 }, { "epoch": 0.4248216833095578, "grad_norm": 0.8356562852859497, "learning_rate": 9.677188851757162e-06, "loss": 0.7233, "step": 2978 }, { "epoch": 0.4249643366619116, "grad_norm": 0.9609328508377075, "learning_rate": 9.676916595197624e-06, "loss": 0.7878, "step": 2979 }, { "epoch": 0.42510699001426533, "grad_norm": 0.7560855746269226, "learning_rate": 9.676644227709975e-06, "loss": 0.8273, "step": 2980 }, { "epoch": 0.42524964336661913, "grad_norm": 0.8932199478149414, "learning_rate": 9.676371749300678e-06, "loss": 0.8321, "step": 2981 }, { "epoch": 0.4253922967189729, "grad_norm": 0.8932722806930542, "learning_rate": 9.676099159976193e-06, "loss": 0.7603, "step": 2982 }, { "epoch": 0.4255349500713267, "grad_norm": 1.1066131591796875, "learning_rate": 9.675826459742984e-06, "loss": 0.8208, "step": 2983 }, { "epoch": 0.42567760342368044, "grad_norm": 0.8241662979125977, "learning_rate": 9.675553648607525e-06, "loss": 0.8354, "step": 2984 }, { "epoch": 0.42582025677603424, "grad_norm": 0.87542325258255, "learning_rate": 9.675280726576282e-06, "loss": 0.769, "step": 2985 }, { "epoch": 0.425962910128388, "grad_norm": 0.7184815406799316, "learning_rate": 9.675007693655727e-06, "loss": 0.7796, "step": 2986 }, { "epoch": 0.4261055634807418, "grad_norm": 0.8368867635726929, "learning_rate": 9.674734549852338e-06, "loss": 0.7569, "step": 2987 }, { "epoch": 0.42624821683309555, "grad_norm": 0.8585637807846069, "learning_rate": 9.674461295172593e-06, "loss": 0.7845, "step": 2988 }, { "epoch": 0.42639087018544936, "grad_norm": 1.1723369359970093, "learning_rate": 9.674187929622973e-06, "loss": 0.749, "step": 2989 }, { "epoch": 0.42653352353780316, "grad_norm": 0.8972938656806946, "learning_rate": 9.673914453209963e-06, "loss": 0.7623, "step": 2990 }, { "epoch": 0.4266761768901569, "grad_norm": 0.7342985272407532, "learning_rate": 9.673640865940049e-06, "loss": 0.648, "step": 2991 }, { "epoch": 0.4268188302425107, "grad_norm": 0.852169394493103, "learning_rate": 9.673367167819718e-06, "loss": 0.817, "step": 2992 }, { "epoch": 0.42696148359486447, "grad_norm": 0.9238767623901367, "learning_rate": 9.673093358855464e-06, "loss": 0.7707, "step": 2993 }, { "epoch": 0.42710413694721827, "grad_norm": 1.145296335220337, "learning_rate": 9.67281943905378e-06, "loss": 0.7956, "step": 2994 }, { "epoch": 0.427246790299572, "grad_norm": 0.7868570685386658, "learning_rate": 9.672545408421163e-06, "loss": 0.7725, "step": 2995 }, { "epoch": 0.4273894436519258, "grad_norm": 0.813515841960907, "learning_rate": 9.672271266964114e-06, "loss": 0.7757, "step": 2996 }, { "epoch": 0.4275320970042796, "grad_norm": 0.8881155848503113, "learning_rate": 9.671997014689132e-06, "loss": 0.6963, "step": 2997 }, { "epoch": 0.4276747503566334, "grad_norm": 0.9524695873260498, "learning_rate": 9.671722651602723e-06, "loss": 0.8461, "step": 2998 }, { "epoch": 0.4278174037089872, "grad_norm": 0.9918982982635498, "learning_rate": 9.671448177711397e-06, "loss": 0.7447, "step": 2999 }, { "epoch": 0.42796005706134094, "grad_norm": 1.121673345565796, "learning_rate": 9.67117359302166e-06, "loss": 0.757, "step": 3000 }, { "epoch": 0.42810271041369474, "grad_norm": 0.9098827242851257, "learning_rate": 9.670898897540028e-06, "loss": 0.7916, "step": 3001 }, { "epoch": 0.4282453637660485, "grad_norm": 1.1406325101852417, "learning_rate": 9.670624091273015e-06, "loss": 0.6309, "step": 3002 }, { "epoch": 0.4283880171184023, "grad_norm": 0.8661484718322754, "learning_rate": 9.67034917422714e-06, "loss": 0.7726, "step": 3003 }, { "epoch": 0.42853067047075605, "grad_norm": 1.3654043674468994, "learning_rate": 9.670074146408921e-06, "loss": 0.7237, "step": 3004 }, { "epoch": 0.42867332382310985, "grad_norm": 0.8865222334861755, "learning_rate": 9.669799007824883e-06, "loss": 0.7603, "step": 3005 }, { "epoch": 0.4288159771754636, "grad_norm": 0.8929343223571777, "learning_rate": 9.669523758481552e-06, "loss": 0.718, "step": 3006 }, { "epoch": 0.4289586305278174, "grad_norm": 0.904663622379303, "learning_rate": 9.669248398385455e-06, "loss": 0.6883, "step": 3007 }, { "epoch": 0.42910128388017116, "grad_norm": 0.8316035866737366, "learning_rate": 9.668972927543124e-06, "loss": 0.7782, "step": 3008 }, { "epoch": 0.42924393723252496, "grad_norm": 1.2531659603118896, "learning_rate": 9.668697345961093e-06, "loss": 0.8153, "step": 3009 }, { "epoch": 0.42938659058487877, "grad_norm": 0.7125189900398254, "learning_rate": 9.668421653645897e-06, "loss": 0.7405, "step": 3010 }, { "epoch": 0.4295292439372325, "grad_norm": 0.9366601705551147, "learning_rate": 9.668145850604077e-06, "loss": 0.8485, "step": 3011 }, { "epoch": 0.4296718972895863, "grad_norm": 1.0495223999023438, "learning_rate": 9.667869936842173e-06, "loss": 0.6174, "step": 3012 }, { "epoch": 0.4298145506419401, "grad_norm": 0.9196107983589172, "learning_rate": 9.66759391236673e-06, "loss": 0.8012, "step": 3013 }, { "epoch": 0.4299572039942939, "grad_norm": 1.0751285552978516, "learning_rate": 9.667317777184293e-06, "loss": 0.7651, "step": 3014 }, { "epoch": 0.43009985734664763, "grad_norm": 1.247733235359192, "learning_rate": 9.667041531301412e-06, "loss": 0.7583, "step": 3015 }, { "epoch": 0.43024251069900143, "grad_norm": 1.0579503774642944, "learning_rate": 9.666765174724641e-06, "loss": 0.7364, "step": 3016 }, { "epoch": 0.4303851640513552, "grad_norm": 0.7801899909973145, "learning_rate": 9.666488707460536e-06, "loss": 0.7744, "step": 3017 }, { "epoch": 0.430527817403709, "grad_norm": 1.1810188293457031, "learning_rate": 9.66621212951565e-06, "loss": 0.8798, "step": 3018 }, { "epoch": 0.43067047075606274, "grad_norm": 0.9429445862770081, "learning_rate": 9.665935440896544e-06, "loss": 0.8093, "step": 3019 }, { "epoch": 0.43081312410841655, "grad_norm": 0.8942497372627258, "learning_rate": 9.665658641609781e-06, "loss": 0.7485, "step": 3020 }, { "epoch": 0.43095577746077035, "grad_norm": 0.9179964661598206, "learning_rate": 9.665381731661927e-06, "loss": 0.8204, "step": 3021 }, { "epoch": 0.4310984308131241, "grad_norm": 0.8470039963722229, "learning_rate": 9.66510471105955e-06, "loss": 0.7596, "step": 3022 }, { "epoch": 0.4312410841654779, "grad_norm": 1.2013462781906128, "learning_rate": 9.664827579809218e-06, "loss": 0.7973, "step": 3023 }, { "epoch": 0.43138373751783166, "grad_norm": 1.0752782821655273, "learning_rate": 9.664550337917508e-06, "loss": 0.7089, "step": 3024 }, { "epoch": 0.43152639087018546, "grad_norm": 0.9532427191734314, "learning_rate": 9.664272985390992e-06, "loss": 0.7647, "step": 3025 }, { "epoch": 0.4316690442225392, "grad_norm": 1.0374795198440552, "learning_rate": 9.66399552223625e-06, "loss": 0.7886, "step": 3026 }, { "epoch": 0.431811697574893, "grad_norm": 0.8640323877334595, "learning_rate": 9.663717948459862e-06, "loss": 0.7641, "step": 3027 }, { "epoch": 0.43195435092724677, "grad_norm": 0.9225583672523499, "learning_rate": 9.663440264068416e-06, "loss": 0.8084, "step": 3028 }, { "epoch": 0.43209700427960057, "grad_norm": 1.0404295921325684, "learning_rate": 9.663162469068491e-06, "loss": 0.7242, "step": 3029 }, { "epoch": 0.4322396576319544, "grad_norm": 0.7711402773857117, "learning_rate": 9.66288456346668e-06, "loss": 0.746, "step": 3030 }, { "epoch": 0.4323823109843081, "grad_norm": 0.9285078644752502, "learning_rate": 9.662606547269574e-06, "loss": 0.7943, "step": 3031 }, { "epoch": 0.43252496433666193, "grad_norm": 0.7582229375839233, "learning_rate": 9.662328420483766e-06, "loss": 0.7592, "step": 3032 }, { "epoch": 0.4326676176890157, "grad_norm": 1.0217987298965454, "learning_rate": 9.662050183115853e-06, "loss": 0.7232, "step": 3033 }, { "epoch": 0.4328102710413695, "grad_norm": 0.9547514319419861, "learning_rate": 9.661771835172438e-06, "loss": 0.7271, "step": 3034 }, { "epoch": 0.43295292439372324, "grad_norm": 0.6787132024765015, "learning_rate": 9.661493376660117e-06, "loss": 0.7799, "step": 3035 }, { "epoch": 0.43309557774607704, "grad_norm": 0.8108693957328796, "learning_rate": 9.661214807585498e-06, "loss": 0.7701, "step": 3036 }, { "epoch": 0.4332382310984308, "grad_norm": 1.157433271408081, "learning_rate": 9.660936127955185e-06, "loss": 0.803, "step": 3037 }, { "epoch": 0.4333808844507846, "grad_norm": 0.7043929696083069, "learning_rate": 9.660657337775792e-06, "loss": 0.7616, "step": 3038 }, { "epoch": 0.43352353780313835, "grad_norm": 0.9452171325683594, "learning_rate": 9.66037843705393e-06, "loss": 0.759, "step": 3039 }, { "epoch": 0.43366619115549215, "grad_norm": 0.9995279908180237, "learning_rate": 9.660099425796212e-06, "loss": 0.8103, "step": 3040 }, { "epoch": 0.43380884450784596, "grad_norm": 0.9892847537994385, "learning_rate": 9.659820304009259e-06, "loss": 0.8005, "step": 3041 }, { "epoch": 0.4339514978601997, "grad_norm": 0.7565533518791199, "learning_rate": 9.659541071699689e-06, "loss": 0.7973, "step": 3042 }, { "epoch": 0.4340941512125535, "grad_norm": 0.8700818419456482, "learning_rate": 9.659261728874124e-06, "loss": 0.8226, "step": 3043 }, { "epoch": 0.43423680456490726, "grad_norm": 0.9775557518005371, "learning_rate": 9.658982275539192e-06, "loss": 0.8062, "step": 3044 }, { "epoch": 0.43437945791726107, "grad_norm": 0.7708172798156738, "learning_rate": 9.65870271170152e-06, "loss": 0.8032, "step": 3045 }, { "epoch": 0.4345221112696148, "grad_norm": 1.0917460918426514, "learning_rate": 9.65842303736774e-06, "loss": 0.7665, "step": 3046 }, { "epoch": 0.4346647646219686, "grad_norm": 0.7986547946929932, "learning_rate": 9.65814325254448e-06, "loss": 0.7897, "step": 3047 }, { "epoch": 0.4348074179743224, "grad_norm": 0.7651693820953369, "learning_rate": 9.657863357238383e-06, "loss": 0.8331, "step": 3048 }, { "epoch": 0.4349500713266762, "grad_norm": 0.9188210964202881, "learning_rate": 9.657583351456087e-06, "loss": 0.8155, "step": 3049 }, { "epoch": 0.43509272467902993, "grad_norm": 1.0141257047653198, "learning_rate": 9.657303235204225e-06, "loss": 0.8118, "step": 3050 }, { "epoch": 0.43523537803138374, "grad_norm": 0.786700963973999, "learning_rate": 9.657023008489453e-06, "loss": 0.7622, "step": 3051 }, { "epoch": 0.43537803138373754, "grad_norm": 0.846847653388977, "learning_rate": 9.656742671318407e-06, "loss": 0.7399, "step": 3052 }, { "epoch": 0.4355206847360913, "grad_norm": 0.9744015336036682, "learning_rate": 9.656462223697743e-06, "loss": 0.8088, "step": 3053 }, { "epoch": 0.4356633380884451, "grad_norm": 1.167312741279602, "learning_rate": 9.656181665634108e-06, "loss": 0.5903, "step": 3054 }, { "epoch": 0.43580599144079885, "grad_norm": 0.9063330888748169, "learning_rate": 9.65590099713416e-06, "loss": 0.8071, "step": 3055 }, { "epoch": 0.43594864479315265, "grad_norm": 1.0339521169662476, "learning_rate": 9.655620218204553e-06, "loss": 0.7904, "step": 3056 }, { "epoch": 0.4360912981455064, "grad_norm": 0.7241429686546326, "learning_rate": 9.65533932885195e-06, "loss": 0.7982, "step": 3057 }, { "epoch": 0.4362339514978602, "grad_norm": 0.8558134436607361, "learning_rate": 9.65505832908301e-06, "loss": 0.8195, "step": 3058 }, { "epoch": 0.43637660485021396, "grad_norm": 0.9787079095840454, "learning_rate": 9.654777218904398e-06, "loss": 0.7268, "step": 3059 }, { "epoch": 0.43651925820256776, "grad_norm": 0.8959437012672424, "learning_rate": 9.654495998322785e-06, "loss": 0.8182, "step": 3060 }, { "epoch": 0.43666191155492157, "grad_norm": 0.842130720615387, "learning_rate": 9.654214667344836e-06, "loss": 0.8604, "step": 3061 }, { "epoch": 0.4368045649072753, "grad_norm": 0.924280047416687, "learning_rate": 9.653933225977228e-06, "loss": 0.7298, "step": 3062 }, { "epoch": 0.4369472182596291, "grad_norm": 1.0871145725250244, "learning_rate": 9.653651674226633e-06, "loss": 0.8445, "step": 3063 }, { "epoch": 0.43708987161198287, "grad_norm": 1.035857915878296, "learning_rate": 9.653370012099731e-06, "loss": 0.8529, "step": 3064 }, { "epoch": 0.4372325249643367, "grad_norm": 0.8568934202194214, "learning_rate": 9.653088239603203e-06, "loss": 0.7676, "step": 3065 }, { "epoch": 0.4373751783166904, "grad_norm": 1.0932868719100952, "learning_rate": 9.65280635674373e-06, "loss": 0.8164, "step": 3066 }, { "epoch": 0.43751783166904423, "grad_norm": 0.7900270819664001, "learning_rate": 9.652524363527998e-06, "loss": 0.7831, "step": 3067 }, { "epoch": 0.437660485021398, "grad_norm": 0.7176871299743652, "learning_rate": 9.652242259962696e-06, "loss": 0.7843, "step": 3068 }, { "epoch": 0.4378031383737518, "grad_norm": 0.677873432636261, "learning_rate": 9.651960046054515e-06, "loss": 0.8556, "step": 3069 }, { "epoch": 0.43794579172610554, "grad_norm": 0.9160250425338745, "learning_rate": 9.65167772181015e-06, "loss": 0.7274, "step": 3070 }, { "epoch": 0.43808844507845934, "grad_norm": 0.6624865531921387, "learning_rate": 9.651395287236296e-06, "loss": 0.7651, "step": 3071 }, { "epoch": 0.43823109843081315, "grad_norm": 1.0936319828033447, "learning_rate": 9.651112742339652e-06, "loss": 0.7677, "step": 3072 }, { "epoch": 0.4383737517831669, "grad_norm": 0.7446964979171753, "learning_rate": 9.650830087126918e-06, "loss": 0.8186, "step": 3073 }, { "epoch": 0.4385164051355207, "grad_norm": 0.8047989010810852, "learning_rate": 9.6505473216048e-06, "loss": 0.7856, "step": 3074 }, { "epoch": 0.43865905848787445, "grad_norm": 0.8886755704879761, "learning_rate": 9.650264445780004e-06, "loss": 0.687, "step": 3075 }, { "epoch": 0.43880171184022826, "grad_norm": 0.8686378598213196, "learning_rate": 9.64998145965924e-06, "loss": 0.7465, "step": 3076 }, { "epoch": 0.438944365192582, "grad_norm": 0.746656596660614, "learning_rate": 9.649698363249218e-06, "loss": 0.8124, "step": 3077 }, { "epoch": 0.4390870185449358, "grad_norm": 0.9459176659584045, "learning_rate": 9.649415156556655e-06, "loss": 0.6717, "step": 3078 }, { "epoch": 0.43922967189728956, "grad_norm": 1.091217041015625, "learning_rate": 9.649131839588267e-06, "loss": 0.8181, "step": 3079 }, { "epoch": 0.43937232524964337, "grad_norm": 0.8017871975898743, "learning_rate": 9.648848412350771e-06, "loss": 0.7779, "step": 3080 }, { "epoch": 0.4395149786019971, "grad_norm": 0.6447712182998657, "learning_rate": 9.648564874850896e-06, "loss": 0.5722, "step": 3081 }, { "epoch": 0.4396576319543509, "grad_norm": 0.8549794554710388, "learning_rate": 9.648281227095361e-06, "loss": 0.7929, "step": 3082 }, { "epoch": 0.43980028530670473, "grad_norm": 0.8107016682624817, "learning_rate": 9.647997469090897e-06, "loss": 0.8119, "step": 3083 }, { "epoch": 0.4399429386590585, "grad_norm": 1.0441479682922363, "learning_rate": 9.64771360084423e-06, "loss": 0.583, "step": 3084 }, { "epoch": 0.4400855920114123, "grad_norm": 0.9004194140434265, "learning_rate": 9.6474296223621e-06, "loss": 0.7648, "step": 3085 }, { "epoch": 0.44022824536376604, "grad_norm": 1.1389235258102417, "learning_rate": 9.647145533651236e-06, "loss": 0.58, "step": 3086 }, { "epoch": 0.44037089871611984, "grad_norm": 0.9744426012039185, "learning_rate": 9.646861334718378e-06, "loss": 0.7146, "step": 3087 }, { "epoch": 0.4405135520684736, "grad_norm": 0.8823460340499878, "learning_rate": 9.646577025570267e-06, "loss": 0.8082, "step": 3088 }, { "epoch": 0.4406562054208274, "grad_norm": 0.8189143538475037, "learning_rate": 9.646292606213648e-06, "loss": 0.7566, "step": 3089 }, { "epoch": 0.44079885877318115, "grad_norm": 0.8790888786315918, "learning_rate": 9.646008076655264e-06, "loss": 0.7985, "step": 3090 }, { "epoch": 0.44094151212553495, "grad_norm": 0.896385908126831, "learning_rate": 9.645723436901865e-06, "loss": 0.7181, "step": 3091 }, { "epoch": 0.44108416547788876, "grad_norm": 0.8352712392807007, "learning_rate": 9.645438686960202e-06, "loss": 0.8581, "step": 3092 }, { "epoch": 0.4412268188302425, "grad_norm": 0.9876428842544556, "learning_rate": 9.645153826837029e-06, "loss": 0.7531, "step": 3093 }, { "epoch": 0.4413694721825963, "grad_norm": 0.9629712104797363, "learning_rate": 9.644868856539102e-06, "loss": 0.7679, "step": 3094 }, { "epoch": 0.44151212553495006, "grad_norm": 0.8564965724945068, "learning_rate": 9.64458377607318e-06, "loss": 0.7726, "step": 3095 }, { "epoch": 0.44165477888730387, "grad_norm": 0.964055061340332, "learning_rate": 9.644298585446025e-06, "loss": 0.722, "step": 3096 }, { "epoch": 0.4417974322396576, "grad_norm": 1.0898507833480835, "learning_rate": 9.6440132846644e-06, "loss": 0.8075, "step": 3097 }, { "epoch": 0.4419400855920114, "grad_norm": 0.8865700960159302, "learning_rate": 9.643727873735074e-06, "loss": 0.75, "step": 3098 }, { "epoch": 0.4420827389443652, "grad_norm": 0.9955110549926758, "learning_rate": 9.643442352664815e-06, "loss": 0.7279, "step": 3099 }, { "epoch": 0.442225392296719, "grad_norm": 0.8635203242301941, "learning_rate": 9.643156721460393e-06, "loss": 0.7342, "step": 3100 }, { "epoch": 0.4423680456490727, "grad_norm": 0.7685550451278687, "learning_rate": 9.642870980128588e-06, "loss": 0.824, "step": 3101 }, { "epoch": 0.44251069900142653, "grad_norm": 1.062356948852539, "learning_rate": 9.642585128676172e-06, "loss": 0.7089, "step": 3102 }, { "epoch": 0.44265335235378034, "grad_norm": 0.958548903465271, "learning_rate": 9.642299167109929e-06, "loss": 0.8108, "step": 3103 }, { "epoch": 0.4427960057061341, "grad_norm": 1.234354019165039, "learning_rate": 9.642013095436638e-06, "loss": 0.8021, "step": 3104 }, { "epoch": 0.4429386590584879, "grad_norm": 0.7994375824928284, "learning_rate": 9.641726913663085e-06, "loss": 0.7735, "step": 3105 }, { "epoch": 0.44308131241084164, "grad_norm": 0.8465738296508789, "learning_rate": 9.641440621796061e-06, "loss": 0.7941, "step": 3106 }, { "epoch": 0.44322396576319545, "grad_norm": 0.9791854619979858, "learning_rate": 9.64115421984235e-06, "loss": 0.8316, "step": 3107 }, { "epoch": 0.4433666191155492, "grad_norm": 0.735660195350647, "learning_rate": 9.640867707808751e-06, "loss": 0.7989, "step": 3108 }, { "epoch": 0.443509272467903, "grad_norm": 0.7800251841545105, "learning_rate": 9.640581085702059e-06, "loss": 0.7541, "step": 3109 }, { "epoch": 0.44365192582025675, "grad_norm": 1.094420075416565, "learning_rate": 9.640294353529067e-06, "loss": 0.7858, "step": 3110 }, { "epoch": 0.44379457917261056, "grad_norm": 1.032516360282898, "learning_rate": 9.64000751129658e-06, "loss": 0.7806, "step": 3111 }, { "epoch": 0.4439372325249643, "grad_norm": 0.7042543292045593, "learning_rate": 9.639720559011401e-06, "loss": 0.7736, "step": 3112 }, { "epoch": 0.4440798858773181, "grad_norm": 0.821504533290863, "learning_rate": 9.639433496680337e-06, "loss": 0.8189, "step": 3113 }, { "epoch": 0.4442225392296719, "grad_norm": 0.7929046154022217, "learning_rate": 9.639146324310192e-06, "loss": 0.7493, "step": 3114 }, { "epoch": 0.44436519258202567, "grad_norm": 0.8610069155693054, "learning_rate": 9.638859041907781e-06, "loss": 0.7907, "step": 3115 }, { "epoch": 0.4445078459343795, "grad_norm": 1.0897170305252075, "learning_rate": 9.638571649479919e-06, "loss": 0.7437, "step": 3116 }, { "epoch": 0.4446504992867332, "grad_norm": 0.998016893863678, "learning_rate": 9.63828414703342e-06, "loss": 0.6941, "step": 3117 }, { "epoch": 0.44479315263908703, "grad_norm": 0.7024821639060974, "learning_rate": 9.637996534575103e-06, "loss": 0.8237, "step": 3118 }, { "epoch": 0.4449358059914408, "grad_norm": 0.9574607610702515, "learning_rate": 9.63770881211179e-06, "loss": 0.7375, "step": 3119 }, { "epoch": 0.4450784593437946, "grad_norm": 0.8610005974769592, "learning_rate": 9.637420979650307e-06, "loss": 0.7895, "step": 3120 }, { "epoch": 0.44522111269614834, "grad_norm": 0.8034377694129944, "learning_rate": 9.637133037197479e-06, "loss": 0.8204, "step": 3121 }, { "epoch": 0.44536376604850214, "grad_norm": 0.7363777160644531, "learning_rate": 9.636844984760134e-06, "loss": 0.8248, "step": 3122 }, { "epoch": 0.44550641940085595, "grad_norm": 1.3569444417953491, "learning_rate": 9.63655682234511e-06, "loss": 0.802, "step": 3123 }, { "epoch": 0.4456490727532097, "grad_norm": 0.9390245079994202, "learning_rate": 9.636268549959233e-06, "loss": 0.7577, "step": 3124 }, { "epoch": 0.4457917261055635, "grad_norm": 0.8765220046043396, "learning_rate": 9.635980167609347e-06, "loss": 0.8271, "step": 3125 }, { "epoch": 0.44593437945791725, "grad_norm": 0.693592369556427, "learning_rate": 9.63569167530229e-06, "loss": 0.7494, "step": 3126 }, { "epoch": 0.44607703281027106, "grad_norm": 0.8288514018058777, "learning_rate": 9.635403073044905e-06, "loss": 0.825, "step": 3127 }, { "epoch": 0.4462196861626248, "grad_norm": 0.8633185625076294, "learning_rate": 9.635114360844035e-06, "loss": 0.7698, "step": 3128 }, { "epoch": 0.4463623395149786, "grad_norm": 0.9909041523933411, "learning_rate": 9.634825538706532e-06, "loss": 0.7964, "step": 3129 }, { "epoch": 0.44650499286733236, "grad_norm": 1.0626932382583618, "learning_rate": 9.634536606639243e-06, "loss": 0.7989, "step": 3130 }, { "epoch": 0.44664764621968617, "grad_norm": 0.6464933753013611, "learning_rate": 9.634247564649019e-06, "loss": 0.8357, "step": 3131 }, { "epoch": 0.4467902995720399, "grad_norm": 0.9832266569137573, "learning_rate": 9.63395841274272e-06, "loss": 0.7892, "step": 3132 }, { "epoch": 0.4469329529243937, "grad_norm": 0.9554382562637329, "learning_rate": 9.633669150927201e-06, "loss": 0.7939, "step": 3133 }, { "epoch": 0.44707560627674753, "grad_norm": 0.8843060731887817, "learning_rate": 9.633379779209326e-06, "loss": 0.8062, "step": 3134 }, { "epoch": 0.4472182596291013, "grad_norm": 0.9719558358192444, "learning_rate": 9.633090297595956e-06, "loss": 0.7762, "step": 3135 }, { "epoch": 0.4473609129814551, "grad_norm": 0.9557838439941406, "learning_rate": 9.632800706093956e-06, "loss": 0.8144, "step": 3136 }, { "epoch": 0.44750356633380883, "grad_norm": 1.2489206790924072, "learning_rate": 9.6325110047102e-06, "loss": 0.7602, "step": 3137 }, { "epoch": 0.44764621968616264, "grad_norm": 0.7784707546234131, "learning_rate": 9.63222119345155e-06, "loss": 0.7544, "step": 3138 }, { "epoch": 0.4477888730385164, "grad_norm": 0.7684941291809082, "learning_rate": 9.631931272324889e-06, "loss": 0.8124, "step": 3139 }, { "epoch": 0.4479315263908702, "grad_norm": 1.026983380317688, "learning_rate": 9.631641241337088e-06, "loss": 0.7457, "step": 3140 }, { "epoch": 0.44807417974322394, "grad_norm": 0.898593008518219, "learning_rate": 9.631351100495028e-06, "loss": 0.7497, "step": 3141 }, { "epoch": 0.44821683309557775, "grad_norm": 0.8573411107063293, "learning_rate": 9.63106084980559e-06, "loss": 0.788, "step": 3142 }, { "epoch": 0.4483594864479315, "grad_norm": 1.0061452388763428, "learning_rate": 9.630770489275657e-06, "loss": 0.8013, "step": 3143 }, { "epoch": 0.4485021398002853, "grad_norm": 1.1630775928497314, "learning_rate": 9.630480018912119e-06, "loss": 0.7759, "step": 3144 }, { "epoch": 0.4486447931526391, "grad_norm": 0.8170731067657471, "learning_rate": 9.630189438721862e-06, "loss": 0.7687, "step": 3145 }, { "epoch": 0.44878744650499286, "grad_norm": 0.9104035496711731, "learning_rate": 9.62989874871178e-06, "loss": 0.7778, "step": 3146 }, { "epoch": 0.44893009985734667, "grad_norm": 0.9890031814575195, "learning_rate": 9.629607948888769e-06, "loss": 0.7919, "step": 3147 }, { "epoch": 0.4490727532097004, "grad_norm": 0.8792649507522583, "learning_rate": 9.629317039259722e-06, "loss": 0.8439, "step": 3148 }, { "epoch": 0.4492154065620542, "grad_norm": 0.7971907258033752, "learning_rate": 9.629026019831545e-06, "loss": 0.7737, "step": 3149 }, { "epoch": 0.44935805991440797, "grad_norm": 1.1017431020736694, "learning_rate": 9.628734890611133e-06, "loss": 0.7723, "step": 3150 }, { "epoch": 0.4495007132667618, "grad_norm": 0.8510836362838745, "learning_rate": 9.628443651605397e-06, "loss": 0.6911, "step": 3151 }, { "epoch": 0.4496433666191155, "grad_norm": 0.6971721053123474, "learning_rate": 9.628152302821242e-06, "loss": 0.7196, "step": 3152 }, { "epoch": 0.44978601997146933, "grad_norm": 0.9653314352035522, "learning_rate": 9.62786084426558e-06, "loss": 0.7898, "step": 3153 }, { "epoch": 0.44992867332382314, "grad_norm": 1.0043598413467407, "learning_rate": 9.62756927594532e-06, "loss": 0.6401, "step": 3154 }, { "epoch": 0.4500713266761769, "grad_norm": 1.0867184400558472, "learning_rate": 9.627277597867383e-06, "loss": 0.7209, "step": 3155 }, { "epoch": 0.4502139800285307, "grad_norm": 0.8640044331550598, "learning_rate": 9.626985810038685e-06, "loss": 0.7667, "step": 3156 }, { "epoch": 0.45035663338088444, "grad_norm": 0.6708057522773743, "learning_rate": 9.626693912466142e-06, "loss": 0.7246, "step": 3157 }, { "epoch": 0.45049928673323825, "grad_norm": 0.9218789935112, "learning_rate": 9.626401905156686e-06, "loss": 0.7633, "step": 3158 }, { "epoch": 0.450641940085592, "grad_norm": 0.7658910751342773, "learning_rate": 9.626109788117236e-06, "loss": 0.7817, "step": 3159 }, { "epoch": 0.4507845934379458, "grad_norm": 0.9468095898628235, "learning_rate": 9.625817561354723e-06, "loss": 0.7421, "step": 3160 }, { "epoch": 0.45092724679029955, "grad_norm": 0.753380298614502, "learning_rate": 9.625525224876076e-06, "loss": 0.7626, "step": 3161 }, { "epoch": 0.45106990014265336, "grad_norm": 0.9808356165885925, "learning_rate": 9.625232778688232e-06, "loss": 0.7686, "step": 3162 }, { "epoch": 0.4512125534950071, "grad_norm": 0.953310489654541, "learning_rate": 9.624940222798126e-06, "loss": 0.7573, "step": 3163 }, { "epoch": 0.4513552068473609, "grad_norm": 1.0124878883361816, "learning_rate": 9.624647557212696e-06, "loss": 0.7834, "step": 3164 }, { "epoch": 0.4514978601997147, "grad_norm": 0.9995359182357788, "learning_rate": 9.624354781938885e-06, "loss": 0.764, "step": 3165 }, { "epoch": 0.45164051355206847, "grad_norm": 1.0033024549484253, "learning_rate": 9.624061896983637e-06, "loss": 0.7917, "step": 3166 }, { "epoch": 0.4517831669044223, "grad_norm": 1.0837607383728027, "learning_rate": 9.623768902353896e-06, "loss": 0.7303, "step": 3167 }, { "epoch": 0.451925820256776, "grad_norm": 1.1728651523590088, "learning_rate": 9.623475798056615e-06, "loss": 0.8236, "step": 3168 }, { "epoch": 0.45206847360912983, "grad_norm": 0.7436525821685791, "learning_rate": 9.623182584098744e-06, "loss": 0.8001, "step": 3169 }, { "epoch": 0.4522111269614836, "grad_norm": 0.9537336826324463, "learning_rate": 9.622889260487236e-06, "loss": 0.7598, "step": 3170 }, { "epoch": 0.4523537803138374, "grad_norm": 1.2444756031036377, "learning_rate": 9.622595827229052e-06, "loss": 0.7903, "step": 3171 }, { "epoch": 0.45249643366619113, "grad_norm": 0.8312756419181824, "learning_rate": 9.622302284331149e-06, "loss": 0.8249, "step": 3172 }, { "epoch": 0.45263908701854494, "grad_norm": 0.6423664093017578, "learning_rate": 9.62200863180049e-06, "loss": 0.8271, "step": 3173 }, { "epoch": 0.4527817403708987, "grad_norm": 0.815904974937439, "learning_rate": 9.621714869644039e-06, "loss": 0.7393, "step": 3174 }, { "epoch": 0.4529243937232525, "grad_norm": 0.9608733654022217, "learning_rate": 9.621420997868765e-06, "loss": 0.7226, "step": 3175 }, { "epoch": 0.4530670470756063, "grad_norm": 1.05320405960083, "learning_rate": 9.621127016481637e-06, "loss": 0.7528, "step": 3176 }, { "epoch": 0.45320970042796005, "grad_norm": 1.0415343046188354, "learning_rate": 9.620832925489628e-06, "loss": 0.8105, "step": 3177 }, { "epoch": 0.45335235378031385, "grad_norm": 0.6745753884315491, "learning_rate": 9.620538724899713e-06, "loss": 0.7592, "step": 3178 }, { "epoch": 0.4534950071326676, "grad_norm": 0.7603384852409363, "learning_rate": 9.620244414718873e-06, "loss": 0.7451, "step": 3179 }, { "epoch": 0.4536376604850214, "grad_norm": 1.0224984884262085, "learning_rate": 9.619949994954083e-06, "loss": 0.8243, "step": 3180 }, { "epoch": 0.45378031383737516, "grad_norm": 1.0537947416305542, "learning_rate": 9.61965546561233e-06, "loss": 0.7938, "step": 3181 }, { "epoch": 0.45392296718972897, "grad_norm": 1.0355217456817627, "learning_rate": 9.6193608267006e-06, "loss": 0.7635, "step": 3182 }, { "epoch": 0.4540656205420827, "grad_norm": 1.05327308177948, "learning_rate": 9.619066078225879e-06, "loss": 0.7105, "step": 3183 }, { "epoch": 0.4542082738944365, "grad_norm": 0.8358237743377686, "learning_rate": 9.618771220195158e-06, "loss": 0.7776, "step": 3184 }, { "epoch": 0.4543509272467903, "grad_norm": 0.8938114047050476, "learning_rate": 9.618476252615432e-06, "loss": 0.8192, "step": 3185 }, { "epoch": 0.4544935805991441, "grad_norm": 0.8601402044296265, "learning_rate": 9.618181175493695e-06, "loss": 0.7887, "step": 3186 }, { "epoch": 0.4546362339514979, "grad_norm": 0.9431390166282654, "learning_rate": 9.617885988836951e-06, "loss": 0.772, "step": 3187 }, { "epoch": 0.45477888730385163, "grad_norm": 1.0409581661224365, "learning_rate": 9.617590692652194e-06, "loss": 0.8162, "step": 3188 }, { "epoch": 0.45492154065620544, "grad_norm": 1.045816421508789, "learning_rate": 9.617295286946433e-06, "loss": 0.8299, "step": 3189 }, { "epoch": 0.4550641940085592, "grad_norm": 1.2533879280090332, "learning_rate": 9.616999771726672e-06, "loss": 0.7699, "step": 3190 }, { "epoch": 0.455206847360913, "grad_norm": 0.8418307304382324, "learning_rate": 9.616704146999922e-06, "loss": 0.8287, "step": 3191 }, { "epoch": 0.45534950071326674, "grad_norm": 0.9584277868270874, "learning_rate": 9.616408412773192e-06, "loss": 0.7418, "step": 3192 }, { "epoch": 0.45549215406562055, "grad_norm": 0.9167273044586182, "learning_rate": 9.6161125690535e-06, "loss": 0.8158, "step": 3193 }, { "epoch": 0.4556348074179743, "grad_norm": 1.0698728561401367, "learning_rate": 9.61581661584786e-06, "loss": 0.8805, "step": 3194 }, { "epoch": 0.4557774607703281, "grad_norm": 1.150191068649292, "learning_rate": 9.615520553163294e-06, "loss": 0.8711, "step": 3195 }, { "epoch": 0.4559201141226819, "grad_norm": 0.6739160418510437, "learning_rate": 9.61522438100682e-06, "loss": 0.8376, "step": 3196 }, { "epoch": 0.45606276747503566, "grad_norm": 0.6858443021774292, "learning_rate": 9.614928099385467e-06, "loss": 0.8262, "step": 3197 }, { "epoch": 0.45620542082738946, "grad_norm": 0.7538442611694336, "learning_rate": 9.61463170830626e-06, "loss": 0.7978, "step": 3198 }, { "epoch": 0.4563480741797432, "grad_norm": 0.9261438250541687, "learning_rate": 9.61433520777623e-06, "loss": 0.7524, "step": 3199 }, { "epoch": 0.456490727532097, "grad_norm": 0.9965091347694397, "learning_rate": 9.614038597802405e-06, "loss": 0.7287, "step": 3200 }, { "epoch": 0.45663338088445077, "grad_norm": 1.0924919843673706, "learning_rate": 9.613741878391826e-06, "loss": 0.7976, "step": 3201 }, { "epoch": 0.4567760342368046, "grad_norm": 0.9510096907615662, "learning_rate": 9.613445049551529e-06, "loss": 0.8361, "step": 3202 }, { "epoch": 0.4569186875891583, "grad_norm": 0.7954645752906799, "learning_rate": 9.613148111288555e-06, "loss": 0.7906, "step": 3203 }, { "epoch": 0.45706134094151213, "grad_norm": 0.7574803233146667, "learning_rate": 9.612851063609943e-06, "loss": 0.765, "step": 3204 }, { "epoch": 0.4572039942938659, "grad_norm": 0.9515501260757446, "learning_rate": 9.61255390652274e-06, "loss": 0.6484, "step": 3205 }, { "epoch": 0.4573466476462197, "grad_norm": 0.872578501701355, "learning_rate": 9.612256640034e-06, "loss": 0.7244, "step": 3206 }, { "epoch": 0.4574893009985735, "grad_norm": 0.9286817312240601, "learning_rate": 9.611959264150763e-06, "loss": 0.8157, "step": 3207 }, { "epoch": 0.45763195435092724, "grad_norm": 0.8575893044471741, "learning_rate": 9.61166177888009e-06, "loss": 0.7894, "step": 3208 }, { "epoch": 0.45777460770328104, "grad_norm": 0.7181274890899658, "learning_rate": 9.611364184229037e-06, "loss": 0.7959, "step": 3209 }, { "epoch": 0.4579172610556348, "grad_norm": 0.9846453070640564, "learning_rate": 9.611066480204657e-06, "loss": 0.5517, "step": 3210 }, { "epoch": 0.4580599144079886, "grad_norm": 0.7997971177101135, "learning_rate": 9.610768666814015e-06, "loss": 0.798, "step": 3211 }, { "epoch": 0.45820256776034235, "grad_norm": 0.9456349015235901, "learning_rate": 9.610470744064175e-06, "loss": 0.6774, "step": 3212 }, { "epoch": 0.45834522111269616, "grad_norm": 0.8151498436927795, "learning_rate": 9.6101727119622e-06, "loss": 0.8158, "step": 3213 }, { "epoch": 0.4584878744650499, "grad_norm": 0.8408619165420532, "learning_rate": 9.60987457051516e-06, "loss": 0.8121, "step": 3214 }, { "epoch": 0.4586305278174037, "grad_norm": 0.9264407753944397, "learning_rate": 9.60957631973013e-06, "loss": 0.7625, "step": 3215 }, { "epoch": 0.4587731811697575, "grad_norm": 0.7770088911056519, "learning_rate": 9.60927795961418e-06, "loss": 0.819, "step": 3216 }, { "epoch": 0.45891583452211127, "grad_norm": 0.9818166494369507, "learning_rate": 9.608979490174385e-06, "loss": 0.5445, "step": 3217 }, { "epoch": 0.45905848787446507, "grad_norm": 0.9573136568069458, "learning_rate": 9.608680911417831e-06, "loss": 0.8056, "step": 3218 }, { "epoch": 0.4592011412268188, "grad_norm": 0.9539743065834045, "learning_rate": 9.608382223351594e-06, "loss": 0.7475, "step": 3219 }, { "epoch": 0.4593437945791726, "grad_norm": 0.7882934212684631, "learning_rate": 9.608083425982758e-06, "loss": 0.7953, "step": 3220 }, { "epoch": 0.4594864479315264, "grad_norm": 0.8050908446311951, "learning_rate": 9.607784519318413e-06, "loss": 0.7532, "step": 3221 }, { "epoch": 0.4596291012838802, "grad_norm": 0.9157688021659851, "learning_rate": 9.607485503365648e-06, "loss": 0.7805, "step": 3222 }, { "epoch": 0.45977175463623393, "grad_norm": 0.7888590693473816, "learning_rate": 9.607186378131554e-06, "loss": 0.7634, "step": 3223 }, { "epoch": 0.45991440798858774, "grad_norm": 1.0131776332855225, "learning_rate": 9.606887143623227e-06, "loss": 0.7819, "step": 3224 }, { "epoch": 0.4600570613409415, "grad_norm": 0.8839631676673889, "learning_rate": 9.606587799847762e-06, "loss": 0.7212, "step": 3225 }, { "epoch": 0.4601997146932953, "grad_norm": 0.9736989140510559, "learning_rate": 9.60628834681226e-06, "loss": 0.8043, "step": 3226 }, { "epoch": 0.4603423680456491, "grad_norm": 0.964718759059906, "learning_rate": 9.605988784523825e-06, "loss": 0.7951, "step": 3227 }, { "epoch": 0.46048502139800285, "grad_norm": 0.682881236076355, "learning_rate": 9.605689112989562e-06, "loss": 0.8274, "step": 3228 }, { "epoch": 0.46062767475035665, "grad_norm": 0.9523836374282837, "learning_rate": 9.605389332216577e-06, "loss": 0.8222, "step": 3229 }, { "epoch": 0.4607703281027104, "grad_norm": 0.7938910126686096, "learning_rate": 9.605089442211979e-06, "loss": 0.7983, "step": 3230 }, { "epoch": 0.4609129814550642, "grad_norm": 0.8781543970108032, "learning_rate": 9.604789442982885e-06, "loss": 0.8498, "step": 3231 }, { "epoch": 0.46105563480741796, "grad_norm": 0.9280606508255005, "learning_rate": 9.604489334536407e-06, "loss": 0.7853, "step": 3232 }, { "epoch": 0.46119828815977176, "grad_norm": 0.8369792103767395, "learning_rate": 9.604189116879666e-06, "loss": 0.7916, "step": 3233 }, { "epoch": 0.4613409415121255, "grad_norm": 0.9160106182098389, "learning_rate": 9.603888790019778e-06, "loss": 0.8411, "step": 3234 }, { "epoch": 0.4614835948644793, "grad_norm": 0.8717945218086243, "learning_rate": 9.60358835396387e-06, "loss": 0.7309, "step": 3235 }, { "epoch": 0.46162624821683307, "grad_norm": 0.9075588583946228, "learning_rate": 9.603287808719068e-06, "loss": 0.8055, "step": 3236 }, { "epoch": 0.4617689015691869, "grad_norm": 1.030797004699707, "learning_rate": 9.602987154292498e-06, "loss": 0.7871, "step": 3237 }, { "epoch": 0.4619115549215407, "grad_norm": 0.858007550239563, "learning_rate": 9.602686390691294e-06, "loss": 0.8354, "step": 3238 }, { "epoch": 0.46205420827389443, "grad_norm": 1.1889032125473022, "learning_rate": 9.602385517922587e-06, "loss": 0.7111, "step": 3239 }, { "epoch": 0.46219686162624823, "grad_norm": 1.2265406847000122, "learning_rate": 9.602084535993515e-06, "loss": 0.7946, "step": 3240 }, { "epoch": 0.462339514978602, "grad_norm": 0.9264851212501526, "learning_rate": 9.601783444911215e-06, "loss": 0.7307, "step": 3241 }, { "epoch": 0.4624821683309558, "grad_norm": 0.9731734991073608, "learning_rate": 9.601482244682828e-06, "loss": 0.832, "step": 3242 }, { "epoch": 0.46262482168330954, "grad_norm": 0.6344369649887085, "learning_rate": 9.601180935315499e-06, "loss": 0.7934, "step": 3243 }, { "epoch": 0.46276747503566334, "grad_norm": 0.7999869585037231, "learning_rate": 9.600879516816376e-06, "loss": 0.7569, "step": 3244 }, { "epoch": 0.4629101283880171, "grad_norm": 1.0258768796920776, "learning_rate": 9.600577989192607e-06, "loss": 0.7703, "step": 3245 }, { "epoch": 0.4630527817403709, "grad_norm": 1.1087878942489624, "learning_rate": 9.600276352451342e-06, "loss": 0.84, "step": 3246 }, { "epoch": 0.4631954350927247, "grad_norm": 0.8236591815948486, "learning_rate": 9.599974606599737e-06, "loss": 0.7499, "step": 3247 }, { "epoch": 0.46333808844507846, "grad_norm": 0.7220486402511597, "learning_rate": 9.59967275164495e-06, "loss": 0.7741, "step": 3248 }, { "epoch": 0.46348074179743226, "grad_norm": 0.7836496233940125, "learning_rate": 9.599370787594136e-06, "loss": 0.7621, "step": 3249 }, { "epoch": 0.463623395149786, "grad_norm": 0.9032376408576965, "learning_rate": 9.599068714454463e-06, "loss": 0.7961, "step": 3250 }, { "epoch": 0.4637660485021398, "grad_norm": 0.8954452872276306, "learning_rate": 9.59876653223309e-06, "loss": 0.7963, "step": 3251 }, { "epoch": 0.46390870185449357, "grad_norm": 1.0254467725753784, "learning_rate": 9.598464240937188e-06, "loss": 0.7492, "step": 3252 }, { "epoch": 0.46405135520684737, "grad_norm": 1.0668203830718994, "learning_rate": 9.598161840573927e-06, "loss": 0.7895, "step": 3253 }, { "epoch": 0.4641940085592011, "grad_norm": 0.978826105594635, "learning_rate": 9.597859331150476e-06, "loss": 0.6915, "step": 3254 }, { "epoch": 0.4643366619115549, "grad_norm": 0.8736929893493652, "learning_rate": 9.597556712674014e-06, "loss": 0.7851, "step": 3255 }, { "epoch": 0.4644793152639087, "grad_norm": 0.8989753723144531, "learning_rate": 9.597253985151715e-06, "loss": 0.8459, "step": 3256 }, { "epoch": 0.4646219686162625, "grad_norm": 0.9451749324798584, "learning_rate": 9.596951148590762e-06, "loss": 0.7292, "step": 3257 }, { "epoch": 0.4647646219686163, "grad_norm": 1.0433512926101685, "learning_rate": 9.596648202998336e-06, "loss": 0.8053, "step": 3258 }, { "epoch": 0.46490727532097004, "grad_norm": 1.5700123310089111, "learning_rate": 9.596345148381622e-06, "loss": 0.7484, "step": 3259 }, { "epoch": 0.46504992867332384, "grad_norm": 1.0634270906448364, "learning_rate": 9.596041984747812e-06, "loss": 0.766, "step": 3260 }, { "epoch": 0.4651925820256776, "grad_norm": 0.9275653958320618, "learning_rate": 9.59573871210409e-06, "loss": 0.8348, "step": 3261 }, { "epoch": 0.4653352353780314, "grad_norm": 1.2164363861083984, "learning_rate": 9.595435330457655e-06, "loss": 0.7802, "step": 3262 }, { "epoch": 0.46547788873038515, "grad_norm": 1.0342893600463867, "learning_rate": 9.595131839815698e-06, "loss": 0.5786, "step": 3263 }, { "epoch": 0.46562054208273895, "grad_norm": 0.9382723569869995, "learning_rate": 9.59482824018542e-06, "loss": 0.7375, "step": 3264 }, { "epoch": 0.4657631954350927, "grad_norm": 0.8210813403129578, "learning_rate": 9.59452453157402e-06, "loss": 0.792, "step": 3265 }, { "epoch": 0.4659058487874465, "grad_norm": 0.7499663829803467, "learning_rate": 9.594220713988705e-06, "loss": 0.7837, "step": 3266 }, { "epoch": 0.46604850213980026, "grad_norm": 0.7613505125045776, "learning_rate": 9.593916787436677e-06, "loss": 0.8459, "step": 3267 }, { "epoch": 0.46619115549215406, "grad_norm": 0.8401245474815369, "learning_rate": 9.593612751925146e-06, "loss": 0.7263, "step": 3268 }, { "epoch": 0.46633380884450787, "grad_norm": 0.7662743330001831, "learning_rate": 9.593308607461323e-06, "loss": 0.8032, "step": 3269 }, { "epoch": 0.4664764621968616, "grad_norm": 0.8141566514968872, "learning_rate": 9.593004354052424e-06, "loss": 0.8167, "step": 3270 }, { "epoch": 0.4666191155492154, "grad_norm": 1.0903931856155396, "learning_rate": 9.592699991705662e-06, "loss": 0.7851, "step": 3271 }, { "epoch": 0.4667617689015692, "grad_norm": 0.8852677941322327, "learning_rate": 9.592395520428257e-06, "loss": 0.8324, "step": 3272 }, { "epoch": 0.466904422253923, "grad_norm": 0.8141461610794067, "learning_rate": 9.592090940227432e-06, "loss": 0.7859, "step": 3273 }, { "epoch": 0.46704707560627673, "grad_norm": 1.0055336952209473, "learning_rate": 9.59178625111041e-06, "loss": 0.7143, "step": 3274 }, { "epoch": 0.46718972895863053, "grad_norm": 0.8097848296165466, "learning_rate": 9.591481453084416e-06, "loss": 0.8142, "step": 3275 }, { "epoch": 0.4673323823109843, "grad_norm": 1.0817996263504028, "learning_rate": 9.591176546156683e-06, "loss": 0.7397, "step": 3276 }, { "epoch": 0.4674750356633381, "grad_norm": 0.7397019267082214, "learning_rate": 9.59087153033444e-06, "loss": 0.7348, "step": 3277 }, { "epoch": 0.4676176890156919, "grad_norm": 0.7519397735595703, "learning_rate": 9.590566405624923e-06, "loss": 0.7753, "step": 3278 }, { "epoch": 0.46776034236804565, "grad_norm": 0.8674522638320923, "learning_rate": 9.590261172035368e-06, "loss": 0.8022, "step": 3279 }, { "epoch": 0.46790299572039945, "grad_norm": 0.8686073422431946, "learning_rate": 9.589955829573015e-06, "loss": 0.7183, "step": 3280 }, { "epoch": 0.4680456490727532, "grad_norm": 0.8603644967079163, "learning_rate": 9.589650378245106e-06, "loss": 0.8503, "step": 3281 }, { "epoch": 0.468188302425107, "grad_norm": 0.938601553440094, "learning_rate": 9.589344818058884e-06, "loss": 0.719, "step": 3282 }, { "epoch": 0.46833095577746076, "grad_norm": 1.1432517766952515, "learning_rate": 9.589039149021599e-06, "loss": 0.7682, "step": 3283 }, { "epoch": 0.46847360912981456, "grad_norm": 1.1235514879226685, "learning_rate": 9.588733371140502e-06, "loss": 0.7667, "step": 3284 }, { "epoch": 0.4686162624821683, "grad_norm": 1.0099997520446777, "learning_rate": 9.588427484422842e-06, "loss": 0.5631, "step": 3285 }, { "epoch": 0.4687589158345221, "grad_norm": 0.8651304841041565, "learning_rate": 9.588121488875875e-06, "loss": 0.7448, "step": 3286 }, { "epoch": 0.46890156918687587, "grad_norm": 0.8427119851112366, "learning_rate": 9.58781538450686e-06, "loss": 0.814, "step": 3287 }, { "epoch": 0.46904422253922967, "grad_norm": 0.8180068731307983, "learning_rate": 9.587509171323058e-06, "loss": 0.7244, "step": 3288 }, { "epoch": 0.4691868758915835, "grad_norm": 0.9020771980285645, "learning_rate": 9.587202849331728e-06, "loss": 0.8689, "step": 3289 }, { "epoch": 0.4693295292439372, "grad_norm": 0.8076386451721191, "learning_rate": 9.586896418540138e-06, "loss": 0.7931, "step": 3290 }, { "epoch": 0.46947218259629103, "grad_norm": 0.9270254373550415, "learning_rate": 9.586589878955558e-06, "loss": 0.8838, "step": 3291 }, { "epoch": 0.4696148359486448, "grad_norm": 0.95148104429245, "learning_rate": 9.586283230585255e-06, "loss": 0.76, "step": 3292 }, { "epoch": 0.4697574893009986, "grad_norm": 0.777870774269104, "learning_rate": 9.585976473436503e-06, "loss": 0.7963, "step": 3293 }, { "epoch": 0.46990014265335234, "grad_norm": 0.8370087146759033, "learning_rate": 9.585669607516579e-06, "loss": 0.8036, "step": 3294 }, { "epoch": 0.47004279600570614, "grad_norm": 1.1503015756607056, "learning_rate": 9.58536263283276e-06, "loss": 0.7816, "step": 3295 }, { "epoch": 0.4701854493580599, "grad_norm": 0.6871997117996216, "learning_rate": 9.585055549392328e-06, "loss": 0.7205, "step": 3296 }, { "epoch": 0.4703281027104137, "grad_norm": 1.048008918762207, "learning_rate": 9.584748357202565e-06, "loss": 0.7118, "step": 3297 }, { "epoch": 0.47047075606276745, "grad_norm": 1.0687289237976074, "learning_rate": 9.584441056270758e-06, "loss": 0.7412, "step": 3298 }, { "epoch": 0.47061340941512125, "grad_norm": 1.0124335289001465, "learning_rate": 9.584133646604198e-06, "loss": 0.8091, "step": 3299 }, { "epoch": 0.47075606276747506, "grad_norm": 0.9034653306007385, "learning_rate": 9.583826128210172e-06, "loss": 0.7489, "step": 3300 }, { "epoch": 0.4708987161198288, "grad_norm": 1.1596565246582031, "learning_rate": 9.583518501095976e-06, "loss": 0.5105, "step": 3301 }, { "epoch": 0.4710413694721826, "grad_norm": 0.9735150337219238, "learning_rate": 9.583210765268905e-06, "loss": 0.8305, "step": 3302 }, { "epoch": 0.47118402282453636, "grad_norm": 1.0492440462112427, "learning_rate": 9.582902920736259e-06, "loss": 0.7761, "step": 3303 }, { "epoch": 0.47132667617689017, "grad_norm": 0.6919054388999939, "learning_rate": 9.582594967505341e-06, "loss": 0.7489, "step": 3304 }, { "epoch": 0.4714693295292439, "grad_norm": 0.9516935348510742, "learning_rate": 9.582286905583452e-06, "loss": 0.7201, "step": 3305 }, { "epoch": 0.4716119828815977, "grad_norm": 0.9665992259979248, "learning_rate": 9.581978734977902e-06, "loss": 0.7759, "step": 3306 }, { "epoch": 0.4717546362339515, "grad_norm": 2.512998580932617, "learning_rate": 9.581670455695995e-06, "loss": 0.7929, "step": 3307 }, { "epoch": 0.4718972895863053, "grad_norm": 1.007515788078308, "learning_rate": 9.58136206774505e-06, "loss": 0.742, "step": 3308 }, { "epoch": 0.4720399429386591, "grad_norm": 0.7736210227012634, "learning_rate": 9.581053571132377e-06, "loss": 0.7686, "step": 3309 }, { "epoch": 0.47218259629101283, "grad_norm": 0.8426063060760498, "learning_rate": 9.580744965865292e-06, "loss": 0.8, "step": 3310 }, { "epoch": 0.47232524964336664, "grad_norm": 0.924156665802002, "learning_rate": 9.580436251951118e-06, "loss": 0.8075, "step": 3311 }, { "epoch": 0.4724679029957204, "grad_norm": 0.8497976660728455, "learning_rate": 9.580127429397173e-06, "loss": 0.7329, "step": 3312 }, { "epoch": 0.4726105563480742, "grad_norm": 1.2462340593338013, "learning_rate": 9.579818498210787e-06, "loss": 0.714, "step": 3313 }, { "epoch": 0.47275320970042795, "grad_norm": 0.9014277458190918, "learning_rate": 9.579509458399283e-06, "loss": 0.7491, "step": 3314 }, { "epoch": 0.47289586305278175, "grad_norm": 0.9916141033172607, "learning_rate": 9.579200309969992e-06, "loss": 0.8, "step": 3315 }, { "epoch": 0.4730385164051355, "grad_norm": 0.6881513595581055, "learning_rate": 9.578891052930248e-06, "loss": 0.7914, "step": 3316 }, { "epoch": 0.4731811697574893, "grad_norm": 1.0062134265899658, "learning_rate": 9.578581687287383e-06, "loss": 0.7581, "step": 3317 }, { "epoch": 0.47332382310984306, "grad_norm": 0.8403618335723877, "learning_rate": 9.578272213048738e-06, "loss": 0.7786, "step": 3318 }, { "epoch": 0.47346647646219686, "grad_norm": 1.5289185047149658, "learning_rate": 9.57796263022165e-06, "loss": 0.758, "step": 3319 }, { "epoch": 0.47360912981455067, "grad_norm": 0.7625239491462708, "learning_rate": 9.577652938813465e-06, "loss": 0.7897, "step": 3320 }, { "epoch": 0.4737517831669044, "grad_norm": 0.8987564444541931, "learning_rate": 9.577343138831526e-06, "loss": 0.7817, "step": 3321 }, { "epoch": 0.4738944365192582, "grad_norm": 1.0212286710739136, "learning_rate": 9.577033230283181e-06, "loss": 0.7974, "step": 3322 }, { "epoch": 0.47403708987161197, "grad_norm": 1.2200279235839844, "learning_rate": 9.576723213175781e-06, "loss": 0.6999, "step": 3323 }, { "epoch": 0.4741797432239658, "grad_norm": 1.008220911026001, "learning_rate": 9.57641308751668e-06, "loss": 0.691, "step": 3324 }, { "epoch": 0.4743223965763195, "grad_norm": 0.7976358532905579, "learning_rate": 9.57610285331323e-06, "loss": 0.8719, "step": 3325 }, { "epoch": 0.47446504992867333, "grad_norm": 1.099548578262329, "learning_rate": 9.575792510572796e-06, "loss": 0.7651, "step": 3326 }, { "epoch": 0.4746077032810271, "grad_norm": 1.777839183807373, "learning_rate": 9.575482059302732e-06, "loss": 0.7474, "step": 3327 }, { "epoch": 0.4747503566333809, "grad_norm": 0.9914703965187073, "learning_rate": 9.575171499510405e-06, "loss": 0.754, "step": 3328 }, { "epoch": 0.47489300998573464, "grad_norm": 1.0722758769989014, "learning_rate": 9.57486083120318e-06, "loss": 0.7997, "step": 3329 }, { "epoch": 0.47503566333808844, "grad_norm": 1.1728448867797852, "learning_rate": 9.574550054388426e-06, "loss": 0.7775, "step": 3330 }, { "epoch": 0.47517831669044225, "grad_norm": 0.8477550148963928, "learning_rate": 9.574239169073511e-06, "loss": 0.8113, "step": 3331 }, { "epoch": 0.475320970042796, "grad_norm": 0.9855422377586365, "learning_rate": 9.573928175265814e-06, "loss": 0.7778, "step": 3332 }, { "epoch": 0.4754636233951498, "grad_norm": 0.9079716801643372, "learning_rate": 9.573617072972707e-06, "loss": 0.8002, "step": 3333 }, { "epoch": 0.47560627674750355, "grad_norm": 0.7468597888946533, "learning_rate": 9.57330586220157e-06, "loss": 0.7523, "step": 3334 }, { "epoch": 0.47574893009985736, "grad_norm": 0.6399210095405579, "learning_rate": 9.572994542959784e-06, "loss": 0.779, "step": 3335 }, { "epoch": 0.4758915834522111, "grad_norm": 0.8479769229888916, "learning_rate": 9.572683115254736e-06, "loss": 0.7492, "step": 3336 }, { "epoch": 0.4760342368045649, "grad_norm": 0.9519271850585938, "learning_rate": 9.572371579093806e-06, "loss": 0.6451, "step": 3337 }, { "epoch": 0.47617689015691866, "grad_norm": 0.8485866189002991, "learning_rate": 9.57205993448439e-06, "loss": 0.7952, "step": 3338 }, { "epoch": 0.47631954350927247, "grad_norm": 1.2046514749526978, "learning_rate": 9.571748181433876e-06, "loss": 0.7759, "step": 3339 }, { "epoch": 0.4764621968616263, "grad_norm": 0.8765907883644104, "learning_rate": 9.57143631994966e-06, "loss": 0.7768, "step": 3340 }, { "epoch": 0.47660485021398, "grad_norm": 1.1514155864715576, "learning_rate": 9.571124350039136e-06, "loss": 0.782, "step": 3341 }, { "epoch": 0.47674750356633383, "grad_norm": 1.0372592210769653, "learning_rate": 9.570812271709707e-06, "loss": 0.7029, "step": 3342 }, { "epoch": 0.4768901569186876, "grad_norm": 0.9821612238883972, "learning_rate": 9.57050008496877e-06, "loss": 0.7207, "step": 3343 }, { "epoch": 0.4770328102710414, "grad_norm": 0.8796873092651367, "learning_rate": 9.570187789823735e-06, "loss": 0.6802, "step": 3344 }, { "epoch": 0.47717546362339514, "grad_norm": 1.1821767091751099, "learning_rate": 9.569875386282006e-06, "loss": 0.7817, "step": 3345 }, { "epoch": 0.47731811697574894, "grad_norm": 0.9369605183601379, "learning_rate": 9.569562874350993e-06, "loss": 0.7882, "step": 3346 }, { "epoch": 0.4774607703281027, "grad_norm": 0.941922664642334, "learning_rate": 9.56925025403811e-06, "loss": 0.7018, "step": 3347 }, { "epoch": 0.4776034236804565, "grad_norm": 1.0803322792053223, "learning_rate": 9.568937525350768e-06, "loss": 0.7262, "step": 3348 }, { "epoch": 0.47774607703281025, "grad_norm": 1.0246617794036865, "learning_rate": 9.568624688296387e-06, "loss": 0.7835, "step": 3349 }, { "epoch": 0.47788873038516405, "grad_norm": 0.8225551247596741, "learning_rate": 9.568311742882387e-06, "loss": 0.7754, "step": 3350 }, { "epoch": 0.47803138373751786, "grad_norm": 0.9982939958572388, "learning_rate": 9.567998689116192e-06, "loss": 0.7233, "step": 3351 }, { "epoch": 0.4781740370898716, "grad_norm": 0.9623139500617981, "learning_rate": 9.567685527005223e-06, "loss": 0.6851, "step": 3352 }, { "epoch": 0.4783166904422254, "grad_norm": 0.7305071353912354, "learning_rate": 9.56737225655691e-06, "loss": 0.731, "step": 3353 }, { "epoch": 0.47845934379457916, "grad_norm": 0.9447537660598755, "learning_rate": 9.567058877778683e-06, "loss": 0.8088, "step": 3354 }, { "epoch": 0.47860199714693297, "grad_norm": 0.8206272721290588, "learning_rate": 9.566745390677976e-06, "loss": 0.7427, "step": 3355 }, { "epoch": 0.4787446504992867, "grad_norm": 0.7819055318832397, "learning_rate": 9.566431795262223e-06, "loss": 0.7698, "step": 3356 }, { "epoch": 0.4788873038516405, "grad_norm": 0.9388282895088196, "learning_rate": 9.566118091538861e-06, "loss": 0.7823, "step": 3357 }, { "epoch": 0.47902995720399427, "grad_norm": 0.9281818270683289, "learning_rate": 9.565804279515333e-06, "loss": 0.7414, "step": 3358 }, { "epoch": 0.4791726105563481, "grad_norm": 0.9480597972869873, "learning_rate": 9.56549035919908e-06, "loss": 0.7342, "step": 3359 }, { "epoch": 0.4793152639087018, "grad_norm": 0.9322537779808044, "learning_rate": 9.565176330597548e-06, "loss": 0.7337, "step": 3360 }, { "epoch": 0.47945791726105563, "grad_norm": 0.8991124629974365, "learning_rate": 9.564862193718187e-06, "loss": 0.765, "step": 3361 }, { "epoch": 0.47960057061340944, "grad_norm": 0.9824044108390808, "learning_rate": 9.564547948568443e-06, "loss": 0.8108, "step": 3362 }, { "epoch": 0.4797432239657632, "grad_norm": 1.0409741401672363, "learning_rate": 9.564233595155777e-06, "loss": 0.7842, "step": 3363 }, { "epoch": 0.479885877318117, "grad_norm": 1.0946305990219116, "learning_rate": 9.563919133487638e-06, "loss": 0.8092, "step": 3364 }, { "epoch": 0.48002853067047074, "grad_norm": 0.9016760587692261, "learning_rate": 9.563604563571486e-06, "loss": 0.7146, "step": 3365 }, { "epoch": 0.48017118402282455, "grad_norm": 1.192918062210083, "learning_rate": 9.563289885414784e-06, "loss": 0.7887, "step": 3366 }, { "epoch": 0.4803138373751783, "grad_norm": 0.9595857262611389, "learning_rate": 9.562975099024995e-06, "loss": 0.7346, "step": 3367 }, { "epoch": 0.4804564907275321, "grad_norm": 0.8157551288604736, "learning_rate": 9.562660204409584e-06, "loss": 0.7408, "step": 3368 }, { "epoch": 0.48059914407988585, "grad_norm": 0.8149961233139038, "learning_rate": 9.562345201576021e-06, "loss": 0.7228, "step": 3369 }, { "epoch": 0.48074179743223966, "grad_norm": 1.0081431865692139, "learning_rate": 9.562030090531777e-06, "loss": 0.8398, "step": 3370 }, { "epoch": 0.48088445078459346, "grad_norm": 0.8619146347045898, "learning_rate": 9.561714871284326e-06, "loss": 0.7842, "step": 3371 }, { "epoch": 0.4810271041369472, "grad_norm": 0.8915643095970154, "learning_rate": 9.561399543841143e-06, "loss": 0.7844, "step": 3372 }, { "epoch": 0.481169757489301, "grad_norm": 0.8182870745658875, "learning_rate": 9.561084108209708e-06, "loss": 0.7867, "step": 3373 }, { "epoch": 0.48131241084165477, "grad_norm": 0.6612070798873901, "learning_rate": 9.560768564397505e-06, "loss": 0.789, "step": 3374 }, { "epoch": 0.4814550641940086, "grad_norm": 0.961587131023407, "learning_rate": 9.560452912412013e-06, "loss": 0.7723, "step": 3375 }, { "epoch": 0.4815977175463623, "grad_norm": 0.8533160090446472, "learning_rate": 9.560137152260723e-06, "loss": 0.7668, "step": 3376 }, { "epoch": 0.48174037089871613, "grad_norm": 1.1560102701187134, "learning_rate": 9.559821283951123e-06, "loss": 0.7964, "step": 3377 }, { "epoch": 0.4818830242510699, "grad_norm": 0.9823300838470459, "learning_rate": 9.559505307490704e-06, "loss": 0.7446, "step": 3378 }, { "epoch": 0.4820256776034237, "grad_norm": 1.1701860427856445, "learning_rate": 9.559189222886962e-06, "loss": 0.5649, "step": 3379 }, { "epoch": 0.48216833095577744, "grad_norm": 1.1225320100784302, "learning_rate": 9.558873030147392e-06, "loss": 0.7186, "step": 3380 }, { "epoch": 0.48231098430813124, "grad_norm": 0.9208173155784607, "learning_rate": 9.558556729279496e-06, "loss": 0.7182, "step": 3381 }, { "epoch": 0.48245363766048505, "grad_norm": 0.740606963634491, "learning_rate": 9.558240320290773e-06, "loss": 0.7868, "step": 3382 }, { "epoch": 0.4825962910128388, "grad_norm": 1.0487650632858276, "learning_rate": 9.55792380318873e-06, "loss": 0.7207, "step": 3383 }, { "epoch": 0.4827389443651926, "grad_norm": 0.9889148473739624, "learning_rate": 9.557607177980872e-06, "loss": 0.7738, "step": 3384 }, { "epoch": 0.48288159771754635, "grad_norm": 1.0722016096115112, "learning_rate": 9.557290444674711e-06, "loss": 0.7778, "step": 3385 }, { "epoch": 0.48302425106990016, "grad_norm": 0.7276331186294556, "learning_rate": 9.556973603277761e-06, "loss": 0.8651, "step": 3386 }, { "epoch": 0.4831669044222539, "grad_norm": 0.8589654564857483, "learning_rate": 9.556656653797531e-06, "loss": 0.7953, "step": 3387 }, { "epoch": 0.4833095577746077, "grad_norm": 0.8468416929244995, "learning_rate": 9.556339596241544e-06, "loss": 0.8578, "step": 3388 }, { "epoch": 0.48345221112696146, "grad_norm": 0.8124231696128845, "learning_rate": 9.556022430617318e-06, "loss": 0.8363, "step": 3389 }, { "epoch": 0.48359486447931527, "grad_norm": 1.1093146800994873, "learning_rate": 9.555705156932375e-06, "loss": 0.8176, "step": 3390 }, { "epoch": 0.483737517831669, "grad_norm": 1.0626344680786133, "learning_rate": 9.555387775194243e-06, "loss": 0.7326, "step": 3391 }, { "epoch": 0.4838801711840228, "grad_norm": 1.0361603498458862, "learning_rate": 9.555070285410444e-06, "loss": 0.8014, "step": 3392 }, { "epoch": 0.48402282453637663, "grad_norm": 0.8652250170707703, "learning_rate": 9.554752687588515e-06, "loss": 0.8119, "step": 3393 }, { "epoch": 0.4841654778887304, "grad_norm": 1.2987350225448608, "learning_rate": 9.554434981735984e-06, "loss": 0.803, "step": 3394 }, { "epoch": 0.4843081312410842, "grad_norm": 0.9913198351860046, "learning_rate": 9.554117167860389e-06, "loss": 0.8752, "step": 3395 }, { "epoch": 0.48445078459343793, "grad_norm": 0.9127089977264404, "learning_rate": 9.553799245969266e-06, "loss": 0.7903, "step": 3396 }, { "epoch": 0.48459343794579174, "grad_norm": 0.8692578077316284, "learning_rate": 9.553481216070157e-06, "loss": 0.7779, "step": 3397 }, { "epoch": 0.4847360912981455, "grad_norm": 1.1671979427337646, "learning_rate": 9.553163078170605e-06, "loss": 0.7761, "step": 3398 }, { "epoch": 0.4848787446504993, "grad_norm": 0.7351517081260681, "learning_rate": 9.552844832278155e-06, "loss": 0.6904, "step": 3399 }, { "epoch": 0.48502139800285304, "grad_norm": 1.8013851642608643, "learning_rate": 9.552526478400356e-06, "loss": 0.8007, "step": 3400 }, { "epoch": 0.48516405135520685, "grad_norm": 0.8513445258140564, "learning_rate": 9.552208016544759e-06, "loss": 0.7916, "step": 3401 }, { "epoch": 0.48530670470756065, "grad_norm": 0.854399561882019, "learning_rate": 9.551889446718916e-06, "loss": 0.8007, "step": 3402 }, { "epoch": 0.4854493580599144, "grad_norm": 0.8587920069694519, "learning_rate": 9.551570768930384e-06, "loss": 0.821, "step": 3403 }, { "epoch": 0.4855920114122682, "grad_norm": 0.8655844926834106, "learning_rate": 9.55125198318672e-06, "loss": 0.7874, "step": 3404 }, { "epoch": 0.48573466476462196, "grad_norm": 0.920585572719574, "learning_rate": 9.550933089495486e-06, "loss": 0.755, "step": 3405 }, { "epoch": 0.48587731811697576, "grad_norm": 0.9027933478355408, "learning_rate": 9.550614087864247e-06, "loss": 0.8007, "step": 3406 }, { "epoch": 0.4860199714693295, "grad_norm": 0.7437239289283752, "learning_rate": 9.550294978300568e-06, "loss": 0.7513, "step": 3407 }, { "epoch": 0.4861626248216833, "grad_norm": 0.748741090297699, "learning_rate": 9.549975760812019e-06, "loss": 0.838, "step": 3408 }, { "epoch": 0.48630527817403707, "grad_norm": 1.9705638885498047, "learning_rate": 9.549656435406167e-06, "loss": 0.6532, "step": 3409 }, { "epoch": 0.4864479315263909, "grad_norm": 1.141339659690857, "learning_rate": 9.549337002090589e-06, "loss": 0.7726, "step": 3410 }, { "epoch": 0.4865905848787446, "grad_norm": 1.154482364654541, "learning_rate": 9.549017460872862e-06, "loss": 0.7307, "step": 3411 }, { "epoch": 0.48673323823109843, "grad_norm": 1.2299119234085083, "learning_rate": 9.548697811760565e-06, "loss": 0.7401, "step": 3412 }, { "epoch": 0.48687589158345224, "grad_norm": 0.8068072199821472, "learning_rate": 9.548378054761278e-06, "loss": 0.8125, "step": 3413 }, { "epoch": 0.487018544935806, "grad_norm": 0.8187466263771057, "learning_rate": 9.548058189882584e-06, "loss": 0.7122, "step": 3414 }, { "epoch": 0.4871611982881598, "grad_norm": 0.8442869782447815, "learning_rate": 9.547738217132072e-06, "loss": 0.7748, "step": 3415 }, { "epoch": 0.48730385164051354, "grad_norm": 1.218238115310669, "learning_rate": 9.54741813651733e-06, "loss": 0.8156, "step": 3416 }, { "epoch": 0.48744650499286735, "grad_norm": 0.8840314745903015, "learning_rate": 9.547097948045952e-06, "loss": 0.7733, "step": 3417 }, { "epoch": 0.4875891583452211, "grad_norm": 0.8033078908920288, "learning_rate": 9.546777651725529e-06, "loss": 0.7879, "step": 3418 }, { "epoch": 0.4877318116975749, "grad_norm": 0.8911908864974976, "learning_rate": 9.54645724756366e-06, "loss": 0.8573, "step": 3419 }, { "epoch": 0.48787446504992865, "grad_norm": 0.7573607563972473, "learning_rate": 9.546136735567943e-06, "loss": 0.7739, "step": 3420 }, { "epoch": 0.48801711840228246, "grad_norm": 0.965168833732605, "learning_rate": 9.545816115745981e-06, "loss": 0.769, "step": 3421 }, { "epoch": 0.4881597717546362, "grad_norm": 1.0998084545135498, "learning_rate": 9.545495388105379e-06, "loss": 0.7601, "step": 3422 }, { "epoch": 0.48830242510699, "grad_norm": 0.8415771126747131, "learning_rate": 9.545174552653741e-06, "loss": 0.8746, "step": 3423 }, { "epoch": 0.4884450784593438, "grad_norm": 0.9667220115661621, "learning_rate": 9.544853609398681e-06, "loss": 0.7261, "step": 3424 }, { "epoch": 0.48858773181169757, "grad_norm": 0.8491535782814026, "learning_rate": 9.544532558347809e-06, "loss": 0.7778, "step": 3425 }, { "epoch": 0.4887303851640514, "grad_norm": 0.842559814453125, "learning_rate": 9.544211399508739e-06, "loss": 0.7902, "step": 3426 }, { "epoch": 0.4888730385164051, "grad_norm": 0.9630987644195557, "learning_rate": 9.54389013288909e-06, "loss": 0.7747, "step": 3427 }, { "epoch": 0.48901569186875893, "grad_norm": 0.8068737387657166, "learning_rate": 9.543568758496482e-06, "loss": 0.8007, "step": 3428 }, { "epoch": 0.4891583452211127, "grad_norm": 0.7649833559989929, "learning_rate": 9.543247276338535e-06, "loss": 0.6022, "step": 3429 }, { "epoch": 0.4893009985734665, "grad_norm": 0.8590714335441589, "learning_rate": 9.542925686422874e-06, "loss": 0.821, "step": 3430 }, { "epoch": 0.48944365192582023, "grad_norm": 1.0582678318023682, "learning_rate": 9.54260398875713e-06, "loss": 0.7332, "step": 3431 }, { "epoch": 0.48958630527817404, "grad_norm": 0.8106250762939453, "learning_rate": 9.542282183348932e-06, "loss": 0.7699, "step": 3432 }, { "epoch": 0.48972895863052784, "grad_norm": 0.8199784159660339, "learning_rate": 9.54196027020591e-06, "loss": 0.7216, "step": 3433 }, { "epoch": 0.4898716119828816, "grad_norm": 1.0330818891525269, "learning_rate": 9.541638249335702e-06, "loss": 0.7559, "step": 3434 }, { "epoch": 0.4900142653352354, "grad_norm": 0.7265938520431519, "learning_rate": 9.541316120745944e-06, "loss": 0.8366, "step": 3435 }, { "epoch": 0.49015691868758915, "grad_norm": 0.9404028654098511, "learning_rate": 9.540993884444278e-06, "loss": 0.7518, "step": 3436 }, { "epoch": 0.49029957203994295, "grad_norm": 1.1536147594451904, "learning_rate": 9.540671540438346e-06, "loss": 0.825, "step": 3437 }, { "epoch": 0.4904422253922967, "grad_norm": 0.9902913570404053, "learning_rate": 9.540349088735795e-06, "loss": 0.8027, "step": 3438 }, { "epoch": 0.4905848787446505, "grad_norm": 1.057051658630371, "learning_rate": 9.540026529344268e-06, "loss": 0.6917, "step": 3439 }, { "epoch": 0.49072753209700426, "grad_norm": 0.8220142126083374, "learning_rate": 9.53970386227142e-06, "loss": 0.7244, "step": 3440 }, { "epoch": 0.49087018544935807, "grad_norm": 1.011712908744812, "learning_rate": 9.539381087524904e-06, "loss": 0.7658, "step": 3441 }, { "epoch": 0.4910128388017118, "grad_norm": 0.8901670575141907, "learning_rate": 9.539058205112372e-06, "loss": 0.6595, "step": 3442 }, { "epoch": 0.4911554921540656, "grad_norm": 0.752519965171814, "learning_rate": 9.538735215041489e-06, "loss": 0.8126, "step": 3443 }, { "epoch": 0.4912981455064194, "grad_norm": 1.0619455575942993, "learning_rate": 9.538412117319907e-06, "loss": 0.6629, "step": 3444 }, { "epoch": 0.4914407988587732, "grad_norm": 0.7560083866119385, "learning_rate": 9.538088911955297e-06, "loss": 0.5751, "step": 3445 }, { "epoch": 0.491583452211127, "grad_norm": 0.9296385645866394, "learning_rate": 9.53776559895532e-06, "loss": 0.8717, "step": 3446 }, { "epoch": 0.49172610556348073, "grad_norm": 0.8130275011062622, "learning_rate": 9.537442178327647e-06, "loss": 0.7586, "step": 3447 }, { "epoch": 0.49186875891583454, "grad_norm": 0.7646908164024353, "learning_rate": 9.537118650079947e-06, "loss": 0.8079, "step": 3448 }, { "epoch": 0.4920114122681883, "grad_norm": 0.8075964450836182, "learning_rate": 9.536795014219895e-06, "loss": 0.7877, "step": 3449 }, { "epoch": 0.4921540656205421, "grad_norm": 0.9516100287437439, "learning_rate": 9.536471270755167e-06, "loss": 0.8018, "step": 3450 }, { "epoch": 0.49229671897289584, "grad_norm": 0.942426323890686, "learning_rate": 9.536147419693442e-06, "loss": 0.6944, "step": 3451 }, { "epoch": 0.49243937232524965, "grad_norm": 1.0228946208953857, "learning_rate": 9.535823461042399e-06, "loss": 0.7664, "step": 3452 }, { "epoch": 0.4925820256776034, "grad_norm": 0.8058091998100281, "learning_rate": 9.535499394809723e-06, "loss": 0.7978, "step": 3453 }, { "epoch": 0.4927246790299572, "grad_norm": 0.7258778214454651, "learning_rate": 9.535175221003103e-06, "loss": 0.7752, "step": 3454 }, { "epoch": 0.492867332382311, "grad_norm": 1.1583764553070068, "learning_rate": 9.534850939630224e-06, "loss": 0.8383, "step": 3455 }, { "epoch": 0.49300998573466476, "grad_norm": 1.035607099533081, "learning_rate": 9.534526550698777e-06, "loss": 0.5475, "step": 3456 }, { "epoch": 0.49315263908701856, "grad_norm": 0.8877325057983398, "learning_rate": 9.53420205421646e-06, "loss": 0.7747, "step": 3457 }, { "epoch": 0.4932952924393723, "grad_norm": 0.7912925481796265, "learning_rate": 9.533877450190965e-06, "loss": 0.8278, "step": 3458 }, { "epoch": 0.4934379457917261, "grad_norm": 0.8658379912376404, "learning_rate": 9.533552738629993e-06, "loss": 0.7532, "step": 3459 }, { "epoch": 0.49358059914407987, "grad_norm": 0.8575937747955322, "learning_rate": 9.533227919541247e-06, "loss": 0.8228, "step": 3460 }, { "epoch": 0.4937232524964337, "grad_norm": 0.762713611125946, "learning_rate": 9.53290299293243e-06, "loss": 0.7547, "step": 3461 }, { "epoch": 0.4938659058487874, "grad_norm": 0.9738590121269226, "learning_rate": 9.532577958811247e-06, "loss": 0.7268, "step": 3462 }, { "epoch": 0.49400855920114123, "grad_norm": 0.7226889729499817, "learning_rate": 9.532252817185408e-06, "loss": 0.7789, "step": 3463 }, { "epoch": 0.49415121255349503, "grad_norm": 1.1516462564468384, "learning_rate": 9.531927568062627e-06, "loss": 0.7941, "step": 3464 }, { "epoch": 0.4942938659058488, "grad_norm": 1.071286916732788, "learning_rate": 9.531602211450617e-06, "loss": 0.7673, "step": 3465 }, { "epoch": 0.4944365192582026, "grad_norm": 1.0745704174041748, "learning_rate": 9.531276747357094e-06, "loss": 0.711, "step": 3466 }, { "epoch": 0.49457917261055634, "grad_norm": 0.6707049608230591, "learning_rate": 9.530951175789777e-06, "loss": 0.7474, "step": 3467 }, { "epoch": 0.49472182596291014, "grad_norm": 0.8591291308403015, "learning_rate": 9.530625496756392e-06, "loss": 0.7541, "step": 3468 }, { "epoch": 0.4948644793152639, "grad_norm": 0.8556170463562012, "learning_rate": 9.530299710264658e-06, "loss": 0.8524, "step": 3469 }, { "epoch": 0.4950071326676177, "grad_norm": 0.8599547743797302, "learning_rate": 9.529973816322304e-06, "loss": 0.8055, "step": 3470 }, { "epoch": 0.49514978601997145, "grad_norm": 0.9279347658157349, "learning_rate": 9.529647814937063e-06, "loss": 0.8195, "step": 3471 }, { "epoch": 0.49529243937232525, "grad_norm": 0.9515174627304077, "learning_rate": 9.529321706116663e-06, "loss": 0.7372, "step": 3472 }, { "epoch": 0.495435092724679, "grad_norm": 0.949888288974762, "learning_rate": 9.52899548986884e-06, "loss": 0.7677, "step": 3473 }, { "epoch": 0.4955777460770328, "grad_norm": 1.099470615386963, "learning_rate": 9.528669166201331e-06, "loss": 0.8114, "step": 3474 }, { "epoch": 0.4957203994293866, "grad_norm": 1.121291160583496, "learning_rate": 9.528342735121878e-06, "loss": 0.7872, "step": 3475 }, { "epoch": 0.49586305278174037, "grad_norm": 0.6462776064872742, "learning_rate": 9.52801619663822e-06, "loss": 0.7725, "step": 3476 }, { "epoch": 0.49600570613409417, "grad_norm": 0.8585007190704346, "learning_rate": 9.527689550758105e-06, "loss": 0.813, "step": 3477 }, { "epoch": 0.4961483594864479, "grad_norm": 1.0574462413787842, "learning_rate": 9.527362797489278e-06, "loss": 0.6989, "step": 3478 }, { "epoch": 0.4962910128388017, "grad_norm": 0.8546985387802124, "learning_rate": 9.52703593683949e-06, "loss": 0.791, "step": 3479 }, { "epoch": 0.4964336661911555, "grad_norm": 0.8837389945983887, "learning_rate": 9.526708968816493e-06, "loss": 0.693, "step": 3480 }, { "epoch": 0.4965763195435093, "grad_norm": 0.866013765335083, "learning_rate": 9.526381893428042e-06, "loss": 0.5031, "step": 3481 }, { "epoch": 0.49671897289586303, "grad_norm": 0.9066146016120911, "learning_rate": 9.526054710681897e-06, "loss": 0.7991, "step": 3482 }, { "epoch": 0.49686162624821684, "grad_norm": 0.8528669476509094, "learning_rate": 9.525727420585817e-06, "loss": 0.7852, "step": 3483 }, { "epoch": 0.4970042796005706, "grad_norm": 1.0253227949142456, "learning_rate": 9.525400023147563e-06, "loss": 0.7319, "step": 3484 }, { "epoch": 0.4971469329529244, "grad_norm": 0.9645678400993347, "learning_rate": 9.525072518374902e-06, "loss": 0.6882, "step": 3485 }, { "epoch": 0.4972895863052782, "grad_norm": 0.9075619578361511, "learning_rate": 9.5247449062756e-06, "loss": 0.7304, "step": 3486 }, { "epoch": 0.49743223965763195, "grad_norm": 0.8408627510070801, "learning_rate": 9.524417186857431e-06, "loss": 0.7668, "step": 3487 }, { "epoch": 0.49757489300998575, "grad_norm": 0.7215490341186523, "learning_rate": 9.524089360128164e-06, "loss": 0.7671, "step": 3488 }, { "epoch": 0.4977175463623395, "grad_norm": 0.8745356798171997, "learning_rate": 9.523761426095576e-06, "loss": 0.7881, "step": 3489 }, { "epoch": 0.4978601997146933, "grad_norm": 0.7559221982955933, "learning_rate": 9.523433384767445e-06, "loss": 0.7475, "step": 3490 }, { "epoch": 0.49800285306704706, "grad_norm": 1.106828212738037, "learning_rate": 9.523105236151551e-06, "loss": 0.8218, "step": 3491 }, { "epoch": 0.49814550641940086, "grad_norm": 0.991148054599762, "learning_rate": 9.522776980255679e-06, "loss": 0.7087, "step": 3492 }, { "epoch": 0.4982881597717546, "grad_norm": 1.0358657836914062, "learning_rate": 9.522448617087614e-06, "loss": 0.7952, "step": 3493 }, { "epoch": 0.4984308131241084, "grad_norm": 0.9941891431808472, "learning_rate": 9.522120146655143e-06, "loss": 0.8071, "step": 3494 }, { "epoch": 0.4985734664764622, "grad_norm": 0.8804892897605896, "learning_rate": 9.521791568966057e-06, "loss": 0.7422, "step": 3495 }, { "epoch": 0.498716119828816, "grad_norm": 0.957059919834137, "learning_rate": 9.52146288402815e-06, "loss": 0.7304, "step": 3496 }, { "epoch": 0.4988587731811698, "grad_norm": 1.0978821516036987, "learning_rate": 9.521134091849216e-06, "loss": 0.7141, "step": 3497 }, { "epoch": 0.49900142653352353, "grad_norm": 0.6598830223083496, "learning_rate": 9.520805192437055e-06, "loss": 0.8085, "step": 3498 }, { "epoch": 0.49914407988587733, "grad_norm": 1.1140689849853516, "learning_rate": 9.52047618579947e-06, "loss": 0.8339, "step": 3499 }, { "epoch": 0.4992867332382311, "grad_norm": 0.9824712872505188, "learning_rate": 9.520147071944259e-06, "loss": 0.7018, "step": 3500 }, { "epoch": 0.4994293865905849, "grad_norm": 0.9506702423095703, "learning_rate": 9.519817850879232e-06, "loss": 0.8059, "step": 3501 }, { "epoch": 0.49957203994293864, "grad_norm": 0.8838631510734558, "learning_rate": 9.519488522612198e-06, "loss": 0.82, "step": 3502 }, { "epoch": 0.49971469329529244, "grad_norm": 0.8757986426353455, "learning_rate": 9.519159087150967e-06, "loss": 0.7687, "step": 3503 }, { "epoch": 0.4998573466476462, "grad_norm": 0.865254819393158, "learning_rate": 9.51882954450335e-06, "loss": 0.8303, "step": 3504 }, { "epoch": 0.5, "grad_norm": 0.8819034099578857, "learning_rate": 9.518499894677168e-06, "loss": 0.729, "step": 3505 }, { "epoch": 0.5001426533523537, "grad_norm": 0.8352157473564148, "learning_rate": 9.518170137680235e-06, "loss": 0.8139, "step": 3506 }, { "epoch": 0.5002853067047076, "grad_norm": 0.8177391886711121, "learning_rate": 9.517840273520376e-06, "loss": 0.7847, "step": 3507 }, { "epoch": 0.5004279600570614, "grad_norm": 0.8699647784233093, "learning_rate": 9.517510302205412e-06, "loss": 0.8454, "step": 3508 }, { "epoch": 0.5005706134094151, "grad_norm": 0.888346254825592, "learning_rate": 9.51718022374317e-06, "loss": 0.8272, "step": 3509 }, { "epoch": 0.5007132667617689, "grad_norm": 1.0211471319198608, "learning_rate": 9.51685003814148e-06, "loss": 0.7535, "step": 3510 }, { "epoch": 0.5008559201141227, "grad_norm": 0.8559572100639343, "learning_rate": 9.516519745408175e-06, "loss": 0.8029, "step": 3511 }, { "epoch": 0.5009985734664765, "grad_norm": 0.8854065537452698, "learning_rate": 9.516189345551086e-06, "loss": 0.732, "step": 3512 }, { "epoch": 0.5011412268188302, "grad_norm": 0.8586737513542175, "learning_rate": 9.515858838578049e-06, "loss": 0.7802, "step": 3513 }, { "epoch": 0.501283880171184, "grad_norm": 1.0144084692001343, "learning_rate": 9.515528224496903e-06, "loss": 0.719, "step": 3514 }, { "epoch": 0.5014265335235378, "grad_norm": 0.9962849617004395, "learning_rate": 9.515197503315493e-06, "loss": 0.7886, "step": 3515 }, { "epoch": 0.5015691868758916, "grad_norm": 0.7915270328521729, "learning_rate": 9.514866675041664e-06, "loss": 0.6798, "step": 3516 }, { "epoch": 0.5017118402282453, "grad_norm": 0.8684111833572388, "learning_rate": 9.514535739683255e-06, "loss": 0.7718, "step": 3517 }, { "epoch": 0.5018544935805992, "grad_norm": 0.8695011734962463, "learning_rate": 9.514204697248122e-06, "loss": 0.7509, "step": 3518 }, { "epoch": 0.5019971469329529, "grad_norm": 0.866762101650238, "learning_rate": 9.513873547744113e-06, "loss": 0.8408, "step": 3519 }, { "epoch": 0.5021398002853067, "grad_norm": 0.7866602540016174, "learning_rate": 9.513542291179085e-06, "loss": 0.7516, "step": 3520 }, { "epoch": 0.5022824536376604, "grad_norm": 0.6946073770523071, "learning_rate": 9.513210927560894e-06, "loss": 0.7936, "step": 3521 }, { "epoch": 0.5024251069900143, "grad_norm": 0.8390775322914124, "learning_rate": 9.5128794568974e-06, "loss": 0.7568, "step": 3522 }, { "epoch": 0.502567760342368, "grad_norm": 1.0733027458190918, "learning_rate": 9.512547879196461e-06, "loss": 0.8082, "step": 3523 }, { "epoch": 0.5027104136947218, "grad_norm": 1.0854607820510864, "learning_rate": 9.512216194465946e-06, "loss": 0.7777, "step": 3524 }, { "epoch": 0.5028530670470756, "grad_norm": 0.8890421390533447, "learning_rate": 9.511884402713721e-06, "loss": 0.7799, "step": 3525 }, { "epoch": 0.5029957203994294, "grad_norm": 0.7463951706886292, "learning_rate": 9.511552503947654e-06, "loss": 0.73, "step": 3526 }, { "epoch": 0.5031383737517832, "grad_norm": 1.0551022291183472, "learning_rate": 9.511220498175619e-06, "loss": 0.7731, "step": 3527 }, { "epoch": 0.5032810271041369, "grad_norm": 1.0738459825515747, "learning_rate": 9.510888385405488e-06, "loss": 0.708, "step": 3528 }, { "epoch": 0.5034236804564908, "grad_norm": 1.0715283155441284, "learning_rate": 9.510556165645141e-06, "loss": 0.7213, "step": 3529 }, { "epoch": 0.5035663338088445, "grad_norm": 1.0010348558425903, "learning_rate": 9.510223838902455e-06, "loss": 0.7985, "step": 3530 }, { "epoch": 0.5037089871611983, "grad_norm": 1.141632080078125, "learning_rate": 9.509891405185314e-06, "loss": 0.739, "step": 3531 }, { "epoch": 0.503851640513552, "grad_norm": 0.7817306518554688, "learning_rate": 9.509558864501602e-06, "loss": 0.7156, "step": 3532 }, { "epoch": 0.5039942938659059, "grad_norm": 0.8662512302398682, "learning_rate": 9.509226216859208e-06, "loss": 0.7869, "step": 3533 }, { "epoch": 0.5041369472182596, "grad_norm": 1.1924934387207031, "learning_rate": 9.508893462266017e-06, "loss": 0.792, "step": 3534 }, { "epoch": 0.5042796005706134, "grad_norm": 0.9089195728302002, "learning_rate": 9.508560600729928e-06, "loss": 0.7608, "step": 3535 }, { "epoch": 0.5044222539229671, "grad_norm": 0.7966616749763489, "learning_rate": 9.508227632258831e-06, "loss": 0.755, "step": 3536 }, { "epoch": 0.504564907275321, "grad_norm": 0.9615272283554077, "learning_rate": 9.507894556860625e-06, "loss": 0.7772, "step": 3537 }, { "epoch": 0.5047075606276747, "grad_norm": 0.8201649785041809, "learning_rate": 9.50756137454321e-06, "loss": 0.7975, "step": 3538 }, { "epoch": 0.5048502139800285, "grad_norm": 0.8268119692802429, "learning_rate": 9.507228085314488e-06, "loss": 0.7017, "step": 3539 }, { "epoch": 0.5049928673323824, "grad_norm": 0.7588708996772766, "learning_rate": 9.506894689182366e-06, "loss": 0.5902, "step": 3540 }, { "epoch": 0.5051355206847361, "grad_norm": 0.9902154207229614, "learning_rate": 9.506561186154748e-06, "loss": 0.8953, "step": 3541 }, { "epoch": 0.5052781740370899, "grad_norm": 1.1865127086639404, "learning_rate": 9.506227576239546e-06, "loss": 0.7981, "step": 3542 }, { "epoch": 0.5054208273894436, "grad_norm": 0.9655767679214478, "learning_rate": 9.505893859444674e-06, "loss": 0.8221, "step": 3543 }, { "epoch": 0.5055634807417975, "grad_norm": 0.9593498706817627, "learning_rate": 9.505560035778045e-06, "loss": 0.5432, "step": 3544 }, { "epoch": 0.5057061340941512, "grad_norm": 0.7647463083267212, "learning_rate": 9.505226105247579e-06, "loss": 0.5542, "step": 3545 }, { "epoch": 0.505848787446505, "grad_norm": 0.8450886607170105, "learning_rate": 9.504892067861194e-06, "loss": 0.8534, "step": 3546 }, { "epoch": 0.5059914407988588, "grad_norm": 0.8688698410987854, "learning_rate": 9.50455792362681e-06, "loss": 0.7664, "step": 3547 }, { "epoch": 0.5061340941512126, "grad_norm": 0.9295186996459961, "learning_rate": 9.504223672552362e-06, "loss": 0.7595, "step": 3548 }, { "epoch": 0.5062767475035663, "grad_norm": 0.8820104002952576, "learning_rate": 9.50388931464577e-06, "loss": 0.8455, "step": 3549 }, { "epoch": 0.5064194008559201, "grad_norm": 1.1296863555908203, "learning_rate": 9.503554849914962e-06, "loss": 0.8242, "step": 3550 }, { "epoch": 0.5065620542082739, "grad_norm": 0.9853180050849915, "learning_rate": 9.50322027836788e-06, "loss": 0.7073, "step": 3551 }, { "epoch": 0.5067047075606277, "grad_norm": 0.9048812389373779, "learning_rate": 9.50288560001245e-06, "loss": 0.7121, "step": 3552 }, { "epoch": 0.5068473609129814, "grad_norm": 1.1925512552261353, "learning_rate": 9.502550814856616e-06, "loss": 0.7647, "step": 3553 }, { "epoch": 0.5069900142653352, "grad_norm": 0.823168933391571, "learning_rate": 9.502215922908319e-06, "loss": 0.7714, "step": 3554 }, { "epoch": 0.507132667617689, "grad_norm": 0.7568991184234619, "learning_rate": 9.501880924175499e-06, "loss": 0.8182, "step": 3555 }, { "epoch": 0.5072753209700428, "grad_norm": 0.8724026083946228, "learning_rate": 9.501545818666102e-06, "loss": 0.8022, "step": 3556 }, { "epoch": 0.5074179743223965, "grad_norm": 0.9513503909111023, "learning_rate": 9.501210606388075e-06, "loss": 0.7868, "step": 3557 }, { "epoch": 0.5075606276747504, "grad_norm": 0.8349027037620544, "learning_rate": 9.500875287349374e-06, "loss": 0.7011, "step": 3558 }, { "epoch": 0.5077032810271042, "grad_norm": 0.7522615194320679, "learning_rate": 9.500539861557944e-06, "loss": 0.6727, "step": 3559 }, { "epoch": 0.5078459343794579, "grad_norm": 0.9592998027801514, "learning_rate": 9.50020432902175e-06, "loss": 0.739, "step": 3560 }, { "epoch": 0.5079885877318117, "grad_norm": 0.7955388426780701, "learning_rate": 9.499868689748742e-06, "loss": 0.7906, "step": 3561 }, { "epoch": 0.5081312410841655, "grad_norm": 0.8330178260803223, "learning_rate": 9.499532943746885e-06, "loss": 0.7143, "step": 3562 }, { "epoch": 0.5082738944365193, "grad_norm": 0.9068897366523743, "learning_rate": 9.49919709102414e-06, "loss": 0.7876, "step": 3563 }, { "epoch": 0.508416547788873, "grad_norm": 1.0191758871078491, "learning_rate": 9.498861131588476e-06, "loss": 0.6956, "step": 3564 }, { "epoch": 0.5085592011412268, "grad_norm": 0.9252503514289856, "learning_rate": 9.49852506544786e-06, "loss": 0.7659, "step": 3565 }, { "epoch": 0.5087018544935806, "grad_norm": 1.0246816873550415, "learning_rate": 9.498188892610261e-06, "loss": 0.6951, "step": 3566 }, { "epoch": 0.5088445078459344, "grad_norm": 0.9582048058509827, "learning_rate": 9.497852613083653e-06, "loss": 0.8055, "step": 3567 }, { "epoch": 0.5089871611982881, "grad_norm": 0.7913559675216675, "learning_rate": 9.497516226876015e-06, "loss": 0.774, "step": 3568 }, { "epoch": 0.509129814550642, "grad_norm": 0.9547677636146545, "learning_rate": 9.497179733995323e-06, "loss": 0.7667, "step": 3569 }, { "epoch": 0.5092724679029957, "grad_norm": 0.9399577975273132, "learning_rate": 9.496843134449558e-06, "loss": 0.7585, "step": 3570 }, { "epoch": 0.5094151212553495, "grad_norm": 1.0156217813491821, "learning_rate": 9.496506428246703e-06, "loss": 0.6632, "step": 3571 }, { "epoch": 0.5095577746077032, "grad_norm": 0.8402301073074341, "learning_rate": 9.496169615394746e-06, "loss": 0.7184, "step": 3572 }, { "epoch": 0.5097004279600571, "grad_norm": 0.8508307337760925, "learning_rate": 9.495832695901675e-06, "loss": 0.7316, "step": 3573 }, { "epoch": 0.5098430813124109, "grad_norm": 0.780446469783783, "learning_rate": 9.495495669775481e-06, "loss": 0.7526, "step": 3574 }, { "epoch": 0.5099857346647646, "grad_norm": 0.9137591123580933, "learning_rate": 9.495158537024156e-06, "loss": 0.7703, "step": 3575 }, { "epoch": 0.5101283880171184, "grad_norm": 1.0749053955078125, "learning_rate": 9.494821297655699e-06, "loss": 0.7643, "step": 3576 }, { "epoch": 0.5102710413694722, "grad_norm": 0.8332220911979675, "learning_rate": 9.494483951678105e-06, "loss": 0.8825, "step": 3577 }, { "epoch": 0.510413694721826, "grad_norm": 0.8721501231193542, "learning_rate": 9.494146499099379e-06, "loss": 0.8774, "step": 3578 }, { "epoch": 0.5105563480741797, "grad_norm": 1.0534876585006714, "learning_rate": 9.493808939927522e-06, "loss": 0.7787, "step": 3579 }, { "epoch": 0.5106990014265336, "grad_norm": 1.0937044620513916, "learning_rate": 9.493471274170542e-06, "loss": 0.7382, "step": 3580 }, { "epoch": 0.5108416547788873, "grad_norm": 0.758887767791748, "learning_rate": 9.49313350183645e-06, "loss": 0.7667, "step": 3581 }, { "epoch": 0.5109843081312411, "grad_norm": 0.8439927697181702, "learning_rate": 9.492795622933252e-06, "loss": 0.777, "step": 3582 }, { "epoch": 0.5111269614835948, "grad_norm": 0.9568290710449219, "learning_rate": 9.492457637468966e-06, "loss": 0.7759, "step": 3583 }, { "epoch": 0.5112696148359487, "grad_norm": 1.1437251567840576, "learning_rate": 9.492119545451605e-06, "loss": 0.7935, "step": 3584 }, { "epoch": 0.5114122681883024, "grad_norm": 1.3443694114685059, "learning_rate": 9.491781346889192e-06, "loss": 0.774, "step": 3585 }, { "epoch": 0.5115549215406562, "grad_norm": 0.8938597440719604, "learning_rate": 9.491443041789746e-06, "loss": 0.7622, "step": 3586 }, { "epoch": 0.5116975748930099, "grad_norm": 1.1223803758621216, "learning_rate": 9.49110463016129e-06, "loss": 0.5727, "step": 3587 }, { "epoch": 0.5118402282453638, "grad_norm": 0.9260916113853455, "learning_rate": 9.490766112011853e-06, "loss": 0.778, "step": 3588 }, { "epoch": 0.5119828815977175, "grad_norm": 0.8162046670913696, "learning_rate": 9.490427487349465e-06, "loss": 0.7675, "step": 3589 }, { "epoch": 0.5121255349500713, "grad_norm": 0.7428349852561951, "learning_rate": 9.490088756182153e-06, "loss": 0.7292, "step": 3590 }, { "epoch": 0.5122681883024252, "grad_norm": 0.9647871255874634, "learning_rate": 9.489749918517955e-06, "loss": 0.7386, "step": 3591 }, { "epoch": 0.5124108416547789, "grad_norm": 0.8613197803497314, "learning_rate": 9.489410974364905e-06, "loss": 0.8379, "step": 3592 }, { "epoch": 0.5125534950071327, "grad_norm": 0.9482429027557373, "learning_rate": 9.489071923731043e-06, "loss": 0.7613, "step": 3593 }, { "epoch": 0.5126961483594864, "grad_norm": 1.1268008947372437, "learning_rate": 9.488732766624413e-06, "loss": 0.8024, "step": 3594 }, { "epoch": 0.5128388017118403, "grad_norm": 0.9092463254928589, "learning_rate": 9.488393503053057e-06, "loss": 0.7992, "step": 3595 }, { "epoch": 0.512981455064194, "grad_norm": 0.8599215149879456, "learning_rate": 9.488054133025022e-06, "loss": 0.7407, "step": 3596 }, { "epoch": 0.5131241084165478, "grad_norm": 0.8078734874725342, "learning_rate": 9.487714656548357e-06, "loss": 0.7776, "step": 3597 }, { "epoch": 0.5132667617689015, "grad_norm": 0.9748033881187439, "learning_rate": 9.487375073631114e-06, "loss": 0.801, "step": 3598 }, { "epoch": 0.5134094151212554, "grad_norm": 1.144802212715149, "learning_rate": 9.487035384281348e-06, "loss": 0.7511, "step": 3599 }, { "epoch": 0.5135520684736091, "grad_norm": 0.9737942218780518, "learning_rate": 9.486695588507114e-06, "loss": 0.7913, "step": 3600 }, { "epoch": 0.5136947218259629, "grad_norm": 0.8674310445785522, "learning_rate": 9.486355686316473e-06, "loss": 0.7976, "step": 3601 }, { "epoch": 0.5138373751783167, "grad_norm": 0.994288980960846, "learning_rate": 9.486015677717488e-06, "loss": 0.8288, "step": 3602 }, { "epoch": 0.5139800285306705, "grad_norm": 0.9822586178779602, "learning_rate": 9.48567556271822e-06, "loss": 0.5385, "step": 3603 }, { "epoch": 0.5141226818830242, "grad_norm": 0.8938551545143127, "learning_rate": 9.485335341326739e-06, "loss": 0.8137, "step": 3604 }, { "epoch": 0.514265335235378, "grad_norm": 0.8361135721206665, "learning_rate": 9.484995013551112e-06, "loss": 0.7933, "step": 3605 }, { "epoch": 0.5144079885877318, "grad_norm": 0.9457839727401733, "learning_rate": 9.484654579399411e-06, "loss": 0.7884, "step": 3606 }, { "epoch": 0.5145506419400856, "grad_norm": 0.9015101194381714, "learning_rate": 9.484314038879713e-06, "loss": 0.7497, "step": 3607 }, { "epoch": 0.5146932952924393, "grad_norm": 0.8200323581695557, "learning_rate": 9.483973392000094e-06, "loss": 0.8567, "step": 3608 }, { "epoch": 0.5148359486447932, "grad_norm": 0.7484765648841858, "learning_rate": 9.483632638768634e-06, "loss": 0.7913, "step": 3609 }, { "epoch": 0.514978601997147, "grad_norm": 0.9521093368530273, "learning_rate": 9.483291779193413e-06, "loss": 0.6828, "step": 3610 }, { "epoch": 0.5151212553495007, "grad_norm": 1.0979682207107544, "learning_rate": 9.482950813282518e-06, "loss": 0.7313, "step": 3611 }, { "epoch": 0.5152639087018545, "grad_norm": 0.8850939869880676, "learning_rate": 9.482609741044034e-06, "loss": 0.7149, "step": 3612 }, { "epoch": 0.5154065620542083, "grad_norm": 1.010465383529663, "learning_rate": 9.48226856248605e-06, "loss": 0.7398, "step": 3613 }, { "epoch": 0.5155492154065621, "grad_norm": 0.921806812286377, "learning_rate": 9.481927277616663e-06, "loss": 0.6896, "step": 3614 }, { "epoch": 0.5156918687589158, "grad_norm": 0.9806258678436279, "learning_rate": 9.481585886443962e-06, "loss": 0.7171, "step": 3615 }, { "epoch": 0.5158345221112696, "grad_norm": 0.9127873182296753, "learning_rate": 9.481244388976048e-06, "loss": 0.7802, "step": 3616 }, { "epoch": 0.5159771754636234, "grad_norm": 1.0865702629089355, "learning_rate": 9.480902785221018e-06, "loss": 0.7494, "step": 3617 }, { "epoch": 0.5161198288159772, "grad_norm": 1.1731276512145996, "learning_rate": 9.480561075186976e-06, "loss": 0.8649, "step": 3618 }, { "epoch": 0.5162624821683309, "grad_norm": 0.8446781635284424, "learning_rate": 9.480219258882027e-06, "loss": 0.7662, "step": 3619 }, { "epoch": 0.5164051355206848, "grad_norm": 0.8926602005958557, "learning_rate": 9.479877336314277e-06, "loss": 0.8907, "step": 3620 }, { "epoch": 0.5165477888730385, "grad_norm": 0.9072000980377197, "learning_rate": 9.479535307491836e-06, "loss": 0.7547, "step": 3621 }, { "epoch": 0.5166904422253923, "grad_norm": 0.8798696994781494, "learning_rate": 9.479193172422818e-06, "loss": 0.7849, "step": 3622 }, { "epoch": 0.516833095577746, "grad_norm": 1.029890537261963, "learning_rate": 9.478850931115335e-06, "loss": 0.7595, "step": 3623 }, { "epoch": 0.5169757489300999, "grad_norm": 0.9974260926246643, "learning_rate": 9.478508583577506e-06, "loss": 0.7973, "step": 3624 }, { "epoch": 0.5171184022824536, "grad_norm": 0.672558069229126, "learning_rate": 9.478166129817449e-06, "loss": 0.7986, "step": 3625 }, { "epoch": 0.5172610556348074, "grad_norm": 0.910285472869873, "learning_rate": 9.477823569843291e-06, "loss": 0.7627, "step": 3626 }, { "epoch": 0.5174037089871611, "grad_norm": 0.7758559584617615, "learning_rate": 9.477480903663152e-06, "loss": 0.7544, "step": 3627 }, { "epoch": 0.517546362339515, "grad_norm": 1.2214888334274292, "learning_rate": 9.477138131285164e-06, "loss": 0.8018, "step": 3628 }, { "epoch": 0.5176890156918688, "grad_norm": 0.8100362420082092, "learning_rate": 9.476795252717452e-06, "loss": 0.7418, "step": 3629 }, { "epoch": 0.5178316690442225, "grad_norm": 0.8331372141838074, "learning_rate": 9.476452267968151e-06, "loss": 0.7488, "step": 3630 }, { "epoch": 0.5179743223965764, "grad_norm": 0.8265190720558167, "learning_rate": 9.476109177045395e-06, "loss": 0.8099, "step": 3631 }, { "epoch": 0.5181169757489301, "grad_norm": 0.8415448069572449, "learning_rate": 9.475765979957325e-06, "loss": 0.7815, "step": 3632 }, { "epoch": 0.5182596291012839, "grad_norm": 0.7178728580474854, "learning_rate": 9.475422676712076e-06, "loss": 0.8397, "step": 3633 }, { "epoch": 0.5184022824536376, "grad_norm": 0.9677311778068542, "learning_rate": 9.475079267317795e-06, "loss": 0.7822, "step": 3634 }, { "epoch": 0.5185449358059915, "grad_norm": 1.090571641921997, "learning_rate": 9.474735751782624e-06, "loss": 0.788, "step": 3635 }, { "epoch": 0.5186875891583452, "grad_norm": 0.9308180212974548, "learning_rate": 9.474392130114711e-06, "loss": 0.7907, "step": 3636 }, { "epoch": 0.518830242510699, "grad_norm": 0.8341872692108154, "learning_rate": 9.474048402322207e-06, "loss": 0.6871, "step": 3637 }, { "epoch": 0.5189728958630527, "grad_norm": 0.9667650461196899, "learning_rate": 9.473704568413265e-06, "loss": 0.7448, "step": 3638 }, { "epoch": 0.5191155492154066, "grad_norm": 0.9378452897071838, "learning_rate": 9.473360628396038e-06, "loss": 0.814, "step": 3639 }, { "epoch": 0.5192582025677603, "grad_norm": 0.9220533967018127, "learning_rate": 9.473016582278687e-06, "loss": 0.7933, "step": 3640 }, { "epoch": 0.5194008559201141, "grad_norm": 0.6695970892906189, "learning_rate": 9.472672430069368e-06, "loss": 0.8177, "step": 3641 }, { "epoch": 0.519543509272468, "grad_norm": 1.0739068984985352, "learning_rate": 9.472328171776246e-06, "loss": 0.8237, "step": 3642 }, { "epoch": 0.5196861626248217, "grad_norm": 0.6684971451759338, "learning_rate": 9.471983807407488e-06, "loss": 0.8188, "step": 3643 }, { "epoch": 0.5198288159771755, "grad_norm": 0.7271811366081238, "learning_rate": 9.471639336971258e-06, "loss": 0.8048, "step": 3644 }, { "epoch": 0.5199714693295292, "grad_norm": 0.9835609197616577, "learning_rate": 9.471294760475728e-06, "loss": 0.8445, "step": 3645 }, { "epoch": 0.5201141226818831, "grad_norm": 0.6809604167938232, "learning_rate": 9.47095007792907e-06, "loss": 0.8051, "step": 3646 }, { "epoch": 0.5202567760342368, "grad_norm": 1.2839874029159546, "learning_rate": 9.470605289339462e-06, "loss": 0.5462, "step": 3647 }, { "epoch": 0.5203994293865906, "grad_norm": 0.7694956064224243, "learning_rate": 9.470260394715079e-06, "loss": 0.7472, "step": 3648 }, { "epoch": 0.5205420827389443, "grad_norm": 0.8619051575660706, "learning_rate": 9.469915394064101e-06, "loss": 0.7096, "step": 3649 }, { "epoch": 0.5206847360912982, "grad_norm": 1.0383868217468262, "learning_rate": 9.469570287394713e-06, "loss": 0.7074, "step": 3650 }, { "epoch": 0.5208273894436519, "grad_norm": 0.8686227798461914, "learning_rate": 9.469225074715098e-06, "loss": 0.7352, "step": 3651 }, { "epoch": 0.5209700427960057, "grad_norm": 0.8512762784957886, "learning_rate": 9.468879756033446e-06, "loss": 0.737, "step": 3652 }, { "epoch": 0.5211126961483595, "grad_norm": 0.7683886885643005, "learning_rate": 9.468534331357946e-06, "loss": 0.773, "step": 3653 }, { "epoch": 0.5212553495007133, "grad_norm": 1.0536611080169678, "learning_rate": 9.46818880069679e-06, "loss": 0.7079, "step": 3654 }, { "epoch": 0.521398002853067, "grad_norm": 0.8594650626182556, "learning_rate": 9.467843164058176e-06, "loss": 0.7836, "step": 3655 }, { "epoch": 0.5215406562054208, "grad_norm": 0.8585334420204163, "learning_rate": 9.4674974214503e-06, "loss": 0.7949, "step": 3656 }, { "epoch": 0.5216833095577746, "grad_norm": 0.9271411299705505, "learning_rate": 9.467151572881363e-06, "loss": 0.8577, "step": 3657 }, { "epoch": 0.5218259629101284, "grad_norm": 0.7891442179679871, "learning_rate": 9.466805618359567e-06, "loss": 0.7561, "step": 3658 }, { "epoch": 0.5219686162624821, "grad_norm": 0.9075076580047607, "learning_rate": 9.466459557893119e-06, "loss": 0.8193, "step": 3659 }, { "epoch": 0.5221112696148359, "grad_norm": 1.1432865858078003, "learning_rate": 9.466113391490225e-06, "loss": 0.7082, "step": 3660 }, { "epoch": 0.5222539229671898, "grad_norm": 0.8940004110336304, "learning_rate": 9.465767119159097e-06, "loss": 0.7857, "step": 3661 }, { "epoch": 0.5223965763195435, "grad_norm": 1.145477056503296, "learning_rate": 9.465420740907948e-06, "loss": 0.7813, "step": 3662 }, { "epoch": 0.5225392296718973, "grad_norm": 0.771138072013855, "learning_rate": 9.465074256744993e-06, "loss": 0.7888, "step": 3663 }, { "epoch": 0.5226818830242511, "grad_norm": 1.0088539123535156, "learning_rate": 9.464727666678448e-06, "loss": 0.732, "step": 3664 }, { "epoch": 0.5228245363766049, "grad_norm": 0.7582293748855591, "learning_rate": 9.464380970716538e-06, "loss": 0.8644, "step": 3665 }, { "epoch": 0.5229671897289586, "grad_norm": 1.0350309610366821, "learning_rate": 9.464034168867483e-06, "loss": 0.72, "step": 3666 }, { "epoch": 0.5231098430813124, "grad_norm": 0.9733157157897949, "learning_rate": 9.463687261139507e-06, "loss": 0.8307, "step": 3667 }, { "epoch": 0.5232524964336662, "grad_norm": 1.0166035890579224, "learning_rate": 9.463340247540841e-06, "loss": 0.7685, "step": 3668 }, { "epoch": 0.52339514978602, "grad_norm": 1.02400803565979, "learning_rate": 9.462993128079716e-06, "loss": 0.8393, "step": 3669 }, { "epoch": 0.5235378031383737, "grad_norm": 0.7539787888526917, "learning_rate": 9.462645902764362e-06, "loss": 0.766, "step": 3670 }, { "epoch": 0.5236804564907276, "grad_norm": 0.8282262086868286, "learning_rate": 9.462298571603017e-06, "loss": 0.7855, "step": 3671 }, { "epoch": 0.5238231098430813, "grad_norm": 0.8732316493988037, "learning_rate": 9.461951134603918e-06, "loss": 0.792, "step": 3672 }, { "epoch": 0.5239657631954351, "grad_norm": 0.9385262131690979, "learning_rate": 9.461603591775304e-06, "loss": 0.7031, "step": 3673 }, { "epoch": 0.5241084165477888, "grad_norm": 0.8600476384162903, "learning_rate": 9.461255943125422e-06, "loss": 0.7815, "step": 3674 }, { "epoch": 0.5242510699001427, "grad_norm": 0.8772199749946594, "learning_rate": 9.460908188662516e-06, "loss": 0.6744, "step": 3675 }, { "epoch": 0.5243937232524964, "grad_norm": 0.7366943955421448, "learning_rate": 9.460560328394832e-06, "loss": 0.7759, "step": 3676 }, { "epoch": 0.5245363766048502, "grad_norm": 1.5101613998413086, "learning_rate": 9.460212362330623e-06, "loss": 0.8221, "step": 3677 }, { "epoch": 0.524679029957204, "grad_norm": 1.0198205709457397, "learning_rate": 9.45986429047814e-06, "loss": 0.7409, "step": 3678 }, { "epoch": 0.5248216833095578, "grad_norm": 0.8249444365501404, "learning_rate": 9.459516112845643e-06, "loss": 0.8117, "step": 3679 }, { "epoch": 0.5249643366619116, "grad_norm": 0.9865692257881165, "learning_rate": 9.459167829441385e-06, "loss": 0.7935, "step": 3680 }, { "epoch": 0.5251069900142653, "grad_norm": 0.984447181224823, "learning_rate": 9.45881944027363e-06, "loss": 0.7764, "step": 3681 }, { "epoch": 0.5252496433666192, "grad_norm": 0.8019534349441528, "learning_rate": 9.45847094535064e-06, "loss": 0.8343, "step": 3682 }, { "epoch": 0.5253922967189729, "grad_norm": 0.9061942100524902, "learning_rate": 9.45812234468068e-06, "loss": 0.791, "step": 3683 }, { "epoch": 0.5255349500713267, "grad_norm": 1.1901942491531372, "learning_rate": 9.457773638272018e-06, "loss": 0.7438, "step": 3684 }, { "epoch": 0.5256776034236804, "grad_norm": 0.6756222248077393, "learning_rate": 9.457424826132925e-06, "loss": 0.7347, "step": 3685 }, { "epoch": 0.5258202567760343, "grad_norm": 0.8619942665100098, "learning_rate": 9.457075908271677e-06, "loss": 0.8252, "step": 3686 }, { "epoch": 0.525962910128388, "grad_norm": 0.8412014245986938, "learning_rate": 9.456726884696548e-06, "loss": 0.7471, "step": 3687 }, { "epoch": 0.5261055634807418, "grad_norm": 0.8969194293022156, "learning_rate": 9.456377755415814e-06, "loss": 0.7682, "step": 3688 }, { "epoch": 0.5262482168330955, "grad_norm": 0.9360155463218689, "learning_rate": 9.456028520437758e-06, "loss": 0.7758, "step": 3689 }, { "epoch": 0.5263908701854494, "grad_norm": 0.9619754552841187, "learning_rate": 9.455679179770662e-06, "loss": 0.7107, "step": 3690 }, { "epoch": 0.5265335235378031, "grad_norm": 1.0945255756378174, "learning_rate": 9.455329733422814e-06, "loss": 0.7382, "step": 3691 }, { "epoch": 0.5266761768901569, "grad_norm": 0.9404793977737427, "learning_rate": 9.454980181402499e-06, "loss": 0.7799, "step": 3692 }, { "epoch": 0.5268188302425107, "grad_norm": 1.1391710042953491, "learning_rate": 9.45463052371801e-06, "loss": 0.7617, "step": 3693 }, { "epoch": 0.5269614835948645, "grad_norm": 1.1695657968521118, "learning_rate": 9.45428076037764e-06, "loss": 0.7959, "step": 3694 }, { "epoch": 0.5271041369472182, "grad_norm": 0.9902835488319397, "learning_rate": 9.453930891389684e-06, "loss": 0.6934, "step": 3695 }, { "epoch": 0.527246790299572, "grad_norm": 1.1658791303634644, "learning_rate": 9.45358091676244e-06, "loss": 0.7592, "step": 3696 }, { "epoch": 0.5273894436519259, "grad_norm": 0.8045031428337097, "learning_rate": 9.45323083650421e-06, "loss": 0.8133, "step": 3697 }, { "epoch": 0.5275320970042796, "grad_norm": 0.930515706539154, "learning_rate": 9.452880650623297e-06, "loss": 0.7203, "step": 3698 }, { "epoch": 0.5276747503566334, "grad_norm": 1.0128291845321655, "learning_rate": 9.452530359128007e-06, "loss": 0.7574, "step": 3699 }, { "epoch": 0.5278174037089871, "grad_norm": 0.7646418809890747, "learning_rate": 9.452179962026647e-06, "loss": 0.7076, "step": 3700 }, { "epoch": 0.527960057061341, "grad_norm": 0.9137625098228455, "learning_rate": 9.451829459327528e-06, "loss": 0.5396, "step": 3701 }, { "epoch": 0.5281027104136947, "grad_norm": 0.9639657139778137, "learning_rate": 9.451478851038965e-06, "loss": 0.7742, "step": 3702 }, { "epoch": 0.5282453637660485, "grad_norm": 1.0482797622680664, "learning_rate": 9.451128137169273e-06, "loss": 0.7571, "step": 3703 }, { "epoch": 0.5283880171184023, "grad_norm": 1.0665918588638306, "learning_rate": 9.450777317726768e-06, "loss": 0.5858, "step": 3704 }, { "epoch": 0.5285306704707561, "grad_norm": 0.9101869463920593, "learning_rate": 9.450426392719773e-06, "loss": 0.7022, "step": 3705 }, { "epoch": 0.5286733238231098, "grad_norm": 0.9322646856307983, "learning_rate": 9.450075362156614e-06, "loss": 0.7925, "step": 3706 }, { "epoch": 0.5288159771754636, "grad_norm": 0.9340315461158752, "learning_rate": 9.449724226045612e-06, "loss": 0.6898, "step": 3707 }, { "epoch": 0.5289586305278174, "grad_norm": 0.9280468821525574, "learning_rate": 9.449372984395096e-06, "loss": 0.799, "step": 3708 }, { "epoch": 0.5291012838801712, "grad_norm": 0.8312258124351501, "learning_rate": 9.4490216372134e-06, "loss": 0.7917, "step": 3709 }, { "epoch": 0.5292439372325249, "grad_norm": 0.7946302890777588, "learning_rate": 9.448670184508854e-06, "loss": 0.8267, "step": 3710 }, { "epoch": 0.5293865905848787, "grad_norm": 0.8247292637825012, "learning_rate": 9.448318626289796e-06, "loss": 0.865, "step": 3711 }, { "epoch": 0.5295292439372326, "grad_norm": 0.7541313171386719, "learning_rate": 9.447966962564564e-06, "loss": 0.7116, "step": 3712 }, { "epoch": 0.5296718972895863, "grad_norm": 0.9139978289604187, "learning_rate": 9.447615193341498e-06, "loss": 0.7635, "step": 3713 }, { "epoch": 0.52981455064194, "grad_norm": 0.7234423160552979, "learning_rate": 9.447263318628942e-06, "loss": 0.5395, "step": 3714 }, { "epoch": 0.5299572039942939, "grad_norm": 0.8707560300827026, "learning_rate": 9.44691133843524e-06, "loss": 0.6405, "step": 3715 }, { "epoch": 0.5300998573466477, "grad_norm": 0.6593236327171326, "learning_rate": 9.446559252768745e-06, "loss": 0.7982, "step": 3716 }, { "epoch": 0.5302425106990014, "grad_norm": 0.9893619418144226, "learning_rate": 9.446207061637802e-06, "loss": 0.8119, "step": 3717 }, { "epoch": 0.5303851640513552, "grad_norm": 0.8062154650688171, "learning_rate": 9.445854765050768e-06, "loss": 0.7776, "step": 3718 }, { "epoch": 0.530527817403709, "grad_norm": 0.7974979877471924, "learning_rate": 9.445502363015999e-06, "loss": 0.8263, "step": 3719 }, { "epoch": 0.5306704707560628, "grad_norm": 0.8471797704696655, "learning_rate": 9.44514985554185e-06, "loss": 0.7055, "step": 3720 }, { "epoch": 0.5308131241084165, "grad_norm": 0.9826592206954956, "learning_rate": 9.444797242636686e-06, "loss": 0.7937, "step": 3721 }, { "epoch": 0.5309557774607703, "grad_norm": 0.8173101544380188, "learning_rate": 9.444444524308869e-06, "loss": 0.7614, "step": 3722 }, { "epoch": 0.5310984308131241, "grad_norm": 0.8010444641113281, "learning_rate": 9.444091700566761e-06, "loss": 0.7716, "step": 3723 }, { "epoch": 0.5312410841654779, "grad_norm": 0.9752053022384644, "learning_rate": 9.443738771418738e-06, "loss": 0.7182, "step": 3724 }, { "epoch": 0.5313837375178316, "grad_norm": 0.9180756211280823, "learning_rate": 9.443385736873166e-06, "loss": 0.7389, "step": 3725 }, { "epoch": 0.5315263908701855, "grad_norm": 0.9787564873695374, "learning_rate": 9.443032596938416e-06, "loss": 0.7067, "step": 3726 }, { "epoch": 0.5316690442225392, "grad_norm": 1.2496058940887451, "learning_rate": 9.442679351622868e-06, "loss": 0.7819, "step": 3727 }, { "epoch": 0.531811697574893, "grad_norm": 1.0999810695648193, "learning_rate": 9.442326000934901e-06, "loss": 0.6903, "step": 3728 }, { "epoch": 0.5319543509272467, "grad_norm": 0.9185510277748108, "learning_rate": 9.441972544882892e-06, "loss": 0.7864, "step": 3729 }, { "epoch": 0.5320970042796006, "grad_norm": 1.1549924612045288, "learning_rate": 9.441618983475226e-06, "loss": 0.7518, "step": 3730 }, { "epoch": 0.5322396576319544, "grad_norm": 0.7317920923233032, "learning_rate": 9.44126531672029e-06, "loss": 0.7407, "step": 3731 }, { "epoch": 0.5323823109843081, "grad_norm": 0.9729320406913757, "learning_rate": 9.44091154462647e-06, "loss": 0.8163, "step": 3732 }, { "epoch": 0.532524964336662, "grad_norm": 0.8830687999725342, "learning_rate": 9.440557667202159e-06, "loss": 0.7556, "step": 3733 }, { "epoch": 0.5326676176890157, "grad_norm": 1.000052809715271, "learning_rate": 9.44020368445575e-06, "loss": 0.5106, "step": 3734 }, { "epoch": 0.5328102710413695, "grad_norm": 0.710538923740387, "learning_rate": 9.43984959639564e-06, "loss": 0.7229, "step": 3735 }, { "epoch": 0.5329529243937232, "grad_norm": 1.032488465309143, "learning_rate": 9.439495403030224e-06, "loss": 0.7088, "step": 3736 }, { "epoch": 0.5330955777460771, "grad_norm": 1.1073076725006104, "learning_rate": 9.439141104367906e-06, "loss": 0.7845, "step": 3737 }, { "epoch": 0.5332382310984308, "grad_norm": 1.2522554397583008, "learning_rate": 9.438786700417086e-06, "loss": 0.8004, "step": 3738 }, { "epoch": 0.5333808844507846, "grad_norm": 0.892382800579071, "learning_rate": 9.438432191186173e-06, "loss": 0.7946, "step": 3739 }, { "epoch": 0.5335235378031383, "grad_norm": 0.9575504660606384, "learning_rate": 9.438077576683575e-06, "loss": 0.7192, "step": 3740 }, { "epoch": 0.5336661911554922, "grad_norm": 1.0557219982147217, "learning_rate": 9.437722856917701e-06, "loss": 0.7357, "step": 3741 }, { "epoch": 0.5338088445078459, "grad_norm": 0.9808925986289978, "learning_rate": 9.437368031896965e-06, "loss": 0.8335, "step": 3742 }, { "epoch": 0.5339514978601997, "grad_norm": 0.8837986588478088, "learning_rate": 9.437013101629785e-06, "loss": 0.7742, "step": 3743 }, { "epoch": 0.5340941512125535, "grad_norm": 0.9830921292304993, "learning_rate": 9.436658066124575e-06, "loss": 0.8071, "step": 3744 }, { "epoch": 0.5342368045649073, "grad_norm": 0.9636303186416626, "learning_rate": 9.43630292538976e-06, "loss": 0.7329, "step": 3745 }, { "epoch": 0.534379457917261, "grad_norm": 0.7886168956756592, "learning_rate": 9.435947679433761e-06, "loss": 0.7752, "step": 3746 }, { "epoch": 0.5345221112696148, "grad_norm": 1.0022714138031006, "learning_rate": 9.435592328265003e-06, "loss": 0.7812, "step": 3747 }, { "epoch": 0.5346647646219687, "grad_norm": 1.0230801105499268, "learning_rate": 9.435236871891917e-06, "loss": 0.8012, "step": 3748 }, { "epoch": 0.5348074179743224, "grad_norm": 0.7865760326385498, "learning_rate": 9.434881310322932e-06, "loss": 0.7669, "step": 3749 }, { "epoch": 0.5349500713266762, "grad_norm": 0.8765584826469421, "learning_rate": 9.434525643566481e-06, "loss": 0.8231, "step": 3750 }, { "epoch": 0.5350927246790299, "grad_norm": 1.0778067111968994, "learning_rate": 9.434169871631003e-06, "loss": 0.7504, "step": 3751 }, { "epoch": 0.5352353780313838, "grad_norm": 0.9879114627838135, "learning_rate": 9.433813994524933e-06, "loss": 0.7609, "step": 3752 }, { "epoch": 0.5353780313837375, "grad_norm": 0.7823513746261597, "learning_rate": 9.43345801225671e-06, "loss": 0.8272, "step": 3753 }, { "epoch": 0.5355206847360913, "grad_norm": 0.9532626271247864, "learning_rate": 9.433101924834782e-06, "loss": 0.74, "step": 3754 }, { "epoch": 0.5356633380884451, "grad_norm": 0.7085595726966858, "learning_rate": 9.432745732267593e-06, "loss": 0.7988, "step": 3755 }, { "epoch": 0.5358059914407989, "grad_norm": 0.8867372870445251, "learning_rate": 9.432389434563589e-06, "loss": 0.7765, "step": 3756 }, { "epoch": 0.5359486447931526, "grad_norm": 0.8110182285308838, "learning_rate": 9.432033031731222e-06, "loss": 0.7357, "step": 3757 }, { "epoch": 0.5360912981455064, "grad_norm": 0.8507335782051086, "learning_rate": 9.431676523778948e-06, "loss": 0.75, "step": 3758 }, { "epoch": 0.5362339514978602, "grad_norm": 0.9301802515983582, "learning_rate": 9.43131991071522e-06, "loss": 0.7358, "step": 3759 }, { "epoch": 0.536376604850214, "grad_norm": 0.8076273798942566, "learning_rate": 9.430963192548497e-06, "loss": 0.7378, "step": 3760 }, { "epoch": 0.5365192582025677, "grad_norm": 0.9314234852790833, "learning_rate": 9.430606369287238e-06, "loss": 0.7996, "step": 3761 }, { "epoch": 0.5366619115549215, "grad_norm": 0.9800743460655212, "learning_rate": 9.43024944093991e-06, "loss": 0.759, "step": 3762 }, { "epoch": 0.5368045649072754, "grad_norm": 0.8995145559310913, "learning_rate": 9.429892407514973e-06, "loss": 0.753, "step": 3763 }, { "epoch": 0.5369472182596291, "grad_norm": 0.880490779876709, "learning_rate": 9.4295352690209e-06, "loss": 0.7698, "step": 3764 }, { "epoch": 0.5370898716119828, "grad_norm": 0.9529716968536377, "learning_rate": 9.429178025466162e-06, "loss": 0.7406, "step": 3765 }, { "epoch": 0.5372325249643367, "grad_norm": 0.8968760371208191, "learning_rate": 9.428820676859229e-06, "loss": 0.7051, "step": 3766 }, { "epoch": 0.5373751783166905, "grad_norm": 0.8870418667793274, "learning_rate": 9.428463223208579e-06, "loss": 0.7241, "step": 3767 }, { "epoch": 0.5375178316690442, "grad_norm": 0.8251546621322632, "learning_rate": 9.428105664522688e-06, "loss": 0.7874, "step": 3768 }, { "epoch": 0.537660485021398, "grad_norm": 0.9303982853889465, "learning_rate": 9.427748000810037e-06, "loss": 0.7694, "step": 3769 }, { "epoch": 0.5378031383737518, "grad_norm": 0.8409507274627686, "learning_rate": 9.427390232079113e-06, "loss": 0.5782, "step": 3770 }, { "epoch": 0.5379457917261056, "grad_norm": 0.9384737610816956, "learning_rate": 9.427032358338397e-06, "loss": 0.6343, "step": 3771 }, { "epoch": 0.5380884450784593, "grad_norm": 0.7768946290016174, "learning_rate": 9.426674379596378e-06, "loss": 0.5803, "step": 3772 }, { "epoch": 0.5382310984308131, "grad_norm": 0.9682494401931763, "learning_rate": 9.426316295861551e-06, "loss": 0.7583, "step": 3773 }, { "epoch": 0.5383737517831669, "grad_norm": 1.0426535606384277, "learning_rate": 9.425958107142403e-06, "loss": 0.6997, "step": 3774 }, { "epoch": 0.5385164051355207, "grad_norm": 1.061850666999817, "learning_rate": 9.42559981344743e-06, "loss": 0.7664, "step": 3775 }, { "epoch": 0.5386590584878744, "grad_norm": 1.269931435585022, "learning_rate": 9.425241414785136e-06, "loss": 0.6823, "step": 3776 }, { "epoch": 0.5388017118402283, "grad_norm": 0.648510754108429, "learning_rate": 9.424882911164017e-06, "loss": 0.5664, "step": 3777 }, { "epoch": 0.538944365192582, "grad_norm": 0.9643648862838745, "learning_rate": 9.424524302592575e-06, "loss": 0.7396, "step": 3778 }, { "epoch": 0.5390870185449358, "grad_norm": 0.9548050761222839, "learning_rate": 9.424165589079319e-06, "loss": 0.7476, "step": 3779 }, { "epoch": 0.5392296718972895, "grad_norm": 1.1638333797454834, "learning_rate": 9.423806770632755e-06, "loss": 0.8011, "step": 3780 }, { "epoch": 0.5393723252496434, "grad_norm": 0.7156169414520264, "learning_rate": 9.423447847261394e-06, "loss": 0.5816, "step": 3781 }, { "epoch": 0.5395149786019972, "grad_norm": 0.8134765028953552, "learning_rate": 9.42308881897375e-06, "loss": 0.7886, "step": 3782 }, { "epoch": 0.5396576319543509, "grad_norm": 0.7660357356071472, "learning_rate": 9.422729685778336e-06, "loss": 0.5448, "step": 3783 }, { "epoch": 0.5398002853067047, "grad_norm": 0.6888023018836975, "learning_rate": 9.422370447683673e-06, "loss": 0.748, "step": 3784 }, { "epoch": 0.5399429386590585, "grad_norm": 0.6250286102294922, "learning_rate": 9.422011104698279e-06, "loss": 0.623, "step": 3785 }, { "epoch": 0.5400855920114123, "grad_norm": 0.8474677801132202, "learning_rate": 9.42165165683068e-06, "loss": 0.7005, "step": 3786 }, { "epoch": 0.540228245363766, "grad_norm": 0.7340326905250549, "learning_rate": 9.421292104089396e-06, "loss": 0.8158, "step": 3787 }, { "epoch": 0.5403708987161199, "grad_norm": 0.8920254707336426, "learning_rate": 9.42093244648296e-06, "loss": 0.7517, "step": 3788 }, { "epoch": 0.5405135520684736, "grad_norm": 1.0397709608078003, "learning_rate": 9.420572684019902e-06, "loss": 0.7797, "step": 3789 }, { "epoch": 0.5406562054208274, "grad_norm": 0.9333937764167786, "learning_rate": 9.420212816708752e-06, "loss": 0.6948, "step": 3790 }, { "epoch": 0.5407988587731811, "grad_norm": 0.6868544816970825, "learning_rate": 9.419852844558049e-06, "loss": 0.583, "step": 3791 }, { "epoch": 0.540941512125535, "grad_norm": 0.8502908945083618, "learning_rate": 9.419492767576329e-06, "loss": 0.855, "step": 3792 }, { "epoch": 0.5410841654778887, "grad_norm": 0.9610970616340637, "learning_rate": 9.41913258577213e-06, "loss": 0.7259, "step": 3793 }, { "epoch": 0.5412268188302425, "grad_norm": 0.7129231691360474, "learning_rate": 9.418772299153999e-06, "loss": 0.8318, "step": 3794 }, { "epoch": 0.5413694721825963, "grad_norm": 1.0146222114562988, "learning_rate": 9.41841190773048e-06, "loss": 0.7347, "step": 3795 }, { "epoch": 0.5415121255349501, "grad_norm": 0.7784373760223389, "learning_rate": 9.41805141151012e-06, "loss": 0.7789, "step": 3796 }, { "epoch": 0.5416547788873038, "grad_norm": 0.9648737907409668, "learning_rate": 9.417690810501469e-06, "loss": 0.7374, "step": 3797 }, { "epoch": 0.5417974322396576, "grad_norm": 1.2017556428909302, "learning_rate": 9.417330104713084e-06, "loss": 0.7079, "step": 3798 }, { "epoch": 0.5419400855920115, "grad_norm": 1.0219480991363525, "learning_rate": 9.416969294153512e-06, "loss": 0.771, "step": 3799 }, { "epoch": 0.5420827389443652, "grad_norm": 0.9495611786842346, "learning_rate": 9.41660837883132e-06, "loss": 0.6917, "step": 3800 }, { "epoch": 0.542225392296719, "grad_norm": 0.9101681709289551, "learning_rate": 9.416247358755062e-06, "loss": 0.7347, "step": 3801 }, { "epoch": 0.5423680456490727, "grad_norm": 0.7405933737754822, "learning_rate": 9.415886233933303e-06, "loss": 0.7821, "step": 3802 }, { "epoch": 0.5425106990014266, "grad_norm": 1.2204949855804443, "learning_rate": 9.415525004374608e-06, "loss": 0.7268, "step": 3803 }, { "epoch": 0.5426533523537803, "grad_norm": 0.7763726115226746, "learning_rate": 9.415163670087544e-06, "loss": 0.765, "step": 3804 }, { "epoch": 0.5427960057061341, "grad_norm": 0.8272026777267456, "learning_rate": 9.414802231080683e-06, "loss": 0.7719, "step": 3805 }, { "epoch": 0.5429386590584879, "grad_norm": 1.023071050643921, "learning_rate": 9.414440687362599e-06, "loss": 0.7672, "step": 3806 }, { "epoch": 0.5430813124108417, "grad_norm": 0.8470318913459778, "learning_rate": 9.414079038941861e-06, "loss": 0.7465, "step": 3807 }, { "epoch": 0.5432239657631954, "grad_norm": 1.1065614223480225, "learning_rate": 9.41371728582705e-06, "loss": 0.7848, "step": 3808 }, { "epoch": 0.5433666191155492, "grad_norm": 0.8630528450012207, "learning_rate": 9.413355428026748e-06, "loss": 0.8045, "step": 3809 }, { "epoch": 0.543509272467903, "grad_norm": 1.003273606300354, "learning_rate": 9.412993465549539e-06, "loss": 0.7217, "step": 3810 }, { "epoch": 0.5436519258202568, "grad_norm": 0.8895302414894104, "learning_rate": 9.412631398404002e-06, "loss": 0.7901, "step": 3811 }, { "epoch": 0.5437945791726105, "grad_norm": 0.960581362247467, "learning_rate": 9.412269226598728e-06, "loss": 0.7509, "step": 3812 }, { "epoch": 0.5439372325249643, "grad_norm": 0.9232653975486755, "learning_rate": 9.411906950142307e-06, "loss": 0.7023, "step": 3813 }, { "epoch": 0.5440798858773181, "grad_norm": 1.0568187236785889, "learning_rate": 9.411544569043332e-06, "loss": 0.8439, "step": 3814 }, { "epoch": 0.5442225392296719, "grad_norm": 0.8565939664840698, "learning_rate": 9.411182083310398e-06, "loss": 0.8, "step": 3815 }, { "epoch": 0.5443651925820256, "grad_norm": 1.2284659147262573, "learning_rate": 9.410819492952101e-06, "loss": 0.736, "step": 3816 }, { "epoch": 0.5445078459343795, "grad_norm": 0.8112896680831909, "learning_rate": 9.410456797977043e-06, "loss": 0.7771, "step": 3817 }, { "epoch": 0.5446504992867333, "grad_norm": 0.8988441824913025, "learning_rate": 9.410093998393826e-06, "loss": 0.7399, "step": 3818 }, { "epoch": 0.544793152639087, "grad_norm": 0.7892339825630188, "learning_rate": 9.409731094211054e-06, "loss": 0.8353, "step": 3819 }, { "epoch": 0.5449358059914408, "grad_norm": 0.909959614276886, "learning_rate": 9.409368085437334e-06, "loss": 0.8046, "step": 3820 }, { "epoch": 0.5450784593437946, "grad_norm": 0.9480772614479065, "learning_rate": 9.409004972081277e-06, "loss": 0.7268, "step": 3821 }, { "epoch": 0.5452211126961484, "grad_norm": 0.7678883075714111, "learning_rate": 9.408641754151496e-06, "loss": 0.7189, "step": 3822 }, { "epoch": 0.5453637660485021, "grad_norm": 1.0699303150177002, "learning_rate": 9.408278431656605e-06, "loss": 0.6943, "step": 3823 }, { "epoch": 0.5455064194008559, "grad_norm": 0.8879801034927368, "learning_rate": 9.407915004605221e-06, "loss": 0.7314, "step": 3824 }, { "epoch": 0.5456490727532097, "grad_norm": 0.8187634944915771, "learning_rate": 9.407551473005965e-06, "loss": 0.7599, "step": 3825 }, { "epoch": 0.5457917261055635, "grad_norm": 0.8397625684738159, "learning_rate": 9.407187836867459e-06, "loss": 0.7384, "step": 3826 }, { "epoch": 0.5459343794579172, "grad_norm": 0.8377321362495422, "learning_rate": 9.406824096198326e-06, "loss": 0.7193, "step": 3827 }, { "epoch": 0.5460770328102711, "grad_norm": 1.0204477310180664, "learning_rate": 9.406460251007195e-06, "loss": 0.7563, "step": 3828 }, { "epoch": 0.5462196861626248, "grad_norm": 0.7482069730758667, "learning_rate": 9.406096301302696e-06, "loss": 0.7271, "step": 3829 }, { "epoch": 0.5463623395149786, "grad_norm": 0.8671506643295288, "learning_rate": 9.405732247093463e-06, "loss": 0.7212, "step": 3830 }, { "epoch": 0.5465049928673323, "grad_norm": 1.0759758949279785, "learning_rate": 9.405368088388124e-06, "loss": 0.7894, "step": 3831 }, { "epoch": 0.5466476462196862, "grad_norm": 0.7334364056587219, "learning_rate": 9.405003825195324e-06, "loss": 0.749, "step": 3832 }, { "epoch": 0.54679029957204, "grad_norm": 0.9418511390686035, "learning_rate": 9.404639457523698e-06, "loss": 0.7525, "step": 3833 }, { "epoch": 0.5469329529243937, "grad_norm": 0.9294775128364563, "learning_rate": 9.404274985381889e-06, "loss": 0.8074, "step": 3834 }, { "epoch": 0.5470756062767475, "grad_norm": 0.82530277967453, "learning_rate": 9.403910408778543e-06, "loss": 0.7286, "step": 3835 }, { "epoch": 0.5472182596291013, "grad_norm": 0.9933231472969055, "learning_rate": 9.403545727722306e-06, "loss": 0.7873, "step": 3836 }, { "epoch": 0.5473609129814551, "grad_norm": 1.3327282667160034, "learning_rate": 9.403180942221827e-06, "loss": 0.7544, "step": 3837 }, { "epoch": 0.5475035663338088, "grad_norm": 1.029980182647705, "learning_rate": 9.40281605228576e-06, "loss": 0.7961, "step": 3838 }, { "epoch": 0.5476462196861627, "grad_norm": 0.9801850318908691, "learning_rate": 9.402451057922757e-06, "loss": 0.7422, "step": 3839 }, { "epoch": 0.5477888730385164, "grad_norm": 0.7895873188972473, "learning_rate": 9.402085959141476e-06, "loss": 0.7254, "step": 3840 }, { "epoch": 0.5479315263908702, "grad_norm": 0.9539476633071899, "learning_rate": 9.401720755950578e-06, "loss": 0.7911, "step": 3841 }, { "epoch": 0.5480741797432239, "grad_norm": 0.7882383465766907, "learning_rate": 9.401355448358725e-06, "loss": 0.7592, "step": 3842 }, { "epoch": 0.5482168330955778, "grad_norm": 1.0241360664367676, "learning_rate": 9.40099003637458e-06, "loss": 0.7755, "step": 3843 }, { "epoch": 0.5483594864479315, "grad_norm": 0.914991557598114, "learning_rate": 9.40062452000681e-06, "loss": 0.8418, "step": 3844 }, { "epoch": 0.5485021398002853, "grad_norm": 0.9055288434028625, "learning_rate": 9.400258899264083e-06, "loss": 0.7925, "step": 3845 }, { "epoch": 0.548644793152639, "grad_norm": 0.8815552592277527, "learning_rate": 9.399893174155072e-06, "loss": 0.5296, "step": 3846 }, { "epoch": 0.5487874465049929, "grad_norm": 0.7472600340843201, "learning_rate": 9.399527344688454e-06, "loss": 0.5569, "step": 3847 }, { "epoch": 0.5489300998573466, "grad_norm": 0.852327287197113, "learning_rate": 9.399161410872904e-06, "loss": 0.746, "step": 3848 }, { "epoch": 0.5490727532097004, "grad_norm": 0.9859546422958374, "learning_rate": 9.3987953727171e-06, "loss": 0.7886, "step": 3849 }, { "epoch": 0.5492154065620543, "grad_norm": 0.8234342336654663, "learning_rate": 9.398429230229724e-06, "loss": 0.7614, "step": 3850 }, { "epoch": 0.549358059914408, "grad_norm": 1.2982840538024902, "learning_rate": 9.39806298341946e-06, "loss": 0.8429, "step": 3851 }, { "epoch": 0.5495007132667618, "grad_norm": 0.8056015372276306, "learning_rate": 9.397696632294998e-06, "loss": 0.7934, "step": 3852 }, { "epoch": 0.5496433666191155, "grad_norm": 0.6674420237541199, "learning_rate": 9.397330176865026e-06, "loss": 0.7803, "step": 3853 }, { "epoch": 0.5497860199714694, "grad_norm": 1.0118069648742676, "learning_rate": 9.396963617138231e-06, "loss": 0.8304, "step": 3854 }, { "epoch": 0.5499286733238231, "grad_norm": 0.822985827922821, "learning_rate": 9.396596953123312e-06, "loss": 0.7801, "step": 3855 }, { "epoch": 0.5500713266761769, "grad_norm": 0.9515111446380615, "learning_rate": 9.396230184828964e-06, "loss": 0.5683, "step": 3856 }, { "epoch": 0.5502139800285307, "grad_norm": 0.9106077551841736, "learning_rate": 9.395863312263888e-06, "loss": 0.7109, "step": 3857 }, { "epoch": 0.5503566333808845, "grad_norm": 1.0948283672332764, "learning_rate": 9.395496335436783e-06, "loss": 0.8231, "step": 3858 }, { "epoch": 0.5504992867332382, "grad_norm": 0.7987005114555359, "learning_rate": 9.395129254356354e-06, "loss": 0.7439, "step": 3859 }, { "epoch": 0.550641940085592, "grad_norm": 1.1406909227371216, "learning_rate": 9.394762069031306e-06, "loss": 0.7732, "step": 3860 }, { "epoch": 0.5507845934379458, "grad_norm": 0.9241793155670166, "learning_rate": 9.394394779470351e-06, "loss": 0.7821, "step": 3861 }, { "epoch": 0.5509272467902996, "grad_norm": 0.9340644478797913, "learning_rate": 9.394027385682198e-06, "loss": 0.7734, "step": 3862 }, { "epoch": 0.5510699001426533, "grad_norm": 1.0453873872756958, "learning_rate": 9.393659887675564e-06, "loss": 0.7709, "step": 3863 }, { "epoch": 0.5512125534950071, "grad_norm": 1.141420841217041, "learning_rate": 9.393292285459162e-06, "loss": 0.7037, "step": 3864 }, { "epoch": 0.551355206847361, "grad_norm": 0.9163342714309692, "learning_rate": 9.392924579041711e-06, "loss": 0.7508, "step": 3865 }, { "epoch": 0.5514978601997147, "grad_norm": 1.008906364440918, "learning_rate": 9.392556768431936e-06, "loss": 0.7192, "step": 3866 }, { "epoch": 0.5516405135520684, "grad_norm": 1.2465540170669556, "learning_rate": 9.392188853638556e-06, "loss": 0.8364, "step": 3867 }, { "epoch": 0.5517831669044223, "grad_norm": 1.1515864133834839, "learning_rate": 9.3918208346703e-06, "loss": 0.741, "step": 3868 }, { "epoch": 0.5519258202567761, "grad_norm": 0.8714399337768555, "learning_rate": 9.391452711535897e-06, "loss": 0.7673, "step": 3869 }, { "epoch": 0.5520684736091298, "grad_norm": 0.8498027920722961, "learning_rate": 9.391084484244077e-06, "loss": 0.7968, "step": 3870 }, { "epoch": 0.5522111269614836, "grad_norm": 0.9492782354354858, "learning_rate": 9.390716152803573e-06, "loss": 0.7765, "step": 3871 }, { "epoch": 0.5523537803138374, "grad_norm": 1.3390405178070068, "learning_rate": 9.390347717223124e-06, "loss": 0.5709, "step": 3872 }, { "epoch": 0.5524964336661912, "grad_norm": 0.7939488887786865, "learning_rate": 9.389979177511467e-06, "loss": 0.7349, "step": 3873 }, { "epoch": 0.5526390870185449, "grad_norm": 0.9034905433654785, "learning_rate": 9.389610533677342e-06, "loss": 0.6985, "step": 3874 }, { "epoch": 0.5527817403708987, "grad_norm": 1.0521996021270752, "learning_rate": 9.389241785729495e-06, "loss": 0.7328, "step": 3875 }, { "epoch": 0.5529243937232525, "grad_norm": 1.0269242525100708, "learning_rate": 9.388872933676672e-06, "loss": 0.674, "step": 3876 }, { "epoch": 0.5530670470756063, "grad_norm": 0.7578728795051575, "learning_rate": 9.388503977527617e-06, "loss": 0.8702, "step": 3877 }, { "epoch": 0.55320970042796, "grad_norm": 0.9115679264068604, "learning_rate": 9.388134917291085e-06, "loss": 0.7844, "step": 3878 }, { "epoch": 0.5533523537803139, "grad_norm": 0.7785453200340271, "learning_rate": 9.38776575297583e-06, "loss": 0.762, "step": 3879 }, { "epoch": 0.5534950071326676, "grad_norm": 0.7482083439826965, "learning_rate": 9.387396484590607e-06, "loss": 0.7261, "step": 3880 }, { "epoch": 0.5536376604850214, "grad_norm": 0.86270672082901, "learning_rate": 9.387027112144171e-06, "loss": 0.5208, "step": 3881 }, { "epoch": 0.5537803138373751, "grad_norm": 1.0379152297973633, "learning_rate": 9.38665763564529e-06, "loss": 0.7124, "step": 3882 }, { "epoch": 0.553922967189729, "grad_norm": 1.0672602653503418, "learning_rate": 9.38628805510272e-06, "loss": 0.7421, "step": 3883 }, { "epoch": 0.5540656205420827, "grad_norm": 0.8030416369438171, "learning_rate": 9.38591837052523e-06, "loss": 0.8006, "step": 3884 }, { "epoch": 0.5542082738944365, "grad_norm": 0.8115381002426147, "learning_rate": 9.385548581921589e-06, "loss": 0.783, "step": 3885 }, { "epoch": 0.5543509272467902, "grad_norm": 1.0033485889434814, "learning_rate": 9.385178689300567e-06, "loss": 0.7728, "step": 3886 }, { "epoch": 0.5544935805991441, "grad_norm": 1.0693013668060303, "learning_rate": 9.384808692670937e-06, "loss": 0.7733, "step": 3887 }, { "epoch": 0.5546362339514979, "grad_norm": 0.7807943820953369, "learning_rate": 9.384438592041476e-06, "loss": 0.8106, "step": 3888 }, { "epoch": 0.5547788873038516, "grad_norm": 1.0082643032073975, "learning_rate": 9.384068387420959e-06, "loss": 0.7763, "step": 3889 }, { "epoch": 0.5549215406562055, "grad_norm": 0.6661744713783264, "learning_rate": 9.38369807881817e-06, "loss": 0.7905, "step": 3890 }, { "epoch": 0.5550641940085592, "grad_norm": 0.7957794070243835, "learning_rate": 9.38332766624189e-06, "loss": 0.7971, "step": 3891 }, { "epoch": 0.555206847360913, "grad_norm": 0.9141960740089417, "learning_rate": 9.382957149700904e-06, "loss": 0.764, "step": 3892 }, { "epoch": 0.5553495007132667, "grad_norm": 0.9018802642822266, "learning_rate": 9.382586529204002e-06, "loss": 0.7772, "step": 3893 }, { "epoch": 0.5554921540656206, "grad_norm": 1.0242016315460205, "learning_rate": 9.382215804759975e-06, "loss": 0.8302, "step": 3894 }, { "epoch": 0.5556348074179743, "grad_norm": 0.8564112782478333, "learning_rate": 9.381844976377612e-06, "loss": 0.7643, "step": 3895 }, { "epoch": 0.5557774607703281, "grad_norm": 0.82747882604599, "learning_rate": 9.381474044065712e-06, "loss": 0.7983, "step": 3896 }, { "epoch": 0.5559201141226818, "grad_norm": 0.9846975207328796, "learning_rate": 9.381103007833072e-06, "loss": 0.6618, "step": 3897 }, { "epoch": 0.5560627674750357, "grad_norm": 1.0486607551574707, "learning_rate": 9.38073186768849e-06, "loss": 0.7422, "step": 3898 }, { "epoch": 0.5562054208273894, "grad_norm": 1.1494112014770508, "learning_rate": 9.380360623640774e-06, "loss": 0.7019, "step": 3899 }, { "epoch": 0.5563480741797432, "grad_norm": 0.8423972129821777, "learning_rate": 9.379989275698724e-06, "loss": 0.8188, "step": 3900 }, { "epoch": 0.556490727532097, "grad_norm": 0.9076462984085083, "learning_rate": 9.37961782387115e-06, "loss": 0.7645, "step": 3901 }, { "epoch": 0.5566333808844508, "grad_norm": 0.8854547739028931, "learning_rate": 9.379246268166863e-06, "loss": 0.6709, "step": 3902 }, { "epoch": 0.5567760342368046, "grad_norm": 0.769031822681427, "learning_rate": 9.378874608594673e-06, "loss": 0.7791, "step": 3903 }, { "epoch": 0.5569186875891583, "grad_norm": 0.9481874108314514, "learning_rate": 9.378502845163397e-06, "loss": 0.8044, "step": 3904 }, { "epoch": 0.5570613409415122, "grad_norm": 0.7037337422370911, "learning_rate": 9.378130977881855e-06, "loss": 0.7142, "step": 3905 }, { "epoch": 0.5572039942938659, "grad_norm": 0.8134806156158447, "learning_rate": 9.37775900675886e-06, "loss": 0.8321, "step": 3906 }, { "epoch": 0.5573466476462197, "grad_norm": 1.2003941535949707, "learning_rate": 9.37738693180324e-06, "loss": 0.83, "step": 3907 }, { "epoch": 0.5574893009985734, "grad_norm": 1.061151385307312, "learning_rate": 9.37701475302382e-06, "loss": 0.7487, "step": 3908 }, { "epoch": 0.5576319543509273, "grad_norm": 0.9353809356689453, "learning_rate": 9.376642470429425e-06, "loss": 0.7537, "step": 3909 }, { "epoch": 0.557774607703281, "grad_norm": 0.8664660453796387, "learning_rate": 9.376270084028887e-06, "loss": 0.7445, "step": 3910 }, { "epoch": 0.5579172610556348, "grad_norm": 0.9144179821014404, "learning_rate": 9.375897593831036e-06, "loss": 0.7737, "step": 3911 }, { "epoch": 0.5580599144079886, "grad_norm": 1.072312593460083, "learning_rate": 9.375524999844709e-06, "loss": 0.7481, "step": 3912 }, { "epoch": 0.5582025677603424, "grad_norm": 0.8142551779747009, "learning_rate": 9.375152302078742e-06, "loss": 0.7438, "step": 3913 }, { "epoch": 0.5583452211126961, "grad_norm": 0.9728956818580627, "learning_rate": 9.374779500541975e-06, "loss": 0.765, "step": 3914 }, { "epoch": 0.5584878744650499, "grad_norm": 0.8081833720207214, "learning_rate": 9.374406595243252e-06, "loss": 0.8573, "step": 3915 }, { "epoch": 0.5586305278174037, "grad_norm": 0.7585574984550476, "learning_rate": 9.374033586191413e-06, "loss": 0.8176, "step": 3916 }, { "epoch": 0.5587731811697575, "grad_norm": 0.8608925938606262, "learning_rate": 9.37366047339531e-06, "loss": 0.8215, "step": 3917 }, { "epoch": 0.5589158345221112, "grad_norm": 0.8893569707870483, "learning_rate": 9.373287256863792e-06, "loss": 0.7927, "step": 3918 }, { "epoch": 0.5590584878744651, "grad_norm": 0.7753645181655884, "learning_rate": 9.372913936605708e-06, "loss": 0.795, "step": 3919 }, { "epoch": 0.5592011412268189, "grad_norm": 0.8030126094818115, "learning_rate": 9.372540512629913e-06, "loss": 0.7629, "step": 3920 }, { "epoch": 0.5593437945791726, "grad_norm": 0.6363753080368042, "learning_rate": 9.372166984945266e-06, "loss": 0.7646, "step": 3921 }, { "epoch": 0.5594864479315264, "grad_norm": 0.6828857064247131, "learning_rate": 9.371793353560627e-06, "loss": 0.7824, "step": 3922 }, { "epoch": 0.5596291012838802, "grad_norm": 0.7620823979377747, "learning_rate": 9.371419618484854e-06, "loss": 0.8032, "step": 3923 }, { "epoch": 0.559771754636234, "grad_norm": 0.9093039631843567, "learning_rate": 9.371045779726815e-06, "loss": 0.6905, "step": 3924 }, { "epoch": 0.5599144079885877, "grad_norm": 0.8852125406265259, "learning_rate": 9.370671837295376e-06, "loss": 0.7307, "step": 3925 }, { "epoch": 0.5600570613409415, "grad_norm": 1.2022708654403687, "learning_rate": 9.370297791199406e-06, "loss": 0.7569, "step": 3926 }, { "epoch": 0.5601997146932953, "grad_norm": 1.2121343612670898, "learning_rate": 9.369923641447774e-06, "loss": 0.8059, "step": 3927 }, { "epoch": 0.5603423680456491, "grad_norm": 0.9693889617919922, "learning_rate": 9.369549388049359e-06, "loss": 0.671, "step": 3928 }, { "epoch": 0.5604850213980028, "grad_norm": 0.8566369414329529, "learning_rate": 9.369175031013034e-06, "loss": 0.7901, "step": 3929 }, { "epoch": 0.5606276747503567, "grad_norm": 0.6705704927444458, "learning_rate": 9.368800570347681e-06, "loss": 0.81, "step": 3930 }, { "epoch": 0.5607703281027104, "grad_norm": 0.7827990055084229, "learning_rate": 9.368426006062179e-06, "loss": 0.8014, "step": 3931 }, { "epoch": 0.5609129814550642, "grad_norm": 1.008968472480774, "learning_rate": 9.36805133816541e-06, "loss": 0.7946, "step": 3932 }, { "epoch": 0.5610556348074179, "grad_norm": 1.0732449293136597, "learning_rate": 9.367676566666268e-06, "loss": 0.7743, "step": 3933 }, { "epoch": 0.5611982881597718, "grad_norm": 0.844943106174469, "learning_rate": 9.367301691573633e-06, "loss": 0.8017, "step": 3934 }, { "epoch": 0.5613409415121255, "grad_norm": 1.220983862876892, "learning_rate": 9.366926712896404e-06, "loss": 0.7263, "step": 3935 }, { "epoch": 0.5614835948644793, "grad_norm": 0.9163217544555664, "learning_rate": 9.36655163064347e-06, "loss": 0.7281, "step": 3936 }, { "epoch": 0.561626248216833, "grad_norm": 0.798708975315094, "learning_rate": 9.36617644482373e-06, "loss": 0.7522, "step": 3937 }, { "epoch": 0.5617689015691869, "grad_norm": 0.7758632898330688, "learning_rate": 9.365801155446079e-06, "loss": 0.8327, "step": 3938 }, { "epoch": 0.5619115549215407, "grad_norm": 0.8243889212608337, "learning_rate": 9.365425762519422e-06, "loss": 0.8003, "step": 3939 }, { "epoch": 0.5620542082738944, "grad_norm": 0.8462749123573303, "learning_rate": 9.36505026605266e-06, "loss": 0.7252, "step": 3940 }, { "epoch": 0.5621968616262483, "grad_norm": 1.0904769897460938, "learning_rate": 9.364674666054701e-06, "loss": 0.7509, "step": 3941 }, { "epoch": 0.562339514978602, "grad_norm": 0.9569264054298401, "learning_rate": 9.364298962534455e-06, "loss": 0.7646, "step": 3942 }, { "epoch": 0.5624821683309558, "grad_norm": 0.7271814346313477, "learning_rate": 9.363923155500829e-06, "loss": 0.7848, "step": 3943 }, { "epoch": 0.5626248216833095, "grad_norm": 0.8120964169502258, "learning_rate": 9.363547244962738e-06, "loss": 0.7717, "step": 3944 }, { "epoch": 0.5627674750356634, "grad_norm": 0.8208606243133545, "learning_rate": 9.3631712309291e-06, "loss": 0.8162, "step": 3945 }, { "epoch": 0.5629101283880171, "grad_norm": 1.1177812814712524, "learning_rate": 9.36279511340883e-06, "loss": 0.7225, "step": 3946 }, { "epoch": 0.5630527817403709, "grad_norm": 1.286088466644287, "learning_rate": 9.362418892410851e-06, "loss": 0.8166, "step": 3947 }, { "epoch": 0.5631954350927246, "grad_norm": 1.0582689046859741, "learning_rate": 9.362042567944088e-06, "loss": 0.7698, "step": 3948 }, { "epoch": 0.5633380884450785, "grad_norm": 0.9759199619293213, "learning_rate": 9.361666140017463e-06, "loss": 0.7137, "step": 3949 }, { "epoch": 0.5634807417974322, "grad_norm": 0.8342788219451904, "learning_rate": 9.361289608639905e-06, "loss": 0.7432, "step": 3950 }, { "epoch": 0.563623395149786, "grad_norm": 1.1413004398345947, "learning_rate": 9.360912973820347e-06, "loss": 0.7713, "step": 3951 }, { "epoch": 0.5637660485021398, "grad_norm": 0.8673157691955566, "learning_rate": 9.36053623556772e-06, "loss": 0.7325, "step": 3952 }, { "epoch": 0.5639087018544936, "grad_norm": 0.8513365983963013, "learning_rate": 9.360159393890958e-06, "loss": 0.7634, "step": 3953 }, { "epoch": 0.5640513552068473, "grad_norm": 0.646003007888794, "learning_rate": 9.359782448799005e-06, "loss": 0.5906, "step": 3954 }, { "epoch": 0.5641940085592011, "grad_norm": 1.680774450302124, "learning_rate": 9.359405400300794e-06, "loss": 0.7783, "step": 3955 }, { "epoch": 0.564336661911555, "grad_norm": 1.0468223094940186, "learning_rate": 9.359028248405273e-06, "loss": 0.7025, "step": 3956 }, { "epoch": 0.5644793152639087, "grad_norm": 0.8599289059638977, "learning_rate": 9.358650993121386e-06, "loss": 0.7677, "step": 3957 }, { "epoch": 0.5646219686162625, "grad_norm": 0.9311829805374146, "learning_rate": 9.358273634458082e-06, "loss": 0.658, "step": 3958 }, { "epoch": 0.5647646219686162, "grad_norm": 0.9008866548538208, "learning_rate": 9.357896172424308e-06, "loss": 0.7866, "step": 3959 }, { "epoch": 0.5649072753209701, "grad_norm": 0.9304162263870239, "learning_rate": 9.357518607029019e-06, "loss": 0.5638, "step": 3960 }, { "epoch": 0.5650499286733238, "grad_norm": 1.1516731977462769, "learning_rate": 9.35714093828117e-06, "loss": 0.8124, "step": 3961 }, { "epoch": 0.5651925820256776, "grad_norm": 0.9910885095596313, "learning_rate": 9.35676316618972e-06, "loss": 0.8032, "step": 3962 }, { "epoch": 0.5653352353780314, "grad_norm": 0.8922032713890076, "learning_rate": 9.356385290763625e-06, "loss": 0.8418, "step": 3963 }, { "epoch": 0.5654778887303852, "grad_norm": 1.1360137462615967, "learning_rate": 9.356007312011853e-06, "loss": 0.6794, "step": 3964 }, { "epoch": 0.5656205420827389, "grad_norm": 0.9588967561721802, "learning_rate": 9.355629229943363e-06, "loss": 0.7752, "step": 3965 }, { "epoch": 0.5657631954350927, "grad_norm": 0.768655002117157, "learning_rate": 9.355251044567129e-06, "loss": 0.7379, "step": 3966 }, { "epoch": 0.5659058487874465, "grad_norm": 0.9083778858184814, "learning_rate": 9.354872755892115e-06, "loss": 0.7248, "step": 3967 }, { "epoch": 0.5660485021398003, "grad_norm": 0.8241592049598694, "learning_rate": 9.354494363927296e-06, "loss": 0.5281, "step": 3968 }, { "epoch": 0.566191155492154, "grad_norm": 1.0365681648254395, "learning_rate": 9.354115868681648e-06, "loss": 0.6871, "step": 3969 }, { "epoch": 0.5663338088445078, "grad_norm": 0.8775922060012817, "learning_rate": 9.353737270164145e-06, "loss": 0.7885, "step": 3970 }, { "epoch": 0.5664764621968617, "grad_norm": 1.0082521438598633, "learning_rate": 9.35335856838377e-06, "loss": 0.8228, "step": 3971 }, { "epoch": 0.5666191155492154, "grad_norm": 0.7796494960784912, "learning_rate": 9.352979763349501e-06, "loss": 0.7185, "step": 3972 }, { "epoch": 0.5667617689015692, "grad_norm": 0.9608703255653381, "learning_rate": 9.352600855070327e-06, "loss": 0.7932, "step": 3973 }, { "epoch": 0.566904422253923, "grad_norm": 1.2445639371871948, "learning_rate": 9.352221843555234e-06, "loss": 0.727, "step": 3974 }, { "epoch": 0.5670470756062768, "grad_norm": 0.9000669121742249, "learning_rate": 9.35184272881321e-06, "loss": 0.7399, "step": 3975 }, { "epoch": 0.5671897289586305, "grad_norm": 0.7240111827850342, "learning_rate": 9.351463510853245e-06, "loss": 0.7932, "step": 3976 }, { "epoch": 0.5673323823109843, "grad_norm": 1.207418441772461, "learning_rate": 9.351084189684339e-06, "loss": 0.7482, "step": 3977 }, { "epoch": 0.5674750356633381, "grad_norm": 1.0589617490768433, "learning_rate": 9.350704765315485e-06, "loss": 0.8294, "step": 3978 }, { "epoch": 0.5676176890156919, "grad_norm": 0.7942510843276978, "learning_rate": 9.350325237755683e-06, "loss": 0.7877, "step": 3979 }, { "epoch": 0.5677603423680456, "grad_norm": 0.8113898634910583, "learning_rate": 9.349945607013935e-06, "loss": 0.7244, "step": 3980 }, { "epoch": 0.5679029957203995, "grad_norm": 0.8928048014640808, "learning_rate": 9.349565873099245e-06, "loss": 0.7469, "step": 3981 }, { "epoch": 0.5680456490727532, "grad_norm": 0.7207472920417786, "learning_rate": 9.349186036020618e-06, "loss": 0.828, "step": 3982 }, { "epoch": 0.568188302425107, "grad_norm": 0.8237086534500122, "learning_rate": 9.348806095787065e-06, "loss": 0.758, "step": 3983 }, { "epoch": 0.5683309557774607, "grad_norm": 0.7653695940971375, "learning_rate": 9.3484260524076e-06, "loss": 0.5715, "step": 3984 }, { "epoch": 0.5684736091298146, "grad_norm": 0.8891986608505249, "learning_rate": 9.348045905891231e-06, "loss": 0.7624, "step": 3985 }, { "epoch": 0.5686162624821683, "grad_norm": 0.7860222458839417, "learning_rate": 9.347665656246978e-06, "loss": 0.745, "step": 3986 }, { "epoch": 0.5687589158345221, "grad_norm": 0.8255372047424316, "learning_rate": 9.34728530348386e-06, "loss": 0.739, "step": 3987 }, { "epoch": 0.5689015691868758, "grad_norm": 0.8702134490013123, "learning_rate": 9.346904847610898e-06, "loss": 0.7413, "step": 3988 }, { "epoch": 0.5690442225392297, "grad_norm": 1.1986943483352661, "learning_rate": 9.346524288637114e-06, "loss": 0.7627, "step": 3989 }, { "epoch": 0.5691868758915835, "grad_norm": 0.8165330290794373, "learning_rate": 9.346143626571536e-06, "loss": 0.8448, "step": 3990 }, { "epoch": 0.5693295292439372, "grad_norm": 0.842220664024353, "learning_rate": 9.34576286142319e-06, "loss": 0.7451, "step": 3991 }, { "epoch": 0.5694721825962911, "grad_norm": 1.1866034269332886, "learning_rate": 9.345381993201113e-06, "loss": 0.5583, "step": 3992 }, { "epoch": 0.5696148359486448, "grad_norm": 1.0934287309646606, "learning_rate": 9.345001021914334e-06, "loss": 0.6854, "step": 3993 }, { "epoch": 0.5697574893009986, "grad_norm": 0.9536330103874207, "learning_rate": 9.344619947571888e-06, "loss": 0.804, "step": 3994 }, { "epoch": 0.5699001426533523, "grad_norm": 0.8321189284324646, "learning_rate": 9.344238770182817e-06, "loss": 0.7551, "step": 3995 }, { "epoch": 0.5700427960057062, "grad_norm": 1.0529017448425293, "learning_rate": 9.343857489756155e-06, "loss": 0.7332, "step": 3996 }, { "epoch": 0.5701854493580599, "grad_norm": 0.9437790513038635, "learning_rate": 9.343476106300957e-06, "loss": 0.8217, "step": 3997 }, { "epoch": 0.5703281027104137, "grad_norm": 0.6984540224075317, "learning_rate": 9.343094619826257e-06, "loss": 0.7697, "step": 3998 }, { "epoch": 0.5704707560627674, "grad_norm": 0.7660011053085327, "learning_rate": 9.34271303034111e-06, "loss": 0.7877, "step": 3999 }, { "epoch": 0.5706134094151213, "grad_norm": 0.8685745000839233, "learning_rate": 9.342331337854565e-06, "loss": 0.7815, "step": 4000 } ], "logging_steps": 1.0, "max_steps": 21030, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.319886361723929e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }