{ "best_metric": 0.021728611886642827, "best_model_checkpoint": "./results-cc/code-t5/codet5_fmft_official_0.0001/checkpoint-14718", "epoch": 8.0, "eval_steps": 500, "global_step": 117744, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003397200706617747, "grad_norm": 6.21469259262085, "learning_rate": 9.999660279929339e-05, "loss": 7.0272, "step": 5 }, { "epoch": 0.0006794401413235494, "grad_norm": 4.770811557769775, "learning_rate": 9.999235629841012e-05, "loss": 4.8093, "step": 10 }, { "epoch": 0.0010191602119853241, "grad_norm": 3.3407115936279297, "learning_rate": 9.998810979752684e-05, "loss": 4.5152, "step": 15 }, { "epoch": 0.001358880282647099, "grad_norm": 3.316413402557373, "learning_rate": 9.998386329664357e-05, "loss": 3.8516, "step": 20 }, { "epoch": 0.0016986003533088735, "grad_norm": 3.275665760040283, "learning_rate": 9.99796167957603e-05, "loss": 4.1015, "step": 25 }, { "epoch": 0.0020383204239706482, "grad_norm": 3.4907727241516113, "learning_rate": 9.997537029487703e-05, "loss": 3.9727, "step": 30 }, { "epoch": 0.002378040494632423, "grad_norm": 4.418203353881836, "learning_rate": 9.997112379399374e-05, "loss": 3.8008, "step": 35 }, { "epoch": 0.002717760565294198, "grad_norm": 2.678400993347168, "learning_rate": 9.996687729311048e-05, "loss": 4.036, "step": 40 }, { "epoch": 0.0030574806359559724, "grad_norm": 3.6774635314941406, "learning_rate": 9.996263079222721e-05, "loss": 4.2775, "step": 45 }, { "epoch": 0.003397200706617747, "grad_norm": 7.125624179840088, "learning_rate": 9.995838429134394e-05, "loss": 3.8021, "step": 50 }, { "epoch": 0.0037369207772795215, "grad_norm": 3.47074294090271, "learning_rate": 9.995413779046067e-05, "loss": 3.9785, "step": 55 }, { "epoch": 0.0040766408479412965, "grad_norm": 4.336516380310059, "learning_rate": 9.99498912895774e-05, "loss": 3.9175, "step": 60 }, { "epoch": 0.0044163609186030715, "grad_norm": 2.7094192504882812, 
"learning_rate": 9.994564478869412e-05, "loss": 3.9308, "step": 65 }, { "epoch": 0.004756080989264846, "grad_norm": 2.82883620262146, "learning_rate": 9.994139828781085e-05, "loss": 4.2121, "step": 70 }, { "epoch": 0.005095801059926621, "grad_norm": 2.981565475463867, "learning_rate": 9.993715178692758e-05, "loss": 4.0359, "step": 75 }, { "epoch": 0.005435521130588396, "grad_norm": 10.057659149169922, "learning_rate": 9.993290528604431e-05, "loss": 3.8392, "step": 80 }, { "epoch": 0.00577524120125017, "grad_norm": 4.061558723449707, "learning_rate": 9.992865878516104e-05, "loss": 3.8535, "step": 85 }, { "epoch": 0.006114961271911945, "grad_norm": 12.613106727600098, "learning_rate": 9.992441228427776e-05, "loss": 3.9515, "step": 90 }, { "epoch": 0.006454681342573719, "grad_norm": 2.677182197570801, "learning_rate": 9.992016578339449e-05, "loss": 4.1063, "step": 95 }, { "epoch": 0.006794401413235494, "grad_norm": 3.033088207244873, "learning_rate": 9.991591928251122e-05, "loss": 3.98, "step": 100 }, { "epoch": 0.007134121483897269, "grad_norm": 3.225051164627075, "learning_rate": 9.991167278162795e-05, "loss": 3.5691, "step": 105 }, { "epoch": 0.007473841554559043, "grad_norm": 2.7921855449676514, "learning_rate": 9.990742628074468e-05, "loss": 4.0241, "step": 110 }, { "epoch": 0.007813561625220818, "grad_norm": 2.826827049255371, "learning_rate": 9.99031797798614e-05, "loss": 4.0395, "step": 115 }, { "epoch": 0.008153281695882593, "grad_norm": 9.27625560760498, "learning_rate": 9.989893327897812e-05, "loss": 4.1619, "step": 120 }, { "epoch": 0.008493001766544368, "grad_norm": 2.8031373023986816, "learning_rate": 9.989468677809486e-05, "loss": 3.9835, "step": 125 }, { "epoch": 0.008832721837206143, "grad_norm": 3.7431797981262207, "learning_rate": 9.989044027721159e-05, "loss": 3.8106, "step": 130 }, { "epoch": 0.009172441907867916, "grad_norm": 5.541841983795166, "learning_rate": 9.98861937763283e-05, "loss": 3.7821, "step": 135 }, { "epoch": 0.009512161978529691, 
"grad_norm": 3.7987961769104004, "learning_rate": 9.988194727544504e-05, "loss": 3.7472, "step": 140 }, { "epoch": 0.009851882049191466, "grad_norm": 6.096441745758057, "learning_rate": 9.987770077456177e-05, "loss": 3.7836, "step": 145 }, { "epoch": 0.010191602119853241, "grad_norm": 3.9141364097595215, "learning_rate": 9.987345427367849e-05, "loss": 4.0505, "step": 150 }, { "epoch": 0.010531322190515016, "grad_norm": 3.785848379135132, "learning_rate": 9.986920777279523e-05, "loss": 3.8544, "step": 155 }, { "epoch": 0.010871042261176791, "grad_norm": 3.2531163692474365, "learning_rate": 9.986496127191196e-05, "loss": 3.8316, "step": 160 }, { "epoch": 0.011210762331838564, "grad_norm": 3.1866166591644287, "learning_rate": 9.986071477102867e-05, "loss": 3.8971, "step": 165 }, { "epoch": 0.01155048240250034, "grad_norm": 2.796600818634033, "learning_rate": 9.985646827014541e-05, "loss": 3.7587, "step": 170 }, { "epoch": 0.011890202473162114, "grad_norm": 2.753453254699707, "learning_rate": 9.985222176926214e-05, "loss": 3.7977, "step": 175 }, { "epoch": 0.01222992254382389, "grad_norm": 3.392162322998047, "learning_rate": 9.984797526837885e-05, "loss": 3.9074, "step": 180 }, { "epoch": 0.012569642614485664, "grad_norm": 3.0789575576782227, "learning_rate": 9.98437287674956e-05, "loss": 3.9379, "step": 185 }, { "epoch": 0.012909362685147438, "grad_norm": 3.4421534538269043, "learning_rate": 9.983948226661231e-05, "loss": 3.8396, "step": 190 }, { "epoch": 0.013249082755809213, "grad_norm": 8.736903190612793, "learning_rate": 9.983523576572904e-05, "loss": 4.0782, "step": 195 }, { "epoch": 0.013588802826470988, "grad_norm": 3.5333974361419678, "learning_rate": 9.983098926484578e-05, "loss": 3.9485, "step": 200 }, { "epoch": 0.013928522897132763, "grad_norm": 3.0175955295562744, "learning_rate": 9.98267427639625e-05, "loss": 4.0261, "step": 205 }, { "epoch": 0.014268242967794538, "grad_norm": 2.8383960723876953, "learning_rate": 9.982249626307922e-05, "loss": 3.6848, 
"step": 210 }, { "epoch": 0.014607963038456313, "grad_norm": 2.407959222793579, "learning_rate": 9.981824976219596e-05, "loss": 3.806, "step": 215 }, { "epoch": 0.014947683109118086, "grad_norm": 2.9977355003356934, "learning_rate": 9.981400326131268e-05, "loss": 3.9132, "step": 220 }, { "epoch": 0.015287403179779861, "grad_norm": 3.05517578125, "learning_rate": 9.98097567604294e-05, "loss": 3.8259, "step": 225 }, { "epoch": 0.015627123250441636, "grad_norm": 2.4192965030670166, "learning_rate": 9.980551025954615e-05, "loss": 3.7206, "step": 230 }, { "epoch": 0.01596684332110341, "grad_norm": 3.0928332805633545, "learning_rate": 9.980126375866286e-05, "loss": 3.8799, "step": 235 }, { "epoch": 0.016306563391765186, "grad_norm": 3.791109085083008, "learning_rate": 9.979701725777959e-05, "loss": 3.8032, "step": 240 }, { "epoch": 0.01664628346242696, "grad_norm": 2.8189823627471924, "learning_rate": 9.979277075689633e-05, "loss": 3.8046, "step": 245 }, { "epoch": 0.016986003533088736, "grad_norm": 4.813939094543457, "learning_rate": 9.978852425601305e-05, "loss": 3.9856, "step": 250 }, { "epoch": 0.01732572360375051, "grad_norm": 2.9254181385040283, "learning_rate": 9.978427775512977e-05, "loss": 3.7032, "step": 255 }, { "epoch": 0.017665443674412286, "grad_norm": 4.05214786529541, "learning_rate": 9.97800312542465e-05, "loss": 3.8685, "step": 260 }, { "epoch": 0.01800516374507406, "grad_norm": 2.7574172019958496, "learning_rate": 9.977578475336323e-05, "loss": 4.0542, "step": 265 }, { "epoch": 0.018344883815735832, "grad_norm": 12.412968635559082, "learning_rate": 9.977153825247996e-05, "loss": 3.7525, "step": 270 }, { "epoch": 0.01868460388639761, "grad_norm": 2.9556572437286377, "learning_rate": 9.976729175159669e-05, "loss": 3.747, "step": 275 }, { "epoch": 0.019024323957059382, "grad_norm": 2.2956831455230713, "learning_rate": 9.976304525071341e-05, "loss": 3.7616, "step": 280 }, { "epoch": 0.01936404402772116, "grad_norm": 2.4142673015594482, "learning_rate": 
9.975879874983014e-05, "loss": 3.8668, "step": 285 }, { "epoch": 0.019703764098382932, "grad_norm": 2.9800455570220947, "learning_rate": 9.975455224894687e-05, "loss": 3.8922, "step": 290 }, { "epoch": 0.020043484169044706, "grad_norm": 2.6110286712646484, "learning_rate": 9.97503057480636e-05, "loss": 3.8381, "step": 295 }, { "epoch": 0.020383204239706482, "grad_norm": 2.673398494720459, "learning_rate": 9.974605924718033e-05, "loss": 3.6466, "step": 300 }, { "epoch": 0.020722924310368256, "grad_norm": 3.2894580364227295, "learning_rate": 9.974181274629705e-05, "loss": 3.8729, "step": 305 }, { "epoch": 0.021062644381030032, "grad_norm": 3.046811103820801, "learning_rate": 9.973756624541378e-05, "loss": 3.7184, "step": 310 }, { "epoch": 0.021402364451691806, "grad_norm": 2.517362594604492, "learning_rate": 9.973331974453051e-05, "loss": 4.1241, "step": 315 }, { "epoch": 0.021742084522353582, "grad_norm": 3.15246319770813, "learning_rate": 9.972907324364724e-05, "loss": 3.8871, "step": 320 }, { "epoch": 0.022081804593015356, "grad_norm": 3.8455631732940674, "learning_rate": 9.972482674276397e-05, "loss": 3.6376, "step": 325 }, { "epoch": 0.02242152466367713, "grad_norm": 2.374818801879883, "learning_rate": 9.972142954205735e-05, "loss": 3.4756, "step": 330 }, { "epoch": 0.022761244734338906, "grad_norm": 3.907341718673706, "learning_rate": 9.971718304117408e-05, "loss": 3.9709, "step": 335 }, { "epoch": 0.02310096480500068, "grad_norm": 3.2161388397216797, "learning_rate": 9.97129365402908e-05, "loss": 3.8017, "step": 340 }, { "epoch": 0.023440684875662456, "grad_norm": 3.5958497524261475, "learning_rate": 9.970869003940753e-05, "loss": 3.8609, "step": 345 }, { "epoch": 0.02378040494632423, "grad_norm": 2.839556932449341, "learning_rate": 9.970444353852426e-05, "loss": 3.5558, "step": 350 }, { "epoch": 0.024120125016986002, "grad_norm": 2.8804948329925537, "learning_rate": 9.970019703764099e-05, "loss": 4.0357, "step": 355 }, { "epoch": 0.02445984508764778, 
"grad_norm": 4.942351818084717, "learning_rate": 9.969595053675772e-05, "loss": 3.9725, "step": 360 }, { "epoch": 0.024799565158309552, "grad_norm": 3.3335719108581543, "learning_rate": 9.969170403587444e-05, "loss": 3.8432, "step": 365 }, { "epoch": 0.02513928522897133, "grad_norm": 2.7704007625579834, "learning_rate": 9.968745753499117e-05, "loss": 3.9828, "step": 370 }, { "epoch": 0.025479005299633102, "grad_norm": 3.0958409309387207, "learning_rate": 9.96832110341079e-05, "loss": 3.7734, "step": 375 }, { "epoch": 0.025818725370294875, "grad_norm": 2.696258068084717, "learning_rate": 9.967896453322463e-05, "loss": 4.0272, "step": 380 }, { "epoch": 0.026158445440956652, "grad_norm": 2.9321091175079346, "learning_rate": 9.967471803234136e-05, "loss": 3.955, "step": 385 }, { "epoch": 0.026498165511618425, "grad_norm": 2.6848959922790527, "learning_rate": 9.967047153145808e-05, "loss": 3.6993, "step": 390 }, { "epoch": 0.026837885582280202, "grad_norm": 2.8681793212890625, "learning_rate": 9.966622503057481e-05, "loss": 3.4435, "step": 395 }, { "epoch": 0.027177605652941975, "grad_norm": 2.906076431274414, "learning_rate": 9.966197852969154e-05, "loss": 3.6707, "step": 400 }, { "epoch": 0.027517325723603752, "grad_norm": 3.168874979019165, "learning_rate": 9.965773202880827e-05, "loss": 3.5505, "step": 405 }, { "epoch": 0.027857045794265525, "grad_norm": 2.90678334236145, "learning_rate": 9.9653485527925e-05, "loss": 3.9038, "step": 410 }, { "epoch": 0.0281967658649273, "grad_norm": 3.7996811866760254, "learning_rate": 9.964923902704172e-05, "loss": 3.9709, "step": 415 }, { "epoch": 0.028536485935589075, "grad_norm": 3.3655152320861816, "learning_rate": 9.964499252615845e-05, "loss": 3.6655, "step": 420 }, { "epoch": 0.02887620600625085, "grad_norm": 2.801706075668335, "learning_rate": 9.964074602527518e-05, "loss": 4.0456, "step": 425 }, { "epoch": 0.029215926076912625, "grad_norm": 2.752469539642334, "learning_rate": 9.963649952439191e-05, "loss": 3.8482, "step": 
430 }, { "epoch": 0.0295556461475744, "grad_norm": 2.6563782691955566, "learning_rate": 9.963225302350864e-05, "loss": 3.8063, "step": 435 }, { "epoch": 0.029895366218236172, "grad_norm": 4.1556220054626465, "learning_rate": 9.962800652262536e-05, "loss": 3.7697, "step": 440 }, { "epoch": 0.03023508628889795, "grad_norm": 3.3705992698669434, "learning_rate": 9.962376002174208e-05, "loss": 3.6937, "step": 445 }, { "epoch": 0.030574806359559722, "grad_norm": 2.6561191082000732, "learning_rate": 9.961951352085882e-05, "loss": 3.55, "step": 450 }, { "epoch": 0.0309145264302215, "grad_norm": 3.07314395904541, "learning_rate": 9.961526701997555e-05, "loss": 3.7798, "step": 455 }, { "epoch": 0.03125424650088327, "grad_norm": 2.819225311279297, "learning_rate": 9.961102051909226e-05, "loss": 4.1133, "step": 460 }, { "epoch": 0.03159396657154505, "grad_norm": 3.2146010398864746, "learning_rate": 9.9606774018209e-05, "loss": 3.7746, "step": 465 }, { "epoch": 0.03193368664220682, "grad_norm": 2.454878568649292, "learning_rate": 9.960252751732573e-05, "loss": 3.8051, "step": 470 }, { "epoch": 0.032273406712868595, "grad_norm": 2.937072515487671, "learning_rate": 9.959828101644245e-05, "loss": 3.9788, "step": 475 }, { "epoch": 0.03261312678353037, "grad_norm": 3.164280414581299, "learning_rate": 9.959403451555919e-05, "loss": 3.5624, "step": 480 }, { "epoch": 0.03295284685419215, "grad_norm": 3.0861897468566895, "learning_rate": 9.958978801467592e-05, "loss": 3.5828, "step": 485 }, { "epoch": 0.03329256692485392, "grad_norm": 3.0691051483154297, "learning_rate": 9.958554151379263e-05, "loss": 3.7145, "step": 490 }, { "epoch": 0.033632286995515695, "grad_norm": 2.841773271560669, "learning_rate": 9.958129501290937e-05, "loss": 3.8588, "step": 495 }, { "epoch": 0.03397200706617747, "grad_norm": 12.969367980957031, "learning_rate": 9.95770485120261e-05, "loss": 3.6362, "step": 500 }, { "epoch": 0.03431172713683924, "grad_norm": 2.858612060546875, "learning_rate": 
9.957280201114282e-05, "loss": 3.7403, "step": 505 }, { "epoch": 0.03465144720750102, "grad_norm": 3.6948654651641846, "learning_rate": 9.956855551025956e-05, "loss": 3.7201, "step": 510 }, { "epoch": 0.034991167278162795, "grad_norm": 3.085568904876709, "learning_rate": 9.956430900937627e-05, "loss": 3.6721, "step": 515 }, { "epoch": 0.03533088734882457, "grad_norm": 3.0437660217285156, "learning_rate": 9.9560062508493e-05, "loss": 3.8041, "step": 520 }, { "epoch": 0.03567060741948634, "grad_norm": 2.336348533630371, "learning_rate": 9.955581600760974e-05, "loss": 3.703, "step": 525 }, { "epoch": 0.03601032749014812, "grad_norm": 3.068443536758423, "learning_rate": 9.955156950672646e-05, "loss": 3.5309, "step": 530 }, { "epoch": 0.036350047560809895, "grad_norm": 2.803267240524292, "learning_rate": 9.954732300584318e-05, "loss": 3.74, "step": 535 }, { "epoch": 0.036689767631471665, "grad_norm": 2.932396173477173, "learning_rate": 9.954307650495992e-05, "loss": 3.6244, "step": 540 }, { "epoch": 0.03702948770213344, "grad_norm": 3.2129034996032715, "learning_rate": 9.953883000407664e-05, "loss": 3.6903, "step": 545 }, { "epoch": 0.03736920777279522, "grad_norm": 2.4441819190979004, "learning_rate": 9.953458350319337e-05, "loss": 4.0107, "step": 550 }, { "epoch": 0.03770892784345699, "grad_norm": 2.7356765270233154, "learning_rate": 9.953033700231011e-05, "loss": 3.755, "step": 555 }, { "epoch": 0.038048647914118765, "grad_norm": 2.823246479034424, "learning_rate": 9.952609050142682e-05, "loss": 3.7853, "step": 560 }, { "epoch": 0.03838836798478054, "grad_norm": 3.929452419281006, "learning_rate": 9.952184400054355e-05, "loss": 3.8028, "step": 565 }, { "epoch": 0.03872808805544232, "grad_norm": 3.2468960285186768, "learning_rate": 9.951759749966029e-05, "loss": 3.9392, "step": 570 }, { "epoch": 0.03906780812610409, "grad_norm": 2.5720834732055664, "learning_rate": 9.951335099877701e-05, "loss": 3.8785, "step": 575 }, { "epoch": 0.039407528196765865, "grad_norm": 
2.521256446838379, "learning_rate": 9.950910449789374e-05, "loss": 3.6521, "step": 580 }, { "epoch": 0.03974724826742764, "grad_norm": 2.8516931533813477, "learning_rate": 9.950485799701046e-05, "loss": 3.6808, "step": 585 }, { "epoch": 0.04008696833808941, "grad_norm": 3.3582510948181152, "learning_rate": 9.950061149612719e-05, "loss": 3.6077, "step": 590 }, { "epoch": 0.04042668840875119, "grad_norm": 2.874913454055786, "learning_rate": 9.949636499524393e-05, "loss": 3.6599, "step": 595 }, { "epoch": 0.040766408479412965, "grad_norm": 5.1997480392456055, "learning_rate": 9.949211849436065e-05, "loss": 3.7753, "step": 600 }, { "epoch": 0.04110612855007474, "grad_norm": 3.319835662841797, "learning_rate": 9.948787199347738e-05, "loss": 3.5933, "step": 605 }, { "epoch": 0.04144584862073651, "grad_norm": 2.802927017211914, "learning_rate": 9.948362549259412e-05, "loss": 3.841, "step": 610 }, { "epoch": 0.04178556869139829, "grad_norm": 2.867325782775879, "learning_rate": 9.947937899171083e-05, "loss": 3.6816, "step": 615 }, { "epoch": 0.042125288762060065, "grad_norm": 2.727735757827759, "learning_rate": 9.947513249082756e-05, "loss": 3.6244, "step": 620 }, { "epoch": 0.042465008832721834, "grad_norm": 2.464614152908325, "learning_rate": 9.94708859899443e-05, "loss": 3.2751, "step": 625 }, { "epoch": 0.04280472890338361, "grad_norm": 2.5432565212249756, "learning_rate": 9.946663948906102e-05, "loss": 3.5677, "step": 630 }, { "epoch": 0.04314444897404539, "grad_norm": 2.9960644245147705, "learning_rate": 9.946239298817774e-05, "loss": 3.6849, "step": 635 }, { "epoch": 0.043484169044707165, "grad_norm": 2.4648358821868896, "learning_rate": 9.945814648729449e-05, "loss": 3.6208, "step": 640 }, { "epoch": 0.043823889115368934, "grad_norm": 2.461162805557251, "learning_rate": 9.94538999864112e-05, "loss": 3.9922, "step": 645 }, { "epoch": 0.04416360918603071, "grad_norm": 2.333400011062622, "learning_rate": 9.944965348552793e-05, "loss": 3.8104, "step": 650 }, { "epoch": 
0.04450332925669249, "grad_norm": 2.3225696086883545, "learning_rate": 9.944540698464467e-05, "loss": 3.6985, "step": 655 }, { "epoch": 0.04484304932735426, "grad_norm": 3.624342441558838, "learning_rate": 9.944116048376138e-05, "loss": 3.6232, "step": 660 }, { "epoch": 0.045182769398016034, "grad_norm": 2.766263723373413, "learning_rate": 9.943691398287811e-05, "loss": 3.6127, "step": 665 }, { "epoch": 0.04552248946867781, "grad_norm": 2.61580491065979, "learning_rate": 9.943266748199484e-05, "loss": 3.8995, "step": 670 }, { "epoch": 0.04586220953933958, "grad_norm": 2.9932680130004883, "learning_rate": 9.942842098111157e-05, "loss": 3.6624, "step": 675 }, { "epoch": 0.04620192961000136, "grad_norm": 2.72580623626709, "learning_rate": 9.94241744802283e-05, "loss": 3.7734, "step": 680 }, { "epoch": 0.046541649680663134, "grad_norm": 2.9109714031219482, "learning_rate": 9.941992797934502e-05, "loss": 3.8153, "step": 685 }, { "epoch": 0.04688136975132491, "grad_norm": 2.517665147781372, "learning_rate": 9.941568147846175e-05, "loss": 3.7694, "step": 690 }, { "epoch": 0.04722108982198668, "grad_norm": 3.241339921951294, "learning_rate": 9.941143497757848e-05, "loss": 3.7621, "step": 695 }, { "epoch": 0.04756080989264846, "grad_norm": 2.0298354625701904, "learning_rate": 9.940718847669521e-05, "loss": 3.9023, "step": 700 }, { "epoch": 0.047900529963310234, "grad_norm": 3.248481035232544, "learning_rate": 9.940294197581194e-05, "loss": 3.7963, "step": 705 }, { "epoch": 0.048240250033972004, "grad_norm": 2.496670722961426, "learning_rate": 9.939869547492866e-05, "loss": 3.7356, "step": 710 }, { "epoch": 0.04857997010463378, "grad_norm": 4.151010990142822, "learning_rate": 9.939444897404539e-05, "loss": 3.6123, "step": 715 }, { "epoch": 0.04891969017529556, "grad_norm": 2.359184503555298, "learning_rate": 9.939020247316212e-05, "loss": 3.7655, "step": 720 }, { "epoch": 0.049259410245957334, "grad_norm": 2.5961592197418213, "learning_rate": 9.938595597227885e-05, "loss": 
3.604, "step": 725 }, { "epoch": 0.049599130316619104, "grad_norm": 2.7124698162078857, "learning_rate": 9.938170947139558e-05, "loss": 3.714, "step": 730 }, { "epoch": 0.04993885038728088, "grad_norm": 3.47074556350708, "learning_rate": 9.93774629705123e-05, "loss": 3.5581, "step": 735 }, { "epoch": 0.05027857045794266, "grad_norm": 2.783320426940918, "learning_rate": 9.937321646962903e-05, "loss": 3.9011, "step": 740 }, { "epoch": 0.05061829052860443, "grad_norm": 2.4175233840942383, "learning_rate": 9.936896996874576e-05, "loss": 3.5208, "step": 745 }, { "epoch": 0.050958010599266204, "grad_norm": 2.8976969718933105, "learning_rate": 9.936472346786249e-05, "loss": 3.7739, "step": 750 }, { "epoch": 0.05129773066992798, "grad_norm": 2.563922882080078, "learning_rate": 9.936047696697922e-05, "loss": 3.7629, "step": 755 }, { "epoch": 0.05163745074058975, "grad_norm": 2.919168472290039, "learning_rate": 9.935623046609594e-05, "loss": 3.8214, "step": 760 }, { "epoch": 0.05197717081125153, "grad_norm": 2.8702971935272217, "learning_rate": 9.935198396521267e-05, "loss": 3.6658, "step": 765 }, { "epoch": 0.052316890881913304, "grad_norm": 2.588665008544922, "learning_rate": 9.93477374643294e-05, "loss": 3.6532, "step": 770 }, { "epoch": 0.05265661095257508, "grad_norm": 2.481748580932617, "learning_rate": 9.934349096344613e-05, "loss": 3.8163, "step": 775 }, { "epoch": 0.05299633102323685, "grad_norm": 2.211552381515503, "learning_rate": 9.933924446256286e-05, "loss": 3.8311, "step": 780 }, { "epoch": 0.05333605109389863, "grad_norm": 2.36183762550354, "learning_rate": 9.933499796167957e-05, "loss": 3.6162, "step": 785 }, { "epoch": 0.053675771164560404, "grad_norm": 2.776430368423462, "learning_rate": 9.933075146079631e-05, "loss": 3.7577, "step": 790 }, { "epoch": 0.054015491235222174, "grad_norm": 2.7656915187835693, "learning_rate": 9.932650495991304e-05, "loss": 3.604, "step": 795 }, { "epoch": 0.05435521130588395, "grad_norm": 2.6776299476623535, "learning_rate": 
9.932225845902975e-05, "loss": 3.9155, "step": 800 }, { "epoch": 0.05469493137654573, "grad_norm": 2.4317731857299805, "learning_rate": 9.93180119581465e-05, "loss": 3.9431, "step": 805 }, { "epoch": 0.055034651447207504, "grad_norm": 2.411588191986084, "learning_rate": 9.931376545726322e-05, "loss": 3.9271, "step": 810 }, { "epoch": 0.055374371517869274, "grad_norm": 3.2233071327209473, "learning_rate": 9.930951895637994e-05, "loss": 3.7217, "step": 815 }, { "epoch": 0.05571409158853105, "grad_norm": 2.9518697261810303, "learning_rate": 9.930527245549668e-05, "loss": 3.6582, "step": 820 }, { "epoch": 0.05605381165919283, "grad_norm": 2.211646318435669, "learning_rate": 9.930102595461341e-05, "loss": 3.6279, "step": 825 }, { "epoch": 0.0563935317298546, "grad_norm": 3.2264046669006348, "learning_rate": 9.929677945373012e-05, "loss": 3.5536, "step": 830 }, { "epoch": 0.056733251800516374, "grad_norm": 3.2947652339935303, "learning_rate": 9.929253295284686e-05, "loss": 3.6845, "step": 835 }, { "epoch": 0.05707297187117815, "grad_norm": 2.8133134841918945, "learning_rate": 9.928828645196359e-05, "loss": 3.9895, "step": 840 }, { "epoch": 0.05741269194183993, "grad_norm": 2.6523597240448, "learning_rate": 9.92840399510803e-05, "loss": 3.8254, "step": 845 }, { "epoch": 0.0577524120125017, "grad_norm": 2.699572801589966, "learning_rate": 9.927979345019705e-05, "loss": 3.7742, "step": 850 }, { "epoch": 0.058092132083163474, "grad_norm": 2.490971565246582, "learning_rate": 9.927554694931378e-05, "loss": 3.6874, "step": 855 }, { "epoch": 0.05843185215382525, "grad_norm": 2.4525551795959473, "learning_rate": 9.927130044843049e-05, "loss": 3.3945, "step": 860 }, { "epoch": 0.05877157222448702, "grad_norm": 3.1005003452301025, "learning_rate": 9.926705394754723e-05, "loss": 3.72, "step": 865 }, { "epoch": 0.0591112922951488, "grad_norm": 2.757838010787964, "learning_rate": 9.926280744666395e-05, "loss": 3.7281, "step": 870 }, { "epoch": 0.059451012365810574, "grad_norm": 
2.688002109527588, "learning_rate": 9.925856094578067e-05, "loss": 3.4413, "step": 875 }, { "epoch": 0.059790732436472344, "grad_norm": 2.5481104850769043, "learning_rate": 9.925431444489742e-05, "loss": 3.5383, "step": 880 }, { "epoch": 0.06013045250713412, "grad_norm": 2.7239933013916016, "learning_rate": 9.925006794401413e-05, "loss": 3.6688, "step": 885 }, { "epoch": 0.0604701725777959, "grad_norm": 3.6569783687591553, "learning_rate": 9.924582144313086e-05, "loss": 3.8351, "step": 890 }, { "epoch": 0.060809892648457674, "grad_norm": 2.423292636871338, "learning_rate": 9.92415749422476e-05, "loss": 3.8014, "step": 895 }, { "epoch": 0.061149612719119444, "grad_norm": 3.125342845916748, "learning_rate": 9.923732844136431e-05, "loss": 3.7874, "step": 900 }, { "epoch": 0.06148933278978122, "grad_norm": 3.0751779079437256, "learning_rate": 9.923308194048104e-05, "loss": 3.6073, "step": 905 }, { "epoch": 0.061829052860443, "grad_norm": 3.3726134300231934, "learning_rate": 9.922883543959778e-05, "loss": 3.6009, "step": 910 }, { "epoch": 0.06216877293110477, "grad_norm": 2.86334490776062, "learning_rate": 9.92245889387145e-05, "loss": 3.8159, "step": 915 }, { "epoch": 0.06250849300176654, "grad_norm": 2.39286732673645, "learning_rate": 9.922034243783123e-05, "loss": 3.5785, "step": 920 }, { "epoch": 0.06284821307242831, "grad_norm": 2.7859303951263428, "learning_rate": 9.921609593694797e-05, "loss": 3.8303, "step": 925 }, { "epoch": 0.0631879331430901, "grad_norm": 2.4177496433258057, "learning_rate": 9.921184943606468e-05, "loss": 3.7221, "step": 930 }, { "epoch": 0.06352765321375187, "grad_norm": 2.441819190979004, "learning_rate": 9.920760293518142e-05, "loss": 3.875, "step": 935 }, { "epoch": 0.06386737328441364, "grad_norm": 2.359551429748535, "learning_rate": 9.920335643429814e-05, "loss": 3.5418, "step": 940 }, { "epoch": 0.06420709335507542, "grad_norm": 2.5077199935913086, "learning_rate": 9.919910993341487e-05, "loss": 3.6693, "step": 945 }, { "epoch": 
0.06454681342573719, "grad_norm": 2.7118682861328125, "learning_rate": 9.919486343253161e-05, "loss": 3.7542, "step": 950 }, { "epoch": 0.06488653349639897, "grad_norm": 3.035710573196411, "learning_rate": 9.919061693164832e-05, "loss": 3.6694, "step": 955 }, { "epoch": 0.06522625356706074, "grad_norm": 3.3193423748016357, "learning_rate": 9.918637043076505e-05, "loss": 3.8316, "step": 960 }, { "epoch": 0.06556597363772251, "grad_norm": 2.4807193279266357, "learning_rate": 9.918212392988179e-05, "loss": 3.6416, "step": 965 }, { "epoch": 0.0659056937083843, "grad_norm": 3.7772791385650635, "learning_rate": 9.91778774289985e-05, "loss": 3.932, "step": 970 }, { "epoch": 0.06624541377904607, "grad_norm": 2.8233423233032227, "learning_rate": 9.917363092811523e-05, "loss": 3.8445, "step": 975 }, { "epoch": 0.06658513384970784, "grad_norm": 2.7697527408599854, "learning_rate": 9.916938442723198e-05, "loss": 3.7474, "step": 980 }, { "epoch": 0.06692485392036962, "grad_norm": 3.04189395904541, "learning_rate": 9.916513792634869e-05, "loss": 3.7973, "step": 985 }, { "epoch": 0.06726457399103139, "grad_norm": 2.437936782836914, "learning_rate": 9.916089142546542e-05, "loss": 3.7282, "step": 990 }, { "epoch": 0.06760429406169316, "grad_norm": 2.2944767475128174, "learning_rate": 9.915664492458216e-05, "loss": 3.6289, "step": 995 }, { "epoch": 0.06794401413235494, "grad_norm": 2.751984119415283, "learning_rate": 9.915239842369887e-05, "loss": 3.7081, "step": 1000 }, { "epoch": 0.06828373420301671, "grad_norm": 2.8478729724884033, "learning_rate": 9.91481519228156e-05, "loss": 3.6821, "step": 1005 }, { "epoch": 0.06862345427367848, "grad_norm": 3.9907171726226807, "learning_rate": 9.914390542193233e-05, "loss": 3.5488, "step": 1010 }, { "epoch": 0.06896317434434027, "grad_norm": 2.771101713180542, "learning_rate": 9.913965892104906e-05, "loss": 3.8093, "step": 1015 }, { "epoch": 0.06930289441500204, "grad_norm": 2.20055890083313, "learning_rate": 9.913541242016579e-05, "loss": 
3.5228, "step": 1020 }, { "epoch": 0.0696426144856638, "grad_norm": 2.4106743335723877, "learning_rate": 9.913116591928251e-05, "loss": 3.4087, "step": 1025 }, { "epoch": 0.06998233455632559, "grad_norm": 2.4740729331970215, "learning_rate": 9.912691941839924e-05, "loss": 3.8916, "step": 1030 }, { "epoch": 0.07032205462698736, "grad_norm": 3.0354933738708496, "learning_rate": 9.912267291751597e-05, "loss": 3.5902, "step": 1035 }, { "epoch": 0.07066177469764914, "grad_norm": 2.821380138397217, "learning_rate": 9.91184264166327e-05, "loss": 3.6938, "step": 1040 }, { "epoch": 0.07100149476831091, "grad_norm": 3.0581631660461426, "learning_rate": 9.911417991574943e-05, "loss": 3.7114, "step": 1045 }, { "epoch": 0.07134121483897268, "grad_norm": 2.672722816467285, "learning_rate": 9.910993341486615e-05, "loss": 3.9402, "step": 1050 }, { "epoch": 0.07168093490963447, "grad_norm": 3.6198601722717285, "learning_rate": 9.910568691398288e-05, "loss": 3.4995, "step": 1055 }, { "epoch": 0.07202065498029624, "grad_norm": 2.590298891067505, "learning_rate": 9.910144041309961e-05, "loss": 3.4578, "step": 1060 }, { "epoch": 0.072360375050958, "grad_norm": 2.5233349800109863, "learning_rate": 9.909719391221634e-05, "loss": 3.4242, "step": 1065 }, { "epoch": 0.07270009512161979, "grad_norm": 3.3468916416168213, "learning_rate": 9.909294741133307e-05, "loss": 3.9203, "step": 1070 }, { "epoch": 0.07303981519228156, "grad_norm": 2.8617589473724365, "learning_rate": 9.90887009104498e-05, "loss": 3.4865, "step": 1075 }, { "epoch": 0.07337953526294333, "grad_norm": 3.0948257446289062, "learning_rate": 9.908445440956652e-05, "loss": 3.8441, "step": 1080 }, { "epoch": 0.07371925533360511, "grad_norm": 2.5815205574035645, "learning_rate": 9.908020790868325e-05, "loss": 3.6181, "step": 1085 }, { "epoch": 0.07405897540426688, "grad_norm": 2.946096420288086, "learning_rate": 9.907596140779998e-05, "loss": 3.6933, "step": 1090 }, { "epoch": 0.07439869547492865, "grad_norm": 2.984200954437256, 
"learning_rate": 9.90717149069167e-05, "loss": 3.7388, "step": 1095 }, { "epoch": 0.07473841554559044, "grad_norm": 2.596780776977539, "learning_rate": 9.906746840603343e-05, "loss": 3.2643, "step": 1100 }, { "epoch": 0.0750781356162522, "grad_norm": 2.4430718421936035, "learning_rate": 9.906322190515016e-05, "loss": 3.9153, "step": 1105 }, { "epoch": 0.07541785568691398, "grad_norm": 3.0134854316711426, "learning_rate": 9.905897540426689e-05, "loss": 3.6416, "step": 1110 }, { "epoch": 0.07575757575757576, "grad_norm": 2.158900260925293, "learning_rate": 9.905472890338362e-05, "loss": 3.6849, "step": 1115 }, { "epoch": 0.07609729582823753, "grad_norm": 2.352266311645508, "learning_rate": 9.905048240250035e-05, "loss": 3.5914, "step": 1120 }, { "epoch": 0.07643701589889931, "grad_norm": 2.2301273345947266, "learning_rate": 9.904623590161707e-05, "loss": 3.7726, "step": 1125 }, { "epoch": 0.07677673596956108, "grad_norm": 2.9615561962127686, "learning_rate": 9.90419894007338e-05, "loss": 3.5834, "step": 1130 }, { "epoch": 0.07711645604022285, "grad_norm": 2.662644624710083, "learning_rate": 9.903774289985053e-05, "loss": 3.6984, "step": 1135 }, { "epoch": 0.07745617611088464, "grad_norm": 3.0979316234588623, "learning_rate": 9.903349639896725e-05, "loss": 3.4165, "step": 1140 }, { "epoch": 0.0777958961815464, "grad_norm": 3.0384013652801514, "learning_rate": 9.902924989808399e-05, "loss": 3.6552, "step": 1145 }, { "epoch": 0.07813561625220818, "grad_norm": 2.941535234451294, "learning_rate": 9.902500339720071e-05, "loss": 3.5969, "step": 1150 }, { "epoch": 0.07847533632286996, "grad_norm": 2.47575306892395, "learning_rate": 9.902075689631743e-05, "loss": 3.6427, "step": 1155 }, { "epoch": 0.07881505639353173, "grad_norm": 2.178506374359131, "learning_rate": 9.901651039543417e-05, "loss": 3.5326, "step": 1160 }, { "epoch": 0.0791547764641935, "grad_norm": 3.017336130142212, "learning_rate": 9.90122638945509e-05, "loss": 3.7905, "step": 1165 }, { "epoch": 
0.07949449653485528, "grad_norm": 2.3930509090423584, "learning_rate": 9.900801739366761e-05, "loss": 3.6515, "step": 1170 }, { "epoch": 0.07983421660551705, "grad_norm": 2.8017749786376953, "learning_rate": 9.900377089278435e-05, "loss": 3.5386, "step": 1175 }, { "epoch": 0.08017393667617882, "grad_norm": 2.6371870040893555, "learning_rate": 9.899952439190108e-05, "loss": 4.0497, "step": 1180 }, { "epoch": 0.0805136567468406, "grad_norm": 2.327693462371826, "learning_rate": 9.89952778910178e-05, "loss": 3.6986, "step": 1185 }, { "epoch": 0.08085337681750238, "grad_norm": 4.935518264770508, "learning_rate": 9.899103139013454e-05, "loss": 3.7905, "step": 1190 }, { "epoch": 0.08119309688816416, "grad_norm": 2.404540538787842, "learning_rate": 9.898678488925127e-05, "loss": 3.3917, "step": 1195 }, { "epoch": 0.08153281695882593, "grad_norm": 2.52348256111145, "learning_rate": 9.898253838836798e-05, "loss": 3.7255, "step": 1200 }, { "epoch": 0.0818725370294877, "grad_norm": 3.3984553813934326, "learning_rate": 9.897829188748472e-05, "loss": 3.924, "step": 1205 }, { "epoch": 0.08221225710014948, "grad_norm": 2.7035470008850098, "learning_rate": 9.897404538660144e-05, "loss": 3.4641, "step": 1210 }, { "epoch": 0.08255197717081125, "grad_norm": 2.4212405681610107, "learning_rate": 9.896979888571817e-05, "loss": 3.5081, "step": 1215 }, { "epoch": 0.08289169724147302, "grad_norm": 2.361762285232544, "learning_rate": 9.896555238483491e-05, "loss": 3.601, "step": 1220 }, { "epoch": 0.0832314173121348, "grad_norm": 2.352565288543701, "learning_rate": 9.896130588395162e-05, "loss": 3.5627, "step": 1225 }, { "epoch": 0.08357113738279658, "grad_norm": 2.543168067932129, "learning_rate": 9.895705938306835e-05, "loss": 3.8297, "step": 1230 }, { "epoch": 0.08391085745345835, "grad_norm": 2.7356700897216797, "learning_rate": 9.895281288218509e-05, "loss": 3.5042, "step": 1235 }, { "epoch": 0.08425057752412013, "grad_norm": 2.7182724475860596, "learning_rate": 9.89485663813018e-05, 
"loss": 3.6767, "step": 1240 }, { "epoch": 0.0845902975947819, "grad_norm": 2.528550624847412, "learning_rate": 9.894431988041853e-05, "loss": 3.4373, "step": 1245 }, { "epoch": 0.08493001766544367, "grad_norm": 4.637968063354492, "learning_rate": 9.894007337953527e-05, "loss": 3.682, "step": 1250 }, { "epoch": 0.08526973773610545, "grad_norm": 2.421348810195923, "learning_rate": 9.893582687865199e-05, "loss": 3.8257, "step": 1255 }, { "epoch": 0.08560945780676722, "grad_norm": 3.2755727767944336, "learning_rate": 9.893158037776872e-05, "loss": 3.4979, "step": 1260 }, { "epoch": 0.08594917787742899, "grad_norm": 2.8488996028900146, "learning_rate": 9.892733387688546e-05, "loss": 3.6051, "step": 1265 }, { "epoch": 0.08628889794809078, "grad_norm": 2.797036647796631, "learning_rate": 9.892308737600217e-05, "loss": 3.615, "step": 1270 }, { "epoch": 0.08662861801875255, "grad_norm": 2.750133514404297, "learning_rate": 9.891884087511891e-05, "loss": 3.6867, "step": 1275 }, { "epoch": 0.08696833808941433, "grad_norm": 2.2481203079223633, "learning_rate": 9.891459437423564e-05, "loss": 3.7979, "step": 1280 }, { "epoch": 0.0873080581600761, "grad_norm": 2.9319496154785156, "learning_rate": 9.891034787335236e-05, "loss": 4.1288, "step": 1285 }, { "epoch": 0.08764777823073787, "grad_norm": 2.4174962043762207, "learning_rate": 9.89061013724691e-05, "loss": 3.6116, "step": 1290 }, { "epoch": 0.08798749830139965, "grad_norm": 2.875229597091675, "learning_rate": 9.890185487158581e-05, "loss": 3.663, "step": 1295 }, { "epoch": 0.08832721837206142, "grad_norm": 2.4175915718078613, "learning_rate": 9.889760837070254e-05, "loss": 3.8735, "step": 1300 }, { "epoch": 0.08866693844272319, "grad_norm": 2.8554160594940186, "learning_rate": 9.889336186981928e-05, "loss": 3.6893, "step": 1305 }, { "epoch": 0.08900665851338498, "grad_norm": 2.5718233585357666, "learning_rate": 9.8889115368936e-05, "loss": 3.7129, "step": 1310 }, { "epoch": 0.08934637858404675, "grad_norm": 
2.9183428287506104, "learning_rate": 9.888486886805273e-05, "loss": 3.5695, "step": 1315 }, { "epoch": 0.08968609865470852, "grad_norm": 2.3678832054138184, "learning_rate": 9.888062236716947e-05, "loss": 3.3905, "step": 1320 }, { "epoch": 0.0900258187253703, "grad_norm": 3.0546958446502686, "learning_rate": 9.887637586628618e-05, "loss": 3.4117, "step": 1325 }, { "epoch": 0.09036553879603207, "grad_norm": 2.8776729106903076, "learning_rate": 9.887212936540291e-05, "loss": 3.6729, "step": 1330 }, { "epoch": 0.09070525886669384, "grad_norm": 3.1774954795837402, "learning_rate": 9.886788286451965e-05, "loss": 3.7087, "step": 1335 }, { "epoch": 0.09104497893735562, "grad_norm": 2.4546725749969482, "learning_rate": 9.886363636363637e-05, "loss": 3.8551, "step": 1340 }, { "epoch": 0.09138469900801739, "grad_norm": 2.312204599380493, "learning_rate": 9.88593898627531e-05, "loss": 3.4144, "step": 1345 }, { "epoch": 0.09172441907867916, "grad_norm": 2.9866554737091064, "learning_rate": 9.885514336186984e-05, "loss": 3.5115, "step": 1350 }, { "epoch": 0.09206413914934095, "grad_norm": 2.046361207962036, "learning_rate": 9.885089686098655e-05, "loss": 3.7515, "step": 1355 }, { "epoch": 0.09240385922000272, "grad_norm": 3.2899272441864014, "learning_rate": 9.884665036010328e-05, "loss": 3.5764, "step": 1360 }, { "epoch": 0.0927435792906645, "grad_norm": 2.3010950088500977, "learning_rate": 9.884240385922e-05, "loss": 3.6661, "step": 1365 }, { "epoch": 0.09308329936132627, "grad_norm": 2.771374464035034, "learning_rate": 9.883815735833673e-05, "loss": 3.7498, "step": 1370 }, { "epoch": 0.09342301943198804, "grad_norm": 2.3871958255767822, "learning_rate": 9.883391085745346e-05, "loss": 3.7738, "step": 1375 }, { "epoch": 0.09376273950264982, "grad_norm": 2.7825450897216797, "learning_rate": 9.882966435657019e-05, "loss": 3.5485, "step": 1380 }, { "epoch": 0.09410245957331159, "grad_norm": 3.3543453216552734, "learning_rate": 9.882541785568692e-05, "loss": 3.6235, "step": 1385 
}, { "epoch": 0.09444217964397336, "grad_norm": 2.853057622909546, "learning_rate": 9.882117135480365e-05, "loss": 3.6737, "step": 1390 }, { "epoch": 0.09478189971463515, "grad_norm": 3.0069384574890137, "learning_rate": 9.881692485392037e-05, "loss": 3.644, "step": 1395 }, { "epoch": 0.09512161978529692, "grad_norm": 2.6202545166015625, "learning_rate": 9.88126783530371e-05, "loss": 3.6517, "step": 1400 }, { "epoch": 0.09546133985595869, "grad_norm": 6.95131254196167, "learning_rate": 9.880843185215383e-05, "loss": 3.7388, "step": 1405 }, { "epoch": 0.09580105992662047, "grad_norm": 3.5970633029937744, "learning_rate": 9.880418535127056e-05, "loss": 3.8082, "step": 1410 }, { "epoch": 0.09614077999728224, "grad_norm": 11.684786796569824, "learning_rate": 9.879993885038729e-05, "loss": 3.7492, "step": 1415 }, { "epoch": 0.09648050006794401, "grad_norm": 3.0684146881103516, "learning_rate": 9.879569234950401e-05, "loss": 3.8429, "step": 1420 }, { "epoch": 0.09682022013860579, "grad_norm": 2.6395862102508545, "learning_rate": 9.87922951487974e-05, "loss": 3.7778, "step": 1425 }, { "epoch": 0.09715994020926756, "grad_norm": 3.00087308883667, "learning_rate": 9.878804864791412e-05, "loss": 3.5962, "step": 1430 }, { "epoch": 0.09749966027992933, "grad_norm": 2.7398698329925537, "learning_rate": 9.878380214703085e-05, "loss": 3.5594, "step": 1435 }, { "epoch": 0.09783938035059112, "grad_norm": 3.682832717895508, "learning_rate": 9.877955564614758e-05, "loss": 3.784, "step": 1440 }, { "epoch": 0.09817910042125289, "grad_norm": 2.15097975730896, "learning_rate": 9.877530914526431e-05, "loss": 3.5341, "step": 1445 }, { "epoch": 0.09851882049191467, "grad_norm": 2.5064339637756348, "learning_rate": 9.877106264438104e-05, "loss": 3.6966, "step": 1450 }, { "epoch": 0.09885854056257644, "grad_norm": 2.165351629257202, "learning_rate": 9.876681614349776e-05, "loss": 3.7644, "step": 1455 }, { "epoch": 0.09919826063323821, "grad_norm": 2.6611499786376953, "learning_rate": 
9.876256964261449e-05, "loss": 3.7581, "step": 1460 }, { "epoch": 0.09953798070389999, "grad_norm": 4.449843883514404, "learning_rate": 9.87583231417312e-05, "loss": 3.6901, "step": 1465 }, { "epoch": 0.09987770077456176, "grad_norm": 3.5374207496643066, "learning_rate": 9.875407664084795e-05, "loss": 3.816, "step": 1470 }, { "epoch": 0.10021742084522353, "grad_norm": 2.6360864639282227, "learning_rate": 9.874983013996468e-05, "loss": 3.3863, "step": 1475 }, { "epoch": 0.10055714091588532, "grad_norm": 3.614423990249634, "learning_rate": 9.87455836390814e-05, "loss": 3.6914, "step": 1480 }, { "epoch": 0.10089686098654709, "grad_norm": 2.6101832389831543, "learning_rate": 9.874133713819813e-05, "loss": 3.4576, "step": 1485 }, { "epoch": 0.10123658105720885, "grad_norm": 2.3915576934814453, "learning_rate": 9.873709063731486e-05, "loss": 3.5092, "step": 1490 }, { "epoch": 0.10157630112787064, "grad_norm": 2.73832368850708, "learning_rate": 9.873284413643159e-05, "loss": 3.564, "step": 1495 }, { "epoch": 0.10191602119853241, "grad_norm": 2.4439172744750977, "learning_rate": 9.872859763554832e-05, "loss": 3.7701, "step": 1500 }, { "epoch": 0.10225574126919418, "grad_norm": 2.556349754333496, "learning_rate": 9.872435113466504e-05, "loss": 3.6641, "step": 1505 }, { "epoch": 0.10259546133985596, "grad_norm": 2.692896604537964, "learning_rate": 9.872010463378177e-05, "loss": 3.5379, "step": 1510 }, { "epoch": 0.10293518141051773, "grad_norm": 2.2422587871551514, "learning_rate": 9.87158581328985e-05, "loss": 3.5895, "step": 1515 }, { "epoch": 0.1032749014811795, "grad_norm": 2.542940855026245, "learning_rate": 9.871161163201523e-05, "loss": 3.545, "step": 1520 }, { "epoch": 0.10361462155184128, "grad_norm": 8.475870132446289, "learning_rate": 9.870736513113196e-05, "loss": 3.7558, "step": 1525 }, { "epoch": 0.10395434162250305, "grad_norm": 2.605943441390991, "learning_rate": 9.870311863024868e-05, "loss": 3.698, "step": 1530 }, { "epoch": 0.10429406169316484, 
"grad_norm": 2.659353733062744, "learning_rate": 9.86988721293654e-05, "loss": 3.879, "step": 1535 }, { "epoch": 0.10463378176382661, "grad_norm": 2.6726794242858887, "learning_rate": 9.869462562848214e-05, "loss": 3.7048, "step": 1540 }, { "epoch": 0.10497350183448838, "grad_norm": 2.2029128074645996, "learning_rate": 9.869037912759887e-05, "loss": 3.6834, "step": 1545 }, { "epoch": 0.10531322190515016, "grad_norm": 2.46644926071167, "learning_rate": 9.868613262671558e-05, "loss": 3.6618, "step": 1550 }, { "epoch": 0.10565294197581193, "grad_norm": 2.5602245330810547, "learning_rate": 9.868188612583232e-05, "loss": 3.5211, "step": 1555 }, { "epoch": 0.1059926620464737, "grad_norm": 2.3584413528442383, "learning_rate": 9.867763962494905e-05, "loss": 3.6223, "step": 1560 }, { "epoch": 0.10633238211713548, "grad_norm": 3.620461940765381, "learning_rate": 9.867339312406577e-05, "loss": 3.7434, "step": 1565 }, { "epoch": 0.10667210218779725, "grad_norm": 2.8504605293273926, "learning_rate": 9.866914662318251e-05, "loss": 3.7301, "step": 1570 }, { "epoch": 0.10701182225845902, "grad_norm": 3.402848720550537, "learning_rate": 9.866490012229924e-05, "loss": 3.9087, "step": 1575 }, { "epoch": 0.10735154232912081, "grad_norm": 2.317671060562134, "learning_rate": 9.866065362141595e-05, "loss": 3.7321, "step": 1580 }, { "epoch": 0.10769126239978258, "grad_norm": 2.246663808822632, "learning_rate": 9.865640712053269e-05, "loss": 3.5694, "step": 1585 }, { "epoch": 0.10803098247044435, "grad_norm": 2.606186866760254, "learning_rate": 9.865216061964942e-05, "loss": 3.5645, "step": 1590 }, { "epoch": 0.10837070254110613, "grad_norm": 2.2364070415496826, "learning_rate": 9.864791411876613e-05, "loss": 3.8505, "step": 1595 }, { "epoch": 0.1087104226117679, "grad_norm": 1.8168416023254395, "learning_rate": 9.864366761788288e-05, "loss": 3.7063, "step": 1600 }, { "epoch": 0.10905014268242967, "grad_norm": 2.6164488792419434, "learning_rate": 9.863942111699959e-05, "loss": 3.6088, 
"step": 1605 }, { "epoch": 0.10938986275309145, "grad_norm": 3.5399606227874756, "learning_rate": 9.863517461611632e-05, "loss": 3.7959, "step": 1610 }, { "epoch": 0.10972958282375322, "grad_norm": 2.352106809616089, "learning_rate": 9.863092811523306e-05, "loss": 3.6098, "step": 1615 }, { "epoch": 0.11006930289441501, "grad_norm": 2.274197816848755, "learning_rate": 9.862668161434977e-05, "loss": 3.5673, "step": 1620 }, { "epoch": 0.11040902296507678, "grad_norm": 2.0550856590270996, "learning_rate": 9.86224351134665e-05, "loss": 3.5434, "step": 1625 }, { "epoch": 0.11074874303573855, "grad_norm": 2.5609278678894043, "learning_rate": 9.861818861258324e-05, "loss": 3.7825, "step": 1630 }, { "epoch": 0.11108846310640033, "grad_norm": 2.473008871078491, "learning_rate": 9.861394211169996e-05, "loss": 3.6442, "step": 1635 }, { "epoch": 0.1114281831770621, "grad_norm": 2.609839677810669, "learning_rate": 9.860969561081669e-05, "loss": 3.7895, "step": 1640 }, { "epoch": 0.11176790324772387, "grad_norm": 2.270355701446533, "learning_rate": 9.860544910993343e-05, "loss": 3.6965, "step": 1645 }, { "epoch": 0.11210762331838565, "grad_norm": 8.605286598205566, "learning_rate": 9.860120260905014e-05, "loss": 3.5205, "step": 1650 }, { "epoch": 0.11244734338904742, "grad_norm": 2.727208137512207, "learning_rate": 9.859695610816687e-05, "loss": 3.7554, "step": 1655 }, { "epoch": 0.1127870634597092, "grad_norm": 2.6802847385406494, "learning_rate": 9.859270960728361e-05, "loss": 3.6455, "step": 1660 }, { "epoch": 0.11312678353037098, "grad_norm": 3.5274956226348877, "learning_rate": 9.858846310640033e-05, "loss": 3.6387, "step": 1665 }, { "epoch": 0.11346650360103275, "grad_norm": 2.9241151809692383, "learning_rate": 9.858421660551705e-05, "loss": 3.6712, "step": 1670 }, { "epoch": 0.11380622367169452, "grad_norm": 2.2806825637817383, "learning_rate": 9.85799701046338e-05, "loss": 3.7348, "step": 1675 }, { "epoch": 0.1141459437423563, "grad_norm": 2.5223445892333984, 
"learning_rate": 9.857572360375051e-05, "loss": 3.5781, "step": 1680 }, { "epoch": 0.11448566381301807, "grad_norm": 2.367621660232544, "learning_rate": 9.857147710286724e-05, "loss": 3.5701, "step": 1685 }, { "epoch": 0.11482538388367985, "grad_norm": 2.555980682373047, "learning_rate": 9.856723060198397e-05, "loss": 3.3252, "step": 1690 }, { "epoch": 0.11516510395434162, "grad_norm": 2.4443697929382324, "learning_rate": 9.85629841011007e-05, "loss": 3.5512, "step": 1695 }, { "epoch": 0.1155048240250034, "grad_norm": 2.950860023498535, "learning_rate": 9.855873760021742e-05, "loss": 3.7208, "step": 1700 }, { "epoch": 0.11584454409566518, "grad_norm": 2.127361297607422, "learning_rate": 9.855449109933415e-05, "loss": 3.3163, "step": 1705 }, { "epoch": 0.11618426416632695, "grad_norm": 2.0718114376068115, "learning_rate": 9.855024459845088e-05, "loss": 3.8084, "step": 1710 }, { "epoch": 0.11652398423698872, "grad_norm": 2.4193952083587646, "learning_rate": 9.854599809756761e-05, "loss": 3.2907, "step": 1715 }, { "epoch": 0.1168637043076505, "grad_norm": 2.3924756050109863, "learning_rate": 9.854175159668433e-05, "loss": 3.7214, "step": 1720 }, { "epoch": 0.11720342437831227, "grad_norm": 3.228001117706299, "learning_rate": 9.853750509580106e-05, "loss": 3.4578, "step": 1725 }, { "epoch": 0.11754314444897404, "grad_norm": 2.3939614295959473, "learning_rate": 9.853325859491779e-05, "loss": 3.5184, "step": 1730 }, { "epoch": 0.11788286451963582, "grad_norm": 4.840658187866211, "learning_rate": 9.852901209403452e-05, "loss": 3.5232, "step": 1735 }, { "epoch": 0.1182225845902976, "grad_norm": 2.6658291816711426, "learning_rate": 9.852476559315125e-05, "loss": 3.7119, "step": 1740 }, { "epoch": 0.11856230466095936, "grad_norm": 3.069031238555908, "learning_rate": 9.852051909226797e-05, "loss": 3.8233, "step": 1745 }, { "epoch": 0.11890202473162115, "grad_norm": 2.8473379611968994, "learning_rate": 9.85162725913847e-05, "loss": 3.6721, "step": 1750 }, { "epoch": 
0.11924174480228292, "grad_norm": 2.682267427444458, "learning_rate": 9.851202609050143e-05, "loss": 3.7382, "step": 1755 }, { "epoch": 0.11958146487294469, "grad_norm": 2.3937129974365234, "learning_rate": 9.850777958961816e-05, "loss": 3.7596, "step": 1760 }, { "epoch": 0.11992118494360647, "grad_norm": 2.733515739440918, "learning_rate": 9.850353308873489e-05, "loss": 3.6185, "step": 1765 }, { "epoch": 0.12026090501426824, "grad_norm": 2.3822734355926514, "learning_rate": 9.849928658785161e-05, "loss": 3.7106, "step": 1770 }, { "epoch": 0.12060062508493002, "grad_norm": 2.527878999710083, "learning_rate": 9.849504008696834e-05, "loss": 3.6467, "step": 1775 }, { "epoch": 0.1209403451555918, "grad_norm": 2.95981502532959, "learning_rate": 9.849079358608507e-05, "loss": 3.7278, "step": 1780 }, { "epoch": 0.12128006522625356, "grad_norm": 2.6949033737182617, "learning_rate": 9.84865470852018e-05, "loss": 3.8295, "step": 1785 }, { "epoch": 0.12161978529691535, "grad_norm": 2.6312930583953857, "learning_rate": 9.848230058431853e-05, "loss": 3.7378, "step": 1790 }, { "epoch": 0.12195950536757712, "grad_norm": 2.5927481651306152, "learning_rate": 9.847805408343525e-05, "loss": 3.457, "step": 1795 }, { "epoch": 0.12229922543823889, "grad_norm": 2.448457717895508, "learning_rate": 9.847380758255198e-05, "loss": 3.45, "step": 1800 }, { "epoch": 0.12263894550890067, "grad_norm": 4.1560821533203125, "learning_rate": 9.84695610816687e-05, "loss": 3.6898, "step": 1805 }, { "epoch": 0.12297866557956244, "grad_norm": 2.262101888656616, "learning_rate": 9.846531458078544e-05, "loss": 3.6114, "step": 1810 }, { "epoch": 0.12331838565022421, "grad_norm": 2.7449049949645996, "learning_rate": 9.846106807990217e-05, "loss": 3.5611, "step": 1815 }, { "epoch": 0.123658105720886, "grad_norm": 3.1933770179748535, "learning_rate": 9.84568215790189e-05, "loss": 3.59, "step": 1820 }, { "epoch": 0.12399782579154776, "grad_norm": 2.600872755050659, "learning_rate": 9.845257507813562e-05, 
"loss": 3.532, "step": 1825 }, { "epoch": 0.12433754586220953, "grad_norm": 2.23490571975708, "learning_rate": 9.844832857725235e-05, "loss": 3.667, "step": 1830 }, { "epoch": 0.12467726593287132, "grad_norm": 2.7708678245544434, "learning_rate": 9.844408207636908e-05, "loss": 3.7112, "step": 1835 }, { "epoch": 0.1250169860035331, "grad_norm": 8.08360481262207, "learning_rate": 9.843983557548581e-05, "loss": 3.7553, "step": 1840 }, { "epoch": 0.12535670607419486, "grad_norm": 3.5631556510925293, "learning_rate": 9.843558907460254e-05, "loss": 3.5741, "step": 1845 }, { "epoch": 0.12569642614485663, "grad_norm": 3.6369643211364746, "learning_rate": 9.843134257371926e-05, "loss": 3.7011, "step": 1850 }, { "epoch": 0.12603614621551842, "grad_norm": 2.509939432144165, "learning_rate": 9.842709607283599e-05, "loss": 3.7049, "step": 1855 }, { "epoch": 0.1263758662861802, "grad_norm": 2.6086642742156982, "learning_rate": 9.842284957195272e-05, "loss": 3.6444, "step": 1860 }, { "epoch": 0.12671558635684196, "grad_norm": 2.4885952472686768, "learning_rate": 9.841860307106945e-05, "loss": 3.3782, "step": 1865 }, { "epoch": 0.12705530642750373, "grad_norm": 2.3255302906036377, "learning_rate": 9.841435657018618e-05, "loss": 3.8069, "step": 1870 }, { "epoch": 0.1273950264981655, "grad_norm": 2.29691219329834, "learning_rate": 9.84101100693029e-05, "loss": 3.5613, "step": 1875 }, { "epoch": 0.12773474656882727, "grad_norm": 5.276554584503174, "learning_rate": 9.840586356841963e-05, "loss": 3.6252, "step": 1880 }, { "epoch": 0.12807446663948907, "grad_norm": 2.1562600135803223, "learning_rate": 9.840161706753636e-05, "loss": 3.7605, "step": 1885 }, { "epoch": 0.12841418671015084, "grad_norm": 2.6067538261413574, "learning_rate": 9.839737056665307e-05, "loss": 3.1914, "step": 1890 }, { "epoch": 0.1287539067808126, "grad_norm": 2.827772617340088, "learning_rate": 9.839312406576982e-05, "loss": 3.6718, "step": 1895 }, { "epoch": 0.12909362685147438, "grad_norm": 2.7478749752044678, 
"learning_rate": 9.838887756488654e-05, "loss": 3.6127, "step": 1900 }, { "epoch": 0.12943334692213615, "grad_norm": 2.3657724857330322, "learning_rate": 9.838463106400326e-05, "loss": 3.4427, "step": 1905 }, { "epoch": 0.12977306699279795, "grad_norm": 2.349724292755127, "learning_rate": 9.838038456312e-05, "loss": 3.6335, "step": 1910 }, { "epoch": 0.13011278706345972, "grad_norm": 3.6742427349090576, "learning_rate": 9.837613806223673e-05, "loss": 3.7661, "step": 1915 }, { "epoch": 0.1304525071341215, "grad_norm": 2.6719472408294678, "learning_rate": 9.837189156135344e-05, "loss": 3.3573, "step": 1920 }, { "epoch": 0.13079222720478326, "grad_norm": 2.593247652053833, "learning_rate": 9.836764506047018e-05, "loss": 3.7031, "step": 1925 }, { "epoch": 0.13113194727544503, "grad_norm": 2.6783385276794434, "learning_rate": 9.836339855958691e-05, "loss": 3.6197, "step": 1930 }, { "epoch": 0.1314716673461068, "grad_norm": 2.8472886085510254, "learning_rate": 9.835915205870363e-05, "loss": 3.6187, "step": 1935 }, { "epoch": 0.1318113874167686, "grad_norm": 2.1278092861175537, "learning_rate": 9.835490555782037e-05, "loss": 3.209, "step": 1940 }, { "epoch": 0.13215110748743036, "grad_norm": 3.7434868812561035, "learning_rate": 9.83506590569371e-05, "loss": 3.505, "step": 1945 }, { "epoch": 0.13249082755809213, "grad_norm": 2.496720314025879, "learning_rate": 9.834641255605381e-05, "loss": 3.8107, "step": 1950 }, { "epoch": 0.1328305476287539, "grad_norm": 2.278834342956543, "learning_rate": 9.834216605517055e-05, "loss": 3.6206, "step": 1955 }, { "epoch": 0.13317026769941567, "grad_norm": 2.7811431884765625, "learning_rate": 9.833791955428727e-05, "loss": 3.6232, "step": 1960 }, { "epoch": 0.13350998777007744, "grad_norm": 2.345259428024292, "learning_rate": 9.8333673053404e-05, "loss": 3.3393, "step": 1965 }, { "epoch": 0.13384970784073924, "grad_norm": 2.5706770420074463, "learning_rate": 9.832942655252074e-05, "loss": 3.5282, "step": 1970 }, { "epoch": 
0.134189427911401, "grad_norm": 2.311833143234253, "learning_rate": 9.832518005163745e-05, "loss": 3.83, "step": 1975 }, { "epoch": 0.13452914798206278, "grad_norm": 2.6815695762634277, "learning_rate": 9.832093355075418e-05, "loss": 3.5066, "step": 1980 }, { "epoch": 0.13486886805272455, "grad_norm": 2.2054522037506104, "learning_rate": 9.831668704987092e-05, "loss": 3.704, "step": 1985 }, { "epoch": 0.13520858812338632, "grad_norm": 3.077894687652588, "learning_rate": 9.831244054898763e-05, "loss": 3.5649, "step": 1990 }, { "epoch": 0.13554830819404812, "grad_norm": 4.214877128601074, "learning_rate": 9.830819404810436e-05, "loss": 3.6439, "step": 1995 }, { "epoch": 0.1358880282647099, "grad_norm": 2.8064329624176025, "learning_rate": 9.83039475472211e-05, "loss": 3.6047, "step": 2000 }, { "epoch": 0.13622774833537166, "grad_norm": 2.8148748874664307, "learning_rate": 9.829970104633782e-05, "loss": 3.4838, "step": 2005 }, { "epoch": 0.13656746840603343, "grad_norm": 2.6066055297851562, "learning_rate": 9.829545454545455e-05, "loss": 3.9006, "step": 2010 }, { "epoch": 0.1369071884766952, "grad_norm": 1.8680016994476318, "learning_rate": 9.829120804457129e-05, "loss": 3.6725, "step": 2015 }, { "epoch": 0.13724690854735697, "grad_norm": 2.4525768756866455, "learning_rate": 9.8286961543688e-05, "loss": 3.8145, "step": 2020 }, { "epoch": 0.13758662861801876, "grad_norm": 2.4238014221191406, "learning_rate": 9.828271504280473e-05, "loss": 3.4119, "step": 2025 }, { "epoch": 0.13792634868868053, "grad_norm": 3.079613208770752, "learning_rate": 9.827846854192146e-05, "loss": 3.5258, "step": 2030 }, { "epoch": 0.1382660687593423, "grad_norm": 2.3786492347717285, "learning_rate": 9.827422204103819e-05, "loss": 3.7729, "step": 2035 }, { "epoch": 0.13860578883000407, "grad_norm": 2.0804662704467773, "learning_rate": 9.826997554015491e-05, "loss": 3.7198, "step": 2040 }, { "epoch": 0.13894550890066584, "grad_norm": 2.224865436553955, "learning_rate": 9.826572903927164e-05, 
"loss": 3.7894, "step": 2045 }, { "epoch": 0.1392852289713276, "grad_norm": 2.318671464920044, "learning_rate": 9.826148253838837e-05, "loss": 3.5705, "step": 2050 }, { "epoch": 0.1396249490419894, "grad_norm": 2.839855194091797, "learning_rate": 9.82572360375051e-05, "loss": 3.6663, "step": 2055 }, { "epoch": 0.13996466911265118, "grad_norm": 2.529090404510498, "learning_rate": 9.825298953662183e-05, "loss": 3.7397, "step": 2060 }, { "epoch": 0.14030438918331295, "grad_norm": 2.648512601852417, "learning_rate": 9.824874303573855e-05, "loss": 3.8091, "step": 2065 }, { "epoch": 0.14064410925397472, "grad_norm": 2.4608304500579834, "learning_rate": 9.824449653485528e-05, "loss": 3.5435, "step": 2070 }, { "epoch": 0.1409838293246365, "grad_norm": 2.5319597721099854, "learning_rate": 9.824025003397201e-05, "loss": 3.4596, "step": 2075 }, { "epoch": 0.1413235493952983, "grad_norm": 1.8423856496810913, "learning_rate": 9.823600353308874e-05, "loss": 3.7239, "step": 2080 }, { "epoch": 0.14166326946596006, "grad_norm": 2.655466079711914, "learning_rate": 9.823175703220547e-05, "loss": 3.7743, "step": 2085 }, { "epoch": 0.14200298953662183, "grad_norm": 2.5335872173309326, "learning_rate": 9.82275105313222e-05, "loss": 3.6003, "step": 2090 }, { "epoch": 0.1423427096072836, "grad_norm": 2.2621281147003174, "learning_rate": 9.822326403043892e-05, "loss": 3.6408, "step": 2095 }, { "epoch": 0.14268242967794537, "grad_norm": 2.2347781658172607, "learning_rate": 9.821901752955565e-05, "loss": 3.5515, "step": 2100 }, { "epoch": 0.14302214974860714, "grad_norm": 2.8384768962860107, "learning_rate": 9.821477102867238e-05, "loss": 3.6728, "step": 2105 }, { "epoch": 0.14336186981926893, "grad_norm": 2.3546082973480225, "learning_rate": 9.82105245277891e-05, "loss": 3.7157, "step": 2110 }, { "epoch": 0.1437015898899307, "grad_norm": 2.647495746612549, "learning_rate": 9.820627802690583e-05, "loss": 3.2504, "step": 2115 }, { "epoch": 0.14404130996059247, "grad_norm": 2.484992504119873, 
"learning_rate": 9.820203152602256e-05, "loss": 3.7337, "step": 2120 }, { "epoch": 0.14438103003125424, "grad_norm": 2.94370436668396, "learning_rate": 9.819778502513929e-05, "loss": 3.523, "step": 2125 }, { "epoch": 0.144720750101916, "grad_norm": 2.8493423461914062, "learning_rate": 9.819353852425602e-05, "loss": 3.5636, "step": 2130 }, { "epoch": 0.14506047017257778, "grad_norm": 2.9806811809539795, "learning_rate": 9.818929202337275e-05, "loss": 3.5644, "step": 2135 }, { "epoch": 0.14540019024323958, "grad_norm": 2.0938262939453125, "learning_rate": 9.818504552248947e-05, "loss": 3.5947, "step": 2140 }, { "epoch": 0.14573991031390135, "grad_norm": 12.675642967224121, "learning_rate": 9.81807990216062e-05, "loss": 3.6706, "step": 2145 }, { "epoch": 0.14607963038456312, "grad_norm": 2.495978355407715, "learning_rate": 9.817655252072293e-05, "loss": 3.47, "step": 2150 }, { "epoch": 0.1464193504552249, "grad_norm": 2.372793436050415, "learning_rate": 9.817230601983966e-05, "loss": 4.0237, "step": 2155 }, { "epoch": 0.14675907052588666, "grad_norm": 2.2783010005950928, "learning_rate": 9.816805951895639e-05, "loss": 3.6148, "step": 2160 }, { "epoch": 0.14709879059654846, "grad_norm": 2.8050243854522705, "learning_rate": 9.816381301807311e-05, "loss": 3.5389, "step": 2165 }, { "epoch": 0.14743851066721023, "grad_norm": 2.6023781299591064, "learning_rate": 9.815956651718984e-05, "loss": 3.4244, "step": 2170 }, { "epoch": 0.147778230737872, "grad_norm": 2.3167836666107178, "learning_rate": 9.815532001630657e-05, "loss": 3.7994, "step": 2175 }, { "epoch": 0.14811795080853377, "grad_norm": 2.3294200897216797, "learning_rate": 9.81510735154233e-05, "loss": 3.5152, "step": 2180 }, { "epoch": 0.14845767087919554, "grad_norm": 2.136742115020752, "learning_rate": 9.814682701454003e-05, "loss": 3.653, "step": 2185 }, { "epoch": 0.1487973909498573, "grad_norm": 2.252354860305786, "learning_rate": 9.814258051365675e-05, "loss": 3.6066, "step": 2190 }, { "epoch": 
0.1491371110205191, "grad_norm": 2.5286755561828613, "learning_rate": 9.813833401277348e-05, "loss": 3.5312, "step": 2195 }, { "epoch": 0.14947683109118087, "grad_norm": 2.4802448749542236, "learning_rate": 9.813408751189021e-05, "loss": 3.6054, "step": 2200 }, { "epoch": 0.14981655116184264, "grad_norm": 2.19534969329834, "learning_rate": 9.812984101100694e-05, "loss": 3.5825, "step": 2205 }, { "epoch": 0.1501562712325044, "grad_norm": 2.286517858505249, "learning_rate": 9.812559451012367e-05, "loss": 3.7755, "step": 2210 }, { "epoch": 0.15049599130316618, "grad_norm": 2.3610992431640625, "learning_rate": 9.81213480092404e-05, "loss": 3.5939, "step": 2215 }, { "epoch": 0.15083571137382795, "grad_norm": 2.321992874145508, "learning_rate": 9.811710150835712e-05, "loss": 3.707, "step": 2220 }, { "epoch": 0.15117543144448975, "grad_norm": 2.371002435684204, "learning_rate": 9.811285500747385e-05, "loss": 3.7992, "step": 2225 }, { "epoch": 0.15151515151515152, "grad_norm": 2.4403250217437744, "learning_rate": 9.810860850659058e-05, "loss": 3.4019, "step": 2230 }, { "epoch": 0.1518548715858133, "grad_norm": 2.438884735107422, "learning_rate": 9.81043620057073e-05, "loss": 3.8167, "step": 2235 }, { "epoch": 0.15219459165647506, "grad_norm": 2.1409878730773926, "learning_rate": 9.810011550482403e-05, "loss": 3.7638, "step": 2240 }, { "epoch": 0.15253431172713683, "grad_norm": 1.9637494087219238, "learning_rate": 9.809586900394075e-05, "loss": 3.7475, "step": 2245 }, { "epoch": 0.15287403179779863, "grad_norm": 2.9680416584014893, "learning_rate": 9.809162250305749e-05, "loss": 3.3943, "step": 2250 }, { "epoch": 0.1532137518684604, "grad_norm": 2.462663412094116, "learning_rate": 9.808737600217422e-05, "loss": 3.5647, "step": 2255 }, { "epoch": 0.15355347193912217, "grad_norm": 2.943225145339966, "learning_rate": 9.808312950129093e-05, "loss": 3.8152, "step": 2260 }, { "epoch": 0.15389319200978394, "grad_norm": 2.1768798828125, "learning_rate": 9.807888300040767e-05, 
"loss": 3.6182, "step": 2265 }, { "epoch": 0.1542329120804457, "grad_norm": 2.3816909790039062, "learning_rate": 9.80746364995244e-05, "loss": 3.701, "step": 2270 }, { "epoch": 0.15457263215110748, "grad_norm": 3.084084987640381, "learning_rate": 9.807038999864112e-05, "loss": 3.4905, "step": 2275 }, { "epoch": 0.15491235222176927, "grad_norm": 2.858288049697876, "learning_rate": 9.806614349775786e-05, "loss": 3.4518, "step": 2280 }, { "epoch": 0.15525207229243104, "grad_norm": 2.808328866958618, "learning_rate": 9.806189699687459e-05, "loss": 3.5431, "step": 2285 }, { "epoch": 0.1555917923630928, "grad_norm": 2.4387338161468506, "learning_rate": 9.80576504959913e-05, "loss": 3.5698, "step": 2290 }, { "epoch": 0.15593151243375458, "grad_norm": 3.9449453353881836, "learning_rate": 9.805340399510804e-05, "loss": 3.5102, "step": 2295 }, { "epoch": 0.15627123250441635, "grad_norm": 3.0978400707244873, "learning_rate": 9.804915749422477e-05, "loss": 3.6287, "step": 2300 }, { "epoch": 0.15661095257507815, "grad_norm": 2.203216552734375, "learning_rate": 9.804491099334148e-05, "loss": 3.7373, "step": 2305 }, { "epoch": 0.15695067264573992, "grad_norm": 2.424668312072754, "learning_rate": 9.804066449245823e-05, "loss": 3.4089, "step": 2310 }, { "epoch": 0.1572903927164017, "grad_norm": 2.095552921295166, "learning_rate": 9.803641799157494e-05, "loss": 3.6822, "step": 2315 }, { "epoch": 0.15763011278706346, "grad_norm": 2.480710983276367, "learning_rate": 9.803217149069167e-05, "loss": 3.6402, "step": 2320 }, { "epoch": 0.15796983285772523, "grad_norm": 2.6255786418914795, "learning_rate": 9.802792498980841e-05, "loss": 3.8064, "step": 2325 }, { "epoch": 0.158309552928387, "grad_norm": 2.1788408756256104, "learning_rate": 9.802367848892512e-05, "loss": 3.5313, "step": 2330 }, { "epoch": 0.1586492729990488, "grad_norm": 2.2660152912139893, "learning_rate": 9.801943198804185e-05, "loss": 3.6233, "step": 2335 }, { "epoch": 0.15898899306971057, "grad_norm": 1.9752708673477173, 
"learning_rate": 9.80151854871586e-05, "loss": 3.6753, "step": 2340 }, { "epoch": 0.15932871314037234, "grad_norm": 2.174795150756836, "learning_rate": 9.801093898627531e-05, "loss": 3.6857, "step": 2345 }, { "epoch": 0.1596684332110341, "grad_norm": 2.283968448638916, "learning_rate": 9.800669248539204e-05, "loss": 3.8543, "step": 2350 }, { "epoch": 0.16000815328169588, "grad_norm": 2.2027220726013184, "learning_rate": 9.800244598450878e-05, "loss": 3.6323, "step": 2355 }, { "epoch": 0.16034787335235764, "grad_norm": 2.3527281284332275, "learning_rate": 9.799819948362549e-05, "loss": 3.6871, "step": 2360 }, { "epoch": 0.16068759342301944, "grad_norm": 2.2693097591400146, "learning_rate": 9.799395298274222e-05, "loss": 3.4205, "step": 2365 }, { "epoch": 0.1610273134936812, "grad_norm": 2.471632242202759, "learning_rate": 9.798970648185896e-05, "loss": 3.503, "step": 2370 }, { "epoch": 0.16136703356434298, "grad_norm": 2.77603816986084, "learning_rate": 9.798545998097568e-05, "loss": 3.6174, "step": 2375 }, { "epoch": 0.16170675363500475, "grad_norm": 2.966644048690796, "learning_rate": 9.79812134800924e-05, "loss": 3.408, "step": 2380 }, { "epoch": 0.16204647370566652, "grad_norm": 2.5603301525115967, "learning_rate": 9.797696697920913e-05, "loss": 3.5237, "step": 2385 }, { "epoch": 0.16238619377632832, "grad_norm": 2.822503089904785, "learning_rate": 9.797272047832586e-05, "loss": 3.6871, "step": 2390 }, { "epoch": 0.1627259138469901, "grad_norm": 3.1340339183807373, "learning_rate": 9.796847397744259e-05, "loss": 3.4174, "step": 2395 }, { "epoch": 0.16306563391765186, "grad_norm": 2.0358223915100098, "learning_rate": 9.796422747655932e-05, "loss": 3.6311, "step": 2400 }, { "epoch": 0.16340535398831363, "grad_norm": 2.0518910884857178, "learning_rate": 9.795998097567604e-05, "loss": 3.3313, "step": 2405 }, { "epoch": 0.1637450740589754, "grad_norm": 2.1445281505584717, "learning_rate": 9.795573447479277e-05, "loss": 3.7508, "step": 2410 }, { "epoch": 
0.16408479412963717, "grad_norm": 2.2487223148345947, "learning_rate": 9.79514879739095e-05, "loss": 3.8102, "step": 2415 }, { "epoch": 0.16442451420029897, "grad_norm": 2.8430604934692383, "learning_rate": 9.794724147302623e-05, "loss": 3.591, "step": 2420 }, { "epoch": 0.16476423427096074, "grad_norm": 2.6248815059661865, "learning_rate": 9.794299497214296e-05, "loss": 3.6117, "step": 2425 }, { "epoch": 0.1651039543416225, "grad_norm": 2.956120729446411, "learning_rate": 9.793874847125968e-05, "loss": 3.6201, "step": 2430 }, { "epoch": 0.16544367441228428, "grad_norm": 2.6630165576934814, "learning_rate": 9.793450197037641e-05, "loss": 3.7398, "step": 2435 }, { "epoch": 0.16578339448294604, "grad_norm": 2.3322465419769287, "learning_rate": 9.793025546949314e-05, "loss": 3.6164, "step": 2440 }, { "epoch": 0.16612311455360781, "grad_norm": 2.939293146133423, "learning_rate": 9.792600896860987e-05, "loss": 3.5847, "step": 2445 }, { "epoch": 0.1664628346242696, "grad_norm": 2.323683738708496, "learning_rate": 9.79217624677266e-05, "loss": 3.4673, "step": 2450 }, { "epoch": 0.16680255469493138, "grad_norm": 3.1200194358825684, "learning_rate": 9.791751596684332e-05, "loss": 3.5641, "step": 2455 }, { "epoch": 0.16714227476559315, "grad_norm": 2.71930193901062, "learning_rate": 9.791326946596005e-05, "loss": 3.6519, "step": 2460 }, { "epoch": 0.16748199483625492, "grad_norm": 2.446498155593872, "learning_rate": 9.790902296507678e-05, "loss": 3.5345, "step": 2465 }, { "epoch": 0.1678217149069167, "grad_norm": 2.8127427101135254, "learning_rate": 9.790477646419351e-05, "loss": 3.7365, "step": 2470 }, { "epoch": 0.1681614349775785, "grad_norm": 2.792457103729248, "learning_rate": 9.790052996331024e-05, "loss": 3.7071, "step": 2475 }, { "epoch": 0.16850115504824026, "grad_norm": 2.2959587574005127, "learning_rate": 9.789628346242696e-05, "loss": 3.5937, "step": 2480 }, { "epoch": 0.16884087511890203, "grad_norm": 2.0903351306915283, "learning_rate": 9.789203696154369e-05, 
"loss": 3.7096, "step": 2485 }, { "epoch": 0.1691805951895638, "grad_norm": 1.8774720430374146, "learning_rate": 9.788779046066042e-05, "loss": 3.6802, "step": 2490 }, { "epoch": 0.16952031526022557, "grad_norm": 2.540419816970825, "learning_rate": 9.788354395977715e-05, "loss": 3.6033, "step": 2495 }, { "epoch": 0.16986003533088734, "grad_norm": 2.4474573135375977, "learning_rate": 9.787929745889388e-05, "loss": 3.7732, "step": 2500 }, { "epoch": 0.17019975540154914, "grad_norm": 2.218492269515991, "learning_rate": 9.78750509580106e-05, "loss": 3.8521, "step": 2505 }, { "epoch": 0.1705394754722109, "grad_norm": 2.1093363761901855, "learning_rate": 9.787080445712733e-05, "loss": 3.8351, "step": 2510 }, { "epoch": 0.17087919554287267, "grad_norm": 1.7610983848571777, "learning_rate": 9.786655795624406e-05, "loss": 3.5571, "step": 2515 }, { "epoch": 0.17121891561353444, "grad_norm": 2.0385890007019043, "learning_rate": 9.786231145536079e-05, "loss": 3.8087, "step": 2520 }, { "epoch": 0.17155863568419621, "grad_norm": 1.9753596782684326, "learning_rate": 9.785806495447752e-05, "loss": 3.7648, "step": 2525 }, { "epoch": 0.17189835575485798, "grad_norm": 2.7233643531799316, "learning_rate": 9.785381845359425e-05, "loss": 3.6077, "step": 2530 }, { "epoch": 0.17223807582551978, "grad_norm": 2.5640554428100586, "learning_rate": 9.784957195271097e-05, "loss": 3.8141, "step": 2535 }, { "epoch": 0.17257779589618155, "grad_norm": 2.5163915157318115, "learning_rate": 9.78453254518277e-05, "loss": 3.49, "step": 2540 }, { "epoch": 0.17291751596684332, "grad_norm": 2.22219181060791, "learning_rate": 9.784107895094443e-05, "loss": 3.5199, "step": 2545 }, { "epoch": 0.1732572360375051, "grad_norm": 2.1310431957244873, "learning_rate": 9.783683245006116e-05, "loss": 3.7165, "step": 2550 }, { "epoch": 0.17359695610816686, "grad_norm": 2.130831480026245, "learning_rate": 9.783258594917789e-05, "loss": 3.471, "step": 2555 }, { "epoch": 0.17393667617882866, "grad_norm": 
3.715970754623413, "learning_rate": 9.782833944829461e-05, "loss": 3.4676, "step": 2560 }, { "epoch": 0.17427639624949043, "grad_norm": 3.315221071243286, "learning_rate": 9.782409294741134e-05, "loss": 3.5581, "step": 2565 }, { "epoch": 0.1746161163201522, "grad_norm": 3.4642419815063477, "learning_rate": 9.781984644652807e-05, "loss": 3.6419, "step": 2570 }, { "epoch": 0.17495583639081397, "grad_norm": 3.6717991828918457, "learning_rate": 9.78155999456448e-05, "loss": 3.7798, "step": 2575 }, { "epoch": 0.17529555646147574, "grad_norm": 2.8997764587402344, "learning_rate": 9.781135344476153e-05, "loss": 3.7123, "step": 2580 }, { "epoch": 0.1756352765321375, "grad_norm": 2.0655264854431152, "learning_rate": 9.780710694387824e-05, "loss": 3.5152, "step": 2585 }, { "epoch": 0.1759749966027993, "grad_norm": 2.969449520111084, "learning_rate": 9.780286044299498e-05, "loss": 3.4419, "step": 2590 }, { "epoch": 0.17631471667346107, "grad_norm": 2.859215259552002, "learning_rate": 9.779861394211171e-05, "loss": 3.5513, "step": 2595 }, { "epoch": 0.17665443674412284, "grad_norm": 2.473663806915283, "learning_rate": 9.779436744122842e-05, "loss": 3.4312, "step": 2600 }, { "epoch": 0.17699415681478461, "grad_norm": 3.0627822875976562, "learning_rate": 9.779012094034517e-05, "loss": 3.8827, "step": 2605 }, { "epoch": 0.17733387688544638, "grad_norm": 3.38277530670166, "learning_rate": 9.778587443946189e-05, "loss": 3.6351, "step": 2610 }, { "epoch": 0.17767359695610815, "grad_norm": 2.0422134399414062, "learning_rate": 9.778162793857861e-05, "loss": 3.3612, "step": 2615 }, { "epoch": 0.17801331702676995, "grad_norm": 2.3042876720428467, "learning_rate": 9.777738143769535e-05, "loss": 3.5923, "step": 2620 }, { "epoch": 0.17835303709743172, "grad_norm": 2.577887773513794, "learning_rate": 9.777313493681208e-05, "loss": 3.4756, "step": 2625 }, { "epoch": 0.1786927571680935, "grad_norm": 2.7902963161468506, "learning_rate": 9.776888843592879e-05, "loss": 3.4211, "step": 2630 }, { 
"epoch": 0.17903247723875526, "grad_norm": 2.413365125656128, "learning_rate": 9.776464193504553e-05, "loss": 3.5782, "step": 2635 }, { "epoch": 0.17937219730941703, "grad_norm": 2.4663658142089844, "learning_rate": 9.776039543416226e-05, "loss": 3.7296, "step": 2640 }, { "epoch": 0.17971191738007883, "grad_norm": 2.2067759037017822, "learning_rate": 9.775614893327898e-05, "loss": 3.5974, "step": 2645 }, { "epoch": 0.1800516374507406, "grad_norm": 2.559173822402954, "learning_rate": 9.775190243239572e-05, "loss": 3.778, "step": 2650 }, { "epoch": 0.18039135752140237, "grad_norm": 3.064662456512451, "learning_rate": 9.774765593151245e-05, "loss": 3.6979, "step": 2655 }, { "epoch": 0.18073107759206414, "grad_norm": 1.996138095855713, "learning_rate": 9.774340943062916e-05, "loss": 3.4503, "step": 2660 }, { "epoch": 0.1810707976627259, "grad_norm": 2.4670746326446533, "learning_rate": 9.77391629297459e-05, "loss": 3.5571, "step": 2665 }, { "epoch": 0.18141051773338768, "grad_norm": 3.6713316440582275, "learning_rate": 9.773491642886262e-05, "loss": 3.5245, "step": 2670 }, { "epoch": 0.18175023780404947, "grad_norm": 3.433065414428711, "learning_rate": 9.773066992797934e-05, "loss": 3.4949, "step": 2675 }, { "epoch": 0.18208995787471124, "grad_norm": 2.7018165588378906, "learning_rate": 9.772642342709609e-05, "loss": 3.7476, "step": 2680 }, { "epoch": 0.18242967794537301, "grad_norm": 1.8158012628555298, "learning_rate": 9.77221769262128e-05, "loss": 3.5977, "step": 2685 }, { "epoch": 0.18276939801603478, "grad_norm": 2.106041431427002, "learning_rate": 9.771793042532953e-05, "loss": 3.6258, "step": 2690 }, { "epoch": 0.18310911808669655, "grad_norm": 2.2543394565582275, "learning_rate": 9.771368392444627e-05, "loss": 3.2172, "step": 2695 }, { "epoch": 0.18344883815735832, "grad_norm": 2.4051880836486816, "learning_rate": 9.770943742356298e-05, "loss": 3.7232, "step": 2700 }, { "epoch": 0.18378855822802012, "grad_norm": 2.4393973350524902, "learning_rate": 
9.770519092267971e-05, "loss": 3.4493, "step": 2705 }, { "epoch": 0.1841282782986819, "grad_norm": 2.4562902450561523, "learning_rate": 9.770094442179645e-05, "loss": 3.4672, "step": 2710 }, { "epoch": 0.18446799836934366, "grad_norm": 2.406501531600952, "learning_rate": 9.769669792091317e-05, "loss": 3.633, "step": 2715 }, { "epoch": 0.18480771844000543, "grad_norm": 2.541698932647705, "learning_rate": 9.76924514200299e-05, "loss": 3.6351, "step": 2720 }, { "epoch": 0.1851474385106672, "grad_norm": 2.210376739501953, "learning_rate": 9.768820491914664e-05, "loss": 3.7154, "step": 2725 }, { "epoch": 0.185487158581329, "grad_norm": 2.403480052947998, "learning_rate": 9.768395841826335e-05, "loss": 3.8181, "step": 2730 }, { "epoch": 0.18582687865199077, "grad_norm": 2.3813345432281494, "learning_rate": 9.767971191738008e-05, "loss": 3.639, "step": 2735 }, { "epoch": 0.18616659872265254, "grad_norm": 1.8875386714935303, "learning_rate": 9.767546541649681e-05, "loss": 3.44, "step": 2740 }, { "epoch": 0.1865063187933143, "grad_norm": 9.793167114257812, "learning_rate": 9.767121891561354e-05, "loss": 3.4686, "step": 2745 }, { "epoch": 0.18684603886397608, "grad_norm": 2.6854958534240723, "learning_rate": 9.766697241473026e-05, "loss": 3.6919, "step": 2750 }, { "epoch": 0.18718575893463785, "grad_norm": 2.650813579559326, "learning_rate": 9.766272591384699e-05, "loss": 3.8755, "step": 2755 }, { "epoch": 0.18752547900529964, "grad_norm": 2.170989751815796, "learning_rate": 9.765847941296372e-05, "loss": 3.6333, "step": 2760 }, { "epoch": 0.18786519907596141, "grad_norm": 1.9350907802581787, "learning_rate": 9.765423291208045e-05, "loss": 3.7114, "step": 2765 }, { "epoch": 0.18820491914662318, "grad_norm": 1.8634272813796997, "learning_rate": 9.764998641119718e-05, "loss": 3.6276, "step": 2770 }, { "epoch": 0.18854463921728495, "grad_norm": 2.8237640857696533, "learning_rate": 9.76457399103139e-05, "loss": 3.8684, "step": 2775 }, { "epoch": 0.18888435928794672, "grad_norm": 
2.0793941020965576, "learning_rate": 9.764149340943063e-05, "loss": 3.6411, "step": 2780 }, { "epoch": 0.1892240793586085, "grad_norm": 2.297959089279175, "learning_rate": 9.763724690854736e-05, "loss": 3.3032, "step": 2785 }, { "epoch": 0.1895637994292703, "grad_norm": 3.117781639099121, "learning_rate": 9.763300040766409e-05, "loss": 3.6391, "step": 2790 }, { "epoch": 0.18990351949993206, "grad_norm": 2.892871141433716, "learning_rate": 9.762875390678082e-05, "loss": 3.4297, "step": 2795 }, { "epoch": 0.19024323957059383, "grad_norm": 2.8412797451019287, "learning_rate": 9.762450740589754e-05, "loss": 3.5017, "step": 2800 }, { "epoch": 0.1905829596412556, "grad_norm": 2.782804489135742, "learning_rate": 9.762026090501427e-05, "loss": 3.5181, "step": 2805 }, { "epoch": 0.19092267971191737, "grad_norm": 2.209777355194092, "learning_rate": 9.7616014404131e-05, "loss": 3.2019, "step": 2810 }, { "epoch": 0.19126239978257917, "grad_norm": 3.334501028060913, "learning_rate": 9.761176790324773e-05, "loss": 3.582, "step": 2815 }, { "epoch": 0.19160211985324094, "grad_norm": 3.1110005378723145, "learning_rate": 9.760752140236446e-05, "loss": 3.7484, "step": 2820 }, { "epoch": 0.1919418399239027, "grad_norm": 1.786243200302124, "learning_rate": 9.760327490148118e-05, "loss": 3.4655, "step": 2825 }, { "epoch": 0.19228155999456448, "grad_norm": 2.2295684814453125, "learning_rate": 9.759902840059791e-05, "loss": 3.6777, "step": 2830 }, { "epoch": 0.19262128006522625, "grad_norm": 2.8025248050689697, "learning_rate": 9.759478189971464e-05, "loss": 3.6121, "step": 2835 }, { "epoch": 0.19296100013588802, "grad_norm": 2.6978213787078857, "learning_rate": 9.759053539883137e-05, "loss": 3.5891, "step": 2840 }, { "epoch": 0.19330072020654981, "grad_norm": 1.7959308624267578, "learning_rate": 9.75862888979481e-05, "loss": 3.6874, "step": 2845 }, { "epoch": 0.19364044027721158, "grad_norm": 2.0286197662353516, "learning_rate": 9.758204239706482e-05, "loss": 3.5829, "step": 2850 }, { 
"epoch": 0.19398016034787335, "grad_norm": 2.8474998474121094, "learning_rate": 9.757779589618155e-05, "loss": 3.6931, "step": 2855 }, { "epoch": 0.19431988041853512, "grad_norm": 2.3275139331817627, "learning_rate": 9.757354939529828e-05, "loss": 3.5373, "step": 2860 }, { "epoch": 0.1946596004891969, "grad_norm": 2.9940762519836426, "learning_rate": 9.756930289441501e-05, "loss": 3.7331, "step": 2865 }, { "epoch": 0.19499932055985866, "grad_norm": 2.6775050163269043, "learning_rate": 9.756505639353174e-05, "loss": 3.4479, "step": 2870 }, { "epoch": 0.19533904063052046, "grad_norm": 2.838517904281616, "learning_rate": 9.756080989264846e-05, "loss": 3.5264, "step": 2875 }, { "epoch": 0.19567876070118223, "grad_norm": 2.5054478645324707, "learning_rate": 9.755656339176519e-05, "loss": 3.6542, "step": 2880 }, { "epoch": 0.196018480771844, "grad_norm": 2.3794655799865723, "learning_rate": 9.755231689088192e-05, "loss": 3.5861, "step": 2885 }, { "epoch": 0.19635820084250577, "grad_norm": 2.6987698078155518, "learning_rate": 9.754807038999865e-05, "loss": 3.1002, "step": 2890 }, { "epoch": 0.19669792091316754, "grad_norm": 2.576537847518921, "learning_rate": 9.754382388911538e-05, "loss": 3.2865, "step": 2895 }, { "epoch": 0.19703764098382934, "grad_norm": 2.7286622524261475, "learning_rate": 9.75395773882321e-05, "loss": 3.6333, "step": 2900 }, { "epoch": 0.1973773610544911, "grad_norm": 2.4445180892944336, "learning_rate": 9.753533088734883e-05, "loss": 3.3457, "step": 2905 }, { "epoch": 0.19771708112515288, "grad_norm": 2.815880298614502, "learning_rate": 9.753108438646556e-05, "loss": 3.4336, "step": 2910 }, { "epoch": 0.19805680119581465, "grad_norm": 2.600489854812622, "learning_rate": 9.752683788558229e-05, "loss": 3.5123, "step": 2915 }, { "epoch": 0.19839652126647642, "grad_norm": 2.266153573989868, "learning_rate": 9.752259138469902e-05, "loss": 3.5787, "step": 2920 }, { "epoch": 0.1987362413371382, "grad_norm": 2.3167531490325928, "learning_rate": 
9.751834488381574e-05, "loss": 3.5104, "step": 2925 }, { "epoch": 0.19907596140779998, "grad_norm": 2.650376319885254, "learning_rate": 9.751409838293247e-05, "loss": 3.3488, "step": 2930 }, { "epoch": 0.19941568147846175, "grad_norm": 2.216982126235962, "learning_rate": 9.75098518820492e-05, "loss": 3.7367, "step": 2935 }, { "epoch": 0.19975540154912352, "grad_norm": 3.7843616008758545, "learning_rate": 9.750560538116591e-05, "loss": 3.3857, "step": 2940 }, { "epoch": 0.2000951216197853, "grad_norm": 2.531493902206421, "learning_rate": 9.750135888028266e-05, "loss": 3.57, "step": 2945 }, { "epoch": 0.20043484169044706, "grad_norm": 2.5871853828430176, "learning_rate": 9.749711237939938e-05, "loss": 3.7574, "step": 2950 }, { "epoch": 0.20077456176110883, "grad_norm": 1.9841268062591553, "learning_rate": 9.74928658785161e-05, "loss": 3.8677, "step": 2955 }, { "epoch": 0.20111428183177063, "grad_norm": 2.5171425342559814, "learning_rate": 9.748861937763284e-05, "loss": 3.7339, "step": 2960 }, { "epoch": 0.2014540019024324, "grad_norm": 1.8714319467544556, "learning_rate": 9.748437287674957e-05, "loss": 3.47, "step": 2965 }, { "epoch": 0.20179372197309417, "grad_norm": 2.2297403812408447, "learning_rate": 9.748012637586628e-05, "loss": 3.5625, "step": 2970 }, { "epoch": 0.20213344204375594, "grad_norm": 2.2692747116088867, "learning_rate": 9.747587987498302e-05, "loss": 3.4923, "step": 2975 }, { "epoch": 0.2024731621144177, "grad_norm": 2.2953994274139404, "learning_rate": 9.747163337409975e-05, "loss": 3.5081, "step": 2980 }, { "epoch": 0.2028128821850795, "grad_norm": 2.5327563285827637, "learning_rate": 9.746738687321647e-05, "loss": 3.475, "step": 2985 }, { "epoch": 0.20315260225574128, "grad_norm": 1.9328463077545166, "learning_rate": 9.746314037233321e-05, "loss": 3.6246, "step": 2990 }, { "epoch": 0.20349232232640305, "grad_norm": 3.280945062637329, "learning_rate": 9.745889387144994e-05, "loss": 3.6621, "step": 2995 }, { "epoch": 0.20383204239706482, 
"grad_norm": 2.1963016986846924, "learning_rate": 9.745464737056665e-05, "loss": 3.5739, "step": 3000 }, { "epoch": 0.2041717624677266, "grad_norm": 3.0117032527923584, "learning_rate": 9.745040086968339e-05, "loss": 3.543, "step": 3005 }, { "epoch": 0.20451148253838836, "grad_norm": 2.1569318771362305, "learning_rate": 9.74461543688001e-05, "loss": 3.5851, "step": 3010 }, { "epoch": 0.20485120260905015, "grad_norm": 2.2693192958831787, "learning_rate": 9.744190786791683e-05, "loss": 3.7163, "step": 3015 }, { "epoch": 0.20519092267971192, "grad_norm": 2.4745140075683594, "learning_rate": 9.743766136703358e-05, "loss": 3.6156, "step": 3020 }, { "epoch": 0.2055306427503737, "grad_norm": 2.7690165042877197, "learning_rate": 9.743341486615029e-05, "loss": 3.6366, "step": 3025 }, { "epoch": 0.20587036282103546, "grad_norm": 2.6442840099334717, "learning_rate": 9.742916836526702e-05, "loss": 3.5539, "step": 3030 }, { "epoch": 0.20621008289169723, "grad_norm": 2.116943359375, "learning_rate": 9.742492186438376e-05, "loss": 3.5905, "step": 3035 }, { "epoch": 0.206549802962359, "grad_norm": 2.459552526473999, "learning_rate": 9.742067536350047e-05, "loss": 3.6107, "step": 3040 }, { "epoch": 0.2068895230330208, "grad_norm": 2.3407833576202393, "learning_rate": 9.74164288626172e-05, "loss": 3.4376, "step": 3045 }, { "epoch": 0.20722924310368257, "grad_norm": 2.142601490020752, "learning_rate": 9.741218236173394e-05, "loss": 3.5207, "step": 3050 }, { "epoch": 0.20756896317434434, "grad_norm": 2.957223892211914, "learning_rate": 9.740793586085066e-05, "loss": 3.4058, "step": 3055 }, { "epoch": 0.2079086832450061, "grad_norm": 3.0165371894836426, "learning_rate": 9.740368935996739e-05, "loss": 3.6678, "step": 3060 }, { "epoch": 0.20824840331566788, "grad_norm": 1.903260588645935, "learning_rate": 9.739944285908413e-05, "loss": 3.3852, "step": 3065 }, { "epoch": 0.20858812338632968, "grad_norm": 2.46872878074646, "learning_rate": 9.739519635820084e-05, "loss": 3.6929, "step": 
3070 }, { "epoch": 0.20892784345699145, "grad_norm": 1.9820877313613892, "learning_rate": 9.739094985731757e-05, "loss": 3.4162, "step": 3075 }, { "epoch": 0.20926756352765322, "grad_norm": 2.5532376766204834, "learning_rate": 9.738670335643431e-05, "loss": 3.3846, "step": 3080 }, { "epoch": 0.209607283598315, "grad_norm": 2.64316463470459, "learning_rate": 9.738245685555103e-05, "loss": 3.8069, "step": 3085 }, { "epoch": 0.20994700366897676, "grad_norm": 2.5200743675231934, "learning_rate": 9.737821035466775e-05, "loss": 3.5191, "step": 3090 }, { "epoch": 0.21028672373963853, "grad_norm": 2.303730010986328, "learning_rate": 9.737396385378448e-05, "loss": 3.6871, "step": 3095 }, { "epoch": 0.21062644381030032, "grad_norm": 2.920701503753662, "learning_rate": 9.736971735290121e-05, "loss": 3.8136, "step": 3100 }, { "epoch": 0.2109661638809621, "grad_norm": 2.594109296798706, "learning_rate": 9.736547085201794e-05, "loss": 3.5586, "step": 3105 }, { "epoch": 0.21130588395162386, "grad_norm": 2.717984437942505, "learning_rate": 9.736122435113467e-05, "loss": 3.766, "step": 3110 }, { "epoch": 0.21164560402228563, "grad_norm": 2.254509210586548, "learning_rate": 9.73569778502514e-05, "loss": 3.6191, "step": 3115 }, { "epoch": 0.2119853240929474, "grad_norm": 3.8217010498046875, "learning_rate": 9.735273134936812e-05, "loss": 3.615, "step": 3120 }, { "epoch": 0.21232504416360917, "grad_norm": 2.576765537261963, "learning_rate": 9.734848484848485e-05, "loss": 3.3398, "step": 3125 }, { "epoch": 0.21266476423427097, "grad_norm": 3.2126941680908203, "learning_rate": 9.734423834760158e-05, "loss": 3.7622, "step": 3130 }, { "epoch": 0.21300448430493274, "grad_norm": 2.3438591957092285, "learning_rate": 9.733999184671831e-05, "loss": 3.4653, "step": 3135 }, { "epoch": 0.2133442043755945, "grad_norm": 1.928268551826477, "learning_rate": 9.733574534583503e-05, "loss": 3.4275, "step": 3140 }, { "epoch": 0.21368392444625628, "grad_norm": 2.6850974559783936, "learning_rate": 
9.733149884495176e-05, "loss": 3.6838, "step": 3145 }, { "epoch": 0.21402364451691805, "grad_norm": 2.4214115142822266, "learning_rate": 9.732725234406849e-05, "loss": 3.2707, "step": 3150 }, { "epoch": 0.21436336458757985, "grad_norm": 3.014404535293579, "learning_rate": 9.732300584318522e-05, "loss": 3.5808, "step": 3155 }, { "epoch": 0.21470308465824162, "grad_norm": 2.328543186187744, "learning_rate": 9.731875934230195e-05, "loss": 3.5542, "step": 3160 }, { "epoch": 0.2150428047289034, "grad_norm": 1.7934205532073975, "learning_rate": 9.731451284141867e-05, "loss": 3.6418, "step": 3165 }, { "epoch": 0.21538252479956516, "grad_norm": 3.1732406616210938, "learning_rate": 9.73102663405354e-05, "loss": 3.3123, "step": 3170 }, { "epoch": 0.21572224487022693, "grad_norm": 2.4772515296936035, "learning_rate": 9.730601983965213e-05, "loss": 3.5331, "step": 3175 }, { "epoch": 0.2160619649408887, "grad_norm": 2.125526189804077, "learning_rate": 9.730177333876886e-05, "loss": 3.6172, "step": 3180 }, { "epoch": 0.2164016850115505, "grad_norm": 2.6105737686157227, "learning_rate": 9.729752683788559e-05, "loss": 3.6548, "step": 3185 }, { "epoch": 0.21674140508221226, "grad_norm": 2.7148489952087402, "learning_rate": 9.729328033700231e-05, "loss": 3.8312, "step": 3190 }, { "epoch": 0.21708112515287403, "grad_norm": 2.44311785697937, "learning_rate": 9.728903383611904e-05, "loss": 3.5378, "step": 3195 }, { "epoch": 0.2174208452235358, "grad_norm": 2.3602492809295654, "learning_rate": 9.728478733523577e-05, "loss": 3.7604, "step": 3200 }, { "epoch": 0.21776056529419757, "grad_norm": 2.6540510654449463, "learning_rate": 9.72805408343525e-05, "loss": 3.7915, "step": 3205 }, { "epoch": 0.21810028536485934, "grad_norm": 2.384971857070923, "learning_rate": 9.727629433346923e-05, "loss": 3.7734, "step": 3210 }, { "epoch": 0.21844000543552114, "grad_norm": 2.243856191635132, "learning_rate": 9.727204783258595e-05, "loss": 3.6214, "step": 3215 }, { "epoch": 0.2187797255061829, 
"grad_norm": 2.1839208602905273, "learning_rate": 9.726780133170268e-05, "loss": 3.4916, "step": 3220 }, { "epoch": 0.21911944557684468, "grad_norm": 2.262728691101074, "learning_rate": 9.726355483081941e-05, "loss": 3.4205, "step": 3225 }, { "epoch": 0.21945916564750645, "grad_norm": 2.3842148780822754, "learning_rate": 9.725930832993614e-05, "loss": 3.6629, "step": 3230 }, { "epoch": 0.21979888571816822, "grad_norm": 2.1511716842651367, "learning_rate": 9.725506182905287e-05, "loss": 3.4792, "step": 3235 }, { "epoch": 0.22013860578883002, "grad_norm": 2.3301377296447754, "learning_rate": 9.72508153281696e-05, "loss": 3.5182, "step": 3240 }, { "epoch": 0.22047832585949179, "grad_norm": 2.0076563358306885, "learning_rate": 9.724656882728632e-05, "loss": 3.4083, "step": 3245 }, { "epoch": 0.22081804593015356, "grad_norm": 2.830758810043335, "learning_rate": 9.724232232640305e-05, "loss": 3.5786, "step": 3250 }, { "epoch": 0.22115776600081533, "grad_norm": 2.079038381576538, "learning_rate": 9.723807582551978e-05, "loss": 3.4956, "step": 3255 }, { "epoch": 0.2214974860714771, "grad_norm": 2.3984270095825195, "learning_rate": 9.723382932463651e-05, "loss": 3.6339, "step": 3260 }, { "epoch": 0.22183720614213887, "grad_norm": 2.0549917221069336, "learning_rate": 9.722958282375324e-05, "loss": 3.619, "step": 3265 }, { "epoch": 0.22217692621280066, "grad_norm": 2.448378801345825, "learning_rate": 9.722533632286996e-05, "loss": 3.7021, "step": 3270 }, { "epoch": 0.22251664628346243, "grad_norm": 2.4262993335723877, "learning_rate": 9.722108982198669e-05, "loss": 3.5763, "step": 3275 }, { "epoch": 0.2228563663541242, "grad_norm": 2.3491122722625732, "learning_rate": 9.721684332110342e-05, "loss": 3.5363, "step": 3280 }, { "epoch": 0.22319608642478597, "grad_norm": 2.4455039501190186, "learning_rate": 9.721259682022015e-05, "loss": 3.8102, "step": 3285 }, { "epoch": 0.22353580649544774, "grad_norm": 3.75618052482605, "learning_rate": 9.720835031933688e-05, "loss": 3.5561, 
"step": 3290 }, { "epoch": 0.22387552656610954, "grad_norm": 4.276167869567871, "learning_rate": 9.720410381845359e-05, "loss": 3.2564, "step": 3295 }, { "epoch": 0.2242152466367713, "grad_norm": 2.3141064643859863, "learning_rate": 9.719985731757033e-05, "loss": 3.4724, "step": 3300 }, { "epoch": 0.22455496670743308, "grad_norm": 2.0347092151641846, "learning_rate": 9.719561081668706e-05, "loss": 3.7681, "step": 3305 }, { "epoch": 0.22489468677809485, "grad_norm": 3.3582491874694824, "learning_rate": 9.719136431580377e-05, "loss": 3.3226, "step": 3310 }, { "epoch": 0.22523440684875662, "grad_norm": 2.4000747203826904, "learning_rate": 9.718711781492052e-05, "loss": 3.3896, "step": 3315 }, { "epoch": 0.2255741269194184, "grad_norm": 2.094728469848633, "learning_rate": 9.718287131403724e-05, "loss": 3.4655, "step": 3320 }, { "epoch": 0.22591384699008019, "grad_norm": 2.277259349822998, "learning_rate": 9.717862481315396e-05, "loss": 3.5437, "step": 3325 }, { "epoch": 0.22625356706074196, "grad_norm": 2.1364047527313232, "learning_rate": 9.71743783122707e-05, "loss": 3.7251, "step": 3330 }, { "epoch": 0.22659328713140373, "grad_norm": 2.2038934230804443, "learning_rate": 9.717013181138743e-05, "loss": 3.4311, "step": 3335 }, { "epoch": 0.2269330072020655, "grad_norm": 3.897040605545044, "learning_rate": 9.716588531050414e-05, "loss": 3.2396, "step": 3340 }, { "epoch": 0.22727272727272727, "grad_norm": 2.321673631668091, "learning_rate": 9.716163880962088e-05, "loss": 3.5803, "step": 3345 }, { "epoch": 0.22761244734338903, "grad_norm": 2.327251434326172, "learning_rate": 9.715739230873761e-05, "loss": 3.6083, "step": 3350 }, { "epoch": 0.22795216741405083, "grad_norm": 2.3726818561553955, "learning_rate": 9.715314580785433e-05, "loss": 3.5619, "step": 3355 }, { "epoch": 0.2282918874847126, "grad_norm": 1.785314679145813, "learning_rate": 9.714889930697107e-05, "loss": 3.6039, "step": 3360 }, { "epoch": 0.22863160755537437, "grad_norm": 2.9010629653930664, 
"learning_rate": 9.714465280608778e-05, "loss": 3.7284, "step": 3365 }, { "epoch": 0.22897132762603614, "grad_norm": 2.874483823776245, "learning_rate": 9.714040630520451e-05, "loss": 3.6143, "step": 3370 }, { "epoch": 0.2293110476966979, "grad_norm": 2.348184823989868, "learning_rate": 9.713615980432125e-05, "loss": 3.6277, "step": 3375 }, { "epoch": 0.2296507677673597, "grad_norm": 2.8848817348480225, "learning_rate": 9.713191330343797e-05, "loss": 3.4631, "step": 3380 }, { "epoch": 0.22999048783802148, "grad_norm": 2.196012496948242, "learning_rate": 9.71276668025547e-05, "loss": 3.6293, "step": 3385 }, { "epoch": 0.23033020790868325, "grad_norm": 2.025202989578247, "learning_rate": 9.712342030167144e-05, "loss": 3.4078, "step": 3390 }, { "epoch": 0.23066992797934502, "grad_norm": 2.2144856452941895, "learning_rate": 9.711917380078815e-05, "loss": 3.656, "step": 3395 }, { "epoch": 0.2310096480500068, "grad_norm": 2.4922642707824707, "learning_rate": 9.711492729990488e-05, "loss": 3.4842, "step": 3400 }, { "epoch": 0.23134936812066856, "grad_norm": 2.624922037124634, "learning_rate": 9.711068079902162e-05, "loss": 3.6389, "step": 3405 }, { "epoch": 0.23168908819133036, "grad_norm": 2.332202196121216, "learning_rate": 9.710643429813833e-05, "loss": 3.5247, "step": 3410 }, { "epoch": 0.23202880826199213, "grad_norm": 2.6892404556274414, "learning_rate": 9.710218779725506e-05, "loss": 3.5611, "step": 3415 }, { "epoch": 0.2323685283326539, "grad_norm": 2.2102909088134766, "learning_rate": 9.70979412963718e-05, "loss": 3.759, "step": 3420 }, { "epoch": 0.23270824840331567, "grad_norm": 2.823338031768799, "learning_rate": 9.709369479548852e-05, "loss": 3.6453, "step": 3425 }, { "epoch": 0.23304796847397743, "grad_norm": 2.9724040031433105, "learning_rate": 9.708944829460525e-05, "loss": 3.6166, "step": 3430 }, { "epoch": 0.2333876885446392, "grad_norm": 1.7646759748458862, "learning_rate": 9.708520179372197e-05, "loss": 3.6706, "step": 3435 }, { "epoch": 
0.233727408615301, "grad_norm": 3.241206169128418, "learning_rate": 9.70809552928387e-05, "loss": 3.3441, "step": 3440 }, { "epoch": 0.23406712868596277, "grad_norm": 3.5427963733673096, "learning_rate": 9.707670879195543e-05, "loss": 3.377, "step": 3445 }, { "epoch": 0.23440684875662454, "grad_norm": 2.484489917755127, "learning_rate": 9.707246229107216e-05, "loss": 3.555, "step": 3450 }, { "epoch": 0.2347465688272863, "grad_norm": 2.6747255325317383, "learning_rate": 9.706821579018889e-05, "loss": 3.5748, "step": 3455 }, { "epoch": 0.23508628889794808, "grad_norm": 2.3457493782043457, "learning_rate": 9.706396928930561e-05, "loss": 3.4077, "step": 3460 }, { "epoch": 0.23542600896860988, "grad_norm": 1.8754417896270752, "learning_rate": 9.705972278842234e-05, "loss": 3.6926, "step": 3465 }, { "epoch": 0.23576572903927165, "grad_norm": 2.3581864833831787, "learning_rate": 9.705547628753907e-05, "loss": 3.4543, "step": 3470 }, { "epoch": 0.23610544910993342, "grad_norm": 2.6379165649414062, "learning_rate": 9.70512297866558e-05, "loss": 3.4407, "step": 3475 }, { "epoch": 0.2364451691805952, "grad_norm": 3.1290669441223145, "learning_rate": 9.704698328577253e-05, "loss": 3.7414, "step": 3480 }, { "epoch": 0.23678488925125696, "grad_norm": 2.228219747543335, "learning_rate": 9.704273678488925e-05, "loss": 3.6329, "step": 3485 }, { "epoch": 0.23712460932191873, "grad_norm": 2.2058937549591064, "learning_rate": 9.703849028400598e-05, "loss": 3.3255, "step": 3490 }, { "epoch": 0.23746432939258053, "grad_norm": 2.2477197647094727, "learning_rate": 9.703424378312271e-05, "loss": 3.5478, "step": 3495 }, { "epoch": 0.2378040494632423, "grad_norm": 2.473595142364502, "learning_rate": 9.702999728223944e-05, "loss": 3.6665, "step": 3500 }, { "epoch": 0.23814376953390406, "grad_norm": 2.5324323177337646, "learning_rate": 9.702575078135617e-05, "loss": 3.6785, "step": 3505 }, { "epoch": 0.23848348960456583, "grad_norm": 2.982419013977051, "learning_rate": 9.70215042804729e-05, 
"loss": 3.472, "step": 3510 }, { "epoch": 0.2388232096752276, "grad_norm": 2.538529396057129, "learning_rate": 9.701725777958962e-05, "loss": 3.5603, "step": 3515 }, { "epoch": 0.23916292974588937, "grad_norm": 2.321845054626465, "learning_rate": 9.701301127870635e-05, "loss": 3.4214, "step": 3520 }, { "epoch": 0.23950264981655117, "grad_norm": 1.846472144126892, "learning_rate": 9.700876477782308e-05, "loss": 3.6138, "step": 3525 }, { "epoch": 0.23984236988721294, "grad_norm": 2.2306771278381348, "learning_rate": 9.70045182769398e-05, "loss": 3.5543, "step": 3530 }, { "epoch": 0.2401820899578747, "grad_norm": 2.332549571990967, "learning_rate": 9.700027177605653e-05, "loss": 3.2609, "step": 3535 }, { "epoch": 0.24052181002853648, "grad_norm": 2.479609966278076, "learning_rate": 9.699602527517326e-05, "loss": 3.5471, "step": 3540 }, { "epoch": 0.24086153009919825, "grad_norm": 2.5703837871551514, "learning_rate": 9.699177877428999e-05, "loss": 3.5975, "step": 3545 }, { "epoch": 0.24120125016986005, "grad_norm": 2.5452921390533447, "learning_rate": 9.698753227340672e-05, "loss": 3.7861, "step": 3550 }, { "epoch": 0.24154097024052182, "grad_norm": 2.0180318355560303, "learning_rate": 9.698328577252345e-05, "loss": 3.6417, "step": 3555 }, { "epoch": 0.2418806903111836, "grad_norm": 2.594252347946167, "learning_rate": 9.697903927164017e-05, "loss": 3.3996, "step": 3560 }, { "epoch": 0.24222041038184536, "grad_norm": 2.105578899383545, "learning_rate": 9.69747927707569e-05, "loss": 3.5252, "step": 3565 }, { "epoch": 0.24256013045250713, "grad_norm": 2.715123414993286, "learning_rate": 9.697054626987363e-05, "loss": 3.3542, "step": 3570 }, { "epoch": 0.2428998505231689, "grad_norm": 3.476891040802002, "learning_rate": 9.696629976899036e-05, "loss": 3.6821, "step": 3575 }, { "epoch": 0.2432395705938307, "grad_norm": 2.4146616458892822, "learning_rate": 9.696205326810709e-05, "loss": 3.6638, "step": 3580 }, { "epoch": 0.24357929066449246, "grad_norm": 2.325687885284424, 
"learning_rate": 9.695780676722381e-05, "loss": 3.6656, "step": 3585 }, { "epoch": 0.24391901073515423, "grad_norm": 2.509408712387085, "learning_rate": 9.695356026634054e-05, "loss": 3.3896, "step": 3590 }, { "epoch": 0.244258730805816, "grad_norm": 2.271212100982666, "learning_rate": 9.694931376545727e-05, "loss": 3.5217, "step": 3595 }, { "epoch": 0.24459845087647777, "grad_norm": 2.4985265731811523, "learning_rate": 9.6945067264574e-05, "loss": 3.483, "step": 3600 }, { "epoch": 0.24493817094713954, "grad_norm": 2.5406460762023926, "learning_rate": 9.694082076369073e-05, "loss": 3.4885, "step": 3605 }, { "epoch": 0.24527789101780134, "grad_norm": 2.8880388736724854, "learning_rate": 9.693657426280745e-05, "loss": 3.5721, "step": 3610 }, { "epoch": 0.2456176110884631, "grad_norm": 2.0021584033966064, "learning_rate": 9.693232776192418e-05, "loss": 3.7434, "step": 3615 }, { "epoch": 0.24595733115912488, "grad_norm": 2.264113664627075, "learning_rate": 9.692808126104091e-05, "loss": 3.3232, "step": 3620 }, { "epoch": 0.24629705122978665, "grad_norm": 2.6743524074554443, "learning_rate": 9.692383476015764e-05, "loss": 3.4665, "step": 3625 }, { "epoch": 0.24663677130044842, "grad_norm": 2.0711376667022705, "learning_rate": 9.691958825927437e-05, "loss": 3.4765, "step": 3630 }, { "epoch": 0.24697649137111022, "grad_norm": 2.702500820159912, "learning_rate": 9.691534175839108e-05, "loss": 3.723, "step": 3635 }, { "epoch": 0.247316211441772, "grad_norm": 2.507871627807617, "learning_rate": 9.691109525750782e-05, "loss": 3.7241, "step": 3640 }, { "epoch": 0.24765593151243376, "grad_norm": 2.5994646549224854, "learning_rate": 9.690684875662455e-05, "loss": 3.7523, "step": 3645 }, { "epoch": 0.24799565158309553, "grad_norm": 1.9709161520004272, "learning_rate": 9.690260225574126e-05, "loss": 3.53, "step": 3650 }, { "epoch": 0.2483353716537573, "grad_norm": 2.298203468322754, "learning_rate": 9.6898355754858e-05, "loss": 3.3325, "step": 3655 }, { "epoch": 
0.24867509172441907, "grad_norm": 2.473635196685791, "learning_rate": 9.689410925397473e-05, "loss": 3.4705, "step": 3660 }, { "epoch": 0.24901481179508086, "grad_norm": 2.189612865447998, "learning_rate": 9.688986275309145e-05, "loss": 3.4748, "step": 3665 }, { "epoch": 0.24935453186574263, "grad_norm": 2.4042279720306396, "learning_rate": 9.688561625220819e-05, "loss": 3.5936, "step": 3670 }, { "epoch": 0.2496942519364044, "grad_norm": 3.3932950496673584, "learning_rate": 9.688136975132492e-05, "loss": 3.6917, "step": 3675 }, { "epoch": 0.2500339720070662, "grad_norm": 2.467170000076294, "learning_rate": 9.687712325044163e-05, "loss": 3.6054, "step": 3680 }, { "epoch": 0.25037369207772797, "grad_norm": 2.3353543281555176, "learning_rate": 9.687287674955837e-05, "loss": 3.4045, "step": 3685 }, { "epoch": 0.2507134121483897, "grad_norm": 2.071033477783203, "learning_rate": 9.68686302486751e-05, "loss": 3.46, "step": 3690 }, { "epoch": 0.2510531322190515, "grad_norm": 2.2171244621276855, "learning_rate": 9.686438374779182e-05, "loss": 3.637, "step": 3695 }, { "epoch": 0.25139285228971325, "grad_norm": 2.5467214584350586, "learning_rate": 9.686013724690856e-05, "loss": 3.6963, "step": 3700 }, { "epoch": 0.25173257236037505, "grad_norm": 2.5387585163116455, "learning_rate": 9.685589074602529e-05, "loss": 3.6495, "step": 3705 }, { "epoch": 0.25207229243103685, "grad_norm": 2.4366462230682373, "learning_rate": 9.6851644245142e-05, "loss": 3.5348, "step": 3710 }, { "epoch": 0.2524120125016986, "grad_norm": 3.1417269706726074, "learning_rate": 9.684739774425874e-05, "loss": 3.5507, "step": 3715 }, { "epoch": 0.2527517325723604, "grad_norm": 1.702452540397644, "learning_rate": 9.684315124337546e-05, "loss": 3.4484, "step": 3720 }, { "epoch": 0.25309145264302213, "grad_norm": 1.9584827423095703, "learning_rate": 9.683890474249218e-05, "loss": 3.6776, "step": 3725 }, { "epoch": 0.2534311727136839, "grad_norm": 2.385561943054199, "learning_rate": 9.683465824160893e-05, 
"loss": 3.701, "step": 3730 }, { "epoch": 0.2537708927843457, "grad_norm": 2.3099288940429688, "learning_rate": 9.683041174072564e-05, "loss": 3.5095, "step": 3735 }, { "epoch": 0.25411061285500747, "grad_norm": 2.428244113922119, "learning_rate": 9.682616523984237e-05, "loss": 3.6398, "step": 3740 }, { "epoch": 0.25445033292566926, "grad_norm": 3.5786471366882324, "learning_rate": 9.682191873895911e-05, "loss": 3.4363, "step": 3745 }, { "epoch": 0.254790052996331, "grad_norm": 1.8464620113372803, "learning_rate": 9.681767223807582e-05, "loss": 3.4019, "step": 3750 }, { "epoch": 0.2551297730669928, "grad_norm": 2.6754884719848633, "learning_rate": 9.681342573719255e-05, "loss": 3.6996, "step": 3755 }, { "epoch": 0.25546949313765455, "grad_norm": 2.255600690841675, "learning_rate": 9.68091792363093e-05, "loss": 3.5124, "step": 3760 }, { "epoch": 0.25580921320831634, "grad_norm": 1.8826730251312256, "learning_rate": 9.680493273542601e-05, "loss": 3.5347, "step": 3765 }, { "epoch": 0.25614893327897814, "grad_norm": 2.5889227390289307, "learning_rate": 9.680068623454274e-05, "loss": 3.5486, "step": 3770 }, { "epoch": 0.2564886533496399, "grad_norm": 2.1120548248291016, "learning_rate": 9.679643973365948e-05, "loss": 3.4781, "step": 3775 }, { "epoch": 0.2568283734203017, "grad_norm": 2.3759171962738037, "learning_rate": 9.679219323277619e-05, "loss": 3.6285, "step": 3780 }, { "epoch": 0.2571680934909634, "grad_norm": 2.5310583114624023, "learning_rate": 9.678794673189292e-05, "loss": 3.6126, "step": 3785 }, { "epoch": 0.2575078135616252, "grad_norm": 2.1929080486297607, "learning_rate": 9.678370023100965e-05, "loss": 3.6937, "step": 3790 }, { "epoch": 0.257847533632287, "grad_norm": 2.534999132156372, "learning_rate": 9.677945373012638e-05, "loss": 3.5688, "step": 3795 }, { "epoch": 0.25818725370294876, "grad_norm": 2.346290349960327, "learning_rate": 9.67752072292431e-05, "loss": 3.4974, "step": 3800 }, { "epoch": 0.25852697377361056, "grad_norm": 2.659761428833008, 
"learning_rate": 9.677096072835983e-05, "loss": 3.5312, "step": 3805 }, { "epoch": 0.2588666938442723, "grad_norm": 2.5047528743743896, "learning_rate": 9.676671422747656e-05, "loss": 3.6177, "step": 3810 }, { "epoch": 0.2592064139149341, "grad_norm": 2.0250766277313232, "learning_rate": 9.676246772659329e-05, "loss": 3.5748, "step": 3815 }, { "epoch": 0.2595461339855959, "grad_norm": 2.3063530921936035, "learning_rate": 9.675822122571002e-05, "loss": 3.3994, "step": 3820 }, { "epoch": 0.25988585405625764, "grad_norm": 2.4666457176208496, "learning_rate": 9.675397472482674e-05, "loss": 3.6596, "step": 3825 }, { "epoch": 0.26022557412691943, "grad_norm": 3.235499858856201, "learning_rate": 9.674972822394347e-05, "loss": 3.3593, "step": 3830 }, { "epoch": 0.2605652941975812, "grad_norm": 2.335772752761841, "learning_rate": 9.67454817230602e-05, "loss": 3.431, "step": 3835 }, { "epoch": 0.260905014268243, "grad_norm": 1.8728452920913696, "learning_rate": 9.674123522217693e-05, "loss": 3.5788, "step": 3840 }, { "epoch": 0.2612447343389047, "grad_norm": 2.2257189750671387, "learning_rate": 9.673698872129366e-05, "loss": 3.7165, "step": 3845 }, { "epoch": 0.2615844544095665, "grad_norm": 2.8375751972198486, "learning_rate": 9.673274222041038e-05, "loss": 3.6698, "step": 3850 }, { "epoch": 0.2619241744802283, "grad_norm": 2.3299713134765625, "learning_rate": 9.672849571952711e-05, "loss": 3.3994, "step": 3855 }, { "epoch": 0.26226389455089005, "grad_norm": 3.0395591259002686, "learning_rate": 9.672424921864384e-05, "loss": 3.6161, "step": 3860 }, { "epoch": 0.26260361462155185, "grad_norm": 2.087859630584717, "learning_rate": 9.672000271776057e-05, "loss": 3.4369, "step": 3865 }, { "epoch": 0.2629433346922136, "grad_norm": 2.1337099075317383, "learning_rate": 9.67157562168773e-05, "loss": 3.3582, "step": 3870 }, { "epoch": 0.2632830547628754, "grad_norm": 2.8431849479675293, "learning_rate": 9.671150971599402e-05, "loss": 3.6454, "step": 3875 }, { "epoch": 
0.2636227748335372, "grad_norm": 1.9242441654205322, "learning_rate": 9.670726321511075e-05, "loss": 3.6938, "step": 3880 }, { "epoch": 0.26396249490419893, "grad_norm": 2.4420218467712402, "learning_rate": 9.670301671422748e-05, "loss": 3.5834, "step": 3885 }, { "epoch": 0.2643022149748607, "grad_norm": 1.7619452476501465, "learning_rate": 9.669877021334421e-05, "loss": 3.663, "step": 3890 }, { "epoch": 0.26464193504552247, "grad_norm": 2.231281042098999, "learning_rate": 9.669452371246094e-05, "loss": 3.6955, "step": 3895 }, { "epoch": 0.26498165511618427, "grad_norm": 2.4212920665740967, "learning_rate": 9.669027721157766e-05, "loss": 3.3691, "step": 3900 }, { "epoch": 0.26532137518684606, "grad_norm": 2.491760730743408, "learning_rate": 9.668603071069439e-05, "loss": 3.8895, "step": 3905 }, { "epoch": 0.2656610952575078, "grad_norm": 2.7254176139831543, "learning_rate": 9.668178420981112e-05, "loss": 3.4169, "step": 3910 }, { "epoch": 0.2660008153281696, "grad_norm": 2.8254876136779785, "learning_rate": 9.667753770892785e-05, "loss": 3.7341, "step": 3915 }, { "epoch": 0.26634053539883135, "grad_norm": 2.8087313175201416, "learning_rate": 9.667329120804458e-05, "loss": 3.767, "step": 3920 }, { "epoch": 0.26668025546949314, "grad_norm": 2.340233564376831, "learning_rate": 9.66690447071613e-05, "loss": 3.5807, "step": 3925 }, { "epoch": 0.2670199755401549, "grad_norm": 2.023094654083252, "learning_rate": 9.666479820627803e-05, "loss": 3.3063, "step": 3930 }, { "epoch": 0.2673596956108167, "grad_norm": 2.119107246398926, "learning_rate": 9.666055170539476e-05, "loss": 3.6316, "step": 3935 }, { "epoch": 0.2676994156814785, "grad_norm": 1.8223646879196167, "learning_rate": 9.665630520451149e-05, "loss": 3.6356, "step": 3940 }, { "epoch": 0.2680391357521402, "grad_norm": 2.998859405517578, "learning_rate": 9.665205870362822e-05, "loss": 3.4163, "step": 3945 }, { "epoch": 0.268378855822802, "grad_norm": 2.333075761795044, "learning_rate": 9.664781220274495e-05, "loss": 
3.782, "step": 3950 }, { "epoch": 0.26871857589346376, "grad_norm": 2.799476385116577, "learning_rate": 9.664356570186167e-05, "loss": 3.2197, "step": 3955 }, { "epoch": 0.26905829596412556, "grad_norm": 1.8839623928070068, "learning_rate": 9.66393192009784e-05, "loss": 3.6562, "step": 3960 }, { "epoch": 0.26939801603478736, "grad_norm": 2.6432347297668457, "learning_rate": 9.663507270009513e-05, "loss": 3.7665, "step": 3965 }, { "epoch": 0.2697377361054491, "grad_norm": 2.414016008377075, "learning_rate": 9.663082619921186e-05, "loss": 3.4374, "step": 3970 }, { "epoch": 0.2700774561761109, "grad_norm": 2.284450054168701, "learning_rate": 9.662657969832859e-05, "loss": 3.8625, "step": 3975 }, { "epoch": 0.27041717624677264, "grad_norm": 2.4175004959106445, "learning_rate": 9.662233319744531e-05, "loss": 3.6093, "step": 3980 }, { "epoch": 0.27075689631743444, "grad_norm": 2.2012200355529785, "learning_rate": 9.661808669656204e-05, "loss": 3.7269, "step": 3985 }, { "epoch": 0.27109661638809623, "grad_norm": 1.7872872352600098, "learning_rate": 9.661384019567876e-05, "loss": 3.5437, "step": 3990 }, { "epoch": 0.271436336458758, "grad_norm": 2.2658510208129883, "learning_rate": 9.66095936947955e-05, "loss": 3.5009, "step": 3995 }, { "epoch": 0.2717760565294198, "grad_norm": 2.562431812286377, "learning_rate": 9.660534719391223e-05, "loss": 3.8177, "step": 4000 }, { "epoch": 0.2721157766000815, "grad_norm": 2.3021233081817627, "learning_rate": 9.660110069302894e-05, "loss": 3.5899, "step": 4005 }, { "epoch": 0.2724554966707433, "grad_norm": 1.9640750885009766, "learning_rate": 9.659685419214568e-05, "loss": 3.4248, "step": 4010 }, { "epoch": 0.27279521674140506, "grad_norm": 2.4887568950653076, "learning_rate": 9.659260769126241e-05, "loss": 3.8734, "step": 4015 }, { "epoch": 0.27313493681206685, "grad_norm": 2.33853816986084, "learning_rate": 9.658836119037912e-05, "loss": 3.4048, "step": 4020 }, { "epoch": 0.27347465688272865, "grad_norm": 2.5136539936065674, 
"learning_rate": 9.658411468949587e-05, "loss": 3.4624, "step": 4025 }, { "epoch": 0.2738143769533904, "grad_norm": 2.3944242000579834, "learning_rate": 9.657986818861259e-05, "loss": 3.3688, "step": 4030 }, { "epoch": 0.2741540970240522, "grad_norm": 1.9791560173034668, "learning_rate": 9.657562168772931e-05, "loss": 3.6241, "step": 4035 }, { "epoch": 0.27449381709471393, "grad_norm": 2.2201993465423584, "learning_rate": 9.657137518684605e-05, "loss": 3.4826, "step": 4040 }, { "epoch": 0.27483353716537573, "grad_norm": 2.487506628036499, "learning_rate": 9.656712868596278e-05, "loss": 3.746, "step": 4045 }, { "epoch": 0.2751732572360375, "grad_norm": 2.5716309547424316, "learning_rate": 9.656288218507949e-05, "loss": 3.7071, "step": 4050 }, { "epoch": 0.27551297730669927, "grad_norm": 2.1051158905029297, "learning_rate": 9.655863568419623e-05, "loss": 3.5788, "step": 4055 }, { "epoch": 0.27585269737736107, "grad_norm": 2.3651576042175293, "learning_rate": 9.655438918331295e-05, "loss": 3.4922, "step": 4060 }, { "epoch": 0.2761924174480228, "grad_norm": 2.6557517051696777, "learning_rate": 9.655014268242968e-05, "loss": 3.4848, "step": 4065 }, { "epoch": 0.2765321375186846, "grad_norm": 2.4949405193328857, "learning_rate": 9.654589618154642e-05, "loss": 3.3538, "step": 4070 }, { "epoch": 0.2768718575893464, "grad_norm": 4.292568206787109, "learning_rate": 9.654164968066313e-05, "loss": 3.6778, "step": 4075 }, { "epoch": 0.27721157766000815, "grad_norm": 2.119539499282837, "learning_rate": 9.653740317977986e-05, "loss": 3.608, "step": 4080 }, { "epoch": 0.27755129773066994, "grad_norm": 2.371422290802002, "learning_rate": 9.65331566788966e-05, "loss": 3.2927, "step": 4085 }, { "epoch": 0.2778910178013317, "grad_norm": 1.9895979166030884, "learning_rate": 9.652891017801332e-05, "loss": 3.4853, "step": 4090 }, { "epoch": 0.2782307378719935, "grad_norm": 2.7541933059692383, "learning_rate": 9.652466367713004e-05, "loss": 3.554, "step": 4095 }, { "epoch": 
0.2785704579426552, "grad_norm": 2.2593939304351807, "learning_rate": 9.652041717624679e-05, "loss": 3.6386, "step": 4100 }, { "epoch": 0.278910178013317, "grad_norm": 2.2123637199401855, "learning_rate": 9.65161706753635e-05, "loss": 3.43, "step": 4105 }, { "epoch": 0.2792498980839788, "grad_norm": 2.282215118408203, "learning_rate": 9.651192417448023e-05, "loss": 3.6618, "step": 4110 }, { "epoch": 0.27958961815464056, "grad_norm": 1.8907086849212646, "learning_rate": 9.650767767359697e-05, "loss": 3.5053, "step": 4115 }, { "epoch": 0.27992933822530236, "grad_norm": 2.011336326599121, "learning_rate": 9.650343117271368e-05, "loss": 3.5264, "step": 4120 }, { "epoch": 0.2802690582959641, "grad_norm": 2.488196849822998, "learning_rate": 9.649918467183041e-05, "loss": 3.4374, "step": 4125 }, { "epoch": 0.2806087783666259, "grad_norm": 2.4004621505737305, "learning_rate": 9.649493817094715e-05, "loss": 3.5411, "step": 4130 }, { "epoch": 0.2809484984372877, "grad_norm": 1.8012967109680176, "learning_rate": 9.649069167006387e-05, "loss": 3.4135, "step": 4135 }, { "epoch": 0.28128821850794944, "grad_norm": 2.6300768852233887, "learning_rate": 9.64864451691806e-05, "loss": 3.4507, "step": 4140 }, { "epoch": 0.28162793857861124, "grad_norm": 2.403141498565674, "learning_rate": 9.648219866829732e-05, "loss": 3.5553, "step": 4145 }, { "epoch": 0.281967658649273, "grad_norm": 2.485696792602539, "learning_rate": 9.647795216741405e-05, "loss": 3.7829, "step": 4150 }, { "epoch": 0.2823073787199348, "grad_norm": 2.2314913272857666, "learning_rate": 9.647370566653078e-05, "loss": 3.7152, "step": 4155 }, { "epoch": 0.2826470987905966, "grad_norm": 2.596282958984375, "learning_rate": 9.646945916564751e-05, "loss": 3.6471, "step": 4160 }, { "epoch": 0.2829868188612583, "grad_norm": 2.3855040073394775, "learning_rate": 9.646521266476424e-05, "loss": 3.3684, "step": 4165 }, { "epoch": 0.2833265389319201, "grad_norm": 1.9615055322647095, "learning_rate": 9.646096616388096e-05, "loss": 
3.4021, "step": 4170 }, { "epoch": 0.28366625900258186, "grad_norm": 2.558335781097412, "learning_rate": 9.645671966299769e-05, "loss": 3.6088, "step": 4175 }, { "epoch": 0.28400597907324365, "grad_norm": 2.681771993637085, "learning_rate": 9.645247316211442e-05, "loss": 3.2109, "step": 4180 }, { "epoch": 0.2843456991439054, "grad_norm": 2.2063992023468018, "learning_rate": 9.644822666123115e-05, "loss": 3.6736, "step": 4185 }, { "epoch": 0.2846854192145672, "grad_norm": 2.680952310562134, "learning_rate": 9.644398016034788e-05, "loss": 3.301, "step": 4190 }, { "epoch": 0.285025139285229, "grad_norm": 2.5799989700317383, "learning_rate": 9.64397336594646e-05, "loss": 3.6096, "step": 4195 }, { "epoch": 0.28536485935589073, "grad_norm": 2.01418137550354, "learning_rate": 9.643548715858135e-05, "loss": 3.5222, "step": 4200 }, { "epoch": 0.28570457942655253, "grad_norm": 1.9510987997055054, "learning_rate": 9.643124065769806e-05, "loss": 3.6441, "step": 4205 }, { "epoch": 0.28604429949721427, "grad_norm": 2.36946439743042, "learning_rate": 9.642699415681479e-05, "loss": 3.5422, "step": 4210 }, { "epoch": 0.28638401956787607, "grad_norm": 2.337088108062744, "learning_rate": 9.642274765593152e-05, "loss": 3.53, "step": 4215 }, { "epoch": 0.28672373963853787, "grad_norm": 2.494980573654175, "learning_rate": 9.641850115504824e-05, "loss": 3.7402, "step": 4220 }, { "epoch": 0.2870634597091996, "grad_norm": 2.3410887718200684, "learning_rate": 9.641425465416497e-05, "loss": 3.7098, "step": 4225 }, { "epoch": 0.2874031797798614, "grad_norm": 2.20373272895813, "learning_rate": 9.64100081532817e-05, "loss": 3.6799, "step": 4230 }, { "epoch": 0.28774289985052315, "grad_norm": 2.4828975200653076, "learning_rate": 9.640576165239843e-05, "loss": 3.396, "step": 4235 }, { "epoch": 0.28808261992118495, "grad_norm": 2.2657108306884766, "learning_rate": 9.640151515151516e-05, "loss": 3.7589, "step": 4240 }, { "epoch": 0.28842233999184674, "grad_norm": 2.0548276901245117, 
"learning_rate": 9.639726865063188e-05, "loss": 3.6342, "step": 4245 }, { "epoch": 0.2887620600625085, "grad_norm": 2.6554222106933594, "learning_rate": 9.639302214974861e-05, "loss": 3.4329, "step": 4250 }, { "epoch": 0.2891017801331703, "grad_norm": 1.936296820640564, "learning_rate": 9.638877564886534e-05, "loss": 3.4031, "step": 4255 }, { "epoch": 0.289441500203832, "grad_norm": 2.6779937744140625, "learning_rate": 9.638452914798207e-05, "loss": 3.7708, "step": 4260 }, { "epoch": 0.2897812202744938, "grad_norm": 2.238558292388916, "learning_rate": 9.63802826470988e-05, "loss": 3.4513, "step": 4265 }, { "epoch": 0.29012094034515556, "grad_norm": 2.370130777359009, "learning_rate": 9.637603614621552e-05, "loss": 3.4908, "step": 4270 }, { "epoch": 0.29046066041581736, "grad_norm": 2.2709288597106934, "learning_rate": 9.637178964533225e-05, "loss": 3.6773, "step": 4275 }, { "epoch": 0.29080038048647916, "grad_norm": 3.465778112411499, "learning_rate": 9.636754314444898e-05, "loss": 3.4743, "step": 4280 }, { "epoch": 0.2911401005571409, "grad_norm": 1.926073431968689, "learning_rate": 9.636329664356571e-05, "loss": 3.7204, "step": 4285 }, { "epoch": 0.2914798206278027, "grad_norm": 2.610278606414795, "learning_rate": 9.635905014268244e-05, "loss": 3.4249, "step": 4290 }, { "epoch": 0.29181954069846444, "grad_norm": 2.490473508834839, "learning_rate": 9.635480364179916e-05, "loss": 3.7892, "step": 4295 }, { "epoch": 0.29215926076912624, "grad_norm": 2.551053524017334, "learning_rate": 9.635055714091589e-05, "loss": 3.4143, "step": 4300 }, { "epoch": 0.29249898083978804, "grad_norm": 2.625108242034912, "learning_rate": 9.634631064003262e-05, "loss": 3.2754, "step": 4305 }, { "epoch": 0.2928387009104498, "grad_norm": 2.3275227546691895, "learning_rate": 9.634206413914935e-05, "loss": 3.6869, "step": 4310 }, { "epoch": 0.2931784209811116, "grad_norm": 2.8762521743774414, "learning_rate": 9.633781763826608e-05, "loss": 3.2725, "step": 4315 }, { "epoch": 
0.2935181410517733, "grad_norm": 2.3196375370025635, "learning_rate": 9.63335711373828e-05, "loss": 3.4897, "step": 4320 }, { "epoch": 0.2938578611224351, "grad_norm": 2.0370218753814697, "learning_rate": 9.632932463649953e-05, "loss": 3.4089, "step": 4325 }, { "epoch": 0.2941975811930969, "grad_norm": 2.1044533252716064, "learning_rate": 9.632507813561626e-05, "loss": 3.5402, "step": 4330 }, { "epoch": 0.29453730126375866, "grad_norm": 2.2539141178131104, "learning_rate": 9.632083163473299e-05, "loss": 3.7155, "step": 4335 }, { "epoch": 0.29487702133442045, "grad_norm": 2.5959103107452393, "learning_rate": 9.631658513384972e-05, "loss": 3.4873, "step": 4340 }, { "epoch": 0.2952167414050822, "grad_norm": 2.0770015716552734, "learning_rate": 9.631233863296643e-05, "loss": 3.0496, "step": 4345 }, { "epoch": 0.295556461475744, "grad_norm": 2.2833893299102783, "learning_rate": 9.630809213208317e-05, "loss": 3.4672, "step": 4350 }, { "epoch": 0.29589618154640573, "grad_norm": 2.616764545440674, "learning_rate": 9.63038456311999e-05, "loss": 3.5965, "step": 4355 }, { "epoch": 0.29623590161706753, "grad_norm": 2.0542845726013184, "learning_rate": 9.629959913031661e-05, "loss": 3.3098, "step": 4360 }, { "epoch": 0.29657562168772933, "grad_norm": 2.2543091773986816, "learning_rate": 9.629535262943336e-05, "loss": 3.7651, "step": 4365 }, { "epoch": 0.29691534175839107, "grad_norm": 2.0595855712890625, "learning_rate": 9.629110612855008e-05, "loss": 3.6352, "step": 4370 }, { "epoch": 0.29725506182905287, "grad_norm": 2.7353787422180176, "learning_rate": 9.62868596276668e-05, "loss": 3.348, "step": 4375 }, { "epoch": 0.2975947818997146, "grad_norm": 2.508931875228882, "learning_rate": 9.628261312678354e-05, "loss": 3.5119, "step": 4380 }, { "epoch": 0.2979345019703764, "grad_norm": 1.9135644435882568, "learning_rate": 9.627836662590027e-05, "loss": 3.5481, "step": 4385 }, { "epoch": 0.2982742220410382, "grad_norm": 2.5715854167938232, "learning_rate": 9.627412012501698e-05, 
"loss": 3.7529, "step": 4390 }, { "epoch": 0.29861394211169995, "grad_norm": 2.355053424835205, "learning_rate": 9.626987362413372e-05, "loss": 3.7664, "step": 4395 }, { "epoch": 0.29895366218236175, "grad_norm": 2.377655506134033, "learning_rate": 9.626562712325045e-05, "loss": 3.5368, "step": 4400 }, { "epoch": 0.2992933822530235, "grad_norm": 2.6739094257354736, "learning_rate": 9.626138062236717e-05, "loss": 3.5576, "step": 4405 }, { "epoch": 0.2996331023236853, "grad_norm": 2.7493178844451904, "learning_rate": 9.625713412148391e-05, "loss": 3.6849, "step": 4410 }, { "epoch": 0.2999728223943471, "grad_norm": 2.9010813236236572, "learning_rate": 9.625288762060062e-05, "loss": 3.7381, "step": 4415 }, { "epoch": 0.3003125424650088, "grad_norm": 3.4917478561401367, "learning_rate": 9.624864111971735e-05, "loss": 3.5783, "step": 4420 }, { "epoch": 0.3006522625356706, "grad_norm": 2.0238993167877197, "learning_rate": 9.624439461883409e-05, "loss": 3.7146, "step": 4425 }, { "epoch": 0.30099198260633236, "grad_norm": 2.591813802719116, "learning_rate": 9.62401481179508e-05, "loss": 3.7996, "step": 4430 }, { "epoch": 0.30133170267699416, "grad_norm": 3.253753185272217, "learning_rate": 9.623590161706753e-05, "loss": 3.8351, "step": 4435 }, { "epoch": 0.3016714227476559, "grad_norm": 2.0914976596832275, "learning_rate": 9.623165511618428e-05, "loss": 3.4595, "step": 4440 }, { "epoch": 0.3020111428183177, "grad_norm": 2.1238136291503906, "learning_rate": 9.622740861530099e-05, "loss": 3.5304, "step": 4445 }, { "epoch": 0.3023508628889795, "grad_norm": 2.066178798675537, "learning_rate": 9.622316211441772e-05, "loss": 3.545, "step": 4450 }, { "epoch": 0.30269058295964124, "grad_norm": 1.8286415338516235, "learning_rate": 9.621891561353446e-05, "loss": 3.3081, "step": 4455 }, { "epoch": 0.30303030303030304, "grad_norm": 2.3473880290985107, "learning_rate": 9.621466911265117e-05, "loss": 3.4117, "step": 4460 }, { "epoch": 0.3033700231009648, "grad_norm": 2.1917498111724854, 
"learning_rate": 9.62104226117679e-05, "loss": 3.5267, "step": 4465 }, { "epoch": 0.3037097431716266, "grad_norm": 2.225980758666992, "learning_rate": 9.620617611088464e-05, "loss": 3.3304, "step": 4470 }, { "epoch": 0.3040494632422884, "grad_norm": 2.079214334487915, "learning_rate": 9.620192961000136e-05, "loss": 3.5595, "step": 4475 }, { "epoch": 0.3043891833129501, "grad_norm": 2.7024800777435303, "learning_rate": 9.619768310911809e-05, "loss": 3.6907, "step": 4480 }, { "epoch": 0.3047289033836119, "grad_norm": 2.1574583053588867, "learning_rate": 9.619343660823483e-05, "loss": 3.3908, "step": 4485 }, { "epoch": 0.30506862345427366, "grad_norm": 2.0015764236450195, "learning_rate": 9.618919010735154e-05, "loss": 3.859, "step": 4490 }, { "epoch": 0.30540834352493546, "grad_norm": 3.2876758575439453, "learning_rate": 9.618494360646827e-05, "loss": 3.5404, "step": 4495 }, { "epoch": 0.30574806359559725, "grad_norm": 2.3011112213134766, "learning_rate": 9.6180697105585e-05, "loss": 3.5167, "step": 4500 }, { "epoch": 0.306087783666259, "grad_norm": 4.733278274536133, "learning_rate": 9.617645060470173e-05, "loss": 3.3622, "step": 4505 }, { "epoch": 0.3064275037369208, "grad_norm": 2.109318256378174, "learning_rate": 9.617220410381845e-05, "loss": 3.9048, "step": 4510 }, { "epoch": 0.30676722380758253, "grad_norm": 1.850805640220642, "learning_rate": 9.616795760293518e-05, "loss": 3.6222, "step": 4515 }, { "epoch": 0.30710694387824433, "grad_norm": 2.6362383365631104, "learning_rate": 9.616371110205191e-05, "loss": 3.6315, "step": 4520 }, { "epoch": 0.3074466639489061, "grad_norm": 2.3985159397125244, "learning_rate": 9.615946460116864e-05, "loss": 3.3994, "step": 4525 }, { "epoch": 0.30778638401956787, "grad_norm": 3.1595442295074463, "learning_rate": 9.615521810028537e-05, "loss": 3.6236, "step": 4530 }, { "epoch": 0.30812610409022967, "grad_norm": 2.6055166721343994, "learning_rate": 9.61509715994021e-05, "loss": 3.399, "step": 4535 }, { "epoch": 
0.3084658241608914, "grad_norm": 2.2961208820343018, "learning_rate": 9.614672509851884e-05, "loss": 3.4636, "step": 4540 }, { "epoch": 0.3088055442315532, "grad_norm": 2.6362643241882324, "learning_rate": 9.614247859763555e-05, "loss": 3.3003, "step": 4545 }, { "epoch": 0.30914526430221495, "grad_norm": 2.211743116378784, "learning_rate": 9.613823209675228e-05, "loss": 3.4525, "step": 4550 }, { "epoch": 0.30948498437287675, "grad_norm": 1.7058196067810059, "learning_rate": 9.613398559586902e-05, "loss": 3.6407, "step": 4555 }, { "epoch": 0.30982470444353855, "grad_norm": 1.867773413658142, "learning_rate": 9.612973909498573e-05, "loss": 3.5318, "step": 4560 }, { "epoch": 0.3101644245142003, "grad_norm": 2.837249994277954, "learning_rate": 9.612549259410246e-05, "loss": 3.5919, "step": 4565 }, { "epoch": 0.3105041445848621, "grad_norm": 3.0847020149230957, "learning_rate": 9.612124609321919e-05, "loss": 3.6143, "step": 4570 }, { "epoch": 0.3108438646555238, "grad_norm": 2.0619595050811768, "learning_rate": 9.611699959233592e-05, "loss": 3.2918, "step": 4575 }, { "epoch": 0.3111835847261856, "grad_norm": 2.6687724590301514, "learning_rate": 9.611275309145265e-05, "loss": 3.7714, "step": 4580 }, { "epoch": 0.3115233047968474, "grad_norm": 2.254484176635742, "learning_rate": 9.610850659056937e-05, "loss": 3.6149, "step": 4585 }, { "epoch": 0.31186302486750916, "grad_norm": 2.5946383476257324, "learning_rate": 9.61042600896861e-05, "loss": 3.5097, "step": 4590 }, { "epoch": 0.31220274493817096, "grad_norm": 2.005279064178467, "learning_rate": 9.610001358880283e-05, "loss": 3.5782, "step": 4595 }, { "epoch": 0.3125424650088327, "grad_norm": 2.2426390647888184, "learning_rate": 9.609576708791956e-05, "loss": 3.6735, "step": 4600 }, { "epoch": 0.3128821850794945, "grad_norm": 2.1684017181396484, "learning_rate": 9.609152058703629e-05, "loss": 3.6251, "step": 4605 }, { "epoch": 0.3132219051501563, "grad_norm": 2.69171142578125, "learning_rate": 9.608727408615301e-05, 
"loss": 3.519, "step": 4610 }, { "epoch": 0.31356162522081804, "grad_norm": 2.0534961223602295, "learning_rate": 9.608302758526974e-05, "loss": 3.4568, "step": 4615 }, { "epoch": 0.31390134529147984, "grad_norm": 2.2909598350524902, "learning_rate": 9.607878108438647e-05, "loss": 3.741, "step": 4620 }, { "epoch": 0.3142410653621416, "grad_norm": 1.7981642484664917, "learning_rate": 9.60745345835032e-05, "loss": 3.4916, "step": 4625 }, { "epoch": 0.3145807854328034, "grad_norm": 1.9211735725402832, "learning_rate": 9.607028808261993e-05, "loss": 3.3815, "step": 4630 }, { "epoch": 0.3149205055034651, "grad_norm": 2.814077854156494, "learning_rate": 9.606604158173665e-05, "loss": 3.4764, "step": 4635 }, { "epoch": 0.3152602255741269, "grad_norm": 2.218183994293213, "learning_rate": 9.606179508085338e-05, "loss": 3.6787, "step": 4640 }, { "epoch": 0.3155999456447887, "grad_norm": 2.0759880542755127, "learning_rate": 9.605754857997011e-05, "loss": 3.5326, "step": 4645 }, { "epoch": 0.31593966571545046, "grad_norm": 2.1081411838531494, "learning_rate": 9.605330207908684e-05, "loss": 3.4549, "step": 4650 }, { "epoch": 0.31627938578611225, "grad_norm": 2.7850899696350098, "learning_rate": 9.604905557820357e-05, "loss": 3.4321, "step": 4655 }, { "epoch": 0.316619105856774, "grad_norm": 2.095616340637207, "learning_rate": 9.60448090773203e-05, "loss": 3.7483, "step": 4660 }, { "epoch": 0.3169588259274358, "grad_norm": 3.430894613265991, "learning_rate": 9.604056257643702e-05, "loss": 3.5369, "step": 4665 }, { "epoch": 0.3172985459980976, "grad_norm": 2.5829780101776123, "learning_rate": 9.603631607555375e-05, "loss": 3.8033, "step": 4670 }, { "epoch": 0.31763826606875933, "grad_norm": 2.3534040451049805, "learning_rate": 9.603206957467048e-05, "loss": 3.63, "step": 4675 }, { "epoch": 0.31797798613942113, "grad_norm": 1.994956612586975, "learning_rate": 9.602782307378721e-05, "loss": 3.4428, "step": 4680 }, { "epoch": 0.3183177062100829, "grad_norm": 1.761325716972351, 
"learning_rate": 9.602357657290394e-05, "loss": 3.6485, "step": 4685 }, { "epoch": 0.31865742628074467, "grad_norm": 2.204400062561035, "learning_rate": 9.601933007202066e-05, "loss": 3.7453, "step": 4690 }, { "epoch": 0.31899714635140647, "grad_norm": 1.9333000183105469, "learning_rate": 9.601508357113739e-05, "loss": 3.6396, "step": 4695 }, { "epoch": 0.3193368664220682, "grad_norm": 2.2336275577545166, "learning_rate": 9.60108370702541e-05, "loss": 3.5145, "step": 4700 }, { "epoch": 0.31967658649273, "grad_norm": 2.0712366104125977, "learning_rate": 9.600659056937085e-05, "loss": 3.408, "step": 4705 }, { "epoch": 0.32001630656339175, "grad_norm": 2.6495025157928467, "learning_rate": 9.600234406848758e-05, "loss": 3.3555, "step": 4710 }, { "epoch": 0.32035602663405355, "grad_norm": 2.1428282260894775, "learning_rate": 9.599809756760429e-05, "loss": 3.5858, "step": 4715 }, { "epoch": 0.3206957467047153, "grad_norm": 2.094815254211426, "learning_rate": 9.599385106672103e-05, "loss": 3.6793, "step": 4720 }, { "epoch": 0.3210354667753771, "grad_norm": 2.002483606338501, "learning_rate": 9.598960456583776e-05, "loss": 3.6505, "step": 4725 }, { "epoch": 0.3213751868460389, "grad_norm": 2.252927541732788, "learning_rate": 9.598535806495447e-05, "loss": 3.4, "step": 4730 }, { "epoch": 0.3217149069167006, "grad_norm": 2.516890287399292, "learning_rate": 9.598111156407122e-05, "loss": 3.2724, "step": 4735 }, { "epoch": 0.3220546269873624, "grad_norm": 2.3295066356658936, "learning_rate": 9.597686506318794e-05, "loss": 3.3916, "step": 4740 }, { "epoch": 0.32239434705802417, "grad_norm": 2.1854660511016846, "learning_rate": 9.597261856230466e-05, "loss": 3.3465, "step": 4745 }, { "epoch": 0.32273406712868596, "grad_norm": 2.062574863433838, "learning_rate": 9.59683720614214e-05, "loss": 3.4885, "step": 4750 }, { "epoch": 0.32307378719934776, "grad_norm": 2.5735292434692383, "learning_rate": 9.596412556053813e-05, "loss": 3.671, "step": 4755 }, { "epoch": 0.3234135072700095, 
"grad_norm": 2.2522685527801514, "learning_rate": 9.595987905965484e-05, "loss": 3.3854, "step": 4760 }, { "epoch": 0.3237532273406713, "grad_norm": 2.4699902534484863, "learning_rate": 9.595563255877158e-05, "loss": 3.588, "step": 4765 }, { "epoch": 0.32409294741133304, "grad_norm": 2.5795655250549316, "learning_rate": 9.59513860578883e-05, "loss": 3.5708, "step": 4770 }, { "epoch": 0.32443266748199484, "grad_norm": 2.078995943069458, "learning_rate": 9.594713955700503e-05, "loss": 3.3525, "step": 4775 }, { "epoch": 0.32477238755265664, "grad_norm": 1.9790847301483154, "learning_rate": 9.594289305612177e-05, "loss": 3.626, "step": 4780 }, { "epoch": 0.3251121076233184, "grad_norm": 2.1064956188201904, "learning_rate": 9.593864655523848e-05, "loss": 3.5161, "step": 4785 }, { "epoch": 0.3254518276939802, "grad_norm": 2.2212953567504883, "learning_rate": 9.593440005435521e-05, "loss": 3.3063, "step": 4790 }, { "epoch": 0.3257915477646419, "grad_norm": 1.9809656143188477, "learning_rate": 9.593015355347195e-05, "loss": 3.5678, "step": 4795 }, { "epoch": 0.3261312678353037, "grad_norm": 2.668142080307007, "learning_rate": 9.592590705258867e-05, "loss": 3.3486, "step": 4800 }, { "epoch": 0.32647098790596546, "grad_norm": 2.9718236923217773, "learning_rate": 9.59216605517054e-05, "loss": 3.3339, "step": 4805 }, { "epoch": 0.32681070797662726, "grad_norm": 2.3413827419281006, "learning_rate": 9.591741405082214e-05, "loss": 3.765, "step": 4810 }, { "epoch": 0.32715042804728905, "grad_norm": 2.590475559234619, "learning_rate": 9.591316754993885e-05, "loss": 3.3701, "step": 4815 }, { "epoch": 0.3274901481179508, "grad_norm": 3.03633975982666, "learning_rate": 9.590892104905558e-05, "loss": 3.6036, "step": 4820 }, { "epoch": 0.3278298681886126, "grad_norm": 1.715158224105835, "learning_rate": 9.590467454817232e-05, "loss": 3.6644, "step": 4825 }, { "epoch": 0.32816958825927434, "grad_norm": 2.420332193374634, "learning_rate": 9.590042804728903e-05, "loss": 3.5532, "step": 
4830 }, { "epoch": 0.32850930832993613, "grad_norm": 2.3807718753814697, "learning_rate": 9.589618154640576e-05, "loss": 3.5792, "step": 4835 }, { "epoch": 0.32884902840059793, "grad_norm": 2.3275177478790283, "learning_rate": 9.589193504552249e-05, "loss": 3.7666, "step": 4840 }, { "epoch": 0.3291887484712597, "grad_norm": 2.4652233123779297, "learning_rate": 9.588768854463922e-05, "loss": 3.6853, "step": 4845 }, { "epoch": 0.32952846854192147, "grad_norm": 3.2501232624053955, "learning_rate": 9.588344204375595e-05, "loss": 3.5766, "step": 4850 }, { "epoch": 0.3298681886125832, "grad_norm": 3.4538049697875977, "learning_rate": 9.587919554287267e-05, "loss": 3.6567, "step": 4855 }, { "epoch": 0.330207908683245, "grad_norm": 2.496765375137329, "learning_rate": 9.58749490419894e-05, "loss": 3.2606, "step": 4860 }, { "epoch": 0.3305476287539068, "grad_norm": 2.3711674213409424, "learning_rate": 9.587070254110613e-05, "loss": 3.6201, "step": 4865 }, { "epoch": 0.33088734882456855, "grad_norm": 2.1462559700012207, "learning_rate": 9.586645604022286e-05, "loss": 3.769, "step": 4870 }, { "epoch": 0.33122706889523035, "grad_norm": 2.1831979751586914, "learning_rate": 9.586220953933959e-05, "loss": 3.2845, "step": 4875 }, { "epoch": 0.3315667889658921, "grad_norm": 2.3417398929595947, "learning_rate": 9.585796303845633e-05, "loss": 3.2206, "step": 4880 }, { "epoch": 0.3319065090365539, "grad_norm": 1.9240666627883911, "learning_rate": 9.585371653757304e-05, "loss": 3.6608, "step": 4885 }, { "epoch": 0.33224622910721563, "grad_norm": 2.5419371128082275, "learning_rate": 9.584947003668977e-05, "loss": 3.5532, "step": 4890 }, { "epoch": 0.3325859491778774, "grad_norm": 2.2226459980010986, "learning_rate": 9.584522353580651e-05, "loss": 3.7632, "step": 4895 }, { "epoch": 0.3329256692485392, "grad_norm": 2.428110122680664, "learning_rate": 9.584097703492323e-05, "loss": 3.5621, "step": 4900 }, { "epoch": 0.33326538931920097, "grad_norm": 2.192072868347168, "learning_rate": 
9.583673053403995e-05, "loss": 3.4916, "step": 4905 }, { "epoch": 0.33360510938986276, "grad_norm": 1.971107006072998, "learning_rate": 9.58324840331567e-05, "loss": 3.7121, "step": 4910 }, { "epoch": 0.3339448294605245, "grad_norm": 2.5853402614593506, "learning_rate": 9.582823753227341e-05, "loss": 3.617, "step": 4915 }, { "epoch": 0.3342845495311863, "grad_norm": 2.0271685123443604, "learning_rate": 9.582399103139014e-05, "loss": 3.3627, "step": 4920 }, { "epoch": 0.3346242696018481, "grad_norm": 2.1375293731689453, "learning_rate": 9.581974453050687e-05, "loss": 3.5635, "step": 4925 }, { "epoch": 0.33496398967250984, "grad_norm": 2.15071702003479, "learning_rate": 9.58154980296236e-05, "loss": 4.0459, "step": 4930 }, { "epoch": 0.33530370974317164, "grad_norm": 2.625150680541992, "learning_rate": 9.581125152874032e-05, "loss": 3.4702, "step": 4935 }, { "epoch": 0.3356434298138334, "grad_norm": 2.021744728088379, "learning_rate": 9.580700502785705e-05, "loss": 3.4294, "step": 4940 }, { "epoch": 0.3359831498844952, "grad_norm": 2.2073516845703125, "learning_rate": 9.580275852697378e-05, "loss": 3.6793, "step": 4945 }, { "epoch": 0.336322869955157, "grad_norm": 2.4387354850769043, "learning_rate": 9.57985120260905e-05, "loss": 3.5551, "step": 4950 }, { "epoch": 0.3366625900258187, "grad_norm": 1.6607073545455933, "learning_rate": 9.579426552520723e-05, "loss": 3.639, "step": 4955 }, { "epoch": 0.3370023100964805, "grad_norm": 2.0000860691070557, "learning_rate": 9.579001902432396e-05, "loss": 3.3364, "step": 4960 }, { "epoch": 0.33734203016714226, "grad_norm": 1.7590445280075073, "learning_rate": 9.578577252344069e-05, "loss": 3.5949, "step": 4965 }, { "epoch": 0.33768175023780406, "grad_norm": 2.9744365215301514, "learning_rate": 9.578152602255742e-05, "loss": 3.3676, "step": 4970 }, { "epoch": 0.3380214703084658, "grad_norm": 3.2376041412353516, "learning_rate": 9.577727952167415e-05, "loss": 3.5237, "step": 4975 }, { "epoch": 0.3383611903791276, "grad_norm": 
2.650528907775879, "learning_rate": 9.577303302079087e-05, "loss": 3.5577, "step": 4980 }, { "epoch": 0.3387009104497894, "grad_norm": 2.194971799850464, "learning_rate": 9.57687865199076e-05, "loss": 3.464, "step": 4985 }, { "epoch": 0.33904063052045114, "grad_norm": 3.3826820850372314, "learning_rate": 9.576454001902433e-05, "loss": 3.6076, "step": 4990 }, { "epoch": 0.33938035059111293, "grad_norm": 2.0753471851348877, "learning_rate": 9.576029351814106e-05, "loss": 3.679, "step": 4995 }, { "epoch": 0.3397200706617747, "grad_norm": 2.0594770908355713, "learning_rate": 9.575604701725779e-05, "loss": 3.4702, "step": 5000 }, { "epoch": 0.3400597907324365, "grad_norm": 2.103978157043457, "learning_rate": 9.575180051637451e-05, "loss": 3.7303, "step": 5005 }, { "epoch": 0.34039951080309827, "grad_norm": 2.249831199645996, "learning_rate": 9.574755401549124e-05, "loss": 3.3897, "step": 5010 }, { "epoch": 0.34073923087376, "grad_norm": 2.216661214828491, "learning_rate": 9.574330751460797e-05, "loss": 3.3432, "step": 5015 }, { "epoch": 0.3410789509444218, "grad_norm": 2.2671594619750977, "learning_rate": 9.57390610137247e-05, "loss": 3.2468, "step": 5020 }, { "epoch": 0.34141867101508355, "grad_norm": 2.5372536182403564, "learning_rate": 9.573481451284143e-05, "loss": 3.2482, "step": 5025 }, { "epoch": 0.34175839108574535, "grad_norm": 2.581930637359619, "learning_rate": 9.573056801195815e-05, "loss": 3.3906, "step": 5030 }, { "epoch": 0.34209811115640715, "grad_norm": 2.497941493988037, "learning_rate": 9.572632151107488e-05, "loss": 3.2897, "step": 5035 }, { "epoch": 0.3424378312270689, "grad_norm": 2.802440881729126, "learning_rate": 9.57220750101916e-05, "loss": 3.3894, "step": 5040 }, { "epoch": 0.3427775512977307, "grad_norm": 2.4767963886260986, "learning_rate": 9.571782850930834e-05, "loss": 3.597, "step": 5045 }, { "epoch": 0.34311727136839243, "grad_norm": 2.506659507751465, "learning_rate": 9.571358200842507e-05, "loss": 3.5804, "step": 5050 }, { "epoch": 
0.3434569914390542, "grad_norm": 2.0703656673431396, "learning_rate": 9.570933550754178e-05, "loss": 3.7641, "step": 5055 }, { "epoch": 0.34379671150971597, "grad_norm": 3.0426642894744873, "learning_rate": 9.570508900665852e-05, "loss": 3.3222, "step": 5060 }, { "epoch": 0.34413643158037777, "grad_norm": 1.9872699975967407, "learning_rate": 9.570084250577525e-05, "loss": 3.2471, "step": 5065 }, { "epoch": 0.34447615165103956, "grad_norm": 2.0045838356018066, "learning_rate": 9.569659600489196e-05, "loss": 3.4617, "step": 5070 }, { "epoch": 0.3448158717217013, "grad_norm": 2.3296327590942383, "learning_rate": 9.56923495040087e-05, "loss": 3.4086, "step": 5075 }, { "epoch": 0.3451555917923631, "grad_norm": 2.673248291015625, "learning_rate": 9.568810300312543e-05, "loss": 3.5193, "step": 5080 }, { "epoch": 0.34549531186302485, "grad_norm": 2.696136713027954, "learning_rate": 9.568385650224215e-05, "loss": 3.3428, "step": 5085 }, { "epoch": 0.34583503193368664, "grad_norm": 4.353693008422852, "learning_rate": 9.567961000135889e-05, "loss": 3.3181, "step": 5090 }, { "epoch": 0.34617475200434844, "grad_norm": 2.4072203636169434, "learning_rate": 9.567536350047562e-05, "loss": 3.6271, "step": 5095 }, { "epoch": 0.3465144720750102, "grad_norm": 2.293489933013916, "learning_rate": 9.567111699959233e-05, "loss": 3.291, "step": 5100 }, { "epoch": 0.346854192145672, "grad_norm": 1.891231656074524, "learning_rate": 9.566687049870907e-05, "loss": 3.4596, "step": 5105 }, { "epoch": 0.3471939122163337, "grad_norm": 2.0043277740478516, "learning_rate": 9.56626239978258e-05, "loss": 3.6103, "step": 5110 }, { "epoch": 0.3475336322869955, "grad_norm": 2.2673497200012207, "learning_rate": 9.565837749694252e-05, "loss": 3.5595, "step": 5115 }, { "epoch": 0.3478733523576573, "grad_norm": 2.216912031173706, "learning_rate": 9.565413099605926e-05, "loss": 3.461, "step": 5120 }, { "epoch": 0.34821307242831906, "grad_norm": 2.1620354652404785, "learning_rate": 9.564988449517597e-05, 
"loss": 3.6213, "step": 5125 }, { "epoch": 0.34855279249898086, "grad_norm": 2.035184383392334, "learning_rate": 9.56456379942927e-05, "loss": 3.389, "step": 5130 }, { "epoch": 0.3488925125696426, "grad_norm": 2.0516014099121094, "learning_rate": 9.564139149340944e-05, "loss": 3.3663, "step": 5135 }, { "epoch": 0.3492322326403044, "grad_norm": 1.7857179641723633, "learning_rate": 9.563714499252616e-05, "loss": 3.4649, "step": 5140 }, { "epoch": 0.34957195271096614, "grad_norm": 2.283069133758545, "learning_rate": 9.563289849164288e-05, "loss": 3.2298, "step": 5145 }, { "epoch": 0.34991167278162794, "grad_norm": 2.4059247970581055, "learning_rate": 9.562865199075963e-05, "loss": 3.6538, "step": 5150 }, { "epoch": 0.35025139285228973, "grad_norm": 1.8156726360321045, "learning_rate": 9.562440548987634e-05, "loss": 3.4423, "step": 5155 }, { "epoch": 0.3505911129229515, "grad_norm": 1.9691323041915894, "learning_rate": 9.562015898899307e-05, "loss": 3.6515, "step": 5160 }, { "epoch": 0.3509308329936133, "grad_norm": 1.8808988332748413, "learning_rate": 9.561591248810981e-05, "loss": 3.4709, "step": 5165 }, { "epoch": 0.351270553064275, "grad_norm": 2.020207405090332, "learning_rate": 9.561166598722652e-05, "loss": 3.4481, "step": 5170 }, { "epoch": 0.3516102731349368, "grad_norm": 2.298236846923828, "learning_rate": 9.560741948634325e-05, "loss": 3.2875, "step": 5175 }, { "epoch": 0.3519499932055986, "grad_norm": 3.182771921157837, "learning_rate": 9.560317298546e-05, "loss": 3.6108, "step": 5180 }, { "epoch": 0.35228971327626035, "grad_norm": 2.5850470066070557, "learning_rate": 9.559892648457671e-05, "loss": 3.3886, "step": 5185 }, { "epoch": 0.35262943334692215, "grad_norm": 2.092212438583374, "learning_rate": 9.559467998369344e-05, "loss": 3.7107, "step": 5190 }, { "epoch": 0.3529691534175839, "grad_norm": 2.263517141342163, "learning_rate": 9.559043348281016e-05, "loss": 3.4494, "step": 5195 }, { "epoch": 0.3533088734882457, "grad_norm": 2.3949036598205566, 
"learning_rate": 9.558618698192689e-05, "loss": 3.5598, "step": 5200 }, { "epoch": 0.3536485935589075, "grad_norm": 2.4407613277435303, "learning_rate": 9.558194048104362e-05, "loss": 3.366, "step": 5205 }, { "epoch": 0.35398831362956923, "grad_norm": 3.042372226715088, "learning_rate": 9.557769398016035e-05, "loss": 3.4785, "step": 5210 }, { "epoch": 0.354328033700231, "grad_norm": 2.424865484237671, "learning_rate": 9.557344747927708e-05, "loss": 3.6114, "step": 5215 }, { "epoch": 0.35466775377089277, "grad_norm": 2.8207926750183105, "learning_rate": 9.556920097839382e-05, "loss": 3.6909, "step": 5220 }, { "epoch": 0.35500747384155457, "grad_norm": 2.065338373184204, "learning_rate": 9.556495447751053e-05, "loss": 3.4274, "step": 5225 }, { "epoch": 0.3553471939122163, "grad_norm": 1.9987508058547974, "learning_rate": 9.556070797662726e-05, "loss": 3.5197, "step": 5230 }, { "epoch": 0.3556869139828781, "grad_norm": 2.4522342681884766, "learning_rate": 9.5556461475744e-05, "loss": 3.2803, "step": 5235 }, { "epoch": 0.3560266340535399, "grad_norm": 1.7566057443618774, "learning_rate": 9.555221497486072e-05, "loss": 3.5834, "step": 5240 }, { "epoch": 0.35636635412420165, "grad_norm": 2.4280521869659424, "learning_rate": 9.554796847397744e-05, "loss": 3.4192, "step": 5245 }, { "epoch": 0.35670607419486344, "grad_norm": 2.3163771629333496, "learning_rate": 9.554372197309419e-05, "loss": 3.7487, "step": 5250 }, { "epoch": 0.3570457942655252, "grad_norm": 1.7652122974395752, "learning_rate": 9.55394754722109e-05, "loss": 3.3039, "step": 5255 }, { "epoch": 0.357385514336187, "grad_norm": 2.1827664375305176, "learning_rate": 9.553522897132763e-05, "loss": 3.3813, "step": 5260 }, { "epoch": 0.3577252344068488, "grad_norm": 2.1868386268615723, "learning_rate": 9.553098247044436e-05, "loss": 3.2241, "step": 5265 }, { "epoch": 0.3580649544775105, "grad_norm": 2.2805964946746826, "learning_rate": 9.552673596956108e-05, "loss": 3.4951, "step": 5270 }, { "epoch": 
0.3584046745481723, "grad_norm": 2.293192148208618, "learning_rate": 9.552248946867781e-05, "loss": 3.6963, "step": 5275 }, { "epoch": 0.35874439461883406, "grad_norm": 1.9848803281784058, "learning_rate": 9.551824296779454e-05, "loss": 3.5111, "step": 5280 }, { "epoch": 0.35908411468949586, "grad_norm": 2.6939401626586914, "learning_rate": 9.551399646691127e-05, "loss": 3.6023, "step": 5285 }, { "epoch": 0.35942383476015766, "grad_norm": 2.6907401084899902, "learning_rate": 9.5509749966028e-05, "loss": 3.572, "step": 5290 }, { "epoch": 0.3597635548308194, "grad_norm": 2.4348137378692627, "learning_rate": 9.550550346514472e-05, "loss": 3.7261, "step": 5295 }, { "epoch": 0.3601032749014812, "grad_norm": 2.1113297939300537, "learning_rate": 9.550125696426145e-05, "loss": 3.6574, "step": 5300 }, { "epoch": 0.36044299497214294, "grad_norm": 2.00673508644104, "learning_rate": 9.549701046337818e-05, "loss": 3.6794, "step": 5305 }, { "epoch": 0.36078271504280474, "grad_norm": 1.7474217414855957, "learning_rate": 9.549276396249491e-05, "loss": 3.7086, "step": 5310 }, { "epoch": 0.3611224351134665, "grad_norm": 1.9775686264038086, "learning_rate": 9.548851746161164e-05, "loss": 3.761, "step": 5315 }, { "epoch": 0.3614621551841283, "grad_norm": 2.300579071044922, "learning_rate": 9.548427096072836e-05, "loss": 3.4078, "step": 5320 }, { "epoch": 0.3618018752547901, "grad_norm": 2.7633800506591797, "learning_rate": 9.548002445984509e-05, "loss": 3.7536, "step": 5325 }, { "epoch": 0.3621415953254518, "grad_norm": 2.896941900253296, "learning_rate": 9.547577795896182e-05, "loss": 3.5601, "step": 5330 }, { "epoch": 0.3624813153961136, "grad_norm": 2.7240138053894043, "learning_rate": 9.547153145807855e-05, "loss": 3.8074, "step": 5335 }, { "epoch": 0.36282103546677535, "grad_norm": 2.397312879562378, "learning_rate": 9.546728495719528e-05, "loss": 3.4732, "step": 5340 }, { "epoch": 0.36316075553743715, "grad_norm": 3.0793724060058594, "learning_rate": 9.5463038456312e-05, "loss": 
3.3193, "step": 5345 }, { "epoch": 0.36350047560809895, "grad_norm": 2.4621636867523193, "learning_rate": 9.545879195542873e-05, "loss": 3.4898, "step": 5350 }, { "epoch": 0.3638401956787607, "grad_norm": 1.9295380115509033, "learning_rate": 9.545454545454546e-05, "loss": 3.2048, "step": 5355 }, { "epoch": 0.3641799157494225, "grad_norm": 2.3588459491729736, "learning_rate": 9.545029895366219e-05, "loss": 3.4091, "step": 5360 }, { "epoch": 0.36451963582008423, "grad_norm": 2.6944656372070312, "learning_rate": 9.544605245277892e-05, "loss": 3.5929, "step": 5365 }, { "epoch": 0.36485935589074603, "grad_norm": 2.7152786254882812, "learning_rate": 9.544180595189564e-05, "loss": 3.3166, "step": 5370 }, { "epoch": 0.3651990759614078, "grad_norm": 2.3631365299224854, "learning_rate": 9.543755945101237e-05, "loss": 3.4736, "step": 5375 }, { "epoch": 0.36553879603206957, "grad_norm": 2.3416121006011963, "learning_rate": 9.54333129501291e-05, "loss": 3.3867, "step": 5380 }, { "epoch": 0.36587851610273137, "grad_norm": 2.008406639099121, "learning_rate": 9.542906644924583e-05, "loss": 3.5575, "step": 5385 }, { "epoch": 0.3662182361733931, "grad_norm": 2.2432711124420166, "learning_rate": 9.542481994836256e-05, "loss": 3.458, "step": 5390 }, { "epoch": 0.3665579562440549, "grad_norm": 2.037553310394287, "learning_rate": 9.542057344747927e-05, "loss": 3.4037, "step": 5395 }, { "epoch": 0.36689767631471665, "grad_norm": 1.8744285106658936, "learning_rate": 9.541632694659601e-05, "loss": 3.5522, "step": 5400 }, { "epoch": 0.36723739638537845, "grad_norm": 2.1985297203063965, "learning_rate": 9.541208044571274e-05, "loss": 3.4464, "step": 5405 }, { "epoch": 0.36757711645604024, "grad_norm": 2.1987900733947754, "learning_rate": 9.540783394482946e-05, "loss": 3.3822, "step": 5410 }, { "epoch": 0.367916836526702, "grad_norm": 2.3122260570526123, "learning_rate": 9.54035874439462e-05, "loss": 3.5572, "step": 5415 }, { "epoch": 0.3682565565973638, "grad_norm": 1.7977787256240845, 
"learning_rate": 9.539934094306293e-05, "loss": 3.4209, "step": 5420 }, { "epoch": 0.3685962766680255, "grad_norm": 2.850813627243042, "learning_rate": 9.539509444217964e-05, "loss": 3.3063, "step": 5425 }, { "epoch": 0.3689359967386873, "grad_norm": 2.0067622661590576, "learning_rate": 9.539084794129638e-05, "loss": 3.5631, "step": 5430 }, { "epoch": 0.3692757168093491, "grad_norm": 2.521108627319336, "learning_rate": 9.538660144041311e-05, "loss": 3.4673, "step": 5435 }, { "epoch": 0.36961543688001086, "grad_norm": 2.341568946838379, "learning_rate": 9.538235493952982e-05, "loss": 3.3754, "step": 5440 }, { "epoch": 0.36995515695067266, "grad_norm": 1.9837868213653564, "learning_rate": 9.537895773882322e-05, "loss": 3.5923, "step": 5445 }, { "epoch": 0.3702948770213344, "grad_norm": 2.3855340480804443, "learning_rate": 9.537471123793993e-05, "loss": 3.3687, "step": 5450 }, { "epoch": 0.3706345970919962, "grad_norm": 2.099226713180542, "learning_rate": 9.537046473705668e-05, "loss": 3.7035, "step": 5455 }, { "epoch": 0.370974317162658, "grad_norm": 2.2473480701446533, "learning_rate": 9.53662182361734e-05, "loss": 3.3972, "step": 5460 }, { "epoch": 0.37131403723331974, "grad_norm": 2.3042585849761963, "learning_rate": 9.536197173529012e-05, "loss": 3.3928, "step": 5465 }, { "epoch": 0.37165375730398154, "grad_norm": 2.688464879989624, "learning_rate": 9.535772523440686e-05, "loss": 3.6379, "step": 5470 }, { "epoch": 0.3719934773746433, "grad_norm": 2.2118613719940186, "learning_rate": 9.535347873352359e-05, "loss": 3.6294, "step": 5475 }, { "epoch": 0.3723331974453051, "grad_norm": 1.9588185548782349, "learning_rate": 9.53492322326403e-05, "loss": 3.6541, "step": 5480 }, { "epoch": 0.3726729175159668, "grad_norm": 2.7694761753082275, "learning_rate": 9.534498573175704e-05, "loss": 3.8579, "step": 5485 }, { "epoch": 0.3730126375866286, "grad_norm": 2.9138429164886475, "learning_rate": 9.534073923087377e-05, "loss": 3.4537, "step": 5490 }, { "epoch": 
0.3733523576572904, "grad_norm": 2.030308485031128, "learning_rate": 9.533649272999049e-05, "loss": 3.5327, "step": 5495 }, { "epoch": 0.37369207772795215, "grad_norm": 2.130218029022217, "learning_rate": 9.533224622910723e-05, "loss": 3.5519, "step": 5500 }, { "epoch": 0.37403179779861395, "grad_norm": 2.3214871883392334, "learning_rate": 9.532799972822396e-05, "loss": 3.6258, "step": 5505 }, { "epoch": 0.3743715178692757, "grad_norm": 2.158155918121338, "learning_rate": 9.532375322734067e-05, "loss": 3.3948, "step": 5510 }, { "epoch": 0.3747112379399375, "grad_norm": 2.628941059112549, "learning_rate": 9.531950672645741e-05, "loss": 3.5988, "step": 5515 }, { "epoch": 0.3750509580105993, "grad_norm": 1.88633131980896, "learning_rate": 9.531526022557413e-05, "loss": 3.2646, "step": 5520 }, { "epoch": 0.37539067808126103, "grad_norm": 2.2778618335723877, "learning_rate": 9.531101372469085e-05, "loss": 3.4196, "step": 5525 }, { "epoch": 0.37573039815192283, "grad_norm": 2.4018216133117676, "learning_rate": 9.53067672238076e-05, "loss": 3.4266, "step": 5530 }, { "epoch": 0.37607011822258457, "grad_norm": 1.891074538230896, "learning_rate": 9.530252072292431e-05, "loss": 3.8662, "step": 5535 }, { "epoch": 0.37640983829324637, "grad_norm": 2.3034353256225586, "learning_rate": 9.529827422204104e-05, "loss": 3.4704, "step": 5540 }, { "epoch": 0.37674955836390817, "grad_norm": 2.0530219078063965, "learning_rate": 9.529402772115778e-05, "loss": 3.8118, "step": 5545 }, { "epoch": 0.3770892784345699, "grad_norm": 1.7378071546554565, "learning_rate": 9.52897812202745e-05, "loss": 3.7089, "step": 5550 }, { "epoch": 0.3774289985052317, "grad_norm": 4.47321891784668, "learning_rate": 9.528553471939122e-05, "loss": 3.7129, "step": 5555 }, { "epoch": 0.37776871857589345, "grad_norm": 2.6330199241638184, "learning_rate": 9.528128821850796e-05, "loss": 3.6119, "step": 5560 }, { "epoch": 0.37810843864655524, "grad_norm": 2.302823305130005, "learning_rate": 9.527704171762468e-05, 
"loss": 3.3904, "step": 5565 }, { "epoch": 0.378448158717217, "grad_norm": 2.3510868549346924, "learning_rate": 9.52727952167414e-05, "loss": 3.6345, "step": 5570 }, { "epoch": 0.3787878787878788, "grad_norm": 1.9679723978042603, "learning_rate": 9.526854871585815e-05, "loss": 3.3001, "step": 5575 }, { "epoch": 0.3791275988585406, "grad_norm": 2.5521187782287598, "learning_rate": 9.526430221497486e-05, "loss": 3.3881, "step": 5580 }, { "epoch": 0.3794673189292023, "grad_norm": 2.2444393634796143, "learning_rate": 9.526005571409159e-05, "loss": 3.4638, "step": 5585 }, { "epoch": 0.3798070389998641, "grad_norm": 2.0025293827056885, "learning_rate": 9.525580921320832e-05, "loss": 3.2738, "step": 5590 }, { "epoch": 0.38014675907052586, "grad_norm": 2.1449875831604004, "learning_rate": 9.525156271232505e-05, "loss": 3.508, "step": 5595 }, { "epoch": 0.38048647914118766, "grad_norm": 2.309112071990967, "learning_rate": 9.524731621144177e-05, "loss": 3.7207, "step": 5600 }, { "epoch": 0.38082619921184946, "grad_norm": 2.2592954635620117, "learning_rate": 9.52430697105585e-05, "loss": 3.5663, "step": 5605 }, { "epoch": 0.3811659192825112, "grad_norm": 2.173708915710449, "learning_rate": 9.523882320967523e-05, "loss": 3.3756, "step": 5610 }, { "epoch": 0.381505639353173, "grad_norm": 2.0735418796539307, "learning_rate": 9.523457670879196e-05, "loss": 3.3884, "step": 5615 }, { "epoch": 0.38184535942383474, "grad_norm": 1.907126784324646, "learning_rate": 9.523033020790869e-05, "loss": 3.4768, "step": 5620 }, { "epoch": 0.38218507949449654, "grad_norm": 2.386720895767212, "learning_rate": 9.522608370702541e-05, "loss": 3.4243, "step": 5625 }, { "epoch": 0.38252479956515834, "grad_norm": 2.549302339553833, "learning_rate": 9.522183720614214e-05, "loss": 3.6601, "step": 5630 }, { "epoch": 0.3828645196358201, "grad_norm": 2.2185428142547607, "learning_rate": 9.521759070525887e-05, "loss": 3.2892, "step": 5635 }, { "epoch": 0.3832042397064819, "grad_norm": 2.2634174823760986, 
"learning_rate": 9.52133442043756e-05, "loss": 3.5779, "step": 5640 }, { "epoch": 0.3835439597771436, "grad_norm": 2.3982715606689453, "learning_rate": 9.520909770349233e-05, "loss": 3.5883, "step": 5645 }, { "epoch": 0.3838836798478054, "grad_norm": 2.4622321128845215, "learning_rate": 9.520485120260905e-05, "loss": 3.3979, "step": 5650 }, { "epoch": 0.38422339991846716, "grad_norm": 2.497669219970703, "learning_rate": 9.520060470172578e-05, "loss": 3.7132, "step": 5655 }, { "epoch": 0.38456311998912895, "grad_norm": 1.95124089717865, "learning_rate": 9.519635820084251e-05, "loss": 3.4086, "step": 5660 }, { "epoch": 0.38490284005979075, "grad_norm": 2.267400026321411, "learning_rate": 9.519211169995924e-05, "loss": 3.4912, "step": 5665 }, { "epoch": 0.3852425601304525, "grad_norm": 2.9256820678710938, "learning_rate": 9.518786519907597e-05, "loss": 3.457, "step": 5670 }, { "epoch": 0.3855822802011143, "grad_norm": 2.201944351196289, "learning_rate": 9.51836186981927e-05, "loss": 3.418, "step": 5675 }, { "epoch": 0.38592200027177603, "grad_norm": 2.2088499069213867, "learning_rate": 9.517937219730942e-05, "loss": 3.4102, "step": 5680 }, { "epoch": 0.38626172034243783, "grad_norm": 2.034724712371826, "learning_rate": 9.517512569642615e-05, "loss": 3.4246, "step": 5685 }, { "epoch": 0.38660144041309963, "grad_norm": 2.6734964847564697, "learning_rate": 9.517087919554288e-05, "loss": 3.6603, "step": 5690 }, { "epoch": 0.38694116048376137, "grad_norm": 2.102245569229126, "learning_rate": 9.51666326946596e-05, "loss": 3.3954, "step": 5695 }, { "epoch": 0.38728088055442317, "grad_norm": 2.590301036834717, "learning_rate": 9.516238619377633e-05, "loss": 3.5354, "step": 5700 }, { "epoch": 0.3876206006250849, "grad_norm": 2.55387544631958, "learning_rate": 9.515813969289306e-05, "loss": 3.4349, "step": 5705 }, { "epoch": 0.3879603206957467, "grad_norm": 2.1026203632354736, "learning_rate": 9.515389319200979e-05, "loss": 3.5727, "step": 5710 }, { "epoch": 0.3883000407664085, 
"grad_norm": 2.344355583190918, "learning_rate": 9.514964669112652e-05, "loss": 3.5671, "step": 5715 }, { "epoch": 0.38863976083707025, "grad_norm": 2.415301561355591, "learning_rate": 9.514540019024323e-05, "loss": 3.5547, "step": 5720 }, { "epoch": 0.38897948090773204, "grad_norm": 2.3402092456817627, "learning_rate": 9.514115368935997e-05, "loss": 3.6248, "step": 5725 }, { "epoch": 0.3893192009783938, "grad_norm": 5.19041633605957, "learning_rate": 9.51369071884767e-05, "loss": 3.5613, "step": 5730 }, { "epoch": 0.3896589210490556, "grad_norm": 2.601196527481079, "learning_rate": 9.513266068759342e-05, "loss": 3.5092, "step": 5735 }, { "epoch": 0.3899986411197173, "grad_norm": 2.2364370822906494, "learning_rate": 9.512841418671016e-05, "loss": 3.7385, "step": 5740 }, { "epoch": 0.3903383611903791, "grad_norm": 2.4521467685699463, "learning_rate": 9.512416768582689e-05, "loss": 3.5398, "step": 5745 }, { "epoch": 0.3906780812610409, "grad_norm": 2.12900710105896, "learning_rate": 9.51199211849436e-05, "loss": 3.5456, "step": 5750 }, { "epoch": 0.39101780133170266, "grad_norm": 2.26472544670105, "learning_rate": 9.511567468406034e-05, "loss": 3.6027, "step": 5755 }, { "epoch": 0.39135752140236446, "grad_norm": 2.145358085632324, "learning_rate": 9.511142818317707e-05, "loss": 3.6173, "step": 5760 }, { "epoch": 0.3916972414730262, "grad_norm": 2.2755930423736572, "learning_rate": 9.51071816822938e-05, "loss": 3.3401, "step": 5765 }, { "epoch": 0.392036961543688, "grad_norm": 2.128591537475586, "learning_rate": 9.510293518141053e-05, "loss": 3.5998, "step": 5770 }, { "epoch": 0.3923766816143498, "grad_norm": 2.3939907550811768, "learning_rate": 9.509868868052725e-05, "loss": 3.5838, "step": 5775 }, { "epoch": 0.39271640168501154, "grad_norm": 2.3246493339538574, "learning_rate": 9.509444217964398e-05, "loss": 3.239, "step": 5780 }, { "epoch": 0.39305612175567334, "grad_norm": 2.4405014514923096, "learning_rate": 9.509019567876071e-05, "loss": 3.5871, "step": 5785 }, 
{ "epoch": 0.3933958418263351, "grad_norm": 2.5543372631073, "learning_rate": 9.508594917787742e-05, "loss": 3.7152, "step": 5790 }, { "epoch": 0.3937355618969969, "grad_norm": 2.156559705734253, "learning_rate": 9.508170267699417e-05, "loss": 3.4728, "step": 5795 }, { "epoch": 0.3940752819676587, "grad_norm": 2.7907774448394775, "learning_rate": 9.50774561761109e-05, "loss": 3.6774, "step": 5800 }, { "epoch": 0.3944150020383204, "grad_norm": 2.349163770675659, "learning_rate": 9.507320967522761e-05, "loss": 3.6738, "step": 5805 }, { "epoch": 0.3947547221089822, "grad_norm": 2.1579880714416504, "learning_rate": 9.506896317434435e-05, "loss": 3.5, "step": 5810 }, { "epoch": 0.39509444217964396, "grad_norm": 2.3118982315063477, "learning_rate": 9.506471667346108e-05, "loss": 3.1216, "step": 5815 }, { "epoch": 0.39543416225030575, "grad_norm": 2.0580344200134277, "learning_rate": 9.506047017257779e-05, "loss": 3.728, "step": 5820 }, { "epoch": 0.3957738823209675, "grad_norm": 2.1640231609344482, "learning_rate": 9.505622367169453e-05, "loss": 3.607, "step": 5825 }, { "epoch": 0.3961136023916293, "grad_norm": 2.620502233505249, "learning_rate": 9.505197717081126e-05, "loss": 3.2637, "step": 5830 }, { "epoch": 0.3964533224622911, "grad_norm": 2.107973098754883, "learning_rate": 9.504773066992798e-05, "loss": 3.6636, "step": 5835 }, { "epoch": 0.39679304253295283, "grad_norm": 2.1638150215148926, "learning_rate": 9.504348416904472e-05, "loss": 3.4899, "step": 5840 }, { "epoch": 0.39713276260361463, "grad_norm": 2.3276584148406982, "learning_rate": 9.503923766816145e-05, "loss": 3.4387, "step": 5845 }, { "epoch": 0.3974724826742764, "grad_norm": 2.4550554752349854, "learning_rate": 9.503499116727816e-05, "loss": 3.3838, "step": 5850 }, { "epoch": 0.39781220274493817, "grad_norm": 2.314365863800049, "learning_rate": 9.50307446663949e-05, "loss": 3.5617, "step": 5855 }, { "epoch": 0.39815192281559997, "grad_norm": 1.8994426727294922, "learning_rate": 9.502649816551162e-05, 
"loss": 3.5274, "step": 5860 }, { "epoch": 0.3984916428862617, "grad_norm": 1.8744722604751587, "learning_rate": 9.502225166462835e-05, "loss": 3.7462, "step": 5865 }, { "epoch": 0.3988313629569235, "grad_norm": 2.5736844539642334, "learning_rate": 9.501800516374509e-05, "loss": 3.4497, "step": 5870 }, { "epoch": 0.39917108302758525, "grad_norm": 1.7807859182357788, "learning_rate": 9.50137586628618e-05, "loss": 3.2401, "step": 5875 }, { "epoch": 0.39951080309824705, "grad_norm": 2.3960299491882324, "learning_rate": 9.500951216197853e-05, "loss": 3.5057, "step": 5880 }, { "epoch": 0.39985052316890884, "grad_norm": 2.1863772869110107, "learning_rate": 9.500526566109527e-05, "loss": 3.6953, "step": 5885 }, { "epoch": 0.4001902432395706, "grad_norm": 1.8718239068984985, "learning_rate": 9.500101916021199e-05, "loss": 3.5513, "step": 5890 }, { "epoch": 0.4005299633102324, "grad_norm": 2.161097288131714, "learning_rate": 9.499677265932871e-05, "loss": 3.2804, "step": 5895 }, { "epoch": 0.4008696833808941, "grad_norm": 2.724787712097168, "learning_rate": 9.499252615844545e-05, "loss": 3.4583, "step": 5900 }, { "epoch": 0.4012094034515559, "grad_norm": 2.079878330230713, "learning_rate": 9.498827965756217e-05, "loss": 3.569, "step": 5905 }, { "epoch": 0.40154912352221767, "grad_norm": 3.1078429222106934, "learning_rate": 9.49840331566789e-05, "loss": 3.6336, "step": 5910 }, { "epoch": 0.40188884359287946, "grad_norm": 2.195901393890381, "learning_rate": 9.497978665579564e-05, "loss": 3.6751, "step": 5915 }, { "epoch": 0.40222856366354126, "grad_norm": 3.3410584926605225, "learning_rate": 9.497554015491235e-05, "loss": 3.6466, "step": 5920 }, { "epoch": 0.402568283734203, "grad_norm": 1.914284348487854, "learning_rate": 9.497129365402908e-05, "loss": 3.433, "step": 5925 }, { "epoch": 0.4029080038048648, "grad_norm": 2.947977304458618, "learning_rate": 9.496704715314582e-05, "loss": 3.5166, "step": 5930 }, { "epoch": 0.40324772387552654, "grad_norm": 2.164297103881836, 
"learning_rate": 9.496280065226254e-05, "loss": 3.7164, "step": 5935 }, { "epoch": 0.40358744394618834, "grad_norm": 2.2850894927978516, "learning_rate": 9.495855415137927e-05, "loss": 3.4645, "step": 5940 }, { "epoch": 0.40392716401685014, "grad_norm": 2.1502060890197754, "learning_rate": 9.495430765049599e-05, "loss": 3.4292, "step": 5945 }, { "epoch": 0.4042668840875119, "grad_norm": 1.9091222286224365, "learning_rate": 9.495006114961272e-05, "loss": 3.7687, "step": 5950 }, { "epoch": 0.4046066041581737, "grad_norm": 1.9365631341934204, "learning_rate": 9.494581464872945e-05, "loss": 3.5556, "step": 5955 }, { "epoch": 0.4049463242288354, "grad_norm": 2.008934736251831, "learning_rate": 9.494156814784618e-05, "loss": 3.4835, "step": 5960 }, { "epoch": 0.4052860442994972, "grad_norm": 2.1476967334747314, "learning_rate": 9.49373216469629e-05, "loss": 3.5901, "step": 5965 }, { "epoch": 0.405625764370159, "grad_norm": 2.3163695335388184, "learning_rate": 9.493307514607963e-05, "loss": 3.7873, "step": 5970 }, { "epoch": 0.40596548444082076, "grad_norm": 2.0577564239501953, "learning_rate": 9.492882864519636e-05, "loss": 3.3, "step": 5975 }, { "epoch": 0.40630520451148255, "grad_norm": 2.55914568901062, "learning_rate": 9.492458214431309e-05, "loss": 3.5086, "step": 5980 }, { "epoch": 0.4066449245821443, "grad_norm": 1.9641376733779907, "learning_rate": 9.492033564342982e-05, "loss": 3.3942, "step": 5985 }, { "epoch": 0.4069846446528061, "grad_norm": 2.674025297164917, "learning_rate": 9.491608914254655e-05, "loss": 3.7973, "step": 5990 }, { "epoch": 0.40732436472346784, "grad_norm": 2.183528184890747, "learning_rate": 9.491184264166327e-05, "loss": 3.5388, "step": 5995 }, { "epoch": 0.40766408479412963, "grad_norm": 2.0168113708496094, "learning_rate": 9.490759614078e-05, "loss": 3.4047, "step": 6000 }, { "epoch": 0.40800380486479143, "grad_norm": 2.4202096462249756, "learning_rate": 9.490334963989673e-05, "loss": 3.5396, "step": 6005 }, { "epoch": 
0.4083435249354532, "grad_norm": 2.2292206287384033, "learning_rate": 9.489910313901346e-05, "loss": 3.5008, "step": 6010 }, { "epoch": 0.40868324500611497, "grad_norm": 2.375166416168213, "learning_rate": 9.489485663813019e-05, "loss": 3.5598, "step": 6015 }, { "epoch": 0.4090229650767767, "grad_norm": 2.449183225631714, "learning_rate": 9.489061013724691e-05, "loss": 3.5098, "step": 6020 }, { "epoch": 0.4093626851474385, "grad_norm": 2.4351933002471924, "learning_rate": 9.488636363636364e-05, "loss": 3.3801, "step": 6025 }, { "epoch": 0.4097024052181003, "grad_norm": 2.2387006282806396, "learning_rate": 9.488211713548037e-05, "loss": 3.3374, "step": 6030 }, { "epoch": 0.41004212528876205, "grad_norm": 2.148315906524658, "learning_rate": 9.48778706345971e-05, "loss": 3.3949, "step": 6035 }, { "epoch": 0.41038184535942385, "grad_norm": 3.2371878623962402, "learning_rate": 9.487362413371383e-05, "loss": 3.5393, "step": 6040 }, { "epoch": 0.4107215654300856, "grad_norm": 2.1698648929595947, "learning_rate": 9.486937763283055e-05, "loss": 3.4602, "step": 6045 }, { "epoch": 0.4110612855007474, "grad_norm": 2.2219765186309814, "learning_rate": 9.486513113194728e-05, "loss": 3.5677, "step": 6050 }, { "epoch": 0.4114010055714092, "grad_norm": 2.333155632019043, "learning_rate": 9.486088463106401e-05, "loss": 3.2506, "step": 6055 }, { "epoch": 0.4117407256420709, "grad_norm": 2.4780113697052, "learning_rate": 9.485663813018072e-05, "loss": 3.3625, "step": 6060 }, { "epoch": 0.4120804457127327, "grad_norm": 3.1555166244506836, "learning_rate": 9.485239162929747e-05, "loss": 3.6358, "step": 6065 }, { "epoch": 0.41242016578339447, "grad_norm": 2.722064971923828, "learning_rate": 9.48481451284142e-05, "loss": 3.7669, "step": 6070 }, { "epoch": 0.41275988585405626, "grad_norm": 2.3357059955596924, "learning_rate": 9.484389862753091e-05, "loss": 3.1026, "step": 6075 }, { "epoch": 0.413099605924718, "grad_norm": 2.924870491027832, "learning_rate": 9.483965212664765e-05, "loss": 
3.6327, "step": 6080 }, { "epoch": 0.4134393259953798, "grad_norm": 2.819397211074829, "learning_rate": 9.483540562576438e-05, "loss": 3.4774, "step": 6085 }, { "epoch": 0.4137790460660416, "grad_norm": 2.039072275161743, "learning_rate": 9.483115912488109e-05, "loss": 3.6857, "step": 6090 }, { "epoch": 0.41411876613670334, "grad_norm": 2.581530809402466, "learning_rate": 9.482691262399783e-05, "loss": 3.474, "step": 6095 }, { "epoch": 0.41445848620736514, "grad_norm": 2.0482287406921387, "learning_rate": 9.482266612311456e-05, "loss": 3.4328, "step": 6100 }, { "epoch": 0.4147982062780269, "grad_norm": 2.2304725646972656, "learning_rate": 9.481841962223129e-05, "loss": 3.3463, "step": 6105 }, { "epoch": 0.4151379263486887, "grad_norm": 2.487243413925171, "learning_rate": 9.481417312134802e-05, "loss": 3.4985, "step": 6110 }, { "epoch": 0.4154776464193505, "grad_norm": 3.1797499656677246, "learning_rate": 9.480992662046475e-05, "loss": 3.4223, "step": 6115 }, { "epoch": 0.4158173664900122, "grad_norm": 2.1688737869262695, "learning_rate": 9.480568011958147e-05, "loss": 3.7536, "step": 6120 }, { "epoch": 0.416157086560674, "grad_norm": 2.112556219100952, "learning_rate": 9.48014336186982e-05, "loss": 3.5606, "step": 6125 }, { "epoch": 0.41649680663133576, "grad_norm": 1.8179993629455566, "learning_rate": 9.479718711781493e-05, "loss": 3.3376, "step": 6130 }, { "epoch": 0.41683652670199756, "grad_norm": 2.383026599884033, "learning_rate": 9.479294061693166e-05, "loss": 3.4151, "step": 6135 }, { "epoch": 0.41717624677265935, "grad_norm": 2.0007266998291016, "learning_rate": 9.478869411604839e-05, "loss": 3.5658, "step": 6140 }, { "epoch": 0.4175159668433211, "grad_norm": 2.4773383140563965, "learning_rate": 9.47844476151651e-05, "loss": 3.6117, "step": 6145 }, { "epoch": 0.4178556869139829, "grad_norm": 3.078857898712158, "learning_rate": 9.478020111428184e-05, "loss": 3.6391, "step": 6150 }, { "epoch": 0.41819540698464464, "grad_norm": 2.535287618637085, 
"learning_rate": 9.477595461339857e-05, "loss": 3.3409, "step": 6155 }, { "epoch": 0.41853512705530643, "grad_norm": 2.0010769367218018, "learning_rate": 9.477170811251528e-05, "loss": 3.4942, "step": 6160 }, { "epoch": 0.4188748471259682, "grad_norm": 2.3958261013031006, "learning_rate": 9.476746161163203e-05, "loss": 3.7077, "step": 6165 }, { "epoch": 0.41921456719663, "grad_norm": 2.1570017337799072, "learning_rate": 9.476321511074875e-05, "loss": 3.6067, "step": 6170 }, { "epoch": 0.41955428726729177, "grad_norm": 2.6229751110076904, "learning_rate": 9.475896860986547e-05, "loss": 3.5907, "step": 6175 }, { "epoch": 0.4198940073379535, "grad_norm": 2.0216729640960693, "learning_rate": 9.475472210898221e-05, "loss": 3.4843, "step": 6180 }, { "epoch": 0.4202337274086153, "grad_norm": 2.4097788333892822, "learning_rate": 9.475047560809894e-05, "loss": 3.4321, "step": 6185 }, { "epoch": 0.42057344747927705, "grad_norm": 2.1403191089630127, "learning_rate": 9.474622910721565e-05, "loss": 3.3428, "step": 6190 }, { "epoch": 0.42091316754993885, "grad_norm": 1.7724568843841553, "learning_rate": 9.47419826063324e-05, "loss": 3.5977, "step": 6195 }, { "epoch": 0.42125288762060065, "grad_norm": 1.9858791828155518, "learning_rate": 9.473773610544912e-05, "loss": 3.5343, "step": 6200 }, { "epoch": 0.4215926076912624, "grad_norm": 2.3160152435302734, "learning_rate": 9.473348960456584e-05, "loss": 3.4312, "step": 6205 }, { "epoch": 0.4219323277619242, "grad_norm": 2.1834471225738525, "learning_rate": 9.472924310368258e-05, "loss": 3.4977, "step": 6210 }, { "epoch": 0.42227204783258593, "grad_norm": 2.7510712146759033, "learning_rate": 9.472499660279929e-05, "loss": 3.215, "step": 6215 }, { "epoch": 0.4226117679032477, "grad_norm": 2.227858066558838, "learning_rate": 9.472075010191602e-05, "loss": 3.5091, "step": 6220 }, { "epoch": 0.4229514879739095, "grad_norm": 2.0628483295440674, "learning_rate": 9.471650360103276e-05, "loss": 3.3493, "step": 6225 }, { "epoch": 
0.42329120804457127, "grad_norm": 2.113569974899292, "learning_rate": 9.471225710014948e-05, "loss": 3.4865, "step": 6230 }, { "epoch": 0.42363092811523306, "grad_norm": 1.8367078304290771, "learning_rate": 9.47080105992662e-05, "loss": 3.5712, "step": 6235 }, { "epoch": 0.4239706481858948, "grad_norm": 2.3989691734313965, "learning_rate": 9.470376409838295e-05, "loss": 3.4337, "step": 6240 }, { "epoch": 0.4243103682565566, "grad_norm": 2.3639042377471924, "learning_rate": 9.469951759749966e-05, "loss": 3.4743, "step": 6245 }, { "epoch": 0.42465008832721834, "grad_norm": 2.052133560180664, "learning_rate": 9.469527109661639e-05, "loss": 3.3761, "step": 6250 }, { "epoch": 0.42498980839788014, "grad_norm": 2.2083051204681396, "learning_rate": 9.469102459573313e-05, "loss": 3.4967, "step": 6255 }, { "epoch": 0.42532952846854194, "grad_norm": 2.2909905910491943, "learning_rate": 9.468677809484984e-05, "loss": 3.1705, "step": 6260 }, { "epoch": 0.4256692485392037, "grad_norm": 1.757896900177002, "learning_rate": 9.468253159396657e-05, "loss": 3.1889, "step": 6265 }, { "epoch": 0.4260089686098655, "grad_norm": 2.757215738296509, "learning_rate": 9.467828509308331e-05, "loss": 3.631, "step": 6270 }, { "epoch": 0.4263486886805272, "grad_norm": 2.077026844024658, "learning_rate": 9.467403859220003e-05, "loss": 3.3514, "step": 6275 }, { "epoch": 0.426688408751189, "grad_norm": 2.645669460296631, "learning_rate": 9.466979209131676e-05, "loss": 3.4541, "step": 6280 }, { "epoch": 0.4270281288218508, "grad_norm": 2.870403528213501, "learning_rate": 9.466554559043348e-05, "loss": 3.6692, "step": 6285 }, { "epoch": 0.42736784889251256, "grad_norm": 2.545874834060669, "learning_rate": 9.466129908955021e-05, "loss": 3.4684, "step": 6290 }, { "epoch": 0.42770756896317436, "grad_norm": 1.9612118005752563, "learning_rate": 9.465705258866694e-05, "loss": 3.6194, "step": 6295 }, { "epoch": 0.4280472890338361, "grad_norm": 1.9753509759902954, "learning_rate": 9.465280608778367e-05, 
"loss": 3.4943, "step": 6300 }, { "epoch": 0.4283870091044979, "grad_norm": 2.0523602962493896, "learning_rate": 9.46485595869004e-05, "loss": 3.1491, "step": 6305 }, { "epoch": 0.4287267291751597, "grad_norm": 2.113978624343872, "learning_rate": 9.464431308601712e-05, "loss": 3.8279, "step": 6310 }, { "epoch": 0.42906644924582144, "grad_norm": 6.667606830596924, "learning_rate": 9.464006658513385e-05, "loss": 3.6007, "step": 6315 }, { "epoch": 0.42940616931648323, "grad_norm": 2.3507540225982666, "learning_rate": 9.463582008425058e-05, "loss": 3.5137, "step": 6320 }, { "epoch": 0.429745889387145, "grad_norm": 2.382766008377075, "learning_rate": 9.463157358336731e-05, "loss": 3.3439, "step": 6325 }, { "epoch": 0.4300856094578068, "grad_norm": 2.8620731830596924, "learning_rate": 9.462732708248404e-05, "loss": 3.4246, "step": 6330 }, { "epoch": 0.4304253295284685, "grad_norm": 2.1794207096099854, "learning_rate": 9.462308058160076e-05, "loss": 3.5608, "step": 6335 }, { "epoch": 0.4307650495991303, "grad_norm": 2.4482569694519043, "learning_rate": 9.461883408071749e-05, "loss": 3.409, "step": 6340 }, { "epoch": 0.4311047696697921, "grad_norm": 2.355351686477661, "learning_rate": 9.461458757983422e-05, "loss": 3.3397, "step": 6345 }, { "epoch": 0.43144448974045385, "grad_norm": 2.7132089138031006, "learning_rate": 9.461034107895095e-05, "loss": 3.4117, "step": 6350 }, { "epoch": 0.43178420981111565, "grad_norm": 2.6843183040618896, "learning_rate": 9.460609457806768e-05, "loss": 3.4725, "step": 6355 }, { "epoch": 0.4321239298817774, "grad_norm": 2.08958101272583, "learning_rate": 9.46018480771844e-05, "loss": 3.2091, "step": 6360 }, { "epoch": 0.4324636499524392, "grad_norm": 2.0385537147521973, "learning_rate": 9.459760157630113e-05, "loss": 3.5679, "step": 6365 }, { "epoch": 0.432803370023101, "grad_norm": 2.765249013900757, "learning_rate": 9.459335507541786e-05, "loss": 3.1881, "step": 6370 }, { "epoch": 0.43314309009376273, "grad_norm": 2.6931862831115723, 
"learning_rate": 9.458910857453459e-05, "loss": 3.3704, "step": 6375 }, { "epoch": 0.4334828101644245, "grad_norm": 2.6158087253570557, "learning_rate": 9.458486207365132e-05, "loss": 3.5237, "step": 6380 }, { "epoch": 0.43382253023508627, "grad_norm": 2.631019115447998, "learning_rate": 9.458061557276804e-05, "loss": 3.4318, "step": 6385 }, { "epoch": 0.43416225030574807, "grad_norm": 2.567272186279297, "learning_rate": 9.457636907188477e-05, "loss": 3.3813, "step": 6390 }, { "epoch": 0.43450197037640986, "grad_norm": 2.3392422199249268, "learning_rate": 9.45721225710015e-05, "loss": 3.6318, "step": 6395 }, { "epoch": 0.4348416904470716, "grad_norm": 2.278768539428711, "learning_rate": 9.456787607011823e-05, "loss": 3.3889, "step": 6400 }, { "epoch": 0.4351814105177334, "grad_norm": 1.941454529762268, "learning_rate": 9.456362956923496e-05, "loss": 3.553, "step": 6405 }, { "epoch": 0.43552113058839514, "grad_norm": 2.306364059448242, "learning_rate": 9.455938306835168e-05, "loss": 3.3899, "step": 6410 }, { "epoch": 0.43586085065905694, "grad_norm": 2.377596855163574, "learning_rate": 9.45551365674684e-05, "loss": 3.3728, "step": 6415 }, { "epoch": 0.4362005707297187, "grad_norm": 1.8657268285751343, "learning_rate": 9.455089006658514e-05, "loss": 3.3541, "step": 6420 }, { "epoch": 0.4365402908003805, "grad_norm": 2.3879880905151367, "learning_rate": 9.454664356570187e-05, "loss": 3.3301, "step": 6425 }, { "epoch": 0.4368800108710423, "grad_norm": 2.1015968322753906, "learning_rate": 9.454239706481858e-05, "loss": 3.7275, "step": 6430 }, { "epoch": 0.437219730941704, "grad_norm": 2.0952141284942627, "learning_rate": 9.453815056393532e-05, "loss": 3.5275, "step": 6435 }, { "epoch": 0.4375594510123658, "grad_norm": 2.6250181198120117, "learning_rate": 9.453390406305205e-05, "loss": 3.407, "step": 6440 }, { "epoch": 0.43789917108302756, "grad_norm": 2.5968611240386963, "learning_rate": 9.452965756216878e-05, "loss": 3.4617, "step": 6445 }, { "epoch": 
0.43823889115368936, "grad_norm": 2.3081111907958984, "learning_rate": 9.452541106128551e-05, "loss": 3.5497, "step": 6450 }, { "epoch": 0.43857861122435116, "grad_norm": 2.375849723815918, "learning_rate": 9.452116456040224e-05, "loss": 3.725, "step": 6455 }, { "epoch": 0.4389183312950129, "grad_norm": 2.0327749252319336, "learning_rate": 9.451691805951896e-05, "loss": 3.2806, "step": 6460 }, { "epoch": 0.4392580513656747, "grad_norm": 2.4605295658111572, "learning_rate": 9.451267155863569e-05, "loss": 3.4662, "step": 6465 }, { "epoch": 0.43959777143633644, "grad_norm": 2.1697449684143066, "learning_rate": 9.450842505775242e-05, "loss": 3.2462, "step": 6470 }, { "epoch": 0.43993749150699824, "grad_norm": 2.1426942348480225, "learning_rate": 9.450417855686915e-05, "loss": 3.7187, "step": 6475 }, { "epoch": 0.44027721157766003, "grad_norm": 2.0731704235076904, "learning_rate": 9.449993205598588e-05, "loss": 3.6408, "step": 6480 }, { "epoch": 0.4406169316483218, "grad_norm": 2.120217800140381, "learning_rate": 9.44956855551026e-05, "loss": 3.355, "step": 6485 }, { "epoch": 0.44095665171898357, "grad_norm": 2.223435640335083, "learning_rate": 9.449143905421933e-05, "loss": 3.6731, "step": 6490 }, { "epoch": 0.4412963717896453, "grad_norm": 2.6069273948669434, "learning_rate": 9.448719255333606e-05, "loss": 3.4692, "step": 6495 }, { "epoch": 0.4416360918603071, "grad_norm": 2.720393657684326, "learning_rate": 9.448294605245277e-05, "loss": 3.6989, "step": 6500 }, { "epoch": 0.4419758119309689, "grad_norm": 2.779848575592041, "learning_rate": 9.447869955156952e-05, "loss": 3.2684, "step": 6505 }, { "epoch": 0.44231553200163065, "grad_norm": 2.641580104827881, "learning_rate": 9.447445305068624e-05, "loss": 3.6752, "step": 6510 }, { "epoch": 0.44265525207229245, "grad_norm": 2.4761297702789307, "learning_rate": 9.447020654980296e-05, "loss": 3.4422, "step": 6515 }, { "epoch": 0.4429949721429542, "grad_norm": 2.3604958057403564, "learning_rate": 9.44659600489197e-05, 
"loss": 3.2987, "step": 6520 }, { "epoch": 0.443334692213616, "grad_norm": 13.032170295715332, "learning_rate": 9.446171354803643e-05, "loss": 3.5987, "step": 6525 }, { "epoch": 0.44367441228427773, "grad_norm": 2.362748146057129, "learning_rate": 9.445746704715314e-05, "loss": 3.4385, "step": 6530 }, { "epoch": 0.44401413235493953, "grad_norm": 2.326183795928955, "learning_rate": 9.445322054626988e-05, "loss": 3.2894, "step": 6535 }, { "epoch": 0.4443538524256013, "grad_norm": 2.1152613162994385, "learning_rate": 9.444897404538661e-05, "loss": 3.404, "step": 6540 }, { "epoch": 0.44469357249626307, "grad_norm": 1.8823668956756592, "learning_rate": 9.444472754450333e-05, "loss": 3.6945, "step": 6545 }, { "epoch": 0.44503329256692487, "grad_norm": 2.385741710662842, "learning_rate": 9.444048104362007e-05, "loss": 3.1683, "step": 6550 }, { "epoch": 0.4453730126375866, "grad_norm": 2.7184717655181885, "learning_rate": 9.44362345427368e-05, "loss": 3.5738, "step": 6555 }, { "epoch": 0.4457127327082484, "grad_norm": 1.9414417743682861, "learning_rate": 9.443198804185351e-05, "loss": 3.7462, "step": 6560 }, { "epoch": 0.4460524527789102, "grad_norm": 1.9968997240066528, "learning_rate": 9.442774154097025e-05, "loss": 3.2228, "step": 6565 }, { "epoch": 0.44639217284957194, "grad_norm": 2.190063953399658, "learning_rate": 9.442349504008697e-05, "loss": 3.269, "step": 6570 }, { "epoch": 0.44673189292023374, "grad_norm": 2.6371009349823, "learning_rate": 9.44192485392037e-05, "loss": 3.6551, "step": 6575 }, { "epoch": 0.4470716129908955, "grad_norm": 2.2833216190338135, "learning_rate": 9.441500203832044e-05, "loss": 3.3992, "step": 6580 }, { "epoch": 0.4474113330615573, "grad_norm": 2.629237174987793, "learning_rate": 9.441075553743715e-05, "loss": 3.4378, "step": 6585 }, { "epoch": 0.4477510531322191, "grad_norm": 1.9535990953445435, "learning_rate": 9.440650903655388e-05, "loss": 3.3535, "step": 6590 }, { "epoch": 0.4480907732028808, "grad_norm": 1.9888969659805298, 
"learning_rate": 9.440226253567062e-05, "loss": 3.4303, "step": 6595 }, { "epoch": 0.4484304932735426, "grad_norm": 2.4825336933135986, "learning_rate": 9.439801603478734e-05, "loss": 3.6693, "step": 6600 }, { "epoch": 0.44877021334420436, "grad_norm": 1.7589823007583618, "learning_rate": 9.439376953390406e-05, "loss": 3.404, "step": 6605 }, { "epoch": 0.44910993341486616, "grad_norm": 2.042783498764038, "learning_rate": 9.43895230330208e-05, "loss": 3.434, "step": 6610 }, { "epoch": 0.4494496534855279, "grad_norm": 1.9554245471954346, "learning_rate": 9.438527653213752e-05, "loss": 3.6922, "step": 6615 }, { "epoch": 0.4497893735561897, "grad_norm": 1.8562333583831787, "learning_rate": 9.438103003125425e-05, "loss": 3.4767, "step": 6620 }, { "epoch": 0.4501290936268515, "grad_norm": 2.6993446350097656, "learning_rate": 9.437678353037099e-05, "loss": 3.5627, "step": 6625 }, { "epoch": 0.45046881369751324, "grad_norm": 2.9945762157440186, "learning_rate": 9.43725370294877e-05, "loss": 3.6948, "step": 6630 }, { "epoch": 0.45080853376817503, "grad_norm": 2.996058702468872, "learning_rate": 9.436829052860443e-05, "loss": 3.5105, "step": 6635 }, { "epoch": 0.4511482538388368, "grad_norm": 2.4074368476867676, "learning_rate": 9.436404402772116e-05, "loss": 3.5571, "step": 6640 }, { "epoch": 0.4514879739094986, "grad_norm": 2.7382924556732178, "learning_rate": 9.435979752683789e-05, "loss": 3.2168, "step": 6645 }, { "epoch": 0.45182769398016037, "grad_norm": 2.3451027870178223, "learning_rate": 9.435555102595462e-05, "loss": 3.6281, "step": 6650 }, { "epoch": 0.4521674140508221, "grad_norm": 2.5332274436950684, "learning_rate": 9.435130452507134e-05, "loss": 3.3889, "step": 6655 }, { "epoch": 0.4525071341214839, "grad_norm": 2.3876821994781494, "learning_rate": 9.434705802418807e-05, "loss": 3.3261, "step": 6660 }, { "epoch": 0.45284685419214565, "grad_norm": 2.107971429824829, "learning_rate": 9.43428115233048e-05, "loss": 3.2646, "step": 6665 }, { "epoch": 
0.45318657426280745, "grad_norm": 3.0384490489959717, "learning_rate": 9.433856502242153e-05, "loss": 3.6383, "step": 6670 }, { "epoch": 0.45352629433346925, "grad_norm": 2.549604654312134, "learning_rate": 9.433431852153826e-05, "loss": 3.7763, "step": 6675 }, { "epoch": 0.453866014404131, "grad_norm": 2.0201973915100098, "learning_rate": 9.433007202065498e-05, "loss": 3.5223, "step": 6680 }, { "epoch": 0.4542057344747928, "grad_norm": 2.8859968185424805, "learning_rate": 9.432582551977171e-05, "loss": 3.3874, "step": 6685 }, { "epoch": 0.45454545454545453, "grad_norm": 2.010023593902588, "learning_rate": 9.432157901888844e-05, "loss": 3.3481, "step": 6690 }, { "epoch": 0.45488517461611633, "grad_norm": 2.603708028793335, "learning_rate": 9.431733251800517e-05, "loss": 3.5631, "step": 6695 }, { "epoch": 0.45522489468677807, "grad_norm": 2.7841217517852783, "learning_rate": 9.43130860171219e-05, "loss": 3.2159, "step": 6700 }, { "epoch": 0.45556461475743987, "grad_norm": 3.0173606872558594, "learning_rate": 9.430883951623862e-05, "loss": 3.3789, "step": 6705 }, { "epoch": 0.45590433482810166, "grad_norm": 2.4205286502838135, "learning_rate": 9.430459301535535e-05, "loss": 3.543, "step": 6710 }, { "epoch": 0.4562440548987634, "grad_norm": 2.243870735168457, "learning_rate": 9.430034651447208e-05, "loss": 3.5436, "step": 6715 }, { "epoch": 0.4565837749694252, "grad_norm": 2.2129411697387695, "learning_rate": 9.429610001358881e-05, "loss": 3.4442, "step": 6720 }, { "epoch": 0.45692349504008695, "grad_norm": 2.4223175048828125, "learning_rate": 9.429185351270554e-05, "loss": 3.3737, "step": 6725 }, { "epoch": 0.45726321511074874, "grad_norm": 1.9786146879196167, "learning_rate": 9.428760701182226e-05, "loss": 3.5645, "step": 6730 }, { "epoch": 0.45760293518141054, "grad_norm": 2.7910783290863037, "learning_rate": 9.428336051093899e-05, "loss": 3.487, "step": 6735 }, { "epoch": 0.4579426552520723, "grad_norm": 2.45232892036438, "learning_rate": 9.427911401005572e-05, 
"loss": 3.5406, "step": 6740 }, { "epoch": 0.4582823753227341, "grad_norm": 2.456681251525879, "learning_rate": 9.427486750917245e-05, "loss": 3.475, "step": 6745 }, { "epoch": 0.4586220953933958, "grad_norm": 1.9153252840042114, "learning_rate": 9.427062100828918e-05, "loss": 3.5629, "step": 6750 }, { "epoch": 0.4589618154640576, "grad_norm": 2.0055551528930664, "learning_rate": 9.42663745074059e-05, "loss": 3.5447, "step": 6755 }, { "epoch": 0.4593015355347194, "grad_norm": 2.0676677227020264, "learning_rate": 9.426212800652263e-05, "loss": 3.2286, "step": 6760 }, { "epoch": 0.45964125560538116, "grad_norm": 2.464555501937866, "learning_rate": 9.425788150563936e-05, "loss": 3.5286, "step": 6765 }, { "epoch": 0.45998097567604296, "grad_norm": 2.44195294380188, "learning_rate": 9.425363500475607e-05, "loss": 3.5644, "step": 6770 }, { "epoch": 0.4603206957467047, "grad_norm": 2.6626532077789307, "learning_rate": 9.424938850387282e-05, "loss": 3.5582, "step": 6775 }, { "epoch": 0.4606604158173665, "grad_norm": 2.4734203815460205, "learning_rate": 9.424514200298954e-05, "loss": 3.6252, "step": 6780 }, { "epoch": 0.46100013588802824, "grad_norm": 2.028855085372925, "learning_rate": 9.424089550210627e-05, "loss": 3.5049, "step": 6785 }, { "epoch": 0.46133985595869004, "grad_norm": 2.358114004135132, "learning_rate": 9.4236649001223e-05, "loss": 3.4021, "step": 6790 }, { "epoch": 0.46167957602935183, "grad_norm": 2.8885111808776855, "learning_rate": 9.423240250033973e-05, "loss": 3.3187, "step": 6795 }, { "epoch": 0.4620192961000136, "grad_norm": 2.25164794921875, "learning_rate": 9.422815599945646e-05, "loss": 3.4916, "step": 6800 }, { "epoch": 0.4623590161706754, "grad_norm": 3.0029280185699463, "learning_rate": 9.422390949857318e-05, "loss": 3.3904, "step": 6805 }, { "epoch": 0.4626987362413371, "grad_norm": 5.231080055236816, "learning_rate": 9.421966299768991e-05, "loss": 3.3713, "step": 6810 }, { "epoch": 0.4630384563119989, "grad_norm": 2.0717029571533203, 
"learning_rate": 9.421541649680664e-05, "loss": 3.1369, "step": 6815 }, { "epoch": 0.4633781763826607, "grad_norm": 2.319355010986328, "learning_rate": 9.421116999592337e-05, "loss": 3.6872, "step": 6820 }, { "epoch": 0.46371789645332245, "grad_norm": 2.5064797401428223, "learning_rate": 9.42069234950401e-05, "loss": 3.5946, "step": 6825 }, { "epoch": 0.46405761652398425, "grad_norm": 2.3484818935394287, "learning_rate": 9.420267699415682e-05, "loss": 3.414, "step": 6830 }, { "epoch": 0.464397336594646, "grad_norm": 2.2609245777130127, "learning_rate": 9.419843049327355e-05, "loss": 3.3876, "step": 6835 }, { "epoch": 0.4647370566653078, "grad_norm": 1.8349295854568481, "learning_rate": 9.419418399239027e-05, "loss": 3.6525, "step": 6840 }, { "epoch": 0.4650767767359696, "grad_norm": 1.8431589603424072, "learning_rate": 9.418993749150701e-05, "loss": 3.3925, "step": 6845 }, { "epoch": 0.46541649680663133, "grad_norm": 2.482482671737671, "learning_rate": 9.418569099062374e-05, "loss": 3.6049, "step": 6850 }, { "epoch": 0.4657562168772931, "grad_norm": 2.1367428302764893, "learning_rate": 9.418144448974045e-05, "loss": 3.4989, "step": 6855 }, { "epoch": 0.46609593694795487, "grad_norm": 2.3413004875183105, "learning_rate": 9.417719798885719e-05, "loss": 3.6076, "step": 6860 }, { "epoch": 0.46643565701861667, "grad_norm": 1.8435660600662231, "learning_rate": 9.417295148797392e-05, "loss": 3.3502, "step": 6865 }, { "epoch": 0.4667753770892784, "grad_norm": 2.126728057861328, "learning_rate": 9.416870498709063e-05, "loss": 3.4011, "step": 6870 }, { "epoch": 0.4671150971599402, "grad_norm": 2.902451992034912, "learning_rate": 9.416445848620738e-05, "loss": 3.2404, "step": 6875 }, { "epoch": 0.467454817230602, "grad_norm": 2.37249493598938, "learning_rate": 9.41602119853241e-05, "loss": 3.153, "step": 6880 }, { "epoch": 0.46779453730126375, "grad_norm": 2.160510778427124, "learning_rate": 9.415596548444082e-05, "loss": 3.4056, "step": 6885 }, { "epoch": 
0.46813425737192554, "grad_norm": 1.893813133239746, "learning_rate": 9.415171898355756e-05, "loss": 3.4616, "step": 6890 }, { "epoch": 0.4684739774425873, "grad_norm": 2.4358766078948975, "learning_rate": 9.414747248267429e-05, "loss": 3.5515, "step": 6895 }, { "epoch": 0.4688136975132491, "grad_norm": 2.7551798820495605, "learning_rate": 9.4143225981791e-05, "loss": 3.5848, "step": 6900 }, { "epoch": 0.4691534175839109, "grad_norm": 2.432792901992798, "learning_rate": 9.413897948090774e-05, "loss": 3.5817, "step": 6905 }, { "epoch": 0.4694931376545726, "grad_norm": 2.665708541870117, "learning_rate": 9.413473298002447e-05, "loss": 3.5486, "step": 6910 }, { "epoch": 0.4698328577252344, "grad_norm": 2.0607924461364746, "learning_rate": 9.413048647914119e-05, "loss": 3.5216, "step": 6915 }, { "epoch": 0.47017257779589616, "grad_norm": 2.703848123550415, "learning_rate": 9.412623997825793e-05, "loss": 3.2601, "step": 6920 }, { "epoch": 0.47051229786655796, "grad_norm": 1.9270505905151367, "learning_rate": 9.412199347737464e-05, "loss": 3.4523, "step": 6925 }, { "epoch": 0.47085201793721976, "grad_norm": 2.961392641067505, "learning_rate": 9.411774697649137e-05, "loss": 3.5116, "step": 6930 }, { "epoch": 0.4711917380078815, "grad_norm": 2.5065722465515137, "learning_rate": 9.411350047560811e-05, "loss": 3.4238, "step": 6935 }, { "epoch": 0.4715314580785433, "grad_norm": 2.1807680130004883, "learning_rate": 9.410925397472483e-05, "loss": 3.392, "step": 6940 }, { "epoch": 0.47187117814920504, "grad_norm": 2.7778799533843994, "learning_rate": 9.410500747384155e-05, "loss": 3.6427, "step": 6945 }, { "epoch": 0.47221089821986684, "grad_norm": 2.6303834915161133, "learning_rate": 9.41007609729583e-05, "loss": 3.3448, "step": 6950 }, { "epoch": 0.4725506182905286, "grad_norm": 2.7593629360198975, "learning_rate": 9.409651447207501e-05, "loss": 3.7642, "step": 6955 }, { "epoch": 0.4728903383611904, "grad_norm": 3.0659103393554688, "learning_rate": 9.409226797119174e-05, 
"loss": 3.6468, "step": 6960 }, { "epoch": 0.4732300584318522, "grad_norm": 2.652475595474243, "learning_rate": 9.408802147030848e-05, "loss": 3.5978, "step": 6965 }, { "epoch": 0.4735697785025139, "grad_norm": 2.0086569786071777, "learning_rate": 9.40837749694252e-05, "loss": 3.2978, "step": 6970 }, { "epoch": 0.4739094985731757, "grad_norm": 2.729933261871338, "learning_rate": 9.407952846854192e-05, "loss": 3.5512, "step": 6975 }, { "epoch": 0.47424921864383746, "grad_norm": 1.6890413761138916, "learning_rate": 9.407528196765866e-05, "loss": 3.6644, "step": 6980 }, { "epoch": 0.47458893871449925, "grad_norm": 1.9818360805511475, "learning_rate": 9.407103546677538e-05, "loss": 3.3539, "step": 6985 }, { "epoch": 0.47492865878516105, "grad_norm": 2.3442163467407227, "learning_rate": 9.40667889658921e-05, "loss": 3.4523, "step": 6990 }, { "epoch": 0.4752683788558228, "grad_norm": 2.061002492904663, "learning_rate": 9.406254246500883e-05, "loss": 3.0931, "step": 6995 }, { "epoch": 0.4756080989264846, "grad_norm": 8.470160484313965, "learning_rate": 9.405829596412556e-05, "loss": 3.1827, "step": 7000 }, { "epoch": 0.47594781899714633, "grad_norm": 2.8169310092926025, "learning_rate": 9.405404946324229e-05, "loss": 3.8022, "step": 7005 }, { "epoch": 0.47628753906780813, "grad_norm": 2.561768054962158, "learning_rate": 9.404980296235902e-05, "loss": 3.6038, "step": 7010 }, { "epoch": 0.4766272591384699, "grad_norm": 2.4986679553985596, "learning_rate": 9.404555646147575e-05, "loss": 3.4651, "step": 7015 }, { "epoch": 0.47696697920913167, "grad_norm": 2.27316951751709, "learning_rate": 9.404130996059247e-05, "loss": 3.101, "step": 7020 }, { "epoch": 0.47730669927979347, "grad_norm": 2.2828121185302734, "learning_rate": 9.40370634597092e-05, "loss": 3.5669, "step": 7025 }, { "epoch": 0.4776464193504552, "grad_norm": 2.379077434539795, "learning_rate": 9.403281695882593e-05, "loss": 3.4817, "step": 7030 }, { "epoch": 0.477986139421117, "grad_norm": 2.2172257900238037, 
"learning_rate": 9.402857045794266e-05, "loss": 3.2743, "step": 7035 }, { "epoch": 0.47832585949177875, "grad_norm": 2.4453556537628174, "learning_rate": 9.402432395705939e-05, "loss": 3.383, "step": 7040 }, { "epoch": 0.47866557956244055, "grad_norm": 2.9633665084838867, "learning_rate": 9.402007745617611e-05, "loss": 3.5467, "step": 7045 }, { "epoch": 0.47900529963310234, "grad_norm": 2.0435760021209717, "learning_rate": 9.401583095529284e-05, "loss": 3.4691, "step": 7050 }, { "epoch": 0.4793450197037641, "grad_norm": 2.785783290863037, "learning_rate": 9.401158445440957e-05, "loss": 3.6137, "step": 7055 }, { "epoch": 0.4796847397744259, "grad_norm": 2.068085193634033, "learning_rate": 9.40073379535263e-05, "loss": 3.5638, "step": 7060 }, { "epoch": 0.4800244598450876, "grad_norm": 2.291339159011841, "learning_rate": 9.400309145264303e-05, "loss": 3.4057, "step": 7065 }, { "epoch": 0.4803641799157494, "grad_norm": 2.7503013610839844, "learning_rate": 9.399884495175975e-05, "loss": 3.3003, "step": 7070 }, { "epoch": 0.4807038999864112, "grad_norm": 1.7548198699951172, "learning_rate": 9.399459845087648e-05, "loss": 3.6192, "step": 7075 }, { "epoch": 0.48104362005707296, "grad_norm": 1.8460267782211304, "learning_rate": 9.399035194999321e-05, "loss": 3.6293, "step": 7080 }, { "epoch": 0.48138334012773476, "grad_norm": 2.46345591545105, "learning_rate": 9.398610544910994e-05, "loss": 3.6347, "step": 7085 }, { "epoch": 0.4817230601983965, "grad_norm": 2.366938829421997, "learning_rate": 9.398185894822667e-05, "loss": 3.4579, "step": 7090 }, { "epoch": 0.4820627802690583, "grad_norm": 2.4367525577545166, "learning_rate": 9.39776124473434e-05, "loss": 3.5454, "step": 7095 }, { "epoch": 0.4824025003397201, "grad_norm": 1.74684476852417, "learning_rate": 9.397336594646012e-05, "loss": 3.416, "step": 7100 }, { "epoch": 0.48274222041038184, "grad_norm": 2.6481809616088867, "learning_rate": 9.396911944557685e-05, "loss": 3.452, "step": 7105 }, { "epoch": 
0.48308194048104364, "grad_norm": 2.301795244216919, "learning_rate": 9.396487294469358e-05, "loss": 3.6525, "step": 7110 }, { "epoch": 0.4834216605517054, "grad_norm": 2.501932144165039, "learning_rate": 9.39606264438103e-05, "loss": 3.3493, "step": 7115 }, { "epoch": 0.4837613806223672, "grad_norm": 2.062826633453369, "learning_rate": 9.395637994292703e-05, "loss": 3.4704, "step": 7120 }, { "epoch": 0.4841011006930289, "grad_norm": 2.595562696456909, "learning_rate": 9.395213344204376e-05, "loss": 3.3932, "step": 7125 }, { "epoch": 0.4844408207636907, "grad_norm": 2.828798294067383, "learning_rate": 9.394788694116049e-05, "loss": 3.3996, "step": 7130 }, { "epoch": 0.4847805408343525, "grad_norm": 2.470886468887329, "learning_rate": 9.394364044027722e-05, "loss": 3.3655, "step": 7135 }, { "epoch": 0.48512026090501426, "grad_norm": 2.382993221282959, "learning_rate": 9.393939393939395e-05, "loss": 3.6529, "step": 7140 }, { "epoch": 0.48545998097567605, "grad_norm": 2.754181146621704, "learning_rate": 9.393514743851067e-05, "loss": 3.6594, "step": 7145 }, { "epoch": 0.4857997010463378, "grad_norm": 1.9599021673202515, "learning_rate": 9.39309009376274e-05, "loss": 3.5488, "step": 7150 }, { "epoch": 0.4861394211169996, "grad_norm": 2.0592167377471924, "learning_rate": 9.392665443674413e-05, "loss": 3.5513, "step": 7155 }, { "epoch": 0.4864791411876614, "grad_norm": 2.4830880165100098, "learning_rate": 9.392240793586086e-05, "loss": 3.5027, "step": 7160 }, { "epoch": 0.48681886125832313, "grad_norm": 2.4404654502868652, "learning_rate": 9.391816143497759e-05, "loss": 3.4388, "step": 7165 }, { "epoch": 0.48715858132898493, "grad_norm": 2.4427778720855713, "learning_rate": 9.391391493409431e-05, "loss": 3.0905, "step": 7170 }, { "epoch": 0.48749830139964667, "grad_norm": 2.395155429840088, "learning_rate": 9.390966843321104e-05, "loss": 3.5831, "step": 7175 }, { "epoch": 0.48783802147030847, "grad_norm": 2.1079752445220947, "learning_rate": 9.390542193232777e-05, 
"loss": 3.5322, "step": 7180 }, { "epoch": 0.48817774154097027, "grad_norm": 2.6965034008026123, "learning_rate": 9.39011754314445e-05, "loss": 3.4407, "step": 7185 }, { "epoch": 0.488517461611632, "grad_norm": 1.781265139579773, "learning_rate": 9.389692893056123e-05, "loss": 3.522, "step": 7190 }, { "epoch": 0.4888571816822938, "grad_norm": 2.130610466003418, "learning_rate": 9.389268242967794e-05, "loss": 3.6397, "step": 7195 }, { "epoch": 0.48919690175295555, "grad_norm": 2.013097047805786, "learning_rate": 9.388843592879468e-05, "loss": 3.2335, "step": 7200 }, { "epoch": 0.48953662182361735, "grad_norm": 2.235908269882202, "learning_rate": 9.388418942791141e-05, "loss": 3.4402, "step": 7205 }, { "epoch": 0.4898763418942791, "grad_norm": 2.2331745624542236, "learning_rate": 9.387994292702812e-05, "loss": 3.659, "step": 7210 }, { "epoch": 0.4902160619649409, "grad_norm": 1.9735581874847412, "learning_rate": 9.387569642614487e-05, "loss": 3.5952, "step": 7215 }, { "epoch": 0.4905557820356027, "grad_norm": 2.35489821434021, "learning_rate": 9.38714499252616e-05, "loss": 3.4633, "step": 7220 }, { "epoch": 0.4908955021062644, "grad_norm": 2.8531453609466553, "learning_rate": 9.386720342437831e-05, "loss": 3.6025, "step": 7225 }, { "epoch": 0.4912352221769262, "grad_norm": 2.5958797931671143, "learning_rate": 9.386295692349505e-05, "loss": 3.223, "step": 7230 }, { "epoch": 0.49157494224758796, "grad_norm": 2.380370855331421, "learning_rate": 9.385871042261178e-05, "loss": 3.4611, "step": 7235 }, { "epoch": 0.49191466231824976, "grad_norm": 3.0313644409179688, "learning_rate": 9.385446392172849e-05, "loss": 3.5145, "step": 7240 }, { "epoch": 0.49225438238891156, "grad_norm": 2.2284817695617676, "learning_rate": 9.385021742084523e-05, "loss": 3.6086, "step": 7245 }, { "epoch": 0.4925941024595733, "grad_norm": 2.185957193374634, "learning_rate": 9.384597091996196e-05, "loss": 3.6982, "step": 7250 }, { "epoch": 0.4929338225302351, "grad_norm": 2.016425132751465, 
"learning_rate": 9.384172441907868e-05, "loss": 3.0776, "step": 7255 }, { "epoch": 0.49327354260089684, "grad_norm": 2.518101215362549, "learning_rate": 9.383747791819542e-05, "loss": 3.5951, "step": 7260 }, { "epoch": 0.49361326267155864, "grad_norm": 2.331812858581543, "learning_rate": 9.383323141731213e-05, "loss": 3.4408, "step": 7265 }, { "epoch": 0.49395298274222044, "grad_norm": 2.329537868499756, "learning_rate": 9.382898491642886e-05, "loss": 3.4495, "step": 7270 }, { "epoch": 0.4942927028128822, "grad_norm": 2.441767454147339, "learning_rate": 9.38247384155456e-05, "loss": 3.4213, "step": 7275 }, { "epoch": 0.494632422883544, "grad_norm": 2.638486623764038, "learning_rate": 9.382049191466232e-05, "loss": 3.522, "step": 7280 }, { "epoch": 0.4949721429542057, "grad_norm": 2.0256617069244385, "learning_rate": 9.381624541377905e-05, "loss": 3.6722, "step": 7285 }, { "epoch": 0.4953118630248675, "grad_norm": 2.3431129455566406, "learning_rate": 9.381199891289579e-05, "loss": 3.2951, "step": 7290 }, { "epoch": 0.49565158309552926, "grad_norm": 2.6712446212768555, "learning_rate": 9.38077524120125e-05, "loss": 3.3398, "step": 7295 }, { "epoch": 0.49599130316619106, "grad_norm": 1.864131212234497, "learning_rate": 9.380350591112923e-05, "loss": 3.4863, "step": 7300 }, { "epoch": 0.49633102323685285, "grad_norm": 2.352243661880493, "learning_rate": 9.379925941024597e-05, "loss": 3.3791, "step": 7305 }, { "epoch": 0.4966707433075146, "grad_norm": 2.14172625541687, "learning_rate": 9.379501290936269e-05, "loss": 3.4757, "step": 7310 }, { "epoch": 0.4970104633781764, "grad_norm": 2.0858139991760254, "learning_rate": 9.379076640847941e-05, "loss": 3.4032, "step": 7315 }, { "epoch": 0.49735018344883813, "grad_norm": 2.608372688293457, "learning_rate": 9.378651990759615e-05, "loss": 3.6841, "step": 7320 }, { "epoch": 0.49768990351949993, "grad_norm": 2.271538257598877, "learning_rate": 9.378227340671287e-05, "loss": 3.4201, "step": 7325 }, { "epoch": 
0.49802962359016173, "grad_norm": 1.973562479019165, "learning_rate": 9.37780269058296e-05, "loss": 3.332, "step": 7330 }, { "epoch": 0.49836934366082347, "grad_norm": 2.0143280029296875, "learning_rate": 9.377378040494634e-05, "loss": 3.5688, "step": 7335 }, { "epoch": 0.49870906373148527, "grad_norm": 2.195525646209717, "learning_rate": 9.376953390406305e-05, "loss": 3.644, "step": 7340 }, { "epoch": 0.499048783802147, "grad_norm": 2.025935649871826, "learning_rate": 9.376528740317978e-05, "loss": 3.491, "step": 7345 }, { "epoch": 0.4993885038728088, "grad_norm": 1.9479522705078125, "learning_rate": 9.376104090229651e-05, "loss": 3.1799, "step": 7350 }, { "epoch": 0.4997282239434706, "grad_norm": 1.8824591636657715, "learning_rate": 9.375679440141324e-05, "loss": 3.4035, "step": 7355 }, { "epoch": 0.5000679440141323, "grad_norm": 2.545675277709961, "learning_rate": 9.375254790052997e-05, "loss": 3.6221, "step": 7360 }, { "epoch": 0.5004076640847941, "grad_norm": 2.1282083988189697, "learning_rate": 9.374830139964669e-05, "loss": 3.5629, "step": 7365 }, { "epoch": 0.5007473841554559, "grad_norm": 2.6184799671173096, "learning_rate": 9.374405489876342e-05, "loss": 3.7157, "step": 7370 }, { "epoch": 0.5010871042261177, "grad_norm": 2.4793124198913574, "learning_rate": 9.373980839788015e-05, "loss": 3.5615, "step": 7375 }, { "epoch": 0.5014268242967794, "grad_norm": 2.208740234375, "learning_rate": 9.373556189699688e-05, "loss": 3.7752, "step": 7380 }, { "epoch": 0.5017665443674413, "grad_norm": 1.948194980621338, "learning_rate": 9.37313153961136e-05, "loss": 3.7429, "step": 7385 }, { "epoch": 0.502106264438103, "grad_norm": 1.986470103263855, "learning_rate": 9.372706889523033e-05, "loss": 3.2519, "step": 7390 }, { "epoch": 0.5024459845087648, "grad_norm": 2.75628924369812, "learning_rate": 9.372282239434706e-05, "loss": 3.4789, "step": 7395 }, { "epoch": 0.5027857045794265, "grad_norm": 2.468574047088623, "learning_rate": 9.371857589346379e-05, "loss": 3.5915, 
"step": 7400 }, { "epoch": 0.5031254246500884, "grad_norm": 2.94883394241333, "learning_rate": 9.371432939258052e-05, "loss": 3.3152, "step": 7405 }, { "epoch": 0.5034651447207501, "grad_norm": 2.3007545471191406, "learning_rate": 9.371008289169725e-05, "loss": 3.4305, "step": 7410 }, { "epoch": 0.5038048647914118, "grad_norm": 2.000077962875366, "learning_rate": 9.370583639081397e-05, "loss": 3.4782, "step": 7415 }, { "epoch": 0.5041445848620737, "grad_norm": 2.7133595943450928, "learning_rate": 9.37015898899307e-05, "loss": 3.5937, "step": 7420 }, { "epoch": 0.5044843049327354, "grad_norm": 2.718013286590576, "learning_rate": 9.369734338904743e-05, "loss": 3.2927, "step": 7425 }, { "epoch": 0.5048240250033972, "grad_norm": 2.0150578022003174, "learning_rate": 9.369309688816416e-05, "loss": 3.4968, "step": 7430 }, { "epoch": 0.5051637450740589, "grad_norm": 1.6512356996536255, "learning_rate": 9.368885038728089e-05, "loss": 3.5757, "step": 7435 }, { "epoch": 0.5055034651447208, "grad_norm": 2.2183570861816406, "learning_rate": 9.368460388639761e-05, "loss": 3.3624, "step": 7440 }, { "epoch": 0.5058431852153825, "grad_norm": 2.0382187366485596, "learning_rate": 9.368035738551434e-05, "loss": 3.371, "step": 7445 }, { "epoch": 0.5061829052860443, "grad_norm": 1.9518389701843262, "learning_rate": 9.367611088463107e-05, "loss": 3.3832, "step": 7450 }, { "epoch": 0.5065226253567061, "grad_norm": 3.202738046646118, "learning_rate": 9.36718643837478e-05, "loss": 3.3198, "step": 7455 }, { "epoch": 0.5068623454273679, "grad_norm": 2.3529140949249268, "learning_rate": 9.366761788286453e-05, "loss": 3.5379, "step": 7460 }, { "epoch": 0.5072020654980296, "grad_norm": 2.8575384616851807, "learning_rate": 9.366337138198124e-05, "loss": 3.5806, "step": 7465 }, { "epoch": 0.5075417855686915, "grad_norm": 2.269850730895996, "learning_rate": 9.365912488109798e-05, "loss": 3.1829, "step": 7470 }, { "epoch": 0.5078815056393532, "grad_norm": 2.088341474533081, "learning_rate": 
9.365487838021471e-05, "loss": 3.688, "step": 7475 }, { "epoch": 0.5082212257100149, "grad_norm": 2.2586138248443604, "learning_rate": 9.365063187933144e-05, "loss": 3.1449, "step": 7480 }, { "epoch": 0.5085609457806767, "grad_norm": 2.2704641819000244, "learning_rate": 9.364638537844817e-05, "loss": 3.8194, "step": 7485 }, { "epoch": 0.5089006658513385, "grad_norm": 2.153308391571045, "learning_rate": 9.36421388775649e-05, "loss": 3.2544, "step": 7490 }, { "epoch": 0.5092403859220003, "grad_norm": 2.500732421875, "learning_rate": 9.363789237668162e-05, "loss": 3.6002, "step": 7495 }, { "epoch": 0.509580105992662, "grad_norm": 2.318199872970581, "learning_rate": 9.363364587579835e-05, "loss": 3.5671, "step": 7500 }, { "epoch": 0.5099198260633239, "grad_norm": 1.8818392753601074, "learning_rate": 9.362939937491508e-05, "loss": 3.5941, "step": 7505 }, { "epoch": 0.5102595461339856, "grad_norm": 2.1657497882843018, "learning_rate": 9.36251528740318e-05, "loss": 3.7112, "step": 7510 }, { "epoch": 0.5105992662046474, "grad_norm": 2.491417169570923, "learning_rate": 9.362090637314853e-05, "loss": 3.5348, "step": 7515 }, { "epoch": 0.5109389862753091, "grad_norm": 2.4612016677856445, "learning_rate": 9.361665987226526e-05, "loss": 3.5846, "step": 7520 }, { "epoch": 0.511278706345971, "grad_norm": 2.693815231323242, "learning_rate": 9.361241337138199e-05, "loss": 3.6232, "step": 7525 }, { "epoch": 0.5116184264166327, "grad_norm": 2.876926898956299, "learning_rate": 9.360816687049872e-05, "loss": 3.5763, "step": 7530 }, { "epoch": 0.5119581464872944, "grad_norm": 3.4614970684051514, "learning_rate": 9.360392036961545e-05, "loss": 3.4691, "step": 7535 }, { "epoch": 0.5122978665579563, "grad_norm": 1.8120405673980713, "learning_rate": 9.359967386873217e-05, "loss": 3.6407, "step": 7540 }, { "epoch": 0.512637586628618, "grad_norm": 2.506169319152832, "learning_rate": 9.35954273678489e-05, "loss": 3.3106, "step": 7545 }, { "epoch": 0.5129773066992798, "grad_norm": 
1.9559695720672607, "learning_rate": 9.359118086696562e-05, "loss": 3.3331, "step": 7550 }, { "epoch": 0.5133170267699416, "grad_norm": 2.0006439685821533, "learning_rate": 9.358693436608236e-05, "loss": 3.5985, "step": 7555 }, { "epoch": 0.5136567468406034, "grad_norm": 2.290966510772705, "learning_rate": 9.358268786519909e-05, "loss": 3.6198, "step": 7560 }, { "epoch": 0.5139964669112651, "grad_norm": 2.4100987911224365, "learning_rate": 9.35784413643158e-05, "loss": 3.5429, "step": 7565 }, { "epoch": 0.5143361869819268, "grad_norm": 1.9473223686218262, "learning_rate": 9.357419486343254e-05, "loss": 3.5272, "step": 7570 }, { "epoch": 0.5146759070525887, "grad_norm": 2.3493199348449707, "learning_rate": 9.356994836254927e-05, "loss": 3.3414, "step": 7575 }, { "epoch": 0.5150156271232504, "grad_norm": 2.2954466342926025, "learning_rate": 9.356570186166598e-05, "loss": 3.589, "step": 7580 }, { "epoch": 0.5153553471939122, "grad_norm": 2.340156078338623, "learning_rate": 9.356145536078273e-05, "loss": 3.5733, "step": 7585 }, { "epoch": 0.515695067264574, "grad_norm": 15.759178161621094, "learning_rate": 9.355720885989945e-05, "loss": 3.4173, "step": 7590 }, { "epoch": 0.5160347873352358, "grad_norm": 2.4459228515625, "learning_rate": 9.355296235901617e-05, "loss": 3.4002, "step": 7595 }, { "epoch": 0.5163745074058975, "grad_norm": 1.8496448993682861, "learning_rate": 9.354871585813291e-05, "loss": 3.4946, "step": 7600 }, { "epoch": 0.5167142274765593, "grad_norm": 1.9563500881195068, "learning_rate": 9.354446935724964e-05, "loss": 3.3107, "step": 7605 }, { "epoch": 0.5170539475472211, "grad_norm": 2.895695686340332, "learning_rate": 9.354022285636635e-05, "loss": 3.4064, "step": 7610 }, { "epoch": 0.5173936676178829, "grad_norm": 2.685279369354248, "learning_rate": 9.35359763554831e-05, "loss": 3.0983, "step": 7615 }, { "epoch": 0.5177333876885446, "grad_norm": 1.6636030673980713, "learning_rate": 9.353172985459981e-05, "loss": 3.6338, "step": 7620 }, { "epoch": 
0.5180731077592065, "grad_norm": 2.0679140090942383, "learning_rate": 9.352748335371654e-05, "loss": 3.6541, "step": 7625 }, { "epoch": 0.5184128278298682, "grad_norm": 2.401170015335083, "learning_rate": 9.352323685283328e-05, "loss": 3.327, "step": 7630 }, { "epoch": 0.5187525479005299, "grad_norm": 2.8371365070343018, "learning_rate": 9.351899035194999e-05, "loss": 3.6023, "step": 7635 }, { "epoch": 0.5190922679711918, "grad_norm": 1.9553372859954834, "learning_rate": 9.351474385106672e-05, "loss": 3.6175, "step": 7640 }, { "epoch": 0.5194319880418535, "grad_norm": 2.2127702236175537, "learning_rate": 9.351049735018346e-05, "loss": 3.4014, "step": 7645 }, { "epoch": 0.5197717081125153, "grad_norm": 2.1941514015197754, "learning_rate": 9.350625084930018e-05, "loss": 3.5133, "step": 7650 }, { "epoch": 0.520111428183177, "grad_norm": 2.303549289703369, "learning_rate": 9.35020043484169e-05, "loss": 3.2347, "step": 7655 }, { "epoch": 0.5204511482538389, "grad_norm": 2.5060219764709473, "learning_rate": 9.349775784753365e-05, "loss": 3.4632, "step": 7660 }, { "epoch": 0.5207908683245006, "grad_norm": 2.187718391418457, "learning_rate": 9.349351134665036e-05, "loss": 3.2201, "step": 7665 }, { "epoch": 0.5211305883951624, "grad_norm": 2.0518343448638916, "learning_rate": 9.348926484576709e-05, "loss": 3.5035, "step": 7670 }, { "epoch": 0.5214703084658242, "grad_norm": 2.011011838912964, "learning_rate": 9.348501834488383e-05, "loss": 3.2746, "step": 7675 }, { "epoch": 0.521810028536486, "grad_norm": 1.9551658630371094, "learning_rate": 9.348077184400054e-05, "loss": 3.5228, "step": 7680 }, { "epoch": 0.5221497486071477, "grad_norm": 2.1620616912841797, "learning_rate": 9.347652534311727e-05, "loss": 3.4825, "step": 7685 }, { "epoch": 0.5224894686778094, "grad_norm": 1.9534777402877808, "learning_rate": 9.3472278842234e-05, "loss": 3.6559, "step": 7690 }, { "epoch": 0.5228291887484713, "grad_norm": 2.493875503540039, "learning_rate": 9.346803234135073e-05, "loss": 
3.1751, "step": 7695 }, { "epoch": 0.523168908819133, "grad_norm": 1.9353262186050415, "learning_rate": 9.346378584046746e-05, "loss": 3.4514, "step": 7700 }, { "epoch": 0.5235086288897948, "grad_norm": 1.9636565446853638, "learning_rate": 9.345953933958418e-05, "loss": 3.7546, "step": 7705 }, { "epoch": 0.5238483489604566, "grad_norm": 1.9866634607315063, "learning_rate": 9.345529283870091e-05, "loss": 3.3293, "step": 7710 }, { "epoch": 0.5241880690311184, "grad_norm": 3.0144455432891846, "learning_rate": 9.345104633781764e-05, "loss": 3.4535, "step": 7715 }, { "epoch": 0.5245277891017801, "grad_norm": 2.969761371612549, "learning_rate": 9.344679983693437e-05, "loss": 3.4912, "step": 7720 }, { "epoch": 0.524867509172442, "grad_norm": 2.070857286453247, "learning_rate": 9.34425533360511e-05, "loss": 3.5424, "step": 7725 }, { "epoch": 0.5252072292431037, "grad_norm": 2.1081411838531494, "learning_rate": 9.343830683516782e-05, "loss": 3.4614, "step": 7730 }, { "epoch": 0.5255469493137654, "grad_norm": 1.9040887355804443, "learning_rate": 9.343406033428455e-05, "loss": 3.3716, "step": 7735 }, { "epoch": 0.5258866693844272, "grad_norm": 1.9735606908798218, "learning_rate": 9.342981383340128e-05, "loss": 3.4665, "step": 7740 }, { "epoch": 0.526226389455089, "grad_norm": 2.214897632598877, "learning_rate": 9.342556733251801e-05, "loss": 3.3892, "step": 7745 }, { "epoch": 0.5265661095257508, "grad_norm": 2.2447516918182373, "learning_rate": 9.342132083163474e-05, "loss": 3.748, "step": 7750 }, { "epoch": 0.5269058295964125, "grad_norm": 2.7474708557128906, "learning_rate": 9.341707433075146e-05, "loss": 3.6698, "step": 7755 }, { "epoch": 0.5272455496670744, "grad_norm": 2.5779128074645996, "learning_rate": 9.341282782986819e-05, "loss": 2.9593, "step": 7760 }, { "epoch": 0.5275852697377361, "grad_norm": 2.1730480194091797, "learning_rate": 9.340858132898492e-05, "loss": 3.6393, "step": 7765 }, { "epoch": 0.5279249898083979, "grad_norm": 3.3322739601135254, 
"learning_rate": 9.340433482810165e-05, "loss": 3.4001, "step": 7770 }, { "epoch": 0.5282647098790596, "grad_norm": 2.4361867904663086, "learning_rate": 9.340008832721838e-05, "loss": 3.7426, "step": 7775 }, { "epoch": 0.5286044299497215, "grad_norm": 2.4264729022979736, "learning_rate": 9.33958418263351e-05, "loss": 3.0694, "step": 7780 }, { "epoch": 0.5289441500203832, "grad_norm": 2.169437885284424, "learning_rate": 9.339159532545183e-05, "loss": 3.6901, "step": 7785 }, { "epoch": 0.5292838700910449, "grad_norm": 2.2769272327423096, "learning_rate": 9.338734882456856e-05, "loss": 3.6727, "step": 7790 }, { "epoch": 0.5296235901617068, "grad_norm": 3.0056302547454834, "learning_rate": 9.338310232368529e-05, "loss": 3.6638, "step": 7795 }, { "epoch": 0.5299633102323685, "grad_norm": 2.313436269760132, "learning_rate": 9.337885582280202e-05, "loss": 3.2055, "step": 7800 }, { "epoch": 0.5303030303030303, "grad_norm": 1.8805210590362549, "learning_rate": 9.337460932191874e-05, "loss": 3.355, "step": 7805 }, { "epoch": 0.5306427503736921, "grad_norm": 2.6432125568389893, "learning_rate": 9.337036282103547e-05, "loss": 3.4257, "step": 7810 }, { "epoch": 0.5309824704443539, "grad_norm": 2.71626877784729, "learning_rate": 9.33661163201522e-05, "loss": 3.5226, "step": 7815 }, { "epoch": 0.5313221905150156, "grad_norm": 1.9068526029586792, "learning_rate": 9.336186981926893e-05, "loss": 3.4854, "step": 7820 }, { "epoch": 0.5316619105856774, "grad_norm": 2.322669267654419, "learning_rate": 9.335762331838566e-05, "loss": 3.5672, "step": 7825 }, { "epoch": 0.5320016306563392, "grad_norm": 2.248173475265503, "learning_rate": 9.335337681750238e-05, "loss": 3.4054, "step": 7830 }, { "epoch": 0.532341350727001, "grad_norm": 2.2564051151275635, "learning_rate": 9.334913031661911e-05, "loss": 3.6523, "step": 7835 }, { "epoch": 0.5326810707976627, "grad_norm": 2.781511068344116, "learning_rate": 9.334488381573584e-05, "loss": 3.7222, "step": 7840 }, { "epoch": 0.5330207908683245, 
"grad_norm": 2.621516466140747, "learning_rate": 9.334063731485257e-05, "loss": 3.4049, "step": 7845 }, { "epoch": 0.5333605109389863, "grad_norm": 2.0905559062957764, "learning_rate": 9.33363908139693e-05, "loss": 3.5393, "step": 7850 }, { "epoch": 0.533700231009648, "grad_norm": 1.8236668109893799, "learning_rate": 9.333214431308602e-05, "loss": 3.5404, "step": 7855 }, { "epoch": 0.5340399510803098, "grad_norm": 2.09511661529541, "learning_rate": 9.332789781220275e-05, "loss": 3.5058, "step": 7860 }, { "epoch": 0.5343796711509716, "grad_norm": 3.618272304534912, "learning_rate": 9.332365131131948e-05, "loss": 3.4683, "step": 7865 }, { "epoch": 0.5347193912216334, "grad_norm": 3.5706093311309814, "learning_rate": 9.331940481043621e-05, "loss": 3.4599, "step": 7870 }, { "epoch": 0.5350591112922951, "grad_norm": 1.727651834487915, "learning_rate": 9.331515830955294e-05, "loss": 3.4183, "step": 7875 }, { "epoch": 0.535398831362957, "grad_norm": 2.5069267749786377, "learning_rate": 9.331091180866966e-05, "loss": 3.5288, "step": 7880 }, { "epoch": 0.5357385514336187, "grad_norm": 2.4135208129882812, "learning_rate": 9.330666530778639e-05, "loss": 3.3558, "step": 7885 }, { "epoch": 0.5360782715042804, "grad_norm": 2.3433218002319336, "learning_rate": 9.330241880690311e-05, "loss": 3.6919, "step": 7890 }, { "epoch": 0.5364179915749423, "grad_norm": 2.7258450984954834, "learning_rate": 9.329817230601985e-05, "loss": 3.3195, "step": 7895 }, { "epoch": 0.536757711645604, "grad_norm": 2.2421512603759766, "learning_rate": 9.329392580513658e-05, "loss": 3.2954, "step": 7900 }, { "epoch": 0.5370974317162658, "grad_norm": 3.3051373958587646, "learning_rate": 9.328967930425329e-05, "loss": 3.6181, "step": 7905 }, { "epoch": 0.5374371517869275, "grad_norm": 2.076032876968384, "learning_rate": 9.328543280337003e-05, "loss": 3.5818, "step": 7910 }, { "epoch": 0.5377768718575894, "grad_norm": 1.787664771080017, "learning_rate": 9.328118630248676e-05, "loss": 3.6571, "step": 7915 }, { 
"epoch": 0.5381165919282511, "grad_norm": 2.2575559616088867, "learning_rate": 9.327693980160347e-05, "loss": 3.3945, "step": 7920 }, { "epoch": 0.5384563119989129, "grad_norm": 2.319761037826538, "learning_rate": 9.327269330072022e-05, "loss": 3.5612, "step": 7925 }, { "epoch": 0.5387960320695747, "grad_norm": 2.307724952697754, "learning_rate": 9.326844679983694e-05, "loss": 3.668, "step": 7930 }, { "epoch": 0.5391357521402365, "grad_norm": 1.859480857849121, "learning_rate": 9.326420029895366e-05, "loss": 3.6731, "step": 7935 }, { "epoch": 0.5394754722108982, "grad_norm": 2.80035138130188, "learning_rate": 9.32599537980704e-05, "loss": 3.3538, "step": 7940 }, { "epoch": 0.5398151922815599, "grad_norm": 2.4471659660339355, "learning_rate": 9.325570729718713e-05, "loss": 3.4815, "step": 7945 }, { "epoch": 0.5401549123522218, "grad_norm": 2.661670446395874, "learning_rate": 9.325146079630384e-05, "loss": 3.5947, "step": 7950 }, { "epoch": 0.5404946324228835, "grad_norm": 2.048238515853882, "learning_rate": 9.324721429542058e-05, "loss": 3.2475, "step": 7955 }, { "epoch": 0.5408343524935453, "grad_norm": 2.1112208366394043, "learning_rate": 9.324296779453731e-05, "loss": 3.4487, "step": 7960 }, { "epoch": 0.5411740725642071, "grad_norm": 1.6436501741409302, "learning_rate": 9.323872129365403e-05, "loss": 3.5088, "step": 7965 }, { "epoch": 0.5415137926348689, "grad_norm": 2.4236257076263428, "learning_rate": 9.323447479277077e-05, "loss": 3.44, "step": 7970 }, { "epoch": 0.5418535127055306, "grad_norm": 2.173462152481079, "learning_rate": 9.323022829188748e-05, "loss": 3.5009, "step": 7975 }, { "epoch": 0.5421932327761925, "grad_norm": 2.001016139984131, "learning_rate": 9.322598179100421e-05, "loss": 3.4251, "step": 7980 }, { "epoch": 0.5425329528468542, "grad_norm": 2.305433988571167, "learning_rate": 9.322173529012095e-05, "loss": 3.4381, "step": 7985 }, { "epoch": 0.542872672917516, "grad_norm": 2.1340959072113037, "learning_rate": 9.321748878923767e-05, "loss": 
3.5409, "step": 7990 }, { "epoch": 0.5432123929881777, "grad_norm": 2.51355242729187, "learning_rate": 9.32132422883544e-05, "loss": 3.5031, "step": 7995 }, { "epoch": 0.5435521130588395, "grad_norm": 2.293647527694702, "learning_rate": 9.320899578747114e-05, "loss": 3.6442, "step": 8000 }, { "epoch": 0.5438918331295013, "grad_norm": 2.1015965938568115, "learning_rate": 9.320474928658785e-05, "loss": 3.3585, "step": 8005 }, { "epoch": 0.544231553200163, "grad_norm": 2.1317553520202637, "learning_rate": 9.320050278570458e-05, "loss": 3.5866, "step": 8010 }, { "epoch": 0.5445712732708249, "grad_norm": 1.9175254106521606, "learning_rate": 9.319625628482132e-05, "loss": 3.3682, "step": 8015 }, { "epoch": 0.5449109933414866, "grad_norm": 2.203145742416382, "learning_rate": 9.319200978393804e-05, "loss": 3.5032, "step": 8020 }, { "epoch": 0.5452507134121484, "grad_norm": 2.406024694442749, "learning_rate": 9.318776328305476e-05, "loss": 3.6251, "step": 8025 }, { "epoch": 0.5455904334828101, "grad_norm": 2.588593006134033, "learning_rate": 9.31835167821715e-05, "loss": 3.4116, "step": 8030 }, { "epoch": 0.545930153553472, "grad_norm": 2.538975477218628, "learning_rate": 9.317927028128822e-05, "loss": 3.4752, "step": 8035 }, { "epoch": 0.5462698736241337, "grad_norm": 2.235506772994995, "learning_rate": 9.317502378040495e-05, "loss": 3.2153, "step": 8040 }, { "epoch": 0.5466095936947954, "grad_norm": 2.4122209548950195, "learning_rate": 9.317077727952168e-05, "loss": 3.4472, "step": 8045 }, { "epoch": 0.5469493137654573, "grad_norm": 2.3780431747436523, "learning_rate": 9.31665307786384e-05, "loss": 3.2939, "step": 8050 }, { "epoch": 0.547289033836119, "grad_norm": 2.1943743228912354, "learning_rate": 9.316228427775513e-05, "loss": 3.4332, "step": 8055 }, { "epoch": 0.5476287539067808, "grad_norm": 2.5293846130371094, "learning_rate": 9.315803777687186e-05, "loss": 3.4539, "step": 8060 }, { "epoch": 0.5479684739774426, "grad_norm": 2.6729140281677246, "learning_rate": 
9.315379127598859e-05, "loss": 3.443, "step": 8065 }, { "epoch": 0.5483081940481044, "grad_norm": 2.1881024837493896, "learning_rate": 9.314954477510532e-05, "loss": 3.4367, "step": 8070 }, { "epoch": 0.5486479141187661, "grad_norm": 2.0456316471099854, "learning_rate": 9.314529827422204e-05, "loss": 3.4568, "step": 8075 }, { "epoch": 0.5489876341894279, "grad_norm": 1.9006816148757935, "learning_rate": 9.314105177333877e-05, "loss": 3.5054, "step": 8080 }, { "epoch": 0.5493273542600897, "grad_norm": 2.4287989139556885, "learning_rate": 9.31368052724555e-05, "loss": 3.4349, "step": 8085 }, { "epoch": 0.5496670743307515, "grad_norm": 2.4924192428588867, "learning_rate": 9.313255877157223e-05, "loss": 3.4955, "step": 8090 }, { "epoch": 0.5500067944014132, "grad_norm": 3.8655943870544434, "learning_rate": 9.312831227068896e-05, "loss": 3.4524, "step": 8095 }, { "epoch": 0.550346514472075, "grad_norm": 2.8569958209991455, "learning_rate": 9.312406576980568e-05, "loss": 3.4709, "step": 8100 }, { "epoch": 0.5506862345427368, "grad_norm": 2.472886562347412, "learning_rate": 9.311981926892241e-05, "loss": 3.2456, "step": 8105 }, { "epoch": 0.5510259546133985, "grad_norm": 3.545949697494507, "learning_rate": 9.311557276803914e-05, "loss": 3.4041, "step": 8110 }, { "epoch": 0.5513656746840603, "grad_norm": 1.8553361892700195, "learning_rate": 9.311132626715587e-05, "loss": 3.4402, "step": 8115 }, { "epoch": 0.5517053947547221, "grad_norm": 2.2590484619140625, "learning_rate": 9.31070797662726e-05, "loss": 3.3832, "step": 8120 }, { "epoch": 0.5520451148253839, "grad_norm": 2.084836721420288, "learning_rate": 9.310283326538932e-05, "loss": 3.5876, "step": 8125 }, { "epoch": 0.5523848348960456, "grad_norm": 3.508754014968872, "learning_rate": 9.309858676450605e-05, "loss": 3.6488, "step": 8130 }, { "epoch": 0.5527245549667075, "grad_norm": 2.4495081901550293, "learning_rate": 9.309434026362278e-05, "loss": 3.3691, "step": 8135 }, { "epoch": 0.5530642750373692, "grad_norm": 
2.252100706100464, "learning_rate": 9.309009376273951e-05, "loss": 3.4972, "step": 8140 }, { "epoch": 0.553403995108031, "grad_norm": 2.076917886734009, "learning_rate": 9.308584726185624e-05, "loss": 3.578, "step": 8145 }, { "epoch": 0.5537437151786928, "grad_norm": 3.1268932819366455, "learning_rate": 9.308160076097296e-05, "loss": 3.231, "step": 8150 }, { "epoch": 0.5540834352493546, "grad_norm": 3.3976073265075684, "learning_rate": 9.307735426008969e-05, "loss": 3.4044, "step": 8155 }, { "epoch": 0.5544231553200163, "grad_norm": 2.028085470199585, "learning_rate": 9.307310775920642e-05, "loss": 3.3118, "step": 8160 }, { "epoch": 0.554762875390678, "grad_norm": 2.361985206604004, "learning_rate": 9.306886125832315e-05, "loss": 3.7181, "step": 8165 }, { "epoch": 0.5551025954613399, "grad_norm": 1.8930116891860962, "learning_rate": 9.306461475743988e-05, "loss": 3.7768, "step": 8170 }, { "epoch": 0.5554423155320016, "grad_norm": 1.7071287631988525, "learning_rate": 9.30603682565566e-05, "loss": 3.5297, "step": 8175 }, { "epoch": 0.5557820356026634, "grad_norm": 2.141624927520752, "learning_rate": 9.305612175567333e-05, "loss": 3.3615, "step": 8180 }, { "epoch": 0.5561217556733252, "grad_norm": 1.87115478515625, "learning_rate": 9.305187525479006e-05, "loss": 3.3518, "step": 8185 }, { "epoch": 0.556461475743987, "grad_norm": 2.391692638397217, "learning_rate": 9.304762875390679e-05, "loss": 3.4963, "step": 8190 }, { "epoch": 0.5568011958146487, "grad_norm": 2.429943799972534, "learning_rate": 9.304338225302352e-05, "loss": 3.412, "step": 8195 }, { "epoch": 0.5571409158853105, "grad_norm": 1.8239959478378296, "learning_rate": 9.303913575214024e-05, "loss": 3.5649, "step": 8200 }, { "epoch": 0.5574806359559723, "grad_norm": 2.659858226776123, "learning_rate": 9.303488925125697e-05, "loss": 3.4042, "step": 8205 }, { "epoch": 0.557820356026634, "grad_norm": 2.560776948928833, "learning_rate": 9.30306427503737e-05, "loss": 3.4747, "step": 8210 }, { "epoch": 
0.5581600760972958, "grad_norm": 4.9363322257995605, "learning_rate": 9.302639624949043e-05, "loss": 3.4194, "step": 8215 }, { "epoch": 0.5584997961679576, "grad_norm": 2.1661062240600586, "learning_rate": 9.302214974860716e-05, "loss": 3.6843, "step": 8220 }, { "epoch": 0.5588395162386194, "grad_norm": 2.345493793487549, "learning_rate": 9.301790324772388e-05, "loss": 3.2607, "step": 8225 }, { "epoch": 0.5591792363092811, "grad_norm": 2.23356032371521, "learning_rate": 9.301365674684061e-05, "loss": 3.5943, "step": 8230 }, { "epoch": 0.559518956379943, "grad_norm": 2.2561025619506836, "learning_rate": 9.300941024595734e-05, "loss": 3.4041, "step": 8235 }, { "epoch": 0.5598586764506047, "grad_norm": 1.8876768350601196, "learning_rate": 9.300516374507407e-05, "loss": 3.7598, "step": 8240 }, { "epoch": 0.5601983965212665, "grad_norm": 2.052811861038208, "learning_rate": 9.300091724419078e-05, "loss": 3.2988, "step": 8245 }, { "epoch": 0.5605381165919282, "grad_norm": 2.5591275691986084, "learning_rate": 9.299667074330752e-05, "loss": 3.3676, "step": 8250 }, { "epoch": 0.5608778366625901, "grad_norm": 2.2649238109588623, "learning_rate": 9.299242424242425e-05, "loss": 3.2285, "step": 8255 }, { "epoch": 0.5612175567332518, "grad_norm": 2.2049078941345215, "learning_rate": 9.298817774154097e-05, "loss": 3.4271, "step": 8260 }, { "epoch": 0.5615572768039135, "grad_norm": 1.873483419418335, "learning_rate": 9.298393124065771e-05, "loss": 3.4574, "step": 8265 }, { "epoch": 0.5618969968745754, "grad_norm": 1.9567097425460815, "learning_rate": 9.297968473977444e-05, "loss": 3.4079, "step": 8270 }, { "epoch": 0.5622367169452371, "grad_norm": 2.712512493133545, "learning_rate": 9.297543823889115e-05, "loss": 3.1644, "step": 8275 }, { "epoch": 0.5625764370158989, "grad_norm": 2.7369048595428467, "learning_rate": 9.297119173800789e-05, "loss": 3.5099, "step": 8280 }, { "epoch": 0.5629161570865606, "grad_norm": 2.09191632270813, "learning_rate": 9.296694523712462e-05, "loss": 
3.4824, "step": 8285 }, { "epoch": 0.5632558771572225, "grad_norm": 2.352203845977783, "learning_rate": 9.296269873624133e-05, "loss": 3.642, "step": 8290 }, { "epoch": 0.5635955972278842, "grad_norm": 2.490455389022827, "learning_rate": 9.295845223535808e-05, "loss": 3.3203, "step": 8295 }, { "epoch": 0.563935317298546, "grad_norm": 1.7819089889526367, "learning_rate": 9.29542057344748e-05, "loss": 3.6042, "step": 8300 }, { "epoch": 0.5642750373692078, "grad_norm": 2.003439426422119, "learning_rate": 9.294995923359152e-05, "loss": 3.4489, "step": 8305 }, { "epoch": 0.5646147574398696, "grad_norm": 2.1695783138275146, "learning_rate": 9.294571273270826e-05, "loss": 3.445, "step": 8310 }, { "epoch": 0.5649544775105313, "grad_norm": 2.241365432739258, "learning_rate": 9.294146623182497e-05, "loss": 3.2735, "step": 8315 }, { "epoch": 0.5652941975811931, "grad_norm": 2.0233113765716553, "learning_rate": 9.29372197309417e-05, "loss": 3.306, "step": 8320 }, { "epoch": 0.5656339176518549, "grad_norm": 2.314389705657959, "learning_rate": 9.293297323005844e-05, "loss": 3.6903, "step": 8325 }, { "epoch": 0.5659736377225166, "grad_norm": 2.3685622215270996, "learning_rate": 9.292872672917516e-05, "loss": 3.5154, "step": 8330 }, { "epoch": 0.5663133577931784, "grad_norm": 2.357534408569336, "learning_rate": 9.292448022829189e-05, "loss": 3.6782, "step": 8335 }, { "epoch": 0.5666530778638402, "grad_norm": 2.2818925380706787, "learning_rate": 9.292023372740863e-05, "loss": 3.5172, "step": 8340 }, { "epoch": 0.566992797934502, "grad_norm": 2.028261423110962, "learning_rate": 9.291598722652534e-05, "loss": 3.3944, "step": 8345 }, { "epoch": 0.5673325180051637, "grad_norm": 2.5161893367767334, "learning_rate": 9.291174072564207e-05, "loss": 3.7035, "step": 8350 }, { "epoch": 0.5676722380758256, "grad_norm": 1.9733099937438965, "learning_rate": 9.290749422475881e-05, "loss": 3.6006, "step": 8355 }, { "epoch": 0.5680119581464873, "grad_norm": 2.111640691757202, "learning_rate": 
9.290324772387553e-05, "loss": 3.3657, "step": 8360 }, { "epoch": 0.568351678217149, "grad_norm": 1.9056446552276611, "learning_rate": 9.289900122299225e-05, "loss": 3.3409, "step": 8365 }, { "epoch": 0.5686913982878108, "grad_norm": 2.0116829872131348, "learning_rate": 9.2894754722109e-05, "loss": 3.4526, "step": 8370 }, { "epoch": 0.5690311183584726, "grad_norm": 2.0043551921844482, "learning_rate": 9.289050822122571e-05, "loss": 3.6603, "step": 8375 }, { "epoch": 0.5693708384291344, "grad_norm": 2.0761964321136475, "learning_rate": 9.288626172034244e-05, "loss": 3.6551, "step": 8380 }, { "epoch": 0.5697105584997961, "grad_norm": 2.1102120876312256, "learning_rate": 9.288201521945918e-05, "loss": 3.52, "step": 8385 }, { "epoch": 0.570050278570458, "grad_norm": 2.037296772003174, "learning_rate": 9.28777687185759e-05, "loss": 3.3974, "step": 8390 }, { "epoch": 0.5703899986411197, "grad_norm": 2.359480381011963, "learning_rate": 9.287352221769262e-05, "loss": 3.7609, "step": 8395 }, { "epoch": 0.5707297187117815, "grad_norm": 1.9942586421966553, "learning_rate": 9.286927571680935e-05, "loss": 3.3588, "step": 8400 }, { "epoch": 0.5710694387824433, "grad_norm": 2.1668598651885986, "learning_rate": 9.286502921592608e-05, "loss": 3.4829, "step": 8405 }, { "epoch": 0.5714091588531051, "grad_norm": 2.974886417388916, "learning_rate": 9.28607827150428e-05, "loss": 3.4702, "step": 8410 }, { "epoch": 0.5717488789237668, "grad_norm": 2.2977232933044434, "learning_rate": 9.285653621415953e-05, "loss": 3.0362, "step": 8415 }, { "epoch": 0.5720885989944285, "grad_norm": 2.6215264797210693, "learning_rate": 9.285228971327626e-05, "loss": 3.6049, "step": 8420 }, { "epoch": 0.5724283190650904, "grad_norm": 1.657969355583191, "learning_rate": 9.284804321239299e-05, "loss": 3.5216, "step": 8425 }, { "epoch": 0.5727680391357521, "grad_norm": 2.9846670627593994, "learning_rate": 9.284379671150972e-05, "loss": 3.5265, "step": 8430 }, { "epoch": 0.5731077592064139, "grad_norm": 
3.2366905212402344, "learning_rate": 9.283955021062645e-05, "loss": 3.1744, "step": 8435 }, { "epoch": 0.5734474792770757, "grad_norm": 2.417916774749756, "learning_rate": 9.283530370974317e-05, "loss": 3.429, "step": 8440 }, { "epoch": 0.5737871993477375, "grad_norm": 2.7880892753601074, "learning_rate": 9.28310572088599e-05, "loss": 3.3616, "step": 8445 }, { "epoch": 0.5741269194183992, "grad_norm": 2.4934799671173096, "learning_rate": 9.282681070797663e-05, "loss": 3.5551, "step": 8450 }, { "epoch": 0.574466639489061, "grad_norm": 2.4630706310272217, "learning_rate": 9.282256420709336e-05, "loss": 3.149, "step": 8455 }, { "epoch": 0.5748063595597228, "grad_norm": 2.399496555328369, "learning_rate": 9.281831770621009e-05, "loss": 3.6542, "step": 8460 }, { "epoch": 0.5751460796303846, "grad_norm": 2.125706672668457, "learning_rate": 9.281407120532681e-05, "loss": 3.5369, "step": 8465 }, { "epoch": 0.5754857997010463, "grad_norm": 2.127157211303711, "learning_rate": 9.280982470444354e-05, "loss": 3.3497, "step": 8470 }, { "epoch": 0.5758255197717081, "grad_norm": 2.590937376022339, "learning_rate": 9.280557820356027e-05, "loss": 3.3688, "step": 8475 }, { "epoch": 0.5761652398423699, "grad_norm": 2.6459944248199463, "learning_rate": 9.2801331702677e-05, "loss": 3.3186, "step": 8480 }, { "epoch": 0.5765049599130316, "grad_norm": 2.029041051864624, "learning_rate": 9.279708520179373e-05, "loss": 3.5337, "step": 8485 }, { "epoch": 0.5768446799836935, "grad_norm": 2.311288356781006, "learning_rate": 9.279283870091045e-05, "loss": 3.174, "step": 8490 }, { "epoch": 0.5771844000543552, "grad_norm": 2.555339813232422, "learning_rate": 9.278859220002718e-05, "loss": 3.5468, "step": 8495 }, { "epoch": 0.577524120125017, "grad_norm": 1.7727775573730469, "learning_rate": 9.278434569914391e-05, "loss": 3.5269, "step": 8500 }, { "epoch": 0.5778638401956787, "grad_norm": 2.611510753631592, "learning_rate": 9.278009919826064e-05, "loss": 3.3579, "step": 8505 }, { "epoch": 
0.5782035602663406, "grad_norm": 2.1769232749938965, "learning_rate": 9.277585269737737e-05, "loss": 3.4102, "step": 8510 }, { "epoch": 0.5785432803370023, "grad_norm": 2.586691379547119, "learning_rate": 9.27716061964941e-05, "loss": 3.337, "step": 8515 }, { "epoch": 0.578883000407664, "grad_norm": 2.089334011077881, "learning_rate": 9.276735969561082e-05, "loss": 3.2416, "step": 8520 }, { "epoch": 0.5792227204783259, "grad_norm": 1.9141018390655518, "learning_rate": 9.276311319472755e-05, "loss": 3.4277, "step": 8525 }, { "epoch": 0.5795624405489876, "grad_norm": 1.7541699409484863, "learning_rate": 9.275886669384428e-05, "loss": 3.3736, "step": 8530 }, { "epoch": 0.5799021606196494, "grad_norm": 2.5130348205566406, "learning_rate": 9.2754620192961e-05, "loss": 3.3604, "step": 8535 }, { "epoch": 0.5802418806903111, "grad_norm": 3.10162091255188, "learning_rate": 9.275037369207773e-05, "loss": 3.4115, "step": 8540 }, { "epoch": 0.580581600760973, "grad_norm": 2.407411813735962, "learning_rate": 9.274612719119446e-05, "loss": 3.6455, "step": 8545 }, { "epoch": 0.5809213208316347, "grad_norm": 2.0053093433380127, "learning_rate": 9.274188069031119e-05, "loss": 3.2288, "step": 8550 }, { "epoch": 0.5812610409022965, "grad_norm": 2.641085624694824, "learning_rate": 9.273763418942792e-05, "loss": 3.5824, "step": 8555 }, { "epoch": 0.5816007609729583, "grad_norm": 2.637511730194092, "learning_rate": 9.273338768854465e-05, "loss": 3.5481, "step": 8560 }, { "epoch": 0.5819404810436201, "grad_norm": 2.107805013656616, "learning_rate": 9.272914118766137e-05, "loss": 3.4166, "step": 8565 }, { "epoch": 0.5822802011142818, "grad_norm": 2.414813756942749, "learning_rate": 9.27248946867781e-05, "loss": 3.4619, "step": 8570 }, { "epoch": 0.5826199211849437, "grad_norm": 2.577255964279175, "learning_rate": 9.272064818589483e-05, "loss": 3.5583, "step": 8575 }, { "epoch": 0.5829596412556054, "grad_norm": 1.952826976776123, "learning_rate": 9.271640168501156e-05, "loss": 3.326, 
"step": 8580 }, { "epoch": 0.5832993613262671, "grad_norm": 2.2093346118927, "learning_rate": 9.271215518412829e-05, "loss": 3.4601, "step": 8585 }, { "epoch": 0.5836390813969289, "grad_norm": 2.598940372467041, "learning_rate": 9.270790868324501e-05, "loss": 3.4668, "step": 8590 }, { "epoch": 0.5839788014675907, "grad_norm": 2.000551700592041, "learning_rate": 9.270366218236174e-05, "loss": 3.6846, "step": 8595 }, { "epoch": 0.5843185215382525, "grad_norm": 2.354728937149048, "learning_rate": 9.269941568147846e-05, "loss": 3.361, "step": 8600 }, { "epoch": 0.5846582416089142, "grad_norm": 2.418919563293457, "learning_rate": 9.26951691805952e-05, "loss": 3.6553, "step": 8605 }, { "epoch": 0.5849979616795761, "grad_norm": 1.7047700881958008, "learning_rate": 9.269092267971193e-05, "loss": 3.2775, "step": 8610 }, { "epoch": 0.5853376817502378, "grad_norm": 2.0444867610931396, "learning_rate": 9.268667617882864e-05, "loss": 3.5458, "step": 8615 }, { "epoch": 0.5856774018208996, "grad_norm": 3.0210981369018555, "learning_rate": 9.268242967794538e-05, "loss": 3.3262, "step": 8620 }, { "epoch": 0.5860171218915613, "grad_norm": 2.4124979972839355, "learning_rate": 9.267818317706211e-05, "loss": 3.5382, "step": 8625 }, { "epoch": 0.5863568419622232, "grad_norm": 2.2351627349853516, "learning_rate": 9.267393667617882e-05, "loss": 3.7378, "step": 8630 }, { "epoch": 0.5866965620328849, "grad_norm": 1.7832891941070557, "learning_rate": 9.266969017529557e-05, "loss": 3.5345, "step": 8635 }, { "epoch": 0.5870362821035466, "grad_norm": 4.006323337554932, "learning_rate": 9.26654436744123e-05, "loss": 3.0423, "step": 8640 }, { "epoch": 0.5873760021742085, "grad_norm": 2.2737832069396973, "learning_rate": 9.266119717352901e-05, "loss": 3.7005, "step": 8645 }, { "epoch": 0.5877157222448702, "grad_norm": 1.8196533918380737, "learning_rate": 9.265695067264575e-05, "loss": 3.7564, "step": 8650 }, { "epoch": 0.588055442315532, "grad_norm": 1.9307434558868408, "learning_rate": 
9.265270417176248e-05, "loss": 3.3201, "step": 8655 }, { "epoch": 0.5883951623861938, "grad_norm": 2.5794484615325928, "learning_rate": 9.264845767087919e-05, "loss": 3.3153, "step": 8660 }, { "epoch": 0.5887348824568556, "grad_norm": 2.2121431827545166, "learning_rate": 9.264421116999593e-05, "loss": 3.4331, "step": 8665 }, { "epoch": 0.5890746025275173, "grad_norm": 3.738420248031616, "learning_rate": 9.263996466911265e-05, "loss": 3.603, "step": 8670 }, { "epoch": 0.589414322598179, "grad_norm": 2.2575557231903076, "learning_rate": 9.263571816822938e-05, "loss": 3.5148, "step": 8675 }, { "epoch": 0.5897540426688409, "grad_norm": 1.7424854040145874, "learning_rate": 9.263147166734612e-05, "loss": 3.4102, "step": 8680 }, { "epoch": 0.5900937627395026, "grad_norm": 1.972262978553772, "learning_rate": 9.262722516646283e-05, "loss": 3.3401, "step": 8685 }, { "epoch": 0.5904334828101644, "grad_norm": 2.403106689453125, "learning_rate": 9.262297866557956e-05, "loss": 3.1883, "step": 8690 }, { "epoch": 0.5907732028808262, "grad_norm": 2.0763771533966064, "learning_rate": 9.26187321646963e-05, "loss": 3.4608, "step": 8695 }, { "epoch": 0.591112922951488, "grad_norm": 2.262354612350464, "learning_rate": 9.261448566381302e-05, "loss": 3.5027, "step": 8700 }, { "epoch": 0.5914526430221497, "grad_norm": 2.96211838722229, "learning_rate": 9.261023916292975e-05, "loss": 3.5515, "step": 8705 }, { "epoch": 0.5917923630928115, "grad_norm": 2.1572303771972656, "learning_rate": 9.260599266204649e-05, "loss": 3.4375, "step": 8710 }, { "epoch": 0.5921320831634733, "grad_norm": 2.367905855178833, "learning_rate": 9.26017461611632e-05, "loss": 3.2595, "step": 8715 }, { "epoch": 0.5924718032341351, "grad_norm": 2.771169900894165, "learning_rate": 9.259749966027993e-05, "loss": 3.5077, "step": 8720 }, { "epoch": 0.5928115233047968, "grad_norm": 2.8313136100769043, "learning_rate": 9.259410245957331e-05, "loss": 3.5583, "step": 8725 }, { "epoch": 0.5931512433754587, "grad_norm": 
2.2947633266448975, "learning_rate": 9.258985595869004e-05, "loss": 3.594, "step": 8730 }, { "epoch": 0.5934909634461204, "grad_norm": 2.876166820526123, "learning_rate": 9.258560945780678e-05, "loss": 3.538, "step": 8735 }, { "epoch": 0.5938306835167821, "grad_norm": 2.4721367359161377, "learning_rate": 9.25813629569235e-05, "loss": 3.4472, "step": 8740 }, { "epoch": 0.594170403587444, "grad_norm": 2.5023317337036133, "learning_rate": 9.257711645604022e-05, "loss": 3.4651, "step": 8745 }, { "epoch": 0.5945101236581057, "grad_norm": 2.4450182914733887, "learning_rate": 9.257286995515697e-05, "loss": 3.5858, "step": 8750 }, { "epoch": 0.5948498437287675, "grad_norm": 2.0935065746307373, "learning_rate": 9.256862345427368e-05, "loss": 3.3318, "step": 8755 }, { "epoch": 0.5951895637994292, "grad_norm": 3.6565756797790527, "learning_rate": 9.256437695339041e-05, "loss": 3.4703, "step": 8760 }, { "epoch": 0.5955292838700911, "grad_norm": 2.7227180004119873, "learning_rate": 9.256013045250715e-05, "loss": 3.4155, "step": 8765 }, { "epoch": 0.5958690039407528, "grad_norm": 2.177351951599121, "learning_rate": 9.255588395162386e-05, "loss": 3.4463, "step": 8770 }, { "epoch": 0.5962087240114146, "grad_norm": 2.880723237991333, "learning_rate": 9.255163745074059e-05, "loss": 3.4563, "step": 8775 }, { "epoch": 0.5965484440820764, "grad_norm": 2.088820457458496, "learning_rate": 9.254739094985733e-05, "loss": 3.4154, "step": 8780 }, { "epoch": 0.5968881641527382, "grad_norm": 3.313237190246582, "learning_rate": 9.254314444897405e-05, "loss": 3.4208, "step": 8785 }, { "epoch": 0.5972278842233999, "grad_norm": 2.361285924911499, "learning_rate": 9.253889794809078e-05, "loss": 3.5362, "step": 8790 }, { "epoch": 0.5975676042940616, "grad_norm": 1.9548826217651367, "learning_rate": 9.25346514472075e-05, "loss": 3.5581, "step": 8795 }, { "epoch": 0.5979073243647235, "grad_norm": 2.136289596557617, "learning_rate": 9.253040494632423e-05, "loss": 3.3625, "step": 8800 }, { "epoch": 
0.5982470444353852, "grad_norm": 2.0891830921173096, "learning_rate": 9.252615844544096e-05, "loss": 3.4685, "step": 8805 }, { "epoch": 0.598586764506047, "grad_norm": 2.046870231628418, "learning_rate": 9.252191194455769e-05, "loss": 3.4844, "step": 8810 }, { "epoch": 0.5989264845767088, "grad_norm": 2.767624616622925, "learning_rate": 9.251766544367442e-05, "loss": 3.4608, "step": 8815 }, { "epoch": 0.5992662046473706, "grad_norm": 2.1752066612243652, "learning_rate": 9.251341894279114e-05, "loss": 3.2597, "step": 8820 }, { "epoch": 0.5996059247180323, "grad_norm": 2.496392011642456, "learning_rate": 9.250917244190787e-05, "loss": 3.3379, "step": 8825 }, { "epoch": 0.5999456447886942, "grad_norm": 2.0173327922821045, "learning_rate": 9.25049259410246e-05, "loss": 3.3509, "step": 8830 }, { "epoch": 0.6002853648593559, "grad_norm": 2.064110517501831, "learning_rate": 9.250067944014133e-05, "loss": 3.4919, "step": 8835 }, { "epoch": 0.6006250849300176, "grad_norm": 2.5744454860687256, "learning_rate": 9.249643293925806e-05, "loss": 3.7008, "step": 8840 }, { "epoch": 0.6009648050006794, "grad_norm": 1.8868316411972046, "learning_rate": 9.249218643837478e-05, "loss": 3.4246, "step": 8845 }, { "epoch": 0.6013045250713412, "grad_norm": 2.1251633167266846, "learning_rate": 9.248793993749151e-05, "loss": 3.6764, "step": 8850 }, { "epoch": 0.601644245142003, "grad_norm": 2.310070037841797, "learning_rate": 9.248369343660824e-05, "loss": 3.5046, "step": 8855 }, { "epoch": 0.6019839652126647, "grad_norm": 2.216578483581543, "learning_rate": 9.247944693572497e-05, "loss": 3.4614, "step": 8860 }, { "epoch": 0.6023236852833266, "grad_norm": 1.9413403272628784, "learning_rate": 9.24752004348417e-05, "loss": 3.4046, "step": 8865 }, { "epoch": 0.6026634053539883, "grad_norm": 2.4662301540374756, "learning_rate": 9.247095393395842e-05, "loss": 3.3895, "step": 8870 }, { "epoch": 0.6030031254246501, "grad_norm": 2.511380195617676, "learning_rate": 9.246670743307515e-05, "loss": 
3.3947, "step": 8875 }, { "epoch": 0.6033428454953118, "grad_norm": 2.423506736755371, "learning_rate": 9.246246093219188e-05, "loss": 3.2647, "step": 8880 }, { "epoch": 0.6036825655659737, "grad_norm": 2.3918538093566895, "learning_rate": 9.245821443130861e-05, "loss": 3.6556, "step": 8885 }, { "epoch": 0.6040222856366354, "grad_norm": 1.7094987630844116, "learning_rate": 9.245396793042534e-05, "loss": 3.4037, "step": 8890 }, { "epoch": 0.6043620057072971, "grad_norm": 2.2404685020446777, "learning_rate": 9.244972142954206e-05, "loss": 3.4772, "step": 8895 }, { "epoch": 0.604701725777959, "grad_norm": 2.090378522872925, "learning_rate": 9.244547492865879e-05, "loss": 3.5517, "step": 8900 }, { "epoch": 0.6050414458486207, "grad_norm": 2.432039976119995, "learning_rate": 9.244122842777552e-05, "loss": 3.354, "step": 8905 }, { "epoch": 0.6053811659192825, "grad_norm": 2.135570764541626, "learning_rate": 9.243698192689225e-05, "loss": 3.4889, "step": 8910 }, { "epoch": 0.6057208859899443, "grad_norm": 3.326573610305786, "learning_rate": 9.243273542600898e-05, "loss": 3.2613, "step": 8915 }, { "epoch": 0.6060606060606061, "grad_norm": 1.7053751945495605, "learning_rate": 9.24284889251257e-05, "loss": 3.4801, "step": 8920 }, { "epoch": 0.6064003261312678, "grad_norm": 2.004863739013672, "learning_rate": 9.242424242424242e-05, "loss": 3.386, "step": 8925 }, { "epoch": 0.6067400462019296, "grad_norm": 2.6540749073028564, "learning_rate": 9.241999592335916e-05, "loss": 3.6117, "step": 8930 }, { "epoch": 0.6070797662725914, "grad_norm": 2.4861056804656982, "learning_rate": 9.241574942247589e-05, "loss": 3.4069, "step": 8935 }, { "epoch": 0.6074194863432532, "grad_norm": 1.9969956874847412, "learning_rate": 9.24115029215926e-05, "loss": 3.4672, "step": 8940 }, { "epoch": 0.6077592064139149, "grad_norm": 2.5103538036346436, "learning_rate": 9.240725642070934e-05, "loss": 3.5373, "step": 8945 }, { "epoch": 0.6080989264845768, "grad_norm": 2.205951690673828, "learning_rate": 
9.240300991982607e-05, "loss": 3.5909, "step": 8950 }, { "epoch": 0.6084386465552385, "grad_norm": 2.1522507667541504, "learning_rate": 9.239876341894279e-05, "loss": 3.3089, "step": 8955 }, { "epoch": 0.6087783666259002, "grad_norm": 2.1917757987976074, "learning_rate": 9.239451691805953e-05, "loss": 3.4894, "step": 8960 }, { "epoch": 0.609118086696562, "grad_norm": 2.4323723316192627, "learning_rate": 9.239027041717626e-05, "loss": 3.3356, "step": 8965 }, { "epoch": 0.6094578067672238, "grad_norm": 2.0584442615509033, "learning_rate": 9.238602391629297e-05, "loss": 3.4695, "step": 8970 }, { "epoch": 0.6097975268378856, "grad_norm": 2.437145948410034, "learning_rate": 9.238177741540971e-05, "loss": 3.3846, "step": 8975 }, { "epoch": 0.6101372469085473, "grad_norm": 2.3354413509368896, "learning_rate": 9.237753091452644e-05, "loss": 3.3006, "step": 8980 }, { "epoch": 0.6104769669792092, "grad_norm": 2.009671211242676, "learning_rate": 9.237328441364315e-05, "loss": 3.2417, "step": 8985 }, { "epoch": 0.6108166870498709, "grad_norm": 1.7423853874206543, "learning_rate": 9.23690379127599e-05, "loss": 3.4108, "step": 8990 }, { "epoch": 0.6111564071205327, "grad_norm": 2.296311616897583, "learning_rate": 9.236479141187661e-05, "loss": 3.7126, "step": 8995 }, { "epoch": 0.6114961271911945, "grad_norm": 2.9706532955169678, "learning_rate": 9.236054491099334e-05, "loss": 3.4258, "step": 9000 }, { "epoch": 0.6118358472618562, "grad_norm": 2.3803908824920654, "learning_rate": 9.235629841011008e-05, "loss": 3.485, "step": 9005 }, { "epoch": 0.612175567332518, "grad_norm": 2.1578822135925293, "learning_rate": 9.23520519092268e-05, "loss": 3.4563, "step": 9010 }, { "epoch": 0.6125152874031797, "grad_norm": 2.049509048461914, "learning_rate": 9.234780540834352e-05, "loss": 3.3914, "step": 9015 }, { "epoch": 0.6128550074738416, "grad_norm": 2.0547492504119873, "learning_rate": 9.234355890746026e-05, "loss": 3.5119, "step": 9020 }, { "epoch": 0.6131947275445033, "grad_norm": 
3.450504779815674, "learning_rate": 9.233931240657698e-05, "loss": 3.38, "step": 9025 }, { "epoch": 0.6135344476151651, "grad_norm": 2.7689361572265625, "learning_rate": 9.23350659056937e-05, "loss": 3.335, "step": 9030 }, { "epoch": 0.6138741676858269, "grad_norm": 2.228415012359619, "learning_rate": 9.233081940481045e-05, "loss": 3.3001, "step": 9035 }, { "epoch": 0.6142138877564887, "grad_norm": 1.9736047983169556, "learning_rate": 9.232657290392716e-05, "loss": 3.5795, "step": 9040 }, { "epoch": 0.6145536078271504, "grad_norm": 2.6956357955932617, "learning_rate": 9.23223264030439e-05, "loss": 3.5509, "step": 9045 }, { "epoch": 0.6148933278978121, "grad_norm": 2.0668585300445557, "learning_rate": 9.231807990216063e-05, "loss": 3.4657, "step": 9050 }, { "epoch": 0.615233047968474, "grad_norm": 2.4969048500061035, "learning_rate": 9.231383340127735e-05, "loss": 3.6714, "step": 9055 }, { "epoch": 0.6155727680391357, "grad_norm": 2.2255680561065674, "learning_rate": 9.230958690039409e-05, "loss": 3.5104, "step": 9060 }, { "epoch": 0.6159124881097975, "grad_norm": 2.149826765060425, "learning_rate": 9.23053403995108e-05, "loss": 3.5722, "step": 9065 }, { "epoch": 0.6162522081804593, "grad_norm": 3.0322883129119873, "learning_rate": 9.230109389862753e-05, "loss": 3.3271, "step": 9070 }, { "epoch": 0.6165919282511211, "grad_norm": 2.4431819915771484, "learning_rate": 9.229684739774427e-05, "loss": 3.2296, "step": 9075 }, { "epoch": 0.6169316483217828, "grad_norm": 1.9841899871826172, "learning_rate": 9.229260089686099e-05, "loss": 3.5309, "step": 9080 }, { "epoch": 0.6172713683924447, "grad_norm": 2.1378426551818848, "learning_rate": 9.228835439597771e-05, "loss": 3.5327, "step": 9085 }, { "epoch": 0.6176110884631064, "grad_norm": 2.0772478580474854, "learning_rate": 9.228410789509446e-05, "loss": 3.2381, "step": 9090 }, { "epoch": 0.6179508085337682, "grad_norm": 2.1273200511932373, "learning_rate": 9.227986139421117e-05, "loss": 3.3578, "step": 9095 }, { "epoch": 
0.6182905286044299, "grad_norm": 2.3347697257995605, "learning_rate": 9.22756148933279e-05, "loss": 3.5752, "step": 9100 }, { "epoch": 0.6186302486750918, "grad_norm": 2.3471555709838867, "learning_rate": 9.227136839244464e-05, "loss": 3.7021, "step": 9105 }, { "epoch": 0.6189699687457535, "grad_norm": 2.6924638748168945, "learning_rate": 9.226712189156135e-05, "loss": 3.0245, "step": 9110 }, { "epoch": 0.6193096888164152, "grad_norm": 2.1283507347106934, "learning_rate": 9.226287539067808e-05, "loss": 3.5864, "step": 9115 }, { "epoch": 0.6196494088870771, "grad_norm": 2.029057264328003, "learning_rate": 9.225862888979482e-05, "loss": 3.5111, "step": 9120 }, { "epoch": 0.6199891289577388, "grad_norm": 2.3345603942871094, "learning_rate": 9.225438238891154e-05, "loss": 3.6344, "step": 9125 }, { "epoch": 0.6203288490284006, "grad_norm": 2.1531529426574707, "learning_rate": 9.225013588802827e-05, "loss": 3.4414, "step": 9130 }, { "epoch": 0.6206685690990623, "grad_norm": 2.1636009216308594, "learning_rate": 9.224588938714501e-05, "loss": 3.5252, "step": 9135 }, { "epoch": 0.6210082891697242, "grad_norm": 1.7845302820205688, "learning_rate": 9.224164288626172e-05, "loss": 3.3295, "step": 9140 }, { "epoch": 0.6213480092403859, "grad_norm": 2.275392532348633, "learning_rate": 9.223739638537845e-05, "loss": 3.5735, "step": 9145 }, { "epoch": 0.6216877293110477, "grad_norm": 1.6259108781814575, "learning_rate": 9.223314988449518e-05, "loss": 3.3143, "step": 9150 }, { "epoch": 0.6220274493817095, "grad_norm": 2.047922134399414, "learning_rate": 9.22289033836119e-05, "loss": 3.4208, "step": 9155 }, { "epoch": 0.6223671694523712, "grad_norm": 1.9892585277557373, "learning_rate": 9.222465688272863e-05, "loss": 3.5035, "step": 9160 }, { "epoch": 0.622706889523033, "grad_norm": 2.327022075653076, "learning_rate": 9.222041038184536e-05, "loss": 3.4351, "step": 9165 }, { "epoch": 0.6230466095936948, "grad_norm": 2.236118793487549, "learning_rate": 9.221616388096209e-05, "loss": 
3.4589, "step": 9170 }, { "epoch": 0.6233863296643566, "grad_norm": 2.212754487991333, "learning_rate": 9.221191738007882e-05, "loss": 3.4827, "step": 9175 }, { "epoch": 0.6237260497350183, "grad_norm": 2.8504140377044678, "learning_rate": 9.220767087919555e-05, "loss": 3.1599, "step": 9180 }, { "epoch": 0.6240657698056801, "grad_norm": 1.8453243970870972, "learning_rate": 9.220342437831227e-05, "loss": 3.7203, "step": 9185 }, { "epoch": 0.6244054898763419, "grad_norm": 2.2583439350128174, "learning_rate": 9.2199177877429e-05, "loss": 3.5995, "step": 9190 }, { "epoch": 0.6247452099470037, "grad_norm": 2.018906593322754, "learning_rate": 9.219493137654573e-05, "loss": 3.629, "step": 9195 }, { "epoch": 0.6250849300176654, "grad_norm": 2.3241515159606934, "learning_rate": 9.219068487566246e-05, "loss": 3.2277, "step": 9200 }, { "epoch": 0.6254246500883273, "grad_norm": 2.038961172103882, "learning_rate": 9.218643837477919e-05, "loss": 3.538, "step": 9205 }, { "epoch": 0.625764370158989, "grad_norm": 2.4797563552856445, "learning_rate": 9.218219187389591e-05, "loss": 3.6085, "step": 9210 }, { "epoch": 0.6261040902296507, "grad_norm": 2.335033655166626, "learning_rate": 9.217794537301264e-05, "loss": 3.436, "step": 9215 }, { "epoch": 0.6264438103003126, "grad_norm": 2.303050994873047, "learning_rate": 9.217369887212937e-05, "loss": 3.5978, "step": 9220 }, { "epoch": 0.6267835303709743, "grad_norm": 2.4888527393341064, "learning_rate": 9.21694523712461e-05, "loss": 3.829, "step": 9225 }, { "epoch": 0.6271232504416361, "grad_norm": 2.4484055042266846, "learning_rate": 9.216520587036283e-05, "loss": 3.4508, "step": 9230 }, { "epoch": 0.6274629705122978, "grad_norm": 2.2140262126922607, "learning_rate": 9.216095936947955e-05, "loss": 3.4847, "step": 9235 }, { "epoch": 0.6278026905829597, "grad_norm": 2.3871517181396484, "learning_rate": 9.215671286859628e-05, "loss": 3.3973, "step": 9240 }, { "epoch": 0.6281424106536214, "grad_norm": 2.2835378646850586, "learning_rate": 
9.215246636771301e-05, "loss": 3.3773, "step": 9245 }, { "epoch": 0.6284821307242832, "grad_norm": 2.3547534942626953, "learning_rate": 9.214821986682974e-05, "loss": 3.2761, "step": 9250 }, { "epoch": 0.628821850794945, "grad_norm": 2.603886127471924, "learning_rate": 9.214397336594647e-05, "loss": 3.2656, "step": 9255 }, { "epoch": 0.6291615708656068, "grad_norm": 2.165262460708618, "learning_rate": 9.21397268650632e-05, "loss": 3.4739, "step": 9260 }, { "epoch": 0.6295012909362685, "grad_norm": 2.0599539279937744, "learning_rate": 9.213548036417991e-05, "loss": 3.2759, "step": 9265 }, { "epoch": 0.6298410110069302, "grad_norm": 2.8846681118011475, "learning_rate": 9.213123386329665e-05, "loss": 3.3199, "step": 9270 }, { "epoch": 0.6301807310775921, "grad_norm": 2.104825258255005, "learning_rate": 9.212698736241338e-05, "loss": 3.4047, "step": 9275 }, { "epoch": 0.6305204511482538, "grad_norm": 2.2291181087493896, "learning_rate": 9.212274086153009e-05, "loss": 3.6294, "step": 9280 }, { "epoch": 0.6308601712189156, "grad_norm": 1.9005157947540283, "learning_rate": 9.211849436064683e-05, "loss": 3.5974, "step": 9285 }, { "epoch": 0.6311998912895774, "grad_norm": 2.375688076019287, "learning_rate": 9.211424785976356e-05, "loss": 3.0638, "step": 9290 }, { "epoch": 0.6315396113602392, "grad_norm": 2.006298303604126, "learning_rate": 9.211000135888028e-05, "loss": 3.4262, "step": 9295 }, { "epoch": 0.6318793314309009, "grad_norm": 2.4329819679260254, "learning_rate": 9.210575485799702e-05, "loss": 3.6207, "step": 9300 }, { "epoch": 0.6322190515015628, "grad_norm": 1.9812030792236328, "learning_rate": 9.210150835711375e-05, "loss": 3.4401, "step": 9305 }, { "epoch": 0.6325587715722245, "grad_norm": 2.0097873210906982, "learning_rate": 9.209726185623046e-05, "loss": 3.2702, "step": 9310 }, { "epoch": 0.6328984916428863, "grad_norm": 2.418948173522949, "learning_rate": 9.20930153553472e-05, "loss": 3.365, "step": 9315 }, { "epoch": 0.633238211713548, "grad_norm": 
1.9337080717086792, "learning_rate": 9.208876885446393e-05, "loss": 3.3297, "step": 9320 }, { "epoch": 0.6335779317842098, "grad_norm": 2.334913492202759, "learning_rate": 9.208452235358065e-05, "loss": 3.5442, "step": 9325 }, { "epoch": 0.6339176518548716, "grad_norm": 1.979494333267212, "learning_rate": 9.208027585269739e-05, "loss": 3.5878, "step": 9330 }, { "epoch": 0.6342573719255333, "grad_norm": 2.2706778049468994, "learning_rate": 9.207602935181411e-05, "loss": 3.6239, "step": 9335 }, { "epoch": 0.6345970919961952, "grad_norm": 1.9782280921936035, "learning_rate": 9.207178285093083e-05, "loss": 3.5322, "step": 9340 }, { "epoch": 0.6349368120668569, "grad_norm": 2.21012282371521, "learning_rate": 9.206753635004757e-05, "loss": 3.4804, "step": 9345 }, { "epoch": 0.6352765321375187, "grad_norm": 1.9822043180465698, "learning_rate": 9.206328984916429e-05, "loss": 3.5511, "step": 9350 }, { "epoch": 0.6356162522081804, "grad_norm": 1.61638343334198, "learning_rate": 9.205904334828101e-05, "loss": 3.3277, "step": 9355 }, { "epoch": 0.6359559722788423, "grad_norm": 2.369020462036133, "learning_rate": 9.205479684739775e-05, "loss": 3.3067, "step": 9360 }, { "epoch": 0.636295692349504, "grad_norm": 2.076824426651001, "learning_rate": 9.205055034651447e-05, "loss": 3.5835, "step": 9365 }, { "epoch": 0.6366354124201657, "grad_norm": 1.8298687934875488, "learning_rate": 9.20463038456312e-05, "loss": 3.7377, "step": 9370 }, { "epoch": 0.6369751324908276, "grad_norm": 2.4179134368896484, "learning_rate": 9.204205734474794e-05, "loss": 3.3789, "step": 9375 }, { "epoch": 0.6373148525614893, "grad_norm": 2.1561992168426514, "learning_rate": 9.203781084386465e-05, "loss": 3.3464, "step": 9380 }, { "epoch": 0.6376545726321511, "grad_norm": 1.7424547672271729, "learning_rate": 9.20335643429814e-05, "loss": 3.4852, "step": 9385 }, { "epoch": 0.6379942927028129, "grad_norm": 2.0857956409454346, "learning_rate": 9.202931784209812e-05, "loss": 3.4334, "step": 9390 }, { "epoch": 
0.6383340127734747, "grad_norm": 2.058171510696411, "learning_rate": 9.202507134121484e-05, "loss": 3.437, "step": 9395 }, { "epoch": 0.6386737328441364, "grad_norm": 2.1025898456573486, "learning_rate": 9.202082484033158e-05, "loss": 3.3733, "step": 9400 }, { "epoch": 0.6390134529147982, "grad_norm": 2.4429051876068115, "learning_rate": 9.201657833944831e-05, "loss": 3.3843, "step": 9405 }, { "epoch": 0.63935317298546, "grad_norm": 1.996283769607544, "learning_rate": 9.201233183856502e-05, "loss": 3.3407, "step": 9410 }, { "epoch": 0.6396928930561218, "grad_norm": 1.9609453678131104, "learning_rate": 9.200808533768176e-05, "loss": 3.2856, "step": 9415 }, { "epoch": 0.6400326131267835, "grad_norm": 2.6451570987701416, "learning_rate": 9.200383883679848e-05, "loss": 3.4036, "step": 9420 }, { "epoch": 0.6403723331974454, "grad_norm": 2.3675267696380615, "learning_rate": 9.19995923359152e-05, "loss": 3.5064, "step": 9425 }, { "epoch": 0.6407120532681071, "grad_norm": 2.80167818069458, "learning_rate": 9.199534583503195e-05, "loss": 3.4674, "step": 9430 }, { "epoch": 0.6410517733387688, "grad_norm": 2.440291404724121, "learning_rate": 9.199109933414866e-05, "loss": 3.28, "step": 9435 }, { "epoch": 0.6413914934094306, "grad_norm": 2.2997565269470215, "learning_rate": 9.198685283326539e-05, "loss": 3.4094, "step": 9440 }, { "epoch": 0.6417312134800924, "grad_norm": 2.3637266159057617, "learning_rate": 9.198260633238213e-05, "loss": 3.373, "step": 9445 }, { "epoch": 0.6420709335507542, "grad_norm": 2.2341365814208984, "learning_rate": 9.197835983149885e-05, "loss": 3.4228, "step": 9450 }, { "epoch": 0.6424106536214159, "grad_norm": 2.6212844848632812, "learning_rate": 9.197411333061557e-05, "loss": 3.3152, "step": 9455 }, { "epoch": 0.6427503736920778, "grad_norm": 2.5143659114837646, "learning_rate": 9.196986682973232e-05, "loss": 3.5497, "step": 9460 }, { "epoch": 0.6430900937627395, "grad_norm": 2.1178486347198486, "learning_rate": 9.196562032884903e-05, "loss": 
3.3481, "step": 9465 }, { "epoch": 0.6434298138334013, "grad_norm": 2.7853009700775146, "learning_rate": 9.196137382796576e-05, "loss": 3.4423, "step": 9470 }, { "epoch": 0.6437695339040631, "grad_norm": 2.1077260971069336, "learning_rate": 9.19571273270825e-05, "loss": 3.5083, "step": 9475 }, { "epoch": 0.6441092539747248, "grad_norm": 2.360146999359131, "learning_rate": 9.195288082619921e-05, "loss": 3.3045, "step": 9480 }, { "epoch": 0.6444489740453866, "grad_norm": 2.139396905899048, "learning_rate": 9.194863432531594e-05, "loss": 3.9015, "step": 9485 }, { "epoch": 0.6447886941160483, "grad_norm": 2.755953311920166, "learning_rate": 9.194438782443267e-05, "loss": 3.4252, "step": 9490 }, { "epoch": 0.6451284141867102, "grad_norm": 2.182157278060913, "learning_rate": 9.19401413235494e-05, "loss": 3.3928, "step": 9495 }, { "epoch": 0.6454681342573719, "grad_norm": 2.243736982345581, "learning_rate": 9.193589482266613e-05, "loss": 3.3703, "step": 9500 }, { "epoch": 0.6458078543280337, "grad_norm": 1.755360722541809, "learning_rate": 9.193164832178285e-05, "loss": 3.2493, "step": 9505 }, { "epoch": 0.6461475743986955, "grad_norm": 1.9161643981933594, "learning_rate": 9.192740182089958e-05, "loss": 3.3648, "step": 9510 }, { "epoch": 0.6464872944693573, "grad_norm": 2.177321195602417, "learning_rate": 9.192315532001631e-05, "loss": 3.5422, "step": 9515 }, { "epoch": 0.646827014540019, "grad_norm": 2.789555311203003, "learning_rate": 9.191890881913304e-05, "loss": 3.5221, "step": 9520 }, { "epoch": 0.6471667346106807, "grad_norm": 2.1760945320129395, "learning_rate": 9.191466231824977e-05, "loss": 3.469, "step": 9525 }, { "epoch": 0.6475064546813426, "grad_norm": 1.720252513885498, "learning_rate": 9.19104158173665e-05, "loss": 3.4823, "step": 9530 }, { "epoch": 0.6478461747520043, "grad_norm": 2.2654337882995605, "learning_rate": 9.190616931648322e-05, "loss": 3.428, "step": 9535 }, { "epoch": 0.6481858948226661, "grad_norm": 4.516109466552734, "learning_rate": 
9.190192281559995e-05, "loss": 3.592, "step": 9540 }, { "epoch": 0.6485256148933279, "grad_norm": 2.6933679580688477, "learning_rate": 9.189767631471668e-05, "loss": 3.5928, "step": 9545 }, { "epoch": 0.6488653349639897, "grad_norm": 2.0785329341888428, "learning_rate": 9.18934298138334e-05, "loss": 3.5918, "step": 9550 }, { "epoch": 0.6492050550346514, "grad_norm": 1.7163983583450317, "learning_rate": 9.188918331295013e-05, "loss": 3.3801, "step": 9555 }, { "epoch": 0.6495447751053133, "grad_norm": 2.295823812484741, "learning_rate": 9.188493681206686e-05, "loss": 3.5109, "step": 9560 }, { "epoch": 0.649884495175975, "grad_norm": 2.0109357833862305, "learning_rate": 9.188069031118359e-05, "loss": 3.6524, "step": 9565 }, { "epoch": 0.6502242152466368, "grad_norm": 2.102390766143799, "learning_rate": 9.187644381030032e-05, "loss": 3.418, "step": 9570 }, { "epoch": 0.6505639353172985, "grad_norm": 3.062297821044922, "learning_rate": 9.187219730941705e-05, "loss": 3.1961, "step": 9575 }, { "epoch": 0.6509036553879604, "grad_norm": 2.4372718334198, "learning_rate": 9.186795080853377e-05, "loss": 3.5075, "step": 9580 }, { "epoch": 0.6512433754586221, "grad_norm": 2.544154405593872, "learning_rate": 9.18637043076505e-05, "loss": 3.2986, "step": 9585 }, { "epoch": 0.6515830955292838, "grad_norm": 1.6882457733154297, "learning_rate": 9.185945780676723e-05, "loss": 3.4571, "step": 9590 }, { "epoch": 0.6519228155999457, "grad_norm": 2.341235637664795, "learning_rate": 9.185521130588396e-05, "loss": 3.5237, "step": 9595 }, { "epoch": 0.6522625356706074, "grad_norm": 2.0831446647644043, "learning_rate": 9.185096480500069e-05, "loss": 3.2293, "step": 9600 }, { "epoch": 0.6526022557412692, "grad_norm": 2.001333713531494, "learning_rate": 9.184671830411741e-05, "loss": 3.4746, "step": 9605 }, { "epoch": 0.6529419758119309, "grad_norm": 2.2685201168060303, "learning_rate": 9.184247180323414e-05, "loss": 3.4989, "step": 9610 }, { "epoch": 0.6532816958825928, "grad_norm": 
2.427672863006592, "learning_rate": 9.183822530235087e-05, "loss": 3.4831, "step": 9615 }, { "epoch": 0.6536214159532545, "grad_norm": 2.1567723751068115, "learning_rate": 9.183397880146758e-05, "loss": 3.3328, "step": 9620 }, { "epoch": 0.6539611360239163, "grad_norm": 2.433497667312622, "learning_rate": 9.182973230058433e-05, "loss": 3.6096, "step": 9625 }, { "epoch": 0.6543008560945781, "grad_norm": 2.287774085998535, "learning_rate": 9.182548579970105e-05, "loss": 3.4264, "step": 9630 }, { "epoch": 0.6546405761652399, "grad_norm": 1.9578315019607544, "learning_rate": 9.182123929881777e-05, "loss": 3.3448, "step": 9635 }, { "epoch": 0.6549802962359016, "grad_norm": 2.3514602184295654, "learning_rate": 9.181699279793451e-05, "loss": 3.3377, "step": 9640 }, { "epoch": 0.6553200163065634, "grad_norm": 1.8604629039764404, "learning_rate": 9.181274629705124e-05, "loss": 3.3196, "step": 9645 }, { "epoch": 0.6556597363772252, "grad_norm": 2.3649957180023193, "learning_rate": 9.180849979616795e-05, "loss": 3.0222, "step": 9650 }, { "epoch": 0.6559994564478869, "grad_norm": 2.3329782485961914, "learning_rate": 9.18042532952847e-05, "loss": 3.4149, "step": 9655 }, { "epoch": 0.6563391765185487, "grad_norm": 2.2776143550872803, "learning_rate": 9.180000679440142e-05, "loss": 3.7491, "step": 9660 }, { "epoch": 0.6566788965892105, "grad_norm": 2.8028154373168945, "learning_rate": 9.179576029351814e-05, "loss": 3.3465, "step": 9665 }, { "epoch": 0.6570186166598723, "grad_norm": 2.7860090732574463, "learning_rate": 9.179151379263488e-05, "loss": 3.0798, "step": 9670 }, { "epoch": 0.657358336730534, "grad_norm": 2.304365634918213, "learning_rate": 9.17872672917516e-05, "loss": 3.2702, "step": 9675 }, { "epoch": 0.6576980568011959, "grad_norm": 2.0030386447906494, "learning_rate": 9.178302079086832e-05, "loss": 3.6393, "step": 9680 }, { "epoch": 0.6580377768718576, "grad_norm": 2.4066038131713867, "learning_rate": 9.177877428998506e-05, "loss": 3.2818, "step": 9685 }, { "epoch": 
0.6583774969425193, "grad_norm": 3.0386524200439453, "learning_rate": 9.177452778910178e-05, "loss": 3.2272, "step": 9690 }, { "epoch": 0.6587172170131811, "grad_norm": 2.250406503677368, "learning_rate": 9.17702812882185e-05, "loss": 3.452, "step": 9695 }, { "epoch": 0.6590569370838429, "grad_norm": 2.1529886722564697, "learning_rate": 9.176603478733525e-05, "loss": 3.3902, "step": 9700 }, { "epoch": 0.6593966571545047, "grad_norm": 2.389617919921875, "learning_rate": 9.176178828645196e-05, "loss": 3.4426, "step": 9705 }, { "epoch": 0.6597363772251664, "grad_norm": 1.7933048009872437, "learning_rate": 9.175754178556869e-05, "loss": 3.4909, "step": 9710 }, { "epoch": 0.6600760972958283, "grad_norm": 2.0979044437408447, "learning_rate": 9.175329528468543e-05, "loss": 3.4606, "step": 9715 }, { "epoch": 0.66041581736649, "grad_norm": 1.7562857866287231, "learning_rate": 9.174904878380214e-05, "loss": 3.945, "step": 9720 }, { "epoch": 0.6607555374371518, "grad_norm": 2.451220989227295, "learning_rate": 9.174480228291889e-05, "loss": 3.5378, "step": 9725 }, { "epoch": 0.6610952575078136, "grad_norm": 2.4984099864959717, "learning_rate": 9.174055578203561e-05, "loss": 3.1811, "step": 9730 }, { "epoch": 0.6614349775784754, "grad_norm": 1.889421820640564, "learning_rate": 9.173630928115233e-05, "loss": 3.4117, "step": 9735 }, { "epoch": 0.6617746976491371, "grad_norm": 2.304583787918091, "learning_rate": 9.173206278026907e-05, "loss": 3.3953, "step": 9740 }, { "epoch": 0.6621144177197988, "grad_norm": 2.1655526161193848, "learning_rate": 9.17278162793858e-05, "loss": 3.435, "step": 9745 }, { "epoch": 0.6624541377904607, "grad_norm": 2.957169532775879, "learning_rate": 9.172356977850251e-05, "loss": 3.5148, "step": 9750 }, { "epoch": 0.6627938578611224, "grad_norm": 2.8838658332824707, "learning_rate": 9.171932327761925e-05, "loss": 3.6021, "step": 9755 }, { "epoch": 0.6631335779317842, "grad_norm": 2.703655958175659, "learning_rate": 9.171507677673598e-05, "loss": 3.3602, 
"step": 9760 }, { "epoch": 0.663473298002446, "grad_norm": 2.6492762565612793, "learning_rate": 9.17108302758527e-05, "loss": 3.1517, "step": 9765 }, { "epoch": 0.6638130180731078, "grad_norm": 2.315916061401367, "learning_rate": 9.170658377496944e-05, "loss": 3.5733, "step": 9770 }, { "epoch": 0.6641527381437695, "grad_norm": 3.022876262664795, "learning_rate": 9.170233727408615e-05, "loss": 3.2548, "step": 9775 }, { "epoch": 0.6644924582144313, "grad_norm": 1.8072370290756226, "learning_rate": 9.169809077320288e-05, "loss": 3.6563, "step": 9780 }, { "epoch": 0.6648321782850931, "grad_norm": 1.8629965782165527, "learning_rate": 9.169384427231962e-05, "loss": 3.6613, "step": 9785 }, { "epoch": 0.6651718983557549, "grad_norm": 1.7219549417495728, "learning_rate": 9.168959777143634e-05, "loss": 3.3072, "step": 9790 }, { "epoch": 0.6655116184264166, "grad_norm": 2.3677175045013428, "learning_rate": 9.168535127055306e-05, "loss": 3.1922, "step": 9795 }, { "epoch": 0.6658513384970784, "grad_norm": 2.2522263526916504, "learning_rate": 9.16811047696698e-05, "loss": 3.2677, "step": 9800 }, { "epoch": 0.6661910585677402, "grad_norm": 2.0835580825805664, "learning_rate": 9.167685826878652e-05, "loss": 3.3593, "step": 9805 }, { "epoch": 0.6665307786384019, "grad_norm": 1.9465687274932861, "learning_rate": 9.167261176790325e-05, "loss": 3.4494, "step": 9810 }, { "epoch": 0.6668704987090638, "grad_norm": 2.414717197418213, "learning_rate": 9.166836526701999e-05, "loss": 3.6492, "step": 9815 }, { "epoch": 0.6672102187797255, "grad_norm": 2.6287484169006348, "learning_rate": 9.16641187661367e-05, "loss": 3.7339, "step": 9820 }, { "epoch": 0.6675499388503873, "grad_norm": 2.2006239891052246, "learning_rate": 9.165987226525343e-05, "loss": 3.5375, "step": 9825 }, { "epoch": 0.667889658921049, "grad_norm": 1.995247721672058, "learning_rate": 9.165562576437017e-05, "loss": 3.4984, "step": 9830 }, { "epoch": 0.6682293789917109, "grad_norm": 2.1331167221069336, "learning_rate": 
9.165137926348689e-05, "loss": 3.387, "step": 9835 }, { "epoch": 0.6685690990623726, "grad_norm": 2.292428493499756, "learning_rate": 9.164713276260362e-05, "loss": 3.4255, "step": 9840 }, { "epoch": 0.6689088191330343, "grad_norm": 2.4552714824676514, "learning_rate": 9.164288626172034e-05, "loss": 3.5577, "step": 9845 }, { "epoch": 0.6692485392036962, "grad_norm": 1.9254560470581055, "learning_rate": 9.163863976083707e-05, "loss": 3.2334, "step": 9850 }, { "epoch": 0.6695882592743579, "grad_norm": 2.543802499771118, "learning_rate": 9.16343932599538e-05, "loss": 3.6791, "step": 9855 }, { "epoch": 0.6699279793450197, "grad_norm": 1.9474387168884277, "learning_rate": 9.163014675907053e-05, "loss": 3.5802, "step": 9860 }, { "epoch": 0.6702676994156814, "grad_norm": 2.0926296710968018, "learning_rate": 9.162590025818726e-05, "loss": 3.5644, "step": 9865 }, { "epoch": 0.6706074194863433, "grad_norm": 2.4188356399536133, "learning_rate": 9.162165375730398e-05, "loss": 3.2545, "step": 9870 }, { "epoch": 0.670947139557005, "grad_norm": 1.9968558549880981, "learning_rate": 9.161740725642071e-05, "loss": 3.8078, "step": 9875 }, { "epoch": 0.6712868596276668, "grad_norm": 2.048319101333618, "learning_rate": 9.161316075553744e-05, "loss": 3.4627, "step": 9880 }, { "epoch": 0.6716265796983286, "grad_norm": 2.169461965560913, "learning_rate": 9.160891425465417e-05, "loss": 3.606, "step": 9885 }, { "epoch": 0.6719662997689904, "grad_norm": 1.8501156568527222, "learning_rate": 9.16046677537709e-05, "loss": 3.3269, "step": 9890 }, { "epoch": 0.6723060198396521, "grad_norm": 2.0102906227111816, "learning_rate": 9.160042125288762e-05, "loss": 3.194, "step": 9895 }, { "epoch": 0.672645739910314, "grad_norm": 3.0749316215515137, "learning_rate": 9.159617475200435e-05, "loss": 3.4971, "step": 9900 }, { "epoch": 0.6729854599809757, "grad_norm": 2.1286542415618896, "learning_rate": 9.159192825112108e-05, "loss": 3.2454, "step": 9905 }, { "epoch": 0.6733251800516374, "grad_norm": 
1.8950390815734863, "learning_rate": 9.158768175023781e-05, "loss": 3.5541, "step": 9910 }, { "epoch": 0.6736649001222992, "grad_norm": 2.1330528259277344, "learning_rate": 9.158343524935454e-05, "loss": 3.5395, "step": 9915 }, { "epoch": 0.674004620192961, "grad_norm": 2.268120288848877, "learning_rate": 9.157918874847126e-05, "loss": 3.4602, "step": 9920 }, { "epoch": 0.6743443402636228, "grad_norm": 2.3372132778167725, "learning_rate": 9.157494224758799e-05, "loss": 3.3258, "step": 9925 }, { "epoch": 0.6746840603342845, "grad_norm": 2.7525475025177, "learning_rate": 9.157069574670472e-05, "loss": 3.4347, "step": 9930 }, { "epoch": 0.6750237804049464, "grad_norm": 2.215385913848877, "learning_rate": 9.156644924582145e-05, "loss": 3.5077, "step": 9935 }, { "epoch": 0.6753635004756081, "grad_norm": 2.8587841987609863, "learning_rate": 9.156220274493818e-05, "loss": 3.361, "step": 9940 }, { "epoch": 0.6757032205462699, "grad_norm": 2.27107572555542, "learning_rate": 9.15579562440549e-05, "loss": 3.4283, "step": 9945 }, { "epoch": 0.6760429406169316, "grad_norm": 1.9669655561447144, "learning_rate": 9.155370974317163e-05, "loss": 3.6681, "step": 9950 }, { "epoch": 0.6763826606875935, "grad_norm": 2.0418825149536133, "learning_rate": 9.154946324228836e-05, "loss": 3.3541, "step": 9955 }, { "epoch": 0.6767223807582552, "grad_norm": 1.9659185409545898, "learning_rate": 9.154521674140509e-05, "loss": 3.3576, "step": 9960 }, { "epoch": 0.6770621008289169, "grad_norm": 2.1350066661834717, "learning_rate": 9.154097024052182e-05, "loss": 3.406, "step": 9965 }, { "epoch": 0.6774018208995788, "grad_norm": 2.241499900817871, "learning_rate": 9.153672373963854e-05, "loss": 3.3658, "step": 9970 }, { "epoch": 0.6777415409702405, "grad_norm": 4.039486885070801, "learning_rate": 9.153247723875526e-05, "loss": 3.3798, "step": 9975 }, { "epoch": 0.6780812610409023, "grad_norm": 2.198073625564575, "learning_rate": 9.1528230737872e-05, "loss": 3.3894, "step": 9980 }, { "epoch": 
0.6784209811115641, "grad_norm": 2.494109630584717, "learning_rate": 9.152398423698873e-05, "loss": 3.3189, "step": 9985 }, { "epoch": 0.6787607011822259, "grad_norm": 1.901872992515564, "learning_rate": 9.151973773610544e-05, "loss": 3.5007, "step": 9990 }, { "epoch": 0.6791004212528876, "grad_norm": 2.490457534790039, "learning_rate": 9.151549123522218e-05, "loss": 3.2335, "step": 9995 }, { "epoch": 0.6794401413235494, "grad_norm": 2.129027843475342, "learning_rate": 9.151124473433891e-05, "loss": 3.3407, "step": 10000 }, { "epoch": 0.6797798613942112, "grad_norm": 2.3029603958129883, "learning_rate": 9.150699823345563e-05, "loss": 3.6818, "step": 10005 }, { "epoch": 0.680119581464873, "grad_norm": 2.6103415489196777, "learning_rate": 9.150275173257237e-05, "loss": 3.4749, "step": 10010 }, { "epoch": 0.6804593015355347, "grad_norm": 2.0774784088134766, "learning_rate": 9.14985052316891e-05, "loss": 3.4361, "step": 10015 }, { "epoch": 0.6807990216061965, "grad_norm": 2.4018054008483887, "learning_rate": 9.149425873080581e-05, "loss": 3.3953, "step": 10020 }, { "epoch": 0.6811387416768583, "grad_norm": 2.4003589153289795, "learning_rate": 9.149001222992255e-05, "loss": 3.6101, "step": 10025 }, { "epoch": 0.68147846174752, "grad_norm": 2.0229439735412598, "learning_rate": 9.148576572903928e-05, "loss": 3.3307, "step": 10030 }, { "epoch": 0.6818181818181818, "grad_norm": 1.962277889251709, "learning_rate": 9.1481519228156e-05, "loss": 3.6952, "step": 10035 }, { "epoch": 0.6821579018888436, "grad_norm": 2.5740997791290283, "learning_rate": 9.147727272727274e-05, "loss": 3.3417, "step": 10040 }, { "epoch": 0.6824976219595054, "grad_norm": 2.2659201622009277, "learning_rate": 9.147302622638945e-05, "loss": 3.3707, "step": 10045 }, { "epoch": 0.6828373420301671, "grad_norm": 2.5698659420013428, "learning_rate": 9.146877972550618e-05, "loss": 3.2316, "step": 10050 }, { "epoch": 0.683177062100829, "grad_norm": 1.8414411544799805, "learning_rate": 9.146453322462292e-05, 
"loss": 3.4781, "step": 10055 }, { "epoch": 0.6835167821714907, "grad_norm": 2.0387251377105713, "learning_rate": 9.146028672373964e-05, "loss": 3.6213, "step": 10060 }, { "epoch": 0.6838565022421524, "grad_norm": 2.15816593170166, "learning_rate": 9.145604022285638e-05, "loss": 3.5245, "step": 10065 }, { "epoch": 0.6841962223128143, "grad_norm": 1.8952358961105347, "learning_rate": 9.14517937219731e-05, "loss": 3.706, "step": 10070 }, { "epoch": 0.684535942383476, "grad_norm": 2.976165533065796, "learning_rate": 9.144754722108982e-05, "loss": 3.1039, "step": 10075 }, { "epoch": 0.6848756624541378, "grad_norm": 1.9853427410125732, "learning_rate": 9.144330072020656e-05, "loss": 3.39, "step": 10080 }, { "epoch": 0.6852153825247995, "grad_norm": 2.650207757949829, "learning_rate": 9.143905421932329e-05, "loss": 3.3963, "step": 10085 }, { "epoch": 0.6855551025954614, "grad_norm": 2.18571400642395, "learning_rate": 9.143480771844e-05, "loss": 3.3268, "step": 10090 }, { "epoch": 0.6858948226661231, "grad_norm": 2.225778102874756, "learning_rate": 9.143056121755674e-05, "loss": 3.3894, "step": 10095 }, { "epoch": 0.6862345427367849, "grad_norm": 2.971393346786499, "learning_rate": 9.142631471667347e-05, "loss": 3.5052, "step": 10100 }, { "epoch": 0.6865742628074467, "grad_norm": 2.1307294368743896, "learning_rate": 9.142206821579019e-05, "loss": 3.2559, "step": 10105 }, { "epoch": 0.6869139828781085, "grad_norm": 2.2580299377441406, "learning_rate": 9.141782171490693e-05, "loss": 3.4579, "step": 10110 }, { "epoch": 0.6872537029487702, "grad_norm": 2.247525691986084, "learning_rate": 9.141357521402364e-05, "loss": 3.5955, "step": 10115 }, { "epoch": 0.6875934230194319, "grad_norm": 2.9665870666503906, "learning_rate": 9.140932871314037e-05, "loss": 3.1336, "step": 10120 }, { "epoch": 0.6879331430900938, "grad_norm": 2.1075119972229004, "learning_rate": 9.140508221225711e-05, "loss": 3.5494, "step": 10125 }, { "epoch": 0.6882728631607555, "grad_norm": 2.1065688133239746, 
"learning_rate": 9.140083571137383e-05, "loss": 3.2784, "step": 10130 }, { "epoch": 0.6886125832314173, "grad_norm": 1.5755414962768555, "learning_rate": 9.139658921049056e-05, "loss": 3.455, "step": 10135 }, { "epoch": 0.6889523033020791, "grad_norm": 2.1131350994110107, "learning_rate": 9.13923427096073e-05, "loss": 3.2843, "step": 10140 }, { "epoch": 0.6892920233727409, "grad_norm": 1.853386640548706, "learning_rate": 9.138809620872401e-05, "loss": 3.5551, "step": 10145 }, { "epoch": 0.6896317434434026, "grad_norm": 2.1120431423187256, "learning_rate": 9.138384970784074e-05, "loss": 3.5836, "step": 10150 }, { "epoch": 0.6899714635140645, "grad_norm": 2.3452959060668945, "learning_rate": 9.137960320695748e-05, "loss": 3.4539, "step": 10155 }, { "epoch": 0.6903111835847262, "grad_norm": 2.2407162189483643, "learning_rate": 9.13753567060742e-05, "loss": 3.3432, "step": 10160 }, { "epoch": 0.690650903655388, "grad_norm": 2.4129278659820557, "learning_rate": 9.137111020519092e-05, "loss": 3.3533, "step": 10165 }, { "epoch": 0.6909906237260497, "grad_norm": 2.294137477874756, "learning_rate": 9.136686370430767e-05, "loss": 3.3819, "step": 10170 }, { "epoch": 0.6913303437967115, "grad_norm": 2.1614489555358887, "learning_rate": 9.136261720342438e-05, "loss": 3.2577, "step": 10175 }, { "epoch": 0.6916700638673733, "grad_norm": 2.5635268688201904, "learning_rate": 9.135837070254111e-05, "loss": 3.5839, "step": 10180 }, { "epoch": 0.692009783938035, "grad_norm": 2.0306272506713867, "learning_rate": 9.135412420165785e-05, "loss": 2.997, "step": 10185 }, { "epoch": 0.6923495040086969, "grad_norm": 3.441389799118042, "learning_rate": 9.134987770077456e-05, "loss": 3.2866, "step": 10190 }, { "epoch": 0.6926892240793586, "grad_norm": 1.9897480010986328, "learning_rate": 9.134563119989129e-05, "loss": 3.5245, "step": 10195 }, { "epoch": 0.6930289441500204, "grad_norm": 2.4812498092651367, "learning_rate": 9.134138469900802e-05, "loss": 3.4362, "step": 10200 }, { "epoch": 
0.6933686642206821, "grad_norm": 2.473639726638794, "learning_rate": 9.133713819812475e-05, "loss": 3.2035, "step": 10205 }, { "epoch": 0.693708384291344, "grad_norm": 1.88172447681427, "learning_rate": 9.133289169724148e-05, "loss": 3.415, "step": 10210 }, { "epoch": 0.6940481043620057, "grad_norm": 2.242117166519165, "learning_rate": 9.13286451963582e-05, "loss": 3.5095, "step": 10215 }, { "epoch": 0.6943878244326674, "grad_norm": 3.2184598445892334, "learning_rate": 9.132439869547493e-05, "loss": 3.582, "step": 10220 }, { "epoch": 0.6947275445033293, "grad_norm": 2.613415002822876, "learning_rate": 9.132015219459166e-05, "loss": 3.4259, "step": 10225 }, { "epoch": 0.695067264573991, "grad_norm": 2.203864097595215, "learning_rate": 9.131590569370839e-05, "loss": 3.5516, "step": 10230 }, { "epoch": 0.6954069846446528, "grad_norm": 2.1330056190490723, "learning_rate": 9.131165919282512e-05, "loss": 3.5234, "step": 10235 }, { "epoch": 0.6957467047153146, "grad_norm": 2.382392644882202, "learning_rate": 9.130741269194184e-05, "loss": 3.4453, "step": 10240 }, { "epoch": 0.6960864247859764, "grad_norm": 2.107342481613159, "learning_rate": 9.130316619105857e-05, "loss": 3.622, "step": 10245 }, { "epoch": 0.6964261448566381, "grad_norm": 2.135791540145874, "learning_rate": 9.12989196901753e-05, "loss": 3.3522, "step": 10250 }, { "epoch": 0.6967658649272999, "grad_norm": 3.066088914871216, "learning_rate": 9.129467318929203e-05, "loss": 3.3842, "step": 10255 }, { "epoch": 0.6971055849979617, "grad_norm": 3.687159538269043, "learning_rate": 9.129042668840876e-05, "loss": 3.4694, "step": 10260 }, { "epoch": 0.6974453050686235, "grad_norm": 2.5193636417388916, "learning_rate": 9.128618018752548e-05, "loss": 3.4444, "step": 10265 }, { "epoch": 0.6977850251392852, "grad_norm": 2.3423962593078613, "learning_rate": 9.128193368664221e-05, "loss": 3.6123, "step": 10270 }, { "epoch": 0.698124745209947, "grad_norm": 1.9859436750411987, "learning_rate": 9.127768718575894e-05, "loss": 
3.2797, "step": 10275 }, { "epoch": 0.6984644652806088, "grad_norm": 2.5177195072174072, "learning_rate": 9.127344068487567e-05, "loss": 3.3931, "step": 10280 }, { "epoch": 0.6988041853512705, "grad_norm": 2.1163747310638428, "learning_rate": 9.12691941839924e-05, "loss": 3.3861, "step": 10285 }, { "epoch": 0.6991439054219323, "grad_norm": 2.503312826156616, "learning_rate": 9.126494768310912e-05, "loss": 3.3352, "step": 10290 }, { "epoch": 0.6994836254925941, "grad_norm": 2.0320215225219727, "learning_rate": 9.126070118222585e-05, "loss": 3.0865, "step": 10295 }, { "epoch": 0.6998233455632559, "grad_norm": 1.8928866386413574, "learning_rate": 9.125645468134258e-05, "loss": 3.4041, "step": 10300 }, { "epoch": 0.7001630656339176, "grad_norm": 2.484449863433838, "learning_rate": 9.125220818045931e-05, "loss": 3.2752, "step": 10305 }, { "epoch": 0.7005027857045795, "grad_norm": 2.4888932704925537, "learning_rate": 9.124796167957604e-05, "loss": 3.3456, "step": 10310 }, { "epoch": 0.7008425057752412, "grad_norm": 2.85724139213562, "learning_rate": 9.124371517869275e-05, "loss": 3.4374, "step": 10315 }, { "epoch": 0.701182225845903, "grad_norm": 1.9421014785766602, "learning_rate": 9.123946867780949e-05, "loss": 3.3827, "step": 10320 }, { "epoch": 0.7015219459165648, "grad_norm": 1.892106294631958, "learning_rate": 9.123522217692622e-05, "loss": 3.412, "step": 10325 }, { "epoch": 0.7018616659872265, "grad_norm": 2.7637858390808105, "learning_rate": 9.123097567604293e-05, "loss": 3.452, "step": 10330 }, { "epoch": 0.7022013860578883, "grad_norm": 2.0583410263061523, "learning_rate": 9.122672917515968e-05, "loss": 3.3105, "step": 10335 }, { "epoch": 0.70254110612855, "grad_norm": 1.8880698680877686, "learning_rate": 9.12224826742764e-05, "loss": 3.2688, "step": 10340 }, { "epoch": 0.7028808261992119, "grad_norm": 2.3727846145629883, "learning_rate": 9.121823617339312e-05, "loss": 3.4935, "step": 10345 }, { "epoch": 0.7032205462698736, "grad_norm": 2.1201822757720947, 
"learning_rate": 9.121398967250986e-05, "loss": 3.3217, "step": 10350 }, { "epoch": 0.7035602663405354, "grad_norm": 2.1377501487731934, "learning_rate": 9.120974317162659e-05, "loss": 3.4306, "step": 10355 }, { "epoch": 0.7038999864111972, "grad_norm": 2.378638982772827, "learning_rate": 9.12054966707433e-05, "loss": 3.6065, "step": 10360 }, { "epoch": 0.704239706481859, "grad_norm": 2.6018004417419434, "learning_rate": 9.120125016986004e-05, "loss": 3.1431, "step": 10365 }, { "epoch": 0.7045794265525207, "grad_norm": 2.800001382827759, "learning_rate": 9.119700366897677e-05, "loss": 3.4992, "step": 10370 }, { "epoch": 0.7049191466231824, "grad_norm": 1.935689926147461, "learning_rate": 9.119275716809349e-05, "loss": 3.3126, "step": 10375 }, { "epoch": 0.7052588666938443, "grad_norm": 1.9114010334014893, "learning_rate": 9.118851066721023e-05, "loss": 3.5247, "step": 10380 }, { "epoch": 0.705598586764506, "grad_norm": 2.6919593811035156, "learning_rate": 9.118426416632696e-05, "loss": 3.5271, "step": 10385 }, { "epoch": 0.7059383068351678, "grad_norm": 2.280654191970825, "learning_rate": 9.118001766544367e-05, "loss": 3.4718, "step": 10390 }, { "epoch": 0.7062780269058296, "grad_norm": 2.0293657779693604, "learning_rate": 9.117577116456041e-05, "loss": 3.5115, "step": 10395 }, { "epoch": 0.7066177469764914, "grad_norm": 2.0440328121185303, "learning_rate": 9.117152466367713e-05, "loss": 3.3796, "step": 10400 }, { "epoch": 0.7069574670471531, "grad_norm": 2.232790231704712, "learning_rate": 9.116727816279387e-05, "loss": 3.3604, "step": 10405 }, { "epoch": 0.707297187117815, "grad_norm": 1.9614242315292358, "learning_rate": 9.11630316619106e-05, "loss": 3.4927, "step": 10410 }, { "epoch": 0.7076369071884767, "grad_norm": 1.8019899129867554, "learning_rate": 9.115878516102731e-05, "loss": 3.5046, "step": 10415 }, { "epoch": 0.7079766272591385, "grad_norm": 2.18086314201355, "learning_rate": 9.115453866014405e-05, "loss": 3.5707, "step": 10420 }, { "epoch": 
0.7083163473298002, "grad_norm": 2.0439870357513428, "learning_rate": 9.115029215926078e-05, "loss": 3.6305, "step": 10425 }, { "epoch": 0.708656067400462, "grad_norm": 1.9378706216812134, "learning_rate": 9.11460456583775e-05, "loss": 3.718, "step": 10430 }, { "epoch": 0.7089957874711238, "grad_norm": 1.7242690324783325, "learning_rate": 9.114179915749424e-05, "loss": 3.6396, "step": 10435 }, { "epoch": 0.7093355075417855, "grad_norm": 1.859023094177246, "learning_rate": 9.113755265661096e-05, "loss": 3.5972, "step": 10440 }, { "epoch": 0.7096752276124474, "grad_norm": 1.8366934061050415, "learning_rate": 9.113330615572768e-05, "loss": 3.5045, "step": 10445 }, { "epoch": 0.7100149476831091, "grad_norm": 2.2177224159240723, "learning_rate": 9.112905965484442e-05, "loss": 3.4228, "step": 10450 }, { "epoch": 0.7103546677537709, "grad_norm": 2.008777618408203, "learning_rate": 9.112481315396115e-05, "loss": 3.6157, "step": 10455 }, { "epoch": 0.7106943878244326, "grad_norm": 2.003328323364258, "learning_rate": 9.112056665307786e-05, "loss": 3.4033, "step": 10460 }, { "epoch": 0.7110341078950945, "grad_norm": 2.2293601036071777, "learning_rate": 9.11163201521946e-05, "loss": 3.4233, "step": 10465 }, { "epoch": 0.7113738279657562, "grad_norm": 1.7605472803115845, "learning_rate": 9.111207365131132e-05, "loss": 3.5793, "step": 10470 }, { "epoch": 0.711713548036418, "grad_norm": 2.229853868484497, "learning_rate": 9.110782715042805e-05, "loss": 3.3773, "step": 10475 }, { "epoch": 0.7120532681070798, "grad_norm": 2.0041186809539795, "learning_rate": 9.110358064954479e-05, "loss": 3.5001, "step": 10480 }, { "epoch": 0.7123929881777415, "grad_norm": 2.100130081176758, "learning_rate": 9.10993341486615e-05, "loss": 3.0908, "step": 10485 }, { "epoch": 0.7127327082484033, "grad_norm": 1.9602878093719482, "learning_rate": 9.109508764777823e-05, "loss": 3.4569, "step": 10490 }, { "epoch": 0.7130724283190651, "grad_norm": 3.0202150344848633, "learning_rate": 9.109084114689497e-05, 
"loss": 3.3881, "step": 10495 }, { "epoch": 0.7134121483897269, "grad_norm": 2.5624940395355225, "learning_rate": 9.108659464601169e-05, "loss": 3.3818, "step": 10500 }, { "epoch": 0.7137518684603886, "grad_norm": 2.795262575149536, "learning_rate": 9.108234814512841e-05, "loss": 3.4421, "step": 10505 }, { "epoch": 0.7140915885310504, "grad_norm": 1.8148218393325806, "learning_rate": 9.107810164424516e-05, "loss": 3.6402, "step": 10510 }, { "epoch": 0.7144313086017122, "grad_norm": 1.9620920419692993, "learning_rate": 9.107385514336187e-05, "loss": 3.3249, "step": 10515 }, { "epoch": 0.714771028672374, "grad_norm": 2.484452962875366, "learning_rate": 9.10696086424786e-05, "loss": 3.3529, "step": 10520 }, { "epoch": 0.7151107487430357, "grad_norm": 2.289315938949585, "learning_rate": 9.106536214159534e-05, "loss": 3.4068, "step": 10525 }, { "epoch": 0.7154504688136976, "grad_norm": 2.0351531505584717, "learning_rate": 9.106111564071205e-05, "loss": 3.2509, "step": 10530 }, { "epoch": 0.7157901888843593, "grad_norm": 1.8132216930389404, "learning_rate": 9.105686913982878e-05, "loss": 3.522, "step": 10535 }, { "epoch": 0.716129908955021, "grad_norm": 2.4029035568237305, "learning_rate": 9.105262263894551e-05, "loss": 3.5176, "step": 10540 }, { "epoch": 0.7164696290256828, "grad_norm": 2.159874677658081, "learning_rate": 9.104837613806224e-05, "loss": 3.2876, "step": 10545 }, { "epoch": 0.7168093490963446, "grad_norm": 2.4289910793304443, "learning_rate": 9.104412963717897e-05, "loss": 3.5906, "step": 10550 }, { "epoch": 0.7171490691670064, "grad_norm": 1.7269231081008911, "learning_rate": 9.10398831362957e-05, "loss": 3.5046, "step": 10555 }, { "epoch": 0.7174887892376681, "grad_norm": 1.981059193611145, "learning_rate": 9.103563663541242e-05, "loss": 3.4799, "step": 10560 }, { "epoch": 0.71782850930833, "grad_norm": 1.7709567546844482, "learning_rate": 9.103139013452915e-05, "loss": 3.5944, "step": 10565 }, { "epoch": 0.7181682293789917, "grad_norm": 
1.9505103826522827, "learning_rate": 9.102714363364588e-05, "loss": 3.5716, "step": 10570 }, { "epoch": 0.7185079494496535, "grad_norm": 2.706106185913086, "learning_rate": 9.10228971327626e-05, "loss": 3.343, "step": 10575 }, { "epoch": 0.7188476695203153, "grad_norm": 2.7203116416931152, "learning_rate": 9.101865063187933e-05, "loss": 3.3993, "step": 10580 }, { "epoch": 0.719187389590977, "grad_norm": 3.2484118938446045, "learning_rate": 9.101440413099606e-05, "loss": 3.264, "step": 10585 }, { "epoch": 0.7195271096616388, "grad_norm": 2.256519079208374, "learning_rate": 9.101015763011279e-05, "loss": 3.5549, "step": 10590 }, { "epoch": 0.7198668297323005, "grad_norm": 2.5238773822784424, "learning_rate": 9.100591112922952e-05, "loss": 3.2853, "step": 10595 }, { "epoch": 0.7202065498029624, "grad_norm": 2.011028528213501, "learning_rate": 9.100166462834625e-05, "loss": 3.3662, "step": 10600 }, { "epoch": 0.7205462698736241, "grad_norm": 2.2392654418945312, "learning_rate": 9.099741812746297e-05, "loss": 3.5608, "step": 10605 }, { "epoch": 0.7208859899442859, "grad_norm": 1.7563396692276, "learning_rate": 9.09931716265797e-05, "loss": 3.4139, "step": 10610 }, { "epoch": 0.7212257100149477, "grad_norm": 1.82901132106781, "learning_rate": 9.098892512569643e-05, "loss": 3.4043, "step": 10615 }, { "epoch": 0.7215654300856095, "grad_norm": 2.18038010597229, "learning_rate": 9.098467862481316e-05, "loss": 3.4499, "step": 10620 }, { "epoch": 0.7219051501562712, "grad_norm": 2.0786614418029785, "learning_rate": 9.098043212392989e-05, "loss": 3.6284, "step": 10625 }, { "epoch": 0.722244870226933, "grad_norm": 1.9242092370986938, "learning_rate": 9.097618562304661e-05, "loss": 3.4669, "step": 10630 }, { "epoch": 0.7225845902975948, "grad_norm": 2.1038947105407715, "learning_rate": 9.097193912216334e-05, "loss": 3.3863, "step": 10635 }, { "epoch": 0.7229243103682566, "grad_norm": 2.0907907485961914, "learning_rate": 9.096769262128007e-05, "loss": 3.3652, "step": 10640 }, { 
"epoch": 0.7232640304389183, "grad_norm": 2.884026527404785, "learning_rate": 9.09634461203968e-05, "loss": 3.4208, "step": 10645 }, { "epoch": 0.7236037505095801, "grad_norm": 2.813676357269287, "learning_rate": 9.095919961951353e-05, "loss": 3.3257, "step": 10650 }, { "epoch": 0.7239434705802419, "grad_norm": 2.612833261489868, "learning_rate": 9.095495311863025e-05, "loss": 3.3911, "step": 10655 }, { "epoch": 0.7242831906509036, "grad_norm": 2.0771286487579346, "learning_rate": 9.095070661774698e-05, "loss": 3.1228, "step": 10660 }, { "epoch": 0.7246229107215655, "grad_norm": 2.3251900672912598, "learning_rate": 9.094646011686371e-05, "loss": 3.4598, "step": 10665 }, { "epoch": 0.7249626307922272, "grad_norm": 2.2367258071899414, "learning_rate": 9.094221361598043e-05, "loss": 3.4079, "step": 10670 }, { "epoch": 0.725302350862889, "grad_norm": 2.5025484561920166, "learning_rate": 9.093796711509717e-05, "loss": 3.5936, "step": 10675 }, { "epoch": 0.7256420709335507, "grad_norm": 2.1150619983673096, "learning_rate": 9.09337206142139e-05, "loss": 3.446, "step": 10680 }, { "epoch": 0.7259817910042126, "grad_norm": 2.5962636470794678, "learning_rate": 9.092947411333061e-05, "loss": 3.3378, "step": 10685 }, { "epoch": 0.7263215110748743, "grad_norm": 1.8724310398101807, "learning_rate": 9.092522761244735e-05, "loss": 3.4324, "step": 10690 }, { "epoch": 0.726661231145536, "grad_norm": 2.1106648445129395, "learning_rate": 9.092098111156408e-05, "loss": 3.4576, "step": 10695 }, { "epoch": 0.7270009512161979, "grad_norm": 3.0349202156066895, "learning_rate": 9.091673461068079e-05, "loss": 3.6279, "step": 10700 }, { "epoch": 0.7273406712868596, "grad_norm": 2.1640307903289795, "learning_rate": 9.091248810979753e-05, "loss": 3.5625, "step": 10705 }, { "epoch": 0.7276803913575214, "grad_norm": 2.3512585163116455, "learning_rate": 9.090824160891426e-05, "loss": 3.6583, "step": 10710 }, { "epoch": 0.7280201114281831, "grad_norm": 1.8274447917938232, "learning_rate": 
9.090399510803098e-05, "loss": 3.5874, "step": 10715 }, { "epoch": 0.728359831498845, "grad_norm": 2.6690142154693604, "learning_rate": 9.089974860714772e-05, "loss": 3.2241, "step": 10720 }, { "epoch": 0.7286995515695067, "grad_norm": 1.6083638668060303, "learning_rate": 9.089550210626445e-05, "loss": 3.4425, "step": 10725 }, { "epoch": 0.7290392716401685, "grad_norm": 1.9268238544464111, "learning_rate": 9.089125560538116e-05, "loss": 3.2873, "step": 10730 }, { "epoch": 0.7293789917108303, "grad_norm": 2.3528811931610107, "learning_rate": 9.08870091044979e-05, "loss": 3.5524, "step": 10735 }, { "epoch": 0.7297187117814921, "grad_norm": 2.069373846054077, "learning_rate": 9.088276260361463e-05, "loss": 3.609, "step": 10740 }, { "epoch": 0.7300584318521538, "grad_norm": 1.9688727855682373, "learning_rate": 9.087851610273136e-05, "loss": 3.3181, "step": 10745 }, { "epoch": 0.7303981519228157, "grad_norm": 2.8601443767547607, "learning_rate": 9.087426960184809e-05, "loss": 3.4131, "step": 10750 }, { "epoch": 0.7307378719934774, "grad_norm": 2.1676247119903564, "learning_rate": 9.08700231009648e-05, "loss": 3.4029, "step": 10755 }, { "epoch": 0.7310775920641391, "grad_norm": 2.119716167449951, "learning_rate": 9.086577660008154e-05, "loss": 3.6311, "step": 10760 }, { "epoch": 0.7314173121348009, "grad_norm": 2.2926578521728516, "learning_rate": 9.086153009919827e-05, "loss": 3.5058, "step": 10765 }, { "epoch": 0.7317570322054627, "grad_norm": 2.3588671684265137, "learning_rate": 9.085728359831499e-05, "loss": 3.3528, "step": 10770 }, { "epoch": 0.7320967522761245, "grad_norm": 2.3392527103424072, "learning_rate": 9.085303709743173e-05, "loss": 3.6571, "step": 10775 }, { "epoch": 0.7324364723467862, "grad_norm": 1.9288994073867798, "learning_rate": 9.084879059654845e-05, "loss": 3.4191, "step": 10780 }, { "epoch": 0.7327761924174481, "grad_norm": 2.903181552886963, "learning_rate": 9.084454409566517e-05, "loss": 3.5195, "step": 10785 }, { "epoch": 0.7331159124881098, 
"grad_norm": 2.0483531951904297, "learning_rate": 9.084029759478191e-05, "loss": 3.288, "step": 10790 }, { "epoch": 0.7334556325587716, "grad_norm": 2.4179515838623047, "learning_rate": 9.083605109389864e-05, "loss": 3.4188, "step": 10795 }, { "epoch": 0.7337953526294333, "grad_norm": 2.3747267723083496, "learning_rate": 9.083180459301535e-05, "loss": 3.3079, "step": 10800 }, { "epoch": 0.7341350727000951, "grad_norm": 2.533458948135376, "learning_rate": 9.08275580921321e-05, "loss": 3.5124, "step": 10805 }, { "epoch": 0.7344747927707569, "grad_norm": 2.1378977298736572, "learning_rate": 9.082331159124882e-05, "loss": 3.473, "step": 10810 }, { "epoch": 0.7348145128414186, "grad_norm": 2.3398780822753906, "learning_rate": 9.081906509036554e-05, "loss": 3.3911, "step": 10815 }, { "epoch": 0.7351542329120805, "grad_norm": 2.040144681930542, "learning_rate": 9.081481858948228e-05, "loss": 3.4196, "step": 10820 }, { "epoch": 0.7354939529827422, "grad_norm": 1.981629729270935, "learning_rate": 9.0810572088599e-05, "loss": 3.4989, "step": 10825 }, { "epoch": 0.735833673053404, "grad_norm": 2.342792272567749, "learning_rate": 9.080632558771572e-05, "loss": 3.4088, "step": 10830 }, { "epoch": 0.7361733931240658, "grad_norm": 2.331023931503296, "learning_rate": 9.080207908683246e-05, "loss": 3.4984, "step": 10835 }, { "epoch": 0.7365131131947276, "grad_norm": 2.042898654937744, "learning_rate": 9.079783258594918e-05, "loss": 3.4063, "step": 10840 }, { "epoch": 0.7368528332653893, "grad_norm": 4.239623546600342, "learning_rate": 9.07935860850659e-05, "loss": 3.3617, "step": 10845 }, { "epoch": 0.737192553336051, "grad_norm": 2.5794546604156494, "learning_rate": 9.078933958418265e-05, "loss": 3.5826, "step": 10850 }, { "epoch": 0.7375322734067129, "grad_norm": 1.8803948163986206, "learning_rate": 9.078509308329936e-05, "loss": 3.3706, "step": 10855 }, { "epoch": 0.7378719934773746, "grad_norm": 1.9100732803344727, "learning_rate": 9.078084658241609e-05, "loss": 3.5624, "step": 
10860 }, { "epoch": 0.7382117135480364, "grad_norm": 1.75567626953125, "learning_rate": 9.077660008153283e-05, "loss": 3.6124, "step": 10865 }, { "epoch": 0.7385514336186982, "grad_norm": 2.0038938522338867, "learning_rate": 9.077235358064955e-05, "loss": 3.4023, "step": 10870 }, { "epoch": 0.73889115368936, "grad_norm": 2.361287832260132, "learning_rate": 9.076810707976627e-05, "loss": 3.4801, "step": 10875 }, { "epoch": 0.7392308737600217, "grad_norm": 2.0191619396209717, "learning_rate": 9.076386057888302e-05, "loss": 3.4571, "step": 10880 }, { "epoch": 0.7395705938306835, "grad_norm": 1.9986028671264648, "learning_rate": 9.075961407799973e-05, "loss": 3.4198, "step": 10885 }, { "epoch": 0.7399103139013453, "grad_norm": 2.421579360961914, "learning_rate": 9.075536757711646e-05, "loss": 3.3339, "step": 10890 }, { "epoch": 0.7402500339720071, "grad_norm": 2.5237321853637695, "learning_rate": 9.075112107623319e-05, "loss": 3.4519, "step": 10895 }, { "epoch": 0.7405897540426688, "grad_norm": 4.795865535736084, "learning_rate": 9.074687457534991e-05, "loss": 3.33, "step": 10900 }, { "epoch": 0.7409294741133307, "grad_norm": 3.025031089782715, "learning_rate": 9.074262807446664e-05, "loss": 3.5102, "step": 10905 }, { "epoch": 0.7412691941839924, "grad_norm": 2.056140422821045, "learning_rate": 9.073838157358337e-05, "loss": 3.4382, "step": 10910 }, { "epoch": 0.7416089142546541, "grad_norm": 1.9269989728927612, "learning_rate": 9.07341350727001e-05, "loss": 3.6869, "step": 10915 }, { "epoch": 0.741948634325316, "grad_norm": 2.2858726978302, "learning_rate": 9.072988857181683e-05, "loss": 3.387, "step": 10920 }, { "epoch": 0.7422883543959777, "grad_norm": 2.283271074295044, "learning_rate": 9.072564207093355e-05, "loss": 3.3383, "step": 10925 }, { "epoch": 0.7426280744666395, "grad_norm": 2.2543795108795166, "learning_rate": 9.072139557005028e-05, "loss": 3.4783, "step": 10930 }, { "epoch": 0.7429677945373012, "grad_norm": 2.2956485748291016, "learning_rate": 
9.071714906916701e-05, "loss": 3.3174, "step": 10935 }, { "epoch": 0.7433075146079631, "grad_norm": 2.596615791320801, "learning_rate": 9.071290256828374e-05, "loss": 3.3084, "step": 10940 }, { "epoch": 0.7436472346786248, "grad_norm": 1.9923878908157349, "learning_rate": 9.070865606740047e-05, "loss": 3.4221, "step": 10945 }, { "epoch": 0.7439869547492866, "grad_norm": 1.891037106513977, "learning_rate": 9.07044095665172e-05, "loss": 3.7177, "step": 10950 }, { "epoch": 0.7443266748199484, "grad_norm": 1.9931647777557373, "learning_rate": 9.070016306563392e-05, "loss": 3.2509, "step": 10955 }, { "epoch": 0.7446663948906102, "grad_norm": 1.6091890335083008, "learning_rate": 9.069591656475065e-05, "loss": 3.376, "step": 10960 }, { "epoch": 0.7450061149612719, "grad_norm": 2.1172564029693604, "learning_rate": 9.069167006386738e-05, "loss": 3.6299, "step": 10965 }, { "epoch": 0.7453458350319336, "grad_norm": 2.5945401191711426, "learning_rate": 9.06874235629841e-05, "loss": 3.3684, "step": 10970 }, { "epoch": 0.7456855551025955, "grad_norm": 2.4114725589752197, "learning_rate": 9.068317706210083e-05, "loss": 3.4607, "step": 10975 }, { "epoch": 0.7460252751732572, "grad_norm": 2.6435694694519043, "learning_rate": 9.067893056121756e-05, "loss": 3.3231, "step": 10980 }, { "epoch": 0.746364995243919, "grad_norm": 1.8973042964935303, "learning_rate": 9.067468406033429e-05, "loss": 3.4685, "step": 10985 }, { "epoch": 0.7467047153145808, "grad_norm": 2.3614795207977295, "learning_rate": 9.067043755945102e-05, "loss": 3.2778, "step": 10990 }, { "epoch": 0.7470444353852426, "grad_norm": 2.1101648807525635, "learning_rate": 9.066619105856775e-05, "loss": 3.2096, "step": 10995 }, { "epoch": 0.7473841554559043, "grad_norm": 1.946252703666687, "learning_rate": 9.066194455768447e-05, "loss": 3.2057, "step": 11000 }, { "epoch": 0.7477238755265662, "grad_norm": 2.2493433952331543, "learning_rate": 9.06576980568012e-05, "loss": 3.2899, "step": 11005 }, { "epoch": 0.7480635955972279, 
"grad_norm": 2.2197651863098145, "learning_rate": 9.065345155591793e-05, "loss": 3.3169, "step": 11010 }, { "epoch": 0.7484033156678896, "grad_norm": 1.8556232452392578, "learning_rate": 9.064920505503466e-05, "loss": 3.0973, "step": 11015 }, { "epoch": 0.7487430357385514, "grad_norm": 2.3237788677215576, "learning_rate": 9.064495855415139e-05, "loss": 3.4265, "step": 11020 }, { "epoch": 0.7490827558092132, "grad_norm": 2.0329315662384033, "learning_rate": 9.06407120532681e-05, "loss": 3.4525, "step": 11025 }, { "epoch": 0.749422475879875, "grad_norm": 2.1035406589508057, "learning_rate": 9.063646555238484e-05, "loss": 3.4395, "step": 11030 }, { "epoch": 0.7497621959505367, "grad_norm": 2.091654062271118, "learning_rate": 9.063221905150157e-05, "loss": 3.4969, "step": 11035 }, { "epoch": 0.7501019160211986, "grad_norm": 2.510631561279297, "learning_rate": 9.062797255061828e-05, "loss": 3.5577, "step": 11040 }, { "epoch": 0.7504416360918603, "grad_norm": 2.022939920425415, "learning_rate": 9.062372604973503e-05, "loss": 3.4426, "step": 11045 }, { "epoch": 0.7507813561625221, "grad_norm": 2.287958860397339, "learning_rate": 9.061947954885175e-05, "loss": 3.4774, "step": 11050 }, { "epoch": 0.7511210762331838, "grad_norm": 2.3949368000030518, "learning_rate": 9.061523304796847e-05, "loss": 3.1168, "step": 11055 }, { "epoch": 0.7514607963038457, "grad_norm": 2.22428297996521, "learning_rate": 9.061098654708521e-05, "loss": 3.5221, "step": 11060 }, { "epoch": 0.7518005163745074, "grad_norm": 2.5449883937835693, "learning_rate": 9.060674004620194e-05, "loss": 3.587, "step": 11065 }, { "epoch": 0.7521402364451691, "grad_norm": 2.7419300079345703, "learning_rate": 9.060249354531865e-05, "loss": 3.3353, "step": 11070 }, { "epoch": 0.752479956515831, "grad_norm": 1.9885202646255493, "learning_rate": 9.05982470444354e-05, "loss": 3.4969, "step": 11075 }, { "epoch": 0.7528196765864927, "grad_norm": 1.9539488554000854, "learning_rate": 9.059400054355212e-05, "loss": 3.4032, 
"step": 11080 }, { "epoch": 0.7531593966571545, "grad_norm": 2.6884703636169434, "learning_rate": 9.058975404266885e-05, "loss": 3.2384, "step": 11085 }, { "epoch": 0.7534991167278163, "grad_norm": 3.0844130516052246, "learning_rate": 9.058550754178558e-05, "loss": 3.4376, "step": 11090 }, { "epoch": 0.7538388367984781, "grad_norm": 1.7788217067718506, "learning_rate": 9.058126104090229e-05, "loss": 3.5911, "step": 11095 }, { "epoch": 0.7541785568691398, "grad_norm": 1.9761929512023926, "learning_rate": 9.057701454001903e-05, "loss": 3.2833, "step": 11100 }, { "epoch": 0.7545182769398016, "grad_norm": 1.8782196044921875, "learning_rate": 9.057276803913576e-05, "loss": 3.4258, "step": 11105 }, { "epoch": 0.7548579970104634, "grad_norm": 1.9788384437561035, "learning_rate": 9.056852153825248e-05, "loss": 3.367, "step": 11110 }, { "epoch": 0.7551977170811252, "grad_norm": 2.1012284755706787, "learning_rate": 9.056427503736922e-05, "loss": 3.3178, "step": 11115 }, { "epoch": 0.7555374371517869, "grad_norm": 2.2467188835144043, "learning_rate": 9.056002853648595e-05, "loss": 3.3999, "step": 11120 }, { "epoch": 0.7558771572224487, "grad_norm": 1.7547935247421265, "learning_rate": 9.055578203560266e-05, "loss": 3.5821, "step": 11125 }, { "epoch": 0.7562168772931105, "grad_norm": 1.9440996646881104, "learning_rate": 9.05515355347194e-05, "loss": 3.519, "step": 11130 }, { "epoch": 0.7565565973637722, "grad_norm": 2.0020744800567627, "learning_rate": 9.054728903383613e-05, "loss": 3.5756, "step": 11135 }, { "epoch": 0.756896317434434, "grad_norm": 1.671148419380188, "learning_rate": 9.054304253295284e-05, "loss": 3.4798, "step": 11140 }, { "epoch": 0.7572360375050958, "grad_norm": 2.3796920776367188, "learning_rate": 9.053879603206959e-05, "loss": 3.2064, "step": 11145 }, { "epoch": 0.7575757575757576, "grad_norm": 2.4899344444274902, "learning_rate": 9.053454953118631e-05, "loss": 3.6966, "step": 11150 }, { "epoch": 0.7579154776464193, "grad_norm": 2.257075071334839, 
"learning_rate": 9.053030303030303e-05, "loss": 3.3965, "step": 11155 }, { "epoch": 0.7582551977170812, "grad_norm": 2.018040180206299, "learning_rate": 9.052605652941977e-05, "loss": 3.4178, "step": 11160 }, { "epoch": 0.7585949177877429, "grad_norm": 2.3814074993133545, "learning_rate": 9.05218100285365e-05, "loss": 3.1839, "step": 11165 }, { "epoch": 0.7589346378584046, "grad_norm": 1.9290012121200562, "learning_rate": 9.051756352765321e-05, "loss": 3.2344, "step": 11170 }, { "epoch": 0.7592743579290665, "grad_norm": 2.2735488414764404, "learning_rate": 9.051331702676995e-05, "loss": 3.4017, "step": 11175 }, { "epoch": 0.7596140779997282, "grad_norm": 2.5904603004455566, "learning_rate": 9.050907052588667e-05, "loss": 3.2168, "step": 11180 }, { "epoch": 0.75995379807039, "grad_norm": 1.96474289894104, "learning_rate": 9.05048240250034e-05, "loss": 3.2424, "step": 11185 }, { "epoch": 0.7602935181410517, "grad_norm": 1.849856972694397, "learning_rate": 9.050057752412014e-05, "loss": 3.4237, "step": 11190 }, { "epoch": 0.7606332382117136, "grad_norm": 2.0133066177368164, "learning_rate": 9.049633102323685e-05, "loss": 3.5238, "step": 11195 }, { "epoch": 0.7609729582823753, "grad_norm": 2.219210147857666, "learning_rate": 9.049208452235358e-05, "loss": 3.2749, "step": 11200 }, { "epoch": 0.7613126783530371, "grad_norm": 2.412020206451416, "learning_rate": 9.048783802147032e-05, "loss": 3.4784, "step": 11205 }, { "epoch": 0.7616523984236989, "grad_norm": 4.569281101226807, "learning_rate": 9.048359152058704e-05, "loss": 3.4909, "step": 11210 }, { "epoch": 0.7619921184943607, "grad_norm": 2.299633502960205, "learning_rate": 9.047934501970376e-05, "loss": 3.303, "step": 11215 }, { "epoch": 0.7623318385650224, "grad_norm": 1.8916287422180176, "learning_rate": 9.04750985188205e-05, "loss": 3.5023, "step": 11220 }, { "epoch": 0.7626715586356841, "grad_norm": 1.9439961910247803, "learning_rate": 9.047085201793722e-05, "loss": 3.3326, "step": 11225 }, { "epoch": 
0.763011278706346, "grad_norm": 1.908080816268921, "learning_rate": 9.046660551705395e-05, "loss": 3.5699, "step": 11230 }, { "epoch": 0.7633509987770077, "grad_norm": 1.8485513925552368, "learning_rate": 9.046235901617069e-05, "loss": 3.4751, "step": 11235 }, { "epoch": 0.7636907188476695, "grad_norm": 2.3641703128814697, "learning_rate": 9.04581125152874e-05, "loss": 3.3947, "step": 11240 }, { "epoch": 0.7640304389183313, "grad_norm": 2.173023223876953, "learning_rate": 9.045386601440413e-05, "loss": 3.1465, "step": 11245 }, { "epoch": 0.7643701589889931, "grad_norm": 2.2282724380493164, "learning_rate": 9.044961951352086e-05, "loss": 3.3554, "step": 11250 }, { "epoch": 0.7647098790596548, "grad_norm": 1.8429909944534302, "learning_rate": 9.044537301263759e-05, "loss": 3.6429, "step": 11255 }, { "epoch": 0.7650495991303167, "grad_norm": 1.8007175922393799, "learning_rate": 9.044112651175432e-05, "loss": 3.509, "step": 11260 }, { "epoch": 0.7653893192009784, "grad_norm": 2.144818067550659, "learning_rate": 9.043688001087104e-05, "loss": 3.3788, "step": 11265 }, { "epoch": 0.7657290392716402, "grad_norm": 2.8039710521698, "learning_rate": 9.043263350998777e-05, "loss": 3.4663, "step": 11270 }, { "epoch": 0.7660687593423019, "grad_norm": 2.3489575386047363, "learning_rate": 9.04283870091045e-05, "loss": 3.4442, "step": 11275 }, { "epoch": 0.7664084794129638, "grad_norm": 2.214536428451538, "learning_rate": 9.042414050822123e-05, "loss": 3.3436, "step": 11280 }, { "epoch": 0.7667481994836255, "grad_norm": 2.0688140392303467, "learning_rate": 9.041989400733796e-05, "loss": 3.2298, "step": 11285 }, { "epoch": 0.7670879195542872, "grad_norm": 2.198549747467041, "learning_rate": 9.041564750645468e-05, "loss": 3.3888, "step": 11290 }, { "epoch": 0.7674276396249491, "grad_norm": 1.9206246137619019, "learning_rate": 9.041140100557141e-05, "loss": 3.301, "step": 11295 }, { "epoch": 0.7677673596956108, "grad_norm": 1.651972770690918, "learning_rate": 9.040715450468814e-05, 
"loss": 3.4455, "step": 11300 }, { "epoch": 0.7681070797662726, "grad_norm": 2.2197558879852295, "learning_rate": 9.040290800380487e-05, "loss": 3.2435, "step": 11305 }, { "epoch": 0.7684467998369343, "grad_norm": 2.584726572036743, "learning_rate": 9.03986615029216e-05, "loss": 3.4971, "step": 11310 }, { "epoch": 0.7687865199075962, "grad_norm": 2.4776198863983154, "learning_rate": 9.039441500203832e-05, "loss": 3.3481, "step": 11315 }, { "epoch": 0.7691262399782579, "grad_norm": 1.9876595735549927, "learning_rate": 9.039016850115505e-05, "loss": 3.5562, "step": 11320 }, { "epoch": 0.7694659600489197, "grad_norm": 2.591463088989258, "learning_rate": 9.038592200027178e-05, "loss": 3.335, "step": 11325 }, { "epoch": 0.7698056801195815, "grad_norm": 2.3524725437164307, "learning_rate": 9.038167549938851e-05, "loss": 3.2752, "step": 11330 }, { "epoch": 0.7701454001902432, "grad_norm": 3.10605525970459, "learning_rate": 9.037742899850524e-05, "loss": 3.2207, "step": 11335 }, { "epoch": 0.770485120260905, "grad_norm": 2.462714672088623, "learning_rate": 9.037318249762196e-05, "loss": 3.1319, "step": 11340 }, { "epoch": 0.7708248403315668, "grad_norm": 2.4269654750823975, "learning_rate": 9.036893599673869e-05, "loss": 3.4491, "step": 11345 }, { "epoch": 0.7711645604022286, "grad_norm": 1.8146681785583496, "learning_rate": 9.036468949585542e-05, "loss": 3.4463, "step": 11350 }, { "epoch": 0.7715042804728903, "grad_norm": 2.8521134853363037, "learning_rate": 9.036044299497215e-05, "loss": 3.3275, "step": 11355 }, { "epoch": 0.7718440005435521, "grad_norm": 2.598526954650879, "learning_rate": 9.035619649408888e-05, "loss": 3.7912, "step": 11360 }, { "epoch": 0.7721837206142139, "grad_norm": 1.7714214324951172, "learning_rate": 9.03519499932056e-05, "loss": 3.4521, "step": 11365 }, { "epoch": 0.7725234406848757, "grad_norm": 2.496175527572632, "learning_rate": 9.034770349232233e-05, "loss": 3.6247, "step": 11370 }, { "epoch": 0.7728631607555374, "grad_norm": 
2.84682297706604, "learning_rate": 9.034345699143906e-05, "loss": 3.2996, "step": 11375 }, { "epoch": 0.7732028808261993, "grad_norm": 2.184936761856079, "learning_rate": 9.033921049055578e-05, "loss": 3.2265, "step": 11380 }, { "epoch": 0.773542600896861, "grad_norm": 1.8503079414367676, "learning_rate": 9.033496398967252e-05, "loss": 3.4367, "step": 11385 }, { "epoch": 0.7738823209675227, "grad_norm": 2.2539453506469727, "learning_rate": 9.033071748878924e-05, "loss": 3.5811, "step": 11390 }, { "epoch": 0.7742220410381845, "grad_norm": 2.4276680946350098, "learning_rate": 9.032647098790596e-05, "loss": 3.4865, "step": 11395 }, { "epoch": 0.7745617611088463, "grad_norm": 2.0705325603485107, "learning_rate": 9.03222244870227e-05, "loss": 3.3396, "step": 11400 }, { "epoch": 0.7749014811795081, "grad_norm": 2.168041706085205, "learning_rate": 9.031797798613943e-05, "loss": 3.5976, "step": 11405 }, { "epoch": 0.7752412012501698, "grad_norm": 2.1073849201202393, "learning_rate": 9.031373148525614e-05, "loss": 3.7443, "step": 11410 }, { "epoch": 0.7755809213208317, "grad_norm": 2.7408745288848877, "learning_rate": 9.030948498437288e-05, "loss": 3.5112, "step": 11415 }, { "epoch": 0.7759206413914934, "grad_norm": 2.7794125080108643, "learning_rate": 9.030523848348961e-05, "loss": 3.5839, "step": 11420 }, { "epoch": 0.7762603614621552, "grad_norm": 2.4596080780029297, "learning_rate": 9.030099198260634e-05, "loss": 3.1659, "step": 11425 }, { "epoch": 0.776600081532817, "grad_norm": 2.173229217529297, "learning_rate": 9.029674548172307e-05, "loss": 3.1308, "step": 11430 }, { "epoch": 0.7769398016034788, "grad_norm": 1.906375765800476, "learning_rate": 9.02924989808398e-05, "loss": 3.3156, "step": 11435 }, { "epoch": 0.7772795216741405, "grad_norm": 2.454479217529297, "learning_rate": 9.028825247995652e-05, "loss": 3.4941, "step": 11440 }, { "epoch": 0.7776192417448022, "grad_norm": 2.167015552520752, "learning_rate": 9.028400597907325e-05, "loss": 3.3087, "step": 11445 }, 
{ "epoch": 0.7779589618154641, "grad_norm": 2.4903719425201416, "learning_rate": 9.027975947818997e-05, "loss": 3.4101, "step": 11450 }, { "epoch": 0.7782986818861258, "grad_norm": 2.269793748855591, "learning_rate": 9.027551297730671e-05, "loss": 3.5565, "step": 11455 }, { "epoch": 0.7786384019567876, "grad_norm": 1.8844324350357056, "learning_rate": 9.027126647642344e-05, "loss": 3.1795, "step": 11460 }, { "epoch": 0.7789781220274494, "grad_norm": 1.9648065567016602, "learning_rate": 9.026701997554015e-05, "loss": 3.5654, "step": 11465 }, { "epoch": 0.7793178420981112, "grad_norm": 2.3887670040130615, "learning_rate": 9.026277347465689e-05, "loss": 3.4269, "step": 11470 }, { "epoch": 0.7796575621687729, "grad_norm": 2.103919506072998, "learning_rate": 9.025852697377362e-05, "loss": 3.4802, "step": 11475 }, { "epoch": 0.7799972822394347, "grad_norm": 2.0800790786743164, "learning_rate": 9.025428047289034e-05, "loss": 3.4395, "step": 11480 }, { "epoch": 0.7803370023100965, "grad_norm": 1.862479329109192, "learning_rate": 9.025003397200708e-05, "loss": 3.3586, "step": 11485 }, { "epoch": 0.7806767223807582, "grad_norm": 2.47119402885437, "learning_rate": 9.02457874711238e-05, "loss": 3.278, "step": 11490 }, { "epoch": 0.78101644245142, "grad_norm": 1.9219058752059937, "learning_rate": 9.024154097024052e-05, "loss": 3.4069, "step": 11495 }, { "epoch": 0.7813561625220818, "grad_norm": 2.386284351348877, "learning_rate": 9.023729446935726e-05, "loss": 3.0464, "step": 11500 }, { "epoch": 0.7816958825927436, "grad_norm": 1.8137407302856445, "learning_rate": 9.023304796847399e-05, "loss": 3.3531, "step": 11505 }, { "epoch": 0.7820356026634053, "grad_norm": 2.1406009197235107, "learning_rate": 9.02288014675907e-05, "loss": 3.6528, "step": 11510 }, { "epoch": 0.7823753227340672, "grad_norm": 2.3624815940856934, "learning_rate": 9.022455496670744e-05, "loss": 3.486, "step": 11515 }, { "epoch": 0.7827150428047289, "grad_norm": 2.1248888969421387, "learning_rate": 
9.022030846582416e-05, "loss": 3.5324, "step": 11520 }, { "epoch": 0.7830547628753907, "grad_norm": 2.0932772159576416, "learning_rate": 9.021606196494089e-05, "loss": 3.6139, "step": 11525 }, { "epoch": 0.7833944829460524, "grad_norm": 2.263762950897217, "learning_rate": 9.021181546405763e-05, "loss": 3.3103, "step": 11530 }, { "epoch": 0.7837342030167143, "grad_norm": 1.8357583284378052, "learning_rate": 9.020756896317434e-05, "loss": 3.2178, "step": 11535 }, { "epoch": 0.784073923087376, "grad_norm": 2.109544038772583, "learning_rate": 9.020332246229107e-05, "loss": 3.6964, "step": 11540 }, { "epoch": 0.7844136431580377, "grad_norm": 2.2789525985717773, "learning_rate": 9.019907596140781e-05, "loss": 3.3875, "step": 11545 }, { "epoch": 0.7847533632286996, "grad_norm": 1.6911152601242065, "learning_rate": 9.019482946052453e-05, "loss": 3.5005, "step": 11550 }, { "epoch": 0.7850930832993613, "grad_norm": 2.174966812133789, "learning_rate": 9.019058295964126e-05, "loss": 3.3693, "step": 11555 }, { "epoch": 0.7854328033700231, "grad_norm": 2.047849178314209, "learning_rate": 9.0186336458758e-05, "loss": 3.6064, "step": 11560 }, { "epoch": 0.7857725234406848, "grad_norm": 2.1291744709014893, "learning_rate": 9.018208995787471e-05, "loss": 3.4093, "step": 11565 }, { "epoch": 0.7861122435113467, "grad_norm": 2.612980842590332, "learning_rate": 9.017784345699144e-05, "loss": 3.1426, "step": 11570 }, { "epoch": 0.7864519635820084, "grad_norm": 2.067013740539551, "learning_rate": 9.017359695610818e-05, "loss": 3.3052, "step": 11575 }, { "epoch": 0.7867916836526702, "grad_norm": 2.4694230556488037, "learning_rate": 9.01693504552249e-05, "loss": 3.7424, "step": 11580 }, { "epoch": 0.787131403723332, "grad_norm": 1.8251795768737793, "learning_rate": 9.016510395434162e-05, "loss": 3.3368, "step": 11585 }, { "epoch": 0.7874711237939938, "grad_norm": 1.5800342559814453, "learning_rate": 9.016085745345836e-05, "loss": 3.4217, "step": 11590 }, { "epoch": 0.7878108438646555, 
"grad_norm": 2.0962321758270264, "learning_rate": 9.015661095257508e-05, "loss": 3.2964, "step": 11595 }, { "epoch": 0.7881505639353173, "grad_norm": 1.998167634010315, "learning_rate": 9.015236445169181e-05, "loss": 3.2976, "step": 11600 }, { "epoch": 0.7884902840059791, "grad_norm": 2.041581153869629, "learning_rate": 9.014811795080854e-05, "loss": 3.587, "step": 11605 }, { "epoch": 0.7888300040766408, "grad_norm": 2.483847141265869, "learning_rate": 9.014387144992526e-05, "loss": 3.5872, "step": 11610 }, { "epoch": 0.7891697241473026, "grad_norm": 15.90668773651123, "learning_rate": 9.013962494904199e-05, "loss": 3.2744, "step": 11615 }, { "epoch": 0.7895094442179644, "grad_norm": 2.419654369354248, "learning_rate": 9.013537844815872e-05, "loss": 3.3145, "step": 11620 }, { "epoch": 0.7898491642886262, "grad_norm": 2.0530455112457275, "learning_rate": 9.013113194727545e-05, "loss": 3.2514, "step": 11625 }, { "epoch": 0.7901888843592879, "grad_norm": 2.198951482772827, "learning_rate": 9.012688544639218e-05, "loss": 3.4445, "step": 11630 }, { "epoch": 0.7905286044299498, "grad_norm": 2.1728460788726807, "learning_rate": 9.01226389455089e-05, "loss": 3.3801, "step": 11635 }, { "epoch": 0.7908683245006115, "grad_norm": 1.963135004043579, "learning_rate": 9.011839244462563e-05, "loss": 3.5995, "step": 11640 }, { "epoch": 0.7912080445712733, "grad_norm": 2.1429896354675293, "learning_rate": 9.011414594374236e-05, "loss": 3.1804, "step": 11645 }, { "epoch": 0.791547764641935, "grad_norm": 2.343388557434082, "learning_rate": 9.010989944285909e-05, "loss": 3.4809, "step": 11650 }, { "epoch": 0.7918874847125968, "grad_norm": 2.60878849029541, "learning_rate": 9.010565294197582e-05, "loss": 3.475, "step": 11655 }, { "epoch": 0.7922272047832586, "grad_norm": 2.281998872756958, "learning_rate": 9.010140644109254e-05, "loss": 3.4948, "step": 11660 }, { "epoch": 0.7925669248539203, "grad_norm": 2.4443137645721436, "learning_rate": 9.009715994020927e-05, "loss": 3.3156, "step": 
11665 }, { "epoch": 0.7929066449245822, "grad_norm": 2.0320675373077393, "learning_rate": 9.0092913439326e-05, "loss": 3.4341, "step": 11670 }, { "epoch": 0.7932463649952439, "grad_norm": 2.0136067867279053, "learning_rate": 9.008866693844273e-05, "loss": 3.3965, "step": 11675 }, { "epoch": 0.7935860850659057, "grad_norm": 2.0364115238189697, "learning_rate": 9.008442043755946e-05, "loss": 3.4643, "step": 11680 }, { "epoch": 0.7939258051365675, "grad_norm": 2.2068874835968018, "learning_rate": 9.008017393667618e-05, "loss": 3.7331, "step": 11685 }, { "epoch": 0.7942655252072293, "grad_norm": 2.0830392837524414, "learning_rate": 9.007592743579291e-05, "loss": 3.3041, "step": 11690 }, { "epoch": 0.794605245277891, "grad_norm": 2.5593206882476807, "learning_rate": 9.007168093490964e-05, "loss": 3.3785, "step": 11695 }, { "epoch": 0.7949449653485527, "grad_norm": 1.8627713918685913, "learning_rate": 9.006743443402637e-05, "loss": 3.2584, "step": 11700 }, { "epoch": 0.7952846854192146, "grad_norm": 4.073328971862793, "learning_rate": 9.00631879331431e-05, "loss": 3.3095, "step": 11705 }, { "epoch": 0.7956244054898763, "grad_norm": 2.2592155933380127, "learning_rate": 9.005894143225982e-05, "loss": 3.1838, "step": 11710 }, { "epoch": 0.7959641255605381, "grad_norm": 1.8784817457199097, "learning_rate": 9.005469493137655e-05, "loss": 3.5494, "step": 11715 }, { "epoch": 0.7963038456311999, "grad_norm": 2.0339643955230713, "learning_rate": 9.005044843049327e-05, "loss": 3.4768, "step": 11720 }, { "epoch": 0.7966435657018617, "grad_norm": 2.1371805667877197, "learning_rate": 9.004620192961001e-05, "loss": 3.3907, "step": 11725 }, { "epoch": 0.7969832857725234, "grad_norm": 2.3924808502197266, "learning_rate": 9.004195542872674e-05, "loss": 3.5149, "step": 11730 }, { "epoch": 0.7973230058431852, "grad_norm": 2.4105582237243652, "learning_rate": 9.003770892784345e-05, "loss": 3.4412, "step": 11735 }, { "epoch": 0.797662725913847, "grad_norm": 2.1838505268096924, 
"learning_rate": 9.003346242696019e-05, "loss": 3.1949, "step": 11740 }, { "epoch": 0.7980024459845088, "grad_norm": 2.1127848625183105, "learning_rate": 9.002921592607692e-05, "loss": 3.3228, "step": 11745 }, { "epoch": 0.7983421660551705, "grad_norm": 2.802795648574829, "learning_rate": 9.002496942519363e-05, "loss": 3.519, "step": 11750 }, { "epoch": 0.7986818861258324, "grad_norm": 2.131911277770996, "learning_rate": 9.002072292431038e-05, "loss": 3.1634, "step": 11755 }, { "epoch": 0.7990216061964941, "grad_norm": 2.7319459915161133, "learning_rate": 9.00164764234271e-05, "loss": 3.4152, "step": 11760 }, { "epoch": 0.7993613262671558, "grad_norm": 2.0982625484466553, "learning_rate": 9.001222992254383e-05, "loss": 3.4346, "step": 11765 }, { "epoch": 0.7997010463378177, "grad_norm": 1.7234692573547363, "learning_rate": 9.000798342166056e-05, "loss": 3.4908, "step": 11770 }, { "epoch": 0.8000407664084794, "grad_norm": 1.9733598232269287, "learning_rate": 9.000373692077729e-05, "loss": 3.3127, "step": 11775 }, { "epoch": 0.8003804864791412, "grad_norm": 2.0834248065948486, "learning_rate": 8.999949041989402e-05, "loss": 3.6224, "step": 11780 }, { "epoch": 0.8007202065498029, "grad_norm": 2.2089688777923584, "learning_rate": 8.999524391901074e-05, "loss": 3.5807, "step": 11785 }, { "epoch": 0.8010599266204648, "grad_norm": 1.7381982803344727, "learning_rate": 8.999099741812747e-05, "loss": 3.1458, "step": 11790 }, { "epoch": 0.8013996466911265, "grad_norm": 1.9252065420150757, "learning_rate": 8.99867509172442e-05, "loss": 3.4913, "step": 11795 }, { "epoch": 0.8017393667617883, "grad_norm": 2.1850547790527344, "learning_rate": 8.998250441636093e-05, "loss": 3.5705, "step": 11800 }, { "epoch": 0.8020790868324501, "grad_norm": 2.3850276470184326, "learning_rate": 8.997825791547764e-05, "loss": 3.6488, "step": 11805 }, { "epoch": 0.8024188069031118, "grad_norm": 2.362490177154541, "learning_rate": 8.997401141459438e-05, "loss": 3.1729, "step": 11810 }, { "epoch": 
0.8027585269737736, "grad_norm": 2.5183753967285156, "learning_rate": 8.996976491371111e-05, "loss": 3.3475, "step": 11815 }, { "epoch": 0.8030982470444353, "grad_norm": 2.144824266433716, "learning_rate": 8.996551841282783e-05, "loss": 3.4031, "step": 11820 }, { "epoch": 0.8034379671150972, "grad_norm": 2.101644277572632, "learning_rate": 8.996127191194457e-05, "loss": 3.6251, "step": 11825 }, { "epoch": 0.8037776871857589, "grad_norm": 1.8469126224517822, "learning_rate": 8.99570254110613e-05, "loss": 3.4567, "step": 11830 }, { "epoch": 0.8041174072564207, "grad_norm": 2.1471898555755615, "learning_rate": 8.995277891017801e-05, "loss": 3.5695, "step": 11835 }, { "epoch": 0.8044571273270825, "grad_norm": 2.626678228378296, "learning_rate": 8.994853240929475e-05, "loss": 3.5137, "step": 11840 }, { "epoch": 0.8047968473977443, "grad_norm": 2.2975993156433105, "learning_rate": 8.994428590841148e-05, "loss": 3.3626, "step": 11845 }, { "epoch": 0.805136567468406, "grad_norm": 2.5563313961029053, "learning_rate": 8.99400394075282e-05, "loss": 3.5137, "step": 11850 }, { "epoch": 0.8054762875390679, "grad_norm": 3.081108808517456, "learning_rate": 8.993579290664494e-05, "loss": 3.2473, "step": 11855 }, { "epoch": 0.8058160076097296, "grad_norm": 2.4409470558166504, "learning_rate": 8.993154640576166e-05, "loss": 3.2571, "step": 11860 }, { "epoch": 0.8061557276803913, "grad_norm": 2.2482988834381104, "learning_rate": 8.992729990487838e-05, "loss": 3.3227, "step": 11865 }, { "epoch": 0.8064954477510531, "grad_norm": 1.7942568063735962, "learning_rate": 8.992305340399512e-05, "loss": 3.5038, "step": 11870 }, { "epoch": 0.8068351678217149, "grad_norm": 1.990861415863037, "learning_rate": 8.991880690311183e-05, "loss": 3.4663, "step": 11875 }, { "epoch": 0.8071748878923767, "grad_norm": 2.294863700866699, "learning_rate": 8.991456040222856e-05, "loss": 2.9932, "step": 11880 }, { "epoch": 0.8075146079630384, "grad_norm": 2.1445112228393555, "learning_rate": 
8.99103139013453e-05, "loss": 3.4847, "step": 11885 }, { "epoch": 0.8078543280337003, "grad_norm": 2.0598299503326416, "learning_rate": 8.990606740046202e-05, "loss": 3.1814, "step": 11890 }, { "epoch": 0.808194048104362, "grad_norm": 1.9853487014770508, "learning_rate": 8.990182089957875e-05, "loss": 3.4542, "step": 11895 }, { "epoch": 0.8085337681750238, "grad_norm": 2.471073865890503, "learning_rate": 8.989757439869549e-05, "loss": 3.2959, "step": 11900 }, { "epoch": 0.8088734882456855, "grad_norm": 1.9424799680709839, "learning_rate": 8.98933278978122e-05, "loss": 3.3625, "step": 11905 }, { "epoch": 0.8092132083163474, "grad_norm": 1.9726063013076782, "learning_rate": 8.988908139692893e-05, "loss": 3.5501, "step": 11910 }, { "epoch": 0.8095529283870091, "grad_norm": 1.877665638923645, "learning_rate": 8.988483489604567e-05, "loss": 3.478, "step": 11915 }, { "epoch": 0.8098926484576708, "grad_norm": 1.8277817964553833, "learning_rate": 8.988058839516239e-05, "loss": 3.187, "step": 11920 }, { "epoch": 0.8102323685283327, "grad_norm": 1.7110615968704224, "learning_rate": 8.987634189427911e-05, "loss": 3.4653, "step": 11925 }, { "epoch": 0.8105720885989944, "grad_norm": 1.9001785516738892, "learning_rate": 8.987209539339586e-05, "loss": 3.4926, "step": 11930 }, { "epoch": 0.8109118086696562, "grad_norm": 1.8427282571792603, "learning_rate": 8.986784889251257e-05, "loss": 3.4295, "step": 11935 }, { "epoch": 0.811251528740318, "grad_norm": 1.6634604930877686, "learning_rate": 8.98636023916293e-05, "loss": 3.2924, "step": 11940 }, { "epoch": 0.8115912488109798, "grad_norm": 1.9675753116607666, "learning_rate": 8.985935589074603e-05, "loss": 3.1548, "step": 11945 }, { "epoch": 0.8119309688816415, "grad_norm": 2.0560007095336914, "learning_rate": 8.985510938986275e-05, "loss": 3.5211, "step": 11950 }, { "epoch": 0.8122706889523033, "grad_norm": 2.9898903369903564, "learning_rate": 8.985086288897948e-05, "loss": 3.295, "step": 11955 }, { "epoch": 0.8126104090229651, 
"grad_norm": 2.198967933654785, "learning_rate": 8.984661638809621e-05, "loss": 3.4186, "step": 11960 }, { "epoch": 0.8129501290936268, "grad_norm": 1.7902920246124268, "learning_rate": 8.984236988721294e-05, "loss": 3.3827, "step": 11965 }, { "epoch": 0.8132898491642886, "grad_norm": 2.6019420623779297, "learning_rate": 8.983812338632967e-05, "loss": 3.385, "step": 11970 }, { "epoch": 0.8136295692349504, "grad_norm": 2.494234800338745, "learning_rate": 8.98338768854464e-05, "loss": 3.5925, "step": 11975 }, { "epoch": 0.8139692893056122, "grad_norm": 2.5273165702819824, "learning_rate": 8.982963038456312e-05, "loss": 3.183, "step": 11980 }, { "epoch": 0.8143090093762739, "grad_norm": 2.065208911895752, "learning_rate": 8.982538388367985e-05, "loss": 3.2692, "step": 11985 }, { "epoch": 0.8146487294469357, "grad_norm": 2.5276691913604736, "learning_rate": 8.982113738279658e-05, "loss": 3.6195, "step": 11990 }, { "epoch": 0.8149884495175975, "grad_norm": 2.551149845123291, "learning_rate": 8.98168908819133e-05, "loss": 3.122, "step": 11995 }, { "epoch": 0.8153281695882593, "grad_norm": 2.36954402923584, "learning_rate": 8.981264438103003e-05, "loss": 3.4339, "step": 12000 }, { "epoch": 0.815667889658921, "grad_norm": 2.6555187702178955, "learning_rate": 8.980839788014676e-05, "loss": 3.3592, "step": 12005 }, { "epoch": 0.8160076097295829, "grad_norm": 2.415350914001465, "learning_rate": 8.980415137926349e-05, "loss": 3.4568, "step": 12010 }, { "epoch": 0.8163473298002446, "grad_norm": 2.061955690383911, "learning_rate": 8.979990487838022e-05, "loss": 3.6163, "step": 12015 }, { "epoch": 0.8166870498709063, "grad_norm": 2.0661327838897705, "learning_rate": 8.979565837749695e-05, "loss": 3.3972, "step": 12020 }, { "epoch": 0.8170267699415682, "grad_norm": 1.9909498691558838, "learning_rate": 8.979141187661367e-05, "loss": 3.0278, "step": 12025 }, { "epoch": 0.8173664900122299, "grad_norm": 1.9194748401641846, "learning_rate": 8.97871653757304e-05, "loss": 3.3678, "step": 
12030 }, { "epoch": 0.8177062100828917, "grad_norm": 2.6531505584716797, "learning_rate": 8.978291887484713e-05, "loss": 3.4254, "step": 12035 }, { "epoch": 0.8180459301535534, "grad_norm": 1.943596601486206, "learning_rate": 8.977867237396386e-05, "loss": 3.3382, "step": 12040 }, { "epoch": 0.8183856502242153, "grad_norm": 1.5993430614471436, "learning_rate": 8.977442587308059e-05, "loss": 3.7341, "step": 12045 }, { "epoch": 0.818725370294877, "grad_norm": 1.6929173469543457, "learning_rate": 8.977017937219731e-05, "loss": 3.4217, "step": 12050 }, { "epoch": 0.8190650903655388, "grad_norm": 2.355818748474121, "learning_rate": 8.976593287131404e-05, "loss": 3.4165, "step": 12055 }, { "epoch": 0.8194048104362006, "grad_norm": 2.1425981521606445, "learning_rate": 8.976168637043077e-05, "loss": 3.3265, "step": 12060 }, { "epoch": 0.8197445305068624, "grad_norm": 2.230255126953125, "learning_rate": 8.97574398695475e-05, "loss": 3.4455, "step": 12065 }, { "epoch": 0.8200842505775241, "grad_norm": 3.521151542663574, "learning_rate": 8.975319336866423e-05, "loss": 3.4047, "step": 12070 }, { "epoch": 0.8204239706481858, "grad_norm": 2.098658323287964, "learning_rate": 8.974894686778094e-05, "loss": 3.5767, "step": 12075 }, { "epoch": 0.8207636907188477, "grad_norm": 2.61434006690979, "learning_rate": 8.974470036689768e-05, "loss": 3.2252, "step": 12080 }, { "epoch": 0.8211034107895094, "grad_norm": 2.139094352722168, "learning_rate": 8.974045386601441e-05, "loss": 3.4537, "step": 12085 }, { "epoch": 0.8214431308601712, "grad_norm": 2.295015335083008, "learning_rate": 8.973620736513113e-05, "loss": 3.6022, "step": 12090 }, { "epoch": 0.821782850930833, "grad_norm": 2.326263189315796, "learning_rate": 8.973196086424787e-05, "loss": 3.572, "step": 12095 }, { "epoch": 0.8221225710014948, "grad_norm": 2.0316975116729736, "learning_rate": 8.97277143633646e-05, "loss": 3.4458, "step": 12100 }, { "epoch": 0.8224622910721565, "grad_norm": 2.1909048557281494, "learning_rate": 
8.972346786248132e-05, "loss": 3.2895, "step": 12105 }, { "epoch": 0.8228020111428184, "grad_norm": 2.148266077041626, "learning_rate": 8.971922136159805e-05, "loss": 3.4308, "step": 12110 }, { "epoch": 0.8231417312134801, "grad_norm": 2.0468289852142334, "learning_rate": 8.971497486071478e-05, "loss": 3.3819, "step": 12115 }, { "epoch": 0.8234814512841419, "grad_norm": 1.9551857709884644, "learning_rate": 8.97107283598315e-05, "loss": 3.4927, "step": 12120 }, { "epoch": 0.8238211713548036, "grad_norm": 5.074975967407227, "learning_rate": 8.970648185894823e-05, "loss": 3.4348, "step": 12125 }, { "epoch": 0.8241608914254654, "grad_norm": 2.103262186050415, "learning_rate": 8.970223535806496e-05, "loss": 3.657, "step": 12130 }, { "epoch": 0.8245006114961272, "grad_norm": 2.4705734252929688, "learning_rate": 8.969798885718169e-05, "loss": 3.0682, "step": 12135 }, { "epoch": 0.8248403315667889, "grad_norm": 1.8436349630355835, "learning_rate": 8.969374235629842e-05, "loss": 3.399, "step": 12140 }, { "epoch": 0.8251800516374508, "grad_norm": 2.4647295475006104, "learning_rate": 8.968949585541513e-05, "loss": 3.4332, "step": 12145 }, { "epoch": 0.8255197717081125, "grad_norm": 2.043389081954956, "learning_rate": 8.968524935453187e-05, "loss": 3.3697, "step": 12150 }, { "epoch": 0.8258594917787743, "grad_norm": 1.9643347263336182, "learning_rate": 8.96810028536486e-05, "loss": 3.5348, "step": 12155 }, { "epoch": 0.826199211849436, "grad_norm": 2.261094808578491, "learning_rate": 8.967675635276532e-05, "loss": 3.3601, "step": 12160 }, { "epoch": 0.8265389319200979, "grad_norm": 2.189314126968384, "learning_rate": 8.967250985188206e-05, "loss": 3.5678, "step": 12165 }, { "epoch": 0.8268786519907596, "grad_norm": 2.5339884757995605, "learning_rate": 8.966826335099879e-05, "loss": 3.4697, "step": 12170 }, { "epoch": 0.8272183720614213, "grad_norm": 2.93972110748291, "learning_rate": 8.96640168501155e-05, "loss": 3.271, "step": 12175 }, { "epoch": 0.8275580921320832, 
"grad_norm": 1.789829969406128, "learning_rate": 8.965977034923224e-05, "loss": 3.5214, "step": 12180 }, { "epoch": 0.8278978122027449, "grad_norm": 1.7386118173599243, "learning_rate": 8.965552384834897e-05, "loss": 3.3794, "step": 12185 }, { "epoch": 0.8282375322734067, "grad_norm": 3.1779747009277344, "learning_rate": 8.965127734746569e-05, "loss": 3.5923, "step": 12190 }, { "epoch": 0.8285772523440685, "grad_norm": 1.8903814554214478, "learning_rate": 8.964703084658243e-05, "loss": 3.6321, "step": 12195 }, { "epoch": 0.8289169724147303, "grad_norm": 2.069563627243042, "learning_rate": 8.964278434569915e-05, "loss": 3.5606, "step": 12200 }, { "epoch": 0.829256692485392, "grad_norm": 2.354498863220215, "learning_rate": 8.963853784481587e-05, "loss": 3.6108, "step": 12205 }, { "epoch": 0.8295964125560538, "grad_norm": 2.1593213081359863, "learning_rate": 8.963429134393261e-05, "loss": 3.2927, "step": 12210 }, { "epoch": 0.8299361326267156, "grad_norm": 2.683704376220703, "learning_rate": 8.963004484304934e-05, "loss": 3.3888, "step": 12215 }, { "epoch": 0.8302758526973774, "grad_norm": 2.1463334560394287, "learning_rate": 8.962579834216605e-05, "loss": 3.2958, "step": 12220 }, { "epoch": 0.8306155727680391, "grad_norm": 2.190086841583252, "learning_rate": 8.96215518412828e-05, "loss": 3.5065, "step": 12225 }, { "epoch": 0.830955292838701, "grad_norm": 2.2001006603240967, "learning_rate": 8.961730534039951e-05, "loss": 3.2064, "step": 12230 }, { "epoch": 0.8312950129093627, "grad_norm": 2.6916494369506836, "learning_rate": 8.961305883951624e-05, "loss": 2.9586, "step": 12235 }, { "epoch": 0.8316347329800244, "grad_norm": 1.9479535818099976, "learning_rate": 8.960881233863298e-05, "loss": 3.2363, "step": 12240 }, { "epoch": 0.8319744530506862, "grad_norm": 1.9741711616516113, "learning_rate": 8.96045658377497e-05, "loss": 3.0458, "step": 12245 }, { "epoch": 0.832314173121348, "grad_norm": 2.184316396713257, "learning_rate": 8.960031933686642e-05, "loss": 3.3535, 
"step": 12250 }, { "epoch": 0.8326538931920098, "grad_norm": 2.348109006881714, "learning_rate": 8.959607283598316e-05, "loss": 3.1871, "step": 12255 }, { "epoch": 0.8329936132626715, "grad_norm": 2.490572690963745, "learning_rate": 8.959182633509988e-05, "loss": 3.3422, "step": 12260 }, { "epoch": 0.8333333333333334, "grad_norm": 2.7501299381256104, "learning_rate": 8.95875798342166e-05, "loss": 3.5116, "step": 12265 }, { "epoch": 0.8336730534039951, "grad_norm": 2.969365358352661, "learning_rate": 8.958333333333335e-05, "loss": 3.0079, "step": 12270 }, { "epoch": 0.8340127734746569, "grad_norm": 2.0569348335266113, "learning_rate": 8.957908683245006e-05, "loss": 3.5527, "step": 12275 }, { "epoch": 0.8343524935453187, "grad_norm": 2.477231025695801, "learning_rate": 8.957484033156679e-05, "loss": 3.4916, "step": 12280 }, { "epoch": 0.8346922136159804, "grad_norm": 2.0322723388671875, "learning_rate": 8.957059383068353e-05, "loss": 3.2195, "step": 12285 }, { "epoch": 0.8350319336866422, "grad_norm": 2.4855687618255615, "learning_rate": 8.956634732980025e-05, "loss": 3.3189, "step": 12290 }, { "epoch": 0.8353716537573039, "grad_norm": 1.9906818866729736, "learning_rate": 8.956210082891697e-05, "loss": 3.3418, "step": 12295 }, { "epoch": 0.8357113738279658, "grad_norm": 2.5079314708709717, "learning_rate": 8.95578543280337e-05, "loss": 3.4239, "step": 12300 }, { "epoch": 0.8360510938986275, "grad_norm": 3.2814083099365234, "learning_rate": 8.955360782715043e-05, "loss": 3.1097, "step": 12305 }, { "epoch": 0.8363908139692893, "grad_norm": 2.568511962890625, "learning_rate": 8.954936132626716e-05, "loss": 3.5821, "step": 12310 }, { "epoch": 0.8367305340399511, "grad_norm": 2.464090585708618, "learning_rate": 8.954511482538389e-05, "loss": 3.2909, "step": 12315 }, { "epoch": 0.8370702541106129, "grad_norm": 2.4630167484283447, "learning_rate": 8.954086832450061e-05, "loss": 3.5004, "step": 12320 }, { "epoch": 0.8374099741812746, "grad_norm": 1.9303792715072632, 
"learning_rate": 8.953662182361734e-05, "loss": 3.5628, "step": 12325 }, { "epoch": 0.8377496942519363, "grad_norm": 2.097794771194458, "learning_rate": 8.953237532273407e-05, "loss": 3.0507, "step": 12330 }, { "epoch": 0.8380894143225982, "grad_norm": 2.0385208129882812, "learning_rate": 8.95281288218508e-05, "loss": 3.612, "step": 12335 }, { "epoch": 0.83842913439326, "grad_norm": 2.2375195026397705, "learning_rate": 8.952388232096753e-05, "loss": 3.452, "step": 12340 }, { "epoch": 0.8387688544639217, "grad_norm": 2.2851905822753906, "learning_rate": 8.951963582008425e-05, "loss": 3.424, "step": 12345 }, { "epoch": 0.8391085745345835, "grad_norm": 2.183159351348877, "learning_rate": 8.951538931920098e-05, "loss": 3.3318, "step": 12350 }, { "epoch": 0.8394482946052453, "grad_norm": 2.781352996826172, "learning_rate": 8.951114281831771e-05, "loss": 3.3904, "step": 12355 }, { "epoch": 0.839788014675907, "grad_norm": 2.242860794067383, "learning_rate": 8.950689631743444e-05, "loss": 3.4498, "step": 12360 }, { "epoch": 0.8401277347465689, "grad_norm": 1.608384370803833, "learning_rate": 8.950264981655117e-05, "loss": 3.6722, "step": 12365 }, { "epoch": 0.8404674548172306, "grad_norm": 3.1077661514282227, "learning_rate": 8.94984033156679e-05, "loss": 3.4293, "step": 12370 }, { "epoch": 0.8408071748878924, "grad_norm": 2.1576080322265625, "learning_rate": 8.949415681478462e-05, "loss": 3.3204, "step": 12375 }, { "epoch": 0.8411468949585541, "grad_norm": 2.1023166179656982, "learning_rate": 8.948991031390135e-05, "loss": 3.3246, "step": 12380 }, { "epoch": 0.841486615029216, "grad_norm": 2.3234474658966064, "learning_rate": 8.948566381301808e-05, "loss": 3.5551, "step": 12385 }, { "epoch": 0.8418263350998777, "grad_norm": 1.919976830482483, "learning_rate": 8.94814173121348e-05, "loss": 3.1543, "step": 12390 }, { "epoch": 0.8421660551705394, "grad_norm": 2.105980157852173, "learning_rate": 8.947717081125153e-05, "loss": 3.2298, "step": 12395 }, { "epoch": 
0.8425057752412013, "grad_norm": 2.2233755588531494, "learning_rate": 8.947292431036826e-05, "loss": 3.4588, "step": 12400 }, { "epoch": 0.842845495311863, "grad_norm": 2.014068841934204, "learning_rate": 8.946867780948499e-05, "loss": 3.3832, "step": 12405 }, { "epoch": 0.8431852153825248, "grad_norm": 2.395500659942627, "learning_rate": 8.946443130860172e-05, "loss": 3.6357, "step": 12410 }, { "epoch": 0.8435249354531865, "grad_norm": 2.483025550842285, "learning_rate": 8.946018480771845e-05, "loss": 3.4749, "step": 12415 }, { "epoch": 0.8438646555238484, "grad_norm": 1.8673661947250366, "learning_rate": 8.945593830683517e-05, "loss": 3.4196, "step": 12420 }, { "epoch": 0.8442043755945101, "grad_norm": 2.281325101852417, "learning_rate": 8.94516918059519e-05, "loss": 3.6027, "step": 12425 }, { "epoch": 0.8445440956651719, "grad_norm": 1.9356565475463867, "learning_rate": 8.944744530506862e-05, "loss": 3.3846, "step": 12430 }, { "epoch": 0.8448838157358337, "grad_norm": 2.9688615798950195, "learning_rate": 8.944319880418536e-05, "loss": 3.5697, "step": 12435 }, { "epoch": 0.8452235358064955, "grad_norm": 3.5359628200531006, "learning_rate": 8.943895230330209e-05, "loss": 3.3995, "step": 12440 }, { "epoch": 0.8455632558771572, "grad_norm": 1.894195556640625, "learning_rate": 8.943470580241881e-05, "loss": 3.3548, "step": 12445 }, { "epoch": 0.845902975947819, "grad_norm": 1.9114396572113037, "learning_rate": 8.943045930153554e-05, "loss": 3.3295, "step": 12450 }, { "epoch": 0.8462426960184808, "grad_norm": 2.432551145553589, "learning_rate": 8.942621280065227e-05, "loss": 3.6598, "step": 12455 }, { "epoch": 0.8465824160891425, "grad_norm": 2.635043144226074, "learning_rate": 8.9421966299769e-05, "loss": 3.3133, "step": 12460 }, { "epoch": 0.8469221361598043, "grad_norm": 1.818357229232788, "learning_rate": 8.941771979888573e-05, "loss": 3.5124, "step": 12465 }, { "epoch": 0.8472618562304661, "grad_norm": 2.465458869934082, "learning_rate": 8.941347329800245e-05, 
"loss": 3.3309, "step": 12470 }, { "epoch": 0.8476015763011279, "grad_norm": 1.9214965105056763, "learning_rate": 8.940922679711918e-05, "loss": 3.0097, "step": 12475 }, { "epoch": 0.8479412963717896, "grad_norm": 2.410896062850952, "learning_rate": 8.940498029623591e-05, "loss": 3.2621, "step": 12480 }, { "epoch": 0.8482810164424515, "grad_norm": 1.94785737991333, "learning_rate": 8.940073379535264e-05, "loss": 3.3998, "step": 12485 }, { "epoch": 0.8486207365131132, "grad_norm": 1.8780781030654907, "learning_rate": 8.939648729446937e-05, "loss": 3.4095, "step": 12490 }, { "epoch": 0.848960456583775, "grad_norm": 1.8824725151062012, "learning_rate": 8.93922407935861e-05, "loss": 3.5479, "step": 12495 }, { "epoch": 0.8493001766544367, "grad_norm": 2.340257406234741, "learning_rate": 8.938799429270281e-05, "loss": 3.4268, "step": 12500 }, { "epoch": 0.8496398967250985, "grad_norm": 2.2857558727264404, "learning_rate": 8.938374779181955e-05, "loss": 3.3992, "step": 12505 }, { "epoch": 0.8499796167957603, "grad_norm": 2.3237154483795166, "learning_rate": 8.937950129093628e-05, "loss": 3.2269, "step": 12510 }, { "epoch": 0.850319336866422, "grad_norm": 2.065633535385132, "learning_rate": 8.937525479005299e-05, "loss": 3.2727, "step": 12515 }, { "epoch": 0.8506590569370839, "grad_norm": 2.3923022747039795, "learning_rate": 8.937100828916973e-05, "loss": 3.3786, "step": 12520 }, { "epoch": 0.8509987770077456, "grad_norm": 2.556154251098633, "learning_rate": 8.936676178828646e-05, "loss": 3.4553, "step": 12525 }, { "epoch": 0.8513384970784074, "grad_norm": 2.490079164505005, "learning_rate": 8.936251528740318e-05, "loss": 3.4441, "step": 12530 }, { "epoch": 0.8516782171490692, "grad_norm": 2.2074363231658936, "learning_rate": 8.935826878651992e-05, "loss": 3.7676, "step": 12535 }, { "epoch": 0.852017937219731, "grad_norm": 2.49845552444458, "learning_rate": 8.935402228563665e-05, "loss": 3.4963, "step": 12540 }, { "epoch": 0.8523576572903927, "grad_norm": 
2.1002702713012695, "learning_rate": 8.934977578475336e-05, "loss": 3.4969, "step": 12545 }, { "epoch": 0.8526973773610544, "grad_norm": 3.216614007949829, "learning_rate": 8.93455292838701e-05, "loss": 3.5337, "step": 12550 }, { "epoch": 0.8530370974317163, "grad_norm": 2.5423402786254883, "learning_rate": 8.934128278298683e-05, "loss": 3.3793, "step": 12555 }, { "epoch": 0.853376817502378, "grad_norm": 2.6236515045166016, "learning_rate": 8.933703628210354e-05, "loss": 3.2957, "step": 12560 }, { "epoch": 0.8537165375730398, "grad_norm": 2.173349142074585, "learning_rate": 8.933278978122029e-05, "loss": 3.5842, "step": 12565 }, { "epoch": 0.8540562576437016, "grad_norm": 2.4947588443756104, "learning_rate": 8.9328543280337e-05, "loss": 3.2113, "step": 12570 }, { "epoch": 0.8543959777143634, "grad_norm": 2.3870837688446045, "learning_rate": 8.932429677945373e-05, "loss": 3.4508, "step": 12575 }, { "epoch": 0.8547356977850251, "grad_norm": 2.351508378982544, "learning_rate": 8.932005027857047e-05, "loss": 3.3167, "step": 12580 }, { "epoch": 0.8550754178556869, "grad_norm": 2.275207757949829, "learning_rate": 8.931580377768718e-05, "loss": 3.0924, "step": 12585 }, { "epoch": 0.8554151379263487, "grad_norm": 1.9779943227767944, "learning_rate": 8.931155727680391e-05, "loss": 3.4408, "step": 12590 }, { "epoch": 0.8557548579970105, "grad_norm": 2.062061071395874, "learning_rate": 8.930731077592065e-05, "loss": 3.5864, "step": 12595 }, { "epoch": 0.8560945780676722, "grad_norm": 2.147888660430908, "learning_rate": 8.930306427503737e-05, "loss": 3.4524, "step": 12600 }, { "epoch": 0.856434298138334, "grad_norm": 3.2547719478607178, "learning_rate": 8.92988177741541e-05, "loss": 3.1025, "step": 12605 }, { "epoch": 0.8567740182089958, "grad_norm": 1.7158524990081787, "learning_rate": 8.929457127327084e-05, "loss": 3.4208, "step": 12610 }, { "epoch": 0.8571137382796575, "grad_norm": 2.0777182579040527, "learning_rate": 8.929032477238755e-05, "loss": 3.283, "step": 12615 }, { 
"epoch": 0.8574534583503194, "grad_norm": 2.6932685375213623, "learning_rate": 8.928607827150428e-05, "loss": 3.7409, "step": 12620 }, { "epoch": 0.8577931784209811, "grad_norm": 2.5824766159057617, "learning_rate": 8.928183177062102e-05, "loss": 3.4157, "step": 12625 }, { "epoch": 0.8581328984916429, "grad_norm": 2.1701784133911133, "learning_rate": 8.927758526973774e-05, "loss": 3.5725, "step": 12630 }, { "epoch": 0.8584726185623046, "grad_norm": 2.7500288486480713, "learning_rate": 8.927333876885446e-05, "loss": 3.271, "step": 12635 }, { "epoch": 0.8588123386329665, "grad_norm": 2.2366464138031006, "learning_rate": 8.92690922679712e-05, "loss": 3.4973, "step": 12640 }, { "epoch": 0.8591520587036282, "grad_norm": 1.7908543348312378, "learning_rate": 8.926484576708792e-05, "loss": 3.5301, "step": 12645 }, { "epoch": 0.85949177877429, "grad_norm": 2.123262405395508, "learning_rate": 8.926059926620465e-05, "loss": 3.3515, "step": 12650 }, { "epoch": 0.8598314988449518, "grad_norm": 1.9612798690795898, "learning_rate": 8.925635276532138e-05, "loss": 3.3157, "step": 12655 }, { "epoch": 0.8601712189156135, "grad_norm": 2.2451987266540527, "learning_rate": 8.92521062644381e-05, "loss": 3.3019, "step": 12660 }, { "epoch": 0.8605109389862753, "grad_norm": 2.1565518379211426, "learning_rate": 8.924785976355483e-05, "loss": 3.3352, "step": 12665 }, { "epoch": 0.860850659056937, "grad_norm": 2.283409595489502, "learning_rate": 8.924361326267156e-05, "loss": 3.3587, "step": 12670 }, { "epoch": 0.8611903791275989, "grad_norm": 1.6637094020843506, "learning_rate": 8.923936676178829e-05, "loss": 3.4987, "step": 12675 }, { "epoch": 0.8615300991982606, "grad_norm": 2.5579943656921387, "learning_rate": 8.923512026090502e-05, "loss": 3.3384, "step": 12680 }, { "epoch": 0.8618698192689224, "grad_norm": 2.9387600421905518, "learning_rate": 8.923087376002174e-05, "loss": 3.4482, "step": 12685 }, { "epoch": 0.8622095393395842, "grad_norm": 2.019212007522583, "learning_rate": 
8.922662725913847e-05, "loss": 3.1966, "step": 12690 }, { "epoch": 0.862549259410246, "grad_norm": 2.327361583709717, "learning_rate": 8.92223807582552e-05, "loss": 3.3307, "step": 12695 }, { "epoch": 0.8628889794809077, "grad_norm": 2.6048264503479004, "learning_rate": 8.921813425737193e-05, "loss": 3.4112, "step": 12700 }, { "epoch": 0.8632286995515696, "grad_norm": 1.9804767370224, "learning_rate": 8.921388775648866e-05, "loss": 3.137, "step": 12705 }, { "epoch": 0.8635684196222313, "grad_norm": 2.4206314086914062, "learning_rate": 8.920964125560538e-05, "loss": 3.1874, "step": 12710 }, { "epoch": 0.863908139692893, "grad_norm": 2.8207027912139893, "learning_rate": 8.920539475472211e-05, "loss": 3.4444, "step": 12715 }, { "epoch": 0.8642478597635548, "grad_norm": 1.9976999759674072, "learning_rate": 8.920114825383884e-05, "loss": 3.4778, "step": 12720 }, { "epoch": 0.8645875798342166, "grad_norm": 2.2826948165893555, "learning_rate": 8.919690175295557e-05, "loss": 3.528, "step": 12725 }, { "epoch": 0.8649272999048784, "grad_norm": 1.950202226638794, "learning_rate": 8.91926552520723e-05, "loss": 3.3601, "step": 12730 }, { "epoch": 0.8652670199755401, "grad_norm": 2.4248416423797607, "learning_rate": 8.918840875118902e-05, "loss": 3.6821, "step": 12735 }, { "epoch": 0.865606740046202, "grad_norm": 2.4804511070251465, "learning_rate": 8.918416225030575e-05, "loss": 3.4957, "step": 12740 }, { "epoch": 0.8659464601168637, "grad_norm": 2.0834972858428955, "learning_rate": 8.917991574942248e-05, "loss": 3.4513, "step": 12745 }, { "epoch": 0.8662861801875255, "grad_norm": 1.7990913391113281, "learning_rate": 8.917566924853921e-05, "loss": 3.4229, "step": 12750 }, { "epoch": 0.8666259002581872, "grad_norm": 2.037576675415039, "learning_rate": 8.917142274765594e-05, "loss": 3.2739, "step": 12755 }, { "epoch": 0.866965620328849, "grad_norm": 2.4587152004241943, "learning_rate": 8.916717624677266e-05, "loss": 3.3435, "step": 12760 }, { "epoch": 0.8673053403995108, 
"grad_norm": 2.065459728240967, "learning_rate": 8.916292974588939e-05, "loss": 3.46, "step": 12765 }, { "epoch": 0.8676450604701725, "grad_norm": 2.088833808898926, "learning_rate": 8.915868324500611e-05, "loss": 3.4938, "step": 12770 }, { "epoch": 0.8679847805408344, "grad_norm": 2.0702526569366455, "learning_rate": 8.915443674412285e-05, "loss": 3.4722, "step": 12775 }, { "epoch": 0.8683245006114961, "grad_norm": 1.93660306930542, "learning_rate": 8.915019024323958e-05, "loss": 3.2508, "step": 12780 }, { "epoch": 0.8686642206821579, "grad_norm": 2.8605918884277344, "learning_rate": 8.91459437423563e-05, "loss": 3.5661, "step": 12785 }, { "epoch": 0.8690039407528197, "grad_norm": 2.237478494644165, "learning_rate": 8.914169724147303e-05, "loss": 3.3289, "step": 12790 }, { "epoch": 0.8693436608234815, "grad_norm": 1.5462170839309692, "learning_rate": 8.913745074058976e-05, "loss": 3.4486, "step": 12795 }, { "epoch": 0.8696833808941432, "grad_norm": 2.314127206802368, "learning_rate": 8.913320423970649e-05, "loss": 3.5927, "step": 12800 }, { "epoch": 0.870023100964805, "grad_norm": 2.1215624809265137, "learning_rate": 8.912895773882322e-05, "loss": 3.5857, "step": 12805 }, { "epoch": 0.8703628210354668, "grad_norm": 2.038442373275757, "learning_rate": 8.912471123793994e-05, "loss": 3.5813, "step": 12810 }, { "epoch": 0.8707025411061285, "grad_norm": 2.324225425720215, "learning_rate": 8.912046473705667e-05, "loss": 3.488, "step": 12815 }, { "epoch": 0.8710422611767903, "grad_norm": 2.0202338695526123, "learning_rate": 8.91162182361734e-05, "loss": 3.4948, "step": 12820 }, { "epoch": 0.8713819812474521, "grad_norm": 3.1543400287628174, "learning_rate": 8.911197173529013e-05, "loss": 3.5433, "step": 12825 }, { "epoch": 0.8717217013181139, "grad_norm": 1.6956403255462646, "learning_rate": 8.910772523440686e-05, "loss": 3.3524, "step": 12830 }, { "epoch": 0.8720614213887756, "grad_norm": 1.625807523727417, "learning_rate": 8.910347873352358e-05, "loss": 3.266, "step": 
12835 }, { "epoch": 0.8724011414594374, "grad_norm": 2.3619906902313232, "learning_rate": 8.909923223264031e-05, "loss": 3.4372, "step": 12840 }, { "epoch": 0.8727408615300992, "grad_norm": 2.4061622619628906, "learning_rate": 8.909498573175704e-05, "loss": 3.3166, "step": 12845 }, { "epoch": 0.873080581600761, "grad_norm": 2.2131905555725098, "learning_rate": 8.909073923087377e-05, "loss": 3.6936, "step": 12850 }, { "epoch": 0.8734203016714227, "grad_norm": 2.0261764526367188, "learning_rate": 8.908649272999048e-05, "loss": 3.5052, "step": 12855 }, { "epoch": 0.8737600217420846, "grad_norm": 2.1919784545898438, "learning_rate": 8.908224622910722e-05, "loss": 3.2626, "step": 12860 }, { "epoch": 0.8740997418127463, "grad_norm": 1.908305287361145, "learning_rate": 8.907799972822395e-05, "loss": 3.3079, "step": 12865 }, { "epoch": 0.874439461883408, "grad_norm": 3.6500329971313477, "learning_rate": 8.907375322734067e-05, "loss": 3.0749, "step": 12870 }, { "epoch": 0.8747791819540699, "grad_norm": 2.219109296798706, "learning_rate": 8.906950672645741e-05, "loss": 3.5303, "step": 12875 }, { "epoch": 0.8751189020247316, "grad_norm": 1.99685800075531, "learning_rate": 8.906526022557414e-05, "loss": 3.1335, "step": 12880 }, { "epoch": 0.8754586220953934, "grad_norm": 2.4649786949157715, "learning_rate": 8.906101372469085e-05, "loss": 3.4878, "step": 12885 }, { "epoch": 0.8757983421660551, "grad_norm": 1.9280425310134888, "learning_rate": 8.905676722380759e-05, "loss": 3.3292, "step": 12890 }, { "epoch": 0.876138062236717, "grad_norm": 2.0135698318481445, "learning_rate": 8.905252072292432e-05, "loss": 3.4459, "step": 12895 }, { "epoch": 0.8764777823073787, "grad_norm": 2.0553817749023438, "learning_rate": 8.904827422204104e-05, "loss": 3.6205, "step": 12900 }, { "epoch": 0.8768175023780405, "grad_norm": 2.1602604389190674, "learning_rate": 8.904402772115778e-05, "loss": 3.4049, "step": 12905 }, { "epoch": 0.8771572224487023, "grad_norm": 2.554487705230713, "learning_rate": 
8.90397812202745e-05, "loss": 3.4514, "step": 12910 }, { "epoch": 0.877496942519364, "grad_norm": 2.9594929218292236, "learning_rate": 8.903553471939122e-05, "loss": 3.0998, "step": 12915 }, { "epoch": 0.8778366625900258, "grad_norm": 2.31552791595459, "learning_rate": 8.903128821850796e-05, "loss": 3.3427, "step": 12920 }, { "epoch": 0.8781763826606876, "grad_norm": 2.297060012817383, "learning_rate": 8.902704171762468e-05, "loss": 3.196, "step": 12925 }, { "epoch": 0.8785161027313494, "grad_norm": 2.477471351623535, "learning_rate": 8.90227952167414e-05, "loss": 3.4359, "step": 12930 }, { "epoch": 0.8788558228020111, "grad_norm": 2.201030969619751, "learning_rate": 8.901854871585814e-05, "loss": 3.3868, "step": 12935 }, { "epoch": 0.8791955428726729, "grad_norm": 2.3145620822906494, "learning_rate": 8.901430221497486e-05, "loss": 3.3346, "step": 12940 }, { "epoch": 0.8795352629433347, "grad_norm": 1.816537618637085, "learning_rate": 8.901005571409159e-05, "loss": 3.4542, "step": 12945 }, { "epoch": 0.8798749830139965, "grad_norm": 1.8864638805389404, "learning_rate": 8.900580921320833e-05, "loss": 2.9929, "step": 12950 }, { "epoch": 0.8802147030846582, "grad_norm": 2.606241226196289, "learning_rate": 8.900156271232504e-05, "loss": 3.5346, "step": 12955 }, { "epoch": 0.8805544231553201, "grad_norm": 1.8780237436294556, "learning_rate": 8.899731621144177e-05, "loss": 3.4796, "step": 12960 }, { "epoch": 0.8808941432259818, "grad_norm": 2.0902504920959473, "learning_rate": 8.899306971055851e-05, "loss": 3.5528, "step": 12965 }, { "epoch": 0.8812338632966435, "grad_norm": 2.3323183059692383, "learning_rate": 8.898882320967523e-05, "loss": 3.4537, "step": 12970 }, { "epoch": 0.8815735833673053, "grad_norm": 2.0410828590393066, "learning_rate": 8.898457670879196e-05, "loss": 3.4332, "step": 12975 }, { "epoch": 0.8819133034379671, "grad_norm": 2.0101664066314697, "learning_rate": 8.89803302079087e-05, "loss": 3.5238, "step": 12980 }, { "epoch": 0.8822530235086289, 
"grad_norm": 2.3145689964294434, "learning_rate": 8.897608370702541e-05, "loss": 3.3466, "step": 12985 }, { "epoch": 0.8825927435792906, "grad_norm": 2.9193103313446045, "learning_rate": 8.897183720614214e-05, "loss": 3.5716, "step": 12990 }, { "epoch": 0.8829324636499525, "grad_norm": 2.369565010070801, "learning_rate": 8.896759070525888e-05, "loss": 3.468, "step": 12995 }, { "epoch": 0.8832721837206142, "grad_norm": 2.023770809173584, "learning_rate": 8.89633442043756e-05, "loss": 3.4391, "step": 13000 }, { "epoch": 0.883611903791276, "grad_norm": 2.9925358295440674, "learning_rate": 8.895909770349232e-05, "loss": 3.4964, "step": 13005 }, { "epoch": 0.8839516238619378, "grad_norm": 5.115194320678711, "learning_rate": 8.895485120260905e-05, "loss": 3.2782, "step": 13010 }, { "epoch": 0.8842913439325996, "grad_norm": 1.8293346166610718, "learning_rate": 8.895060470172578e-05, "loss": 3.2187, "step": 13015 }, { "epoch": 0.8846310640032613, "grad_norm": 1.6921168565750122, "learning_rate": 8.894635820084251e-05, "loss": 3.2743, "step": 13020 }, { "epoch": 0.884970784073923, "grad_norm": 2.1380624771118164, "learning_rate": 8.894211169995924e-05, "loss": 3.2137, "step": 13025 }, { "epoch": 0.8853105041445849, "grad_norm": 2.3241119384765625, "learning_rate": 8.893786519907596e-05, "loss": 3.405, "step": 13030 }, { "epoch": 0.8856502242152466, "grad_norm": 2.54184889793396, "learning_rate": 8.893361869819269e-05, "loss": 3.4104, "step": 13035 }, { "epoch": 0.8859899442859084, "grad_norm": 2.4962642192840576, "learning_rate": 8.892937219730942e-05, "loss": 3.1967, "step": 13040 }, { "epoch": 0.8863296643565702, "grad_norm": 1.788737177848816, "learning_rate": 8.892512569642615e-05, "loss": 3.4218, "step": 13045 }, { "epoch": 0.886669384427232, "grad_norm": 2.252824068069458, "learning_rate": 8.892087919554288e-05, "loss": 3.5111, "step": 13050 }, { "epoch": 0.8870091044978937, "grad_norm": 1.9539830684661865, "learning_rate": 8.89166326946596e-05, "loss": 3.3384, 
"step": 13055 }, { "epoch": 0.8873488245685555, "grad_norm": 2.8625988960266113, "learning_rate": 8.891238619377633e-05, "loss": 3.1979, "step": 13060 }, { "epoch": 0.8876885446392173, "grad_norm": 1.923761248588562, "learning_rate": 8.890813969289306e-05, "loss": 3.3952, "step": 13065 }, { "epoch": 0.8880282647098791, "grad_norm": 3.0056352615356445, "learning_rate": 8.890389319200979e-05, "loss": 3.3172, "step": 13070 }, { "epoch": 0.8883679847805408, "grad_norm": 2.1578927040100098, "learning_rate": 8.889964669112652e-05, "loss": 3.4347, "step": 13075 }, { "epoch": 0.8887077048512027, "grad_norm": 1.9011409282684326, "learning_rate": 8.889540019024324e-05, "loss": 3.4688, "step": 13080 }, { "epoch": 0.8890474249218644, "grad_norm": 2.308225393295288, "learning_rate": 8.889115368935997e-05, "loss": 3.3298, "step": 13085 }, { "epoch": 0.8893871449925261, "grad_norm": 2.313910484313965, "learning_rate": 8.88869071884767e-05, "loss": 3.4784, "step": 13090 }, { "epoch": 0.889726865063188, "grad_norm": 2.3167572021484375, "learning_rate": 8.888266068759343e-05, "loss": 3.4088, "step": 13095 }, { "epoch": 0.8900665851338497, "grad_norm": 1.786078691482544, "learning_rate": 8.887841418671016e-05, "loss": 3.3644, "step": 13100 }, { "epoch": 0.8904063052045115, "grad_norm": 2.768312692642212, "learning_rate": 8.887416768582688e-05, "loss": 3.4611, "step": 13105 }, { "epoch": 0.8907460252751732, "grad_norm": 2.6162333488464355, "learning_rate": 8.886992118494361e-05, "loss": 3.4614, "step": 13110 }, { "epoch": 0.8910857453458351, "grad_norm": 2.2137365341186523, "learning_rate": 8.886567468406034e-05, "loss": 3.3557, "step": 13115 }, { "epoch": 0.8914254654164968, "grad_norm": 2.2785744667053223, "learning_rate": 8.886142818317707e-05, "loss": 3.5574, "step": 13120 }, { "epoch": 0.8917651854871586, "grad_norm": 2.1758687496185303, "learning_rate": 8.88571816822938e-05, "loss": 3.1697, "step": 13125 }, { "epoch": 0.8921049055578204, "grad_norm": 2.1688098907470703, 
"learning_rate": 8.885293518141052e-05, "loss": 3.2151, "step": 13130 }, { "epoch": 0.8924446256284821, "grad_norm": 2.6832351684570312, "learning_rate": 8.884868868052725e-05, "loss": 3.3154, "step": 13135 }, { "epoch": 0.8927843456991439, "grad_norm": 1.8473397493362427, "learning_rate": 8.884444217964398e-05, "loss": 3.4229, "step": 13140 }, { "epoch": 0.8931240657698056, "grad_norm": 1.936287522315979, "learning_rate": 8.884019567876071e-05, "loss": 3.5728, "step": 13145 }, { "epoch": 0.8934637858404675, "grad_norm": 2.4662015438079834, "learning_rate": 8.883594917787744e-05, "loss": 3.4527, "step": 13150 }, { "epoch": 0.8938035059111292, "grad_norm": 3.8504714965820312, "learning_rate": 8.883170267699416e-05, "loss": 3.5006, "step": 13155 }, { "epoch": 0.894143225981791, "grad_norm": 1.987755537033081, "learning_rate": 8.882745617611089e-05, "loss": 3.166, "step": 13160 }, { "epoch": 0.8944829460524528, "grad_norm": 1.9100432395935059, "learning_rate": 8.882320967522762e-05, "loss": 3.1983, "step": 13165 }, { "epoch": 0.8948226661231146, "grad_norm": 3.521446704864502, "learning_rate": 8.881896317434435e-05, "loss": 3.2565, "step": 13170 }, { "epoch": 0.8951623861937763, "grad_norm": 1.5010722875595093, "learning_rate": 8.881471667346108e-05, "loss": 3.4737, "step": 13175 }, { "epoch": 0.8955021062644382, "grad_norm": 2.792729139328003, "learning_rate": 8.88104701725778e-05, "loss": 3.3971, "step": 13180 }, { "epoch": 0.8958418263350999, "grad_norm": 2.1972968578338623, "learning_rate": 8.880622367169453e-05, "loss": 3.6645, "step": 13185 }, { "epoch": 0.8961815464057616, "grad_norm": 1.9702963829040527, "learning_rate": 8.880197717081126e-05, "loss": 3.2824, "step": 13190 }, { "epoch": 0.8965212664764234, "grad_norm": 3.380788564682007, "learning_rate": 8.879773066992799e-05, "loss": 3.5972, "step": 13195 }, { "epoch": 0.8968609865470852, "grad_norm": 1.9863908290863037, "learning_rate": 8.879348416904472e-05, "loss": 3.2562, "step": 13200 }, { "epoch": 
0.897200706617747, "grad_norm": 2.170250177383423, "learning_rate": 8.878923766816144e-05, "loss": 3.2255, "step": 13205 }, { "epoch": 0.8975404266884087, "grad_norm": 1.925988793373108, "learning_rate": 8.878499116727816e-05, "loss": 3.2652, "step": 13210 }, { "epoch": 0.8978801467590706, "grad_norm": 2.326113224029541, "learning_rate": 8.87807446663949e-05, "loss": 3.2761, "step": 13215 }, { "epoch": 0.8982198668297323, "grad_norm": 2.4070301055908203, "learning_rate": 8.877649816551163e-05, "loss": 3.3647, "step": 13220 }, { "epoch": 0.8985595869003941, "grad_norm": 2.393268346786499, "learning_rate": 8.877225166462834e-05, "loss": 3.575, "step": 13225 }, { "epoch": 0.8988993069710558, "grad_norm": 2.325731039047241, "learning_rate": 8.876800516374508e-05, "loss": 3.6121, "step": 13230 }, { "epoch": 0.8992390270417177, "grad_norm": 2.6531436443328857, "learning_rate": 8.876375866286181e-05, "loss": 3.2707, "step": 13235 }, { "epoch": 0.8995787471123794, "grad_norm": 1.7149523496627808, "learning_rate": 8.875951216197853e-05, "loss": 3.5532, "step": 13240 }, { "epoch": 0.8999184671830411, "grad_norm": 2.0498011112213135, "learning_rate": 8.875526566109527e-05, "loss": 3.1953, "step": 13245 }, { "epoch": 0.900258187253703, "grad_norm": 2.6770033836364746, "learning_rate": 8.8751019160212e-05, "loss": 3.4046, "step": 13250 }, { "epoch": 0.9005979073243647, "grad_norm": 2.205594062805176, "learning_rate": 8.874677265932871e-05, "loss": 3.3253, "step": 13255 }, { "epoch": 0.9009376273950265, "grad_norm": 2.046020984649658, "learning_rate": 8.874252615844545e-05, "loss": 3.3483, "step": 13260 }, { "epoch": 0.9012773474656883, "grad_norm": 2.020634651184082, "learning_rate": 8.873827965756218e-05, "loss": 3.334, "step": 13265 }, { "epoch": 0.9016170675363501, "grad_norm": 3.039229393005371, "learning_rate": 8.87340331566789e-05, "loss": 3.3703, "step": 13270 }, { "epoch": 0.9019567876070118, "grad_norm": 4.250556945800781, "learning_rate": 8.872978665579564e-05, 
"loss": 3.5719, "step": 13275 }, { "epoch": 0.9022965076776736, "grad_norm": 2.8187313079833984, "learning_rate": 8.872554015491235e-05, "loss": 3.6244, "step": 13280 }, { "epoch": 0.9026362277483354, "grad_norm": 1.824264407157898, "learning_rate": 8.872129365402908e-05, "loss": 3.456, "step": 13285 }, { "epoch": 0.9029759478189971, "grad_norm": 2.8311729431152344, "learning_rate": 8.871704715314582e-05, "loss": 3.4809, "step": 13290 }, { "epoch": 0.9033156678896589, "grad_norm": 2.219768762588501, "learning_rate": 8.871280065226253e-05, "loss": 3.6001, "step": 13295 }, { "epoch": 0.9036553879603207, "grad_norm": 2.002916097640991, "learning_rate": 8.870855415137926e-05, "loss": 3.5155, "step": 13300 }, { "epoch": 0.9039951080309825, "grad_norm": 2.181034803390503, "learning_rate": 8.8704307650496e-05, "loss": 3.2217, "step": 13305 }, { "epoch": 0.9043348281016442, "grad_norm": 1.8515734672546387, "learning_rate": 8.870006114961272e-05, "loss": 3.5193, "step": 13310 }, { "epoch": 0.904674548172306, "grad_norm": 1.9891297817230225, "learning_rate": 8.869581464872945e-05, "loss": 3.6964, "step": 13315 }, { "epoch": 0.9050142682429678, "grad_norm": 1.9584465026855469, "learning_rate": 8.869156814784619e-05, "loss": 3.2573, "step": 13320 }, { "epoch": 0.9053539883136296, "grad_norm": 1.926998257637024, "learning_rate": 8.86873216469629e-05, "loss": 3.2995, "step": 13325 }, { "epoch": 0.9056937083842913, "grad_norm": 2.677941083908081, "learning_rate": 8.868307514607963e-05, "loss": 3.3437, "step": 13330 }, { "epoch": 0.9060334284549532, "grad_norm": 2.195709228515625, "learning_rate": 8.867882864519637e-05, "loss": 3.4825, "step": 13335 }, { "epoch": 0.9063731485256149, "grad_norm": 1.9107495546340942, "learning_rate": 8.867458214431309e-05, "loss": 3.125, "step": 13340 }, { "epoch": 0.9067128685962766, "grad_norm": 1.839128017425537, "learning_rate": 8.867033564342981e-05, "loss": 3.4562, "step": 13345 }, { "epoch": 0.9070525886669385, "grad_norm": 2.709388256072998, 
"learning_rate": 8.866608914254654e-05, "loss": 3.3654, "step": 13350 }, { "epoch": 0.9073923087376002, "grad_norm": 2.196286678314209, "learning_rate": 8.866184264166327e-05, "loss": 3.347, "step": 13355 }, { "epoch": 0.907732028808262, "grad_norm": 2.0276081562042236, "learning_rate": 8.865759614078e-05, "loss": 3.3006, "step": 13360 }, { "epoch": 0.9080717488789237, "grad_norm": 2.9177396297454834, "learning_rate": 8.865334963989673e-05, "loss": 3.2102, "step": 13365 }, { "epoch": 0.9084114689495856, "grad_norm": 2.3971760272979736, "learning_rate": 8.864910313901345e-05, "loss": 3.3668, "step": 13370 }, { "epoch": 0.9087511890202473, "grad_norm": 1.934773325920105, "learning_rate": 8.864485663813018e-05, "loss": 3.04, "step": 13375 }, { "epoch": 0.9090909090909091, "grad_norm": 1.9874131679534912, "learning_rate": 8.864061013724691e-05, "loss": 3.5997, "step": 13380 }, { "epoch": 0.9094306291615709, "grad_norm": 2.0478274822235107, "learning_rate": 8.863636363636364e-05, "loss": 3.5252, "step": 13385 }, { "epoch": 0.9097703492322327, "grad_norm": 2.533780813217163, "learning_rate": 8.863211713548037e-05, "loss": 3.4548, "step": 13390 }, { "epoch": 0.9101100693028944, "grad_norm": 2.431217670440674, "learning_rate": 8.86278706345971e-05, "loss": 3.1835, "step": 13395 }, { "epoch": 0.9104497893735561, "grad_norm": 2.06567645072937, "learning_rate": 8.862362413371382e-05, "loss": 3.1748, "step": 13400 }, { "epoch": 0.910789509444218, "grad_norm": 2.533534288406372, "learning_rate": 8.861937763283055e-05, "loss": 3.8377, "step": 13405 }, { "epoch": 0.9111292295148797, "grad_norm": 1.6790692806243896, "learning_rate": 8.861513113194728e-05, "loss": 3.1369, "step": 13410 }, { "epoch": 0.9114689495855415, "grad_norm": 1.5203920602798462, "learning_rate": 8.8610884631064e-05, "loss": 3.4221, "step": 13415 }, { "epoch": 0.9118086696562033, "grad_norm": 1.9265767335891724, "learning_rate": 8.860663813018073e-05, "loss": 3.6573, "step": 13420 }, { "epoch": 
0.9121483897268651, "grad_norm": 3.2400338649749756, "learning_rate": 8.860239162929746e-05, "loss": 3.3486, "step": 13425 }, { "epoch": 0.9124881097975268, "grad_norm": 1.939512848854065, "learning_rate": 8.859814512841419e-05, "loss": 3.129, "step": 13430 }, { "epoch": 0.9128278298681887, "grad_norm": 2.470653772354126, "learning_rate": 8.859389862753092e-05, "loss": 3.3915, "step": 13435 }, { "epoch": 0.9131675499388504, "grad_norm": 2.515514612197876, "learning_rate": 8.858965212664765e-05, "loss": 3.2656, "step": 13440 }, { "epoch": 0.9135072700095122, "grad_norm": 2.438286781311035, "learning_rate": 8.858540562576437e-05, "loss": 3.3285, "step": 13445 }, { "epoch": 0.9138469900801739, "grad_norm": 2.142879009246826, "learning_rate": 8.85811591248811e-05, "loss": 3.2102, "step": 13450 }, { "epoch": 0.9141867101508357, "grad_norm": 1.9283112287521362, "learning_rate": 8.857691262399783e-05, "loss": 3.3321, "step": 13455 }, { "epoch": 0.9145264302214975, "grad_norm": 2.196021556854248, "learning_rate": 8.857266612311456e-05, "loss": 3.5754, "step": 13460 }, { "epoch": 0.9148661502921592, "grad_norm": 2.6829655170440674, "learning_rate": 8.856841962223129e-05, "loss": 3.4622, "step": 13465 }, { "epoch": 0.9152058703628211, "grad_norm": 2.14595627784729, "learning_rate": 8.856417312134801e-05, "loss": 3.5408, "step": 13470 }, { "epoch": 0.9155455904334828, "grad_norm": 1.946702003479004, "learning_rate": 8.855992662046474e-05, "loss": 3.3058, "step": 13475 }, { "epoch": 0.9158853105041446, "grad_norm": 2.204606771469116, "learning_rate": 8.855568011958147e-05, "loss": 3.2866, "step": 13480 }, { "epoch": 0.9162250305748063, "grad_norm": 1.8814940452575684, "learning_rate": 8.85514336186982e-05, "loss": 3.317, "step": 13485 }, { "epoch": 0.9165647506454682, "grad_norm": 2.5195305347442627, "learning_rate": 8.854718711781493e-05, "loss": 3.3938, "step": 13490 }, { "epoch": 0.9169044707161299, "grad_norm": 2.58943247795105, "learning_rate": 8.854294061693165e-05, 
"loss": 3.4735, "step": 13495 }, { "epoch": 0.9172441907867916, "grad_norm": 2.0806257724761963, "learning_rate": 8.853869411604838e-05, "loss": 3.3306, "step": 13500 }, { "epoch": 0.9175839108574535, "grad_norm": 2.1838865280151367, "learning_rate": 8.853444761516511e-05, "loss": 3.4814, "step": 13505 }, { "epoch": 0.9179236309281152, "grad_norm": 1.8753262758255005, "learning_rate": 8.853020111428184e-05, "loss": 3.7305, "step": 13510 }, { "epoch": 0.918263350998777, "grad_norm": 1.9816781282424927, "learning_rate": 8.852595461339857e-05, "loss": 2.9606, "step": 13515 }, { "epoch": 0.9186030710694388, "grad_norm": 3.587141275405884, "learning_rate": 8.85217081125153e-05, "loss": 3.4243, "step": 13520 }, { "epoch": 0.9189427911401006, "grad_norm": 2.19759464263916, "learning_rate": 8.851746161163202e-05, "loss": 3.4074, "step": 13525 }, { "epoch": 0.9192825112107623, "grad_norm": 2.2791662216186523, "learning_rate": 8.851321511074875e-05, "loss": 3.1331, "step": 13530 }, { "epoch": 0.9196222312814241, "grad_norm": 2.315096855163574, "learning_rate": 8.850896860986548e-05, "loss": 3.4695, "step": 13535 }, { "epoch": 0.9199619513520859, "grad_norm": 1.9696284532546997, "learning_rate": 8.85047221089822e-05, "loss": 3.5776, "step": 13540 }, { "epoch": 0.9203016714227477, "grad_norm": 2.397587299346924, "learning_rate": 8.850047560809893e-05, "loss": 3.3722, "step": 13545 }, { "epoch": 0.9206413914934094, "grad_norm": 2.582270860671997, "learning_rate": 8.849622910721565e-05, "loss": 3.5678, "step": 13550 }, { "epoch": 0.9209811115640713, "grad_norm": 2.483783006668091, "learning_rate": 8.849198260633239e-05, "loss": 3.4023, "step": 13555 }, { "epoch": 0.921320831634733, "grad_norm": 2.3398361206054688, "learning_rate": 8.848773610544912e-05, "loss": 3.3163, "step": 13560 }, { "epoch": 0.9216605517053947, "grad_norm": 2.571629285812378, "learning_rate": 8.848348960456583e-05, "loss": 3.3716, "step": 13565 }, { "epoch": 0.9220002717760565, "grad_norm": 
3.2918901443481445, "learning_rate": 8.847924310368257e-05, "loss": 3.3708, "step": 13570 }, { "epoch": 0.9223399918467183, "grad_norm": 1.8452770709991455, "learning_rate": 8.84749966027993e-05, "loss": 3.398, "step": 13575 }, { "epoch": 0.9226797119173801, "grad_norm": 2.250333547592163, "learning_rate": 8.847075010191602e-05, "loss": 3.2594, "step": 13580 }, { "epoch": 0.9230194319880418, "grad_norm": 2.371022939682007, "learning_rate": 8.846650360103276e-05, "loss": 3.1019, "step": 13585 }, { "epoch": 0.9233591520587037, "grad_norm": 2.073197603225708, "learning_rate": 8.846225710014949e-05, "loss": 3.406, "step": 13590 }, { "epoch": 0.9236988721293654, "grad_norm": 2.479724168777466, "learning_rate": 8.84580105992662e-05, "loss": 3.4001, "step": 13595 }, { "epoch": 0.9240385922000272, "grad_norm": 1.9628310203552246, "learning_rate": 8.845376409838294e-05, "loss": 3.3249, "step": 13600 }, { "epoch": 0.924378312270689, "grad_norm": 2.0974276065826416, "learning_rate": 8.844951759749967e-05, "loss": 3.5215, "step": 13605 }, { "epoch": 0.9247180323413507, "grad_norm": 1.8485081195831299, "learning_rate": 8.844527109661639e-05, "loss": 3.5419, "step": 13610 }, { "epoch": 0.9250577524120125, "grad_norm": 2.2899010181427, "learning_rate": 8.844102459573313e-05, "loss": 3.3265, "step": 13615 }, { "epoch": 0.9253974724826742, "grad_norm": 1.9482983350753784, "learning_rate": 8.843677809484985e-05, "loss": 3.4175, "step": 13620 }, { "epoch": 0.9257371925533361, "grad_norm": 1.6159031391143799, "learning_rate": 8.843253159396657e-05, "loss": 3.4558, "step": 13625 }, { "epoch": 0.9260769126239978, "grad_norm": 1.5389013290405273, "learning_rate": 8.842828509308331e-05, "loss": 3.2855, "step": 13630 }, { "epoch": 0.9264166326946596, "grad_norm": 2.0275795459747314, "learning_rate": 8.842403859220003e-05, "loss": 3.494, "step": 13635 }, { "epoch": 0.9267563527653214, "grad_norm": 2.7954111099243164, "learning_rate": 8.841979209131675e-05, "loss": 3.373, "step": 13640 }, { 
"epoch": 0.9270960728359832, "grad_norm": 2.6040596961975098, "learning_rate": 8.84155455904335e-05, "loss": 3.147, "step": 13645 }, { "epoch": 0.9274357929066449, "grad_norm": 2.22871732711792, "learning_rate": 8.841129908955021e-05, "loss": 3.5235, "step": 13650 }, { "epoch": 0.9277755129773066, "grad_norm": 2.38703989982605, "learning_rate": 8.840705258866694e-05, "loss": 3.4076, "step": 13655 }, { "epoch": 0.9281152330479685, "grad_norm": 1.7858290672302246, "learning_rate": 8.840280608778368e-05, "loss": 3.4969, "step": 13660 }, { "epoch": 0.9284549531186302, "grad_norm": 2.1832544803619385, "learning_rate": 8.83985595869004e-05, "loss": 3.381, "step": 13665 }, { "epoch": 0.928794673189292, "grad_norm": 2.0679519176483154, "learning_rate": 8.839431308601712e-05, "loss": 3.5828, "step": 13670 }, { "epoch": 0.9291343932599538, "grad_norm": 2.5063905715942383, "learning_rate": 8.839006658513386e-05, "loss": 3.5292, "step": 13675 }, { "epoch": 0.9294741133306156, "grad_norm": 2.0488312244415283, "learning_rate": 8.838582008425058e-05, "loss": 3.3768, "step": 13680 }, { "epoch": 0.9298138334012773, "grad_norm": 2.228555679321289, "learning_rate": 8.83815735833673e-05, "loss": 3.1115, "step": 13685 }, { "epoch": 0.9301535534719392, "grad_norm": 2.152878522872925, "learning_rate": 8.837732708248405e-05, "loss": 3.2995, "step": 13690 }, { "epoch": 0.9304932735426009, "grad_norm": 2.3059911727905273, "learning_rate": 8.837308058160076e-05, "loss": 3.3245, "step": 13695 }, { "epoch": 0.9308329936132627, "grad_norm": 1.7861649990081787, "learning_rate": 8.836883408071749e-05, "loss": 3.1596, "step": 13700 }, { "epoch": 0.9311727136839244, "grad_norm": 2.206907272338867, "learning_rate": 8.836458757983422e-05, "loss": 3.3185, "step": 13705 }, { "epoch": 0.9315124337545863, "grad_norm": 1.6934267282485962, "learning_rate": 8.836034107895095e-05, "loss": 3.3716, "step": 13710 }, { "epoch": 0.931852153825248, "grad_norm": 2.176382303237915, "learning_rate": 
8.835609457806767e-05, "loss": 3.6565, "step": 13715 }, { "epoch": 0.9321918738959097, "grad_norm": 2.210233449935913, "learning_rate": 8.83518480771844e-05, "loss": 3.3932, "step": 13720 }, { "epoch": 0.9325315939665716, "grad_norm": 2.223986864089966, "learning_rate": 8.834760157630113e-05, "loss": 3.149, "step": 13725 }, { "epoch": 0.9328713140372333, "grad_norm": 2.146643877029419, "learning_rate": 8.834335507541786e-05, "loss": 3.2761, "step": 13730 }, { "epoch": 0.9332110341078951, "grad_norm": 1.9866292476654053, "learning_rate": 8.833910857453459e-05, "loss": 3.5739, "step": 13735 }, { "epoch": 0.9335507541785568, "grad_norm": 2.1259379386901855, "learning_rate": 8.833486207365131e-05, "loss": 3.3762, "step": 13740 }, { "epoch": 0.9338904742492187, "grad_norm": 2.23091459274292, "learning_rate": 8.833061557276804e-05, "loss": 3.5156, "step": 13745 }, { "epoch": 0.9342301943198804, "grad_norm": 2.2373642921447754, "learning_rate": 8.832636907188477e-05, "loss": 2.9909, "step": 13750 }, { "epoch": 0.9345699143905422, "grad_norm": 1.9161615371704102, "learning_rate": 8.83221225710015e-05, "loss": 3.6332, "step": 13755 }, { "epoch": 0.934909634461204, "grad_norm": 2.2176098823547363, "learning_rate": 8.831787607011823e-05, "loss": 3.3376, "step": 13760 }, { "epoch": 0.9352493545318658, "grad_norm": 2.131208896636963, "learning_rate": 8.831362956923495e-05, "loss": 3.4767, "step": 13765 }, { "epoch": 0.9355890746025275, "grad_norm": 1.7842234373092651, "learning_rate": 8.830938306835168e-05, "loss": 3.4977, "step": 13770 }, { "epoch": 0.9359287946731893, "grad_norm": 2.3341705799102783, "learning_rate": 8.830513656746841e-05, "loss": 3.1641, "step": 13775 }, { "epoch": 0.9362685147438511, "grad_norm": 2.0824098587036133, "learning_rate": 8.830089006658514e-05, "loss": 3.2631, "step": 13780 }, { "epoch": 0.9366082348145128, "grad_norm": 2.0086612701416016, "learning_rate": 8.829664356570187e-05, "loss": 3.4141, "step": 13785 }, { "epoch": 0.9369479548851746, 
"grad_norm": 1.7281066179275513, "learning_rate": 8.82923970648186e-05, "loss": 3.2272, "step": 13790 }, { "epoch": 0.9372876749558364, "grad_norm": 5.72705602645874, "learning_rate": 8.828815056393532e-05, "loss": 3.4826, "step": 13795 }, { "epoch": 0.9376273950264982, "grad_norm": 2.011859893798828, "learning_rate": 8.828390406305205e-05, "loss": 3.1706, "step": 13800 }, { "epoch": 0.9379671150971599, "grad_norm": 1.9322566986083984, "learning_rate": 8.827965756216878e-05, "loss": 3.3577, "step": 13805 }, { "epoch": 0.9383068351678218, "grad_norm": 2.202861785888672, "learning_rate": 8.82754110612855e-05, "loss": 3.0179, "step": 13810 }, { "epoch": 0.9386465552384835, "grad_norm": 1.9804877042770386, "learning_rate": 8.827116456040223e-05, "loss": 3.3431, "step": 13815 }, { "epoch": 0.9389862753091452, "grad_norm": 1.7738773822784424, "learning_rate": 8.826691805951896e-05, "loss": 3.4447, "step": 13820 }, { "epoch": 0.939325995379807, "grad_norm": 2.635908842086792, "learning_rate": 8.826267155863569e-05, "loss": 2.9742, "step": 13825 }, { "epoch": 0.9396657154504688, "grad_norm": 2.6631758213043213, "learning_rate": 8.825842505775242e-05, "loss": 3.4358, "step": 13830 }, { "epoch": 0.9400054355211306, "grad_norm": 2.182623863220215, "learning_rate": 8.825417855686915e-05, "loss": 3.3175, "step": 13835 }, { "epoch": 0.9403451555917923, "grad_norm": 1.8466054201126099, "learning_rate": 8.824993205598587e-05, "loss": 3.3017, "step": 13840 }, { "epoch": 0.9406848756624542, "grad_norm": 2.6957767009735107, "learning_rate": 8.82456855551026e-05, "loss": 3.1934, "step": 13845 }, { "epoch": 0.9410245957331159, "grad_norm": 1.862452507019043, "learning_rate": 8.824143905421933e-05, "loss": 3.4138, "step": 13850 }, { "epoch": 0.9413643158037777, "grad_norm": 1.6880158185958862, "learning_rate": 8.823719255333606e-05, "loss": 3.4685, "step": 13855 }, { "epoch": 0.9417040358744395, "grad_norm": 2.598220109939575, "learning_rate": 8.823294605245279e-05, "loss": 3.4484, 
"step": 13860 }, { "epoch": 0.9420437559451013, "grad_norm": 2.7040443420410156, "learning_rate": 8.822869955156951e-05, "loss": 3.0738, "step": 13865 }, { "epoch": 0.942383476015763, "grad_norm": 2.614271879196167, "learning_rate": 8.822445305068624e-05, "loss": 3.3705, "step": 13870 }, { "epoch": 0.9427231960864247, "grad_norm": 2.1435706615448, "learning_rate": 8.822020654980297e-05, "loss": 3.3804, "step": 13875 }, { "epoch": 0.9430629161570866, "grad_norm": 2.510624408721924, "learning_rate": 8.82159600489197e-05, "loss": 3.4607, "step": 13880 }, { "epoch": 0.9434026362277483, "grad_norm": 2.1398496627807617, "learning_rate": 8.821171354803643e-05, "loss": 3.0927, "step": 13885 }, { "epoch": 0.9437423562984101, "grad_norm": 2.012885570526123, "learning_rate": 8.820746704715315e-05, "loss": 3.5992, "step": 13890 }, { "epoch": 0.9440820763690719, "grad_norm": 2.2159385681152344, "learning_rate": 8.820322054626988e-05, "loss": 3.5982, "step": 13895 }, { "epoch": 0.9444217964397337, "grad_norm": 2.6419429779052734, "learning_rate": 8.819897404538661e-05, "loss": 3.3276, "step": 13900 }, { "epoch": 0.9447615165103954, "grad_norm": 2.559035301208496, "learning_rate": 8.819472754450332e-05, "loss": 3.3703, "step": 13905 }, { "epoch": 0.9451012365810572, "grad_norm": 2.028944492340088, "learning_rate": 8.819048104362007e-05, "loss": 3.5419, "step": 13910 }, { "epoch": 0.945440956651719, "grad_norm": 2.531256914138794, "learning_rate": 8.81862345427368e-05, "loss": 3.2886, "step": 13915 }, { "epoch": 0.9457806767223808, "grad_norm": 1.943610668182373, "learning_rate": 8.818198804185351e-05, "loss": 3.0905, "step": 13920 }, { "epoch": 0.9461203967930425, "grad_norm": 1.7919096946716309, "learning_rate": 8.817774154097025e-05, "loss": 3.5025, "step": 13925 }, { "epoch": 0.9464601168637043, "grad_norm": 2.541760206222534, "learning_rate": 8.817349504008698e-05, "loss": 3.2375, "step": 13930 }, { "epoch": 0.9467998369343661, "grad_norm": 2.244941234588623, "learning_rate": 
8.816924853920369e-05, "loss": 3.2675, "step": 13935 }, { "epoch": 0.9471395570050278, "grad_norm": 2.9352285861968994, "learning_rate": 8.816500203832043e-05, "loss": 3.5117, "step": 13940 }, { "epoch": 0.9474792770756897, "grad_norm": 2.416170597076416, "learning_rate": 8.816075553743716e-05, "loss": 3.4273, "step": 13945 }, { "epoch": 0.9478189971463514, "grad_norm": 2.170205593109131, "learning_rate": 8.815650903655388e-05, "loss": 3.4009, "step": 13950 }, { "epoch": 0.9481587172170132, "grad_norm": 1.6898329257965088, "learning_rate": 8.815226253567062e-05, "loss": 3.1527, "step": 13955 }, { "epoch": 0.9484984372876749, "grad_norm": 1.983485221862793, "learning_rate": 8.814801603478735e-05, "loss": 3.3173, "step": 13960 }, { "epoch": 0.9488381573583368, "grad_norm": 1.9404557943344116, "learning_rate": 8.814376953390406e-05, "loss": 3.1729, "step": 13965 }, { "epoch": 0.9491778774289985, "grad_norm": 2.5552010536193848, "learning_rate": 8.81395230330208e-05, "loss": 3.5427, "step": 13970 }, { "epoch": 0.9495175974996602, "grad_norm": 2.0226283073425293, "learning_rate": 8.813527653213752e-05, "loss": 3.4911, "step": 13975 }, { "epoch": 0.9498573175703221, "grad_norm": 2.0019125938415527, "learning_rate": 8.813103003125424e-05, "loss": 3.5023, "step": 13980 }, { "epoch": 0.9501970376409838, "grad_norm": 2.6398468017578125, "learning_rate": 8.812678353037099e-05, "loss": 3.2408, "step": 13985 }, { "epoch": 0.9505367577116456, "grad_norm": 1.8547154664993286, "learning_rate": 8.81225370294877e-05, "loss": 3.4152, "step": 13990 }, { "epoch": 0.9508764777823073, "grad_norm": 2.198720932006836, "learning_rate": 8.811829052860443e-05, "loss": 3.4573, "step": 13995 }, { "epoch": 0.9512161978529692, "grad_norm": 2.129786968231201, "learning_rate": 8.811404402772117e-05, "loss": 3.2805, "step": 14000 }, { "epoch": 0.9515559179236309, "grad_norm": 1.6172959804534912, "learning_rate": 8.810979752683788e-05, "loss": 3.0507, "step": 14005 }, { "epoch": 0.9518956379942927, 
"grad_norm": 1.756527304649353, "learning_rate": 8.810555102595461e-05, "loss": 3.2287, "step": 14010 }, { "epoch": 0.9522353580649545, "grad_norm": 2.1423068046569824, "learning_rate": 8.810130452507135e-05, "loss": 3.4567, "step": 14015 }, { "epoch": 0.9525750781356163, "grad_norm": 2.0487897396087646, "learning_rate": 8.809705802418807e-05, "loss": 3.4508, "step": 14020 }, { "epoch": 0.952914798206278, "grad_norm": 2.11043381690979, "learning_rate": 8.80928115233048e-05, "loss": 3.436, "step": 14025 }, { "epoch": 0.9532545182769399, "grad_norm": 1.6826726198196411, "learning_rate": 8.808856502242154e-05, "loss": 3.3798, "step": 14030 }, { "epoch": 0.9535942383476016, "grad_norm": 2.220339298248291, "learning_rate": 8.808431852153825e-05, "loss": 3.3067, "step": 14035 }, { "epoch": 0.9539339584182633, "grad_norm": 1.793248176574707, "learning_rate": 8.808007202065498e-05, "loss": 3.4956, "step": 14040 }, { "epoch": 0.9542736784889251, "grad_norm": 3.180000066757202, "learning_rate": 8.807582551977172e-05, "loss": 3.3711, "step": 14045 }, { "epoch": 0.9546133985595869, "grad_norm": 2.0635814666748047, "learning_rate": 8.807157901888844e-05, "loss": 3.5886, "step": 14050 }, { "epoch": 0.9549531186302487, "grad_norm": 1.8114535808563232, "learning_rate": 8.806733251800516e-05, "loss": 3.3088, "step": 14055 }, { "epoch": 0.9552928387009104, "grad_norm": 2.163492202758789, "learning_rate": 8.806308601712189e-05, "loss": 3.3083, "step": 14060 }, { "epoch": 0.9556325587715723, "grad_norm": 2.1202809810638428, "learning_rate": 8.805883951623862e-05, "loss": 3.3011, "step": 14065 }, { "epoch": 0.955972278842234, "grad_norm": 5.391999244689941, "learning_rate": 8.805459301535535e-05, "loss": 3.3349, "step": 14070 }, { "epoch": 0.9563119989128958, "grad_norm": 1.9948046207427979, "learning_rate": 8.805034651447208e-05, "loss": 3.6082, "step": 14075 }, { "epoch": 0.9566517189835575, "grad_norm": 2.9853687286376953, "learning_rate": 8.80461000135888e-05, "loss": 3.4616, 
"step": 14080 }, { "epoch": 0.9569914390542194, "grad_norm": 1.9888941049575806, "learning_rate": 8.804185351270553e-05, "loss": 3.227, "step": 14085 }, { "epoch": 0.9573311591248811, "grad_norm": 2.2460319995880127, "learning_rate": 8.803760701182226e-05, "loss": 3.2841, "step": 14090 }, { "epoch": 0.9576708791955428, "grad_norm": 1.9560335874557495, "learning_rate": 8.803336051093899e-05, "loss": 3.4355, "step": 14095 }, { "epoch": 0.9580105992662047, "grad_norm": 2.192253828048706, "learning_rate": 8.802911401005572e-05, "loss": 3.3058, "step": 14100 }, { "epoch": 0.9583503193368664, "grad_norm": 2.2094857692718506, "learning_rate": 8.802486750917244e-05, "loss": 3.1896, "step": 14105 }, { "epoch": 0.9586900394075282, "grad_norm": 1.9594848155975342, "learning_rate": 8.802062100828917e-05, "loss": 3.1649, "step": 14110 }, { "epoch": 0.95902975947819, "grad_norm": 2.1726417541503906, "learning_rate": 8.80163745074059e-05, "loss": 3.3859, "step": 14115 }, { "epoch": 0.9593694795488518, "grad_norm": 2.2436349391937256, "learning_rate": 8.801212800652263e-05, "loss": 3.742, "step": 14120 }, { "epoch": 0.9597091996195135, "grad_norm": 2.6532297134399414, "learning_rate": 8.800788150563936e-05, "loss": 3.5557, "step": 14125 }, { "epoch": 0.9600489196901753, "grad_norm": 2.1286096572875977, "learning_rate": 8.800363500475608e-05, "loss": 3.4458, "step": 14130 }, { "epoch": 0.9603886397608371, "grad_norm": 2.4496521949768066, "learning_rate": 8.799938850387281e-05, "loss": 3.3531, "step": 14135 }, { "epoch": 0.9607283598314988, "grad_norm": 2.0682320594787598, "learning_rate": 8.799514200298954e-05, "loss": 3.5732, "step": 14140 }, { "epoch": 0.9610680799021606, "grad_norm": 1.8080471754074097, "learning_rate": 8.799089550210627e-05, "loss": 3.3395, "step": 14145 }, { "epoch": 0.9614077999728224, "grad_norm": 1.9637908935546875, "learning_rate": 8.7986649001223e-05, "loss": 3.5554, "step": 14150 }, { "epoch": 0.9617475200434842, "grad_norm": 2.443098306655884, 
"learning_rate": 8.798240250033972e-05, "loss": 3.417, "step": 14155 }, { "epoch": 0.9620872401141459, "grad_norm": 2.088620185852051, "learning_rate": 8.797815599945645e-05, "loss": 3.4192, "step": 14160 }, { "epoch": 0.9624269601848077, "grad_norm": 2.1158978939056396, "learning_rate": 8.797390949857318e-05, "loss": 3.4457, "step": 14165 }, { "epoch": 0.9627666802554695, "grad_norm": 2.1730027198791504, "learning_rate": 8.796966299768991e-05, "loss": 3.3562, "step": 14170 }, { "epoch": 0.9631064003261313, "grad_norm": 2.249563694000244, "learning_rate": 8.796541649680664e-05, "loss": 3.3702, "step": 14175 }, { "epoch": 0.963446120396793, "grad_norm": 2.219883441925049, "learning_rate": 8.796116999592336e-05, "loss": 3.3654, "step": 14180 }, { "epoch": 0.9637858404674549, "grad_norm": 2.218933343887329, "learning_rate": 8.795692349504009e-05, "loss": 3.5882, "step": 14185 }, { "epoch": 0.9641255605381166, "grad_norm": 2.0734035968780518, "learning_rate": 8.795267699415682e-05, "loss": 3.3429, "step": 14190 }, { "epoch": 0.9644652806087783, "grad_norm": 2.0548102855682373, "learning_rate": 8.794843049327355e-05, "loss": 3.42, "step": 14195 }, { "epoch": 0.9648050006794402, "grad_norm": 1.7602564096450806, "learning_rate": 8.794418399239028e-05, "loss": 3.3179, "step": 14200 }, { "epoch": 0.9651447207501019, "grad_norm": 2.2944834232330322, "learning_rate": 8.7939937491507e-05, "loss": 3.2597, "step": 14205 }, { "epoch": 0.9654844408207637, "grad_norm": 1.991561770439148, "learning_rate": 8.793569099062373e-05, "loss": 3.366, "step": 14210 }, { "epoch": 0.9658241608914254, "grad_norm": 2.391232490539551, "learning_rate": 8.793144448974046e-05, "loss": 3.289, "step": 14215 }, { "epoch": 0.9661638809620873, "grad_norm": 2.7012839317321777, "learning_rate": 8.792719798885719e-05, "loss": 3.6302, "step": 14220 }, { "epoch": 0.966503601032749, "grad_norm": 2.4641456604003906, "learning_rate": 8.792295148797392e-05, "loss": 3.3902, "step": 14225 }, { "epoch": 
0.9668433211034108, "grad_norm": 2.2539710998535156, "learning_rate": 8.791870498709064e-05, "loss": 3.4028, "step": 14230 }, { "epoch": 0.9671830411740726, "grad_norm": 1.8685566186904907, "learning_rate": 8.791445848620737e-05, "loss": 3.4354, "step": 14235 }, { "epoch": 0.9675227612447344, "grad_norm": 2.8558037281036377, "learning_rate": 8.79102119853241e-05, "loss": 3.2689, "step": 14240 }, { "epoch": 0.9678624813153961, "grad_norm": 2.2012362480163574, "learning_rate": 8.790596548444083e-05, "loss": 3.2239, "step": 14245 }, { "epoch": 0.9682022013860578, "grad_norm": 1.9266421794891357, "learning_rate": 8.790171898355756e-05, "loss": 3.4227, "step": 14250 }, { "epoch": 0.9685419214567197, "grad_norm": 1.6999174356460571, "learning_rate": 8.789747248267428e-05, "loss": 3.5226, "step": 14255 }, { "epoch": 0.9688816415273814, "grad_norm": 1.8867813348770142, "learning_rate": 8.7893225981791e-05, "loss": 3.4338, "step": 14260 }, { "epoch": 0.9692213615980432, "grad_norm": 1.8755052089691162, "learning_rate": 8.788897948090774e-05, "loss": 3.2911, "step": 14265 }, { "epoch": 0.969561081668705, "grad_norm": 2.179497718811035, "learning_rate": 8.788473298002447e-05, "loss": 3.3562, "step": 14270 }, { "epoch": 0.9699008017393668, "grad_norm": 2.0865354537963867, "learning_rate": 8.788048647914118e-05, "loss": 3.4683, "step": 14275 }, { "epoch": 0.9702405218100285, "grad_norm": 2.681731700897217, "learning_rate": 8.787623997825792e-05, "loss": 3.3607, "step": 14280 }, { "epoch": 0.9705802418806904, "grad_norm": 2.1763722896575928, "learning_rate": 8.787199347737465e-05, "loss": 3.4264, "step": 14285 }, { "epoch": 0.9709199619513521, "grad_norm": 1.6971964836120605, "learning_rate": 8.786774697649137e-05, "loss": 3.4209, "step": 14290 }, { "epoch": 0.9712596820220138, "grad_norm": 1.9688156843185425, "learning_rate": 8.786350047560811e-05, "loss": 3.3535, "step": 14295 }, { "epoch": 0.9715994020926756, "grad_norm": 2.2232604026794434, "learning_rate": 
8.785925397472484e-05, "loss": 3.3133, "step": 14300 }, { "epoch": 0.9719391221633374, "grad_norm": 2.290151357650757, "learning_rate": 8.785500747384155e-05, "loss": 3.5035, "step": 14305 }, { "epoch": 0.9722788422339992, "grad_norm": 2.404773712158203, "learning_rate": 8.785076097295829e-05, "loss": 3.3521, "step": 14310 }, { "epoch": 0.9726185623046609, "grad_norm": 2.1335349082946777, "learning_rate": 8.784651447207502e-05, "loss": 3.1834, "step": 14315 }, { "epoch": 0.9729582823753228, "grad_norm": 1.9422911405563354, "learning_rate": 8.784226797119174e-05, "loss": 3.3794, "step": 14320 }, { "epoch": 0.9732980024459845, "grad_norm": 2.3396897315979004, "learning_rate": 8.783802147030848e-05, "loss": 2.9371, "step": 14325 }, { "epoch": 0.9736377225166463, "grad_norm": 2.0865442752838135, "learning_rate": 8.783377496942519e-05, "loss": 3.4637, "step": 14330 }, { "epoch": 0.973977442587308, "grad_norm": 1.9042447805404663, "learning_rate": 8.782952846854192e-05, "loss": 3.4505, "step": 14335 }, { "epoch": 0.9743171626579699, "grad_norm": 1.6497031450271606, "learning_rate": 8.782528196765866e-05, "loss": 3.3054, "step": 14340 }, { "epoch": 0.9746568827286316, "grad_norm": 1.9402196407318115, "learning_rate": 8.782103546677538e-05, "loss": 3.54, "step": 14345 }, { "epoch": 0.9749966027992933, "grad_norm": 2.109180450439453, "learning_rate": 8.78167889658921e-05, "loss": 3.4088, "step": 14350 }, { "epoch": 0.9753363228699552, "grad_norm": 1.9940447807312012, "learning_rate": 8.781254246500884e-05, "loss": 2.9455, "step": 14355 }, { "epoch": 0.9756760429406169, "grad_norm": 1.989651083946228, "learning_rate": 8.780829596412556e-05, "loss": 3.5452, "step": 14360 }, { "epoch": 0.9760157630112787, "grad_norm": 1.6461108922958374, "learning_rate": 8.780404946324229e-05, "loss": 3.3594, "step": 14365 }, { "epoch": 0.9763554830819405, "grad_norm": 2.007110357284546, "learning_rate": 8.779980296235903e-05, "loss": 3.4678, "step": 14370 }, { "epoch": 0.9766952031526023, 
"grad_norm": 2.036989450454712, "learning_rate": 8.779555646147574e-05, "loss": 3.1833, "step": 14375 }, { "epoch": 0.977034923223264, "grad_norm": 1.9842579364776611, "learning_rate": 8.779130996059247e-05, "loss": 3.4287, "step": 14380 }, { "epoch": 0.9773746432939258, "grad_norm": 2.1302239894866943, "learning_rate": 8.778706345970921e-05, "loss": 2.9818, "step": 14385 }, { "epoch": 0.9777143633645876, "grad_norm": 2.6202449798583984, "learning_rate": 8.778281695882593e-05, "loss": 3.2379, "step": 14390 }, { "epoch": 0.9780540834352494, "grad_norm": 2.211385488510132, "learning_rate": 8.777857045794266e-05, "loss": 3.5694, "step": 14395 }, { "epoch": 0.9783938035059111, "grad_norm": 2.216343879699707, "learning_rate": 8.777432395705938e-05, "loss": 3.3475, "step": 14400 }, { "epoch": 0.978733523576573, "grad_norm": 2.0956733226776123, "learning_rate": 8.777007745617611e-05, "loss": 3.1273, "step": 14405 }, { "epoch": 0.9790732436472347, "grad_norm": 1.9711989164352417, "learning_rate": 8.776583095529284e-05, "loss": 3.1431, "step": 14410 }, { "epoch": 0.9794129637178964, "grad_norm": 1.7499561309814453, "learning_rate": 8.776158445440957e-05, "loss": 3.3817, "step": 14415 }, { "epoch": 0.9797526837885582, "grad_norm": 2.2698473930358887, "learning_rate": 8.77573379535263e-05, "loss": 3.1078, "step": 14420 }, { "epoch": 0.98009240385922, "grad_norm": 1.723395586013794, "learning_rate": 8.775309145264302e-05, "loss": 3.1959, "step": 14425 }, { "epoch": 0.9804321239298818, "grad_norm": 2.517970561981201, "learning_rate": 8.774884495175975e-05, "loss": 3.4642, "step": 14430 }, { "epoch": 0.9807718440005435, "grad_norm": 4.220458507537842, "learning_rate": 8.774459845087648e-05, "loss": 3.0291, "step": 14435 }, { "epoch": 0.9811115640712054, "grad_norm": 2.1747329235076904, "learning_rate": 8.774035194999321e-05, "loss": 3.4183, "step": 14440 }, { "epoch": 0.9814512841418671, "grad_norm": 1.8945306539535522, "learning_rate": 8.773610544910994e-05, "loss": 3.5566, 
"step": 14445 }, { "epoch": 0.9817910042125289, "grad_norm": 2.3650691509246826, "learning_rate": 8.773185894822666e-05, "loss": 2.9793, "step": 14450 }, { "epoch": 0.9821307242831907, "grad_norm": 1.4567208290100098, "learning_rate": 8.772761244734339e-05, "loss": 3.5264, "step": 14455 }, { "epoch": 0.9824704443538524, "grad_norm": 2.0683977603912354, "learning_rate": 8.772336594646012e-05, "loss": 3.2673, "step": 14460 }, { "epoch": 0.9828101644245142, "grad_norm": 1.6733638048171997, "learning_rate": 8.771911944557685e-05, "loss": 3.405, "step": 14465 }, { "epoch": 0.9831498844951759, "grad_norm": 2.290371894836426, "learning_rate": 8.771487294469358e-05, "loss": 3.3912, "step": 14470 }, { "epoch": 0.9834896045658378, "grad_norm": 2.452742338180542, "learning_rate": 8.77106264438103e-05, "loss": 3.0632, "step": 14475 }, { "epoch": 0.9838293246364995, "grad_norm": 2.672314167022705, "learning_rate": 8.770637994292703e-05, "loss": 3.2689, "step": 14480 }, { "epoch": 0.9841690447071613, "grad_norm": 2.217886209487915, "learning_rate": 8.770213344204376e-05, "loss": 3.4091, "step": 14485 }, { "epoch": 0.9845087647778231, "grad_norm": 1.8328226804733276, "learning_rate": 8.769788694116049e-05, "loss": 3.3401, "step": 14490 }, { "epoch": 0.9848484848484849, "grad_norm": 3.0413577556610107, "learning_rate": 8.769364044027722e-05, "loss": 3.5316, "step": 14495 }, { "epoch": 0.9851882049191466, "grad_norm": 2.2129108905792236, "learning_rate": 8.768939393939394e-05, "loss": 3.3554, "step": 14500 }, { "epoch": 0.9855279249898083, "grad_norm": 2.103874683380127, "learning_rate": 8.768514743851067e-05, "loss": 3.2374, "step": 14505 }, { "epoch": 0.9858676450604702, "grad_norm": 2.4148151874542236, "learning_rate": 8.76809009376274e-05, "loss": 3.2403, "step": 14510 }, { "epoch": 0.9862073651311319, "grad_norm": 2.0161502361297607, "learning_rate": 8.767665443674413e-05, "loss": 3.4749, "step": 14515 }, { "epoch": 0.9865470852017937, "grad_norm": 1.8706799745559692, 
"learning_rate": 8.767240793586086e-05, "loss": 3.2181, "step": 14520 }, { "epoch": 0.9868868052724555, "grad_norm": 1.7251646518707275, "learning_rate": 8.766816143497758e-05, "loss": 3.1983, "step": 14525 }, { "epoch": 0.9872265253431173, "grad_norm": 2.270615339279175, "learning_rate": 8.766391493409431e-05, "loss": 3.3982, "step": 14530 }, { "epoch": 0.987566245413779, "grad_norm": 2.3085289001464844, "learning_rate": 8.765966843321104e-05, "loss": 3.3536, "step": 14535 }, { "epoch": 0.9879059654844409, "grad_norm": 3.0530669689178467, "learning_rate": 8.765542193232777e-05, "loss": 3.4448, "step": 14540 }, { "epoch": 0.9882456855551026, "grad_norm": 1.8535819053649902, "learning_rate": 8.76511754314445e-05, "loss": 3.2159, "step": 14545 }, { "epoch": 0.9885854056257644, "grad_norm": 2.3296449184417725, "learning_rate": 8.764692893056122e-05, "loss": 3.2978, "step": 14550 }, { "epoch": 0.9889251256964261, "grad_norm": 3.934795379638672, "learning_rate": 8.764268242967795e-05, "loss": 3.3364, "step": 14555 }, { "epoch": 0.989264845767088, "grad_norm": 2.124008893966675, "learning_rate": 8.763843592879468e-05, "loss": 3.3821, "step": 14560 }, { "epoch": 0.9896045658377497, "grad_norm": 1.8345386981964111, "learning_rate": 8.763418942791141e-05, "loss": 3.4093, "step": 14565 }, { "epoch": 0.9899442859084114, "grad_norm": 1.6654386520385742, "learning_rate": 8.762994292702814e-05, "loss": 3.1737, "step": 14570 }, { "epoch": 0.9902840059790733, "grad_norm": 2.241921901702881, "learning_rate": 8.762569642614486e-05, "loss": 3.4927, "step": 14575 }, { "epoch": 0.990623726049735, "grad_norm": 2.1256651878356934, "learning_rate": 8.762144992526159e-05, "loss": 3.3094, "step": 14580 }, { "epoch": 0.9909634461203968, "grad_norm": 2.0170223712921143, "learning_rate": 8.761720342437832e-05, "loss": 3.3643, "step": 14585 }, { "epoch": 0.9913031661910585, "grad_norm": 1.6097463369369507, "learning_rate": 8.761295692349505e-05, "loss": 3.4341, "step": 14590 }, { "epoch": 
0.9916428862617204, "grad_norm": 1.8417088985443115, "learning_rate": 8.760871042261178e-05, "loss": 3.2865, "step": 14595 }, { "epoch": 0.9919826063323821, "grad_norm": 1.768991231918335, "learning_rate": 8.760446392172849e-05, "loss": 3.3081, "step": 14600 }, { "epoch": 0.9923223264030439, "grad_norm": 2.3983466625213623, "learning_rate": 8.760021742084523e-05, "loss": 3.5978, "step": 14605 }, { "epoch": 0.9926620464737057, "grad_norm": 2.188138246536255, "learning_rate": 8.759597091996196e-05, "loss": 3.2194, "step": 14610 }, { "epoch": 0.9930017665443674, "grad_norm": 1.9903393983840942, "learning_rate": 8.759172441907867e-05, "loss": 3.4792, "step": 14615 }, { "epoch": 0.9933414866150292, "grad_norm": 2.0076169967651367, "learning_rate": 8.758747791819542e-05, "loss": 3.1612, "step": 14620 }, { "epoch": 0.993681206685691, "grad_norm": 2.4887337684631348, "learning_rate": 8.758323141731214e-05, "loss": 3.3489, "step": 14625 }, { "epoch": 0.9940209267563528, "grad_norm": 2.093916177749634, "learning_rate": 8.757898491642886e-05, "loss": 3.5879, "step": 14630 }, { "epoch": 0.9943606468270145, "grad_norm": 2.3841006755828857, "learning_rate": 8.75747384155456e-05, "loss": 3.2517, "step": 14635 }, { "epoch": 0.9947003668976763, "grad_norm": 2.1051478385925293, "learning_rate": 8.757049191466233e-05, "loss": 3.4353, "step": 14640 }, { "epoch": 0.9950400869683381, "grad_norm": 2.1174380779266357, "learning_rate": 8.756624541377904e-05, "loss": 3.3293, "step": 14645 }, { "epoch": 0.9953798070389999, "grad_norm": 2.258221387863159, "learning_rate": 8.756199891289578e-05, "loss": 3.2861, "step": 14650 }, { "epoch": 0.9957195271096616, "grad_norm": 2.3565449714660645, "learning_rate": 8.755775241201251e-05, "loss": 3.3414, "step": 14655 }, { "epoch": 0.9960592471803235, "grad_norm": 1.9497843980789185, "learning_rate": 8.755350591112923e-05, "loss": 3.38, "step": 14660 }, { "epoch": 0.9963989672509852, "grad_norm": 2.1668646335601807, "learning_rate": 
8.754925941024597e-05, "loss": 3.5752, "step": 14665 }, { "epoch": 0.9967386873216469, "grad_norm": 1.968654990196228, "learning_rate": 8.75450129093627e-05, "loss": 3.4732, "step": 14670 }, { "epoch": 0.9970784073923087, "grad_norm": 2.0822348594665527, "learning_rate": 8.754076640847941e-05, "loss": 3.4756, "step": 14675 }, { "epoch": 0.9974181274629705, "grad_norm": 2.294171094894409, "learning_rate": 8.753651990759615e-05, "loss": 3.3943, "step": 14680 }, { "epoch": 0.9977578475336323, "grad_norm": 2.1640141010284424, "learning_rate": 8.753227340671287e-05, "loss": 3.3356, "step": 14685 }, { "epoch": 0.998097567604294, "grad_norm": 1.4752922058105469, "learning_rate": 8.75280269058296e-05, "loss": 3.4327, "step": 14690 }, { "epoch": 0.9984372876749559, "grad_norm": 1.9537899494171143, "learning_rate": 8.752378040494634e-05, "loss": 3.6484, "step": 14695 }, { "epoch": 0.9987770077456176, "grad_norm": 2.025458335876465, "learning_rate": 8.751953390406305e-05, "loss": 3.5732, "step": 14700 }, { "epoch": 0.9991167278162794, "grad_norm": 2.4621469974517822, "learning_rate": 8.751528740317978e-05, "loss": 3.3452, "step": 14705 }, { "epoch": 0.9994564478869412, "grad_norm": 2.4667470455169678, "learning_rate": 8.751104090229652e-05, "loss": 3.4136, "step": 14710 }, { "epoch": 0.999796167957603, "grad_norm": 2.1196353435516357, "learning_rate": 8.750679440141323e-05, "loss": 3.5448, "step": 14715 }, { "epoch": 1.0, "eval_bertscore": { "f1": 0.8405732463549616, "precision": 0.8403871552243415, "recall": 0.8418096007793862 }, "eval_bleu_4": 0.021728611886642827, "eval_exact_match": 0.0005814516910553348, "eval_loss": 3.374361753463745, "eval_meteor": 0.08745923686208683, "eval_rouge": { "rouge1": 0.12130096569603133, "rouge2": 0.0190627440734581, "rougeL": 0.10542714029787417, "rougeLsum": 0.10548014916626183 }, "eval_runtime": 1964.3551, "eval_samples_per_second": 5.253, "eval_steps_per_second": 0.657, "step": 14718 }, { "epoch": 1.0001358880282647, "grad_norm": 
2.30869197845459, "learning_rate": 8.750254790052996e-05, "loss": 3.2446, "step": 14720 }, { "epoch": 1.0004756080989265, "grad_norm": 1.9131754636764526, "learning_rate": 8.74983013996467e-05, "loss": 3.1535, "step": 14725 }, { "epoch": 1.0008153281695882, "grad_norm": 2.8986282348632812, "learning_rate": 8.749405489876342e-05, "loss": 3.0042, "step": 14730 }, { "epoch": 1.00115504824025, "grad_norm": 1.9709516763687134, "learning_rate": 8.748980839788015e-05, "loss": 3.2395, "step": 14735 }, { "epoch": 1.0014947683109119, "grad_norm": 2.672908306121826, "learning_rate": 8.748556189699689e-05, "loss": 3.0194, "step": 14740 }, { "epoch": 1.0018344883815735, "grad_norm": 3.025421142578125, "learning_rate": 8.74813153961136e-05, "loss": 3.2476, "step": 14745 }, { "epoch": 1.0021742084522354, "grad_norm": 2.4004569053649902, "learning_rate": 8.747706889523033e-05, "loss": 2.8947, "step": 14750 }, { "epoch": 1.0025139285228972, "grad_norm": 2.401870012283325, "learning_rate": 8.747282239434706e-05, "loss": 3.0122, "step": 14755 }, { "epoch": 1.0028536485935589, "grad_norm": 3.2174415588378906, "learning_rate": 8.746857589346379e-05, "loss": 3.0847, "step": 14760 }, { "epoch": 1.0031933686642207, "grad_norm": 2.0827596187591553, "learning_rate": 8.746432939258051e-05, "loss": 3.1836, "step": 14765 }, { "epoch": 1.0035330887348826, "grad_norm": 2.3997199535369873, "learning_rate": 8.746008289169724e-05, "loss": 3.0103, "step": 14770 }, { "epoch": 1.0038728088055442, "grad_norm": 2.429229974746704, "learning_rate": 8.745583639081397e-05, "loss": 3.2468, "step": 14775 }, { "epoch": 1.004212528876206, "grad_norm": 2.4387757778167725, "learning_rate": 8.74515898899307e-05, "loss": 3.0489, "step": 14780 }, { "epoch": 1.0045522489468677, "grad_norm": 2.841871738433838, "learning_rate": 8.744734338904743e-05, "loss": 3.2153, "step": 14785 }, { "epoch": 1.0048919690175295, "grad_norm": 2.7952771186828613, "learning_rate": 8.744309688816415e-05, "loss": 3.2557, "step": 14790 }, { 
"epoch": 1.0052316890881914, "grad_norm": 2.5474305152893066, "learning_rate": 8.743885038728088e-05, "loss": 2.9619, "step": 14795 }, { "epoch": 1.005571409158853, "grad_norm": 2.0270814895629883, "learning_rate": 8.743460388639761e-05, "loss": 3.1087, "step": 14800 }, { "epoch": 1.0059111292295149, "grad_norm": 2.0786213874816895, "learning_rate": 8.743035738551434e-05, "loss": 3.1248, "step": 14805 }, { "epoch": 1.0062508493001767, "grad_norm": 2.212132692337036, "learning_rate": 8.742611088463107e-05, "loss": 3.0981, "step": 14810 }, { "epoch": 1.0065905693708384, "grad_norm": 2.028635263442993, "learning_rate": 8.74218643837478e-05, "loss": 2.8963, "step": 14815 }, { "epoch": 1.0069302894415002, "grad_norm": 2.8337883949279785, "learning_rate": 8.741761788286452e-05, "loss": 3.4381, "step": 14820 }, { "epoch": 1.007270009512162, "grad_norm": 2.580205202102661, "learning_rate": 8.741337138198125e-05, "loss": 3.1345, "step": 14825 }, { "epoch": 1.0076097295828237, "grad_norm": 2.3339014053344727, "learning_rate": 8.740912488109798e-05, "loss": 3.1914, "step": 14830 }, { "epoch": 1.0079494496534855, "grad_norm": 2.153930902481079, "learning_rate": 8.74048783802147e-05, "loss": 3.1637, "step": 14835 }, { "epoch": 1.0082891697241474, "grad_norm": 2.009772777557373, "learning_rate": 8.740063187933143e-05, "loss": 3.0908, "step": 14840 }, { "epoch": 1.008628889794809, "grad_norm": 1.904910922050476, "learning_rate": 8.739638537844816e-05, "loss": 3.4076, "step": 14845 }, { "epoch": 1.0089686098654709, "grad_norm": 2.499971628189087, "learning_rate": 8.739213887756489e-05, "loss": 3.3322, "step": 14850 }, { "epoch": 1.0093083299361327, "grad_norm": 1.792022466659546, "learning_rate": 8.738789237668162e-05, "loss": 3.379, "step": 14855 }, { "epoch": 1.0096480500067944, "grad_norm": 2.683361530303955, "learning_rate": 8.738364587579835e-05, "loss": 3.1376, "step": 14860 }, { "epoch": 1.0099877700774562, "grad_norm": 2.071753740310669, "learning_rate": 
8.737939937491507e-05, "loss": 2.9295, "step": 14865 }, { "epoch": 1.0103274901481178, "grad_norm": 2.188662528991699, "learning_rate": 8.73751528740318e-05, "loss": 3.113, "step": 14870 }, { "epoch": 1.0106672102187797, "grad_norm": 2.599916934967041, "learning_rate": 8.737090637314853e-05, "loss": 3.0625, "step": 14875 }, { "epoch": 1.0110069302894416, "grad_norm": 2.975278854370117, "learning_rate": 8.736665987226526e-05, "loss": 3.0086, "step": 14880 }, { "epoch": 1.0113466503601032, "grad_norm": 2.398127794265747, "learning_rate": 8.736241337138199e-05, "loss": 3.2112, "step": 14885 }, { "epoch": 1.011686370430765, "grad_norm": 2.409496545791626, "learning_rate": 8.735816687049871e-05, "loss": 3.3021, "step": 14890 }, { "epoch": 1.012026090501427, "grad_norm": 2.0854439735412598, "learning_rate": 8.735392036961544e-05, "loss": 3.0219, "step": 14895 }, { "epoch": 1.0123658105720885, "grad_norm": 2.3014514446258545, "learning_rate": 8.734967386873217e-05, "loss": 2.9108, "step": 14900 }, { "epoch": 1.0127055306427504, "grad_norm": 2.2741005420684814, "learning_rate": 8.73454273678489e-05, "loss": 3.3868, "step": 14905 }, { "epoch": 1.0130452507134122, "grad_norm": 1.7030918598175049, "learning_rate": 8.734118086696563e-05, "loss": 3.4189, "step": 14910 }, { "epoch": 1.0133849707840739, "grad_norm": 2.1163835525512695, "learning_rate": 8.733693436608235e-05, "loss": 3.0682, "step": 14915 }, { "epoch": 1.0137246908547357, "grad_norm": 2.0328800678253174, "learning_rate": 8.733268786519908e-05, "loss": 2.9648, "step": 14920 }, { "epoch": 1.0140644109253976, "grad_norm": 2.264561891555786, "learning_rate": 8.732844136431581e-05, "loss": 3.0905, "step": 14925 }, { "epoch": 1.0144041309960592, "grad_norm": 4.060765743255615, "learning_rate": 8.732419486343254e-05, "loss": 2.9391, "step": 14930 }, { "epoch": 1.014743851066721, "grad_norm": 2.225100517272949, "learning_rate": 8.731994836254927e-05, "loss": 2.9151, "step": 14935 }, { "epoch": 1.015083571137383, 
"grad_norm": 2.091400623321533, "learning_rate": 8.7315701861666e-05, "loss": 2.8766, "step": 14940 }, { "epoch": 1.0154232912080445, "grad_norm": 2.7768290042877197, "learning_rate": 8.731145536078272e-05, "loss": 3.4132, "step": 14945 }, { "epoch": 1.0157630112787064, "grad_norm": 2.244966506958008, "learning_rate": 8.730720885989945e-05, "loss": 3.349, "step": 14950 }, { "epoch": 1.016102731349368, "grad_norm": 2.1745193004608154, "learning_rate": 8.730296235901617e-05, "loss": 3.1685, "step": 14955 }, { "epoch": 1.0164424514200299, "grad_norm": 1.9947024583816528, "learning_rate": 8.72987158581329e-05, "loss": 3.139, "step": 14960 }, { "epoch": 1.0167821714906917, "grad_norm": 2.1804933547973633, "learning_rate": 8.729446935724963e-05, "loss": 3.0923, "step": 14965 }, { "epoch": 1.0171218915613534, "grad_norm": 4.387372016906738, "learning_rate": 8.729022285636635e-05, "loss": 2.9515, "step": 14970 }, { "epoch": 1.0174616116320152, "grad_norm": 2.2970199584960938, "learning_rate": 8.728597635548309e-05, "loss": 3.1069, "step": 14975 }, { "epoch": 1.017801331702677, "grad_norm": 2.175563335418701, "learning_rate": 8.728172985459982e-05, "loss": 3.0783, "step": 14980 }, { "epoch": 1.0181410517733387, "grad_norm": 2.723208427429199, "learning_rate": 8.727748335371653e-05, "loss": 3.3666, "step": 14985 }, { "epoch": 1.0184807718440005, "grad_norm": 2.127715587615967, "learning_rate": 8.727323685283327e-05, "loss": 3.1234, "step": 14990 }, { "epoch": 1.0188204919146624, "grad_norm": 2.8167009353637695, "learning_rate": 8.726899035195e-05, "loss": 3.1016, "step": 14995 }, { "epoch": 1.019160211985324, "grad_norm": 1.97833251953125, "learning_rate": 8.726474385106672e-05, "loss": 3.1455, "step": 15000 }, { "epoch": 1.0194999320559859, "grad_norm": 4.795385360717773, "learning_rate": 8.726049735018346e-05, "loss": 2.9838, "step": 15005 }, { "epoch": 1.0198396521266477, "grad_norm": 2.1148219108581543, "learning_rate": 8.725625084930019e-05, "loss": 3.1759, "step": 
15010 }, { "epoch": 1.0201793721973094, "grad_norm": 1.6429870128631592, "learning_rate": 8.72520043484169e-05, "loss": 3.1696, "step": 15015 }, { "epoch": 1.0205190922679712, "grad_norm": 2.728102445602417, "learning_rate": 8.724775784753364e-05, "loss": 2.815, "step": 15020 }, { "epoch": 1.020858812338633, "grad_norm": 2.3607075214385986, "learning_rate": 8.724351134665036e-05, "loss": 3.1382, "step": 15025 }, { "epoch": 1.0211985324092947, "grad_norm": 2.4370737075805664, "learning_rate": 8.723926484576709e-05, "loss": 2.8557, "step": 15030 }, { "epoch": 1.0215382524799566, "grad_norm": 2.104238986968994, "learning_rate": 8.723501834488383e-05, "loss": 3.4797, "step": 15035 }, { "epoch": 1.0218779725506182, "grad_norm": 2.226778268814087, "learning_rate": 8.723077184400054e-05, "loss": 3.0661, "step": 15040 }, { "epoch": 1.02221769262128, "grad_norm": 2.0539276599884033, "learning_rate": 8.722652534311727e-05, "loss": 3.1283, "step": 15045 }, { "epoch": 1.022557412691942, "grad_norm": 2.7914774417877197, "learning_rate": 8.722227884223401e-05, "loss": 3.1527, "step": 15050 }, { "epoch": 1.0228971327626035, "grad_norm": 2.321556806564331, "learning_rate": 8.721803234135073e-05, "loss": 3.0205, "step": 15055 }, { "epoch": 1.0232368528332654, "grad_norm": 2.4246115684509277, "learning_rate": 8.721378584046745e-05, "loss": 3.2256, "step": 15060 }, { "epoch": 1.0235765729039272, "grad_norm": 2.5204031467437744, "learning_rate": 8.72095393395842e-05, "loss": 3.1204, "step": 15065 }, { "epoch": 1.0239162929745889, "grad_norm": 2.1460721492767334, "learning_rate": 8.720529283870091e-05, "loss": 2.9568, "step": 15070 }, { "epoch": 1.0242560130452507, "grad_norm": 2.519885301589966, "learning_rate": 8.720104633781764e-05, "loss": 3.1539, "step": 15075 }, { "epoch": 1.0245957331159126, "grad_norm": 2.4039416313171387, "learning_rate": 8.719679983693438e-05, "loss": 3.3851, "step": 15080 }, { "epoch": 1.0249354531865742, "grad_norm": 2.370136022567749, "learning_rate": 
8.71925533360511e-05, "loss": 3.0977, "step": 15085 }, { "epoch": 1.025275173257236, "grad_norm": 2.002659797668457, "learning_rate": 8.718830683516782e-05, "loss": 3.1973, "step": 15090 }, { "epoch": 1.025614893327898, "grad_norm": 2.115602493286133, "learning_rate": 8.718406033428456e-05, "loss": 3.4616, "step": 15095 }, { "epoch": 1.0259546133985595, "grad_norm": 1.9132109880447388, "learning_rate": 8.717981383340128e-05, "loss": 3.1265, "step": 15100 }, { "epoch": 1.0262943334692214, "grad_norm": 2.2726759910583496, "learning_rate": 8.7175567332518e-05, "loss": 3.0846, "step": 15105 }, { "epoch": 1.0266340535398832, "grad_norm": 2.6386284828186035, "learning_rate": 8.717132083163473e-05, "loss": 3.0202, "step": 15110 }, { "epoch": 1.0269737736105449, "grad_norm": 2.7785184383392334, "learning_rate": 8.716707433075146e-05, "loss": 3.1202, "step": 15115 }, { "epoch": 1.0273134936812067, "grad_norm": 2.415992498397827, "learning_rate": 8.716282782986819e-05, "loss": 3.2731, "step": 15120 }, { "epoch": 1.0276532137518684, "grad_norm": 2.963005304336548, "learning_rate": 8.715858132898492e-05, "loss": 3.0403, "step": 15125 }, { "epoch": 1.0279929338225302, "grad_norm": 2.3140108585357666, "learning_rate": 8.715433482810165e-05, "loss": 3.1842, "step": 15130 }, { "epoch": 1.028332653893192, "grad_norm": 2.191446542739868, "learning_rate": 8.715008832721837e-05, "loss": 3.0885, "step": 15135 }, { "epoch": 1.0286723739638537, "grad_norm": 2.13881254196167, "learning_rate": 8.71458418263351e-05, "loss": 3.2907, "step": 15140 }, { "epoch": 1.0290120940345155, "grad_norm": 2.5937154293060303, "learning_rate": 8.714159532545183e-05, "loss": 3.0431, "step": 15145 }, { "epoch": 1.0293518141051774, "grad_norm": 2.163626194000244, "learning_rate": 8.713734882456856e-05, "loss": 3.2725, "step": 15150 }, { "epoch": 1.029691534175839, "grad_norm": 2.138154983520508, "learning_rate": 8.713310232368529e-05, "loss": 3.101, "step": 15155 }, { "epoch": 1.0300312542465009, "grad_norm": 
1.9921923875808716, "learning_rate": 8.712885582280201e-05, "loss": 3.2871, "step": 15160 }, { "epoch": 1.0303709743171627, "grad_norm": 1.9792184829711914, "learning_rate": 8.712460932191874e-05, "loss": 3.0034, "step": 15165 }, { "epoch": 1.0307106943878244, "grad_norm": 1.8674850463867188, "learning_rate": 8.712036282103547e-05, "loss": 3.3249, "step": 15170 }, { "epoch": 1.0310504144584862, "grad_norm": 2.6335103511810303, "learning_rate": 8.71161163201522e-05, "loss": 2.9675, "step": 15175 }, { "epoch": 1.031390134529148, "grad_norm": 2.7190873622894287, "learning_rate": 8.711186981926893e-05, "loss": 3.2263, "step": 15180 }, { "epoch": 1.0317298545998097, "grad_norm": 2.040736436843872, "learning_rate": 8.710762331838565e-05, "loss": 3.2414, "step": 15185 }, { "epoch": 1.0320695746704716, "grad_norm": 2.3963961601257324, "learning_rate": 8.710337681750238e-05, "loss": 3.3165, "step": 15190 }, { "epoch": 1.0324092947411334, "grad_norm": 2.3382728099823, "learning_rate": 8.709913031661911e-05, "loss": 3.2522, "step": 15195 }, { "epoch": 1.032749014811795, "grad_norm": 2.087597370147705, "learning_rate": 8.709488381573584e-05, "loss": 3.1282, "step": 15200 }, { "epoch": 1.033088734882457, "grad_norm": 2.1558640003204346, "learning_rate": 8.709063731485257e-05, "loss": 3.21, "step": 15205 }, { "epoch": 1.0334284549531185, "grad_norm": 2.794996500015259, "learning_rate": 8.70863908139693e-05, "loss": 3.2127, "step": 15210 }, { "epoch": 1.0337681750237804, "grad_norm": 1.7753225564956665, "learning_rate": 8.708214431308602e-05, "loss": 3.2298, "step": 15215 }, { "epoch": 1.0341078950944422, "grad_norm": 2.1452348232269287, "learning_rate": 8.707789781220275e-05, "loss": 3.3406, "step": 15220 }, { "epoch": 1.0344476151651039, "grad_norm": 2.5502309799194336, "learning_rate": 8.707365131131948e-05, "loss": 2.8342, "step": 15225 }, { "epoch": 1.0347873352357657, "grad_norm": 2.419553756713867, "learning_rate": 8.70694048104362e-05, "loss": 3.1423, "step": 15230 }, { 
"epoch": 1.0351270553064276, "grad_norm": 1.9955918788909912, "learning_rate": 8.706515830955293e-05, "loss": 3.3963, "step": 15235 }, { "epoch": 1.0354667753770892, "grad_norm": 2.357942581176758, "learning_rate": 8.706091180866966e-05, "loss": 3.107, "step": 15240 }, { "epoch": 1.035806495447751, "grad_norm": 3.321685791015625, "learning_rate": 8.705666530778639e-05, "loss": 3.296, "step": 15245 }, { "epoch": 1.036146215518413, "grad_norm": 1.9503504037857056, "learning_rate": 8.705241880690312e-05, "loss": 3.2677, "step": 15250 }, { "epoch": 1.0364859355890745, "grad_norm": 2.0552210807800293, "learning_rate": 8.704817230601985e-05, "loss": 3.155, "step": 15255 }, { "epoch": 1.0368256556597364, "grad_norm": 2.3544914722442627, "learning_rate": 8.704392580513657e-05, "loss": 3.2778, "step": 15260 }, { "epoch": 1.0371653757303982, "grad_norm": 1.8692113161087036, "learning_rate": 8.70396793042533e-05, "loss": 3.1443, "step": 15265 }, { "epoch": 1.0375050958010599, "grad_norm": 2.00246524810791, "learning_rate": 8.703543280337003e-05, "loss": 3.0428, "step": 15270 }, { "epoch": 1.0378448158717217, "grad_norm": 2.625488758087158, "learning_rate": 8.703118630248676e-05, "loss": 2.9475, "step": 15275 }, { "epoch": 1.0381845359423836, "grad_norm": 2.3337042331695557, "learning_rate": 8.702693980160349e-05, "loss": 3.2891, "step": 15280 }, { "epoch": 1.0385242560130452, "grad_norm": 2.249253988265991, "learning_rate": 8.702269330072021e-05, "loss": 3.127, "step": 15285 }, { "epoch": 1.038863976083707, "grad_norm": 1.868890404701233, "learning_rate": 8.701844679983694e-05, "loss": 3.5173, "step": 15290 }, { "epoch": 1.0392036961543687, "grad_norm": 2.724318027496338, "learning_rate": 8.701420029895367e-05, "loss": 3.1516, "step": 15295 }, { "epoch": 1.0395434162250305, "grad_norm": 1.8657560348510742, "learning_rate": 8.70099537980704e-05, "loss": 3.1976, "step": 15300 }, { "epoch": 1.0398831362956924, "grad_norm": 2.7632944583892822, "learning_rate": 
8.700570729718713e-05, "loss": 3.1658, "step": 15305 }, { "epoch": 1.040222856366354, "grad_norm": 2.5096688270568848, "learning_rate": 8.700146079630384e-05, "loss": 2.7502, "step": 15310 }, { "epoch": 1.0405625764370159, "grad_norm": 3.0364601612091064, "learning_rate": 8.699721429542058e-05, "loss": 3.3051, "step": 15315 }, { "epoch": 1.0409022965076777, "grad_norm": 3.174330472946167, "learning_rate": 8.699296779453731e-05, "loss": 3.0503, "step": 15320 }, { "epoch": 1.0412420165783394, "grad_norm": 2.3506035804748535, "learning_rate": 8.698872129365402e-05, "loss": 3.1273, "step": 15325 }, { "epoch": 1.0415817366490012, "grad_norm": 2.0939528942108154, "learning_rate": 8.698447479277077e-05, "loss": 3.1117, "step": 15330 }, { "epoch": 1.041921456719663, "grad_norm": 2.167555332183838, "learning_rate": 8.69802282918875e-05, "loss": 3.2264, "step": 15335 }, { "epoch": 1.0422611767903247, "grad_norm": 2.4801979064941406, "learning_rate": 8.697598179100421e-05, "loss": 3.2799, "step": 15340 }, { "epoch": 1.0426008968609866, "grad_norm": 2.5489535331726074, "learning_rate": 8.697173529012095e-05, "loss": 3.0035, "step": 15345 }, { "epoch": 1.0429406169316484, "grad_norm": 2.4706666469573975, "learning_rate": 8.696748878923768e-05, "loss": 3.1509, "step": 15350 }, { "epoch": 1.04328033700231, "grad_norm": 2.302821397781372, "learning_rate": 8.696324228835439e-05, "loss": 2.9043, "step": 15355 }, { "epoch": 1.043620057072972, "grad_norm": 2.2464780807495117, "learning_rate": 8.695899578747113e-05, "loss": 3.1871, "step": 15360 }, { "epoch": 1.0439597771436337, "grad_norm": 2.3567419052124023, "learning_rate": 8.695474928658786e-05, "loss": 2.9689, "step": 15365 }, { "epoch": 1.0442994972142954, "grad_norm": 2.4720120429992676, "learning_rate": 8.695050278570458e-05, "loss": 3.2866, "step": 15370 }, { "epoch": 1.0446392172849572, "grad_norm": 2.420205593109131, "learning_rate": 8.694625628482132e-05, "loss": 3.1823, "step": 15375 }, { "epoch": 1.0449789373556189, 
"grad_norm": 2.0984537601470947, "learning_rate": 8.694200978393803e-05, "loss": 3.0093, "step": 15380 }, { "epoch": 1.0453186574262807, "grad_norm": 2.8476850986480713, "learning_rate": 8.693776328305476e-05, "loss": 2.9611, "step": 15385 }, { "epoch": 1.0456583774969426, "grad_norm": 2.129262685775757, "learning_rate": 8.69335167821715e-05, "loss": 2.9638, "step": 15390 }, { "epoch": 1.0459980975676042, "grad_norm": 1.9606996774673462, "learning_rate": 8.692927028128822e-05, "loss": 2.9926, "step": 15395 }, { "epoch": 1.046337817638266, "grad_norm": 2.6173980236053467, "learning_rate": 8.692502378040494e-05, "loss": 2.9136, "step": 15400 }, { "epoch": 1.046677537708928, "grad_norm": 2.355893135070801, "learning_rate": 8.692077727952169e-05, "loss": 2.974, "step": 15405 }, { "epoch": 1.0470172577795895, "grad_norm": 2.5517373085021973, "learning_rate": 8.69165307786384e-05, "loss": 3.1295, "step": 15410 }, { "epoch": 1.0473569778502514, "grad_norm": 2.352768898010254, "learning_rate": 8.691228427775513e-05, "loss": 3.414, "step": 15415 }, { "epoch": 1.0476966979209132, "grad_norm": 2.4451212882995605, "learning_rate": 8.690803777687187e-05, "loss": 3.008, "step": 15420 }, { "epoch": 1.0480364179915749, "grad_norm": 2.185710906982422, "learning_rate": 8.690379127598858e-05, "loss": 3.0535, "step": 15425 }, { "epoch": 1.0483761380622367, "grad_norm": 2.4750380516052246, "learning_rate": 8.689954477510531e-05, "loss": 2.9396, "step": 15430 }, { "epoch": 1.0487158581328986, "grad_norm": 2.212312936782837, "learning_rate": 8.689529827422205e-05, "loss": 3.1045, "step": 15435 }, { "epoch": 1.0490555782035602, "grad_norm": 2.1509015560150146, "learning_rate": 8.689105177333877e-05, "loss": 3.0418, "step": 15440 }, { "epoch": 1.049395298274222, "grad_norm": 2.6781060695648193, "learning_rate": 8.68868052724555e-05, "loss": 3.0164, "step": 15445 }, { "epoch": 1.049735018344884, "grad_norm": 2.3661441802978516, "learning_rate": 8.688255877157224e-05, "loss": 3.2907, "step": 
15450 }, { "epoch": 1.0500747384155455, "grad_norm": 2.1468160152435303, "learning_rate": 8.687831227068895e-05, "loss": 3.3136, "step": 15455 }, { "epoch": 1.0504144584862074, "grad_norm": 2.945831775665283, "learning_rate": 8.687406576980568e-05, "loss": 2.9884, "step": 15460 }, { "epoch": 1.050754178556869, "grad_norm": 2.4920785427093506, "learning_rate": 8.686981926892241e-05, "loss": 3.3932, "step": 15465 }, { "epoch": 1.0510938986275309, "grad_norm": 2.2416367530822754, "learning_rate": 8.686557276803914e-05, "loss": 3.2772, "step": 15470 }, { "epoch": 1.0514336186981927, "grad_norm": 2.168023109436035, "learning_rate": 8.686132626715586e-05, "loss": 2.9846, "step": 15475 }, { "epoch": 1.0517733387688544, "grad_norm": 2.297691822052002, "learning_rate": 8.685707976627259e-05, "loss": 3.2309, "step": 15480 }, { "epoch": 1.0521130588395162, "grad_norm": 2.081343412399292, "learning_rate": 8.685283326538932e-05, "loss": 3.301, "step": 15485 }, { "epoch": 1.052452778910178, "grad_norm": 2.02813720703125, "learning_rate": 8.684858676450605e-05, "loss": 3.3152, "step": 15490 }, { "epoch": 1.0527924989808397, "grad_norm": 2.477771759033203, "learning_rate": 8.684434026362278e-05, "loss": 3.0457, "step": 15495 }, { "epoch": 1.0531322190515016, "grad_norm": 2.0686535835266113, "learning_rate": 8.68400937627395e-05, "loss": 2.9762, "step": 15500 }, { "epoch": 1.0534719391221634, "grad_norm": 2.531630516052246, "learning_rate": 8.683584726185623e-05, "loss": 3.212, "step": 15505 }, { "epoch": 1.053811659192825, "grad_norm": 1.9772518873214722, "learning_rate": 8.683160076097296e-05, "loss": 3.4337, "step": 15510 }, { "epoch": 1.054151379263487, "grad_norm": 2.5738208293914795, "learning_rate": 8.682735426008969e-05, "loss": 2.9645, "step": 15515 }, { "epoch": 1.0544910993341488, "grad_norm": 2.4857518672943115, "learning_rate": 8.682310775920643e-05, "loss": 3.3956, "step": 15520 }, { "epoch": 1.0548308194048104, "grad_norm": 2.699049234390259, "learning_rate": 
8.681886125832314e-05, "loss": 3.1422, "step": 15525 }, { "epoch": 1.0551705394754722, "grad_norm": 1.6927685737609863, "learning_rate": 8.681461475743987e-05, "loss": 3.1454, "step": 15530 }, { "epoch": 1.055510259546134, "grad_norm": 2.5065701007843018, "learning_rate": 8.68103682565566e-05, "loss": 3.2285, "step": 15535 }, { "epoch": 1.0558499796167957, "grad_norm": 2.3521716594696045, "learning_rate": 8.680612175567333e-05, "loss": 3.2009, "step": 15540 }, { "epoch": 1.0561896996874576, "grad_norm": 2.0714354515075684, "learning_rate": 8.680187525479006e-05, "loss": 3.2255, "step": 15545 }, { "epoch": 1.0565294197581192, "grad_norm": 2.350886344909668, "learning_rate": 8.679762875390678e-05, "loss": 3.1984, "step": 15550 }, { "epoch": 1.056869139828781, "grad_norm": 2.2276477813720703, "learning_rate": 8.679338225302351e-05, "loss": 3.0226, "step": 15555 }, { "epoch": 1.057208859899443, "grad_norm": 2.189335346221924, "learning_rate": 8.678913575214024e-05, "loss": 2.8481, "step": 15560 }, { "epoch": 1.0575485799701045, "grad_norm": 2.427762985229492, "learning_rate": 8.678488925125697e-05, "loss": 3.2815, "step": 15565 }, { "epoch": 1.0578883000407664, "grad_norm": 3.0753462314605713, "learning_rate": 8.67806427503737e-05, "loss": 3.1703, "step": 15570 }, { "epoch": 1.0582280201114282, "grad_norm": 2.283756971359253, "learning_rate": 8.677639624949042e-05, "loss": 2.9694, "step": 15575 }, { "epoch": 1.0585677401820899, "grad_norm": 2.13521671295166, "learning_rate": 8.677214974860715e-05, "loss": 3.2099, "step": 15580 }, { "epoch": 1.0589074602527517, "grad_norm": 2.292412042617798, "learning_rate": 8.676790324772388e-05, "loss": 3.274, "step": 15585 }, { "epoch": 1.0592471803234136, "grad_norm": 2.070289134979248, "learning_rate": 8.676365674684061e-05, "loss": 3.3526, "step": 15590 }, { "epoch": 1.0595869003940752, "grad_norm": 2.3847177028656006, "learning_rate": 8.675941024595734e-05, "loss": 3.1829, "step": 15595 }, { "epoch": 1.059926620464737, 
"grad_norm": 1.789763331413269, "learning_rate": 8.675516374507406e-05, "loss": 3.1609, "step": 15600 }, { "epoch": 1.060266340535399, "grad_norm": 2.454777717590332, "learning_rate": 8.675091724419079e-05, "loss": 2.9367, "step": 15605 }, { "epoch": 1.0606060606060606, "grad_norm": 2.7157504558563232, "learning_rate": 8.674667074330752e-05, "loss": 3.1696, "step": 15610 }, { "epoch": 1.0609457806767224, "grad_norm": 3.1225547790527344, "learning_rate": 8.674242424242425e-05, "loss": 3.2189, "step": 15615 }, { "epoch": 1.0612855007473843, "grad_norm": 2.060387134552002, "learning_rate": 8.673817774154098e-05, "loss": 3.5489, "step": 15620 }, { "epoch": 1.061625220818046, "grad_norm": 2.2736964225769043, "learning_rate": 8.67339312406577e-05, "loss": 3.0779, "step": 15625 }, { "epoch": 1.0619649408887077, "grad_norm": 1.9541609287261963, "learning_rate": 8.672968473977443e-05, "loss": 3.2011, "step": 15630 }, { "epoch": 1.0623046609593694, "grad_norm": 2.68597149848938, "learning_rate": 8.672543823889116e-05, "loss": 3.2799, "step": 15635 }, { "epoch": 1.0626443810300312, "grad_norm": 2.3689451217651367, "learning_rate": 8.672119173800789e-05, "loss": 2.9422, "step": 15640 }, { "epoch": 1.062984101100693, "grad_norm": 2.3264012336730957, "learning_rate": 8.671694523712462e-05, "loss": 3.1657, "step": 15645 }, { "epoch": 1.0633238211713547, "grad_norm": 2.3649744987487793, "learning_rate": 8.671269873624134e-05, "loss": 3.087, "step": 15650 }, { "epoch": 1.0636635412420166, "grad_norm": 3.537750482559204, "learning_rate": 8.670845223535807e-05, "loss": 3.2181, "step": 15655 }, { "epoch": 1.0640032613126784, "grad_norm": 2.5492265224456787, "learning_rate": 8.67042057344748e-05, "loss": 3.051, "step": 15660 }, { "epoch": 1.06434298138334, "grad_norm": 1.990507960319519, "learning_rate": 8.669995923359152e-05, "loss": 3.0818, "step": 15665 }, { "epoch": 1.064682701454002, "grad_norm": 3.3221476078033447, "learning_rate": 8.669571273270826e-05, "loss": 3.0982, "step": 
15670 }, { "epoch": 1.0650224215246638, "grad_norm": 4.010848045349121, "learning_rate": 8.669146623182498e-05, "loss": 3.1638, "step": 15675 }, { "epoch": 1.0653621415953254, "grad_norm": 2.2472732067108154, "learning_rate": 8.66872197309417e-05, "loss": 3.3108, "step": 15680 }, { "epoch": 1.0657018616659872, "grad_norm": 2.4128522872924805, "learning_rate": 8.668297323005844e-05, "loss": 2.8159, "step": 15685 }, { "epoch": 1.066041581736649, "grad_norm": 2.031771659851074, "learning_rate": 8.667872672917517e-05, "loss": 3.289, "step": 15690 }, { "epoch": 1.0663813018073107, "grad_norm": 2.698399305343628, "learning_rate": 8.667448022829188e-05, "loss": 3.1861, "step": 15695 }, { "epoch": 1.0667210218779726, "grad_norm": 2.5234034061431885, "learning_rate": 8.667023372740862e-05, "loss": 3.178, "step": 15700 }, { "epoch": 1.0670607419486344, "grad_norm": 2.236525774002075, "learning_rate": 8.666598722652535e-05, "loss": 2.7984, "step": 15705 }, { "epoch": 1.067400462019296, "grad_norm": 2.192214012145996, "learning_rate": 8.666174072564207e-05, "loss": 3.0382, "step": 15710 }, { "epoch": 1.067740182089958, "grad_norm": 2.169633150100708, "learning_rate": 8.665749422475881e-05, "loss": 3.0626, "step": 15715 }, { "epoch": 1.0680799021606195, "grad_norm": 2.2419815063476562, "learning_rate": 8.665324772387554e-05, "loss": 3.1818, "step": 15720 }, { "epoch": 1.0684196222312814, "grad_norm": 2.343369960784912, "learning_rate": 8.664900122299225e-05, "loss": 3.1712, "step": 15725 }, { "epoch": 1.0687593423019432, "grad_norm": 2.3501298427581787, "learning_rate": 8.664475472210899e-05, "loss": 3.1012, "step": 15730 }, { "epoch": 1.0690990623726049, "grad_norm": 2.043482542037964, "learning_rate": 8.664050822122571e-05, "loss": 3.359, "step": 15735 }, { "epoch": 1.0694387824432667, "grad_norm": 2.629627227783203, "learning_rate": 8.663626172034244e-05, "loss": 3.1868, "step": 15740 }, { "epoch": 1.0697785025139286, "grad_norm": 2.2328848838806152, "learning_rate": 
8.663201521945918e-05, "loss": 2.9562, "step": 15745 }, { "epoch": 1.0701182225845902, "grad_norm": 1.9899100065231323, "learning_rate": 8.662776871857589e-05, "loss": 2.8648, "step": 15750 }, { "epoch": 1.070457942655252, "grad_norm": 2.5784785747528076, "learning_rate": 8.662352221769262e-05, "loss": 3.4873, "step": 15755 }, { "epoch": 1.070797662725914, "grad_norm": 2.4347057342529297, "learning_rate": 8.661927571680936e-05, "loss": 3.1194, "step": 15760 }, { "epoch": 1.0711373827965756, "grad_norm": 2.6061007976531982, "learning_rate": 8.661502921592608e-05, "loss": 3.2621, "step": 15765 }, { "epoch": 1.0714771028672374, "grad_norm": 2.1125524044036865, "learning_rate": 8.66107827150428e-05, "loss": 2.9996, "step": 15770 }, { "epoch": 1.0718168229378993, "grad_norm": 2.141483783721924, "learning_rate": 8.660653621415954e-05, "loss": 3.2219, "step": 15775 }, { "epoch": 1.072156543008561, "grad_norm": 2.4532277584075928, "learning_rate": 8.660228971327626e-05, "loss": 3.1275, "step": 15780 }, { "epoch": 1.0724962630792227, "grad_norm": 1.9706717729568481, "learning_rate": 8.659804321239299e-05, "loss": 3.028, "step": 15785 }, { "epoch": 1.0728359831498846, "grad_norm": 2.2143452167510986, "learning_rate": 8.659379671150973e-05, "loss": 3.1731, "step": 15790 }, { "epoch": 1.0731757032205462, "grad_norm": 2.6343696117401123, "learning_rate": 8.658955021062644e-05, "loss": 3.3856, "step": 15795 }, { "epoch": 1.073515423291208, "grad_norm": 2.6153793334960938, "learning_rate": 8.658530370974317e-05, "loss": 3.0982, "step": 15800 }, { "epoch": 1.07385514336187, "grad_norm": 2.0144715309143066, "learning_rate": 8.65810572088599e-05, "loss": 3.2946, "step": 15805 }, { "epoch": 1.0741948634325316, "grad_norm": 1.694215178489685, "learning_rate": 8.657681070797663e-05, "loss": 2.9771, "step": 15810 }, { "epoch": 1.0745345835031934, "grad_norm": 2.1858346462249756, "learning_rate": 8.657256420709336e-05, "loss": 3.235, "step": 15815 }, { "epoch": 1.074874303573855, 
"grad_norm": 2.03642201423645, "learning_rate": 8.656831770621008e-05, "loss": 3.1267, "step": 15820 }, { "epoch": 1.075214023644517, "grad_norm": 2.378024101257324, "learning_rate": 8.656407120532681e-05, "loss": 3.1074, "step": 15825 }, { "epoch": 1.0755537437151788, "grad_norm": 2.05552339553833, "learning_rate": 8.655982470444354e-05, "loss": 3.1471, "step": 15830 }, { "epoch": 1.0758934637858404, "grad_norm": 2.2869720458984375, "learning_rate": 8.655557820356027e-05, "loss": 2.8809, "step": 15835 }, { "epoch": 1.0762331838565022, "grad_norm": 2.2955286502838135, "learning_rate": 8.6551331702677e-05, "loss": 3.0577, "step": 15840 }, { "epoch": 1.076572903927164, "grad_norm": 2.378366470336914, "learning_rate": 8.654708520179372e-05, "loss": 2.7664, "step": 15845 }, { "epoch": 1.0769126239978257, "grad_norm": 2.4306867122650146, "learning_rate": 8.654283870091045e-05, "loss": 2.9594, "step": 15850 }, { "epoch": 1.0772523440684876, "grad_norm": 1.8849385976791382, "learning_rate": 8.653859220002718e-05, "loss": 3.1311, "step": 15855 }, { "epoch": 1.0775920641391494, "grad_norm": 2.6707868576049805, "learning_rate": 8.653434569914392e-05, "loss": 3.4138, "step": 15860 }, { "epoch": 1.077931784209811, "grad_norm": 2.10031795501709, "learning_rate": 8.653009919826064e-05, "loss": 2.8666, "step": 15865 }, { "epoch": 1.078271504280473, "grad_norm": 2.022233247756958, "learning_rate": 8.652585269737736e-05, "loss": 3.1484, "step": 15870 }, { "epoch": 1.0786112243511348, "grad_norm": 2.3755929470062256, "learning_rate": 8.65216061964941e-05, "loss": 2.9824, "step": 15875 }, { "epoch": 1.0789509444217964, "grad_norm": 2.4512529373168945, "learning_rate": 8.651735969561082e-05, "loss": 2.9989, "step": 15880 }, { "epoch": 1.0792906644924583, "grad_norm": 3.042884349822998, "learning_rate": 8.651311319472755e-05, "loss": 3.2211, "step": 15885 }, { "epoch": 1.0796303845631199, "grad_norm": 1.8290334939956665, "learning_rate": 8.650886669384428e-05, "loss": 3.0718, "step": 
15890 }, { "epoch": 1.0799701046337817, "grad_norm": 2.1615564823150635, "learning_rate": 8.6504620192961e-05, "loss": 3.3251, "step": 15895 }, { "epoch": 1.0803098247044436, "grad_norm": 2.30755615234375, "learning_rate": 8.650037369207773e-05, "loss": 2.7695, "step": 15900 }, { "epoch": 1.0806495447751052, "grad_norm": 2.1867570877075195, "learning_rate": 8.649612719119446e-05, "loss": 3.2912, "step": 15905 }, { "epoch": 1.080989264845767, "grad_norm": 2.442603588104248, "learning_rate": 8.649188069031119e-05, "loss": 3.3973, "step": 15910 }, { "epoch": 1.081328984916429, "grad_norm": 4.194520473480225, "learning_rate": 8.648763418942792e-05, "loss": 3.2343, "step": 15915 }, { "epoch": 1.0816687049870906, "grad_norm": 2.6808524131774902, "learning_rate": 8.648338768854464e-05, "loss": 3.052, "step": 15920 }, { "epoch": 1.0820084250577524, "grad_norm": 2.1443703174591064, "learning_rate": 8.647914118766137e-05, "loss": 3.1404, "step": 15925 }, { "epoch": 1.0823481451284143, "grad_norm": 1.9627197980880737, "learning_rate": 8.64748946867781e-05, "loss": 3.067, "step": 15930 }, { "epoch": 1.082687865199076, "grad_norm": 2.636603593826294, "learning_rate": 8.647064818589483e-05, "loss": 3.0666, "step": 15935 }, { "epoch": 1.0830275852697377, "grad_norm": 3.0560848712921143, "learning_rate": 8.646640168501156e-05, "loss": 3.1902, "step": 15940 }, { "epoch": 1.0833673053403996, "grad_norm": 2.3743202686309814, "learning_rate": 8.646215518412828e-05, "loss": 3.2491, "step": 15945 }, { "epoch": 1.0837070254110612, "grad_norm": 2.115143299102783, "learning_rate": 8.645790868324501e-05, "loss": 3.1435, "step": 15950 }, { "epoch": 1.084046745481723, "grad_norm": 2.2356748580932617, "learning_rate": 8.645366218236174e-05, "loss": 3.2498, "step": 15955 }, { "epoch": 1.084386465552385, "grad_norm": 2.2339792251586914, "learning_rate": 8.644941568147847e-05, "loss": 3.0879, "step": 15960 }, { "epoch": 1.0847261856230466, "grad_norm": 2.1448214054107666, "learning_rate": 
8.64451691805952e-05, "loss": 3.1655, "step": 15965 }, { "epoch": 1.0850659056937084, "grad_norm": 1.9646430015563965, "learning_rate": 8.644092267971192e-05, "loss": 3.1327, "step": 15970 }, { "epoch": 1.0854056257643703, "grad_norm": 2.1800124645233154, "learning_rate": 8.643667617882865e-05, "loss": 3.191, "step": 15975 }, { "epoch": 1.085745345835032, "grad_norm": 2.1873319149017334, "learning_rate": 8.643242967794538e-05, "loss": 3.2687, "step": 15980 }, { "epoch": 1.0860850659056938, "grad_norm": 2.410721778869629, "learning_rate": 8.642818317706211e-05, "loss": 3.1492, "step": 15985 }, { "epoch": 1.0864247859763554, "grad_norm": 1.8868910074234009, "learning_rate": 8.642393667617884e-05, "loss": 3.0879, "step": 15990 }, { "epoch": 1.0867645060470172, "grad_norm": 2.2535200119018555, "learning_rate": 8.641969017529556e-05, "loss": 3.2036, "step": 15995 }, { "epoch": 1.087104226117679, "grad_norm": 2.3863608837127686, "learning_rate": 8.641544367441229e-05, "loss": 3.2514, "step": 16000 }, { "epoch": 1.0874439461883407, "grad_norm": 2.666572093963623, "learning_rate": 8.6411197173529e-05, "loss": 3.1061, "step": 16005 }, { "epoch": 1.0877836662590026, "grad_norm": 2.202094793319702, "learning_rate": 8.640695067264575e-05, "loss": 3.3512, "step": 16010 }, { "epoch": 1.0881233863296644, "grad_norm": 1.8430840969085693, "learning_rate": 8.640270417176248e-05, "loss": 3.0738, "step": 16015 }, { "epoch": 1.088463106400326, "grad_norm": 1.7933624982833862, "learning_rate": 8.639845767087919e-05, "loss": 3.097, "step": 16020 }, { "epoch": 1.088802826470988, "grad_norm": 2.2488911151885986, "learning_rate": 8.639421116999593e-05, "loss": 3.1039, "step": 16025 }, { "epoch": 1.0891425465416498, "grad_norm": 2.1771674156188965, "learning_rate": 8.638996466911266e-05, "loss": 3.1576, "step": 16030 }, { "epoch": 1.0894822666123114, "grad_norm": 2.5261213779449463, "learning_rate": 8.638571816822937e-05, "loss": 3.032, "step": 16035 }, { "epoch": 1.0898219866829733, 
"grad_norm": 2.223576307296753, "learning_rate": 8.638147166734612e-05, "loss": 2.9688, "step": 16040 }, { "epoch": 1.090161706753635, "grad_norm": 2.354400396347046, "learning_rate": 8.637722516646284e-05, "loss": 3.058, "step": 16045 }, { "epoch": 1.0905014268242967, "grad_norm": 1.614444613456726, "learning_rate": 8.637297866557956e-05, "loss": 3.3343, "step": 16050 }, { "epoch": 1.0908411468949586, "grad_norm": 2.129671335220337, "learning_rate": 8.63687321646963e-05, "loss": 3.0322, "step": 16055 }, { "epoch": 1.0911808669656202, "grad_norm": 2.2163119316101074, "learning_rate": 8.636448566381303e-05, "loss": 2.9134, "step": 16060 }, { "epoch": 1.091520587036282, "grad_norm": 2.1517395973205566, "learning_rate": 8.636023916292974e-05, "loss": 3.0223, "step": 16065 }, { "epoch": 1.091860307106944, "grad_norm": 2.5327374935150146, "learning_rate": 8.635599266204648e-05, "loss": 2.9531, "step": 16070 }, { "epoch": 1.0922000271776056, "grad_norm": 2.1549723148345947, "learning_rate": 8.635174616116321e-05, "loss": 3.1735, "step": 16075 }, { "epoch": 1.0925397472482674, "grad_norm": 2.0121519565582275, "learning_rate": 8.634749966027993e-05, "loss": 3.1517, "step": 16080 }, { "epoch": 1.0928794673189293, "grad_norm": 2.3270137310028076, "learning_rate": 8.634325315939667e-05, "loss": 3.0897, "step": 16085 }, { "epoch": 1.093219187389591, "grad_norm": 2.4425158500671387, "learning_rate": 8.633900665851338e-05, "loss": 2.8674, "step": 16090 }, { "epoch": 1.0935589074602527, "grad_norm": 3.075639247894287, "learning_rate": 8.633476015763011e-05, "loss": 3.2719, "step": 16095 }, { "epoch": 1.0938986275309146, "grad_norm": 2.0954062938690186, "learning_rate": 8.633051365674685e-05, "loss": 3.2527, "step": 16100 }, { "epoch": 1.0942383476015762, "grad_norm": 2.13724422454834, "learning_rate": 8.632626715586357e-05, "loss": 3.0245, "step": 16105 }, { "epoch": 1.094578067672238, "grad_norm": 2.8380849361419678, "learning_rate": 8.63220206549803e-05, "loss": 3.0688, "step": 
16110 }, { "epoch": 1.0949177877429, "grad_norm": 2.4382002353668213, "learning_rate": 8.631777415409704e-05, "loss": 3.1912, "step": 16115 }, { "epoch": 1.0952575078135616, "grad_norm": 2.5092978477478027, "learning_rate": 8.631352765321375e-05, "loss": 3.258, "step": 16120 }, { "epoch": 1.0955972278842234, "grad_norm": 3.227663278579712, "learning_rate": 8.630928115233048e-05, "loss": 3.061, "step": 16125 }, { "epoch": 1.0959369479548853, "grad_norm": 2.041472911834717, "learning_rate": 8.630503465144722e-05, "loss": 3.3431, "step": 16130 }, { "epoch": 1.096276668025547, "grad_norm": 2.510363817214966, "learning_rate": 8.630078815056393e-05, "loss": 2.9741, "step": 16135 }, { "epoch": 1.0966163880962088, "grad_norm": 1.694738507270813, "learning_rate": 8.629654164968066e-05, "loss": 3.3611, "step": 16140 }, { "epoch": 1.0969561081668706, "grad_norm": 1.9568151235580444, "learning_rate": 8.62922951487974e-05, "loss": 3.0013, "step": 16145 }, { "epoch": 1.0972958282375322, "grad_norm": 1.975366473197937, "learning_rate": 8.628804864791412e-05, "loss": 3.1205, "step": 16150 }, { "epoch": 1.097635548308194, "grad_norm": 2.414090871810913, "learning_rate": 8.628380214703085e-05, "loss": 3.1255, "step": 16155 }, { "epoch": 1.0979752683788557, "grad_norm": 2.1612026691436768, "learning_rate": 8.627955564614757e-05, "loss": 3.1081, "step": 16160 }, { "epoch": 1.0983149884495176, "grad_norm": 2.985217809677124, "learning_rate": 8.62753091452643e-05, "loss": 3.1068, "step": 16165 }, { "epoch": 1.0986547085201794, "grad_norm": 2.7231714725494385, "learning_rate": 8.627106264438103e-05, "loss": 3.2082, "step": 16170 }, { "epoch": 1.098994428590841, "grad_norm": 2.356581449508667, "learning_rate": 8.626681614349776e-05, "loss": 3.2482, "step": 16175 }, { "epoch": 1.099334148661503, "grad_norm": 2.735565185546875, "learning_rate": 8.626256964261449e-05, "loss": 3.1379, "step": 16180 }, { "epoch": 1.0996738687321648, "grad_norm": 1.9703309535980225, "learning_rate": 
8.625832314173121e-05, "loss": 3.261, "step": 16185 }, { "epoch": 1.1000135888028264, "grad_norm": 2.283799409866333, "learning_rate": 8.625407664084794e-05, "loss": 2.9341, "step": 16190 }, { "epoch": 1.1003533088734883, "grad_norm": 2.292316198348999, "learning_rate": 8.624983013996467e-05, "loss": 3.2409, "step": 16195 }, { "epoch": 1.10069302894415, "grad_norm": 2.147491693496704, "learning_rate": 8.624558363908141e-05, "loss": 3.0141, "step": 16200 }, { "epoch": 1.1010327490148117, "grad_norm": 2.0741465091705322, "learning_rate": 8.624133713819813e-05, "loss": 3.2522, "step": 16205 }, { "epoch": 1.1013724690854736, "grad_norm": 1.955869197845459, "learning_rate": 8.623709063731485e-05, "loss": 2.9261, "step": 16210 }, { "epoch": 1.1017121891561354, "grad_norm": 1.770039677619934, "learning_rate": 8.62328441364316e-05, "loss": 2.8996, "step": 16215 }, { "epoch": 1.102051909226797, "grad_norm": 1.9141325950622559, "learning_rate": 8.622859763554831e-05, "loss": 3.1795, "step": 16220 }, { "epoch": 1.102391629297459, "grad_norm": 2.374511957168579, "learning_rate": 8.622435113466504e-05, "loss": 3.3029, "step": 16225 }, { "epoch": 1.1027313493681206, "grad_norm": 2.254007339477539, "learning_rate": 8.622010463378177e-05, "loss": 3.1491, "step": 16230 }, { "epoch": 1.1030710694387824, "grad_norm": 2.598451614379883, "learning_rate": 8.62158581328985e-05, "loss": 3.261, "step": 16235 }, { "epoch": 1.1034107895094443, "grad_norm": 2.0318565368652344, "learning_rate": 8.621161163201522e-05, "loss": 3.3017, "step": 16240 }, { "epoch": 1.103750509580106, "grad_norm": 2.161024808883667, "learning_rate": 8.620736513113195e-05, "loss": 3.3279, "step": 16245 }, { "epoch": 1.1040902296507678, "grad_norm": 2.296281337738037, "learning_rate": 8.620311863024868e-05, "loss": 3.2609, "step": 16250 }, { "epoch": 1.1044299497214296, "grad_norm": 3.0991873741149902, "learning_rate": 8.61988721293654e-05, "loss": 3.1059, "step": 16255 }, { "epoch": 1.1047696697920912, "grad_norm": 
1.9574401378631592, "learning_rate": 8.619462562848213e-05, "loss": 2.9239, "step": 16260 }, { "epoch": 1.105109389862753, "grad_norm": 2.292971611022949, "learning_rate": 8.619037912759886e-05, "loss": 3.0658, "step": 16265 }, { "epoch": 1.105449109933415, "grad_norm": 2.8157806396484375, "learning_rate": 8.618613262671559e-05, "loss": 3.0061, "step": 16270 }, { "epoch": 1.1057888300040766, "grad_norm": 2.398336887359619, "learning_rate": 8.618188612583232e-05, "loss": 3.2219, "step": 16275 }, { "epoch": 1.1061285500747384, "grad_norm": 3.086050271987915, "learning_rate": 8.617763962494905e-05, "loss": 3.0895, "step": 16280 }, { "epoch": 1.1064682701454003, "grad_norm": 2.5477700233459473, "learning_rate": 8.617339312406577e-05, "loss": 3.2036, "step": 16285 }, { "epoch": 1.106807990216062, "grad_norm": 2.0780415534973145, "learning_rate": 8.61691466231825e-05, "loss": 2.9129, "step": 16290 }, { "epoch": 1.1071477102867238, "grad_norm": 2.314579486846924, "learning_rate": 8.616490012229923e-05, "loss": 2.8898, "step": 16295 }, { "epoch": 1.1074874303573856, "grad_norm": 3.482827663421631, "learning_rate": 8.616065362141596e-05, "loss": 2.8581, "step": 16300 }, { "epoch": 1.1078271504280472, "grad_norm": 2.2679224014282227, "learning_rate": 8.615640712053269e-05, "loss": 3.0698, "step": 16305 }, { "epoch": 1.108166870498709, "grad_norm": 1.9809743165969849, "learning_rate": 8.615216061964941e-05, "loss": 3.3289, "step": 16310 }, { "epoch": 1.108506590569371, "grad_norm": 2.274793863296509, "learning_rate": 8.614791411876614e-05, "loss": 2.958, "step": 16315 }, { "epoch": 1.1088463106400326, "grad_norm": 2.2958693504333496, "learning_rate": 8.614366761788287e-05, "loss": 3.2688, "step": 16320 }, { "epoch": 1.1091860307106944, "grad_norm": 1.9463376998901367, "learning_rate": 8.61394211169996e-05, "loss": 3.1601, "step": 16325 }, { "epoch": 1.109525750781356, "grad_norm": 2.8061790466308594, "learning_rate": 8.613517461611633e-05, "loss": 3.0366, "step": 16330 }, { 
"epoch": 1.109865470852018, "grad_norm": 1.6833640336990356, "learning_rate": 8.613092811523305e-05, "loss": 3.2329, "step": 16335 }, { "epoch": 1.1102051909226798, "grad_norm": 2.233766794204712, "learning_rate": 8.612668161434978e-05, "loss": 3.0547, "step": 16340 }, { "epoch": 1.1105449109933414, "grad_norm": 2.355074882507324, "learning_rate": 8.612243511346651e-05, "loss": 3.301, "step": 16345 }, { "epoch": 1.1108846310640033, "grad_norm": 2.138704538345337, "learning_rate": 8.611818861258324e-05, "loss": 3.2653, "step": 16350 }, { "epoch": 1.1112243511346651, "grad_norm": 2.511831521987915, "learning_rate": 8.611394211169997e-05, "loss": 3.1332, "step": 16355 }, { "epoch": 1.1115640712053267, "grad_norm": 2.0067574977874756, "learning_rate": 8.610969561081668e-05, "loss": 2.9857, "step": 16360 }, { "epoch": 1.1119037912759886, "grad_norm": 2.110568046569824, "learning_rate": 8.610544910993342e-05, "loss": 3.125, "step": 16365 }, { "epoch": 1.1122435113466504, "grad_norm": 2.5426957607269287, "learning_rate": 8.610120260905015e-05, "loss": 3.1039, "step": 16370 }, { "epoch": 1.112583231417312, "grad_norm": 1.7200394868850708, "learning_rate": 8.609695610816687e-05, "loss": 2.9743, "step": 16375 }, { "epoch": 1.112922951487974, "grad_norm": 2.3359487056732178, "learning_rate": 8.60927096072836e-05, "loss": 3.1945, "step": 16380 }, { "epoch": 1.1132626715586358, "grad_norm": 2.1111721992492676, "learning_rate": 8.608846310640033e-05, "loss": 3.2685, "step": 16385 }, { "epoch": 1.1136023916292974, "grad_norm": 2.470550060272217, "learning_rate": 8.608421660551705e-05, "loss": 3.3793, "step": 16390 }, { "epoch": 1.1139421116999593, "grad_norm": 2.086550712585449, "learning_rate": 8.607997010463379e-05, "loss": 3.2846, "step": 16395 }, { "epoch": 1.114281831770621, "grad_norm": 2.1649057865142822, "learning_rate": 8.607572360375052e-05, "loss": 3.0462, "step": 16400 }, { "epoch": 1.1146215518412828, "grad_norm": 2.367246389389038, "learning_rate": 
8.607147710286723e-05, "loss": 3.1444, "step": 16405 }, { "epoch": 1.1149612719119446, "grad_norm": 2.2998857498168945, "learning_rate": 8.606723060198397e-05, "loss": 3.2317, "step": 16410 }, { "epoch": 1.1153009919826062, "grad_norm": 2.385794162750244, "learning_rate": 8.60629841011007e-05, "loss": 3.1187, "step": 16415 }, { "epoch": 1.115640712053268, "grad_norm": 2.265432596206665, "learning_rate": 8.605873760021742e-05, "loss": 3.2452, "step": 16420 }, { "epoch": 1.11598043212393, "grad_norm": 1.877649188041687, "learning_rate": 8.605449109933416e-05, "loss": 2.9557, "step": 16425 }, { "epoch": 1.1163201521945916, "grad_norm": 2.5823593139648438, "learning_rate": 8.605024459845087e-05, "loss": 3.2503, "step": 16430 }, { "epoch": 1.1166598722652534, "grad_norm": 3.100353479385376, "learning_rate": 8.60459980975676e-05, "loss": 3.093, "step": 16435 }, { "epoch": 1.1169995923359153, "grad_norm": 2.3010990619659424, "learning_rate": 8.604175159668434e-05, "loss": 3.18, "step": 16440 }, { "epoch": 1.117339312406577, "grad_norm": 2.706552028656006, "learning_rate": 8.603750509580106e-05, "loss": 3.0878, "step": 16445 }, { "epoch": 1.1176790324772388, "grad_norm": 2.202302932739258, "learning_rate": 8.603325859491779e-05, "loss": 3.4724, "step": 16450 }, { "epoch": 1.1180187525479006, "grad_norm": 2.3082780838012695, "learning_rate": 8.602901209403453e-05, "loss": 3.1427, "step": 16455 }, { "epoch": 1.1183584726185622, "grad_norm": 2.7347395420074463, "learning_rate": 8.60256148933279e-05, "loss": 3.1853, "step": 16460 }, { "epoch": 1.118698192689224, "grad_norm": 2.2122974395751953, "learning_rate": 8.602136839244464e-05, "loss": 3.3377, "step": 16465 }, { "epoch": 1.119037912759886, "grad_norm": 2.7581090927124023, "learning_rate": 8.601712189156137e-05, "loss": 2.9068, "step": 16470 }, { "epoch": 1.1193776328305476, "grad_norm": 1.8623926639556885, "learning_rate": 8.601287539067808e-05, "loss": 3.2701, "step": 16475 }, { "epoch": 1.1197173529012094, "grad_norm": 
2.0706443786621094, "learning_rate": 8.600862888979482e-05, "loss": 3.2068, "step": 16480 }, { "epoch": 1.1200570729718713, "grad_norm": 2.152874708175659, "learning_rate": 8.600438238891154e-05, "loss": 3.0626, "step": 16485 }, { "epoch": 1.120396793042533, "grad_norm": 2.053597927093506, "learning_rate": 8.600013588802826e-05, "loss": 3.1452, "step": 16490 }, { "epoch": 1.1207365131131948, "grad_norm": 2.2044103145599365, "learning_rate": 8.5995889387145e-05, "loss": 3.224, "step": 16495 }, { "epoch": 1.1210762331838564, "grad_norm": 2.230273723602295, "learning_rate": 8.599164288626172e-05, "loss": 3.1447, "step": 16500 }, { "epoch": 1.1214159532545183, "grad_norm": 2.198505401611328, "learning_rate": 8.598739638537845e-05, "loss": 3.1565, "step": 16505 }, { "epoch": 1.1217556733251801, "grad_norm": 2.218946695327759, "learning_rate": 8.598314988449519e-05, "loss": 3.084, "step": 16510 }, { "epoch": 1.1220953933958417, "grad_norm": 2.240658760070801, "learning_rate": 8.59789033836119e-05, "loss": 3.1794, "step": 16515 }, { "epoch": 1.1224351134665036, "grad_norm": 2.132720708847046, "learning_rate": 8.597465688272863e-05, "loss": 3.2455, "step": 16520 }, { "epoch": 1.1227748335371655, "grad_norm": 2.004615545272827, "learning_rate": 8.597041038184537e-05, "loss": 2.9868, "step": 16525 }, { "epoch": 1.123114553607827, "grad_norm": 2.115450620651245, "learning_rate": 8.596616388096209e-05, "loss": 3.0763, "step": 16530 }, { "epoch": 1.123454273678489, "grad_norm": 2.229064702987671, "learning_rate": 8.596191738007882e-05, "loss": 3.0351, "step": 16535 }, { "epoch": 1.1237939937491508, "grad_norm": 2.4399330615997314, "learning_rate": 8.595767087919556e-05, "loss": 3.3476, "step": 16540 }, { "epoch": 1.1241337138198124, "grad_norm": 2.296751022338867, "learning_rate": 8.595342437831227e-05, "loss": 3.2091, "step": 16545 }, { "epoch": 1.1244734338904743, "grad_norm": 2.415126085281372, "learning_rate": 8.5949177877429e-05, "loss": 3.1151, "step": 16550 }, { "epoch": 
1.1248131539611361, "grad_norm": 2.2919363975524902, "learning_rate": 8.594493137654573e-05, "loss": 3.2003, "step": 16555 }, { "epoch": 1.1251528740317978, "grad_norm": 2.525120258331299, "learning_rate": 8.594068487566246e-05, "loss": 3.218, "step": 16560 }, { "epoch": 1.1254925941024596, "grad_norm": 2.6710808277130127, "learning_rate": 8.593643837477918e-05, "loss": 3.2876, "step": 16565 }, { "epoch": 1.1258323141731212, "grad_norm": 2.4441282749176025, "learning_rate": 8.593219187389591e-05, "loss": 3.0996, "step": 16570 }, { "epoch": 1.126172034243783, "grad_norm": 2.353207588195801, "learning_rate": 8.592794537301264e-05, "loss": 3.2753, "step": 16575 }, { "epoch": 1.126511754314445, "grad_norm": 2.584679126739502, "learning_rate": 8.592369887212937e-05, "loss": 3.2511, "step": 16580 }, { "epoch": 1.1268514743851066, "grad_norm": 1.8293663263320923, "learning_rate": 8.59194523712461e-05, "loss": 3.2099, "step": 16585 }, { "epoch": 1.1271911944557684, "grad_norm": 2.2782256603240967, "learning_rate": 8.591520587036282e-05, "loss": 3.0469, "step": 16590 }, { "epoch": 1.1275309145264303, "grad_norm": 2.7480275630950928, "learning_rate": 8.591095936947955e-05, "loss": 3.3851, "step": 16595 }, { "epoch": 1.127870634597092, "grad_norm": 3.2154648303985596, "learning_rate": 8.590671286859628e-05, "loss": 3.1037, "step": 16600 }, { "epoch": 1.1282103546677538, "grad_norm": 2.23931884765625, "learning_rate": 8.590246636771301e-05, "loss": 2.9678, "step": 16605 }, { "epoch": 1.1285500747384156, "grad_norm": 2.300801992416382, "learning_rate": 8.589821986682974e-05, "loss": 3.1218, "step": 16610 }, { "epoch": 1.1288897948090773, "grad_norm": 2.4723966121673584, "learning_rate": 8.589397336594646e-05, "loss": 3.2169, "step": 16615 }, { "epoch": 1.129229514879739, "grad_norm": 2.23512864112854, "learning_rate": 8.588972686506319e-05, "loss": 3.0868, "step": 16620 }, { "epoch": 1.129569234950401, "grad_norm": 2.3879282474517822, "learning_rate": 8.588548036417992e-05, 
"loss": 3.1783, "step": 16625 }, { "epoch": 1.1299089550210626, "grad_norm": 2.5794687271118164, "learning_rate": 8.588123386329665e-05, "loss": 3.1455, "step": 16630 }, { "epoch": 1.1302486750917244, "grad_norm": 2.4697799682617188, "learning_rate": 8.587698736241338e-05, "loss": 3.3277, "step": 16635 }, { "epoch": 1.1305883951623863, "grad_norm": 2.625331401824951, "learning_rate": 8.58727408615301e-05, "loss": 3.2481, "step": 16640 }, { "epoch": 1.130928115233048, "grad_norm": 2.468745708465576, "learning_rate": 8.586849436064683e-05, "loss": 3.0601, "step": 16645 }, { "epoch": 1.1312678353037098, "grad_norm": 2.3153254985809326, "learning_rate": 8.586424785976356e-05, "loss": 3.3684, "step": 16650 }, { "epoch": 1.1316075553743716, "grad_norm": 2.171705484390259, "learning_rate": 8.586000135888029e-05, "loss": 3.0198, "step": 16655 }, { "epoch": 1.1319472754450333, "grad_norm": 2.3118605613708496, "learning_rate": 8.585575485799702e-05, "loss": 3.2327, "step": 16660 }, { "epoch": 1.1322869955156951, "grad_norm": 1.8895362615585327, "learning_rate": 8.585150835711374e-05, "loss": 3.1462, "step": 16665 }, { "epoch": 1.1326267155863567, "grad_norm": 2.0137879848480225, "learning_rate": 8.584726185623047e-05, "loss": 3.1581, "step": 16670 }, { "epoch": 1.1329664356570186, "grad_norm": 2.1412277221679688, "learning_rate": 8.58430153553472e-05, "loss": 3.1931, "step": 16675 }, { "epoch": 1.1333061557276805, "grad_norm": 2.565464735031128, "learning_rate": 8.583876885446393e-05, "loss": 3.0637, "step": 16680 }, { "epoch": 1.133645875798342, "grad_norm": 2.1115612983703613, "learning_rate": 8.583452235358064e-05, "loss": 3.2082, "step": 16685 }, { "epoch": 1.133985595869004, "grad_norm": 2.767141580581665, "learning_rate": 8.583027585269738e-05, "loss": 3.2512, "step": 16690 }, { "epoch": 1.1343253159396658, "grad_norm": 2.1114745140075684, "learning_rate": 8.582602935181411e-05, "loss": 3.1951, "step": 16695 }, { "epoch": 1.1346650360103274, "grad_norm": 
2.2073051929473877, "learning_rate": 8.582178285093083e-05, "loss": 2.9713, "step": 16700 }, { "epoch": 1.1350047560809893, "grad_norm": 2.117887020111084, "learning_rate": 8.581753635004757e-05, "loss": 3.2467, "step": 16705 }, { "epoch": 1.1353444761516511, "grad_norm": 2.73413348197937, "learning_rate": 8.58132898491643e-05, "loss": 3.1864, "step": 16710 }, { "epoch": 1.1356841962223128, "grad_norm": 2.1243979930877686, "learning_rate": 8.580904334828101e-05, "loss": 3.1065, "step": 16715 }, { "epoch": 1.1360239162929746, "grad_norm": 2.111412286758423, "learning_rate": 8.580479684739775e-05, "loss": 3.3383, "step": 16720 }, { "epoch": 1.1363636363636362, "grad_norm": 2.344789505004883, "learning_rate": 8.580055034651448e-05, "loss": 3.2155, "step": 16725 }, { "epoch": 1.136703356434298, "grad_norm": 2.131981611251831, "learning_rate": 8.57963038456312e-05, "loss": 3.3473, "step": 16730 }, { "epoch": 1.13704307650496, "grad_norm": 2.2861790657043457, "learning_rate": 8.579205734474794e-05, "loss": 3.3183, "step": 16735 }, { "epoch": 1.1373827965756216, "grad_norm": 2.4504401683807373, "learning_rate": 8.578781084386466e-05, "loss": 3.0199, "step": 16740 }, { "epoch": 1.1377225166462834, "grad_norm": 2.132719039916992, "learning_rate": 8.578356434298139e-05, "loss": 3.1277, "step": 16745 }, { "epoch": 1.1380622367169453, "grad_norm": 2.6674137115478516, "learning_rate": 8.577931784209812e-05, "loss": 3.1785, "step": 16750 }, { "epoch": 1.138401956787607, "grad_norm": 3.072659730911255, "learning_rate": 8.577507134121483e-05, "loss": 3.2915, "step": 16755 }, { "epoch": 1.1387416768582688, "grad_norm": 3.810137987136841, "learning_rate": 8.577082484033158e-05, "loss": 3.0122, "step": 16760 }, { "epoch": 1.1390813969289306, "grad_norm": 1.7937947511672974, "learning_rate": 8.57665783394483e-05, "loss": 3.3811, "step": 16765 }, { "epoch": 1.1394211169995923, "grad_norm": 2.6032357215881348, "learning_rate": 8.576233183856502e-05, "loss": 3.1355, "step": 16770 }, { 
"epoch": 1.139760837070254, "grad_norm": 2.4091122150421143, "learning_rate": 8.575808533768176e-05, "loss": 3.0669, "step": 16775 }, { "epoch": 1.140100557140916, "grad_norm": 2.0258991718292236, "learning_rate": 8.575383883679849e-05, "loss": 3.0335, "step": 16780 }, { "epoch": 1.1404402772115776, "grad_norm": 2.6693549156188965, "learning_rate": 8.57495923359152e-05, "loss": 3.1867, "step": 16785 }, { "epoch": 1.1407799972822394, "grad_norm": 2.249675750732422, "learning_rate": 8.574534583503194e-05, "loss": 3.4236, "step": 16790 }, { "epoch": 1.1411197173529013, "grad_norm": 2.4057884216308594, "learning_rate": 8.574109933414867e-05, "loss": 2.9178, "step": 16795 }, { "epoch": 1.141459437423563, "grad_norm": 2.2202658653259277, "learning_rate": 8.573685283326539e-05, "loss": 3.1986, "step": 16800 }, { "epoch": 1.1417991574942248, "grad_norm": 2.370149850845337, "learning_rate": 8.573260633238213e-05, "loss": 3.2366, "step": 16805 }, { "epoch": 1.1421388775648866, "grad_norm": 2.285414695739746, "learning_rate": 8.572835983149886e-05, "loss": 3.2788, "step": 16810 }, { "epoch": 1.1424785976355483, "grad_norm": 2.5869669914245605, "learning_rate": 8.572411333061557e-05, "loss": 3.1342, "step": 16815 }, { "epoch": 1.1428183177062101, "grad_norm": 2.224613666534424, "learning_rate": 8.571986682973231e-05, "loss": 2.859, "step": 16820 }, { "epoch": 1.143158037776872, "grad_norm": 2.589503288269043, "learning_rate": 8.571562032884903e-05, "loss": 3.0213, "step": 16825 }, { "epoch": 1.1434977578475336, "grad_norm": 2.192312240600586, "learning_rate": 8.571137382796575e-05, "loss": 3.1909, "step": 16830 }, { "epoch": 1.1438374779181955, "grad_norm": 2.127734899520874, "learning_rate": 8.57071273270825e-05, "loss": 3.2997, "step": 16835 }, { "epoch": 1.144177197988857, "grad_norm": 2.3593618869781494, "learning_rate": 8.570288082619921e-05, "loss": 3.3107, "step": 16840 }, { "epoch": 1.144516918059519, "grad_norm": 2.0043985843658447, "learning_rate": 
8.569863432531594e-05, "loss": 3.139, "step": 16845 }, { "epoch": 1.1448566381301808, "grad_norm": 2.446709632873535, "learning_rate": 8.569438782443268e-05, "loss": 3.2184, "step": 16850 }, { "epoch": 1.1451963582008424, "grad_norm": 2.5402939319610596, "learning_rate": 8.56901413235494e-05, "loss": 2.9559, "step": 16855 }, { "epoch": 1.1455360782715043, "grad_norm": 2.082660436630249, "learning_rate": 8.568589482266612e-05, "loss": 3.2496, "step": 16860 }, { "epoch": 1.1458757983421661, "grad_norm": 2.2846643924713135, "learning_rate": 8.568164832178286e-05, "loss": 2.9525, "step": 16865 }, { "epoch": 1.1462155184128278, "grad_norm": 2.511950969696045, "learning_rate": 8.567740182089958e-05, "loss": 3.0708, "step": 16870 }, { "epoch": 1.1465552384834896, "grad_norm": 2.654700994491577, "learning_rate": 8.56731553200163e-05, "loss": 3.3991, "step": 16875 }, { "epoch": 1.1468949585541515, "grad_norm": 2.331679582595825, "learning_rate": 8.566890881913305e-05, "loss": 3.2186, "step": 16880 }, { "epoch": 1.147234678624813, "grad_norm": 2.4103479385375977, "learning_rate": 8.566466231824976e-05, "loss": 3.1998, "step": 16885 }, { "epoch": 1.147574398695475, "grad_norm": 2.4473280906677246, "learning_rate": 8.566041581736649e-05, "loss": 3.224, "step": 16890 }, { "epoch": 1.1479141187661366, "grad_norm": 2.3549439907073975, "learning_rate": 8.565616931648323e-05, "loss": 3.1537, "step": 16895 }, { "epoch": 1.1482538388367984, "grad_norm": 2.334117889404297, "learning_rate": 8.565192281559995e-05, "loss": 2.9916, "step": 16900 }, { "epoch": 1.1485935589074603, "grad_norm": 1.8435026407241821, "learning_rate": 8.564767631471667e-05, "loss": 3.2278, "step": 16905 }, { "epoch": 1.148933278978122, "grad_norm": 3.083643913269043, "learning_rate": 8.56434298138334e-05, "loss": 3.025, "step": 16910 }, { "epoch": 1.1492729990487838, "grad_norm": 2.2554051876068115, "learning_rate": 8.563918331295013e-05, "loss": 3.1764, "step": 16915 }, { "epoch": 1.1496127191194456, 
"grad_norm": 2.209787130355835, "learning_rate": 8.563493681206686e-05, "loss": 3.0753, "step": 16920 }, { "epoch": 1.1499524391901073, "grad_norm": 4.730804443359375, "learning_rate": 8.563069031118359e-05, "loss": 3.0808, "step": 16925 }, { "epoch": 1.150292159260769, "grad_norm": 2.500304937362671, "learning_rate": 8.562644381030031e-05, "loss": 3.1797, "step": 16930 }, { "epoch": 1.150631879331431, "grad_norm": 2.559262275695801, "learning_rate": 8.562219730941704e-05, "loss": 3.0817, "step": 16935 }, { "epoch": 1.1509715994020926, "grad_norm": 2.0849416255950928, "learning_rate": 8.561795080853377e-05, "loss": 3.2532, "step": 16940 }, { "epoch": 1.1513113194727544, "grad_norm": 2.04194712638855, "learning_rate": 8.56137043076505e-05, "loss": 2.9838, "step": 16945 }, { "epoch": 1.1516510395434163, "grad_norm": 2.3993418216705322, "learning_rate": 8.560945780676723e-05, "loss": 3.1991, "step": 16950 }, { "epoch": 1.151990759614078, "grad_norm": 1.8043920993804932, "learning_rate": 8.560521130588395e-05, "loss": 2.9373, "step": 16955 }, { "epoch": 1.1523304796847398, "grad_norm": 2.1885170936584473, "learning_rate": 8.560096480500068e-05, "loss": 2.9557, "step": 16960 }, { "epoch": 1.1526701997554016, "grad_norm": 2.1039299964904785, "learning_rate": 8.559671830411741e-05, "loss": 3.0931, "step": 16965 }, { "epoch": 1.1530099198260633, "grad_norm": 2.7661192417144775, "learning_rate": 8.559247180323414e-05, "loss": 3.1221, "step": 16970 }, { "epoch": 1.1533496398967251, "grad_norm": 2.545485258102417, "learning_rate": 8.558822530235087e-05, "loss": 3.0511, "step": 16975 }, { "epoch": 1.153689359967387, "grad_norm": 2.4748198986053467, "learning_rate": 8.55839788014676e-05, "loss": 3.113, "step": 16980 }, { "epoch": 1.1540290800380486, "grad_norm": 2.437826156616211, "learning_rate": 8.557973230058432e-05, "loss": 3.2185, "step": 16985 }, { "epoch": 1.1543688001087105, "grad_norm": 2.1418001651763916, "learning_rate": 8.557548579970105e-05, "loss": 2.956, "step": 
16990 }, { "epoch": 1.1547085201793723, "grad_norm": 2.22198224067688, "learning_rate": 8.557123929881778e-05, "loss": 3.1626, "step": 16995 }, { "epoch": 1.155048240250034, "grad_norm": 2.625210762023926, "learning_rate": 8.556699279793451e-05, "loss": 3.1484, "step": 17000 }, { "epoch": 1.1553879603206958, "grad_norm": 2.673679828643799, "learning_rate": 8.556274629705123e-05, "loss": 3.1328, "step": 17005 }, { "epoch": 1.1557276803913574, "grad_norm": 2.514819860458374, "learning_rate": 8.555849979616796e-05, "loss": 3.1742, "step": 17010 }, { "epoch": 1.1560674004620193, "grad_norm": 2.2032127380371094, "learning_rate": 8.555425329528469e-05, "loss": 3.1751, "step": 17015 }, { "epoch": 1.1564071205326811, "grad_norm": 1.7072609663009644, "learning_rate": 8.555000679440142e-05, "loss": 3.2175, "step": 17020 }, { "epoch": 1.1567468406033428, "grad_norm": 2.5565409660339355, "learning_rate": 8.554576029351813e-05, "loss": 3.3605, "step": 17025 }, { "epoch": 1.1570865606740046, "grad_norm": 2.6628568172454834, "learning_rate": 8.554151379263487e-05, "loss": 3.0904, "step": 17030 }, { "epoch": 1.1574262807446665, "grad_norm": 2.65402889251709, "learning_rate": 8.55372672917516e-05, "loss": 3.0857, "step": 17035 }, { "epoch": 1.157766000815328, "grad_norm": 2.5074262619018555, "learning_rate": 8.553302079086832e-05, "loss": 2.9191, "step": 17040 }, { "epoch": 1.15810572088599, "grad_norm": 1.8939683437347412, "learning_rate": 8.552877428998506e-05, "loss": 3.3389, "step": 17045 }, { "epoch": 1.1584454409566518, "grad_norm": 2.3318254947662354, "learning_rate": 8.552452778910179e-05, "loss": 2.9196, "step": 17050 }, { "epoch": 1.1587851610273134, "grad_norm": 2.0132625102996826, "learning_rate": 8.55202812882185e-05, "loss": 3.2216, "step": 17055 }, { "epoch": 1.1591248810979753, "grad_norm": 1.9142305850982666, "learning_rate": 8.551603478733524e-05, "loss": 3.1704, "step": 17060 }, { "epoch": 1.159464601168637, "grad_norm": 1.8424546718597412, "learning_rate": 
8.551178828645197e-05, "loss": 3.2671, "step": 17065 }, { "epoch": 1.1598043212392988, "grad_norm": 2.1203792095184326, "learning_rate": 8.550754178556869e-05, "loss": 3.1002, "step": 17070 }, { "epoch": 1.1601440413099606, "grad_norm": 2.369992733001709, "learning_rate": 8.550329528468543e-05, "loss": 3.281, "step": 17075 }, { "epoch": 1.1604837613806223, "grad_norm": 2.1601762771606445, "learning_rate": 8.549904878380216e-05, "loss": 2.9115, "step": 17080 }, { "epoch": 1.1608234814512841, "grad_norm": 2.275728464126587, "learning_rate": 8.549480228291888e-05, "loss": 3.0631, "step": 17085 }, { "epoch": 1.161163201521946, "grad_norm": 2.109595537185669, "learning_rate": 8.549055578203561e-05, "loss": 3.1239, "step": 17090 }, { "epoch": 1.1615029215926076, "grad_norm": 2.128661632537842, "learning_rate": 8.548630928115234e-05, "loss": 3.1442, "step": 17095 }, { "epoch": 1.1618426416632694, "grad_norm": 2.3789777755737305, "learning_rate": 8.548206278026907e-05, "loss": 3.0954, "step": 17100 }, { "epoch": 1.1621823617339313, "grad_norm": 2.137110710144043, "learning_rate": 8.54778162793858e-05, "loss": 3.1697, "step": 17105 }, { "epoch": 1.162522081804593, "grad_norm": 1.9250047206878662, "learning_rate": 8.547356977850251e-05, "loss": 3.0934, "step": 17110 }, { "epoch": 1.1628618018752548, "grad_norm": 2.3185181617736816, "learning_rate": 8.546932327761925e-05, "loss": 2.9383, "step": 17115 }, { "epoch": 1.1632015219459166, "grad_norm": 2.4098916053771973, "learning_rate": 8.546507677673598e-05, "loss": 3.1652, "step": 17120 }, { "epoch": 1.1635412420165783, "grad_norm": 1.968785047531128, "learning_rate": 8.54608302758527e-05, "loss": 2.9564, "step": 17125 }, { "epoch": 1.1638809620872401, "grad_norm": 1.872333288192749, "learning_rate": 8.545658377496944e-05, "loss": 3.4779, "step": 17130 }, { "epoch": 1.164220682157902, "grad_norm": 2.3156728744506836, "learning_rate": 8.545233727408616e-05, "loss": 2.9883, "step": 17135 }, { "epoch": 1.1645604022285636, 
"grad_norm": 2.2558436393737793, "learning_rate": 8.544809077320288e-05, "loss": 3.2636, "step": 17140 }, { "epoch": 1.1649001222992255, "grad_norm": 2.6535086631774902, "learning_rate": 8.544384427231962e-05, "loss": 3.1672, "step": 17145 }, { "epoch": 1.1652398423698873, "grad_norm": 2.6773743629455566, "learning_rate": 8.543959777143635e-05, "loss": 3.2249, "step": 17150 }, { "epoch": 1.165579562440549, "grad_norm": 1.7836766242980957, "learning_rate": 8.543535127055306e-05, "loss": 3.114, "step": 17155 }, { "epoch": 1.1659192825112108, "grad_norm": 2.1389663219451904, "learning_rate": 8.54311047696698e-05, "loss": 3.3641, "step": 17160 }, { "epoch": 1.1662590025818727, "grad_norm": 2.6542718410491943, "learning_rate": 8.542685826878653e-05, "loss": 3.1683, "step": 17165 }, { "epoch": 1.1665987226525343, "grad_norm": 1.8113877773284912, "learning_rate": 8.542261176790325e-05, "loss": 2.9448, "step": 17170 }, { "epoch": 1.1669384427231961, "grad_norm": 2.655492067337036, "learning_rate": 8.541836526701999e-05, "loss": 3.3364, "step": 17175 }, { "epoch": 1.1672781627938578, "grad_norm": 2.340226411819458, "learning_rate": 8.54141187661367e-05, "loss": 3.3013, "step": 17180 }, { "epoch": 1.1676178828645196, "grad_norm": 2.045069932937622, "learning_rate": 8.540987226525343e-05, "loss": 3.2449, "step": 17185 }, { "epoch": 1.1679576029351815, "grad_norm": 2.474536180496216, "learning_rate": 8.540562576437017e-05, "loss": 2.9656, "step": 17190 }, { "epoch": 1.168297323005843, "grad_norm": 3.101227283477783, "learning_rate": 8.540137926348689e-05, "loss": 3.0324, "step": 17195 }, { "epoch": 1.168637043076505, "grad_norm": 2.74116587638855, "learning_rate": 8.539713276260361e-05, "loss": 3.3847, "step": 17200 }, { "epoch": 1.1689767631471668, "grad_norm": 2.042508602142334, "learning_rate": 8.539288626172036e-05, "loss": 3.1468, "step": 17205 }, { "epoch": 1.1693164832178284, "grad_norm": 2.0567827224731445, "learning_rate": 8.538863976083707e-05, "loss": 3.0906, 
"step": 17210 }, { "epoch": 1.1696562032884903, "grad_norm": 2.2396318912506104, "learning_rate": 8.53843932599538e-05, "loss": 3.1771, "step": 17215 }, { "epoch": 1.1699959233591521, "grad_norm": 1.6931891441345215, "learning_rate": 8.538014675907054e-05, "loss": 3.2719, "step": 17220 }, { "epoch": 1.1703356434298138, "grad_norm": 2.093397617340088, "learning_rate": 8.537590025818725e-05, "loss": 2.9152, "step": 17225 }, { "epoch": 1.1706753635004756, "grad_norm": 2.667349100112915, "learning_rate": 8.537165375730398e-05, "loss": 3.1451, "step": 17230 }, { "epoch": 1.1710150835711373, "grad_norm": 2.143707275390625, "learning_rate": 8.536740725642072e-05, "loss": 2.8677, "step": 17235 }, { "epoch": 1.1713548036417991, "grad_norm": 2.3816161155700684, "learning_rate": 8.536316075553744e-05, "loss": 2.7687, "step": 17240 }, { "epoch": 1.171694523712461, "grad_norm": 2.2845377922058105, "learning_rate": 8.535891425465417e-05, "loss": 3.1063, "step": 17245 }, { "epoch": 1.1720342437831226, "grad_norm": 2.644127607345581, "learning_rate": 8.535466775377091e-05, "loss": 3.1788, "step": 17250 }, { "epoch": 1.1723739638537845, "grad_norm": 2.554811954498291, "learning_rate": 8.535042125288762e-05, "loss": 3.2802, "step": 17255 }, { "epoch": 1.1727136839244463, "grad_norm": 2.06567645072937, "learning_rate": 8.534617475200435e-05, "loss": 3.2183, "step": 17260 }, { "epoch": 1.173053403995108, "grad_norm": 2.3143064975738525, "learning_rate": 8.534192825112108e-05, "loss": 3.1405, "step": 17265 }, { "epoch": 1.1733931240657698, "grad_norm": 2.3943819999694824, "learning_rate": 8.53376817502378e-05, "loss": 3.2057, "step": 17270 }, { "epoch": 1.1737328441364316, "grad_norm": 2.074068784713745, "learning_rate": 8.533343524935453e-05, "loss": 2.9001, "step": 17275 }, { "epoch": 1.1740725642070933, "grad_norm": 2.9090402126312256, "learning_rate": 8.532918874847126e-05, "loss": 3.3257, "step": 17280 }, { "epoch": 1.1744122842777551, "grad_norm": 2.402426242828369, 
"learning_rate": 8.532494224758799e-05, "loss": 3.1006, "step": 17285 }, { "epoch": 1.174752004348417, "grad_norm": 2.293633460998535, "learning_rate": 8.532069574670472e-05, "loss": 3.1106, "step": 17290 }, { "epoch": 1.1750917244190786, "grad_norm": 2.5674209594726562, "learning_rate": 8.531644924582145e-05, "loss": 3.2722, "step": 17295 }, { "epoch": 1.1754314444897405, "grad_norm": 2.1805107593536377, "learning_rate": 8.531220274493817e-05, "loss": 3.3079, "step": 17300 }, { "epoch": 1.1757711645604023, "grad_norm": 2.1323251724243164, "learning_rate": 8.53079562440549e-05, "loss": 3.3061, "step": 17305 }, { "epoch": 1.176110884631064, "grad_norm": 2.791619300842285, "learning_rate": 8.530370974317163e-05, "loss": 3.2324, "step": 17310 }, { "epoch": 1.1764506047017258, "grad_norm": 1.8311772346496582, "learning_rate": 8.529946324228836e-05, "loss": 3.0768, "step": 17315 }, { "epoch": 1.1767903247723877, "grad_norm": 2.3822219371795654, "learning_rate": 8.529521674140509e-05, "loss": 3.3545, "step": 17320 }, { "epoch": 1.1771300448430493, "grad_norm": 2.6983091831207275, "learning_rate": 8.529097024052181e-05, "loss": 3.1856, "step": 17325 }, { "epoch": 1.1774697649137111, "grad_norm": 2.0249528884887695, "learning_rate": 8.528672373963854e-05, "loss": 3.4715, "step": 17330 }, { "epoch": 1.177809484984373, "grad_norm": 1.8609148263931274, "learning_rate": 8.528247723875527e-05, "loss": 3.2364, "step": 17335 }, { "epoch": 1.1781492050550346, "grad_norm": 2.763221263885498, "learning_rate": 8.5278230737872e-05, "loss": 3.1754, "step": 17340 }, { "epoch": 1.1784889251256965, "grad_norm": 2.6841585636138916, "learning_rate": 8.527398423698873e-05, "loss": 3.084, "step": 17345 }, { "epoch": 1.178828645196358, "grad_norm": 2.157466411590576, "learning_rate": 8.526973773610545e-05, "loss": 3.0322, "step": 17350 }, { "epoch": 1.17916836526702, "grad_norm": 2.205597400665283, "learning_rate": 8.526549123522218e-05, "loss": 3.0102, "step": 17355 }, { "epoch": 
1.1795080853376818, "grad_norm": 2.634817361831665, "learning_rate": 8.526124473433891e-05, "loss": 3.3672, "step": 17360 }, { "epoch": 1.1798478054083434, "grad_norm": 2.8805477619171143, "learning_rate": 8.525699823345564e-05, "loss": 2.9156, "step": 17365 }, { "epoch": 1.1801875254790053, "grad_norm": 2.046644687652588, "learning_rate": 8.525275173257237e-05, "loss": 3.0305, "step": 17370 }, { "epoch": 1.1805272455496671, "grad_norm": 2.229313373565674, "learning_rate": 8.52485052316891e-05, "loss": 3.1037, "step": 17375 }, { "epoch": 1.1808669656203288, "grad_norm": 2.0023913383483887, "learning_rate": 8.524425873080581e-05, "loss": 3.1887, "step": 17380 }, { "epoch": 1.1812066856909906, "grad_norm": 2.0016255378723145, "learning_rate": 8.524001222992255e-05, "loss": 2.9121, "step": 17385 }, { "epoch": 1.1815464057616525, "grad_norm": 2.5384511947631836, "learning_rate": 8.523576572903928e-05, "loss": 3.1855, "step": 17390 }, { "epoch": 1.1818861258323141, "grad_norm": 2.8584842681884766, "learning_rate": 8.523151922815599e-05, "loss": 3.061, "step": 17395 }, { "epoch": 1.182225845902976, "grad_norm": 2.3286919593811035, "learning_rate": 8.522727272727273e-05, "loss": 2.9993, "step": 17400 }, { "epoch": 1.1825655659736376, "grad_norm": 2.467280387878418, "learning_rate": 8.522302622638946e-05, "loss": 3.028, "step": 17405 }, { "epoch": 1.1829052860442995, "grad_norm": 2.7928664684295654, "learning_rate": 8.521877972550618e-05, "loss": 3.2665, "step": 17410 }, { "epoch": 1.1832450061149613, "grad_norm": 2.1814064979553223, "learning_rate": 8.521453322462292e-05, "loss": 3.0085, "step": 17415 }, { "epoch": 1.183584726185623, "grad_norm": 2.075307607650757, "learning_rate": 8.521028672373965e-05, "loss": 3.0652, "step": 17420 }, { "epoch": 1.1839244462562848, "grad_norm": 2.1006062030792236, "learning_rate": 8.520604022285637e-05, "loss": 3.0541, "step": 17425 }, { "epoch": 1.1842641663269466, "grad_norm": 2.186474561691284, "learning_rate": 8.52017937219731e-05, 
"loss": 3.2293, "step": 17430 }, { "epoch": 1.1846038863976083, "grad_norm": 2.180755138397217, "learning_rate": 8.519754722108983e-05, "loss": 3.4418, "step": 17435 }, { "epoch": 1.1849436064682701, "grad_norm": 2.785910129547119, "learning_rate": 8.519330072020656e-05, "loss": 2.9607, "step": 17440 }, { "epoch": 1.185283326538932, "grad_norm": 2.3607475757598877, "learning_rate": 8.518905421932329e-05, "loss": 2.9277, "step": 17445 }, { "epoch": 1.1856230466095936, "grad_norm": 2.3492658138275146, "learning_rate": 8.518480771844001e-05, "loss": 3.0135, "step": 17450 }, { "epoch": 1.1859627666802555, "grad_norm": 2.4335687160491943, "learning_rate": 8.518056121755674e-05, "loss": 3.323, "step": 17455 }, { "epoch": 1.1863024867509173, "grad_norm": 1.8220953941345215, "learning_rate": 8.517631471667347e-05, "loss": 3.1973, "step": 17460 }, { "epoch": 1.186642206821579, "grad_norm": 2.550981044769287, "learning_rate": 8.517206821579018e-05, "loss": 3.0824, "step": 17465 }, { "epoch": 1.1869819268922408, "grad_norm": 2.202667713165283, "learning_rate": 8.516782171490693e-05, "loss": 3.2676, "step": 17470 }, { "epoch": 1.1873216469629027, "grad_norm": 2.4011387825012207, "learning_rate": 8.516357521402365e-05, "loss": 2.7202, "step": 17475 }, { "epoch": 1.1876613670335643, "grad_norm": 2.407180070877075, "learning_rate": 8.515932871314037e-05, "loss": 3.2158, "step": 17480 }, { "epoch": 1.1880010871042261, "grad_norm": 2.432288646697998, "learning_rate": 8.515508221225711e-05, "loss": 3.1782, "step": 17485 }, { "epoch": 1.188340807174888, "grad_norm": 2.4256417751312256, "learning_rate": 8.515083571137384e-05, "loss": 2.8283, "step": 17490 }, { "epoch": 1.1886805272455496, "grad_norm": 1.9922828674316406, "learning_rate": 8.514658921049055e-05, "loss": 2.9442, "step": 17495 }, { "epoch": 1.1890202473162115, "grad_norm": 2.1461081504821777, "learning_rate": 8.51423427096073e-05, "loss": 3.0157, "step": 17500 }, { "epoch": 1.1893599673868733, "grad_norm": 
2.4359195232391357, "learning_rate": 8.513809620872402e-05, "loss": 3.0185, "step": 17505 }, { "epoch": 1.189699687457535, "grad_norm": 1.954289197921753, "learning_rate": 8.513384970784074e-05, "loss": 3.112, "step": 17510 }, { "epoch": 1.1900394075281968, "grad_norm": 2.2223775386810303, "learning_rate": 8.512960320695748e-05, "loss": 3.4016, "step": 17515 }, { "epoch": 1.1903791275988584, "grad_norm": 2.257162094116211, "learning_rate": 8.51253567060742e-05, "loss": 3.1134, "step": 17520 }, { "epoch": 1.1907188476695203, "grad_norm": 3.334343433380127, "learning_rate": 8.512111020519092e-05, "loss": 3.1491, "step": 17525 }, { "epoch": 1.1910585677401822, "grad_norm": 2.1623353958129883, "learning_rate": 8.511686370430766e-05, "loss": 3.1408, "step": 17530 }, { "epoch": 1.1913982878108438, "grad_norm": 2.2434475421905518, "learning_rate": 8.511261720342438e-05, "loss": 3.2626, "step": 17535 }, { "epoch": 1.1917380078815056, "grad_norm": 2.358820915222168, "learning_rate": 8.51083707025411e-05, "loss": 2.9978, "step": 17540 }, { "epoch": 1.1920777279521675, "grad_norm": 2.564138412475586, "learning_rate": 8.510412420165785e-05, "loss": 3.1658, "step": 17545 }, { "epoch": 1.1924174480228291, "grad_norm": 2.3039612770080566, "learning_rate": 8.509987770077456e-05, "loss": 3.2348, "step": 17550 }, { "epoch": 1.192757168093491, "grad_norm": 2.981652021408081, "learning_rate": 8.509563119989129e-05, "loss": 3.1044, "step": 17555 }, { "epoch": 1.1930968881641528, "grad_norm": 2.394303321838379, "learning_rate": 8.509138469900803e-05, "loss": 3.1496, "step": 17560 }, { "epoch": 1.1934366082348145, "grad_norm": 2.3956921100616455, "learning_rate": 8.508713819812474e-05, "loss": 2.9078, "step": 17565 }, { "epoch": 1.1937763283054763, "grad_norm": 2.466205358505249, "learning_rate": 8.508289169724147e-05, "loss": 3.2642, "step": 17570 }, { "epoch": 1.194116048376138, "grad_norm": 2.8344693183898926, "learning_rate": 8.507864519635821e-05, "loss": 2.8435, "step": 17575 }, { 
"epoch": 1.1944557684467998, "grad_norm": 1.9532610177993774, "learning_rate": 8.507439869547493e-05, "loss": 3.1643, "step": 17580 }, { "epoch": 1.1947954885174616, "grad_norm": 2.789651393890381, "learning_rate": 8.507015219459166e-05, "loss": 3.2273, "step": 17585 }, { "epoch": 1.1951352085881233, "grad_norm": 2.4625720977783203, "learning_rate": 8.50659056937084e-05, "loss": 3.3086, "step": 17590 }, { "epoch": 1.1954749286587851, "grad_norm": 2.0927586555480957, "learning_rate": 8.506165919282511e-05, "loss": 3.3535, "step": 17595 }, { "epoch": 1.195814648729447, "grad_norm": 1.8827426433563232, "learning_rate": 8.505741269194184e-05, "loss": 3.1169, "step": 17600 }, { "epoch": 1.1961543688001086, "grad_norm": 2.2182226181030273, "learning_rate": 8.505316619105857e-05, "loss": 2.9344, "step": 17605 }, { "epoch": 1.1964940888707705, "grad_norm": 2.1034176349639893, "learning_rate": 8.50489196901753e-05, "loss": 2.7258, "step": 17610 }, { "epoch": 1.1968338089414323, "grad_norm": 2.468949317932129, "learning_rate": 8.504467318929202e-05, "loss": 3.1744, "step": 17615 }, { "epoch": 1.197173529012094, "grad_norm": 2.333782911300659, "learning_rate": 8.504042668840875e-05, "loss": 3.0918, "step": 17620 }, { "epoch": 1.1975132490827558, "grad_norm": 2.36661434173584, "learning_rate": 8.503618018752548e-05, "loss": 2.8428, "step": 17625 }, { "epoch": 1.1978529691534177, "grad_norm": 2.2041683197021484, "learning_rate": 8.503193368664221e-05, "loss": 3.1057, "step": 17630 }, { "epoch": 1.1981926892240793, "grad_norm": 2.190769910812378, "learning_rate": 8.502768718575894e-05, "loss": 3.0566, "step": 17635 }, { "epoch": 1.1985324092947411, "grad_norm": 2.398974657058716, "learning_rate": 8.502344068487566e-05, "loss": 3.2764, "step": 17640 }, { "epoch": 1.198872129365403, "grad_norm": 2.3960354328155518, "learning_rate": 8.501919418399239e-05, "loss": 3.0608, "step": 17645 }, { "epoch": 1.1992118494360646, "grad_norm": 2.7395224571228027, "learning_rate": 
8.501494768310912e-05, "loss": 3.2048, "step": 17650 }, { "epoch": 1.1995515695067265, "grad_norm": 2.3225252628326416, "learning_rate": 8.501070118222585e-05, "loss": 3.1891, "step": 17655 }, { "epoch": 1.1998912895773883, "grad_norm": 2.2578678131103516, "learning_rate": 8.500645468134258e-05, "loss": 3.263, "step": 17660 }, { "epoch": 1.20023100964805, "grad_norm": 2.4527175426483154, "learning_rate": 8.50022081804593e-05, "loss": 3.2225, "step": 17665 }, { "epoch": 1.2005707297187118, "grad_norm": 2.2852933406829834, "learning_rate": 8.499796167957603e-05, "loss": 3.1584, "step": 17670 }, { "epoch": 1.2009104497893737, "grad_norm": 2.127671957015991, "learning_rate": 8.499371517869276e-05, "loss": 3.1089, "step": 17675 }, { "epoch": 1.2012501698600353, "grad_norm": 1.8248087167739868, "learning_rate": 8.498946867780949e-05, "loss": 2.8879, "step": 17680 }, { "epoch": 1.2015898899306972, "grad_norm": 2.843006134033203, "learning_rate": 8.498522217692622e-05, "loss": 3.2203, "step": 17685 }, { "epoch": 1.2019296100013588, "grad_norm": 1.6448454856872559, "learning_rate": 8.498097567604294e-05, "loss": 3.3998, "step": 17690 }, { "epoch": 1.2022693300720206, "grad_norm": 2.3757848739624023, "learning_rate": 8.497672917515967e-05, "loss": 2.9268, "step": 17695 }, { "epoch": 1.2026090501426825, "grad_norm": 1.7654362916946411, "learning_rate": 8.49724826742764e-05, "loss": 2.8365, "step": 17700 }, { "epoch": 1.2029487702133441, "grad_norm": 2.264331817626953, "learning_rate": 8.496823617339313e-05, "loss": 2.8926, "step": 17705 }, { "epoch": 1.203288490284006, "grad_norm": 2.440425157546997, "learning_rate": 8.496398967250986e-05, "loss": 3.5014, "step": 17710 }, { "epoch": 1.2036282103546678, "grad_norm": 2.3158648014068604, "learning_rate": 8.495974317162658e-05, "loss": 2.9269, "step": 17715 }, { "epoch": 1.2039679304253295, "grad_norm": 1.885067343711853, "learning_rate": 8.495549667074331e-05, "loss": 3.3057, "step": 17720 }, { "epoch": 1.2043076504959913, 
"grad_norm": 2.4898009300231934, "learning_rate": 8.495125016986004e-05, "loss": 3.2606, "step": 17725 }, { "epoch": 1.2046473705666532, "grad_norm": 2.731964349746704, "learning_rate": 8.494700366897677e-05, "loss": 3.1088, "step": 17730 }, { "epoch": 1.2049870906373148, "grad_norm": 2.74788498878479, "learning_rate": 8.494275716809348e-05, "loss": 3.0655, "step": 17735 }, { "epoch": 1.2053268107079766, "grad_norm": 2.4860711097717285, "learning_rate": 8.493851066721022e-05, "loss": 3.0658, "step": 17740 }, { "epoch": 1.2056665307786383, "grad_norm": 1.8011753559112549, "learning_rate": 8.493426416632695e-05, "loss": 3.1291, "step": 17745 }, { "epoch": 1.2060062508493001, "grad_norm": 2.1408698558807373, "learning_rate": 8.493001766544367e-05, "loss": 3.2729, "step": 17750 }, { "epoch": 1.206345970919962, "grad_norm": 2.7948555946350098, "learning_rate": 8.492577116456041e-05, "loss": 2.8812, "step": 17755 }, { "epoch": 1.2066856909906236, "grad_norm": 2.2286229133605957, "learning_rate": 8.492152466367714e-05, "loss": 3.2149, "step": 17760 }, { "epoch": 1.2070254110612855, "grad_norm": 2.2322006225585938, "learning_rate": 8.491727816279386e-05, "loss": 3.217, "step": 17765 }, { "epoch": 1.2073651311319473, "grad_norm": 2.244314193725586, "learning_rate": 8.491303166191059e-05, "loss": 2.9655, "step": 17770 }, { "epoch": 1.207704851202609, "grad_norm": 2.1061673164367676, "learning_rate": 8.490878516102732e-05, "loss": 3.3165, "step": 17775 }, { "epoch": 1.2080445712732708, "grad_norm": 2.640225648880005, "learning_rate": 8.490453866014405e-05, "loss": 3.1946, "step": 17780 }, { "epoch": 1.2083842913439327, "grad_norm": 2.497861623764038, "learning_rate": 8.490029215926078e-05, "loss": 3.0072, "step": 17785 }, { "epoch": 1.2087240114145943, "grad_norm": 2.195460557937622, "learning_rate": 8.48960456583775e-05, "loss": 3.4614, "step": 17790 }, { "epoch": 1.2090637314852561, "grad_norm": 2.023413896560669, "learning_rate": 8.489179915749423e-05, "loss": 3.2188, 
"step": 17795 }, { "epoch": 1.209403451555918, "grad_norm": 2.388948440551758, "learning_rate": 8.488755265661096e-05, "loss": 3.2555, "step": 17800 }, { "epoch": 1.2097431716265796, "grad_norm": 2.6501822471618652, "learning_rate": 8.488330615572768e-05, "loss": 2.9324, "step": 17805 }, { "epoch": 1.2100828916972415, "grad_norm": 3.172443389892578, "learning_rate": 8.487905965484442e-05, "loss": 3.0715, "step": 17810 }, { "epoch": 1.2104226117679033, "grad_norm": 3.1303582191467285, "learning_rate": 8.487481315396115e-05, "loss": 3.3469, "step": 17815 }, { "epoch": 1.210762331838565, "grad_norm": 2.0062291622161865, "learning_rate": 8.487056665307786e-05, "loss": 3.2175, "step": 17820 }, { "epoch": 1.2111020519092268, "grad_norm": 3.216390609741211, "learning_rate": 8.48663201521946e-05, "loss": 3.2397, "step": 17825 }, { "epoch": 1.2114417719798887, "grad_norm": 1.8768597841262817, "learning_rate": 8.486207365131133e-05, "loss": 3.3119, "step": 17830 }, { "epoch": 1.2117814920505503, "grad_norm": 2.5123848915100098, "learning_rate": 8.485782715042804e-05, "loss": 3.141, "step": 17835 }, { "epoch": 1.2121212121212122, "grad_norm": 2.539381265640259, "learning_rate": 8.485358064954479e-05, "loss": 3.2911, "step": 17840 }, { "epoch": 1.212460932191874, "grad_norm": 2.222728967666626, "learning_rate": 8.484933414866151e-05, "loss": 3.2865, "step": 17845 }, { "epoch": 1.2128006522625356, "grad_norm": 2.7850637435913086, "learning_rate": 8.484508764777823e-05, "loss": 3.1734, "step": 17850 }, { "epoch": 1.2131403723331975, "grad_norm": 2.801698684692383, "learning_rate": 8.484084114689497e-05, "loss": 3.2408, "step": 17855 }, { "epoch": 1.2134800924038591, "grad_norm": 2.20119571685791, "learning_rate": 8.48365946460117e-05, "loss": 3.1183, "step": 17860 }, { "epoch": 1.213819812474521, "grad_norm": 2.172232151031494, "learning_rate": 8.483234814512841e-05, "loss": 3.3283, "step": 17865 }, { "epoch": 1.2141595325451828, "grad_norm": 2.3587169647216797, "learning_rate": 
8.482810164424515e-05, "loss": 2.9588, "step": 17870 }, { "epoch": 1.2144992526158445, "grad_norm": 2.004204511642456, "learning_rate": 8.482385514336188e-05, "loss": 3.3391, "step": 17875 }, { "epoch": 1.2148389726865063, "grad_norm": 2.177614450454712, "learning_rate": 8.48196086424786e-05, "loss": 3.3148, "step": 17880 }, { "epoch": 1.2151786927571682, "grad_norm": 2.337120532989502, "learning_rate": 8.481536214159534e-05, "loss": 2.8841, "step": 17885 }, { "epoch": 1.2155184128278298, "grad_norm": 1.790246844291687, "learning_rate": 8.481111564071205e-05, "loss": 3.3059, "step": 17890 }, { "epoch": 1.2158581328984917, "grad_norm": 1.8437602519989014, "learning_rate": 8.480686913982878e-05, "loss": 3.3762, "step": 17895 }, { "epoch": 1.2161978529691535, "grad_norm": 1.9164321422576904, "learning_rate": 8.480262263894552e-05, "loss": 3.3096, "step": 17900 }, { "epoch": 1.2165375730398151, "grad_norm": 2.5329020023345947, "learning_rate": 8.479837613806224e-05, "loss": 3.2332, "step": 17905 }, { "epoch": 1.216877293110477, "grad_norm": 2.7095813751220703, "learning_rate": 8.479412963717896e-05, "loss": 3.0832, "step": 17910 }, { "epoch": 1.2172170131811386, "grad_norm": 2.4907257556915283, "learning_rate": 8.47898831362957e-05, "loss": 3.134, "step": 17915 }, { "epoch": 1.2175567332518005, "grad_norm": 2.1684517860412598, "learning_rate": 8.478563663541242e-05, "loss": 3.2347, "step": 17920 }, { "epoch": 1.2178964533224623, "grad_norm": 3.918362617492676, "learning_rate": 8.478139013452915e-05, "loss": 3.2723, "step": 17925 }, { "epoch": 1.218236173393124, "grad_norm": 2.401397705078125, "learning_rate": 8.477714363364589e-05, "loss": 2.9748, "step": 17930 }, { "epoch": 1.2185758934637858, "grad_norm": 2.2411012649536133, "learning_rate": 8.47728971327626e-05, "loss": 3.2747, "step": 17935 }, { "epoch": 1.2189156135344477, "grad_norm": 2.9361982345581055, "learning_rate": 8.476865063187933e-05, "loss": 3.0138, "step": 17940 }, { "epoch": 1.2192553336051093, 
"grad_norm": 2.0158889293670654, "learning_rate": 8.476525343117271e-05, "loss": 3.4261, "step": 17945 }, { "epoch": 1.2195950536757711, "grad_norm": 2.4452991485595703, "learning_rate": 8.476100693028944e-05, "loss": 3.0619, "step": 17950 }, { "epoch": 1.219934773746433, "grad_norm": 2.799466133117676, "learning_rate": 8.475676042940617e-05, "loss": 3.0358, "step": 17955 }, { "epoch": 1.2202744938170946, "grad_norm": 2.563778877258301, "learning_rate": 8.47525139285229e-05, "loss": 3.2123, "step": 17960 }, { "epoch": 1.2206142138877565, "grad_norm": 2.418405294418335, "learning_rate": 8.474826742763963e-05, "loss": 3.1887, "step": 17965 }, { "epoch": 1.2209539339584183, "grad_norm": 2.1202385425567627, "learning_rate": 8.474402092675637e-05, "loss": 3.2072, "step": 17970 }, { "epoch": 1.22129365402908, "grad_norm": 2.5736687183380127, "learning_rate": 8.473977442587308e-05, "loss": 3.2447, "step": 17975 }, { "epoch": 1.2216333740997418, "grad_norm": 1.9829968214035034, "learning_rate": 8.473552792498981e-05, "loss": 3.191, "step": 17980 }, { "epoch": 1.2219730941704037, "grad_norm": 2.413740634918213, "learning_rate": 8.473128142410655e-05, "loss": 2.9798, "step": 17985 }, { "epoch": 1.2223128142410653, "grad_norm": 2.823563575744629, "learning_rate": 8.472703492322327e-05, "loss": 3.2723, "step": 17990 }, { "epoch": 1.2226525343117272, "grad_norm": 1.90080988407135, "learning_rate": 8.472278842234e-05, "loss": 3.2717, "step": 17995 }, { "epoch": 1.222992254382389, "grad_norm": 3.395491123199463, "learning_rate": 8.471854192145672e-05, "loss": 3.0808, "step": 18000 }, { "epoch": 1.2233319744530506, "grad_norm": 2.3716769218444824, "learning_rate": 8.471429542057345e-05, "loss": 3.0945, "step": 18005 }, { "epoch": 1.2236716945237125, "grad_norm": 2.560508966445923, "learning_rate": 8.471004891969018e-05, "loss": 3.21, "step": 18010 }, { "epoch": 1.2240114145943743, "grad_norm": 1.9152395725250244, "learning_rate": 8.47058024188069e-05, "loss": 2.9292, "step": 18015 
}, { "epoch": 1.224351134665036, "grad_norm": 2.163419485092163, "learning_rate": 8.470155591792363e-05, "loss": 3.4238, "step": 18020 }, { "epoch": 1.2246908547356978, "grad_norm": 2.2632687091827393, "learning_rate": 8.469730941704036e-05, "loss": 3.0823, "step": 18025 }, { "epoch": 1.2250305748063597, "grad_norm": 2.091890811920166, "learning_rate": 8.469306291615709e-05, "loss": 2.9497, "step": 18030 }, { "epoch": 1.2253702948770213, "grad_norm": 2.1150062084198, "learning_rate": 8.468881641527382e-05, "loss": 3.4516, "step": 18035 }, { "epoch": 1.2257100149476832, "grad_norm": 2.666934013366699, "learning_rate": 8.468456991439055e-05, "loss": 3.3015, "step": 18040 }, { "epoch": 1.2260497350183448, "grad_norm": 2.1368987560272217, "learning_rate": 8.468032341350727e-05, "loss": 3.386, "step": 18045 }, { "epoch": 1.2263894550890067, "grad_norm": 2.170696496963501, "learning_rate": 8.4676076912624e-05, "loss": 3.0157, "step": 18050 }, { "epoch": 1.2267291751596685, "grad_norm": 2.733492136001587, "learning_rate": 8.467183041174073e-05, "loss": 3.1628, "step": 18055 }, { "epoch": 1.2270688952303301, "grad_norm": 2.4373562335968018, "learning_rate": 8.466758391085746e-05, "loss": 2.8124, "step": 18060 }, { "epoch": 1.227408615300992, "grad_norm": 2.012988805770874, "learning_rate": 8.466333740997419e-05, "loss": 3.1222, "step": 18065 }, { "epoch": 1.2277483353716538, "grad_norm": 2.5634098052978516, "learning_rate": 8.465909090909091e-05, "loss": 3.2715, "step": 18070 }, { "epoch": 1.2280880554423155, "grad_norm": 1.778075098991394, "learning_rate": 8.465484440820764e-05, "loss": 3.2871, "step": 18075 }, { "epoch": 1.2284277755129773, "grad_norm": 2.427739381790161, "learning_rate": 8.465059790732437e-05, "loss": 3.0088, "step": 18080 }, { "epoch": 1.228767495583639, "grad_norm": 2.1893632411956787, "learning_rate": 8.46463514064411e-05, "loss": 3.0883, "step": 18085 }, { "epoch": 1.2291072156543008, "grad_norm": 2.6807620525360107, "learning_rate": 
8.464210490555783e-05, "loss": 3.1748, "step": 18090 }, { "epoch": 1.2294469357249627, "grad_norm": 2.462125062942505, "learning_rate": 8.463785840467455e-05, "loss": 3.1678, "step": 18095 }, { "epoch": 1.2297866557956243, "grad_norm": 1.9320952892303467, "learning_rate": 8.463361190379128e-05, "loss": 3.3728, "step": 18100 }, { "epoch": 1.2301263758662861, "grad_norm": 2.039095401763916, "learning_rate": 8.462936540290801e-05, "loss": 3.2489, "step": 18105 }, { "epoch": 1.230466095936948, "grad_norm": 2.0925791263580322, "learning_rate": 8.462511890202474e-05, "loss": 3.1002, "step": 18110 }, { "epoch": 1.2308058160076096, "grad_norm": 2.1690471172332764, "learning_rate": 8.462087240114147e-05, "loss": 3.1293, "step": 18115 }, { "epoch": 1.2311455360782715, "grad_norm": 2.407494306564331, "learning_rate": 8.46166259002582e-05, "loss": 3.4324, "step": 18120 }, { "epoch": 1.2314852561489333, "grad_norm": 2.364600419998169, "learning_rate": 8.461237939937492e-05, "loss": 3.0952, "step": 18125 }, { "epoch": 1.231824976219595, "grad_norm": 2.367589235305786, "learning_rate": 8.460813289849164e-05, "loss": 3.0789, "step": 18130 }, { "epoch": 1.2321646962902568, "grad_norm": 2.4406158924102783, "learning_rate": 8.460388639760838e-05, "loss": 3.1882, "step": 18135 }, { "epoch": 1.2325044163609187, "grad_norm": 6.952081203460693, "learning_rate": 8.460048919690176e-05, "loss": 2.9911, "step": 18140 }, { "epoch": 1.2328441364315803, "grad_norm": 2.9780967235565186, "learning_rate": 8.459624269601848e-05, "loss": 3.1506, "step": 18145 }, { "epoch": 1.2331838565022422, "grad_norm": 2.218165159225464, "learning_rate": 8.459199619513522e-05, "loss": 3.2172, "step": 18150 }, { "epoch": 1.233523576572904, "grad_norm": 1.9049497842788696, "learning_rate": 8.458774969425194e-05, "loss": 3.0417, "step": 18155 }, { "epoch": 1.2338632966435656, "grad_norm": 2.3028628826141357, "learning_rate": 8.458350319336866e-05, "loss": 3.1689, "step": 18160 }, { "epoch": 1.2342030167142275, 
"grad_norm": 3.1826634407043457, "learning_rate": 8.45792566924854e-05, "loss": 2.9722, "step": 18165 }, { "epoch": 1.2345427367848893, "grad_norm": 2.8784751892089844, "learning_rate": 8.457501019160212e-05, "loss": 3.0849, "step": 18170 }, { "epoch": 1.234882456855551, "grad_norm": 2.3540570735931396, "learning_rate": 8.457076369071886e-05, "loss": 3.1305, "step": 18175 }, { "epoch": 1.2352221769262128, "grad_norm": 2.93977952003479, "learning_rate": 8.456651718983558e-05, "loss": 3.0674, "step": 18180 }, { "epoch": 1.2355618969968747, "grad_norm": 2.422358751296997, "learning_rate": 8.45622706889523e-05, "loss": 3.1472, "step": 18185 }, { "epoch": 1.2359016170675363, "grad_norm": 1.978877067565918, "learning_rate": 8.455802418806904e-05, "loss": 3.1292, "step": 18190 }, { "epoch": 1.2362413371381982, "grad_norm": 2.0296530723571777, "learning_rate": 8.455377768718577e-05, "loss": 3.0947, "step": 18195 }, { "epoch": 1.23658105720886, "grad_norm": 2.2050564289093018, "learning_rate": 8.454953118630248e-05, "loss": 3.1433, "step": 18200 }, { "epoch": 1.2369207772795217, "grad_norm": 2.254584312438965, "learning_rate": 8.454528468541922e-05, "loss": 2.9938, "step": 18205 }, { "epoch": 1.2372604973501835, "grad_norm": 2.5608925819396973, "learning_rate": 8.454103818453595e-05, "loss": 3.1343, "step": 18210 }, { "epoch": 1.2376002174208451, "grad_norm": 1.9931073188781738, "learning_rate": 8.453679168365267e-05, "loss": 3.0368, "step": 18215 }, { "epoch": 1.237939937491507, "grad_norm": 2.189662218093872, "learning_rate": 8.453254518276941e-05, "loss": 2.988, "step": 18220 }, { "epoch": 1.2382796575621688, "grad_norm": 3.7098000049591064, "learning_rate": 8.452829868188614e-05, "loss": 3.3588, "step": 18225 }, { "epoch": 1.2386193776328305, "grad_norm": 2.7460649013519287, "learning_rate": 8.452405218100285e-05, "loss": 3.1161, "step": 18230 }, { "epoch": 1.2389590977034923, "grad_norm": 1.8998315334320068, "learning_rate": 8.451980568011959e-05, "loss": 3.2159, 
"step": 18235 }, { "epoch": 1.2392988177741542, "grad_norm": 1.6464040279388428, "learning_rate": 8.451555917923632e-05, "loss": 3.1123, "step": 18240 }, { "epoch": 1.2396385378448158, "grad_norm": 2.936018466949463, "learning_rate": 8.451131267835304e-05, "loss": 3.1334, "step": 18245 }, { "epoch": 1.2399782579154777, "grad_norm": 2.007638931274414, "learning_rate": 8.450706617746978e-05, "loss": 3.1782, "step": 18250 }, { "epoch": 1.2403179779861393, "grad_norm": 2.1598904132843018, "learning_rate": 8.450281967658649e-05, "loss": 3.2208, "step": 18255 }, { "epoch": 1.2406576980568012, "grad_norm": 2.5170626640319824, "learning_rate": 8.449857317570322e-05, "loss": 2.8462, "step": 18260 }, { "epoch": 1.240997418127463, "grad_norm": 2.2687435150146484, "learning_rate": 8.449432667481996e-05, "loss": 3.4036, "step": 18265 }, { "epoch": 1.2413371381981246, "grad_norm": 2.5870163440704346, "learning_rate": 8.449008017393668e-05, "loss": 2.8955, "step": 18270 }, { "epoch": 1.2416768582687865, "grad_norm": 2.392914056777954, "learning_rate": 8.44858336730534e-05, "loss": 3.0344, "step": 18275 }, { "epoch": 1.2420165783394483, "grad_norm": 2.2060658931732178, "learning_rate": 8.448158717217015e-05, "loss": 3.1811, "step": 18280 }, { "epoch": 1.24235629841011, "grad_norm": 2.69670033454895, "learning_rate": 8.447734067128686e-05, "loss": 3.4721, "step": 18285 }, { "epoch": 1.2426960184807718, "grad_norm": 2.791132926940918, "learning_rate": 8.447309417040359e-05, "loss": 3.205, "step": 18290 }, { "epoch": 1.2430357385514337, "grad_norm": 2.056818723678589, "learning_rate": 8.446884766952033e-05, "loss": 2.9145, "step": 18295 }, { "epoch": 1.2433754586220953, "grad_norm": 2.623302459716797, "learning_rate": 8.446460116863704e-05, "loss": 3.2987, "step": 18300 }, { "epoch": 1.2437151786927572, "grad_norm": 1.9602549076080322, "learning_rate": 8.446035466775377e-05, "loss": 3.2924, "step": 18305 }, { "epoch": 1.244054898763419, "grad_norm": 2.175046920776367, 
"learning_rate": 8.445610816687051e-05, "loss": 3.3399, "step": 18310 }, { "epoch": 1.2443946188340806, "grad_norm": 2.4577581882476807, "learning_rate": 8.445186166598723e-05, "loss": 3.2879, "step": 18315 }, { "epoch": 1.2447343389047425, "grad_norm": 2.3961596488952637, "learning_rate": 8.444761516510396e-05, "loss": 3.1989, "step": 18320 }, { "epoch": 1.2450740589754044, "grad_norm": 2.3278400897979736, "learning_rate": 8.444336866422068e-05, "loss": 3.0104, "step": 18325 }, { "epoch": 1.245413779046066, "grad_norm": 2.4436874389648438, "learning_rate": 8.443912216333741e-05, "loss": 3.0281, "step": 18330 }, { "epoch": 1.2457534991167278, "grad_norm": 2.5430917739868164, "learning_rate": 8.443487566245414e-05, "loss": 3.0995, "step": 18335 }, { "epoch": 1.2460932191873897, "grad_norm": 2.187664031982422, "learning_rate": 8.443062916157087e-05, "loss": 2.9794, "step": 18340 }, { "epoch": 1.2464329392580513, "grad_norm": 2.7553632259368896, "learning_rate": 8.44263826606876e-05, "loss": 3.1793, "step": 18345 }, { "epoch": 1.2467726593287132, "grad_norm": 2.6137242317199707, "learning_rate": 8.442213615980432e-05, "loss": 2.9469, "step": 18350 }, { "epoch": 1.247112379399375, "grad_norm": 1.9004125595092773, "learning_rate": 8.441788965892105e-05, "loss": 3.3129, "step": 18355 }, { "epoch": 1.2474520994700367, "grad_norm": 2.1133928298950195, "learning_rate": 8.441364315803778e-05, "loss": 3.2002, "step": 18360 }, { "epoch": 1.2477918195406985, "grad_norm": 1.6085877418518066, "learning_rate": 8.440939665715451e-05, "loss": 3.2101, "step": 18365 }, { "epoch": 1.2481315396113604, "grad_norm": 2.693652868270874, "learning_rate": 8.440515015627124e-05, "loss": 2.9727, "step": 18370 }, { "epoch": 1.248471259682022, "grad_norm": 1.9359029531478882, "learning_rate": 8.440090365538796e-05, "loss": 3.2408, "step": 18375 }, { "epoch": 1.2488109797526838, "grad_norm": 2.0936391353607178, "learning_rate": 8.439665715450469e-05, "loss": 2.9226, "step": 18380 }, { "epoch": 
1.2491506998233455, "grad_norm": 2.0737240314483643, "learning_rate": 8.439241065362142e-05, "loss": 3.4417, "step": 18385 }, { "epoch": 1.2494904198940073, "grad_norm": 2.2659354209899902, "learning_rate": 8.438816415273815e-05, "loss": 3.0092, "step": 18390 }, { "epoch": 1.2498301399646692, "grad_norm": 2.0523459911346436, "learning_rate": 8.438391765185488e-05, "loss": 3.1798, "step": 18395 }, { "epoch": 1.2501698600353308, "grad_norm": 1.980141282081604, "learning_rate": 8.43796711509716e-05, "loss": 3.2656, "step": 18400 }, { "epoch": 1.2505095801059927, "grad_norm": 2.310943126678467, "learning_rate": 8.437542465008833e-05, "loss": 3.2703, "step": 18405 }, { "epoch": 1.2508493001766543, "grad_norm": 1.8876733779907227, "learning_rate": 8.437117814920506e-05, "loss": 3.1529, "step": 18410 }, { "epoch": 1.2511890202473162, "grad_norm": 2.2244467735290527, "learning_rate": 8.436693164832179e-05, "loss": 2.8567, "step": 18415 }, { "epoch": 1.251528740317978, "grad_norm": 2.359764575958252, "learning_rate": 8.436268514743852e-05, "loss": 3.0042, "step": 18420 }, { "epoch": 1.2518684603886396, "grad_norm": 2.8725733757019043, "learning_rate": 8.435843864655524e-05, "loss": 3.1287, "step": 18425 }, { "epoch": 1.2522081804593015, "grad_norm": 2.5743112564086914, "learning_rate": 8.435419214567197e-05, "loss": 2.8734, "step": 18430 }, { "epoch": 1.2525479005299633, "grad_norm": 2.669994831085205, "learning_rate": 8.43499456447887e-05, "loss": 3.3361, "step": 18435 }, { "epoch": 1.252887620600625, "grad_norm": 2.656480073928833, "learning_rate": 8.434569914390543e-05, "loss": 3.0891, "step": 18440 }, { "epoch": 1.2532273406712868, "grad_norm": 2.512725353240967, "learning_rate": 8.434145264302216e-05, "loss": 3.0658, "step": 18445 }, { "epoch": 1.2535670607419487, "grad_norm": 1.891467809677124, "learning_rate": 8.433720614213888e-05, "loss": 3.3717, "step": 18450 }, { "epoch": 1.2539067808126103, "grad_norm": 2.0639593601226807, "learning_rate": 8.43329596412556e-05, 
"loss": 2.9577, "step": 18455 }, { "epoch": 1.2542465008832722, "grad_norm": 3.049912929534912, "learning_rate": 8.432871314037234e-05, "loss": 3.2183, "step": 18460 }, { "epoch": 1.254586220953934, "grad_norm": 2.376614809036255, "learning_rate": 8.432446663948907e-05, "loss": 3.1204, "step": 18465 }, { "epoch": 1.2549259410245956, "grad_norm": 2.330531358718872, "learning_rate": 8.432022013860578e-05, "loss": 2.9542, "step": 18470 }, { "epoch": 1.2552656610952575, "grad_norm": 2.5936226844787598, "learning_rate": 8.431597363772252e-05, "loss": 3.0327, "step": 18475 }, { "epoch": 1.2556053811659194, "grad_norm": 2.4157814979553223, "learning_rate": 8.431172713683925e-05, "loss": 3.1052, "step": 18480 }, { "epoch": 1.255945101236581, "grad_norm": 2.348862409591675, "learning_rate": 8.430748063595597e-05, "loss": 2.961, "step": 18485 }, { "epoch": 1.2562848213072428, "grad_norm": 2.7146077156066895, "learning_rate": 8.430323413507271e-05, "loss": 3.0756, "step": 18490 }, { "epoch": 1.2566245413779047, "grad_norm": 2.546279191970825, "learning_rate": 8.429898763418944e-05, "loss": 3.2275, "step": 18495 }, { "epoch": 1.2569642614485663, "grad_norm": 2.2697274684906006, "learning_rate": 8.429474113330615e-05, "loss": 3.0742, "step": 18500 }, { "epoch": 1.2573039815192282, "grad_norm": 2.2615301609039307, "learning_rate": 8.429049463242289e-05, "loss": 3.1414, "step": 18505 }, { "epoch": 1.25764370158989, "grad_norm": 2.13057279586792, "learning_rate": 8.428624813153962e-05, "loss": 3.121, "step": 18510 }, { "epoch": 1.2579834216605517, "grad_norm": 2.138392686843872, "learning_rate": 8.428200163065635e-05, "loss": 3.3565, "step": 18515 }, { "epoch": 1.2583231417312135, "grad_norm": 3.173229694366455, "learning_rate": 8.427775512977308e-05, "loss": 3.2987, "step": 18520 }, { "epoch": 1.2586628618018754, "grad_norm": 2.1133835315704346, "learning_rate": 8.427350862888979e-05, "loss": 3.3721, "step": 18525 }, { "epoch": 1.259002581872537, "grad_norm": 1.8167943954467773, 
"learning_rate": 8.426926212800653e-05, "loss": 3.1662, "step": 18530 }, { "epoch": 1.2593423019431988, "grad_norm": 2.3955750465393066, "learning_rate": 8.426501562712326e-05, "loss": 3.4028, "step": 18535 }, { "epoch": 1.2596820220138607, "grad_norm": 2.3643815517425537, "learning_rate": 8.426076912623997e-05, "loss": 3.2071, "step": 18540 }, { "epoch": 1.2600217420845223, "grad_norm": 2.2137067317962646, "learning_rate": 8.425652262535672e-05, "loss": 3.2797, "step": 18545 }, { "epoch": 1.2603614621551842, "grad_norm": 2.335918664932251, "learning_rate": 8.425227612447344e-05, "loss": 3.4165, "step": 18550 }, { "epoch": 1.260701182225846, "grad_norm": 2.0566961765289307, "learning_rate": 8.424802962359016e-05, "loss": 3.0134, "step": 18555 }, { "epoch": 1.2610409022965077, "grad_norm": 2.0521132946014404, "learning_rate": 8.42437831227069e-05, "loss": 3.458, "step": 18560 }, { "epoch": 1.2613806223671695, "grad_norm": 3.3469531536102295, "learning_rate": 8.423953662182363e-05, "loss": 3.2423, "step": 18565 }, { "epoch": 1.2617203424378312, "grad_norm": 2.120882987976074, "learning_rate": 8.423529012094034e-05, "loss": 3.2687, "step": 18570 }, { "epoch": 1.262060062508493, "grad_norm": 2.151803970336914, "learning_rate": 8.423104362005708e-05, "loss": 3.0413, "step": 18575 }, { "epoch": 1.2623997825791546, "grad_norm": 2.7154135704040527, "learning_rate": 8.422679711917381e-05, "loss": 3.0378, "step": 18580 }, { "epoch": 1.2627395026498165, "grad_norm": 2.9995031356811523, "learning_rate": 8.422255061829053e-05, "loss": 3.3322, "step": 18585 }, { "epoch": 1.2630792227204783, "grad_norm": 2.524198055267334, "learning_rate": 8.421830411740727e-05, "loss": 3.2448, "step": 18590 }, { "epoch": 1.26341894279114, "grad_norm": 2.0347900390625, "learning_rate": 8.421405761652398e-05, "loss": 3.1029, "step": 18595 }, { "epoch": 1.2637586628618018, "grad_norm": 2.218827962875366, "learning_rate": 8.420981111564071e-05, "loss": 3.2121, "step": 18600 }, { "epoch": 
1.2640983829324637, "grad_norm": 2.3106675148010254, "learning_rate": 8.420556461475745e-05, "loss": 3.0728, "step": 18605 }, { "epoch": 1.2644381030031253, "grad_norm": 2.0531718730926514, "learning_rate": 8.420131811387417e-05, "loss": 2.9063, "step": 18610 }, { "epoch": 1.2647778230737872, "grad_norm": 2.24519944190979, "learning_rate": 8.41970716129909e-05, "loss": 3.3056, "step": 18615 }, { "epoch": 1.265117543144449, "grad_norm": 2.15539813041687, "learning_rate": 8.419282511210764e-05, "loss": 3.3202, "step": 18620 }, { "epoch": 1.2654572632151107, "grad_norm": 2.227459669113159, "learning_rate": 8.418857861122435e-05, "loss": 2.9173, "step": 18625 }, { "epoch": 1.2657969832857725, "grad_norm": 2.177807331085205, "learning_rate": 8.418433211034108e-05, "loss": 2.9365, "step": 18630 }, { "epoch": 1.2661367033564344, "grad_norm": 2.584482192993164, "learning_rate": 8.418008560945782e-05, "loss": 3.4534, "step": 18635 }, { "epoch": 1.266476423427096, "grad_norm": 1.9384872913360596, "learning_rate": 8.417583910857453e-05, "loss": 2.8901, "step": 18640 }, { "epoch": 1.2668161434977578, "grad_norm": 1.9829132556915283, "learning_rate": 8.417159260769126e-05, "loss": 2.9091, "step": 18645 }, { "epoch": 1.2671558635684197, "grad_norm": 2.687098741531372, "learning_rate": 8.4167346106808e-05, "loss": 3.1605, "step": 18650 }, { "epoch": 1.2674955836390813, "grad_norm": 1.8849765062332153, "learning_rate": 8.416309960592472e-05, "loss": 3.4004, "step": 18655 }, { "epoch": 1.2678353037097432, "grad_norm": 2.4337048530578613, "learning_rate": 8.415885310504145e-05, "loss": 3.1828, "step": 18660 }, { "epoch": 1.268175023780405, "grad_norm": 2.5539112091064453, "learning_rate": 8.415460660415819e-05, "loss": 3.307, "step": 18665 }, { "epoch": 1.2685147438510667, "grad_norm": 2.991689443588257, "learning_rate": 8.41503601032749e-05, "loss": 3.0543, "step": 18670 }, { "epoch": 1.2688544639217285, "grad_norm": 2.3092780113220215, "learning_rate": 8.414611360239163e-05, 
"loss": 3.0504, "step": 18675 }, { "epoch": 1.2691941839923904, "grad_norm": 2.0929970741271973, "learning_rate": 8.414186710150836e-05, "loss": 3.0415, "step": 18680 }, { "epoch": 1.269533904063052, "grad_norm": 2.2268807888031006, "learning_rate": 8.413762060062509e-05, "loss": 3.381, "step": 18685 }, { "epoch": 1.2698736241337139, "grad_norm": 3.1145057678222656, "learning_rate": 8.413337409974181e-05, "loss": 3.365, "step": 18690 }, { "epoch": 1.2702133442043757, "grad_norm": 2.7754008769989014, "learning_rate": 8.412912759885854e-05, "loss": 3.1258, "step": 18695 }, { "epoch": 1.2705530642750373, "grad_norm": 2.4909720420837402, "learning_rate": 8.412488109797527e-05, "loss": 3.3821, "step": 18700 }, { "epoch": 1.2708927843456992, "grad_norm": 1.8635869026184082, "learning_rate": 8.4120634597092e-05, "loss": 3.2121, "step": 18705 }, { "epoch": 1.271232504416361, "grad_norm": 2.2984797954559326, "learning_rate": 8.411638809620873e-05, "loss": 3.1347, "step": 18710 }, { "epoch": 1.2715722244870227, "grad_norm": 2.69459867477417, "learning_rate": 8.411214159532545e-05, "loss": 3.0602, "step": 18715 }, { "epoch": 1.2719119445576845, "grad_norm": 2.732907772064209, "learning_rate": 8.410789509444218e-05, "loss": 3.0148, "step": 18720 }, { "epoch": 1.2722516646283464, "grad_norm": 2.157026767730713, "learning_rate": 8.410364859355891e-05, "loss": 3.183, "step": 18725 }, { "epoch": 1.272591384699008, "grad_norm": 2.6103355884552, "learning_rate": 8.409940209267564e-05, "loss": 2.9926, "step": 18730 }, { "epoch": 1.2729311047696699, "grad_norm": 2.005643606185913, "learning_rate": 8.409515559179237e-05, "loss": 3.0309, "step": 18735 }, { "epoch": 1.2732708248403315, "grad_norm": 2.097993850708008, "learning_rate": 8.40909090909091e-05, "loss": 2.954, "step": 18740 }, { "epoch": 1.2736105449109933, "grad_norm": 2.7219598293304443, "learning_rate": 8.408666259002582e-05, "loss": 3.0145, "step": 18745 }, { "epoch": 1.273950264981655, "grad_norm": 2.367142677307129, 
"learning_rate": 8.408241608914255e-05, "loss": 3.2261, "step": 18750 }, { "epoch": 1.2742899850523168, "grad_norm": 3.359825849533081, "learning_rate": 8.407816958825928e-05, "loss": 3.2301, "step": 18755 }, { "epoch": 1.2746297051229787, "grad_norm": 1.9077342748641968, "learning_rate": 8.4073923087376e-05, "loss": 3.1173, "step": 18760 }, { "epoch": 1.2749694251936403, "grad_norm": 2.657862901687622, "learning_rate": 8.406967658649273e-05, "loss": 2.9501, "step": 18765 }, { "epoch": 1.2753091452643022, "grad_norm": 2.0106236934661865, "learning_rate": 8.406543008560946e-05, "loss": 3.0216, "step": 18770 }, { "epoch": 1.275648865334964, "grad_norm": 2.224235773086548, "learning_rate": 8.406118358472619e-05, "loss": 3.2647, "step": 18775 }, { "epoch": 1.2759885854056257, "grad_norm": 2.308769941329956, "learning_rate": 8.405693708384292e-05, "loss": 3.3494, "step": 18780 }, { "epoch": 1.2763283054762875, "grad_norm": 3.0622072219848633, "learning_rate": 8.405269058295965e-05, "loss": 3.215, "step": 18785 }, { "epoch": 1.2766680255469494, "grad_norm": 2.5719780921936035, "learning_rate": 8.404844408207637e-05, "loss": 3.3876, "step": 18790 }, { "epoch": 1.277007745617611, "grad_norm": 1.9406092166900635, "learning_rate": 8.40441975811931e-05, "loss": 3.1952, "step": 18795 }, { "epoch": 1.2773474656882728, "grad_norm": 2.6092703342437744, "learning_rate": 8.403995108030983e-05, "loss": 3.0813, "step": 18800 }, { "epoch": 1.2776871857589347, "grad_norm": 2.1493639945983887, "learning_rate": 8.403570457942656e-05, "loss": 3.2328, "step": 18805 }, { "epoch": 1.2780269058295963, "grad_norm": 2.112837076187134, "learning_rate": 8.403145807854327e-05, "loss": 3.1631, "step": 18810 }, { "epoch": 1.2783666259002582, "grad_norm": 2.279195785522461, "learning_rate": 8.402721157766001e-05, "loss": 3.085, "step": 18815 }, { "epoch": 1.27870634597092, "grad_norm": 2.7095484733581543, "learning_rate": 8.402296507677674e-05, "loss": 3.2826, "step": 18820 }, { "epoch": 
1.2790460660415817, "grad_norm": 1.8947978019714355, "learning_rate": 8.401871857589346e-05, "loss": 3.2259, "step": 18825 }, { "epoch": 1.2793857861122435, "grad_norm": 2.303814649581909, "learning_rate": 8.40144720750102e-05, "loss": 3.0728, "step": 18830 }, { "epoch": 1.2797255061829054, "grad_norm": 2.720696210861206, "learning_rate": 8.401022557412693e-05, "loss": 3.2279, "step": 18835 }, { "epoch": 1.280065226253567, "grad_norm": 2.7013349533081055, "learning_rate": 8.400597907324364e-05, "loss": 3.3357, "step": 18840 }, { "epoch": 1.2804049463242289, "grad_norm": 2.0768990516662598, "learning_rate": 8.400173257236038e-05, "loss": 3.1552, "step": 18845 }, { "epoch": 1.2807446663948907, "grad_norm": 2.626643419265747, "learning_rate": 8.399748607147711e-05, "loss": 3.0257, "step": 18850 }, { "epoch": 1.2810843864655523, "grad_norm": 1.8060283660888672, "learning_rate": 8.399323957059384e-05, "loss": 3.156, "step": 18855 }, { "epoch": 1.2814241065362142, "grad_norm": 2.703299045562744, "learning_rate": 8.398899306971057e-05, "loss": 3.0773, "step": 18860 }, { "epoch": 1.281763826606876, "grad_norm": 1.8832459449768066, "learning_rate": 8.39847465688273e-05, "loss": 3.0603, "step": 18865 }, { "epoch": 1.2821035466775377, "grad_norm": 1.9961769580841064, "learning_rate": 8.398050006794402e-05, "loss": 3.2401, "step": 18870 }, { "epoch": 1.2824432667481995, "grad_norm": 2.000192880630493, "learning_rate": 8.397625356706075e-05, "loss": 3.2258, "step": 18875 }, { "epoch": 1.2827829868188614, "grad_norm": 2.309966802597046, "learning_rate": 8.397200706617747e-05, "loss": 2.9602, "step": 18880 }, { "epoch": 1.283122706889523, "grad_norm": 2.1657958030700684, "learning_rate": 8.396776056529421e-05, "loss": 3.0762, "step": 18885 }, { "epoch": 1.2834624269601849, "grad_norm": 2.0939619541168213, "learning_rate": 8.396351406441093e-05, "loss": 2.9071, "step": 18890 }, { "epoch": 1.2838021470308467, "grad_norm": 2.6245276927948, "learning_rate": 8.395926756352765e-05, 
"loss": 3.1561, "step": 18895 }, { "epoch": 1.2841418671015083, "grad_norm": 1.818008303642273, "learning_rate": 8.395502106264439e-05, "loss": 3.1518, "step": 18900 }, { "epoch": 1.2844815871721702, "grad_norm": 2.979822874069214, "learning_rate": 8.395077456176112e-05, "loss": 3.0983, "step": 18905 }, { "epoch": 1.2848213072428318, "grad_norm": 2.1078271865844727, "learning_rate": 8.394652806087783e-05, "loss": 3.3307, "step": 18910 }, { "epoch": 1.2851610273134937, "grad_norm": 2.234423875808716, "learning_rate": 8.394228155999457e-05, "loss": 2.7054, "step": 18915 }, { "epoch": 1.2855007473841553, "grad_norm": 2.1681642532348633, "learning_rate": 8.39380350591113e-05, "loss": 3.0357, "step": 18920 }, { "epoch": 1.2858404674548172, "grad_norm": 1.9604345560073853, "learning_rate": 8.393378855822802e-05, "loss": 3.0168, "step": 18925 }, { "epoch": 1.286180187525479, "grad_norm": 2.1112465858459473, "learning_rate": 8.392954205734476e-05, "loss": 3.1861, "step": 18930 }, { "epoch": 1.2865199075961407, "grad_norm": 2.618908405303955, "learning_rate": 8.392529555646149e-05, "loss": 3.0168, "step": 18935 }, { "epoch": 1.2868596276668025, "grad_norm": 3.1241960525512695, "learning_rate": 8.39210490555782e-05, "loss": 3.3108, "step": 18940 }, { "epoch": 1.2871993477374644, "grad_norm": 2.8992037773132324, "learning_rate": 8.391680255469494e-05, "loss": 3.0318, "step": 18945 }, { "epoch": 1.287539067808126, "grad_norm": 2.126753091812134, "learning_rate": 8.391255605381166e-05, "loss": 3.2924, "step": 18950 }, { "epoch": 1.2878787878787878, "grad_norm": 2.1527132987976074, "learning_rate": 8.390830955292839e-05, "loss": 3.224, "step": 18955 }, { "epoch": 1.2882185079494497, "grad_norm": 2.415839195251465, "learning_rate": 8.390406305204513e-05, "loss": 2.9574, "step": 18960 }, { "epoch": 1.2885582280201113, "grad_norm": 2.3451662063598633, "learning_rate": 8.389981655116184e-05, "loss": 3.0623, "step": 18965 }, { "epoch": 1.2888979480907732, "grad_norm": 
2.725031852722168, "learning_rate": 8.389557005027857e-05, "loss": 3.2653, "step": 18970 }, { "epoch": 1.289237668161435, "grad_norm": 1.9835706949234009, "learning_rate": 8.389132354939531e-05, "loss": 3.2502, "step": 18975 }, { "epoch": 1.2895773882320967, "grad_norm": 1.9125920534133911, "learning_rate": 8.388707704851203e-05, "loss": 3.1987, "step": 18980 }, { "epoch": 1.2899171083027585, "grad_norm": 2.0938632488250732, "learning_rate": 8.388283054762875e-05, "loss": 3.1903, "step": 18985 }, { "epoch": 1.2902568283734204, "grad_norm": 2.069566488265991, "learning_rate": 8.38785840467455e-05, "loss": 3.1328, "step": 18990 }, { "epoch": 1.290596548444082, "grad_norm": 2.818765163421631, "learning_rate": 8.387433754586221e-05, "loss": 3.491, "step": 18995 }, { "epoch": 1.2909362685147439, "grad_norm": 2.1981394290924072, "learning_rate": 8.387009104497894e-05, "loss": 3.0353, "step": 19000 }, { "epoch": 1.2912759885854057, "grad_norm": 2.226665735244751, "learning_rate": 8.386584454409568e-05, "loss": 3.0757, "step": 19005 }, { "epoch": 1.2916157086560673, "grad_norm": 2.2213501930236816, "learning_rate": 8.38615980432124e-05, "loss": 3.1728, "step": 19010 }, { "epoch": 1.2919554287267292, "grad_norm": 2.70015287399292, "learning_rate": 8.385735154232912e-05, "loss": 3.2788, "step": 19015 }, { "epoch": 1.292295148797391, "grad_norm": 3.1442747116088867, "learning_rate": 8.385310504144586e-05, "loss": 3.2444, "step": 19020 }, { "epoch": 1.2926348688680527, "grad_norm": 2.0642406940460205, "learning_rate": 8.384885854056258e-05, "loss": 3.1259, "step": 19025 }, { "epoch": 1.2929745889387145, "grad_norm": 2.4983508586883545, "learning_rate": 8.38446120396793e-05, "loss": 3.1975, "step": 19030 }, { "epoch": 1.2933143090093764, "grad_norm": 2.1498804092407227, "learning_rate": 8.384036553879603e-05, "loss": 3.3958, "step": 19035 }, { "epoch": 1.293654029080038, "grad_norm": 2.3990325927734375, "learning_rate": 8.383611903791276e-05, "loss": 3.1468, "step": 19040 }, { 
"epoch": 1.2939937491506999, "grad_norm": 2.367478847503662, "learning_rate": 8.383187253702949e-05, "loss": 3.3413, "step": 19045 }, { "epoch": 1.2943334692213617, "grad_norm": 8.217135429382324, "learning_rate": 8.382762603614622e-05, "loss": 2.8345, "step": 19050 }, { "epoch": 1.2946731892920234, "grad_norm": 1.946753740310669, "learning_rate": 8.382337953526295e-05, "loss": 3.2246, "step": 19055 }, { "epoch": 1.2950129093626852, "grad_norm": 1.9144037961959839, "learning_rate": 8.381913303437967e-05, "loss": 3.3223, "step": 19060 }, { "epoch": 1.295352629433347, "grad_norm": 2.002957820892334, "learning_rate": 8.38148865334964e-05, "loss": 2.9987, "step": 19065 }, { "epoch": 1.2956923495040087, "grad_norm": 2.463346004486084, "learning_rate": 8.381064003261313e-05, "loss": 2.994, "step": 19070 }, { "epoch": 1.2960320695746705, "grad_norm": 2.6741526126861572, "learning_rate": 8.380639353172986e-05, "loss": 3.0277, "step": 19075 }, { "epoch": 1.2963717896453322, "grad_norm": 2.218398332595825, "learning_rate": 8.380214703084659e-05, "loss": 3.4148, "step": 19080 }, { "epoch": 1.296711509715994, "grad_norm": 2.352410078048706, "learning_rate": 8.379790052996331e-05, "loss": 3.3881, "step": 19085 }, { "epoch": 1.2970512297866557, "grad_norm": 2.4706027507781982, "learning_rate": 8.379365402908004e-05, "loss": 3.1758, "step": 19090 }, { "epoch": 1.2973909498573175, "grad_norm": 2.915313720703125, "learning_rate": 8.378940752819677e-05, "loss": 2.9262, "step": 19095 }, { "epoch": 1.2977306699279794, "grad_norm": 2.9038808345794678, "learning_rate": 8.37851610273135e-05, "loss": 3.1293, "step": 19100 }, { "epoch": 1.298070389998641, "grad_norm": 2.12363862991333, "learning_rate": 8.378091452643023e-05, "loss": 3.0869, "step": 19105 }, { "epoch": 1.2984101100693028, "grad_norm": 2.5473520755767822, "learning_rate": 8.377666802554695e-05, "loss": 3.04, "step": 19110 }, { "epoch": 1.2987498301399647, "grad_norm": 2.5940699577331543, "learning_rate": 
8.377242152466368e-05, "loss": 3.0103, "step": 19115 }, { "epoch": 1.2990895502106263, "grad_norm": 2.304903745651245, "learning_rate": 8.376817502378041e-05, "loss": 3.2611, "step": 19120 }, { "epoch": 1.2994292702812882, "grad_norm": 1.978206753730774, "learning_rate": 8.376392852289714e-05, "loss": 3.0842, "step": 19125 }, { "epoch": 1.29976899035195, "grad_norm": 3.0393362045288086, "learning_rate": 8.375968202201387e-05, "loss": 3.2413, "step": 19130 }, { "epoch": 1.3001087104226117, "grad_norm": 2.87630558013916, "learning_rate": 8.37554355211306e-05, "loss": 3.1173, "step": 19135 }, { "epoch": 1.3004484304932735, "grad_norm": 2.475665330886841, "learning_rate": 8.375118902024732e-05, "loss": 3.0364, "step": 19140 }, { "epoch": 1.3007881505639354, "grad_norm": 2.4671146869659424, "learning_rate": 8.374694251936405e-05, "loss": 3.1916, "step": 19145 }, { "epoch": 1.301127870634597, "grad_norm": 1.9563183784484863, "learning_rate": 8.374269601848076e-05, "loss": 3.2938, "step": 19150 }, { "epoch": 1.3014675907052589, "grad_norm": 2.0039820671081543, "learning_rate": 8.37384495175975e-05, "loss": 3.0437, "step": 19155 }, { "epoch": 1.3018073107759207, "grad_norm": 2.491685628890991, "learning_rate": 8.373420301671423e-05, "loss": 3.2657, "step": 19160 }, { "epoch": 1.3021470308465823, "grad_norm": 2.2003109455108643, "learning_rate": 8.372995651583095e-05, "loss": 3.0889, "step": 19165 }, { "epoch": 1.3024867509172442, "grad_norm": 2.18029522895813, "learning_rate": 8.372571001494769e-05, "loss": 3.3814, "step": 19170 }, { "epoch": 1.302826470987906, "grad_norm": 2.2697248458862305, "learning_rate": 8.372146351406442e-05, "loss": 3.0865, "step": 19175 }, { "epoch": 1.3031661910585677, "grad_norm": 3.016533136367798, "learning_rate": 8.371721701318113e-05, "loss": 2.9483, "step": 19180 }, { "epoch": 1.3035059111292295, "grad_norm": 2.1677825450897217, "learning_rate": 8.371297051229787e-05, "loss": 3.0909, "step": 19185 }, { "epoch": 1.3038456311998914, 
"grad_norm": 2.2355077266693115, "learning_rate": 8.37087240114146e-05, "loss": 3.4013, "step": 19190 }, { "epoch": 1.304185351270553, "grad_norm": 2.0104660987854004, "learning_rate": 8.370447751053133e-05, "loss": 3.0999, "step": 19195 }, { "epoch": 1.3045250713412149, "grad_norm": 2.320512056350708, "learning_rate": 8.370023100964806e-05, "loss": 3.4221, "step": 19200 }, { "epoch": 1.3048647914118767, "grad_norm": 3.1073973178863525, "learning_rate": 8.369598450876479e-05, "loss": 3.1249, "step": 19205 }, { "epoch": 1.3052045114825384, "grad_norm": 1.8599817752838135, "learning_rate": 8.369173800788151e-05, "loss": 3.1691, "step": 19210 }, { "epoch": 1.3055442315532002, "grad_norm": 2.530397415161133, "learning_rate": 8.368749150699824e-05, "loss": 3.3433, "step": 19215 }, { "epoch": 1.305883951623862, "grad_norm": 2.1036972999572754, "learning_rate": 8.368324500611497e-05, "loss": 3.0514, "step": 19220 }, { "epoch": 1.3062236716945237, "grad_norm": 2.32476544380188, "learning_rate": 8.36789985052317e-05, "loss": 3.3169, "step": 19225 }, { "epoch": 1.3065633917651855, "grad_norm": 2.550122022628784, "learning_rate": 8.367475200434843e-05, "loss": 3.1984, "step": 19230 }, { "epoch": 1.3069031118358474, "grad_norm": 2.6246917247772217, "learning_rate": 8.367050550346514e-05, "loss": 2.9103, "step": 19235 }, { "epoch": 1.307242831906509, "grad_norm": 2.1523358821868896, "learning_rate": 8.366625900258188e-05, "loss": 2.9991, "step": 19240 }, { "epoch": 1.3075825519771709, "grad_norm": 2.154230833053589, "learning_rate": 8.366201250169861e-05, "loss": 3.2634, "step": 19245 }, { "epoch": 1.3079222720478325, "grad_norm": 2.629481792449951, "learning_rate": 8.365776600081532e-05, "loss": 3.3749, "step": 19250 }, { "epoch": 1.3082619921184944, "grad_norm": 2.0786283016204834, "learning_rate": 8.365351949993207e-05, "loss": 3.4532, "step": 19255 }, { "epoch": 1.308601712189156, "grad_norm": 3.2218899726867676, "learning_rate": 8.36492729990488e-05, "loss": 3.2479, 
"step": 19260 }, { "epoch": 1.3089414322598178, "grad_norm": 2.057097911834717, "learning_rate": 8.364502649816551e-05, "loss": 2.6586, "step": 19265 }, { "epoch": 1.3092811523304797, "grad_norm": 2.896979808807373, "learning_rate": 8.364077999728225e-05, "loss": 3.0097, "step": 19270 }, { "epoch": 1.3096208724011413, "grad_norm": 1.7672171592712402, "learning_rate": 8.363653349639898e-05, "loss": 3.0122, "step": 19275 }, { "epoch": 1.3099605924718032, "grad_norm": 2.158547878265381, "learning_rate": 8.363228699551569e-05, "loss": 2.6579, "step": 19280 }, { "epoch": 1.310300312542465, "grad_norm": 2.500842571258545, "learning_rate": 8.362804049463243e-05, "loss": 3.2105, "step": 19285 }, { "epoch": 1.3106400326131267, "grad_norm": 1.7782007455825806, "learning_rate": 8.362379399374916e-05, "loss": 3.1732, "step": 19290 }, { "epoch": 1.3109797526837885, "grad_norm": 2.130035877227783, "learning_rate": 8.361954749286588e-05, "loss": 3.1556, "step": 19295 }, { "epoch": 1.3113194727544504, "grad_norm": 2.199474811553955, "learning_rate": 8.361530099198262e-05, "loss": 3.181, "step": 19300 }, { "epoch": 1.311659192825112, "grad_norm": 1.7535898685455322, "learning_rate": 8.361105449109933e-05, "loss": 3.2605, "step": 19305 }, { "epoch": 1.3119989128957739, "grad_norm": 2.18916916847229, "learning_rate": 8.360680799021606e-05, "loss": 2.8785, "step": 19310 }, { "epoch": 1.3123386329664357, "grad_norm": 2.2582430839538574, "learning_rate": 8.36025614893328e-05, "loss": 2.9831, "step": 19315 }, { "epoch": 1.3126783530370973, "grad_norm": 1.9239839315414429, "learning_rate": 8.359831498844952e-05, "loss": 3.0693, "step": 19320 }, { "epoch": 1.3130180731077592, "grad_norm": 2.1424694061279297, "learning_rate": 8.359406848756624e-05, "loss": 3.1158, "step": 19325 }, { "epoch": 1.313357793178421, "grad_norm": 2.4976706504821777, "learning_rate": 8.358982198668299e-05, "loss": 3.1853, "step": 19330 }, { "epoch": 1.3136975132490827, "grad_norm": 2.8336129188537598, 
"learning_rate": 8.35855754857997e-05, "loss": 2.9422, "step": 19335 }, { "epoch": 1.3140372333197445, "grad_norm": 2.411175489425659, "learning_rate": 8.358132898491643e-05, "loss": 3.2333, "step": 19340 }, { "epoch": 1.3143769533904064, "grad_norm": 2.2931973934173584, "learning_rate": 8.357708248403317e-05, "loss": 3.1161, "step": 19345 }, { "epoch": 1.314716673461068, "grad_norm": 2.530747890472412, "learning_rate": 8.357283598314988e-05, "loss": 3.0444, "step": 19350 }, { "epoch": 1.3150563935317299, "grad_norm": 3.688964605331421, "learning_rate": 8.356858948226661e-05, "loss": 3.112, "step": 19355 }, { "epoch": 1.3153961136023917, "grad_norm": 2.817298650741577, "learning_rate": 8.356434298138335e-05, "loss": 2.9638, "step": 19360 }, { "epoch": 1.3157358336730534, "grad_norm": 2.35357928276062, "learning_rate": 8.356009648050007e-05, "loss": 3.1919, "step": 19365 }, { "epoch": 1.3160755537437152, "grad_norm": 2.191555976867676, "learning_rate": 8.35558499796168e-05, "loss": 3.2852, "step": 19370 }, { "epoch": 1.316415273814377, "grad_norm": 2.1471309661865234, "learning_rate": 8.355160347873352e-05, "loss": 3.5218, "step": 19375 }, { "epoch": 1.3167549938850387, "grad_norm": 2.610703468322754, "learning_rate": 8.354735697785025e-05, "loss": 3.1737, "step": 19380 }, { "epoch": 1.3170947139557005, "grad_norm": 2.201317310333252, "learning_rate": 8.354311047696698e-05, "loss": 3.3154, "step": 19385 }, { "epoch": 1.3174344340263624, "grad_norm": 2.8069913387298584, "learning_rate": 8.353886397608371e-05, "loss": 3.237, "step": 19390 }, { "epoch": 1.317774154097024, "grad_norm": 2.6587281227111816, "learning_rate": 8.353461747520044e-05, "loss": 2.6479, "step": 19395 }, { "epoch": 1.3181138741676859, "grad_norm": 2.1327004432678223, "learning_rate": 8.353037097431716e-05, "loss": 3.3246, "step": 19400 }, { "epoch": 1.3184535942383477, "grad_norm": 2.4769632816314697, "learning_rate": 8.352612447343389e-05, "loss": 3.1231, "step": 19405 }, { "epoch": 
1.3187933143090094, "grad_norm": 2.2913386821746826, "learning_rate": 8.352187797255062e-05, "loss": 3.0371, "step": 19410 }, { "epoch": 1.3191330343796712, "grad_norm": 2.4176933765411377, "learning_rate": 8.351763147166735e-05, "loss": 3.0891, "step": 19415 }, { "epoch": 1.3194727544503329, "grad_norm": 2.266829490661621, "learning_rate": 8.351338497078408e-05, "loss": 3.4109, "step": 19420 }, { "epoch": 1.3198124745209947, "grad_norm": 1.8006917238235474, "learning_rate": 8.35091384699008e-05, "loss": 3.0679, "step": 19425 }, { "epoch": 1.3201521945916566, "grad_norm": 2.072492837905884, "learning_rate": 8.350489196901753e-05, "loss": 3.269, "step": 19430 }, { "epoch": 1.3204919146623182, "grad_norm": 2.110848903656006, "learning_rate": 8.350064546813426e-05, "loss": 3.1268, "step": 19435 }, { "epoch": 1.32083163473298, "grad_norm": 2.41896653175354, "learning_rate": 8.349639896725099e-05, "loss": 3.0796, "step": 19440 }, { "epoch": 1.3211713548036417, "grad_norm": 2.9456441402435303, "learning_rate": 8.349215246636772e-05, "loss": 3.1286, "step": 19445 }, { "epoch": 1.3215110748743035, "grad_norm": 2.5453457832336426, "learning_rate": 8.348790596548444e-05, "loss": 3.1563, "step": 19450 }, { "epoch": 1.3218507949449654, "grad_norm": 2.6794698238372803, "learning_rate": 8.348365946460117e-05, "loss": 3.4239, "step": 19455 }, { "epoch": 1.322190515015627, "grad_norm": 2.308593511581421, "learning_rate": 8.34794129637179e-05, "loss": 3.3923, "step": 19460 }, { "epoch": 1.3225302350862889, "grad_norm": 2.1200051307678223, "learning_rate": 8.347516646283463e-05, "loss": 3.0742, "step": 19465 }, { "epoch": 1.3228699551569507, "grad_norm": 2.6288223266601562, "learning_rate": 8.347091996195136e-05, "loss": 3.2448, "step": 19470 }, { "epoch": 1.3232096752276123, "grad_norm": 2.451172351837158, "learning_rate": 8.346667346106808e-05, "loss": 3.1302, "step": 19475 }, { "epoch": 1.3235493952982742, "grad_norm": 2.4335975646972656, "learning_rate": 8.346242696018481e-05, 
"loss": 2.9497, "step": 19480 }, { "epoch": 1.323889115368936, "grad_norm": 2.2931275367736816, "learning_rate": 8.345818045930154e-05, "loss": 3.1333, "step": 19485 }, { "epoch": 1.3242288354395977, "grad_norm": 1.8031290769577026, "learning_rate": 8.345393395841827e-05, "loss": 3.0602, "step": 19490 }, { "epoch": 1.3245685555102595, "grad_norm": 2.3432915210723877, "learning_rate": 8.3449687457535e-05, "loss": 3.2341, "step": 19495 }, { "epoch": 1.3249082755809214, "grad_norm": 1.9686427116394043, "learning_rate": 8.344544095665172e-05, "loss": 3.096, "step": 19500 }, { "epoch": 1.325247995651583, "grad_norm": 2.4868268966674805, "learning_rate": 8.344119445576844e-05, "loss": 2.9805, "step": 19505 }, { "epoch": 1.3255877157222449, "grad_norm": 3.291416645050049, "learning_rate": 8.343694795488518e-05, "loss": 3.0311, "step": 19510 }, { "epoch": 1.3259274357929067, "grad_norm": 2.34765625, "learning_rate": 8.343270145400191e-05, "loss": 3.1334, "step": 19515 }, { "epoch": 1.3262671558635684, "grad_norm": 2.14704966545105, "learning_rate": 8.342845495311862e-05, "loss": 3.0888, "step": 19520 }, { "epoch": 1.3266068759342302, "grad_norm": 2.011936902999878, "learning_rate": 8.342420845223536e-05, "loss": 3.0831, "step": 19525 }, { "epoch": 1.326946596004892, "grad_norm": 2.4400269985198975, "learning_rate": 8.341996195135209e-05, "loss": 2.9112, "step": 19530 }, { "epoch": 1.3272863160755537, "grad_norm": 2.3949038982391357, "learning_rate": 8.341571545046882e-05, "loss": 3.2236, "step": 19535 }, { "epoch": 1.3276260361462155, "grad_norm": 2.3653507232666016, "learning_rate": 8.341146894958555e-05, "loss": 3.2653, "step": 19540 }, { "epoch": 1.3279657562168774, "grad_norm": 1.6111050844192505, "learning_rate": 8.340722244870228e-05, "loss": 3.0647, "step": 19545 }, { "epoch": 1.328305476287539, "grad_norm": 2.103250026702881, "learning_rate": 8.3402975947819e-05, "loss": 3.0713, "step": 19550 }, { "epoch": 1.3286451963582009, "grad_norm": 2.8240556716918945, 
"learning_rate": 8.339872944693573e-05, "loss": 3.3166, "step": 19555 }, { "epoch": 1.3289849164288627, "grad_norm": 2.1869864463806152, "learning_rate": 8.339448294605246e-05, "loss": 3.2199, "step": 19560 }, { "epoch": 1.3293246364995244, "grad_norm": 3.081423044204712, "learning_rate": 8.339023644516919e-05, "loss": 3.02, "step": 19565 }, { "epoch": 1.3296643565701862, "grad_norm": 2.450051784515381, "learning_rate": 8.338598994428592e-05, "loss": 3.1514, "step": 19570 }, { "epoch": 1.330004076640848, "grad_norm": 2.2419495582580566, "learning_rate": 8.338174344340263e-05, "loss": 3.285, "step": 19575 }, { "epoch": 1.3303437967115097, "grad_norm": 2.0886194705963135, "learning_rate": 8.337749694251937e-05, "loss": 3.02, "step": 19580 }, { "epoch": 1.3306835167821716, "grad_norm": 2.1806087493896484, "learning_rate": 8.33732504416361e-05, "loss": 3.1763, "step": 19585 }, { "epoch": 1.3310232368528332, "grad_norm": 2.4929771423339844, "learning_rate": 8.336900394075282e-05, "loss": 3.0762, "step": 19590 }, { "epoch": 1.331362956923495, "grad_norm": 2.3715457916259766, "learning_rate": 8.336475743986956e-05, "loss": 3.1447, "step": 19595 }, { "epoch": 1.331702676994157, "grad_norm": 2.6157379150390625, "learning_rate": 8.336051093898628e-05, "loss": 3.2575, "step": 19600 }, { "epoch": 1.3320423970648185, "grad_norm": 2.3593897819519043, "learning_rate": 8.3356264438103e-05, "loss": 3.1279, "step": 19605 }, { "epoch": 1.3323821171354804, "grad_norm": 2.172013759613037, "learning_rate": 8.335201793721974e-05, "loss": 3.0598, "step": 19610 }, { "epoch": 1.332721837206142, "grad_norm": 2.3443591594696045, "learning_rate": 8.334777143633647e-05, "loss": 2.8344, "step": 19615 }, { "epoch": 1.3330615572768039, "grad_norm": 2.5689098834991455, "learning_rate": 8.334352493545318e-05, "loss": 3.4779, "step": 19620 }, { "epoch": 1.3334012773474657, "grad_norm": 2.4440360069274902, "learning_rate": 8.333927843456992e-05, "loss": 3.3208, "step": 19625 }, { "epoch": 
1.3337409974181273, "grad_norm": 2.641834259033203, "learning_rate": 8.333503193368665e-05, "loss": 3.608, "step": 19630 }, { "epoch": 1.3340807174887892, "grad_norm": 2.600895881652832, "learning_rate": 8.333078543280337e-05, "loss": 3.2028, "step": 19635 }, { "epoch": 1.334420437559451, "grad_norm": 2.1720659732818604, "learning_rate": 8.332653893192011e-05, "loss": 3.0077, "step": 19640 }, { "epoch": 1.3347601576301127, "grad_norm": 1.8656299114227295, "learning_rate": 8.332229243103684e-05, "loss": 2.9722, "step": 19645 }, { "epoch": 1.3350998777007745, "grad_norm": 2.5497708320617676, "learning_rate": 8.331804593015355e-05, "loss": 2.9279, "step": 19650 }, { "epoch": 1.3354395977714364, "grad_norm": 2.07839298248291, "learning_rate": 8.331379942927029e-05, "loss": 2.9328, "step": 19655 }, { "epoch": 1.335779317842098, "grad_norm": 2.7612650394439697, "learning_rate": 8.330955292838701e-05, "loss": 3.2976, "step": 19660 }, { "epoch": 1.3361190379127599, "grad_norm": 2.550137758255005, "learning_rate": 8.330530642750374e-05, "loss": 3.2618, "step": 19665 }, { "epoch": 1.3364587579834217, "grad_norm": 2.333615779876709, "learning_rate": 8.330105992662048e-05, "loss": 3.2016, "step": 19670 }, { "epoch": 1.3367984780540834, "grad_norm": 3.426398515701294, "learning_rate": 8.329681342573719e-05, "loss": 3.3043, "step": 19675 }, { "epoch": 1.3371381981247452, "grad_norm": 3.1546308994293213, "learning_rate": 8.329256692485392e-05, "loss": 3.2096, "step": 19680 }, { "epoch": 1.337477918195407, "grad_norm": 2.275761127471924, "learning_rate": 8.328832042397066e-05, "loss": 3.3416, "step": 19685 }, { "epoch": 1.3378176382660687, "grad_norm": 2.3696277141571045, "learning_rate": 8.328407392308738e-05, "loss": 3.1553, "step": 19690 }, { "epoch": 1.3381573583367306, "grad_norm": 2.0864062309265137, "learning_rate": 8.32798274222041e-05, "loss": 3.1969, "step": 19695 }, { "epoch": 1.3384970784073924, "grad_norm": 2.6433920860290527, "learning_rate": 8.327558092132085e-05, 
"loss": 2.9955, "step": 19700 }, { "epoch": 1.338836798478054, "grad_norm": 2.3974854946136475, "learning_rate": 8.327133442043756e-05, "loss": 3.1556, "step": 19705 }, { "epoch": 1.3391765185487159, "grad_norm": 2.883655071258545, "learning_rate": 8.326708791955429e-05, "loss": 2.9807, "step": 19710 }, { "epoch": 1.3395162386193777, "grad_norm": 2.483011484146118, "learning_rate": 8.326284141867103e-05, "loss": 2.9567, "step": 19715 }, { "epoch": 1.3398559586900394, "grad_norm": 3.1995060443878174, "learning_rate": 8.325859491778774e-05, "loss": 3.233, "step": 19720 }, { "epoch": 1.3401956787607012, "grad_norm": 2.1740522384643555, "learning_rate": 8.325434841690447e-05, "loss": 3.2189, "step": 19725 }, { "epoch": 1.340535398831363, "grad_norm": 1.961218237876892, "learning_rate": 8.32501019160212e-05, "loss": 3.0615, "step": 19730 }, { "epoch": 1.3408751189020247, "grad_norm": 2.0804314613342285, "learning_rate": 8.324585541513793e-05, "loss": 3.2115, "step": 19735 }, { "epoch": 1.3412148389726866, "grad_norm": 2.193080425262451, "learning_rate": 8.324160891425466e-05, "loss": 2.8251, "step": 19740 }, { "epoch": 1.3415545590433484, "grad_norm": 2.002324104309082, "learning_rate": 8.323736241337138e-05, "loss": 3.1709, "step": 19745 }, { "epoch": 1.34189427911401, "grad_norm": 2.4646170139312744, "learning_rate": 8.323311591248811e-05, "loss": 3.0021, "step": 19750 }, { "epoch": 1.342233999184672, "grad_norm": 2.6604042053222656, "learning_rate": 8.322886941160484e-05, "loss": 3.2263, "step": 19755 }, { "epoch": 1.3425737192553335, "grad_norm": 2.2545738220214844, "learning_rate": 8.322462291072157e-05, "loss": 3.0967, "step": 19760 }, { "epoch": 1.3429134393259954, "grad_norm": 2.6470861434936523, "learning_rate": 8.32203764098383e-05, "loss": 2.7709, "step": 19765 }, { "epoch": 1.3432531593966572, "grad_norm": 3.0276424884796143, "learning_rate": 8.321612990895502e-05, "loss": 3.0609, "step": 19770 }, { "epoch": 1.3435928794673189, "grad_norm": 
2.865962505340576, "learning_rate": 8.321188340807175e-05, "loss": 3.011, "step": 19775 }, { "epoch": 1.3439325995379807, "grad_norm": 2.0818450450897217, "learning_rate": 8.320763690718848e-05, "loss": 3.2033, "step": 19780 }, { "epoch": 1.3442723196086424, "grad_norm": 2.48551082611084, "learning_rate": 8.320339040630521e-05, "loss": 3.2391, "step": 19785 }, { "epoch": 1.3446120396793042, "grad_norm": 2.1376097202301025, "learning_rate": 8.319914390542194e-05, "loss": 3.3387, "step": 19790 }, { "epoch": 1.344951759749966, "grad_norm": 2.151871681213379, "learning_rate": 8.319489740453866e-05, "loss": 3.1622, "step": 19795 }, { "epoch": 1.3452914798206277, "grad_norm": 2.7584547996520996, "learning_rate": 8.319065090365539e-05, "loss": 2.9696, "step": 19800 }, { "epoch": 1.3456311998912895, "grad_norm": 1.967170000076294, "learning_rate": 8.318640440277212e-05, "loss": 3.1354, "step": 19805 }, { "epoch": 1.3459709199619514, "grad_norm": 2.2264912128448486, "learning_rate": 8.318215790188885e-05, "loss": 2.9777, "step": 19810 }, { "epoch": 1.346310640032613, "grad_norm": 2.3796403408050537, "learning_rate": 8.317791140100558e-05, "loss": 3.1426, "step": 19815 }, { "epoch": 1.3466503601032749, "grad_norm": 1.9788484573364258, "learning_rate": 8.31736649001223e-05, "loss": 3.0548, "step": 19820 }, { "epoch": 1.3469900801739367, "grad_norm": 2.1816768646240234, "learning_rate": 8.316941839923903e-05, "loss": 3.0612, "step": 19825 }, { "epoch": 1.3473298002445984, "grad_norm": 2.2051780223846436, "learning_rate": 8.316517189835576e-05, "loss": 3.0718, "step": 19830 }, { "epoch": 1.3476695203152602, "grad_norm": 1.7137373685836792, "learning_rate": 8.316092539747249e-05, "loss": 3.2806, "step": 19835 }, { "epoch": 1.348009240385922, "grad_norm": 2.278404951095581, "learning_rate": 8.315667889658922e-05, "loss": 2.9733, "step": 19840 }, { "epoch": 1.3483489604565837, "grad_norm": 2.149613618850708, "learning_rate": 8.315243239570594e-05, "loss": 3.0682, "step": 19845 }, 
{ "epoch": 1.3486886805272456, "grad_norm": 2.4129276275634766, "learning_rate": 8.314818589482267e-05, "loss": 3.3056, "step": 19850 }, { "epoch": 1.3490284005979074, "grad_norm": 1.6844329833984375, "learning_rate": 8.31439393939394e-05, "loss": 3.2644, "step": 19855 }, { "epoch": 1.349368120668569, "grad_norm": 2.224241256713867, "learning_rate": 8.313969289305611e-05, "loss": 3.1338, "step": 19860 }, { "epoch": 1.349707840739231, "grad_norm": 2.23639178276062, "learning_rate": 8.313544639217286e-05, "loss": 3.2477, "step": 19865 }, { "epoch": 1.3500475608098927, "grad_norm": 2.20953369140625, "learning_rate": 8.313119989128958e-05, "loss": 3.2112, "step": 19870 }, { "epoch": 1.3503872808805544, "grad_norm": 3.1163082122802734, "learning_rate": 8.312695339040631e-05, "loss": 3.3203, "step": 19875 }, { "epoch": 1.3507270009512162, "grad_norm": 2.6877846717834473, "learning_rate": 8.312270688952304e-05, "loss": 3.3793, "step": 19880 }, { "epoch": 1.351066721021878, "grad_norm": 2.013887882232666, "learning_rate": 8.311846038863977e-05, "loss": 3.3233, "step": 19885 }, { "epoch": 1.3514064410925397, "grad_norm": 2.4961721897125244, "learning_rate": 8.31142138877565e-05, "loss": 3.0326, "step": 19890 }, { "epoch": 1.3517461611632016, "grad_norm": 2.1453802585601807, "learning_rate": 8.310996738687322e-05, "loss": 3.2333, "step": 19895 }, { "epoch": 1.3520858812338634, "grad_norm": 3.0378100872039795, "learning_rate": 8.310572088598995e-05, "loss": 3.1335, "step": 19900 }, { "epoch": 1.352425601304525, "grad_norm": 2.3463401794433594, "learning_rate": 8.310147438510668e-05, "loss": 3.1625, "step": 19905 }, { "epoch": 1.352765321375187, "grad_norm": 2.7863640785217285, "learning_rate": 8.309722788422341e-05, "loss": 3.15, "step": 19910 }, { "epoch": 1.3531050414458488, "grad_norm": 2.1661951541900635, "learning_rate": 8.309298138334014e-05, "loss": 3.1724, "step": 19915 }, { "epoch": 1.3534447615165104, "grad_norm": 2.9946000576019287, "learning_rate": 
8.308873488245686e-05, "loss": 3.3261, "step": 19920 }, { "epoch": 1.3537844815871722, "grad_norm": 2.163444995880127, "learning_rate": 8.308448838157359e-05, "loss": 3.144, "step": 19925 }, { "epoch": 1.3541242016578339, "grad_norm": 2.2801215648651123, "learning_rate": 8.30802418806903e-05, "loss": 3.3056, "step": 19930 }, { "epoch": 1.3544639217284957, "grad_norm": 2.1326651573181152, "learning_rate": 8.307599537980705e-05, "loss": 3.32, "step": 19935 }, { "epoch": 1.3548036417991576, "grad_norm": 2.219773530960083, "learning_rate": 8.307174887892378e-05, "loss": 3.0977, "step": 19940 }, { "epoch": 1.3551433618698192, "grad_norm": 2.6351184844970703, "learning_rate": 8.306750237804049e-05, "loss": 3.2868, "step": 19945 }, { "epoch": 1.355483081940481, "grad_norm": 2.068354606628418, "learning_rate": 8.306325587715723e-05, "loss": 3.0937, "step": 19950 }, { "epoch": 1.3558228020111427, "grad_norm": 1.9194705486297607, "learning_rate": 8.305900937627396e-05, "loss": 3.2357, "step": 19955 }, { "epoch": 1.3561625220818045, "grad_norm": 2.3089730739593506, "learning_rate": 8.305476287539067e-05, "loss": 3.3079, "step": 19960 }, { "epoch": 1.3565022421524664, "grad_norm": 2.464461326599121, "learning_rate": 8.305051637450742e-05, "loss": 3.3079, "step": 19965 }, { "epoch": 1.356841962223128, "grad_norm": 2.3953306674957275, "learning_rate": 8.304626987362414e-05, "loss": 3.1601, "step": 19970 }, { "epoch": 1.3571816822937899, "grad_norm": 2.2159829139709473, "learning_rate": 8.304202337274086e-05, "loss": 3.1169, "step": 19975 }, { "epoch": 1.3575214023644517, "grad_norm": 1.9967857599258423, "learning_rate": 8.30377768718576e-05, "loss": 3.1903, "step": 19980 }, { "epoch": 1.3578611224351134, "grad_norm": 2.1005873680114746, "learning_rate": 8.303353037097433e-05, "loss": 3.042, "step": 19985 }, { "epoch": 1.3582008425057752, "grad_norm": 1.979207158088684, "learning_rate": 8.302928387009104e-05, "loss": 3.1986, "step": 19990 }, { "epoch": 1.358540562576437, 
"grad_norm": 2.3008763790130615, "learning_rate": 8.302503736920778e-05, "loss": 2.9712, "step": 19995 }, { "epoch": 1.3588802826470987, "grad_norm": 2.3096811771392822, "learning_rate": 8.30207908683245e-05, "loss": 3.2945, "step": 20000 }, { "epoch": 1.3592200027177606, "grad_norm": 2.0889320373535156, "learning_rate": 8.301654436744123e-05, "loss": 3.3693, "step": 20005 }, { "epoch": 1.3595597227884224, "grad_norm": 2.4991743564605713, "learning_rate": 8.301229786655797e-05, "loss": 3.2455, "step": 20010 }, { "epoch": 1.359899442859084, "grad_norm": 2.1006858348846436, "learning_rate": 8.300805136567468e-05, "loss": 3.3456, "step": 20015 }, { "epoch": 1.360239162929746, "grad_norm": 3.270563840866089, "learning_rate": 8.300380486479141e-05, "loss": 3.314, "step": 20020 }, { "epoch": 1.3605788830004077, "grad_norm": 2.715388774871826, "learning_rate": 8.299955836390815e-05, "loss": 2.9931, "step": 20025 }, { "epoch": 1.3609186030710694, "grad_norm": 2.0025980472564697, "learning_rate": 8.299531186302487e-05, "loss": 3.1046, "step": 20030 }, { "epoch": 1.3612583231417312, "grad_norm": 2.388028144836426, "learning_rate": 8.29910653621416e-05, "loss": 3.3109, "step": 20035 }, { "epoch": 1.361598043212393, "grad_norm": 2.1693150997161865, "learning_rate": 8.298681886125834e-05, "loss": 3.2525, "step": 20040 }, { "epoch": 1.3619377632830547, "grad_norm": 2.3309805393218994, "learning_rate": 8.298257236037505e-05, "loss": 3.0773, "step": 20045 }, { "epoch": 1.3622774833537166, "grad_norm": 2.2802536487579346, "learning_rate": 8.297832585949178e-05, "loss": 3.2519, "step": 20050 }, { "epoch": 1.3626172034243784, "grad_norm": 2.408200979232788, "learning_rate": 8.297407935860852e-05, "loss": 3.1945, "step": 20055 }, { "epoch": 1.36295692349504, "grad_norm": 1.925860047340393, "learning_rate": 8.296983285772523e-05, "loss": 3.2652, "step": 20060 }, { "epoch": 1.363296643565702, "grad_norm": 2.1805877685546875, "learning_rate": 8.296558635684196e-05, "loss": 3.231, "step": 
20065 }, { "epoch": 1.3636363636363638, "grad_norm": 2.433406352996826, "learning_rate": 8.29613398559587e-05, "loss": 3.1759, "step": 20070 }, { "epoch": 1.3639760837070254, "grad_norm": 2.538785696029663, "learning_rate": 8.295709335507542e-05, "loss": 3.2548, "step": 20075 }, { "epoch": 1.3643158037776872, "grad_norm": 2.3177692890167236, "learning_rate": 8.295284685419215e-05, "loss": 3.0779, "step": 20080 }, { "epoch": 1.364655523848349, "grad_norm": 2.5449752807617188, "learning_rate": 8.294860035330887e-05, "loss": 2.9889, "step": 20085 }, { "epoch": 1.3649952439190107, "grad_norm": 2.2528669834136963, "learning_rate": 8.29443538524256e-05, "loss": 3.1341, "step": 20090 }, { "epoch": 1.3653349639896726, "grad_norm": 1.8308610916137695, "learning_rate": 8.294010735154233e-05, "loss": 3.0478, "step": 20095 }, { "epoch": 1.3656746840603342, "grad_norm": 2.1846110820770264, "learning_rate": 8.293586085065906e-05, "loss": 3.2134, "step": 20100 }, { "epoch": 1.366014404130996, "grad_norm": 2.356254816055298, "learning_rate": 8.293161434977579e-05, "loss": 3.2403, "step": 20105 }, { "epoch": 1.366354124201658, "grad_norm": 3.1504602432250977, "learning_rate": 8.292736784889251e-05, "loss": 2.9943, "step": 20110 }, { "epoch": 1.3666938442723195, "grad_norm": 2.5712594985961914, "learning_rate": 8.292312134800924e-05, "loss": 3.2443, "step": 20115 }, { "epoch": 1.3670335643429814, "grad_norm": 2.409951686859131, "learning_rate": 8.291887484712597e-05, "loss": 2.737, "step": 20120 }, { "epoch": 1.367373284413643, "grad_norm": 2.150327205657959, "learning_rate": 8.29146283462427e-05, "loss": 3.0044, "step": 20125 }, { "epoch": 1.3677130044843049, "grad_norm": 1.9901130199432373, "learning_rate": 8.291038184535943e-05, "loss": 3.0604, "step": 20130 }, { "epoch": 1.3680527245549667, "grad_norm": 2.6596732139587402, "learning_rate": 8.290613534447615e-05, "loss": 3.2628, "step": 20135 }, { "epoch": 1.3683924446256284, "grad_norm": 1.9714230298995972, "learning_rate": 
8.290188884359288e-05, "loss": 2.8663, "step": 20140 }, { "epoch": 1.3687321646962902, "grad_norm": 2.6513099670410156, "learning_rate": 8.289764234270961e-05, "loss": 3.0886, "step": 20145 }, { "epoch": 1.369071884766952, "grad_norm": 2.1611180305480957, "learning_rate": 8.289339584182634e-05, "loss": 2.8259, "step": 20150 }, { "epoch": 1.3694116048376137, "grad_norm": 3.2138078212738037, "learning_rate": 8.288914934094307e-05, "loss": 3.3077, "step": 20155 }, { "epoch": 1.3697513249082756, "grad_norm": 2.1341300010681152, "learning_rate": 8.28849028400598e-05, "loss": 3.1549, "step": 20160 }, { "epoch": 1.3700910449789374, "grad_norm": 2.2968950271606445, "learning_rate": 8.288065633917652e-05, "loss": 3.2249, "step": 20165 }, { "epoch": 1.370430765049599, "grad_norm": 2.2152397632598877, "learning_rate": 8.287640983829325e-05, "loss": 3.0581, "step": 20170 }, { "epoch": 1.370770485120261, "grad_norm": 1.8105682134628296, "learning_rate": 8.287216333740998e-05, "loss": 3.0354, "step": 20175 }, { "epoch": 1.3711102051909227, "grad_norm": 1.7299968004226685, "learning_rate": 8.28679168365267e-05, "loss": 3.2013, "step": 20180 }, { "epoch": 1.3714499252615844, "grad_norm": 2.2978410720825195, "learning_rate": 8.286367033564343e-05, "loss": 2.8145, "step": 20185 }, { "epoch": 1.3717896453322462, "grad_norm": 2.6015682220458984, "learning_rate": 8.285942383476016e-05, "loss": 3.3483, "step": 20190 }, { "epoch": 1.372129365402908, "grad_norm": 2.6056344509124756, "learning_rate": 8.285517733387689e-05, "loss": 2.8744, "step": 20195 }, { "epoch": 1.3724690854735697, "grad_norm": 2.439389705657959, "learning_rate": 8.28509308329936e-05, "loss": 2.9485, "step": 20200 }, { "epoch": 1.3728088055442316, "grad_norm": 1.7435129880905151, "learning_rate": 8.284668433211035e-05, "loss": 2.9544, "step": 20205 }, { "epoch": 1.3731485256148934, "grad_norm": 2.0399727821350098, "learning_rate": 8.284243783122707e-05, "loss": 3.0275, "step": 20210 }, { "epoch": 1.373488245685555, 
"grad_norm": 2.3127999305725098, "learning_rate": 8.28381913303438e-05, "loss": 3.0916, "step": 20215 }, { "epoch": 1.373827965756217, "grad_norm": 2.2405402660369873, "learning_rate": 8.283394482946053e-05, "loss": 2.9231, "step": 20220 }, { "epoch": 1.3741676858268788, "grad_norm": 2.433073043823242, "learning_rate": 8.282969832857726e-05, "loss": 3.2036, "step": 20225 }, { "epoch": 1.3745074058975404, "grad_norm": 1.6897759437561035, "learning_rate": 8.282545182769399e-05, "loss": 2.9428, "step": 20230 }, { "epoch": 1.3748471259682022, "grad_norm": 2.8258860111236572, "learning_rate": 8.282120532681071e-05, "loss": 3.0705, "step": 20235 }, { "epoch": 1.375186846038864, "grad_norm": 2.2952122688293457, "learning_rate": 8.281695882592744e-05, "loss": 3.0119, "step": 20240 }, { "epoch": 1.3755265661095257, "grad_norm": 1.9521044492721558, "learning_rate": 8.281271232504417e-05, "loss": 3.1394, "step": 20245 }, { "epoch": 1.3758662861801876, "grad_norm": 2.5122733116149902, "learning_rate": 8.28084658241609e-05, "loss": 3.0152, "step": 20250 }, { "epoch": 1.3762060062508494, "grad_norm": 2.3075971603393555, "learning_rate": 8.280421932327763e-05, "loss": 2.9496, "step": 20255 }, { "epoch": 1.376545726321511, "grad_norm": 2.397758722305298, "learning_rate": 8.279997282239435e-05, "loss": 3.0162, "step": 20260 }, { "epoch": 1.376885446392173, "grad_norm": 3.0828402042388916, "learning_rate": 8.279572632151108e-05, "loss": 3.1852, "step": 20265 }, { "epoch": 1.3772251664628345, "grad_norm": 2.531620502471924, "learning_rate": 8.279147982062781e-05, "loss": 3.3615, "step": 20270 }, { "epoch": 1.3775648865334964, "grad_norm": 2.7288320064544678, "learning_rate": 8.278723331974454e-05, "loss": 3.2292, "step": 20275 }, { "epoch": 1.3779046066041583, "grad_norm": 2.1473748683929443, "learning_rate": 8.278298681886127e-05, "loss": 3.239, "step": 20280 }, { "epoch": 1.3782443266748199, "grad_norm": 2.742555856704712, "learning_rate": 8.277874031797798e-05, "loss": 3.1241, 
"step": 20285 }, { "epoch": 1.3785840467454817, "grad_norm": 2.201572895050049, "learning_rate": 8.277449381709472e-05, "loss": 3.3159, "step": 20290 }, { "epoch": 1.3789237668161434, "grad_norm": 2.162670373916626, "learning_rate": 8.277024731621145e-05, "loss": 3.3423, "step": 20295 }, { "epoch": 1.3792634868868052, "grad_norm": 2.3968236446380615, "learning_rate": 8.276600081532817e-05, "loss": 3.2498, "step": 20300 }, { "epoch": 1.379603206957467, "grad_norm": 2.3862075805664062, "learning_rate": 8.276175431444491e-05, "loss": 2.9306, "step": 20305 }, { "epoch": 1.3799429270281287, "grad_norm": 1.7744899988174438, "learning_rate": 8.275750781356163e-05, "loss": 3.1748, "step": 20310 }, { "epoch": 1.3802826470987906, "grad_norm": 2.3496859073638916, "learning_rate": 8.275326131267835e-05, "loss": 3.2558, "step": 20315 }, { "epoch": 1.3806223671694524, "grad_norm": 2.913076162338257, "learning_rate": 8.274901481179509e-05, "loss": 3.1979, "step": 20320 }, { "epoch": 1.380962087240114, "grad_norm": 3.2436130046844482, "learning_rate": 8.274476831091182e-05, "loss": 3.0811, "step": 20325 }, { "epoch": 1.381301807310776, "grad_norm": 1.8516377210617065, "learning_rate": 8.274052181002853e-05, "loss": 3.142, "step": 20330 }, { "epoch": 1.3816415273814378, "grad_norm": 2.129700183868408, "learning_rate": 8.273627530914527e-05, "loss": 3.0831, "step": 20335 }, { "epoch": 1.3819812474520994, "grad_norm": 2.080352306365967, "learning_rate": 8.2732028808262e-05, "loss": 3.3032, "step": 20340 }, { "epoch": 1.3823209675227612, "grad_norm": 2.5409297943115234, "learning_rate": 8.272778230737872e-05, "loss": 3.1279, "step": 20345 }, { "epoch": 1.382660687593423, "grad_norm": 2.3008930683135986, "learning_rate": 8.272353580649546e-05, "loss": 3.1175, "step": 20350 }, { "epoch": 1.3830004076640847, "grad_norm": 2.0624516010284424, "learning_rate": 8.271928930561217e-05, "loss": 2.9542, "step": 20355 }, { "epoch": 1.3833401277347466, "grad_norm": 2.23747181892395, 
"learning_rate": 8.27150428047289e-05, "loss": 2.9788, "step": 20360 }, { "epoch": 1.3836798478054084, "grad_norm": 2.1014392375946045, "learning_rate": 8.271079630384564e-05, "loss": 3.2534, "step": 20365 }, { "epoch": 1.38401956787607, "grad_norm": 2.0518760681152344, "learning_rate": 8.270654980296236e-05, "loss": 3.2459, "step": 20370 }, { "epoch": 1.384359287946732, "grad_norm": 2.1980743408203125, "learning_rate": 8.270230330207909e-05, "loss": 3.1766, "step": 20375 }, { "epoch": 1.3846990080173938, "grad_norm": 2.0363404750823975, "learning_rate": 8.269805680119583e-05, "loss": 3.417, "step": 20380 }, { "epoch": 1.3850387280880554, "grad_norm": 2.8005053997039795, "learning_rate": 8.269381030031254e-05, "loss": 3.3321, "step": 20385 }, { "epoch": 1.3853784481587172, "grad_norm": 1.941962480545044, "learning_rate": 8.268956379942927e-05, "loss": 3.0745, "step": 20390 }, { "epoch": 1.385718168229379, "grad_norm": 2.526724338531494, "learning_rate": 8.268531729854601e-05, "loss": 3.0986, "step": 20395 }, { "epoch": 1.3860578883000407, "grad_norm": 2.1204662322998047, "learning_rate": 8.268107079766273e-05, "loss": 3.027, "step": 20400 }, { "epoch": 1.3863976083707026, "grad_norm": 3.70656681060791, "learning_rate": 8.267682429677945e-05, "loss": 3.0858, "step": 20405 }, { "epoch": 1.3867373284413644, "grad_norm": 2.3088057041168213, "learning_rate": 8.26725777958962e-05, "loss": 3.102, "step": 20410 }, { "epoch": 1.387077048512026, "grad_norm": 2.6806347370147705, "learning_rate": 8.266833129501291e-05, "loss": 3.2617, "step": 20415 }, { "epoch": 1.387416768582688, "grad_norm": 1.9130173921585083, "learning_rate": 8.266408479412964e-05, "loss": 3.1984, "step": 20420 }, { "epoch": 1.3877564886533498, "grad_norm": 1.9806621074676514, "learning_rate": 8.265983829324637e-05, "loss": 3.1172, "step": 20425 }, { "epoch": 1.3880962087240114, "grad_norm": 2.022230625152588, "learning_rate": 8.26555917923631e-05, "loss": 3.0141, "step": 20430 }, { "epoch": 
1.3884359287946733, "grad_norm": 2.104722738265991, "learning_rate": 8.265134529147982e-05, "loss": 3.1626, "step": 20435 }, { "epoch": 1.3887756488653349, "grad_norm": 2.387971878051758, "learning_rate": 8.264709879059655e-05, "loss": 3.1217, "step": 20440 }, { "epoch": 1.3891153689359967, "grad_norm": 2.7508127689361572, "learning_rate": 8.264285228971328e-05, "loss": 3.3625, "step": 20445 }, { "epoch": 1.3894550890066586, "grad_norm": 3.218015193939209, "learning_rate": 8.263860578883e-05, "loss": 3.1983, "step": 20450 }, { "epoch": 1.3897948090773202, "grad_norm": 2.2599637508392334, "learning_rate": 8.263435928794673e-05, "loss": 2.7813, "step": 20455 }, { "epoch": 1.390134529147982, "grad_norm": 2.2458770275115967, "learning_rate": 8.263011278706346e-05, "loss": 3.3739, "step": 20460 }, { "epoch": 1.3904742492186437, "grad_norm": 2.159101724624634, "learning_rate": 8.262586628618019e-05, "loss": 3.0675, "step": 20465 }, { "epoch": 1.3908139692893056, "grad_norm": 2.3456637859344482, "learning_rate": 8.262161978529692e-05, "loss": 2.9775, "step": 20470 }, { "epoch": 1.3911536893599674, "grad_norm": 2.1157217025756836, "learning_rate": 8.261737328441365e-05, "loss": 3.1257, "step": 20475 }, { "epoch": 1.391493409430629, "grad_norm": 2.341310739517212, "learning_rate": 8.261312678353037e-05, "loss": 3.2314, "step": 20480 }, { "epoch": 1.391833129501291, "grad_norm": 2.3226921558380127, "learning_rate": 8.26088802826471e-05, "loss": 2.9832, "step": 20485 }, { "epoch": 1.3921728495719528, "grad_norm": 2.402355909347534, "learning_rate": 8.260463378176383e-05, "loss": 3.1205, "step": 20490 }, { "epoch": 1.3925125696426144, "grad_norm": 2.277787923812866, "learning_rate": 8.260038728088056e-05, "loss": 3.0872, "step": 20495 }, { "epoch": 1.3928522897132762, "grad_norm": 2.8708958625793457, "learning_rate": 8.259614077999729e-05, "loss": 3.0786, "step": 20500 }, { "epoch": 1.393192009783938, "grad_norm": 2.236647129058838, "learning_rate": 8.259189427911401e-05, 
"loss": 3.1455, "step": 20505 }, { "epoch": 1.3935317298545997, "grad_norm": 2.3977179527282715, "learning_rate": 8.258764777823074e-05, "loss": 2.9557, "step": 20510 }, { "epoch": 1.3938714499252616, "grad_norm": 2.380377769470215, "learning_rate": 8.258340127734747e-05, "loss": 3.03, "step": 20515 }, { "epoch": 1.3942111699959234, "grad_norm": 2.793792486190796, "learning_rate": 8.25791547764642e-05, "loss": 3.3077, "step": 20520 }, { "epoch": 1.394550890066585, "grad_norm": 2.881566286087036, "learning_rate": 8.257490827558093e-05, "loss": 3.1737, "step": 20525 }, { "epoch": 1.394890610137247, "grad_norm": 2.6030044555664062, "learning_rate": 8.257066177469765e-05, "loss": 2.8968, "step": 20530 }, { "epoch": 1.3952303302079088, "grad_norm": 2.3953189849853516, "learning_rate": 8.256641527381438e-05, "loss": 3.2203, "step": 20535 }, { "epoch": 1.3955700502785704, "grad_norm": 1.8169585466384888, "learning_rate": 8.256216877293111e-05, "loss": 3.2986, "step": 20540 }, { "epoch": 1.3959097703492322, "grad_norm": 2.1150872707366943, "learning_rate": 8.255792227204784e-05, "loss": 3.1586, "step": 20545 }, { "epoch": 1.396249490419894, "grad_norm": 2.7152676582336426, "learning_rate": 8.255367577116457e-05, "loss": 2.8844, "step": 20550 }, { "epoch": 1.3965892104905557, "grad_norm": 1.7945562601089478, "learning_rate": 8.25494292702813e-05, "loss": 3.0309, "step": 20555 }, { "epoch": 1.3969289305612176, "grad_norm": 1.7501095533370972, "learning_rate": 8.254518276939802e-05, "loss": 2.8841, "step": 20560 }, { "epoch": 1.3972686506318794, "grad_norm": 2.3829140663146973, "learning_rate": 8.254093626851475e-05, "loss": 3.1809, "step": 20565 }, { "epoch": 1.397608370702541, "grad_norm": 1.9426268339157104, "learning_rate": 8.253668976763148e-05, "loss": 3.0188, "step": 20570 }, { "epoch": 1.397948090773203, "grad_norm": 1.9519649744033813, "learning_rate": 8.25324432667482e-05, "loss": 2.8217, "step": 20575 }, { "epoch": 1.3982878108438648, "grad_norm": 
2.6862709522247314, "learning_rate": 8.252819676586493e-05, "loss": 3.2123, "step": 20580 }, { "epoch": 1.3986275309145264, "grad_norm": 2.461069107055664, "learning_rate": 8.252395026498166e-05, "loss": 3.0193, "step": 20585 }, { "epoch": 1.3989672509851883, "grad_norm": 2.4085259437561035, "learning_rate": 8.251970376409839e-05, "loss": 3.2415, "step": 20590 }, { "epoch": 1.3993069710558501, "grad_norm": 2.4262914657592773, "learning_rate": 8.251545726321512e-05, "loss": 3.0608, "step": 20595 }, { "epoch": 1.3996466911265117, "grad_norm": 2.9582927227020264, "learning_rate": 8.251121076233185e-05, "loss": 3.0076, "step": 20600 }, { "epoch": 1.3999864111971736, "grad_norm": 2.474562406539917, "learning_rate": 8.250696426144857e-05, "loss": 3.2983, "step": 20605 }, { "epoch": 1.4003261312678352, "grad_norm": 2.2842636108398438, "learning_rate": 8.25027177605653e-05, "loss": 3.1906, "step": 20610 }, { "epoch": 1.400665851338497, "grad_norm": 1.781190037727356, "learning_rate": 8.249847125968203e-05, "loss": 3.3017, "step": 20615 }, { "epoch": 1.401005571409159, "grad_norm": 2.206921100616455, "learning_rate": 8.249422475879876e-05, "loss": 3.0484, "step": 20620 }, { "epoch": 1.4013452914798206, "grad_norm": 2.4714009761810303, "learning_rate": 8.248997825791547e-05, "loss": 2.9551, "step": 20625 }, { "epoch": 1.4016850115504824, "grad_norm": 1.5894997119903564, "learning_rate": 8.248573175703221e-05, "loss": 3.1898, "step": 20630 }, { "epoch": 1.402024731621144, "grad_norm": 3.3878135681152344, "learning_rate": 8.248148525614894e-05, "loss": 2.9763, "step": 20635 }, { "epoch": 1.402364451691806, "grad_norm": 2.1392290592193604, "learning_rate": 8.247723875526566e-05, "loss": 3.5644, "step": 20640 }, { "epoch": 1.4027041717624678, "grad_norm": 2.571692943572998, "learning_rate": 8.24729922543824e-05, "loss": 3.47, "step": 20645 }, { "epoch": 1.4030438918331294, "grad_norm": 2.525303840637207, "learning_rate": 8.246874575349913e-05, "loss": 3.036, "step": 20650 }, { 
"epoch": 1.4033836119037912, "grad_norm": 2.221220016479492, "learning_rate": 8.246449925261584e-05, "loss": 2.8469, "step": 20655 }, { "epoch": 1.403723331974453, "grad_norm": 1.9885146617889404, "learning_rate": 8.246025275173258e-05, "loss": 3.1918, "step": 20660 }, { "epoch": 1.4040630520451147, "grad_norm": 2.2824549674987793, "learning_rate": 8.245600625084931e-05, "loss": 3.3516, "step": 20665 }, { "epoch": 1.4044027721157766, "grad_norm": 2.475072145462036, "learning_rate": 8.245175974996602e-05, "loss": 3.1174, "step": 20670 }, { "epoch": 1.4047424921864384, "grad_norm": 2.137674331665039, "learning_rate": 8.244751324908277e-05, "loss": 3.0381, "step": 20675 }, { "epoch": 1.4050822122571, "grad_norm": 2.1038877964019775, "learning_rate": 8.24432667481995e-05, "loss": 3.1527, "step": 20680 }, { "epoch": 1.405421932327762, "grad_norm": 1.9170911312103271, "learning_rate": 8.243902024731621e-05, "loss": 3.1947, "step": 20685 }, { "epoch": 1.4057616523984238, "grad_norm": 2.039401054382324, "learning_rate": 8.243477374643295e-05, "loss": 2.9993, "step": 20690 }, { "epoch": 1.4061013724690854, "grad_norm": 2.1149933338165283, "learning_rate": 8.243052724554968e-05, "loss": 3.1452, "step": 20695 }, { "epoch": 1.4064410925397473, "grad_norm": 2.713371992111206, "learning_rate": 8.242628074466639e-05, "loss": 3.2017, "step": 20700 }, { "epoch": 1.406780812610409, "grad_norm": 2.218287944793701, "learning_rate": 8.242203424378313e-05, "loss": 3.0229, "step": 20705 }, { "epoch": 1.4071205326810707, "grad_norm": 2.4041788578033447, "learning_rate": 8.241778774289985e-05, "loss": 3.3165, "step": 20710 }, { "epoch": 1.4074602527517326, "grad_norm": 2.405266046524048, "learning_rate": 8.241354124201658e-05, "loss": 2.9573, "step": 20715 }, { "epoch": 1.4077999728223944, "grad_norm": 1.793777346611023, "learning_rate": 8.240929474113332e-05, "loss": 2.8743, "step": 20720 }, { "epoch": 1.408139692893056, "grad_norm": 2.073530673980713, "learning_rate": 
8.240504824025003e-05, "loss": 3.0486, "step": 20725 }, { "epoch": 1.408479412963718, "grad_norm": 2.65214204788208, "learning_rate": 8.240080173936676e-05, "loss": 3.0212, "step": 20730 }, { "epoch": 1.4088191330343798, "grad_norm": 2.2387924194335938, "learning_rate": 8.23965552384835e-05, "loss": 3.2006, "step": 20735 }, { "epoch": 1.4091588531050414, "grad_norm": 2.2694191932678223, "learning_rate": 8.239230873760022e-05, "loss": 2.9273, "step": 20740 }, { "epoch": 1.4094985731757033, "grad_norm": 2.1453797817230225, "learning_rate": 8.238806223671694e-05, "loss": 3.0255, "step": 20745 }, { "epoch": 1.4098382932463651, "grad_norm": 2.834642171859741, "learning_rate": 8.238381573583369e-05, "loss": 3.1018, "step": 20750 }, { "epoch": 1.4101780133170267, "grad_norm": 2.5196282863616943, "learning_rate": 8.23795692349504e-05, "loss": 2.8158, "step": 20755 }, { "epoch": 1.4105177333876886, "grad_norm": 2.2740976810455322, "learning_rate": 8.237532273406713e-05, "loss": 3.0884, "step": 20760 }, { "epoch": 1.4108574534583505, "grad_norm": 2.4846839904785156, "learning_rate": 8.237107623318387e-05, "loss": 3.2266, "step": 20765 }, { "epoch": 1.411197173529012, "grad_norm": 2.148468017578125, "learning_rate": 8.236682973230058e-05, "loss": 3.1342, "step": 20770 }, { "epoch": 1.411536893599674, "grad_norm": 2.5328493118286133, "learning_rate": 8.236258323141731e-05, "loss": 2.998, "step": 20775 }, { "epoch": 1.4118766136703356, "grad_norm": 1.9863817691802979, "learning_rate": 8.235833673053404e-05, "loss": 3.2965, "step": 20780 }, { "epoch": 1.4122163337409974, "grad_norm": 2.675961494445801, "learning_rate": 8.235409022965077e-05, "loss": 3.1175, "step": 20785 }, { "epoch": 1.4125560538116593, "grad_norm": 3.3449347019195557, "learning_rate": 8.23498437287675e-05, "loss": 3.0405, "step": 20790 }, { "epoch": 1.412895773882321, "grad_norm": 2.164681911468506, "learning_rate": 8.234559722788422e-05, "loss": 3.3099, "step": 20795 }, { "epoch": 1.4132354939529828, 
"grad_norm": 2.366765022277832, "learning_rate": 8.234135072700095e-05, "loss": 3.1268, "step": 20800 }, { "epoch": 1.4135752140236444, "grad_norm": 2.191352605819702, "learning_rate": 8.233710422611768e-05, "loss": 3.0938, "step": 20805 }, { "epoch": 1.4139149340943062, "grad_norm": 2.0899465084075928, "learning_rate": 8.233285772523441e-05, "loss": 2.9937, "step": 20810 }, { "epoch": 1.414254654164968, "grad_norm": 2.847877025604248, "learning_rate": 8.232861122435114e-05, "loss": 3.301, "step": 20815 }, { "epoch": 1.4145943742356297, "grad_norm": 2.8321621417999268, "learning_rate": 8.232436472346786e-05, "loss": 3.2424, "step": 20820 }, { "epoch": 1.4149340943062916, "grad_norm": 2.0046005249023438, "learning_rate": 8.232011822258459e-05, "loss": 3.1894, "step": 20825 }, { "epoch": 1.4152738143769534, "grad_norm": 2.4765868186950684, "learning_rate": 8.231587172170132e-05, "loss": 3.2132, "step": 20830 }, { "epoch": 1.415613534447615, "grad_norm": 2.112231969833374, "learning_rate": 8.231162522081805e-05, "loss": 3.065, "step": 20835 }, { "epoch": 1.415953254518277, "grad_norm": 1.8529397249221802, "learning_rate": 8.230737871993478e-05, "loss": 3.2497, "step": 20840 }, { "epoch": 1.4162929745889388, "grad_norm": 2.848719358444214, "learning_rate": 8.23031322190515e-05, "loss": 3.3069, "step": 20845 }, { "epoch": 1.4166326946596004, "grad_norm": 2.429046392440796, "learning_rate": 8.229888571816823e-05, "loss": 2.9168, "step": 20850 }, { "epoch": 1.4169724147302623, "grad_norm": 1.9812660217285156, "learning_rate": 8.229463921728496e-05, "loss": 2.9549, "step": 20855 }, { "epoch": 1.417312134800924, "grad_norm": 2.3665666580200195, "learning_rate": 8.229039271640169e-05, "loss": 3.0706, "step": 20860 }, { "epoch": 1.4176518548715857, "grad_norm": 2.329331159591675, "learning_rate": 8.228614621551842e-05, "loss": 3.1358, "step": 20865 }, { "epoch": 1.4179915749422476, "grad_norm": 2.1407110691070557, "learning_rate": 8.228189971463514e-05, "loss": 3.1366, 
"step": 20870 }, { "epoch": 1.4183312950129094, "grad_norm": 1.7732393741607666, "learning_rate": 8.227765321375187e-05, "loss": 3.1153, "step": 20875 }, { "epoch": 1.418671015083571, "grad_norm": 2.2510104179382324, "learning_rate": 8.22734067128686e-05, "loss": 3.2287, "step": 20880 }, { "epoch": 1.419010735154233, "grad_norm": 1.987746238708496, "learning_rate": 8.226916021198533e-05, "loss": 3.0724, "step": 20885 }, { "epoch": 1.4193504552248948, "grad_norm": 2.268430471420288, "learning_rate": 8.226491371110206e-05, "loss": 3.189, "step": 20890 }, { "epoch": 1.4196901752955564, "grad_norm": 2.629068374633789, "learning_rate": 8.226066721021878e-05, "loss": 3.389, "step": 20895 }, { "epoch": 1.4200298953662183, "grad_norm": 2.505136489868164, "learning_rate": 8.225642070933551e-05, "loss": 3.2129, "step": 20900 }, { "epoch": 1.4203696154368801, "grad_norm": 2.6550087928771973, "learning_rate": 8.225217420845224e-05, "loss": 2.8335, "step": 20905 }, { "epoch": 1.4207093355075417, "grad_norm": 2.877295732498169, "learning_rate": 8.224792770756897e-05, "loss": 3.2846, "step": 20910 }, { "epoch": 1.4210490555782036, "grad_norm": 1.8921520709991455, "learning_rate": 8.22436812066857e-05, "loss": 3.0472, "step": 20915 }, { "epoch": 1.4213887756488655, "grad_norm": 2.480560779571533, "learning_rate": 8.223943470580242e-05, "loss": 3.3146, "step": 20920 }, { "epoch": 1.421728495719527, "grad_norm": 2.417854070663452, "learning_rate": 8.223518820491915e-05, "loss": 3.1723, "step": 20925 }, { "epoch": 1.422068215790189, "grad_norm": 2.013273000717163, "learning_rate": 8.223094170403588e-05, "loss": 3.1093, "step": 20930 }, { "epoch": 1.4224079358608508, "grad_norm": 2.351437568664551, "learning_rate": 8.222669520315261e-05, "loss": 3.1184, "step": 20935 }, { "epoch": 1.4227476559315124, "grad_norm": 2.3020803928375244, "learning_rate": 8.222244870226934e-05, "loss": 2.9524, "step": 20940 }, { "epoch": 1.4230873760021743, "grad_norm": 2.2138304710388184, "learning_rate": 
8.221820220138606e-05, "loss": 2.9494, "step": 20945 }, { "epoch": 1.423427096072836, "grad_norm": 3.2288293838500977, "learning_rate": 8.221395570050279e-05, "loss": 3.0482, "step": 20950 }, { "epoch": 1.4237668161434978, "grad_norm": 2.048159599304199, "learning_rate": 8.220970919961952e-05, "loss": 3.3927, "step": 20955 }, { "epoch": 1.4241065362141596, "grad_norm": 2.743596315383911, "learning_rate": 8.220546269873625e-05, "loss": 3.2629, "step": 20960 }, { "epoch": 1.4244462562848212, "grad_norm": 1.9155710935592651, "learning_rate": 8.220121619785298e-05, "loss": 3.0751, "step": 20965 }, { "epoch": 1.424785976355483, "grad_norm": 2.3849992752075195, "learning_rate": 8.21969696969697e-05, "loss": 3.319, "step": 20970 }, { "epoch": 1.4251256964261447, "grad_norm": 2.19162917137146, "learning_rate": 8.219272319608643e-05, "loss": 2.991, "step": 20975 }, { "epoch": 1.4254654164968066, "grad_norm": 2.237793445587158, "learning_rate": 8.218847669520315e-05, "loss": 3.31, "step": 20980 }, { "epoch": 1.4258051365674684, "grad_norm": 2.530552625656128, "learning_rate": 8.218423019431989e-05, "loss": 3.1369, "step": 20985 }, { "epoch": 1.42614485663813, "grad_norm": 3.318754196166992, "learning_rate": 8.217998369343662e-05, "loss": 2.9894, "step": 20990 }, { "epoch": 1.426484576708792, "grad_norm": 2.0185229778289795, "learning_rate": 8.217573719255333e-05, "loss": 2.8334, "step": 20995 }, { "epoch": 1.4268242967794538, "grad_norm": 2.3050625324249268, "learning_rate": 8.217149069167007e-05, "loss": 3.1339, "step": 21000 }, { "epoch": 1.4271640168501154, "grad_norm": 2.322265148162842, "learning_rate": 8.21672441907868e-05, "loss": 3.2718, "step": 21005 }, { "epoch": 1.4275037369207773, "grad_norm": 2.462761878967285, "learning_rate": 8.216299768990352e-05, "loss": 3.1209, "step": 21010 }, { "epoch": 1.427843456991439, "grad_norm": 2.065742015838623, "learning_rate": 8.215875118902026e-05, "loss": 3.293, "step": 21015 }, { "epoch": 1.4281831770621007, "grad_norm": 
2.6446399688720703, "learning_rate": 8.215450468813698e-05, "loss": 2.9724, "step": 21020 }, { "epoch": 1.4285228971327626, "grad_norm": 2.3302907943725586, "learning_rate": 8.21502581872537e-05, "loss": 3.1505, "step": 21025 }, { "epoch": 1.4288626172034244, "grad_norm": 2.2137393951416016, "learning_rate": 8.214601168637044e-05, "loss": 3.2382, "step": 21030 }, { "epoch": 1.429202337274086, "grad_norm": 2.144469976425171, "learning_rate": 8.214176518548717e-05, "loss": 3.1081, "step": 21035 }, { "epoch": 1.429542057344748, "grad_norm": 2.4748640060424805, "learning_rate": 8.213751868460388e-05, "loss": 3.3355, "step": 21040 }, { "epoch": 1.4298817774154098, "grad_norm": 2.0508370399475098, "learning_rate": 8.213327218372062e-05, "loss": 3.1559, "step": 21045 }, { "epoch": 1.4302214974860714, "grad_norm": 2.167984962463379, "learning_rate": 8.212902568283735e-05, "loss": 3.2638, "step": 21050 }, { "epoch": 1.4305612175567333, "grad_norm": 2.3773393630981445, "learning_rate": 8.212477918195407e-05, "loss": 3.3355, "step": 21055 }, { "epoch": 1.4309009376273951, "grad_norm": 2.486272096633911, "learning_rate": 8.212053268107081e-05, "loss": 3.1343, "step": 21060 }, { "epoch": 1.4312406576980568, "grad_norm": 1.8494338989257812, "learning_rate": 8.211628618018752e-05, "loss": 3.2411, "step": 21065 }, { "epoch": 1.4315803777687186, "grad_norm": 2.4917538166046143, "learning_rate": 8.211203967930425e-05, "loss": 3.0116, "step": 21070 }, { "epoch": 1.4319200978393805, "grad_norm": 2.3786776065826416, "learning_rate": 8.210779317842099e-05, "loss": 3.0055, "step": 21075 }, { "epoch": 1.432259817910042, "grad_norm": 2.315488576889038, "learning_rate": 8.210354667753771e-05, "loss": 3.062, "step": 21080 }, { "epoch": 1.432599537980704, "grad_norm": 2.857086181640625, "learning_rate": 8.209930017665444e-05, "loss": 3.2804, "step": 21085 }, { "epoch": 1.4329392580513658, "grad_norm": 2.245039701461792, "learning_rate": 8.209505367577118e-05, "loss": 3.0896, "step": 21090 }, 
{ "epoch": 1.4332789781220274, "grad_norm": 2.178875684738159, "learning_rate": 8.209080717488789e-05, "loss": 3.0292, "step": 21095 }, { "epoch": 1.4336186981926893, "grad_norm": 2.464405059814453, "learning_rate": 8.208656067400462e-05, "loss": 2.8995, "step": 21100 }, { "epoch": 1.4339584182633511, "grad_norm": 2.1630334854125977, "learning_rate": 8.208231417312136e-05, "loss": 3.0116, "step": 21105 }, { "epoch": 1.4342981383340128, "grad_norm": 1.864648461341858, "learning_rate": 8.207806767223808e-05, "loss": 3.1695, "step": 21110 }, { "epoch": 1.4346378584046746, "grad_norm": 2.046037197113037, "learning_rate": 8.20738211713548e-05, "loss": 3.0734, "step": 21115 }, { "epoch": 1.4349775784753362, "grad_norm": 2.4201462268829346, "learning_rate": 8.206957467047154e-05, "loss": 3.2512, "step": 21120 }, { "epoch": 1.435317298545998, "grad_norm": 1.9256389141082764, "learning_rate": 8.206532816958826e-05, "loss": 3.1762, "step": 21125 }, { "epoch": 1.43565701861666, "grad_norm": 2.251685619354248, "learning_rate": 8.206108166870499e-05, "loss": 3.1873, "step": 21130 }, { "epoch": 1.4359967386873216, "grad_norm": 1.899512767791748, "learning_rate": 8.205683516782172e-05, "loss": 3.2561, "step": 21135 }, { "epoch": 1.4363364587579834, "grad_norm": 2.435127019882202, "learning_rate": 8.205258866693844e-05, "loss": 3.1973, "step": 21140 }, { "epoch": 1.436676178828645, "grad_norm": 2.2361338138580322, "learning_rate": 8.204834216605517e-05, "loss": 3.2118, "step": 21145 }, { "epoch": 1.437015898899307, "grad_norm": 2.7010786533355713, "learning_rate": 8.20440956651719e-05, "loss": 3.0619, "step": 21150 }, { "epoch": 1.4373556189699688, "grad_norm": 1.9589693546295166, "learning_rate": 8.203984916428863e-05, "loss": 3.0892, "step": 21155 }, { "epoch": 1.4376953390406304, "grad_norm": 2.694336414337158, "learning_rate": 8.203560266340536e-05, "loss": 2.9059, "step": 21160 }, { "epoch": 1.4380350591112923, "grad_norm": 3.054337739944458, "learning_rate": 
8.203135616252208e-05, "loss": 3.14, "step": 21165 }, { "epoch": 1.438374779181954, "grad_norm": 2.087423086166382, "learning_rate": 8.202710966163881e-05, "loss": 3.5099, "step": 21170 }, { "epoch": 1.4387144992526157, "grad_norm": 2.5977048873901367, "learning_rate": 8.202286316075554e-05, "loss": 3.15, "step": 21175 }, { "epoch": 1.4390542193232776, "grad_norm": 1.7532145977020264, "learning_rate": 8.201861665987227e-05, "loss": 3.0722, "step": 21180 }, { "epoch": 1.4393939393939394, "grad_norm": 2.2247447967529297, "learning_rate": 8.2014370158989e-05, "loss": 3.2625, "step": 21185 }, { "epoch": 1.439733659464601, "grad_norm": 2.6481752395629883, "learning_rate": 8.201012365810572e-05, "loss": 3.0951, "step": 21190 }, { "epoch": 1.440073379535263, "grad_norm": 2.276188850402832, "learning_rate": 8.200587715722245e-05, "loss": 3.1992, "step": 21195 }, { "epoch": 1.4404130996059248, "grad_norm": 2.1375932693481445, "learning_rate": 8.200163065633918e-05, "loss": 3.1335, "step": 21200 }, { "epoch": 1.4407528196765864, "grad_norm": 2.505671262741089, "learning_rate": 8.199738415545591e-05, "loss": 3.4725, "step": 21205 }, { "epoch": 1.4410925397472483, "grad_norm": 2.0956218242645264, "learning_rate": 8.199313765457264e-05, "loss": 3.1477, "step": 21210 }, { "epoch": 1.4414322598179101, "grad_norm": 1.99732506275177, "learning_rate": 8.198889115368936e-05, "loss": 3.3213, "step": 21215 }, { "epoch": 1.4417719798885718, "grad_norm": 2.257024049758911, "learning_rate": 8.198464465280609e-05, "loss": 3.2751, "step": 21220 }, { "epoch": 1.4421116999592336, "grad_norm": 2.451585054397583, "learning_rate": 8.198039815192282e-05, "loss": 3.1127, "step": 21225 }, { "epoch": 1.4424514200298955, "grad_norm": 2.5541532039642334, "learning_rate": 8.197615165103955e-05, "loss": 3.1756, "step": 21230 }, { "epoch": 1.442791140100557, "grad_norm": 2.9582409858703613, "learning_rate": 8.197190515015628e-05, "loss": 3.1656, "step": 21235 }, { "epoch": 1.443130860171219, "grad_norm": 
2.2966880798339844, "learning_rate": 8.1967658649273e-05, "loss": 3.0335, "step": 21240 }, { "epoch": 1.4434705802418808, "grad_norm": 2.2744967937469482, "learning_rate": 8.196341214838973e-05, "loss": 3.3246, "step": 21245 }, { "epoch": 1.4438103003125424, "grad_norm": 2.4437222480773926, "learning_rate": 8.195916564750646e-05, "loss": 3.0277, "step": 21250 }, { "epoch": 1.4441500203832043, "grad_norm": 2.6923649311065674, "learning_rate": 8.195491914662319e-05, "loss": 3.235, "step": 21255 }, { "epoch": 1.4444897404538661, "grad_norm": 2.1755564212799072, "learning_rate": 8.195067264573992e-05, "loss": 3.018, "step": 21260 }, { "epoch": 1.4448294605245278, "grad_norm": 2.3752338886260986, "learning_rate": 8.194642614485664e-05, "loss": 2.8705, "step": 21265 }, { "epoch": 1.4451691805951896, "grad_norm": 2.1604950428009033, "learning_rate": 8.194217964397337e-05, "loss": 3.0399, "step": 21270 }, { "epoch": 1.4455089006658515, "grad_norm": 3.874286413192749, "learning_rate": 8.19379331430901e-05, "loss": 2.9547, "step": 21275 }, { "epoch": 1.445848620736513, "grad_norm": 2.116117238998413, "learning_rate": 8.193368664220683e-05, "loss": 2.9006, "step": 21280 }, { "epoch": 1.446188340807175, "grad_norm": 2.368436574935913, "learning_rate": 8.192944014132356e-05, "loss": 3.2923, "step": 21285 }, { "epoch": 1.4465280608778366, "grad_norm": 1.881812572479248, "learning_rate": 8.192519364044028e-05, "loss": 3.4516, "step": 21290 }, { "epoch": 1.4468677809484984, "grad_norm": 2.399817943572998, "learning_rate": 8.192094713955701e-05, "loss": 3.0193, "step": 21295 }, { "epoch": 1.4472075010191603, "grad_norm": 2.359391450881958, "learning_rate": 8.191670063867374e-05, "loss": 3.1371, "step": 21300 }, { "epoch": 1.447547221089822, "grad_norm": 2.470081090927124, "learning_rate": 8.191245413779047e-05, "loss": 3.0367, "step": 21305 }, { "epoch": 1.4478869411604838, "grad_norm": 2.8054094314575195, "learning_rate": 8.19082076369072e-05, "loss": 3.0233, "step": 21310 }, { 
"epoch": 1.4482266612311454, "grad_norm": 2.9361116886138916, "learning_rate": 8.190396113602392e-05, "loss": 3.1115, "step": 21315 }, { "epoch": 1.4485663813018073, "grad_norm": 2.272947072982788, "learning_rate": 8.189971463514065e-05, "loss": 3.0345, "step": 21320 }, { "epoch": 1.4489061013724691, "grad_norm": 2.72670841217041, "learning_rate": 8.189546813425738e-05, "loss": 3.2559, "step": 21325 }, { "epoch": 1.4492458214431307, "grad_norm": 2.041247606277466, "learning_rate": 8.189122163337411e-05, "loss": 3.1027, "step": 21330 }, { "epoch": 1.4495855415137926, "grad_norm": 2.2772579193115234, "learning_rate": 8.188697513249082e-05, "loss": 3.3037, "step": 21335 }, { "epoch": 1.4499252615844545, "grad_norm": 1.917273998260498, "learning_rate": 8.188272863160756e-05, "loss": 3.0677, "step": 21340 }, { "epoch": 1.450264981655116, "grad_norm": 2.3226027488708496, "learning_rate": 8.187848213072429e-05, "loss": 3.3472, "step": 21345 }, { "epoch": 1.450604701725778, "grad_norm": 4.2957963943481445, "learning_rate": 8.1874235629841e-05, "loss": 3.1524, "step": 21350 }, { "epoch": 1.4509444217964398, "grad_norm": 2.567692518234253, "learning_rate": 8.186998912895775e-05, "loss": 3.0993, "step": 21355 }, { "epoch": 1.4512841418671014, "grad_norm": 2.390002489089966, "learning_rate": 8.186574262807448e-05, "loss": 3.1362, "step": 21360 }, { "epoch": 1.4516238619377633, "grad_norm": 2.3773672580718994, "learning_rate": 8.186149612719119e-05, "loss": 3.0634, "step": 21365 }, { "epoch": 1.4519635820084251, "grad_norm": 2.0547966957092285, "learning_rate": 8.185724962630793e-05, "loss": 3.5409, "step": 21370 }, { "epoch": 1.4523033020790868, "grad_norm": 1.9661675691604614, "learning_rate": 8.185300312542466e-05, "loss": 3.1074, "step": 21375 }, { "epoch": 1.4526430221497486, "grad_norm": 2.310188055038452, "learning_rate": 8.184875662454137e-05, "loss": 2.8887, "step": 21380 }, { "epoch": 1.4529827422204105, "grad_norm": 2.3605878353118896, "learning_rate": 
8.184451012365812e-05, "loss": 3.3244, "step": 21385 }, { "epoch": 1.453322462291072, "grad_norm": 2.394765615463257, "learning_rate": 8.184026362277484e-05, "loss": 3.183, "step": 21390 }, { "epoch": 1.453662182361734, "grad_norm": 2.571427822113037, "learning_rate": 8.183601712189156e-05, "loss": 2.8497, "step": 21395 }, { "epoch": 1.4540019024323958, "grad_norm": 2.530102252960205, "learning_rate": 8.18317706210083e-05, "loss": 3.0503, "step": 21400 }, { "epoch": 1.4543416225030574, "grad_norm": 1.9617499113082886, "learning_rate": 8.182752412012501e-05, "loss": 3.2468, "step": 21405 }, { "epoch": 1.4546813425737193, "grad_norm": 2.385876178741455, "learning_rate": 8.182327761924174e-05, "loss": 3.2283, "step": 21410 }, { "epoch": 1.4550210626443811, "grad_norm": 2.9865622520446777, "learning_rate": 8.181903111835848e-05, "loss": 3.188, "step": 21415 }, { "epoch": 1.4553607827150428, "grad_norm": 2.6668357849121094, "learning_rate": 8.18147846174752e-05, "loss": 2.996, "step": 21420 }, { "epoch": 1.4557005027857046, "grad_norm": 2.6005356311798096, "learning_rate": 8.181053811659193e-05, "loss": 3.0422, "step": 21425 }, { "epoch": 1.4560402228563665, "grad_norm": 3.0199570655822754, "learning_rate": 8.180629161570867e-05, "loss": 3.1721, "step": 21430 }, { "epoch": 1.456379942927028, "grad_norm": 2.1057827472686768, "learning_rate": 8.180204511482538e-05, "loss": 2.9194, "step": 21435 }, { "epoch": 1.45671966299769, "grad_norm": 2.74001145362854, "learning_rate": 8.179779861394211e-05, "loss": 3.3322, "step": 21440 }, { "epoch": 1.4570593830683518, "grad_norm": 2.039435386657715, "learning_rate": 8.179355211305885e-05, "loss": 3.056, "step": 21445 }, { "epoch": 1.4573991031390134, "grad_norm": 1.9968096017837524, "learning_rate": 8.178930561217557e-05, "loss": 3.4177, "step": 21450 }, { "epoch": 1.4577388232096753, "grad_norm": 2.1837856769561768, "learning_rate": 8.17850591112923e-05, "loss": 3.2066, "step": 21455 }, { "epoch": 1.458078543280337, "grad_norm": 
2.1833746433258057, "learning_rate": 8.178081261040904e-05, "loss": 2.9974, "step": 21460 }, { "epoch": 1.4584182633509988, "grad_norm": 2.6582283973693848, "learning_rate": 8.177656610952575e-05, "loss": 3.3573, "step": 21465 }, { "epoch": 1.4587579834216606, "grad_norm": 1.6959004402160645, "learning_rate": 8.177231960864248e-05, "loss": 3.0079, "step": 21470 }, { "epoch": 1.4590977034923223, "grad_norm": 2.3994617462158203, "learning_rate": 8.176807310775922e-05, "loss": 2.9953, "step": 21475 }, { "epoch": 1.4594374235629841, "grad_norm": 2.4883947372436523, "learning_rate": 8.176382660687593e-05, "loss": 3.2973, "step": 21480 }, { "epoch": 1.4597771436336457, "grad_norm": 2.384650945663452, "learning_rate": 8.175958010599266e-05, "loss": 3.1889, "step": 21485 }, { "epoch": 1.4601168637043076, "grad_norm": 3.0406787395477295, "learning_rate": 8.175533360510939e-05, "loss": 3.3719, "step": 21490 }, { "epoch": 1.4604565837749695, "grad_norm": 2.534827947616577, "learning_rate": 8.175108710422612e-05, "loss": 3.3862, "step": 21495 }, { "epoch": 1.460796303845631, "grad_norm": 2.1604185104370117, "learning_rate": 8.174684060334285e-05, "loss": 2.8296, "step": 21500 }, { "epoch": 1.461136023916293, "grad_norm": 2.440549373626709, "learning_rate": 8.174259410245957e-05, "loss": 3.0673, "step": 21505 }, { "epoch": 1.4614757439869548, "grad_norm": 2.082812786102295, "learning_rate": 8.17383476015763e-05, "loss": 3.3464, "step": 21510 }, { "epoch": 1.4618154640576164, "grad_norm": 2.5672848224639893, "learning_rate": 8.173410110069303e-05, "loss": 3.2632, "step": 21515 }, { "epoch": 1.4621551841282783, "grad_norm": 3.2514288425445557, "learning_rate": 8.172985459980976e-05, "loss": 2.9521, "step": 21520 }, { "epoch": 1.4624949041989401, "grad_norm": 2.345829725265503, "learning_rate": 8.172560809892649e-05, "loss": 3.366, "step": 21525 }, { "epoch": 1.4628346242696018, "grad_norm": 2.612558364868164, "learning_rate": 8.172136159804321e-05, "loss": 3.0705, "step": 21530 
}, { "epoch": 1.4631743443402636, "grad_norm": 2.7469096183776855, "learning_rate": 8.171711509715994e-05, "loss": 3.1686, "step": 21535 }, { "epoch": 1.4635140644109255, "grad_norm": 3.039041042327881, "learning_rate": 8.171286859627667e-05, "loss": 3.1072, "step": 21540 }, { "epoch": 1.463853784481587, "grad_norm": 2.201322317123413, "learning_rate": 8.17086220953934e-05, "loss": 2.9188, "step": 21545 }, { "epoch": 1.464193504552249, "grad_norm": 2.55010986328125, "learning_rate": 8.170437559451013e-05, "loss": 3.3439, "step": 21550 }, { "epoch": 1.4645332246229108, "grad_norm": 2.818606376647949, "learning_rate": 8.170012909362685e-05, "loss": 3.0044, "step": 21555 }, { "epoch": 1.4648729446935724, "grad_norm": 1.8057218790054321, "learning_rate": 8.169588259274358e-05, "loss": 3.2332, "step": 21560 }, { "epoch": 1.4652126647642343, "grad_norm": 2.095020055770874, "learning_rate": 8.169163609186031e-05, "loss": 3.3442, "step": 21565 }, { "epoch": 1.4655523848348961, "grad_norm": 2.3326210975646973, "learning_rate": 8.168738959097704e-05, "loss": 3.3416, "step": 21570 }, { "epoch": 1.4658921049055578, "grad_norm": 2.106870412826538, "learning_rate": 8.168314309009377e-05, "loss": 3.3424, "step": 21575 }, { "epoch": 1.4662318249762196, "grad_norm": 2.719245672225952, "learning_rate": 8.16788965892105e-05, "loss": 3.5093, "step": 21580 }, { "epoch": 1.4665715450468815, "grad_norm": 2.0219976902008057, "learning_rate": 8.167465008832722e-05, "loss": 3.1554, "step": 21585 }, { "epoch": 1.466911265117543, "grad_norm": 1.8291354179382324, "learning_rate": 8.167040358744395e-05, "loss": 3.1183, "step": 21590 }, { "epoch": 1.467250985188205, "grad_norm": 2.2609548568725586, "learning_rate": 8.166615708656068e-05, "loss": 3.1338, "step": 21595 }, { "epoch": 1.4675907052588668, "grad_norm": 2.5385022163391113, "learning_rate": 8.16619105856774e-05, "loss": 3.255, "step": 21600 }, { "epoch": 1.4679304253295284, "grad_norm": 2.0754313468933105, "learning_rate": 
8.165766408479413e-05, "loss": 3.213, "step": 21605 }, { "epoch": 1.4682701454001903, "grad_norm": 2.188455581665039, "learning_rate": 8.165341758391086e-05, "loss": 3.3053, "step": 21610 }, { "epoch": 1.4686098654708521, "grad_norm": 2.0832107067108154, "learning_rate": 8.164917108302759e-05, "loss": 3.0363, "step": 21615 }, { "epoch": 1.4689495855415138, "grad_norm": 2.5153415203094482, "learning_rate": 8.164492458214432e-05, "loss": 3.1454, "step": 21620 }, { "epoch": 1.4692893056121756, "grad_norm": 2.162707805633545, "learning_rate": 8.164067808126105e-05, "loss": 2.8139, "step": 21625 }, { "epoch": 1.4696290256828373, "grad_norm": 2.1490602493286133, "learning_rate": 8.163643158037777e-05, "loss": 2.9023, "step": 21630 }, { "epoch": 1.4699687457534991, "grad_norm": 2.539289712905884, "learning_rate": 8.16321850794945e-05, "loss": 3.2756, "step": 21635 }, { "epoch": 1.470308465824161, "grad_norm": 2.5845298767089844, "learning_rate": 8.162793857861123e-05, "loss": 2.9754, "step": 21640 }, { "epoch": 1.4706481858948226, "grad_norm": 2.398932456970215, "learning_rate": 8.162369207772796e-05, "loss": 3.0342, "step": 21645 }, { "epoch": 1.4709879059654845, "grad_norm": 2.4456968307495117, "learning_rate": 8.161944557684469e-05, "loss": 3.1146, "step": 21650 }, { "epoch": 1.471327626036146, "grad_norm": 2.595708131790161, "learning_rate": 8.161519907596141e-05, "loss": 3.1591, "step": 21655 }, { "epoch": 1.471667346106808, "grad_norm": 2.0897984504699707, "learning_rate": 8.161095257507814e-05, "loss": 2.8923, "step": 21660 }, { "epoch": 1.4720070661774698, "grad_norm": 2.726308822631836, "learning_rate": 8.160670607419487e-05, "loss": 3.2104, "step": 21665 }, { "epoch": 1.4723467862481314, "grad_norm": 2.102151870727539, "learning_rate": 8.16024595733116e-05, "loss": 3.3072, "step": 21670 }, { "epoch": 1.4726865063187933, "grad_norm": 2.1287736892700195, "learning_rate": 8.159821307242833e-05, "loss": 3.0793, "step": 21675 }, { "epoch": 1.4730262263894551, 
"grad_norm": 2.0891668796539307, "learning_rate": 8.159396657154505e-05, "loss": 3.0618, "step": 21680 }, { "epoch": 1.4733659464601168, "grad_norm": 2.176342248916626, "learning_rate": 8.158972007066178e-05, "loss": 3.0817, "step": 21685 }, { "epoch": 1.4737056665307786, "grad_norm": 2.1339001655578613, "learning_rate": 8.15854735697785e-05, "loss": 3.0742, "step": 21690 }, { "epoch": 1.4740453866014405, "grad_norm": 2.301198720932007, "learning_rate": 8.158122706889524e-05, "loss": 3.2908, "step": 21695 }, { "epoch": 1.474385106672102, "grad_norm": 2.0960190296173096, "learning_rate": 8.157698056801197e-05, "loss": 3.1484, "step": 21700 }, { "epoch": 1.474724826742764, "grad_norm": 2.0909488201141357, "learning_rate": 8.157273406712868e-05, "loss": 3.017, "step": 21705 }, { "epoch": 1.4750645468134258, "grad_norm": 2.356675386428833, "learning_rate": 8.156848756624542e-05, "loss": 3.129, "step": 21710 }, { "epoch": 1.4754042668840874, "grad_norm": 1.9985225200653076, "learning_rate": 8.156424106536215e-05, "loss": 3.2561, "step": 21715 }, { "epoch": 1.4757439869547493, "grad_norm": 2.045762300491333, "learning_rate": 8.155999456447887e-05, "loss": 3.1774, "step": 21720 }, { "epoch": 1.4760837070254111, "grad_norm": 2.750842809677124, "learning_rate": 8.155574806359561e-05, "loss": 3.03, "step": 21725 }, { "epoch": 1.4764234270960728, "grad_norm": 3.570111036300659, "learning_rate": 8.155150156271233e-05, "loss": 3.1465, "step": 21730 }, { "epoch": 1.4767631471667346, "grad_norm": 2.482123851776123, "learning_rate": 8.154725506182905e-05, "loss": 2.95, "step": 21735 }, { "epoch": 1.4771028672373965, "grad_norm": 2.1936609745025635, "learning_rate": 8.154300856094579e-05, "loss": 3.1092, "step": 21740 }, { "epoch": 1.477442587308058, "grad_norm": 2.47110652923584, "learning_rate": 8.153876206006252e-05, "loss": 2.981, "step": 21745 }, { "epoch": 1.47778230737872, "grad_norm": 2.4511544704437256, "learning_rate": 8.153451555917923e-05, "loss": 3.1318, "step": 21750 
}, { "epoch": 1.4781220274493818, "grad_norm": 1.763468861579895, "learning_rate": 8.153026905829597e-05, "loss": 3.129, "step": 21755 }, { "epoch": 1.4784617475200434, "grad_norm": 2.098017454147339, "learning_rate": 8.152602255741269e-05, "loss": 3.0641, "step": 21760 }, { "epoch": 1.4788014675907053, "grad_norm": 2.7241034507751465, "learning_rate": 8.152177605652942e-05, "loss": 3.5035, "step": 21765 }, { "epoch": 1.4791411876613672, "grad_norm": 2.263162851333618, "learning_rate": 8.151752955564616e-05, "loss": 3.1498, "step": 21770 }, { "epoch": 1.4794809077320288, "grad_norm": 2.13037109375, "learning_rate": 8.151328305476287e-05, "loss": 3.1854, "step": 21775 }, { "epoch": 1.4798206278026906, "grad_norm": 1.8671108484268188, "learning_rate": 8.15090365538796e-05, "loss": 2.9688, "step": 21780 }, { "epoch": 1.4801603478733525, "grad_norm": 2.0200414657592773, "learning_rate": 8.150479005299634e-05, "loss": 3.3353, "step": 21785 }, { "epoch": 1.4805000679440141, "grad_norm": 2.113217830657959, "learning_rate": 8.150054355211306e-05, "loss": 3.2097, "step": 21790 }, { "epoch": 1.480839788014676, "grad_norm": 2.640662670135498, "learning_rate": 8.149629705122979e-05, "loss": 3.2542, "step": 21795 }, { "epoch": 1.4811795080853376, "grad_norm": 2.647900104522705, "learning_rate": 8.149205055034653e-05, "loss": 3.222, "step": 21800 }, { "epoch": 1.4815192281559995, "grad_norm": 1.6794424057006836, "learning_rate": 8.148780404946324e-05, "loss": 3.3978, "step": 21805 }, { "epoch": 1.4818589482266613, "grad_norm": 2.296631336212158, "learning_rate": 8.148355754857997e-05, "loss": 3.2332, "step": 21810 }, { "epoch": 1.482198668297323, "grad_norm": 3.1611526012420654, "learning_rate": 8.147931104769671e-05, "loss": 3.327, "step": 21815 }, { "epoch": 1.4825383883679848, "grad_norm": 1.8363741636276245, "learning_rate": 8.147506454681343e-05, "loss": 2.8675, "step": 21820 }, { "epoch": 1.4828781084386464, "grad_norm": 2.590754747390747, "learning_rate": 
8.147081804593015e-05, "loss": 3.4195, "step": 21825 }, { "epoch": 1.4832178285093083, "grad_norm": 2.1619303226470947, "learning_rate": 8.146657154504688e-05, "loss": 3.1473, "step": 21830 }, { "epoch": 1.4835575485799701, "grad_norm": 2.6810085773468018, "learning_rate": 8.146232504416361e-05, "loss": 3.3049, "step": 21835 }, { "epoch": 1.4838972686506318, "grad_norm": 2.0129213333129883, "learning_rate": 8.145807854328034e-05, "loss": 3.0352, "step": 21840 }, { "epoch": 1.4842369887212936, "grad_norm": 2.7097628116607666, "learning_rate": 8.145383204239707e-05, "loss": 3.0395, "step": 21845 }, { "epoch": 1.4845767087919555, "grad_norm": 2.8040475845336914, "learning_rate": 8.14495855415138e-05, "loss": 3.0627, "step": 21850 }, { "epoch": 1.484916428862617, "grad_norm": 3.376140832901001, "learning_rate": 8.144533904063052e-05, "loss": 3.2558, "step": 21855 }, { "epoch": 1.485256148933279, "grad_norm": 2.3673391342163086, "learning_rate": 8.144109253974725e-05, "loss": 3.1321, "step": 21860 }, { "epoch": 1.4855958690039408, "grad_norm": 2.124087333679199, "learning_rate": 8.143684603886398e-05, "loss": 3.4394, "step": 21865 }, { "epoch": 1.4859355890746024, "grad_norm": 2.3488471508026123, "learning_rate": 8.14325995379807e-05, "loss": 2.9634, "step": 21870 }, { "epoch": 1.4862753091452643, "grad_norm": 2.1479978561401367, "learning_rate": 8.142835303709743e-05, "loss": 3.0164, "step": 21875 }, { "epoch": 1.4866150292159261, "grad_norm": 2.6838459968566895, "learning_rate": 8.142410653621416e-05, "loss": 3.173, "step": 21880 }, { "epoch": 1.4869547492865878, "grad_norm": 1.7710260152816772, "learning_rate": 8.141986003533089e-05, "loss": 2.8636, "step": 21885 }, { "epoch": 1.4872944693572496, "grad_norm": 2.7834584712982178, "learning_rate": 8.141561353444762e-05, "loss": 2.9539, "step": 21890 }, { "epoch": 1.4876341894279115, "grad_norm": 2.2233357429504395, "learning_rate": 8.141136703356435e-05, "loss": 3.1799, "step": 21895 }, { "epoch": 1.487973909498573, 
"grad_norm": 3.1467673778533936, "learning_rate": 8.140712053268107e-05, "loss": 2.972, "step": 21900 }, { "epoch": 1.488313629569235, "grad_norm": 2.0675952434539795, "learning_rate": 8.14028740317978e-05, "loss": 3.2404, "step": 21905 }, { "epoch": 1.4886533496398968, "grad_norm": 2.29799485206604, "learning_rate": 8.139862753091453e-05, "loss": 2.9069, "step": 21910 }, { "epoch": 1.4889930697105584, "grad_norm": 2.599933385848999, "learning_rate": 8.139438103003126e-05, "loss": 2.975, "step": 21915 }, { "epoch": 1.4893327897812203, "grad_norm": 2.6207778453826904, "learning_rate": 8.139013452914799e-05, "loss": 2.986, "step": 21920 }, { "epoch": 1.4896725098518822, "grad_norm": 2.5706019401550293, "learning_rate": 8.138588802826471e-05, "loss": 2.9393, "step": 21925 }, { "epoch": 1.4900122299225438, "grad_norm": 2.8636975288391113, "learning_rate": 8.138164152738144e-05, "loss": 3.2465, "step": 21930 }, { "epoch": 1.4903519499932056, "grad_norm": 1.7587151527404785, "learning_rate": 8.137739502649817e-05, "loss": 3.1522, "step": 21935 }, { "epoch": 1.4906916700638675, "grad_norm": 2.4995434284210205, "learning_rate": 8.13731485256149e-05, "loss": 3.1796, "step": 21940 }, { "epoch": 1.4910313901345291, "grad_norm": 2.0465023517608643, "learning_rate": 8.136890202473163e-05, "loss": 3.0539, "step": 21945 }, { "epoch": 1.491371110205191, "grad_norm": 2.617828607559204, "learning_rate": 8.136465552384835e-05, "loss": 3.277, "step": 21950 }, { "epoch": 1.4917108302758528, "grad_norm": 2.4132907390594482, "learning_rate": 8.136040902296508e-05, "loss": 3.1457, "step": 21955 }, { "epoch": 1.4920505503465145, "grad_norm": 1.930677056312561, "learning_rate": 8.135616252208181e-05, "loss": 3.0226, "step": 21960 }, { "epoch": 1.4923902704171763, "grad_norm": 2.4545044898986816, "learning_rate": 8.135191602119854e-05, "loss": 3.0375, "step": 21965 }, { "epoch": 1.492729990487838, "grad_norm": 2.638476610183716, "learning_rate": 8.134766952031527e-05, "loss": 2.9513, "step": 
21970 }, { "epoch": 1.4930697105584998, "grad_norm": 2.3739259243011475, "learning_rate": 8.1343423019432e-05, "loss": 3.2368, "step": 21975 }, { "epoch": 1.4934094306291616, "grad_norm": 2.2661991119384766, "learning_rate": 8.133917651854872e-05, "loss": 3.1546, "step": 21980 }, { "epoch": 1.4937491506998233, "grad_norm": 2.974379539489746, "learning_rate": 8.133493001766545e-05, "loss": 2.8179, "step": 21985 }, { "epoch": 1.4940888707704851, "grad_norm": 1.8829962015151978, "learning_rate": 8.133068351678218e-05, "loss": 3.0098, "step": 21990 }, { "epoch": 1.4944285908411468, "grad_norm": 3.5551061630249023, "learning_rate": 8.13264370158989e-05, "loss": 3.0839, "step": 21995 }, { "epoch": 1.4947683109118086, "grad_norm": 2.1558964252471924, "learning_rate": 8.132219051501563e-05, "loss": 3.1354, "step": 22000 }, { "epoch": 1.4951080309824705, "grad_norm": 3.096306562423706, "learning_rate": 8.131794401413236e-05, "loss": 3.4013, "step": 22005 }, { "epoch": 1.495447751053132, "grad_norm": 2.3201558589935303, "learning_rate": 8.131369751324909e-05, "loss": 3.0509, "step": 22010 }, { "epoch": 1.495787471123794, "grad_norm": 2.3089308738708496, "learning_rate": 8.130945101236582e-05, "loss": 3.1965, "step": 22015 }, { "epoch": 1.4961271911944558, "grad_norm": 2.857806444168091, "learning_rate": 8.130520451148255e-05, "loss": 3.2418, "step": 22020 }, { "epoch": 1.4964669112651174, "grad_norm": 1.8063747882843018, "learning_rate": 8.130095801059927e-05, "loss": 3.0518, "step": 22025 }, { "epoch": 1.4968066313357793, "grad_norm": 2.2133724689483643, "learning_rate": 8.129671150971599e-05, "loss": 2.8176, "step": 22030 }, { "epoch": 1.4971463514064411, "grad_norm": 2.3203070163726807, "learning_rate": 8.129246500883273e-05, "loss": 3.3026, "step": 22035 }, { "epoch": 1.4974860714771028, "grad_norm": 2.0370352268218994, "learning_rate": 8.128821850794946e-05, "loss": 3.1038, "step": 22040 }, { "epoch": 1.4978257915477646, "grad_norm": 2.4081084728240967, "learning_rate": 
8.128397200706617e-05, "loss": 3.2389, "step": 22045 }, { "epoch": 1.4981655116184265, "grad_norm": 2.214411735534668, "learning_rate": 8.127972550618291e-05, "loss": 3.0048, "step": 22050 }, { "epoch": 1.4985052316890881, "grad_norm": 2.0688064098358154, "learning_rate": 8.127547900529964e-05, "loss": 3.0481, "step": 22055 }, { "epoch": 1.49884495175975, "grad_norm": 2.0312693119049072, "learning_rate": 8.127123250441636e-05, "loss": 3.3044, "step": 22060 }, { "epoch": 1.4991846718304118, "grad_norm": 2.1069529056549072, "learning_rate": 8.12669860035331e-05, "loss": 3.541, "step": 22065 }, { "epoch": 1.4995243919010735, "grad_norm": 2.717766523361206, "learning_rate": 8.126273950264983e-05, "loss": 3.0851, "step": 22070 }, { "epoch": 1.4998641119717353, "grad_norm": 2.5223419666290283, "learning_rate": 8.125849300176654e-05, "loss": 3.2366, "step": 22075 }, { "epoch": 1.5002038320423972, "grad_norm": 2.2510194778442383, "learning_rate": 8.125424650088328e-05, "loss": 3.2999, "step": 22080 }, { "epoch": 1.5005435521130588, "grad_norm": 2.219562530517578, "learning_rate": 8.125000000000001e-05, "loss": 3.0216, "step": 22085 }, { "epoch": 1.5008832721837206, "grad_norm": 2.7108266353607178, "learning_rate": 8.124575349911672e-05, "loss": 3.0231, "step": 22090 }, { "epoch": 1.5012229922543825, "grad_norm": 2.639662265777588, "learning_rate": 8.124150699823347e-05, "loss": 3.0429, "step": 22095 }, { "epoch": 1.5015627123250441, "grad_norm": 3.000920057296753, "learning_rate": 8.12372604973502e-05, "loss": 2.9791, "step": 22100 }, { "epoch": 1.501902432395706, "grad_norm": 2.370986223220825, "learning_rate": 8.123301399646691e-05, "loss": 3.1658, "step": 22105 }, { "epoch": 1.5022421524663678, "grad_norm": 2.4702115058898926, "learning_rate": 8.122876749558365e-05, "loss": 3.2707, "step": 22110 }, { "epoch": 1.5025818725370295, "grad_norm": 3.035630226135254, "learning_rate": 8.122452099470036e-05, "loss": 3.1227, "step": 22115 }, { "epoch": 1.5029215926076913, 
"grad_norm": 1.9910575151443481, "learning_rate": 8.122027449381709e-05, "loss": 3.0498, "step": 22120 }, { "epoch": 1.5032613126783532, "grad_norm": 2.1701064109802246, "learning_rate": 8.121602799293383e-05, "loss": 3.1122, "step": 22125 }, { "epoch": 1.5036010327490148, "grad_norm": 2.265299081802368, "learning_rate": 8.121178149205055e-05, "loss": 2.9568, "step": 22130 }, { "epoch": 1.5039407528196764, "grad_norm": 2.853564977645874, "learning_rate": 8.120753499116728e-05, "loss": 3.1771, "step": 22135 }, { "epoch": 1.5042804728903385, "grad_norm": 2.3497140407562256, "learning_rate": 8.120328849028402e-05, "loss": 3.2081, "step": 22140 }, { "epoch": 1.5046201929610001, "grad_norm": 2.240844249725342, "learning_rate": 8.119904198940073e-05, "loss": 3.2018, "step": 22145 }, { "epoch": 1.5049599130316618, "grad_norm": 2.842055320739746, "learning_rate": 8.119479548851746e-05, "loss": 3.3145, "step": 22150 }, { "epoch": 1.5052996331023238, "grad_norm": 2.224113941192627, "learning_rate": 8.11905489876342e-05, "loss": 3.1152, "step": 22155 }, { "epoch": 1.5056393531729855, "grad_norm": 2.5725951194763184, "learning_rate": 8.118630248675092e-05, "loss": 3.1976, "step": 22160 }, { "epoch": 1.505979073243647, "grad_norm": 2.4435536861419678, "learning_rate": 8.118205598586764e-05, "loss": 3.2828, "step": 22165 }, { "epoch": 1.506318793314309, "grad_norm": 2.386230945587158, "learning_rate": 8.117780948498439e-05, "loss": 2.9935, "step": 22170 }, { "epoch": 1.5066585133849708, "grad_norm": 2.5827276706695557, "learning_rate": 8.11735629841011e-05, "loss": 3.0572, "step": 22175 }, { "epoch": 1.5069982334556324, "grad_norm": 2.163503885269165, "learning_rate": 8.116931648321783e-05, "loss": 3.0941, "step": 22180 }, { "epoch": 1.5073379535262943, "grad_norm": 1.922644019126892, "learning_rate": 8.116506998233456e-05, "loss": 3.2409, "step": 22185 }, { "epoch": 1.5076776735969561, "grad_norm": 2.7379677295684814, "learning_rate": 8.116082348145128e-05, "loss": 2.8825, 
"step": 22190 }, { "epoch": 1.5080173936676178, "grad_norm": 2.502066135406494, "learning_rate": 8.115657698056801e-05, "loss": 3.2349, "step": 22195 }, { "epoch": 1.5083571137382796, "grad_norm": 2.060755729675293, "learning_rate": 8.115233047968474e-05, "loss": 3.1501, "step": 22200 }, { "epoch": 1.5086968338089415, "grad_norm": 2.379448413848877, "learning_rate": 8.114808397880147e-05, "loss": 3.3053, "step": 22205 }, { "epoch": 1.5090365538796031, "grad_norm": 2.610377788543701, "learning_rate": 8.11438374779182e-05, "loss": 3.0429, "step": 22210 }, { "epoch": 1.509376273950265, "grad_norm": 12.173897743225098, "learning_rate": 8.113959097703492e-05, "loss": 3.2181, "step": 22215 }, { "epoch": 1.5097159940209268, "grad_norm": 1.9937551021575928, "learning_rate": 8.113534447615165e-05, "loss": 3.0577, "step": 22220 }, { "epoch": 1.5100557140915885, "grad_norm": 2.348818063735962, "learning_rate": 8.113109797526838e-05, "loss": 3.006, "step": 22225 }, { "epoch": 1.5103954341622503, "grad_norm": 2.679379463195801, "learning_rate": 8.112685147438511e-05, "loss": 3.0483, "step": 22230 }, { "epoch": 1.5107351542329122, "grad_norm": 2.253755569458008, "learning_rate": 8.112260497350184e-05, "loss": 2.9632, "step": 22235 }, { "epoch": 1.5110748743035738, "grad_norm": 2.283074378967285, "learning_rate": 8.111835847261856e-05, "loss": 3.1912, "step": 22240 }, { "epoch": 1.5114145943742356, "grad_norm": 1.9697760343551636, "learning_rate": 8.111411197173529e-05, "loss": 2.9747, "step": 22245 }, { "epoch": 1.5117543144448975, "grad_norm": 2.2269363403320312, "learning_rate": 8.110986547085202e-05, "loss": 3.3056, "step": 22250 }, { "epoch": 1.5120940345155591, "grad_norm": 3.104374408721924, "learning_rate": 8.110561896996875e-05, "loss": 3.2387, "step": 22255 }, { "epoch": 1.512433754586221, "grad_norm": 2.219477653503418, "learning_rate": 8.110137246908548e-05, "loss": 2.9768, "step": 22260 }, { "epoch": 1.5127734746568828, "grad_norm": 1.905508041381836, 
"learning_rate": 8.10971259682022e-05, "loss": 3.1467, "step": 22265 }, { "epoch": 1.5131131947275445, "grad_norm": 2.2341620922088623, "learning_rate": 8.109287946731893e-05, "loss": 3.148, "step": 22270 }, { "epoch": 1.5134529147982063, "grad_norm": 2.146062135696411, "learning_rate": 8.108863296643566e-05, "loss": 2.9786, "step": 22275 }, { "epoch": 1.5137926348688682, "grad_norm": 3.228429079055786, "learning_rate": 8.108438646555239e-05, "loss": 2.9631, "step": 22280 }, { "epoch": 1.5141323549395298, "grad_norm": 2.4490015506744385, "learning_rate": 8.108013996466912e-05, "loss": 3.241, "step": 22285 }, { "epoch": 1.5144720750101917, "grad_norm": 2.3527138233184814, "learning_rate": 8.107589346378584e-05, "loss": 3.1791, "step": 22290 }, { "epoch": 1.5148117950808535, "grad_norm": 2.61240816116333, "learning_rate": 8.107164696290257e-05, "loss": 2.941, "step": 22295 }, { "epoch": 1.5151515151515151, "grad_norm": 2.141897201538086, "learning_rate": 8.10674004620193e-05, "loss": 3.0623, "step": 22300 }, { "epoch": 1.5154912352221768, "grad_norm": 2.0223662853240967, "learning_rate": 8.106315396113603e-05, "loss": 3.4328, "step": 22305 }, { "epoch": 1.5158309552928388, "grad_norm": 2.6018874645233154, "learning_rate": 8.105890746025276e-05, "loss": 3.1, "step": 22310 }, { "epoch": 1.5161706753635005, "grad_norm": 2.465341567993164, "learning_rate": 8.105466095936948e-05, "loss": 3.3714, "step": 22315 }, { "epoch": 1.516510395434162, "grad_norm": 2.1531565189361572, "learning_rate": 8.105041445848621e-05, "loss": 3.2167, "step": 22320 }, { "epoch": 1.5168501155048242, "grad_norm": 2.683344602584839, "learning_rate": 8.104616795760294e-05, "loss": 3.215, "step": 22325 }, { "epoch": 1.5171898355754858, "grad_norm": 2.9042248725891113, "learning_rate": 8.104192145671967e-05, "loss": 3.0666, "step": 22330 }, { "epoch": 1.5175295556461474, "grad_norm": 2.0894615650177, "learning_rate": 8.10376749558364e-05, "loss": 3.2862, "step": 22335 }, { "epoch": 
1.5178692757168093, "grad_norm": 2.436842918395996, "learning_rate": 8.103342845495312e-05, "loss": 3.0626, "step": 22340 }, { "epoch": 1.5182089957874711, "grad_norm": 2.159822702407837, "learning_rate": 8.102918195406985e-05, "loss": 3.0873, "step": 22345 }, { "epoch": 1.5185487158581328, "grad_norm": 2.6246209144592285, "learning_rate": 8.102493545318658e-05, "loss": 2.9946, "step": 22350 }, { "epoch": 1.5188884359287946, "grad_norm": 2.0767273902893066, "learning_rate": 8.102068895230331e-05, "loss": 3.0087, "step": 22355 }, { "epoch": 1.5192281559994565, "grad_norm": 2.0464682579040527, "learning_rate": 8.101644245142004e-05, "loss": 3.2402, "step": 22360 }, { "epoch": 1.5195678760701181, "grad_norm": 2.6543307304382324, "learning_rate": 8.101219595053676e-05, "loss": 2.9227, "step": 22365 }, { "epoch": 1.51990759614078, "grad_norm": 2.259063720703125, "learning_rate": 8.100794944965349e-05, "loss": 2.903, "step": 22370 }, { "epoch": 1.5202473162114418, "grad_norm": 2.091684579849243, "learning_rate": 8.100370294877022e-05, "loss": 3.278, "step": 22375 }, { "epoch": 1.5205870362821035, "grad_norm": 2.101658582687378, "learning_rate": 8.099945644788695e-05, "loss": 3.3038, "step": 22380 }, { "epoch": 1.5209267563527653, "grad_norm": 2.079465627670288, "learning_rate": 8.099520994700366e-05, "loss": 3.1543, "step": 22385 }, { "epoch": 1.5212664764234272, "grad_norm": 2.3017818927764893, "learning_rate": 8.09909634461204e-05, "loss": 3.0157, "step": 22390 }, { "epoch": 1.5216061964940888, "grad_norm": 2.272446870803833, "learning_rate": 8.098671694523713e-05, "loss": 3.2408, "step": 22395 }, { "epoch": 1.5219459165647506, "grad_norm": 2.580500602722168, "learning_rate": 8.098247044435385e-05, "loss": 3.3326, "step": 22400 }, { "epoch": 1.5222856366354125, "grad_norm": 3.470601797103882, "learning_rate": 8.097822394347059e-05, "loss": 3.1053, "step": 22405 }, { "epoch": 1.5226253567060741, "grad_norm": 2.161909341812134, "learning_rate": 8.097397744258732e-05, 
"loss": 3.351, "step": 22410 }, { "epoch": 1.522965076776736, "grad_norm": 1.9601142406463623, "learning_rate": 8.096973094170403e-05, "loss": 3.2543, "step": 22415 }, { "epoch": 1.5233047968473978, "grad_norm": 2.4458138942718506, "learning_rate": 8.096548444082077e-05, "loss": 3.083, "step": 22420 }, { "epoch": 1.5236445169180595, "grad_norm": 1.8223735094070435, "learning_rate": 8.09612379399375e-05, "loss": 3.3305, "step": 22425 }, { "epoch": 1.5239842369887213, "grad_norm": 1.8036235570907593, "learning_rate": 8.095699143905422e-05, "loss": 2.8345, "step": 22430 }, { "epoch": 1.5243239570593832, "grad_norm": 2.5609030723571777, "learning_rate": 8.095274493817096e-05, "loss": 3.0212, "step": 22435 }, { "epoch": 1.5246636771300448, "grad_norm": 2.5375401973724365, "learning_rate": 8.094849843728768e-05, "loss": 3.1805, "step": 22440 }, { "epoch": 1.5250033972007067, "grad_norm": 2.2576138973236084, "learning_rate": 8.09442519364044e-05, "loss": 3.1713, "step": 22445 }, { "epoch": 1.5253431172713685, "grad_norm": 2.494591474533081, "learning_rate": 8.094000543552114e-05, "loss": 2.9757, "step": 22450 }, { "epoch": 1.5256828373420301, "grad_norm": 2.2282874584198, "learning_rate": 8.093575893463786e-05, "loss": 3.1359, "step": 22455 }, { "epoch": 1.526022557412692, "grad_norm": 2.6069905757904053, "learning_rate": 8.093151243375458e-05, "loss": 3.2601, "step": 22460 }, { "epoch": 1.5263622774833538, "grad_norm": 2.0143704414367676, "learning_rate": 8.092726593287132e-05, "loss": 3.2181, "step": 22465 }, { "epoch": 1.5267019975540155, "grad_norm": 2.390014886856079, "learning_rate": 8.092301943198804e-05, "loss": 3.1135, "step": 22470 }, { "epoch": 1.527041717624677, "grad_norm": 2.543194532394409, "learning_rate": 8.091877293110477e-05, "loss": 3.3268, "step": 22475 }, { "epoch": 1.5273814376953392, "grad_norm": 2.4032504558563232, "learning_rate": 8.091452643022151e-05, "loss": 3.155, "step": 22480 }, { "epoch": 1.5277211577660008, "grad_norm": 
1.8286172151565552, "learning_rate": 8.091027992933822e-05, "loss": 3.2774, "step": 22485 }, { "epoch": 1.5280608778366624, "grad_norm": 2.742299795150757, "learning_rate": 8.090603342845495e-05, "loss": 3.3042, "step": 22490 }, { "epoch": 1.5284005979073245, "grad_norm": 2.7098631858825684, "learning_rate": 8.090178692757169e-05, "loss": 3.1759, "step": 22495 }, { "epoch": 1.5287403179779862, "grad_norm": 2.730055570602417, "learning_rate": 8.089754042668841e-05, "loss": 3.0796, "step": 22500 }, { "epoch": 1.5290800380486478, "grad_norm": 2.269876480102539, "learning_rate": 8.089329392580514e-05, "loss": 3.0321, "step": 22505 }, { "epoch": 1.5294197581193096, "grad_norm": 2.4020063877105713, "learning_rate": 8.088904742492188e-05, "loss": 3.1242, "step": 22510 }, { "epoch": 1.5297594781899715, "grad_norm": 2.1301727294921875, "learning_rate": 8.088480092403859e-05, "loss": 3.0057, "step": 22515 }, { "epoch": 1.5300991982606331, "grad_norm": 3.005225419998169, "learning_rate": 8.088055442315532e-05, "loss": 2.9744, "step": 22520 }, { "epoch": 1.530438918331295, "grad_norm": 2.101240634918213, "learning_rate": 8.087630792227206e-05, "loss": 3.2834, "step": 22525 }, { "epoch": 1.5307786384019568, "grad_norm": 2.041539430618286, "learning_rate": 8.087206142138878e-05, "loss": 3.1203, "step": 22530 }, { "epoch": 1.5311183584726185, "grad_norm": 2.1744072437286377, "learning_rate": 8.08678149205055e-05, "loss": 3.3011, "step": 22535 }, { "epoch": 1.5314580785432803, "grad_norm": 2.2379226684570312, "learning_rate": 8.086356841962223e-05, "loss": 3.1101, "step": 22540 }, { "epoch": 1.5317977986139422, "grad_norm": 1.8760087490081787, "learning_rate": 8.085932191873896e-05, "loss": 3.0947, "step": 22545 }, { "epoch": 1.5321375186846038, "grad_norm": 2.731261968612671, "learning_rate": 8.085507541785569e-05, "loss": 3.0728, "step": 22550 }, { "epoch": 1.5324772387552656, "grad_norm": 2.160400390625, "learning_rate": 8.085082891697242e-05, "loss": 3.1874, "step": 22555 }, { 
"epoch": 1.5328169588259275, "grad_norm": 1.7649985551834106, "learning_rate": 8.084658241608914e-05, "loss": 3.207, "step": 22560 }, { "epoch": 1.5331566788965891, "grad_norm": 2.6594948768615723, "learning_rate": 8.084233591520587e-05, "loss": 3.1192, "step": 22565 }, { "epoch": 1.533496398967251, "grad_norm": 2.251199245452881, "learning_rate": 8.08380894143226e-05, "loss": 3.325, "step": 22570 }, { "epoch": 1.5338361190379128, "grad_norm": 2.401714324951172, "learning_rate": 8.083384291343933e-05, "loss": 3.1504, "step": 22575 }, { "epoch": 1.5341758391085745, "grad_norm": 2.979182243347168, "learning_rate": 8.082959641255606e-05, "loss": 3.1405, "step": 22580 }, { "epoch": 1.5345155591792363, "grad_norm": 3.1366515159606934, "learning_rate": 8.082534991167278e-05, "loss": 3.2199, "step": 22585 }, { "epoch": 1.5348552792498982, "grad_norm": 2.713632583618164, "learning_rate": 8.082110341078951e-05, "loss": 3.569, "step": 22590 }, { "epoch": 1.5351949993205598, "grad_norm": 2.5851569175720215, "learning_rate": 8.081685690990624e-05, "loss": 3.4113, "step": 22595 }, { "epoch": 1.5355347193912217, "grad_norm": 2.7169599533081055, "learning_rate": 8.081261040902297e-05, "loss": 3.0817, "step": 22600 }, { "epoch": 1.5358744394618835, "grad_norm": 2.002993583679199, "learning_rate": 8.08083639081397e-05, "loss": 3.1543, "step": 22605 }, { "epoch": 1.5362141595325451, "grad_norm": 2.7861313819885254, "learning_rate": 8.080411740725642e-05, "loss": 3.1094, "step": 22610 }, { "epoch": 1.536553879603207, "grad_norm": 2.2655553817749023, "learning_rate": 8.079987090637315e-05, "loss": 3.4512, "step": 22615 }, { "epoch": 1.5368935996738688, "grad_norm": 2.0050148963928223, "learning_rate": 8.079562440548988e-05, "loss": 3.1571, "step": 22620 }, { "epoch": 1.5372333197445305, "grad_norm": 2.1957623958587646, "learning_rate": 8.079137790460661e-05, "loss": 3.1791, "step": 22625 }, { "epoch": 1.5375730398151923, "grad_norm": 2.1138737201690674, "learning_rate": 
8.078713140372334e-05, "loss": 3.1712, "step": 22630 }, { "epoch": 1.5379127598858542, "grad_norm": 2.132114887237549, "learning_rate": 8.078288490284006e-05, "loss": 3.3005, "step": 22635 }, { "epoch": 1.5382524799565158, "grad_norm": 2.1787753105163574, "learning_rate": 8.077863840195679e-05, "loss": 3.1272, "step": 22640 }, { "epoch": 1.5385922000271774, "grad_norm": 2.1454029083251953, "learning_rate": 8.077439190107352e-05, "loss": 3.0683, "step": 22645 }, { "epoch": 1.5389319200978395, "grad_norm": 2.3944082260131836, "learning_rate": 8.077014540019025e-05, "loss": 3.1779, "step": 22650 }, { "epoch": 1.5392716401685012, "grad_norm": 2.1905698776245117, "learning_rate": 8.076589889930698e-05, "loss": 2.9614, "step": 22655 }, { "epoch": 1.5396113602391628, "grad_norm": 2.1102778911590576, "learning_rate": 8.07616523984237e-05, "loss": 3.1994, "step": 22660 }, { "epoch": 1.5399510803098249, "grad_norm": 2.8910844326019287, "learning_rate": 8.075740589754043e-05, "loss": 2.9904, "step": 22665 }, { "epoch": 1.5402908003804865, "grad_norm": 1.9213372468948364, "learning_rate": 8.075315939665716e-05, "loss": 3.1029, "step": 22670 }, { "epoch": 1.5406305204511481, "grad_norm": 2.295098066329956, "learning_rate": 8.074891289577389e-05, "loss": 3.1452, "step": 22675 }, { "epoch": 1.54097024052181, "grad_norm": 2.2256388664245605, "learning_rate": 8.074466639489062e-05, "loss": 3.0605, "step": 22680 }, { "epoch": 1.5413099605924718, "grad_norm": 1.9003127813339233, "learning_rate": 8.074041989400734e-05, "loss": 3.2734, "step": 22685 }, { "epoch": 1.5416496806631335, "grad_norm": 2.4645321369171143, "learning_rate": 8.073617339312407e-05, "loss": 2.8481, "step": 22690 }, { "epoch": 1.5419894007337953, "grad_norm": 2.093108654022217, "learning_rate": 8.07319268922408e-05, "loss": 3.265, "step": 22695 }, { "epoch": 1.5423291208044572, "grad_norm": 2.6915228366851807, "learning_rate": 8.072768039135753e-05, "loss": 2.978, "step": 22700 }, { "epoch": 1.5426688408751188, 
"grad_norm": 1.692994475364685, "learning_rate": 8.072343389047426e-05, "loss": 3.1569, "step": 22705 }, { "epoch": 1.5430085609457806, "grad_norm": 2.745131015777588, "learning_rate": 8.071918738959098e-05, "loss": 3.1397, "step": 22710 }, { "epoch": 1.5433482810164425, "grad_norm": 2.548003673553467, "learning_rate": 8.071494088870771e-05, "loss": 3.1576, "step": 22715 }, { "epoch": 1.5436880010871041, "grad_norm": 2.31303071975708, "learning_rate": 8.071069438782444e-05, "loss": 2.7908, "step": 22720 }, { "epoch": 1.544027721157766, "grad_norm": 2.012073516845703, "learning_rate": 8.070644788694117e-05, "loss": 3.1712, "step": 22725 }, { "epoch": 1.5443674412284278, "grad_norm": 2.688370943069458, "learning_rate": 8.07022013860579e-05, "loss": 3.3534, "step": 22730 }, { "epoch": 1.5447071612990895, "grad_norm": 2.181748628616333, "learning_rate": 8.069795488517462e-05, "loss": 3.102, "step": 22735 }, { "epoch": 1.5450468813697513, "grad_norm": 1.8749175071716309, "learning_rate": 8.069370838429134e-05, "loss": 3.0715, "step": 22740 }, { "epoch": 1.5453866014404132, "grad_norm": 2.097414016723633, "learning_rate": 8.068946188340808e-05, "loss": 3.0598, "step": 22745 }, { "epoch": 1.5457263215110748, "grad_norm": 2.101708173751831, "learning_rate": 8.068521538252481e-05, "loss": 3.2165, "step": 22750 }, { "epoch": 1.5460660415817367, "grad_norm": 2.3494420051574707, "learning_rate": 8.068096888164152e-05, "loss": 3.1098, "step": 22755 }, { "epoch": 1.5464057616523985, "grad_norm": 2.009627342224121, "learning_rate": 8.067672238075826e-05, "loss": 2.9815, "step": 22760 }, { "epoch": 1.5467454817230601, "grad_norm": 2.034803628921509, "learning_rate": 8.067247587987499e-05, "loss": 3.097, "step": 22765 }, { "epoch": 1.547085201793722, "grad_norm": 2.226534366607666, "learning_rate": 8.06682293789917e-05, "loss": 2.9896, "step": 22770 }, { "epoch": 1.5474249218643839, "grad_norm": 2.5061962604522705, "learning_rate": 8.066398287810845e-05, "loss": 3.2953, "step": 
22775 }, { "epoch": 1.5477646419350455, "grad_norm": 2.962066888809204, "learning_rate": 8.065973637722518e-05, "loss": 3.0953, "step": 22780 }, { "epoch": 1.5481043620057073, "grad_norm": 2.5800185203552246, "learning_rate": 8.065548987634189e-05, "loss": 3.2363, "step": 22785 }, { "epoch": 1.5484440820763692, "grad_norm": 2.3481576442718506, "learning_rate": 8.065124337545863e-05, "loss": 3.0598, "step": 22790 }, { "epoch": 1.5487838021470308, "grad_norm": 2.6696126461029053, "learning_rate": 8.064699687457536e-05, "loss": 3.3842, "step": 22795 }, { "epoch": 1.5491235222176927, "grad_norm": 2.1983413696289062, "learning_rate": 8.064275037369207e-05, "loss": 2.857, "step": 22800 }, { "epoch": 1.5494632422883545, "grad_norm": 2.44551420211792, "learning_rate": 8.063850387280882e-05, "loss": 2.9284, "step": 22805 }, { "epoch": 1.5498029623590162, "grad_norm": 2.367021322250366, "learning_rate": 8.063425737192553e-05, "loss": 3.0468, "step": 22810 }, { "epoch": 1.5501426824296778, "grad_norm": 2.2245068550109863, "learning_rate": 8.063001087104226e-05, "loss": 3.1987, "step": 22815 }, { "epoch": 1.5504824025003399, "grad_norm": 1.7932250499725342, "learning_rate": 8.0625764370159e-05, "loss": 3.1474, "step": 22820 }, { "epoch": 1.5508221225710015, "grad_norm": 2.5434980392456055, "learning_rate": 8.062151786927571e-05, "loss": 3.0818, "step": 22825 }, { "epoch": 1.5511618426416631, "grad_norm": 2.8801679611206055, "learning_rate": 8.061727136839244e-05, "loss": 3.3493, "step": 22830 }, { "epoch": 1.5515015627123252, "grad_norm": 2.6044957637786865, "learning_rate": 8.061302486750918e-05, "loss": 3.1845, "step": 22835 }, { "epoch": 1.5518412827829868, "grad_norm": 2.5215535163879395, "learning_rate": 8.06087783666259e-05, "loss": 3.2564, "step": 22840 }, { "epoch": 1.5521810028536485, "grad_norm": 2.5697412490844727, "learning_rate": 8.060453186574263e-05, "loss": 3.2895, "step": 22845 }, { "epoch": 1.5525207229243103, "grad_norm": 1.8007861375808716, "learning_rate": 
8.060028536485937e-05, "loss": 3.0228, "step": 22850 }, { "epoch": 1.5528604429949722, "grad_norm": 2.639610528945923, "learning_rate": 8.059603886397608e-05, "loss": 2.9063, "step": 22855 }, { "epoch": 1.5532001630656338, "grad_norm": 2.6378636360168457, "learning_rate": 8.059179236309281e-05, "loss": 3.1427, "step": 22860 }, { "epoch": 1.5535398831362957, "grad_norm": 2.172271728515625, "learning_rate": 8.058754586220955e-05, "loss": 3.2499, "step": 22865 }, { "epoch": 1.5538796032069575, "grad_norm": 2.3454151153564453, "learning_rate": 8.058329936132627e-05, "loss": 3.1086, "step": 22870 }, { "epoch": 1.5542193232776191, "grad_norm": 2.027076244354248, "learning_rate": 8.0579052860443e-05, "loss": 3.1995, "step": 22875 }, { "epoch": 1.554559043348281, "grad_norm": 1.851736307144165, "learning_rate": 8.057480635955972e-05, "loss": 3.1522, "step": 22880 }, { "epoch": 1.5548987634189428, "grad_norm": 2.3592073917388916, "learning_rate": 8.057055985867645e-05, "loss": 3.1417, "step": 22885 }, { "epoch": 1.5552384834896045, "grad_norm": 3.5496418476104736, "learning_rate": 8.056631335779318e-05, "loss": 3.1573, "step": 22890 }, { "epoch": 1.5555782035602663, "grad_norm": 2.280775785446167, "learning_rate": 8.05620668569099e-05, "loss": 3.1556, "step": 22895 }, { "epoch": 1.5559179236309282, "grad_norm": 2.3768093585968018, "learning_rate": 8.055782035602663e-05, "loss": 3.0392, "step": 22900 }, { "epoch": 1.5562576437015898, "grad_norm": 1.823554515838623, "learning_rate": 8.055357385514336e-05, "loss": 3.1332, "step": 22905 }, { "epoch": 1.5565973637722517, "grad_norm": 2.305758237838745, "learning_rate": 8.054932735426009e-05, "loss": 3.0977, "step": 22910 }, { "epoch": 1.5569370838429135, "grad_norm": 2.1599485874176025, "learning_rate": 8.054508085337682e-05, "loss": 3.205, "step": 22915 }, { "epoch": 1.5572768039135751, "grad_norm": 1.9594082832336426, "learning_rate": 8.054083435249355e-05, "loss": 3.1247, "step": 22920 }, { "epoch": 1.557616523984237, 
"grad_norm": 2.2548611164093018, "learning_rate": 8.053658785161027e-05, "loss": 3.1928, "step": 22925 }, { "epoch": 1.5579562440548989, "grad_norm": 2.1897099018096924, "learning_rate": 8.0532341350727e-05, "loss": 3.0203, "step": 22930 }, { "epoch": 1.5582959641255605, "grad_norm": 2.1525840759277344, "learning_rate": 8.052809484984373e-05, "loss": 3.0101, "step": 22935 }, { "epoch": 1.5586356841962223, "grad_norm": 1.8499841690063477, "learning_rate": 8.052384834896046e-05, "loss": 3.2433, "step": 22940 }, { "epoch": 1.5589754042668842, "grad_norm": 2.2833266258239746, "learning_rate": 8.051960184807719e-05, "loss": 3.015, "step": 22945 }, { "epoch": 1.5593151243375458, "grad_norm": 1.7112709283828735, "learning_rate": 8.051535534719393e-05, "loss": 2.9901, "step": 22950 }, { "epoch": 1.5596548444082077, "grad_norm": 1.9771926403045654, "learning_rate": 8.051110884631064e-05, "loss": 3.0535, "step": 22955 }, { "epoch": 1.5599945644788695, "grad_norm": 2.6309499740600586, "learning_rate": 8.050686234542737e-05, "loss": 3.1528, "step": 22960 }, { "epoch": 1.5603342845495312, "grad_norm": 3.0359010696411133, "learning_rate": 8.05026158445441e-05, "loss": 3.1405, "step": 22965 }, { "epoch": 1.560674004620193, "grad_norm": 2.571286201477051, "learning_rate": 8.049836934366083e-05, "loss": 3.3338, "step": 22970 }, { "epoch": 1.5610137246908549, "grad_norm": 2.450045347213745, "learning_rate": 8.049412284277755e-05, "loss": 2.9118, "step": 22975 }, { "epoch": 1.5613534447615165, "grad_norm": 2.959523916244507, "learning_rate": 8.048987634189428e-05, "loss": 2.8351, "step": 22980 }, { "epoch": 1.5616931648321781, "grad_norm": 3.696211099624634, "learning_rate": 8.048562984101101e-05, "loss": 3.049, "step": 22985 }, { "epoch": 1.5620328849028402, "grad_norm": 2.0919480323791504, "learning_rate": 8.048138334012774e-05, "loss": 3.3308, "step": 22990 }, { "epoch": 1.5623726049735018, "grad_norm": 1.9434196949005127, "learning_rate": 8.047713683924447e-05, "loss": 3.0509, 
"step": 22995 }, { "epoch": 1.5627123250441635, "grad_norm": 3.086956024169922, "learning_rate": 8.04728903383612e-05, "loss": 3.0748, "step": 23000 }, { "epoch": 1.5630520451148255, "grad_norm": 2.6584713459014893, "learning_rate": 8.046864383747792e-05, "loss": 3.0359, "step": 23005 }, { "epoch": 1.5633917651854872, "grad_norm": 2.4303088188171387, "learning_rate": 8.046439733659465e-05, "loss": 2.9171, "step": 23010 }, { "epoch": 1.5637314852561488, "grad_norm": 2.551551342010498, "learning_rate": 8.046015083571138e-05, "loss": 3.454, "step": 23015 }, { "epoch": 1.5640712053268107, "grad_norm": 2.3309953212738037, "learning_rate": 8.04559043348281e-05, "loss": 3.3818, "step": 23020 }, { "epoch": 1.5644109253974725, "grad_norm": 2.769739866256714, "learning_rate": 8.045165783394483e-05, "loss": 3.0619, "step": 23025 }, { "epoch": 1.5647506454681341, "grad_norm": 2.9249422550201416, "learning_rate": 8.044741133306156e-05, "loss": 3.0022, "step": 23030 }, { "epoch": 1.565090365538796, "grad_norm": 2.3361692428588867, "learning_rate": 8.044316483217829e-05, "loss": 3.3319, "step": 23035 }, { "epoch": 1.5654300856094578, "grad_norm": 2.1396164894104004, "learning_rate": 8.043891833129502e-05, "loss": 3.1678, "step": 23040 }, { "epoch": 1.5657698056801195, "grad_norm": 2.0419070720672607, "learning_rate": 8.043467183041175e-05, "loss": 2.9963, "step": 23045 }, { "epoch": 1.5661095257507813, "grad_norm": 2.198774814605713, "learning_rate": 8.043042532952847e-05, "loss": 3.2018, "step": 23050 }, { "epoch": 1.5664492458214432, "grad_norm": 2.5873143672943115, "learning_rate": 8.04261788286452e-05, "loss": 3.1958, "step": 23055 }, { "epoch": 1.5667889658921048, "grad_norm": 2.152818202972412, "learning_rate": 8.042193232776193e-05, "loss": 3.3476, "step": 23060 }, { "epoch": 1.5671286859627667, "grad_norm": 2.311088800430298, "learning_rate": 8.041768582687866e-05, "loss": 3.3604, "step": 23065 }, { "epoch": 1.5674684060334285, "grad_norm": 2.364342212677002, 
"learning_rate": 8.041343932599539e-05, "loss": 3.4264, "step": 23070 }, { "epoch": 1.5678081261040901, "grad_norm": 2.5579168796539307, "learning_rate": 8.040919282511211e-05, "loss": 3.0613, "step": 23075 }, { "epoch": 1.568147846174752, "grad_norm": 2.2893006801605225, "learning_rate": 8.040494632422883e-05, "loss": 3.2526, "step": 23080 }, { "epoch": 1.5684875662454139, "grad_norm": 2.583123207092285, "learning_rate": 8.040069982334557e-05, "loss": 3.1118, "step": 23085 }, { "epoch": 1.5688272863160755, "grad_norm": 2.6920881271362305, "learning_rate": 8.03964533224623e-05, "loss": 3.1316, "step": 23090 }, { "epoch": 1.5691670063867373, "grad_norm": 2.4343600273132324, "learning_rate": 8.039220682157901e-05, "loss": 3.0418, "step": 23095 }, { "epoch": 1.5695067264573992, "grad_norm": 2.3915584087371826, "learning_rate": 8.038796032069575e-05, "loss": 3.2098, "step": 23100 }, { "epoch": 1.5698464465280608, "grad_norm": 2.45762300491333, "learning_rate": 8.038371381981248e-05, "loss": 3.261, "step": 23105 }, { "epoch": 1.5701861665987227, "grad_norm": 1.9450072050094604, "learning_rate": 8.03794673189292e-05, "loss": 3.1178, "step": 23110 }, { "epoch": 1.5705258866693845, "grad_norm": 2.6797728538513184, "learning_rate": 8.037522081804594e-05, "loss": 3.0839, "step": 23115 }, { "epoch": 1.5708656067400462, "grad_norm": 2.1099905967712402, "learning_rate": 8.037097431716267e-05, "loss": 3.0253, "step": 23120 }, { "epoch": 1.571205326810708, "grad_norm": 2.116863250732422, "learning_rate": 8.036672781627938e-05, "loss": 3.2955, "step": 23125 }, { "epoch": 1.5715450468813699, "grad_norm": 2.0059492588043213, "learning_rate": 8.036248131539612e-05, "loss": 3.1087, "step": 23130 }, { "epoch": 1.5718847669520315, "grad_norm": 1.9442765712738037, "learning_rate": 8.035823481451285e-05, "loss": 3.3201, "step": 23135 }, { "epoch": 1.5722244870226934, "grad_norm": 2.892045259475708, "learning_rate": 8.035398831362957e-05, "loss": 3.2288, "step": 23140 }, { "epoch": 
1.5725642070933552, "grad_norm": 2.104874849319458, "learning_rate": 8.034974181274631e-05, "loss": 3.211, "step": 23145 }, { "epoch": 1.5729039271640168, "grad_norm": 2.1357052326202393, "learning_rate": 8.034549531186303e-05, "loss": 3.2233, "step": 23150 }, { "epoch": 1.5732436472346785, "grad_norm": 3.0593295097351074, "learning_rate": 8.034124881097975e-05, "loss": 3.2835, "step": 23155 }, { "epoch": 1.5735833673053405, "grad_norm": 2.1449203491210938, "learning_rate": 8.033700231009649e-05, "loss": 2.942, "step": 23160 }, { "epoch": 1.5739230873760022, "grad_norm": 1.9886829853057861, "learning_rate": 8.03327558092132e-05, "loss": 3.1405, "step": 23165 }, { "epoch": 1.5742628074466638, "grad_norm": 3.539085865020752, "learning_rate": 8.032850930832993e-05, "loss": 3.2428, "step": 23170 }, { "epoch": 1.5746025275173259, "grad_norm": 2.8944342136383057, "learning_rate": 8.032426280744667e-05, "loss": 3.2433, "step": 23175 }, { "epoch": 1.5749422475879875, "grad_norm": 2.4629132747650146, "learning_rate": 8.032001630656339e-05, "loss": 3.3128, "step": 23180 }, { "epoch": 1.5752819676586491, "grad_norm": 2.2349321842193604, "learning_rate": 8.031576980568012e-05, "loss": 3.0997, "step": 23185 }, { "epoch": 1.575621687729311, "grad_norm": 2.647217273712158, "learning_rate": 8.031152330479686e-05, "loss": 3.3292, "step": 23190 }, { "epoch": 1.5759614077999728, "grad_norm": 2.251600503921509, "learning_rate": 8.030727680391357e-05, "loss": 3.5316, "step": 23195 }, { "epoch": 1.5763011278706345, "grad_norm": 2.162277936935425, "learning_rate": 8.03030303030303e-05, "loss": 2.9383, "step": 23200 }, { "epoch": 1.5766408479412963, "grad_norm": 2.4764013290405273, "learning_rate": 8.029878380214704e-05, "loss": 3.1365, "step": 23205 }, { "epoch": 1.5769805680119582, "grad_norm": 2.621953248977661, "learning_rate": 8.029453730126376e-05, "loss": 2.9139, "step": 23210 }, { "epoch": 1.5773202880826198, "grad_norm": 2.305241584777832, "learning_rate": 8.029029080038049e-05, 
"loss": 3.0455, "step": 23215 }, { "epoch": 1.5776600081532817, "grad_norm": 2.5796010494232178, "learning_rate": 8.028604429949723e-05, "loss": 3.063, "step": 23220 }, { "epoch": 1.5779997282239435, "grad_norm": 2.1034276485443115, "learning_rate": 8.028179779861394e-05, "loss": 3.2697, "step": 23225 }, { "epoch": 1.5783394482946052, "grad_norm": 2.4162826538085938, "learning_rate": 8.027755129773067e-05, "loss": 3.0545, "step": 23230 }, { "epoch": 1.578679168365267, "grad_norm": 2.531973361968994, "learning_rate": 8.02733047968474e-05, "loss": 3.1583, "step": 23235 }, { "epoch": 1.5790188884359289, "grad_norm": 2.6121904850006104, "learning_rate": 8.026905829596413e-05, "loss": 3.0507, "step": 23240 }, { "epoch": 1.5793586085065905, "grad_norm": 2.807656764984131, "learning_rate": 8.026481179508085e-05, "loss": 2.835, "step": 23245 }, { "epoch": 1.5796983285772523, "grad_norm": 2.0964086055755615, "learning_rate": 8.026056529419758e-05, "loss": 3.1812, "step": 23250 }, { "epoch": 1.5800380486479142, "grad_norm": 2.551853656768799, "learning_rate": 8.025631879331431e-05, "loss": 3.3418, "step": 23255 }, { "epoch": 1.5803777687185758, "grad_norm": 2.418344497680664, "learning_rate": 8.025207229243104e-05, "loss": 3.252, "step": 23260 }, { "epoch": 1.5807174887892377, "grad_norm": 2.392409324645996, "learning_rate": 8.024782579154777e-05, "loss": 3.1384, "step": 23265 }, { "epoch": 1.5810572088598995, "grad_norm": 2.413090705871582, "learning_rate": 8.02435792906645e-05, "loss": 3.197, "step": 23270 }, { "epoch": 1.5813969289305612, "grad_norm": 2.236069679260254, "learning_rate": 8.023933278978122e-05, "loss": 3.3267, "step": 23275 }, { "epoch": 1.581736649001223, "grad_norm": 2.496711492538452, "learning_rate": 8.023508628889795e-05, "loss": 3.4166, "step": 23280 }, { "epoch": 1.5820763690718849, "grad_norm": 2.3561644554138184, "learning_rate": 8.023083978801468e-05, "loss": 3.112, "step": 23285 }, { "epoch": 1.5824160891425465, "grad_norm": 2.3797974586486816, 
"learning_rate": 8.022659328713142e-05, "loss": 2.8066, "step": 23290 }, { "epoch": 1.5827558092132084, "grad_norm": 2.9345498085021973, "learning_rate": 8.022234678624813e-05, "loss": 3.0367, "step": 23295 }, { "epoch": 1.5830955292838702, "grad_norm": 2.6804137229919434, "learning_rate": 8.021810028536486e-05, "loss": 3.049, "step": 23300 }, { "epoch": 1.5834352493545318, "grad_norm": 2.503467082977295, "learning_rate": 8.021385378448159e-05, "loss": 3.2594, "step": 23305 }, { "epoch": 1.5837749694251937, "grad_norm": 2.626493215560913, "learning_rate": 8.020960728359832e-05, "loss": 3.0593, "step": 23310 }, { "epoch": 1.5841146894958555, "grad_norm": 1.920375943183899, "learning_rate": 8.020536078271505e-05, "loss": 2.8344, "step": 23315 }, { "epoch": 1.5844544095665172, "grad_norm": 2.055162191390991, "learning_rate": 8.020111428183177e-05, "loss": 2.8729, "step": 23320 }, { "epoch": 1.5847941296371788, "grad_norm": 2.266422986984253, "learning_rate": 8.01968677809485e-05, "loss": 3.1564, "step": 23325 }, { "epoch": 1.5851338497078409, "grad_norm": 2.5240285396575928, "learning_rate": 8.019262128006523e-05, "loss": 3.1906, "step": 23330 }, { "epoch": 1.5854735697785025, "grad_norm": 2.232200860977173, "learning_rate": 8.018837477918196e-05, "loss": 3.2157, "step": 23335 }, { "epoch": 1.5858132898491641, "grad_norm": 2.524718999862671, "learning_rate": 8.018412827829869e-05, "loss": 2.9527, "step": 23340 }, { "epoch": 1.5861530099198262, "grad_norm": 2.38564395904541, "learning_rate": 8.017988177741541e-05, "loss": 3.095, "step": 23345 }, { "epoch": 1.5864927299904878, "grad_norm": 2.3716490268707275, "learning_rate": 8.017563527653214e-05, "loss": 3.545, "step": 23350 }, { "epoch": 1.5868324500611495, "grad_norm": 2.88743257522583, "learning_rate": 8.017138877564887e-05, "loss": 3.063, "step": 23355 }, { "epoch": 1.5871721701318113, "grad_norm": 2.5551199913024902, "learning_rate": 8.01671422747656e-05, "loss": 2.9947, "step": 23360 }, { "epoch": 
1.5875118902024732, "grad_norm": 2.090111494064331, "learning_rate": 8.016289577388233e-05, "loss": 3.2834, "step": 23365 }, { "epoch": 1.5878516102731348, "grad_norm": 2.0735414028167725, "learning_rate": 8.015864927299905e-05, "loss": 3.215, "step": 23370 }, { "epoch": 1.5881913303437967, "grad_norm": 1.6969127655029297, "learning_rate": 8.015440277211578e-05, "loss": 3.0498, "step": 23375 }, { "epoch": 1.5885310504144585, "grad_norm": 2.589768171310425, "learning_rate": 8.015015627123251e-05, "loss": 3.2448, "step": 23380 }, { "epoch": 1.5888707704851202, "grad_norm": 2.661884307861328, "learning_rate": 8.014590977034924e-05, "loss": 3.2241, "step": 23385 }, { "epoch": 1.589210490555782, "grad_norm": 2.8591601848602295, "learning_rate": 8.014166326946597e-05, "loss": 3.0973, "step": 23390 }, { "epoch": 1.5895502106264439, "grad_norm": 2.104495048522949, "learning_rate": 8.01374167685827e-05, "loss": 3.2625, "step": 23395 }, { "epoch": 1.5898899306971055, "grad_norm": 2.7045485973358154, "learning_rate": 8.013317026769942e-05, "loss": 3.0764, "step": 23400 }, { "epoch": 1.5902296507677673, "grad_norm": 2.247933864593506, "learning_rate": 8.012892376681615e-05, "loss": 3.4075, "step": 23405 }, { "epoch": 1.5905693708384292, "grad_norm": 2.623363971710205, "learning_rate": 8.012467726593288e-05, "loss": 3.3687, "step": 23410 }, { "epoch": 1.5909090909090908, "grad_norm": 2.2272698879241943, "learning_rate": 8.01204307650496e-05, "loss": 3.2562, "step": 23415 }, { "epoch": 1.5912488109797527, "grad_norm": 2.4665372371673584, "learning_rate": 8.011618426416633e-05, "loss": 2.9725, "step": 23420 }, { "epoch": 1.5915885310504145, "grad_norm": 2.2858498096466064, "learning_rate": 8.011193776328306e-05, "loss": 3.4047, "step": 23425 }, { "epoch": 1.5919282511210762, "grad_norm": 1.9644286632537842, "learning_rate": 8.010769126239979e-05, "loss": 3.1697, "step": 23430 }, { "epoch": 1.592267971191738, "grad_norm": 2.581047773361206, "learning_rate": 8.01034447615165e-05, 
"loss": 2.8072, "step": 23435 }, { "epoch": 1.5926076912623999, "grad_norm": 3.899115800857544, "learning_rate": 8.009919826063325e-05, "loss": 2.7813, "step": 23440 }, { "epoch": 1.5929474113330615, "grad_norm": 2.4000909328460693, "learning_rate": 8.009495175974997e-05, "loss": 3.0748, "step": 23445 }, { "epoch": 1.5932871314037234, "grad_norm": 2.101249933242798, "learning_rate": 8.009070525886669e-05, "loss": 3.3974, "step": 23450 }, { "epoch": 1.5936268514743852, "grad_norm": 2.1905007362365723, "learning_rate": 8.008645875798343e-05, "loss": 2.886, "step": 23455 }, { "epoch": 1.5939665715450468, "grad_norm": 2.143242359161377, "learning_rate": 8.008221225710016e-05, "loss": 3.1932, "step": 23460 }, { "epoch": 1.5943062916157087, "grad_norm": 1.7879488468170166, "learning_rate": 8.007796575621687e-05, "loss": 2.9897, "step": 23465 }, { "epoch": 1.5946460116863705, "grad_norm": 2.8292295932769775, "learning_rate": 8.007371925533361e-05, "loss": 3.2814, "step": 23470 }, { "epoch": 1.5949857317570322, "grad_norm": 2.153930902481079, "learning_rate": 8.006947275445034e-05, "loss": 2.8433, "step": 23475 }, { "epoch": 1.595325451827694, "grad_norm": 2.0364325046539307, "learning_rate": 8.006522625356706e-05, "loss": 3.1716, "step": 23480 }, { "epoch": 1.5956651718983559, "grad_norm": 2.7722291946411133, "learning_rate": 8.00609797526838e-05, "loss": 2.8564, "step": 23485 }, { "epoch": 1.5960048919690175, "grad_norm": 2.196533441543579, "learning_rate": 8.005673325180053e-05, "loss": 2.9968, "step": 23490 }, { "epoch": 1.5963446120396791, "grad_norm": 2.4224865436553955, "learning_rate": 8.005248675091724e-05, "loss": 3.0097, "step": 23495 }, { "epoch": 1.5966843321103412, "grad_norm": 2.9657809734344482, "learning_rate": 8.004824025003398e-05, "loss": 3.2164, "step": 23500 }, { "epoch": 1.5970240521810029, "grad_norm": 2.673886775970459, "learning_rate": 8.004399374915071e-05, "loss": 2.876, "step": 23505 }, { "epoch": 1.5973637722516645, "grad_norm": 
2.4086215496063232, "learning_rate": 8.003974724826742e-05, "loss": 3.1705, "step": 23510 }, { "epoch": 1.5977034923223266, "grad_norm": 2.3078999519348145, "learning_rate": 8.003550074738417e-05, "loss": 3.205, "step": 23515 }, { "epoch": 1.5980432123929882, "grad_norm": 2.4589381217956543, "learning_rate": 8.003125424650088e-05, "loss": 3.1394, "step": 23520 }, { "epoch": 1.5983829324636498, "grad_norm": 2.270158290863037, "learning_rate": 8.002700774561761e-05, "loss": 3.2823, "step": 23525 }, { "epoch": 1.5987226525343117, "grad_norm": 2.0820112228393555, "learning_rate": 8.002276124473435e-05, "loss": 3.0154, "step": 23530 }, { "epoch": 1.5990623726049735, "grad_norm": 2.2473063468933105, "learning_rate": 8.001851474385106e-05, "loss": 2.8396, "step": 23535 }, { "epoch": 1.5994020926756352, "grad_norm": 2.463590383529663, "learning_rate": 8.001426824296779e-05, "loss": 3.1924, "step": 23540 }, { "epoch": 1.599741812746297, "grad_norm": 3.007286787033081, "learning_rate": 8.001002174208453e-05, "loss": 2.9976, "step": 23545 }, { "epoch": 1.6000815328169589, "grad_norm": 2.1783323287963867, "learning_rate": 8.000577524120125e-05, "loss": 3.3747, "step": 23550 }, { "epoch": 1.6004212528876205, "grad_norm": 2.392162799835205, "learning_rate": 8.000152874031798e-05, "loss": 3.2027, "step": 23555 }, { "epoch": 1.6007609729582823, "grad_norm": 2.225264072418213, "learning_rate": 7.999728223943472e-05, "loss": 3.2349, "step": 23560 }, { "epoch": 1.6011006930289442, "grad_norm": 2.5892059803009033, "learning_rate": 7.999303573855143e-05, "loss": 3.1803, "step": 23565 }, { "epoch": 1.6014404130996058, "grad_norm": 2.417187452316284, "learning_rate": 7.998878923766816e-05, "loss": 3.177, "step": 23570 }, { "epoch": 1.6017801331702677, "grad_norm": 2.4077401161193848, "learning_rate": 7.99845427367849e-05, "loss": 3.0976, "step": 23575 }, { "epoch": 1.6021198532409295, "grad_norm": 3.6572349071502686, "learning_rate": 7.998029623590162e-05, "loss": 2.9733, "step": 23580 
}, { "epoch": 1.6024595733115912, "grad_norm": 2.495851516723633, "learning_rate": 7.997604973501834e-05, "loss": 3.413, "step": 23585 }, { "epoch": 1.602799293382253, "grad_norm": 2.019451141357422, "learning_rate": 7.997180323413507e-05, "loss": 3.2564, "step": 23590 }, { "epoch": 1.6031390134529149, "grad_norm": 2.367903709411621, "learning_rate": 7.99675567332518e-05, "loss": 3.0396, "step": 23595 }, { "epoch": 1.6034787335235765, "grad_norm": 2.5327234268188477, "learning_rate": 7.996331023236853e-05, "loss": 3.3511, "step": 23600 }, { "epoch": 1.6038184535942384, "grad_norm": 2.207205295562744, "learning_rate": 7.995906373148526e-05, "loss": 3.2446, "step": 23605 }, { "epoch": 1.6041581736649002, "grad_norm": 2.6602160930633545, "learning_rate": 7.995481723060198e-05, "loss": 3.2076, "step": 23610 }, { "epoch": 1.6044978937355618, "grad_norm": 3.026776075363159, "learning_rate": 7.995057072971871e-05, "loss": 2.7942, "step": 23615 }, { "epoch": 1.6048376138062237, "grad_norm": 2.576280355453491, "learning_rate": 7.994632422883544e-05, "loss": 3.162, "step": 23620 }, { "epoch": 1.6051773338768855, "grad_norm": 2.1998507976531982, "learning_rate": 7.994207772795217e-05, "loss": 3.2263, "step": 23625 }, { "epoch": 1.6055170539475472, "grad_norm": 2.042456865310669, "learning_rate": 7.993783122706891e-05, "loss": 3.1196, "step": 23630 }, { "epoch": 1.605856774018209, "grad_norm": 2.4029834270477295, "learning_rate": 7.993358472618562e-05, "loss": 3.0296, "step": 23635 }, { "epoch": 1.6061964940888709, "grad_norm": 2.3315653800964355, "learning_rate": 7.992933822530235e-05, "loss": 3.1826, "step": 23640 }, { "epoch": 1.6065362141595325, "grad_norm": 1.7864441871643066, "learning_rate": 7.99250917244191e-05, "loss": 2.9412, "step": 23645 }, { "epoch": 1.6068759342301944, "grad_norm": 2.2516770362854004, "learning_rate": 7.992084522353581e-05, "loss": 3.1403, "step": 23650 }, { "epoch": 1.6072156543008562, "grad_norm": 2.4315083026885986, "learning_rate": 
7.991659872265254e-05, "loss": 3.2036, "step": 23655 }, { "epoch": 1.6075553743715179, "grad_norm": 1.8667473793029785, "learning_rate": 7.991235222176926e-05, "loss": 3.2971, "step": 23660 }, { "epoch": 1.6078950944421795, "grad_norm": 1.9791537523269653, "learning_rate": 7.990810572088599e-05, "loss": 3.015, "step": 23665 }, { "epoch": 1.6082348145128416, "grad_norm": 2.35858154296875, "learning_rate": 7.990385922000272e-05, "loss": 3.0645, "step": 23670 }, { "epoch": 1.6085745345835032, "grad_norm": 2.2646596431732178, "learning_rate": 7.989961271911945e-05, "loss": 3.0229, "step": 23675 }, { "epoch": 1.6089142546541648, "grad_norm": 2.583509683609009, "learning_rate": 7.989536621823618e-05, "loss": 3.1221, "step": 23680 }, { "epoch": 1.609253974724827, "grad_norm": 2.033073902130127, "learning_rate": 7.98911197173529e-05, "loss": 3.1439, "step": 23685 }, { "epoch": 1.6095936947954885, "grad_norm": 1.9015529155731201, "learning_rate": 7.988687321646963e-05, "loss": 3.2378, "step": 23690 }, { "epoch": 1.6099334148661502, "grad_norm": 2.274399757385254, "learning_rate": 7.988262671558636e-05, "loss": 2.7565, "step": 23695 }, { "epoch": 1.610273134936812, "grad_norm": 2.1519861221313477, "learning_rate": 7.987838021470309e-05, "loss": 3.1537, "step": 23700 }, { "epoch": 1.6106128550074739, "grad_norm": 1.7025268077850342, "learning_rate": 7.987413371381982e-05, "loss": 3.1303, "step": 23705 }, { "epoch": 1.6109525750781355, "grad_norm": 2.024348020553589, "learning_rate": 7.986988721293654e-05, "loss": 3.0303, "step": 23710 }, { "epoch": 1.6112922951487973, "grad_norm": 2.069246530532837, "learning_rate": 7.986564071205327e-05, "loss": 3.2464, "step": 23715 }, { "epoch": 1.6116320152194592, "grad_norm": 2.437349796295166, "learning_rate": 7.986139421117e-05, "loss": 3.4381, "step": 23720 }, { "epoch": 1.6119717352901208, "grad_norm": 2.8407020568847656, "learning_rate": 7.985714771028673e-05, "loss": 3.1324, "step": 23725 }, { "epoch": 1.6123114553607827, 
"grad_norm": 2.5039138793945312, "learning_rate": 7.985290120940346e-05, "loss": 3.3341, "step": 23730 }, { "epoch": 1.6126511754314445, "grad_norm": 2.5436229705810547, "learning_rate": 7.984865470852018e-05, "loss": 3.1484, "step": 23735 }, { "epoch": 1.6129908955021062, "grad_norm": 1.7639858722686768, "learning_rate": 7.984440820763691e-05, "loss": 2.8548, "step": 23740 }, { "epoch": 1.613330615572768, "grad_norm": 2.338717460632324, "learning_rate": 7.984016170675364e-05, "loss": 3.2206, "step": 23745 }, { "epoch": 1.6136703356434299, "grad_norm": 2.1778671741485596, "learning_rate": 7.983591520587037e-05, "loss": 3.189, "step": 23750 }, { "epoch": 1.6140100557140915, "grad_norm": 2.2452001571655273, "learning_rate": 7.98316687049871e-05, "loss": 3.2978, "step": 23755 }, { "epoch": 1.6143497757847534, "grad_norm": 1.9756388664245605, "learning_rate": 7.982742220410382e-05, "loss": 3.1373, "step": 23760 }, { "epoch": 1.6146894958554152, "grad_norm": 2.405799388885498, "learning_rate": 7.982317570322055e-05, "loss": 3.1108, "step": 23765 }, { "epoch": 1.6150292159260768, "grad_norm": 2.2411060333251953, "learning_rate": 7.981892920233728e-05, "loss": 2.8488, "step": 23770 }, { "epoch": 1.6153689359967387, "grad_norm": 1.8959070444107056, "learning_rate": 7.981468270145401e-05, "loss": 3.2344, "step": 23775 }, { "epoch": 1.6157086560674006, "grad_norm": 2.3129420280456543, "learning_rate": 7.981043620057074e-05, "loss": 3.0802, "step": 23780 }, { "epoch": 1.6160483761380622, "grad_norm": 2.088834285736084, "learning_rate": 7.980618969968746e-05, "loss": 2.9837, "step": 23785 }, { "epoch": 1.616388096208724, "grad_norm": 2.0929858684539795, "learning_rate": 7.980194319880418e-05, "loss": 2.9479, "step": 23790 }, { "epoch": 1.6167278162793859, "grad_norm": 2.9058895111083984, "learning_rate": 7.979769669792092e-05, "loss": 2.9323, "step": 23795 }, { "epoch": 1.6170675363500475, "grad_norm": 2.117215394973755, "learning_rate": 7.979345019703765e-05, "loss": 3.1262, 
"step": 23800 }, { "epoch": 1.6174072564207094, "grad_norm": 1.8428412675857544, "learning_rate": 7.978920369615436e-05, "loss": 2.8952, "step": 23805 }, { "epoch": 1.6177469764913712, "grad_norm": 2.527233600616455, "learning_rate": 7.97849571952711e-05, "loss": 3.075, "step": 23810 }, { "epoch": 1.6180866965620329, "grad_norm": 2.2099881172180176, "learning_rate": 7.978071069438783e-05, "loss": 3.3984, "step": 23815 }, { "epoch": 1.6184264166326947, "grad_norm": 2.062875509262085, "learning_rate": 7.977646419350455e-05, "loss": 2.9516, "step": 23820 }, { "epoch": 1.6187661367033566, "grad_norm": 2.9232025146484375, "learning_rate": 7.977221769262129e-05, "loss": 3.1849, "step": 23825 }, { "epoch": 1.6191058567740182, "grad_norm": 2.221270799636841, "learning_rate": 7.976797119173802e-05, "loss": 3.1973, "step": 23830 }, { "epoch": 1.6194455768446798, "grad_norm": 2.2557077407836914, "learning_rate": 7.976372469085473e-05, "loss": 3.1334, "step": 23835 }, { "epoch": 1.619785296915342, "grad_norm": 1.6427829265594482, "learning_rate": 7.975947818997147e-05, "loss": 2.8887, "step": 23840 }, { "epoch": 1.6201250169860035, "grad_norm": 2.4084279537200928, "learning_rate": 7.97552316890882e-05, "loss": 2.8251, "step": 23845 }, { "epoch": 1.6204647370566652, "grad_norm": 2.2722971439361572, "learning_rate": 7.975098518820492e-05, "loss": 3.3338, "step": 23850 }, { "epoch": 1.6208044571273272, "grad_norm": 2.17574143409729, "learning_rate": 7.974673868732166e-05, "loss": 3.4072, "step": 23855 }, { "epoch": 1.6211441771979889, "grad_norm": 2.123832941055298, "learning_rate": 7.974249218643837e-05, "loss": 2.9957, "step": 23860 }, { "epoch": 1.6214838972686505, "grad_norm": 2.2879855632781982, "learning_rate": 7.97382456855551e-05, "loss": 2.9354, "step": 23865 }, { "epoch": 1.6218236173393124, "grad_norm": 2.4838762283325195, "learning_rate": 7.973399918467184e-05, "loss": 3.0129, "step": 23870 }, { "epoch": 1.6221633374099742, "grad_norm": 2.4831178188323975, 
"learning_rate": 7.972975268378856e-05, "loss": 3.0539, "step": 23875 }, { "epoch": 1.6225030574806358, "grad_norm": 2.297403573989868, "learning_rate": 7.972550618290528e-05, "loss": 3.2802, "step": 23880 }, { "epoch": 1.6228427775512977, "grad_norm": 2.14865779876709, "learning_rate": 7.972125968202202e-05, "loss": 3.1111, "step": 23885 }, { "epoch": 1.6231824976219595, "grad_norm": 2.3590307235717773, "learning_rate": 7.971701318113874e-05, "loss": 3.1189, "step": 23890 }, { "epoch": 1.6235222176926212, "grad_norm": 2.643796920776367, "learning_rate": 7.971276668025547e-05, "loss": 2.9283, "step": 23895 }, { "epoch": 1.623861937763283, "grad_norm": 2.151998281478882, "learning_rate": 7.970852017937221e-05, "loss": 3.1685, "step": 23900 }, { "epoch": 1.6242016578339449, "grad_norm": 2.2859504222869873, "learning_rate": 7.970427367848892e-05, "loss": 3.1546, "step": 23905 }, { "epoch": 1.6245413779046065, "grad_norm": 2.3883957862854004, "learning_rate": 7.970002717760565e-05, "loss": 3.1288, "step": 23910 }, { "epoch": 1.6248810979752684, "grad_norm": 2.4175689220428467, "learning_rate": 7.969578067672239e-05, "loss": 3.1275, "step": 23915 }, { "epoch": 1.6252208180459302, "grad_norm": 2.048658847808838, "learning_rate": 7.969153417583911e-05, "loss": 3.1117, "step": 23920 }, { "epoch": 1.6255605381165918, "grad_norm": 2.3431670665740967, "learning_rate": 7.968728767495584e-05, "loss": 3.1812, "step": 23925 }, { "epoch": 1.6259002581872537, "grad_norm": 1.93198823928833, "learning_rate": 7.968304117407258e-05, "loss": 3.2141, "step": 23930 }, { "epoch": 1.6262399782579156, "grad_norm": 2.5542478561401367, "learning_rate": 7.967879467318929e-05, "loss": 2.9813, "step": 23935 }, { "epoch": 1.6265796983285772, "grad_norm": 1.7865632772445679, "learning_rate": 7.967454817230602e-05, "loss": 3.2098, "step": 23940 }, { "epoch": 1.626919418399239, "grad_norm": 2.374694347381592, "learning_rate": 7.967030167142275e-05, "loss": 3.1369, "step": 23945 }, { "epoch": 
1.627259138469901, "grad_norm": 2.7434165477752686, "learning_rate": 7.966605517053948e-05, "loss": 2.8928, "step": 23950 }, { "epoch": 1.6275988585405625, "grad_norm": 2.3230886459350586, "learning_rate": 7.96618086696562e-05, "loss": 3.155, "step": 23955 }, { "epoch": 1.6279385786112244, "grad_norm": 2.1516273021698, "learning_rate": 7.965756216877293e-05, "loss": 3.443, "step": 23960 }, { "epoch": 1.6282782986818862, "grad_norm": 2.462550401687622, "learning_rate": 7.965331566788966e-05, "loss": 3.1891, "step": 23965 }, { "epoch": 1.6286180187525479, "grad_norm": 2.118922233581543, "learning_rate": 7.96490691670064e-05, "loss": 3.2656, "step": 23970 }, { "epoch": 1.6289577388232097, "grad_norm": 2.376283884048462, "learning_rate": 7.964482266612312e-05, "loss": 3.1935, "step": 23975 }, { "epoch": 1.6292974588938716, "grad_norm": 3.006774663925171, "learning_rate": 7.964057616523984e-05, "loss": 3.0767, "step": 23980 }, { "epoch": 1.6296371789645332, "grad_norm": 2.2596044540405273, "learning_rate": 7.963632966435658e-05, "loss": 3.3747, "step": 23985 }, { "epoch": 1.629976899035195, "grad_norm": 2.8000986576080322, "learning_rate": 7.96320831634733e-05, "loss": 3.2069, "step": 23990 }, { "epoch": 1.630316619105857, "grad_norm": 2.521155595779419, "learning_rate": 7.962783666259003e-05, "loss": 3.2523, "step": 23995 }, { "epoch": 1.6306563391765185, "grad_norm": 2.2134268283843994, "learning_rate": 7.962359016170677e-05, "loss": 3.2639, "step": 24000 }, { "epoch": 1.6309960592471802, "grad_norm": 2.3149707317352295, "learning_rate": 7.961934366082348e-05, "loss": 3.0262, "step": 24005 }, { "epoch": 1.6313357793178422, "grad_norm": 2.3694636821746826, "learning_rate": 7.961509715994021e-05, "loss": 3.0294, "step": 24010 }, { "epoch": 1.6316754993885039, "grad_norm": 2.285766363143921, "learning_rate": 7.961085065905694e-05, "loss": 2.9604, "step": 24015 }, { "epoch": 1.6320152194591655, "grad_norm": 2.492323637008667, "learning_rate": 7.960660415817367e-05, 
"loss": 3.2622, "step": 24020 }, { "epoch": 1.6323549395298276, "grad_norm": 2.384927988052368, "learning_rate": 7.96023576572904e-05, "loss": 3.2169, "step": 24025 }, { "epoch": 1.6326946596004892, "grad_norm": 1.9765962362289429, "learning_rate": 7.959811115640712e-05, "loss": 3.2308, "step": 24030 }, { "epoch": 1.6330343796711508, "grad_norm": 1.9071754217147827, "learning_rate": 7.959386465552385e-05, "loss": 3.1642, "step": 24035 }, { "epoch": 1.633374099741813, "grad_norm": 2.3187808990478516, "learning_rate": 7.958961815464058e-05, "loss": 3.0772, "step": 24040 }, { "epoch": 1.6337138198124745, "grad_norm": 2.012781858444214, "learning_rate": 7.958537165375731e-05, "loss": 2.9981, "step": 24045 }, { "epoch": 1.6340535398831362, "grad_norm": 2.5334277153015137, "learning_rate": 7.958112515287404e-05, "loss": 2.8753, "step": 24050 }, { "epoch": 1.634393259953798, "grad_norm": 2.6003878116607666, "learning_rate": 7.957687865199076e-05, "loss": 2.7655, "step": 24055 }, { "epoch": 1.6347329800244599, "grad_norm": 2.4181385040283203, "learning_rate": 7.957263215110749e-05, "loss": 3.1851, "step": 24060 }, { "epoch": 1.6350727000951215, "grad_norm": 2.157761812210083, "learning_rate": 7.956838565022422e-05, "loss": 3.3418, "step": 24065 }, { "epoch": 1.6354124201657834, "grad_norm": 3.0880305767059326, "learning_rate": 7.956413914934095e-05, "loss": 3.2564, "step": 24070 }, { "epoch": 1.6357521402364452, "grad_norm": 1.8300590515136719, "learning_rate": 7.955989264845768e-05, "loss": 3.1885, "step": 24075 }, { "epoch": 1.6360918603071068, "grad_norm": 2.1066012382507324, "learning_rate": 7.95556461475744e-05, "loss": 3.3311, "step": 24080 }, { "epoch": 1.6364315803777687, "grad_norm": 2.601642370223999, "learning_rate": 7.955139964669113e-05, "loss": 3.5467, "step": 24085 }, { "epoch": 1.6367713004484306, "grad_norm": 3.044271945953369, "learning_rate": 7.954715314580786e-05, "loss": 3.1397, "step": 24090 }, { "epoch": 1.6371110205190922, "grad_norm": 
2.2762680053710938, "learning_rate": 7.954290664492459e-05, "loss": 3.0231, "step": 24095 }, { "epoch": 1.637450740589754, "grad_norm": 2.3025426864624023, "learning_rate": 7.953866014404132e-05, "loss": 3.2033, "step": 24100 }, { "epoch": 1.637790460660416, "grad_norm": 2.395594596862793, "learning_rate": 7.953441364315804e-05, "loss": 3.0704, "step": 24105 }, { "epoch": 1.6381301807310775, "grad_norm": 2.4636731147766113, "learning_rate": 7.953016714227477e-05, "loss": 3.0167, "step": 24110 }, { "epoch": 1.6384699008017394, "grad_norm": 2.393038749694824, "learning_rate": 7.95259206413915e-05, "loss": 3.1084, "step": 24115 }, { "epoch": 1.6388096208724012, "grad_norm": 2.2553353309631348, "learning_rate": 7.952167414050823e-05, "loss": 2.9872, "step": 24120 }, { "epoch": 1.6391493409430629, "grad_norm": 2.2797842025756836, "learning_rate": 7.951742763962496e-05, "loss": 3.0871, "step": 24125 }, { "epoch": 1.6394890610137247, "grad_norm": 2.3373708724975586, "learning_rate": 7.951318113874168e-05, "loss": 3.2189, "step": 24130 }, { "epoch": 1.6398287810843866, "grad_norm": 1.7799533605575562, "learning_rate": 7.950893463785841e-05, "loss": 3.0754, "step": 24135 }, { "epoch": 1.6401685011550482, "grad_norm": 2.874905586242676, "learning_rate": 7.950468813697514e-05, "loss": 3.288, "step": 24140 }, { "epoch": 1.64050822122571, "grad_norm": 2.5211052894592285, "learning_rate": 7.950044163609185e-05, "loss": 3.0488, "step": 24145 }, { "epoch": 1.640847941296372, "grad_norm": 2.1439437866210938, "learning_rate": 7.94961951352086e-05, "loss": 3.2793, "step": 24150 }, { "epoch": 1.6411876613670335, "grad_norm": 2.6317031383514404, "learning_rate": 7.949194863432532e-05, "loss": 3.0128, "step": 24155 }, { "epoch": 1.6415273814376954, "grad_norm": 1.722983717918396, "learning_rate": 7.948770213344204e-05, "loss": 2.7323, "step": 24160 }, { "epoch": 1.6418671015083572, "grad_norm": 2.3440260887145996, "learning_rate": 7.948345563255878e-05, "loss": 3.1457, "step": 24165 }, 
{ "epoch": 1.6422068215790189, "grad_norm": 2.328374147415161, "learning_rate": 7.947920913167551e-05, "loss": 2.8222, "step": 24170 }, { "epoch": 1.6425465416496805, "grad_norm": 2.4720795154571533, "learning_rate": 7.947496263079222e-05, "loss": 3.0618, "step": 24175 }, { "epoch": 1.6428862617203426, "grad_norm": 2.2125420570373535, "learning_rate": 7.947071612990896e-05, "loss": 3.174, "step": 24180 }, { "epoch": 1.6432259817910042, "grad_norm": 2.6828110218048096, "learning_rate": 7.946646962902569e-05, "loss": 3.2685, "step": 24185 }, { "epoch": 1.6435657018616658, "grad_norm": 2.1720566749572754, "learning_rate": 7.94622231281424e-05, "loss": 2.7214, "step": 24190 }, { "epoch": 1.643905421932328, "grad_norm": 2.333803653717041, "learning_rate": 7.945797662725915e-05, "loss": 3.1255, "step": 24195 }, { "epoch": 1.6442451420029895, "grad_norm": 2.6541714668273926, "learning_rate": 7.945373012637588e-05, "loss": 3.3754, "step": 24200 }, { "epoch": 1.6445848620736512, "grad_norm": 2.538076639175415, "learning_rate": 7.944948362549259e-05, "loss": 3.3199, "step": 24205 }, { "epoch": 1.6449245821443133, "grad_norm": 1.8759652376174927, "learning_rate": 7.944523712460933e-05, "loss": 2.9294, "step": 24210 }, { "epoch": 1.6452643022149749, "grad_norm": 2.326796293258667, "learning_rate": 7.944099062372605e-05, "loss": 3.0812, "step": 24215 }, { "epoch": 1.6456040222856365, "grad_norm": 2.2889723777770996, "learning_rate": 7.943674412284277e-05, "loss": 3.1042, "step": 24220 }, { "epoch": 1.6459437423562984, "grad_norm": 1.9444963932037354, "learning_rate": 7.943249762195952e-05, "loss": 3.2166, "step": 24225 }, { "epoch": 1.6462834624269602, "grad_norm": 2.649440050125122, "learning_rate": 7.942825112107623e-05, "loss": 3.1855, "step": 24230 }, { "epoch": 1.6466231824976219, "grad_norm": 2.310926675796509, "learning_rate": 7.942400462019296e-05, "loss": 2.9596, "step": 24235 }, { "epoch": 1.6469629025682837, "grad_norm": 2.1062028408050537, "learning_rate": 
7.94197581193097e-05, "loss": 3.1851, "step": 24240 }, { "epoch": 1.6473026226389456, "grad_norm": 2.3403899669647217, "learning_rate": 7.941551161842641e-05, "loss": 3.0171, "step": 24245 }, { "epoch": 1.6476423427096072, "grad_norm": 2.022459030151367, "learning_rate": 7.941126511754314e-05, "loss": 3.2034, "step": 24250 }, { "epoch": 1.647982062780269, "grad_norm": 2.4203693866729736, "learning_rate": 7.940701861665988e-05, "loss": 3.0666, "step": 24255 }, { "epoch": 1.648321782850931, "grad_norm": 2.2848196029663086, "learning_rate": 7.94027721157766e-05, "loss": 3.2922, "step": 24260 }, { "epoch": 1.6486615029215925, "grad_norm": 1.9610669612884521, "learning_rate": 7.939852561489333e-05, "loss": 3.2501, "step": 24265 }, { "epoch": 1.6490012229922544, "grad_norm": 3.511955738067627, "learning_rate": 7.939427911401007e-05, "loss": 3.0733, "step": 24270 }, { "epoch": 1.6493409430629162, "grad_norm": 2.1376330852508545, "learning_rate": 7.939003261312678e-05, "loss": 3.0481, "step": 24275 }, { "epoch": 1.6496806631335779, "grad_norm": 2.6878068447113037, "learning_rate": 7.938578611224351e-05, "loss": 3.2549, "step": 24280 }, { "epoch": 1.6500203832042397, "grad_norm": 2.422595262527466, "learning_rate": 7.938153961136024e-05, "loss": 2.9433, "step": 24285 }, { "epoch": 1.6503601032749016, "grad_norm": 2.9374866485595703, "learning_rate": 7.937729311047697e-05, "loss": 2.9886, "step": 24290 }, { "epoch": 1.6506998233455632, "grad_norm": 2.3228964805603027, "learning_rate": 7.93730466095937e-05, "loss": 3.372, "step": 24295 }, { "epoch": 1.651039543416225, "grad_norm": 2.1392860412597656, "learning_rate": 7.936880010871042e-05, "loss": 2.9199, "step": 24300 }, { "epoch": 1.651379263486887, "grad_norm": 2.1871585845947266, "learning_rate": 7.936455360782715e-05, "loss": 3.1866, "step": 24305 }, { "epoch": 1.6517189835575485, "grad_norm": 2.7511889934539795, "learning_rate": 7.936030710694389e-05, "loss": 3.2612, "step": 24310 }, { "epoch": 1.6520587036282104, 
"grad_norm": 2.1277923583984375, "learning_rate": 7.93560606060606e-05, "loss": 3.2464, "step": 24315 }, { "epoch": 1.6523984236988722, "grad_norm": 2.6164650917053223, "learning_rate": 7.935181410517733e-05, "loss": 2.9635, "step": 24320 }, { "epoch": 1.6527381437695339, "grad_norm": 2.2448627948760986, "learning_rate": 7.934756760429408e-05, "loss": 3.0829, "step": 24325 }, { "epoch": 1.6530778638401957, "grad_norm": 2.938589096069336, "learning_rate": 7.934332110341079e-05, "loss": 3.0437, "step": 24330 }, { "epoch": 1.6534175839108576, "grad_norm": 2.347111701965332, "learning_rate": 7.933907460252752e-05, "loss": 3.1187, "step": 24335 }, { "epoch": 1.6537573039815192, "grad_norm": 2.2160401344299316, "learning_rate": 7.933482810164426e-05, "loss": 2.8692, "step": 24340 }, { "epoch": 1.6540970240521808, "grad_norm": 1.9993133544921875, "learning_rate": 7.933058160076097e-05, "loss": 3.1064, "step": 24345 }, { "epoch": 1.654436744122843, "grad_norm": 2.7206554412841797, "learning_rate": 7.93263350998777e-05, "loss": 3.1883, "step": 24350 }, { "epoch": 1.6547764641935045, "grad_norm": 2.818760395050049, "learning_rate": 7.932208859899444e-05, "loss": 3.1883, "step": 24355 }, { "epoch": 1.6551161842641662, "grad_norm": 2.2877390384674072, "learning_rate": 7.931784209811116e-05, "loss": 3.224, "step": 24360 }, { "epoch": 1.6554559043348283, "grad_norm": 2.173454999923706, "learning_rate": 7.931359559722789e-05, "loss": 3.1599, "step": 24365 }, { "epoch": 1.6557956244054899, "grad_norm": 2.1698291301727295, "learning_rate": 7.930934909634461e-05, "loss": 3.2442, "step": 24370 }, { "epoch": 1.6561353444761515, "grad_norm": 1.9263452291488647, "learning_rate": 7.930510259546134e-05, "loss": 3.0457, "step": 24375 }, { "epoch": 1.6564750645468136, "grad_norm": 1.857949137687683, "learning_rate": 7.930085609457807e-05, "loss": 2.9157, "step": 24380 }, { "epoch": 1.6568147846174752, "grad_norm": 2.3632688522338867, "learning_rate": 7.92966095936948e-05, "loss": 3.3608, 
"step": 24385 }, { "epoch": 1.6571545046881369, "grad_norm": 2.57535719871521, "learning_rate": 7.929236309281153e-05, "loss": 3.1504, "step": 24390 }, { "epoch": 1.6574942247587987, "grad_norm": 2.3024682998657227, "learning_rate": 7.928811659192825e-05, "loss": 3.3579, "step": 24395 }, { "epoch": 1.6578339448294606, "grad_norm": 2.305633544921875, "learning_rate": 7.928387009104498e-05, "loss": 3.2923, "step": 24400 }, { "epoch": 1.6581736649001222, "grad_norm": 2.5272152423858643, "learning_rate": 7.927962359016171e-05, "loss": 3.0296, "step": 24405 }, { "epoch": 1.658513384970784, "grad_norm": 2.3879213333129883, "learning_rate": 7.927537708927844e-05, "loss": 3.2146, "step": 24410 }, { "epoch": 1.658853105041446, "grad_norm": 2.1616806983947754, "learning_rate": 7.927113058839517e-05, "loss": 3.0372, "step": 24415 }, { "epoch": 1.6591928251121075, "grad_norm": 2.647230863571167, "learning_rate": 7.92668840875119e-05, "loss": 2.9421, "step": 24420 }, { "epoch": 1.6595325451827694, "grad_norm": 2.2444651126861572, "learning_rate": 7.926263758662862e-05, "loss": 3.1189, "step": 24425 }, { "epoch": 1.6598722652534312, "grad_norm": 2.2218024730682373, "learning_rate": 7.925839108574535e-05, "loss": 3.2437, "step": 24430 }, { "epoch": 1.6602119853240929, "grad_norm": 2.298354148864746, "learning_rate": 7.925414458486208e-05, "loss": 3.1784, "step": 24435 }, { "epoch": 1.6605517053947547, "grad_norm": 3.068572521209717, "learning_rate": 7.92498980839788e-05, "loss": 3.0607, "step": 24440 }, { "epoch": 1.6608914254654166, "grad_norm": 2.1786322593688965, "learning_rate": 7.924565158309553e-05, "loss": 3.0608, "step": 24445 }, { "epoch": 1.6612311455360782, "grad_norm": 2.2821431159973145, "learning_rate": 7.924140508221226e-05, "loss": 3.3108, "step": 24450 }, { "epoch": 1.66157086560674, "grad_norm": 2.7578399181365967, "learning_rate": 7.923715858132899e-05, "loss": 3.0775, "step": 24455 }, { "epoch": 1.661910585677402, "grad_norm": 2.0647096633911133, 
"learning_rate": 7.923291208044572e-05, "loss": 3.3405, "step": 24460 }, { "epoch": 1.6622503057480635, "grad_norm": 2.40832257270813, "learning_rate": 7.922866557956245e-05, "loss": 3.1014, "step": 24465 }, { "epoch": 1.6625900258187254, "grad_norm": 3.6699230670928955, "learning_rate": 7.922441907867917e-05, "loss": 3.2573, "step": 24470 }, { "epoch": 1.6629297458893872, "grad_norm": 2.4639415740966797, "learning_rate": 7.92201725777959e-05, "loss": 3.2395, "step": 24475 }, { "epoch": 1.6632694659600489, "grad_norm": 1.9101179838180542, "learning_rate": 7.921592607691263e-05, "loss": 3.2772, "step": 24480 }, { "epoch": 1.6636091860307107, "grad_norm": 2.6236648559570312, "learning_rate": 7.921167957602935e-05, "loss": 2.9615, "step": 24485 }, { "epoch": 1.6639489061013726, "grad_norm": 2.6915390491485596, "learning_rate": 7.920743307514609e-05, "loss": 3.119, "step": 24490 }, { "epoch": 1.6642886261720342, "grad_norm": 3.28731369972229, "learning_rate": 7.920318657426281e-05, "loss": 2.9593, "step": 24495 }, { "epoch": 1.664628346242696, "grad_norm": 2.3724942207336426, "learning_rate": 7.919894007337953e-05, "loss": 3.3355, "step": 24500 }, { "epoch": 1.664968066313358, "grad_norm": 2.1374714374542236, "learning_rate": 7.919469357249627e-05, "loss": 3.0648, "step": 24505 }, { "epoch": 1.6653077863840196, "grad_norm": 2.3634867668151855, "learning_rate": 7.9190447071613e-05, "loss": 3.0481, "step": 24510 }, { "epoch": 1.6656475064546812, "grad_norm": 2.034057855606079, "learning_rate": 7.918620057072971e-05, "loss": 2.9744, "step": 24515 }, { "epoch": 1.6659872265253433, "grad_norm": 2.367448091506958, "learning_rate": 7.918195406984645e-05, "loss": 2.9934, "step": 24520 }, { "epoch": 1.6663269465960049, "grad_norm": 1.9216195344924927, "learning_rate": 7.917770756896318e-05, "loss": 3.1882, "step": 24525 }, { "epoch": 1.6666666666666665, "grad_norm": 2.692432403564453, "learning_rate": 7.91734610680799e-05, "loss": 3.2895, "step": 24530 }, { "epoch": 
1.6670063867373286, "grad_norm": 2.7321083545684814, "learning_rate": 7.916921456719664e-05, "loss": 3.3296, "step": 24535 }, { "epoch": 1.6673461068079902, "grad_norm": 2.4718422889709473, "learning_rate": 7.916496806631337e-05, "loss": 3.0631, "step": 24540 }, { "epoch": 1.6676858268786519, "grad_norm": 2.3455545902252197, "learning_rate": 7.916072156543008e-05, "loss": 3.1359, "step": 24545 }, { "epoch": 1.668025546949314, "grad_norm": 2.8979876041412354, "learning_rate": 7.915647506454682e-05, "loss": 3.1126, "step": 24550 }, { "epoch": 1.6683652670199756, "grad_norm": 2.3549792766571045, "learning_rate": 7.915222856366355e-05, "loss": 3.4011, "step": 24555 }, { "epoch": 1.6687049870906372, "grad_norm": 1.9721176624298096, "learning_rate": 7.914798206278027e-05, "loss": 3.1826, "step": 24560 }, { "epoch": 1.669044707161299, "grad_norm": 1.7556242942810059, "learning_rate": 7.914373556189701e-05, "loss": 3.192, "step": 24565 }, { "epoch": 1.669384427231961, "grad_norm": 2.06095552444458, "learning_rate": 7.913948906101372e-05, "loss": 3.3862, "step": 24570 }, { "epoch": 1.6697241473026225, "grad_norm": 2.4839911460876465, "learning_rate": 7.913524256013045e-05, "loss": 3.0068, "step": 24575 }, { "epoch": 1.6700638673732844, "grad_norm": 2.599515199661255, "learning_rate": 7.913099605924719e-05, "loss": 3.2518, "step": 24580 }, { "epoch": 1.6704035874439462, "grad_norm": 2.205244302749634, "learning_rate": 7.91267495583639e-05, "loss": 3.1122, "step": 24585 }, { "epoch": 1.6707433075146079, "grad_norm": 2.067880868911743, "learning_rate": 7.912250305748063e-05, "loss": 2.9256, "step": 24590 }, { "epoch": 1.6710830275852697, "grad_norm": 1.8992834091186523, "learning_rate": 7.911825655659737e-05, "loss": 3.1351, "step": 24595 }, { "epoch": 1.6714227476559316, "grad_norm": 2.271033525466919, "learning_rate": 7.911401005571409e-05, "loss": 2.98, "step": 24600 }, { "epoch": 1.6717624677265932, "grad_norm": 2.1949856281280518, "learning_rate": 7.910976355483082e-05, 
"loss": 3.085, "step": 24605 }, { "epoch": 1.672102187797255, "grad_norm": 2.479254961013794, "learning_rate": 7.910551705394756e-05, "loss": 3.2649, "step": 24610 }, { "epoch": 1.672441907867917, "grad_norm": 2.488210916519165, "learning_rate": 7.910127055306427e-05, "loss": 3.3634, "step": 24615 }, { "epoch": 1.6727816279385785, "grad_norm": 2.648226737976074, "learning_rate": 7.9097024052181e-05, "loss": 2.8145, "step": 24620 }, { "epoch": 1.6731213480092404, "grad_norm": 2.325770616531372, "learning_rate": 7.909277755129774e-05, "loss": 3.2135, "step": 24625 }, { "epoch": 1.6734610680799022, "grad_norm": 2.2618236541748047, "learning_rate": 7.908853105041446e-05, "loss": 3.3017, "step": 24630 }, { "epoch": 1.6738007881505639, "grad_norm": 2.05930757522583, "learning_rate": 7.908428454953119e-05, "loss": 3.2222, "step": 24635 }, { "epoch": 1.6741405082212257, "grad_norm": 2.065270185470581, "learning_rate": 7.908003804864791e-05, "loss": 3.1014, "step": 24640 }, { "epoch": 1.6744802282918876, "grad_norm": 2.070124626159668, "learning_rate": 7.907579154776464e-05, "loss": 3.1498, "step": 24645 }, { "epoch": 1.6748199483625492, "grad_norm": 1.880163550376892, "learning_rate": 7.907154504688138e-05, "loss": 2.9305, "step": 24650 }, { "epoch": 1.675159668433211, "grad_norm": 2.519547700881958, "learning_rate": 7.90672985459981e-05, "loss": 3.2324, "step": 24655 }, { "epoch": 1.675499388503873, "grad_norm": 2.0141000747680664, "learning_rate": 7.906305204511483e-05, "loss": 3.0311, "step": 24660 }, { "epoch": 1.6758391085745346, "grad_norm": 2.4212496280670166, "learning_rate": 7.905880554423157e-05, "loss": 3.2919, "step": 24665 }, { "epoch": 1.6761788286451964, "grad_norm": 2.9612245559692383, "learning_rate": 7.905455904334828e-05, "loss": 3.0865, "step": 24670 }, { "epoch": 1.6765185487158583, "grad_norm": 2.282923460006714, "learning_rate": 7.905031254246501e-05, "loss": 3.2385, "step": 24675 }, { "epoch": 1.67685826878652, "grad_norm": 1.807603120803833, 
"learning_rate": 7.904606604158175e-05, "loss": 2.9936, "step": 24680 }, { "epoch": 1.6771979888571815, "grad_norm": 2.804231643676758, "learning_rate": 7.904181954069847e-05, "loss": 3.3675, "step": 24685 }, { "epoch": 1.6775377089278436, "grad_norm": 2.3472416400909424, "learning_rate": 7.90375730398152e-05, "loss": 3.1258, "step": 24690 }, { "epoch": 1.6778774289985052, "grad_norm": 2.424626111984253, "learning_rate": 7.903332653893193e-05, "loss": 3.2569, "step": 24695 }, { "epoch": 1.6782171490691669, "grad_norm": 2.431288719177246, "learning_rate": 7.902908003804865e-05, "loss": 3.0925, "step": 24700 }, { "epoch": 1.678556869139829, "grad_norm": 2.483682632446289, "learning_rate": 7.902483353716538e-05, "loss": 3.2147, "step": 24705 }, { "epoch": 1.6788965892104906, "grad_norm": 2.0984437465667725, "learning_rate": 7.90205870362821e-05, "loss": 3.1396, "step": 24710 }, { "epoch": 1.6792363092811522, "grad_norm": 2.308192491531372, "learning_rate": 7.901634053539883e-05, "loss": 3.2947, "step": 24715 }, { "epoch": 1.6795760293518143, "grad_norm": 2.3560452461242676, "learning_rate": 7.901209403451556e-05, "loss": 3.1472, "step": 24720 }, { "epoch": 1.679915749422476, "grad_norm": 2.178514242172241, "learning_rate": 7.900784753363229e-05, "loss": 3.0335, "step": 24725 }, { "epoch": 1.6802554694931375, "grad_norm": 2.7326197624206543, "learning_rate": 7.900360103274902e-05, "loss": 3.1924, "step": 24730 }, { "epoch": 1.6805951895637994, "grad_norm": 3.0688869953155518, "learning_rate": 7.899935453186575e-05, "loss": 3.1492, "step": 24735 }, { "epoch": 1.6809349096344612, "grad_norm": 2.088712692260742, "learning_rate": 7.899510803098247e-05, "loss": 3.1008, "step": 24740 }, { "epoch": 1.6812746297051229, "grad_norm": 2.2717535495758057, "learning_rate": 7.89908615300992e-05, "loss": 3.4157, "step": 24745 }, { "epoch": 1.6816143497757847, "grad_norm": 2.48484468460083, "learning_rate": 7.898661502921593e-05, "loss": 3.1401, "step": 24750 }, { "epoch": 
1.6819540698464466, "grad_norm": 2.474637746810913, "learning_rate": 7.898236852833266e-05, "loss": 3.0524, "step": 24755 }, { "epoch": 1.6822937899171082, "grad_norm": 2.3691623210906982, "learning_rate": 7.897812202744939e-05, "loss": 2.996, "step": 24760 }, { "epoch": 1.68263350998777, "grad_norm": 2.295539617538452, "learning_rate": 7.897387552656611e-05, "loss": 2.6807, "step": 24765 }, { "epoch": 1.682973230058432, "grad_norm": 2.047369956970215, "learning_rate": 7.896962902568284e-05, "loss": 2.9565, "step": 24770 }, { "epoch": 1.6833129501290935, "grad_norm": 2.226591110229492, "learning_rate": 7.896538252479957e-05, "loss": 3.1462, "step": 24775 }, { "epoch": 1.6836526701997554, "grad_norm": 2.2200310230255127, "learning_rate": 7.89611360239163e-05, "loss": 2.9683, "step": 24780 }, { "epoch": 1.6839923902704172, "grad_norm": 2.516662836074829, "learning_rate": 7.895688952303303e-05, "loss": 3.136, "step": 24785 }, { "epoch": 1.6843321103410789, "grad_norm": 1.822034239768982, "learning_rate": 7.895264302214975e-05, "loss": 3.0046, "step": 24790 }, { "epoch": 1.6846718304117407, "grad_norm": 2.1298294067382812, "learning_rate": 7.894839652126648e-05, "loss": 3.1418, "step": 24795 }, { "epoch": 1.6850115504824026, "grad_norm": 1.930459976196289, "learning_rate": 7.894415002038321e-05, "loss": 2.9932, "step": 24800 }, { "epoch": 1.6853512705530642, "grad_norm": 2.8510024547576904, "learning_rate": 7.893990351949994e-05, "loss": 2.9029, "step": 24805 }, { "epoch": 1.685690990623726, "grad_norm": 3.35207200050354, "learning_rate": 7.893565701861667e-05, "loss": 2.7222, "step": 24810 }, { "epoch": 1.686030710694388, "grad_norm": 2.2029500007629395, "learning_rate": 7.89314105177334e-05, "loss": 3.1146, "step": 24815 }, { "epoch": 1.6863704307650496, "grad_norm": 2.03083872795105, "learning_rate": 7.892716401685012e-05, "loss": 3.0414, "step": 24820 }, { "epoch": 1.6867101508357114, "grad_norm": 2.366328239440918, "learning_rate": 7.892291751596685e-05, "loss": 
3.1199, "step": 24825 }, { "epoch": 1.6870498709063733, "grad_norm": 2.2316782474517822, "learning_rate": 7.891867101508358e-05, "loss": 3.3682, "step": 24830 }, { "epoch": 1.687389590977035, "grad_norm": 1.8325347900390625, "learning_rate": 7.89144245142003e-05, "loss": 3.0096, "step": 24835 }, { "epoch": 1.6877293110476967, "grad_norm": 2.122591733932495, "learning_rate": 7.891017801331702e-05, "loss": 3.1991, "step": 24840 }, { "epoch": 1.6880690311183586, "grad_norm": 2.4152474403381348, "learning_rate": 7.890593151243376e-05, "loss": 3.2079, "step": 24845 }, { "epoch": 1.6884087511890202, "grad_norm": 2.439443588256836, "learning_rate": 7.890168501155049e-05, "loss": 2.9717, "step": 24850 }, { "epoch": 1.6887484712596819, "grad_norm": 2.3476035594940186, "learning_rate": 7.88974385106672e-05, "loss": 3.0453, "step": 24855 }, { "epoch": 1.689088191330344, "grad_norm": 2.189002275466919, "learning_rate": 7.889319200978395e-05, "loss": 3.2766, "step": 24860 }, { "epoch": 1.6894279114010056, "grad_norm": 2.0040104389190674, "learning_rate": 7.888894550890067e-05, "loss": 3.0079, "step": 24865 }, { "epoch": 1.6897676314716672, "grad_norm": 2.2623462677001953, "learning_rate": 7.888469900801739e-05, "loss": 2.9484, "step": 24870 }, { "epoch": 1.6901073515423293, "grad_norm": 1.7284208536148071, "learning_rate": 7.888045250713413e-05, "loss": 3.1839, "step": 24875 }, { "epoch": 1.690447071612991, "grad_norm": 1.7806012630462646, "learning_rate": 7.887620600625086e-05, "loss": 2.9503, "step": 24880 }, { "epoch": 1.6907867916836525, "grad_norm": 2.289440155029297, "learning_rate": 7.887195950536757e-05, "loss": 2.9333, "step": 24885 }, { "epoch": 1.6911265117543146, "grad_norm": 2.4635350704193115, "learning_rate": 7.886771300448431e-05, "loss": 3.1075, "step": 24890 }, { "epoch": 1.6914662318249762, "grad_norm": 3.1606218814849854, "learning_rate": 7.886346650360104e-05, "loss": 3.1179, "step": 24895 }, { "epoch": 1.6918059518956379, "grad_norm": 2.1307435035705566, 
"learning_rate": 7.885922000271776e-05, "loss": 2.9397, "step": 24900 }, { "epoch": 1.6921456719662997, "grad_norm": 2.137542724609375, "learning_rate": 7.88549735018345e-05, "loss": 2.9801, "step": 24905 }, { "epoch": 1.6924853920369616, "grad_norm": 2.3366780281066895, "learning_rate": 7.885072700095121e-05, "loss": 3.2255, "step": 24910 }, { "epoch": 1.6928251121076232, "grad_norm": 2.563669204711914, "learning_rate": 7.884648050006794e-05, "loss": 3.1042, "step": 24915 }, { "epoch": 1.693164832178285, "grad_norm": 2.2320773601531982, "learning_rate": 7.884223399918468e-05, "loss": 3.1274, "step": 24920 }, { "epoch": 1.693504552248947, "grad_norm": 2.347332715988159, "learning_rate": 7.88379874983014e-05, "loss": 3.472, "step": 24925 }, { "epoch": 1.6938442723196085, "grad_norm": 2.1576037406921387, "learning_rate": 7.883374099741812e-05, "loss": 2.8108, "step": 24930 }, { "epoch": 1.6941839923902704, "grad_norm": 1.8269600868225098, "learning_rate": 7.882949449653487e-05, "loss": 3.3657, "step": 24935 }, { "epoch": 1.6945237124609323, "grad_norm": 2.4393627643585205, "learning_rate": 7.882524799565158e-05, "loss": 2.9657, "step": 24940 }, { "epoch": 1.6948634325315939, "grad_norm": 2.201742172241211, "learning_rate": 7.882100149476831e-05, "loss": 3.138, "step": 24945 }, { "epoch": 1.6952031526022557, "grad_norm": 3.0711684226989746, "learning_rate": 7.881675499388505e-05, "loss": 3.2137, "step": 24950 }, { "epoch": 1.6955428726729176, "grad_norm": 3.1136128902435303, "learning_rate": 7.881250849300176e-05, "loss": 3.1509, "step": 24955 }, { "epoch": 1.6958825927435792, "grad_norm": 2.2385661602020264, "learning_rate": 7.880826199211849e-05, "loss": 2.842, "step": 24960 }, { "epoch": 1.696222312814241, "grad_norm": 2.1298553943634033, "learning_rate": 7.880401549123523e-05, "loss": 2.9195, "step": 24965 }, { "epoch": 1.696562032884903, "grad_norm": 2.046191930770874, "learning_rate": 7.879976899035195e-05, "loss": 3.1673, "step": 24970 }, { "epoch": 
1.6969017529555646, "grad_norm": 2.3005893230438232, "learning_rate": 7.879552248946868e-05, "loss": 3.0924, "step": 24975 }, { "epoch": 1.6972414730262264, "grad_norm": 2.494781970977783, "learning_rate": 7.879127598858542e-05, "loss": 3.0621, "step": 24980 }, { "epoch": 1.6975811930968883, "grad_norm": 2.7863471508026123, "learning_rate": 7.878702948770213e-05, "loss": 2.8173, "step": 24985 }, { "epoch": 1.69792091316755, "grad_norm": 2.7463161945343018, "learning_rate": 7.878278298681887e-05, "loss": 3.1708, "step": 24990 }, { "epoch": 1.6982606332382117, "grad_norm": 2.618462085723877, "learning_rate": 7.877853648593559e-05, "loss": 3.2949, "step": 24995 }, { "epoch": 1.6986003533088736, "grad_norm": 2.2135725021362305, "learning_rate": 7.877428998505232e-05, "loss": 3.2235, "step": 25000 }, { "epoch": 1.6989400733795352, "grad_norm": 2.063904285430908, "learning_rate": 7.877004348416906e-05, "loss": 3.1055, "step": 25005 }, { "epoch": 1.699279793450197, "grad_norm": 2.629775285720825, "learning_rate": 7.876579698328577e-05, "loss": 3.2044, "step": 25010 }, { "epoch": 1.699619513520859, "grad_norm": 2.3128819465637207, "learning_rate": 7.87615504824025e-05, "loss": 2.8105, "step": 25015 }, { "epoch": 1.6999592335915206, "grad_norm": 3.0335042476654053, "learning_rate": 7.875730398151924e-05, "loss": 3.3407, "step": 25020 }, { "epoch": 1.7002989536621822, "grad_norm": 2.6317718029022217, "learning_rate": 7.875305748063596e-05, "loss": 3.1412, "step": 25025 }, { "epoch": 1.7006386737328443, "grad_norm": 2.525820016860962, "learning_rate": 7.874881097975268e-05, "loss": 3.1057, "step": 25030 }, { "epoch": 1.700978393803506, "grad_norm": 2.111293315887451, "learning_rate": 7.874456447886943e-05, "loss": 2.7405, "step": 25035 }, { "epoch": 1.7013181138741675, "grad_norm": 2.464423894882202, "learning_rate": 7.874031797798614e-05, "loss": 3.2521, "step": 25040 }, { "epoch": 1.7016578339448296, "grad_norm": 2.371596574783325, "learning_rate": 7.873607147710287e-05, 
"loss": 3.1241, "step": 25045 }, { "epoch": 1.7019975540154912, "grad_norm": 2.385103225708008, "learning_rate": 7.873182497621961e-05, "loss": 2.9815, "step": 25050 }, { "epoch": 1.7023372740861529, "grad_norm": 3.0190956592559814, "learning_rate": 7.872757847533632e-05, "loss": 3.1643, "step": 25055 }, { "epoch": 1.702676994156815, "grad_norm": 2.6503663063049316, "learning_rate": 7.872333197445305e-05, "loss": 3.0229, "step": 25060 }, { "epoch": 1.7030167142274766, "grad_norm": 1.7168222665786743, "learning_rate": 7.871908547356978e-05, "loss": 2.7766, "step": 25065 }, { "epoch": 1.7033564342981382, "grad_norm": 2.047663688659668, "learning_rate": 7.871483897268651e-05, "loss": 3.2423, "step": 25070 }, { "epoch": 1.7036961543688, "grad_norm": 1.9442451000213623, "learning_rate": 7.871059247180324e-05, "loss": 2.8593, "step": 25075 }, { "epoch": 1.704035874439462, "grad_norm": 1.8877469301223755, "learning_rate": 7.870634597091996e-05, "loss": 3.0722, "step": 25080 }, { "epoch": 1.7043755945101235, "grad_norm": 1.9783966541290283, "learning_rate": 7.870209947003669e-05, "loss": 3.2589, "step": 25085 }, { "epoch": 1.7047153145807854, "grad_norm": 1.9782874584197998, "learning_rate": 7.869785296915342e-05, "loss": 3.1981, "step": 25090 }, { "epoch": 1.7050550346514473, "grad_norm": 2.2946243286132812, "learning_rate": 7.869360646827015e-05, "loss": 3.2368, "step": 25095 }, { "epoch": 1.7053947547221089, "grad_norm": 1.812559962272644, "learning_rate": 7.868935996738688e-05, "loss": 3.1283, "step": 25100 }, { "epoch": 1.7057344747927707, "grad_norm": 2.49066424369812, "learning_rate": 7.86851134665036e-05, "loss": 3.245, "step": 25105 }, { "epoch": 1.7060741948634326, "grad_norm": 1.5750577449798584, "learning_rate": 7.868086696562033e-05, "loss": 3.0493, "step": 25110 }, { "epoch": 1.7064139149340942, "grad_norm": 2.0973849296569824, "learning_rate": 7.867662046473706e-05, "loss": 3.0417, "step": 25115 }, { "epoch": 1.706753635004756, "grad_norm": 
2.588610887527466, "learning_rate": 7.867237396385379e-05, "loss": 2.9006, "step": 25120 }, { "epoch": 1.707093355075418, "grad_norm": 3.340257167816162, "learning_rate": 7.866812746297052e-05, "loss": 3.1596, "step": 25125 }, { "epoch": 1.7074330751460796, "grad_norm": 2.3328568935394287, "learning_rate": 7.866388096208724e-05, "loss": 3.3802, "step": 25130 }, { "epoch": 1.7077727952167414, "grad_norm": 2.218491554260254, "learning_rate": 7.865963446120397e-05, "loss": 3.0675, "step": 25135 }, { "epoch": 1.7081125152874033, "grad_norm": 2.6592538356781006, "learning_rate": 7.86553879603207e-05, "loss": 3.2742, "step": 25140 }, { "epoch": 1.708452235358065, "grad_norm": 4.278927326202393, "learning_rate": 7.865114145943743e-05, "loss": 3.0939, "step": 25145 }, { "epoch": 1.7087919554287267, "grad_norm": 2.599681854248047, "learning_rate": 7.864689495855416e-05, "loss": 3.3881, "step": 25150 }, { "epoch": 1.7091316754993886, "grad_norm": 2.197549819946289, "learning_rate": 7.864264845767088e-05, "loss": 3.3022, "step": 25155 }, { "epoch": 1.7094713955700502, "grad_norm": 3.443685293197632, "learning_rate": 7.863840195678761e-05, "loss": 3.1818, "step": 25160 }, { "epoch": 1.709811115640712, "grad_norm": 2.29880952835083, "learning_rate": 7.863415545590434e-05, "loss": 3.0577, "step": 25165 }, { "epoch": 1.710150835711374, "grad_norm": 2.340808629989624, "learning_rate": 7.862990895502107e-05, "loss": 3.1472, "step": 25170 }, { "epoch": 1.7104905557820356, "grad_norm": 2.5195224285125732, "learning_rate": 7.86256624541378e-05, "loss": 3.3274, "step": 25175 }, { "epoch": 1.7108302758526974, "grad_norm": 2.734041213989258, "learning_rate": 7.862141595325452e-05, "loss": 3.1557, "step": 25180 }, { "epoch": 1.7111699959233593, "grad_norm": 2.144071102142334, "learning_rate": 7.861716945237125e-05, "loss": 3.2288, "step": 25185 }, { "epoch": 1.711509715994021, "grad_norm": 2.909952402114868, "learning_rate": 7.861292295148798e-05, "loss": 3.1344, "step": 25190 }, { 
"epoch": 1.7118494360646825, "grad_norm": 2.5179455280303955, "learning_rate": 7.86086764506047e-05, "loss": 3.2485, "step": 25195 }, { "epoch": 1.7121891561353446, "grad_norm": 2.1903719902038574, "learning_rate": 7.860442994972144e-05, "loss": 3.0548, "step": 25200 }, { "epoch": 1.7125288762060062, "grad_norm": 2.6767728328704834, "learning_rate": 7.860018344883816e-05, "loss": 2.9945, "step": 25205 }, { "epoch": 1.7128685962766679, "grad_norm": 2.5417134761810303, "learning_rate": 7.859593694795488e-05, "loss": 3.2021, "step": 25210 }, { "epoch": 1.71320831634733, "grad_norm": 2.167224407196045, "learning_rate": 7.859169044707162e-05, "loss": 3.0823, "step": 25215 }, { "epoch": 1.7135480364179916, "grad_norm": 2.468609094619751, "learning_rate": 7.858744394618835e-05, "loss": 3.2126, "step": 25220 }, { "epoch": 1.7138877564886532, "grad_norm": 2.2001941204071045, "learning_rate": 7.858319744530506e-05, "loss": 3.1315, "step": 25225 }, { "epoch": 1.7142274765593153, "grad_norm": 2.2086355686187744, "learning_rate": 7.85789509444218e-05, "loss": 2.8626, "step": 25230 }, { "epoch": 1.714567196629977, "grad_norm": 2.1467504501342773, "learning_rate": 7.857470444353853e-05, "loss": 2.9929, "step": 25235 }, { "epoch": 1.7149069167006386, "grad_norm": 3.0928773880004883, "learning_rate": 7.857045794265525e-05, "loss": 3.5851, "step": 25240 }, { "epoch": 1.7152466367713004, "grad_norm": 1.8033757209777832, "learning_rate": 7.856621144177199e-05, "loss": 2.961, "step": 25245 }, { "epoch": 1.7155863568419623, "grad_norm": 2.1423511505126953, "learning_rate": 7.856196494088872e-05, "loss": 3.2192, "step": 25250 }, { "epoch": 1.7159260769126239, "grad_norm": 2.400533437728882, "learning_rate": 7.855771844000543e-05, "loss": 3.1091, "step": 25255 }, { "epoch": 1.7162657969832857, "grad_norm": 2.78483510017395, "learning_rate": 7.855347193912217e-05, "loss": 3.0231, "step": 25260 }, { "epoch": 1.7166055170539476, "grad_norm": 2.4451379776000977, "learning_rate": 
7.854922543823889e-05, "loss": 3.2031, "step": 25265 }, { "epoch": 1.7169452371246092, "grad_norm": 2.1280910968780518, "learning_rate": 7.854497893735562e-05, "loss": 3.0764, "step": 25270 }, { "epoch": 1.717284957195271, "grad_norm": 1.881915807723999, "learning_rate": 7.854073243647236e-05, "loss": 3.1337, "step": 25275 }, { "epoch": 1.717624677265933, "grad_norm": 2.631406307220459, "learning_rate": 7.853648593558907e-05, "loss": 3.2711, "step": 25280 }, { "epoch": 1.7179643973365946, "grad_norm": 1.9780089855194092, "learning_rate": 7.85322394347058e-05, "loss": 3.3296, "step": 25285 }, { "epoch": 1.7183041174072564, "grad_norm": 2.187490701675415, "learning_rate": 7.852799293382254e-05, "loss": 3.1905, "step": 25290 }, { "epoch": 1.7186438374779183, "grad_norm": 2.5066287517547607, "learning_rate": 7.852374643293926e-05, "loss": 2.8036, "step": 25295 }, { "epoch": 1.71898355754858, "grad_norm": 2.1440110206604004, "learning_rate": 7.851949993205598e-05, "loss": 3.1176, "step": 25300 }, { "epoch": 1.7193232776192418, "grad_norm": 2.3380978107452393, "learning_rate": 7.851525343117272e-05, "loss": 2.9822, "step": 25305 }, { "epoch": 1.7196629976899036, "grad_norm": 2.5016205310821533, "learning_rate": 7.851100693028944e-05, "loss": 3.3066, "step": 25310 }, { "epoch": 1.7200027177605652, "grad_norm": 2.9663121700286865, "learning_rate": 7.850676042940617e-05, "loss": 3.2069, "step": 25315 }, { "epoch": 1.720342437831227, "grad_norm": 2.1484975814819336, "learning_rate": 7.850251392852291e-05, "loss": 3.2186, "step": 25320 }, { "epoch": 1.720682157901889, "grad_norm": 2.3737523555755615, "learning_rate": 7.849826742763962e-05, "loss": 3.1142, "step": 25325 }, { "epoch": 1.7210218779725506, "grad_norm": 1.9842629432678223, "learning_rate": 7.849402092675636e-05, "loss": 3.2632, "step": 25330 }, { "epoch": 1.7213615980432124, "grad_norm": 2.374236822128296, "learning_rate": 7.848977442587308e-05, "loss": 2.9444, "step": 25335 }, { "epoch": 1.7217013181138743, 
"grad_norm": 1.9363150596618652, "learning_rate": 7.848552792498981e-05, "loss": 3.2022, "step": 25340 }, { "epoch": 1.722041038184536, "grad_norm": 2.257291316986084, "learning_rate": 7.848128142410655e-05, "loss": 2.9785, "step": 25345 }, { "epoch": 1.7223807582551978, "grad_norm": 2.0249829292297363, "learning_rate": 7.847703492322326e-05, "loss": 2.8525, "step": 25350 }, { "epoch": 1.7227204783258596, "grad_norm": 2.551863193511963, "learning_rate": 7.847278842233999e-05, "loss": 2.9705, "step": 25355 }, { "epoch": 1.7230601983965212, "grad_norm": 2.3288865089416504, "learning_rate": 7.846854192145673e-05, "loss": 2.9433, "step": 25360 }, { "epoch": 1.7233999184671829, "grad_norm": 2.0250039100646973, "learning_rate": 7.846429542057345e-05, "loss": 3.0272, "step": 25365 }, { "epoch": 1.723739638537845, "grad_norm": 2.227827310562134, "learning_rate": 7.846004891969018e-05, "loss": 3.3118, "step": 25370 }, { "epoch": 1.7240793586085066, "grad_norm": 2.1122922897338867, "learning_rate": 7.845580241880692e-05, "loss": 3.0647, "step": 25375 }, { "epoch": 1.7244190786791682, "grad_norm": 2.4064412117004395, "learning_rate": 7.845155591792363e-05, "loss": 3.0551, "step": 25380 }, { "epoch": 1.7247587987498303, "grad_norm": 2.554306745529175, "learning_rate": 7.844730941704036e-05, "loss": 3.2133, "step": 25385 }, { "epoch": 1.725098518820492, "grad_norm": 2.163743257522583, "learning_rate": 7.84430629161571e-05, "loss": 3.1168, "step": 25390 }, { "epoch": 1.7254382388911536, "grad_norm": 2.8445305824279785, "learning_rate": 7.843881641527382e-05, "loss": 3.149, "step": 25395 }, { "epoch": 1.7257779589618156, "grad_norm": 2.410911798477173, "learning_rate": 7.843456991439054e-05, "loss": 2.9868, "step": 25400 }, { "epoch": 1.7261176790324773, "grad_norm": 2.3865110874176025, "learning_rate": 7.843032341350728e-05, "loss": 3.0487, "step": 25405 }, { "epoch": 1.726457399103139, "grad_norm": 2.4551093578338623, "learning_rate": 7.8426076912624e-05, "loss": 3.136, "step": 
25410 }, { "epoch": 1.7267971191738007, "grad_norm": 1.8952611684799194, "learning_rate": 7.842183041174073e-05, "loss": 3.2134, "step": 25415 }, { "epoch": 1.7271368392444626, "grad_norm": 2.1133100986480713, "learning_rate": 7.841758391085746e-05, "loss": 3.3401, "step": 25420 }, { "epoch": 1.7274765593151242, "grad_norm": 2.349020004272461, "learning_rate": 7.841333740997418e-05, "loss": 3.1158, "step": 25425 }, { "epoch": 1.727816279385786, "grad_norm": 2.743114471435547, "learning_rate": 7.840909090909091e-05, "loss": 2.9839, "step": 25430 }, { "epoch": 1.728155999456448, "grad_norm": 2.694377899169922, "learning_rate": 7.840484440820764e-05, "loss": 3.0227, "step": 25435 }, { "epoch": 1.7284957195271096, "grad_norm": 1.909999132156372, "learning_rate": 7.840059790732437e-05, "loss": 3.2236, "step": 25440 }, { "epoch": 1.7288354395977714, "grad_norm": 1.9602288007736206, "learning_rate": 7.83963514064411e-05, "loss": 3.1057, "step": 25445 }, { "epoch": 1.7291751596684333, "grad_norm": 2.832615852355957, "learning_rate": 7.839210490555782e-05, "loss": 3.1816, "step": 25450 }, { "epoch": 1.729514879739095, "grad_norm": 2.720790386199951, "learning_rate": 7.838785840467455e-05, "loss": 3.2551, "step": 25455 }, { "epoch": 1.7298545998097568, "grad_norm": 2.223598003387451, "learning_rate": 7.838361190379128e-05, "loss": 3.3529, "step": 25460 }, { "epoch": 1.7301943198804186, "grad_norm": 3.2756645679473877, "learning_rate": 7.837936540290801e-05, "loss": 3.1987, "step": 25465 }, { "epoch": 1.7305340399510802, "grad_norm": 2.441863536834717, "learning_rate": 7.837511890202474e-05, "loss": 2.9144, "step": 25470 }, { "epoch": 1.730873760021742, "grad_norm": 2.0835304260253906, "learning_rate": 7.837087240114146e-05, "loss": 3.0935, "step": 25475 }, { "epoch": 1.731213480092404, "grad_norm": 2.5149943828582764, "learning_rate": 7.836662590025819e-05, "loss": 2.959, "step": 25480 }, { "epoch": 1.7315532001630656, "grad_norm": 3.031153440475464, "learning_rate": 
7.836237939937492e-05, "loss": 3.021, "step": 25485 }, { "epoch": 1.7318929202337274, "grad_norm": 2.5758750438690186, "learning_rate": 7.835813289849165e-05, "loss": 3.0913, "step": 25490 }, { "epoch": 1.7322326403043893, "grad_norm": 2.674776315689087, "learning_rate": 7.835388639760838e-05, "loss": 2.9513, "step": 25495 }, { "epoch": 1.732572360375051, "grad_norm": 3.215749740600586, "learning_rate": 7.83496398967251e-05, "loss": 3.1589, "step": 25500 }, { "epoch": 1.7329120804457128, "grad_norm": 2.7835681438446045, "learning_rate": 7.834539339584183e-05, "loss": 3.1765, "step": 25505 }, { "epoch": 1.7332518005163746, "grad_norm": 2.467149496078491, "learning_rate": 7.834114689495856e-05, "loss": 3.4603, "step": 25510 }, { "epoch": 1.7335915205870362, "grad_norm": 2.130449056625366, "learning_rate": 7.833690039407529e-05, "loss": 2.9757, "step": 25515 }, { "epoch": 1.733931240657698, "grad_norm": 2.1739137172698975, "learning_rate": 7.833265389319202e-05, "loss": 3.1593, "step": 25520 }, { "epoch": 1.73427096072836, "grad_norm": 2.3880116939544678, "learning_rate": 7.832840739230874e-05, "loss": 2.84, "step": 25525 }, { "epoch": 1.7346106807990216, "grad_norm": 2.0763261318206787, "learning_rate": 7.832416089142547e-05, "loss": 2.9699, "step": 25530 }, { "epoch": 1.7349504008696832, "grad_norm": 1.9876128435134888, "learning_rate": 7.831991439054219e-05, "loss": 3.0225, "step": 25535 }, { "epoch": 1.7352901209403453, "grad_norm": 2.4172310829162598, "learning_rate": 7.831566788965893e-05, "loss": 3.0805, "step": 25540 }, { "epoch": 1.735629841011007, "grad_norm": 2.723555088043213, "learning_rate": 7.831142138877566e-05, "loss": 3.0567, "step": 25545 }, { "epoch": 1.7359695610816686, "grad_norm": 2.918781042098999, "learning_rate": 7.830717488789237e-05, "loss": 3.2265, "step": 25550 }, { "epoch": 1.7363092811523306, "grad_norm": 2.2047557830810547, "learning_rate": 7.830292838700911e-05, "loss": 3.1763, "step": 25555 }, { "epoch": 1.7366490012229923, 
"grad_norm": 1.8321616649627686, "learning_rate": 7.829868188612584e-05, "loss": 3.1604, "step": 25560 }, { "epoch": 1.736988721293654, "grad_norm": 2.4316389560699463, "learning_rate": 7.829443538524255e-05, "loss": 3.0741, "step": 25565 }, { "epoch": 1.737328441364316, "grad_norm": 2.8348770141601562, "learning_rate": 7.82901888843593e-05, "loss": 2.9098, "step": 25570 }, { "epoch": 1.7376681614349776, "grad_norm": 2.218296766281128, "learning_rate": 7.828594238347602e-05, "loss": 3.0445, "step": 25575 }, { "epoch": 1.7380078815056392, "grad_norm": 2.6599409580230713, "learning_rate": 7.828169588259274e-05, "loss": 3.2038, "step": 25580 }, { "epoch": 1.738347601576301, "grad_norm": 2.374025583267212, "learning_rate": 7.827744938170948e-05, "loss": 2.9121, "step": 25585 }, { "epoch": 1.738687321646963, "grad_norm": 2.7980189323425293, "learning_rate": 7.827320288082621e-05, "loss": 3.2768, "step": 25590 }, { "epoch": 1.7390270417176246, "grad_norm": 2.3380215167999268, "learning_rate": 7.826895637994292e-05, "loss": 3.2385, "step": 25595 }, { "epoch": 1.7393667617882864, "grad_norm": 2.4923715591430664, "learning_rate": 7.826470987905966e-05, "loss": 3.2919, "step": 25600 }, { "epoch": 1.7397064818589483, "grad_norm": 2.415642023086548, "learning_rate": 7.826046337817639e-05, "loss": 3.2136, "step": 25605 }, { "epoch": 1.74004620192961, "grad_norm": 2.6573903560638428, "learning_rate": 7.82562168772931e-05, "loss": 3.1695, "step": 25610 }, { "epoch": 1.7403859220002718, "grad_norm": 2.8470118045806885, "learning_rate": 7.825197037640985e-05, "loss": 3.3296, "step": 25615 }, { "epoch": 1.7407256420709336, "grad_norm": 2.559333324432373, "learning_rate": 7.824772387552656e-05, "loss": 3.0094, "step": 25620 }, { "epoch": 1.7410653621415952, "grad_norm": 2.1470296382904053, "learning_rate": 7.824347737464329e-05, "loss": 3.1524, "step": 25625 }, { "epoch": 1.741405082212257, "grad_norm": 3.2497901916503906, "learning_rate": 7.823923087376003e-05, "loss": 3.2377, 
"step": 25630 }, { "epoch": 1.741744802282919, "grad_norm": 2.3494489192962646, "learning_rate": 7.823498437287675e-05, "loss": 3.1284, "step": 25635 }, { "epoch": 1.7420845223535806, "grad_norm": 2.011596202850342, "learning_rate": 7.823073787199347e-05, "loss": 3.0346, "step": 25640 }, { "epoch": 1.7424242424242424, "grad_norm": 2.5411458015441895, "learning_rate": 7.822649137111022e-05, "loss": 2.9019, "step": 25645 }, { "epoch": 1.7427639624949043, "grad_norm": 2.7102127075195312, "learning_rate": 7.822224487022693e-05, "loss": 2.9433, "step": 25650 }, { "epoch": 1.743103682565566, "grad_norm": 2.163911819458008, "learning_rate": 7.821799836934366e-05, "loss": 3.1544, "step": 25655 }, { "epoch": 1.7434434026362278, "grad_norm": 2.2884256839752197, "learning_rate": 7.82137518684604e-05, "loss": 3.0144, "step": 25660 }, { "epoch": 1.7437831227068896, "grad_norm": 2.846609115600586, "learning_rate": 7.820950536757711e-05, "loss": 3.1131, "step": 25665 }, { "epoch": 1.7441228427775513, "grad_norm": 2.130465030670166, "learning_rate": 7.820525886669386e-05, "loss": 3.4687, "step": 25670 }, { "epoch": 1.744462562848213, "grad_norm": 2.3080596923828125, "learning_rate": 7.820101236581058e-05, "loss": 2.9235, "step": 25675 }, { "epoch": 1.744802282918875, "grad_norm": 2.0096237659454346, "learning_rate": 7.81967658649273e-05, "loss": 3.2381, "step": 25680 }, { "epoch": 1.7451420029895366, "grad_norm": 2.4197561740875244, "learning_rate": 7.819251936404404e-05, "loss": 2.957, "step": 25685 }, { "epoch": 1.7454817230601984, "grad_norm": 2.6553540229797363, "learning_rate": 7.818827286316075e-05, "loss": 3.1148, "step": 25690 }, { "epoch": 1.7458214431308603, "grad_norm": 2.2102138996124268, "learning_rate": 7.818402636227748e-05, "loss": 3.1368, "step": 25695 }, { "epoch": 1.746161163201522, "grad_norm": 2.3347318172454834, "learning_rate": 7.817977986139422e-05, "loss": 3.0237, "step": 25700 }, { "epoch": 1.7465008832721836, "grad_norm": 3.1708321571350098, 
"learning_rate": 7.817553336051094e-05, "loss": 3.3016, "step": 25705 }, { "epoch": 1.7468406033428456, "grad_norm": 2.9401843547821045, "learning_rate": 7.817128685962767e-05, "loss": 2.9047, "step": 25710 }, { "epoch": 1.7471803234135073, "grad_norm": 3.2280285358428955, "learning_rate": 7.816704035874441e-05, "loss": 3.0238, "step": 25715 }, { "epoch": 1.747520043484169, "grad_norm": 2.687034845352173, "learning_rate": 7.816279385786112e-05, "loss": 3.1996, "step": 25720 }, { "epoch": 1.747859763554831, "grad_norm": 2.196671962738037, "learning_rate": 7.815854735697785e-05, "loss": 3.0269, "step": 25725 }, { "epoch": 1.7481994836254926, "grad_norm": 2.207371950149536, "learning_rate": 7.815430085609459e-05, "loss": 3.1821, "step": 25730 }, { "epoch": 1.7485392036961542, "grad_norm": 2.0818793773651123, "learning_rate": 7.81500543552113e-05, "loss": 2.8197, "step": 25735 }, { "epoch": 1.7488789237668163, "grad_norm": 2.1139891147613525, "learning_rate": 7.814580785432803e-05, "loss": 3.1511, "step": 25740 }, { "epoch": 1.749218643837478, "grad_norm": 1.7968618869781494, "learning_rate": 7.814156135344478e-05, "loss": 2.8643, "step": 25745 }, { "epoch": 1.7495583639081396, "grad_norm": 2.4299352169036865, "learning_rate": 7.813731485256149e-05, "loss": 3.2556, "step": 25750 }, { "epoch": 1.7498980839788014, "grad_norm": 3.1609182357788086, "learning_rate": 7.813306835167822e-05, "loss": 3.156, "step": 25755 }, { "epoch": 1.7502378040494633, "grad_norm": 3.6961045265197754, "learning_rate": 7.812882185079496e-05, "loss": 3.0597, "step": 25760 }, { "epoch": 1.750577524120125, "grad_norm": 2.6010992527008057, "learning_rate": 7.812457534991167e-05, "loss": 2.959, "step": 25765 }, { "epoch": 1.7509172441907868, "grad_norm": 1.5992677211761475, "learning_rate": 7.81203288490284e-05, "loss": 2.9638, "step": 25770 }, { "epoch": 1.7512569642614486, "grad_norm": 2.102860689163208, "learning_rate": 7.811608234814513e-05, "loss": 3.1267, "step": 25775 }, { "epoch": 
1.7515966843321102, "grad_norm": 2.1993508338928223, "learning_rate": 7.811183584726186e-05, "loss": 3.1713, "step": 25780 }, { "epoch": 1.751936404402772, "grad_norm": 2.727658271789551, "learning_rate": 7.810758934637859e-05, "loss": 2.9792, "step": 25785 }, { "epoch": 1.752276124473434, "grad_norm": 2.192072868347168, "learning_rate": 7.810334284549531e-05, "loss": 3.3353, "step": 25790 }, { "epoch": 1.7526158445440956, "grad_norm": 1.8997693061828613, "learning_rate": 7.809909634461204e-05, "loss": 3.1163, "step": 25795 }, { "epoch": 1.7529555646147574, "grad_norm": 2.457106351852417, "learning_rate": 7.809484984372877e-05, "loss": 3.1479, "step": 25800 }, { "epoch": 1.7532952846854193, "grad_norm": 2.4914886951446533, "learning_rate": 7.80906033428455e-05, "loss": 3.0802, "step": 25805 }, { "epoch": 1.753635004756081, "grad_norm": 2.1077258586883545, "learning_rate": 7.808635684196223e-05, "loss": 3.3337, "step": 25810 }, { "epoch": 1.7539747248267428, "grad_norm": 2.5423974990844727, "learning_rate": 7.808211034107895e-05, "loss": 3.3362, "step": 25815 }, { "epoch": 1.7543144448974046, "grad_norm": 2.8756706714630127, "learning_rate": 7.807786384019568e-05, "loss": 3.3001, "step": 25820 }, { "epoch": 1.7546541649680663, "grad_norm": 3.03214955329895, "learning_rate": 7.807361733931241e-05, "loss": 3.0538, "step": 25825 }, { "epoch": 1.754993885038728, "grad_norm": 2.8291306495666504, "learning_rate": 7.806937083842914e-05, "loss": 3.1952, "step": 25830 }, { "epoch": 1.75533360510939, "grad_norm": 2.371199131011963, "learning_rate": 7.806512433754587e-05, "loss": 3.1702, "step": 25835 }, { "epoch": 1.7556733251800516, "grad_norm": 2.005872964859009, "learning_rate": 7.80608778366626e-05, "loss": 3.1738, "step": 25840 }, { "epoch": 1.7560130452507134, "grad_norm": 1.99236261844635, "learning_rate": 7.805663133577932e-05, "loss": 3.2003, "step": 25845 }, { "epoch": 1.7563527653213753, "grad_norm": 2.477273464202881, "learning_rate": 7.805238483489605e-05, 
"loss": 3.0884, "step": 25850 }, { "epoch": 1.756692485392037, "grad_norm": 2.1286377906799316, "learning_rate": 7.804813833401278e-05, "loss": 3.1821, "step": 25855 }, { "epoch": 1.7570322054626988, "grad_norm": 2.795013904571533, "learning_rate": 7.80438918331295e-05, "loss": 3.0251, "step": 25860 }, { "epoch": 1.7573719255333606, "grad_norm": 2.6424858570098877, "learning_rate": 7.803964533224623e-05, "loss": 3.1338, "step": 25865 }, { "epoch": 1.7577116456040223, "grad_norm": 1.9454326629638672, "learning_rate": 7.803539883136296e-05, "loss": 2.9116, "step": 25870 }, { "epoch": 1.758051365674684, "grad_norm": 2.369999408721924, "learning_rate": 7.803115233047969e-05, "loss": 2.9475, "step": 25875 }, { "epoch": 1.758391085745346, "grad_norm": 3.0911989212036133, "learning_rate": 7.802690582959642e-05, "loss": 2.9575, "step": 25880 }, { "epoch": 1.7587308058160076, "grad_norm": 1.8563637733459473, "learning_rate": 7.802265932871315e-05, "loss": 3.1829, "step": 25885 }, { "epoch": 1.7590705258866692, "grad_norm": 2.1008291244506836, "learning_rate": 7.801841282782986e-05, "loss": 2.917, "step": 25890 }, { "epoch": 1.7594102459573313, "grad_norm": 2.044984817504883, "learning_rate": 7.80141663269466e-05, "loss": 3.2058, "step": 25895 }, { "epoch": 1.759749966027993, "grad_norm": 2.236020803451538, "learning_rate": 7.800991982606333e-05, "loss": 2.7915, "step": 25900 }, { "epoch": 1.7600896860986546, "grad_norm": 2.736248254776001, "learning_rate": 7.800567332518005e-05, "loss": 3.0091, "step": 25905 }, { "epoch": 1.7604294061693166, "grad_norm": 2.521221160888672, "learning_rate": 7.800142682429679e-05, "loss": 3.1945, "step": 25910 }, { "epoch": 1.7607691262399783, "grad_norm": 1.9984818696975708, "learning_rate": 7.799718032341351e-05, "loss": 3.1822, "step": 25915 }, { "epoch": 1.76110884631064, "grad_norm": 2.2769992351531982, "learning_rate": 7.799293382253023e-05, "loss": 2.9766, "step": 25920 }, { "epoch": 1.7614485663813018, "grad_norm": 2.2961416244506836, 
"learning_rate": 7.798868732164697e-05, "loss": 3.0604, "step": 25925 }, { "epoch": 1.7617882864519636, "grad_norm": 2.2242467403411865, "learning_rate": 7.79844408207637e-05, "loss": 3.0973, "step": 25930 }, { "epoch": 1.7621280065226252, "grad_norm": 2.818603754043579, "learning_rate": 7.798019431988041e-05, "loss": 3.1937, "step": 25935 }, { "epoch": 1.762467726593287, "grad_norm": 2.251955032348633, "learning_rate": 7.797594781899715e-05, "loss": 2.9853, "step": 25940 }, { "epoch": 1.762807446663949, "grad_norm": 2.0808801651000977, "learning_rate": 7.797170131811388e-05, "loss": 3.5087, "step": 25945 }, { "epoch": 1.7631471667346106, "grad_norm": 3.019653558731079, "learning_rate": 7.79674548172306e-05, "loss": 3.0619, "step": 25950 }, { "epoch": 1.7634868868052724, "grad_norm": 2.4149889945983887, "learning_rate": 7.796320831634734e-05, "loss": 3.1969, "step": 25955 }, { "epoch": 1.7638266068759343, "grad_norm": 2.329658031463623, "learning_rate": 7.795896181546407e-05, "loss": 3.2909, "step": 25960 }, { "epoch": 1.764166326946596, "grad_norm": 2.6557304859161377, "learning_rate": 7.795471531458078e-05, "loss": 3.0474, "step": 25965 }, { "epoch": 1.7645060470172578, "grad_norm": 2.2977402210235596, "learning_rate": 7.795046881369752e-05, "loss": 3.3154, "step": 25970 }, { "epoch": 1.7648457670879196, "grad_norm": 1.7964727878570557, "learning_rate": 7.794622231281424e-05, "loss": 3.072, "step": 25975 }, { "epoch": 1.7651854871585813, "grad_norm": 1.9714410305023193, "learning_rate": 7.794197581193097e-05, "loss": 3.0936, "step": 25980 }, { "epoch": 1.765525207229243, "grad_norm": 2.5563364028930664, "learning_rate": 7.79377293110477e-05, "loss": 2.946, "step": 25985 }, { "epoch": 1.765864927299905, "grad_norm": 2.36130690574646, "learning_rate": 7.793348281016442e-05, "loss": 2.9939, "step": 25990 }, { "epoch": 1.7662046473705666, "grad_norm": 2.392960786819458, "learning_rate": 7.792923630928115e-05, "loss": 3.3166, "step": 25995 }, { "epoch": 
1.7665443674412284, "grad_norm": 1.9943103790283203, "learning_rate": 7.792498980839789e-05, "loss": 3.2472, "step": 26000 }, { "epoch": 1.7668840875118903, "grad_norm": 2.000736713409424, "learning_rate": 7.79207433075146e-05, "loss": 3.1176, "step": 26005 }, { "epoch": 1.767223807582552, "grad_norm": 2.4997541904449463, "learning_rate": 7.791649680663135e-05, "loss": 3.2827, "step": 26010 }, { "epoch": 1.7675635276532138, "grad_norm": 1.9916497468948364, "learning_rate": 7.791225030574807e-05, "loss": 3.03, "step": 26015 }, { "epoch": 1.7679032477238756, "grad_norm": 2.4015212059020996, "learning_rate": 7.790800380486479e-05, "loss": 2.9828, "step": 26020 }, { "epoch": 1.7682429677945373, "grad_norm": 2.223437547683716, "learning_rate": 7.790375730398153e-05, "loss": 2.702, "step": 26025 }, { "epoch": 1.7685826878651991, "grad_norm": 2.277967929840088, "learning_rate": 7.789951080309826e-05, "loss": 2.9263, "step": 26030 }, { "epoch": 1.768922407935861, "grad_norm": 2.1711008548736572, "learning_rate": 7.789526430221497e-05, "loss": 3.1374, "step": 26035 }, { "epoch": 1.7692621280065226, "grad_norm": 2.6543405055999756, "learning_rate": 7.789101780133171e-05, "loss": 3.1365, "step": 26040 }, { "epoch": 1.7696018480771842, "grad_norm": 2.292097568511963, "learning_rate": 7.788677130044843e-05, "loss": 3.1071, "step": 26045 }, { "epoch": 1.7699415681478463, "grad_norm": 2.6394004821777344, "learning_rate": 7.788252479956516e-05, "loss": 3.213, "step": 26050 }, { "epoch": 1.770281288218508, "grad_norm": 2.181950092315674, "learning_rate": 7.78782782986819e-05, "loss": 2.9425, "step": 26055 }, { "epoch": 1.7706210082891696, "grad_norm": 2.1352169513702393, "learning_rate": 7.787403179779861e-05, "loss": 3.0547, "step": 26060 }, { "epoch": 1.7709607283598316, "grad_norm": 2.2108592987060547, "learning_rate": 7.786978529691534e-05, "loss": 3.108, "step": 26065 }, { "epoch": 1.7713004484304933, "grad_norm": 2.122340202331543, "learning_rate": 7.786553879603208e-05, 
"loss": 2.8692, "step": 26070 }, { "epoch": 1.771640168501155, "grad_norm": 2.625976800918579, "learning_rate": 7.78612922951488e-05, "loss": 2.9678, "step": 26075 }, { "epoch": 1.771979888571817, "grad_norm": 2.6580705642700195, "learning_rate": 7.785704579426553e-05, "loss": 3.3313, "step": 26080 }, { "epoch": 1.7723196086424786, "grad_norm": 2.31508469581604, "learning_rate": 7.785364859355891e-05, "loss": 3.0197, "step": 26085 }, { "epoch": 1.7726593287131402, "grad_norm": 2.250746726989746, "learning_rate": 7.784940209267564e-05, "loss": 3.2406, "step": 26090 }, { "epoch": 1.772999048783802, "grad_norm": 2.913311719894409, "learning_rate": 7.784515559179236e-05, "loss": 2.8259, "step": 26095 }, { "epoch": 1.773338768854464, "grad_norm": 2.155630350112915, "learning_rate": 7.784090909090909e-05, "loss": 2.895, "step": 26100 }, { "epoch": 1.7736784889251256, "grad_norm": 1.6155600547790527, "learning_rate": 7.783666259002582e-05, "loss": 3.1419, "step": 26105 }, { "epoch": 1.7740182089957874, "grad_norm": 2.524012804031372, "learning_rate": 7.783241608914255e-05, "loss": 2.8533, "step": 26110 }, { "epoch": 1.7743579290664493, "grad_norm": 2.9862594604492188, "learning_rate": 7.782816958825928e-05, "loss": 3.1339, "step": 26115 }, { "epoch": 1.774697649137111, "grad_norm": 2.984583616256714, "learning_rate": 7.7823923087376e-05, "loss": 3.2583, "step": 26120 }, { "epoch": 1.7750373692077728, "grad_norm": 2.437347173690796, "learning_rate": 7.781967658649273e-05, "loss": 3.2265, "step": 26125 }, { "epoch": 1.7753770892784346, "grad_norm": 2.2708029747009277, "learning_rate": 7.781543008560946e-05, "loss": 3.0733, "step": 26130 }, { "epoch": 1.7757168093490963, "grad_norm": 2.7668817043304443, "learning_rate": 7.781118358472619e-05, "loss": 3.0609, "step": 26135 }, { "epoch": 1.7760565294197581, "grad_norm": 1.9918642044067383, "learning_rate": 7.780693708384292e-05, "loss": 3.1067, "step": 26140 }, { "epoch": 1.77639624949042, "grad_norm": 2.998879909515381, 
"learning_rate": 7.780269058295964e-05, "loss": 3.1449, "step": 26145 }, { "epoch": 1.7767359695610816, "grad_norm": 2.167196273803711, "learning_rate": 7.779844408207637e-05, "loss": 2.9074, "step": 26150 }, { "epoch": 1.7770756896317434, "grad_norm": 2.3727028369903564, "learning_rate": 7.77941975811931e-05, "loss": 2.8627, "step": 26155 }, { "epoch": 1.7774154097024053, "grad_norm": 2.3460350036621094, "learning_rate": 7.778995108030983e-05, "loss": 3.1443, "step": 26160 }, { "epoch": 1.777755129773067, "grad_norm": 2.5672426223754883, "learning_rate": 7.778570457942656e-05, "loss": 3.2593, "step": 26165 }, { "epoch": 1.7780948498437288, "grad_norm": 2.5876622200012207, "learning_rate": 7.778145807854328e-05, "loss": 3.2252, "step": 26170 }, { "epoch": 1.7784345699143906, "grad_norm": 2.476654052734375, "learning_rate": 7.777721157766001e-05, "loss": 2.8281, "step": 26175 }, { "epoch": 1.7787742899850523, "grad_norm": 2.469271421432495, "learning_rate": 7.777296507677674e-05, "loss": 3.4724, "step": 26180 }, { "epoch": 1.7791140100557141, "grad_norm": 2.10829758644104, "learning_rate": 7.776871857589347e-05, "loss": 3.0918, "step": 26185 }, { "epoch": 1.779453730126376, "grad_norm": 2.9665069580078125, "learning_rate": 7.77644720750102e-05, "loss": 3.0655, "step": 26190 }, { "epoch": 1.7797934501970376, "grad_norm": 2.3517444133758545, "learning_rate": 7.776022557412692e-05, "loss": 2.8644, "step": 26195 }, { "epoch": 1.7801331702676995, "grad_norm": 2.8702950477600098, "learning_rate": 7.775597907324365e-05, "loss": 2.9187, "step": 26200 }, { "epoch": 1.7804728903383613, "grad_norm": 2.463003396987915, "learning_rate": 7.775173257236038e-05, "loss": 3.0258, "step": 26205 }, { "epoch": 1.780812610409023, "grad_norm": 2.6622352600097656, "learning_rate": 7.774748607147711e-05, "loss": 3.1525, "step": 26210 }, { "epoch": 1.7811523304796846, "grad_norm": 2.420630693435669, "learning_rate": 7.774323957059384e-05, "loss": 3.0491, "step": 26215 }, { "epoch": 
1.7814920505503467, "grad_norm": 2.1801037788391113, "learning_rate": 7.773899306971056e-05, "loss": 3.1913, "step": 26220 }, { "epoch": 1.7818317706210083, "grad_norm": 1.9278881549835205, "learning_rate": 7.773474656882729e-05, "loss": 3.1459, "step": 26225 }, { "epoch": 1.78217149069167, "grad_norm": 2.15151047706604, "learning_rate": 7.773050006794402e-05, "loss": 3.3006, "step": 26230 }, { "epoch": 1.782511210762332, "grad_norm": 1.9418704509735107, "learning_rate": 7.772625356706075e-05, "loss": 3.1913, "step": 26235 }, { "epoch": 1.7828509308329936, "grad_norm": 2.2551803588867188, "learning_rate": 7.772200706617748e-05, "loss": 2.9066, "step": 26240 }, { "epoch": 1.7831906509036552, "grad_norm": 2.1045279502868652, "learning_rate": 7.77177605652942e-05, "loss": 3.1787, "step": 26245 }, { "epoch": 1.7835303709743173, "grad_norm": 2.1425933837890625, "learning_rate": 7.771351406441093e-05, "loss": 3.3002, "step": 26250 }, { "epoch": 1.783870091044979, "grad_norm": 1.761022686958313, "learning_rate": 7.770926756352766e-05, "loss": 2.9589, "step": 26255 }, { "epoch": 1.7842098111156406, "grad_norm": 2.596975803375244, "learning_rate": 7.770502106264439e-05, "loss": 3.0829, "step": 26260 }, { "epoch": 1.7845495311863024, "grad_norm": 2.102264165878296, "learning_rate": 7.770077456176112e-05, "loss": 3.0544, "step": 26265 }, { "epoch": 1.7848892512569643, "grad_norm": 1.861818552017212, "learning_rate": 7.769652806087784e-05, "loss": 3.1836, "step": 26270 }, { "epoch": 1.785228971327626, "grad_norm": 2.5787038803100586, "learning_rate": 7.769228155999457e-05, "loss": 3.1748, "step": 26275 }, { "epoch": 1.7855686913982878, "grad_norm": 1.951506495475769, "learning_rate": 7.76880350591113e-05, "loss": 3.197, "step": 26280 }, { "epoch": 1.7859084114689496, "grad_norm": 2.2554166316986084, "learning_rate": 7.768378855822801e-05, "loss": 3.2184, "step": 26285 }, { "epoch": 1.7862481315396113, "grad_norm": 2.111567258834839, "learning_rate": 7.767954205734476e-05, 
"loss": 3.1675, "step": 26290 }, { "epoch": 1.7865878516102731, "grad_norm": 2.6121914386749268, "learning_rate": 7.767529555646148e-05, "loss": 3.1816, "step": 26295 }, { "epoch": 1.786927571680935, "grad_norm": 1.9530616998672485, "learning_rate": 7.76710490555782e-05, "loss": 3.0873, "step": 26300 }, { "epoch": 1.7872672917515966, "grad_norm": 1.9182062149047852, "learning_rate": 7.766680255469494e-05, "loss": 2.9796, "step": 26305 }, { "epoch": 1.7876070118222585, "grad_norm": 2.142709255218506, "learning_rate": 7.766255605381167e-05, "loss": 3.2652, "step": 26310 }, { "epoch": 1.7879467318929203, "grad_norm": 2.088698625564575, "learning_rate": 7.765830955292838e-05, "loss": 3.146, "step": 26315 }, { "epoch": 1.788286451963582, "grad_norm": 1.908423662185669, "learning_rate": 7.765406305204512e-05, "loss": 2.9525, "step": 26320 }, { "epoch": 1.7886261720342438, "grad_norm": 2.3162548542022705, "learning_rate": 7.764981655116185e-05, "loss": 2.9607, "step": 26325 }, { "epoch": 1.7889658921049056, "grad_norm": 2.514728307723999, "learning_rate": 7.764557005027857e-05, "loss": 3.2216, "step": 26330 }, { "epoch": 1.7893056121755673, "grad_norm": 2.0565431118011475, "learning_rate": 7.764132354939531e-05, "loss": 3.0204, "step": 26335 }, { "epoch": 1.7896453322462291, "grad_norm": 1.9886020421981812, "learning_rate": 7.763707704851204e-05, "loss": 3.0823, "step": 26340 }, { "epoch": 1.789985052316891, "grad_norm": 2.1314537525177, "learning_rate": 7.763283054762875e-05, "loss": 3.1529, "step": 26345 }, { "epoch": 1.7903247723875526, "grad_norm": 2.8539040088653564, "learning_rate": 7.762858404674549e-05, "loss": 3.47, "step": 26350 }, { "epoch": 1.7906644924582145, "grad_norm": 1.9514466524124146, "learning_rate": 7.762433754586222e-05, "loss": 3.3772, "step": 26355 }, { "epoch": 1.7910042125288763, "grad_norm": 2.2457361221313477, "learning_rate": 7.762009104497893e-05, "loss": 3.1879, "step": 26360 }, { "epoch": 1.791343932599538, "grad_norm": 2.1827821731567383, 
"learning_rate": 7.761584454409568e-05, "loss": 3.2387, "step": 26365 }, { "epoch": 1.7916836526701998, "grad_norm": 2.1375627517700195, "learning_rate": 7.761159804321239e-05, "loss": 3.0977, "step": 26370 }, { "epoch": 1.7920233727408617, "grad_norm": 2.5501868724823, "learning_rate": 7.760735154232912e-05, "loss": 3.2646, "step": 26375 }, { "epoch": 1.7923630928115233, "grad_norm": 2.1524605751037598, "learning_rate": 7.760310504144586e-05, "loss": 3.333, "step": 26380 }, { "epoch": 1.792702812882185, "grad_norm": 2.4430625438690186, "learning_rate": 7.759885854056257e-05, "loss": 3.2273, "step": 26385 }, { "epoch": 1.793042532952847, "grad_norm": 3.0629942417144775, "learning_rate": 7.75946120396793e-05, "loss": 3.2132, "step": 26390 }, { "epoch": 1.7933822530235086, "grad_norm": 1.9162448644638062, "learning_rate": 7.759036553879604e-05, "loss": 3.3167, "step": 26395 }, { "epoch": 1.7937219730941703, "grad_norm": 2.4543304443359375, "learning_rate": 7.758611903791276e-05, "loss": 3.0996, "step": 26400 }, { "epoch": 1.7940616931648323, "grad_norm": 3.722655773162842, "learning_rate": 7.758187253702949e-05, "loss": 3.1772, "step": 26405 }, { "epoch": 1.794401413235494, "grad_norm": 2.3546760082244873, "learning_rate": 7.757762603614623e-05, "loss": 3.0021, "step": 26410 }, { "epoch": 1.7947411333061556, "grad_norm": 1.6066317558288574, "learning_rate": 7.757337953526294e-05, "loss": 3.0702, "step": 26415 }, { "epoch": 1.7950808533768177, "grad_norm": 2.3938136100769043, "learning_rate": 7.756913303437967e-05, "loss": 2.9935, "step": 26420 }, { "epoch": 1.7954205734474793, "grad_norm": 2.856961488723755, "learning_rate": 7.756488653349641e-05, "loss": 2.9893, "step": 26425 }, { "epoch": 1.795760293518141, "grad_norm": 2.2283706665039062, "learning_rate": 7.756064003261313e-05, "loss": 3.0744, "step": 26430 }, { "epoch": 1.7961000135888028, "grad_norm": 2.512599229812622, "learning_rate": 7.755639353172985e-05, "loss": 2.9258, "step": 26435 }, { "epoch": 
1.7964397336594646, "grad_norm": 2.532031774520874, "learning_rate": 7.755214703084658e-05, "loss": 3.2849, "step": 26440 }, { "epoch": 1.7967794537301263, "grad_norm": 1.8234078884124756, "learning_rate": 7.754790052996331e-05, "loss": 2.9894, "step": 26445 }, { "epoch": 1.7971191738007881, "grad_norm": 2.236910104751587, "learning_rate": 7.754365402908004e-05, "loss": 2.7594, "step": 26450 }, { "epoch": 1.79745889387145, "grad_norm": 2.763725519180298, "learning_rate": 7.753940752819677e-05, "loss": 3.1187, "step": 26455 }, { "epoch": 1.7977986139421116, "grad_norm": 2.461181163787842, "learning_rate": 7.75351610273135e-05, "loss": 3.2677, "step": 26460 }, { "epoch": 1.7981383340127735, "grad_norm": 2.2266182899475098, "learning_rate": 7.753091452643022e-05, "loss": 3.0194, "step": 26465 }, { "epoch": 1.7984780540834353, "grad_norm": 2.81679368019104, "learning_rate": 7.752666802554695e-05, "loss": 3.1458, "step": 26470 }, { "epoch": 1.798817774154097, "grad_norm": 2.4501900672912598, "learning_rate": 7.752242152466368e-05, "loss": 3.1922, "step": 26475 }, { "epoch": 1.7991574942247588, "grad_norm": 2.0089271068573, "learning_rate": 7.751817502378041e-05, "loss": 3.1314, "step": 26480 }, { "epoch": 1.7994972142954206, "grad_norm": 2.071876049041748, "learning_rate": 7.751392852289713e-05, "loss": 2.9528, "step": 26485 }, { "epoch": 1.7998369343660823, "grad_norm": 2.6126999855041504, "learning_rate": 7.750968202201386e-05, "loss": 3.0358, "step": 26490 }, { "epoch": 1.8001766544367441, "grad_norm": 1.979263424873352, "learning_rate": 7.750543552113059e-05, "loss": 3.1274, "step": 26495 }, { "epoch": 1.800516374507406, "grad_norm": 2.490952730178833, "learning_rate": 7.750118902024732e-05, "loss": 3.0538, "step": 26500 }, { "epoch": 1.8008560945780676, "grad_norm": 2.885842800140381, "learning_rate": 7.749694251936405e-05, "loss": 2.9749, "step": 26505 }, { "epoch": 1.8011958146487295, "grad_norm": 2.1924259662628174, "learning_rate": 7.749269601848077e-05, 
"loss": 3.3798, "step": 26510 }, { "epoch": 1.8015355347193913, "grad_norm": 2.438788652420044, "learning_rate": 7.74884495175975e-05, "loss": 3.1839, "step": 26515 }, { "epoch": 1.801875254790053, "grad_norm": 2.3999481201171875, "learning_rate": 7.748420301671423e-05, "loss": 2.8568, "step": 26520 }, { "epoch": 1.8022149748607148, "grad_norm": 2.2855465412139893, "learning_rate": 7.747995651583096e-05, "loss": 3.3466, "step": 26525 }, { "epoch": 1.8025546949313767, "grad_norm": 1.8062503337860107, "learning_rate": 7.747571001494769e-05, "loss": 3.0506, "step": 26530 }, { "epoch": 1.8028944150020383, "grad_norm": 2.5632057189941406, "learning_rate": 7.747146351406441e-05, "loss": 3.298, "step": 26535 }, { "epoch": 1.8032341350727001, "grad_norm": 3.5791549682617188, "learning_rate": 7.746721701318114e-05, "loss": 3.1422, "step": 26540 }, { "epoch": 1.803573855143362, "grad_norm": 2.091130018234253, "learning_rate": 7.746297051229787e-05, "loss": 3.1773, "step": 26545 }, { "epoch": 1.8039135752140236, "grad_norm": 2.396031141281128, "learning_rate": 7.74587240114146e-05, "loss": 3.0228, "step": 26550 }, { "epoch": 1.8042532952846853, "grad_norm": 2.5048418045043945, "learning_rate": 7.745447751053133e-05, "loss": 3.3761, "step": 26555 }, { "epoch": 1.8045930153553473, "grad_norm": 2.783858060836792, "learning_rate": 7.745023100964805e-05, "loss": 3.2456, "step": 26560 }, { "epoch": 1.804932735426009, "grad_norm": 2.1788558959960938, "learning_rate": 7.744598450876478e-05, "loss": 3.037, "step": 26565 }, { "epoch": 1.8052724554966706, "grad_norm": 2.3056671619415283, "learning_rate": 7.744173800788151e-05, "loss": 3.1544, "step": 26570 }, { "epoch": 1.8056121755673327, "grad_norm": 2.429523468017578, "learning_rate": 7.743749150699824e-05, "loss": 2.9899, "step": 26575 }, { "epoch": 1.8059518956379943, "grad_norm": 2.4359240531921387, "learning_rate": 7.743324500611497e-05, "loss": 2.9508, "step": 26580 }, { "epoch": 1.806291615708656, "grad_norm": 
2.3094794750213623, "learning_rate": 7.74289985052317e-05, "loss": 3.056, "step": 26585 }, { "epoch": 1.806631335779318, "grad_norm": 2.3741917610168457, "learning_rate": 7.742475200434842e-05, "loss": 3.1983, "step": 26590 }, { "epoch": 1.8069710558499796, "grad_norm": 2.288221597671509, "learning_rate": 7.742050550346515e-05, "loss": 3.0869, "step": 26595 }, { "epoch": 1.8073107759206413, "grad_norm": 2.6347525119781494, "learning_rate": 7.741625900258188e-05, "loss": 3.1772, "step": 26600 }, { "epoch": 1.8076504959913031, "grad_norm": 2.406423568725586, "learning_rate": 7.741201250169861e-05, "loss": 3.1251, "step": 26605 }, { "epoch": 1.807990216061965, "grad_norm": 2.241910696029663, "learning_rate": 7.740776600081534e-05, "loss": 3.1652, "step": 26610 }, { "epoch": 1.8083299361326266, "grad_norm": 1.9865071773529053, "learning_rate": 7.740351949993206e-05, "loss": 2.9906, "step": 26615 }, { "epoch": 1.8086696562032885, "grad_norm": 2.069444179534912, "learning_rate": 7.739927299904879e-05, "loss": 3.1773, "step": 26620 }, { "epoch": 1.8090093762739503, "grad_norm": 2.4377858638763428, "learning_rate": 7.739502649816552e-05, "loss": 3.0775, "step": 26625 }, { "epoch": 1.809349096344612, "grad_norm": 1.9275544881820679, "learning_rate": 7.739077999728225e-05, "loss": 3.1101, "step": 26630 }, { "epoch": 1.8096888164152738, "grad_norm": 2.0048301219940186, "learning_rate": 7.738653349639898e-05, "loss": 3.3246, "step": 26635 }, { "epoch": 1.8100285364859356, "grad_norm": 2.480525493621826, "learning_rate": 7.738228699551569e-05, "loss": 3.105, "step": 26640 }, { "epoch": 1.8103682565565973, "grad_norm": 1.9505364894866943, "learning_rate": 7.737804049463243e-05, "loss": 3.1537, "step": 26645 }, { "epoch": 1.8107079766272591, "grad_norm": 2.0714266300201416, "learning_rate": 7.737379399374916e-05, "loss": 3.2833, "step": 26650 }, { "epoch": 1.811047696697921, "grad_norm": 2.2133336067199707, "learning_rate": 7.736954749286587e-05, "loss": 3.1581, "step": 26655 }, 
{ "epoch": 1.8113874167685826, "grad_norm": 2.0205650329589844, "learning_rate": 7.736530099198262e-05, "loss": 3.15, "step": 26660 }, { "epoch": 1.8117271368392445, "grad_norm": 1.7573566436767578, "learning_rate": 7.736105449109934e-05, "loss": 3.1551, "step": 26665 }, { "epoch": 1.8120668569099063, "grad_norm": 2.23946475982666, "learning_rate": 7.735680799021606e-05, "loss": 3.1275, "step": 26670 }, { "epoch": 1.812406576980568, "grad_norm": 2.1011099815368652, "learning_rate": 7.73525614893328e-05, "loss": 3.0563, "step": 26675 }, { "epoch": 1.8127462970512298, "grad_norm": 2.198601722717285, "learning_rate": 7.734831498844953e-05, "loss": 2.9046, "step": 26680 }, { "epoch": 1.8130860171218917, "grad_norm": 2.197557210922241, "learning_rate": 7.734406848756624e-05, "loss": 3.0091, "step": 26685 }, { "epoch": 1.8134257371925533, "grad_norm": 1.7412292957305908, "learning_rate": 7.733982198668298e-05, "loss": 3.0878, "step": 26690 }, { "epoch": 1.8137654572632151, "grad_norm": 3.0230817794799805, "learning_rate": 7.733557548579971e-05, "loss": 3.5279, "step": 26695 }, { "epoch": 1.814105177333877, "grad_norm": 1.6145836114883423, "learning_rate": 7.733132898491643e-05, "loss": 3.0374, "step": 26700 }, { "epoch": 1.8144448974045386, "grad_norm": 2.460658073425293, "learning_rate": 7.732708248403317e-05, "loss": 2.9726, "step": 26705 }, { "epoch": 1.8147846174752005, "grad_norm": 2.2300047874450684, "learning_rate": 7.732283598314988e-05, "loss": 3.2933, "step": 26710 }, { "epoch": 1.8151243375458623, "grad_norm": 2.3013508319854736, "learning_rate": 7.731858948226661e-05, "loss": 3.0667, "step": 26715 }, { "epoch": 1.815464057616524, "grad_norm": 2.4465763568878174, "learning_rate": 7.731434298138335e-05, "loss": 3.1171, "step": 26720 }, { "epoch": 1.8158037776871856, "grad_norm": 2.0427074432373047, "learning_rate": 7.731009648050007e-05, "loss": 3.046, "step": 26725 }, { "epoch": 1.8161434977578477, "grad_norm": 2.5369038581848145, "learning_rate": 
7.73058499796168e-05, "loss": 3.2725, "step": 26730 }, { "epoch": 1.8164832178285093, "grad_norm": 2.166409730911255, "learning_rate": 7.730160347873354e-05, "loss": 3.3461, "step": 26735 }, { "epoch": 1.816822937899171, "grad_norm": 2.319040298461914, "learning_rate": 7.729735697785025e-05, "loss": 2.7712, "step": 26740 }, { "epoch": 1.817162657969833, "grad_norm": 1.9603718519210815, "learning_rate": 7.729311047696698e-05, "loss": 3.2593, "step": 26745 }, { "epoch": 1.8175023780404946, "grad_norm": 3.740324020385742, "learning_rate": 7.728886397608372e-05, "loss": 3.3696, "step": 26750 }, { "epoch": 1.8178420981111563, "grad_norm": 1.8102388381958008, "learning_rate": 7.728461747520043e-05, "loss": 3.0669, "step": 26755 }, { "epoch": 1.8181818181818183, "grad_norm": 3.1991071701049805, "learning_rate": 7.728037097431716e-05, "loss": 3.1157, "step": 26760 }, { "epoch": 1.81852153825248, "grad_norm": 2.5141990184783936, "learning_rate": 7.72761244734339e-05, "loss": 3.0235, "step": 26765 }, { "epoch": 1.8188612583231416, "grad_norm": 2.187177896499634, "learning_rate": 7.727187797255062e-05, "loss": 3.2348, "step": 26770 }, { "epoch": 1.8192009783938035, "grad_norm": 2.5198798179626465, "learning_rate": 7.726763147166735e-05, "loss": 3.2125, "step": 26775 }, { "epoch": 1.8195406984644653, "grad_norm": 2.6535212993621826, "learning_rate": 7.726338497078409e-05, "loss": 2.8693, "step": 26780 }, { "epoch": 1.819880418535127, "grad_norm": 1.8410385847091675, "learning_rate": 7.72591384699008e-05, "loss": 3.0423, "step": 26785 }, { "epoch": 1.8202201386057888, "grad_norm": 2.551285982131958, "learning_rate": 7.725489196901753e-05, "loss": 3.092, "step": 26790 }, { "epoch": 1.8205598586764506, "grad_norm": 2.199617862701416, "learning_rate": 7.725064546813426e-05, "loss": 3.0433, "step": 26795 }, { "epoch": 1.8208995787471123, "grad_norm": 2.3636202812194824, "learning_rate": 7.724639896725099e-05, "loss": 3.1591, "step": 26800 }, { "epoch": 1.8212392988177741, 
"grad_norm": 2.478843927383423, "learning_rate": 7.724215246636771e-05, "loss": 3.2765, "step": 26805 }, { "epoch": 1.821579018888436, "grad_norm": 3.1877827644348145, "learning_rate": 7.723790596548444e-05, "loss": 3.0666, "step": 26810 }, { "epoch": 1.8219187389590976, "grad_norm": 2.585592031478882, "learning_rate": 7.723365946460117e-05, "loss": 3.034, "step": 26815 }, { "epoch": 1.8222584590297595, "grad_norm": 2.2671680450439453, "learning_rate": 7.72294129637179e-05, "loss": 2.7859, "step": 26820 }, { "epoch": 1.8225981791004213, "grad_norm": 2.1179215908050537, "learning_rate": 7.722516646283463e-05, "loss": 2.9632, "step": 26825 }, { "epoch": 1.822937899171083, "grad_norm": 2.3167059421539307, "learning_rate": 7.722091996195135e-05, "loss": 3.0691, "step": 26830 }, { "epoch": 1.8232776192417448, "grad_norm": 2.3410491943359375, "learning_rate": 7.721667346106808e-05, "loss": 3.1702, "step": 26835 }, { "epoch": 1.8236173393124067, "grad_norm": 2.363323926925659, "learning_rate": 7.721242696018481e-05, "loss": 3.0566, "step": 26840 }, { "epoch": 1.8239570593830683, "grad_norm": 2.4881200790405273, "learning_rate": 7.720818045930154e-05, "loss": 3.1313, "step": 26845 }, { "epoch": 1.8242967794537301, "grad_norm": 2.0896127223968506, "learning_rate": 7.720393395841827e-05, "loss": 3.1223, "step": 26850 }, { "epoch": 1.824636499524392, "grad_norm": 2.2629923820495605, "learning_rate": 7.7199687457535e-05, "loss": 3.2373, "step": 26855 }, { "epoch": 1.8249762195950536, "grad_norm": 2.2007815837860107, "learning_rate": 7.719544095665172e-05, "loss": 2.9422, "step": 26860 }, { "epoch": 1.8253159396657155, "grad_norm": 2.223975896835327, "learning_rate": 7.719119445576845e-05, "loss": 3.2982, "step": 26865 }, { "epoch": 1.8256556597363773, "grad_norm": 2.763624906539917, "learning_rate": 7.718694795488518e-05, "loss": 3.208, "step": 26870 }, { "epoch": 1.825995379807039, "grad_norm": 2.9828145503997803, "learning_rate": 7.71827014540019e-05, "loss": 3.5227, "step": 
26875 }, { "epoch": 1.8263350998777008, "grad_norm": 2.9083051681518555, "learning_rate": 7.717845495311863e-05, "loss": 3.1263, "step": 26880 }, { "epoch": 1.8266748199483627, "grad_norm": 1.9238674640655518, "learning_rate": 7.717420845223536e-05, "loss": 3.0739, "step": 26885 }, { "epoch": 1.8270145400190243, "grad_norm": 2.9363484382629395, "learning_rate": 7.716996195135209e-05, "loss": 3.0791, "step": 26890 }, { "epoch": 1.827354260089686, "grad_norm": 2.052509307861328, "learning_rate": 7.716571545046882e-05, "loss": 3.1173, "step": 26895 }, { "epoch": 1.827693980160348, "grad_norm": 2.747323751449585, "learning_rate": 7.716146894958555e-05, "loss": 2.9635, "step": 26900 }, { "epoch": 1.8280337002310096, "grad_norm": 2.5822415351867676, "learning_rate": 7.715722244870227e-05, "loss": 3.0979, "step": 26905 }, { "epoch": 1.8283734203016713, "grad_norm": 1.9909422397613525, "learning_rate": 7.7152975947819e-05, "loss": 2.951, "step": 26910 }, { "epoch": 1.8287131403723333, "grad_norm": 2.530153274536133, "learning_rate": 7.714872944693573e-05, "loss": 3.2017, "step": 26915 }, { "epoch": 1.829052860442995, "grad_norm": 2.203684091567993, "learning_rate": 7.714448294605246e-05, "loss": 2.9655, "step": 26920 }, { "epoch": 1.8293925805136566, "grad_norm": 3.315704584121704, "learning_rate": 7.714023644516919e-05, "loss": 3.0512, "step": 26925 }, { "epoch": 1.8297323005843187, "grad_norm": 2.359522819519043, "learning_rate": 7.713598994428591e-05, "loss": 3.2, "step": 26930 }, { "epoch": 1.8300720206549803, "grad_norm": 1.7305158376693726, "learning_rate": 7.713174344340264e-05, "loss": 3.1963, "step": 26935 }, { "epoch": 1.830411740725642, "grad_norm": 2.193296432495117, "learning_rate": 7.712749694251937e-05, "loss": 3.0817, "step": 26940 }, { "epoch": 1.8307514607963038, "grad_norm": 2.0984816551208496, "learning_rate": 7.71232504416361e-05, "loss": 2.9039, "step": 26945 }, { "epoch": 1.8310911808669657, "grad_norm": 2.2206740379333496, "learning_rate": 
7.711900394075283e-05, "loss": 2.9126, "step": 26950 }, { "epoch": 1.8314309009376273, "grad_norm": 2.661334991455078, "learning_rate": 7.711475743986955e-05, "loss": 3.0428, "step": 26955 }, { "epoch": 1.8317706210082891, "grad_norm": 1.778355598449707, "learning_rate": 7.711051093898628e-05, "loss": 3.2122, "step": 26960 }, { "epoch": 1.832110341078951, "grad_norm": 2.066814422607422, "learning_rate": 7.710626443810301e-05, "loss": 2.9322, "step": 26965 }, { "epoch": 1.8324500611496126, "grad_norm": 2.570808172225952, "learning_rate": 7.710201793721974e-05, "loss": 3.1221, "step": 26970 }, { "epoch": 1.8327897812202745, "grad_norm": 2.6245594024658203, "learning_rate": 7.709777143633647e-05, "loss": 3.3583, "step": 26975 }, { "epoch": 1.8331295012909363, "grad_norm": 1.9049986600875854, "learning_rate": 7.70935249354532e-05, "loss": 3.135, "step": 26980 }, { "epoch": 1.833469221361598, "grad_norm": 2.338733434677124, "learning_rate": 7.708927843456992e-05, "loss": 3.0272, "step": 26985 }, { "epoch": 1.8338089414322598, "grad_norm": 1.9864106178283691, "learning_rate": 7.708503193368665e-05, "loss": 3.1316, "step": 26990 }, { "epoch": 1.8341486615029217, "grad_norm": 2.092661142349243, "learning_rate": 7.708078543280336e-05, "loss": 2.7805, "step": 26995 }, { "epoch": 1.8344883815735833, "grad_norm": 2.7871005535125732, "learning_rate": 7.70765389319201e-05, "loss": 3.201, "step": 27000 }, { "epoch": 1.8348281016442451, "grad_norm": 2.923245906829834, "learning_rate": 7.707229243103683e-05, "loss": 2.9419, "step": 27005 }, { "epoch": 1.835167821714907, "grad_norm": 2.83024263381958, "learning_rate": 7.706804593015355e-05, "loss": 2.9797, "step": 27010 }, { "epoch": 1.8355075417855686, "grad_norm": 1.8398743867874146, "learning_rate": 7.706379942927029e-05, "loss": 3.1155, "step": 27015 }, { "epoch": 1.8358472618562305, "grad_norm": 2.296391010284424, "learning_rate": 7.705955292838702e-05, "loss": 3.1674, "step": 27020 }, { "epoch": 1.8361869819268923, 
"grad_norm": 2.731701612472534, "learning_rate": 7.705530642750373e-05, "loss": 3.0923, "step": 27025 }, { "epoch": 1.836526701997554, "grad_norm": 3.2736191749572754, "learning_rate": 7.705105992662047e-05, "loss": 2.983, "step": 27030 }, { "epoch": 1.8368664220682158, "grad_norm": 2.073464870452881, "learning_rate": 7.70468134257372e-05, "loss": 3.1866, "step": 27035 }, { "epoch": 1.8372061421388777, "grad_norm": 1.944350004196167, "learning_rate": 7.704256692485392e-05, "loss": 2.7842, "step": 27040 }, { "epoch": 1.8375458622095393, "grad_norm": 2.769984722137451, "learning_rate": 7.703832042397066e-05, "loss": 3.052, "step": 27045 }, { "epoch": 1.8378855822802012, "grad_norm": 1.8545281887054443, "learning_rate": 7.703407392308739e-05, "loss": 3.1054, "step": 27050 }, { "epoch": 1.838225302350863, "grad_norm": 3.0648059844970703, "learning_rate": 7.70298274222041e-05, "loss": 3.0471, "step": 27055 }, { "epoch": 1.8385650224215246, "grad_norm": 2.437469959259033, "learning_rate": 7.702558092132084e-05, "loss": 3.0116, "step": 27060 }, { "epoch": 1.8389047424921863, "grad_norm": 2.4457004070281982, "learning_rate": 7.702133442043756e-05, "loss": 3.0664, "step": 27065 }, { "epoch": 1.8392444625628483, "grad_norm": 2.1901047229766846, "learning_rate": 7.701708791955428e-05, "loss": 3.0984, "step": 27070 }, { "epoch": 1.83958418263351, "grad_norm": 2.1599161624908447, "learning_rate": 7.701284141867103e-05, "loss": 3.1907, "step": 27075 }, { "epoch": 1.8399239027041716, "grad_norm": 2.23201060295105, "learning_rate": 7.700859491778774e-05, "loss": 3.1663, "step": 27080 }, { "epoch": 1.8402636227748337, "grad_norm": 3.026954174041748, "learning_rate": 7.700434841690447e-05, "loss": 3.0543, "step": 27085 }, { "epoch": 1.8406033428454953, "grad_norm": 2.698244094848633, "learning_rate": 7.700010191602121e-05, "loss": 3.1336, "step": 27090 }, { "epoch": 1.840943062916157, "grad_norm": 2.29350209236145, "learning_rate": 7.699585541513792e-05, "loss": 2.9643, "step": 
27095 }, { "epoch": 1.841282782986819, "grad_norm": 2.657437562942505, "learning_rate": 7.699160891425465e-05, "loss": 3.1281, "step": 27100 }, { "epoch": 1.8416225030574807, "grad_norm": 2.0524096488952637, "learning_rate": 7.69873624133714e-05, "loss": 2.9292, "step": 27105 }, { "epoch": 1.8419622231281423, "grad_norm": 2.117199420928955, "learning_rate": 7.698311591248811e-05, "loss": 3.2478, "step": 27110 }, { "epoch": 1.8423019431988041, "grad_norm": 2.2987513542175293, "learning_rate": 7.697886941160484e-05, "loss": 2.87, "step": 27115 }, { "epoch": 1.842641663269466, "grad_norm": 2.038937568664551, "learning_rate": 7.697462291072158e-05, "loss": 3.2343, "step": 27120 }, { "epoch": 1.8429813833401276, "grad_norm": 2.59110689163208, "learning_rate": 7.697037640983829e-05, "loss": 3.1579, "step": 27125 }, { "epoch": 1.8433211034107895, "grad_norm": 2.2692933082580566, "learning_rate": 7.696612990895502e-05, "loss": 3.1608, "step": 27130 }, { "epoch": 1.8436608234814513, "grad_norm": 1.6392329931259155, "learning_rate": 7.696188340807175e-05, "loss": 3.2101, "step": 27135 }, { "epoch": 1.844000543552113, "grad_norm": 1.9864287376403809, "learning_rate": 7.695763690718848e-05, "loss": 3.1093, "step": 27140 }, { "epoch": 1.8443402636227748, "grad_norm": 1.9157487154006958, "learning_rate": 7.69533904063052e-05, "loss": 2.9183, "step": 27145 }, { "epoch": 1.8446799836934367, "grad_norm": 2.5211832523345947, "learning_rate": 7.694914390542193e-05, "loss": 3.1746, "step": 27150 }, { "epoch": 1.8450197037640983, "grad_norm": 2.007887601852417, "learning_rate": 7.694489740453866e-05, "loss": 3.247, "step": 27155 }, { "epoch": 1.8453594238347601, "grad_norm": 2.5068259239196777, "learning_rate": 7.694065090365539e-05, "loss": 3.2636, "step": 27160 }, { "epoch": 1.845699143905422, "grad_norm": 2.9746387004852295, "learning_rate": 7.693640440277212e-05, "loss": 2.9189, "step": 27165 }, { "epoch": 1.8460388639760836, "grad_norm": 2.7051615715026855, "learning_rate": 
7.693215790188884e-05, "loss": 3.086, "step": 27170 }, { "epoch": 1.8463785840467455, "grad_norm": 2.3708856105804443, "learning_rate": 7.692791140100557e-05, "loss": 3.248, "step": 27175 }, { "epoch": 1.8467183041174073, "grad_norm": 2.9180946350097656, "learning_rate": 7.69236649001223e-05, "loss": 3.4064, "step": 27180 }, { "epoch": 1.847058024188069, "grad_norm": 2.70876407623291, "learning_rate": 7.691941839923903e-05, "loss": 3.2662, "step": 27185 }, { "epoch": 1.8473977442587308, "grad_norm": 2.154006242752075, "learning_rate": 7.691517189835576e-05, "loss": 3.41, "step": 27190 }, { "epoch": 1.8477374643293927, "grad_norm": 2.0589733123779297, "learning_rate": 7.691092539747248e-05, "loss": 3.3298, "step": 27195 }, { "epoch": 1.8480771844000543, "grad_norm": 1.9650657176971436, "learning_rate": 7.690667889658921e-05, "loss": 3.1014, "step": 27200 }, { "epoch": 1.8484169044707162, "grad_norm": 1.933982014656067, "learning_rate": 7.690243239570594e-05, "loss": 3.4651, "step": 27205 }, { "epoch": 1.848756624541378, "grad_norm": 2.068985939025879, "learning_rate": 7.689818589482267e-05, "loss": 3.0291, "step": 27210 }, { "epoch": 1.8490963446120396, "grad_norm": 1.821938395500183, "learning_rate": 7.68939393939394e-05, "loss": 2.9486, "step": 27215 }, { "epoch": 1.8494360646827015, "grad_norm": 2.0930378437042236, "learning_rate": 7.688969289305612e-05, "loss": 3.2507, "step": 27220 }, { "epoch": 1.8497757847533634, "grad_norm": 2.478593111038208, "learning_rate": 7.688544639217285e-05, "loss": 2.9142, "step": 27225 }, { "epoch": 1.850115504824025, "grad_norm": 3.154484748840332, "learning_rate": 7.688119989128958e-05, "loss": 2.9439, "step": 27230 }, { "epoch": 1.8504552248946866, "grad_norm": 1.861238956451416, "learning_rate": 7.687695339040631e-05, "loss": 3.1517, "step": 27235 }, { "epoch": 1.8507949449653487, "grad_norm": 2.4789273738861084, "learning_rate": 7.687270688952304e-05, "loss": 3.1058, "step": 27240 }, { "epoch": 1.8511346650360103, "grad_norm": 
2.8032898902893066, "learning_rate": 7.686846038863976e-05, "loss": 3.1437, "step": 27245 }, { "epoch": 1.851474385106672, "grad_norm": 3.2110612392425537, "learning_rate": 7.686421388775649e-05, "loss": 3.0456, "step": 27250 }, { "epoch": 1.851814105177334, "grad_norm": 2.325127601623535, "learning_rate": 7.685996738687322e-05, "loss": 3.03, "step": 27255 }, { "epoch": 1.8521538252479957, "grad_norm": 1.97621488571167, "learning_rate": 7.685572088598995e-05, "loss": 3.1132, "step": 27260 }, { "epoch": 1.8524935453186573, "grad_norm": 2.351135730743408, "learning_rate": 7.685147438510668e-05, "loss": 3.2396, "step": 27265 }, { "epoch": 1.8528332653893194, "grad_norm": 2.6092605590820312, "learning_rate": 7.68472278842234e-05, "loss": 3.1898, "step": 27270 }, { "epoch": 1.853172985459981, "grad_norm": 2.7400941848754883, "learning_rate": 7.684298138334013e-05, "loss": 3.4605, "step": 27275 }, { "epoch": 1.8535127055306426, "grad_norm": 2.38222599029541, "learning_rate": 7.683873488245686e-05, "loss": 3.2443, "step": 27280 }, { "epoch": 1.8538524256013045, "grad_norm": 2.746715784072876, "learning_rate": 7.683448838157359e-05, "loss": 3.1581, "step": 27285 }, { "epoch": 1.8541921456719663, "grad_norm": 1.7765752077102661, "learning_rate": 7.683024188069032e-05, "loss": 3.1439, "step": 27290 }, { "epoch": 1.854531865742628, "grad_norm": 2.4650473594665527, "learning_rate": 7.682599537980704e-05, "loss": 3.0289, "step": 27295 }, { "epoch": 1.8548715858132898, "grad_norm": 2.4802491664886475, "learning_rate": 7.682174887892377e-05, "loss": 3.18, "step": 27300 }, { "epoch": 1.8552113058839517, "grad_norm": 2.068467617034912, "learning_rate": 7.68175023780405e-05, "loss": 3.0209, "step": 27305 }, { "epoch": 1.8555510259546133, "grad_norm": 2.8435990810394287, "learning_rate": 7.681325587715723e-05, "loss": 2.9467, "step": 27310 }, { "epoch": 1.8558907460252752, "grad_norm": 3.043851375579834, "learning_rate": 7.680900937627396e-05, "loss": 3.3474, "step": 27315 }, { 
"epoch": 1.856230466095937, "grad_norm": 2.353761672973633, "learning_rate": 7.680476287539069e-05, "loss": 2.957, "step": 27320 }, { "epoch": 1.8565701861665986, "grad_norm": 2.140108585357666, "learning_rate": 7.680051637450741e-05, "loss": 2.881, "step": 27325 }, { "epoch": 1.8569099062372605, "grad_norm": 2.4800424575805664, "learning_rate": 7.679626987362414e-05, "loss": 3.0105, "step": 27330 }, { "epoch": 1.8572496263079223, "grad_norm": 2.343324899673462, "learning_rate": 7.679202337274086e-05, "loss": 3.063, "step": 27335 }, { "epoch": 1.857589346378584, "grad_norm": 2.18135666847229, "learning_rate": 7.67877768718576e-05, "loss": 2.9887, "step": 27340 }, { "epoch": 1.8579290664492458, "grad_norm": 2.4539411067962646, "learning_rate": 7.678353037097433e-05, "loss": 2.9533, "step": 27345 }, { "epoch": 1.8582687865199077, "grad_norm": 2.331774950027466, "learning_rate": 7.677928387009104e-05, "loss": 3.0974, "step": 27350 }, { "epoch": 1.8586085065905693, "grad_norm": 2.3595340251922607, "learning_rate": 7.677503736920778e-05, "loss": 3.0773, "step": 27355 }, { "epoch": 1.8589482266612312, "grad_norm": 2.0841751098632812, "learning_rate": 7.677079086832451e-05, "loss": 3.0586, "step": 27360 }, { "epoch": 1.859287946731893, "grad_norm": 1.9876915216445923, "learning_rate": 7.676654436744122e-05, "loss": 3.0679, "step": 27365 }, { "epoch": 1.8596276668025546, "grad_norm": 2.1417362689971924, "learning_rate": 7.676229786655797e-05, "loss": 3.0183, "step": 27370 }, { "epoch": 1.8599673868732165, "grad_norm": 2.726984739303589, "learning_rate": 7.675805136567469e-05, "loss": 3.2041, "step": 27375 }, { "epoch": 1.8603071069438784, "grad_norm": 2.067295551300049, "learning_rate": 7.675380486479141e-05, "loss": 3.0132, "step": 27380 }, { "epoch": 1.86064682701454, "grad_norm": 2.146242618560791, "learning_rate": 7.674955836390815e-05, "loss": 3.074, "step": 27385 }, { "epoch": 1.8609865470852018, "grad_norm": 2.232279062271118, "learning_rate": 7.674531186302488e-05, 
"loss": 2.8961, "step": 27390 }, { "epoch": 1.8613262671558637, "grad_norm": 2.2834177017211914, "learning_rate": 7.674106536214159e-05, "loss": 3.14, "step": 27395 }, { "epoch": 1.8616659872265253, "grad_norm": 2.6307361125946045, "learning_rate": 7.673681886125833e-05, "loss": 3.3468, "step": 27400 }, { "epoch": 1.862005707297187, "grad_norm": 2.715911626815796, "learning_rate": 7.673257236037506e-05, "loss": 2.9778, "step": 27405 }, { "epoch": 1.862345427367849, "grad_norm": 2.0807228088378906, "learning_rate": 7.672832585949178e-05, "loss": 3.3724, "step": 27410 }, { "epoch": 1.8626851474385107, "grad_norm": 2.5434887409210205, "learning_rate": 7.672407935860852e-05, "loss": 3.2741, "step": 27415 }, { "epoch": 1.8630248675091723, "grad_norm": 2.1043920516967773, "learning_rate": 7.671983285772523e-05, "loss": 3.2459, "step": 27420 }, { "epoch": 1.8633645875798344, "grad_norm": 1.8709131479263306, "learning_rate": 7.671558635684196e-05, "loss": 3.2026, "step": 27425 }, { "epoch": 1.863704307650496, "grad_norm": 2.4489920139312744, "learning_rate": 7.67113398559587e-05, "loss": 3.2356, "step": 27430 }, { "epoch": 1.8640440277211576, "grad_norm": 1.9306682348251343, "learning_rate": 7.670709335507542e-05, "loss": 3.1765, "step": 27435 }, { "epoch": 1.8643837477918197, "grad_norm": 2.573514699935913, "learning_rate": 7.670284685419214e-05, "loss": 3.3323, "step": 27440 }, { "epoch": 1.8647234678624813, "grad_norm": 2.5110087394714355, "learning_rate": 7.669860035330889e-05, "loss": 3.1147, "step": 27445 }, { "epoch": 1.865063187933143, "grad_norm": 2.3144068717956543, "learning_rate": 7.66943538524256e-05, "loss": 2.8936, "step": 27450 }, { "epoch": 1.8654029080038048, "grad_norm": 2.231311559677124, "learning_rate": 7.669010735154233e-05, "loss": 2.9251, "step": 27455 }, { "epoch": 1.8657426280744667, "grad_norm": 1.6897121667861938, "learning_rate": 7.668586085065907e-05, "loss": 3.11, "step": 27460 }, { "epoch": 1.8660823481451283, "grad_norm": 
2.448975086212158, "learning_rate": 7.668161434977578e-05, "loss": 2.9559, "step": 27465 }, { "epoch": 1.8664220682157902, "grad_norm": 2.1717324256896973, "learning_rate": 7.667736784889251e-05, "loss": 2.9955, "step": 27470 }, { "epoch": 1.866761788286452, "grad_norm": 2.089994192123413, "learning_rate": 7.667312134800925e-05, "loss": 2.8238, "step": 27475 }, { "epoch": 1.8671015083571136, "grad_norm": 2.3929944038391113, "learning_rate": 7.666887484712597e-05, "loss": 3.2607, "step": 27480 }, { "epoch": 1.8674412284277755, "grad_norm": 1.9740955829620361, "learning_rate": 7.66646283462427e-05, "loss": 2.891, "step": 27485 }, { "epoch": 1.8677809484984373, "grad_norm": 2.3024773597717285, "learning_rate": 7.666038184535942e-05, "loss": 3.0612, "step": 27490 }, { "epoch": 1.868120668569099, "grad_norm": 3.3042104244232178, "learning_rate": 7.665613534447615e-05, "loss": 3.2953, "step": 27495 }, { "epoch": 1.8684603886397608, "grad_norm": 2.3196797370910645, "learning_rate": 7.665188884359288e-05, "loss": 3.0501, "step": 27500 }, { "epoch": 1.8688001087104227, "grad_norm": 2.428032875061035, "learning_rate": 7.664764234270961e-05, "loss": 3.207, "step": 27505 }, { "epoch": 1.8691398287810843, "grad_norm": 3.057541847229004, "learning_rate": 7.664339584182634e-05, "loss": 2.8613, "step": 27510 }, { "epoch": 1.8694795488517462, "grad_norm": 2.063589334487915, "learning_rate": 7.663914934094306e-05, "loss": 3.1815, "step": 27515 }, { "epoch": 1.869819268922408, "grad_norm": 2.3187758922576904, "learning_rate": 7.663490284005979e-05, "loss": 2.9754, "step": 27520 }, { "epoch": 1.8701589889930696, "grad_norm": 2.0371193885803223, "learning_rate": 7.663065633917652e-05, "loss": 3.1302, "step": 27525 }, { "epoch": 1.8704987090637315, "grad_norm": 1.6881928443908691, "learning_rate": 7.662640983829325e-05, "loss": 3.1822, "step": 27530 }, { "epoch": 1.8708384291343934, "grad_norm": 2.5029690265655518, "learning_rate": 7.662216333740998e-05, "loss": 2.7222, "step": 27535 }, 
{ "epoch": 1.871178149205055, "grad_norm": 2.034454345703125, "learning_rate": 7.66179168365267e-05, "loss": 3.3064, "step": 27540 }, { "epoch": 1.8715178692757168, "grad_norm": 1.9858264923095703, "learning_rate": 7.661367033564343e-05, "loss": 3.1488, "step": 27545 }, { "epoch": 1.8718575893463787, "grad_norm": 2.0935847759246826, "learning_rate": 7.660942383476016e-05, "loss": 3.2976, "step": 27550 }, { "epoch": 1.8721973094170403, "grad_norm": 2.2324557304382324, "learning_rate": 7.660517733387689e-05, "loss": 3.1139, "step": 27555 }, { "epoch": 1.8725370294877022, "grad_norm": 2.2560606002807617, "learning_rate": 7.660093083299362e-05, "loss": 2.755, "step": 27560 }, { "epoch": 1.872876749558364, "grad_norm": 2.4951157569885254, "learning_rate": 7.659668433211034e-05, "loss": 3.1529, "step": 27565 }, { "epoch": 1.8732164696290257, "grad_norm": 1.9777984619140625, "learning_rate": 7.659243783122707e-05, "loss": 2.9721, "step": 27570 }, { "epoch": 1.8735561896996873, "grad_norm": 2.509479522705078, "learning_rate": 7.65881913303438e-05, "loss": 3.1863, "step": 27575 }, { "epoch": 1.8738959097703494, "grad_norm": 3.0480055809020996, "learning_rate": 7.658394482946053e-05, "loss": 3.2283, "step": 27580 }, { "epoch": 1.874235629841011, "grad_norm": 2.016733169555664, "learning_rate": 7.657969832857726e-05, "loss": 3.0942, "step": 27585 }, { "epoch": 1.8745753499116726, "grad_norm": 1.846492886543274, "learning_rate": 7.657545182769398e-05, "loss": 2.9827, "step": 27590 }, { "epoch": 1.8749150699823347, "grad_norm": 2.2705273628234863, "learning_rate": 7.657120532681071e-05, "loss": 3.0156, "step": 27595 }, { "epoch": 1.8752547900529963, "grad_norm": 3.2174344062805176, "learning_rate": 7.656695882592744e-05, "loss": 2.9896, "step": 27600 }, { "epoch": 1.875594510123658, "grad_norm": 2.8306455612182617, "learning_rate": 7.656271232504417e-05, "loss": 3.3594, "step": 27605 }, { "epoch": 1.87593423019432, "grad_norm": 1.9212497472763062, "learning_rate": 
7.65584658241609e-05, "loss": 3.0082, "step": 27610 }, { "epoch": 1.8762739502649817, "grad_norm": 2.091606378555298, "learning_rate": 7.655421932327762e-05, "loss": 3.1608, "step": 27615 }, { "epoch": 1.8766136703356433, "grad_norm": 2.082756757736206, "learning_rate": 7.654997282239435e-05, "loss": 3.0812, "step": 27620 }, { "epoch": 1.8769533904063052, "grad_norm": 2.6417272090911865, "learning_rate": 7.654572632151108e-05, "loss": 3.2918, "step": 27625 }, { "epoch": 1.877293110476967, "grad_norm": 1.738220453262329, "learning_rate": 7.654147982062781e-05, "loss": 3.178, "step": 27630 }, { "epoch": 1.8776328305476286, "grad_norm": 2.31689190864563, "learning_rate": 7.653723331974454e-05, "loss": 2.8362, "step": 27635 }, { "epoch": 1.8779725506182905, "grad_norm": 1.877156138420105, "learning_rate": 7.653298681886126e-05, "loss": 3.1947, "step": 27640 }, { "epoch": 1.8783122706889523, "grad_norm": 2.0325238704681396, "learning_rate": 7.652874031797799e-05, "loss": 2.8322, "step": 27645 }, { "epoch": 1.878651990759614, "grad_norm": 2.6106879711151123, "learning_rate": 7.652449381709472e-05, "loss": 2.8975, "step": 27650 }, { "epoch": 1.8789917108302758, "grad_norm": 1.8443951606750488, "learning_rate": 7.652024731621145e-05, "loss": 3.1467, "step": 27655 }, { "epoch": 1.8793314309009377, "grad_norm": 2.3421149253845215, "learning_rate": 7.651600081532818e-05, "loss": 3.1108, "step": 27660 }, { "epoch": 1.8796711509715993, "grad_norm": 3.119368076324463, "learning_rate": 7.65117543144449e-05, "loss": 3.2215, "step": 27665 }, { "epoch": 1.8800108710422612, "grad_norm": 2.329908609390259, "learning_rate": 7.650750781356163e-05, "loss": 3.1109, "step": 27670 }, { "epoch": 1.880350591112923, "grad_norm": 2.177886486053467, "learning_rate": 7.650326131267836e-05, "loss": 2.9682, "step": 27675 }, { "epoch": 1.8806903111835847, "grad_norm": 2.719085216522217, "learning_rate": 7.649901481179509e-05, "loss": 3.224, "step": 27680 }, { "epoch": 1.8810300312542465, 
"grad_norm": 2.0145809650421143, "learning_rate": 7.649476831091182e-05, "loss": 2.9872, "step": 27685 }, { "epoch": 1.8813697513249084, "grad_norm": 2.1415984630584717, "learning_rate": 7.649052181002853e-05, "loss": 3.0974, "step": 27690 }, { "epoch": 1.88170947139557, "grad_norm": 2.3289973735809326, "learning_rate": 7.648627530914527e-05, "loss": 3.3273, "step": 27695 }, { "epoch": 1.8820491914662318, "grad_norm": 2.4103522300720215, "learning_rate": 7.6482028808262e-05, "loss": 2.9701, "step": 27700 }, { "epoch": 1.8823889115368937, "grad_norm": 2.717409610748291, "learning_rate": 7.647778230737871e-05, "loss": 3.0437, "step": 27705 }, { "epoch": 1.8827286316075553, "grad_norm": 2.373095989227295, "learning_rate": 7.647353580649546e-05, "loss": 3.1154, "step": 27710 }, { "epoch": 1.8830683516782172, "grad_norm": 2.031996011734009, "learning_rate": 7.646928930561218e-05, "loss": 3.2179, "step": 27715 }, { "epoch": 1.883408071748879, "grad_norm": 2.3508496284484863, "learning_rate": 7.64650428047289e-05, "loss": 3.0274, "step": 27720 }, { "epoch": 1.8837477918195407, "grad_norm": 1.8511039018630981, "learning_rate": 7.646079630384564e-05, "loss": 3.1366, "step": 27725 }, { "epoch": 1.8840875118902025, "grad_norm": 2.8277013301849365, "learning_rate": 7.645654980296237e-05, "loss": 3.0585, "step": 27730 }, { "epoch": 1.8844272319608644, "grad_norm": 2.1448183059692383, "learning_rate": 7.645230330207908e-05, "loss": 3.2475, "step": 27735 }, { "epoch": 1.884766952031526, "grad_norm": 2.570324659347534, "learning_rate": 7.644805680119582e-05, "loss": 3.2969, "step": 27740 }, { "epoch": 1.8851066721021879, "grad_norm": 2.0884017944335938, "learning_rate": 7.644381030031255e-05, "loss": 3.0555, "step": 27745 }, { "epoch": 1.8854463921728497, "grad_norm": null, "learning_rate": 7.644041309960592e-05, "loss": 3.1024, "step": 27750 }, { "epoch": 1.8857861122435113, "grad_norm": 2.806691884994507, "learning_rate": 7.643616659872266e-05, "loss": 2.8527, "step": 27755 }, { 
"epoch": 1.886125832314173, "grad_norm": 2.1964993476867676, "learning_rate": 7.643192009783938e-05, "loss": 3.2373, "step": 27760 }, { "epoch": 1.886465552384835, "grad_norm": 2.354509115219116, "learning_rate": 7.64276735969561e-05, "loss": 3.1778, "step": 27765 }, { "epoch": 1.8868052724554967, "grad_norm": 2.4587438106536865, "learning_rate": 7.642342709607285e-05, "loss": 3.1722, "step": 27770 }, { "epoch": 1.8871449925261583, "grad_norm": 2.4756369590759277, "learning_rate": 7.641918059518956e-05, "loss": 2.9047, "step": 27775 }, { "epoch": 1.8874847125968204, "grad_norm": 1.7465013265609741, "learning_rate": 7.64149340943063e-05, "loss": 3.0069, "step": 27780 }, { "epoch": 1.887824432667482, "grad_norm": 3.511488676071167, "learning_rate": 7.641068759342303e-05, "loss": 2.9059, "step": 27785 }, { "epoch": 1.8881641527381436, "grad_norm": 2.237320899963379, "learning_rate": 7.640644109253975e-05, "loss": 3.3023, "step": 27790 }, { "epoch": 1.8885038728088055, "grad_norm": 2.0233359336853027, "learning_rate": 7.640219459165649e-05, "loss": 3.1025, "step": 27795 }, { "epoch": 1.8888435928794673, "grad_norm": 2.548675060272217, "learning_rate": 7.639794809077321e-05, "loss": 3.0015, "step": 27800 }, { "epoch": 1.889183312950129, "grad_norm": 2.0353915691375732, "learning_rate": 7.639370158988993e-05, "loss": 3.0699, "step": 27805 }, { "epoch": 1.8895230330207908, "grad_norm": 2.5724847316741943, "learning_rate": 7.638945508900667e-05, "loss": 3.2858, "step": 27810 }, { "epoch": 1.8898627530914527, "grad_norm": 2.9025065898895264, "learning_rate": 7.638520858812339e-05, "loss": 3.0484, "step": 27815 }, { "epoch": 1.8902024731621143, "grad_norm": 2.2071945667266846, "learning_rate": 7.638096208724011e-05, "loss": 3.0271, "step": 27820 }, { "epoch": 1.8905421932327762, "grad_norm": 1.82050359249115, "learning_rate": 7.637671558635685e-05, "loss": 3.1157, "step": 27825 }, { "epoch": 1.890881913303438, "grad_norm": 2.4065794944763184, "learning_rate": 
7.637246908547357e-05, "loss": 3.448, "step": 27830 }, { "epoch": 1.8912216333740997, "grad_norm": 2.2991464138031006, "learning_rate": 7.63682225845903e-05, "loss": 3.1454, "step": 27835 }, { "epoch": 1.8915613534447615, "grad_norm": 1.891642689704895, "learning_rate": 7.636397608370704e-05, "loss": 2.9483, "step": 27840 }, { "epoch": 1.8919010735154234, "grad_norm": 2.2040536403656006, "learning_rate": 7.635972958282375e-05, "loss": 2.9585, "step": 27845 }, { "epoch": 1.892240793586085, "grad_norm": 2.387117862701416, "learning_rate": 7.635548308194048e-05, "loss": 2.9323, "step": 27850 }, { "epoch": 1.8925805136567468, "grad_norm": 2.2065279483795166, "learning_rate": 7.635123658105722e-05, "loss": 3.4106, "step": 27855 }, { "epoch": 1.8929202337274087, "grad_norm": 2.1931374073028564, "learning_rate": 7.634699008017394e-05, "loss": 3.4514, "step": 27860 }, { "epoch": 1.8932599537980703, "grad_norm": 2.781662940979004, "learning_rate": 7.634274357929067e-05, "loss": 2.8987, "step": 27865 }, { "epoch": 1.8935996738687322, "grad_norm": 2.301693916320801, "learning_rate": 7.63384970784074e-05, "loss": 3.3632, "step": 27870 }, { "epoch": 1.893939393939394, "grad_norm": 2.279365301132202, "learning_rate": 7.633425057752412e-05, "loss": 3.2833, "step": 27875 }, { "epoch": 1.8942791140100557, "grad_norm": 2.288835287094116, "learning_rate": 7.633000407664085e-05, "loss": 3.1473, "step": 27880 }, { "epoch": 1.8946188340807175, "grad_norm": 2.3164305686950684, "learning_rate": 7.632575757575758e-05, "loss": 2.9438, "step": 27885 }, { "epoch": 1.8949585541513794, "grad_norm": 2.0501458644866943, "learning_rate": 7.63215110748743e-05, "loss": 3.1751, "step": 27890 }, { "epoch": 1.895298274222041, "grad_norm": 2.308576822280884, "learning_rate": 7.631726457399103e-05, "loss": 3.1881, "step": 27895 }, { "epoch": 1.8956379942927029, "grad_norm": 2.5395681858062744, "learning_rate": 7.631301807310776e-05, "loss": 3.1854, "step": 27900 }, { "epoch": 1.8959777143633647, 
"grad_norm": 3.258880615234375, "learning_rate": 7.630877157222449e-05, "loss": 3.0109, "step": 27905 }, { "epoch": 1.8963174344340263, "grad_norm": 2.1310782432556152, "learning_rate": 7.630452507134122e-05, "loss": 3.1678, "step": 27910 }, { "epoch": 1.8966571545046882, "grad_norm": 1.9601948261260986, "learning_rate": 7.630027857045795e-05, "loss": 3.089, "step": 27915 }, { "epoch": 1.89699687457535, "grad_norm": 2.471707582473755, "learning_rate": 7.629603206957467e-05, "loss": 3.1724, "step": 27920 }, { "epoch": 1.8973365946460117, "grad_norm": 2.219703435897827, "learning_rate": 7.62917855686914e-05, "loss": 3.1682, "step": 27925 }, { "epoch": 1.8976763147166733, "grad_norm": 1.866390585899353, "learning_rate": 7.628753906780813e-05, "loss": 3.235, "step": 27930 }, { "epoch": 1.8980160347873354, "grad_norm": 2.2726480960845947, "learning_rate": 7.628329256692486e-05, "loss": 3.1165, "step": 27935 }, { "epoch": 1.898355754857997, "grad_norm": 2.3328518867492676, "learning_rate": 7.627904606604159e-05, "loss": 3.094, "step": 27940 }, { "epoch": 1.8986954749286586, "grad_norm": 2.224870443344116, "learning_rate": 7.627479956515831e-05, "loss": 2.9332, "step": 27945 }, { "epoch": 1.8990351949993207, "grad_norm": 1.8989044427871704, "learning_rate": 7.627055306427504e-05, "loss": 3.0802, "step": 27950 }, { "epoch": 1.8993749150699824, "grad_norm": 2.091613292694092, "learning_rate": 7.626630656339177e-05, "loss": 2.8774, "step": 27955 }, { "epoch": 1.899714635140644, "grad_norm": 2.1311538219451904, "learning_rate": 7.62620600625085e-05, "loss": 3.0929, "step": 27960 }, { "epoch": 1.9000543552113058, "grad_norm": 3.082733392715454, "learning_rate": 7.625781356162523e-05, "loss": 3.1782, "step": 27965 }, { "epoch": 1.9003940752819677, "grad_norm": 2.2441892623901367, "learning_rate": 7.625356706074195e-05, "loss": 3.3129, "step": 27970 }, { "epoch": 1.9007337953526293, "grad_norm": 2.2801971435546875, "learning_rate": 7.624932055985868e-05, "loss": 2.9, "step": 
27975 }, { "epoch": 1.9010735154232912, "grad_norm": 2.091660499572754, "learning_rate": 7.624507405897541e-05, "loss": 3.1662, "step": 27980 }, { "epoch": 1.901413235493953, "grad_norm": 2.3531792163848877, "learning_rate": 7.624082755809214e-05, "loss": 3.3912, "step": 27985 }, { "epoch": 1.9017529555646147, "grad_norm": 2.55037522315979, "learning_rate": 7.623658105720887e-05, "loss": 3.0912, "step": 27990 }, { "epoch": 1.9020926756352765, "grad_norm": 3.2746686935424805, "learning_rate": 7.62323345563256e-05, "loss": 3.052, "step": 27995 }, { "epoch": 1.9024323957059384, "grad_norm": 1.8762394189834595, "learning_rate": 7.622808805544232e-05, "loss": 3.1586, "step": 28000 }, { "epoch": 1.9027721157766, "grad_norm": 2.116262674331665, "learning_rate": 7.622384155455905e-05, "loss": 3.1542, "step": 28005 }, { "epoch": 1.9031118358472618, "grad_norm": 2.5110292434692383, "learning_rate": 7.621959505367578e-05, "loss": 3.3174, "step": 28010 }, { "epoch": 1.9034515559179237, "grad_norm": 2.49763560295105, "learning_rate": 7.621534855279249e-05, "loss": 2.9178, "step": 28015 }, { "epoch": 1.9037912759885853, "grad_norm": 2.209655284881592, "learning_rate": 7.621110205190923e-05, "loss": 3.2218, "step": 28020 }, { "epoch": 1.9041309960592472, "grad_norm": 2.2340502738952637, "learning_rate": 7.620685555102596e-05, "loss": 3.1163, "step": 28025 }, { "epoch": 1.904470716129909, "grad_norm": 2.1133852005004883, "learning_rate": 7.620260905014268e-05, "loss": 3.1452, "step": 28030 }, { "epoch": 1.9048104362005707, "grad_norm": 2.2322001457214355, "learning_rate": 7.619836254925942e-05, "loss": 3.0691, "step": 28035 }, { "epoch": 1.9051501562712325, "grad_norm": 2.8358147144317627, "learning_rate": 7.619411604837615e-05, "loss": 3.0138, "step": 28040 }, { "epoch": 1.9054898763418944, "grad_norm": 2.207125663757324, "learning_rate": 7.618986954749286e-05, "loss": 3.245, "step": 28045 }, { "epoch": 1.905829596412556, "grad_norm": 2.081594705581665, "learning_rate": 
7.61856230466096e-05, "loss": 2.9488, "step": 28050 }, { "epoch": 1.9061693164832179, "grad_norm": 2.695155620574951, "learning_rate": 7.618137654572633e-05, "loss": 3.0864, "step": 28055 }, { "epoch": 1.9065090365538797, "grad_norm": 2.140300989151001, "learning_rate": 7.617713004484304e-05, "loss": 3.2436, "step": 28060 }, { "epoch": 1.9068487566245413, "grad_norm": 2.1456351280212402, "learning_rate": 7.617288354395979e-05, "loss": 2.9438, "step": 28065 }, { "epoch": 1.9071884766952032, "grad_norm": 1.9999964237213135, "learning_rate": 7.616863704307651e-05, "loss": 3.0302, "step": 28070 }, { "epoch": 1.907528196765865, "grad_norm": 2.269711494445801, "learning_rate": 7.616439054219323e-05, "loss": 3.2361, "step": 28075 }, { "epoch": 1.9078679168365267, "grad_norm": 2.0367424488067627, "learning_rate": 7.616014404130997e-05, "loss": 2.9758, "step": 28080 }, { "epoch": 1.9082076369071885, "grad_norm": 2.723017692565918, "learning_rate": 7.615589754042668e-05, "loss": 3.0374, "step": 28085 }, { "epoch": 1.9085473569778504, "grad_norm": 1.807457447052002, "learning_rate": 7.615165103954341e-05, "loss": 3.3083, "step": 28090 }, { "epoch": 1.908887077048512, "grad_norm": 2.172144889831543, "learning_rate": 7.614740453866015e-05, "loss": 2.9945, "step": 28095 }, { "epoch": 1.9092267971191736, "grad_norm": 2.8753693103790283, "learning_rate": 7.614315803777687e-05, "loss": 3.0463, "step": 28100 }, { "epoch": 1.9095665171898357, "grad_norm": 2.023721694946289, "learning_rate": 7.61389115368936e-05, "loss": 3.0665, "step": 28105 }, { "epoch": 1.9099062372604974, "grad_norm": 2.9845969676971436, "learning_rate": 7.613466503601034e-05, "loss": 3.1598, "step": 28110 }, { "epoch": 1.910245957331159, "grad_norm": 2.2160465717315674, "learning_rate": 7.613041853512705e-05, "loss": 3.0306, "step": 28115 }, { "epoch": 1.910585677401821, "grad_norm": 2.3750877380371094, "learning_rate": 7.61261720342438e-05, "loss": 3.3382, "step": 28120 }, { "epoch": 1.9109253974724827, 
"grad_norm": 2.2799172401428223, "learning_rate": 7.612192553336052e-05, "loss": 3.3384, "step": 28125 }, { "epoch": 1.9112651175431443, "grad_norm": 2.643886089324951, "learning_rate": 7.611767903247724e-05, "loss": 3.0926, "step": 28130 }, { "epoch": 1.9116048376138062, "grad_norm": 3.3736634254455566, "learning_rate": 7.611343253159398e-05, "loss": 2.9597, "step": 28135 }, { "epoch": 1.911944557684468, "grad_norm": 2.474228620529175, "learning_rate": 7.61091860307107e-05, "loss": 3.4044, "step": 28140 }, { "epoch": 1.9122842777551297, "grad_norm": 2.11260724067688, "learning_rate": 7.610493952982742e-05, "loss": 3.1853, "step": 28145 }, { "epoch": 1.9126239978257915, "grad_norm": 2.2474424839019775, "learning_rate": 7.610069302894416e-05, "loss": 3.1387, "step": 28150 }, { "epoch": 1.9129637178964534, "grad_norm": 2.4043467044830322, "learning_rate": 7.609644652806089e-05, "loss": 3.1623, "step": 28155 }, { "epoch": 1.913303437967115, "grad_norm": 2.7095489501953125, "learning_rate": 7.60922000271776e-05, "loss": 3.2455, "step": 28160 }, { "epoch": 1.9136431580377768, "grad_norm": 2.517821788787842, "learning_rate": 7.608795352629435e-05, "loss": 3.288, "step": 28165 }, { "epoch": 1.9139828781084387, "grad_norm": 2.698319673538208, "learning_rate": 7.608370702541106e-05, "loss": 3.1006, "step": 28170 }, { "epoch": 1.9143225981791003, "grad_norm": 2.6808128356933594, "learning_rate": 7.607946052452779e-05, "loss": 2.9293, "step": 28175 }, { "epoch": 1.9146623182497622, "grad_norm": 2.479747772216797, "learning_rate": 7.607521402364453e-05, "loss": 3.2191, "step": 28180 }, { "epoch": 1.915002038320424, "grad_norm": 2.543238639831543, "learning_rate": 7.607096752276124e-05, "loss": 3.0712, "step": 28185 }, { "epoch": 1.9153417583910857, "grad_norm": 2.1166558265686035, "learning_rate": 7.606672102187797e-05, "loss": 3.083, "step": 28190 }, { "epoch": 1.9156814784617475, "grad_norm": 2.145005226135254, "learning_rate": 7.606247452099471e-05, "loss": 3.0221, "step": 
28195 }, { "epoch": 1.9160211985324094, "grad_norm": 2.488499879837036, "learning_rate": 7.605822802011143e-05, "loss": 3.3669, "step": 28200 }, { "epoch": 1.916360918603071, "grad_norm": 2.58695387840271, "learning_rate": 7.605398151922816e-05, "loss": 3.1787, "step": 28205 }, { "epoch": 1.9167006386737329, "grad_norm": 1.8806232213974, "learning_rate": 7.60497350183449e-05, "loss": 3.0878, "step": 28210 }, { "epoch": 1.9170403587443947, "grad_norm": 1.9815678596496582, "learning_rate": 7.604548851746161e-05, "loss": 3.0222, "step": 28215 }, { "epoch": 1.9173800788150563, "grad_norm": 2.2918195724487305, "learning_rate": 7.604124201657834e-05, "loss": 3.0331, "step": 28220 }, { "epoch": 1.9177197988857182, "grad_norm": 2.391535520553589, "learning_rate": 7.603699551569508e-05, "loss": 2.8877, "step": 28225 }, { "epoch": 1.91805951895638, "grad_norm": 3.0718655586242676, "learning_rate": 7.60327490148118e-05, "loss": 2.8627, "step": 28230 }, { "epoch": 1.9183992390270417, "grad_norm": 2.5053040981292725, "learning_rate": 7.602850251392852e-05, "loss": 3.2241, "step": 28235 }, { "epoch": 1.9187389590977035, "grad_norm": 2.0100598335266113, "learning_rate": 7.602425601304525e-05, "loss": 3.2431, "step": 28240 }, { "epoch": 1.9190786791683654, "grad_norm": 2.525655508041382, "learning_rate": 7.602000951216198e-05, "loss": 3.0196, "step": 28245 }, { "epoch": 1.919418399239027, "grad_norm": 3.112536907196045, "learning_rate": 7.601576301127871e-05, "loss": 3.207, "step": 28250 }, { "epoch": 1.9197581193096889, "grad_norm": 2.2864222526550293, "learning_rate": 7.601151651039544e-05, "loss": 3.2297, "step": 28255 }, { "epoch": 1.9200978393803507, "grad_norm": 2.0935513973236084, "learning_rate": 7.600727000951216e-05, "loss": 3.1655, "step": 28260 }, { "epoch": 1.9204375594510124, "grad_norm": 2.8635947704315186, "learning_rate": 7.600302350862889e-05, "loss": 3.3129, "step": 28265 }, { "epoch": 1.920777279521674, "grad_norm": 2.450864315032959, "learning_rate": 
7.599877700774562e-05, "loss": 3.2408, "step": 28270 }, { "epoch": 1.921116999592336, "grad_norm": 2.263458251953125, "learning_rate": 7.599453050686235e-05, "loss": 3.2516, "step": 28275 }, { "epoch": 1.9214567196629977, "grad_norm": 1.7489906549453735, "learning_rate": 7.599028400597908e-05, "loss": 2.9025, "step": 28280 }, { "epoch": 1.9217964397336593, "grad_norm": 2.5138020515441895, "learning_rate": 7.59860375050958e-05, "loss": 3.1881, "step": 28285 }, { "epoch": 1.9221361598043214, "grad_norm": 1.9094632863998413, "learning_rate": 7.598179100421253e-05, "loss": 3.2932, "step": 28290 }, { "epoch": 1.922475879874983, "grad_norm": 4.343641757965088, "learning_rate": 7.597754450332926e-05, "loss": 3.127, "step": 28295 }, { "epoch": 1.9228155999456447, "grad_norm": 2.4466192722320557, "learning_rate": 7.597329800244599e-05, "loss": 3.2918, "step": 28300 }, { "epoch": 1.9231553200163065, "grad_norm": 2.015000820159912, "learning_rate": 7.596905150156272e-05, "loss": 3.1481, "step": 28305 }, { "epoch": 1.9234950400869684, "grad_norm": 2.3897221088409424, "learning_rate": 7.596480500067944e-05, "loss": 3.1582, "step": 28310 }, { "epoch": 1.92383476015763, "grad_norm": 2.1749634742736816, "learning_rate": 7.596055849979617e-05, "loss": 3.4227, "step": 28315 }, { "epoch": 1.9241744802282919, "grad_norm": 2.2386114597320557, "learning_rate": 7.59563119989129e-05, "loss": 3.2449, "step": 28320 }, { "epoch": 1.9245142002989537, "grad_norm": 2.021123170852661, "learning_rate": 7.595206549802963e-05, "loss": 3.1207, "step": 28325 }, { "epoch": 1.9248539203696153, "grad_norm": 2.5366077423095703, "learning_rate": 7.594781899714636e-05, "loss": 3.0246, "step": 28330 }, { "epoch": 1.9251936404402772, "grad_norm": 2.128523349761963, "learning_rate": 7.594357249626308e-05, "loss": 3.0812, "step": 28335 }, { "epoch": 1.925533360510939, "grad_norm": 2.2569377422332764, "learning_rate": 7.593932599537981e-05, "loss": 3.1366, "step": 28340 }, { "epoch": 1.9258730805816007, 
"grad_norm": 2.29042911529541, "learning_rate": 7.593507949449654e-05, "loss": 3.0291, "step": 28345 }, { "epoch": 1.9262128006522625, "grad_norm": 2.249439239501953, "learning_rate": 7.593083299361327e-05, "loss": 3.0819, "step": 28350 }, { "epoch": 1.9265525207229244, "grad_norm": 2.2209835052490234, "learning_rate": 7.592658649273e-05, "loss": 3.007, "step": 28355 }, { "epoch": 1.926892240793586, "grad_norm": 2.5707733631134033, "learning_rate": 7.592233999184672e-05, "loss": 2.9841, "step": 28360 }, { "epoch": 1.9272319608642479, "grad_norm": 2.266757011413574, "learning_rate": 7.591809349096345e-05, "loss": 3.197, "step": 28365 }, { "epoch": 1.9275716809349097, "grad_norm": 2.0532617568969727, "learning_rate": 7.591384699008017e-05, "loss": 3.0935, "step": 28370 }, { "epoch": 1.9279114010055713, "grad_norm": 2.235595464706421, "learning_rate": 7.590960048919691e-05, "loss": 3.1522, "step": 28375 }, { "epoch": 1.9282511210762332, "grad_norm": 2.7405800819396973, "learning_rate": 7.590535398831364e-05, "loss": 3.1264, "step": 28380 }, { "epoch": 1.928590841146895, "grad_norm": 1.7598448991775513, "learning_rate": 7.590110748743035e-05, "loss": 2.8771, "step": 28385 }, { "epoch": 1.9289305612175567, "grad_norm": 2.6497509479522705, "learning_rate": 7.589686098654709e-05, "loss": 3.1393, "step": 28390 }, { "epoch": 1.9292702812882185, "grad_norm": 2.1486170291900635, "learning_rate": 7.589261448566382e-05, "loss": 3.2534, "step": 28395 }, { "epoch": 1.9296100013588804, "grad_norm": 2.9134247303009033, "learning_rate": 7.588836798478053e-05, "loss": 3.2209, "step": 28400 }, { "epoch": 1.929949721429542, "grad_norm": 2.0863306522369385, "learning_rate": 7.588412148389728e-05, "loss": 3.1329, "step": 28405 }, { "epoch": 1.9302894415002039, "grad_norm": 1.93815279006958, "learning_rate": 7.5879874983014e-05, "loss": 3.0161, "step": 28410 }, { "epoch": 1.9306291615708657, "grad_norm": 2.3904831409454346, "learning_rate": 7.587562848213072e-05, "loss": 2.7333, "step": 
28415 }, { "epoch": 1.9309688816415274, "grad_norm": 2.2069056034088135, "learning_rate": 7.587138198124746e-05, "loss": 3.2735, "step": 28420 }, { "epoch": 1.9313086017121892, "grad_norm": 2.385458469390869, "learning_rate": 7.586713548036419e-05, "loss": 3.0873, "step": 28425 }, { "epoch": 1.931648321782851, "grad_norm": 2.5859811305999756, "learning_rate": 7.58628889794809e-05, "loss": 3.1178, "step": 28430 }, { "epoch": 1.9319880418535127, "grad_norm": 3.0195767879486084, "learning_rate": 7.585864247859764e-05, "loss": 3.1793, "step": 28435 }, { "epoch": 1.9323277619241743, "grad_norm": 2.378535509109497, "learning_rate": 7.585439597771436e-05, "loss": 2.9938, "step": 28440 }, { "epoch": 1.9326674819948364, "grad_norm": 2.2799861431121826, "learning_rate": 7.585014947683109e-05, "loss": 3.1205, "step": 28445 }, { "epoch": 1.933007202065498, "grad_norm": 2.9469921588897705, "learning_rate": 7.584590297594783e-05, "loss": 3.2269, "step": 28450 }, { "epoch": 1.9333469221361597, "grad_norm": 2.220541000366211, "learning_rate": 7.584165647506454e-05, "loss": 3.1968, "step": 28455 }, { "epoch": 1.9336866422068217, "grad_norm": 1.9632863998413086, "learning_rate": 7.583740997418128e-05, "loss": 3.1144, "step": 28460 }, { "epoch": 1.9340263622774834, "grad_norm": 2.367384433746338, "learning_rate": 7.583316347329801e-05, "loss": 3.1041, "step": 28465 }, { "epoch": 1.934366082348145, "grad_norm": 1.9280580282211304, "learning_rate": 7.582891697241473e-05, "loss": 2.9473, "step": 28470 }, { "epoch": 1.9347058024188069, "grad_norm": 2.664886474609375, "learning_rate": 7.582467047153147e-05, "loss": 3.439, "step": 28475 }, { "epoch": 1.9350455224894687, "grad_norm": 2.3256778717041016, "learning_rate": 7.58204239706482e-05, "loss": 3.1754, "step": 28480 }, { "epoch": 1.9353852425601303, "grad_norm": 2.391998052597046, "learning_rate": 7.581617746976491e-05, "loss": 3.147, "step": 28485 }, { "epoch": 1.9357249626307922, "grad_norm": 1.756085991859436, "learning_rate": 
7.581193096888165e-05, "loss": 2.7846, "step": 28490 }, { "epoch": 1.936064682701454, "grad_norm": 2.6136341094970703, "learning_rate": 7.580768446799838e-05, "loss": 3.0768, "step": 28495 }, { "epoch": 1.9364044027721157, "grad_norm": 2.296966552734375, "learning_rate": 7.58034379671151e-05, "loss": 3.1648, "step": 28500 }, { "epoch": 1.9367441228427775, "grad_norm": 2.477619171142578, "learning_rate": 7.579919146623184e-05, "loss": 3.3196, "step": 28505 }, { "epoch": 1.9370838429134394, "grad_norm": 2.0452630519866943, "learning_rate": 7.579494496534855e-05, "loss": 3.1232, "step": 28510 }, { "epoch": 1.937423562984101, "grad_norm": 2.4598324298858643, "learning_rate": 7.579069846446528e-05, "loss": 3.1533, "step": 28515 }, { "epoch": 1.9377632830547629, "grad_norm": 2.922856092453003, "learning_rate": 7.578645196358202e-05, "loss": 3.0587, "step": 28520 }, { "epoch": 1.9381030031254247, "grad_norm": 2.050163745880127, "learning_rate": 7.578220546269874e-05, "loss": 3.0809, "step": 28525 }, { "epoch": 1.9384427231960863, "grad_norm": 1.9244883060455322, "learning_rate": 7.577795896181546e-05, "loss": 3.149, "step": 28530 }, { "epoch": 1.9387824432667482, "grad_norm": 2.006455183029175, "learning_rate": 7.57737124609322e-05, "loss": 3.0052, "step": 28535 }, { "epoch": 1.93912216333741, "grad_norm": 2.5562117099761963, "learning_rate": 7.576946596004892e-05, "loss": 3.2334, "step": 28540 }, { "epoch": 1.9394618834080717, "grad_norm": 2.62188720703125, "learning_rate": 7.576521945916565e-05, "loss": 3.1534, "step": 28545 }, { "epoch": 1.9398016034787335, "grad_norm": 2.2384095191955566, "learning_rate": 7.576097295828239e-05, "loss": 3.0744, "step": 28550 }, { "epoch": 1.9401413235493954, "grad_norm": 2.587770938873291, "learning_rate": 7.57567264573991e-05, "loss": 3.1429, "step": 28555 }, { "epoch": 1.940481043620057, "grad_norm": 2.5841619968414307, "learning_rate": 7.575247995651583e-05, "loss": 3.2312, "step": 28560 }, { "epoch": 1.9408207636907189, 
"grad_norm": 1.8146272897720337, "learning_rate": 7.574823345563257e-05, "loss": 3.0229, "step": 28565 }, { "epoch": 1.9411604837613807, "grad_norm": 2.372926712036133, "learning_rate": 7.574398695474929e-05, "loss": 3.1789, "step": 28570 }, { "epoch": 1.9415002038320424, "grad_norm": 2.551459550857544, "learning_rate": 7.573974045386602e-05, "loss": 2.8598, "step": 28575 }, { "epoch": 1.9418399239027042, "grad_norm": 2.420760154724121, "learning_rate": 7.573549395298276e-05, "loss": 3.1541, "step": 28580 }, { "epoch": 1.942179643973366, "grad_norm": 2.0682671070098877, "learning_rate": 7.573124745209947e-05, "loss": 3.2815, "step": 28585 }, { "epoch": 1.9425193640440277, "grad_norm": 2.199331283569336, "learning_rate": 7.57270009512162e-05, "loss": 3.2044, "step": 28590 }, { "epoch": 1.9428590841146895, "grad_norm": 2.3669800758361816, "learning_rate": 7.572275445033293e-05, "loss": 3.1128, "step": 28595 }, { "epoch": 1.9431988041853514, "grad_norm": 2.5546624660491943, "learning_rate": 7.571850794944966e-05, "loss": 2.8527, "step": 28600 }, { "epoch": 1.943538524256013, "grad_norm": 2.579801559448242, "learning_rate": 7.571426144856638e-05, "loss": 3.0877, "step": 28605 }, { "epoch": 1.9438782443266747, "grad_norm": 2.436826467514038, "learning_rate": 7.571001494768311e-05, "loss": 2.966, "step": 28610 }, { "epoch": 1.9442179643973367, "grad_norm": 2.05155086517334, "learning_rate": 7.570576844679984e-05, "loss": 3.0932, "step": 28615 }, { "epoch": 1.9445576844679984, "grad_norm": 2.5746707916259766, "learning_rate": 7.570152194591657e-05, "loss": 2.6564, "step": 28620 }, { "epoch": 1.94489740453866, "grad_norm": 2.3523788452148438, "learning_rate": 7.56972754450333e-05, "loss": 3.017, "step": 28625 }, { "epoch": 1.945237124609322, "grad_norm": 2.542787790298462, "learning_rate": 7.569302894415002e-05, "loss": 3.0197, "step": 28630 }, { "epoch": 1.9455768446799837, "grad_norm": 2.112898349761963, "learning_rate": 7.568878244326675e-05, "loss": 3.0272, "step": 
28635 }, { "epoch": 1.9459165647506453, "grad_norm": 2.000229597091675, "learning_rate": 7.568453594238348e-05, "loss": 3.0129, "step": 28640 }, { "epoch": 1.9462562848213072, "grad_norm": 2.201838970184326, "learning_rate": 7.568028944150021e-05, "loss": 3.1465, "step": 28645 }, { "epoch": 1.946596004891969, "grad_norm": 2.045835494995117, "learning_rate": 7.567604294061694e-05, "loss": 2.8033, "step": 28650 }, { "epoch": 1.9469357249626307, "grad_norm": 2.6491544246673584, "learning_rate": 7.567179643973366e-05, "loss": 3.2981, "step": 28655 }, { "epoch": 1.9472754450332925, "grad_norm": 2.5857491493225098, "learning_rate": 7.566754993885039e-05, "loss": 3.4056, "step": 28660 }, { "epoch": 1.9476151651039544, "grad_norm": 2.873198986053467, "learning_rate": 7.566330343796712e-05, "loss": 2.9128, "step": 28665 }, { "epoch": 1.947954885174616, "grad_norm": 2.9196317195892334, "learning_rate": 7.565905693708385e-05, "loss": 2.9958, "step": 28670 }, { "epoch": 1.9482946052452779, "grad_norm": 3.147783041000366, "learning_rate": 7.565481043620058e-05, "loss": 2.8599, "step": 28675 }, { "epoch": 1.9486343253159397, "grad_norm": 2.119173526763916, "learning_rate": 7.56505639353173e-05, "loss": 3.3324, "step": 28680 }, { "epoch": 1.9489740453866014, "grad_norm": 3.1764020919799805, "learning_rate": 7.564631743443403e-05, "loss": 3.2604, "step": 28685 }, { "epoch": 1.9493137654572632, "grad_norm": 2.1890499591827393, "learning_rate": 7.564207093355076e-05, "loss": 3.2284, "step": 28690 }, { "epoch": 1.949653485527925, "grad_norm": 2.174365282058716, "learning_rate": 7.563782443266749e-05, "loss": 3.1214, "step": 28695 }, { "epoch": 1.9499932055985867, "grad_norm": 2.2260303497314453, "learning_rate": 7.563357793178422e-05, "loss": 3.0671, "step": 28700 }, { "epoch": 1.9503329256692485, "grad_norm": 2.211394786834717, "learning_rate": 7.562933143090094e-05, "loss": 3.0085, "step": 28705 }, { "epoch": 1.9506726457399104, "grad_norm": 2.9633936882019043, "learning_rate": 
7.562508493001766e-05, "loss": 3.3168, "step": 28710 }, { "epoch": 1.951012365810572, "grad_norm": 2.3385560512542725, "learning_rate": 7.56208384291344e-05, "loss": 3.188, "step": 28715 }, { "epoch": 1.9513520858812339, "grad_norm": 2.6494498252868652, "learning_rate": 7.561659192825113e-05, "loss": 3.149, "step": 28720 }, { "epoch": 1.9516918059518957, "grad_norm": 2.4933292865753174, "learning_rate": 7.561234542736784e-05, "loss": 3.2332, "step": 28725 }, { "epoch": 1.9520315260225574, "grad_norm": 2.6878976821899414, "learning_rate": 7.560809892648458e-05, "loss": 3.0681, "step": 28730 }, { "epoch": 1.9523712460932192, "grad_norm": 2.074894666671753, "learning_rate": 7.560385242560131e-05, "loss": 3.3515, "step": 28735 }, { "epoch": 1.952710966163881, "grad_norm": 2.1727371215820312, "learning_rate": 7.559960592471803e-05, "loss": 3.1968, "step": 28740 }, { "epoch": 1.9530506862345427, "grad_norm": 2.603647232055664, "learning_rate": 7.559535942383477e-05, "loss": 3.3344, "step": 28745 }, { "epoch": 1.9533904063052046, "grad_norm": 1.9196419715881348, "learning_rate": 7.55911129229515e-05, "loss": 3.1132, "step": 28750 }, { "epoch": 1.9537301263758664, "grad_norm": 2.3859190940856934, "learning_rate": 7.558686642206821e-05, "loss": 3.1421, "step": 28755 }, { "epoch": 1.954069846446528, "grad_norm": 2.1490862369537354, "learning_rate": 7.558261992118495e-05, "loss": 3.1863, "step": 28760 }, { "epoch": 1.95440956651719, "grad_norm": 2.631837844848633, "learning_rate": 7.557837342030168e-05, "loss": 3.2466, "step": 28765 }, { "epoch": 1.9547492865878517, "grad_norm": 2.3944027423858643, "learning_rate": 7.55741269194184e-05, "loss": 3.2221, "step": 28770 }, { "epoch": 1.9550890066585134, "grad_norm": 2.433523654937744, "learning_rate": 7.556988041853514e-05, "loss": 2.9589, "step": 28775 }, { "epoch": 1.955428726729175, "grad_norm": 1.9845753908157349, "learning_rate": 7.556563391765186e-05, "loss": 3.3447, "step": 28780 }, { "epoch": 1.955768446799837, 
"grad_norm": 3.184778928756714, "learning_rate": 7.556138741676858e-05, "loss": 3.1367, "step": 28785 }, { "epoch": 1.9561081668704987, "grad_norm": 1.9075074195861816, "learning_rate": 7.555714091588532e-05, "loss": 3.2884, "step": 28790 }, { "epoch": 1.9564478869411603, "grad_norm": 2.2537171840667725, "learning_rate": 7.555289441500203e-05, "loss": 3.0773, "step": 28795 }, { "epoch": 1.9567876070118224, "grad_norm": 1.979084849357605, "learning_rate": 7.554864791411878e-05, "loss": 3.0479, "step": 28800 }, { "epoch": 1.957127327082484, "grad_norm": 2.094853639602661, "learning_rate": 7.55444014132355e-05, "loss": 2.8567, "step": 28805 }, { "epoch": 1.9574670471531457, "grad_norm": 2.274618148803711, "learning_rate": 7.554015491235222e-05, "loss": 3.1104, "step": 28810 }, { "epoch": 1.9578067672238075, "grad_norm": 2.6801295280456543, "learning_rate": 7.553590841146896e-05, "loss": 2.962, "step": 28815 }, { "epoch": 1.9581464872944694, "grad_norm": 2.340153217315674, "learning_rate": 7.553166191058569e-05, "loss": 3.1754, "step": 28820 }, { "epoch": 1.958486207365131, "grad_norm": 2.2124459743499756, "learning_rate": 7.55274154097024e-05, "loss": 3.0577, "step": 28825 }, { "epoch": 1.9588259274357929, "grad_norm": 2.8771133422851562, "learning_rate": 7.552316890881914e-05, "loss": 2.9538, "step": 28830 }, { "epoch": 1.9591656475064547, "grad_norm": 2.695392608642578, "learning_rate": 7.551892240793587e-05, "loss": 2.9569, "step": 28835 }, { "epoch": 1.9595053675771164, "grad_norm": 2.3964059352874756, "learning_rate": 7.551467590705259e-05, "loss": 3.1677, "step": 28840 }, { "epoch": 1.9598450876477782, "grad_norm": 2.1804282665252686, "learning_rate": 7.551042940616933e-05, "loss": 3.1765, "step": 28845 }, { "epoch": 1.96018480771844, "grad_norm": 2.743041753768921, "learning_rate": 7.550618290528606e-05, "loss": 3.2061, "step": 28850 }, { "epoch": 1.9605245277891017, "grad_norm": 2.0886635780334473, "learning_rate": 7.550193640440277e-05, "loss": 3.1321, 
"step": 28855 }, { "epoch": 1.9608642478597635, "grad_norm": 2.2183334827423096, "learning_rate": 7.549768990351951e-05, "loss": 3.2593, "step": 28860 }, { "epoch": 1.9612039679304254, "grad_norm": 1.7419331073760986, "learning_rate": 7.549344340263623e-05, "loss": 3.1154, "step": 28865 }, { "epoch": 1.961543688001087, "grad_norm": 2.314300298690796, "learning_rate": 7.548919690175295e-05, "loss": 3.0786, "step": 28870 }, { "epoch": 1.9618834080717489, "grad_norm": 2.2278547286987305, "learning_rate": 7.54849504008697e-05, "loss": 3.1487, "step": 28875 }, { "epoch": 1.9622231281424107, "grad_norm": 1.8488706350326538, "learning_rate": 7.548070389998641e-05, "loss": 3.1022, "step": 28880 }, { "epoch": 1.9625628482130724, "grad_norm": 2.755526542663574, "learning_rate": 7.547645739910314e-05, "loss": 2.8684, "step": 28885 }, { "epoch": 1.9629025682837342, "grad_norm": 2.208143472671509, "learning_rate": 7.547221089821988e-05, "loss": 3.0988, "step": 28890 }, { "epoch": 1.963242288354396, "grad_norm": 2.841569662094116, "learning_rate": 7.54679643973366e-05, "loss": 2.9577, "step": 28895 }, { "epoch": 1.9635820084250577, "grad_norm": 2.515333652496338, "learning_rate": 7.546371789645332e-05, "loss": 3.113, "step": 28900 }, { "epoch": 1.9639217284957196, "grad_norm": 2.9101030826568604, "learning_rate": 7.545947139557006e-05, "loss": 3.3685, "step": 28905 }, { "epoch": 1.9642614485663814, "grad_norm": 2.1407253742218018, "learning_rate": 7.545522489468678e-05, "loss": 2.9551, "step": 28910 }, { "epoch": 1.964601168637043, "grad_norm": 2.2798993587493896, "learning_rate": 7.54509783938035e-05, "loss": 2.9902, "step": 28915 }, { "epoch": 1.964940888707705, "grad_norm": 2.0801477432250977, "learning_rate": 7.544673189292025e-05, "loss": 3.309, "step": 28920 }, { "epoch": 1.9652806087783667, "grad_norm": 2.0574846267700195, "learning_rate": 7.544248539203696e-05, "loss": 2.9815, "step": 28925 }, { "epoch": 1.9656203288490284, "grad_norm": 2.192553758621216, 
"learning_rate": 7.543823889115369e-05, "loss": 3.0721, "step": 28930 }, { "epoch": 1.9659600489196902, "grad_norm": 2.302954912185669, "learning_rate": 7.543399239027042e-05, "loss": 3.0172, "step": 28935 }, { "epoch": 1.966299768990352, "grad_norm": 2.1968162059783936, "learning_rate": 7.542974588938715e-05, "loss": 3.0592, "step": 28940 }, { "epoch": 1.9666394890610137, "grad_norm": 2.003596544265747, "learning_rate": 7.542549938850387e-05, "loss": 3.2548, "step": 28945 }, { "epoch": 1.9669792091316753, "grad_norm": 2.767369508743286, "learning_rate": 7.54212528876206e-05, "loss": 3.3981, "step": 28950 }, { "epoch": 1.9673189292023374, "grad_norm": 2.5868186950683594, "learning_rate": 7.541700638673733e-05, "loss": 2.9734, "step": 28955 }, { "epoch": 1.967658649272999, "grad_norm": 2.622058868408203, "learning_rate": 7.541360918603071e-05, "loss": 2.896, "step": 28960 }, { "epoch": 1.9679983693436607, "grad_norm": 2.7104122638702393, "learning_rate": 7.540936268514744e-05, "loss": 3.0907, "step": 28965 }, { "epoch": 1.9683380894143228, "grad_norm": 2.5816166400909424, "learning_rate": 7.540511618426417e-05, "loss": 3.1513, "step": 28970 }, { "epoch": 1.9686778094849844, "grad_norm": 2.4295477867126465, "learning_rate": 7.54008696833809e-05, "loss": 2.9098, "step": 28975 }, { "epoch": 1.969017529555646, "grad_norm": 2.123857259750366, "learning_rate": 7.539662318249762e-05, "loss": 2.9782, "step": 28980 }, { "epoch": 1.9693572496263079, "grad_norm": 3.0194756984710693, "learning_rate": 7.539237668161435e-05, "loss": 2.8823, "step": 28985 }, { "epoch": 1.9696969696969697, "grad_norm": 2.3843750953674316, "learning_rate": 7.538813018073108e-05, "loss": 3.1841, "step": 28990 }, { "epoch": 1.9700366897676314, "grad_norm": 2.0697379112243652, "learning_rate": 7.538388367984781e-05, "loss": 2.9336, "step": 28995 }, { "epoch": 1.9703764098382932, "grad_norm": 2.2745234966278076, "learning_rate": 7.537963717896454e-05, "loss": 3.1089, "step": 29000 }, { "epoch": 
1.970716129908955, "grad_norm": 2.0290029048919678, "learning_rate": 7.537539067808126e-05, "loss": 3.0779, "step": 29005 }, { "epoch": 1.9710558499796167, "grad_norm": 2.124476909637451, "learning_rate": 7.537114417719799e-05, "loss": 2.9958, "step": 29010 }, { "epoch": 1.9713955700502785, "grad_norm": 2.728823661804199, "learning_rate": 7.536689767631472e-05, "loss": 3.1017, "step": 29015 }, { "epoch": 1.9717352901209404, "grad_norm": 2.4377262592315674, "learning_rate": 7.536265117543145e-05, "loss": 3.2409, "step": 29020 }, { "epoch": 1.972075010191602, "grad_norm": 2.7641658782958984, "learning_rate": 7.535840467454818e-05, "loss": 3.2079, "step": 29025 }, { "epoch": 1.9724147302622639, "grad_norm": 3.133378267288208, "learning_rate": 7.53541581736649e-05, "loss": 3.2692, "step": 29030 }, { "epoch": 1.9727544503329257, "grad_norm": 2.5248780250549316, "learning_rate": 7.534991167278163e-05, "loss": 3.1707, "step": 29035 }, { "epoch": 1.9730941704035874, "grad_norm": 2.5250210762023926, "learning_rate": 7.534566517189836e-05, "loss": 3.1433, "step": 29040 }, { "epoch": 1.9734338904742492, "grad_norm": 2.380455493927002, "learning_rate": 7.534141867101509e-05, "loss": 3.0855, "step": 29045 }, { "epoch": 1.973773610544911, "grad_norm": 2.764409303665161, "learning_rate": 7.533717217013182e-05, "loss": 3.1247, "step": 29050 }, { "epoch": 1.9741133306155727, "grad_norm": 2.933856964111328, "learning_rate": 7.533292566924854e-05, "loss": 3.3306, "step": 29055 }, { "epoch": 1.9744530506862346, "grad_norm": 2.228053331375122, "learning_rate": 7.532867916836527e-05, "loss": 3.1422, "step": 29060 }, { "epoch": 1.9747927707568964, "grad_norm": 2.6253631114959717, "learning_rate": 7.5324432667482e-05, "loss": 3.3346, "step": 29065 }, { "epoch": 1.975132490827558, "grad_norm": 2.146010398864746, "learning_rate": 7.532018616659873e-05, "loss": 2.9861, "step": 29070 }, { "epoch": 1.97547221089822, "grad_norm": 2.421412944793701, "learning_rate": 7.531593966571546e-05, 
"loss": 3.0006, "step": 29075 }, { "epoch": 1.9758119309688817, "grad_norm": 2.507629871368408, "learning_rate": 7.531169316483218e-05, "loss": 3.054, "step": 29080 }, { "epoch": 1.9761516510395434, "grad_norm": 2.2739782333374023, "learning_rate": 7.530744666394891e-05, "loss": 3.2322, "step": 29085 }, { "epoch": 1.9764913711102052, "grad_norm": 2.339521646499634, "learning_rate": 7.530320016306564e-05, "loss": 3.1474, "step": 29090 }, { "epoch": 1.976831091180867, "grad_norm": 2.1287288665771484, "learning_rate": 7.529895366218237e-05, "loss": 3.1588, "step": 29095 }, { "epoch": 1.9771708112515287, "grad_norm": 2.123516321182251, "learning_rate": 7.52947071612991e-05, "loss": 2.8852, "step": 29100 }, { "epoch": 1.9775105313221906, "grad_norm": 1.8145041465759277, "learning_rate": 7.529046066041581e-05, "loss": 3.1893, "step": 29105 }, { "epoch": 1.9778502513928524, "grad_norm": 2.6469790935516357, "learning_rate": 7.528621415953255e-05, "loss": 2.8033, "step": 29110 }, { "epoch": 1.978189971463514, "grad_norm": 1.946053147315979, "learning_rate": 7.528196765864928e-05, "loss": 3.078, "step": 29115 }, { "epoch": 1.9785296915341757, "grad_norm": 2.475724697113037, "learning_rate": 7.5277721157766e-05, "loss": 3.2642, "step": 29120 }, { "epoch": 1.9788694116048378, "grad_norm": 2.0721795558929443, "learning_rate": 7.527347465688274e-05, "loss": 3.3805, "step": 29125 }, { "epoch": 1.9792091316754994, "grad_norm": 3.0353705883026123, "learning_rate": 7.526922815599946e-05, "loss": 3.1753, "step": 29130 }, { "epoch": 1.979548851746161, "grad_norm": 2.4231224060058594, "learning_rate": 7.526498165511618e-05, "loss": 3.2043, "step": 29135 }, { "epoch": 1.979888571816823, "grad_norm": 3.3645389080047607, "learning_rate": 7.526073515423292e-05, "loss": 2.9394, "step": 29140 }, { "epoch": 1.9802282918874847, "grad_norm": 2.067190647125244, "learning_rate": 7.525648865334965e-05, "loss": 3.0524, "step": 29145 }, { "epoch": 1.9805680119581464, "grad_norm": 2.42840313911438, 
"learning_rate": 7.525224215246636e-05, "loss": 3.1707, "step": 29150 }, { "epoch": 1.9809077320288082, "grad_norm": 2.1095645427703857, "learning_rate": 7.52479956515831e-05, "loss": 3.0823, "step": 29155 }, { "epoch": 1.98124745209947, "grad_norm": 3.0719072818756104, "learning_rate": 7.524374915069983e-05, "loss": 2.9233, "step": 29160 }, { "epoch": 1.9815871721701317, "grad_norm": 2.422792434692383, "learning_rate": 7.523950264981655e-05, "loss": 2.9013, "step": 29165 }, { "epoch": 1.9819268922407935, "grad_norm": 2.3153486251831055, "learning_rate": 7.523525614893329e-05, "loss": 3.1357, "step": 29170 }, { "epoch": 1.9822666123114554, "grad_norm": 2.009716033935547, "learning_rate": 7.523100964805002e-05, "loss": 3.3993, "step": 29175 }, { "epoch": 1.982606332382117, "grad_norm": 1.9454790353775024, "learning_rate": 7.522676314716673e-05, "loss": 3.0183, "step": 29180 }, { "epoch": 1.9829460524527789, "grad_norm": 1.904334306716919, "learning_rate": 7.522251664628347e-05, "loss": 3.1715, "step": 29185 }, { "epoch": 1.9832857725234407, "grad_norm": 2.2463202476501465, "learning_rate": 7.521827014540019e-05, "loss": 3.2941, "step": 29190 }, { "epoch": 1.9836254925941024, "grad_norm": 2.2557945251464844, "learning_rate": 7.521402364451692e-05, "loss": 3.2341, "step": 29195 }, { "epoch": 1.9839652126647642, "grad_norm": 2.2725160121917725, "learning_rate": 7.520977714363366e-05, "loss": 3.3968, "step": 29200 }, { "epoch": 1.984304932735426, "grad_norm": 2.4397733211517334, "learning_rate": 7.520553064275037e-05, "loss": 3.1323, "step": 29205 }, { "epoch": 1.9846446528060877, "grad_norm": 2.8582324981689453, "learning_rate": 7.52012841418671e-05, "loss": 2.9241, "step": 29210 }, { "epoch": 1.9849843728767496, "grad_norm": 2.4458377361297607, "learning_rate": 7.519703764098384e-05, "loss": 3.2787, "step": 29215 }, { "epoch": 1.9853240929474114, "grad_norm": 1.938068151473999, "learning_rate": 7.519279114010056e-05, "loss": 2.9329, "step": 29220 }, { "epoch": 
1.985663813018073, "grad_norm": 2.4035961627960205, "learning_rate": 7.518854463921728e-05, "loss": 3.1059, "step": 29225 }, { "epoch": 1.986003533088735, "grad_norm": 2.759819507598877, "learning_rate": 7.518429813833402e-05, "loss": 3.1266, "step": 29230 }, { "epoch": 1.9863432531593967, "grad_norm": 2.427544116973877, "learning_rate": 7.518005163745074e-05, "loss": 3.0647, "step": 29235 }, { "epoch": 1.9866829732300584, "grad_norm": 2.5108723640441895, "learning_rate": 7.517580513656747e-05, "loss": 2.9191, "step": 29240 }, { "epoch": 1.9870226933007202, "grad_norm": 2.357426881790161, "learning_rate": 7.517155863568421e-05, "loss": 3.2065, "step": 29245 }, { "epoch": 1.987362413371382, "grad_norm": 2.159099817276001, "learning_rate": 7.516731213480092e-05, "loss": 3.2238, "step": 29250 }, { "epoch": 1.9877021334420437, "grad_norm": 3.036363124847412, "learning_rate": 7.516306563391765e-05, "loss": 3.1361, "step": 29255 }, { "epoch": 1.9880418535127056, "grad_norm": 2.1075751781463623, "learning_rate": 7.515881913303438e-05, "loss": 3.3899, "step": 29260 }, { "epoch": 1.9883815735833674, "grad_norm": 2.155301570892334, "learning_rate": 7.515457263215111e-05, "loss": 2.8848, "step": 29265 }, { "epoch": 1.988721293654029, "grad_norm": 2.3294179439544678, "learning_rate": 7.515032613126784e-05, "loss": 3.0982, "step": 29270 }, { "epoch": 1.989061013724691, "grad_norm": 2.1296825408935547, "learning_rate": 7.514607963038456e-05, "loss": 2.8994, "step": 29275 }, { "epoch": 1.9894007337953528, "grad_norm": 1.901005744934082, "learning_rate": 7.514183312950129e-05, "loss": 3.1666, "step": 29280 }, { "epoch": 1.9897404538660144, "grad_norm": 3.6504855155944824, "learning_rate": 7.513758662861802e-05, "loss": 3.0361, "step": 29285 }, { "epoch": 1.990080173936676, "grad_norm": 1.8268702030181885, "learning_rate": 7.513334012773475e-05, "loss": 3.0225, "step": 29290 }, { "epoch": 1.990419894007338, "grad_norm": 1.9732341766357422, "learning_rate": 7.512909362685148e-05, 
"loss": 3.0278, "step": 29295 }, { "epoch": 1.9907596140779997, "grad_norm": 2.3141682147979736, "learning_rate": 7.51248471259682e-05, "loss": 3.253, "step": 29300 }, { "epoch": 1.9910993341486614, "grad_norm": 2.2490394115448, "learning_rate": 7.512060062508493e-05, "loss": 2.9211, "step": 29305 }, { "epoch": 1.9914390542193234, "grad_norm": 1.9065049886703491, "learning_rate": 7.511635412420166e-05, "loss": 3.1539, "step": 29310 }, { "epoch": 1.991778774289985, "grad_norm": 2.147704601287842, "learning_rate": 7.511210762331839e-05, "loss": 3.1215, "step": 29315 }, { "epoch": 1.9921184943606467, "grad_norm": 2.1542935371398926, "learning_rate": 7.510786112243512e-05, "loss": 3.1616, "step": 29320 }, { "epoch": 1.9924582144313085, "grad_norm": 2.464399814605713, "learning_rate": 7.510361462155184e-05, "loss": 3.1584, "step": 29325 }, { "epoch": 1.9927979345019704, "grad_norm": 1.989039659500122, "learning_rate": 7.509936812066857e-05, "loss": 2.9447, "step": 29330 }, { "epoch": 1.993137654572632, "grad_norm": 2.792526960372925, "learning_rate": 7.50951216197853e-05, "loss": 3.0083, "step": 29335 }, { "epoch": 1.9934773746432939, "grad_norm": 2.1241977214813232, "learning_rate": 7.509087511890203e-05, "loss": 3.2801, "step": 29340 }, { "epoch": 1.9938170947139557, "grad_norm": 3.040813446044922, "learning_rate": 7.508662861801876e-05, "loss": 2.914, "step": 29345 }, { "epoch": 1.9941568147846174, "grad_norm": 2.126187324523926, "learning_rate": 7.508238211713548e-05, "loss": 3.3349, "step": 29350 }, { "epoch": 1.9944965348552792, "grad_norm": 2.157275676727295, "learning_rate": 7.507813561625221e-05, "loss": 2.791, "step": 29355 }, { "epoch": 1.994836254925941, "grad_norm": 3.333425521850586, "learning_rate": 7.507388911536894e-05, "loss": 3.1522, "step": 29360 }, { "epoch": 1.9951759749966027, "grad_norm": 2.5370683670043945, "learning_rate": 7.506964261448567e-05, "loss": 3.1743, "step": 29365 }, { "epoch": 1.9955156950672646, "grad_norm": 2.2897889614105225, 
"learning_rate": 7.50653961136024e-05, "loss": 3.1302, "step": 29370 }, { "epoch": 1.9958554151379264, "grad_norm": 2.421088218688965, "learning_rate": 7.506114961271912e-05, "loss": 3.2489, "step": 29375 }, { "epoch": 1.996195135208588, "grad_norm": 2.0058069229125977, "learning_rate": 7.505690311183585e-05, "loss": 3.2967, "step": 29380 }, { "epoch": 1.99653485527925, "grad_norm": 2.79900860786438, "learning_rate": 7.505265661095258e-05, "loss": 3.0705, "step": 29385 }, { "epoch": 1.9968745753499118, "grad_norm": 2.259375810623169, "learning_rate": 7.504841011006931e-05, "loss": 2.8808, "step": 29390 }, { "epoch": 1.9972142954205734, "grad_norm": 1.880364179611206, "learning_rate": 7.504416360918604e-05, "loss": 3.2924, "step": 29395 }, { "epoch": 1.9975540154912352, "grad_norm": 2.0297441482543945, "learning_rate": 7.503991710830276e-05, "loss": 3.2808, "step": 29400 }, { "epoch": 1.997893735561897, "grad_norm": 2.408135414123535, "learning_rate": 7.503567060741949e-05, "loss": 3.071, "step": 29405 }, { "epoch": 1.9982334556325587, "grad_norm": 3.0595507621765137, "learning_rate": 7.503142410653622e-05, "loss": 3.1848, "step": 29410 }, { "epoch": 1.9985731757032206, "grad_norm": 2.4421892166137695, "learning_rate": 7.502717760565295e-05, "loss": 2.9379, "step": 29415 }, { "epoch": 1.9989128957738824, "grad_norm": 2.7105507850646973, "learning_rate": 7.502293110476968e-05, "loss": 3.3322, "step": 29420 }, { "epoch": 1.999252615844544, "grad_norm": 1.8133794069290161, "learning_rate": 7.50186846038864e-05, "loss": 3.1868, "step": 29425 }, { "epoch": 1.999592335915206, "grad_norm": 2.039344310760498, "learning_rate": 7.501443810300313e-05, "loss": 3.1575, "step": 29430 }, { "epoch": 1.9999320559858678, "grad_norm": 1.8784676790237427, "learning_rate": 7.501019160211986e-05, "loss": 3.2324, "step": 29435 }, { "epoch": 2.0, "eval_bertscore": { "f1": 0.841848739168073, "precision": 0.8440065389918384, "recall": 0.8405869630420725 }, "eval_bleu_4": 0.01946428804896622, 
"eval_exact_match": 0.00038763446070355656, "eval_loss": 3.3244004249572754, "eval_meteor": 0.0926989136406381, "eval_rouge": { "rouge1": 0.1285878504320886, "rouge2": 0.01960166083351067, "rougeL": 0.1112510812129393, "rougeLsum": 0.11125047123427585 }, "eval_runtime": 1674.3038, "eval_samples_per_second": 6.163, "eval_steps_per_second": 0.77, "step": 29436 }, { "epoch": 2.0002717760565294, "grad_norm": 2.6795079708099365, "learning_rate": 7.500594510123659e-05, "loss": 2.9364, "step": 29440 }, { "epoch": 2.000611496127191, "grad_norm": 1.8579944372177124, "learning_rate": 7.500169860035332e-05, "loss": 2.8412, "step": 29445 }, { "epoch": 2.000951216197853, "grad_norm": 2.0560669898986816, "learning_rate": 7.499745209947004e-05, "loss": 2.9165, "step": 29450 }, { "epoch": 2.0012909362685147, "grad_norm": 1.9636757373809814, "learning_rate": 7.499320559858677e-05, "loss": 2.7912, "step": 29455 }, { "epoch": 2.0016306563391764, "grad_norm": 2.5882821083068848, "learning_rate": 7.498895909770349e-05, "loss": 2.8707, "step": 29460 }, { "epoch": 2.0019703764098384, "grad_norm": 2.8081557750701904, "learning_rate": 7.498471259682023e-05, "loss": 2.6846, "step": 29465 }, { "epoch": 2.0023100964805, "grad_norm": 2.8940157890319824, "learning_rate": 7.498046609593696e-05, "loss": 2.6389, "step": 29470 }, { "epoch": 2.0026498165511617, "grad_norm": 2.80936861038208, "learning_rate": 7.497621959505367e-05, "loss": 2.7701, "step": 29475 }, { "epoch": 2.0029895366218238, "grad_norm": 2.4131548404693604, "learning_rate": 7.497197309417041e-05, "loss": 2.7458, "step": 29480 }, { "epoch": 2.0033292566924854, "grad_norm": 2.389918088912964, "learning_rate": 7.496772659328714e-05, "loss": 2.7065, "step": 29485 }, { "epoch": 2.003668976763147, "grad_norm": 2.5859525203704834, "learning_rate": 7.496348009240385e-05, "loss": 2.9326, "step": 29490 }, { "epoch": 2.004008696833809, "grad_norm": 2.18538761138916, "learning_rate": 7.49592335915206e-05, "loss": 3.0994, "step": 29495 }, { 
"epoch": 2.0043484169044707, "grad_norm": 2.3322620391845703, "learning_rate": 7.495498709063732e-05, "loss": 2.5589, "step": 29500 }, { "epoch": 2.0046881369751324, "grad_norm": 2.770749092102051, "learning_rate": 7.495074058975404e-05, "loss": 2.9452, "step": 29505 }, { "epoch": 2.0050278570457944, "grad_norm": 3.7293803691864014, "learning_rate": 7.494649408887078e-05, "loss": 3.1104, "step": 29510 }, { "epoch": 2.005367577116456, "grad_norm": 2.3366551399230957, "learning_rate": 7.494224758798751e-05, "loss": 2.8419, "step": 29515 }, { "epoch": 2.0057072971871177, "grad_norm": 3.4655368328094482, "learning_rate": 7.493800108710422e-05, "loss": 2.6913, "step": 29520 }, { "epoch": 2.00604701725778, "grad_norm": 2.3731484413146973, "learning_rate": 7.493375458622096e-05, "loss": 3.0112, "step": 29525 }, { "epoch": 2.0063867373284414, "grad_norm": 2.3347737789154053, "learning_rate": 7.492950808533769e-05, "loss": 2.8945, "step": 29530 }, { "epoch": 2.006726457399103, "grad_norm": 2.6611995697021484, "learning_rate": 7.49252615844544e-05, "loss": 3.0057, "step": 29535 }, { "epoch": 2.007066177469765, "grad_norm": 2.458369016647339, "learning_rate": 7.492101508357115e-05, "loss": 2.9144, "step": 29540 }, { "epoch": 2.0074058975404268, "grad_norm": 2.526743173599243, "learning_rate": 7.491676858268786e-05, "loss": 3.1454, "step": 29545 }, { "epoch": 2.0077456176110884, "grad_norm": 2.6925439834594727, "learning_rate": 7.491252208180459e-05, "loss": 2.3647, "step": 29550 }, { "epoch": 2.0080853376817505, "grad_norm": 2.265676736831665, "learning_rate": 7.490827558092133e-05, "loss": 2.9153, "step": 29555 }, { "epoch": 2.008425057752412, "grad_norm": 2.5735394954681396, "learning_rate": 7.490402908003805e-05, "loss": 2.7733, "step": 29560 }, { "epoch": 2.0087647778230737, "grad_norm": 2.0370891094207764, "learning_rate": 7.489978257915477e-05, "loss": 2.8148, "step": 29565 }, { "epoch": 2.0091044978937354, "grad_norm": 2.738034725189209, "learning_rate": 
7.489553607827152e-05, "loss": 3.0228, "step": 29570 }, { "epoch": 2.0094442179643974, "grad_norm": 2.0666706562042236, "learning_rate": 7.489128957738823e-05, "loss": 2.8504, "step": 29575 }, { "epoch": 2.009783938035059, "grad_norm": 2.385753631591797, "learning_rate": 7.488704307650496e-05, "loss": 3.0118, "step": 29580 }, { "epoch": 2.0101236581057207, "grad_norm": 1.8605682849884033, "learning_rate": 7.48827965756217e-05, "loss": 2.8524, "step": 29585 }, { "epoch": 2.0104633781763828, "grad_norm": 2.7440872192382812, "learning_rate": 7.487855007473841e-05, "loss": 2.769, "step": 29590 }, { "epoch": 2.0108030982470444, "grad_norm": 2.7751457691192627, "learning_rate": 7.487430357385514e-05, "loss": 2.8537, "step": 29595 }, { "epoch": 2.011142818317706, "grad_norm": 2.5925710201263428, "learning_rate": 7.487005707297188e-05, "loss": 2.6804, "step": 29600 }, { "epoch": 2.011482538388368, "grad_norm": 2.4397692680358887, "learning_rate": 7.48658105720886e-05, "loss": 2.7212, "step": 29605 }, { "epoch": 2.0118222584590297, "grad_norm": 2.528805732727051, "learning_rate": 7.486156407120533e-05, "loss": 2.8698, "step": 29610 }, { "epoch": 2.0121619785296914, "grad_norm": 2.8329408168792725, "learning_rate": 7.485731757032205e-05, "loss": 3.0218, "step": 29615 }, { "epoch": 2.0125016986003534, "grad_norm": 2.5902037620544434, "learning_rate": 7.485307106943878e-05, "loss": 2.8593, "step": 29620 }, { "epoch": 2.012841418671015, "grad_norm": 2.289531946182251, "learning_rate": 7.484882456855551e-05, "loss": 2.7867, "step": 29625 }, { "epoch": 2.0131811387416767, "grad_norm": 2.7444756031036377, "learning_rate": 7.484457806767224e-05, "loss": 2.972, "step": 29630 }, { "epoch": 2.0135208588123388, "grad_norm": 2.2614378929138184, "learning_rate": 7.484033156678897e-05, "loss": 2.7588, "step": 29635 }, { "epoch": 2.0138605788830004, "grad_norm": 2.463730812072754, "learning_rate": 7.48360850659057e-05, "loss": 2.5112, "step": 29640 }, { "epoch": 2.014200298953662, 
"grad_norm": 2.013122797012329, "learning_rate": 7.483183856502242e-05, "loss": 2.8146, "step": 29645 }, { "epoch": 2.014540019024324, "grad_norm": 2.124673366546631, "learning_rate": 7.482759206413915e-05, "loss": 2.8551, "step": 29650 }, { "epoch": 2.0148797390949857, "grad_norm": 3.1076674461364746, "learning_rate": 7.482334556325588e-05, "loss": 3.0486, "step": 29655 }, { "epoch": 2.0152194591656474, "grad_norm": 2.3551108837127686, "learning_rate": 7.48190990623726e-05, "loss": 2.9822, "step": 29660 }, { "epoch": 2.0155591792363095, "grad_norm": 2.4393575191497803, "learning_rate": 7.481485256148933e-05, "loss": 2.9179, "step": 29665 }, { "epoch": 2.015898899306971, "grad_norm": 2.585097551345825, "learning_rate": 7.481060606060606e-05, "loss": 2.7696, "step": 29670 }, { "epoch": 2.0162386193776327, "grad_norm": 2.9331648349761963, "learning_rate": 7.480635955972279e-05, "loss": 2.5015, "step": 29675 }, { "epoch": 2.016578339448295, "grad_norm": 2.4432363510131836, "learning_rate": 7.480211305883952e-05, "loss": 2.9582, "step": 29680 }, { "epoch": 2.0169180595189564, "grad_norm": 2.635822057723999, "learning_rate": 7.479786655795625e-05, "loss": 2.8276, "step": 29685 }, { "epoch": 2.017257779589618, "grad_norm": 2.9043288230895996, "learning_rate": 7.479362005707297e-05, "loss": 2.954, "step": 29690 }, { "epoch": 2.01759749966028, "grad_norm": 2.139641284942627, "learning_rate": 7.47893735561897e-05, "loss": 3.0699, "step": 29695 }, { "epoch": 2.0179372197309418, "grad_norm": 3.0969314575195312, "learning_rate": 7.478512705530643e-05, "loss": 2.7995, "step": 29700 }, { "epoch": 2.0182769398016034, "grad_norm": 2.36922287940979, "learning_rate": 7.478088055442316e-05, "loss": 2.7186, "step": 29705 }, { "epoch": 2.0186166598722655, "grad_norm": 2.4031357765197754, "learning_rate": 7.477663405353989e-05, "loss": 2.995, "step": 29710 }, { "epoch": 2.018956379942927, "grad_norm": 2.784118175506592, "learning_rate": 7.477238755265661e-05, "loss": 2.8399, "step": 
29715 }, { "epoch": 2.0192961000135887, "grad_norm": 2.167398691177368, "learning_rate": 7.476814105177334e-05, "loss": 2.6851, "step": 29720 }, { "epoch": 2.0196358200842504, "grad_norm": 2.37473726272583, "learning_rate": 7.476389455089007e-05, "loss": 2.9162, "step": 29725 }, { "epoch": 2.0199755401549124, "grad_norm": 2.7478296756744385, "learning_rate": 7.47596480500068e-05, "loss": 2.5132, "step": 29730 }, { "epoch": 2.020315260225574, "grad_norm": 2.5670759677886963, "learning_rate": 7.475540154912353e-05, "loss": 2.6938, "step": 29735 }, { "epoch": 2.0206549802962357, "grad_norm": 2.4618723392486572, "learning_rate": 7.475115504824025e-05, "loss": 2.6926, "step": 29740 }, { "epoch": 2.0209947003668978, "grad_norm": 2.6811130046844482, "learning_rate": 7.474690854735698e-05, "loss": 2.7106, "step": 29745 }, { "epoch": 2.0213344204375594, "grad_norm": 2.6105988025665283, "learning_rate": 7.474266204647371e-05, "loss": 2.6724, "step": 29750 }, { "epoch": 2.021674140508221, "grad_norm": 2.1424081325531006, "learning_rate": 7.473841554559044e-05, "loss": 2.859, "step": 29755 }, { "epoch": 2.022013860578883, "grad_norm": 2.196782350540161, "learning_rate": 7.473416904470717e-05, "loss": 3.1088, "step": 29760 }, { "epoch": 2.0223535806495447, "grad_norm": 2.355724573135376, "learning_rate": 7.47299225438239e-05, "loss": 2.9257, "step": 29765 }, { "epoch": 2.0226933007202064, "grad_norm": 2.6831564903259277, "learning_rate": 7.472567604294062e-05, "loss": 2.6668, "step": 29770 }, { "epoch": 2.0230330207908684, "grad_norm": 2.6010472774505615, "learning_rate": 7.472142954205735e-05, "loss": 2.7811, "step": 29775 }, { "epoch": 2.02337274086153, "grad_norm": 2.7459564208984375, "learning_rate": 7.471718304117408e-05, "loss": 2.8719, "step": 29780 }, { "epoch": 2.0237124609321917, "grad_norm": 2.3936069011688232, "learning_rate": 7.47129365402908e-05, "loss": 2.7923, "step": 29785 }, { "epoch": 2.024052181002854, "grad_norm": 2.1768381595611572, "learning_rate": 
7.470869003940753e-05, "loss": 2.9082, "step": 29790 }, { "epoch": 2.0243919010735154, "grad_norm": 2.3544816970825195, "learning_rate": 7.470444353852426e-05, "loss": 2.8488, "step": 29795 }, { "epoch": 2.024731621144177, "grad_norm": 2.107367992401123, "learning_rate": 7.470019703764099e-05, "loss": 3.1517, "step": 29800 }, { "epoch": 2.025071341214839, "grad_norm": 2.631769895553589, "learning_rate": 7.469595053675772e-05, "loss": 2.9465, "step": 29805 }, { "epoch": 2.0254110612855007, "grad_norm": 2.39536452293396, "learning_rate": 7.469170403587445e-05, "loss": 2.9528, "step": 29810 }, { "epoch": 2.0257507813561624, "grad_norm": 2.543368101119995, "learning_rate": 7.468745753499116e-05, "loss": 2.7987, "step": 29815 }, { "epoch": 2.0260905014268245, "grad_norm": 2.064850091934204, "learning_rate": 7.46832110341079e-05, "loss": 2.7314, "step": 29820 }, { "epoch": 2.026430221497486, "grad_norm": 2.4331371784210205, "learning_rate": 7.467896453322463e-05, "loss": 2.8943, "step": 29825 }, { "epoch": 2.0267699415681477, "grad_norm": 2.669968843460083, "learning_rate": 7.467471803234135e-05, "loss": 2.8123, "step": 29830 }, { "epoch": 2.02710966163881, "grad_norm": 2.492422342300415, "learning_rate": 7.467047153145809e-05, "loss": 2.6961, "step": 29835 }, { "epoch": 2.0274493817094714, "grad_norm": 3.306272029876709, "learning_rate": 7.466622503057481e-05, "loss": 3.157, "step": 29840 }, { "epoch": 2.027789101780133, "grad_norm": 2.7194485664367676, "learning_rate": 7.466197852969153e-05, "loss": 2.7129, "step": 29845 }, { "epoch": 2.028128821850795, "grad_norm": 2.3458445072174072, "learning_rate": 7.465773202880827e-05, "loss": 2.9062, "step": 29850 }, { "epoch": 2.0284685419214568, "grad_norm": 2.3543636798858643, "learning_rate": 7.4653485527925e-05, "loss": 3.0495, "step": 29855 }, { "epoch": 2.0288082619921184, "grad_norm": 1.8242942094802856, "learning_rate": 7.464923902704171e-05, "loss": 2.8118, "step": 29860 }, { "epoch": 2.0291479820627805, "grad_norm": 
3.205662727355957, "learning_rate": 7.464499252615845e-05, "loss": 2.9539, "step": 29865 }, { "epoch": 2.029487702133442, "grad_norm": 3.000920057296753, "learning_rate": 7.464074602527518e-05, "loss": 2.9476, "step": 29870 }, { "epoch": 2.0298274222041037, "grad_norm": 2.355893135070801, "learning_rate": 7.46364995243919e-05, "loss": 3.0278, "step": 29875 }, { "epoch": 2.030167142274766, "grad_norm": 2.899022340774536, "learning_rate": 7.463225302350864e-05, "loss": 2.9646, "step": 29880 }, { "epoch": 2.0305068623454274, "grad_norm": 2.8812341690063477, "learning_rate": 7.462800652262535e-05, "loss": 2.8675, "step": 29885 }, { "epoch": 2.030846582416089, "grad_norm": 2.3888485431671143, "learning_rate": 7.462376002174208e-05, "loss": 2.8935, "step": 29890 }, { "epoch": 2.031186302486751, "grad_norm": 3.1890838146209717, "learning_rate": 7.461951352085882e-05, "loss": 2.5128, "step": 29895 }, { "epoch": 2.0315260225574128, "grad_norm": 2.81164813041687, "learning_rate": 7.461526701997554e-05, "loss": 2.6016, "step": 29900 }, { "epoch": 2.0318657426280744, "grad_norm": 2.6146998405456543, "learning_rate": 7.461102051909227e-05, "loss": 3.0301, "step": 29905 }, { "epoch": 2.032205462698736, "grad_norm": 1.8808610439300537, "learning_rate": 7.460677401820901e-05, "loss": 3.0125, "step": 29910 }, { "epoch": 2.032545182769398, "grad_norm": 2.689366340637207, "learning_rate": 7.460252751732572e-05, "loss": 2.8051, "step": 29915 }, { "epoch": 2.0328849028400597, "grad_norm": 2.6244099140167236, "learning_rate": 7.459828101644245e-05, "loss": 2.7638, "step": 29920 }, { "epoch": 2.0332246229107214, "grad_norm": 3.0078155994415283, "learning_rate": 7.459403451555919e-05, "loss": 2.9749, "step": 29925 }, { "epoch": 2.0335643429813834, "grad_norm": 2.0732171535491943, "learning_rate": 7.45897880146759e-05, "loss": 2.8928, "step": 29930 }, { "epoch": 2.033904063052045, "grad_norm": 2.9517555236816406, "learning_rate": 7.458554151379263e-05, "loss": 2.7221, "step": 29935 }, { 
"epoch": 2.0342437831227067, "grad_norm": 2.4192471504211426, "learning_rate": 7.458129501290937e-05, "loss": 2.94, "step": 29940 }, { "epoch": 2.034583503193369, "grad_norm": 2.8015544414520264, "learning_rate": 7.457704851202609e-05, "loss": 2.8085, "step": 29945 }, { "epoch": 2.0349232232640304, "grad_norm": 2.1137688159942627, "learning_rate": 7.457280201114282e-05, "loss": 2.8315, "step": 29950 }, { "epoch": 2.035262943334692, "grad_norm": 2.859384059906006, "learning_rate": 7.456855551025956e-05, "loss": 2.8674, "step": 29955 }, { "epoch": 2.035602663405354, "grad_norm": 2.6429381370544434, "learning_rate": 7.456430900937627e-05, "loss": 2.8131, "step": 29960 }, { "epoch": 2.0359423834760157, "grad_norm": 2.308537721633911, "learning_rate": 7.4560062508493e-05, "loss": 2.6939, "step": 29965 }, { "epoch": 2.0362821035466774, "grad_norm": 3.581963539123535, "learning_rate": 7.455581600760973e-05, "loss": 2.9594, "step": 29970 }, { "epoch": 2.0366218236173395, "grad_norm": 2.2647864818573, "learning_rate": 7.455156950672646e-05, "loss": 3.0387, "step": 29975 }, { "epoch": 2.036961543688001, "grad_norm": 3.074066162109375, "learning_rate": 7.454732300584319e-05, "loss": 2.6509, "step": 29980 }, { "epoch": 2.0373012637586627, "grad_norm": 2.550022840499878, "learning_rate": 7.454307650495991e-05, "loss": 2.7745, "step": 29985 }, { "epoch": 2.037640983829325, "grad_norm": 2.1169040203094482, "learning_rate": 7.453883000407664e-05, "loss": 3.0576, "step": 29990 }, { "epoch": 2.0379807038999864, "grad_norm": 2.068608283996582, "learning_rate": 7.453458350319337e-05, "loss": 3.0045, "step": 29995 }, { "epoch": 2.038320423970648, "grad_norm": 2.2857630252838135, "learning_rate": 7.45303370023101e-05, "loss": 3.026, "step": 30000 }, { "epoch": 2.03866014404131, "grad_norm": 2.244429111480713, "learning_rate": 7.452609050142683e-05, "loss": 2.9252, "step": 30005 }, { "epoch": 2.0389998641119718, "grad_norm": 2.570117235183716, "learning_rate": 7.452184400054355e-05, 
"loss": 3.1872, "step": 30010 }, { "epoch": 2.0393395841826334, "grad_norm": 2.2473602294921875, "learning_rate": 7.451759749966028e-05, "loss": 3.0836, "step": 30015 }, { "epoch": 2.0396793042532955, "grad_norm": 2.2887418270111084, "learning_rate": 7.451335099877701e-05, "loss": 2.8895, "step": 30020 }, { "epoch": 2.040019024323957, "grad_norm": 2.561905860900879, "learning_rate": 7.450910449789374e-05, "loss": 2.8178, "step": 30025 }, { "epoch": 2.0403587443946187, "grad_norm": 1.9778385162353516, "learning_rate": 7.450485799701047e-05, "loss": 2.9349, "step": 30030 }, { "epoch": 2.040698464465281, "grad_norm": 2.9580740928649902, "learning_rate": 7.45006114961272e-05, "loss": 2.9434, "step": 30035 }, { "epoch": 2.0410381845359424, "grad_norm": 2.3856868743896484, "learning_rate": 7.449636499524392e-05, "loss": 2.7755, "step": 30040 }, { "epoch": 2.041377904606604, "grad_norm": 2.515916109085083, "learning_rate": 7.449211849436065e-05, "loss": 2.8113, "step": 30045 }, { "epoch": 2.041717624677266, "grad_norm": 2.309178113937378, "learning_rate": 7.448787199347738e-05, "loss": 2.8119, "step": 30050 }, { "epoch": 2.0420573447479278, "grad_norm": 2.4263248443603516, "learning_rate": 7.44836254925941e-05, "loss": 2.7233, "step": 30055 }, { "epoch": 2.0423970648185894, "grad_norm": 3.246434450149536, "learning_rate": 7.447937899171083e-05, "loss": 2.8268, "step": 30060 }, { "epoch": 2.042736784889251, "grad_norm": 2.664445638656616, "learning_rate": 7.447513249082756e-05, "loss": 2.7813, "step": 30065 }, { "epoch": 2.043076504959913, "grad_norm": 2.5832269191741943, "learning_rate": 7.447088598994429e-05, "loss": 3.0583, "step": 30070 }, { "epoch": 2.0434162250305747, "grad_norm": 2.5769526958465576, "learning_rate": 7.446663948906102e-05, "loss": 2.6938, "step": 30075 }, { "epoch": 2.0437559451012364, "grad_norm": 2.3436672687530518, "learning_rate": 7.446239298817775e-05, "loss": 2.8074, "step": 30080 }, { "epoch": 2.0440956651718984, "grad_norm": 
2.328648090362549, "learning_rate": 7.445814648729447e-05, "loss": 2.9438, "step": 30085 }, { "epoch": 2.04443538524256, "grad_norm": 1.7734001874923706, "learning_rate": 7.44538999864112e-05, "loss": 2.9071, "step": 30090 }, { "epoch": 2.0447751053132217, "grad_norm": 2.1509578227996826, "learning_rate": 7.444965348552793e-05, "loss": 2.6813, "step": 30095 }, { "epoch": 2.045114825383884, "grad_norm": 3.0813682079315186, "learning_rate": 7.444540698464466e-05, "loss": 2.7379, "step": 30100 }, { "epoch": 2.0454545454545454, "grad_norm": 2.720266342163086, "learning_rate": 7.444116048376139e-05, "loss": 2.9105, "step": 30105 }, { "epoch": 2.045794265525207, "grad_norm": 2.6327788829803467, "learning_rate": 7.443691398287811e-05, "loss": 2.8268, "step": 30110 }, { "epoch": 2.046133985595869, "grad_norm": 2.005829334259033, "learning_rate": 7.443266748199484e-05, "loss": 2.8885, "step": 30115 }, { "epoch": 2.0464737056665308, "grad_norm": 2.2291221618652344, "learning_rate": 7.442842098111157e-05, "loss": 2.761, "step": 30120 }, { "epoch": 2.0468134257371924, "grad_norm": 2.4330501556396484, "learning_rate": 7.44241744802283e-05, "loss": 2.6783, "step": 30125 }, { "epoch": 2.0471531458078545, "grad_norm": 2.6118242740631104, "learning_rate": 7.441992797934503e-05, "loss": 2.7594, "step": 30130 }, { "epoch": 2.047492865878516, "grad_norm": 2.5592970848083496, "learning_rate": 7.441568147846175e-05, "loss": 2.8215, "step": 30135 }, { "epoch": 2.0478325859491777, "grad_norm": 2.17258358001709, "learning_rate": 7.441143497757848e-05, "loss": 2.9681, "step": 30140 }, { "epoch": 2.04817230601984, "grad_norm": 2.4612655639648438, "learning_rate": 7.440718847669521e-05, "loss": 2.9799, "step": 30145 }, { "epoch": 2.0485120260905014, "grad_norm": 3.2577667236328125, "learning_rate": 7.440294197581194e-05, "loss": 2.6439, "step": 30150 }, { "epoch": 2.048851746161163, "grad_norm": 2.174975633621216, "learning_rate": 7.439869547492867e-05, "loss": 2.7521, "step": 30155 }, { 
"epoch": 2.049191466231825, "grad_norm": 2.398503065109253, "learning_rate": 7.43944489740454e-05, "loss": 2.9313, "step": 30160 }, { "epoch": 2.0495311863024868, "grad_norm": 2.306687593460083, "learning_rate": 7.439020247316212e-05, "loss": 2.8543, "step": 30165 }, { "epoch": 2.0498709063731484, "grad_norm": 2.7029595375061035, "learning_rate": 7.438595597227884e-05, "loss": 2.868, "step": 30170 }, { "epoch": 2.0502106264438105, "grad_norm": 2.542226552963257, "learning_rate": 7.438170947139558e-05, "loss": 2.8526, "step": 30175 }, { "epoch": 2.050550346514472, "grad_norm": 2.1273577213287354, "learning_rate": 7.43774629705123e-05, "loss": 3.1048, "step": 30180 }, { "epoch": 2.0508900665851337, "grad_norm": 2.6364011764526367, "learning_rate": 7.437321646962902e-05, "loss": 2.6408, "step": 30185 }, { "epoch": 2.051229786655796, "grad_norm": 2.2773373126983643, "learning_rate": 7.436896996874576e-05, "loss": 2.6211, "step": 30190 }, { "epoch": 2.0515695067264574, "grad_norm": 2.607774496078491, "learning_rate": 7.436472346786249e-05, "loss": 2.7324, "step": 30195 }, { "epoch": 2.051909226797119, "grad_norm": 3.008474826812744, "learning_rate": 7.43604769669792e-05, "loss": 3.1252, "step": 30200 }, { "epoch": 2.052248946867781, "grad_norm": 1.8315781354904175, "learning_rate": 7.435623046609595e-05, "loss": 2.9039, "step": 30205 }, { "epoch": 2.0525886669384428, "grad_norm": 2.9692742824554443, "learning_rate": 7.435198396521267e-05, "loss": 2.8868, "step": 30210 }, { "epoch": 2.0529283870091044, "grad_norm": 2.640066623687744, "learning_rate": 7.434773746432939e-05, "loss": 2.8781, "step": 30215 }, { "epoch": 2.0532681070797665, "grad_norm": 2.1203536987304688, "learning_rate": 7.434349096344613e-05, "loss": 3.1269, "step": 30220 }, { "epoch": 2.053607827150428, "grad_norm": 2.0128555297851562, "learning_rate": 7.433924446256286e-05, "loss": 2.6647, "step": 30225 }, { "epoch": 2.0539475472210897, "grad_norm": 2.8558647632598877, "learning_rate": 
7.433499796167957e-05, "loss": 2.6389, "step": 30230 }, { "epoch": 2.054287267291752, "grad_norm": 2.166069746017456, "learning_rate": 7.433075146079631e-05, "loss": 2.919, "step": 30235 }, { "epoch": 2.0546269873624134, "grad_norm": 2.352363348007202, "learning_rate": 7.432650495991303e-05, "loss": 2.8888, "step": 30240 }, { "epoch": 2.054966707433075, "grad_norm": 2.562628984451294, "learning_rate": 7.432225845902976e-05, "loss": 3.0803, "step": 30245 }, { "epoch": 2.0553064275037367, "grad_norm": 2.390963315963745, "learning_rate": 7.43180119581465e-05, "loss": 2.9428, "step": 30250 }, { "epoch": 2.055646147574399, "grad_norm": 2.36476469039917, "learning_rate": 7.431376545726321e-05, "loss": 2.9731, "step": 30255 }, { "epoch": 2.0559858676450604, "grad_norm": 2.418400526046753, "learning_rate": 7.430951895637994e-05, "loss": 3.1036, "step": 30260 }, { "epoch": 2.056325587715722, "grad_norm": 2.340879440307617, "learning_rate": 7.430527245549668e-05, "loss": 2.9845, "step": 30265 }, { "epoch": 2.056665307786384, "grad_norm": 2.3492257595062256, "learning_rate": 7.43010259546134e-05, "loss": 2.6465, "step": 30270 }, { "epoch": 2.0570050278570458, "grad_norm": 2.8554415702819824, "learning_rate": 7.429677945373012e-05, "loss": 2.9995, "step": 30275 }, { "epoch": 2.0573447479277074, "grad_norm": 2.424406051635742, "learning_rate": 7.429253295284687e-05, "loss": 2.6178, "step": 30280 }, { "epoch": 2.0576844679983695, "grad_norm": 2.4609885215759277, "learning_rate": 7.428828645196358e-05, "loss": 2.8312, "step": 30285 }, { "epoch": 2.058024188069031, "grad_norm": 1.9410361051559448, "learning_rate": 7.428403995108031e-05, "loss": 3.0029, "step": 30290 }, { "epoch": 2.0583639081396927, "grad_norm": 3.0945980548858643, "learning_rate": 7.427979345019705e-05, "loss": 2.7266, "step": 30295 }, { "epoch": 2.058703628210355, "grad_norm": 3.2254483699798584, "learning_rate": 7.427554694931376e-05, "loss": 2.9567, "step": 30300 }, { "epoch": 2.0590433482810164, "grad_norm": 
2.620258331298828, "learning_rate": 7.427130044843049e-05, "loss": 2.9076, "step": 30305 }, { "epoch": 2.059383068351678, "grad_norm": 3.593092918395996, "learning_rate": 7.426705394754722e-05, "loss": 2.8532, "step": 30310 }, { "epoch": 2.05972278842234, "grad_norm": 2.350992441177368, "learning_rate": 7.426280744666395e-05, "loss": 2.854, "step": 30315 }, { "epoch": 2.0600625084930018, "grad_norm": 2.4689648151397705, "learning_rate": 7.425856094578068e-05, "loss": 2.8995, "step": 30320 }, { "epoch": 2.0604022285636634, "grad_norm": 2.5991857051849365, "learning_rate": 7.42543144448974e-05, "loss": 3.0254, "step": 30325 }, { "epoch": 2.0607419486343255, "grad_norm": 2.456585168838501, "learning_rate": 7.425006794401413e-05, "loss": 2.8153, "step": 30330 }, { "epoch": 2.061081668704987, "grad_norm": 2.551870822906494, "learning_rate": 7.424582144313086e-05, "loss": 2.7377, "step": 30335 }, { "epoch": 2.0614213887756487, "grad_norm": 6.7735700607299805, "learning_rate": 7.424157494224759e-05, "loss": 2.9538, "step": 30340 }, { "epoch": 2.061761108846311, "grad_norm": 2.6504135131835938, "learning_rate": 7.423732844136432e-05, "loss": 2.7895, "step": 30345 }, { "epoch": 2.0621008289169724, "grad_norm": 2.446011781692505, "learning_rate": 7.423308194048104e-05, "loss": 2.9544, "step": 30350 }, { "epoch": 2.062440548987634, "grad_norm": 2.4367711544036865, "learning_rate": 7.422883543959777e-05, "loss": 2.8587, "step": 30355 }, { "epoch": 2.062780269058296, "grad_norm": 2.65613055229187, "learning_rate": 7.42245889387145e-05, "loss": 2.9465, "step": 30360 }, { "epoch": 2.0631199891289578, "grad_norm": 2.275883197784424, "learning_rate": 7.422034243783123e-05, "loss": 2.6144, "step": 30365 }, { "epoch": 2.0634597091996194, "grad_norm": 3.0012567043304443, "learning_rate": 7.421609593694796e-05, "loss": 2.6446, "step": 30370 }, { "epoch": 2.0637994292702815, "grad_norm": 2.4693431854248047, "learning_rate": 7.421184943606468e-05, "loss": 2.6527, "step": 30375 }, { 
"epoch": 2.064139149340943, "grad_norm": 1.9672855138778687, "learning_rate": 7.420760293518143e-05, "loss": 2.7742, "step": 30380 }, { "epoch": 2.0644788694116047, "grad_norm": 2.420311212539673, "learning_rate": 7.420335643429814e-05, "loss": 2.8474, "step": 30385 }, { "epoch": 2.064818589482267, "grad_norm": 2.9461617469787598, "learning_rate": 7.419910993341487e-05, "loss": 2.9942, "step": 30390 }, { "epoch": 2.0651583095529285, "grad_norm": 2.694941997528076, "learning_rate": 7.41948634325316e-05, "loss": 2.936, "step": 30395 }, { "epoch": 2.06549802962359, "grad_norm": 3.169748306274414, "learning_rate": 7.419061693164832e-05, "loss": 2.7352, "step": 30400 }, { "epoch": 2.0658377496942517, "grad_norm": 2.6266000270843506, "learning_rate": 7.418637043076505e-05, "loss": 2.7515, "step": 30405 }, { "epoch": 2.066177469764914, "grad_norm": 2.097975492477417, "learning_rate": 7.418212392988178e-05, "loss": 2.8563, "step": 30410 }, { "epoch": 2.0665171898355754, "grad_norm": 1.8259203433990479, "learning_rate": 7.417787742899851e-05, "loss": 2.7046, "step": 30415 }, { "epoch": 2.066856909906237, "grad_norm": 2.576036214828491, "learning_rate": 7.417363092811524e-05, "loss": 2.5868, "step": 30420 }, { "epoch": 2.067196629976899, "grad_norm": 2.122982978820801, "learning_rate": 7.416938442723196e-05, "loss": 2.8706, "step": 30425 }, { "epoch": 2.0675363500475608, "grad_norm": 2.1780261993408203, "learning_rate": 7.416513792634869e-05, "loss": 2.9122, "step": 30430 }, { "epoch": 2.0678760701182224, "grad_norm": 1.793778419494629, "learning_rate": 7.416089142546542e-05, "loss": 2.8279, "step": 30435 }, { "epoch": 2.0682157901888845, "grad_norm": 1.919999599456787, "learning_rate": 7.415664492458215e-05, "loss": 2.9963, "step": 30440 }, { "epoch": 2.068555510259546, "grad_norm": 2.95601487159729, "learning_rate": 7.415239842369888e-05, "loss": 2.9123, "step": 30445 }, { "epoch": 2.0688952303302077, "grad_norm": 2.2220966815948486, "learning_rate": 7.41481519228156e-05, 
"loss": 3.0763, "step": 30450 }, { "epoch": 2.06923495040087, "grad_norm": 2.3044705390930176, "learning_rate": 7.414390542193233e-05, "loss": 2.7335, "step": 30455 }, { "epoch": 2.0695746704715314, "grad_norm": 2.5424342155456543, "learning_rate": 7.413965892104906e-05, "loss": 2.8393, "step": 30460 }, { "epoch": 2.069914390542193, "grad_norm": 2.195078134536743, "learning_rate": 7.413541242016579e-05, "loss": 2.9011, "step": 30465 }, { "epoch": 2.070254110612855, "grad_norm": 2.3487470149993896, "learning_rate": 7.413116591928252e-05, "loss": 2.8503, "step": 30470 }, { "epoch": 2.0705938306835168, "grad_norm": 2.798672914505005, "learning_rate": 7.412691941839924e-05, "loss": 2.7748, "step": 30475 }, { "epoch": 2.0709335507541784, "grad_norm": 3.3947830200195312, "learning_rate": 7.412267291751597e-05, "loss": 2.9046, "step": 30480 }, { "epoch": 2.0712732708248405, "grad_norm": 2.51865816116333, "learning_rate": 7.41184264166327e-05, "loss": 2.704, "step": 30485 }, { "epoch": 2.071612990895502, "grad_norm": 2.678447961807251, "learning_rate": 7.411417991574943e-05, "loss": 2.9488, "step": 30490 }, { "epoch": 2.0719527109661637, "grad_norm": 2.261056661605835, "learning_rate": 7.410993341486616e-05, "loss": 2.8581, "step": 30495 }, { "epoch": 2.072292431036826, "grad_norm": 2.3845531940460205, "learning_rate": 7.410568691398288e-05, "loss": 2.6832, "step": 30500 }, { "epoch": 2.0726321511074874, "grad_norm": 2.3385767936706543, "learning_rate": 7.410144041309961e-05, "loss": 2.9101, "step": 30505 }, { "epoch": 2.072971871178149, "grad_norm": 2.6022534370422363, "learning_rate": 7.409719391221633e-05, "loss": 2.7101, "step": 30510 }, { "epoch": 2.073311591248811, "grad_norm": 3.1760449409484863, "learning_rate": 7.409294741133307e-05, "loss": 2.7215, "step": 30515 }, { "epoch": 2.073651311319473, "grad_norm": 2.4780170917510986, "learning_rate": 7.40887009104498e-05, "loss": 2.9563, "step": 30520 }, { "epoch": 2.0739910313901344, "grad_norm": 2.2647385597229004, 
"learning_rate": 7.408445440956651e-05, "loss": 3.1099, "step": 30525 }, { "epoch": 2.0743307514607965, "grad_norm": 2.3827314376831055, "learning_rate": 7.408020790868325e-05, "loss": 2.901, "step": 30530 }, { "epoch": 2.074670471531458, "grad_norm": 2.695590019226074, "learning_rate": 7.407596140779998e-05, "loss": 2.6522, "step": 30535 }, { "epoch": 2.0750101916021197, "grad_norm": 2.4444708824157715, "learning_rate": 7.40717149069167e-05, "loss": 2.6737, "step": 30540 }, { "epoch": 2.075349911672782, "grad_norm": 2.2724313735961914, "learning_rate": 7.406746840603344e-05, "loss": 2.831, "step": 30545 }, { "epoch": 2.0756896317434435, "grad_norm": 2.434687376022339, "learning_rate": 7.406322190515016e-05, "loss": 2.8243, "step": 30550 }, { "epoch": 2.076029351814105, "grad_norm": 2.7441351413726807, "learning_rate": 7.405897540426688e-05, "loss": 2.9331, "step": 30555 }, { "epoch": 2.076369071884767, "grad_norm": 2.553590774536133, "learning_rate": 7.405472890338362e-05, "loss": 2.8288, "step": 30560 }, { "epoch": 2.076708791955429, "grad_norm": 2.826411247253418, "learning_rate": 7.405048240250035e-05, "loss": 2.8676, "step": 30565 }, { "epoch": 2.0770485120260904, "grad_norm": 2.153524160385132, "learning_rate": 7.404623590161706e-05, "loss": 2.635, "step": 30570 }, { "epoch": 2.0773882320967525, "grad_norm": 2.7298362255096436, "learning_rate": 7.40419894007338e-05, "loss": 2.8498, "step": 30575 }, { "epoch": 2.077727952167414, "grad_norm": 2.277820110321045, "learning_rate": 7.403774289985053e-05, "loss": 2.7896, "step": 30580 }, { "epoch": 2.0780676722380758, "grad_norm": 2.4571571350097656, "learning_rate": 7.403349639896725e-05, "loss": 2.6884, "step": 30585 }, { "epoch": 2.0784073923087374, "grad_norm": 2.296334743499756, "learning_rate": 7.402924989808399e-05, "loss": 2.7297, "step": 30590 }, { "epoch": 2.0787471123793995, "grad_norm": 2.306119680404663, "learning_rate": 7.40250033972007e-05, "loss": 2.7951, "step": 30595 }, { "epoch": 
2.079086832450061, "grad_norm": 2.554975986480713, "learning_rate": 7.402075689631743e-05, "loss": 2.9499, "step": 30600 }, { "epoch": 2.0794265525207227, "grad_norm": 2.1825132369995117, "learning_rate": 7.401651039543417e-05, "loss": 3.0308, "step": 30605 }, { "epoch": 2.079766272591385, "grad_norm": 2.230574607849121, "learning_rate": 7.401226389455089e-05, "loss": 3.0007, "step": 30610 }, { "epoch": 2.0801059926620464, "grad_norm": 2.2529873847961426, "learning_rate": 7.400801739366762e-05, "loss": 2.8754, "step": 30615 }, { "epoch": 2.080445712732708, "grad_norm": 2.3084466457366943, "learning_rate": 7.400377089278436e-05, "loss": 2.6534, "step": 30620 }, { "epoch": 2.08078543280337, "grad_norm": 2.1155998706817627, "learning_rate": 7.399952439190107e-05, "loss": 2.9757, "step": 30625 }, { "epoch": 2.0811251528740318, "grad_norm": 2.123640537261963, "learning_rate": 7.39952778910178e-05, "loss": 2.8968, "step": 30630 }, { "epoch": 2.0814648729446934, "grad_norm": 2.8777170181274414, "learning_rate": 7.399103139013454e-05, "loss": 2.8788, "step": 30635 }, { "epoch": 2.0818045930153555, "grad_norm": 2.7564914226531982, "learning_rate": 7.398678488925126e-05, "loss": 2.873, "step": 30640 }, { "epoch": 2.082144313086017, "grad_norm": 3.3174631595611572, "learning_rate": 7.398253838836798e-05, "loss": 2.9518, "step": 30645 }, { "epoch": 2.0824840331566787, "grad_norm": 2.5635201930999756, "learning_rate": 7.397829188748472e-05, "loss": 3.0272, "step": 30650 }, { "epoch": 2.082823753227341, "grad_norm": 2.0250821113586426, "learning_rate": 7.397404538660144e-05, "loss": 3.0716, "step": 30655 }, { "epoch": 2.0831634732980024, "grad_norm": 2.78653883934021, "learning_rate": 7.396979888571817e-05, "loss": 2.8882, "step": 30660 }, { "epoch": 2.083503193368664, "grad_norm": 2.052046060562134, "learning_rate": 7.39655523848349e-05, "loss": 2.7746, "step": 30665 }, { "epoch": 2.083842913439326, "grad_norm": 2.956955671310425, "learning_rate": 7.396130588395162e-05, "loss": 
2.8316, "step": 30670 }, { "epoch": 2.084182633509988, "grad_norm": 2.475355386734009, "learning_rate": 7.395705938306835e-05, "loss": 2.8269, "step": 30675 }, { "epoch": 2.0845223535806494, "grad_norm": 2.204317092895508, "learning_rate": 7.395281288218508e-05, "loss": 2.8544, "step": 30680 }, { "epoch": 2.0848620736513115, "grad_norm": 2.7193329334259033, "learning_rate": 7.394856638130181e-05, "loss": 2.9839, "step": 30685 }, { "epoch": 2.085201793721973, "grad_norm": 2.458470106124878, "learning_rate": 7.394431988041854e-05, "loss": 2.7418, "step": 30690 }, { "epoch": 2.0855415137926347, "grad_norm": 2.152122735977173, "learning_rate": 7.394007337953526e-05, "loss": 2.8788, "step": 30695 }, { "epoch": 2.085881233863297, "grad_norm": 2.4549384117126465, "learning_rate": 7.393582687865199e-05, "loss": 2.6465, "step": 30700 }, { "epoch": 2.0862209539339585, "grad_norm": 2.260570764541626, "learning_rate": 7.393158037776872e-05, "loss": 2.8107, "step": 30705 }, { "epoch": 2.08656067400462, "grad_norm": 2.8719472885131836, "learning_rate": 7.392733387688545e-05, "loss": 2.8522, "step": 30710 }, { "epoch": 2.086900394075282, "grad_norm": 2.5505106449127197, "learning_rate": 7.392308737600218e-05, "loss": 2.7772, "step": 30715 }, { "epoch": 2.087240114145944, "grad_norm": 2.6343767642974854, "learning_rate": 7.391884087511892e-05, "loss": 2.9572, "step": 30720 }, { "epoch": 2.0875798342166054, "grad_norm": 2.281907320022583, "learning_rate": 7.391459437423563e-05, "loss": 3.1174, "step": 30725 }, { "epoch": 2.0879195542872675, "grad_norm": 2.143794298171997, "learning_rate": 7.391034787335236e-05, "loss": 2.707, "step": 30730 }, { "epoch": 2.088259274357929, "grad_norm": 2.4875645637512207, "learning_rate": 7.390610137246909e-05, "loss": 2.6618, "step": 30735 }, { "epoch": 2.0885989944285908, "grad_norm": 2.0516207218170166, "learning_rate": 7.390185487158582e-05, "loss": 2.8297, "step": 30740 }, { "epoch": 2.0889387144992524, "grad_norm": 1.9263273477554321, 
"learning_rate": 7.389760837070254e-05, "loss": 2.8218, "step": 30745 }, { "epoch": 2.0892784345699145, "grad_norm": 2.2035927772521973, "learning_rate": 7.389336186981927e-05, "loss": 2.9927, "step": 30750 }, { "epoch": 2.089618154640576, "grad_norm": 2.4095373153686523, "learning_rate": 7.3889115368936e-05, "loss": 3.0667, "step": 30755 }, { "epoch": 2.0899578747112377, "grad_norm": 2.3253426551818848, "learning_rate": 7.388486886805273e-05, "loss": 2.7613, "step": 30760 }, { "epoch": 2.0902975947819, "grad_norm": 1.9827263355255127, "learning_rate": 7.388062236716946e-05, "loss": 3.3739, "step": 30765 }, { "epoch": 2.0906373148525614, "grad_norm": 2.064014434814453, "learning_rate": 7.387637586628618e-05, "loss": 2.9371, "step": 30770 }, { "epoch": 2.090977034923223, "grad_norm": 2.722526788711548, "learning_rate": 7.387212936540291e-05, "loss": 2.6101, "step": 30775 }, { "epoch": 2.091316754993885, "grad_norm": 2.3257317543029785, "learning_rate": 7.386788286451964e-05, "loss": 2.8477, "step": 30780 }, { "epoch": 2.0916564750645468, "grad_norm": 2.4590606689453125, "learning_rate": 7.386363636363637e-05, "loss": 2.9414, "step": 30785 }, { "epoch": 2.0919961951352084, "grad_norm": 2.0425572395324707, "learning_rate": 7.38593898627531e-05, "loss": 3.0248, "step": 30790 }, { "epoch": 2.0923359152058705, "grad_norm": 3.1275641918182373, "learning_rate": 7.385514336186982e-05, "loss": 2.9446, "step": 30795 }, { "epoch": 2.092675635276532, "grad_norm": 2.0467655658721924, "learning_rate": 7.385089686098655e-05, "loss": 2.929, "step": 30800 }, { "epoch": 2.0930153553471937, "grad_norm": 3.3775136470794678, "learning_rate": 7.384665036010328e-05, "loss": 2.8984, "step": 30805 }, { "epoch": 2.093355075417856, "grad_norm": 2.711089849472046, "learning_rate": 7.384240385922001e-05, "loss": 2.8151, "step": 30810 }, { "epoch": 2.0936947954885174, "grad_norm": 2.3855321407318115, "learning_rate": 7.383815735833674e-05, "loss": 3.0087, "step": 30815 }, { "epoch": 
2.094034515559179, "grad_norm": 2.2837018966674805, "learning_rate": 7.383391085745346e-05, "loss": 2.771, "step": 30820 }, { "epoch": 2.094374235629841, "grad_norm": 2.6461751461029053, "learning_rate": 7.382966435657019e-05, "loss": 2.8718, "step": 30825 }, { "epoch": 2.094713955700503, "grad_norm": 2.696939706802368, "learning_rate": 7.382541785568692e-05, "loss": 2.9514, "step": 30830 }, { "epoch": 2.0950536757711644, "grad_norm": 2.4946305751800537, "learning_rate": 7.382117135480365e-05, "loss": 2.8458, "step": 30835 }, { "epoch": 2.0953933958418265, "grad_norm": 2.12959361076355, "learning_rate": 7.381692485392038e-05, "loss": 2.8818, "step": 30840 }, { "epoch": 2.095733115912488, "grad_norm": 2.7258903980255127, "learning_rate": 7.38126783530371e-05, "loss": 3.0537, "step": 30845 }, { "epoch": 2.0960728359831498, "grad_norm": 2.178759813308716, "learning_rate": 7.380843185215383e-05, "loss": 3.0348, "step": 30850 }, { "epoch": 2.096412556053812, "grad_norm": 2.6325511932373047, "learning_rate": 7.380418535127056e-05, "loss": 2.921, "step": 30855 }, { "epoch": 2.0967522761244735, "grad_norm": 2.504305839538574, "learning_rate": 7.379993885038729e-05, "loss": 2.9904, "step": 30860 }, { "epoch": 2.097091996195135, "grad_norm": 2.5931689739227295, "learning_rate": 7.3795692349504e-05, "loss": 2.6159, "step": 30865 }, { "epoch": 2.097431716265797, "grad_norm": 2.468587875366211, "learning_rate": 7.379144584862074e-05, "loss": 2.9524, "step": 30870 }, { "epoch": 2.097771436336459, "grad_norm": 2.0654757022857666, "learning_rate": 7.378719934773747e-05, "loss": 2.8306, "step": 30875 }, { "epoch": 2.0981111564071204, "grad_norm": 3.0068020820617676, "learning_rate": 7.378295284685419e-05, "loss": 2.7057, "step": 30880 }, { "epoch": 2.0984508764777825, "grad_norm": 2.6942555904388428, "learning_rate": 7.377870634597093e-05, "loss": 2.9817, "step": 30885 }, { "epoch": 2.098790596548444, "grad_norm": 2.3710033893585205, "learning_rate": 7.377445984508766e-05, "loss": 
3.0534, "step": 30890 }, { "epoch": 2.0991303166191058, "grad_norm": 2.9666614532470703, "learning_rate": 7.377021334420437e-05, "loss": 2.6432, "step": 30895 }, { "epoch": 2.099470036689768, "grad_norm": 2.49957013130188, "learning_rate": 7.376596684332111e-05, "loss": 2.7922, "step": 30900 }, { "epoch": 2.0998097567604295, "grad_norm": 2.324939727783203, "learning_rate": 7.376172034243784e-05, "loss": 3.2355, "step": 30905 }, { "epoch": 2.100149476831091, "grad_norm": 2.4136805534362793, "learning_rate": 7.375747384155455e-05, "loss": 2.8858, "step": 30910 }, { "epoch": 2.100489196901753, "grad_norm": 2.5704548358917236, "learning_rate": 7.37532273406713e-05, "loss": 2.9921, "step": 30915 }, { "epoch": 2.100828916972415, "grad_norm": 2.5235979557037354, "learning_rate": 7.374898083978802e-05, "loss": 2.6011, "step": 30920 }, { "epoch": 2.1011686370430764, "grad_norm": 3.0002171993255615, "learning_rate": 7.374473433890474e-05, "loss": 3.0421, "step": 30925 }, { "epoch": 2.101508357113738, "grad_norm": 2.244694471359253, "learning_rate": 7.374048783802148e-05, "loss": 2.7911, "step": 30930 }, { "epoch": 2.1018480771844, "grad_norm": 2.864948034286499, "learning_rate": 7.37362413371382e-05, "loss": 2.9958, "step": 30935 }, { "epoch": 2.1021877972550618, "grad_norm": 2.1722474098205566, "learning_rate": 7.373199483625492e-05, "loss": 2.9415, "step": 30940 }, { "epoch": 2.1025275173257234, "grad_norm": 2.5893020629882812, "learning_rate": 7.372774833537166e-05, "loss": 2.4564, "step": 30945 }, { "epoch": 2.1028672373963855, "grad_norm": 2.5131783485412598, "learning_rate": 7.372350183448838e-05, "loss": 2.7157, "step": 30950 }, { "epoch": 2.103206957467047, "grad_norm": 3.518671989440918, "learning_rate": 7.37192553336051e-05, "loss": 2.9077, "step": 30955 }, { "epoch": 2.1035466775377087, "grad_norm": 2.7589869499206543, "learning_rate": 7.371500883272185e-05, "loss": 3.0533, "step": 30960 }, { "epoch": 2.103886397608371, "grad_norm": 2.680380344390869, 
"learning_rate": 7.371076233183856e-05, "loss": 2.8846, "step": 30965 }, { "epoch": 2.1042261176790324, "grad_norm": 2.487778663635254, "learning_rate": 7.370651583095529e-05, "loss": 2.6657, "step": 30970 }, { "epoch": 2.104565837749694, "grad_norm": 3.4275400638580322, "learning_rate": 7.370226933007203e-05, "loss": 2.7897, "step": 30975 }, { "epoch": 2.104905557820356, "grad_norm": 2.215406656265259, "learning_rate": 7.369802282918875e-05, "loss": 2.751, "step": 30980 }, { "epoch": 2.105245277891018, "grad_norm": 2.3256890773773193, "learning_rate": 7.369377632830547e-05, "loss": 2.7192, "step": 30985 }, { "epoch": 2.1055849979616794, "grad_norm": 2.755117416381836, "learning_rate": 7.368952982742222e-05, "loss": 2.8071, "step": 30990 }, { "epoch": 2.1059247180323415, "grad_norm": 2.254702568054199, "learning_rate": 7.368528332653893e-05, "loss": 3.0328, "step": 30995 }, { "epoch": 2.106264438103003, "grad_norm": 3.071423053741455, "learning_rate": 7.368103682565566e-05, "loss": 3.0569, "step": 31000 }, { "epoch": 2.1066041581736648, "grad_norm": 2.264272451400757, "learning_rate": 7.36767903247724e-05, "loss": 2.8806, "step": 31005 }, { "epoch": 2.106943878244327, "grad_norm": 2.471797466278076, "learning_rate": 7.367254382388911e-05, "loss": 2.7283, "step": 31010 }, { "epoch": 2.1072835983149885, "grad_norm": 2.172548770904541, "learning_rate": 7.366829732300584e-05, "loss": 3.0946, "step": 31015 }, { "epoch": 2.10762331838565, "grad_norm": 2.160358190536499, "learning_rate": 7.366405082212257e-05, "loss": 2.8451, "step": 31020 }, { "epoch": 2.107963038456312, "grad_norm": 2.6872618198394775, "learning_rate": 7.36598043212393e-05, "loss": 2.8363, "step": 31025 }, { "epoch": 2.108302758526974, "grad_norm": 2.148705244064331, "learning_rate": 7.365555782035603e-05, "loss": 2.9905, "step": 31030 }, { "epoch": 2.1086424785976354, "grad_norm": 2.9774627685546875, "learning_rate": 7.365131131947275e-05, "loss": 2.6249, "step": 31035 }, { "epoch": 2.1089821986682975, 
"grad_norm": 2.3521101474761963, "learning_rate": 7.364706481858948e-05, "loss": 2.8132, "step": 31040 }, { "epoch": 2.109321918738959, "grad_norm": 2.055856227874756, "learning_rate": 7.364281831770621e-05, "loss": 3.051, "step": 31045 }, { "epoch": 2.1096616388096208, "grad_norm": 3.5205814838409424, "learning_rate": 7.363857181682294e-05, "loss": 2.8518, "step": 31050 }, { "epoch": 2.110001358880283, "grad_norm": 2.627393960952759, "learning_rate": 7.363432531593967e-05, "loss": 3.1598, "step": 31055 }, { "epoch": 2.1103410789509445, "grad_norm": 2.066636323928833, "learning_rate": 7.363007881505641e-05, "loss": 2.9144, "step": 31060 }, { "epoch": 2.110680799021606, "grad_norm": 2.600825548171997, "learning_rate": 7.362583231417312e-05, "loss": 2.9868, "step": 31065 }, { "epoch": 2.111020519092268, "grad_norm": 2.3791635036468506, "learning_rate": 7.362158581328985e-05, "loss": 2.8484, "step": 31070 }, { "epoch": 2.11136023916293, "grad_norm": 1.9819400310516357, "learning_rate": 7.361733931240659e-05, "loss": 3.161, "step": 31075 }, { "epoch": 2.1116999592335914, "grad_norm": 2.9163262844085693, "learning_rate": 7.36130928115233e-05, "loss": 2.7878, "step": 31080 }, { "epoch": 2.112039679304253, "grad_norm": 2.9426286220550537, "learning_rate": 7.360884631064003e-05, "loss": 2.8696, "step": 31085 }, { "epoch": 2.112379399374915, "grad_norm": 2.4491126537323, "learning_rate": 7.360459980975676e-05, "loss": 2.7751, "step": 31090 }, { "epoch": 2.1127191194455768, "grad_norm": 2.107301712036133, "learning_rate": 7.360035330887349e-05, "loss": 2.866, "step": 31095 }, { "epoch": 2.1130588395162384, "grad_norm": 1.861428141593933, "learning_rate": 7.359610680799022e-05, "loss": 2.5902, "step": 31100 }, { "epoch": 2.1133985595869005, "grad_norm": 2.854252576828003, "learning_rate": 7.359186030710695e-05, "loss": 3.0074, "step": 31105 }, { "epoch": 2.113738279657562, "grad_norm": 2.785717487335205, "learning_rate": 7.358761380622367e-05, "loss": 3.0784, "step": 31110 }, 
{ "epoch": 2.1140779997282237, "grad_norm": 2.3460628986358643, "learning_rate": 7.35833673053404e-05, "loss": 2.6897, "step": 31115 }, { "epoch": 2.114417719798886, "grad_norm": 2.249157428741455, "learning_rate": 7.357912080445713e-05, "loss": 2.6992, "step": 31120 }, { "epoch": 2.1147574398695475, "grad_norm": 2.5355870723724365, "learning_rate": 7.357487430357386e-05, "loss": 2.9464, "step": 31125 }, { "epoch": 2.115097159940209, "grad_norm": 2.1262025833129883, "learning_rate": 7.357062780269059e-05, "loss": 2.7423, "step": 31130 }, { "epoch": 2.115436880010871, "grad_norm": 2.4036850929260254, "learning_rate": 7.356638130180731e-05, "loss": 2.5303, "step": 31135 }, { "epoch": 2.115776600081533, "grad_norm": 2.6583638191223145, "learning_rate": 7.356213480092404e-05, "loss": 2.6474, "step": 31140 }, { "epoch": 2.1161163201521944, "grad_norm": 2.268475294113159, "learning_rate": 7.355788830004077e-05, "loss": 3.1298, "step": 31145 }, { "epoch": 2.1164560402228565, "grad_norm": 2.237586736679077, "learning_rate": 7.35536417991575e-05, "loss": 3.2857, "step": 31150 }, { "epoch": 2.116795760293518, "grad_norm": 2.364640712738037, "learning_rate": 7.354939529827423e-05, "loss": 2.6831, "step": 31155 }, { "epoch": 2.1171354803641798, "grad_norm": 2.5554301738739014, "learning_rate": 7.354514879739095e-05, "loss": 2.6464, "step": 31160 }, { "epoch": 2.117475200434842, "grad_norm": 2.073047637939453, "learning_rate": 7.354090229650768e-05, "loss": 2.9406, "step": 31165 }, { "epoch": 2.1178149205055035, "grad_norm": 2.79610013961792, "learning_rate": 7.353665579562441e-05, "loss": 2.7716, "step": 31170 }, { "epoch": 2.118154640576165, "grad_norm": 2.3431003093719482, "learning_rate": 7.353240929474114e-05, "loss": 2.9038, "step": 31175 }, { "epoch": 2.118494360646827, "grad_norm": 2.2636406421661377, "learning_rate": 7.352816279385787e-05, "loss": 2.8735, "step": 31180 }, { "epoch": 2.118834080717489, "grad_norm": 2.130570650100708, "learning_rate": 
7.35239162929746e-05, "loss": 2.8323, "step": 31185 }, { "epoch": 2.1191738007881504, "grad_norm": 2.365546464920044, "learning_rate": 7.351966979209132e-05, "loss": 2.7843, "step": 31190 }, { "epoch": 2.1195135208588125, "grad_norm": 2.310532808303833, "learning_rate": 7.351542329120805e-05, "loss": 2.6915, "step": 31195 }, { "epoch": 2.119853240929474, "grad_norm": 2.7656478881835938, "learning_rate": 7.351117679032478e-05, "loss": 3.0216, "step": 31200 }, { "epoch": 2.1201929610001358, "grad_norm": 2.587975263595581, "learning_rate": 7.35069302894415e-05, "loss": 3.0772, "step": 31205 }, { "epoch": 2.120532681070798, "grad_norm": 2.652137041091919, "learning_rate": 7.350268378855823e-05, "loss": 2.8592, "step": 31210 }, { "epoch": 2.1208724011414595, "grad_norm": 2.9964892864227295, "learning_rate": 7.349843728767496e-05, "loss": 2.8963, "step": 31215 }, { "epoch": 2.121212121212121, "grad_norm": 3.3836796283721924, "learning_rate": 7.349419078679168e-05, "loss": 2.9061, "step": 31220 }, { "epoch": 2.121551841282783, "grad_norm": 2.8891940116882324, "learning_rate": 7.348994428590842e-05, "loss": 2.774, "step": 31225 }, { "epoch": 2.121891561353445, "grad_norm": 2.2968828678131104, "learning_rate": 7.348569778502515e-05, "loss": 2.8507, "step": 31230 }, { "epoch": 2.1222312814241064, "grad_norm": 2.935595750808716, "learning_rate": 7.348145128414186e-05, "loss": 2.9116, "step": 31235 }, { "epoch": 2.1225710014947685, "grad_norm": 2.4461894035339355, "learning_rate": 7.34772047832586e-05, "loss": 2.7972, "step": 31240 }, { "epoch": 2.12291072156543, "grad_norm": 2.456735372543335, "learning_rate": 7.347295828237533e-05, "loss": 3.0261, "step": 31245 }, { "epoch": 2.123250441636092, "grad_norm": 2.44901704788208, "learning_rate": 7.346871178149205e-05, "loss": 3.0048, "step": 31250 }, { "epoch": 2.123590161706754, "grad_norm": 3.09194278717041, "learning_rate": 7.346446528060879e-05, "loss": 2.9744, "step": 31255 }, { "epoch": 2.1239298817774155, "grad_norm": 
2.2403769493103027, "learning_rate": 7.346021877972551e-05, "loss": 2.8826, "step": 31260 }, { "epoch": 2.124269601848077, "grad_norm": 2.9280319213867188, "learning_rate": 7.345597227884223e-05, "loss": 2.593, "step": 31265 }, { "epoch": 2.1246093219187387, "grad_norm": 3.040015935897827, "learning_rate": 7.345172577795897e-05, "loss": 2.7814, "step": 31270 }, { "epoch": 2.124949041989401, "grad_norm": 2.68477463722229, "learning_rate": 7.34474792770757e-05, "loss": 2.6031, "step": 31275 }, { "epoch": 2.1252887620600625, "grad_norm": 2.556406259536743, "learning_rate": 7.344323277619241e-05, "loss": 2.9355, "step": 31280 }, { "epoch": 2.125628482130724, "grad_norm": 2.5404229164123535, "learning_rate": 7.343898627530915e-05, "loss": 2.8651, "step": 31285 }, { "epoch": 2.125968202201386, "grad_norm": 2.7011654376983643, "learning_rate": 7.343473977442587e-05, "loss": 2.837, "step": 31290 }, { "epoch": 2.126307922272048, "grad_norm": 2.327671527862549, "learning_rate": 7.34304932735426e-05, "loss": 2.524, "step": 31295 }, { "epoch": 2.1266476423427094, "grad_norm": 2.9154348373413086, "learning_rate": 7.342624677265934e-05, "loss": 2.6966, "step": 31300 }, { "epoch": 2.1269873624133715, "grad_norm": 2.1959869861602783, "learning_rate": 7.342200027177605e-05, "loss": 2.6832, "step": 31305 }, { "epoch": 2.127327082484033, "grad_norm": 2.181729555130005, "learning_rate": 7.341775377089278e-05, "loss": 2.7116, "step": 31310 }, { "epoch": 2.1276668025546948, "grad_norm": 2.510688543319702, "learning_rate": 7.341350727000952e-05, "loss": 2.8046, "step": 31315 }, { "epoch": 2.128006522625357, "grad_norm": 2.6294424533843994, "learning_rate": 7.340926076912624e-05, "loss": 2.7417, "step": 31320 }, { "epoch": 2.1283462426960185, "grad_norm": 2.638599157333374, "learning_rate": 7.340501426824297e-05, "loss": 2.7655, "step": 31325 }, { "epoch": 2.12868596276668, "grad_norm": 2.7706165313720703, "learning_rate": 7.340076776735971e-05, "loss": 2.827, "step": 31330 }, { "epoch": 
2.129025682837342, "grad_norm": 2.107454776763916, "learning_rate": 7.339652126647642e-05, "loss": 3.04, "step": 31335 }, { "epoch": 2.129365402908004, "grad_norm": 2.6518778800964355, "learning_rate": 7.339227476559315e-05, "loss": 2.8692, "step": 31340 }, { "epoch": 2.1297051229786654, "grad_norm": 2.7663064002990723, "learning_rate": 7.338802826470989e-05, "loss": 2.9203, "step": 31345 }, { "epoch": 2.1300448430493275, "grad_norm": 2.4914000034332275, "learning_rate": 7.33837817638266e-05, "loss": 3.0337, "step": 31350 }, { "epoch": 2.130384563119989, "grad_norm": 2.7287967205047607, "learning_rate": 7.337953526294333e-05, "loss": 2.8438, "step": 31355 }, { "epoch": 2.1307242831906508, "grad_norm": 2.4823944568634033, "learning_rate": 7.337528876206006e-05, "loss": 2.7058, "step": 31360 }, { "epoch": 2.131064003261313, "grad_norm": 1.9221601486206055, "learning_rate": 7.337104226117679e-05, "loss": 2.8376, "step": 31365 }, { "epoch": 2.1314037233319745, "grad_norm": 2.1335904598236084, "learning_rate": 7.336679576029352e-05, "loss": 2.8827, "step": 31370 }, { "epoch": 2.131743443402636, "grad_norm": 2.6391942501068115, "learning_rate": 7.336254925941025e-05, "loss": 2.7885, "step": 31375 }, { "epoch": 2.132083163473298, "grad_norm": 3.6247646808624268, "learning_rate": 7.335830275852697e-05, "loss": 2.8082, "step": 31380 }, { "epoch": 2.13242288354396, "grad_norm": 2.277064085006714, "learning_rate": 7.33540562576437e-05, "loss": 2.9635, "step": 31385 }, { "epoch": 2.1327626036146214, "grad_norm": 2.869926691055298, "learning_rate": 7.334980975676043e-05, "loss": 3.0227, "step": 31390 }, { "epoch": 2.1331023236852835, "grad_norm": 2.9172587394714355, "learning_rate": 7.334556325587716e-05, "loss": 2.6014, "step": 31395 }, { "epoch": 2.133442043755945, "grad_norm": 2.7994954586029053, "learning_rate": 7.33413167549939e-05, "loss": 2.8752, "step": 31400 }, { "epoch": 2.133781763826607, "grad_norm": 2.3583290576934814, "learning_rate": 7.333707025411061e-05, 
"loss": 2.7177, "step": 31405 }, { "epoch": 2.134121483897269, "grad_norm": 2.1733558177948, "learning_rate": 7.333282375322734e-05, "loss": 2.9815, "step": 31410 }, { "epoch": 2.1344612039679305, "grad_norm": 3.3743643760681152, "learning_rate": 7.332857725234408e-05, "loss": 2.8485, "step": 31415 }, { "epoch": 2.134800924038592, "grad_norm": 2.5239744186401367, "learning_rate": 7.33243307514608e-05, "loss": 2.9612, "step": 31420 }, { "epoch": 2.1351406441092537, "grad_norm": 2.759899616241455, "learning_rate": 7.332008425057753e-05, "loss": 2.8794, "step": 31425 }, { "epoch": 2.135480364179916, "grad_norm": 2.9350452423095703, "learning_rate": 7.331583774969427e-05, "loss": 2.9479, "step": 31430 }, { "epoch": 2.1358200842505775, "grad_norm": 3.629171848297119, "learning_rate": 7.331159124881098e-05, "loss": 2.7938, "step": 31435 }, { "epoch": 2.136159804321239, "grad_norm": 2.5048296451568604, "learning_rate": 7.330734474792771e-05, "loss": 2.7417, "step": 31440 }, { "epoch": 2.136499524391901, "grad_norm": 2.308688163757324, "learning_rate": 7.330309824704444e-05, "loss": 2.6334, "step": 31445 }, { "epoch": 2.136839244462563, "grad_norm": 2.3188395500183105, "learning_rate": 7.329885174616117e-05, "loss": 2.8212, "step": 31450 }, { "epoch": 2.1371789645332244, "grad_norm": 2.6362268924713135, "learning_rate": 7.32946052452779e-05, "loss": 2.8074, "step": 31455 }, { "epoch": 2.1375186846038865, "grad_norm": 2.487534284591675, "learning_rate": 7.329035874439462e-05, "loss": 2.8061, "step": 31460 }, { "epoch": 2.137858404674548, "grad_norm": 2.2327656745910645, "learning_rate": 7.328611224351135e-05, "loss": 2.7227, "step": 31465 }, { "epoch": 2.1381981247452098, "grad_norm": 2.356982707977295, "learning_rate": 7.328186574262808e-05, "loss": 2.513, "step": 31470 }, { "epoch": 2.138537844815872, "grad_norm": 2.3870913982391357, "learning_rate": 7.32776192417448e-05, "loss": 2.7243, "step": 31475 }, { "epoch": 2.1388775648865335, "grad_norm": 2.556197166442871, 
"learning_rate": 7.327337274086153e-05, "loss": 2.731, "step": 31480 }, { "epoch": 2.139217284957195, "grad_norm": 2.594111919403076, "learning_rate": 7.326912623997826e-05, "loss": 2.9534, "step": 31485 }, { "epoch": 2.139557005027857, "grad_norm": 3.1718010902404785, "learning_rate": 7.326487973909499e-05, "loss": 2.9768, "step": 31490 }, { "epoch": 2.139896725098519, "grad_norm": 3.1553213596343994, "learning_rate": 7.326063323821172e-05, "loss": 2.6472, "step": 31495 }, { "epoch": 2.1402364451691804, "grad_norm": 2.1870553493499756, "learning_rate": 7.325638673732845e-05, "loss": 2.732, "step": 31500 }, { "epoch": 2.1405761652398425, "grad_norm": 2.666288375854492, "learning_rate": 7.325214023644517e-05, "loss": 2.7383, "step": 31505 }, { "epoch": 2.140915885310504, "grad_norm": 3.1232471466064453, "learning_rate": 7.32478937355619e-05, "loss": 2.8459, "step": 31510 }, { "epoch": 2.1412556053811658, "grad_norm": 2.730386972427368, "learning_rate": 7.324364723467863e-05, "loss": 2.874, "step": 31515 }, { "epoch": 2.141595325451828, "grad_norm": 2.8086507320404053, "learning_rate": 7.323940073379536e-05, "loss": 2.7177, "step": 31520 }, { "epoch": 2.1419350455224895, "grad_norm": 2.5212855339050293, "learning_rate": 7.323515423291209e-05, "loss": 2.5837, "step": 31525 }, { "epoch": 2.142274765593151, "grad_norm": 2.8517391681671143, "learning_rate": 7.323090773202881e-05, "loss": 2.9606, "step": 31530 }, { "epoch": 2.142614485663813, "grad_norm": 2.2224719524383545, "learning_rate": 7.322666123114554e-05, "loss": 2.8309, "step": 31535 }, { "epoch": 2.142954205734475, "grad_norm": 2.0225741863250732, "learning_rate": 7.322241473026227e-05, "loss": 2.8789, "step": 31540 }, { "epoch": 2.1432939258051364, "grad_norm": 2.3641550540924072, "learning_rate": 7.3218168229379e-05, "loss": 2.7967, "step": 31545 }, { "epoch": 2.1436336458757985, "grad_norm": 2.0497488975524902, "learning_rate": 7.321392172849573e-05, "loss": 3.2184, "step": 31550 }, { "epoch": 
2.14397336594646, "grad_norm": 3.472209930419922, "learning_rate": 7.320967522761245e-05, "loss": 2.9774, "step": 31555 }, { "epoch": 2.144313086017122, "grad_norm": 2.7638535499572754, "learning_rate": 7.320542872672918e-05, "loss": 3.0032, "step": 31560 }, { "epoch": 2.144652806087784, "grad_norm": 3.0521039962768555, "learning_rate": 7.320118222584591e-05, "loss": 2.804, "step": 31565 }, { "epoch": 2.1449925261584455, "grad_norm": 2.3021106719970703, "learning_rate": 7.319693572496264e-05, "loss": 2.8053, "step": 31570 }, { "epoch": 2.145332246229107, "grad_norm": 2.5082180500030518, "learning_rate": 7.319268922407935e-05, "loss": 2.8664, "step": 31575 }, { "epoch": 2.145671966299769, "grad_norm": 2.75583815574646, "learning_rate": 7.31884427231961e-05, "loss": 2.7824, "step": 31580 }, { "epoch": 2.146011686370431, "grad_norm": 2.6617960929870605, "learning_rate": 7.318419622231282e-05, "loss": 2.9111, "step": 31585 }, { "epoch": 2.1463514064410925, "grad_norm": 2.7381837368011475, "learning_rate": 7.317994972142954e-05, "loss": 2.8285, "step": 31590 }, { "epoch": 2.1466911265117545, "grad_norm": 3.1180145740509033, "learning_rate": 7.317570322054628e-05, "loss": 2.7461, "step": 31595 }, { "epoch": 2.147030846582416, "grad_norm": 2.2978696823120117, "learning_rate": 7.3171456719663e-05, "loss": 2.8203, "step": 31600 }, { "epoch": 2.147370566653078, "grad_norm": 2.9238462448120117, "learning_rate": 7.316721021877972e-05, "loss": 2.5553, "step": 31605 }, { "epoch": 2.14771028672374, "grad_norm": 2.5886151790618896, "learning_rate": 7.316296371789646e-05, "loss": 2.7367, "step": 31610 }, { "epoch": 2.1480500067944015, "grad_norm": 2.4001543521881104, "learning_rate": 7.315871721701319e-05, "loss": 2.9889, "step": 31615 }, { "epoch": 2.148389726865063, "grad_norm": 2.7419893741607666, "learning_rate": 7.31544707161299e-05, "loss": 2.7599, "step": 31620 }, { "epoch": 2.1487294469357248, "grad_norm": 4.007094383239746, "learning_rate": 7.315022421524665e-05, "loss": 
2.9481, "step": 31625 }, { "epoch": 2.149069167006387, "grad_norm": 2.578610420227051, "learning_rate": 7.314597771436337e-05, "loss": 2.8995, "step": 31630 }, { "epoch": 2.1494088870770485, "grad_norm": 2.7997400760650635, "learning_rate": 7.314173121348009e-05, "loss": 2.7231, "step": 31635 }, { "epoch": 2.14974860714771, "grad_norm": 3.1252384185791016, "learning_rate": 7.313748471259683e-05, "loss": 2.9679, "step": 31640 }, { "epoch": 2.150088327218372, "grad_norm": 3.155029058456421, "learning_rate": 7.313323821171354e-05, "loss": 2.864, "step": 31645 }, { "epoch": 2.150428047289034, "grad_norm": 2.583678960800171, "learning_rate": 7.312899171083027e-05, "loss": 3.1305, "step": 31650 }, { "epoch": 2.1507677673596954, "grad_norm": 2.233858585357666, "learning_rate": 7.312474520994701e-05, "loss": 2.9823, "step": 31655 }, { "epoch": 2.1511074874303575, "grad_norm": 2.1236205101013184, "learning_rate": 7.312049870906373e-05, "loss": 2.9251, "step": 31660 }, { "epoch": 2.151447207501019, "grad_norm": 2.290729284286499, "learning_rate": 7.311625220818046e-05, "loss": 3.0322, "step": 31665 }, { "epoch": 2.1517869275716808, "grad_norm": 2.456944227218628, "learning_rate": 7.31120057072972e-05, "loss": 2.9416, "step": 31670 }, { "epoch": 2.152126647642343, "grad_norm": 2.4273998737335205, "learning_rate": 7.310775920641391e-05, "loss": 2.9172, "step": 31675 }, { "epoch": 2.1524663677130045, "grad_norm": 2.6144332885742188, "learning_rate": 7.310351270553064e-05, "loss": 2.7795, "step": 31680 }, { "epoch": 2.152806087783666, "grad_norm": 2.943060874938965, "learning_rate": 7.309926620464738e-05, "loss": 2.7928, "step": 31685 }, { "epoch": 2.153145807854328, "grad_norm": 2.7722623348236084, "learning_rate": 7.30950197037641e-05, "loss": 3.0052, "step": 31690 }, { "epoch": 2.15348552792499, "grad_norm": 2.389148473739624, "learning_rate": 7.309077320288082e-05, "loss": 2.611, "step": 31695 }, { "epoch": 2.1538252479956514, "grad_norm": 2.327939510345459, "learning_rate": 
7.308652670199757e-05, "loss": 2.917, "step": 31700 }, { "epoch": 2.1541649680663135, "grad_norm": 3.5726497173309326, "learning_rate": 7.308228020111428e-05, "loss": 2.7951, "step": 31705 }, { "epoch": 2.154504688136975, "grad_norm": 2.775653600692749, "learning_rate": 7.307803370023101e-05, "loss": 2.8961, "step": 31710 }, { "epoch": 2.154844408207637, "grad_norm": 1.9740110635757446, "learning_rate": 7.307378719934774e-05, "loss": 2.7459, "step": 31715 }, { "epoch": 2.155184128278299, "grad_norm": 2.7524194717407227, "learning_rate": 7.306954069846446e-05, "loss": 2.7924, "step": 31720 }, { "epoch": 2.1555238483489605, "grad_norm": 2.367311954498291, "learning_rate": 7.306529419758119e-05, "loss": 2.823, "step": 31725 }, { "epoch": 2.155863568419622, "grad_norm": 2.279973030090332, "learning_rate": 7.306104769669792e-05, "loss": 2.9015, "step": 31730 }, { "epoch": 2.156203288490284, "grad_norm": 2.978196859359741, "learning_rate": 7.305680119581465e-05, "loss": 2.5979, "step": 31735 }, { "epoch": 2.156543008560946, "grad_norm": 2.662166118621826, "learning_rate": 7.305255469493139e-05, "loss": 3.0308, "step": 31740 }, { "epoch": 2.1568827286316075, "grad_norm": 2.5069267749786377, "learning_rate": 7.30483081940481e-05, "loss": 2.8156, "step": 31745 }, { "epoch": 2.1572224487022695, "grad_norm": 2.4225502014160156, "learning_rate": 7.304406169316483e-05, "loss": 2.6263, "step": 31750 }, { "epoch": 2.157562168772931, "grad_norm": 2.6255383491516113, "learning_rate": 7.303981519228157e-05, "loss": 2.816, "step": 31755 }, { "epoch": 2.157901888843593, "grad_norm": 2.365485668182373, "learning_rate": 7.303556869139829e-05, "loss": 2.7387, "step": 31760 }, { "epoch": 2.1582416089142544, "grad_norm": 3.111435890197754, "learning_rate": 7.303132219051502e-05, "loss": 2.7663, "step": 31765 }, { "epoch": 2.1585813289849165, "grad_norm": 2.511577844619751, "learning_rate": 7.302707568963176e-05, "loss": 2.8971, "step": 31770 }, { "epoch": 2.158921049055578, "grad_norm": 
2.7931160926818848, "learning_rate": 7.302282918874847e-05, "loss": 2.5632, "step": 31775 }, { "epoch": 2.1592607691262398, "grad_norm": 2.498915910720825, "learning_rate": 7.30185826878652e-05, "loss": 2.5629, "step": 31780 }, { "epoch": 2.159600489196902, "grad_norm": 2.30410099029541, "learning_rate": 7.301433618698194e-05, "loss": 2.8038, "step": 31785 }, { "epoch": 2.1599402092675635, "grad_norm": 2.586737871170044, "learning_rate": 7.301008968609866e-05, "loss": 3.0827, "step": 31790 }, { "epoch": 2.160279929338225, "grad_norm": 2.6262729167938232, "learning_rate": 7.300584318521538e-05, "loss": 2.9232, "step": 31795 }, { "epoch": 2.160619649408887, "grad_norm": 2.411555290222168, "learning_rate": 7.300159668433211e-05, "loss": 2.7063, "step": 31800 }, { "epoch": 2.160959369479549, "grad_norm": 3.18284273147583, "learning_rate": 7.299735018344884e-05, "loss": 3.0274, "step": 31805 }, { "epoch": 2.1612990895502104, "grad_norm": 2.4379079341888428, "learning_rate": 7.299310368256557e-05, "loss": 3.1373, "step": 31810 }, { "epoch": 2.1616388096208725, "grad_norm": 2.1181862354278564, "learning_rate": 7.29888571816823e-05, "loss": 3.0131, "step": 31815 }, { "epoch": 2.161978529691534, "grad_norm": 2.228039026260376, "learning_rate": 7.298461068079902e-05, "loss": 2.7808, "step": 31820 }, { "epoch": 2.1623182497621958, "grad_norm": 2.5265860557556152, "learning_rate": 7.298036417991575e-05, "loss": 3.0269, "step": 31825 }, { "epoch": 2.162657969832858, "grad_norm": 2.642199754714966, "learning_rate": 7.297611767903248e-05, "loss": 2.7669, "step": 31830 }, { "epoch": 2.1629976899035195, "grad_norm": 2.636336326599121, "learning_rate": 7.297187117814921e-05, "loss": 2.8599, "step": 31835 }, { "epoch": 2.163337409974181, "grad_norm": 2.2358405590057373, "learning_rate": 7.296762467726594e-05, "loss": 3.0448, "step": 31840 }, { "epoch": 2.163677130044843, "grad_norm": 2.5056679248809814, "learning_rate": 7.296337817638266e-05, "loss": 3.0527, "step": 31845 }, { 
"epoch": 2.164016850115505, "grad_norm": 3.2500596046447754, "learning_rate": 7.295913167549939e-05, "loss": 2.7133, "step": 31850 }, { "epoch": 2.1643565701861665, "grad_norm": 1.9268250465393066, "learning_rate": 7.295488517461612e-05, "loss": 2.9751, "step": 31855 }, { "epoch": 2.1646962902568285, "grad_norm": 2.3511154651641846, "learning_rate": 7.295063867373285e-05, "loss": 2.9204, "step": 31860 }, { "epoch": 2.16503601032749, "grad_norm": 2.209137201309204, "learning_rate": 7.294639217284958e-05, "loss": 2.9173, "step": 31865 }, { "epoch": 2.165375730398152, "grad_norm": 2.5032577514648438, "learning_rate": 7.29421456719663e-05, "loss": 2.9606, "step": 31870 }, { "epoch": 2.165715450468814, "grad_norm": 2.4951276779174805, "learning_rate": 7.293789917108303e-05, "loss": 2.7491, "step": 31875 }, { "epoch": 2.1660551705394755, "grad_norm": 2.795292854309082, "learning_rate": 7.293365267019976e-05, "loss": 2.6247, "step": 31880 }, { "epoch": 2.166394890610137, "grad_norm": 2.178217887878418, "learning_rate": 7.292940616931649e-05, "loss": 2.851, "step": 31885 }, { "epoch": 2.166734610680799, "grad_norm": 1.982544183731079, "learning_rate": 7.292515966843322e-05, "loss": 2.8331, "step": 31890 }, { "epoch": 2.167074330751461, "grad_norm": 2.747790575027466, "learning_rate": 7.292091316754994e-05, "loss": 2.7614, "step": 31895 }, { "epoch": 2.1674140508221225, "grad_norm": 2.6290438175201416, "learning_rate": 7.291666666666667e-05, "loss": 2.8658, "step": 31900 }, { "epoch": 2.1677537708927845, "grad_norm": 2.4196720123291016, "learning_rate": 7.29124201657834e-05, "loss": 2.9558, "step": 31905 }, { "epoch": 2.168093490963446, "grad_norm": 2.5483410358428955, "learning_rate": 7.290817366490013e-05, "loss": 2.9037, "step": 31910 }, { "epoch": 2.168433211034108, "grad_norm": 3.6036500930786133, "learning_rate": 7.290392716401684e-05, "loss": 2.9248, "step": 31915 }, { "epoch": 2.16877293110477, "grad_norm": 2.6482503414154053, "learning_rate": 7.289968066313358e-05, 
"loss": 2.7477, "step": 31920 }, { "epoch": 2.1691126511754315, "grad_norm": 2.3495898246765137, "learning_rate": 7.289543416225031e-05, "loss": 2.7447, "step": 31925 }, { "epoch": 2.169452371246093, "grad_norm": 2.259047031402588, "learning_rate": 7.289118766136703e-05, "loss": 2.6077, "step": 31930 }, { "epoch": 2.169792091316755, "grad_norm": 2.268988847732544, "learning_rate": 7.288694116048377e-05, "loss": 2.9224, "step": 31935 }, { "epoch": 2.170131811387417, "grad_norm": 2.582597255706787, "learning_rate": 7.28826946596005e-05, "loss": 3.0393, "step": 31940 }, { "epoch": 2.1704715314580785, "grad_norm": 2.538836717605591, "learning_rate": 7.287844815871721e-05, "loss": 2.8997, "step": 31945 }, { "epoch": 2.1708112515287405, "grad_norm": 2.57734751701355, "learning_rate": 7.287420165783395e-05, "loss": 2.7832, "step": 31950 }, { "epoch": 2.171150971599402, "grad_norm": 2.3304271697998047, "learning_rate": 7.286995515695068e-05, "loss": 3.0135, "step": 31955 }, { "epoch": 2.171490691670064, "grad_norm": 2.2500483989715576, "learning_rate": 7.28657086560674e-05, "loss": 2.8586, "step": 31960 }, { "epoch": 2.1718304117407254, "grad_norm": 2.5006260871887207, "learning_rate": 7.286146215518414e-05, "loss": 2.9862, "step": 31965 }, { "epoch": 2.1721701318113875, "grad_norm": 2.5351569652557373, "learning_rate": 7.285721565430086e-05, "loss": 2.9213, "step": 31970 }, { "epoch": 2.172509851882049, "grad_norm": 3.245718240737915, "learning_rate": 7.285296915341758e-05, "loss": 2.7387, "step": 31975 }, { "epoch": 2.172849571952711, "grad_norm": 2.6623175144195557, "learning_rate": 7.284872265253432e-05, "loss": 2.7355, "step": 31980 }, { "epoch": 2.173189292023373, "grad_norm": 3.485614776611328, "learning_rate": 7.284447615165105e-05, "loss": 2.878, "step": 31985 }, { "epoch": 2.1735290120940345, "grad_norm": 2.635676622390747, "learning_rate": 7.284022965076776e-05, "loss": 2.7834, "step": 31990 }, { "epoch": 2.173868732164696, "grad_norm": 2.62349009513855, 
"learning_rate": 7.28359831498845e-05, "loss": 2.7872, "step": 31995 }, { "epoch": 2.174208452235358, "grad_norm": 2.5205256938934326, "learning_rate": 7.283173664900122e-05, "loss": 2.7985, "step": 32000 }, { "epoch": 2.17454817230602, "grad_norm": 3.6266956329345703, "learning_rate": 7.282749014811795e-05, "loss": 3.2532, "step": 32005 }, { "epoch": 2.1748878923766815, "grad_norm": 2.609475612640381, "learning_rate": 7.282324364723469e-05, "loss": 2.8743, "step": 32010 }, { "epoch": 2.1752276124473435, "grad_norm": 1.9756784439086914, "learning_rate": 7.28189971463514e-05, "loss": 2.8504, "step": 32015 }, { "epoch": 2.175567332518005, "grad_norm": 2.6859335899353027, "learning_rate": 7.281475064546813e-05, "loss": 2.7201, "step": 32020 }, { "epoch": 2.175907052588667, "grad_norm": 2.6270711421966553, "learning_rate": 7.281050414458487e-05, "loss": 2.9089, "step": 32025 }, { "epoch": 2.176246772659329, "grad_norm": 2.4383959770202637, "learning_rate": 7.280625764370159e-05, "loss": 2.9499, "step": 32030 }, { "epoch": 2.1765864927299905, "grad_norm": 3.0225062370300293, "learning_rate": 7.280201114281832e-05, "loss": 2.9942, "step": 32035 }, { "epoch": 2.176926212800652, "grad_norm": 1.931923270225525, "learning_rate": 7.279776464193506e-05, "loss": 2.71, "step": 32040 }, { "epoch": 2.177265932871314, "grad_norm": 2.019214391708374, "learning_rate": 7.279351814105177e-05, "loss": 2.9714, "step": 32045 }, { "epoch": 2.177605652941976, "grad_norm": 2.7808187007904053, "learning_rate": 7.27892716401685e-05, "loss": 2.7648, "step": 32050 }, { "epoch": 2.1779453730126375, "grad_norm": 2.8006744384765625, "learning_rate": 7.278502513928524e-05, "loss": 2.8812, "step": 32055 }, { "epoch": 2.1782850930832995, "grad_norm": 2.2327897548675537, "learning_rate": 7.278077863840196e-05, "loss": 2.9042, "step": 32060 }, { "epoch": 2.178624813153961, "grad_norm": 2.820535898208618, "learning_rate": 7.277653213751868e-05, "loss": 2.9931, "step": 32065 }, { "epoch": 
2.178964533224623, "grad_norm": 2.4196078777313232, "learning_rate": 7.277228563663541e-05, "loss": 3.0074, "step": 32070 }, { "epoch": 2.179304253295285, "grad_norm": 2.7761640548706055, "learning_rate": 7.276803913575214e-05, "loss": 2.8628, "step": 32075 }, { "epoch": 2.1796439733659465, "grad_norm": 2.750659942626953, "learning_rate": 7.276379263486888e-05, "loss": 2.8649, "step": 32080 }, { "epoch": 2.179983693436608, "grad_norm": 2.17332124710083, "learning_rate": 7.27595461339856e-05, "loss": 3.2372, "step": 32085 }, { "epoch": 2.18032341350727, "grad_norm": 2.285783529281616, "learning_rate": 7.275529963310232e-05, "loss": 2.5827, "step": 32090 }, { "epoch": 2.180663133577932, "grad_norm": 2.761211395263672, "learning_rate": 7.275105313221907e-05, "loss": 2.9476, "step": 32095 }, { "epoch": 2.1810028536485935, "grad_norm": 2.1492533683776855, "learning_rate": 7.274680663133578e-05, "loss": 2.9212, "step": 32100 }, { "epoch": 2.181342573719255, "grad_norm": 2.6279208660125732, "learning_rate": 7.274256013045251e-05, "loss": 2.8142, "step": 32105 }, { "epoch": 2.181682293789917, "grad_norm": 2.5408644676208496, "learning_rate": 7.273831362956925e-05, "loss": 2.8058, "step": 32110 }, { "epoch": 2.182022013860579, "grad_norm": 3.28847336769104, "learning_rate": 7.273406712868596e-05, "loss": 2.8023, "step": 32115 }, { "epoch": 2.1823617339312404, "grad_norm": 3.030602216720581, "learning_rate": 7.272982062780269e-05, "loss": 2.8996, "step": 32120 }, { "epoch": 2.1827014540019025, "grad_norm": 2.9742941856384277, "learning_rate": 7.272557412691943e-05, "loss": 3.0448, "step": 32125 }, { "epoch": 2.183041174072564, "grad_norm": 2.154947280883789, "learning_rate": 7.272132762603615e-05, "loss": 2.8417, "step": 32130 }, { "epoch": 2.183380894143226, "grad_norm": 3.4010002613067627, "learning_rate": 7.271708112515288e-05, "loss": 2.9116, "step": 32135 }, { "epoch": 2.183720614213888, "grad_norm": 2.4265589714050293, "learning_rate": 7.27128346242696e-05, "loss": 
2.7856, "step": 32140 }, { "epoch": 2.1840603342845495, "grad_norm": 2.6139602661132812, "learning_rate": 7.270858812338633e-05, "loss": 2.7635, "step": 32145 }, { "epoch": 2.184400054355211, "grad_norm": 2.5564332008361816, "learning_rate": 7.270434162250306e-05, "loss": 2.85, "step": 32150 }, { "epoch": 2.184739774425873, "grad_norm": 2.6383421421051025, "learning_rate": 7.270009512161979e-05, "loss": 2.5879, "step": 32155 }, { "epoch": 2.185079494496535, "grad_norm": 2.366234302520752, "learning_rate": 7.269584862073652e-05, "loss": 2.8338, "step": 32160 }, { "epoch": 2.1854192145671965, "grad_norm": 2.585604429244995, "learning_rate": 7.269160211985324e-05, "loss": 2.8866, "step": 32165 }, { "epoch": 2.1857589346378585, "grad_norm": 2.5743491649627686, "learning_rate": 7.268735561896997e-05, "loss": 2.9296, "step": 32170 }, { "epoch": 2.18609865470852, "grad_norm": 2.985650062561035, "learning_rate": 7.26831091180867e-05, "loss": 2.7078, "step": 32175 }, { "epoch": 2.186438374779182, "grad_norm": 2.474036455154419, "learning_rate": 7.267886261720343e-05, "loss": 2.9389, "step": 32180 }, { "epoch": 2.186778094849844, "grad_norm": 2.665015935897827, "learning_rate": 7.267461611632016e-05, "loss": 2.96, "step": 32185 }, { "epoch": 2.1871178149205055, "grad_norm": 2.628732204437256, "learning_rate": 7.267036961543688e-05, "loss": 3.1032, "step": 32190 }, { "epoch": 2.187457534991167, "grad_norm": 2.326331615447998, "learning_rate": 7.266612311455361e-05, "loss": 3.0261, "step": 32195 }, { "epoch": 2.187797255061829, "grad_norm": 2.705033779144287, "learning_rate": 7.266187661367034e-05, "loss": 2.6146, "step": 32200 }, { "epoch": 2.188136975132491, "grad_norm": 2.401536464691162, "learning_rate": 7.265763011278707e-05, "loss": 2.7942, "step": 32205 }, { "epoch": 2.1884766952031525, "grad_norm": 2.5899674892425537, "learning_rate": 7.26533836119038e-05, "loss": 3.0034, "step": 32210 }, { "epoch": 2.1888164152738145, "grad_norm": 2.6663405895233154, "learning_rate": 
7.264913711102052e-05, "loss": 2.9185, "step": 32215 }, { "epoch": 2.189156135344476, "grad_norm": 2.868541955947876, "learning_rate": 7.264489061013725e-05, "loss": 2.8926, "step": 32220 }, { "epoch": 2.189495855415138, "grad_norm": 2.244673728942871, "learning_rate": 7.264064410925398e-05, "loss": 2.6419, "step": 32225 }, { "epoch": 2.1898355754858, "grad_norm": 2.707160472869873, "learning_rate": 7.263639760837071e-05, "loss": 2.9722, "step": 32230 }, { "epoch": 2.1901752955564615, "grad_norm": 2.7686007022857666, "learning_rate": 7.263215110748744e-05, "loss": 3.0309, "step": 32235 }, { "epoch": 2.190515015627123, "grad_norm": 2.503296136856079, "learning_rate": 7.262790460660416e-05, "loss": 3.0139, "step": 32240 }, { "epoch": 2.190854735697785, "grad_norm": 2.2878918647766113, "learning_rate": 7.262365810572089e-05, "loss": 2.6227, "step": 32245 }, { "epoch": 2.191194455768447, "grad_norm": 2.203489065170288, "learning_rate": 7.261941160483762e-05, "loss": 2.7983, "step": 32250 }, { "epoch": 2.1915341758391085, "grad_norm": 2.38346004486084, "learning_rate": 7.261516510395435e-05, "loss": 2.8158, "step": 32255 }, { "epoch": 2.1918738959097706, "grad_norm": 2.3112001419067383, "learning_rate": 7.261091860307108e-05, "loss": 2.7889, "step": 32260 }, { "epoch": 2.192213615980432, "grad_norm": 2.7322516441345215, "learning_rate": 7.26066721021878e-05, "loss": 3.0088, "step": 32265 }, { "epoch": 2.192553336051094, "grad_norm": 2.709104299545288, "learning_rate": 7.260242560130452e-05, "loss": 2.8148, "step": 32270 }, { "epoch": 2.192893056121756, "grad_norm": 2.5044991970062256, "learning_rate": 7.259817910042126e-05, "loss": 2.9297, "step": 32275 }, { "epoch": 2.1932327761924175, "grad_norm": 3.2808268070220947, "learning_rate": 7.259393259953799e-05, "loss": 2.8997, "step": 32280 }, { "epoch": 2.193572496263079, "grad_norm": 2.535069465637207, "learning_rate": 7.25896860986547e-05, "loss": 2.4536, "step": 32285 }, { "epoch": 2.1939122163337412, "grad_norm": 
2.559718608856201, "learning_rate": 7.258543959777144e-05, "loss": 2.8679, "step": 32290 }, { "epoch": 2.194251936404403, "grad_norm": 2.8825697898864746, "learning_rate": 7.258119309688817e-05, "loss": 2.9033, "step": 32295 }, { "epoch": 2.1945916564750645, "grad_norm": 2.5118119716644287, "learning_rate": 7.257694659600489e-05, "loss": 2.8238, "step": 32300 }, { "epoch": 2.194931376545726, "grad_norm": 2.6427652835845947, "learning_rate": 7.257270009512163e-05, "loss": 2.8466, "step": 32305 }, { "epoch": 2.195271096616388, "grad_norm": 3.8258843421936035, "learning_rate": 7.256845359423836e-05, "loss": 2.7912, "step": 32310 }, { "epoch": 2.19561081668705, "grad_norm": 2.6479482650756836, "learning_rate": 7.256420709335507e-05, "loss": 2.6662, "step": 32315 }, { "epoch": 2.1959505367577115, "grad_norm": 2.2168033123016357, "learning_rate": 7.255996059247181e-05, "loss": 2.4222, "step": 32320 }, { "epoch": 2.1962902568283735, "grad_norm": 2.3843162059783936, "learning_rate": 7.255571409158854e-05, "loss": 2.9481, "step": 32325 }, { "epoch": 2.196629976899035, "grad_norm": 2.367833137512207, "learning_rate": 7.255146759070525e-05, "loss": 2.7758, "step": 32330 }, { "epoch": 2.196969696969697, "grad_norm": 2.869832992553711, "learning_rate": 7.2547221089822e-05, "loss": 2.8787, "step": 32335 }, { "epoch": 2.197309417040359, "grad_norm": 2.0547783374786377, "learning_rate": 7.254297458893871e-05, "loss": 3.1413, "step": 32340 }, { "epoch": 2.1976491371110205, "grad_norm": 2.676029920578003, "learning_rate": 7.253872808805544e-05, "loss": 3.0651, "step": 32345 }, { "epoch": 2.197988857181682, "grad_norm": 2.4999969005584717, "learning_rate": 7.253448158717218e-05, "loss": 2.8629, "step": 32350 }, { "epoch": 2.198328577252344, "grad_norm": 3.400418996810913, "learning_rate": 7.25302350862889e-05, "loss": 2.9073, "step": 32355 }, { "epoch": 2.198668297323006, "grad_norm": 2.153087854385376, "learning_rate": 7.252598858540562e-05, "loss": 2.7997, "step": 32360 }, { 
"epoch": 2.1990080173936675, "grad_norm": 2.2648510932922363, "learning_rate": 7.252174208452236e-05, "loss": 2.5949, "step": 32365 }, { "epoch": 2.1993477374643295, "grad_norm": 2.1078643798828125, "learning_rate": 7.251749558363908e-05, "loss": 2.661, "step": 32370 }, { "epoch": 2.199687457534991, "grad_norm": 2.45188307762146, "learning_rate": 7.25132490827558e-05, "loss": 2.6129, "step": 32375 }, { "epoch": 2.200027177605653, "grad_norm": 2.4503281116485596, "learning_rate": 7.250900258187255e-05, "loss": 2.9823, "step": 32380 }, { "epoch": 2.200366897676315, "grad_norm": 3.2324373722076416, "learning_rate": 7.250475608098926e-05, "loss": 2.827, "step": 32385 }, { "epoch": 2.2007066177469765, "grad_norm": 2.140300750732422, "learning_rate": 7.250050958010599e-05, "loss": 3.1271, "step": 32390 }, { "epoch": 2.201046337817638, "grad_norm": 2.7005391120910645, "learning_rate": 7.249626307922273e-05, "loss": 2.6956, "step": 32395 }, { "epoch": 2.2013860578883, "grad_norm": 2.338383913040161, "learning_rate": 7.249201657833945e-05, "loss": 3.137, "step": 32400 }, { "epoch": 2.201725777958962, "grad_norm": 2.583280324935913, "learning_rate": 7.248777007745617e-05, "loss": 2.9527, "step": 32405 }, { "epoch": 2.2020654980296235, "grad_norm": 2.5168142318725586, "learning_rate": 7.248352357657292e-05, "loss": 2.8838, "step": 32410 }, { "epoch": 2.2024052181002856, "grad_norm": 2.1450343132019043, "learning_rate": 7.247927707568963e-05, "loss": 3.0693, "step": 32415 }, { "epoch": 2.202744938170947, "grad_norm": 2.5818140506744385, "learning_rate": 7.247503057480637e-05, "loss": 2.7989, "step": 32420 }, { "epoch": 2.203084658241609, "grad_norm": 2.5677552223205566, "learning_rate": 7.247078407392309e-05, "loss": 3.1136, "step": 32425 }, { "epoch": 2.203424378312271, "grad_norm": 2.7690656185150146, "learning_rate": 7.246653757303981e-05, "loss": 2.6049, "step": 32430 }, { "epoch": 2.2037640983829325, "grad_norm": 2.109607696533203, "learning_rate": 7.246229107215656e-05, 
"loss": 2.9253, "step": 32435 }, { "epoch": 2.204103818453594, "grad_norm": 2.6270899772644043, "learning_rate": 7.245804457127327e-05, "loss": 3.1298, "step": 32440 }, { "epoch": 2.204443538524256, "grad_norm": 2.35166072845459, "learning_rate": 7.245379807039e-05, "loss": 3.1539, "step": 32445 }, { "epoch": 2.204783258594918, "grad_norm": 2.573591709136963, "learning_rate": 7.244955156950674e-05, "loss": 2.6941, "step": 32450 }, { "epoch": 2.2051229786655795, "grad_norm": 2.209564685821533, "learning_rate": 7.244530506862345e-05, "loss": 2.6168, "step": 32455 }, { "epoch": 2.205462698736241, "grad_norm": 2.941195011138916, "learning_rate": 7.244105856774018e-05, "loss": 2.7766, "step": 32460 }, { "epoch": 2.205802418806903, "grad_norm": 2.3241777420043945, "learning_rate": 7.243681206685692e-05, "loss": 3.0742, "step": 32465 }, { "epoch": 2.206142138877565, "grad_norm": 2.508758306503296, "learning_rate": 7.243256556597364e-05, "loss": 2.9663, "step": 32470 }, { "epoch": 2.2064818589482265, "grad_norm": 2.758220672607422, "learning_rate": 7.242831906509037e-05, "loss": 3.0078, "step": 32475 }, { "epoch": 2.2068215790188885, "grad_norm": 3.032233238220215, "learning_rate": 7.242407256420711e-05, "loss": 2.5507, "step": 32480 }, { "epoch": 2.20716129908955, "grad_norm": 2.356436252593994, "learning_rate": 7.241982606332382e-05, "loss": 3.1194, "step": 32485 }, { "epoch": 2.207501019160212, "grad_norm": 2.4200472831726074, "learning_rate": 7.241557956244055e-05, "loss": 2.8318, "step": 32490 }, { "epoch": 2.207840739230874, "grad_norm": 2.3763587474823, "learning_rate": 7.241133306155728e-05, "loss": 3.0172, "step": 32495 }, { "epoch": 2.2081804593015355, "grad_norm": 2.687589645385742, "learning_rate": 7.2407086560674e-05, "loss": 2.7233, "step": 32500 }, { "epoch": 2.208520179372197, "grad_norm": 2.665396213531494, "learning_rate": 7.240284005979073e-05, "loss": 2.8024, "step": 32505 }, { "epoch": 2.208859899442859, "grad_norm": 2.23095440864563, "learning_rate": 
7.239859355890746e-05, "loss": 2.9582, "step": 32510 }, { "epoch": 2.209199619513521, "grad_norm": 2.5943028926849365, "learning_rate": 7.239434705802419e-05, "loss": 2.9848, "step": 32515 }, { "epoch": 2.2095393395841825, "grad_norm": 2.637180805206299, "learning_rate": 7.239010055714092e-05, "loss": 2.8392, "step": 32520 }, { "epoch": 2.2098790596548445, "grad_norm": 3.0510904788970947, "learning_rate": 7.238585405625765e-05, "loss": 3.0234, "step": 32525 }, { "epoch": 2.210218779725506, "grad_norm": 2.3842458724975586, "learning_rate": 7.238160755537437e-05, "loss": 2.9579, "step": 32530 }, { "epoch": 2.210558499796168, "grad_norm": 2.3340415954589844, "learning_rate": 7.23773610544911e-05, "loss": 2.8746, "step": 32535 }, { "epoch": 2.21089821986683, "grad_norm": 2.3182785511016846, "learning_rate": 7.237311455360783e-05, "loss": 3.0853, "step": 32540 }, { "epoch": 2.2112379399374915, "grad_norm": 2.586538553237915, "learning_rate": 7.236886805272456e-05, "loss": 2.9246, "step": 32545 }, { "epoch": 2.211577660008153, "grad_norm": 2.100529670715332, "learning_rate": 7.236462155184129e-05, "loss": 3.0887, "step": 32550 }, { "epoch": 2.211917380078815, "grad_norm": 2.63149094581604, "learning_rate": 7.236037505095801e-05, "loss": 2.8504, "step": 32555 }, { "epoch": 2.212257100149477, "grad_norm": 2.9503448009490967, "learning_rate": 7.235612855007474e-05, "loss": 2.9143, "step": 32560 }, { "epoch": 2.2125968202201385, "grad_norm": 2.248025417327881, "learning_rate": 7.235188204919147e-05, "loss": 2.9164, "step": 32565 }, { "epoch": 2.2129365402908006, "grad_norm": 2.230646848678589, "learning_rate": 7.23476355483082e-05, "loss": 2.6523, "step": 32570 }, { "epoch": 2.213276260361462, "grad_norm": 5.8250813484191895, "learning_rate": 7.234338904742493e-05, "loss": 2.757, "step": 32575 }, { "epoch": 2.213615980432124, "grad_norm": 3.016338586807251, "learning_rate": 7.233914254654165e-05, "loss": 2.985, "step": 32580 }, { "epoch": 2.213955700502786, "grad_norm": 
2.552410364151001, "learning_rate": 7.233489604565838e-05, "loss": 3.0378, "step": 32585 }, { "epoch": 2.2142954205734475, "grad_norm": 2.273813009262085, "learning_rate": 7.233064954477511e-05, "loss": 2.7316, "step": 32590 }, { "epoch": 2.214635140644109, "grad_norm": 2.841160297393799, "learning_rate": 7.232640304389184e-05, "loss": 2.6506, "step": 32595 }, { "epoch": 2.2149748607147712, "grad_norm": 2.33121395111084, "learning_rate": 7.232215654300857e-05, "loss": 2.9489, "step": 32600 }, { "epoch": 2.215314580785433, "grad_norm": 2.3448524475097656, "learning_rate": 7.23179100421253e-05, "loss": 2.7922, "step": 32605 }, { "epoch": 2.2156543008560945, "grad_norm": 2.7594521045684814, "learning_rate": 7.231366354124202e-05, "loss": 2.6612, "step": 32610 }, { "epoch": 2.2159940209267566, "grad_norm": 2.6771318912506104, "learning_rate": 7.230941704035875e-05, "loss": 2.9995, "step": 32615 }, { "epoch": 2.216333740997418, "grad_norm": 2.764615058898926, "learning_rate": 7.230517053947548e-05, "loss": 2.9179, "step": 32620 }, { "epoch": 2.21667346106808, "grad_norm": 2.8338239192962646, "learning_rate": 7.230092403859219e-05, "loss": 2.8216, "step": 32625 }, { "epoch": 2.217013181138742, "grad_norm": 2.5993545055389404, "learning_rate": 7.229667753770893e-05, "loss": 2.7961, "step": 32630 }, { "epoch": 2.2173529012094035, "grad_norm": 2.8181838989257812, "learning_rate": 7.229243103682566e-05, "loss": 2.7114, "step": 32635 }, { "epoch": 2.217692621280065, "grad_norm": 2.7266626358032227, "learning_rate": 7.228818453594238e-05, "loss": 2.8717, "step": 32640 }, { "epoch": 2.218032341350727, "grad_norm": 2.305598020553589, "learning_rate": 7.228393803505912e-05, "loss": 2.8953, "step": 32645 }, { "epoch": 2.218372061421389, "grad_norm": 2.1641077995300293, "learning_rate": 7.227969153417585e-05, "loss": 2.7629, "step": 32650 }, { "epoch": 2.2187117814920505, "grad_norm": 3.4364869594573975, "learning_rate": 7.227544503329256e-05, "loss": 2.8596, "step": 32655 }, { 
"epoch": 2.219051501562712, "grad_norm": 2.5347118377685547, "learning_rate": 7.22711985324093e-05, "loss": 2.7934, "step": 32660 }, { "epoch": 2.219391221633374, "grad_norm": 2.2209579944610596, "learning_rate": 7.226695203152603e-05, "loss": 2.7307, "step": 32665 }, { "epoch": 2.219730941704036, "grad_norm": 2.8445167541503906, "learning_rate": 7.226270553064275e-05, "loss": 3.0325, "step": 32670 }, { "epoch": 2.2200706617746975, "grad_norm": 2.2517526149749756, "learning_rate": 7.225845902975949e-05, "loss": 2.8809, "step": 32675 }, { "epoch": 2.2204103818453595, "grad_norm": 2.6467535495758057, "learning_rate": 7.225421252887621e-05, "loss": 2.6644, "step": 32680 }, { "epoch": 2.220750101916021, "grad_norm": 2.3985931873321533, "learning_rate": 7.224996602799293e-05, "loss": 2.8872, "step": 32685 }, { "epoch": 2.221089821986683, "grad_norm": 3.244860887527466, "learning_rate": 7.224571952710967e-05, "loss": 2.6877, "step": 32690 }, { "epoch": 2.221429542057345, "grad_norm": 3.914081335067749, "learning_rate": 7.224147302622639e-05, "loss": 2.6948, "step": 32695 }, { "epoch": 2.2217692621280065, "grad_norm": 1.8677458763122559, "learning_rate": 7.223722652534311e-05, "loss": 2.8802, "step": 32700 }, { "epoch": 2.222108982198668, "grad_norm": 2.2272286415100098, "learning_rate": 7.223298002445985e-05, "loss": 2.7559, "step": 32705 }, { "epoch": 2.2224487022693302, "grad_norm": 1.9221246242523193, "learning_rate": 7.222873352357657e-05, "loss": 2.9327, "step": 32710 }, { "epoch": 2.222788422339992, "grad_norm": 2.714747428894043, "learning_rate": 7.22244870226933e-05, "loss": 2.9806, "step": 32715 }, { "epoch": 2.2231281424106535, "grad_norm": 2.9049882888793945, "learning_rate": 7.222024052181004e-05, "loss": 2.774, "step": 32720 }, { "epoch": 2.2234678624813156, "grad_norm": 2.2195212841033936, "learning_rate": 7.221599402092675e-05, "loss": 2.9041, "step": 32725 }, { "epoch": 2.223807582551977, "grad_norm": 2.716212272644043, "learning_rate": 
7.221174752004348e-05, "loss": 2.9316, "step": 32730 }, { "epoch": 2.224147302622639, "grad_norm": 2.736064910888672, "learning_rate": 7.220750101916022e-05, "loss": 2.9731, "step": 32735 }, { "epoch": 2.224487022693301, "grad_norm": 2.524653673171997, "learning_rate": 7.220325451827694e-05, "loss": 2.9659, "step": 32740 }, { "epoch": 2.2248267427639625, "grad_norm": 2.6109070777893066, "learning_rate": 7.219900801739367e-05, "loss": 2.9224, "step": 32745 }, { "epoch": 2.225166462834624, "grad_norm": 3.146559953689575, "learning_rate": 7.219476151651041e-05, "loss": 2.487, "step": 32750 }, { "epoch": 2.2255061829052862, "grad_norm": 3.417128086090088, "learning_rate": 7.219051501562712e-05, "loss": 2.7406, "step": 32755 }, { "epoch": 2.225845902975948, "grad_norm": 2.2835352420806885, "learning_rate": 7.218626851474386e-05, "loss": 3.0532, "step": 32760 }, { "epoch": 2.2261856230466095, "grad_norm": 2.5866715908050537, "learning_rate": 7.218202201386058e-05, "loss": 2.8051, "step": 32765 }, { "epoch": 2.2265253431172716, "grad_norm": 2.6528501510620117, "learning_rate": 7.21777755129773e-05, "loss": 2.9577, "step": 32770 }, { "epoch": 2.226865063187933, "grad_norm": 2.46233868598938, "learning_rate": 7.217352901209405e-05, "loss": 2.8804, "step": 32775 }, { "epoch": 2.227204783258595, "grad_norm": 2.764763116836548, "learning_rate": 7.216928251121076e-05, "loss": 2.8279, "step": 32780 }, { "epoch": 2.2275445033292565, "grad_norm": 2.5412604808807373, "learning_rate": 7.216503601032749e-05, "loss": 2.8297, "step": 32785 }, { "epoch": 2.2278842233999185, "grad_norm": 2.3578827381134033, "learning_rate": 7.216078950944423e-05, "loss": 2.8099, "step": 32790 }, { "epoch": 2.22822394347058, "grad_norm": 2.601724624633789, "learning_rate": 7.215654300856095e-05, "loss": 2.783, "step": 32795 }, { "epoch": 2.228563663541242, "grad_norm": 2.28682279586792, "learning_rate": 7.215229650767767e-05, "loss": 2.9805, "step": 32800 }, { "epoch": 2.228903383611904, "grad_norm": 
2.7013375759124756, "learning_rate": 7.214805000679441e-05, "loss": 2.8832, "step": 32805 }, { "epoch": 2.2292431036825655, "grad_norm": 2.4026753902435303, "learning_rate": 7.214380350591113e-05, "loss": 2.928, "step": 32810 }, { "epoch": 2.229582823753227, "grad_norm": 2.1729629039764404, "learning_rate": 7.213955700502786e-05, "loss": 3.0705, "step": 32815 }, { "epoch": 2.229922543823889, "grad_norm": 3.5258920192718506, "learning_rate": 7.21353105041446e-05, "loss": 2.9983, "step": 32820 }, { "epoch": 2.230262263894551, "grad_norm": 2.0111894607543945, "learning_rate": 7.213106400326131e-05, "loss": 3.0425, "step": 32825 }, { "epoch": 2.2306019839652125, "grad_norm": 2.6268765926361084, "learning_rate": 7.212681750237804e-05, "loss": 3.2287, "step": 32830 }, { "epoch": 2.2309417040358746, "grad_norm": 2.9517054557800293, "learning_rate": 7.212257100149478e-05, "loss": 3.0094, "step": 32835 }, { "epoch": 2.231281424106536, "grad_norm": 3.03939151763916, "learning_rate": 7.21183245006115e-05, "loss": 2.8436, "step": 32840 }, { "epoch": 2.231621144177198, "grad_norm": 2.6090807914733887, "learning_rate": 7.211407799972823e-05, "loss": 2.8543, "step": 32845 }, { "epoch": 2.23196086424786, "grad_norm": 2.416677713394165, "learning_rate": 7.210983149884495e-05, "loss": 2.8729, "step": 32850 }, { "epoch": 2.2323005843185215, "grad_norm": 2.290902853012085, "learning_rate": 7.210558499796168e-05, "loss": 2.8549, "step": 32855 }, { "epoch": 2.232640304389183, "grad_norm": 2.6582746505737305, "learning_rate": 7.210133849707841e-05, "loss": 2.6719, "step": 32860 }, { "epoch": 2.2329800244598452, "grad_norm": 3.020695686340332, "learning_rate": 7.209709199619514e-05, "loss": 2.641, "step": 32865 }, { "epoch": 2.233319744530507, "grad_norm": 2.2010974884033203, "learning_rate": 7.209284549531187e-05, "loss": 3.0755, "step": 32870 }, { "epoch": 2.2336594646011685, "grad_norm": 2.7290945053100586, "learning_rate": 7.20885989944286e-05, "loss": 3.0898, "step": 32875 }, { 
"epoch": 2.2339991846718306, "grad_norm": 2.375084638595581, "learning_rate": 7.208435249354532e-05, "loss": 2.8042, "step": 32880 }, { "epoch": 2.234338904742492, "grad_norm": 2.595149040222168, "learning_rate": 7.208010599266205e-05, "loss": 2.779, "step": 32885 }, { "epoch": 2.234678624813154, "grad_norm": 2.8085498809814453, "learning_rate": 7.207585949177878e-05, "loss": 2.9124, "step": 32890 }, { "epoch": 2.235018344883816, "grad_norm": 3.00689959526062, "learning_rate": 7.20716129908955e-05, "loss": 2.8729, "step": 32895 }, { "epoch": 2.2353580649544775, "grad_norm": 2.6798105239868164, "learning_rate": 7.206736649001223e-05, "loss": 2.9082, "step": 32900 }, { "epoch": 2.235697785025139, "grad_norm": 2.737867832183838, "learning_rate": 7.206311998912896e-05, "loss": 2.8506, "step": 32905 }, { "epoch": 2.2360375050958012, "grad_norm": 2.6229255199432373, "learning_rate": 7.205887348824569e-05, "loss": 3.063, "step": 32910 }, { "epoch": 2.236377225166463, "grad_norm": 2.813624143600464, "learning_rate": 7.205462698736242e-05, "loss": 3.0996, "step": 32915 }, { "epoch": 2.2367169452371245, "grad_norm": 2.6742606163024902, "learning_rate": 7.205038048647915e-05, "loss": 2.7752, "step": 32920 }, { "epoch": 2.2370566653077866, "grad_norm": 2.7019104957580566, "learning_rate": 7.204613398559587e-05, "loss": 2.9885, "step": 32925 }, { "epoch": 2.237396385378448, "grad_norm": 2.530966281890869, "learning_rate": 7.20418874847126e-05, "loss": 3.0438, "step": 32930 }, { "epoch": 2.23773610544911, "grad_norm": 2.2754204273223877, "learning_rate": 7.203764098382933e-05, "loss": 2.6577, "step": 32935 }, { "epoch": 2.238075825519772, "grad_norm": 2.677442789077759, "learning_rate": 7.203339448294606e-05, "loss": 2.6966, "step": 32940 }, { "epoch": 2.2384155455904335, "grad_norm": 2.8423991203308105, "learning_rate": 7.202914798206279e-05, "loss": 2.8794, "step": 32945 }, { "epoch": 2.238755265661095, "grad_norm": 2.196319103240967, "learning_rate": 7.202490148117951e-05, 
"loss": 3.0519, "step": 32950 }, { "epoch": 2.2390949857317572, "grad_norm": 2.4786598682403564, "learning_rate": 7.202065498029624e-05, "loss": 2.8842, "step": 32955 }, { "epoch": 2.239434705802419, "grad_norm": 1.9977598190307617, "learning_rate": 7.201640847941297e-05, "loss": 2.8492, "step": 32960 }, { "epoch": 2.2397744258730805, "grad_norm": 2.959456443786621, "learning_rate": 7.201216197852968e-05, "loss": 2.8653, "step": 32965 }, { "epoch": 2.2401141459437426, "grad_norm": 2.815317153930664, "learning_rate": 7.200791547764643e-05, "loss": 2.9413, "step": 32970 }, { "epoch": 2.240453866014404, "grad_norm": 2.9889278411865234, "learning_rate": 7.200366897676315e-05, "loss": 2.8071, "step": 32975 }, { "epoch": 2.240793586085066, "grad_norm": 2.4369211196899414, "learning_rate": 7.199942247587987e-05, "loss": 2.8481, "step": 32980 }, { "epoch": 2.2411333061557275, "grad_norm": 2.818216323852539, "learning_rate": 7.199517597499661e-05, "loss": 2.8107, "step": 32985 }, { "epoch": 2.2414730262263896, "grad_norm": 3.0462470054626465, "learning_rate": 7.199092947411334e-05, "loss": 2.7375, "step": 32990 }, { "epoch": 2.241812746297051, "grad_norm": 2.6907548904418945, "learning_rate": 7.198668297323005e-05, "loss": 2.7458, "step": 32995 }, { "epoch": 2.242152466367713, "grad_norm": 2.6370062828063965, "learning_rate": 7.19824364723468e-05, "loss": 2.7958, "step": 33000 }, { "epoch": 2.242492186438375, "grad_norm": 2.5103893280029297, "learning_rate": 7.197818997146352e-05, "loss": 2.9386, "step": 33005 }, { "epoch": 2.2428319065090365, "grad_norm": 2.700191020965576, "learning_rate": 7.197394347058024e-05, "loss": 2.7064, "step": 33010 }, { "epoch": 2.243171626579698, "grad_norm": 3.1728148460388184, "learning_rate": 7.196969696969698e-05, "loss": 3.0548, "step": 33015 }, { "epoch": 2.2435113466503602, "grad_norm": 3.294248580932617, "learning_rate": 7.19654504688137e-05, "loss": 2.9384, "step": 33020 }, { "epoch": 2.243851066721022, "grad_norm": 2.2904651165008545, 
"learning_rate": 7.196120396793042e-05, "loss": 2.898, "step": 33025 }, { "epoch": 2.2441907867916835, "grad_norm": 3.1902170181274414, "learning_rate": 7.195695746704716e-05, "loss": 2.8181, "step": 33030 }, { "epoch": 2.2445305068623456, "grad_norm": 2.228954553604126, "learning_rate": 7.195271096616389e-05, "loss": 2.7608, "step": 33035 }, { "epoch": 2.244870226933007, "grad_norm": 2.109145164489746, "learning_rate": 7.19484644652806e-05, "loss": 2.8157, "step": 33040 }, { "epoch": 2.245209947003669, "grad_norm": 2.4772539138793945, "learning_rate": 7.194421796439735e-05, "loss": 2.9839, "step": 33045 }, { "epoch": 2.245549667074331, "grad_norm": 2.549163341522217, "learning_rate": 7.193997146351406e-05, "loss": 3.1693, "step": 33050 }, { "epoch": 2.2458893871449925, "grad_norm": 2.631770610809326, "learning_rate": 7.193572496263079e-05, "loss": 3.1441, "step": 33055 }, { "epoch": 2.246229107215654, "grad_norm": 2.316704034805298, "learning_rate": 7.193147846174753e-05, "loss": 2.7444, "step": 33060 }, { "epoch": 2.2465688272863162, "grad_norm": 3.0326640605926514, "learning_rate": 7.192723196086424e-05, "loss": 2.8628, "step": 33065 }, { "epoch": 2.246908547356978, "grad_norm": 2.9378201961517334, "learning_rate": 7.192298545998097e-05, "loss": 2.9638, "step": 33070 }, { "epoch": 2.2472482674276395, "grad_norm": 2.492703914642334, "learning_rate": 7.191873895909771e-05, "loss": 2.9663, "step": 33075 }, { "epoch": 2.2475879874983016, "grad_norm": 2.2440569400787354, "learning_rate": 7.191449245821443e-05, "loss": 2.9027, "step": 33080 }, { "epoch": 2.247927707568963, "grad_norm": 3.4572348594665527, "learning_rate": 7.191024595733116e-05, "loss": 2.8705, "step": 33085 }, { "epoch": 2.248267427639625, "grad_norm": 2.316709041595459, "learning_rate": 7.19059994564479e-05, "loss": 2.7859, "step": 33090 }, { "epoch": 2.248607147710287, "grad_norm": 2.37251353263855, "learning_rate": 7.190175295556461e-05, "loss": 2.6468, "step": 33095 }, { "epoch": 
2.2489468677809485, "grad_norm": 2.5166938304901123, "learning_rate": 7.189750645468135e-05, "loss": 2.7446, "step": 33100 }, { "epoch": 2.24928658785161, "grad_norm": 2.4438979625701904, "learning_rate": 7.189325995379808e-05, "loss": 2.7165, "step": 33105 }, { "epoch": 2.2496263079222723, "grad_norm": 3.1850368976593018, "learning_rate": 7.18890134529148e-05, "loss": 2.8552, "step": 33110 }, { "epoch": 2.249966027992934, "grad_norm": 3.012151002883911, "learning_rate": 7.188476695203154e-05, "loss": 2.9675, "step": 33115 }, { "epoch": 2.2503057480635955, "grad_norm": 2.4529848098754883, "learning_rate": 7.188052045114825e-05, "loss": 2.8602, "step": 33120 }, { "epoch": 2.250645468134257, "grad_norm": 3.0945756435394287, "learning_rate": 7.187627395026498e-05, "loss": 2.9763, "step": 33125 }, { "epoch": 2.250985188204919, "grad_norm": 2.383312463760376, "learning_rate": 7.187202744938172e-05, "loss": 3.0129, "step": 33130 }, { "epoch": 2.251324908275581, "grad_norm": 2.6412696838378906, "learning_rate": 7.186778094849844e-05, "loss": 2.9029, "step": 33135 }, { "epoch": 2.2516646283462425, "grad_norm": 2.708972930908203, "learning_rate": 7.186353444761516e-05, "loss": 2.9874, "step": 33140 }, { "epoch": 2.2520043484169046, "grad_norm": 2.7336604595184326, "learning_rate": 7.18592879467319e-05, "loss": 2.969, "step": 33145 }, { "epoch": 2.252344068487566, "grad_norm": 2.527611494064331, "learning_rate": 7.185504144584862e-05, "loss": 2.592, "step": 33150 }, { "epoch": 2.252683788558228, "grad_norm": 2.8073744773864746, "learning_rate": 7.185079494496535e-05, "loss": 3.1713, "step": 33155 }, { "epoch": 2.25302350862889, "grad_norm": 2.5062410831451416, "learning_rate": 7.184654844408209e-05, "loss": 2.9258, "step": 33160 }, { "epoch": 2.2533632286995515, "grad_norm": 2.3286213874816895, "learning_rate": 7.18423019431988e-05, "loss": 3.001, "step": 33165 }, { "epoch": 2.253702948770213, "grad_norm": 3.6908411979675293, "learning_rate": 7.183805544231553e-05, "loss": 
2.6436, "step": 33170 }, { "epoch": 2.2540426688408752, "grad_norm": 2.550650119781494, "learning_rate": 7.183380894143227e-05, "loss": 2.8839, "step": 33175 }, { "epoch": 2.254382388911537, "grad_norm": 2.3838183879852295, "learning_rate": 7.182956244054899e-05, "loss": 2.7746, "step": 33180 }, { "epoch": 2.2547221089821985, "grad_norm": 2.1881535053253174, "learning_rate": 7.182531593966572e-05, "loss": 2.9651, "step": 33185 }, { "epoch": 2.2550618290528606, "grad_norm": 2.4208669662475586, "learning_rate": 7.182106943878244e-05, "loss": 2.9019, "step": 33190 }, { "epoch": 2.255401549123522, "grad_norm": 2.846346855163574, "learning_rate": 7.181682293789917e-05, "loss": 3.0531, "step": 33195 }, { "epoch": 2.255741269194184, "grad_norm": 2.2849111557006836, "learning_rate": 7.18125764370159e-05, "loss": 2.986, "step": 33200 }, { "epoch": 2.256080989264846, "grad_norm": 2.2540855407714844, "learning_rate": 7.180832993613263e-05, "loss": 2.9826, "step": 33205 }, { "epoch": 2.2564207093355075, "grad_norm": 2.126955270767212, "learning_rate": 7.180408343524936e-05, "loss": 2.9653, "step": 33210 }, { "epoch": 2.256760429406169, "grad_norm": 2.5405783653259277, "learning_rate": 7.179983693436608e-05, "loss": 2.7437, "step": 33215 }, { "epoch": 2.2571001494768312, "grad_norm": 2.238685369491577, "learning_rate": 7.179559043348281e-05, "loss": 3.0019, "step": 33220 }, { "epoch": 2.257439869547493, "grad_norm": 3.673489570617676, "learning_rate": 7.179134393259954e-05, "loss": 2.8176, "step": 33225 }, { "epoch": 2.2577795896181545, "grad_norm": 2.6978836059570312, "learning_rate": 7.178709743171627e-05, "loss": 2.8816, "step": 33230 }, { "epoch": 2.2581193096888166, "grad_norm": 2.273484945297241, "learning_rate": 7.1782850930833e-05, "loss": 2.8848, "step": 33235 }, { "epoch": 2.258459029759478, "grad_norm": 2.888094663619995, "learning_rate": 7.177860442994972e-05, "loss": 2.8667, "step": 33240 }, { "epoch": 2.25879874983014, "grad_norm": 2.8391363620758057, 
"learning_rate": 7.177435792906645e-05, "loss": 2.6174, "step": 33245 }, { "epoch": 2.259138469900802, "grad_norm": 2.857567310333252, "learning_rate": 7.177011142818318e-05, "loss": 2.9095, "step": 33250 }, { "epoch": 2.2594781899714635, "grad_norm": 2.2659287452697754, "learning_rate": 7.176586492729991e-05, "loss": 2.5851, "step": 33255 }, { "epoch": 2.259817910042125, "grad_norm": 2.3036651611328125, "learning_rate": 7.176161842641664e-05, "loss": 3.0104, "step": 33260 }, { "epoch": 2.2601576301127873, "grad_norm": 2.9956750869750977, "learning_rate": 7.175737192553336e-05, "loss": 3.075, "step": 33265 }, { "epoch": 2.260497350183449, "grad_norm": 2.576242446899414, "learning_rate": 7.175312542465009e-05, "loss": 2.7991, "step": 33270 }, { "epoch": 2.2608370702541105, "grad_norm": 2.7969980239868164, "learning_rate": 7.174887892376682e-05, "loss": 2.9271, "step": 33275 }, { "epoch": 2.2611767903247726, "grad_norm": 2.8595151901245117, "learning_rate": 7.174463242288355e-05, "loss": 2.8552, "step": 33280 }, { "epoch": 2.261516510395434, "grad_norm": 2.8775007724761963, "learning_rate": 7.174038592200028e-05, "loss": 2.9249, "step": 33285 }, { "epoch": 2.261856230466096, "grad_norm": 2.4308218955993652, "learning_rate": 7.1736139421117e-05, "loss": 2.8212, "step": 33290 }, { "epoch": 2.262195950536758, "grad_norm": 2.703437328338623, "learning_rate": 7.173189292023373e-05, "loss": 2.8471, "step": 33295 }, { "epoch": 2.2625356706074196, "grad_norm": 2.51092791557312, "learning_rate": 7.172764641935046e-05, "loss": 2.8742, "step": 33300 }, { "epoch": 2.262875390678081, "grad_norm": 2.545304536819458, "learning_rate": 7.172339991846719e-05, "loss": 2.861, "step": 33305 }, { "epoch": 2.2632151107487433, "grad_norm": 3.2464287281036377, "learning_rate": 7.171915341758392e-05, "loss": 2.8217, "step": 33310 }, { "epoch": 2.263554830819405, "grad_norm": 2.676722764968872, "learning_rate": 7.171490691670064e-05, "loss": 2.9303, "step": 33315 }, { "epoch": 
2.2638945508900665, "grad_norm": 2.3667256832122803, "learning_rate": 7.171066041581736e-05, "loss": 3.0589, "step": 33320 }, { "epoch": 2.2642342709607286, "grad_norm": 2.2364025115966797, "learning_rate": 7.17064139149341e-05, "loss": 2.7239, "step": 33325 }, { "epoch": 2.2645739910313902, "grad_norm": 3.356407880783081, "learning_rate": 7.170216741405083e-05, "loss": 2.7041, "step": 33330 }, { "epoch": 2.264913711102052, "grad_norm": 2.2420506477355957, "learning_rate": 7.169792091316754e-05, "loss": 2.8294, "step": 33335 }, { "epoch": 2.2652534311727135, "grad_norm": 2.3690154552459717, "learning_rate": 7.169367441228428e-05, "loss": 2.8359, "step": 33340 }, { "epoch": 2.2655931512433756, "grad_norm": 2.274134635925293, "learning_rate": 7.168942791140101e-05, "loss": 2.717, "step": 33345 }, { "epoch": 2.265932871314037, "grad_norm": 2.4822583198547363, "learning_rate": 7.168518141051773e-05, "loss": 2.965, "step": 33350 }, { "epoch": 2.266272591384699, "grad_norm": 2.929772138595581, "learning_rate": 7.168093490963447e-05, "loss": 2.9092, "step": 33355 }, { "epoch": 2.266612311455361, "grad_norm": 2.18749737739563, "learning_rate": 7.16766884087512e-05, "loss": 3.127, "step": 33360 }, { "epoch": 2.2669520315260225, "grad_norm": 2.733646869659424, "learning_rate": 7.167244190786791e-05, "loss": 3.0774, "step": 33365 }, { "epoch": 2.267291751596684, "grad_norm": 2.4777932167053223, "learning_rate": 7.166819540698465e-05, "loss": 3.0328, "step": 33370 }, { "epoch": 2.2676314716673462, "grad_norm": 2.515777826309204, "learning_rate": 7.166394890610138e-05, "loss": 2.6895, "step": 33375 }, { "epoch": 2.267971191738008, "grad_norm": 2.2061026096343994, "learning_rate": 7.16597024052181e-05, "loss": 3.0604, "step": 33380 }, { "epoch": 2.2683109118086695, "grad_norm": 2.259354829788208, "learning_rate": 7.165545590433484e-05, "loss": 2.6643, "step": 33385 }, { "epoch": 2.2686506318793316, "grad_norm": 2.33528995513916, "learning_rate": 7.165120940345155e-05, "loss": 
2.8681, "step": 33390 }, { "epoch": 2.268990351949993, "grad_norm": 2.8973610401153564, "learning_rate": 7.164696290256828e-05, "loss": 2.8836, "step": 33395 }, { "epoch": 2.269330072020655, "grad_norm": 1.956241250038147, "learning_rate": 7.164271640168502e-05, "loss": 2.7226, "step": 33400 }, { "epoch": 2.269669792091317, "grad_norm": 2.6452531814575195, "learning_rate": 7.163846990080174e-05, "loss": 2.7448, "step": 33405 }, { "epoch": 2.2700095121619785, "grad_norm": 2.762395143508911, "learning_rate": 7.163422339991846e-05, "loss": 2.7664, "step": 33410 }, { "epoch": 2.27034923223264, "grad_norm": 2.376967430114746, "learning_rate": 7.16299768990352e-05, "loss": 2.8824, "step": 33415 }, { "epoch": 2.2706889523033023, "grad_norm": 2.7017393112182617, "learning_rate": 7.162573039815192e-05, "loss": 2.886, "step": 33420 }, { "epoch": 2.271028672373964, "grad_norm": 2.87359881401062, "learning_rate": 7.162148389726865e-05, "loss": 2.6773, "step": 33425 }, { "epoch": 2.2713683924446255, "grad_norm": 2.3254525661468506, "learning_rate": 7.161723739638539e-05, "loss": 2.8154, "step": 33430 }, { "epoch": 2.2717081125152876, "grad_norm": 2.0850281715393066, "learning_rate": 7.16129908955021e-05, "loss": 2.7453, "step": 33435 }, { "epoch": 2.2720478325859492, "grad_norm": 2.539327383041382, "learning_rate": 7.160874439461884e-05, "loss": 2.7044, "step": 33440 }, { "epoch": 2.272387552656611, "grad_norm": 2.1143887042999268, "learning_rate": 7.160449789373557e-05, "loss": 2.8842, "step": 33445 }, { "epoch": 2.2727272727272725, "grad_norm": 2.4410974979400635, "learning_rate": 7.160025139285229e-05, "loss": 2.7021, "step": 33450 }, { "epoch": 2.2730669927979346, "grad_norm": 2.7359278202056885, "learning_rate": 7.159600489196903e-05, "loss": 2.957, "step": 33455 }, { "epoch": 2.273406712868596, "grad_norm": 3.809649705886841, "learning_rate": 7.159175839108576e-05, "loss": 2.7873, "step": 33460 }, { "epoch": 2.273746432939258, "grad_norm": 2.3964028358459473, 
"learning_rate": 7.158751189020247e-05, "loss": 2.8097, "step": 33465 }, { "epoch": 2.27408615300992, "grad_norm": 3.1428470611572266, "learning_rate": 7.158326538931921e-05, "loss": 2.7104, "step": 33470 }, { "epoch": 2.2744258730805815, "grad_norm": 2.038189172744751, "learning_rate": 7.157901888843593e-05, "loss": 2.9272, "step": 33475 }, { "epoch": 2.274765593151243, "grad_norm": 2.914719581604004, "learning_rate": 7.157477238755266e-05, "loss": 2.746, "step": 33480 }, { "epoch": 2.2751053132219052, "grad_norm": 2.727069139480591, "learning_rate": 7.15705258866694e-05, "loss": 2.8028, "step": 33485 }, { "epoch": 2.275445033292567, "grad_norm": 1.9680135250091553, "learning_rate": 7.156627938578611e-05, "loss": 2.9049, "step": 33490 }, { "epoch": 2.2757847533632285, "grad_norm": 2.9704511165618896, "learning_rate": 7.156203288490284e-05, "loss": 3.0333, "step": 33495 }, { "epoch": 2.2761244734338906, "grad_norm": 2.1864876747131348, "learning_rate": 7.155778638401958e-05, "loss": 3.0913, "step": 33500 }, { "epoch": 2.276464193504552, "grad_norm": 2.4827537536621094, "learning_rate": 7.15535398831363e-05, "loss": 3.1058, "step": 33505 }, { "epoch": 2.276803913575214, "grad_norm": 2.9812800884246826, "learning_rate": 7.154929338225302e-05, "loss": 2.6917, "step": 33510 }, { "epoch": 2.277143633645876, "grad_norm": 2.4080286026000977, "learning_rate": 7.154504688136976e-05, "loss": 2.8874, "step": 33515 }, { "epoch": 2.2774833537165375, "grad_norm": 3.4363133907318115, "learning_rate": 7.154080038048648e-05, "loss": 2.8681, "step": 33520 }, { "epoch": 2.277823073787199, "grad_norm": 2.821748971939087, "learning_rate": 7.153655387960321e-05, "loss": 2.8607, "step": 33525 }, { "epoch": 2.2781627938578612, "grad_norm": 2.5044710636138916, "learning_rate": 7.153230737871995e-05, "loss": 2.8668, "step": 33530 }, { "epoch": 2.278502513928523, "grad_norm": 2.5523736476898193, "learning_rate": 7.152806087783666e-05, "loss": 2.7326, "step": 33535 }, { "epoch": 
2.2788422339991845, "grad_norm": 3.2739861011505127, "learning_rate": 7.152381437695339e-05, "loss": 3.1531, "step": 33540 }, { "epoch": 2.2791819540698466, "grad_norm": 2.9218909740448, "learning_rate": 7.151956787607012e-05, "loss": 2.8316, "step": 33545 }, { "epoch": 2.279521674140508, "grad_norm": 2.267181158065796, "learning_rate": 7.151532137518685e-05, "loss": 2.8639, "step": 33550 }, { "epoch": 2.27986139421117, "grad_norm": 3.2489068508148193, "learning_rate": 7.151107487430358e-05, "loss": 2.8422, "step": 33555 }, { "epoch": 2.280201114281832, "grad_norm": 3.3514530658721924, "learning_rate": 7.15068283734203e-05, "loss": 2.9968, "step": 33560 }, { "epoch": 2.2805408343524936, "grad_norm": 2.346881151199341, "learning_rate": 7.150258187253703e-05, "loss": 2.7428, "step": 33565 }, { "epoch": 2.280880554423155, "grad_norm": 2.0740861892700195, "learning_rate": 7.149833537165376e-05, "loss": 3.0628, "step": 33570 }, { "epoch": 2.2812202744938173, "grad_norm": 2.480759859085083, "learning_rate": 7.149408887077049e-05, "loss": 2.7652, "step": 33575 }, { "epoch": 2.281559994564479, "grad_norm": 3.563560962677002, "learning_rate": 7.148984236988722e-05, "loss": 2.6843, "step": 33580 }, { "epoch": 2.2818997146351405, "grad_norm": 2.0972559452056885, "learning_rate": 7.148559586900394e-05, "loss": 2.9678, "step": 33585 }, { "epoch": 2.2822394347058026, "grad_norm": 2.3808963298797607, "learning_rate": 7.148134936812067e-05, "loss": 2.8033, "step": 33590 }, { "epoch": 2.2825791547764642, "grad_norm": 3.3226823806762695, "learning_rate": 7.14771028672374e-05, "loss": 2.7548, "step": 33595 }, { "epoch": 2.282918874847126, "grad_norm": 2.8946468830108643, "learning_rate": 7.147285636635413e-05, "loss": 2.9991, "step": 33600 }, { "epoch": 2.283258594917788, "grad_norm": 2.3981614112854004, "learning_rate": 7.146860986547086e-05, "loss": 2.6931, "step": 33605 }, { "epoch": 2.2835983149884496, "grad_norm": 3.123107671737671, "learning_rate": 7.146436336458758e-05, 
"loss": 2.8222, "step": 33610 }, { "epoch": 2.283938035059111, "grad_norm": 2.4896984100341797, "learning_rate": 7.146011686370431e-05, "loss": 2.8692, "step": 33615 }, { "epoch": 2.2842777551297733, "grad_norm": 2.5145962238311768, "learning_rate": 7.145587036282104e-05, "loss": 2.9429, "step": 33620 }, { "epoch": 2.284617475200435, "grad_norm": 2.648367166519165, "learning_rate": 7.145162386193777e-05, "loss": 3.2012, "step": 33625 }, { "epoch": 2.2849571952710965, "grad_norm": 2.926182746887207, "learning_rate": 7.14473773610545e-05, "loss": 2.7221, "step": 33630 }, { "epoch": 2.2852969153417586, "grad_norm": 2.737332344055176, "learning_rate": 7.144313086017122e-05, "loss": 2.8477, "step": 33635 }, { "epoch": 2.2856366354124202, "grad_norm": 2.942445755004883, "learning_rate": 7.143888435928795e-05, "loss": 3.1411, "step": 33640 }, { "epoch": 2.285976355483082, "grad_norm": 2.692033529281616, "learning_rate": 7.143463785840468e-05, "loss": 3.0244, "step": 33645 }, { "epoch": 2.286316075553744, "grad_norm": 2.7170755863189697, "learning_rate": 7.143039135752141e-05, "loss": 2.8173, "step": 33650 }, { "epoch": 2.2866557956244056, "grad_norm": 2.954761266708374, "learning_rate": 7.142614485663814e-05, "loss": 2.8025, "step": 33655 }, { "epoch": 2.286995515695067, "grad_norm": 2.086940288543701, "learning_rate": 7.142189835575486e-05, "loss": 2.9903, "step": 33660 }, { "epoch": 2.2873352357657293, "grad_norm": 2.643549680709839, "learning_rate": 7.141765185487159e-05, "loss": 2.8517, "step": 33665 }, { "epoch": 2.287674955836391, "grad_norm": 2.153751850128174, "learning_rate": 7.141340535398832e-05, "loss": 2.8574, "step": 33670 }, { "epoch": 2.2880146759070525, "grad_norm": 2.5794284343719482, "learning_rate": 7.140915885310503e-05, "loss": 2.6638, "step": 33675 }, { "epoch": 2.288354395977714, "grad_norm": 2.548109769821167, "learning_rate": 7.140491235222178e-05, "loss": 2.7205, "step": 33680 }, { "epoch": 2.2886941160483762, "grad_norm": 2.6856367588043213, 
"learning_rate": 7.14006658513385e-05, "loss": 2.9321, "step": 33685 }, { "epoch": 2.289033836119038, "grad_norm": 2.874324083328247, "learning_rate": 7.139641935045522e-05, "loss": 2.9563, "step": 33690 }, { "epoch": 2.2893735561896995, "grad_norm": 2.687319755554199, "learning_rate": 7.139217284957196e-05, "loss": 2.8583, "step": 33695 }, { "epoch": 2.2897132762603616, "grad_norm": 2.615657091140747, "learning_rate": 7.138792634868869e-05, "loss": 3.0671, "step": 33700 }, { "epoch": 2.290052996331023, "grad_norm": 2.107964515686035, "learning_rate": 7.13836798478054e-05, "loss": 2.9097, "step": 33705 }, { "epoch": 2.290392716401685, "grad_norm": 3.530917167663574, "learning_rate": 7.137943334692214e-05, "loss": 2.5951, "step": 33710 }, { "epoch": 2.290732436472347, "grad_norm": 2.7556774616241455, "learning_rate": 7.137518684603887e-05, "loss": 2.697, "step": 33715 }, { "epoch": 2.2910721565430086, "grad_norm": 2.444884777069092, "learning_rate": 7.137094034515559e-05, "loss": 2.8195, "step": 33720 }, { "epoch": 2.29141187661367, "grad_norm": 1.9590510129928589, "learning_rate": 7.136669384427233e-05, "loss": 2.8847, "step": 33725 }, { "epoch": 2.2917515966843323, "grad_norm": 2.3431553840637207, "learning_rate": 7.136244734338906e-05, "loss": 3.0651, "step": 33730 }, { "epoch": 2.292091316754994, "grad_norm": 2.650692939758301, "learning_rate": 7.135820084250577e-05, "loss": 2.8076, "step": 33735 }, { "epoch": 2.2924310368256555, "grad_norm": 2.5777385234832764, "learning_rate": 7.135395434162251e-05, "loss": 2.8528, "step": 33740 }, { "epoch": 2.2927707568963176, "grad_norm": 2.5233380794525146, "learning_rate": 7.134970784073923e-05, "loss": 2.8692, "step": 33745 }, { "epoch": 2.2931104769669792, "grad_norm": 2.349297046661377, "learning_rate": 7.134546133985595e-05, "loss": 2.9358, "step": 33750 }, { "epoch": 2.293450197037641, "grad_norm": 2.395275592803955, "learning_rate": 7.13412148389727e-05, "loss": 3.0758, "step": 33755 }, { "epoch": 2.293789917108303, 
"grad_norm": 2.194223165512085, "learning_rate": 7.133696833808941e-05, "loss": 2.6824, "step": 33760 }, { "epoch": 2.2941296371789646, "grad_norm": 2.347844123840332, "learning_rate": 7.133272183720614e-05, "loss": 2.6725, "step": 33765 }, { "epoch": 2.294469357249626, "grad_norm": 2.437697649002075, "learning_rate": 7.132847533632288e-05, "loss": 2.9175, "step": 33770 }, { "epoch": 2.2948090773202883, "grad_norm": 2.6092443466186523, "learning_rate": 7.13242288354396e-05, "loss": 2.7966, "step": 33775 }, { "epoch": 2.29514879739095, "grad_norm": 2.4573628902435303, "learning_rate": 7.131998233455634e-05, "loss": 2.9263, "step": 33780 }, { "epoch": 2.2954885174616115, "grad_norm": 2.697084665298462, "learning_rate": 7.131573583367306e-05, "loss": 2.8543, "step": 33785 }, { "epoch": 2.295828237532273, "grad_norm": 3.2013614177703857, "learning_rate": 7.131148933278978e-05, "loss": 2.7588, "step": 33790 }, { "epoch": 2.2961679576029352, "grad_norm": 2.8212287425994873, "learning_rate": 7.130724283190652e-05, "loss": 2.8531, "step": 33795 }, { "epoch": 2.296507677673597, "grad_norm": 2.2348718643188477, "learning_rate": 7.130299633102325e-05, "loss": 2.9562, "step": 33800 }, { "epoch": 2.2968473977442585, "grad_norm": 3.1019287109375, "learning_rate": 7.129874983013996e-05, "loss": 2.8243, "step": 33805 }, { "epoch": 2.2971871178149206, "grad_norm": 2.593221664428711, "learning_rate": 7.12945033292567e-05, "loss": 2.7527, "step": 33810 }, { "epoch": 2.297526837885582, "grad_norm": 2.282301664352417, "learning_rate": 7.129025682837343e-05, "loss": 3.0354, "step": 33815 }, { "epoch": 2.297866557956244, "grad_norm": 2.350959062576294, "learning_rate": 7.128601032749015e-05, "loss": 2.888, "step": 33820 }, { "epoch": 2.298206278026906, "grad_norm": 2.5017123222351074, "learning_rate": 7.128176382660689e-05, "loss": 2.6232, "step": 33825 }, { "epoch": 2.2985459980975675, "grad_norm": 2.8844668865203857, "learning_rate": 7.12775173257236e-05, "loss": 2.9418, "step": 33830 
}, { "epoch": 2.298885718168229, "grad_norm": 2.256675958633423, "learning_rate": 7.127327082484033e-05, "loss": 2.7264, "step": 33835 }, { "epoch": 2.2992254382388913, "grad_norm": 2.9403581619262695, "learning_rate": 7.126902432395707e-05, "loss": 2.6627, "step": 33840 }, { "epoch": 2.299565158309553, "grad_norm": 2.686836004257202, "learning_rate": 7.126477782307379e-05, "loss": 3.1429, "step": 33845 }, { "epoch": 2.2999048783802145, "grad_norm": 2.2373688220977783, "learning_rate": 7.126053132219051e-05, "loss": 3.0425, "step": 33850 }, { "epoch": 2.3002445984508766, "grad_norm": 2.261141300201416, "learning_rate": 7.125628482130726e-05, "loss": 2.8044, "step": 33855 }, { "epoch": 2.300584318521538, "grad_norm": 2.178985118865967, "learning_rate": 7.125203832042397e-05, "loss": 2.5396, "step": 33860 }, { "epoch": 2.3009240385922, "grad_norm": 2.3004748821258545, "learning_rate": 7.12477918195407e-05, "loss": 3.0973, "step": 33865 }, { "epoch": 2.301263758662862, "grad_norm": 2.8068654537200928, "learning_rate": 7.124354531865744e-05, "loss": 2.5353, "step": 33870 }, { "epoch": 2.3016034787335236, "grad_norm": 2.3578546047210693, "learning_rate": 7.123929881777415e-05, "loss": 2.6653, "step": 33875 }, { "epoch": 2.301943198804185, "grad_norm": 2.5236282348632812, "learning_rate": 7.123505231689088e-05, "loss": 2.874, "step": 33880 }, { "epoch": 2.3022829188748473, "grad_norm": 2.3787760734558105, "learning_rate": 7.123080581600762e-05, "loss": 2.7655, "step": 33885 }, { "epoch": 2.302622638945509, "grad_norm": 2.4700238704681396, "learning_rate": 7.122655931512434e-05, "loss": 2.8297, "step": 33890 }, { "epoch": 2.3029623590161705, "grad_norm": 2.2989063262939453, "learning_rate": 7.122231281424107e-05, "loss": 2.9462, "step": 33895 }, { "epoch": 2.3033020790868326, "grad_norm": 2.6547038555145264, "learning_rate": 7.12180663133578e-05, "loss": 2.6824, "step": 33900 }, { "epoch": 2.3036417991574942, "grad_norm": 2.5606420040130615, "learning_rate": 
7.121381981247452e-05, "loss": 2.9927, "step": 33905 }, { "epoch": 2.303981519228156, "grad_norm": 2.7266082763671875, "learning_rate": 7.120957331159125e-05, "loss": 2.8656, "step": 33910 }, { "epoch": 2.304321239298818, "grad_norm": 2.3635828495025635, "learning_rate": 7.120532681070798e-05, "loss": 3.1329, "step": 33915 }, { "epoch": 2.3046609593694796, "grad_norm": 2.7144851684570312, "learning_rate": 7.12010803098247e-05, "loss": 3.0532, "step": 33920 }, { "epoch": 2.305000679440141, "grad_norm": 2.716566562652588, "learning_rate": 7.119683380894143e-05, "loss": 3.0392, "step": 33925 }, { "epoch": 2.3053403995108033, "grad_norm": 2.258928060531616, "learning_rate": 7.119258730805816e-05, "loss": 2.8086, "step": 33930 }, { "epoch": 2.305680119581465, "grad_norm": 2.6322383880615234, "learning_rate": 7.118834080717489e-05, "loss": 2.9156, "step": 33935 }, { "epoch": 2.3060198396521265, "grad_norm": 2.2710142135620117, "learning_rate": 7.118409430629162e-05, "loss": 2.9396, "step": 33940 }, { "epoch": 2.3063595597227886, "grad_norm": 2.6053669452667236, "learning_rate": 7.117984780540835e-05, "loss": 2.9691, "step": 33945 }, { "epoch": 2.3066992797934502, "grad_norm": 2.794912815093994, "learning_rate": 7.117560130452507e-05, "loss": 2.7754, "step": 33950 }, { "epoch": 2.307038999864112, "grad_norm": 2.1708662509918213, "learning_rate": 7.11713548036418e-05, "loss": 2.7649, "step": 33955 }, { "epoch": 2.307378719934774, "grad_norm": 2.634655237197876, "learning_rate": 7.116710830275853e-05, "loss": 3.0541, "step": 33960 }, { "epoch": 2.3077184400054356, "grad_norm": 2.5157651901245117, "learning_rate": 7.116286180187526e-05, "loss": 2.8231, "step": 33965 }, { "epoch": 2.308058160076097, "grad_norm": 2.4562346935272217, "learning_rate": 7.115861530099199e-05, "loss": 2.611, "step": 33970 }, { "epoch": 2.3083978801467593, "grad_norm": 2.3899688720703125, "learning_rate": 7.115436880010871e-05, "loss": 2.9507, "step": 33975 }, { "epoch": 2.308737600217421, 
"grad_norm": 2.777217149734497, "learning_rate": 7.115012229922544e-05, "loss": 2.6746, "step": 33980 }, { "epoch": 2.3090773202880825, "grad_norm": 2.945129156112671, "learning_rate": 7.114587579834217e-05, "loss": 2.9719, "step": 33985 }, { "epoch": 2.3094170403587446, "grad_norm": 3.0835156440734863, "learning_rate": 7.11416292974589e-05, "loss": 2.7813, "step": 33990 }, { "epoch": 2.3097567604294063, "grad_norm": 2.7127490043640137, "learning_rate": 7.113738279657563e-05, "loss": 2.9853, "step": 33995 }, { "epoch": 2.310096480500068, "grad_norm": 3.142542839050293, "learning_rate": 7.113313629569235e-05, "loss": 2.7801, "step": 34000 }, { "epoch": 2.31043620057073, "grad_norm": 2.4139974117279053, "learning_rate": 7.112888979480908e-05, "loss": 3.1278, "step": 34005 }, { "epoch": 2.3107759206413916, "grad_norm": 2.723345994949341, "learning_rate": 7.112464329392581e-05, "loss": 3.0258, "step": 34010 }, { "epoch": 2.311115640712053, "grad_norm": 2.465451240539551, "learning_rate": 7.112039679304254e-05, "loss": 2.9895, "step": 34015 }, { "epoch": 2.311455360782715, "grad_norm": 2.4733362197875977, "learning_rate": 7.111615029215927e-05, "loss": 2.9006, "step": 34020 }, { "epoch": 2.311795080853377, "grad_norm": 2.4655091762542725, "learning_rate": 7.1111903791276e-05, "loss": 2.9466, "step": 34025 }, { "epoch": 2.3121348009240386, "grad_norm": 2.6101701259613037, "learning_rate": 7.110765729039271e-05, "loss": 2.6588, "step": 34030 }, { "epoch": 2.3124745209947, "grad_norm": 2.262708902359009, "learning_rate": 7.110341078950945e-05, "loss": 2.9966, "step": 34035 }, { "epoch": 2.3128142410653623, "grad_norm": 2.3123021125793457, "learning_rate": 7.109916428862618e-05, "loss": 3.0163, "step": 34040 }, { "epoch": 2.313153961136024, "grad_norm": 2.6490986347198486, "learning_rate": 7.109491778774289e-05, "loss": 2.9423, "step": 34045 }, { "epoch": 2.3134936812066855, "grad_norm": 2.7038750648498535, "learning_rate": 7.109067128685963e-05, "loss": 2.6715, "step": 
34050 }, { "epoch": 2.3138334012773476, "grad_norm": 3.0394952297210693, "learning_rate": 7.108642478597636e-05, "loss": 2.6657, "step": 34055 }, { "epoch": 2.3141731213480092, "grad_norm": 2.4358348846435547, "learning_rate": 7.108217828509308e-05, "loss": 2.7694, "step": 34060 }, { "epoch": 2.314512841418671, "grad_norm": 2.3364017009735107, "learning_rate": 7.107793178420982e-05, "loss": 3.0931, "step": 34065 }, { "epoch": 2.314852561489333, "grad_norm": 2.834378719329834, "learning_rate": 7.107368528332655e-05, "loss": 2.7962, "step": 34070 }, { "epoch": 2.3151922815599946, "grad_norm": 2.201174736022949, "learning_rate": 7.106943878244326e-05, "loss": 2.648, "step": 34075 }, { "epoch": 2.315532001630656, "grad_norm": 3.1579744815826416, "learning_rate": 7.106519228156e-05, "loss": 2.8779, "step": 34080 }, { "epoch": 2.3158717217013183, "grad_norm": 2.3728339672088623, "learning_rate": 7.106094578067673e-05, "loss": 3.0648, "step": 34085 }, { "epoch": 2.31621144177198, "grad_norm": 2.3025660514831543, "learning_rate": 7.105669927979345e-05, "loss": 3.0211, "step": 34090 }, { "epoch": 2.3165511618426415, "grad_norm": 2.3758535385131836, "learning_rate": 7.105245277891019e-05, "loss": 3.108, "step": 34095 }, { "epoch": 2.3168908819133036, "grad_norm": 2.253108263015747, "learning_rate": 7.10482062780269e-05, "loss": 2.9757, "step": 34100 }, { "epoch": 2.3172306019839652, "grad_norm": 2.525343179702759, "learning_rate": 7.104395977714363e-05, "loss": 2.6353, "step": 34105 }, { "epoch": 2.317570322054627, "grad_norm": 2.078627824783325, "learning_rate": 7.103971327626037e-05, "loss": 2.8689, "step": 34110 }, { "epoch": 2.317910042125289, "grad_norm": 2.9714694023132324, "learning_rate": 7.103546677537709e-05, "loss": 3.0411, "step": 34115 }, { "epoch": 2.3182497621959506, "grad_norm": 3.262454032897949, "learning_rate": 7.103122027449383e-05, "loss": 3.0117, "step": 34120 }, { "epoch": 2.318589482266612, "grad_norm": 2.5837652683258057, "learning_rate": 
7.102697377361055e-05, "loss": 2.8791, "step": 34125 }, { "epoch": 2.318929202337274, "grad_norm": 2.580111503601074, "learning_rate": 7.102272727272727e-05, "loss": 3.0266, "step": 34130 }, { "epoch": 2.319268922407936, "grad_norm": 2.4645164012908936, "learning_rate": 7.101848077184401e-05, "loss": 3.0575, "step": 34135 }, { "epoch": 2.3196086424785975, "grad_norm": 2.240555763244629, "learning_rate": 7.101423427096074e-05, "loss": 2.9808, "step": 34140 }, { "epoch": 2.319948362549259, "grad_norm": 2.9168052673339844, "learning_rate": 7.100998777007745e-05, "loss": 2.7487, "step": 34145 }, { "epoch": 2.3202880826199213, "grad_norm": 2.508246660232544, "learning_rate": 7.10057412691942e-05, "loss": 2.9243, "step": 34150 }, { "epoch": 2.320627802690583, "grad_norm": 2.457505702972412, "learning_rate": 7.100149476831092e-05, "loss": 2.7805, "step": 34155 }, { "epoch": 2.3209675227612445, "grad_norm": 2.9223756790161133, "learning_rate": 7.099724826742764e-05, "loss": 2.7046, "step": 34160 }, { "epoch": 2.3213072428319066, "grad_norm": 2.261190891265869, "learning_rate": 7.099300176654438e-05, "loss": 2.8148, "step": 34165 }, { "epoch": 2.3216469629025682, "grad_norm": 2.06142258644104, "learning_rate": 7.09887552656611e-05, "loss": 2.9093, "step": 34170 }, { "epoch": 2.32198668297323, "grad_norm": 2.3254172801971436, "learning_rate": 7.098450876477782e-05, "loss": 2.7803, "step": 34175 }, { "epoch": 2.322326403043892, "grad_norm": 2.258063316345215, "learning_rate": 7.098026226389456e-05, "loss": 2.7093, "step": 34180 }, { "epoch": 2.3226661231145536, "grad_norm": 3.1792619228363037, "learning_rate": 7.097601576301128e-05, "loss": 2.7252, "step": 34185 }, { "epoch": 2.323005843185215, "grad_norm": 2.4762306213378906, "learning_rate": 7.0971769262128e-05, "loss": 2.7708, "step": 34190 }, { "epoch": 2.3233455632558773, "grad_norm": 2.777348756790161, "learning_rate": 7.096752276124475e-05, "loss": 2.9391, "step": 34195 }, { "epoch": 2.323685283326539, "grad_norm": 
2.5369091033935547, "learning_rate": 7.096327626036146e-05, "loss": 2.832, "step": 34200 }, { "epoch": 2.3240250033972005, "grad_norm": 2.8260910511016846, "learning_rate": 7.095902975947819e-05, "loss": 2.793, "step": 34205 }, { "epoch": 2.3243647234678626, "grad_norm": 2.193542003631592, "learning_rate": 7.095478325859493e-05, "loss": 3.3257, "step": 34210 }, { "epoch": 2.3247044435385242, "grad_norm": 2.1192986965179443, "learning_rate": 7.095053675771165e-05, "loss": 2.8477, "step": 34215 }, { "epoch": 2.325044163609186, "grad_norm": 2.6264851093292236, "learning_rate": 7.094629025682837e-05, "loss": 3.1252, "step": 34220 }, { "epoch": 2.325383883679848, "grad_norm": 2.8318257331848145, "learning_rate": 7.094204375594511e-05, "loss": 2.7435, "step": 34225 }, { "epoch": 2.3257236037505096, "grad_norm": 2.5293996334075928, "learning_rate": 7.093779725506183e-05, "loss": 3.0579, "step": 34230 }, { "epoch": 2.326063323821171, "grad_norm": 2.5022411346435547, "learning_rate": 7.093355075417856e-05, "loss": 2.8786, "step": 34235 }, { "epoch": 2.3264030438918333, "grad_norm": 2.090965986251831, "learning_rate": 7.09293042532953e-05, "loss": 2.9156, "step": 34240 }, { "epoch": 2.326742763962495, "grad_norm": 2.1572256088256836, "learning_rate": 7.092505775241201e-05, "loss": 2.7839, "step": 34245 }, { "epoch": 2.3270824840331565, "grad_norm": 3.1166539192199707, "learning_rate": 7.092081125152874e-05, "loss": 2.8552, "step": 34250 }, { "epoch": 2.3274222041038186, "grad_norm": 2.7175168991088867, "learning_rate": 7.091656475064547e-05, "loss": 2.8959, "step": 34255 }, { "epoch": 2.3277619241744802, "grad_norm": 2.73356294631958, "learning_rate": 7.09123182497622e-05, "loss": 2.8853, "step": 34260 }, { "epoch": 2.328101644245142, "grad_norm": 2.753854990005493, "learning_rate": 7.090892104905558e-05, "loss": 2.6925, "step": 34265 }, { "epoch": 2.328441364315804, "grad_norm": 2.866767406463623, "learning_rate": 7.090467454817231e-05, "loss": 2.9841, "step": 34270 }, { 
"epoch": 2.3287810843864656, "grad_norm": 2.5555315017700195, "learning_rate": 7.090042804728904e-05, "loss": 2.6517, "step": 34275 }, { "epoch": 2.329120804457127, "grad_norm": 2.26069974899292, "learning_rate": 7.089618154640576e-05, "loss": 3.2515, "step": 34280 }, { "epoch": 2.3294605245277893, "grad_norm": 2.1021358966827393, "learning_rate": 7.089193504552249e-05, "loss": 2.8279, "step": 34285 }, { "epoch": 2.329800244598451, "grad_norm": 2.628113031387329, "learning_rate": 7.088768854463922e-05, "loss": 2.7249, "step": 34290 }, { "epoch": 2.3301399646691126, "grad_norm": 2.0941641330718994, "learning_rate": 7.088344204375595e-05, "loss": 2.9333, "step": 34295 }, { "epoch": 2.3304796847397746, "grad_norm": 2.4794209003448486, "learning_rate": 7.087919554287268e-05, "loss": 2.7818, "step": 34300 }, { "epoch": 2.3308194048104363, "grad_norm": 2.5860445499420166, "learning_rate": 7.08749490419894e-05, "loss": 3.012, "step": 34305 }, { "epoch": 2.331159124881098, "grad_norm": 2.4684438705444336, "learning_rate": 7.087070254110613e-05, "loss": 3.0197, "step": 34310 }, { "epoch": 2.33149884495176, "grad_norm": 3.3360862731933594, "learning_rate": 7.086645604022286e-05, "loss": 2.8926, "step": 34315 }, { "epoch": 2.3318385650224216, "grad_norm": 2.590787410736084, "learning_rate": 7.086220953933959e-05, "loss": 2.6342, "step": 34320 }, { "epoch": 2.3321782850930832, "grad_norm": 2.646605968475342, "learning_rate": 7.085796303845632e-05, "loss": 2.7164, "step": 34325 }, { "epoch": 2.3325180051637453, "grad_norm": 2.7746493816375732, "learning_rate": 7.085371653757304e-05, "loss": 2.8453, "step": 34330 }, { "epoch": 2.332857725234407, "grad_norm": 2.554841995239258, "learning_rate": 7.084947003668977e-05, "loss": 2.6374, "step": 34335 }, { "epoch": 2.3331974453050686, "grad_norm": 2.8993215560913086, "learning_rate": 7.08452235358065e-05, "loss": 2.9047, "step": 34340 }, { "epoch": 2.3335371653757306, "grad_norm": 2.6300294399261475, "learning_rate": 
7.084097703492323e-05, "loss": 2.794, "step": 34345 }, { "epoch": 2.3338768854463923, "grad_norm": 2.670795202255249, "learning_rate": 7.083673053403996e-05, "loss": 3.0014, "step": 34350 }, { "epoch": 2.334216605517054, "grad_norm": 2.758223056793213, "learning_rate": 7.083248403315668e-05, "loss": 2.8641, "step": 34355 }, { "epoch": 2.3345563255877155, "grad_norm": 2.2380244731903076, "learning_rate": 7.082823753227341e-05, "loss": 3.0237, "step": 34360 }, { "epoch": 2.3348960456583776, "grad_norm": 2.4583683013916016, "learning_rate": 7.082399103139014e-05, "loss": 2.8291, "step": 34365 }, { "epoch": 2.3352357657290392, "grad_norm": 2.723651170730591, "learning_rate": 7.081974453050687e-05, "loss": 2.9106, "step": 34370 }, { "epoch": 2.335575485799701, "grad_norm": 2.5610740184783936, "learning_rate": 7.08154980296236e-05, "loss": 3.0477, "step": 34375 }, { "epoch": 2.335915205870363, "grad_norm": 2.950927495956421, "learning_rate": 7.081125152874032e-05, "loss": 2.8319, "step": 34380 }, { "epoch": 2.3362549259410246, "grad_norm": 2.4604146480560303, "learning_rate": 7.080700502785705e-05, "loss": 2.9264, "step": 34385 }, { "epoch": 2.336594646011686, "grad_norm": 2.8737692832946777, "learning_rate": 7.080275852697378e-05, "loss": 2.7706, "step": 34390 }, { "epoch": 2.3369343660823483, "grad_norm": 3.378516435623169, "learning_rate": 7.079851202609051e-05, "loss": 2.9104, "step": 34395 }, { "epoch": 2.33727408615301, "grad_norm": 3.263104200363159, "learning_rate": 7.079426552520724e-05, "loss": 3.1305, "step": 34400 }, { "epoch": 2.3376138062236715, "grad_norm": 2.3031599521636963, "learning_rate": 7.079001902432396e-05, "loss": 2.9942, "step": 34405 }, { "epoch": 2.3379535262943336, "grad_norm": 2.3622806072235107, "learning_rate": 7.078577252344069e-05, "loss": 2.9843, "step": 34410 }, { "epoch": 2.3382932463649952, "grad_norm": 3.0826754570007324, "learning_rate": 7.078152602255742e-05, "loss": 2.8902, "step": 34415 }, { "epoch": 2.338632966435657, 
"grad_norm": 3.3753864765167236, "learning_rate": 7.077727952167415e-05, "loss": 2.7516, "step": 34420 }, { "epoch": 2.338972686506319, "grad_norm": 2.2426023483276367, "learning_rate": 7.077303302079086e-05, "loss": 2.6478, "step": 34425 }, { "epoch": 2.3393124065769806, "grad_norm": 2.315877676010132, "learning_rate": 7.07687865199076e-05, "loss": 2.9567, "step": 34430 }, { "epoch": 2.339652126647642, "grad_norm": 2.1361262798309326, "learning_rate": 7.076454001902433e-05, "loss": 3.086, "step": 34435 }, { "epoch": 2.3399918467183043, "grad_norm": 3.3793654441833496, "learning_rate": 7.076029351814105e-05, "loss": 2.6959, "step": 34440 }, { "epoch": 2.340331566788966, "grad_norm": 2.507476568222046, "learning_rate": 7.075604701725779e-05, "loss": 2.9376, "step": 34445 }, { "epoch": 2.3406712868596276, "grad_norm": 2.858837366104126, "learning_rate": 7.075180051637452e-05, "loss": 2.9585, "step": 34450 }, { "epoch": 2.3410110069302896, "grad_norm": 2.601497173309326, "learning_rate": 7.074755401549123e-05, "loss": 3.0959, "step": 34455 }, { "epoch": 2.3413507270009513, "grad_norm": 3.1965508460998535, "learning_rate": 7.074330751460797e-05, "loss": 2.9546, "step": 34460 }, { "epoch": 2.341690447071613, "grad_norm": 2.5353057384490967, "learning_rate": 7.07390610137247e-05, "loss": 2.9041, "step": 34465 }, { "epoch": 2.3420301671422745, "grad_norm": 1.7086178064346313, "learning_rate": 7.073481451284141e-05, "loss": 2.8519, "step": 34470 }, { "epoch": 2.3423698872129366, "grad_norm": 2.8152029514312744, "learning_rate": 7.073056801195816e-05, "loss": 2.6963, "step": 34475 }, { "epoch": 2.3427096072835982, "grad_norm": 2.3477418422698975, "learning_rate": 7.072632151107488e-05, "loss": 2.9066, "step": 34480 }, { "epoch": 2.34304932735426, "grad_norm": 2.9404959678649902, "learning_rate": 7.07220750101916e-05, "loss": 2.8291, "step": 34485 }, { "epoch": 2.343389047424922, "grad_norm": 2.7910149097442627, "learning_rate": 7.071782850930834e-05, "loss": 3.0187, "step": 
34490 }, { "epoch": 2.3437287674955836, "grad_norm": 2.5161490440368652, "learning_rate": 7.071358200842505e-05, "loss": 2.9051, "step": 34495 }, { "epoch": 2.344068487566245, "grad_norm": 2.566486120223999, "learning_rate": 7.070933550754178e-05, "loss": 2.7364, "step": 34500 }, { "epoch": 2.3444082076369073, "grad_norm": 2.621084213256836, "learning_rate": 7.070508900665852e-05, "loss": 2.8163, "step": 34505 }, { "epoch": 2.344747927707569, "grad_norm": 2.709444999694824, "learning_rate": 7.070084250577524e-05, "loss": 2.8932, "step": 34510 }, { "epoch": 2.3450876477782305, "grad_norm": 2.692056179046631, "learning_rate": 7.069659600489197e-05, "loss": 2.7742, "step": 34515 }, { "epoch": 2.3454273678488926, "grad_norm": 2.6077077388763428, "learning_rate": 7.069234950400871e-05, "loss": 2.6064, "step": 34520 }, { "epoch": 2.3457670879195542, "grad_norm": 2.4959874153137207, "learning_rate": 7.068810300312542e-05, "loss": 2.9578, "step": 34525 }, { "epoch": 2.346106807990216, "grad_norm": 2.457660675048828, "learning_rate": 7.068385650224215e-05, "loss": 3.0708, "step": 34530 }, { "epoch": 2.346446528060878, "grad_norm": 2.701500415802002, "learning_rate": 7.067961000135889e-05, "loss": 2.9593, "step": 34535 }, { "epoch": 2.3467862481315396, "grad_norm": 2.6554887294769287, "learning_rate": 7.06753635004756e-05, "loss": 2.8099, "step": 34540 }, { "epoch": 2.347125968202201, "grad_norm": 2.4060919284820557, "learning_rate": 7.067111699959233e-05, "loss": 2.8869, "step": 34545 }, { "epoch": 2.3474656882728633, "grad_norm": 2.5635411739349365, "learning_rate": 7.066687049870908e-05, "loss": 2.9909, "step": 34550 }, { "epoch": 2.347805408343525, "grad_norm": 2.57822322845459, "learning_rate": 7.066262399782579e-05, "loss": 2.9703, "step": 34555 }, { "epoch": 2.3481451284141865, "grad_norm": 2.588759183883667, "learning_rate": 7.065837749694252e-05, "loss": 2.9997, "step": 34560 }, { "epoch": 2.3484848484848486, "grad_norm": 2.6414830684661865, "learning_rate": 
7.065413099605925e-05, "loss": 3.0769, "step": 34565 }, { "epoch": 2.3488245685555103, "grad_norm": 2.3212831020355225, "learning_rate": 7.064988449517597e-05, "loss": 2.7674, "step": 34570 }, { "epoch": 2.349164288626172, "grad_norm": 2.8408432006835938, "learning_rate": 7.06456379942927e-05, "loss": 2.7318, "step": 34575 }, { "epoch": 2.349504008696834, "grad_norm": 2.4794816970825195, "learning_rate": 7.064139149340943e-05, "loss": 3.0151, "step": 34580 }, { "epoch": 2.3498437287674956, "grad_norm": 2.977769136428833, "learning_rate": 7.063714499252616e-05, "loss": 2.9193, "step": 34585 }, { "epoch": 2.350183448838157, "grad_norm": 2.171194076538086, "learning_rate": 7.063289849164289e-05, "loss": 3.1434, "step": 34590 }, { "epoch": 2.3505231689088193, "grad_norm": 2.487334728240967, "learning_rate": 7.062865199075961e-05, "loss": 2.7264, "step": 34595 }, { "epoch": 2.350862888979481, "grad_norm": 2.2434961795806885, "learning_rate": 7.062440548987634e-05, "loss": 3.02, "step": 34600 }, { "epoch": 2.3512026090501426, "grad_norm": 3.00331974029541, "learning_rate": 7.062015898899307e-05, "loss": 2.8278, "step": 34605 }, { "epoch": 2.3515423291208046, "grad_norm": 2.2123804092407227, "learning_rate": 7.06159124881098e-05, "loss": 3.0067, "step": 34610 }, { "epoch": 2.3518820491914663, "grad_norm": 2.424043893814087, "learning_rate": 7.061166598722653e-05, "loss": 2.9332, "step": 34615 }, { "epoch": 2.352221769262128, "grad_norm": 2.4622995853424072, "learning_rate": 7.060826878651991e-05, "loss": 2.8361, "step": 34620 }, { "epoch": 2.35256148933279, "grad_norm": 2.6604418754577637, "learning_rate": 7.060402228563664e-05, "loss": 2.866, "step": 34625 }, { "epoch": 2.3529012094034516, "grad_norm": 2.5710318088531494, "learning_rate": 7.059977578475337e-05, "loss": 3.0238, "step": 34630 }, { "epoch": 2.3532409294741132, "grad_norm": 3.210592746734619, "learning_rate": 7.05955292838701e-05, "loss": 3.1189, "step": 34635 }, { "epoch": 2.3535806495447753, "grad_norm": 
2.2843761444091797, "learning_rate": 7.059128278298682e-05, "loss": 2.8119, "step": 34640 }, { "epoch": 2.353920369615437, "grad_norm": 2.031383514404297, "learning_rate": 7.058703628210355e-05, "loss": 2.8845, "step": 34645 }, { "epoch": 2.3542600896860986, "grad_norm": 2.3393239974975586, "learning_rate": 7.058278978122028e-05, "loss": 2.85, "step": 34650 }, { "epoch": 2.3545998097567606, "grad_norm": 3.140007972717285, "learning_rate": 7.0578543280337e-05, "loss": 3.0588, "step": 34655 }, { "epoch": 2.3549395298274223, "grad_norm": 2.22468900680542, "learning_rate": 7.057429677945373e-05, "loss": 2.9224, "step": 34660 }, { "epoch": 2.355279249898084, "grad_norm": 2.8856472969055176, "learning_rate": 7.057005027857046e-05, "loss": 2.9433, "step": 34665 }, { "epoch": 2.355618969968746, "grad_norm": 2.4705569744110107, "learning_rate": 7.056580377768719e-05, "loss": 2.9765, "step": 34670 }, { "epoch": 2.3559586900394076, "grad_norm": 2.3354599475860596, "learning_rate": 7.056155727680392e-05, "loss": 2.8684, "step": 34675 }, { "epoch": 2.3562984101100692, "grad_norm": 3.158928155899048, "learning_rate": 7.055731077592065e-05, "loss": 2.8186, "step": 34680 }, { "epoch": 2.3566381301807313, "grad_norm": 2.2252073287963867, "learning_rate": 7.055306427503737e-05, "loss": 2.9741, "step": 34685 }, { "epoch": 2.356977850251393, "grad_norm": 2.151014566421509, "learning_rate": 7.05488177741541e-05, "loss": 2.8004, "step": 34690 }, { "epoch": 2.3573175703220546, "grad_norm": 2.267813205718994, "learning_rate": 7.054457127327083e-05, "loss": 2.7901, "step": 34695 }, { "epoch": 2.357657290392716, "grad_norm": 2.7214906215667725, "learning_rate": 7.054032477238756e-05, "loss": 3.0882, "step": 34700 }, { "epoch": 2.3579970104633783, "grad_norm": 2.603743314743042, "learning_rate": 7.053607827150429e-05, "loss": 2.8008, "step": 34705 }, { "epoch": 2.35833673053404, "grad_norm": 2.5437684059143066, "learning_rate": 7.053183177062101e-05, "loss": 2.7384, "step": 34710 }, { 
"epoch": 2.3586764506047015, "grad_norm": 2.7358975410461426, "learning_rate": 7.052758526973774e-05, "loss": 2.6488, "step": 34715 }, { "epoch": 2.3590161706753636, "grad_norm": 2.229494333267212, "learning_rate": 7.052333876885447e-05, "loss": 3.0669, "step": 34720 }, { "epoch": 2.3593558907460253, "grad_norm": 2.5671639442443848, "learning_rate": 7.05190922679712e-05, "loss": 2.8208, "step": 34725 }, { "epoch": 2.359695610816687, "grad_norm": 2.82729172706604, "learning_rate": 7.051484576708793e-05, "loss": 2.7834, "step": 34730 }, { "epoch": 2.360035330887349, "grad_norm": 2.134284019470215, "learning_rate": 7.051059926620464e-05, "loss": 2.8592, "step": 34735 }, { "epoch": 2.3603750509580106, "grad_norm": 2.6359963417053223, "learning_rate": 7.050635276532138e-05, "loss": 2.5675, "step": 34740 }, { "epoch": 2.360714771028672, "grad_norm": 2.2443127632141113, "learning_rate": 7.050210626443811e-05, "loss": 2.9256, "step": 34745 }, { "epoch": 2.3610544910993343, "grad_norm": 2.3838393688201904, "learning_rate": 7.049785976355482e-05, "loss": 2.9419, "step": 34750 }, { "epoch": 2.361394211169996, "grad_norm": 2.286731481552124, "learning_rate": 7.049361326267157e-05, "loss": 2.9402, "step": 34755 }, { "epoch": 2.3617339312406576, "grad_norm": 2.372781753540039, "learning_rate": 7.04893667617883e-05, "loss": 2.9946, "step": 34760 }, { "epoch": 2.3620736513113196, "grad_norm": 2.4940130710601807, "learning_rate": 7.048512026090501e-05, "loss": 2.8405, "step": 34765 }, { "epoch": 2.3624133713819813, "grad_norm": 2.6241533756256104, "learning_rate": 7.048087376002175e-05, "loss": 2.6742, "step": 34770 }, { "epoch": 2.362753091452643, "grad_norm": 2.936385154724121, "learning_rate": 7.047662725913848e-05, "loss": 2.8912, "step": 34775 }, { "epoch": 2.363092811523305, "grad_norm": 2.523900032043457, "learning_rate": 7.047238075825519e-05, "loss": 2.7687, "step": 34780 }, { "epoch": 2.3634325315939666, "grad_norm": 2.479990243911743, "learning_rate": 
7.046813425737193e-05, "loss": 2.8283, "step": 34785 }, { "epoch": 2.3637722516646282, "grad_norm": 3.4924445152282715, "learning_rate": 7.046388775648866e-05, "loss": 3.0545, "step": 34790 }, { "epoch": 2.3641119717352903, "grad_norm": 1.9932026863098145, "learning_rate": 7.045964125560538e-05, "loss": 2.8188, "step": 34795 }, { "epoch": 2.364451691805952, "grad_norm": 3.0220398902893066, "learning_rate": 7.045539475472212e-05, "loss": 2.9753, "step": 34800 }, { "epoch": 2.3647914118766136, "grad_norm": 2.0858099460601807, "learning_rate": 7.045114825383885e-05, "loss": 2.9209, "step": 34805 }, { "epoch": 2.365131131947275, "grad_norm": 2.6817047595977783, "learning_rate": 7.044690175295556e-05, "loss": 2.9403, "step": 34810 }, { "epoch": 2.3654708520179373, "grad_norm": 2.6224255561828613, "learning_rate": 7.04426552520723e-05, "loss": 2.6147, "step": 34815 }, { "epoch": 2.365810572088599, "grad_norm": 2.5610463619232178, "learning_rate": 7.043840875118902e-05, "loss": 2.5895, "step": 34820 }, { "epoch": 2.3661502921592605, "grad_norm": 2.316950798034668, "learning_rate": 7.043416225030574e-05, "loss": 2.5098, "step": 34825 }, { "epoch": 2.3664900122299226, "grad_norm": 2.282607078552246, "learning_rate": 7.042991574942249e-05, "loss": 2.6223, "step": 34830 }, { "epoch": 2.3668297323005842, "grad_norm": 2.3446457386016846, "learning_rate": 7.04256692485392e-05, "loss": 3.0913, "step": 34835 }, { "epoch": 2.367169452371246, "grad_norm": 2.5094339847564697, "learning_rate": 7.042142274765593e-05, "loss": 3.0641, "step": 34840 }, { "epoch": 2.367509172441908, "grad_norm": 2.8461568355560303, "learning_rate": 7.041717624677267e-05, "loss": 2.8861, "step": 34845 }, { "epoch": 2.3678488925125696, "grad_norm": 2.2054810523986816, "learning_rate": 7.041292974588938e-05, "loss": 2.9976, "step": 34850 }, { "epoch": 2.368188612583231, "grad_norm": 2.2506179809570312, "learning_rate": 7.040868324500611e-05, "loss": 2.7335, "step": 34855 }, { "epoch": 2.3685283326538933, 
"grad_norm": 2.0371556282043457, "learning_rate": 7.040443674412285e-05, "loss": 2.8492, "step": 34860 }, { "epoch": 2.368868052724555, "grad_norm": 2.5143632888793945, "learning_rate": 7.040019024323957e-05, "loss": 2.8334, "step": 34865 }, { "epoch": 2.3692077727952165, "grad_norm": 2.9262382984161377, "learning_rate": 7.039594374235631e-05, "loss": 2.9527, "step": 34870 }, { "epoch": 2.3695474928658786, "grad_norm": 2.5030689239501953, "learning_rate": 7.039169724147304e-05, "loss": 3.0629, "step": 34875 }, { "epoch": 2.3698872129365403, "grad_norm": 2.7528388500213623, "learning_rate": 7.038745074058975e-05, "loss": 2.8406, "step": 34880 }, { "epoch": 2.370226933007202, "grad_norm": 2.6048178672790527, "learning_rate": 7.03832042397065e-05, "loss": 2.8999, "step": 34885 }, { "epoch": 2.370566653077864, "grad_norm": 2.229081153869629, "learning_rate": 7.037895773882321e-05, "loss": 2.8877, "step": 34890 }, { "epoch": 2.3709063731485256, "grad_norm": 2.4746718406677246, "learning_rate": 7.037471123793994e-05, "loss": 2.8643, "step": 34895 }, { "epoch": 2.3712460932191872, "grad_norm": 2.5018646717071533, "learning_rate": 7.037046473705668e-05, "loss": 2.9986, "step": 34900 }, { "epoch": 2.3715858132898493, "grad_norm": 2.7964279651641846, "learning_rate": 7.036621823617339e-05, "loss": 2.8603, "step": 34905 }, { "epoch": 2.371925533360511, "grad_norm": 2.9289073944091797, "learning_rate": 7.036197173529012e-05, "loss": 2.6558, "step": 34910 }, { "epoch": 2.3722652534311726, "grad_norm": 2.2120940685272217, "learning_rate": 7.035772523440686e-05, "loss": 3.0276, "step": 34915 }, { "epoch": 2.3726049735018346, "grad_norm": 2.83843731880188, "learning_rate": 7.035347873352358e-05, "loss": 2.8591, "step": 34920 }, { "epoch": 2.3729446935724963, "grad_norm": 3.1187357902526855, "learning_rate": 7.03492322326403e-05, "loss": 2.8378, "step": 34925 }, { "epoch": 2.373284413643158, "grad_norm": 2.3623311519622803, "learning_rate": 7.034498573175705e-05, "loss": 2.99, 
"step": 34930 }, { "epoch": 2.37362413371382, "grad_norm": 3.107548952102661, "learning_rate": 7.034073923087376e-05, "loss": 2.9462, "step": 34935 }, { "epoch": 2.3739638537844816, "grad_norm": 2.5312345027923584, "learning_rate": 7.033649272999049e-05, "loss": 3.087, "step": 34940 }, { "epoch": 2.3743035738551432, "grad_norm": 2.1021296977996826, "learning_rate": 7.033224622910723e-05, "loss": 2.7072, "step": 34945 }, { "epoch": 2.3746432939258053, "grad_norm": 2.6371450424194336, "learning_rate": 7.032799972822394e-05, "loss": 3.0352, "step": 34950 }, { "epoch": 2.374983013996467, "grad_norm": 2.9401698112487793, "learning_rate": 7.032375322734067e-05, "loss": 2.8383, "step": 34955 }, { "epoch": 2.3753227340671286, "grad_norm": 2.43183970451355, "learning_rate": 7.03195067264574e-05, "loss": 2.9162, "step": 34960 }, { "epoch": 2.3756624541377906, "grad_norm": 2.9700193405151367, "learning_rate": 7.031526022557413e-05, "loss": 3.0964, "step": 34965 }, { "epoch": 2.3760021742084523, "grad_norm": 2.5709939002990723, "learning_rate": 7.031101372469086e-05, "loss": 2.9114, "step": 34970 }, { "epoch": 2.376341894279114, "grad_norm": 2.4931843280792236, "learning_rate": 7.030676722380758e-05, "loss": 2.8589, "step": 34975 }, { "epoch": 2.376681614349776, "grad_norm": 2.565371513366699, "learning_rate": 7.030252072292431e-05, "loss": 2.7652, "step": 34980 }, { "epoch": 2.3770213344204376, "grad_norm": 2.473902940750122, "learning_rate": 7.029827422204104e-05, "loss": 2.761, "step": 34985 }, { "epoch": 2.3773610544910992, "grad_norm": 2.27754807472229, "learning_rate": 7.029402772115777e-05, "loss": 3.1908, "step": 34990 }, { "epoch": 2.3777007745617613, "grad_norm": 2.640387773513794, "learning_rate": 7.02897812202745e-05, "loss": 2.8169, "step": 34995 }, { "epoch": 2.378040494632423, "grad_norm": 2.683617353439331, "learning_rate": 7.028553471939122e-05, "loss": 2.9573, "step": 35000 }, { "epoch": 2.3783802147030846, "grad_norm": 2.8778738975524902, "learning_rate": 
7.028128821850795e-05, "loss": 2.594, "step": 35005 }, { "epoch": 2.3787199347737467, "grad_norm": 2.4887609481811523, "learning_rate": 7.027704171762468e-05, "loss": 3.0278, "step": 35010 }, { "epoch": 2.3790596548444083, "grad_norm": 2.7506797313690186, "learning_rate": 7.027279521674141e-05, "loss": 2.7833, "step": 35015 }, { "epoch": 2.37939937491507, "grad_norm": 2.3488357067108154, "learning_rate": 7.026854871585814e-05, "loss": 2.8063, "step": 35020 }, { "epoch": 2.379739094985732, "grad_norm": 3.231802463531494, "learning_rate": 7.026430221497486e-05, "loss": 2.9822, "step": 35025 }, { "epoch": 2.3800788150563936, "grad_norm": 3.4267241954803467, "learning_rate": 7.026005571409159e-05, "loss": 2.9886, "step": 35030 }, { "epoch": 2.3804185351270553, "grad_norm": 2.4523873329162598, "learning_rate": 7.025580921320832e-05, "loss": 3.0519, "step": 35035 }, { "epoch": 2.380758255197717, "grad_norm": 3.380687952041626, "learning_rate": 7.025156271232505e-05, "loss": 3.0582, "step": 35040 }, { "epoch": 2.381097975268379, "grad_norm": 2.9874401092529297, "learning_rate": 7.024731621144178e-05, "loss": 2.5456, "step": 35045 }, { "epoch": 2.3814376953390406, "grad_norm": 2.186332941055298, "learning_rate": 7.02430697105585e-05, "loss": 2.642, "step": 35050 }, { "epoch": 2.3817774154097022, "grad_norm": 3.18216609954834, "learning_rate": 7.023882320967523e-05, "loss": 2.8831, "step": 35055 }, { "epoch": 2.3821171354803643, "grad_norm": 2.3590075969696045, "learning_rate": 7.023457670879196e-05, "loss": 2.8258, "step": 35060 }, { "epoch": 2.382456855551026, "grad_norm": 2.156984329223633, "learning_rate": 7.023033020790869e-05, "loss": 2.8981, "step": 35065 }, { "epoch": 2.3827965756216876, "grad_norm": 3.155792236328125, "learning_rate": 7.022608370702542e-05, "loss": 2.9315, "step": 35070 }, { "epoch": 2.3831362956923496, "grad_norm": 2.882294178009033, "learning_rate": 7.022183720614214e-05, "loss": 2.8122, "step": 35075 }, { "epoch": 2.3834760157630113, 
"grad_norm": 2.609004259109497, "learning_rate": 7.021759070525887e-05, "loss": 2.8451, "step": 35080 }, { "epoch": 2.383815735833673, "grad_norm": 3.035541534423828, "learning_rate": 7.02133442043756e-05, "loss": 2.7457, "step": 35085 }, { "epoch": 2.384155455904335, "grad_norm": 2.86891770362854, "learning_rate": 7.020909770349231e-05, "loss": 2.7824, "step": 35090 }, { "epoch": 2.3844951759749966, "grad_norm": 3.03977370262146, "learning_rate": 7.020485120260906e-05, "loss": 3.0968, "step": 35095 }, { "epoch": 2.3848348960456582, "grad_norm": 2.299593210220337, "learning_rate": 7.020060470172578e-05, "loss": 3.0813, "step": 35100 }, { "epoch": 2.3851746161163203, "grad_norm": 2.1191513538360596, "learning_rate": 7.01963582008425e-05, "loss": 3.1961, "step": 35105 }, { "epoch": 2.385514336186982, "grad_norm": 2.488736152648926, "learning_rate": 7.019211169995924e-05, "loss": 2.9869, "step": 35110 }, { "epoch": 2.3858540562576436, "grad_norm": 2.304994821548462, "learning_rate": 7.018786519907597e-05, "loss": 2.9287, "step": 35115 }, { "epoch": 2.3861937763283056, "grad_norm": 2.701179027557373, "learning_rate": 7.018361869819268e-05, "loss": 3.0533, "step": 35120 }, { "epoch": 2.3865334963989673, "grad_norm": 2.213292121887207, "learning_rate": 7.017937219730942e-05, "loss": 3.0357, "step": 35125 }, { "epoch": 2.386873216469629, "grad_norm": 2.4117133617401123, "learning_rate": 7.017512569642615e-05, "loss": 3.1747, "step": 35130 }, { "epoch": 2.387212936540291, "grad_norm": 2.389124631881714, "learning_rate": 7.017087919554287e-05, "loss": 2.853, "step": 35135 }, { "epoch": 2.3875526566109526, "grad_norm": 2.151144027709961, "learning_rate": 7.016663269465961e-05, "loss": 2.7346, "step": 35140 }, { "epoch": 2.3878923766816142, "grad_norm": 2.625351905822754, "learning_rate": 7.016238619377634e-05, "loss": 2.8885, "step": 35145 }, { "epoch": 2.388232096752276, "grad_norm": 3.2861478328704834, "learning_rate": 7.015813969289305e-05, "loss": 2.8362, "step": 35150 
}, { "epoch": 2.388571816822938, "grad_norm": 2.1395816802978516, "learning_rate": 7.015389319200979e-05, "loss": 2.9644, "step": 35155 }, { "epoch": 2.3889115368935996, "grad_norm": 2.6700897216796875, "learning_rate": 7.014964669112651e-05, "loss": 3.0158, "step": 35160 }, { "epoch": 2.389251256964261, "grad_norm": 2.8132481575012207, "learning_rate": 7.014540019024324e-05, "loss": 2.9523, "step": 35165 }, { "epoch": 2.3895909770349233, "grad_norm": 2.6887552738189697, "learning_rate": 7.014115368935998e-05, "loss": 2.768, "step": 35170 }, { "epoch": 2.389930697105585, "grad_norm": 2.512615919113159, "learning_rate": 7.013690718847669e-05, "loss": 3.0026, "step": 35175 }, { "epoch": 2.3902704171762466, "grad_norm": 2.908093214035034, "learning_rate": 7.013266068759342e-05, "loss": 2.7553, "step": 35180 }, { "epoch": 2.3906101372469086, "grad_norm": 2.8729748725891113, "learning_rate": 7.012841418671016e-05, "loss": 2.8429, "step": 35185 }, { "epoch": 2.3909498573175703, "grad_norm": 2.3159680366516113, "learning_rate": 7.012416768582688e-05, "loss": 2.8827, "step": 35190 }, { "epoch": 2.391289577388232, "grad_norm": 2.9218711853027344, "learning_rate": 7.01199211849436e-05, "loss": 3.0433, "step": 35195 }, { "epoch": 2.391629297458894, "grad_norm": 2.8214924335479736, "learning_rate": 7.011567468406034e-05, "loss": 3.1996, "step": 35200 }, { "epoch": 2.3919690175295556, "grad_norm": 2.7502284049987793, "learning_rate": 7.011142818317706e-05, "loss": 2.5223, "step": 35205 }, { "epoch": 2.3923087376002172, "grad_norm": 2.7930655479431152, "learning_rate": 7.01071816822938e-05, "loss": 3.4296, "step": 35210 }, { "epoch": 2.3926484576708793, "grad_norm": 2.6804404258728027, "learning_rate": 7.010293518141053e-05, "loss": 2.7994, "step": 35215 }, { "epoch": 2.392988177741541, "grad_norm": 2.3044817447662354, "learning_rate": 7.009868868052724e-05, "loss": 2.895, "step": 35220 }, { "epoch": 2.3933278978122026, "grad_norm": 2.724813222885132, "learning_rate": 
7.009444217964398e-05, "loss": 3.1755, "step": 35225 }, { "epoch": 2.3936676178828646, "grad_norm": 2.8181862831115723, "learning_rate": 7.009019567876071e-05, "loss": 3.0028, "step": 35230 }, { "epoch": 2.3940073379535263, "grad_norm": 2.7648284435272217, "learning_rate": 7.008594917787743e-05, "loss": 2.7241, "step": 35235 }, { "epoch": 2.394347058024188, "grad_norm": 2.9353692531585693, "learning_rate": 7.008170267699417e-05, "loss": 2.8355, "step": 35240 }, { "epoch": 2.39468677809485, "grad_norm": 2.361222505569458, "learning_rate": 7.007745617611088e-05, "loss": 2.8034, "step": 35245 }, { "epoch": 2.3950264981655116, "grad_norm": 2.793325185775757, "learning_rate": 7.007320967522761e-05, "loss": 2.7581, "step": 35250 }, { "epoch": 2.3953662182361732, "grad_norm": 2.171382188796997, "learning_rate": 7.006896317434435e-05, "loss": 2.7134, "step": 35255 }, { "epoch": 2.3957059383068353, "grad_norm": 3.468437671661377, "learning_rate": 7.006471667346107e-05, "loss": 2.8216, "step": 35260 }, { "epoch": 2.396045658377497, "grad_norm": 2.464850664138794, "learning_rate": 7.00604701725778e-05, "loss": 3.1752, "step": 35265 }, { "epoch": 2.3963853784481586, "grad_norm": 2.6258625984191895, "learning_rate": 7.005622367169454e-05, "loss": 3.0151, "step": 35270 }, { "epoch": 2.3967250985188207, "grad_norm": 2.383960723876953, "learning_rate": 7.005197717081125e-05, "loss": 2.8177, "step": 35275 }, { "epoch": 2.3970648185894823, "grad_norm": 2.3152294158935547, "learning_rate": 7.004773066992798e-05, "loss": 2.7848, "step": 35280 }, { "epoch": 2.397404538660144, "grad_norm": 2.6042919158935547, "learning_rate": 7.004348416904472e-05, "loss": 3.0522, "step": 35285 }, { "epoch": 2.397744258730806, "grad_norm": 2.628201723098755, "learning_rate": 7.003923766816144e-05, "loss": 3.0452, "step": 35290 }, { "epoch": 2.3980839788014676, "grad_norm": 2.1670613288879395, "learning_rate": 7.003499116727816e-05, "loss": 3.0142, "step": 35295 }, { "epoch": 2.3984236988721293, 
"grad_norm": 2.4588780403137207, "learning_rate": 7.00307446663949e-05, "loss": 2.8846, "step": 35300 }, { "epoch": 2.3987634189427913, "grad_norm": 2.3068199157714844, "learning_rate": 7.002649816551162e-05, "loss": 3.0823, "step": 35305 }, { "epoch": 2.399103139013453, "grad_norm": 2.983576774597168, "learning_rate": 7.002225166462835e-05, "loss": 2.7105, "step": 35310 }, { "epoch": 2.3994428590841146, "grad_norm": 3.062408447265625, "learning_rate": 7.001800516374508e-05, "loss": 2.6718, "step": 35315 }, { "epoch": 2.3997825791547767, "grad_norm": 2.4704747200012207, "learning_rate": 7.00137586628618e-05, "loss": 2.8896, "step": 35320 }, { "epoch": 2.4001222992254383, "grad_norm": 3.0728890895843506, "learning_rate": 7.000951216197853e-05, "loss": 2.8956, "step": 35325 }, { "epoch": 2.4004620192961, "grad_norm": 2.2266340255737305, "learning_rate": 7.000526566109526e-05, "loss": 2.8365, "step": 35330 }, { "epoch": 2.400801739366762, "grad_norm": 2.348817825317383, "learning_rate": 7.000101916021199e-05, "loss": 3.1044, "step": 35335 }, { "epoch": 2.4011414594374236, "grad_norm": 2.404682159423828, "learning_rate": 6.999677265932872e-05, "loss": 2.9011, "step": 35340 }, { "epoch": 2.4014811795080853, "grad_norm": 2.6520276069641113, "learning_rate": 6.999252615844544e-05, "loss": 2.9033, "step": 35345 }, { "epoch": 2.4018208995787473, "grad_norm": 3.1509225368499756, "learning_rate": 6.998827965756217e-05, "loss": 2.6892, "step": 35350 }, { "epoch": 2.402160619649409, "grad_norm": 2.8803300857543945, "learning_rate": 6.99840331566789e-05, "loss": 2.811, "step": 35355 }, { "epoch": 2.4025003397200706, "grad_norm": 3.7315073013305664, "learning_rate": 6.997978665579563e-05, "loss": 2.809, "step": 35360 }, { "epoch": 2.4028400597907327, "grad_norm": 3.1501524448394775, "learning_rate": 6.997554015491236e-05, "loss": 2.7987, "step": 35365 }, { "epoch": 2.4031797798613943, "grad_norm": 2.7156310081481934, "learning_rate": 6.997129365402908e-05, "loss": 2.9991, "step": 
35370 }, { "epoch": 2.403519499932056, "grad_norm": 2.1509265899658203, "learning_rate": 6.996704715314581e-05, "loss": 2.7195, "step": 35375 }, { "epoch": 2.4038592200027176, "grad_norm": 2.544740915298462, "learning_rate": 6.996280065226254e-05, "loss": 2.7569, "step": 35380 }, { "epoch": 2.4041989400733796, "grad_norm": 2.563994884490967, "learning_rate": 6.995855415137927e-05, "loss": 2.9555, "step": 35385 }, { "epoch": 2.4045386601440413, "grad_norm": 3.081427574157715, "learning_rate": 6.9954307650496e-05, "loss": 2.9096, "step": 35390 }, { "epoch": 2.404878380214703, "grad_norm": 2.60455322265625, "learning_rate": 6.995006114961272e-05, "loss": 2.7894, "step": 35395 }, { "epoch": 2.405218100285365, "grad_norm": 2.8225345611572266, "learning_rate": 6.994581464872945e-05, "loss": 2.9205, "step": 35400 }, { "epoch": 2.4055578203560266, "grad_norm": 3.1617965698242188, "learning_rate": 6.994156814784618e-05, "loss": 3.1462, "step": 35405 }, { "epoch": 2.4058975404266882, "grad_norm": 2.963561534881592, "learning_rate": 6.993732164696291e-05, "loss": 2.7297, "step": 35410 }, { "epoch": 2.4062372604973503, "grad_norm": 2.312760353088379, "learning_rate": 6.993307514607964e-05, "loss": 2.6756, "step": 35415 }, { "epoch": 2.406576980568012, "grad_norm": 2.3338189125061035, "learning_rate": 6.992882864519636e-05, "loss": 2.9017, "step": 35420 }, { "epoch": 2.4069167006386736, "grad_norm": 2.862079381942749, "learning_rate": 6.992458214431309e-05, "loss": 3.0301, "step": 35425 }, { "epoch": 2.4072564207093357, "grad_norm": 2.7982325553894043, "learning_rate": 6.992033564342982e-05, "loss": 3.045, "step": 35430 }, { "epoch": 2.4075961407799973, "grad_norm": 2.6386008262634277, "learning_rate": 6.991608914254655e-05, "loss": 2.7592, "step": 35435 }, { "epoch": 2.407935860850659, "grad_norm": 2.281254768371582, "learning_rate": 6.991184264166328e-05, "loss": 2.7442, "step": 35440 }, { "epoch": 2.408275580921321, "grad_norm": 2.5060062408447266, "learning_rate": 
6.990759614077999e-05, "loss": 3.1551, "step": 35445 }, { "epoch": 2.4086153009919826, "grad_norm": 2.892580509185791, "learning_rate": 6.990334963989673e-05, "loss": 3.1529, "step": 35450 }, { "epoch": 2.4089550210626443, "grad_norm": 2.742112159729004, "learning_rate": 6.989910313901346e-05, "loss": 2.7901, "step": 35455 }, { "epoch": 2.4092947411333063, "grad_norm": 2.644575834274292, "learning_rate": 6.989485663813017e-05, "loss": 3.0039, "step": 35460 }, { "epoch": 2.409634461203968, "grad_norm": 2.3727176189422607, "learning_rate": 6.989061013724692e-05, "loss": 2.6525, "step": 35465 }, { "epoch": 2.4099741812746296, "grad_norm": 2.308077335357666, "learning_rate": 6.988636363636364e-05, "loss": 3.0708, "step": 35470 }, { "epoch": 2.4103139013452917, "grad_norm": 2.052539587020874, "learning_rate": 6.988211713548036e-05, "loss": 2.823, "step": 35475 }, { "epoch": 2.4106536214159533, "grad_norm": 2.1632354259490967, "learning_rate": 6.98778706345971e-05, "loss": 2.8402, "step": 35480 }, { "epoch": 2.410993341486615, "grad_norm": 2.9465255737304688, "learning_rate": 6.987362413371383e-05, "loss": 2.9808, "step": 35485 }, { "epoch": 2.4113330615572766, "grad_norm": 2.4450366497039795, "learning_rate": 6.986937763283054e-05, "loss": 3.1097, "step": 35490 }, { "epoch": 2.4116727816279386, "grad_norm": 2.9601128101348877, "learning_rate": 6.986513113194728e-05, "loss": 3.0554, "step": 35495 }, { "epoch": 2.4120125016986003, "grad_norm": 2.7868294715881348, "learning_rate": 6.986088463106401e-05, "loss": 3.0586, "step": 35500 }, { "epoch": 2.412352221769262, "grad_norm": 2.743361234664917, "learning_rate": 6.985663813018073e-05, "loss": 2.7898, "step": 35505 }, { "epoch": 2.412691941839924, "grad_norm": 3.074756383895874, "learning_rate": 6.985239162929747e-05, "loss": 2.92, "step": 35510 }, { "epoch": 2.4130316619105856, "grad_norm": 1.977927803993225, "learning_rate": 6.984814512841418e-05, "loss": 2.7484, "step": 35515 }, { "epoch": 2.4133713819812472, 
"grad_norm": 3.0567781925201416, "learning_rate": 6.984389862753091e-05, "loss": 3.0041, "step": 35520 }, { "epoch": 2.4137111020519093, "grad_norm": 2.6955924034118652, "learning_rate": 6.983965212664765e-05, "loss": 2.7346, "step": 35525 }, { "epoch": 2.414050822122571, "grad_norm": 2.461596727371216, "learning_rate": 6.983540562576437e-05, "loss": 3.0973, "step": 35530 }, { "epoch": 2.4143905421932326, "grad_norm": 2.0066142082214355, "learning_rate": 6.98311591248811e-05, "loss": 3.0485, "step": 35535 }, { "epoch": 2.4147302622638946, "grad_norm": 2.6982574462890625, "learning_rate": 6.982691262399784e-05, "loss": 2.9969, "step": 35540 }, { "epoch": 2.4150699823345563, "grad_norm": 2.598910331726074, "learning_rate": 6.982266612311455e-05, "loss": 2.7667, "step": 35545 }, { "epoch": 2.415409702405218, "grad_norm": 2.638730049133301, "learning_rate": 6.981841962223129e-05, "loss": 2.6667, "step": 35550 }, { "epoch": 2.41574942247588, "grad_norm": 2.5907459259033203, "learning_rate": 6.981417312134802e-05, "loss": 2.9999, "step": 35555 }, { "epoch": 2.4160891425465416, "grad_norm": 2.4071083068847656, "learning_rate": 6.980992662046473e-05, "loss": 2.9842, "step": 35560 }, { "epoch": 2.4164288626172032, "grad_norm": 2.2664036750793457, "learning_rate": 6.980568011958148e-05, "loss": 2.661, "step": 35565 }, { "epoch": 2.4167685826878653, "grad_norm": 2.981271743774414, "learning_rate": 6.98014336186982e-05, "loss": 2.7352, "step": 35570 }, { "epoch": 2.417108302758527, "grad_norm": 1.8821171522140503, "learning_rate": 6.979718711781492e-05, "loss": 2.7459, "step": 35575 }, { "epoch": 2.4174480228291886, "grad_norm": 3.172126293182373, "learning_rate": 6.979294061693166e-05, "loss": 3.0412, "step": 35580 }, { "epoch": 2.4177877428998507, "grad_norm": 2.004974126815796, "learning_rate": 6.978869411604839e-05, "loss": 3.1148, "step": 35585 }, { "epoch": 2.4181274629705123, "grad_norm": 3.0270581245422363, "learning_rate": 6.97844476151651e-05, "loss": 2.9165, "step": 
35590 }, { "epoch": 2.418467183041174, "grad_norm": 3.3335025310516357, "learning_rate": 6.978020111428184e-05, "loss": 3.0955, "step": 35595 }, { "epoch": 2.418806903111836, "grad_norm": 3.1411149501800537, "learning_rate": 6.977595461339856e-05, "loss": 2.983, "step": 35600 }, { "epoch": 2.4191466231824976, "grad_norm": 2.1047346591949463, "learning_rate": 6.977170811251529e-05, "loss": 3.0337, "step": 35605 }, { "epoch": 2.4194863432531593, "grad_norm": 2.3618600368499756, "learning_rate": 6.976746161163203e-05, "loss": 2.9426, "step": 35610 }, { "epoch": 2.4198260633238213, "grad_norm": 2.7285261154174805, "learning_rate": 6.976321511074874e-05, "loss": 2.9254, "step": 35615 }, { "epoch": 2.420165783394483, "grad_norm": 2.207577705383301, "learning_rate": 6.975896860986547e-05, "loss": 2.9517, "step": 35620 }, { "epoch": 2.4205055034651446, "grad_norm": 2.5136406421661377, "learning_rate": 6.975472210898221e-05, "loss": 2.7099, "step": 35625 }, { "epoch": 2.4208452235358067, "grad_norm": 2.9708242416381836, "learning_rate": 6.975047560809893e-05, "loss": 2.8352, "step": 35630 }, { "epoch": 2.4211849436064683, "grad_norm": 2.7779388427734375, "learning_rate": 6.974622910721565e-05, "loss": 3.0524, "step": 35635 }, { "epoch": 2.42152466367713, "grad_norm": 2.373838424682617, "learning_rate": 6.97419826063324e-05, "loss": 2.8603, "step": 35640 }, { "epoch": 2.421864383747792, "grad_norm": 2.3022124767303467, "learning_rate": 6.973773610544911e-05, "loss": 2.8949, "step": 35645 }, { "epoch": 2.4222041038184536, "grad_norm": 2.5553793907165527, "learning_rate": 6.973348960456584e-05, "loss": 2.8069, "step": 35650 }, { "epoch": 2.4225438238891153, "grad_norm": 2.2914814949035645, "learning_rate": 6.972924310368258e-05, "loss": 2.6029, "step": 35655 }, { "epoch": 2.4228835439597773, "grad_norm": 2.329559087753296, "learning_rate": 6.97249966027993e-05, "loss": 3.0569, "step": 35660 }, { "epoch": 2.423223264030439, "grad_norm": 2.1489336490631104, "learning_rate": 
6.972075010191602e-05, "loss": 2.8562, "step": 35665 }, { "epoch": 2.4235629841011006, "grad_norm": 2.97452449798584, "learning_rate": 6.971650360103275e-05, "loss": 2.9063, "step": 35670 }, { "epoch": 2.4239027041717627, "grad_norm": 3.807521343231201, "learning_rate": 6.971225710014948e-05, "loss": 2.8265, "step": 35675 }, { "epoch": 2.4242424242424243, "grad_norm": 2.915558338165283, "learning_rate": 6.97080105992662e-05, "loss": 2.7847, "step": 35680 }, { "epoch": 2.424582144313086, "grad_norm": 2.5947763919830322, "learning_rate": 6.970376409838293e-05, "loss": 2.9631, "step": 35685 }, { "epoch": 2.424921864383748, "grad_norm": 2.230578899383545, "learning_rate": 6.969951759749966e-05, "loss": 2.7497, "step": 35690 }, { "epoch": 2.4252615844544096, "grad_norm": 2.825385570526123, "learning_rate": 6.969527109661639e-05, "loss": 2.7577, "step": 35695 }, { "epoch": 2.4256013045250713, "grad_norm": 3.1607205867767334, "learning_rate": 6.969102459573312e-05, "loss": 2.9565, "step": 35700 }, { "epoch": 2.4259410245957334, "grad_norm": 2.8743951320648193, "learning_rate": 6.968677809484985e-05, "loss": 2.58, "step": 35705 }, { "epoch": 2.426280744666395, "grad_norm": 2.3829264640808105, "learning_rate": 6.968253159396657e-05, "loss": 2.994, "step": 35710 }, { "epoch": 2.4266204647370566, "grad_norm": 2.1523728370666504, "learning_rate": 6.96782850930833e-05, "loss": 2.7175, "step": 35715 }, { "epoch": 2.4269601848077182, "grad_norm": 2.788994789123535, "learning_rate": 6.967403859220003e-05, "loss": 2.9636, "step": 35720 }, { "epoch": 2.4272999048783803, "grad_norm": 2.2859983444213867, "learning_rate": 6.966979209131676e-05, "loss": 2.7263, "step": 35725 }, { "epoch": 2.427639624949042, "grad_norm": 2.5180959701538086, "learning_rate": 6.966554559043349e-05, "loss": 3.0389, "step": 35730 }, { "epoch": 2.4279793450197036, "grad_norm": 2.0956718921661377, "learning_rate": 6.966129908955021e-05, "loss": 2.7651, "step": 35735 }, { "epoch": 2.4283190650903657, 
"grad_norm": 2.4341745376586914, "learning_rate": 6.965705258866694e-05, "loss": 2.7979, "step": 35740 }, { "epoch": 2.4286587851610273, "grad_norm": 2.9098217487335205, "learning_rate": 6.965280608778367e-05, "loss": 2.6419, "step": 35745 }, { "epoch": 2.428998505231689, "grad_norm": 2.665822982788086, "learning_rate": 6.96485595869004e-05, "loss": 2.9638, "step": 35750 }, { "epoch": 2.429338225302351, "grad_norm": 2.929487705230713, "learning_rate": 6.964431308601713e-05, "loss": 2.8675, "step": 35755 }, { "epoch": 2.4296779453730126, "grad_norm": 2.2345054149627686, "learning_rate": 6.964006658513385e-05, "loss": 3.0557, "step": 35760 }, { "epoch": 2.4300176654436743, "grad_norm": 2.695965528488159, "learning_rate": 6.963582008425058e-05, "loss": 2.9785, "step": 35765 }, { "epoch": 2.4303573855143363, "grad_norm": 3.562631368637085, "learning_rate": 6.963157358336731e-05, "loss": 2.8666, "step": 35770 }, { "epoch": 2.430697105584998, "grad_norm": 2.9152259826660156, "learning_rate": 6.962732708248404e-05, "loss": 2.8094, "step": 35775 }, { "epoch": 2.4310368256556596, "grad_norm": 2.0694656372070312, "learning_rate": 6.962308058160077e-05, "loss": 2.522, "step": 35780 }, { "epoch": 2.4313765457263217, "grad_norm": 2.6559112071990967, "learning_rate": 6.96188340807175e-05, "loss": 2.9519, "step": 35785 }, { "epoch": 2.4317162657969833, "grad_norm": 2.471632719039917, "learning_rate": 6.961458757983422e-05, "loss": 2.9082, "step": 35790 }, { "epoch": 2.432055985867645, "grad_norm": 2.771301507949829, "learning_rate": 6.961034107895095e-05, "loss": 2.9289, "step": 35795 }, { "epoch": 2.432395705938307, "grad_norm": 2.208820104598999, "learning_rate": 6.960609457806766e-05, "loss": 2.8148, "step": 35800 }, { "epoch": 2.4327354260089686, "grad_norm": 3.381685972213745, "learning_rate": 6.96018480771844e-05, "loss": 2.8582, "step": 35805 }, { "epoch": 2.4330751460796303, "grad_norm": 2.4134204387664795, "learning_rate": 6.959760157630113e-05, "loss": 2.9508, "step": 
35810 }, { "epoch": 2.4334148661502923, "grad_norm": 2.4693033695220947, "learning_rate": 6.959335507541785e-05, "loss": 2.8022, "step": 35815 }, { "epoch": 2.433754586220954, "grad_norm": 1.9903475046157837, "learning_rate": 6.958910857453459e-05, "loss": 3.1173, "step": 35820 }, { "epoch": 2.4340943062916156, "grad_norm": 2.996833086013794, "learning_rate": 6.958486207365132e-05, "loss": 2.9733, "step": 35825 }, { "epoch": 2.4344340263622772, "grad_norm": 2.1666030883789062, "learning_rate": 6.958061557276803e-05, "loss": 2.8332, "step": 35830 }, { "epoch": 2.4347737464329393, "grad_norm": 2.287216901779175, "learning_rate": 6.957636907188477e-05, "loss": 2.7161, "step": 35835 }, { "epoch": 2.435113466503601, "grad_norm": 2.4098427295684814, "learning_rate": 6.95721225710015e-05, "loss": 2.9299, "step": 35840 }, { "epoch": 2.4354531865742626, "grad_norm": 3.045104742050171, "learning_rate": 6.956787607011822e-05, "loss": 2.8422, "step": 35845 }, { "epoch": 2.4357929066449246, "grad_norm": 2.81266188621521, "learning_rate": 6.956362956923496e-05, "loss": 2.8374, "step": 35850 }, { "epoch": 2.4361326267155863, "grad_norm": 2.4714598655700684, "learning_rate": 6.955938306835169e-05, "loss": 3.0145, "step": 35855 }, { "epoch": 2.436472346786248, "grad_norm": 2.6172285079956055, "learning_rate": 6.95551365674684e-05, "loss": 3.0117, "step": 35860 }, { "epoch": 2.43681206685691, "grad_norm": 2.8098249435424805, "learning_rate": 6.955089006658514e-05, "loss": 2.7968, "step": 35865 }, { "epoch": 2.4371517869275716, "grad_norm": 2.370896816253662, "learning_rate": 6.954664356570186e-05, "loss": 2.8684, "step": 35870 }, { "epoch": 2.4374915069982332, "grad_norm": 3.935030698776245, "learning_rate": 6.954239706481859e-05, "loss": 2.8771, "step": 35875 }, { "epoch": 2.4378312270688953, "grad_norm": 2.8129899501800537, "learning_rate": 6.953815056393533e-05, "loss": 2.9591, "step": 35880 }, { "epoch": 2.438170947139557, "grad_norm": 3.0711424350738525, "learning_rate": 
6.953390406305204e-05, "loss": 2.7587, "step": 35885 }, { "epoch": 2.4385106672102186, "grad_norm": 2.7172489166259766, "learning_rate": 6.952965756216878e-05, "loss": 2.8987, "step": 35890 }, { "epoch": 2.4388503872808807, "grad_norm": 2.6130783557891846, "learning_rate": 6.952541106128551e-05, "loss": 3.0382, "step": 35895 }, { "epoch": 2.4391901073515423, "grad_norm": 2.5299265384674072, "learning_rate": 6.952116456040223e-05, "loss": 2.7763, "step": 35900 }, { "epoch": 2.439529827422204, "grad_norm": 2.2693889141082764, "learning_rate": 6.951691805951897e-05, "loss": 2.7368, "step": 35905 }, { "epoch": 2.439869547492866, "grad_norm": 1.9066417217254639, "learning_rate": 6.95126715586357e-05, "loss": 3.0769, "step": 35910 }, { "epoch": 2.4402092675635276, "grad_norm": 2.6348166465759277, "learning_rate": 6.950842505775241e-05, "loss": 2.9046, "step": 35915 }, { "epoch": 2.4405489876341893, "grad_norm": 2.4349727630615234, "learning_rate": 6.950417855686915e-05, "loss": 2.7552, "step": 35920 }, { "epoch": 2.4408887077048513, "grad_norm": 1.9662432670593262, "learning_rate": 6.949993205598588e-05, "loss": 2.7501, "step": 35925 }, { "epoch": 2.441228427775513, "grad_norm": 2.6146278381347656, "learning_rate": 6.949568555510259e-05, "loss": 3.0241, "step": 35930 }, { "epoch": 2.4415681478461746, "grad_norm": 2.9588687419891357, "learning_rate": 6.949143905421933e-05, "loss": 3.2807, "step": 35935 }, { "epoch": 2.4419078679168367, "grad_norm": 2.466895580291748, "learning_rate": 6.948719255333605e-05, "loss": 2.9624, "step": 35940 }, { "epoch": 2.4422475879874983, "grad_norm": 3.110175609588623, "learning_rate": 6.948294605245278e-05, "loss": 2.8925, "step": 35945 }, { "epoch": 2.44258730805816, "grad_norm": 2.655573606491089, "learning_rate": 6.947869955156952e-05, "loss": 2.7808, "step": 35950 }, { "epoch": 2.442927028128822, "grad_norm": 2.0184485912323, "learning_rate": 6.947445305068623e-05, "loss": 3.1071, "step": 35955 }, { "epoch": 2.4432667481994836, 
"grad_norm": 2.2519071102142334, "learning_rate": 6.947020654980296e-05, "loss": 2.681, "step": 35960 }, { "epoch": 2.4436064682701453, "grad_norm": 2.2664551734924316, "learning_rate": 6.94659600489197e-05, "loss": 2.8412, "step": 35965 }, { "epoch": 2.4439461883408073, "grad_norm": 2.4475481510162354, "learning_rate": 6.946171354803642e-05, "loss": 2.7617, "step": 35970 }, { "epoch": 2.444285908411469, "grad_norm": 2.22914981842041, "learning_rate": 6.945746704715315e-05, "loss": 2.7537, "step": 35975 }, { "epoch": 2.4446256284821306, "grad_norm": 2.016018867492676, "learning_rate": 6.945322054626989e-05, "loss": 2.6793, "step": 35980 }, { "epoch": 2.4449653485527927, "grad_norm": 2.2291128635406494, "learning_rate": 6.94489740453866e-05, "loss": 2.5315, "step": 35985 }, { "epoch": 2.4453050686234543, "grad_norm": 2.565746307373047, "learning_rate": 6.944472754450333e-05, "loss": 2.9868, "step": 35990 }, { "epoch": 2.445644788694116, "grad_norm": 2.946347236633301, "learning_rate": 6.944048104362007e-05, "loss": 2.8528, "step": 35995 }, { "epoch": 2.445984508764778, "grad_norm": 2.569042444229126, "learning_rate": 6.943623454273679e-05, "loss": 3.022, "step": 36000 }, { "epoch": 2.4463242288354397, "grad_norm": 3.020263195037842, "learning_rate": 6.943198804185351e-05, "loss": 3.0681, "step": 36005 }, { "epoch": 2.4466639489061013, "grad_norm": 2.655601978302002, "learning_rate": 6.942774154097025e-05, "loss": 2.8952, "step": 36010 }, { "epoch": 2.4470036689767634, "grad_norm": 2.622605562210083, "learning_rate": 6.942349504008697e-05, "loss": 2.9363, "step": 36015 }, { "epoch": 2.447343389047425, "grad_norm": 2.1272544860839844, "learning_rate": 6.94192485392037e-05, "loss": 2.757, "step": 36020 }, { "epoch": 2.4476831091180866, "grad_norm": 3.076218366622925, "learning_rate": 6.941500203832043e-05, "loss": 2.8739, "step": 36025 }, { "epoch": 2.4480228291887487, "grad_norm": 2.262355089187622, "learning_rate": 6.941075553743715e-05, "loss": 2.8395, "step": 36030 
}, { "epoch": 2.4483625492594103, "grad_norm": 2.2670209407806396, "learning_rate": 6.940650903655388e-05, "loss": 2.8617, "step": 36035 }, { "epoch": 2.448702269330072, "grad_norm": 2.6539855003356934, "learning_rate": 6.940226253567061e-05, "loss": 2.7431, "step": 36040 }, { "epoch": 2.449041989400734, "grad_norm": 2.4343738555908203, "learning_rate": 6.939801603478734e-05, "loss": 3.09, "step": 36045 }, { "epoch": 2.4493817094713957, "grad_norm": 2.4954891204833984, "learning_rate": 6.939376953390407e-05, "loss": 2.9479, "step": 36050 }, { "epoch": 2.4497214295420573, "grad_norm": 3.1870458126068115, "learning_rate": 6.93895230330208e-05, "loss": 2.9042, "step": 36055 }, { "epoch": 2.4500611496127194, "grad_norm": 2.4881112575531006, "learning_rate": 6.938527653213752e-05, "loss": 2.6858, "step": 36060 }, { "epoch": 2.450400869683381, "grad_norm": 2.852431297302246, "learning_rate": 6.938103003125425e-05, "loss": 2.8484, "step": 36065 }, { "epoch": 2.4507405897540426, "grad_norm": 2.1989328861236572, "learning_rate": 6.937678353037098e-05, "loss": 2.8493, "step": 36070 }, { "epoch": 2.4510803098247043, "grad_norm": 3.4317619800567627, "learning_rate": 6.93725370294877e-05, "loss": 2.8357, "step": 36075 }, { "epoch": 2.4514200298953663, "grad_norm": 2.4539077281951904, "learning_rate": 6.936829052860443e-05, "loss": 2.9886, "step": 36080 }, { "epoch": 2.451759749966028, "grad_norm": 2.526620388031006, "learning_rate": 6.936404402772116e-05, "loss": 3.0329, "step": 36085 }, { "epoch": 2.4520994700366896, "grad_norm": 2.582157850265503, "learning_rate": 6.935979752683789e-05, "loss": 2.7375, "step": 36090 }, { "epoch": 2.4524391901073517, "grad_norm": 3.219203472137451, "learning_rate": 6.935555102595462e-05, "loss": 2.6922, "step": 36095 }, { "epoch": 2.4527789101780133, "grad_norm": 3.097987413406372, "learning_rate": 6.935130452507135e-05, "loss": 2.6205, "step": 36100 }, { "epoch": 2.453118630248675, "grad_norm": 2.2678287029266357, "learning_rate": 
6.934705802418807e-05, "loss": 2.5794, "step": 36105 }, { "epoch": 2.453458350319337, "grad_norm": 2.4181735515594482, "learning_rate": 6.93428115233048e-05, "loss": 3.1427, "step": 36110 }, { "epoch": 2.4537980703899986, "grad_norm": 2.244144916534424, "learning_rate": 6.933856502242153e-05, "loss": 3.0582, "step": 36115 }, { "epoch": 2.4541377904606603, "grad_norm": 2.8486807346343994, "learning_rate": 6.933431852153826e-05, "loss": 2.8195, "step": 36120 }, { "epoch": 2.4544775105313223, "grad_norm": 2.490574598312378, "learning_rate": 6.933007202065499e-05, "loss": 2.8463, "step": 36125 }, { "epoch": 2.454817230601984, "grad_norm": 2.3764498233795166, "learning_rate": 6.932582551977171e-05, "loss": 2.7976, "step": 36130 }, { "epoch": 2.4551569506726456, "grad_norm": 2.5652353763580322, "learning_rate": 6.932157901888844e-05, "loss": 2.9673, "step": 36135 }, { "epoch": 2.4554966707433077, "grad_norm": 2.624072313308716, "learning_rate": 6.931733251800516e-05, "loss": 2.6483, "step": 36140 }, { "epoch": 2.4558363908139693, "grad_norm": 2.7718558311462402, "learning_rate": 6.93130860171219e-05, "loss": 2.9093, "step": 36145 }, { "epoch": 2.456176110884631, "grad_norm": 2.7054357528686523, "learning_rate": 6.930883951623863e-05, "loss": 2.9123, "step": 36150 }, { "epoch": 2.456515830955293, "grad_norm": 3.15636944770813, "learning_rate": 6.930459301535534e-05, "loss": 3.0354, "step": 36155 }, { "epoch": 2.4568555510259547, "grad_norm": 3.4371187686920166, "learning_rate": 6.930034651447208e-05, "loss": 2.8552, "step": 36160 }, { "epoch": 2.4571952710966163, "grad_norm": 2.7227766513824463, "learning_rate": 6.929610001358881e-05, "loss": 3.1815, "step": 36165 }, { "epoch": 2.457534991167278, "grad_norm": 2.6570630073547363, "learning_rate": 6.929185351270552e-05, "loss": 2.7059, "step": 36170 }, { "epoch": 2.45787471123794, "grad_norm": 2.614607334136963, "learning_rate": 6.928760701182227e-05, "loss": 3.0138, "step": 36175 }, { "epoch": 2.4582144313086016, 
"grad_norm": 2.4186723232269287, "learning_rate": 6.9283360510939e-05, "loss": 2.8609, "step": 36180 }, { "epoch": 2.4585541513792633, "grad_norm": 2.5404715538024902, "learning_rate": 6.927911401005571e-05, "loss": 2.9057, "step": 36185 }, { "epoch": 2.4588938714499253, "grad_norm": 2.186760663986206, "learning_rate": 6.927486750917245e-05, "loss": 3.0263, "step": 36190 }, { "epoch": 2.459233591520587, "grad_norm": 2.7148067951202393, "learning_rate": 6.927062100828918e-05, "loss": 3.1019, "step": 36195 }, { "epoch": 2.4595733115912486, "grad_norm": 2.25779128074646, "learning_rate": 6.926637450740589e-05, "loss": 2.7665, "step": 36200 }, { "epoch": 2.4599130316619107, "grad_norm": 2.265082836151123, "learning_rate": 6.926212800652263e-05, "loss": 3.2078, "step": 36205 }, { "epoch": 2.4602527517325723, "grad_norm": 2.318193197250366, "learning_rate": 6.925788150563936e-05, "loss": 3.0142, "step": 36210 }, { "epoch": 2.460592471803234, "grad_norm": 2.312988758087158, "learning_rate": 6.925363500475608e-05, "loss": 2.6591, "step": 36215 }, { "epoch": 2.460932191873896, "grad_norm": 3.0365545749664307, "learning_rate": 6.924938850387282e-05, "loss": 3.1214, "step": 36220 }, { "epoch": 2.4612719119445576, "grad_norm": 2.7436187267303467, "learning_rate": 6.924514200298953e-05, "loss": 2.61, "step": 36225 }, { "epoch": 2.4616116320152193, "grad_norm": 2.6556971073150635, "learning_rate": 6.924089550210627e-05, "loss": 3.0112, "step": 36230 }, { "epoch": 2.4619513520858813, "grad_norm": 2.3319458961486816, "learning_rate": 6.9236649001223e-05, "loss": 2.5905, "step": 36235 }, { "epoch": 2.462291072156543, "grad_norm": 2.9753007888793945, "learning_rate": 6.923240250033972e-05, "loss": 2.6911, "step": 36240 }, { "epoch": 2.4626307922272046, "grad_norm": 2.9441587924957275, "learning_rate": 6.922815599945646e-05, "loss": 2.8773, "step": 36245 }, { "epoch": 2.4629705122978667, "grad_norm": 2.6573565006256104, "learning_rate": 6.922390949857319e-05, "loss": 2.8358, "step": 
36250 }, { "epoch": 2.4633102323685283, "grad_norm": 3.6324851512908936, "learning_rate": 6.92196629976899e-05, "loss": 2.7356, "step": 36255 }, { "epoch": 2.46364995243919, "grad_norm": 2.8612987995147705, "learning_rate": 6.921541649680664e-05, "loss": 2.7782, "step": 36260 }, { "epoch": 2.463989672509852, "grad_norm": 2.8375043869018555, "learning_rate": 6.921116999592337e-05, "loss": 2.8572, "step": 36265 }, { "epoch": 2.4643293925805136, "grad_norm": 2.669295072555542, "learning_rate": 6.920777279521674e-05, "loss": 3.0442, "step": 36270 }, { "epoch": 2.4646691126511753, "grad_norm": 2.3792831897735596, "learning_rate": 6.920352629433348e-05, "loss": 2.8364, "step": 36275 }, { "epoch": 2.4650088327218374, "grad_norm": 3.1327357292175293, "learning_rate": 6.91992797934502e-05, "loss": 2.8886, "step": 36280 }, { "epoch": 2.465348552792499, "grad_norm": 2.0272269248962402, "learning_rate": 6.919503329256692e-05, "loss": 2.91, "step": 36285 }, { "epoch": 2.4656882728631606, "grad_norm": 3.1752374172210693, "learning_rate": 6.919078679168366e-05, "loss": 3.024, "step": 36290 }, { "epoch": 2.4660279929338227, "grad_norm": 2.540814161300659, "learning_rate": 6.918654029080038e-05, "loss": 2.6388, "step": 36295 }, { "epoch": 2.4663677130044843, "grad_norm": 2.8777990341186523, "learning_rate": 6.91822937899171e-05, "loss": 2.7668, "step": 36300 }, { "epoch": 2.466707433075146, "grad_norm": 1.9412076473236084, "learning_rate": 6.917804728903385e-05, "loss": 2.7007, "step": 36305 }, { "epoch": 2.467047153145808, "grad_norm": 3.231858253479004, "learning_rate": 6.917380078815056e-05, "loss": 3.0344, "step": 36310 }, { "epoch": 2.4673868732164697, "grad_norm": 2.079586982727051, "learning_rate": 6.916955428726729e-05, "loss": 3.0335, "step": 36315 }, { "epoch": 2.4677265932871313, "grad_norm": 1.7809194326400757, "learning_rate": 6.916530778638403e-05, "loss": 2.9976, "step": 36320 }, { "epoch": 2.4680663133577934, "grad_norm": 2.635244369506836, "learning_rate": 
6.916106128550075e-05, "loss": 2.9159, "step": 36325 }, { "epoch": 2.468406033428455, "grad_norm": 2.4959027767181396, "learning_rate": 6.915681478461747e-05, "loss": 2.7241, "step": 36330 }, { "epoch": 2.4687457534991166, "grad_norm": 2.5943853855133057, "learning_rate": 6.91525682837342e-05, "loss": 3.2129, "step": 36335 }, { "epoch": 2.4690854735697787, "grad_norm": 2.5291550159454346, "learning_rate": 6.914832178285093e-05, "loss": 2.7015, "step": 36340 }, { "epoch": 2.4694251936404403, "grad_norm": 3.1089577674865723, "learning_rate": 6.914407528196766e-05, "loss": 2.888, "step": 36345 }, { "epoch": 2.469764913711102, "grad_norm": 3.1142477989196777, "learning_rate": 6.913982878108439e-05, "loss": 2.9951, "step": 36350 }, { "epoch": 2.470104633781764, "grad_norm": 3.570629119873047, "learning_rate": 6.913558228020111e-05, "loss": 2.6336, "step": 36355 }, { "epoch": 2.4704443538524257, "grad_norm": 2.279010534286499, "learning_rate": 6.913133577931784e-05, "loss": 2.8397, "step": 36360 }, { "epoch": 2.4707840739230873, "grad_norm": 2.6092872619628906, "learning_rate": 6.912708927843457e-05, "loss": 3.0801, "step": 36365 }, { "epoch": 2.4711237939937494, "grad_norm": 2.729870557785034, "learning_rate": 6.91228427775513e-05, "loss": 2.9089, "step": 36370 }, { "epoch": 2.471463514064411, "grad_norm": 3.039632558822632, "learning_rate": 6.911859627666803e-05, "loss": 2.7368, "step": 36375 }, { "epoch": 2.4718032341350726, "grad_norm": 3.0877602100372314, "learning_rate": 6.911434977578475e-05, "loss": 3.0176, "step": 36380 }, { "epoch": 2.4721429542057347, "grad_norm": 2.4678003787994385, "learning_rate": 6.911010327490148e-05, "loss": 3.0447, "step": 36385 }, { "epoch": 2.4724826742763963, "grad_norm": 2.6697616577148438, "learning_rate": 6.910585677401821e-05, "loss": 2.7798, "step": 36390 }, { "epoch": 2.472822394347058, "grad_norm": 2.527792453765869, "learning_rate": 6.910161027313494e-05, "loss": 2.9531, "step": 36395 }, { "epoch": 2.47316211441772, 
"grad_norm": 2.6106691360473633, "learning_rate": 6.909736377225167e-05, "loss": 2.8131, "step": 36400 }, { "epoch": 2.4735018344883817, "grad_norm": 2.6880154609680176, "learning_rate": 6.90931172713684e-05, "loss": 2.9417, "step": 36405 }, { "epoch": 2.4738415545590433, "grad_norm": 2.4095699787139893, "learning_rate": 6.908887077048512e-05, "loss": 2.9523, "step": 36410 }, { "epoch": 2.474181274629705, "grad_norm": 2.760786294937134, "learning_rate": 6.908462426960185e-05, "loss": 2.924, "step": 36415 }, { "epoch": 2.474520994700367, "grad_norm": 3.1484298706054688, "learning_rate": 6.908037776871858e-05, "loss": 2.8933, "step": 36420 }, { "epoch": 2.4748607147710286, "grad_norm": 2.6603291034698486, "learning_rate": 6.90761312678353e-05, "loss": 2.8348, "step": 36425 }, { "epoch": 2.4752004348416903, "grad_norm": 2.282660722732544, "learning_rate": 6.907188476695203e-05, "loss": 2.9947, "step": 36430 }, { "epoch": 2.4755401549123524, "grad_norm": 2.321894884109497, "learning_rate": 6.906763826606876e-05, "loss": 2.9793, "step": 36435 }, { "epoch": 2.475879874983014, "grad_norm": 2.9684109687805176, "learning_rate": 6.906339176518549e-05, "loss": 2.8523, "step": 36440 }, { "epoch": 2.4762195950536756, "grad_norm": 2.6583075523376465, "learning_rate": 6.905914526430222e-05, "loss": 2.8955, "step": 36445 }, { "epoch": 2.4765593151243377, "grad_norm": 2.502173662185669, "learning_rate": 6.905489876341895e-05, "loss": 2.9136, "step": 36450 }, { "epoch": 2.4768990351949993, "grad_norm": 2.5015711784362793, "learning_rate": 6.905065226253567e-05, "loss": 2.8965, "step": 36455 }, { "epoch": 2.477238755265661, "grad_norm": 2.7010722160339355, "learning_rate": 6.90464057616524e-05, "loss": 2.8445, "step": 36460 }, { "epoch": 2.477578475336323, "grad_norm": 2.4464237689971924, "learning_rate": 6.904215926076913e-05, "loss": 2.7228, "step": 36465 }, { "epoch": 2.4779181954069847, "grad_norm": 2.554811477661133, "learning_rate": 6.903791275988586e-05, "loss": 2.782, "step": 
36470 }, { "epoch": 2.4782579154776463, "grad_norm": 2.400758981704712, "learning_rate": 6.903366625900259e-05, "loss": 3.0293, "step": 36475 }, { "epoch": 2.4785976355483084, "grad_norm": 2.7101879119873047, "learning_rate": 6.902941975811931e-05, "loss": 2.7493, "step": 36480 }, { "epoch": 2.47893735561897, "grad_norm": 3.0034878253936768, "learning_rate": 6.902517325723604e-05, "loss": 2.7329, "step": 36485 }, { "epoch": 2.4792770756896316, "grad_norm": 3.274033308029175, "learning_rate": 6.902092675635277e-05, "loss": 2.7164, "step": 36490 }, { "epoch": 2.4796167957602937, "grad_norm": 2.2256555557250977, "learning_rate": 6.90166802554695e-05, "loss": 3.0214, "step": 36495 }, { "epoch": 2.4799565158309553, "grad_norm": 2.5799527168273926, "learning_rate": 6.901243375458623e-05, "loss": 2.5719, "step": 36500 }, { "epoch": 2.480296235901617, "grad_norm": 2.147019147872925, "learning_rate": 6.900818725370295e-05, "loss": 2.9919, "step": 36505 }, { "epoch": 2.4806359559722786, "grad_norm": 2.1284823417663574, "learning_rate": 6.900394075281968e-05, "loss": 2.8693, "step": 36510 }, { "epoch": 2.4809756760429407, "grad_norm": 2.3786141872406006, "learning_rate": 6.899969425193641e-05, "loss": 2.7485, "step": 36515 }, { "epoch": 2.4813153961136023, "grad_norm": 2.629624128341675, "learning_rate": 6.899544775105314e-05, "loss": 2.852, "step": 36520 }, { "epoch": 2.481655116184264, "grad_norm": 3.4901015758514404, "learning_rate": 6.899120125016987e-05, "loss": 3.0455, "step": 36525 }, { "epoch": 2.481994836254926, "grad_norm": 2.9418323040008545, "learning_rate": 6.89869547492866e-05, "loss": 2.8617, "step": 36530 }, { "epoch": 2.4823345563255876, "grad_norm": 2.096083164215088, "learning_rate": 6.898270824840331e-05, "loss": 2.9096, "step": 36535 }, { "epoch": 2.4826742763962493, "grad_norm": 2.118027687072754, "learning_rate": 6.897846174752005e-05, "loss": 2.6093, "step": 36540 }, { "epoch": 2.4830139964669113, "grad_norm": 2.749107599258423, "learning_rate": 
6.897421524663678e-05, "loss": 2.8269, "step": 36545 }, { "epoch": 2.483353716537573, "grad_norm": 2.812605142593384, "learning_rate": 6.89699687457535e-05, "loss": 2.8566, "step": 36550 }, { "epoch": 2.4836934366082346, "grad_norm": 2.2518582344055176, "learning_rate": 6.896572224487023e-05, "loss": 2.775, "step": 36555 }, { "epoch": 2.4840331566788967, "grad_norm": 2.8586621284484863, "learning_rate": 6.896147574398696e-05, "loss": 2.6903, "step": 36560 }, { "epoch": 2.4843728767495583, "grad_norm": 2.986347198486328, "learning_rate": 6.895722924310368e-05, "loss": 2.7418, "step": 36565 }, { "epoch": 2.48471259682022, "grad_norm": 2.7009947299957275, "learning_rate": 6.895298274222042e-05, "loss": 2.6043, "step": 36570 }, { "epoch": 2.485052316890882, "grad_norm": 2.7809622287750244, "learning_rate": 6.894873624133715e-05, "loss": 3.1194, "step": 36575 }, { "epoch": 2.4853920369615436, "grad_norm": 2.8052258491516113, "learning_rate": 6.894448974045386e-05, "loss": 3.1514, "step": 36580 }, { "epoch": 2.4857317570322053, "grad_norm": 2.1822922229766846, "learning_rate": 6.89402432395706e-05, "loss": 2.7942, "step": 36585 }, { "epoch": 2.4860714771028674, "grad_norm": 2.401043176651001, "learning_rate": 6.893599673868733e-05, "loss": 3.0528, "step": 36590 }, { "epoch": 2.486411197173529, "grad_norm": 2.942129373550415, "learning_rate": 6.893175023780405e-05, "loss": 2.6362, "step": 36595 }, { "epoch": 2.4867509172441906, "grad_norm": 2.684694766998291, "learning_rate": 6.892750373692079e-05, "loss": 3.023, "step": 36600 }, { "epoch": 2.4870906373148527, "grad_norm": 2.3506500720977783, "learning_rate": 6.892325723603751e-05, "loss": 2.6381, "step": 36605 }, { "epoch": 2.4874303573855143, "grad_norm": 2.647239923477173, "learning_rate": 6.891901073515423e-05, "loss": 3.0656, "step": 36610 }, { "epoch": 2.487770077456176, "grad_norm": 2.75241756439209, "learning_rate": 6.891476423427097e-05, "loss": 2.8526, "step": 36615 }, { "epoch": 2.488109797526838, "grad_norm": 
2.176468849182129, "learning_rate": 6.891051773338769e-05, "loss": 2.789, "step": 36620 }, { "epoch": 2.4884495175974997, "grad_norm": 2.299071788787842, "learning_rate": 6.890627123250441e-05, "loss": 3.1207, "step": 36625 }, { "epoch": 2.4887892376681613, "grad_norm": 2.490391254425049, "learning_rate": 6.890202473162116e-05, "loss": 2.7668, "step": 36630 }, { "epoch": 2.4891289577388234, "grad_norm": 2.8350327014923096, "learning_rate": 6.889777823073787e-05, "loss": 3.0158, "step": 36635 }, { "epoch": 2.489468677809485, "grad_norm": 2.6504266262054443, "learning_rate": 6.88935317298546e-05, "loss": 2.8257, "step": 36640 }, { "epoch": 2.4898083978801466, "grad_norm": 2.2182462215423584, "learning_rate": 6.888928522897134e-05, "loss": 2.8935, "step": 36645 }, { "epoch": 2.4901481179508087, "grad_norm": 2.6496944427490234, "learning_rate": 6.888503872808805e-05, "loss": 2.8011, "step": 36650 }, { "epoch": 2.4904878380214703, "grad_norm": 2.610541343688965, "learning_rate": 6.888079222720478e-05, "loss": 2.9308, "step": 36655 }, { "epoch": 2.490827558092132, "grad_norm": 2.864105701446533, "learning_rate": 6.887654572632152e-05, "loss": 2.6255, "step": 36660 }, { "epoch": 2.491167278162794, "grad_norm": 3.1328980922698975, "learning_rate": 6.887229922543824e-05, "loss": 2.7354, "step": 36665 }, { "epoch": 2.4915069982334557, "grad_norm": 2.560131072998047, "learning_rate": 6.886805272455497e-05, "loss": 2.8593, "step": 36670 }, { "epoch": 2.4918467183041173, "grad_norm": 2.4380557537078857, "learning_rate": 6.886380622367171e-05, "loss": 2.681, "step": 36675 }, { "epoch": 2.4921864383747794, "grad_norm": 2.543964147567749, "learning_rate": 6.885955972278842e-05, "loss": 2.7345, "step": 36680 }, { "epoch": 2.492526158445441, "grad_norm": 19.179168701171875, "learning_rate": 6.885531322190515e-05, "loss": 2.7034, "step": 36685 }, { "epoch": 2.4928658785161026, "grad_norm": 2.7417800426483154, "learning_rate": 6.885106672102188e-05, "loss": 2.7146, "step": 36690 }, { 
"epoch": 2.4932055985867647, "grad_norm": 2.4987404346466064, "learning_rate": 6.88468202201386e-05, "loss": 2.9183, "step": 36695 }, { "epoch": 2.4935453186574263, "grad_norm": 2.3113768100738525, "learning_rate": 6.884257371925533e-05, "loss": 2.7689, "step": 36700 }, { "epoch": 2.493885038728088, "grad_norm": 2.8846020698547363, "learning_rate": 6.883832721837206e-05, "loss": 2.6778, "step": 36705 }, { "epoch": 2.49422475879875, "grad_norm": 2.6610000133514404, "learning_rate": 6.883408071748879e-05, "loss": 3.0026, "step": 36710 }, { "epoch": 2.4945644788694117, "grad_norm": 2.099112033843994, "learning_rate": 6.882983421660552e-05, "loss": 3.1418, "step": 36715 }, { "epoch": 2.4949041989400733, "grad_norm": 2.3478517532348633, "learning_rate": 6.882558771572225e-05, "loss": 2.812, "step": 36720 }, { "epoch": 2.4952439190107354, "grad_norm": 2.169065237045288, "learning_rate": 6.882134121483897e-05, "loss": 2.5509, "step": 36725 }, { "epoch": 2.495583639081397, "grad_norm": 2.1830947399139404, "learning_rate": 6.88170947139557e-05, "loss": 2.91, "step": 36730 }, { "epoch": 2.4959233591520587, "grad_norm": 3.0980560779571533, "learning_rate": 6.881284821307243e-05, "loss": 3.0925, "step": 36735 }, { "epoch": 2.4962630792227207, "grad_norm": 2.999156951904297, "learning_rate": 6.880860171218916e-05, "loss": 2.6741, "step": 36740 }, { "epoch": 2.4966027992933824, "grad_norm": 2.725588083267212, "learning_rate": 6.880435521130589e-05, "loss": 2.747, "step": 36745 }, { "epoch": 2.496942519364044, "grad_norm": 2.4250600337982178, "learning_rate": 6.880010871042261e-05, "loss": 2.9836, "step": 36750 }, { "epoch": 2.4972822394347056, "grad_norm": 2.4303722381591797, "learning_rate": 6.879586220953934e-05, "loss": 2.7213, "step": 36755 }, { "epoch": 2.4976219595053677, "grad_norm": 1.9918283224105835, "learning_rate": 6.879161570865607e-05, "loss": 2.7808, "step": 36760 }, { "epoch": 2.4979616795760293, "grad_norm": 2.607755422592163, "learning_rate": 
6.87873692077728e-05, "loss": 2.8595, "step": 36765 }, { "epoch": 2.498301399646691, "grad_norm": 2.215458869934082, "learning_rate": 6.878312270688953e-05, "loss": 2.8676, "step": 36770 }, { "epoch": 2.498641119717353, "grad_norm": 2.1545305252075195, "learning_rate": 6.877887620600625e-05, "loss": 2.7229, "step": 36775 }, { "epoch": 2.4989808397880147, "grad_norm": 2.3851988315582275, "learning_rate": 6.877462970512298e-05, "loss": 2.669, "step": 36780 }, { "epoch": 2.4993205598586763, "grad_norm": 2.689288854598999, "learning_rate": 6.877038320423971e-05, "loss": 2.8596, "step": 36785 }, { "epoch": 2.4996602799293384, "grad_norm": 2.765158176422119, "learning_rate": 6.876613670335644e-05, "loss": 2.7605, "step": 36790 }, { "epoch": 2.5, "grad_norm": 2.844634532928467, "learning_rate": 6.876189020247317e-05, "loss": 2.8816, "step": 36795 }, { "epoch": 2.5003397200706616, "grad_norm": 3.314610481262207, "learning_rate": 6.87576437015899e-05, "loss": 3.0288, "step": 36800 }, { "epoch": 2.5006794401413237, "grad_norm": 2.398308038711548, "learning_rate": 6.875339720070662e-05, "loss": 2.9398, "step": 36805 }, { "epoch": 2.5010191602119853, "grad_norm": 2.510425329208374, "learning_rate": 6.874915069982335e-05, "loss": 2.9893, "step": 36810 }, { "epoch": 2.501358880282647, "grad_norm": 2.56449031829834, "learning_rate": 6.874490419894008e-05, "loss": 2.7364, "step": 36815 }, { "epoch": 2.5016986003533086, "grad_norm": 2.306351661682129, "learning_rate": 6.87406576980568e-05, "loss": 2.9306, "step": 36820 }, { "epoch": 2.5020383204239707, "grad_norm": 2.4728736877441406, "learning_rate": 6.873641119717353e-05, "loss": 2.8723, "step": 36825 }, { "epoch": 2.5023780404946323, "grad_norm": 3.774904727935791, "learning_rate": 6.873216469629026e-05, "loss": 2.8946, "step": 36830 }, { "epoch": 2.502717760565294, "grad_norm": 2.9934442043304443, "learning_rate": 6.872791819540699e-05, "loss": 2.8574, "step": 36835 }, { "epoch": 2.503057480635956, "grad_norm": 
2.564286708831787, "learning_rate": 6.872367169452372e-05, "loss": 3.05, "step": 36840 }, { "epoch": 2.5033972007066176, "grad_norm": 3.0022566318511963, "learning_rate": 6.871942519364045e-05, "loss": 2.8541, "step": 36845 }, { "epoch": 2.5037369207772793, "grad_norm": 2.674966812133789, "learning_rate": 6.871517869275717e-05, "loss": 2.8517, "step": 36850 }, { "epoch": 2.5040766408479413, "grad_norm": 2.796332359313965, "learning_rate": 6.87109321918739e-05, "loss": 2.7225, "step": 36855 }, { "epoch": 2.504416360918603, "grad_norm": 2.7705564498901367, "learning_rate": 6.870668569099063e-05, "loss": 2.9098, "step": 36860 }, { "epoch": 2.5047560809892646, "grad_norm": 2.6970791816711426, "learning_rate": 6.870243919010736e-05, "loss": 2.9904, "step": 36865 }, { "epoch": 2.5050958010599267, "grad_norm": 2.006086587905884, "learning_rate": 6.869819268922409e-05, "loss": 2.5733, "step": 36870 }, { "epoch": 2.5054355211305883, "grad_norm": 2.3928089141845703, "learning_rate": 6.869394618834081e-05, "loss": 2.6797, "step": 36875 }, { "epoch": 2.50577524120125, "grad_norm": 2.273238182067871, "learning_rate": 6.868969968745754e-05, "loss": 2.8116, "step": 36880 }, { "epoch": 2.506114961271912, "grad_norm": 2.2713592052459717, "learning_rate": 6.868545318657427e-05, "loss": 2.9371, "step": 36885 }, { "epoch": 2.5064546813425737, "grad_norm": 2.7680211067199707, "learning_rate": 6.868120668569098e-05, "loss": 2.7669, "step": 36890 }, { "epoch": 2.5067944014132353, "grad_norm": 2.5596399307250977, "learning_rate": 6.867696018480773e-05, "loss": 2.604, "step": 36895 }, { "epoch": 2.5071341214838974, "grad_norm": 2.9570071697235107, "learning_rate": 6.867271368392445e-05, "loss": 2.7609, "step": 36900 }, { "epoch": 2.507473841554559, "grad_norm": 2.2159435749053955, "learning_rate": 6.866846718304117e-05, "loss": 2.9598, "step": 36905 }, { "epoch": 2.5078135616252206, "grad_norm": 2.0596272945404053, "learning_rate": 6.866422068215791e-05, "loss": 2.8464, "step": 36910 }, { 
"epoch": 2.5081532816958827, "grad_norm": 2.856106758117676, "learning_rate": 6.865997418127464e-05, "loss": 2.8699, "step": 36915 }, { "epoch": 2.5084930017665443, "grad_norm": 2.5044291019439697, "learning_rate": 6.865572768039135e-05, "loss": 2.8157, "step": 36920 }, { "epoch": 2.508832721837206, "grad_norm": 2.736778974533081, "learning_rate": 6.86514811795081e-05, "loss": 3.0243, "step": 36925 }, { "epoch": 2.509172441907868, "grad_norm": 2.2713077068328857, "learning_rate": 6.864723467862482e-05, "loss": 2.5983, "step": 36930 }, { "epoch": 2.5095121619785297, "grad_norm": 3.2018189430236816, "learning_rate": 6.864298817774154e-05, "loss": 3.0627, "step": 36935 }, { "epoch": 2.5098518820491913, "grad_norm": 1.8724850416183472, "learning_rate": 6.863874167685828e-05, "loss": 2.9749, "step": 36940 }, { "epoch": 2.5101916021198534, "grad_norm": 2.358077049255371, "learning_rate": 6.8634495175975e-05, "loss": 2.9769, "step": 36945 }, { "epoch": 2.510531322190515, "grad_norm": 3.0869734287261963, "learning_rate": 6.863024867509172e-05, "loss": 2.7415, "step": 36950 }, { "epoch": 2.5108710422611766, "grad_norm": 3.0742838382720947, "learning_rate": 6.862600217420846e-05, "loss": 2.9295, "step": 36955 }, { "epoch": 2.5112107623318387, "grad_norm": 2.1295053958892822, "learning_rate": 6.862175567332518e-05, "loss": 2.8799, "step": 36960 }, { "epoch": 2.5115504824025003, "grad_norm": 2.291189670562744, "learning_rate": 6.86175091724419e-05, "loss": 2.8248, "step": 36965 }, { "epoch": 2.511890202473162, "grad_norm": 2.456853151321411, "learning_rate": 6.861326267155865e-05, "loss": 2.859, "step": 36970 }, { "epoch": 2.512229922543824, "grad_norm": 2.398435592651367, "learning_rate": 6.860901617067536e-05, "loss": 3.1459, "step": 36975 }, { "epoch": 2.5125696426144857, "grad_norm": 2.2835819721221924, "learning_rate": 6.860476966979209e-05, "loss": 2.6371, "step": 36980 }, { "epoch": 2.5129093626851473, "grad_norm": 2.9513657093048096, "learning_rate": 
6.860052316890883e-05, "loss": 2.9201, "step": 36985 }, { "epoch": 2.5132490827558094, "grad_norm": 3.302701234817505, "learning_rate": 6.859627666802554e-05, "loss": 3.1837, "step": 36990 }, { "epoch": 2.513588802826471, "grad_norm": 2.322561740875244, "learning_rate": 6.859203016714227e-05, "loss": 3.1432, "step": 36995 }, { "epoch": 2.5139285228971326, "grad_norm": 2.473001003265381, "learning_rate": 6.858778366625901e-05, "loss": 2.9267, "step": 37000 }, { "epoch": 2.5142682429677947, "grad_norm": 2.3103833198547363, "learning_rate": 6.858353716537573e-05, "loss": 3.3016, "step": 37005 }, { "epoch": 2.5146079630384564, "grad_norm": 2.0207979679107666, "learning_rate": 6.857929066449246e-05, "loss": 2.7113, "step": 37010 }, { "epoch": 2.514947683109118, "grad_norm": 2.3413424491882324, "learning_rate": 6.85750441636092e-05, "loss": 2.8632, "step": 37015 }, { "epoch": 2.51528740317978, "grad_norm": 2.747422456741333, "learning_rate": 6.857079766272591e-05, "loss": 2.6579, "step": 37020 }, { "epoch": 2.5156271232504417, "grad_norm": 2.8912651538848877, "learning_rate": 6.856655116184264e-05, "loss": 2.7011, "step": 37025 }, { "epoch": 2.5159668433211033, "grad_norm": 3.157102346420288, "learning_rate": 6.856230466095938e-05, "loss": 2.7374, "step": 37030 }, { "epoch": 2.5163065633917654, "grad_norm": 2.220137357711792, "learning_rate": 6.85580581600761e-05, "loss": 2.7044, "step": 37035 }, { "epoch": 2.516646283462427, "grad_norm": 2.5889430046081543, "learning_rate": 6.855381165919282e-05, "loss": 2.8967, "step": 37040 }, { "epoch": 2.5169860035330887, "grad_norm": 2.711066484451294, "learning_rate": 6.854956515830955e-05, "loss": 2.721, "step": 37045 }, { "epoch": 2.5173257236037507, "grad_norm": 3.357123613357544, "learning_rate": 6.854531865742628e-05, "loss": 2.9674, "step": 37050 }, { "epoch": 2.5176654436744124, "grad_norm": 3.4016497135162354, "learning_rate": 6.854107215654301e-05, "loss": 2.9654, "step": 37055 }, { "epoch": 2.518005163745074, 
"grad_norm": 4.764350414276123, "learning_rate": 6.853682565565974e-05, "loss": 2.8026, "step": 37060 }, { "epoch": 2.518344883815736, "grad_norm": 2.3031537532806396, "learning_rate": 6.853257915477646e-05, "loss": 3.0386, "step": 37065 }, { "epoch": 2.5186846038863977, "grad_norm": 1.9287599325180054, "learning_rate": 6.852833265389319e-05, "loss": 2.8992, "step": 37070 }, { "epoch": 2.5190243239570593, "grad_norm": 2.171041488647461, "learning_rate": 6.852408615300992e-05, "loss": 2.5883, "step": 37075 }, { "epoch": 2.5193640440277214, "grad_norm": 2.6766788959503174, "learning_rate": 6.851983965212665e-05, "loss": 2.9233, "step": 37080 }, { "epoch": 2.519703764098383, "grad_norm": 2.135284900665283, "learning_rate": 6.851559315124338e-05, "loss": 2.7392, "step": 37085 }, { "epoch": 2.5200434841690447, "grad_norm": 2.325324058532715, "learning_rate": 6.85113466503601e-05, "loss": 2.9793, "step": 37090 }, { "epoch": 2.5203832042397067, "grad_norm": 3.323512554168701, "learning_rate": 6.850710014947683e-05, "loss": 3.1295, "step": 37095 }, { "epoch": 2.5207229243103684, "grad_norm": 2.422266960144043, "learning_rate": 6.850285364859356e-05, "loss": 2.7453, "step": 37100 }, { "epoch": 2.52106264438103, "grad_norm": 2.4628241062164307, "learning_rate": 6.849860714771029e-05, "loss": 2.6986, "step": 37105 }, { "epoch": 2.521402364451692, "grad_norm": 2.9656896591186523, "learning_rate": 6.849436064682702e-05, "loss": 2.761, "step": 37110 }, { "epoch": 2.5217420845223537, "grad_norm": 3.1462349891662598, "learning_rate": 6.849011414594374e-05, "loss": 2.8018, "step": 37115 }, { "epoch": 2.5220818045930153, "grad_norm": 2.428358793258667, "learning_rate": 6.848586764506047e-05, "loss": 2.8695, "step": 37120 }, { "epoch": 2.522421524663677, "grad_norm": 2.666701316833496, "learning_rate": 6.84816211441772e-05, "loss": 2.9546, "step": 37125 }, { "epoch": 2.522761244734339, "grad_norm": 2.835503339767456, "learning_rate": 6.847737464329393e-05, "loss": 2.8387, "step": 
37130 }, { "epoch": 2.5231009648050007, "grad_norm": 2.6240999698638916, "learning_rate": 6.847312814241066e-05, "loss": 3.0432, "step": 37135 }, { "epoch": 2.5234406848756623, "grad_norm": 1.9038598537445068, "learning_rate": 6.846888164152738e-05, "loss": 2.9314, "step": 37140 }, { "epoch": 2.5237804049463244, "grad_norm": 2.4371955394744873, "learning_rate": 6.846463514064411e-05, "loss": 2.8247, "step": 37145 }, { "epoch": 2.524120125016986, "grad_norm": 2.8475470542907715, "learning_rate": 6.846038863976084e-05, "loss": 2.9876, "step": 37150 }, { "epoch": 2.5244598450876476, "grad_norm": 2.7672042846679688, "learning_rate": 6.845614213887757e-05, "loss": 2.9531, "step": 37155 }, { "epoch": 2.5247995651583093, "grad_norm": 2.5306334495544434, "learning_rate": 6.84518956379943e-05, "loss": 2.7677, "step": 37160 }, { "epoch": 2.5251392852289714, "grad_norm": 2.52740740776062, "learning_rate": 6.844764913711102e-05, "loss": 2.9487, "step": 37165 }, { "epoch": 2.525479005299633, "grad_norm": 2.8691012859344482, "learning_rate": 6.844340263622775e-05, "loss": 2.7835, "step": 37170 }, { "epoch": 2.5258187253702946, "grad_norm": 2.8805058002471924, "learning_rate": 6.843915613534448e-05, "loss": 2.7185, "step": 37175 }, { "epoch": 2.5261584454409567, "grad_norm": 2.530669927597046, "learning_rate": 6.843490963446121e-05, "loss": 2.7696, "step": 37180 }, { "epoch": 2.5264981655116183, "grad_norm": 2.019381046295166, "learning_rate": 6.843066313357794e-05, "loss": 2.8238, "step": 37185 }, { "epoch": 2.52683788558228, "grad_norm": 2.0712549686431885, "learning_rate": 6.842641663269466e-05, "loss": 2.8316, "step": 37190 }, { "epoch": 2.527177605652942, "grad_norm": 2.6231372356414795, "learning_rate": 6.842217013181139e-05, "loss": 3.0487, "step": 37195 }, { "epoch": 2.5275173257236037, "grad_norm": 2.391839027404785, "learning_rate": 6.841792363092812e-05, "loss": 2.6429, "step": 37200 }, { "epoch": 2.5278570457942653, "grad_norm": 2.0125784873962402, "learning_rate": 
6.841367713004485e-05, "loss": 3.0303, "step": 37205 }, { "epoch": 2.5281967658649274, "grad_norm": 2.610694408416748, "learning_rate": 6.840943062916158e-05, "loss": 2.8498, "step": 37210 }, { "epoch": 2.528536485935589, "grad_norm": 2.705554246902466, "learning_rate": 6.84051841282783e-05, "loss": 2.6772, "step": 37215 }, { "epoch": 2.5288762060062506, "grad_norm": 2.274245023727417, "learning_rate": 6.840093762739503e-05, "loss": 2.8861, "step": 37220 }, { "epoch": 2.5292159260769127, "grad_norm": 2.617867946624756, "learning_rate": 6.839669112651176e-05, "loss": 2.7425, "step": 37225 }, { "epoch": 2.5295556461475743, "grad_norm": 2.722824811935425, "learning_rate": 6.839244462562849e-05, "loss": 2.666, "step": 37230 }, { "epoch": 2.529895366218236, "grad_norm": 3.2266955375671387, "learning_rate": 6.838819812474522e-05, "loss": 2.6601, "step": 37235 }, { "epoch": 2.530235086288898, "grad_norm": 2.826720714569092, "learning_rate": 6.838395162386194e-05, "loss": 2.9312, "step": 37240 }, { "epoch": 2.5305748063595597, "grad_norm": 3.328007698059082, "learning_rate": 6.837970512297866e-05, "loss": 2.6671, "step": 37245 }, { "epoch": 2.5309145264302213, "grad_norm": 2.7020578384399414, "learning_rate": 6.83754586220954e-05, "loss": 2.7164, "step": 37250 }, { "epoch": 2.5312542465008834, "grad_norm": 2.3483011722564697, "learning_rate": 6.837121212121213e-05, "loss": 3.0862, "step": 37255 }, { "epoch": 2.531593966571545, "grad_norm": 2.5297110080718994, "learning_rate": 6.836696562032884e-05, "loss": 2.8416, "step": 37260 }, { "epoch": 2.5319336866422066, "grad_norm": 2.378270387649536, "learning_rate": 6.836271911944558e-05, "loss": 2.744, "step": 37265 }, { "epoch": 2.5322734067128687, "grad_norm": 2.6856589317321777, "learning_rate": 6.835847261856231e-05, "loss": 2.9038, "step": 37270 }, { "epoch": 2.5326131267835303, "grad_norm": 2.134129047393799, "learning_rate": 6.835422611767903e-05, "loss": 2.8911, "step": 37275 }, { "epoch": 2.532952846854192, "grad_norm": 
2.9446496963500977, "learning_rate": 6.834997961679577e-05, "loss": 2.8635, "step": 37280 }, { "epoch": 2.533292566924854, "grad_norm": 2.8164846897125244, "learning_rate": 6.83457331159125e-05, "loss": 2.7913, "step": 37285 }, { "epoch": 2.5336322869955157, "grad_norm": 2.2910094261169434, "learning_rate": 6.834148661502921e-05, "loss": 2.8502, "step": 37290 }, { "epoch": 2.5339720070661773, "grad_norm": 2.3221139907836914, "learning_rate": 6.833724011414595e-05, "loss": 2.7446, "step": 37295 }, { "epoch": 2.5343117271368394, "grad_norm": 3.0794098377227783, "learning_rate": 6.833299361326268e-05, "loss": 2.7153, "step": 37300 }, { "epoch": 2.534651447207501, "grad_norm": 2.520988702774048, "learning_rate": 6.83287471123794e-05, "loss": 2.8816, "step": 37305 }, { "epoch": 2.5349911672781626, "grad_norm": 2.5690062046051025, "learning_rate": 6.832450061149614e-05, "loss": 2.6358, "step": 37310 }, { "epoch": 2.5353308873488247, "grad_norm": 2.68367075920105, "learning_rate": 6.832025411061285e-05, "loss": 2.6295, "step": 37315 }, { "epoch": 2.5356706074194864, "grad_norm": 2.3374521732330322, "learning_rate": 6.831600760972958e-05, "loss": 2.9144, "step": 37320 }, { "epoch": 2.536010327490148, "grad_norm": 2.6247620582580566, "learning_rate": 6.831176110884632e-05, "loss": 2.8014, "step": 37325 }, { "epoch": 2.53635004756081, "grad_norm": 2.585573434829712, "learning_rate": 6.830751460796304e-05, "loss": 2.9667, "step": 37330 }, { "epoch": 2.5366897676314717, "grad_norm": 2.6098432540893555, "learning_rate": 6.830326810707976e-05, "loss": 2.9989, "step": 37335 }, { "epoch": 2.5370294877021333, "grad_norm": 2.1431467533111572, "learning_rate": 6.82990216061965e-05, "loss": 2.9876, "step": 37340 }, { "epoch": 2.5373692077727954, "grad_norm": 3.552119493484497, "learning_rate": 6.829477510531322e-05, "loss": 3.1474, "step": 37345 }, { "epoch": 2.537708927843457, "grad_norm": 2.578529119491577, "learning_rate": 6.829052860442995e-05, "loss": 2.688, "step": 37350 }, { 
"epoch": 2.5380486479141187, "grad_norm": 2.1737754344940186, "learning_rate": 6.828628210354669e-05, "loss": 3.0156, "step": 37355 }, { "epoch": 2.5383883679847807, "grad_norm": 2.2164628505706787, "learning_rate": 6.82820356026634e-05, "loss": 2.8365, "step": 37360 }, { "epoch": 2.5387280880554424, "grad_norm": 2.37174391746521, "learning_rate": 6.827778910178013e-05, "loss": 2.8078, "step": 37365 }, { "epoch": 2.539067808126104, "grad_norm": 3.0881669521331787, "learning_rate": 6.827354260089687e-05, "loss": 2.8677, "step": 37370 }, { "epoch": 2.539407528196766, "grad_norm": 2.3913590908050537, "learning_rate": 6.826929610001359e-05, "loss": 3.0141, "step": 37375 }, { "epoch": 2.5397472482674277, "grad_norm": 2.1972177028656006, "learning_rate": 6.826504959913032e-05, "loss": 2.9044, "step": 37380 }, { "epoch": 2.5400869683380893, "grad_norm": 2.4756932258605957, "learning_rate": 6.826080309824706e-05, "loss": 2.8661, "step": 37385 }, { "epoch": 2.5404266884087514, "grad_norm": 3.1892879009246826, "learning_rate": 6.825655659736377e-05, "loss": 2.966, "step": 37390 }, { "epoch": 2.540766408479413, "grad_norm": 2.1875312328338623, "learning_rate": 6.82523100964805e-05, "loss": 3.0447, "step": 37395 }, { "epoch": 2.5411061285500747, "grad_norm": 2.870303153991699, "learning_rate": 6.824806359559723e-05, "loss": 3.0153, "step": 37400 }, { "epoch": 2.5414458486207367, "grad_norm": 2.278170585632324, "learning_rate": 6.824381709471396e-05, "loss": 2.7114, "step": 37405 }, { "epoch": 2.5417855686913984, "grad_norm": 2.755500316619873, "learning_rate": 6.823957059383068e-05, "loss": 2.8944, "step": 37410 }, { "epoch": 2.54212528876206, "grad_norm": 2.7264719009399414, "learning_rate": 6.823532409294741e-05, "loss": 3.2652, "step": 37415 }, { "epoch": 2.542465008832722, "grad_norm": 2.7291550636291504, "learning_rate": 6.823107759206414e-05, "loss": 3.2136, "step": 37420 }, { "epoch": 2.5428047289033837, "grad_norm": 2.4789066314697266, "learning_rate": 
6.822683109118087e-05, "loss": 2.8958, "step": 37425 }, { "epoch": 2.5431444489740453, "grad_norm": 2.3400373458862305, "learning_rate": 6.82225845902976e-05, "loss": 2.9496, "step": 37430 }, { "epoch": 2.5434841690447074, "grad_norm": 3.258683919906616, "learning_rate": 6.821833808941432e-05, "loss": 2.8139, "step": 37435 }, { "epoch": 2.543823889115369, "grad_norm": 2.8406243324279785, "learning_rate": 6.821409158853105e-05, "loss": 2.8072, "step": 37440 }, { "epoch": 2.5441636091860307, "grad_norm": 2.7137792110443115, "learning_rate": 6.820984508764778e-05, "loss": 2.7456, "step": 37445 }, { "epoch": 2.5445033292566928, "grad_norm": 2.475144386291504, "learning_rate": 6.820559858676451e-05, "loss": 2.8004, "step": 37450 }, { "epoch": 2.5448430493273544, "grad_norm": 3.3095340728759766, "learning_rate": 6.820135208588124e-05, "loss": 2.7617, "step": 37455 }, { "epoch": 2.545182769398016, "grad_norm": 2.2609851360321045, "learning_rate": 6.819710558499796e-05, "loss": 2.8164, "step": 37460 }, { "epoch": 2.5455224894686777, "grad_norm": 2.9108874797821045, "learning_rate": 6.819285908411469e-05, "loss": 2.9293, "step": 37465 }, { "epoch": 2.5458622095393397, "grad_norm": 2.991950750350952, "learning_rate": 6.818861258323142e-05, "loss": 2.8873, "step": 37470 }, { "epoch": 2.5462019296100014, "grad_norm": 2.547733783721924, "learning_rate": 6.818436608234815e-05, "loss": 2.6936, "step": 37475 }, { "epoch": 2.546541649680663, "grad_norm": 2.4576239585876465, "learning_rate": 6.818011958146488e-05, "loss": 2.7207, "step": 37480 }, { "epoch": 2.546881369751325, "grad_norm": 2.4056460857391357, "learning_rate": 6.81758730805816e-05, "loss": 3.1119, "step": 37485 }, { "epoch": 2.5472210898219867, "grad_norm": 2.506714105606079, "learning_rate": 6.817162657969833e-05, "loss": 3.185, "step": 37490 }, { "epoch": 2.5475608098926483, "grad_norm": 3.0299837589263916, "learning_rate": 6.816738007881506e-05, "loss": 2.7355, "step": 37495 }, { "epoch": 2.54790052996331, 
"grad_norm": 2.888084888458252, "learning_rate": 6.816313357793179e-05, "loss": 2.9168, "step": 37500 }, { "epoch": 2.548240250033972, "grad_norm": 2.542440414428711, "learning_rate": 6.815888707704852e-05, "loss": 3.0907, "step": 37505 }, { "epoch": 2.5485799701046337, "grad_norm": 3.1866838932037354, "learning_rate": 6.815464057616524e-05, "loss": 2.9711, "step": 37510 }, { "epoch": 2.5489196901752953, "grad_norm": 2.4200713634490967, "learning_rate": 6.815039407528197e-05, "loss": 2.9092, "step": 37515 }, { "epoch": 2.5492594102459574, "grad_norm": 1.9268187284469604, "learning_rate": 6.81461475743987e-05, "loss": 2.7667, "step": 37520 }, { "epoch": 2.549599130316619, "grad_norm": 3.042440891265869, "learning_rate": 6.814190107351543e-05, "loss": 3.0116, "step": 37525 }, { "epoch": 2.5499388503872806, "grad_norm": 2.2717578411102295, "learning_rate": 6.813765457263216e-05, "loss": 2.8378, "step": 37530 }, { "epoch": 2.5502785704579427, "grad_norm": 2.1049041748046875, "learning_rate": 6.813340807174888e-05, "loss": 2.9582, "step": 37535 }, { "epoch": 2.5506182905286043, "grad_norm": 2.1529700756073, "learning_rate": 6.812916157086561e-05, "loss": 3.0074, "step": 37540 }, { "epoch": 2.550958010599266, "grad_norm": 2.5441761016845703, "learning_rate": 6.812491506998234e-05, "loss": 2.895, "step": 37545 }, { "epoch": 2.551297730669928, "grad_norm": 2.9164888858795166, "learning_rate": 6.812066856909907e-05, "loss": 2.6769, "step": 37550 }, { "epoch": 2.5516374507405897, "grad_norm": 3.096315383911133, "learning_rate": 6.81164220682158e-05, "loss": 2.6371, "step": 37555 }, { "epoch": 2.5519771708112513, "grad_norm": 2.129807472229004, "learning_rate": 6.811217556733252e-05, "loss": 2.5968, "step": 37560 }, { "epoch": 2.5523168908819134, "grad_norm": 2.1301066875457764, "learning_rate": 6.810792906644925e-05, "loss": 2.9003, "step": 37565 }, { "epoch": 2.552656610952575, "grad_norm": 3.2404301166534424, "learning_rate": 6.810368256556598e-05, "loss": 2.8252, "step": 
37570 }, { "epoch": 2.5529963310232366, "grad_norm": 2.1729214191436768, "learning_rate": 6.809943606468271e-05, "loss": 2.7925, "step": 37575 }, { "epoch": 2.5533360510938987, "grad_norm": 2.7666208744049072, "learning_rate": 6.809518956379944e-05, "loss": 3.0622, "step": 37580 }, { "epoch": 2.5536757711645603, "grad_norm": 2.252192974090576, "learning_rate": 6.809094306291616e-05, "loss": 2.5479, "step": 37585 }, { "epoch": 2.554015491235222, "grad_norm": 2.6822447776794434, "learning_rate": 6.808669656203289e-05, "loss": 2.714, "step": 37590 }, { "epoch": 2.554355211305884, "grad_norm": 2.504847288131714, "learning_rate": 6.808245006114962e-05, "loss": 2.8245, "step": 37595 }, { "epoch": 2.5546949313765457, "grad_norm": 2.7419333457946777, "learning_rate": 6.807820356026633e-05, "loss": 2.654, "step": 37600 }, { "epoch": 2.5550346514472073, "grad_norm": 1.9716005325317383, "learning_rate": 6.807395705938308e-05, "loss": 2.97, "step": 37605 }, { "epoch": 2.5553743715178694, "grad_norm": 3.2693631649017334, "learning_rate": 6.80697105584998e-05, "loss": 2.888, "step": 37610 }, { "epoch": 2.555714091588531, "grad_norm": 3.284727096557617, "learning_rate": 6.806546405761652e-05, "loss": 2.5229, "step": 37615 }, { "epoch": 2.5560538116591927, "grad_norm": 2.1625843048095703, "learning_rate": 6.806121755673326e-05, "loss": 3.1056, "step": 37620 }, { "epoch": 2.5563935317298547, "grad_norm": 4.007523536682129, "learning_rate": 6.805697105584999e-05, "loss": 3.1011, "step": 37625 }, { "epoch": 2.5567332518005164, "grad_norm": 2.7701921463012695, "learning_rate": 6.80527245549667e-05, "loss": 2.8603, "step": 37630 }, { "epoch": 2.557072971871178, "grad_norm": 2.5270421504974365, "learning_rate": 6.804847805408344e-05, "loss": 3.0333, "step": 37635 }, { "epoch": 2.55741269194184, "grad_norm": 3.3283724784851074, "learning_rate": 6.804423155320017e-05, "loss": 2.619, "step": 37640 }, { "epoch": 2.5577524120125017, "grad_norm": 2.696286678314209, "learning_rate": 
6.803998505231689e-05, "loss": 2.9433, "step": 37645 }, { "epoch": 2.5580921320831633, "grad_norm": 2.5282692909240723, "learning_rate": 6.803573855143363e-05, "loss": 2.7431, "step": 37650 }, { "epoch": 2.5584318521538254, "grad_norm": 2.2593133449554443, "learning_rate": 6.803149205055036e-05, "loss": 2.9311, "step": 37655 }, { "epoch": 2.558771572224487, "grad_norm": 2.1333420276641846, "learning_rate": 6.802724554966707e-05, "loss": 2.7823, "step": 37660 }, { "epoch": 2.5591112922951487, "grad_norm": 2.0374886989593506, "learning_rate": 6.802299904878381e-05, "loss": 2.732, "step": 37665 }, { "epoch": 2.5594510123658107, "grad_norm": 2.0871834754943848, "learning_rate": 6.801875254790053e-05, "loss": 2.8647, "step": 37670 }, { "epoch": 2.5597907324364724, "grad_norm": 2.507150888442993, "learning_rate": 6.801450604701725e-05, "loss": 3.0782, "step": 37675 }, { "epoch": 2.560130452507134, "grad_norm": 2.6754543781280518, "learning_rate": 6.8010259546134e-05, "loss": 2.9921, "step": 37680 }, { "epoch": 2.560470172577796, "grad_norm": 2.994729995727539, "learning_rate": 6.800601304525071e-05, "loss": 2.9442, "step": 37685 }, { "epoch": 2.5608098926484577, "grad_norm": 3.0052552223205566, "learning_rate": 6.800176654436744e-05, "loss": 3.0579, "step": 37690 }, { "epoch": 2.5611496127191193, "grad_norm": 2.276484727859497, "learning_rate": 6.799752004348418e-05, "loss": 2.8563, "step": 37695 }, { "epoch": 2.5614893327897814, "grad_norm": 2.4378061294555664, "learning_rate": 6.79932735426009e-05, "loss": 2.8816, "step": 37700 }, { "epoch": 2.561829052860443, "grad_norm": 2.601623296737671, "learning_rate": 6.798902704171762e-05, "loss": 2.8678, "step": 37705 }, { "epoch": 2.5621687729311047, "grad_norm": 2.4671618938446045, "learning_rate": 6.798478054083436e-05, "loss": 2.7786, "step": 37710 }, { "epoch": 2.5625084930017668, "grad_norm": 2.2860820293426514, "learning_rate": 6.798053403995108e-05, "loss": 2.9612, "step": 37715 }, { "epoch": 2.5628482130724284, 
"grad_norm": 2.632852554321289, "learning_rate": 6.79762875390678e-05, "loss": 2.8748, "step": 37720 }, { "epoch": 2.56318793314309, "grad_norm": 2.6135635375976562, "learning_rate": 6.797204103818455e-05, "loss": 2.6869, "step": 37725 }, { "epoch": 2.563527653213752, "grad_norm": 2.6546952724456787, "learning_rate": 6.796779453730126e-05, "loss": 2.8213, "step": 37730 }, { "epoch": 2.5638673732844137, "grad_norm": 2.6910033226013184, "learning_rate": 6.796354803641799e-05, "loss": 2.9202, "step": 37735 }, { "epoch": 2.5642070933550754, "grad_norm": 2.2304110527038574, "learning_rate": 6.795930153553472e-05, "loss": 2.9653, "step": 37740 }, { "epoch": 2.5645468134257374, "grad_norm": 2.309368371963501, "learning_rate": 6.795505503465145e-05, "loss": 3.0005, "step": 37745 }, { "epoch": 2.564886533496399, "grad_norm": 2.526531934738159, "learning_rate": 6.795080853376817e-05, "loss": 2.747, "step": 37750 }, { "epoch": 2.5652262535670607, "grad_norm": 2.5361368656158447, "learning_rate": 6.79465620328849e-05, "loss": 2.849, "step": 37755 }, { "epoch": 2.5655659736377228, "grad_norm": 2.984708547592163, "learning_rate": 6.794231553200163e-05, "loss": 2.9142, "step": 37760 }, { "epoch": 2.5659056937083844, "grad_norm": 2.6706700325012207, "learning_rate": 6.793806903111836e-05, "loss": 2.8548, "step": 37765 }, { "epoch": 2.566245413779046, "grad_norm": 2.9800949096679688, "learning_rate": 6.793382253023509e-05, "loss": 2.6532, "step": 37770 }, { "epoch": 2.566585133849708, "grad_norm": 2.8505971431732178, "learning_rate": 6.792957602935181e-05, "loss": 2.9988, "step": 37775 }, { "epoch": 2.5669248539203697, "grad_norm": 2.4278013706207275, "learning_rate": 6.792532952846854e-05, "loss": 3.0019, "step": 37780 }, { "epoch": 2.5672645739910314, "grad_norm": 2.3436875343322754, "learning_rate": 6.792108302758527e-05, "loss": 2.6639, "step": 37785 }, { "epoch": 2.5676042940616934, "grad_norm": 2.42673659324646, "learning_rate": 6.7916836526702e-05, "loss": 3.153, "step": 
37790 }, { "epoch": 2.567944014132355, "grad_norm": 2.4318907260894775, "learning_rate": 6.791259002581873e-05, "loss": 2.8102, "step": 37795 }, { "epoch": 2.5682837342030167, "grad_norm": 3.4344096183776855, "learning_rate": 6.790834352493545e-05, "loss": 3.1191, "step": 37800 }, { "epoch": 2.5686234542736783, "grad_norm": 2.688859462738037, "learning_rate": 6.790409702405218e-05, "loss": 2.7938, "step": 37805 }, { "epoch": 2.5689631743443404, "grad_norm": 2.0871872901916504, "learning_rate": 6.789985052316892e-05, "loss": 2.9864, "step": 37810 }, { "epoch": 2.569302894415002, "grad_norm": 2.2219603061676025, "learning_rate": 6.789560402228564e-05, "loss": 2.9542, "step": 37815 }, { "epoch": 2.5696426144856637, "grad_norm": 2.6057393550872803, "learning_rate": 6.789135752140237e-05, "loss": 2.9553, "step": 37820 }, { "epoch": 2.5699823345563257, "grad_norm": 2.6241869926452637, "learning_rate": 6.78871110205191e-05, "loss": 2.8869, "step": 37825 }, { "epoch": 2.5703220546269874, "grad_norm": 2.450568437576294, "learning_rate": 6.788286451963582e-05, "loss": 2.4867, "step": 37830 }, { "epoch": 2.570661774697649, "grad_norm": 3.1553847789764404, "learning_rate": 6.787861801875255e-05, "loss": 2.7547, "step": 37835 }, { "epoch": 2.5710014947683106, "grad_norm": 2.7350618839263916, "learning_rate": 6.787437151786928e-05, "loss": 2.7205, "step": 37840 }, { "epoch": 2.5713412148389727, "grad_norm": 3.0714025497436523, "learning_rate": 6.7870125016986e-05, "loss": 2.8067, "step": 37845 }, { "epoch": 2.5716809349096343, "grad_norm": 2.8385627269744873, "learning_rate": 6.786587851610273e-05, "loss": 2.9797, "step": 37850 }, { "epoch": 2.572020654980296, "grad_norm": 2.5253870487213135, "learning_rate": 6.786163201521946e-05, "loss": 3.0123, "step": 37855 }, { "epoch": 2.572360375050958, "grad_norm": 2.074084758758545, "learning_rate": 6.785738551433619e-05, "loss": 2.833, "step": 37860 }, { "epoch": 2.5727000951216197, "grad_norm": 2.756474494934082, "learning_rate": 
6.785313901345292e-05, "loss": 3.0785, "step": 37865 }, { "epoch": 2.5730398151922813, "grad_norm": 2.5887820720672607, "learning_rate": 6.784889251256965e-05, "loss": 2.9685, "step": 37870 }, { "epoch": 2.5733795352629434, "grad_norm": 3.175185203552246, "learning_rate": 6.784464601168637e-05, "loss": 2.8345, "step": 37875 }, { "epoch": 2.573719255333605, "grad_norm": 2.823227643966675, "learning_rate": 6.78403995108031e-05, "loss": 2.75, "step": 37880 }, { "epoch": 2.5740589754042666, "grad_norm": 2.7847611904144287, "learning_rate": 6.783615300991983e-05, "loss": 2.8097, "step": 37885 }, { "epoch": 2.5743986954749287, "grad_norm": 2.010211944580078, "learning_rate": 6.783190650903656e-05, "loss": 2.7341, "step": 37890 }, { "epoch": 2.5747384155455904, "grad_norm": 3.187751531600952, "learning_rate": 6.782766000815329e-05, "loss": 2.9511, "step": 37895 }, { "epoch": 2.575078135616252, "grad_norm": 3.0195224285125732, "learning_rate": 6.782341350727001e-05, "loss": 3.0851, "step": 37900 }, { "epoch": 2.575417855686914, "grad_norm": 2.2294766902923584, "learning_rate": 6.781916700638674e-05, "loss": 2.9929, "step": 37905 }, { "epoch": 2.5757575757575757, "grad_norm": 3.1622302532196045, "learning_rate": 6.781492050550347e-05, "loss": 2.9269, "step": 37910 }, { "epoch": 2.5760972958282373, "grad_norm": 2.7007076740264893, "learning_rate": 6.78106740046202e-05, "loss": 2.9615, "step": 37915 }, { "epoch": 2.5764370158988994, "grad_norm": 14.437150001525879, "learning_rate": 6.780642750373693e-05, "loss": 2.9266, "step": 37920 }, { "epoch": 2.576776735969561, "grad_norm": 2.6873276233673096, "learning_rate": 6.780218100285365e-05, "loss": 2.9159, "step": 37925 }, { "epoch": 2.5771164560402227, "grad_norm": 2.3815042972564697, "learning_rate": 6.779793450197038e-05, "loss": 2.6331, "step": 37930 }, { "epoch": 2.5774561761108847, "grad_norm": 3.4629364013671875, "learning_rate": 6.779368800108711e-05, "loss": 2.6978, "step": 37935 }, { "epoch": 2.5777958961815464, 
"grad_norm": 2.393726110458374, "learning_rate": 6.778944150020383e-05, "loss": 2.8431, "step": 37940 }, { "epoch": 2.578135616252208, "grad_norm": 3.004044771194458, "learning_rate": 6.778519499932057e-05, "loss": 2.6822, "step": 37945 }, { "epoch": 2.57847533632287, "grad_norm": 2.7075695991516113, "learning_rate": 6.77809484984373e-05, "loss": 2.8261, "step": 37950 }, { "epoch": 2.5788150563935317, "grad_norm": 2.4040913581848145, "learning_rate": 6.777670199755401e-05, "loss": 2.7626, "step": 37955 }, { "epoch": 2.5791547764641933, "grad_norm": 2.5677497386932373, "learning_rate": 6.777245549667075e-05, "loss": 2.6485, "step": 37960 }, { "epoch": 2.5794944965348554, "grad_norm": 2.510744571685791, "learning_rate": 6.776820899578748e-05, "loss": 3.1189, "step": 37965 }, { "epoch": 2.579834216605517, "grad_norm": 2.889301300048828, "learning_rate": 6.77639624949042e-05, "loss": 2.6906, "step": 37970 }, { "epoch": 2.5801739366761787, "grad_norm": 2.604011058807373, "learning_rate": 6.775971599402093e-05, "loss": 2.8559, "step": 37975 }, { "epoch": 2.5805136567468407, "grad_norm": 2.4496896266937256, "learning_rate": 6.775546949313766e-05, "loss": 2.855, "step": 37980 }, { "epoch": 2.5808533768175024, "grad_norm": 2.470024585723877, "learning_rate": 6.775122299225438e-05, "loss": 2.7581, "step": 37985 }, { "epoch": 2.581193096888164, "grad_norm": 3.712808132171631, "learning_rate": 6.774697649137112e-05, "loss": 2.8977, "step": 37990 }, { "epoch": 2.581532816958826, "grad_norm": 2.251918077468872, "learning_rate": 6.774272999048785e-05, "loss": 2.964, "step": 37995 }, { "epoch": 2.5818725370294877, "grad_norm": 3.016146421432495, "learning_rate": 6.773848348960456e-05, "loss": 2.7017, "step": 38000 }, { "epoch": 2.5822122571001493, "grad_norm": 3.2210898399353027, "learning_rate": 6.77342369887213e-05, "loss": 2.7479, "step": 38005 }, { "epoch": 2.5825519771708114, "grad_norm": 2.1722893714904785, "learning_rate": 6.772999048783803e-05, "loss": 3.1732, "step": 
38010 }, { "epoch": 2.582891697241473, "grad_norm": 2.638812303543091, "learning_rate": 6.772574398695475e-05, "loss": 3.0639, "step": 38015 }, { "epoch": 2.5832314173121347, "grad_norm": 2.6575958728790283, "learning_rate": 6.772149748607149e-05, "loss": 3.1536, "step": 38020 }, { "epoch": 2.5835711373827968, "grad_norm": 2.1818037033081055, "learning_rate": 6.77172509851882e-05, "loss": 2.8977, "step": 38025 }, { "epoch": 2.5839108574534584, "grad_norm": 2.4179983139038086, "learning_rate": 6.771300448430493e-05, "loss": 2.8236, "step": 38030 }, { "epoch": 2.58425057752412, "grad_norm": 3.028856039047241, "learning_rate": 6.770875798342167e-05, "loss": 2.7606, "step": 38035 }, { "epoch": 2.584590297594782, "grad_norm": 2.811190366744995, "learning_rate": 6.770451148253839e-05, "loss": 3.1478, "step": 38040 }, { "epoch": 2.5849300176654437, "grad_norm": 2.4055917263031006, "learning_rate": 6.770026498165511e-05, "loss": 2.9504, "step": 38045 }, { "epoch": 2.5852697377361054, "grad_norm": 2.4768154621124268, "learning_rate": 6.769601848077186e-05, "loss": 2.7994, "step": 38050 }, { "epoch": 2.5856094578067674, "grad_norm": 2.3764660358428955, "learning_rate": 6.769177197988857e-05, "loss": 2.9768, "step": 38055 }, { "epoch": 2.585949177877429, "grad_norm": 2.5303773880004883, "learning_rate": 6.76875254790053e-05, "loss": 2.9305, "step": 38060 }, { "epoch": 2.5862888979480907, "grad_norm": 3.078676462173462, "learning_rate": 6.768327897812204e-05, "loss": 2.9563, "step": 38065 }, { "epoch": 2.5866286180187528, "grad_norm": 2.204348087310791, "learning_rate": 6.767903247723875e-05, "loss": 3.1187, "step": 38070 }, { "epoch": 2.5869683380894144, "grad_norm": 2.418675422668457, "learning_rate": 6.767478597635548e-05, "loss": 2.995, "step": 38075 }, { "epoch": 2.587308058160076, "grad_norm": 2.4261486530303955, "learning_rate": 6.767053947547222e-05, "loss": 2.812, "step": 38080 }, { "epoch": 2.587647778230738, "grad_norm": 2.001694679260254, "learning_rate": 
6.766629297458894e-05, "loss": 2.926, "step": 38085 }, { "epoch": 2.5879874983013997, "grad_norm": 2.7596981525421143, "learning_rate": 6.766204647370567e-05, "loss": 2.8651, "step": 38090 }, { "epoch": 2.5883272183720614, "grad_norm": 2.5010128021240234, "learning_rate": 6.76577999728224e-05, "loss": 2.8189, "step": 38095 }, { "epoch": 2.5886669384427234, "grad_norm": 2.063835382461548, "learning_rate": 6.765355347193912e-05, "loss": 2.8352, "step": 38100 }, { "epoch": 2.589006658513385, "grad_norm": 2.7058346271514893, "learning_rate": 6.764930697105585e-05, "loss": 2.9535, "step": 38105 }, { "epoch": 2.5893463785840467, "grad_norm": 3.634355068206787, "learning_rate": 6.764506047017258e-05, "loss": 3.106, "step": 38110 }, { "epoch": 2.589686098654709, "grad_norm": 3.119853973388672, "learning_rate": 6.76408139692893e-05, "loss": 2.9532, "step": 38115 }, { "epoch": 2.5900258187253704, "grad_norm": 3.111384153366089, "learning_rate": 6.763656746840603e-05, "loss": 2.7549, "step": 38120 }, { "epoch": 2.590365538796032, "grad_norm": 2.7534067630767822, "learning_rate": 6.763232096752276e-05, "loss": 2.9623, "step": 38125 }, { "epoch": 2.590705258866694, "grad_norm": 3.2579431533813477, "learning_rate": 6.762807446663949e-05, "loss": 2.8762, "step": 38130 }, { "epoch": 2.5910449789373557, "grad_norm": 2.918245315551758, "learning_rate": 6.762382796575622e-05, "loss": 2.8607, "step": 38135 }, { "epoch": 2.5913846990080174, "grad_norm": 3.2122600078582764, "learning_rate": 6.761958146487295e-05, "loss": 2.7979, "step": 38140 }, { "epoch": 2.591724419078679, "grad_norm": 2.9006221294403076, "learning_rate": 6.761533496398967e-05, "loss": 3.1179, "step": 38145 }, { "epoch": 2.592064139149341, "grad_norm": 3.2954087257385254, "learning_rate": 6.761108846310642e-05, "loss": 2.8831, "step": 38150 }, { "epoch": 2.5924038592200027, "grad_norm": 2.377049446105957, "learning_rate": 6.760684196222313e-05, "loss": 2.8573, "step": 38155 }, { "epoch": 2.5927435792906643, 
"grad_norm": 2.7391598224639893, "learning_rate": 6.760259546133986e-05, "loss": 2.9053, "step": 38160 }, { "epoch": 2.5930832993613264, "grad_norm": 2.282212018966675, "learning_rate": 6.759834896045659e-05, "loss": 2.8421, "step": 38165 }, { "epoch": 2.593423019431988, "grad_norm": 2.8099169731140137, "learning_rate": 6.759410245957331e-05, "loss": 2.7378, "step": 38170 }, { "epoch": 2.5937627395026497, "grad_norm": 2.8606815338134766, "learning_rate": 6.758985595869004e-05, "loss": 2.596, "step": 38175 }, { "epoch": 2.5941024595733113, "grad_norm": 2.3433115482330322, "learning_rate": 6.758560945780677e-05, "loss": 2.919, "step": 38180 }, { "epoch": 2.5944421796439734, "grad_norm": 2.245513439178467, "learning_rate": 6.75813629569235e-05, "loss": 2.6823, "step": 38185 }, { "epoch": 2.594781899714635, "grad_norm": 2.751593589782715, "learning_rate": 6.757711645604023e-05, "loss": 2.7134, "step": 38190 }, { "epoch": 2.5951216197852967, "grad_norm": 3.0579206943511963, "learning_rate": 6.757286995515695e-05, "loss": 2.9138, "step": 38195 }, { "epoch": 2.5954613398559587, "grad_norm": 2.687041997909546, "learning_rate": 6.756862345427368e-05, "loss": 2.7101, "step": 38200 }, { "epoch": 2.5958010599266204, "grad_norm": 2.178497076034546, "learning_rate": 6.756437695339041e-05, "loss": 2.8123, "step": 38205 }, { "epoch": 2.596140779997282, "grad_norm": 2.499065399169922, "learning_rate": 6.756013045250714e-05, "loss": 2.9889, "step": 38210 }, { "epoch": 2.596480500067944, "grad_norm": 2.8728232383728027, "learning_rate": 6.755588395162387e-05, "loss": 2.7526, "step": 38215 }, { "epoch": 2.5968202201386057, "grad_norm": 2.4300577640533447, "learning_rate": 6.75516374507406e-05, "loss": 2.8078, "step": 38220 }, { "epoch": 2.5971599402092673, "grad_norm": 2.4517829418182373, "learning_rate": 6.754739094985732e-05, "loss": 2.7838, "step": 38225 }, { "epoch": 2.5974996602799294, "grad_norm": 2.265532970428467, "learning_rate": 6.754314444897405e-05, "loss": 2.9336, "step": 
38230 }, { "epoch": 2.597839380350591, "grad_norm": 2.3643155097961426, "learning_rate": 6.753889794809078e-05, "loss": 2.985, "step": 38235 }, { "epoch": 2.5981791004212527, "grad_norm": 2.6102828979492188, "learning_rate": 6.75346514472075e-05, "loss": 2.9066, "step": 38240 }, { "epoch": 2.5985188204919147, "grad_norm": 2.542153835296631, "learning_rate": 6.753040494632423e-05, "loss": 2.946, "step": 38245 }, { "epoch": 2.5988585405625764, "grad_norm": 2.457089900970459, "learning_rate": 6.752615844544096e-05, "loss": 3.1301, "step": 38250 }, { "epoch": 2.599198260633238, "grad_norm": 2.5255632400512695, "learning_rate": 6.752191194455769e-05, "loss": 2.9549, "step": 38255 }, { "epoch": 2.5995379807039, "grad_norm": 2.49082088470459, "learning_rate": 6.751766544367442e-05, "loss": 2.9772, "step": 38260 }, { "epoch": 2.5998777007745617, "grad_norm": 2.6122875213623047, "learning_rate": 6.751341894279115e-05, "loss": 2.7394, "step": 38265 }, { "epoch": 2.6002174208452233, "grad_norm": 2.6915297508239746, "learning_rate": 6.750917244190787e-05, "loss": 3.0776, "step": 38270 }, { "epoch": 2.6005571409158854, "grad_norm": 2.103724718093872, "learning_rate": 6.75049259410246e-05, "loss": 2.6891, "step": 38275 }, { "epoch": 2.600896860986547, "grad_norm": 2.5344247817993164, "learning_rate": 6.750067944014133e-05, "loss": 2.9183, "step": 38280 }, { "epoch": 2.6012365810572087, "grad_norm": 2.2719497680664062, "learning_rate": 6.749643293925806e-05, "loss": 3.0495, "step": 38285 }, { "epoch": 2.6015763011278707, "grad_norm": 3.199738025665283, "learning_rate": 6.749218643837479e-05, "loss": 2.7406, "step": 38290 }, { "epoch": 2.6019160211985324, "grad_norm": 2.533522844314575, "learning_rate": 6.74879399374915e-05, "loss": 2.809, "step": 38295 }, { "epoch": 2.602255741269194, "grad_norm": 3.2273240089416504, "learning_rate": 6.748369343660824e-05, "loss": 2.9993, "step": 38300 }, { "epoch": 2.602595461339856, "grad_norm": 3.299168109893799, "learning_rate": 
6.747944693572497e-05, "loss": 2.7073, "step": 38305 }, { "epoch": 2.6029351814105177, "grad_norm": 2.1161553859710693, "learning_rate": 6.747520043484168e-05, "loss": 2.7851, "step": 38310 }, { "epoch": 2.6032749014811793, "grad_norm": 3.119278907775879, "learning_rate": 6.747095393395843e-05, "loss": 2.6906, "step": 38315 }, { "epoch": 2.6036146215518414, "grad_norm": 2.4718985557556152, "learning_rate": 6.746670743307515e-05, "loss": 2.9861, "step": 38320 }, { "epoch": 2.603954341622503, "grad_norm": 2.9431488513946533, "learning_rate": 6.746246093219187e-05, "loss": 3.0868, "step": 38325 }, { "epoch": 2.6042940616931647, "grad_norm": 2.153566360473633, "learning_rate": 6.745821443130861e-05, "loss": 3.1163, "step": 38330 }, { "epoch": 2.6046337817638268, "grad_norm": 2.363375186920166, "learning_rate": 6.745396793042534e-05, "loss": 2.9746, "step": 38335 }, { "epoch": 2.6049735018344884, "grad_norm": 3.1366095542907715, "learning_rate": 6.744972142954205e-05, "loss": 2.9438, "step": 38340 }, { "epoch": 2.60531322190515, "grad_norm": 2.321437120437622, "learning_rate": 6.74454749286588e-05, "loss": 2.8652, "step": 38345 }, { "epoch": 2.605652941975812, "grad_norm": 3.460543155670166, "learning_rate": 6.744122842777552e-05, "loss": 2.7299, "step": 38350 }, { "epoch": 2.6059926620464737, "grad_norm": 3.3095545768737793, "learning_rate": 6.743698192689224e-05, "loss": 2.7622, "step": 38355 }, { "epoch": 2.6063323821171354, "grad_norm": 3.4358859062194824, "learning_rate": 6.743273542600898e-05, "loss": 2.8424, "step": 38360 }, { "epoch": 2.6066721021877974, "grad_norm": 3.171118974685669, "learning_rate": 6.742848892512569e-05, "loss": 2.7078, "step": 38365 }, { "epoch": 2.607011822258459, "grad_norm": 2.1339268684387207, "learning_rate": 6.742424242424242e-05, "loss": 3.101, "step": 38370 }, { "epoch": 2.6073515423291207, "grad_norm": 2.4946935176849365, "learning_rate": 6.741999592335916e-05, "loss": 3.3163, "step": 38375 }, { "epoch": 2.6076912623997828, 
"grad_norm": 2.467456817626953, "learning_rate": 6.741574942247588e-05, "loss": 2.9841, "step": 38380 }, { "epoch": 2.6080309824704444, "grad_norm": 2.4423015117645264, "learning_rate": 6.74115029215926e-05, "loss": 2.8888, "step": 38385 }, { "epoch": 2.608370702541106, "grad_norm": 2.0815627574920654, "learning_rate": 6.740725642070935e-05, "loss": 2.8606, "step": 38390 }, { "epoch": 2.608710422611768, "grad_norm": 2.246143102645874, "learning_rate": 6.740300991982606e-05, "loss": 3.2063, "step": 38395 }, { "epoch": 2.6090501426824297, "grad_norm": 2.0039048194885254, "learning_rate": 6.739876341894279e-05, "loss": 3.1321, "step": 38400 }, { "epoch": 2.6093898627530914, "grad_norm": 2.4745893478393555, "learning_rate": 6.739451691805953e-05, "loss": 2.8916, "step": 38405 }, { "epoch": 2.6097295828237534, "grad_norm": 2.5843822956085205, "learning_rate": 6.739027041717624e-05, "loss": 2.9398, "step": 38410 }, { "epoch": 2.610069302894415, "grad_norm": 2.304074287414551, "learning_rate": 6.738602391629297e-05, "loss": 2.8772, "step": 38415 }, { "epoch": 2.6104090229650767, "grad_norm": 2.4494616985321045, "learning_rate": 6.738177741540971e-05, "loss": 2.7535, "step": 38420 }, { "epoch": 2.610748743035739, "grad_norm": 2.67423677444458, "learning_rate": 6.737753091452643e-05, "loss": 2.9351, "step": 38425 }, { "epoch": 2.6110884631064004, "grad_norm": 2.3614962100982666, "learning_rate": 6.737328441364316e-05, "loss": 2.6543, "step": 38430 }, { "epoch": 2.611428183177062, "grad_norm": 2.4679925441741943, "learning_rate": 6.73690379127599e-05, "loss": 2.5482, "step": 38435 }, { "epoch": 2.611767903247724, "grad_norm": 2.9054107666015625, "learning_rate": 6.736479141187661e-05, "loss": 2.8471, "step": 38440 }, { "epoch": 2.6121076233183858, "grad_norm": 2.903245687484741, "learning_rate": 6.736054491099334e-05, "loss": 2.7201, "step": 38445 }, { "epoch": 2.6124473433890474, "grad_norm": 2.0583713054656982, "learning_rate": 6.735629841011007e-05, "loss": 2.8769, 
"step": 38450 }, { "epoch": 2.6127870634597095, "grad_norm": 2.653325080871582, "learning_rate": 6.73520519092268e-05, "loss": 3.023, "step": 38455 }, { "epoch": 2.613126783530371, "grad_norm": 2.5004913806915283, "learning_rate": 6.734780540834352e-05, "loss": 2.9097, "step": 38460 }, { "epoch": 2.6134665036010327, "grad_norm": 2.437610387802124, "learning_rate": 6.734355890746025e-05, "loss": 2.9482, "step": 38465 }, { "epoch": 2.613806223671695, "grad_norm": 3.140174388885498, "learning_rate": 6.733931240657698e-05, "loss": 2.8661, "step": 38470 }, { "epoch": 2.6141459437423564, "grad_norm": 2.6310250759124756, "learning_rate": 6.733506590569371e-05, "loss": 2.7438, "step": 38475 }, { "epoch": 2.614485663813018, "grad_norm": 3.076601505279541, "learning_rate": 6.733081940481044e-05, "loss": 3.1612, "step": 38480 }, { "epoch": 2.6148253838836797, "grad_norm": 2.43564772605896, "learning_rate": 6.732657290392716e-05, "loss": 2.8918, "step": 38485 }, { "epoch": 2.6151651039543418, "grad_norm": 2.933253049850464, "learning_rate": 6.73223264030439e-05, "loss": 2.6901, "step": 38490 }, { "epoch": 2.6155048240250034, "grad_norm": 2.7124884128570557, "learning_rate": 6.731807990216062e-05, "loss": 3.0068, "step": 38495 }, { "epoch": 2.615844544095665, "grad_norm": 3.264448881149292, "learning_rate": 6.731383340127735e-05, "loss": 2.7429, "step": 38500 }, { "epoch": 2.616184264166327, "grad_norm": 2.2883338928222656, "learning_rate": 6.730958690039409e-05, "loss": 2.6133, "step": 38505 }, { "epoch": 2.6165239842369887, "grad_norm": 3.475515842437744, "learning_rate": 6.73053403995108e-05, "loss": 2.7335, "step": 38510 }, { "epoch": 2.6168637043076504, "grad_norm": 3.464359760284424, "learning_rate": 6.730109389862753e-05, "loss": 2.5843, "step": 38515 }, { "epoch": 2.617203424378312, "grad_norm": 2.8729920387268066, "learning_rate": 6.729684739774426e-05, "loss": 2.9148, "step": 38520 }, { "epoch": 2.617543144448974, "grad_norm": 2.8280208110809326, "learning_rate": 
6.729260089686099e-05, "loss": 2.8434, "step": 38525 }, { "epoch": 2.6178828645196357, "grad_norm": 3.0193166732788086, "learning_rate": 6.728835439597772e-05, "loss": 2.827, "step": 38530 }, { "epoch": 2.6182225845902973, "grad_norm": 1.807002305984497, "learning_rate": 6.728410789509444e-05, "loss": 2.6169, "step": 38535 }, { "epoch": 2.6185623046609594, "grad_norm": 1.8771253824234009, "learning_rate": 6.727986139421117e-05, "loss": 2.9248, "step": 38540 }, { "epoch": 2.618902024731621, "grad_norm": 2.406872510910034, "learning_rate": 6.72756148933279e-05, "loss": 2.8825, "step": 38545 }, { "epoch": 2.6192417448022827, "grad_norm": 2.3661530017852783, "learning_rate": 6.727136839244463e-05, "loss": 2.8771, "step": 38550 }, { "epoch": 2.6195814648729447, "grad_norm": 2.643105983734131, "learning_rate": 6.726712189156136e-05, "loss": 2.9863, "step": 38555 }, { "epoch": 2.6199211849436064, "grad_norm": 2.929593086242676, "learning_rate": 6.726287539067808e-05, "loss": 2.763, "step": 38560 }, { "epoch": 2.620260905014268, "grad_norm": 2.7095017433166504, "learning_rate": 6.725862888979481e-05, "loss": 2.6926, "step": 38565 }, { "epoch": 2.62060062508493, "grad_norm": 2.4646315574645996, "learning_rate": 6.725438238891154e-05, "loss": 3.0921, "step": 38570 }, { "epoch": 2.6209403451555917, "grad_norm": 2.567578077316284, "learning_rate": 6.725013588802827e-05, "loss": 2.8483, "step": 38575 }, { "epoch": 2.6212800652262533, "grad_norm": 2.0502731800079346, "learning_rate": 6.7245889387145e-05, "loss": 3.0149, "step": 38580 }, { "epoch": 2.6216197852969154, "grad_norm": 2.6412718296051025, "learning_rate": 6.724164288626172e-05, "loss": 2.9639, "step": 38585 }, { "epoch": 2.621959505367577, "grad_norm": 2.7631053924560547, "learning_rate": 6.723739638537845e-05, "loss": 2.9052, "step": 38590 }, { "epoch": 2.6222992254382387, "grad_norm": 3.7648298740386963, "learning_rate": 6.723314988449518e-05, "loss": 2.8673, "step": 38595 }, { "epoch": 2.6226389455089008, 
"grad_norm": 2.6479198932647705, "learning_rate": 6.722890338361191e-05, "loss": 2.8589, "step": 38600 }, { "epoch": 2.6229786655795624, "grad_norm": 2.4731521606445312, "learning_rate": 6.722465688272864e-05, "loss": 2.9984, "step": 38605 }, { "epoch": 2.623318385650224, "grad_norm": 2.864417552947998, "learning_rate": 6.722041038184536e-05, "loss": 2.9199, "step": 38610 }, { "epoch": 2.623658105720886, "grad_norm": 2.4752018451690674, "learning_rate": 6.721616388096209e-05, "loss": 2.8037, "step": 38615 }, { "epoch": 2.6239978257915477, "grad_norm": 3.153531789779663, "learning_rate": 6.721191738007882e-05, "loss": 2.8538, "step": 38620 }, { "epoch": 2.6243375458622094, "grad_norm": 2.8109240531921387, "learning_rate": 6.720767087919555e-05, "loss": 3.219, "step": 38625 }, { "epoch": 2.6246772659328714, "grad_norm": 2.462411642074585, "learning_rate": 6.720342437831228e-05, "loss": 3.1663, "step": 38630 }, { "epoch": 2.625016986003533, "grad_norm": 2.7495129108428955, "learning_rate": 6.7199177877429e-05, "loss": 3.1242, "step": 38635 }, { "epoch": 2.6253567060741947, "grad_norm": 3.2862045764923096, "learning_rate": 6.719493137654573e-05, "loss": 2.8801, "step": 38640 }, { "epoch": 2.6256964261448568, "grad_norm": 2.287105083465576, "learning_rate": 6.719068487566246e-05, "loss": 3.0822, "step": 38645 }, { "epoch": 2.6260361462155184, "grad_norm": 2.5567684173583984, "learning_rate": 6.718643837477918e-05, "loss": 3.1454, "step": 38650 }, { "epoch": 2.62637586628618, "grad_norm": 2.6903908252716064, "learning_rate": 6.718219187389592e-05, "loss": 2.4761, "step": 38655 }, { "epoch": 2.626715586356842, "grad_norm": 2.82474684715271, "learning_rate": 6.717794537301264e-05, "loss": 2.7498, "step": 38660 }, { "epoch": 2.6270553064275037, "grad_norm": 2.447014331817627, "learning_rate": 6.717369887212936e-05, "loss": 2.7338, "step": 38665 }, { "epoch": 2.6273950264981654, "grad_norm": 3.1563055515289307, "learning_rate": 6.71694523712461e-05, "loss": 2.9259, "step": 
38670 }, { "epoch": 2.6277347465688274, "grad_norm": 3.7830159664154053, "learning_rate": 6.716520587036283e-05, "loss": 2.5403, "step": 38675 }, { "epoch": 2.628074466639489, "grad_norm": 3.1503515243530273, "learning_rate": 6.716095936947954e-05, "loss": 2.8889, "step": 38680 }, { "epoch": 2.6284141867101507, "grad_norm": 2.388890266418457, "learning_rate": 6.715671286859628e-05, "loss": 3.1028, "step": 38685 }, { "epoch": 2.6287539067808128, "grad_norm": 2.22987961769104, "learning_rate": 6.715246636771301e-05, "loss": 2.9653, "step": 38690 }, { "epoch": 2.6290936268514744, "grad_norm": 2.5909998416900635, "learning_rate": 6.714821986682973e-05, "loss": 2.8676, "step": 38695 }, { "epoch": 2.629433346922136, "grad_norm": 2.1347615718841553, "learning_rate": 6.714397336594647e-05, "loss": 2.8525, "step": 38700 }, { "epoch": 2.629773066992798, "grad_norm": 3.0845413208007812, "learning_rate": 6.71397268650632e-05, "loss": 2.9472, "step": 38705 }, { "epoch": 2.6301127870634597, "grad_norm": 1.6901551485061646, "learning_rate": 6.713548036417991e-05, "loss": 3.1451, "step": 38710 }, { "epoch": 2.6304525071341214, "grad_norm": 3.682664632797241, "learning_rate": 6.713123386329665e-05, "loss": 2.7474, "step": 38715 }, { "epoch": 2.6307922272047835, "grad_norm": 2.9111549854278564, "learning_rate": 6.712698736241337e-05, "loss": 2.921, "step": 38720 }, { "epoch": 2.631131947275445, "grad_norm": 2.3598098754882812, "learning_rate": 6.71227408615301e-05, "loss": 2.8633, "step": 38725 }, { "epoch": 2.6314716673461067, "grad_norm": 2.8214187622070312, "learning_rate": 6.711849436064684e-05, "loss": 2.8342, "step": 38730 }, { "epoch": 2.631811387416769, "grad_norm": 2.5833311080932617, "learning_rate": 6.711424785976355e-05, "loss": 2.7921, "step": 38735 }, { "epoch": 2.6321511074874304, "grad_norm": 2.792243480682373, "learning_rate": 6.711000135888028e-05, "loss": 2.8511, "step": 38740 }, { "epoch": 2.632490827558092, "grad_norm": 3.159402370452881, "learning_rate": 
6.710575485799702e-05, "loss": 2.9135, "step": 38745 }, { "epoch": 2.632830547628754, "grad_norm": 2.572770595550537, "learning_rate": 6.710150835711374e-05, "loss": 2.8951, "step": 38750 }, { "epoch": 2.6331702676994158, "grad_norm": 3.0543746948242188, "learning_rate": 6.709726185623046e-05, "loss": 2.6931, "step": 38755 }, { "epoch": 2.6335099877700774, "grad_norm": 2.811274528503418, "learning_rate": 6.70930153553472e-05, "loss": 2.8557, "step": 38760 }, { "epoch": 2.6338497078407395, "grad_norm": 2.4028146266937256, "learning_rate": 6.708876885446392e-05, "loss": 2.8453, "step": 38765 }, { "epoch": 2.634189427911401, "grad_norm": 3.2126615047454834, "learning_rate": 6.708452235358065e-05, "loss": 2.9768, "step": 38770 }, { "epoch": 2.6345291479820627, "grad_norm": 2.4542951583862305, "learning_rate": 6.708027585269739e-05, "loss": 2.9779, "step": 38775 }, { "epoch": 2.634868868052725, "grad_norm": 2.292292594909668, "learning_rate": 6.70760293518141e-05, "loss": 3.0735, "step": 38780 }, { "epoch": 2.6352085881233864, "grad_norm": 3.0019266605377197, "learning_rate": 6.707178285093083e-05, "loss": 3.1685, "step": 38785 }, { "epoch": 2.635548308194048, "grad_norm": 3.292145013809204, "learning_rate": 6.706753635004756e-05, "loss": 2.9841, "step": 38790 }, { "epoch": 2.63588802826471, "grad_norm": 2.629441976547241, "learning_rate": 6.706328984916429e-05, "loss": 2.8594, "step": 38795 }, { "epoch": 2.6362277483353718, "grad_norm": 3.0574731826782227, "learning_rate": 6.705904334828102e-05, "loss": 2.8771, "step": 38800 }, { "epoch": 2.6365674684060334, "grad_norm": 2.6779141426086426, "learning_rate": 6.705479684739774e-05, "loss": 2.8473, "step": 38805 }, { "epoch": 2.6369071884766955, "grad_norm": 3.4501090049743652, "learning_rate": 6.705055034651447e-05, "loss": 2.6921, "step": 38810 }, { "epoch": 2.637246908547357, "grad_norm": 2.730074882507324, "learning_rate": 6.70463038456312e-05, "loss": 2.8, "step": 38815 }, { "epoch": 2.6375866286180187, "grad_norm": 
2.9338693618774414, "learning_rate": 6.704205734474793e-05, "loss": 3.1308, "step": 38820 }, { "epoch": 2.6379263486886804, "grad_norm": 3.162263870239258, "learning_rate": 6.703781084386466e-05, "loss": 2.9966, "step": 38825 }, { "epoch": 2.6382660687593424, "grad_norm": 2.225322961807251, "learning_rate": 6.70335643429814e-05, "loss": 2.9601, "step": 38830 }, { "epoch": 2.638605788830004, "grad_norm": 2.6248650550842285, "learning_rate": 6.702931784209811e-05, "loss": 3.0551, "step": 38835 }, { "epoch": 2.6389455089006657, "grad_norm": 2.3626461029052734, "learning_rate": 6.702507134121484e-05, "loss": 2.7492, "step": 38840 }, { "epoch": 2.639285228971328, "grad_norm": 2.080854654312134, "learning_rate": 6.702082484033158e-05, "loss": 2.8699, "step": 38845 }, { "epoch": 2.6396249490419894, "grad_norm": 3.29331374168396, "learning_rate": 6.70165783394483e-05, "loss": 2.7356, "step": 38850 }, { "epoch": 2.639964669112651, "grad_norm": 2.3498892784118652, "learning_rate": 6.701233183856502e-05, "loss": 2.9322, "step": 38855 }, { "epoch": 2.640304389183313, "grad_norm": 2.7081191539764404, "learning_rate": 6.700808533768177e-05, "loss": 2.8274, "step": 38860 }, { "epoch": 2.6406441092539747, "grad_norm": 2.415950298309326, "learning_rate": 6.700383883679848e-05, "loss": 2.9285, "step": 38865 }, { "epoch": 2.6409838293246364, "grad_norm": 2.1954262256622314, "learning_rate": 6.699959233591521e-05, "loss": 2.7791, "step": 38870 }, { "epoch": 2.641323549395298, "grad_norm": 2.9754018783569336, "learning_rate": 6.699534583503194e-05, "loss": 3.0383, "step": 38875 }, { "epoch": 2.64166326946596, "grad_norm": 2.1076395511627197, "learning_rate": 6.699109933414866e-05, "loss": 2.8066, "step": 38880 }, { "epoch": 2.6420029895366217, "grad_norm": 2.6439709663391113, "learning_rate": 6.698685283326539e-05, "loss": 3.1457, "step": 38885 }, { "epoch": 2.6423427096072833, "grad_norm": 2.458111047744751, "learning_rate": 6.698260633238212e-05, "loss": 3.0301, "step": 38890 }, { 
"epoch": 2.6426824296779454, "grad_norm": 2.8658201694488525, "learning_rate": 6.697835983149885e-05, "loss": 2.9865, "step": 38895 }, { "epoch": 2.643022149748607, "grad_norm": 2.4706296920776367, "learning_rate": 6.697411333061558e-05, "loss": 2.5321, "step": 38900 }, { "epoch": 2.6433618698192687, "grad_norm": 2.8461296558380127, "learning_rate": 6.69698668297323e-05, "loss": 2.5973, "step": 38905 }, { "epoch": 2.6437015898899308, "grad_norm": 2.2155938148498535, "learning_rate": 6.696562032884903e-05, "loss": 2.8131, "step": 38910 }, { "epoch": 2.6440413099605924, "grad_norm": 2.2939932346343994, "learning_rate": 6.696137382796576e-05, "loss": 2.8855, "step": 38915 }, { "epoch": 2.644381030031254, "grad_norm": 2.662064552307129, "learning_rate": 6.695712732708249e-05, "loss": 3.1327, "step": 38920 }, { "epoch": 2.644720750101916, "grad_norm": 2.43733549118042, "learning_rate": 6.695288082619922e-05, "loss": 2.6741, "step": 38925 }, { "epoch": 2.6450604701725777, "grad_norm": 2.6043355464935303, "learning_rate": 6.694863432531594e-05, "loss": 2.5818, "step": 38930 }, { "epoch": 2.6454001902432394, "grad_norm": 2.6044507026672363, "learning_rate": 6.694438782443267e-05, "loss": 2.7464, "step": 38935 }, { "epoch": 2.6457399103139014, "grad_norm": 1.9855369329452515, "learning_rate": 6.69401413235494e-05, "loss": 2.6782, "step": 38940 }, { "epoch": 2.646079630384563, "grad_norm": 2.074866771697998, "learning_rate": 6.693589482266613e-05, "loss": 3.0467, "step": 38945 }, { "epoch": 2.6464193504552247, "grad_norm": 2.960725784301758, "learning_rate": 6.693164832178286e-05, "loss": 2.7033, "step": 38950 }, { "epoch": 2.6467590705258868, "grad_norm": 2.5412089824676514, "learning_rate": 6.692740182089958e-05, "loss": 2.8193, "step": 38955 }, { "epoch": 2.6470987905965484, "grad_norm": 2.6029062271118164, "learning_rate": 6.692315532001631e-05, "loss": 2.6387, "step": 38960 }, { "epoch": 2.64743851066721, "grad_norm": 3.0253963470458984, "learning_rate": 
6.691890881913304e-05, "loss": 2.7336, "step": 38965 }, { "epoch": 2.647778230737872, "grad_norm": 2.6294069290161133, "learning_rate": 6.691466231824977e-05, "loss": 2.9274, "step": 38970 }, { "epoch": 2.6481179508085337, "grad_norm": 2.2256550788879395, "learning_rate": 6.69104158173665e-05, "loss": 2.8639, "step": 38975 }, { "epoch": 2.6484576708791954, "grad_norm": 2.545598268508911, "learning_rate": 6.690616931648322e-05, "loss": 3.0086, "step": 38980 }, { "epoch": 2.6487973909498574, "grad_norm": 2.6093497276306152, "learning_rate": 6.690192281559995e-05, "loss": 2.8188, "step": 38985 }, { "epoch": 2.649137111020519, "grad_norm": 2.6682939529418945, "learning_rate": 6.689767631471667e-05, "loss": 2.9122, "step": 38990 }, { "epoch": 2.6494768310911807, "grad_norm": 3.0411627292633057, "learning_rate": 6.689342981383341e-05, "loss": 2.778, "step": 38995 }, { "epoch": 2.649816551161843, "grad_norm": 2.6106839179992676, "learning_rate": 6.688918331295014e-05, "loss": 3.0235, "step": 39000 }, { "epoch": 2.6501562712325044, "grad_norm": 2.7723851203918457, "learning_rate": 6.688493681206685e-05, "loss": 2.5967, "step": 39005 }, { "epoch": 2.650495991303166, "grad_norm": 2.8451755046844482, "learning_rate": 6.688069031118359e-05, "loss": 3.0753, "step": 39010 }, { "epoch": 2.650835711373828, "grad_norm": 2.368204355239868, "learning_rate": 6.687644381030032e-05, "loss": 2.9082, "step": 39015 }, { "epoch": 2.6511754314444897, "grad_norm": 2.9391934871673584, "learning_rate": 6.687219730941703e-05, "loss": 2.6594, "step": 39020 }, { "epoch": 2.6515151515151514, "grad_norm": 10.034318923950195, "learning_rate": 6.686795080853378e-05, "loss": 2.9751, "step": 39025 }, { "epoch": 2.6518548715858135, "grad_norm": 2.6097140312194824, "learning_rate": 6.68637043076505e-05, "loss": 2.952, "step": 39030 }, { "epoch": 2.652194591656475, "grad_norm": 2.646087169647217, "learning_rate": 6.685945780676722e-05, "loss": 3.069, "step": 39035 }, { "epoch": 2.6525343117271367, 
"grad_norm": 3.207259178161621, "learning_rate": 6.685521130588396e-05, "loss": 3.0337, "step": 39040 }, { "epoch": 2.652874031797799, "grad_norm": 2.8996167182922363, "learning_rate": 6.685096480500069e-05, "loss": 2.8535, "step": 39045 }, { "epoch": 2.6532137518684604, "grad_norm": 2.8655858039855957, "learning_rate": 6.68467183041174e-05, "loss": 2.7526, "step": 39050 }, { "epoch": 2.653553471939122, "grad_norm": 3.328139543533325, "learning_rate": 6.684247180323414e-05, "loss": 2.4913, "step": 39055 }, { "epoch": 2.653893192009784, "grad_norm": 3.229722023010254, "learning_rate": 6.683822530235087e-05, "loss": 3.1658, "step": 39060 }, { "epoch": 2.6542329120804458, "grad_norm": 2.545689582824707, "learning_rate": 6.683397880146759e-05, "loss": 2.6104, "step": 39065 }, { "epoch": 2.6545726321511074, "grad_norm": 2.227609157562256, "learning_rate": 6.682973230058433e-05, "loss": 3.1946, "step": 39070 }, { "epoch": 2.6549123522217695, "grad_norm": 3.0608584880828857, "learning_rate": 6.682548579970104e-05, "loss": 2.8416, "step": 39075 }, { "epoch": 2.655252072292431, "grad_norm": 2.69193434715271, "learning_rate": 6.682123929881777e-05, "loss": 2.9532, "step": 39080 }, { "epoch": 2.6555917923630927, "grad_norm": 2.4134609699249268, "learning_rate": 6.681699279793451e-05, "loss": 2.9345, "step": 39085 }, { "epoch": 2.655931512433755, "grad_norm": 2.209101915359497, "learning_rate": 6.681274629705123e-05, "loss": 2.8692, "step": 39090 }, { "epoch": 2.6562712325044164, "grad_norm": 2.7269978523254395, "learning_rate": 6.680849979616795e-05, "loss": 2.8582, "step": 39095 }, { "epoch": 2.656610952575078, "grad_norm": 3.584454298019409, "learning_rate": 6.68042532952847e-05, "loss": 2.7563, "step": 39100 }, { "epoch": 2.65695067264574, "grad_norm": 2.17447566986084, "learning_rate": 6.680000679440141e-05, "loss": 2.7197, "step": 39105 }, { "epoch": 2.6572903927164018, "grad_norm": 3.0304059982299805, "learning_rate": 6.679576029351814e-05, "loss": 2.6487, "step": 39110 
}, { "epoch": 2.6576301127870634, "grad_norm": 2.93624210357666, "learning_rate": 6.679151379263488e-05, "loss": 2.7259, "step": 39115 }, { "epoch": 2.6579698328577255, "grad_norm": 2.8288323879241943, "learning_rate": 6.67872672917516e-05, "loss": 3.0265, "step": 39120 }, { "epoch": 2.658309552928387, "grad_norm": 2.0984106063842773, "learning_rate": 6.678302079086832e-05, "loss": 2.5774, "step": 39125 }, { "epoch": 2.6586492729990487, "grad_norm": 2.5715625286102295, "learning_rate": 6.677877428998506e-05, "loss": 2.7671, "step": 39130 }, { "epoch": 2.658988993069711, "grad_norm": 2.809166193008423, "learning_rate": 6.677452778910178e-05, "loss": 2.9648, "step": 39135 }, { "epoch": 2.6593287131403724, "grad_norm": 2.5643320083618164, "learning_rate": 6.67702812882185e-05, "loss": 2.6669, "step": 39140 }, { "epoch": 2.659668433211034, "grad_norm": 2.619176149368286, "learning_rate": 6.676603478733523e-05, "loss": 2.7711, "step": 39145 }, { "epoch": 2.660008153281696, "grad_norm": 2.4763574600219727, "learning_rate": 6.676178828645196e-05, "loss": 2.6226, "step": 39150 }, { "epoch": 2.660347873352358, "grad_norm": 2.1603004932403564, "learning_rate": 6.675754178556869e-05, "loss": 3.2757, "step": 39155 }, { "epoch": 2.6606875934230194, "grad_norm": 2.5221877098083496, "learning_rate": 6.675329528468542e-05, "loss": 2.8004, "step": 39160 }, { "epoch": 2.661027313493681, "grad_norm": 3.8291170597076416, "learning_rate": 6.674904878380215e-05, "loss": 3.0412, "step": 39165 }, { "epoch": 2.661367033564343, "grad_norm": 2.406068801879883, "learning_rate": 6.674480228291889e-05, "loss": 2.7924, "step": 39170 }, { "epoch": 2.6617067536350048, "grad_norm": 2.424229621887207, "learning_rate": 6.67405557820356e-05, "loss": 2.7397, "step": 39175 }, { "epoch": 2.6620464737056664, "grad_norm": 2.517542600631714, "learning_rate": 6.673630928115233e-05, "loss": 2.6514, "step": 39180 }, { "epoch": 2.6623861937763285, "grad_norm": 2.412754774093628, "learning_rate": 
6.673206278026907e-05, "loss": 2.5954, "step": 39185 }, { "epoch": 2.66272591384699, "grad_norm": 3.235698938369751, "learning_rate": 6.672781627938579e-05, "loss": 2.7645, "step": 39190 }, { "epoch": 2.6630656339176517, "grad_norm": 2.9976754188537598, "learning_rate": 6.672356977850251e-05, "loss": 2.9047, "step": 39195 }, { "epoch": 2.663405353988314, "grad_norm": 2.902226686477661, "learning_rate": 6.671932327761926e-05, "loss": 3.0287, "step": 39200 }, { "epoch": 2.6637450740589754, "grad_norm": 3.112895965576172, "learning_rate": 6.671507677673597e-05, "loss": 2.7551, "step": 39205 }, { "epoch": 2.664084794129637, "grad_norm": 2.2252063751220703, "learning_rate": 6.67108302758527e-05, "loss": 2.881, "step": 39210 }, { "epoch": 2.6644245142002987, "grad_norm": 2.599398612976074, "learning_rate": 6.670658377496943e-05, "loss": 2.8218, "step": 39215 }, { "epoch": 2.6647642342709608, "grad_norm": 2.496976375579834, "learning_rate": 6.670233727408615e-05, "loss": 2.655, "step": 39220 }, { "epoch": 2.6651039543416224, "grad_norm": 2.2898473739624023, "learning_rate": 6.669809077320288e-05, "loss": 3.1329, "step": 39225 }, { "epoch": 2.665443674412284, "grad_norm": 2.970425844192505, "learning_rate": 6.669384427231961e-05, "loss": 2.8378, "step": 39230 }, { "epoch": 2.665783394482946, "grad_norm": 2.561455011367798, "learning_rate": 6.668959777143634e-05, "loss": 2.7859, "step": 39235 }, { "epoch": 2.6661231145536077, "grad_norm": 2.284447431564331, "learning_rate": 6.668535127055307e-05, "loss": 2.8523, "step": 39240 }, { "epoch": 2.6664628346242694, "grad_norm": 2.0862128734588623, "learning_rate": 6.66811047696698e-05, "loss": 2.8192, "step": 39245 }, { "epoch": 2.6668025546949314, "grad_norm": 2.473999261856079, "learning_rate": 6.667685826878652e-05, "loss": 3.0033, "step": 39250 }, { "epoch": 2.667142274765593, "grad_norm": 2.2699356079101562, "learning_rate": 6.667261176790325e-05, "loss": 2.6669, "step": 39255 }, { "epoch": 2.6674819948362547, "grad_norm": 
3.451693534851074, "learning_rate": 6.666836526701998e-05, "loss": 3.0239, "step": 39260 }, { "epoch": 2.6678217149069168, "grad_norm": 2.2982728481292725, "learning_rate": 6.66641187661367e-05, "loss": 2.7047, "step": 39265 }, { "epoch": 2.6681614349775784, "grad_norm": 2.1939680576324463, "learning_rate": 6.665987226525343e-05, "loss": 2.7031, "step": 39270 }, { "epoch": 2.66850115504824, "grad_norm": 2.48677921295166, "learning_rate": 6.665562576437016e-05, "loss": 3.0671, "step": 39275 }, { "epoch": 2.668840875118902, "grad_norm": 3.239086627960205, "learning_rate": 6.665137926348689e-05, "loss": 2.8316, "step": 39280 }, { "epoch": 2.6691805951895637, "grad_norm": 2.5277998447418213, "learning_rate": 6.664713276260362e-05, "loss": 3.105, "step": 39285 }, { "epoch": 2.6695203152602254, "grad_norm": 2.6320407390594482, "learning_rate": 6.664288626172035e-05, "loss": 2.8164, "step": 39290 }, { "epoch": 2.6698600353308874, "grad_norm": 3.0384397506713867, "learning_rate": 6.663863976083707e-05, "loss": 3.1299, "step": 39295 }, { "epoch": 2.670199755401549, "grad_norm": 2.6731858253479004, "learning_rate": 6.66343932599538e-05, "loss": 2.8869, "step": 39300 }, { "epoch": 2.6705394754722107, "grad_norm": 2.5015945434570312, "learning_rate": 6.663014675907053e-05, "loss": 2.8292, "step": 39305 }, { "epoch": 2.670879195542873, "grad_norm": 2.210874319076538, "learning_rate": 6.662590025818726e-05, "loss": 2.8115, "step": 39310 }, { "epoch": 2.6712189156135344, "grad_norm": 2.69162654876709, "learning_rate": 6.662165375730399e-05, "loss": 2.9366, "step": 39315 }, { "epoch": 2.671558635684196, "grad_norm": 3.060002565383911, "learning_rate": 6.661740725642071e-05, "loss": 3.2969, "step": 39320 }, { "epoch": 2.671898355754858, "grad_norm": 2.2809250354766846, "learning_rate": 6.661316075553744e-05, "loss": 2.8266, "step": 39325 }, { "epoch": 2.6722380758255198, "grad_norm": 2.4836490154266357, "learning_rate": 6.660891425465417e-05, "loss": 2.9465, "step": 39330 }, { 
"epoch": 2.6725777958961814, "grad_norm": 2.271540641784668, "learning_rate": 6.66046677537709e-05, "loss": 2.8534, "step": 39335 }, { "epoch": 2.6729175159668435, "grad_norm": 3.542440414428711, "learning_rate": 6.660042125288763e-05, "loss": 2.9739, "step": 39340 }, { "epoch": 2.673257236037505, "grad_norm": 3.2761428356170654, "learning_rate": 6.659617475200434e-05, "loss": 2.6387, "step": 39345 }, { "epoch": 2.6735969561081667, "grad_norm": 3.138814926147461, "learning_rate": 6.659192825112108e-05, "loss": 2.8155, "step": 39350 }, { "epoch": 2.673936676178829, "grad_norm": 3.0206894874572754, "learning_rate": 6.658768175023781e-05, "loss": 3.1042, "step": 39355 }, { "epoch": 2.6742763962494904, "grad_norm": 2.2979846000671387, "learning_rate": 6.658343524935453e-05, "loss": 2.6831, "step": 39360 }, { "epoch": 2.674616116320152, "grad_norm": 2.446089029312134, "learning_rate": 6.657918874847127e-05, "loss": 2.7729, "step": 39365 }, { "epoch": 2.674955836390814, "grad_norm": 2.5559582710266113, "learning_rate": 6.6574942247588e-05, "loss": 2.7888, "step": 39370 }, { "epoch": 2.6752955564614758, "grad_norm": 2.218928575515747, "learning_rate": 6.657069574670471e-05, "loss": 2.9845, "step": 39375 }, { "epoch": 2.6756352765321374, "grad_norm": 2.2955286502838135, "learning_rate": 6.656644924582145e-05, "loss": 2.8426, "step": 39380 }, { "epoch": 2.6759749966027995, "grad_norm": 2.4526240825653076, "learning_rate": 6.656220274493818e-05, "loss": 2.6929, "step": 39385 }, { "epoch": 2.676314716673461, "grad_norm": 2.59660005569458, "learning_rate": 6.65579562440549e-05, "loss": 2.9412, "step": 39390 }, { "epoch": 2.6766544367441227, "grad_norm": 2.783498525619507, "learning_rate": 6.655370974317163e-05, "loss": 3.0735, "step": 39395 }, { "epoch": 2.676994156814785, "grad_norm": 2.51041841506958, "learning_rate": 6.654946324228836e-05, "loss": 2.9859, "step": 39400 }, { "epoch": 2.6773338768854464, "grad_norm": 2.2317023277282715, "learning_rate": 6.654521674140508e-05, 
"loss": 2.7273, "step": 39405 }, { "epoch": 2.677673596956108, "grad_norm": 2.558079719543457, "learning_rate": 6.654097024052182e-05, "loss": 2.6302, "step": 39410 }, { "epoch": 2.67801331702677, "grad_norm": 1.9369330406188965, "learning_rate": 6.653672373963853e-05, "loss": 2.8751, "step": 39415 }, { "epoch": 2.6783530370974318, "grad_norm": 2.861281394958496, "learning_rate": 6.653247723875526e-05, "loss": 3.0652, "step": 39420 }, { "epoch": 2.6786927571680934, "grad_norm": 2.977268934249878, "learning_rate": 6.6528230737872e-05, "loss": 2.9673, "step": 39425 }, { "epoch": 2.6790324772387555, "grad_norm": 2.4439337253570557, "learning_rate": 6.652398423698872e-05, "loss": 2.9563, "step": 39430 }, { "epoch": 2.679372197309417, "grad_norm": 2.7307047843933105, "learning_rate": 6.651973773610545e-05, "loss": 2.9169, "step": 39435 }, { "epoch": 2.6797119173800787, "grad_norm": 2.0581142902374268, "learning_rate": 6.651549123522219e-05, "loss": 2.8391, "step": 39440 }, { "epoch": 2.680051637450741, "grad_norm": 2.3664426803588867, "learning_rate": 6.65112447343389e-05, "loss": 2.9775, "step": 39445 }, { "epoch": 2.6803913575214025, "grad_norm": 2.304893970489502, "learning_rate": 6.650699823345563e-05, "loss": 3.0994, "step": 39450 }, { "epoch": 2.680731077592064, "grad_norm": 2.7240869998931885, "learning_rate": 6.650275173257237e-05, "loss": 2.9215, "step": 39455 }, { "epoch": 2.681070797662726, "grad_norm": 2.5582196712493896, "learning_rate": 6.649850523168909e-05, "loss": 2.9343, "step": 39460 }, { "epoch": 2.681410517733388, "grad_norm": 2.5808770656585693, "learning_rate": 6.649425873080581e-05, "loss": 2.7977, "step": 39465 }, { "epoch": 2.6817502378040494, "grad_norm": 2.185234785079956, "learning_rate": 6.649001222992255e-05, "loss": 2.9682, "step": 39470 }, { "epoch": 2.6820899578747115, "grad_norm": 2.978548526763916, "learning_rate": 6.648576572903927e-05, "loss": 2.8133, "step": 39475 }, { "epoch": 2.682429677945373, "grad_norm": 2.207078218460083, 
"learning_rate": 6.6481519228156e-05, "loss": 2.7571, "step": 39480 }, { "epoch": 2.6827693980160348, "grad_norm": 2.788602590560913, "learning_rate": 6.647727272727274e-05, "loss": 2.8684, "step": 39485 }, { "epoch": 2.683109118086697, "grad_norm": 2.6338865756988525, "learning_rate": 6.647302622638945e-05, "loss": 3.1187, "step": 39490 }, { "epoch": 2.6834488381573585, "grad_norm": 2.6035475730895996, "learning_rate": 6.646877972550618e-05, "loss": 2.8225, "step": 39495 }, { "epoch": 2.68378855822802, "grad_norm": 2.2625927925109863, "learning_rate": 6.646453322462291e-05, "loss": 2.841, "step": 39500 }, { "epoch": 2.6841282782986817, "grad_norm": 2.349428176879883, "learning_rate": 6.646028672373964e-05, "loss": 2.6801, "step": 39505 }, { "epoch": 2.684467998369344, "grad_norm": 2.0479040145874023, "learning_rate": 6.645604022285638e-05, "loss": 2.8401, "step": 39510 }, { "epoch": 2.6848077184400054, "grad_norm": 2.7826051712036133, "learning_rate": 6.64517937219731e-05, "loss": 2.7258, "step": 39515 }, { "epoch": 2.685147438510667, "grad_norm": 2.9135968685150146, "learning_rate": 6.644754722108982e-05, "loss": 2.6078, "step": 39520 }, { "epoch": 2.685487158581329, "grad_norm": 2.884554624557495, "learning_rate": 6.644330072020656e-05, "loss": 2.7347, "step": 39525 }, { "epoch": 2.6858268786519908, "grad_norm": 2.893587827682495, "learning_rate": 6.643905421932328e-05, "loss": 2.9134, "step": 39530 }, { "epoch": 2.6861665987226524, "grad_norm": 2.359928607940674, "learning_rate": 6.643480771844e-05, "loss": 3.1526, "step": 39535 }, { "epoch": 2.6865063187933145, "grad_norm": 2.4569144248962402, "learning_rate": 6.643056121755675e-05, "loss": 2.8642, "step": 39540 }, { "epoch": 2.686846038863976, "grad_norm": 3.6395065784454346, "learning_rate": 6.642631471667346e-05, "loss": 2.6914, "step": 39545 }, { "epoch": 2.6871857589346377, "grad_norm": 3.608389139175415, "learning_rate": 6.642206821579019e-05, "loss": 2.9302, "step": 39550 }, { "epoch": 
2.6875254790052994, "grad_norm": 3.795585870742798, "learning_rate": 6.641782171490693e-05, "loss": 2.9411, "step": 39555 }, { "epoch": 2.6878651990759614, "grad_norm": 2.422041893005371, "learning_rate": 6.641357521402365e-05, "loss": 2.992, "step": 39560 }, { "epoch": 2.688204919146623, "grad_norm": 2.368557929992676, "learning_rate": 6.640932871314037e-05, "loss": 2.976, "step": 39565 }, { "epoch": 2.6885446392172847, "grad_norm": 2.9903745651245117, "learning_rate": 6.64050822122571e-05, "loss": 2.8574, "step": 39570 }, { "epoch": 2.688884359287947, "grad_norm": 2.4499189853668213, "learning_rate": 6.640083571137383e-05, "loss": 2.8417, "step": 39575 }, { "epoch": 2.6892240793586084, "grad_norm": 2.675269603729248, "learning_rate": 6.639658921049056e-05, "loss": 2.8337, "step": 39580 }, { "epoch": 2.68956379942927, "grad_norm": 2.9201700687408447, "learning_rate": 6.639234270960729e-05, "loss": 3.2209, "step": 39585 }, { "epoch": 2.689903519499932, "grad_norm": 2.7216968536376953, "learning_rate": 6.638809620872401e-05, "loss": 2.8951, "step": 39590 }, { "epoch": 2.6902432395705937, "grad_norm": 2.5420145988464355, "learning_rate": 6.638384970784074e-05, "loss": 3.0105, "step": 39595 }, { "epoch": 2.6905829596412554, "grad_norm": 2.570909261703491, "learning_rate": 6.637960320695747e-05, "loss": 2.8155, "step": 39600 }, { "epoch": 2.6909226797119175, "grad_norm": 2.411818265914917, "learning_rate": 6.63753567060742e-05, "loss": 2.8956, "step": 39605 }, { "epoch": 2.691262399782579, "grad_norm": 2.6803107261657715, "learning_rate": 6.637111020519093e-05, "loss": 2.9188, "step": 39610 }, { "epoch": 2.6916021198532407, "grad_norm": 2.3280529975891113, "learning_rate": 6.636686370430765e-05, "loss": 3.0527, "step": 39615 }, { "epoch": 2.691941839923903, "grad_norm": 2.305952548980713, "learning_rate": 6.636261720342438e-05, "loss": 3.0115, "step": 39620 }, { "epoch": 2.6922815599945644, "grad_norm": 3.250913619995117, "learning_rate": 6.635837070254111e-05, "loss": 
2.8584, "step": 39625 }, { "epoch": 2.692621280065226, "grad_norm": 2.571157217025757, "learning_rate": 6.635412420165784e-05, "loss": 2.8758, "step": 39630 }, { "epoch": 2.692961000135888, "grad_norm": 2.5761568546295166, "learning_rate": 6.634987770077457e-05, "loss": 2.8399, "step": 39635 }, { "epoch": 2.6933007202065498, "grad_norm": 2.698218584060669, "learning_rate": 6.63456311998913e-05, "loss": 2.8117, "step": 39640 }, { "epoch": 2.6936404402772114, "grad_norm": 2.5568418502807617, "learning_rate": 6.634138469900802e-05, "loss": 2.6298, "step": 39645 }, { "epoch": 2.6939801603478735, "grad_norm": 2.5328919887542725, "learning_rate": 6.633713819812475e-05, "loss": 2.9664, "step": 39650 }, { "epoch": 2.694319880418535, "grad_norm": 2.5795412063598633, "learning_rate": 6.633289169724148e-05, "loss": 2.7646, "step": 39655 }, { "epoch": 2.6946596004891967, "grad_norm": 3.00156831741333, "learning_rate": 6.63286451963582e-05, "loss": 2.8727, "step": 39660 }, { "epoch": 2.694999320559859, "grad_norm": 2.1106953620910645, "learning_rate": 6.632439869547493e-05, "loss": 3.0242, "step": 39665 }, { "epoch": 2.6953390406305204, "grad_norm": 2.2487103939056396, "learning_rate": 6.632015219459166e-05, "loss": 2.8543, "step": 39670 }, { "epoch": 2.695678760701182, "grad_norm": 3.315570592880249, "learning_rate": 6.631590569370839e-05, "loss": 2.7978, "step": 39675 }, { "epoch": 2.696018480771844, "grad_norm": 2.309931516647339, "learning_rate": 6.631165919282512e-05, "loss": 2.6682, "step": 39680 }, { "epoch": 2.6963582008425058, "grad_norm": 2.6990764141082764, "learning_rate": 6.630741269194185e-05, "loss": 2.7619, "step": 39685 }, { "epoch": 2.6966979209131674, "grad_norm": 2.341813087463379, "learning_rate": 6.630316619105857e-05, "loss": 2.8763, "step": 39690 }, { "epoch": 2.6970376409838295, "grad_norm": 2.1638786792755127, "learning_rate": 6.62989196901753e-05, "loss": 3.0116, "step": 39695 }, { "epoch": 2.697377361054491, "grad_norm": 2.4895527362823486, 
"learning_rate": 6.629467318929202e-05, "loss": 2.9299, "step": 39700 }, { "epoch": 2.6977170811251527, "grad_norm": 2.412722110748291, "learning_rate": 6.629042668840876e-05, "loss": 2.7742, "step": 39705 }, { "epoch": 2.698056801195815, "grad_norm": 2.5143167972564697, "learning_rate": 6.628618018752549e-05, "loss": 2.8952, "step": 39710 }, { "epoch": 2.6983965212664764, "grad_norm": 3.1772470474243164, "learning_rate": 6.62819336866422e-05, "loss": 2.977, "step": 39715 }, { "epoch": 2.698736241337138, "grad_norm": 2.9763741493225098, "learning_rate": 6.627768718575894e-05, "loss": 3.114, "step": 39720 }, { "epoch": 2.6990759614078, "grad_norm": 2.0188193321228027, "learning_rate": 6.627344068487567e-05, "loss": 2.8653, "step": 39725 }, { "epoch": 2.699415681478462, "grad_norm": 2.0958075523376465, "learning_rate": 6.626919418399238e-05, "loss": 2.6714, "step": 39730 }, { "epoch": 2.6997554015491234, "grad_norm": 3.4125635623931885, "learning_rate": 6.626494768310913e-05, "loss": 2.9452, "step": 39735 }, { "epoch": 2.7000951216197855, "grad_norm": 2.4062206745147705, "learning_rate": 6.626070118222585e-05, "loss": 2.6402, "step": 39740 }, { "epoch": 2.700434841690447, "grad_norm": 2.3939006328582764, "learning_rate": 6.625645468134257e-05, "loss": 2.8495, "step": 39745 }, { "epoch": 2.7007745617611087, "grad_norm": 2.9921669960021973, "learning_rate": 6.625220818045931e-05, "loss": 2.946, "step": 39750 }, { "epoch": 2.701114281831771, "grad_norm": 2.313920736312866, "learning_rate": 6.624796167957604e-05, "loss": 2.8604, "step": 39755 }, { "epoch": 2.7014540019024325, "grad_norm": 3.000562906265259, "learning_rate": 6.624371517869275e-05, "loss": 2.9496, "step": 39760 }, { "epoch": 2.701793721973094, "grad_norm": 2.4430792331695557, "learning_rate": 6.62394686778095e-05, "loss": 3.0203, "step": 39765 }, { "epoch": 2.702133442043756, "grad_norm": 2.5668089389801025, "learning_rate": 6.623522217692621e-05, "loss": 2.797, "step": 39770 }, { "epoch": 
2.702473162114418, "grad_norm": 2.4180548191070557, "learning_rate": 6.623097567604294e-05, "loss": 2.6999, "step": 39775 }, { "epoch": 2.7028128821850794, "grad_norm": 3.1647932529449463, "learning_rate": 6.622672917515968e-05, "loss": 2.7253, "step": 39780 }, { "epoch": 2.7031526022557415, "grad_norm": 2.6288881301879883, "learning_rate": 6.622248267427639e-05, "loss": 2.8432, "step": 39785 }, { "epoch": 2.703492322326403, "grad_norm": 2.8653409481048584, "learning_rate": 6.621823617339312e-05, "loss": 3.1213, "step": 39790 }, { "epoch": 2.7038320423970648, "grad_norm": 3.723019599914551, "learning_rate": 6.621398967250986e-05, "loss": 2.8473, "step": 39795 }, { "epoch": 2.704171762467727, "grad_norm": 3.4440574645996094, "learning_rate": 6.620974317162658e-05, "loss": 2.7812, "step": 39800 }, { "epoch": 2.7045114825383885, "grad_norm": 2.91428279876709, "learning_rate": 6.62054966707433e-05, "loss": 2.7485, "step": 39805 }, { "epoch": 2.70485120260905, "grad_norm": 2.5563442707061768, "learning_rate": 6.620125016986005e-05, "loss": 2.9591, "step": 39810 }, { "epoch": 2.705190922679712, "grad_norm": 2.8113036155700684, "learning_rate": 6.619700366897676e-05, "loss": 2.712, "step": 39815 }, { "epoch": 2.705530642750374, "grad_norm": 2.692422866821289, "learning_rate": 6.619275716809349e-05, "loss": 2.9117, "step": 39820 }, { "epoch": 2.7058703628210354, "grad_norm": 2.3368916511535645, "learning_rate": 6.618851066721023e-05, "loss": 2.5788, "step": 39825 }, { "epoch": 2.7062100828916975, "grad_norm": 2.236020088195801, "learning_rate": 6.618426416632694e-05, "loss": 2.7513, "step": 39830 }, { "epoch": 2.706549802962359, "grad_norm": 2.5192513465881348, "learning_rate": 6.618001766544367e-05, "loss": 2.94, "step": 39835 }, { "epoch": 2.7068895230330208, "grad_norm": 2.414891242980957, "learning_rate": 6.617577116456041e-05, "loss": 2.7276, "step": 39840 }, { "epoch": 2.7072292431036824, "grad_norm": 2.236032009124756, "learning_rate": 6.617152466367713e-05, "loss": 
2.7258, "step": 39845 }, { "epoch": 2.7075689631743445, "grad_norm": 2.2014713287353516, "learning_rate": 6.616727816279387e-05, "loss": 2.8846, "step": 39850 }, { "epoch": 2.707908683245006, "grad_norm": 2.6398515701293945, "learning_rate": 6.616303166191058e-05, "loss": 2.7526, "step": 39855 }, { "epoch": 2.7082484033156677, "grad_norm": 3.089102029800415, "learning_rate": 6.615878516102731e-05, "loss": 2.8342, "step": 39860 }, { "epoch": 2.70858812338633, "grad_norm": 2.5100319385528564, "learning_rate": 6.615453866014405e-05, "loss": 2.9273, "step": 39865 }, { "epoch": 2.7089278434569914, "grad_norm": 3.251819133758545, "learning_rate": 6.615029215926077e-05, "loss": 2.9454, "step": 39870 }, { "epoch": 2.709267563527653, "grad_norm": 2.426191568374634, "learning_rate": 6.61460456583775e-05, "loss": 2.7936, "step": 39875 }, { "epoch": 2.709607283598315, "grad_norm": 4.115034580230713, "learning_rate": 6.614179915749424e-05, "loss": 2.933, "step": 39880 }, { "epoch": 2.709947003668977, "grad_norm": 2.2522740364074707, "learning_rate": 6.613755265661095e-05, "loss": 2.8951, "step": 39885 }, { "epoch": 2.7102867237396384, "grad_norm": 2.2103471755981445, "learning_rate": 6.613330615572768e-05, "loss": 2.9958, "step": 39890 }, { "epoch": 2.7106264438103, "grad_norm": 2.4762537479400635, "learning_rate": 6.612905965484442e-05, "loss": 2.8667, "step": 39895 }, { "epoch": 2.710966163880962, "grad_norm": 2.4176042079925537, "learning_rate": 6.612481315396114e-05, "loss": 2.688, "step": 39900 }, { "epoch": 2.7113058839516238, "grad_norm": 3.564321279525757, "learning_rate": 6.612056665307786e-05, "loss": 2.8902, "step": 39905 }, { "epoch": 2.7116456040222854, "grad_norm": 2.7414708137512207, "learning_rate": 6.61163201521946e-05, "loss": 2.736, "step": 39910 }, { "epoch": 2.7119853240929475, "grad_norm": 2.766125202178955, "learning_rate": 6.611207365131132e-05, "loss": 2.9626, "step": 39915 }, { "epoch": 2.712325044163609, "grad_norm": 2.703932285308838, 
"learning_rate": 6.610782715042805e-05, "loss": 3.0755, "step": 39920 }, { "epoch": 2.7126647642342707, "grad_norm": 2.103227376937866, "learning_rate": 6.610358064954478e-05, "loss": 3.0405, "step": 39925 }, { "epoch": 2.713004484304933, "grad_norm": 2.672485113143921, "learning_rate": 6.60993341486615e-05, "loss": 3.0913, "step": 39930 }, { "epoch": 2.7133442043755944, "grad_norm": 2.924328327178955, "learning_rate": 6.609508764777823e-05, "loss": 2.9658, "step": 39935 }, { "epoch": 2.713683924446256, "grad_norm": 2.285172700881958, "learning_rate": 6.609084114689496e-05, "loss": 2.8487, "step": 39940 }, { "epoch": 2.714023644516918, "grad_norm": 2.668564796447754, "learning_rate": 6.608659464601169e-05, "loss": 2.6633, "step": 39945 }, { "epoch": 2.7143633645875798, "grad_norm": 3.216981887817383, "learning_rate": 6.608234814512842e-05, "loss": 2.7423, "step": 39950 }, { "epoch": 2.7147030846582414, "grad_norm": 2.186006784439087, "learning_rate": 6.607810164424514e-05, "loss": 2.8648, "step": 39955 }, { "epoch": 2.7150428047289035, "grad_norm": 2.176941394805908, "learning_rate": 6.607385514336187e-05, "loss": 3.0831, "step": 39960 }, { "epoch": 2.715382524799565, "grad_norm": 2.4363954067230225, "learning_rate": 6.60696086424786e-05, "loss": 2.8328, "step": 39965 }, { "epoch": 2.7157222448702267, "grad_norm": 2.3128790855407715, "learning_rate": 6.606536214159533e-05, "loss": 2.6963, "step": 39970 }, { "epoch": 2.716061964940889, "grad_norm": 2.889275074005127, "learning_rate": 6.606111564071206e-05, "loss": 3.1042, "step": 39975 }, { "epoch": 2.7164016850115504, "grad_norm": 2.649754047393799, "learning_rate": 6.605686913982878e-05, "loss": 2.7229, "step": 39980 }, { "epoch": 2.716741405082212, "grad_norm": 2.6923274993896484, "learning_rate": 6.605262263894551e-05, "loss": 2.7352, "step": 39985 }, { "epoch": 2.717081125152874, "grad_norm": 2.1978163719177246, "learning_rate": 6.604837613806224e-05, "loss": 2.9197, "step": 39990 }, { "epoch": 
2.7174208452235358, "grad_norm": 2.3174662590026855, "learning_rate": 6.604412963717897e-05, "loss": 2.9932, "step": 39995 }, { "epoch": 2.7177605652941974, "grad_norm": 2.1532764434814453, "learning_rate": 6.60398831362957e-05, "loss": 3.0214, "step": 40000 }, { "epoch": 2.7181002853648595, "grad_norm": 2.75075364112854, "learning_rate": 6.603563663541242e-05, "loss": 2.9421, "step": 40005 }, { "epoch": 2.718440005435521, "grad_norm": 2.5508768558502197, "learning_rate": 6.603139013452915e-05, "loss": 2.662, "step": 40010 }, { "epoch": 2.7187797255061827, "grad_norm": 2.2343692779541016, "learning_rate": 6.602714363364588e-05, "loss": 2.8812, "step": 40015 }, { "epoch": 2.719119445576845, "grad_norm": 2.839855432510376, "learning_rate": 6.602289713276261e-05, "loss": 2.912, "step": 40020 }, { "epoch": 2.7194591656475064, "grad_norm": 2.274542808532715, "learning_rate": 6.601865063187934e-05, "loss": 3.1489, "step": 40025 }, { "epoch": 2.719798885718168, "grad_norm": 2.825540542602539, "learning_rate": 6.601440413099606e-05, "loss": 2.7486, "step": 40030 }, { "epoch": 2.72013860578883, "grad_norm": 2.2289435863494873, "learning_rate": 6.601015763011279e-05, "loss": 2.6439, "step": 40035 }, { "epoch": 2.720478325859492, "grad_norm": 3.194033145904541, "learning_rate": 6.600591112922952e-05, "loss": 2.8224, "step": 40040 }, { "epoch": 2.7208180459301534, "grad_norm": 2.270961284637451, "learning_rate": 6.600166462834625e-05, "loss": 2.7678, "step": 40045 }, { "epoch": 2.7211577660008155, "grad_norm": 2.3954927921295166, "learning_rate": 6.599741812746298e-05, "loss": 2.8226, "step": 40050 }, { "epoch": 2.721497486071477, "grad_norm": 2.8671417236328125, "learning_rate": 6.599317162657969e-05, "loss": 2.9074, "step": 40055 }, { "epoch": 2.7218372061421388, "grad_norm": 3.6670186519622803, "learning_rate": 6.598892512569643e-05, "loss": 2.7583, "step": 40060 }, { "epoch": 2.722176926212801, "grad_norm": 2.590505599975586, "learning_rate": 6.598467862481316e-05, "loss": 
2.9169, "step": 40065 }, { "epoch": 2.7225166462834625, "grad_norm": 2.4467620849609375, "learning_rate": 6.598043212392988e-05, "loss": 2.9517, "step": 40070 }, { "epoch": 2.722856366354124, "grad_norm": 2.9187862873077393, "learning_rate": 6.597618562304662e-05, "loss": 2.9878, "step": 40075 }, { "epoch": 2.723196086424786, "grad_norm": 3.2807698249816895, "learning_rate": 6.597193912216334e-05, "loss": 2.8541, "step": 40080 }, { "epoch": 2.723535806495448, "grad_norm": 2.470397472381592, "learning_rate": 6.596769262128006e-05, "loss": 3.0779, "step": 40085 }, { "epoch": 2.7238755265661094, "grad_norm": 3.051114082336426, "learning_rate": 6.59634461203968e-05, "loss": 2.8814, "step": 40090 }, { "epoch": 2.7242152466367715, "grad_norm": 1.9850605726242065, "learning_rate": 6.595919961951353e-05, "loss": 2.9229, "step": 40095 }, { "epoch": 2.724554966707433, "grad_norm": 2.9119999408721924, "learning_rate": 6.595495311863024e-05, "loss": 2.8862, "step": 40100 }, { "epoch": 2.7248946867780948, "grad_norm": 2.657938003540039, "learning_rate": 6.595070661774698e-05, "loss": 2.8614, "step": 40105 }, { "epoch": 2.725234406848757, "grad_norm": 3.470484972000122, "learning_rate": 6.594646011686371e-05, "loss": 2.9865, "step": 40110 }, { "epoch": 2.7255741269194185, "grad_norm": 2.882481813430786, "learning_rate": 6.594221361598043e-05, "loss": 2.9882, "step": 40115 }, { "epoch": 2.72591384699008, "grad_norm": 2.7816174030303955, "learning_rate": 6.593796711509717e-05, "loss": 2.7509, "step": 40120 }, { "epoch": 2.726253567060742, "grad_norm": 2.6384620666503906, "learning_rate": 6.593372061421388e-05, "loss": 2.7616, "step": 40125 }, { "epoch": 2.726593287131404, "grad_norm": 2.533102512359619, "learning_rate": 6.592947411333061e-05, "loss": 3.0263, "step": 40130 }, { "epoch": 2.7269330072020654, "grad_norm": 2.7377500534057617, "learning_rate": 6.592522761244735e-05, "loss": 2.6826, "step": 40135 }, { "epoch": 2.7272727272727275, "grad_norm": 2.872434377670288, 
"learning_rate": 6.592098111156407e-05, "loss": 2.8364, "step": 40140 }, { "epoch": 2.727612447343389, "grad_norm": 2.604964017868042, "learning_rate": 6.59167346106808e-05, "loss": 2.7574, "step": 40145 }, { "epoch": 2.7279521674140508, "grad_norm": 3.300954580307007, "learning_rate": 6.591248810979754e-05, "loss": 2.9141, "step": 40150 }, { "epoch": 2.728291887484713, "grad_norm": 2.0070114135742188, "learning_rate": 6.590824160891425e-05, "loss": 2.9944, "step": 40155 }, { "epoch": 2.7286316075553745, "grad_norm": 2.429285764694214, "learning_rate": 6.590399510803098e-05, "loss": 2.8111, "step": 40160 }, { "epoch": 2.728971327626036, "grad_norm": 2.912529468536377, "learning_rate": 6.589974860714772e-05, "loss": 2.9444, "step": 40165 }, { "epoch": 2.729311047696698, "grad_norm": 3.246731758117676, "learning_rate": 6.589550210626444e-05, "loss": 2.8829, "step": 40170 }, { "epoch": 2.72965076776736, "grad_norm": 2.4997425079345703, "learning_rate": 6.589125560538116e-05, "loss": 3.006, "step": 40175 }, { "epoch": 2.7299904878380215, "grad_norm": 3.466829299926758, "learning_rate": 6.58870091044979e-05, "loss": 3.0045, "step": 40180 }, { "epoch": 2.730330207908683, "grad_norm": 2.1152756214141846, "learning_rate": 6.588276260361462e-05, "loss": 2.8668, "step": 40185 }, { "epoch": 2.730669927979345, "grad_norm": 2.4130821228027344, "learning_rate": 6.587851610273136e-05, "loss": 3.0264, "step": 40190 }, { "epoch": 2.731009648050007, "grad_norm": 2.218432664871216, "learning_rate": 6.587426960184808e-05, "loss": 3.0034, "step": 40195 }, { "epoch": 2.7313493681206684, "grad_norm": 2.4641647338867188, "learning_rate": 6.58700231009648e-05, "loss": 3.0558, "step": 40200 }, { "epoch": 2.7316890881913305, "grad_norm": 2.7903048992156982, "learning_rate": 6.586577660008155e-05, "loss": 2.8087, "step": 40205 }, { "epoch": 2.732028808261992, "grad_norm": 2.3673899173736572, "learning_rate": 6.586153009919826e-05, "loss": 2.9955, "step": 40210 }, { "epoch": 
2.7323685283326538, "grad_norm": 2.8146414756774902, "learning_rate": 6.585728359831499e-05, "loss": 2.9555, "step": 40215 }, { "epoch": 2.732708248403316, "grad_norm": 2.950516700744629, "learning_rate": 6.585303709743173e-05, "loss": 2.8338, "step": 40220 }, { "epoch": 2.7330479684739775, "grad_norm": 2.440747022628784, "learning_rate": 6.584879059654844e-05, "loss": 2.795, "step": 40225 }, { "epoch": 2.733387688544639, "grad_norm": 3.1097049713134766, "learning_rate": 6.584454409566517e-05, "loss": 2.7434, "step": 40230 }, { "epoch": 2.7337274086153007, "grad_norm": 2.939255952835083, "learning_rate": 6.584029759478191e-05, "loss": 2.8983, "step": 40235 }, { "epoch": 2.734067128685963, "grad_norm": 2.8646624088287354, "learning_rate": 6.583605109389863e-05, "loss": 2.9732, "step": 40240 }, { "epoch": 2.7344068487566244, "grad_norm": 2.5609359741210938, "learning_rate": 6.583180459301536e-05, "loss": 2.7577, "step": 40245 }, { "epoch": 2.734746568827286, "grad_norm": 2.952439308166504, "learning_rate": 6.58275580921321e-05, "loss": 2.9922, "step": 40250 }, { "epoch": 2.735086288897948, "grad_norm": 2.4541308879852295, "learning_rate": 6.582331159124881e-05, "loss": 3.0283, "step": 40255 }, { "epoch": 2.7354260089686098, "grad_norm": 2.835606336593628, "learning_rate": 6.581906509036554e-05, "loss": 2.8731, "step": 40260 }, { "epoch": 2.7357657290392714, "grad_norm": 2.631284475326538, "learning_rate": 6.581481858948228e-05, "loss": 2.8663, "step": 40265 }, { "epoch": 2.7361054491099335, "grad_norm": 2.3714487552642822, "learning_rate": 6.5810572088599e-05, "loss": 2.9099, "step": 40270 }, { "epoch": 2.736445169180595, "grad_norm": 2.3893535137176514, "learning_rate": 6.580632558771572e-05, "loss": 2.6998, "step": 40275 }, { "epoch": 2.7367848892512567, "grad_norm": 3.2341208457946777, "learning_rate": 6.580207908683245e-05, "loss": 2.9449, "step": 40280 }, { "epoch": 2.737124609321919, "grad_norm": 2.497009515762329, "learning_rate": 6.579783258594918e-05, 
"loss": 2.7983, "step": 40285 }, { "epoch": 2.7374643293925804, "grad_norm": 2.3509414196014404, "learning_rate": 6.579358608506591e-05, "loss": 3.0025, "step": 40290 }, { "epoch": 2.737804049463242, "grad_norm": 1.977014183998108, "learning_rate": 6.578933958418264e-05, "loss": 2.761, "step": 40295 }, { "epoch": 2.738143769533904, "grad_norm": 2.6861908435821533, "learning_rate": 6.578509308329936e-05, "loss": 2.7909, "step": 40300 }, { "epoch": 2.738483489604566, "grad_norm": 3.4079887866973877, "learning_rate": 6.578084658241609e-05, "loss": 2.7381, "step": 40305 }, { "epoch": 2.7388232096752274, "grad_norm": 2.469932794570923, "learning_rate": 6.577660008153282e-05, "loss": 3.0945, "step": 40310 }, { "epoch": 2.7391629297458895, "grad_norm": 2.783158779144287, "learning_rate": 6.577235358064955e-05, "loss": 3.0394, "step": 40315 }, { "epoch": 2.739502649816551, "grad_norm": 2.569690465927124, "learning_rate": 6.576810707976628e-05, "loss": 2.6023, "step": 40320 }, { "epoch": 2.7398423698872127, "grad_norm": 2.3013992309570312, "learning_rate": 6.5763860578883e-05, "loss": 2.8592, "step": 40325 }, { "epoch": 2.740182089957875, "grad_norm": 2.577995777130127, "learning_rate": 6.575961407799973e-05, "loss": 2.9581, "step": 40330 }, { "epoch": 2.7405218100285365, "grad_norm": 2.329434871673584, "learning_rate": 6.575536757711646e-05, "loss": 2.9095, "step": 40335 }, { "epoch": 2.740861530099198, "grad_norm": 2.584672689437866, "learning_rate": 6.575112107623319e-05, "loss": 2.8243, "step": 40340 }, { "epoch": 2.74120125016986, "grad_norm": 3.077834367752075, "learning_rate": 6.574687457534992e-05, "loss": 3.0151, "step": 40345 }, { "epoch": 2.741540970240522, "grad_norm": 2.684251546859741, "learning_rate": 6.574262807446664e-05, "loss": 2.448, "step": 40350 }, { "epoch": 2.7418806903111834, "grad_norm": 2.2603726387023926, "learning_rate": 6.573838157358337e-05, "loss": 2.7965, "step": 40355 }, { "epoch": 2.7422204103818455, "grad_norm": 2.4126157760620117, 
"learning_rate": 6.57341350727001e-05, "loss": 3.0213, "step": 40360 }, { "epoch": 2.742560130452507, "grad_norm": 2.9223880767822266, "learning_rate": 6.572988857181683e-05, "loss": 2.9649, "step": 40365 }, { "epoch": 2.7428998505231688, "grad_norm": 2.3649656772613525, "learning_rate": 6.572564207093356e-05, "loss": 3.0905, "step": 40370 }, { "epoch": 2.743239570593831, "grad_norm": 2.783773422241211, "learning_rate": 6.572139557005028e-05, "loss": 2.8659, "step": 40375 }, { "epoch": 2.7435792906644925, "grad_norm": 2.5564093589782715, "learning_rate": 6.571714906916701e-05, "loss": 3.0647, "step": 40380 }, { "epoch": 2.743919010735154, "grad_norm": 2.9985511302948, "learning_rate": 6.571290256828374e-05, "loss": 2.8643, "step": 40385 }, { "epoch": 2.744258730805816, "grad_norm": 2.3980064392089844, "learning_rate": 6.570865606740047e-05, "loss": 3.1008, "step": 40390 }, { "epoch": 2.744598450876478, "grad_norm": 2.5930960178375244, "learning_rate": 6.570440956651718e-05, "loss": 2.8577, "step": 40395 }, { "epoch": 2.7449381709471394, "grad_norm": 2.5189201831817627, "learning_rate": 6.570016306563392e-05, "loss": 2.6611, "step": 40400 }, { "epoch": 2.7452778910178015, "grad_norm": 2.53883695602417, "learning_rate": 6.569591656475065e-05, "loss": 2.8445, "step": 40405 }, { "epoch": 2.745617611088463, "grad_norm": 2.3216991424560547, "learning_rate": 6.569167006386737e-05, "loss": 2.779, "step": 40410 }, { "epoch": 2.7459573311591248, "grad_norm": 2.4621541500091553, "learning_rate": 6.568742356298411e-05, "loss": 2.8686, "step": 40415 }, { "epoch": 2.746297051229787, "grad_norm": 2.007140636444092, "learning_rate": 6.568317706210084e-05, "loss": 2.9982, "step": 40420 }, { "epoch": 2.7466367713004485, "grad_norm": 2.6398890018463135, "learning_rate": 6.567893056121755e-05, "loss": 2.9535, "step": 40425 }, { "epoch": 2.74697649137111, "grad_norm": 2.4992523193359375, "learning_rate": 6.567468406033429e-05, "loss": 2.6168, "step": 40430 }, { "epoch": 
2.747316211441772, "grad_norm": 3.618821144104004, "learning_rate": 6.567043755945102e-05, "loss": 2.9917, "step": 40435 }, { "epoch": 2.747655931512434, "grad_norm": 3.0088586807250977, "learning_rate": 6.566619105856773e-05, "loss": 2.9979, "step": 40440 }, { "epoch": 2.7479956515830954, "grad_norm": 2.879828691482544, "learning_rate": 6.566194455768448e-05, "loss": 2.8093, "step": 40445 }, { "epoch": 2.7483353716537575, "grad_norm": 3.0831661224365234, "learning_rate": 6.56576980568012e-05, "loss": 3.0196, "step": 40450 }, { "epoch": 2.748675091724419, "grad_norm": 2.3895788192749023, "learning_rate": 6.565345155591792e-05, "loss": 2.9457, "step": 40455 }, { "epoch": 2.749014811795081, "grad_norm": 3.199672222137451, "learning_rate": 6.564920505503466e-05, "loss": 3.052, "step": 40460 }, { "epoch": 2.749354531865743, "grad_norm": 2.934739351272583, "learning_rate": 6.564495855415139e-05, "loss": 2.7604, "step": 40465 }, { "epoch": 2.7496942519364045, "grad_norm": 3.3200016021728516, "learning_rate": 6.56407120532681e-05, "loss": 2.4752, "step": 40470 }, { "epoch": 2.750033972007066, "grad_norm": 2.2652482986450195, "learning_rate": 6.563646555238484e-05, "loss": 2.9485, "step": 40475 }, { "epoch": 2.750373692077728, "grad_norm": 2.273358106613159, "learning_rate": 6.563221905150156e-05, "loss": 2.881, "step": 40480 }, { "epoch": 2.75071341214839, "grad_norm": 1.9917385578155518, "learning_rate": 6.562797255061829e-05, "loss": 2.9871, "step": 40485 }, { "epoch": 2.7510531322190515, "grad_norm": 2.124825954437256, "learning_rate": 6.562372604973503e-05, "loss": 2.8542, "step": 40490 }, { "epoch": 2.7513928522897135, "grad_norm": 2.0578689575195312, "learning_rate": 6.561947954885174e-05, "loss": 2.8453, "step": 40495 }, { "epoch": 2.751732572360375, "grad_norm": 2.638822317123413, "learning_rate": 6.561523304796847e-05, "loss": 2.8206, "step": 40500 }, { "epoch": 2.752072292431037, "grad_norm": 2.329501152038574, "learning_rate": 6.561098654708521e-05, "loss": 
2.6578, "step": 40505 }, { "epoch": 2.752412012501699, "grad_norm": 2.66125750541687, "learning_rate": 6.560674004620193e-05, "loss": 2.8269, "step": 40510 }, { "epoch": 2.7527517325723605, "grad_norm": 2.7791552543640137, "learning_rate": 6.560249354531865e-05, "loss": 2.9145, "step": 40515 }, { "epoch": 2.753091452643022, "grad_norm": 2.6038599014282227, "learning_rate": 6.55982470444354e-05, "loss": 2.7314, "step": 40520 }, { "epoch": 2.7534311727136838, "grad_norm": 3.0967013835906982, "learning_rate": 6.559400054355211e-05, "loss": 2.8948, "step": 40525 }, { "epoch": 2.753770892784346, "grad_norm": 2.196110248565674, "learning_rate": 6.558975404266885e-05, "loss": 2.7613, "step": 40530 }, { "epoch": 2.7541106128550075, "grad_norm": 1.9374616146087646, "learning_rate": 6.558550754178558e-05, "loss": 3.0885, "step": 40535 }, { "epoch": 2.754450332925669, "grad_norm": 2.385503053665161, "learning_rate": 6.55812610409023e-05, "loss": 2.8897, "step": 40540 }, { "epoch": 2.754790052996331, "grad_norm": 2.38891863822937, "learning_rate": 6.557701454001904e-05, "loss": 2.8785, "step": 40545 }, { "epoch": 2.755129773066993, "grad_norm": 2.675189256668091, "learning_rate": 6.557276803913575e-05, "loss": 3.0233, "step": 40550 }, { "epoch": 2.7554694931376544, "grad_norm": 2.525963068008423, "learning_rate": 6.556852153825248e-05, "loss": 3.0195, "step": 40555 }, { "epoch": 2.7558092132083165, "grad_norm": 2.102921962738037, "learning_rate": 6.556427503736922e-05, "loss": 2.8251, "step": 40560 }, { "epoch": 2.756148933278978, "grad_norm": 2.310913562774658, "learning_rate": 6.556002853648593e-05, "loss": 2.864, "step": 40565 }, { "epoch": 2.7564886533496398, "grad_norm": 2.1294314861297607, "learning_rate": 6.555578203560266e-05, "loss": 2.6677, "step": 40570 }, { "epoch": 2.7568283734203014, "grad_norm": 2.8248138427734375, "learning_rate": 6.55515355347194e-05, "loss": 2.8003, "step": 40575 }, { "epoch": 2.7571680934909635, "grad_norm": 2.8655574321746826, 
"learning_rate": 6.554728903383612e-05, "loss": 2.8361, "step": 40580 }, { "epoch": 2.757507813561625, "grad_norm": 2.375882863998413, "learning_rate": 6.554304253295285e-05, "loss": 2.861, "step": 40585 }, { "epoch": 2.7578475336322867, "grad_norm": 2.6689200401306152, "learning_rate": 6.553879603206959e-05, "loss": 2.8057, "step": 40590 }, { "epoch": 2.758187253702949, "grad_norm": 3.2060320377349854, "learning_rate": 6.55345495311863e-05, "loss": 2.9169, "step": 40595 }, { "epoch": 2.7585269737736104, "grad_norm": 2.333512783050537, "learning_rate": 6.553030303030303e-05, "loss": 3.039, "step": 40600 }, { "epoch": 2.758866693844272, "grad_norm": 1.9706335067749023, "learning_rate": 6.552605652941977e-05, "loss": 3.0068, "step": 40605 }, { "epoch": 2.759206413914934, "grad_norm": 2.7223987579345703, "learning_rate": 6.552181002853649e-05, "loss": 2.8364, "step": 40610 }, { "epoch": 2.759546133985596, "grad_norm": 2.2709546089172363, "learning_rate": 6.551756352765321e-05, "loss": 2.5229, "step": 40615 }, { "epoch": 2.7598858540562574, "grad_norm": 3.2892937660217285, "learning_rate": 6.551331702676994e-05, "loss": 2.8836, "step": 40620 }, { "epoch": 2.7602255741269195, "grad_norm": 2.2599120140075684, "learning_rate": 6.550907052588667e-05, "loss": 2.7728, "step": 40625 }, { "epoch": 2.760565294197581, "grad_norm": 2.813753604888916, "learning_rate": 6.55048240250034e-05, "loss": 2.7188, "step": 40630 }, { "epoch": 2.7609050142682428, "grad_norm": 2.3014936447143555, "learning_rate": 6.550057752412013e-05, "loss": 2.7399, "step": 40635 }, { "epoch": 2.761244734338905, "grad_norm": 1.9553325176239014, "learning_rate": 6.549633102323685e-05, "loss": 2.5975, "step": 40640 }, { "epoch": 2.7615844544095665, "grad_norm": 2.5717434883117676, "learning_rate": 6.549208452235358e-05, "loss": 2.9183, "step": 40645 }, { "epoch": 2.761924174480228, "grad_norm": 2.718109130859375, "learning_rate": 6.548783802147031e-05, "loss": 2.9618, "step": 40650 }, { "epoch": 
2.76226389455089, "grad_norm": 2.286770820617676, "learning_rate": 6.548359152058704e-05, "loss": 2.899, "step": 40655 }, { "epoch": 2.762603614621552, "grad_norm": 2.6756913661956787, "learning_rate": 6.547934501970377e-05, "loss": 2.8422, "step": 40660 }, { "epoch": 2.7629433346922134, "grad_norm": 2.2153453826904297, "learning_rate": 6.54750985188205e-05, "loss": 2.7745, "step": 40665 }, { "epoch": 2.7632830547628755, "grad_norm": 2.2952187061309814, "learning_rate": 6.547085201793722e-05, "loss": 2.9457, "step": 40670 }, { "epoch": 2.763622774833537, "grad_norm": 2.634551763534546, "learning_rate": 6.546660551705395e-05, "loss": 2.7052, "step": 40675 }, { "epoch": 2.7639624949041988, "grad_norm": 2.115837574005127, "learning_rate": 6.546235901617068e-05, "loss": 2.7296, "step": 40680 }, { "epoch": 2.764302214974861, "grad_norm": 3.0631937980651855, "learning_rate": 6.54581125152874e-05, "loss": 3.0136, "step": 40685 }, { "epoch": 2.7646419350455225, "grad_norm": 2.6448240280151367, "learning_rate": 6.545386601440413e-05, "loss": 3.0548, "step": 40690 }, { "epoch": 2.764981655116184, "grad_norm": 2.7083404064178467, "learning_rate": 6.544961951352086e-05, "loss": 2.8964, "step": 40695 }, { "epoch": 2.765321375186846, "grad_norm": 3.10906720161438, "learning_rate": 6.544537301263759e-05, "loss": 2.7021, "step": 40700 }, { "epoch": 2.765661095257508, "grad_norm": 2.239124298095703, "learning_rate": 6.544112651175432e-05, "loss": 2.9246, "step": 40705 }, { "epoch": 2.7660008153281694, "grad_norm": 2.920992612838745, "learning_rate": 6.543688001087105e-05, "loss": 2.8942, "step": 40710 }, { "epoch": 2.7663405353988315, "grad_norm": 2.0136702060699463, "learning_rate": 6.543263350998777e-05, "loss": 2.7624, "step": 40715 }, { "epoch": 2.766680255469493, "grad_norm": 3.1501710414886475, "learning_rate": 6.54283870091045e-05, "loss": 2.8562, "step": 40720 }, { "epoch": 2.7670199755401548, "grad_norm": 2.5970866680145264, "learning_rate": 6.542414050822123e-05, "loss": 
2.7209, "step": 40725 }, { "epoch": 2.767359695610817, "grad_norm": 2.689242124557495, "learning_rate": 6.541989400733796e-05, "loss": 2.7574, "step": 40730 }, { "epoch": 2.7676994156814785, "grad_norm": 2.41975998878479, "learning_rate": 6.541564750645469e-05, "loss": 2.7907, "step": 40735 }, { "epoch": 2.76803913575214, "grad_norm": 2.439521551132202, "learning_rate": 6.541140100557141e-05, "loss": 2.9541, "step": 40740 }, { "epoch": 2.768378855822802, "grad_norm": 2.319988250732422, "learning_rate": 6.540715450468814e-05, "loss": 2.9297, "step": 40745 }, { "epoch": 2.768718575893464, "grad_norm": 2.486443042755127, "learning_rate": 6.540290800380486e-05, "loss": 2.8965, "step": 40750 }, { "epoch": 2.7690582959641254, "grad_norm": 3.1002209186553955, "learning_rate": 6.53986615029216e-05, "loss": 2.794, "step": 40755 }, { "epoch": 2.7693980160347875, "grad_norm": 2.3997440338134766, "learning_rate": 6.539441500203833e-05, "loss": 2.8672, "step": 40760 }, { "epoch": 2.769737736105449, "grad_norm": 2.937993049621582, "learning_rate": 6.539016850115504e-05, "loss": 2.9264, "step": 40765 }, { "epoch": 2.770077456176111, "grad_norm": 2.6107094287872314, "learning_rate": 6.538592200027178e-05, "loss": 2.8881, "step": 40770 }, { "epoch": 2.770417176246773, "grad_norm": 2.7826120853424072, "learning_rate": 6.538167549938851e-05, "loss": 2.9953, "step": 40775 }, { "epoch": 2.7707568963174345, "grad_norm": 2.544933319091797, "learning_rate": 6.537742899850523e-05, "loss": 2.9075, "step": 40780 }, { "epoch": 2.771096616388096, "grad_norm": 3.2106549739837646, "learning_rate": 6.537318249762197e-05, "loss": 2.9144, "step": 40785 }, { "epoch": 2.771436336458758, "grad_norm": 2.2586002349853516, "learning_rate": 6.53689359967387e-05, "loss": 3.0081, "step": 40790 }, { "epoch": 2.77177605652942, "grad_norm": 2.3555150032043457, "learning_rate": 6.536468949585541e-05, "loss": 2.9201, "step": 40795 }, { "epoch": 2.7721157766000815, "grad_norm": 2.0945611000061035, 
"learning_rate": 6.536044299497215e-05, "loss": 2.8956, "step": 40800 }, { "epoch": 2.7724554966707435, "grad_norm": 2.534541130065918, "learning_rate": 6.535619649408888e-05, "loss": 2.7043, "step": 40805 }, { "epoch": 2.772795216741405, "grad_norm": 2.7646126747131348, "learning_rate": 6.53519499932056e-05, "loss": 2.6734, "step": 40810 }, { "epoch": 2.773134936812067, "grad_norm": 2.9370641708374023, "learning_rate": 6.534770349232233e-05, "loss": 2.9447, "step": 40815 }, { "epoch": 2.773474656882729, "grad_norm": 2.073805809020996, "learning_rate": 6.534345699143905e-05, "loss": 2.9998, "step": 40820 }, { "epoch": 2.7738143769533905, "grad_norm": 2.8202309608459473, "learning_rate": 6.533921049055578e-05, "loss": 2.8111, "step": 40825 }, { "epoch": 2.774154097024052, "grad_norm": 6.443764686584473, "learning_rate": 6.533496398967252e-05, "loss": 2.8279, "step": 40830 }, { "epoch": 2.774493817094714, "grad_norm": 2.623002529144287, "learning_rate": 6.533071748878923e-05, "loss": 3.0616, "step": 40835 }, { "epoch": 2.774833537165376, "grad_norm": 2.1023824214935303, "learning_rate": 6.532647098790596e-05, "loss": 3.1155, "step": 40840 }, { "epoch": 2.7751732572360375, "grad_norm": 3.2373430728912354, "learning_rate": 6.53222244870227e-05, "loss": 3.1188, "step": 40845 }, { "epoch": 2.7755129773066995, "grad_norm": 3.1374411582946777, "learning_rate": 6.531797798613942e-05, "loss": 2.8041, "step": 40850 }, { "epoch": 2.775852697377361, "grad_norm": 3.2239274978637695, "learning_rate": 6.531373148525615e-05, "loss": 2.704, "step": 40855 }, { "epoch": 2.776192417448023, "grad_norm": 2.6581108570098877, "learning_rate": 6.530948498437289e-05, "loss": 2.7243, "step": 40860 }, { "epoch": 2.7765321375186844, "grad_norm": 2.7655375003814697, "learning_rate": 6.53052384834896e-05, "loss": 3.1451, "step": 40865 }, { "epoch": 2.7768718575893465, "grad_norm": 2.2054693698883057, "learning_rate": 6.530099198260634e-05, "loss": 2.8022, "step": 40870 }, { "epoch": 
2.777211577660008, "grad_norm": 3.210428476333618, "learning_rate": 6.529674548172307e-05, "loss": 2.8862, "step": 40875 }, { "epoch": 2.7775512977306698, "grad_norm": 2.781090259552002, "learning_rate": 6.529249898083979e-05, "loss": 2.9109, "step": 40880 }, { "epoch": 2.777891017801332, "grad_norm": 3.266692876815796, "learning_rate": 6.528825247995653e-05, "loss": 2.6155, "step": 40885 }, { "epoch": 2.7782307378719935, "grad_norm": 2.3367810249328613, "learning_rate": 6.528400597907325e-05, "loss": 2.906, "step": 40890 }, { "epoch": 2.778570457942655, "grad_norm": 2.811100482940674, "learning_rate": 6.527975947818997e-05, "loss": 3.0921, "step": 40895 }, { "epoch": 2.778910178013317, "grad_norm": 2.416438341140747, "learning_rate": 6.527551297730671e-05, "loss": 2.4453, "step": 40900 }, { "epoch": 2.779249898083979, "grad_norm": 2.9167141914367676, "learning_rate": 6.527126647642343e-05, "loss": 2.9564, "step": 40905 }, { "epoch": 2.7795896181546405, "grad_norm": 2.266587495803833, "learning_rate": 6.526701997554015e-05, "loss": 2.9091, "step": 40910 }, { "epoch": 2.779929338225302, "grad_norm": 2.5627219676971436, "learning_rate": 6.52627734746569e-05, "loss": 2.8278, "step": 40915 }, { "epoch": 2.780269058295964, "grad_norm": 2.6823933124542236, "learning_rate": 6.525852697377361e-05, "loss": 2.4441, "step": 40920 }, { "epoch": 2.780608778366626, "grad_norm": 3.0533502101898193, "learning_rate": 6.525428047289034e-05, "loss": 2.9164, "step": 40925 }, { "epoch": 2.7809484984372874, "grad_norm": 2.512409210205078, "learning_rate": 6.525003397200708e-05, "loss": 2.9796, "step": 40930 }, { "epoch": 2.7812882185079495, "grad_norm": 2.108218193054199, "learning_rate": 6.52457874711238e-05, "loss": 2.9888, "step": 40935 }, { "epoch": 2.781627938578611, "grad_norm": 2.1209561824798584, "learning_rate": 6.524154097024052e-05, "loss": 3.0111, "step": 40940 }, { "epoch": 2.7819676586492728, "grad_norm": 2.3573029041290283, "learning_rate": 6.523729446935726e-05, "loss": 
2.6337, "step": 40945 }, { "epoch": 2.782307378719935, "grad_norm": 3.38387393951416, "learning_rate": 6.523304796847398e-05, "loss": 2.9943, "step": 40950 }, { "epoch": 2.7826470987905965, "grad_norm": 2.9011528491973877, "learning_rate": 6.52288014675907e-05, "loss": 3.0088, "step": 40955 }, { "epoch": 2.782986818861258, "grad_norm": 2.7904200553894043, "learning_rate": 6.522455496670745e-05, "loss": 2.8642, "step": 40960 }, { "epoch": 2.78332653893192, "grad_norm": 2.945808172225952, "learning_rate": 6.522030846582416e-05, "loss": 2.7027, "step": 40965 }, { "epoch": 2.783666259002582, "grad_norm": 1.9799600839614868, "learning_rate": 6.521606196494089e-05, "loss": 2.776, "step": 40970 }, { "epoch": 2.7840059790732434, "grad_norm": 2.3330442905426025, "learning_rate": 6.521181546405762e-05, "loss": 2.9486, "step": 40975 }, { "epoch": 2.7843456991439055, "grad_norm": 3.077242851257324, "learning_rate": 6.520756896317435e-05, "loss": 2.5958, "step": 40980 }, { "epoch": 2.784685419214567, "grad_norm": 2.4076287746429443, "learning_rate": 6.520332246229107e-05, "loss": 3.0264, "step": 40985 }, { "epoch": 2.7850251392852288, "grad_norm": 2.597252368927002, "learning_rate": 6.51990759614078e-05, "loss": 2.7861, "step": 40990 }, { "epoch": 2.785364859355891, "grad_norm": 2.518523693084717, "learning_rate": 6.519482946052453e-05, "loss": 2.9624, "step": 40995 }, { "epoch": 2.7857045794265525, "grad_norm": 2.8092150688171387, "learning_rate": 6.519058295964126e-05, "loss": 2.7024, "step": 41000 }, { "epoch": 2.786044299497214, "grad_norm": 2.228590250015259, "learning_rate": 6.518633645875799e-05, "loss": 3.1828, "step": 41005 }, { "epoch": 2.786384019567876, "grad_norm": 2.6084840297698975, "learning_rate": 6.518208995787471e-05, "loss": 2.9838, "step": 41010 }, { "epoch": 2.786723739638538, "grad_norm": 2.5276601314544678, "learning_rate": 6.517784345699144e-05, "loss": 3.0255, "step": 41015 }, { "epoch": 2.7870634597091994, "grad_norm": 2.4653260707855225, 
"learning_rate": 6.517359695610817e-05, "loss": 2.7616, "step": 41020 }, { "epoch": 2.7874031797798615, "grad_norm": 2.7016422748565674, "learning_rate": 6.51693504552249e-05, "loss": 3.0349, "step": 41025 }, { "epoch": 2.787742899850523, "grad_norm": 3.3234872817993164, "learning_rate": 6.516510395434163e-05, "loss": 2.6831, "step": 41030 }, { "epoch": 2.788082619921185, "grad_norm": 2.4267187118530273, "learning_rate": 6.516085745345835e-05, "loss": 3.0517, "step": 41035 }, { "epoch": 2.788422339991847, "grad_norm": 2.545227289199829, "learning_rate": 6.515661095257508e-05, "loss": 2.9543, "step": 41040 }, { "epoch": 2.7887620600625085, "grad_norm": 2.1498873233795166, "learning_rate": 6.515236445169181e-05, "loss": 2.4472, "step": 41045 }, { "epoch": 2.78910178013317, "grad_norm": 2.9690628051757812, "learning_rate": 6.514811795080854e-05, "loss": 2.6714, "step": 41050 }, { "epoch": 2.789441500203832, "grad_norm": 1.7730838060379028, "learning_rate": 6.514387144992527e-05, "loss": 3.0915, "step": 41055 }, { "epoch": 2.789781220274494, "grad_norm": 2.2477707862854004, "learning_rate": 6.5139624949042e-05, "loss": 2.9614, "step": 41060 }, { "epoch": 2.7901209403451555, "grad_norm": 2.3915510177612305, "learning_rate": 6.513537844815872e-05, "loss": 2.8616, "step": 41065 }, { "epoch": 2.7904606604158175, "grad_norm": 2.7380785942077637, "learning_rate": 6.513113194727545e-05, "loss": 2.9817, "step": 41070 }, { "epoch": 2.790800380486479, "grad_norm": 2.7035715579986572, "learning_rate": 6.512688544639218e-05, "loss": 2.7382, "step": 41075 }, { "epoch": 2.791140100557141, "grad_norm": 2.657104015350342, "learning_rate": 6.51226389455089e-05, "loss": 2.7728, "step": 41080 }, { "epoch": 2.791479820627803, "grad_norm": 2.610893487930298, "learning_rate": 6.511839244462563e-05, "loss": 2.8062, "step": 41085 }, { "epoch": 2.7918195406984645, "grad_norm": 2.376220941543579, "learning_rate": 6.511414594374236e-05, "loss": 2.8263, "step": 41090 }, { "epoch": 
2.792159260769126, "grad_norm": 2.599231004714966, "learning_rate": 6.510989944285909e-05, "loss": 2.9356, "step": 41095 }, { "epoch": 2.792498980839788, "grad_norm": 2.4146227836608887, "learning_rate": 6.510565294197582e-05, "loss": 2.7129, "step": 41100 }, { "epoch": 2.79283870091045, "grad_norm": 2.1954920291900635, "learning_rate": 6.510140644109253e-05, "loss": 2.9354, "step": 41105 }, { "epoch": 2.7931784209811115, "grad_norm": 2.3812460899353027, "learning_rate": 6.509715994020927e-05, "loss": 2.8716, "step": 41110 }, { "epoch": 2.7935181410517735, "grad_norm": 2.267728328704834, "learning_rate": 6.5092913439326e-05, "loss": 2.993, "step": 41115 }, { "epoch": 2.793857861122435, "grad_norm": 2.295377254486084, "learning_rate": 6.508866693844272e-05, "loss": 2.7791, "step": 41120 }, { "epoch": 2.794197581193097, "grad_norm": 2.1918461322784424, "learning_rate": 6.508442043755946e-05, "loss": 3.0198, "step": 41125 }, { "epoch": 2.794537301263759, "grad_norm": 2.7741034030914307, "learning_rate": 6.508017393667619e-05, "loss": 2.5945, "step": 41130 }, { "epoch": 2.7948770213344205, "grad_norm": 2.2531955242156982, "learning_rate": 6.50759274357929e-05, "loss": 2.7821, "step": 41135 }, { "epoch": 2.795216741405082, "grad_norm": 2.750145196914673, "learning_rate": 6.507168093490964e-05, "loss": 2.7181, "step": 41140 }, { "epoch": 2.795556461475744, "grad_norm": 3.07197642326355, "learning_rate": 6.506743443402637e-05, "loss": 2.5182, "step": 41145 }, { "epoch": 2.795896181546406, "grad_norm": 2.6003315448760986, "learning_rate": 6.506318793314308e-05, "loss": 2.8778, "step": 41150 }, { "epoch": 2.7962359016170675, "grad_norm": 2.67132568359375, "learning_rate": 6.505894143225983e-05, "loss": 2.7068, "step": 41155 }, { "epoch": 2.7965756216877296, "grad_norm": 2.413753032684326, "learning_rate": 6.505469493137655e-05, "loss": 2.8731, "step": 41160 }, { "epoch": 2.796915341758391, "grad_norm": 2.8107919692993164, "learning_rate": 6.505044843049327e-05, "loss": 
2.8014, "step": 41165 }, { "epoch": 2.797255061829053, "grad_norm": 2.5508530139923096, "learning_rate": 6.504620192961001e-05, "loss": 2.9041, "step": 41170 }, { "epoch": 2.797594781899715, "grad_norm": 2.3772330284118652, "learning_rate": 6.504195542872672e-05, "loss": 2.8754, "step": 41175 }, { "epoch": 2.7979345019703765, "grad_norm": 2.342186450958252, "learning_rate": 6.503770892784345e-05, "loss": 3.0057, "step": 41180 }, { "epoch": 2.798274222041038, "grad_norm": 2.8410542011260986, "learning_rate": 6.50334624269602e-05, "loss": 2.8383, "step": 41185 }, { "epoch": 2.7986139421117002, "grad_norm": 2.284156322479248, "learning_rate": 6.502921592607691e-05, "loss": 2.8947, "step": 41190 }, { "epoch": 2.798953662182362, "grad_norm": 2.2382524013519287, "learning_rate": 6.502496942519364e-05, "loss": 3.1354, "step": 41195 }, { "epoch": 2.7992933822530235, "grad_norm": 2.581310272216797, "learning_rate": 6.502072292431038e-05, "loss": 2.849, "step": 41200 }, { "epoch": 2.799633102323685, "grad_norm": 2.1007981300354004, "learning_rate": 6.501647642342709e-05, "loss": 3.0252, "step": 41205 }, { "epoch": 2.799972822394347, "grad_norm": 2.4185450077056885, "learning_rate": 6.501222992254383e-05, "loss": 3.0702, "step": 41210 }, { "epoch": 2.800312542465009, "grad_norm": 2.175434112548828, "learning_rate": 6.500798342166056e-05, "loss": 2.7132, "step": 41215 }, { "epoch": 2.8006522625356705, "grad_norm": 2.7065207958221436, "learning_rate": 6.500373692077728e-05, "loss": 2.6857, "step": 41220 }, { "epoch": 2.8009919826063325, "grad_norm": 2.244459629058838, "learning_rate": 6.499949041989402e-05, "loss": 2.9211, "step": 41225 }, { "epoch": 2.801331702676994, "grad_norm": 2.623772382736206, "learning_rate": 6.499524391901075e-05, "loss": 2.818, "step": 41230 }, { "epoch": 2.801671422747656, "grad_norm": 2.7752795219421387, "learning_rate": 6.499099741812746e-05, "loss": 2.7382, "step": 41235 }, { "epoch": 2.802011142818318, "grad_norm": 2.7914140224456787, 
"learning_rate": 6.49867509172442e-05, "loss": 2.7617, "step": 41240 }, { "epoch": 2.8023508628889795, "grad_norm": 2.4266555309295654, "learning_rate": 6.498250441636092e-05, "loss": 2.6798, "step": 41245 }, { "epoch": 2.802690582959641, "grad_norm": 2.620039701461792, "learning_rate": 6.497825791547764e-05, "loss": 2.8772, "step": 41250 }, { "epoch": 2.8030303030303028, "grad_norm": 2.776280403137207, "learning_rate": 6.497401141459439e-05, "loss": 2.8348, "step": 41255 }, { "epoch": 2.803370023100965, "grad_norm": 3.01751708984375, "learning_rate": 6.49697649137111e-05, "loss": 3.3323, "step": 41260 }, { "epoch": 2.8037097431716265, "grad_norm": 2.471693277359009, "learning_rate": 6.496551841282783e-05, "loss": 2.862, "step": 41265 }, { "epoch": 2.804049463242288, "grad_norm": 2.114093542098999, "learning_rate": 6.496127191194457e-05, "loss": 2.7035, "step": 41270 }, { "epoch": 2.80438918331295, "grad_norm": 2.6727797985076904, "learning_rate": 6.495702541106128e-05, "loss": 2.9008, "step": 41275 }, { "epoch": 2.804728903383612, "grad_norm": 2.4465315341949463, "learning_rate": 6.495277891017801e-05, "loss": 2.9239, "step": 41280 }, { "epoch": 2.8050686234542734, "grad_norm": 2.8016796112060547, "learning_rate": 6.494853240929475e-05, "loss": 2.7524, "step": 41285 }, { "epoch": 2.8054083435249355, "grad_norm": 2.989760160446167, "learning_rate": 6.494428590841147e-05, "loss": 2.9688, "step": 41290 }, { "epoch": 2.805748063595597, "grad_norm": 2.005190134048462, "learning_rate": 6.49400394075282e-05, "loss": 2.5573, "step": 41295 }, { "epoch": 2.8060877836662588, "grad_norm": 2.329812526702881, "learning_rate": 6.493579290664494e-05, "loss": 2.9679, "step": 41300 }, { "epoch": 2.806427503736921, "grad_norm": 2.8777544498443604, "learning_rate": 6.493154640576165e-05, "loss": 3.0185, "step": 41305 }, { "epoch": 2.8067672238075825, "grad_norm": 2.2319202423095703, "learning_rate": 6.492729990487838e-05, "loss": 2.637, "step": 41310 }, { "epoch": 2.807106943878244, 
"grad_norm": 2.7467093467712402, "learning_rate": 6.492305340399512e-05, "loss": 2.8332, "step": 41315 }, { "epoch": 2.807446663948906, "grad_norm": 3.6084210872650146, "learning_rate": 6.491880690311184e-05, "loss": 2.7452, "step": 41320 }, { "epoch": 2.807786384019568, "grad_norm": 2.1219143867492676, "learning_rate": 6.491456040222856e-05, "loss": 2.946, "step": 41325 }, { "epoch": 2.8081261040902294, "grad_norm": 3.115403652191162, "learning_rate": 6.491031390134529e-05, "loss": 2.7164, "step": 41330 }, { "epoch": 2.8084658241608915, "grad_norm": 2.8636534214019775, "learning_rate": 6.490606740046202e-05, "loss": 2.8727, "step": 41335 }, { "epoch": 2.808805544231553, "grad_norm": 2.611293315887451, "learning_rate": 6.490182089957875e-05, "loss": 2.9188, "step": 41340 }, { "epoch": 2.809145264302215, "grad_norm": 2.548767566680908, "learning_rate": 6.489842369887213e-05, "loss": 2.9819, "step": 41345 }, { "epoch": 2.809484984372877, "grad_norm": 2.415820837020874, "learning_rate": 6.489417719798886e-05, "loss": 2.6145, "step": 41350 }, { "epoch": 2.8098247044435385, "grad_norm": 2.42195725440979, "learning_rate": 6.488993069710559e-05, "loss": 2.9635, "step": 41355 }, { "epoch": 2.8101644245142, "grad_norm": 2.7100532054901123, "learning_rate": 6.488568419622231e-05, "loss": 2.8993, "step": 41360 }, { "epoch": 2.810504144584862, "grad_norm": 3.4988210201263428, "learning_rate": 6.488143769533904e-05, "loss": 2.8695, "step": 41365 }, { "epoch": 2.810843864655524, "grad_norm": 2.784912347793579, "learning_rate": 6.487719119445577e-05, "loss": 2.8029, "step": 41370 }, { "epoch": 2.8111835847261855, "grad_norm": 2.7839391231536865, "learning_rate": 6.48729446935725e-05, "loss": 2.7699, "step": 41375 }, { "epoch": 2.8115233047968475, "grad_norm": 2.3756163120269775, "learning_rate": 6.486869819268923e-05, "loss": 3.1111, "step": 41380 }, { "epoch": 2.811863024867509, "grad_norm": 2.4390523433685303, "learning_rate": 6.486445169180596e-05, "loss": 3.0579, "step": 
41385 }, { "epoch": 2.812202744938171, "grad_norm": 2.628551721572876, "learning_rate": 6.486020519092268e-05, "loss": 3.0649, "step": 41390 }, { "epoch": 2.812542465008833, "grad_norm": 2.346226930618286, "learning_rate": 6.485595869003941e-05, "loss": 3.03, "step": 41395 }, { "epoch": 2.8128821850794945, "grad_norm": 2.305500030517578, "learning_rate": 6.485171218915614e-05, "loss": 2.9761, "step": 41400 }, { "epoch": 2.813221905150156, "grad_norm": 2.387829542160034, "learning_rate": 6.484746568827287e-05, "loss": 2.8576, "step": 41405 }, { "epoch": 2.813561625220818, "grad_norm": 2.223330497741699, "learning_rate": 6.48432191873896e-05, "loss": 3.0693, "step": 41410 }, { "epoch": 2.81390134529148, "grad_norm": 2.8536736965179443, "learning_rate": 6.483897268650632e-05, "loss": 3.0593, "step": 41415 }, { "epoch": 2.8142410653621415, "grad_norm": 3.11086106300354, "learning_rate": 6.483472618562305e-05, "loss": 2.9715, "step": 41420 }, { "epoch": 2.8145807854328035, "grad_norm": 2.4432990550994873, "learning_rate": 6.483047968473978e-05, "loss": 2.8827, "step": 41425 }, { "epoch": 2.814920505503465, "grad_norm": 3.9677414894104004, "learning_rate": 6.482623318385651e-05, "loss": 2.7872, "step": 41430 }, { "epoch": 2.815260225574127, "grad_norm": 2.419381618499756, "learning_rate": 6.482198668297324e-05, "loss": 2.7625, "step": 41435 }, { "epoch": 2.815599945644789, "grad_norm": 4.775445938110352, "learning_rate": 6.481774018208996e-05, "loss": 2.7827, "step": 41440 }, { "epoch": 2.8159396657154505, "grad_norm": 2.7892887592315674, "learning_rate": 6.481349368120669e-05, "loss": 2.8593, "step": 41445 }, { "epoch": 2.816279385786112, "grad_norm": 2.50234055519104, "learning_rate": 6.480924718032342e-05, "loss": 3.3107, "step": 41450 }, { "epoch": 2.816619105856774, "grad_norm": 2.8684780597686768, "learning_rate": 6.480500067944015e-05, "loss": 2.9337, "step": 41455 }, { "epoch": 2.816958825927436, "grad_norm": 2.5374410152435303, "learning_rate": 
6.480075417855688e-05, "loss": 3.0721, "step": 41460 }, { "epoch": 2.8172985459980975, "grad_norm": 2.5425262451171875, "learning_rate": 6.47965076776736e-05, "loss": 2.8604, "step": 41465 }, { "epoch": 2.8176382660687596, "grad_norm": 2.2211456298828125, "learning_rate": 6.479226117679033e-05, "loss": 2.8549, "step": 41470 }, { "epoch": 2.817977986139421, "grad_norm": 1.9318825006484985, "learning_rate": 6.478801467590706e-05, "loss": 3.0851, "step": 41475 }, { "epoch": 2.818317706210083, "grad_norm": 2.354517698287964, "learning_rate": 6.478376817502379e-05, "loss": 2.9898, "step": 41480 }, { "epoch": 2.818657426280745, "grad_norm": 2.7633161544799805, "learning_rate": 6.477952167414052e-05, "loss": 2.9684, "step": 41485 }, { "epoch": 2.8189971463514065, "grad_norm": 2.677177906036377, "learning_rate": 6.477527517325724e-05, "loss": 2.852, "step": 41490 }, { "epoch": 2.819336866422068, "grad_norm": 2.110973596572876, "learning_rate": 6.477102867237397e-05, "loss": 3.0317, "step": 41495 }, { "epoch": 2.8196765864927302, "grad_norm": 2.5987420082092285, "learning_rate": 6.476678217149069e-05, "loss": 2.7855, "step": 41500 }, { "epoch": 2.820016306563392, "grad_norm": 2.253988265991211, "learning_rate": 6.476253567060743e-05, "loss": 3.0228, "step": 41505 }, { "epoch": 2.8203560266340535, "grad_norm": 2.2387146949768066, "learning_rate": 6.475828916972416e-05, "loss": 2.8952, "step": 41510 }, { "epoch": 2.8206957467047156, "grad_norm": 2.9445619583129883, "learning_rate": 6.475404266884087e-05, "loss": 2.9037, "step": 41515 }, { "epoch": 2.821035466775377, "grad_norm": 2.8344292640686035, "learning_rate": 6.474979616795761e-05, "loss": 3.0546, "step": 41520 }, { "epoch": 2.821375186846039, "grad_norm": 2.185452938079834, "learning_rate": 6.474554966707434e-05, "loss": 2.751, "step": 41525 }, { "epoch": 2.821714906916701, "grad_norm": 2.3576762676239014, "learning_rate": 6.474130316619105e-05, "loss": 2.9786, "step": 41530 }, { "epoch": 2.8220546269873625, 
"grad_norm": 2.249861001968384, "learning_rate": 6.47370566653078e-05, "loss": 3.0127, "step": 41535 }, { "epoch": 2.822394347058024, "grad_norm": 3.067277669906616, "learning_rate": 6.473281016442452e-05, "loss": 2.7902, "step": 41540 }, { "epoch": 2.822734067128686, "grad_norm": 2.083677053451538, "learning_rate": 6.472856366354124e-05, "loss": 2.7676, "step": 41545 }, { "epoch": 2.823073787199348, "grad_norm": 2.0475070476531982, "learning_rate": 6.472431716265798e-05, "loss": 2.8473, "step": 41550 }, { "epoch": 2.8234135072700095, "grad_norm": 2.9752724170684814, "learning_rate": 6.472007066177471e-05, "loss": 2.959, "step": 41555 }, { "epoch": 2.823753227340671, "grad_norm": 2.3292949199676514, "learning_rate": 6.471582416089142e-05, "loss": 3.0272, "step": 41560 }, { "epoch": 2.824092947411333, "grad_norm": 2.74574875831604, "learning_rate": 6.471157766000816e-05, "loss": 3.2755, "step": 41565 }, { "epoch": 2.824432667481995, "grad_norm": 2.530944347381592, "learning_rate": 6.470733115912488e-05, "loss": 2.7685, "step": 41570 }, { "epoch": 2.8247723875526565, "grad_norm": 3.857891321182251, "learning_rate": 6.47030846582416e-05, "loss": 2.8276, "step": 41575 }, { "epoch": 2.8251121076233185, "grad_norm": 1.997509479522705, "learning_rate": 6.469883815735835e-05, "loss": 2.9233, "step": 41580 }, { "epoch": 2.82545182769398, "grad_norm": 2.8965396881103516, "learning_rate": 6.469459165647506e-05, "loss": 2.6772, "step": 41585 }, { "epoch": 2.825791547764642, "grad_norm": 2.6832735538482666, "learning_rate": 6.469034515559179e-05, "loss": 3.0873, "step": 41590 }, { "epoch": 2.8261312678353034, "grad_norm": 2.402070999145508, "learning_rate": 6.468609865470853e-05, "loss": 2.8667, "step": 41595 }, { "epoch": 2.8264709879059655, "grad_norm": 3.007087230682373, "learning_rate": 6.468185215382525e-05, "loss": 3.0838, "step": 41600 }, { "epoch": 2.826810707976627, "grad_norm": 2.3626439571380615, "learning_rate": 6.467760565294197e-05, "loss": 2.9005, "step": 41605 
}, { "epoch": 2.8271504280472888, "grad_norm": 2.771550178527832, "learning_rate": 6.467335915205872e-05, "loss": 2.8791, "step": 41610 }, { "epoch": 2.827490148117951, "grad_norm": 2.3809547424316406, "learning_rate": 6.466911265117543e-05, "loss": 2.8746, "step": 41615 }, { "epoch": 2.8278298681886125, "grad_norm": 2.7754967212677, "learning_rate": 6.466486615029216e-05, "loss": 2.8761, "step": 41620 }, { "epoch": 2.828169588259274, "grad_norm": 2.4588124752044678, "learning_rate": 6.46606196494089e-05, "loss": 2.6752, "step": 41625 }, { "epoch": 2.828509308329936, "grad_norm": 2.2886292934417725, "learning_rate": 6.465637314852561e-05, "loss": 3.0407, "step": 41630 }, { "epoch": 2.828849028400598, "grad_norm": 2.196563482284546, "learning_rate": 6.465212664764234e-05, "loss": 2.8309, "step": 41635 }, { "epoch": 2.8291887484712595, "grad_norm": 1.894526481628418, "learning_rate": 6.464788014675908e-05, "loss": 2.8875, "step": 41640 }, { "epoch": 2.8295284685419215, "grad_norm": 3.1863574981689453, "learning_rate": 6.46436336458758e-05, "loss": 2.8466, "step": 41645 }, { "epoch": 2.829868188612583, "grad_norm": 2.571197509765625, "learning_rate": 6.463938714499253e-05, "loss": 2.8223, "step": 41650 }, { "epoch": 2.830207908683245, "grad_norm": 2.7708609104156494, "learning_rate": 6.463514064410925e-05, "loss": 2.9225, "step": 41655 }, { "epoch": 2.830547628753907, "grad_norm": 2.5957746505737305, "learning_rate": 6.463089414322598e-05, "loss": 2.8971, "step": 41660 }, { "epoch": 2.8308873488245685, "grad_norm": 2.8926374912261963, "learning_rate": 6.462664764234271e-05, "loss": 2.7277, "step": 41665 }, { "epoch": 2.83122706889523, "grad_norm": 2.964825391769409, "learning_rate": 6.462240114145944e-05, "loss": 3.1038, "step": 41670 }, { "epoch": 2.831566788965892, "grad_norm": 2.686652898788452, "learning_rate": 6.461815464057617e-05, "loss": 2.8843, "step": 41675 }, { "epoch": 2.831906509036554, "grad_norm": 2.345465660095215, "learning_rate": 
6.46139081396929e-05, "loss": 3.0047, "step": 41680 }, { "epoch": 2.8322462291072155, "grad_norm": 2.709242343902588, "learning_rate": 6.460966163880962e-05, "loss": 2.9307, "step": 41685 }, { "epoch": 2.8325859491778775, "grad_norm": 2.3548269271850586, "learning_rate": 6.460541513792635e-05, "loss": 3.002, "step": 41690 }, { "epoch": 2.832925669248539, "grad_norm": 2.6516854763031006, "learning_rate": 6.460116863704308e-05, "loss": 2.8822, "step": 41695 }, { "epoch": 2.833265389319201, "grad_norm": 2.9709959030151367, "learning_rate": 6.45969221361598e-05, "loss": 2.9266, "step": 41700 }, { "epoch": 2.833605109389863, "grad_norm": 2.673978567123413, "learning_rate": 6.459267563527653e-05, "loss": 2.9614, "step": 41705 }, { "epoch": 2.8339448294605245, "grad_norm": 2.626713991165161, "learning_rate": 6.458842913439326e-05, "loss": 3.1217, "step": 41710 }, { "epoch": 2.834284549531186, "grad_norm": 2.668423652648926, "learning_rate": 6.458418263350999e-05, "loss": 2.7484, "step": 41715 }, { "epoch": 2.834624269601848, "grad_norm": 1.9918149709701538, "learning_rate": 6.457993613262672e-05, "loss": 2.7805, "step": 41720 }, { "epoch": 2.83496398967251, "grad_norm": 2.128889799118042, "learning_rate": 6.457568963174345e-05, "loss": 3.0617, "step": 41725 }, { "epoch": 2.8353037097431715, "grad_norm": 2.4832050800323486, "learning_rate": 6.457144313086017e-05, "loss": 3.0199, "step": 41730 }, { "epoch": 2.8356434298138335, "grad_norm": 2.900622844696045, "learning_rate": 6.45671966299769e-05, "loss": 2.7243, "step": 41735 }, { "epoch": 2.835983149884495, "grad_norm": 1.9343276023864746, "learning_rate": 6.456295012909363e-05, "loss": 2.9369, "step": 41740 }, { "epoch": 2.836322869955157, "grad_norm": 2.5988264083862305, "learning_rate": 6.455870362821036e-05, "loss": 2.9788, "step": 41745 }, { "epoch": 2.836662590025819, "grad_norm": 2.8444950580596924, "learning_rate": 6.455445712732709e-05, "loss": 2.9396, "step": 41750 }, { "epoch": 2.8370023100964805, "grad_norm": 
2.019070863723755, "learning_rate": 6.455021062644381e-05, "loss": 2.779, "step": 41755 }, { "epoch": 2.837342030167142, "grad_norm": 3.0244858264923096, "learning_rate": 6.454596412556054e-05, "loss": 3.0933, "step": 41760 }, { "epoch": 2.8376817502378042, "grad_norm": 2.308075189590454, "learning_rate": 6.454171762467727e-05, "loss": 2.8315, "step": 41765 }, { "epoch": 2.838021470308466, "grad_norm": 3.436429023742676, "learning_rate": 6.4537471123794e-05, "loss": 2.9457, "step": 41770 }, { "epoch": 2.8383611903791275, "grad_norm": 2.568779945373535, "learning_rate": 6.453322462291073e-05, "loss": 2.9347, "step": 41775 }, { "epoch": 2.8387009104497896, "grad_norm": 2.162428140640259, "learning_rate": 6.452897812202745e-05, "loss": 2.9211, "step": 41780 }, { "epoch": 2.839040630520451, "grad_norm": 2.0808980464935303, "learning_rate": 6.452473162114418e-05, "loss": 3.0477, "step": 41785 }, { "epoch": 2.839380350591113, "grad_norm": 2.04494571685791, "learning_rate": 6.452048512026091e-05, "loss": 2.8785, "step": 41790 }, { "epoch": 2.839720070661775, "grad_norm": 2.911792755126953, "learning_rate": 6.451623861937764e-05, "loss": 2.9912, "step": 41795 }, { "epoch": 2.8400597907324365, "grad_norm": 2.6843655109405518, "learning_rate": 6.451199211849437e-05, "loss": 2.6494, "step": 41800 }, { "epoch": 2.840399510803098, "grad_norm": 2.6418278217315674, "learning_rate": 6.45077456176111e-05, "loss": 3.153, "step": 41805 }, { "epoch": 2.8407392308737602, "grad_norm": 2.390766143798828, "learning_rate": 6.450349911672782e-05, "loss": 3.1173, "step": 41810 }, { "epoch": 2.841078950944422, "grad_norm": 2.207667827606201, "learning_rate": 6.449925261584455e-05, "loss": 3.1404, "step": 41815 }, { "epoch": 2.8414186710150835, "grad_norm": 2.8816592693328857, "learning_rate": 6.449500611496128e-05, "loss": 2.647, "step": 41820 }, { "epoch": 2.8417583910857456, "grad_norm": 2.6593692302703857, "learning_rate": 6.4490759614078e-05, "loss": 2.7348, "step": 41825 }, { "epoch": 
2.842098111156407, "grad_norm": 2.633289337158203, "learning_rate": 6.448651311319473e-05, "loss": 2.7321, "step": 41830 }, { "epoch": 2.842437831227069, "grad_norm": 2.3365345001220703, "learning_rate": 6.448226661231146e-05, "loss": 2.7749, "step": 41835 }, { "epoch": 2.842777551297731, "grad_norm": 2.306715488433838, "learning_rate": 6.447802011142819e-05, "loss": 2.994, "step": 41840 }, { "epoch": 2.8431172713683925, "grad_norm": 2.8806052207946777, "learning_rate": 6.447377361054492e-05, "loss": 2.8169, "step": 41845 }, { "epoch": 2.843456991439054, "grad_norm": 2.9268922805786133, "learning_rate": 6.446952710966165e-05, "loss": 2.7233, "step": 41850 }, { "epoch": 2.8437967115097162, "grad_norm": 2.8393988609313965, "learning_rate": 6.446528060877836e-05, "loss": 2.8896, "step": 41855 }, { "epoch": 2.844136431580378, "grad_norm": 3.685328245162964, "learning_rate": 6.44610341078951e-05, "loss": 2.4078, "step": 41860 }, { "epoch": 2.8444761516510395, "grad_norm": 2.4849066734313965, "learning_rate": 6.445678760701183e-05, "loss": 2.7003, "step": 41865 }, { "epoch": 2.8448158717217016, "grad_norm": 2.467170000076294, "learning_rate": 6.445254110612854e-05, "loss": 2.8029, "step": 41870 }, { "epoch": 2.845155591792363, "grad_norm": 2.4568846225738525, "learning_rate": 6.444829460524529e-05, "loss": 2.7143, "step": 41875 }, { "epoch": 2.845495311863025, "grad_norm": 2.5644912719726562, "learning_rate": 6.444404810436201e-05, "loss": 2.8153, "step": 41880 }, { "epoch": 2.8458350319336865, "grad_norm": 2.829301357269287, "learning_rate": 6.443980160347873e-05, "loss": 2.8507, "step": 41885 }, { "epoch": 2.8461747520043486, "grad_norm": 2.657801389694214, "learning_rate": 6.443555510259547e-05, "loss": 2.9958, "step": 41890 }, { "epoch": 2.84651447207501, "grad_norm": 3.4041993618011475, "learning_rate": 6.44313086017122e-05, "loss": 3.0189, "step": 41895 }, { "epoch": 2.846854192145672, "grad_norm": 2.6580958366394043, "learning_rate": 6.442706210082891e-05, "loss": 
2.9081, "step": 41900 }, { "epoch": 2.847193912216334, "grad_norm": 2.2756004333496094, "learning_rate": 6.442281559994565e-05, "loss": 2.7954, "step": 41905 }, { "epoch": 2.8475336322869955, "grad_norm": 2.933276653289795, "learning_rate": 6.441856909906238e-05, "loss": 2.8285, "step": 41910 }, { "epoch": 2.847873352357657, "grad_norm": 2.6607792377471924, "learning_rate": 6.44143225981791e-05, "loss": 3.0069, "step": 41915 }, { "epoch": 2.8482130724283192, "grad_norm": 2.675792694091797, "learning_rate": 6.441007609729584e-05, "loss": 3.1656, "step": 41920 }, { "epoch": 2.848552792498981, "grad_norm": 2.6383190155029297, "learning_rate": 6.440582959641255e-05, "loss": 2.9142, "step": 41925 }, { "epoch": 2.8488925125696425, "grad_norm": 2.4906532764434814, "learning_rate": 6.440158309552928e-05, "loss": 2.9663, "step": 41930 }, { "epoch": 2.849232232640304, "grad_norm": 2.642246723175049, "learning_rate": 6.439733659464602e-05, "loss": 2.8084, "step": 41935 }, { "epoch": 2.849571952710966, "grad_norm": 2.7404944896698, "learning_rate": 6.439309009376274e-05, "loss": 2.7727, "step": 41940 }, { "epoch": 2.849911672781628, "grad_norm": 2.6698710918426514, "learning_rate": 6.438884359287946e-05, "loss": 2.7499, "step": 41945 }, { "epoch": 2.8502513928522895, "grad_norm": 3.406205177307129, "learning_rate": 6.43845970919962e-05, "loss": 2.8814, "step": 41950 }, { "epoch": 2.8505911129229515, "grad_norm": 2.39392352104187, "learning_rate": 6.438035059111292e-05, "loss": 2.8943, "step": 41955 }, { "epoch": 2.850930832993613, "grad_norm": 2.5300188064575195, "learning_rate": 6.437610409022965e-05, "loss": 3.0087, "step": 41960 }, { "epoch": 2.851270553064275, "grad_norm": 2.919565439224243, "learning_rate": 6.437185758934639e-05, "loss": 2.85, "step": 41965 }, { "epoch": 2.851610273134937, "grad_norm": 2.0663681030273438, "learning_rate": 6.43676110884631e-05, "loss": 3.0431, "step": 41970 }, { "epoch": 2.8519499932055985, "grad_norm": 2.6295268535614014, "learning_rate": 
6.436336458757983e-05, "loss": 3.0261, "step": 41975 }, { "epoch": 2.85228971327626, "grad_norm": 2.2726035118103027, "learning_rate": 6.435911808669657e-05, "loss": 2.6216, "step": 41980 }, { "epoch": 2.852629433346922, "grad_norm": 3.431983470916748, "learning_rate": 6.435487158581329e-05, "loss": 2.9445, "step": 41985 }, { "epoch": 2.852969153417584, "grad_norm": 3.8592517375946045, "learning_rate": 6.435062508493002e-05, "loss": 2.8368, "step": 41990 }, { "epoch": 2.8533088734882455, "grad_norm": 2.416383743286133, "learning_rate": 6.434637858404674e-05, "loss": 2.9819, "step": 41995 }, { "epoch": 2.8536485935589075, "grad_norm": 1.95250403881073, "learning_rate": 6.434213208316347e-05, "loss": 3.1096, "step": 42000 }, { "epoch": 2.853988313629569, "grad_norm": 1.9882855415344238, "learning_rate": 6.43378855822802e-05, "loss": 2.9814, "step": 42005 }, { "epoch": 2.854328033700231, "grad_norm": 2.5557801723480225, "learning_rate": 6.433363908139693e-05, "loss": 2.6824, "step": 42010 }, { "epoch": 2.854667753770893, "grad_norm": 2.5723187923431396, "learning_rate": 6.432939258051366e-05, "loss": 2.9775, "step": 42015 }, { "epoch": 2.8550074738415545, "grad_norm": 2.4219303131103516, "learning_rate": 6.432514607963038e-05, "loss": 3.0586, "step": 42020 }, { "epoch": 2.855347193912216, "grad_norm": 2.7466137409210205, "learning_rate": 6.432089957874711e-05, "loss": 3.0983, "step": 42025 }, { "epoch": 2.855686913982878, "grad_norm": 2.0010483264923096, "learning_rate": 6.431665307786384e-05, "loss": 2.8322, "step": 42030 }, { "epoch": 2.85602663405354, "grad_norm": 2.368320941925049, "learning_rate": 6.431240657698057e-05, "loss": 2.5147, "step": 42035 }, { "epoch": 2.8563663541242015, "grad_norm": 2.9837822914123535, "learning_rate": 6.43081600760973e-05, "loss": 2.9686, "step": 42040 }, { "epoch": 2.8567060741948636, "grad_norm": 2.944248676300049, "learning_rate": 6.430391357521402e-05, "loss": 3.0939, "step": 42045 }, { "epoch": 2.857045794265525, "grad_norm": 
2.242908239364624, "learning_rate": 6.429966707433075e-05, "loss": 2.7864, "step": 42050 }, { "epoch": 2.857385514336187, "grad_norm": 2.7827534675598145, "learning_rate": 6.429542057344748e-05, "loss": 3.0626, "step": 42055 }, { "epoch": 2.857725234406849, "grad_norm": 2.4988014698028564, "learning_rate": 6.429117407256421e-05, "loss": 2.8158, "step": 42060 }, { "epoch": 2.8580649544775105, "grad_norm": 2.2051219940185547, "learning_rate": 6.428692757168094e-05, "loss": 2.9991, "step": 42065 }, { "epoch": 2.858404674548172, "grad_norm": 2.5778985023498535, "learning_rate": 6.428268107079766e-05, "loss": 2.9162, "step": 42070 }, { "epoch": 2.8587443946188342, "grad_norm": 2.831587314605713, "learning_rate": 6.427843456991439e-05, "loss": 2.8565, "step": 42075 }, { "epoch": 2.859084114689496, "grad_norm": 2.934818744659424, "learning_rate": 6.427418806903112e-05, "loss": 2.7693, "step": 42080 }, { "epoch": 2.8594238347601575, "grad_norm": 2.4566493034362793, "learning_rate": 6.426994156814785e-05, "loss": 2.7966, "step": 42085 }, { "epoch": 2.8597635548308196, "grad_norm": 2.263430595397949, "learning_rate": 6.426569506726458e-05, "loss": 3.0567, "step": 42090 }, { "epoch": 2.860103274901481, "grad_norm": 2.3581595420837402, "learning_rate": 6.42614485663813e-05, "loss": 2.7767, "step": 42095 }, { "epoch": 2.860442994972143, "grad_norm": 2.9617903232574463, "learning_rate": 6.425720206549803e-05, "loss": 2.9535, "step": 42100 }, { "epoch": 2.860782715042805, "grad_norm": 1.825981616973877, "learning_rate": 6.425295556461476e-05, "loss": 2.6746, "step": 42105 }, { "epoch": 2.8611224351134665, "grad_norm": 2.6099274158477783, "learning_rate": 6.424870906373149e-05, "loss": 3.0985, "step": 42110 }, { "epoch": 2.861462155184128, "grad_norm": 3.299381732940674, "learning_rate": 6.424446256284822e-05, "loss": 2.7984, "step": 42115 }, { "epoch": 2.8618018752547902, "grad_norm": 2.61155104637146, "learning_rate": 6.424021606196495e-05, "loss": 2.9621, "step": 42120 }, { 
"epoch": 2.862141595325452, "grad_norm": 2.746744155883789, "learning_rate": 6.423596956108167e-05, "loss": 2.9591, "step": 42125 }, { "epoch": 2.8624813153961135, "grad_norm": 2.5117361545562744, "learning_rate": 6.42317230601984e-05, "loss": 3.0301, "step": 42130 }, { "epoch": 2.8628210354667756, "grad_norm": 4.8392815589904785, "learning_rate": 6.422747655931513e-05, "loss": 2.9452, "step": 42135 }, { "epoch": 2.863160755537437, "grad_norm": 1.9186369180679321, "learning_rate": 6.422323005843186e-05, "loss": 2.5069, "step": 42140 }, { "epoch": 2.863500475608099, "grad_norm": 3.051628589630127, "learning_rate": 6.421898355754859e-05, "loss": 2.702, "step": 42145 }, { "epoch": 2.863840195678761, "grad_norm": 2.497953176498413, "learning_rate": 6.421473705666531e-05, "loss": 2.7513, "step": 42150 }, { "epoch": 2.8641799157494225, "grad_norm": 2.6878738403320312, "learning_rate": 6.421049055578204e-05, "loss": 2.9584, "step": 42155 }, { "epoch": 2.864519635820084, "grad_norm": 2.067409038543701, "learning_rate": 6.420624405489877e-05, "loss": 2.7122, "step": 42160 }, { "epoch": 2.8648593558907463, "grad_norm": 2.4519448280334473, "learning_rate": 6.42019975540155e-05, "loss": 2.7383, "step": 42165 }, { "epoch": 2.865199075961408, "grad_norm": 2.7528769969940186, "learning_rate": 6.419775105313223e-05, "loss": 2.9632, "step": 42170 }, { "epoch": 2.8655387960320695, "grad_norm": 2.170304298400879, "learning_rate": 6.419350455224895e-05, "loss": 2.8523, "step": 42175 }, { "epoch": 2.8658785161027316, "grad_norm": 2.4876937866210938, "learning_rate": 6.418925805136568e-05, "loss": 2.9689, "step": 42180 }, { "epoch": 2.866218236173393, "grad_norm": 2.506371259689331, "learning_rate": 6.418501155048241e-05, "loss": 2.5686, "step": 42185 }, { "epoch": 2.866557956244055, "grad_norm": 1.965591549873352, "learning_rate": 6.418076504959914e-05, "loss": 2.8474, "step": 42190 }, { "epoch": 2.866897676314717, "grad_norm": 2.8499233722686768, "learning_rate": 
6.417651854871585e-05, "loss": 2.8966, "step": 42195 }, { "epoch": 2.8672373963853786, "grad_norm": 2.3366429805755615, "learning_rate": 6.417227204783259e-05, "loss": 2.8795, "step": 42200 }, { "epoch": 2.86757711645604, "grad_norm": 2.7884373664855957, "learning_rate": 6.416802554694932e-05, "loss": 2.879, "step": 42205 }, { "epoch": 2.8679168365267023, "grad_norm": 2.152167797088623, "learning_rate": 6.416377904606604e-05, "loss": 2.8308, "step": 42210 }, { "epoch": 2.868256556597364, "grad_norm": 2.0090763568878174, "learning_rate": 6.415953254518278e-05, "loss": 2.8052, "step": 42215 }, { "epoch": 2.8685962766680255, "grad_norm": 2.4713783264160156, "learning_rate": 6.41552860442995e-05, "loss": 2.6109, "step": 42220 }, { "epoch": 2.868935996738687, "grad_norm": 2.3393452167510986, "learning_rate": 6.415103954341622e-05, "loss": 2.9778, "step": 42225 }, { "epoch": 2.8692757168093492, "grad_norm": 3.0657999515533447, "learning_rate": 6.414679304253296e-05, "loss": 2.8476, "step": 42230 }, { "epoch": 2.869615436880011, "grad_norm": 2.0843794345855713, "learning_rate": 6.414254654164969e-05, "loss": 2.8495, "step": 42235 }, { "epoch": 2.8699551569506725, "grad_norm": 2.1548399925231934, "learning_rate": 6.41383000407664e-05, "loss": 2.9315, "step": 42240 }, { "epoch": 2.8702948770213346, "grad_norm": 2.6753087043762207, "learning_rate": 6.413405353988315e-05, "loss": 2.9804, "step": 42245 }, { "epoch": 2.870634597091996, "grad_norm": 2.3251872062683105, "learning_rate": 6.412980703899987e-05, "loss": 2.7588, "step": 42250 }, { "epoch": 2.870974317162658, "grad_norm": 3.0811984539031982, "learning_rate": 6.412556053811659e-05, "loss": 3.0299, "step": 42255 }, { "epoch": 2.87131403723332, "grad_norm": 2.180817127227783, "learning_rate": 6.412131403723333e-05, "loss": 2.8578, "step": 42260 }, { "epoch": 2.8716537573039815, "grad_norm": 2.535815954208374, "learning_rate": 6.411706753635006e-05, "loss": 2.8386, "step": 42265 }, { "epoch": 2.871993477374643, 
"grad_norm": 3.685558319091797, "learning_rate": 6.411282103546677e-05, "loss": 2.7166, "step": 42270 }, { "epoch": 2.872333197445305, "grad_norm": 2.4926416873931885, "learning_rate": 6.410857453458351e-05, "loss": 3.0711, "step": 42275 }, { "epoch": 2.872672917515967, "grad_norm": 2.3767123222351074, "learning_rate": 6.410432803370023e-05, "loss": 2.9984, "step": 42280 }, { "epoch": 2.8730126375866285, "grad_norm": 3.3025412559509277, "learning_rate": 6.410008153281696e-05, "loss": 2.8388, "step": 42285 }, { "epoch": 2.87335235765729, "grad_norm": 2.637598991394043, "learning_rate": 6.40958350319337e-05, "loss": 2.837, "step": 42290 }, { "epoch": 2.873692077727952, "grad_norm": 2.4114694595336914, "learning_rate": 6.409158853105041e-05, "loss": 2.9052, "step": 42295 }, { "epoch": 2.874031797798614, "grad_norm": 2.1582558155059814, "learning_rate": 6.408734203016714e-05, "loss": 3.1175, "step": 42300 }, { "epoch": 2.8743715178692755, "grad_norm": 2.5294811725616455, "learning_rate": 6.408309552928388e-05, "loss": 2.9059, "step": 42305 }, { "epoch": 2.8747112379399375, "grad_norm": 2.794278144836426, "learning_rate": 6.40788490284006e-05, "loss": 2.7743, "step": 42310 }, { "epoch": 2.875050958010599, "grad_norm": 2.7898988723754883, "learning_rate": 6.407460252751732e-05, "loss": 2.9136, "step": 42315 }, { "epoch": 2.875390678081261, "grad_norm": 3.001523971557617, "learning_rate": 6.407035602663407e-05, "loss": 3.0074, "step": 42320 }, { "epoch": 2.875730398151923, "grad_norm": 2.2704036235809326, "learning_rate": 6.406610952575078e-05, "loss": 2.8073, "step": 42325 }, { "epoch": 2.8760701182225845, "grad_norm": 2.404538154602051, "learning_rate": 6.406186302486751e-05, "loss": 2.9175, "step": 42330 }, { "epoch": 2.876409838293246, "grad_norm": 2.6501519680023193, "learning_rate": 6.405761652398425e-05, "loss": 2.7772, "step": 42335 }, { "epoch": 2.876749558363908, "grad_norm": 2.312359571456909, "learning_rate": 6.405337002310096e-05, "loss": 2.9673, "step": 
42340 }, { "epoch": 2.87708927843457, "grad_norm": 2.709221601486206, "learning_rate": 6.404912352221769e-05, "loss": 2.7364, "step": 42345 }, { "epoch": 2.8774289985052315, "grad_norm": 2.205306053161621, "learning_rate": 6.404487702133442e-05, "loss": 3.0865, "step": 42350 }, { "epoch": 2.8777687185758936, "grad_norm": 2.41137957572937, "learning_rate": 6.404063052045115e-05, "loss": 2.8276, "step": 42355 }, { "epoch": 2.878108438646555, "grad_norm": 2.276944637298584, "learning_rate": 6.403638401956788e-05, "loss": 3.0043, "step": 42360 }, { "epoch": 2.878448158717217, "grad_norm": 2.237504005432129, "learning_rate": 6.40321375186846e-05, "loss": 2.8188, "step": 42365 }, { "epoch": 2.878787878787879, "grad_norm": 3.0440680980682373, "learning_rate": 6.402789101780133e-05, "loss": 2.9591, "step": 42370 }, { "epoch": 2.8791275988585405, "grad_norm": 2.8474230766296387, "learning_rate": 6.402364451691806e-05, "loss": 3.1362, "step": 42375 }, { "epoch": 2.879467318929202, "grad_norm": 2.5333969593048096, "learning_rate": 6.401939801603479e-05, "loss": 2.957, "step": 42380 }, { "epoch": 2.8798070389998642, "grad_norm": 2.117067813873291, "learning_rate": 6.401515151515152e-05, "loss": 2.7731, "step": 42385 }, { "epoch": 2.880146759070526, "grad_norm": 2.1889822483062744, "learning_rate": 6.401090501426824e-05, "loss": 2.8312, "step": 42390 }, { "epoch": 2.8804864791411875, "grad_norm": 2.8561365604400635, "learning_rate": 6.400665851338497e-05, "loss": 2.9295, "step": 42395 }, { "epoch": 2.8808261992118496, "grad_norm": 2.802196502685547, "learning_rate": 6.40024120125017e-05, "loss": 3.1234, "step": 42400 }, { "epoch": 2.881165919282511, "grad_norm": 2.9400784969329834, "learning_rate": 6.399816551161843e-05, "loss": 2.7293, "step": 42405 }, { "epoch": 2.881505639353173, "grad_norm": 2.5473814010620117, "learning_rate": 6.399391901073516e-05, "loss": 2.9787, "step": 42410 }, { "epoch": 2.881845359423835, "grad_norm": 2.3638710975646973, "learning_rate": 
6.398967250985188e-05, "loss": 3.0195, "step": 42415 }, { "epoch": 2.8821850794944965, "grad_norm": 3.1871178150177, "learning_rate": 6.398542600896861e-05, "loss": 3.2004, "step": 42420 }, { "epoch": 2.882524799565158, "grad_norm": 3.0735414028167725, "learning_rate": 6.398117950808534e-05, "loss": 2.6976, "step": 42425 }, { "epoch": 2.8828645196358202, "grad_norm": 3.481785297393799, "learning_rate": 6.397693300720207e-05, "loss": 2.9718, "step": 42430 }, { "epoch": 2.883204239706482, "grad_norm": 2.3989548683166504, "learning_rate": 6.39726865063188e-05, "loss": 2.8783, "step": 42435 }, { "epoch": 2.8835439597771435, "grad_norm": 2.265214443206787, "learning_rate": 6.396844000543552e-05, "loss": 2.8116, "step": 42440 }, { "epoch": 2.8838836798478056, "grad_norm": 2.5238490104675293, "learning_rate": 6.396419350455225e-05, "loss": 3.1175, "step": 42445 }, { "epoch": 2.884223399918467, "grad_norm": 2.5994927883148193, "learning_rate": 6.395994700366898e-05, "loss": 2.7655, "step": 42450 }, { "epoch": 2.884563119989129, "grad_norm": 2.6285173892974854, "learning_rate": 6.395570050278571e-05, "loss": 3.0349, "step": 42455 }, { "epoch": 2.884902840059791, "grad_norm": 2.2180709838867188, "learning_rate": 6.395145400190244e-05, "loss": 3.0628, "step": 42460 }, { "epoch": 2.8852425601304525, "grad_norm": 2.3714797496795654, "learning_rate": 6.394720750101916e-05, "loss": 2.8545, "step": 42465 }, { "epoch": 2.885582280201114, "grad_norm": 2.7275679111480713, "learning_rate": 6.394296100013589e-05, "loss": 2.8561, "step": 42470 }, { "epoch": 2.8859220002717763, "grad_norm": 2.43982195854187, "learning_rate": 6.393871449925262e-05, "loss": 2.9546, "step": 42475 }, { "epoch": 2.886261720342438, "grad_norm": 2.554250717163086, "learning_rate": 6.393446799836935e-05, "loss": 3.0668, "step": 42480 }, { "epoch": 2.8866014404130995, "grad_norm": 2.075946569442749, "learning_rate": 6.393022149748608e-05, "loss": 2.9442, "step": 42485 }, { "epoch": 2.8869411604837616, 
"grad_norm": 3.356644868850708, "learning_rate": 6.39259749966028e-05, "loss": 2.8794, "step": 42490 }, { "epoch": 2.8872808805544232, "grad_norm": 2.311112403869629, "learning_rate": 6.392172849571953e-05, "loss": 2.9946, "step": 42495 }, { "epoch": 2.887620600625085, "grad_norm": 2.343459367752075, "learning_rate": 6.391748199483626e-05, "loss": 2.7249, "step": 42500 }, { "epoch": 2.887960320695747, "grad_norm": 2.2536301612854004, "learning_rate": 6.391323549395299e-05, "loss": 2.8687, "step": 42505 }, { "epoch": 2.8883000407664086, "grad_norm": 3.3326616287231445, "learning_rate": 6.390898899306972e-05, "loss": 2.8561, "step": 42510 }, { "epoch": 2.88863976083707, "grad_norm": 2.604400634765625, "learning_rate": 6.390474249218644e-05, "loss": 2.5927, "step": 42515 }, { "epoch": 2.8889794809077323, "grad_norm": 3.091397285461426, "learning_rate": 6.390049599130317e-05, "loss": 2.8921, "step": 42520 }, { "epoch": 2.889319200978394, "grad_norm": 2.6026604175567627, "learning_rate": 6.38962494904199e-05, "loss": 2.7503, "step": 42525 }, { "epoch": 2.8896589210490555, "grad_norm": 2.682535171508789, "learning_rate": 6.389200298953663e-05, "loss": 3.0291, "step": 42530 }, { "epoch": 2.8899986411197176, "grad_norm": 2.703031063079834, "learning_rate": 6.388775648865336e-05, "loss": 2.6187, "step": 42535 }, { "epoch": 2.8903383611903792, "grad_norm": 3.758254289627075, "learning_rate": 6.388350998777008e-05, "loss": 3.0204, "step": 42540 }, { "epoch": 2.890678081261041, "grad_norm": 2.6611380577087402, "learning_rate": 6.387926348688681e-05, "loss": 2.8538, "step": 42545 }, { "epoch": 2.891017801331703, "grad_norm": 2.655282497406006, "learning_rate": 6.387501698600353e-05, "loss": 2.8271, "step": 42550 }, { "epoch": 2.8913575214023646, "grad_norm": 2.4591705799102783, "learning_rate": 6.387077048512027e-05, "loss": 2.6762, "step": 42555 }, { "epoch": 2.891697241473026, "grad_norm": 2.673245906829834, "learning_rate": 6.3866523984237e-05, "loss": 3.1023, "step": 42560 
}, { "epoch": 2.8920369615436883, "grad_norm": 3.4211318492889404, "learning_rate": 6.386227748335371e-05, "loss": 3.0904, "step": 42565 }, { "epoch": 2.89237668161435, "grad_norm": 2.283975601196289, "learning_rate": 6.385803098247045e-05, "loss": 2.916, "step": 42570 }, { "epoch": 2.8927164016850115, "grad_norm": 2.3239402770996094, "learning_rate": 6.385378448158718e-05, "loss": 2.878, "step": 42575 }, { "epoch": 2.893056121755673, "grad_norm": 2.4184346199035645, "learning_rate": 6.38495379807039e-05, "loss": 2.7716, "step": 42580 }, { "epoch": 2.8933958418263352, "grad_norm": 2.0175349712371826, "learning_rate": 6.384529147982064e-05, "loss": 2.9142, "step": 42585 }, { "epoch": 2.893735561896997, "grad_norm": 2.925576686859131, "learning_rate": 6.384104497893736e-05, "loss": 2.7365, "step": 42590 }, { "epoch": 2.8940752819676585, "grad_norm": 3.04794979095459, "learning_rate": 6.383679847805408e-05, "loss": 2.9535, "step": 42595 }, { "epoch": 2.8944150020383206, "grad_norm": 2.6904172897338867, "learning_rate": 6.383255197717082e-05, "loss": 2.8784, "step": 42600 }, { "epoch": 2.894754722108982, "grad_norm": 2.3562920093536377, "learning_rate": 6.382830547628755e-05, "loss": 2.5949, "step": 42605 }, { "epoch": 2.895094442179644, "grad_norm": 2.427410364151001, "learning_rate": 6.382405897540426e-05, "loss": 2.8935, "step": 42610 }, { "epoch": 2.8954341622503055, "grad_norm": 3.090660333633423, "learning_rate": 6.3819812474521e-05, "loss": 2.5846, "step": 42615 }, { "epoch": 2.8957738823209676, "grad_norm": 2.0443029403686523, "learning_rate": 6.381556597363772e-05, "loss": 2.8741, "step": 42620 }, { "epoch": 2.896113602391629, "grad_norm": 2.977945327758789, "learning_rate": 6.381131947275445e-05, "loss": 2.7774, "step": 42625 }, { "epoch": 2.896453322462291, "grad_norm": 3.230617046356201, "learning_rate": 6.380707297187119e-05, "loss": 2.7794, "step": 42630 }, { "epoch": 2.896793042532953, "grad_norm": 2.3651633262634277, "learning_rate": 
6.38028264709879e-05, "loss": 2.9267, "step": 42635 }, { "epoch": 2.8971327626036145, "grad_norm": 2.765608310699463, "learning_rate": 6.379857997010463e-05, "loss": 2.7873, "step": 42640 }, { "epoch": 2.897472482674276, "grad_norm": 2.665827512741089, "learning_rate": 6.379433346922137e-05, "loss": 3.1831, "step": 42645 }, { "epoch": 2.8978122027449382, "grad_norm": 1.826034665107727, "learning_rate": 6.379008696833809e-05, "loss": 2.9061, "step": 42650 }, { "epoch": 2.8981519228156, "grad_norm": 2.3546698093414307, "learning_rate": 6.378584046745481e-05, "loss": 2.9096, "step": 42655 }, { "epoch": 2.8984916428862615, "grad_norm": 2.829352617263794, "learning_rate": 6.378159396657156e-05, "loss": 2.8983, "step": 42660 }, { "epoch": 2.8988313629569236, "grad_norm": 2.5470399856567383, "learning_rate": 6.377734746568827e-05, "loss": 3.069, "step": 42665 }, { "epoch": 2.899171083027585, "grad_norm": 2.7082109451293945, "learning_rate": 6.3773100964805e-05, "loss": 2.7477, "step": 42670 }, { "epoch": 2.899510803098247, "grad_norm": 2.406224250793457, "learning_rate": 6.376885446392174e-05, "loss": 2.9585, "step": 42675 }, { "epoch": 2.899850523168909, "grad_norm": 2.862260103225708, "learning_rate": 6.376460796303845e-05, "loss": 2.8541, "step": 42680 }, { "epoch": 2.9001902432395705, "grad_norm": 3.009321928024292, "learning_rate": 6.376036146215518e-05, "loss": 2.9235, "step": 42685 }, { "epoch": 2.900529963310232, "grad_norm": 2.966660737991333, "learning_rate": 6.375611496127192e-05, "loss": 2.7794, "step": 42690 }, { "epoch": 2.9008696833808942, "grad_norm": 2.590205430984497, "learning_rate": 6.375186846038864e-05, "loss": 2.9247, "step": 42695 }, { "epoch": 2.901209403451556, "grad_norm": 2.357530355453491, "learning_rate": 6.374762195950537e-05, "loss": 2.8064, "step": 42700 }, { "epoch": 2.9015491235222175, "grad_norm": 3.0211453437805176, "learning_rate": 6.37433754586221e-05, "loss": 3.1684, "step": 42705 }, { "epoch": 2.9018888435928796, "grad_norm": 
2.6337366104125977, "learning_rate": 6.373912895773882e-05, "loss": 2.9375, "step": 42710 }, { "epoch": 2.902228563663541, "grad_norm": 2.5670831203460693, "learning_rate": 6.373488245685555e-05, "loss": 2.83, "step": 42715 }, { "epoch": 2.902568283734203, "grad_norm": 2.681105852127075, "learning_rate": 6.373063595597228e-05, "loss": 2.7457, "step": 42720 }, { "epoch": 2.902908003804865, "grad_norm": 3.2885146141052246, "learning_rate": 6.372638945508901e-05, "loss": 2.9336, "step": 42725 }, { "epoch": 2.9032477238755265, "grad_norm": 2.730564832687378, "learning_rate": 6.372214295420573e-05, "loss": 2.8601, "step": 42730 }, { "epoch": 2.903587443946188, "grad_norm": 2.087347984313965, "learning_rate": 6.371789645332246e-05, "loss": 2.9232, "step": 42735 }, { "epoch": 2.9039271640168502, "grad_norm": 3.414072036743164, "learning_rate": 6.371364995243919e-05, "loss": 2.8434, "step": 42740 }, { "epoch": 2.904266884087512, "grad_norm": 2.7909038066864014, "learning_rate": 6.370940345155592e-05, "loss": 2.7083, "step": 42745 }, { "epoch": 2.9046066041581735, "grad_norm": 2.3120107650756836, "learning_rate": 6.370515695067265e-05, "loss": 2.8208, "step": 42750 }, { "epoch": 2.9049463242288356, "grad_norm": 2.299196243286133, "learning_rate": 6.370091044978937e-05, "loss": 2.756, "step": 42755 }, { "epoch": 2.905286044299497, "grad_norm": 2.8555119037628174, "learning_rate": 6.36966639489061e-05, "loss": 2.9248, "step": 42760 }, { "epoch": 2.905625764370159, "grad_norm": 2.811492443084717, "learning_rate": 6.369241744802283e-05, "loss": 2.8161, "step": 42765 }, { "epoch": 2.905965484440821, "grad_norm": 1.9016971588134766, "learning_rate": 6.368817094713956e-05, "loss": 2.9139, "step": 42770 }, { "epoch": 2.9063052045114826, "grad_norm": 2.2157418727874756, "learning_rate": 6.368392444625629e-05, "loss": 3.1089, "step": 42775 }, { "epoch": 2.906644924582144, "grad_norm": 3.4433884620666504, "learning_rate": 6.367967794537301e-05, "loss": 2.9751, "step": 42780 }, { 
"epoch": 2.9069846446528063, "grad_norm": 2.376004695892334, "learning_rate": 6.367543144448974e-05, "loss": 2.6181, "step": 42785 }, { "epoch": 2.907324364723468, "grad_norm": 2.470858573913574, "learning_rate": 6.367118494360647e-05, "loss": 2.8412, "step": 42790 }, { "epoch": 2.9076640847941295, "grad_norm": 2.531801700592041, "learning_rate": 6.36669384427232e-05, "loss": 2.784, "step": 42795 }, { "epoch": 2.9080038048647916, "grad_norm": 2.175307273864746, "learning_rate": 6.366269194183993e-05, "loss": 2.9428, "step": 42800 }, { "epoch": 2.9083435249354532, "grad_norm": 3.2641522884368896, "learning_rate": 6.365844544095666e-05, "loss": 2.8295, "step": 42805 }, { "epoch": 2.908683245006115, "grad_norm": 2.4607620239257812, "learning_rate": 6.365419894007338e-05, "loss": 3.0663, "step": 42810 }, { "epoch": 2.909022965076777, "grad_norm": 2.373431444168091, "learning_rate": 6.364995243919011e-05, "loss": 3.0383, "step": 42815 }, { "epoch": 2.9093626851474386, "grad_norm": 2.1411023139953613, "learning_rate": 6.364570593830684e-05, "loss": 2.7878, "step": 42820 }, { "epoch": 2.9097024052181, "grad_norm": 2.241614818572998, "learning_rate": 6.364145943742357e-05, "loss": 2.89, "step": 42825 }, { "epoch": 2.9100421252887623, "grad_norm": 1.903334379196167, "learning_rate": 6.36372129365403e-05, "loss": 2.8315, "step": 42830 }, { "epoch": 2.910381845359424, "grad_norm": 2.546024799346924, "learning_rate": 6.363296643565702e-05, "loss": 3.0155, "step": 42835 }, { "epoch": 2.9107215654300855, "grad_norm": 2.237835168838501, "learning_rate": 6.362871993477375e-05, "loss": 3.0227, "step": 42840 }, { "epoch": 2.9110612855007476, "grad_norm": 2.0416884422302246, "learning_rate": 6.362447343389048e-05, "loss": 2.8689, "step": 42845 }, { "epoch": 2.9114010055714092, "grad_norm": 2.5653765201568604, "learning_rate": 6.362022693300721e-05, "loss": 2.8174, "step": 42850 }, { "epoch": 2.911740725642071, "grad_norm": 3.047692060470581, "learning_rate": 6.361598043212394e-05, 
"loss": 2.8751, "step": 42855 }, { "epoch": 2.912080445712733, "grad_norm": 2.9215328693389893, "learning_rate": 6.361173393124066e-05, "loss": 2.895, "step": 42860 }, { "epoch": 2.9124201657833946, "grad_norm": 2.808067798614502, "learning_rate": 6.360748743035739e-05, "loss": 3.0477, "step": 42865 }, { "epoch": 2.912759885854056, "grad_norm": 2.919793128967285, "learning_rate": 6.360324092947412e-05, "loss": 2.9139, "step": 42870 }, { "epoch": 2.9130996059247183, "grad_norm": 2.492488145828247, "learning_rate": 6.359899442859085e-05, "loss": 2.9184, "step": 42875 }, { "epoch": 2.91343932599538, "grad_norm": 2.000537157058716, "learning_rate": 6.359474792770758e-05, "loss": 2.873, "step": 42880 }, { "epoch": 2.9137790460660415, "grad_norm": 2.1350104808807373, "learning_rate": 6.35905014268243e-05, "loss": 3.2367, "step": 42885 }, { "epoch": 2.9141187661367036, "grad_norm": 2.4465792179107666, "learning_rate": 6.358625492594103e-05, "loss": 2.7766, "step": 42890 }, { "epoch": 2.9144584862073653, "grad_norm": 2.370462417602539, "learning_rate": 6.358200842505776e-05, "loss": 2.9266, "step": 42895 }, { "epoch": 2.914798206278027, "grad_norm": 2.8273375034332275, "learning_rate": 6.357776192417449e-05, "loss": 2.662, "step": 42900 }, { "epoch": 2.915137926348689, "grad_norm": 2.0778305530548096, "learning_rate": 6.35735154232912e-05, "loss": 3.0374, "step": 42905 }, { "epoch": 2.9154776464193506, "grad_norm": 2.5056045055389404, "learning_rate": 6.356926892240794e-05, "loss": 2.8407, "step": 42910 }, { "epoch": 2.915817366490012, "grad_norm": 2.0039525032043457, "learning_rate": 6.356502242152467e-05, "loss": 2.9541, "step": 42915 }, { "epoch": 2.916157086560674, "grad_norm": 3.165757656097412, "learning_rate": 6.356077592064139e-05, "loss": 2.9218, "step": 42920 }, { "epoch": 2.916496806631336, "grad_norm": 2.7333691120147705, "learning_rate": 6.355652941975813e-05, "loss": 2.8425, "step": 42925 }, { "epoch": 2.9168365267019976, "grad_norm": 2.4824376106262207, 
"learning_rate": 6.355228291887486e-05, "loss": 2.7608, "step": 42930 }, { "epoch": 2.917176246772659, "grad_norm": 2.7838735580444336, "learning_rate": 6.354803641799157e-05, "loss": 2.9769, "step": 42935 }, { "epoch": 2.9175159668433213, "grad_norm": 2.259155750274658, "learning_rate": 6.354378991710831e-05, "loss": 3.074, "step": 42940 }, { "epoch": 2.917855686913983, "grad_norm": 1.9683998823165894, "learning_rate": 6.353954341622504e-05, "loss": 3.0854, "step": 42945 }, { "epoch": 2.9181954069846445, "grad_norm": 2.452773332595825, "learning_rate": 6.353529691534175e-05, "loss": 2.7691, "step": 42950 }, { "epoch": 2.918535127055306, "grad_norm": 2.9236342906951904, "learning_rate": 6.35310504144585e-05, "loss": 2.7306, "step": 42955 }, { "epoch": 2.9188748471259682, "grad_norm": 2.681786060333252, "learning_rate": 6.352680391357522e-05, "loss": 2.8184, "step": 42960 }, { "epoch": 2.91921456719663, "grad_norm": 2.294116258621216, "learning_rate": 6.352255741269194e-05, "loss": 3.0274, "step": 42965 }, { "epoch": 2.9195542872672915, "grad_norm": 3.2736148834228516, "learning_rate": 6.351831091180868e-05, "loss": 3.025, "step": 42970 }, { "epoch": 2.9198940073379536, "grad_norm": 2.139836072921753, "learning_rate": 6.35140644109254e-05, "loss": 2.7493, "step": 42975 }, { "epoch": 2.920233727408615, "grad_norm": 2.2349212169647217, "learning_rate": 6.350981791004212e-05, "loss": 3.0556, "step": 42980 }, { "epoch": 2.920573447479277, "grad_norm": 2.960188150405884, "learning_rate": 6.350557140915886e-05, "loss": 2.7569, "step": 42985 }, { "epoch": 2.920913167549939, "grad_norm": 3.2165050506591797, "learning_rate": 6.350132490827558e-05, "loss": 2.9504, "step": 42990 }, { "epoch": 2.9212528876206005, "grad_norm": 2.0773661136627197, "learning_rate": 6.34970784073923e-05, "loss": 2.9503, "step": 42995 }, { "epoch": 2.921592607691262, "grad_norm": 2.158926010131836, "learning_rate": 6.349283190650905e-05, "loss": 2.9059, "step": 43000 }, { "epoch": 
2.9219323277619242, "grad_norm": 2.513636827468872, "learning_rate": 6.348858540562576e-05, "loss": 3.1532, "step": 43005 }, { "epoch": 2.922272047832586, "grad_norm": 1.8973510265350342, "learning_rate": 6.348433890474249e-05, "loss": 2.9038, "step": 43010 }, { "epoch": 2.9226117679032475, "grad_norm": 2.600379705429077, "learning_rate": 6.348009240385923e-05, "loss": 2.5417, "step": 43015 }, { "epoch": 2.9229514879739096, "grad_norm": 2.9058995246887207, "learning_rate": 6.347584590297595e-05, "loss": 2.8264, "step": 43020 }, { "epoch": 2.923291208044571, "grad_norm": 2.3049614429473877, "learning_rate": 6.347159940209267e-05, "loss": 3.0184, "step": 43025 }, { "epoch": 2.923630928115233, "grad_norm": 10.728985786437988, "learning_rate": 6.346735290120942e-05, "loss": 2.8307, "step": 43030 }, { "epoch": 2.923970648185895, "grad_norm": 2.0527853965759277, "learning_rate": 6.346310640032613e-05, "loss": 2.743, "step": 43035 }, { "epoch": 2.9243103682565565, "grad_norm": 2.68605899810791, "learning_rate": 6.345885989944286e-05, "loss": 2.9973, "step": 43040 }, { "epoch": 2.924650088327218, "grad_norm": 2.482227325439453, "learning_rate": 6.345461339855959e-05, "loss": 3.0287, "step": 43045 }, { "epoch": 2.9249898083978803, "grad_norm": 2.2620441913604736, "learning_rate": 6.345036689767631e-05, "loss": 3.0198, "step": 43050 }, { "epoch": 2.925329528468542, "grad_norm": 2.494870662689209, "learning_rate": 6.344612039679304e-05, "loss": 2.7754, "step": 43055 }, { "epoch": 2.9256692485392035, "grad_norm": 2.4927690029144287, "learning_rate": 6.344187389590977e-05, "loss": 2.8307, "step": 43060 }, { "epoch": 2.9260089686098656, "grad_norm": 2.370493173599243, "learning_rate": 6.34376273950265e-05, "loss": 3.0575, "step": 43065 }, { "epoch": 2.926348688680527, "grad_norm": 2.65486216545105, "learning_rate": 6.343338089414323e-05, "loss": 2.7006, "step": 43070 }, { "epoch": 2.926688408751189, "grad_norm": 3.183997392654419, "learning_rate": 6.342913439325995e-05, "loss": 
2.9067, "step": 43075 }, { "epoch": 2.927028128821851, "grad_norm": 3.3517072200775146, "learning_rate": 6.342488789237668e-05, "loss": 2.9482, "step": 43080 }, { "epoch": 2.9273678488925126, "grad_norm": 2.780806541442871, "learning_rate": 6.342064139149341e-05, "loss": 2.9847, "step": 43085 }, { "epoch": 2.927707568963174, "grad_norm": 2.150066375732422, "learning_rate": 6.341639489061014e-05, "loss": 2.6754, "step": 43090 }, { "epoch": 2.9280472890338363, "grad_norm": 2.2423152923583984, "learning_rate": 6.341214838972687e-05, "loss": 2.6866, "step": 43095 }, { "epoch": 2.928387009104498, "grad_norm": 2.6974401473999023, "learning_rate": 6.34079018888436e-05, "loss": 2.7768, "step": 43100 }, { "epoch": 2.9287267291751595, "grad_norm": 3.1187562942504883, "learning_rate": 6.340365538796032e-05, "loss": 2.8514, "step": 43105 }, { "epoch": 2.9290664492458216, "grad_norm": 2.2111377716064453, "learning_rate": 6.339940888707705e-05, "loss": 2.8802, "step": 43110 }, { "epoch": 2.9294061693164832, "grad_norm": 2.4621052742004395, "learning_rate": 6.339516238619379e-05, "loss": 3.1268, "step": 43115 }, { "epoch": 2.929745889387145, "grad_norm": 2.816563129425049, "learning_rate": 6.33909158853105e-05, "loss": 2.6262, "step": 43120 }, { "epoch": 2.930085609457807, "grad_norm": 1.9172126054763794, "learning_rate": 6.338666938442723e-05, "loss": 3.1015, "step": 43125 }, { "epoch": 2.9304253295284686, "grad_norm": 2.972682237625122, "learning_rate": 6.338242288354396e-05, "loss": 2.8274, "step": 43130 }, { "epoch": 2.93076504959913, "grad_norm": 2.4529919624328613, "learning_rate": 6.337817638266069e-05, "loss": 2.4313, "step": 43135 }, { "epoch": 2.9311047696697923, "grad_norm": 2.4640016555786133, "learning_rate": 6.337392988177742e-05, "loss": 2.7124, "step": 43140 }, { "epoch": 2.931444489740454, "grad_norm": 1.9889428615570068, "learning_rate": 6.336968338089415e-05, "loss": 3.1961, "step": 43145 }, { "epoch": 2.9317842098111155, "grad_norm": 2.5904924869537354, 
"learning_rate": 6.336543688001087e-05, "loss": 3.1429, "step": 43150 }, { "epoch": 2.9321239298817776, "grad_norm": 2.9119553565979004, "learning_rate": 6.33611903791276e-05, "loss": 2.9641, "step": 43155 }, { "epoch": 2.9324636499524392, "grad_norm": 2.802194833755493, "learning_rate": 6.335694387824433e-05, "loss": 2.9945, "step": 43160 }, { "epoch": 2.932803370023101, "grad_norm": 2.0449118614196777, "learning_rate": 6.335269737736106e-05, "loss": 2.9299, "step": 43165 }, { "epoch": 2.933143090093763, "grad_norm": 3.0839691162109375, "learning_rate": 6.334845087647779e-05, "loss": 2.541, "step": 43170 }, { "epoch": 2.9334828101644246, "grad_norm": 3.0113909244537354, "learning_rate": 6.334420437559451e-05, "loss": 2.9088, "step": 43175 }, { "epoch": 2.933822530235086, "grad_norm": 3.385293960571289, "learning_rate": 6.333995787471124e-05, "loss": 3.0459, "step": 43180 }, { "epoch": 2.9341622503057483, "grad_norm": 2.295408248901367, "learning_rate": 6.333571137382797e-05, "loss": 2.8665, "step": 43185 }, { "epoch": 2.93450197037641, "grad_norm": 2.8694491386413574, "learning_rate": 6.33314648729447e-05, "loss": 2.9281, "step": 43190 }, { "epoch": 2.9348416904470715, "grad_norm": 3.023141384124756, "learning_rate": 6.332721837206143e-05, "loss": 2.7896, "step": 43195 }, { "epoch": 2.9351814105177336, "grad_norm": 2.8131825923919678, "learning_rate": 6.332297187117815e-05, "loss": 2.942, "step": 43200 }, { "epoch": 2.9355211305883953, "grad_norm": 3.0802853107452393, "learning_rate": 6.331872537029488e-05, "loss": 3.0019, "step": 43205 }, { "epoch": 2.935860850659057, "grad_norm": 2.3259482383728027, "learning_rate": 6.331447886941161e-05, "loss": 2.9986, "step": 43210 }, { "epoch": 2.936200570729719, "grad_norm": 2.3877856731414795, "learning_rate": 6.331023236852834e-05, "loss": 2.9524, "step": 43215 }, { "epoch": 2.9365402908003806, "grad_norm": 2.2158284187316895, "learning_rate": 6.330598586764507e-05, "loss": 2.9328, "step": 43220 }, { "epoch": 
2.9368800108710422, "grad_norm": 2.3816816806793213, "learning_rate": 6.33017393667618e-05, "loss": 2.8693, "step": 43225 }, { "epoch": 2.9372197309417043, "grad_norm": 2.766390323638916, "learning_rate": 6.329749286587852e-05, "loss": 2.847, "step": 43230 }, { "epoch": 2.937559451012366, "grad_norm": 2.726473569869995, "learning_rate": 6.329324636499525e-05, "loss": 2.769, "step": 43235 }, { "epoch": 2.9378991710830276, "grad_norm": 2.82566499710083, "learning_rate": 6.328899986411198e-05, "loss": 2.925, "step": 43240 }, { "epoch": 2.9382388911536896, "grad_norm": 2.173187494277954, "learning_rate": 6.328475336322869e-05, "loss": 2.7865, "step": 43245 }, { "epoch": 2.9385786112243513, "grad_norm": 2.1216158866882324, "learning_rate": 6.328050686234543e-05, "loss": 2.8729, "step": 43250 }, { "epoch": 2.938918331295013, "grad_norm": 2.6849758625030518, "learning_rate": 6.327626036146216e-05, "loss": 2.9976, "step": 43255 }, { "epoch": 2.9392580513656745, "grad_norm": 2.536841630935669, "learning_rate": 6.327201386057888e-05, "loss": 2.8386, "step": 43260 }, { "epoch": 2.9395977714363366, "grad_norm": 3.0899136066436768, "learning_rate": 6.326776735969562e-05, "loss": 2.7906, "step": 43265 }, { "epoch": 2.9399374915069982, "grad_norm": 2.432621955871582, "learning_rate": 6.326352085881235e-05, "loss": 2.931, "step": 43270 }, { "epoch": 2.94027721157766, "grad_norm": 2.0455474853515625, "learning_rate": 6.325927435792906e-05, "loss": 2.73, "step": 43275 }, { "epoch": 2.940616931648322, "grad_norm": 3.2633211612701416, "learning_rate": 6.32550278570458e-05, "loss": 2.9331, "step": 43280 }, { "epoch": 2.9409566517189836, "grad_norm": 2.7019169330596924, "learning_rate": 6.325078135616253e-05, "loss": 2.9192, "step": 43285 }, { "epoch": 2.941296371789645, "grad_norm": 2.0277504920959473, "learning_rate": 6.324653485527924e-05, "loss": 2.9421, "step": 43290 }, { "epoch": 2.941636091860307, "grad_norm": 1.9713400602340698, "learning_rate": 6.324228835439599e-05, "loss": 
2.9884, "step": 43295 }, { "epoch": 2.941975811930969, "grad_norm": 2.627044677734375, "learning_rate": 6.323804185351271e-05, "loss": 3.0583, "step": 43300 }, { "epoch": 2.9423155320016305, "grad_norm": 2.0527448654174805, "learning_rate": 6.323379535262943e-05, "loss": 2.6098, "step": 43305 }, { "epoch": 2.942655252072292, "grad_norm": 2.2679405212402344, "learning_rate": 6.322954885174617e-05, "loss": 2.9053, "step": 43310 }, { "epoch": 2.9429949721429542, "grad_norm": 2.694499969482422, "learning_rate": 6.32253023508629e-05, "loss": 2.7053, "step": 43315 }, { "epoch": 2.943334692213616, "grad_norm": 2.882115364074707, "learning_rate": 6.322105584997961e-05, "loss": 2.9821, "step": 43320 }, { "epoch": 2.9436744122842775, "grad_norm": 2.865227222442627, "learning_rate": 6.321680934909635e-05, "loss": 2.6854, "step": 43325 }, { "epoch": 2.9440141323549396, "grad_norm": 2.7139840126037598, "learning_rate": 6.321256284821307e-05, "loss": 2.796, "step": 43330 }, { "epoch": 2.944353852425601, "grad_norm": 2.225299119949341, "learning_rate": 6.32083163473298e-05, "loss": 2.8714, "step": 43335 }, { "epoch": 2.944693572496263, "grad_norm": 2.0879437923431396, "learning_rate": 6.320406984644654e-05, "loss": 3.213, "step": 43340 }, { "epoch": 2.945033292566925, "grad_norm": 2.7704555988311768, "learning_rate": 6.319982334556325e-05, "loss": 2.9431, "step": 43345 }, { "epoch": 2.9453730126375866, "grad_norm": 2.4820826053619385, "learning_rate": 6.319557684467998e-05, "loss": 2.9517, "step": 43350 }, { "epoch": 2.945712732708248, "grad_norm": 2.592560291290283, "learning_rate": 6.319133034379672e-05, "loss": 2.9859, "step": 43355 }, { "epoch": 2.9460524527789103, "grad_norm": 2.277372121810913, "learning_rate": 6.318708384291344e-05, "loss": 2.8529, "step": 43360 }, { "epoch": 2.946392172849572, "grad_norm": 2.2645270824432373, "learning_rate": 6.318283734203016e-05, "loss": 2.8192, "step": 43365 }, { "epoch": 2.9467318929202335, "grad_norm": 3.022289276123047, 
"learning_rate": 6.31785908411469e-05, "loss": 3.133, "step": 43370 }, { "epoch": 2.9470716129908956, "grad_norm": 2.3428568840026855, "learning_rate": 6.317434434026362e-05, "loss": 2.9805, "step": 43375 }, { "epoch": 2.9474113330615572, "grad_norm": 2.501491069793701, "learning_rate": 6.317009783938035e-05, "loss": 2.7155, "step": 43380 }, { "epoch": 2.947751053132219, "grad_norm": 2.4294042587280273, "learning_rate": 6.316585133849709e-05, "loss": 2.7876, "step": 43385 }, { "epoch": 2.948090773202881, "grad_norm": 2.3899755477905273, "learning_rate": 6.31616048376138e-05, "loss": 2.8115, "step": 43390 }, { "epoch": 2.9484304932735426, "grad_norm": 2.3364574909210205, "learning_rate": 6.315735833673053e-05, "loss": 2.9227, "step": 43395 }, { "epoch": 2.948770213344204, "grad_norm": 2.399021863937378, "learning_rate": 6.315311183584726e-05, "loss": 2.9328, "step": 43400 }, { "epoch": 2.9491099334148663, "grad_norm": 2.3216590881347656, "learning_rate": 6.314886533496399e-05, "loss": 2.7227, "step": 43405 }, { "epoch": 2.949449653485528, "grad_norm": 3.714038372039795, "learning_rate": 6.314461883408072e-05, "loss": 2.6603, "step": 43410 }, { "epoch": 2.9497893735561895, "grad_norm": 2.296725034713745, "learning_rate": 6.314037233319744e-05, "loss": 2.845, "step": 43415 }, { "epoch": 2.9501290936268516, "grad_norm": 2.579305410385132, "learning_rate": 6.313612583231417e-05, "loss": 2.7024, "step": 43420 }, { "epoch": 2.9504688136975132, "grad_norm": 2.5859363079071045, "learning_rate": 6.31318793314309e-05, "loss": 2.9255, "step": 43425 }, { "epoch": 2.950808533768175, "grad_norm": 3.3377249240875244, "learning_rate": 6.312763283054763e-05, "loss": 3.0615, "step": 43430 }, { "epoch": 2.951148253838837, "grad_norm": 3.21527099609375, "learning_rate": 6.312338632966436e-05, "loss": 3.0399, "step": 43435 }, { "epoch": 2.9514879739094986, "grad_norm": 2.441737413406372, "learning_rate": 6.311913982878108e-05, "loss": 3.0567, "step": 43440 }, { "epoch": 
2.95182769398016, "grad_norm": 2.572805881500244, "learning_rate": 6.311489332789781e-05, "loss": 3.0194, "step": 43445 }, { "epoch": 2.9521674140508223, "grad_norm": 2.7967517375946045, "learning_rate": 6.311064682701454e-05, "loss": 2.6892, "step": 43450 }, { "epoch": 2.952507134121484, "grad_norm": 3.1797385215759277, "learning_rate": 6.310640032613128e-05, "loss": 2.6515, "step": 43455 }, { "epoch": 2.9528468541921455, "grad_norm": 2.39595103263855, "learning_rate": 6.3102153825248e-05, "loss": 2.9515, "step": 43460 }, { "epoch": 2.9531865742628076, "grad_norm": 2.1616830825805664, "learning_rate": 6.309790732436472e-05, "loss": 2.8582, "step": 43465 }, { "epoch": 2.9535262943334692, "grad_norm": 2.7316129207611084, "learning_rate": 6.309366082348145e-05, "loss": 3.0753, "step": 43470 }, { "epoch": 2.953866014404131, "grad_norm": 2.6425704956054688, "learning_rate": 6.308941432259818e-05, "loss": 2.8106, "step": 43475 }, { "epoch": 2.954205734474793, "grad_norm": 3.6827657222747803, "learning_rate": 6.308516782171491e-05, "loss": 2.9449, "step": 43480 }, { "epoch": 2.9545454545454546, "grad_norm": 1.7239692211151123, "learning_rate": 6.308092132083164e-05, "loss": 3.0702, "step": 43485 }, { "epoch": 2.954885174616116, "grad_norm": 2.322948455810547, "learning_rate": 6.307667481994836e-05, "loss": 3.0854, "step": 43490 }, { "epoch": 2.9552248946867783, "grad_norm": 2.4289119243621826, "learning_rate": 6.307242831906509e-05, "loss": 2.7504, "step": 43495 }, { "epoch": 2.95556461475744, "grad_norm": 2.6094188690185547, "learning_rate": 6.306818181818182e-05, "loss": 2.8131, "step": 43500 }, { "epoch": 2.9559043348281016, "grad_norm": 2.251466751098633, "learning_rate": 6.306393531729855e-05, "loss": 2.6623, "step": 43505 }, { "epoch": 2.9562440548987636, "grad_norm": 2.5696821212768555, "learning_rate": 6.305968881641528e-05, "loss": 2.9544, "step": 43510 }, { "epoch": 2.9565837749694253, "grad_norm": 2.497166633605957, "learning_rate": 6.3055442315532e-05, 
"loss": 2.3506, "step": 43515 }, { "epoch": 2.956923495040087, "grad_norm": 2.0636487007141113, "learning_rate": 6.305119581464873e-05, "loss": 2.7172, "step": 43520 }, { "epoch": 2.957263215110749, "grad_norm": 2.368767499923706, "learning_rate": 6.304694931376546e-05, "loss": 2.8507, "step": 43525 }, { "epoch": 2.9576029351814106, "grad_norm": 2.841017961502075, "learning_rate": 6.304270281288219e-05, "loss": 3.0685, "step": 43530 }, { "epoch": 2.9579426552520722, "grad_norm": 2.4968183040618896, "learning_rate": 6.303845631199892e-05, "loss": 2.6671, "step": 43535 }, { "epoch": 2.9582823753227343, "grad_norm": 2.8350741863250732, "learning_rate": 6.303420981111565e-05, "loss": 2.7609, "step": 43540 }, { "epoch": 2.958622095393396, "grad_norm": 2.4903788566589355, "learning_rate": 6.302996331023237e-05, "loss": 3.039, "step": 43545 }, { "epoch": 2.9589618154640576, "grad_norm": 2.471454620361328, "learning_rate": 6.30257168093491e-05, "loss": 3.014, "step": 43550 }, { "epoch": 2.9593015355347196, "grad_norm": 2.6847634315490723, "learning_rate": 6.302147030846583e-05, "loss": 2.4716, "step": 43555 }, { "epoch": 2.9596412556053813, "grad_norm": 2.8549342155456543, "learning_rate": 6.301722380758256e-05, "loss": 2.8611, "step": 43560 }, { "epoch": 2.959980975676043, "grad_norm": 2.5478858947753906, "learning_rate": 6.301297730669929e-05, "loss": 2.5325, "step": 43565 }, { "epoch": 2.960320695746705, "grad_norm": 3.203347682952881, "learning_rate": 6.300873080581601e-05, "loss": 2.6735, "step": 43570 }, { "epoch": 2.9606604158173666, "grad_norm": 2.6403164863586426, "learning_rate": 6.300448430493274e-05, "loss": 2.9435, "step": 43575 }, { "epoch": 2.9610001358880282, "grad_norm": 2.842076301574707, "learning_rate": 6.300023780404947e-05, "loss": 2.9122, "step": 43580 }, { "epoch": 2.9613398559586903, "grad_norm": 3.105372667312622, "learning_rate": 6.29959913031662e-05, "loss": 2.8843, "step": 43585 }, { "epoch": 2.961679576029352, "grad_norm": 2.025050640106201, 
"learning_rate": 6.299174480228293e-05, "loss": 3.1069, "step": 43590 }, { "epoch": 2.9620192961000136, "grad_norm": 2.532238245010376, "learning_rate": 6.298749830139965e-05, "loss": 2.9374, "step": 43595 }, { "epoch": 2.962359016170675, "grad_norm": 2.429966449737549, "learning_rate": 6.298325180051637e-05, "loss": 3.0115, "step": 43600 }, { "epoch": 2.9626987362413373, "grad_norm": 2.6659014225006104, "learning_rate": 6.297900529963311e-05, "loss": 2.9745, "step": 43605 }, { "epoch": 2.963038456311999, "grad_norm": 2.7694625854492188, "learning_rate": 6.297475879874984e-05, "loss": 2.9184, "step": 43610 }, { "epoch": 2.9633781763826605, "grad_norm": 2.556886672973633, "learning_rate": 6.297051229786655e-05, "loss": 2.706, "step": 43615 }, { "epoch": 2.9637178964533226, "grad_norm": 2.5256528854370117, "learning_rate": 6.296626579698329e-05, "loss": 2.9665, "step": 43620 }, { "epoch": 2.9640576165239843, "grad_norm": 2.4932057857513428, "learning_rate": 6.296286859627668e-05, "loss": 3.0581, "step": 43625 }, { "epoch": 2.964397336594646, "grad_norm": 2.669119119644165, "learning_rate": 6.295862209539339e-05, "loss": 2.671, "step": 43630 }, { "epoch": 2.9647370566653075, "grad_norm": 3.0221590995788574, "learning_rate": 6.295437559451013e-05, "loss": 2.8973, "step": 43635 }, { "epoch": 2.9650767767359696, "grad_norm": 3.0703744888305664, "learning_rate": 6.295012909362686e-05, "loss": 2.9576, "step": 43640 }, { "epoch": 2.965416496806631, "grad_norm": 2.3150134086608887, "learning_rate": 6.294588259274357e-05, "loss": 2.86, "step": 43645 }, { "epoch": 2.965756216877293, "grad_norm": 2.537323236465454, "learning_rate": 6.294163609186032e-05, "loss": 2.9235, "step": 43650 }, { "epoch": 2.966095936947955, "grad_norm": 2.178290605545044, "learning_rate": 6.293738959097703e-05, "loss": 3.0093, "step": 43655 }, { "epoch": 2.9664356570186166, "grad_norm": 2.388913154602051, "learning_rate": 6.293314309009377e-05, "loss": 2.961, "step": 43660 }, { "epoch": 
2.966775377089278, "grad_norm": 2.870378255844116, "learning_rate": 6.29288965892105e-05, "loss": 2.9914, "step": 43665 }, { "epoch": 2.9671150971599403, "grad_norm": 2.750117301940918, "learning_rate": 6.292465008832721e-05, "loss": 3.0924, "step": 43670 }, { "epoch": 2.967454817230602, "grad_norm": 2.399247884750366, "learning_rate": 6.292040358744396e-05, "loss": 2.8835, "step": 43675 }, { "epoch": 2.9677945373012635, "grad_norm": 2.588620662689209, "learning_rate": 6.291615708656068e-05, "loss": 2.7579, "step": 43680 }, { "epoch": 2.9681342573719256, "grad_norm": 2.214627265930176, "learning_rate": 6.29119105856774e-05, "loss": 2.9608, "step": 43685 }, { "epoch": 2.9684739774425872, "grad_norm": 2.442462682723999, "learning_rate": 6.290766408479414e-05, "loss": 2.6808, "step": 43690 }, { "epoch": 2.968813697513249, "grad_norm": 2.9900472164154053, "learning_rate": 6.290341758391087e-05, "loss": 2.9437, "step": 43695 }, { "epoch": 2.969153417583911, "grad_norm": 3.1464250087738037, "learning_rate": 6.289917108302758e-05, "loss": 2.968, "step": 43700 }, { "epoch": 2.9694931376545726, "grad_norm": 2.5165774822235107, "learning_rate": 6.289492458214432e-05, "loss": 2.7649, "step": 43705 }, { "epoch": 2.969832857725234, "grad_norm": 2.4301364421844482, "learning_rate": 6.289067808126105e-05, "loss": 2.8473, "step": 43710 }, { "epoch": 2.9701725777958963, "grad_norm": 2.8239126205444336, "learning_rate": 6.288643158037777e-05, "loss": 2.7647, "step": 43715 }, { "epoch": 2.970512297866558, "grad_norm": 2.444150924682617, "learning_rate": 6.288218507949451e-05, "loss": 2.6592, "step": 43720 }, { "epoch": 2.9708520179372195, "grad_norm": 2.6786327362060547, "learning_rate": 6.287793857861122e-05, "loss": 2.7567, "step": 43725 }, { "epoch": 2.9711917380078816, "grad_norm": 3.554654121398926, "learning_rate": 6.287369207772795e-05, "loss": 2.9391, "step": 43730 }, { "epoch": 2.9715314580785432, "grad_norm": 2.7705936431884766, "learning_rate": 6.286944557684469e-05, 
"loss": 2.7993, "step": 43735 }, { "epoch": 2.971871178149205, "grad_norm": 2.884589195251465, "learning_rate": 6.28651990759614e-05, "loss": 2.8092, "step": 43740 }, { "epoch": 2.972210898219867, "grad_norm": 2.6871585845947266, "learning_rate": 6.286095257507813e-05, "loss": 3.0513, "step": 43745 }, { "epoch": 2.9725506182905286, "grad_norm": 2.528822660446167, "learning_rate": 6.285670607419488e-05, "loss": 2.9363, "step": 43750 }, { "epoch": 2.97289033836119, "grad_norm": 3.0709316730499268, "learning_rate": 6.285245957331159e-05, "loss": 2.8959, "step": 43755 }, { "epoch": 2.9732300584318523, "grad_norm": 2.5555033683776855, "learning_rate": 6.284821307242832e-05, "loss": 3.0097, "step": 43760 }, { "epoch": 2.973569778502514, "grad_norm": 3.3990583419799805, "learning_rate": 6.284396657154506e-05, "loss": 2.6562, "step": 43765 }, { "epoch": 2.9739094985731755, "grad_norm": 2.0062661170959473, "learning_rate": 6.283972007066177e-05, "loss": 2.9625, "step": 43770 }, { "epoch": 2.9742492186438376, "grad_norm": 3.002354383468628, "learning_rate": 6.28354735697785e-05, "loss": 2.7489, "step": 43775 }, { "epoch": 2.9745889387144993, "grad_norm": 2.4868950843811035, "learning_rate": 6.283122706889524e-05, "loss": 2.9703, "step": 43780 }, { "epoch": 2.974928658785161, "grad_norm": 2.0465188026428223, "learning_rate": 6.282698056801196e-05, "loss": 2.937, "step": 43785 }, { "epoch": 2.975268378855823, "grad_norm": 2.3584482669830322, "learning_rate": 6.282273406712869e-05, "loss": 2.9676, "step": 43790 }, { "epoch": 2.9756080989264846, "grad_norm": 2.933183431625366, "learning_rate": 6.281848756624541e-05, "loss": 2.9251, "step": 43795 }, { "epoch": 2.975947818997146, "grad_norm": 2.1981115341186523, "learning_rate": 6.281424106536214e-05, "loss": 2.8811, "step": 43800 }, { "epoch": 2.9762875390678083, "grad_norm": 2.4321842193603516, "learning_rate": 6.280999456447887e-05, "loss": 2.8602, "step": 43805 }, { "epoch": 2.97662725913847, "grad_norm": 2.843501567840576, 
"learning_rate": 6.28057480635956e-05, "loss": 2.8197, "step": 43810 }, { "epoch": 2.9769669792091316, "grad_norm": 3.009243965148926, "learning_rate": 6.280150156271233e-05, "loss": 3.0237, "step": 43815 }, { "epoch": 2.9773066992797936, "grad_norm": 3.727771043777466, "learning_rate": 6.279725506182905e-05, "loss": 3.1101, "step": 43820 }, { "epoch": 2.9776464193504553, "grad_norm": 2.333244800567627, "learning_rate": 6.279300856094578e-05, "loss": 2.8567, "step": 43825 }, { "epoch": 2.977986139421117, "grad_norm": 2.2459793090820312, "learning_rate": 6.278876206006251e-05, "loss": 2.8869, "step": 43830 }, { "epoch": 2.978325859491779, "grad_norm": 2.548922300338745, "learning_rate": 6.278451555917924e-05, "loss": 2.4562, "step": 43835 }, { "epoch": 2.9786655795624406, "grad_norm": 2.1965553760528564, "learning_rate": 6.278026905829597e-05, "loss": 2.8068, "step": 43840 }, { "epoch": 2.9790052996331022, "grad_norm": 2.2735435962677, "learning_rate": 6.27760225574127e-05, "loss": 3.0591, "step": 43845 }, { "epoch": 2.9793450197037643, "grad_norm": 2.673776388168335, "learning_rate": 6.277177605652942e-05, "loss": 2.9207, "step": 43850 }, { "epoch": 2.979684739774426, "grad_norm": 2.2267091274261475, "learning_rate": 6.276752955564615e-05, "loss": 2.6404, "step": 43855 }, { "epoch": 2.9800244598450876, "grad_norm": 2.861922264099121, "learning_rate": 6.276328305476288e-05, "loss": 2.7949, "step": 43860 }, { "epoch": 2.9803641799157496, "grad_norm": 2.2101333141326904, "learning_rate": 6.27590365538796e-05, "loss": 2.7929, "step": 43865 }, { "epoch": 2.9807038999864113, "grad_norm": 2.2170605659484863, "learning_rate": 6.275479005299633e-05, "loss": 2.7815, "step": 43870 }, { "epoch": 2.981043620057073, "grad_norm": 2.563218832015991, "learning_rate": 6.275054355211306e-05, "loss": 2.8483, "step": 43875 }, { "epoch": 2.981383340127735, "grad_norm": 2.6105117797851562, "learning_rate": 6.274629705122979e-05, "loss": 3.1274, "step": 43880 }, { "epoch": 
2.9817230601983966, "grad_norm": 2.9051222801208496, "learning_rate": 6.274205055034652e-05, "loss": 2.7063, "step": 43885 }, { "epoch": 2.9820627802690582, "grad_norm": 3.0429909229278564, "learning_rate": 6.273780404946325e-05, "loss": 2.5503, "step": 43890 }, { "epoch": 2.9824025003397203, "grad_norm": 3.1011507511138916, "learning_rate": 6.273355754857997e-05, "loss": 3.1631, "step": 43895 }, { "epoch": 2.982742220410382, "grad_norm": 3.4804766178131104, "learning_rate": 6.27293110476967e-05, "loss": 2.8958, "step": 43900 }, { "epoch": 2.9830819404810436, "grad_norm": 3.34084153175354, "learning_rate": 6.272506454681343e-05, "loss": 2.9442, "step": 43905 }, { "epoch": 2.9834216605517057, "grad_norm": 2.783764362335205, "learning_rate": 6.272081804593016e-05, "loss": 2.8464, "step": 43910 }, { "epoch": 2.9837613806223673, "grad_norm": 2.4844605922698975, "learning_rate": 6.271657154504689e-05, "loss": 2.9745, "step": 43915 }, { "epoch": 2.984101100693029, "grad_norm": 2.3999674320220947, "learning_rate": 6.271232504416361e-05, "loss": 2.8644, "step": 43920 }, { "epoch": 2.984440820763691, "grad_norm": 2.422574758529663, "learning_rate": 6.270807854328033e-05, "loss": 2.9002, "step": 43925 }, { "epoch": 2.9847805408343526, "grad_norm": 2.4430551528930664, "learning_rate": 6.270383204239707e-05, "loss": 2.7903, "step": 43930 }, { "epoch": 2.9851202609050143, "grad_norm": 2.741461992263794, "learning_rate": 6.26995855415138e-05, "loss": 2.604, "step": 43935 }, { "epoch": 2.985459980975676, "grad_norm": 2.724034070968628, "learning_rate": 6.269533904063051e-05, "loss": 2.7318, "step": 43940 }, { "epoch": 2.985799701046338, "grad_norm": 2.4745585918426514, "learning_rate": 6.269109253974725e-05, "loss": 2.8034, "step": 43945 }, { "epoch": 2.9861394211169996, "grad_norm": 2.6932084560394287, "learning_rate": 6.268684603886398e-05, "loss": 2.9563, "step": 43950 }, { "epoch": 2.9864791411876612, "grad_norm": 2.7314553260803223, "learning_rate": 6.26825995379807e-05, 
"loss": 2.8684, "step": 43955 }, { "epoch": 2.9868188612583233, "grad_norm": 3.059908390045166, "learning_rate": 6.267835303709744e-05, "loss": 3.061, "step": 43960 }, { "epoch": 2.987158581328985, "grad_norm": 2.4929327964782715, "learning_rate": 6.267410653621417e-05, "loss": 2.9594, "step": 43965 }, { "epoch": 2.9874983013996466, "grad_norm": 2.5450191497802734, "learning_rate": 6.266986003533088e-05, "loss": 3.1002, "step": 43970 }, { "epoch": 2.987838021470308, "grad_norm": 2.003309965133667, "learning_rate": 6.266561353444762e-05, "loss": 2.7907, "step": 43975 }, { "epoch": 2.9881777415409703, "grad_norm": 2.3971405029296875, "learning_rate": 6.266136703356435e-05, "loss": 2.874, "step": 43980 }, { "epoch": 2.988517461611632, "grad_norm": 2.0976603031158447, "learning_rate": 6.265712053268107e-05, "loss": 2.7981, "step": 43985 }, { "epoch": 2.9888571816822935, "grad_norm": 3.400545358657837, "learning_rate": 6.26528740317978e-05, "loss": 3.054, "step": 43990 }, { "epoch": 2.9891969017529556, "grad_norm": 2.8273191452026367, "learning_rate": 6.264862753091452e-05, "loss": 3.0018, "step": 43995 }, { "epoch": 2.9895366218236172, "grad_norm": 2.578583240509033, "learning_rate": 6.264438103003126e-05, "loss": 2.7566, "step": 44000 }, { "epoch": 2.989876341894279, "grad_norm": 3.495593786239624, "learning_rate": 6.264013452914799e-05, "loss": 2.827, "step": 44005 }, { "epoch": 2.990216061964941, "grad_norm": 2.5263893604278564, "learning_rate": 6.26358880282647e-05, "loss": 2.6254, "step": 44010 }, { "epoch": 2.9905557820356026, "grad_norm": 2.7343976497650146, "learning_rate": 6.263164152738145e-05, "loss": 3.0869, "step": 44015 }, { "epoch": 2.990895502106264, "grad_norm": 2.6465818881988525, "learning_rate": 6.262739502649817e-05, "loss": 2.9654, "step": 44020 }, { "epoch": 2.9912352221769263, "grad_norm": 2.542222023010254, "learning_rate": 6.262314852561489e-05, "loss": 2.75, "step": 44025 }, { "epoch": 2.991574942247588, "grad_norm": 3.189852714538574, 
"learning_rate": 6.261890202473163e-05, "loss": 3.4077, "step": 44030 }, { "epoch": 2.9919146623182495, "grad_norm": 2.633131504058838, "learning_rate": 6.261465552384836e-05, "loss": 2.8016, "step": 44035 }, { "epoch": 2.9922543823889116, "grad_norm": 2.4546921253204346, "learning_rate": 6.261040902296507e-05, "loss": 2.7874, "step": 44040 }, { "epoch": 2.9925941024595732, "grad_norm": 2.3768067359924316, "learning_rate": 6.260616252208181e-05, "loss": 2.9393, "step": 44045 }, { "epoch": 2.992933822530235, "grad_norm": 2.638796091079712, "learning_rate": 6.260191602119854e-05, "loss": 3.0302, "step": 44050 }, { "epoch": 2.993273542600897, "grad_norm": 2.735020875930786, "learning_rate": 6.259766952031526e-05, "loss": 3.0984, "step": 44055 }, { "epoch": 2.9936132626715586, "grad_norm": 2.5348198413848877, "learning_rate": 6.2593423019432e-05, "loss": 2.9988, "step": 44060 }, { "epoch": 2.99395298274222, "grad_norm": 2.6617624759674072, "learning_rate": 6.258917651854873e-05, "loss": 2.8556, "step": 44065 }, { "epoch": 2.9942927028128823, "grad_norm": 2.3484246730804443, "learning_rate": 6.258493001766544e-05, "loss": 3.0089, "step": 44070 }, { "epoch": 2.994632422883544, "grad_norm": 2.9314157962799072, "learning_rate": 6.258068351678218e-05, "loss": 2.8703, "step": 44075 }, { "epoch": 2.9949721429542056, "grad_norm": 2.8618922233581543, "learning_rate": 6.25764370158989e-05, "loss": 2.7056, "step": 44080 }, { "epoch": 2.9953118630248676, "grad_norm": 2.6290252208709717, "learning_rate": 6.257219051501563e-05, "loss": 2.8443, "step": 44085 }, { "epoch": 2.9956515830955293, "grad_norm": 2.573580265045166, "learning_rate": 6.256794401413237e-05, "loss": 2.78, "step": 44090 }, { "epoch": 2.995991303166191, "grad_norm": 2.642366886138916, "learning_rate": 6.256369751324908e-05, "loss": 3.0765, "step": 44095 }, { "epoch": 2.996331023236853, "grad_norm": 2.6896209716796875, "learning_rate": 6.255945101236581e-05, "loss": 3.072, "step": 44100 }, { "epoch": 
2.9966707433075146, "grad_norm": 2.805664300918579, "learning_rate": 6.255520451148255e-05, "loss": 3.0172, "step": 44105 }, { "epoch": 2.9970104633781762, "grad_norm": 2.8096816539764404, "learning_rate": 6.255095801059927e-05, "loss": 2.8625, "step": 44110 }, { "epoch": 2.9973501834488383, "grad_norm": 2.5644164085388184, "learning_rate": 6.254671150971599e-05, "loss": 2.7646, "step": 44115 }, { "epoch": 2.9976899035195, "grad_norm": 2.384903907775879, "learning_rate": 6.254246500883273e-05, "loss": 3.0215, "step": 44120 }, { "epoch": 2.9980296235901616, "grad_norm": 2.414642572402954, "learning_rate": 6.253821850794945e-05, "loss": 3.0292, "step": 44125 }, { "epoch": 2.9983693436608236, "grad_norm": 3.1505260467529297, "learning_rate": 6.253397200706618e-05, "loss": 3.0292, "step": 44130 }, { "epoch": 2.9987090637314853, "grad_norm": 2.4606642723083496, "learning_rate": 6.252972550618292e-05, "loss": 3.0602, "step": 44135 }, { "epoch": 2.999048783802147, "grad_norm": 2.334613800048828, "learning_rate": 6.252547900529963e-05, "loss": 2.5692, "step": 44140 }, { "epoch": 2.999388503872809, "grad_norm": 2.3245410919189453, "learning_rate": 6.252123250441636e-05, "loss": 2.9259, "step": 44145 }, { "epoch": 2.9997282239434706, "grad_norm": 2.3911643028259277, "learning_rate": 6.251698600353309e-05, "loss": 2.7824, "step": 44150 }, { "epoch": 3.0, "eval_bertscore": { "f1": 0.8436210026157862, "precision": 0.8467942321990185, "recall": 0.8412111612158305 }, "eval_bleu_4": 0.018022898597743052, "eval_exact_match": 0.0008721775365830022, "eval_loss": 3.344754457473755, "eval_meteor": 0.09283271163706162, "eval_rouge": { "rouge1": 0.12719564501657615, "rouge2": 0.0195392176740379, "rougeL": 0.10959025562265381, "rougeLsum": 0.10961899989544632 }, "eval_runtime": 1355.3248, "eval_samples_per_second": 7.614, "eval_steps_per_second": 0.952, "step": 44154 }, { "epoch": 3.0000679440141322, "grad_norm": 2.2868940830230713, "learning_rate": 6.251273950264982e-05, "loss": 2.7517, 
"step": 44155 }, { "epoch": 3.0004076640847943, "grad_norm": 2.45833683013916, "learning_rate": 6.250849300176655e-05, "loss": 2.7158, "step": 44160 }, { "epoch": 3.000747384155456, "grad_norm": 3.1056582927703857, "learning_rate": 6.250424650088327e-05, "loss": 2.6595, "step": 44165 }, { "epoch": 3.0010871042261176, "grad_norm": 2.785490036010742, "learning_rate": 6.25e-05, "loss": 2.467, "step": 44170 }, { "epoch": 3.0014268242967796, "grad_norm": 3.0077106952667236, "learning_rate": 6.249575349911673e-05, "loss": 2.4956, "step": 44175 }, { "epoch": 3.0017665443674413, "grad_norm": 2.9648818969726562, "learning_rate": 6.249150699823346e-05, "loss": 2.4867, "step": 44180 }, { "epoch": 3.002106264438103, "grad_norm": 3.417421579360962, "learning_rate": 6.248726049735019e-05, "loss": 2.5104, "step": 44185 }, { "epoch": 3.002445984508765, "grad_norm": 2.4758522510528564, "learning_rate": 6.248301399646691e-05, "loss": 2.7156, "step": 44190 }, { "epoch": 3.0027857045794266, "grad_norm": 2.5200109481811523, "learning_rate": 6.247876749558364e-05, "loss": 2.5341, "step": 44195 }, { "epoch": 3.0031254246500882, "grad_norm": 2.175049304962158, "learning_rate": 6.247452099470037e-05, "loss": 2.5147, "step": 44200 }, { "epoch": 3.0034651447207503, "grad_norm": 3.2451255321502686, "learning_rate": 6.24702744938171e-05, "loss": 2.5677, "step": 44205 }, { "epoch": 3.003804864791412, "grad_norm": 3.3307926654815674, "learning_rate": 6.246602799293383e-05, "loss": 2.5857, "step": 44210 }, { "epoch": 3.0041445848620736, "grad_norm": 2.4567108154296875, "learning_rate": 6.246178149205055e-05, "loss": 2.5292, "step": 44215 }, { "epoch": 3.004484304932735, "grad_norm": 2.773935556411743, "learning_rate": 6.245753499116728e-05, "loss": 2.6834, "step": 44220 }, { "epoch": 3.0048240250033973, "grad_norm": 3.097031593322754, "learning_rate": 6.245328849028401e-05, "loss": 2.7184, "step": 44225 }, { "epoch": 3.005163745074059, "grad_norm": 3.0274107456207275, "learning_rate": 
6.244904198940074e-05, "loss": 2.4645, "step": 44230 }, { "epoch": 3.0055034651447206, "grad_norm": 3.200516939163208, "learning_rate": 6.244479548851747e-05, "loss": 2.4271, "step": 44235 }, { "epoch": 3.0058431852153826, "grad_norm": 2.3546459674835205, "learning_rate": 6.24405489876342e-05, "loss": 2.5465, "step": 44240 }, { "epoch": 3.0061829052860443, "grad_norm": 3.123286247253418, "learning_rate": 6.243630248675092e-05, "loss": 2.7357, "step": 44245 }, { "epoch": 3.006522625356706, "grad_norm": 2.9733712673187256, "learning_rate": 6.243205598586765e-05, "loss": 2.7089, "step": 44250 }, { "epoch": 3.006862345427368, "grad_norm": 3.1115715503692627, "learning_rate": 6.242780948498438e-05, "loss": 2.5907, "step": 44255 }, { "epoch": 3.0072020654980296, "grad_norm": 3.0466842651367188, "learning_rate": 6.24235629841011e-05, "loss": 3.0144, "step": 44260 }, { "epoch": 3.0075417855686912, "grad_norm": 2.651977300643921, "learning_rate": 6.241931648321783e-05, "loss": 2.6987, "step": 44265 }, { "epoch": 3.0078815056393533, "grad_norm": 2.6829891204833984, "learning_rate": 6.241506998233456e-05, "loss": 2.2642, "step": 44270 }, { "epoch": 3.008221225710015, "grad_norm": 3.012467861175537, "learning_rate": 6.241082348145129e-05, "loss": 2.5989, "step": 44275 }, { "epoch": 3.0085609457806766, "grad_norm": 2.8435988426208496, "learning_rate": 6.2406576980568e-05, "loss": 2.7754, "step": 44280 }, { "epoch": 3.0089006658513386, "grad_norm": 2.927203416824341, "learning_rate": 6.240233047968475e-05, "loss": 2.541, "step": 44285 }, { "epoch": 3.0092403859220003, "grad_norm": 3.1063406467437744, "learning_rate": 6.239808397880147e-05, "loss": 2.5268, "step": 44290 }, { "epoch": 3.009580105992662, "grad_norm": 2.78932785987854, "learning_rate": 6.239383747791819e-05, "loss": 2.6742, "step": 44295 }, { "epoch": 3.009919826063324, "grad_norm": 2.5540268421173096, "learning_rate": 6.238959097703493e-05, "loss": 2.6497, "step": 44300 }, { "epoch": 3.0102595461339856, 
"grad_norm": 3.3080313205718994, "learning_rate": 6.238534447615166e-05, "loss": 2.4733, "step": 44305 }, { "epoch": 3.0105992662046472, "grad_norm": 2.596853733062744, "learning_rate": 6.238109797526837e-05, "loss": 2.8331, "step": 44310 }, { "epoch": 3.0109389862753093, "grad_norm": 2.959601402282715, "learning_rate": 6.237685147438511e-05, "loss": 2.6186, "step": 44315 }, { "epoch": 3.011278706345971, "grad_norm": 2.505808115005493, "learning_rate": 6.237260497350184e-05, "loss": 2.5706, "step": 44320 }, { "epoch": 3.0116184264166326, "grad_norm": 2.451305627822876, "learning_rate": 6.236835847261856e-05, "loss": 2.5595, "step": 44325 }, { "epoch": 3.0119581464872947, "grad_norm": 2.8791213035583496, "learning_rate": 6.23641119717353e-05, "loss": 2.9366, "step": 44330 }, { "epoch": 3.0122978665579563, "grad_norm": 2.712905168533325, "learning_rate": 6.235986547085203e-05, "loss": 2.6395, "step": 44335 }, { "epoch": 3.012637586628618, "grad_norm": 2.4978888034820557, "learning_rate": 6.235561896996874e-05, "loss": 2.5198, "step": 44340 }, { "epoch": 3.01297730669928, "grad_norm": 3.3200266361236572, "learning_rate": 6.235137246908548e-05, "loss": 2.6762, "step": 44345 }, { "epoch": 3.0133170267699416, "grad_norm": 2.5130019187927246, "learning_rate": 6.23471259682022e-05, "loss": 2.6091, "step": 44350 }, { "epoch": 3.0136567468406033, "grad_norm": 3.070953130722046, "learning_rate": 6.234287946731894e-05, "loss": 2.7601, "step": 44355 }, { "epoch": 3.0139964669112653, "grad_norm": 2.617109537124634, "learning_rate": 6.233863296643567e-05, "loss": 2.6887, "step": 44360 }, { "epoch": 3.014336186981927, "grad_norm": 2.5470352172851562, "learning_rate": 6.233438646555238e-05, "loss": 2.582, "step": 44365 }, { "epoch": 3.0146759070525886, "grad_norm": 2.5497167110443115, "learning_rate": 6.233013996466912e-05, "loss": 2.4719, "step": 44370 }, { "epoch": 3.01501562712325, "grad_norm": 2.472151041030884, "learning_rate": 6.232589346378585e-05, "loss": 2.7231, "step": 
44375 }, { "epoch": 3.0153553471939123, "grad_norm": 2.927999496459961, "learning_rate": 6.232164696290256e-05, "loss": 2.4487, "step": 44380 }, { "epoch": 3.015695067264574, "grad_norm": 2.876382827758789, "learning_rate": 6.23174004620193e-05, "loss": 2.5629, "step": 44385 }, { "epoch": 3.0160347873352356, "grad_norm": 3.9703330993652344, "learning_rate": 6.231315396113603e-05, "loss": 2.5848, "step": 44390 }, { "epoch": 3.0163745074058976, "grad_norm": 2.5750908851623535, "learning_rate": 6.230890746025275e-05, "loss": 2.6077, "step": 44395 }, { "epoch": 3.0167142274765593, "grad_norm": 3.0136287212371826, "learning_rate": 6.230466095936949e-05, "loss": 2.7563, "step": 44400 }, { "epoch": 3.017053947547221, "grad_norm": 2.6053998470306396, "learning_rate": 6.230041445848622e-05, "loss": 2.4659, "step": 44405 }, { "epoch": 3.017393667617883, "grad_norm": 3.971515655517578, "learning_rate": 6.229616795760293e-05, "loss": 2.498, "step": 44410 }, { "epoch": 3.0177333876885446, "grad_norm": 3.1349074840545654, "learning_rate": 6.229192145671967e-05, "loss": 2.7552, "step": 44415 }, { "epoch": 3.0180731077592062, "grad_norm": 3.045069456100464, "learning_rate": 6.228767495583639e-05, "loss": 2.7661, "step": 44420 }, { "epoch": 3.0184128278298683, "grad_norm": 3.2662057876586914, "learning_rate": 6.228342845495312e-05, "loss": 2.8245, "step": 44425 }, { "epoch": 3.01875254790053, "grad_norm": 2.1064670085906982, "learning_rate": 6.227918195406986e-05, "loss": 2.764, "step": 44430 }, { "epoch": 3.0190922679711916, "grad_norm": 2.5639965534210205, "learning_rate": 6.227493545318657e-05, "loss": 2.4625, "step": 44435 }, { "epoch": 3.0194319880418536, "grad_norm": 2.718942642211914, "learning_rate": 6.22706889523033e-05, "loss": 2.9989, "step": 44440 }, { "epoch": 3.0197717081125153, "grad_norm": 2.8092098236083984, "learning_rate": 6.226644245142004e-05, "loss": 2.6884, "step": 44445 }, { "epoch": 3.020111428183177, "grad_norm": 2.6269731521606445, "learning_rate": 
6.226219595053676e-05, "loss": 2.6261, "step": 44450 }, { "epoch": 3.020451148253839, "grad_norm": 3.5216193199157715, "learning_rate": 6.225794944965348e-05, "loss": 2.4704, "step": 44455 }, { "epoch": 3.0207908683245006, "grad_norm": 2.400459051132202, "learning_rate": 6.225370294877023e-05, "loss": 2.4307, "step": 44460 }, { "epoch": 3.0211305883951622, "grad_norm": 3.115241765975952, "learning_rate": 6.224945644788694e-05, "loss": 2.7368, "step": 44465 }, { "epoch": 3.0214703084658243, "grad_norm": 2.9368958473205566, "learning_rate": 6.224520994700367e-05, "loss": 2.5684, "step": 44470 }, { "epoch": 3.021810028536486, "grad_norm": 3.363358736038208, "learning_rate": 6.224096344612041e-05, "loss": 2.436, "step": 44475 }, { "epoch": 3.0221497486071476, "grad_norm": 3.2365918159484863, "learning_rate": 6.223671694523712e-05, "loss": 2.74, "step": 44480 }, { "epoch": 3.0224894686778097, "grad_norm": 3.2514488697052, "learning_rate": 6.223247044435385e-05, "loss": 2.862, "step": 44485 }, { "epoch": 3.0228291887484713, "grad_norm": 2.439197063446045, "learning_rate": 6.22282239434706e-05, "loss": 2.1704, "step": 44490 }, { "epoch": 3.023168908819133, "grad_norm": 2.5690793991088867, "learning_rate": 6.222397744258731e-05, "loss": 2.6357, "step": 44495 }, { "epoch": 3.023508628889795, "grad_norm": 2.841646909713745, "learning_rate": 6.221973094170404e-05, "loss": 2.5562, "step": 44500 }, { "epoch": 3.0238483489604566, "grad_norm": 3.0541059970855713, "learning_rate": 6.221548444082076e-05, "loss": 2.2913, "step": 44505 }, { "epoch": 3.0241880690311183, "grad_norm": 2.589517831802368, "learning_rate": 6.221123793993749e-05, "loss": 2.7089, "step": 44510 }, { "epoch": 3.0245277891017803, "grad_norm": 2.6566877365112305, "learning_rate": 6.220699143905422e-05, "loss": 2.5891, "step": 44515 }, { "epoch": 3.024867509172442, "grad_norm": 3.127986431121826, "learning_rate": 6.220274493817095e-05, "loss": 2.5958, "step": 44520 }, { "epoch": 3.0252072292431036, "grad_norm": 
2.917306900024414, "learning_rate": 6.219849843728768e-05, "loss": 2.517, "step": 44525 }, { "epoch": 3.0255469493137657, "grad_norm": 3.1529979705810547, "learning_rate": 6.21942519364044e-05, "loss": 2.5596, "step": 44530 }, { "epoch": 3.0258866693844273, "grad_norm": 2.7146902084350586, "learning_rate": 6.219000543552113e-05, "loss": 2.5879, "step": 44535 }, { "epoch": 3.026226389455089, "grad_norm": 2.899761438369751, "learning_rate": 6.218575893463786e-05, "loss": 3.1163, "step": 44540 }, { "epoch": 3.026566109525751, "grad_norm": 2.944312572479248, "learning_rate": 6.218151243375459e-05, "loss": 2.2744, "step": 44545 }, { "epoch": 3.0269058295964126, "grad_norm": 3.6018784046173096, "learning_rate": 6.217726593287132e-05, "loss": 2.786, "step": 44550 }, { "epoch": 3.0272455496670743, "grad_norm": 2.3904306888580322, "learning_rate": 6.217301943198804e-05, "loss": 2.7451, "step": 44555 }, { "epoch": 3.027585269737736, "grad_norm": 3.0909764766693115, "learning_rate": 6.216877293110477e-05, "loss": 2.547, "step": 44560 }, { "epoch": 3.027924989808398, "grad_norm": 2.973238468170166, "learning_rate": 6.21645264302215e-05, "loss": 2.7352, "step": 44565 }, { "epoch": 3.0282647098790596, "grad_norm": 2.231839418411255, "learning_rate": 6.216027992933823e-05, "loss": 2.6191, "step": 44570 }, { "epoch": 3.0286044299497212, "grad_norm": 6.664119720458984, "learning_rate": 6.215603342845496e-05, "loss": 2.6538, "step": 44575 }, { "epoch": 3.0289441500203833, "grad_norm": 3.1335861682891846, "learning_rate": 6.215178692757168e-05, "loss": 2.4022, "step": 44580 }, { "epoch": 3.029283870091045, "grad_norm": 2.832934617996216, "learning_rate": 6.214754042668841e-05, "loss": 2.6996, "step": 44585 }, { "epoch": 3.0296235901617066, "grad_norm": 2.7451255321502686, "learning_rate": 6.214329392580514e-05, "loss": 2.4904, "step": 44590 }, { "epoch": 3.0299633102323686, "grad_norm": 3.4330132007598877, "learning_rate": 6.213904742492187e-05, "loss": 2.6093, "step": 44595 }, { 
"epoch": 3.0303030303030303, "grad_norm": 2.981940984725952, "learning_rate": 6.21348009240386e-05, "loss": 2.6604, "step": 44600 }, { "epoch": 3.030642750373692, "grad_norm": 3.0892679691314697, "learning_rate": 6.213055442315532e-05, "loss": 2.5117, "step": 44605 }, { "epoch": 3.030982470444354, "grad_norm": 2.445789337158203, "learning_rate": 6.212630792227205e-05, "loss": 2.5402, "step": 44610 }, { "epoch": 3.0313221905150156, "grad_norm": 2.806547164916992, "learning_rate": 6.212206142138878e-05, "loss": 2.249, "step": 44615 }, { "epoch": 3.0316619105856772, "grad_norm": 2.4626317024230957, "learning_rate": 6.21178149205055e-05, "loss": 2.5839, "step": 44620 }, { "epoch": 3.0320016306563393, "grad_norm": 3.067021131515503, "learning_rate": 6.211356841962224e-05, "loss": 2.6751, "step": 44625 }, { "epoch": 3.032341350727001, "grad_norm": 3.173828125, "learning_rate": 6.210932191873896e-05, "loss": 2.7888, "step": 44630 }, { "epoch": 3.0326810707976626, "grad_norm": 2.9103870391845703, "learning_rate": 6.210507541785568e-05, "loss": 2.5854, "step": 44635 }, { "epoch": 3.0330207908683247, "grad_norm": 2.4109058380126953, "learning_rate": 6.210082891697242e-05, "loss": 2.4926, "step": 44640 }, { "epoch": 3.0333605109389863, "grad_norm": 3.072766065597534, "learning_rate": 6.209658241608915e-05, "loss": 2.8609, "step": 44645 }, { "epoch": 3.033700231009648, "grad_norm": 2.597222089767456, "learning_rate": 6.209233591520586e-05, "loss": 2.3425, "step": 44650 }, { "epoch": 3.03403995108031, "grad_norm": 2.915306806564331, "learning_rate": 6.20880894143226e-05, "loss": 2.7408, "step": 44655 }, { "epoch": 3.0343796711509716, "grad_norm": 2.5909571647644043, "learning_rate": 6.208384291343933e-05, "loss": 2.6407, "step": 44660 }, { "epoch": 3.0347193912216333, "grad_norm": 2.5591001510620117, "learning_rate": 6.207959641255605e-05, "loss": 2.7635, "step": 44665 }, { "epoch": 3.0350591112922953, "grad_norm": 3.7902510166168213, "learning_rate": 6.207534991167279e-05, 
"loss": 2.3373, "step": 44670 }, { "epoch": 3.035398831362957, "grad_norm": 2.4085988998413086, "learning_rate": 6.207110341078952e-05, "loss": 2.5338, "step": 44675 }, { "epoch": 3.0357385514336186, "grad_norm": 2.9585907459259033, "learning_rate": 6.206685690990623e-05, "loss": 2.6777, "step": 44680 }, { "epoch": 3.0360782715042807, "grad_norm": 2.3659236431121826, "learning_rate": 6.206261040902297e-05, "loss": 2.7841, "step": 44685 }, { "epoch": 3.0364179915749423, "grad_norm": 2.5725021362304688, "learning_rate": 6.20583639081397e-05, "loss": 2.5865, "step": 44690 }, { "epoch": 3.036757711645604, "grad_norm": 2.734870195388794, "learning_rate": 6.205411740725643e-05, "loss": 2.5246, "step": 44695 }, { "epoch": 3.037097431716266, "grad_norm": 2.7912237644195557, "learning_rate": 6.204987090637316e-05, "loss": 2.6776, "step": 44700 }, { "epoch": 3.0374371517869276, "grad_norm": 4.22821044921875, "learning_rate": 6.204562440548987e-05, "loss": 2.5157, "step": 44705 }, { "epoch": 3.0377768718575893, "grad_norm": 2.5312485694885254, "learning_rate": 6.204137790460661e-05, "loss": 2.6519, "step": 44710 }, { "epoch": 3.038116591928251, "grad_norm": 4.3450727462768555, "learning_rate": 6.203713140372334e-05, "loss": 2.4362, "step": 44715 }, { "epoch": 3.038456311998913, "grad_norm": 2.6034960746765137, "learning_rate": 6.203288490284006e-05, "loss": 2.5176, "step": 44720 }, { "epoch": 3.0387960320695746, "grad_norm": 2.140181064605713, "learning_rate": 6.20286384019568e-05, "loss": 2.5548, "step": 44725 }, { "epoch": 3.0391357521402362, "grad_norm": 3.6592767238616943, "learning_rate": 6.202439190107352e-05, "loss": 2.6695, "step": 44730 }, { "epoch": 3.0394754722108983, "grad_norm": 2.6702942848205566, "learning_rate": 6.202014540019024e-05, "loss": 2.6633, "step": 44735 }, { "epoch": 3.03981519228156, "grad_norm": 2.3571646213531494, "learning_rate": 6.201589889930698e-05, "loss": 2.2876, "step": 44740 }, { "epoch": 3.0401549123522216, "grad_norm": 
2.929504156112671, "learning_rate": 6.201165239842371e-05, "loss": 2.6622, "step": 44745 }, { "epoch": 3.0404946324228836, "grad_norm": 2.4358577728271484, "learning_rate": 6.200740589754042e-05, "loss": 2.4993, "step": 44750 }, { "epoch": 3.0408343524935453, "grad_norm": 2.509467363357544, "learning_rate": 6.200315939665716e-05, "loss": 2.3741, "step": 44755 }, { "epoch": 3.041174072564207, "grad_norm": 3.046647310256958, "learning_rate": 6.199891289577389e-05, "loss": 2.5236, "step": 44760 }, { "epoch": 3.041513792634869, "grad_norm": 2.913309335708618, "learning_rate": 6.199466639489061e-05, "loss": 2.3743, "step": 44765 }, { "epoch": 3.0418535127055306, "grad_norm": 2.7027413845062256, "learning_rate": 6.199041989400735e-05, "loss": 2.2602, "step": 44770 }, { "epoch": 3.0421932327761922, "grad_norm": 3.835176706314087, "learning_rate": 6.198617339312406e-05, "loss": 2.6553, "step": 44775 }, { "epoch": 3.0425329528468543, "grad_norm": 3.218554973602295, "learning_rate": 6.198192689224079e-05, "loss": 2.5201, "step": 44780 }, { "epoch": 3.042872672917516, "grad_norm": 2.950648069381714, "learning_rate": 6.197768039135753e-05, "loss": 2.5142, "step": 44785 }, { "epoch": 3.0432123929881776, "grad_norm": 2.4874401092529297, "learning_rate": 6.197343389047425e-05, "loss": 2.4779, "step": 44790 }, { "epoch": 3.0435521130588397, "grad_norm": 2.4780521392822266, "learning_rate": 6.196918738959098e-05, "loss": 2.7321, "step": 44795 }, { "epoch": 3.0438918331295013, "grad_norm": 2.4437167644500732, "learning_rate": 6.196494088870772e-05, "loss": 2.7687, "step": 44800 }, { "epoch": 3.044231553200163, "grad_norm": 3.2298033237457275, "learning_rate": 6.196069438782443e-05, "loss": 2.5742, "step": 44805 }, { "epoch": 3.044571273270825, "grad_norm": 2.394625186920166, "learning_rate": 6.195644788694116e-05, "loss": 2.4374, "step": 44810 }, { "epoch": 3.0449109933414866, "grad_norm": 2.4658761024475098, "learning_rate": 6.19522013860579e-05, "loss": 2.7978, "step": 44815 }, { 
"epoch": 3.0452507134121483, "grad_norm": 3.2676289081573486, "learning_rate": 6.194795488517462e-05, "loss": 2.5945, "step": 44820 }, { "epoch": 3.0455904334828103, "grad_norm": 2.4850590229034424, "learning_rate": 6.194370838429134e-05, "loss": 2.609, "step": 44825 }, { "epoch": 3.045930153553472, "grad_norm": 2.453706979751587, "learning_rate": 6.193946188340808e-05, "loss": 2.6474, "step": 44830 }, { "epoch": 3.0462698736241336, "grad_norm": 2.5077168941497803, "learning_rate": 6.19352153825248e-05, "loss": 2.3033, "step": 44835 }, { "epoch": 3.0466095936947957, "grad_norm": 2.600891351699829, "learning_rate": 6.193096888164153e-05, "loss": 2.4053, "step": 44840 }, { "epoch": 3.0469493137654573, "grad_norm": 2.1451218128204346, "learning_rate": 6.192672238075826e-05, "loss": 2.4663, "step": 44845 }, { "epoch": 3.047289033836119, "grad_norm": 2.3034934997558594, "learning_rate": 6.192247587987498e-05, "loss": 2.8461, "step": 44850 }, { "epoch": 3.047628753906781, "grad_norm": 2.2709696292877197, "learning_rate": 6.191822937899171e-05, "loss": 2.7071, "step": 44855 }, { "epoch": 3.0479684739774426, "grad_norm": 2.3257453441619873, "learning_rate": 6.191398287810844e-05, "loss": 2.4767, "step": 44860 }, { "epoch": 3.0483081940481043, "grad_norm": 3.6940181255340576, "learning_rate": 6.190973637722517e-05, "loss": 2.5544, "step": 44865 }, { "epoch": 3.0486479141187663, "grad_norm": 2.7374346256256104, "learning_rate": 6.19054898763419e-05, "loss": 2.5145, "step": 44870 }, { "epoch": 3.048987634189428, "grad_norm": 2.4553074836730957, "learning_rate": 6.190124337545862e-05, "loss": 2.6006, "step": 44875 }, { "epoch": 3.0493273542600896, "grad_norm": 2.461040496826172, "learning_rate": 6.189699687457535e-05, "loss": 2.5418, "step": 44880 }, { "epoch": 3.0496670743307517, "grad_norm": 3.4068305492401123, "learning_rate": 6.189275037369208e-05, "loss": 2.5787, "step": 44885 }, { "epoch": 3.0500067944014133, "grad_norm": 3.0094192028045654, "learning_rate": 
6.188850387280881e-05, "loss": 2.7251, "step": 44890 }, { "epoch": 3.050346514472075, "grad_norm": 2.847745895385742, "learning_rate": 6.188425737192554e-05, "loss": 2.6745, "step": 44895 }, { "epoch": 3.0506862345427366, "grad_norm": 2.967829942703247, "learning_rate": 6.188001087104226e-05, "loss": 2.7089, "step": 44900 }, { "epoch": 3.0510259546133986, "grad_norm": 2.562251091003418, "learning_rate": 6.187576437015899e-05, "loss": 2.4157, "step": 44905 }, { "epoch": 3.0513656746840603, "grad_norm": 3.110755681991577, "learning_rate": 6.187151786927572e-05, "loss": 2.5521, "step": 44910 }, { "epoch": 3.051705394754722, "grad_norm": 2.299278974533081, "learning_rate": 6.186727136839245e-05, "loss": 2.7082, "step": 44915 }, { "epoch": 3.052045114825384, "grad_norm": 2.484738349914551, "learning_rate": 6.186302486750918e-05, "loss": 2.7903, "step": 44920 }, { "epoch": 3.0523848348960456, "grad_norm": 2.93115496635437, "learning_rate": 6.18587783666259e-05, "loss": 2.6092, "step": 44925 }, { "epoch": 3.0527245549667072, "grad_norm": 3.3506147861480713, "learning_rate": 6.185453186574263e-05, "loss": 2.5726, "step": 44930 }, { "epoch": 3.0530642750373693, "grad_norm": 3.37900710105896, "learning_rate": 6.185028536485936e-05, "loss": 2.4359, "step": 44935 }, { "epoch": 3.053403995108031, "grad_norm": 2.959272623062134, "learning_rate": 6.184603886397609e-05, "loss": 2.6661, "step": 44940 }, { "epoch": 3.0537437151786926, "grad_norm": 2.470141649246216, "learning_rate": 6.184179236309282e-05, "loss": 2.6426, "step": 44945 }, { "epoch": 3.0540834352493547, "grad_norm": 2.448007345199585, "learning_rate": 6.183754586220954e-05, "loss": 2.6554, "step": 44950 }, { "epoch": 3.0544231553200163, "grad_norm": 2.663689613342285, "learning_rate": 6.183329936132627e-05, "loss": 2.4835, "step": 44955 }, { "epoch": 3.054762875390678, "grad_norm": 2.3235864639282227, "learning_rate": 6.1829052860443e-05, "loss": 2.6551, "step": 44960 }, { "epoch": 3.05510259546134, "grad_norm": 
2.824305295944214, "learning_rate": 6.182480635955973e-05, "loss": 2.581, "step": 44965 }, { "epoch": 3.0554423155320016, "grad_norm": 2.200814723968506, "learning_rate": 6.182055985867646e-05, "loss": 2.5672, "step": 44970 }, { "epoch": 3.0557820356026633, "grad_norm": 2.901991128921509, "learning_rate": 6.181631335779317e-05, "loss": 2.5947, "step": 44975 }, { "epoch": 3.0561217556733253, "grad_norm": 2.5509870052337646, "learning_rate": 6.181206685690991e-05, "loss": 2.7375, "step": 44980 }, { "epoch": 3.056461475743987, "grad_norm": 2.7620651721954346, "learning_rate": 6.180782035602664e-05, "loss": 2.6658, "step": 44985 }, { "epoch": 3.0568011958146486, "grad_norm": 2.4316766262054443, "learning_rate": 6.180357385514335e-05, "loss": 2.7843, "step": 44990 }, { "epoch": 3.0571409158853107, "grad_norm": 2.731300115585327, "learning_rate": 6.17993273542601e-05, "loss": 2.3436, "step": 44995 }, { "epoch": 3.0574806359559723, "grad_norm": 2.821324110031128, "learning_rate": 6.179508085337682e-05, "loss": 2.5554, "step": 45000 }, { "epoch": 3.057820356026634, "grad_norm": 2.691425323486328, "learning_rate": 6.179083435249354e-05, "loss": 2.4781, "step": 45005 }, { "epoch": 3.058160076097296, "grad_norm": 2.7064402103424072, "learning_rate": 6.178658785161028e-05, "loss": 2.3815, "step": 45010 }, { "epoch": 3.0584997961679576, "grad_norm": 2.79347825050354, "learning_rate": 6.178234135072701e-05, "loss": 2.7541, "step": 45015 }, { "epoch": 3.0588395162386193, "grad_norm": 3.1680078506469727, "learning_rate": 6.177809484984372e-05, "loss": 2.3345, "step": 45020 }, { "epoch": 3.0591792363092813, "grad_norm": 2.849902868270874, "learning_rate": 6.177384834896046e-05, "loss": 2.6493, "step": 45025 }, { "epoch": 3.059518956379943, "grad_norm": 2.6487367153167725, "learning_rate": 6.176960184807719e-05, "loss": 2.7796, "step": 45030 }, { "epoch": 3.0598586764506046, "grad_norm": 2.7261464595794678, "learning_rate": 6.176535534719392e-05, "loss": 2.574, "step": 45035 }, { 
"epoch": 3.0601983965212667, "grad_norm": 2.3914976119995117, "learning_rate": 6.176110884631065e-05, "loss": 2.6915, "step": 45040 }, { "epoch": 3.0605381165919283, "grad_norm": 2.416743516921997, "learning_rate": 6.175686234542736e-05, "loss": 2.4771, "step": 45045 }, { "epoch": 3.06087783666259, "grad_norm": 2.6397206783294678, "learning_rate": 6.17526158445441e-05, "loss": 2.5199, "step": 45050 }, { "epoch": 3.0612175567332516, "grad_norm": 2.9350996017456055, "learning_rate": 6.174836934366083e-05, "loss": 2.5469, "step": 45055 }, { "epoch": 3.0615572768039137, "grad_norm": 3.5507800579071045, "learning_rate": 6.174412284277755e-05, "loss": 2.6506, "step": 45060 }, { "epoch": 3.0618969968745753, "grad_norm": 2.8510043621063232, "learning_rate": 6.173987634189429e-05, "loss": 2.7304, "step": 45065 }, { "epoch": 3.062236716945237, "grad_norm": 3.4330592155456543, "learning_rate": 6.173562984101102e-05, "loss": 2.556, "step": 45070 }, { "epoch": 3.062576437015899, "grad_norm": 2.5481996536254883, "learning_rate": 6.173138334012773e-05, "loss": 2.9414, "step": 45075 }, { "epoch": 3.0629161570865606, "grad_norm": 2.2356624603271484, "learning_rate": 6.172713683924447e-05, "loss": 2.7239, "step": 45080 }, { "epoch": 3.0632558771572223, "grad_norm": 2.2875261306762695, "learning_rate": 6.17228903383612e-05, "loss": 2.6732, "step": 45085 }, { "epoch": 3.0635955972278843, "grad_norm": 2.7780873775482178, "learning_rate": 6.171864383747791e-05, "loss": 2.508, "step": 45090 }, { "epoch": 3.063935317298546, "grad_norm": 2.5159337520599365, "learning_rate": 6.171439733659466e-05, "loss": 2.7124, "step": 45095 }, { "epoch": 3.0642750373692076, "grad_norm": 2.9078614711761475, "learning_rate": 6.171015083571138e-05, "loss": 2.7028, "step": 45100 }, { "epoch": 3.0646147574398697, "grad_norm": 3.219499111175537, "learning_rate": 6.17059043348281e-05, "loss": 2.5908, "step": 45105 }, { "epoch": 3.0649544775105313, "grad_norm": 2.4671969413757324, "learning_rate": 
6.170165783394484e-05, "loss": 2.7262, "step": 45110 }, { "epoch": 3.065294197581193, "grad_norm": 3.0522289276123047, "learning_rate": 6.169741133306157e-05, "loss": 2.6622, "step": 45115 }, { "epoch": 3.065633917651855, "grad_norm": 2.445866584777832, "learning_rate": 6.169316483217828e-05, "loss": 2.6381, "step": 45120 }, { "epoch": 3.0659736377225166, "grad_norm": 3.00288987159729, "learning_rate": 6.168891833129502e-05, "loss": 2.6573, "step": 45125 }, { "epoch": 3.0663133577931783, "grad_norm": 3.4120700359344482, "learning_rate": 6.168467183041174e-05, "loss": 2.7162, "step": 45130 }, { "epoch": 3.0666530778638403, "grad_norm": 2.8043336868286133, "learning_rate": 6.168042532952847e-05, "loss": 2.571, "step": 45135 }, { "epoch": 3.066992797934502, "grad_norm": 3.172525644302368, "learning_rate": 6.167617882864521e-05, "loss": 2.4449, "step": 45140 }, { "epoch": 3.0673325180051636, "grad_norm": 2.813579797744751, "learning_rate": 6.167193232776192e-05, "loss": 2.3576, "step": 45145 }, { "epoch": 3.0676722380758257, "grad_norm": 2.861489772796631, "learning_rate": 6.166768582687865e-05, "loss": 2.7259, "step": 45150 }, { "epoch": 3.0680119581464873, "grad_norm": 2.9324798583984375, "learning_rate": 6.166343932599539e-05, "loss": 2.7662, "step": 45155 }, { "epoch": 3.068351678217149, "grad_norm": 2.2696385383605957, "learning_rate": 6.16591928251121e-05, "loss": 2.5481, "step": 45160 }, { "epoch": 3.068691398287811, "grad_norm": 2.735779285430908, "learning_rate": 6.165494632422883e-05, "loss": 2.7307, "step": 45165 }, { "epoch": 3.0690311183584726, "grad_norm": 2.5435783863067627, "learning_rate": 6.165069982334558e-05, "loss": 2.5602, "step": 45170 }, { "epoch": 3.0693708384291343, "grad_norm": 3.9287896156311035, "learning_rate": 6.164645332246229e-05, "loss": 2.5696, "step": 45175 }, { "epoch": 3.0697105584997963, "grad_norm": 2.9579076766967773, "learning_rate": 6.164220682157902e-05, "loss": 2.6724, "step": 45180 }, { "epoch": 3.070050278570458, 
"grad_norm": 2.8023667335510254, "learning_rate": 6.163796032069576e-05, "loss": 2.6657, "step": 45185 }, { "epoch": 3.0703899986411196, "grad_norm": 2.708861827850342, "learning_rate": 6.163371381981247e-05, "loss": 2.836, "step": 45190 }, { "epoch": 3.0707297187117817, "grad_norm": 2.6461057662963867, "learning_rate": 6.16294673189292e-05, "loss": 2.4298, "step": 45195 }, { "epoch": 3.0710694387824433, "grad_norm": 2.626725673675537, "learning_rate": 6.162522081804593e-05, "loss": 2.5838, "step": 45200 }, { "epoch": 3.071409158853105, "grad_norm": 2.7858784198760986, "learning_rate": 6.162097431716266e-05, "loss": 2.3573, "step": 45205 }, { "epoch": 3.071748878923767, "grad_norm": 3.4565534591674805, "learning_rate": 6.161672781627939e-05, "loss": 2.8111, "step": 45210 }, { "epoch": 3.0720885989944287, "grad_norm": 3.6737582683563232, "learning_rate": 6.161248131539611e-05, "loss": 2.7497, "step": 45215 }, { "epoch": 3.0724283190650903, "grad_norm": 2.5880634784698486, "learning_rate": 6.160823481451284e-05, "loss": 2.5472, "step": 45220 }, { "epoch": 3.0727680391357524, "grad_norm": 2.559246063232422, "learning_rate": 6.160398831362957e-05, "loss": 2.6945, "step": 45225 }, { "epoch": 3.073107759206414, "grad_norm": 2.364694118499756, "learning_rate": 6.15997418127463e-05, "loss": 2.4728, "step": 45230 }, { "epoch": 3.0734474792770756, "grad_norm": 3.020582437515259, "learning_rate": 6.159549531186303e-05, "loss": 2.2723, "step": 45235 }, { "epoch": 3.0737871993477373, "grad_norm": 2.520266532897949, "learning_rate": 6.159124881097975e-05, "loss": 2.6487, "step": 45240 }, { "epoch": 3.0741269194183993, "grad_norm": 2.9109416007995605, "learning_rate": 6.158700231009648e-05, "loss": 2.8471, "step": 45245 }, { "epoch": 3.074466639489061, "grad_norm": 3.738551616668701, "learning_rate": 6.158275580921321e-05, "loss": 2.8553, "step": 45250 }, { "epoch": 3.0748063595597226, "grad_norm": 2.1985840797424316, "learning_rate": 6.157850930832994e-05, "loss": 2.5998, 
"step": 45255 }, { "epoch": 3.0751460796303847, "grad_norm": 5.675144672393799, "learning_rate": 6.157426280744667e-05, "loss": 2.4365, "step": 45260 }, { "epoch": 3.0754857997010463, "grad_norm": 2.937282085418701, "learning_rate": 6.15700163065634e-05, "loss": 2.6011, "step": 45265 }, { "epoch": 3.075825519771708, "grad_norm": 3.135892868041992, "learning_rate": 6.156576980568012e-05, "loss": 2.2419, "step": 45270 }, { "epoch": 3.07616523984237, "grad_norm": 3.248688220977783, "learning_rate": 6.156152330479685e-05, "loss": 2.4321, "step": 45275 }, { "epoch": 3.0765049599130316, "grad_norm": 2.5275940895080566, "learning_rate": 6.155727680391358e-05, "loss": 2.2593, "step": 45280 }, { "epoch": 3.0768446799836933, "grad_norm": 2.450739860534668, "learning_rate": 6.15530303030303e-05, "loss": 2.7806, "step": 45285 }, { "epoch": 3.0771844000543553, "grad_norm": 2.2140045166015625, "learning_rate": 6.154878380214703e-05, "loss": 2.5494, "step": 45290 }, { "epoch": 3.077524120125017, "grad_norm": 3.8122198581695557, "learning_rate": 6.154453730126376e-05, "loss": 2.6671, "step": 45295 }, { "epoch": 3.0778638401956786, "grad_norm": 3.1062753200531006, "learning_rate": 6.154029080038049e-05, "loss": 2.696, "step": 45300 }, { "epoch": 3.0782035602663407, "grad_norm": 2.412242889404297, "learning_rate": 6.153604429949722e-05, "loss": 2.6653, "step": 45305 }, { "epoch": 3.0785432803370023, "grad_norm": 3.5647287368774414, "learning_rate": 6.153179779861395e-05, "loss": 2.6439, "step": 45310 }, { "epoch": 3.078883000407664, "grad_norm": 2.443979501724243, "learning_rate": 6.152755129773067e-05, "loss": 2.6712, "step": 45315 }, { "epoch": 3.079222720478326, "grad_norm": 2.6619842052459717, "learning_rate": 6.15233047968474e-05, "loss": 2.5772, "step": 45320 }, { "epoch": 3.0795624405489876, "grad_norm": 2.57438063621521, "learning_rate": 6.151905829596413e-05, "loss": 2.6252, "step": 45325 }, { "epoch": 3.0799021606196493, "grad_norm": 2.5984668731689453, "learning_rate": 
6.151481179508084e-05, "loss": 2.7317, "step": 45330 }, { "epoch": 3.0802418806903114, "grad_norm": 3.1712467670440674, "learning_rate": 6.151056529419759e-05, "loss": 2.6919, "step": 45335 }, { "epoch": 3.080581600760973, "grad_norm": 2.4014062881469727, "learning_rate": 6.150631879331431e-05, "loss": 2.3779, "step": 45340 }, { "epoch": 3.0809213208316346, "grad_norm": 2.6509835720062256, "learning_rate": 6.150207229243103e-05, "loss": 2.7756, "step": 45345 }, { "epoch": 3.0812610409022967, "grad_norm": 4.1481852531433105, "learning_rate": 6.149782579154777e-05, "loss": 2.6235, "step": 45350 }, { "epoch": 3.0816007609729583, "grad_norm": 3.6159589290618896, "learning_rate": 6.14935792906645e-05, "loss": 2.3822, "step": 45355 }, { "epoch": 3.08194048104362, "grad_norm": 3.0536913871765137, "learning_rate": 6.148933278978121e-05, "loss": 2.706, "step": 45360 }, { "epoch": 3.082280201114282, "grad_norm": 2.7778139114379883, "learning_rate": 6.148508628889795e-05, "loss": 2.6586, "step": 45365 }, { "epoch": 3.0826199211849437, "grad_norm": 3.1889829635620117, "learning_rate": 6.148083978801468e-05, "loss": 2.6006, "step": 45370 }, { "epoch": 3.0829596412556053, "grad_norm": 2.5359725952148438, "learning_rate": 6.147659328713141e-05, "loss": 2.1793, "step": 45375 }, { "epoch": 3.0832993613262674, "grad_norm": 2.771437644958496, "learning_rate": 6.147234678624814e-05, "loss": 2.52, "step": 45380 }, { "epoch": 3.083639081396929, "grad_norm": 3.0103042125701904, "learning_rate": 6.146810028536487e-05, "loss": 2.6314, "step": 45385 }, { "epoch": 3.0839788014675906, "grad_norm": 3.4768903255462646, "learning_rate": 6.14638537844816e-05, "loss": 2.6762, "step": 45390 }, { "epoch": 3.0843185215382523, "grad_norm": 2.3217854499816895, "learning_rate": 6.145960728359832e-05, "loss": 2.5904, "step": 45395 }, { "epoch": 3.0846582416089143, "grad_norm": 2.8839683532714844, "learning_rate": 6.145536078271504e-05, "loss": 2.5652, "step": 45400 }, { "epoch": 3.084997961679576, 
"grad_norm": 3.221543788909912, "learning_rate": 6.145111428183178e-05, "loss": 2.4907, "step": 45405 }, { "epoch": 3.0853376817502376, "grad_norm": 2.6963815689086914, "learning_rate": 6.14468677809485e-05, "loss": 2.3342, "step": 45410 }, { "epoch": 3.0856774018208997, "grad_norm": 2.9910647869110107, "learning_rate": 6.144262128006522e-05, "loss": 2.57, "step": 45415 }, { "epoch": 3.0860171218915613, "grad_norm": 2.861651659011841, "learning_rate": 6.143837477918196e-05, "loss": 2.5958, "step": 45420 }, { "epoch": 3.086356841962223, "grad_norm": 2.8195338249206543, "learning_rate": 6.143412827829869e-05, "loss": 2.7635, "step": 45425 }, { "epoch": 3.086696562032885, "grad_norm": 2.544964551925659, "learning_rate": 6.14298817774154e-05, "loss": 2.7025, "step": 45430 }, { "epoch": 3.0870362821035466, "grad_norm": 2.558871030807495, "learning_rate": 6.142563527653215e-05, "loss": 2.7839, "step": 45435 }, { "epoch": 3.0873760021742083, "grad_norm": 2.0621941089630127, "learning_rate": 6.142138877564887e-05, "loss": 2.5913, "step": 45440 }, { "epoch": 3.0877157222448703, "grad_norm": 2.9697296619415283, "learning_rate": 6.141714227476559e-05, "loss": 2.5622, "step": 45445 }, { "epoch": 3.088055442315532, "grad_norm": 3.5703916549682617, "learning_rate": 6.141289577388233e-05, "loss": 2.8924, "step": 45450 }, { "epoch": 3.0883951623861936, "grad_norm": 3.0459320545196533, "learning_rate": 6.140864927299906e-05, "loss": 2.7276, "step": 45455 }, { "epoch": 3.0887348824568557, "grad_norm": 2.5620365142822266, "learning_rate": 6.140440277211577e-05, "loss": 2.6582, "step": 45460 }, { "epoch": 3.0890746025275173, "grad_norm": 2.744715690612793, "learning_rate": 6.140015627123251e-05, "loss": 2.4125, "step": 45465 }, { "epoch": 3.089414322598179, "grad_norm": 2.8061773777008057, "learning_rate": 6.139590977034923e-05, "loss": 2.522, "step": 45470 }, { "epoch": 3.089754042668841, "grad_norm": 2.6432082653045654, "learning_rate": 6.139166326946596e-05, "loss": 2.4858, "step": 
45475 }, { "epoch": 3.0900937627395026, "grad_norm": 2.805016279220581, "learning_rate": 6.13874167685827e-05, "loss": 2.4771, "step": 45480 }, { "epoch": 3.0904334828101643, "grad_norm": 2.6848366260528564, "learning_rate": 6.138317026769941e-05, "loss": 2.4915, "step": 45485 }, { "epoch": 3.0907732028808264, "grad_norm": 2.7174413204193115, "learning_rate": 6.137892376681614e-05, "loss": 2.7712, "step": 45490 }, { "epoch": 3.091112922951488, "grad_norm": 3.03861927986145, "learning_rate": 6.137467726593288e-05, "loss": 2.5052, "step": 45495 }, { "epoch": 3.0914526430221496, "grad_norm": 2.9231536388397217, "learning_rate": 6.13704307650496e-05, "loss": 2.5571, "step": 45500 }, { "epoch": 3.0917923630928117, "grad_norm": 3.617368221282959, "learning_rate": 6.136618426416633e-05, "loss": 2.4332, "step": 45505 }, { "epoch": 3.0921320831634733, "grad_norm": 3.1835227012634277, "learning_rate": 6.136193776328307e-05, "loss": 2.6909, "step": 45510 }, { "epoch": 3.092471803234135, "grad_norm": 2.7364068031311035, "learning_rate": 6.135769126239978e-05, "loss": 2.8165, "step": 45515 }, { "epoch": 3.092811523304797, "grad_norm": 2.3527634143829346, "learning_rate": 6.135344476151651e-05, "loss": 2.7099, "step": 45520 }, { "epoch": 3.0931512433754587, "grad_norm": 3.39811372756958, "learning_rate": 6.134919826063325e-05, "loss": 2.6058, "step": 45525 }, { "epoch": 3.0934909634461203, "grad_norm": 3.0002284049987793, "learning_rate": 6.134495175974997e-05, "loss": 2.5486, "step": 45530 }, { "epoch": 3.0938306835167824, "grad_norm": 3.4571571350097656, "learning_rate": 6.134070525886669e-05, "loss": 2.5509, "step": 45535 }, { "epoch": 3.094170403587444, "grad_norm": 2.1406850814819336, "learning_rate": 6.133645875798343e-05, "loss": 2.7326, "step": 45540 }, { "epoch": 3.0945101236581056, "grad_norm": 2.9123282432556152, "learning_rate": 6.133221225710015e-05, "loss": 2.9172, "step": 45545 }, { "epoch": 3.0948498437287677, "grad_norm": 2.9215502738952637, "learning_rate": 
6.132796575621688e-05, "loss": 2.574, "step": 45550 }, { "epoch": 3.0951895637994293, "grad_norm": 2.696094036102295, "learning_rate": 6.13237192553336e-05, "loss": 2.8012, "step": 45555 }, { "epoch": 3.095529283870091, "grad_norm": 3.2326924800872803, "learning_rate": 6.131947275445033e-05, "loss": 2.5008, "step": 45560 }, { "epoch": 3.095869003940753, "grad_norm": 2.9536123275756836, "learning_rate": 6.131522625356706e-05, "loss": 2.6371, "step": 45565 }, { "epoch": 3.0962087240114147, "grad_norm": 2.5968735218048096, "learning_rate": 6.131097975268379e-05, "loss": 2.6024, "step": 45570 }, { "epoch": 3.0965484440820763, "grad_norm": 2.4624617099761963, "learning_rate": 6.130673325180052e-05, "loss": 2.6952, "step": 45575 }, { "epoch": 3.096888164152738, "grad_norm": 2.9417290687561035, "learning_rate": 6.130248675091725e-05, "loss": 2.3725, "step": 45580 }, { "epoch": 3.0972278842234, "grad_norm": 2.5840747356414795, "learning_rate": 6.129824025003397e-05, "loss": 2.9206, "step": 45585 }, { "epoch": 3.0975676042940616, "grad_norm": 2.9998762607574463, "learning_rate": 6.12939937491507e-05, "loss": 2.487, "step": 45590 }, { "epoch": 3.0979073243647233, "grad_norm": 2.052476167678833, "learning_rate": 6.128974724826743e-05, "loss": 2.5724, "step": 45595 }, { "epoch": 3.0982470444353853, "grad_norm": 2.945603609085083, "learning_rate": 6.128550074738416e-05, "loss": 2.7141, "step": 45600 }, { "epoch": 3.098586764506047, "grad_norm": 1.9669088125228882, "learning_rate": 6.128125424650089e-05, "loss": 2.6215, "step": 45605 }, { "epoch": 3.0989264845767086, "grad_norm": 2.5967369079589844, "learning_rate": 6.127700774561761e-05, "loss": 2.5661, "step": 45610 }, { "epoch": 3.0992662046473707, "grad_norm": 3.5900046825408936, "learning_rate": 6.127276124473434e-05, "loss": 2.34, "step": 45615 }, { "epoch": 3.0996059247180323, "grad_norm": 2.163970470428467, "learning_rate": 6.126851474385107e-05, "loss": 2.6454, "step": 45620 }, { "epoch": 3.099945644788694, "grad_norm": 
2.701467275619507, "learning_rate": 6.12642682429678e-05, "loss": 2.4211, "step": 45625 }, { "epoch": 3.100285364859356, "grad_norm": 2.882798671722412, "learning_rate": 6.126002174208453e-05, "loss": 2.6015, "step": 45630 }, { "epoch": 3.1006250849300176, "grad_norm": 2.773341655731201, "learning_rate": 6.125577524120125e-05, "loss": 2.5339, "step": 45635 }, { "epoch": 3.1009648050006793, "grad_norm": 2.7573776245117188, "learning_rate": 6.125152874031798e-05, "loss": 2.3732, "step": 45640 }, { "epoch": 3.1013045250713414, "grad_norm": 3.6085715293884277, "learning_rate": 6.124728223943471e-05, "loss": 2.6479, "step": 45645 }, { "epoch": 3.101644245142003, "grad_norm": 3.402090549468994, "learning_rate": 6.124303573855144e-05, "loss": 2.6077, "step": 45650 }, { "epoch": 3.1019839652126646, "grad_norm": 2.848862886428833, "learning_rate": 6.123878923766817e-05, "loss": 2.6829, "step": 45655 }, { "epoch": 3.1023236852833267, "grad_norm": 3.6187217235565186, "learning_rate": 6.12345427367849e-05, "loss": 2.74, "step": 45660 }, { "epoch": 3.1026634053539883, "grad_norm": 3.0635950565338135, "learning_rate": 6.123029623590162e-05, "loss": 2.5582, "step": 45665 }, { "epoch": 3.10300312542465, "grad_norm": 2.7168378829956055, "learning_rate": 6.122604973501834e-05, "loss": 2.4507, "step": 45670 }, { "epoch": 3.103342845495312, "grad_norm": 2.2598958015441895, "learning_rate": 6.122180323413508e-05, "loss": 2.3321, "step": 45675 }, { "epoch": 3.1036825655659737, "grad_norm": 2.9496755599975586, "learning_rate": 6.12175567332518e-05, "loss": 2.6717, "step": 45680 }, { "epoch": 3.1040222856366353, "grad_norm": 2.3091912269592285, "learning_rate": 6.121331023236852e-05, "loss": 2.7839, "step": 45685 }, { "epoch": 3.1043620057072974, "grad_norm": 2.830721378326416, "learning_rate": 6.120906373148526e-05, "loss": 2.4916, "step": 45690 }, { "epoch": 3.104701725777959, "grad_norm": 3.7131669521331787, "learning_rate": 6.120481723060199e-05, "loss": 2.4428, "step": 45695 }, { 
"epoch": 3.1050414458486206, "grad_norm": 3.2363569736480713, "learning_rate": 6.12005707297187e-05, "loss": 2.8485, "step": 45700 }, { "epoch": 3.1053811659192827, "grad_norm": 3.252998113632202, "learning_rate": 6.119632422883545e-05, "loss": 2.6082, "step": 45705 }, { "epoch": 3.1057208859899443, "grad_norm": 3.007309913635254, "learning_rate": 6.119207772795217e-05, "loss": 2.8104, "step": 45710 }, { "epoch": 3.106060606060606, "grad_norm": 3.1706295013427734, "learning_rate": 6.11878312270689e-05, "loss": 2.5451, "step": 45715 }, { "epoch": 3.106400326131268, "grad_norm": 2.052898406982422, "learning_rate": 6.118358472618563e-05, "loss": 2.5148, "step": 45720 }, { "epoch": 3.1067400462019297, "grad_norm": 2.6658923625946045, "learning_rate": 6.117933822530236e-05, "loss": 2.6117, "step": 45725 }, { "epoch": 3.1070797662725913, "grad_norm": 2.8422820568084717, "learning_rate": 6.117509172441909e-05, "loss": 2.4492, "step": 45730 }, { "epoch": 3.107419486343253, "grad_norm": 3.247668504714966, "learning_rate": 6.117084522353581e-05, "loss": 2.7131, "step": 45735 }, { "epoch": 3.107759206413915, "grad_norm": 3.4430460929870605, "learning_rate": 6.116659872265254e-05, "loss": 2.6487, "step": 45740 }, { "epoch": 3.1080989264845766, "grad_norm": 3.7621049880981445, "learning_rate": 6.116235222176927e-05, "loss": 2.7751, "step": 45745 }, { "epoch": 3.1084386465552383, "grad_norm": 2.9972593784332275, "learning_rate": 6.1158105720886e-05, "loss": 2.7394, "step": 45750 }, { "epoch": 3.1087783666259003, "grad_norm": 2.871457099914551, "learning_rate": 6.115385922000271e-05, "loss": 2.525, "step": 45755 }, { "epoch": 3.109118086696562, "grad_norm": 3.2024028301239014, "learning_rate": 6.114961271911945e-05, "loss": 2.6765, "step": 45760 }, { "epoch": 3.1094578067672236, "grad_norm": 2.9891891479492188, "learning_rate": 6.114536621823618e-05, "loss": 2.4177, "step": 45765 }, { "epoch": 3.1097975268378857, "grad_norm": 2.8756539821624756, "learning_rate": 
6.11411197173529e-05, "loss": 2.487, "step": 45770 }, { "epoch": 3.1101372469085473, "grad_norm": 2.498920440673828, "learning_rate": 6.113687321646964e-05, "loss": 2.6385, "step": 45775 }, { "epoch": 3.110476966979209, "grad_norm": 2.273944854736328, "learning_rate": 6.113262671558637e-05, "loss": 2.8035, "step": 45780 }, { "epoch": 3.110816687049871, "grad_norm": 2.8879127502441406, "learning_rate": 6.112838021470308e-05, "loss": 2.6476, "step": 45785 }, { "epoch": 3.1111564071205327, "grad_norm": 2.700998306274414, "learning_rate": 6.112413371381982e-05, "loss": 2.8117, "step": 45790 }, { "epoch": 3.1114961271911943, "grad_norm": 3.4575271606445312, "learning_rate": 6.111988721293655e-05, "loss": 2.7146, "step": 45795 }, { "epoch": 3.1118358472618564, "grad_norm": 2.947744131088257, "learning_rate": 6.111564071205326e-05, "loss": 2.7659, "step": 45800 }, { "epoch": 3.112175567332518, "grad_norm": 2.7048606872558594, "learning_rate": 6.111139421117e-05, "loss": 2.4225, "step": 45805 }, { "epoch": 3.1125152874031796, "grad_norm": 2.125182867050171, "learning_rate": 6.110714771028673e-05, "loss": 2.3727, "step": 45810 }, { "epoch": 3.1128550074738417, "grad_norm": 2.526522397994995, "learning_rate": 6.110290120940345e-05, "loss": 2.5548, "step": 45815 }, { "epoch": 3.1131947275445033, "grad_norm": 2.734128952026367, "learning_rate": 6.109865470852019e-05, "loss": 2.7714, "step": 45820 }, { "epoch": 3.113534447615165, "grad_norm": 2.5222437381744385, "learning_rate": 6.10944082076369e-05, "loss": 2.5369, "step": 45825 }, { "epoch": 3.113874167685827, "grad_norm": 1.9560060501098633, "learning_rate": 6.109016170675363e-05, "loss": 2.6618, "step": 45830 }, { "epoch": 3.1142138877564887, "grad_norm": 2.7506015300750732, "learning_rate": 6.108591520587037e-05, "loss": 2.641, "step": 45835 }, { "epoch": 3.1145536078271503, "grad_norm": 2.593543529510498, "learning_rate": 6.108166870498709e-05, "loss": 2.7658, "step": 45840 }, { "epoch": 3.1148933278978124, "grad_norm": 
2.3858141899108887, "learning_rate": 6.107742220410382e-05, "loss": 2.5818, "step": 45845 }, { "epoch": 3.115233047968474, "grad_norm": 3.0611634254455566, "learning_rate": 6.107317570322056e-05, "loss": 2.7578, "step": 45850 }, { "epoch": 3.1155727680391356, "grad_norm": 2.9577865600585938, "learning_rate": 6.106892920233727e-05, "loss": 2.5854, "step": 45855 }, { "epoch": 3.1159124881097977, "grad_norm": 2.941765308380127, "learning_rate": 6.1064682701454e-05, "loss": 2.5156, "step": 45860 }, { "epoch": 3.1162522081804593, "grad_norm": 3.185497522354126, "learning_rate": 6.106043620057074e-05, "loss": 2.4807, "step": 45865 }, { "epoch": 3.116591928251121, "grad_norm": 3.559756278991699, "learning_rate": 6.105618969968746e-05, "loss": 2.5456, "step": 45870 }, { "epoch": 3.116931648321783, "grad_norm": 2.844498872756958, "learning_rate": 6.105194319880418e-05, "loss": 2.7003, "step": 45875 }, { "epoch": 3.1172713683924447, "grad_norm": 2.324758768081665, "learning_rate": 6.104769669792093e-05, "loss": 2.8581, "step": 45880 }, { "epoch": 3.1176110884631063, "grad_norm": 3.571455478668213, "learning_rate": 6.104345019703764e-05, "loss": 2.4852, "step": 45885 }, { "epoch": 3.1179508085337684, "grad_norm": 3.16640567779541, "learning_rate": 6.103920369615437e-05, "loss": 2.6321, "step": 45890 }, { "epoch": 3.11829052860443, "grad_norm": 4.770359516143799, "learning_rate": 6.10349571952711e-05, "loss": 2.5786, "step": 45895 }, { "epoch": 3.1186302486750916, "grad_norm": 3.6400671005249023, "learning_rate": 6.1030710694387824e-05, "loss": 2.7922, "step": 45900 }, { "epoch": 3.1189699687457537, "grad_norm": 2.405118942260742, "learning_rate": 6.102646419350455e-05, "loss": 2.8324, "step": 45905 }, { "epoch": 3.1193096888164153, "grad_norm": 3.0265142917633057, "learning_rate": 6.102221769262129e-05, "loss": 2.6403, "step": 45910 }, { "epoch": 3.119649408887077, "grad_norm": 3.249288320541382, "learning_rate": 6.101797119173801e-05, "loss": 2.6813, "step": 45915 }, { 
"epoch": 3.1199891289577386, "grad_norm": 2.414825677871704, "learning_rate": 6.1013724690854736e-05, "loss": 2.5937, "step": 45920 }, { "epoch": 3.1203288490284007, "grad_norm": 2.8772614002227783, "learning_rate": 6.100947818997147e-05, "loss": 2.4634, "step": 45925 }, { "epoch": 3.1206685690990623, "grad_norm": 3.9665236473083496, "learning_rate": 6.100523168908819e-05, "loss": 2.5666, "step": 45930 }, { "epoch": 3.121008289169724, "grad_norm": 2.99753999710083, "learning_rate": 6.100098518820492e-05, "loss": 2.61, "step": 45935 }, { "epoch": 3.121348009240386, "grad_norm": 2.929417848587036, "learning_rate": 6.0996738687321655e-05, "loss": 2.5714, "step": 45940 }, { "epoch": 3.1216877293110477, "grad_norm": 3.0352039337158203, "learning_rate": 6.0992492186438376e-05, "loss": 2.6582, "step": 45945 }, { "epoch": 3.1220274493817093, "grad_norm": 3.2585031986236572, "learning_rate": 6.0988245685555104e-05, "loss": 2.7204, "step": 45950 }, { "epoch": 3.1223671694523714, "grad_norm": 2.713040828704834, "learning_rate": 6.098399918467184e-05, "loss": 2.3733, "step": 45955 }, { "epoch": 3.122706889523033, "grad_norm": 2.8852083683013916, "learning_rate": 6.097975268378856e-05, "loss": 2.5948, "step": 45960 }, { "epoch": 3.1230466095936946, "grad_norm": 2.478910446166992, "learning_rate": 6.097550618290528e-05, "loss": 2.6078, "step": 45965 }, { "epoch": 3.1233863296643567, "grad_norm": 4.074156284332275, "learning_rate": 6.0971259682022016e-05, "loss": 2.6734, "step": 45970 }, { "epoch": 3.1237260497350183, "grad_norm": 2.921963691711426, "learning_rate": 6.0967013181138744e-05, "loss": 2.6696, "step": 45975 }, { "epoch": 3.12406576980568, "grad_norm": 2.515371084213257, "learning_rate": 6.0962766680255466e-05, "loss": 2.5766, "step": 45980 }, { "epoch": 3.124405489876342, "grad_norm": 2.6420416831970215, "learning_rate": 6.09585201793722e-05, "loss": 2.7474, "step": 45985 }, { "epoch": 3.1247452099470037, "grad_norm": 2.637143850326538, "learning_rate": 
6.095427367848893e-05, "loss": 2.416, "step": 45990 }, { "epoch": 3.1250849300176653, "grad_norm": 2.5644690990448, "learning_rate": 6.095002717760565e-05, "loss": 2.7429, "step": 45995 }, { "epoch": 3.1254246500883274, "grad_norm": 2.780679225921631, "learning_rate": 6.0945780676722384e-05, "loss": 2.7792, "step": 46000 }, { "epoch": 3.125764370158989, "grad_norm": 2.7984566688537598, "learning_rate": 6.094153417583911e-05, "loss": 2.5765, "step": 46005 }, { "epoch": 3.1261040902296506, "grad_norm": 2.4595868587493896, "learning_rate": 6.0937287674955834e-05, "loss": 2.673, "step": 46010 }, { "epoch": 3.1264438103003127, "grad_norm": 3.374431610107422, "learning_rate": 6.093304117407257e-05, "loss": 2.6256, "step": 46015 }, { "epoch": 3.1267835303709743, "grad_norm": 3.0825657844543457, "learning_rate": 6.0928794673189296e-05, "loss": 2.5943, "step": 46020 }, { "epoch": 3.127123250441636, "grad_norm": 2.5380446910858154, "learning_rate": 6.092454817230602e-05, "loss": 2.561, "step": 46025 }, { "epoch": 3.127462970512298, "grad_norm": 2.1272902488708496, "learning_rate": 6.092030167142275e-05, "loss": 2.6787, "step": 46030 }, { "epoch": 3.1278026905829597, "grad_norm": 3.340616226196289, "learning_rate": 6.0916055170539474e-05, "loss": 2.3975, "step": 46035 }, { "epoch": 3.1281424106536213, "grad_norm": 3.8273916244506836, "learning_rate": 6.09118086696562e-05, "loss": 2.7072, "step": 46040 }, { "epoch": 3.1284821307242834, "grad_norm": 2.539577007293701, "learning_rate": 6.0907562168772936e-05, "loss": 2.5249, "step": 46045 }, { "epoch": 3.128821850794945, "grad_norm": 3.0822501182556152, "learning_rate": 6.090331566788966e-05, "loss": 2.6918, "step": 46050 }, { "epoch": 3.1291615708656066, "grad_norm": 2.9147047996520996, "learning_rate": 6.089906916700639e-05, "loss": 2.3968, "step": 46055 }, { "epoch": 3.1295012909362687, "grad_norm": 2.658662796020508, "learning_rate": 6.089482266612312e-05, "loss": 2.5841, "step": 46060 }, { "epoch": 3.1298410110069304, 
"grad_norm": 3.306494951248169, "learning_rate": 6.089057616523984e-05, "loss": 2.5609, "step": 46065 }, { "epoch": 3.130180731077592, "grad_norm": 2.9373531341552734, "learning_rate": 6.0886329664356576e-05, "loss": 2.7832, "step": 46070 }, { "epoch": 3.1305204511482536, "grad_norm": 3.3074514865875244, "learning_rate": 6.0882083163473304e-05, "loss": 2.568, "step": 46075 }, { "epoch": 3.1308601712189157, "grad_norm": 3.301267385482788, "learning_rate": 6.0877836662590026e-05, "loss": 2.5623, "step": 46080 }, { "epoch": 3.1311998912895773, "grad_norm": 2.230637788772583, "learning_rate": 6.087359016170676e-05, "loss": 2.4402, "step": 46085 }, { "epoch": 3.131539611360239, "grad_norm": 2.6415765285491943, "learning_rate": 6.086934366082349e-05, "loss": 2.916, "step": 46090 }, { "epoch": 3.131879331430901, "grad_norm": 2.7084083557128906, "learning_rate": 6.086509715994021e-05, "loss": 2.7842, "step": 46095 }, { "epoch": 3.1322190515015627, "grad_norm": 2.444410800933838, "learning_rate": 6.0860850659056945e-05, "loss": 2.7631, "step": 46100 }, { "epoch": 3.1325587715722243, "grad_norm": 3.3677573204040527, "learning_rate": 6.0856604158173666e-05, "loss": 2.9391, "step": 46105 }, { "epoch": 3.1328984916428864, "grad_norm": 3.0019562244415283, "learning_rate": 6.0852357657290394e-05, "loss": 2.6072, "step": 46110 }, { "epoch": 3.133238211713548, "grad_norm": 3.2771778106689453, "learning_rate": 6.084811115640713e-05, "loss": 2.5026, "step": 46115 }, { "epoch": 3.1335779317842096, "grad_norm": 2.888606548309326, "learning_rate": 6.084386465552385e-05, "loss": 2.5867, "step": 46120 }, { "epoch": 3.1339176518548717, "grad_norm": 3.0068769454956055, "learning_rate": 6.083961815464058e-05, "loss": 2.9474, "step": 46125 }, { "epoch": 3.1342573719255333, "grad_norm": 2.8002054691314697, "learning_rate": 6.083537165375731e-05, "loss": 2.3265, "step": 46130 }, { "epoch": 3.134597091996195, "grad_norm": 3.379180669784546, "learning_rate": 6.0831125152874034e-05, "loss": 
2.6085, "step": 46135 }, { "epoch": 3.134936812066857, "grad_norm": 2.4681875705718994, "learning_rate": 6.082687865199076e-05, "loss": 2.4718, "step": 46140 }, { "epoch": 3.1352765321375187, "grad_norm": 3.189727306365967, "learning_rate": 6.0822632151107497e-05, "loss": 2.7203, "step": 46145 }, { "epoch": 3.1356162522081803, "grad_norm": 2.8403828144073486, "learning_rate": 6.081838565022422e-05, "loss": 2.5077, "step": 46150 }, { "epoch": 3.1359559722788424, "grad_norm": 2.361598491668701, "learning_rate": 6.0814139149340946e-05, "loss": 2.657, "step": 46155 }, { "epoch": 3.136295692349504, "grad_norm": 3.1540324687957764, "learning_rate": 6.080989264845768e-05, "loss": 2.4752, "step": 46160 }, { "epoch": 3.1366354124201656, "grad_norm": 3.0682966709136963, "learning_rate": 6.08056461475744e-05, "loss": 2.6416, "step": 46165 }, { "epoch": 3.1369751324908277, "grad_norm": 2.4763803482055664, "learning_rate": 6.080139964669112e-05, "loss": 2.7262, "step": 46170 }, { "epoch": 3.1373148525614893, "grad_norm": 3.4504480361938477, "learning_rate": 6.0797153145807865e-05, "loss": 2.7592, "step": 46175 }, { "epoch": 3.137654572632151, "grad_norm": 2.9379894733428955, "learning_rate": 6.0792906644924586e-05, "loss": 2.5887, "step": 46180 }, { "epoch": 3.137994292702813, "grad_norm": 3.4191436767578125, "learning_rate": 6.078866014404131e-05, "loss": 2.7007, "step": 46185 }, { "epoch": 3.1383340127734747, "grad_norm": 3.2445383071899414, "learning_rate": 6.078441364315804e-05, "loss": 2.7622, "step": 46190 }, { "epoch": 3.1386737328441363, "grad_norm": 2.66401743888855, "learning_rate": 6.078016714227477e-05, "loss": 2.7598, "step": 46195 }, { "epoch": 3.1390134529147984, "grad_norm": 2.8560678958892822, "learning_rate": 6.077592064139149e-05, "loss": 2.5249, "step": 46200 }, { "epoch": 3.13935317298546, "grad_norm": 2.8705074787139893, "learning_rate": 6.0771674140508226e-05, "loss": 2.5676, "step": 46205 }, { "epoch": 3.1396928930561216, "grad_norm": 3.117645502090454, 
"learning_rate": 6.0767427639624954e-05, "loss": 2.5626, "step": 46210 }, { "epoch": 3.1400326131267837, "grad_norm": 2.548863410949707, "learning_rate": 6.0763181138741675e-05, "loss": 2.959, "step": 46215 }, { "epoch": 3.1403723331974454, "grad_norm": 2.386326551437378, "learning_rate": 6.075893463785841e-05, "loss": 2.5988, "step": 46220 }, { "epoch": 3.140712053268107, "grad_norm": 2.5917367935180664, "learning_rate": 6.075468813697514e-05, "loss": 2.6189, "step": 46225 }, { "epoch": 3.141051773338769, "grad_norm": 3.436851739883423, "learning_rate": 6.075044163609186e-05, "loss": 2.4757, "step": 46230 }, { "epoch": 3.1413914934094307, "grad_norm": 3.2527499198913574, "learning_rate": 6.0746195135208594e-05, "loss": 2.6168, "step": 46235 }, { "epoch": 3.1417312134800923, "grad_norm": 2.406264543533325, "learning_rate": 6.0741948634325315e-05, "loss": 2.6203, "step": 46240 }, { "epoch": 3.1420709335507544, "grad_norm": 2.943305492401123, "learning_rate": 6.073770213344204e-05, "loss": 2.4729, "step": 46245 }, { "epoch": 3.142410653621416, "grad_norm": 2.9246890544891357, "learning_rate": 6.073345563255878e-05, "loss": 2.5757, "step": 46250 }, { "epoch": 3.1427503736920777, "grad_norm": 4.13363790512085, "learning_rate": 6.07292091316755e-05, "loss": 2.6068, "step": 46255 }, { "epoch": 3.1430900937627397, "grad_norm": 3.5764694213867188, "learning_rate": 6.072496263079223e-05, "loss": 2.7325, "step": 46260 }, { "epoch": 3.1434298138334014, "grad_norm": 3.263214588165283, "learning_rate": 6.072071612990896e-05, "loss": 2.5769, "step": 46265 }, { "epoch": 3.143769533904063, "grad_norm": 4.079758644104004, "learning_rate": 6.071646962902568e-05, "loss": 2.8195, "step": 46270 }, { "epoch": 3.1441092539747246, "grad_norm": 3.225309371948242, "learning_rate": 6.071222312814241e-05, "loss": 2.8302, "step": 46275 }, { "epoch": 3.1444489740453867, "grad_norm": 3.0685908794403076, "learning_rate": 6.0707976627259146e-05, "loss": 2.6666, "step": 46280 }, { "epoch": 
3.1447886941160483, "grad_norm": 2.8438823223114014, "learning_rate": 6.070373012637587e-05, "loss": 2.5286, "step": 46285 }, { "epoch": 3.14512841418671, "grad_norm": 2.0901293754577637, "learning_rate": 6.0699483625492595e-05, "loss": 2.7288, "step": 46290 }, { "epoch": 3.145468134257372, "grad_norm": 3.2252795696258545, "learning_rate": 6.069523712460933e-05, "loss": 2.4783, "step": 46295 }, { "epoch": 3.1458078543280337, "grad_norm": 3.683256149291992, "learning_rate": 6.069099062372605e-05, "loss": 2.8689, "step": 46300 }, { "epoch": 3.1461475743986953, "grad_norm": 3.2921340465545654, "learning_rate": 6.068674412284277e-05, "loss": 2.6921, "step": 46305 }, { "epoch": 3.1464872944693574, "grad_norm": 3.202117681503296, "learning_rate": 6.0682497621959514e-05, "loss": 2.3954, "step": 46310 }, { "epoch": 3.146827014540019, "grad_norm": 2.2990455627441406, "learning_rate": 6.0678251121076235e-05, "loss": 2.546, "step": 46315 }, { "epoch": 3.1471667346106806, "grad_norm": 2.686115026473999, "learning_rate": 6.0674004620192957e-05, "loss": 2.4629, "step": 46320 }, { "epoch": 3.1475064546813427, "grad_norm": 4.0229949951171875, "learning_rate": 6.066975811930969e-05, "loss": 2.7715, "step": 46325 }, { "epoch": 3.1478461747520043, "grad_norm": 2.9702203273773193, "learning_rate": 6.066551161842642e-05, "loss": 2.7271, "step": 46330 }, { "epoch": 3.148185894822666, "grad_norm": 3.958265542984009, "learning_rate": 6.066126511754314e-05, "loss": 2.6183, "step": 46335 }, { "epoch": 3.148525614893328, "grad_norm": 2.902881145477295, "learning_rate": 6.0657018616659875e-05, "loss": 2.4809, "step": 46340 }, { "epoch": 3.1488653349639897, "grad_norm": 3.147587776184082, "learning_rate": 6.06527721157766e-05, "loss": 2.6318, "step": 46345 }, { "epoch": 3.1492050550346513, "grad_norm": 2.8482885360717773, "learning_rate": 6.0648525614893325e-05, "loss": 2.7055, "step": 46350 }, { "epoch": 3.1495447751053134, "grad_norm": 3.3120644092559814, "learning_rate": 
6.064427911401006e-05, "loss": 2.7037, "step": 46355 }, { "epoch": 3.149884495175975, "grad_norm": 4.015407562255859, "learning_rate": 6.064003261312679e-05, "loss": 2.7219, "step": 46360 }, { "epoch": 3.1502242152466366, "grad_norm": 2.9041194915771484, "learning_rate": 6.063578611224351e-05, "loss": 2.6527, "step": 46365 }, { "epoch": 3.1505639353172987, "grad_norm": 3.273566484451294, "learning_rate": 6.063153961136024e-05, "loss": 2.4679, "step": 46370 }, { "epoch": 3.1509036553879604, "grad_norm": 2.5031065940856934, "learning_rate": 6.062729311047697e-05, "loss": 2.509, "step": 46375 }, { "epoch": 3.151243375458622, "grad_norm": 2.940049409866333, "learning_rate": 6.062304660959369e-05, "loss": 2.7079, "step": 46380 }, { "epoch": 3.151583095529284, "grad_norm": 2.582793712615967, "learning_rate": 6.061880010871043e-05, "loss": 2.865, "step": 46385 }, { "epoch": 3.1519228155999457, "grad_norm": 2.4718544483184814, "learning_rate": 6.061455360782715e-05, "loss": 2.5735, "step": 46390 }, { "epoch": 3.1522625356706073, "grad_norm": 2.3777220249176025, "learning_rate": 6.0610307106943883e-05, "loss": 2.6901, "step": 46395 }, { "epoch": 3.1526022557412694, "grad_norm": 3.173853874206543, "learning_rate": 6.060606060606061e-05, "loss": 2.8708, "step": 46400 }, { "epoch": 3.152941975811931, "grad_norm": 4.325406074523926, "learning_rate": 6.060181410517733e-05, "loss": 2.5439, "step": 46405 }, { "epoch": 3.1532816958825927, "grad_norm": 2.367835521697998, "learning_rate": 6.059756760429407e-05, "loss": 2.7214, "step": 46410 }, { "epoch": 3.1536214159532543, "grad_norm": 2.8184056282043457, "learning_rate": 6.0593321103410795e-05, "loss": 2.3885, "step": 46415 }, { "epoch": 3.1539611360239164, "grad_norm": 3.236341953277588, "learning_rate": 6.058907460252752e-05, "loss": 2.711, "step": 46420 }, { "epoch": 3.154300856094578, "grad_norm": 2.9092044830322266, "learning_rate": 6.058482810164425e-05, "loss": 2.7263, "step": 46425 }, { "epoch": 3.1546405761652396, 
"grad_norm": 3.0085766315460205, "learning_rate": 6.058058160076098e-05, "loss": 2.6238, "step": 46430 }, { "epoch": 3.1549802962359017, "grad_norm": 3.223471164703369, "learning_rate": 6.05763350998777e-05, "loss": 2.4841, "step": 46435 }, { "epoch": 3.1553200163065633, "grad_norm": 3.217090129852295, "learning_rate": 6.0572088598994435e-05, "loss": 2.6684, "step": 46440 }, { "epoch": 3.155659736377225, "grad_norm": 2.107914447784424, "learning_rate": 6.0567842098111163e-05, "loss": 2.7807, "step": 46445 }, { "epoch": 3.155999456447887, "grad_norm": 2.479245185852051, "learning_rate": 6.0563595597227885e-05, "loss": 2.657, "step": 46450 }, { "epoch": 3.1563391765185487, "grad_norm": 2.8167362213134766, "learning_rate": 6.055934909634462e-05, "loss": 2.8612, "step": 46455 }, { "epoch": 3.1566788965892103, "grad_norm": 3.1863181591033936, "learning_rate": 6.055510259546134e-05, "loss": 2.5623, "step": 46460 }, { "epoch": 3.1570186166598724, "grad_norm": 2.191195487976074, "learning_rate": 6.055085609457807e-05, "loss": 2.6234, "step": 46465 }, { "epoch": 3.157358336730534, "grad_norm": 3.1053736209869385, "learning_rate": 6.0546609593694804e-05, "loss": 2.7031, "step": 46470 }, { "epoch": 3.1576980568011956, "grad_norm": 2.8911895751953125, "learning_rate": 6.0542363092811525e-05, "loss": 2.4208, "step": 46475 }, { "epoch": 3.1580377768718577, "grad_norm": 3.010591506958008, "learning_rate": 6.053811659192825e-05, "loss": 2.4733, "step": 46480 }, { "epoch": 3.1583774969425193, "grad_norm": 2.362907886505127, "learning_rate": 6.053387009104499e-05, "loss": 2.5439, "step": 46485 }, { "epoch": 3.158717217013181, "grad_norm": 2.7823867797851562, "learning_rate": 6.052962359016171e-05, "loss": 2.5877, "step": 46490 }, { "epoch": 3.159056937083843, "grad_norm": 2.911181688308716, "learning_rate": 6.052537708927844e-05, "loss": 2.694, "step": 46495 }, { "epoch": 3.1593966571545047, "grad_norm": 2.212508201599121, "learning_rate": 6.052113058839517e-05, "loss": 2.2516, 
"step": 46500 }, { "epoch": 3.1597363772251663, "grad_norm": 3.0515520572662354, "learning_rate": 6.051688408751189e-05, "loss": 2.7793, "step": 46505 }, { "epoch": 3.1600760972958284, "grad_norm": 3.667433023452759, "learning_rate": 6.051263758662862e-05, "loss": 2.7773, "step": 46510 }, { "epoch": 3.16041581736649, "grad_norm": 3.6117522716522217, "learning_rate": 6.0508391085745356e-05, "loss": 2.6802, "step": 46515 }, { "epoch": 3.1607555374371517, "grad_norm": 2.9189257621765137, "learning_rate": 6.050414458486208e-05, "loss": 2.8876, "step": 46520 }, { "epoch": 3.1610952575078137, "grad_norm": 3.2988877296447754, "learning_rate": 6.04998980839788e-05, "loss": 2.5486, "step": 46525 }, { "epoch": 3.1614349775784754, "grad_norm": 3.0867862701416016, "learning_rate": 6.049565158309553e-05, "loss": 2.6567, "step": 46530 }, { "epoch": 3.161774697649137, "grad_norm": 2.158282518386841, "learning_rate": 6.049140508221226e-05, "loss": 2.7339, "step": 46535 }, { "epoch": 3.162114417719799, "grad_norm": 2.750734567642212, "learning_rate": 6.048715858132898e-05, "loss": 2.4309, "step": 46540 }, { "epoch": 3.1624541377904607, "grad_norm": 3.0486252307891846, "learning_rate": 6.048291208044572e-05, "loss": 2.6383, "step": 46545 }, { "epoch": 3.1627938578611223, "grad_norm": 2.469954490661621, "learning_rate": 6.0478665579562445e-05, "loss": 2.7054, "step": 46550 }, { "epoch": 3.1631335779317844, "grad_norm": 3.7965927124023438, "learning_rate": 6.0474419078679166e-05, "loss": 2.6308, "step": 46555 }, { "epoch": 3.163473298002446, "grad_norm": 3.386059284210205, "learning_rate": 6.04701725777959e-05, "loss": 2.6602, "step": 46560 }, { "epoch": 3.1638130180731077, "grad_norm": 2.14241886138916, "learning_rate": 6.046592607691263e-05, "loss": 2.8693, "step": 46565 }, { "epoch": 3.1641527381437697, "grad_norm": 2.9677376747131348, "learning_rate": 6.046167957602935e-05, "loss": 2.6457, "step": 46570 }, { "epoch": 3.1644924582144314, "grad_norm": 2.93597149848938, 
"learning_rate": 6.0457433075146085e-05, "loss": 2.8831, "step": 46575 }, { "epoch": 3.164832178285093, "grad_norm": 2.504652976989746, "learning_rate": 6.045318657426281e-05, "loss": 2.7166, "step": 46580 }, { "epoch": 3.165171898355755, "grad_norm": 3.443554162979126, "learning_rate": 6.0448940073379534e-05, "loss": 2.5276, "step": 46585 }, { "epoch": 3.1655116184264167, "grad_norm": 3.1100313663482666, "learning_rate": 6.044469357249627e-05, "loss": 2.5886, "step": 46590 }, { "epoch": 3.1658513384970783, "grad_norm": 2.833468437194824, "learning_rate": 6.044044707161299e-05, "loss": 2.8514, "step": 46595 }, { "epoch": 3.1661910585677404, "grad_norm": 2.742321014404297, "learning_rate": 6.043620057072972e-05, "loss": 2.6156, "step": 46600 }, { "epoch": 3.166530778638402, "grad_norm": 2.7438206672668457, "learning_rate": 6.043195406984645e-05, "loss": 2.701, "step": 46605 }, { "epoch": 3.1668704987090637, "grad_norm": 3.4980123043060303, "learning_rate": 6.0427707568963174e-05, "loss": 2.7137, "step": 46610 }, { "epoch": 3.1672102187797253, "grad_norm": 3.556279182434082, "learning_rate": 6.04234610680799e-05, "loss": 2.6034, "step": 46615 }, { "epoch": 3.1675499388503874, "grad_norm": 2.927478551864624, "learning_rate": 6.041921456719664e-05, "loss": 2.6684, "step": 46620 }, { "epoch": 3.167889658921049, "grad_norm": 2.9749529361724854, "learning_rate": 6.041496806631336e-05, "loss": 2.9288, "step": 46625 }, { "epoch": 3.1682293789917106, "grad_norm": 2.6125893592834473, "learning_rate": 6.0410721565430086e-05, "loss": 2.4815, "step": 46630 }, { "epoch": 3.1685690990623727, "grad_norm": 2.615388870239258, "learning_rate": 6.040647506454682e-05, "loss": 2.833, "step": 46635 }, { "epoch": 3.1689088191330343, "grad_norm": 2.36024808883667, "learning_rate": 6.040222856366354e-05, "loss": 2.7118, "step": 46640 }, { "epoch": 3.169248539203696, "grad_norm": 2.9611659049987793, "learning_rate": 6.039798206278027e-05, "loss": 2.7553, "step": 46645 }, { "epoch": 
3.169588259274358, "grad_norm": 3.24847412109375, "learning_rate": 6.0393735561897005e-05, "loss": 2.6906, "step": 46650 }, { "epoch": 3.1699279793450197, "grad_norm": 3.5068531036376953, "learning_rate": 6.0389489061013726e-05, "loss": 2.69, "step": 46655 }, { "epoch": 3.1702676994156813, "grad_norm": 2.5857746601104736, "learning_rate": 6.038524256013045e-05, "loss": 2.8106, "step": 46660 }, { "epoch": 3.1706074194863434, "grad_norm": 2.4310455322265625, "learning_rate": 6.038099605924719e-05, "loss": 2.664, "step": 46665 }, { "epoch": 3.170947139557005, "grad_norm": 3.2873823642730713, "learning_rate": 6.037674955836391e-05, "loss": 2.5636, "step": 46670 }, { "epoch": 3.1712868596276667, "grad_norm": 3.2585885524749756, "learning_rate": 6.037250305748063e-05, "loss": 2.6766, "step": 46675 }, { "epoch": 3.1716265796983287, "grad_norm": 2.7384653091430664, "learning_rate": 6.0368256556597366e-05, "loss": 2.806, "step": 46680 }, { "epoch": 3.1719662997689904, "grad_norm": 2.9798951148986816, "learning_rate": 6.0364010055714094e-05, "loss": 2.6157, "step": 46685 }, { "epoch": 3.172306019839652, "grad_norm": 3.8753621578216553, "learning_rate": 6.0359763554830816e-05, "loss": 2.602, "step": 46690 }, { "epoch": 3.172645739910314, "grad_norm": 3.012899398803711, "learning_rate": 6.035551705394755e-05, "loss": 2.9841, "step": 46695 }, { "epoch": 3.1729854599809757, "grad_norm": 2.7441647052764893, "learning_rate": 6.035127055306428e-05, "loss": 2.5284, "step": 46700 }, { "epoch": 3.1733251800516373, "grad_norm": 3.102907180786133, "learning_rate": 6.0347024052181e-05, "loss": 2.6148, "step": 46705 }, { "epoch": 3.1736649001222994, "grad_norm": 2.724086046218872, "learning_rate": 6.0342777551297734e-05, "loss": 2.5694, "step": 46710 }, { "epoch": 3.174004620192961, "grad_norm": 3.766587972640991, "learning_rate": 6.033853105041446e-05, "loss": 2.8716, "step": 46715 }, { "epoch": 3.1743443402636227, "grad_norm": 2.8343911170959473, "learning_rate": 6.0334284549531184e-05, 
"loss": 2.7291, "step": 46720 }, { "epoch": 3.1746840603342847, "grad_norm": 2.7937800884246826, "learning_rate": 6.033003804864792e-05, "loss": 2.5675, "step": 46725 }, { "epoch": 3.1750237804049464, "grad_norm": 2.6995415687561035, "learning_rate": 6.032579154776464e-05, "loss": 2.7218, "step": 46730 }, { "epoch": 3.175363500475608, "grad_norm": 2.9349143505096436, "learning_rate": 6.032154504688138e-05, "loss": 2.6253, "step": 46735 }, { "epoch": 3.17570322054627, "grad_norm": 4.143592834472656, "learning_rate": 6.03172985459981e-05, "loss": 2.7289, "step": 46740 }, { "epoch": 3.1760429406169317, "grad_norm": 3.768796682357788, "learning_rate": 6.0313052045114824e-05, "loss": 2.7898, "step": 46745 }, { "epoch": 3.1763826606875933, "grad_norm": 2.665308952331543, "learning_rate": 6.030880554423156e-05, "loss": 2.6752, "step": 46750 }, { "epoch": 3.176722380758255, "grad_norm": 5.868978977203369, "learning_rate": 6.0304559043348286e-05, "loss": 2.3959, "step": 46755 }, { "epoch": 3.177062100828917, "grad_norm": 2.7418627738952637, "learning_rate": 6.030031254246501e-05, "loss": 2.7054, "step": 46760 }, { "epoch": 3.1774018208995787, "grad_norm": 2.3441858291625977, "learning_rate": 6.029606604158174e-05, "loss": 2.4867, "step": 46765 }, { "epoch": 3.1777415409702403, "grad_norm": 2.8008203506469727, "learning_rate": 6.029181954069847e-05, "loss": 2.63, "step": 46770 }, { "epoch": 3.1780812610409024, "grad_norm": 2.80837345123291, "learning_rate": 6.028757303981519e-05, "loss": 2.4076, "step": 46775 }, { "epoch": 3.178420981111564, "grad_norm": 2.8351316452026367, "learning_rate": 6.0283326538931926e-05, "loss": 2.6908, "step": 46780 }, { "epoch": 3.1787607011822256, "grad_norm": 2.8012216091156006, "learning_rate": 6.0279080038048654e-05, "loss": 2.6704, "step": 46785 }, { "epoch": 3.1791004212528877, "grad_norm": 3.3776566982269287, "learning_rate": 6.0274833537165376e-05, "loss": 2.9641, "step": 46790 }, { "epoch": 3.1794401413235494, "grad_norm": 
2.4579989910125732, "learning_rate": 6.027058703628211e-05, "loss": 2.6299, "step": 46795 }, { "epoch": 3.179779861394211, "grad_norm": 3.5386276245117188, "learning_rate": 6.026634053539884e-05, "loss": 2.446, "step": 46800 }, { "epoch": 3.180119581464873, "grad_norm": 2.469154119491577, "learning_rate": 6.026209403451556e-05, "loss": 2.4471, "step": 46805 }, { "epoch": 3.1804593015355347, "grad_norm": 3.4554054737091064, "learning_rate": 6.0257847533632295e-05, "loss": 2.5192, "step": 46810 }, { "epoch": 3.1807990216061963, "grad_norm": 2.854088068008423, "learning_rate": 6.0253601032749016e-05, "loss": 2.5021, "step": 46815 }, { "epoch": 3.1811387416768584, "grad_norm": 2.5398294925689697, "learning_rate": 6.0249354531865744e-05, "loss": 2.5075, "step": 46820 }, { "epoch": 3.18147846174752, "grad_norm": 2.621281862258911, "learning_rate": 6.024510803098248e-05, "loss": 2.7171, "step": 46825 }, { "epoch": 3.1818181818181817, "grad_norm": 2.33772349357605, "learning_rate": 6.02408615300992e-05, "loss": 2.6252, "step": 46830 }, { "epoch": 3.1821579018888437, "grad_norm": 2.240786075592041, "learning_rate": 6.023661502921593e-05, "loss": 2.6047, "step": 46835 }, { "epoch": 3.1824976219595054, "grad_norm": 2.9792301654815674, "learning_rate": 6.023236852833266e-05, "loss": 2.5699, "step": 46840 }, { "epoch": 3.182837342030167, "grad_norm": 2.713186740875244, "learning_rate": 6.0228122027449384e-05, "loss": 2.6341, "step": 46845 }, { "epoch": 3.183177062100829, "grad_norm": 2.556035041809082, "learning_rate": 6.022387552656611e-05, "loss": 2.7423, "step": 46850 }, { "epoch": 3.1835167821714907, "grad_norm": 2.399386405944824, "learning_rate": 6.0219629025682847e-05, "loss": 2.6734, "step": 46855 }, { "epoch": 3.1838565022421523, "grad_norm": 3.3091013431549072, "learning_rate": 6.021538252479957e-05, "loss": 2.7456, "step": 46860 }, { "epoch": 3.1841962223128144, "grad_norm": 2.699409246444702, "learning_rate": 6.0211136023916296e-05, "loss": 2.5802, "step": 46865 }, 
{ "epoch": 3.184535942383476, "grad_norm": 2.598843574523926, "learning_rate": 6.020688952303303e-05, "loss": 2.8579, "step": 46870 }, { "epoch": 3.1848756624541377, "grad_norm": 2.656864643096924, "learning_rate": 6.020264302214975e-05, "loss": 2.6345, "step": 46875 }, { "epoch": 3.1852153825247997, "grad_norm": 2.3053998947143555, "learning_rate": 6.019839652126647e-05, "loss": 2.6689, "step": 46880 }, { "epoch": 3.1855551025954614, "grad_norm": 2.763906955718994, "learning_rate": 6.019415002038321e-05, "loss": 2.8092, "step": 46885 }, { "epoch": 3.185894822666123, "grad_norm": 3.10703706741333, "learning_rate": 6.0189903519499936e-05, "loss": 2.649, "step": 46890 }, { "epoch": 3.186234542736785, "grad_norm": 3.1034016609191895, "learning_rate": 6.018565701861666e-05, "loss": 2.4781, "step": 46895 }, { "epoch": 3.1865742628074467, "grad_norm": 3.28414249420166, "learning_rate": 6.018141051773339e-05, "loss": 2.7509, "step": 46900 }, { "epoch": 3.1869139828781083, "grad_norm": 3.0933456420898438, "learning_rate": 6.017716401685012e-05, "loss": 2.5929, "step": 46905 }, { "epoch": 3.1872537029487704, "grad_norm": 2.7512218952178955, "learning_rate": 6.017291751596684e-05, "loss": 2.6622, "step": 46910 }, { "epoch": 3.187593423019432, "grad_norm": 2.832580089569092, "learning_rate": 6.0168671015083576e-05, "loss": 2.715, "step": 46915 }, { "epoch": 3.1879331430900937, "grad_norm": 3.160163402557373, "learning_rate": 6.0164424514200304e-05, "loss": 2.7473, "step": 46920 }, { "epoch": 3.1882728631607558, "grad_norm": 2.731445550918579, "learning_rate": 6.0160178013317025e-05, "loss": 2.7898, "step": 46925 }, { "epoch": 3.1886125832314174, "grad_norm": 2.8048434257507324, "learning_rate": 6.015593151243376e-05, "loss": 2.7551, "step": 46930 }, { "epoch": 3.188952303302079, "grad_norm": 3.022279977798462, "learning_rate": 6.015168501155049e-05, "loss": 2.4656, "step": 46935 }, { "epoch": 3.189292023372741, "grad_norm": 3.146343469619751, "learning_rate": 
6.014743851066721e-05, "loss": 2.6958, "step": 46940 }, { "epoch": 3.1896317434434027, "grad_norm": 2.2673187255859375, "learning_rate": 6.0143192009783944e-05, "loss": 2.5601, "step": 46945 }, { "epoch": 3.1899714635140644, "grad_norm": 2.2161688804626465, "learning_rate": 6.0138945508900665e-05, "loss": 2.5695, "step": 46950 }, { "epoch": 3.190311183584726, "grad_norm": 3.3977065086364746, "learning_rate": 6.013469900801739e-05, "loss": 2.5422, "step": 46955 }, { "epoch": 3.190650903655388, "grad_norm": 2.559920310974121, "learning_rate": 6.013045250713413e-05, "loss": 2.3275, "step": 46960 }, { "epoch": 3.1909906237260497, "grad_norm": 2.9739184379577637, "learning_rate": 6.012620600625085e-05, "loss": 2.6452, "step": 46965 }, { "epoch": 3.1913303437967113, "grad_norm": 3.771822929382324, "learning_rate": 6.012195950536758e-05, "loss": 2.4769, "step": 46970 }, { "epoch": 3.1916700638673734, "grad_norm": 2.866081953048706, "learning_rate": 6.011771300448431e-05, "loss": 2.6274, "step": 46975 }, { "epoch": 3.192009783938035, "grad_norm": 2.9803593158721924, "learning_rate": 6.011346650360103e-05, "loss": 2.5861, "step": 46980 }, { "epoch": 3.1923495040086967, "grad_norm": 2.7969908714294434, "learning_rate": 6.010922000271776e-05, "loss": 2.6243, "step": 46985 }, { "epoch": 3.1926892240793587, "grad_norm": 3.548264503479004, "learning_rate": 6.0104973501834496e-05, "loss": 2.6694, "step": 46990 }, { "epoch": 3.1930289441500204, "grad_norm": 2.8545618057250977, "learning_rate": 6.010072700095122e-05, "loss": 2.5612, "step": 46995 }, { "epoch": 3.193368664220682, "grad_norm": 2.4095969200134277, "learning_rate": 6.0096480500067945e-05, "loss": 2.4992, "step": 47000 }, { "epoch": 3.193708384291344, "grad_norm": 2.571326494216919, "learning_rate": 6.009223399918468e-05, "loss": 2.8421, "step": 47005 }, { "epoch": 3.1940481043620057, "grad_norm": 2.803874969482422, "learning_rate": 6.00879874983014e-05, "loss": 2.4833, "step": 47010 }, { "epoch": 3.1943878244326673, 
"grad_norm": 3.131424903869629, "learning_rate": 6.008374099741812e-05, "loss": 2.4151, "step": 47015 }, { "epoch": 3.1947275445033294, "grad_norm": 2.6473822593688965, "learning_rate": 6.007949449653486e-05, "loss": 2.7082, "step": 47020 }, { "epoch": 3.195067264573991, "grad_norm": 2.617608070373535, "learning_rate": 6.0075247995651585e-05, "loss": 2.6692, "step": 47025 }, { "epoch": 3.1954069846446527, "grad_norm": 2.908132553100586, "learning_rate": 6.0071001494768307e-05, "loss": 2.5631, "step": 47030 }, { "epoch": 3.1957467047153147, "grad_norm": 2.5100667476654053, "learning_rate": 6.006675499388504e-05, "loss": 2.6055, "step": 47035 }, { "epoch": 3.1960864247859764, "grad_norm": 3.2616758346557617, "learning_rate": 6.006250849300177e-05, "loss": 2.6471, "step": 47040 }, { "epoch": 3.196426144856638, "grad_norm": 2.74526309967041, "learning_rate": 6.005826199211849e-05, "loss": 2.4161, "step": 47045 }, { "epoch": 3.1967658649273, "grad_norm": 2.78303599357605, "learning_rate": 6.0054015491235225e-05, "loss": 2.5344, "step": 47050 }, { "epoch": 3.1971055849979617, "grad_norm": 2.6886916160583496, "learning_rate": 6.004976899035195e-05, "loss": 2.6016, "step": 47055 }, { "epoch": 3.1974453050686233, "grad_norm": 3.1653459072113037, "learning_rate": 6.0045522489468675e-05, "loss": 2.5765, "step": 47060 }, { "epoch": 3.1977850251392854, "grad_norm": 2.7483439445495605, "learning_rate": 6.004127598858541e-05, "loss": 2.7884, "step": 47065 }, { "epoch": 3.198124745209947, "grad_norm": 2.8181824684143066, "learning_rate": 6.003702948770214e-05, "loss": 2.5484, "step": 47070 }, { "epoch": 3.1984644652806087, "grad_norm": 2.921250820159912, "learning_rate": 6.003278298681887e-05, "loss": 2.8555, "step": 47075 }, { "epoch": 3.1988041853512708, "grad_norm": 2.89974308013916, "learning_rate": 6.002853648593559e-05, "loss": 2.5109, "step": 47080 }, { "epoch": 3.1991439054219324, "grad_norm": 3.1901919841766357, "learning_rate": 6.0024289985052315e-05, "loss": 2.6134, 
"step": 47085 }, { "epoch": 3.199483625492594, "grad_norm": 3.183979034423828, "learning_rate": 6.0020043484169056e-05, "loss": 2.7121, "step": 47090 }, { "epoch": 3.1998233455632556, "grad_norm": 2.5023550987243652, "learning_rate": 6.001579698328578e-05, "loss": 2.5158, "step": 47095 }, { "epoch": 3.2001630656339177, "grad_norm": 2.2624783515930176, "learning_rate": 6.00115504824025e-05, "loss": 2.4965, "step": 47100 }, { "epoch": 3.2005027857045794, "grad_norm": 2.8234853744506836, "learning_rate": 6.0007303981519233e-05, "loss": 2.6319, "step": 47105 }, { "epoch": 3.200842505775241, "grad_norm": 2.4671812057495117, "learning_rate": 6.000305748063596e-05, "loss": 2.9672, "step": 47110 }, { "epoch": 3.201182225845903, "grad_norm": 2.534623861312866, "learning_rate": 5.999881097975268e-05, "loss": 2.5704, "step": 47115 }, { "epoch": 3.2015219459165647, "grad_norm": 2.9752633571624756, "learning_rate": 5.999456447886942e-05, "loss": 2.7075, "step": 47120 }, { "epoch": 3.2018616659872263, "grad_norm": 2.5304839611053467, "learning_rate": 5.9990317977986145e-05, "loss": 2.3124, "step": 47125 }, { "epoch": 3.2022013860578884, "grad_norm": 2.109858512878418, "learning_rate": 5.998607147710287e-05, "loss": 2.7711, "step": 47130 }, { "epoch": 3.20254110612855, "grad_norm": 3.1976866722106934, "learning_rate": 5.99818249762196e-05, "loss": 2.782, "step": 47135 }, { "epoch": 3.2028808261992117, "grad_norm": 3.35524582862854, "learning_rate": 5.997757847533633e-05, "loss": 2.668, "step": 47140 }, { "epoch": 3.2032205462698737, "grad_norm": 2.3625519275665283, "learning_rate": 5.997333197445305e-05, "loss": 2.7226, "step": 47145 }, { "epoch": 3.2035602663405354, "grad_norm": 2.855787754058838, "learning_rate": 5.9969085473569785e-05, "loss": 2.6403, "step": 47150 }, { "epoch": 3.203899986411197, "grad_norm": 2.457564115524292, "learning_rate": 5.996483897268651e-05, "loss": 2.8457, "step": 47155 }, { "epoch": 3.204239706481859, "grad_norm": 3.0500247478485107, 
"learning_rate": 5.9960592471803235e-05, "loss": 2.6847, "step": 47160 }, { "epoch": 3.2045794265525207, "grad_norm": 2.4493391513824463, "learning_rate": 5.995634597091997e-05, "loss": 2.6596, "step": 47165 }, { "epoch": 3.2049191466231823, "grad_norm": 2.828385591506958, "learning_rate": 5.995209947003669e-05, "loss": 2.7445, "step": 47170 }, { "epoch": 3.2052588666938444, "grad_norm": 2.378145456314087, "learning_rate": 5.994785296915342e-05, "loss": 2.6635, "step": 47175 }, { "epoch": 3.205598586764506, "grad_norm": 2.971569538116455, "learning_rate": 5.9943606468270154e-05, "loss": 2.8385, "step": 47180 }, { "epoch": 3.2059383068351677, "grad_norm": 3.6825850009918213, "learning_rate": 5.9939359967386875e-05, "loss": 2.4758, "step": 47185 }, { "epoch": 3.2062780269058297, "grad_norm": 3.52589750289917, "learning_rate": 5.99351134665036e-05, "loss": 2.4603, "step": 47190 }, { "epoch": 3.2066177469764914, "grad_norm": 3.1186392307281494, "learning_rate": 5.993086696562034e-05, "loss": 2.5775, "step": 47195 }, { "epoch": 3.206957467047153, "grad_norm": 2.611785888671875, "learning_rate": 5.992662046473706e-05, "loss": 2.4947, "step": 47200 }, { "epoch": 3.207297187117815, "grad_norm": 3.091813802719116, "learning_rate": 5.992237396385379e-05, "loss": 2.509, "step": 47205 }, { "epoch": 3.2076369071884767, "grad_norm": 2.228663206100464, "learning_rate": 5.991812746297052e-05, "loss": 2.8218, "step": 47210 }, { "epoch": 3.2079766272591383, "grad_norm": 2.932852029800415, "learning_rate": 5.991388096208724e-05, "loss": 2.2709, "step": 47215 }, { "epoch": 3.2083163473298004, "grad_norm": 3.1143603324890137, "learning_rate": 5.9909634461203964e-05, "loss": 2.4826, "step": 47220 }, { "epoch": 3.208656067400462, "grad_norm": 2.5443098545074463, "learning_rate": 5.9905387960320706e-05, "loss": 2.5853, "step": 47225 }, { "epoch": 3.2089957874711237, "grad_norm": 2.580108642578125, "learning_rate": 5.990114145943743e-05, "loss": 2.1899, "step": 47230 }, { "epoch": 
3.2093355075417858, "grad_norm": 3.556394577026367, "learning_rate": 5.989689495855415e-05, "loss": 2.7531, "step": 47235 }, { "epoch": 3.2096752276124474, "grad_norm": 3.0793557167053223, "learning_rate": 5.989264845767088e-05, "loss": 2.6961, "step": 47240 }, { "epoch": 3.210014947683109, "grad_norm": 2.407968759536743, "learning_rate": 5.988840195678761e-05, "loss": 2.8031, "step": 47245 }, { "epoch": 3.210354667753771, "grad_norm": 2.8929407596588135, "learning_rate": 5.988415545590433e-05, "loss": 2.8026, "step": 47250 }, { "epoch": 3.2106943878244327, "grad_norm": 2.791210889816284, "learning_rate": 5.987990895502107e-05, "loss": 2.6746, "step": 47255 }, { "epoch": 3.2110341078950944, "grad_norm": 3.023725986480713, "learning_rate": 5.9875662454137795e-05, "loss": 2.6678, "step": 47260 }, { "epoch": 3.2113738279657564, "grad_norm": 2.891850471496582, "learning_rate": 5.9871415953254516e-05, "loss": 2.3322, "step": 47265 }, { "epoch": 3.211713548036418, "grad_norm": 2.565315008163452, "learning_rate": 5.986716945237125e-05, "loss": 2.7056, "step": 47270 }, { "epoch": 3.2120532681070797, "grad_norm": 3.4878222942352295, "learning_rate": 5.986292295148798e-05, "loss": 2.5065, "step": 47275 }, { "epoch": 3.2123929881777418, "grad_norm": 2.976191997528076, "learning_rate": 5.98586764506047e-05, "loss": 2.5852, "step": 47280 }, { "epoch": 3.2127327082484034, "grad_norm": 2.7357935905456543, "learning_rate": 5.9854429949721435e-05, "loss": 2.4612, "step": 47285 }, { "epoch": 3.213072428319065, "grad_norm": 3.430412530899048, "learning_rate": 5.985018344883816e-05, "loss": 2.6565, "step": 47290 }, { "epoch": 3.2134121483897267, "grad_norm": 2.3980047702789307, "learning_rate": 5.9845936947954884e-05, "loss": 2.5278, "step": 47295 }, { "epoch": 3.2137518684603887, "grad_norm": 3.2201192378997803, "learning_rate": 5.984169044707162e-05, "loss": 2.7885, "step": 47300 }, { "epoch": 3.2140915885310504, "grad_norm": 2.248121976852417, "learning_rate": 
5.983744394618834e-05, "loss": 2.4705, "step": 47305 }, { "epoch": 3.214431308601712, "grad_norm": 2.346435546875, "learning_rate": 5.983319744530507e-05, "loss": 2.4563, "step": 47310 }, { "epoch": 3.214771028672374, "grad_norm": 2.9149394035339355, "learning_rate": 5.98289509444218e-05, "loss": 2.531, "step": 47315 }, { "epoch": 3.2151107487430357, "grad_norm": 2.5448968410491943, "learning_rate": 5.9824704443538524e-05, "loss": 2.8662, "step": 47320 }, { "epoch": 3.2154504688136973, "grad_norm": 2.336742877960205, "learning_rate": 5.982045794265525e-05, "loss": 2.7161, "step": 47325 }, { "epoch": 3.2157901888843594, "grad_norm": 4.994561672210693, "learning_rate": 5.981621144177199e-05, "loss": 2.5567, "step": 47330 }, { "epoch": 3.216129908955021, "grad_norm": 2.798302412033081, "learning_rate": 5.981196494088871e-05, "loss": 2.6365, "step": 47335 }, { "epoch": 3.2164696290256827, "grad_norm": 2.570514678955078, "learning_rate": 5.9807718440005436e-05, "loss": 2.6457, "step": 47340 }, { "epoch": 3.2168093490963448, "grad_norm": 2.765613079071045, "learning_rate": 5.980347193912217e-05, "loss": 2.3407, "step": 47345 }, { "epoch": 3.2171490691670064, "grad_norm": 2.2991321086883545, "learning_rate": 5.979922543823889e-05, "loss": 2.684, "step": 47350 }, { "epoch": 3.217488789237668, "grad_norm": 2.499985694885254, "learning_rate": 5.9794978937355613e-05, "loss": 2.5594, "step": 47355 }, { "epoch": 3.21782850930833, "grad_norm": 2.847698211669922, "learning_rate": 5.9790732436472355e-05, "loss": 2.5836, "step": 47360 }, { "epoch": 3.2181682293789917, "grad_norm": 3.9246978759765625, "learning_rate": 5.9786485935589076e-05, "loss": 2.4467, "step": 47365 }, { "epoch": 3.2185079494496533, "grad_norm": 2.7297072410583496, "learning_rate": 5.97822394347058e-05, "loss": 2.7662, "step": 47370 }, { "epoch": 3.2188476695203154, "grad_norm": 2.5392837524414062, "learning_rate": 5.977799293382253e-05, "loss": 2.5418, "step": 47375 }, { "epoch": 3.219187389590977, 
"grad_norm": 2.1325314044952393, "learning_rate": 5.977374643293926e-05, "loss": 2.8878, "step": 47380 }, { "epoch": 3.2195271096616387, "grad_norm": 2.5503149032592773, "learning_rate": 5.976949993205598e-05, "loss": 2.266, "step": 47385 }, { "epoch": 3.2198668297323008, "grad_norm": 2.319958209991455, "learning_rate": 5.9765253431172716e-05, "loss": 2.8636, "step": 47390 }, { "epoch": 3.2202065498029624, "grad_norm": 3.1763179302215576, "learning_rate": 5.9761006930289444e-05, "loss": 2.765, "step": 47395 }, { "epoch": 3.220546269873624, "grad_norm": 2.9198200702667236, "learning_rate": 5.9756760429406166e-05, "loss": 2.5902, "step": 47400 }, { "epoch": 3.220885989944286, "grad_norm": 3.504743814468384, "learning_rate": 5.97525139285229e-05, "loss": 2.6534, "step": 47405 }, { "epoch": 3.2212257100149477, "grad_norm": 2.5820276737213135, "learning_rate": 5.974826742763963e-05, "loss": 2.6547, "step": 47410 }, { "epoch": 3.2215654300856094, "grad_norm": 2.5885589122772217, "learning_rate": 5.974402092675636e-05, "loss": 2.6941, "step": 47415 }, { "epoch": 3.2219051501562714, "grad_norm": 3.3239829540252686, "learning_rate": 5.9739774425873084e-05, "loss": 2.5868, "step": 47420 }, { "epoch": 3.222244870226933, "grad_norm": 3.053412675857544, "learning_rate": 5.973552792498981e-05, "loss": 2.4398, "step": 47425 }, { "epoch": 3.2225845902975947, "grad_norm": 5.217771530151367, "learning_rate": 5.973128142410655e-05, "loss": 2.4226, "step": 47430 }, { "epoch": 3.2229243103682563, "grad_norm": 3.016993522644043, "learning_rate": 5.972703492322327e-05, "loss": 2.6324, "step": 47435 }, { "epoch": 3.2232640304389184, "grad_norm": 2.6609559059143066, "learning_rate": 5.972278842233999e-05, "loss": 2.4695, "step": 47440 }, { "epoch": 3.22360375050958, "grad_norm": 2.501796245574951, "learning_rate": 5.9718541921456724e-05, "loss": 2.7287, "step": 47445 }, { "epoch": 3.2239434705802417, "grad_norm": 2.773616313934326, "learning_rate": 5.971429542057345e-05, "loss": 2.5592, 
"step": 47450 }, { "epoch": 3.2242831906509037, "grad_norm": 2.475858688354492, "learning_rate": 5.9710048919690174e-05, "loss": 2.7399, "step": 47455 }, { "epoch": 3.2246229107215654, "grad_norm": 3.0686421394348145, "learning_rate": 5.970580241880691e-05, "loss": 2.503, "step": 47460 }, { "epoch": 3.224962630792227, "grad_norm": 2.642151355743408, "learning_rate": 5.9701555917923636e-05, "loss": 2.6087, "step": 47465 }, { "epoch": 3.225302350862889, "grad_norm": 2.9031636714935303, "learning_rate": 5.969730941704036e-05, "loss": 2.6994, "step": 47470 }, { "epoch": 3.2256420709335507, "grad_norm": 2.30350923538208, "learning_rate": 5.969306291615709e-05, "loss": 2.6536, "step": 47475 }, { "epoch": 3.2259817910042123, "grad_norm": 3.0235235691070557, "learning_rate": 5.968881641527382e-05, "loss": 2.7427, "step": 47480 }, { "epoch": 3.2263215110748744, "grad_norm": 2.2616536617279053, "learning_rate": 5.968456991439054e-05, "loss": 2.7981, "step": 47485 }, { "epoch": 3.226661231145536, "grad_norm": 2.675032615661621, "learning_rate": 5.9680323413507276e-05, "loss": 2.6674, "step": 47490 }, { "epoch": 3.2270009512161977, "grad_norm": 3.224485397338867, "learning_rate": 5.9676076912624004e-05, "loss": 2.4114, "step": 47495 }, { "epoch": 3.2273406712868598, "grad_norm": 3.025817394256592, "learning_rate": 5.9671830411740726e-05, "loss": 2.3692, "step": 47500 }, { "epoch": 3.2276803913575214, "grad_norm": 2.7855257987976074, "learning_rate": 5.966758391085746e-05, "loss": 2.7968, "step": 47505 }, { "epoch": 3.228020111428183, "grad_norm": 2.5053977966308594, "learning_rate": 5.966333740997418e-05, "loss": 2.7356, "step": 47510 }, { "epoch": 3.228359831498845, "grad_norm": 2.6051552295684814, "learning_rate": 5.965909090909091e-05, "loss": 2.8893, "step": 47515 }, { "epoch": 3.2286995515695067, "grad_norm": 2.3538286685943604, "learning_rate": 5.9654844408207645e-05, "loss": 2.509, "step": 47520 }, { "epoch": 3.2290392716401684, "grad_norm": 2.562830924987793, 
"learning_rate": 5.9650597907324366e-05, "loss": 2.5854, "step": 47525 }, { "epoch": 3.2293789917108304, "grad_norm": 2.4822897911071777, "learning_rate": 5.9646351406441094e-05, "loss": 2.8038, "step": 47530 }, { "epoch": 3.229718711781492, "grad_norm": 3.207085132598877, "learning_rate": 5.964210490555783e-05, "loss": 2.3555, "step": 47535 }, { "epoch": 3.2300584318521537, "grad_norm": 2.898174524307251, "learning_rate": 5.963785840467455e-05, "loss": 2.4186, "step": 47540 }, { "epoch": 3.2303981519228158, "grad_norm": 2.727675437927246, "learning_rate": 5.963361190379128e-05, "loss": 2.6826, "step": 47545 }, { "epoch": 3.2307378719934774, "grad_norm": 3.3141400814056396, "learning_rate": 5.962936540290801e-05, "loss": 2.3551, "step": 47550 }, { "epoch": 3.231077592064139, "grad_norm": 3.104971170425415, "learning_rate": 5.9625118902024734e-05, "loss": 2.6006, "step": 47555 }, { "epoch": 3.231417312134801, "grad_norm": 2.7019083499908447, "learning_rate": 5.962087240114146e-05, "loss": 2.5984, "step": 47560 }, { "epoch": 3.2317570322054627, "grad_norm": 2.2454307079315186, "learning_rate": 5.9616625900258197e-05, "loss": 2.6183, "step": 47565 }, { "epoch": 3.2320967522761244, "grad_norm": 2.4489352703094482, "learning_rate": 5.961237939937492e-05, "loss": 2.6516, "step": 47570 }, { "epoch": 3.2324364723467864, "grad_norm": 2.7975361347198486, "learning_rate": 5.960813289849164e-05, "loss": 2.5906, "step": 47575 }, { "epoch": 3.232776192417448, "grad_norm": 3.366086959838867, "learning_rate": 5.960388639760838e-05, "loss": 2.5536, "step": 47580 }, { "epoch": 3.2331159124881097, "grad_norm": 3.4636659622192383, "learning_rate": 5.95996398967251e-05, "loss": 2.7989, "step": 47585 }, { "epoch": 3.2334556325587718, "grad_norm": 2.8910024166107178, "learning_rate": 5.959539339584182e-05, "loss": 2.8327, "step": 47590 }, { "epoch": 3.2337953526294334, "grad_norm": 2.79451847076416, "learning_rate": 5.959114689495856e-05, "loss": 2.455, "step": 47595 }, { "epoch": 
3.234135072700095, "grad_norm": 3.2374727725982666, "learning_rate": 5.9586900394075286e-05, "loss": 2.8862, "step": 47600 }, { "epoch": 3.234474792770757, "grad_norm": 3.3101179599761963, "learning_rate": 5.958265389319201e-05, "loss": 2.799, "step": 47605 }, { "epoch": 3.2348145128414187, "grad_norm": 2.354494094848633, "learning_rate": 5.957840739230874e-05, "loss": 2.422, "step": 47610 }, { "epoch": 3.2351542329120804, "grad_norm": 3.1574625968933105, "learning_rate": 5.957416089142547e-05, "loss": 2.6647, "step": 47615 }, { "epoch": 3.2354939529827424, "grad_norm": 2.1228551864624023, "learning_rate": 5.956991439054219e-05, "loss": 2.7671, "step": 47620 }, { "epoch": 3.235833673053404, "grad_norm": 3.9211690425872803, "learning_rate": 5.9565667889658926e-05, "loss": 2.5166, "step": 47625 }, { "epoch": 3.2361733931240657, "grad_norm": 2.332029104232788, "learning_rate": 5.9561421388775654e-05, "loss": 2.689, "step": 47630 }, { "epoch": 3.2365131131947273, "grad_norm": 2.478637218475342, "learning_rate": 5.9557174887892375e-05, "loss": 2.7149, "step": 47635 }, { "epoch": 3.2368528332653894, "grad_norm": 2.2254416942596436, "learning_rate": 5.955292838700911e-05, "loss": 2.8284, "step": 47640 }, { "epoch": 3.237192553336051, "grad_norm": 2.985679864883423, "learning_rate": 5.954868188612583e-05, "loss": 2.7128, "step": 47645 }, { "epoch": 3.2375322734067127, "grad_norm": 3.12209153175354, "learning_rate": 5.954443538524256e-05, "loss": 2.4139, "step": 47650 }, { "epoch": 3.2378719934773748, "grad_norm": 2.3909854888916016, "learning_rate": 5.9540188884359294e-05, "loss": 2.5975, "step": 47655 }, { "epoch": 3.2382117135480364, "grad_norm": 2.5583386421203613, "learning_rate": 5.9535942383476015e-05, "loss": 2.7975, "step": 47660 }, { "epoch": 3.238551433618698, "grad_norm": 2.514857292175293, "learning_rate": 5.953169588259274e-05, "loss": 2.6267, "step": 47665 }, { "epoch": 3.23889115368936, "grad_norm": 2.397183895111084, "learning_rate": 5.952744938170948e-05, 
"loss": 2.7008, "step": 47670 }, { "epoch": 3.2392308737600217, "grad_norm": 2.9384236335754395, "learning_rate": 5.95232028808262e-05, "loss": 2.5278, "step": 47675 }, { "epoch": 3.2395705938306834, "grad_norm": 2.4964709281921387, "learning_rate": 5.951895637994293e-05, "loss": 2.6315, "step": 47680 }, { "epoch": 3.2399103139013454, "grad_norm": 3.4378502368927, "learning_rate": 5.951470987905966e-05, "loss": 3.0003, "step": 47685 }, { "epoch": 3.240250033972007, "grad_norm": 3.082827568054199, "learning_rate": 5.951046337817638e-05, "loss": 2.4727, "step": 47690 }, { "epoch": 3.2405897540426687, "grad_norm": 2.169679880142212, "learning_rate": 5.950621687729311e-05, "loss": 2.8607, "step": 47695 }, { "epoch": 3.2409294741133308, "grad_norm": 3.1321494579315186, "learning_rate": 5.9501970376409846e-05, "loss": 2.5528, "step": 47700 }, { "epoch": 3.2412691941839924, "grad_norm": 2.7897303104400635, "learning_rate": 5.949772387552657e-05, "loss": 2.7279, "step": 47705 }, { "epoch": 3.241608914254654, "grad_norm": 2.884906053543091, "learning_rate": 5.949347737464329e-05, "loss": 2.8365, "step": 47710 }, { "epoch": 3.241948634325316, "grad_norm": 3.2746403217315674, "learning_rate": 5.948923087376003e-05, "loss": 2.4839, "step": 47715 }, { "epoch": 3.2422883543959777, "grad_norm": 4.0694403648376465, "learning_rate": 5.948498437287675e-05, "loss": 2.7803, "step": 47720 }, { "epoch": 3.2426280744666394, "grad_norm": 2.7740883827209473, "learning_rate": 5.948073787199347e-05, "loss": 2.5242, "step": 47725 }, { "epoch": 3.2429677945373014, "grad_norm": 2.645009756088257, "learning_rate": 5.947649137111021e-05, "loss": 2.5663, "step": 47730 }, { "epoch": 3.243307514607963, "grad_norm": 2.688680410385132, "learning_rate": 5.9472244870226935e-05, "loss": 2.6585, "step": 47735 }, { "epoch": 3.2436472346786247, "grad_norm": 2.9370453357696533, "learning_rate": 5.9467998369343657e-05, "loss": 2.6356, "step": 47740 }, { "epoch": 3.2439869547492868, "grad_norm": 
2.9453186988830566, "learning_rate": 5.946375186846039e-05, "loss": 2.6669, "step": 47745 }, { "epoch": 3.2443266748199484, "grad_norm": 3.0866732597351074, "learning_rate": 5.945950536757712e-05, "loss": 2.6579, "step": 47750 }, { "epoch": 3.24466639489061, "grad_norm": 3.2788660526275635, "learning_rate": 5.9455258866693854e-05, "loss": 2.7865, "step": 47755 }, { "epoch": 3.245006114961272, "grad_norm": 2.988339900970459, "learning_rate": 5.9451012365810575e-05, "loss": 2.5777, "step": 47760 }, { "epoch": 3.2453458350319337, "grad_norm": 3.272127389907837, "learning_rate": 5.94467658649273e-05, "loss": 2.7255, "step": 47765 }, { "epoch": 3.2456855551025954, "grad_norm": 2.5274906158447266, "learning_rate": 5.944251936404404e-05, "loss": 2.695, "step": 47770 }, { "epoch": 3.246025275173257, "grad_norm": 2.8500077724456787, "learning_rate": 5.943827286316076e-05, "loss": 2.5719, "step": 47775 }, { "epoch": 3.246364995243919, "grad_norm": 2.60661244392395, "learning_rate": 5.943402636227749e-05, "loss": 2.7186, "step": 47780 }, { "epoch": 3.2467047153145807, "grad_norm": 2.666860580444336, "learning_rate": 5.942977986139422e-05, "loss": 2.3178, "step": 47785 }, { "epoch": 3.2470444353852423, "grad_norm": 2.7542011737823486, "learning_rate": 5.942553336051094e-05, "loss": 2.5535, "step": 47790 }, { "epoch": 3.2473841554559044, "grad_norm": 3.371932029724121, "learning_rate": 5.9421286859627665e-05, "loss": 2.5211, "step": 47795 }, { "epoch": 3.247723875526566, "grad_norm": 2.894420862197876, "learning_rate": 5.94170403587444e-05, "loss": 2.6859, "step": 47800 }, { "epoch": 3.2480635955972277, "grad_norm": 3.054295778274536, "learning_rate": 5.941279385786113e-05, "loss": 2.3259, "step": 47805 }, { "epoch": 3.2484033156678898, "grad_norm": 2.6132569313049316, "learning_rate": 5.940854735697785e-05, "loss": 2.6808, "step": 47810 }, { "epoch": 3.2487430357385514, "grad_norm": 2.879854202270508, "learning_rate": 5.9404300856094583e-05, "loss": 2.4794, "step": 47815 }, { 
"epoch": 3.249082755809213, "grad_norm": 2.829766273498535, "learning_rate": 5.940005435521131e-05, "loss": 2.728, "step": 47820 }, { "epoch": 3.249422475879875, "grad_norm": 4.490461349487305, "learning_rate": 5.939580785432803e-05, "loss": 2.5251, "step": 47825 }, { "epoch": 3.2497621959505367, "grad_norm": 2.680060386657715, "learning_rate": 5.939156135344477e-05, "loss": 2.5399, "step": 47830 }, { "epoch": 3.2501019160211984, "grad_norm": 3.01959490776062, "learning_rate": 5.9387314852561495e-05, "loss": 2.6698, "step": 47835 }, { "epoch": 3.2504416360918604, "grad_norm": 3.024235963821411, "learning_rate": 5.938306835167822e-05, "loss": 2.7814, "step": 47840 }, { "epoch": 3.250781356162522, "grad_norm": 2.9093096256256104, "learning_rate": 5.937882185079495e-05, "loss": 2.6428, "step": 47845 }, { "epoch": 3.2511210762331837, "grad_norm": 3.322240114212036, "learning_rate": 5.937457534991168e-05, "loss": 2.7288, "step": 47850 }, { "epoch": 3.2514607963038458, "grad_norm": 3.4853687286376953, "learning_rate": 5.93703288490284e-05, "loss": 2.6528, "step": 47855 }, { "epoch": 3.2518005163745074, "grad_norm": 2.4532697200775146, "learning_rate": 5.9366082348145135e-05, "loss": 2.5792, "step": 47860 }, { "epoch": 3.252140236445169, "grad_norm": 3.0097200870513916, "learning_rate": 5.936183584726186e-05, "loss": 2.299, "step": 47865 }, { "epoch": 3.252479956515831, "grad_norm": 2.727108955383301, "learning_rate": 5.9357589346378585e-05, "loss": 2.4889, "step": 47870 }, { "epoch": 3.2528196765864927, "grad_norm": 2.9238369464874268, "learning_rate": 5.935334284549532e-05, "loss": 2.2632, "step": 47875 }, { "epoch": 3.2531593966571544, "grad_norm": 2.936868190765381, "learning_rate": 5.934909634461204e-05, "loss": 2.6845, "step": 47880 }, { "epoch": 3.2534991167278164, "grad_norm": 2.5863049030303955, "learning_rate": 5.934484984372877e-05, "loss": 2.771, "step": 47885 }, { "epoch": 3.253838836798478, "grad_norm": 2.981854200363159, "learning_rate": 
5.9340603342845504e-05, "loss": 2.813, "step": 47890 }, { "epoch": 3.2541785568691397, "grad_norm": 3.1252224445343018, "learning_rate": 5.9336356841962225e-05, "loss": 2.7184, "step": 47895 }, { "epoch": 3.254518276939802, "grad_norm": 3.074927568435669, "learning_rate": 5.933211034107895e-05, "loss": 2.517, "step": 47900 }, { "epoch": 3.2548579970104634, "grad_norm": 3.2150840759277344, "learning_rate": 5.932786384019569e-05, "loss": 2.5774, "step": 47905 }, { "epoch": 3.255197717081125, "grad_norm": 2.832587718963623, "learning_rate": 5.932361733931241e-05, "loss": 2.6123, "step": 47910 }, { "epoch": 3.255537437151787, "grad_norm": 2.5181069374084473, "learning_rate": 5.931937083842914e-05, "loss": 2.5995, "step": 47915 }, { "epoch": 3.2558771572224487, "grad_norm": 2.994257926940918, "learning_rate": 5.931512433754587e-05, "loss": 3.0826, "step": 47920 }, { "epoch": 3.2562168772931104, "grad_norm": 2.346097469329834, "learning_rate": 5.931087783666259e-05, "loss": 2.681, "step": 47925 }, { "epoch": 3.2565565973637725, "grad_norm": 2.8161747455596924, "learning_rate": 5.9306631335779314e-05, "loss": 2.709, "step": 47930 }, { "epoch": 3.256896317434434, "grad_norm": 2.699458360671997, "learning_rate": 5.930238483489605e-05, "loss": 2.8849, "step": 47935 }, { "epoch": 3.2572360375050957, "grad_norm": 2.681090831756592, "learning_rate": 5.929813833401278e-05, "loss": 2.6192, "step": 47940 }, { "epoch": 3.257575757575758, "grad_norm": 2.248185873031616, "learning_rate": 5.92938918331295e-05, "loss": 2.6111, "step": 47945 }, { "epoch": 3.2579154776464194, "grad_norm": 2.4142308235168457, "learning_rate": 5.928964533224623e-05, "loss": 2.8569, "step": 47950 }, { "epoch": 3.258255197717081, "grad_norm": 2.9008712768554688, "learning_rate": 5.928539883136296e-05, "loss": 2.6858, "step": 47955 }, { "epoch": 3.258594917787743, "grad_norm": 2.6931588649749756, "learning_rate": 5.928115233047968e-05, "loss": 2.8415, "step": 47960 }, { "epoch": 3.2589346378584048, 
"grad_norm": 3.061473846435547, "learning_rate": 5.927690582959642e-05, "loss": 2.6788, "step": 47965 }, { "epoch": 3.2592743579290664, "grad_norm": 2.7542333602905273, "learning_rate": 5.9272659328713145e-05, "loss": 2.9289, "step": 47970 }, { "epoch": 3.2596140779997285, "grad_norm": 2.6673617362976074, "learning_rate": 5.9268412827829866e-05, "loss": 2.4655, "step": 47975 }, { "epoch": 3.25995379807039, "grad_norm": 2.375857353210449, "learning_rate": 5.92641663269466e-05, "loss": 2.4597, "step": 47980 }, { "epoch": 3.2602935181410517, "grad_norm": 2.672887086868286, "learning_rate": 5.925991982606333e-05, "loss": 2.8928, "step": 47985 }, { "epoch": 3.2606332382117134, "grad_norm": 3.0494515895843506, "learning_rate": 5.925567332518005e-05, "loss": 2.6642, "step": 47990 }, { "epoch": 3.2609729582823754, "grad_norm": 2.699974536895752, "learning_rate": 5.9251426824296785e-05, "loss": 2.8947, "step": 47995 }, { "epoch": 3.261312678353037, "grad_norm": 2.508403778076172, "learning_rate": 5.9247180323413506e-05, "loss": 2.6863, "step": 48000 }, { "epoch": 3.2616523984236987, "grad_norm": 2.9255964756011963, "learning_rate": 5.9242933822530234e-05, "loss": 2.5877, "step": 48005 }, { "epoch": 3.2619921184943608, "grad_norm": 2.580044746398926, "learning_rate": 5.923868732164697e-05, "loss": 2.8637, "step": 48010 }, { "epoch": 3.2623318385650224, "grad_norm": 3.238852024078369, "learning_rate": 5.923444082076369e-05, "loss": 2.6726, "step": 48015 }, { "epoch": 3.262671558635684, "grad_norm": 3.1592605113983154, "learning_rate": 5.923019431988042e-05, "loss": 2.6149, "step": 48020 }, { "epoch": 3.263011278706346, "grad_norm": 2.8850114345550537, "learning_rate": 5.922594781899715e-05, "loss": 2.9235, "step": 48025 }, { "epoch": 3.2633509987770077, "grad_norm": 2.1010165214538574, "learning_rate": 5.9221701318113874e-05, "loss": 2.6225, "step": 48030 }, { "epoch": 3.2636907188476694, "grad_norm": 2.5613348484039307, "learning_rate": 5.92174548172306e-05, "loss": 2.6233, 
"step": 48035 }, { "epoch": 3.2640304389183314, "grad_norm": 2.2286911010742188, "learning_rate": 5.921320831634734e-05, "loss": 2.7152, "step": 48040 }, { "epoch": 3.264370158988993, "grad_norm": 2.6900367736816406, "learning_rate": 5.920896181546406e-05, "loss": 2.5497, "step": 48045 }, { "epoch": 3.2647098790596547, "grad_norm": 2.789196729660034, "learning_rate": 5.9204715314580786e-05, "loss": 2.6762, "step": 48050 }, { "epoch": 3.265049599130317, "grad_norm": 3.4102065563201904, "learning_rate": 5.920046881369752e-05, "loss": 2.3482, "step": 48055 }, { "epoch": 3.2653893192009784, "grad_norm": 3.0726892948150635, "learning_rate": 5.919622231281424e-05, "loss": 2.3456, "step": 48060 }, { "epoch": 3.26572903927164, "grad_norm": 3.1701509952545166, "learning_rate": 5.9191975811930963e-05, "loss": 2.6734, "step": 48065 }, { "epoch": 3.266068759342302, "grad_norm": 2.9895129203796387, "learning_rate": 5.91877293110477e-05, "loss": 2.68, "step": 48070 }, { "epoch": 3.2664084794129638, "grad_norm": 2.820270538330078, "learning_rate": 5.9183482810164426e-05, "loss": 2.8193, "step": 48075 }, { "epoch": 3.2667481994836254, "grad_norm": 2.759887933731079, "learning_rate": 5.917923630928115e-05, "loss": 2.5581, "step": 48080 }, { "epoch": 3.2670879195542875, "grad_norm": 3.7307915687561035, "learning_rate": 5.917498980839788e-05, "loss": 2.4986, "step": 48085 }, { "epoch": 3.267427639624949, "grad_norm": 3.2624409198760986, "learning_rate": 5.917074330751461e-05, "loss": 2.5754, "step": 48090 }, { "epoch": 3.2677673596956107, "grad_norm": 2.9853460788726807, "learning_rate": 5.9166496806631345e-05, "loss": 2.6817, "step": 48095 }, { "epoch": 3.2681070797662723, "grad_norm": 2.534881591796875, "learning_rate": 5.9162250305748066e-05, "loss": 2.7412, "step": 48100 }, { "epoch": 3.2684467998369344, "grad_norm": 2.806415557861328, "learning_rate": 5.9158003804864794e-05, "loss": 2.7451, "step": 48105 }, { "epoch": 3.268786519907596, "grad_norm": 4.4954729080200195, 
"learning_rate": 5.915375730398153e-05, "loss": 2.668, "step": 48110 }, { "epoch": 3.2691262399782577, "grad_norm": 3.173485040664673, "learning_rate": 5.914951080309825e-05, "loss": 2.728, "step": 48115 }, { "epoch": 3.2694659600489198, "grad_norm": 2.262758493423462, "learning_rate": 5.914526430221498e-05, "loss": 2.6933, "step": 48120 }, { "epoch": 3.2698056801195814, "grad_norm": 3.0963516235351562, "learning_rate": 5.914101780133171e-05, "loss": 2.6976, "step": 48125 }, { "epoch": 3.270145400190243, "grad_norm": 3.229255199432373, "learning_rate": 5.9136771300448434e-05, "loss": 2.3242, "step": 48130 }, { "epoch": 3.270485120260905, "grad_norm": 3.681879997253418, "learning_rate": 5.9132524799565156e-05, "loss": 2.3763, "step": 48135 }, { "epoch": 3.2708248403315667, "grad_norm": 3.3484883308410645, "learning_rate": 5.91282782986819e-05, "loss": 2.6792, "step": 48140 }, { "epoch": 3.2711645604022284, "grad_norm": 2.8985397815704346, "learning_rate": 5.912403179779862e-05, "loss": 2.6202, "step": 48145 }, { "epoch": 3.2715042804728904, "grad_norm": 2.526890993118286, "learning_rate": 5.911978529691534e-05, "loss": 2.7652, "step": 48150 }, { "epoch": 3.271844000543552, "grad_norm": 3.4727072715759277, "learning_rate": 5.9115538796032074e-05, "loss": 2.8975, "step": 48155 }, { "epoch": 3.2721837206142137, "grad_norm": 3.4717955589294434, "learning_rate": 5.91112922951488e-05, "loss": 2.1545, "step": 48160 }, { "epoch": 3.2725234406848758, "grad_norm": 3.4661102294921875, "learning_rate": 5.9107045794265524e-05, "loss": 2.7924, "step": 48165 }, { "epoch": 3.2728631607555374, "grad_norm": 3.134699821472168, "learning_rate": 5.910279929338226e-05, "loss": 2.4852, "step": 48170 }, { "epoch": 3.273202880826199, "grad_norm": 3.601686716079712, "learning_rate": 5.9098552792498986e-05, "loss": 2.4555, "step": 48175 }, { "epoch": 3.273542600896861, "grad_norm": 2.7330222129821777, "learning_rate": 5.909430629161571e-05, "loss": 2.655, "step": 48180 }, { "epoch": 
3.2738823209675227, "grad_norm": 3.133280038833618, "learning_rate": 5.909005979073244e-05, "loss": 2.6581, "step": 48185 }, { "epoch": 3.2742220410381844, "grad_norm": 3.2082266807556152, "learning_rate": 5.908581328984917e-05, "loss": 2.7243, "step": 48190 }, { "epoch": 3.2745617611088464, "grad_norm": 2.873220443725586, "learning_rate": 5.908156678896589e-05, "loss": 2.7333, "step": 48195 }, { "epoch": 3.274901481179508, "grad_norm": 3.319262742996216, "learning_rate": 5.9077320288082626e-05, "loss": 2.7141, "step": 48200 }, { "epoch": 3.2752412012501697, "grad_norm": 3.250889301300049, "learning_rate": 5.9073073787199354e-05, "loss": 2.6914, "step": 48205 }, { "epoch": 3.275580921320832, "grad_norm": 3.1701340675354004, "learning_rate": 5.9068827286316076e-05, "loss": 2.6544, "step": 48210 }, { "epoch": 3.2759206413914934, "grad_norm": 2.4602227210998535, "learning_rate": 5.906458078543281e-05, "loss": 2.5289, "step": 48215 }, { "epoch": 3.276260361462155, "grad_norm": 2.34391450881958, "learning_rate": 5.906033428454953e-05, "loss": 2.6864, "step": 48220 }, { "epoch": 3.276600081532817, "grad_norm": 3.011082410812378, "learning_rate": 5.905608778366626e-05, "loss": 2.5895, "step": 48225 }, { "epoch": 3.2769398016034788, "grad_norm": 3.0422780513763428, "learning_rate": 5.9051841282782994e-05, "loss": 2.6901, "step": 48230 }, { "epoch": 3.2772795216741404, "grad_norm": 3.9468655586242676, "learning_rate": 5.9047594781899716e-05, "loss": 2.7183, "step": 48235 }, { "epoch": 3.2776192417448025, "grad_norm": 2.9022715091705322, "learning_rate": 5.9043348281016444e-05, "loss": 2.7536, "step": 48240 }, { "epoch": 3.277958961815464, "grad_norm": 2.442136287689209, "learning_rate": 5.903910178013318e-05, "loss": 2.8268, "step": 48245 }, { "epoch": 3.2782986818861257, "grad_norm": 2.7649850845336914, "learning_rate": 5.90348552792499e-05, "loss": 2.7587, "step": 48250 }, { "epoch": 3.278638401956788, "grad_norm": 2.4570441246032715, "learning_rate": 
5.903060877836663e-05, "loss": 2.5841, "step": 48255 }, { "epoch": 3.2789781220274494, "grad_norm": 2.8084421157836914, "learning_rate": 5.902636227748336e-05, "loss": 2.6477, "step": 48260 }, { "epoch": 3.279317842098111, "grad_norm": 2.9739186763763428, "learning_rate": 5.9022115776600084e-05, "loss": 2.6515, "step": 48265 }, { "epoch": 3.279657562168773, "grad_norm": 3.052497386932373, "learning_rate": 5.9017869275716805e-05, "loss": 2.5085, "step": 48270 }, { "epoch": 3.2799972822394348, "grad_norm": 3.195629358291626, "learning_rate": 5.9013622774833547e-05, "loss": 2.4991, "step": 48275 }, { "epoch": 3.2803370023100964, "grad_norm": 2.6866118907928467, "learning_rate": 5.900937627395027e-05, "loss": 2.684, "step": 48280 }, { "epoch": 3.2806767223807585, "grad_norm": 2.998324155807495, "learning_rate": 5.900512977306699e-05, "loss": 2.5319, "step": 48285 }, { "epoch": 3.28101644245142, "grad_norm": 2.8442087173461914, "learning_rate": 5.9000883272183724e-05, "loss": 2.5875, "step": 48290 }, { "epoch": 3.2813561625220817, "grad_norm": 2.475931167602539, "learning_rate": 5.899663677130045e-05, "loss": 2.7306, "step": 48295 }, { "epoch": 3.281695882592744, "grad_norm": 3.5174002647399902, "learning_rate": 5.899239027041717e-05, "loss": 2.5934, "step": 48300 }, { "epoch": 3.2820356026634054, "grad_norm": 3.4889466762542725, "learning_rate": 5.898814376953391e-05, "loss": 2.6678, "step": 48305 }, { "epoch": 3.282375322734067, "grad_norm": 3.2216272354125977, "learning_rate": 5.8983897268650636e-05, "loss": 2.798, "step": 48310 }, { "epoch": 3.282715042804729, "grad_norm": 3.21291446685791, "learning_rate": 5.897965076776736e-05, "loss": 2.7658, "step": 48315 }, { "epoch": 3.2830547628753908, "grad_norm": 3.5444839000701904, "learning_rate": 5.897540426688409e-05, "loss": 2.5541, "step": 48320 }, { "epoch": 3.2833944829460524, "grad_norm": 3.567556381225586, "learning_rate": 5.897115776600082e-05, "loss": 2.5528, "step": 48325 }, { "epoch": 3.283734203016714, 
"grad_norm": 3.0726795196533203, "learning_rate": 5.896691126511754e-05, "loss": 2.6141, "step": 48330 }, { "epoch": 3.284073923087376, "grad_norm": 2.681291103363037, "learning_rate": 5.8962664764234276e-05, "loss": 2.5336, "step": 48335 }, { "epoch": 3.2844136431580377, "grad_norm": 2.472302198410034, "learning_rate": 5.8958418263351004e-05, "loss": 2.6363, "step": 48340 }, { "epoch": 3.2847533632286994, "grad_norm": 2.5513525009155273, "learning_rate": 5.8954171762467725e-05, "loss": 2.4122, "step": 48345 }, { "epoch": 3.2850930832993614, "grad_norm": 2.751579761505127, "learning_rate": 5.894992526158446e-05, "loss": 2.6489, "step": 48350 }, { "epoch": 3.285432803370023, "grad_norm": 2.553992509841919, "learning_rate": 5.894567876070118e-05, "loss": 2.6476, "step": 48355 }, { "epoch": 3.2857725234406847, "grad_norm": 2.5143048763275146, "learning_rate": 5.894143225981791e-05, "loss": 2.5699, "step": 48360 }, { "epoch": 3.286112243511347, "grad_norm": 2.9840404987335205, "learning_rate": 5.8937185758934644e-05, "loss": 2.5733, "step": 48365 }, { "epoch": 3.2864519635820084, "grad_norm": 2.929802417755127, "learning_rate": 5.8932939258051365e-05, "loss": 2.757, "step": 48370 }, { "epoch": 3.28679168365267, "grad_norm": 3.3168716430664062, "learning_rate": 5.892869275716809e-05, "loss": 2.8026, "step": 48375 }, { "epoch": 3.287131403723332, "grad_norm": 2.3953659534454346, "learning_rate": 5.892444625628483e-05, "loss": 2.8568, "step": 48380 }, { "epoch": 3.2874711237939938, "grad_norm": 2.505596876144409, "learning_rate": 5.892019975540155e-05, "loss": 2.591, "step": 48385 }, { "epoch": 3.2878108438646554, "grad_norm": 2.7593724727630615, "learning_rate": 5.891595325451828e-05, "loss": 2.6883, "step": 48390 }, { "epoch": 3.2881505639353175, "grad_norm": 2.3295910358428955, "learning_rate": 5.891170675363501e-05, "loss": 2.6719, "step": 48395 }, { "epoch": 3.288490284005979, "grad_norm": 2.38839054107666, "learning_rate": 5.890746025275173e-05, "loss": 2.7758, 
"step": 48400 }, { "epoch": 3.2888300040766407, "grad_norm": 2.9797310829162598, "learning_rate": 5.890321375186846e-05, "loss": 2.9007, "step": 48405 }, { "epoch": 3.289169724147303, "grad_norm": 2.5871500968933105, "learning_rate": 5.8898967250985196e-05, "loss": 2.6451, "step": 48410 }, { "epoch": 3.2895094442179644, "grad_norm": 3.1238620281219482, "learning_rate": 5.889472075010192e-05, "loss": 2.6505, "step": 48415 }, { "epoch": 3.289849164288626, "grad_norm": 3.2345523834228516, "learning_rate": 5.889047424921864e-05, "loss": 2.5368, "step": 48420 }, { "epoch": 3.290188884359288, "grad_norm": 2.6326816082000732, "learning_rate": 5.888622774833537e-05, "loss": 2.589, "step": 48425 }, { "epoch": 3.2905286044299498, "grad_norm": 3.5683910846710205, "learning_rate": 5.88819812474521e-05, "loss": 2.7182, "step": 48430 }, { "epoch": 3.2908683245006114, "grad_norm": 2.714891195297241, "learning_rate": 5.8877734746568836e-05, "loss": 2.5601, "step": 48435 }, { "epoch": 3.291208044571273, "grad_norm": 3.609675168991089, "learning_rate": 5.887348824568556e-05, "loss": 2.8752, "step": 48440 }, { "epoch": 3.291547764641935, "grad_norm": 2.5109713077545166, "learning_rate": 5.8869241744802285e-05, "loss": 2.9924, "step": 48445 }, { "epoch": 3.2918874847125967, "grad_norm": 2.3223347663879395, "learning_rate": 5.886499524391902e-05, "loss": 2.9348, "step": 48450 }, { "epoch": 3.2922272047832584, "grad_norm": 2.4431145191192627, "learning_rate": 5.886074874303574e-05, "loss": 2.2735, "step": 48455 }, { "epoch": 3.2925669248539204, "grad_norm": 2.7106847763061523, "learning_rate": 5.885650224215247e-05, "loss": 2.4807, "step": 48460 }, { "epoch": 3.292906644924582, "grad_norm": 2.8587028980255127, "learning_rate": 5.8852255741269204e-05, "loss": 2.6226, "step": 48465 }, { "epoch": 3.2932463649952437, "grad_norm": 4.164457321166992, "learning_rate": 5.8848009240385925e-05, "loss": 2.6379, "step": 48470 }, { "epoch": 3.2935860850659058, "grad_norm": 2.9645864963531494, 
"learning_rate": 5.884376273950265e-05, "loss": 2.5923, "step": 48475 }, { "epoch": 3.2939258051365674, "grad_norm": 2.390814781188965, "learning_rate": 5.883951623861939e-05, "loss": 2.9028, "step": 48480 }, { "epoch": 3.294265525207229, "grad_norm": 2.4575953483581543, "learning_rate": 5.883526973773611e-05, "loss": 2.3662, "step": 48485 }, { "epoch": 3.294605245277891, "grad_norm": 3.279229164123535, "learning_rate": 5.883102323685283e-05, "loss": 2.6963, "step": 48490 }, { "epoch": 3.2949449653485527, "grad_norm": 3.2237133979797363, "learning_rate": 5.8826776735969565e-05, "loss": 2.6523, "step": 48495 }, { "epoch": 3.2952846854192144, "grad_norm": 3.518148422241211, "learning_rate": 5.882253023508629e-05, "loss": 2.593, "step": 48500 }, { "epoch": 3.2956244054898765, "grad_norm": 2.745551824569702, "learning_rate": 5.8818283734203015e-05, "loss": 2.7986, "step": 48505 }, { "epoch": 3.295964125560538, "grad_norm": 2.458641290664673, "learning_rate": 5.881403723331975e-05, "loss": 2.619, "step": 48510 }, { "epoch": 3.2963038456311997, "grad_norm": 2.715862274169922, "learning_rate": 5.880979073243648e-05, "loss": 2.7406, "step": 48515 }, { "epoch": 3.296643565701862, "grad_norm": 2.60361909866333, "learning_rate": 5.88055442315532e-05, "loss": 2.5506, "step": 48520 }, { "epoch": 3.2969832857725234, "grad_norm": 3.2603912353515625, "learning_rate": 5.8801297730669933e-05, "loss": 2.5982, "step": 48525 }, { "epoch": 3.297323005843185, "grad_norm": 2.6636226177215576, "learning_rate": 5.879705122978666e-05, "loss": 2.4897, "step": 48530 }, { "epoch": 3.297662725913847, "grad_norm": 2.4643638134002686, "learning_rate": 5.879280472890338e-05, "loss": 2.5008, "step": 48535 }, { "epoch": 3.2980024459845088, "grad_norm": 2.9693360328674316, "learning_rate": 5.878855822802012e-05, "loss": 2.3711, "step": 48540 }, { "epoch": 3.2983421660551704, "grad_norm": 3.4321091175079346, "learning_rate": 5.8784311727136845e-05, "loss": 2.7605, "step": 48545 }, { "epoch": 
3.2986818861258325, "grad_norm": 3.0705556869506836, "learning_rate": 5.878006522625357e-05, "loss": 2.5965, "step": 48550 }, { "epoch": 3.299021606196494, "grad_norm": 3.852396249771118, "learning_rate": 5.87758187253703e-05, "loss": 2.6989, "step": 48555 }, { "epoch": 3.2993613262671557, "grad_norm": 2.646639347076416, "learning_rate": 5.877157222448702e-05, "loss": 2.7441, "step": 48560 }, { "epoch": 3.299701046337818, "grad_norm": 2.6407532691955566, "learning_rate": 5.876732572360375e-05, "loss": 2.4719, "step": 48565 }, { "epoch": 3.3000407664084794, "grad_norm": 2.272702932357788, "learning_rate": 5.8763079222720485e-05, "loss": 2.7446, "step": 48570 }, { "epoch": 3.300380486479141, "grad_norm": 3.3678300380706787, "learning_rate": 5.875883272183721e-05, "loss": 2.715, "step": 48575 }, { "epoch": 3.300720206549803, "grad_norm": 2.245823621749878, "learning_rate": 5.8754586220953935e-05, "loss": 2.6088, "step": 48580 }, { "epoch": 3.3010599266204648, "grad_norm": 2.417091131210327, "learning_rate": 5.875033972007067e-05, "loss": 2.8069, "step": 48585 }, { "epoch": 3.3013996466911264, "grad_norm": 3.5061795711517334, "learning_rate": 5.874609321918739e-05, "loss": 2.5925, "step": 48590 }, { "epoch": 3.3017393667617885, "grad_norm": 2.5555145740509033, "learning_rate": 5.874184671830412e-05, "loss": 2.8129, "step": 48595 }, { "epoch": 3.30207908683245, "grad_norm": 2.5107994079589844, "learning_rate": 5.8737600217420854e-05, "loss": 2.5695, "step": 48600 }, { "epoch": 3.3024188069031117, "grad_norm": 2.8061537742614746, "learning_rate": 5.8733353716537575e-05, "loss": 2.347, "step": 48605 }, { "epoch": 3.302758526973774, "grad_norm": 2.513251543045044, "learning_rate": 5.87291072156543e-05, "loss": 2.7919, "step": 48610 }, { "epoch": 3.3030982470444354, "grad_norm": 3.096212863922119, "learning_rate": 5.872486071477104e-05, "loss": 2.8426, "step": 48615 }, { "epoch": 3.303437967115097, "grad_norm": 2.8991081714630127, "learning_rate": 5.872061421388776e-05, 
"loss": 2.534, "step": 48620 }, { "epoch": 3.303777687185759, "grad_norm": 2.8733999729156494, "learning_rate": 5.871636771300448e-05, "loss": 2.4915, "step": 48625 }, { "epoch": 3.304117407256421, "grad_norm": 2.950313091278076, "learning_rate": 5.871212121212122e-05, "loss": 2.5696, "step": 48630 }, { "epoch": 3.3044571273270824, "grad_norm": 2.9623308181762695, "learning_rate": 5.870787471123794e-05, "loss": 2.9563, "step": 48635 }, { "epoch": 3.3047968473977445, "grad_norm": 4.065356254577637, "learning_rate": 5.8703628210354664e-05, "loss": 2.7995, "step": 48640 }, { "epoch": 3.305136567468406, "grad_norm": 3.1349027156829834, "learning_rate": 5.86993817094714e-05, "loss": 2.7425, "step": 48645 }, { "epoch": 3.3054762875390677, "grad_norm": 2.656968116760254, "learning_rate": 5.869513520858813e-05, "loss": 2.8414, "step": 48650 }, { "epoch": 3.30581600760973, "grad_norm": 2.531351089477539, "learning_rate": 5.869088870770485e-05, "loss": 2.6888, "step": 48655 }, { "epoch": 3.3061557276803915, "grad_norm": 2.6194896697998047, "learning_rate": 5.868664220682158e-05, "loss": 2.9722, "step": 48660 }, { "epoch": 3.306495447751053, "grad_norm": 2.658533811569214, "learning_rate": 5.868239570593831e-05, "loss": 2.4892, "step": 48665 }, { "epoch": 3.3068351678217147, "grad_norm": 3.520268678665161, "learning_rate": 5.867814920505503e-05, "loss": 2.5675, "step": 48670 }, { "epoch": 3.307174887892377, "grad_norm": 2.536196708679199, "learning_rate": 5.867390270417177e-05, "loss": 2.7056, "step": 48675 }, { "epoch": 3.3075146079630384, "grad_norm": 2.494250535964966, "learning_rate": 5.8669656203288495e-05, "loss": 2.512, "step": 48680 }, { "epoch": 3.3078543280337, "grad_norm": 2.8718020915985107, "learning_rate": 5.8665409702405216e-05, "loss": 2.3574, "step": 48685 }, { "epoch": 3.308194048104362, "grad_norm": 2.699218511581421, "learning_rate": 5.866116320152195e-05, "loss": 2.3399, "step": 48690 }, { "epoch": 3.3085337681750238, "grad_norm": 2.6838178634643555, 
"learning_rate": 5.865691670063868e-05, "loss": 2.4551, "step": 48695 }, { "epoch": 3.3088734882456854, "grad_norm": 3.0420753955841064, "learning_rate": 5.86526701997554e-05, "loss": 2.8534, "step": 48700 }, { "epoch": 3.3092132083163475, "grad_norm": 2.5088698863983154, "learning_rate": 5.8648423698872135e-05, "loss": 2.5033, "step": 48705 }, { "epoch": 3.309552928387009, "grad_norm": 2.7634105682373047, "learning_rate": 5.8644177197988856e-05, "loss": 2.4465, "step": 48710 }, { "epoch": 3.3098926484576707, "grad_norm": 2.6179144382476807, "learning_rate": 5.8639930697105584e-05, "loss": 2.6166, "step": 48715 }, { "epoch": 3.310232368528333, "grad_norm": 2.3447160720825195, "learning_rate": 5.863568419622232e-05, "loss": 2.4478, "step": 48720 }, { "epoch": 3.3105720885989944, "grad_norm": 4.530699729919434, "learning_rate": 5.863143769533904e-05, "loss": 2.6055, "step": 48725 }, { "epoch": 3.310911808669656, "grad_norm": 2.9589450359344482, "learning_rate": 5.862719119445577e-05, "loss": 2.6452, "step": 48730 }, { "epoch": 3.311251528740318, "grad_norm": 2.6558170318603516, "learning_rate": 5.86229446935725e-05, "loss": 2.5646, "step": 48735 }, { "epoch": 3.3115912488109798, "grad_norm": 2.8550119400024414, "learning_rate": 5.8618698192689224e-05, "loss": 2.5716, "step": 48740 }, { "epoch": 3.3119309688816414, "grad_norm": 3.1555840969085693, "learning_rate": 5.861445169180595e-05, "loss": 2.4376, "step": 48745 }, { "epoch": 3.3122706889523035, "grad_norm": 2.820712089538574, "learning_rate": 5.861020519092269e-05, "loss": 2.9127, "step": 48750 }, { "epoch": 3.312610409022965, "grad_norm": 2.694552421569824, "learning_rate": 5.860595869003941e-05, "loss": 2.5147, "step": 48755 }, { "epoch": 3.3129501290936267, "grad_norm": 2.425222873687744, "learning_rate": 5.860171218915613e-05, "loss": 2.5988, "step": 48760 }, { "epoch": 3.313289849164289, "grad_norm": 2.8267147541046143, "learning_rate": 5.859746568827287e-05, "loss": 2.6605, "step": 48765 }, { "epoch": 
3.3136295692349504, "grad_norm": 3.108240842819214, "learning_rate": 5.859321918738959e-05, "loss": 2.399, "step": 48770 }, { "epoch": 3.313969289305612, "grad_norm": 2.446183204650879, "learning_rate": 5.858897268650633e-05, "loss": 2.5611, "step": 48775 }, { "epoch": 3.3143090093762737, "grad_norm": 3.231043815612793, "learning_rate": 5.858472618562305e-05, "loss": 2.5286, "step": 48780 }, { "epoch": 3.314648729446936, "grad_norm": 2.9186177253723145, "learning_rate": 5.8580479684739776e-05, "loss": 2.5972, "step": 48785 }, { "epoch": 3.3149884495175974, "grad_norm": 2.94924259185791, "learning_rate": 5.857623318385651e-05, "loss": 2.7945, "step": 48790 }, { "epoch": 3.315328169588259, "grad_norm": 2.4969959259033203, "learning_rate": 5.857198668297323e-05, "loss": 2.7363, "step": 48795 }, { "epoch": 3.315667889658921, "grad_norm": 2.5729854106903076, "learning_rate": 5.856774018208996e-05, "loss": 2.6471, "step": 48800 }, { "epoch": 3.3160076097295828, "grad_norm": 3.0090482234954834, "learning_rate": 5.8563493681206695e-05, "loss": 2.6808, "step": 48805 }, { "epoch": 3.3163473298002444, "grad_norm": 2.4568679332733154, "learning_rate": 5.8559247180323416e-05, "loss": 2.5641, "step": 48810 }, { "epoch": 3.3166870498709065, "grad_norm": 3.2302839756011963, "learning_rate": 5.8555000679440144e-05, "loss": 2.5569, "step": 48815 }, { "epoch": 3.317026769941568, "grad_norm": 3.603423595428467, "learning_rate": 5.855075417855688e-05, "loss": 2.5637, "step": 48820 }, { "epoch": 3.3173664900122297, "grad_norm": 2.8666603565216064, "learning_rate": 5.85465076776736e-05, "loss": 2.7008, "step": 48825 }, { "epoch": 3.317706210082892, "grad_norm": 2.5622143745422363, "learning_rate": 5.854226117679033e-05, "loss": 2.5537, "step": 48830 }, { "epoch": 3.3180459301535534, "grad_norm": 2.1393260955810547, "learning_rate": 5.853801467590706e-05, "loss": 2.9338, "step": 48835 }, { "epoch": 3.318385650224215, "grad_norm": 2.6130690574645996, "learning_rate": 
5.8533768175023784e-05, "loss": 2.7037, "step": 48840 }, { "epoch": 3.318725370294877, "grad_norm": 2.7171480655670166, "learning_rate": 5.8529521674140506e-05, "loss": 2.853, "step": 48845 }, { "epoch": 3.3190650903655388, "grad_norm": 2.451345205307007, "learning_rate": 5.852527517325724e-05, "loss": 2.3786, "step": 48850 }, { "epoch": 3.3194048104362004, "grad_norm": 2.5791518688201904, "learning_rate": 5.852102867237397e-05, "loss": 2.7813, "step": 48855 }, { "epoch": 3.3197445305068625, "grad_norm": 2.887070655822754, "learning_rate": 5.851678217149069e-05, "loss": 2.3928, "step": 48860 }, { "epoch": 3.320084250577524, "grad_norm": 3.8464972972869873, "learning_rate": 5.8512535670607424e-05, "loss": 2.6732, "step": 48865 }, { "epoch": 3.3204239706481857, "grad_norm": 2.8620152473449707, "learning_rate": 5.850828916972415e-05, "loss": 2.8419, "step": 48870 }, { "epoch": 3.320763690718848, "grad_norm": 2.6543989181518555, "learning_rate": 5.8504042668840874e-05, "loss": 2.4594, "step": 48875 }, { "epoch": 3.3211034107895094, "grad_norm": 2.929872989654541, "learning_rate": 5.849979616795761e-05, "loss": 2.6069, "step": 48880 }, { "epoch": 3.321443130860171, "grad_norm": 3.515226125717163, "learning_rate": 5.8495549667074336e-05, "loss": 2.5269, "step": 48885 }, { "epoch": 3.321782850930833, "grad_norm": 3.039562702178955, "learning_rate": 5.849130316619106e-05, "loss": 2.7344, "step": 48890 }, { "epoch": 3.3221225710014948, "grad_norm": 2.9268157482147217, "learning_rate": 5.848705666530779e-05, "loss": 2.7336, "step": 48895 }, { "epoch": 3.3224622910721564, "grad_norm": 2.391961097717285, "learning_rate": 5.848281016442452e-05, "loss": 2.7141, "step": 48900 }, { "epoch": 3.3228020111428185, "grad_norm": 3.3546721935272217, "learning_rate": 5.847856366354124e-05, "loss": 2.5892, "step": 48905 }, { "epoch": 3.32314173121348, "grad_norm": 2.3554396629333496, "learning_rate": 5.8474317162657976e-05, "loss": 2.9127, "step": 48910 }, { "epoch": 3.3234814512841417, 
"grad_norm": 3.4112977981567383, "learning_rate": 5.84700706617747e-05, "loss": 2.6191, "step": 48915 }, { "epoch": 3.323821171354804, "grad_norm": 3.5169098377227783, "learning_rate": 5.8465824160891426e-05, "loss": 2.5496, "step": 48920 }, { "epoch": 3.3241608914254654, "grad_norm": 3.151120185852051, "learning_rate": 5.846157766000816e-05, "loss": 2.721, "step": 48925 }, { "epoch": 3.324500611496127, "grad_norm": 2.863374710083008, "learning_rate": 5.845733115912488e-05, "loss": 2.577, "step": 48930 }, { "epoch": 3.324840331566789, "grad_norm": 2.715941905975342, "learning_rate": 5.845308465824161e-05, "loss": 2.6897, "step": 48935 }, { "epoch": 3.325180051637451, "grad_norm": 2.6981709003448486, "learning_rate": 5.8448838157358344e-05, "loss": 2.6448, "step": 48940 }, { "epoch": 3.3255197717081124, "grad_norm": 2.3191311359405518, "learning_rate": 5.8444591656475066e-05, "loss": 2.7063, "step": 48945 }, { "epoch": 3.3258594917787745, "grad_norm": 3.6513760089874268, "learning_rate": 5.8440345155591794e-05, "loss": 2.8699, "step": 48950 }, { "epoch": 3.326199211849436, "grad_norm": 3.0109074115753174, "learning_rate": 5.843609865470853e-05, "loss": 2.6771, "step": 48955 }, { "epoch": 3.3265389319200978, "grad_norm": 2.94903302192688, "learning_rate": 5.843185215382525e-05, "loss": 2.6266, "step": 48960 }, { "epoch": 3.32687865199076, "grad_norm": 2.887619733810425, "learning_rate": 5.842760565294198e-05, "loss": 2.9329, "step": 48965 }, { "epoch": 3.3272183720614215, "grad_norm": 3.033759593963623, "learning_rate": 5.842335915205871e-05, "loss": 2.6764, "step": 48970 }, { "epoch": 3.327558092132083, "grad_norm": 3.045762062072754, "learning_rate": 5.8419112651175434e-05, "loss": 2.4487, "step": 48975 }, { "epoch": 3.327897812202745, "grad_norm": 2.6762218475341797, "learning_rate": 5.8414866150292155e-05, "loss": 2.3902, "step": 48980 }, { "epoch": 3.328237532273407, "grad_norm": 2.6471920013427734, "learning_rate": 5.841061964940889e-05, "loss": 2.5738, "step": 
48985 }, { "epoch": 3.3285772523440684, "grad_norm": 4.224062442779541, "learning_rate": 5.840637314852562e-05, "loss": 2.418, "step": 48990 }, { "epoch": 3.3289169724147305, "grad_norm": 3.1231632232666016, "learning_rate": 5.840212664764234e-05, "loss": 2.5876, "step": 48995 }, { "epoch": 3.329256692485392, "grad_norm": 3.0738091468811035, "learning_rate": 5.8397880146759074e-05, "loss": 2.7087, "step": 49000 }, { "epoch": 3.3295964125560538, "grad_norm": 3.3641135692596436, "learning_rate": 5.83936336458758e-05, "loss": 2.6004, "step": 49005 }, { "epoch": 3.3299361326267154, "grad_norm": 3.257779359817505, "learning_rate": 5.838938714499252e-05, "loss": 2.6426, "step": 49010 }, { "epoch": 3.3302758526973775, "grad_norm": 3.0804808139801025, "learning_rate": 5.838514064410926e-05, "loss": 2.473, "step": 49015 }, { "epoch": 3.330615572768039, "grad_norm": 3.166154146194458, "learning_rate": 5.8380894143225986e-05, "loss": 2.6435, "step": 49020 }, { "epoch": 3.3309552928387007, "grad_norm": 3.529520034790039, "learning_rate": 5.837664764234271e-05, "loss": 2.8687, "step": 49025 }, { "epoch": 3.331295012909363, "grad_norm": 3.5093724727630615, "learning_rate": 5.837240114145944e-05, "loss": 2.7945, "step": 49030 }, { "epoch": 3.3316347329800244, "grad_norm": 3.2521026134490967, "learning_rate": 5.836815464057617e-05, "loss": 2.7782, "step": 49035 }, { "epoch": 3.331974453050686, "grad_norm": 2.689807415008545, "learning_rate": 5.836390813969289e-05, "loss": 2.9997, "step": 49040 }, { "epoch": 3.332314173121348, "grad_norm": 2.4162654876708984, "learning_rate": 5.8359661638809626e-05, "loss": 2.5214, "step": 49045 }, { "epoch": 3.3326538931920098, "grad_norm": 3.0956497192382812, "learning_rate": 5.835541513792635e-05, "loss": 2.7831, "step": 49050 }, { "epoch": 3.3329936132626714, "grad_norm": 2.2428841590881348, "learning_rate": 5.8351168637043075e-05, "loss": 2.6306, "step": 49055 }, { "epoch": 3.3333333333333335, "grad_norm": 2.8608782291412354, "learning_rate": 
5.834692213615981e-05, "loss": 2.7122, "step": 49060 }, { "epoch": 3.333673053403995, "grad_norm": 2.9111006259918213, "learning_rate": 5.834267563527653e-05, "loss": 2.258, "step": 49065 }, { "epoch": 3.3340127734746567, "grad_norm": 3.106304883956909, "learning_rate": 5.833842913439326e-05, "loss": 2.4716, "step": 49070 }, { "epoch": 3.334352493545319, "grad_norm": 2.7428324222564697, "learning_rate": 5.8334182633509994e-05, "loss": 2.6095, "step": 49075 }, { "epoch": 3.3346922136159804, "grad_norm": 2.9663283824920654, "learning_rate": 5.8329936132626715e-05, "loss": 2.7297, "step": 49080 }, { "epoch": 3.335031933686642, "grad_norm": 2.3337273597717285, "learning_rate": 5.832568963174344e-05, "loss": 2.6218, "step": 49085 }, { "epoch": 3.335371653757304, "grad_norm": 2.6962480545043945, "learning_rate": 5.832144313086018e-05, "loss": 2.5354, "step": 49090 }, { "epoch": 3.335711373827966, "grad_norm": 2.8693203926086426, "learning_rate": 5.83171966299769e-05, "loss": 2.7813, "step": 49095 }, { "epoch": 3.3360510938986274, "grad_norm": 2.680664539337158, "learning_rate": 5.831295012909363e-05, "loss": 2.6734, "step": 49100 }, { "epoch": 3.3363908139692895, "grad_norm": 3.218064785003662, "learning_rate": 5.830870362821036e-05, "loss": 2.5298, "step": 49105 }, { "epoch": 3.336730534039951, "grad_norm": 2.579939842224121, "learning_rate": 5.830445712732708e-05, "loss": 2.942, "step": 49110 }, { "epoch": 3.3370702541106128, "grad_norm": 2.762768268585205, "learning_rate": 5.830021062644382e-05, "loss": 2.393, "step": 49115 }, { "epoch": 3.3374099741812744, "grad_norm": 2.833437919616699, "learning_rate": 5.8295964125560546e-05, "loss": 2.5672, "step": 49120 }, { "epoch": 3.3377496942519365, "grad_norm": 2.9584767818450928, "learning_rate": 5.829171762467727e-05, "loss": 2.6577, "step": 49125 }, { "epoch": 3.338089414322598, "grad_norm": 3.102783679962158, "learning_rate": 5.8287471123794e-05, "loss": 2.6388, "step": 49130 }, { "epoch": 3.3384291343932597, 
"grad_norm": 2.5110108852386475, "learning_rate": 5.828322462291072e-05, "loss": 2.6268, "step": 49135 }, { "epoch": 3.338768854463922, "grad_norm": 3.256730318069458, "learning_rate": 5.827897812202745e-05, "loss": 2.9355, "step": 49140 }, { "epoch": 3.3391085745345834, "grad_norm": 3.620572805404663, "learning_rate": 5.8274731621144186e-05, "loss": 2.7559, "step": 49145 }, { "epoch": 3.339448294605245, "grad_norm": 2.313680648803711, "learning_rate": 5.827048512026091e-05, "loss": 2.5925, "step": 49150 }, { "epoch": 3.339788014675907, "grad_norm": 3.028668165206909, "learning_rate": 5.8266238619377635e-05, "loss": 2.5035, "step": 49155 }, { "epoch": 3.3401277347465688, "grad_norm": 2.950040102005005, "learning_rate": 5.826199211849437e-05, "loss": 2.6291, "step": 49160 }, { "epoch": 3.3404674548172304, "grad_norm": 3.0675814151763916, "learning_rate": 5.825774561761109e-05, "loss": 2.568, "step": 49165 }, { "epoch": 3.3408071748878925, "grad_norm": 2.701056480407715, "learning_rate": 5.825349911672782e-05, "loss": 2.6339, "step": 49170 }, { "epoch": 3.341146894958554, "grad_norm": 3.4437077045440674, "learning_rate": 5.8249252615844554e-05, "loss": 2.5866, "step": 49175 }, { "epoch": 3.3414866150292157, "grad_norm": 2.9771413803100586, "learning_rate": 5.8245006114961275e-05, "loss": 2.592, "step": 49180 }, { "epoch": 3.341826335099878, "grad_norm": 2.7614078521728516, "learning_rate": 5.8240759614077997e-05, "loss": 2.2505, "step": 49185 }, { "epoch": 3.3421660551705394, "grad_norm": 2.392564296722412, "learning_rate": 5.823651311319474e-05, "loss": 2.6024, "step": 49190 }, { "epoch": 3.342505775241201, "grad_norm": 2.8367559909820557, "learning_rate": 5.823226661231146e-05, "loss": 2.4817, "step": 49195 }, { "epoch": 3.342845495311863, "grad_norm": 3.6189651489257812, "learning_rate": 5.822802011142818e-05, "loss": 2.6341, "step": 49200 }, { "epoch": 3.3431852153825248, "grad_norm": 3.1288065910339355, "learning_rate": 5.8223773610544915e-05, "loss": 2.9131, 
"step": 49205 }, { "epoch": 3.3435249354531864, "grad_norm": 3.627847909927368, "learning_rate": 5.821952710966164e-05, "loss": 2.7732, "step": 49210 }, { "epoch": 3.3438646555238485, "grad_norm": 3.669844150543213, "learning_rate": 5.8215280608778365e-05, "loss": 2.5735, "step": 49215 }, { "epoch": 3.34420437559451, "grad_norm": 2.815056324005127, "learning_rate": 5.82110341078951e-05, "loss": 2.5126, "step": 49220 }, { "epoch": 3.3445440956651717, "grad_norm": 3.041527271270752, "learning_rate": 5.820678760701183e-05, "loss": 2.6165, "step": 49225 }, { "epoch": 3.344883815735834, "grad_norm": 2.9578757286071777, "learning_rate": 5.820254110612855e-05, "loss": 2.7525, "step": 49230 }, { "epoch": 3.3452235358064955, "grad_norm": 2.2051401138305664, "learning_rate": 5.819829460524528e-05, "loss": 2.4427, "step": 49235 }, { "epoch": 3.345563255877157, "grad_norm": 2.6765942573547363, "learning_rate": 5.819404810436201e-05, "loss": 2.6829, "step": 49240 }, { "epoch": 3.345902975947819, "grad_norm": 2.618868112564087, "learning_rate": 5.818980160347873e-05, "loss": 2.6127, "step": 49245 }, { "epoch": 3.346242696018481, "grad_norm": 3.7703309059143066, "learning_rate": 5.818555510259547e-05, "loss": 2.7188, "step": 49250 }, { "epoch": 3.3465824160891424, "grad_norm": 3.474940061569214, "learning_rate": 5.8181308601712195e-05, "loss": 2.703, "step": 49255 }, { "epoch": 3.3469221361598045, "grad_norm": 2.7042455673217773, "learning_rate": 5.817706210082892e-05, "loss": 2.6962, "step": 49260 }, { "epoch": 3.347261856230466, "grad_norm": 3.6290714740753174, "learning_rate": 5.817281559994565e-05, "loss": 2.7122, "step": 49265 }, { "epoch": 3.3476015763011278, "grad_norm": 3.255074977874756, "learning_rate": 5.816856909906237e-05, "loss": 2.9043, "step": 49270 }, { "epoch": 3.34794129637179, "grad_norm": 2.51712965965271, "learning_rate": 5.81643225981791e-05, "loss": 2.7841, "step": 49275 }, { "epoch": 3.3482810164424515, "grad_norm": 3.1697425842285156, "learning_rate": 
5.8160076097295835e-05, "loss": 2.7359, "step": 49280 }, { "epoch": 3.348620736513113, "grad_norm": 2.54496169090271, "learning_rate": 5.815582959641256e-05, "loss": 2.7903, "step": 49285 }, { "epoch": 3.348960456583775, "grad_norm": 2.7105305194854736, "learning_rate": 5.8151583095529285e-05, "loss": 2.7651, "step": 49290 }, { "epoch": 3.349300176654437, "grad_norm": 2.5399835109710693, "learning_rate": 5.814733659464602e-05, "loss": 2.6541, "step": 49295 }, { "epoch": 3.3496398967250984, "grad_norm": 2.5251853466033936, "learning_rate": 5.814309009376274e-05, "loss": 2.5668, "step": 49300 }, { "epoch": 3.3499796167957605, "grad_norm": 3.0604867935180664, "learning_rate": 5.813884359287947e-05, "loss": 2.4033, "step": 49305 }, { "epoch": 3.350319336866422, "grad_norm": 3.126925468444824, "learning_rate": 5.8134597091996204e-05, "loss": 2.5094, "step": 49310 }, { "epoch": 3.3506590569370838, "grad_norm": 2.9345223903656006, "learning_rate": 5.8130350591112925e-05, "loss": 2.7797, "step": 49315 }, { "epoch": 3.350998777007746, "grad_norm": 2.8518590927124023, "learning_rate": 5.812610409022965e-05, "loss": 2.8126, "step": 49320 }, { "epoch": 3.3513384970784075, "grad_norm": 2.5982890129089355, "learning_rate": 5.812185758934639e-05, "loss": 2.5552, "step": 49325 }, { "epoch": 3.351678217149069, "grad_norm": 2.7866971492767334, "learning_rate": 5.811761108846311e-05, "loss": 2.5643, "step": 49330 }, { "epoch": 3.352017937219731, "grad_norm": 2.786322832107544, "learning_rate": 5.811336458757983e-05, "loss": 2.5875, "step": 49335 }, { "epoch": 3.352357657290393, "grad_norm": 2.5442113876342773, "learning_rate": 5.8109118086696565e-05, "loss": 2.6784, "step": 49340 }, { "epoch": 3.3526973773610544, "grad_norm": 3.4166195392608643, "learning_rate": 5.810487158581329e-05, "loss": 2.5499, "step": 49345 }, { "epoch": 3.353037097431716, "grad_norm": 2.7850265502929688, "learning_rate": 5.8100625084930014e-05, "loss": 2.5745, "step": 49350 }, { "epoch": 3.353376817502378, 
"grad_norm": 2.7927029132843018, "learning_rate": 5.809637858404675e-05, "loss": 2.3962, "step": 49355 }, { "epoch": 3.35371653757304, "grad_norm": 3.1698217391967773, "learning_rate": 5.809213208316348e-05, "loss": 2.5019, "step": 49360 }, { "epoch": 3.3540562576437014, "grad_norm": 2.9058828353881836, "learning_rate": 5.80878855822802e-05, "loss": 2.6938, "step": 49365 }, { "epoch": 3.3543959777143635, "grad_norm": 2.4002864360809326, "learning_rate": 5.808363908139693e-05, "loss": 2.5666, "step": 49370 }, { "epoch": 3.354735697785025, "grad_norm": 2.9429261684417725, "learning_rate": 5.807939258051366e-05, "loss": 2.4773, "step": 49375 }, { "epoch": 3.3550754178556867, "grad_norm": 2.467482566833496, "learning_rate": 5.807514607963038e-05, "loss": 2.5223, "step": 49380 }, { "epoch": 3.355415137926349, "grad_norm": 4.007264614105225, "learning_rate": 5.807089957874712e-05, "loss": 2.6292, "step": 49385 }, { "epoch": 3.3557548579970105, "grad_norm": 2.4284827709198, "learning_rate": 5.8066653077863845e-05, "loss": 2.5243, "step": 49390 }, { "epoch": 3.356094578067672, "grad_norm": 2.662198305130005, "learning_rate": 5.8062406576980566e-05, "loss": 2.8009, "step": 49395 }, { "epoch": 3.356434298138334, "grad_norm": 3.105346918106079, "learning_rate": 5.80581600760973e-05, "loss": 2.6657, "step": 49400 }, { "epoch": 3.356774018208996, "grad_norm": 2.6072793006896973, "learning_rate": 5.805391357521402e-05, "loss": 2.4585, "step": 49405 }, { "epoch": 3.3571137382796574, "grad_norm": 3.010219097137451, "learning_rate": 5.804966707433075e-05, "loss": 2.9686, "step": 49410 }, { "epoch": 3.3574534583503195, "grad_norm": 2.908656120300293, "learning_rate": 5.8045420573447485e-05, "loss": 2.3673, "step": 49415 }, { "epoch": 3.357793178420981, "grad_norm": 2.5763936042785645, "learning_rate": 5.8041174072564206e-05, "loss": 2.5116, "step": 49420 }, { "epoch": 3.3581328984916428, "grad_norm": 2.6269562244415283, "learning_rate": 5.8036927571680934e-05, "loss": 2.826, "step": 
49425 }, { "epoch": 3.358472618562305, "grad_norm": 2.4541683197021484, "learning_rate": 5.803268107079767e-05, "loss": 2.5724, "step": 49430 }, { "epoch": 3.3588123386329665, "grad_norm": 2.461139678955078, "learning_rate": 5.802843456991439e-05, "loss": 2.2342, "step": 49435 }, { "epoch": 3.359152058703628, "grad_norm": 3.072265625, "learning_rate": 5.802418806903112e-05, "loss": 2.2876, "step": 49440 }, { "epoch": 3.35949177877429, "grad_norm": 2.569561719894409, "learning_rate": 5.801994156814785e-05, "loss": 2.4161, "step": 49445 }, { "epoch": 3.359831498844952, "grad_norm": 2.018080949783325, "learning_rate": 5.8015695067264574e-05, "loss": 2.3958, "step": 49450 }, { "epoch": 3.3601712189156134, "grad_norm": 2.906526565551758, "learning_rate": 5.801144856638131e-05, "loss": 2.9528, "step": 49455 }, { "epoch": 3.360510938986275, "grad_norm": 2.5698728561401367, "learning_rate": 5.800720206549804e-05, "loss": 2.6339, "step": 49460 }, { "epoch": 3.360850659056937, "grad_norm": 4.096144199371338, "learning_rate": 5.800295556461476e-05, "loss": 2.4382, "step": 49465 }, { "epoch": 3.3611903791275988, "grad_norm": 3.242069721221924, "learning_rate": 5.799870906373149e-05, "loss": 2.5848, "step": 49470 }, { "epoch": 3.3615300991982604, "grad_norm": 2.700396776199341, "learning_rate": 5.7994462562848214e-05, "loss": 2.6905, "step": 49475 }, { "epoch": 3.3618698192689225, "grad_norm": 3.252281904220581, "learning_rate": 5.799021606196494e-05, "loss": 2.6783, "step": 49480 }, { "epoch": 3.362209539339584, "grad_norm": 2.751518964767456, "learning_rate": 5.798596956108168e-05, "loss": 2.5333, "step": 49485 }, { "epoch": 3.3625492594102457, "grad_norm": 3.2180004119873047, "learning_rate": 5.79817230601984e-05, "loss": 2.8054, "step": 49490 }, { "epoch": 3.362888979480908, "grad_norm": 3.4425437450408936, "learning_rate": 5.7977476559315126e-05, "loss": 2.7067, "step": 49495 }, { "epoch": 3.3632286995515694, "grad_norm": 3.0046613216400146, "learning_rate": 
5.797323005843186e-05, "loss": 2.5731, "step": 49500 }, { "epoch": 3.363568419622231, "grad_norm": 2.450153350830078, "learning_rate": 5.796898355754858e-05, "loss": 2.4925, "step": 49505 }, { "epoch": 3.363908139692893, "grad_norm": 2.694035291671753, "learning_rate": 5.796473705666531e-05, "loss": 2.5491, "step": 49510 }, { "epoch": 3.364247859763555, "grad_norm": 3.017169237136841, "learning_rate": 5.7960490555782045e-05, "loss": 2.9629, "step": 49515 }, { "epoch": 3.3645875798342164, "grad_norm": 3.2886581420898438, "learning_rate": 5.7956244054898766e-05, "loss": 2.4734, "step": 49520 }, { "epoch": 3.3649272999048785, "grad_norm": 3.386265277862549, "learning_rate": 5.7951997554015494e-05, "loss": 2.6513, "step": 49525 }, { "epoch": 3.36526701997554, "grad_norm": 3.2496118545532227, "learning_rate": 5.794775105313223e-05, "loss": 2.7141, "step": 49530 }, { "epoch": 3.3656067400462018, "grad_norm": 3.3022773265838623, "learning_rate": 5.794350455224895e-05, "loss": 2.7682, "step": 49535 }, { "epoch": 3.365946460116864, "grad_norm": 2.811795234680176, "learning_rate": 5.793925805136567e-05, "loss": 2.8391, "step": 49540 }, { "epoch": 3.3662861801875255, "grad_norm": 3.306130886077881, "learning_rate": 5.793501155048241e-05, "loss": 2.6624, "step": 49545 }, { "epoch": 3.366625900258187, "grad_norm": 2.369394302368164, "learning_rate": 5.7930765049599134e-05, "loss": 2.8036, "step": 49550 }, { "epoch": 3.366965620328849, "grad_norm": 2.525630235671997, "learning_rate": 5.7926518548715856e-05, "loss": 2.7626, "step": 49555 }, { "epoch": 3.367305340399511, "grad_norm": 3.401448965072632, "learning_rate": 5.792227204783259e-05, "loss": 2.3447, "step": 49560 }, { "epoch": 3.3676450604701724, "grad_norm": 3.6087889671325684, "learning_rate": 5.791802554694932e-05, "loss": 2.5139, "step": 49565 }, { "epoch": 3.3679847805408345, "grad_norm": 3.187155246734619, "learning_rate": 5.791377904606604e-05, "loss": 2.6187, "step": 49570 }, { "epoch": 3.368324500611496, 
"grad_norm": 2.477323532104492, "learning_rate": 5.7909532545182774e-05, "loss": 2.4734, "step": 49575 }, { "epoch": 3.3686642206821578, "grad_norm": 3.187922477722168, "learning_rate": 5.79052860442995e-05, "loss": 2.6809, "step": 49580 }, { "epoch": 3.36900394075282, "grad_norm": 3.2568752765655518, "learning_rate": 5.7901039543416224e-05, "loss": 2.3924, "step": 49585 }, { "epoch": 3.3693436608234815, "grad_norm": 2.1432132720947266, "learning_rate": 5.789679304253296e-05, "loss": 2.6607, "step": 49590 }, { "epoch": 3.369683380894143, "grad_norm": 3.3766181468963623, "learning_rate": 5.7892546541649686e-05, "loss": 2.6216, "step": 49595 }, { "epoch": 3.370023100964805, "grad_norm": 3.1420822143554688, "learning_rate": 5.788830004076641e-05, "loss": 2.641, "step": 49600 }, { "epoch": 3.370362821035467, "grad_norm": 2.6370792388916016, "learning_rate": 5.788405353988314e-05, "loss": 2.7054, "step": 49605 }, { "epoch": 3.3707025411061284, "grad_norm": 2.964106321334839, "learning_rate": 5.7879807038999864e-05, "loss": 2.7615, "step": 49610 }, { "epoch": 3.3710422611767905, "grad_norm": 2.395554780960083, "learning_rate": 5.787556053811659e-05, "loss": 2.5228, "step": 49615 }, { "epoch": 3.371381981247452, "grad_norm": 2.59548282623291, "learning_rate": 5.7871314037233326e-05, "loss": 2.5345, "step": 49620 }, { "epoch": 3.3717217013181138, "grad_norm": 3.9454970359802246, "learning_rate": 5.786706753635005e-05, "loss": 2.534, "step": 49625 }, { "epoch": 3.372061421388776, "grad_norm": 3.626347303390503, "learning_rate": 5.7862821035466776e-05, "loss": 2.7645, "step": 49630 }, { "epoch": 3.3724011414594375, "grad_norm": 3.3168342113494873, "learning_rate": 5.785857453458351e-05, "loss": 2.5851, "step": 49635 }, { "epoch": 3.372740861530099, "grad_norm": 2.580997943878174, "learning_rate": 5.785432803370023e-05, "loss": 2.6121, "step": 49640 }, { "epoch": 3.373080581600761, "grad_norm": 2.6847941875457764, "learning_rate": 5.785008153281696e-05, "loss": 2.4783, 
"step": 49645 }, { "epoch": 3.373420301671423, "grad_norm": 2.3812882900238037, "learning_rate": 5.7845835031933694e-05, "loss": 2.6686, "step": 49650 }, { "epoch": 3.3737600217420844, "grad_norm": 2.9046339988708496, "learning_rate": 5.7841588531050416e-05, "loss": 2.8484, "step": 49655 }, { "epoch": 3.3740997418127465, "grad_norm": 3.2626547813415527, "learning_rate": 5.7837342030167144e-05, "loss": 2.4056, "step": 49660 }, { "epoch": 3.374439461883408, "grad_norm": 3.0814919471740723, "learning_rate": 5.783309552928388e-05, "loss": 2.565, "step": 49665 }, { "epoch": 3.37477918195407, "grad_norm": 2.5083820819854736, "learning_rate": 5.78288490284006e-05, "loss": 2.7249, "step": 49670 }, { "epoch": 3.375118902024732, "grad_norm": 2.3871703147888184, "learning_rate": 5.782460252751732e-05, "loss": 2.4951, "step": 49675 }, { "epoch": 3.3754586220953935, "grad_norm": 3.1951260566711426, "learning_rate": 5.782035602663406e-05, "loss": 2.5205, "step": 49680 }, { "epoch": 3.375798342166055, "grad_norm": 2.429542064666748, "learning_rate": 5.7816109525750784e-05, "loss": 2.6502, "step": 49685 }, { "epoch": 3.3761380622367168, "grad_norm": 2.787264108657837, "learning_rate": 5.7811863024867505e-05, "loss": 2.4596, "step": 49690 }, { "epoch": 3.376477782307379, "grad_norm": 3.185837507247925, "learning_rate": 5.780761652398424e-05, "loss": 2.8776, "step": 49695 }, { "epoch": 3.3768175023780405, "grad_norm": 2.9520273208618164, "learning_rate": 5.780337002310097e-05, "loss": 2.6819, "step": 49700 }, { "epoch": 3.377157222448702, "grad_norm": 2.7411704063415527, "learning_rate": 5.779912352221769e-05, "loss": 2.685, "step": 49705 }, { "epoch": 3.377496942519364, "grad_norm": 2.9800021648406982, "learning_rate": 5.7794877021334424e-05, "loss": 2.5805, "step": 49710 }, { "epoch": 3.377836662590026, "grad_norm": 2.3130083084106445, "learning_rate": 5.779063052045115e-05, "loss": 2.5228, "step": 49715 }, { "epoch": 3.3781763826606874, "grad_norm": 2.415134906768799, 
"learning_rate": 5.778638401956787e-05, "loss": 2.7289, "step": 49720 }, { "epoch": 3.3785161027313495, "grad_norm": 3.108112096786499, "learning_rate": 5.778213751868461e-05, "loss": 2.4701, "step": 49725 }, { "epoch": 3.378855822802011, "grad_norm": 2.852532386779785, "learning_rate": 5.7777891017801336e-05, "loss": 2.701, "step": 49730 }, { "epoch": 3.3791955428726728, "grad_norm": 3.767781972885132, "learning_rate": 5.777364451691806e-05, "loss": 2.5354, "step": 49735 }, { "epoch": 3.379535262943335, "grad_norm": 3.3251235485076904, "learning_rate": 5.776939801603479e-05, "loss": 2.5738, "step": 49740 }, { "epoch": 3.3798749830139965, "grad_norm": 3.334869623184204, "learning_rate": 5.776515151515152e-05, "loss": 2.7205, "step": 49745 }, { "epoch": 3.380214703084658, "grad_norm": 2.866974353790283, "learning_rate": 5.776090501426824e-05, "loss": 2.5816, "step": 49750 }, { "epoch": 3.38055442315532, "grad_norm": 3.2689104080200195, "learning_rate": 5.7756658513384976e-05, "loss": 2.7133, "step": 49755 }, { "epoch": 3.380894143225982, "grad_norm": 3.0612692832946777, "learning_rate": 5.77524120125017e-05, "loss": 2.3028, "step": 49760 }, { "epoch": 3.3812338632966434, "grad_norm": 2.709345579147339, "learning_rate": 5.7748165511618425e-05, "loss": 2.7171, "step": 49765 }, { "epoch": 3.3815735833673055, "grad_norm": 2.416746139526367, "learning_rate": 5.774391901073516e-05, "loss": 2.5783, "step": 49770 }, { "epoch": 3.381913303437967, "grad_norm": 2.648409605026245, "learning_rate": 5.773967250985188e-05, "loss": 2.6922, "step": 49775 }, { "epoch": 3.3822530235086288, "grad_norm": 3.254093885421753, "learning_rate": 5.773542600896861e-05, "loss": 2.725, "step": 49780 }, { "epoch": 3.382592743579291, "grad_norm": 2.405228853225708, "learning_rate": 5.7731179508085344e-05, "loss": 2.6575, "step": 49785 }, { "epoch": 3.3829324636499525, "grad_norm": 2.1902918815612793, "learning_rate": 5.7726933007202065e-05, "loss": 2.6164, "step": 49790 }, { "epoch": 
3.383272183720614, "grad_norm": 2.8036744594573975, "learning_rate": 5.77226865063188e-05, "loss": 2.7449, "step": 49795 }, { "epoch": 3.3836119037912757, "grad_norm": 2.7830140590667725, "learning_rate": 5.771844000543553e-05, "loss": 2.6619, "step": 49800 }, { "epoch": 3.383951623861938, "grad_norm": 2.219872236251831, "learning_rate": 5.771419350455225e-05, "loss": 2.3945, "step": 49805 }, { "epoch": 3.3842913439325994, "grad_norm": 3.6246018409729004, "learning_rate": 5.7709947003668984e-05, "loss": 2.8289, "step": 49810 }, { "epoch": 3.384631064003261, "grad_norm": 2.7521393299102783, "learning_rate": 5.770570050278571e-05, "loss": 2.7643, "step": 49815 }, { "epoch": 3.384970784073923, "grad_norm": 2.6415233612060547, "learning_rate": 5.770145400190243e-05, "loss": 2.5169, "step": 49820 }, { "epoch": 3.385310504144585, "grad_norm": 2.746262550354004, "learning_rate": 5.769720750101917e-05, "loss": 2.7704, "step": 49825 }, { "epoch": 3.3856502242152464, "grad_norm": 2.6909642219543457, "learning_rate": 5.769296100013589e-05, "loss": 2.6191, "step": 49830 }, { "epoch": 3.3859899442859085, "grad_norm": 3.394339084625244, "learning_rate": 5.768871449925262e-05, "loss": 2.9335, "step": 49835 }, { "epoch": 3.38632966435657, "grad_norm": 2.502397298812866, "learning_rate": 5.768446799836935e-05, "loss": 2.5248, "step": 49840 }, { "epoch": 3.3866693844272318, "grad_norm": 2.89192533493042, "learning_rate": 5.768022149748607e-05, "loss": 2.5263, "step": 49845 }, { "epoch": 3.387009104497894, "grad_norm": 2.684067487716675, "learning_rate": 5.76759749966028e-05, "loss": 2.8893, "step": 49850 }, { "epoch": 3.3873488245685555, "grad_norm": 2.9330365657806396, "learning_rate": 5.7671728495719536e-05, "loss": 2.7086, "step": 49855 }, { "epoch": 3.387688544639217, "grad_norm": 3.347424268722534, "learning_rate": 5.766748199483626e-05, "loss": 2.7238, "step": 49860 }, { "epoch": 3.388028264709879, "grad_norm": 2.854808807373047, "learning_rate": 5.7663235493952985e-05, 
"loss": 2.5887, "step": 49865 }, { "epoch": 3.388367984780541, "grad_norm": 2.71651029586792, "learning_rate": 5.765898899306972e-05, "loss": 2.7597, "step": 49870 }, { "epoch": 3.3887077048512024, "grad_norm": 3.309288263320923, "learning_rate": 5.765474249218644e-05, "loss": 2.8948, "step": 49875 }, { "epoch": 3.3890474249218645, "grad_norm": 3.1740097999572754, "learning_rate": 5.765049599130317e-05, "loss": 2.6101, "step": 49880 }, { "epoch": 3.389387144992526, "grad_norm": 2.9965102672576904, "learning_rate": 5.7646249490419904e-05, "loss": 2.6054, "step": 49885 }, { "epoch": 3.3897268650631878, "grad_norm": 3.460183620452881, "learning_rate": 5.7642002989536625e-05, "loss": 2.5276, "step": 49890 }, { "epoch": 3.39006658513385, "grad_norm": 2.493967056274414, "learning_rate": 5.7637756488653347e-05, "loss": 2.7725, "step": 49895 }, { "epoch": 3.3904063052045115, "grad_norm": 3.019871473312378, "learning_rate": 5.763350998777008e-05, "loss": 2.7797, "step": 49900 }, { "epoch": 3.390746025275173, "grad_norm": 2.97623610496521, "learning_rate": 5.762926348688681e-05, "loss": 2.5325, "step": 49905 }, { "epoch": 3.391085745345835, "grad_norm": 3.091357946395874, "learning_rate": 5.762501698600353e-05, "loss": 2.565, "step": 49910 }, { "epoch": 3.391425465416497, "grad_norm": 3.5296127796173096, "learning_rate": 5.7620770485120265e-05, "loss": 2.5706, "step": 49915 }, { "epoch": 3.3917651854871584, "grad_norm": 3.541645050048828, "learning_rate": 5.761652398423699e-05, "loss": 2.4987, "step": 49920 }, { "epoch": 3.3921049055578205, "grad_norm": 3.02231502532959, "learning_rate": 5.7612277483353715e-05, "loss": 2.6693, "step": 49925 }, { "epoch": 3.392444625628482, "grad_norm": 2.6990902423858643, "learning_rate": 5.760803098247045e-05, "loss": 2.7053, "step": 49930 }, { "epoch": 3.3927843456991438, "grad_norm": 3.1595356464385986, "learning_rate": 5.760378448158718e-05, "loss": 2.7433, "step": 49935 }, { "epoch": 3.393124065769806, "grad_norm": 3.0042810440063477, 
"learning_rate": 5.75995379807039e-05, "loss": 2.4742, "step": 49940 }, { "epoch": 3.3934637858404675, "grad_norm": 3.4605026245117188, "learning_rate": 5.759529147982063e-05, "loss": 2.7327, "step": 49945 }, { "epoch": 3.393803505911129, "grad_norm": 2.512404441833496, "learning_rate": 5.759104497893736e-05, "loss": 2.6189, "step": 49950 }, { "epoch": 3.394143225981791, "grad_norm": 2.037259578704834, "learning_rate": 5.758679847805408e-05, "loss": 2.7469, "step": 49955 }, { "epoch": 3.394482946052453, "grad_norm": 2.512971878051758, "learning_rate": 5.758255197717082e-05, "loss": 2.798, "step": 49960 }, { "epoch": 3.3948226661231145, "grad_norm": 2.411858320236206, "learning_rate": 5.757830547628754e-05, "loss": 2.7937, "step": 49965 }, { "epoch": 3.3951623861937765, "grad_norm": 2.540823459625244, "learning_rate": 5.757405897540427e-05, "loss": 2.3295, "step": 49970 }, { "epoch": 3.395502106264438, "grad_norm": 2.439969062805176, "learning_rate": 5.7569812474521e-05, "loss": 2.6327, "step": 49975 }, { "epoch": 3.3958418263351, "grad_norm": 2.6024394035339355, "learning_rate": 5.756556597363772e-05, "loss": 2.7728, "step": 49980 }, { "epoch": 3.396181546405762, "grad_norm": 2.445382833480835, "learning_rate": 5.756131947275445e-05, "loss": 2.5832, "step": 49985 }, { "epoch": 3.3965212664764235, "grad_norm": 2.280484676361084, "learning_rate": 5.7557072971871185e-05, "loss": 2.8051, "step": 49990 }, { "epoch": 3.396860986547085, "grad_norm": 2.4845521450042725, "learning_rate": 5.755282647098791e-05, "loss": 2.525, "step": 49995 }, { "epoch": 3.397200706617747, "grad_norm": 2.77901554107666, "learning_rate": 5.7548579970104635e-05, "loss": 2.7674, "step": 50000 }, { "epoch": 3.397540426688409, "grad_norm": 3.150834560394287, "learning_rate": 5.754433346922137e-05, "loss": 2.795, "step": 50005 }, { "epoch": 3.3978801467590705, "grad_norm": 3.155022621154785, "learning_rate": 5.754008696833809e-05, "loss": 2.7676, "step": 50010 }, { "epoch": 3.3982198668297325, 
"grad_norm": 2.9117774963378906, "learning_rate": 5.753584046745482e-05, "loss": 2.7309, "step": 50015 }, { "epoch": 3.398559586900394, "grad_norm": 2.778167247772217, "learning_rate": 5.7531593966571553e-05, "loss": 2.606, "step": 50020 }, { "epoch": 3.398899306971056, "grad_norm": 3.585148572921753, "learning_rate": 5.7527347465688275e-05, "loss": 2.6644, "step": 50025 }, { "epoch": 3.3992390270417174, "grad_norm": 2.221543788909912, "learning_rate": 5.7523100964804996e-05, "loss": 2.6223, "step": 50030 }, { "epoch": 3.3995787471123795, "grad_norm": 3.2797045707702637, "learning_rate": 5.751885446392174e-05, "loss": 2.7119, "step": 50035 }, { "epoch": 3.399918467183041, "grad_norm": 2.790076732635498, "learning_rate": 5.751460796303846e-05, "loss": 2.8772, "step": 50040 }, { "epoch": 3.4002581872537028, "grad_norm": 3.2539806365966797, "learning_rate": 5.751036146215518e-05, "loss": 2.6239, "step": 50045 }, { "epoch": 3.400597907324365, "grad_norm": 2.822788715362549, "learning_rate": 5.7506114961271915e-05, "loss": 2.7351, "step": 50050 }, { "epoch": 3.4009376273950265, "grad_norm": 3.0499160289764404, "learning_rate": 5.750186846038864e-05, "loss": 2.7095, "step": 50055 }, { "epoch": 3.401277347465688, "grad_norm": 2.326519012451172, "learning_rate": 5.7497621959505364e-05, "loss": 2.5381, "step": 50060 }, { "epoch": 3.40161706753635, "grad_norm": 3.5861427783966064, "learning_rate": 5.74933754586221e-05, "loss": 2.8078, "step": 50065 }, { "epoch": 3.401956787607012, "grad_norm": 3.0746846199035645, "learning_rate": 5.748912895773883e-05, "loss": 2.7248, "step": 50070 }, { "epoch": 3.4022965076776734, "grad_norm": 3.164686441421509, "learning_rate": 5.748488245685555e-05, "loss": 2.6936, "step": 50075 }, { "epoch": 3.4026362277483355, "grad_norm": 2.4166295528411865, "learning_rate": 5.748063595597228e-05, "loss": 2.2861, "step": 50080 }, { "epoch": 3.402975947818997, "grad_norm": 2.82391095161438, "learning_rate": 5.747638945508901e-05, "loss": 2.8206, "step": 
50085 }, { "epoch": 3.403315667889659, "grad_norm": 2.9101638793945312, "learning_rate": 5.747214295420573e-05, "loss": 2.6633, "step": 50090 }, { "epoch": 3.403655387960321, "grad_norm": 3.212955951690674, "learning_rate": 5.746789645332247e-05, "loss": 2.8291, "step": 50095 }, { "epoch": 3.4039951080309825, "grad_norm": 2.687751293182373, "learning_rate": 5.746364995243919e-05, "loss": 2.592, "step": 50100 }, { "epoch": 3.404334828101644, "grad_norm": 3.531846046447754, "learning_rate": 5.7459403451555916e-05, "loss": 2.6279, "step": 50105 }, { "epoch": 3.404674548172306, "grad_norm": 2.5693304538726807, "learning_rate": 5.745515695067265e-05, "loss": 2.6671, "step": 50110 }, { "epoch": 3.405014268242968, "grad_norm": 2.994098663330078, "learning_rate": 5.745091044978937e-05, "loss": 2.7997, "step": 50115 }, { "epoch": 3.4053539883136295, "grad_norm": 2.7371551990509033, "learning_rate": 5.74466639489061e-05, "loss": 2.8257, "step": 50120 }, { "epoch": 3.4056937083842915, "grad_norm": 3.4334235191345215, "learning_rate": 5.7442417448022835e-05, "loss": 2.7509, "step": 50125 }, { "epoch": 3.406033428454953, "grad_norm": 2.7304389476776123, "learning_rate": 5.7438170947139556e-05, "loss": 2.8085, "step": 50130 }, { "epoch": 3.406373148525615, "grad_norm": 2.8060460090637207, "learning_rate": 5.743392444625629e-05, "loss": 2.6213, "step": 50135 }, { "epoch": 3.4067128685962764, "grad_norm": 2.9384877681732178, "learning_rate": 5.742967794537302e-05, "loss": 2.6951, "step": 50140 }, { "epoch": 3.4070525886669385, "grad_norm": 2.525840997695923, "learning_rate": 5.742543144448974e-05, "loss": 2.6157, "step": 50145 }, { "epoch": 3.4073923087376, "grad_norm": 2.767059326171875, "learning_rate": 5.7421184943606475e-05, "loss": 2.6951, "step": 50150 }, { "epoch": 3.4077320288082618, "grad_norm": 2.916616916656494, "learning_rate": 5.74169384427232e-05, "loss": 2.8044, "step": 50155 }, { "epoch": 3.408071748878924, "grad_norm": 2.7393386363983154, "learning_rate": 
5.7412691941839924e-05, "loss": 2.773, "step": 50160 }, { "epoch": 3.4084114689495855, "grad_norm": 2.8515868186950684, "learning_rate": 5.740844544095666e-05, "loss": 2.9669, "step": 50165 }, { "epoch": 3.408751189020247, "grad_norm": 3.0329184532165527, "learning_rate": 5.740419894007339e-05, "loss": 2.6608, "step": 50170 }, { "epoch": 3.409090909090909, "grad_norm": 2.534803628921509, "learning_rate": 5.739995243919011e-05, "loss": 2.5516, "step": 50175 }, { "epoch": 3.409430629161571, "grad_norm": 3.3730196952819824, "learning_rate": 5.739570593830684e-05, "loss": 2.5883, "step": 50180 }, { "epoch": 3.4097703492322324, "grad_norm": 3.005603313446045, "learning_rate": 5.7391459437423564e-05, "loss": 2.5978, "step": 50185 }, { "epoch": 3.4101100693028945, "grad_norm": 3.511761426925659, "learning_rate": 5.738721293654029e-05, "loss": 2.5862, "step": 50190 }, { "epoch": 3.410449789373556, "grad_norm": 2.9491779804229736, "learning_rate": 5.738296643565703e-05, "loss": 2.5344, "step": 50195 }, { "epoch": 3.4107895094442178, "grad_norm": 3.3583545684814453, "learning_rate": 5.737871993477375e-05, "loss": 2.8908, "step": 50200 }, { "epoch": 3.41112922951488, "grad_norm": 2.9508216381073, "learning_rate": 5.7374473433890476e-05, "loss": 2.7979, "step": 50205 }, { "epoch": 3.4114689495855415, "grad_norm": 2.261901617050171, "learning_rate": 5.737022693300721e-05, "loss": 2.4734, "step": 50210 }, { "epoch": 3.411808669656203, "grad_norm": 2.4690346717834473, "learning_rate": 5.736598043212393e-05, "loss": 2.7056, "step": 50215 }, { "epoch": 3.412148389726865, "grad_norm": 2.564551591873169, "learning_rate": 5.736173393124066e-05, "loss": 2.6596, "step": 50220 }, { "epoch": 3.412488109797527, "grad_norm": 3.660587787628174, "learning_rate": 5.7357487430357395e-05, "loss": 2.5337, "step": 50225 }, { "epoch": 3.4128278298681884, "grad_norm": 2.8000407218933105, "learning_rate": 5.7353240929474116e-05, "loss": 2.5444, "step": 50230 }, { "epoch": 3.4131675499388505, 
"grad_norm": 2.6935973167419434, "learning_rate": 5.7348994428590844e-05, "loss": 2.4467, "step": 50235 }, { "epoch": 3.413507270009512, "grad_norm": 2.8509953022003174, "learning_rate": 5.734474792770758e-05, "loss": 2.4764, "step": 50240 }, { "epoch": 3.413846990080174, "grad_norm": 3.0784873962402344, "learning_rate": 5.73405014268243e-05, "loss": 2.3339, "step": 50245 }, { "epoch": 3.414186710150836, "grad_norm": 3.0565545558929443, "learning_rate": 5.733625492594102e-05, "loss": 2.766, "step": 50250 }, { "epoch": 3.4145264302214975, "grad_norm": 2.694812297821045, "learning_rate": 5.7332008425057756e-05, "loss": 2.6671, "step": 50255 }, { "epoch": 3.414866150292159, "grad_norm": 3.3347010612487793, "learning_rate": 5.7327761924174484e-05, "loss": 2.755, "step": 50260 }, { "epoch": 3.415205870362821, "grad_norm": 3.086880922317505, "learning_rate": 5.7323515423291206e-05, "loss": 2.9748, "step": 50265 }, { "epoch": 3.415545590433483, "grad_norm": 4.436493873596191, "learning_rate": 5.731926892240794e-05, "loss": 2.7639, "step": 50270 }, { "epoch": 3.4158853105041445, "grad_norm": 3.466200113296509, "learning_rate": 5.731502242152467e-05, "loss": 2.6394, "step": 50275 }, { "epoch": 3.4162250305748065, "grad_norm": 2.710296869277954, "learning_rate": 5.731077592064139e-05, "loss": 2.84, "step": 50280 }, { "epoch": 3.416564750645468, "grad_norm": 3.168750524520874, "learning_rate": 5.7306529419758124e-05, "loss": 2.5721, "step": 50285 }, { "epoch": 3.41690447071613, "grad_norm": 2.725311756134033, "learning_rate": 5.730228291887485e-05, "loss": 2.6818, "step": 50290 }, { "epoch": 3.417244190786792, "grad_norm": 2.914715528488159, "learning_rate": 5.7298036417991574e-05, "loss": 2.6842, "step": 50295 }, { "epoch": 3.4175839108574535, "grad_norm": 2.8536064624786377, "learning_rate": 5.729378991710831e-05, "loss": 2.5679, "step": 50300 }, { "epoch": 3.417923630928115, "grad_norm": 2.265467882156372, "learning_rate": 5.7289543416225036e-05, "loss": 2.5822, "step": 
50305 }, { "epoch": 3.418263350998777, "grad_norm": 2.585052251815796, "learning_rate": 5.728529691534176e-05, "loss": 2.6931, "step": 50310 }, { "epoch": 3.418603071069439, "grad_norm": 2.7857868671417236, "learning_rate": 5.728105041445849e-05, "loss": 2.4016, "step": 50315 }, { "epoch": 3.4189427911401005, "grad_norm": 2.4591872692108154, "learning_rate": 5.7276803913575214e-05, "loss": 2.6795, "step": 50320 }, { "epoch": 3.4192825112107625, "grad_norm": 2.8244452476501465, "learning_rate": 5.727255741269194e-05, "loss": 2.6654, "step": 50325 }, { "epoch": 3.419622231281424, "grad_norm": 3.3547258377075195, "learning_rate": 5.7268310911808676e-05, "loss": 2.8973, "step": 50330 }, { "epoch": 3.419961951352086, "grad_norm": 2.5927586555480957, "learning_rate": 5.72640644109254e-05, "loss": 2.5317, "step": 50335 }, { "epoch": 3.420301671422748, "grad_norm": 3.813947916030884, "learning_rate": 5.7259817910042126e-05, "loss": 2.9009, "step": 50340 }, { "epoch": 3.4206413914934095, "grad_norm": 3.277491807937622, "learning_rate": 5.725557140915886e-05, "loss": 2.8471, "step": 50345 }, { "epoch": 3.420981111564071, "grad_norm": 3.07554292678833, "learning_rate": 5.725132490827558e-05, "loss": 2.7522, "step": 50350 }, { "epoch": 3.421320831634733, "grad_norm": 3.041522264480591, "learning_rate": 5.724707840739231e-05, "loss": 2.7324, "step": 50355 }, { "epoch": 3.421660551705395, "grad_norm": 3.275499105453491, "learning_rate": 5.7242831906509044e-05, "loss": 2.5624, "step": 50360 }, { "epoch": 3.4220002717760565, "grad_norm": 2.3536605834960938, "learning_rate": 5.7238585405625766e-05, "loss": 2.5655, "step": 50365 }, { "epoch": 3.422339991846718, "grad_norm": 2.943060874938965, "learning_rate": 5.7234338904742494e-05, "loss": 2.5045, "step": 50370 }, { "epoch": 3.42267971191738, "grad_norm": 2.8690075874328613, "learning_rate": 5.723009240385923e-05, "loss": 2.7202, "step": 50375 }, { "epoch": 3.423019431988042, "grad_norm": 3.467745542526245, "learning_rate": 
5.722584590297595e-05, "loss": 2.3477, "step": 50380 }, { "epoch": 3.4233591520587034, "grad_norm": 3.1895439624786377, "learning_rate": 5.722159940209267e-05, "loss": 2.8443, "step": 50385 }, { "epoch": 3.4236988721293655, "grad_norm": 2.7065999507904053, "learning_rate": 5.7217352901209406e-05, "loss": 2.396, "step": 50390 }, { "epoch": 3.424038592200027, "grad_norm": 2.1214816570281982, "learning_rate": 5.7213106400326134e-05, "loss": 2.7246, "step": 50395 }, { "epoch": 3.424378312270689, "grad_norm": 2.566716194152832, "learning_rate": 5.7208859899442855e-05, "loss": 2.6924, "step": 50400 }, { "epoch": 3.424718032341351, "grad_norm": 2.8151938915252686, "learning_rate": 5.720461339855959e-05, "loss": 2.5044, "step": 50405 }, { "epoch": 3.4250577524120125, "grad_norm": 2.9072976112365723, "learning_rate": 5.720036689767632e-05, "loss": 2.7627, "step": 50410 }, { "epoch": 3.425397472482674, "grad_norm": 3.156541347503662, "learning_rate": 5.719612039679304e-05, "loss": 2.4013, "step": 50415 }, { "epoch": 3.425737192553336, "grad_norm": 2.8101441860198975, "learning_rate": 5.7191873895909774e-05, "loss": 2.6761, "step": 50420 }, { "epoch": 3.426076912623998, "grad_norm": 3.397296905517578, "learning_rate": 5.71876273950265e-05, "loss": 2.8402, "step": 50425 }, { "epoch": 3.4264166326946595, "grad_norm": 2.3676648139953613, "learning_rate": 5.718338089414322e-05, "loss": 2.7006, "step": 50430 }, { "epoch": 3.4267563527653215, "grad_norm": 2.658055067062378, "learning_rate": 5.717913439325996e-05, "loss": 2.5258, "step": 50435 }, { "epoch": 3.427096072835983, "grad_norm": 3.1113762855529785, "learning_rate": 5.7174887892376686e-05, "loss": 2.5352, "step": 50440 }, { "epoch": 3.427435792906645, "grad_norm": 2.5961759090423584, "learning_rate": 5.717064139149341e-05, "loss": 2.5256, "step": 50445 }, { "epoch": 3.427775512977307, "grad_norm": 2.813293695449829, "learning_rate": 5.716639489061014e-05, "loss": 2.5504, "step": 50450 }, { "epoch": 3.4281152330479685, 
"grad_norm": 2.3762640953063965, "learning_rate": 5.716214838972686e-05, "loss": 2.6543, "step": 50455 }, { "epoch": 3.42845495311863, "grad_norm": 2.7518248558044434, "learning_rate": 5.715790188884359e-05, "loss": 2.724, "step": 50460 }, { "epoch": 3.428794673189292, "grad_norm": 2.7445125579833984, "learning_rate": 5.7153655387960326e-05, "loss": 2.4677, "step": 50465 }, { "epoch": 3.429134393259954, "grad_norm": 2.709625482559204, "learning_rate": 5.714940888707705e-05, "loss": 2.6631, "step": 50470 }, { "epoch": 3.4294741133306155, "grad_norm": 3.284383535385132, "learning_rate": 5.714516238619378e-05, "loss": 2.4866, "step": 50475 }, { "epoch": 3.429813833401277, "grad_norm": 2.9138195514678955, "learning_rate": 5.714091588531051e-05, "loss": 2.7001, "step": 50480 }, { "epoch": 3.430153553471939, "grad_norm": 3.1392455101013184, "learning_rate": 5.713666938442723e-05, "loss": 2.7613, "step": 50485 }, { "epoch": 3.430493273542601, "grad_norm": 2.0913593769073486, "learning_rate": 5.7132422883543966e-05, "loss": 2.7835, "step": 50490 }, { "epoch": 3.4308329936132624, "grad_norm": 3.6437275409698486, "learning_rate": 5.7128176382660694e-05, "loss": 2.4913, "step": 50495 }, { "epoch": 3.4311727136839245, "grad_norm": 2.648484468460083, "learning_rate": 5.7123929881777415e-05, "loss": 2.7589, "step": 50500 }, { "epoch": 3.431512433754586, "grad_norm": 2.3462419509887695, "learning_rate": 5.711968338089415e-05, "loss": 2.6116, "step": 50505 }, { "epoch": 3.4318521538252478, "grad_norm": 3.037416458129883, "learning_rate": 5.711543688001088e-05, "loss": 2.3791, "step": 50510 }, { "epoch": 3.43219187389591, "grad_norm": 3.6811282634735107, "learning_rate": 5.71111903791276e-05, "loss": 2.7399, "step": 50515 }, { "epoch": 3.4325315939665715, "grad_norm": 3.078995704650879, "learning_rate": 5.7106943878244334e-05, "loss": 2.5858, "step": 50520 }, { "epoch": 3.432871314037233, "grad_norm": 2.677748203277588, "learning_rate": 5.7102697377361055e-05, "loss": 2.4378, 
"step": 50525 }, { "epoch": 3.433211034107895, "grad_norm": 2.9752204418182373, "learning_rate": 5.709845087647778e-05, "loss": 2.6923, "step": 50530 }, { "epoch": 3.433550754178557, "grad_norm": 3.52575945854187, "learning_rate": 5.709420437559452e-05, "loss": 2.4622, "step": 50535 }, { "epoch": 3.4338904742492184, "grad_norm": 3.09185528755188, "learning_rate": 5.708995787471124e-05, "loss": 2.3608, "step": 50540 }, { "epoch": 3.4342301943198805, "grad_norm": 2.797389268875122, "learning_rate": 5.708571137382797e-05, "loss": 2.6834, "step": 50545 }, { "epoch": 3.434569914390542, "grad_norm": 2.8939802646636963, "learning_rate": 5.70814648729447e-05, "loss": 2.7934, "step": 50550 }, { "epoch": 3.434909634461204, "grad_norm": 2.883331060409546, "learning_rate": 5.707721837206142e-05, "loss": 2.5394, "step": 50555 }, { "epoch": 3.435249354531866, "grad_norm": 3.1303112506866455, "learning_rate": 5.707297187117815e-05, "loss": 2.8356, "step": 50560 }, { "epoch": 3.4355890746025275, "grad_norm": 2.6146011352539062, "learning_rate": 5.7068725370294886e-05, "loss": 2.618, "step": 50565 }, { "epoch": 3.435928794673189, "grad_norm": 3.4707283973693848, "learning_rate": 5.706447886941161e-05, "loss": 2.5969, "step": 50570 }, { "epoch": 3.436268514743851, "grad_norm": 2.442793607711792, "learning_rate": 5.7060232368528335e-05, "loss": 2.578, "step": 50575 }, { "epoch": 3.436608234814513, "grad_norm": 3.5585968494415283, "learning_rate": 5.705598586764507e-05, "loss": 2.5619, "step": 50580 }, { "epoch": 3.4369479548851745, "grad_norm": 4.080143451690674, "learning_rate": 5.705173936676179e-05, "loss": 2.8129, "step": 50585 }, { "epoch": 3.4372876749558365, "grad_norm": 3.6859335899353027, "learning_rate": 5.704749286587851e-05, "loss": 2.5996, "step": 50590 }, { "epoch": 3.437627395026498, "grad_norm": 3.2871994972229004, "learning_rate": 5.7043246364995254e-05, "loss": 2.9287, "step": 50595 }, { "epoch": 3.43796711509716, "grad_norm": 2.74299693107605, "learning_rate": 
5.7038999864111975e-05, "loss": 2.8758, "step": 50600 }, { "epoch": 3.438306835167822, "grad_norm": 2.607349157333374, "learning_rate": 5.7034753363228697e-05, "loss": 2.768, "step": 50605 }, { "epoch": 3.4386465552384835, "grad_norm": 2.81693434715271, "learning_rate": 5.703050686234543e-05, "loss": 2.5486, "step": 50610 }, { "epoch": 3.438986275309145, "grad_norm": 2.413477897644043, "learning_rate": 5.702626036146216e-05, "loss": 2.4385, "step": 50615 }, { "epoch": 3.439325995379807, "grad_norm": 2.743896484375, "learning_rate": 5.702201386057888e-05, "loss": 2.859, "step": 50620 }, { "epoch": 3.439665715450469, "grad_norm": 2.827566146850586, "learning_rate": 5.7017767359695615e-05, "loss": 2.6958, "step": 50625 }, { "epoch": 3.4400054355211305, "grad_norm": 3.2262678146362305, "learning_rate": 5.701352085881234e-05, "loss": 2.7815, "step": 50630 }, { "epoch": 3.4403451555917925, "grad_norm": 2.971144199371338, "learning_rate": 5.7009274357929065e-05, "loss": 2.7822, "step": 50635 }, { "epoch": 3.440684875662454, "grad_norm": 2.3554909229278564, "learning_rate": 5.70050278570458e-05, "loss": 2.6831, "step": 50640 }, { "epoch": 3.441024595733116, "grad_norm": 3.8580448627471924, "learning_rate": 5.700078135616253e-05, "loss": 2.7881, "step": 50645 }, { "epoch": 3.441364315803778, "grad_norm": 2.868924617767334, "learning_rate": 5.699653485527925e-05, "loss": 2.7546, "step": 50650 }, { "epoch": 3.4417040358744395, "grad_norm": 3.331035852432251, "learning_rate": 5.699228835439598e-05, "loss": 2.768, "step": 50655 }, { "epoch": 3.442043755945101, "grad_norm": 2.7522430419921875, "learning_rate": 5.698804185351271e-05, "loss": 2.6348, "step": 50660 }, { "epoch": 3.442383476015763, "grad_norm": 2.1582815647125244, "learning_rate": 5.698379535262943e-05, "loss": 2.8858, "step": 50665 }, { "epoch": 3.442723196086425, "grad_norm": 2.796111583709717, "learning_rate": 5.697954885174617e-05, "loss": 2.6984, "step": 50670 }, { "epoch": 3.4430629161570865, "grad_norm": 
2.599461555480957, "learning_rate": 5.697530235086289e-05, "loss": 2.4705, "step": 50675 }, { "epoch": 3.4434026362277486, "grad_norm": 2.5213611125946045, "learning_rate": 5.697105584997962e-05, "loss": 2.8011, "step": 50680 }, { "epoch": 3.44374235629841, "grad_norm": 3.2420997619628906, "learning_rate": 5.696680934909635e-05, "loss": 2.7599, "step": 50685 }, { "epoch": 3.444082076369072, "grad_norm": 3.2162926197052, "learning_rate": 5.696256284821307e-05, "loss": 2.8856, "step": 50690 }, { "epoch": 3.444421796439734, "grad_norm": 2.7826340198516846, "learning_rate": 5.69583163473298e-05, "loss": 2.7726, "step": 50695 }, { "epoch": 3.4447615165103955, "grad_norm": 2.4142367839813232, "learning_rate": 5.6954069846446535e-05, "loss": 2.5367, "step": 50700 }, { "epoch": 3.445101236581057, "grad_norm": 3.0823276042938232, "learning_rate": 5.694982334556326e-05, "loss": 2.4873, "step": 50705 }, { "epoch": 3.4454409566517192, "grad_norm": 2.5820369720458984, "learning_rate": 5.6945576844679985e-05, "loss": 2.8386, "step": 50710 }, { "epoch": 3.445780676722381, "grad_norm": 4.134663105010986, "learning_rate": 5.694133034379672e-05, "loss": 2.6241, "step": 50715 }, { "epoch": 3.4461203967930425, "grad_norm": 2.7096102237701416, "learning_rate": 5.693708384291344e-05, "loss": 2.7666, "step": 50720 }, { "epoch": 3.446460116863704, "grad_norm": 2.7165284156799316, "learning_rate": 5.693283734203016e-05, "loss": 2.9003, "step": 50725 }, { "epoch": 3.446799836934366, "grad_norm": 2.7782130241394043, "learning_rate": 5.6928590841146903e-05, "loss": 3.0164, "step": 50730 }, { "epoch": 3.447139557005028, "grad_norm": 4.184937953948975, "learning_rate": 5.6924344340263625e-05, "loss": 2.388, "step": 50735 }, { "epoch": 3.4474792770756895, "grad_norm": 2.5436623096466064, "learning_rate": 5.6920097839380346e-05, "loss": 2.6696, "step": 50740 }, { "epoch": 3.4478189971463515, "grad_norm": 2.9251410961151123, "learning_rate": 5.691585133849708e-05, "loss": 2.356, "step": 50745 }, { 
"epoch": 3.448158717217013, "grad_norm": 2.7984066009521484, "learning_rate": 5.691160483761381e-05, "loss": 2.635, "step": 50750 }, { "epoch": 3.448498437287675, "grad_norm": 2.555828809738159, "learning_rate": 5.690735833673053e-05, "loss": 2.5936, "step": 50755 }, { "epoch": 3.448838157358337, "grad_norm": 2.9438562393188477, "learning_rate": 5.6903111835847265e-05, "loss": 2.4474, "step": 50760 }, { "epoch": 3.4491778774289985, "grad_norm": 2.7051889896392822, "learning_rate": 5.689886533496399e-05, "loss": 2.3885, "step": 50765 }, { "epoch": 3.44951759749966, "grad_norm": 3.8609542846679688, "learning_rate": 5.6894618834080714e-05, "loss": 2.5375, "step": 50770 }, { "epoch": 3.449857317570322, "grad_norm": 2.8057141304016113, "learning_rate": 5.689037233319745e-05, "loss": 2.4813, "step": 50775 }, { "epoch": 3.450197037640984, "grad_norm": 2.577971935272217, "learning_rate": 5.688612583231418e-05, "loss": 2.4916, "step": 50780 }, { "epoch": 3.4505367577116455, "grad_norm": 3.093618869781494, "learning_rate": 5.68818793314309e-05, "loss": 2.4996, "step": 50785 }, { "epoch": 3.4508764777823075, "grad_norm": 3.319857120513916, "learning_rate": 5.687763283054763e-05, "loss": 3.0093, "step": 50790 }, { "epoch": 3.451216197852969, "grad_norm": 4.011595249176025, "learning_rate": 5.687338632966436e-05, "loss": 2.5762, "step": 50795 }, { "epoch": 3.451555917923631, "grad_norm": 2.347700357437134, "learning_rate": 5.686913982878108e-05, "loss": 2.7548, "step": 50800 }, { "epoch": 3.451895637994293, "grad_norm": 2.7346551418304443, "learning_rate": 5.686489332789782e-05, "loss": 2.7119, "step": 50805 }, { "epoch": 3.4522353580649545, "grad_norm": 3.016407012939453, "learning_rate": 5.686064682701454e-05, "loss": 2.6337, "step": 50810 }, { "epoch": 3.452575078135616, "grad_norm": 2.11376953125, "learning_rate": 5.685640032613127e-05, "loss": 2.8768, "step": 50815 }, { "epoch": 3.452914798206278, "grad_norm": 2.837094306945801, "learning_rate": 5.6852153825248e-05, 
"loss": 2.6223, "step": 50820 }, { "epoch": 3.45325451827694, "grad_norm": 2.4540517330169678, "learning_rate": 5.684790732436472e-05, "loss": 2.707, "step": 50825 }, { "epoch": 3.4535942383476015, "grad_norm": 2.9523963928222656, "learning_rate": 5.684366082348146e-05, "loss": 2.6908, "step": 50830 }, { "epoch": 3.453933958418263, "grad_norm": 3.303818941116333, "learning_rate": 5.6839414322598185e-05, "loss": 2.6029, "step": 50835 }, { "epoch": 3.454273678488925, "grad_norm": 2.7771708965301514, "learning_rate": 5.6835167821714906e-05, "loss": 2.6861, "step": 50840 }, { "epoch": 3.454613398559587, "grad_norm": 2.717177629470825, "learning_rate": 5.683092132083164e-05, "loss": 2.6358, "step": 50845 }, { "epoch": 3.4549531186302485, "grad_norm": 3.2403063774108887, "learning_rate": 5.682667481994837e-05, "loss": 2.6993, "step": 50850 }, { "epoch": 3.4552928387009105, "grad_norm": 3.754647970199585, "learning_rate": 5.682242831906509e-05, "loss": 2.7462, "step": 50855 }, { "epoch": 3.455632558771572, "grad_norm": 2.6753854751586914, "learning_rate": 5.6818181818181825e-05, "loss": 2.46, "step": 50860 }, { "epoch": 3.455972278842234, "grad_norm": 3.5444352626800537, "learning_rate": 5.681393531729855e-05, "loss": 2.5957, "step": 50865 }, { "epoch": 3.456311998912896, "grad_norm": 3.2119152545928955, "learning_rate": 5.6809688816415274e-05, "loss": 2.6952, "step": 50870 }, { "epoch": 3.4566517189835575, "grad_norm": 3.5981297492980957, "learning_rate": 5.680544231553201e-05, "loss": 2.7675, "step": 50875 }, { "epoch": 3.456991439054219, "grad_norm": 2.73132061958313, "learning_rate": 5.680119581464873e-05, "loss": 2.643, "step": 50880 }, { "epoch": 3.457331159124881, "grad_norm": 3.4671709537506104, "learning_rate": 5.679694931376546e-05, "loss": 2.5475, "step": 50885 }, { "epoch": 3.457670879195543, "grad_norm": 2.9604928493499756, "learning_rate": 5.679270281288219e-05, "loss": 2.6593, "step": 50890 }, { "epoch": 3.4580105992662045, "grad_norm": 3.6615092754364014, 
"learning_rate": 5.6788456311998914e-05, "loss": 2.3763, "step": 50895 }, { "epoch": 3.4583503193368665, "grad_norm": 2.8586864471435547, "learning_rate": 5.678420981111564e-05, "loss": 2.8485, "step": 50900 }, { "epoch": 3.458690039407528, "grad_norm": 2.7350118160247803, "learning_rate": 5.677996331023238e-05, "loss": 2.5861, "step": 50905 }, { "epoch": 3.45902975947819, "grad_norm": 2.919403076171875, "learning_rate": 5.67757168093491e-05, "loss": 2.6265, "step": 50910 }, { "epoch": 3.459369479548852, "grad_norm": 3.411698579788208, "learning_rate": 5.6771470308465826e-05, "loss": 2.6373, "step": 50915 }, { "epoch": 3.4597091996195135, "grad_norm": 3.225245714187622, "learning_rate": 5.676722380758256e-05, "loss": 2.605, "step": 50920 }, { "epoch": 3.460048919690175, "grad_norm": 2.6704018115997314, "learning_rate": 5.676297730669928e-05, "loss": 2.5061, "step": 50925 }, { "epoch": 3.460388639760837, "grad_norm": 2.6541833877563477, "learning_rate": 5.675873080581601e-05, "loss": 2.3583, "step": 50930 }, { "epoch": 3.460728359831499, "grad_norm": 3.52203631401062, "learning_rate": 5.6754484304932745e-05, "loss": 2.7257, "step": 50935 }, { "epoch": 3.4610680799021605, "grad_norm": 2.8887877464294434, "learning_rate": 5.6750237804049466e-05, "loss": 2.5942, "step": 50940 }, { "epoch": 3.4614077999728226, "grad_norm": 3.164377212524414, "learning_rate": 5.674599130316619e-05, "loss": 2.8417, "step": 50945 }, { "epoch": 3.461747520043484, "grad_norm": 3.174079418182373, "learning_rate": 5.674174480228293e-05, "loss": 2.6182, "step": 50950 }, { "epoch": 3.462087240114146, "grad_norm": 2.7427895069122314, "learning_rate": 5.673749830139965e-05, "loss": 2.6289, "step": 50955 }, { "epoch": 3.462426960184808, "grad_norm": 2.8059542179107666, "learning_rate": 5.673325180051637e-05, "loss": 2.8392, "step": 50960 }, { "epoch": 3.4627666802554695, "grad_norm": 3.1647233963012695, "learning_rate": 5.6729005299633106e-05, "loss": 2.6867, "step": 50965 }, { "epoch": 
3.463106400326131, "grad_norm": 2.848595380783081, "learning_rate": 5.6724758798749834e-05, "loss": 2.5663, "step": 50970 }, { "epoch": 3.4634461203967932, "grad_norm": 3.4821362495422363, "learning_rate": 5.6720512297866556e-05, "loss": 2.7182, "step": 50975 }, { "epoch": 3.463785840467455, "grad_norm": 3.6766741275787354, "learning_rate": 5.671626579698329e-05, "loss": 2.7328, "step": 50980 }, { "epoch": 3.4641255605381165, "grad_norm": 2.684614896774292, "learning_rate": 5.671201929610002e-05, "loss": 2.7115, "step": 50985 }, { "epoch": 3.4644652806087786, "grad_norm": 2.993286609649658, "learning_rate": 5.670777279521674e-05, "loss": 2.4606, "step": 50990 }, { "epoch": 3.46480500067944, "grad_norm": 3.17950177192688, "learning_rate": 5.6703526294333474e-05, "loss": 2.7688, "step": 50995 }, { "epoch": 3.465144720750102, "grad_norm": 2.350471258163452, "learning_rate": 5.66992797934502e-05, "loss": 2.8177, "step": 51000 }, { "epoch": 3.465484440820764, "grad_norm": 2.479504108428955, "learning_rate": 5.6695033292566924e-05, "loss": 2.9111, "step": 51005 }, { "epoch": 3.4658241608914255, "grad_norm": 3.131518840789795, "learning_rate": 5.669078679168366e-05, "loss": 2.6344, "step": 51010 }, { "epoch": 3.466163880962087, "grad_norm": 2.931938648223877, "learning_rate": 5.668654029080038e-05, "loss": 2.7075, "step": 51015 }, { "epoch": 3.4665036010327492, "grad_norm": 3.074636220932007, "learning_rate": 5.668229378991711e-05, "loss": 2.5418, "step": 51020 }, { "epoch": 3.466843321103411, "grad_norm": 3.431478500366211, "learning_rate": 5.667804728903384e-05, "loss": 2.6961, "step": 51025 }, { "epoch": 3.4671830411740725, "grad_norm": 2.68873929977417, "learning_rate": 5.6673800788150564e-05, "loss": 2.7798, "step": 51030 }, { "epoch": 3.4675227612447346, "grad_norm": 3.2399861812591553, "learning_rate": 5.666955428726729e-05, "loss": 2.7822, "step": 51035 }, { "epoch": 3.467862481315396, "grad_norm": 2.727790355682373, "learning_rate": 5.6665307786384026e-05, 
"loss": 2.6302, "step": 51040 }, { "epoch": 3.468202201386058, "grad_norm": 2.818514347076416, "learning_rate": 5.666106128550075e-05, "loss": 2.6519, "step": 51045 }, { "epoch": 3.46854192145672, "grad_norm": 2.920196056365967, "learning_rate": 5.6656814784617476e-05, "loss": 2.8656, "step": 51050 }, { "epoch": 3.4688816415273815, "grad_norm": 2.8249921798706055, "learning_rate": 5.665256828373421e-05, "loss": 2.7594, "step": 51055 }, { "epoch": 3.469221361598043, "grad_norm": 3.5907883644104004, "learning_rate": 5.664832178285093e-05, "loss": 2.5777, "step": 51060 }, { "epoch": 3.469561081668705, "grad_norm": 2.8041741847991943, "learning_rate": 5.664407528196766e-05, "loss": 2.4786, "step": 51065 }, { "epoch": 3.469900801739367, "grad_norm": 3.306623935699463, "learning_rate": 5.6639828781084394e-05, "loss": 2.3999, "step": 51070 }, { "epoch": 3.4702405218100285, "grad_norm": 2.695286750793457, "learning_rate": 5.6635582280201116e-05, "loss": 2.3765, "step": 51075 }, { "epoch": 3.47058024188069, "grad_norm": 3.495270013809204, "learning_rate": 5.663133577931784e-05, "loss": 2.7355, "step": 51080 }, { "epoch": 3.470919961951352, "grad_norm": 2.9184091091156006, "learning_rate": 5.662708927843458e-05, "loss": 2.6914, "step": 51085 }, { "epoch": 3.471259682022014, "grad_norm": 3.478416919708252, "learning_rate": 5.66228427775513e-05, "loss": 2.879, "step": 51090 }, { "epoch": 3.4715994020926755, "grad_norm": 3.5425872802734375, "learning_rate": 5.661859627666802e-05, "loss": 2.6303, "step": 51095 }, { "epoch": 3.4719391221633376, "grad_norm": 3.794433832168579, "learning_rate": 5.6614349775784756e-05, "loss": 2.7628, "step": 51100 }, { "epoch": 3.472278842233999, "grad_norm": 3.0504095554351807, "learning_rate": 5.6610103274901484e-05, "loss": 2.4943, "step": 51105 }, { "epoch": 3.472618562304661, "grad_norm": 2.6893227100372314, "learning_rate": 5.6605856774018205e-05, "loss": 2.4581, "step": 51110 }, { "epoch": 3.472958282375323, "grad_norm": 2.2905726432800293, 
"learning_rate": 5.660161027313494e-05, "loss": 2.7471, "step": 51115 }, { "epoch": 3.4732980024459845, "grad_norm": 2.4600229263305664, "learning_rate": 5.659736377225167e-05, "loss": 2.7973, "step": 51120 }, { "epoch": 3.473637722516646, "grad_norm": 2.9591126441955566, "learning_rate": 5.659311727136839e-05, "loss": 2.678, "step": 51125 }, { "epoch": 3.4739774425873082, "grad_norm": 3.756035089492798, "learning_rate": 5.6588870770485124e-05, "loss": 2.5904, "step": 51130 }, { "epoch": 3.47431716265797, "grad_norm": 2.6760857105255127, "learning_rate": 5.658462426960185e-05, "loss": 2.5063, "step": 51135 }, { "epoch": 3.4746568827286315, "grad_norm": 4.156393051147461, "learning_rate": 5.658037776871857e-05, "loss": 2.5189, "step": 51140 }, { "epoch": 3.4749966027992936, "grad_norm": 3.062368154525757, "learning_rate": 5.657613126783531e-05, "loss": 2.6781, "step": 51145 }, { "epoch": 3.475336322869955, "grad_norm": 2.2676472663879395, "learning_rate": 5.6571884766952036e-05, "loss": 2.8404, "step": 51150 }, { "epoch": 3.475676042940617, "grad_norm": 3.8898403644561768, "learning_rate": 5.656763826606877e-05, "loss": 2.5264, "step": 51155 }, { "epoch": 3.4760157630112785, "grad_norm": 2.58195424079895, "learning_rate": 5.656339176518549e-05, "loss": 2.7198, "step": 51160 }, { "epoch": 3.4763554830819405, "grad_norm": 2.734403371810913, "learning_rate": 5.655914526430221e-05, "loss": 2.6367, "step": 51165 }, { "epoch": 3.476695203152602, "grad_norm": 2.5963022708892822, "learning_rate": 5.655489876341895e-05, "loss": 2.4463, "step": 51170 }, { "epoch": 3.477034923223264, "grad_norm": 3.0417895317077637, "learning_rate": 5.6550652262535676e-05, "loss": 2.5688, "step": 51175 }, { "epoch": 3.477374643293926, "grad_norm": 3.0491108894348145, "learning_rate": 5.65464057616524e-05, "loss": 2.7393, "step": 51180 }, { "epoch": 3.4777143633645875, "grad_norm": 2.5710065364837646, "learning_rate": 5.654215926076913e-05, "loss": 2.774, "step": 51185 }, { "epoch": 
3.478054083435249, "grad_norm": 2.3422696590423584, "learning_rate": 5.653791275988586e-05, "loss": 2.6508, "step": 51190 }, { "epoch": 3.478393803505911, "grad_norm": 3.300638198852539, "learning_rate": 5.653366625900258e-05, "loss": 2.7231, "step": 51195 }, { "epoch": 3.478733523576573, "grad_norm": 2.7397043704986572, "learning_rate": 5.6529419758119316e-05, "loss": 2.5697, "step": 51200 }, { "epoch": 3.4790732436472345, "grad_norm": 2.553534984588623, "learning_rate": 5.6525173257236044e-05, "loss": 2.5437, "step": 51205 }, { "epoch": 3.4794129637178965, "grad_norm": 3.160005569458008, "learning_rate": 5.6520926756352765e-05, "loss": 2.2824, "step": 51210 }, { "epoch": 3.479752683788558, "grad_norm": 3.067506790161133, "learning_rate": 5.65166802554695e-05, "loss": 2.9278, "step": 51215 }, { "epoch": 3.48009240385922, "grad_norm": 3.1850109100341797, "learning_rate": 5.651243375458623e-05, "loss": 2.5531, "step": 51220 }, { "epoch": 3.480432123929882, "grad_norm": 3.155773878097534, "learning_rate": 5.650818725370295e-05, "loss": 2.6328, "step": 51225 }, { "epoch": 3.4807718440005435, "grad_norm": 3.8329060077667236, "learning_rate": 5.6503940752819684e-05, "loss": 2.6393, "step": 51230 }, { "epoch": 3.481111564071205, "grad_norm": 3.5783042907714844, "learning_rate": 5.6499694251936405e-05, "loss": 2.7214, "step": 51235 }, { "epoch": 3.481451284141867, "grad_norm": 2.7418134212493896, "learning_rate": 5.649544775105313e-05, "loss": 2.6102, "step": 51240 }, { "epoch": 3.481791004212529, "grad_norm": 2.9067962169647217, "learning_rate": 5.649120125016987e-05, "loss": 2.4207, "step": 51245 }, { "epoch": 3.4821307242831905, "grad_norm": 3.06026029586792, "learning_rate": 5.648695474928659e-05, "loss": 2.5004, "step": 51250 }, { "epoch": 3.4824704443538526, "grad_norm": 2.712575912475586, "learning_rate": 5.648270824840332e-05, "loss": 2.8337, "step": 51255 }, { "epoch": 3.482810164424514, "grad_norm": 3.454665184020996, "learning_rate": 5.647846174752005e-05, 
"loss": 2.7192, "step": 51260 }, { "epoch": 3.483149884495176, "grad_norm": 2.5924229621887207, "learning_rate": 5.647421524663677e-05, "loss": 2.655, "step": 51265 }, { "epoch": 3.483489604565838, "grad_norm": 2.6575846672058105, "learning_rate": 5.64699687457535e-05, "loss": 2.6927, "step": 51270 }, { "epoch": 3.4838293246364995, "grad_norm": 3.204495429992676, "learning_rate": 5.6465722244870236e-05, "loss": 2.4482, "step": 51275 }, { "epoch": 3.484169044707161, "grad_norm": 2.323482036590576, "learning_rate": 5.646147574398696e-05, "loss": 2.7601, "step": 51280 }, { "epoch": 3.4845087647778232, "grad_norm": 2.463637590408325, "learning_rate": 5.6457229243103685e-05, "loss": 2.5519, "step": 51285 }, { "epoch": 3.484848484848485, "grad_norm": 3.8507096767425537, "learning_rate": 5.645298274222042e-05, "loss": 2.5318, "step": 51290 }, { "epoch": 3.4851882049191465, "grad_norm": 3.283604621887207, "learning_rate": 5.644873624133714e-05, "loss": 2.6634, "step": 51295 }, { "epoch": 3.4855279249898086, "grad_norm": 2.7146108150482178, "learning_rate": 5.644448974045386e-05, "loss": 2.6286, "step": 51300 }, { "epoch": 3.48586764506047, "grad_norm": 2.874734878540039, "learning_rate": 5.64402432395706e-05, "loss": 2.9609, "step": 51305 }, { "epoch": 3.486207365131132, "grad_norm": 2.8612022399902344, "learning_rate": 5.6435996738687325e-05, "loss": 2.4863, "step": 51310 }, { "epoch": 3.486547085201794, "grad_norm": 3.303309202194214, "learning_rate": 5.6431750237804047e-05, "loss": 2.8193, "step": 51315 }, { "epoch": 3.4868868052724555, "grad_norm": 2.6770377159118652, "learning_rate": 5.642750373692078e-05, "loss": 2.3623, "step": 51320 }, { "epoch": 3.487226525343117, "grad_norm": 3.3040332794189453, "learning_rate": 5.642325723603751e-05, "loss": 2.738, "step": 51325 }, { "epoch": 3.4875662454137792, "grad_norm": 2.86647629737854, "learning_rate": 5.641901073515423e-05, "loss": 2.7852, "step": 51330 }, { "epoch": 3.487905965484441, "grad_norm": 3.0568430423736572, 
"learning_rate": 5.6414764234270965e-05, "loss": 2.8606, "step": 51335 }, { "epoch": 3.4882456855551025, "grad_norm": 2.8630478382110596, "learning_rate": 5.641051773338769e-05, "loss": 2.6551, "step": 51340 }, { "epoch": 3.4885854056257646, "grad_norm": 2.559586763381958, "learning_rate": 5.6406271232504415e-05, "loss": 2.8577, "step": 51345 }, { "epoch": 3.488925125696426, "grad_norm": 2.675455331802368, "learning_rate": 5.640202473162115e-05, "loss": 2.6777, "step": 51350 }, { "epoch": 3.489264845767088, "grad_norm": 2.4923484325408936, "learning_rate": 5.639777823073788e-05, "loss": 2.538, "step": 51355 }, { "epoch": 3.48960456583775, "grad_norm": 2.5324625968933105, "learning_rate": 5.63935317298546e-05, "loss": 2.5302, "step": 51360 }, { "epoch": 3.4899442859084115, "grad_norm": 3.4465205669403076, "learning_rate": 5.638928522897133e-05, "loss": 2.5653, "step": 51365 }, { "epoch": 3.490284005979073, "grad_norm": 2.836991786956787, "learning_rate": 5.6385038728088055e-05, "loss": 2.6412, "step": 51370 }, { "epoch": 3.4906237260497353, "grad_norm": 2.480180263519287, "learning_rate": 5.638079222720478e-05, "loss": 2.8696, "step": 51375 }, { "epoch": 3.490963446120397, "grad_norm": 3.320063352584839, "learning_rate": 5.637739502649817e-05, "loss": 2.621, "step": 51380 }, { "epoch": 3.4913031661910585, "grad_norm": 2.9164392948150635, "learning_rate": 5.637314852561489e-05, "loss": 2.5957, "step": 51385 }, { "epoch": 3.4916428862617206, "grad_norm": 3.3345518112182617, "learning_rate": 5.636890202473163e-05, "loss": 2.523, "step": 51390 }, { "epoch": 3.491982606332382, "grad_norm": 2.6188886165618896, "learning_rate": 5.6364655523848356e-05, "loss": 2.6612, "step": 51395 }, { "epoch": 3.492322326403044, "grad_norm": 2.9700183868408203, "learning_rate": 5.636040902296508e-05, "loss": 2.7315, "step": 51400 }, { "epoch": 3.4926620464737055, "grad_norm": 2.918888807296753, "learning_rate": 5.635616252208181e-05, "loss": 2.5624, "step": 51405 }, { "epoch": 
3.4930017665443676, "grad_norm": 3.2428455352783203, "learning_rate": 5.635191602119853e-05, "loss": 2.7456, "step": 51410 }, { "epoch": 3.493341486615029, "grad_norm": 2.8781676292419434, "learning_rate": 5.634766952031526e-05, "loss": 2.5742, "step": 51415 }, { "epoch": 3.493681206685691, "grad_norm": 3.1961536407470703, "learning_rate": 5.6343423019431996e-05, "loss": 3.1385, "step": 51420 }, { "epoch": 3.494020926756353, "grad_norm": 2.4169983863830566, "learning_rate": 5.633917651854872e-05, "loss": 2.6125, "step": 51425 }, { "epoch": 3.4943606468270145, "grad_norm": 3.1814639568328857, "learning_rate": 5.6334930017665445e-05, "loss": 2.7686, "step": 51430 }, { "epoch": 3.494700366897676, "grad_norm": 2.095824956893921, "learning_rate": 5.633068351678218e-05, "loss": 2.837, "step": 51435 }, { "epoch": 3.4950400869683382, "grad_norm": 2.5081615447998047, "learning_rate": 5.63264370158989e-05, "loss": 2.4505, "step": 51440 }, { "epoch": 3.495379807039, "grad_norm": 2.9236392974853516, "learning_rate": 5.632219051501563e-05, "loss": 2.5309, "step": 51445 }, { "epoch": 3.4957195271096615, "grad_norm": 3.258976459503174, "learning_rate": 5.6317944014132364e-05, "loss": 2.5415, "step": 51450 }, { "epoch": 3.4960592471803236, "grad_norm": 3.9286563396453857, "learning_rate": 5.6313697513249085e-05, "loss": 2.4631, "step": 51455 }, { "epoch": 3.496398967250985, "grad_norm": 2.720778465270996, "learning_rate": 5.630945101236581e-05, "loss": 2.7272, "step": 51460 }, { "epoch": 3.496738687321647, "grad_norm": 2.8122780323028564, "learning_rate": 5.630520451148255e-05, "loss": 2.6999, "step": 51465 }, { "epoch": 3.497078407392309, "grad_norm": 2.686795949935913, "learning_rate": 5.630095801059927e-05, "loss": 2.6763, "step": 51470 }, { "epoch": 3.4974181274629705, "grad_norm": 2.9906435012817383, "learning_rate": 5.629671150971599e-05, "loss": 2.54, "step": 51475 }, { "epoch": 3.497757847533632, "grad_norm": 2.4486443996429443, "learning_rate": 5.629246500883273e-05, 
"loss": 2.8251, "step": 51480 }, { "epoch": 3.4980975676042942, "grad_norm": 2.4134702682495117, "learning_rate": 5.628821850794945e-05, "loss": 2.5099, "step": 51485 }, { "epoch": 3.498437287674956, "grad_norm": 2.593292474746704, "learning_rate": 5.6283972007066174e-05, "loss": 2.6783, "step": 51490 }, { "epoch": 3.4987770077456175, "grad_norm": 2.742314100265503, "learning_rate": 5.627972550618291e-05, "loss": 2.7378, "step": 51495 }, { "epoch": 3.499116727816279, "grad_norm": 2.56933331489563, "learning_rate": 5.627547900529964e-05, "loss": 2.5983, "step": 51500 }, { "epoch": 3.499456447886941, "grad_norm": 3.672384262084961, "learning_rate": 5.627123250441636e-05, "loss": 2.6031, "step": 51505 }, { "epoch": 3.499796167957603, "grad_norm": 2.523789644241333, "learning_rate": 5.626698600353309e-05, "loss": 2.812, "step": 51510 }, { "epoch": 3.5001358880282645, "grad_norm": 3.256971597671509, "learning_rate": 5.626273950264982e-05, "loss": 2.5522, "step": 51515 }, { "epoch": 3.5004756080989265, "grad_norm": 2.747798442840576, "learning_rate": 5.625849300176654e-05, "loss": 2.3769, "step": 51520 }, { "epoch": 3.500815328169588, "grad_norm": 2.8933346271514893, "learning_rate": 5.625424650088328e-05, "loss": 2.5956, "step": 51525 }, { "epoch": 3.50115504824025, "grad_norm": 2.835277557373047, "learning_rate": 5.6250000000000005e-05, "loss": 2.7539, "step": 51530 }, { "epoch": 3.501494768310912, "grad_norm": 2.405259132385254, "learning_rate": 5.6245753499116726e-05, "loss": 2.4506, "step": 51535 }, { "epoch": 3.5018344883815735, "grad_norm": 2.7710132598876953, "learning_rate": 5.624150699823346e-05, "loss": 2.7858, "step": 51540 }, { "epoch": 3.502174208452235, "grad_norm": 2.800849199295044, "learning_rate": 5.623726049735019e-05, "loss": 2.5594, "step": 51545 }, { "epoch": 3.5025139285228972, "grad_norm": 2.83235764503479, "learning_rate": 5.623301399646691e-05, "loss": 2.7264, "step": 51550 }, { "epoch": 3.502853648593559, "grad_norm": 2.5591931343078613, 
"learning_rate": 5.6228767495583645e-05, "loss": 2.6758, "step": 51555 }, { "epoch": 3.5031933686642205, "grad_norm": 2.5791914463043213, "learning_rate": 5.6224520994700367e-05, "loss": 2.4058, "step": 51560 }, { "epoch": 3.5035330887348826, "grad_norm": 2.453951835632324, "learning_rate": 5.6220274493817095e-05, "loss": 2.8116, "step": 51565 }, { "epoch": 3.503872808805544, "grad_norm": 2.4467177391052246, "learning_rate": 5.621602799293383e-05, "loss": 2.3996, "step": 51570 }, { "epoch": 3.504212528876206, "grad_norm": 3.500153064727783, "learning_rate": 5.621178149205055e-05, "loss": 2.7938, "step": 51575 }, { "epoch": 3.504552248946868, "grad_norm": 3.3955790996551514, "learning_rate": 5.620753499116728e-05, "loss": 2.708, "step": 51580 }, { "epoch": 3.5048919690175295, "grad_norm": 2.6683597564697266, "learning_rate": 5.620328849028401e-05, "loss": 2.7936, "step": 51585 }, { "epoch": 3.505231689088191, "grad_norm": 2.654384136199951, "learning_rate": 5.6199041989400735e-05, "loss": 2.4537, "step": 51590 }, { "epoch": 3.5055714091588532, "grad_norm": 3.323004722595215, "learning_rate": 5.619479548851746e-05, "loss": 2.7471, "step": 51595 }, { "epoch": 3.505911129229515, "grad_norm": 2.898725986480713, "learning_rate": 5.61905489876342e-05, "loss": 2.3552, "step": 51600 }, { "epoch": 3.5062508493001765, "grad_norm": 2.6496212482452393, "learning_rate": 5.618630248675092e-05, "loss": 2.3461, "step": 51605 }, { "epoch": 3.5065905693708386, "grad_norm": 2.797362804412842, "learning_rate": 5.618205598586764e-05, "loss": 2.4539, "step": 51610 }, { "epoch": 3.5069302894415, "grad_norm": 2.2122366428375244, "learning_rate": 5.617780948498438e-05, "loss": 2.361, "step": 51615 }, { "epoch": 3.507270009512162, "grad_norm": 2.631124496459961, "learning_rate": 5.61735629841011e-05, "loss": 2.9041, "step": 51620 }, { "epoch": 3.507609729582824, "grad_norm": 3.0361533164978027, "learning_rate": 5.6169316483217824e-05, "loss": 2.9542, "step": 51625 }, { "epoch": 
3.5079494496534855, "grad_norm": 3.3060998916625977, "learning_rate": 5.616506998233456e-05, "loss": 2.6908, "step": 51630 }, { "epoch": 3.508289169724147, "grad_norm": 2.70412278175354, "learning_rate": 5.616082348145129e-05, "loss": 2.5481, "step": 51635 }, { "epoch": 3.5086288897948092, "grad_norm": 3.2759041786193848, "learning_rate": 5.615657698056801e-05, "loss": 2.7779, "step": 51640 }, { "epoch": 3.508968609865471, "grad_norm": 2.8146908283233643, "learning_rate": 5.615233047968474e-05, "loss": 2.622, "step": 51645 }, { "epoch": 3.5093083299361325, "grad_norm": 2.9992551803588867, "learning_rate": 5.614808397880147e-05, "loss": 2.785, "step": 51650 }, { "epoch": 3.5096480500067946, "grad_norm": 2.547711133956909, "learning_rate": 5.614383747791819e-05, "loss": 2.5833, "step": 51655 }, { "epoch": 3.509987770077456, "grad_norm": 2.6142985820770264, "learning_rate": 5.613959097703493e-05, "loss": 2.6341, "step": 51660 }, { "epoch": 3.510327490148118, "grad_norm": 2.206244945526123, "learning_rate": 5.6135344476151655e-05, "loss": 2.4643, "step": 51665 }, { "epoch": 3.51066721021878, "grad_norm": 2.888695240020752, "learning_rate": 5.6131097975268376e-05, "loss": 2.7326, "step": 51670 }, { "epoch": 3.5110069302894416, "grad_norm": 2.736699342727661, "learning_rate": 5.612685147438511e-05, "loss": 2.7524, "step": 51675 }, { "epoch": 3.511346650360103, "grad_norm": 2.648254156112671, "learning_rate": 5.612260497350184e-05, "loss": 2.4868, "step": 51680 }, { "epoch": 3.5116863704307653, "grad_norm": 2.408200979232788, "learning_rate": 5.611835847261856e-05, "loss": 2.5309, "step": 51685 }, { "epoch": 3.512026090501427, "grad_norm": 2.8067917823791504, "learning_rate": 5.6114111971735295e-05, "loss": 2.4944, "step": 51690 }, { "epoch": 3.5123658105720885, "grad_norm": 2.332545757293701, "learning_rate": 5.6109865470852016e-05, "loss": 2.7668, "step": 51695 }, { "epoch": 3.5127055306427506, "grad_norm": 3.5724058151245117, "learning_rate": 5.6105618969968744e-05, 
"loss": 2.9003, "step": 51700 }, { "epoch": 3.5130452507134122, "grad_norm": 3.081160306930542, "learning_rate": 5.610137246908548e-05, "loss": 2.5595, "step": 51705 }, { "epoch": 3.513384970784074, "grad_norm": 2.3318490982055664, "learning_rate": 5.60971259682022e-05, "loss": 2.7825, "step": 51710 }, { "epoch": 3.513724690854736, "grad_norm": 2.738731622695923, "learning_rate": 5.6092879467318935e-05, "loss": 2.3966, "step": 51715 }, { "epoch": 3.5140644109253976, "grad_norm": 3.62019419670105, "learning_rate": 5.608863296643566e-05, "loss": 2.6542, "step": 51720 }, { "epoch": 3.514404130996059, "grad_norm": 2.3305537700653076, "learning_rate": 5.6084386465552384e-05, "loss": 2.5387, "step": 51725 }, { "epoch": 3.5147438510667213, "grad_norm": 2.905503511428833, "learning_rate": 5.608013996466912e-05, "loss": 2.4281, "step": 51730 }, { "epoch": 3.515083571137383, "grad_norm": 2.66275691986084, "learning_rate": 5.607589346378585e-05, "loss": 2.5788, "step": 51735 }, { "epoch": 3.5154232912080445, "grad_norm": 2.8033385276794434, "learning_rate": 5.607164696290257e-05, "loss": 2.6485, "step": 51740 }, { "epoch": 3.5157630112787066, "grad_norm": 4.338611125946045, "learning_rate": 5.60674004620193e-05, "loss": 2.8029, "step": 51745 }, { "epoch": 3.5161027313493682, "grad_norm": 3.1887338161468506, "learning_rate": 5.606315396113603e-05, "loss": 2.9792, "step": 51750 }, { "epoch": 3.51644245142003, "grad_norm": 2.7171733379364014, "learning_rate": 5.605890746025275e-05, "loss": 2.5991, "step": 51755 }, { "epoch": 3.516782171490692, "grad_norm": 3.0102341175079346, "learning_rate": 5.605466095936949e-05, "loss": 2.6508, "step": 51760 }, { "epoch": 3.5171218915613536, "grad_norm": 2.407808542251587, "learning_rate": 5.605041445848621e-05, "loss": 2.4101, "step": 51765 }, { "epoch": 3.517461611632015, "grad_norm": 3.102858304977417, "learning_rate": 5.6046167957602936e-05, "loss": 2.8031, "step": 51770 }, { "epoch": 3.517801331702677, "grad_norm": 2.940248727798462, 
"learning_rate": 5.604192145671967e-05, "loss": 2.574, "step": 51775 }, { "epoch": 3.518141051773339, "grad_norm": 3.3487348556518555, "learning_rate": 5.603767495583639e-05, "loss": 2.5597, "step": 51780 }, { "epoch": 3.5184807718440005, "grad_norm": 3.622494697570801, "learning_rate": 5.603342845495312e-05, "loss": 2.6287, "step": 51785 }, { "epoch": 3.518820491914662, "grad_norm": 3.011427640914917, "learning_rate": 5.6029181954069855e-05, "loss": 2.8588, "step": 51790 }, { "epoch": 3.5191602119853242, "grad_norm": 2.9134292602539062, "learning_rate": 5.6024935453186576e-05, "loss": 2.5072, "step": 51795 }, { "epoch": 3.519499932055986, "grad_norm": 2.8658218383789062, "learning_rate": 5.6020688952303304e-05, "loss": 2.4812, "step": 51800 }, { "epoch": 3.5198396521266475, "grad_norm": 2.451270818710327, "learning_rate": 5.601644245142004e-05, "loss": 2.8666, "step": 51805 }, { "epoch": 3.520179372197309, "grad_norm": 2.4529190063476562, "learning_rate": 5.601219595053676e-05, "loss": 2.9067, "step": 51810 }, { "epoch": 3.520519092267971, "grad_norm": 2.6944432258605957, "learning_rate": 5.600794944965349e-05, "loss": 2.6201, "step": 51815 }, { "epoch": 3.520858812338633, "grad_norm": 2.623286724090576, "learning_rate": 5.600370294877022e-05, "loss": 2.4969, "step": 51820 }, { "epoch": 3.5211985324092945, "grad_norm": 2.6219382286071777, "learning_rate": 5.5999456447886944e-05, "loss": 2.5477, "step": 51825 }, { "epoch": 3.5215382524799566, "grad_norm": 3.3300588130950928, "learning_rate": 5.5995209947003665e-05, "loss": 2.7673, "step": 51830 }, { "epoch": 3.521877972550618, "grad_norm": 2.8080618381500244, "learning_rate": 5.599096344612041e-05, "loss": 2.861, "step": 51835 }, { "epoch": 3.52221769262128, "grad_norm": 3.562591075897217, "learning_rate": 5.598671694523713e-05, "loss": 2.4302, "step": 51840 }, { "epoch": 3.522557412691942, "grad_norm": 2.658022403717041, "learning_rate": 5.598247044435385e-05, "loss": 2.585, "step": 51845 }, { "epoch": 
3.5228971327626035, "grad_norm": 2.9727578163146973, "learning_rate": 5.5978223943470584e-05, "loss": 2.7654, "step": 51850 }, { "epoch": 3.523236852833265, "grad_norm": 2.9608778953552246, "learning_rate": 5.597397744258731e-05, "loss": 2.6231, "step": 51855 }, { "epoch": 3.5235765729039272, "grad_norm": 2.9984121322631836, "learning_rate": 5.5969730941704033e-05, "loss": 2.6763, "step": 51860 }, { "epoch": 3.523916292974589, "grad_norm": 2.438699960708618, "learning_rate": 5.596548444082077e-05, "loss": 2.6556, "step": 51865 }, { "epoch": 3.5242560130452505, "grad_norm": 3.1006290912628174, "learning_rate": 5.5961237939937496e-05, "loss": 2.4908, "step": 51870 }, { "epoch": 3.5245957331159126, "grad_norm": 2.638380289077759, "learning_rate": 5.595699143905422e-05, "loss": 2.8196, "step": 51875 }, { "epoch": 3.524935453186574, "grad_norm": 3.067211627960205, "learning_rate": 5.595274493817095e-05, "loss": 2.4666, "step": 51880 }, { "epoch": 3.525275173257236, "grad_norm": 2.4525041580200195, "learning_rate": 5.594849843728768e-05, "loss": 2.4484, "step": 51885 }, { "epoch": 3.525614893327898, "grad_norm": 2.3768370151519775, "learning_rate": 5.59442519364044e-05, "loss": 2.7793, "step": 51890 }, { "epoch": 3.5259546133985595, "grad_norm": 3.165830135345459, "learning_rate": 5.5940005435521136e-05, "loss": 2.6261, "step": 51895 }, { "epoch": 3.526294333469221, "grad_norm": 2.561811685562134, "learning_rate": 5.593575893463786e-05, "loss": 2.784, "step": 51900 }, { "epoch": 3.5266340535398832, "grad_norm": 3.0297791957855225, "learning_rate": 5.5931512433754585e-05, "loss": 2.7576, "step": 51905 }, { "epoch": 3.526973773610545, "grad_norm": 2.840655565261841, "learning_rate": 5.592726593287132e-05, "loss": 2.6963, "step": 51910 }, { "epoch": 3.5273134936812065, "grad_norm": 3.3243229389190674, "learning_rate": 5.592301943198804e-05, "loss": 2.6893, "step": 51915 }, { "epoch": 3.5276532137518686, "grad_norm": 2.3795907497406006, "learning_rate": 
5.591877293110477e-05, "loss": 2.3979, "step": 51920 }, { "epoch": 3.52799293382253, "grad_norm": 3.1516244411468506, "learning_rate": 5.5914526430221504e-05, "loss": 2.8305, "step": 51925 }, { "epoch": 3.528332653893192, "grad_norm": 2.5643296241760254, "learning_rate": 5.5910279929338226e-05, "loss": 2.8036, "step": 51930 }, { "epoch": 3.528672373963854, "grad_norm": 3.0453672409057617, "learning_rate": 5.5906033428454954e-05, "loss": 2.44, "step": 51935 }, { "epoch": 3.5290120940345155, "grad_norm": 3.853943347930908, "learning_rate": 5.590178692757169e-05, "loss": 2.7396, "step": 51940 }, { "epoch": 3.529351814105177, "grad_norm": 2.692331552505493, "learning_rate": 5.589754042668841e-05, "loss": 2.493, "step": 51945 }, { "epoch": 3.5296915341758393, "grad_norm": 3.038382053375244, "learning_rate": 5.589329392580514e-05, "loss": 2.547, "step": 51950 }, { "epoch": 3.530031254246501, "grad_norm": 2.804222583770752, "learning_rate": 5.588904742492187e-05, "loss": 2.4514, "step": 51955 }, { "epoch": 3.5303709743171625, "grad_norm": 2.8632142543792725, "learning_rate": 5.5884800924038594e-05, "loss": 2.7836, "step": 51960 }, { "epoch": 3.5307106943878246, "grad_norm": 3.0953025817871094, "learning_rate": 5.5880554423155315e-05, "loss": 2.619, "step": 51965 }, { "epoch": 3.531050414458486, "grad_norm": 2.559816598892212, "learning_rate": 5.5876307922272056e-05, "loss": 2.7879, "step": 51970 }, { "epoch": 3.531390134529148, "grad_norm": 2.561647415161133, "learning_rate": 5.587206142138878e-05, "loss": 2.3376, "step": 51975 }, { "epoch": 3.53172985459981, "grad_norm": 3.5919857025146484, "learning_rate": 5.58678149205055e-05, "loss": 2.4248, "step": 51980 }, { "epoch": 3.5320695746704716, "grad_norm": 3.2466812133789062, "learning_rate": 5.5863568419622234e-05, "loss": 2.7872, "step": 51985 }, { "epoch": 3.532409294741133, "grad_norm": 2.611550807952881, "learning_rate": 5.585932191873896e-05, "loss": 2.6999, "step": 51990 }, { "epoch": 3.5327490148117953, 
"grad_norm": 3.60481595993042, "learning_rate": 5.585507541785568e-05, "loss": 2.6204, "step": 51995 }, { "epoch": 3.533088734882457, "grad_norm": 3.383373260498047, "learning_rate": 5.585082891697242e-05, "loss": 2.7403, "step": 52000 }, { "epoch": 3.5334284549531185, "grad_norm": 3.263198137283325, "learning_rate": 5.5846582416089146e-05, "loss": 2.9342, "step": 52005 }, { "epoch": 3.5337681750237806, "grad_norm": 2.605419397354126, "learning_rate": 5.584233591520587e-05, "loss": 2.4831, "step": 52010 }, { "epoch": 3.5341078950944422, "grad_norm": 4.237478733062744, "learning_rate": 5.58380894143226e-05, "loss": 2.6957, "step": 52015 }, { "epoch": 3.534447615165104, "grad_norm": 2.6653594970703125, "learning_rate": 5.583384291343933e-05, "loss": 2.5156, "step": 52020 }, { "epoch": 3.534787335235766, "grad_norm": 2.346331834793091, "learning_rate": 5.582959641255605e-05, "loss": 2.8028, "step": 52025 }, { "epoch": 3.5351270553064276, "grad_norm": 2.9327170848846436, "learning_rate": 5.5825349911672786e-05, "loss": 2.9343, "step": 52030 }, { "epoch": 3.535466775377089, "grad_norm": 3.0147554874420166, "learning_rate": 5.5821103410789514e-05, "loss": 2.7765, "step": 52035 }, { "epoch": 3.5358064954477513, "grad_norm": 3.1851978302001953, "learning_rate": 5.5816856909906235e-05, "loss": 2.5522, "step": 52040 }, { "epoch": 3.536146215518413, "grad_norm": 3.0622754096984863, "learning_rate": 5.581261040902297e-05, "loss": 2.5225, "step": 52045 }, { "epoch": 3.5364859355890745, "grad_norm": 2.94459867477417, "learning_rate": 5.580836390813969e-05, "loss": 2.6685, "step": 52050 }, { "epoch": 3.5368256556597366, "grad_norm": 3.4703335762023926, "learning_rate": 5.5804117407256426e-05, "loss": 2.8898, "step": 52055 }, { "epoch": 3.5371653757303982, "grad_norm": 3.2369542121887207, "learning_rate": 5.5799870906373154e-05, "loss": 2.6087, "step": 52060 }, { "epoch": 3.53750509580106, "grad_norm": 3.1012845039367676, "learning_rate": 5.5795624405489875e-05, "loss": 2.48, 
"step": 52065 }, { "epoch": 3.537844815871722, "grad_norm": 2.7007904052734375, "learning_rate": 5.579137790460661e-05, "loss": 2.587, "step": 52070 }, { "epoch": 3.5381845359423836, "grad_norm": 2.798797607421875, "learning_rate": 5.578713140372334e-05, "loss": 2.6346, "step": 52075 }, { "epoch": 3.538524256013045, "grad_norm": 2.8352274894714355, "learning_rate": 5.578288490284006e-05, "loss": 2.8291, "step": 52080 }, { "epoch": 3.5388639760837073, "grad_norm": 3.233208656311035, "learning_rate": 5.5778638401956794e-05, "loss": 2.4403, "step": 52085 }, { "epoch": 3.539203696154369, "grad_norm": 2.9570696353912354, "learning_rate": 5.577439190107352e-05, "loss": 2.6125, "step": 52090 }, { "epoch": 3.5395434162250305, "grad_norm": 3.2040932178497314, "learning_rate": 5.577014540019024e-05, "loss": 2.9116, "step": 52095 }, { "epoch": 3.5398831362956926, "grad_norm": 2.536067485809326, "learning_rate": 5.576589889930698e-05, "loss": 2.6797, "step": 52100 }, { "epoch": 3.5402228563663543, "grad_norm": 2.061469554901123, "learning_rate": 5.5761652398423706e-05, "loss": 2.7943, "step": 52105 }, { "epoch": 3.540562576437016, "grad_norm": 2.4424314498901367, "learning_rate": 5.575740589754043e-05, "loss": 2.4964, "step": 52110 }, { "epoch": 3.5409022965076775, "grad_norm": 2.6691079139709473, "learning_rate": 5.575315939665716e-05, "loss": 2.7678, "step": 52115 }, { "epoch": 3.5412420165783396, "grad_norm": 3.252107858657837, "learning_rate": 5.574891289577388e-05, "loss": 2.5733, "step": 52120 }, { "epoch": 3.541581736649001, "grad_norm": 2.5510220527648926, "learning_rate": 5.574466639489061e-05, "loss": 2.5033, "step": 52125 }, { "epoch": 3.541921456719663, "grad_norm": 3.1445765495300293, "learning_rate": 5.5740419894007346e-05, "loss": 2.7732, "step": 52130 }, { "epoch": 3.542261176790325, "grad_norm": 3.6571080684661865, "learning_rate": 5.573617339312407e-05, "loss": 2.7266, "step": 52135 }, { "epoch": 3.5426008968609866, "grad_norm": 2.892589569091797, 
"learning_rate": 5.5731926892240795e-05, "loss": 2.4922, "step": 52140 }, { "epoch": 3.542940616931648, "grad_norm": 2.4424309730529785, "learning_rate": 5.572768039135753e-05, "loss": 2.6639, "step": 52145 }, { "epoch": 3.54328033700231, "grad_norm": 2.3522567749023438, "learning_rate": 5.572343389047425e-05, "loss": 2.5411, "step": 52150 }, { "epoch": 3.543620057072972, "grad_norm": 2.2572295665740967, "learning_rate": 5.571918738959098e-05, "loss": 2.8366, "step": 52155 }, { "epoch": 3.5439597771436335, "grad_norm": 4.067531108856201, "learning_rate": 5.5714940888707714e-05, "loss": 2.8553, "step": 52160 }, { "epoch": 3.544299497214295, "grad_norm": 2.7806453704833984, "learning_rate": 5.5710694387824435e-05, "loss": 2.6496, "step": 52165 }, { "epoch": 3.5446392172849572, "grad_norm": 3.0721306800842285, "learning_rate": 5.570644788694116e-05, "loss": 2.5579, "step": 52170 }, { "epoch": 3.544978937355619, "grad_norm": 2.887892246246338, "learning_rate": 5.57022013860579e-05, "loss": 2.4955, "step": 52175 }, { "epoch": 3.5453186574262805, "grad_norm": 2.9305930137634277, "learning_rate": 5.569795488517462e-05, "loss": 2.5507, "step": 52180 }, { "epoch": 3.5456583774969426, "grad_norm": 2.469559907913208, "learning_rate": 5.569370838429134e-05, "loss": 2.3222, "step": 52185 }, { "epoch": 3.545998097567604, "grad_norm": 2.843297243118286, "learning_rate": 5.5689461883408075e-05, "loss": 2.6724, "step": 52190 }, { "epoch": 3.546337817638266, "grad_norm": 2.775986671447754, "learning_rate": 5.56852153825248e-05, "loss": 2.363, "step": 52195 }, { "epoch": 3.546677537708928, "grad_norm": 3.5917201042175293, "learning_rate": 5.5680968881641524e-05, "loss": 2.5072, "step": 52200 }, { "epoch": 3.5470172577795895, "grad_norm": 3.096494674682617, "learning_rate": 5.567672238075826e-05, "loss": 2.5529, "step": 52205 }, { "epoch": 3.547356977850251, "grad_norm": 3.027320384979248, "learning_rate": 5.567247587987499e-05, "loss": 2.6046, "step": 52210 }, { "epoch": 
3.5476966979209132, "grad_norm": 3.125319480895996, "learning_rate": 5.566822937899171e-05, "loss": 2.7196, "step": 52215 }, { "epoch": 3.548036417991575, "grad_norm": 3.218252182006836, "learning_rate": 5.566398287810844e-05, "loss": 2.5769, "step": 52220 }, { "epoch": 3.5483761380622365, "grad_norm": 2.6914479732513428, "learning_rate": 5.565973637722517e-05, "loss": 2.7244, "step": 52225 }, { "epoch": 3.5487158581328986, "grad_norm": 2.8633227348327637, "learning_rate": 5.565548987634189e-05, "loss": 2.6585, "step": 52230 }, { "epoch": 3.54905557820356, "grad_norm": 2.563129425048828, "learning_rate": 5.565124337545863e-05, "loss": 2.609, "step": 52235 }, { "epoch": 3.549395298274222, "grad_norm": 2.977673053741455, "learning_rate": 5.5646996874575355e-05, "loss": 2.7376, "step": 52240 }, { "epoch": 3.549735018344884, "grad_norm": 3.6967363357543945, "learning_rate": 5.5642750373692076e-05, "loss": 2.7695, "step": 52245 }, { "epoch": 3.5500747384155455, "grad_norm": 3.462928295135498, "learning_rate": 5.563850387280881e-05, "loss": 2.6054, "step": 52250 }, { "epoch": 3.550414458486207, "grad_norm": 2.4181928634643555, "learning_rate": 5.563425737192553e-05, "loss": 2.6812, "step": 52255 }, { "epoch": 3.5507541785568693, "grad_norm": 3.225978136062622, "learning_rate": 5.563001087104226e-05, "loss": 2.5515, "step": 52260 }, { "epoch": 3.551093898627531, "grad_norm": 2.5103952884674072, "learning_rate": 5.5625764370158995e-05, "loss": 2.6857, "step": 52265 }, { "epoch": 3.5514336186981925, "grad_norm": 2.7031679153442383, "learning_rate": 5.5621517869275716e-05, "loss": 2.5659, "step": 52270 }, { "epoch": 3.5517733387688546, "grad_norm": 2.48359751701355, "learning_rate": 5.5617271368392445e-05, "loss": 2.8548, "step": 52275 }, { "epoch": 3.5521130588395162, "grad_norm": 3.3835678100585938, "learning_rate": 5.561302486750918e-05, "loss": 2.7051, "step": 52280 }, { "epoch": 3.552452778910178, "grad_norm": 2.973233699798584, "learning_rate": 5.56087783666259e-05, 
"loss": 2.7595, "step": 52285 }, { "epoch": 3.55279249898084, "grad_norm": 2.532942295074463, "learning_rate": 5.560453186574263e-05, "loss": 2.7193, "step": 52290 }, { "epoch": 3.5531322190515016, "grad_norm": 3.6456615924835205, "learning_rate": 5.560028536485936e-05, "loss": 2.9684, "step": 52295 }, { "epoch": 3.553471939122163, "grad_norm": 2.6713504791259766, "learning_rate": 5.5596038863976085e-05, "loss": 2.5564, "step": 52300 }, { "epoch": 3.5538116591928253, "grad_norm": 2.8890509605407715, "learning_rate": 5.559179236309281e-05, "loss": 3.0074, "step": 52305 }, { "epoch": 3.554151379263487, "grad_norm": 3.2029647827148438, "learning_rate": 5.558754586220955e-05, "loss": 2.4704, "step": 52310 }, { "epoch": 3.5544910993341485, "grad_norm": 2.6607916355133057, "learning_rate": 5.558329936132627e-05, "loss": 2.7024, "step": 52315 }, { "epoch": 3.5548308194048106, "grad_norm": 2.8625760078430176, "learning_rate": 5.557905286044299e-05, "loss": 2.6972, "step": 52320 }, { "epoch": 3.5551705394754722, "grad_norm": 2.2975263595581055, "learning_rate": 5.5574806359559725e-05, "loss": 2.8351, "step": 52325 }, { "epoch": 3.555510259546134, "grad_norm": 2.9054412841796875, "learning_rate": 5.557055985867645e-05, "loss": 2.6556, "step": 52330 }, { "epoch": 3.555849979616796, "grad_norm": 3.630976438522339, "learning_rate": 5.5566313357793174e-05, "loss": 2.8565, "step": 52335 }, { "epoch": 3.5561896996874576, "grad_norm": 2.8161814212799072, "learning_rate": 5.556206685690991e-05, "loss": 2.6026, "step": 52340 }, { "epoch": 3.556529419758119, "grad_norm": 3.2018356323242188, "learning_rate": 5.555782035602664e-05, "loss": 2.3137, "step": 52345 }, { "epoch": 3.5568691398287813, "grad_norm": 2.6659915447235107, "learning_rate": 5.555357385514336e-05, "loss": 2.5747, "step": 52350 }, { "epoch": 3.557208859899443, "grad_norm": 3.3127007484436035, "learning_rate": 5.554932735426009e-05, "loss": 2.6577, "step": 52355 }, { "epoch": 3.5575485799701045, "grad_norm": 
2.7248642444610596, "learning_rate": 5.554508085337682e-05, "loss": 2.7703, "step": 52360 }, { "epoch": 3.5578883000407666, "grad_norm": 2.6688430309295654, "learning_rate": 5.554168365267021e-05, "loss": 2.4781, "step": 52365 }, { "epoch": 3.5582280201114282, "grad_norm": 2.680729866027832, "learning_rate": 5.553743715178693e-05, "loss": 2.6838, "step": 52370 }, { "epoch": 3.55856774018209, "grad_norm": 3.0587782859802246, "learning_rate": 5.553319065090365e-05, "loss": 2.539, "step": 52375 }, { "epoch": 3.558907460252752, "grad_norm": 2.8909318447113037, "learning_rate": 5.552894415002039e-05, "loss": 2.7996, "step": 52380 }, { "epoch": 3.5592471803234136, "grad_norm": 2.9270362854003906, "learning_rate": 5.5524697649137115e-05, "loss": 2.5241, "step": 52385 }, { "epoch": 3.559586900394075, "grad_norm": 3.4596669673919678, "learning_rate": 5.5520451148253836e-05, "loss": 2.4679, "step": 52390 }, { "epoch": 3.5599266204647373, "grad_norm": 4.133945465087891, "learning_rate": 5.551620464737057e-05, "loss": 2.5228, "step": 52395 }, { "epoch": 3.560266340535399, "grad_norm": 2.8870537281036377, "learning_rate": 5.55119581464873e-05, "loss": 2.6803, "step": 52400 }, { "epoch": 3.5606060606060606, "grad_norm": 2.893031597137451, "learning_rate": 5.550771164560402e-05, "loss": 2.6156, "step": 52405 }, { "epoch": 3.5609457806767226, "grad_norm": 2.7695541381835938, "learning_rate": 5.5503465144720755e-05, "loss": 2.5281, "step": 52410 }, { "epoch": 3.5612855007473843, "grad_norm": 3.036602258682251, "learning_rate": 5.549921864383748e-05, "loss": 2.6629, "step": 52415 }, { "epoch": 3.561625220818046, "grad_norm": 3.139230251312256, "learning_rate": 5.5494972142954204e-05, "loss": 2.7036, "step": 52420 }, { "epoch": 3.561964940888708, "grad_norm": 2.699061155319214, "learning_rate": 5.549072564207094e-05, "loss": 2.7395, "step": 52425 }, { "epoch": 3.5623046609593696, "grad_norm": 2.9757511615753174, "learning_rate": 5.548647914118767e-05, "loss": 2.3656, "step": 52430 }, 
{ "epoch": 3.5626443810300312, "grad_norm": 2.7541139125823975, "learning_rate": 5.548223264030439e-05, "loss": 2.7607, "step": 52435 }, { "epoch": 3.5629841011006933, "grad_norm": 3.1184229850769043, "learning_rate": 5.547798613942112e-05, "loss": 2.4661, "step": 52440 }, { "epoch": 3.563323821171355, "grad_norm": 3.408555269241333, "learning_rate": 5.5473739638537844e-05, "loss": 2.5125, "step": 52445 }, { "epoch": 3.5636635412420166, "grad_norm": 3.1734750270843506, "learning_rate": 5.546949313765457e-05, "loss": 2.559, "step": 52450 }, { "epoch": 3.564003261312678, "grad_norm": 4.116896629333496, "learning_rate": 5.546524663677131e-05, "loss": 2.5826, "step": 52455 }, { "epoch": 3.5643429813833403, "grad_norm": 3.1547484397888184, "learning_rate": 5.546100013588803e-05, "loss": 2.8051, "step": 52460 }, { "epoch": 3.564682701454002, "grad_norm": 2.7692501544952393, "learning_rate": 5.5456753635004756e-05, "loss": 2.6007, "step": 52465 }, { "epoch": 3.5650224215246635, "grad_norm": 2.7349326610565186, "learning_rate": 5.545250713412149e-05, "loss": 2.5137, "step": 52470 }, { "epoch": 3.5653621415953256, "grad_norm": 4.172150135040283, "learning_rate": 5.544826063323821e-05, "loss": 2.6842, "step": 52475 }, { "epoch": 3.5657018616659872, "grad_norm": 3.1102311611175537, "learning_rate": 5.544401413235494e-05, "loss": 2.8083, "step": 52480 }, { "epoch": 3.566041581736649, "grad_norm": 2.9621498584747314, "learning_rate": 5.5439767631471675e-05, "loss": 2.6522, "step": 52485 }, { "epoch": 3.5663813018073105, "grad_norm": 3.1642303466796875, "learning_rate": 5.5435521130588396e-05, "loss": 2.658, "step": 52490 }, { "epoch": 3.5667210218779726, "grad_norm": 2.8746607303619385, "learning_rate": 5.5431274629705124e-05, "loss": 2.9731, "step": 52495 }, { "epoch": 3.567060741948634, "grad_norm": 2.5396525859832764, "learning_rate": 5.542702812882186e-05, "loss": 2.5648, "step": 52500 }, { "epoch": 3.567400462019296, "grad_norm": 2.4780197143554688, "learning_rate": 
5.542278162793858e-05, "loss": 2.6809, "step": 52505 }, { "epoch": 3.567740182089958, "grad_norm": 2.515576124191284, "learning_rate": 5.54185351270553e-05, "loss": 2.5731, "step": 52510 }, { "epoch": 3.5680799021606195, "grad_norm": 2.595015525817871, "learning_rate": 5.5414288626172036e-05, "loss": 2.4988, "step": 52515 }, { "epoch": 3.568419622231281, "grad_norm": 2.8311767578125, "learning_rate": 5.5410042125288765e-05, "loss": 2.7111, "step": 52520 }, { "epoch": 3.5687593423019432, "grad_norm": 3.008960247039795, "learning_rate": 5.5405795624405486e-05, "loss": 2.9857, "step": 52525 }, { "epoch": 3.569099062372605, "grad_norm": 3.004916191101074, "learning_rate": 5.540154912352222e-05, "loss": 2.6368, "step": 52530 }, { "epoch": 3.5694387824432665, "grad_norm": 4.756645202636719, "learning_rate": 5.539730262263895e-05, "loss": 2.5221, "step": 52535 }, { "epoch": 3.5697785025139286, "grad_norm": 2.8137848377227783, "learning_rate": 5.539305612175567e-05, "loss": 3.0167, "step": 52540 }, { "epoch": 3.57011822258459, "grad_norm": 3.3494200706481934, "learning_rate": 5.5388809620872405e-05, "loss": 2.6497, "step": 52545 }, { "epoch": 3.570457942655252, "grad_norm": 3.320756673812866, "learning_rate": 5.538456311998913e-05, "loss": 2.746, "step": 52550 }, { "epoch": 3.570797662725914, "grad_norm": 3.4638407230377197, "learning_rate": 5.5380316619105854e-05, "loss": 2.7196, "step": 52555 }, { "epoch": 3.5711373827965756, "grad_norm": 3.374370574951172, "learning_rate": 5.537607011822259e-05, "loss": 2.6648, "step": 52560 }, { "epoch": 3.571477102867237, "grad_norm": 2.806990623474121, "learning_rate": 5.5371823617339317e-05, "loss": 2.6222, "step": 52565 }, { "epoch": 3.5718168229378993, "grad_norm": 2.5199320316314697, "learning_rate": 5.536757711645604e-05, "loss": 2.6994, "step": 52570 }, { "epoch": 3.572156543008561, "grad_norm": 2.833709955215454, "learning_rate": 5.536333061557277e-05, "loss": 2.7361, "step": 52575 }, { "epoch": 3.5724962630792225, 
"grad_norm": 3.156615972518921, "learning_rate": 5.5359084114689494e-05, "loss": 2.6164, "step": 52580 }, { "epoch": 3.5728359831498846, "grad_norm": 3.184270143508911, "learning_rate": 5.535483761380622e-05, "loss": 2.9133, "step": 52585 }, { "epoch": 3.5731757032205462, "grad_norm": 3.183936834335327, "learning_rate": 5.535059111292296e-05, "loss": 2.7994, "step": 52590 }, { "epoch": 3.573515423291208, "grad_norm": 3.231081008911133, "learning_rate": 5.534634461203968e-05, "loss": 2.5703, "step": 52595 }, { "epoch": 3.57385514336187, "grad_norm": 2.8645737171173096, "learning_rate": 5.534209811115641e-05, "loss": 2.6108, "step": 52600 }, { "epoch": 3.5741948634325316, "grad_norm": 2.3522181510925293, "learning_rate": 5.533785161027314e-05, "loss": 2.6285, "step": 52605 }, { "epoch": 3.574534583503193, "grad_norm": 3.6320674419403076, "learning_rate": 5.533360510938986e-05, "loss": 2.908, "step": 52610 }, { "epoch": 3.5748743035738553, "grad_norm": 4.187628269195557, "learning_rate": 5.53293586085066e-05, "loss": 2.5714, "step": 52615 }, { "epoch": 3.575214023644517, "grad_norm": 2.804564952850342, "learning_rate": 5.5325112107623325e-05, "loss": 2.607, "step": 52620 }, { "epoch": 3.5755537437151785, "grad_norm": 2.91055965423584, "learning_rate": 5.5320865606740046e-05, "loss": 2.7316, "step": 52625 }, { "epoch": 3.5758934637858406, "grad_norm": 3.214878559112549, "learning_rate": 5.531661910585678e-05, "loss": 2.7625, "step": 52630 }, { "epoch": 3.5762331838565022, "grad_norm": 2.484912633895874, "learning_rate": 5.531237260497351e-05, "loss": 2.4574, "step": 52635 }, { "epoch": 3.576572903927164, "grad_norm": 3.4525578022003174, "learning_rate": 5.530812610409023e-05, "loss": 2.4648, "step": 52640 }, { "epoch": 3.576912623997826, "grad_norm": 2.585263729095459, "learning_rate": 5.5303879603206965e-05, "loss": 2.6039, "step": 52645 }, { "epoch": 3.5772523440684876, "grad_norm": 2.939793348312378, "learning_rate": 5.5299633102323686e-05, "loss": 2.623, "step": 
52650 }, { "epoch": 3.577592064139149, "grad_norm": 2.787386417388916, "learning_rate": 5.5295386601440414e-05, "loss": 2.6772, "step": 52655 }, { "epoch": 3.5779317842098113, "grad_norm": 2.8600518703460693, "learning_rate": 5.529114010055715e-05, "loss": 2.4486, "step": 52660 }, { "epoch": 3.578271504280473, "grad_norm": 2.908708095550537, "learning_rate": 5.528689359967387e-05, "loss": 2.5186, "step": 52665 }, { "epoch": 3.5786112243511345, "grad_norm": 2.775031089782715, "learning_rate": 5.52826470987906e-05, "loss": 2.6699, "step": 52670 }, { "epoch": 3.5789509444217966, "grad_norm": 3.3414041996002197, "learning_rate": 5.527840059790733e-05, "loss": 2.6122, "step": 52675 }, { "epoch": 3.5792906644924583, "grad_norm": 2.151859760284424, "learning_rate": 5.5274154097024054e-05, "loss": 2.7559, "step": 52680 }, { "epoch": 3.57963038456312, "grad_norm": 2.730217218399048, "learning_rate": 5.526990759614078e-05, "loss": 2.7584, "step": 52685 }, { "epoch": 3.579970104633782, "grad_norm": 2.857921838760376, "learning_rate": 5.526566109525752e-05, "loss": 2.8245, "step": 52690 }, { "epoch": 3.5803098247044436, "grad_norm": 2.9230616092681885, "learning_rate": 5.526141459437424e-05, "loss": 2.6724, "step": 52695 }, { "epoch": 3.580649544775105, "grad_norm": 2.816019296646118, "learning_rate": 5.5257168093490966e-05, "loss": 2.8149, "step": 52700 }, { "epoch": 3.5809892648457673, "grad_norm": 2.7677793502807617, "learning_rate": 5.52529215926077e-05, "loss": 2.7396, "step": 52705 }, { "epoch": 3.581328984916429, "grad_norm": 2.710733652114868, "learning_rate": 5.524867509172442e-05, "loss": 2.706, "step": 52710 }, { "epoch": 3.5816687049870906, "grad_norm": 2.4737489223480225, "learning_rate": 5.524442859084114e-05, "loss": 2.6162, "step": 52715 }, { "epoch": 3.5820084250577526, "grad_norm": 2.992745876312256, "learning_rate": 5.5240182089957885e-05, "loss": 2.6922, "step": 52720 }, { "epoch": 3.5823481451284143, "grad_norm": 2.8625340461730957, "learning_rate": 
5.5235935589074606e-05, "loss": 2.768, "step": 52725 }, { "epoch": 3.582687865199076, "grad_norm": 2.8656864166259766, "learning_rate": 5.523168908819133e-05, "loss": 2.5823, "step": 52730 }, { "epoch": 3.583027585269738, "grad_norm": 2.7193076610565186, "learning_rate": 5.522744258730806e-05, "loss": 2.6048, "step": 52735 }, { "epoch": 3.5833673053403996, "grad_norm": 3.4697396755218506, "learning_rate": 5.522319608642479e-05, "loss": 2.5188, "step": 52740 }, { "epoch": 3.5837070254110612, "grad_norm": 2.591480255126953, "learning_rate": 5.521894958554151e-05, "loss": 2.2473, "step": 52745 }, { "epoch": 3.5840467454817233, "grad_norm": 2.7014617919921875, "learning_rate": 5.5214703084658246e-05, "loss": 2.6989, "step": 52750 }, { "epoch": 3.584386465552385, "grad_norm": 3.0379836559295654, "learning_rate": 5.5210456583774974e-05, "loss": 2.4862, "step": 52755 }, { "epoch": 3.5847261856230466, "grad_norm": 3.2977941036224365, "learning_rate": 5.5206210082891695e-05, "loss": 2.8019, "step": 52760 }, { "epoch": 3.5850659056937086, "grad_norm": 3.3002171516418457, "learning_rate": 5.520196358200843e-05, "loss": 2.5951, "step": 52765 }, { "epoch": 3.5854056257643703, "grad_norm": 2.5679335594177246, "learning_rate": 5.519771708112516e-05, "loss": 2.7366, "step": 52770 }, { "epoch": 3.585745345835032, "grad_norm": 2.8284642696380615, "learning_rate": 5.519347058024188e-05, "loss": 2.631, "step": 52775 }, { "epoch": 3.586085065905694, "grad_norm": 2.02429461479187, "learning_rate": 5.5189224079358614e-05, "loss": 2.7478, "step": 52780 }, { "epoch": 3.5864247859763556, "grad_norm": 3.0977795124053955, "learning_rate": 5.5184977578475335e-05, "loss": 2.3908, "step": 52785 }, { "epoch": 3.5867645060470172, "grad_norm": 2.621243953704834, "learning_rate": 5.518073107759206e-05, "loss": 2.8546, "step": 52790 }, { "epoch": 3.587104226117679, "grad_norm": 2.6012892723083496, "learning_rate": 5.51764845767088e-05, "loss": 2.9267, "step": 52795 }, { "epoch": 3.587443946188341, 
"grad_norm": 2.401249885559082, "learning_rate": 5.517223807582552e-05, "loss": 2.8662, "step": 52800 }, { "epoch": 3.5877836662590026, "grad_norm": 2.8981668949127197, "learning_rate": 5.516799157494225e-05, "loss": 2.7777, "step": 52805 }, { "epoch": 3.588123386329664, "grad_norm": 2.888399362564087, "learning_rate": 5.516374507405898e-05, "loss": 3.132, "step": 52810 }, { "epoch": 3.5884631064003263, "grad_norm": 3.0416738986968994, "learning_rate": 5.5159498573175703e-05, "loss": 2.7793, "step": 52815 }, { "epoch": 3.588802826470988, "grad_norm": 3.045759916305542, "learning_rate": 5.515525207229243e-05, "loss": 2.7329, "step": 52820 }, { "epoch": 3.5891425465416495, "grad_norm": 3.1122212409973145, "learning_rate": 5.5151005571409166e-05, "loss": 2.6677, "step": 52825 }, { "epoch": 3.589482266612311, "grad_norm": 3.1478350162506104, "learning_rate": 5.514675907052589e-05, "loss": 2.8631, "step": 52830 }, { "epoch": 3.5898219866829733, "grad_norm": 3.3304598331451416, "learning_rate": 5.5142512569642615e-05, "loss": 2.6015, "step": 52835 }, { "epoch": 3.590161706753635, "grad_norm": 2.7094743251800537, "learning_rate": 5.513826606875935e-05, "loss": 2.8091, "step": 52840 }, { "epoch": 3.5905014268242965, "grad_norm": 3.2078845500946045, "learning_rate": 5.513401956787607e-05, "loss": 2.7099, "step": 52845 }, { "epoch": 3.5908411468949586, "grad_norm": 3.08577036857605, "learning_rate": 5.512977306699279e-05, "loss": 2.7335, "step": 52850 }, { "epoch": 3.59118086696562, "grad_norm": 3.58479905128479, "learning_rate": 5.5125526566109534e-05, "loss": 2.6731, "step": 52855 }, { "epoch": 3.591520587036282, "grad_norm": 2.345681667327881, "learning_rate": 5.5121280065226255e-05, "loss": 2.5239, "step": 52860 }, { "epoch": 3.591860307106944, "grad_norm": 3.0536723136901855, "learning_rate": 5.511703356434298e-05, "loss": 2.5415, "step": 52865 }, { "epoch": 3.5922000271776056, "grad_norm": 3.7524733543395996, "learning_rate": 5.511278706345971e-05, "loss": 2.6988, 
"step": 52870 }, { "epoch": 3.592539747248267, "grad_norm": 2.622236967086792, "learning_rate": 5.510854056257644e-05, "loss": 2.4092, "step": 52875 }, { "epoch": 3.5928794673189293, "grad_norm": 2.7898497581481934, "learning_rate": 5.510429406169316e-05, "loss": 2.7619, "step": 52880 }, { "epoch": 3.593219187389591, "grad_norm": 2.949436664581299, "learning_rate": 5.5100047560809896e-05, "loss": 2.6948, "step": 52885 }, { "epoch": 3.5935589074602525, "grad_norm": 2.31866717338562, "learning_rate": 5.5095801059926624e-05, "loss": 2.7283, "step": 52890 }, { "epoch": 3.5938986275309146, "grad_norm": 2.3303449153900146, "learning_rate": 5.5091554559043345e-05, "loss": 2.6912, "step": 52895 }, { "epoch": 3.5942383476015762, "grad_norm": 2.575329542160034, "learning_rate": 5.508730805816008e-05, "loss": 2.5701, "step": 52900 }, { "epoch": 3.594578067672238, "grad_norm": 2.884255886077881, "learning_rate": 5.508306155727681e-05, "loss": 2.5436, "step": 52905 }, { "epoch": 3.5949177877429, "grad_norm": 3.1830739974975586, "learning_rate": 5.507881505639353e-05, "loss": 2.6526, "step": 52910 }, { "epoch": 3.5952575078135616, "grad_norm": 3.1192550659179688, "learning_rate": 5.5074568555510264e-05, "loss": 2.8389, "step": 52915 }, { "epoch": 3.595597227884223, "grad_norm": 2.5397868156433105, "learning_rate": 5.507032205462699e-05, "loss": 2.7231, "step": 52920 }, { "epoch": 3.5959369479548853, "grad_norm": 2.4447169303894043, "learning_rate": 5.506607555374371e-05, "loss": 2.2999, "step": 52925 }, { "epoch": 3.596276668025547, "grad_norm": 4.100081920623779, "learning_rate": 5.506182905286045e-05, "loss": 2.672, "step": 52930 }, { "epoch": 3.5966163880962085, "grad_norm": 2.262406826019287, "learning_rate": 5.505758255197717e-05, "loss": 2.6608, "step": 52935 }, { "epoch": 3.5969561081668706, "grad_norm": 3.0814085006713867, "learning_rate": 5.5053336051093904e-05, "loss": 2.6303, "step": 52940 }, { "epoch": 3.5972958282375322, "grad_norm": 3.2071785926818848, 
"learning_rate": 5.504908955021063e-05, "loss": 2.5142, "step": 52945 }, { "epoch": 3.597635548308194, "grad_norm": 2.8602304458618164, "learning_rate": 5.504484304932735e-05, "loss": 2.7107, "step": 52950 }, { "epoch": 3.597975268378856, "grad_norm": 3.255892276763916, "learning_rate": 5.504059654844409e-05, "loss": 2.6556, "step": 52955 }, { "epoch": 3.5983149884495176, "grad_norm": 2.8614649772644043, "learning_rate": 5.5036350047560816e-05, "loss": 2.5393, "step": 52960 }, { "epoch": 3.598654708520179, "grad_norm": 2.670213222503662, "learning_rate": 5.503210354667754e-05, "loss": 2.696, "step": 52965 }, { "epoch": 3.5989944285908413, "grad_norm": 2.7876412868499756, "learning_rate": 5.502785704579427e-05, "loss": 2.6874, "step": 52970 }, { "epoch": 3.599334148661503, "grad_norm": 2.3092386722564697, "learning_rate": 5.5023610544911e-05, "loss": 2.7457, "step": 52975 }, { "epoch": 3.5996738687321646, "grad_norm": 2.8101553916931152, "learning_rate": 5.501936404402772e-05, "loss": 2.5901, "step": 52980 }, { "epoch": 3.6000135888028266, "grad_norm": 3.028923273086548, "learning_rate": 5.5015117543144456e-05, "loss": 2.3318, "step": 52985 }, { "epoch": 3.6003533088734883, "grad_norm": 2.9495420455932617, "learning_rate": 5.5010871042261184e-05, "loss": 2.5094, "step": 52990 }, { "epoch": 3.60069302894415, "grad_norm": 2.878655195236206, "learning_rate": 5.5006624541377905e-05, "loss": 2.9436, "step": 52995 }, { "epoch": 3.601032749014812, "grad_norm": 2.527127742767334, "learning_rate": 5.500237804049464e-05, "loss": 2.6019, "step": 53000 }, { "epoch": 3.6013724690854736, "grad_norm": 2.293062210083008, "learning_rate": 5.499813153961136e-05, "loss": 2.6994, "step": 53005 }, { "epoch": 3.6017121891561352, "grad_norm": 3.6823818683624268, "learning_rate": 5.499388503872809e-05, "loss": 2.5676, "step": 53010 }, { "epoch": 3.6020519092267973, "grad_norm": 2.6705315113067627, "learning_rate": 5.4989638537844824e-05, "loss": 2.578, "step": 53015 }, { "epoch": 
3.602391629297459, "grad_norm": 2.685978412628174, "learning_rate": 5.4985392036961545e-05, "loss": 2.5083, "step": 53020 }, { "epoch": 3.6027313493681206, "grad_norm": 3.000995397567749, "learning_rate": 5.498114553607827e-05, "loss": 2.4466, "step": 53025 }, { "epoch": 3.6030710694387826, "grad_norm": 3.506591796875, "learning_rate": 5.497689903519501e-05, "loss": 2.5981, "step": 53030 }, { "epoch": 3.6034107895094443, "grad_norm": 2.787928342819214, "learning_rate": 5.497265253431173e-05, "loss": 2.6483, "step": 53035 }, { "epoch": 3.603750509580106, "grad_norm": 3.134399652481079, "learning_rate": 5.496840603342846e-05, "loss": 2.6016, "step": 53040 }, { "epoch": 3.604090229650768, "grad_norm": 3.5533246994018555, "learning_rate": 5.496415953254519e-05, "loss": 2.5582, "step": 53045 }, { "epoch": 3.6044299497214296, "grad_norm": 3.1755716800689697, "learning_rate": 5.495991303166191e-05, "loss": 2.5149, "step": 53050 }, { "epoch": 3.6047696697920912, "grad_norm": 2.3805689811706543, "learning_rate": 5.495566653077864e-05, "loss": 2.8571, "step": 53055 }, { "epoch": 3.6051093898627533, "grad_norm": 2.7575631141662598, "learning_rate": 5.4951420029895376e-05, "loss": 2.6951, "step": 53060 }, { "epoch": 3.605449109933415, "grad_norm": 3.0349316596984863, "learning_rate": 5.49471735290121e-05, "loss": 2.6491, "step": 53065 }, { "epoch": 3.6057888300040766, "grad_norm": 2.2695538997650146, "learning_rate": 5.494292702812882e-05, "loss": 2.7194, "step": 53070 }, { "epoch": 3.6061285500747386, "grad_norm": 3.5160157680511475, "learning_rate": 5.493868052724555e-05, "loss": 2.5368, "step": 53075 }, { "epoch": 3.6064682701454003, "grad_norm": 3.0349624156951904, "learning_rate": 5.493443402636228e-05, "loss": 2.5951, "step": 53080 }, { "epoch": 3.606807990216062, "grad_norm": 2.3521804809570312, "learning_rate": 5.4930187525479e-05, "loss": 2.3653, "step": 53085 }, { "epoch": 3.607147710286724, "grad_norm": 2.740743398666382, "learning_rate": 5.492594102459574e-05, 
"loss": 2.4884, "step": 53090 }, { "epoch": 3.6074874303573856, "grad_norm": 2.4644622802734375, "learning_rate": 5.4921694523712465e-05, "loss": 2.4439, "step": 53095 }, { "epoch": 3.6078271504280472, "grad_norm": 2.587235689163208, "learning_rate": 5.4917448022829186e-05, "loss": 2.7281, "step": 53100 }, { "epoch": 3.6081668704987093, "grad_norm": 2.408263683319092, "learning_rate": 5.491320152194592e-05, "loss": 2.7712, "step": 53105 }, { "epoch": 3.608506590569371, "grad_norm": 2.8748698234558105, "learning_rate": 5.490895502106265e-05, "loss": 2.4224, "step": 53110 }, { "epoch": 3.6088463106400326, "grad_norm": 2.385756015777588, "learning_rate": 5.490470852017937e-05, "loss": 2.7882, "step": 53115 }, { "epoch": 3.6091860307106947, "grad_norm": 3.018146514892578, "learning_rate": 5.4900462019296105e-05, "loss": 2.6565, "step": 53120 }, { "epoch": 3.6095257507813563, "grad_norm": 3.291782855987549, "learning_rate": 5.489621551841283e-05, "loss": 2.7429, "step": 53125 }, { "epoch": 3.609865470852018, "grad_norm": 2.723959445953369, "learning_rate": 5.4891969017529554e-05, "loss": 2.5847, "step": 53130 }, { "epoch": 3.6102051909226796, "grad_norm": 2.9151530265808105, "learning_rate": 5.488772251664629e-05, "loss": 2.9346, "step": 53135 }, { "epoch": 3.6105449109933416, "grad_norm": 3.0353589057922363, "learning_rate": 5.488347601576301e-05, "loss": 2.5455, "step": 53140 }, { "epoch": 3.6108846310640033, "grad_norm": 3.5040016174316406, "learning_rate": 5.487922951487974e-05, "loss": 2.5746, "step": 53145 }, { "epoch": 3.611224351134665, "grad_norm": 3.093935966491699, "learning_rate": 5.487498301399647e-05, "loss": 2.8601, "step": 53150 }, { "epoch": 3.611564071205327, "grad_norm": 2.5085012912750244, "learning_rate": 5.4870736513113194e-05, "loss": 2.6071, "step": 53155 }, { "epoch": 3.6119037912759886, "grad_norm": 3.6084539890289307, "learning_rate": 5.486649001222992e-05, "loss": 2.5483, "step": 53160 }, { "epoch": 3.6122435113466502, "grad_norm": 
2.8857126235961914, "learning_rate": 5.486224351134666e-05, "loss": 2.8749, "step": 53165 }, { "epoch": 3.612583231417312, "grad_norm": 2.399291753768921, "learning_rate": 5.485799701046338e-05, "loss": 2.5194, "step": 53170 }, { "epoch": 3.612922951487974, "grad_norm": 3.180344343185425, "learning_rate": 5.4853750509580106e-05, "loss": 2.4061, "step": 53175 }, { "epoch": 3.6132626715586356, "grad_norm": 3.086031675338745, "learning_rate": 5.484950400869684e-05, "loss": 2.7398, "step": 53180 }, { "epoch": 3.613602391629297, "grad_norm": 3.445481300354004, "learning_rate": 5.484525750781356e-05, "loss": 2.6904, "step": 53185 }, { "epoch": 3.6139421116999593, "grad_norm": 3.5798773765563965, "learning_rate": 5.484101100693029e-05, "loss": 2.7346, "step": 53190 }, { "epoch": 3.614281831770621, "grad_norm": 3.126305341720581, "learning_rate": 5.4836764506047025e-05, "loss": 2.6878, "step": 53195 }, { "epoch": 3.6146215518412825, "grad_norm": 3.8366825580596924, "learning_rate": 5.4832518005163746e-05, "loss": 2.6277, "step": 53200 }, { "epoch": 3.6149612719119446, "grad_norm": 3.3250904083251953, "learning_rate": 5.482827150428047e-05, "loss": 2.5304, "step": 53205 }, { "epoch": 3.6153009919826062, "grad_norm": 2.88128924369812, "learning_rate": 5.48240250033972e-05, "loss": 2.8218, "step": 53210 }, { "epoch": 3.615640712053268, "grad_norm": 3.8187129497528076, "learning_rate": 5.481977850251393e-05, "loss": 2.8233, "step": 53215 }, { "epoch": 3.61598043212393, "grad_norm": 2.420917510986328, "learning_rate": 5.481553200163065e-05, "loss": 2.6587, "step": 53220 }, { "epoch": 3.6163201521945916, "grad_norm": 2.7580041885375977, "learning_rate": 5.4811285500747386e-05, "loss": 2.4665, "step": 53225 }, { "epoch": 3.616659872265253, "grad_norm": 2.8408052921295166, "learning_rate": 5.4807038999864114e-05, "loss": 2.6703, "step": 53230 }, { "epoch": 3.6169995923359153, "grad_norm": 2.820956230163574, "learning_rate": 5.4802792498980836e-05, "loss": 2.7173, "step": 53235 }, 
{ "epoch": 3.617339312406577, "grad_norm": 3.618184804916382, "learning_rate": 5.479854599809757e-05, "loss": 2.7723, "step": 53240 }, { "epoch": 3.6176790324772385, "grad_norm": 3.3565938472747803, "learning_rate": 5.47942994972143e-05, "loss": 2.5099, "step": 53245 }, { "epoch": 3.6180187525479006, "grad_norm": 2.8368425369262695, "learning_rate": 5.479005299633102e-05, "loss": 2.6956, "step": 53250 }, { "epoch": 3.6183584726185622, "grad_norm": 3.2090811729431152, "learning_rate": 5.4785806495447755e-05, "loss": 2.6166, "step": 53255 }, { "epoch": 3.618698192689224, "grad_norm": 2.7484230995178223, "learning_rate": 5.478155999456448e-05, "loss": 2.4952, "step": 53260 }, { "epoch": 3.619037912759886, "grad_norm": 3.370145320892334, "learning_rate": 5.4777313493681204e-05, "loss": 2.6519, "step": 53265 }, { "epoch": 3.6193776328305476, "grad_norm": 3.727430820465088, "learning_rate": 5.477306699279794e-05, "loss": 2.524, "step": 53270 }, { "epoch": 3.619717352901209, "grad_norm": 2.4824416637420654, "learning_rate": 5.476882049191466e-05, "loss": 2.5857, "step": 53275 }, { "epoch": 3.6200570729718713, "grad_norm": 3.2881078720092773, "learning_rate": 5.47645739910314e-05, "loss": 2.5965, "step": 53280 }, { "epoch": 3.620396793042533, "grad_norm": 3.1078624725341797, "learning_rate": 5.476032749014812e-05, "loss": 2.5652, "step": 53285 }, { "epoch": 3.6207365131131946, "grad_norm": 2.64428448677063, "learning_rate": 5.4756080989264844e-05, "loss": 2.64, "step": 53290 }, { "epoch": 3.6210762331838566, "grad_norm": 3.088085889816284, "learning_rate": 5.475183448838158e-05, "loss": 2.6032, "step": 53295 }, { "epoch": 3.6214159532545183, "grad_norm": 3.1999409198760986, "learning_rate": 5.4747587987498307e-05, "loss": 2.7245, "step": 53300 }, { "epoch": 3.62175567332518, "grad_norm": 2.7167162895202637, "learning_rate": 5.474334148661503e-05, "loss": 2.7836, "step": 53305 }, { "epoch": 3.622095393395842, "grad_norm": 2.90730881690979, "learning_rate": 
5.473909498573176e-05, "loss": 2.7729, "step": 53310 }, { "epoch": 3.6224351134665036, "grad_norm": 2.402665853500366, "learning_rate": 5.473484848484849e-05, "loss": 2.594, "step": 53315 }, { "epoch": 3.6227748335371652, "grad_norm": 3.7584025859832764, "learning_rate": 5.473060198396521e-05, "loss": 2.3594, "step": 53320 }, { "epoch": 3.6231145536078273, "grad_norm": 2.530648708343506, "learning_rate": 5.472635548308195e-05, "loss": 2.6209, "step": 53325 }, { "epoch": 3.623454273678489, "grad_norm": 3.0844333171844482, "learning_rate": 5.4722108982198675e-05, "loss": 2.6797, "step": 53330 }, { "epoch": 3.6237939937491506, "grad_norm": 2.5358188152313232, "learning_rate": 5.4717862481315396e-05, "loss": 2.6514, "step": 53335 }, { "epoch": 3.6241337138198126, "grad_norm": 3.0186991691589355, "learning_rate": 5.471361598043213e-05, "loss": 2.7466, "step": 53340 }, { "epoch": 3.6244734338904743, "grad_norm": 3.7789254188537598, "learning_rate": 5.470936947954886e-05, "loss": 2.5726, "step": 53345 }, { "epoch": 3.624813153961136, "grad_norm": 2.943664073944092, "learning_rate": 5.470512297866558e-05, "loss": 2.4535, "step": 53350 }, { "epoch": 3.625152874031798, "grad_norm": 2.354891538619995, "learning_rate": 5.4700876477782315e-05, "loss": 2.5381, "step": 53355 }, { "epoch": 3.6254925941024596, "grad_norm": 3.465477466583252, "learning_rate": 5.4696629976899036e-05, "loss": 2.8222, "step": 53360 }, { "epoch": 3.6258323141731212, "grad_norm": 2.3109538555145264, "learning_rate": 5.4692383476015764e-05, "loss": 2.6006, "step": 53365 }, { "epoch": 3.6261720342437833, "grad_norm": 2.060910701751709, "learning_rate": 5.46881369751325e-05, "loss": 2.7886, "step": 53370 }, { "epoch": 3.626511754314445, "grad_norm": 2.495556592941284, "learning_rate": 5.468389047424922e-05, "loss": 2.4788, "step": 53375 }, { "epoch": 3.6268514743851066, "grad_norm": 2.8116753101348877, "learning_rate": 5.467964397336595e-05, "loss": 2.8679, "step": 53380 }, { "epoch": 3.6271911944557687, 
"grad_norm": 2.646369695663452, "learning_rate": 5.467539747248268e-05, "loss": 2.6782, "step": 53385 }, { "epoch": 3.6275309145264303, "grad_norm": 2.9455509185791016, "learning_rate": 5.4671150971599404e-05, "loss": 2.5309, "step": 53390 }, { "epoch": 3.627870634597092, "grad_norm": 3.091667890548706, "learning_rate": 5.466690447071613e-05, "loss": 2.4873, "step": 53395 }, { "epoch": 3.628210354667754, "grad_norm": 3.017653703689575, "learning_rate": 5.466265796983287e-05, "loss": 2.4942, "step": 53400 }, { "epoch": 3.6285500747384156, "grad_norm": 3.087207317352295, "learning_rate": 5.465841146894959e-05, "loss": 2.6546, "step": 53405 }, { "epoch": 3.6288897948090773, "grad_norm": 3.1591267585754395, "learning_rate": 5.465416496806631e-05, "loss": 2.8042, "step": 53410 }, { "epoch": 3.6292295148797393, "grad_norm": 3.3653972148895264, "learning_rate": 5.464991846718305e-05, "loss": 2.4652, "step": 53415 }, { "epoch": 3.629569234950401, "grad_norm": 2.327711582183838, "learning_rate": 5.464567196629977e-05, "loss": 2.5292, "step": 53420 }, { "epoch": 3.6299089550210626, "grad_norm": 2.6656930446624756, "learning_rate": 5.464142546541649e-05, "loss": 2.7769, "step": 53425 }, { "epoch": 3.6302486750917247, "grad_norm": 4.747982501983643, "learning_rate": 5.463717896453323e-05, "loss": 2.6345, "step": 53430 }, { "epoch": 3.6305883951623863, "grad_norm": 3.394649028778076, "learning_rate": 5.4632932463649956e-05, "loss": 2.7924, "step": 53435 }, { "epoch": 3.630928115233048, "grad_norm": 2.9275999069213867, "learning_rate": 5.462868596276668e-05, "loss": 2.6581, "step": 53440 }, { "epoch": 3.63126783530371, "grad_norm": 2.868699073791504, "learning_rate": 5.462443946188341e-05, "loss": 2.5593, "step": 53445 }, { "epoch": 3.6316075553743716, "grad_norm": 2.597003698348999, "learning_rate": 5.462019296100014e-05, "loss": 2.5915, "step": 53450 }, { "epoch": 3.6319472754450333, "grad_norm": 2.4369924068450928, "learning_rate": 5.461594646011686e-05, "loss": 2.8242, 
"step": 53455 }, { "epoch": 3.6322869955156953, "grad_norm": 3.5231173038482666, "learning_rate": 5.4611699959233596e-05, "loss": 2.4, "step": 53460 }, { "epoch": 3.632626715586357, "grad_norm": 2.668994903564453, "learning_rate": 5.4607453458350324e-05, "loss": 2.7237, "step": 53465 }, { "epoch": 3.6329664356570186, "grad_norm": 2.3501148223876953, "learning_rate": 5.4603206957467045e-05, "loss": 2.6112, "step": 53470 }, { "epoch": 3.6333061557276802, "grad_norm": 3.1929283142089844, "learning_rate": 5.459896045658378e-05, "loss": 2.8226, "step": 53475 }, { "epoch": 3.6336458757983423, "grad_norm": 3.6395983695983887, "learning_rate": 5.459471395570051e-05, "loss": 2.5196, "step": 53480 }, { "epoch": 3.633985595869004, "grad_norm": 3.161607503890991, "learning_rate": 5.459046745481723e-05, "loss": 2.4493, "step": 53485 }, { "epoch": 3.6343253159396656, "grad_norm": 3.586505174636841, "learning_rate": 5.4586220953933964e-05, "loss": 2.6826, "step": 53490 }, { "epoch": 3.6346650360103276, "grad_norm": 3.5871284008026123, "learning_rate": 5.4581974453050685e-05, "loss": 2.5601, "step": 53495 }, { "epoch": 3.6350047560809893, "grad_norm": 2.362823009490967, "learning_rate": 5.457772795216741e-05, "loss": 2.6973, "step": 53500 }, { "epoch": 3.635344476151651, "grad_norm": 3.0246388912200928, "learning_rate": 5.457348145128415e-05, "loss": 2.5211, "step": 53505 }, { "epoch": 3.635684196222313, "grad_norm": 2.363642692565918, "learning_rate": 5.456923495040087e-05, "loss": 2.771, "step": 53510 }, { "epoch": 3.6360239162929746, "grad_norm": 2.1949758529663086, "learning_rate": 5.45649884495176e-05, "loss": 2.5003, "step": 53515 }, { "epoch": 3.6363636363636362, "grad_norm": 2.644327163696289, "learning_rate": 5.456074194863433e-05, "loss": 2.8567, "step": 53520 }, { "epoch": 3.636703356434298, "grad_norm": 3.5075619220733643, "learning_rate": 5.4556495447751053e-05, "loss": 2.2926, "step": 53525 }, { "epoch": 3.63704307650496, "grad_norm": 3.019507646560669, 
"learning_rate": 5.455224894686778e-05, "loss": 2.9875, "step": 53530 }, { "epoch": 3.6373827965756216, "grad_norm": 2.413931369781494, "learning_rate": 5.4548002445984516e-05, "loss": 2.8579, "step": 53535 }, { "epoch": 3.637722516646283, "grad_norm": 3.4250686168670654, "learning_rate": 5.454375594510124e-05, "loss": 2.7384, "step": 53540 }, { "epoch": 3.6380622367169453, "grad_norm": 2.2936387062072754, "learning_rate": 5.4539509444217965e-05, "loss": 2.7715, "step": 53545 }, { "epoch": 3.638401956787607, "grad_norm": 3.105081558227539, "learning_rate": 5.45352629433347e-05, "loss": 2.7997, "step": 53550 }, { "epoch": 3.6387416768582685, "grad_norm": 2.745973825454712, "learning_rate": 5.453101644245142e-05, "loss": 2.9619, "step": 53555 }, { "epoch": 3.6390813969289306, "grad_norm": 3.6320221424102783, "learning_rate": 5.452676994156814e-05, "loss": 2.5561, "step": 53560 }, { "epoch": 3.6394211169995923, "grad_norm": 2.906780958175659, "learning_rate": 5.452252344068488e-05, "loss": 2.5451, "step": 53565 }, { "epoch": 3.639760837070254, "grad_norm": 2.874401092529297, "learning_rate": 5.4518276939801605e-05, "loss": 2.5956, "step": 53570 }, { "epoch": 3.640100557140916, "grad_norm": 3.4235966205596924, "learning_rate": 5.451403043891833e-05, "loss": 2.7613, "step": 53575 }, { "epoch": 3.6404402772115776, "grad_norm": 2.667903423309326, "learning_rate": 5.450978393803506e-05, "loss": 2.7561, "step": 53580 }, { "epoch": 3.6407799972822392, "grad_norm": 2.6428465843200684, "learning_rate": 5.450553743715179e-05, "loss": 2.4946, "step": 53585 }, { "epoch": 3.6411197173529013, "grad_norm": 2.9111132621765137, "learning_rate": 5.450129093626851e-05, "loss": 2.9216, "step": 53590 }, { "epoch": 3.641459437423563, "grad_norm": 3.340425968170166, "learning_rate": 5.4497044435385246e-05, "loss": 2.6076, "step": 53595 }, { "epoch": 3.6417991574942246, "grad_norm": 2.9887657165527344, "learning_rate": 5.4492797934501974e-05, "loss": 2.7143, "step": 53600 }, { "epoch": 
3.6421388775648866, "grad_norm": 2.5334179401397705, "learning_rate": 5.4488551433618695e-05, "loss": 2.6357, "step": 53605 }, { "epoch": 3.6424785976355483, "grad_norm": 2.8419458866119385, "learning_rate": 5.448430493273543e-05, "loss": 2.6766, "step": 53610 }, { "epoch": 3.64281831770621, "grad_norm": 3.215965509414673, "learning_rate": 5.448005843185216e-05, "loss": 2.7904, "step": 53615 }, { "epoch": 3.643158037776872, "grad_norm": 2.563445568084717, "learning_rate": 5.447581193096889e-05, "loss": 2.5436, "step": 53620 }, { "epoch": 3.6434977578475336, "grad_norm": 2.7455697059631348, "learning_rate": 5.4471565430085614e-05, "loss": 2.8204, "step": 53625 }, { "epoch": 3.6438374779181952, "grad_norm": 2.9572675228118896, "learning_rate": 5.4467318929202335e-05, "loss": 2.9693, "step": 53630 }, { "epoch": 3.6441771979888573, "grad_norm": 2.860879898071289, "learning_rate": 5.4463072428319076e-05, "loss": 2.6074, "step": 53635 }, { "epoch": 3.644516918059519, "grad_norm": 3.2244226932525635, "learning_rate": 5.44588259274358e-05, "loss": 2.831, "step": 53640 }, { "epoch": 3.6448566381301806, "grad_norm": 3.0524468421936035, "learning_rate": 5.445457942655252e-05, "loss": 2.434, "step": 53645 }, { "epoch": 3.6451963582008426, "grad_norm": 2.688746452331543, "learning_rate": 5.4450332925669254e-05, "loss": 2.7182, "step": 53650 }, { "epoch": 3.6455360782715043, "grad_norm": 3.101473093032837, "learning_rate": 5.444608642478598e-05, "loss": 2.6349, "step": 53655 }, { "epoch": 3.645875798342166, "grad_norm": 3.3587584495544434, "learning_rate": 5.44418399239027e-05, "loss": 2.7295, "step": 53660 }, { "epoch": 3.646215518412828, "grad_norm": 2.2312471866607666, "learning_rate": 5.443759342301944e-05, "loss": 2.7469, "step": 53665 }, { "epoch": 3.6465552384834896, "grad_norm": 3.1812021732330322, "learning_rate": 5.4433346922136166e-05, "loss": 2.7479, "step": 53670 }, { "epoch": 3.6468949585541512, "grad_norm": 3.661242961883545, "learning_rate": 
5.442910042125289e-05, "loss": 2.7042, "step": 53675 }, { "epoch": 3.6472346786248133, "grad_norm": 2.5407474040985107, "learning_rate": 5.442485392036962e-05, "loss": 2.4885, "step": 53680 }, { "epoch": 3.647574398695475, "grad_norm": 5.466484546661377, "learning_rate": 5.442060741948635e-05, "loss": 2.7886, "step": 53685 }, { "epoch": 3.6479141187661366, "grad_norm": 2.497042655944824, "learning_rate": 5.441636091860307e-05, "loss": 2.7509, "step": 53690 }, { "epoch": 3.6482538388367987, "grad_norm": 2.6774425506591797, "learning_rate": 5.4412114417719806e-05, "loss": 2.7979, "step": 53695 }, { "epoch": 3.6485935589074603, "grad_norm": 2.4519240856170654, "learning_rate": 5.440786791683653e-05, "loss": 2.7652, "step": 53700 }, { "epoch": 3.648933278978122, "grad_norm": 3.926471710205078, "learning_rate": 5.4403621415953255e-05, "loss": 2.6807, "step": 53705 }, { "epoch": 3.649272999048784, "grad_norm": 3.621401071548462, "learning_rate": 5.439937491506999e-05, "loss": 2.824, "step": 53710 }, { "epoch": 3.6496127191194456, "grad_norm": 2.3365907669067383, "learning_rate": 5.439512841418671e-05, "loss": 2.8734, "step": 53715 }, { "epoch": 3.6499524391901073, "grad_norm": 2.949477195739746, "learning_rate": 5.439088191330344e-05, "loss": 2.6288, "step": 53720 }, { "epoch": 3.6502921592607693, "grad_norm": 3.169401168823242, "learning_rate": 5.4386635412420174e-05, "loss": 2.4325, "step": 53725 }, { "epoch": 3.650631879331431, "grad_norm": 3.4743974208831787, "learning_rate": 5.4382388911536895e-05, "loss": 2.804, "step": 53730 }, { "epoch": 3.6509715994020926, "grad_norm": 2.994386911392212, "learning_rate": 5.437814241065362e-05, "loss": 2.5499, "step": 53735 }, { "epoch": 3.6513113194727547, "grad_norm": 3.1002421379089355, "learning_rate": 5.437389590977036e-05, "loss": 2.4364, "step": 53740 }, { "epoch": 3.6516510395434163, "grad_norm": 3.3297910690307617, "learning_rate": 5.436964940888708e-05, "loss": 2.3451, "step": 53745 }, { "epoch": 3.651990759614078, 
"grad_norm": 2.9923672676086426, "learning_rate": 5.436540290800381e-05, "loss": 2.5885, "step": 53750 }, { "epoch": 3.65233047968474, "grad_norm": 2.5717248916625977, "learning_rate": 5.436115640712054e-05, "loss": 2.7071, "step": 53755 }, { "epoch": 3.6526701997554016, "grad_norm": 2.824021577835083, "learning_rate": 5.435690990623726e-05, "loss": 2.4973, "step": 53760 }, { "epoch": 3.6530099198260633, "grad_norm": 3.002835750579834, "learning_rate": 5.4352663405353984e-05, "loss": 2.5663, "step": 53765 }, { "epoch": 3.6533496398967253, "grad_norm": 3.248764991760254, "learning_rate": 5.4348416904470726e-05, "loss": 2.4115, "step": 53770 }, { "epoch": 3.653689359967387, "grad_norm": 2.6518125534057617, "learning_rate": 5.434417040358745e-05, "loss": 2.8328, "step": 53775 }, { "epoch": 3.6540290800380486, "grad_norm": 3.1756067276000977, "learning_rate": 5.433992390270417e-05, "loss": 2.6183, "step": 53780 }, { "epoch": 3.6543688001087107, "grad_norm": 2.8120973110198975, "learning_rate": 5.43356774018209e-05, "loss": 2.8289, "step": 53785 }, { "epoch": 3.6547085201793723, "grad_norm": 3.445826768875122, "learning_rate": 5.433143090093763e-05, "loss": 2.6843, "step": 53790 }, { "epoch": 3.655048240250034, "grad_norm": 2.6505393981933594, "learning_rate": 5.432718440005435e-05, "loss": 2.7497, "step": 53795 }, { "epoch": 3.655387960320696, "grad_norm": 3.2695505619049072, "learning_rate": 5.432293789917109e-05, "loss": 2.5553, "step": 53800 }, { "epoch": 3.6557276803913576, "grad_norm": 3.2420737743377686, "learning_rate": 5.4318691398287815e-05, "loss": 2.7107, "step": 53805 }, { "epoch": 3.6560674004620193, "grad_norm": 2.770841121673584, "learning_rate": 5.4314444897404536e-05, "loss": 2.609, "step": 53810 }, { "epoch": 3.656407120532681, "grad_norm": 2.588703155517578, "learning_rate": 5.431019839652127e-05, "loss": 2.7954, "step": 53815 }, { "epoch": 3.656746840603343, "grad_norm": 2.792252540588379, "learning_rate": 5.4305951895638e-05, "loss": 2.7368, 
"step": 53820 }, { "epoch": 3.6570865606740046, "grad_norm": 3.447218179702759, "learning_rate": 5.430170539475472e-05, "loss": 2.5992, "step": 53825 }, { "epoch": 3.6574262807446662, "grad_norm": 2.773792266845703, "learning_rate": 5.4297458893871455e-05, "loss": 2.464, "step": 53830 }, { "epoch": 3.6577660008153283, "grad_norm": 3.440659999847412, "learning_rate": 5.429321239298818e-05, "loss": 2.2079, "step": 53835 }, { "epoch": 3.65810572088599, "grad_norm": 3.2299320697784424, "learning_rate": 5.4288965892104904e-05, "loss": 2.3684, "step": 53840 }, { "epoch": 3.6584454409566516, "grad_norm": 3.4173777103424072, "learning_rate": 5.428471939122164e-05, "loss": 2.7699, "step": 53845 }, { "epoch": 3.6587851610273137, "grad_norm": 2.689551830291748, "learning_rate": 5.428047289033836e-05, "loss": 2.7347, "step": 53850 }, { "epoch": 3.6591248810979753, "grad_norm": 2.921049118041992, "learning_rate": 5.427622638945509e-05, "loss": 2.5967, "step": 53855 }, { "epoch": 3.659464601168637, "grad_norm": 2.842941999435425, "learning_rate": 5.427197988857182e-05, "loss": 2.6814, "step": 53860 }, { "epoch": 3.6598043212392986, "grad_norm": 3.1112968921661377, "learning_rate": 5.4267733387688544e-05, "loss": 2.4267, "step": 53865 }, { "epoch": 3.6601440413099606, "grad_norm": 2.706005334854126, "learning_rate": 5.426348688680527e-05, "loss": 2.6366, "step": 53870 }, { "epoch": 3.6604837613806223, "grad_norm": 2.3966739177703857, "learning_rate": 5.425924038592201e-05, "loss": 2.8412, "step": 53875 }, { "epoch": 3.660823481451284, "grad_norm": 2.2472984790802, "learning_rate": 5.425499388503873e-05, "loss": 2.5592, "step": 53880 }, { "epoch": 3.661163201521946, "grad_norm": 3.0651931762695312, "learning_rate": 5.4250747384155456e-05, "loss": 2.2762, "step": 53885 }, { "epoch": 3.6615029215926076, "grad_norm": 2.3256139755249023, "learning_rate": 5.424650088327219e-05, "loss": 2.5979, "step": 53890 }, { "epoch": 3.6618426416632692, "grad_norm": 3.2557623386383057, 
"learning_rate": 5.424225438238891e-05, "loss": 2.8204, "step": 53895 }, { "epoch": 3.6621823617339313, "grad_norm": 2.86956524848938, "learning_rate": 5.4238007881505634e-05, "loss": 2.6548, "step": 53900 }, { "epoch": 3.662522081804593, "grad_norm": 2.6367383003234863, "learning_rate": 5.4233761380622375e-05, "loss": 2.779, "step": 53905 }, { "epoch": 3.6628618018752546, "grad_norm": 2.8077504634857178, "learning_rate": 5.4229514879739096e-05, "loss": 2.7548, "step": 53910 }, { "epoch": 3.6632015219459166, "grad_norm": 3.245084285736084, "learning_rate": 5.422526837885582e-05, "loss": 2.503, "step": 53915 }, { "epoch": 3.6635412420165783, "grad_norm": 2.707523822784424, "learning_rate": 5.422102187797255e-05, "loss": 2.7878, "step": 53920 }, { "epoch": 3.66388096208724, "grad_norm": 2.7137181758880615, "learning_rate": 5.421677537708928e-05, "loss": 2.7273, "step": 53925 }, { "epoch": 3.664220682157902, "grad_norm": 2.8677279949188232, "learning_rate": 5.4212528876206e-05, "loss": 2.951, "step": 53930 }, { "epoch": 3.6645604022285636, "grad_norm": 3.0276834964752197, "learning_rate": 5.4208282375322736e-05, "loss": 2.6345, "step": 53935 }, { "epoch": 3.6649001222992252, "grad_norm": 2.709641695022583, "learning_rate": 5.4204035874439464e-05, "loss": 2.6646, "step": 53940 }, { "epoch": 3.6652398423698873, "grad_norm": 2.933922290802002, "learning_rate": 5.4199789373556186e-05, "loss": 2.4785, "step": 53945 }, { "epoch": 3.665579562440549, "grad_norm": 3.3044514656066895, "learning_rate": 5.419554287267292e-05, "loss": 2.5819, "step": 53950 }, { "epoch": 3.6659192825112106, "grad_norm": 3.532085418701172, "learning_rate": 5.419129637178965e-05, "loss": 2.8758, "step": 53955 }, { "epoch": 3.6662590025818727, "grad_norm": 2.8017191886901855, "learning_rate": 5.418704987090638e-05, "loss": 2.6372, "step": 53960 }, { "epoch": 3.6665987226525343, "grad_norm": 2.676215410232544, "learning_rate": 5.4182803370023105e-05, "loss": 2.7862, "step": 53965 }, { "epoch": 
3.666938442723196, "grad_norm": 3.349027395248413, "learning_rate": 5.417855686913983e-05, "loss": 2.7617, "step": 53970 }, { "epoch": 3.667278162793858, "grad_norm": 2.694199323654175, "learning_rate": 5.417431036825657e-05, "loss": 2.9753, "step": 53975 }, { "epoch": 3.6676178828645196, "grad_norm": 2.323106527328491, "learning_rate": 5.417006386737329e-05, "loss": 2.699, "step": 53980 }, { "epoch": 3.6679576029351812, "grad_norm": 3.121896266937256, "learning_rate": 5.416581736649001e-05, "loss": 2.7063, "step": 53985 }, { "epoch": 3.6682973230058433, "grad_norm": 2.794593095779419, "learning_rate": 5.4161570865606745e-05, "loss": 2.4597, "step": 53990 }, { "epoch": 3.668637043076505, "grad_norm": 3.0989644527435303, "learning_rate": 5.415732436472347e-05, "loss": 2.712, "step": 53995 }, { "epoch": 3.6689767631471666, "grad_norm": 3.509622812271118, "learning_rate": 5.4153077863840194e-05, "loss": 2.8406, "step": 54000 }, { "epoch": 3.6693164832178287, "grad_norm": 2.6929521560668945, "learning_rate": 5.414883136295693e-05, "loss": 2.7193, "step": 54005 }, { "epoch": 3.6696562032884903, "grad_norm": 2.228806495666504, "learning_rate": 5.4144584862073657e-05, "loss": 2.635, "step": 54010 }, { "epoch": 3.669995923359152, "grad_norm": 2.503035306930542, "learning_rate": 5.414033836119038e-05, "loss": 2.7057, "step": 54015 }, { "epoch": 3.670335643429814, "grad_norm": 2.6751644611358643, "learning_rate": 5.413609186030711e-05, "loss": 2.7118, "step": 54020 }, { "epoch": 3.6706753635004756, "grad_norm": 3.05403733253479, "learning_rate": 5.413184535942384e-05, "loss": 2.7818, "step": 54025 }, { "epoch": 3.6710150835711373, "grad_norm": 2.67421817779541, "learning_rate": 5.412759885854056e-05, "loss": 2.7236, "step": 54030 }, { "epoch": 3.6713548036417993, "grad_norm": 2.1426706314086914, "learning_rate": 5.41233523576573e-05, "loss": 2.7312, "step": 54035 }, { "epoch": 3.671694523712461, "grad_norm": 2.4000611305236816, "learning_rate": 5.4119105856774025e-05, 
"loss": 2.8692, "step": 54040 }, { "epoch": 3.6720342437831226, "grad_norm": 3.026993751525879, "learning_rate": 5.4114859355890746e-05, "loss": 2.7194, "step": 54045 }, { "epoch": 3.6723739638537847, "grad_norm": 2.504063844680786, "learning_rate": 5.411061285500748e-05, "loss": 2.7702, "step": 54050 }, { "epoch": 3.6727136839244463, "grad_norm": 2.837562084197998, "learning_rate": 5.41063663541242e-05, "loss": 2.84, "step": 54055 }, { "epoch": 3.673053403995108, "grad_norm": 2.5751888751983643, "learning_rate": 5.410211985324093e-05, "loss": 2.8702, "step": 54060 }, { "epoch": 3.67339312406577, "grad_norm": 2.640831232070923, "learning_rate": 5.4097873352357665e-05, "loss": 2.6573, "step": 54065 }, { "epoch": 3.6737328441364316, "grad_norm": 3.1222116947174072, "learning_rate": 5.4093626851474386e-05, "loss": 2.5962, "step": 54070 }, { "epoch": 3.6740725642070933, "grad_norm": 2.655935287475586, "learning_rate": 5.4089380350591114e-05, "loss": 2.9298, "step": 54075 }, { "epoch": 3.6744122842777553, "grad_norm": 2.86087703704834, "learning_rate": 5.408513384970785e-05, "loss": 2.8637, "step": 54080 }, { "epoch": 3.674752004348417, "grad_norm": 3.196826934814453, "learning_rate": 5.408088734882457e-05, "loss": 2.8632, "step": 54085 }, { "epoch": 3.6750917244190786, "grad_norm": 2.3622794151306152, "learning_rate": 5.40766408479413e-05, "loss": 2.7453, "step": 54090 }, { "epoch": 3.6754314444897407, "grad_norm": 2.715672016143799, "learning_rate": 5.407239434705803e-05, "loss": 2.3074, "step": 54095 }, { "epoch": 3.6757711645604023, "grad_norm": 2.8099467754364014, "learning_rate": 5.4068147846174754e-05, "loss": 2.7583, "step": 54100 }, { "epoch": 3.676110884631064, "grad_norm": 3.058915615081787, "learning_rate": 5.406390134529148e-05, "loss": 2.645, "step": 54105 }, { "epoch": 3.676450604701726, "grad_norm": 3.0849013328552246, "learning_rate": 5.405965484440822e-05, "loss": 2.6217, "step": 54110 }, { "epoch": 3.6767903247723877, "grad_norm": 2.7013955116271973, 
"learning_rate": 5.405540834352494e-05, "loss": 2.7517, "step": 54115 }, { "epoch": 3.6771300448430493, "grad_norm": 2.872748374938965, "learning_rate": 5.405116184264166e-05, "loss": 2.688, "step": 54120 }, { "epoch": 3.6774697649137114, "grad_norm": 3.1026604175567627, "learning_rate": 5.4046915341758394e-05, "loss": 2.7108, "step": 54125 }, { "epoch": 3.677809484984373, "grad_norm": 2.8618011474609375, "learning_rate": 5.404266884087512e-05, "loss": 2.5599, "step": 54130 }, { "epoch": 3.6781492050550346, "grad_norm": 3.2002198696136475, "learning_rate": 5.403842233999184e-05, "loss": 2.8841, "step": 54135 }, { "epoch": 3.6784889251256967, "grad_norm": 2.9184951782226562, "learning_rate": 5.403417583910858e-05, "loss": 2.8488, "step": 54140 }, { "epoch": 3.6788286451963583, "grad_norm": 3.065308094024658, "learning_rate": 5.4029929338225306e-05, "loss": 2.6575, "step": 54145 }, { "epoch": 3.67916836526702, "grad_norm": 2.567681074142456, "learning_rate": 5.402568283734203e-05, "loss": 2.6841, "step": 54150 }, { "epoch": 3.6795080853376816, "grad_norm": 3.055697441101074, "learning_rate": 5.402143633645876e-05, "loss": 2.7134, "step": 54155 }, { "epoch": 3.6798478054083437, "grad_norm": 2.4930224418640137, "learning_rate": 5.401718983557549e-05, "loss": 2.7114, "step": 54160 }, { "epoch": 3.6801875254790053, "grad_norm": 3.266420602798462, "learning_rate": 5.401294333469221e-05, "loss": 2.8332, "step": 54165 }, { "epoch": 3.680527245549667, "grad_norm": 3.3956475257873535, "learning_rate": 5.4008696833808946e-05, "loss": 2.3182, "step": 54170 }, { "epoch": 3.680866965620329, "grad_norm": 2.9332144260406494, "learning_rate": 5.4004450332925674e-05, "loss": 3.0763, "step": 54175 }, { "epoch": 3.6812066856909906, "grad_norm": 2.4412155151367188, "learning_rate": 5.4000203832042395e-05, "loss": 2.6418, "step": 54180 }, { "epoch": 3.6815464057616523, "grad_norm": 2.8408901691436768, "learning_rate": 5.399595733115913e-05, "loss": 2.6567, "step": 54185 }, { "epoch": 
3.6818861258323143, "grad_norm": 2.532942295074463, "learning_rate": 5.399171083027585e-05, "loss": 2.6932, "step": 54190 }, { "epoch": 3.682225845902976, "grad_norm": 2.920191526412964, "learning_rate": 5.398746432939258e-05, "loss": 2.4943, "step": 54195 }, { "epoch": 3.6825655659736376, "grad_norm": 2.9305899143218994, "learning_rate": 5.3983217828509314e-05, "loss": 2.6628, "step": 54200 }, { "epoch": 3.6829052860442992, "grad_norm": 3.071486473083496, "learning_rate": 5.3978971327626035e-05, "loss": 2.6223, "step": 54205 }, { "epoch": 3.6832450061149613, "grad_norm": 3.3199124336242676, "learning_rate": 5.397472482674276e-05, "loss": 2.8016, "step": 54210 }, { "epoch": 3.683584726185623, "grad_norm": 2.424224376678467, "learning_rate": 5.39704783258595e-05, "loss": 2.7656, "step": 54215 }, { "epoch": 3.6839244462562846, "grad_norm": 2.6088085174560547, "learning_rate": 5.396623182497622e-05, "loss": 2.5939, "step": 54220 }, { "epoch": 3.6842641663269466, "grad_norm": 3.0795347690582275, "learning_rate": 5.396198532409295e-05, "loss": 2.5678, "step": 54225 }, { "epoch": 3.6846038863976083, "grad_norm": 2.9501564502716064, "learning_rate": 5.395773882320968e-05, "loss": 2.7163, "step": 54230 }, { "epoch": 3.68494360646827, "grad_norm": 2.8955352306365967, "learning_rate": 5.3953492322326403e-05, "loss": 2.7845, "step": 54235 }, { "epoch": 3.685283326538932, "grad_norm": 2.914663791656494, "learning_rate": 5.394924582144313e-05, "loss": 2.4699, "step": 54240 }, { "epoch": 3.6856230466095936, "grad_norm": 3.1324353218078613, "learning_rate": 5.3944999320559866e-05, "loss": 2.7505, "step": 54245 }, { "epoch": 3.6859627666802552, "grad_norm": 2.84432315826416, "learning_rate": 5.394075281967659e-05, "loss": 2.6051, "step": 54250 }, { "epoch": 3.6863024867509173, "grad_norm": 3.05011248588562, "learning_rate": 5.393650631879331e-05, "loss": 2.8818, "step": 54255 }, { "epoch": 3.686642206821579, "grad_norm": 2.43587589263916, "learning_rate": 5.393225981791005e-05, 
"loss": 2.5324, "step": 54260 }, { "epoch": 3.6869819268922406, "grad_norm": 2.5185017585754395, "learning_rate": 5.392801331702677e-05, "loss": 2.8739, "step": 54265 }, { "epoch": 3.6873216469629027, "grad_norm": 2.54901385307312, "learning_rate": 5.392376681614349e-05, "loss": 2.7788, "step": 54270 }, { "epoch": 3.6876613670335643, "grad_norm": 2.5778300762176514, "learning_rate": 5.391952031526023e-05, "loss": 2.4682, "step": 54275 }, { "epoch": 3.688001087104226, "grad_norm": 2.2631802558898926, "learning_rate": 5.3915273814376955e-05, "loss": 2.7922, "step": 54280 }, { "epoch": 3.688340807174888, "grad_norm": 2.7719838619232178, "learning_rate": 5.391102731349368e-05, "loss": 2.6531, "step": 54285 }, { "epoch": 3.6886805272455496, "grad_norm": 2.280763626098633, "learning_rate": 5.390678081261041e-05, "loss": 2.8295, "step": 54290 }, { "epoch": 3.6890202473162113, "grad_norm": 2.3566176891326904, "learning_rate": 5.390253431172714e-05, "loss": 2.5771, "step": 54295 }, { "epoch": 3.6893599673868733, "grad_norm": 3.0991578102111816, "learning_rate": 5.3898287810843874e-05, "loss": 2.5571, "step": 54300 }, { "epoch": 3.689699687457535, "grad_norm": 3.5296599864959717, "learning_rate": 5.3894041309960595e-05, "loss": 2.6497, "step": 54305 }, { "epoch": 3.6900394075281966, "grad_norm": 3.3959927558898926, "learning_rate": 5.3889794809077324e-05, "loss": 2.3219, "step": 54310 }, { "epoch": 3.6903791275988587, "grad_norm": 2.755852460861206, "learning_rate": 5.388554830819406e-05, "loss": 2.8478, "step": 54315 }, { "epoch": 3.6907188476695203, "grad_norm": 2.309946060180664, "learning_rate": 5.388130180731078e-05, "loss": 3.0053, "step": 54320 }, { "epoch": 3.691058567740182, "grad_norm": 2.8378608226776123, "learning_rate": 5.38770553064275e-05, "loss": 2.5029, "step": 54325 }, { "epoch": 3.691398287810844, "grad_norm": 2.4973771572113037, "learning_rate": 5.387280880554424e-05, "loss": 2.3686, "step": 54330 }, { "epoch": 3.6917380078815056, "grad_norm": 
2.7793147563934326, "learning_rate": 5.3868562304660964e-05, "loss": 2.6866, "step": 54335 }, { "epoch": 3.6920777279521673, "grad_norm": 2.818185329437256, "learning_rate": 5.3864315803777685e-05, "loss": 2.4509, "step": 54340 }, { "epoch": 3.6924174480228293, "grad_norm": 2.654273271560669, "learning_rate": 5.386006930289442e-05, "loss": 2.7115, "step": 54345 }, { "epoch": 3.692757168093491, "grad_norm": 2.2245025634765625, "learning_rate": 5.385582280201115e-05, "loss": 2.9087, "step": 54350 }, { "epoch": 3.6930968881641526, "grad_norm": 2.6192140579223633, "learning_rate": 5.385157630112787e-05, "loss": 2.8382, "step": 54355 }, { "epoch": 3.6934366082348147, "grad_norm": 2.1598315238952637, "learning_rate": 5.3847329800244604e-05, "loss": 2.6655, "step": 54360 }, { "epoch": 3.6937763283054763, "grad_norm": 2.3344004154205322, "learning_rate": 5.384308329936133e-05, "loss": 2.5311, "step": 54365 }, { "epoch": 3.694116048376138, "grad_norm": 2.41049861907959, "learning_rate": 5.383883679847805e-05, "loss": 2.595, "step": 54370 }, { "epoch": 3.6944557684468, "grad_norm": 3.519301652908325, "learning_rate": 5.383459029759479e-05, "loss": 2.7818, "step": 54375 }, { "epoch": 3.6947954885174616, "grad_norm": 2.947232246398926, "learning_rate": 5.3830343796711516e-05, "loss": 2.4392, "step": 54380 }, { "epoch": 3.6951352085881233, "grad_norm": 3.4128284454345703, "learning_rate": 5.382609729582824e-05, "loss": 2.6124, "step": 54385 }, { "epoch": 3.6954749286587854, "grad_norm": 3.3953161239624023, "learning_rate": 5.382185079494497e-05, "loss": 2.6136, "step": 54390 }, { "epoch": 3.695814648729447, "grad_norm": 2.8492558002471924, "learning_rate": 5.38176042940617e-05, "loss": 2.5071, "step": 54395 }, { "epoch": 3.6961543688001086, "grad_norm": 2.9488847255706787, "learning_rate": 5.381335779317842e-05, "loss": 2.4473, "step": 54400 }, { "epoch": 3.6964940888707707, "grad_norm": 3.03159236907959, "learning_rate": 5.3809111292295156e-05, "loss": 2.9085, "step": 54405 }, 
{ "epoch": 3.6968338089414323, "grad_norm": 2.8965117931365967, "learning_rate": 5.380486479141188e-05, "loss": 2.8592, "step": 54410 }, { "epoch": 3.697173529012094, "grad_norm": 3.170588493347168, "learning_rate": 5.3800618290528605e-05, "loss": 2.5917, "step": 54415 }, { "epoch": 3.697513249082756, "grad_norm": 3.1337523460388184, "learning_rate": 5.379637178964534e-05, "loss": 2.6162, "step": 54420 }, { "epoch": 3.6978529691534177, "grad_norm": 2.1865532398223877, "learning_rate": 5.379212528876206e-05, "loss": 2.5239, "step": 54425 }, { "epoch": 3.6981926892240793, "grad_norm": 2.5238914489746094, "learning_rate": 5.378787878787879e-05, "loss": 2.5808, "step": 54430 }, { "epoch": 3.6985324092947414, "grad_norm": 2.9772069454193115, "learning_rate": 5.3783632286995524e-05, "loss": 2.5857, "step": 54435 }, { "epoch": 3.698872129365403, "grad_norm": 3.4892563819885254, "learning_rate": 5.3779385786112245e-05, "loss": 2.6772, "step": 54440 }, { "epoch": 3.6992118494360646, "grad_norm": 2.789379835128784, "learning_rate": 5.377513928522897e-05, "loss": 2.7525, "step": 54445 }, { "epoch": 3.6995515695067267, "grad_norm": 2.6026158332824707, "learning_rate": 5.377089278434571e-05, "loss": 2.5317, "step": 54450 }, { "epoch": 3.6998912895773883, "grad_norm": 3.2587039470672607, "learning_rate": 5.376664628346243e-05, "loss": 2.9282, "step": 54455 }, { "epoch": 3.70023100964805, "grad_norm": 2.9437808990478516, "learning_rate": 5.376239978257916e-05, "loss": 2.5109, "step": 54460 }, { "epoch": 3.700570729718712, "grad_norm": 3.0591626167297363, "learning_rate": 5.375815328169589e-05, "loss": 2.6704, "step": 54465 }, { "epoch": 3.7009104497893737, "grad_norm": 2.3466553688049316, "learning_rate": 5.375390678081261e-05, "loss": 2.675, "step": 54470 }, { "epoch": 3.7012501698600353, "grad_norm": 2.633103370666504, "learning_rate": 5.3749660279929334e-05, "loss": 2.8056, "step": 54475 }, { "epoch": 3.7015898899306974, "grad_norm": 3.1262001991271973, "learning_rate": 
5.374541377904607e-05, "loss": 2.7324, "step": 54480 }, { "epoch": 3.701929610001359, "grad_norm": 2.5734477043151855, "learning_rate": 5.37411672781628e-05, "loss": 2.6909, "step": 54485 }, { "epoch": 3.7022693300720206, "grad_norm": 2.5581748485565186, "learning_rate": 5.373692077727952e-05, "loss": 2.6748, "step": 54490 }, { "epoch": 3.7026090501426823, "grad_norm": 2.826899766921997, "learning_rate": 5.373267427639625e-05, "loss": 2.7025, "step": 54495 }, { "epoch": 3.7029487702133443, "grad_norm": 2.541175603866577, "learning_rate": 5.372842777551298e-05, "loss": 2.6585, "step": 54500 }, { "epoch": 3.703288490284006, "grad_norm": 2.974703550338745, "learning_rate": 5.37241812746297e-05, "loss": 2.4482, "step": 54505 }, { "epoch": 3.7036282103546676, "grad_norm": 3.36680269241333, "learning_rate": 5.371993477374644e-05, "loss": 2.8423, "step": 54510 }, { "epoch": 3.7039679304253297, "grad_norm": 2.8255698680877686, "learning_rate": 5.3715688272863165e-05, "loss": 2.3848, "step": 54515 }, { "epoch": 3.7043076504959913, "grad_norm": 3.5014326572418213, "learning_rate": 5.3711441771979886e-05, "loss": 2.5812, "step": 54520 }, { "epoch": 3.704647370566653, "grad_norm": 2.212489128112793, "learning_rate": 5.370719527109662e-05, "loss": 2.7976, "step": 54525 }, { "epoch": 3.704987090637315, "grad_norm": 2.7681703567504883, "learning_rate": 5.370294877021335e-05, "loss": 2.6876, "step": 54530 }, { "epoch": 3.7053268107079766, "grad_norm": 2.6609621047973633, "learning_rate": 5.369870226933007e-05, "loss": 2.5789, "step": 54535 }, { "epoch": 3.7056665307786383, "grad_norm": 2.6875953674316406, "learning_rate": 5.3694455768446805e-05, "loss": 2.7428, "step": 54540 }, { "epoch": 3.7060062508493, "grad_norm": 2.8840765953063965, "learning_rate": 5.3690209267563526e-05, "loss": 2.7082, "step": 54545 }, { "epoch": 3.706345970919962, "grad_norm": 2.9827911853790283, "learning_rate": 5.3685962766680254e-05, "loss": 2.7916, "step": 54550 }, { "epoch": 3.7066856909906236, 
"grad_norm": 3.323657989501953, "learning_rate": 5.368171626579699e-05, "loss": 2.5646, "step": 54555 }, { "epoch": 3.7070254110612852, "grad_norm": 3.482459545135498, "learning_rate": 5.367746976491371e-05, "loss": 2.5462, "step": 54560 }, { "epoch": 3.7073651311319473, "grad_norm": 2.7446141242980957, "learning_rate": 5.367322326403044e-05, "loss": 2.7885, "step": 54565 }, { "epoch": 3.707704851202609, "grad_norm": 2.935079574584961, "learning_rate": 5.366897676314717e-05, "loss": 2.4181, "step": 54570 }, { "epoch": 3.7080445712732706, "grad_norm": 2.9474639892578125, "learning_rate": 5.3664730262263894e-05, "loss": 2.577, "step": 54575 }, { "epoch": 3.7083842913439327, "grad_norm": 2.549680233001709, "learning_rate": 5.366048376138062e-05, "loss": 2.7458, "step": 54580 }, { "epoch": 3.7087240114145943, "grad_norm": 3.024616241455078, "learning_rate": 5.365623726049736e-05, "loss": 2.6009, "step": 54585 }, { "epoch": 3.709063731485256, "grad_norm": 3.5150585174560547, "learning_rate": 5.365199075961408e-05, "loss": 2.7499, "step": 54590 }, { "epoch": 3.709403451555918, "grad_norm": 3.1028506755828857, "learning_rate": 5.3647744258730806e-05, "loss": 2.567, "step": 54595 }, { "epoch": 3.7097431716265796, "grad_norm": 3.269897699356079, "learning_rate": 5.364349775784754e-05, "loss": 2.6737, "step": 54600 }, { "epoch": 3.7100828916972413, "grad_norm": 3.070488214492798, "learning_rate": 5.363925125696426e-05, "loss": 2.7447, "step": 54605 }, { "epoch": 3.7104226117679033, "grad_norm": 3.2651121616363525, "learning_rate": 5.3635004756080984e-05, "loss": 2.4947, "step": 54610 }, { "epoch": 3.710762331838565, "grad_norm": 2.610347032546997, "learning_rate": 5.363075825519772e-05, "loss": 2.4563, "step": 54615 }, { "epoch": 3.7111020519092266, "grad_norm": 2.384096384048462, "learning_rate": 5.3626511754314446e-05, "loss": 2.5675, "step": 54620 }, { "epoch": 3.7114417719798887, "grad_norm": 3.248663902282715, "learning_rate": 5.362226525343117e-05, "loss": 2.5858, 
"step": 54625 }, { "epoch": 3.7117814920505503, "grad_norm": 3.863953113555908, "learning_rate": 5.36180187525479e-05, "loss": 2.8793, "step": 54630 }, { "epoch": 3.712121212121212, "grad_norm": 2.560786247253418, "learning_rate": 5.361377225166463e-05, "loss": 2.9183, "step": 54635 }, { "epoch": 3.712460932191874, "grad_norm": 3.231235980987549, "learning_rate": 5.3609525750781365e-05, "loss": 2.5998, "step": 54640 }, { "epoch": 3.7128006522625356, "grad_norm": 3.8415560722351074, "learning_rate": 5.360612855007474e-05, "loss": 2.6699, "step": 54645 }, { "epoch": 3.7131403723331973, "grad_norm": 2.7436788082122803, "learning_rate": 5.360188204919146e-05, "loss": 2.5029, "step": 54650 }, { "epoch": 3.7134800924038593, "grad_norm": 3.9268181324005127, "learning_rate": 5.3597635548308204e-05, "loss": 2.7166, "step": 54655 }, { "epoch": 3.713819812474521, "grad_norm": 2.7557852268218994, "learning_rate": 5.3593389047424925e-05, "loss": 2.387, "step": 54660 }, { "epoch": 3.7141595325451826, "grad_norm": 2.768921375274658, "learning_rate": 5.3589142546541646e-05, "loss": 2.3702, "step": 54665 }, { "epoch": 3.7144992526158447, "grad_norm": 2.2948062419891357, "learning_rate": 5.358489604565838e-05, "loss": 2.7336, "step": 54670 }, { "epoch": 3.7148389726865063, "grad_norm": 2.5433075428009033, "learning_rate": 5.358064954477511e-05, "loss": 2.7427, "step": 54675 }, { "epoch": 3.715178692757168, "grad_norm": 3.9078762531280518, "learning_rate": 5.357640304389183e-05, "loss": 2.605, "step": 54680 }, { "epoch": 3.71551841282783, "grad_norm": 2.9355628490448, "learning_rate": 5.3572156543008565e-05, "loss": 2.6138, "step": 54685 }, { "epoch": 3.7158581328984917, "grad_norm": 3.681117296218872, "learning_rate": 5.356791004212529e-05, "loss": 2.9606, "step": 54690 }, { "epoch": 3.7161978529691533, "grad_norm": 2.838035821914673, "learning_rate": 5.3563663541242014e-05, "loss": 2.717, "step": 54695 }, { "epoch": 3.7165375730398154, "grad_norm": 2.9549405574798584, 
"learning_rate": 5.355941704035875e-05, "loss": 2.6251, "step": 54700 }, { "epoch": 3.716877293110477, "grad_norm": 3.222689151763916, "learning_rate": 5.355517053947548e-05, "loss": 2.7618, "step": 54705 }, { "epoch": 3.7172170131811386, "grad_norm": 2.6103355884552, "learning_rate": 5.35509240385922e-05, "loss": 2.8096, "step": 54710 }, { "epoch": 3.7175567332518007, "grad_norm": 2.2881808280944824, "learning_rate": 5.354667753770893e-05, "loss": 2.4569, "step": 54715 }, { "epoch": 3.7178964533224623, "grad_norm": 2.4472408294677734, "learning_rate": 5.354243103682566e-05, "loss": 2.7362, "step": 54720 }, { "epoch": 3.718236173393124, "grad_norm": 3.5117714405059814, "learning_rate": 5.353818453594238e-05, "loss": 2.6502, "step": 54725 }, { "epoch": 3.718575893463786, "grad_norm": 2.7762980461120605, "learning_rate": 5.353393803505912e-05, "loss": 2.7424, "step": 54730 }, { "epoch": 3.7189156135344477, "grad_norm": 2.4854774475097656, "learning_rate": 5.352969153417584e-05, "loss": 2.4755, "step": 54735 }, { "epoch": 3.7192553336051093, "grad_norm": 2.7148194313049316, "learning_rate": 5.3525445033292566e-05, "loss": 2.8247, "step": 54740 }, { "epoch": 3.7195950536757714, "grad_norm": 2.7671520709991455, "learning_rate": 5.35211985324093e-05, "loss": 2.5705, "step": 54745 }, { "epoch": 3.719934773746433, "grad_norm": 2.5501816272735596, "learning_rate": 5.351695203152602e-05, "loss": 2.6635, "step": 54750 }, { "epoch": 3.7202744938170946, "grad_norm": 2.153071165084839, "learning_rate": 5.351270553064275e-05, "loss": 2.6029, "step": 54755 }, { "epoch": 3.7206142138877567, "grad_norm": 2.147852659225464, "learning_rate": 5.3508459029759485e-05, "loss": 2.7027, "step": 54760 }, { "epoch": 3.7209539339584183, "grad_norm": 2.424833297729492, "learning_rate": 5.3504212528876206e-05, "loss": 2.5417, "step": 54765 }, { "epoch": 3.72129365402908, "grad_norm": 2.7225852012634277, "learning_rate": 5.3499966027992934e-05, "loss": 2.5664, "step": 54770 }, { "epoch": 
3.721633374099742, "grad_norm": 3.0716164112091064, "learning_rate": 5.349571952710967e-05, "loss": 2.5, "step": 54775 }, { "epoch": 3.7219730941704037, "grad_norm": 3.3833489418029785, "learning_rate": 5.349147302622639e-05, "loss": 2.7875, "step": 54780 }, { "epoch": 3.7223128142410653, "grad_norm": 3.077796220779419, "learning_rate": 5.348722652534311e-05, "loss": 2.4122, "step": 54785 }, { "epoch": 3.7226525343117274, "grad_norm": 3.4713988304138184, "learning_rate": 5.348298002445985e-05, "loss": 2.7594, "step": 54790 }, { "epoch": 3.722992254382389, "grad_norm": 3.48213529586792, "learning_rate": 5.3478733523576574e-05, "loss": 2.5509, "step": 54795 }, { "epoch": 3.7233319744530506, "grad_norm": 2.5501296520233154, "learning_rate": 5.3474487022693296e-05, "loss": 2.3782, "step": 54800 }, { "epoch": 3.7236716945237127, "grad_norm": 2.644472599029541, "learning_rate": 5.347024052181003e-05, "loss": 2.5049, "step": 54805 }, { "epoch": 3.7240114145943743, "grad_norm": 3.1270365715026855, "learning_rate": 5.346599402092676e-05, "loss": 2.4716, "step": 54810 }, { "epoch": 3.724351134665036, "grad_norm": 2.8512699604034424, "learning_rate": 5.346174752004348e-05, "loss": 2.5131, "step": 54815 }, { "epoch": 3.724690854735698, "grad_norm": 3.119328022003174, "learning_rate": 5.3457501019160214e-05, "loss": 2.9687, "step": 54820 }, { "epoch": 3.7250305748063597, "grad_norm": 3.2998292446136475, "learning_rate": 5.345325451827694e-05, "loss": 2.4787, "step": 54825 }, { "epoch": 3.7253702948770213, "grad_norm": 3.530672311782837, "learning_rate": 5.3449008017393664e-05, "loss": 2.4188, "step": 54830 }, { "epoch": 3.725710014947683, "grad_norm": 2.827981948852539, "learning_rate": 5.34447615165104e-05, "loss": 2.471, "step": 54835 }, { "epoch": 3.726049735018345, "grad_norm": 2.9270451068878174, "learning_rate": 5.3440515015627126e-05, "loss": 2.7075, "step": 54840 }, { "epoch": 3.7263894550890067, "grad_norm": 2.547572135925293, "learning_rate": 5.343626851474386e-05, 
"loss": 2.682, "step": 54845 }, { "epoch": 3.7267291751596683, "grad_norm": 2.399031162261963, "learning_rate": 5.343202201386058e-05, "loss": 2.4246, "step": 54850 }, { "epoch": 3.7270688952303304, "grad_norm": 3.007361650466919, "learning_rate": 5.342777551297731e-05, "loss": 2.6032, "step": 54855 }, { "epoch": 3.727408615300992, "grad_norm": 2.9024016857147217, "learning_rate": 5.3423529012094045e-05, "loss": 2.8027, "step": 54860 }, { "epoch": 3.7277483353716536, "grad_norm": 2.721405506134033, "learning_rate": 5.3419282511210766e-05, "loss": 2.8683, "step": 54865 }, { "epoch": 3.7280880554423157, "grad_norm": 3.224095106124878, "learning_rate": 5.341503601032749e-05, "loss": 2.5535, "step": 54870 }, { "epoch": 3.7284277755129773, "grad_norm": 2.7875499725341797, "learning_rate": 5.341078950944422e-05, "loss": 2.6393, "step": 54875 }, { "epoch": 3.728767495583639, "grad_norm": 2.362476110458374, "learning_rate": 5.340654300856095e-05, "loss": 2.6356, "step": 54880 }, { "epoch": 3.7291072156543006, "grad_norm": 2.610008716583252, "learning_rate": 5.340229650767767e-05, "loss": 2.6266, "step": 54885 }, { "epoch": 3.7294469357249627, "grad_norm": 3.528432846069336, "learning_rate": 5.3398050006794406e-05, "loss": 2.679, "step": 54890 }, { "epoch": 3.7297866557956243, "grad_norm": 3.4143850803375244, "learning_rate": 5.3393803505911134e-05, "loss": 2.6681, "step": 54895 }, { "epoch": 3.730126375866286, "grad_norm": 3.444061040878296, "learning_rate": 5.3389557005027856e-05, "loss": 2.6281, "step": 54900 }, { "epoch": 3.730466095936948, "grad_norm": 3.3952677249908447, "learning_rate": 5.338531050414459e-05, "loss": 2.5212, "step": 54905 }, { "epoch": 3.7308058160076096, "grad_norm": 2.8460922241210938, "learning_rate": 5.338106400326132e-05, "loss": 2.7205, "step": 54910 }, { "epoch": 3.7311455360782713, "grad_norm": 2.678752899169922, "learning_rate": 5.337681750237804e-05, "loss": 2.5589, "step": 54915 }, { "epoch": 3.7314852561489333, "grad_norm": 
2.77821946144104, "learning_rate": 5.3372571001494775e-05, "loss": 2.4463, "step": 54920 }, { "epoch": 3.731824976219595, "grad_norm": 2.935011863708496, "learning_rate": 5.33683245006115e-05, "loss": 2.7611, "step": 54925 }, { "epoch": 3.7321646962902566, "grad_norm": 3.0955731868743896, "learning_rate": 5.3364077999728224e-05, "loss": 2.6592, "step": 54930 }, { "epoch": 3.7325044163609187, "grad_norm": 2.883256673812866, "learning_rate": 5.335983149884496e-05, "loss": 2.7396, "step": 54935 }, { "epoch": 3.7328441364315803, "grad_norm": 2.5930988788604736, "learning_rate": 5.335558499796168e-05, "loss": 2.6483, "step": 54940 }, { "epoch": 3.733183856502242, "grad_norm": 2.67106032371521, "learning_rate": 5.335133849707841e-05, "loss": 2.5096, "step": 54945 }, { "epoch": 3.733523576572904, "grad_norm": 2.3625435829162598, "learning_rate": 5.334709199619514e-05, "loss": 2.3912, "step": 54950 }, { "epoch": 3.7338632966435656, "grad_norm": 2.855060338973999, "learning_rate": 5.3342845495311864e-05, "loss": 2.7035, "step": 54955 }, { "epoch": 3.7342030167142273, "grad_norm": 2.3203470706939697, "learning_rate": 5.333859899442859e-05, "loss": 2.7722, "step": 54960 }, { "epoch": 3.7345427367848893, "grad_norm": 2.6071271896362305, "learning_rate": 5.3334352493545327e-05, "loss": 2.508, "step": 54965 }, { "epoch": 3.734882456855551, "grad_norm": 2.672025442123413, "learning_rate": 5.333010599266205e-05, "loss": 2.6742, "step": 54970 }, { "epoch": 3.7352221769262126, "grad_norm": 2.8856849670410156, "learning_rate": 5.3325859491778776e-05, "loss": 2.578, "step": 54975 }, { "epoch": 3.7355618969968747, "grad_norm": 3.407344341278076, "learning_rate": 5.332161299089551e-05, "loss": 2.6251, "step": 54980 }, { "epoch": 3.7359016170675363, "grad_norm": 2.862654209136963, "learning_rate": 5.331736649001223e-05, "loss": 2.7346, "step": 54985 }, { "epoch": 3.736241337138198, "grad_norm": 3.0037364959716797, "learning_rate": 5.331311998912896e-05, "loss": 2.6624, "step": 54990 }, { 
"epoch": 3.73658105720886, "grad_norm": 2.876797914505005, "learning_rate": 5.3308873488245695e-05, "loss": 2.7169, "step": 54995 }, { "epoch": 3.7369207772795217, "grad_norm": 2.8962960243225098, "learning_rate": 5.3304626987362416e-05, "loss": 2.5091, "step": 55000 }, { "epoch": 3.7372604973501833, "grad_norm": 2.39593505859375, "learning_rate": 5.330038048647914e-05, "loss": 2.6135, "step": 55005 }, { "epoch": 3.7376002174208454, "grad_norm": 3.2136807441711426, "learning_rate": 5.329613398559587e-05, "loss": 2.4873, "step": 55010 }, { "epoch": 3.737939937491507, "grad_norm": 2.956136703491211, "learning_rate": 5.32918874847126e-05, "loss": 2.8238, "step": 55015 }, { "epoch": 3.7382796575621686, "grad_norm": 2.888561487197876, "learning_rate": 5.328764098382932e-05, "loss": 2.7187, "step": 55020 }, { "epoch": 3.7386193776328307, "grad_norm": 3.458754539489746, "learning_rate": 5.3283394482946056e-05, "loss": 2.6292, "step": 55025 }, { "epoch": 3.7389590977034923, "grad_norm": 3.390854597091675, "learning_rate": 5.3279147982062784e-05, "loss": 2.7433, "step": 55030 }, { "epoch": 3.739298817774154, "grad_norm": 3.6867940425872803, "learning_rate": 5.3274901481179505e-05, "loss": 2.4745, "step": 55035 }, { "epoch": 3.739638537844816, "grad_norm": 3.4530560970306396, "learning_rate": 5.327065498029624e-05, "loss": 2.7376, "step": 55040 }, { "epoch": 3.7399782579154777, "grad_norm": 2.4929885864257812, "learning_rate": 5.326640847941297e-05, "loss": 2.2892, "step": 55045 }, { "epoch": 3.7403179779861393, "grad_norm": 2.9519667625427246, "learning_rate": 5.326216197852969e-05, "loss": 2.5695, "step": 55050 }, { "epoch": 3.7406576980568014, "grad_norm": 2.8576667308807373, "learning_rate": 5.3257915477646424e-05, "loss": 2.7779, "step": 55055 }, { "epoch": 3.740997418127463, "grad_norm": 3.032822608947754, "learning_rate": 5.325366897676315e-05, "loss": 2.7884, "step": 55060 }, { "epoch": 3.7413371381981246, "grad_norm": 2.955684185028076, "learning_rate": 
5.324942247587987e-05, "loss": 2.7235, "step": 55065 }, { "epoch": 3.7416768582687867, "grad_norm": 2.280242443084717, "learning_rate": 5.324517597499661e-05, "loss": 2.8016, "step": 55070 }, { "epoch": 3.7420165783394483, "grad_norm": 2.7048728466033936, "learning_rate": 5.324092947411333e-05, "loss": 3.0724, "step": 55075 }, { "epoch": 3.74235629841011, "grad_norm": 2.649876594543457, "learning_rate": 5.323668297323006e-05, "loss": 2.5054, "step": 55080 }, { "epoch": 3.742696018480772, "grad_norm": 2.9021778106689453, "learning_rate": 5.323243647234679e-05, "loss": 2.5182, "step": 55085 }, { "epoch": 3.7430357385514337, "grad_norm": 4.470992565155029, "learning_rate": 5.322818997146351e-05, "loss": 2.6337, "step": 55090 }, { "epoch": 3.7433754586220953, "grad_norm": 2.647334575653076, "learning_rate": 5.322394347058024e-05, "loss": 2.6449, "step": 55095 }, { "epoch": 3.7437151786927574, "grad_norm": 3.20133638381958, "learning_rate": 5.3219696969696976e-05, "loss": 2.5868, "step": 55100 }, { "epoch": 3.744054898763419, "grad_norm": 3.3301610946655273, "learning_rate": 5.32154504688137e-05, "loss": 2.3993, "step": 55105 }, { "epoch": 3.7443946188340806, "grad_norm": 3.0800795555114746, "learning_rate": 5.3211203967930425e-05, "loss": 2.5717, "step": 55110 }, { "epoch": 3.7447343389047427, "grad_norm": 3.532489061355591, "learning_rate": 5.320695746704716e-05, "loss": 2.7888, "step": 55115 }, { "epoch": 3.7450740589754044, "grad_norm": 2.711648464202881, "learning_rate": 5.320271096616388e-05, "loss": 2.4889, "step": 55120 }, { "epoch": 3.745413779046066, "grad_norm": 2.929497003555298, "learning_rate": 5.319846446528061e-05, "loss": 2.4263, "step": 55125 }, { "epoch": 3.745753499116728, "grad_norm": 2.5506012439727783, "learning_rate": 5.3194217964397344e-05, "loss": 2.6867, "step": 55130 }, { "epoch": 3.7460932191873897, "grad_norm": 2.6420910358428955, "learning_rate": 5.3189971463514065e-05, "loss": 2.6493, "step": 55135 }, { "epoch": 3.7464329392580513, 
"grad_norm": 2.0889508724212646, "learning_rate": 5.3185724962630787e-05, "loss": 2.6293, "step": 55140 }, { "epoch": 3.7467726593287134, "grad_norm": 2.713324785232544, "learning_rate": 5.318147846174753e-05, "loss": 2.504, "step": 55145 }, { "epoch": 3.747112379399375, "grad_norm": 2.987572431564331, "learning_rate": 5.317723196086425e-05, "loss": 2.6185, "step": 55150 }, { "epoch": 3.7474520994700367, "grad_norm": 3.538640260696411, "learning_rate": 5.317298545998097e-05, "loss": 2.7443, "step": 55155 }, { "epoch": 3.7477918195406987, "grad_norm": 2.394007921218872, "learning_rate": 5.3168738959097705e-05, "loss": 2.6113, "step": 55160 }, { "epoch": 3.7481315396113604, "grad_norm": 2.9287068843841553, "learning_rate": 5.316449245821443e-05, "loss": 2.6317, "step": 55165 }, { "epoch": 3.748471259682022, "grad_norm": 2.6599879264831543, "learning_rate": 5.3160245957331155e-05, "loss": 2.5536, "step": 55170 }, { "epoch": 3.7488109797526836, "grad_norm": 3.58638596534729, "learning_rate": 5.315599945644789e-05, "loss": 2.7078, "step": 55175 }, { "epoch": 3.7491506998233457, "grad_norm": 2.9176063537597656, "learning_rate": 5.315175295556462e-05, "loss": 2.6546, "step": 55180 }, { "epoch": 3.7494904198940073, "grad_norm": 3.1693761348724365, "learning_rate": 5.314750645468135e-05, "loss": 2.4781, "step": 55185 }, { "epoch": 3.749830139964669, "grad_norm": 2.669142246246338, "learning_rate": 5.314325995379807e-05, "loss": 2.8855, "step": 55190 }, { "epoch": 3.750169860035331, "grad_norm": 3.3542492389678955, "learning_rate": 5.31390134529148e-05, "loss": 2.2719, "step": 55195 }, { "epoch": 3.7505095801059927, "grad_norm": 3.3159449100494385, "learning_rate": 5.3134766952031536e-05, "loss": 2.4908, "step": 55200 }, { "epoch": 3.7508493001766543, "grad_norm": 2.9679317474365234, "learning_rate": 5.313052045114826e-05, "loss": 2.8518, "step": 55205 }, { "epoch": 3.7511890202473164, "grad_norm": 3.259218692779541, "learning_rate": 5.312627395026498e-05, "loss": 2.6094, 
"step": 55210 }, { "epoch": 3.751528740317978, "grad_norm": 2.400458574295044, "learning_rate": 5.312202744938172e-05, "loss": 2.4052, "step": 55215 }, { "epoch": 3.7518684603886396, "grad_norm": 2.7049458026885986, "learning_rate": 5.311778094849844e-05, "loss": 2.4511, "step": 55220 }, { "epoch": 3.7522081804593013, "grad_norm": 3.255251884460449, "learning_rate": 5.311353444761516e-05, "loss": 2.6922, "step": 55225 }, { "epoch": 3.7525479005299633, "grad_norm": 2.7190630435943604, "learning_rate": 5.31092879467319e-05, "loss": 2.8711, "step": 55230 }, { "epoch": 3.752887620600625, "grad_norm": 2.819495916366577, "learning_rate": 5.3105041445848625e-05, "loss": 2.6549, "step": 55235 }, { "epoch": 3.7532273406712866, "grad_norm": 2.420233726501465, "learning_rate": 5.310079494496535e-05, "loss": 2.4301, "step": 55240 }, { "epoch": 3.7535670607419487, "grad_norm": 3.112879753112793, "learning_rate": 5.309654844408208e-05, "loss": 2.9895, "step": 55245 }, { "epoch": 3.7539067808126103, "grad_norm": 2.9803056716918945, "learning_rate": 5.309230194319881e-05, "loss": 2.8564, "step": 55250 }, { "epoch": 3.754246500883272, "grad_norm": 2.259212017059326, "learning_rate": 5.308805544231553e-05, "loss": 2.5352, "step": 55255 }, { "epoch": 3.754586220953934, "grad_norm": 2.5805368423461914, "learning_rate": 5.3083808941432265e-05, "loss": 2.7792, "step": 55260 }, { "epoch": 3.7549259410245956, "grad_norm": 3.0600666999816895, "learning_rate": 5.3079562440548993e-05, "loss": 2.8351, "step": 55265 }, { "epoch": 3.7552656610952573, "grad_norm": 2.6023621559143066, "learning_rate": 5.3075315939665715e-05, "loss": 2.656, "step": 55270 }, { "epoch": 3.7556053811659194, "grad_norm": 2.9912779331207275, "learning_rate": 5.307106943878245e-05, "loss": 2.7658, "step": 55275 }, { "epoch": 3.755945101236581, "grad_norm": 3.085588216781616, "learning_rate": 5.306682293789918e-05, "loss": 2.5922, "step": 55280 }, { "epoch": 3.7562848213072426, "grad_norm": 2.8782927989959717, 
"learning_rate": 5.30625764370159e-05, "loss": 2.691, "step": 55285 }, { "epoch": 3.7566245413779047, "grad_norm": 2.8048789501190186, "learning_rate": 5.3058329936132634e-05, "loss": 2.7547, "step": 55290 }, { "epoch": 3.7569642614485663, "grad_norm": 2.793271064758301, "learning_rate": 5.3054083435249355e-05, "loss": 2.6189, "step": 55295 }, { "epoch": 3.757303981519228, "grad_norm": 3.1447322368621826, "learning_rate": 5.304983693436608e-05, "loss": 2.7562, "step": 55300 }, { "epoch": 3.75764370158989, "grad_norm": 5.980701446533203, "learning_rate": 5.304559043348282e-05, "loss": 2.5619, "step": 55305 }, { "epoch": 3.7579834216605517, "grad_norm": 4.0136308670043945, "learning_rate": 5.304134393259954e-05, "loss": 2.7696, "step": 55310 }, { "epoch": 3.7583231417312133, "grad_norm": 3.1258223056793213, "learning_rate": 5.303709743171627e-05, "loss": 2.864, "step": 55315 }, { "epoch": 3.7586628618018754, "grad_norm": 2.7330732345581055, "learning_rate": 5.3032850930833e-05, "loss": 2.3798, "step": 55320 }, { "epoch": 3.759002581872537, "grad_norm": 2.182865858078003, "learning_rate": 5.302860442994972e-05, "loss": 2.7304, "step": 55325 }, { "epoch": 3.7593423019431986, "grad_norm": 3.273721218109131, "learning_rate": 5.302435792906645e-05, "loss": 2.5088, "step": 55330 }, { "epoch": 3.7596820220138607, "grad_norm": 2.4479262828826904, "learning_rate": 5.3020111428183186e-05, "loss": 2.5982, "step": 55335 }, { "epoch": 3.7600217420845223, "grad_norm": 2.421867847442627, "learning_rate": 5.301586492729991e-05, "loss": 2.8278, "step": 55340 }, { "epoch": 3.760361462155184, "grad_norm": 2.5731077194213867, "learning_rate": 5.3011618426416635e-05, "loss": 2.7268, "step": 55345 }, { "epoch": 3.760701182225846, "grad_norm": 2.8792152404785156, "learning_rate": 5.300737192553337e-05, "loss": 2.5092, "step": 55350 }, { "epoch": 3.7610409022965077, "grad_norm": 2.9105520248413086, "learning_rate": 5.300312542465009e-05, "loss": 3.0648, "step": 55355 }, { "epoch": 
3.7613806223671693, "grad_norm": 2.6481010913848877, "learning_rate": 5.299887892376681e-05, "loss": 2.8511, "step": 55360 }, { "epoch": 3.7617203424378314, "grad_norm": 2.959960460662842, "learning_rate": 5.299463242288355e-05, "loss": 2.5995, "step": 55365 }, { "epoch": 3.762060062508493, "grad_norm": 3.704331159591675, "learning_rate": 5.2990385922000275e-05, "loss": 2.2544, "step": 55370 }, { "epoch": 3.7623997825791546, "grad_norm": 2.7968204021453857, "learning_rate": 5.2986139421116996e-05, "loss": 2.7073, "step": 55375 }, { "epoch": 3.7627395026498167, "grad_norm": 2.767169952392578, "learning_rate": 5.298189292023373e-05, "loss": 2.7798, "step": 55380 }, { "epoch": 3.7630792227204783, "grad_norm": 2.4430949687957764, "learning_rate": 5.297764641935046e-05, "loss": 2.6361, "step": 55385 }, { "epoch": 3.76341894279114, "grad_norm": 2.4685423374176025, "learning_rate": 5.297339991846718e-05, "loss": 2.6251, "step": 55390 }, { "epoch": 3.763758662861802, "grad_norm": 3.60640549659729, "learning_rate": 5.2969153417583915e-05, "loss": 3.2916, "step": 55395 }, { "epoch": 3.7640983829324637, "grad_norm": 2.977384090423584, "learning_rate": 5.296490691670064e-05, "loss": 2.5762, "step": 55400 }, { "epoch": 3.7644381030031253, "grad_norm": 3.0000340938568115, "learning_rate": 5.2960660415817364e-05, "loss": 2.4974, "step": 55405 }, { "epoch": 3.7647778230737874, "grad_norm": 2.807652711868286, "learning_rate": 5.29564139149341e-05, "loss": 2.5799, "step": 55410 }, { "epoch": 3.765117543144449, "grad_norm": 2.4923596382141113, "learning_rate": 5.295216741405083e-05, "loss": 2.6481, "step": 55415 }, { "epoch": 3.7654572632151107, "grad_norm": 5.267822742462158, "learning_rate": 5.294792091316755e-05, "loss": 2.7272, "step": 55420 }, { "epoch": 3.7657969832857727, "grad_norm": 2.152534008026123, "learning_rate": 5.294367441228428e-05, "loss": 2.7439, "step": 55425 }, { "epoch": 3.7661367033564344, "grad_norm": 2.670257806777954, "learning_rate": 5.2939427911401004e-05, 
"loss": 2.5918, "step": 55430 }, { "epoch": 3.766476423427096, "grad_norm": 3.2984204292297363, "learning_rate": 5.293518141051773e-05, "loss": 2.7476, "step": 55435 }, { "epoch": 3.766816143497758, "grad_norm": 2.9367787837982178, "learning_rate": 5.293093490963447e-05, "loss": 2.5619, "step": 55440 }, { "epoch": 3.7671558635684197, "grad_norm": 2.8697409629821777, "learning_rate": 5.292668840875119e-05, "loss": 2.5906, "step": 55445 }, { "epoch": 3.7674955836390813, "grad_norm": 3.0453004837036133, "learning_rate": 5.2922441907867916e-05, "loss": 2.8951, "step": 55450 }, { "epoch": 3.7678353037097434, "grad_norm": 3.076749086380005, "learning_rate": 5.291819540698465e-05, "loss": 2.7374, "step": 55455 }, { "epoch": 3.768175023780405, "grad_norm": 3.173746347427368, "learning_rate": 5.291394890610137e-05, "loss": 2.4486, "step": 55460 }, { "epoch": 3.7685147438510667, "grad_norm": 2.936803102493286, "learning_rate": 5.29097024052181e-05, "loss": 2.408, "step": 55465 }, { "epoch": 3.7688544639217287, "grad_norm": 3.205482244491577, "learning_rate": 5.2905455904334835e-05, "loss": 2.4218, "step": 55470 }, { "epoch": 3.7691941839923904, "grad_norm": 3.4415230751037598, "learning_rate": 5.2901209403451556e-05, "loss": 2.4711, "step": 55475 }, { "epoch": 3.769533904063052, "grad_norm": 2.8459949493408203, "learning_rate": 5.2896962902568284e-05, "loss": 2.576, "step": 55480 }, { "epoch": 3.769873624133714, "grad_norm": 2.766266345977783, "learning_rate": 5.289271640168502e-05, "loss": 2.6444, "step": 55485 }, { "epoch": 3.7702133442043757, "grad_norm": 2.6292688846588135, "learning_rate": 5.288846990080174e-05, "loss": 2.6492, "step": 55490 }, { "epoch": 3.7705530642750373, "grad_norm": 2.732306957244873, "learning_rate": 5.288422339991846e-05, "loss": 2.7019, "step": 55495 }, { "epoch": 3.7708927843456994, "grad_norm": 2.8468384742736816, "learning_rate": 5.2879976899035196e-05, "loss": 2.6682, "step": 55500 }, { "epoch": 3.771232504416361, "grad_norm": 
3.2515039443969727, "learning_rate": 5.2875730398151924e-05, "loss": 2.4782, "step": 55505 }, { "epoch": 3.7715722244870227, "grad_norm": 2.5492570400238037, "learning_rate": 5.2871483897268646e-05, "loss": 2.3691, "step": 55510 }, { "epoch": 3.7719119445576843, "grad_norm": 3.2753777503967285, "learning_rate": 5.286723739638538e-05, "loss": 2.7745, "step": 55515 }, { "epoch": 3.7722516646283464, "grad_norm": 2.2860920429229736, "learning_rate": 5.286299089550211e-05, "loss": 2.9007, "step": 55520 }, { "epoch": 3.772591384699008, "grad_norm": 3.52055025100708, "learning_rate": 5.285874439461884e-05, "loss": 2.7135, "step": 55525 }, { "epoch": 3.7729311047696696, "grad_norm": 2.643212080001831, "learning_rate": 5.2854497893735564e-05, "loss": 2.4707, "step": 55530 }, { "epoch": 3.7732708248403317, "grad_norm": 2.5945487022399902, "learning_rate": 5.285025139285229e-05, "loss": 2.4996, "step": 55535 }, { "epoch": 3.7736105449109933, "grad_norm": 2.245051145553589, "learning_rate": 5.284600489196903e-05, "loss": 2.7054, "step": 55540 }, { "epoch": 3.773950264981655, "grad_norm": 2.993192672729492, "learning_rate": 5.284175839108575e-05, "loss": 2.7374, "step": 55545 }, { "epoch": 3.774289985052317, "grad_norm": 3.12917160987854, "learning_rate": 5.2837511890202476e-05, "loss": 2.7288, "step": 55550 }, { "epoch": 3.7746297051229787, "grad_norm": 3.6876182556152344, "learning_rate": 5.283326538931921e-05, "loss": 2.6504, "step": 55555 }, { "epoch": 3.7749694251936403, "grad_norm": 2.9647085666656494, "learning_rate": 5.282901888843593e-05, "loss": 2.5995, "step": 55560 }, { "epoch": 3.775309145264302, "grad_norm": 3.024855375289917, "learning_rate": 5.2824772387552654e-05, "loss": 2.4853, "step": 55565 }, { "epoch": 3.775648865334964, "grad_norm": 2.860816240310669, "learning_rate": 5.2820525886669395e-05, "loss": 2.603, "step": 55570 }, { "epoch": 3.7759885854056257, "grad_norm": 2.879091739654541, "learning_rate": 5.2816279385786116e-05, "loss": 2.9886, "step": 55575 
}, { "epoch": 3.7763283054762873, "grad_norm": 2.605649709701538, "learning_rate": 5.281203288490284e-05, "loss": 2.5076, "step": 55580 }, { "epoch": 3.7766680255469494, "grad_norm": 5.583571434020996, "learning_rate": 5.280778638401957e-05, "loss": 2.7559, "step": 55585 }, { "epoch": 3.777007745617611, "grad_norm": 3.6434667110443115, "learning_rate": 5.28035398831363e-05, "loss": 2.734, "step": 55590 }, { "epoch": 3.7773474656882726, "grad_norm": 4.303974628448486, "learning_rate": 5.279929338225302e-05, "loss": 2.7738, "step": 55595 }, { "epoch": 3.7776871857589347, "grad_norm": 3.521550178527832, "learning_rate": 5.2795046881369756e-05, "loss": 2.6801, "step": 55600 }, { "epoch": 3.7780269058295963, "grad_norm": 3.2027125358581543, "learning_rate": 5.2790800380486484e-05, "loss": 2.5119, "step": 55605 }, { "epoch": 3.778366625900258, "grad_norm": 2.8594326972961426, "learning_rate": 5.2786553879603206e-05, "loss": 2.7503, "step": 55610 }, { "epoch": 3.77870634597092, "grad_norm": 2.081756591796875, "learning_rate": 5.278230737871994e-05, "loss": 2.732, "step": 55615 }, { "epoch": 3.7790460660415817, "grad_norm": 2.8883416652679443, "learning_rate": 5.277806087783667e-05, "loss": 2.5549, "step": 55620 }, { "epoch": 3.7793857861122433, "grad_norm": 2.660011053085327, "learning_rate": 5.277381437695339e-05, "loss": 2.84, "step": 55625 }, { "epoch": 3.7797255061829054, "grad_norm": 3.1210484504699707, "learning_rate": 5.2769567876070125e-05, "loss": 2.9113, "step": 55630 }, { "epoch": 3.780065226253567, "grad_norm": 2.6128087043762207, "learning_rate": 5.276532137518685e-05, "loss": 2.8074, "step": 55635 }, { "epoch": 3.7804049463242286, "grad_norm": 2.7622530460357666, "learning_rate": 5.2761074874303574e-05, "loss": 2.7982, "step": 55640 }, { "epoch": 3.7807446663948907, "grad_norm": 2.1461002826690674, "learning_rate": 5.275682837342031e-05, "loss": 2.717, "step": 55645 }, { "epoch": 3.7810843864655523, "grad_norm": 2.55609130859375, "learning_rate": 
5.275258187253703e-05, "loss": 2.621, "step": 55650 }, { "epoch": 3.781424106536214, "grad_norm": 2.8004844188690186, "learning_rate": 5.274833537165376e-05, "loss": 2.754, "step": 55655 }, { "epoch": 3.781763826606876, "grad_norm": 2.7365875244140625, "learning_rate": 5.274408887077049e-05, "loss": 2.7116, "step": 55660 }, { "epoch": 3.7821035466775377, "grad_norm": 3.8465113639831543, "learning_rate": 5.2739842369887214e-05, "loss": 2.4822, "step": 55665 }, { "epoch": 3.7824432667481993, "grad_norm": 2.592015504837036, "learning_rate": 5.273559586900394e-05, "loss": 2.9152, "step": 55670 }, { "epoch": 3.7827829868188614, "grad_norm": 3.0403189659118652, "learning_rate": 5.2731349368120677e-05, "loss": 2.6492, "step": 55675 }, { "epoch": 3.783122706889523, "grad_norm": 2.298379421234131, "learning_rate": 5.27271028672374e-05, "loss": 2.6102, "step": 55680 }, { "epoch": 3.7834624269601846, "grad_norm": 3.2885305881500244, "learning_rate": 5.2722856366354126e-05, "loss": 2.7201, "step": 55685 }, { "epoch": 3.7838021470308467, "grad_norm": 3.4812440872192383, "learning_rate": 5.271860986547086e-05, "loss": 2.4412, "step": 55690 }, { "epoch": 3.7841418671015083, "grad_norm": 3.5725088119506836, "learning_rate": 5.271436336458758e-05, "loss": 3.0523, "step": 55695 }, { "epoch": 3.78448158717217, "grad_norm": 2.7348995208740234, "learning_rate": 5.27101168637043e-05, "loss": 2.6815, "step": 55700 }, { "epoch": 3.784821307242832, "grad_norm": 3.254387378692627, "learning_rate": 5.2705870362821045e-05, "loss": 2.5725, "step": 55705 }, { "epoch": 3.7851610273134937, "grad_norm": 2.3536293506622314, "learning_rate": 5.2701623861937766e-05, "loss": 2.5523, "step": 55710 }, { "epoch": 3.7855007473841553, "grad_norm": 2.7436351776123047, "learning_rate": 5.269737736105449e-05, "loss": 2.5104, "step": 55715 }, { "epoch": 3.7858404674548174, "grad_norm": 3.6193594932556152, "learning_rate": 5.269313086017122e-05, "loss": 2.4924, "step": 55720 }, { "epoch": 3.786180187525479, 
"grad_norm": 2.557429790496826, "learning_rate": 5.268888435928795e-05, "loss": 2.5192, "step": 55725 }, { "epoch": 3.7865199075961407, "grad_norm": 3.1293816566467285, "learning_rate": 5.268463785840467e-05, "loss": 2.8721, "step": 55730 }, { "epoch": 3.7868596276668027, "grad_norm": 2.2253034114837646, "learning_rate": 5.2680391357521406e-05, "loss": 2.422, "step": 55735 }, { "epoch": 3.7871993477374644, "grad_norm": 2.723355770111084, "learning_rate": 5.2676144856638134e-05, "loss": 2.8806, "step": 55740 }, { "epoch": 3.787539067808126, "grad_norm": 2.5515224933624268, "learning_rate": 5.2671898355754855e-05, "loss": 2.7721, "step": 55745 }, { "epoch": 3.787878787878788, "grad_norm": 2.8662993907928467, "learning_rate": 5.266765185487159e-05, "loss": 2.5874, "step": 55750 }, { "epoch": 3.7882185079494497, "grad_norm": 2.5163445472717285, "learning_rate": 5.266340535398832e-05, "loss": 2.7594, "step": 55755 }, { "epoch": 3.7885582280201113, "grad_norm": 2.46321702003479, "learning_rate": 5.265915885310504e-05, "loss": 2.6406, "step": 55760 }, { "epoch": 3.7888979480907734, "grad_norm": 2.4558322429656982, "learning_rate": 5.2654912352221774e-05, "loss": 2.536, "step": 55765 }, { "epoch": 3.789237668161435, "grad_norm": 3.0818028450012207, "learning_rate": 5.26506658513385e-05, "loss": 2.879, "step": 55770 }, { "epoch": 3.7895773882320967, "grad_norm": 3.011068820953369, "learning_rate": 5.264641935045522e-05, "loss": 2.762, "step": 55775 }, { "epoch": 3.7899171083027587, "grad_norm": 3.2029330730438232, "learning_rate": 5.264217284957196e-05, "loss": 2.7969, "step": 55780 }, { "epoch": 3.7902568283734204, "grad_norm": 3.0033645629882812, "learning_rate": 5.263792634868868e-05, "loss": 2.8535, "step": 55785 }, { "epoch": 3.790596548444082, "grad_norm": 2.4692602157592773, "learning_rate": 5.263367984780541e-05, "loss": 2.5942, "step": 55790 }, { "epoch": 3.790936268514744, "grad_norm": 2.7422852516174316, "learning_rate": 5.262943334692214e-05, "loss": 2.5668, 
"step": 55795 }, { "epoch": 3.7912759885854057, "grad_norm": 3.3613076210021973, "learning_rate": 5.262518684603886e-05, "loss": 2.3524, "step": 55800 }, { "epoch": 3.7916157086560673, "grad_norm": 3.0965728759765625, "learning_rate": 5.262094034515559e-05, "loss": 2.6307, "step": 55805 }, { "epoch": 3.7919554287267294, "grad_norm": 2.681396722793579, "learning_rate": 5.2616693844272326e-05, "loss": 2.6378, "step": 55810 }, { "epoch": 3.792295148797391, "grad_norm": 2.892751932144165, "learning_rate": 5.261244734338905e-05, "loss": 2.5705, "step": 55815 }, { "epoch": 3.7926348688680527, "grad_norm": 2.6940786838531494, "learning_rate": 5.2608200842505775e-05, "loss": 2.9462, "step": 55820 }, { "epoch": 3.7929745889387148, "grad_norm": 3.392794132232666, "learning_rate": 5.260395434162251e-05, "loss": 2.595, "step": 55825 }, { "epoch": 3.7933143090093764, "grad_norm": 2.405003309249878, "learning_rate": 5.259970784073923e-05, "loss": 2.9004, "step": 55830 }, { "epoch": 3.793654029080038, "grad_norm": 3.343574047088623, "learning_rate": 5.259546133985596e-05, "loss": 2.6352, "step": 55835 }, { "epoch": 3.7939937491507, "grad_norm": 2.8969616889953613, "learning_rate": 5.2591214838972694e-05, "loss": 2.6597, "step": 55840 }, { "epoch": 3.7943334692213617, "grad_norm": 2.4584832191467285, "learning_rate": 5.2586968338089415e-05, "loss": 2.7038, "step": 55845 }, { "epoch": 3.7946731892920234, "grad_norm": 2.5499391555786133, "learning_rate": 5.2582721837206137e-05, "loss": 2.4809, "step": 55850 }, { "epoch": 3.795012909362685, "grad_norm": 3.318979024887085, "learning_rate": 5.257847533632287e-05, "loss": 2.6098, "step": 55855 }, { "epoch": 3.795352629433347, "grad_norm": 2.4156033992767334, "learning_rate": 5.25742288354396e-05, "loss": 2.8063, "step": 55860 }, { "epoch": 3.7956923495040087, "grad_norm": 3.7770767211914062, "learning_rate": 5.2569982334556334e-05, "loss": 2.5586, "step": 55865 }, { "epoch": 3.7960320695746703, "grad_norm": 3.4559409618377686, 
"learning_rate": 5.2565735833673055e-05, "loss": 2.741, "step": 55870 }, { "epoch": 3.7963717896453324, "grad_norm": 2.8841452598571777, "learning_rate": 5.256148933278978e-05, "loss": 2.4941, "step": 55875 }, { "epoch": 3.796711509715994, "grad_norm": 2.5378990173339844, "learning_rate": 5.255724283190652e-05, "loss": 2.7941, "step": 55880 }, { "epoch": 3.7970512297866557, "grad_norm": 2.7228550910949707, "learning_rate": 5.255299633102324e-05, "loss": 2.8295, "step": 55885 }, { "epoch": 3.7973909498573177, "grad_norm": 2.8503522872924805, "learning_rate": 5.254874983013997e-05, "loss": 2.752, "step": 55890 }, { "epoch": 3.7977306699279794, "grad_norm": 2.7842838764190674, "learning_rate": 5.25445033292567e-05, "loss": 2.6116, "step": 55895 }, { "epoch": 3.798070389998641, "grad_norm": 2.6441495418548584, "learning_rate": 5.254025682837342e-05, "loss": 2.7275, "step": 55900 }, { "epoch": 3.7984101100693026, "grad_norm": 3.0915520191192627, "learning_rate": 5.253601032749015e-05, "loss": 2.7492, "step": 55905 }, { "epoch": 3.7987498301399647, "grad_norm": 4.860446453094482, "learning_rate": 5.2531763826606886e-05, "loss": 2.3135, "step": 55910 }, { "epoch": 3.7990895502106263, "grad_norm": 2.343940258026123, "learning_rate": 5.252751732572361e-05, "loss": 2.5761, "step": 55915 }, { "epoch": 3.799429270281288, "grad_norm": 3.6780149936676025, "learning_rate": 5.252327082484033e-05, "loss": 2.7287, "step": 55920 }, { "epoch": 3.79976899035195, "grad_norm": 2.4913129806518555, "learning_rate": 5.2519024323957063e-05, "loss": 2.7669, "step": 55925 }, { "epoch": 3.8001087104226117, "grad_norm": 3.199601173400879, "learning_rate": 5.251477782307379e-05, "loss": 2.7958, "step": 55930 }, { "epoch": 3.8004484304932733, "grad_norm": 2.667104959487915, "learning_rate": 5.251053132219051e-05, "loss": 2.6709, "step": 55935 }, { "epoch": 3.8007881505639354, "grad_norm": 2.273455858230591, "learning_rate": 5.250628482130725e-05, "loss": 2.5854, "step": 55940 }, { "epoch": 
3.801127870634597, "grad_norm": 2.809748649597168, "learning_rate": 5.2502038320423975e-05, "loss": 2.8484, "step": 55945 }, { "epoch": 3.8014675907052586, "grad_norm": 2.9569268226623535, "learning_rate": 5.24977918195407e-05, "loss": 2.6678, "step": 55950 }, { "epoch": 3.8018073107759207, "grad_norm": 3.181635618209839, "learning_rate": 5.249354531865743e-05, "loss": 2.3201, "step": 55955 }, { "epoch": 3.8021470308465823, "grad_norm": 2.559530019760132, "learning_rate": 5.248929881777416e-05, "loss": 2.588, "step": 55960 }, { "epoch": 3.802486750917244, "grad_norm": 2.943277359008789, "learning_rate": 5.248505231689088e-05, "loss": 2.7019, "step": 55965 }, { "epoch": 3.802826470987906, "grad_norm": 2.824355363845825, "learning_rate": 5.2480805816007615e-05, "loss": 2.9226, "step": 55970 }, { "epoch": 3.8031661910585677, "grad_norm": 3.656717300415039, "learning_rate": 5.2476559315124343e-05, "loss": 2.6118, "step": 55975 }, { "epoch": 3.8035059111292293, "grad_norm": 2.7854244709014893, "learning_rate": 5.2472312814241065e-05, "loss": 2.5217, "step": 55980 }, { "epoch": 3.8038456311998914, "grad_norm": 2.822964668273926, "learning_rate": 5.24680663133578e-05, "loss": 2.8266, "step": 55985 }, { "epoch": 3.804185351270553, "grad_norm": 3.6616833209991455, "learning_rate": 5.246381981247452e-05, "loss": 2.5687, "step": 55990 }, { "epoch": 3.8045250713412146, "grad_norm": 2.6607298851013184, "learning_rate": 5.245957331159125e-05, "loss": 2.732, "step": 55995 }, { "epoch": 3.8048647914118767, "grad_norm": 3.2751376628875732, "learning_rate": 5.2455326810707984e-05, "loss": 2.62, "step": 56000 }, { "epoch": 3.8052045114825384, "grad_norm": 3.0609471797943115, "learning_rate": 5.2451080309824705e-05, "loss": 2.4822, "step": 56005 }, { "epoch": 3.8055442315532, "grad_norm": 3.586829662322998, "learning_rate": 5.244683380894143e-05, "loss": 2.6152, "step": 56010 }, { "epoch": 3.805883951623862, "grad_norm": 2.4684624671936035, "learning_rate": 5.244258730805817e-05, 
"loss": 2.5693, "step": 56015 }, { "epoch": 3.8062236716945237, "grad_norm": 2.6271610260009766, "learning_rate": 5.243834080717489e-05, "loss": 2.688, "step": 56020 }, { "epoch": 3.8065633917651853, "grad_norm": 3.0429904460906982, "learning_rate": 5.243409430629162e-05, "loss": 2.6247, "step": 56025 }, { "epoch": 3.8069031118358474, "grad_norm": 3.003896951675415, "learning_rate": 5.242984780540835e-05, "loss": 2.7188, "step": 56030 }, { "epoch": 3.807242831906509, "grad_norm": 2.859788656234741, "learning_rate": 5.242560130452507e-05, "loss": 2.4568, "step": 56035 }, { "epoch": 3.8075825519771707, "grad_norm": 2.0217576026916504, "learning_rate": 5.24213548036418e-05, "loss": 2.8285, "step": 56040 }, { "epoch": 3.8079222720478327, "grad_norm": 2.8265633583068848, "learning_rate": 5.2417108302758536e-05, "loss": 2.5848, "step": 56045 }, { "epoch": 3.8082619921184944, "grad_norm": 3.9329254627227783, "learning_rate": 5.241286180187526e-05, "loss": 2.8014, "step": 56050 }, { "epoch": 3.808601712189156, "grad_norm": 3.3661301136016846, "learning_rate": 5.240861530099198e-05, "loss": 2.7736, "step": 56055 }, { "epoch": 3.808941432259818, "grad_norm": 2.4180753231048584, "learning_rate": 5.240436880010872e-05, "loss": 2.6465, "step": 56060 }, { "epoch": 3.8092811523304797, "grad_norm": 2.3953070640563965, "learning_rate": 5.240012229922544e-05, "loss": 2.7638, "step": 56065 }, { "epoch": 3.8096208724011413, "grad_norm": 3.8865163326263428, "learning_rate": 5.239587579834216e-05, "loss": 2.6671, "step": 56070 }, { "epoch": 3.8099605924718034, "grad_norm": 2.8913497924804688, "learning_rate": 5.23916292974589e-05, "loss": 2.6809, "step": 56075 }, { "epoch": 3.810300312542465, "grad_norm": 3.1466472148895264, "learning_rate": 5.2387382796575625e-05, "loss": 2.7226, "step": 56080 }, { "epoch": 3.8106400326131267, "grad_norm": 2.638930082321167, "learning_rate": 5.2383136295692346e-05, "loss": 2.5809, "step": 56085 }, { "epoch": 3.8109797526837887, "grad_norm": 
3.431274652481079, "learning_rate": 5.237888979480908e-05, "loss": 2.591, "step": 56090 }, { "epoch": 3.8113194727544504, "grad_norm": 3.590620517730713, "learning_rate": 5.237464329392581e-05, "loss": 2.6654, "step": 56095 }, { "epoch": 3.811659192825112, "grad_norm": 2.5659096240997314, "learning_rate": 5.237039679304253e-05, "loss": 2.7391, "step": 56100 }, { "epoch": 3.811998912895774, "grad_norm": 3.1224803924560547, "learning_rate": 5.2366150292159265e-05, "loss": 2.6125, "step": 56105 }, { "epoch": 3.8123386329664357, "grad_norm": 2.650670289993286, "learning_rate": 5.236190379127599e-05, "loss": 2.7048, "step": 56110 }, { "epoch": 3.8126783530370973, "grad_norm": 2.8850224018096924, "learning_rate": 5.2357657290392714e-05, "loss": 2.9119, "step": 56115 }, { "epoch": 3.8130180731077594, "grad_norm": 2.6401257514953613, "learning_rate": 5.235341078950945e-05, "loss": 2.7619, "step": 56120 }, { "epoch": 3.813357793178421, "grad_norm": 2.639648675918579, "learning_rate": 5.234916428862617e-05, "loss": 2.7353, "step": 56125 }, { "epoch": 3.8136975132490827, "grad_norm": 3.596008062362671, "learning_rate": 5.23449177877429e-05, "loss": 2.5294, "step": 56130 }, { "epoch": 3.8140372333197448, "grad_norm": 3.031553268432617, "learning_rate": 5.234067128685963e-05, "loss": 2.5223, "step": 56135 }, { "epoch": 3.8143769533904064, "grad_norm": 3.4186642169952393, "learning_rate": 5.2336424785976354e-05, "loss": 2.5036, "step": 56140 }, { "epoch": 3.814716673461068, "grad_norm": 3.338253974914551, "learning_rate": 5.233217828509308e-05, "loss": 2.7328, "step": 56145 }, { "epoch": 3.81505639353173, "grad_norm": 2.8498051166534424, "learning_rate": 5.232793178420982e-05, "loss": 2.9289, "step": 56150 }, { "epoch": 3.8153961136023917, "grad_norm": 3.6998696327209473, "learning_rate": 5.232368528332654e-05, "loss": 2.6307, "step": 56155 }, { "epoch": 3.8157358336730534, "grad_norm": 3.8617048263549805, "learning_rate": 5.2319438782443266e-05, "loss": 2.4523, "step": 56160 }, 
{ "epoch": 3.8160755537437154, "grad_norm": 3.6960384845733643, "learning_rate": 5.231519228156e-05, "loss": 2.711, "step": 56165 }, { "epoch": 3.816415273814377, "grad_norm": 3.018306255340576, "learning_rate": 5.231094578067672e-05, "loss": 2.5503, "step": 56170 }, { "epoch": 3.8167549938850387, "grad_norm": 2.3954687118530273, "learning_rate": 5.230669927979345e-05, "loss": 2.5941, "step": 56175 }, { "epoch": 3.8170947139557008, "grad_norm": 3.1553568840026855, "learning_rate": 5.2302452778910185e-05, "loss": 2.6791, "step": 56180 }, { "epoch": 3.8174344340263624, "grad_norm": 2.6465442180633545, "learning_rate": 5.2298206278026906e-05, "loss": 2.6565, "step": 56185 }, { "epoch": 3.817774154097024, "grad_norm": 2.998422861099243, "learning_rate": 5.229395977714363e-05, "loss": 2.8415, "step": 56190 }, { "epoch": 3.8181138741676857, "grad_norm": 3.1532185077667236, "learning_rate": 5.228971327626037e-05, "loss": 2.7644, "step": 56195 }, { "epoch": 3.8184535942383477, "grad_norm": 2.9911954402923584, "learning_rate": 5.228546677537709e-05, "loss": 2.8114, "step": 56200 }, { "epoch": 3.8187933143090094, "grad_norm": 3.054624557495117, "learning_rate": 5.2281220274493825e-05, "loss": 2.3858, "step": 56205 }, { "epoch": 3.819133034379671, "grad_norm": 2.709944486618042, "learning_rate": 5.2276973773610546e-05, "loss": 2.7862, "step": 56210 }, { "epoch": 3.819472754450333, "grad_norm": 2.8829331398010254, "learning_rate": 5.2272727272727274e-05, "loss": 2.6702, "step": 56215 }, { "epoch": 3.8198124745209947, "grad_norm": 3.645541191101074, "learning_rate": 5.226848077184401e-05, "loss": 2.8299, "step": 56220 }, { "epoch": 3.8201521945916563, "grad_norm": 2.7965087890625, "learning_rate": 5.226423427096073e-05, "loss": 2.6213, "step": 56225 }, { "epoch": 3.8204919146623184, "grad_norm": 2.3716444969177246, "learning_rate": 5.225998777007746e-05, "loss": 2.4623, "step": 56230 }, { "epoch": 3.82083163473298, "grad_norm": 2.394076108932495, "learning_rate": 
5.225574126919419e-05, "loss": 2.842, "step": 56235 }, { "epoch": 3.8211713548036417, "grad_norm": 2.5317041873931885, "learning_rate": 5.2251494768310914e-05, "loss": 2.6478, "step": 56240 }, { "epoch": 3.8215110748743033, "grad_norm": 2.7771084308624268, "learning_rate": 5.224724826742764e-05, "loss": 2.6761, "step": 56245 }, { "epoch": 3.8218507949449654, "grad_norm": 3.0818865299224854, "learning_rate": 5.224300176654438e-05, "loss": 2.6193, "step": 56250 }, { "epoch": 3.822190515015627, "grad_norm": 3.306093454360962, "learning_rate": 5.22387552656611e-05, "loss": 2.8807, "step": 56255 }, { "epoch": 3.8225302350862886, "grad_norm": 2.7638914585113525, "learning_rate": 5.2234508764777826e-05, "loss": 2.5673, "step": 56260 }, { "epoch": 3.8228699551569507, "grad_norm": 2.373511552810669, "learning_rate": 5.223026226389456e-05, "loss": 2.573, "step": 56265 }, { "epoch": 3.8232096752276123, "grad_norm": 2.25152850151062, "learning_rate": 5.222601576301128e-05, "loss": 2.4066, "step": 56270 }, { "epoch": 3.823549395298274, "grad_norm": 2.500343084335327, "learning_rate": 5.2221769262128004e-05, "loss": 2.6784, "step": 56275 }, { "epoch": 3.823889115368936, "grad_norm": 3.296292304992676, "learning_rate": 5.221752276124474e-05, "loss": 2.7354, "step": 56280 }, { "epoch": 3.8242288354395977, "grad_norm": 2.9123294353485107, "learning_rate": 5.2213276260361466e-05, "loss": 2.7209, "step": 56285 }, { "epoch": 3.8245685555102593, "grad_norm": 2.828777551651001, "learning_rate": 5.220902975947819e-05, "loss": 2.6574, "step": 56290 }, { "epoch": 3.8249082755809214, "grad_norm": 2.782244920730591, "learning_rate": 5.220478325859492e-05, "loss": 2.6227, "step": 56295 }, { "epoch": 3.825247995651583, "grad_norm": 2.507490396499634, "learning_rate": 5.220053675771165e-05, "loss": 2.7511, "step": 56300 }, { "epoch": 3.8255877157222447, "grad_norm": 2.6617133617401123, "learning_rate": 5.219629025682837e-05, "loss": 2.7208, "step": 56305 }, { "epoch": 3.8259274357929067, 
"grad_norm": 3.451181173324585, "learning_rate": 5.2192043755945106e-05, "loss": 2.7875, "step": 56310 }, { "epoch": 3.8262671558635684, "grad_norm": 2.3146305084228516, "learning_rate": 5.2187797255061834e-05, "loss": 2.7662, "step": 56315 }, { "epoch": 3.82660687593423, "grad_norm": 2.541814088821411, "learning_rate": 5.2183550754178556e-05, "loss": 2.3794, "step": 56320 }, { "epoch": 3.826946596004892, "grad_norm": 2.955191135406494, "learning_rate": 5.217930425329529e-05, "loss": 2.686, "step": 56325 }, { "epoch": 3.8272863160755537, "grad_norm": 2.4664506912231445, "learning_rate": 5.217505775241202e-05, "loss": 2.5422, "step": 56330 }, { "epoch": 3.8276260361462153, "grad_norm": 3.597324848175049, "learning_rate": 5.217081125152874e-05, "loss": 2.75, "step": 56335 }, { "epoch": 3.8279657562168774, "grad_norm": 2.500138759613037, "learning_rate": 5.2166564750645475e-05, "loss": 2.7711, "step": 56340 }, { "epoch": 3.828305476287539, "grad_norm": 3.226724624633789, "learning_rate": 5.2162318249762196e-05, "loss": 2.6056, "step": 56345 }, { "epoch": 3.8286451963582007, "grad_norm": 3.1762542724609375, "learning_rate": 5.2158071748878924e-05, "loss": 2.7366, "step": 56350 }, { "epoch": 3.8289849164288627, "grad_norm": 3.9958152770996094, "learning_rate": 5.215382524799566e-05, "loss": 2.4322, "step": 56355 }, { "epoch": 3.8293246364995244, "grad_norm": 3.200662612915039, "learning_rate": 5.214957874711238e-05, "loss": 2.6591, "step": 56360 }, { "epoch": 3.829664356570186, "grad_norm": 3.4982075691223145, "learning_rate": 5.214533224622911e-05, "loss": 2.6438, "step": 56365 }, { "epoch": 3.830004076640848, "grad_norm": 2.4793832302093506, "learning_rate": 5.214108574534584e-05, "loss": 2.6172, "step": 56370 }, { "epoch": 3.8303437967115097, "grad_norm": 4.1612772941589355, "learning_rate": 5.2136839244462564e-05, "loss": 2.6318, "step": 56375 }, { "epoch": 3.8306835167821713, "grad_norm": 3.1245369911193848, "learning_rate": 5.213259274357929e-05, "loss": 2.5524, 
"step": 56380 }, { "epoch": 3.8310232368528334, "grad_norm": 2.611635684967041, "learning_rate": 5.2128346242696027e-05, "loss": 2.5954, "step": 56385 }, { "epoch": 3.831362956923495, "grad_norm": 3.5742058753967285, "learning_rate": 5.212409974181275e-05, "loss": 2.5821, "step": 56390 }, { "epoch": 3.8317026769941567, "grad_norm": 4.339736461639404, "learning_rate": 5.2119853240929476e-05, "loss": 2.6601, "step": 56395 }, { "epoch": 3.8320423970648188, "grad_norm": 2.465395212173462, "learning_rate": 5.211560674004621e-05, "loss": 2.6663, "step": 56400 }, { "epoch": 3.8323821171354804, "grad_norm": 3.180126905441284, "learning_rate": 5.211136023916293e-05, "loss": 2.7677, "step": 56405 }, { "epoch": 3.832721837206142, "grad_norm": 2.857855796813965, "learning_rate": 5.210711373827965e-05, "loss": 2.7075, "step": 56410 }, { "epoch": 3.833061557276804, "grad_norm": 3.082148313522339, "learning_rate": 5.210286723739639e-05, "loss": 2.5711, "step": 56415 }, { "epoch": 3.8334012773474657, "grad_norm": 3.0116982460021973, "learning_rate": 5.2098620736513116e-05, "loss": 2.4701, "step": 56420 }, { "epoch": 3.8337409974181273, "grad_norm": 3.0021612644195557, "learning_rate": 5.209437423562984e-05, "loss": 2.4869, "step": 56425 }, { "epoch": 3.8340807174887894, "grad_norm": 3.014232635498047, "learning_rate": 5.209012773474657e-05, "loss": 2.5341, "step": 56430 }, { "epoch": 3.834420437559451, "grad_norm": 2.915961980819702, "learning_rate": 5.20858812338633e-05, "loss": 2.6044, "step": 56435 }, { "epoch": 3.8347601576301127, "grad_norm": 2.8588685989379883, "learning_rate": 5.208163473298002e-05, "loss": 2.5533, "step": 56440 }, { "epoch": 3.8350998777007748, "grad_norm": 2.7237155437469482, "learning_rate": 5.2077388232096756e-05, "loss": 2.672, "step": 56445 }, { "epoch": 3.8354395977714364, "grad_norm": 3.5394535064697266, "learning_rate": 5.2073141731213484e-05, "loss": 2.6747, "step": 56450 }, { "epoch": 3.835779317842098, "grad_norm": 2.6633338928222656, 
"learning_rate": 5.2068895230330205e-05, "loss": 2.8055, "step": 56455 }, { "epoch": 3.83611903791276, "grad_norm": 2.6426126956939697, "learning_rate": 5.206464872944694e-05, "loss": 2.6594, "step": 56460 }, { "epoch": 3.8364587579834217, "grad_norm": 3.108307123184204, "learning_rate": 5.206040222856367e-05, "loss": 2.6415, "step": 56465 }, { "epoch": 3.8367984780540834, "grad_norm": 2.7249152660369873, "learning_rate": 5.205615572768039e-05, "loss": 2.6122, "step": 56470 }, { "epoch": 3.8371381981247454, "grad_norm": 2.9893736839294434, "learning_rate": 5.2051909226797124e-05, "loss": 2.8274, "step": 56475 }, { "epoch": 3.837477918195407, "grad_norm": 3.107015609741211, "learning_rate": 5.2047662725913845e-05, "loss": 2.6973, "step": 56480 }, { "epoch": 3.8378176382660687, "grad_norm": 2.9264719486236572, "learning_rate": 5.204341622503057e-05, "loss": 2.8568, "step": 56485 }, { "epoch": 3.8381573583367308, "grad_norm": 3.3839046955108643, "learning_rate": 5.203916972414731e-05, "loss": 2.662, "step": 56490 }, { "epoch": 3.8384970784073924, "grad_norm": 2.474233865737915, "learning_rate": 5.203492322326403e-05, "loss": 2.9305, "step": 56495 }, { "epoch": 3.838836798478054, "grad_norm": 2.950976848602295, "learning_rate": 5.203067672238076e-05, "loss": 2.5218, "step": 56500 }, { "epoch": 3.839176518548716, "grad_norm": 2.670625925064087, "learning_rate": 5.202643022149749e-05, "loss": 2.637, "step": 56505 }, { "epoch": 3.8395162386193777, "grad_norm": 2.9923887252807617, "learning_rate": 5.202218372061421e-05, "loss": 2.8743, "step": 56510 }, { "epoch": 3.8398559586900394, "grad_norm": 3.0059165954589844, "learning_rate": 5.201793721973094e-05, "loss": 2.711, "step": 56515 }, { "epoch": 3.8401956787607014, "grad_norm": 3.541130304336548, "learning_rate": 5.2013690718847676e-05, "loss": 2.8189, "step": 56520 }, { "epoch": 3.840535398831363, "grad_norm": 2.330437183380127, "learning_rate": 5.20094442179644e-05, "loss": 2.4967, "step": 56525 }, { "epoch": 
3.8408751189020247, "grad_norm": 2.7980024814605713, "learning_rate": 5.2005197717081125e-05, "loss": 2.7703, "step": 56530 }, { "epoch": 3.8412148389726863, "grad_norm": 3.2092278003692627, "learning_rate": 5.200095121619786e-05, "loss": 2.4688, "step": 56535 }, { "epoch": 3.8415545590433484, "grad_norm": 3.2342097759246826, "learning_rate": 5.199670471531458e-05, "loss": 2.8677, "step": 56540 }, { "epoch": 3.84189427911401, "grad_norm": 3.679749011993408, "learning_rate": 5.1992458214431316e-05, "loss": 2.6422, "step": 56545 }, { "epoch": 3.8422339991846717, "grad_norm": 2.812077045440674, "learning_rate": 5.198821171354804e-05, "loss": 2.8083, "step": 56550 }, { "epoch": 3.8425737192553338, "grad_norm": 3.533416986465454, "learning_rate": 5.1983965212664765e-05, "loss": 2.8453, "step": 56555 }, { "epoch": 3.8429134393259954, "grad_norm": 2.998565435409546, "learning_rate": 5.19797187117815e-05, "loss": 2.6478, "step": 56560 }, { "epoch": 3.843253159396657, "grad_norm": 2.7340621948242188, "learning_rate": 5.197547221089822e-05, "loss": 2.5131, "step": 56565 }, { "epoch": 3.843592879467319, "grad_norm": 3.7765398025512695, "learning_rate": 5.197122571001495e-05, "loss": 2.6867, "step": 56570 }, { "epoch": 3.8439325995379807, "grad_norm": 3.3054018020629883, "learning_rate": 5.1966979209131684e-05, "loss": 2.5298, "step": 56575 }, { "epoch": 3.8442723196086424, "grad_norm": 2.2288551330566406, "learning_rate": 5.1962732708248405e-05, "loss": 2.732, "step": 56580 }, { "epoch": 3.844612039679304, "grad_norm": 3.9374241828918457, "learning_rate": 5.195848620736513e-05, "loss": 2.8715, "step": 56585 }, { "epoch": 3.844951759749966, "grad_norm": 2.6070587635040283, "learning_rate": 5.195423970648187e-05, "loss": 2.5908, "step": 56590 }, { "epoch": 3.8452914798206277, "grad_norm": 2.554860830307007, "learning_rate": 5.194999320559859e-05, "loss": 2.5233, "step": 56595 }, { "epoch": 3.8456311998912893, "grad_norm": 3.032909393310547, "learning_rate": 
5.194574670471532e-05, "loss": 2.8867, "step": 56600 }, { "epoch": 3.8459709199619514, "grad_norm": 2.6710658073425293, "learning_rate": 5.194150020383205e-05, "loss": 2.5784, "step": 56605 }, { "epoch": 3.846310640032613, "grad_norm": 2.5736896991729736, "learning_rate": 5.193725370294877e-05, "loss": 2.6532, "step": 56610 }, { "epoch": 3.8466503601032747, "grad_norm": 3.3925211429595947, "learning_rate": 5.1933007202065495e-05, "loss": 2.8247, "step": 56615 }, { "epoch": 3.8469900801739367, "grad_norm": 2.7261600494384766, "learning_rate": 5.1928760701182236e-05, "loss": 2.5762, "step": 56620 }, { "epoch": 3.8473298002445984, "grad_norm": 2.684049367904663, "learning_rate": 5.192451420029896e-05, "loss": 2.6459, "step": 56625 }, { "epoch": 3.84766952031526, "grad_norm": 3.5803146362304688, "learning_rate": 5.192026769941568e-05, "loss": 2.7709, "step": 56630 }, { "epoch": 3.848009240385922, "grad_norm": 3.0711007118225098, "learning_rate": 5.1916021198532413e-05, "loss": 2.9689, "step": 56635 }, { "epoch": 3.8483489604565837, "grad_norm": 2.370063543319702, "learning_rate": 5.191177469764914e-05, "loss": 2.5856, "step": 56640 }, { "epoch": 3.8486886805272453, "grad_norm": 2.264859199523926, "learning_rate": 5.190752819676586e-05, "loss": 2.838, "step": 56645 }, { "epoch": 3.8490284005979074, "grad_norm": 2.37310528755188, "learning_rate": 5.19032816958826e-05, "loss": 2.6225, "step": 56650 }, { "epoch": 3.849368120668569, "grad_norm": 2.955050468444824, "learning_rate": 5.1899035194999325e-05, "loss": 2.6213, "step": 56655 }, { "epoch": 3.8497078407392307, "grad_norm": 3.035383462905884, "learning_rate": 5.189478869411605e-05, "loss": 2.8507, "step": 56660 }, { "epoch": 3.8500475608098927, "grad_norm": 3.321685552597046, "learning_rate": 5.189054219323278e-05, "loss": 2.6259, "step": 56665 }, { "epoch": 3.8503872808805544, "grad_norm": 2.4077696800231934, "learning_rate": 5.188629569234951e-05, "loss": 2.6426, "step": 56670 }, { "epoch": 3.850727000951216, 
"grad_norm": 2.521333694458008, "learning_rate": 5.188204919146623e-05, "loss": 2.6583, "step": 56675 }, { "epoch": 3.851066721021878, "grad_norm": 3.9249582290649414, "learning_rate": 5.1877802690582965e-05, "loss": 2.613, "step": 56680 }, { "epoch": 3.8514064410925397, "grad_norm": 3.178511619567871, "learning_rate": 5.1873556189699693e-05, "loss": 2.4094, "step": 56685 }, { "epoch": 3.8517461611632013, "grad_norm": 2.9206433296203613, "learning_rate": 5.1869309688816415e-05, "loss": 2.4983, "step": 56690 }, { "epoch": 3.8520858812338634, "grad_norm": 3.3349297046661377, "learning_rate": 5.186506318793315e-05, "loss": 2.8629, "step": 56695 }, { "epoch": 3.852425601304525, "grad_norm": 3.1550638675689697, "learning_rate": 5.186081668704987e-05, "loss": 2.4911, "step": 56700 }, { "epoch": 3.8527653213751867, "grad_norm": 2.879500389099121, "learning_rate": 5.18565701861666e-05, "loss": 2.4032, "step": 56705 }, { "epoch": 3.8531050414458488, "grad_norm": 2.5453975200653076, "learning_rate": 5.1852323685283334e-05, "loss": 2.6833, "step": 56710 }, { "epoch": 3.8534447615165104, "grad_norm": 2.911942720413208, "learning_rate": 5.1848077184400055e-05, "loss": 2.634, "step": 56715 }, { "epoch": 3.853784481587172, "grad_norm": 2.227163076400757, "learning_rate": 5.184383068351678e-05, "loss": 2.6941, "step": 56720 }, { "epoch": 3.854124201657834, "grad_norm": 2.7551047801971436, "learning_rate": 5.183958418263352e-05, "loss": 2.7083, "step": 56725 }, { "epoch": 3.8544639217284957, "grad_norm": 2.6865713596343994, "learning_rate": 5.183533768175024e-05, "loss": 2.6303, "step": 56730 }, { "epoch": 3.8548036417991574, "grad_norm": 2.547424077987671, "learning_rate": 5.183109118086697e-05, "loss": 2.6307, "step": 56735 }, { "epoch": 3.8551433618698194, "grad_norm": 3.101421594619751, "learning_rate": 5.18268446799837e-05, "loss": 2.7388, "step": 56740 }, { "epoch": 3.855483081940481, "grad_norm": 2.7342023849487305, "learning_rate": 5.182259817910042e-05, "loss": 2.5301, 
"step": 56745 }, { "epoch": 3.8558228020111427, "grad_norm": 2.537348508834839, "learning_rate": 5.181835167821715e-05, "loss": 2.7301, "step": 56750 }, { "epoch": 3.8561625220818048, "grad_norm": 2.6104860305786133, "learning_rate": 5.1814105177333886e-05, "loss": 2.5869, "step": 56755 }, { "epoch": 3.8565022421524664, "grad_norm": 2.5943777561187744, "learning_rate": 5.180985867645061e-05, "loss": 2.6868, "step": 56760 }, { "epoch": 3.856841962223128, "grad_norm": 2.2841312885284424, "learning_rate": 5.180561217556733e-05, "loss": 2.5744, "step": 56765 }, { "epoch": 3.85718168229379, "grad_norm": 3.8459765911102295, "learning_rate": 5.180136567468406e-05, "loss": 2.7154, "step": 56770 }, { "epoch": 3.8575214023644517, "grad_norm": 2.083698272705078, "learning_rate": 5.179711917380079e-05, "loss": 2.7004, "step": 56775 }, { "epoch": 3.8578611224351134, "grad_norm": 2.944331169128418, "learning_rate": 5.179287267291751e-05, "loss": 2.5696, "step": 56780 }, { "epoch": 3.8582008425057754, "grad_norm": 2.6615090370178223, "learning_rate": 5.178862617203425e-05, "loss": 2.6105, "step": 56785 }, { "epoch": 3.858540562576437, "grad_norm": 3.154672145843506, "learning_rate": 5.1784379671150975e-05, "loss": 2.4781, "step": 56790 }, { "epoch": 3.8588802826470987, "grad_norm": 2.5939717292785645, "learning_rate": 5.1780133170267696e-05, "loss": 2.7819, "step": 56795 }, { "epoch": 3.8592200027177608, "grad_norm": 3.0485751628875732, "learning_rate": 5.177588666938443e-05, "loss": 2.4806, "step": 56800 }, { "epoch": 3.8595597227884224, "grad_norm": 3.4071714878082275, "learning_rate": 5.177164016850116e-05, "loss": 2.5857, "step": 56805 }, { "epoch": 3.859899442859084, "grad_norm": 2.700669288635254, "learning_rate": 5.176739366761788e-05, "loss": 2.7455, "step": 56810 }, { "epoch": 3.860239162929746, "grad_norm": 3.090440273284912, "learning_rate": 5.1763147166734615e-05, "loss": 2.6011, "step": 56815 }, { "epoch": 3.8605788830004077, "grad_norm": 2.664705991744995, 
"learning_rate": 5.175890066585134e-05, "loss": 2.7867, "step": 56820 }, { "epoch": 3.8609186030710694, "grad_norm": 3.0144431591033936, "learning_rate": 5.1754654164968064e-05, "loss": 2.6615, "step": 56825 }, { "epoch": 3.8612583231417315, "grad_norm": 2.419977903366089, "learning_rate": 5.17504076640848e-05, "loss": 2.6046, "step": 56830 }, { "epoch": 3.861598043212393, "grad_norm": 2.6569929122924805, "learning_rate": 5.174616116320152e-05, "loss": 2.5155, "step": 56835 }, { "epoch": 3.8619377632830547, "grad_norm": 2.7923924922943115, "learning_rate": 5.174191466231825e-05, "loss": 2.6225, "step": 56840 }, { "epoch": 3.862277483353717, "grad_norm": 2.4641332626342773, "learning_rate": 5.173766816143498e-05, "loss": 2.5673, "step": 56845 }, { "epoch": 3.8626172034243784, "grad_norm": 3.149980068206787, "learning_rate": 5.1733421660551704e-05, "loss": 2.4698, "step": 56850 }, { "epoch": 3.86295692349504, "grad_norm": 2.981867790222168, "learning_rate": 5.172917515966843e-05, "loss": 2.7438, "step": 56855 }, { "epoch": 3.863296643565702, "grad_norm": 2.3471217155456543, "learning_rate": 5.172492865878517e-05, "loss": 2.5126, "step": 56860 }, { "epoch": 3.8636363636363638, "grad_norm": 2.927110433578491, "learning_rate": 5.172068215790189e-05, "loss": 2.4378, "step": 56865 }, { "epoch": 3.8639760837070254, "grad_norm": 2.8269565105438232, "learning_rate": 5.1716435657018616e-05, "loss": 2.728, "step": 56870 }, { "epoch": 3.864315803777687, "grad_norm": 3.32924222946167, "learning_rate": 5.171218915613535e-05, "loss": 2.9766, "step": 56875 }, { "epoch": 3.864655523848349, "grad_norm": 2.5336427688598633, "learning_rate": 5.170794265525207e-05, "loss": 2.734, "step": 56880 }, { "epoch": 3.8649952439190107, "grad_norm": 3.113532543182373, "learning_rate": 5.170369615436881e-05, "loss": 2.5176, "step": 56885 }, { "epoch": 3.8653349639896724, "grad_norm": 2.6173338890075684, "learning_rate": 5.1699449653485535e-05, "loss": 2.7062, "step": 56890 }, { "epoch": 
3.8656746840603344, "grad_norm": 2.362431287765503, "learning_rate": 5.1695203152602256e-05, "loss": 2.5671, "step": 56895 }, { "epoch": 3.866014404130996, "grad_norm": 11.315655708312988, "learning_rate": 5.169095665171899e-05, "loss": 2.6638, "step": 56900 }, { "epoch": 3.8663541242016577, "grad_norm": 2.566910743713379, "learning_rate": 5.168671015083571e-05, "loss": 2.5468, "step": 56905 }, { "epoch": 3.8666938442723198, "grad_norm": 3.487967014312744, "learning_rate": 5.168246364995244e-05, "loss": 2.9285, "step": 56910 }, { "epoch": 3.8670335643429814, "grad_norm": 2.438947916030884, "learning_rate": 5.1678217149069175e-05, "loss": 2.6015, "step": 56915 }, { "epoch": 3.867373284413643, "grad_norm": 2.7607555389404297, "learning_rate": 5.1673970648185896e-05, "loss": 2.7363, "step": 56920 }, { "epoch": 3.8677130044843047, "grad_norm": 2.308427333831787, "learning_rate": 5.1669724147302624e-05, "loss": 2.5163, "step": 56925 }, { "epoch": 3.8680527245549667, "grad_norm": 3.2143988609313965, "learning_rate": 5.166547764641936e-05, "loss": 2.7176, "step": 56930 }, { "epoch": 3.8683924446256284, "grad_norm": 3.187291145324707, "learning_rate": 5.166123114553608e-05, "loss": 2.4064, "step": 56935 }, { "epoch": 3.86873216469629, "grad_norm": 3.5352859497070312, "learning_rate": 5.165698464465281e-05, "loss": 2.601, "step": 56940 }, { "epoch": 3.869071884766952, "grad_norm": 3.0915355682373047, "learning_rate": 5.165273814376954e-05, "loss": 2.9215, "step": 56945 }, { "epoch": 3.8694116048376137, "grad_norm": 2.7492570877075195, "learning_rate": 5.1648491642886264e-05, "loss": 2.6014, "step": 56950 }, { "epoch": 3.8697513249082753, "grad_norm": 2.9409282207489014, "learning_rate": 5.164424514200299e-05, "loss": 2.7068, "step": 56955 }, { "epoch": 3.8700910449789374, "grad_norm": 3.349717617034912, "learning_rate": 5.163999864111973e-05, "loss": 2.757, "step": 56960 }, { "epoch": 3.870430765049599, "grad_norm": 2.7608063220977783, "learning_rate": 
5.163575214023645e-05, "loss": 2.8654, "step": 56965 }, { "epoch": 3.8707704851202607, "grad_norm": 3.2742090225219727, "learning_rate": 5.163150563935317e-05, "loss": 2.7137, "step": 56970 }, { "epoch": 3.8711102051909227, "grad_norm": 2.9967005252838135, "learning_rate": 5.162725913846991e-05, "loss": 2.4584, "step": 56975 }, { "epoch": 3.8714499252615844, "grad_norm": 2.4887173175811768, "learning_rate": 5.162301263758663e-05, "loss": 2.7096, "step": 56980 }, { "epoch": 3.871789645332246, "grad_norm": 2.571650981903076, "learning_rate": 5.1618766136703354e-05, "loss": 2.1943, "step": 56985 }, { "epoch": 3.872129365402908, "grad_norm": 3.7537314891815186, "learning_rate": 5.161451963582009e-05, "loss": 2.6953, "step": 56990 }, { "epoch": 3.8724690854735697, "grad_norm": 2.870532989501953, "learning_rate": 5.1610273134936816e-05, "loss": 2.5858, "step": 56995 }, { "epoch": 3.8728088055442313, "grad_norm": 2.7051942348480225, "learning_rate": 5.160602663405354e-05, "loss": 2.714, "step": 57000 }, { "epoch": 3.8731485256148934, "grad_norm": 2.5362401008605957, "learning_rate": 5.160178013317027e-05, "loss": 2.5623, "step": 57005 }, { "epoch": 3.873488245685555, "grad_norm": 3.2777321338653564, "learning_rate": 5.1597533632287e-05, "loss": 2.52, "step": 57010 }, { "epoch": 3.8738279657562167, "grad_norm": 2.80192232131958, "learning_rate": 5.159328713140372e-05, "loss": 2.7683, "step": 57015 }, { "epoch": 3.8741676858268788, "grad_norm": 2.5260798931121826, "learning_rate": 5.1589040630520456e-05, "loss": 2.6816, "step": 57020 }, { "epoch": 3.8745074058975404, "grad_norm": 3.080578088760376, "learning_rate": 5.1584794129637184e-05, "loss": 2.6682, "step": 57025 }, { "epoch": 3.874847125968202, "grad_norm": 2.8846914768218994, "learning_rate": 5.1580547628753906e-05, "loss": 2.498, "step": 57030 }, { "epoch": 3.875186846038864, "grad_norm": 2.8969340324401855, "learning_rate": 5.157630112787064e-05, "loss": 2.5073, "step": 57035 }, { "epoch": 3.8755265661095257, 
"grad_norm": 2.3065099716186523, "learning_rate": 5.157205462698736e-05, "loss": 2.5523, "step": 57040 }, { "epoch": 3.8758662861801874, "grad_norm": 2.70666241645813, "learning_rate": 5.156780812610409e-05, "loss": 2.5945, "step": 57045 }, { "epoch": 3.8762060062508494, "grad_norm": 3.3395001888275146, "learning_rate": 5.1563561625220824e-05, "loss": 2.6332, "step": 57050 }, { "epoch": 3.876545726321511, "grad_norm": 2.896921157836914, "learning_rate": 5.1559315124337546e-05, "loss": 2.474, "step": 57055 }, { "epoch": 3.8768854463921727, "grad_norm": 2.7025537490844727, "learning_rate": 5.1555068623454274e-05, "loss": 2.7308, "step": 57060 }, { "epoch": 3.8772251664628348, "grad_norm": 3.112837314605713, "learning_rate": 5.155082212257101e-05, "loss": 2.4616, "step": 57065 }, { "epoch": 3.8775648865334964, "grad_norm": 2.9802846908569336, "learning_rate": 5.154657562168773e-05, "loss": 2.5599, "step": 57070 }, { "epoch": 3.877904606604158, "grad_norm": 2.631535768508911, "learning_rate": 5.154232912080446e-05, "loss": 2.7969, "step": 57075 }, { "epoch": 3.87824432667482, "grad_norm": 2.8278470039367676, "learning_rate": 5.153808261992119e-05, "loss": 2.4685, "step": 57080 }, { "epoch": 3.8785840467454817, "grad_norm": 2.639024019241333, "learning_rate": 5.1533836119037914e-05, "loss": 3.0386, "step": 57085 }, { "epoch": 3.8789237668161434, "grad_norm": 3.5089941024780273, "learning_rate": 5.152958961815464e-05, "loss": 2.8916, "step": 57090 }, { "epoch": 3.8792634868868054, "grad_norm": 2.91690731048584, "learning_rate": 5.1525343117271377e-05, "loss": 2.6765, "step": 57095 }, { "epoch": 3.879603206957467, "grad_norm": 2.586881637573242, "learning_rate": 5.15210966163881e-05, "loss": 2.5586, "step": 57100 }, { "epoch": 3.8799429270281287, "grad_norm": 3.258009910583496, "learning_rate": 5.151685011550482e-05, "loss": 2.5939, "step": 57105 }, { "epoch": 3.880282647098791, "grad_norm": 2.8492431640625, "learning_rate": 5.151260361462156e-05, "loss": 2.6791, "step": 
57110 }, { "epoch": 3.8806223671694524, "grad_norm": 2.2476460933685303, "learning_rate": 5.150835711373828e-05, "loss": 2.6679, "step": 57115 }, { "epoch": 3.880962087240114, "grad_norm": 2.8709981441497803, "learning_rate": 5.1504110612855e-05, "loss": 2.6672, "step": 57120 }, { "epoch": 3.881301807310776, "grad_norm": 2.5953474044799805, "learning_rate": 5.149986411197174e-05, "loss": 2.7369, "step": 57125 }, { "epoch": 3.8816415273814378, "grad_norm": 2.695953369140625, "learning_rate": 5.1495617611088466e-05, "loss": 2.7986, "step": 57130 }, { "epoch": 3.8819812474520994, "grad_norm": 2.8774361610412598, "learning_rate": 5.149137111020519e-05, "loss": 2.6428, "step": 57135 }, { "epoch": 3.8823209675227615, "grad_norm": 2.9498705863952637, "learning_rate": 5.148712460932192e-05, "loss": 2.6745, "step": 57140 }, { "epoch": 3.882660687593423, "grad_norm": 2.6238396167755127, "learning_rate": 5.148287810843865e-05, "loss": 2.6943, "step": 57145 }, { "epoch": 3.8830004076640847, "grad_norm": 3.0606577396392822, "learning_rate": 5.147863160755537e-05, "loss": 2.5627, "step": 57150 }, { "epoch": 3.883340127734747, "grad_norm": 3.072925567626953, "learning_rate": 5.1474385106672106e-05, "loss": 2.5789, "step": 57155 }, { "epoch": 3.8836798478054084, "grad_norm": 3.1097633838653564, "learning_rate": 5.1470138605788834e-05, "loss": 3.0725, "step": 57160 }, { "epoch": 3.88401956787607, "grad_norm": 2.8554205894470215, "learning_rate": 5.1465892104905555e-05, "loss": 2.7128, "step": 57165 }, { "epoch": 3.884359287946732, "grad_norm": 2.6414451599121094, "learning_rate": 5.146164560402229e-05, "loss": 2.8055, "step": 57170 }, { "epoch": 3.8846990080173938, "grad_norm": 3.2435009479522705, "learning_rate": 5.145739910313902e-05, "loss": 2.5392, "step": 57175 }, { "epoch": 3.8850387280880554, "grad_norm": 3.007258653640747, "learning_rate": 5.145315260225574e-05, "loss": 2.4215, "step": 57180 }, { "epoch": 3.8853784481587175, "grad_norm": 2.4634416103363037, "learning_rate": 
5.1448906101372474e-05, "loss": 2.6393, "step": 57185 }, { "epoch": 3.885718168229379, "grad_norm": 2.957742214202881, "learning_rate": 5.1444659600489195e-05, "loss": 2.7172, "step": 57190 }, { "epoch": 3.8860578883000407, "grad_norm": 2.322429656982422, "learning_rate": 5.144041309960592e-05, "loss": 2.7194, "step": 57195 }, { "epoch": 3.886397608370703, "grad_norm": 2.2565505504608154, "learning_rate": 5.143616659872266e-05, "loss": 2.466, "step": 57200 }, { "epoch": 3.8867373284413644, "grad_norm": 2.6984500885009766, "learning_rate": 5.143192009783938e-05, "loss": 2.7929, "step": 57205 }, { "epoch": 3.887077048512026, "grad_norm": 2.3908379077911377, "learning_rate": 5.142767359695611e-05, "loss": 2.7753, "step": 57210 }, { "epoch": 3.887416768582688, "grad_norm": 4.6714091300964355, "learning_rate": 5.142342709607284e-05, "loss": 2.8407, "step": 57215 }, { "epoch": 3.8877564886533498, "grad_norm": 2.705406665802002, "learning_rate": 5.141918059518956e-05, "loss": 2.6967, "step": 57220 }, { "epoch": 3.8880962087240114, "grad_norm": 3.2333452701568604, "learning_rate": 5.14149340943063e-05, "loss": 2.6058, "step": 57225 }, { "epoch": 3.888435928794673, "grad_norm": 2.9705708026885986, "learning_rate": 5.1410687593423026e-05, "loss": 2.4276, "step": 57230 }, { "epoch": 3.888775648865335, "grad_norm": 3.0882694721221924, "learning_rate": 5.140644109253975e-05, "loss": 2.7124, "step": 57235 }, { "epoch": 3.8891153689359967, "grad_norm": 2.2528645992279053, "learning_rate": 5.140219459165648e-05, "loss": 2.4972, "step": 57240 }, { "epoch": 3.8894550890066584, "grad_norm": 2.482088088989258, "learning_rate": 5.139794809077321e-05, "loss": 2.5647, "step": 57245 }, { "epoch": 3.8897948090773204, "grad_norm": 2.566974639892578, "learning_rate": 5.139370158988993e-05, "loss": 2.5764, "step": 57250 }, { "epoch": 3.890134529147982, "grad_norm": 2.556541919708252, "learning_rate": 5.1389455089006666e-05, "loss": 2.5282, "step": 57255 }, { "epoch": 3.8904742492186437, 
"grad_norm": 3.121629238128662, "learning_rate": 5.138520858812339e-05, "loss": 2.535, "step": 57260 }, { "epoch": 3.8908139692893053, "grad_norm": 2.954664707183838, "learning_rate": 5.1380962087240115e-05, "loss": 2.7888, "step": 57265 }, { "epoch": 3.8911536893599674, "grad_norm": 3.29011607170105, "learning_rate": 5.137671558635685e-05, "loss": 2.4167, "step": 57270 }, { "epoch": 3.891493409430629, "grad_norm": 3.223984956741333, "learning_rate": 5.137246908547357e-05, "loss": 2.6805, "step": 57275 }, { "epoch": 3.8918331295012907, "grad_norm": 2.9513955116271973, "learning_rate": 5.13682225845903e-05, "loss": 2.4679, "step": 57280 }, { "epoch": 3.8921728495719528, "grad_norm": 2.7964305877685547, "learning_rate": 5.1363976083707034e-05, "loss": 2.7168, "step": 57285 }, { "epoch": 3.8925125696426144, "grad_norm": 3.0507638454437256, "learning_rate": 5.1359729582823755e-05, "loss": 2.655, "step": 57290 }, { "epoch": 3.892852289713276, "grad_norm": 2.760481357574463, "learning_rate": 5.135548308194048e-05, "loss": 2.5317, "step": 57295 }, { "epoch": 3.893192009783938, "grad_norm": 3.0945229530334473, "learning_rate": 5.135123658105722e-05, "loss": 2.4209, "step": 57300 }, { "epoch": 3.8935317298545997, "grad_norm": 3.2787132263183594, "learning_rate": 5.134699008017394e-05, "loss": 2.7483, "step": 57305 }, { "epoch": 3.8938714499252614, "grad_norm": 2.9250307083129883, "learning_rate": 5.134274357929067e-05, "loss": 2.6009, "step": 57310 }, { "epoch": 3.8942111699959234, "grad_norm": 3.497755527496338, "learning_rate": 5.13384970784074e-05, "loss": 2.7029, "step": 57315 }, { "epoch": 3.894550890066585, "grad_norm": 3.4534454345703125, "learning_rate": 5.133425057752412e-05, "loss": 2.6056, "step": 57320 }, { "epoch": 3.8948906101372467, "grad_norm": 2.8335704803466797, "learning_rate": 5.1330004076640845e-05, "loss": 2.878, "step": 57325 }, { "epoch": 3.8952303302079088, "grad_norm": 2.5142197608947754, "learning_rate": 5.132575757575758e-05, "loss": 2.7027, 
"step": 57330 }, { "epoch": 3.8955700502785704, "grad_norm": 3.240032434463501, "learning_rate": 5.132151107487431e-05, "loss": 2.578, "step": 57335 }, { "epoch": 3.895909770349232, "grad_norm": 2.6280264854431152, "learning_rate": 5.131726457399103e-05, "loss": 2.5428, "step": 57340 }, { "epoch": 3.896249490419894, "grad_norm": 3.1701865196228027, "learning_rate": 5.1313018073107763e-05, "loss": 2.7606, "step": 57345 }, { "epoch": 3.8965892104905557, "grad_norm": 1.8384658098220825, "learning_rate": 5.130877157222449e-05, "loss": 3.0209, "step": 57350 }, { "epoch": 3.8969289305612174, "grad_norm": 2.6450884342193604, "learning_rate": 5.130452507134121e-05, "loss": 2.8249, "step": 57355 }, { "epoch": 3.8972686506318794, "grad_norm": 3.168283224105835, "learning_rate": 5.130027857045795e-05, "loss": 2.7251, "step": 57360 }, { "epoch": 3.897608370702541, "grad_norm": 2.8171496391296387, "learning_rate": 5.1296032069574675e-05, "loss": 2.8644, "step": 57365 }, { "epoch": 3.8979480907732027, "grad_norm": 2.930093765258789, "learning_rate": 5.12917855686914e-05, "loss": 2.8225, "step": 57370 }, { "epoch": 3.8982878108438648, "grad_norm": 3.162263870239258, "learning_rate": 5.128753906780813e-05, "loss": 2.8305, "step": 57375 }, { "epoch": 3.8986275309145264, "grad_norm": 3.8035473823547363, "learning_rate": 5.128329256692486e-05, "loss": 2.9233, "step": 57380 }, { "epoch": 3.898967250985188, "grad_norm": 2.777956485748291, "learning_rate": 5.127904606604158e-05, "loss": 2.6849, "step": 57385 }, { "epoch": 3.89930697105585, "grad_norm": 3.301225423812866, "learning_rate": 5.1274799565158315e-05, "loss": 2.6772, "step": 57390 }, { "epoch": 3.8996466911265117, "grad_norm": 2.867997169494629, "learning_rate": 5.127055306427504e-05, "loss": 2.7003, "step": 57395 }, { "epoch": 3.8999864111971734, "grad_norm": 4.4231719970703125, "learning_rate": 5.1266306563391765e-05, "loss": 2.5169, "step": 57400 }, { "epoch": 3.9003261312678354, "grad_norm": 2.5361850261688232, 
"learning_rate": 5.12620600625085e-05, "loss": 2.4381, "step": 57405 }, { "epoch": 3.900665851338497, "grad_norm": 2.448200225830078, "learning_rate": 5.125781356162522e-05, "loss": 2.5449, "step": 57410 }, { "epoch": 3.9010055714091587, "grad_norm": 3.010823965072632, "learning_rate": 5.125356706074195e-05, "loss": 2.418, "step": 57415 }, { "epoch": 3.901345291479821, "grad_norm": 3.2317638397216797, "learning_rate": 5.1249320559858684e-05, "loss": 2.6806, "step": 57420 }, { "epoch": 3.9016850115504824, "grad_norm": 2.9934186935424805, "learning_rate": 5.1245074058975405e-05, "loss": 2.6774, "step": 57425 }, { "epoch": 3.902024731621144, "grad_norm": 2.4397411346435547, "learning_rate": 5.124082755809213e-05, "loss": 2.6133, "step": 57430 }, { "epoch": 3.902364451691806, "grad_norm": 3.3261001110076904, "learning_rate": 5.123658105720887e-05, "loss": 2.5702, "step": 57435 }, { "epoch": 3.9027041717624678, "grad_norm": 2.4837610721588135, "learning_rate": 5.123233455632559e-05, "loss": 2.7174, "step": 57440 }, { "epoch": 3.9030438918331294, "grad_norm": 3.346442222595215, "learning_rate": 5.122808805544232e-05, "loss": 2.2686, "step": 57445 }, { "epoch": 3.9033836119037915, "grad_norm": 2.5529749393463135, "learning_rate": 5.122384155455905e-05, "loss": 2.503, "step": 57450 }, { "epoch": 3.903723331974453, "grad_norm": 2.621027946472168, "learning_rate": 5.121959505367577e-05, "loss": 2.8963, "step": 57455 }, { "epoch": 3.9040630520451147, "grad_norm": 2.519329071044922, "learning_rate": 5.1215348552792494e-05, "loss": 2.6112, "step": 57460 }, { "epoch": 3.904402772115777, "grad_norm": 2.7075016498565674, "learning_rate": 5.121110205190923e-05, "loss": 2.8031, "step": 57465 }, { "epoch": 3.9047424921864384, "grad_norm": 3.339505195617676, "learning_rate": 5.120685555102596e-05, "loss": 2.789, "step": 57470 }, { "epoch": 3.9050822122571, "grad_norm": 2.8944246768951416, "learning_rate": 5.120260905014268e-05, "loss": 2.552, "step": 57475 }, { "epoch": 
3.905421932327762, "grad_norm": 2.963977813720703, "learning_rate": 5.119836254925941e-05, "loss": 2.6212, "step": 57480 }, { "epoch": 3.9057616523984238, "grad_norm": 3.274439811706543, "learning_rate": 5.119411604837614e-05, "loss": 2.6426, "step": 57485 }, { "epoch": 3.9061013724690854, "grad_norm": 2.8027596473693848, "learning_rate": 5.118986954749286e-05, "loss": 2.2386, "step": 57490 }, { "epoch": 3.9064410925397475, "grad_norm": 3.223269462585449, "learning_rate": 5.11856230466096e-05, "loss": 2.6628, "step": 57495 }, { "epoch": 3.906780812610409, "grad_norm": 2.583049774169922, "learning_rate": 5.1181376545726325e-05, "loss": 2.7272, "step": 57500 }, { "epoch": 3.9071205326810707, "grad_norm": 2.4067203998565674, "learning_rate": 5.1177130044843046e-05, "loss": 2.8416, "step": 57505 }, { "epoch": 3.907460252751733, "grad_norm": 2.1453757286071777, "learning_rate": 5.117288354395978e-05, "loss": 2.488, "step": 57510 }, { "epoch": 3.9077999728223944, "grad_norm": 3.155808210372925, "learning_rate": 5.116863704307651e-05, "loss": 2.6276, "step": 57515 }, { "epoch": 3.908139692893056, "grad_norm": 2.501887798309326, "learning_rate": 5.116439054219323e-05, "loss": 2.8218, "step": 57520 }, { "epoch": 3.908479412963718, "grad_norm": 3.214613199234009, "learning_rate": 5.1160144041309965e-05, "loss": 2.2512, "step": 57525 }, { "epoch": 3.90881913303438, "grad_norm": 3.1113622188568115, "learning_rate": 5.1155897540426686e-05, "loss": 2.6048, "step": 57530 }, { "epoch": 3.9091588531050414, "grad_norm": 3.2472407817840576, "learning_rate": 5.1151651039543414e-05, "loss": 2.6876, "step": 57535 }, { "epoch": 3.9094985731757035, "grad_norm": 3.2425973415374756, "learning_rate": 5.114740453866015e-05, "loss": 2.5637, "step": 57540 }, { "epoch": 3.909838293246365, "grad_norm": 2.614567756652832, "learning_rate": 5.114315803777687e-05, "loss": 2.8013, "step": 57545 }, { "epoch": 3.9101780133170267, "grad_norm": 3.003906488418579, "learning_rate": 5.11389115368936e-05, 
"loss": 2.5292, "step": 57550 }, { "epoch": 3.910517733387689, "grad_norm": 2.964050531387329, "learning_rate": 5.113466503601033e-05, "loss": 2.6391, "step": 57555 }, { "epoch": 3.9108574534583505, "grad_norm": 2.822847843170166, "learning_rate": 5.1130418535127054e-05, "loss": 2.7255, "step": 57560 }, { "epoch": 3.911197173529012, "grad_norm": 2.578233003616333, "learning_rate": 5.112617203424379e-05, "loss": 2.5909, "step": 57565 }, { "epoch": 3.9115368935996737, "grad_norm": 2.7669739723205566, "learning_rate": 5.112192553336052e-05, "loss": 2.6816, "step": 57570 }, { "epoch": 3.911876613670336, "grad_norm": 2.983884334564209, "learning_rate": 5.111767903247724e-05, "loss": 2.678, "step": 57575 }, { "epoch": 3.9122163337409974, "grad_norm": 2.735745429992676, "learning_rate": 5.111343253159397e-05, "loss": 2.7386, "step": 57580 }, { "epoch": 3.912556053811659, "grad_norm": 2.7516820430755615, "learning_rate": 5.11091860307107e-05, "loss": 2.8025, "step": 57585 }, { "epoch": 3.912895773882321, "grad_norm": 3.358433723449707, "learning_rate": 5.110493952982742e-05, "loss": 2.8781, "step": 57590 }, { "epoch": 3.9132354939529828, "grad_norm": 3.2941975593566895, "learning_rate": 5.110069302894416e-05, "loss": 2.5261, "step": 57595 }, { "epoch": 3.9135752140236444, "grad_norm": 3.1952359676361084, "learning_rate": 5.1096446528060885e-05, "loss": 2.7264, "step": 57600 }, { "epoch": 3.913914934094306, "grad_norm": 3.083512544631958, "learning_rate": 5.1092200027177606e-05, "loss": 2.7174, "step": 57605 }, { "epoch": 3.914254654164968, "grad_norm": 2.5786592960357666, "learning_rate": 5.108795352629434e-05, "loss": 2.5672, "step": 57610 }, { "epoch": 3.9145943742356297, "grad_norm": 2.928004026412964, "learning_rate": 5.108370702541106e-05, "loss": 2.6172, "step": 57615 }, { "epoch": 3.9149340943062914, "grad_norm": 3.5214924812316895, "learning_rate": 5.107946052452779e-05, "loss": 2.6936, "step": 57620 }, { "epoch": 3.9152738143769534, "grad_norm": 2.547619581222534, 
"learning_rate": 5.1075214023644525e-05, "loss": 2.9367, "step": 57625 }, { "epoch": 3.915613534447615, "grad_norm": 3.699422597885132, "learning_rate": 5.1070967522761246e-05, "loss": 2.5786, "step": 57630 }, { "epoch": 3.9159532545182767, "grad_norm": 2.955080986022949, "learning_rate": 5.1066721021877974e-05, "loss": 2.7542, "step": 57635 }, { "epoch": 3.9162929745889388, "grad_norm": 3.133667230606079, "learning_rate": 5.106247452099471e-05, "loss": 2.8257, "step": 57640 }, { "epoch": 3.9166326946596004, "grad_norm": 2.8084263801574707, "learning_rate": 5.105822802011143e-05, "loss": 3.0232, "step": 57645 }, { "epoch": 3.916972414730262, "grad_norm": 2.3472726345062256, "learning_rate": 5.105398151922816e-05, "loss": 2.4972, "step": 57650 }, { "epoch": 3.917312134800924, "grad_norm": 2.9724347591400146, "learning_rate": 5.104973501834489e-05, "loss": 2.5167, "step": 57655 }, { "epoch": 3.9176518548715857, "grad_norm": 3.3162529468536377, "learning_rate": 5.1045488517461614e-05, "loss": 2.4967, "step": 57660 }, { "epoch": 3.9179915749422474, "grad_norm": 2.073413610458374, "learning_rate": 5.1041242016578336e-05, "loss": 2.8868, "step": 57665 }, { "epoch": 3.9183312950129094, "grad_norm": 3.270642042160034, "learning_rate": 5.103699551569508e-05, "loss": 2.6121, "step": 57670 }, { "epoch": 3.918671015083571, "grad_norm": 2.3059237003326416, "learning_rate": 5.10327490148118e-05, "loss": 2.7518, "step": 57675 }, { "epoch": 3.9190107351542327, "grad_norm": 2.3299105167388916, "learning_rate": 5.102850251392852e-05, "loss": 2.6248, "step": 57680 }, { "epoch": 3.919350455224895, "grad_norm": 2.7732510566711426, "learning_rate": 5.1024256013045254e-05, "loss": 2.6337, "step": 57685 }, { "epoch": 3.9196901752955564, "grad_norm": 2.465378999710083, "learning_rate": 5.102000951216198e-05, "loss": 2.7095, "step": 57690 }, { "epoch": 3.920029895366218, "grad_norm": 2.5028879642486572, "learning_rate": 5.1015763011278704e-05, "loss": 2.8878, "step": 57695 }, { "epoch": 
3.92036961543688, "grad_norm": 2.661450147628784, "learning_rate": 5.101151651039544e-05, "loss": 2.8964, "step": 57700 }, { "epoch": 3.9207093355075417, "grad_norm": 2.5327324867248535, "learning_rate": 5.1007270009512166e-05, "loss": 2.752, "step": 57705 }, { "epoch": 3.9210490555782034, "grad_norm": 3.300886631011963, "learning_rate": 5.100302350862889e-05, "loss": 2.6364, "step": 57710 }, { "epoch": 3.9213887756488655, "grad_norm": 3.0936219692230225, "learning_rate": 5.099877700774562e-05, "loss": 2.5768, "step": 57715 }, { "epoch": 3.921728495719527, "grad_norm": 3.517143487930298, "learning_rate": 5.099453050686235e-05, "loss": 2.4669, "step": 57720 }, { "epoch": 3.9220682157901887, "grad_norm": 2.5364489555358887, "learning_rate": 5.099028400597907e-05, "loss": 2.4694, "step": 57725 }, { "epoch": 3.922407935860851, "grad_norm": 2.90400767326355, "learning_rate": 5.0986037505095806e-05, "loss": 2.7183, "step": 57730 }, { "epoch": 3.9227476559315124, "grad_norm": 2.443892478942871, "learning_rate": 5.0981791004212534e-05, "loss": 2.6648, "step": 57735 }, { "epoch": 3.923087376002174, "grad_norm": 2.9831604957580566, "learning_rate": 5.0977544503329256e-05, "loss": 2.4512, "step": 57740 }, { "epoch": 3.923427096072836, "grad_norm": 2.7523043155670166, "learning_rate": 5.097329800244599e-05, "loss": 2.5939, "step": 57745 }, { "epoch": 3.9237668161434978, "grad_norm": 3.4979279041290283, "learning_rate": 5.096905150156271e-05, "loss": 2.6456, "step": 57750 }, { "epoch": 3.9241065362141594, "grad_norm": 3.2881739139556885, "learning_rate": 5.096480500067944e-05, "loss": 2.6859, "step": 57755 }, { "epoch": 3.9244462562848215, "grad_norm": 2.2432990074157715, "learning_rate": 5.0960558499796174e-05, "loss": 2.6898, "step": 57760 }, { "epoch": 3.924785976355483, "grad_norm": 2.7765915393829346, "learning_rate": 5.0956311998912896e-05, "loss": 2.592, "step": 57765 }, { "epoch": 3.9251256964261447, "grad_norm": 2.8350324630737305, "learning_rate": 
5.0952065498029624e-05, "loss": 2.5234, "step": 57770 }, { "epoch": 3.925465416496807, "grad_norm": 2.312603712081909, "learning_rate": 5.094781899714636e-05, "loss": 2.6412, "step": 57775 }, { "epoch": 3.9258051365674684, "grad_norm": 2.7662696838378906, "learning_rate": 5.094357249626308e-05, "loss": 2.6119, "step": 57780 }, { "epoch": 3.92614485663813, "grad_norm": 3.405935764312744, "learning_rate": 5.093932599537981e-05, "loss": 2.6387, "step": 57785 }, { "epoch": 3.926484576708792, "grad_norm": 2.461655616760254, "learning_rate": 5.093507949449654e-05, "loss": 2.6544, "step": 57790 }, { "epoch": 3.9268242967794538, "grad_norm": 2.5556724071502686, "learning_rate": 5.0930832993613264e-05, "loss": 2.5862, "step": 57795 }, { "epoch": 3.9271640168501154, "grad_norm": 2.7491629123687744, "learning_rate": 5.092658649272999e-05, "loss": 2.7002, "step": 57800 }, { "epoch": 3.9275037369207775, "grad_norm": 2.6158323287963867, "learning_rate": 5.0922339991846727e-05, "loss": 2.8294, "step": 57805 }, { "epoch": 3.927843456991439, "grad_norm": 3.876922607421875, "learning_rate": 5.091809349096345e-05, "loss": 2.5914, "step": 57810 }, { "epoch": 3.9281831770621007, "grad_norm": 3.0313608646392822, "learning_rate": 5.091384699008017e-05, "loss": 2.6847, "step": 57815 }, { "epoch": 3.928522897132763, "grad_norm": 2.991044282913208, "learning_rate": 5.0909600489196904e-05, "loss": 2.4713, "step": 57820 }, { "epoch": 3.9288626172034244, "grad_norm": 3.224635124206543, "learning_rate": 5.090535398831363e-05, "loss": 2.87, "step": 57825 }, { "epoch": 3.929202337274086, "grad_norm": 3.3720860481262207, "learning_rate": 5.090110748743035e-05, "loss": 2.6742, "step": 57830 }, { "epoch": 3.929542057344748, "grad_norm": 2.9253275394439697, "learning_rate": 5.089686098654709e-05, "loss": 2.4164, "step": 57835 }, { "epoch": 3.92988177741541, "grad_norm": 2.789706230163574, "learning_rate": 5.0892614485663816e-05, "loss": 2.5455, "step": 57840 }, { "epoch": 3.9302214974860714, 
"grad_norm": 3.183084726333618, "learning_rate": 5.088836798478054e-05, "loss": 2.7178, "step": 57845 }, { "epoch": 3.9305612175567335, "grad_norm": 2.4187071323394775, "learning_rate": 5.088412148389727e-05, "loss": 2.5712, "step": 57850 }, { "epoch": 3.930900937627395, "grad_norm": 2.18961763381958, "learning_rate": 5.0879874983014e-05, "loss": 2.7921, "step": 57855 }, { "epoch": 3.9312406576980568, "grad_norm": 3.2209885120391846, "learning_rate": 5.087562848213072e-05, "loss": 2.7486, "step": 57860 }, { "epoch": 3.931580377768719, "grad_norm": 3.0875725746154785, "learning_rate": 5.0871381981247456e-05, "loss": 2.585, "step": 57865 }, { "epoch": 3.9319200978393805, "grad_norm": 2.761476755142212, "learning_rate": 5.0867135480364184e-05, "loss": 2.5954, "step": 57870 }, { "epoch": 3.932259817910042, "grad_norm": 2.864959478378296, "learning_rate": 5.0862888979480905e-05, "loss": 2.7829, "step": 57875 }, { "epoch": 3.932599537980704, "grad_norm": 2.608217716217041, "learning_rate": 5.085864247859764e-05, "loss": 2.639, "step": 57880 }, { "epoch": 3.932939258051366, "grad_norm": 3.256286382675171, "learning_rate": 5.085439597771436e-05, "loss": 2.7463, "step": 57885 }, { "epoch": 3.9332789781220274, "grad_norm": 2.120694398880005, "learning_rate": 5.085014947683109e-05, "loss": 2.6062, "step": 57890 }, { "epoch": 3.9336186981926895, "grad_norm": 2.6391844749450684, "learning_rate": 5.0845902975947824e-05, "loss": 2.7043, "step": 57895 }, { "epoch": 3.933958418263351, "grad_norm": 2.4700918197631836, "learning_rate": 5.0841656475064545e-05, "loss": 2.5329, "step": 57900 }, { "epoch": 3.9342981383340128, "grad_norm": 3.6105763912200928, "learning_rate": 5.083740997418128e-05, "loss": 2.7457, "step": 57905 }, { "epoch": 3.9346378584046744, "grad_norm": 2.7482974529266357, "learning_rate": 5.083316347329801e-05, "loss": 2.8393, "step": 57910 }, { "epoch": 3.9349775784753365, "grad_norm": 2.4086170196533203, "learning_rate": 5.082891697241473e-05, "loss": 2.6138, 
"step": 57915 }, { "epoch": 3.935317298545998, "grad_norm": 3.2041432857513428, "learning_rate": 5.0824670471531464e-05, "loss": 2.1956, "step": 57920 }, { "epoch": 3.9356570186166597, "grad_norm": 3.634415864944458, "learning_rate": 5.082042397064819e-05, "loss": 2.6689, "step": 57925 }, { "epoch": 3.935996738687322, "grad_norm": 3.3837668895721436, "learning_rate": 5.081617746976491e-05, "loss": 2.5403, "step": 57930 }, { "epoch": 3.9363364587579834, "grad_norm": 2.917192220687866, "learning_rate": 5.081193096888165e-05, "loss": 2.7121, "step": 57935 }, { "epoch": 3.936676178828645, "grad_norm": 3.8953018188476562, "learning_rate": 5.0807684467998376e-05, "loss": 2.6281, "step": 57940 }, { "epoch": 3.9370158988993067, "grad_norm": 2.389723777770996, "learning_rate": 5.08034379671151e-05, "loss": 2.2974, "step": 57945 }, { "epoch": 3.9373556189699688, "grad_norm": 3.4712977409362793, "learning_rate": 5.079919146623183e-05, "loss": 2.6664, "step": 57950 }, { "epoch": 3.9376953390406304, "grad_norm": 2.3964879512786865, "learning_rate": 5.079494496534855e-05, "loss": 2.7176, "step": 57955 }, { "epoch": 3.938035059111292, "grad_norm": 2.4608306884765625, "learning_rate": 5.079069846446528e-05, "loss": 2.3164, "step": 57960 }, { "epoch": 3.938374779181954, "grad_norm": 3.1480233669281006, "learning_rate": 5.0786451963582016e-05, "loss": 2.5413, "step": 57965 }, { "epoch": 3.9387144992526157, "grad_norm": 4.078991889953613, "learning_rate": 5.078220546269874e-05, "loss": 2.9333, "step": 57970 }, { "epoch": 3.9390542193232774, "grad_norm": 2.8765552043914795, "learning_rate": 5.0777958961815465e-05, "loss": 2.7398, "step": 57975 }, { "epoch": 3.9393939393939394, "grad_norm": 2.877673625946045, "learning_rate": 5.07737124609322e-05, "loss": 2.435, "step": 57980 }, { "epoch": 3.939733659464601, "grad_norm": 3.058164596557617, "learning_rate": 5.076946596004892e-05, "loss": 2.7264, "step": 57985 }, { "epoch": 3.9400733795352627, "grad_norm": 2.769174098968506, 
"learning_rate": 5.076521945916565e-05, "loss": 2.7224, "step": 57990 }, { "epoch": 3.940413099605925, "grad_norm": 3.0961849689483643, "learning_rate": 5.0760972958282384e-05, "loss": 2.5724, "step": 57995 }, { "epoch": 3.9407528196765864, "grad_norm": 2.8460257053375244, "learning_rate": 5.0756726457399105e-05, "loss": 2.6055, "step": 58000 }, { "epoch": 3.941092539747248, "grad_norm": 3.459496259689331, "learning_rate": 5.075247995651583e-05, "loss": 2.4981, "step": 58005 }, { "epoch": 3.94143225981791, "grad_norm": 2.95174503326416, "learning_rate": 5.074823345563257e-05, "loss": 2.7702, "step": 58010 }, { "epoch": 3.9417719798885718, "grad_norm": 2.931529998779297, "learning_rate": 5.074398695474929e-05, "loss": 2.6994, "step": 58015 }, { "epoch": 3.9421116999592334, "grad_norm": 2.825571060180664, "learning_rate": 5.073974045386601e-05, "loss": 2.7307, "step": 58020 }, { "epoch": 3.9424514200298955, "grad_norm": 3.0452797412872314, "learning_rate": 5.073549395298275e-05, "loss": 2.6116, "step": 58025 }, { "epoch": 3.942791140100557, "grad_norm": 3.0215532779693604, "learning_rate": 5.073124745209947e-05, "loss": 2.5112, "step": 58030 }, { "epoch": 3.9431308601712187, "grad_norm": 2.7357747554779053, "learning_rate": 5.0727000951216195e-05, "loss": 2.4713, "step": 58035 }, { "epoch": 3.943470580241881, "grad_norm": 3.0209877490997314, "learning_rate": 5.072275445033293e-05, "loss": 2.676, "step": 58040 }, { "epoch": 3.9438103003125424, "grad_norm": 2.7296488285064697, "learning_rate": 5.071850794944966e-05, "loss": 2.4399, "step": 58045 }, { "epoch": 3.944150020383204, "grad_norm": 2.9537582397460938, "learning_rate": 5.071426144856638e-05, "loss": 2.6604, "step": 58050 }, { "epoch": 3.944489740453866, "grad_norm": 2.796907901763916, "learning_rate": 5.071001494768311e-05, "loss": 2.8129, "step": 58055 }, { "epoch": 3.9448294605245278, "grad_norm": 2.6976511478424072, "learning_rate": 5.070576844679984e-05, "loss": 2.5728, "step": 58060 }, { "epoch": 
3.9451691805951894, "grad_norm": 3.030714511871338, "learning_rate": 5.070152194591656e-05, "loss": 2.5211, "step": 58065 }, { "epoch": 3.9455089006658515, "grad_norm": 2.575324058532715, "learning_rate": 5.06972754450333e-05, "loss": 2.7135, "step": 58070 }, { "epoch": 3.945848620736513, "grad_norm": 2.760098457336426, "learning_rate": 5.0693028944150025e-05, "loss": 2.8892, "step": 58075 }, { "epoch": 3.9461883408071747, "grad_norm": 2.6648457050323486, "learning_rate": 5.068878244326675e-05, "loss": 2.5491, "step": 58080 }, { "epoch": 3.946528060877837, "grad_norm": 2.544893264770508, "learning_rate": 5.068453594238348e-05, "loss": 2.6503, "step": 58085 }, { "epoch": 3.9468677809484984, "grad_norm": 3.1049351692199707, "learning_rate": 5.068028944150021e-05, "loss": 2.6892, "step": 58090 }, { "epoch": 3.94720750101916, "grad_norm": 2.6632914543151855, "learning_rate": 5.067604294061693e-05, "loss": 2.6538, "step": 58095 }, { "epoch": 3.947547221089822, "grad_norm": 2.9303548336029053, "learning_rate": 5.0671796439733665e-05, "loss": 2.8465, "step": 58100 }, { "epoch": 3.9478869411604838, "grad_norm": 4.020623683929443, "learning_rate": 5.066754993885039e-05, "loss": 2.6394, "step": 58105 }, { "epoch": 3.9482266612311454, "grad_norm": 3.2896852493286133, "learning_rate": 5.0663303437967115e-05, "loss": 2.4408, "step": 58110 }, { "epoch": 3.9485663813018075, "grad_norm": 3.5502240657806396, "learning_rate": 5.065905693708385e-05, "loss": 2.7919, "step": 58115 }, { "epoch": 3.948906101372469, "grad_norm": 4.125308036804199, "learning_rate": 5.065481043620057e-05, "loss": 2.4397, "step": 58120 }, { "epoch": 3.9492458214431307, "grad_norm": 2.9851012229919434, "learning_rate": 5.06505639353173e-05, "loss": 2.5921, "step": 58125 }, { "epoch": 3.949585541513793, "grad_norm": 3.3202478885650635, "learning_rate": 5.0646317434434034e-05, "loss": 2.9862, "step": 58130 }, { "epoch": 3.9499252615844545, "grad_norm": 3.0477161407470703, "learning_rate": 
5.0642070933550755e-05, "loss": 2.7608, "step": 58135 }, { "epoch": 3.950264981655116, "grad_norm": 3.1674954891204834, "learning_rate": 5.063782443266748e-05, "loss": 2.6437, "step": 58140 }, { "epoch": 3.950604701725778, "grad_norm": 2.523071050643921, "learning_rate": 5.063357793178422e-05, "loss": 2.8717, "step": 58145 }, { "epoch": 3.95094442179644, "grad_norm": 2.9666759967803955, "learning_rate": 5.062933143090094e-05, "loss": 2.8078, "step": 58150 }, { "epoch": 3.9512841418671014, "grad_norm": 2.435631275177002, "learning_rate": 5.062508493001766e-05, "loss": 2.7694, "step": 58155 }, { "epoch": 3.9516238619377635, "grad_norm": 2.8508503437042236, "learning_rate": 5.06208384291344e-05, "loss": 2.8881, "step": 58160 }, { "epoch": 3.951963582008425, "grad_norm": 2.9007132053375244, "learning_rate": 5.061659192825112e-05, "loss": 2.9962, "step": 58165 }, { "epoch": 3.9523033020790868, "grad_norm": 2.835639238357544, "learning_rate": 5.0612345427367844e-05, "loss": 2.5585, "step": 58170 }, { "epoch": 3.952643022149749, "grad_norm": 3.2643752098083496, "learning_rate": 5.060809892648458e-05, "loss": 2.5335, "step": 58175 }, { "epoch": 3.9529827422204105, "grad_norm": 2.563113212585449, "learning_rate": 5.060385242560131e-05, "loss": 2.5437, "step": 58180 }, { "epoch": 3.953322462291072, "grad_norm": 2.7418391704559326, "learning_rate": 5.059960592471803e-05, "loss": 2.6415, "step": 58185 }, { "epoch": 3.953662182361734, "grad_norm": 2.476175546646118, "learning_rate": 5.059535942383476e-05, "loss": 2.5597, "step": 58190 }, { "epoch": 3.954001902432396, "grad_norm": 3.244837760925293, "learning_rate": 5.059111292295149e-05, "loss": 2.5694, "step": 58195 }, { "epoch": 3.9543416225030574, "grad_norm": 3.0034046173095703, "learning_rate": 5.058686642206821e-05, "loss": 2.5499, "step": 58200 }, { "epoch": 3.9546813425737195, "grad_norm": 3.147610664367676, "learning_rate": 5.058261992118495e-05, "loss": 2.7714, "step": 58205 }, { "epoch": 3.955021062644381, 
"grad_norm": 3.2157702445983887, "learning_rate": 5.0578373420301675e-05, "loss": 2.5665, "step": 58210 }, { "epoch": 3.9553607827150428, "grad_norm": 3.059756278991699, "learning_rate": 5.0574126919418396e-05, "loss": 2.5621, "step": 58215 }, { "epoch": 3.955700502785705, "grad_norm": 3.028452157974243, "learning_rate": 5.056988041853513e-05, "loss": 2.7965, "step": 58220 }, { "epoch": 3.9560402228563665, "grad_norm": 2.5602915287017822, "learning_rate": 5.056563391765186e-05, "loss": 2.3929, "step": 58225 }, { "epoch": 3.956379942927028, "grad_norm": 2.168294906616211, "learning_rate": 5.056138741676858e-05, "loss": 2.5164, "step": 58230 }, { "epoch": 3.95671966299769, "grad_norm": 2.504100799560547, "learning_rate": 5.0557140915885315e-05, "loss": 2.8631, "step": 58235 }, { "epoch": 3.957059383068352, "grad_norm": 2.6265103816986084, "learning_rate": 5.0552894415002036e-05, "loss": 2.7658, "step": 58240 }, { "epoch": 3.9573991031390134, "grad_norm": 2.7835936546325684, "learning_rate": 5.054864791411877e-05, "loss": 2.6785, "step": 58245 }, { "epoch": 3.957738823209675, "grad_norm": 2.3459768295288086, "learning_rate": 5.05444014132355e-05, "loss": 2.6923, "step": 58250 }, { "epoch": 3.958078543280337, "grad_norm": 3.0021679401397705, "learning_rate": 5.054015491235222e-05, "loss": 2.4708, "step": 58255 }, { "epoch": 3.958418263350999, "grad_norm": 3.116725444793701, "learning_rate": 5.0535908411468955e-05, "loss": 2.5583, "step": 58260 }, { "epoch": 3.9587579834216604, "grad_norm": 2.5511083602905273, "learning_rate": 5.053166191058568e-05, "loss": 2.7816, "step": 58265 }, { "epoch": 3.9590977034923225, "grad_norm": 2.6298906803131104, "learning_rate": 5.0527415409702404e-05, "loss": 2.6246, "step": 58270 }, { "epoch": 3.959437423562984, "grad_norm": 2.7950141429901123, "learning_rate": 5.052316890881914e-05, "loss": 2.4126, "step": 58275 }, { "epoch": 3.9597771436336457, "grad_norm": 2.645782232284546, "learning_rate": 5.051892240793587e-05, "loss": 2.6414, 
"step": 58280 }, { "epoch": 3.9601168637043074, "grad_norm": 2.941934108734131, "learning_rate": 5.051467590705259e-05, "loss": 2.6251, "step": 58285 }, { "epoch": 3.9604565837749695, "grad_norm": 2.959021806716919, "learning_rate": 5.051042940616932e-05, "loss": 2.8483, "step": 58290 }, { "epoch": 3.960796303845631, "grad_norm": 2.702273368835449, "learning_rate": 5.050618290528605e-05, "loss": 2.581, "step": 58295 }, { "epoch": 3.9611360239162927, "grad_norm": 3.187697410583496, "learning_rate": 5.050193640440277e-05, "loss": 2.6732, "step": 58300 }, { "epoch": 3.961475743986955, "grad_norm": 2.7249069213867188, "learning_rate": 5.049768990351951e-05, "loss": 2.6871, "step": 58305 }, { "epoch": 3.9618154640576164, "grad_norm": 2.6305301189422607, "learning_rate": 5.049344340263623e-05, "loss": 2.5919, "step": 58310 }, { "epoch": 3.962155184128278, "grad_norm": 2.7356581687927246, "learning_rate": 5.0489196901752956e-05, "loss": 2.9337, "step": 58315 }, { "epoch": 3.96249490419894, "grad_norm": 2.9181690216064453, "learning_rate": 5.048495040086969e-05, "loss": 2.6473, "step": 58320 }, { "epoch": 3.9628346242696018, "grad_norm": 2.375478506088257, "learning_rate": 5.048070389998641e-05, "loss": 2.7647, "step": 58325 }, { "epoch": 3.9631743443402634, "grad_norm": 3.3332667350769043, "learning_rate": 5.047645739910314e-05, "loss": 2.5943, "step": 58330 }, { "epoch": 3.9635140644109255, "grad_norm": 2.3029823303222656, "learning_rate": 5.0472210898219875e-05, "loss": 2.7037, "step": 58335 }, { "epoch": 3.963853784481587, "grad_norm": 2.411141872406006, "learning_rate": 5.0467964397336596e-05, "loss": 2.7213, "step": 58340 }, { "epoch": 3.9641935045522487, "grad_norm": 2.9772133827209473, "learning_rate": 5.0463717896453324e-05, "loss": 2.3409, "step": 58345 }, { "epoch": 3.964533224622911, "grad_norm": 2.567690849304199, "learning_rate": 5.045947139557006e-05, "loss": 2.9519, "step": 58350 }, { "epoch": 3.9648729446935724, "grad_norm": 3.6533749103546143, 
"learning_rate": 5.045522489468678e-05, "loss": 2.4617, "step": 58355 }, { "epoch": 3.965212664764234, "grad_norm": 2.6738317012786865, "learning_rate": 5.045097839380351e-05, "loss": 2.5803, "step": 58360 }, { "epoch": 3.965552384834896, "grad_norm": 2.9066507816314697, "learning_rate": 5.044673189292024e-05, "loss": 2.7269, "step": 58365 }, { "epoch": 3.9658921049055578, "grad_norm": 3.2588751316070557, "learning_rate": 5.0442485392036964e-05, "loss": 2.7199, "step": 58370 }, { "epoch": 3.9662318249762194, "grad_norm": 2.778644561767578, "learning_rate": 5.0438238891153686e-05, "loss": 2.6826, "step": 58375 }, { "epoch": 3.9665715450468815, "grad_norm": 3.01527738571167, "learning_rate": 5.043399239027042e-05, "loss": 2.6553, "step": 58380 }, { "epoch": 3.966911265117543, "grad_norm": 3.0831334590911865, "learning_rate": 5.042974588938715e-05, "loss": 2.8826, "step": 58385 }, { "epoch": 3.9672509851882047, "grad_norm": 2.9203081130981445, "learning_rate": 5.042549938850387e-05, "loss": 2.7565, "step": 58390 }, { "epoch": 3.967590705258867, "grad_norm": 2.852332592010498, "learning_rate": 5.0421252887620604e-05, "loss": 2.7393, "step": 58395 }, { "epoch": 3.9679304253295284, "grad_norm": 2.3704938888549805, "learning_rate": 5.041700638673733e-05, "loss": 2.7046, "step": 58400 }, { "epoch": 3.96827014540019, "grad_norm": 2.3658053874969482, "learning_rate": 5.0412759885854054e-05, "loss": 2.5809, "step": 58405 }, { "epoch": 3.968609865470852, "grad_norm": 2.5078864097595215, "learning_rate": 5.040851338497079e-05, "loss": 2.6039, "step": 58410 }, { "epoch": 3.968949585541514, "grad_norm": 2.6687662601470947, "learning_rate": 5.0404266884087516e-05, "loss": 2.4562, "step": 58415 }, { "epoch": 3.9692893056121754, "grad_norm": 2.699019193649292, "learning_rate": 5.040002038320424e-05, "loss": 2.5999, "step": 58420 }, { "epoch": 3.9696290256828375, "grad_norm": 3.033698558807373, "learning_rate": 5.039577388232097e-05, "loss": 2.9311, "step": 58425 }, { "epoch": 
3.969968745753499, "grad_norm": 4.034801483154297, "learning_rate": 5.03915273814377e-05, "loss": 2.4423, "step": 58430 }, { "epoch": 3.9703084658241607, "grad_norm": 2.6311309337615967, "learning_rate": 5.038728088055442e-05, "loss": 2.6202, "step": 58435 }, { "epoch": 3.970648185894823, "grad_norm": 2.6529300212860107, "learning_rate": 5.0383034379671156e-05, "loss": 2.4935, "step": 58440 }, { "epoch": 3.9709879059654845, "grad_norm": 3.053030252456665, "learning_rate": 5.037878787878788e-05, "loss": 2.6297, "step": 58445 }, { "epoch": 3.971327626036146, "grad_norm": 2.3091723918914795, "learning_rate": 5.0374541377904606e-05, "loss": 2.3626, "step": 58450 }, { "epoch": 3.971667346106808, "grad_norm": 2.7943193912506104, "learning_rate": 5.037029487702134e-05, "loss": 2.7612, "step": 58455 }, { "epoch": 3.97200706617747, "grad_norm": 2.494582176208496, "learning_rate": 5.036604837613806e-05, "loss": 2.6991, "step": 58460 }, { "epoch": 3.9723467862481314, "grad_norm": 3.6409969329833984, "learning_rate": 5.036180187525479e-05, "loss": 2.4791, "step": 58465 }, { "epoch": 3.9726865063187935, "grad_norm": 3.5908119678497314, "learning_rate": 5.0357555374371524e-05, "loss": 2.452, "step": 58470 }, { "epoch": 3.973026226389455, "grad_norm": 2.747262716293335, "learning_rate": 5.0353308873488246e-05, "loss": 2.4104, "step": 58475 }, { "epoch": 3.9733659464601168, "grad_norm": 2.726316452026367, "learning_rate": 5.0349062372604974e-05, "loss": 2.7117, "step": 58480 }, { "epoch": 3.973705666530779, "grad_norm": 2.792098045349121, "learning_rate": 5.034481587172171e-05, "loss": 2.447, "step": 58485 }, { "epoch": 3.9740453866014405, "grad_norm": 2.3341119289398193, "learning_rate": 5.034056937083843e-05, "loss": 2.821, "step": 58490 }, { "epoch": 3.974385106672102, "grad_norm": 2.8674685955047607, "learning_rate": 5.033632286995516e-05, "loss": 2.4972, "step": 58495 }, { "epoch": 3.974724826742764, "grad_norm": 3.3703246116638184, "learning_rate": 5.033207636907189e-05, 
"loss": 2.758, "step": 58500 }, { "epoch": 3.975064546813426, "grad_norm": 3.2013983726501465, "learning_rate": 5.0327829868188614e-05, "loss": 2.4027, "step": 58505 }, { "epoch": 3.9754042668840874, "grad_norm": 2.5478575229644775, "learning_rate": 5.0323583367305335e-05, "loss": 2.7008, "step": 58510 }, { "epoch": 3.9757439869547495, "grad_norm": 2.961528778076172, "learning_rate": 5.0319336866422077e-05, "loss": 2.7252, "step": 58515 }, { "epoch": 3.976083707025411, "grad_norm": 3.6520533561706543, "learning_rate": 5.03150903655388e-05, "loss": 2.7618, "step": 58520 }, { "epoch": 3.9764234270960728, "grad_norm": 2.9465065002441406, "learning_rate": 5.031084386465552e-05, "loss": 2.767, "step": 58525 }, { "epoch": 3.976763147166735, "grad_norm": 3.3014721870422363, "learning_rate": 5.0306597363772254e-05, "loss": 2.3693, "step": 58530 }, { "epoch": 3.9771028672373965, "grad_norm": 3.9857935905456543, "learning_rate": 5.030235086288898e-05, "loss": 2.3402, "step": 58535 }, { "epoch": 3.977442587308058, "grad_norm": 3.082703113555908, "learning_rate": 5.02981043620057e-05, "loss": 2.6837, "step": 58540 }, { "epoch": 3.97778230737872, "grad_norm": 3.470388412475586, "learning_rate": 5.029385786112244e-05, "loss": 2.5369, "step": 58545 }, { "epoch": 3.978122027449382, "grad_norm": 2.9021286964416504, "learning_rate": 5.0289611360239166e-05, "loss": 2.7223, "step": 58550 }, { "epoch": 3.9784617475200434, "grad_norm": 3.0547516345977783, "learning_rate": 5.028536485935589e-05, "loss": 2.9339, "step": 58555 }, { "epoch": 3.9788014675907055, "grad_norm": 2.6745738983154297, "learning_rate": 5.028111835847262e-05, "loss": 2.7931, "step": 58560 }, { "epoch": 3.979141187661367, "grad_norm": 2.1184208393096924, "learning_rate": 5.027687185758935e-05, "loss": 2.7527, "step": 58565 }, { "epoch": 3.979480907732029, "grad_norm": 3.554565906524658, "learning_rate": 5.027262535670607e-05, "loss": 2.5358, "step": 58570 }, { "epoch": 3.979820627802691, "grad_norm": 
3.4603161811828613, "learning_rate": 5.0268378855822806e-05, "loss": 2.6147, "step": 58575 }, { "epoch": 3.9801603478733525, "grad_norm": 2.181053638458252, "learning_rate": 5.026413235493953e-05, "loss": 2.7333, "step": 58580 }, { "epoch": 3.980500067944014, "grad_norm": 3.188716173171997, "learning_rate": 5.025988585405627e-05, "loss": 2.6106, "step": 58585 }, { "epoch": 3.9808397880146758, "grad_norm": 2.6890082359313965, "learning_rate": 5.025563935317299e-05, "loss": 2.5988, "step": 58590 }, { "epoch": 3.981179508085338, "grad_norm": 3.2987942695617676, "learning_rate": 5.025139285228971e-05, "loss": 2.7874, "step": 58595 }, { "epoch": 3.9815192281559995, "grad_norm": 2.5154268741607666, "learning_rate": 5.0247146351406446e-05, "loss": 2.6299, "step": 58600 }, { "epoch": 3.981858948226661, "grad_norm": 2.1041104793548584, "learning_rate": 5.0242899850523174e-05, "loss": 2.764, "step": 58605 }, { "epoch": 3.982198668297323, "grad_norm": 3.1605985164642334, "learning_rate": 5.0238653349639895e-05, "loss": 2.448, "step": 58610 }, { "epoch": 3.982538388367985, "grad_norm": 3.080580711364746, "learning_rate": 5.023440684875663e-05, "loss": 2.4564, "step": 58615 }, { "epoch": 3.9828781084386464, "grad_norm": 2.5551819801330566, "learning_rate": 5.023016034787336e-05, "loss": 2.7305, "step": 58620 }, { "epoch": 3.983217828509308, "grad_norm": 2.5345723628997803, "learning_rate": 5.022591384699008e-05, "loss": 2.496, "step": 58625 }, { "epoch": 3.98355754857997, "grad_norm": 2.391097068786621, "learning_rate": 5.0221667346106814e-05, "loss": 2.6421, "step": 58630 }, { "epoch": 3.9838972686506318, "grad_norm": 2.546192169189453, "learning_rate": 5.021742084522354e-05, "loss": 2.4992, "step": 58635 }, { "epoch": 3.9842369887212934, "grad_norm": 2.886326313018799, "learning_rate": 5.021317434434026e-05, "loss": 2.8822, "step": 58640 }, { "epoch": 3.9845767087919555, "grad_norm": 2.832873582839966, "learning_rate": 5.0208927843457e-05, "loss": 2.5066, "step": 58645 }, { 
"epoch": 3.984916428862617, "grad_norm": 2.5445826053619385, "learning_rate": 5.0204681342573726e-05, "loss": 2.8361, "step": 58650 }, { "epoch": 3.9852561489332787, "grad_norm": 2.398181200027466, "learning_rate": 5.020043484169045e-05, "loss": 2.5389, "step": 58655 }, { "epoch": 3.985595869003941, "grad_norm": 2.7087020874023438, "learning_rate": 5.019618834080718e-05, "loss": 2.5563, "step": 58660 }, { "epoch": 3.9859355890746024, "grad_norm": 2.673988103866577, "learning_rate": 5.01919418399239e-05, "loss": 2.7439, "step": 58665 }, { "epoch": 3.986275309145264, "grad_norm": 3.086362600326538, "learning_rate": 5.018769533904063e-05, "loss": 2.4646, "step": 58670 }, { "epoch": 3.986615029215926, "grad_norm": 2.2530174255371094, "learning_rate": 5.0183448838157366e-05, "loss": 2.7303, "step": 58675 }, { "epoch": 3.9869547492865878, "grad_norm": 3.206005811691284, "learning_rate": 5.017920233727409e-05, "loss": 2.7446, "step": 58680 }, { "epoch": 3.9872944693572494, "grad_norm": 3.2348246574401855, "learning_rate": 5.0174955836390815e-05, "loss": 2.4193, "step": 58685 }, { "epoch": 3.9876341894279115, "grad_norm": 3.249390125274658, "learning_rate": 5.017070933550755e-05, "loss": 2.6834, "step": 58690 }, { "epoch": 3.987973909498573, "grad_norm": 2.0566060543060303, "learning_rate": 5.016646283462427e-05, "loss": 2.6434, "step": 58695 }, { "epoch": 3.9883136295692347, "grad_norm": 2.491513967514038, "learning_rate": 5.0162216333741e-05, "loss": 2.5211, "step": 58700 }, { "epoch": 3.988653349639897, "grad_norm": 2.8333580493927, "learning_rate": 5.0157969832857734e-05, "loss": 2.7192, "step": 58705 }, { "epoch": 3.9889930697105584, "grad_norm": 2.9503941535949707, "learning_rate": 5.0153723331974455e-05, "loss": 2.5673, "step": 58710 }, { "epoch": 3.98933278978122, "grad_norm": 3.0138583183288574, "learning_rate": 5.014947683109118e-05, "loss": 2.7641, "step": 58715 }, { "epoch": 3.989672509851882, "grad_norm": 2.5720810890197754, "learning_rate": 
5.014523033020792e-05, "loss": 2.8948, "step": 58720 }, { "epoch": 3.990012229922544, "grad_norm": 5.450859069824219, "learning_rate": 5.014098382932464e-05, "loss": 2.2309, "step": 58725 }, { "epoch": 3.9903519499932054, "grad_norm": 2.7442359924316406, "learning_rate": 5.013673732844136e-05, "loss": 2.469, "step": 58730 }, { "epoch": 3.9906916700638675, "grad_norm": 3.135010004043579, "learning_rate": 5.0132490827558095e-05, "loss": 2.3811, "step": 58735 }, { "epoch": 3.991031390134529, "grad_norm": 2.53914737701416, "learning_rate": 5.012824432667482e-05, "loss": 2.6612, "step": 58740 }, { "epoch": 3.9913711102051908, "grad_norm": 2.482262134552002, "learning_rate": 5.0123997825791545e-05, "loss": 2.5794, "step": 58745 }, { "epoch": 3.991710830275853, "grad_norm": 3.2618448734283447, "learning_rate": 5.011975132490828e-05, "loss": 2.4961, "step": 58750 }, { "epoch": 3.9920505503465145, "grad_norm": 2.843822479248047, "learning_rate": 5.011550482402501e-05, "loss": 2.6114, "step": 58755 }, { "epoch": 3.992390270417176, "grad_norm": 3.176366090774536, "learning_rate": 5.011125832314173e-05, "loss": 2.5811, "step": 58760 }, { "epoch": 3.992729990487838, "grad_norm": 3.662769079208374, "learning_rate": 5.010701182225846e-05, "loss": 2.9729, "step": 58765 }, { "epoch": 3.9930697105585, "grad_norm": 3.170048236846924, "learning_rate": 5.010276532137519e-05, "loss": 2.5736, "step": 58770 }, { "epoch": 3.9934094306291614, "grad_norm": 3.4511749744415283, "learning_rate": 5.009851882049191e-05, "loss": 2.2694, "step": 58775 }, { "epoch": 3.9937491506998235, "grad_norm": 2.2540018558502197, "learning_rate": 5.009427231960865e-05, "loss": 2.3534, "step": 58780 }, { "epoch": 3.994088870770485, "grad_norm": 2.648846387863159, "learning_rate": 5.0090025818725375e-05, "loss": 2.6643, "step": 58785 }, { "epoch": 3.9944285908411468, "grad_norm": 3.3864946365356445, "learning_rate": 5.00857793178421e-05, "loss": 2.3157, "step": 58790 }, { "epoch": 3.994768310911809, "grad_norm": 
2.9739441871643066, "learning_rate": 5.008153281695883e-05, "loss": 2.7023, "step": 58795 }, { "epoch": 3.9951080309824705, "grad_norm": 3.158417224884033, "learning_rate": 5.007728631607555e-05, "loss": 2.6189, "step": 58800 }, { "epoch": 3.995447751053132, "grad_norm": 3.3352441787719727, "learning_rate": 5.007303981519228e-05, "loss": 2.9797, "step": 58805 }, { "epoch": 3.995787471123794, "grad_norm": 2.853761911392212, "learning_rate": 5.0068793314309015e-05, "loss": 2.644, "step": 58810 }, { "epoch": 3.996127191194456, "grad_norm": 2.7853643894195557, "learning_rate": 5.006454681342574e-05, "loss": 2.5052, "step": 58815 }, { "epoch": 3.9964669112651174, "grad_norm": 3.6066036224365234, "learning_rate": 5.0060300312542465e-05, "loss": 2.8473, "step": 58820 }, { "epoch": 3.9968066313357795, "grad_norm": 2.8958232402801514, "learning_rate": 5.00560538116592e-05, "loss": 2.7011, "step": 58825 }, { "epoch": 3.997146351406441, "grad_norm": 2.5329291820526123, "learning_rate": 5.005180731077592e-05, "loss": 2.7689, "step": 58830 }, { "epoch": 3.9974860714771028, "grad_norm": 3.219346046447754, "learning_rate": 5.004756080989265e-05, "loss": 2.5398, "step": 58835 }, { "epoch": 3.997825791547765, "grad_norm": 3.4695241451263428, "learning_rate": 5.0043314309009383e-05, "loss": 2.7951, "step": 58840 }, { "epoch": 3.9981655116184265, "grad_norm": 2.201836585998535, "learning_rate": 5.0039067808126105e-05, "loss": 2.7663, "step": 58845 }, { "epoch": 3.998505231689088, "grad_norm": 2.9565858840942383, "learning_rate": 5.003482130724283e-05, "loss": 2.5588, "step": 58850 }, { "epoch": 3.99884495175975, "grad_norm": 2.183945417404175, "learning_rate": 5.003057480635957e-05, "loss": 2.6784, "step": 58855 }, { "epoch": 3.999184671830412, "grad_norm": 3.213441848754883, "learning_rate": 5.002632830547629e-05, "loss": 2.8982, "step": 58860 }, { "epoch": 3.9995243919010735, "grad_norm": 3.618731737136841, "learning_rate": 5.002208180459301e-05, "loss": 2.7681, "step": 58865 }, { 
"epoch": 3.9998641119717355, "grad_norm": 3.887765884399414, "learning_rate": 5.0017835303709745e-05, "loss": 2.572, "step": 58870 }, { "epoch": 4.0, "eval_bertscore": { "f1": 0.8420730991665737, "precision": 0.8459188663424627, "recall": 0.8390323724588667 }, "eval_bleu_4": 0.017999610125582297, "eval_exact_match": 0.00048454307587944567, "eval_loss": 3.372431516647339, "eval_meteor": 0.09067729784384805, "eval_rouge": { "rouge1": 0.12457423564921638, "rouge2": 0.018813232443320338, "rougeL": 0.1071242264961057, "rougeLsum": 0.1071998912393191 }, "eval_runtime": 1538.6714, "eval_samples_per_second": 6.706, "eval_steps_per_second": 0.838, "step": 58872 }, { "epoch": 4.000203832042397, "grad_norm": 2.647459030151367, "learning_rate": 5.001358880282647e-05, "loss": 2.8146, "step": 58875 }, { "epoch": 4.000543552113059, "grad_norm": 3.006283760070801, "learning_rate": 5.0009342301943194e-05, "loss": 2.465, "step": 58880 }, { "epoch": 4.000883272183721, "grad_norm": 2.5468597412109375, "learning_rate": 5.000509580105993e-05, "loss": 2.3931, "step": 58885 }, { "epoch": 4.001222992254382, "grad_norm": 2.6865577697753906, "learning_rate": 5.000084930017666e-05, "loss": 2.2677, "step": 58890 }, { "epoch": 4.001562712325044, "grad_norm": 2.632936477661133, "learning_rate": 4.9996602799293385e-05, "loss": 2.497, "step": 58895 }, { "epoch": 4.001902432395706, "grad_norm": 3.131941556930542, "learning_rate": 4.999235629841011e-05, "loss": 2.4824, "step": 58900 }, { "epoch": 4.002242152466367, "grad_norm": 2.989290714263916, "learning_rate": 4.998810979752684e-05, "loss": 2.2979, "step": 58905 }, { "epoch": 4.0025818725370295, "grad_norm": 2.5623815059661865, "learning_rate": 4.998386329664357e-05, "loss": 2.4413, "step": 58910 }, { "epoch": 4.0029215926076915, "grad_norm": 3.1342861652374268, "learning_rate": 4.99796167957603e-05, "loss": 2.1873, "step": 58915 }, { "epoch": 4.003261312678353, "grad_norm": 3.128969192504883, "learning_rate": 4.9975370294877025e-05, "loss": 
2.4875, "step": 58920 }, { "epoch": 4.003601032749015, "grad_norm": 2.8040993213653564, "learning_rate": 4.997112379399375e-05, "loss": 2.3738, "step": 58925 }, { "epoch": 4.003940752819677, "grad_norm": 3.8926424980163574, "learning_rate": 4.996687729311048e-05, "loss": 2.6745, "step": 58930 }, { "epoch": 4.004280472890338, "grad_norm": 3.678659677505493, "learning_rate": 4.99626307922272e-05, "loss": 2.3962, "step": 58935 }, { "epoch": 4.004620192961, "grad_norm": 2.5403995513916016, "learning_rate": 4.995838429134394e-05, "loss": 2.5782, "step": 58940 }, { "epoch": 4.004959913031662, "grad_norm": 2.5135223865509033, "learning_rate": 4.9954137790460665e-05, "loss": 2.3514, "step": 58945 }, { "epoch": 4.005299633102323, "grad_norm": 2.408252716064453, "learning_rate": 4.9949891289577386e-05, "loss": 2.4191, "step": 58950 }, { "epoch": 4.0056393531729855, "grad_norm": 3.1559362411499023, "learning_rate": 4.994564478869412e-05, "loss": 2.4109, "step": 58955 }, { "epoch": 4.0059790732436475, "grad_norm": 3.7142531871795654, "learning_rate": 4.994139828781085e-05, "loss": 2.5683, "step": 58960 }, { "epoch": 4.006318793314309, "grad_norm": 2.554807424545288, "learning_rate": 4.993715178692757e-05, "loss": 2.2777, "step": 58965 }, { "epoch": 4.006658513384971, "grad_norm": 2.8444716930389404, "learning_rate": 4.99329052860443e-05, "loss": 2.2729, "step": 58970 }, { "epoch": 4.006998233455633, "grad_norm": 2.9444286823272705, "learning_rate": 4.992865878516103e-05, "loss": 2.5397, "step": 58975 }, { "epoch": 4.007337953526294, "grad_norm": 3.1799817085266113, "learning_rate": 4.9924412284277754e-05, "loss": 2.4912, "step": 58980 }, { "epoch": 4.007677673596956, "grad_norm": 3.253129720687866, "learning_rate": 4.992016578339448e-05, "loss": 2.4069, "step": 58985 }, { "epoch": 4.008017393667618, "grad_norm": 2.667424440383911, "learning_rate": 4.991591928251122e-05, "loss": 2.4221, "step": 58990 }, { "epoch": 4.008357113738279, "grad_norm": 3.102471351623535, 
"learning_rate": 4.991167278162794e-05, "loss": 2.4696, "step": 58995 }, { "epoch": 4.0086968338089415, "grad_norm": 2.623683214187622, "learning_rate": 4.9907426280744666e-05, "loss": 2.61, "step": 59000 }, { "epoch": 4.009036553879604, "grad_norm": 2.8663322925567627, "learning_rate": 4.99031797798614e-05, "loss": 2.2619, "step": 59005 }, { "epoch": 4.009376273950265, "grad_norm": 2.528249740600586, "learning_rate": 4.989893327897812e-05, "loss": 2.2736, "step": 59010 }, { "epoch": 4.009715994020927, "grad_norm": 3.6821210384368896, "learning_rate": 4.989468677809485e-05, "loss": 2.1533, "step": 59015 }, { "epoch": 4.010055714091589, "grad_norm": 2.336988925933838, "learning_rate": 4.989044027721158e-05, "loss": 2.595, "step": 59020 }, { "epoch": 4.01039543416225, "grad_norm": 2.912609815597534, "learning_rate": 4.988619377632831e-05, "loss": 2.407, "step": 59025 }, { "epoch": 4.010735154232912, "grad_norm": 2.4963948726654053, "learning_rate": 4.9881947275445034e-05, "loss": 2.4048, "step": 59030 }, { "epoch": 4.011074874303574, "grad_norm": 2.675532341003418, "learning_rate": 4.987770077456176e-05, "loss": 2.3772, "step": 59035 }, { "epoch": 4.011414594374235, "grad_norm": 3.048546075820923, "learning_rate": 4.98734542736785e-05, "loss": 2.3747, "step": 59040 }, { "epoch": 4.0117543144448975, "grad_norm": 2.444716215133667, "learning_rate": 4.986920777279522e-05, "loss": 2.5404, "step": 59045 }, { "epoch": 4.01209403451556, "grad_norm": 3.7333076000213623, "learning_rate": 4.9864961271911946e-05, "loss": 2.4161, "step": 59050 }, { "epoch": 4.012433754586221, "grad_norm": 3.9596266746520996, "learning_rate": 4.9860714771028674e-05, "loss": 2.3857, "step": 59055 }, { "epoch": 4.012773474656883, "grad_norm": 2.785235643386841, "learning_rate": 4.98564682701454e-05, "loss": 2.5355, "step": 59060 }, { "epoch": 4.013113194727545, "grad_norm": 2.9114062786102295, "learning_rate": 4.985222176926213e-05, "loss": 2.6115, "step": 59065 }, { "epoch": 4.013452914798206, 
"grad_norm": 3.0119385719299316, "learning_rate": 4.984797526837886e-05, "loss": 2.4655, "step": 59070 }, { "epoch": 4.013792634868868, "grad_norm": 2.6152660846710205, "learning_rate": 4.9843728767495586e-05, "loss": 2.0423, "step": 59075 }, { "epoch": 4.01413235493953, "grad_norm": 3.435873031616211, "learning_rate": 4.9839482266612314e-05, "loss": 2.4988, "step": 59080 }, { "epoch": 4.014472075010191, "grad_norm": 3.0312845706939697, "learning_rate": 4.983523576572904e-05, "loss": 2.4289, "step": 59085 }, { "epoch": 4.0148117950808535, "grad_norm": 1.9366142749786377, "learning_rate": 4.983098926484577e-05, "loss": 2.5387, "step": 59090 }, { "epoch": 4.015151515151516, "grad_norm": 2.7029802799224854, "learning_rate": 4.98267427639625e-05, "loss": 2.4447, "step": 59095 }, { "epoch": 4.015491235222177, "grad_norm": 2.8013217449188232, "learning_rate": 4.9822496263079226e-05, "loss": 2.3878, "step": 59100 }, { "epoch": 4.015830955292839, "grad_norm": 3.6688296794891357, "learning_rate": 4.9818249762195954e-05, "loss": 2.2426, "step": 59105 }, { "epoch": 4.016170675363501, "grad_norm": 3.2591552734375, "learning_rate": 4.981400326131268e-05, "loss": 2.3684, "step": 59110 }, { "epoch": 4.016510395434162, "grad_norm": 3.2254931926727295, "learning_rate": 4.980975676042941e-05, "loss": 2.1964, "step": 59115 }, { "epoch": 4.016850115504824, "grad_norm": 3.299553632736206, "learning_rate": 4.980551025954613e-05, "loss": 2.3115, "step": 59120 }, { "epoch": 4.017189835575485, "grad_norm": 3.4789516925811768, "learning_rate": 4.9801263758662866e-05, "loss": 2.1946, "step": 59125 }, { "epoch": 4.017529555646147, "grad_norm": 2.1704213619232178, "learning_rate": 4.9797017257779594e-05, "loss": 2.5136, "step": 59130 }, { "epoch": 4.0178692757168095, "grad_norm": 2.9398162364959717, "learning_rate": 4.9792770756896316e-05, "loss": 2.3997, "step": 59135 }, { "epoch": 4.018208995787471, "grad_norm": 3.4074227809906006, "learning_rate": 4.978852425601305e-05, "loss": 2.4281, 
"step": 59140 }, { "epoch": 4.018548715858133, "grad_norm": 2.934795379638672, "learning_rate": 4.978427775512978e-05, "loss": 2.3671, "step": 59145 }, { "epoch": 4.018888435928795, "grad_norm": 2.7454164028167725, "learning_rate": 4.97800312542465e-05, "loss": 2.4423, "step": 59150 }, { "epoch": 4.019228155999456, "grad_norm": 2.129911422729492, "learning_rate": 4.977578475336323e-05, "loss": 2.5789, "step": 59155 }, { "epoch": 4.019567876070118, "grad_norm": 2.898912191390991, "learning_rate": 4.977153825247996e-05, "loss": 2.2228, "step": 59160 }, { "epoch": 4.01990759614078, "grad_norm": 2.3974990844726562, "learning_rate": 4.9767291751596684e-05, "loss": 2.4317, "step": 59165 }, { "epoch": 4.020247316211441, "grad_norm": 2.784306526184082, "learning_rate": 4.976304525071341e-05, "loss": 2.3885, "step": 59170 }, { "epoch": 4.0205870362821035, "grad_norm": 2.9203574657440186, "learning_rate": 4.9758798749830146e-05, "loss": 2.5365, "step": 59175 }, { "epoch": 4.0209267563527655, "grad_norm": 3.179939031600952, "learning_rate": 4.975455224894687e-05, "loss": 2.379, "step": 59180 }, { "epoch": 4.021266476423427, "grad_norm": 3.422158718109131, "learning_rate": 4.9750305748063596e-05, "loss": 2.6726, "step": 59185 }, { "epoch": 4.021606196494089, "grad_norm": 2.8335068225860596, "learning_rate": 4.9746059247180324e-05, "loss": 2.5812, "step": 59190 }, { "epoch": 4.021945916564751, "grad_norm": 2.716627836227417, "learning_rate": 4.974181274629706e-05, "loss": 2.49, "step": 59195 }, { "epoch": 4.022285636635412, "grad_norm": 3.1263587474823, "learning_rate": 4.973756624541378e-05, "loss": 2.3863, "step": 59200 }, { "epoch": 4.022625356706074, "grad_norm": 3.0342111587524414, "learning_rate": 4.973331974453051e-05, "loss": 2.4161, "step": 59205 }, { "epoch": 4.022965076776736, "grad_norm": 2.8974571228027344, "learning_rate": 4.972907324364724e-05, "loss": 2.414, "step": 59210 }, { "epoch": 4.023304796847397, "grad_norm": 3.133820056915283, "learning_rate": 
4.9724826742763964e-05, "loss": 2.5814, "step": 59215 }, { "epoch": 4.0236445169180595, "grad_norm": 3.060497999191284, "learning_rate": 4.972058024188069e-05, "loss": 2.5631, "step": 59220 }, { "epoch": 4.0239842369887215, "grad_norm": 3.3352890014648438, "learning_rate": 4.971633374099742e-05, "loss": 2.4764, "step": 59225 }, { "epoch": 4.024323957059383, "grad_norm": 2.438228130340576, "learning_rate": 4.971208724011415e-05, "loss": 2.7213, "step": 59230 }, { "epoch": 4.024663677130045, "grad_norm": 2.7199227809906006, "learning_rate": 4.9707840739230876e-05, "loss": 2.6161, "step": 59235 }, { "epoch": 4.025003397200707, "grad_norm": 2.902850389480591, "learning_rate": 4.9703594238347604e-05, "loss": 2.4336, "step": 59240 }, { "epoch": 4.025343117271368, "grad_norm": 3.0033833980560303, "learning_rate": 4.969934773746433e-05, "loss": 2.4832, "step": 59245 }, { "epoch": 4.02568283734203, "grad_norm": 3.1259093284606934, "learning_rate": 4.969510123658106e-05, "loss": 2.6229, "step": 59250 }, { "epoch": 4.026022557412692, "grad_norm": 2.6891000270843506, "learning_rate": 4.969085473569779e-05, "loss": 2.4294, "step": 59255 }, { "epoch": 4.026362277483353, "grad_norm": 2.808104991912842, "learning_rate": 4.9686608234814516e-05, "loss": 2.4393, "step": 59260 }, { "epoch": 4.0267019975540155, "grad_norm": 2.885429620742798, "learning_rate": 4.9682361733931244e-05, "loss": 2.5774, "step": 59265 }, { "epoch": 4.0270417176246776, "grad_norm": 2.9449520111083984, "learning_rate": 4.967811523304797e-05, "loss": 2.4438, "step": 59270 }, { "epoch": 4.027381437695339, "grad_norm": 3.407860040664673, "learning_rate": 4.96738687321647e-05, "loss": 2.457, "step": 59275 }, { "epoch": 4.027721157766001, "grad_norm": 3.0393624305725098, "learning_rate": 4.966962223128143e-05, "loss": 2.3479, "step": 59280 }, { "epoch": 4.028060877836663, "grad_norm": 3.9852476119995117, "learning_rate": 4.9665375730398156e-05, "loss": 2.4245, "step": 59285 }, { "epoch": 4.028400597907324, 
"grad_norm": 2.8706400394439697, "learning_rate": 4.966112922951488e-05, "loss": 2.3289, "step": 59290 }, { "epoch": 4.028740317977986, "grad_norm": 3.731966972351074, "learning_rate": 4.965688272863161e-05, "loss": 2.2775, "step": 59295 }, { "epoch": 4.029080038048648, "grad_norm": 2.5284340381622314, "learning_rate": 4.965263622774834e-05, "loss": 2.3185, "step": 59300 }, { "epoch": 4.029419758119309, "grad_norm": 2.9247140884399414, "learning_rate": 4.964838972686506e-05, "loss": 2.3805, "step": 59305 }, { "epoch": 4.0297594781899715, "grad_norm": 3.593364715576172, "learning_rate": 4.9644143225981796e-05, "loss": 2.279, "step": 59310 }, { "epoch": 4.030099198260634, "grad_norm": 2.984907865524292, "learning_rate": 4.9639896725098524e-05, "loss": 2.3942, "step": 59315 }, { "epoch": 4.030438918331295, "grad_norm": 3.0270984172821045, "learning_rate": 4.9635650224215245e-05, "loss": 2.5443, "step": 59320 }, { "epoch": 4.030778638401957, "grad_norm": 3.0327742099761963, "learning_rate": 4.963140372333197e-05, "loss": 2.5541, "step": 59325 }, { "epoch": 4.031118358472619, "grad_norm": 2.697754144668579, "learning_rate": 4.962715722244871e-05, "loss": 2.5346, "step": 59330 }, { "epoch": 4.03145807854328, "grad_norm": 3.324732780456543, "learning_rate": 4.962291072156543e-05, "loss": 2.5629, "step": 59335 }, { "epoch": 4.031797798613942, "grad_norm": 2.9126954078674316, "learning_rate": 4.961866422068216e-05, "loss": 2.3913, "step": 59340 }, { "epoch": 4.032137518684604, "grad_norm": 3.589951276779175, "learning_rate": 4.961441771979889e-05, "loss": 2.4564, "step": 59345 }, { "epoch": 4.032477238755265, "grad_norm": 3.013169527053833, "learning_rate": 4.961017121891561e-05, "loss": 2.5377, "step": 59350 }, { "epoch": 4.0328169588259275, "grad_norm": 3.460083484649658, "learning_rate": 4.960592471803234e-05, "loss": 2.3307, "step": 59355 }, { "epoch": 4.03315667889659, "grad_norm": 3.4588675498962402, "learning_rate": 4.960167821714907e-05, "loss": 2.2938, "step": 
59360 }, { "epoch": 4.033496398967251, "grad_norm": 3.0706658363342285, "learning_rate": 4.9597431716265804e-05, "loss": 2.3509, "step": 59365 }, { "epoch": 4.033836119037913, "grad_norm": 2.9235918521881104, "learning_rate": 4.9593185215382525e-05, "loss": 2.3964, "step": 59370 }, { "epoch": 4.034175839108575, "grad_norm": 2.559166669845581, "learning_rate": 4.958893871449925e-05, "loss": 2.4128, "step": 59375 }, { "epoch": 4.034515559179236, "grad_norm": 3.0398340225219727, "learning_rate": 4.958469221361599e-05, "loss": 2.6667, "step": 59380 }, { "epoch": 4.034855279249898, "grad_norm": 2.9460465908050537, "learning_rate": 4.958044571273271e-05, "loss": 2.5292, "step": 59385 }, { "epoch": 4.03519499932056, "grad_norm": 3.070042610168457, "learning_rate": 4.957619921184944e-05, "loss": 2.3891, "step": 59390 }, { "epoch": 4.035534719391221, "grad_norm": 3.291069269180298, "learning_rate": 4.9571952710966165e-05, "loss": 2.152, "step": 59395 }, { "epoch": 4.0358744394618835, "grad_norm": 3.1683754920959473, "learning_rate": 4.956770621008289e-05, "loss": 1.9438, "step": 59400 }, { "epoch": 4.036214159532546, "grad_norm": 2.4361844062805176, "learning_rate": 4.956345970919962e-05, "loss": 2.3325, "step": 59405 }, { "epoch": 4.036553879603207, "grad_norm": 2.6877169609069824, "learning_rate": 4.955921320831635e-05, "loss": 2.3319, "step": 59410 }, { "epoch": 4.036893599673869, "grad_norm": 3.140165328979492, "learning_rate": 4.955496670743308e-05, "loss": 2.3717, "step": 59415 }, { "epoch": 4.037233319744531, "grad_norm": 2.7695326805114746, "learning_rate": 4.9550720206549805e-05, "loss": 2.2694, "step": 59420 }, { "epoch": 4.037573039815192, "grad_norm": 3.0178303718566895, "learning_rate": 4.954647370566653e-05, "loss": 2.3815, "step": 59425 }, { "epoch": 4.037912759885854, "grad_norm": 3.6477930545806885, "learning_rate": 4.954222720478326e-05, "loss": 2.3522, "step": 59430 }, { "epoch": 4.038252479956516, "grad_norm": 2.831814765930176, "learning_rate": 
4.953798070389999e-05, "loss": 2.3369, "step": 59435 }, { "epoch": 4.0385922000271774, "grad_norm": 2.8053371906280518, "learning_rate": 4.953373420301672e-05, "loss": 2.2765, "step": 59440 }, { "epoch": 4.0389319200978395, "grad_norm": 4.150099754333496, "learning_rate": 4.9529487702133445e-05, "loss": 2.4423, "step": 59445 }, { "epoch": 4.039271640168501, "grad_norm": 3.0700182914733887, "learning_rate": 4.952524120125017e-05, "loss": 2.4378, "step": 59450 }, { "epoch": 4.039611360239163, "grad_norm": 3.4223201274871826, "learning_rate": 4.95209947003669e-05, "loss": 2.6353, "step": 59455 }, { "epoch": 4.039951080309825, "grad_norm": 2.797960042953491, "learning_rate": 4.951674819948362e-05, "loss": 2.5547, "step": 59460 }, { "epoch": 4.040290800380486, "grad_norm": 2.90360164642334, "learning_rate": 4.951250169860036e-05, "loss": 2.1785, "step": 59465 }, { "epoch": 4.040630520451148, "grad_norm": 2.5161783695220947, "learning_rate": 4.9508255197717085e-05, "loss": 2.5902, "step": 59470 }, { "epoch": 4.04097024052181, "grad_norm": 2.4731028079986572, "learning_rate": 4.9504008696833807e-05, "loss": 2.4086, "step": 59475 }, { "epoch": 4.041309960592471, "grad_norm": 3.5543007850646973, "learning_rate": 4.949976219595054e-05, "loss": 2.4036, "step": 59480 }, { "epoch": 4.0416496806631335, "grad_norm": 2.9630398750305176, "learning_rate": 4.949551569506727e-05, "loss": 2.4695, "step": 59485 }, { "epoch": 4.0419894007337955, "grad_norm": 3.550905227661133, "learning_rate": 4.949126919418399e-05, "loss": 2.5043, "step": 59490 }, { "epoch": 4.042329120804457, "grad_norm": 4.316811561584473, "learning_rate": 4.948702269330072e-05, "loss": 2.5444, "step": 59495 }, { "epoch": 4.042668840875119, "grad_norm": 3.516657829284668, "learning_rate": 4.9482776192417453e-05, "loss": 2.6724, "step": 59500 }, { "epoch": 4.043008560945781, "grad_norm": 2.650284767150879, "learning_rate": 4.9478529691534175e-05, "loss": 2.6491, "step": 59505 }, { "epoch": 4.043348281016442, 
"grad_norm": 3.3808727264404297, "learning_rate": 4.94742831906509e-05, "loss": 2.2719, "step": 59510 }, { "epoch": 4.043688001087104, "grad_norm": 3.4440083503723145, "learning_rate": 4.947003668976764e-05, "loss": 2.4159, "step": 59515 }, { "epoch": 4.044027721157766, "grad_norm": 3.014338493347168, "learning_rate": 4.946579018888436e-05, "loss": 2.3551, "step": 59520 }, { "epoch": 4.044367441228427, "grad_norm": 2.8594141006469727, "learning_rate": 4.946154368800109e-05, "loss": 2.2666, "step": 59525 }, { "epoch": 4.0447071612990895, "grad_norm": 2.696502447128296, "learning_rate": 4.945729718711782e-05, "loss": 2.4571, "step": 59530 }, { "epoch": 4.0450468813697515, "grad_norm": null, "learning_rate": 4.9453899986411204e-05, "loss": 2.6616, "step": 59535 }, { "epoch": 4.045386601440413, "grad_norm": 2.4095568656921387, "learning_rate": 4.9449653485527925e-05, "loss": 2.6221, "step": 59540 }, { "epoch": 4.045726321511075, "grad_norm": 2.7967424392700195, "learning_rate": 4.944540698464465e-05, "loss": 2.3455, "step": 59545 }, { "epoch": 4.046066041581737, "grad_norm": 3.6269118785858154, "learning_rate": 4.944116048376138e-05, "loss": 2.1953, "step": 59550 }, { "epoch": 4.046405761652398, "grad_norm": 3.205711841583252, "learning_rate": 4.943691398287811e-05, "loss": 2.2873, "step": 59555 }, { "epoch": 4.04674548172306, "grad_norm": 2.980700731277466, "learning_rate": 4.943266748199484e-05, "loss": 2.6479, "step": 59560 }, { "epoch": 4.047085201793722, "grad_norm": 3.384686231613159, "learning_rate": 4.9428420981111565e-05, "loss": 2.4557, "step": 59565 }, { "epoch": 4.047424921864383, "grad_norm": 3.713870048522949, "learning_rate": 4.94241744802283e-05, "loss": 2.0903, "step": 59570 }, { "epoch": 4.0477646419350455, "grad_norm": 2.7880330085754395, "learning_rate": 4.941992797934502e-05, "loss": 2.2959, "step": 59575 }, { "epoch": 4.048104362005708, "grad_norm": 2.7474260330200195, "learning_rate": 4.941568147846175e-05, "loss": 2.4608, "step": 59580 }, { 
"epoch": 4.048444082076369, "grad_norm": 2.922266960144043, "learning_rate": 4.941143497757848e-05, "loss": 2.4437, "step": 59585 }, { "epoch": 4.048783802147031, "grad_norm": 2.8379011154174805, "learning_rate": 4.9407188476695205e-05, "loss": 2.3342, "step": 59590 }, { "epoch": 4.049123522217693, "grad_norm": 2.9333457946777344, "learning_rate": 4.940294197581193e-05, "loss": 2.3537, "step": 59595 }, { "epoch": 4.049463242288354, "grad_norm": 3.104640007019043, "learning_rate": 4.939869547492866e-05, "loss": 2.5866, "step": 59600 }, { "epoch": 4.049802962359016, "grad_norm": 3.6317343711853027, "learning_rate": 4.939444897404539e-05, "loss": 2.4863, "step": 59605 }, { "epoch": 4.050142682429678, "grad_norm": 3.1450424194335938, "learning_rate": 4.939020247316212e-05, "loss": 2.5062, "step": 59610 }, { "epoch": 4.050482402500339, "grad_norm": 3.064044237136841, "learning_rate": 4.9385955972278845e-05, "loss": 2.4509, "step": 59615 }, { "epoch": 4.0508221225710015, "grad_norm": 2.9796605110168457, "learning_rate": 4.938170947139557e-05, "loss": 2.4944, "step": 59620 }, { "epoch": 4.051161842641664, "grad_norm": 3.816563129425049, "learning_rate": 4.93774629705123e-05, "loss": 2.2602, "step": 59625 }, { "epoch": 4.051501562712325, "grad_norm": 2.7338907718658447, "learning_rate": 4.937321646962903e-05, "loss": 2.4287, "step": 59630 }, { "epoch": 4.051841282782987, "grad_norm": 3.4042253494262695, "learning_rate": 4.936896996874576e-05, "loss": 2.7155, "step": 59635 }, { "epoch": 4.052181002853649, "grad_norm": 2.1377205848693848, "learning_rate": 4.9364723467862485e-05, "loss": 2.2926, "step": 59640 }, { "epoch": 4.05252072292431, "grad_norm": 2.704758405685425, "learning_rate": 4.936047696697921e-05, "loss": 2.4173, "step": 59645 }, { "epoch": 4.052860442994972, "grad_norm": 3.826904296875, "learning_rate": 4.9356230466095934e-05, "loss": 2.3838, "step": 59650 }, { "epoch": 4.053200163065634, "grad_norm": 3.0052616596221924, "learning_rate": 4.935198396521267e-05, 
"loss": 2.4783, "step": 59655 }, { "epoch": 4.053539883136295, "grad_norm": 2.8672709465026855, "learning_rate": 4.93477374643294e-05, "loss": 2.4077, "step": 59660 }, { "epoch": 4.0538796032069575, "grad_norm": 3.1192822456359863, "learning_rate": 4.934349096344612e-05, "loss": 2.538, "step": 59665 }, { "epoch": 4.05421932327762, "grad_norm": 2.976034164428711, "learning_rate": 4.933924446256285e-05, "loss": 2.53, "step": 59670 }, { "epoch": 4.054559043348281, "grad_norm": 3.069601535797119, "learning_rate": 4.933499796167958e-05, "loss": 2.4751, "step": 59675 }, { "epoch": 4.054898763418943, "grad_norm": 2.717442750930786, "learning_rate": 4.93307514607963e-05, "loss": 2.5055, "step": 59680 }, { "epoch": 4.055238483489605, "grad_norm": 3.0338358879089355, "learning_rate": 4.932650495991303e-05, "loss": 2.3332, "step": 59685 }, { "epoch": 4.055578203560266, "grad_norm": 2.9877748489379883, "learning_rate": 4.9322258459029765e-05, "loss": 2.3822, "step": 59690 }, { "epoch": 4.055917923630928, "grad_norm": 3.5536794662475586, "learning_rate": 4.9318011958146487e-05, "loss": 2.3882, "step": 59695 }, { "epoch": 4.05625764370159, "grad_norm": 3.1737923622131348, "learning_rate": 4.9313765457263215e-05, "loss": 2.1893, "step": 59700 }, { "epoch": 4.056597363772251, "grad_norm": 2.6146697998046875, "learning_rate": 4.930951895637995e-05, "loss": 2.2398, "step": 59705 }, { "epoch": 4.0569370838429135, "grad_norm": 3.165436267852783, "learning_rate": 4.930527245549667e-05, "loss": 2.3701, "step": 59710 }, { "epoch": 4.057276803913576, "grad_norm": 2.8493051528930664, "learning_rate": 4.93010259546134e-05, "loss": 2.5238, "step": 59715 }, { "epoch": 4.057616523984237, "grad_norm": 2.414116621017456, "learning_rate": 4.9296779453730127e-05, "loss": 2.4699, "step": 59720 }, { "epoch": 4.057956244054899, "grad_norm": 2.7144229412078857, "learning_rate": 4.9292532952846855e-05, "loss": 2.4718, "step": 59725 }, { "epoch": 4.058295964125561, "grad_norm": 3.100719451904297, 
"learning_rate": 4.928828645196358e-05, "loss": 2.4235, "step": 59730 }, { "epoch": 4.058635684196222, "grad_norm": 2.7118794918060303, "learning_rate": 4.928403995108031e-05, "loss": 2.4392, "step": 59735 }, { "epoch": 4.058975404266884, "grad_norm": 3.306854486465454, "learning_rate": 4.9279793450197045e-05, "loss": 2.5933, "step": 59740 }, { "epoch": 4.059315124337546, "grad_norm": 3.0299148559570312, "learning_rate": 4.927554694931377e-05, "loss": 2.5977, "step": 59745 }, { "epoch": 4.0596548444082075, "grad_norm": 3.2372493743896484, "learning_rate": 4.9271300448430495e-05, "loss": 2.4737, "step": 59750 }, { "epoch": 4.0599945644788695, "grad_norm": 2.5214662551879883, "learning_rate": 4.926705394754722e-05, "loss": 2.4573, "step": 59755 }, { "epoch": 4.060334284549532, "grad_norm": 2.494331121444702, "learning_rate": 4.926280744666395e-05, "loss": 2.4191, "step": 59760 }, { "epoch": 4.060674004620193, "grad_norm": 3.159684419631958, "learning_rate": 4.925856094578068e-05, "loss": 2.2857, "step": 59765 }, { "epoch": 4.061013724690855, "grad_norm": 2.407684087753296, "learning_rate": 4.925431444489741e-05, "loss": 2.4282, "step": 59770 }, { "epoch": 4.061353444761517, "grad_norm": 3.253866672515869, "learning_rate": 4.9250067944014135e-05, "loss": 2.3793, "step": 59775 }, { "epoch": 4.061693164832178, "grad_norm": 2.4974169731140137, "learning_rate": 4.924582144313086e-05, "loss": 2.3803, "step": 59780 }, { "epoch": 4.06203288490284, "grad_norm": 3.5790319442749023, "learning_rate": 4.924157494224759e-05, "loss": 2.3799, "step": 59785 }, { "epoch": 4.062372604973502, "grad_norm": 2.9087448120117188, "learning_rate": 4.923732844136432e-05, "loss": 2.5172, "step": 59790 }, { "epoch": 4.0627123250441635, "grad_norm": 3.58975887298584, "learning_rate": 4.923308194048105e-05, "loss": 2.598, "step": 59795 }, { "epoch": 4.0630520451148255, "grad_norm": 2.9620561599731445, "learning_rate": 4.9228835439597775e-05, "loss": 2.3819, "step": 59800 }, { "epoch": 
4.063391765185487, "grad_norm": 2.3780672550201416, "learning_rate": 4.92245889387145e-05, "loss": 2.577, "step": 59805 }, { "epoch": 4.063731485256149, "grad_norm": 3.1207010746002197, "learning_rate": 4.922034243783123e-05, "loss": 2.356, "step": 59810 }, { "epoch": 4.064071205326811, "grad_norm": 3.1078407764434814, "learning_rate": 4.921609593694796e-05, "loss": 2.3451, "step": 59815 }, { "epoch": 4.064410925397472, "grad_norm": 2.2046051025390625, "learning_rate": 4.921184943606468e-05, "loss": 2.3677, "step": 59820 }, { "epoch": 4.064750645468134, "grad_norm": 3.3880088329315186, "learning_rate": 4.9207602935181415e-05, "loss": 2.3978, "step": 59825 }, { "epoch": 4.065090365538796, "grad_norm": 3.2193603515625, "learning_rate": 4.920335643429814e-05, "loss": 2.2442, "step": 59830 }, { "epoch": 4.065430085609457, "grad_norm": 3.0957729816436768, "learning_rate": 4.9199109933414864e-05, "loss": 2.2292, "step": 59835 }, { "epoch": 4.0657698056801195, "grad_norm": 3.046290159225464, "learning_rate": 4.91948634325316e-05, "loss": 2.7183, "step": 59840 }, { "epoch": 4.0661095257507816, "grad_norm": 2.931311845779419, "learning_rate": 4.919061693164833e-05, "loss": 2.6021, "step": 59845 }, { "epoch": 4.066449245821443, "grad_norm": 3.5181095600128174, "learning_rate": 4.918637043076505e-05, "loss": 2.3215, "step": 59850 }, { "epoch": 4.066788965892105, "grad_norm": 4.055309295654297, "learning_rate": 4.9182123929881776e-05, "loss": 2.0197, "step": 59855 }, { "epoch": 4.067128685962767, "grad_norm": 2.852229356765747, "learning_rate": 4.917787742899851e-05, "loss": 2.4069, "step": 59860 }, { "epoch": 4.067468406033428, "grad_norm": 2.8353123664855957, "learning_rate": 4.917363092811523e-05, "loss": 2.3575, "step": 59865 }, { "epoch": 4.06780812610409, "grad_norm": 3.1614484786987305, "learning_rate": 4.916938442723196e-05, "loss": 2.5791, "step": 59870 }, { "epoch": 4.068147846174752, "grad_norm": 3.122732400894165, "learning_rate": 4.9165137926348695e-05, "loss": 
2.5858, "step": 59875 }, { "epoch": 4.068487566245413, "grad_norm": 3.3115060329437256, "learning_rate": 4.9160891425465416e-05, "loss": 2.368, "step": 59880 }, { "epoch": 4.0688272863160755, "grad_norm": 3.2425026893615723, "learning_rate": 4.9156644924582144e-05, "loss": 2.3882, "step": 59885 }, { "epoch": 4.069167006386738, "grad_norm": 2.9108800888061523, "learning_rate": 4.915239842369888e-05, "loss": 2.5179, "step": 59890 }, { "epoch": 4.069506726457399, "grad_norm": 3.4623429775238037, "learning_rate": 4.91481519228156e-05, "loss": 2.4165, "step": 59895 }, { "epoch": 4.069846446528061, "grad_norm": 2.8131282329559326, "learning_rate": 4.914390542193233e-05, "loss": 2.6304, "step": 59900 }, { "epoch": 4.070186166598723, "grad_norm": 2.5190391540527344, "learning_rate": 4.9139658921049056e-05, "loss": 2.617, "step": 59905 }, { "epoch": 4.070525886669384, "grad_norm": 3.0548057556152344, "learning_rate": 4.913541242016579e-05, "loss": 2.313, "step": 59910 }, { "epoch": 4.070865606740046, "grad_norm": 3.2941553592681885, "learning_rate": 4.913116591928251e-05, "loss": 2.6135, "step": 59915 }, { "epoch": 4.071205326810708, "grad_norm": 4.066824436187744, "learning_rate": 4.912691941839924e-05, "loss": 2.4735, "step": 59920 }, { "epoch": 4.071545046881369, "grad_norm": 2.3407092094421387, "learning_rate": 4.9122672917515975e-05, "loss": 2.4256, "step": 59925 }, { "epoch": 4.0718847669520315, "grad_norm": 3.7032787799835205, "learning_rate": 4.9118426416632696e-05, "loss": 2.63, "step": 59930 }, { "epoch": 4.072224487022694, "grad_norm": 2.6338682174682617, "learning_rate": 4.9114179915749424e-05, "loss": 2.5008, "step": 59935 }, { "epoch": 4.072564207093355, "grad_norm": 3.1577556133270264, "learning_rate": 4.910993341486615e-05, "loss": 2.5631, "step": 59940 }, { "epoch": 4.072903927164017, "grad_norm": 3.175938606262207, "learning_rate": 4.910568691398288e-05, "loss": 2.6413, "step": 59945 }, { "epoch": 4.073243647234679, "grad_norm": 3.139289617538452, 
"learning_rate": 4.910144041309961e-05, "loss": 2.4324, "step": 59950 }, { "epoch": 4.07358336730534, "grad_norm": 3.5684292316436768, "learning_rate": 4.9097193912216336e-05, "loss": 2.4231, "step": 59955 }, { "epoch": 4.073923087376002, "grad_norm": 3.1667282581329346, "learning_rate": 4.9092947411333064e-05, "loss": 2.3682, "step": 59960 }, { "epoch": 4.074262807446664, "grad_norm": 3.132070779800415, "learning_rate": 4.908870091044979e-05, "loss": 1.943, "step": 59965 }, { "epoch": 4.074602527517325, "grad_norm": 3.7294094562530518, "learning_rate": 4.908445440956652e-05, "loss": 2.3416, "step": 59970 }, { "epoch": 4.0749422475879875, "grad_norm": 3.1856284141540527, "learning_rate": 4.908020790868325e-05, "loss": 2.415, "step": 59975 }, { "epoch": 4.07528196765865, "grad_norm": 2.736480236053467, "learning_rate": 4.9075961407799976e-05, "loss": 2.5534, "step": 59980 }, { "epoch": 4.075621687729311, "grad_norm": 2.7113232612609863, "learning_rate": 4.9071714906916704e-05, "loss": 2.3039, "step": 59985 }, { "epoch": 4.075961407799973, "grad_norm": 2.8674519062042236, "learning_rate": 4.906746840603343e-05, "loss": 2.4276, "step": 59990 }, { "epoch": 4.076301127870635, "grad_norm": 3.274749517440796, "learning_rate": 4.906322190515016e-05, "loss": 2.2183, "step": 59995 }, { "epoch": 4.076640847941296, "grad_norm": 3.1670238971710205, "learning_rate": 4.905897540426689e-05, "loss": 2.5745, "step": 60000 }, { "epoch": 4.076980568011958, "grad_norm": 2.9468772411346436, "learning_rate": 4.905472890338361e-05, "loss": 2.323, "step": 60005 }, { "epoch": 4.07732028808262, "grad_norm": 2.840330123901367, "learning_rate": 4.9050482402500344e-05, "loss": 2.26, "step": 60010 }, { "epoch": 4.0776600081532814, "grad_norm": 3.5283946990966797, "learning_rate": 4.904623590161707e-05, "loss": 2.2175, "step": 60015 }, { "epoch": 4.0779997282239435, "grad_norm": 4.566617488861084, "learning_rate": 4.9041989400733794e-05, "loss": 2.2024, "step": 60020 }, { "epoch": 
4.078339448294606, "grad_norm": 3.057441473007202, "learning_rate": 4.903774289985053e-05, "loss": 2.2985, "step": 60025 }, { "epoch": 4.078679168365267, "grad_norm": 2.7651236057281494, "learning_rate": 4.9033496398967256e-05, "loss": 2.2947, "step": 60030 }, { "epoch": 4.079018888435929, "grad_norm": 2.9994888305664062, "learning_rate": 4.902924989808398e-05, "loss": 2.2362, "step": 60035 }, { "epoch": 4.079358608506591, "grad_norm": 3.8225176334381104, "learning_rate": 4.9025003397200706e-05, "loss": 2.4471, "step": 60040 }, { "epoch": 4.079698328577252, "grad_norm": 3.201901912689209, "learning_rate": 4.902075689631744e-05, "loss": 2.5354, "step": 60045 }, { "epoch": 4.080038048647914, "grad_norm": 2.6478800773620605, "learning_rate": 4.901651039543416e-05, "loss": 2.3648, "step": 60050 }, { "epoch": 4.080377768718576, "grad_norm": 2.628980875015259, "learning_rate": 4.901226389455089e-05, "loss": 2.606, "step": 60055 }, { "epoch": 4.0807174887892375, "grad_norm": 2.29213547706604, "learning_rate": 4.9008017393667624e-05, "loss": 2.4431, "step": 60060 }, { "epoch": 4.0810572088598995, "grad_norm": 3.58919095993042, "learning_rate": 4.9003770892784346e-05, "loss": 2.3575, "step": 60065 }, { "epoch": 4.081396928930562, "grad_norm": 3.9363019466400146, "learning_rate": 4.8999524391901074e-05, "loss": 2.3269, "step": 60070 }, { "epoch": 4.081736649001223, "grad_norm": 3.747680425643921, "learning_rate": 4.89952778910178e-05, "loss": 2.3765, "step": 60075 }, { "epoch": 4.082076369071885, "grad_norm": 3.353414297103882, "learning_rate": 4.8991031390134536e-05, "loss": 2.3442, "step": 60080 }, { "epoch": 4.082416089142547, "grad_norm": 2.819511651992798, "learning_rate": 4.898678488925126e-05, "loss": 2.6219, "step": 60085 }, { "epoch": 4.082755809213208, "grad_norm": 2.9603986740112305, "learning_rate": 4.8982538388367986e-05, "loss": 2.6105, "step": 60090 }, { "epoch": 4.08309552928387, "grad_norm": 3.2774176597595215, "learning_rate": 4.897829188748472e-05, "loss": 
2.7183, "step": 60095 }, { "epoch": 4.083435249354532, "grad_norm": 2.930645227432251, "learning_rate": 4.897404538660144e-05, "loss": 2.2603, "step": 60100 }, { "epoch": 4.0837749694251935, "grad_norm": 2.9907803535461426, "learning_rate": 4.896979888571817e-05, "loss": 2.2982, "step": 60105 }, { "epoch": 4.0841146894958555, "grad_norm": 2.7741596698760986, "learning_rate": 4.89655523848349e-05, "loss": 2.5249, "step": 60110 }, { "epoch": 4.084454409566518, "grad_norm": 3.111973285675049, "learning_rate": 4.8961305883951626e-05, "loss": 2.4833, "step": 60115 }, { "epoch": 4.084794129637179, "grad_norm": 2.5055809020996094, "learning_rate": 4.8957059383068354e-05, "loss": 2.4954, "step": 60120 }, { "epoch": 4.085133849707841, "grad_norm": 3.7215795516967773, "learning_rate": 4.895281288218508e-05, "loss": 2.5335, "step": 60125 }, { "epoch": 4.085473569778502, "grad_norm": 3.253033399581909, "learning_rate": 4.894856638130181e-05, "loss": 2.4984, "step": 60130 }, { "epoch": 4.085813289849164, "grad_norm": 3.7112371921539307, "learning_rate": 4.894431988041854e-05, "loss": 2.7546, "step": 60135 }, { "epoch": 4.086153009919826, "grad_norm": 2.892789363861084, "learning_rate": 4.8940073379535266e-05, "loss": 2.2908, "step": 60140 }, { "epoch": 4.086492729990487, "grad_norm": 2.9873971939086914, "learning_rate": 4.8935826878651994e-05, "loss": 2.4377, "step": 60145 }, { "epoch": 4.0868324500611495, "grad_norm": 3.0576882362365723, "learning_rate": 4.893158037776872e-05, "loss": 2.4045, "step": 60150 }, { "epoch": 4.0871721701318116, "grad_norm": 2.434372901916504, "learning_rate": 4.892733387688545e-05, "loss": 2.7135, "step": 60155 }, { "epoch": 4.087511890202473, "grad_norm": 3.021226167678833, "learning_rate": 4.892308737600218e-05, "loss": 2.2806, "step": 60160 }, { "epoch": 4.087851610273135, "grad_norm": 2.8894059658050537, "learning_rate": 4.8918840875118906e-05, "loss": 2.4325, "step": 60165 }, { "epoch": 4.088191330343797, "grad_norm": 3.362435817718506, 
"learning_rate": 4.8914594374235634e-05, "loss": 2.3622, "step": 60170 }, { "epoch": 4.088531050414458, "grad_norm": 3.3236804008483887, "learning_rate": 4.8910347873352355e-05, "loss": 2.3622, "step": 60175 }, { "epoch": 4.08887077048512, "grad_norm": 3.3066012859344482, "learning_rate": 4.890610137246909e-05, "loss": 2.6578, "step": 60180 }, { "epoch": 4.089210490555782, "grad_norm": 3.1041979789733887, "learning_rate": 4.890185487158582e-05, "loss": 2.2788, "step": 60185 }, { "epoch": 4.089550210626443, "grad_norm": 3.004852294921875, "learning_rate": 4.889760837070254e-05, "loss": 2.3657, "step": 60190 }, { "epoch": 4.0898899306971055, "grad_norm": 3.203045606613159, "learning_rate": 4.8893361869819274e-05, "loss": 2.5637, "step": 60195 }, { "epoch": 4.090229650767768, "grad_norm": 2.7521958351135254, "learning_rate": 4.8889115368936e-05, "loss": 2.3971, "step": 60200 }, { "epoch": 4.090569370838429, "grad_norm": 4.515625, "learning_rate": 4.888486886805272e-05, "loss": 2.6292, "step": 60205 }, { "epoch": 4.090909090909091, "grad_norm": 2.825615882873535, "learning_rate": 4.888062236716945e-05, "loss": 2.2864, "step": 60210 }, { "epoch": 4.091248810979753, "grad_norm": 3.500121593475342, "learning_rate": 4.8876375866286186e-05, "loss": 2.4604, "step": 60215 }, { "epoch": 4.091588531050414, "grad_norm": 3.386366128921509, "learning_rate": 4.887212936540291e-05, "loss": 2.3903, "step": 60220 }, { "epoch": 4.091928251121076, "grad_norm": 2.645892381668091, "learning_rate": 4.8867882864519635e-05, "loss": 2.5055, "step": 60225 }, { "epoch": 4.092267971191738, "grad_norm": 3.009577751159668, "learning_rate": 4.886363636363637e-05, "loss": 2.5265, "step": 60230 }, { "epoch": 4.092607691262399, "grad_norm": 2.8112881183624268, "learning_rate": 4.885938986275309e-05, "loss": 2.3519, "step": 60235 }, { "epoch": 4.0929474113330615, "grad_norm": 3.439147472381592, "learning_rate": 4.885514336186982e-05, "loss": 2.2524, "step": 60240 }, { "epoch": 4.093287131403724, 
"grad_norm": 3.2338924407958984, "learning_rate": 4.885089686098655e-05, "loss": 2.3314, "step": 60245 }, { "epoch": 4.093626851474385, "grad_norm": 3.674315929412842, "learning_rate": 4.884665036010328e-05, "loss": 2.1449, "step": 60250 }, { "epoch": 4.093966571545047, "grad_norm": 3.002544641494751, "learning_rate": 4.884240385922e-05, "loss": 2.5323, "step": 60255 }, { "epoch": 4.094306291615709, "grad_norm": 3.7946507930755615, "learning_rate": 4.883815735833673e-05, "loss": 2.5537, "step": 60260 }, { "epoch": 4.09464601168637, "grad_norm": 2.772733688354492, "learning_rate": 4.8833910857453466e-05, "loss": 2.4833, "step": 60265 }, { "epoch": 4.094985731757032, "grad_norm": 3.442469596862793, "learning_rate": 4.882966435657019e-05, "loss": 2.4095, "step": 60270 }, { "epoch": 4.095325451827694, "grad_norm": 3.2122550010681152, "learning_rate": 4.8825417855686915e-05, "loss": 2.0841, "step": 60275 }, { "epoch": 4.095665171898355, "grad_norm": 3.091484785079956, "learning_rate": 4.882117135480364e-05, "loss": 2.4868, "step": 60280 }, { "epoch": 4.0960048919690175, "grad_norm": 2.4777045249938965, "learning_rate": 4.881692485392037e-05, "loss": 2.2707, "step": 60285 }, { "epoch": 4.09634461203968, "grad_norm": 2.8536975383758545, "learning_rate": 4.88126783530371e-05, "loss": 2.4306, "step": 60290 }, { "epoch": 4.096684332110341, "grad_norm": 3.16047739982605, "learning_rate": 4.880843185215383e-05, "loss": 2.4272, "step": 60295 }, { "epoch": 4.097024052181003, "grad_norm": 3.3307807445526123, "learning_rate": 4.8804185351270555e-05, "loss": 2.5372, "step": 60300 }, { "epoch": 4.097363772251665, "grad_norm": 3.2860147953033447, "learning_rate": 4.879993885038728e-05, "loss": 2.5886, "step": 60305 }, { "epoch": 4.097703492322326, "grad_norm": 2.499667167663574, "learning_rate": 4.879569234950401e-05, "loss": 2.2358, "step": 60310 }, { "epoch": 4.098043212392988, "grad_norm": 2.9086825847625732, "learning_rate": 4.879144584862074e-05, "loss": 2.5133, "step": 60315 }, 
{ "epoch": 4.09838293246365, "grad_norm": 2.8238513469696045, "learning_rate": 4.878719934773747e-05, "loss": 2.283, "step": 60320 }, { "epoch": 4.0987226525343115, "grad_norm": 3.683941602706909, "learning_rate": 4.8782952846854195e-05, "loss": 2.4633, "step": 60325 }, { "epoch": 4.0990623726049735, "grad_norm": 3.1900875568389893, "learning_rate": 4.877870634597092e-05, "loss": 2.3599, "step": 60330 }, { "epoch": 4.099402092675636, "grad_norm": 3.703275680541992, "learning_rate": 4.877445984508765e-05, "loss": 2.312, "step": 60335 }, { "epoch": 4.099741812746297, "grad_norm": 3.0082828998565674, "learning_rate": 4.877021334420438e-05, "loss": 2.5429, "step": 60340 }, { "epoch": 4.100081532816959, "grad_norm": 2.70430850982666, "learning_rate": 4.87659668433211e-05, "loss": 2.4684, "step": 60345 }, { "epoch": 4.100421252887621, "grad_norm": 2.829576015472412, "learning_rate": 4.8761720342437835e-05, "loss": 2.5698, "step": 60350 }, { "epoch": 4.100760972958282, "grad_norm": 2.7942137718200684, "learning_rate": 4.875747384155456e-05, "loss": 2.5272, "step": 60355 }, { "epoch": 4.101100693028944, "grad_norm": 2.7560038566589355, "learning_rate": 4.8753227340671284e-05, "loss": 2.3833, "step": 60360 }, { "epoch": 4.101440413099606, "grad_norm": 2.8150875568389893, "learning_rate": 4.874898083978802e-05, "loss": 2.4762, "step": 60365 }, { "epoch": 4.1017801331702675, "grad_norm": 3.4517502784729004, "learning_rate": 4.874473433890475e-05, "loss": 2.3837, "step": 60370 }, { "epoch": 4.1021198532409295, "grad_norm": 3.136716365814209, "learning_rate": 4.874048783802147e-05, "loss": 2.4681, "step": 60375 }, { "epoch": 4.102459573311592, "grad_norm": 2.9970555305480957, "learning_rate": 4.8736241337138197e-05, "loss": 2.5843, "step": 60380 }, { "epoch": 4.102799293382253, "grad_norm": 2.8863306045532227, "learning_rate": 4.873199483625493e-05, "loss": 2.3354, "step": 60385 }, { "epoch": 4.103139013452915, "grad_norm": 3.4435713291168213, "learning_rate": 
4.872774833537165e-05, "loss": 2.3878, "step": 60390 }, { "epoch": 4.103478733523577, "grad_norm": 3.2138473987579346, "learning_rate": 4.872350183448838e-05, "loss": 2.2574, "step": 60395 }, { "epoch": 4.103818453594238, "grad_norm": 2.7724993228912354, "learning_rate": 4.8719255333605115e-05, "loss": 2.2692, "step": 60400 }, { "epoch": 4.1041581736649, "grad_norm": 3.660888910293579, "learning_rate": 4.8715008832721837e-05, "loss": 2.5478, "step": 60405 }, { "epoch": 4.104497893735562, "grad_norm": 2.568681240081787, "learning_rate": 4.8710762331838565e-05, "loss": 2.3065, "step": 60410 }, { "epoch": 4.1048376138062235, "grad_norm": 3.4268453121185303, "learning_rate": 4.87065158309553e-05, "loss": 2.5239, "step": 60415 }, { "epoch": 4.1051773338768855, "grad_norm": 3.2121775150299072, "learning_rate": 4.870226933007203e-05, "loss": 2.3996, "step": 60420 }, { "epoch": 4.105517053947548, "grad_norm": 3.609052896499634, "learning_rate": 4.869802282918875e-05, "loss": 2.4423, "step": 60425 }, { "epoch": 4.105856774018209, "grad_norm": 2.7088003158569336, "learning_rate": 4.8693776328305477e-05, "loss": 2.311, "step": 60430 }, { "epoch": 4.106196494088871, "grad_norm": 3.174384593963623, "learning_rate": 4.868952982742221e-05, "loss": 2.097, "step": 60435 }, { "epoch": 4.106536214159533, "grad_norm": 3.051656484603882, "learning_rate": 4.868528332653893e-05, "loss": 2.4772, "step": 60440 }, { "epoch": 4.106875934230194, "grad_norm": 2.749368190765381, "learning_rate": 4.868103682565566e-05, "loss": 2.4128, "step": 60445 }, { "epoch": 4.107215654300856, "grad_norm": 2.989514112472534, "learning_rate": 4.8676790324772395e-05, "loss": 2.6009, "step": 60450 }, { "epoch": 4.107555374371518, "grad_norm": 3.0031609535217285, "learning_rate": 4.867254382388912e-05, "loss": 2.3507, "step": 60455 }, { "epoch": 4.1078950944421795, "grad_norm": 3.064324378967285, "learning_rate": 4.8668297323005845e-05, "loss": 2.3232, "step": 60460 }, { "epoch": 4.108234814512842, "grad_norm": 
3.4737401008605957, "learning_rate": 4.866405082212257e-05, "loss": 2.3478, "step": 60465 }, { "epoch": 4.108574534583504, "grad_norm": 3.441147804260254, "learning_rate": 4.86598043212393e-05, "loss": 2.3514, "step": 60470 }, { "epoch": 4.108914254654165, "grad_norm": 4.005356311798096, "learning_rate": 4.865555782035603e-05, "loss": 2.5509, "step": 60475 }, { "epoch": 4.109253974724827, "grad_norm": 3.0354933738708496, "learning_rate": 4.865131131947276e-05, "loss": 2.5444, "step": 60480 }, { "epoch": 4.109593694795488, "grad_norm": 2.8094358444213867, "learning_rate": 4.8647064818589485e-05, "loss": 2.5136, "step": 60485 }, { "epoch": 4.10993341486615, "grad_norm": 2.905991792678833, "learning_rate": 4.864281831770621e-05, "loss": 2.6987, "step": 60490 }, { "epoch": 4.110273134936812, "grad_norm": 3.3472859859466553, "learning_rate": 4.863857181682294e-05, "loss": 2.4658, "step": 60495 }, { "epoch": 4.110612855007473, "grad_norm": 3.209939956665039, "learning_rate": 4.863432531593967e-05, "loss": 2.1234, "step": 60500 }, { "epoch": 4.1109525750781355, "grad_norm": 2.6716151237487793, "learning_rate": 4.86300788150564e-05, "loss": 2.4666, "step": 60505 }, { "epoch": 4.111292295148798, "grad_norm": 3.8260669708251953, "learning_rate": 4.8625832314173125e-05, "loss": 2.4001, "step": 60510 }, { "epoch": 4.111632015219459, "grad_norm": 2.2074460983276367, "learning_rate": 4.862158581328985e-05, "loss": 2.5598, "step": 60515 }, { "epoch": 4.111971735290121, "grad_norm": 3.339994430541992, "learning_rate": 4.861733931240658e-05, "loss": 2.3226, "step": 60520 }, { "epoch": 4.112311455360783, "grad_norm": 3.166022539138794, "learning_rate": 4.861309281152331e-05, "loss": 2.3367, "step": 60525 }, { "epoch": 4.112651175431444, "grad_norm": 3.8978965282440186, "learning_rate": 4.860884631064003e-05, "loss": 2.3337, "step": 60530 }, { "epoch": 4.112990895502106, "grad_norm": 3.4668595790863037, "learning_rate": 4.8604599809756765e-05, "loss": 2.144, "step": 60535 }, { 
"epoch": 4.113330615572768, "grad_norm": 3.2834203243255615, "learning_rate": 4.860035330887349e-05, "loss": 2.1519, "step": 60540 }, { "epoch": 4.113670335643429, "grad_norm": 2.8767781257629395, "learning_rate": 4.8596106807990214e-05, "loss": 2.5118, "step": 60545 }, { "epoch": 4.1140100557140915, "grad_norm": 2.6860787868499756, "learning_rate": 4.859186030710695e-05, "loss": 2.1074, "step": 60550 }, { "epoch": 4.114349775784754, "grad_norm": 2.6782310009002686, "learning_rate": 4.858761380622368e-05, "loss": 2.2838, "step": 60555 }, { "epoch": 4.114689495855415, "grad_norm": 3.897380828857422, "learning_rate": 4.85833673053404e-05, "loss": 2.2328, "step": 60560 }, { "epoch": 4.115029215926077, "grad_norm": 3.4258596897125244, "learning_rate": 4.8579120804457126e-05, "loss": 2.3453, "step": 60565 }, { "epoch": 4.115368935996739, "grad_norm": 3.4593868255615234, "learning_rate": 4.857487430357386e-05, "loss": 2.3457, "step": 60570 }, { "epoch": 4.1157086560674, "grad_norm": 2.7030222415924072, "learning_rate": 4.857062780269058e-05, "loss": 2.0529, "step": 60575 }, { "epoch": 4.116048376138062, "grad_norm": 3.5423896312713623, "learning_rate": 4.856638130180731e-05, "loss": 2.5848, "step": 60580 }, { "epoch": 4.116388096208724, "grad_norm": 3.402550220489502, "learning_rate": 4.8562134800924045e-05, "loss": 2.4232, "step": 60585 }, { "epoch": 4.116727816279385, "grad_norm": 3.5297038555145264, "learning_rate": 4.855788830004077e-05, "loss": 2.4383, "step": 60590 }, { "epoch": 4.1170675363500475, "grad_norm": 3.1210741996765137, "learning_rate": 4.8553641799157494e-05, "loss": 2.6166, "step": 60595 }, { "epoch": 4.11740725642071, "grad_norm": 3.2748851776123047, "learning_rate": 4.854939529827422e-05, "loss": 2.6599, "step": 60600 }, { "epoch": 4.117746976491371, "grad_norm": 3.2577314376831055, "learning_rate": 4.854514879739096e-05, "loss": 2.3767, "step": 60605 }, { "epoch": 4.118086696562033, "grad_norm": 3.091730833053589, "learning_rate": 
4.854090229650768e-05, "loss": 2.376, "step": 60610 }, { "epoch": 4.118426416632695, "grad_norm": 2.8751189708709717, "learning_rate": 4.8536655795624406e-05, "loss": 2.3285, "step": 60615 }, { "epoch": 4.118766136703356, "grad_norm": 2.6440787315368652, "learning_rate": 4.853240929474114e-05, "loss": 2.3867, "step": 60620 }, { "epoch": 4.119105856774018, "grad_norm": 2.8659770488739014, "learning_rate": 4.852816279385786e-05, "loss": 2.5616, "step": 60625 }, { "epoch": 4.11944557684468, "grad_norm": 3.0845046043395996, "learning_rate": 4.852391629297459e-05, "loss": 2.3241, "step": 60630 }, { "epoch": 4.1197852969153415, "grad_norm": 2.821147918701172, "learning_rate": 4.851966979209132e-05, "loss": 2.4925, "step": 60635 }, { "epoch": 4.1201250169860035, "grad_norm": 2.843653678894043, "learning_rate": 4.8515423291208046e-05, "loss": 2.3708, "step": 60640 }, { "epoch": 4.120464737056666, "grad_norm": 3.1818573474884033, "learning_rate": 4.8511176790324774e-05, "loss": 2.318, "step": 60645 }, { "epoch": 4.120804457127327, "grad_norm": 2.8258869647979736, "learning_rate": 4.85069302894415e-05, "loss": 2.6509, "step": 60650 }, { "epoch": 4.121144177197989, "grad_norm": 2.7871830463409424, "learning_rate": 4.850268378855823e-05, "loss": 2.4657, "step": 60655 }, { "epoch": 4.121483897268651, "grad_norm": 2.8003547191619873, "learning_rate": 4.849843728767496e-05, "loss": 2.2275, "step": 60660 }, { "epoch": 4.121823617339312, "grad_norm": 3.074154853820801, "learning_rate": 4.8494190786791686e-05, "loss": 2.2124, "step": 60665 }, { "epoch": 4.122163337409974, "grad_norm": 3.721508026123047, "learning_rate": 4.8489944285908414e-05, "loss": 2.3486, "step": 60670 }, { "epoch": 4.122503057480636, "grad_norm": 2.851795196533203, "learning_rate": 4.848569778502514e-05, "loss": 2.2184, "step": 60675 }, { "epoch": 4.1228427775512975, "grad_norm": 3.674818992614746, "learning_rate": 4.848145128414187e-05, "loss": 2.2022, "step": 60680 }, { "epoch": 4.1231824976219595, 
"grad_norm": 4.123653411865234, "learning_rate": 4.84772047832586e-05, "loss": 2.2148, "step": 60685 }, { "epoch": 4.123522217692622, "grad_norm": 2.4372284412384033, "learning_rate": 4.8472958282375326e-05, "loss": 2.8976, "step": 60690 }, { "epoch": 4.123861937763283, "grad_norm": 3.285642147064209, "learning_rate": 4.8468711781492054e-05, "loss": 2.2972, "step": 60695 }, { "epoch": 4.124201657833945, "grad_norm": 3.0932767391204834, "learning_rate": 4.8464465280608775e-05, "loss": 2.3112, "step": 60700 }, { "epoch": 4.124541377904607, "grad_norm": 2.9718539714813232, "learning_rate": 4.846021877972551e-05, "loss": 2.4421, "step": 60705 }, { "epoch": 4.124881097975268, "grad_norm": 3.159369468688965, "learning_rate": 4.845597227884224e-05, "loss": 2.2855, "step": 60710 }, { "epoch": 4.12522081804593, "grad_norm": 2.8146579265594482, "learning_rate": 4.845172577795896e-05, "loss": 2.4743, "step": 60715 }, { "epoch": 4.125560538116592, "grad_norm": 2.6399810314178467, "learning_rate": 4.8447479277075694e-05, "loss": 2.298, "step": 60720 }, { "epoch": 4.1259002581872535, "grad_norm": 3.151266574859619, "learning_rate": 4.844323277619242e-05, "loss": 2.0686, "step": 60725 }, { "epoch": 4.1262399782579156, "grad_norm": 3.196443557739258, "learning_rate": 4.8438986275309143e-05, "loss": 2.2849, "step": 60730 }, { "epoch": 4.126579698328578, "grad_norm": 3.5597124099731445, "learning_rate": 4.843473977442587e-05, "loss": 2.3731, "step": 60735 }, { "epoch": 4.126919418399239, "grad_norm": 3.2260870933532715, "learning_rate": 4.8430493273542606e-05, "loss": 2.1862, "step": 60740 }, { "epoch": 4.127259138469901, "grad_norm": 3.696470022201538, "learning_rate": 4.842624677265933e-05, "loss": 1.9507, "step": 60745 }, { "epoch": 4.127598858540563, "grad_norm": 3.793806791305542, "learning_rate": 4.8422000271776056e-05, "loss": 2.5979, "step": 60750 }, { "epoch": 4.127938578611224, "grad_norm": 2.6898488998413086, "learning_rate": 4.841775377089279e-05, "loss": 2.5223, "step": 
60755 }, { "epoch": 4.128278298681886, "grad_norm": 3.596095323562622, "learning_rate": 4.841350727000952e-05, "loss": 2.3743, "step": 60760 }, { "epoch": 4.128618018752548, "grad_norm": 3.294698715209961, "learning_rate": 4.840926076912624e-05, "loss": 2.3286, "step": 60765 }, { "epoch": 4.1289577388232095, "grad_norm": 3.3331570625305176, "learning_rate": 4.840501426824297e-05, "loss": 2.4778, "step": 60770 }, { "epoch": 4.129297458893872, "grad_norm": 2.97453236579895, "learning_rate": 4.84007677673597e-05, "loss": 2.1333, "step": 60775 }, { "epoch": 4.129637178964534, "grad_norm": 3.2307164669036865, "learning_rate": 4.8396521266476424e-05, "loss": 2.3368, "step": 60780 }, { "epoch": 4.129976899035195, "grad_norm": 2.517275333404541, "learning_rate": 4.839227476559315e-05, "loss": 2.5376, "step": 60785 }, { "epoch": 4.130316619105857, "grad_norm": 3.8920576572418213, "learning_rate": 4.8388028264709886e-05, "loss": 2.518, "step": 60790 }, { "epoch": 4.130656339176519, "grad_norm": 3.480701446533203, "learning_rate": 4.838378176382661e-05, "loss": 2.4436, "step": 60795 }, { "epoch": 4.13099605924718, "grad_norm": 2.7514309883117676, "learning_rate": 4.8379535262943336e-05, "loss": 2.2875, "step": 60800 }, { "epoch": 4.131335779317842, "grad_norm": 2.4071590900421143, "learning_rate": 4.8375288762060064e-05, "loss": 2.2344, "step": 60805 }, { "epoch": 4.131675499388503, "grad_norm": 5.737385272979736, "learning_rate": 4.837104226117679e-05, "loss": 2.3831, "step": 60810 }, { "epoch": 4.1320152194591655, "grad_norm": 2.9027771949768066, "learning_rate": 4.836679576029352e-05, "loss": 2.2275, "step": 60815 }, { "epoch": 4.132354939529828, "grad_norm": 3.1610023975372314, "learning_rate": 4.836254925941025e-05, "loss": 2.5814, "step": 60820 }, { "epoch": 4.132694659600489, "grad_norm": 3.0479464530944824, "learning_rate": 4.8358302758526976e-05, "loss": 2.3868, "step": 60825 }, { "epoch": 4.133034379671151, "grad_norm": 3.3125970363616943, "learning_rate": 
4.8354056257643704e-05, "loss": 2.3907, "step": 60830 }, { "epoch": 4.133374099741813, "grad_norm": 3.1104366779327393, "learning_rate": 4.834980975676043e-05, "loss": 2.4483, "step": 60835 }, { "epoch": 4.133713819812474, "grad_norm": 3.0682742595672607, "learning_rate": 4.834556325587716e-05, "loss": 2.2204, "step": 60840 }, { "epoch": 4.134053539883136, "grad_norm": 3.5575952529907227, "learning_rate": 4.834131675499389e-05, "loss": 2.2481, "step": 60845 }, { "epoch": 4.134393259953798, "grad_norm": 3.335660934448242, "learning_rate": 4.8337070254110616e-05, "loss": 2.4254, "step": 60850 }, { "epoch": 4.134732980024459, "grad_norm": 3.09049391746521, "learning_rate": 4.8332823753227344e-05, "loss": 2.3829, "step": 60855 }, { "epoch": 4.1350727000951215, "grad_norm": 3.476895809173584, "learning_rate": 4.832857725234407e-05, "loss": 2.2979, "step": 60860 }, { "epoch": 4.135412420165784, "grad_norm": 2.6818652153015137, "learning_rate": 4.83243307514608e-05, "loss": 2.5563, "step": 60865 }, { "epoch": 4.135752140236445, "grad_norm": 3.1237616539001465, "learning_rate": 4.832008425057752e-05, "loss": 2.3496, "step": 60870 }, { "epoch": 4.136091860307107, "grad_norm": 2.8672714233398438, "learning_rate": 4.8315837749694256e-05, "loss": 2.4252, "step": 60875 }, { "epoch": 4.136431580377769, "grad_norm": 3.2481191158294678, "learning_rate": 4.8311591248810984e-05, "loss": 2.5597, "step": 60880 }, { "epoch": 4.13677130044843, "grad_norm": 3.6214258670806885, "learning_rate": 4.8307344747927705e-05, "loss": 2.2152, "step": 60885 }, { "epoch": 4.137111020519092, "grad_norm": 3.530777931213379, "learning_rate": 4.830309824704444e-05, "loss": 2.5831, "step": 60890 }, { "epoch": 4.137450740589754, "grad_norm": 2.8832364082336426, "learning_rate": 4.829885174616117e-05, "loss": 2.3882, "step": 60895 }, { "epoch": 4.1377904606604154, "grad_norm": 3.371819257736206, "learning_rate": 4.829460524527789e-05, "loss": 2.399, "step": 60900 }, { "epoch": 4.1381301807310775, 
"grad_norm": 3.6699395179748535, "learning_rate": 4.829035874439462e-05, "loss": 2.4898, "step": 60905 }, { "epoch": 4.13846990080174, "grad_norm": 3.1460344791412354, "learning_rate": 4.828611224351135e-05, "loss": 2.3157, "step": 60910 }, { "epoch": 4.138809620872401, "grad_norm": 3.775704860687256, "learning_rate": 4.828186574262807e-05, "loss": 2.5608, "step": 60915 }, { "epoch": 4.139149340943063, "grad_norm": 3.940659284591675, "learning_rate": 4.82776192417448e-05, "loss": 2.3799, "step": 60920 }, { "epoch": 4.139489061013725, "grad_norm": 2.9540510177612305, "learning_rate": 4.8273372740861536e-05, "loss": 2.368, "step": 60925 }, { "epoch": 4.139828781084386, "grad_norm": 3.800354480743408, "learning_rate": 4.8269126239978264e-05, "loss": 2.7132, "step": 60930 }, { "epoch": 4.140168501155048, "grad_norm": 2.90736722946167, "learning_rate": 4.8264879739094985e-05, "loss": 2.4157, "step": 60935 }, { "epoch": 4.14050822122571, "grad_norm": 2.6092233657836914, "learning_rate": 4.826063323821172e-05, "loss": 2.505, "step": 60940 }, { "epoch": 4.1408479412963715, "grad_norm": 3.0865840911865234, "learning_rate": 4.825638673732845e-05, "loss": 2.5378, "step": 60945 }, { "epoch": 4.1411876613670335, "grad_norm": 2.914320945739746, "learning_rate": 4.825214023644517e-05, "loss": 2.5781, "step": 60950 }, { "epoch": 4.141527381437696, "grad_norm": 2.6067612171173096, "learning_rate": 4.82478937355619e-05, "loss": 2.203, "step": 60955 }, { "epoch": 4.141867101508357, "grad_norm": 3.1973135471343994, "learning_rate": 4.824364723467863e-05, "loss": 2.102, "step": 60960 }, { "epoch": 4.142206821579019, "grad_norm": 3.203754425048828, "learning_rate": 4.823940073379535e-05, "loss": 2.2333, "step": 60965 }, { "epoch": 4.142546541649681, "grad_norm": 3.7279958724975586, "learning_rate": 4.823515423291208e-05, "loss": 2.4724, "step": 60970 }, { "epoch": 4.142886261720342, "grad_norm": 3.279240369796753, "learning_rate": 4.8230907732028816e-05, "loss": 2.3034, "step": 60975 }, 
{ "epoch": 4.143225981791004, "grad_norm": 2.993285655975342, "learning_rate": 4.822666123114554e-05, "loss": 2.2516, "step": 60980 }, { "epoch": 4.143565701861666, "grad_norm": 3.493786334991455, "learning_rate": 4.8222414730262265e-05, "loss": 2.6838, "step": 60985 }, { "epoch": 4.1439054219323275, "grad_norm": 3.947070837020874, "learning_rate": 4.821816822937899e-05, "loss": 2.3202, "step": 60990 }, { "epoch": 4.1442451420029895, "grad_norm": 3.882214307785034, "learning_rate": 4.821392172849572e-05, "loss": 2.2927, "step": 60995 }, { "epoch": 4.144584862073652, "grad_norm": 2.5986311435699463, "learning_rate": 4.820967522761245e-05, "loss": 2.2446, "step": 61000 }, { "epoch": 4.144924582144313, "grad_norm": 3.3481545448303223, "learning_rate": 4.820542872672918e-05, "loss": 2.37, "step": 61005 }, { "epoch": 4.145264302214975, "grad_norm": 3.2064316272735596, "learning_rate": 4.8201182225845905e-05, "loss": 2.4768, "step": 61010 }, { "epoch": 4.145604022285637, "grad_norm": 3.0367209911346436, "learning_rate": 4.819693572496263e-05, "loss": 2.5068, "step": 61015 }, { "epoch": 4.145943742356298, "grad_norm": 2.919188976287842, "learning_rate": 4.819268922407936e-05, "loss": 2.376, "step": 61020 }, { "epoch": 4.14628346242696, "grad_norm": 3.685948371887207, "learning_rate": 4.818844272319609e-05, "loss": 2.5677, "step": 61025 }, { "epoch": 4.146623182497622, "grad_norm": 2.948589324951172, "learning_rate": 4.818419622231282e-05, "loss": 2.6126, "step": 61030 }, { "epoch": 4.1469629025682835, "grad_norm": 2.784147262573242, "learning_rate": 4.8179949721429545e-05, "loss": 2.5653, "step": 61035 }, { "epoch": 4.147302622638946, "grad_norm": 2.519655227661133, "learning_rate": 4.817570322054627e-05, "loss": 2.4755, "step": 61040 }, { "epoch": 4.147642342709608, "grad_norm": 3.0516791343688965, "learning_rate": 4.8171456719663e-05, "loss": 2.5839, "step": 61045 }, { "epoch": 4.147982062780269, "grad_norm": 3.6718902587890625, "learning_rate": 4.816721021877973e-05, 
"loss": 2.2626, "step": 61050 }, { "epoch": 4.148321782850931, "grad_norm": 2.631133556365967, "learning_rate": 4.816296371789645e-05, "loss": 2.6503, "step": 61055 }, { "epoch": 4.148661502921593, "grad_norm": 2.506195306777954, "learning_rate": 4.8158717217013185e-05, "loss": 2.5882, "step": 61060 }, { "epoch": 4.149001222992254, "grad_norm": 3.7519099712371826, "learning_rate": 4.815447071612991e-05, "loss": 2.4425, "step": 61065 }, { "epoch": 4.149340943062916, "grad_norm": 2.970299482345581, "learning_rate": 4.8150224215246634e-05, "loss": 2.5197, "step": 61070 }, { "epoch": 4.149680663133578, "grad_norm": 2.9444079399108887, "learning_rate": 4.814597771436337e-05, "loss": 2.5336, "step": 61075 }, { "epoch": 4.1500203832042395, "grad_norm": 3.003082275390625, "learning_rate": 4.81417312134801e-05, "loss": 2.4292, "step": 61080 }, { "epoch": 4.150360103274902, "grad_norm": 3.1685121059417725, "learning_rate": 4.813748471259682e-05, "loss": 2.4063, "step": 61085 }, { "epoch": 4.150699823345564, "grad_norm": 3.0602686405181885, "learning_rate": 4.8133238211713546e-05, "loss": 2.4005, "step": 61090 }, { "epoch": 4.151039543416225, "grad_norm": 3.236150026321411, "learning_rate": 4.812899171083028e-05, "loss": 2.6499, "step": 61095 }, { "epoch": 4.151379263486887, "grad_norm": 2.6842894554138184, "learning_rate": 4.812474520994701e-05, "loss": 2.4803, "step": 61100 }, { "epoch": 4.151718983557549, "grad_norm": 2.5757322311401367, "learning_rate": 4.812049870906373e-05, "loss": 2.5041, "step": 61105 }, { "epoch": 4.15205870362821, "grad_norm": 3.2468502521514893, "learning_rate": 4.8116252208180465e-05, "loss": 2.6651, "step": 61110 }, { "epoch": 4.152398423698872, "grad_norm": 3.414036989212036, "learning_rate": 4.811200570729719e-05, "loss": 2.4505, "step": 61115 }, { "epoch": 4.152738143769534, "grad_norm": 2.828136444091797, "learning_rate": 4.8107759206413915e-05, "loss": 2.5499, "step": 61120 }, { "epoch": 4.1530778638401955, "grad_norm": 3.1318910121917725, 
"learning_rate": 4.810351270553064e-05, "loss": 2.5091, "step": 61125 }, { "epoch": 4.153417583910858, "grad_norm": 3.1496384143829346, "learning_rate": 4.809926620464738e-05, "loss": 2.1914, "step": 61130 }, { "epoch": 4.15375730398152, "grad_norm": 2.9456822872161865, "learning_rate": 4.80950197037641e-05, "loss": 2.3508, "step": 61135 }, { "epoch": 4.154097024052181, "grad_norm": 3.721605062484741, "learning_rate": 4.8090773202880827e-05, "loss": 2.339, "step": 61140 }, { "epoch": 4.154436744122843, "grad_norm": 3.345102310180664, "learning_rate": 4.808652670199756e-05, "loss": 2.3651, "step": 61145 }, { "epoch": 4.154776464193505, "grad_norm": 3.4428281784057617, "learning_rate": 4.808228020111428e-05, "loss": 2.5123, "step": 61150 }, { "epoch": 4.155116184264166, "grad_norm": 3.0771543979644775, "learning_rate": 4.807803370023101e-05, "loss": 2.6179, "step": 61155 }, { "epoch": 4.155455904334828, "grad_norm": 3.8340377807617188, "learning_rate": 4.807378719934774e-05, "loss": 2.4012, "step": 61160 }, { "epoch": 4.15579562440549, "grad_norm": 3.3014965057373047, "learning_rate": 4.806954069846447e-05, "loss": 2.4204, "step": 61165 }, { "epoch": 4.1561353444761515, "grad_norm": 2.5930404663085938, "learning_rate": 4.8065294197581195e-05, "loss": 2.6667, "step": 61170 }, { "epoch": 4.156475064546814, "grad_norm": 2.758784055709839, "learning_rate": 4.806104769669792e-05, "loss": 2.1827, "step": 61175 }, { "epoch": 4.156814784617475, "grad_norm": 3.1923861503601074, "learning_rate": 4.805680119581465e-05, "loss": 2.4637, "step": 61180 }, { "epoch": 4.157154504688137, "grad_norm": 3.328125238418579, "learning_rate": 4.805255469493138e-05, "loss": 2.4438, "step": 61185 }, { "epoch": 4.157494224758799, "grad_norm": 3.2318115234375, "learning_rate": 4.804830819404811e-05, "loss": 2.5122, "step": 61190 }, { "epoch": 4.15783394482946, "grad_norm": 2.7057416439056396, "learning_rate": 4.8044061693164835e-05, "loss": 2.54, "step": 61195 }, { "epoch": 4.158173664900122, 
"grad_norm": 3.529829502105713, "learning_rate": 4.803981519228156e-05, "loss": 2.4195, "step": 61200 }, { "epoch": 4.158513384970784, "grad_norm": 2.958310604095459, "learning_rate": 4.803556869139829e-05, "loss": 2.3189, "step": 61205 }, { "epoch": 4.1588531050414455, "grad_norm": 3.486893892288208, "learning_rate": 4.803132219051502e-05, "loss": 2.6543, "step": 61210 }, { "epoch": 4.1591928251121075, "grad_norm": 2.5438296794891357, "learning_rate": 4.802707568963175e-05, "loss": 2.4688, "step": 61215 }, { "epoch": 4.15953254518277, "grad_norm": 3.0209951400756836, "learning_rate": 4.8022829188748475e-05, "loss": 2.5328, "step": 61220 }, { "epoch": 4.159872265253431, "grad_norm": 3.2548372745513916, "learning_rate": 4.8018582687865196e-05, "loss": 2.4145, "step": 61225 }, { "epoch": 4.160211985324093, "grad_norm": 2.872269868850708, "learning_rate": 4.801433618698193e-05, "loss": 2.31, "step": 61230 }, { "epoch": 4.160551705394755, "grad_norm": 3.0329363346099854, "learning_rate": 4.801008968609866e-05, "loss": 2.2634, "step": 61235 }, { "epoch": 4.160891425465416, "grad_norm": 3.558795690536499, "learning_rate": 4.800584318521538e-05, "loss": 2.2986, "step": 61240 }, { "epoch": 4.161231145536078, "grad_norm": 2.663052558898926, "learning_rate": 4.8001596684332115e-05, "loss": 2.5182, "step": 61245 }, { "epoch": 4.16157086560674, "grad_norm": 2.917646884918213, "learning_rate": 4.799735018344884e-05, "loss": 2.2946, "step": 61250 }, { "epoch": 4.1619105856774015, "grad_norm": 3.602430820465088, "learning_rate": 4.7993103682565564e-05, "loss": 2.5059, "step": 61255 }, { "epoch": 4.1622503057480635, "grad_norm": 2.680259943008423, "learning_rate": 4.798885718168229e-05, "loss": 2.6539, "step": 61260 }, { "epoch": 4.162590025818726, "grad_norm": 3.1652541160583496, "learning_rate": 4.798461068079903e-05, "loss": 2.5368, "step": 61265 }, { "epoch": 4.162929745889387, "grad_norm": 3.999335527420044, "learning_rate": 4.7980364179915755e-05, "loss": 2.3834, "step": 
61270 }, { "epoch": 4.163269465960049, "grad_norm": 2.908937692642212, "learning_rate": 4.7976117679032476e-05, "loss": 2.667, "step": 61275 }, { "epoch": 4.163609186030711, "grad_norm": 3.4072954654693604, "learning_rate": 4.797187117814921e-05, "loss": 2.2733, "step": 61280 }, { "epoch": 4.163948906101372, "grad_norm": 2.7375571727752686, "learning_rate": 4.796762467726594e-05, "loss": 2.3805, "step": 61285 }, { "epoch": 4.164288626172034, "grad_norm": 2.855077028274536, "learning_rate": 4.796337817638266e-05, "loss": 2.1678, "step": 61290 }, { "epoch": 4.164628346242696, "grad_norm": 3.1629931926727295, "learning_rate": 4.795913167549939e-05, "loss": 2.391, "step": 61295 }, { "epoch": 4.1649680663133575, "grad_norm": 3.24646258354187, "learning_rate": 4.795488517461612e-05, "loss": 2.4505, "step": 61300 }, { "epoch": 4.1653077863840196, "grad_norm": 2.9379045963287354, "learning_rate": 4.7950638673732844e-05, "loss": 2.4449, "step": 61305 }, { "epoch": 4.165647506454682, "grad_norm": 2.87460994720459, "learning_rate": 4.794639217284957e-05, "loss": 2.3716, "step": 61310 }, { "epoch": 4.165987226525343, "grad_norm": 3.2728936672210693, "learning_rate": 4.794214567196631e-05, "loss": 2.5131, "step": 61315 }, { "epoch": 4.166326946596005, "grad_norm": 2.7974016666412354, "learning_rate": 4.793789917108303e-05, "loss": 2.3186, "step": 61320 }, { "epoch": 4.166666666666667, "grad_norm": 2.8358612060546875, "learning_rate": 4.7933652670199756e-05, "loss": 2.308, "step": 61325 }, { "epoch": 4.167006386737328, "grad_norm": 3.034965753555298, "learning_rate": 4.792940616931649e-05, "loss": 2.5319, "step": 61330 }, { "epoch": 4.16734610680799, "grad_norm": 3.110656976699829, "learning_rate": 4.792515966843321e-05, "loss": 2.4208, "step": 61335 }, { "epoch": 4.167685826878652, "grad_norm": 2.8998873233795166, "learning_rate": 4.792091316754994e-05, "loss": 2.5358, "step": 61340 }, { "epoch": 4.1680255469493135, "grad_norm": 2.6690523624420166, "learning_rate": 
4.791666666666667e-05, "loss": 2.2175, "step": 61345 }, { "epoch": 4.168365267019976, "grad_norm": 3.36767840385437, "learning_rate": 4.7912420165783396e-05, "loss": 2.2636, "step": 61350 }, { "epoch": 4.168704987090638, "grad_norm": 3.439486026763916, "learning_rate": 4.7908173664900124e-05, "loss": 2.6252, "step": 61355 }, { "epoch": 4.169044707161299, "grad_norm": 2.5978012084960938, "learning_rate": 4.790392716401685e-05, "loss": 2.6047, "step": 61360 }, { "epoch": 4.169384427231961, "grad_norm": 3.5356788635253906, "learning_rate": 4.789968066313358e-05, "loss": 2.2613, "step": 61365 }, { "epoch": 4.169724147302623, "grad_norm": 3.258382558822632, "learning_rate": 4.789543416225031e-05, "loss": 2.2041, "step": 61370 }, { "epoch": 4.170063867373284, "grad_norm": 3.117966651916504, "learning_rate": 4.7891187661367036e-05, "loss": 2.1916, "step": 61375 }, { "epoch": 4.170403587443946, "grad_norm": 3.482379674911499, "learning_rate": 4.7886941160483764e-05, "loss": 2.4673, "step": 61380 }, { "epoch": 4.170743307514608, "grad_norm": 3.2665956020355225, "learning_rate": 4.788269465960049e-05, "loss": 2.5352, "step": 61385 }, { "epoch": 4.1710830275852695, "grad_norm": 3.006239414215088, "learning_rate": 4.7879297458893875e-05, "loss": 2.0755, "step": 61390 }, { "epoch": 4.171422747655932, "grad_norm": 3.528204917907715, "learning_rate": 4.78750509580106e-05, "loss": 2.6069, "step": 61395 }, { "epoch": 4.171762467726594, "grad_norm": 3.4549975395202637, "learning_rate": 4.787080445712733e-05, "loss": 2.4946, "step": 61400 }, { "epoch": 4.172102187797255, "grad_norm": 2.5066962242126465, "learning_rate": 4.786655795624406e-05, "loss": 2.3337, "step": 61405 }, { "epoch": 4.172441907867917, "grad_norm": 2.805901288986206, "learning_rate": 4.786231145536079e-05, "loss": 2.5668, "step": 61410 }, { "epoch": 4.172781627938579, "grad_norm": 3.223100185394287, "learning_rate": 4.785806495447751e-05, "loss": 2.3194, "step": 61415 }, { "epoch": 4.17312134800924, "grad_norm": 
3.0376579761505127, "learning_rate": 4.785381845359424e-05, "loss": 2.4851, "step": 61420 }, { "epoch": 4.173461068079902, "grad_norm": 3.2494218349456787, "learning_rate": 4.784957195271097e-05, "loss": 2.3864, "step": 61425 }, { "epoch": 4.173800788150564, "grad_norm": 2.632028818130493, "learning_rate": 4.784532545182769e-05, "loss": 2.4866, "step": 61430 }, { "epoch": 4.1741405082212255, "grad_norm": 3.6033835411071777, "learning_rate": 4.784107895094443e-05, "loss": 2.4618, "step": 61435 }, { "epoch": 4.174480228291888, "grad_norm": 3.005143642425537, "learning_rate": 4.7836832450061155e-05, "loss": 2.3941, "step": 61440 }, { "epoch": 4.17481994836255, "grad_norm": 3.254204511642456, "learning_rate": 4.7832585949177876e-05, "loss": 2.3311, "step": 61445 }, { "epoch": 4.175159668433211, "grad_norm": 2.224414587020874, "learning_rate": 4.7828339448294604e-05, "loss": 2.5921, "step": 61450 }, { "epoch": 4.175499388503873, "grad_norm": 2.885603666305542, "learning_rate": 4.782409294741134e-05, "loss": 2.5379, "step": 61455 }, { "epoch": 4.175839108574535, "grad_norm": 3.05329966545105, "learning_rate": 4.781984644652806e-05, "loss": 2.1979, "step": 61460 }, { "epoch": 4.176178828645196, "grad_norm": 3.508394956588745, "learning_rate": 4.781559994564479e-05, "loss": 2.5177, "step": 61465 }, { "epoch": 4.176518548715858, "grad_norm": 3.9937779903411865, "learning_rate": 4.781135344476152e-05, "loss": 2.2451, "step": 61470 }, { "epoch": 4.17685826878652, "grad_norm": 2.855476140975952, "learning_rate": 4.780710694387825e-05, "loss": 2.4133, "step": 61475 }, { "epoch": 4.1771979888571815, "grad_norm": 2.9515326023101807, "learning_rate": 4.780286044299497e-05, "loss": 2.5003, "step": 61480 }, { "epoch": 4.177537708927844, "grad_norm": 3.3167853355407715, "learning_rate": 4.77986139421117e-05, "loss": 2.3157, "step": 61485 }, { "epoch": 4.177877428998505, "grad_norm": 2.6034340858459473, "learning_rate": 4.7794367441228435e-05, "loss": 2.2988, "step": 61490 }, { 
"epoch": 4.178217149069167, "grad_norm": 3.7502057552337646, "learning_rate": 4.7790120940345156e-05, "loss": 2.0693, "step": 61495 }, { "epoch": 4.178556869139829, "grad_norm": 3.0417449474334717, "learning_rate": 4.7785874439461884e-05, "loss": 2.2459, "step": 61500 }, { "epoch": 4.17889658921049, "grad_norm": 3.5099382400512695, "learning_rate": 4.778162793857862e-05, "loss": 2.4025, "step": 61505 }, { "epoch": 4.179236309281152, "grad_norm": 3.309830904006958, "learning_rate": 4.777738143769534e-05, "loss": 2.4447, "step": 61510 }, { "epoch": 4.179576029351814, "grad_norm": 3.2166624069213867, "learning_rate": 4.777313493681207e-05, "loss": 2.2616, "step": 61515 }, { "epoch": 4.1799157494224755, "grad_norm": 2.571406126022339, "learning_rate": 4.7768888435928796e-05, "loss": 2.5025, "step": 61520 }, { "epoch": 4.1802554694931375, "grad_norm": 3.3454415798187256, "learning_rate": 4.7764641935045524e-05, "loss": 2.1879, "step": 61525 }, { "epoch": 4.1805951895638, "grad_norm": 2.4989116191864014, "learning_rate": 4.776039543416225e-05, "loss": 2.6071, "step": 61530 }, { "epoch": 4.180934909634461, "grad_norm": 3.7097864151000977, "learning_rate": 4.775614893327898e-05, "loss": 2.5683, "step": 61535 }, { "epoch": 4.181274629705123, "grad_norm": 3.2660233974456787, "learning_rate": 4.775190243239571e-05, "loss": 2.3933, "step": 61540 }, { "epoch": 4.181614349775785, "grad_norm": 3.259295701980591, "learning_rate": 4.7747655931512436e-05, "loss": 2.4432, "step": 61545 }, { "epoch": 4.181954069846446, "grad_norm": 2.878429412841797, "learning_rate": 4.7743409430629164e-05, "loss": 2.3185, "step": 61550 }, { "epoch": 4.182293789917108, "grad_norm": 3.404244899749756, "learning_rate": 4.773916292974589e-05, "loss": 2.4815, "step": 61555 }, { "epoch": 4.18263350998777, "grad_norm": 2.650285482406616, "learning_rate": 4.773491642886262e-05, "loss": 2.2982, "step": 61560 }, { "epoch": 4.1829732300584315, "grad_norm": 2.5402591228485107, "learning_rate": 
4.773066992797935e-05, "loss": 2.5801, "step": 61565 }, { "epoch": 4.1833129501290935, "grad_norm": 3.565903902053833, "learning_rate": 4.7726423427096076e-05, "loss": 2.3238, "step": 61570 }, { "epoch": 4.183652670199756, "grad_norm": 3.2328600883483887, "learning_rate": 4.7722176926212804e-05, "loss": 2.6466, "step": 61575 }, { "epoch": 4.183992390270417, "grad_norm": 2.775425672531128, "learning_rate": 4.771793042532953e-05, "loss": 2.4751, "step": 61580 }, { "epoch": 4.184332110341079, "grad_norm": 2.747871160507202, "learning_rate": 4.771368392444625e-05, "loss": 2.5449, "step": 61585 }, { "epoch": 4.184671830411741, "grad_norm": 2.9262304306030273, "learning_rate": 4.770943742356299e-05, "loss": 2.3803, "step": 61590 }, { "epoch": 4.185011550482402, "grad_norm": 3.6985714435577393, "learning_rate": 4.7705190922679716e-05, "loss": 2.5223, "step": 61595 }, { "epoch": 4.185351270553064, "grad_norm": 2.5684616565704346, "learning_rate": 4.770094442179644e-05, "loss": 2.6394, "step": 61600 }, { "epoch": 4.185690990623726, "grad_norm": 3.4292004108428955, "learning_rate": 4.769669792091317e-05, "loss": 2.6354, "step": 61605 }, { "epoch": 4.1860307106943875, "grad_norm": 3.647115707397461, "learning_rate": 4.76924514200299e-05, "loss": 2.5366, "step": 61610 }, { "epoch": 4.1863704307650496, "grad_norm": 3.173698663711548, "learning_rate": 4.768820491914662e-05, "loss": 2.5401, "step": 61615 }, { "epoch": 4.186710150835712, "grad_norm": 3.775895118713379, "learning_rate": 4.768395841826335e-05, "loss": 2.4817, "step": 61620 }, { "epoch": 4.187049870906373, "grad_norm": 2.7787091732025146, "learning_rate": 4.7679711917380084e-05, "loss": 2.3258, "step": 61625 }, { "epoch": 4.187389590977035, "grad_norm": 3.510322093963623, "learning_rate": 4.7675465416496805e-05, "loss": 2.4158, "step": 61630 }, { "epoch": 4.187729311047697, "grad_norm": 3.11337947845459, "learning_rate": 4.7671218915613533e-05, "loss": 2.4427, "step": 61635 }, { "epoch": 4.188069031118358, 
"grad_norm": 2.794327735900879, "learning_rate": 4.766697241473027e-05, "loss": 2.6215, "step": 61640 }, { "epoch": 4.18840875118902, "grad_norm": 2.729757070541382, "learning_rate": 4.7662725913846996e-05, "loss": 2.2146, "step": 61645 }, { "epoch": 4.188748471259682, "grad_norm": NaN, "learning_rate": 4.765932871314037e-05, "loss": 2.4148, "step": 61650 }, { "epoch": 4.1890881913303435, "grad_norm": 3.008802890777588, "learning_rate": 4.76550822122571e-05, "loss": 2.4212, "step": 61655 }, { "epoch": 4.189427911401006, "grad_norm": 3.420917272567749, "learning_rate": 4.7650835711373835e-05, "loss": 2.1658, "step": 61660 }, { "epoch": 4.189767631471668, "grad_norm": 2.684997797012329, "learning_rate": 4.7646589210490556e-05, "loss": 2.4316, "step": 61665 }, { "epoch": 4.190107351542329, "grad_norm": 2.9286108016967773, "learning_rate": 4.7642342709607284e-05, "loss": 2.3799, "step": 61670 }, { "epoch": 4.190447071612991, "grad_norm": 3.50607967376709, "learning_rate": 4.763809620872401e-05, "loss": 2.4546, "step": 61675 }, { "epoch": 4.190786791683653, "grad_norm": 2.8759891986846924, "learning_rate": 4.763384970784075e-05, "loss": 2.3365, "step": 61680 }, { "epoch": 4.191126511754314, "grad_norm": 3.206676959991455, "learning_rate": 4.762960320695747e-05, "loss": 2.2035, "step": 61685 }, { "epoch": 4.191466231824976, "grad_norm": 2.827803373336792, "learning_rate": 4.7625356706074196e-05, "loss": 2.5963, "step": 61690 }, { "epoch": 4.191805951895638, "grad_norm": 2.8707334995269775, "learning_rate": 4.762111020519093e-05, "loss": 2.769, "step": 61695 }, { "epoch": 4.1921456719662995, "grad_norm": 3.160881280899048, "learning_rate": 4.761686370430765e-05, "loss": 2.623, "step": 61700 }, { "epoch": 4.192485392036962, "grad_norm": 2.9938528537750244, "learning_rate": 4.761261720342438e-05, "loss": 2.1836, "step": 61705 }, { "epoch": 4.192825112107624, "grad_norm": 2.9870407581329346, "learning_rate": 4.760837070254111e-05, "loss": 2.2534, "step": 61710 }, { "epoch": 
4.193164832178285, "grad_norm": 3.1825313568115234, "learning_rate": 4.7604124201657836e-05, "loss": 2.2482, "step": 61715 }, { "epoch": 4.193504552248947, "grad_norm": 2.55372953414917, "learning_rate": 4.7599877700774564e-05, "loss": 2.4955, "step": 61720 }, { "epoch": 4.193844272319609, "grad_norm": 2.8570334911346436, "learning_rate": 4.759563119989129e-05, "loss": 2.3251, "step": 61725 }, { "epoch": 4.19418399239027, "grad_norm": 2.800869941711426, "learning_rate": 4.759138469900802e-05, "loss": 2.4392, "step": 61730 }, { "epoch": 4.194523712460932, "grad_norm": 3.4090843200683594, "learning_rate": 4.758713819812475e-05, "loss": 2.4957, "step": 61735 }, { "epoch": 4.194863432531594, "grad_norm": 3.149402379989624, "learning_rate": 4.7582891697241476e-05, "loss": 2.4252, "step": 61740 }, { "epoch": 4.1952031526022555, "grad_norm": 2.8424041271209717, "learning_rate": 4.7578645196358204e-05, "loss": 2.5118, "step": 61745 }, { "epoch": 4.195542872672918, "grad_norm": 2.3489391803741455, "learning_rate": 4.757439869547493e-05, "loss": 2.5461, "step": 61750 }, { "epoch": 4.19588259274358, "grad_norm": 3.7366325855255127, "learning_rate": 4.757015219459166e-05, "loss": 2.513, "step": 61755 }, { "epoch": 4.196222312814241, "grad_norm": 3.475374698638916, "learning_rate": 4.756590569370839e-05, "loss": 2.4121, "step": 61760 }, { "epoch": 4.196562032884903, "grad_norm": 2.881382942199707, "learning_rate": 4.7561659192825116e-05, "loss": 2.2862, "step": 61765 }, { "epoch": 4.196901752955565, "grad_norm": 3.5437774658203125, "learning_rate": 4.7557412691941844e-05, "loss": 2.6344, "step": 61770 }, { "epoch": 4.197241473026226, "grad_norm": 3.6645143032073975, "learning_rate": 4.7553166191058565e-05, "loss": 2.5651, "step": 61775 }, { "epoch": 4.197581193096888, "grad_norm": 3.004715919494629, "learning_rate": 4.75489196901753e-05, "loss": 2.3905, "step": 61780 }, { "epoch": 4.19792091316755, "grad_norm": 3.9800567626953125, "learning_rate": 4.754467318929203e-05, "loss": 
2.3509, "step": 61785 }, { "epoch": 4.1982606332382115, "grad_norm": 2.5077929496765137, "learning_rate": 4.754042668840875e-05, "loss": 2.4182, "step": 61790 }, { "epoch": 4.198600353308874, "grad_norm": 2.907515287399292, "learning_rate": 4.7536180187525484e-05, "loss": 2.3205, "step": 61795 }, { "epoch": 4.198940073379536, "grad_norm": 2.7988600730895996, "learning_rate": 4.753193368664221e-05, "loss": 2.6439, "step": 61800 }, { "epoch": 4.199279793450197, "grad_norm": 3.4056236743927, "learning_rate": 4.752768718575893e-05, "loss": 2.3689, "step": 61805 }, { "epoch": 4.199619513520859, "grad_norm": 2.8764328956604004, "learning_rate": 4.752344068487566e-05, "loss": 2.5472, "step": 61810 }, { "epoch": 4.199959233591521, "grad_norm": 2.9419023990631104, "learning_rate": 4.7519194183992396e-05, "loss": 2.4933, "step": 61815 }, { "epoch": 4.200298953662182, "grad_norm": 3.6997275352478027, "learning_rate": 4.751494768310912e-05, "loss": 2.2515, "step": 61820 }, { "epoch": 4.200638673732844, "grad_norm": 3.9734716415405273, "learning_rate": 4.7510701182225845e-05, "loss": 2.5918, "step": 61825 }, { "epoch": 4.200978393803506, "grad_norm": 3.1245405673980713, "learning_rate": 4.750645468134258e-05, "loss": 2.4817, "step": 61830 }, { "epoch": 4.2013181138741675, "grad_norm": 4.814395904541016, "learning_rate": 4.75022081804593e-05, "loss": 2.4108, "step": 61835 }, { "epoch": 4.20165783394483, "grad_norm": 2.949662685394287, "learning_rate": 4.749796167957603e-05, "loss": 2.2698, "step": 61840 }, { "epoch": 4.201997554015492, "grad_norm": 3.7244746685028076, "learning_rate": 4.749371517869276e-05, "loss": 2.3164, "step": 61845 }, { "epoch": 4.202337274086153, "grad_norm": 2.9430899620056152, "learning_rate": 4.748946867780949e-05, "loss": 2.3262, "step": 61850 }, { "epoch": 4.202676994156815, "grad_norm": 4.01240873336792, "learning_rate": 4.748522217692621e-05, "loss": 2.3249, "step": 61855 }, { "epoch": 4.203016714227476, "grad_norm": 3.0465521812438965, 
"learning_rate": 4.748097567604294e-05, "loss": 2.3726, "step": 61860 }, { "epoch": 4.203356434298138, "grad_norm": 2.9326109886169434, "learning_rate": 4.7476729175159676e-05, "loss": 2.2667, "step": 61865 }, { "epoch": 4.2036961543688, "grad_norm": 3.385401964187622, "learning_rate": 4.74724826742764e-05, "loss": 2.3295, "step": 61870 }, { "epoch": 4.2040358744394615, "grad_norm": 4.02240514755249, "learning_rate": 4.7468236173393125e-05, "loss": 2.1282, "step": 61875 }, { "epoch": 4.2043755945101235, "grad_norm": 2.7139835357666016, "learning_rate": 4.7463989672509853e-05, "loss": 2.4496, "step": 61880 }, { "epoch": 4.204715314580786, "grad_norm": 2.722792863845825, "learning_rate": 4.745974317162658e-05, "loss": 2.6191, "step": 61885 }, { "epoch": 4.205055034651447, "grad_norm": 3.0117974281311035, "learning_rate": 4.745549667074331e-05, "loss": 2.4852, "step": 61890 }, { "epoch": 4.205394754722109, "grad_norm": 3.6065173149108887, "learning_rate": 4.745125016986004e-05, "loss": 2.515, "step": 61895 }, { "epoch": 4.205734474792771, "grad_norm": 3.8572380542755127, "learning_rate": 4.7447003668976765e-05, "loss": 2.3483, "step": 61900 }, { "epoch": 4.206074194863432, "grad_norm": 3.092360258102417, "learning_rate": 4.7442757168093493e-05, "loss": 2.3023, "step": 61905 }, { "epoch": 4.206413914934094, "grad_norm": 3.3954970836639404, "learning_rate": 4.743851066721022e-05, "loss": 2.4947, "step": 61910 }, { "epoch": 4.206753635004756, "grad_norm": 2.6003072261810303, "learning_rate": 4.743426416632695e-05, "loss": 2.6173, "step": 61915 }, { "epoch": 4.2070933550754175, "grad_norm": 4.123538017272949, "learning_rate": 4.743001766544368e-05, "loss": 2.4957, "step": 61920 }, { "epoch": 4.20743307514608, "grad_norm": 2.292513370513916, "learning_rate": 4.7425771164560405e-05, "loss": 2.2975, "step": 61925 }, { "epoch": 4.207772795216742, "grad_norm": 3.0691511631011963, "learning_rate": 4.7421524663677133e-05, "loss": 2.3937, "step": 61930 }, { "epoch": 
4.208112515287403, "grad_norm": 2.358743667602539, "learning_rate": 4.741727816279386e-05, "loss": 2.4255, "step": 61935 }, { "epoch": 4.208452235358065, "grad_norm": 3.8324286937713623, "learning_rate": 4.741303166191059e-05, "loss": 2.5381, "step": 61940 }, { "epoch": 4.208791955428727, "grad_norm": 2.7289204597473145, "learning_rate": 4.740878516102731e-05, "loss": 2.1693, "step": 61945 }, { "epoch": 4.209131675499388, "grad_norm": 3.999833345413208, "learning_rate": 4.7404538660144046e-05, "loss": 2.4504, "step": 61950 }, { "epoch": 4.20947139557005, "grad_norm": 3.311880588531494, "learning_rate": 4.7400292159260774e-05, "loss": 2.4688, "step": 61955 }, { "epoch": 4.209811115640712, "grad_norm": 3.325134515762329, "learning_rate": 4.7396045658377495e-05, "loss": 2.3766, "step": 61960 }, { "epoch": 4.2101508357113735, "grad_norm": 3.4172167778015137, "learning_rate": 4.739179915749423e-05, "loss": 2.407, "step": 61965 }, { "epoch": 4.210490555782036, "grad_norm": 3.1473124027252197, "learning_rate": 4.738755265661096e-05, "loss": 2.4145, "step": 61970 }, { "epoch": 4.210830275852698, "grad_norm": 2.5356955528259277, "learning_rate": 4.738330615572768e-05, "loss": 2.475, "step": 61975 }, { "epoch": 4.211169995923359, "grad_norm": 3.012723684310913, "learning_rate": 4.737905965484441e-05, "loss": 2.3052, "step": 61980 }, { "epoch": 4.211509715994021, "grad_norm": 2.8794174194335938, "learning_rate": 4.737481315396114e-05, "loss": 2.5119, "step": 61985 }, { "epoch": 4.211849436064683, "grad_norm": 3.4425694942474365, "learning_rate": 4.737056665307786e-05, "loss": 2.2556, "step": 61990 }, { "epoch": 4.212189156135344, "grad_norm": 3.830561637878418, "learning_rate": 4.736632015219459e-05, "loss": 2.5111, "step": 61995 }, { "epoch": 4.212528876206006, "grad_norm": 2.939399003982544, "learning_rate": 4.7362073651311326e-05, "loss": 2.5634, "step": 62000 }, { "epoch": 4.212868596276668, "grad_norm": 3.6074585914611816, "learning_rate": 4.735782715042805e-05, "loss": 
2.3596, "step": 62005 }, { "epoch": 4.2132083163473295, "grad_norm": 3.5490777492523193, "learning_rate": 4.7353580649544775e-05, "loss": 2.3314, "step": 62010 }, { "epoch": 4.213548036417992, "grad_norm": 2.765678882598877, "learning_rate": 4.73493341486615e-05, "loss": 2.4754, "step": 62015 }, { "epoch": 4.213887756488654, "grad_norm": 3.063483476638794, "learning_rate": 4.734508764777824e-05, "loss": 2.5479, "step": 62020 }, { "epoch": 4.214227476559315, "grad_norm": 2.865774631500244, "learning_rate": 4.734084114689496e-05, "loss": 2.4362, "step": 62025 }, { "epoch": 4.214567196629977, "grad_norm": 3.4232821464538574, "learning_rate": 4.733659464601169e-05, "loss": 2.248, "step": 62030 }, { "epoch": 4.214906916700639, "grad_norm": 4.003294944763184, "learning_rate": 4.733234814512842e-05, "loss": 2.5988, "step": 62035 }, { "epoch": 4.2152466367713, "grad_norm": 3.56132173538208, "learning_rate": 4.732810164424514e-05, "loss": 2.1759, "step": 62040 }, { "epoch": 4.215586356841962, "grad_norm": 3.4448249340057373, "learning_rate": 4.732385514336187e-05, "loss": 2.6138, "step": 62045 }, { "epoch": 4.215926076912624, "grad_norm": 3.143444299697876, "learning_rate": 4.73196086424786e-05, "loss": 2.232, "step": 62050 }, { "epoch": 4.2162657969832855, "grad_norm": 3.814697742462158, "learning_rate": 4.731536214159533e-05, "loss": 2.25, "step": 62055 }, { "epoch": 4.216605517053948, "grad_norm": 2.8919739723205566, "learning_rate": 4.7311115640712055e-05, "loss": 2.4393, "step": 62060 }, { "epoch": 4.21694523712461, "grad_norm": 2.8596603870391846, "learning_rate": 4.730686913982878e-05, "loss": 2.4654, "step": 62065 }, { "epoch": 4.217284957195271, "grad_norm": 3.2984085083007812, "learning_rate": 4.730262263894551e-05, "loss": 2.6179, "step": 62070 }, { "epoch": 4.217624677265933, "grad_norm": 2.9184632301330566, "learning_rate": 4.729837613806224e-05, "loss": 2.3132, "step": 62075 }, { "epoch": 4.217964397336595, "grad_norm": 3.3384132385253906, "learning_rate": 
4.729412963717897e-05, "loss": 2.0982, "step": 62080 }, { "epoch": 4.218304117407256, "grad_norm": 3.4593887329101562, "learning_rate": 4.7289883136295695e-05, "loss": 2.3638, "step": 62085 }, { "epoch": 4.218643837477918, "grad_norm": 3.8517847061157227, "learning_rate": 4.728563663541242e-05, "loss": 2.4768, "step": 62090 }, { "epoch": 4.21898355754858, "grad_norm": 3.0121235847473145, "learning_rate": 4.728139013452915e-05, "loss": 2.3996, "step": 62095 }, { "epoch": 4.2193232776192415, "grad_norm": 3.4583587646484375, "learning_rate": 4.727714363364588e-05, "loss": 2.4083, "step": 62100 }, { "epoch": 4.219662997689904, "grad_norm": 2.321648120880127, "learning_rate": 4.727289713276261e-05, "loss": 2.7524, "step": 62105 }, { "epoch": 4.220002717760566, "grad_norm": 3.6404197216033936, "learning_rate": 4.7268650631879335e-05, "loss": 2.1317, "step": 62110 }, { "epoch": 4.220342437831227, "grad_norm": 3.1720948219299316, "learning_rate": 4.7264404130996056e-05, "loss": 2.4223, "step": 62115 }, { "epoch": 4.220682157901889, "grad_norm": 2.710925340652466, "learning_rate": 4.726015763011279e-05, "loss": 2.4636, "step": 62120 }, { "epoch": 4.221021877972551, "grad_norm": 3.2247085571289062, "learning_rate": 4.725591112922952e-05, "loss": 2.6407, "step": 62125 }, { "epoch": 4.221361598043212, "grad_norm": 3.3084614276885986, "learning_rate": 4.725166462834624e-05, "loss": 2.3569, "step": 62130 }, { "epoch": 4.221701318113874, "grad_norm": 2.748483180999756, "learning_rate": 4.7247418127462975e-05, "loss": 2.3219, "step": 62135 }, { "epoch": 4.222041038184536, "grad_norm": 2.7769947052001953, "learning_rate": 4.72431716265797e-05, "loss": 2.3659, "step": 62140 }, { "epoch": 4.2223807582551975, "grad_norm": 2.5784757137298584, "learning_rate": 4.7238925125696424e-05, "loss": 2.4734, "step": 62145 }, { "epoch": 4.22272047832586, "grad_norm": 3.4230642318725586, "learning_rate": 4.723467862481315e-05, "loss": 2.5616, "step": 62150 }, { "epoch": 4.223060198396522, 
"grad_norm": 3.295145273208618, "learning_rate": 4.723043212392989e-05, "loss": 2.6757, "step": 62155 }, { "epoch": 4.223399918467183, "grad_norm": 2.893815755844116, "learning_rate": 4.722618562304661e-05, "loss": 2.3842, "step": 62160 }, { "epoch": 4.223739638537845, "grad_norm": 2.9473700523376465, "learning_rate": 4.7221939122163336e-05, "loss": 2.1363, "step": 62165 }, { "epoch": 4.224079358608506, "grad_norm": 3.4641757011413574, "learning_rate": 4.721769262128007e-05, "loss": 2.1966, "step": 62170 }, { "epoch": 4.224419078679168, "grad_norm": 2.8182876110076904, "learning_rate": 4.721344612039679e-05, "loss": 2.3483, "step": 62175 }, { "epoch": 4.22475879874983, "grad_norm": 3.790875196456909, "learning_rate": 4.720919961951352e-05, "loss": 2.3603, "step": 62180 }, { "epoch": 4.2250985188204915, "grad_norm": 3.5498790740966797, "learning_rate": 4.7204953118630255e-05, "loss": 2.2415, "step": 62185 }, { "epoch": 4.2254382388911536, "grad_norm": 3.367933988571167, "learning_rate": 4.720070661774698e-05, "loss": 2.4203, "step": 62190 }, { "epoch": 4.225777958961816, "grad_norm": 3.432644844055176, "learning_rate": 4.7196460116863704e-05, "loss": 2.3074, "step": 62195 }, { "epoch": 4.226117679032477, "grad_norm": 3.2381997108459473, "learning_rate": 4.719221361598043e-05, "loss": 2.4435, "step": 62200 }, { "epoch": 4.226457399103139, "grad_norm": 4.0363874435424805, "learning_rate": 4.718796711509717e-05, "loss": 2.4629, "step": 62205 }, { "epoch": 4.226797119173801, "grad_norm": 3.017859935760498, "learning_rate": 4.718372061421389e-05, "loss": 2.2124, "step": 62210 }, { "epoch": 4.227136839244462, "grad_norm": 2.9934825897216797, "learning_rate": 4.7179474113330616e-05, "loss": 2.3012, "step": 62215 }, { "epoch": 4.227476559315124, "grad_norm": 3.0865721702575684, "learning_rate": 4.717522761244735e-05, "loss": 2.5895, "step": 62220 }, { "epoch": 4.227816279385786, "grad_norm": 2.8570525646209717, "learning_rate": 4.717098111156407e-05, "loss": 2.3685, "step": 
62225 }, { "epoch": 4.2281559994564475, "grad_norm": 4.032341480255127, "learning_rate": 4.71667346106808e-05, "loss": 2.3726, "step": 62230 }, { "epoch": 4.22849571952711, "grad_norm": 3.3453710079193115, "learning_rate": 4.716248810979753e-05, "loss": 2.2257, "step": 62235 }, { "epoch": 4.228835439597772, "grad_norm": 2.9080779552459717, "learning_rate": 4.7158241608914256e-05, "loss": 2.5303, "step": 62240 }, { "epoch": 4.229175159668433, "grad_norm": 3.7344517707824707, "learning_rate": 4.7153995108030984e-05, "loss": 2.2878, "step": 62245 }, { "epoch": 4.229514879739095, "grad_norm": 2.783677577972412, "learning_rate": 4.714974860714771e-05, "loss": 2.2849, "step": 62250 }, { "epoch": 4.229854599809757, "grad_norm": 3.430307626724243, "learning_rate": 4.714550210626444e-05, "loss": 2.3422, "step": 62255 }, { "epoch": 4.230194319880418, "grad_norm": 2.7083005905151367, "learning_rate": 4.714125560538117e-05, "loss": 2.5415, "step": 62260 }, { "epoch": 4.23053403995108, "grad_norm": 3.654594898223877, "learning_rate": 4.7137009104497896e-05, "loss": 2.5384, "step": 62265 }, { "epoch": 4.230873760021742, "grad_norm": 3.56795597076416, "learning_rate": 4.7132762603614624e-05, "loss": 2.2602, "step": 62270 }, { "epoch": 4.2312134800924035, "grad_norm": 2.8595685958862305, "learning_rate": 4.712851610273135e-05, "loss": 2.5366, "step": 62275 }, { "epoch": 4.231553200163066, "grad_norm": 3.3705687522888184, "learning_rate": 4.712426960184808e-05, "loss": 2.4286, "step": 62280 }, { "epoch": 4.231892920233728, "grad_norm": 2.685664415359497, "learning_rate": 4.712002310096481e-05, "loss": 2.3325, "step": 62285 }, { "epoch": 4.232232640304389, "grad_norm": 3.356081008911133, "learning_rate": 4.7115776600081536e-05, "loss": 2.4903, "step": 62290 }, { "epoch": 4.232572360375051, "grad_norm": 3.635518789291382, "learning_rate": 4.7111530099198264e-05, "loss": 2.3013, "step": 62295 }, { "epoch": 4.232912080445713, "grad_norm": 3.3235909938812256, "learning_rate": 
4.7107283598314986e-05, "loss": 2.2971, "step": 62300 }, { "epoch": 4.233251800516374, "grad_norm": 3.9304685592651367, "learning_rate": 4.710303709743172e-05, "loss": 2.2994, "step": 62305 }, { "epoch": 4.233591520587036, "grad_norm": 3.2142934799194336, "learning_rate": 4.709879059654845e-05, "loss": 2.2229, "step": 62310 }, { "epoch": 4.233931240657698, "grad_norm": 2.966325283050537, "learning_rate": 4.709454409566517e-05, "loss": 2.4272, "step": 62315 }, { "epoch": 4.2342709607283595, "grad_norm": 2.8216989040374756, "learning_rate": 4.7090297594781905e-05, "loss": 2.2183, "step": 62320 }, { "epoch": 4.234610680799022, "grad_norm": 3.5270774364471436, "learning_rate": 4.708605109389863e-05, "loss": 2.1423, "step": 62325 }, { "epoch": 4.234950400869684, "grad_norm": 3.4122211933135986, "learning_rate": 4.7081804593015354e-05, "loss": 2.0999, "step": 62330 }, { "epoch": 4.235290120940345, "grad_norm": 3.058426856994629, "learning_rate": 4.707755809213208e-05, "loss": 2.7247, "step": 62335 }, { "epoch": 4.235629841011007, "grad_norm": 3.0122230052948, "learning_rate": 4.7073311591248817e-05, "loss": 2.5175, "step": 62340 }, { "epoch": 4.235969561081669, "grad_norm": 3.692992925643921, "learning_rate": 4.706906509036554e-05, "loss": 2.2591, "step": 62345 }, { "epoch": 4.23630928115233, "grad_norm": 3.494533061981201, "learning_rate": 4.7064818589482266e-05, "loss": 2.5147, "step": 62350 }, { "epoch": 4.236649001222992, "grad_norm": 2.5466952323913574, "learning_rate": 4.7060572088599e-05, "loss": 2.4213, "step": 62355 }, { "epoch": 4.236988721293654, "grad_norm": 3.1471970081329346, "learning_rate": 4.705632558771573e-05, "loss": 2.296, "step": 62360 }, { "epoch": 4.2373284413643155, "grad_norm": 2.338202953338623, "learning_rate": 4.705207908683245e-05, "loss": 2.4859, "step": 62365 }, { "epoch": 4.237668161434978, "grad_norm": 2.6543970108032227, "learning_rate": 4.704783258594918e-05, "loss": 2.2859, "step": 62370 }, { "epoch": 4.23800788150564, "grad_norm": 
3.8024179935455322, "learning_rate": 4.704358608506591e-05, "loss": 2.2543, "step": 62375 }, { "epoch": 4.238347601576301, "grad_norm": 3.1342461109161377, "learning_rate": 4.7039339584182634e-05, "loss": 2.3947, "step": 62380 }, { "epoch": 4.238687321646963, "grad_norm": 3.654871940612793, "learning_rate": 4.703509308329936e-05, "loss": 2.3967, "step": 62385 }, { "epoch": 4.239027041717625, "grad_norm": 3.1609954833984375, "learning_rate": 4.70308465824161e-05, "loss": 2.4735, "step": 62390 }, { "epoch": 4.239366761788286, "grad_norm": 3.381802797317505, "learning_rate": 4.702660008153282e-05, "loss": 2.4724, "step": 62395 }, { "epoch": 4.239706481858948, "grad_norm": 3.329758405685425, "learning_rate": 4.7022353580649546e-05, "loss": 2.4892, "step": 62400 }, { "epoch": 4.24004620192961, "grad_norm": 3.4537906646728516, "learning_rate": 4.7018107079766274e-05, "loss": 2.4434, "step": 62405 }, { "epoch": 4.2403859220002715, "grad_norm": 3.0785346031188965, "learning_rate": 4.7013860578883e-05, "loss": 2.6021, "step": 62410 }, { "epoch": 4.240725642070934, "grad_norm": 3.1389384269714355, "learning_rate": 4.700961407799973e-05, "loss": 2.5648, "step": 62415 }, { "epoch": 4.241065362141596, "grad_norm": 2.5147972106933594, "learning_rate": 4.700536757711646e-05, "loss": 2.5774, "step": 62420 }, { "epoch": 4.241405082212257, "grad_norm": 3.4449708461761475, "learning_rate": 4.7001121076233186e-05, "loss": 2.5452, "step": 62425 }, { "epoch": 4.241744802282919, "grad_norm": 3.287614583969116, "learning_rate": 4.6996874575349914e-05, "loss": 2.3232, "step": 62430 }, { "epoch": 4.242084522353581, "grad_norm": 3.2275478839874268, "learning_rate": 4.699262807446664e-05, "loss": 2.3383, "step": 62435 }, { "epoch": 4.242424242424242, "grad_norm": 2.918473720550537, "learning_rate": 4.698838157358337e-05, "loss": 2.3012, "step": 62440 }, { "epoch": 4.242763962494904, "grad_norm": 3.5990958213806152, "learning_rate": 4.69841350727001e-05, "loss": 2.6087, "step": 62445 }, { 
"epoch": 4.243103682565566, "grad_norm": 3.28645920753479, "learning_rate": 4.6979888571816826e-05, "loss": 2.4945, "step": 62450 }, { "epoch": 4.2434434026362275, "grad_norm": 3.3241350650787354, "learning_rate": 4.6975642070933554e-05, "loss": 2.59, "step": 62455 }, { "epoch": 4.24378312270689, "grad_norm": 2.0203843116760254, "learning_rate": 4.697139557005028e-05, "loss": 2.6155, "step": 62460 }, { "epoch": 4.244122842777552, "grad_norm": 2.953441858291626, "learning_rate": 4.696714906916701e-05, "loss": 2.3871, "step": 62465 }, { "epoch": 4.244462562848213, "grad_norm": 3.122581720352173, "learning_rate": 4.696290256828373e-05, "loss": 2.2962, "step": 62470 }, { "epoch": 4.244802282918875, "grad_norm": 3.33414888381958, "learning_rate": 4.6958656067400466e-05, "loss": 2.4492, "step": 62475 }, { "epoch": 4.245142002989537, "grad_norm": 2.9982078075408936, "learning_rate": 4.6954409566517194e-05, "loss": 2.2463, "step": 62480 }, { "epoch": 4.245481723060198, "grad_norm": 2.9619767665863037, "learning_rate": 4.6950163065633915e-05, "loss": 2.2508, "step": 62485 }, { "epoch": 4.24582144313086, "grad_norm": 4.185608386993408, "learning_rate": 4.694591656475065e-05, "loss": 2.368, "step": 62490 }, { "epoch": 4.246161163201522, "grad_norm": 3.566370725631714, "learning_rate": 4.694167006386738e-05, "loss": 2.2602, "step": 62495 }, { "epoch": 4.246500883272184, "grad_norm": 3.11262845993042, "learning_rate": 4.69374235629841e-05, "loss": 2.5357, "step": 62500 }, { "epoch": 4.246840603342846, "grad_norm": 2.88987135887146, "learning_rate": 4.693317706210083e-05, "loss": 2.4856, "step": 62505 }, { "epoch": 4.247180323413508, "grad_norm": 3.7287745475769043, "learning_rate": 4.692893056121756e-05, "loss": 2.5655, "step": 62510 }, { "epoch": 4.247520043484169, "grad_norm": 3.8936588764190674, "learning_rate": 4.692468406033428e-05, "loss": 2.4558, "step": 62515 }, { "epoch": 4.247859763554831, "grad_norm": 3.1986355781555176, "learning_rate": 4.692043755945101e-05, 
"loss": 2.2575, "step": 62520 }, { "epoch": 4.248199483625493, "grad_norm": 2.8567123413085938, "learning_rate": 4.6916191058567746e-05, "loss": 2.1832, "step": 62525 }, { "epoch": 4.248539203696154, "grad_norm": 2.7923502922058105, "learning_rate": 4.6911944557684474e-05, "loss": 2.5588, "step": 62530 }, { "epoch": 4.248878923766816, "grad_norm": 3.03105092048645, "learning_rate": 4.6907698056801195e-05, "loss": 2.2104, "step": 62535 }, { "epoch": 4.2492186438374775, "grad_norm": 3.4772136211395264, "learning_rate": 4.690345155591792e-05, "loss": 2.3567, "step": 62540 }, { "epoch": 4.24955836390814, "grad_norm": 2.9792606830596924, "learning_rate": 4.689920505503466e-05, "loss": 2.2065, "step": 62545 }, { "epoch": 4.249898083978802, "grad_norm": 2.8930559158325195, "learning_rate": 4.689495855415138e-05, "loss": 2.3487, "step": 62550 }, { "epoch": 4.250237804049463, "grad_norm": 2.4974024295806885, "learning_rate": 4.689071205326811e-05, "loss": 2.3347, "step": 62555 }, { "epoch": 4.250577524120125, "grad_norm": 2.492166042327881, "learning_rate": 4.688646555238484e-05, "loss": 2.1991, "step": 62560 }, { "epoch": 4.250917244190787, "grad_norm": 3.938364267349243, "learning_rate": 4.688221905150156e-05, "loss": 2.4138, "step": 62565 }, { "epoch": 4.251256964261448, "grad_norm": 3.258563756942749, "learning_rate": 4.687797255061829e-05, "loss": 2.4798, "step": 62570 }, { "epoch": 4.25159668433211, "grad_norm": 3.5589561462402344, "learning_rate": 4.6873726049735026e-05, "loss": 2.356, "step": 62575 }, { "epoch": 4.251936404402772, "grad_norm": 3.112647294998169, "learning_rate": 4.686947954885175e-05, "loss": 2.3023, "step": 62580 }, { "epoch": 4.2522761244734335, "grad_norm": 3.0565152168273926, "learning_rate": 4.6865233047968475e-05, "loss": 2.3158, "step": 62585 }, { "epoch": 4.252615844544096, "grad_norm": 3.311349630355835, "learning_rate": 4.6860986547085203e-05, "loss": 2.4124, "step": 62590 }, { "epoch": 4.252955564614758, "grad_norm": 2.4855546951293945, 
"learning_rate": 4.685674004620193e-05, "loss": 2.4923, "step": 62595 }, { "epoch": 4.253295284685419, "grad_norm": 3.8380730152130127, "learning_rate": 4.685249354531866e-05, "loss": 2.4758, "step": 62600 }, { "epoch": 4.253635004756081, "grad_norm": 2.919841766357422, "learning_rate": 4.684824704443539e-05, "loss": 2.0019, "step": 62605 }, { "epoch": 4.253974724826743, "grad_norm": 3.4348535537719727, "learning_rate": 4.6844000543552115e-05, "loss": 2.3737, "step": 62610 }, { "epoch": 4.254314444897404, "grad_norm": 3.1661996841430664, "learning_rate": 4.6839754042668843e-05, "loss": 2.3531, "step": 62615 }, { "epoch": 4.254654164968066, "grad_norm": 3.888336420059204, "learning_rate": 4.683550754178557e-05, "loss": 2.437, "step": 62620 }, { "epoch": 4.254993885038728, "grad_norm": 3.5696988105773926, "learning_rate": 4.68312610409023e-05, "loss": 2.5117, "step": 62625 }, { "epoch": 4.2553336051093895, "grad_norm": 2.9995429515838623, "learning_rate": 4.682701454001903e-05, "loss": 2.4446, "step": 62630 }, { "epoch": 4.255673325180052, "grad_norm": 3.685760259628296, "learning_rate": 4.6822768039135755e-05, "loss": 2.347, "step": 62635 }, { "epoch": 4.256013045250714, "grad_norm": 3.1525325775146484, "learning_rate": 4.681852153825248e-05, "loss": 2.214, "step": 62640 }, { "epoch": 4.256352765321375, "grad_norm": 3.0177085399627686, "learning_rate": 4.681427503736921e-05, "loss": 2.4995, "step": 62645 }, { "epoch": 4.256692485392037, "grad_norm": 2.9726691246032715, "learning_rate": 4.681002853648594e-05, "loss": 2.4971, "step": 62650 }, { "epoch": 4.257032205462699, "grad_norm": 2.905641555786133, "learning_rate": 4.680578203560266e-05, "loss": 2.6277, "step": 62655 }, { "epoch": 4.25737192553336, "grad_norm": 2.7863869667053223, "learning_rate": 4.6801535534719396e-05, "loss": 2.3581, "step": 62660 }, { "epoch": 4.257711645604022, "grad_norm": 2.7410030364990234, "learning_rate": 4.6797289033836124e-05, "loss": 2.6238, "step": 62665 }, { "epoch": 
4.258051365674684, "grad_norm": 2.6792805194854736, "learning_rate": 4.6793042532952845e-05, "loss": 2.6923, "step": 62670 }, { "epoch": 4.2583910857453455, "grad_norm": 2.6763200759887695, "learning_rate": 4.678879603206958e-05, "loss": 2.372, "step": 62675 }, { "epoch": 4.258730805816008, "grad_norm": 4.764898300170898, "learning_rate": 4.678454953118631e-05, "loss": 2.2436, "step": 62680 }, { "epoch": 4.25907052588667, "grad_norm": 3.097172975540161, "learning_rate": 4.678030303030303e-05, "loss": 2.4627, "step": 62685 }, { "epoch": 4.259410245957331, "grad_norm": 3.822648286819458, "learning_rate": 4.677605652941976e-05, "loss": 2.139, "step": 62690 }, { "epoch": 4.259749966027993, "grad_norm": 3.021409273147583, "learning_rate": 4.677181002853649e-05, "loss": 2.2381, "step": 62695 }, { "epoch": 4.260089686098655, "grad_norm": 2.8215203285217285, "learning_rate": 4.676756352765322e-05, "loss": 2.4778, "step": 62700 }, { "epoch": 4.260429406169316, "grad_norm": 4.809718132019043, "learning_rate": 4.676331702676994e-05, "loss": 2.435, "step": 62705 }, { "epoch": 4.260769126239978, "grad_norm": 3.277925729751587, "learning_rate": 4.6759070525886676e-05, "loss": 2.5418, "step": 62710 }, { "epoch": 4.26110884631064, "grad_norm": 2.638218879699707, "learning_rate": 4.6754824025003404e-05, "loss": 2.5694, "step": 62715 }, { "epoch": 4.2614485663813015, "grad_norm": 3.279728889465332, "learning_rate": 4.6750577524120125e-05, "loss": 2.3649, "step": 62720 }, { "epoch": 4.261788286451964, "grad_norm": 3.652287244796753, "learning_rate": 4.674633102323685e-05, "loss": 2.4341, "step": 62725 }, { "epoch": 4.262128006522626, "grad_norm": 3.538790464401245, "learning_rate": 4.674208452235359e-05, "loss": 2.6066, "step": 62730 }, { "epoch": 4.262467726593287, "grad_norm": 3.008117198944092, "learning_rate": 4.673783802147031e-05, "loss": 2.3957, "step": 62735 }, { "epoch": 4.262807446663949, "grad_norm": 3.3258748054504395, "learning_rate": 4.673359152058704e-05, "loss": 
2.3957, "step": 62740 }, { "epoch": 4.263147166734611, "grad_norm": 3.1305458545684814, "learning_rate": 4.672934501970377e-05, "loss": 2.7902, "step": 62745 }, { "epoch": 4.263486886805272, "grad_norm": 3.535911798477173, "learning_rate": 4.672509851882049e-05, "loss": 2.3589, "step": 62750 }, { "epoch": 4.263826606875934, "grad_norm": 2.771636486053467, "learning_rate": 4.672085201793722e-05, "loss": 2.3627, "step": 62755 }, { "epoch": 4.264166326946596, "grad_norm": 3.2297797203063965, "learning_rate": 4.671660551705395e-05, "loss": 2.5379, "step": 62760 }, { "epoch": 4.2645060470172576, "grad_norm": 3.535845994949341, "learning_rate": 4.671235901617068e-05, "loss": 2.3904, "step": 62765 }, { "epoch": 4.26484576708792, "grad_norm": 2.8580944538116455, "learning_rate": 4.6708112515287405e-05, "loss": 2.3659, "step": 62770 }, { "epoch": 4.265185487158582, "grad_norm": 2.933398723602295, "learning_rate": 4.670386601440413e-05, "loss": 2.403, "step": 62775 }, { "epoch": 4.265525207229243, "grad_norm": 3.602823257446289, "learning_rate": 4.669961951352086e-05, "loss": 2.1035, "step": 62780 }, { "epoch": 4.265864927299905, "grad_norm": 3.338528871536255, "learning_rate": 4.669537301263759e-05, "loss": 2.4785, "step": 62785 }, { "epoch": 4.266204647370567, "grad_norm": 4.168615341186523, "learning_rate": 4.669112651175432e-05, "loss": 2.559, "step": 62790 }, { "epoch": 4.266544367441228, "grad_norm": 3.023832321166992, "learning_rate": 4.6686880010871045e-05, "loss": 2.3583, "step": 62795 }, { "epoch": 4.26688408751189, "grad_norm": 2.859412908554077, "learning_rate": 4.668263350998777e-05, "loss": 2.4674, "step": 62800 }, { "epoch": 4.267223807582552, "grad_norm": 2.8553473949432373, "learning_rate": 4.66783870091045e-05, "loss": 2.2017, "step": 62805 }, { "epoch": 4.267563527653214, "grad_norm": 3.288297176361084, "learning_rate": 4.667414050822123e-05, "loss": 2.2403, "step": 62810 }, { "epoch": 4.267903247723876, "grad_norm": 3.8198180198669434, "learning_rate": 
4.666989400733796e-05, "loss": 2.6798, "step": 62815 }, { "epoch": 4.268242967794538, "grad_norm": 3.2283565998077393, "learning_rate": 4.6665647506454685e-05, "loss": 2.0642, "step": 62820 }, { "epoch": 4.268582687865199, "grad_norm": 3.2506916522979736, "learning_rate": 4.6661401005571406e-05, "loss": 2.4019, "step": 62825 }, { "epoch": 4.268922407935861, "grad_norm": 3.3175599575042725, "learning_rate": 4.665715450468814e-05, "loss": 2.5165, "step": 62830 }, { "epoch": 4.269262128006522, "grad_norm": 2.9334969520568848, "learning_rate": 4.665290800380487e-05, "loss": 2.4219, "step": 62835 }, { "epoch": 4.269601848077184, "grad_norm": 2.6202332973480225, "learning_rate": 4.664866150292159e-05, "loss": 2.4022, "step": 62840 }, { "epoch": 4.269941568147846, "grad_norm": 3.8165290355682373, "learning_rate": 4.6644415002038325e-05, "loss": 2.6226, "step": 62845 }, { "epoch": 4.2702812882185075, "grad_norm": 3.044295072555542, "learning_rate": 4.664016850115505e-05, "loss": 2.5954, "step": 62850 }, { "epoch": 4.27062100828917, "grad_norm": 3.274648904800415, "learning_rate": 4.6635922000271774e-05, "loss": 2.517, "step": 62855 }, { "epoch": 4.270960728359832, "grad_norm": 3.2366275787353516, "learning_rate": 4.66316754993885e-05, "loss": 2.2946, "step": 62860 }, { "epoch": 4.271300448430493, "grad_norm": 2.889885902404785, "learning_rate": 4.662742899850524e-05, "loss": 2.7082, "step": 62865 }, { "epoch": 4.271640168501155, "grad_norm": 2.750101327896118, "learning_rate": 4.6623182497621965e-05, "loss": 2.3439, "step": 62870 }, { "epoch": 4.271979888571817, "grad_norm": 2.7240190505981445, "learning_rate": 4.6618935996738686e-05, "loss": 2.5442, "step": 62875 }, { "epoch": 4.272319608642478, "grad_norm": 3.4241559505462646, "learning_rate": 4.661468949585542e-05, "loss": 2.3639, "step": 62880 }, { "epoch": 4.27265932871314, "grad_norm": 3.6371896266937256, "learning_rate": 4.661044299497215e-05, "loss": 2.3228, "step": 62885 }, { "epoch": 4.272999048783802, 
"grad_norm": 3.4817705154418945, "learning_rate": 4.660619649408887e-05, "loss": 2.6478, "step": 62890 }, { "epoch": 4.2733387688544635, "grad_norm": 4.1152424812316895, "learning_rate": 4.66019499932056e-05, "loss": 2.3397, "step": 62895 }, { "epoch": 4.273678488925126, "grad_norm": 2.9789540767669678, "learning_rate": 4.659770349232233e-05, "loss": 2.2503, "step": 62900 }, { "epoch": 4.274018208995788, "grad_norm": 3.2878355979919434, "learning_rate": 4.6593456991439054e-05, "loss": 2.3747, "step": 62905 }, { "epoch": 4.274357929066449, "grad_norm": 2.5864360332489014, "learning_rate": 4.658921049055578e-05, "loss": 2.3146, "step": 62910 }, { "epoch": 4.274697649137111, "grad_norm": 3.0121288299560547, "learning_rate": 4.658496398967252e-05, "loss": 2.4686, "step": 62915 }, { "epoch": 4.275037369207773, "grad_norm": 2.627047538757324, "learning_rate": 4.658071748878924e-05, "loss": 2.5416, "step": 62920 }, { "epoch": 4.275377089278434, "grad_norm": 2.9435055255889893, "learning_rate": 4.6576470987905966e-05, "loss": 2.3785, "step": 62925 }, { "epoch": 4.275716809349096, "grad_norm": 3.368283748626709, "learning_rate": 4.6572224487022694e-05, "loss": 2.5469, "step": 62930 }, { "epoch": 4.276056529419758, "grad_norm": 2.9889564514160156, "learning_rate": 4.656797798613942e-05, "loss": 2.2638, "step": 62935 }, { "epoch": 4.2763962494904195, "grad_norm": 3.070538282394409, "learning_rate": 4.656373148525615e-05, "loss": 2.4177, "step": 62940 }, { "epoch": 4.276735969561082, "grad_norm": 4.114296913146973, "learning_rate": 4.655948498437288e-05, "loss": 2.4254, "step": 62945 }, { "epoch": 4.277075689631744, "grad_norm": 3.078002452850342, "learning_rate": 4.6555238483489606e-05, "loss": 2.396, "step": 62950 }, { "epoch": 4.277415409702405, "grad_norm": 3.2111105918884277, "learning_rate": 4.6550991982606334e-05, "loss": 2.3774, "step": 62955 }, { "epoch": 4.277755129773067, "grad_norm": 3.3493123054504395, "learning_rate": 4.654674548172306e-05, "loss": 2.4077, 
"step": 62960 }, { "epoch": 4.278094849843729, "grad_norm": 3.08821177482605, "learning_rate": 4.654249898083979e-05, "loss": 2.5919, "step": 62965 }, { "epoch": 4.27843456991439, "grad_norm": 3.289898633956909, "learning_rate": 4.653825247995652e-05, "loss": 2.216, "step": 62970 }, { "epoch": 4.278774289985052, "grad_norm": 2.7720577716827393, "learning_rate": 4.6534005979073246e-05, "loss": 2.5124, "step": 62975 }, { "epoch": 4.279114010055714, "grad_norm": 3.033926248550415, "learning_rate": 4.6529759478189974e-05, "loss": 2.6658, "step": 62980 }, { "epoch": 4.2794537301263755, "grad_norm": 2.5374231338500977, "learning_rate": 4.65255129773067e-05, "loss": 2.4487, "step": 62985 }, { "epoch": 4.279793450197038, "grad_norm": 2.346914529800415, "learning_rate": 4.652126647642343e-05, "loss": 2.3938, "step": 62990 }, { "epoch": 4.2801331702677, "grad_norm": 2.85048246383667, "learning_rate": 4.651701997554015e-05, "loss": 2.1879, "step": 62995 }, { "epoch": 4.280472890338361, "grad_norm": 3.2470004558563232, "learning_rate": 4.6512773474656886e-05, "loss": 2.3203, "step": 63000 }, { "epoch": 4.280812610409023, "grad_norm": 2.6100082397460938, "learning_rate": 4.6508526973773614e-05, "loss": 2.551, "step": 63005 }, { "epoch": 4.281152330479685, "grad_norm": 3.3627946376800537, "learning_rate": 4.6504280472890336e-05, "loss": 2.5475, "step": 63010 }, { "epoch": 4.281492050550346, "grad_norm": 3.4767227172851562, "learning_rate": 4.650003397200707e-05, "loss": 2.2996, "step": 63015 }, { "epoch": 4.281831770621008, "grad_norm": 3.4161829948425293, "learning_rate": 4.64957874711238e-05, "loss": 2.5838, "step": 63020 }, { "epoch": 4.28217149069167, "grad_norm": 2.727008581161499, "learning_rate": 4.649154097024052e-05, "loss": 2.5359, "step": 63025 }, { "epoch": 4.2825112107623315, "grad_norm": 3.6408636569976807, "learning_rate": 4.648729446935725e-05, "loss": 2.431, "step": 63030 }, { "epoch": 4.282850930832994, "grad_norm": 2.9952659606933594, "learning_rate": 
4.648304796847398e-05, "loss": 2.6396, "step": 63035 }, { "epoch": 4.283190650903656, "grad_norm": 3.6851699352264404, "learning_rate": 4.647880146759071e-05, "loss": 2.4558, "step": 63040 }, { "epoch": 4.283530370974317, "grad_norm": 3.1315102577209473, "learning_rate": 4.647455496670743e-05, "loss": 2.5512, "step": 63045 }, { "epoch": 4.283870091044979, "grad_norm": 3.1363327503204346, "learning_rate": 4.6470308465824167e-05, "loss": 2.5241, "step": 63050 }, { "epoch": 4.284209811115641, "grad_norm": 2.738739490509033, "learning_rate": 4.6466061964940895e-05, "loss": 2.5742, "step": 63055 }, { "epoch": 4.284549531186302, "grad_norm": 3.3897523880004883, "learning_rate": 4.6461815464057616e-05, "loss": 2.4268, "step": 63060 }, { "epoch": 4.284889251256964, "grad_norm": 3.8456287384033203, "learning_rate": 4.6457568963174344e-05, "loss": 2.6069, "step": 63065 }, { "epoch": 4.285228971327626, "grad_norm": 2.966644763946533, "learning_rate": 4.645332246229108e-05, "loss": 2.4024, "step": 63070 }, { "epoch": 4.2855686913982876, "grad_norm": 4.4490251541137695, "learning_rate": 4.64490759614078e-05, "loss": 2.6539, "step": 63075 }, { "epoch": 4.28590841146895, "grad_norm": 3.6645615100860596, "learning_rate": 4.644482946052453e-05, "loss": 2.5898, "step": 63080 }, { "epoch": 4.286248131539612, "grad_norm": 3.8651177883148193, "learning_rate": 4.644058295964126e-05, "loss": 2.2582, "step": 63085 }, { "epoch": 4.286587851610273, "grad_norm": 2.703165292739868, "learning_rate": 4.6436336458757984e-05, "loss": 2.3304, "step": 63090 }, { "epoch": 4.286927571680935, "grad_norm": 3.425529718399048, "learning_rate": 4.643208995787471e-05, "loss": 2.0556, "step": 63095 }, { "epoch": 4.287267291751597, "grad_norm": 3.230786085128784, "learning_rate": 4.642784345699145e-05, "loss": 2.4661, "step": 63100 }, { "epoch": 4.287607011822258, "grad_norm": 3.1032047271728516, "learning_rate": 4.642359695610817e-05, "loss": 2.2943, "step": 63105 }, { "epoch": 4.28794673189292, 
"grad_norm": 3.982126474380493, "learning_rate": 4.6419350455224896e-05, "loss": 2.4369, "step": 63110 }, { "epoch": 4.288286451963582, "grad_norm": 3.097444534301758, "learning_rate": 4.6415103954341624e-05, "loss": 2.5464, "step": 63115 }, { "epoch": 4.288626172034244, "grad_norm": 3.9634246826171875, "learning_rate": 4.641085745345835e-05, "loss": 2.4253, "step": 63120 }, { "epoch": 4.288965892104906, "grad_norm": 3.178767681121826, "learning_rate": 4.640661095257508e-05, "loss": 2.7199, "step": 63125 }, { "epoch": 4.289305612175568, "grad_norm": 4.306824207305908, "learning_rate": 4.640236445169181e-05, "loss": 2.3432, "step": 63130 }, { "epoch": 4.289645332246229, "grad_norm": 2.8289794921875, "learning_rate": 4.6398117950808536e-05, "loss": 2.6152, "step": 63135 }, { "epoch": 4.289985052316891, "grad_norm": 3.6513099670410156, "learning_rate": 4.6393871449925264e-05, "loss": 2.4955, "step": 63140 }, { "epoch": 4.290324772387553, "grad_norm": 2.998591661453247, "learning_rate": 4.638962494904199e-05, "loss": 2.6029, "step": 63145 }, { "epoch": 4.290664492458214, "grad_norm": 3.1291122436523438, "learning_rate": 4.638537844815872e-05, "loss": 2.5869, "step": 63150 }, { "epoch": 4.291004212528876, "grad_norm": 4.188108921051025, "learning_rate": 4.638113194727545e-05, "loss": 2.4379, "step": 63155 }, { "epoch": 4.291343932599538, "grad_norm": 3.2184739112854004, "learning_rate": 4.6376885446392176e-05, "loss": 2.5, "step": 63160 }, { "epoch": 4.2916836526702, "grad_norm": 3.515395164489746, "learning_rate": 4.63726389455089e-05, "loss": 2.0758, "step": 63165 }, { "epoch": 4.292023372740862, "grad_norm": 3.4215214252471924, "learning_rate": 4.636839244462563e-05, "loss": 2.3608, "step": 63170 }, { "epoch": 4.292363092811524, "grad_norm": 2.9900362491607666, "learning_rate": 4.636414594374236e-05, "loss": 2.3504, "step": 63175 }, { "epoch": 4.292702812882185, "grad_norm": 3.3313024044036865, "learning_rate": 4.635989944285908e-05, "loss": 2.4468, "step": 63180 }, 
{ "epoch": 4.293042532952847, "grad_norm": 3.189797878265381, "learning_rate": 4.6355652941975816e-05, "loss": 2.4025, "step": 63185 }, { "epoch": 4.293382253023509, "grad_norm": 3.198457956314087, "learning_rate": 4.6351406441092544e-05, "loss": 2.3334, "step": 63190 }, { "epoch": 4.29372197309417, "grad_norm": 2.217238426208496, "learning_rate": 4.6347159940209265e-05, "loss": 2.6561, "step": 63195 }, { "epoch": 4.294061693164832, "grad_norm": 3.4032063484191895, "learning_rate": 4.6342913439326e-05, "loss": 2.1878, "step": 63200 }, { "epoch": 4.294401413235494, "grad_norm": 3.287444829940796, "learning_rate": 4.633866693844273e-05, "loss": 2.5438, "step": 63205 }, { "epoch": 4.294741133306156, "grad_norm": 2.752078056335449, "learning_rate": 4.6334420437559456e-05, "loss": 2.3298, "step": 63210 }, { "epoch": 4.295080853376818, "grad_norm": 3.9776813983917236, "learning_rate": 4.633017393667618e-05, "loss": 2.2513, "step": 63215 }, { "epoch": 4.29542057344748, "grad_norm": 3.448330879211426, "learning_rate": 4.632592743579291e-05, "loss": 2.5266, "step": 63220 }, { "epoch": 4.295760293518141, "grad_norm": 3.585188627243042, "learning_rate": 4.632168093490964e-05, "loss": 2.2522, "step": 63225 }, { "epoch": 4.296100013588803, "grad_norm": 3.164867877960205, "learning_rate": 4.631743443402636e-05, "loss": 2.5031, "step": 63230 }, { "epoch": 4.296439733659464, "grad_norm": 3.6629831790924072, "learning_rate": 4.6313187933143096e-05, "loss": 2.3296, "step": 63235 }, { "epoch": 4.296779453730126, "grad_norm": 4.302365779876709, "learning_rate": 4.6308941432259824e-05, "loss": 2.3884, "step": 63240 }, { "epoch": 4.297119173800788, "grad_norm": 2.8553311824798584, "learning_rate": 4.6304694931376545e-05, "loss": 2.559, "step": 63245 }, { "epoch": 4.2974588938714495, "grad_norm": 2.4910037517547607, "learning_rate": 4.630044843049327e-05, "loss": 2.6507, "step": 63250 }, { "epoch": 4.297798613942112, "grad_norm": 3.114629030227661, "learning_rate": 4.629620192961001e-05, 
"loss": 2.6029, "step": 63255 }, { "epoch": 4.298138334012774, "grad_norm": 3.400125026702881, "learning_rate": 4.629195542872673e-05, "loss": 2.5471, "step": 63260 }, { "epoch": 4.298478054083435, "grad_norm": 3.2150490283966064, "learning_rate": 4.628770892784346e-05, "loss": 2.6324, "step": 63265 }, { "epoch": 4.298817774154097, "grad_norm": 3.221104383468628, "learning_rate": 4.628346242696019e-05, "loss": 2.2167, "step": 63270 }, { "epoch": 4.299157494224759, "grad_norm": 3.2204091548919678, "learning_rate": 4.627921592607691e-05, "loss": 2.42, "step": 63275 }, { "epoch": 4.29949721429542, "grad_norm": 3.4112439155578613, "learning_rate": 4.627496942519364e-05, "loss": 2.3927, "step": 63280 }, { "epoch": 4.299836934366082, "grad_norm": 3.0988733768463135, "learning_rate": 4.627072292431037e-05, "loss": 2.554, "step": 63285 }, { "epoch": 4.300176654436744, "grad_norm": 3.2858967781066895, "learning_rate": 4.62664764234271e-05, "loss": 2.5681, "step": 63290 }, { "epoch": 4.3005163745074055, "grad_norm": 4.034422874450684, "learning_rate": 4.6262229922543825e-05, "loss": 2.5139, "step": 63295 }, { "epoch": 4.300856094578068, "grad_norm": 3.114837408065796, "learning_rate": 4.6257983421660553e-05, "loss": 2.2828, "step": 63300 }, { "epoch": 4.30119581464873, "grad_norm": 3.3241705894470215, "learning_rate": 4.625373692077728e-05, "loss": 2.478, "step": 63305 }, { "epoch": 4.301535534719391, "grad_norm": 3.1428472995758057, "learning_rate": 4.624949041989401e-05, "loss": 2.2402, "step": 63310 }, { "epoch": 4.301875254790053, "grad_norm": 4.072729587554932, "learning_rate": 4.624524391901074e-05, "loss": 2.3113, "step": 63315 }, { "epoch": 4.302214974860715, "grad_norm": 3.4487669467926025, "learning_rate": 4.6240997418127465e-05, "loss": 2.6913, "step": 63320 }, { "epoch": 4.302554694931376, "grad_norm": 2.5295472145080566, "learning_rate": 4.6236750917244193e-05, "loss": 2.4856, "step": 63325 }, { "epoch": 4.302894415002038, "grad_norm": 2.815168619155884, 
"learning_rate": 4.623250441636092e-05, "loss": 2.5129, "step": 63330 }, { "epoch": 4.3032341350727, "grad_norm": 3.366835355758667, "learning_rate": 4.622825791547765e-05, "loss": 2.2881, "step": 63335 }, { "epoch": 4.3035738551433615, "grad_norm": 3.5163822174072266, "learning_rate": 4.622401141459438e-05, "loss": 2.3313, "step": 63340 }, { "epoch": 4.303913575214024, "grad_norm": 2.919900417327881, "learning_rate": 4.6219764913711105e-05, "loss": 2.1593, "step": 63345 }, { "epoch": 4.304253295284686, "grad_norm": 2.9800217151641846, "learning_rate": 4.621551841282783e-05, "loss": 2.5235, "step": 63350 }, { "epoch": 4.304593015355347, "grad_norm": 2.8124778270721436, "learning_rate": 4.621127191194456e-05, "loss": 2.3449, "step": 63355 }, { "epoch": 4.304932735426009, "grad_norm": 2.898394823074341, "learning_rate": 4.620702541106129e-05, "loss": 2.9022, "step": 63360 }, { "epoch": 4.305272455496671, "grad_norm": 3.5835578441619873, "learning_rate": 4.620277891017801e-05, "loss": 2.5709, "step": 63365 }, { "epoch": 4.305612175567332, "grad_norm": 3.2078804969787598, "learning_rate": 4.6198532409294746e-05, "loss": 2.4385, "step": 63370 }, { "epoch": 4.305951895637994, "grad_norm": 3.398383378982544, "learning_rate": 4.6194285908411474e-05, "loss": 2.399, "step": 63375 }, { "epoch": 4.306291615708656, "grad_norm": 3.0487639904022217, "learning_rate": 4.61900394075282e-05, "loss": 2.1409, "step": 63380 }, { "epoch": 4.306631335779318, "grad_norm": 3.1967403888702393, "learning_rate": 4.618579290664492e-05, "loss": 2.2974, "step": 63385 }, { "epoch": 4.30697105584998, "grad_norm": 4.0241804122924805, "learning_rate": 4.618154640576166e-05, "loss": 2.4383, "step": 63390 }, { "epoch": 4.307310775920642, "grad_norm": 3.0193209648132324, "learning_rate": 4.6177299904878386e-05, "loss": 2.4982, "step": 63395 }, { "epoch": 4.307650495991303, "grad_norm": 2.9067845344543457, "learning_rate": 4.617305340399511e-05, "loss": 2.5825, "step": 63400 }, { "epoch": 
4.307990216061965, "grad_norm": 2.6310112476348877, "learning_rate": 4.616880690311184e-05, "loss": 2.3263, "step": 63405 }, { "epoch": 4.308329936132627, "grad_norm": 2.9852757453918457, "learning_rate": 4.616456040222857e-05, "loss": 2.5239, "step": 63410 }, { "epoch": 4.308669656203288, "grad_norm": 2.7239954471588135, "learning_rate": 4.616031390134529e-05, "loss": 2.6104, "step": 63415 }, { "epoch": 4.30900937627395, "grad_norm": 3.3950440883636475, "learning_rate": 4.615606740046202e-05, "loss": 2.3681, "step": 63420 }, { "epoch": 4.309349096344612, "grad_norm": 2.6280598640441895, "learning_rate": 4.6151820899578754e-05, "loss": 2.6336, "step": 63425 }, { "epoch": 4.309688816415274, "grad_norm": 3.427359104156494, "learning_rate": 4.6147574398695475e-05, "loss": 2.6075, "step": 63430 }, { "epoch": 4.310028536485936, "grad_norm": 2.6712708473205566, "learning_rate": 4.61433278978122e-05, "loss": 2.554, "step": 63435 }, { "epoch": 4.310368256556598, "grad_norm": 3.108461380004883, "learning_rate": 4.613908139692894e-05, "loss": 2.5186, "step": 63440 }, { "epoch": 4.310707976627259, "grad_norm": 3.1865415573120117, "learning_rate": 4.613483489604566e-05, "loss": 2.2161, "step": 63445 }, { "epoch": 4.311047696697921, "grad_norm": 3.1654934883117676, "learning_rate": 4.613058839516239e-05, "loss": 2.4513, "step": 63450 }, { "epoch": 4.311387416768583, "grad_norm": 3.264277696609497, "learning_rate": 4.6126341894279115e-05, "loss": 2.6759, "step": 63455 }, { "epoch": 4.311727136839244, "grad_norm": 3.586794853210449, "learning_rate": 4.612209539339584e-05, "loss": 2.5881, "step": 63460 }, { "epoch": 4.312066856909906, "grad_norm": 2.7947463989257812, "learning_rate": 4.611784889251257e-05, "loss": 2.3563, "step": 63465 }, { "epoch": 4.312406576980568, "grad_norm": 3.5266191959381104, "learning_rate": 4.61136023916293e-05, "loss": 2.4584, "step": 63470 }, { "epoch": 4.31274629705123, "grad_norm": 4.09183931350708, "learning_rate": 4.610935589074603e-05, "loss": 
2.6301, "step": 63475 }, { "epoch": 4.313086017121892, "grad_norm": 3.4625847339630127, "learning_rate": 4.6105109389862755e-05, "loss": 2.4701, "step": 63480 }, { "epoch": 4.313425737192554, "grad_norm": 2.8116581439971924, "learning_rate": 4.610086288897948e-05, "loss": 2.3377, "step": 63485 }, { "epoch": 4.313765457263215, "grad_norm": 3.202270746231079, "learning_rate": 4.609661638809621e-05, "loss": 2.4773, "step": 63490 }, { "epoch": 4.314105177333877, "grad_norm": 3.310910940170288, "learning_rate": 4.609236988721294e-05, "loss": 2.334, "step": 63495 }, { "epoch": 4.314444897404539, "grad_norm": 2.776503562927246, "learning_rate": 4.608812338632967e-05, "loss": 2.6989, "step": 63500 }, { "epoch": 4.3147846174752, "grad_norm": 3.7727560997009277, "learning_rate": 4.6083876885446395e-05, "loss": 2.5225, "step": 63505 }, { "epoch": 4.315124337545862, "grad_norm": 3.949354648590088, "learning_rate": 4.607963038456312e-05, "loss": 2.5096, "step": 63510 }, { "epoch": 4.3154640576165235, "grad_norm": 2.942559003829956, "learning_rate": 4.607538388367985e-05, "loss": 2.6548, "step": 63515 }, { "epoch": 4.315803777687186, "grad_norm": 3.356588840484619, "learning_rate": 4.607113738279657e-05, "loss": 2.2879, "step": 63520 }, { "epoch": 4.316143497757848, "grad_norm": 3.00654673576355, "learning_rate": 4.606689088191331e-05, "loss": 2.5201, "step": 63525 }, { "epoch": 4.316483217828509, "grad_norm": 3.3177390098571777, "learning_rate": 4.6062644381030035e-05, "loss": 2.3231, "step": 63530 }, { "epoch": 4.316822937899171, "grad_norm": 3.6273391246795654, "learning_rate": 4.6058397880146756e-05, "loss": 2.5064, "step": 63535 }, { "epoch": 4.317162657969833, "grad_norm": 2.7729294300079346, "learning_rate": 4.605415137926349e-05, "loss": 2.3822, "step": 63540 }, { "epoch": 4.317502378040494, "grad_norm": 2.9074714183807373, "learning_rate": 4.604990487838022e-05, "loss": 2.405, "step": 63545 }, { "epoch": 4.317842098111156, "grad_norm": 2.6252553462982178, 
"learning_rate": 4.604565837749695e-05, "loss": 2.343, "step": 63550 }, { "epoch": 4.318181818181818, "grad_norm": 3.399200201034546, "learning_rate": 4.604141187661367e-05, "loss": 2.6055, "step": 63555 }, { "epoch": 4.3185215382524795, "grad_norm": 3.2019269466400146, "learning_rate": 4.60371653757304e-05, "loss": 2.4935, "step": 63560 }, { "epoch": 4.318861258323142, "grad_norm": 3.32968807220459, "learning_rate": 4.603291887484713e-05, "loss": 2.387, "step": 63565 }, { "epoch": 4.319200978393804, "grad_norm": 2.9677717685699463, "learning_rate": 4.602867237396385e-05, "loss": 2.4814, "step": 63570 }, { "epoch": 4.319540698464465, "grad_norm": 2.968228578567505, "learning_rate": 4.602442587308059e-05, "loss": 2.3257, "step": 63575 }, { "epoch": 4.319880418535127, "grad_norm": 2.708030939102173, "learning_rate": 4.6020179372197315e-05, "loss": 2.6579, "step": 63580 }, { "epoch": 4.320220138605789, "grad_norm": 2.798555374145508, "learning_rate": 4.6015932871314036e-05, "loss": 2.5267, "step": 63585 }, { "epoch": 4.32055985867645, "grad_norm": 3.7968008518218994, "learning_rate": 4.601168637043077e-05, "loss": 2.3049, "step": 63590 }, { "epoch": 4.320899578747112, "grad_norm": 2.461404800415039, "learning_rate": 4.60074398695475e-05, "loss": 2.2434, "step": 63595 }, { "epoch": 4.321239298817774, "grad_norm": 3.1979992389678955, "learning_rate": 4.600319336866422e-05, "loss": 2.584, "step": 63600 }, { "epoch": 4.3215790188884355, "grad_norm": 3.4617249965667725, "learning_rate": 4.599894686778095e-05, "loss": 2.413, "step": 63605 }, { "epoch": 4.321918738959098, "grad_norm": 3.2206532955169678, "learning_rate": 4.599470036689768e-05, "loss": 2.3406, "step": 63610 }, { "epoch": 4.32225845902976, "grad_norm": 3.0912554264068604, "learning_rate": 4.5990453866014404e-05, "loss": 2.3773, "step": 63615 }, { "epoch": 4.322598179100421, "grad_norm": 2.7421040534973145, "learning_rate": 4.598620736513113e-05, "loss": 2.3907, "step": 63620 }, { "epoch": 4.322937899171083, 
"grad_norm": 2.831190586090088, "learning_rate": 4.598196086424787e-05, "loss": 2.1772, "step": 63625 }, { "epoch": 4.323277619241745, "grad_norm": 3.19745135307312, "learning_rate": 4.597771436336459e-05, "loss": 2.5987, "step": 63630 }, { "epoch": 4.323617339312406, "grad_norm": 3.0110363960266113, "learning_rate": 4.5973467862481316e-05, "loss": 2.0565, "step": 63635 }, { "epoch": 4.323957059383068, "grad_norm": 2.626378059387207, "learning_rate": 4.5969221361598044e-05, "loss": 2.5626, "step": 63640 }, { "epoch": 4.32429677945373, "grad_norm": 3.585466146469116, "learning_rate": 4.596497486071477e-05, "loss": 2.4597, "step": 63645 }, { "epoch": 4.3246364995243916, "grad_norm": 2.781022071838379, "learning_rate": 4.59607283598315e-05, "loss": 2.4656, "step": 63650 }, { "epoch": 4.324976219595054, "grad_norm": 2.850363254547119, "learning_rate": 4.595648185894823e-05, "loss": 2.4304, "step": 63655 }, { "epoch": 4.325315939665716, "grad_norm": 3.2280113697052, "learning_rate": 4.5952235358064956e-05, "loss": 2.2836, "step": 63660 }, { "epoch": 4.325655659736377, "grad_norm": 3.9095919132232666, "learning_rate": 4.5947988857181684e-05, "loss": 2.6689, "step": 63665 }, { "epoch": 4.325995379807039, "grad_norm": 2.4933388233184814, "learning_rate": 4.594374235629841e-05, "loss": 2.4596, "step": 63670 }, { "epoch": 4.326335099877701, "grad_norm": 2.7085397243499756, "learning_rate": 4.593949585541514e-05, "loss": 2.6092, "step": 63675 }, { "epoch": 4.326674819948362, "grad_norm": 4.049295425415039, "learning_rate": 4.593524935453187e-05, "loss": 2.6269, "step": 63680 }, { "epoch": 4.327014540019024, "grad_norm": 2.354865312576294, "learning_rate": 4.5931002853648596e-05, "loss": 2.2989, "step": 63685 }, { "epoch": 4.327354260089686, "grad_norm": 2.882554531097412, "learning_rate": 4.5926756352765324e-05, "loss": 2.5397, "step": 63690 }, { "epoch": 4.327693980160348, "grad_norm": 2.948000431060791, "learning_rate": 4.592250985188205e-05, "loss": 2.3701, "step": 63695 
}, { "epoch": 4.32803370023101, "grad_norm": 3.229818820953369, "learning_rate": 4.591826335099878e-05, "loss": 2.3912, "step": 63700 }, { "epoch": 4.328373420301672, "grad_norm": 2.7344095706939697, "learning_rate": 4.59140168501155e-05, "loss": 2.3508, "step": 63705 }, { "epoch": 4.328713140372333, "grad_norm": 2.84902024269104, "learning_rate": 4.5909770349232236e-05, "loss": 2.5388, "step": 63710 }, { "epoch": 4.329052860442995, "grad_norm": 3.2851178646087646, "learning_rate": 4.5905523848348964e-05, "loss": 2.0258, "step": 63715 }, { "epoch": 4.329392580513657, "grad_norm": 2.63736891746521, "learning_rate": 4.590127734746569e-05, "loss": 2.5741, "step": 63720 }, { "epoch": 4.329732300584318, "grad_norm": 2.8264999389648438, "learning_rate": 4.589703084658242e-05, "loss": 2.3466, "step": 63725 }, { "epoch": 4.33007202065498, "grad_norm": 2.682239055633545, "learning_rate": 4.589278434569915e-05, "loss": 2.296, "step": 63730 }, { "epoch": 4.330411740725642, "grad_norm": 3.335467576980591, "learning_rate": 4.5888537844815877e-05, "loss": 2.1988, "step": 63735 }, { "epoch": 4.330751460796304, "grad_norm": 2.853915214538574, "learning_rate": 4.58842913439326e-05, "loss": 2.4274, "step": 63740 }, { "epoch": 4.331091180866966, "grad_norm": 3.2765932083129883, "learning_rate": 4.588004484304933e-05, "loss": 2.1788, "step": 63745 }, { "epoch": 4.331430900937628, "grad_norm": 3.297940969467163, "learning_rate": 4.587579834216606e-05, "loss": 2.6126, "step": 63750 }, { "epoch": 4.331770621008289, "grad_norm": 3.090437650680542, "learning_rate": 4.587155184128278e-05, "loss": 2.2403, "step": 63755 }, { "epoch": 4.332110341078951, "grad_norm": 2.891498565673828, "learning_rate": 4.5867305340399517e-05, "loss": 2.3758, "step": 63760 }, { "epoch": 4.332450061149613, "grad_norm": 3.349733591079712, "learning_rate": 4.5863058839516245e-05, "loss": 2.2925, "step": 63765 }, { "epoch": 4.332789781220274, "grad_norm": 2.795881748199463, "learning_rate": 4.5858812338632966e-05, 
"loss": 2.4378, "step": 63770 }, { "epoch": 4.333129501290936, "grad_norm": 4.427619457244873, "learning_rate": 4.5854565837749694e-05, "loss": 2.3804, "step": 63775 }, { "epoch": 4.333469221361598, "grad_norm": 2.8153328895568848, "learning_rate": 4.585031933686643e-05, "loss": 2.2678, "step": 63780 }, { "epoch": 4.33380894143226, "grad_norm": 3.132138967514038, "learning_rate": 4.584607283598315e-05, "loss": 2.8204, "step": 63785 }, { "epoch": 4.334148661502922, "grad_norm": 2.5685677528381348, "learning_rate": 4.584182633509988e-05, "loss": 2.4249, "step": 63790 }, { "epoch": 4.334488381573584, "grad_norm": 3.6022114753723145, "learning_rate": 4.583757983421661e-05, "loss": 2.2798, "step": 63795 }, { "epoch": 4.334828101644245, "grad_norm": 2.958571195602417, "learning_rate": 4.5833333333333334e-05, "loss": 2.2575, "step": 63800 }, { "epoch": 4.335167821714907, "grad_norm": 2.125028371810913, "learning_rate": 4.582908683245006e-05, "loss": 2.6264, "step": 63805 }, { "epoch": 4.335507541785569, "grad_norm": 2.942094564437866, "learning_rate": 4.582484033156679e-05, "loss": 2.0505, "step": 63810 }, { "epoch": 4.33584726185623, "grad_norm": 3.7887606620788574, "learning_rate": 4.582059383068352e-05, "loss": 2.6442, "step": 63815 }, { "epoch": 4.336186981926892, "grad_norm": 3.3346359729766846, "learning_rate": 4.5816347329800246e-05, "loss": 2.5933, "step": 63820 }, { "epoch": 4.336526701997554, "grad_norm": 3.4139106273651123, "learning_rate": 4.5812100828916974e-05, "loss": 2.8374, "step": 63825 }, { "epoch": 4.336866422068216, "grad_norm": 3.1382336616516113, "learning_rate": 4.58078543280337e-05, "loss": 2.6053, "step": 63830 }, { "epoch": 4.337206142138878, "grad_norm": 3.348668336868286, "learning_rate": 4.580360782715043e-05, "loss": 2.3296, "step": 63835 }, { "epoch": 4.33754586220954, "grad_norm": 3.4224112033843994, "learning_rate": 4.579936132626716e-05, "loss": 2.3934, "step": 63840 }, { "epoch": 4.337885582280201, "grad_norm": 3.1559488773345947, 
"learning_rate": 4.5795114825383886e-05, "loss": 2.5163, "step": 63845 }, { "epoch": 4.338225302350863, "grad_norm": 3.562661647796631, "learning_rate": 4.5790868324500614e-05, "loss": 2.0342, "step": 63850 }, { "epoch": 4.338565022421525, "grad_norm": 2.7806568145751953, "learning_rate": 4.578662182361734e-05, "loss": 2.5867, "step": 63855 }, { "epoch": 4.338904742492186, "grad_norm": 3.1300137042999268, "learning_rate": 4.578237532273407e-05, "loss": 2.4283, "step": 63860 }, { "epoch": 4.339244462562848, "grad_norm": 4.250659942626953, "learning_rate": 4.57781288218508e-05, "loss": 2.7352, "step": 63865 }, { "epoch": 4.33958418263351, "grad_norm": 2.981367826461792, "learning_rate": 4.5773882320967526e-05, "loss": 2.4346, "step": 63870 }, { "epoch": 4.339923902704172, "grad_norm": 3.06156587600708, "learning_rate": 4.576963582008425e-05, "loss": 2.4775, "step": 63875 }, { "epoch": 4.340263622774834, "grad_norm": 3.364182472229004, "learning_rate": 4.576538931920098e-05, "loss": 2.7763, "step": 63880 }, { "epoch": 4.340603342845496, "grad_norm": 3.5977628231048584, "learning_rate": 4.576114281831771e-05, "loss": 2.7093, "step": 63885 }, { "epoch": 4.340943062916157, "grad_norm": 3.6667046546936035, "learning_rate": 4.575689631743444e-05, "loss": 2.5192, "step": 63890 }, { "epoch": 4.341282782986819, "grad_norm": 2.6594748497009277, "learning_rate": 4.5752649816551166e-05, "loss": 2.4267, "step": 63895 }, { "epoch": 4.341622503057481, "grad_norm": 3.1119015216827393, "learning_rate": 4.5748403315667894e-05, "loss": 2.3061, "step": 63900 }, { "epoch": 4.341962223128142, "grad_norm": 3.0441901683807373, "learning_rate": 4.574415681478462e-05, "loss": 2.2969, "step": 63905 }, { "epoch": 4.342301943198804, "grad_norm": 3.159611225128174, "learning_rate": 4.573991031390134e-05, "loss": 2.4022, "step": 63910 }, { "epoch": 4.3426416632694655, "grad_norm": 2.8265111446380615, "learning_rate": 4.573566381301808e-05, "loss": 2.5483, "step": 63915 }, { "epoch": 
4.342981383340128, "grad_norm": 3.2697761058807373, "learning_rate": 4.5731417312134806e-05, "loss": 2.5767, "step": 63920 }, { "epoch": 4.34332110341079, "grad_norm": 2.720477342605591, "learning_rate": 4.572717081125153e-05, "loss": 2.5426, "step": 63925 }, { "epoch": 4.343660823481451, "grad_norm": 3.137918710708618, "learning_rate": 4.572292431036826e-05, "loss": 2.6776, "step": 63930 }, { "epoch": 4.344000543552113, "grad_norm": 2.872537851333618, "learning_rate": 4.571867780948499e-05, "loss": 2.5816, "step": 63935 }, { "epoch": 4.344340263622775, "grad_norm": 2.968400716781616, "learning_rate": 4.571443130860171e-05, "loss": 2.3519, "step": 63940 }, { "epoch": 4.344679983693436, "grad_norm": 3.923464059829712, "learning_rate": 4.571018480771844e-05, "loss": 2.3062, "step": 63945 }, { "epoch": 4.345019703764098, "grad_norm": 2.9224865436553955, "learning_rate": 4.5705938306835174e-05, "loss": 2.4265, "step": 63950 }, { "epoch": 4.34535942383476, "grad_norm": 3.6733179092407227, "learning_rate": 4.5701691805951895e-05, "loss": 2.3262, "step": 63955 }, { "epoch": 4.345699143905422, "grad_norm": 3.145664930343628, "learning_rate": 4.569744530506862e-05, "loss": 2.4441, "step": 63960 }, { "epoch": 4.346038863976084, "grad_norm": 3.183856248855591, "learning_rate": 4.569319880418536e-05, "loss": 2.3183, "step": 63965 }, { "epoch": 4.346378584046746, "grad_norm": 3.267961025238037, "learning_rate": 4.568895230330208e-05, "loss": 2.0447, "step": 63970 }, { "epoch": 4.346718304117407, "grad_norm": 3.036933183670044, "learning_rate": 4.568470580241881e-05, "loss": 2.5036, "step": 63975 }, { "epoch": 4.347058024188069, "grad_norm": 3.1830406188964844, "learning_rate": 4.5680459301535535e-05, "loss": 2.21, "step": 63980 }, { "epoch": 4.347397744258731, "grad_norm": 2.5688915252685547, "learning_rate": 4.567621280065226e-05, "loss": 2.5567, "step": 63985 }, { "epoch": 4.347737464329392, "grad_norm": 3.9306223392486572, "learning_rate": 4.567196629976899e-05, "loss": 
2.211, "step": 63990 }, { "epoch": 4.348077184400054, "grad_norm": 3.562152624130249, "learning_rate": 4.566771979888572e-05, "loss": 2.5087, "step": 63995 }, { "epoch": 4.348416904470716, "grad_norm": 2.5223629474639893, "learning_rate": 4.566347329800245e-05, "loss": 2.1935, "step": 64000 }, { "epoch": 4.348756624541378, "grad_norm": 2.9521100521087646, "learning_rate": 4.5659226797119175e-05, "loss": 2.2218, "step": 64005 }, { "epoch": 4.34909634461204, "grad_norm": 3.4703011512756348, "learning_rate": 4.56549802962359e-05, "loss": 2.56, "step": 64010 }, { "epoch": 4.349436064682702, "grad_norm": 3.396942138671875, "learning_rate": 4.565073379535263e-05, "loss": 2.2023, "step": 64015 }, { "epoch": 4.349775784753363, "grad_norm": 3.1254138946533203, "learning_rate": 4.564648729446936e-05, "loss": 2.5306, "step": 64020 }, { "epoch": 4.350115504824025, "grad_norm": 3.476637125015259, "learning_rate": 4.564224079358609e-05, "loss": 2.2009, "step": 64025 }, { "epoch": 4.350455224894687, "grad_norm": 3.054783344268799, "learning_rate": 4.5637994292702815e-05, "loss": 2.5818, "step": 64030 }, { "epoch": 4.350794944965348, "grad_norm": 3.228097438812256, "learning_rate": 4.5633747791819543e-05, "loss": 2.2208, "step": 64035 }, { "epoch": 4.35113466503601, "grad_norm": 4.100290298461914, "learning_rate": 4.562950129093627e-05, "loss": 2.5534, "step": 64040 }, { "epoch": 4.351474385106672, "grad_norm": 2.74481201171875, "learning_rate": 4.562525479005299e-05, "loss": 2.4122, "step": 64045 }, { "epoch": 4.351814105177334, "grad_norm": 2.86844539642334, "learning_rate": 4.562100828916973e-05, "loss": 2.0834, "step": 64050 }, { "epoch": 4.352153825247996, "grad_norm": 3.210202693939209, "learning_rate": 4.5616761788286455e-05, "loss": 2.3672, "step": 64055 }, { "epoch": 4.352493545318658, "grad_norm": 3.6289353370666504, "learning_rate": 4.5612515287403183e-05, "loss": 2.3691, "step": 64060 }, { "epoch": 4.352833265389319, "grad_norm": 3.6329970359802246, "learning_rate": 
4.560826878651991e-05, "loss": 2.6523, "step": 64065 }, { "epoch": 4.353172985459981, "grad_norm": 2.8427250385284424, "learning_rate": 4.560402228563664e-05, "loss": 2.519, "step": 64070 }, { "epoch": 4.353512705530643, "grad_norm": 3.2074575424194336, "learning_rate": 4.559977578475337e-05, "loss": 2.415, "step": 64075 }, { "epoch": 4.353852425601304, "grad_norm": 2.8142805099487305, "learning_rate": 4.559552928387009e-05, "loss": 2.4919, "step": 64080 }, { "epoch": 4.354192145671966, "grad_norm": 3.828211784362793, "learning_rate": 4.5591282782986823e-05, "loss": 2.2492, "step": 64085 }, { "epoch": 4.354531865742628, "grad_norm": 2.6998486518859863, "learning_rate": 4.558703628210355e-05, "loss": 2.157, "step": 64090 }, { "epoch": 4.35487158581329, "grad_norm": 2.9917984008789062, "learning_rate": 4.558278978122027e-05, "loss": 2.6868, "step": 64095 }, { "epoch": 4.355211305883952, "grad_norm": 2.775458574295044, "learning_rate": 4.557854328033701e-05, "loss": 2.3326, "step": 64100 }, { "epoch": 4.355551025954614, "grad_norm": 2.808001756668091, "learning_rate": 4.5574296779453736e-05, "loss": 2.4889, "step": 64105 }, { "epoch": 4.355890746025275, "grad_norm": 2.9565484523773193, "learning_rate": 4.557005027857046e-05, "loss": 2.5281, "step": 64110 }, { "epoch": 4.356230466095937, "grad_norm": 3.126859426498413, "learning_rate": 4.556580377768719e-05, "loss": 2.2526, "step": 64115 }, { "epoch": 4.356570186166599, "grad_norm": 2.6884398460388184, "learning_rate": 4.556155727680392e-05, "loss": 2.6703, "step": 64120 }, { "epoch": 4.35690990623726, "grad_norm": 3.0542855262756348, "learning_rate": 4.555731077592064e-05, "loss": 2.4975, "step": 64125 }, { "epoch": 4.357249626307922, "grad_norm": 2.8649353981018066, "learning_rate": 4.555306427503737e-05, "loss": 2.2134, "step": 64130 }, { "epoch": 4.357589346378584, "grad_norm": 2.5227389335632324, "learning_rate": 4.5548817774154104e-05, "loss": 2.4761, "step": 64135 }, { "epoch": 4.357929066449246, "grad_norm": 
2.8936991691589355, "learning_rate": 4.5544571273270825e-05, "loss": 2.3778, "step": 64140 }, { "epoch": 4.358268786519908, "grad_norm": 3.4305763244628906, "learning_rate": 4.554032477238755e-05, "loss": 2.4041, "step": 64145 }, { "epoch": 4.35860850659057, "grad_norm": 2.7125682830810547, "learning_rate": 4.553607827150429e-05, "loss": 2.3602, "step": 64150 }, { "epoch": 4.358948226661231, "grad_norm": 3.001415252685547, "learning_rate": 4.553183177062101e-05, "loss": 2.6761, "step": 64155 }, { "epoch": 4.359287946731893, "grad_norm": 3.7588510513305664, "learning_rate": 4.552758526973774e-05, "loss": 2.5293, "step": 64160 }, { "epoch": 4.359627666802555, "grad_norm": 3.1598727703094482, "learning_rate": 4.5523338768854465e-05, "loss": 2.3499, "step": 64165 }, { "epoch": 4.359967386873216, "grad_norm": 4.266262531280518, "learning_rate": 4.551909226797119e-05, "loss": 2.3113, "step": 64170 }, { "epoch": 4.360307106943878, "grad_norm": 2.967287540435791, "learning_rate": 4.551484576708792e-05, "loss": 2.5899, "step": 64175 }, { "epoch": 4.36064682701454, "grad_norm": 3.5398378372192383, "learning_rate": 4.551059926620465e-05, "loss": 2.3365, "step": 64180 }, { "epoch": 4.360986547085202, "grad_norm": 2.9325172901153564, "learning_rate": 4.550635276532138e-05, "loss": 2.7392, "step": 64185 }, { "epoch": 4.361326267155864, "grad_norm": 2.9763545989990234, "learning_rate": 4.5502106264438105e-05, "loss": 2.3188, "step": 64190 }, { "epoch": 4.361665987226525, "grad_norm": 3.156205177307129, "learning_rate": 4.549785976355483e-05, "loss": 2.4627, "step": 64195 }, { "epoch": 4.362005707297187, "grad_norm": 4.0633955001831055, "learning_rate": 4.549361326267156e-05, "loss": 2.2262, "step": 64200 }, { "epoch": 4.362345427367849, "grad_norm": 3.483703851699829, "learning_rate": 4.548936676178829e-05, "loss": 2.3254, "step": 64205 }, { "epoch": 4.36268514743851, "grad_norm": 4.0716471672058105, "learning_rate": 4.548512026090502e-05, "loss": 2.184, "step": 64210 }, { 
"epoch": 4.363024867509172, "grad_norm": 3.152416706085205, "learning_rate": 4.5480873760021745e-05, "loss": 2.5382, "step": 64215 }, { "epoch": 4.363364587579834, "grad_norm": 2.6041252613067627, "learning_rate": 4.547662725913847e-05, "loss": 2.5525, "step": 64220 }, { "epoch": 4.3637043076504956, "grad_norm": 2.858660936355591, "learning_rate": 4.54723807582552e-05, "loss": 2.3863, "step": 64225 }, { "epoch": 4.364044027721158, "grad_norm": 2.9043781757354736, "learning_rate": 4.546813425737193e-05, "loss": 2.5476, "step": 64230 }, { "epoch": 4.36438374779182, "grad_norm": 3.8511948585510254, "learning_rate": 4.546388775648866e-05, "loss": 2.497, "step": 64235 }, { "epoch": 4.364723467862481, "grad_norm": 3.1282920837402344, "learning_rate": 4.5459641255605385e-05, "loss": 2.521, "step": 64240 }, { "epoch": 4.365063187933143, "grad_norm": 3.1606574058532715, "learning_rate": 4.545539475472211e-05, "loss": 2.5034, "step": 64245 }, { "epoch": 4.365402908003805, "grad_norm": 3.1866843700408936, "learning_rate": 4.545114825383884e-05, "loss": 2.3523, "step": 64250 }, { "epoch": 4.365742628074466, "grad_norm": 2.793645143508911, "learning_rate": 4.544690175295557e-05, "loss": 2.3476, "step": 64255 }, { "epoch": 4.366082348145128, "grad_norm": 3.6907031536102295, "learning_rate": 4.54426552520723e-05, "loss": 2.3722, "step": 64260 }, { "epoch": 4.36642206821579, "grad_norm": 3.2894985675811768, "learning_rate": 4.543840875118902e-05, "loss": 2.0751, "step": 64265 }, { "epoch": 4.366761788286452, "grad_norm": 2.784144401550293, "learning_rate": 4.543416225030575e-05, "loss": 2.195, "step": 64270 }, { "epoch": 4.367101508357114, "grad_norm": 2.9732048511505127, "learning_rate": 4.542991574942248e-05, "loss": 2.5412, "step": 64275 }, { "epoch": 4.367441228427776, "grad_norm": 3.449592351913452, "learning_rate": 4.54256692485392e-05, "loss": 2.4155, "step": 64280 }, { "epoch": 4.367780948498437, "grad_norm": 2.938778877258301, "learning_rate": 4.542142274765594e-05, 
"loss": 2.4076, "step": 64285 }, { "epoch": 4.368120668569099, "grad_norm": 3.7825796604156494, "learning_rate": 4.5417176246772665e-05, "loss": 2.4547, "step": 64290 }, { "epoch": 4.368460388639761, "grad_norm": 3.1581339836120605, "learning_rate": 4.5412929745889386e-05, "loss": 2.5137, "step": 64295 }, { "epoch": 4.368800108710422, "grad_norm": 3.4135379791259766, "learning_rate": 4.5408683245006114e-05, "loss": 2.7141, "step": 64300 }, { "epoch": 4.369139828781084, "grad_norm": 3.201240062713623, "learning_rate": 4.540443674412285e-05, "loss": 2.6706, "step": 64305 }, { "epoch": 4.369479548851746, "grad_norm": 2.765488862991333, "learning_rate": 4.540019024323957e-05, "loss": 2.4912, "step": 64310 }, { "epoch": 4.369819268922408, "grad_norm": 2.6208837032318115, "learning_rate": 4.53959437423563e-05, "loss": 2.2377, "step": 64315 }, { "epoch": 4.37015898899307, "grad_norm": 3.1020047664642334, "learning_rate": 4.539169724147303e-05, "loss": 2.5919, "step": 64320 }, { "epoch": 4.370498709063732, "grad_norm": 2.6328558921813965, "learning_rate": 4.5387450740589754e-05, "loss": 2.1727, "step": 64325 }, { "epoch": 4.370838429134393, "grad_norm": 2.228806495666504, "learning_rate": 4.538320423970648e-05, "loss": 2.7571, "step": 64330 }, { "epoch": 4.371178149205055, "grad_norm": 3.337991714477539, "learning_rate": 4.537895773882321e-05, "loss": 2.5488, "step": 64335 }, { "epoch": 4.371517869275717, "grad_norm": 3.2805678844451904, "learning_rate": 4.537471123793994e-05, "loss": 2.2021, "step": 64340 }, { "epoch": 4.371857589346378, "grad_norm": 2.6259937286376953, "learning_rate": 4.5370464737056666e-05, "loss": 2.9195, "step": 64345 }, { "epoch": 4.37219730941704, "grad_norm": 3.110849380493164, "learning_rate": 4.5366218236173394e-05, "loss": 2.367, "step": 64350 }, { "epoch": 4.372537029487702, "grad_norm": 3.240483522415161, "learning_rate": 4.536197173529012e-05, "loss": 2.6369, "step": 64355 }, { "epoch": 4.372876749558364, "grad_norm": 3.2063710689544678, 
"learning_rate": 4.535772523440685e-05, "loss": 2.448, "step": 64360 }, { "epoch": 4.373216469629026, "grad_norm": 3.7334892749786377, "learning_rate": 4.535347873352358e-05, "loss": 2.5615, "step": 64365 }, { "epoch": 4.373556189699688, "grad_norm": 3.5598578453063965, "learning_rate": 4.5349232232640306e-05, "loss": 2.4187, "step": 64370 }, { "epoch": 4.373895909770349, "grad_norm": 2.7257728576660156, "learning_rate": 4.5344985731757034e-05, "loss": 2.5235, "step": 64375 }, { "epoch": 4.374235629841011, "grad_norm": 2.8231711387634277, "learning_rate": 4.534073923087376e-05, "loss": 2.4612, "step": 64380 }, { "epoch": 4.374575349911673, "grad_norm": 2.735788583755493, "learning_rate": 4.533649272999049e-05, "loss": 2.1886, "step": 64385 }, { "epoch": 4.374915069982334, "grad_norm": 3.690730571746826, "learning_rate": 4.533224622910722e-05, "loss": 2.3274, "step": 64390 }, { "epoch": 4.375254790052996, "grad_norm": 3.938075065612793, "learning_rate": 4.5327999728223946e-05, "loss": 2.5179, "step": 64395 }, { "epoch": 4.375594510123658, "grad_norm": 3.7423665523529053, "learning_rate": 4.5323753227340674e-05, "loss": 2.228, "step": 64400 }, { "epoch": 4.37593423019432, "grad_norm": 3.365032911300659, "learning_rate": 4.53195067264574e-05, "loss": 2.3718, "step": 64405 }, { "epoch": 4.376273950264982, "grad_norm": 3.2587714195251465, "learning_rate": 4.531526022557413e-05, "loss": 2.3299, "step": 64410 }, { "epoch": 4.376613670335644, "grad_norm": 2.663578987121582, "learning_rate": 4.531101372469086e-05, "loss": 2.5077, "step": 64415 }, { "epoch": 4.376953390406305, "grad_norm": 2.4542462825775146, "learning_rate": 4.5306767223807586e-05, "loss": 2.5915, "step": 64420 }, { "epoch": 4.377293110476967, "grad_norm": 3.614274501800537, "learning_rate": 4.5302520722924314e-05, "loss": 2.2967, "step": 64425 }, { "epoch": 4.377632830547629, "grad_norm": 3.508859395980835, "learning_rate": 4.529827422204104e-05, "loss": 2.6866, "step": 64430 }, { "epoch": 
4.37797255061829, "grad_norm": 2.9708662033081055, "learning_rate": 4.5294027721157764e-05, "loss": 2.8123, "step": 64435 }, { "epoch": 4.378312270688952, "grad_norm": 3.1347756385803223, "learning_rate": 4.52897812202745e-05, "loss": 2.2349, "step": 64440 }, { "epoch": 4.378651990759614, "grad_norm": 2.799837112426758, "learning_rate": 4.5285534719391227e-05, "loss": 2.2086, "step": 64445 }, { "epoch": 4.378991710830276, "grad_norm": 3.648364543914795, "learning_rate": 4.528128821850795e-05, "loss": 2.8207, "step": 64450 }, { "epoch": 4.379331430900938, "grad_norm": 2.8122832775115967, "learning_rate": 4.527704171762468e-05, "loss": 2.3492, "step": 64455 }, { "epoch": 4.3796711509716, "grad_norm": 2.5000367164611816, "learning_rate": 4.527279521674141e-05, "loss": 2.3769, "step": 64460 }, { "epoch": 4.380010871042261, "grad_norm": 3.193492889404297, "learning_rate": 4.526854871585813e-05, "loss": 2.5608, "step": 64465 }, { "epoch": 4.380350591112923, "grad_norm": 3.3008084297180176, "learning_rate": 4.526430221497486e-05, "loss": 2.2436, "step": 64470 }, { "epoch": 4.380690311183585, "grad_norm": 3.2780539989471436, "learning_rate": 4.5260055714091595e-05, "loss": 2.2449, "step": 64475 }, { "epoch": 4.381030031254246, "grad_norm": 3.5560853481292725, "learning_rate": 4.5255809213208316e-05, "loss": 2.4997, "step": 64480 }, { "epoch": 4.381369751324908, "grad_norm": 2.997697353363037, "learning_rate": 4.5251562712325044e-05, "loss": 2.5323, "step": 64485 }, { "epoch": 4.38170947139557, "grad_norm": 3.8319175243377686, "learning_rate": 4.524731621144178e-05, "loss": 2.6327, "step": 64490 }, { "epoch": 4.382049191466232, "grad_norm": 3.8791050910949707, "learning_rate": 4.52430697105585e-05, "loss": 2.4776, "step": 64495 }, { "epoch": 4.382388911536894, "grad_norm": 3.2773540019989014, "learning_rate": 4.523882320967523e-05, "loss": 2.175, "step": 64500 }, { "epoch": 4.382728631607556, "grad_norm": 3.738407611846924, "learning_rate": 4.5234576708791956e-05, "loss": 
2.6308, "step": 64505 }, { "epoch": 4.383068351678217, "grad_norm": 2.79302716255188, "learning_rate": 4.5230330207908684e-05, "loss": 2.423, "step": 64510 }, { "epoch": 4.383408071748879, "grad_norm": 2.6436259746551514, "learning_rate": 4.522608370702541e-05, "loss": 2.4175, "step": 64515 }, { "epoch": 4.383747791819541, "grad_norm": 3.6158089637756348, "learning_rate": 4.522183720614214e-05, "loss": 2.598, "step": 64520 }, { "epoch": 4.384087511890202, "grad_norm": 3.8612473011016846, "learning_rate": 4.521759070525887e-05, "loss": 2.6426, "step": 64525 }, { "epoch": 4.384427231960864, "grad_norm": 2.8265650272369385, "learning_rate": 4.5213344204375596e-05, "loss": 2.7832, "step": 64530 }, { "epoch": 4.384766952031526, "grad_norm": 3.0751259326934814, "learning_rate": 4.5209097703492324e-05, "loss": 2.6049, "step": 64535 }, { "epoch": 4.385106672102188, "grad_norm": 3.4049556255340576, "learning_rate": 4.520485120260905e-05, "loss": 2.4679, "step": 64540 }, { "epoch": 4.38544639217285, "grad_norm": 3.5592470169067383, "learning_rate": 4.520060470172578e-05, "loss": 2.1492, "step": 64545 }, { "epoch": 4.385786112243512, "grad_norm": 3.324915647506714, "learning_rate": 4.519635820084251e-05, "loss": 2.7062, "step": 64550 }, { "epoch": 4.386125832314173, "grad_norm": 2.9674344062805176, "learning_rate": 4.5192111699959236e-05, "loss": 2.3526, "step": 64555 }, { "epoch": 4.386465552384835, "grad_norm": 2.8301382064819336, "learning_rate": 4.5187865199075964e-05, "loss": 2.4418, "step": 64560 }, { "epoch": 4.386805272455497, "grad_norm": 3.6062066555023193, "learning_rate": 4.518361869819269e-05, "loss": 2.5741, "step": 64565 }, { "epoch": 4.387144992526158, "grad_norm": 3.513237237930298, "learning_rate": 4.517937219730942e-05, "loss": 2.5149, "step": 64570 }, { "epoch": 4.38748471259682, "grad_norm": 3.4476029872894287, "learning_rate": 4.517512569642615e-05, "loss": 2.4027, "step": 64575 }, { "epoch": 4.3878244326674825, "grad_norm": 3.404003381729126, 
"learning_rate": 4.5170879195542876e-05, "loss": 2.3809, "step": 64580 }, { "epoch": 4.388164152738144, "grad_norm": 3.1087088584899902, "learning_rate": 4.5166632694659604e-05, "loss": 2.5406, "step": 64585 }, { "epoch": 4.388503872808806, "grad_norm": 3.352104902267456, "learning_rate": 4.516238619377633e-05, "loss": 2.5044, "step": 64590 }, { "epoch": 4.388843592879467, "grad_norm": 2.6645970344543457, "learning_rate": 4.515813969289306e-05, "loss": 2.5201, "step": 64595 }, { "epoch": 4.389183312950129, "grad_norm": 3.134040117263794, "learning_rate": 4.515389319200979e-05, "loss": 2.4664, "step": 64600 }, { "epoch": 4.389523033020791, "grad_norm": 3.2507095336914062, "learning_rate": 4.514964669112651e-05, "loss": 2.668, "step": 64605 }, { "epoch": 4.389862753091452, "grad_norm": 3.6708157062530518, "learning_rate": 4.5145400190243244e-05, "loss": 2.5239, "step": 64610 }, { "epoch": 4.390202473162114, "grad_norm": 3.2524454593658447, "learning_rate": 4.514115368935997e-05, "loss": 2.6748, "step": 64615 }, { "epoch": 4.390542193232776, "grad_norm": 2.8497977256774902, "learning_rate": 4.513690718847669e-05, "loss": 2.4124, "step": 64620 }, { "epoch": 4.390881913303438, "grad_norm": 3.1746325492858887, "learning_rate": 4.513266068759343e-05, "loss": 2.3171, "step": 64625 }, { "epoch": 4.3912216333741, "grad_norm": 2.818427324295044, "learning_rate": 4.5128414186710156e-05, "loss": 2.2025, "step": 64630 }, { "epoch": 4.391561353444762, "grad_norm": 3.2214348316192627, "learning_rate": 4.512416768582688e-05, "loss": 2.3791, "step": 64635 }, { "epoch": 4.391901073515423, "grad_norm": 3.012425184249878, "learning_rate": 4.511992118494361e-05, "loss": 2.6199, "step": 64640 }, { "epoch": 4.392240793586085, "grad_norm": 2.8896803855895996, "learning_rate": 4.511567468406034e-05, "loss": 2.3767, "step": 64645 }, { "epoch": 4.392580513656747, "grad_norm": 3.5149495601654053, "learning_rate": 4.511142818317706e-05, "loss": 2.412, "step": 64650 }, { "epoch": 
4.392920233727408, "grad_norm": 3.4791550636291504, "learning_rate": 4.510718168229379e-05, "loss": 2.452, "step": 64655 }, { "epoch": 4.39325995379807, "grad_norm": 2.7335524559020996, "learning_rate": 4.5102935181410524e-05, "loss": 2.7476, "step": 64660 }, { "epoch": 4.393599673868732, "grad_norm": 4.005084991455078, "learning_rate": 4.5098688680527245e-05, "loss": 2.3827, "step": 64665 }, { "epoch": 4.393939393939394, "grad_norm": 2.6827754974365234, "learning_rate": 4.509444217964397e-05, "loss": 2.5448, "step": 64670 }, { "epoch": 4.394279114010056, "grad_norm": 3.024935483932495, "learning_rate": 4.509019567876071e-05, "loss": 2.3682, "step": 64675 }, { "epoch": 4.394618834080718, "grad_norm": 2.573038339614868, "learning_rate": 4.508594917787743e-05, "loss": 2.6208, "step": 64680 }, { "epoch": 4.394958554151379, "grad_norm": 3.0822532176971436, "learning_rate": 4.508170267699416e-05, "loss": 2.6364, "step": 64685 }, { "epoch": 4.395298274222041, "grad_norm": 3.1645240783691406, "learning_rate": 4.5077456176110885e-05, "loss": 2.4177, "step": 64690 }, { "epoch": 4.395637994292703, "grad_norm": 3.553828001022339, "learning_rate": 4.507320967522761e-05, "loss": 2.4593, "step": 64695 }, { "epoch": 4.395977714363364, "grad_norm": 3.1659226417541504, "learning_rate": 4.506896317434434e-05, "loss": 2.3512, "step": 64700 }, { "epoch": 4.396317434434026, "grad_norm": 3.673293113708496, "learning_rate": 4.506471667346107e-05, "loss": 2.4888, "step": 64705 }, { "epoch": 4.396657154504688, "grad_norm": 3.011079788208008, "learning_rate": 4.50604701725778e-05, "loss": 2.364, "step": 64710 }, { "epoch": 4.39699687457535, "grad_norm": 2.6511924266815186, "learning_rate": 4.5056223671694525e-05, "loss": 2.3496, "step": 64715 }, { "epoch": 4.397336594646012, "grad_norm": 3.7676267623901367, "learning_rate": 4.505197717081125e-05, "loss": 2.2645, "step": 64720 }, { "epoch": 4.397676314716674, "grad_norm": 3.181976318359375, "learning_rate": 4.504773066992798e-05, "loss": 
2.2485, "step": 64725 }, { "epoch": 4.398016034787335, "grad_norm": 2.9425227642059326, "learning_rate": 4.504348416904471e-05, "loss": 2.3721, "step": 64730 }, { "epoch": 4.398355754857997, "grad_norm": 2.30370831489563, "learning_rate": 4.503923766816144e-05, "loss": 2.6334, "step": 64735 }, { "epoch": 4.398695474928659, "grad_norm": 3.7235209941864014, "learning_rate": 4.5034991167278165e-05, "loss": 2.1431, "step": 64740 }, { "epoch": 4.39903519499932, "grad_norm": 3.3581104278564453, "learning_rate": 4.5030744666394893e-05, "loss": 2.4831, "step": 64745 }, { "epoch": 4.399374915069982, "grad_norm": 3.177722454071045, "learning_rate": 4.502649816551162e-05, "loss": 2.2565, "step": 64750 }, { "epoch": 4.399714635140644, "grad_norm": 2.890122413635254, "learning_rate": 4.502225166462835e-05, "loss": 2.5629, "step": 64755 }, { "epoch": 4.400054355211306, "grad_norm": 3.10834002494812, "learning_rate": 4.501800516374508e-05, "loss": 2.4578, "step": 64760 }, { "epoch": 4.400394075281968, "grad_norm": 3.1482372283935547, "learning_rate": 4.5013758662861805e-05, "loss": 2.2782, "step": 64765 }, { "epoch": 4.40073379535263, "grad_norm": 2.945089340209961, "learning_rate": 4.5009512161978533e-05, "loss": 2.2569, "step": 64770 }, { "epoch": 4.401073515423291, "grad_norm": 2.8887100219726562, "learning_rate": 4.500526566109526e-05, "loss": 2.3947, "step": 64775 }, { "epoch": 4.401413235493953, "grad_norm": 2.6221888065338135, "learning_rate": 4.500101916021199e-05, "loss": 2.4803, "step": 64780 }, { "epoch": 4.401752955564615, "grad_norm": 3.2113113403320312, "learning_rate": 4.499677265932872e-05, "loss": 2.0778, "step": 64785 }, { "epoch": 4.402092675635276, "grad_norm": 3.438199281692505, "learning_rate": 4.499252615844544e-05, "loss": 2.3626, "step": 64790 }, { "epoch": 4.402432395705938, "grad_norm": 3.146501064300537, "learning_rate": 4.4988279657562173e-05, "loss": 2.4861, "step": 64795 }, { "epoch": 4.4027721157766, "grad_norm": 2.942913770675659, "learning_rate": 
4.49840331566789e-05, "loss": 2.1522, "step": 64800 }, { "epoch": 4.403111835847262, "grad_norm": 3.879316568374634, "learning_rate": 4.497978665579562e-05, "loss": 2.3796, "step": 64805 }, { "epoch": 4.403451555917924, "grad_norm": 3.837454319000244, "learning_rate": 4.497554015491236e-05, "loss": 2.4055, "step": 64810 }, { "epoch": 4.403791275988586, "grad_norm": 3.606158971786499, "learning_rate": 4.4971293654029086e-05, "loss": 2.4376, "step": 64815 }, { "epoch": 4.404130996059247, "grad_norm": 3.12424373626709, "learning_rate": 4.496704715314581e-05, "loss": 2.3272, "step": 64820 }, { "epoch": 4.404470716129909, "grad_norm": 2.593698501586914, "learning_rate": 4.4962800652262535e-05, "loss": 2.3949, "step": 64825 }, { "epoch": 4.404810436200571, "grad_norm": 2.6836154460906982, "learning_rate": 4.495855415137927e-05, "loss": 2.8251, "step": 64830 }, { "epoch": 4.405150156271232, "grad_norm": 2.766606330871582, "learning_rate": 4.495430765049599e-05, "loss": 2.6494, "step": 64835 }, { "epoch": 4.405489876341894, "grad_norm": 3.0277645587921143, "learning_rate": 4.495006114961272e-05, "loss": 2.3901, "step": 64840 }, { "epoch": 4.4058295964125564, "grad_norm": 2.7461347579956055, "learning_rate": 4.4945814648729454e-05, "loss": 2.1487, "step": 64845 }, { "epoch": 4.406169316483218, "grad_norm": 2.6466968059539795, "learning_rate": 4.4941568147846175e-05, "loss": 2.5207, "step": 64850 }, { "epoch": 4.40650903655388, "grad_norm": 2.9345638751983643, "learning_rate": 4.49373216469629e-05, "loss": 2.4487, "step": 64855 }, { "epoch": 4.406848756624542, "grad_norm": 3.461883544921875, "learning_rate": 4.493307514607963e-05, "loss": 2.213, "step": 64860 }, { "epoch": 4.407188476695203, "grad_norm": 3.1063637733459473, "learning_rate": 4.492882864519636e-05, "loss": 2.5371, "step": 64865 }, { "epoch": 4.407528196765865, "grad_norm": 3.6059446334838867, "learning_rate": 4.492458214431309e-05, "loss": 2.4607, "step": 64870 }, { "epoch": 4.407867916836526, "grad_norm": 
4.034912586212158, "learning_rate": 4.4920335643429815e-05, "loss": 2.5274, "step": 64875 }, { "epoch": 4.408207636907188, "grad_norm": 3.3427491188049316, "learning_rate": 4.491608914254654e-05, "loss": 2.51, "step": 64880 }, { "epoch": 4.40854735697785, "grad_norm": 2.492300271987915, "learning_rate": 4.491184264166327e-05, "loss": 2.275, "step": 64885 }, { "epoch": 4.408887077048512, "grad_norm": 3.1477200984954834, "learning_rate": 4.490759614078e-05, "loss": 2.4714, "step": 64890 }, { "epoch": 4.409226797119174, "grad_norm": 2.5502612590789795, "learning_rate": 4.490334963989673e-05, "loss": 2.4075, "step": 64895 }, { "epoch": 4.409566517189836, "grad_norm": 2.5497500896453857, "learning_rate": 4.4899103139013455e-05, "loss": 2.3872, "step": 64900 }, { "epoch": 4.409906237260497, "grad_norm": 2.798489570617676, "learning_rate": 4.489485663813018e-05, "loss": 2.3406, "step": 64905 }, { "epoch": 4.410245957331159, "grad_norm": 3.4269347190856934, "learning_rate": 4.489061013724691e-05, "loss": 2.6692, "step": 64910 }, { "epoch": 4.410585677401821, "grad_norm": 3.7476911544799805, "learning_rate": 4.488636363636364e-05, "loss": 2.319, "step": 64915 }, { "epoch": 4.410925397472482, "grad_norm": 3.6549150943756104, "learning_rate": 4.488211713548037e-05, "loss": 2.1124, "step": 64920 }, { "epoch": 4.411265117543144, "grad_norm": 3.4401140213012695, "learning_rate": 4.4877870634597095e-05, "loss": 2.3283, "step": 64925 }, { "epoch": 4.411604837613806, "grad_norm": 2.378124237060547, "learning_rate": 4.487362413371382e-05, "loss": 2.3052, "step": 64930 }, { "epoch": 4.411944557684468, "grad_norm": 3.246077299118042, "learning_rate": 4.486937763283055e-05, "loss": 2.6591, "step": 64935 }, { "epoch": 4.41228427775513, "grad_norm": 3.2502169609069824, "learning_rate": 4.486513113194728e-05, "loss": 2.539, "step": 64940 }, { "epoch": 4.412623997825792, "grad_norm": 2.49468994140625, "learning_rate": 4.486088463106401e-05, "loss": 2.4739, "step": 64945 }, { "epoch": 
4.412963717896453, "grad_norm": 3.2919116020202637, "learning_rate": 4.4856638130180735e-05, "loss": 2.4848, "step": 64950 }, { "epoch": 4.413303437967115, "grad_norm": 3.4333155155181885, "learning_rate": 4.485239162929746e-05, "loss": 2.4353, "step": 64955 }, { "epoch": 4.413643158037777, "grad_norm": 3.3797576427459717, "learning_rate": 4.4848145128414184e-05, "loss": 2.5068, "step": 64960 }, { "epoch": 4.413982878108438, "grad_norm": 2.4385857582092285, "learning_rate": 4.484389862753092e-05, "loss": 2.3066, "step": 64965 }, { "epoch": 4.4143225981791, "grad_norm": 3.832646369934082, "learning_rate": 4.483965212664765e-05, "loss": 2.5533, "step": 64970 }, { "epoch": 4.414662318249762, "grad_norm": 2.538306951522827, "learning_rate": 4.483540562576437e-05, "loss": 2.3886, "step": 64975 }, { "epoch": 4.415002038320424, "grad_norm": 3.103257894515991, "learning_rate": 4.48311591248811e-05, "loss": 2.5026, "step": 64980 }, { "epoch": 4.415341758391086, "grad_norm": 4.125696182250977, "learning_rate": 4.482691262399783e-05, "loss": 2.5909, "step": 64985 }, { "epoch": 4.415681478461748, "grad_norm": 2.9347012042999268, "learning_rate": 4.482266612311455e-05, "loss": 2.6723, "step": 64990 }, { "epoch": 4.416021198532409, "grad_norm": 3.003170967102051, "learning_rate": 4.481841962223128e-05, "loss": 2.4387, "step": 64995 }, { "epoch": 4.416360918603071, "grad_norm": 3.134626626968384, "learning_rate": 4.4814173121348015e-05, "loss": 2.3113, "step": 65000 }, { "epoch": 4.416700638673733, "grad_norm": 2.9957187175750732, "learning_rate": 4.4809926620464736e-05, "loss": 2.0784, "step": 65005 }, { "epoch": 4.417040358744394, "grad_norm": 3.4624691009521484, "learning_rate": 4.4805680119581464e-05, "loss": 2.3647, "step": 65010 }, { "epoch": 4.417380078815056, "grad_norm": 3.4461162090301514, "learning_rate": 4.48014336186982e-05, "loss": 2.2515, "step": 65015 }, { "epoch": 4.417719798885718, "grad_norm": 3.428680419921875, "learning_rate": 4.479718711781492e-05, "loss": 
2.4332, "step": 65020 }, { "epoch": 4.41805951895638, "grad_norm": 2.8336799144744873, "learning_rate": 4.479294061693165e-05, "loss": 2.2377, "step": 65025 }, { "epoch": 4.418399239027042, "grad_norm": 3.3059916496276855, "learning_rate": 4.478869411604838e-05, "loss": 2.4122, "step": 65030 }, { "epoch": 4.418738959097704, "grad_norm": 3.0273385047912598, "learning_rate": 4.4784447615165104e-05, "loss": 2.4821, "step": 65035 }, { "epoch": 4.419078679168365, "grad_norm": 2.3268566131591797, "learning_rate": 4.478020111428183e-05, "loss": 2.4121, "step": 65040 }, { "epoch": 4.419418399239027, "grad_norm": 3.446451187133789, "learning_rate": 4.477595461339856e-05, "loss": 2.211, "step": 65045 }, { "epoch": 4.419758119309689, "grad_norm": 3.3282735347747803, "learning_rate": 4.477170811251529e-05, "loss": 2.4088, "step": 65050 }, { "epoch": 4.42009783938035, "grad_norm": 2.719902515411377, "learning_rate": 4.4767461611632016e-05, "loss": 2.6409, "step": 65055 }, { "epoch": 4.420437559451012, "grad_norm": 3.096935987472534, "learning_rate": 4.4763215110748744e-05, "loss": 2.5974, "step": 65060 }, { "epoch": 4.420777279521674, "grad_norm": 2.9844462871551514, "learning_rate": 4.475896860986547e-05, "loss": 2.4239, "step": 65065 }, { "epoch": 4.421116999592336, "grad_norm": 2.9232888221740723, "learning_rate": 4.47547221089822e-05, "loss": 2.3024, "step": 65070 }, { "epoch": 4.421456719662998, "grad_norm": 3.4972472190856934, "learning_rate": 4.475047560809893e-05, "loss": 2.2899, "step": 65075 }, { "epoch": 4.42179643973366, "grad_norm": 3.1844327449798584, "learning_rate": 4.4746229107215656e-05, "loss": 2.3743, "step": 65080 }, { "epoch": 4.422136159804321, "grad_norm": 2.88020658493042, "learning_rate": 4.4741982606332384e-05, "loss": 2.6204, "step": 65085 }, { "epoch": 4.422475879874983, "grad_norm": 3.2562179565429688, "learning_rate": 4.473773610544911e-05, "loss": 2.3476, "step": 65090 }, { "epoch": 4.422815599945645, "grad_norm": 3.196152925491333, 
"learning_rate": 4.473348960456584e-05, "loss": 2.5148, "step": 65095 }, { "epoch": 4.423155320016306, "grad_norm": 3.1381771564483643, "learning_rate": 4.472924310368257e-05, "loss": 2.6298, "step": 65100 }, { "epoch": 4.423495040086968, "grad_norm": 3.239362955093384, "learning_rate": 4.4724996602799296e-05, "loss": 2.4413, "step": 65105 }, { "epoch": 4.42383476015763, "grad_norm": 2.802731513977051, "learning_rate": 4.4720750101916024e-05, "loss": 2.6245, "step": 65110 }, { "epoch": 4.424174480228292, "grad_norm": 3.495818614959717, "learning_rate": 4.471650360103275e-05, "loss": 2.485, "step": 65115 }, { "epoch": 4.424514200298954, "grad_norm": 3.4066600799560547, "learning_rate": 4.471225710014948e-05, "loss": 2.4571, "step": 65120 }, { "epoch": 4.424853920369616, "grad_norm": 3.747781991958618, "learning_rate": 4.470801059926621e-05, "loss": 2.361, "step": 65125 }, { "epoch": 4.425193640440277, "grad_norm": 3.315157890319824, "learning_rate": 4.4703764098382936e-05, "loss": 2.3275, "step": 65130 }, { "epoch": 4.425533360510939, "grad_norm": 2.724362373352051, "learning_rate": 4.4699517597499664e-05, "loss": 2.4106, "step": 65135 }, { "epoch": 4.425873080581601, "grad_norm": 3.104619264602661, "learning_rate": 4.469527109661639e-05, "loss": 2.3092, "step": 65140 }, { "epoch": 4.426212800652262, "grad_norm": 3.950636386871338, "learning_rate": 4.4691024595733114e-05, "loss": 2.4922, "step": 65145 }, { "epoch": 4.426552520722924, "grad_norm": 3.5150017738342285, "learning_rate": 4.468677809484985e-05, "loss": 2.5603, "step": 65150 }, { "epoch": 4.4268922407935865, "grad_norm": 2.841505289077759, "learning_rate": 4.4682531593966576e-05, "loss": 2.3676, "step": 65155 }, { "epoch": 4.427231960864248, "grad_norm": 3.29913067817688, "learning_rate": 4.46782850930833e-05, "loss": 2.8844, "step": 65160 }, { "epoch": 4.42757168093491, "grad_norm": 2.9264395236968994, "learning_rate": 4.467403859220003e-05, "loss": 2.6767, "step": 65165 }, { "epoch": 4.427911401005572, 
"grad_norm": 3.4890975952148438, "learning_rate": 4.466979209131676e-05, "loss": 2.643, "step": 65170 }, { "epoch": 4.428251121076233, "grad_norm": 3.1529123783111572, "learning_rate": 4.466554559043348e-05, "loss": 2.2924, "step": 65175 }, { "epoch": 4.428590841146895, "grad_norm": 3.014180898666382, "learning_rate": 4.466129908955021e-05, "loss": 2.5732, "step": 65180 }, { "epoch": 4.428930561217557, "grad_norm": 3.086665391921997, "learning_rate": 4.4657052588666945e-05, "loss": 2.5949, "step": 65185 }, { "epoch": 4.429270281288218, "grad_norm": 3.0203654766082764, "learning_rate": 4.4652806087783666e-05, "loss": 2.7221, "step": 65190 }, { "epoch": 4.42961000135888, "grad_norm": 2.7233307361602783, "learning_rate": 4.4648559586900394e-05, "loss": 2.263, "step": 65195 }, { "epoch": 4.4299497214295425, "grad_norm": 3.097193717956543, "learning_rate": 4.464431308601713e-05, "loss": 2.5321, "step": 65200 }, { "epoch": 4.430289441500204, "grad_norm": 3.3599743843078613, "learning_rate": 4.464006658513385e-05, "loss": 2.5118, "step": 65205 }, { "epoch": 4.430629161570866, "grad_norm": 2.3783373832702637, "learning_rate": 4.463582008425058e-05, "loss": 2.1746, "step": 65210 }, { "epoch": 4.430968881641528, "grad_norm": 3.654205560684204, "learning_rate": 4.4631573583367306e-05, "loss": 2.4025, "step": 65215 }, { "epoch": 4.431308601712189, "grad_norm": 3.2142109870910645, "learning_rate": 4.4627327082484034e-05, "loss": 2.6018, "step": 65220 }, { "epoch": 4.431648321782851, "grad_norm": 3.0077390670776367, "learning_rate": 4.462308058160076e-05, "loss": 2.537, "step": 65225 }, { "epoch": 4.431988041853513, "grad_norm": 3.8509607315063477, "learning_rate": 4.461883408071749e-05, "loss": 2.5263, "step": 65230 }, { "epoch": 4.432327761924174, "grad_norm": 3.575918674468994, "learning_rate": 4.461458757983422e-05, "loss": 2.6148, "step": 65235 }, { "epoch": 4.432667481994836, "grad_norm": 3.2694592475891113, "learning_rate": 4.4610341078950946e-05, "loss": 2.496, "step": 
65240 }, { "epoch": 4.4330072020654985, "grad_norm": 3.349576950073242, "learning_rate": 4.4606094578067674e-05, "loss": 2.463, "step": 65245 }, { "epoch": 4.43334692213616, "grad_norm": 2.8851723670959473, "learning_rate": 4.46018480771844e-05, "loss": 2.9102, "step": 65250 }, { "epoch": 4.433686642206822, "grad_norm": 3.548063039779663, "learning_rate": 4.459760157630113e-05, "loss": 2.5338, "step": 65255 }, { "epoch": 4.434026362277484, "grad_norm": 3.8191704750061035, "learning_rate": 4.459335507541786e-05, "loss": 2.2192, "step": 65260 }, { "epoch": 4.434366082348145, "grad_norm": 4.1414899826049805, "learning_rate": 4.4589108574534586e-05, "loss": 2.6698, "step": 65265 }, { "epoch": 4.434705802418807, "grad_norm": 2.9401347637176514, "learning_rate": 4.4584862073651314e-05, "loss": 2.5199, "step": 65270 }, { "epoch": 4.435045522489468, "grad_norm": 3.495605945587158, "learning_rate": 4.458061557276804e-05, "loss": 2.3571, "step": 65275 }, { "epoch": 4.43538524256013, "grad_norm": 3.3735995292663574, "learning_rate": 4.457636907188477e-05, "loss": 2.3717, "step": 65280 }, { "epoch": 4.435724962630792, "grad_norm": 3.4921176433563232, "learning_rate": 4.45721225710015e-05, "loss": 2.9346, "step": 65285 }, { "epoch": 4.436064682701454, "grad_norm": 3.2586255073547363, "learning_rate": 4.4567876070118226e-05, "loss": 2.1733, "step": 65290 }, { "epoch": 4.436404402772116, "grad_norm": 4.495964050292969, "learning_rate": 4.4563629569234954e-05, "loss": 2.1938, "step": 65295 }, { "epoch": 4.436744122842778, "grad_norm": 3.2632293701171875, "learning_rate": 4.455938306835168e-05, "loss": 2.5942, "step": 65300 }, { "epoch": 4.437083842913439, "grad_norm": 3.2329318523406982, "learning_rate": 4.455513656746841e-05, "loss": 2.6243, "step": 65305 }, { "epoch": 4.437423562984101, "grad_norm": 3.2996773719787598, "learning_rate": 4.455089006658514e-05, "loss": 2.513, "step": 65310 }, { "epoch": 4.437763283054763, "grad_norm": 3.2258141040802, "learning_rate": 
4.454664356570186e-05, "loss": 2.3331, "step": 65315 }, { "epoch": 4.438103003125424, "grad_norm": 2.8461270332336426, "learning_rate": 4.4542397064818594e-05, "loss": 2.5821, "step": 65320 }, { "epoch": 4.438442723196086, "grad_norm": 2.6356287002563477, "learning_rate": 4.453815056393532e-05, "loss": 2.274, "step": 65325 }, { "epoch": 4.438782443266748, "grad_norm": 2.511157751083374, "learning_rate": 4.453390406305204e-05, "loss": 2.3949, "step": 65330 }, { "epoch": 4.43912216333741, "grad_norm": 3.0908613204956055, "learning_rate": 4.452965756216878e-05, "loss": 2.7342, "step": 65335 }, { "epoch": 4.439461883408072, "grad_norm": 2.878101110458374, "learning_rate": 4.4525411061285506e-05, "loss": 2.2176, "step": 65340 }, { "epoch": 4.439801603478734, "grad_norm": 2.6233246326446533, "learning_rate": 4.452116456040223e-05, "loss": 2.4964, "step": 65345 }, { "epoch": 4.440141323549395, "grad_norm": 2.8784303665161133, "learning_rate": 4.4516918059518955e-05, "loss": 2.5573, "step": 65350 }, { "epoch": 4.440481043620057, "grad_norm": 3.401594877243042, "learning_rate": 4.451267155863569e-05, "loss": 2.2764, "step": 65355 }, { "epoch": 4.440820763690719, "grad_norm": 2.846921920776367, "learning_rate": 4.450842505775241e-05, "loss": 2.2615, "step": 65360 }, { "epoch": 4.44116048376138, "grad_norm": 3.2721917629241943, "learning_rate": 4.450417855686914e-05, "loss": 2.6441, "step": 65365 }, { "epoch": 4.441500203832042, "grad_norm": 3.0663809776306152, "learning_rate": 4.4499932055985874e-05, "loss": 2.4652, "step": 65370 }, { "epoch": 4.441839923902704, "grad_norm": 3.447009563446045, "learning_rate": 4.4495685555102595e-05, "loss": 2.2022, "step": 65375 }, { "epoch": 4.442179643973366, "grad_norm": 3.300241470336914, "learning_rate": 4.449143905421932e-05, "loss": 2.4399, "step": 65380 }, { "epoch": 4.442519364044028, "grad_norm": 2.9944405555725098, "learning_rate": 4.448719255333605e-05, "loss": 2.2858, "step": 65385 }, { "epoch": 4.44285908411469, "grad_norm": 
3.3608384132385254, "learning_rate": 4.448294605245278e-05, "loss": 2.494, "step": 65390 }, { "epoch": 4.443198804185351, "grad_norm": 2.856179714202881, "learning_rate": 4.447869955156951e-05, "loss": 2.4913, "step": 65395 }, { "epoch": 4.443538524256013, "grad_norm": 3.2387237548828125, "learning_rate": 4.4474453050686235e-05, "loss": 2.4925, "step": 65400 }, { "epoch": 4.443878244326675, "grad_norm": 3.2703166007995605, "learning_rate": 4.447020654980296e-05, "loss": 2.2343, "step": 65405 }, { "epoch": 4.444217964397336, "grad_norm": 2.968162775039673, "learning_rate": 4.446596004891969e-05, "loss": 2.3867, "step": 65410 }, { "epoch": 4.444557684467998, "grad_norm": 3.0134284496307373, "learning_rate": 4.446171354803642e-05, "loss": 2.633, "step": 65415 }, { "epoch": 4.4448974045386604, "grad_norm": 2.749546527862549, "learning_rate": 4.445746704715315e-05, "loss": 2.5791, "step": 65420 }, { "epoch": 4.445237124609322, "grad_norm": 3.0149025917053223, "learning_rate": 4.4453220546269875e-05, "loss": 2.6042, "step": 65425 }, { "epoch": 4.445576844679984, "grad_norm": 3.5977859497070312, "learning_rate": 4.44489740453866e-05, "loss": 2.5404, "step": 65430 }, { "epoch": 4.445916564750646, "grad_norm": 3.607839822769165, "learning_rate": 4.444472754450333e-05, "loss": 2.3764, "step": 65435 }, { "epoch": 4.446256284821307, "grad_norm": 2.547142267227173, "learning_rate": 4.444048104362006e-05, "loss": 2.539, "step": 65440 }, { "epoch": 4.446596004891969, "grad_norm": 3.087411880493164, "learning_rate": 4.443623454273679e-05, "loss": 2.3444, "step": 65445 }, { "epoch": 4.446935724962631, "grad_norm": 3.2149715423583984, "learning_rate": 4.4431988041853515e-05, "loss": 2.6373, "step": 65450 }, { "epoch": 4.447275445033292, "grad_norm": 2.954043388366699, "learning_rate": 4.44285908411469e-05, "loss": 2.6387, "step": 65455 }, { "epoch": 4.447615165103954, "grad_norm": 2.985930919647217, "learning_rate": 4.4424344340263626e-05, "loss": 2.353, "step": 65460 }, { "epoch": 
4.4479548851746165, "grad_norm": 3.658461570739746, "learning_rate": 4.4420097839380354e-05, "loss": 2.7407, "step": 65465 }, { "epoch": 4.448294605245278, "grad_norm": 3.699699640274048, "learning_rate": 4.441585133849708e-05, "loss": 2.3423, "step": 65470 }, { "epoch": 4.44863432531594, "grad_norm": 3.160362482070923, "learning_rate": 4.441160483761381e-05, "loss": 2.3665, "step": 65475 }, { "epoch": 4.448974045386602, "grad_norm": 3.874150037765503, "learning_rate": 4.440735833673054e-05, "loss": 2.3304, "step": 65480 }, { "epoch": 4.449313765457263, "grad_norm": 3.7300498485565186, "learning_rate": 4.4403111835847266e-05, "loss": 2.3774, "step": 65485 }, { "epoch": 4.449653485527925, "grad_norm": 3.378183126449585, "learning_rate": 4.4398865334963994e-05, "loss": 2.5148, "step": 65490 }, { "epoch": 4.449993205598587, "grad_norm": 3.1255226135253906, "learning_rate": 4.439461883408072e-05, "loss": 2.5323, "step": 65495 }, { "epoch": 4.450332925669248, "grad_norm": 2.8281946182250977, "learning_rate": 4.439037233319745e-05, "loss": 2.3971, "step": 65500 }, { "epoch": 4.45067264573991, "grad_norm": 2.7651798725128174, "learning_rate": 4.438612583231417e-05, "loss": 2.3175, "step": 65505 }, { "epoch": 4.4510123658105725, "grad_norm": 2.810812473297119, "learning_rate": 4.4381879331430906e-05, "loss": 2.3064, "step": 65510 }, { "epoch": 4.451352085881234, "grad_norm": 3.3839170932769775, "learning_rate": 4.4377632830547634e-05, "loss": 2.7277, "step": 65515 }, { "epoch": 4.451691805951896, "grad_norm": 2.968958854675293, "learning_rate": 4.4373386329664355e-05, "loss": 2.5988, "step": 65520 }, { "epoch": 4.452031526022558, "grad_norm": 3.100275993347168, "learning_rate": 4.436913982878109e-05, "loss": 2.5746, "step": 65525 }, { "epoch": 4.452371246093219, "grad_norm": 2.4693777561187744, "learning_rate": 4.436489332789782e-05, "loss": 2.0263, "step": 65530 }, { "epoch": 4.452710966163881, "grad_norm": 2.8280575275421143, "learning_rate": 4.436064682701454e-05, 
"loss": 2.0781, "step": 65535 }, { "epoch": 4.453050686234543, "grad_norm": 2.933708429336548, "learning_rate": 4.435640032613127e-05, "loss": 2.2631, "step": 65540 }, { "epoch": 4.453390406305204, "grad_norm": 2.9428176879882812, "learning_rate": 4.4352153825248e-05, "loss": 2.5348, "step": 65545 }, { "epoch": 4.453730126375866, "grad_norm": 2.5763866901397705, "learning_rate": 4.434790732436472e-05, "loss": 2.4634, "step": 65550 }, { "epoch": 4.454069846446528, "grad_norm": 2.7209951877593994, "learning_rate": 4.434366082348145e-05, "loss": 2.4118, "step": 65555 }, { "epoch": 4.45440956651719, "grad_norm": 2.3674046993255615, "learning_rate": 4.4339414322598186e-05, "loss": 2.4962, "step": 65560 }, { "epoch": 4.454749286587852, "grad_norm": 3.073737144470215, "learning_rate": 4.433516782171491e-05, "loss": 2.3688, "step": 65565 }, { "epoch": 4.455089006658513, "grad_norm": 3.4987220764160156, "learning_rate": 4.4330921320831635e-05, "loss": 2.6581, "step": 65570 }, { "epoch": 4.455428726729175, "grad_norm": 2.514317750930786, "learning_rate": 4.432667481994836e-05, "loss": 2.6716, "step": 65575 }, { "epoch": 4.455768446799837, "grad_norm": 3.059868335723877, "learning_rate": 4.432242831906509e-05, "loss": 2.2473, "step": 65580 }, { "epoch": 4.456108166870498, "grad_norm": 2.5337917804718018, "learning_rate": 4.431818181818182e-05, "loss": 2.2132, "step": 65585 }, { "epoch": 4.45644788694116, "grad_norm": 3.4795327186584473, "learning_rate": 4.431393531729855e-05, "loss": 2.1402, "step": 65590 }, { "epoch": 4.456787607011822, "grad_norm": 3.2684266567230225, "learning_rate": 4.4309688816415275e-05, "loss": 2.5616, "step": 65595 }, { "epoch": 4.457127327082484, "grad_norm": 3.3585941791534424, "learning_rate": 4.4305442315532e-05, "loss": 2.4072, "step": 65600 }, { "epoch": 4.457467047153146, "grad_norm": 3.5720324516296387, "learning_rate": 4.430119581464873e-05, "loss": 2.5721, "step": 65605 }, { "epoch": 4.457806767223808, "grad_norm": 3.4670753479003906, 
"learning_rate": 4.429694931376546e-05, "loss": 2.5129, "step": 65610 }, { "epoch": 4.458146487294469, "grad_norm": 3.035352945327759, "learning_rate": 4.429270281288219e-05, "loss": 2.3898, "step": 65615 }, { "epoch": 4.458486207365131, "grad_norm": 3.006521701812744, "learning_rate": 4.4288456311998915e-05, "loss": 2.6007, "step": 65620 }, { "epoch": 4.458825927435793, "grad_norm": 3.8053107261657715, "learning_rate": 4.428420981111564e-05, "loss": 2.1518, "step": 65625 }, { "epoch": 4.459165647506454, "grad_norm": 3.7737088203430176, "learning_rate": 4.427996331023237e-05, "loss": 2.4169, "step": 65630 }, { "epoch": 4.459505367577116, "grad_norm": 3.9807631969451904, "learning_rate": 4.42757168093491e-05, "loss": 2.4213, "step": 65635 }, { "epoch": 4.459845087647778, "grad_norm": 2.8221476078033447, "learning_rate": 4.427147030846583e-05, "loss": 2.501, "step": 65640 }, { "epoch": 4.46018480771844, "grad_norm": 3.2772858142852783, "learning_rate": 4.4267223807582555e-05, "loss": 2.4406, "step": 65645 }, { "epoch": 4.460524527789102, "grad_norm": 2.994422435760498, "learning_rate": 4.426297730669928e-05, "loss": 2.2572, "step": 65650 }, { "epoch": 4.460864247859764, "grad_norm": 3.710730791091919, "learning_rate": 4.425873080581601e-05, "loss": 2.334, "step": 65655 }, { "epoch": 4.461203967930425, "grad_norm": 4.239198684692383, "learning_rate": 4.425448430493274e-05, "loss": 2.2232, "step": 65660 }, { "epoch": 4.461543688001087, "grad_norm": 2.6681370735168457, "learning_rate": 4.425023780404947e-05, "loss": 2.4784, "step": 65665 }, { "epoch": 4.461883408071749, "grad_norm": 3.2528204917907715, "learning_rate": 4.4245991303166195e-05, "loss": 2.4688, "step": 65670 }, { "epoch": 4.46222312814241, "grad_norm": 2.863567590713501, "learning_rate": 4.4241744802282917e-05, "loss": 2.6196, "step": 65675 }, { "epoch": 4.462562848213072, "grad_norm": 2.1789562702178955, "learning_rate": 4.423749830139965e-05, "loss": 2.4148, "step": 65680 }, { "epoch": 4.462902568283734, 
"grad_norm": 3.787428379058838, "learning_rate": 4.423325180051638e-05, "loss": 2.4947, "step": 65685 }, { "epoch": 4.463242288354396, "grad_norm": 3.1718850135803223, "learning_rate": 4.42290052996331e-05, "loss": 2.5377, "step": 65690 }, { "epoch": 4.463582008425058, "grad_norm": 3.6752216815948486, "learning_rate": 4.4224758798749835e-05, "loss": 2.2801, "step": 65695 }, { "epoch": 4.46392172849572, "grad_norm": 3.6251168251037598, "learning_rate": 4.4220512297866563e-05, "loss": 2.3042, "step": 65700 }, { "epoch": 4.464261448566381, "grad_norm": 2.7628254890441895, "learning_rate": 4.4216265796983285e-05, "loss": 2.578, "step": 65705 }, { "epoch": 4.464601168637043, "grad_norm": 3.1054704189300537, "learning_rate": 4.421201929610001e-05, "loss": 2.2753, "step": 65710 }, { "epoch": 4.464940888707705, "grad_norm": 3.1156530380249023, "learning_rate": 4.420777279521675e-05, "loss": 2.5162, "step": 65715 }, { "epoch": 4.465280608778366, "grad_norm": 4.212575435638428, "learning_rate": 4.420352629433347e-05, "loss": 2.471, "step": 65720 }, { "epoch": 4.465620328849028, "grad_norm": 3.3173153400421143, "learning_rate": 4.41992797934502e-05, "loss": 2.254, "step": 65725 }, { "epoch": 4.4659600489196905, "grad_norm": 2.984057664871216, "learning_rate": 4.419503329256693e-05, "loss": 2.4115, "step": 65730 }, { "epoch": 4.466299768990352, "grad_norm": 3.0919132232666016, "learning_rate": 4.419078679168365e-05, "loss": 2.3649, "step": 65735 }, { "epoch": 4.466639489061014, "grad_norm": 3.103698253631592, "learning_rate": 4.418654029080038e-05, "loss": 2.17, "step": 65740 }, { "epoch": 4.466979209131676, "grad_norm": 3.4024267196655273, "learning_rate": 4.418229378991711e-05, "loss": 2.4157, "step": 65745 }, { "epoch": 4.467318929202337, "grad_norm": 2.684837579727173, "learning_rate": 4.417804728903384e-05, "loss": 2.3108, "step": 65750 }, { "epoch": 4.467658649272999, "grad_norm": 3.110419988632202, "learning_rate": 4.4173800788150565e-05, "loss": 2.4329, "step": 65755 
}, { "epoch": 4.467998369343661, "grad_norm": 3.051778793334961, "learning_rate": 4.416955428726729e-05, "loss": 2.6416, "step": 65760 }, { "epoch": 4.468338089414322, "grad_norm": 2.7067136764526367, "learning_rate": 4.416530778638402e-05, "loss": 2.5655, "step": 65765 }, { "epoch": 4.468677809484984, "grad_norm": 3.205303907394409, "learning_rate": 4.416106128550075e-05, "loss": 2.4237, "step": 65770 }, { "epoch": 4.4690175295556465, "grad_norm": 3.160928964614868, "learning_rate": 4.415681478461748e-05, "loss": 2.5482, "step": 65775 }, { "epoch": 4.469357249626308, "grad_norm": 2.6580851078033447, "learning_rate": 4.4152568283734205e-05, "loss": 2.4106, "step": 65780 }, { "epoch": 4.46969696969697, "grad_norm": 3.0324361324310303, "learning_rate": 4.414832178285093e-05, "loss": 2.237, "step": 65785 }, { "epoch": 4.470036689767632, "grad_norm": 2.8974368572235107, "learning_rate": 4.414407528196766e-05, "loss": 2.4851, "step": 65790 }, { "epoch": 4.470376409838293, "grad_norm": 3.708646297454834, "learning_rate": 4.413982878108439e-05, "loss": 2.4457, "step": 65795 }, { "epoch": 4.470716129908955, "grad_norm": 2.9524662494659424, "learning_rate": 4.413558228020112e-05, "loss": 2.4081, "step": 65800 }, { "epoch": 4.471055849979617, "grad_norm": 3.820319175720215, "learning_rate": 4.4131335779317845e-05, "loss": 2.5038, "step": 65805 }, { "epoch": 4.471395570050278, "grad_norm": 3.6014909744262695, "learning_rate": 4.412708927843457e-05, "loss": 2.4933, "step": 65810 }, { "epoch": 4.47173529012094, "grad_norm": 2.985015392303467, "learning_rate": 4.41228427775513e-05, "loss": 2.329, "step": 65815 }, { "epoch": 4.4720750101916025, "grad_norm": 3.942418098449707, "learning_rate": 4.411859627666803e-05, "loss": 2.4714, "step": 65820 }, { "epoch": 4.472414730262264, "grad_norm": 3.6129565238952637, "learning_rate": 4.411434977578476e-05, "loss": 2.3635, "step": 65825 }, { "epoch": 4.472754450332926, "grad_norm": 3.4189951419830322, "learning_rate": 
4.4110103274901485e-05, "loss": 2.2325, "step": 65830 }, { "epoch": 4.473094170403588, "grad_norm": 3.5922555923461914, "learning_rate": 4.410585677401821e-05, "loss": 2.2756, "step": 65835 }, { "epoch": 4.473433890474249, "grad_norm": 2.9708609580993652, "learning_rate": 4.410161027313494e-05, "loss": 2.4672, "step": 65840 }, { "epoch": 4.473773610544911, "grad_norm": 4.067960739135742, "learning_rate": 4.409736377225166e-05, "loss": 2.2976, "step": 65845 }, { "epoch": 4.474113330615573, "grad_norm": 3.257199764251709, "learning_rate": 4.40931172713684e-05, "loss": 2.3073, "step": 65850 }, { "epoch": 4.474453050686234, "grad_norm": 2.6645398139953613, "learning_rate": 4.4088870770485125e-05, "loss": 2.1457, "step": 65855 }, { "epoch": 4.474792770756896, "grad_norm": 2.4895660877227783, "learning_rate": 4.4084624269601846e-05, "loss": 2.2495, "step": 65860 }, { "epoch": 4.4751324908275585, "grad_norm": 3.6551690101623535, "learning_rate": 4.408037776871858e-05, "loss": 2.4857, "step": 65865 }, { "epoch": 4.47547221089822, "grad_norm": 3.0030572414398193, "learning_rate": 4.407613126783531e-05, "loss": 2.3649, "step": 65870 }, { "epoch": 4.475811930968882, "grad_norm": 4.276697635650635, "learning_rate": 4.407188476695203e-05, "loss": 2.5972, "step": 65875 }, { "epoch": 4.476151651039544, "grad_norm": 2.9248993396759033, "learning_rate": 4.406763826606876e-05, "loss": 2.4677, "step": 65880 }, { "epoch": 4.476491371110205, "grad_norm": 3.1813507080078125, "learning_rate": 4.406339176518549e-05, "loss": 2.4064, "step": 65885 }, { "epoch": 4.476831091180867, "grad_norm": 3.3216733932495117, "learning_rate": 4.4059145264302214e-05, "loss": 2.6232, "step": 65890 }, { "epoch": 4.477170811251529, "grad_norm": 2.939202308654785, "learning_rate": 4.405489876341894e-05, "loss": 2.356, "step": 65895 }, { "epoch": 4.47751053132219, "grad_norm": 3.3944714069366455, "learning_rate": 4.405065226253568e-05, "loss": 2.2719, "step": 65900 }, { "epoch": 4.477850251392852, "grad_norm": 
3.266735315322876, "learning_rate": 4.40464057616524e-05, "loss": 2.3974, "step": 65905 }, { "epoch": 4.4781899714635145, "grad_norm": 3.023348331451416, "learning_rate": 4.4042159260769126e-05, "loss": 2.279, "step": 65910 }, { "epoch": 4.478529691534176, "grad_norm": 2.797807455062866, "learning_rate": 4.403791275988586e-05, "loss": 2.5859, "step": 65915 }, { "epoch": 4.478869411604838, "grad_norm": 3.343506097793579, "learning_rate": 4.403366625900258e-05, "loss": 2.4193, "step": 65920 }, { "epoch": 4.4792091316755, "grad_norm": 3.0805938243865967, "learning_rate": 4.402941975811931e-05, "loss": 2.5479, "step": 65925 }, { "epoch": 4.479548851746161, "grad_norm": 2.949781656265259, "learning_rate": 4.402517325723604e-05, "loss": 2.3865, "step": 65930 }, { "epoch": 4.479888571816823, "grad_norm": 3.926373243331909, "learning_rate": 4.4020926756352766e-05, "loss": 2.3301, "step": 65935 }, { "epoch": 4.480228291887485, "grad_norm": 2.7211484909057617, "learning_rate": 4.4016680255469494e-05, "loss": 2.5042, "step": 65940 }, { "epoch": 4.480568011958146, "grad_norm": 3.647627592086792, "learning_rate": 4.401243375458622e-05, "loss": 2.3322, "step": 65945 }, { "epoch": 4.480907732028808, "grad_norm": 2.8916330337524414, "learning_rate": 4.400818725370295e-05, "loss": 2.3187, "step": 65950 }, { "epoch": 4.4812474520994705, "grad_norm": 3.621957778930664, "learning_rate": 4.400394075281968e-05, "loss": 2.6037, "step": 65955 }, { "epoch": 4.481587172170132, "grad_norm": 3.5922696590423584, "learning_rate": 4.3999694251936406e-05, "loss": 2.4149, "step": 65960 }, { "epoch": 4.481926892240794, "grad_norm": 3.1254618167877197, "learning_rate": 4.3995447751053134e-05, "loss": 2.4256, "step": 65965 }, { "epoch": 4.482266612311455, "grad_norm": 2.9869649410247803, "learning_rate": 4.399120125016986e-05, "loss": 2.2611, "step": 65970 }, { "epoch": 4.482606332382117, "grad_norm": 3.2014920711517334, "learning_rate": 4.398695474928659e-05, "loss": 2.4084, "step": 65975 }, { 
"epoch": 4.482946052452779, "grad_norm": 3.187274217605591, "learning_rate": 4.398270824840332e-05, "loss": 2.412, "step": 65980 }, { "epoch": 4.48328577252344, "grad_norm": 3.689544916152954, "learning_rate": 4.3978461747520046e-05, "loss": 2.4951, "step": 65985 }, { "epoch": 4.483625492594102, "grad_norm": 2.8341612815856934, "learning_rate": 4.3974215246636774e-05, "loss": 2.2862, "step": 65990 }, { "epoch": 4.483965212664764, "grad_norm": 2.951995849609375, "learning_rate": 4.39699687457535e-05, "loss": 2.3027, "step": 65995 }, { "epoch": 4.484304932735426, "grad_norm": 3.0630667209625244, "learning_rate": 4.396572224487023e-05, "loss": 2.2931, "step": 66000 }, { "epoch": 4.484644652806088, "grad_norm": 3.6876792907714844, "learning_rate": 4.396147574398696e-05, "loss": 2.4357, "step": 66005 }, { "epoch": 4.48498437287675, "grad_norm": 3.646728992462158, "learning_rate": 4.3957229243103686e-05, "loss": 2.1853, "step": 66010 }, { "epoch": 4.485324092947411, "grad_norm": 3.0687122344970703, "learning_rate": 4.3952982742220414e-05, "loss": 2.647, "step": 66015 }, { "epoch": 4.485663813018073, "grad_norm": 3.254775047302246, "learning_rate": 4.394873624133714e-05, "loss": 2.3719, "step": 66020 }, { "epoch": 4.486003533088735, "grad_norm": 3.0031497478485107, "learning_rate": 4.394448974045387e-05, "loss": 2.383, "step": 66025 }, { "epoch": 4.486343253159396, "grad_norm": 3.2712631225585938, "learning_rate": 4.394024323957059e-05, "loss": 2.3216, "step": 66030 }, { "epoch": 4.486682973230058, "grad_norm": 3.373596668243408, "learning_rate": 4.3935996738687326e-05, "loss": 2.2157, "step": 66035 }, { "epoch": 4.4870226933007205, "grad_norm": 2.8494675159454346, "learning_rate": 4.3931750237804054e-05, "loss": 2.4911, "step": 66040 }, { "epoch": 4.487362413371382, "grad_norm": 3.460514545440674, "learning_rate": 4.3927503736920776e-05, "loss": 2.589, "step": 66045 }, { "epoch": 4.487702133442044, "grad_norm": 2.815699577331543, "learning_rate": 4.392325723603751e-05, 
"loss": 2.4742, "step": 66050 }, { "epoch": 4.488041853512706, "grad_norm": 2.8753037452697754, "learning_rate": 4.391901073515424e-05, "loss": 2.2694, "step": 66055 }, { "epoch": 4.488381573583367, "grad_norm": 2.9914963245391846, "learning_rate": 4.391476423427096e-05, "loss": 2.3143, "step": 66060 }, { "epoch": 4.488721293654029, "grad_norm": 3.014941692352295, "learning_rate": 4.391051773338769e-05, "loss": 2.5035, "step": 66065 }, { "epoch": 4.489061013724691, "grad_norm": 3.824120283126831, "learning_rate": 4.390627123250442e-05, "loss": 2.6009, "step": 66070 }, { "epoch": 4.489400733795352, "grad_norm": 3.829559087753296, "learning_rate": 4.3902024731621144e-05, "loss": 2.5146, "step": 66075 }, { "epoch": 4.489740453866014, "grad_norm": 3.2153470516204834, "learning_rate": 4.389777823073787e-05, "loss": 2.2709, "step": 66080 }, { "epoch": 4.4900801739366765, "grad_norm": 3.8231005668640137, "learning_rate": 4.3893531729854606e-05, "loss": 2.5298, "step": 66085 }, { "epoch": 4.490419894007338, "grad_norm": 3.5540146827697754, "learning_rate": 4.388928522897133e-05, "loss": 2.5632, "step": 66090 }, { "epoch": 4.490759614078, "grad_norm": 3.367605209350586, "learning_rate": 4.3885038728088056e-05, "loss": 2.1875, "step": 66095 }, { "epoch": 4.491099334148662, "grad_norm": 3.920666456222534, "learning_rate": 4.3880792227204784e-05, "loss": 2.3987, "step": 66100 }, { "epoch": 4.491439054219323, "grad_norm": 2.6616404056549072, "learning_rate": 4.387654572632151e-05, "loss": 2.4288, "step": 66105 }, { "epoch": 4.491778774289985, "grad_norm": 3.2065505981445312, "learning_rate": 4.387229922543824e-05, "loss": 2.3973, "step": 66110 }, { "epoch": 4.492118494360647, "grad_norm": 2.9911139011383057, "learning_rate": 4.386805272455497e-05, "loss": 2.5249, "step": 66115 }, { "epoch": 4.492458214431308, "grad_norm": 2.9389946460723877, "learning_rate": 4.3863806223671696e-05, "loss": 2.6989, "step": 66120 }, { "epoch": 4.49279793450197, "grad_norm": 2.5909812450408936, 
"learning_rate": 4.3859559722788424e-05, "loss": 2.4154, "step": 66125 }, { "epoch": 4.4931376545726325, "grad_norm": 3.514963150024414, "learning_rate": 4.385531322190515e-05, "loss": 2.4293, "step": 66130 }, { "epoch": 4.493477374643294, "grad_norm": 3.6808416843414307, "learning_rate": 4.385106672102188e-05, "loss": 2.0545, "step": 66135 }, { "epoch": 4.493817094713956, "grad_norm": 2.9181580543518066, "learning_rate": 4.384682022013861e-05, "loss": 2.5589, "step": 66140 }, { "epoch": 4.494156814784618, "grad_norm": 2.712550401687622, "learning_rate": 4.3842573719255336e-05, "loss": 2.449, "step": 66145 }, { "epoch": 4.494496534855279, "grad_norm": 2.6862168312072754, "learning_rate": 4.3838327218372064e-05, "loss": 2.5634, "step": 66150 }, { "epoch": 4.494836254925941, "grad_norm": 2.601837158203125, "learning_rate": 4.383408071748879e-05, "loss": 2.4711, "step": 66155 }, { "epoch": 4.495175974996603, "grad_norm": 3.6347298622131348, "learning_rate": 4.382983421660552e-05, "loss": 2.5968, "step": 66160 }, { "epoch": 4.495515695067264, "grad_norm": 3.4618752002716064, "learning_rate": 4.382558771572225e-05, "loss": 2.4435, "step": 66165 }, { "epoch": 4.495855415137926, "grad_norm": 4.181451797485352, "learning_rate": 4.3821341214838976e-05, "loss": 2.3714, "step": 66170 }, { "epoch": 4.4961951352085885, "grad_norm": 3.6504554748535156, "learning_rate": 4.3817094713955704e-05, "loss": 2.5049, "step": 66175 }, { "epoch": 4.49653485527925, "grad_norm": 3.500433921813965, "learning_rate": 4.381284821307243e-05, "loss": 2.2532, "step": 66180 }, { "epoch": 4.496874575349912, "grad_norm": 3.107571840286255, "learning_rate": 4.380860171218916e-05, "loss": 2.4095, "step": 66185 }, { "epoch": 4.497214295420574, "grad_norm": 3.305636167526245, "learning_rate": 4.380435521130589e-05, "loss": 2.6024, "step": 66190 }, { "epoch": 4.497554015491235, "grad_norm": 3.5069167613983154, "learning_rate": 4.3800108710422616e-05, "loss": 2.5393, "step": 66195 }, { "epoch": 
4.497893735561897, "grad_norm": 3.768373727798462, "learning_rate": 4.379586220953934e-05, "loss": 2.4685, "step": 66200 }, { "epoch": 4.498233455632559, "grad_norm": 3.664468288421631, "learning_rate": 4.379161570865607e-05, "loss": 2.179, "step": 66205 }, { "epoch": 4.49857317570322, "grad_norm": 2.7422354221343994, "learning_rate": 4.37873692077728e-05, "loss": 2.5517, "step": 66210 }, { "epoch": 4.498912895773882, "grad_norm": 2.9416916370391846, "learning_rate": 4.378312270688952e-05, "loss": 2.3738, "step": 66215 }, { "epoch": 4.4992526158445445, "grad_norm": 3.0781919956207275, "learning_rate": 4.3778876206006256e-05, "loss": 2.3692, "step": 66220 }, { "epoch": 4.499592335915206, "grad_norm": 3.5124716758728027, "learning_rate": 4.3774629705122984e-05, "loss": 2.4321, "step": 66225 }, { "epoch": 4.499932055985868, "grad_norm": 3.0221166610717773, "learning_rate": 4.3770383204239705e-05, "loss": 2.4136, "step": 66230 }, { "epoch": 4.500271776056529, "grad_norm": 3.0121076107025146, "learning_rate": 4.376613670335643e-05, "loss": 2.4237, "step": 66235 }, { "epoch": 4.500611496127191, "grad_norm": 2.9124581813812256, "learning_rate": 4.376189020247317e-05, "loss": 2.2719, "step": 66240 }, { "epoch": 4.500951216197853, "grad_norm": 3.9983654022216797, "learning_rate": 4.375764370158989e-05, "loss": 2.303, "step": 66245 }, { "epoch": 4.501290936268514, "grad_norm": 2.8862597942352295, "learning_rate": 4.375339720070662e-05, "loss": 2.5571, "step": 66250 }, { "epoch": 4.501630656339176, "grad_norm": 3.3651838302612305, "learning_rate": 4.374915069982335e-05, "loss": 2.2026, "step": 66255 }, { "epoch": 4.501970376409838, "grad_norm": 3.0206527709960938, "learning_rate": 4.374490419894007e-05, "loss": 2.4231, "step": 66260 }, { "epoch": 4.5023100964805, "grad_norm": 2.7781739234924316, "learning_rate": 4.37406576980568e-05, "loss": 2.3669, "step": 66265 }, { "epoch": 4.502649816551162, "grad_norm": 3.6477129459381104, "learning_rate": 4.373641119717353e-05, "loss": 
2.4144, "step": 66270 }, { "epoch": 4.502989536621824, "grad_norm": 4.033641815185547, "learning_rate": 4.373216469629026e-05, "loss": 2.5302, "step": 66275 }, { "epoch": 4.503329256692485, "grad_norm": 3.2932820320129395, "learning_rate": 4.3727918195406985e-05, "loss": 2.5762, "step": 66280 }, { "epoch": 4.503668976763147, "grad_norm": 3.275506019592285, "learning_rate": 4.372367169452371e-05, "loss": 2.7248, "step": 66285 }, { "epoch": 4.504008696833809, "grad_norm": 3.2068288326263428, "learning_rate": 4.371942519364044e-05, "loss": 2.5817, "step": 66290 }, { "epoch": 4.50434841690447, "grad_norm": 3.2274210453033447, "learning_rate": 4.371517869275717e-05, "loss": 2.4014, "step": 66295 }, { "epoch": 4.504688136975132, "grad_norm": 3.172837018966675, "learning_rate": 4.37109321918739e-05, "loss": 2.5922, "step": 66300 }, { "epoch": 4.5050278570457944, "grad_norm": 2.9181456565856934, "learning_rate": 4.3706685690990625e-05, "loss": 2.5826, "step": 66305 }, { "epoch": 4.505367577116456, "grad_norm": 2.8421502113342285, "learning_rate": 4.370243919010735e-05, "loss": 2.2875, "step": 66310 }, { "epoch": 4.505707297187118, "grad_norm": 3.4005703926086426, "learning_rate": 4.369819268922408e-05, "loss": 2.3695, "step": 66315 }, { "epoch": 4.50604701725778, "grad_norm": 3.428179979324341, "learning_rate": 4.369394618834081e-05, "loss": 2.1405, "step": 66320 }, { "epoch": 4.506386737328441, "grad_norm": 3.030235528945923, "learning_rate": 4.368969968745754e-05, "loss": 2.6608, "step": 66325 }, { "epoch": 4.506726457399103, "grad_norm": 2.7546448707580566, "learning_rate": 4.3685453186574265e-05, "loss": 2.3136, "step": 66330 }, { "epoch": 4.507066177469765, "grad_norm": 2.8731696605682373, "learning_rate": 4.368120668569099e-05, "loss": 2.4841, "step": 66335 }, { "epoch": 4.507405897540426, "grad_norm": 3.474050283432007, "learning_rate": 4.367696018480772e-05, "loss": 2.0, "step": 66340 }, { "epoch": 4.507745617611088, "grad_norm": 2.834357261657715, "learning_rate": 
4.367271368392445e-05, "loss": 2.6532, "step": 66345 }, { "epoch": 4.5080853376817505, "grad_norm": 2.9278597831726074, "learning_rate": 4.366846718304118e-05, "loss": 2.2244, "step": 66350 }, { "epoch": 4.508425057752412, "grad_norm": 3.258460760116577, "learning_rate": 4.3664220682157905e-05, "loss": 2.293, "step": 66355 }, { "epoch": 4.508764777823074, "grad_norm": 2.676663398742676, "learning_rate": 4.365997418127463e-05, "loss": 2.5775, "step": 66360 }, { "epoch": 4.509104497893736, "grad_norm": 3.188694477081299, "learning_rate": 4.365572768039136e-05, "loss": 2.7584, "step": 66365 }, { "epoch": 4.509444217964397, "grad_norm": 4.276689052581787, "learning_rate": 4.365148117950808e-05, "loss": 2.2954, "step": 66370 }, { "epoch": 4.509783938035059, "grad_norm": 3.240137815475464, "learning_rate": 4.364723467862482e-05, "loss": 2.3488, "step": 66375 }, { "epoch": 4.510123658105721, "grad_norm": 2.9698522090911865, "learning_rate": 4.3642988177741545e-05, "loss": 2.4215, "step": 66380 }, { "epoch": 4.510463378176382, "grad_norm": 3.155287742614746, "learning_rate": 4.3638741676858267e-05, "loss": 2.5467, "step": 66385 }, { "epoch": 4.510803098247044, "grad_norm": 3.407733201980591, "learning_rate": 4.3634495175975e-05, "loss": 2.5296, "step": 66390 }, { "epoch": 4.5111428183177065, "grad_norm": 3.0172812938690186, "learning_rate": 4.363024867509173e-05, "loss": 2.3342, "step": 66395 }, { "epoch": 4.511482538388368, "grad_norm": 3.81595516204834, "learning_rate": 4.362600217420845e-05, "loss": 2.3063, "step": 66400 }, { "epoch": 4.51182225845903, "grad_norm": 3.029481887817383, "learning_rate": 4.362175567332518e-05, "loss": 2.4393, "step": 66405 }, { "epoch": 4.512161978529692, "grad_norm": 4.258464813232422, "learning_rate": 4.3617509172441913e-05, "loss": 2.2868, "step": 66410 }, { "epoch": 4.512501698600353, "grad_norm": 3.027521848678589, "learning_rate": 4.3613262671558635e-05, "loss": 2.3346, "step": 66415 }, { "epoch": 4.512841418671015, "grad_norm": 
3.6710002422332764, "learning_rate": 4.360901617067536e-05, "loss": 2.3899, "step": 66420 }, { "epoch": 4.513181138741677, "grad_norm": 3.047328233718872, "learning_rate": 4.36047696697921e-05, "loss": 2.1483, "step": 66425 }, { "epoch": 4.513520858812338, "grad_norm": 3.193960189819336, "learning_rate": 4.360052316890882e-05, "loss": 2.4355, "step": 66430 }, { "epoch": 4.513860578883, "grad_norm": 3.0354034900665283, "learning_rate": 4.359627666802555e-05, "loss": 2.4791, "step": 66435 }, { "epoch": 4.5142002989536625, "grad_norm": 3.4612669944763184, "learning_rate": 4.359203016714228e-05, "loss": 2.5847, "step": 66440 }, { "epoch": 4.514540019024324, "grad_norm": 3.1036477088928223, "learning_rate": 4.3587783666259e-05, "loss": 2.2897, "step": 66445 }, { "epoch": 4.514879739094986, "grad_norm": 2.9780547618865967, "learning_rate": 4.358353716537573e-05, "loss": 2.5783, "step": 66450 }, { "epoch": 4.515219459165648, "grad_norm": 2.9939231872558594, "learning_rate": 4.357929066449246e-05, "loss": 2.3788, "step": 66455 }, { "epoch": 4.515559179236309, "grad_norm": 2.579545497894287, "learning_rate": 4.357504416360919e-05, "loss": 2.4682, "step": 66460 }, { "epoch": 4.515898899306971, "grad_norm": 3.045572280883789, "learning_rate": 4.3570797662725915e-05, "loss": 2.5305, "step": 66465 }, { "epoch": 4.516238619377633, "grad_norm": 3.3127994537353516, "learning_rate": 4.356655116184264e-05, "loss": 2.6124, "step": 66470 }, { "epoch": 4.516578339448294, "grad_norm": 3.6467976570129395, "learning_rate": 4.356230466095937e-05, "loss": 1.9708, "step": 66475 }, { "epoch": 4.516918059518956, "grad_norm": 3.27113938331604, "learning_rate": 4.35580581600761e-05, "loss": 2.2823, "step": 66480 }, { "epoch": 4.5172577795896185, "grad_norm": 2.9242255687713623, "learning_rate": 4.355381165919283e-05, "loss": 2.4167, "step": 66485 }, { "epoch": 4.51759749966028, "grad_norm": 3.1529951095581055, "learning_rate": 4.3549565158309555e-05, "loss": 2.4839, "step": 66490 }, { "epoch": 
4.517937219730942, "grad_norm": 3.1942858695983887, "learning_rate": 4.354531865742628e-05, "loss": 2.4465, "step": 66495 }, { "epoch": 4.518276939801604, "grad_norm": 3.3294849395751953, "learning_rate": 4.354107215654301e-05, "loss": 2.4257, "step": 66500 }, { "epoch": 4.518616659872265, "grad_norm": 2.537236213684082, "learning_rate": 4.353682565565974e-05, "loss": 2.3153, "step": 66505 }, { "epoch": 4.518956379942927, "grad_norm": 3.3435380458831787, "learning_rate": 4.353257915477647e-05, "loss": 2.5224, "step": 66510 }, { "epoch": 4.519296100013589, "grad_norm": 3.646068811416626, "learning_rate": 4.3528332653893195e-05, "loss": 2.4728, "step": 66515 }, { "epoch": 4.51963582008425, "grad_norm": 3.0100998878479004, "learning_rate": 4.352408615300992e-05, "loss": 2.3376, "step": 66520 }, { "epoch": 4.519975540154912, "grad_norm": 2.8036234378814697, "learning_rate": 4.351983965212665e-05, "loss": 2.518, "step": 66525 }, { "epoch": 4.5203152602255745, "grad_norm": 2.6899261474609375, "learning_rate": 4.351559315124338e-05, "loss": 2.4313, "step": 66530 }, { "epoch": 4.520654980296236, "grad_norm": 2.7928073406219482, "learning_rate": 4.351134665036011e-05, "loss": 2.2687, "step": 66535 }, { "epoch": 4.520994700366898, "grad_norm": 3.13708758354187, "learning_rate": 4.3507100149476835e-05, "loss": 2.3783, "step": 66540 }, { "epoch": 4.52133442043756, "grad_norm": 3.271270751953125, "learning_rate": 4.350285364859356e-05, "loss": 2.3297, "step": 66545 }, { "epoch": 4.521674140508221, "grad_norm": 3.8216447830200195, "learning_rate": 4.349860714771029e-05, "loss": 2.4831, "step": 66550 }, { "epoch": 4.522013860578883, "grad_norm": 3.2303502559661865, "learning_rate": 4.349436064682701e-05, "loss": 2.3477, "step": 66555 }, { "epoch": 4.522353580649545, "grad_norm": 3.1159560680389404, "learning_rate": 4.349011414594375e-05, "loss": 2.351, "step": 66560 }, { "epoch": 4.522693300720206, "grad_norm": 3.1277034282684326, "learning_rate": 4.3485867645060475e-05, "loss": 
2.4903, "step": 66565 }, { "epoch": 4.523033020790868, "grad_norm": 2.4164037704467773, "learning_rate": 4.3481621144177196e-05, "loss": 2.8104, "step": 66570 }, { "epoch": 4.5233727408615305, "grad_norm": 4.213077545166016, "learning_rate": 4.347737464329393e-05, "loss": 2.2719, "step": 66575 }, { "epoch": 4.523712460932192, "grad_norm": 2.812713861465454, "learning_rate": 4.347312814241066e-05, "loss": 2.4673, "step": 66580 }, { "epoch": 4.524052181002854, "grad_norm": 3.4730513095855713, "learning_rate": 4.346888164152738e-05, "loss": 2.2759, "step": 66585 }, { "epoch": 4.524391901073516, "grad_norm": 2.875377655029297, "learning_rate": 4.346463514064411e-05, "loss": 2.6189, "step": 66590 }, { "epoch": 4.524731621144177, "grad_norm": 2.392192840576172, "learning_rate": 4.346038863976084e-05, "loss": 2.3003, "step": 66595 }, { "epoch": 4.525071341214839, "grad_norm": 3.2375314235687256, "learning_rate": 4.3456142138877564e-05, "loss": 2.6009, "step": 66600 }, { "epoch": 4.525411061285501, "grad_norm": 4.569032192230225, "learning_rate": 4.345189563799429e-05, "loss": 2.6152, "step": 66605 }, { "epoch": 4.525750781356162, "grad_norm": 3.5038180351257324, "learning_rate": 4.344764913711103e-05, "loss": 2.3897, "step": 66610 }, { "epoch": 4.5260905014268245, "grad_norm": 3.012092113494873, "learning_rate": 4.344340263622775e-05, "loss": 2.3109, "step": 66615 }, { "epoch": 4.5264302214974865, "grad_norm": 3.6455531120300293, "learning_rate": 4.3439156135344476e-05, "loss": 2.3976, "step": 66620 }, { "epoch": 4.526769941568148, "grad_norm": 2.2009427547454834, "learning_rate": 4.3434909634461204e-05, "loss": 2.3103, "step": 66625 }, { "epoch": 4.52710966163881, "grad_norm": 3.4739396572113037, "learning_rate": 4.343066313357793e-05, "loss": 2.3119, "step": 66630 }, { "epoch": 4.527449381709472, "grad_norm": 2.9718017578125, "learning_rate": 4.342641663269466e-05, "loss": 2.293, "step": 66635 }, { "epoch": 4.527789101780133, "grad_norm": 2.9434306621551514, 
"learning_rate": 4.342217013181139e-05, "loss": 2.2231, "step": 66640 }, { "epoch": 4.528128821850795, "grad_norm": 3.5196495056152344, "learning_rate": 4.3417923630928116e-05, "loss": 2.3275, "step": 66645 }, { "epoch": 4.528468541921457, "grad_norm": 3.234708786010742, "learning_rate": 4.3413677130044844e-05, "loss": 2.5467, "step": 66650 }, { "epoch": 4.528808261992118, "grad_norm": 2.5866503715515137, "learning_rate": 4.340943062916157e-05, "loss": 2.6303, "step": 66655 }, { "epoch": 4.5291479820627805, "grad_norm": 2.715026378631592, "learning_rate": 4.34051841282783e-05, "loss": 2.4243, "step": 66660 }, { "epoch": 4.5294877021334425, "grad_norm": 3.3201189041137695, "learning_rate": 4.340093762739503e-05, "loss": 2.3523, "step": 66665 }, { "epoch": 4.529827422204104, "grad_norm": 3.573594808578491, "learning_rate": 4.3396691126511756e-05, "loss": 2.5904, "step": 66670 }, { "epoch": 4.530167142274766, "grad_norm": 3.3605411052703857, "learning_rate": 4.3392444625628484e-05, "loss": 2.6834, "step": 66675 }, { "epoch": 4.530506862345427, "grad_norm": 2.968431234359741, "learning_rate": 4.338819812474521e-05, "loss": 2.5517, "step": 66680 }, { "epoch": 4.530846582416089, "grad_norm": 3.5991454124450684, "learning_rate": 4.338395162386194e-05, "loss": 2.5596, "step": 66685 }, { "epoch": 4.531186302486751, "grad_norm": 3.1575353145599365, "learning_rate": 4.337970512297867e-05, "loss": 2.3245, "step": 66690 }, { "epoch": 4.531526022557412, "grad_norm": 3.174375534057617, "learning_rate": 4.3375458622095396e-05, "loss": 2.5841, "step": 66695 }, { "epoch": 4.531865742628074, "grad_norm": 2.566822052001953, "learning_rate": 4.3371212121212124e-05, "loss": 2.5523, "step": 66700 }, { "epoch": 4.5322054626987365, "grad_norm": 2.9325485229492188, "learning_rate": 4.336696562032885e-05, "loss": 2.5069, "step": 66705 }, { "epoch": 4.532545182769398, "grad_norm": 3.114943504333496, "learning_rate": 4.336271911944558e-05, "loss": 2.3945, "step": 66710 }, { "epoch": 
4.53288490284006, "grad_norm": 3.3553476333618164, "learning_rate": 4.335847261856231e-05, "loss": 2.4543, "step": 66715 }, { "epoch": 4.533224622910722, "grad_norm": 3.725999355316162, "learning_rate": 4.3354226117679036e-05, "loss": 2.5987, "step": 66720 }, { "epoch": 4.533564342981383, "grad_norm": 3.240295171737671, "learning_rate": 4.334997961679576e-05, "loss": 2.3801, "step": 66725 }, { "epoch": 4.533904063052045, "grad_norm": 2.636054515838623, "learning_rate": 4.334573311591249e-05, "loss": 2.4177, "step": 66730 }, { "epoch": 4.534243783122707, "grad_norm": 2.691713333129883, "learning_rate": 4.334148661502922e-05, "loss": 2.4725, "step": 66735 }, { "epoch": 4.534583503193368, "grad_norm": 3.3986871242523193, "learning_rate": 4.333724011414594e-05, "loss": 2.3665, "step": 66740 }, { "epoch": 4.53492322326403, "grad_norm": 3.093688726425171, "learning_rate": 4.3332993613262676e-05, "loss": 2.5544, "step": 66745 }, { "epoch": 4.5352629433346925, "grad_norm": 3.872105598449707, "learning_rate": 4.3328747112379404e-05, "loss": 2.416, "step": 66750 }, { "epoch": 4.535602663405354, "grad_norm": 3.2164571285247803, "learning_rate": 4.3324500611496126e-05, "loss": 2.3696, "step": 66755 }, { "epoch": 4.535942383476016, "grad_norm": 2.6583168506622314, "learning_rate": 4.3320254110612854e-05, "loss": 2.4207, "step": 66760 }, { "epoch": 4.536282103546678, "grad_norm": 3.0341546535491943, "learning_rate": 4.331600760972959e-05, "loss": 2.2328, "step": 66765 }, { "epoch": 4.536621823617339, "grad_norm": 2.5905494689941406, "learning_rate": 4.331176110884631e-05, "loss": 2.2426, "step": 66770 }, { "epoch": 4.536961543688001, "grad_norm": 3.4287307262420654, "learning_rate": 4.330751460796304e-05, "loss": 2.5792, "step": 66775 }, { "epoch": 4.537301263758663, "grad_norm": 3.844064235687256, "learning_rate": 4.330326810707977e-05, "loss": 2.3518, "step": 66780 }, { "epoch": 4.537640983829324, "grad_norm": 4.318403244018555, "learning_rate": 4.3299021606196494e-05, "loss": 
2.6075, "step": 66785 }, { "epoch": 4.537980703899986, "grad_norm": 3.4659276008605957, "learning_rate": 4.329477510531322e-05, "loss": 2.3256, "step": 66790 }, { "epoch": 4.5383204239706485, "grad_norm": 3.5410139560699463, "learning_rate": 4.329052860442995e-05, "loss": 2.4494, "step": 66795 }, { "epoch": 4.53866014404131, "grad_norm": 3.1301109790802, "learning_rate": 4.328628210354668e-05, "loss": 2.3736, "step": 66800 }, { "epoch": 4.538999864111972, "grad_norm": 6.667054176330566, "learning_rate": 4.3282035602663406e-05, "loss": 2.4995, "step": 66805 }, { "epoch": 4.539339584182634, "grad_norm": 2.6777656078338623, "learning_rate": 4.3277789101780134e-05, "loss": 2.7823, "step": 66810 }, { "epoch": 4.539679304253295, "grad_norm": 3.271481513977051, "learning_rate": 4.327354260089686e-05, "loss": 2.6472, "step": 66815 }, { "epoch": 4.540019024323957, "grad_norm": 3.1960790157318115, "learning_rate": 4.326929610001359e-05, "loss": 2.5056, "step": 66820 }, { "epoch": 4.540358744394619, "grad_norm": 3.0526516437530518, "learning_rate": 4.326504959913032e-05, "loss": 2.572, "step": 66825 }, { "epoch": 4.54069846446528, "grad_norm": 2.2632617950439453, "learning_rate": 4.326080309824705e-05, "loss": 2.3915, "step": 66830 }, { "epoch": 4.541038184535942, "grad_norm": 3.5245437622070312, "learning_rate": 4.3256556597363774e-05, "loss": 2.5813, "step": 66835 }, { "epoch": 4.5413779046066045, "grad_norm": 3.1848011016845703, "learning_rate": 4.32523100964805e-05, "loss": 2.4559, "step": 66840 }, { "epoch": 4.541717624677266, "grad_norm": 3.561872959136963, "learning_rate": 4.324806359559723e-05, "loss": 2.3694, "step": 66845 }, { "epoch": 4.542057344747928, "grad_norm": 3.689143657684326, "learning_rate": 4.324381709471396e-05, "loss": 2.6583, "step": 66850 }, { "epoch": 4.54239706481859, "grad_norm": 2.910653591156006, "learning_rate": 4.3239570593830686e-05, "loss": 2.496, "step": 66855 }, { "epoch": 4.542736784889251, "grad_norm": 2.2889366149902344, 
"learning_rate": 4.3235324092947414e-05, "loss": 2.4066, "step": 66860 }, { "epoch": 4.543076504959913, "grad_norm": 3.9463911056518555, "learning_rate": 4.323107759206414e-05, "loss": 2.5219, "step": 66865 }, { "epoch": 4.543416225030575, "grad_norm": 3.607266902923584, "learning_rate": 4.322683109118087e-05, "loss": 2.5193, "step": 66870 }, { "epoch": 4.543755945101236, "grad_norm": 2.70233154296875, "learning_rate": 4.32225845902976e-05, "loss": 2.2881, "step": 66875 }, { "epoch": 4.5440956651718984, "grad_norm": 2.9646897315979004, "learning_rate": 4.3218338089414326e-05, "loss": 2.6079, "step": 66880 }, { "epoch": 4.54443538524256, "grad_norm": 3.1546123027801514, "learning_rate": 4.3214091588531054e-05, "loss": 2.4061, "step": 66885 }, { "epoch": 4.544775105313222, "grad_norm": 2.8707737922668457, "learning_rate": 4.320984508764778e-05, "loss": 2.3264, "step": 66890 }, { "epoch": 4.545114825383884, "grad_norm": 3.270385265350342, "learning_rate": 4.32055985867645e-05, "loss": 2.437, "step": 66895 }, { "epoch": 4.545454545454545, "grad_norm": 3.270390510559082, "learning_rate": 4.320135208588124e-05, "loss": 2.2864, "step": 66900 }, { "epoch": 4.545794265525207, "grad_norm": 3.1458005905151367, "learning_rate": 4.3197105584997966e-05, "loss": 2.4631, "step": 66905 }, { "epoch": 4.546133985595869, "grad_norm": 3.6149585247039795, "learning_rate": 4.319285908411469e-05, "loss": 2.4592, "step": 66910 }, { "epoch": 4.54647370566653, "grad_norm": 2.348893165588379, "learning_rate": 4.318861258323142e-05, "loss": 2.4731, "step": 66915 }, { "epoch": 4.546813425737192, "grad_norm": 2.499234676361084, "learning_rate": 4.318436608234815e-05, "loss": 2.3819, "step": 66920 }, { "epoch": 4.5471531458078545, "grad_norm": 2.6511948108673096, "learning_rate": 4.318011958146487e-05, "loss": 2.6729, "step": 66925 }, { "epoch": 4.547492865878516, "grad_norm": 3.0577383041381836, "learning_rate": 4.3175873080581606e-05, "loss": 2.5304, "step": 66930 }, { "epoch": 
4.547832585949178, "grad_norm": 2.982583522796631, "learning_rate": 4.3171626579698334e-05, "loss": 2.4676, "step": 66935 }, { "epoch": 4.54817230601984, "grad_norm": 3.277073621749878, "learning_rate": 4.3167380078815055e-05, "loss": 2.3687, "step": 66940 }, { "epoch": 4.548512026090501, "grad_norm": 3.3565080165863037, "learning_rate": 4.316313357793178e-05, "loss": 2.4459, "step": 66945 }, { "epoch": 4.548851746161163, "grad_norm": 2.9754340648651123, "learning_rate": 4.315888707704852e-05, "loss": 2.5341, "step": 66950 }, { "epoch": 4.549191466231825, "grad_norm": 3.2111263275146484, "learning_rate": 4.315464057616524e-05, "loss": 2.4607, "step": 66955 }, { "epoch": 4.549531186302486, "grad_norm": 3.0523526668548584, "learning_rate": 4.315039407528197e-05, "loss": 2.6204, "step": 66960 }, { "epoch": 4.549870906373148, "grad_norm": 2.9750640392303467, "learning_rate": 4.31461475743987e-05, "loss": 2.2953, "step": 66965 }, { "epoch": 4.5502106264438105, "grad_norm": 2.80527925491333, "learning_rate": 4.314190107351542e-05, "loss": 2.3702, "step": 66970 }, { "epoch": 4.550550346514472, "grad_norm": 2.832482099533081, "learning_rate": 4.313765457263215e-05, "loss": 2.35, "step": 66975 }, { "epoch": 4.550890066585134, "grad_norm": 2.6601202487945557, "learning_rate": 4.313340807174888e-05, "loss": 2.4757, "step": 66980 }, { "epoch": 4.551229786655796, "grad_norm": 2.869985342025757, "learning_rate": 4.312916157086561e-05, "loss": 2.6438, "step": 66985 }, { "epoch": 4.551569506726457, "grad_norm": 3.651594638824463, "learning_rate": 4.3124915069982335e-05, "loss": 2.3704, "step": 66990 }, { "epoch": 4.551909226797119, "grad_norm": 3.3454153537750244, "learning_rate": 4.312066856909906e-05, "loss": 2.4792, "step": 66995 }, { "epoch": 4.552248946867781, "grad_norm": 3.994419574737549, "learning_rate": 4.31164220682158e-05, "loss": 2.3147, "step": 67000 }, { "epoch": 4.552588666938442, "grad_norm": 3.592529535293579, "learning_rate": 4.311217556733252e-05, "loss": 
2.2467, "step": 67005 }, { "epoch": 4.552928387009104, "grad_norm": 2.5238723754882812, "learning_rate": 4.310792906644925e-05, "loss": 2.4109, "step": 67010 }, { "epoch": 4.5532681070797665, "grad_norm": 3.1380367279052734, "learning_rate": 4.3103682565565975e-05, "loss": 2.2389, "step": 67015 }, { "epoch": 4.553607827150428, "grad_norm": 3.2935423851013184, "learning_rate": 4.30994360646827e-05, "loss": 2.3405, "step": 67020 }, { "epoch": 4.55394754722109, "grad_norm": 3.377714157104492, "learning_rate": 4.309518956379943e-05, "loss": 2.6822, "step": 67025 }, { "epoch": 4.554287267291752, "grad_norm": 3.128735303878784, "learning_rate": 4.309094306291616e-05, "loss": 2.5953, "step": 67030 }, { "epoch": 4.554626987362413, "grad_norm": 3.202763795852661, "learning_rate": 4.308669656203289e-05, "loss": 2.2458, "step": 67035 }, { "epoch": 4.554966707433075, "grad_norm": 2.850090980529785, "learning_rate": 4.3082450061149615e-05, "loss": 2.4426, "step": 67040 }, { "epoch": 4.555306427503737, "grad_norm": 3.152310371398926, "learning_rate": 4.307820356026634e-05, "loss": 2.4861, "step": 67045 }, { "epoch": 4.555646147574398, "grad_norm": 3.1667492389678955, "learning_rate": 4.307395705938307e-05, "loss": 2.388, "step": 67050 }, { "epoch": 4.55598586764506, "grad_norm": 3.093350410461426, "learning_rate": 4.30697105584998e-05, "loss": 2.492, "step": 67055 }, { "epoch": 4.5563255877157225, "grad_norm": 3.1934666633605957, "learning_rate": 4.306546405761653e-05, "loss": 2.4381, "step": 67060 }, { "epoch": 4.556665307786384, "grad_norm": 3.2741570472717285, "learning_rate": 4.3061217556733255e-05, "loss": 2.5839, "step": 67065 }, { "epoch": 4.557005027857046, "grad_norm": 3.517444133758545, "learning_rate": 4.305697105584998e-05, "loss": 2.4291, "step": 67070 }, { "epoch": 4.557344747927708, "grad_norm": 3.6304235458374023, "learning_rate": 4.305272455496671e-05, "loss": 2.7815, "step": 67075 }, { "epoch": 4.557684467998369, "grad_norm": 3.2542691230773926, 
"learning_rate": 4.304847805408343e-05, "loss": 2.3293, "step": 67080 }, { "epoch": 4.558024188069031, "grad_norm": 2.8799455165863037, "learning_rate": 4.304423155320017e-05, "loss": 2.723, "step": 67085 }, { "epoch": 4.558363908139693, "grad_norm": 2.491896867752075, "learning_rate": 4.3039985052316895e-05, "loss": 2.5369, "step": 67090 }, { "epoch": 4.558703628210354, "grad_norm": 2.654775857925415, "learning_rate": 4.3035738551433617e-05, "loss": 2.4949, "step": 67095 }, { "epoch": 4.559043348281016, "grad_norm": 3.5080575942993164, "learning_rate": 4.303149205055035e-05, "loss": 2.252, "step": 67100 }, { "epoch": 4.5593830683516785, "grad_norm": 3.6550841331481934, "learning_rate": 4.302724554966708e-05, "loss": 2.408, "step": 67105 }, { "epoch": 4.55972278842234, "grad_norm": 2.9946701526641846, "learning_rate": 4.30229990487838e-05, "loss": 2.276, "step": 67110 }, { "epoch": 4.560062508493002, "grad_norm": 2.85237717628479, "learning_rate": 4.301875254790053e-05, "loss": 2.4141, "step": 67115 }, { "epoch": 4.560402228563664, "grad_norm": 3.746809959411621, "learning_rate": 4.301450604701726e-05, "loss": 2.5963, "step": 67120 }, { "epoch": 4.560741948634325, "grad_norm": 3.8372957706451416, "learning_rate": 4.3010259546133985e-05, "loss": 2.3355, "step": 67125 }, { "epoch": 4.561081668704987, "grad_norm": 2.858489990234375, "learning_rate": 4.300601304525071e-05, "loss": 2.4198, "step": 67130 }, { "epoch": 4.561421388775649, "grad_norm": 3.1570351123809814, "learning_rate": 4.300176654436745e-05, "loss": 2.4196, "step": 67135 }, { "epoch": 4.56176110884631, "grad_norm": 2.664578437805176, "learning_rate": 4.299752004348417e-05, "loss": 2.5177, "step": 67140 }, { "epoch": 4.562100828916972, "grad_norm": 2.909219741821289, "learning_rate": 4.29932735426009e-05, "loss": 2.3641, "step": 67145 }, { "epoch": 4.5624405489876345, "grad_norm": 3.9220359325408936, "learning_rate": 4.2989027041717625e-05, "loss": 2.547, "step": 67150 }, { "epoch": 4.562780269058296, 
"grad_norm": 2.9031331539154053, "learning_rate": 4.298478054083435e-05, "loss": 2.493, "step": 67155 }, { "epoch": 4.563119989128958, "grad_norm": 3.3449432849884033, "learning_rate": 4.298053403995108e-05, "loss": 2.347, "step": 67160 }, { "epoch": 4.56345970919962, "grad_norm": 3.100219488143921, "learning_rate": 4.297628753906781e-05, "loss": 2.5464, "step": 67165 }, { "epoch": 4.563799429270281, "grad_norm": 2.6298177242279053, "learning_rate": 4.2972041038184543e-05, "loss": 2.6278, "step": 67170 }, { "epoch": 4.564139149340943, "grad_norm": 3.4396915435791016, "learning_rate": 4.2967794537301265e-05, "loss": 2.475, "step": 67175 }, { "epoch": 4.564478869411605, "grad_norm": 3.2910726070404053, "learning_rate": 4.296354803641799e-05, "loss": 2.3735, "step": 67180 }, { "epoch": 4.564818589482266, "grad_norm": 4.317576885223389, "learning_rate": 4.295930153553472e-05, "loss": 2.1842, "step": 67185 }, { "epoch": 4.5651583095529285, "grad_norm": 3.1954357624053955, "learning_rate": 4.295505503465145e-05, "loss": 2.3462, "step": 67190 }, { "epoch": 4.5654980296235905, "grad_norm": 3.06119441986084, "learning_rate": 4.295080853376818e-05, "loss": 2.2731, "step": 67195 }, { "epoch": 4.565837749694252, "grad_norm": 4.075395107269287, "learning_rate": 4.2946562032884905e-05, "loss": 2.5188, "step": 67200 }, { "epoch": 4.566177469764914, "grad_norm": 3.0413382053375244, "learning_rate": 4.294231553200163e-05, "loss": 2.3981, "step": 67205 }, { "epoch": 4.566517189835576, "grad_norm": 2.796626329421997, "learning_rate": 4.293806903111836e-05, "loss": 2.4916, "step": 67210 }, { "epoch": 4.566856909906237, "grad_norm": 3.170184373855591, "learning_rate": 4.293382253023509e-05, "loss": 2.0186, "step": 67215 }, { "epoch": 4.567196629976899, "grad_norm": 3.015333652496338, "learning_rate": 4.292957602935182e-05, "loss": 2.3025, "step": 67220 }, { "epoch": 4.567536350047561, "grad_norm": 3.180250406265259, "learning_rate": 4.2925329528468545e-05, "loss": 2.4446, "step": 67225 
}, { "epoch": 4.567876070118222, "grad_norm": 3.4388389587402344, "learning_rate": 4.292108302758527e-05, "loss": 2.2034, "step": 67230 }, { "epoch": 4.5682157901888845, "grad_norm": 2.776155948638916, "learning_rate": 4.2916836526702e-05, "loss": 2.6088, "step": 67235 }, { "epoch": 4.5685555102595465, "grad_norm": 3.5289831161499023, "learning_rate": 4.291259002581873e-05, "loss": 2.443, "step": 67240 }, { "epoch": 4.568895230330208, "grad_norm": 2.4925174713134766, "learning_rate": 4.290834352493546e-05, "loss": 2.4904, "step": 67245 }, { "epoch": 4.56923495040087, "grad_norm": 3.5794169902801514, "learning_rate": 4.290409702405218e-05, "loss": 2.611, "step": 67250 }, { "epoch": 4.569574670471532, "grad_norm": 3.120354652404785, "learning_rate": 4.289985052316891e-05, "loss": 2.3148, "step": 67255 }, { "epoch": 4.569914390542193, "grad_norm": 3.560220956802368, "learning_rate": 4.289560402228564e-05, "loss": 2.3939, "step": 67260 }, { "epoch": 4.570254110612855, "grad_norm": 3.7383389472961426, "learning_rate": 4.289135752140236e-05, "loss": 2.3084, "step": 67265 }, { "epoch": 4.570593830683517, "grad_norm": 2.8902578353881836, "learning_rate": 4.28871110205191e-05, "loss": 2.4959, "step": 67270 }, { "epoch": 4.570933550754178, "grad_norm": 3.0269253253936768, "learning_rate": 4.2882864519635825e-05, "loss": 2.4826, "step": 67275 }, { "epoch": 4.5712732708248405, "grad_norm": 2.888411283493042, "learning_rate": 4.2878618018752546e-05, "loss": 2.2982, "step": 67280 }, { "epoch": 4.5716129908955025, "grad_norm": 3.300485849380493, "learning_rate": 4.2874371517869274e-05, "loss": 2.4107, "step": 67285 }, { "epoch": 4.571952710966164, "grad_norm": 3.0970964431762695, "learning_rate": 4.287012501698601e-05, "loss": 2.6351, "step": 67290 }, { "epoch": 4.572292431036826, "grad_norm": 3.4809744358062744, "learning_rate": 4.286587851610273e-05, "loss": 2.2031, "step": 67295 }, { "epoch": 4.572632151107488, "grad_norm": 3.8927483558654785, "learning_rate": 
4.286163201521946e-05, "loss": 2.4265, "step": 67300 }, { "epoch": 4.572971871178149, "grad_norm": 3.3234035968780518, "learning_rate": 4.285738551433619e-05, "loss": 2.4097, "step": 67305 }, { "epoch": 4.573311591248811, "grad_norm": 2.7840044498443604, "learning_rate": 4.2853139013452914e-05, "loss": 2.6367, "step": 67310 }, { "epoch": 4.573651311319473, "grad_norm": 2.93753981590271, "learning_rate": 4.284889251256964e-05, "loss": 2.5492, "step": 67315 }, { "epoch": 4.573991031390134, "grad_norm": 2.9757943153381348, "learning_rate": 4.284464601168637e-05, "loss": 2.4523, "step": 67320 }, { "epoch": 4.5743307514607965, "grad_norm": 3.9119560718536377, "learning_rate": 4.28403995108031e-05, "loss": 2.3853, "step": 67325 }, { "epoch": 4.574670471531459, "grad_norm": 2.733121633529663, "learning_rate": 4.2836153009919826e-05, "loss": 2.4486, "step": 67330 }, { "epoch": 4.57501019160212, "grad_norm": 3.276776075363159, "learning_rate": 4.2831906509036554e-05, "loss": 2.5849, "step": 67335 }, { "epoch": 4.575349911672782, "grad_norm": 2.4825923442840576, "learning_rate": 4.282766000815329e-05, "loss": 2.1708, "step": 67340 }, { "epoch": 4.575689631743444, "grad_norm": 3.0455358028411865, "learning_rate": 4.282341350727001e-05, "loss": 2.41, "step": 67345 }, { "epoch": 4.576029351814105, "grad_norm": 2.8527512550354004, "learning_rate": 4.281916700638674e-05, "loss": 2.5767, "step": 67350 }, { "epoch": 4.576369071884767, "grad_norm": 3.31322979927063, "learning_rate": 4.281492050550347e-05, "loss": 2.4362, "step": 67355 }, { "epoch": 4.576708791955428, "grad_norm": 3.323099374771118, "learning_rate": 4.2810674004620194e-05, "loss": 2.478, "step": 67360 }, { "epoch": 4.57704851202609, "grad_norm": 3.1086783409118652, "learning_rate": 4.280642750373692e-05, "loss": 2.2931, "step": 67365 }, { "epoch": 4.5773882320967525, "grad_norm": 3.073577880859375, "learning_rate": 4.280218100285365e-05, "loss": 2.4385, "step": 67370 }, { "epoch": 4.577727952167414, "grad_norm": 
2.9741742610931396, "learning_rate": 4.279793450197038e-05, "loss": 2.4061, "step": 67375 }, { "epoch": 4.578067672238076, "grad_norm": 3.4049155712127686, "learning_rate": 4.2793688001087106e-05, "loss": 2.5149, "step": 67380 }, { "epoch": 4.578407392308738, "grad_norm": 3.2860238552093506, "learning_rate": 4.2789441500203834e-05, "loss": 2.6243, "step": 67385 }, { "epoch": 4.578747112379399, "grad_norm": 3.820668935775757, "learning_rate": 4.278519499932056e-05, "loss": 2.2757, "step": 67390 }, { "epoch": 4.579086832450061, "grad_norm": 2.4525113105773926, "learning_rate": 4.278094849843729e-05, "loss": 2.4283, "step": 67395 }, { "epoch": 4.579426552520723, "grad_norm": 5.1292195320129395, "learning_rate": 4.277670199755402e-05, "loss": 2.4922, "step": 67400 }, { "epoch": 4.579766272591384, "grad_norm": 3.7721786499023438, "learning_rate": 4.2772455496670746e-05, "loss": 2.5639, "step": 67405 }, { "epoch": 4.580105992662046, "grad_norm": 3.263798475265503, "learning_rate": 4.2768208995787474e-05, "loss": 2.7174, "step": 67410 }, { "epoch": 4.5804457127327085, "grad_norm": 3.213317632675171, "learning_rate": 4.27639624949042e-05, "loss": 2.4415, "step": 67415 }, { "epoch": 4.58078543280337, "grad_norm": 4.14750862121582, "learning_rate": 4.2759715994020924e-05, "loss": 2.2874, "step": 67420 }, { "epoch": 4.581125152874032, "grad_norm": 2.709697723388672, "learning_rate": 4.275546949313766e-05, "loss": 2.4977, "step": 67425 }, { "epoch": 4.581464872944694, "grad_norm": 3.340822458267212, "learning_rate": 4.2751222992254386e-05, "loss": 2.2922, "step": 67430 }, { "epoch": 4.581804593015355, "grad_norm": 3.1681149005889893, "learning_rate": 4.274697649137111e-05, "loss": 2.4673, "step": 67435 }, { "epoch": 4.582144313086017, "grad_norm": 3.6564536094665527, "learning_rate": 4.274272999048784e-05, "loss": 2.1392, "step": 67440 }, { "epoch": 4.582484033156679, "grad_norm": 3.162558078765869, "learning_rate": 4.273848348960457e-05, "loss": 2.472, "step": 67445 }, { 
"epoch": 4.58282375322734, "grad_norm": 3.507354497909546, "learning_rate": 4.273423698872129e-05, "loss": 2.3823, "step": 67450 }, { "epoch": 4.583163473298002, "grad_norm": 3.068542242050171, "learning_rate": 4.2729990487838026e-05, "loss": 2.1588, "step": 67455 }, { "epoch": 4.5835031933686645, "grad_norm": 3.207062244415283, "learning_rate": 4.2725743986954754e-05, "loss": 2.5183, "step": 67460 }, { "epoch": 4.583842913439326, "grad_norm": 2.7020654678344727, "learning_rate": 4.2721497486071476e-05, "loss": 2.2182, "step": 67465 }, { "epoch": 4.584182633509988, "grad_norm": 2.762219190597534, "learning_rate": 4.2717250985188204e-05, "loss": 2.4703, "step": 67470 }, { "epoch": 4.58452235358065, "grad_norm": 3.7419166564941406, "learning_rate": 4.271300448430494e-05, "loss": 2.4514, "step": 67475 }, { "epoch": 4.584862073651311, "grad_norm": 3.09916353225708, "learning_rate": 4.270875798342166e-05, "loss": 2.1682, "step": 67480 }, { "epoch": 4.585201793721973, "grad_norm": 2.451810598373413, "learning_rate": 4.270451148253839e-05, "loss": 2.377, "step": 67485 }, { "epoch": 4.585541513792635, "grad_norm": 3.9378793239593506, "learning_rate": 4.270026498165512e-05, "loss": 2.5707, "step": 67490 }, { "epoch": 4.585881233863296, "grad_norm": 3.4062013626098633, "learning_rate": 4.2696018480771844e-05, "loss": 2.3763, "step": 67495 }, { "epoch": 4.5862209539339585, "grad_norm": 3.274378538131714, "learning_rate": 4.269177197988857e-05, "loss": 2.4229, "step": 67500 }, { "epoch": 4.5865606740046205, "grad_norm": 2.3517494201660156, "learning_rate": 4.26875254790053e-05, "loss": 2.4094, "step": 67505 }, { "epoch": 4.586900394075282, "grad_norm": 3.403324604034424, "learning_rate": 4.2683278978122034e-05, "loss": 2.5036, "step": 67510 }, { "epoch": 4.587240114145944, "grad_norm": 3.1006839275360107, "learning_rate": 4.2679032477238756e-05, "loss": 2.4601, "step": 67515 }, { "epoch": 4.587579834216606, "grad_norm": 3.696821689605713, "learning_rate": 
4.2674785976355484e-05, "loss": 2.5442, "step": 67520 }, { "epoch": 4.587919554287267, "grad_norm": 3.3085525035858154, "learning_rate": 4.267053947547222e-05, "loss": 2.4657, "step": 67525 }, { "epoch": 4.588259274357929, "grad_norm": 3.06207013130188, "learning_rate": 4.266629297458894e-05, "loss": 2.4842, "step": 67530 }, { "epoch": 4.588598994428591, "grad_norm": 3.462632179260254, "learning_rate": 4.266204647370567e-05, "loss": 2.4229, "step": 67535 }, { "epoch": 4.588938714499252, "grad_norm": 3.4593708515167236, "learning_rate": 4.2657799972822396e-05, "loss": 2.3047, "step": 67540 }, { "epoch": 4.5892784345699145, "grad_norm": 3.8873679637908936, "learning_rate": 4.2653553471939124e-05, "loss": 2.5176, "step": 67545 }, { "epoch": 4.5896181546405765, "grad_norm": 3.9857568740844727, "learning_rate": 4.264930697105585e-05, "loss": 2.3031, "step": 67550 }, { "epoch": 4.589957874711238, "grad_norm": 3.6484904289245605, "learning_rate": 4.264506047017258e-05, "loss": 2.2908, "step": 67555 }, { "epoch": 4.5902975947819, "grad_norm": 2.733091115951538, "learning_rate": 4.264081396928931e-05, "loss": 2.5999, "step": 67560 }, { "epoch": 4.590637314852561, "grad_norm": 3.6280434131622314, "learning_rate": 4.2636567468406036e-05, "loss": 2.3282, "step": 67565 }, { "epoch": 4.590977034923223, "grad_norm": 3.1863272190093994, "learning_rate": 4.2632320967522764e-05, "loss": 2.0795, "step": 67570 }, { "epoch": 4.591316754993885, "grad_norm": 2.576263427734375, "learning_rate": 4.262807446663949e-05, "loss": 2.5001, "step": 67575 }, { "epoch": 4.591656475064546, "grad_norm": 3.8700404167175293, "learning_rate": 4.262382796575622e-05, "loss": 2.3256, "step": 67580 }, { "epoch": 4.591996195135208, "grad_norm": 3.2200145721435547, "learning_rate": 4.261958146487295e-05, "loss": 2.3951, "step": 67585 }, { "epoch": 4.5923359152058705, "grad_norm": 3.0661604404449463, "learning_rate": 4.2615334963989676e-05, "loss": 2.4857, "step": 67590 }, { "epoch": 4.592675635276532, 
"grad_norm": 2.956289291381836, "learning_rate": 4.2611088463106404e-05, "loss": 2.2134, "step": 67595 }, { "epoch": 4.593015355347194, "grad_norm": 2.661733627319336, "learning_rate": 4.260684196222313e-05, "loss": 2.1947, "step": 67600 }, { "epoch": 4.593355075417856, "grad_norm": 3.647939920425415, "learning_rate": 4.260259546133985e-05, "loss": 2.398, "step": 67605 }, { "epoch": 4.593694795488517, "grad_norm": 3.0664443969726562, "learning_rate": 4.259834896045659e-05, "loss": 2.459, "step": 67610 }, { "epoch": 4.594034515559179, "grad_norm": 3.454315185546875, "learning_rate": 4.2594102459573316e-05, "loss": 2.3615, "step": 67615 }, { "epoch": 4.594374235629841, "grad_norm": 4.018108367919922, "learning_rate": 4.258985595869004e-05, "loss": 2.1973, "step": 67620 }, { "epoch": 4.594713955700502, "grad_norm": 3.9123668670654297, "learning_rate": 4.258560945780677e-05, "loss": 2.4174, "step": 67625 }, { "epoch": 4.595053675771164, "grad_norm": 2.835599899291992, "learning_rate": 4.25813629569235e-05, "loss": 2.6053, "step": 67630 }, { "epoch": 4.5953933958418265, "grad_norm": 2.766754627227783, "learning_rate": 4.257711645604022e-05, "loss": 2.3685, "step": 67635 }, { "epoch": 4.595733115912488, "grad_norm": 3.559488296508789, "learning_rate": 4.257286995515695e-05, "loss": 2.2505, "step": 67640 }, { "epoch": 4.59607283598315, "grad_norm": 3.3684608936309814, "learning_rate": 4.2568623454273684e-05, "loss": 2.8133, "step": 67645 }, { "epoch": 4.596412556053812, "grad_norm": 3.620175361633301, "learning_rate": 4.2564376953390405e-05, "loss": 2.4933, "step": 67650 }, { "epoch": 4.596752276124473, "grad_norm": 3.398920774459839, "learning_rate": 4.256013045250713e-05, "loss": 2.5874, "step": 67655 }, { "epoch": 4.597091996195135, "grad_norm": 3.6691064834594727, "learning_rate": 4.255588395162387e-05, "loss": 2.3072, "step": 67660 }, { "epoch": 4.597431716265797, "grad_norm": 3.3319056034088135, "learning_rate": 4.255163745074059e-05, "loss": 2.4124, "step": 67665 
}, { "epoch": 4.597771436336458, "grad_norm": 2.338711738586426, "learning_rate": 4.254739094985732e-05, "loss": 2.5083, "step": 67670 }, { "epoch": 4.59811115640712, "grad_norm": 3.3531458377838135, "learning_rate": 4.2543144448974045e-05, "loss": 2.5838, "step": 67675 }, { "epoch": 4.5984508764777825, "grad_norm": 2.9075379371643066, "learning_rate": 4.253889794809078e-05, "loss": 2.356, "step": 67680 }, { "epoch": 4.598790596548444, "grad_norm": 3.249197006225586, "learning_rate": 4.25346514472075e-05, "loss": 2.25, "step": 67685 }, { "epoch": 4.599130316619106, "grad_norm": 3.082179546356201, "learning_rate": 4.253040494632423e-05, "loss": 2.5981, "step": 67690 }, { "epoch": 4.599470036689768, "grad_norm": 3.014117479324341, "learning_rate": 4.2526158445440964e-05, "loss": 2.249, "step": 67695 }, { "epoch": 4.599809756760429, "grad_norm": 2.9429426193237305, "learning_rate": 4.2521911944557685e-05, "loss": 2.4513, "step": 67700 }, { "epoch": 4.600149476831091, "grad_norm": 3.3372912406921387, "learning_rate": 4.251766544367441e-05, "loss": 2.6474, "step": 67705 }, { "epoch": 4.600489196901753, "grad_norm": 3.295316696166992, "learning_rate": 4.251341894279114e-05, "loss": 2.1686, "step": 67710 }, { "epoch": 4.600828916972414, "grad_norm": 3.193591356277466, "learning_rate": 4.250917244190787e-05, "loss": 2.1568, "step": 67715 }, { "epoch": 4.601168637043076, "grad_norm": 3.756601095199585, "learning_rate": 4.25049259410246e-05, "loss": 2.5802, "step": 67720 }, { "epoch": 4.6015083571137385, "grad_norm": 2.814937114715576, "learning_rate": 4.2500679440141325e-05, "loss": 2.106, "step": 67725 }, { "epoch": 4.6018480771844, "grad_norm": 3.2326555252075195, "learning_rate": 4.249643293925805e-05, "loss": 2.3444, "step": 67730 }, { "epoch": 4.602187797255062, "grad_norm": 3.5066964626312256, "learning_rate": 4.249218643837478e-05, "loss": 2.4241, "step": 67735 }, { "epoch": 4.602527517325724, "grad_norm": 3.6485495567321777, "learning_rate": 4.248793993749151e-05, 
"loss": 2.6455, "step": 67740 }, { "epoch": 4.602867237396385, "grad_norm": 2.9663619995117188, "learning_rate": 4.248369343660824e-05, "loss": 2.4828, "step": 67745 }, { "epoch": 4.603206957467047, "grad_norm": 4.123529434204102, "learning_rate": 4.2479446935724965e-05, "loss": 2.5247, "step": 67750 }, { "epoch": 4.603546677537709, "grad_norm": 3.6762773990631104, "learning_rate": 4.247520043484169e-05, "loss": 2.2834, "step": 67755 }, { "epoch": 4.60388639760837, "grad_norm": 3.0868630409240723, "learning_rate": 4.247095393395842e-05, "loss": 2.5196, "step": 67760 }, { "epoch": 4.6042261176790324, "grad_norm": 3.5186381340026855, "learning_rate": 4.246670743307515e-05, "loss": 2.4228, "step": 67765 }, { "epoch": 4.6045658377496945, "grad_norm": 3.2567155361175537, "learning_rate": 4.246246093219188e-05, "loss": 2.6791, "step": 67770 }, { "epoch": 4.604905557820356, "grad_norm": 3.5479652881622314, "learning_rate": 4.24582144313086e-05, "loss": 2.4035, "step": 67775 }, { "epoch": 4.605245277891018, "grad_norm": 2.9693846702575684, "learning_rate": 4.245396793042533e-05, "loss": 2.5481, "step": 67780 }, { "epoch": 4.60558499796168, "grad_norm": 4.174236297607422, "learning_rate": 4.244972142954206e-05, "loss": 2.6608, "step": 67785 }, { "epoch": 4.605924718032341, "grad_norm": 2.5771713256835938, "learning_rate": 4.244547492865878e-05, "loss": 2.3667, "step": 67790 }, { "epoch": 4.606264438103003, "grad_norm": 2.1562962532043457, "learning_rate": 4.244122842777552e-05, "loss": 2.4284, "step": 67795 }, { "epoch": 4.606604158173665, "grad_norm": 3.4886460304260254, "learning_rate": 4.2436981926892245e-05, "loss": 2.3128, "step": 67800 }, { "epoch": 4.606943878244326, "grad_norm": 2.7726266384124756, "learning_rate": 4.2432735426008967e-05, "loss": 2.3854, "step": 67805 }, { "epoch": 4.6072835983149885, "grad_norm": 3.2829840183258057, "learning_rate": 4.2428488925125695e-05, "loss": 2.3404, "step": 67810 }, { "epoch": 4.6076233183856505, "grad_norm": 
2.7525362968444824, "learning_rate": 4.242424242424243e-05, "loss": 2.3536, "step": 67815 }, { "epoch": 4.607963038456312, "grad_norm": 3.102130651473999, "learning_rate": 4.241999592335915e-05, "loss": 2.1555, "step": 67820 }, { "epoch": 4.608302758526974, "grad_norm": 3.40824031829834, "learning_rate": 4.241574942247588e-05, "loss": 2.6599, "step": 67825 }, { "epoch": 4.608642478597636, "grad_norm": 3.3591535091400146, "learning_rate": 4.241150292159261e-05, "loss": 2.7534, "step": 67830 }, { "epoch": 4.608982198668297, "grad_norm": 3.562364101409912, "learning_rate": 4.2407256420709335e-05, "loss": 2.4777, "step": 67835 }, { "epoch": 4.609321918738959, "grad_norm": 3.0471811294555664, "learning_rate": 4.240300991982606e-05, "loss": 2.3505, "step": 67840 }, { "epoch": 4.609661638809621, "grad_norm": 3.9748053550720215, "learning_rate": 4.23987634189428e-05, "loss": 2.4308, "step": 67845 }, { "epoch": 4.610001358880282, "grad_norm": 3.8183677196502686, "learning_rate": 4.2394516918059525e-05, "loss": 2.5168, "step": 67850 }, { "epoch": 4.6103410789509445, "grad_norm": 2.885206699371338, "learning_rate": 4.239027041717625e-05, "loss": 2.4307, "step": 67855 }, { "epoch": 4.6106807990216065, "grad_norm": 3.4017844200134277, "learning_rate": 4.2386023916292975e-05, "loss": 2.4452, "step": 67860 }, { "epoch": 4.611020519092268, "grad_norm": 3.197298765182495, "learning_rate": 4.238177741540971e-05, "loss": 2.186, "step": 67865 }, { "epoch": 4.61136023916293, "grad_norm": 2.70786190032959, "learning_rate": 4.237753091452643e-05, "loss": 2.1118, "step": 67870 }, { "epoch": 4.611699959233592, "grad_norm": 3.376291036605835, "learning_rate": 4.237328441364316e-05, "loss": 2.3813, "step": 67875 }, { "epoch": 4.612039679304253, "grad_norm": 2.6768405437469482, "learning_rate": 4.2369037912759893e-05, "loss": 2.4757, "step": 67880 }, { "epoch": 4.612379399374915, "grad_norm": 2.350964069366455, "learning_rate": 4.2364791411876615e-05, "loss": 2.4008, "step": 67885 }, { 
"epoch": 4.612719119445577, "grad_norm": 3.0943443775177, "learning_rate": 4.236054491099334e-05, "loss": 2.5043, "step": 67890 }, { "epoch": 4.613058839516238, "grad_norm": 3.9768805503845215, "learning_rate": 4.235629841011007e-05, "loss": 2.3263, "step": 67895 }, { "epoch": 4.6133985595869005, "grad_norm": 2.844957113265991, "learning_rate": 4.23520519092268e-05, "loss": 2.5896, "step": 67900 }, { "epoch": 4.613738279657563, "grad_norm": 2.322079658508301, "learning_rate": 4.234780540834353e-05, "loss": 2.3226, "step": 67905 }, { "epoch": 4.614077999728224, "grad_norm": 4.0605926513671875, "learning_rate": 4.2343558907460255e-05, "loss": 2.2612, "step": 67910 }, { "epoch": 4.614417719798886, "grad_norm": 3.788835048675537, "learning_rate": 4.233931240657698e-05, "loss": 2.498, "step": 67915 }, { "epoch": 4.614757439869548, "grad_norm": 3.0420920848846436, "learning_rate": 4.233506590569371e-05, "loss": 2.4729, "step": 67920 }, { "epoch": 4.615097159940209, "grad_norm": 3.5657520294189453, "learning_rate": 4.233081940481044e-05, "loss": 2.6575, "step": 67925 }, { "epoch": 4.615436880010871, "grad_norm": 2.7191836833953857, "learning_rate": 4.232657290392717e-05, "loss": 2.3696, "step": 67930 }, { "epoch": 4.615776600081533, "grad_norm": 3.1745779514312744, "learning_rate": 4.2322326403043895e-05, "loss": 2.3666, "step": 67935 }, { "epoch": 4.616116320152194, "grad_norm": 2.9781460762023926, "learning_rate": 4.231807990216062e-05, "loss": 2.347, "step": 67940 }, { "epoch": 4.6164560402228565, "grad_norm": 3.0675318241119385, "learning_rate": 4.231383340127735e-05, "loss": 2.5276, "step": 67945 }, { "epoch": 4.616795760293519, "grad_norm": 3.246948003768921, "learning_rate": 4.230958690039408e-05, "loss": 2.418, "step": 67950 }, { "epoch": 4.61713548036418, "grad_norm": 3.0659244060516357, "learning_rate": 4.230534039951081e-05, "loss": 2.6822, "step": 67955 }, { "epoch": 4.617475200434842, "grad_norm": 3.35589861869812, "learning_rate": 4.230109389862753e-05, 
"loss": 2.4024, "step": 67960 }, { "epoch": 4.617814920505504, "grad_norm": 2.800612449645996, "learning_rate": 4.229684739774426e-05, "loss": 2.2686, "step": 67965 }, { "epoch": 4.618154640576165, "grad_norm": 4.548416614532471, "learning_rate": 4.229260089686099e-05, "loss": 2.3143, "step": 67970 }, { "epoch": 4.618494360646827, "grad_norm": 4.041539669036865, "learning_rate": 4.228835439597771e-05, "loss": 2.6039, "step": 67975 }, { "epoch": 4.618834080717489, "grad_norm": 2.878817081451416, "learning_rate": 4.228410789509445e-05, "loss": 2.2404, "step": 67980 }, { "epoch": 4.61917380078815, "grad_norm": 3.0551822185516357, "learning_rate": 4.2279861394211175e-05, "loss": 2.5015, "step": 67985 }, { "epoch": 4.6195135208588125, "grad_norm": 3.410753011703491, "learning_rate": 4.2275614893327896e-05, "loss": 2.3901, "step": 67990 }, { "epoch": 4.619853240929475, "grad_norm": 3.635934352874756, "learning_rate": 4.2271368392444624e-05, "loss": 2.4044, "step": 67995 }, { "epoch": 4.620192961000136, "grad_norm": 3.246602773666382, "learning_rate": 4.226712189156136e-05, "loss": 2.3468, "step": 68000 }, { "epoch": 4.620532681070798, "grad_norm": 3.584709644317627, "learning_rate": 4.226287539067808e-05, "loss": 2.5755, "step": 68005 }, { "epoch": 4.62087240114146, "grad_norm": 3.1376235485076904, "learning_rate": 4.225862888979481e-05, "loss": 2.509, "step": 68010 }, { "epoch": 4.621212121212121, "grad_norm": 2.514645576477051, "learning_rate": 4.225438238891154e-05, "loss": 2.3598, "step": 68015 }, { "epoch": 4.621551841282783, "grad_norm": 2.905369520187378, "learning_rate": 4.225013588802827e-05, "loss": 2.1817, "step": 68020 }, { "epoch": 4.621891561353445, "grad_norm": 2.813225030899048, "learning_rate": 4.224588938714499e-05, "loss": 2.1181, "step": 68025 }, { "epoch": 4.622231281424106, "grad_norm": 3.362112283706665, "learning_rate": 4.224164288626172e-05, "loss": 2.373, "step": 68030 }, { "epoch": 4.6225710014947685, "grad_norm": 3.648362159729004, 
"learning_rate": 4.2237396385378455e-05, "loss": 2.3442, "step": 68035 }, { "epoch": 4.62291072156543, "grad_norm": 3.1865551471710205, "learning_rate": 4.2233149884495176e-05, "loss": 2.3374, "step": 68040 }, { "epoch": 4.623250441636092, "grad_norm": 3.24263334274292, "learning_rate": 4.2228903383611904e-05, "loss": 2.22, "step": 68045 }, { "epoch": 4.623590161706754, "grad_norm": 3.108846426010132, "learning_rate": 4.222465688272864e-05, "loss": 2.6436, "step": 68050 }, { "epoch": 4.623929881777415, "grad_norm": 2.6868836879730225, "learning_rate": 4.222041038184536e-05, "loss": 2.1932, "step": 68055 }, { "epoch": 4.624269601848077, "grad_norm": 3.1956686973571777, "learning_rate": 4.221616388096209e-05, "loss": 2.6514, "step": 68060 }, { "epoch": 4.624609321918739, "grad_norm": 3.0594990253448486, "learning_rate": 4.2211917380078816e-05, "loss": 2.5144, "step": 68065 }, { "epoch": 4.6249490419894, "grad_norm": 3.4052419662475586, "learning_rate": 4.2207670879195544e-05, "loss": 2.3177, "step": 68070 }, { "epoch": 4.6252887620600625, "grad_norm": 2.3570075035095215, "learning_rate": 4.220342437831227e-05, "loss": 2.5267, "step": 68075 }, { "epoch": 4.6256284821307245, "grad_norm": 3.5441689491271973, "learning_rate": 4.2199177877429e-05, "loss": 2.4465, "step": 68080 }, { "epoch": 4.625968202201386, "grad_norm": 2.628432273864746, "learning_rate": 4.219493137654573e-05, "loss": 2.362, "step": 68085 }, { "epoch": 4.626307922272048, "grad_norm": 3.035773992538452, "learning_rate": 4.2190684875662456e-05, "loss": 2.3588, "step": 68090 }, { "epoch": 4.62664764234271, "grad_norm": 2.898714065551758, "learning_rate": 4.2186438374779184e-05, "loss": 2.5836, "step": 68095 }, { "epoch": 4.626987362413371, "grad_norm": 3.4524800777435303, "learning_rate": 4.218219187389591e-05, "loss": 2.6539, "step": 68100 }, { "epoch": 4.627327082484033, "grad_norm": 3.707881212234497, "learning_rate": 4.217794537301264e-05, "loss": 2.4554, "step": 68105 }, { "epoch": 4.627666802554695, 
"grad_norm": 3.245954751968384, "learning_rate": 4.217369887212937e-05, "loss": 2.1768, "step": 68110 }, { "epoch": 4.628006522625356, "grad_norm": 2.799663543701172, "learning_rate": 4.2169452371246096e-05, "loss": 2.6358, "step": 68115 }, { "epoch": 4.6283462426960185, "grad_norm": 3.090102434158325, "learning_rate": 4.2165205870362824e-05, "loss": 2.5029, "step": 68120 }, { "epoch": 4.6286859627666805, "grad_norm": 4.019145488739014, "learning_rate": 4.216095936947955e-05, "loss": 2.5433, "step": 68125 }, { "epoch": 4.629025682837342, "grad_norm": 2.8000166416168213, "learning_rate": 4.2156712868596274e-05, "loss": 2.5612, "step": 68130 }, { "epoch": 4.629365402908004, "grad_norm": 2.991034984588623, "learning_rate": 4.215246636771301e-05, "loss": 2.4572, "step": 68135 }, { "epoch": 4.629705122978666, "grad_norm": 3.1861064434051514, "learning_rate": 4.2148219866829736e-05, "loss": 2.2883, "step": 68140 }, { "epoch": 4.630044843049327, "grad_norm": 2.5879898071289062, "learning_rate": 4.214397336594646e-05, "loss": 2.313, "step": 68145 }, { "epoch": 4.630384563119989, "grad_norm": 4.66174840927124, "learning_rate": 4.213972686506319e-05, "loss": 2.4893, "step": 68150 }, { "epoch": 4.630724283190651, "grad_norm": 3.0715460777282715, "learning_rate": 4.213548036417992e-05, "loss": 2.5204, "step": 68155 }, { "epoch": 4.631064003261312, "grad_norm": 3.1093051433563232, "learning_rate": 4.213123386329664e-05, "loss": 2.5986, "step": 68160 }, { "epoch": 4.6314037233319745, "grad_norm": 3.0473251342773438, "learning_rate": 4.212698736241337e-05, "loss": 2.3949, "step": 68165 }, { "epoch": 4.6317434434026366, "grad_norm": 3.130743980407715, "learning_rate": 4.2122740861530104e-05, "loss": 2.362, "step": 68170 }, { "epoch": 4.632083163473298, "grad_norm": 3.331110954284668, "learning_rate": 4.2118494360646826e-05, "loss": 2.4365, "step": 68175 }, { "epoch": 4.63242288354396, "grad_norm": 2.641951560974121, "learning_rate": 4.2114247859763554e-05, "loss": 2.3767, "step": 
68180 }, { "epoch": 4.632762603614622, "grad_norm": 3.734534978866577, "learning_rate": 4.211000135888029e-05, "loss": 2.523, "step": 68185 }, { "epoch": 4.633102323685283, "grad_norm": 2.8053765296936035, "learning_rate": 4.2105754857997016e-05, "loss": 2.5599, "step": 68190 }, { "epoch": 4.633442043755945, "grad_norm": 3.145075559616089, "learning_rate": 4.210150835711374e-05, "loss": 2.6107, "step": 68195 }, { "epoch": 4.633781763826607, "grad_norm": 3.011591911315918, "learning_rate": 4.2097261856230466e-05, "loss": 2.4435, "step": 68200 }, { "epoch": 4.634121483897268, "grad_norm": 3.509333372116089, "learning_rate": 4.20930153553472e-05, "loss": 2.4252, "step": 68205 }, { "epoch": 4.6344612039679305, "grad_norm": 3.0597987174987793, "learning_rate": 4.208876885446392e-05, "loss": 2.2719, "step": 68210 }, { "epoch": 4.634800924038593, "grad_norm": 3.1745047569274902, "learning_rate": 4.208452235358065e-05, "loss": 2.8106, "step": 68215 }, { "epoch": 4.635140644109254, "grad_norm": 3.230170965194702, "learning_rate": 4.2080275852697384e-05, "loss": 2.4115, "step": 68220 }, { "epoch": 4.635480364179916, "grad_norm": 2.887925386428833, "learning_rate": 4.2076029351814106e-05, "loss": 2.3811, "step": 68225 }, { "epoch": 4.635820084250578, "grad_norm": 3.674647331237793, "learning_rate": 4.2071782850930834e-05, "loss": 2.2854, "step": 68230 }, { "epoch": 4.636159804321239, "grad_norm": 3.7654976844787598, "learning_rate": 4.206753635004756e-05, "loss": 2.3828, "step": 68235 }, { "epoch": 4.636499524391901, "grad_norm": 3.530604839324951, "learning_rate": 4.206328984916429e-05, "loss": 2.5459, "step": 68240 }, { "epoch": 4.636839244462563, "grad_norm": 3.1016008853912354, "learning_rate": 4.205904334828102e-05, "loss": 2.6051, "step": 68245 }, { "epoch": 4.637178964533224, "grad_norm": 3.87060284614563, "learning_rate": 4.2054796847397746e-05, "loss": 2.4203, "step": 68250 }, { "epoch": 4.6375186846038865, "grad_norm": 2.8986425399780273, "learning_rate": 
4.2050550346514474e-05, "loss": 2.5191, "step": 68255 }, { "epoch": 4.637858404674548, "grad_norm": 2.6120712757110596, "learning_rate": 4.20463038456312e-05, "loss": 2.377, "step": 68260 }, { "epoch": 4.63819812474521, "grad_norm": 2.960151195526123, "learning_rate": 4.204205734474793e-05, "loss": 2.4395, "step": 68265 }, { "epoch": 4.638537844815872, "grad_norm": 3.077444314956665, "learning_rate": 4.203781084386466e-05, "loss": 2.2539, "step": 68270 }, { "epoch": 4.638877564886533, "grad_norm": 2.8207759857177734, "learning_rate": 4.2033564342981386e-05, "loss": 2.4712, "step": 68275 }, { "epoch": 4.639217284957195, "grad_norm": 3.7907683849334717, "learning_rate": 4.2029317842098114e-05, "loss": 2.7573, "step": 68280 }, { "epoch": 4.639557005027857, "grad_norm": 3.8200178146362305, "learning_rate": 4.202507134121484e-05, "loss": 2.102, "step": 68285 }, { "epoch": 4.639896725098518, "grad_norm": 3.0056309700012207, "learning_rate": 4.202082484033157e-05, "loss": 2.4469, "step": 68290 }, { "epoch": 4.64023644516918, "grad_norm": 3.4343464374542236, "learning_rate": 4.20165783394483e-05, "loss": 2.4774, "step": 68295 }, { "epoch": 4.6405761652398425, "grad_norm": 3.3393313884735107, "learning_rate": 4.201233183856502e-05, "loss": 2.2997, "step": 68300 }, { "epoch": 4.640915885310504, "grad_norm": 3.375669479370117, "learning_rate": 4.2008085337681754e-05, "loss": 2.1094, "step": 68305 }, { "epoch": 4.641255605381166, "grad_norm": 3.152277946472168, "learning_rate": 4.200383883679848e-05, "loss": 2.3091, "step": 68310 }, { "epoch": 4.641595325451828, "grad_norm": 3.768798589706421, "learning_rate": 4.19995923359152e-05, "loss": 2.3961, "step": 68315 }, { "epoch": 4.641935045522489, "grad_norm": 2.981282949447632, "learning_rate": 4.199534583503194e-05, "loss": 2.4778, "step": 68320 }, { "epoch": 4.642274765593151, "grad_norm": 3.104781150817871, "learning_rate": 4.1991099334148666e-05, "loss": 2.3022, "step": 68325 }, { "epoch": 4.642614485663813, "grad_norm": 
4.495940208435059, "learning_rate": 4.198685283326539e-05, "loss": 2.4482, "step": 68330 }, { "epoch": 4.642954205734474, "grad_norm": 2.2180838584899902, "learning_rate": 4.1982606332382115e-05, "loss": 2.4399, "step": 68335 }, { "epoch": 4.6432939258051364, "grad_norm": 2.9107463359832764, "learning_rate": 4.197835983149885e-05, "loss": 2.4678, "step": 68340 }, { "epoch": 4.6436336458757985, "grad_norm": 3.2789146900177, "learning_rate": 4.197411333061557e-05, "loss": 2.0551, "step": 68345 }, { "epoch": 4.64397336594646, "grad_norm": 3.7657902240753174, "learning_rate": 4.19698668297323e-05, "loss": 2.3185, "step": 68350 }, { "epoch": 4.644313086017122, "grad_norm": 3.055079936981201, "learning_rate": 4.1965620328849034e-05, "loss": 2.5303, "step": 68355 }, { "epoch": 4.644652806087784, "grad_norm": 2.9714467525482178, "learning_rate": 4.196137382796576e-05, "loss": 2.4396, "step": 68360 }, { "epoch": 4.644992526158445, "grad_norm": 3.329913854598999, "learning_rate": 4.195712732708248e-05, "loss": 2.2324, "step": 68365 }, { "epoch": 4.645332246229107, "grad_norm": 3.6183526515960693, "learning_rate": 4.195288082619922e-05, "loss": 2.6542, "step": 68370 }, { "epoch": 4.645671966299769, "grad_norm": 3.404590368270874, "learning_rate": 4.1948634325315946e-05, "loss": 2.5054, "step": 68375 }, { "epoch": 4.64601168637043, "grad_norm": 3.2605886459350586, "learning_rate": 4.194438782443267e-05, "loss": 2.2803, "step": 68380 }, { "epoch": 4.6463514064410925, "grad_norm": 2.70198130607605, "learning_rate": 4.1940141323549395e-05, "loss": 2.4202, "step": 68385 }, { "epoch": 4.6466911265117545, "grad_norm": 2.9469783306121826, "learning_rate": 4.193589482266613e-05, "loss": 2.4453, "step": 68390 }, { "epoch": 4.647030846582416, "grad_norm": 4.2414469718933105, "learning_rate": 4.193164832178285e-05, "loss": 2.5465, "step": 68395 }, { "epoch": 4.647370566653078, "grad_norm": 3.129615545272827, "learning_rate": 4.192740182089958e-05, "loss": 2.6448, "step": 68400 }, { 
"epoch": 4.64771028672374, "grad_norm": 4.002776622772217, "learning_rate": 4.1923155320016314e-05, "loss": 2.5024, "step": 68405 }, { "epoch": 4.648050006794401, "grad_norm": 3.5481057167053223, "learning_rate": 4.1918908819133035e-05, "loss": 2.367, "step": 68410 }, { "epoch": 4.648389726865063, "grad_norm": 3.4849650859832764, "learning_rate": 4.191466231824976e-05, "loss": 2.466, "step": 68415 }, { "epoch": 4.648729446935725, "grad_norm": 3.26589298248291, "learning_rate": 4.191041581736649e-05, "loss": 2.4284, "step": 68420 }, { "epoch": 4.649069167006386, "grad_norm": 2.913130521774292, "learning_rate": 4.190616931648322e-05, "loss": 2.349, "step": 68425 }, { "epoch": 4.6494088870770485, "grad_norm": 3.4021599292755127, "learning_rate": 4.190192281559995e-05, "loss": 2.4395, "step": 68430 }, { "epoch": 4.6497486071477105, "grad_norm": 2.982477903366089, "learning_rate": 4.1897676314716675e-05, "loss": 2.4462, "step": 68435 }, { "epoch": 4.650088327218372, "grad_norm": 2.8791286945343018, "learning_rate": 4.18934298138334e-05, "loss": 2.8164, "step": 68440 }, { "epoch": 4.650428047289034, "grad_norm": 2.9771759510040283, "learning_rate": 4.188918331295013e-05, "loss": 2.5184, "step": 68445 }, { "epoch": 4.650767767359696, "grad_norm": 3.7476565837860107, "learning_rate": 4.188493681206686e-05, "loss": 2.4975, "step": 68450 }, { "epoch": 4.651107487430357, "grad_norm": 2.6371963024139404, "learning_rate": 4.188069031118359e-05, "loss": 2.4818, "step": 68455 }, { "epoch": 4.651447207501019, "grad_norm": 2.6182901859283447, "learning_rate": 4.1876443810300315e-05, "loss": 2.6047, "step": 68460 }, { "epoch": 4.651786927571681, "grad_norm": 2.4414279460906982, "learning_rate": 4.187219730941704e-05, "loss": 2.2186, "step": 68465 }, { "epoch": 4.652126647642342, "grad_norm": 2.7879648208618164, "learning_rate": 4.186795080853377e-05, "loss": 2.4883, "step": 68470 }, { "epoch": 4.6524663677130045, "grad_norm": 4.027707099914551, "learning_rate": 4.18637043076505e-05, 
"loss": 2.1534, "step": 68475 }, { "epoch": 4.6528060877836666, "grad_norm": 2.691283941268921, "learning_rate": 4.185945780676723e-05, "loss": 2.2625, "step": 68480 }, { "epoch": 4.653145807854328, "grad_norm": 3.640773296356201, "learning_rate": 4.185521130588395e-05, "loss": 2.4967, "step": 68485 }, { "epoch": 4.65348552792499, "grad_norm": 3.4078831672668457, "learning_rate": 4.185096480500068e-05, "loss": 2.6222, "step": 68490 }, { "epoch": 4.653825247995652, "grad_norm": 2.8225655555725098, "learning_rate": 4.184671830411741e-05, "loss": 2.202, "step": 68495 }, { "epoch": 4.654164968066313, "grad_norm": 3.1618599891662598, "learning_rate": 4.184247180323413e-05, "loss": 2.4997, "step": 68500 }, { "epoch": 4.654504688136975, "grad_norm": 3.104041337966919, "learning_rate": 4.183822530235087e-05, "loss": 2.3899, "step": 68505 }, { "epoch": 4.654844408207637, "grad_norm": 3.144387722015381, "learning_rate": 4.1833978801467595e-05, "loss": 2.4261, "step": 68510 }, { "epoch": 4.655184128278298, "grad_norm": 3.1187238693237305, "learning_rate": 4.1829732300584317e-05, "loss": 2.4896, "step": 68515 }, { "epoch": 4.6555238483489605, "grad_norm": 3.3906800746917725, "learning_rate": 4.1825485799701045e-05, "loss": 2.551, "step": 68520 }, { "epoch": 4.655863568419623, "grad_norm": 3.015244960784912, "learning_rate": 4.182123929881778e-05, "loss": 2.3358, "step": 68525 }, { "epoch": 4.656203288490284, "grad_norm": 3.531301736831665, "learning_rate": 4.181699279793451e-05, "loss": 2.5588, "step": 68530 }, { "epoch": 4.656543008560946, "grad_norm": 3.671889066696167, "learning_rate": 4.181274629705123e-05, "loss": 2.6815, "step": 68535 }, { "epoch": 4.656882728631608, "grad_norm": 4.113502502441406, "learning_rate": 4.180849979616796e-05, "loss": 2.5542, "step": 68540 }, { "epoch": 4.657222448702269, "grad_norm": 3.1998865604400635, "learning_rate": 4.180425329528469e-05, "loss": 2.4482, "step": 68545 }, { "epoch": 4.657562168772931, "grad_norm": 3.8914875984191895, 
"learning_rate": 4.180000679440141e-05, "loss": 2.4745, "step": 68550 }, { "epoch": 4.657901888843593, "grad_norm": 3.381221055984497, "learning_rate": 4.179576029351814e-05, "loss": 2.2774, "step": 68555 }, { "epoch": 4.658241608914254, "grad_norm": 2.722548723220825, "learning_rate": 4.1791513792634875e-05, "loss": 2.5087, "step": 68560 }, { "epoch": 4.6585813289849165, "grad_norm": 2.9078831672668457, "learning_rate": 4.17872672917516e-05, "loss": 2.6114, "step": 68565 }, { "epoch": 4.658921049055579, "grad_norm": 5.621266841888428, "learning_rate": 4.1783020790868325e-05, "loss": 2.6328, "step": 68570 }, { "epoch": 4.65926076912624, "grad_norm": 2.767880439758301, "learning_rate": 4.177877428998506e-05, "loss": 2.3208, "step": 68575 }, { "epoch": 4.659600489196902, "grad_norm": 2.9098689556121826, "learning_rate": 4.177452778910178e-05, "loss": 2.5963, "step": 68580 }, { "epoch": 4.659940209267564, "grad_norm": 2.9993088245391846, "learning_rate": 4.177028128821851e-05, "loss": 2.59, "step": 68585 }, { "epoch": 4.660279929338225, "grad_norm": 3.3103229999542236, "learning_rate": 4.176603478733524e-05, "loss": 2.4469, "step": 68590 }, { "epoch": 4.660619649408887, "grad_norm": 3.0440165996551514, "learning_rate": 4.1761788286451965e-05, "loss": 2.4149, "step": 68595 }, { "epoch": 4.660959369479549, "grad_norm": 2.918545961380005, "learning_rate": 4.175754178556869e-05, "loss": 1.902, "step": 68600 }, { "epoch": 4.66129908955021, "grad_norm": 2.8279876708984375, "learning_rate": 4.175329528468542e-05, "loss": 2.5027, "step": 68605 }, { "epoch": 4.6616388096208725, "grad_norm": 3.313427448272705, "learning_rate": 4.174904878380215e-05, "loss": 2.4041, "step": 68610 }, { "epoch": 4.661978529691535, "grad_norm": 2.8677287101745605, "learning_rate": 4.174480228291888e-05, "loss": 2.3377, "step": 68615 }, { "epoch": 4.662318249762196, "grad_norm": 3.50808048248291, "learning_rate": 4.1740555782035605e-05, "loss": 2.3841, "step": 68620 }, { "epoch": 4.662657969832858, 
"grad_norm": 2.609424352645874, "learning_rate": 4.173630928115233e-05, "loss": 2.4981, "step": 68625 }, { "epoch": 4.66299768990352, "grad_norm": 2.972132682800293, "learning_rate": 4.173206278026906e-05, "loss": 2.3994, "step": 68630 }, { "epoch": 4.663337409974181, "grad_norm": 3.97145676612854, "learning_rate": 4.172781627938579e-05, "loss": 2.5023, "step": 68635 }, { "epoch": 4.663677130044843, "grad_norm": 3.302743434906006, "learning_rate": 4.172356977850252e-05, "loss": 2.277, "step": 68640 }, { "epoch": 4.664016850115505, "grad_norm": 2.707590103149414, "learning_rate": 4.1719323277619245e-05, "loss": 2.451, "step": 68645 }, { "epoch": 4.6643565701861665, "grad_norm": 4.483405113220215, "learning_rate": 4.171507677673597e-05, "loss": 2.2035, "step": 68650 }, { "epoch": 4.6646962902568285, "grad_norm": 2.6419332027435303, "learning_rate": 4.1710830275852694e-05, "loss": 2.4705, "step": 68655 }, { "epoch": 4.665036010327491, "grad_norm": 2.80107045173645, "learning_rate": 4.170658377496943e-05, "loss": 2.4641, "step": 68660 }, { "epoch": 4.665375730398152, "grad_norm": 3.2890985012054443, "learning_rate": 4.170233727408616e-05, "loss": 2.3544, "step": 68665 }, { "epoch": 4.665715450468814, "grad_norm": 3.563048839569092, "learning_rate": 4.169809077320288e-05, "loss": 2.2892, "step": 68670 }, { "epoch": 4.666055170539476, "grad_norm": 2.6982390880584717, "learning_rate": 4.169384427231961e-05, "loss": 2.6871, "step": 68675 }, { "epoch": 4.666394890610137, "grad_norm": 3.4344594478607178, "learning_rate": 4.168959777143634e-05, "loss": 2.3008, "step": 68680 }, { "epoch": 4.666734610680799, "grad_norm": 3.771437168121338, "learning_rate": 4.168535127055306e-05, "loss": 2.5722, "step": 68685 }, { "epoch": 4.667074330751461, "grad_norm": 4.428775310516357, "learning_rate": 4.168110476966979e-05, "loss": 2.4553, "step": 68690 }, { "epoch": 4.6674140508221225, "grad_norm": 2.707854747772217, "learning_rate": 4.1676858268786525e-05, "loss": 2.2322, "step": 68695 }, 
{ "epoch": 4.6677537708927845, "grad_norm": 3.152279853820801, "learning_rate": 4.167261176790325e-05, "loss": 2.5992, "step": 68700 }, { "epoch": 4.668093490963447, "grad_norm": 3.077852725982666, "learning_rate": 4.1668365267019974e-05, "loss": 2.3454, "step": 68705 }, { "epoch": 4.668433211034108, "grad_norm": 2.824814558029175, "learning_rate": 4.166411876613671e-05, "loss": 2.2742, "step": 68710 }, { "epoch": 4.66877293110477, "grad_norm": 3.594169855117798, "learning_rate": 4.165987226525344e-05, "loss": 2.3885, "step": 68715 }, { "epoch": 4.669112651175431, "grad_norm": 3.986900568008423, "learning_rate": 4.165562576437016e-05, "loss": 2.5093, "step": 68720 }, { "epoch": 4.669452371246093, "grad_norm": 3.6044178009033203, "learning_rate": 4.1651379263486886e-05, "loss": 2.3889, "step": 68725 }, { "epoch": 4.669792091316755, "grad_norm": 3.240309715270996, "learning_rate": 4.164713276260362e-05, "loss": 2.4182, "step": 68730 }, { "epoch": 4.670131811387416, "grad_norm": 3.9200973510742188, "learning_rate": 4.164288626172034e-05, "loss": 2.2909, "step": 68735 }, { "epoch": 4.6704715314580785, "grad_norm": 2.8110275268554688, "learning_rate": 4.163863976083707e-05, "loss": 2.4972, "step": 68740 }, { "epoch": 4.6708112515287405, "grad_norm": 3.059757709503174, "learning_rate": 4.1634393259953805e-05, "loss": 2.3603, "step": 68745 }, { "epoch": 4.671150971599402, "grad_norm": 3.5419580936431885, "learning_rate": 4.1630146759070526e-05, "loss": 2.6342, "step": 68750 }, { "epoch": 4.671490691670064, "grad_norm": 3.745002269744873, "learning_rate": 4.1625900258187254e-05, "loss": 2.4804, "step": 68755 }, { "epoch": 4.671830411740726, "grad_norm": 3.5013554096221924, "learning_rate": 4.162165375730398e-05, "loss": 2.4706, "step": 68760 }, { "epoch": 4.672170131811387, "grad_norm": 2.8550312519073486, "learning_rate": 4.161740725642071e-05, "loss": 2.5341, "step": 68765 }, { "epoch": 4.672509851882049, "grad_norm": 3.662353277206421, "learning_rate": 
4.161316075553744e-05, "loss": 2.3137, "step": 68770 }, { "epoch": 4.672849571952711, "grad_norm": 2.3658995628356934, "learning_rate": 4.1608914254654166e-05, "loss": 2.5329, "step": 68775 }, { "epoch": 4.673189292023372, "grad_norm": 3.2286455631256104, "learning_rate": 4.1604667753770894e-05, "loss": 2.706, "step": 68780 }, { "epoch": 4.6735290120940345, "grad_norm": 2.724045515060425, "learning_rate": 4.160042125288762e-05, "loss": 2.3746, "step": 68785 }, { "epoch": 4.673868732164697, "grad_norm": 3.0513076782226562, "learning_rate": 4.159617475200435e-05, "loss": 2.5, "step": 68790 }, { "epoch": 4.674208452235358, "grad_norm": 3.238715648651123, "learning_rate": 4.159192825112108e-05, "loss": 2.3535, "step": 68795 }, { "epoch": 4.67454817230602, "grad_norm": 2.814805269241333, "learning_rate": 4.1587681750237806e-05, "loss": 2.3787, "step": 68800 }, { "epoch": 4.674887892376682, "grad_norm": 3.217780113220215, "learning_rate": 4.1583435249354534e-05, "loss": 2.2769, "step": 68805 }, { "epoch": 4.675227612447343, "grad_norm": 3.043813705444336, "learning_rate": 4.157918874847126e-05, "loss": 2.5285, "step": 68810 }, { "epoch": 4.675567332518005, "grad_norm": 3.052649736404419, "learning_rate": 4.157494224758799e-05, "loss": 2.3623, "step": 68815 }, { "epoch": 4.675907052588667, "grad_norm": 2.8882126808166504, "learning_rate": 4.157069574670472e-05, "loss": 2.5269, "step": 68820 }, { "epoch": 4.676246772659328, "grad_norm": 2.922001600265503, "learning_rate": 4.156644924582144e-05, "loss": 2.2779, "step": 68825 }, { "epoch": 4.6765864927299905, "grad_norm": 3.0093817710876465, "learning_rate": 4.1562202744938174e-05, "loss": 2.4212, "step": 68830 }, { "epoch": 4.676926212800653, "grad_norm": 3.502694606781006, "learning_rate": 4.15579562440549e-05, "loss": 2.443, "step": 68835 }, { "epoch": 4.677265932871314, "grad_norm": 3.510218381881714, "learning_rate": 4.1553709743171624e-05, "loss": 2.0975, "step": 68840 }, { "epoch": 4.677605652941976, "grad_norm": 
3.555947780609131, "learning_rate": 4.154946324228836e-05, "loss": 2.3696, "step": 68845 }, { "epoch": 4.677945373012638, "grad_norm": 3.3835060596466064, "learning_rate": 4.1545216741405086e-05, "loss": 2.2996, "step": 68850 }, { "epoch": 4.678285093083299, "grad_norm": 2.709388256072998, "learning_rate": 4.154097024052181e-05, "loss": 2.3959, "step": 68855 }, { "epoch": 4.678624813153961, "grad_norm": 3.391571521759033, "learning_rate": 4.1536723739638536e-05, "loss": 2.3086, "step": 68860 }, { "epoch": 4.678964533224623, "grad_norm": 4.092562675476074, "learning_rate": 4.153247723875527e-05, "loss": 2.2396, "step": 68865 }, { "epoch": 4.679304253295284, "grad_norm": 2.8629164695739746, "learning_rate": 4.1528230737872e-05, "loss": 2.32, "step": 68870 }, { "epoch": 4.6796439733659465, "grad_norm": 3.8322198390960693, "learning_rate": 4.152398423698872e-05, "loss": 2.5493, "step": 68875 }, { "epoch": 4.679983693436609, "grad_norm": 3.4572784900665283, "learning_rate": 4.1519737736105454e-05, "loss": 2.5206, "step": 68880 }, { "epoch": 4.68032341350727, "grad_norm": 2.6795246601104736, "learning_rate": 4.151549123522218e-05, "loss": 2.4012, "step": 68885 }, { "epoch": 4.680663133577932, "grad_norm": 4.096378326416016, "learning_rate": 4.1511244734338904e-05, "loss": 2.569, "step": 68890 }, { "epoch": 4.681002853648594, "grad_norm": 3.1057088375091553, "learning_rate": 4.150699823345564e-05, "loss": 2.437, "step": 68895 }, { "epoch": 4.681342573719255, "grad_norm": 3.76124906539917, "learning_rate": 4.1502751732572366e-05, "loss": 2.4208, "step": 68900 }, { "epoch": 4.681682293789917, "grad_norm": 3.5273778438568115, "learning_rate": 4.149850523168909e-05, "loss": 2.5599, "step": 68905 }, { "epoch": 4.682022013860579, "grad_norm": 3.0902082920074463, "learning_rate": 4.1494258730805816e-05, "loss": 2.4053, "step": 68910 }, { "epoch": 4.68236173393124, "grad_norm": 4.3427605628967285, "learning_rate": 4.149001222992255e-05, "loss": 2.4089, "step": 68915 }, { "epoch": 
4.6827014540019025, "grad_norm": 3.5632553100585938, "learning_rate": 4.148576572903927e-05, "loss": 2.6644, "step": 68920 }, { "epoch": 4.683041174072565, "grad_norm": 3.693632125854492, "learning_rate": 4.1481519228156e-05, "loss": 2.5533, "step": 68925 }, { "epoch": 4.683380894143226, "grad_norm": 2.7670247554779053, "learning_rate": 4.1477272727272734e-05, "loss": 2.3407, "step": 68930 }, { "epoch": 4.683720614213888, "grad_norm": 3.1683883666992188, "learning_rate": 4.1473026226389456e-05, "loss": 2.2153, "step": 68935 }, { "epoch": 4.684060334284549, "grad_norm": 2.967719554901123, "learning_rate": 4.1468779725506184e-05, "loss": 2.6536, "step": 68940 }, { "epoch": 4.684400054355211, "grad_norm": 3.283857583999634, "learning_rate": 4.146453322462291e-05, "loss": 2.2595, "step": 68945 }, { "epoch": 4.684739774425873, "grad_norm": 3.2740583419799805, "learning_rate": 4.146028672373964e-05, "loss": 2.4318, "step": 68950 }, { "epoch": 4.685079494496534, "grad_norm": 3.3056862354278564, "learning_rate": 4.145604022285637e-05, "loss": 2.4846, "step": 68955 }, { "epoch": 4.6854192145671965, "grad_norm": 3.2961699962615967, "learning_rate": 4.1451793721973096e-05, "loss": 2.3794, "step": 68960 }, { "epoch": 4.6857589346378585, "grad_norm": 3.341651678085327, "learning_rate": 4.1447547221089824e-05, "loss": 2.4811, "step": 68965 }, { "epoch": 4.68609865470852, "grad_norm": 3.2622697353363037, "learning_rate": 4.144330072020655e-05, "loss": 2.2702, "step": 68970 }, { "epoch": 4.686438374779182, "grad_norm": 3.3241875171661377, "learning_rate": 4.143905421932328e-05, "loss": 2.5328, "step": 68975 }, { "epoch": 4.686778094849844, "grad_norm": 2.5193045139312744, "learning_rate": 4.143480771844001e-05, "loss": 2.5925, "step": 68980 }, { "epoch": 4.687117814920505, "grad_norm": 3.03067946434021, "learning_rate": 4.1430561217556736e-05, "loss": 2.0887, "step": 68985 }, { "epoch": 4.687457534991167, "grad_norm": 2.841871738433838, "learning_rate": 4.1426314716673464e-05, 
"loss": 2.5178, "step": 68990 }, { "epoch": 4.687797255061829, "grad_norm": 3.4157042503356934, "learning_rate": 4.142206821579019e-05, "loss": 2.5675, "step": 68995 }, { "epoch": 4.68813697513249, "grad_norm": 3.8519928455352783, "learning_rate": 4.141782171490692e-05, "loss": 2.3513, "step": 69000 }, { "epoch": 4.6884766952031525, "grad_norm": 3.3471591472625732, "learning_rate": 4.141357521402365e-05, "loss": 2.6535, "step": 69005 }, { "epoch": 4.6888164152738145, "grad_norm": 3.4799628257751465, "learning_rate": 4.140932871314037e-05, "loss": 2.5239, "step": 69010 }, { "epoch": 4.689156135344476, "grad_norm": 3.488762617111206, "learning_rate": 4.1405082212257104e-05, "loss": 2.6564, "step": 69015 }, { "epoch": 4.689495855415138, "grad_norm": 3.682115077972412, "learning_rate": 4.140083571137383e-05, "loss": 2.4207, "step": 69020 }, { "epoch": 4.6898355754858, "grad_norm": 3.6874849796295166, "learning_rate": 4.139658921049055e-05, "loss": 2.4591, "step": 69025 }, { "epoch": 4.690175295556461, "grad_norm": 2.7675392627716064, "learning_rate": 4.139234270960729e-05, "loss": 2.6977, "step": 69030 }, { "epoch": 4.690515015627123, "grad_norm": 2.6717958450317383, "learning_rate": 4.1388096208724016e-05, "loss": 2.315, "step": 69035 }, { "epoch": 4.690854735697785, "grad_norm": 3.2033843994140625, "learning_rate": 4.1383849707840744e-05, "loss": 2.3744, "step": 69040 }, { "epoch": 4.691194455768446, "grad_norm": 3.529951810836792, "learning_rate": 4.1379603206957465e-05, "loss": 2.4268, "step": 69045 }, { "epoch": 4.6915341758391085, "grad_norm": 5.02903938293457, "learning_rate": 4.13753567060742e-05, "loss": 2.4012, "step": 69050 }, { "epoch": 4.6918738959097706, "grad_norm": 3.4270944595336914, "learning_rate": 4.137111020519093e-05, "loss": 2.5073, "step": 69055 }, { "epoch": 4.692213615980432, "grad_norm": 3.9210751056671143, "learning_rate": 4.136686370430765e-05, "loss": 2.4849, "step": 69060 }, { "epoch": 4.692553336051094, "grad_norm": 2.769697666168213, 
"learning_rate": 4.1362617203424384e-05, "loss": 2.4073, "step": 69065 }, { "epoch": 4.692893056121756, "grad_norm": 3.7615292072296143, "learning_rate": 4.135837070254111e-05, "loss": 2.7329, "step": 69070 }, { "epoch": 4.693232776192417, "grad_norm": 3.3816683292388916, "learning_rate": 4.135412420165783e-05, "loss": 2.796, "step": 69075 }, { "epoch": 4.693572496263079, "grad_norm": 3.1136457920074463, "learning_rate": 4.134987770077456e-05, "loss": 2.6603, "step": 69080 }, { "epoch": 4.693912216333741, "grad_norm": 2.761629581451416, "learning_rate": 4.1345631199891296e-05, "loss": 2.6871, "step": 69085 }, { "epoch": 4.694251936404402, "grad_norm": 2.7320120334625244, "learning_rate": 4.134138469900802e-05, "loss": 2.198, "step": 69090 }, { "epoch": 4.6945916564750645, "grad_norm": 2.8406577110290527, "learning_rate": 4.1337138198124745e-05, "loss": 2.4914, "step": 69095 }, { "epoch": 4.694931376545727, "grad_norm": 3.30788254737854, "learning_rate": 4.133289169724148e-05, "loss": 2.4017, "step": 69100 }, { "epoch": 4.695271096616388, "grad_norm": 3.005241632461548, "learning_rate": 4.13286451963582e-05, "loss": 2.639, "step": 69105 }, { "epoch": 4.69561081668705, "grad_norm": 3.515303611755371, "learning_rate": 4.132439869547493e-05, "loss": 2.5328, "step": 69110 }, { "epoch": 4.695950536757712, "grad_norm": 4.055753231048584, "learning_rate": 4.132015219459166e-05, "loss": 2.6166, "step": 69115 }, { "epoch": 4.696290256828373, "grad_norm": 2.9069390296936035, "learning_rate": 4.1315905693708385e-05, "loss": 2.4762, "step": 69120 }, { "epoch": 4.696629976899035, "grad_norm": 3.638021230697632, "learning_rate": 4.131165919282511e-05, "loss": 2.2734, "step": 69125 }, { "epoch": 4.696969696969697, "grad_norm": 3.3906636238098145, "learning_rate": 4.130741269194184e-05, "loss": 2.2124, "step": 69130 }, { "epoch": 4.697309417040358, "grad_norm": 2.4922547340393066, "learning_rate": 4.130316619105857e-05, "loss": 2.6836, "step": 69135 }, { "epoch": 
4.6976491371110205, "grad_norm": 2.9468867778778076, "learning_rate": 4.12989196901753e-05, "loss": 2.3386, "step": 69140 }, { "epoch": 4.697988857181683, "grad_norm": 2.9049220085144043, "learning_rate": 4.1294673189292025e-05, "loss": 2.4448, "step": 69145 }, { "epoch": 4.698328577252344, "grad_norm": 2.823570489883423, "learning_rate": 4.129042668840875e-05, "loss": 2.5503, "step": 69150 }, { "epoch": 4.698668297323006, "grad_norm": 3.785767078399658, "learning_rate": 4.128618018752548e-05, "loss": 2.5654, "step": 69155 }, { "epoch": 4.699008017393668, "grad_norm": 2.880584478378296, "learning_rate": 4.128193368664221e-05, "loss": 2.5913, "step": 69160 }, { "epoch": 4.699347737464329, "grad_norm": 3.6865406036376953, "learning_rate": 4.127768718575894e-05, "loss": 2.2488, "step": 69165 }, { "epoch": 4.699687457534991, "grad_norm": 3.2273592948913574, "learning_rate": 4.1273440684875665e-05, "loss": 2.4929, "step": 69170 }, { "epoch": 4.700027177605653, "grad_norm": 3.1236672401428223, "learning_rate": 4.126919418399239e-05, "loss": 2.4075, "step": 69175 }, { "epoch": 4.700366897676314, "grad_norm": 2.5493040084838867, "learning_rate": 4.1264947683109114e-05, "loss": 2.3966, "step": 69180 }, { "epoch": 4.7007066177469765, "grad_norm": 3.090998411178589, "learning_rate": 4.126070118222585e-05, "loss": 2.4942, "step": 69185 }, { "epoch": 4.701046337817639, "grad_norm": 4.031210899353027, "learning_rate": 4.125645468134258e-05, "loss": 1.9866, "step": 69190 }, { "epoch": 4.7013860578883, "grad_norm": 2.5926027297973633, "learning_rate": 4.12522081804593e-05, "loss": 2.7398, "step": 69195 }, { "epoch": 4.701725777958962, "grad_norm": 3.3404452800750732, "learning_rate": 4.124796167957603e-05, "loss": 2.4977, "step": 69200 }, { "epoch": 4.702065498029624, "grad_norm": 3.029259204864502, "learning_rate": 4.124371517869276e-05, "loss": 2.4804, "step": 69205 }, { "epoch": 4.702405218100285, "grad_norm": 3.49753737449646, "learning_rate": 4.123946867780949e-05, "loss": 
2.4106, "step": 69210 }, { "epoch": 4.702744938170947, "grad_norm": 4.101232528686523, "learning_rate": 4.123522217692621e-05, "loss": 2.196, "step": 69215 }, { "epoch": 4.703084658241609, "grad_norm": 3.940070152282715, "learning_rate": 4.1230975676042945e-05, "loss": 2.5683, "step": 69220 }, { "epoch": 4.7034243783122704, "grad_norm": 3.4612863063812256, "learning_rate": 4.122672917515967e-05, "loss": 2.6068, "step": 69225 }, { "epoch": 4.7037640983829325, "grad_norm": 2.8663265705108643, "learning_rate": 4.1222482674276395e-05, "loss": 2.6307, "step": 69230 }, { "epoch": 4.704103818453595, "grad_norm": 3.36354398727417, "learning_rate": 4.121823617339313e-05, "loss": 2.6448, "step": 69235 }, { "epoch": 4.704443538524256, "grad_norm": 2.7198634147644043, "learning_rate": 4.121398967250986e-05, "loss": 2.6509, "step": 69240 }, { "epoch": 4.704783258594918, "grad_norm": 3.7036283016204834, "learning_rate": 4.120974317162658e-05, "loss": 2.5578, "step": 69245 }, { "epoch": 4.70512297866558, "grad_norm": 3.4021923542022705, "learning_rate": 4.1205496670743307e-05, "loss": 2.6162, "step": 69250 }, { "epoch": 4.705462698736241, "grad_norm": 3.246704339981079, "learning_rate": 4.120125016986004e-05, "loss": 2.3722, "step": 69255 }, { "epoch": 4.705802418806903, "grad_norm": 3.7268314361572266, "learning_rate": 4.119700366897676e-05, "loss": 2.6544, "step": 69260 }, { "epoch": 4.706142138877565, "grad_norm": 3.4873108863830566, "learning_rate": 4.119275716809349e-05, "loss": 2.2712, "step": 69265 }, { "epoch": 4.7064818589482265, "grad_norm": 2.556934118270874, "learning_rate": 4.1188510667210225e-05, "loss": 2.509, "step": 69270 }, { "epoch": 4.7068215790188885, "grad_norm": 3.281027317047119, "learning_rate": 4.118426416632695e-05, "loss": 2.4473, "step": 69275 }, { "epoch": 4.707161299089551, "grad_norm": 2.8984897136688232, "learning_rate": 4.1180017665443675e-05, "loss": 2.3126, "step": 69280 }, { "epoch": 4.707501019160212, "grad_norm": 3.923905849456787, 
"learning_rate": 4.117577116456041e-05, "loss": 2.369, "step": 69285 }, { "epoch": 4.707840739230874, "grad_norm": 3.4290759563446045, "learning_rate": 4.117152466367713e-05, "loss": 2.38, "step": 69290 }, { "epoch": 4.708180459301536, "grad_norm": 3.3916876316070557, "learning_rate": 4.116727816279386e-05, "loss": 2.3756, "step": 69295 }, { "epoch": 4.708520179372197, "grad_norm": 3.108869791030884, "learning_rate": 4.116303166191059e-05, "loss": 2.7648, "step": 69300 }, { "epoch": 4.708859899442859, "grad_norm": 2.839855432510376, "learning_rate": 4.1158785161027315e-05, "loss": 2.5526, "step": 69305 }, { "epoch": 4.709199619513521, "grad_norm": 3.1831958293914795, "learning_rate": 4.115453866014404e-05, "loss": 2.2277, "step": 69310 }, { "epoch": 4.7095393395841825, "grad_norm": 2.638864755630493, "learning_rate": 4.115029215926077e-05, "loss": 2.5126, "step": 69315 }, { "epoch": 4.7098790596548445, "grad_norm": 3.588043212890625, "learning_rate": 4.11460456583775e-05, "loss": 2.4456, "step": 69320 }, { "epoch": 4.710218779725507, "grad_norm": 3.1555416584014893, "learning_rate": 4.114179915749423e-05, "loss": 2.3656, "step": 69325 }, { "epoch": 4.710558499796168, "grad_norm": 3.557088851928711, "learning_rate": 4.1137552656610955e-05, "loss": 2.3581, "step": 69330 }, { "epoch": 4.71089821986683, "grad_norm": 3.138636827468872, "learning_rate": 4.113330615572768e-05, "loss": 2.363, "step": 69335 }, { "epoch": 4.711237939937492, "grad_norm": 3.6446783542633057, "learning_rate": 4.112905965484441e-05, "loss": 2.8101, "step": 69340 }, { "epoch": 4.711577660008153, "grad_norm": 3.4162144660949707, "learning_rate": 4.112481315396114e-05, "loss": 2.563, "step": 69345 }, { "epoch": 4.711917380078815, "grad_norm": 3.2039036750793457, "learning_rate": 4.112056665307786e-05, "loss": 2.3054, "step": 69350 }, { "epoch": 4.712257100149477, "grad_norm": 3.3063735961914062, "learning_rate": 4.1116320152194595e-05, "loss": 2.1752, "step": 69355 }, { "epoch": 4.7125968202201385, 
"grad_norm": 2.6141879558563232, "learning_rate": 4.111207365131132e-05, "loss": 2.1378, "step": 69360 }, { "epoch": 4.712936540290801, "grad_norm": 2.291877508163452, "learning_rate": 4.1107827150428044e-05, "loss": 2.4679, "step": 69365 }, { "epoch": 4.713276260361463, "grad_norm": 3.8430256843566895, "learning_rate": 4.110358064954478e-05, "loss": 2.3686, "step": 69370 }, { "epoch": 4.713615980432124, "grad_norm": 3.7294161319732666, "learning_rate": 4.109933414866151e-05, "loss": 2.3196, "step": 69375 }, { "epoch": 4.713955700502786, "grad_norm": 2.7208259105682373, "learning_rate": 4.1095087647778235e-05, "loss": 2.4394, "step": 69380 }, { "epoch": 4.714295420573448, "grad_norm": 2.7050371170043945, "learning_rate": 4.109084114689496e-05, "loss": 2.2813, "step": 69385 }, { "epoch": 4.714635140644109, "grad_norm": 3.1665475368499756, "learning_rate": 4.108659464601169e-05, "loss": 2.3563, "step": 69390 }, { "epoch": 4.714974860714771, "grad_norm": 3.4146296977996826, "learning_rate": 4.108234814512842e-05, "loss": 2.6154, "step": 69395 }, { "epoch": 4.715314580785432, "grad_norm": 2.6998379230499268, "learning_rate": 4.107810164424514e-05, "loss": 2.5442, "step": 69400 }, { "epoch": 4.7156543008560945, "grad_norm": 4.148066520690918, "learning_rate": 4.1073855143361875e-05, "loss": 2.4712, "step": 69405 }, { "epoch": 4.715994020926757, "grad_norm": 3.9779465198516846, "learning_rate": 4.10696086424786e-05, "loss": 2.413, "step": 69410 }, { "epoch": 4.716333740997418, "grad_norm": 2.815896511077881, "learning_rate": 4.1065362141595324e-05, "loss": 2.24, "step": 69415 }, { "epoch": 4.71667346106808, "grad_norm": 3.687776803970337, "learning_rate": 4.106111564071206e-05, "loss": 2.3155, "step": 69420 }, { "epoch": 4.717013181138742, "grad_norm": 2.5740110874176025, "learning_rate": 4.105686913982879e-05, "loss": 2.5612, "step": 69425 }, { "epoch": 4.717352901209403, "grad_norm": 2.5417704582214355, "learning_rate": 4.105262263894551e-05, "loss": 2.3994, "step": 
69430 }, { "epoch": 4.717692621280065, "grad_norm": 3.010171413421631, "learning_rate": 4.1048376138062236e-05, "loss": 2.5347, "step": 69435 }, { "epoch": 4.718032341350727, "grad_norm": 2.580353021621704, "learning_rate": 4.104412963717897e-05, "loss": 2.2183, "step": 69440 }, { "epoch": 4.718372061421388, "grad_norm": 3.3263661861419678, "learning_rate": 4.103988313629569e-05, "loss": 2.3864, "step": 69445 }, { "epoch": 4.7187117814920505, "grad_norm": 2.8134052753448486, "learning_rate": 4.103563663541242e-05, "loss": 2.6213, "step": 69450 }, { "epoch": 4.719051501562713, "grad_norm": 3.448103666305542, "learning_rate": 4.1031390134529155e-05, "loss": 2.1587, "step": 69455 }, { "epoch": 4.719391221633374, "grad_norm": 3.7335915565490723, "learning_rate": 4.1027143633645876e-05, "loss": 2.445, "step": 69460 }, { "epoch": 4.719730941704036, "grad_norm": 3.2303740978240967, "learning_rate": 4.1022897132762604e-05, "loss": 2.4669, "step": 69465 }, { "epoch": 4.720070661774698, "grad_norm": 3.2336695194244385, "learning_rate": 4.101865063187933e-05, "loss": 2.4953, "step": 69470 }, { "epoch": 4.720410381845359, "grad_norm": 3.049333333969116, "learning_rate": 4.101440413099606e-05, "loss": 2.4739, "step": 69475 }, { "epoch": 4.720750101916021, "grad_norm": 3.3367979526519775, "learning_rate": 4.101015763011279e-05, "loss": 2.4641, "step": 69480 }, { "epoch": 4.721089821986683, "grad_norm": 3.267653703689575, "learning_rate": 4.1005911129229516e-05, "loss": 2.1306, "step": 69485 }, { "epoch": 4.721429542057344, "grad_norm": 3.060178756713867, "learning_rate": 4.1001664628346244e-05, "loss": 2.2982, "step": 69490 }, { "epoch": 4.7217692621280065, "grad_norm": 2.9657673835754395, "learning_rate": 4.099741812746297e-05, "loss": 2.6117, "step": 69495 }, { "epoch": 4.722108982198669, "grad_norm": 3.7668049335479736, "learning_rate": 4.09931716265797e-05, "loss": 2.4961, "step": 69500 }, { "epoch": 4.72244870226933, "grad_norm": 2.7174105644226074, "learning_rate": 
4.098892512569643e-05, "loss": 2.4838, "step": 69505 }, { "epoch": 4.722788422339992, "grad_norm": 3.4953179359436035, "learning_rate": 4.0984678624813156e-05, "loss": 2.5696, "step": 69510 }, { "epoch": 4.723128142410654, "grad_norm": 3.7627875804901123, "learning_rate": 4.0980432123929884e-05, "loss": 2.399, "step": 69515 }, { "epoch": 4.723467862481315, "grad_norm": 3.2080459594726562, "learning_rate": 4.097618562304661e-05, "loss": 2.364, "step": 69520 }, { "epoch": 4.723807582551977, "grad_norm": 2.890331268310547, "learning_rate": 4.097193912216334e-05, "loss": 2.3821, "step": 69525 }, { "epoch": 4.724147302622639, "grad_norm": 3.503765821456909, "learning_rate": 4.096769262128007e-05, "loss": 2.4518, "step": 69530 }, { "epoch": 4.7244870226933005, "grad_norm": 4.016111373901367, "learning_rate": 4.096344612039679e-05, "loss": 2.2206, "step": 69535 }, { "epoch": 4.7248267427639625, "grad_norm": 2.3139851093292236, "learning_rate": 4.0959199619513524e-05, "loss": 2.2388, "step": 69540 }, { "epoch": 4.725166462834625, "grad_norm": 2.9283196926116943, "learning_rate": 4.095495311863025e-05, "loss": 2.5436, "step": 69545 }, { "epoch": 4.725506182905286, "grad_norm": 3.106971025466919, "learning_rate": 4.095070661774698e-05, "loss": 2.5335, "step": 69550 }, { "epoch": 4.725845902975948, "grad_norm": 2.4433562755584717, "learning_rate": 4.094646011686371e-05, "loss": 2.2626, "step": 69555 }, { "epoch": 4.72618562304661, "grad_norm": 3.1355972290039062, "learning_rate": 4.0942213615980436e-05, "loss": 2.2768, "step": 69560 }, { "epoch": 4.726525343117271, "grad_norm": 4.004927158355713, "learning_rate": 4.0937967115097164e-05, "loss": 2.5717, "step": 69565 }, { "epoch": 4.726865063187933, "grad_norm": 3.015320062637329, "learning_rate": 4.0933720614213886e-05, "loss": 2.3747, "step": 69570 }, { "epoch": 4.727204783258595, "grad_norm": 3.3384363651275635, "learning_rate": 4.092947411333062e-05, "loss": 2.3059, "step": 69575 }, { "epoch": 4.7275445033292565, 
"grad_norm": 3.3784468173980713, "learning_rate": 4.092522761244735e-05, "loss": 2.4013, "step": 69580 }, { "epoch": 4.7278842233999185, "grad_norm": 3.867631196975708, "learning_rate": 4.092098111156407e-05, "loss": 2.4448, "step": 69585 }, { "epoch": 4.728223943470581, "grad_norm": 3.3851842880249023, "learning_rate": 4.0916734610680804e-05, "loss": 2.4787, "step": 69590 }, { "epoch": 4.728563663541242, "grad_norm": 4.189346790313721, "learning_rate": 4.091248810979753e-05, "loss": 2.4147, "step": 69595 }, { "epoch": 4.728903383611904, "grad_norm": 3.2084038257598877, "learning_rate": 4.0908241608914254e-05, "loss": 2.4329, "step": 69600 }, { "epoch": 4.729243103682566, "grad_norm": 3.055814743041992, "learning_rate": 4.090399510803098e-05, "loss": 2.3436, "step": 69605 }, { "epoch": 4.729582823753227, "grad_norm": 3.020764112472534, "learning_rate": 4.0899748607147716e-05, "loss": 2.7473, "step": 69610 }, { "epoch": 4.729922543823889, "grad_norm": 3.1746108531951904, "learning_rate": 4.089550210626444e-05, "loss": 2.3696, "step": 69615 }, { "epoch": 4.73026226389455, "grad_norm": 2.8113884925842285, "learning_rate": 4.0891255605381166e-05, "loss": 2.5331, "step": 69620 }, { "epoch": 4.7306019839652125, "grad_norm": 2.928041934967041, "learning_rate": 4.08870091044979e-05, "loss": 2.5589, "step": 69625 }, { "epoch": 4.7309417040358746, "grad_norm": 2.708979606628418, "learning_rate": 4.088276260361462e-05, "loss": 2.5398, "step": 69630 }, { "epoch": 4.731281424106536, "grad_norm": 3.2357332706451416, "learning_rate": 4.087851610273135e-05, "loss": 2.3043, "step": 69635 }, { "epoch": 4.731621144177198, "grad_norm": 3.612321615219116, "learning_rate": 4.087426960184808e-05, "loss": 2.6405, "step": 69640 }, { "epoch": 4.73196086424786, "grad_norm": 2.920607566833496, "learning_rate": 4.0870023100964806e-05, "loss": 2.2811, "step": 69645 }, { "epoch": 4.732300584318521, "grad_norm": 2.9495065212249756, "learning_rate": 4.0865776600081534e-05, "loss": 2.4451, "step": 
69650 }, { "epoch": 4.732640304389183, "grad_norm": 3.1842169761657715, "learning_rate": 4.086153009919826e-05, "loss": 2.4763, "step": 69655 }, { "epoch": 4.732980024459845, "grad_norm": 2.8879997730255127, "learning_rate": 4.085728359831499e-05, "loss": 2.4463, "step": 69660 }, { "epoch": 4.733319744530506, "grad_norm": 3.5658462047576904, "learning_rate": 4.085303709743172e-05, "loss": 1.9838, "step": 69665 }, { "epoch": 4.7336594646011685, "grad_norm": 2.393751382827759, "learning_rate": 4.0848790596548446e-05, "loss": 2.3352, "step": 69670 }, { "epoch": 4.733999184671831, "grad_norm": 3.398136615753174, "learning_rate": 4.0844544095665174e-05, "loss": 2.5488, "step": 69675 }, { "epoch": 4.734338904742492, "grad_norm": 3.060704231262207, "learning_rate": 4.08402975947819e-05, "loss": 2.376, "step": 69680 }, { "epoch": 4.734678624813154, "grad_norm": 2.7453420162200928, "learning_rate": 4.083605109389863e-05, "loss": 2.3835, "step": 69685 }, { "epoch": 4.735018344883816, "grad_norm": 3.3224449157714844, "learning_rate": 4.083180459301536e-05, "loss": 2.7131, "step": 69690 }, { "epoch": 4.735358064954477, "grad_norm": 3.335113286972046, "learning_rate": 4.0827558092132086e-05, "loss": 2.3971, "step": 69695 }, { "epoch": 4.735697785025139, "grad_norm": 2.79286789894104, "learning_rate": 4.0823311591248814e-05, "loss": 2.0726, "step": 69700 }, { "epoch": 4.736037505095801, "grad_norm": 2.936699628829956, "learning_rate": 4.0819065090365535e-05, "loss": 2.4483, "step": 69705 }, { "epoch": 4.736377225166462, "grad_norm": 4.014225482940674, "learning_rate": 4.081481858948227e-05, "loss": 2.5723, "step": 69710 }, { "epoch": 4.7367169452371245, "grad_norm": 2.562185764312744, "learning_rate": 4.0810572088599e-05, "loss": 2.3552, "step": 69715 }, { "epoch": 4.737056665307787, "grad_norm": 3.8117918968200684, "learning_rate": 4.0806325587715726e-05, "loss": 1.9914, "step": 69720 }, { "epoch": 4.737396385378448, "grad_norm": 3.282432794570923, "learning_rate": 
4.0802079086832454e-05, "loss": 2.7817, "step": 69725 }, { "epoch": 4.73773610544911, "grad_norm": 3.1937859058380127, "learning_rate": 4.079783258594918e-05, "loss": 2.3179, "step": 69730 }, { "epoch": 4.738075825519772, "grad_norm": 3.3725335597991943, "learning_rate": 4.079358608506591e-05, "loss": 2.4546, "step": 69735 }, { "epoch": 4.738415545590433, "grad_norm": 3.0755255222320557, "learning_rate": 4.078933958418263e-05, "loss": 2.6038, "step": 69740 }, { "epoch": 4.738755265661095, "grad_norm": 3.475745439529419, "learning_rate": 4.0785093083299366e-05, "loss": 2.5635, "step": 69745 }, { "epoch": 4.739094985731757, "grad_norm": 2.7630932331085205, "learning_rate": 4.0780846582416094e-05, "loss": 2.4845, "step": 69750 }, { "epoch": 4.739434705802418, "grad_norm": 3.348379135131836, "learning_rate": 4.0776600081532815e-05, "loss": 2.681, "step": 69755 }, { "epoch": 4.7397744258730805, "grad_norm": 3.5305094718933105, "learning_rate": 4.077235358064955e-05, "loss": 2.2567, "step": 69760 }, { "epoch": 4.740114145943743, "grad_norm": 2.767392158508301, "learning_rate": 4.076810707976628e-05, "loss": 2.4309, "step": 69765 }, { "epoch": 4.740453866014404, "grad_norm": 3.266184091567993, "learning_rate": 4.0763860578883e-05, "loss": 2.4577, "step": 69770 }, { "epoch": 4.740793586085066, "grad_norm": 3.7471256256103516, "learning_rate": 4.075961407799973e-05, "loss": 2.2108, "step": 69775 }, { "epoch": 4.741133306155728, "grad_norm": 4.235505104064941, "learning_rate": 4.075536757711646e-05, "loss": 2.2238, "step": 69780 }, { "epoch": 4.741473026226389, "grad_norm": 3.498302459716797, "learning_rate": 4.075112107623318e-05, "loss": 2.367, "step": 69785 }, { "epoch": 4.741812746297051, "grad_norm": 4.040854454040527, "learning_rate": 4.074687457534991e-05, "loss": 2.4076, "step": 69790 }, { "epoch": 4.742152466367713, "grad_norm": 2.995558023452759, "learning_rate": 4.0742628074466646e-05, "loss": 2.7991, "step": 69795 }, { "epoch": 4.7424921864383744, "grad_norm": 
3.703064203262329, "learning_rate": 4.073838157358337e-05, "loss": 2.1984, "step": 69800 }, { "epoch": 4.7428319065090365, "grad_norm": 2.805086135864258, "learning_rate": 4.0734135072700095e-05, "loss": 2.3817, "step": 69805 }, { "epoch": 4.743171626579699, "grad_norm": 3.591111421585083, "learning_rate": 4.072988857181683e-05, "loss": 2.4771, "step": 69810 }, { "epoch": 4.74351134665036, "grad_norm": 2.613224983215332, "learning_rate": 4.072564207093355e-05, "loss": 2.0161, "step": 69815 }, { "epoch": 4.743851066721022, "grad_norm": 2.939793348312378, "learning_rate": 4.072139557005028e-05, "loss": 2.5093, "step": 69820 }, { "epoch": 4.744190786791684, "grad_norm": 3.4436919689178467, "learning_rate": 4.071714906916701e-05, "loss": 2.5179, "step": 69825 }, { "epoch": 4.744530506862345, "grad_norm": 2.9024369716644287, "learning_rate": 4.0712902568283735e-05, "loss": 2.459, "step": 69830 }, { "epoch": 4.744870226933007, "grad_norm": 3.6622354984283447, "learning_rate": 4.070865606740046e-05, "loss": 2.4491, "step": 69835 }, { "epoch": 4.745209947003669, "grad_norm": 3.2989513874053955, "learning_rate": 4.070440956651719e-05, "loss": 2.3304, "step": 69840 }, { "epoch": 4.7455496670743305, "grad_norm": 3.644350528717041, "learning_rate": 4.070016306563392e-05, "loss": 2.5579, "step": 69845 }, { "epoch": 4.7458893871449925, "grad_norm": 3.2542498111724854, "learning_rate": 4.069591656475065e-05, "loss": 2.3226, "step": 69850 }, { "epoch": 4.746229107215655, "grad_norm": 3.3795063495635986, "learning_rate": 4.0691670063867375e-05, "loss": 2.6612, "step": 69855 }, { "epoch": 4.746568827286316, "grad_norm": 3.7361743450164795, "learning_rate": 4.06874235629841e-05, "loss": 2.207, "step": 69860 }, { "epoch": 4.746908547356978, "grad_norm": 2.8854432106018066, "learning_rate": 4.068317706210083e-05, "loss": 2.6602, "step": 69865 }, { "epoch": 4.74724826742764, "grad_norm": 3.9822287559509277, "learning_rate": 4.067893056121756e-05, "loss": 2.2931, "step": 69870 }, { 
"epoch": 4.747587987498301, "grad_norm": 2.9642066955566406, "learning_rate": 4.067468406033428e-05, "loss": 2.3713, "step": 69875 }, { "epoch": 4.747927707568963, "grad_norm": 3.1583168506622314, "learning_rate": 4.0670437559451015e-05, "loss": 2.5359, "step": 69880 }, { "epoch": 4.748267427639625, "grad_norm": 3.1654534339904785, "learning_rate": 4.066619105856774e-05, "loss": 2.6644, "step": 69885 }, { "epoch": 4.7486071477102865, "grad_norm": 3.45699405670166, "learning_rate": 4.066194455768447e-05, "loss": 2.6545, "step": 69890 }, { "epoch": 4.7489468677809485, "grad_norm": 4.704654693603516, "learning_rate": 4.06576980568012e-05, "loss": 2.3255, "step": 69895 }, { "epoch": 4.749286587851611, "grad_norm": 3.104219436645508, "learning_rate": 4.065345155591793e-05, "loss": 2.4821, "step": 69900 }, { "epoch": 4.749626307922272, "grad_norm": 3.004847526550293, "learning_rate": 4.0649205055034655e-05, "loss": 2.3589, "step": 69905 }, { "epoch": 4.749966027992934, "grad_norm": 2.81439208984375, "learning_rate": 4.064495855415138e-05, "loss": 2.5595, "step": 69910 }, { "epoch": 4.750305748063596, "grad_norm": 2.77950119972229, "learning_rate": 4.064071205326811e-05, "loss": 2.7011, "step": 69915 }, { "epoch": 4.750645468134257, "grad_norm": 3.431058406829834, "learning_rate": 4.063646555238484e-05, "loss": 2.4798, "step": 69920 }, { "epoch": 4.750985188204919, "grad_norm": 3.2150888442993164, "learning_rate": 4.063221905150156e-05, "loss": 2.4493, "step": 69925 }, { "epoch": 4.751324908275581, "grad_norm": 3.010368824005127, "learning_rate": 4.0627972550618295e-05, "loss": 2.393, "step": 69930 }, { "epoch": 4.7516646283462425, "grad_norm": 3.8727781772613525, "learning_rate": 4.062372604973502e-05, "loss": 2.0642, "step": 69935 }, { "epoch": 4.7520043484169046, "grad_norm": 2.6951067447662354, "learning_rate": 4.0619479548851745e-05, "loss": 2.6219, "step": 69940 }, { "epoch": 4.752344068487567, "grad_norm": 3.3106513023376465, "learning_rate": 4.061523304796848e-05, 
"loss": 2.8467, "step": 69945 }, { "epoch": 4.752683788558228, "grad_norm": 3.098738431930542, "learning_rate": 4.061098654708521e-05, "loss": 2.3974, "step": 69950 }, { "epoch": 4.75302350862889, "grad_norm": 2.9711973667144775, "learning_rate": 4.060674004620193e-05, "loss": 2.355, "step": 69955 }, { "epoch": 4.753363228699552, "grad_norm": 3.6934990882873535, "learning_rate": 4.0602493545318657e-05, "loss": 2.5965, "step": 69960 }, { "epoch": 4.753702948770213, "grad_norm": 2.9555435180664062, "learning_rate": 4.059824704443539e-05, "loss": 2.6164, "step": 69965 }, { "epoch": 4.754042668840875, "grad_norm": 3.385366201400757, "learning_rate": 4.059400054355211e-05, "loss": 2.4905, "step": 69970 }, { "epoch": 4.754382388911537, "grad_norm": 3.087782144546509, "learning_rate": 4.058975404266884e-05, "loss": 2.0946, "step": 69975 }, { "epoch": 4.7547221089821985, "grad_norm": 4.035339832305908, "learning_rate": 4.0585507541785575e-05, "loss": 2.5316, "step": 69980 }, { "epoch": 4.755061829052861, "grad_norm": 3.266955614089966, "learning_rate": 4.05812610409023e-05, "loss": 2.3664, "step": 69985 }, { "epoch": 4.755401549123523, "grad_norm": 2.72927188873291, "learning_rate": 4.0577014540019025e-05, "loss": 2.3721, "step": 69990 }, { "epoch": 4.755741269194184, "grad_norm": 4.321168422698975, "learning_rate": 4.057276803913575e-05, "loss": 2.3158, "step": 69995 }, { "epoch": 4.756080989264846, "grad_norm": 3.5592894554138184, "learning_rate": 4.056852153825248e-05, "loss": 2.4276, "step": 70000 }, { "epoch": 4.756420709335508, "grad_norm": 3.9220728874206543, "learning_rate": 4.056427503736921e-05, "loss": 2.7984, "step": 70005 }, { "epoch": 4.756760429406169, "grad_norm": 3.0104331970214844, "learning_rate": 4.056002853648594e-05, "loss": 2.4359, "step": 70010 }, { "epoch": 4.757100149476831, "grad_norm": 3.075309991836548, "learning_rate": 4.0555782035602665e-05, "loss": 2.432, "step": 70015 }, { "epoch": 4.757439869547493, "grad_norm": 2.8837645053863525, 
"learning_rate": 4.055153553471939e-05, "loss": 2.5951, "step": 70020 }, { "epoch": 4.7577795896181545, "grad_norm": 3.0259170532226562, "learning_rate": 4.054728903383612e-05, "loss": 2.4319, "step": 70025 }, { "epoch": 4.758119309688817, "grad_norm": 3.4822657108306885, "learning_rate": 4.054304253295285e-05, "loss": 2.6416, "step": 70030 }, { "epoch": 4.758459029759479, "grad_norm": 3.0762040615081787, "learning_rate": 4.053879603206958e-05, "loss": 2.6106, "step": 70035 }, { "epoch": 4.75879874983014, "grad_norm": 3.1127562522888184, "learning_rate": 4.0534549531186305e-05, "loss": 2.5458, "step": 70040 }, { "epoch": 4.759138469900802, "grad_norm": 2.6301558017730713, "learning_rate": 4.053030303030303e-05, "loss": 2.647, "step": 70045 }, { "epoch": 4.759478189971464, "grad_norm": 3.04618501663208, "learning_rate": 4.052605652941976e-05, "loss": 2.0083, "step": 70050 }, { "epoch": 4.759817910042125, "grad_norm": 2.7255938053131104, "learning_rate": 4.052181002853649e-05, "loss": 2.5547, "step": 70055 }, { "epoch": 4.760157630112787, "grad_norm": 2.647374391555786, "learning_rate": 4.051756352765322e-05, "loss": 2.286, "step": 70060 }, { "epoch": 4.760497350183449, "grad_norm": 3.363973379135132, "learning_rate": 4.0513317026769945e-05, "loss": 2.4253, "step": 70065 }, { "epoch": 4.7608370702541105, "grad_norm": 3.3413243293762207, "learning_rate": 4.050907052588667e-05, "loss": 2.6253, "step": 70070 }, { "epoch": 4.761176790324773, "grad_norm": 3.853228807449341, "learning_rate": 4.05048240250034e-05, "loss": 2.3381, "step": 70075 }, { "epoch": 4.761516510395434, "grad_norm": 2.87380313873291, "learning_rate": 4.050057752412013e-05, "loss": 2.5848, "step": 70080 }, { "epoch": 4.761856230466096, "grad_norm": 3.151980400085449, "learning_rate": 4.049633102323686e-05, "loss": 2.1923, "step": 70085 }, { "epoch": 4.762195950536758, "grad_norm": 2.9912097454071045, "learning_rate": 4.0492084522353585e-05, "loss": 2.1117, "step": 70090 }, { "epoch": 4.762535670607419, 
"grad_norm": 3.36403226852417, "learning_rate": 4.0487838021470306e-05, "loss": 2.2427, "step": 70095 }, { "epoch": 4.762875390678081, "grad_norm": 3.0559563636779785, "learning_rate": 4.048359152058704e-05, "loss": 2.6113, "step": 70100 }, { "epoch": 4.763215110748743, "grad_norm": 2.8443245887756348, "learning_rate": 4.047934501970377e-05, "loss": 2.514, "step": 70105 }, { "epoch": 4.7635548308194045, "grad_norm": 2.414811849594116, "learning_rate": 4.047509851882049e-05, "loss": 2.0912, "step": 70110 }, { "epoch": 4.7638945508900665, "grad_norm": 2.969691038131714, "learning_rate": 4.0470852017937225e-05, "loss": 2.2888, "step": 70115 }, { "epoch": 4.764234270960729, "grad_norm": 3.4830498695373535, "learning_rate": 4.046660551705395e-05, "loss": 2.4518, "step": 70120 }, { "epoch": 4.76457399103139, "grad_norm": 2.623138427734375, "learning_rate": 4.0462359016170674e-05, "loss": 2.5357, "step": 70125 }, { "epoch": 4.764913711102052, "grad_norm": 3.599320888519287, "learning_rate": 4.04581125152874e-05, "loss": 2.3041, "step": 70130 }, { "epoch": 4.765253431172714, "grad_norm": 3.3254787921905518, "learning_rate": 4.045386601440414e-05, "loss": 2.5299, "step": 70135 }, { "epoch": 4.765593151243375, "grad_norm": 3.6085731983184814, "learning_rate": 4.044961951352086e-05, "loss": 2.4504, "step": 70140 }, { "epoch": 4.765932871314037, "grad_norm": 3.1808156967163086, "learning_rate": 4.0445373012637586e-05, "loss": 2.4296, "step": 70145 }, { "epoch": 4.766272591384699, "grad_norm": 2.9321422576904297, "learning_rate": 4.044112651175432e-05, "loss": 2.2356, "step": 70150 }, { "epoch": 4.7666123114553605, "grad_norm": 3.231283187866211, "learning_rate": 4.043688001087104e-05, "loss": 2.2611, "step": 70155 }, { "epoch": 4.7669520315260225, "grad_norm": 4.287572860717773, "learning_rate": 4.043263350998777e-05, "loss": 2.4598, "step": 70160 }, { "epoch": 4.767291751596685, "grad_norm": 2.7667741775512695, "learning_rate": 4.04283870091045e-05, "loss": 2.5328, "step": 
70165 }, { "epoch": 4.767631471667346, "grad_norm": 3.6060216426849365, "learning_rate": 4.0424140508221226e-05, "loss": 2.3619, "step": 70170 }, { "epoch": 4.767971191738008, "grad_norm": 2.9043333530426025, "learning_rate": 4.0419894007337954e-05, "loss": 2.3709, "step": 70175 }, { "epoch": 4.76831091180867, "grad_norm": 3.105874538421631, "learning_rate": 4.041564750645468e-05, "loss": 2.6445, "step": 70180 }, { "epoch": 4.768650631879331, "grad_norm": 3.054882526397705, "learning_rate": 4.041140100557141e-05, "loss": 2.4882, "step": 70185 }, { "epoch": 4.768990351949993, "grad_norm": 3.2879576683044434, "learning_rate": 4.040715450468814e-05, "loss": 2.5921, "step": 70190 }, { "epoch": 4.769330072020655, "grad_norm": 3.4541893005371094, "learning_rate": 4.0402908003804866e-05, "loss": 2.4029, "step": 70195 }, { "epoch": 4.7696697920913165, "grad_norm": 2.897961378097534, "learning_rate": 4.0398661502921594e-05, "loss": 2.3844, "step": 70200 }, { "epoch": 4.7700095121619785, "grad_norm": 2.633859395980835, "learning_rate": 4.039441500203832e-05, "loss": 2.4979, "step": 70205 }, { "epoch": 4.770349232232641, "grad_norm": 2.363621711730957, "learning_rate": 4.039016850115505e-05, "loss": 2.3816, "step": 70210 }, { "epoch": 4.770688952303302, "grad_norm": 3.115675687789917, "learning_rate": 4.038592200027178e-05, "loss": 2.4439, "step": 70215 }, { "epoch": 4.771028672373964, "grad_norm": 3.6708266735076904, "learning_rate": 4.0381675499388506e-05, "loss": 2.4736, "step": 70220 }, { "epoch": 4.771368392444626, "grad_norm": 3.2437827587127686, "learning_rate": 4.0377428998505234e-05, "loss": 2.3114, "step": 70225 }, { "epoch": 4.771708112515287, "grad_norm": 2.944237232208252, "learning_rate": 4.037318249762196e-05, "loss": 2.5652, "step": 70230 }, { "epoch": 4.772047832585949, "grad_norm": 3.229235887527466, "learning_rate": 4.036893599673869e-05, "loss": 2.3216, "step": 70235 }, { "epoch": 4.772387552656611, "grad_norm": 3.238354206085205, "learning_rate": 
4.036468949585542e-05, "loss": 2.29, "step": 70240 }, { "epoch": 4.7727272727272725, "grad_norm": 2.762993812561035, "learning_rate": 4.0360442994972146e-05, "loss": 2.4035, "step": 70245 }, { "epoch": 4.773066992797935, "grad_norm": 2.7573745250701904, "learning_rate": 4.0356196494088874e-05, "loss": 2.5625, "step": 70250 }, { "epoch": 4.773406712868597, "grad_norm": 2.585681676864624, "learning_rate": 4.03519499932056e-05, "loss": 2.4577, "step": 70255 }, { "epoch": 4.773746432939258, "grad_norm": 3.4591572284698486, "learning_rate": 4.034770349232233e-05, "loss": 2.6063, "step": 70260 }, { "epoch": 4.77408615300992, "grad_norm": 3.351947546005249, "learning_rate": 4.034345699143905e-05, "loss": 2.4683, "step": 70265 }, { "epoch": 4.774425873080582, "grad_norm": 3.2733664512634277, "learning_rate": 4.0339210490555786e-05, "loss": 2.44, "step": 70270 }, { "epoch": 4.774765593151243, "grad_norm": 3.1307079792022705, "learning_rate": 4.0334963989672514e-05, "loss": 2.7605, "step": 70275 }, { "epoch": 4.775105313221905, "grad_norm": 2.795793294906616, "learning_rate": 4.0330717488789236e-05, "loss": 2.2762, "step": 70280 }, { "epoch": 4.775445033292567, "grad_norm": 2.9774608612060547, "learning_rate": 4.032647098790597e-05, "loss": 2.7309, "step": 70285 }, { "epoch": 4.7757847533632285, "grad_norm": 2.998727798461914, "learning_rate": 4.03222244870227e-05, "loss": 2.4202, "step": 70290 }, { "epoch": 4.776124473433891, "grad_norm": 3.4934818744659424, "learning_rate": 4.031797798613942e-05, "loss": 2.5018, "step": 70295 }, { "epoch": 4.776464193504552, "grad_norm": 3.7096478939056396, "learning_rate": 4.0313731485256154e-05, "loss": 2.7212, "step": 70300 }, { "epoch": 4.776803913575214, "grad_norm": 3.912752151489258, "learning_rate": 4.030948498437288e-05, "loss": 2.7551, "step": 70305 }, { "epoch": 4.777143633645876, "grad_norm": 3.4363105297088623, "learning_rate": 4.0305238483489604e-05, "loss": 2.3534, "step": 70310 }, { "epoch": 4.777483353716537, "grad_norm": 
3.290137529373169, "learning_rate": 4.030099198260633e-05, "loss": 2.4962, "step": 70315 }, { "epoch": 4.777823073787199, "grad_norm": 3.143083333969116, "learning_rate": 4.0296745481723066e-05, "loss": 2.5153, "step": 70320 }, { "epoch": 4.778162793857861, "grad_norm": 3.423720121383667, "learning_rate": 4.029249898083979e-05, "loss": 2.4759, "step": 70325 }, { "epoch": 4.778502513928522, "grad_norm": 3.2348170280456543, "learning_rate": 4.0288252479956516e-05, "loss": 2.4125, "step": 70330 }, { "epoch": 4.7788422339991845, "grad_norm": 3.2285664081573486, "learning_rate": 4.028400597907325e-05, "loss": 2.5027, "step": 70335 }, { "epoch": 4.779181954069847, "grad_norm": 3.1474287509918213, "learning_rate": 4.027975947818997e-05, "loss": 2.3069, "step": 70340 }, { "epoch": 4.779521674140508, "grad_norm": 3.0756194591522217, "learning_rate": 4.02755129773067e-05, "loss": 2.5037, "step": 70345 }, { "epoch": 4.77986139421117, "grad_norm": 2.964099407196045, "learning_rate": 4.027126647642343e-05, "loss": 2.1596, "step": 70350 }, { "epoch": 4.780201114281832, "grad_norm": 3.424517869949341, "learning_rate": 4.0267019975540156e-05, "loss": 2.6667, "step": 70355 }, { "epoch": 4.780540834352493, "grad_norm": 2.6787734031677246, "learning_rate": 4.0262773474656884e-05, "loss": 2.5717, "step": 70360 }, { "epoch": 4.780880554423155, "grad_norm": 4.3540802001953125, "learning_rate": 4.025852697377361e-05, "loss": 2.2539, "step": 70365 }, { "epoch": 4.781220274493817, "grad_norm": 2.733699083328247, "learning_rate": 4.025428047289034e-05, "loss": 2.6421, "step": 70370 }, { "epoch": 4.7815599945644784, "grad_norm": 3.5410900115966797, "learning_rate": 4.025003397200707e-05, "loss": 2.5793, "step": 70375 }, { "epoch": 4.7818997146351405, "grad_norm": 3.183138608932495, "learning_rate": 4.0245787471123796e-05, "loss": 2.3792, "step": 70380 }, { "epoch": 4.782239434705803, "grad_norm": 3.3334875106811523, "learning_rate": 4.0241540970240524e-05, "loss": 2.4543, "step": 70385 }, { 
"epoch": 4.782579154776464, "grad_norm": 3.252283811569214, "learning_rate": 4.023729446935725e-05, "loss": 2.2861, "step": 70390 }, { "epoch": 4.782918874847126, "grad_norm": 2.9345383644104004, "learning_rate": 4.023304796847398e-05, "loss": 2.3176, "step": 70395 }, { "epoch": 4.783258594917788, "grad_norm": 3.4272308349609375, "learning_rate": 4.022880146759071e-05, "loss": 2.2342, "step": 70400 }, { "epoch": 4.783598314988449, "grad_norm": 3.9654531478881836, "learning_rate": 4.0224554966707436e-05, "loss": 2.5906, "step": 70405 }, { "epoch": 4.783938035059111, "grad_norm": 2.5403096675872803, "learning_rate": 4.0220308465824164e-05, "loss": 2.5612, "step": 70410 }, { "epoch": 4.784277755129773, "grad_norm": 2.741586685180664, "learning_rate": 4.021606196494089e-05, "loss": 2.6552, "step": 70415 }, { "epoch": 4.7846174752004345, "grad_norm": 3.386653184890747, "learning_rate": 4.021181546405762e-05, "loss": 2.3672, "step": 70420 }, { "epoch": 4.7849571952710965, "grad_norm": 3.038635730743408, "learning_rate": 4.020756896317435e-05, "loss": 2.273, "step": 70425 }, { "epoch": 4.785296915341759, "grad_norm": 2.9271340370178223, "learning_rate": 4.0203322462291076e-05, "loss": 2.5555, "step": 70430 }, { "epoch": 4.78563663541242, "grad_norm": 2.783860683441162, "learning_rate": 4.0199075961407804e-05, "loss": 2.4664, "step": 70435 }, { "epoch": 4.785976355483082, "grad_norm": 3.6212682723999023, "learning_rate": 4.019482946052453e-05, "loss": 2.3466, "step": 70440 }, { "epoch": 4.786316075553744, "grad_norm": 3.6476452350616455, "learning_rate": 4.019058295964126e-05, "loss": 2.436, "step": 70445 }, { "epoch": 4.786655795624405, "grad_norm": 3.446227550506592, "learning_rate": 4.018633645875798e-05, "loss": 2.3687, "step": 70450 }, { "epoch": 4.786995515695067, "grad_norm": 2.9064409732818604, "learning_rate": 4.0182089957874716e-05, "loss": 2.2014, "step": 70455 }, { "epoch": 4.787335235765729, "grad_norm": 3.0675370693206787, "learning_rate": 
4.0177843456991444e-05, "loss": 2.533, "step": 70460 }, { "epoch": 4.7876749558363905, "grad_norm": 3.533079147338867, "learning_rate": 4.0173596956108165e-05, "loss": 2.3182, "step": 70465 }, { "epoch": 4.7880146759070525, "grad_norm": 3.7540066242218018, "learning_rate": 4.01693504552249e-05, "loss": 2.5194, "step": 70470 }, { "epoch": 4.788354395977715, "grad_norm": 3.8362410068511963, "learning_rate": 4.016510395434163e-05, "loss": 2.5725, "step": 70475 }, { "epoch": 4.788694116048376, "grad_norm": 2.714214324951172, "learning_rate": 4.016085745345835e-05, "loss": 2.3175, "step": 70480 }, { "epoch": 4.789033836119038, "grad_norm": 3.3462259769439697, "learning_rate": 4.015661095257508e-05, "loss": 2.3525, "step": 70485 }, { "epoch": 4.7893735561897, "grad_norm": 3.035447359085083, "learning_rate": 4.015236445169181e-05, "loss": 2.1761, "step": 70490 }, { "epoch": 4.789713276260361, "grad_norm": 3.620682716369629, "learning_rate": 4.014811795080853e-05, "loss": 2.3223, "step": 70495 }, { "epoch": 4.790052996331023, "grad_norm": 4.756251335144043, "learning_rate": 4.014387144992526e-05, "loss": 2.7529, "step": 70500 }, { "epoch": 4.790392716401685, "grad_norm": 2.968216896057129, "learning_rate": 4.0139624949041996e-05, "loss": 2.1534, "step": 70505 }, { "epoch": 4.7907324364723465, "grad_norm": 2.466879367828369, "learning_rate": 4.013537844815872e-05, "loss": 2.4944, "step": 70510 }, { "epoch": 4.7910721565430086, "grad_norm": 2.88531756401062, "learning_rate": 4.0131131947275445e-05, "loss": 2.3905, "step": 70515 }, { "epoch": 4.791411876613671, "grad_norm": 2.650916337966919, "learning_rate": 4.012688544639217e-05, "loss": 2.7708, "step": 70520 }, { "epoch": 4.791751596684332, "grad_norm": 2.9073307514190674, "learning_rate": 4.01226389455089e-05, "loss": 2.3766, "step": 70525 }, { "epoch": 4.792091316754994, "grad_norm": 4.26728630065918, "learning_rate": 4.011839244462563e-05, "loss": 2.2638, "step": 70530 }, { "epoch": 4.792431036825656, "grad_norm": 
3.3179266452789307, "learning_rate": 4.011414594374236e-05, "loss": 2.5454, "step": 70535 }, { "epoch": 4.792770756896317, "grad_norm": 2.8551828861236572, "learning_rate": 4.0109899442859085e-05, "loss": 2.2862, "step": 70540 }, { "epoch": 4.793110476966979, "grad_norm": 4.745576858520508, "learning_rate": 4.010565294197581e-05, "loss": 2.4598, "step": 70545 }, { "epoch": 4.793450197037641, "grad_norm": 4.383882522583008, "learning_rate": 4.010140644109254e-05, "loss": 2.4792, "step": 70550 }, { "epoch": 4.7937899171083025, "grad_norm": 2.8931632041931152, "learning_rate": 4.009715994020927e-05, "loss": 2.7733, "step": 70555 }, { "epoch": 4.794129637178965, "grad_norm": 3.065277338027954, "learning_rate": 4.0092913439326e-05, "loss": 2.664, "step": 70560 }, { "epoch": 4.794469357249627, "grad_norm": 3.7111518383026123, "learning_rate": 4.0088666938442725e-05, "loss": 2.4887, "step": 70565 }, { "epoch": 4.794809077320288, "grad_norm": 3.9625678062438965, "learning_rate": 4.008442043755945e-05, "loss": 2.6059, "step": 70570 }, { "epoch": 4.79514879739095, "grad_norm": 2.992283344268799, "learning_rate": 4.008017393667618e-05, "loss": 2.18, "step": 70575 }, { "epoch": 4.795488517461612, "grad_norm": 4.012279510498047, "learning_rate": 4.007592743579291e-05, "loss": 2.4052, "step": 70580 }, { "epoch": 4.795828237532273, "grad_norm": 2.8449158668518066, "learning_rate": 4.007168093490964e-05, "loss": 2.3929, "step": 70585 }, { "epoch": 4.796167957602935, "grad_norm": 3.490913152694702, "learning_rate": 4.0067434434026365e-05, "loss": 2.5505, "step": 70590 }, { "epoch": 4.796507677673597, "grad_norm": 2.835242986679077, "learning_rate": 4.006318793314309e-05, "loss": 2.4255, "step": 70595 }, { "epoch": 4.7968473977442585, "grad_norm": 3.548351287841797, "learning_rate": 4.005894143225982e-05, "loss": 2.4698, "step": 70600 }, { "epoch": 4.797187117814921, "grad_norm": 2.764467716217041, "learning_rate": 4.005469493137655e-05, "loss": 2.3594, "step": 70605 }, { "epoch": 
4.797526837885583, "grad_norm": 2.9414279460906982, "learning_rate": 4.005044843049328e-05, "loss": 2.5053, "step": 70610 }, { "epoch": 4.797866557956244, "grad_norm": 3.322220802307129, "learning_rate": 4.0046201929610005e-05, "loss": 2.4628, "step": 70615 }, { "epoch": 4.798206278026906, "grad_norm": 2.982389450073242, "learning_rate": 4.0041955428726726e-05, "loss": 2.6116, "step": 70620 }, { "epoch": 4.798545998097568, "grad_norm": 3.3098554611206055, "learning_rate": 4.003770892784346e-05, "loss": 2.5795, "step": 70625 }, { "epoch": 4.798885718168229, "grad_norm": 3.094306230545044, "learning_rate": 4.003346242696019e-05, "loss": 2.3476, "step": 70630 }, { "epoch": 4.799225438238891, "grad_norm": 3.0756146907806396, "learning_rate": 4.002921592607691e-05, "loss": 2.3745, "step": 70635 }, { "epoch": 4.799565158309553, "grad_norm": 4.141104698181152, "learning_rate": 4.0024969425193645e-05, "loss": 2.57, "step": 70640 }, { "epoch": 4.7999048783802145, "grad_norm": 3.239778995513916, "learning_rate": 4.002072292431037e-05, "loss": 2.3195, "step": 70645 }, { "epoch": 4.800244598450877, "grad_norm": 3.3781912326812744, "learning_rate": 4.0016476423427095e-05, "loss": 2.3695, "step": 70650 }, { "epoch": 4.800584318521539, "grad_norm": 4.002991676330566, "learning_rate": 4.001222992254382e-05, "loss": 2.5183, "step": 70655 }, { "epoch": 4.8009240385922, "grad_norm": 3.147892475128174, "learning_rate": 4.000798342166056e-05, "loss": 2.614, "step": 70660 }, { "epoch": 4.801263758662862, "grad_norm": 2.5194337368011475, "learning_rate": 4.000373692077728e-05, "loss": 2.6618, "step": 70665 }, { "epoch": 4.801603478733524, "grad_norm": 2.8900210857391357, "learning_rate": 3.9999490419894007e-05, "loss": 2.2751, "step": 70670 }, { "epoch": 4.801943198804185, "grad_norm": 3.162088632583618, "learning_rate": 3.999524391901074e-05, "loss": 2.9138, "step": 70675 }, { "epoch": 4.802282918874847, "grad_norm": 2.7842535972595215, "learning_rate": 3.999099741812746e-05, "loss": 
2.5966, "step": 70680 }, { "epoch": 4.802622638945509, "grad_norm": 2.5505106449127197, "learning_rate": 3.998675091724419e-05, "loss": 2.4988, "step": 70685 }, { "epoch": 4.8029623590161705, "grad_norm": 3.186948776245117, "learning_rate": 3.998250441636092e-05, "loss": 2.3664, "step": 70690 }, { "epoch": 4.803302079086833, "grad_norm": 3.1563689708709717, "learning_rate": 3.9978257915477647e-05, "loss": 2.503, "step": 70695 }, { "epoch": 4.803641799157495, "grad_norm": 3.7018065452575684, "learning_rate": 3.9974011414594375e-05, "loss": 2.4091, "step": 70700 }, { "epoch": 4.803981519228156, "grad_norm": 3.547368049621582, "learning_rate": 3.99697649137111e-05, "loss": 2.3281, "step": 70705 }, { "epoch": 4.804321239298818, "grad_norm": 3.4225120544433594, "learning_rate": 3.996551841282783e-05, "loss": 2.5709, "step": 70710 }, { "epoch": 4.80466095936948, "grad_norm": 3.5163395404815674, "learning_rate": 3.996127191194456e-05, "loss": 2.3888, "step": 70715 }, { "epoch": 4.805000679440141, "grad_norm": 2.2808640003204346, "learning_rate": 3.995702541106129e-05, "loss": 2.3669, "step": 70720 }, { "epoch": 4.805340399510803, "grad_norm": 2.9299702644348145, "learning_rate": 3.9952778910178015e-05, "loss": 2.3211, "step": 70725 }, { "epoch": 4.805680119581465, "grad_norm": 3.5183002948760986, "learning_rate": 3.994853240929474e-05, "loss": 2.3829, "step": 70730 }, { "epoch": 4.8060198396521265, "grad_norm": 3.0853681564331055, "learning_rate": 3.994428590841147e-05, "loss": 2.5812, "step": 70735 }, { "epoch": 4.806359559722789, "grad_norm": 3.3755362033843994, "learning_rate": 3.99400394075282e-05, "loss": 2.5821, "step": 70740 }, { "epoch": 4.806699279793451, "grad_norm": 3.042790174484253, "learning_rate": 3.993579290664493e-05, "loss": 2.5155, "step": 70745 }, { "epoch": 4.807038999864112, "grad_norm": 3.3497064113616943, "learning_rate": 3.9931546405761655e-05, "loss": 2.632, "step": 70750 }, { "epoch": 4.807378719934774, "grad_norm": 2.692807197570801, 
"learning_rate": 3.992729990487838e-05, "loss": 2.4595, "step": 70755 }, { "epoch": 4.807718440005435, "grad_norm": 3.626056671142578, "learning_rate": 3.992305340399511e-05, "loss": 2.3356, "step": 70760 }, { "epoch": 4.808058160076097, "grad_norm": 2.659526824951172, "learning_rate": 3.991880690311184e-05, "loss": 2.6711, "step": 70765 }, { "epoch": 4.808397880146759, "grad_norm": 4.2453837394714355, "learning_rate": 3.991456040222857e-05, "loss": 2.3918, "step": 70770 }, { "epoch": 4.8087376002174205, "grad_norm": 2.8027498722076416, "learning_rate": 3.9910313901345295e-05, "loss": 2.4851, "step": 70775 }, { "epoch": 4.8090773202880825, "grad_norm": 2.8377397060394287, "learning_rate": 3.990606740046202e-05, "loss": 2.1778, "step": 70780 }, { "epoch": 4.809417040358745, "grad_norm": 2.6618194580078125, "learning_rate": 3.990182089957875e-05, "loss": 2.4147, "step": 70785 }, { "epoch": 4.809756760429406, "grad_norm": 3.2143349647521973, "learning_rate": 3.989757439869547e-05, "loss": 2.1697, "step": 70790 }, { "epoch": 4.810096480500068, "grad_norm": 2.930896282196045, "learning_rate": 3.989332789781221e-05, "loss": 2.3875, "step": 70795 }, { "epoch": 4.81043620057073, "grad_norm": 2.8400466442108154, "learning_rate": 3.9889081396928935e-05, "loss": 2.4313, "step": 70800 }, { "epoch": 4.810775920641391, "grad_norm": 3.2106308937072754, "learning_rate": 3.9884834896045656e-05, "loss": 2.51, "step": 70805 }, { "epoch": 4.811115640712053, "grad_norm": 3.552124500274658, "learning_rate": 3.988058839516239e-05, "loss": 2.3182, "step": 70810 }, { "epoch": 4.811455360782715, "grad_norm": 3.459810256958008, "learning_rate": 3.987634189427912e-05, "loss": 2.4499, "step": 70815 }, { "epoch": 4.8117950808533765, "grad_norm": 3.595292329788208, "learning_rate": 3.987209539339584e-05, "loss": 2.3739, "step": 70820 }, { "epoch": 4.812134800924039, "grad_norm": 3.173387289047241, "learning_rate": 3.9867848892512575e-05, "loss": 2.4018, "step": 70825 }, { "epoch": 
4.812474520994701, "grad_norm": 3.2400012016296387, "learning_rate": 3.98636023916293e-05, "loss": 2.5456, "step": 70830 }, { "epoch": 4.812814241065362, "grad_norm": 3.3169188499450684, "learning_rate": 3.9859355890746024e-05, "loss": 2.5584, "step": 70835 }, { "epoch": 4.813153961136024, "grad_norm": 4.179189682006836, "learning_rate": 3.985510938986275e-05, "loss": 2.6056, "step": 70840 }, { "epoch": 4.813493681206686, "grad_norm": 3.7976267337799072, "learning_rate": 3.985086288897949e-05, "loss": 2.8557, "step": 70845 }, { "epoch": 4.813833401277347, "grad_norm": 2.8231124877929688, "learning_rate": 3.984661638809621e-05, "loss": 2.4622, "step": 70850 }, { "epoch": 4.814173121348009, "grad_norm": 3.0617265701293945, "learning_rate": 3.9842369887212936e-05, "loss": 2.3974, "step": 70855 }, { "epoch": 4.814512841418671, "grad_norm": 2.394211530685425, "learning_rate": 3.983812338632967e-05, "loss": 2.5091, "step": 70860 }, { "epoch": 4.8148525614893325, "grad_norm": 3.60538387298584, "learning_rate": 3.983387688544639e-05, "loss": 2.3805, "step": 70865 }, { "epoch": 4.815192281559995, "grad_norm": 2.8658533096313477, "learning_rate": 3.982963038456312e-05, "loss": 2.2944, "step": 70870 }, { "epoch": 4.815532001630657, "grad_norm": 2.8259294033050537, "learning_rate": 3.982538388367985e-05, "loss": 2.2428, "step": 70875 }, { "epoch": 4.815871721701318, "grad_norm": 3.4411489963531494, "learning_rate": 3.9821137382796576e-05, "loss": 2.6474, "step": 70880 }, { "epoch": 4.81621144177198, "grad_norm": 2.762524366378784, "learning_rate": 3.9816890881913304e-05, "loss": 2.414, "step": 70885 }, { "epoch": 4.816551161842642, "grad_norm": 4.409611701965332, "learning_rate": 3.981264438103003e-05, "loss": 2.7128, "step": 70890 }, { "epoch": 4.816890881913303, "grad_norm": 2.931150436401367, "learning_rate": 3.980839788014676e-05, "loss": 2.4434, "step": 70895 }, { "epoch": 4.817230601983965, "grad_norm": 3.681730031967163, "learning_rate": 3.980415137926349e-05, "loss": 
2.4426, "step": 70900 }, { "epoch": 4.817570322054627, "grad_norm": 3.6966161727905273, "learning_rate": 3.9799904878380216e-05, "loss": 2.6154, "step": 70905 }, { "epoch": 4.8179100421252885, "grad_norm": 3.2121200561523438, "learning_rate": 3.9795658377496944e-05, "loss": 2.7488, "step": 70910 }, { "epoch": 4.818249762195951, "grad_norm": 3.628917694091797, "learning_rate": 3.979141187661367e-05, "loss": 2.3845, "step": 70915 }, { "epoch": 4.818589482266613, "grad_norm": 2.877959728240967, "learning_rate": 3.97871653757304e-05, "loss": 2.3727, "step": 70920 }, { "epoch": 4.818929202337274, "grad_norm": 3.6434166431427, "learning_rate": 3.978291887484713e-05, "loss": 2.2994, "step": 70925 }, { "epoch": 4.819268922407936, "grad_norm": 3.0772056579589844, "learning_rate": 3.9778672373963856e-05, "loss": 2.4457, "step": 70930 }, { "epoch": 4.819608642478598, "grad_norm": 3.0426552295684814, "learning_rate": 3.9774425873080584e-05, "loss": 2.3867, "step": 70935 }, { "epoch": 4.819948362549259, "grad_norm": 3.046689987182617, "learning_rate": 3.977017937219731e-05, "loss": 2.4845, "step": 70940 }, { "epoch": 4.820288082619921, "grad_norm": 3.1287357807159424, "learning_rate": 3.976593287131404e-05, "loss": 2.6101, "step": 70945 }, { "epoch": 4.820627802690583, "grad_norm": 2.776782512664795, "learning_rate": 3.976168637043077e-05, "loss": 2.5079, "step": 70950 }, { "epoch": 4.8209675227612445, "grad_norm": 2.660174608230591, "learning_rate": 3.9757439869547496e-05, "loss": 2.4457, "step": 70955 }, { "epoch": 4.821307242831907, "grad_norm": 3.145528554916382, "learning_rate": 3.9753193368664224e-05, "loss": 2.5501, "step": 70960 }, { "epoch": 4.821646962902569, "grad_norm": 2.9366841316223145, "learning_rate": 3.974894686778095e-05, "loss": 2.7354, "step": 70965 }, { "epoch": 4.82198668297323, "grad_norm": 3.0352232456207275, "learning_rate": 3.974470036689768e-05, "loss": 2.5708, "step": 70970 }, { "epoch": 4.822326403043892, "grad_norm": 3.233825445175171, 
"learning_rate": 3.97404538660144e-05, "loss": 2.4869, "step": 70975 }, { "epoch": 4.822666123114553, "grad_norm": 3.319232225418091, "learning_rate": 3.9736207365131136e-05, "loss": 2.2221, "step": 70980 }, { "epoch": 4.823005843185215, "grad_norm": 3.078965663909912, "learning_rate": 3.9731960864247864e-05, "loss": 2.5216, "step": 70985 }, { "epoch": 4.823345563255877, "grad_norm": 2.954850912094116, "learning_rate": 3.9727714363364586e-05, "loss": 2.3438, "step": 70990 }, { "epoch": 4.8236852833265385, "grad_norm": 3.1367316246032715, "learning_rate": 3.972431716265797e-05, "loss": 2.2667, "step": 70995 }, { "epoch": 4.8240250033972005, "grad_norm": 3.6443867683410645, "learning_rate": 3.97200706617747e-05, "loss": 2.4832, "step": 71000 }, { "epoch": 4.824364723467863, "grad_norm": 3.357018232345581, "learning_rate": 3.971582416089143e-05, "loss": 2.4375, "step": 71005 }, { "epoch": 4.824704443538524, "grad_norm": 4.249296188354492, "learning_rate": 3.971157766000815e-05, "loss": 2.5536, "step": 71010 }, { "epoch": 4.825044163609186, "grad_norm": 3.858621835708618, "learning_rate": 3.970733115912488e-05, "loss": 2.0376, "step": 71015 }, { "epoch": 4.825383883679848, "grad_norm": 2.907456398010254, "learning_rate": 3.9703084658241615e-05, "loss": 2.2016, "step": 71020 }, { "epoch": 4.825723603750509, "grad_norm": 3.718421459197998, "learning_rate": 3.9698838157358336e-05, "loss": 2.5833, "step": 71025 }, { "epoch": 4.826063323821171, "grad_norm": 3.131028652191162, "learning_rate": 3.9694591656475064e-05, "loss": 2.6793, "step": 71030 }, { "epoch": 4.826403043891833, "grad_norm": 3.3156211376190186, "learning_rate": 3.96903451555918e-05, "loss": 2.504, "step": 71035 }, { "epoch": 4.8267427639624945, "grad_norm": 2.914747476577759, "learning_rate": 3.968609865470852e-05, "loss": 2.4687, "step": 71040 }, { "epoch": 4.8270824840331565, "grad_norm": 3.2779202461242676, "learning_rate": 3.968185215382525e-05, "loss": 2.2497, "step": 71045 }, { "epoch": 
4.827422204103819, "grad_norm": 2.8194003105163574, "learning_rate": 3.9677605652941976e-05, "loss": 2.5013, "step": 71050 }, { "epoch": 4.82776192417448, "grad_norm": 3.005847692489624, "learning_rate": 3.9673359152058704e-05, "loss": 2.3118, "step": 71055 }, { "epoch": 4.828101644245142, "grad_norm": 3.177001476287842, "learning_rate": 3.966911265117543e-05, "loss": 2.3958, "step": 71060 }, { "epoch": 4.828441364315804, "grad_norm": 3.046640634536743, "learning_rate": 3.966486615029216e-05, "loss": 2.5818, "step": 71065 }, { "epoch": 4.828781084386465, "grad_norm": 3.131840467453003, "learning_rate": 3.966061964940889e-05, "loss": 2.288, "step": 71070 }, { "epoch": 4.829120804457127, "grad_norm": 3.6846466064453125, "learning_rate": 3.9656373148525616e-05, "loss": 2.3547, "step": 71075 }, { "epoch": 4.829460524527789, "grad_norm": 3.594496250152588, "learning_rate": 3.9652126647642344e-05, "loss": 2.4349, "step": 71080 }, { "epoch": 4.8298002445984505, "grad_norm": 2.9473941326141357, "learning_rate": 3.964788014675907e-05, "loss": 2.5805, "step": 71085 }, { "epoch": 4.8301399646691126, "grad_norm": 3.1036508083343506, "learning_rate": 3.96436336458758e-05, "loss": 2.6081, "step": 71090 }, { "epoch": 4.830479684739775, "grad_norm": 3.3998427391052246, "learning_rate": 3.963938714499253e-05, "loss": 2.3439, "step": 71095 }, { "epoch": 4.830819404810436, "grad_norm": 3.310682535171509, "learning_rate": 3.9635140644109256e-05, "loss": 2.4766, "step": 71100 }, { "epoch": 4.831159124881098, "grad_norm": 3.420842170715332, "learning_rate": 3.9630894143225984e-05, "loss": 2.3104, "step": 71105 }, { "epoch": 4.83149884495176, "grad_norm": 3.2130706310272217, "learning_rate": 3.962664764234271e-05, "loss": 2.6936, "step": 71110 }, { "epoch": 4.831838565022421, "grad_norm": 3.19558048248291, "learning_rate": 3.962240114145944e-05, "loss": 2.6242, "step": 71115 }, { "epoch": 4.832178285093083, "grad_norm": 2.775625228881836, "learning_rate": 3.961815464057617e-05, "loss": 
2.4468, "step": 71120 }, { "epoch": 4.832518005163745, "grad_norm": 3.1663310527801514, "learning_rate": 3.9613908139692896e-05, "loss": 2.6347, "step": 71125 }, { "epoch": 4.8328577252344065, "grad_norm": 3.215313673019409, "learning_rate": 3.9609661638809624e-05, "loss": 2.2517, "step": 71130 }, { "epoch": 4.833197445305069, "grad_norm": 2.7601497173309326, "learning_rate": 3.960541513792635e-05, "loss": 2.6261, "step": 71135 }, { "epoch": 4.833537165375731, "grad_norm": 3.346247911453247, "learning_rate": 3.960116863704308e-05, "loss": 2.3001, "step": 71140 }, { "epoch": 4.833876885446392, "grad_norm": 2.8278777599334717, "learning_rate": 3.959692213615981e-05, "loss": 2.4826, "step": 71145 }, { "epoch": 4.834216605517054, "grad_norm": 2.819844961166382, "learning_rate": 3.959267563527653e-05, "loss": 2.5697, "step": 71150 }, { "epoch": 4.834556325587716, "grad_norm": 3.5526857376098633, "learning_rate": 3.9588429134393264e-05, "loss": 2.4584, "step": 71155 }, { "epoch": 4.834896045658377, "grad_norm": 3.2257680892944336, "learning_rate": 3.958418263350999e-05, "loss": 2.4689, "step": 71160 }, { "epoch": 4.835235765729039, "grad_norm": 3.499577283859253, "learning_rate": 3.9579936132626713e-05, "loss": 2.5936, "step": 71165 }, { "epoch": 4.835575485799701, "grad_norm": 2.9996039867401123, "learning_rate": 3.957568963174345e-05, "loss": 2.5143, "step": 71170 }, { "epoch": 4.8359152058703625, "grad_norm": 4.300943851470947, "learning_rate": 3.9571443130860176e-05, "loss": 2.532, "step": 71175 }, { "epoch": 4.836254925941025, "grad_norm": 2.7371387481689453, "learning_rate": 3.95671966299769e-05, "loss": 2.4135, "step": 71180 }, { "epoch": 4.836594646011687, "grad_norm": 3.691307544708252, "learning_rate": 3.956295012909363e-05, "loss": 2.5169, "step": 71185 }, { "epoch": 4.836934366082348, "grad_norm": 2.990389585494995, "learning_rate": 3.955870362821036e-05, "loss": 2.3278, "step": 71190 }, { "epoch": 4.83727408615301, "grad_norm": 3.767190456390381, 
"learning_rate": 3.955445712732708e-05, "loss": 2.3609, "step": 71195 }, { "epoch": 4.837613806223672, "grad_norm": 3.3206307888031006, "learning_rate": 3.955021062644381e-05, "loss": 2.4454, "step": 71200 }, { "epoch": 4.837953526294333, "grad_norm": 4.417054653167725, "learning_rate": 3.9545964125560544e-05, "loss": 2.4963, "step": 71205 }, { "epoch": 4.838293246364995, "grad_norm": 3.86053729057312, "learning_rate": 3.9541717624677265e-05, "loss": 2.5069, "step": 71210 }, { "epoch": 4.838632966435657, "grad_norm": 3.2890408039093018, "learning_rate": 3.9537471123793993e-05, "loss": 2.5629, "step": 71215 }, { "epoch": 4.8389726865063185, "grad_norm": 3.0188043117523193, "learning_rate": 3.953322462291073e-05, "loss": 2.4344, "step": 71220 }, { "epoch": 4.839312406576981, "grad_norm": 2.5845143795013428, "learning_rate": 3.952897812202745e-05, "loss": 2.547, "step": 71225 }, { "epoch": 4.839652126647643, "grad_norm": 2.5874011516571045, "learning_rate": 3.952473162114418e-05, "loss": 2.2571, "step": 71230 }, { "epoch": 4.839991846718304, "grad_norm": 3.189819574356079, "learning_rate": 3.9520485120260906e-05, "loss": 2.2419, "step": 71235 }, { "epoch": 4.840331566788966, "grad_norm": 3.345431089401245, "learning_rate": 3.9516238619377634e-05, "loss": 2.6711, "step": 71240 }, { "epoch": 4.840671286859628, "grad_norm": 3.975558280944824, "learning_rate": 3.951199211849436e-05, "loss": 2.3921, "step": 71245 }, { "epoch": 4.841011006930289, "grad_norm": 2.8243584632873535, "learning_rate": 3.950774561761109e-05, "loss": 2.4524, "step": 71250 }, { "epoch": 4.841350727000951, "grad_norm": 3.2502424716949463, "learning_rate": 3.950349911672782e-05, "loss": 2.5447, "step": 71255 }, { "epoch": 4.841690447071613, "grad_norm": 3.2035837173461914, "learning_rate": 3.9499252615844546e-05, "loss": 2.4233, "step": 71260 }, { "epoch": 4.8420301671422745, "grad_norm": 3.2577035427093506, "learning_rate": 3.9495006114961274e-05, "loss": 2.3816, "step": 71265 }, { "epoch": 
4.842369887212937, "grad_norm": 2.7907943725585938, "learning_rate": 3.9490759614078e-05, "loss": 2.3659, "step": 71270 }, { "epoch": 4.842709607283599, "grad_norm": 3.233107805252075, "learning_rate": 3.948651311319473e-05, "loss": 2.4012, "step": 71275 }, { "epoch": 4.84304932735426, "grad_norm": 2.7722651958465576, "learning_rate": 3.948226661231146e-05, "loss": 2.4657, "step": 71280 }, { "epoch": 4.843389047424922, "grad_norm": 3.6448891162872314, "learning_rate": 3.9478020111428186e-05, "loss": 2.4117, "step": 71285 }, { "epoch": 4.843728767495584, "grad_norm": 3.790804862976074, "learning_rate": 3.9473773610544914e-05, "loss": 2.1857, "step": 71290 }, { "epoch": 4.844068487566245, "grad_norm": 2.7206265926361084, "learning_rate": 3.946952710966164e-05, "loss": 2.4644, "step": 71295 }, { "epoch": 4.844408207636907, "grad_norm": 2.688157081604004, "learning_rate": 3.946528060877837e-05, "loss": 2.1753, "step": 71300 }, { "epoch": 4.844747927707569, "grad_norm": 2.573228359222412, "learning_rate": 3.94610341078951e-05, "loss": 2.618, "step": 71305 }, { "epoch": 4.8450876477782305, "grad_norm": 3.586813449859619, "learning_rate": 3.9456787607011826e-05, "loss": 2.2092, "step": 71310 }, { "epoch": 4.845427367848893, "grad_norm": 3.36885666847229, "learning_rate": 3.9452541106128554e-05, "loss": 2.4541, "step": 71315 }, { "epoch": 4.845767087919555, "grad_norm": 3.370978593826294, "learning_rate": 3.944829460524528e-05, "loss": 2.7102, "step": 71320 }, { "epoch": 4.846106807990216, "grad_norm": 3.2447116374969482, "learning_rate": 3.944404810436201e-05, "loss": 2.4902, "step": 71325 }, { "epoch": 4.846446528060878, "grad_norm": 4.346563339233398, "learning_rate": 3.943980160347874e-05, "loss": 2.5478, "step": 71330 }, { "epoch": 4.84678624813154, "grad_norm": 3.6602323055267334, "learning_rate": 3.943555510259546e-05, "loss": 2.5815, "step": 71335 }, { "epoch": 4.847125968202201, "grad_norm": 3.0914306640625, "learning_rate": 3.9431308601712194e-05, "loss": 2.2528, 
"step": 71340 }, { "epoch": 4.847465688272863, "grad_norm": 2.915848970413208, "learning_rate": 3.942706210082892e-05, "loss": 2.4307, "step": 71345 }, { "epoch": 4.847805408343525, "grad_norm": 2.9841737747192383, "learning_rate": 3.942281559994564e-05, "loss": 2.493, "step": 71350 }, { "epoch": 4.8481451284141865, "grad_norm": 3.9592697620391846, "learning_rate": 3.941856909906238e-05, "loss": 2.7768, "step": 71355 }, { "epoch": 4.848484848484849, "grad_norm": 2.566265106201172, "learning_rate": 3.9414322598179106e-05, "loss": 2.3516, "step": 71360 }, { "epoch": 4.848824568555511, "grad_norm": 2.601961851119995, "learning_rate": 3.941007609729583e-05, "loss": 2.6229, "step": 71365 }, { "epoch": 4.849164288626172, "grad_norm": 2.846496343612671, "learning_rate": 3.9405829596412555e-05, "loss": 2.3193, "step": 71370 }, { "epoch": 4.849504008696834, "grad_norm": 2.8862497806549072, "learning_rate": 3.940158309552929e-05, "loss": 2.4968, "step": 71375 }, { "epoch": 4.849843728767496, "grad_norm": 3.9265170097351074, "learning_rate": 3.939733659464601e-05, "loss": 2.4128, "step": 71380 }, { "epoch": 4.850183448838157, "grad_norm": 3.7228453159332275, "learning_rate": 3.939309009376274e-05, "loss": 2.3059, "step": 71385 }, { "epoch": 4.850523168908819, "grad_norm": 2.8282926082611084, "learning_rate": 3.9388843592879474e-05, "loss": 2.5673, "step": 71390 }, { "epoch": 4.850862888979481, "grad_norm": 2.980600357055664, "learning_rate": 3.9384597091996195e-05, "loss": 2.3326, "step": 71395 }, { "epoch": 4.8512026090501426, "grad_norm": 2.725377321243286, "learning_rate": 3.938035059111292e-05, "loss": 2.4813, "step": 71400 }, { "epoch": 4.851542329120805, "grad_norm": 3.503309965133667, "learning_rate": 3.937610409022965e-05, "loss": 2.324, "step": 71405 }, { "epoch": 4.851882049191467, "grad_norm": 3.762023448944092, "learning_rate": 3.937185758934638e-05, "loss": 2.1917, "step": 71410 }, { "epoch": 4.852221769262128, "grad_norm": 3.163341522216797, "learning_rate": 
3.936761108846311e-05, "loss": 2.4579, "step": 71415 }, { "epoch": 4.85256148933279, "grad_norm": 2.790173292160034, "learning_rate": 3.9363364587579835e-05, "loss": 2.3358, "step": 71420 }, { "epoch": 4.852901209403452, "grad_norm": 2.582202196121216, "learning_rate": 3.935911808669656e-05, "loss": 2.5032, "step": 71425 }, { "epoch": 4.853240929474113, "grad_norm": 2.7343618869781494, "learning_rate": 3.935487158581329e-05, "loss": 2.7485, "step": 71430 }, { "epoch": 4.853580649544775, "grad_norm": 3.379315137863159, "learning_rate": 3.935062508493002e-05, "loss": 2.4935, "step": 71435 }, { "epoch": 4.8539203696154365, "grad_norm": 3.3075757026672363, "learning_rate": 3.934637858404675e-05, "loss": 2.58, "step": 71440 }, { "epoch": 4.854260089686099, "grad_norm": 3.3194377422332764, "learning_rate": 3.9342132083163475e-05, "loss": 2.4134, "step": 71445 }, { "epoch": 4.854599809756761, "grad_norm": 2.8078362941741943, "learning_rate": 3.93378855822802e-05, "loss": 2.4886, "step": 71450 }, { "epoch": 4.854939529827422, "grad_norm": 2.68186354637146, "learning_rate": 3.933363908139693e-05, "loss": 2.3256, "step": 71455 }, { "epoch": 4.855279249898084, "grad_norm": 3.5903966426849365, "learning_rate": 3.932939258051366e-05, "loss": 2.3077, "step": 71460 }, { "epoch": 4.855618969968746, "grad_norm": 3.4819648265838623, "learning_rate": 3.932514607963039e-05, "loss": 2.3715, "step": 71465 }, { "epoch": 4.855958690039407, "grad_norm": 3.6827526092529297, "learning_rate": 3.9320899578747115e-05, "loss": 2.5964, "step": 71470 }, { "epoch": 4.856298410110069, "grad_norm": 3.506934404373169, "learning_rate": 3.931665307786384e-05, "loss": 2.424, "step": 71475 }, { "epoch": 4.856638130180731, "grad_norm": 2.882791042327881, "learning_rate": 3.931240657698057e-05, "loss": 2.2027, "step": 71480 }, { "epoch": 4.8569778502513925, "grad_norm": 3.1466996669769287, "learning_rate": 3.93081600760973e-05, "loss": 2.7644, "step": 71485 }, { "epoch": 4.857317570322055, "grad_norm": 
3.0775210857391357, "learning_rate": 3.930391357521403e-05, "loss": 2.6028, "step": 71490 }, { "epoch": 4.857657290392717, "grad_norm": 3.064777374267578, "learning_rate": 3.9299667074330755e-05, "loss": 2.2571, "step": 71495 }, { "epoch": 4.857997010463378, "grad_norm": 2.902930974960327, "learning_rate": 3.929542057344748e-05, "loss": 2.6673, "step": 71500 }, { "epoch": 4.85833673053404, "grad_norm": 3.7035586833953857, "learning_rate": 3.9291174072564204e-05, "loss": 2.2593, "step": 71505 }, { "epoch": 4.858676450604702, "grad_norm": 2.9790396690368652, "learning_rate": 3.928692757168094e-05, "loss": 2.1324, "step": 71510 }, { "epoch": 4.859016170675363, "grad_norm": 3.5178027153015137, "learning_rate": 3.928268107079767e-05, "loss": 2.4687, "step": 71515 }, { "epoch": 4.859355890746025, "grad_norm": 2.4768197536468506, "learning_rate": 3.927843456991439e-05, "loss": 2.448, "step": 71520 }, { "epoch": 4.859695610816687, "grad_norm": 3.4015753269195557, "learning_rate": 3.927418806903112e-05, "loss": 2.6149, "step": 71525 }, { "epoch": 4.8600353308873485, "grad_norm": 3.7564172744750977, "learning_rate": 3.926994156814785e-05, "loss": 2.4162, "step": 71530 }, { "epoch": 4.860375050958011, "grad_norm": 4.04485559463501, "learning_rate": 3.926569506726457e-05, "loss": 2.4009, "step": 71535 }, { "epoch": 4.860714771028673, "grad_norm": 3.7622621059417725, "learning_rate": 3.92614485663813e-05, "loss": 2.3065, "step": 71540 }, { "epoch": 4.861054491099334, "grad_norm": 2.9536471366882324, "learning_rate": 3.9257202065498035e-05, "loss": 2.8044, "step": 71545 }, { "epoch": 4.861394211169996, "grad_norm": 2.771064043045044, "learning_rate": 3.9252955564614756e-05, "loss": 2.5865, "step": 71550 }, { "epoch": 4.861733931240658, "grad_norm": 2.974508285522461, "learning_rate": 3.9248709063731484e-05, "loss": 2.5387, "step": 71555 }, { "epoch": 4.862073651311319, "grad_norm": 3.157721996307373, "learning_rate": 3.924446256284822e-05, "loss": 2.4355, "step": 71560 }, { 
"epoch": 4.862413371381981, "grad_norm": 6.1958136558532715, "learning_rate": 3.924021606196494e-05, "loss": 2.2169, "step": 71565 }, { "epoch": 4.862753091452643, "grad_norm": 3.7927372455596924, "learning_rate": 3.923596956108167e-05, "loss": 2.3167, "step": 71570 }, { "epoch": 4.8630928115233045, "grad_norm": 2.932018756866455, "learning_rate": 3.9231723060198396e-05, "loss": 2.4894, "step": 71575 }, { "epoch": 4.863432531593967, "grad_norm": 3.422211170196533, "learning_rate": 3.9227476559315124e-05, "loss": 2.4363, "step": 71580 }, { "epoch": 4.863772251664629, "grad_norm": 3.289694309234619, "learning_rate": 3.922323005843185e-05, "loss": 2.7962, "step": 71585 }, { "epoch": 4.86411197173529, "grad_norm": 2.8772850036621094, "learning_rate": 3.921898355754858e-05, "loss": 2.2475, "step": 71590 }, { "epoch": 4.864451691805952, "grad_norm": 2.6412100791931152, "learning_rate": 3.921473705666531e-05, "loss": 2.6565, "step": 71595 }, { "epoch": 4.864791411876614, "grad_norm": 2.761533737182617, "learning_rate": 3.9210490555782037e-05, "loss": 2.4091, "step": 71600 }, { "epoch": 4.865131131947275, "grad_norm": 3.277470350265503, "learning_rate": 3.9206244054898765e-05, "loss": 2.5701, "step": 71605 }, { "epoch": 4.865470852017937, "grad_norm": 4.0243048667907715, "learning_rate": 3.920199755401549e-05, "loss": 2.1777, "step": 71610 }, { "epoch": 4.865810572088599, "grad_norm": 3.0723085403442383, "learning_rate": 3.919775105313222e-05, "loss": 2.4045, "step": 71615 }, { "epoch": 4.8661502921592605, "grad_norm": 3.6243245601654053, "learning_rate": 3.919350455224895e-05, "loss": 2.4025, "step": 71620 }, { "epoch": 4.866490012229923, "grad_norm": 2.9005796909332275, "learning_rate": 3.9189258051365677e-05, "loss": 2.5868, "step": 71625 }, { "epoch": 4.866829732300585, "grad_norm": 3.390838861465454, "learning_rate": 3.9185011550482405e-05, "loss": 2.3336, "step": 71630 }, { "epoch": 4.867169452371246, "grad_norm": 2.5911128520965576, "learning_rate": 
3.918076504959913e-05, "loss": 2.5371, "step": 71635 }, { "epoch": 4.867509172441908, "grad_norm": 2.9299123287200928, "learning_rate": 3.917651854871586e-05, "loss": 2.3827, "step": 71640 }, { "epoch": 4.86784889251257, "grad_norm": 3.836961269378662, "learning_rate": 3.917227204783259e-05, "loss": 2.6384, "step": 71645 }, { "epoch": 4.868188612583231, "grad_norm": 2.623753309249878, "learning_rate": 3.9168025546949317e-05, "loss": 2.5472, "step": 71650 }, { "epoch": 4.868528332653893, "grad_norm": 3.040337324142456, "learning_rate": 3.9163779046066045e-05, "loss": 2.4065, "step": 71655 }, { "epoch": 4.8688680527245545, "grad_norm": 3.245337963104248, "learning_rate": 3.915953254518277e-05, "loss": 2.3976, "step": 71660 }, { "epoch": 4.8692077727952165, "grad_norm": 2.8422179222106934, "learning_rate": 3.91552860442995e-05, "loss": 2.461, "step": 71665 }, { "epoch": 4.869547492865879, "grad_norm": 2.5706443786621094, "learning_rate": 3.915103954341623e-05, "loss": 2.9224, "step": 71670 }, { "epoch": 4.86988721293654, "grad_norm": 3.3148739337921143, "learning_rate": 3.914679304253295e-05, "loss": 2.6624, "step": 71675 }, { "epoch": 4.870226933007202, "grad_norm": 3.1783676147460938, "learning_rate": 3.9142546541649685e-05, "loss": 2.5434, "step": 71680 }, { "epoch": 4.870566653077864, "grad_norm": 3.0294370651245117, "learning_rate": 3.913830004076641e-05, "loss": 2.1324, "step": 71685 }, { "epoch": 4.870906373148525, "grad_norm": 3.129209280014038, "learning_rate": 3.9134053539883134e-05, "loss": 2.3163, "step": 71690 }, { "epoch": 4.871246093219187, "grad_norm": 3.1495885848999023, "learning_rate": 3.912980703899987e-05, "loss": 2.4412, "step": 71695 }, { "epoch": 4.871585813289849, "grad_norm": 3.0768091678619385, "learning_rate": 3.91255605381166e-05, "loss": 2.4464, "step": 71700 }, { "epoch": 4.8719255333605105, "grad_norm": 2.6987175941467285, "learning_rate": 3.912131403723332e-05, "loss": 2.421, "step": 71705 }, { "epoch": 4.872265253431173, "grad_norm": 
3.019805669784546, "learning_rate": 3.911706753635005e-05, "loss": 2.1614, "step": 71710 }, { "epoch": 4.872604973501835, "grad_norm": 3.4212286472320557, "learning_rate": 3.911282103546678e-05, "loss": 2.2261, "step": 71715 }, { "epoch": 4.872944693572496, "grad_norm": 3.356743574142456, "learning_rate": 3.91085745345835e-05, "loss": 2.5934, "step": 71720 }, { "epoch": 4.873284413643158, "grad_norm": 3.1260411739349365, "learning_rate": 3.910432803370023e-05, "loss": 2.5326, "step": 71725 }, { "epoch": 4.87362413371382, "grad_norm": 3.9097044467926025, "learning_rate": 3.9100081532816965e-05, "loss": 2.5635, "step": 71730 }, { "epoch": 4.873963853784481, "grad_norm": 3.5759084224700928, "learning_rate": 3.9095835031933686e-05, "loss": 2.6037, "step": 71735 }, { "epoch": 4.874303573855143, "grad_norm": 2.6737747192382812, "learning_rate": 3.9091588531050414e-05, "loss": 2.4434, "step": 71740 }, { "epoch": 4.874643293925805, "grad_norm": 3.0377702713012695, "learning_rate": 3.908734203016715e-05, "loss": 2.6914, "step": 71745 }, { "epoch": 4.8749830139964665, "grad_norm": 2.699333667755127, "learning_rate": 3.908309552928387e-05, "loss": 2.3781, "step": 71750 }, { "epoch": 4.875322734067129, "grad_norm": 3.321863889694214, "learning_rate": 3.90788490284006e-05, "loss": 2.3015, "step": 71755 }, { "epoch": 4.875662454137791, "grad_norm": 2.9182374477386475, "learning_rate": 3.9074602527517326e-05, "loss": 2.5246, "step": 71760 }, { "epoch": 4.876002174208452, "grad_norm": 2.908660650253296, "learning_rate": 3.9070356026634054e-05, "loss": 2.5555, "step": 71765 }, { "epoch": 4.876341894279114, "grad_norm": 3.1301045417785645, "learning_rate": 3.906610952575078e-05, "loss": 2.3512, "step": 71770 }, { "epoch": 4.876681614349776, "grad_norm": 3.0200603008270264, "learning_rate": 3.906186302486751e-05, "loss": 2.5263, "step": 71775 }, { "epoch": 4.877021334420437, "grad_norm": 2.807246208190918, "learning_rate": 3.905761652398424e-05, "loss": 2.5448, "step": 71780 }, { 
"epoch": 4.877361054491099, "grad_norm": 3.231030225753784, "learning_rate": 3.9053370023100966e-05, "loss": 2.4419, "step": 71785 }, { "epoch": 4.877700774561761, "grad_norm": 3.3371546268463135, "learning_rate": 3.9049123522217694e-05, "loss": 2.4653, "step": 71790 }, { "epoch": 4.8780404946324225, "grad_norm": 2.6647462844848633, "learning_rate": 3.904487702133442e-05, "loss": 2.2755, "step": 71795 }, { "epoch": 4.878380214703085, "grad_norm": 3.057358980178833, "learning_rate": 3.904063052045115e-05, "loss": 2.6001, "step": 71800 }, { "epoch": 4.878719934773747, "grad_norm": 3.3472836017608643, "learning_rate": 3.903638401956788e-05, "loss": 2.2479, "step": 71805 }, { "epoch": 4.879059654844408, "grad_norm": 3.950392961502075, "learning_rate": 3.9032137518684606e-05, "loss": 2.3704, "step": 71810 }, { "epoch": 4.87939937491507, "grad_norm": 3.689516544342041, "learning_rate": 3.9027891017801334e-05, "loss": 2.4323, "step": 71815 }, { "epoch": 4.879739094985732, "grad_norm": 3.62788987159729, "learning_rate": 3.902364451691806e-05, "loss": 2.4139, "step": 71820 }, { "epoch": 4.880078815056393, "grad_norm": 2.4209530353546143, "learning_rate": 3.901939801603479e-05, "loss": 2.4716, "step": 71825 }, { "epoch": 4.880418535127055, "grad_norm": 3.9533677101135254, "learning_rate": 3.901515151515152e-05, "loss": 2.3893, "step": 71830 }, { "epoch": 4.880758255197717, "grad_norm": 3.3228275775909424, "learning_rate": 3.9010905014268246e-05, "loss": 2.7032, "step": 71835 }, { "epoch": 4.8810979752683785, "grad_norm": 2.7554285526275635, "learning_rate": 3.9006658513384974e-05, "loss": 2.508, "step": 71840 }, { "epoch": 4.881437695339041, "grad_norm": 3.244328498840332, "learning_rate": 3.90024120125017e-05, "loss": 2.5205, "step": 71845 }, { "epoch": 4.881777415409703, "grad_norm": 3.8014543056488037, "learning_rate": 3.899816551161843e-05, "loss": 2.5578, "step": 71850 }, { "epoch": 4.882117135480364, "grad_norm": 2.9694936275482178, "learning_rate": 
3.899391901073516e-05, "loss": 2.4258, "step": 71855 }, { "epoch": 4.882456855551026, "grad_norm": 2.8216168880462646, "learning_rate": 3.898967250985188e-05, "loss": 2.6488, "step": 71860 }, { "epoch": 4.882796575621688, "grad_norm": 3.5086300373077393, "learning_rate": 3.8985426008968614e-05, "loss": 2.7397, "step": 71865 }, { "epoch": 4.883136295692349, "grad_norm": 3.5414340496063232, "learning_rate": 3.898117950808534e-05, "loss": 2.4405, "step": 71870 }, { "epoch": 4.883476015763011, "grad_norm": 3.98144793510437, "learning_rate": 3.8976933007202063e-05, "loss": 2.3856, "step": 71875 }, { "epoch": 4.883815735833673, "grad_norm": 3.264191150665283, "learning_rate": 3.89726865063188e-05, "loss": 2.6071, "step": 71880 }, { "epoch": 4.8841554559043345, "grad_norm": 3.9354755878448486, "learning_rate": 3.8968440005435526e-05, "loss": 2.4605, "step": 71885 }, { "epoch": 4.884495175974997, "grad_norm": 3.30116868019104, "learning_rate": 3.896419350455225e-05, "loss": 2.3034, "step": 71890 }, { "epoch": 4.884834896045659, "grad_norm": 3.119917154312134, "learning_rate": 3.8959947003668975e-05, "loss": 2.4844, "step": 71895 }, { "epoch": 4.88517461611632, "grad_norm": 3.2512569427490234, "learning_rate": 3.895570050278571e-05, "loss": 2.2674, "step": 71900 }, { "epoch": 4.885514336186982, "grad_norm": 2.6916728019714355, "learning_rate": 3.895145400190243e-05, "loss": 2.5259, "step": 71905 }, { "epoch": 4.885854056257644, "grad_norm": 3.5363657474517822, "learning_rate": 3.894720750101916e-05, "loss": 2.2161, "step": 71910 }, { "epoch": 4.886193776328305, "grad_norm": 2.688563346862793, "learning_rate": 3.8942961000135894e-05, "loss": 2.4746, "step": 71915 }, { "epoch": 4.886533496398967, "grad_norm": 3.090404748916626, "learning_rate": 3.8938714499252615e-05, "loss": 2.4045, "step": 71920 }, { "epoch": 4.886873216469629, "grad_norm": 3.251281499862671, "learning_rate": 3.8934467998369343e-05, "loss": 2.2927, "step": 71925 }, { "epoch": 4.8872129365402905, 
"grad_norm": 3.7744452953338623, "learning_rate": 3.893022149748607e-05, "loss": 2.3533, "step": 71930 }, { "epoch": 4.887552656610953, "grad_norm": 3.2096333503723145, "learning_rate": 3.89259749966028e-05, "loss": 2.4914, "step": 71935 }, { "epoch": 4.887892376681615, "grad_norm": 3.0332300662994385, "learning_rate": 3.892172849571953e-05, "loss": 2.723, "step": 71940 }, { "epoch": 4.888232096752276, "grad_norm": 3.0953869819641113, "learning_rate": 3.8917481994836256e-05, "loss": 2.422, "step": 71945 }, { "epoch": 4.888571816822938, "grad_norm": 2.5401880741119385, "learning_rate": 3.8913235493952984e-05, "loss": 2.429, "step": 71950 }, { "epoch": 4.8889115368936, "grad_norm": 3.357330560684204, "learning_rate": 3.890898899306971e-05, "loss": 2.5558, "step": 71955 }, { "epoch": 4.889251256964261, "grad_norm": 3.10856556892395, "learning_rate": 3.890474249218644e-05, "loss": 2.2471, "step": 71960 }, { "epoch": 4.889590977034923, "grad_norm": 3.300903558731079, "learning_rate": 3.890049599130317e-05, "loss": 2.3495, "step": 71965 }, { "epoch": 4.889930697105585, "grad_norm": 3.0363903045654297, "learning_rate": 3.8896249490419896e-05, "loss": 2.5508, "step": 71970 }, { "epoch": 4.8902704171762466, "grad_norm": 2.9005208015441895, "learning_rate": 3.8892002989536624e-05, "loss": 2.5262, "step": 71975 }, { "epoch": 4.890610137246909, "grad_norm": 3.968574047088623, "learning_rate": 3.888775648865335e-05, "loss": 2.4173, "step": 71980 }, { "epoch": 4.890949857317571, "grad_norm": 3.219513416290283, "learning_rate": 3.888350998777008e-05, "loss": 2.5032, "step": 71985 }, { "epoch": 4.891289577388232, "grad_norm": 3.2981622219085693, "learning_rate": 3.887926348688681e-05, "loss": 2.5542, "step": 71990 }, { "epoch": 4.891629297458894, "grad_norm": 3.414018392562866, "learning_rate": 3.8875016986003536e-05, "loss": 2.7161, "step": 71995 }, { "epoch": 4.891969017529556, "grad_norm": 2.8131399154663086, "learning_rate": 3.8870770485120264e-05, "loss": 2.59, "step": 72000 
}, { "epoch": 4.892308737600217, "grad_norm": 3.3378796577453613, "learning_rate": 3.886652398423699e-05, "loss": 2.3261, "step": 72005 }, { "epoch": 4.892648457670879, "grad_norm": 3.8170278072357178, "learning_rate": 3.886227748335372e-05, "loss": 2.2641, "step": 72010 }, { "epoch": 4.892988177741541, "grad_norm": 3.222707748413086, "learning_rate": 3.885803098247045e-05, "loss": 2.4983, "step": 72015 }, { "epoch": 4.893327897812203, "grad_norm": 3.922414779663086, "learning_rate": 3.8853784481587176e-05, "loss": 2.6619, "step": 72020 }, { "epoch": 4.893667617882865, "grad_norm": 3.1087827682495117, "learning_rate": 3.8849537980703904e-05, "loss": 2.287, "step": 72025 }, { "epoch": 4.894007337953527, "grad_norm": 3.572486639022827, "learning_rate": 3.8845291479820625e-05, "loss": 2.707, "step": 72030 }, { "epoch": 4.894347058024188, "grad_norm": 2.3850300312042236, "learning_rate": 3.884104497893736e-05, "loss": 2.2528, "step": 72035 }, { "epoch": 4.89468677809485, "grad_norm": 3.3077621459960938, "learning_rate": 3.883679847805409e-05, "loss": 2.777, "step": 72040 }, { "epoch": 4.895026498165512, "grad_norm": 3.8298988342285156, "learning_rate": 3.883255197717081e-05, "loss": 2.5299, "step": 72045 }, { "epoch": 4.895366218236173, "grad_norm": 3.26646089553833, "learning_rate": 3.8828305476287544e-05, "loss": 2.414, "step": 72050 }, { "epoch": 4.895705938306835, "grad_norm": 3.348623752593994, "learning_rate": 3.882405897540427e-05, "loss": 2.4127, "step": 72055 }, { "epoch": 4.896045658377497, "grad_norm": 3.776693820953369, "learning_rate": 3.881981247452099e-05, "loss": 2.4826, "step": 72060 }, { "epoch": 4.896385378448159, "grad_norm": 3.1195788383483887, "learning_rate": 3.881556597363772e-05, "loss": 2.6698, "step": 72065 }, { "epoch": 4.896725098518821, "grad_norm": 3.284477949142456, "learning_rate": 3.8811319472754456e-05, "loss": 2.4865, "step": 72070 }, { "epoch": 4.897064818589483, "grad_norm": 2.701524019241333, "learning_rate": 
3.880707297187118e-05, "loss": 2.5349, "step": 72075 }, { "epoch": 4.897404538660144, "grad_norm": 3.5191049575805664, "learning_rate": 3.8802826470987905e-05, "loss": 2.4169, "step": 72080 }, { "epoch": 4.897744258730806, "grad_norm": 3.036339044570923, "learning_rate": 3.879857997010464e-05, "loss": 2.6182, "step": 72085 }, { "epoch": 4.898083978801468, "grad_norm": 4.124022960662842, "learning_rate": 3.879433346922136e-05, "loss": 2.4514, "step": 72090 }, { "epoch": 4.898423698872129, "grad_norm": 3.889582395553589, "learning_rate": 3.879008696833809e-05, "loss": 2.3944, "step": 72095 }, { "epoch": 4.898763418942791, "grad_norm": 2.7663028240203857, "learning_rate": 3.8785840467454824e-05, "loss": 2.421, "step": 72100 }, { "epoch": 4.899103139013453, "grad_norm": 3.0875720977783203, "learning_rate": 3.8781593966571545e-05, "loss": 2.5476, "step": 72105 }, { "epoch": 4.899442859084115, "grad_norm": 3.377880811691284, "learning_rate": 3.877734746568827e-05, "loss": 2.5717, "step": 72110 }, { "epoch": 4.899782579154777, "grad_norm": 3.4834370613098145, "learning_rate": 3.8773100964805e-05, "loss": 2.1351, "step": 72115 }, { "epoch": 4.900122299225439, "grad_norm": 3.345020294189453, "learning_rate": 3.876885446392173e-05, "loss": 2.6238, "step": 72120 }, { "epoch": 4.9004620192961, "grad_norm": 2.6524174213409424, "learning_rate": 3.876460796303846e-05, "loss": 2.2746, "step": 72125 }, { "epoch": 4.900801739366762, "grad_norm": 3.312437057495117, "learning_rate": 3.8760361462155185e-05, "loss": 2.555, "step": 72130 }, { "epoch": 4.901141459437423, "grad_norm": 2.698700428009033, "learning_rate": 3.875611496127192e-05, "loss": 2.4605, "step": 72135 }, { "epoch": 4.901481179508085, "grad_norm": 4.202654838562012, "learning_rate": 3.875186846038864e-05, "loss": 2.3836, "step": 72140 }, { "epoch": 4.901820899578747, "grad_norm": 3.2859020233154297, "learning_rate": 3.874762195950537e-05, "loss": 2.45, "step": 72145 }, { "epoch": 4.9021606196494085, "grad_norm": 
2.8885741233825684, "learning_rate": 3.87433754586221e-05, "loss": 2.5175, "step": 72150 }, { "epoch": 4.902500339720071, "grad_norm": 3.0702435970306396, "learning_rate": 3.8739128957738825e-05, "loss": 2.4392, "step": 72155 }, { "epoch": 4.902840059790733, "grad_norm": 3.9185683727264404, "learning_rate": 3.873488245685555e-05, "loss": 2.4362, "step": 72160 }, { "epoch": 4.903179779861394, "grad_norm": 2.6859378814697266, "learning_rate": 3.873063595597228e-05, "loss": 2.5395, "step": 72165 }, { "epoch": 4.903519499932056, "grad_norm": 3.325078248977661, "learning_rate": 3.872638945508901e-05, "loss": 2.3312, "step": 72170 }, { "epoch": 4.903859220002718, "grad_norm": 2.867875576019287, "learning_rate": 3.872214295420574e-05, "loss": 2.535, "step": 72175 }, { "epoch": 4.904198940073379, "grad_norm": 2.537221908569336, "learning_rate": 3.8717896453322465e-05, "loss": 2.3874, "step": 72180 }, { "epoch": 4.904538660144041, "grad_norm": 3.3687551021575928, "learning_rate": 3.871364995243919e-05, "loss": 2.5446, "step": 72185 }, { "epoch": 4.904878380214703, "grad_norm": 2.872213125228882, "learning_rate": 3.870940345155592e-05, "loss": 2.8044, "step": 72190 }, { "epoch": 4.9052181002853645, "grad_norm": 3.288428544998169, "learning_rate": 3.870515695067265e-05, "loss": 2.4274, "step": 72195 }, { "epoch": 4.905557820356027, "grad_norm": 3.3879194259643555, "learning_rate": 3.870091044978938e-05, "loss": 2.449, "step": 72200 }, { "epoch": 4.905897540426689, "grad_norm": 3.555276393890381, "learning_rate": 3.8696663948906105e-05, "loss": 2.6156, "step": 72205 }, { "epoch": 4.90623726049735, "grad_norm": 3.421384572982788, "learning_rate": 3.869241744802283e-05, "loss": 2.6381, "step": 72210 }, { "epoch": 4.906576980568012, "grad_norm": 2.8961548805236816, "learning_rate": 3.8688170947139554e-05, "loss": 2.2329, "step": 72215 }, { "epoch": 4.906916700638674, "grad_norm": 3.096430540084839, "learning_rate": 3.868392444625629e-05, "loss": 2.2717, "step": 72220 }, { 
"epoch": 4.907256420709335, "grad_norm": 3.2381489276885986, "learning_rate": 3.867967794537302e-05, "loss": 2.4707, "step": 72225 }, { "epoch": 4.907596140779997, "grad_norm": 3.6144869327545166, "learning_rate": 3.867543144448974e-05, "loss": 2.1036, "step": 72230 }, { "epoch": 4.907935860850659, "grad_norm": 2.800633430480957, "learning_rate": 3.867118494360647e-05, "loss": 2.3799, "step": 72235 }, { "epoch": 4.9082755809213205, "grad_norm": 3.5591819286346436, "learning_rate": 3.86669384427232e-05, "loss": 2.5362, "step": 72240 }, { "epoch": 4.908615300991983, "grad_norm": 3.2159478664398193, "learning_rate": 3.866269194183992e-05, "loss": 2.4866, "step": 72245 }, { "epoch": 4.908955021062645, "grad_norm": 4.0850510597229, "learning_rate": 3.865844544095665e-05, "loss": 2.3975, "step": 72250 }, { "epoch": 4.909294741133306, "grad_norm": 4.226441383361816, "learning_rate": 3.8654198940073385e-05, "loss": 2.298, "step": 72255 }, { "epoch": 4.909634461203968, "grad_norm": 3.652209997177124, "learning_rate": 3.8649952439190106e-05, "loss": 2.7052, "step": 72260 }, { "epoch": 4.90997418127463, "grad_norm": 3.041020393371582, "learning_rate": 3.8645705938306834e-05, "loss": 2.5532, "step": 72265 }, { "epoch": 4.910313901345291, "grad_norm": 4.0095720291137695, "learning_rate": 3.864145943742357e-05, "loss": 2.3363, "step": 72270 }, { "epoch": 4.910653621415953, "grad_norm": 4.5562944412231445, "learning_rate": 3.863721293654029e-05, "loss": 2.5586, "step": 72275 }, { "epoch": 4.910993341486615, "grad_norm": 2.7598211765289307, "learning_rate": 3.863296643565702e-05, "loss": 2.2895, "step": 72280 }, { "epoch": 4.911333061557277, "grad_norm": 3.264720916748047, "learning_rate": 3.8628719934773746e-05, "loss": 2.4061, "step": 72285 }, { "epoch": 4.911672781627939, "grad_norm": 2.5492498874664307, "learning_rate": 3.8624473433890474e-05, "loss": 2.1876, "step": 72290 }, { "epoch": 4.912012501698601, "grad_norm": 3.0707550048828125, "learning_rate": 3.86202269330072e-05, 
"loss": 2.3369, "step": 72295 }, { "epoch": 4.912352221769262, "grad_norm": 2.515852212905884, "learning_rate": 3.861598043212393e-05, "loss": 2.3332, "step": 72300 }, { "epoch": 4.912691941839924, "grad_norm": 3.1545488834381104, "learning_rate": 3.8611733931240665e-05, "loss": 2.4627, "step": 72305 }, { "epoch": 4.913031661910586, "grad_norm": 3.711491584777832, "learning_rate": 3.8607487430357387e-05, "loss": 2.2226, "step": 72310 }, { "epoch": 4.913371381981247, "grad_norm": 3.5119736194610596, "learning_rate": 3.8603240929474115e-05, "loss": 2.2602, "step": 72315 }, { "epoch": 4.913711102051909, "grad_norm": 2.8559954166412354, "learning_rate": 3.859899442859084e-05, "loss": 2.3047, "step": 72320 }, { "epoch": 4.914050822122571, "grad_norm": 3.581500291824341, "learning_rate": 3.859474792770757e-05, "loss": 2.3701, "step": 72325 }, { "epoch": 4.914390542193233, "grad_norm": 3.00754714012146, "learning_rate": 3.85905014268243e-05, "loss": 2.3642, "step": 72330 }, { "epoch": 4.914730262263895, "grad_norm": 3.60172700881958, "learning_rate": 3.8586254925941027e-05, "loss": 2.3884, "step": 72335 }, { "epoch": 4.915069982334556, "grad_norm": 3.0407488346099854, "learning_rate": 3.8582008425057755e-05, "loss": 2.5586, "step": 72340 }, { "epoch": 4.915409702405218, "grad_norm": 4.224836826324463, "learning_rate": 3.857776192417448e-05, "loss": 2.3886, "step": 72345 }, { "epoch": 4.91574942247588, "grad_norm": 4.151589393615723, "learning_rate": 3.857351542329121e-05, "loss": 2.151, "step": 72350 }, { "epoch": 4.916089142546541, "grad_norm": 2.6095311641693115, "learning_rate": 3.856926892240794e-05, "loss": 2.362, "step": 72355 }, { "epoch": 4.916428862617203, "grad_norm": 3.2592380046844482, "learning_rate": 3.8565022421524667e-05, "loss": 2.3196, "step": 72360 }, { "epoch": 4.916768582687865, "grad_norm": 3.4899609088897705, "learning_rate": 3.8560775920641395e-05, "loss": 2.3774, "step": 72365 }, { "epoch": 4.9171083027585265, "grad_norm": 3.241758108139038, 
"learning_rate": 3.855652941975812e-05, "loss": 2.3543, "step": 72370 }, { "epoch": 4.917448022829189, "grad_norm": 3.135371208190918, "learning_rate": 3.855228291887485e-05, "loss": 2.4379, "step": 72375 }, { "epoch": 4.917787742899851, "grad_norm": 3.788048267364502, "learning_rate": 3.854803641799158e-05, "loss": 2.2456, "step": 72380 }, { "epoch": 4.918127462970512, "grad_norm": 4.275213718414307, "learning_rate": 3.85437899171083e-05, "loss": 2.332, "step": 72385 }, { "epoch": 4.918467183041174, "grad_norm": 3.506371259689331, "learning_rate": 3.8539543416225035e-05, "loss": 2.5219, "step": 72390 }, { "epoch": 4.918806903111836, "grad_norm": 3.6951096057891846, "learning_rate": 3.853529691534176e-05, "loss": 2.4017, "step": 72395 }, { "epoch": 4.919146623182497, "grad_norm": 2.8504674434661865, "learning_rate": 3.8531050414458484e-05, "loss": 2.592, "step": 72400 }, { "epoch": 4.919486343253159, "grad_norm": 3.0390124320983887, "learning_rate": 3.852680391357522e-05, "loss": 2.2933, "step": 72405 }, { "epoch": 4.919826063323821, "grad_norm": 4.106148719787598, "learning_rate": 3.852255741269195e-05, "loss": 2.485, "step": 72410 }, { "epoch": 4.9201657833944825, "grad_norm": 4.392902374267578, "learning_rate": 3.851831091180867e-05, "loss": 2.3288, "step": 72415 }, { "epoch": 4.920505503465145, "grad_norm": 2.606106758117676, "learning_rate": 3.8514064410925396e-05, "loss": 2.3704, "step": 72420 }, { "epoch": 4.920845223535807, "grad_norm": 3.843721389770508, "learning_rate": 3.850981791004213e-05, "loss": 2.3907, "step": 72425 }, { "epoch": 4.921184943606468, "grad_norm": 3.4733028411865234, "learning_rate": 3.850557140915885e-05, "loss": 2.344, "step": 72430 }, { "epoch": 4.92152466367713, "grad_norm": 2.6349005699157715, "learning_rate": 3.850132490827558e-05, "loss": 2.2456, "step": 72435 }, { "epoch": 4.921864383747792, "grad_norm": 2.878304958343506, "learning_rate": 3.8497078407392315e-05, "loss": 2.4569, "step": 72440 }, { "epoch": 4.922204103818453, 
"grad_norm": 3.8579368591308594, "learning_rate": 3.8492831906509036e-05, "loss": 2.5214, "step": 72445 }, { "epoch": 4.922543823889115, "grad_norm": 3.523291826248169, "learning_rate": 3.8488585405625764e-05, "loss": 2.3104, "step": 72450 }, { "epoch": 4.922883543959777, "grad_norm": 2.9812629222869873, "learning_rate": 3.848433890474249e-05, "loss": 2.3306, "step": 72455 }, { "epoch": 4.9232232640304385, "grad_norm": 2.259244680404663, "learning_rate": 3.848009240385922e-05, "loss": 2.4108, "step": 72460 }, { "epoch": 4.923562984101101, "grad_norm": 3.51894474029541, "learning_rate": 3.847584590297595e-05, "loss": 2.5359, "step": 72465 }, { "epoch": 4.923902704171763, "grad_norm": 2.6470885276794434, "learning_rate": 3.8471599402092676e-05, "loss": 2.6301, "step": 72470 }, { "epoch": 4.924242424242424, "grad_norm": 2.96730637550354, "learning_rate": 3.846735290120941e-05, "loss": 2.4987, "step": 72475 }, { "epoch": 4.924582144313086, "grad_norm": 2.43051815032959, "learning_rate": 3.846310640032613e-05, "loss": 2.3943, "step": 72480 }, { "epoch": 4.924921864383748, "grad_norm": 3.450852394104004, "learning_rate": 3.845885989944286e-05, "loss": 2.4053, "step": 72485 }, { "epoch": 4.925261584454409, "grad_norm": 3.1014435291290283, "learning_rate": 3.845461339855959e-05, "loss": 2.3555, "step": 72490 }, { "epoch": 4.925601304525071, "grad_norm": 4.519999980926514, "learning_rate": 3.8450366897676316e-05, "loss": 2.304, "step": 72495 }, { "epoch": 4.925941024595733, "grad_norm": 3.0503575801849365, "learning_rate": 3.8446120396793044e-05, "loss": 2.3566, "step": 72500 }, { "epoch": 4.9262807446663945, "grad_norm": 4.612180233001709, "learning_rate": 3.844187389590977e-05, "loss": 2.3279, "step": 72505 }, { "epoch": 4.926620464737057, "grad_norm": 3.5712616443634033, "learning_rate": 3.84376273950265e-05, "loss": 2.4988, "step": 72510 }, { "epoch": 4.926960184807719, "grad_norm": 3.2618491649627686, "learning_rate": 3.843338089414323e-05, "loss": 2.7345, "step": 
72515 }, { "epoch": 4.92729990487838, "grad_norm": 3.2020795345306396, "learning_rate": 3.8429134393259956e-05, "loss": 2.704, "step": 72520 }, { "epoch": 4.927639624949042, "grad_norm": 3.078561544418335, "learning_rate": 3.8424887892376684e-05, "loss": 2.3956, "step": 72525 }, { "epoch": 4.927979345019704, "grad_norm": 2.880300521850586, "learning_rate": 3.842064139149341e-05, "loss": 2.3406, "step": 72530 }, { "epoch": 4.928319065090365, "grad_norm": 2.455307722091675, "learning_rate": 3.841639489061014e-05, "loss": 2.4748, "step": 72535 }, { "epoch": 4.928658785161027, "grad_norm": 3.244785785675049, "learning_rate": 3.841214838972687e-05, "loss": 2.6021, "step": 72540 }, { "epoch": 4.928998505231689, "grad_norm": 3.2774877548217773, "learning_rate": 3.8407901888843596e-05, "loss": 2.4142, "step": 72545 }, { "epoch": 4.9293382253023506, "grad_norm": 2.82808518409729, "learning_rate": 3.8403655387960324e-05, "loss": 2.3364, "step": 72550 }, { "epoch": 4.929677945373013, "grad_norm": 2.4476449489593506, "learning_rate": 3.8399408887077045e-05, "loss": 2.5175, "step": 72555 }, { "epoch": 4.930017665443675, "grad_norm": 3.2480111122131348, "learning_rate": 3.839516238619378e-05, "loss": 2.4527, "step": 72560 }, { "epoch": 4.930357385514336, "grad_norm": 3.1051712036132812, "learning_rate": 3.839091588531051e-05, "loss": 2.4999, "step": 72565 }, { "epoch": 4.930697105584998, "grad_norm": 2.931152105331421, "learning_rate": 3.838666938442723e-05, "loss": 2.5098, "step": 72570 }, { "epoch": 4.93103682565566, "grad_norm": 4.7859954833984375, "learning_rate": 3.8382422883543964e-05, "loss": 2.573, "step": 72575 }, { "epoch": 4.931376545726321, "grad_norm": 3.59993314743042, "learning_rate": 3.837817638266069e-05, "loss": 2.499, "step": 72580 }, { "epoch": 4.931716265796983, "grad_norm": 3.1109890937805176, "learning_rate": 3.837392988177741e-05, "loss": 2.5434, "step": 72585 }, { "epoch": 4.932055985867645, "grad_norm": 3.238325357437134, "learning_rate": 
3.836968338089414e-05, "loss": 2.4426, "step": 72590 }, { "epoch": 4.932395705938307, "grad_norm": 3.0044972896575928, "learning_rate": 3.8365436880010876e-05, "loss": 2.4295, "step": 72595 }, { "epoch": 4.932735426008969, "grad_norm": 3.6047356128692627, "learning_rate": 3.83611903791276e-05, "loss": 2.3714, "step": 72600 }, { "epoch": 4.933075146079631, "grad_norm": 3.058682918548584, "learning_rate": 3.8356943878244325e-05, "loss": 2.8172, "step": 72605 }, { "epoch": 4.933414866150292, "grad_norm": 3.4734745025634766, "learning_rate": 3.835269737736106e-05, "loss": 2.6862, "step": 72610 }, { "epoch": 4.933754586220954, "grad_norm": 3.7251131534576416, "learning_rate": 3.834845087647778e-05, "loss": 2.5413, "step": 72615 }, { "epoch": 4.934094306291616, "grad_norm": 2.753692865371704, "learning_rate": 3.834420437559451e-05, "loss": 2.7043, "step": 72620 }, { "epoch": 4.934434026362277, "grad_norm": 2.373842477798462, "learning_rate": 3.8339957874711244e-05, "loss": 2.2317, "step": 72625 }, { "epoch": 4.934773746432939, "grad_norm": 3.00711989402771, "learning_rate": 3.8335711373827965e-05, "loss": 2.5014, "step": 72630 }, { "epoch": 4.935113466503601, "grad_norm": 2.596609354019165, "learning_rate": 3.8331464872944693e-05, "loss": 2.0874, "step": 72635 }, { "epoch": 4.935453186574263, "grad_norm": 2.8484857082366943, "learning_rate": 3.832721837206142e-05, "loss": 2.5082, "step": 72640 }, { "epoch": 4.935792906644925, "grad_norm": 2.995073080062866, "learning_rate": 3.8322971871178156e-05, "loss": 2.4121, "step": 72645 }, { "epoch": 4.936132626715587, "grad_norm": 4.107300281524658, "learning_rate": 3.831872537029488e-05, "loss": 2.4095, "step": 72650 }, { "epoch": 4.936472346786248, "grad_norm": 3.008195161819458, "learning_rate": 3.8314478869411605e-05, "loss": 2.4396, "step": 72655 }, { "epoch": 4.93681206685691, "grad_norm": 3.982243776321411, "learning_rate": 3.831023236852834e-05, "loss": 2.6851, "step": 72660 }, { "epoch": 4.937151786927572, "grad_norm": 
3.1652779579162598, "learning_rate": 3.830598586764506e-05, "loss": 2.3732, "step": 72665 }, { "epoch": 4.937491506998233, "grad_norm": 2.4741969108581543, "learning_rate": 3.830173936676179e-05, "loss": 2.3649, "step": 72670 }, { "epoch": 4.937831227068895, "grad_norm": 2.8933234214782715, "learning_rate": 3.829749286587852e-05, "loss": 2.4246, "step": 72675 }, { "epoch": 4.938170947139557, "grad_norm": 2.4599087238311768, "learning_rate": 3.8293246364995246e-05, "loss": 2.3411, "step": 72680 }, { "epoch": 4.938510667210219, "grad_norm": 2.818774938583374, "learning_rate": 3.8288999864111974e-05, "loss": 2.2234, "step": 72685 }, { "epoch": 4.938850387280881, "grad_norm": 2.3306214809417725, "learning_rate": 3.82847533632287e-05, "loss": 2.4547, "step": 72690 }, { "epoch": 4.939190107351543, "grad_norm": 4.2264227867126465, "learning_rate": 3.828050686234543e-05, "loss": 2.2662, "step": 72695 }, { "epoch": 4.939529827422204, "grad_norm": 3.527714490890503, "learning_rate": 3.827626036146216e-05, "loss": 2.4883, "step": 72700 }, { "epoch": 4.939869547492866, "grad_norm": 2.9332902431488037, "learning_rate": 3.8272013860578886e-05, "loss": 2.3635, "step": 72705 }, { "epoch": 4.940209267563528, "grad_norm": 2.964832067489624, "learning_rate": 3.8267767359695614e-05, "loss": 2.3792, "step": 72710 }, { "epoch": 4.940548987634189, "grad_norm": 2.4478185176849365, "learning_rate": 3.826352085881234e-05, "loss": 2.441, "step": 72715 }, { "epoch": 4.940888707704851, "grad_norm": 3.2641990184783936, "learning_rate": 3.825927435792907e-05, "loss": 2.6418, "step": 72720 }, { "epoch": 4.941228427775513, "grad_norm": 2.7802023887634277, "learning_rate": 3.82550278570458e-05, "loss": 2.3258, "step": 72725 }, { "epoch": 4.941568147846175, "grad_norm": 4.4268388748168945, "learning_rate": 3.8250781356162526e-05, "loss": 2.2711, "step": 72730 }, { "epoch": 4.941907867916837, "grad_norm": 3.265801429748535, "learning_rate": 3.8246534855279254e-05, "loss": 2.3991, "step": 72735 }, { 
"epoch": 4.942247587987499, "grad_norm": 3.6636054515838623, "learning_rate": 3.8242288354395975e-05, "loss": 2.4574, "step": 72740 }, { "epoch": 4.94258730805816, "grad_norm": 2.9465396404266357, "learning_rate": 3.823804185351271e-05, "loss": 2.3963, "step": 72745 }, { "epoch": 4.942927028128822, "grad_norm": 2.5053467750549316, "learning_rate": 3.823379535262944e-05, "loss": 2.4458, "step": 72750 }, { "epoch": 4.943266748199484, "grad_norm": 4.184019565582275, "learning_rate": 3.822954885174616e-05, "loss": 2.4519, "step": 72755 }, { "epoch": 4.943606468270145, "grad_norm": 3.2458252906799316, "learning_rate": 3.8225302350862894e-05, "loss": 2.4559, "step": 72760 }, { "epoch": 4.943946188340807, "grad_norm": 3.303020715713501, "learning_rate": 3.822105584997962e-05, "loss": 2.7279, "step": 72765 }, { "epoch": 4.944285908411469, "grad_norm": 3.1568667888641357, "learning_rate": 3.821680934909634e-05, "loss": 2.3091, "step": 72770 }, { "epoch": 4.944625628482131, "grad_norm": 3.2661914825439453, "learning_rate": 3.821256284821307e-05, "loss": 2.4541, "step": 72775 }, { "epoch": 4.944965348552793, "grad_norm": 2.908738851547241, "learning_rate": 3.8208316347329806e-05, "loss": 2.6339, "step": 72780 }, { "epoch": 4.945305068623455, "grad_norm": 3.1602706909179688, "learning_rate": 3.820406984644653e-05, "loss": 2.4442, "step": 72785 }, { "epoch": 4.945644788694116, "grad_norm": 3.4587221145629883, "learning_rate": 3.8199823345563255e-05, "loss": 2.5618, "step": 72790 }, { "epoch": 4.945984508764778, "grad_norm": 2.524488687515259, "learning_rate": 3.819557684467999e-05, "loss": 2.5341, "step": 72795 }, { "epoch": 4.94632422883544, "grad_norm": 3.4709384441375732, "learning_rate": 3.819133034379671e-05, "loss": 2.6683, "step": 72800 }, { "epoch": 4.946663948906101, "grad_norm": 2.7947659492492676, "learning_rate": 3.818708384291344e-05, "loss": 2.258, "step": 72805 }, { "epoch": 4.947003668976763, "grad_norm": 3.589564085006714, "learning_rate": 
3.818283734203017e-05, "loss": 2.3082, "step": 72810 }, { "epoch": 4.9473433890474245, "grad_norm": 2.754375696182251, "learning_rate": 3.81785908411469e-05, "loss": 2.63, "step": 72815 }, { "epoch": 4.947683109118087, "grad_norm": 2.7741334438323975, "learning_rate": 3.817434434026362e-05, "loss": 2.2939, "step": 72820 }, { "epoch": 4.948022829188749, "grad_norm": 3.013522148132324, "learning_rate": 3.817009783938035e-05, "loss": 2.4281, "step": 72825 }, { "epoch": 4.94836254925941, "grad_norm": 4.083354949951172, "learning_rate": 3.8165851338497086e-05, "loss": 2.4047, "step": 72830 }, { "epoch": 4.948702269330072, "grad_norm": 2.972500801086426, "learning_rate": 3.816160483761381e-05, "loss": 2.5659, "step": 72835 }, { "epoch": 4.949041989400734, "grad_norm": 2.8770034313201904, "learning_rate": 3.8157358336730535e-05, "loss": 2.4908, "step": 72840 }, { "epoch": 4.949381709471395, "grad_norm": 3.3631155490875244, "learning_rate": 3.815311183584726e-05, "loss": 2.4361, "step": 72845 }, { "epoch": 4.949721429542057, "grad_norm": 3.152278423309326, "learning_rate": 3.814886533496399e-05, "loss": 2.6029, "step": 72850 }, { "epoch": 4.950061149612719, "grad_norm": 3.040487051010132, "learning_rate": 3.814461883408072e-05, "loss": 1.8957, "step": 72855 }, { "epoch": 4.9504008696833806, "grad_norm": 3.023832321166992, "learning_rate": 3.814037233319745e-05, "loss": 2.6185, "step": 72860 }, { "epoch": 4.950740589754043, "grad_norm": 3.852159261703491, "learning_rate": 3.8136125832314175e-05, "loss": 2.6608, "step": 72865 }, { "epoch": 4.951080309824705, "grad_norm": 3.031609535217285, "learning_rate": 3.81318793314309e-05, "loss": 2.433, "step": 72870 }, { "epoch": 4.951420029895366, "grad_norm": 3.4183332920074463, "learning_rate": 3.812763283054763e-05, "loss": 2.5318, "step": 72875 }, { "epoch": 4.951759749966028, "grad_norm": 2.6584579944610596, "learning_rate": 3.812338632966436e-05, "loss": 2.4156, "step": 72880 }, { "epoch": 4.95209947003669, "grad_norm": 
3.1496810913085938, "learning_rate": 3.811913982878109e-05, "loss": 2.4754, "step": 72885 }, { "epoch": 4.952439190107351, "grad_norm": 3.0789060592651367, "learning_rate": 3.8114893327897815e-05, "loss": 2.272, "step": 72890 }, { "epoch": 4.952778910178013, "grad_norm": 3.1860296726226807, "learning_rate": 3.811064682701454e-05, "loss": 2.6198, "step": 72895 }, { "epoch": 4.953118630248675, "grad_norm": 3.1714303493499756, "learning_rate": 3.810640032613127e-05, "loss": 2.0938, "step": 72900 }, { "epoch": 4.953458350319337, "grad_norm": 3.282815456390381, "learning_rate": 3.8102153825248e-05, "loss": 2.3281, "step": 72905 }, { "epoch": 4.953798070389999, "grad_norm": 2.370231866836548, "learning_rate": 3.809790732436472e-05, "loss": 2.2701, "step": 72910 }, { "epoch": 4.954137790460661, "grad_norm": 2.782066583633423, "learning_rate": 3.8093660823481455e-05, "loss": 2.511, "step": 72915 }, { "epoch": 4.954477510531322, "grad_norm": 3.8646368980407715, "learning_rate": 3.808941432259818e-05, "loss": 2.4682, "step": 72920 }, { "epoch": 4.954817230601984, "grad_norm": 3.7433621883392334, "learning_rate": 3.8085167821714904e-05, "loss": 2.6984, "step": 72925 }, { "epoch": 4.955156950672646, "grad_norm": 2.559170961380005, "learning_rate": 3.808092132083164e-05, "loss": 2.5253, "step": 72930 }, { "epoch": 4.955496670743307, "grad_norm": 4.102502346038818, "learning_rate": 3.807667481994837e-05, "loss": 2.2927, "step": 72935 }, { "epoch": 4.955836390813969, "grad_norm": 3.328193187713623, "learning_rate": 3.807242831906509e-05, "loss": 2.6632, "step": 72940 }, { "epoch": 4.956176110884631, "grad_norm": 3.183289051055908, "learning_rate": 3.8068181818181816e-05, "loss": 2.3247, "step": 72945 }, { "epoch": 4.956515830955293, "grad_norm": 3.5019352436065674, "learning_rate": 3.806393531729855e-05, "loss": 2.4989, "step": 72950 }, { "epoch": 4.956855551025955, "grad_norm": 2.926578998565674, "learning_rate": 3.805968881641527e-05, "loss": 2.297, "step": 72955 }, { "epoch": 
4.957195271096617, "grad_norm": 2.544036865234375, "learning_rate": 3.8055442315532e-05, "loss": 2.6319, "step": 72960 }, { "epoch": 4.957534991167278, "grad_norm": 4.229099750518799, "learning_rate": 3.8051195814648735e-05, "loss": 2.4869, "step": 72965 }, { "epoch": 4.95787471123794, "grad_norm": 3.615849733352661, "learning_rate": 3.8046949313765456e-05, "loss": 2.6266, "step": 72970 }, { "epoch": 4.958214431308602, "grad_norm": 3.060206651687622, "learning_rate": 3.8042702812882184e-05, "loss": 2.6481, "step": 72975 }, { "epoch": 4.958554151379263, "grad_norm": 3.049607276916504, "learning_rate": 3.803845631199891e-05, "loss": 2.3609, "step": 72980 }, { "epoch": 4.958893871449925, "grad_norm": 3.1603317260742188, "learning_rate": 3.803420981111565e-05, "loss": 2.4851, "step": 72985 }, { "epoch": 4.959233591520587, "grad_norm": 3.3246116638183594, "learning_rate": 3.802996331023237e-05, "loss": 2.3401, "step": 72990 }, { "epoch": 4.959573311591249, "grad_norm": 3.2194392681121826, "learning_rate": 3.8025716809349096e-05, "loss": 2.5786, "step": 72995 }, { "epoch": 4.959913031661911, "grad_norm": 3.6686456203460693, "learning_rate": 3.802147030846583e-05, "loss": 2.5042, "step": 73000 }, { "epoch": 4.960252751732573, "grad_norm": 2.7155601978302, "learning_rate": 3.801722380758255e-05, "loss": 2.4238, "step": 73005 }, { "epoch": 4.960592471803234, "grad_norm": 2.7508742809295654, "learning_rate": 3.801297730669928e-05, "loss": 2.5731, "step": 73010 }, { "epoch": 4.960932191873896, "grad_norm": 2.605302095413208, "learning_rate": 3.800873080581601e-05, "loss": 2.2234, "step": 73015 }, { "epoch": 4.961271911944557, "grad_norm": 3.592168092727661, "learning_rate": 3.8004484304932737e-05, "loss": 2.5962, "step": 73020 }, { "epoch": 4.961611632015219, "grad_norm": 3.123763084411621, "learning_rate": 3.8000237804049465e-05, "loss": 2.3452, "step": 73025 }, { "epoch": 4.961951352085881, "grad_norm": 2.980914354324341, "learning_rate": 3.799599130316619e-05, "loss": 
2.5393, "step": 73030 }, { "epoch": 4.9622910721565425, "grad_norm": 3.553145408630371, "learning_rate": 3.799174480228292e-05, "loss": 2.6472, "step": 73035 }, { "epoch": 4.962630792227205, "grad_norm": 2.7045884132385254, "learning_rate": 3.798749830139965e-05, "loss": 2.4182, "step": 73040 }, { "epoch": 4.962970512297867, "grad_norm": 3.2421627044677734, "learning_rate": 3.7983251800516377e-05, "loss": 2.5078, "step": 73045 }, { "epoch": 4.963310232368528, "grad_norm": 3.3316497802734375, "learning_rate": 3.7979005299633105e-05, "loss": 2.507, "step": 73050 }, { "epoch": 4.96364995243919, "grad_norm": 2.7784125804901123, "learning_rate": 3.797475879874983e-05, "loss": 2.4605, "step": 73055 }, { "epoch": 4.963989672509852, "grad_norm": 3.8979432582855225, "learning_rate": 3.797051229786656e-05, "loss": 2.455, "step": 73060 }, { "epoch": 4.964329392580513, "grad_norm": 2.9946632385253906, "learning_rate": 3.796626579698329e-05, "loss": 2.2246, "step": 73065 }, { "epoch": 4.964669112651175, "grad_norm": 3.096050977706909, "learning_rate": 3.7962019296100017e-05, "loss": 2.5328, "step": 73070 }, { "epoch": 4.965008832721837, "grad_norm": 2.7418720722198486, "learning_rate": 3.7957772795216745e-05, "loss": 2.3647, "step": 73075 }, { "epoch": 4.9653485527924985, "grad_norm": 3.114020824432373, "learning_rate": 3.7953526294333466e-05, "loss": 2.3025, "step": 73080 }, { "epoch": 4.965688272863161, "grad_norm": 2.8007254600524902, "learning_rate": 3.79492797934502e-05, "loss": 2.5725, "step": 73085 }, { "epoch": 4.966027992933823, "grad_norm": 2.5247552394866943, "learning_rate": 3.794503329256693e-05, "loss": 2.3198, "step": 73090 }, { "epoch": 4.966367713004484, "grad_norm": 3.3122150897979736, "learning_rate": 3.794078679168365e-05, "loss": 2.4028, "step": 73095 }, { "epoch": 4.966707433075146, "grad_norm": 3.64030122756958, "learning_rate": 3.7936540290800385e-05, "loss": 2.1304, "step": 73100 }, { "epoch": 4.967047153145808, "grad_norm": 2.4013357162475586, 
"learning_rate": 3.793229378991711e-05, "loss": 2.5924, "step": 73105 }, { "epoch": 4.967386873216469, "grad_norm": 3.2414169311523438, "learning_rate": 3.7928047289033834e-05, "loss": 2.4262, "step": 73110 }, { "epoch": 4.967726593287131, "grad_norm": 2.895130157470703, "learning_rate": 3.792380078815056e-05, "loss": 2.2599, "step": 73115 }, { "epoch": 4.968066313357793, "grad_norm": 2.76168155670166, "learning_rate": 3.79195542872673e-05, "loss": 2.4801, "step": 73120 }, { "epoch": 4.9684060334284545, "grad_norm": 3.3666884899139404, "learning_rate": 3.791530778638402e-05, "loss": 2.6391, "step": 73125 }, { "epoch": 4.968745753499117, "grad_norm": 2.7806460857391357, "learning_rate": 3.7911061285500746e-05, "loss": 2.3913, "step": 73130 }, { "epoch": 4.969085473569779, "grad_norm": 2.8830623626708984, "learning_rate": 3.790681478461748e-05, "loss": 2.1661, "step": 73135 }, { "epoch": 4.96942519364044, "grad_norm": 2.9241554737091064, "learning_rate": 3.79025682837342e-05, "loss": 2.5119, "step": 73140 }, { "epoch": 4.969764913711102, "grad_norm": 2.742023468017578, "learning_rate": 3.789832178285093e-05, "loss": 2.2539, "step": 73145 }, { "epoch": 4.970104633781764, "grad_norm": 3.393213987350464, "learning_rate": 3.7894075281967665e-05, "loss": 2.7717, "step": 73150 }, { "epoch": 4.970444353852425, "grad_norm": 2.945017099380493, "learning_rate": 3.788982878108439e-05, "loss": 2.4852, "step": 73155 }, { "epoch": 4.970784073923087, "grad_norm": 2.5996317863464355, "learning_rate": 3.7885582280201114e-05, "loss": 2.7102, "step": 73160 }, { "epoch": 4.971123793993749, "grad_norm": 3.407264232635498, "learning_rate": 3.788133577931784e-05, "loss": 2.644, "step": 73165 }, { "epoch": 4.971463514064411, "grad_norm": 2.971531629562378, "learning_rate": 3.787708927843458e-05, "loss": 2.5823, "step": 73170 }, { "epoch": 4.971803234135073, "grad_norm": 2.771263599395752, "learning_rate": 3.78728427775513e-05, "loss": 2.5043, "step": 73175 }, { "epoch": 4.972142954205735, 
"grad_norm": 2.7966463565826416, "learning_rate": 3.7868596276668026e-05, "loss": 2.429, "step": 73180 }, { "epoch": 4.972482674276396, "grad_norm": 2.673699378967285, "learning_rate": 3.786434977578476e-05, "loss": 2.4089, "step": 73185 }, { "epoch": 4.972822394347058, "grad_norm": 3.069744348526001, "learning_rate": 3.786010327490148e-05, "loss": 2.5091, "step": 73190 }, { "epoch": 4.97316211441772, "grad_norm": 3.8265552520751953, "learning_rate": 3.785585677401821e-05, "loss": 2.4116, "step": 73195 }, { "epoch": 4.973501834488381, "grad_norm": 3.1965138912200928, "learning_rate": 3.785161027313494e-05, "loss": 2.3657, "step": 73200 }, { "epoch": 4.973841554559043, "grad_norm": 3.2256786823272705, "learning_rate": 3.7847363772251666e-05, "loss": 2.7398, "step": 73205 }, { "epoch": 4.974181274629705, "grad_norm": 3.0211408138275146, "learning_rate": 3.7843117271368394e-05, "loss": 2.5888, "step": 73210 }, { "epoch": 4.974520994700367, "grad_norm": 2.9127390384674072, "learning_rate": 3.783887077048512e-05, "loss": 2.3972, "step": 73215 }, { "epoch": 4.974860714771029, "grad_norm": 2.739462375640869, "learning_rate": 3.783462426960185e-05, "loss": 2.27, "step": 73220 }, { "epoch": 4.975200434841691, "grad_norm": 3.4387383460998535, "learning_rate": 3.783037776871858e-05, "loss": 2.2764, "step": 73225 }, { "epoch": 4.975540154912352, "grad_norm": 3.1939334869384766, "learning_rate": 3.7826131267835306e-05, "loss": 2.2385, "step": 73230 }, { "epoch": 4.975879874983014, "grad_norm": 3.2723608016967773, "learning_rate": 3.7821884766952034e-05, "loss": 2.4197, "step": 73235 }, { "epoch": 4.976219595053676, "grad_norm": 2.801037311553955, "learning_rate": 3.781763826606876e-05, "loss": 2.2677, "step": 73240 }, { "epoch": 4.976559315124337, "grad_norm": 3.921353340148926, "learning_rate": 3.781339176518549e-05, "loss": 2.4662, "step": 73245 }, { "epoch": 4.976899035194999, "grad_norm": 2.7221615314483643, "learning_rate": 3.780914526430222e-05, "loss": 2.6563, "step": 
73250 }, { "epoch": 4.977238755265661, "grad_norm": 2.4003381729125977, "learning_rate": 3.7804898763418946e-05, "loss": 2.5075, "step": 73255 }, { "epoch": 4.977578475336323, "grad_norm": 3.002333164215088, "learning_rate": 3.780150156271233e-05, "loss": 2.4443, "step": 73260 }, { "epoch": 4.977918195406985, "grad_norm": 2.9470298290252686, "learning_rate": 3.7797255061829057e-05, "loss": 2.7092, "step": 73265 }, { "epoch": 4.978257915477647, "grad_norm": 3.7298364639282227, "learning_rate": 3.779300856094578e-05, "loss": 2.4132, "step": 73270 }, { "epoch": 4.978597635548308, "grad_norm": 3.310478448867798, "learning_rate": 3.778876206006251e-05, "loss": 2.4777, "step": 73275 }, { "epoch": 4.97893735561897, "grad_norm": 3.031754493713379, "learning_rate": 3.778451555917924e-05, "loss": 2.4127, "step": 73280 }, { "epoch": 4.979277075689632, "grad_norm": 2.7984774112701416, "learning_rate": 3.778026905829596e-05, "loss": 2.3508, "step": 73285 }, { "epoch": 4.979616795760293, "grad_norm": 2.906275987625122, "learning_rate": 3.7776022557412697e-05, "loss": 2.2402, "step": 73290 }, { "epoch": 4.979956515830955, "grad_norm": 4.822840213775635, "learning_rate": 3.7771776056529425e-05, "loss": 2.3415, "step": 73295 }, { "epoch": 4.980296235901617, "grad_norm": 2.964461088180542, "learning_rate": 3.7767529555646146e-05, "loss": 2.1532, "step": 73300 }, { "epoch": 4.980635955972279, "grad_norm": 3.4230496883392334, "learning_rate": 3.7763283054762874e-05, "loss": 2.5161, "step": 73305 }, { "epoch": 4.980975676042941, "grad_norm": 2.8704259395599365, "learning_rate": 3.775903655387961e-05, "loss": 2.3632, "step": 73310 }, { "epoch": 4.981315396113603, "grad_norm": 3.7820804119110107, "learning_rate": 3.775479005299633e-05, "loss": 2.2923, "step": 73315 }, { "epoch": 4.981655116184264, "grad_norm": 3.2996575832366943, "learning_rate": 3.775054355211306e-05, "loss": 2.5215, "step": 73320 }, { "epoch": 4.981994836254926, "grad_norm": 2.929090738296509, "learning_rate": 
3.774629705122979e-05, "loss": 2.2223, "step": 73325 }, { "epoch": 4.982334556325588, "grad_norm": 3.1081271171569824, "learning_rate": 3.7742050550346514e-05, "loss": 2.3303, "step": 73330 }, { "epoch": 4.982674276396249, "grad_norm": 3.04414701461792, "learning_rate": 3.773780404946324e-05, "loss": 2.5304, "step": 73335 }, { "epoch": 4.983013996466911, "grad_norm": 2.9444022178649902, "learning_rate": 3.773355754857997e-05, "loss": 2.5191, "step": 73340 }, { "epoch": 4.983353716537573, "grad_norm": 3.0292298793792725, "learning_rate": 3.77293110476967e-05, "loss": 2.272, "step": 73345 }, { "epoch": 4.983693436608235, "grad_norm": 3.709787368774414, "learning_rate": 3.7725064546813426e-05, "loss": 2.3546, "step": 73350 }, { "epoch": 4.984033156678897, "grad_norm": 2.5775604248046875, "learning_rate": 3.7720818045930154e-05, "loss": 2.2498, "step": 73355 }, { "epoch": 4.984372876749559, "grad_norm": 3.4877123832702637, "learning_rate": 3.771657154504689e-05, "loss": 2.2571, "step": 73360 }, { "epoch": 4.98471259682022, "grad_norm": 3.0560333728790283, "learning_rate": 3.771232504416361e-05, "loss": 2.6684, "step": 73365 }, { "epoch": 4.985052316890882, "grad_norm": 2.6818885803222656, "learning_rate": 3.770807854328034e-05, "loss": 2.6011, "step": 73370 }, { "epoch": 4.985392036961544, "grad_norm": 3.46453595161438, "learning_rate": 3.7703832042397066e-05, "loss": 2.4251, "step": 73375 }, { "epoch": 4.985731757032205, "grad_norm": 2.9994144439697266, "learning_rate": 3.7699585541513794e-05, "loss": 2.4242, "step": 73380 }, { "epoch": 4.986071477102867, "grad_norm": 3.586453437805176, "learning_rate": 3.769533904063052e-05, "loss": 2.2893, "step": 73385 }, { "epoch": 4.986411197173529, "grad_norm": 2.9066526889801025, "learning_rate": 3.769109253974725e-05, "loss": 2.4045, "step": 73390 }, { "epoch": 4.986750917244191, "grad_norm": 3.2084319591522217, "learning_rate": 3.768684603886398e-05, "loss": 2.3899, "step": 73395 }, { "epoch": 4.987090637314853, "grad_norm": 
2.9623594284057617, "learning_rate": 3.7682599537980706e-05, "loss": 2.2885, "step": 73400 }, { "epoch": 4.987430357385515, "grad_norm": 2.8885133266448975, "learning_rate": 3.7678353037097434e-05, "loss": 2.5348, "step": 73405 }, { "epoch": 4.987770077456176, "grad_norm": 3.1090800762176514, "learning_rate": 3.767410653621416e-05, "loss": 2.3591, "step": 73410 }, { "epoch": 4.988109797526838, "grad_norm": 3.0906615257263184, "learning_rate": 3.766986003533089e-05, "loss": 2.3177, "step": 73415 }, { "epoch": 4.9884495175975, "grad_norm": 3.1219654083251953, "learning_rate": 3.766561353444762e-05, "loss": 2.4195, "step": 73420 }, { "epoch": 4.988789237668161, "grad_norm": 3.294834852218628, "learning_rate": 3.7661367033564346e-05, "loss": 2.5511, "step": 73425 }, { "epoch": 4.989128957738823, "grad_norm": 3.5268044471740723, "learning_rate": 3.7657120532681074e-05, "loss": 2.5275, "step": 73430 }, { "epoch": 4.989468677809485, "grad_norm": 3.5244805812835693, "learning_rate": 3.76528740317978e-05, "loss": 2.6123, "step": 73435 }, { "epoch": 4.989808397880147, "grad_norm": 2.71136212348938, "learning_rate": 3.764862753091452e-05, "loss": 2.5689, "step": 73440 }, { "epoch": 4.990148117950809, "grad_norm": 3.2293050289154053, "learning_rate": 3.764438103003126e-05, "loss": 2.4335, "step": 73445 }, { "epoch": 4.990487838021471, "grad_norm": 2.302220582962036, "learning_rate": 3.7640134529147986e-05, "loss": 2.1916, "step": 73450 }, { "epoch": 4.990827558092132, "grad_norm": 2.5693888664245605, "learning_rate": 3.763588802826471e-05, "loss": 2.4186, "step": 73455 }, { "epoch": 4.991167278162794, "grad_norm": 2.487398386001587, "learning_rate": 3.763164152738144e-05, "loss": 2.586, "step": 73460 }, { "epoch": 4.991506998233456, "grad_norm": 3.373368978500366, "learning_rate": 3.762739502649817e-05, "loss": 2.2268, "step": 73465 }, { "epoch": 4.991846718304117, "grad_norm": 3.4403347969055176, "learning_rate": 3.762314852561489e-05, "loss": 2.4227, "step": 73470 }, { 
"epoch": 4.992186438374779, "grad_norm": 3.341198444366455, "learning_rate": 3.761890202473162e-05, "loss": 2.4474, "step": 73475 }, { "epoch": 4.9925261584454415, "grad_norm": 2.6883492469787598, "learning_rate": 3.7614655523848354e-05, "loss": 2.6265, "step": 73480 }, { "epoch": 4.992865878516103, "grad_norm": 2.742953300476074, "learning_rate": 3.7610409022965075e-05, "loss": 2.352, "step": 73485 }, { "epoch": 4.993205598586765, "grad_norm": 4.274167060852051, "learning_rate": 3.76061625220818e-05, "loss": 2.6575, "step": 73490 }, { "epoch": 4.993545318657426, "grad_norm": 2.9771158695220947, "learning_rate": 3.760191602119854e-05, "loss": 2.4192, "step": 73495 }, { "epoch": 4.993885038728088, "grad_norm": 2.9404919147491455, "learning_rate": 3.759766952031526e-05, "loss": 2.4598, "step": 73500 }, { "epoch": 4.99422475879875, "grad_norm": 3.234083414077759, "learning_rate": 3.759342301943199e-05, "loss": 2.3803, "step": 73505 }, { "epoch": 4.994564478869411, "grad_norm": 3.2714948654174805, "learning_rate": 3.758917651854872e-05, "loss": 2.6354, "step": 73510 }, { "epoch": 4.994904198940073, "grad_norm": 3.6649396419525146, "learning_rate": 3.758493001766544e-05, "loss": 2.5608, "step": 73515 }, { "epoch": 4.995243919010735, "grad_norm": 3.7443716526031494, "learning_rate": 3.758068351678217e-05, "loss": 2.3982, "step": 73520 }, { "epoch": 4.995583639081397, "grad_norm": 3.2477846145629883, "learning_rate": 3.75764370158989e-05, "loss": 2.2914, "step": 73525 }, { "epoch": 4.995923359152059, "grad_norm": 3.1969451904296875, "learning_rate": 3.7572190515015634e-05, "loss": 2.5222, "step": 73530 }, { "epoch": 4.996263079222721, "grad_norm": 3.3268930912017822, "learning_rate": 3.7567944014132355e-05, "loss": 2.4861, "step": 73535 }, { "epoch": 4.996602799293382, "grad_norm": 3.191863536834717, "learning_rate": 3.756369751324908e-05, "loss": 2.2871, "step": 73540 }, { "epoch": 4.996942519364044, "grad_norm": 3.4961318969726562, "learning_rate": 
3.755945101236582e-05, "loss": 2.5259, "step": 73545 }, { "epoch": 4.997282239434706, "grad_norm": 3.012369394302368, "learning_rate": 3.755520451148254e-05, "loss": 2.4881, "step": 73550 }, { "epoch": 4.997621959505367, "grad_norm": 3.072467565536499, "learning_rate": 3.755095801059927e-05, "loss": 2.4232, "step": 73555 }, { "epoch": 4.997961679576029, "grad_norm": 3.0009782314300537, "learning_rate": 3.7546711509715995e-05, "loss": 2.5091, "step": 73560 }, { "epoch": 4.998301399646691, "grad_norm": 3.376002311706543, "learning_rate": 3.7542465008832723e-05, "loss": 2.2795, "step": 73565 }, { "epoch": 4.998641119717353, "grad_norm": 2.9247961044311523, "learning_rate": 3.753821850794945e-05, "loss": 2.4923, "step": 73570 }, { "epoch": 4.998980839788015, "grad_norm": 2.5879127979278564, "learning_rate": 3.753397200706618e-05, "loss": 2.3245, "step": 73575 }, { "epoch": 4.999320559858677, "grad_norm": 3.054434061050415, "learning_rate": 3.752972550618291e-05, "loss": 2.4479, "step": 73580 }, { "epoch": 4.999660279929338, "grad_norm": 4.191624164581299, "learning_rate": 3.7525479005299635e-05, "loss": 2.4489, "step": 73585 }, { "epoch": 5.0, "grad_norm": 6.9676384925842285, "learning_rate": 3.7521232504416363e-05, "loss": 2.4275, "step": 73590 }, { "epoch": 5.0, "eval_bertscore": { "f1": 0.8424593785876786, "precision": 0.8444991891311807, "recall": 0.8412871961157409 }, "eval_bleu_4": 0.0179671021072366, "eval_exact_match": 0.0007752689214071131, "eval_loss": 3.4369056224823, "eval_meteor": 0.0941196435843233, "eval_rouge": { "rouge1": 0.12878052019785316, "rouge2": 0.01870056076008485, "rougeL": 0.10945299317424673, "rougeLsum": 0.10945522260480173 }, "eval_runtime": 1774.28, "eval_samples_per_second": 5.816, "eval_steps_per_second": 0.727, "step": 73590 }, { "epoch": 5.000339720070662, "grad_norm": 3.4093337059020996, "learning_rate": 3.751698600353309e-05, "loss": 2.3046, "step": 73595 }, { "epoch": 5.000679440141323, "grad_norm": 3.602290391921997, 
"learning_rate": 3.751273950264982e-05, "loss": 2.1472, "step": 73600 }, { "epoch": 5.001019160211985, "grad_norm": 3.3119819164276123, "learning_rate": 3.750849300176655e-05, "loss": 2.2845, "step": 73605 }, { "epoch": 5.001358880282647, "grad_norm": 3.2295289039611816, "learning_rate": 3.7504246500883275e-05, "loss": 2.3418, "step": 73610 }, { "epoch": 5.001698600353309, "grad_norm": 3.080876588821411, "learning_rate": 3.7500000000000003e-05, "loss": 2.0859, "step": 73615 }, { "epoch": 5.002038320423971, "grad_norm": 3.6146724224090576, "learning_rate": 3.749575349911673e-05, "loss": 2.1728, "step": 73620 }, { "epoch": 5.002378040494633, "grad_norm": 2.9125800132751465, "learning_rate": 3.749150699823345e-05, "loss": 2.3197, "step": 73625 }, { "epoch": 5.002717760565294, "grad_norm": 2.8570499420166016, "learning_rate": 3.748726049735019e-05, "loss": 2.1477, "step": 73630 }, { "epoch": 5.003057480635956, "grad_norm": 2.6032559871673584, "learning_rate": 3.7483013996466916e-05, "loss": 2.3714, "step": 73635 }, { "epoch": 5.003397200706618, "grad_norm": 3.1599578857421875, "learning_rate": 3.747876749558364e-05, "loss": 2.089, "step": 73640 }, { "epoch": 5.003736920777279, "grad_norm": 3.510953187942505, "learning_rate": 3.747452099470037e-05, "loss": 2.0798, "step": 73645 }, { "epoch": 5.004076640847941, "grad_norm": 3.5888242721557617, "learning_rate": 3.74702744938171e-05, "loss": 2.2294, "step": 73650 }, { "epoch": 5.004416360918603, "grad_norm": 3.1972367763519287, "learning_rate": 3.746602799293382e-05, "loss": 2.3531, "step": 73655 }, { "epoch": 5.004756080989265, "grad_norm": 3.2841179370880127, "learning_rate": 3.746178149205055e-05, "loss": 2.3085, "step": 73660 }, { "epoch": 5.005095801059927, "grad_norm": 3.56522536277771, "learning_rate": 3.7457534991167284e-05, "loss": 2.2034, "step": 73665 }, { "epoch": 5.005435521130589, "grad_norm": 3.304715394973755, "learning_rate": 3.7453288490284005e-05, "loss": 2.1925, "step": 73670 }, { "epoch": 
5.00577524120125, "grad_norm": 2.4651052951812744, "learning_rate": 3.744904198940073e-05, "loss": 2.3009, "step": 73675 }, { "epoch": 5.006114961271912, "grad_norm": 2.9617152214050293, "learning_rate": 3.744479548851747e-05, "loss": 2.3264, "step": 73680 }, { "epoch": 5.006454681342574, "grad_norm": 3.206861734390259, "learning_rate": 3.744054898763419e-05, "loss": 2.1391, "step": 73685 }, { "epoch": 5.006794401413235, "grad_norm": 3.2018537521362305, "learning_rate": 3.743630248675092e-05, "loss": 2.2034, "step": 73690 }, { "epoch": 5.007134121483897, "grad_norm": 3.7277774810791016, "learning_rate": 3.7432055985867645e-05, "loss": 2.2447, "step": 73695 }, { "epoch": 5.007473841554559, "grad_norm": 2.819356918334961, "learning_rate": 3.742780948498437e-05, "loss": 2.1756, "step": 73700 }, { "epoch": 5.007813561625221, "grad_norm": 2.7989063262939453, "learning_rate": 3.74235629841011e-05, "loss": 2.3325, "step": 73705 }, { "epoch": 5.008153281695883, "grad_norm": 3.2458972930908203, "learning_rate": 3.741931648321783e-05, "loss": 2.1864, "step": 73710 }, { "epoch": 5.008493001766545, "grad_norm": 3.522437334060669, "learning_rate": 3.7415069982334564e-05, "loss": 2.3312, "step": 73715 }, { "epoch": 5.008832721837206, "grad_norm": 2.6675779819488525, "learning_rate": 3.7410823481451285e-05, "loss": 2.2858, "step": 73720 }, { "epoch": 5.009172441907868, "grad_norm": 2.879793882369995, "learning_rate": 3.740657698056801e-05, "loss": 2.069, "step": 73725 }, { "epoch": 5.00951216197853, "grad_norm": 3.392820358276367, "learning_rate": 3.740233047968474e-05, "loss": 2.112, "step": 73730 }, { "epoch": 5.009851882049191, "grad_norm": 2.8333890438079834, "learning_rate": 3.739808397880147e-05, "loss": 2.3701, "step": 73735 }, { "epoch": 5.010191602119853, "grad_norm": 2.5308499336242676, "learning_rate": 3.73938374779182e-05, "loss": 2.1404, "step": 73740 }, { "epoch": 5.0105313221905154, "grad_norm": 2.583362579345703, "learning_rate": 3.7389590977034925e-05, "loss": 
2.0647, "step": 73745 }, { "epoch": 5.010871042261177, "grad_norm": 3.0781025886535645, "learning_rate": 3.738534447615165e-05, "loss": 2.0897, "step": 73750 }, { "epoch": 5.011210762331839, "grad_norm": 3.602489471435547, "learning_rate": 3.738109797526838e-05, "loss": 2.2465, "step": 73755 }, { "epoch": 5.0115504824025, "grad_norm": 3.3428115844726562, "learning_rate": 3.737685147438511e-05, "loss": 2.1777, "step": 73760 }, { "epoch": 5.011890202473162, "grad_norm": 4.3935699462890625, "learning_rate": 3.737260497350184e-05, "loss": 2.3004, "step": 73765 }, { "epoch": 5.012229922543824, "grad_norm": 2.8422482013702393, "learning_rate": 3.7368358472618565e-05, "loss": 1.8603, "step": 73770 }, { "epoch": 5.012569642614485, "grad_norm": 3.015352964401245, "learning_rate": 3.736411197173529e-05, "loss": 2.1652, "step": 73775 }, { "epoch": 5.012909362685147, "grad_norm": 3.4376659393310547, "learning_rate": 3.735986547085202e-05, "loss": 2.3057, "step": 73780 }, { "epoch": 5.013249082755809, "grad_norm": 3.0353903770446777, "learning_rate": 3.735561896996875e-05, "loss": 2.24, "step": 73785 }, { "epoch": 5.013588802826471, "grad_norm": 3.6045212745666504, "learning_rate": 3.735137246908548e-05, "loss": 2.1259, "step": 73790 }, { "epoch": 5.013928522897133, "grad_norm": 4.1003241539001465, "learning_rate": 3.73471259682022e-05, "loss": 2.3522, "step": 73795 }, { "epoch": 5.014268242967795, "grad_norm": 3.1682252883911133, "learning_rate": 3.734287946731893e-05, "loss": 2.1762, "step": 73800 }, { "epoch": 5.014607963038456, "grad_norm": 2.9345669746398926, "learning_rate": 3.733863296643566e-05, "loss": 1.9808, "step": 73805 }, { "epoch": 5.014947683109118, "grad_norm": 3.022962808609009, "learning_rate": 3.733438646555238e-05, "loss": 2.3422, "step": 73810 }, { "epoch": 5.01528740317978, "grad_norm": 3.5466885566711426, "learning_rate": 3.733013996466912e-05, "loss": 2.1714, "step": 73815 }, { "epoch": 5.015627123250441, "grad_norm": 3.8736329078674316, 
"learning_rate": 3.7325893463785845e-05, "loss": 2.1834, "step": 73820 }, { "epoch": 5.015966843321103, "grad_norm": 3.3671815395355225, "learning_rate": 3.7321646962902566e-05, "loss": 2.1324, "step": 73825 }, { "epoch": 5.016306563391765, "grad_norm": 3.3516957759857178, "learning_rate": 3.7317400462019294e-05, "loss": 2.3012, "step": 73830 }, { "epoch": 5.016646283462427, "grad_norm": 3.037900686264038, "learning_rate": 3.731315396113603e-05, "loss": 2.2638, "step": 73835 }, { "epoch": 5.016986003533089, "grad_norm": 2.832103967666626, "learning_rate": 3.730890746025275e-05, "loss": 1.9216, "step": 73840 }, { "epoch": 5.017325723603751, "grad_norm": 3.2516696453094482, "learning_rate": 3.730466095936948e-05, "loss": 2.1008, "step": 73845 }, { "epoch": 5.017665443674412, "grad_norm": 3.2173917293548584, "learning_rate": 3.730041445848621e-05, "loss": 2.1968, "step": 73850 }, { "epoch": 5.018005163745074, "grad_norm": 3.709774971008301, "learning_rate": 3.7296167957602934e-05, "loss": 2.2159, "step": 73855 }, { "epoch": 5.018344883815736, "grad_norm": 2.954143762588501, "learning_rate": 3.729192145671966e-05, "loss": 1.9933, "step": 73860 }, { "epoch": 5.018684603886397, "grad_norm": 3.5032262802124023, "learning_rate": 3.728767495583639e-05, "loss": 1.8848, "step": 73865 }, { "epoch": 5.019024323957059, "grad_norm": 3.3512840270996094, "learning_rate": 3.728342845495312e-05, "loss": 2.3843, "step": 73870 }, { "epoch": 5.019364044027721, "grad_norm": 2.9979710578918457, "learning_rate": 3.7279181954069846e-05, "loss": 2.3897, "step": 73875 }, { "epoch": 5.019703764098383, "grad_norm": 3.5204763412475586, "learning_rate": 3.7274935453186574e-05, "loss": 2.0752, "step": 73880 }, { "epoch": 5.020043484169045, "grad_norm": 3.340517520904541, "learning_rate": 3.727068895230331e-05, "loss": 2.2087, "step": 73885 }, { "epoch": 5.020383204239707, "grad_norm": 3.6832480430603027, "learning_rate": 3.726644245142003e-05, "loss": 2.3401, "step": 73890 }, { "epoch": 
5.020722924310368, "grad_norm": 3.3721299171447754, "learning_rate": 3.726219595053676e-05, "loss": 2.1398, "step": 73895 }, { "epoch": 5.02106264438103, "grad_norm": 4.5165557861328125, "learning_rate": 3.725794944965349e-05, "loss": 2.2826, "step": 73900 }, { "epoch": 5.021402364451692, "grad_norm": 2.909837484359741, "learning_rate": 3.7253702948770214e-05, "loss": 2.6043, "step": 73905 }, { "epoch": 5.021742084522353, "grad_norm": 4.453968048095703, "learning_rate": 3.724945644788694e-05, "loss": 2.0438, "step": 73910 }, { "epoch": 5.022081804593015, "grad_norm": 3.6924614906311035, "learning_rate": 3.724520994700367e-05, "loss": 2.2203, "step": 73915 }, { "epoch": 5.022421524663677, "grad_norm": 3.0033376216888428, "learning_rate": 3.72409634461204e-05, "loss": 2.2405, "step": 73920 }, { "epoch": 5.022761244734339, "grad_norm": 3.832749605178833, "learning_rate": 3.7236716945237126e-05, "loss": 2.2296, "step": 73925 }, { "epoch": 5.023100964805001, "grad_norm": 2.950157403945923, "learning_rate": 3.7232470444353854e-05, "loss": 2.2021, "step": 73930 }, { "epoch": 5.023440684875663, "grad_norm": 3.9037046432495117, "learning_rate": 3.722822394347058e-05, "loss": 2.4961, "step": 73935 }, { "epoch": 5.023780404946324, "grad_norm": 3.247347593307495, "learning_rate": 3.722397744258731e-05, "loss": 2.1602, "step": 73940 }, { "epoch": 5.024120125016986, "grad_norm": 2.3353586196899414, "learning_rate": 3.721973094170404e-05, "loss": 1.9234, "step": 73945 }, { "epoch": 5.024459845087648, "grad_norm": 2.9170751571655273, "learning_rate": 3.7215484440820766e-05, "loss": 2.0081, "step": 73950 }, { "epoch": 5.024799565158309, "grad_norm": 3.9612691402435303, "learning_rate": 3.7211237939937494e-05, "loss": 2.0696, "step": 73955 }, { "epoch": 5.025139285228971, "grad_norm": 3.1132898330688477, "learning_rate": 3.720699143905422e-05, "loss": 2.4624, "step": 73960 }, { "epoch": 5.025479005299633, "grad_norm": 3.2585670948028564, "learning_rate": 3.7202744938170944e-05, 
"loss": 2.2638, "step": 73965 }, { "epoch": 5.025818725370295, "grad_norm": 3.5544040203094482, "learning_rate": 3.719849843728768e-05, "loss": 2.4803, "step": 73970 }, { "epoch": 5.026158445440957, "grad_norm": 2.508962392807007, "learning_rate": 3.7194251936404406e-05, "loss": 2.3109, "step": 73975 }, { "epoch": 5.026498165511619, "grad_norm": 3.338641881942749, "learning_rate": 3.719000543552113e-05, "loss": 2.2049, "step": 73980 }, { "epoch": 5.02683788558228, "grad_norm": 3.1814746856689453, "learning_rate": 3.718575893463786e-05, "loss": 2.1563, "step": 73985 }, { "epoch": 5.027177605652942, "grad_norm": 3.1282637119293213, "learning_rate": 3.718151243375459e-05, "loss": 2.2018, "step": 73990 }, { "epoch": 5.027517325723604, "grad_norm": 2.6185708045959473, "learning_rate": 3.717726593287131e-05, "loss": 2.1898, "step": 73995 }, { "epoch": 5.027857045794265, "grad_norm": 2.902430534362793, "learning_rate": 3.7173019431988047e-05, "loss": 2.15, "step": 74000 }, { "epoch": 5.028196765864927, "grad_norm": 3.560310125350952, "learning_rate": 3.7168772931104775e-05, "loss": 2.4671, "step": 74005 }, { "epoch": 5.028536485935589, "grad_norm": 3.8183562755584717, "learning_rate": 3.7164526430221496e-05, "loss": 2.1964, "step": 74010 }, { "epoch": 5.028876206006251, "grad_norm": 3.591742753982544, "learning_rate": 3.7160279929338224e-05, "loss": 2.1566, "step": 74015 }, { "epoch": 5.029215926076913, "grad_norm": 3.7936511039733887, "learning_rate": 3.715603342845496e-05, "loss": 2.1211, "step": 74020 }, { "epoch": 5.029555646147575, "grad_norm": 3.675424575805664, "learning_rate": 3.715178692757168e-05, "loss": 2.459, "step": 74025 }, { "epoch": 5.029895366218236, "grad_norm": 3.382948637008667, "learning_rate": 3.714754042668841e-05, "loss": 2.2468, "step": 74030 }, { "epoch": 5.030235086288898, "grad_norm": 3.2435758113861084, "learning_rate": 3.714329392580514e-05, "loss": 2.3501, "step": 74035 }, { "epoch": 5.03057480635956, "grad_norm": 3.813082456588745, 
"learning_rate": 3.7139047424921864e-05, "loss": 2.16, "step": 74040 }, { "epoch": 5.030914526430221, "grad_norm": 3.9101409912109375, "learning_rate": 3.713480092403859e-05, "loss": 1.931, "step": 74045 }, { "epoch": 5.031254246500883, "grad_norm": 5.151758670806885, "learning_rate": 3.713055442315532e-05, "loss": 2.3976, "step": 74050 }, { "epoch": 5.0315939665715455, "grad_norm": 3.2256126403808594, "learning_rate": 3.7126307922272055e-05, "loss": 1.9934, "step": 74055 }, { "epoch": 5.031933686642207, "grad_norm": 4.460198402404785, "learning_rate": 3.7122061421388776e-05, "loss": 1.9862, "step": 74060 }, { "epoch": 5.032273406712869, "grad_norm": 2.8596556186676025, "learning_rate": 3.7117814920505504e-05, "loss": 2.3876, "step": 74065 }, { "epoch": 5.032613126783531, "grad_norm": 3.7444674968719482, "learning_rate": 3.711356841962224e-05, "loss": 2.0405, "step": 74070 }, { "epoch": 5.032952846854192, "grad_norm": 5.368705749511719, "learning_rate": 3.710932191873896e-05, "loss": 2.1909, "step": 74075 }, { "epoch": 5.033292566924854, "grad_norm": 4.101559162139893, "learning_rate": 3.710507541785569e-05, "loss": 2.0249, "step": 74080 }, { "epoch": 5.033632286995516, "grad_norm": 3.921409845352173, "learning_rate": 3.7100828916972416e-05, "loss": 2.2947, "step": 74085 }, { "epoch": 5.033972007066177, "grad_norm": 3.7375071048736572, "learning_rate": 3.7096582416089144e-05, "loss": 2.0623, "step": 74090 }, { "epoch": 5.034311727136839, "grad_norm": 3.600243330001831, "learning_rate": 3.709233591520587e-05, "loss": 2.5946, "step": 74095 }, { "epoch": 5.0346514472075015, "grad_norm": 3.4317948818206787, "learning_rate": 3.70880894143226e-05, "loss": 2.272, "step": 74100 }, { "epoch": 5.034991167278163, "grad_norm": 3.419896364212036, "learning_rate": 3.708384291343933e-05, "loss": 2.1823, "step": 74105 }, { "epoch": 5.035330887348825, "grad_norm": 3.169663667678833, "learning_rate": 3.7079596412556056e-05, "loss": 2.2481, "step": 74110 }, { "epoch": 
5.035670607419486, "grad_norm": 3.0907323360443115, "learning_rate": 3.7075349911672784e-05, "loss": 2.4454, "step": 74115 }, { "epoch": 5.036010327490148, "grad_norm": 2.7725884914398193, "learning_rate": 3.707110341078951e-05, "loss": 1.9147, "step": 74120 }, { "epoch": 5.03635004756081, "grad_norm": 3.315666675567627, "learning_rate": 3.706685690990624e-05, "loss": 2.2752, "step": 74125 }, { "epoch": 5.036689767631471, "grad_norm": 3.6126646995544434, "learning_rate": 3.706261040902297e-05, "loss": 2.3802, "step": 74130 }, { "epoch": 5.037029487702133, "grad_norm": 3.812023878097534, "learning_rate": 3.7058363908139696e-05, "loss": 2.0913, "step": 74135 }, { "epoch": 5.037369207772795, "grad_norm": 3.5163626670837402, "learning_rate": 3.7054117407256424e-05, "loss": 2.2661, "step": 74140 }, { "epoch": 5.037708927843457, "grad_norm": 3.517277956008911, "learning_rate": 3.704987090637315e-05, "loss": 2.2915, "step": 74145 }, { "epoch": 5.038048647914119, "grad_norm": 2.6978185176849365, "learning_rate": 3.704562440548987e-05, "loss": 2.1987, "step": 74150 }, { "epoch": 5.038388367984781, "grad_norm": 2.9414870738983154, "learning_rate": 3.704137790460661e-05, "loss": 2.2582, "step": 74155 }, { "epoch": 5.038728088055442, "grad_norm": 3.782176971435547, "learning_rate": 3.7037131403723336e-05, "loss": 2.108, "step": 74160 }, { "epoch": 5.039067808126104, "grad_norm": 3.1282241344451904, "learning_rate": 3.703288490284006e-05, "loss": 2.561, "step": 74165 }, { "epoch": 5.039407528196766, "grad_norm": 4.050010681152344, "learning_rate": 3.702863840195679e-05, "loss": 2.1182, "step": 74170 }, { "epoch": 5.039747248267427, "grad_norm": 3.8690996170043945, "learning_rate": 3.702439190107352e-05, "loss": 2.3714, "step": 74175 }, { "epoch": 5.040086968338089, "grad_norm": 3.1803553104400635, "learning_rate": 3.702014540019024e-05, "loss": 2.2968, "step": 74180 }, { "epoch": 5.040426688408751, "grad_norm": 2.576714038848877, "learning_rate": 3.701589889930697e-05, "loss": 
2.2388, "step": 74185 }, { "epoch": 5.040766408479413, "grad_norm": 3.189732313156128, "learning_rate": 3.7011652398423704e-05, "loss": 2.2065, "step": 74190 }, { "epoch": 5.041106128550075, "grad_norm": 3.0553371906280518, "learning_rate": 3.7007405897540425e-05, "loss": 2.2262, "step": 74195 }, { "epoch": 5.041445848620737, "grad_norm": 3.4261791706085205, "learning_rate": 3.700315939665715e-05, "loss": 2.3665, "step": 74200 }, { "epoch": 5.041785568691398, "grad_norm": 2.792170286178589, "learning_rate": 3.699891289577389e-05, "loss": 2.1866, "step": 74205 }, { "epoch": 5.04212528876206, "grad_norm": 3.7038097381591797, "learning_rate": 3.699466639489061e-05, "loss": 2.6191, "step": 74210 }, { "epoch": 5.042465008832722, "grad_norm": 2.914839744567871, "learning_rate": 3.699041989400734e-05, "loss": 2.0901, "step": 74215 }, { "epoch": 5.042804728903383, "grad_norm": 3.5529885292053223, "learning_rate": 3.6986173393124065e-05, "loss": 2.2545, "step": 74220 }, { "epoch": 5.043144448974045, "grad_norm": 3.521686553955078, "learning_rate": 3.69819268922408e-05, "loss": 2.3676, "step": 74225 }, { "epoch": 5.043484169044707, "grad_norm": 3.7106049060821533, "learning_rate": 3.697768039135752e-05, "loss": 2.3445, "step": 74230 }, { "epoch": 5.043823889115369, "grad_norm": 2.652104616165161, "learning_rate": 3.697343389047425e-05, "loss": 2.2294, "step": 74235 }, { "epoch": 5.044163609186031, "grad_norm": 3.328009605407715, "learning_rate": 3.6969187389590984e-05, "loss": 1.8789, "step": 74240 }, { "epoch": 5.044503329256693, "grad_norm": 4.071359634399414, "learning_rate": 3.6964940888707705e-05, "loss": 2.1478, "step": 74245 }, { "epoch": 5.044843049327354, "grad_norm": 3.517902135848999, "learning_rate": 3.696069438782443e-05, "loss": 2.0951, "step": 74250 }, { "epoch": 5.045182769398016, "grad_norm": 3.995173931121826, "learning_rate": 3.695644788694116e-05, "loss": 2.2756, "step": 74255 }, { "epoch": 5.045522489468678, "grad_norm": 3.1756205558776855, 
"learning_rate": 3.695220138605789e-05, "loss": 2.2594, "step": 74260 }, { "epoch": 5.045862209539339, "grad_norm": 3.3836796283721924, "learning_rate": 3.694795488517462e-05, "loss": 2.4368, "step": 74265 }, { "epoch": 5.046201929610001, "grad_norm": 3.2287681102752686, "learning_rate": 3.6943708384291345e-05, "loss": 2.4151, "step": 74270 }, { "epoch": 5.046541649680663, "grad_norm": 3.131181240081787, "learning_rate": 3.6939461883408073e-05, "loss": 2.1572, "step": 74275 }, { "epoch": 5.046881369751325, "grad_norm": 3.8301279544830322, "learning_rate": 3.69352153825248e-05, "loss": 1.9126, "step": 74280 }, { "epoch": 5.047221089821987, "grad_norm": 3.378821849822998, "learning_rate": 3.693096888164153e-05, "loss": 2.1356, "step": 74285 }, { "epoch": 5.047560809892649, "grad_norm": 2.9611148834228516, "learning_rate": 3.692672238075826e-05, "loss": 2.2965, "step": 74290 }, { "epoch": 5.04790052996331, "grad_norm": 2.9627842903137207, "learning_rate": 3.6922475879874985e-05, "loss": 2.0314, "step": 74295 }, { "epoch": 5.048240250033972, "grad_norm": 3.344679832458496, "learning_rate": 3.6918229378991713e-05, "loss": 2.5013, "step": 74300 }, { "epoch": 5.048579970104634, "grad_norm": 5.26378870010376, "learning_rate": 3.691398287810844e-05, "loss": 2.3136, "step": 74305 }, { "epoch": 5.048919690175295, "grad_norm": 3.351736307144165, "learning_rate": 3.690973637722517e-05, "loss": 2.3204, "step": 74310 }, { "epoch": 5.049259410245957, "grad_norm": 3.4501898288726807, "learning_rate": 3.69054898763419e-05, "loss": 2.2332, "step": 74315 }, { "epoch": 5.0495991303166194, "grad_norm": 3.3737008571624756, "learning_rate": 3.690124337545862e-05, "loss": 2.3238, "step": 74320 }, { "epoch": 5.049938850387281, "grad_norm": 2.3054304122924805, "learning_rate": 3.6896996874575353e-05, "loss": 2.3515, "step": 74325 }, { "epoch": 5.050278570457943, "grad_norm": 3.030923843383789, "learning_rate": 3.689275037369208e-05, "loss": 2.0928, "step": 74330 }, { "epoch": 
5.050618290528605, "grad_norm": 3.4445908069610596, "learning_rate": 3.68885038728088e-05, "loss": 2.2226, "step": 74335 }, { "epoch": 5.050958010599266, "grad_norm": 3.8102610111236572, "learning_rate": 3.688425737192554e-05, "loss": 1.9965, "step": 74340 }, { "epoch": 5.051297730669928, "grad_norm": 3.768179416656494, "learning_rate": 3.6880010871042266e-05, "loss": 2.07, "step": 74345 }, { "epoch": 5.05163745074059, "grad_norm": 3.184657096862793, "learning_rate": 3.687576437015899e-05, "loss": 2.309, "step": 74350 }, { "epoch": 5.051977170811251, "grad_norm": 2.8864188194274902, "learning_rate": 3.6871517869275715e-05, "loss": 2.2987, "step": 74355 }, { "epoch": 5.052316890881913, "grad_norm": 2.829446315765381, "learning_rate": 3.686727136839245e-05, "loss": 2.3328, "step": 74360 }, { "epoch": 5.0526566109525755, "grad_norm": 3.405935287475586, "learning_rate": 3.686302486750917e-05, "loss": 2.2015, "step": 74365 }, { "epoch": 5.052996331023237, "grad_norm": 3.46762752532959, "learning_rate": 3.68587783666259e-05, "loss": 2.0776, "step": 74370 }, { "epoch": 5.053336051093899, "grad_norm": 3.0480384826660156, "learning_rate": 3.6854531865742634e-05, "loss": 2.2365, "step": 74375 }, { "epoch": 5.053675771164561, "grad_norm": 3.2570064067840576, "learning_rate": 3.6850285364859355e-05, "loss": 1.9056, "step": 74380 }, { "epoch": 5.054015491235222, "grad_norm": 4.154760360717773, "learning_rate": 3.684603886397608e-05, "loss": 2.1482, "step": 74385 }, { "epoch": 5.054355211305884, "grad_norm": 3.7074451446533203, "learning_rate": 3.684179236309281e-05, "loss": 2.3775, "step": 74390 }, { "epoch": 5.054694931376546, "grad_norm": 3.686694383621216, "learning_rate": 3.6837545862209546e-05, "loss": 2.2561, "step": 74395 }, { "epoch": 5.055034651447207, "grad_norm": 3.4165239334106445, "learning_rate": 3.683329936132627e-05, "loss": 1.7882, "step": 74400 }, { "epoch": 5.055374371517869, "grad_norm": 2.908984422683716, "learning_rate": 3.6829052860442995e-05, "loss": 
2.2093, "step": 74405 }, { "epoch": 5.0557140915885315, "grad_norm": 2.8027255535125732, "learning_rate": 3.682480635955973e-05, "loss": 2.2862, "step": 74410 }, { "epoch": 5.056053811659193, "grad_norm": 3.107102632522583, "learning_rate": 3.682055985867645e-05, "loss": 2.1784, "step": 74415 }, { "epoch": 5.056393531729855, "grad_norm": 3.3981478214263916, "learning_rate": 3.681631335779318e-05, "loss": 2.3484, "step": 74420 }, { "epoch": 5.056733251800517, "grad_norm": 3.0475597381591797, "learning_rate": 3.6812066856909914e-05, "loss": 2.5371, "step": 74425 }, { "epoch": 5.057072971871178, "grad_norm": 3.6137757301330566, "learning_rate": 3.6807820356026635e-05, "loss": 2.1253, "step": 74430 }, { "epoch": 5.05741269194184, "grad_norm": 3.318021297454834, "learning_rate": 3.680357385514336e-05, "loss": 2.2387, "step": 74435 }, { "epoch": 5.057752412012501, "grad_norm": 2.9076650142669678, "learning_rate": 3.679932735426009e-05, "loss": 2.1096, "step": 74440 }, { "epoch": 5.058092132083163, "grad_norm": 3.630065441131592, "learning_rate": 3.679508085337682e-05, "loss": 1.9186, "step": 74445 }, { "epoch": 5.058431852153825, "grad_norm": 3.138350486755371, "learning_rate": 3.679083435249355e-05, "loss": 1.9215, "step": 74450 }, { "epoch": 5.058771572224487, "grad_norm": 2.9612581729888916, "learning_rate": 3.6786587851610275e-05, "loss": 2.1906, "step": 74455 }, { "epoch": 5.059111292295149, "grad_norm": 3.882277727127075, "learning_rate": 3.6782341350727e-05, "loss": 2.3712, "step": 74460 }, { "epoch": 5.059451012365811, "grad_norm": 3.7426869869232178, "learning_rate": 3.677809484984373e-05, "loss": 2.3322, "step": 74465 }, { "epoch": 5.059790732436472, "grad_norm": 3.1216890811920166, "learning_rate": 3.677384834896046e-05, "loss": 2.0681, "step": 74470 }, { "epoch": 5.060130452507134, "grad_norm": 3.440397262573242, "learning_rate": 3.676960184807719e-05, "loss": 2.2996, "step": 74475 }, { "epoch": 5.060470172577796, "grad_norm": 3.913670063018799, 
"learning_rate": 3.6765355347193915e-05, "loss": 2.2725, "step": 74480 }, { "epoch": 5.060809892648457, "grad_norm": 3.387873411178589, "learning_rate": 3.676110884631064e-05, "loss": 2.5402, "step": 74485 }, { "epoch": 5.061149612719119, "grad_norm": 3.8711211681365967, "learning_rate": 3.6756862345427364e-05, "loss": 2.0092, "step": 74490 }, { "epoch": 5.061489332789781, "grad_norm": 4.104432582855225, "learning_rate": 3.67526158445441e-05, "loss": 2.0549, "step": 74495 }, { "epoch": 5.061829052860443, "grad_norm": 4.29483699798584, "learning_rate": 3.674836934366083e-05, "loss": 2.2299, "step": 74500 }, { "epoch": 5.062168772931105, "grad_norm": 3.1153619289398193, "learning_rate": 3.674412284277755e-05, "loss": 1.9221, "step": 74505 }, { "epoch": 5.062508493001767, "grad_norm": 3.775991916656494, "learning_rate": 3.673987634189428e-05, "loss": 2.4138, "step": 74510 }, { "epoch": 5.062848213072428, "grad_norm": 3.309467315673828, "learning_rate": 3.673562984101101e-05, "loss": 2.2842, "step": 74515 }, { "epoch": 5.06318793314309, "grad_norm": 3.035977602005005, "learning_rate": 3.673138334012773e-05, "loss": 2.222, "step": 74520 }, { "epoch": 5.063527653213752, "grad_norm": 3.2605457305908203, "learning_rate": 3.672713683924447e-05, "loss": 1.8668, "step": 74525 }, { "epoch": 5.063867373284413, "grad_norm": 4.029040813446045, "learning_rate": 3.6722890338361195e-05, "loss": 2.3509, "step": 74530 }, { "epoch": 5.064207093355075, "grad_norm": 3.1481361389160156, "learning_rate": 3.6718643837477916e-05, "loss": 2.3582, "step": 74535 }, { "epoch": 5.064546813425737, "grad_norm": 4.426197052001953, "learning_rate": 3.6714397336594644e-05, "loss": 2.09, "step": 74540 }, { "epoch": 5.064886533496399, "grad_norm": 2.9774959087371826, "learning_rate": 3.671015083571138e-05, "loss": 2.198, "step": 74545 }, { "epoch": 5.065226253567061, "grad_norm": 3.273890733718872, "learning_rate": 3.67059043348281e-05, "loss": 2.1711, "step": 74550 }, { "epoch": 5.065565973637723, 
"grad_norm": 3.434659242630005, "learning_rate": 3.670165783394483e-05, "loss": 2.2942, "step": 74555 }, { "epoch": 5.065905693708384, "grad_norm": 3.375211000442505, "learning_rate": 3.669741133306156e-05, "loss": 2.2685, "step": 74560 }, { "epoch": 5.066245413779046, "grad_norm": 3.641352891921997, "learning_rate": 3.669316483217829e-05, "loss": 2.4657, "step": 74565 }, { "epoch": 5.066585133849708, "grad_norm": 3.114400863647461, "learning_rate": 3.668891833129501e-05, "loss": 2.2415, "step": 74570 }, { "epoch": 5.066924853920369, "grad_norm": 4.124633312225342, "learning_rate": 3.668467183041174e-05, "loss": 2.3562, "step": 74575 }, { "epoch": 5.067264573991031, "grad_norm": 3.2831976413726807, "learning_rate": 3.6680425329528475e-05, "loss": 1.9169, "step": 74580 }, { "epoch": 5.067604294061693, "grad_norm": 3.8954741954803467, "learning_rate": 3.6676178828645196e-05, "loss": 2.3358, "step": 74585 }, { "epoch": 5.067944014132355, "grad_norm": 4.077258586883545, "learning_rate": 3.6671932327761924e-05, "loss": 2.0801, "step": 74590 }, { "epoch": 5.068283734203017, "grad_norm": 4.2981648445129395, "learning_rate": 3.666768582687866e-05, "loss": 2.3208, "step": 74595 }, { "epoch": 5.068623454273679, "grad_norm": 4.12436580657959, "learning_rate": 3.666343932599538e-05, "loss": 2.275, "step": 74600 }, { "epoch": 5.06896317434434, "grad_norm": 3.0167977809906006, "learning_rate": 3.665919282511211e-05, "loss": 2.2931, "step": 74605 }, { "epoch": 5.069302894415002, "grad_norm": 3.465838670730591, "learning_rate": 3.6654946324228836e-05, "loss": 2.4497, "step": 74610 }, { "epoch": 5.069642614485664, "grad_norm": 2.9056482315063477, "learning_rate": 3.6650699823345564e-05, "loss": 2.1785, "step": 74615 }, { "epoch": 5.069982334556325, "grad_norm": 3.068544626235962, "learning_rate": 3.664645332246229e-05, "loss": 2.3588, "step": 74620 }, { "epoch": 5.070322054626987, "grad_norm": 3.9502081871032715, "learning_rate": 3.664220682157902e-05, "loss": 2.0353, "step": 74625 
}, { "epoch": 5.0706617746976494, "grad_norm": 3.599423885345459, "learning_rate": 3.663796032069575e-05, "loss": 2.2129, "step": 74630 }, { "epoch": 5.071001494768311, "grad_norm": 3.926424503326416, "learning_rate": 3.6633713819812476e-05, "loss": 2.1397, "step": 74635 }, { "epoch": 5.071341214838973, "grad_norm": 3.181685209274292, "learning_rate": 3.6629467318929204e-05, "loss": 2.3975, "step": 74640 }, { "epoch": 5.071680934909635, "grad_norm": 3.6062891483306885, "learning_rate": 3.662522081804593e-05, "loss": 2.1428, "step": 74645 }, { "epoch": 5.072020654980296, "grad_norm": 2.866359233856201, "learning_rate": 3.662097431716266e-05, "loss": 2.1784, "step": 74650 }, { "epoch": 5.072360375050958, "grad_norm": 3.467733383178711, "learning_rate": 3.661672781627939e-05, "loss": 2.19, "step": 74655 }, { "epoch": 5.07270009512162, "grad_norm": 4.286681652069092, "learning_rate": 3.6612481315396116e-05, "loss": 2.0596, "step": 74660 }, { "epoch": 5.073039815192281, "grad_norm": 2.9507646560668945, "learning_rate": 3.6608234814512844e-05, "loss": 2.4686, "step": 74665 }, { "epoch": 5.073379535262943, "grad_norm": 3.696887493133545, "learning_rate": 3.660398831362957e-05, "loss": 2.5629, "step": 74670 }, { "epoch": 5.0737192553336055, "grad_norm": 3.664016008377075, "learning_rate": 3.6599741812746294e-05, "loss": 2.1384, "step": 74675 }, { "epoch": 5.074058975404267, "grad_norm": 3.2060391902923584, "learning_rate": 3.659549531186303e-05, "loss": 2.2865, "step": 74680 }, { "epoch": 5.074398695474929, "grad_norm": 3.350522518157959, "learning_rate": 3.6591248810979756e-05, "loss": 2.3246, "step": 74685 }, { "epoch": 5.074738415545591, "grad_norm": 3.379765510559082, "learning_rate": 3.658700231009648e-05, "loss": 1.9519, "step": 74690 }, { "epoch": 5.075078135616252, "grad_norm": 3.54915714263916, "learning_rate": 3.658275580921321e-05, "loss": 1.9261, "step": 74695 }, { "epoch": 5.075417855686914, "grad_norm": 2.5302388668060303, "learning_rate": 
3.657850930832994e-05, "loss": 2.224, "step": 74700 }, { "epoch": 5.075757575757576, "grad_norm": 3.3041248321533203, "learning_rate": 3.657426280744666e-05, "loss": 1.9826, "step": 74705 }, { "epoch": 5.076097295828237, "grad_norm": 3.3210813999176025, "learning_rate": 3.657001630656339e-05, "loss": 2.1414, "step": 74710 }, { "epoch": 5.076437015898899, "grad_norm": 3.6969680786132812, "learning_rate": 3.6565769805680125e-05, "loss": 2.4069, "step": 74715 }, { "epoch": 5.0767767359695615, "grad_norm": 3.6343231201171875, "learning_rate": 3.6561523304796846e-05, "loss": 2.2984, "step": 74720 }, { "epoch": 5.077116456040223, "grad_norm": 3.9753506183624268, "learning_rate": 3.6557276803913574e-05, "loss": 2.0984, "step": 74725 }, { "epoch": 5.077456176110885, "grad_norm": 3.099789619445801, "learning_rate": 3.655303030303031e-05, "loss": 2.4398, "step": 74730 }, { "epoch": 5.077795896181547, "grad_norm": 3.1261138916015625, "learning_rate": 3.6548783802147037e-05, "loss": 2.0498, "step": 74735 }, { "epoch": 5.078135616252208, "grad_norm": 4.633833885192871, "learning_rate": 3.654453730126376e-05, "loss": 2.3199, "step": 74740 }, { "epoch": 5.07847533632287, "grad_norm": 3.4776296615600586, "learning_rate": 3.6540290800380486e-05, "loss": 2.017, "step": 74745 }, { "epoch": 5.078815056393532, "grad_norm": 3.152843713760376, "learning_rate": 3.653604429949722e-05, "loss": 2.1852, "step": 74750 }, { "epoch": 5.079154776464193, "grad_norm": 3.17549467086792, "learning_rate": 3.653179779861394e-05, "loss": 2.4032, "step": 74755 }, { "epoch": 5.079494496534855, "grad_norm": 3.31111741065979, "learning_rate": 3.652755129773067e-05, "loss": 2.2564, "step": 74760 }, { "epoch": 5.0798342166055175, "grad_norm": 3.57356858253479, "learning_rate": 3.6523304796847405e-05, "loss": 2.4028, "step": 74765 }, { "epoch": 5.080173936676179, "grad_norm": 2.6790771484375, "learning_rate": 3.6519058295964126e-05, "loss": 2.3058, "step": 74770 }, { "epoch": 5.080513656746841, "grad_norm": 
3.0788865089416504, "learning_rate": 3.6514811795080854e-05, "loss": 2.4816, "step": 74775 }, { "epoch": 5.080853376817503, "grad_norm": 3.4732041358947754, "learning_rate": 3.651056529419758e-05, "loss": 2.0704, "step": 74780 }, { "epoch": 5.081193096888164, "grad_norm": 3.6440420150756836, "learning_rate": 3.650631879331431e-05, "loss": 2.1691, "step": 74785 }, { "epoch": 5.081532816958826, "grad_norm": 3.685616970062256, "learning_rate": 3.650207229243104e-05, "loss": 2.1924, "step": 74790 }, { "epoch": 5.081872537029487, "grad_norm": 3.4603641033172607, "learning_rate": 3.6497825791547766e-05, "loss": 2.2373, "step": 74795 }, { "epoch": 5.082212257100149, "grad_norm": 2.806605339050293, "learning_rate": 3.6493579290664494e-05, "loss": 2.1968, "step": 74800 }, { "epoch": 5.082551977170811, "grad_norm": 3.2081427574157715, "learning_rate": 3.648933278978122e-05, "loss": 2.1167, "step": 74805 }, { "epoch": 5.082891697241473, "grad_norm": 3.113264322280884, "learning_rate": 3.648508628889795e-05, "loss": 2.2142, "step": 74810 }, { "epoch": 5.083231417312135, "grad_norm": 3.432666301727295, "learning_rate": 3.648083978801468e-05, "loss": 2.4122, "step": 74815 }, { "epoch": 5.083571137382797, "grad_norm": 2.665728807449341, "learning_rate": 3.6476593287131406e-05, "loss": 2.2649, "step": 74820 }, { "epoch": 5.083910857453458, "grad_norm": 3.1268136501312256, "learning_rate": 3.6472346786248134e-05, "loss": 2.2957, "step": 74825 }, { "epoch": 5.08425057752412, "grad_norm": 3.502148389816284, "learning_rate": 3.646810028536486e-05, "loss": 2.259, "step": 74830 }, { "epoch": 5.084590297594782, "grad_norm": 3.1193573474884033, "learning_rate": 3.646385378448159e-05, "loss": 2.283, "step": 74835 }, { "epoch": 5.084930017665443, "grad_norm": 3.6953868865966797, "learning_rate": 3.645960728359832e-05, "loss": 2.1615, "step": 74840 }, { "epoch": 5.085269737736105, "grad_norm": 3.4382550716400146, "learning_rate": 3.645536078271504e-05, "loss": 2.5302, "step": 74845 }, { 
"epoch": 5.085609457806767, "grad_norm": 3.468212366104126, "learning_rate": 3.6451114281831774e-05, "loss": 2.187, "step": 74850 }, { "epoch": 5.085949177877429, "grad_norm": 2.9908065795898438, "learning_rate": 3.64468677809485e-05, "loss": 2.1152, "step": 74855 }, { "epoch": 5.086288897948091, "grad_norm": 2.634030818939209, "learning_rate": 3.644262128006522e-05, "loss": 2.3157, "step": 74860 }, { "epoch": 5.086628618018753, "grad_norm": 3.5499019622802734, "learning_rate": 3.643837477918196e-05, "loss": 2.1749, "step": 74865 }, { "epoch": 5.086968338089414, "grad_norm": 3.728818893432617, "learning_rate": 3.6434128278298686e-05, "loss": 2.0435, "step": 74870 }, { "epoch": 5.087308058160076, "grad_norm": 4.213443279266357, "learning_rate": 3.642988177741541e-05, "loss": 2.1733, "step": 74875 }, { "epoch": 5.087647778230738, "grad_norm": 3.165623426437378, "learning_rate": 3.6425635276532135e-05, "loss": 2.4333, "step": 74880 }, { "epoch": 5.087987498301399, "grad_norm": 3.8844242095947266, "learning_rate": 3.642138877564887e-05, "loss": 2.1577, "step": 74885 }, { "epoch": 5.088327218372061, "grad_norm": 3.3047778606414795, "learning_rate": 3.641714227476559e-05, "loss": 2.0211, "step": 74890 }, { "epoch": 5.088666938442723, "grad_norm": 3.4007339477539062, "learning_rate": 3.641289577388232e-05, "loss": 2.0841, "step": 74895 }, { "epoch": 5.089006658513385, "grad_norm": 3.8364038467407227, "learning_rate": 3.6408649272999054e-05, "loss": 2.2504, "step": 74900 }, { "epoch": 5.089346378584047, "grad_norm": 3.322007656097412, "learning_rate": 3.640440277211578e-05, "loss": 2.1789, "step": 74905 }, { "epoch": 5.089686098654709, "grad_norm": 3.5688464641571045, "learning_rate": 3.64001562712325e-05, "loss": 1.9417, "step": 74910 }, { "epoch": 5.09002581872537, "grad_norm": 3.368551731109619, "learning_rate": 3.639590977034923e-05, "loss": 2.1265, "step": 74915 }, { "epoch": 5.090365538796032, "grad_norm": 3.0335354804992676, "learning_rate": 3.6391663269465966e-05, 
"loss": 2.2476, "step": 74920 }, { "epoch": 5.090705258866694, "grad_norm": 3.168605089187622, "learning_rate": 3.638741676858269e-05, "loss": 2.0824, "step": 74925 }, { "epoch": 5.091044978937355, "grad_norm": 3.0469212532043457, "learning_rate": 3.6383170267699415e-05, "loss": 2.2547, "step": 74930 }, { "epoch": 5.091384699008017, "grad_norm": 3.09562087059021, "learning_rate": 3.637892376681615e-05, "loss": 2.1897, "step": 74935 }, { "epoch": 5.0917244190786795, "grad_norm": 3.270550489425659, "learning_rate": 3.637467726593287e-05, "loss": 2.4456, "step": 74940 }, { "epoch": 5.092064139149341, "grad_norm": 3.2414116859436035, "learning_rate": 3.63704307650496e-05, "loss": 2.1171, "step": 74945 }, { "epoch": 5.092403859220003, "grad_norm": 3.497036933898926, "learning_rate": 3.6366184264166334e-05, "loss": 2.0466, "step": 74950 }, { "epoch": 5.092743579290665, "grad_norm": 3.2185397148132324, "learning_rate": 3.6361937763283055e-05, "loss": 2.3824, "step": 74955 }, { "epoch": 5.093083299361326, "grad_norm": 3.1160266399383545, "learning_rate": 3.635769126239978e-05, "loss": 2.1417, "step": 74960 }, { "epoch": 5.093423019431988, "grad_norm": 2.9536328315734863, "learning_rate": 3.635344476151651e-05, "loss": 2.1894, "step": 74965 }, { "epoch": 5.09376273950265, "grad_norm": 3.1005618572235107, "learning_rate": 3.634919826063324e-05, "loss": 2.4795, "step": 74970 }, { "epoch": 5.094102459573311, "grad_norm": 3.779682159423828, "learning_rate": 3.634495175974997e-05, "loss": 2.3648, "step": 74975 }, { "epoch": 5.094442179643973, "grad_norm": 3.890606164932251, "learning_rate": 3.6340705258866695e-05, "loss": 2.0303, "step": 74980 }, { "epoch": 5.0947818997146355, "grad_norm": 4.225250720977783, "learning_rate": 3.6336458757983423e-05, "loss": 2.4573, "step": 74985 }, { "epoch": 5.095121619785297, "grad_norm": 5.074053764343262, "learning_rate": 3.633221225710015e-05, "loss": 2.375, "step": 74990 }, { "epoch": 5.095461339855959, "grad_norm": 2.5225307941436768, 
"learning_rate": 3.632796575621688e-05, "loss": 2.5117, "step": 74995 }, { "epoch": 5.095801059926621, "grad_norm": 3.897879123687744, "learning_rate": 3.632371925533361e-05, "loss": 2.5027, "step": 75000 }, { "epoch": 5.096140779997282, "grad_norm": 2.8442063331604004, "learning_rate": 3.6319472754450335e-05, "loss": 2.16, "step": 75005 }, { "epoch": 5.096480500067944, "grad_norm": 3.376692056655884, "learning_rate": 3.6315226253567063e-05, "loss": 2.5094, "step": 75010 }, { "epoch": 5.096820220138606, "grad_norm": 3.0559229850769043, "learning_rate": 3.6310979752683785e-05, "loss": 2.1278, "step": 75015 }, { "epoch": 5.097159940209267, "grad_norm": 3.9628453254699707, "learning_rate": 3.630673325180052e-05, "loss": 2.2311, "step": 75020 }, { "epoch": 5.097499660279929, "grad_norm": 2.946157217025757, "learning_rate": 3.630248675091725e-05, "loss": 2.2778, "step": 75025 }, { "epoch": 5.0978393803505915, "grad_norm": 3.542015552520752, "learning_rate": 3.629824025003397e-05, "loss": 2.2453, "step": 75030 }, { "epoch": 5.098179100421253, "grad_norm": 3.4399642944335938, "learning_rate": 3.6293993749150703e-05, "loss": 2.1499, "step": 75035 }, { "epoch": 5.098518820491915, "grad_norm": 2.870577573776245, "learning_rate": 3.628974724826743e-05, "loss": 2.1882, "step": 75040 }, { "epoch": 5.098858540562577, "grad_norm": 2.927464008331299, "learning_rate": 3.628550074738415e-05, "loss": 2.3153, "step": 75045 }, { "epoch": 5.099198260633238, "grad_norm": 3.358638048171997, "learning_rate": 3.628125424650089e-05, "loss": 2.0307, "step": 75050 }, { "epoch": 5.0995379807039, "grad_norm": 3.361595392227173, "learning_rate": 3.6277007745617616e-05, "loss": 2.0973, "step": 75055 }, { "epoch": 5.099877700774562, "grad_norm": 3.1113526821136475, "learning_rate": 3.627276124473434e-05, "loss": 2.2361, "step": 75060 }, { "epoch": 5.100217420845223, "grad_norm": 2.80131196975708, "learning_rate": 3.6268514743851065e-05, "loss": 2.1314, "step": 75065 }, { "epoch": 5.100557140915885, 
"grad_norm": 3.1408956050872803, "learning_rate": 3.62642682429678e-05, "loss": 2.3867, "step": 75070 }, { "epoch": 5.1008968609865475, "grad_norm": 3.904327630996704, "learning_rate": 3.626002174208453e-05, "loss": 2.1254, "step": 75075 }, { "epoch": 5.101236581057209, "grad_norm": 3.6182916164398193, "learning_rate": 3.625577524120125e-05, "loss": 2.4683, "step": 75080 }, { "epoch": 5.101576301127871, "grad_norm": 3.623973846435547, "learning_rate": 3.6251528740317984e-05, "loss": 2.0522, "step": 75085 }, { "epoch": 5.101916021198533, "grad_norm": 3.6639292240142822, "learning_rate": 3.624728223943471e-05, "loss": 2.4763, "step": 75090 }, { "epoch": 5.102255741269194, "grad_norm": 3.7751224040985107, "learning_rate": 3.624303573855143e-05, "loss": 2.2118, "step": 75095 }, { "epoch": 5.102595461339856, "grad_norm": 3.7390620708465576, "learning_rate": 3.623878923766816e-05, "loss": 2.2573, "step": 75100 }, { "epoch": 5.102935181410518, "grad_norm": 3.8690290451049805, "learning_rate": 3.6234542736784896e-05, "loss": 2.2564, "step": 75105 }, { "epoch": 5.103274901481179, "grad_norm": 3.9070606231689453, "learning_rate": 3.623029623590162e-05, "loss": 2.1262, "step": 75110 }, { "epoch": 5.103614621551841, "grad_norm": 2.9144575595855713, "learning_rate": 3.6226049735018345e-05, "loss": 2.0206, "step": 75115 }, { "epoch": 5.103954341622503, "grad_norm": 2.8320953845977783, "learning_rate": 3.622180323413508e-05, "loss": 2.1833, "step": 75120 }, { "epoch": 5.104294061693165, "grad_norm": 3.4030284881591797, "learning_rate": 3.62175567332518e-05, "loss": 2.2051, "step": 75125 }, { "epoch": 5.104633781763827, "grad_norm": 3.388970375061035, "learning_rate": 3.621331023236853e-05, "loss": 2.1456, "step": 75130 }, { "epoch": 5.104973501834488, "grad_norm": 4.1016645431518555, "learning_rate": 3.620906373148526e-05, "loss": 2.1108, "step": 75135 }, { "epoch": 5.10531322190515, "grad_norm": 4.056875705718994, "learning_rate": 3.6204817230601985e-05, "loss": 2.0521, "step": 
75140 }, { "epoch": 5.105652941975812, "grad_norm": 3.0631473064422607, "learning_rate": 3.620057072971871e-05, "loss": 2.0409, "step": 75145 }, { "epoch": 5.105992662046473, "grad_norm": 3.6876044273376465, "learning_rate": 3.619632422883544e-05, "loss": 2.3971, "step": 75150 }, { "epoch": 5.106332382117135, "grad_norm": 3.1892058849334717, "learning_rate": 3.619207772795217e-05, "loss": 2.3424, "step": 75155 }, { "epoch": 5.106672102187797, "grad_norm": 4.598087310791016, "learning_rate": 3.61878312270689e-05, "loss": 2.05, "step": 75160 }, { "epoch": 5.107011822258459, "grad_norm": 3.6500627994537354, "learning_rate": 3.6183584726185625e-05, "loss": 2.4449, "step": 75165 }, { "epoch": 5.107351542329121, "grad_norm": 2.489452600479126, "learning_rate": 3.617933822530235e-05, "loss": 1.9923, "step": 75170 }, { "epoch": 5.107691262399783, "grad_norm": 3.281367063522339, "learning_rate": 3.617509172441908e-05, "loss": 2.2107, "step": 75175 }, { "epoch": 5.108030982470444, "grad_norm": 3.06067156791687, "learning_rate": 3.617084522353581e-05, "loss": 2.1138, "step": 75180 }, { "epoch": 5.108370702541106, "grad_norm": 3.5147552490234375, "learning_rate": 3.616659872265254e-05, "loss": 2.3156, "step": 75185 }, { "epoch": 5.108710422611768, "grad_norm": 2.795867681503296, "learning_rate": 3.6162352221769265e-05, "loss": 2.2276, "step": 75190 }, { "epoch": 5.109050142682429, "grad_norm": 4.165884017944336, "learning_rate": 3.615810572088599e-05, "loss": 2.3009, "step": 75195 }, { "epoch": 5.109389862753091, "grad_norm": 3.7192533016204834, "learning_rate": 3.6153859220002714e-05, "loss": 2.1091, "step": 75200 }, { "epoch": 5.1097295828237534, "grad_norm": 2.7722480297088623, "learning_rate": 3.614961271911945e-05, "loss": 2.3136, "step": 75205 }, { "epoch": 5.110069302894415, "grad_norm": 2.8739216327667236, "learning_rate": 3.614536621823618e-05, "loss": 2.3364, "step": 75210 }, { "epoch": 5.110409022965077, "grad_norm": 3.3817758560180664, "learning_rate": 
3.61411197173529e-05, "loss": 2.1947, "step": 75215 }, { "epoch": 5.110748743035739, "grad_norm": 3.352362871170044, "learning_rate": 3.613687321646963e-05, "loss": 2.084, "step": 75220 }, { "epoch": 5.1110884631064, "grad_norm": 3.2761712074279785, "learning_rate": 3.613262671558636e-05, "loss": 2.414, "step": 75225 }, { "epoch": 5.111428183177062, "grad_norm": 3.600708484649658, "learning_rate": 3.612838021470308e-05, "loss": 2.3267, "step": 75230 }, { "epoch": 5.111767903247724, "grad_norm": 3.45527720451355, "learning_rate": 3.612413371381981e-05, "loss": 2.3226, "step": 75235 }, { "epoch": 5.112107623318385, "grad_norm": 3.848668336868286, "learning_rate": 3.6119887212936545e-05, "loss": 2.208, "step": 75240 }, { "epoch": 5.112447343389047, "grad_norm": 3.959409236907959, "learning_rate": 3.611564071205327e-05, "loss": 2.0893, "step": 75245 }, { "epoch": 5.1127870634597095, "grad_norm": 3.9165892601013184, "learning_rate": 3.6111394211169994e-05, "loss": 2.392, "step": 75250 }, { "epoch": 5.113126783530371, "grad_norm": 1.847735047340393, "learning_rate": 3.610714771028673e-05, "loss": 1.8686, "step": 75255 }, { "epoch": 5.113466503601033, "grad_norm": 3.1862246990203857, "learning_rate": 3.610290120940346e-05, "loss": 2.1505, "step": 75260 }, { "epoch": 5.113806223671695, "grad_norm": 3.409059762954712, "learning_rate": 3.609865470852018e-05, "loss": 2.2583, "step": 75265 }, { "epoch": 5.114145943742356, "grad_norm": 3.3453896045684814, "learning_rate": 3.6094408207636906e-05, "loss": 2.3062, "step": 75270 }, { "epoch": 5.114485663813018, "grad_norm": 3.6510205268859863, "learning_rate": 3.609016170675364e-05, "loss": 2.0226, "step": 75275 }, { "epoch": 5.11482538388368, "grad_norm": 3.4255783557891846, "learning_rate": 3.608591520587036e-05, "loss": 2.1867, "step": 75280 }, { "epoch": 5.115165103954341, "grad_norm": 3.7320046424865723, "learning_rate": 3.608166870498709e-05, "loss": 2.2391, "step": 75285 }, { "epoch": 5.115504824025003, "grad_norm": 
3.9545211791992188, "learning_rate": 3.6077422204103825e-05, "loss": 2.0994, "step": 75290 }, { "epoch": 5.1158445440956655, "grad_norm": 3.8381311893463135, "learning_rate": 3.6073175703220546e-05, "loss": 2.4033, "step": 75295 }, { "epoch": 5.116184264166327, "grad_norm": 3.5545542240142822, "learning_rate": 3.6068929202337274e-05, "loss": 2.348, "step": 75300 }, { "epoch": 5.116523984236989, "grad_norm": 4.938642978668213, "learning_rate": 3.6064682701454e-05, "loss": 2.0364, "step": 75305 }, { "epoch": 5.116863704307651, "grad_norm": 3.075247049331665, "learning_rate": 3.606043620057073e-05, "loss": 2.4686, "step": 75310 }, { "epoch": 5.117203424378312, "grad_norm": 4.542222023010254, "learning_rate": 3.605618969968746e-05, "loss": 2.0904, "step": 75315 }, { "epoch": 5.117543144448974, "grad_norm": 2.821335792541504, "learning_rate": 3.6051943198804186e-05, "loss": 2.3225, "step": 75320 }, { "epoch": 5.117882864519636, "grad_norm": 3.0415899753570557, "learning_rate": 3.6047696697920914e-05, "loss": 2.4531, "step": 75325 }, { "epoch": 5.118222584590297, "grad_norm": 3.7525928020477295, "learning_rate": 3.604345019703764e-05, "loss": 2.3204, "step": 75330 }, { "epoch": 5.118562304660959, "grad_norm": 3.6326777935028076, "learning_rate": 3.603920369615437e-05, "loss": 2.0652, "step": 75335 }, { "epoch": 5.1189020247316215, "grad_norm": 3.656872034072876, "learning_rate": 3.60349571952711e-05, "loss": 2.174, "step": 75340 }, { "epoch": 5.119241744802283, "grad_norm": 4.173246383666992, "learning_rate": 3.6030710694387826e-05, "loss": 2.1026, "step": 75345 }, { "epoch": 5.119581464872945, "grad_norm": 2.856703758239746, "learning_rate": 3.6026464193504554e-05, "loss": 2.4952, "step": 75350 }, { "epoch": 5.119921184943607, "grad_norm": 3.4086976051330566, "learning_rate": 3.602221769262128e-05, "loss": 2.2377, "step": 75355 }, { "epoch": 5.120260905014268, "grad_norm": 2.7887778282165527, "learning_rate": 3.601797119173801e-05, "loss": 1.9628, "step": 75360 }, { 
"epoch": 5.12060062508493, "grad_norm": 3.336045980453491, "learning_rate": 3.601372469085474e-05, "loss": 2.3924, "step": 75365 }, { "epoch": 5.120940345155592, "grad_norm": 4.504960536956787, "learning_rate": 3.600947818997146e-05, "loss": 2.4447, "step": 75370 }, { "epoch": 5.121280065226253, "grad_norm": 3.8346426486968994, "learning_rate": 3.6005231689088194e-05, "loss": 2.066, "step": 75375 }, { "epoch": 5.121619785296915, "grad_norm": 3.7571299076080322, "learning_rate": 3.600098518820492e-05, "loss": 2.299, "step": 75380 }, { "epoch": 5.1219595053675775, "grad_norm": 2.893630266189575, "learning_rate": 3.5996738687321644e-05, "loss": 2.0206, "step": 75385 }, { "epoch": 5.122299225438239, "grad_norm": 3.269015312194824, "learning_rate": 3.599249218643838e-05, "loss": 2.0959, "step": 75390 }, { "epoch": 5.122638945508901, "grad_norm": 3.8269431591033936, "learning_rate": 3.5988245685555106e-05, "loss": 2.1486, "step": 75395 }, { "epoch": 5.122978665579563, "grad_norm": 2.971970319747925, "learning_rate": 3.598399918467183e-05, "loss": 2.1715, "step": 75400 }, { "epoch": 5.123318385650224, "grad_norm": 3.294294834136963, "learning_rate": 3.5979752683788556e-05, "loss": 2.2409, "step": 75405 }, { "epoch": 5.123658105720886, "grad_norm": 3.6229405403137207, "learning_rate": 3.597550618290529e-05, "loss": 2.1624, "step": 75410 }, { "epoch": 5.123997825791548, "grad_norm": 3.1291747093200684, "learning_rate": 3.597125968202202e-05, "loss": 2.1226, "step": 75415 }, { "epoch": 5.124337545862209, "grad_norm": 3.0604984760284424, "learning_rate": 3.596701318113874e-05, "loss": 2.2208, "step": 75420 }, { "epoch": 5.124677265932871, "grad_norm": 3.0805587768554688, "learning_rate": 3.5962766680255475e-05, "loss": 2.1552, "step": 75425 }, { "epoch": 5.1250169860035335, "grad_norm": 3.2285380363464355, "learning_rate": 3.59585201793722e-05, "loss": 2.3448, "step": 75430 }, { "epoch": 5.125356706074195, "grad_norm": 4.078746795654297, "learning_rate": 
3.5954273678488924e-05, "loss": 2.3078, "step": 75435 }, { "epoch": 5.125696426144857, "grad_norm": 3.973665237426758, "learning_rate": 3.595002717760566e-05, "loss": 2.2001, "step": 75440 }, { "epoch": 5.126036146215519, "grad_norm": 3.1290369033813477, "learning_rate": 3.5945780676722387e-05, "loss": 2.447, "step": 75445 }, { "epoch": 5.12637586628618, "grad_norm": 3.3313379287719727, "learning_rate": 3.594153417583911e-05, "loss": 2.1984, "step": 75450 }, { "epoch": 5.126715586356842, "grad_norm": 3.1841466426849365, "learning_rate": 3.5937287674955836e-05, "loss": 2.4652, "step": 75455 }, { "epoch": 5.127055306427504, "grad_norm": 3.8901095390319824, "learning_rate": 3.593304117407257e-05, "loss": 2.1143, "step": 75460 }, { "epoch": 5.127395026498165, "grad_norm": 4.349434852600098, "learning_rate": 3.592879467318929e-05, "loss": 2.049, "step": 75465 }, { "epoch": 5.127734746568827, "grad_norm": 3.5463919639587402, "learning_rate": 3.592454817230602e-05, "loss": 2.13, "step": 75470 }, { "epoch": 5.1280744666394895, "grad_norm": 3.196932315826416, "learning_rate": 3.5920301671422755e-05, "loss": 1.8678, "step": 75475 }, { "epoch": 5.128414186710151, "grad_norm": 4.159600257873535, "learning_rate": 3.5916055170539476e-05, "loss": 2.1062, "step": 75480 }, { "epoch": 5.128753906780813, "grad_norm": 4.056410312652588, "learning_rate": 3.5911808669656204e-05, "loss": 2.232, "step": 75485 }, { "epoch": 5.129093626851474, "grad_norm": 3.0262346267700195, "learning_rate": 3.590756216877293e-05, "loss": 2.1286, "step": 75490 }, { "epoch": 5.129433346922136, "grad_norm": 3.3182079792022705, "learning_rate": 3.590331566788966e-05, "loss": 2.2229, "step": 75495 }, { "epoch": 5.129773066992798, "grad_norm": 2.8686816692352295, "learning_rate": 3.589906916700639e-05, "loss": 2.344, "step": 75500 }, { "epoch": 5.130112787063459, "grad_norm": 3.303252935409546, "learning_rate": 3.5894822666123116e-05, "loss": 2.2642, "step": 75505 }, { "epoch": 5.130452507134121, "grad_norm": 
3.6687283515930176, "learning_rate": 3.5890576165239844e-05, "loss": 2.1634, "step": 75510 }, { "epoch": 5.1307922272047835, "grad_norm": 3.574985980987549, "learning_rate": 3.588632966435657e-05, "loss": 2.5284, "step": 75515 }, { "epoch": 5.131131947275445, "grad_norm": 3.7145862579345703, "learning_rate": 3.58820831634733e-05, "loss": 2.191, "step": 75520 }, { "epoch": 5.131471667346107, "grad_norm": 2.9487125873565674, "learning_rate": 3.587783666259003e-05, "loss": 2.1404, "step": 75525 }, { "epoch": 5.131811387416769, "grad_norm": 3.437164545059204, "learning_rate": 3.5873590161706756e-05, "loss": 2.1805, "step": 75530 }, { "epoch": 5.13215110748743, "grad_norm": 3.087178945541382, "learning_rate": 3.5869343660823484e-05, "loss": 2.3068, "step": 75535 }, { "epoch": 5.132490827558092, "grad_norm": 3.5922224521636963, "learning_rate": 3.586509715994021e-05, "loss": 2.2172, "step": 75540 }, { "epoch": 5.132830547628754, "grad_norm": 2.5944976806640625, "learning_rate": 3.586085065905694e-05, "loss": 2.6123, "step": 75545 }, { "epoch": 5.133170267699415, "grad_norm": 3.3558223247528076, "learning_rate": 3.585660415817367e-05, "loss": 2.1063, "step": 75550 }, { "epoch": 5.133509987770077, "grad_norm": 2.954686403274536, "learning_rate": 3.585235765729039e-05, "loss": 2.2394, "step": 75555 }, { "epoch": 5.1338497078407395, "grad_norm": 4.106847286224365, "learning_rate": 3.5848111156407124e-05, "loss": 2.1736, "step": 75560 }, { "epoch": 5.134189427911401, "grad_norm": 2.9884424209594727, "learning_rate": 3.584386465552385e-05, "loss": 2.3528, "step": 75565 }, { "epoch": 5.134529147982063, "grad_norm": 3.498608350753784, "learning_rate": 3.583961815464057e-05, "loss": 2.2419, "step": 75570 }, { "epoch": 5.134868868052725, "grad_norm": 3.172595500946045, "learning_rate": 3.583537165375731e-05, "loss": 2.1372, "step": 75575 }, { "epoch": 5.135208588123386, "grad_norm": 3.61944842338562, "learning_rate": 3.5831125152874036e-05, "loss": 2.3639, "step": 75580 }, { 
"epoch": 5.135548308194048, "grad_norm": 2.559948205947876, "learning_rate": 3.5826878651990764e-05, "loss": 2.4044, "step": 75585 }, { "epoch": 5.13588802826471, "grad_norm": 3.7874059677124023, "learning_rate": 3.5822632151107485e-05, "loss": 2.0579, "step": 75590 }, { "epoch": 5.136227748335371, "grad_norm": 2.7558634281158447, "learning_rate": 3.581838565022422e-05, "loss": 2.2899, "step": 75595 }, { "epoch": 5.136567468406033, "grad_norm": 3.77024507522583, "learning_rate": 3.581413914934095e-05, "loss": 2.1813, "step": 75600 }, { "epoch": 5.1369071884766955, "grad_norm": 4.119357109069824, "learning_rate": 3.580989264845767e-05, "loss": 2.4045, "step": 75605 }, { "epoch": 5.137246908547357, "grad_norm": 2.998035192489624, "learning_rate": 3.5805646147574404e-05, "loss": 2.4697, "step": 75610 }, { "epoch": 5.137586628618019, "grad_norm": 3.173377752304077, "learning_rate": 3.580139964669113e-05, "loss": 2.4559, "step": 75615 }, { "epoch": 5.137926348688681, "grad_norm": 3.7238609790802, "learning_rate": 3.579715314580785e-05, "loss": 2.6056, "step": 75620 }, { "epoch": 5.138266068759342, "grad_norm": 3.745365858078003, "learning_rate": 3.579290664492458e-05, "loss": 2.3445, "step": 75625 }, { "epoch": 5.138605788830004, "grad_norm": 3.5041677951812744, "learning_rate": 3.5788660144041316e-05, "loss": 2.2159, "step": 75630 }, { "epoch": 5.138945508900666, "grad_norm": 2.999007225036621, "learning_rate": 3.578441364315804e-05, "loss": 2.302, "step": 75635 }, { "epoch": 5.139285228971327, "grad_norm": 2.6104698181152344, "learning_rate": 3.5780167142274765e-05, "loss": 1.9826, "step": 75640 }, { "epoch": 5.139624949041989, "grad_norm": 2.963794231414795, "learning_rate": 3.57759206413915e-05, "loss": 2.1381, "step": 75645 }, { "epoch": 5.1399646691126515, "grad_norm": 3.0720877647399902, "learning_rate": 3.577167414050822e-05, "loss": 2.3715, "step": 75650 }, { "epoch": 5.140304389183313, "grad_norm": 4.071737289428711, "learning_rate": 3.576742763962495e-05, 
"loss": 2.3273, "step": 75655 }, { "epoch": 5.140644109253975, "grad_norm": 3.220128059387207, "learning_rate": 3.576318113874168e-05, "loss": 2.119, "step": 75660 }, { "epoch": 5.140983829324637, "grad_norm": 3.1312599182128906, "learning_rate": 3.5758934637858405e-05, "loss": 2.1192, "step": 75665 }, { "epoch": 5.141323549395298, "grad_norm": 3.4842655658721924, "learning_rate": 3.575468813697513e-05, "loss": 2.2166, "step": 75670 }, { "epoch": 5.14166326946596, "grad_norm": 3.084751605987549, "learning_rate": 3.575044163609186e-05, "loss": 2.5582, "step": 75675 }, { "epoch": 5.142002989536622, "grad_norm": 2.694620132446289, "learning_rate": 3.574619513520859e-05, "loss": 2.2071, "step": 75680 }, { "epoch": 5.142342709607283, "grad_norm": 2.825840473175049, "learning_rate": 3.574194863432532e-05, "loss": 2.3275, "step": 75685 }, { "epoch": 5.142682429677945, "grad_norm": 3.534158945083618, "learning_rate": 3.5737702133442045e-05, "loss": 2.1617, "step": 75690 }, { "epoch": 5.1430221497486075, "grad_norm": 3.6098999977111816, "learning_rate": 3.573345563255877e-05, "loss": 2.2427, "step": 75695 }, { "epoch": 5.143361869819269, "grad_norm": 3.696425676345825, "learning_rate": 3.57292091316755e-05, "loss": 2.3963, "step": 75700 }, { "epoch": 5.143701589889931, "grad_norm": 3.127302646636963, "learning_rate": 3.572496263079223e-05, "loss": 2.5044, "step": 75705 }, { "epoch": 5.144041309960593, "grad_norm": 3.877052068710327, "learning_rate": 3.572071612990896e-05, "loss": 2.2238, "step": 75710 }, { "epoch": 5.144381030031254, "grad_norm": 3.474024534225464, "learning_rate": 3.5716469629025685e-05, "loss": 2.2246, "step": 75715 }, { "epoch": 5.144720750101916, "grad_norm": 3.57999587059021, "learning_rate": 3.5712223128142413e-05, "loss": 2.3185, "step": 75720 }, { "epoch": 5.145060470172578, "grad_norm": 3.6130943298339844, "learning_rate": 3.5707976627259135e-05, "loss": 2.4301, "step": 75725 }, { "epoch": 5.145400190243239, "grad_norm": 4.0842156410217285, 
"learning_rate": 3.570373012637587e-05, "loss": 2.2981, "step": 75730 }, { "epoch": 5.145739910313901, "grad_norm": 4.017923355102539, "learning_rate": 3.56994836254926e-05, "loss": 2.1839, "step": 75735 }, { "epoch": 5.1460796303845635, "grad_norm": 2.7634994983673096, "learning_rate": 3.569523712460932e-05, "loss": 2.1102, "step": 75740 }, { "epoch": 5.146419350455225, "grad_norm": 3.6566808223724365, "learning_rate": 3.5690990623726053e-05, "loss": 1.9528, "step": 75745 }, { "epoch": 5.146759070525887, "grad_norm": 3.3906660079956055, "learning_rate": 3.568674412284278e-05, "loss": 2.5054, "step": 75750 }, { "epoch": 5.147098790596549, "grad_norm": 3.527841091156006, "learning_rate": 3.568249762195951e-05, "loss": 2.178, "step": 75755 }, { "epoch": 5.14743851066721, "grad_norm": 3.9785163402557373, "learning_rate": 3.567825112107623e-05, "loss": 2.1291, "step": 75760 }, { "epoch": 5.147778230737872, "grad_norm": 3.5438902378082275, "learning_rate": 3.5674004620192965e-05, "loss": 2.4196, "step": 75765 }, { "epoch": 5.148117950808534, "grad_norm": 3.966830253601074, "learning_rate": 3.5669758119309694e-05, "loss": 2.2629, "step": 75770 }, { "epoch": 5.148457670879195, "grad_norm": 3.2692301273345947, "learning_rate": 3.5665511618426415e-05, "loss": 2.4228, "step": 75775 }, { "epoch": 5.1487973909498574, "grad_norm": 3.4321272373199463, "learning_rate": 3.566126511754315e-05, "loss": 2.1618, "step": 75780 }, { "epoch": 5.1491371110205195, "grad_norm": 5.09205436706543, "learning_rate": 3.565701861665988e-05, "loss": 2.1513, "step": 75785 }, { "epoch": 5.149476831091181, "grad_norm": 3.0524582862854004, "learning_rate": 3.56527721157766e-05, "loss": 2.45, "step": 75790 }, { "epoch": 5.149816551161843, "grad_norm": 3.154937982559204, "learning_rate": 3.564852561489333e-05, "loss": 2.0721, "step": 75795 }, { "epoch": 5.150156271232504, "grad_norm": 3.3315882682800293, "learning_rate": 3.564427911401006e-05, "loss": 2.2931, "step": 75800 }, { "epoch": 
5.150495991303166, "grad_norm": 3.5323634147644043, "learning_rate": 3.564003261312678e-05, "loss": 1.9583, "step": 75805 }, { "epoch": 5.150835711373828, "grad_norm": 3.043506383895874, "learning_rate": 3.563578611224351e-05, "loss": 2.4157, "step": 75810 }, { "epoch": 5.151175431444489, "grad_norm": 3.9388699531555176, "learning_rate": 3.5631539611360246e-05, "loss": 2.0746, "step": 75815 }, { "epoch": 5.151515151515151, "grad_norm": 3.6277575492858887, "learning_rate": 3.562729311047697e-05, "loss": 2.3032, "step": 75820 }, { "epoch": 5.1518548715858135, "grad_norm": 3.9942548274993896, "learning_rate": 3.5623046609593695e-05, "loss": 2.1663, "step": 75825 }, { "epoch": 5.152194591656475, "grad_norm": 2.697298288345337, "learning_rate": 3.561880010871042e-05, "loss": 2.4708, "step": 75830 }, { "epoch": 5.152534311727137, "grad_norm": 3.2176311016082764, "learning_rate": 3.561455360782715e-05, "loss": 2.3147, "step": 75835 }, { "epoch": 5.152874031797799, "grad_norm": 3.660200595855713, "learning_rate": 3.561030710694388e-05, "loss": 2.1779, "step": 75840 }, { "epoch": 5.15321375186846, "grad_norm": 2.886294364929199, "learning_rate": 3.560606060606061e-05, "loss": 2.1355, "step": 75845 }, { "epoch": 5.153553471939122, "grad_norm": 3.215522050857544, "learning_rate": 3.5601814105177335e-05, "loss": 2.2424, "step": 75850 }, { "epoch": 5.153893192009784, "grad_norm": 3.4853768348693848, "learning_rate": 3.559756760429406e-05, "loss": 2.3245, "step": 75855 }, { "epoch": 5.154232912080445, "grad_norm": 2.8152832984924316, "learning_rate": 3.559332110341079e-05, "loss": 2.3346, "step": 75860 }, { "epoch": 5.154572632151107, "grad_norm": 3.536527156829834, "learning_rate": 3.558907460252752e-05, "loss": 2.3137, "step": 75865 }, { "epoch": 5.1549123522217695, "grad_norm": 3.627070903778076, "learning_rate": 3.558482810164425e-05, "loss": 2.1119, "step": 75870 }, { "epoch": 5.155252072292431, "grad_norm": 13.039164543151855, "learning_rate": 3.5580581600760975e-05, 
"loss": 2.2406, "step": 75875 }, { "epoch": 5.155591792363093, "grad_norm": 3.224158763885498, "learning_rate": 3.55763350998777e-05, "loss": 2.2336, "step": 75880 }, { "epoch": 5.155931512433755, "grad_norm": 3.8883657455444336, "learning_rate": 3.557208859899443e-05, "loss": 2.3442, "step": 75885 }, { "epoch": 5.156271232504416, "grad_norm": 3.6590306758880615, "learning_rate": 3.556784209811116e-05, "loss": 2.3305, "step": 75890 }, { "epoch": 5.156610952575078, "grad_norm": 4.503891944885254, "learning_rate": 3.556359559722788e-05, "loss": 2.0412, "step": 75895 }, { "epoch": 5.15695067264574, "grad_norm": 2.9926037788391113, "learning_rate": 3.5559349096344615e-05, "loss": 2.0717, "step": 75900 }, { "epoch": 5.157290392716401, "grad_norm": 2.9554855823516846, "learning_rate": 3.555510259546134e-05, "loss": 2.2751, "step": 75905 }, { "epoch": 5.157630112787063, "grad_norm": 3.174112319946289, "learning_rate": 3.5550856094578064e-05, "loss": 2.1808, "step": 75910 }, { "epoch": 5.1579698328577255, "grad_norm": 2.862764596939087, "learning_rate": 3.55466095936948e-05, "loss": 1.8376, "step": 75915 }, { "epoch": 5.158309552928387, "grad_norm": 3.466963768005371, "learning_rate": 3.554236309281153e-05, "loss": 2.3753, "step": 75920 }, { "epoch": 5.158649272999049, "grad_norm": 2.897519111633301, "learning_rate": 3.5538116591928255e-05, "loss": 2.289, "step": 75925 }, { "epoch": 5.158988993069711, "grad_norm": 3.1488828659057617, "learning_rate": 3.5533870091044976e-05, "loss": 2.2016, "step": 75930 }, { "epoch": 5.159328713140372, "grad_norm": 4.021778106689453, "learning_rate": 3.552962359016171e-05, "loss": 2.2408, "step": 75935 }, { "epoch": 5.159668433211034, "grad_norm": 3.2573606967926025, "learning_rate": 3.552537708927844e-05, "loss": 2.3677, "step": 75940 }, { "epoch": 5.160008153281696, "grad_norm": 3.8421385288238525, "learning_rate": 3.552113058839516e-05, "loss": 2.3422, "step": 75945 }, { "epoch": 5.160347873352357, "grad_norm": 2.6291191577911377, 
"learning_rate": 3.5516884087511895e-05, "loss": 2.1343, "step": 75950 }, { "epoch": 5.160687593423019, "grad_norm": 3.920149803161621, "learning_rate": 3.551263758662862e-05, "loss": 2.1604, "step": 75955 }, { "epoch": 5.1610273134936815, "grad_norm": 3.8069398403167725, "learning_rate": 3.5508391085745344e-05, "loss": 2.2265, "step": 75960 }, { "epoch": 5.161367033564343, "grad_norm": 5.045531749725342, "learning_rate": 3.550414458486208e-05, "loss": 2.3263, "step": 75965 }, { "epoch": 5.161706753635005, "grad_norm": 3.2255876064300537, "learning_rate": 3.549989808397881e-05, "loss": 2.2906, "step": 75970 }, { "epoch": 5.162046473705667, "grad_norm": 3.004917621612549, "learning_rate": 3.549565158309553e-05, "loss": 2.0905, "step": 75975 }, { "epoch": 5.162386193776328, "grad_norm": 3.176740884780884, "learning_rate": 3.5491405082212256e-05, "loss": 2.3707, "step": 75980 }, { "epoch": 5.16272591384699, "grad_norm": 2.8369693756103516, "learning_rate": 3.548715858132899e-05, "loss": 2.1968, "step": 75985 }, { "epoch": 5.163065633917652, "grad_norm": 3.5945005416870117, "learning_rate": 3.548291208044571e-05, "loss": 2.2784, "step": 75990 }, { "epoch": 5.163405353988313, "grad_norm": 3.3399462699890137, "learning_rate": 3.547866557956244e-05, "loss": 2.4776, "step": 75995 }, { "epoch": 5.163745074058975, "grad_norm": 2.8936927318573, "learning_rate": 3.5474419078679175e-05, "loss": 2.4841, "step": 76000 }, { "epoch": 5.1640847941296375, "grad_norm": 2.496879816055298, "learning_rate": 3.5470172577795896e-05, "loss": 2.4042, "step": 76005 }, { "epoch": 5.164424514200299, "grad_norm": 4.860127925872803, "learning_rate": 3.5465926076912624e-05, "loss": 2.3511, "step": 76010 }, { "epoch": 5.164764234270961, "grad_norm": 3.0127410888671875, "learning_rate": 3.546167957602935e-05, "loss": 2.2849, "step": 76015 }, { "epoch": 5.165103954341623, "grad_norm": 3.4082772731781006, "learning_rate": 3.545743307514608e-05, "loss": 2.4839, "step": 76020 }, { "epoch": 
5.165443674412284, "grad_norm": 3.227168083190918, "learning_rate": 3.545318657426281e-05, "loss": 2.1845, "step": 76025 }, { "epoch": 5.165783394482946, "grad_norm": 3.732001781463623, "learning_rate": 3.5448940073379536e-05, "loss": 2.0577, "step": 76030 }, { "epoch": 5.166123114553608, "grad_norm": 4.456832408905029, "learning_rate": 3.5444693572496264e-05, "loss": 2.2694, "step": 76035 }, { "epoch": 5.166462834624269, "grad_norm": 3.887037992477417, "learning_rate": 3.544044707161299e-05, "loss": 2.2361, "step": 76040 }, { "epoch": 5.166802554694931, "grad_norm": 3.7229795455932617, "learning_rate": 3.543620057072972e-05, "loss": 2.1595, "step": 76045 }, { "epoch": 5.1671422747655935, "grad_norm": 3.444157838821411, "learning_rate": 3.543195406984645e-05, "loss": 2.4144, "step": 76050 }, { "epoch": 5.167481994836255, "grad_norm": 3.612952470779419, "learning_rate": 3.5427707568963176e-05, "loss": 2.2054, "step": 76055 }, { "epoch": 5.167821714906917, "grad_norm": 3.4275898933410645, "learning_rate": 3.5423461068079904e-05, "loss": 2.0708, "step": 76060 }, { "epoch": 5.168161434977579, "grad_norm": 3.041402578353882, "learning_rate": 3.541921456719663e-05, "loss": 2.1583, "step": 76065 }, { "epoch": 5.16850115504824, "grad_norm": 3.1363987922668457, "learning_rate": 3.541496806631336e-05, "loss": 2.4124, "step": 76070 }, { "epoch": 5.168840875118902, "grad_norm": 4.535229206085205, "learning_rate": 3.541072156543009e-05, "loss": 2.1639, "step": 76075 }, { "epoch": 5.169180595189564, "grad_norm": 3.4597623348236084, "learning_rate": 3.540647506454681e-05, "loss": 2.4306, "step": 76080 }, { "epoch": 5.169520315260225, "grad_norm": 3.901245594024658, "learning_rate": 3.5402228563663544e-05, "loss": 2.327, "step": 76085 }, { "epoch": 5.1698600353308874, "grad_norm": 3.5673229694366455, "learning_rate": 3.539798206278027e-05, "loss": 2.2238, "step": 76090 }, { "epoch": 5.1701997554015495, "grad_norm": 2.993560552597046, "learning_rate": 3.5393735561897e-05, "loss": 
2.0578, "step": 76095 }, { "epoch": 5.170539475472211, "grad_norm": 3.825273275375366, "learning_rate": 3.538948906101373e-05, "loss": 2.3392, "step": 76100 }, { "epoch": 5.170879195542873, "grad_norm": 3.2689435482025146, "learning_rate": 3.5385242560130456e-05, "loss": 2.1749, "step": 76105 }, { "epoch": 5.171218915613535, "grad_norm": 3.4039604663848877, "learning_rate": 3.5380996059247184e-05, "loss": 2.0005, "step": 76110 }, { "epoch": 5.171558635684196, "grad_norm": 3.587482452392578, "learning_rate": 3.5376749558363906e-05, "loss": 2.4561, "step": 76115 }, { "epoch": 5.171898355754858, "grad_norm": 3.2542824745178223, "learning_rate": 3.537250305748064e-05, "loss": 2.3658, "step": 76120 }, { "epoch": 5.17223807582552, "grad_norm": 3.2709574699401855, "learning_rate": 3.536825655659737e-05, "loss": 1.9768, "step": 76125 }, { "epoch": 5.172577795896181, "grad_norm": 3.5051331520080566, "learning_rate": 3.536401005571409e-05, "loss": 2.0102, "step": 76130 }, { "epoch": 5.1729175159668435, "grad_norm": 4.550400733947754, "learning_rate": 3.5359763554830825e-05, "loss": 2.4792, "step": 76135 }, { "epoch": 5.1732572360375055, "grad_norm": 3.083414077758789, "learning_rate": 3.535551705394755e-05, "loss": 2.227, "step": 76140 }, { "epoch": 5.173596956108167, "grad_norm": 3.4445605278015137, "learning_rate": 3.5351270553064274e-05, "loss": 2.1206, "step": 76145 }, { "epoch": 5.173936676178829, "grad_norm": 3.1179840564727783, "learning_rate": 3.5347024052181e-05, "loss": 2.2159, "step": 76150 }, { "epoch": 5.174276396249491, "grad_norm": 3.602309226989746, "learning_rate": 3.5342777551297737e-05, "loss": 2.1346, "step": 76155 }, { "epoch": 5.174616116320152, "grad_norm": 2.8907322883605957, "learning_rate": 3.533853105041446e-05, "loss": 2.3582, "step": 76160 }, { "epoch": 5.174955836390814, "grad_norm": 3.389930009841919, "learning_rate": 3.5334284549531186e-05, "loss": 2.3699, "step": 76165 }, { "epoch": 5.175295556461475, "grad_norm": 3.188183546066284, 
"learning_rate": 3.533003804864792e-05, "loss": 2.1456, "step": 76170 }, { "epoch": 5.175635276532137, "grad_norm": 3.8562638759613037, "learning_rate": 3.532579154776464e-05, "loss": 2.379, "step": 76175 }, { "epoch": 5.1759749966027995, "grad_norm": 3.2781801223754883, "learning_rate": 3.532154504688137e-05, "loss": 2.0057, "step": 76180 }, { "epoch": 5.176314716673461, "grad_norm": 2.6811630725860596, "learning_rate": 3.53172985459981e-05, "loss": 2.4122, "step": 76185 }, { "epoch": 5.176654436744123, "grad_norm": 3.757375717163086, "learning_rate": 3.5313052045114826e-05, "loss": 2.0767, "step": 76190 }, { "epoch": 5.176994156814785, "grad_norm": 3.707141637802124, "learning_rate": 3.5308805544231554e-05, "loss": 2.1884, "step": 76195 }, { "epoch": 5.177333876885446, "grad_norm": 4.057506561279297, "learning_rate": 3.530455904334828e-05, "loss": 2.5104, "step": 76200 }, { "epoch": 5.177673596956108, "grad_norm": 3.8041672706604004, "learning_rate": 3.530031254246501e-05, "loss": 2.1356, "step": 76205 }, { "epoch": 5.17801331702677, "grad_norm": 3.1375269889831543, "learning_rate": 3.529606604158174e-05, "loss": 2.2169, "step": 76210 }, { "epoch": 5.178353037097431, "grad_norm": 3.799194812774658, "learning_rate": 3.5291819540698466e-05, "loss": 2.209, "step": 76215 }, { "epoch": 5.178692757168093, "grad_norm": 3.02951717376709, "learning_rate": 3.5287573039815194e-05, "loss": 2.5663, "step": 76220 }, { "epoch": 5.1790324772387555, "grad_norm": 2.7666637897491455, "learning_rate": 3.528332653893192e-05, "loss": 2.5583, "step": 76225 }, { "epoch": 5.179372197309417, "grad_norm": 3.5591325759887695, "learning_rate": 3.527908003804865e-05, "loss": 2.1619, "step": 76230 }, { "epoch": 5.179711917380079, "grad_norm": 3.42232084274292, "learning_rate": 3.527483353716538e-05, "loss": 2.1138, "step": 76235 }, { "epoch": 5.180051637450741, "grad_norm": 3.1895158290863037, "learning_rate": 3.5270587036282106e-05, "loss": 2.3933, "step": 76240 }, { "epoch": 
5.180391357521402, "grad_norm": 3.2807304859161377, "learning_rate": 3.5266340535398834e-05, "loss": 2.1806, "step": 76245 }, { "epoch": 5.180731077592064, "grad_norm": 3.0650389194488525, "learning_rate": 3.5262094034515555e-05, "loss": 2.2602, "step": 76250 }, { "epoch": 5.181070797662726, "grad_norm": 3.207857131958008, "learning_rate": 3.525784753363229e-05, "loss": 2.1707, "step": 76255 }, { "epoch": 5.181410517733387, "grad_norm": 3.5927891731262207, "learning_rate": 3.525360103274902e-05, "loss": 2.2298, "step": 76260 }, { "epoch": 5.181750237804049, "grad_norm": 3.8065273761749268, "learning_rate": 3.5249354531865746e-05, "loss": 2.2392, "step": 76265 }, { "epoch": 5.1820899578747115, "grad_norm": 4.297056674957275, "learning_rate": 3.5245108030982474e-05, "loss": 2.0378, "step": 76270 }, { "epoch": 5.182429677945373, "grad_norm": 3.266828775405884, "learning_rate": 3.52408615300992e-05, "loss": 2.3732, "step": 76275 }, { "epoch": 5.182769398016035, "grad_norm": 3.2342770099639893, "learning_rate": 3.523661502921593e-05, "loss": 2.2732, "step": 76280 }, { "epoch": 5.183109118086697, "grad_norm": 3.9552836418151855, "learning_rate": 3.523236852833265e-05, "loss": 2.2125, "step": 76285 }, { "epoch": 5.183448838157358, "grad_norm": 2.9073805809020996, "learning_rate": 3.5228122027449386e-05, "loss": 2.3282, "step": 76290 }, { "epoch": 5.18378855822802, "grad_norm": 3.5537338256835938, "learning_rate": 3.5223875526566114e-05, "loss": 2.1097, "step": 76295 }, { "epoch": 5.184128278298682, "grad_norm": 4.05224609375, "learning_rate": 3.5219629025682835e-05, "loss": 2.3751, "step": 76300 }, { "epoch": 5.184467998369343, "grad_norm": 3.1212706565856934, "learning_rate": 3.521538252479957e-05, "loss": 2.4938, "step": 76305 }, { "epoch": 5.184807718440005, "grad_norm": 4.040172100067139, "learning_rate": 3.52111360239163e-05, "loss": 2.2042, "step": 76310 }, { "epoch": 5.1851474385106675, "grad_norm": 3.4895331859588623, "learning_rate": 3.520688952303302e-05, 
"loss": 1.9042, "step": 76315 }, { "epoch": 5.185487158581329, "grad_norm": 3.16692852973938, "learning_rate": 3.520264302214975e-05, "loss": 2.2594, "step": 76320 }, { "epoch": 5.185826878651991, "grad_norm": 3.7654504776000977, "learning_rate": 3.519839652126648e-05, "loss": 2.141, "step": 76325 }, { "epoch": 5.186166598722653, "grad_norm": 3.925797939300537, "learning_rate": 3.51941500203832e-05, "loss": 2.1379, "step": 76330 }, { "epoch": 5.186506318793314, "grad_norm": 3.0965094566345215, "learning_rate": 3.518990351949993e-05, "loss": 2.2113, "step": 76335 }, { "epoch": 5.186846038863976, "grad_norm": 3.4162232875823975, "learning_rate": 3.5185657018616666e-05, "loss": 2.2109, "step": 76340 }, { "epoch": 5.187185758934638, "grad_norm": 3.539973020553589, "learning_rate": 3.518141051773339e-05, "loss": 2.2461, "step": 76345 }, { "epoch": 5.187525479005299, "grad_norm": 3.2007932662963867, "learning_rate": 3.5177164016850115e-05, "loss": 2.0919, "step": 76350 }, { "epoch": 5.187865199075961, "grad_norm": 3.430968999862671, "learning_rate": 3.517291751596685e-05, "loss": 2.4597, "step": 76355 }, { "epoch": 5.1882049191466235, "grad_norm": 3.231595516204834, "learning_rate": 3.516867101508357e-05, "loss": 2.1947, "step": 76360 }, { "epoch": 5.188544639217285, "grad_norm": 3.093777656555176, "learning_rate": 3.51644245142003e-05, "loss": 2.2279, "step": 76365 }, { "epoch": 5.188884359287947, "grad_norm": 2.732346773147583, "learning_rate": 3.516017801331703e-05, "loss": 2.5335, "step": 76370 }, { "epoch": 5.189224079358609, "grad_norm": 3.319714307785034, "learning_rate": 3.5155931512433755e-05, "loss": 2.1136, "step": 76375 }, { "epoch": 5.18956379942927, "grad_norm": 2.8596744537353516, "learning_rate": 3.515168501155048e-05, "loss": 2.2609, "step": 76380 }, { "epoch": 5.189903519499932, "grad_norm": 3.4068663120269775, "learning_rate": 3.514743851066721e-05, "loss": 2.3469, "step": 76385 }, { "epoch": 5.190243239570594, "grad_norm": 3.413130044937134, 
"learning_rate": 3.514319200978394e-05, "loss": 2.387, "step": 76390 }, { "epoch": 5.190582959641255, "grad_norm": 3.790196180343628, "learning_rate": 3.513894550890067e-05, "loss": 2.3223, "step": 76395 }, { "epoch": 5.1909226797119175, "grad_norm": 3.096264600753784, "learning_rate": 3.5134699008017395e-05, "loss": 2.1762, "step": 76400 }, { "epoch": 5.1912623997825795, "grad_norm": 3.3378746509552, "learning_rate": 3.513045250713412e-05, "loss": 2.2352, "step": 76405 }, { "epoch": 5.191602119853241, "grad_norm": 3.405924081802368, "learning_rate": 3.512620600625085e-05, "loss": 2.3203, "step": 76410 }, { "epoch": 5.191941839923903, "grad_norm": 2.490659713745117, "learning_rate": 3.512195950536758e-05, "loss": 2.1314, "step": 76415 }, { "epoch": 5.192281559994565, "grad_norm": 2.934523105621338, "learning_rate": 3.51177130044843e-05, "loss": 1.8364, "step": 76420 }, { "epoch": 5.192621280065226, "grad_norm": 2.9273059368133545, "learning_rate": 3.5113466503601035e-05, "loss": 2.5347, "step": 76425 }, { "epoch": 5.192961000135888, "grad_norm": 3.157019853591919, "learning_rate": 3.5109220002717763e-05, "loss": 2.208, "step": 76430 }, { "epoch": 5.19330072020655, "grad_norm": 2.9231860637664795, "learning_rate": 3.510497350183449e-05, "loss": 2.1612, "step": 76435 }, { "epoch": 5.193640440277211, "grad_norm": 3.36852765083313, "learning_rate": 3.510072700095122e-05, "loss": 2.2814, "step": 76440 }, { "epoch": 5.1939801603478735, "grad_norm": 3.9519777297973633, "learning_rate": 3.509648050006795e-05, "loss": 2.5546, "step": 76445 }, { "epoch": 5.1943198804185355, "grad_norm": 2.937885046005249, "learning_rate": 3.5092233999184675e-05, "loss": 2.4018, "step": 76450 }, { "epoch": 5.194659600489197, "grad_norm": 3.7521231174468994, "learning_rate": 3.5087987498301403e-05, "loss": 2.219, "step": 76455 }, { "epoch": 5.194999320559859, "grad_norm": 2.662574529647827, "learning_rate": 3.508374099741813e-05, "loss": 2.372, "step": 76460 }, { "epoch": 5.195339040630521, 
"grad_norm": 3.244647741317749, "learning_rate": 3.507949449653486e-05, "loss": 2.3153, "step": 76465 }, { "epoch": 5.195678760701182, "grad_norm": 3.44993257522583, "learning_rate": 3.507524799565158e-05, "loss": 2.2097, "step": 76470 }, { "epoch": 5.196018480771844, "grad_norm": 3.4805519580841064, "learning_rate": 3.5071001494768315e-05, "loss": 2.3179, "step": 76475 }, { "epoch": 5.196358200842505, "grad_norm": 2.468520164489746, "learning_rate": 3.5066754993885043e-05, "loss": 2.2908, "step": 76480 }, { "epoch": 5.196697920913167, "grad_norm": 3.854541063308716, "learning_rate": 3.5062508493001765e-05, "loss": 2.2162, "step": 76485 }, { "epoch": 5.1970376409838295, "grad_norm": 3.453511953353882, "learning_rate": 3.50582619921185e-05, "loss": 2.3148, "step": 76490 }, { "epoch": 5.197377361054491, "grad_norm": 3.0801913738250732, "learning_rate": 3.505401549123523e-05, "loss": 2.1608, "step": 76495 }, { "epoch": 5.197717081125153, "grad_norm": 3.8137736320495605, "learning_rate": 3.504976899035195e-05, "loss": 2.1917, "step": 76500 }, { "epoch": 5.198056801195815, "grad_norm": 3.6789214611053467, "learning_rate": 3.504552248946868e-05, "loss": 2.2463, "step": 76505 }, { "epoch": 5.198396521266476, "grad_norm": 3.8925695419311523, "learning_rate": 3.504127598858541e-05, "loss": 2.345, "step": 76510 }, { "epoch": 5.198736241337138, "grad_norm": 2.8790712356567383, "learning_rate": 3.503702948770213e-05, "loss": 2.2109, "step": 76515 }, { "epoch": 5.1990759614078, "grad_norm": 2.5918805599212646, "learning_rate": 3.503278298681886e-05, "loss": 1.96, "step": 76520 }, { "epoch": 5.199415681478461, "grad_norm": 3.813710927963257, "learning_rate": 3.5028536485935596e-05, "loss": 2.2383, "step": 76525 }, { "epoch": 5.199755401549123, "grad_norm": 2.4678523540496826, "learning_rate": 3.502428998505232e-05, "loss": 2.339, "step": 76530 }, { "epoch": 5.2000951216197855, "grad_norm": 3.479252815246582, "learning_rate": 3.5020043484169045e-05, "loss": 2.3618, "step": 76535 
}, { "epoch": 5.200434841690447, "grad_norm": 3.5460152626037598, "learning_rate": 3.501579698328577e-05, "loss": 2.2304, "step": 76540 }, { "epoch": 5.200774561761109, "grad_norm": 3.3283286094665527, "learning_rate": 3.50115504824025e-05, "loss": 2.1675, "step": 76545 }, { "epoch": 5.201114281831771, "grad_norm": 4.318070411682129, "learning_rate": 3.500730398151923e-05, "loss": 2.3074, "step": 76550 }, { "epoch": 5.201454001902432, "grad_norm": 2.9722211360931396, "learning_rate": 3.500305748063596e-05, "loss": 2.1685, "step": 76555 }, { "epoch": 5.201793721973094, "grad_norm": 3.5316953659057617, "learning_rate": 3.4998810979752685e-05, "loss": 2.2884, "step": 76560 }, { "epoch": 5.202133442043756, "grad_norm": 3.9971275329589844, "learning_rate": 3.499456447886941e-05, "loss": 2.0787, "step": 76565 }, { "epoch": 5.202473162114417, "grad_norm": 3.9894063472747803, "learning_rate": 3.499031797798614e-05, "loss": 2.0538, "step": 76570 }, { "epoch": 5.202812882185079, "grad_norm": 3.278874635696411, "learning_rate": 3.498607147710287e-05, "loss": 2.2772, "step": 76575 }, { "epoch": 5.2031526022557415, "grad_norm": 3.063961982727051, "learning_rate": 3.49818249762196e-05, "loss": 2.4344, "step": 76580 }, { "epoch": 5.203492322326403, "grad_norm": 2.6779444217681885, "learning_rate": 3.4977578475336325e-05, "loss": 2.1193, "step": 76585 }, { "epoch": 5.203832042397065, "grad_norm": 3.3777005672454834, "learning_rate": 3.497333197445305e-05, "loss": 2.3422, "step": 76590 }, { "epoch": 5.204171762467727, "grad_norm": 3.30938720703125, "learning_rate": 3.496908547356978e-05, "loss": 2.1426, "step": 76595 }, { "epoch": 5.204511482538388, "grad_norm": 3.288788080215454, "learning_rate": 3.496483897268651e-05, "loss": 2.3234, "step": 76600 }, { "epoch": 5.20485120260905, "grad_norm": 3.109241485595703, "learning_rate": 3.496059247180324e-05, "loss": 2.2188, "step": 76605 }, { "epoch": 5.205190922679712, "grad_norm": 3.4393486976623535, "learning_rate": 
3.4956345970919965e-05, "loss": 2.1906, "step": 76610 }, { "epoch": 5.205530642750373, "grad_norm": 3.6263389587402344, "learning_rate": 3.495209947003669e-05, "loss": 2.2452, "step": 76615 }, { "epoch": 5.205870362821035, "grad_norm": 2.890259265899658, "learning_rate": 3.494785296915342e-05, "loss": 2.5044, "step": 76620 }, { "epoch": 5.2062100828916975, "grad_norm": 4.038003444671631, "learning_rate": 3.494360646827015e-05, "loss": 2.4717, "step": 76625 }, { "epoch": 5.206549802962359, "grad_norm": 3.1791539192199707, "learning_rate": 3.493935996738688e-05, "loss": 2.364, "step": 76630 }, { "epoch": 5.206889523033021, "grad_norm": 3.624934673309326, "learning_rate": 3.4935113466503605e-05, "loss": 2.2838, "step": 76635 }, { "epoch": 5.207229243103683, "grad_norm": 2.957442283630371, "learning_rate": 3.4930866965620326e-05, "loss": 2.1823, "step": 76640 }, { "epoch": 5.207568963174344, "grad_norm": 2.855867385864258, "learning_rate": 3.492662046473706e-05, "loss": 2.0598, "step": 76645 }, { "epoch": 5.207908683245006, "grad_norm": 3.42069411277771, "learning_rate": 3.492237396385379e-05, "loss": 2.0541, "step": 76650 }, { "epoch": 5.208248403315668, "grad_norm": 3.0717856884002686, "learning_rate": 3.491812746297051e-05, "loss": 2.367, "step": 76655 }, { "epoch": 5.208588123386329, "grad_norm": 2.594717264175415, "learning_rate": 3.4913880962087245e-05, "loss": 2.0941, "step": 76660 }, { "epoch": 5.2089278434569914, "grad_norm": 3.6315369606018066, "learning_rate": 3.490963446120397e-05, "loss": 2.2293, "step": 76665 }, { "epoch": 5.2092675635276535, "grad_norm": 3.413893699645996, "learning_rate": 3.4905387960320694e-05, "loss": 1.9751, "step": 76670 }, { "epoch": 5.209607283598315, "grad_norm": 3.359774351119995, "learning_rate": 3.490114145943742e-05, "loss": 2.2252, "step": 76675 }, { "epoch": 5.209947003668977, "grad_norm": 3.295197010040283, "learning_rate": 3.489689495855416e-05, "loss": 2.2151, "step": 76680 }, { "epoch": 5.210286723739639, "grad_norm": 
3.26373028755188, "learning_rate": 3.489264845767088e-05, "loss": 2.1706, "step": 76685 }, { "epoch": 5.2106264438103, "grad_norm": 3.6059012413024902, "learning_rate": 3.4888401956787606e-05, "loss": 2.0074, "step": 76690 }, { "epoch": 5.210966163880962, "grad_norm": 2.9353559017181396, "learning_rate": 3.488415545590434e-05, "loss": 2.2667, "step": 76695 }, { "epoch": 5.211305883951624, "grad_norm": 2.924837350845337, "learning_rate": 3.487990895502106e-05, "loss": 2.4968, "step": 76700 }, { "epoch": 5.211645604022285, "grad_norm": 3.391078472137451, "learning_rate": 3.487566245413779e-05, "loss": 2.2332, "step": 76705 }, { "epoch": 5.2119853240929475, "grad_norm": 3.8657164573669434, "learning_rate": 3.487141595325452e-05, "loss": 2.3879, "step": 76710 }, { "epoch": 5.2123250441636095, "grad_norm": 4.132978439331055, "learning_rate": 3.4867169452371246e-05, "loss": 2.2906, "step": 76715 }, { "epoch": 5.212664764234271, "grad_norm": 3.4629034996032715, "learning_rate": 3.4862922951487974e-05, "loss": 2.4492, "step": 76720 }, { "epoch": 5.213004484304933, "grad_norm": 3.344252586364746, "learning_rate": 3.48586764506047e-05, "loss": 2.3889, "step": 76725 }, { "epoch": 5.213344204375595, "grad_norm": 4.159158706665039, "learning_rate": 3.485442994972143e-05, "loss": 2.1888, "step": 76730 }, { "epoch": 5.213683924446256, "grad_norm": 3.449641227722168, "learning_rate": 3.485018344883816e-05, "loss": 2.5192, "step": 76735 }, { "epoch": 5.214023644516918, "grad_norm": 3.506554365158081, "learning_rate": 3.4845936947954886e-05, "loss": 2.0869, "step": 76740 }, { "epoch": 5.21436336458758, "grad_norm": 3.437596082687378, "learning_rate": 3.4841690447071614e-05, "loss": 2.2576, "step": 76745 }, { "epoch": 5.214703084658241, "grad_norm": 3.4437525272369385, "learning_rate": 3.483744394618834e-05, "loss": 2.1628, "step": 76750 }, { "epoch": 5.2150428047289035, "grad_norm": 3.9777870178222656, "learning_rate": 3.483319744530507e-05, "loss": 1.9656, "step": 76755 }, { 
"epoch": 5.2153825247995655, "grad_norm": 3.302201986312866, "learning_rate": 3.48289509444218e-05, "loss": 2.4021, "step": 76760 }, { "epoch": 5.215722244870227, "grad_norm": 4.6761016845703125, "learning_rate": 3.4824704443538526e-05, "loss": 2.3972, "step": 76765 }, { "epoch": 5.216061964940889, "grad_norm": 3.9901373386383057, "learning_rate": 3.4820457942655254e-05, "loss": 1.9888, "step": 76770 }, { "epoch": 5.216401685011551, "grad_norm": 3.1161441802978516, "learning_rate": 3.481621144177198e-05, "loss": 2.1387, "step": 76775 }, { "epoch": 5.216741405082212, "grad_norm": 4.17183256149292, "learning_rate": 3.481196494088871e-05, "loss": 2.1324, "step": 76780 }, { "epoch": 5.217081125152874, "grad_norm": 3.7705094814300537, "learning_rate": 3.480771844000544e-05, "loss": 2.2339, "step": 76785 }, { "epoch": 5.217420845223536, "grad_norm": 3.724485158920288, "learning_rate": 3.4803471939122166e-05, "loss": 2.443, "step": 76790 }, { "epoch": 5.217760565294197, "grad_norm": 2.737407922744751, "learning_rate": 3.4799225438238894e-05, "loss": 2.1949, "step": 76795 }, { "epoch": 5.2181002853648595, "grad_norm": 3.10684871673584, "learning_rate": 3.479497893735562e-05, "loss": 2.243, "step": 76800 }, { "epoch": 5.2184400054355216, "grad_norm": 2.8839097023010254, "learning_rate": 3.479073243647235e-05, "loss": 2.4889, "step": 76805 }, { "epoch": 5.218779725506183, "grad_norm": 3.1183767318725586, "learning_rate": 3.478648593558907e-05, "loss": 2.2888, "step": 76810 }, { "epoch": 5.219119445576845, "grad_norm": 2.605325698852539, "learning_rate": 3.4782239434705806e-05, "loss": 2.38, "step": 76815 }, { "epoch": 5.219459165647507, "grad_norm": 2.8736512660980225, "learning_rate": 3.4777992933822534e-05, "loss": 2.1891, "step": 76820 }, { "epoch": 5.219798885718168, "grad_norm": 3.2619130611419678, "learning_rate": 3.4773746432939256e-05, "loss": 2.3082, "step": 76825 }, { "epoch": 5.22013860578883, "grad_norm": 3.4056177139282227, "learning_rate": 
3.476949993205599e-05, "loss": 2.2674, "step": 76830 }, { "epoch": 5.220478325859492, "grad_norm": 3.126577854156494, "learning_rate": 3.476525343117272e-05, "loss": 2.1492, "step": 76835 }, { "epoch": 5.220818045930153, "grad_norm": 3.8913300037384033, "learning_rate": 3.476100693028944e-05, "loss": 2.3233, "step": 76840 }, { "epoch": 5.2211577660008155, "grad_norm": 3.532148838043213, "learning_rate": 3.475676042940617e-05, "loss": 2.2202, "step": 76845 }, { "epoch": 5.221497486071477, "grad_norm": 3.9200687408447266, "learning_rate": 3.47525139285229e-05, "loss": 2.2374, "step": 76850 }, { "epoch": 5.221837206142139, "grad_norm": 2.97650408744812, "learning_rate": 3.4748267427639624e-05, "loss": 2.1921, "step": 76855 }, { "epoch": 5.222176926212801, "grad_norm": 3.5212459564208984, "learning_rate": 3.474402092675635e-05, "loss": 2.1977, "step": 76860 }, { "epoch": 5.222516646283462, "grad_norm": 3.584606647491455, "learning_rate": 3.4739774425873087e-05, "loss": 2.3839, "step": 76865 }, { "epoch": 5.222856366354124, "grad_norm": 3.4822909832000732, "learning_rate": 3.473552792498981e-05, "loss": 2.4976, "step": 76870 }, { "epoch": 5.223196086424786, "grad_norm": 3.7760517597198486, "learning_rate": 3.4731281424106536e-05, "loss": 2.2531, "step": 76875 }, { "epoch": 5.223535806495447, "grad_norm": 3.023651123046875, "learning_rate": 3.472703492322327e-05, "loss": 1.9403, "step": 76880 }, { "epoch": 5.223875526566109, "grad_norm": 3.7523505687713623, "learning_rate": 3.472278842233999e-05, "loss": 2.1031, "step": 76885 }, { "epoch": 5.2242152466367715, "grad_norm": 3.0735511779785156, "learning_rate": 3.471854192145672e-05, "loss": 2.4339, "step": 76890 }, { "epoch": 5.224554966707433, "grad_norm": 3.300300359725952, "learning_rate": 3.471429542057345e-05, "loss": 2.0111, "step": 76895 }, { "epoch": 5.224894686778095, "grad_norm": 3.569693088531494, "learning_rate": 3.4710048919690176e-05, "loss": 2.1332, "step": 76900 }, { "epoch": 5.225234406848757, "grad_norm": 
3.761502265930176, "learning_rate": 3.4705802418806904e-05, "loss": 2.378, "step": 76905 }, { "epoch": 5.225574126919418, "grad_norm": 3.2165796756744385, "learning_rate": 3.470155591792363e-05, "loss": 2.3164, "step": 76910 }, { "epoch": 5.22591384699008, "grad_norm": 3.7387146949768066, "learning_rate": 3.469730941704036e-05, "loss": 2.3038, "step": 76915 }, { "epoch": 5.226253567060742, "grad_norm": 3.9306840896606445, "learning_rate": 3.469306291615709e-05, "loss": 2.3485, "step": 76920 }, { "epoch": 5.226593287131403, "grad_norm": 4.047614097595215, "learning_rate": 3.4688816415273816e-05, "loss": 2.3231, "step": 76925 }, { "epoch": 5.226933007202065, "grad_norm": 2.5969698429107666, "learning_rate": 3.4684569914390544e-05, "loss": 2.5236, "step": 76930 }, { "epoch": 5.2272727272727275, "grad_norm": 3.2817132472991943, "learning_rate": 3.468032341350727e-05, "loss": 2.1197, "step": 76935 }, { "epoch": 5.227612447343389, "grad_norm": 3.5916194915771484, "learning_rate": 3.4676076912624e-05, "loss": 2.1319, "step": 76940 }, { "epoch": 5.227952167414051, "grad_norm": 3.133582592010498, "learning_rate": 3.467183041174073e-05, "loss": 2.2748, "step": 76945 }, { "epoch": 5.228291887484713, "grad_norm": 3.3117191791534424, "learning_rate": 3.4667583910857456e-05, "loss": 2.2438, "step": 76950 }, { "epoch": 5.228631607555374, "grad_norm": 2.7784833908081055, "learning_rate": 3.4663337409974184e-05, "loss": 2.2798, "step": 76955 }, { "epoch": 5.228971327626036, "grad_norm": 3.6597840785980225, "learning_rate": 3.465909090909091e-05, "loss": 2.2488, "step": 76960 }, { "epoch": 5.229311047696698, "grad_norm": 3.4583005905151367, "learning_rate": 3.465484440820764e-05, "loss": 2.2798, "step": 76965 }, { "epoch": 5.229650767767359, "grad_norm": 3.9904603958129883, "learning_rate": 3.465059790732437e-05, "loss": 2.2251, "step": 76970 }, { "epoch": 5.2299904878380215, "grad_norm": 3.769467353820801, "learning_rate": 3.4646351406441096e-05, "loss": 2.381, "step": 76975 }, { 
"epoch": 5.2303302079086835, "grad_norm": 3.123650550842285, "learning_rate": 3.4642104905557824e-05, "loss": 2.3861, "step": 76980 }, { "epoch": 5.230669927979345, "grad_norm": 3.456794261932373, "learning_rate": 3.463785840467455e-05, "loss": 2.1454, "step": 76985 }, { "epoch": 5.231009648050007, "grad_norm": 3.900550603866577, "learning_rate": 3.463361190379128e-05, "loss": 2.1326, "step": 76990 }, { "epoch": 5.231349368120669, "grad_norm": 3.0020928382873535, "learning_rate": 3.4629365402908e-05, "loss": 2.3437, "step": 76995 }, { "epoch": 5.23168908819133, "grad_norm": 3.421417713165283, "learning_rate": 3.4625118902024736e-05, "loss": 2.2268, "step": 77000 }, { "epoch": 5.232028808261992, "grad_norm": 3.3320436477661133, "learning_rate": 3.4620872401141464e-05, "loss": 2.1526, "step": 77005 }, { "epoch": 5.232368528332654, "grad_norm": 3.2525389194488525, "learning_rate": 3.4616625900258185e-05, "loss": 1.9517, "step": 77010 }, { "epoch": 5.232708248403315, "grad_norm": 2.7141003608703613, "learning_rate": 3.461237939937492e-05, "loss": 2.2252, "step": 77015 }, { "epoch": 5.2330479684739775, "grad_norm": 3.4751152992248535, "learning_rate": 3.460813289849165e-05, "loss": 2.2402, "step": 77020 }, { "epoch": 5.2333876885446395, "grad_norm": 3.4118618965148926, "learning_rate": 3.460388639760837e-05, "loss": 2.2402, "step": 77025 }, { "epoch": 5.233727408615301, "grad_norm": 2.807237148284912, "learning_rate": 3.45996398967251e-05, "loss": 2.26, "step": 77030 }, { "epoch": 5.234067128685963, "grad_norm": 4.29789924621582, "learning_rate": 3.459539339584183e-05, "loss": 2.323, "step": 77035 }, { "epoch": 5.234406848756625, "grad_norm": 2.9678564071655273, "learning_rate": 3.459114689495855e-05, "loss": 2.1557, "step": 77040 }, { "epoch": 5.234746568827286, "grad_norm": 3.5767714977264404, "learning_rate": 3.458690039407528e-05, "loss": 2.02, "step": 77045 }, { "epoch": 5.235086288897948, "grad_norm": 3.1443209648132324, "learning_rate": 3.4582653893192016e-05, 
"loss": 2.3908, "step": 77050 }, { "epoch": 5.23542600896861, "grad_norm": 2.9513375759124756, "learning_rate": 3.457840739230874e-05, "loss": 2.0974, "step": 77055 }, { "epoch": 5.235765729039271, "grad_norm": 3.220550537109375, "learning_rate": 3.4574160891425465e-05, "loss": 2.0859, "step": 77060 }, { "epoch": 5.2361054491099335, "grad_norm": 3.8406972885131836, "learning_rate": 3.456991439054219e-05, "loss": 2.2491, "step": 77065 }, { "epoch": 5.2364451691805955, "grad_norm": 3.8805737495422363, "learning_rate": 3.456566788965892e-05, "loss": 2.2811, "step": 77070 }, { "epoch": 5.236784889251257, "grad_norm": 3.3932440280914307, "learning_rate": 3.456142138877565e-05, "loss": 2.2123, "step": 77075 }, { "epoch": 5.237124609321919, "grad_norm": 3.5459327697753906, "learning_rate": 3.455717488789238e-05, "loss": 2.323, "step": 77080 }, { "epoch": 5.237464329392581, "grad_norm": 3.764822244644165, "learning_rate": 3.4552928387009105e-05, "loss": 2.4037, "step": 77085 }, { "epoch": 5.237804049463242, "grad_norm": 3.5651915073394775, "learning_rate": 3.454868188612583e-05, "loss": 2.3967, "step": 77090 }, { "epoch": 5.238143769533904, "grad_norm": 3.2330145835876465, "learning_rate": 3.454443538524256e-05, "loss": 2.2513, "step": 77095 }, { "epoch": 5.238483489604566, "grad_norm": 3.183519124984741, "learning_rate": 3.454018888435929e-05, "loss": 2.3663, "step": 77100 }, { "epoch": 5.238823209675227, "grad_norm": 3.149829149246216, "learning_rate": 3.453594238347602e-05, "loss": 1.9775, "step": 77105 }, { "epoch": 5.2391629297458895, "grad_norm": 3.8320486545562744, "learning_rate": 3.4531695882592745e-05, "loss": 2.1589, "step": 77110 }, { "epoch": 5.239502649816552, "grad_norm": 2.9724745750427246, "learning_rate": 3.452744938170947e-05, "loss": 1.9882, "step": 77115 }, { "epoch": 5.239842369887213, "grad_norm": 3.3145875930786133, "learning_rate": 3.45232028808262e-05, "loss": 2.1927, "step": 77120 }, { "epoch": 5.240182089957875, "grad_norm": 3.2900097370147705, 
"learning_rate": 3.451895637994293e-05, "loss": 2.1548, "step": 77125 }, { "epoch": 5.240521810028537, "grad_norm": 3.809096574783325, "learning_rate": 3.451470987905966e-05, "loss": 2.1532, "step": 77130 }, { "epoch": 5.240861530099198, "grad_norm": 3.5638723373413086, "learning_rate": 3.4510463378176385e-05, "loss": 2.311, "step": 77135 }, { "epoch": 5.24120125016986, "grad_norm": 3.739354372024536, "learning_rate": 3.4506216877293113e-05, "loss": 2.3153, "step": 77140 }, { "epoch": 5.241540970240522, "grad_norm": 3.5584065914154053, "learning_rate": 3.450197037640984e-05, "loss": 2.3838, "step": 77145 }, { "epoch": 5.241880690311183, "grad_norm": 3.2008328437805176, "learning_rate": 3.449772387552657e-05, "loss": 2.2838, "step": 77150 }, { "epoch": 5.2422204103818455, "grad_norm": 2.8613855838775635, "learning_rate": 3.44934773746433e-05, "loss": 2.138, "step": 77155 }, { "epoch": 5.242560130452507, "grad_norm": 3.1891138553619385, "learning_rate": 3.4489230873760025e-05, "loss": 2.2378, "step": 77160 }, { "epoch": 5.242899850523169, "grad_norm": 2.6799628734588623, "learning_rate": 3.448498437287675e-05, "loss": 2.3545, "step": 77165 }, { "epoch": 5.243239570593831, "grad_norm": 3.7617805004119873, "learning_rate": 3.448073787199348e-05, "loss": 2.3848, "step": 77170 }, { "epoch": 5.243579290664492, "grad_norm": 3.9249861240386963, "learning_rate": 3.447649137111021e-05, "loss": 2.2193, "step": 77175 }, { "epoch": 5.243919010735154, "grad_norm": 4.376297473907471, "learning_rate": 3.447224487022693e-05, "loss": 2.3235, "step": 77180 }, { "epoch": 5.244258730805816, "grad_norm": 3.612607002258301, "learning_rate": 3.4467998369343665e-05, "loss": 2.2325, "step": 77185 }, { "epoch": 5.244598450876477, "grad_norm": 3.4670915603637695, "learning_rate": 3.4463751868460393e-05, "loss": 2.0975, "step": 77190 }, { "epoch": 5.244938170947139, "grad_norm": 3.13081431388855, "learning_rate": 3.4459505367577115e-05, "loss": 2.5593, "step": 77195 }, { "epoch": 
5.2452778910178015, "grad_norm": 3.3208770751953125, "learning_rate": 3.445525886669384e-05, "loss": 2.3491, "step": 77200 }, { "epoch": 5.245617611088463, "grad_norm": 3.227714776992798, "learning_rate": 3.445101236581058e-05, "loss": 2.4311, "step": 77205 }, { "epoch": 5.245957331159125, "grad_norm": 4.165633201599121, "learning_rate": 3.44467658649273e-05, "loss": 1.9685, "step": 77210 }, { "epoch": 5.246297051229787, "grad_norm": 3.4308841228485107, "learning_rate": 3.444251936404403e-05, "loss": 2.1586, "step": 77215 }, { "epoch": 5.246636771300448, "grad_norm": 3.4301674365997314, "learning_rate": 3.443827286316076e-05, "loss": 2.3901, "step": 77220 }, { "epoch": 5.24697649137111, "grad_norm": 3.773128032684326, "learning_rate": 3.443402636227748e-05, "loss": 2.0585, "step": 77225 }, { "epoch": 5.247316211441772, "grad_norm": 3.26809024810791, "learning_rate": 3.442977986139421e-05, "loss": 2.1969, "step": 77230 }, { "epoch": 5.247655931512433, "grad_norm": 3.015369176864624, "learning_rate": 3.442553336051094e-05, "loss": 2.0827, "step": 77235 }, { "epoch": 5.2479956515830954, "grad_norm": 3.1332032680511475, "learning_rate": 3.442128685962767e-05, "loss": 2.2348, "step": 77240 }, { "epoch": 5.2483353716537575, "grad_norm": 3.5333077907562256, "learning_rate": 3.4417040358744395e-05, "loss": 2.2901, "step": 77245 }, { "epoch": 5.248675091724419, "grad_norm": 3.2668704986572266, "learning_rate": 3.441279385786112e-05, "loss": 2.3015, "step": 77250 }, { "epoch": 5.249014811795081, "grad_norm": 2.755790948867798, "learning_rate": 3.440854735697785e-05, "loss": 2.126, "step": 77255 }, { "epoch": 5.249354531865743, "grad_norm": 3.041191577911377, "learning_rate": 3.440430085609458e-05, "loss": 2.2309, "step": 77260 }, { "epoch": 5.249694251936404, "grad_norm": 3.873077154159546, "learning_rate": 3.440005435521131e-05, "loss": 2.2183, "step": 77265 }, { "epoch": 5.250033972007066, "grad_norm": 3.633899688720703, "learning_rate": 3.4395807854328035e-05, "loss": 
2.1802, "step": 77270 }, { "epoch": 5.250373692077728, "grad_norm": 3.947781562805176, "learning_rate": 3.439156135344476e-05, "loss": 2.1663, "step": 77275 }, { "epoch": 5.250713412148389, "grad_norm": 3.1792542934417725, "learning_rate": 3.438731485256149e-05, "loss": 2.3742, "step": 77280 }, { "epoch": 5.2510531322190515, "grad_norm": 4.231497764587402, "learning_rate": 3.438306835167822e-05, "loss": 2.3364, "step": 77285 }, { "epoch": 5.2513928522897135, "grad_norm": 2.6675870418548584, "learning_rate": 3.437882185079495e-05, "loss": 2.2509, "step": 77290 }, { "epoch": 5.251732572360375, "grad_norm": 3.2307722568511963, "learning_rate": 3.4374575349911675e-05, "loss": 2.0609, "step": 77295 }, { "epoch": 5.252072292431037, "grad_norm": 3.469001531600952, "learning_rate": 3.43703288490284e-05, "loss": 1.8708, "step": 77300 }, { "epoch": 5.252412012501699, "grad_norm": 2.8585188388824463, "learning_rate": 3.436608234814513e-05, "loss": 2.3239, "step": 77305 }, { "epoch": 5.25275173257236, "grad_norm": 3.506491184234619, "learning_rate": 3.436183584726186e-05, "loss": 2.2077, "step": 77310 }, { "epoch": 5.253091452643022, "grad_norm": 2.7465968132019043, "learning_rate": 3.435758934637859e-05, "loss": 2.2514, "step": 77315 }, { "epoch": 5.253431172713684, "grad_norm": 2.795846939086914, "learning_rate": 3.4353342845495315e-05, "loss": 2.2597, "step": 77320 }, { "epoch": 5.253770892784345, "grad_norm": 3.1153268814086914, "learning_rate": 3.434909634461204e-05, "loss": 2.3994, "step": 77325 }, { "epoch": 5.2541106128550075, "grad_norm": 3.5940418243408203, "learning_rate": 3.434484984372877e-05, "loss": 2.2188, "step": 77330 }, { "epoch": 5.2544503329256695, "grad_norm": 3.6739461421966553, "learning_rate": 3.434060334284549e-05, "loss": 2.2533, "step": 77335 }, { "epoch": 5.254790052996331, "grad_norm": 2.546509027481079, "learning_rate": 3.433635684196223e-05, "loss": 2.4595, "step": 77340 }, { "epoch": 5.255129773066993, "grad_norm": 3.581909656524658, 
"learning_rate": 3.4332110341078955e-05, "loss": 2.4863, "step": 77345 }, { "epoch": 5.255469493137655, "grad_norm": 3.2907838821411133, "learning_rate": 3.4327863840195676e-05, "loss": 2.4077, "step": 77350 }, { "epoch": 5.255809213208316, "grad_norm": 2.7898526191711426, "learning_rate": 3.432361733931241e-05, "loss": 2.3022, "step": 77355 }, { "epoch": 5.256148933278978, "grad_norm": 3.447286605834961, "learning_rate": 3.431937083842914e-05, "loss": 2.1035, "step": 77360 }, { "epoch": 5.25648865334964, "grad_norm": 3.426439046859741, "learning_rate": 3.431512433754586e-05, "loss": 2.47, "step": 77365 }, { "epoch": 5.256828373420301, "grad_norm": 2.736302614212036, "learning_rate": 3.431087783666259e-05, "loss": 2.4208, "step": 77370 }, { "epoch": 5.2571680934909635, "grad_norm": 3.196695327758789, "learning_rate": 3.430663133577932e-05, "loss": 2.304, "step": 77375 }, { "epoch": 5.2575078135616256, "grad_norm": 4.294866561889648, "learning_rate": 3.4302384834896044e-05, "loss": 2.3416, "step": 77380 }, { "epoch": 5.257847533632287, "grad_norm": 3.4877357482910156, "learning_rate": 3.429813833401277e-05, "loss": 2.1283, "step": 77385 }, { "epoch": 5.258187253702949, "grad_norm": 3.2818796634674072, "learning_rate": 3.429389183312951e-05, "loss": 2.2822, "step": 77390 }, { "epoch": 5.258526973773611, "grad_norm": 3.0136053562164307, "learning_rate": 3.428964533224623e-05, "loss": 2.2111, "step": 77395 }, { "epoch": 5.258866693844272, "grad_norm": 3.653383731842041, "learning_rate": 3.4285398831362956e-05, "loss": 2.1702, "step": 77400 }, { "epoch": 5.259206413914934, "grad_norm": 3.5266528129577637, "learning_rate": 3.428115233047969e-05, "loss": 2.4256, "step": 77405 }, { "epoch": 5.259546133985596, "grad_norm": 2.832951784133911, "learning_rate": 3.427690582959641e-05, "loss": 2.1628, "step": 77410 }, { "epoch": 5.259885854056257, "grad_norm": 3.9384074211120605, "learning_rate": 3.427265932871314e-05, "loss": 2.1731, "step": 77415 }, { "epoch": 
5.2602255741269195, "grad_norm": 3.5362555980682373, "learning_rate": 3.426841282782987e-05, "loss": 2.4339, "step": 77420 }, { "epoch": 5.260565294197582, "grad_norm": 4.041205406188965, "learning_rate": 3.4264166326946596e-05, "loss": 2.0951, "step": 77425 }, { "epoch": 5.260905014268243, "grad_norm": 4.202919960021973, "learning_rate": 3.4259919826063324e-05, "loss": 2.3709, "step": 77430 }, { "epoch": 5.261244734338905, "grad_norm": 3.80452561378479, "learning_rate": 3.425567332518005e-05, "loss": 2.3569, "step": 77435 }, { "epoch": 5.261584454409567, "grad_norm": 2.8027608394622803, "learning_rate": 3.425142682429678e-05, "loss": 2.3776, "step": 77440 }, { "epoch": 5.261924174480228, "grad_norm": 3.4238338470458984, "learning_rate": 3.424718032341351e-05, "loss": 2.293, "step": 77445 }, { "epoch": 5.26226389455089, "grad_norm": 4.073309421539307, "learning_rate": 3.4242933822530236e-05, "loss": 2.2959, "step": 77450 }, { "epoch": 5.262603614621552, "grad_norm": 4.085203170776367, "learning_rate": 3.4238687321646964e-05, "loss": 2.2863, "step": 77455 }, { "epoch": 5.262943334692213, "grad_norm": 3.481586456298828, "learning_rate": 3.423444082076369e-05, "loss": 2.3706, "step": 77460 }, { "epoch": 5.2632830547628755, "grad_norm": 2.847182273864746, "learning_rate": 3.423019431988042e-05, "loss": 2.4433, "step": 77465 }, { "epoch": 5.263622774833538, "grad_norm": 3.7228569984436035, "learning_rate": 3.422594781899715e-05, "loss": 2.3083, "step": 77470 }, { "epoch": 5.263962494904199, "grad_norm": 3.654273748397827, "learning_rate": 3.4221701318113876e-05, "loss": 2.4839, "step": 77475 }, { "epoch": 5.264302214974861, "grad_norm": 3.974571466445923, "learning_rate": 3.4217454817230604e-05, "loss": 2.101, "step": 77480 }, { "epoch": 5.264641935045523, "grad_norm": 2.714350938796997, "learning_rate": 3.421320831634733e-05, "loss": 2.3038, "step": 77485 }, { "epoch": 5.264981655116184, "grad_norm": 2.7707135677337646, "learning_rate": 3.420896181546406e-05, "loss": 
2.2545, "step": 77490 }, { "epoch": 5.265321375186846, "grad_norm": 3.740037679672241, "learning_rate": 3.420471531458079e-05, "loss": 2.0765, "step": 77495 }, { "epoch": 5.265661095257508, "grad_norm": 3.5059378147125244, "learning_rate": 3.4200468813697516e-05, "loss": 2.3849, "step": 77500 }, { "epoch": 5.266000815328169, "grad_norm": 3.201810598373413, "learning_rate": 3.41970716129909e-05, "loss": 2.2245, "step": 77505 }, { "epoch": 5.2663405353988315, "grad_norm": 3.3673410415649414, "learning_rate": 3.419282511210763e-05, "loss": 2.2317, "step": 77510 }, { "epoch": 5.266680255469494, "grad_norm": 2.8262414932250977, "learning_rate": 3.4188578611224355e-05, "loss": 2.3285, "step": 77515 }, { "epoch": 5.267019975540155, "grad_norm": 4.442052841186523, "learning_rate": 3.418433211034108e-05, "loss": 2.1351, "step": 77520 }, { "epoch": 5.267359695610817, "grad_norm": 2.9980974197387695, "learning_rate": 3.4180085609457804e-05, "loss": 2.2099, "step": 77525 }, { "epoch": 5.267699415681479, "grad_norm": 3.2627129554748535, "learning_rate": 3.417583910857454e-05, "loss": 2.1556, "step": 77530 }, { "epoch": 5.26803913575214, "grad_norm": 3.6594769954681396, "learning_rate": 3.417159260769127e-05, "loss": 2.429, "step": 77535 }, { "epoch": 5.268378855822802, "grad_norm": 3.0221142768859863, "learning_rate": 3.416734610680799e-05, "loss": 2.1994, "step": 77540 }, { "epoch": 5.268718575893463, "grad_norm": 3.104651689529419, "learning_rate": 3.416309960592472e-05, "loss": 2.3445, "step": 77545 }, { "epoch": 5.2690582959641254, "grad_norm": 3.0433192253112793, "learning_rate": 3.415885310504145e-05, "loss": 2.3017, "step": 77550 }, { "epoch": 5.2693980160347875, "grad_norm": 3.3807218074798584, "learning_rate": 3.415460660415817e-05, "loss": 2.2723, "step": 77555 }, { "epoch": 5.269737736105449, "grad_norm": 3.3345701694488525, "learning_rate": 3.41503601032749e-05, "loss": 2.135, "step": 77560 }, { "epoch": 5.270077456176111, "grad_norm": 4.243229866027832, 
"learning_rate": 3.4146113602391635e-05, "loss": 2.4919, "step": 77565 }, { "epoch": 5.270417176246773, "grad_norm": 3.234144926071167, "learning_rate": 3.4141867101508356e-05, "loss": 2.4276, "step": 77570 }, { "epoch": 5.270756896317434, "grad_norm": 3.7242624759674072, "learning_rate": 3.4137620600625084e-05, "loss": 2.4571, "step": 77575 }, { "epoch": 5.271096616388096, "grad_norm": 4.053334712982178, "learning_rate": 3.413337409974182e-05, "loss": 2.2332, "step": 77580 }, { "epoch": 5.271436336458758, "grad_norm": 3.28702974319458, "learning_rate": 3.412912759885854e-05, "loss": 2.2304, "step": 77585 }, { "epoch": 5.271776056529419, "grad_norm": 3.471557378768921, "learning_rate": 3.412488109797527e-05, "loss": 2.105, "step": 77590 }, { "epoch": 5.2721157766000815, "grad_norm": 3.3830692768096924, "learning_rate": 3.4120634597091996e-05, "loss": 2.3174, "step": 77595 }, { "epoch": 5.2724554966707435, "grad_norm": 3.2851126194000244, "learning_rate": 3.4116388096208724e-05, "loss": 2.2629, "step": 77600 }, { "epoch": 5.272795216741405, "grad_norm": 3.1846742630004883, "learning_rate": 3.411214159532545e-05, "loss": 2.1367, "step": 77605 }, { "epoch": 5.273134936812067, "grad_norm": 3.737788200378418, "learning_rate": 3.410789509444218e-05, "loss": 2.1394, "step": 77610 }, { "epoch": 5.273474656882729, "grad_norm": 3.1975338459014893, "learning_rate": 3.410364859355891e-05, "loss": 2.1385, "step": 77615 }, { "epoch": 5.27381437695339, "grad_norm": 3.677584171295166, "learning_rate": 3.4099402092675636e-05, "loss": 2.4334, "step": 77620 }, { "epoch": 5.274154097024052, "grad_norm": 4.1218438148498535, "learning_rate": 3.4095155591792364e-05, "loss": 2.1855, "step": 77625 }, { "epoch": 5.274493817094714, "grad_norm": 3.57855224609375, "learning_rate": 3.409090909090909e-05, "loss": 2.4425, "step": 77630 }, { "epoch": 5.274833537165375, "grad_norm": 3.42942214012146, "learning_rate": 3.408666259002582e-05, "loss": 2.4138, "step": 77635 }, { "epoch": 
5.2751732572360375, "grad_norm": 3.4790329933166504, "learning_rate": 3.408241608914255e-05, "loss": 2.2645, "step": 77640 }, { "epoch": 5.2755129773066995, "grad_norm": 2.9742422103881836, "learning_rate": 3.4078169588259276e-05, "loss": 2.2496, "step": 77645 }, { "epoch": 5.275852697377361, "grad_norm": 3.670008659362793, "learning_rate": 3.4073923087376004e-05, "loss": 1.9957, "step": 77650 }, { "epoch": 5.276192417448023, "grad_norm": 2.7948288917541504, "learning_rate": 3.406967658649273e-05, "loss": 2.2051, "step": 77655 }, { "epoch": 5.276532137518685, "grad_norm": 3.0540266036987305, "learning_rate": 3.406543008560946e-05, "loss": 2.2564, "step": 77660 }, { "epoch": 5.276871857589346, "grad_norm": 3.748795509338379, "learning_rate": 3.406118358472619e-05, "loss": 2.0694, "step": 77665 }, { "epoch": 5.277211577660008, "grad_norm": 3.0845532417297363, "learning_rate": 3.4056937083842916e-05, "loss": 2.4155, "step": 77670 }, { "epoch": 5.27755129773067, "grad_norm": 3.2464823722839355, "learning_rate": 3.4052690582959644e-05, "loss": 2.1048, "step": 77675 }, { "epoch": 5.277891017801331, "grad_norm": 3.2720348834991455, "learning_rate": 3.404844408207637e-05, "loss": 2.0105, "step": 77680 }, { "epoch": 5.2782307378719935, "grad_norm": 3.3672940731048584, "learning_rate": 3.40441975811931e-05, "loss": 2.2848, "step": 77685 }, { "epoch": 5.278570457942656, "grad_norm": 3.9192516803741455, "learning_rate": 3.403995108030983e-05, "loss": 2.1874, "step": 77690 }, { "epoch": 5.278910178013317, "grad_norm": 3.818195104598999, "learning_rate": 3.403570457942655e-05, "loss": 2.3785, "step": 77695 }, { "epoch": 5.279249898083979, "grad_norm": 2.63108491897583, "learning_rate": 3.4031458078543284e-05, "loss": 2.2347, "step": 77700 }, { "epoch": 5.279589618154641, "grad_norm": 2.8579108715057373, "learning_rate": 3.402721157766001e-05, "loss": 2.617, "step": 77705 }, { "epoch": 5.279929338225302, "grad_norm": 3.53774356842041, "learning_rate": 3.4022965076776734e-05, 
"loss": 1.9645, "step": 77710 }, { "epoch": 5.280269058295964, "grad_norm": 3.517758846282959, "learning_rate": 3.401871857589347e-05, "loss": 2.1178, "step": 77715 }, { "epoch": 5.280608778366626, "grad_norm": 3.28710675239563, "learning_rate": 3.4014472075010196e-05, "loss": 2.2128, "step": 77720 }, { "epoch": 5.280948498437287, "grad_norm": 4.622015953063965, "learning_rate": 3.401022557412692e-05, "loss": 2.3297, "step": 77725 }, { "epoch": 5.2812882185079495, "grad_norm": 3.8302247524261475, "learning_rate": 3.4005979073243646e-05, "loss": 2.0582, "step": 77730 }, { "epoch": 5.281627938578612, "grad_norm": 3.65193772315979, "learning_rate": 3.400173257236038e-05, "loss": 2.4136, "step": 77735 }, { "epoch": 5.281967658649273, "grad_norm": 2.9660608768463135, "learning_rate": 3.39974860714771e-05, "loss": 2.0778, "step": 77740 }, { "epoch": 5.282307378719935, "grad_norm": 3.261509418487549, "learning_rate": 3.399323957059383e-05, "loss": 2.2089, "step": 77745 }, { "epoch": 5.282647098790597, "grad_norm": 2.8311727046966553, "learning_rate": 3.3988993069710564e-05, "loss": 2.0559, "step": 77750 }, { "epoch": 5.282986818861258, "grad_norm": 3.227837324142456, "learning_rate": 3.3984746568827286e-05, "loss": 2.3074, "step": 77755 }, { "epoch": 5.28332653893192, "grad_norm": 4.140060901641846, "learning_rate": 3.3980500067944014e-05, "loss": 2.3936, "step": 77760 }, { "epoch": 5.283666259002582, "grad_norm": 2.766294479370117, "learning_rate": 3.397625356706075e-05, "loss": 1.8248, "step": 77765 }, { "epoch": 5.284005979073243, "grad_norm": 3.007286787033081, "learning_rate": 3.397200706617747e-05, "loss": 2.2647, "step": 77770 }, { "epoch": 5.2843456991439055, "grad_norm": 3.604581117630005, "learning_rate": 3.39677605652942e-05, "loss": 2.4515, "step": 77775 }, { "epoch": 5.284685419214568, "grad_norm": 3.4353530406951904, "learning_rate": 3.3963514064410926e-05, "loss": 2.2462, "step": 77780 }, { "epoch": 5.285025139285229, "grad_norm": 3.3238048553466797, 
"learning_rate": 3.3959267563527654e-05, "loss": 2.2225, "step": 77785 }, { "epoch": 5.285364859355891, "grad_norm": 3.5286900997161865, "learning_rate": 3.395502106264438e-05, "loss": 2.4396, "step": 77790 }, { "epoch": 5.285704579426553, "grad_norm": 3.5717995166778564, "learning_rate": 3.395077456176111e-05, "loss": 2.1295, "step": 77795 }, { "epoch": 5.286044299497214, "grad_norm": 3.8351454734802246, "learning_rate": 3.394652806087784e-05, "loss": 2.2352, "step": 77800 }, { "epoch": 5.286384019567876, "grad_norm": 3.059744119644165, "learning_rate": 3.3942281559994566e-05, "loss": 2.3497, "step": 77805 }, { "epoch": 5.286723739638538, "grad_norm": 3.7229089736938477, "learning_rate": 3.3938035059111294e-05, "loss": 2.3545, "step": 77810 }, { "epoch": 5.287063459709199, "grad_norm": 3.3271706104278564, "learning_rate": 3.393378855822802e-05, "loss": 1.9877, "step": 77815 }, { "epoch": 5.2874031797798615, "grad_norm": 3.1254000663757324, "learning_rate": 3.392954205734475e-05, "loss": 2.1435, "step": 77820 }, { "epoch": 5.287742899850523, "grad_norm": 2.7013092041015625, "learning_rate": 3.392529555646148e-05, "loss": 2.3179, "step": 77825 }, { "epoch": 5.288082619921185, "grad_norm": 2.8338286876678467, "learning_rate": 3.3921049055578206e-05, "loss": 2.1465, "step": 77830 }, { "epoch": 5.288422339991847, "grad_norm": 3.2927629947662354, "learning_rate": 3.3916802554694934e-05, "loss": 2.2589, "step": 77835 }, { "epoch": 5.288762060062508, "grad_norm": 3.5868918895721436, "learning_rate": 3.391255605381166e-05, "loss": 2.4083, "step": 77840 }, { "epoch": 5.28910178013317, "grad_norm": 3.9994680881500244, "learning_rate": 3.390830955292839e-05, "loss": 2.2748, "step": 77845 }, { "epoch": 5.289441500203832, "grad_norm": 3.5376205444335938, "learning_rate": 3.390406305204512e-05, "loss": 2.2146, "step": 77850 }, { "epoch": 5.289781220274493, "grad_norm": 3.3185553550720215, "learning_rate": 3.3899816551161846e-05, "loss": 2.2997, "step": 77855 }, { "epoch": 
5.2901209403451555, "grad_norm": 4.0382490158081055, "learning_rate": 3.3895570050278574e-05, "loss": 2.2062, "step": 77860 }, { "epoch": 5.2904606604158175, "grad_norm": 3.189579963684082, "learning_rate": 3.38913235493953e-05, "loss": 2.1377, "step": 77865 }, { "epoch": 5.290800380486479, "grad_norm": 2.459623336791992, "learning_rate": 3.388707704851203e-05, "loss": 2.1026, "step": 77870 }, { "epoch": 5.291140100557141, "grad_norm": 3.915231704711914, "learning_rate": 3.388283054762876e-05, "loss": 2.5963, "step": 77875 }, { "epoch": 5.291479820627803, "grad_norm": 4.086563587188721, "learning_rate": 3.387858404674548e-05, "loss": 2.1114, "step": 77880 }, { "epoch": 5.291819540698464, "grad_norm": 3.2321183681488037, "learning_rate": 3.3874337545862214e-05, "loss": 2.0804, "step": 77885 }, { "epoch": 5.292159260769126, "grad_norm": 3.675262212753296, "learning_rate": 3.387009104497894e-05, "loss": 2.4837, "step": 77890 }, { "epoch": 5.292498980839788, "grad_norm": 4.329439640045166, "learning_rate": 3.386584454409566e-05, "loss": 2.333, "step": 77895 }, { "epoch": 5.292838700910449, "grad_norm": 3.6024363040924072, "learning_rate": 3.38615980432124e-05, "loss": 2.1986, "step": 77900 }, { "epoch": 5.2931784209811115, "grad_norm": 3.662968158721924, "learning_rate": 3.3857351542329126e-05, "loss": 2.2473, "step": 77905 }, { "epoch": 5.2935181410517735, "grad_norm": 3.630162477493286, "learning_rate": 3.385310504144585e-05, "loss": 2.2471, "step": 77910 }, { "epoch": 5.293857861122435, "grad_norm": 3.9198696613311768, "learning_rate": 3.3848858540562575e-05, "loss": 2.3961, "step": 77915 }, { "epoch": 5.294197581193097, "grad_norm": 3.868947982788086, "learning_rate": 3.384461203967931e-05, "loss": 2.2025, "step": 77920 }, { "epoch": 5.294537301263759, "grad_norm": 3.616685390472412, "learning_rate": 3.384036553879603e-05, "loss": 2.2316, "step": 77925 }, { "epoch": 5.29487702133442, "grad_norm": 3.715712070465088, "learning_rate": 3.383611903791276e-05, "loss": 
2.0705, "step": 77930 }, { "epoch": 5.295216741405082, "grad_norm": 3.4979376792907715, "learning_rate": 3.3831872537029494e-05, "loss": 2.3887, "step": 77935 }, { "epoch": 5.295556461475744, "grad_norm": 3.9762773513793945, "learning_rate": 3.3827626036146215e-05, "loss": 2.2266, "step": 77940 }, { "epoch": 5.295896181546405, "grad_norm": 4.127709865570068, "learning_rate": 3.382337953526294e-05, "loss": 2.2769, "step": 77945 }, { "epoch": 5.2962359016170675, "grad_norm": 3.02292537689209, "learning_rate": 3.381913303437967e-05, "loss": 2.3553, "step": 77950 }, { "epoch": 5.2965756216877296, "grad_norm": 3.370664596557617, "learning_rate": 3.38148865334964e-05, "loss": 2.1796, "step": 77955 }, { "epoch": 5.296915341758391, "grad_norm": 3.305948495864868, "learning_rate": 3.381064003261313e-05, "loss": 2.1505, "step": 77960 }, { "epoch": 5.297255061829053, "grad_norm": 3.7778775691986084, "learning_rate": 3.3806393531729855e-05, "loss": 2.2296, "step": 77965 }, { "epoch": 5.297594781899715, "grad_norm": 3.1235055923461914, "learning_rate": 3.380214703084658e-05, "loss": 2.3305, "step": 77970 }, { "epoch": 5.297934501970376, "grad_norm": 3.9879603385925293, "learning_rate": 3.379790052996331e-05, "loss": 2.2814, "step": 77975 }, { "epoch": 5.298274222041038, "grad_norm": 3.252272844314575, "learning_rate": 3.379365402908004e-05, "loss": 2.5844, "step": 77980 }, { "epoch": 5.2986139421117, "grad_norm": 3.2529327869415283, "learning_rate": 3.378940752819677e-05, "loss": 2.1538, "step": 77985 }, { "epoch": 5.298953662182361, "grad_norm": 3.452704429626465, "learning_rate": 3.3785161027313495e-05, "loss": 2.2606, "step": 77990 }, { "epoch": 5.2992933822530235, "grad_norm": 4.111223220825195, "learning_rate": 3.378091452643022e-05, "loss": 2.2388, "step": 77995 }, { "epoch": 5.299633102323686, "grad_norm": 3.1523149013519287, "learning_rate": 3.377666802554695e-05, "loss": 2.0167, "step": 78000 }, { "epoch": 5.299972822394347, "grad_norm": 4.253807544708252, 
"learning_rate": 3.377242152466368e-05, "loss": 2.1777, "step": 78005 }, { "epoch": 5.300312542465009, "grad_norm": 3.2782299518585205, "learning_rate": 3.376817502378041e-05, "loss": 2.0122, "step": 78010 }, { "epoch": 5.300652262535671, "grad_norm": 2.977780342102051, "learning_rate": 3.3763928522897135e-05, "loss": 2.3059, "step": 78015 }, { "epoch": 5.300991982606332, "grad_norm": 3.4049949645996094, "learning_rate": 3.375968202201386e-05, "loss": 2.1027, "step": 78020 }, { "epoch": 5.301331702676994, "grad_norm": 4.16828727722168, "learning_rate": 3.375543552113059e-05, "loss": 2.1308, "step": 78025 }, { "epoch": 5.301671422747656, "grad_norm": 3.230273723602295, "learning_rate": 3.375118902024732e-05, "loss": 2.2363, "step": 78030 }, { "epoch": 5.302011142818317, "grad_norm": 4.620712757110596, "learning_rate": 3.374694251936405e-05, "loss": 2.4632, "step": 78035 }, { "epoch": 5.3023508628889795, "grad_norm": 3.3656067848205566, "learning_rate": 3.3742696018480775e-05, "loss": 2.2216, "step": 78040 }, { "epoch": 5.302690582959642, "grad_norm": 2.7153329849243164, "learning_rate": 3.37384495175975e-05, "loss": 2.367, "step": 78045 }, { "epoch": 5.303030303030303, "grad_norm": 3.7682268619537354, "learning_rate": 3.3734203016714225e-05, "loss": 2.1548, "step": 78050 }, { "epoch": 5.303370023100965, "grad_norm": 4.18657112121582, "learning_rate": 3.372995651583096e-05, "loss": 2.3283, "step": 78055 }, { "epoch": 5.303709743171627, "grad_norm": 3.312089204788208, "learning_rate": 3.372571001494769e-05, "loss": 2.3894, "step": 78060 }, { "epoch": 5.304049463242288, "grad_norm": 3.657640218734741, "learning_rate": 3.372146351406441e-05, "loss": 2.3368, "step": 78065 }, { "epoch": 5.30438918331295, "grad_norm": 3.5220320224761963, "learning_rate": 3.371721701318114e-05, "loss": 2.1065, "step": 78070 }, { "epoch": 5.304728903383612, "grad_norm": 4.02593994140625, "learning_rate": 3.371297051229787e-05, "loss": 2.2364, "step": 78075 }, { "epoch": 5.305068623454273, 
"grad_norm": 3.7559163570404053, "learning_rate": 3.370872401141459e-05, "loss": 2.3825, "step": 78080 }, { "epoch": 5.3054083435249355, "grad_norm": 3.2068123817443848, "learning_rate": 3.370447751053132e-05, "loss": 2.1781, "step": 78085 }, { "epoch": 5.305748063595598, "grad_norm": 4.022878646850586, "learning_rate": 3.3700231009648055e-05, "loss": 2.114, "step": 78090 }, { "epoch": 5.306087783666259, "grad_norm": 3.6131746768951416, "learning_rate": 3.369598450876478e-05, "loss": 2.2209, "step": 78095 }, { "epoch": 5.306427503736921, "grad_norm": 3.655531167984009, "learning_rate": 3.3691738007881505e-05, "loss": 1.9754, "step": 78100 }, { "epoch": 5.306767223807583, "grad_norm": 2.741370677947998, "learning_rate": 3.368749150699824e-05, "loss": 2.4147, "step": 78105 }, { "epoch": 5.307106943878244, "grad_norm": 5.0919623374938965, "learning_rate": 3.368324500611496e-05, "loss": 2.6092, "step": 78110 }, { "epoch": 5.307446663948906, "grad_norm": 3.0081357955932617, "learning_rate": 3.367899850523169e-05, "loss": 2.3371, "step": 78115 }, { "epoch": 5.307786384019568, "grad_norm": 3.0931384563446045, "learning_rate": 3.367475200434842e-05, "loss": 2.0505, "step": 78120 }, { "epoch": 5.3081261040902294, "grad_norm": 2.9181911945343018, "learning_rate": 3.3670505503465145e-05, "loss": 2.2497, "step": 78125 }, { "epoch": 5.3084658241608915, "grad_norm": 3.664250135421753, "learning_rate": 3.366625900258187e-05, "loss": 2.3888, "step": 78130 }, { "epoch": 5.308805544231554, "grad_norm": 2.7585391998291016, "learning_rate": 3.36620125016986e-05, "loss": 2.0092, "step": 78135 }, { "epoch": 5.309145264302215, "grad_norm": 3.4584622383117676, "learning_rate": 3.365776600081533e-05, "loss": 2.1893, "step": 78140 }, { "epoch": 5.309484984372877, "grad_norm": 3.6336121559143066, "learning_rate": 3.365351949993206e-05, "loss": 2.0417, "step": 78145 }, { "epoch": 5.309824704443539, "grad_norm": 3.3902997970581055, "learning_rate": 3.3649272999048785e-05, "loss": 2.204, 
"step": 78150 }, { "epoch": 5.3101644245142, "grad_norm": 2.99145245552063, "learning_rate": 3.364502649816551e-05, "loss": 2.3915, "step": 78155 }, { "epoch": 5.310504144584862, "grad_norm": 2.9371278285980225, "learning_rate": 3.364077999728224e-05, "loss": 2.3548, "step": 78160 }, { "epoch": 5.310843864655524, "grad_norm": 3.0369491577148438, "learning_rate": 3.363653349639897e-05, "loss": 2.6953, "step": 78165 }, { "epoch": 5.3111835847261855, "grad_norm": 3.0479323863983154, "learning_rate": 3.36322869955157e-05, "loss": 2.0993, "step": 78170 }, { "epoch": 5.3115233047968475, "grad_norm": 3.751685857772827, "learning_rate": 3.3628040494632425e-05, "loss": 2.4837, "step": 78175 }, { "epoch": 5.31186302486751, "grad_norm": 4.857168674468994, "learning_rate": 3.362379399374915e-05, "loss": 2.626, "step": 78180 }, { "epoch": 5.312202744938171, "grad_norm": 2.836711883544922, "learning_rate": 3.361954749286588e-05, "loss": 2.0233, "step": 78185 }, { "epoch": 5.312542465008833, "grad_norm": 3.7851357460021973, "learning_rate": 3.361530099198261e-05, "loss": 1.974, "step": 78190 }, { "epoch": 5.312882185079495, "grad_norm": 2.986426830291748, "learning_rate": 3.361105449109934e-05, "loss": 2.0536, "step": 78195 }, { "epoch": 5.313221905150156, "grad_norm": 3.419238328933716, "learning_rate": 3.3606807990216065e-05, "loss": 2.2966, "step": 78200 }, { "epoch": 5.313561625220818, "grad_norm": 3.9246275424957275, "learning_rate": 3.360256148933279e-05, "loss": 2.1893, "step": 78205 }, { "epoch": 5.31390134529148, "grad_norm": 3.7312378883361816, "learning_rate": 3.359831498844952e-05, "loss": 2.4928, "step": 78210 }, { "epoch": 5.3142410653621415, "grad_norm": 4.058225631713867, "learning_rate": 3.359406848756625e-05, "loss": 2.0994, "step": 78215 }, { "epoch": 5.3145807854328035, "grad_norm": 2.9991586208343506, "learning_rate": 3.358982198668297e-05, "loss": 2.3093, "step": 78220 }, { "epoch": 5.314920505503465, "grad_norm": 3.385209560394287, "learning_rate": 
3.3585575485799705e-05, "loss": 2.3466, "step": 78225 }, { "epoch": 5.315260225574127, "grad_norm": 4.214477062225342, "learning_rate": 3.358132898491643e-05, "loss": 2.2035, "step": 78230 }, { "epoch": 5.315599945644789, "grad_norm": 3.585841178894043, "learning_rate": 3.3577082484033154e-05, "loss": 1.9819, "step": 78235 }, { "epoch": 5.31593966571545, "grad_norm": 3.021183490753174, "learning_rate": 3.357283598314989e-05, "loss": 2.1731, "step": 78240 }, { "epoch": 5.316279385786112, "grad_norm": 3.102168560028076, "learning_rate": 3.356858948226662e-05, "loss": 2.3123, "step": 78245 }, { "epoch": 5.316619105856774, "grad_norm": 3.392019510269165, "learning_rate": 3.356434298138334e-05, "loss": 1.8887, "step": 78250 }, { "epoch": 5.316958825927435, "grad_norm": 3.428250789642334, "learning_rate": 3.356009648050007e-05, "loss": 2.2334, "step": 78255 }, { "epoch": 5.3172985459980975, "grad_norm": 3.120685577392578, "learning_rate": 3.35558499796168e-05, "loss": 2.485, "step": 78260 }, { "epoch": 5.31763826606876, "grad_norm": 2.924161672592163, "learning_rate": 3.355160347873352e-05, "loss": 2.2526, "step": 78265 }, { "epoch": 5.317977986139421, "grad_norm": 3.411332368850708, "learning_rate": 3.354735697785025e-05, "loss": 2.323, "step": 78270 }, { "epoch": 5.318317706210083, "grad_norm": 4.116466522216797, "learning_rate": 3.3543110476966985e-05, "loss": 2.0944, "step": 78275 }, { "epoch": 5.318657426280745, "grad_norm": 3.4120521545410156, "learning_rate": 3.3538863976083706e-05, "loss": 1.8795, "step": 78280 }, { "epoch": 5.318997146351406, "grad_norm": 3.4735851287841797, "learning_rate": 3.3534617475200434e-05, "loss": 2.3059, "step": 78285 }, { "epoch": 5.319336866422068, "grad_norm": 2.6648006439208984, "learning_rate": 3.353037097431717e-05, "loss": 2.1276, "step": 78290 }, { "epoch": 5.31967658649273, "grad_norm": 3.8986635208129883, "learning_rate": 3.352612447343389e-05, "loss": 2.4179, "step": 78295 }, { "epoch": 5.320016306563391, "grad_norm": 
2.9407103061676025, "learning_rate": 3.352187797255062e-05, "loss": 2.1317, "step": 78300 }, { "epoch": 5.3203560266340535, "grad_norm": 3.189838171005249, "learning_rate": 3.3517631471667346e-05, "loss": 2.1901, "step": 78305 }, { "epoch": 5.320695746704716, "grad_norm": 3.8421261310577393, "learning_rate": 3.3513384970784074e-05, "loss": 2.1452, "step": 78310 }, { "epoch": 5.321035466775377, "grad_norm": 2.722644567489624, "learning_rate": 3.35091384699008e-05, "loss": 2.2095, "step": 78315 }, { "epoch": 5.321375186846039, "grad_norm": 4.25508975982666, "learning_rate": 3.350489196901753e-05, "loss": 2.0521, "step": 78320 }, { "epoch": 5.321714906916701, "grad_norm": 3.768268585205078, "learning_rate": 3.350064546813426e-05, "loss": 2.1453, "step": 78325 }, { "epoch": 5.322054626987362, "grad_norm": 3.294124126434326, "learning_rate": 3.3496398967250986e-05, "loss": 2.1257, "step": 78330 }, { "epoch": 5.322394347058024, "grad_norm": 3.33282208442688, "learning_rate": 3.3492152466367714e-05, "loss": 2.3181, "step": 78335 }, { "epoch": 5.322734067128686, "grad_norm": 3.305103302001953, "learning_rate": 3.348790596548444e-05, "loss": 2.4216, "step": 78340 }, { "epoch": 5.323073787199347, "grad_norm": 3.4643964767456055, "learning_rate": 3.348365946460117e-05, "loss": 2.4285, "step": 78345 }, { "epoch": 5.3234135072700095, "grad_norm": 3.0979058742523193, "learning_rate": 3.34794129637179e-05, "loss": 2.202, "step": 78350 }, { "epoch": 5.323753227340672, "grad_norm": 3.369098424911499, "learning_rate": 3.3475166462834626e-05, "loss": 2.4077, "step": 78355 }, { "epoch": 5.324092947411333, "grad_norm": 3.1232433319091797, "learning_rate": 3.3470919961951354e-05, "loss": 2.2364, "step": 78360 }, { "epoch": 5.324432667481995, "grad_norm": 3.919090986251831, "learning_rate": 3.346667346106808e-05, "loss": 2.2128, "step": 78365 }, { "epoch": 5.324772387552657, "grad_norm": 3.372201442718506, "learning_rate": 3.346242696018481e-05, "loss": 2.3048, "step": 78370 }, { 
"epoch": 5.325112107623318, "grad_norm": 3.277883291244507, "learning_rate": 3.345818045930154e-05, "loss": 2.1223, "step": 78375 }, { "epoch": 5.32545182769398, "grad_norm": 3.58624529838562, "learning_rate": 3.3453933958418266e-05, "loss": 2.2065, "step": 78380 }, { "epoch": 5.325791547764642, "grad_norm": 3.709385633468628, "learning_rate": 3.3449687457534994e-05, "loss": 2.3111, "step": 78385 }, { "epoch": 5.326131267835303, "grad_norm": 3.3436148166656494, "learning_rate": 3.344544095665172e-05, "loss": 2.1572, "step": 78390 }, { "epoch": 5.3264709879059655, "grad_norm": 3.6099655628204346, "learning_rate": 3.344119445576845e-05, "loss": 2.2974, "step": 78395 }, { "epoch": 5.326810707976628, "grad_norm": 4.487529754638672, "learning_rate": 3.343694795488518e-05, "loss": 2.1488, "step": 78400 }, { "epoch": 5.327150428047289, "grad_norm": 3.800930976867676, "learning_rate": 3.34327014540019e-05, "loss": 2.1919, "step": 78405 }, { "epoch": 5.327490148117951, "grad_norm": 3.544379711151123, "learning_rate": 3.3428454953118634e-05, "loss": 2.2355, "step": 78410 }, { "epoch": 5.327829868188613, "grad_norm": 3.942962884902954, "learning_rate": 3.342420845223536e-05, "loss": 2.0927, "step": 78415 }, { "epoch": 5.328169588259274, "grad_norm": 4.011185646057129, "learning_rate": 3.3419961951352084e-05, "loss": 1.9076, "step": 78420 }, { "epoch": 5.328509308329936, "grad_norm": 3.737091302871704, "learning_rate": 3.341571545046882e-05, "loss": 2.1448, "step": 78425 }, { "epoch": 5.328849028400598, "grad_norm": 2.7775933742523193, "learning_rate": 3.3411468949585546e-05, "loss": 2.1345, "step": 78430 }, { "epoch": 5.3291887484712595, "grad_norm": 3.5261471271514893, "learning_rate": 3.340722244870227e-05, "loss": 1.9285, "step": 78435 }, { "epoch": 5.3295284685419215, "grad_norm": 4.557446479797363, "learning_rate": 3.3402975947818996e-05, "loss": 2.4363, "step": 78440 }, { "epoch": 5.329868188612584, "grad_norm": 3.4232075214385986, "learning_rate": 
3.339872944693573e-05, "loss": 2.0621, "step": 78445 }, { "epoch": 5.330207908683245, "grad_norm": 3.840848445892334, "learning_rate": 3.339448294605245e-05, "loss": 2.368, "step": 78450 }, { "epoch": 5.330547628753907, "grad_norm": 3.1590473651885986, "learning_rate": 3.339023644516918e-05, "loss": 2.5063, "step": 78455 }, { "epoch": 5.330887348824569, "grad_norm": 3.230790138244629, "learning_rate": 3.3385989944285914e-05, "loss": 2.2993, "step": 78460 }, { "epoch": 5.33122706889523, "grad_norm": 3.2978994846343994, "learning_rate": 3.3381743443402636e-05, "loss": 2.4565, "step": 78465 }, { "epoch": 5.331566788965892, "grad_norm": 3.492300271987915, "learning_rate": 3.3377496942519364e-05, "loss": 2.1842, "step": 78470 }, { "epoch": 5.331906509036554, "grad_norm": 3.492213487625122, "learning_rate": 3.337325044163609e-05, "loss": 2.2396, "step": 78475 }, { "epoch": 5.3322462291072155, "grad_norm": 3.156233310699463, "learning_rate": 3.336900394075282e-05, "loss": 2.3795, "step": 78480 }, { "epoch": 5.3325859491778775, "grad_norm": 4.031880855560303, "learning_rate": 3.336475743986955e-05, "loss": 2.2227, "step": 78485 }, { "epoch": 5.33292566924854, "grad_norm": 3.981377363204956, "learning_rate": 3.3360510938986276e-05, "loss": 2.2508, "step": 78490 }, { "epoch": 5.333265389319201, "grad_norm": 3.3497304916381836, "learning_rate": 3.3356264438103004e-05, "loss": 2.1088, "step": 78495 }, { "epoch": 5.333605109389863, "grad_norm": 3.0278759002685547, "learning_rate": 3.335201793721973e-05, "loss": 2.1758, "step": 78500 }, { "epoch": 5.333944829460524, "grad_norm": 3.165438413619995, "learning_rate": 3.334777143633646e-05, "loss": 2.5326, "step": 78505 }, { "epoch": 5.334284549531186, "grad_norm": 3.418409585952759, "learning_rate": 3.334352493545319e-05, "loss": 2.2082, "step": 78510 }, { "epoch": 5.334624269601848, "grad_norm": 3.887784004211426, "learning_rate": 3.3339278434569916e-05, "loss": 2.0373, "step": 78515 }, { "epoch": 5.334963989672509, "grad_norm": 
3.20957088470459, "learning_rate": 3.3335031933686644e-05, "loss": 2.3624, "step": 78520 }, { "epoch": 5.3353037097431715, "grad_norm": 3.2841312885284424, "learning_rate": 3.333078543280337e-05, "loss": 2.6839, "step": 78525 }, { "epoch": 5.3356434298138335, "grad_norm": 3.0736985206604004, "learning_rate": 3.33265389319201e-05, "loss": 2.3277, "step": 78530 }, { "epoch": 5.335983149884495, "grad_norm": 3.079519748687744, "learning_rate": 3.332229243103683e-05, "loss": 2.3557, "step": 78535 }, { "epoch": 5.336322869955157, "grad_norm": 2.926856279373169, "learning_rate": 3.3318045930153556e-05, "loss": 1.9507, "step": 78540 }, { "epoch": 5.336662590025819, "grad_norm": 2.8958237171173096, "learning_rate": 3.3313799429270284e-05, "loss": 2.1129, "step": 78545 }, { "epoch": 5.33700231009648, "grad_norm": 3.6638805866241455, "learning_rate": 3.330955292838701e-05, "loss": 1.988, "step": 78550 }, { "epoch": 5.337342030167142, "grad_norm": 3.2859554290771484, "learning_rate": 3.330530642750374e-05, "loss": 2.226, "step": 78555 }, { "epoch": 5.337681750237804, "grad_norm": 3.508033514022827, "learning_rate": 3.330105992662047e-05, "loss": 2.4607, "step": 78560 }, { "epoch": 5.338021470308465, "grad_norm": 3.4354407787323, "learning_rate": 3.3296813425737196e-05, "loss": 2.1798, "step": 78565 }, { "epoch": 5.3383611903791275, "grad_norm": 2.937669515609741, "learning_rate": 3.3292566924853924e-05, "loss": 2.0809, "step": 78570 }, { "epoch": 5.33870091044979, "grad_norm": 4.293938159942627, "learning_rate": 3.3288320423970645e-05, "loss": 1.9628, "step": 78575 }, { "epoch": 5.339040630520451, "grad_norm": 3.6444833278656006, "learning_rate": 3.328407392308738e-05, "loss": 2.2213, "step": 78580 }, { "epoch": 5.339380350591113, "grad_norm": 3.5650882720947266, "learning_rate": 3.327982742220411e-05, "loss": 2.0972, "step": 78585 }, { "epoch": 5.339720070661775, "grad_norm": 3.7319092750549316, "learning_rate": 3.327558092132083e-05, "loss": 1.9854, "step": 78590 }, { 
"epoch": 5.340059790732436, "grad_norm": 3.670987606048584, "learning_rate": 3.3271334420437564e-05, "loss": 2.3183, "step": 78595 }, { "epoch": 5.340399510803098, "grad_norm": 3.879115581512451, "learning_rate": 3.326708791955429e-05, "loss": 2.4101, "step": 78600 }, { "epoch": 5.34073923087376, "grad_norm": 3.269573926925659, "learning_rate": 3.326284141867101e-05, "loss": 2.1294, "step": 78605 }, { "epoch": 5.341078950944421, "grad_norm": 3.6836013793945312, "learning_rate": 3.325859491778774e-05, "loss": 2.2236, "step": 78610 }, { "epoch": 5.3414186710150835, "grad_norm": 3.347764015197754, "learning_rate": 3.3254348416904476e-05, "loss": 2.221, "step": 78615 }, { "epoch": 5.341758391085746, "grad_norm": 3.934262275695801, "learning_rate": 3.32501019160212e-05, "loss": 2.3334, "step": 78620 }, { "epoch": 5.342098111156407, "grad_norm": 3.9505960941314697, "learning_rate": 3.3245855415137925e-05, "loss": 2.0316, "step": 78625 }, { "epoch": 5.342437831227069, "grad_norm": 3.7623023986816406, "learning_rate": 3.324160891425466e-05, "loss": 2.5474, "step": 78630 }, { "epoch": 5.342777551297731, "grad_norm": 3.8410706520080566, "learning_rate": 3.323736241337138e-05, "loss": 2.3518, "step": 78635 }, { "epoch": 5.343117271368392, "grad_norm": 2.8154072761535645, "learning_rate": 3.323311591248811e-05, "loss": 2.0173, "step": 78640 }, { "epoch": 5.343456991439054, "grad_norm": 4.0495285987854, "learning_rate": 3.322886941160484e-05, "loss": 2.5695, "step": 78645 }, { "epoch": 5.343796711509716, "grad_norm": 3.938563346862793, "learning_rate": 3.3224622910721565e-05, "loss": 2.0278, "step": 78650 }, { "epoch": 5.344136431580377, "grad_norm": 3.2088236808776855, "learning_rate": 3.322037640983829e-05, "loss": 2.2581, "step": 78655 }, { "epoch": 5.3444761516510395, "grad_norm": 3.7720954418182373, "learning_rate": 3.321612990895502e-05, "loss": 2.3971, "step": 78660 }, { "epoch": 5.344815871721702, "grad_norm": 4.196957588195801, "learning_rate": 3.321188340807175e-05, 
"loss": 2.3583, "step": 78665 }, { "epoch": 5.345155591792363, "grad_norm": 3.9738454818725586, "learning_rate": 3.320763690718848e-05, "loss": 2.3849, "step": 78670 }, { "epoch": 5.345495311863025, "grad_norm": 3.4048080444335938, "learning_rate": 3.3203390406305205e-05, "loss": 2.0722, "step": 78675 }, { "epoch": 5.345835031933687, "grad_norm": 3.903169631958008, "learning_rate": 3.319914390542194e-05, "loss": 2.1208, "step": 78680 }, { "epoch": 5.346174752004348, "grad_norm": 3.2664408683776855, "learning_rate": 3.319489740453866e-05, "loss": 2.2929, "step": 78685 }, { "epoch": 5.34651447207501, "grad_norm": 2.6436686515808105, "learning_rate": 3.319065090365539e-05, "loss": 2.3913, "step": 78690 }, { "epoch": 5.346854192145672, "grad_norm": 2.921738862991333, "learning_rate": 3.318640440277212e-05, "loss": 2.6144, "step": 78695 }, { "epoch": 5.3471939122163334, "grad_norm": 3.7474303245544434, "learning_rate": 3.3182157901888845e-05, "loss": 2.4627, "step": 78700 }, { "epoch": 5.3475336322869955, "grad_norm": 3.2522196769714355, "learning_rate": 3.317791140100557e-05, "loss": 2.2973, "step": 78705 }, { "epoch": 5.347873352357658, "grad_norm": 3.0720927715301514, "learning_rate": 3.31736649001223e-05, "loss": 1.8993, "step": 78710 }, { "epoch": 5.348213072428319, "grad_norm": 2.659224271774292, "learning_rate": 3.316941839923903e-05, "loss": 2.1463, "step": 78715 }, { "epoch": 5.348552792498981, "grad_norm": 3.9815430641174316, "learning_rate": 3.316517189835576e-05, "loss": 2.0661, "step": 78720 }, { "epoch": 5.348892512569643, "grad_norm": 2.9793014526367188, "learning_rate": 3.3160925397472485e-05, "loss": 2.4006, "step": 78725 }, { "epoch": 5.349232232640304, "grad_norm": 3.521210193634033, "learning_rate": 3.315667889658921e-05, "loss": 2.3133, "step": 78730 }, { "epoch": 5.349571952710966, "grad_norm": 3.391779661178589, "learning_rate": 3.315243239570594e-05, "loss": 2.2077, "step": 78735 }, { "epoch": 5.349911672781628, "grad_norm": 3.560194730758667, 
"learning_rate": 3.314818589482267e-05, "loss": 2.2587, "step": 78740 }, { "epoch": 5.3502513928522895, "grad_norm": 4.465660572052002, "learning_rate": 3.314393939393939e-05, "loss": 2.206, "step": 78745 }, { "epoch": 5.3505911129229515, "grad_norm": 4.3787994384765625, "learning_rate": 3.3139692893056125e-05, "loss": 2.5049, "step": 78750 }, { "epoch": 5.350930832993614, "grad_norm": 3.451159954071045, "learning_rate": 3.313544639217285e-05, "loss": 2.0886, "step": 78755 }, { "epoch": 5.351270553064275, "grad_norm": 2.855630397796631, "learning_rate": 3.3131199891289575e-05, "loss": 2.3208, "step": 78760 }, { "epoch": 5.351610273134937, "grad_norm": 3.2466957569122314, "learning_rate": 3.312695339040631e-05, "loss": 2.528, "step": 78765 }, { "epoch": 5.351949993205599, "grad_norm": 3.469832181930542, "learning_rate": 3.312270688952304e-05, "loss": 2.1206, "step": 78770 }, { "epoch": 5.35228971327626, "grad_norm": 3.5768492221832275, "learning_rate": 3.311846038863976e-05, "loss": 2.1515, "step": 78775 }, { "epoch": 5.352629433346922, "grad_norm": 3.268399715423584, "learning_rate": 3.311421388775649e-05, "loss": 2.2233, "step": 78780 }, { "epoch": 5.352969153417584, "grad_norm": 4.427914619445801, "learning_rate": 3.310996738687322e-05, "loss": 2.0896, "step": 78785 }, { "epoch": 5.3533088734882455, "grad_norm": 3.3805763721466064, "learning_rate": 3.310572088598994e-05, "loss": 2.3805, "step": 78790 }, { "epoch": 5.3536485935589075, "grad_norm": 3.628736972808838, "learning_rate": 3.310147438510667e-05, "loss": 2.2491, "step": 78795 }, { "epoch": 5.35398831362957, "grad_norm": 3.259930372238159, "learning_rate": 3.3097227884223405e-05, "loss": 2.0552, "step": 78800 }, { "epoch": 5.354328033700231, "grad_norm": 3.4572207927703857, "learning_rate": 3.309298138334013e-05, "loss": 2.2956, "step": 78805 }, { "epoch": 5.354667753770893, "grad_norm": 4.084909439086914, "learning_rate": 3.3088734882456855e-05, "loss": 2.1384, "step": 78810 }, { "epoch": 
5.355007473841555, "grad_norm": 2.7538163661956787, "learning_rate": 3.308448838157359e-05, "loss": 2.0655, "step": 78815 }, { "epoch": 5.355347193912216, "grad_norm": 3.816721200942993, "learning_rate": 3.308024188069031e-05, "loss": 2.3552, "step": 78820 }, { "epoch": 5.355686913982878, "grad_norm": 2.631859540939331, "learning_rate": 3.307599537980704e-05, "loss": 2.2747, "step": 78825 }, { "epoch": 5.35602663405354, "grad_norm": 3.39142107963562, "learning_rate": 3.307174887892377e-05, "loss": 2.1278, "step": 78830 }, { "epoch": 5.3563663541242015, "grad_norm": 3.938215494155884, "learning_rate": 3.3067502378040495e-05, "loss": 2.2664, "step": 78835 }, { "epoch": 5.3567060741948636, "grad_norm": 3.6455423831939697, "learning_rate": 3.306325587715722e-05, "loss": 2.3402, "step": 78840 }, { "epoch": 5.357045794265526, "grad_norm": 3.8392245769500732, "learning_rate": 3.305900937627395e-05, "loss": 2.1712, "step": 78845 }, { "epoch": 5.357385514336187, "grad_norm": 2.850558280944824, "learning_rate": 3.3054762875390685e-05, "loss": 2.0782, "step": 78850 }, { "epoch": 5.357725234406849, "grad_norm": 3.991527557373047, "learning_rate": 3.305051637450741e-05, "loss": 2.3969, "step": 78855 }, { "epoch": 5.358064954477511, "grad_norm": 4.150559902191162, "learning_rate": 3.3046269873624135e-05, "loss": 1.9873, "step": 78860 }, { "epoch": 5.358404674548172, "grad_norm": 3.0712363719940186, "learning_rate": 3.304202337274086e-05, "loss": 2.2838, "step": 78865 }, { "epoch": 5.358744394618834, "grad_norm": 2.9408862590789795, "learning_rate": 3.303777687185759e-05, "loss": 2.228, "step": 78870 }, { "epoch": 5.359084114689496, "grad_norm": 3.0056729316711426, "learning_rate": 3.303353037097432e-05, "loss": 2.2268, "step": 78875 }, { "epoch": 5.3594238347601575, "grad_norm": 2.9913363456726074, "learning_rate": 3.302928387009105e-05, "loss": 2.3628, "step": 78880 }, { "epoch": 5.35976355483082, "grad_norm": 3.3645565509796143, "learning_rate": 3.3025037369207775e-05, "loss": 
2.4217, "step": 78885 }, { "epoch": 5.360103274901482, "grad_norm": 3.1329569816589355, "learning_rate": 3.30207908683245e-05, "loss": 2.5092, "step": 78890 }, { "epoch": 5.360442994972143, "grad_norm": 2.8119866847991943, "learning_rate": 3.301654436744123e-05, "loss": 2.2024, "step": 78895 }, { "epoch": 5.360782715042805, "grad_norm": 3.18109393119812, "learning_rate": 3.301229786655796e-05, "loss": 2.2166, "step": 78900 }, { "epoch": 5.361122435113466, "grad_norm": 3.3921637535095215, "learning_rate": 3.300805136567469e-05, "loss": 2.2607, "step": 78905 }, { "epoch": 5.361462155184128, "grad_norm": 3.0811753273010254, "learning_rate": 3.3003804864791415e-05, "loss": 2.122, "step": 78910 }, { "epoch": 5.36180187525479, "grad_norm": 3.743522882461548, "learning_rate": 3.299955836390814e-05, "loss": 2.2469, "step": 78915 }, { "epoch": 5.362141595325451, "grad_norm": 3.8290371894836426, "learning_rate": 3.299531186302487e-05, "loss": 2.277, "step": 78920 }, { "epoch": 5.3624813153961135, "grad_norm": 3.239171028137207, "learning_rate": 3.29910653621416e-05, "loss": 2.1784, "step": 78925 }, { "epoch": 5.362821035466776, "grad_norm": 3.1396846771240234, "learning_rate": 3.298681886125832e-05, "loss": 2.1747, "step": 78930 }, { "epoch": 5.363160755537437, "grad_norm": 3.3929970264434814, "learning_rate": 3.2982572360375055e-05, "loss": 2.469, "step": 78935 }, { "epoch": 5.363500475608099, "grad_norm": 3.040065050125122, "learning_rate": 3.297832585949178e-05, "loss": 2.152, "step": 78940 }, { "epoch": 5.363840195678761, "grad_norm": 4.503268718719482, "learning_rate": 3.2974079358608504e-05, "loss": 1.9734, "step": 78945 }, { "epoch": 5.364179915749422, "grad_norm": 3.5423994064331055, "learning_rate": 3.296983285772524e-05, "loss": 2.3309, "step": 78950 }, { "epoch": 5.364519635820084, "grad_norm": 3.902010917663574, "learning_rate": 3.296558635684197e-05, "loss": 2.2398, "step": 78955 }, { "epoch": 5.364859355890746, "grad_norm": 3.4519128799438477, "learning_rate": 
3.296133985595869e-05, "loss": 2.0458, "step": 78960 }, { "epoch": 5.365199075961407, "grad_norm": 3.3512301445007324, "learning_rate": 3.2957093355075416e-05, "loss": 2.0948, "step": 78965 }, { "epoch": 5.3655387960320695, "grad_norm": 3.5017623901367188, "learning_rate": 3.295284685419215e-05, "loss": 2.329, "step": 78970 }, { "epoch": 5.365878516102732, "grad_norm": 3.1187150478363037, "learning_rate": 3.294860035330887e-05, "loss": 2.55, "step": 78975 }, { "epoch": 5.366218236173393, "grad_norm": 3.1501944065093994, "learning_rate": 3.29443538524256e-05, "loss": 2.1717, "step": 78980 }, { "epoch": 5.366557956244055, "grad_norm": 3.49919056892395, "learning_rate": 3.2940107351542335e-05, "loss": 1.8406, "step": 78985 }, { "epoch": 5.366897676314717, "grad_norm": 2.8330681324005127, "learning_rate": 3.2935860850659056e-05, "loss": 2.1009, "step": 78990 }, { "epoch": 5.367237396385378, "grad_norm": 3.6346771717071533, "learning_rate": 3.2931614349775784e-05, "loss": 2.1692, "step": 78995 }, { "epoch": 5.36757711645604, "grad_norm": 3.416901111602783, "learning_rate": 3.292736784889251e-05, "loss": 2.1604, "step": 79000 }, { "epoch": 5.367916836526702, "grad_norm": 3.8788793087005615, "learning_rate": 3.292312134800924e-05, "loss": 2.2464, "step": 79005 }, { "epoch": 5.3682565565973634, "grad_norm": 3.459376096725464, "learning_rate": 3.291887484712597e-05, "loss": 2.3031, "step": 79010 }, { "epoch": 5.3685962766680255, "grad_norm": 4.652582168579102, "learning_rate": 3.2914628346242696e-05, "loss": 2.2963, "step": 79015 }, { "epoch": 5.368935996738688, "grad_norm": 4.435708999633789, "learning_rate": 3.291038184535943e-05, "loss": 2.3365, "step": 79020 }, { "epoch": 5.369275716809349, "grad_norm": 3.5198843479156494, "learning_rate": 3.290613534447615e-05, "loss": 2.1849, "step": 79025 }, { "epoch": 5.369615436880011, "grad_norm": 3.7862308025360107, "learning_rate": 3.290188884359288e-05, "loss": 2.4043, "step": 79030 }, { "epoch": 5.369955156950673, "grad_norm": 
3.044884443283081, "learning_rate": 3.289764234270961e-05, "loss": 2.3144, "step": 79035 }, { "epoch": 5.370294877021334, "grad_norm": 3.902270793914795, "learning_rate": 3.2893395841826336e-05, "loss": 2.2578, "step": 79040 }, { "epoch": 5.370634597091996, "grad_norm": 4.232590198516846, "learning_rate": 3.2889149340943064e-05, "loss": 2.3356, "step": 79045 }, { "epoch": 5.370974317162658, "grad_norm": 4.1594624519348145, "learning_rate": 3.288490284005979e-05, "loss": 2.0587, "step": 79050 }, { "epoch": 5.3713140372333195, "grad_norm": 4.0836262702941895, "learning_rate": 3.288065633917652e-05, "loss": 2.2185, "step": 79055 }, { "epoch": 5.3716537573039815, "grad_norm": 3.1957554817199707, "learning_rate": 3.287640983829325e-05, "loss": 2.1976, "step": 79060 }, { "epoch": 5.371993477374644, "grad_norm": 3.748612880706787, "learning_rate": 3.2872163337409976e-05, "loss": 2.3239, "step": 79065 }, { "epoch": 5.372333197445305, "grad_norm": 3.8811283111572266, "learning_rate": 3.2867916836526704e-05, "loss": 2.0782, "step": 79070 }, { "epoch": 5.372672917515967, "grad_norm": 2.7716870307922363, "learning_rate": 3.286367033564343e-05, "loss": 2.2111, "step": 79075 }, { "epoch": 5.373012637586629, "grad_norm": 3.3306665420532227, "learning_rate": 3.285942383476016e-05, "loss": 2.1179, "step": 79080 }, { "epoch": 5.37335235765729, "grad_norm": 3.3829967975616455, "learning_rate": 3.285517733387689e-05, "loss": 2.1792, "step": 79085 }, { "epoch": 5.373692077727952, "grad_norm": 3.955233573913574, "learning_rate": 3.2850930832993616e-05, "loss": 2.1165, "step": 79090 }, { "epoch": 5.374031797798614, "grad_norm": 2.9499704837799072, "learning_rate": 3.2846684332110344e-05, "loss": 2.2126, "step": 79095 }, { "epoch": 5.3743715178692755, "grad_norm": 3.282871723175049, "learning_rate": 3.2842437831227066e-05, "loss": 2.0821, "step": 79100 }, { "epoch": 5.3747112379399375, "grad_norm": 3.4699087142944336, "learning_rate": 3.28381913303438e-05, "loss": 2.1298, "step": 79105 }, 
{ "epoch": 5.3750509580106, "grad_norm": 3.2643117904663086, "learning_rate": 3.283394482946053e-05, "loss": 2.1714, "step": 79110 }, { "epoch": 5.375390678081261, "grad_norm": 3.6185035705566406, "learning_rate": 3.282969832857725e-05, "loss": 2.2418, "step": 79115 }, { "epoch": 5.375730398151923, "grad_norm": 3.713592052459717, "learning_rate": 3.2825451827693984e-05, "loss": 2.3128, "step": 79120 }, { "epoch": 5.376070118222585, "grad_norm": 3.1453824043273926, "learning_rate": 3.282120532681071e-05, "loss": 2.2603, "step": 79125 }, { "epoch": 5.376409838293246, "grad_norm": 3.6724681854248047, "learning_rate": 3.2816958825927434e-05, "loss": 2.4151, "step": 79130 }, { "epoch": 5.376749558363908, "grad_norm": 4.478730201721191, "learning_rate": 3.281271232504416e-05, "loss": 2.246, "step": 79135 }, { "epoch": 5.37708927843457, "grad_norm": 3.6707301139831543, "learning_rate": 3.2808465824160896e-05, "loss": 2.1562, "step": 79140 }, { "epoch": 5.3774289985052315, "grad_norm": 3.95890474319458, "learning_rate": 3.280421932327762e-05, "loss": 2.3797, "step": 79145 }, { "epoch": 5.377768718575894, "grad_norm": 4.21958065032959, "learning_rate": 3.2799972822394346e-05, "loss": 2.229, "step": 79150 }, { "epoch": 5.378108438646556, "grad_norm": 3.4425177574157715, "learning_rate": 3.279572632151108e-05, "loss": 2.1476, "step": 79155 }, { "epoch": 5.378448158717217, "grad_norm": 2.9648587703704834, "learning_rate": 3.27914798206278e-05, "loss": 2.277, "step": 79160 }, { "epoch": 5.378787878787879, "grad_norm": 2.9523048400878906, "learning_rate": 3.278723331974453e-05, "loss": 2.2381, "step": 79165 }, { "epoch": 5.379127598858541, "grad_norm": 2.6703100204467773, "learning_rate": 3.278298681886126e-05, "loss": 2.4336, "step": 79170 }, { "epoch": 5.379467318929202, "grad_norm": 3.6605358123779297, "learning_rate": 3.2778740317977986e-05, "loss": 1.9068, "step": 79175 }, { "epoch": 5.379807038999864, "grad_norm": 2.918694019317627, "learning_rate": 3.2774493817094714e-05, 
"loss": 2.3735, "step": 79180 }, { "epoch": 5.380146759070525, "grad_norm": 3.455199718475342, "learning_rate": 3.277024731621144e-05, "loss": 2.2094, "step": 79185 }, { "epoch": 5.3804864791411875, "grad_norm": 2.7343642711639404, "learning_rate": 3.2766000815328176e-05, "loss": 2.2933, "step": 79190 }, { "epoch": 5.38082619921185, "grad_norm": 3.2153329849243164, "learning_rate": 3.27617543144449e-05, "loss": 2.2016, "step": 79195 }, { "epoch": 5.381165919282511, "grad_norm": 4.243433952331543, "learning_rate": 3.2757507813561626e-05, "loss": 2.381, "step": 79200 }, { "epoch": 5.381505639353173, "grad_norm": 2.6177542209625244, "learning_rate": 3.275326131267836e-05, "loss": 2.2819, "step": 79205 }, { "epoch": 5.381845359423835, "grad_norm": 3.4010679721832275, "learning_rate": 3.274901481179508e-05, "loss": 1.8898, "step": 79210 }, { "epoch": 5.382185079494496, "grad_norm": 3.6732914447784424, "learning_rate": 3.274476831091181e-05, "loss": 2.2426, "step": 79215 }, { "epoch": 5.382524799565158, "grad_norm": 3.0060007572174072, "learning_rate": 3.274052181002854e-05, "loss": 2.2915, "step": 79220 }, { "epoch": 5.38286451963582, "grad_norm": 3.2487714290618896, "learning_rate": 3.2736275309145266e-05, "loss": 2.2203, "step": 79225 }, { "epoch": 5.383204239706481, "grad_norm": 2.724318504333496, "learning_rate": 3.2732028808261994e-05, "loss": 2.4876, "step": 79230 }, { "epoch": 5.3835439597771435, "grad_norm": 4.54361629486084, "learning_rate": 3.272778230737872e-05, "loss": 2.325, "step": 79235 }, { "epoch": 5.383883679847806, "grad_norm": 3.720707893371582, "learning_rate": 3.272353580649545e-05, "loss": 2.3574, "step": 79240 }, { "epoch": 5.384223399918467, "grad_norm": 3.7923085689544678, "learning_rate": 3.271928930561218e-05, "loss": 2.0269, "step": 79245 }, { "epoch": 5.384563119989129, "grad_norm": 3.363588809967041, "learning_rate": 3.2715042804728906e-05, "loss": 2.1243, "step": 79250 }, { "epoch": 5.384902840059791, "grad_norm": 4.584624767303467, 
"learning_rate": 3.2710796303845634e-05, "loss": 2.0545, "step": 79255 }, { "epoch": 5.385242560130452, "grad_norm": 2.825578212738037, "learning_rate": 3.270654980296236e-05, "loss": 2.3346, "step": 79260 }, { "epoch": 5.385582280201114, "grad_norm": 3.5569026470184326, "learning_rate": 3.270230330207909e-05, "loss": 2.1387, "step": 79265 }, { "epoch": 5.385922000271776, "grad_norm": 3.2557566165924072, "learning_rate": 3.269805680119581e-05, "loss": 2.2601, "step": 79270 }, { "epoch": 5.386261720342437, "grad_norm": 3.3995141983032227, "learning_rate": 3.2693810300312546e-05, "loss": 2.3308, "step": 79275 }, { "epoch": 5.3866014404130995, "grad_norm": 3.2081949710845947, "learning_rate": 3.2689563799429274e-05, "loss": 2.1116, "step": 79280 }, { "epoch": 5.386941160483762, "grad_norm": 3.28806209564209, "learning_rate": 3.2685317298545995e-05, "loss": 2.1242, "step": 79285 }, { "epoch": 5.387280880554423, "grad_norm": 2.7418928146362305, "learning_rate": 3.268107079766273e-05, "loss": 2.2609, "step": 79290 }, { "epoch": 5.387620600625085, "grad_norm": 2.7283437252044678, "learning_rate": 3.267682429677946e-05, "loss": 2.22, "step": 79295 }, { "epoch": 5.387960320695747, "grad_norm": 3.293774366378784, "learning_rate": 3.267257779589618e-05, "loss": 1.8648, "step": 79300 }, { "epoch": 5.388300040766408, "grad_norm": 3.869018316268921, "learning_rate": 3.2668331295012914e-05, "loss": 2.2014, "step": 79305 }, { "epoch": 5.38863976083707, "grad_norm": 3.0641729831695557, "learning_rate": 3.266408479412964e-05, "loss": 2.3715, "step": 79310 }, { "epoch": 5.388979480907732, "grad_norm": 3.477609157562256, "learning_rate": 3.265983829324636e-05, "loss": 2.2062, "step": 79315 }, { "epoch": 5.3893192009783935, "grad_norm": 3.7704713344573975, "learning_rate": 3.265559179236309e-05, "loss": 2.2245, "step": 79320 }, { "epoch": 5.3896589210490555, "grad_norm": 3.9540960788726807, "learning_rate": 3.2651345291479826e-05, "loss": 2.1238, "step": 79325 }, { "epoch": 
5.389998641119718, "grad_norm": 3.181436538696289, "learning_rate": 3.264709879059655e-05, "loss": 2.2744, "step": 79330 }, { "epoch": 5.390338361190379, "grad_norm": 2.5462498664855957, "learning_rate": 3.2642852289713275e-05, "loss": 2.0912, "step": 79335 }, { "epoch": 5.390678081261041, "grad_norm": 2.8952577114105225, "learning_rate": 3.263860578883001e-05, "loss": 2.398, "step": 79340 }, { "epoch": 5.391017801331703, "grad_norm": 3.846209764480591, "learning_rate": 3.263435928794673e-05, "loss": 2.1483, "step": 79345 }, { "epoch": 5.391357521402364, "grad_norm": 3.489358425140381, "learning_rate": 3.263011278706346e-05, "loss": 2.3725, "step": 79350 }, { "epoch": 5.391697241473026, "grad_norm": 2.921593427658081, "learning_rate": 3.262586628618019e-05, "loss": 2.1155, "step": 79355 }, { "epoch": 5.392036961543688, "grad_norm": 3.448157548904419, "learning_rate": 3.262161978529692e-05, "loss": 2.3536, "step": 79360 }, { "epoch": 5.3923766816143495, "grad_norm": 4.181448459625244, "learning_rate": 3.261737328441364e-05, "loss": 2.2817, "step": 79365 }, { "epoch": 5.3927164016850115, "grad_norm": 3.6057980060577393, "learning_rate": 3.261312678353037e-05, "loss": 2.1125, "step": 79370 }, { "epoch": 5.393056121755674, "grad_norm": 3.367241859436035, "learning_rate": 3.2608880282647106e-05, "loss": 2.3331, "step": 79375 }, { "epoch": 5.393395841826335, "grad_norm": 3.0326781272888184, "learning_rate": 3.260463378176383e-05, "loss": 1.9976, "step": 79380 }, { "epoch": 5.393735561896997, "grad_norm": 3.4717800617218018, "learning_rate": 3.2600387280880555e-05, "loss": 2.3189, "step": 79385 }, { "epoch": 5.394075281967659, "grad_norm": 4.06567907333374, "learning_rate": 3.259614077999728e-05, "loss": 2.3059, "step": 79390 }, { "epoch": 5.39441500203832, "grad_norm": 3.3312602043151855, "learning_rate": 3.259189427911401e-05, "loss": 2.2243, "step": 79395 }, { "epoch": 5.394754722108982, "grad_norm": 3.193526029586792, "learning_rate": 3.258764777823074e-05, "loss": 
2.3636, "step": 79400 }, { "epoch": 5.395094442179644, "grad_norm": 4.0218281745910645, "learning_rate": 3.258340127734747e-05, "loss": 2.1729, "step": 79405 }, { "epoch": 5.3954341622503055, "grad_norm": 3.7172911167144775, "learning_rate": 3.2579154776464195e-05, "loss": 2.3871, "step": 79410 }, { "epoch": 5.3957738823209676, "grad_norm": 3.5554468631744385, "learning_rate": 3.257490827558092e-05, "loss": 2.1256, "step": 79415 }, { "epoch": 5.39611360239163, "grad_norm": 2.366899013519287, "learning_rate": 3.257066177469765e-05, "loss": 2.4571, "step": 79420 }, { "epoch": 5.396453322462291, "grad_norm": 2.9850668907165527, "learning_rate": 3.256641527381438e-05, "loss": 2.336, "step": 79425 }, { "epoch": 5.396793042532953, "grad_norm": 3.3416547775268555, "learning_rate": 3.256216877293111e-05, "loss": 2.152, "step": 79430 }, { "epoch": 5.397132762603615, "grad_norm": 4.102322578430176, "learning_rate": 3.2557922272047835e-05, "loss": 2.2431, "step": 79435 }, { "epoch": 5.397472482674276, "grad_norm": 2.6903839111328125, "learning_rate": 3.255367577116456e-05, "loss": 2.5033, "step": 79440 }, { "epoch": 5.397812202744938, "grad_norm": 2.907014846801758, "learning_rate": 3.254942927028129e-05, "loss": 2.3292, "step": 79445 }, { "epoch": 5.3981519228156, "grad_norm": 3.4589362144470215, "learning_rate": 3.254518276939802e-05, "loss": 2.0899, "step": 79450 }, { "epoch": 5.3984916428862615, "grad_norm": 3.6252341270446777, "learning_rate": 3.254093626851474e-05, "loss": 2.1321, "step": 79455 }, { "epoch": 5.398831362956924, "grad_norm": 2.8860321044921875, "learning_rate": 3.2536689767631475e-05, "loss": 2.2813, "step": 79460 }, { "epoch": 5.399171083027586, "grad_norm": 4.583804130554199, "learning_rate": 3.25324432667482e-05, "loss": 1.8057, "step": 79465 }, { "epoch": 5.399510803098247, "grad_norm": 3.2335236072540283, "learning_rate": 3.2528196765864925e-05, "loss": 2.1876, "step": 79470 }, { "epoch": 5.399850523168909, "grad_norm": 3.563373565673828, 
"learning_rate": 3.252395026498166e-05, "loss": 2.2123, "step": 79475 }, { "epoch": 5.400190243239571, "grad_norm": 3.1031334400177, "learning_rate": 3.251970376409839e-05, "loss": 2.3412, "step": 79480 }, { "epoch": 5.400529963310232, "grad_norm": 3.176525831222534, "learning_rate": 3.251545726321511e-05, "loss": 2.1034, "step": 79485 }, { "epoch": 5.400869683380894, "grad_norm": 2.9833245277404785, "learning_rate": 3.2511210762331837e-05, "loss": 2.3226, "step": 79490 }, { "epoch": 5.401209403451556, "grad_norm": 3.3540635108947754, "learning_rate": 3.250696426144857e-05, "loss": 2.4612, "step": 79495 }, { "epoch": 5.4015491235222175, "grad_norm": 3.573486566543579, "learning_rate": 3.250271776056529e-05, "loss": 2.2547, "step": 79500 }, { "epoch": 5.40188884359288, "grad_norm": 3.8129096031188965, "learning_rate": 3.249847125968202e-05, "loss": 2.2567, "step": 79505 }, { "epoch": 5.402228563663542, "grad_norm": 3.3151094913482666, "learning_rate": 3.2494224758798755e-05, "loss": 2.1433, "step": 79510 }, { "epoch": 5.402568283734203, "grad_norm": 3.7011473178863525, "learning_rate": 3.2489978257915477e-05, "loss": 2.3481, "step": 79515 }, { "epoch": 5.402908003804865, "grad_norm": 4.202624797821045, "learning_rate": 3.2485731757032205e-05, "loss": 2.3109, "step": 79520 }, { "epoch": 5.403247723875527, "grad_norm": 3.6079823970794678, "learning_rate": 3.248148525614893e-05, "loss": 2.3653, "step": 79525 }, { "epoch": 5.403587443946188, "grad_norm": 4.047812461853027, "learning_rate": 3.247723875526567e-05, "loss": 2.3039, "step": 79530 }, { "epoch": 5.40392716401685, "grad_norm": 3.7374353408813477, "learning_rate": 3.247299225438239e-05, "loss": 2.0982, "step": 79535 }, { "epoch": 5.404266884087512, "grad_norm": 4.4508056640625, "learning_rate": 3.246874575349912e-05, "loss": 2.069, "step": 79540 }, { "epoch": 5.4046066041581735, "grad_norm": 3.1776046752929688, "learning_rate": 3.246449925261585e-05, "loss": 2.384, "step": 79545 }, { "epoch": 5.404946324228836, 
"grad_norm": 3.195836305618286, "learning_rate": 3.246025275173257e-05, "loss": 2.3512, "step": 79550 }, { "epoch": 5.405286044299498, "grad_norm": 3.5243234634399414, "learning_rate": 3.24560062508493e-05, "loss": 2.5301, "step": 79555 }, { "epoch": 5.405625764370159, "grad_norm": 3.5901927947998047, "learning_rate": 3.245175974996603e-05, "loss": 2.2801, "step": 79560 }, { "epoch": 5.405965484440821, "grad_norm": 3.4428939819335938, "learning_rate": 3.244751324908276e-05, "loss": 2.0668, "step": 79565 }, { "epoch": 5.406305204511483, "grad_norm": 3.016211986541748, "learning_rate": 3.2443266748199485e-05, "loss": 2.1151, "step": 79570 }, { "epoch": 5.406644924582144, "grad_norm": 4.242619514465332, "learning_rate": 3.243902024731621e-05, "loss": 2.1917, "step": 79575 }, { "epoch": 5.406984644652806, "grad_norm": 4.2214789390563965, "learning_rate": 3.243477374643294e-05, "loss": 2.4171, "step": 79580 }, { "epoch": 5.4073243647234674, "grad_norm": 3.707221031188965, "learning_rate": 3.243052724554967e-05, "loss": 1.9912, "step": 79585 }, { "epoch": 5.4076640847941295, "grad_norm": 3.3361074924468994, "learning_rate": 3.24262807446664e-05, "loss": 2.3225, "step": 79590 }, { "epoch": 5.408003804864792, "grad_norm": 3.1262545585632324, "learning_rate": 3.2422034243783125e-05, "loss": 2.0205, "step": 79595 }, { "epoch": 5.408343524935453, "grad_norm": 2.7941598892211914, "learning_rate": 3.241778774289985e-05, "loss": 2.4904, "step": 79600 }, { "epoch": 5.408683245006115, "grad_norm": 2.8338193893432617, "learning_rate": 3.241354124201658e-05, "loss": 2.163, "step": 79605 }, { "epoch": 5.409022965076777, "grad_norm": 3.280501127243042, "learning_rate": 3.240929474113331e-05, "loss": 2.3283, "step": 79610 }, { "epoch": 5.409362685147438, "grad_norm": 3.372446060180664, "learning_rate": 3.240504824025004e-05, "loss": 2.0195, "step": 79615 }, { "epoch": 5.4097024052181, "grad_norm": 2.8401942253112793, "learning_rate": 3.2400801739366765e-05, "loss": 2.1452, "step": 
79620 }, { "epoch": 5.410042125288762, "grad_norm": 3.391571521759033, "learning_rate": 3.2396555238483486e-05, "loss": 2.607, "step": 79625 }, { "epoch": 5.4103818453594235, "grad_norm": 3.5290138721466064, "learning_rate": 3.239230873760022e-05, "loss": 2.3683, "step": 79630 }, { "epoch": 5.4107215654300855, "grad_norm": 3.4920763969421387, "learning_rate": 3.238806223671695e-05, "loss": 2.4294, "step": 79635 }, { "epoch": 5.411061285500748, "grad_norm": 3.082157850265503, "learning_rate": 3.238381573583367e-05, "loss": 2.1441, "step": 79640 }, { "epoch": 5.411401005571409, "grad_norm": 3.4325082302093506, "learning_rate": 3.2379569234950405e-05, "loss": 2.3287, "step": 79645 }, { "epoch": 5.411740725642071, "grad_norm": 3.507091522216797, "learning_rate": 3.237532273406713e-05, "loss": 2.2095, "step": 79650 }, { "epoch": 5.412080445712733, "grad_norm": 2.8081934452056885, "learning_rate": 3.2371076233183854e-05, "loss": 2.4406, "step": 79655 }, { "epoch": 5.412420165783394, "grad_norm": 3.0850634574890137, "learning_rate": 3.236682973230058e-05, "loss": 2.2081, "step": 79660 }, { "epoch": 5.412759885854056, "grad_norm": 3.1576457023620605, "learning_rate": 3.236258323141732e-05, "loss": 2.4438, "step": 79665 }, { "epoch": 5.413099605924718, "grad_norm": 3.3396430015563965, "learning_rate": 3.235833673053404e-05, "loss": 2.2876, "step": 79670 }, { "epoch": 5.4134393259953795, "grad_norm": 3.07468843460083, "learning_rate": 3.2354090229650766e-05, "loss": 2.2188, "step": 79675 }, { "epoch": 5.4137790460660415, "grad_norm": 2.6506459712982178, "learning_rate": 3.23498437287675e-05, "loss": 2.1594, "step": 79680 }, { "epoch": 5.414118766136704, "grad_norm": 3.181288480758667, "learning_rate": 3.234559722788422e-05, "loss": 2.0725, "step": 79685 }, { "epoch": 5.414458486207365, "grad_norm": 3.4392902851104736, "learning_rate": 3.234135072700095e-05, "loss": 2.2695, "step": 79690 }, { "epoch": 5.414798206278027, "grad_norm": 3.053675651550293, "learning_rate": 
3.2337104226117685e-05, "loss": 2.3527, "step": 79695 }, { "epoch": 5.415137926348689, "grad_norm": 4.431466102600098, "learning_rate": 3.233285772523441e-05, "loss": 2.2342, "step": 79700 }, { "epoch": 5.41547764641935, "grad_norm": 2.815415143966675, "learning_rate": 3.2328611224351134e-05, "loss": 2.2044, "step": 79705 }, { "epoch": 5.415817366490012, "grad_norm": 3.777949810028076, "learning_rate": 3.232436472346786e-05, "loss": 2.1144, "step": 79710 }, { "epoch": 5.416157086560674, "grad_norm": 3.099689245223999, "learning_rate": 3.23201182225846e-05, "loss": 2.1328, "step": 79715 }, { "epoch": 5.4164968066313355, "grad_norm": 2.9777183532714844, "learning_rate": 3.231587172170132e-05, "loss": 2.1037, "step": 79720 }, { "epoch": 5.416836526701998, "grad_norm": 3.4167556762695312, "learning_rate": 3.2311625220818046e-05, "loss": 2.2883, "step": 79725 }, { "epoch": 5.41717624677266, "grad_norm": 3.162344217300415, "learning_rate": 3.230737871993478e-05, "loss": 2.2501, "step": 79730 }, { "epoch": 5.417515966843321, "grad_norm": 2.8626809120178223, "learning_rate": 3.23031322190515e-05, "loss": 2.2944, "step": 79735 }, { "epoch": 5.417855686913983, "grad_norm": 3.987591028213501, "learning_rate": 3.229888571816823e-05, "loss": 2.2044, "step": 79740 }, { "epoch": 5.418195406984645, "grad_norm": 4.1789231300354, "learning_rate": 3.229463921728496e-05, "loss": 2.169, "step": 79745 }, { "epoch": 5.418535127055306, "grad_norm": 3.9002251625061035, "learning_rate": 3.2290392716401686e-05, "loss": 2.29, "step": 79750 }, { "epoch": 5.418874847125968, "grad_norm": 4.307391166687012, "learning_rate": 3.2286146215518414e-05, "loss": 2.4843, "step": 79755 }, { "epoch": 5.41921456719663, "grad_norm": 3.860642910003662, "learning_rate": 3.228189971463514e-05, "loss": 2.0889, "step": 79760 }, { "epoch": 5.4195542872672915, "grad_norm": 3.6143975257873535, "learning_rate": 3.227765321375187e-05, "loss": 2.1428, "step": 79765 }, { "epoch": 5.419894007337954, "grad_norm": 
3.5086429119110107, "learning_rate": 3.22734067128686e-05, "loss": 2.1202, "step": 79770 }, { "epoch": 5.420233727408616, "grad_norm": 3.100700616836548, "learning_rate": 3.2269160211985326e-05, "loss": 2.3281, "step": 79775 }, { "epoch": 5.420573447479277, "grad_norm": 3.2679102420806885, "learning_rate": 3.2264913711102054e-05, "loss": 2.4258, "step": 79780 }, { "epoch": 5.420913167549939, "grad_norm": 3.0226895809173584, "learning_rate": 3.226066721021878e-05, "loss": 2.3954, "step": 79785 }, { "epoch": 5.421252887620601, "grad_norm": 4.121469974517822, "learning_rate": 3.225642070933551e-05, "loss": 2.2278, "step": 79790 }, { "epoch": 5.421592607691262, "grad_norm": 3.2434496879577637, "learning_rate": 3.225217420845224e-05, "loss": 2.0764, "step": 79795 }, { "epoch": 5.421932327761924, "grad_norm": 3.3052937984466553, "learning_rate": 3.2247927707568966e-05, "loss": 2.1225, "step": 79800 }, { "epoch": 5.422272047832586, "grad_norm": 3.1220271587371826, "learning_rate": 3.2243681206685694e-05, "loss": 2.0024, "step": 79805 }, { "epoch": 5.4226117679032475, "grad_norm": 4.587769031524658, "learning_rate": 3.2239434705802416e-05, "loss": 2.2071, "step": 79810 }, { "epoch": 5.42295148797391, "grad_norm": 4.835669994354248, "learning_rate": 3.223518820491915e-05, "loss": 2.3725, "step": 79815 }, { "epoch": 5.423291208044572, "grad_norm": 3.2440009117126465, "learning_rate": 3.223094170403588e-05, "loss": 2.1547, "step": 79820 }, { "epoch": 5.423630928115233, "grad_norm": 3.597170352935791, "learning_rate": 3.22266952031526e-05, "loss": 2.4438, "step": 79825 }, { "epoch": 5.423970648185895, "grad_norm": 4.084936618804932, "learning_rate": 3.2222448702269334e-05, "loss": 2.3295, "step": 79830 }, { "epoch": 5.424310368256557, "grad_norm": 2.8844339847564697, "learning_rate": 3.221820220138606e-05, "loss": 2.3818, "step": 79835 }, { "epoch": 5.424650088327218, "grad_norm": 3.6356735229492188, "learning_rate": 3.2213955700502784e-05, "loss": 2.4812, "step": 79840 }, { 
"epoch": 5.42498980839788, "grad_norm": 2.92995023727417, "learning_rate": 3.220970919961951e-05, "loss": 2.3316, "step": 79845 }, { "epoch": 5.425329528468542, "grad_norm": 2.7245287895202637, "learning_rate": 3.2205462698736246e-05, "loss": 2.3218, "step": 79850 }, { "epoch": 5.4256692485392035, "grad_norm": 3.531585216522217, "learning_rate": 3.220121619785297e-05, "loss": 2.2495, "step": 79855 }, { "epoch": 5.426008968609866, "grad_norm": 3.798727512359619, "learning_rate": 3.2196969696969696e-05, "loss": 2.0536, "step": 79860 }, { "epoch": 5.426348688680527, "grad_norm": 3.488374710083008, "learning_rate": 3.219272319608643e-05, "loss": 2.3774, "step": 79865 }, { "epoch": 5.426688408751189, "grad_norm": 3.1289777755737305, "learning_rate": 3.218847669520316e-05, "loss": 2.1081, "step": 79870 }, { "epoch": 5.427028128821851, "grad_norm": 5.184856414794922, "learning_rate": 3.218423019431988e-05, "loss": 2.2765, "step": 79875 }, { "epoch": 5.427367848892512, "grad_norm": 2.729069709777832, "learning_rate": 3.217998369343661e-05, "loss": 2.2146, "step": 79880 }, { "epoch": 5.427707568963174, "grad_norm": 2.850126266479492, "learning_rate": 3.217573719255334e-05, "loss": 2.078, "step": 79885 }, { "epoch": 5.428047289033836, "grad_norm": 3.8311727046966553, "learning_rate": 3.2171490691670064e-05, "loss": 2.3955, "step": 79890 }, { "epoch": 5.4283870091044975, "grad_norm": 2.836833953857422, "learning_rate": 3.216724419078679e-05, "loss": 2.4471, "step": 79895 }, { "epoch": 5.4287267291751595, "grad_norm": 3.938971519470215, "learning_rate": 3.2162997689903526e-05, "loss": 2.2766, "step": 79900 }, { "epoch": 5.429066449245822, "grad_norm": 3.0830955505371094, "learning_rate": 3.215875118902025e-05, "loss": 2.2107, "step": 79905 }, { "epoch": 5.429406169316483, "grad_norm": 3.238452911376953, "learning_rate": 3.2154504688136976e-05, "loss": 2.3378, "step": 79910 }, { "epoch": 5.429745889387145, "grad_norm": 3.5567805767059326, "learning_rate": 
3.2150258187253704e-05, "loss": 2.2055, "step": 79915 }, { "epoch": 5.430085609457807, "grad_norm": 3.8229763507843018, "learning_rate": 3.214601168637043e-05, "loss": 1.9191, "step": 79920 }, { "epoch": 5.430425329528468, "grad_norm": 2.484842300415039, "learning_rate": 3.214176518548716e-05, "loss": 2.3089, "step": 79925 }, { "epoch": 5.43076504959913, "grad_norm": 3.654895305633545, "learning_rate": 3.213751868460389e-05, "loss": 1.8706, "step": 79930 }, { "epoch": 5.431104769669792, "grad_norm": 3.633988857269287, "learning_rate": 3.2133272183720616e-05, "loss": 2.0352, "step": 79935 }, { "epoch": 5.4314444897404535, "grad_norm": 3.2763149738311768, "learning_rate": 3.2129025682837344e-05, "loss": 2.4147, "step": 79940 }, { "epoch": 5.4317842098111155, "grad_norm": 3.4816577434539795, "learning_rate": 3.212477918195407e-05, "loss": 2.314, "step": 79945 }, { "epoch": 5.432123929881778, "grad_norm": 2.8040599822998047, "learning_rate": 3.21205326810708e-05, "loss": 2.2109, "step": 79950 }, { "epoch": 5.432463649952439, "grad_norm": 3.50238037109375, "learning_rate": 3.211628618018753e-05, "loss": 2.1885, "step": 79955 }, { "epoch": 5.432803370023101, "grad_norm": 4.0338454246521, "learning_rate": 3.2112039679304256e-05, "loss": 2.2866, "step": 79960 }, { "epoch": 5.433143090093763, "grad_norm": 4.604360580444336, "learning_rate": 3.2107793178420984e-05, "loss": 2.2703, "step": 79965 }, { "epoch": 5.433482810164424, "grad_norm": 3.080369710922241, "learning_rate": 3.210354667753771e-05, "loss": 2.2257, "step": 79970 }, { "epoch": 5.433822530235086, "grad_norm": 3.091874122619629, "learning_rate": 3.209930017665444e-05, "loss": 1.9908, "step": 79975 }, { "epoch": 5.434162250305748, "grad_norm": 3.736400604248047, "learning_rate": 3.209505367577116e-05, "loss": 2.0648, "step": 79980 }, { "epoch": 5.4345019703764095, "grad_norm": 3.3696234226226807, "learning_rate": 3.2090807174887896e-05, "loss": 2.0496, "step": 79985 }, { "epoch": 5.4348416904470715, "grad_norm": 
2.7711129188537598, "learning_rate": 3.2086560674004624e-05, "loss": 2.3947, "step": 79990 }, { "epoch": 5.435181410517734, "grad_norm": 3.1916937828063965, "learning_rate": 3.2082314173121345e-05, "loss": 2.1709, "step": 79995 }, { "epoch": 5.435521130588395, "grad_norm": 3.5651192665100098, "learning_rate": 3.207806767223808e-05, "loss": 2.274, "step": 80000 }, { "epoch": 5.435860850659057, "grad_norm": 3.4884958267211914, "learning_rate": 3.207382117135481e-05, "loss": 2.3497, "step": 80005 }, { "epoch": 5.436200570729719, "grad_norm": 3.1178035736083984, "learning_rate": 3.206957467047153e-05, "loss": 2.3191, "step": 80010 }, { "epoch": 5.43654029080038, "grad_norm": 3.276548147201538, "learning_rate": 3.206532816958826e-05, "loss": 2.2864, "step": 80015 }, { "epoch": 5.436880010871042, "grad_norm": 3.907740354537964, "learning_rate": 3.206108166870499e-05, "loss": 2.2753, "step": 80020 }, { "epoch": 5.437219730941704, "grad_norm": 3.4109649658203125, "learning_rate": 3.205683516782171e-05, "loss": 2.0527, "step": 80025 }, { "epoch": 5.4375594510123655, "grad_norm": 4.591080188751221, "learning_rate": 3.205258866693844e-05, "loss": 2.4541, "step": 80030 }, { "epoch": 5.437899171083028, "grad_norm": 2.833592414855957, "learning_rate": 3.2048342166055176e-05, "loss": 2.3039, "step": 80035 }, { "epoch": 5.43823889115369, "grad_norm": 3.022408962249756, "learning_rate": 3.2044095665171904e-05, "loss": 2.2035, "step": 80040 }, { "epoch": 5.438578611224351, "grad_norm": 2.9853687286376953, "learning_rate": 3.2039849164288625e-05, "loss": 2.1377, "step": 80045 }, { "epoch": 5.438918331295013, "grad_norm": 4.0232672691345215, "learning_rate": 3.203560266340535e-05, "loss": 2.2876, "step": 80050 }, { "epoch": 5.439258051365675, "grad_norm": 4.176698207855225, "learning_rate": 3.203135616252209e-05, "loss": 2.4695, "step": 80055 }, { "epoch": 5.439597771436336, "grad_norm": 2.812208414077759, "learning_rate": 3.202710966163881e-05, "loss": 2.2132, "step": 80060 }, { 
"epoch": 5.439937491506998, "grad_norm": 3.645475149154663, "learning_rate": 3.202286316075554e-05, "loss": 2.0462, "step": 80065 }, { "epoch": 5.44027721157766, "grad_norm": 3.110719919204712, "learning_rate": 3.201861665987227e-05, "loss": 2.0995, "step": 80070 }, { "epoch": 5.4406169316483215, "grad_norm": 3.2108614444732666, "learning_rate": 3.201437015898899e-05, "loss": 2.2035, "step": 80075 }, { "epoch": 5.440956651718984, "grad_norm": 4.183940410614014, "learning_rate": 3.201012365810572e-05, "loss": 2.2628, "step": 80080 }, { "epoch": 5.441296371789646, "grad_norm": 3.350219964981079, "learning_rate": 3.200587715722245e-05, "loss": 2.4978, "step": 80085 }, { "epoch": 5.441636091860307, "grad_norm": 3.1722750663757324, "learning_rate": 3.200163065633918e-05, "loss": 2.1772, "step": 80090 }, { "epoch": 5.441975811930969, "grad_norm": 3.670275926589966, "learning_rate": 3.1997384155455905e-05, "loss": 2.0562, "step": 80095 }, { "epoch": 5.442315532001631, "grad_norm": 3.825338840484619, "learning_rate": 3.199313765457263e-05, "loss": 1.9939, "step": 80100 }, { "epoch": 5.442655252072292, "grad_norm": 2.9638688564300537, "learning_rate": 3.198889115368936e-05, "loss": 2.2719, "step": 80105 }, { "epoch": 5.442994972142954, "grad_norm": 3.433833599090576, "learning_rate": 3.198464465280609e-05, "loss": 2.2658, "step": 80110 }, { "epoch": 5.443334692213616, "grad_norm": 3.201904296875, "learning_rate": 3.198039815192282e-05, "loss": 2.4791, "step": 80115 }, { "epoch": 5.4436744122842775, "grad_norm": 3.320711612701416, "learning_rate": 3.1976151651039545e-05, "loss": 2.2894, "step": 80120 }, { "epoch": 5.44401413235494, "grad_norm": 3.5680737495422363, "learning_rate": 3.197190515015627e-05, "loss": 2.1451, "step": 80125 }, { "epoch": 5.444353852425602, "grad_norm": 3.0320165157318115, "learning_rate": 3.1967658649273e-05, "loss": 2.2745, "step": 80130 }, { "epoch": 5.444693572496263, "grad_norm": 3.308154821395874, "learning_rate": 3.196341214838973e-05, "loss": 
2.3122, "step": 80135 }, { "epoch": 5.445033292566925, "grad_norm": 4.372733116149902, "learning_rate": 3.195916564750646e-05, "loss": 2.3124, "step": 80140 }, { "epoch": 5.445373012637587, "grad_norm": 4.883070468902588, "learning_rate": 3.1954919146623185e-05, "loss": 2.3121, "step": 80145 }, { "epoch": 5.445712732708248, "grad_norm": 3.1216349601745605, "learning_rate": 3.1950672645739906e-05, "loss": 2.223, "step": 80150 }, { "epoch": 5.44605245277891, "grad_norm": 3.837557554244995, "learning_rate": 3.194642614485664e-05, "loss": 1.9007, "step": 80155 }, { "epoch": 5.446392172849572, "grad_norm": 3.117147922515869, "learning_rate": 3.194217964397337e-05, "loss": 2.3862, "step": 80160 }, { "epoch": 5.4467318929202335, "grad_norm": 2.232788562774658, "learning_rate": 3.193793314309009e-05, "loss": 2.3806, "step": 80165 }, { "epoch": 5.447071612990896, "grad_norm": 3.983769178390503, "learning_rate": 3.1933686642206825e-05, "loss": 2.3611, "step": 80170 }, { "epoch": 5.447411333061558, "grad_norm": 4.1755290031433105, "learning_rate": 3.192944014132355e-05, "loss": 2.2158, "step": 80175 }, { "epoch": 5.447751053132219, "grad_norm": 3.6913442611694336, "learning_rate": 3.1925193640440275e-05, "loss": 2.2687, "step": 80180 }, { "epoch": 5.448090773202881, "grad_norm": 3.7796528339385986, "learning_rate": 3.1920947139557e-05, "loss": 2.3334, "step": 80185 }, { "epoch": 5.448430493273543, "grad_norm": 2.9217722415924072, "learning_rate": 3.191670063867374e-05, "loss": 2.3867, "step": 80190 }, { "epoch": 5.448770213344204, "grad_norm": 3.560805559158325, "learning_rate": 3.191245413779046e-05, "loss": 2.3844, "step": 80195 }, { "epoch": 5.449109933414866, "grad_norm": 3.7510437965393066, "learning_rate": 3.1908207636907187e-05, "loss": 2.3693, "step": 80200 }, { "epoch": 5.449449653485528, "grad_norm": 2.9071221351623535, "learning_rate": 3.190396113602392e-05, "loss": 2.3951, "step": 80205 }, { "epoch": 5.4497893735561895, "grad_norm": 3.8340227603912354, 
"learning_rate": 3.189971463514065e-05, "loss": 2.2247, "step": 80210 }, { "epoch": 5.450129093626852, "grad_norm": 3.364318370819092, "learning_rate": 3.189546813425737e-05, "loss": 2.6455, "step": 80215 }, { "epoch": 5.450468813697514, "grad_norm": 3.1249780654907227, "learning_rate": 3.1891221633374105e-05, "loss": 2.2474, "step": 80220 }, { "epoch": 5.450808533768175, "grad_norm": 3.6646134853363037, "learning_rate": 3.188697513249083e-05, "loss": 2.0612, "step": 80225 }, { "epoch": 5.451148253838837, "grad_norm": 4.131343364715576, "learning_rate": 3.1882728631607555e-05, "loss": 2.2414, "step": 80230 }, { "epoch": 5.451487973909499, "grad_norm": 3.1389803886413574, "learning_rate": 3.187848213072428e-05, "loss": 2.2163, "step": 80235 }, { "epoch": 5.45182769398016, "grad_norm": 3.157360792160034, "learning_rate": 3.187423562984102e-05, "loss": 2.1192, "step": 80240 }, { "epoch": 5.452167414050822, "grad_norm": 3.2509448528289795, "learning_rate": 3.186998912895774e-05, "loss": 2.0012, "step": 80245 }, { "epoch": 5.452507134121484, "grad_norm": 3.067899703979492, "learning_rate": 3.186574262807447e-05, "loss": 2.0848, "step": 80250 }, { "epoch": 5.4528468541921455, "grad_norm": 2.962613582611084, "learning_rate": 3.18614961271912e-05, "loss": 2.4114, "step": 80255 }, { "epoch": 5.453186574262808, "grad_norm": 3.920278787612915, "learning_rate": 3.185724962630792e-05, "loss": 2.0354, "step": 80260 }, { "epoch": 5.45352629433347, "grad_norm": 3.586580991744995, "learning_rate": 3.185300312542465e-05, "loss": 2.3454, "step": 80265 }, { "epoch": 5.453866014404131, "grad_norm": 3.629765748977661, "learning_rate": 3.184875662454138e-05, "loss": 2.407, "step": 80270 }, { "epoch": 5.454205734474793, "grad_norm": 3.9733633995056152, "learning_rate": 3.184451012365811e-05, "loss": 2.1184, "step": 80275 }, { "epoch": 5.454545454545454, "grad_norm": 3.671316146850586, "learning_rate": 3.1840263622774835e-05, "loss": 2.2815, "step": 80280 }, { "epoch": 5.454885174616116, 
"grad_norm": 2.8114895820617676, "learning_rate": 3.183601712189156e-05, "loss": 2.2222, "step": 80285 }, { "epoch": 5.455224894686778, "grad_norm": 4.251669406890869, "learning_rate": 3.183177062100829e-05, "loss": 2.035, "step": 80290 }, { "epoch": 5.4555646147574395, "grad_norm": 3.2597053050994873, "learning_rate": 3.182752412012502e-05, "loss": 2.5428, "step": 80295 }, { "epoch": 5.4559043348281016, "grad_norm": 3.542156219482422, "learning_rate": 3.182327761924175e-05, "loss": 2.4112, "step": 80300 }, { "epoch": 5.456244054898764, "grad_norm": 3.799349069595337, "learning_rate": 3.1819031118358475e-05, "loss": 2.3095, "step": 80305 }, { "epoch": 5.456583774969425, "grad_norm": 3.2222671508789062, "learning_rate": 3.18147846174752e-05, "loss": 2.2344, "step": 80310 }, { "epoch": 5.456923495040087, "grad_norm": 4.034772872924805, "learning_rate": 3.181053811659193e-05, "loss": 2.0829, "step": 80315 }, { "epoch": 5.457263215110749, "grad_norm": 3.178997755050659, "learning_rate": 3.180629161570866e-05, "loss": 2.0829, "step": 80320 }, { "epoch": 5.45760293518141, "grad_norm": 4.436171054840088, "learning_rate": 3.180204511482539e-05, "loss": 2.224, "step": 80325 }, { "epoch": 5.457942655252072, "grad_norm": 3.357321262359619, "learning_rate": 3.1797798613942115e-05, "loss": 2.464, "step": 80330 }, { "epoch": 5.458282375322734, "grad_norm": 2.798217535018921, "learning_rate": 3.1793552113058836e-05, "loss": 2.1744, "step": 80335 }, { "epoch": 5.4586220953933955, "grad_norm": 3.731921672821045, "learning_rate": 3.178930561217557e-05, "loss": 2.1677, "step": 80340 }, { "epoch": 5.458961815464058, "grad_norm": 3.439669370651245, "learning_rate": 3.17850591112923e-05, "loss": 2.3228, "step": 80345 }, { "epoch": 5.45930153553472, "grad_norm": 2.820781946182251, "learning_rate": 3.178081261040902e-05, "loss": 2.141, "step": 80350 }, { "epoch": 5.459641255605381, "grad_norm": 3.902635097503662, "learning_rate": 3.1776566109525755e-05, "loss": 2.4092, "step": 80355 }, { 
"epoch": 5.459980975676043, "grad_norm": 2.9902420043945312, "learning_rate": 3.177231960864248e-05, "loss": 2.1263, "step": 80360 }, { "epoch": 5.460320695746705, "grad_norm": 3.2569692134857178, "learning_rate": 3.1768073107759204e-05, "loss": 2.5008, "step": 80365 }, { "epoch": 5.460660415817366, "grad_norm": 3.839118719100952, "learning_rate": 3.176382660687593e-05, "loss": 2.2033, "step": 80370 }, { "epoch": 5.461000135888028, "grad_norm": 3.596707582473755, "learning_rate": 3.175958010599267e-05, "loss": 2.228, "step": 80375 }, { "epoch": 5.46133985595869, "grad_norm": 4.421201705932617, "learning_rate": 3.1755333605109395e-05, "loss": 2.2996, "step": 80380 }, { "epoch": 5.4616795760293515, "grad_norm": 3.048936605453491, "learning_rate": 3.1751087104226116e-05, "loss": 2.4078, "step": 80385 }, { "epoch": 5.462019296100014, "grad_norm": 3.856297016143799, "learning_rate": 3.174684060334285e-05, "loss": 2.3723, "step": 80390 }, { "epoch": 5.462359016170676, "grad_norm": 3.5739729404449463, "learning_rate": 3.174259410245958e-05, "loss": 2.2941, "step": 80395 }, { "epoch": 5.462698736241337, "grad_norm": 2.762772560119629, "learning_rate": 3.17383476015763e-05, "loss": 2.3624, "step": 80400 }, { "epoch": 5.463038456311999, "grad_norm": 3.3586061000823975, "learning_rate": 3.173410110069303e-05, "loss": 2.2355, "step": 80405 }, { "epoch": 5.463378176382661, "grad_norm": 3.2193541526794434, "learning_rate": 3.172985459980976e-05, "loss": 2.4183, "step": 80410 }, { "epoch": 5.463717896453322, "grad_norm": 3.8928186893463135, "learning_rate": 3.1725608098926484e-05, "loss": 2.3882, "step": 80415 }, { "epoch": 5.464057616523984, "grad_norm": 3.5486841201782227, "learning_rate": 3.172136159804321e-05, "loss": 2.1843, "step": 80420 }, { "epoch": 5.464397336594646, "grad_norm": 2.5338706970214844, "learning_rate": 3.171711509715995e-05, "loss": 1.9203, "step": 80425 }, { "epoch": 5.4647370566653075, "grad_norm": 3.711792469024658, "learning_rate": 
3.171286859627667e-05, "loss": 2.2007, "step": 80430 }, { "epoch": 5.46507677673597, "grad_norm": 2.6680924892425537, "learning_rate": 3.1708622095393396e-05, "loss": 2.3809, "step": 80435 }, { "epoch": 5.465416496806632, "grad_norm": 3.4454877376556396, "learning_rate": 3.1704375594510124e-05, "loss": 2.3636, "step": 80440 }, { "epoch": 5.465756216877293, "grad_norm": 4.1544508934021, "learning_rate": 3.170012909362685e-05, "loss": 2.5855, "step": 80445 }, { "epoch": 5.466095936947955, "grad_norm": 3.990328311920166, "learning_rate": 3.169588259274358e-05, "loss": 2.6387, "step": 80450 }, { "epoch": 5.466435657018617, "grad_norm": 2.7999401092529297, "learning_rate": 3.169163609186031e-05, "loss": 2.1669, "step": 80455 }, { "epoch": 5.466775377089278, "grad_norm": 4.343706130981445, "learning_rate": 3.1687389590977036e-05, "loss": 2.4025, "step": 80460 }, { "epoch": 5.46711509715994, "grad_norm": 4.551077365875244, "learning_rate": 3.1683143090093764e-05, "loss": 2.4349, "step": 80465 }, { "epoch": 5.467454817230602, "grad_norm": 3.966705560684204, "learning_rate": 3.167889658921049e-05, "loss": 2.1017, "step": 80470 }, { "epoch": 5.4677945373012635, "grad_norm": 3.5926756858825684, "learning_rate": 3.167465008832722e-05, "loss": 2.1855, "step": 80475 }, { "epoch": 5.468134257371926, "grad_norm": 3.628648281097412, "learning_rate": 3.167040358744395e-05, "loss": 2.389, "step": 80480 }, { "epoch": 5.468473977442588, "grad_norm": 3.1532223224639893, "learning_rate": 3.1666157086560676e-05, "loss": 2.2393, "step": 80485 }, { "epoch": 5.468813697513249, "grad_norm": 4.067864418029785, "learning_rate": 3.1661910585677404e-05, "loss": 2.2849, "step": 80490 }, { "epoch": 5.469153417583911, "grad_norm": 4.275897026062012, "learning_rate": 3.165766408479413e-05, "loss": 2.152, "step": 80495 }, { "epoch": 5.469493137654573, "grad_norm": 3.7746708393096924, "learning_rate": 3.165341758391086e-05, "loss": 2.0208, "step": 80500 }, { "epoch": 5.469832857725234, "grad_norm": 
3.236906051635742, "learning_rate": 3.164917108302758e-05, "loss": 2.3684, "step": 80505 }, { "epoch": 5.470172577795896, "grad_norm": 3.180959939956665, "learning_rate": 3.1644924582144316e-05, "loss": 2.2648, "step": 80510 }, { "epoch": 5.470512297866558, "grad_norm": 4.620659828186035, "learning_rate": 3.1640678081261044e-05, "loss": 2.3039, "step": 80515 }, { "epoch": 5.4708520179372195, "grad_norm": 4.513372421264648, "learning_rate": 3.1636431580377766e-05, "loss": 2.1593, "step": 80520 }, { "epoch": 5.471191738007882, "grad_norm": 3.838669776916504, "learning_rate": 3.16321850794945e-05, "loss": 2.4269, "step": 80525 }, { "epoch": 5.471531458078544, "grad_norm": 2.9070568084716797, "learning_rate": 3.162793857861123e-05, "loss": 2.2527, "step": 80530 }, { "epoch": 5.471871178149205, "grad_norm": 2.8774192333221436, "learning_rate": 3.162369207772795e-05, "loss": 2.1554, "step": 80535 }, { "epoch": 5.472210898219867, "grad_norm": 3.873241901397705, "learning_rate": 3.161944557684468e-05, "loss": 2.2514, "step": 80540 }, { "epoch": 5.472550618290528, "grad_norm": 3.767063617706299, "learning_rate": 3.161519907596141e-05, "loss": 2.354, "step": 80545 }, { "epoch": 5.47289033836119, "grad_norm": 3.074009656906128, "learning_rate": 3.161095257507814e-05, "loss": 2.3175, "step": 80550 }, { "epoch": 5.473230058431852, "grad_norm": 3.075347900390625, "learning_rate": 3.160670607419486e-05, "loss": 2.0862, "step": 80555 }, { "epoch": 5.4735697785025135, "grad_norm": 3.3678805828094482, "learning_rate": 3.1602459573311596e-05, "loss": 2.1851, "step": 80560 }, { "epoch": 5.4739094985731755, "grad_norm": 4.927192687988281, "learning_rate": 3.1598213072428324e-05, "loss": 2.1345, "step": 80565 }, { "epoch": 5.474249218643838, "grad_norm": 2.8026046752929688, "learning_rate": 3.1593966571545046e-05, "loss": 2.3166, "step": 80570 }, { "epoch": 5.474588938714499, "grad_norm": 3.67187762260437, "learning_rate": 3.1589720070661774e-05, "loss": 2.2754, "step": 80575 }, { 
"epoch": 5.474928658785161, "grad_norm": 3.0864267349243164, "learning_rate": 3.158547356977851e-05, "loss": 2.4328, "step": 80580 }, { "epoch": 5.475268378855823, "grad_norm": 3.7989847660064697, "learning_rate": 3.158122706889523e-05, "loss": 2.2873, "step": 80585 }, { "epoch": 5.475608098926484, "grad_norm": 3.5327420234680176, "learning_rate": 3.157698056801196e-05, "loss": 2.3663, "step": 80590 }, { "epoch": 5.475947818997146, "grad_norm": 3.9594032764434814, "learning_rate": 3.157273406712869e-05, "loss": 2.479, "step": 80595 }, { "epoch": 5.476287539067808, "grad_norm": 2.882197141647339, "learning_rate": 3.1568487566245414e-05, "loss": 2.3642, "step": 80600 }, { "epoch": 5.4766272591384695, "grad_norm": 3.8901314735412598, "learning_rate": 3.156424106536214e-05, "loss": 2.0974, "step": 80605 }, { "epoch": 5.476966979209132, "grad_norm": 2.760374069213867, "learning_rate": 3.1559994564478876e-05, "loss": 2.2186, "step": 80610 }, { "epoch": 5.477306699279794, "grad_norm": 3.6936635971069336, "learning_rate": 3.15557480635956e-05, "loss": 2.3905, "step": 80615 }, { "epoch": 5.477646419350455, "grad_norm": 2.8927624225616455, "learning_rate": 3.1551501562712326e-05, "loss": 2.5276, "step": 80620 }, { "epoch": 5.477986139421117, "grad_norm": 3.6128084659576416, "learning_rate": 3.1547255061829054e-05, "loss": 2.4699, "step": 80625 }, { "epoch": 5.478325859491779, "grad_norm": 3.3083558082580566, "learning_rate": 3.154300856094578e-05, "loss": 2.0874, "step": 80630 }, { "epoch": 5.47866557956244, "grad_norm": 3.046151638031006, "learning_rate": 3.153876206006251e-05, "loss": 2.2403, "step": 80635 }, { "epoch": 5.479005299633102, "grad_norm": 3.4911961555480957, "learning_rate": 3.153451555917924e-05, "loss": 2.3845, "step": 80640 }, { "epoch": 5.479345019703764, "grad_norm": 3.658679723739624, "learning_rate": 3.1530269058295966e-05, "loss": 2.1677, "step": 80645 }, { "epoch": 5.4796847397744255, "grad_norm": 2.935096502304077, "learning_rate": 
3.1526022557412694e-05, "loss": 2.2104, "step": 80650 }, { "epoch": 5.480024459845088, "grad_norm": 3.722935438156128, "learning_rate": 3.152177605652942e-05, "loss": 2.3763, "step": 80655 }, { "epoch": 5.48036417991575, "grad_norm": 3.4369113445281982, "learning_rate": 3.151752955564615e-05, "loss": 2.1015, "step": 80660 }, { "epoch": 5.480703899986411, "grad_norm": 3.989908456802368, "learning_rate": 3.151328305476288e-05, "loss": 2.2071, "step": 80665 }, { "epoch": 5.481043620057073, "grad_norm": 2.6160728931427, "learning_rate": 3.1509036553879606e-05, "loss": 2.4018, "step": 80670 }, { "epoch": 5.481383340127735, "grad_norm": 3.4447877407073975, "learning_rate": 3.150479005299633e-05, "loss": 2.2936, "step": 80675 }, { "epoch": 5.481723060198396, "grad_norm": 3.388056993484497, "learning_rate": 3.150054355211306e-05, "loss": 2.2999, "step": 80680 }, { "epoch": 5.482062780269058, "grad_norm": 3.8670895099639893, "learning_rate": 3.149629705122979e-05, "loss": 2.3107, "step": 80685 }, { "epoch": 5.48240250033972, "grad_norm": 2.787968873977661, "learning_rate": 3.149205055034651e-05, "loss": 2.1224, "step": 80690 }, { "epoch": 5.4827422204103815, "grad_norm": 4.157519817352295, "learning_rate": 3.1487804049463246e-05, "loss": 2.3757, "step": 80695 }, { "epoch": 5.483081940481044, "grad_norm": 2.583744764328003, "learning_rate": 3.1483557548579974e-05, "loss": 2.1437, "step": 80700 }, { "epoch": 5.483421660551706, "grad_norm": 2.5168635845184326, "learning_rate": 3.1479311047696695e-05, "loss": 2.1296, "step": 80705 }, { "epoch": 5.483761380622367, "grad_norm": 2.624046802520752, "learning_rate": 3.147506454681343e-05, "loss": 2.2936, "step": 80710 }, { "epoch": 5.484101100693029, "grad_norm": 3.30303692817688, "learning_rate": 3.147081804593016e-05, "loss": 2.0904, "step": 80715 }, { "epoch": 5.484440820763691, "grad_norm": 3.373077869415283, "learning_rate": 3.1466571545046886e-05, "loss": 2.3205, "step": 80720 }, { "epoch": 5.484780540834352, "grad_norm": 
3.550492763519287, "learning_rate": 3.146232504416361e-05, "loss": 2.1532, "step": 80725 }, { "epoch": 5.485120260905014, "grad_norm": 2.75701904296875, "learning_rate": 3.145807854328034e-05, "loss": 2.5844, "step": 80730 }, { "epoch": 5.485459980975676, "grad_norm": 4.092405319213867, "learning_rate": 3.145383204239707e-05, "loss": 2.2174, "step": 80735 }, { "epoch": 5.4857997010463375, "grad_norm": 2.8328962326049805, "learning_rate": 3.144958554151379e-05, "loss": 2.0983, "step": 80740 }, { "epoch": 5.486139421117, "grad_norm": 3.2822630405426025, "learning_rate": 3.1445339040630526e-05, "loss": 2.003, "step": 80745 }, { "epoch": 5.486479141187662, "grad_norm": 3.0508782863616943, "learning_rate": 3.1441092539747254e-05, "loss": 2.4036, "step": 80750 }, { "epoch": 5.486818861258323, "grad_norm": 3.3726017475128174, "learning_rate": 3.1436846038863975e-05, "loss": 2.367, "step": 80755 }, { "epoch": 5.487158581328985, "grad_norm": 2.746185779571533, "learning_rate": 3.14325995379807e-05, "loss": 2.156, "step": 80760 }, { "epoch": 5.487498301399647, "grad_norm": 3.811994791030884, "learning_rate": 3.142835303709744e-05, "loss": 2.163, "step": 80765 }, { "epoch": 5.487838021470308, "grad_norm": 3.1539711952209473, "learning_rate": 3.142410653621416e-05, "loss": 2.4593, "step": 80770 }, { "epoch": 5.48817774154097, "grad_norm": 2.933393955230713, "learning_rate": 3.141986003533089e-05, "loss": 2.0973, "step": 80775 }, { "epoch": 5.488517461611632, "grad_norm": 3.7303504943847656, "learning_rate": 3.141561353444762e-05, "loss": 2.1632, "step": 80780 }, { "epoch": 5.4888571816822935, "grad_norm": 3.5125906467437744, "learning_rate": 3.141136703356434e-05, "loss": 2.0496, "step": 80785 }, { "epoch": 5.489196901752956, "grad_norm": 3.079684019088745, "learning_rate": 3.140712053268107e-05, "loss": 2.3353, "step": 80790 }, { "epoch": 5.489536621823618, "grad_norm": 2.6353445053100586, "learning_rate": 3.14028740317978e-05, "loss": 2.4202, "step": 80795 }, { "epoch": 
5.489876341894279, "grad_norm": 3.1479601860046387, "learning_rate": 3.139862753091453e-05, "loss": 2.1147, "step": 80800 }, { "epoch": 5.490216061964941, "grad_norm": 3.341089963912964, "learning_rate": 3.1394381030031255e-05, "loss": 2.423, "step": 80805 }, { "epoch": 5.490555782035603, "grad_norm": 4.970668792724609, "learning_rate": 3.139013452914798e-05, "loss": 2.4753, "step": 80810 }, { "epoch": 5.490895502106264, "grad_norm": 3.2196381092071533, "learning_rate": 3.138588802826471e-05, "loss": 2.2897, "step": 80815 }, { "epoch": 5.491235222176926, "grad_norm": 3.7203152179718018, "learning_rate": 3.138164152738144e-05, "loss": 2.0086, "step": 80820 }, { "epoch": 5.491574942247588, "grad_norm": 3.200439453125, "learning_rate": 3.137739502649817e-05, "loss": 2.1617, "step": 80825 }, { "epoch": 5.4919146623182495, "grad_norm": 4.0442328453063965, "learning_rate": 3.1373148525614895e-05, "loss": 2.0313, "step": 80830 }, { "epoch": 5.492254382388912, "grad_norm": 3.0350449085235596, "learning_rate": 3.136890202473162e-05, "loss": 2.0099, "step": 80835 }, { "epoch": 5.492594102459574, "grad_norm": 3.188694953918457, "learning_rate": 3.136465552384835e-05, "loss": 2.4884, "step": 80840 }, { "epoch": 5.492933822530235, "grad_norm": 3.500767946243286, "learning_rate": 3.136040902296508e-05, "loss": 2.1913, "step": 80845 }, { "epoch": 5.493273542600897, "grad_norm": 2.865908622741699, "learning_rate": 3.135616252208181e-05, "loss": 2.167, "step": 80850 }, { "epoch": 5.493613262671559, "grad_norm": 2.6483755111694336, "learning_rate": 3.1351916021198535e-05, "loss": 2.1657, "step": 80855 }, { "epoch": 5.49395298274222, "grad_norm": 2.6081106662750244, "learning_rate": 3.1347669520315256e-05, "loss": 2.5234, "step": 80860 }, { "epoch": 5.494292702812882, "grad_norm": 2.9539859294891357, "learning_rate": 3.134342301943199e-05, "loss": 2.275, "step": 80865 }, { "epoch": 5.494632422883544, "grad_norm": 4.116025447845459, "learning_rate": 3.133917651854872e-05, "loss": 
2.3354, "step": 80870 }, { "epoch": 5.4949721429542056, "grad_norm": 3.7128489017486572, "learning_rate": 3.133493001766544e-05, "loss": 2.4477, "step": 80875 }, { "epoch": 5.495311863024868, "grad_norm": 4.2926459312438965, "learning_rate": 3.1330683516782175e-05, "loss": 2.2955, "step": 80880 }, { "epoch": 5.49565158309553, "grad_norm": 3.2689931392669678, "learning_rate": 3.13264370158989e-05, "loss": 2.0758, "step": 80885 }, { "epoch": 5.495991303166191, "grad_norm": 3.4687881469726562, "learning_rate": 3.132219051501563e-05, "loss": 2.3794, "step": 80890 }, { "epoch": 5.496331023236853, "grad_norm": 3.3032233715057373, "learning_rate": 3.131794401413235e-05, "loss": 2.142, "step": 80895 }, { "epoch": 5.496670743307515, "grad_norm": 2.9206385612487793, "learning_rate": 3.131369751324909e-05, "loss": 2.0561, "step": 80900 }, { "epoch": 5.497010463378176, "grad_norm": 4.25258207321167, "learning_rate": 3.1309451012365815e-05, "loss": 2.5231, "step": 80905 }, { "epoch": 5.497350183448838, "grad_norm": 3.7995717525482178, "learning_rate": 3.1305204511482537e-05, "loss": 2.075, "step": 80910 }, { "epoch": 5.4976899035195, "grad_norm": 3.4858040809631348, "learning_rate": 3.130095801059927e-05, "loss": 2.5347, "step": 80915 }, { "epoch": 5.498029623590162, "grad_norm": 2.8359110355377197, "learning_rate": 3.1296711509716e-05, "loss": 2.3777, "step": 80920 }, { "epoch": 5.498369343660824, "grad_norm": 3.4243264198303223, "learning_rate": 3.129246500883272e-05, "loss": 2.1989, "step": 80925 }, { "epoch": 5.498709063731486, "grad_norm": 3.261683702468872, "learning_rate": 3.128821850794945e-05, "loss": 2.351, "step": 80930 }, { "epoch": 5.499048783802147, "grad_norm": 3.8300185203552246, "learning_rate": 3.128397200706618e-05, "loss": 2.4687, "step": 80935 }, { "epoch": 5.499388503872809, "grad_norm": 2.809368133544922, "learning_rate": 3.1279725506182905e-05, "loss": 2.1683, "step": 80940 }, { "epoch": 5.499728223943471, "grad_norm": 3.170969247817993, "learning_rate": 
3.127547900529963e-05, "loss": 2.2893, "step": 80945 }, { "epoch": 5.500067944014132, "grad_norm": 3.7064008712768555, "learning_rate": 3.127123250441637e-05, "loss": 2.179, "step": 80950 }, { "epoch": 5.500407664084794, "grad_norm": 3.4183051586151123, "learning_rate": 3.126698600353309e-05, "loss": 2.3869, "step": 80955 }, { "epoch": 5.500747384155456, "grad_norm": 4.297932147979736, "learning_rate": 3.126273950264982e-05, "loss": 2.3973, "step": 80960 }, { "epoch": 5.501087104226118, "grad_norm": 3.22407865524292, "learning_rate": 3.1258493001766545e-05, "loss": 2.0057, "step": 80965 }, { "epoch": 5.50142682429678, "grad_norm": 3.0190799236297607, "learning_rate": 3.125424650088327e-05, "loss": 2.5412, "step": 80970 }, { "epoch": 5.501766544367442, "grad_norm": 3.6393990516662598, "learning_rate": 3.125e-05, "loss": 1.9743, "step": 80975 }, { "epoch": 5.502106264438103, "grad_norm": 3.0561859607696533, "learning_rate": 3.124575349911673e-05, "loss": 2.3308, "step": 80980 }, { "epoch": 5.502445984508765, "grad_norm": 4.111969470977783, "learning_rate": 3.124150699823346e-05, "loss": 2.2141, "step": 80985 }, { "epoch": 5.502785704579426, "grad_norm": 3.9868834018707275, "learning_rate": 3.1237260497350185e-05, "loss": 2.1545, "step": 80990 }, { "epoch": 5.503125424650088, "grad_norm": 3.3560895919799805, "learning_rate": 3.123301399646691e-05, "loss": 2.1247, "step": 80995 }, { "epoch": 5.50346514472075, "grad_norm": 2.978876829147339, "learning_rate": 3.122876749558364e-05, "loss": 2.322, "step": 81000 }, { "epoch": 5.5038048647914115, "grad_norm": 2.8894152641296387, "learning_rate": 3.122452099470037e-05, "loss": 2.3022, "step": 81005 }, { "epoch": 5.504144584862074, "grad_norm": 3.1363489627838135, "learning_rate": 3.12202744938171e-05, "loss": 2.0418, "step": 81010 }, { "epoch": 5.504484304932736, "grad_norm": 3.9975132942199707, "learning_rate": 3.1216027992933825e-05, "loss": 2.1614, "step": 81015 }, { "epoch": 5.504824025003397, "grad_norm": 
3.2220053672790527, "learning_rate": 3.121178149205055e-05, "loss": 2.1668, "step": 81020 }, { "epoch": 5.505163745074059, "grad_norm": 3.3264682292938232, "learning_rate": 3.120753499116728e-05, "loss": 2.2747, "step": 81025 }, { "epoch": 5.505503465144721, "grad_norm": 3.07004451751709, "learning_rate": 3.1203288490284e-05, "loss": 2.2058, "step": 81030 }, { "epoch": 5.505843185215382, "grad_norm": 3.35861873626709, "learning_rate": 3.119904198940074e-05, "loss": 2.2095, "step": 81035 }, { "epoch": 5.506182905286044, "grad_norm": 3.4052374362945557, "learning_rate": 3.1194795488517465e-05, "loss": 2.3394, "step": 81040 }, { "epoch": 5.506522625356706, "grad_norm": 4.196414947509766, "learning_rate": 3.1190548987634186e-05, "loss": 2.3618, "step": 81045 }, { "epoch": 5.5068623454273675, "grad_norm": 3.0734615325927734, "learning_rate": 3.118630248675092e-05, "loss": 2.3149, "step": 81050 }, { "epoch": 5.50720206549803, "grad_norm": 3.718838930130005, "learning_rate": 3.118205598586765e-05, "loss": 2.1892, "step": 81055 }, { "epoch": 5.507541785568692, "grad_norm": 3.7061612606048584, "learning_rate": 3.117780948498437e-05, "loss": 2.115, "step": 81060 }, { "epoch": 5.507881505639353, "grad_norm": 2.9449989795684814, "learning_rate": 3.11735629841011e-05, "loss": 2.24, "step": 81065 }, { "epoch": 5.508221225710015, "grad_norm": 2.8215370178222656, "learning_rate": 3.116931648321783e-05, "loss": 2.2745, "step": 81070 }, { "epoch": 5.508560945780677, "grad_norm": 3.607481002807617, "learning_rate": 3.116506998233456e-05, "loss": 2.2401, "step": 81075 }, { "epoch": 5.508900665851338, "grad_norm": 3.8391072750091553, "learning_rate": 3.116082348145128e-05, "loss": 2.1079, "step": 81080 }, { "epoch": 5.509240385922, "grad_norm": 3.8339080810546875, "learning_rate": 3.115657698056802e-05, "loss": 2.3522, "step": 81085 }, { "epoch": 5.509580105992662, "grad_norm": 3.3914012908935547, "learning_rate": 3.1152330479684745e-05, "loss": 2.3377, "step": 81090 }, { "epoch": 
5.5099198260633235, "grad_norm": 3.9813077449798584, "learning_rate": 3.1148083978801466e-05, "loss": 2.2298, "step": 81095 }, { "epoch": 5.510259546133986, "grad_norm": 3.690661907196045, "learning_rate": 3.1143837477918194e-05, "loss": 2.4088, "step": 81100 }, { "epoch": 5.510599266204648, "grad_norm": 4.076932907104492, "learning_rate": 3.113959097703493e-05, "loss": 2.2397, "step": 81105 }, { "epoch": 5.510938986275309, "grad_norm": 3.4750311374664307, "learning_rate": 3.113534447615165e-05, "loss": 2.555, "step": 81110 }, { "epoch": 5.511278706345971, "grad_norm": 2.9273362159729004, "learning_rate": 3.113109797526838e-05, "loss": 2.033, "step": 81115 }, { "epoch": 5.511618426416633, "grad_norm": 2.8235039710998535, "learning_rate": 3.112685147438511e-05, "loss": 2.1928, "step": 81120 }, { "epoch": 5.511958146487294, "grad_norm": 3.3606655597686768, "learning_rate": 3.1122604973501834e-05, "loss": 2.1059, "step": 81125 }, { "epoch": 5.512297866557956, "grad_norm": 3.8204190731048584, "learning_rate": 3.111835847261856e-05, "loss": 2.3838, "step": 81130 }, { "epoch": 5.512637586628618, "grad_norm": 2.7759859561920166, "learning_rate": 3.11141119717353e-05, "loss": 2.2508, "step": 81135 }, { "epoch": 5.5129773066992795, "grad_norm": 4.0998053550720215, "learning_rate": 3.110986547085202e-05, "loss": 2.1693, "step": 81140 }, { "epoch": 5.513317026769942, "grad_norm": 4.454463005065918, "learning_rate": 3.1105618969968746e-05, "loss": 2.4334, "step": 81145 }, { "epoch": 5.513656746840604, "grad_norm": 3.4678285121917725, "learning_rate": 3.1101372469085474e-05, "loss": 2.2992, "step": 81150 }, { "epoch": 5.513996466911265, "grad_norm": 2.8887338638305664, "learning_rate": 3.10971259682022e-05, "loss": 2.1654, "step": 81155 }, { "epoch": 5.514336186981927, "grad_norm": 3.459411859512329, "learning_rate": 3.109287946731893e-05, "loss": 2.6682, "step": 81160 }, { "epoch": 5.514675907052589, "grad_norm": 4.075128078460693, "learning_rate": 3.108863296643566e-05, 
"loss": 2.4428, "step": 81165 }, { "epoch": 5.51501562712325, "grad_norm": 4.281641483306885, "learning_rate": 3.1084386465552386e-05, "loss": 2.3566, "step": 81170 }, { "epoch": 5.515355347193912, "grad_norm": 3.264399290084839, "learning_rate": 3.1080139964669114e-05, "loss": 2.1602, "step": 81175 }, { "epoch": 5.515695067264574, "grad_norm": 3.9885480403900146, "learning_rate": 3.107589346378584e-05, "loss": 2.2906, "step": 81180 }, { "epoch": 5.516034787335236, "grad_norm": 3.5153801441192627, "learning_rate": 3.107164696290257e-05, "loss": 2.2889, "step": 81185 }, { "epoch": 5.516374507405898, "grad_norm": 3.2685577869415283, "learning_rate": 3.10674004620193e-05, "loss": 2.3816, "step": 81190 }, { "epoch": 5.516714227476559, "grad_norm": 3.521458625793457, "learning_rate": 3.1063153961136026e-05, "loss": 2.2065, "step": 81195 }, { "epoch": 5.517053947547221, "grad_norm": 3.1480307579040527, "learning_rate": 3.105890746025275e-05, "loss": 2.3932, "step": 81200 }, { "epoch": 5.517393667617883, "grad_norm": 3.287282705307007, "learning_rate": 3.105466095936948e-05, "loss": 2.1231, "step": 81205 }, { "epoch": 5.517733387688544, "grad_norm": 3.8978171348571777, "learning_rate": 3.105041445848621e-05, "loss": 2.2496, "step": 81210 }, { "epoch": 5.518073107759206, "grad_norm": 3.3431215286254883, "learning_rate": 3.104616795760293e-05, "loss": 2.2634, "step": 81215 }, { "epoch": 5.518412827829868, "grad_norm": 3.1413259506225586, "learning_rate": 3.1041921456719666e-05, "loss": 2.3308, "step": 81220 }, { "epoch": 5.5187525479005295, "grad_norm": 3.225282669067383, "learning_rate": 3.1037674955836394e-05, "loss": 2.2336, "step": 81225 }, { "epoch": 5.519092267971192, "grad_norm": 3.6865580081939697, "learning_rate": 3.1033428454953115e-05, "loss": 2.2741, "step": 81230 }, { "epoch": 5.519431988041854, "grad_norm": 3.1254963874816895, "learning_rate": 3.102918195406985e-05, "loss": 2.3835, "step": 81235 }, { "epoch": 5.519771708112515, "grad_norm": 3.227719783782959, 
"learning_rate": 3.102493545318658e-05, "loss": 2.4537, "step": 81240 }, { "epoch": 5.520111428183177, "grad_norm": 2.8534111976623535, "learning_rate": 3.1020688952303306e-05, "loss": 2.4199, "step": 81245 }, { "epoch": 5.520451148253839, "grad_norm": 3.3710858821868896, "learning_rate": 3.101644245142003e-05, "loss": 2.6574, "step": 81250 }, { "epoch": 5.5207908683245, "grad_norm": 3.937462329864502, "learning_rate": 3.101219595053676e-05, "loss": 2.204, "step": 81255 }, { "epoch": 5.521130588395162, "grad_norm": 3.7632319927215576, "learning_rate": 3.100794944965349e-05, "loss": 2.4934, "step": 81260 }, { "epoch": 5.521470308465824, "grad_norm": 3.6565377712249756, "learning_rate": 3.100370294877021e-05, "loss": 2.1629, "step": 81265 }, { "epoch": 5.5218100285364855, "grad_norm": 3.8038036823272705, "learning_rate": 3.0999456447886946e-05, "loss": 2.1907, "step": 81270 }, { "epoch": 5.522149748607148, "grad_norm": 4.076965808868408, "learning_rate": 3.0995209947003674e-05, "loss": 2.2195, "step": 81275 }, { "epoch": 5.52248946867781, "grad_norm": 3.934568166732788, "learning_rate": 3.0990963446120396e-05, "loss": 2.3992, "step": 81280 }, { "epoch": 5.522829188748471, "grad_norm": 3.4613325595855713, "learning_rate": 3.0986716945237124e-05, "loss": 2.3864, "step": 81285 }, { "epoch": 5.523168908819133, "grad_norm": 3.854975700378418, "learning_rate": 3.098247044435386e-05, "loss": 2.4882, "step": 81290 }, { "epoch": 5.523508628889795, "grad_norm": 3.390887975692749, "learning_rate": 3.097822394347058e-05, "loss": 2.0222, "step": 81295 }, { "epoch": 5.523848348960456, "grad_norm": 3.140139579772949, "learning_rate": 3.097397744258731e-05, "loss": 2.1348, "step": 81300 }, { "epoch": 5.524188069031118, "grad_norm": 2.8881311416625977, "learning_rate": 3.096973094170404e-05, "loss": 2.3299, "step": 81305 }, { "epoch": 5.52452778910178, "grad_norm": 4.130656719207764, "learning_rate": 3.0965484440820764e-05, "loss": 2.4549, "step": 81310 }, { "epoch": 
5.5248675091724415, "grad_norm": 3.3376731872558594, "learning_rate": 3.096123793993749e-05, "loss": 2.3204, "step": 81315 }, { "epoch": 5.525207229243104, "grad_norm": 4.064722061157227, "learning_rate": 3.095699143905422e-05, "loss": 2.0352, "step": 81320 }, { "epoch": 5.525546949313766, "grad_norm": 3.284304141998291, "learning_rate": 3.095274493817095e-05, "loss": 2.1769, "step": 81325 }, { "epoch": 5.525886669384427, "grad_norm": 2.6606597900390625, "learning_rate": 3.0948498437287676e-05, "loss": 2.1011, "step": 81330 }, { "epoch": 5.526226389455089, "grad_norm": 3.554607391357422, "learning_rate": 3.0944251936404404e-05, "loss": 2.5197, "step": 81335 }, { "epoch": 5.526566109525751, "grad_norm": 3.2443344593048096, "learning_rate": 3.094000543552113e-05, "loss": 2.3854, "step": 81340 }, { "epoch": 5.526905829596412, "grad_norm": 3.916511297225952, "learning_rate": 3.093575893463786e-05, "loss": 2.2042, "step": 81345 }, { "epoch": 5.527245549667074, "grad_norm": 3.434589147567749, "learning_rate": 3.093151243375459e-05, "loss": 2.324, "step": 81350 }, { "epoch": 5.527585269737736, "grad_norm": 3.1581194400787354, "learning_rate": 3.0927265932871316e-05, "loss": 2.3971, "step": 81355 }, { "epoch": 5.5279249898083975, "grad_norm": 2.959266424179077, "learning_rate": 3.0923019431988044e-05, "loss": 1.9019, "step": 81360 }, { "epoch": 5.52826470987906, "grad_norm": 3.4819397926330566, "learning_rate": 3.091877293110477e-05, "loss": 2.3533, "step": 81365 }, { "epoch": 5.528604429949722, "grad_norm": 3.9806294441223145, "learning_rate": 3.09145264302215e-05, "loss": 2.0826, "step": 81370 }, { "epoch": 5.528944150020383, "grad_norm": 3.097538709640503, "learning_rate": 3.091027992933823e-05, "loss": 2.2715, "step": 81375 }, { "epoch": 5.529283870091045, "grad_norm": 3.396742820739746, "learning_rate": 3.0906033428454956e-05, "loss": 2.2817, "step": 81380 }, { "epoch": 5.529623590161707, "grad_norm": 3.9318642616271973, "learning_rate": 3.090178692757168e-05, "loss": 
2.3017, "step": 81385 }, { "epoch": 5.529963310232368, "grad_norm": 3.0997426509857178, "learning_rate": 3.089754042668841e-05, "loss": 2.2918, "step": 81390 }, { "epoch": 5.53030303030303, "grad_norm": 4.025572299957275, "learning_rate": 3.089329392580514e-05, "loss": 2.0186, "step": 81395 }, { "epoch": 5.530642750373692, "grad_norm": 3.757840633392334, "learning_rate": 3.088904742492186e-05, "loss": 2.2551, "step": 81400 }, { "epoch": 5.5309824704443535, "grad_norm": 2.7482428550720215, "learning_rate": 3.0884800924038596e-05, "loss": 2.2653, "step": 81405 }, { "epoch": 5.531322190515016, "grad_norm": 3.607295513153076, "learning_rate": 3.0880554423155324e-05, "loss": 2.1838, "step": 81410 }, { "epoch": 5.531661910585678, "grad_norm": 3.5712950229644775, "learning_rate": 3.087630792227205e-05, "loss": 2.4588, "step": 81415 }, { "epoch": 5.532001630656339, "grad_norm": 3.1897072792053223, "learning_rate": 3.087206142138877e-05, "loss": 2.2048, "step": 81420 }, { "epoch": 5.532341350727001, "grad_norm": 3.164595603942871, "learning_rate": 3.086781492050551e-05, "loss": 2.1484, "step": 81425 }, { "epoch": 5.532681070797663, "grad_norm": 3.0850675106048584, "learning_rate": 3.0863568419622236e-05, "loss": 2.2112, "step": 81430 }, { "epoch": 5.533020790868324, "grad_norm": 3.0935521125793457, "learning_rate": 3.085932191873896e-05, "loss": 2.2541, "step": 81435 }, { "epoch": 5.533360510938986, "grad_norm": 3.490328073501587, "learning_rate": 3.085507541785569e-05, "loss": 2.3191, "step": 81440 }, { "epoch": 5.533700231009648, "grad_norm": 4.23640775680542, "learning_rate": 3.085082891697242e-05, "loss": 2.4331, "step": 81445 }, { "epoch": 5.5340399510803095, "grad_norm": 3.1967926025390625, "learning_rate": 3.084658241608914e-05, "loss": 2.0728, "step": 81450 }, { "epoch": 5.534379671150972, "grad_norm": 3.41143536567688, "learning_rate": 3.084233591520587e-05, "loss": 2.2293, "step": 81455 }, { "epoch": 5.534719391221634, "grad_norm": 3.3902575969696045, 
"learning_rate": 3.0838089414322604e-05, "loss": 2.1216, "step": 81460 }, { "epoch": 5.535059111292295, "grad_norm": 4.151628017425537, "learning_rate": 3.0833842913439325e-05, "loss": 2.3781, "step": 81465 }, { "epoch": 5.535398831362957, "grad_norm": 3.3997461795806885, "learning_rate": 3.082959641255605e-05, "loss": 2.2772, "step": 81470 }, { "epoch": 5.535738551433619, "grad_norm": 3.080498218536377, "learning_rate": 3.082534991167279e-05, "loss": 2.437, "step": 81475 }, { "epoch": 5.53607827150428, "grad_norm": 3.7054789066314697, "learning_rate": 3.082110341078951e-05, "loss": 2.2791, "step": 81480 }, { "epoch": 5.536417991574942, "grad_norm": 3.866863489151001, "learning_rate": 3.081685690990624e-05, "loss": 2.1706, "step": 81485 }, { "epoch": 5.536757711645604, "grad_norm": 3.206382989883423, "learning_rate": 3.0812610409022965e-05, "loss": 2.0946, "step": 81490 }, { "epoch": 5.537097431716266, "grad_norm": 3.543323516845703, "learning_rate": 3.080836390813969e-05, "loss": 2.4452, "step": 81495 }, { "epoch": 5.537437151786928, "grad_norm": 3.229785203933716, "learning_rate": 3.080411740725642e-05, "loss": 2.1724, "step": 81500 }, { "epoch": 5.53777687185759, "grad_norm": 3.0788300037384033, "learning_rate": 3.079987090637315e-05, "loss": 2.6195, "step": 81505 }, { "epoch": 5.538116591928251, "grad_norm": 3.742370128631592, "learning_rate": 3.079562440548988e-05, "loss": 2.0379, "step": 81510 }, { "epoch": 5.538456311998913, "grad_norm": 4.169976234436035, "learning_rate": 3.0791377904606605e-05, "loss": 2.2543, "step": 81515 }, { "epoch": 5.538796032069575, "grad_norm": 2.917205572128296, "learning_rate": 3.078713140372333e-05, "loss": 2.2171, "step": 81520 }, { "epoch": 5.539135752140236, "grad_norm": 2.9017841815948486, "learning_rate": 3.078288490284006e-05, "loss": 2.3869, "step": 81525 }, { "epoch": 5.539475472210898, "grad_norm": 3.529768705368042, "learning_rate": 3.077863840195679e-05, "loss": 2.282, "step": 81530 }, { "epoch": 5.53981519228156, 
"grad_norm": 3.497864007949829, "learning_rate": 3.077439190107352e-05, "loss": 2.2997, "step": 81535 }, { "epoch": 5.540154912352222, "grad_norm": 3.75103497505188, "learning_rate": 3.0770145400190245e-05, "loss": 2.3425, "step": 81540 }, { "epoch": 5.540494632422884, "grad_norm": 2.524653911590576, "learning_rate": 3.076589889930697e-05, "loss": 2.1687, "step": 81545 }, { "epoch": 5.540834352493546, "grad_norm": NaN, "learning_rate": 3.0762501698600356e-05, "loss": 2.3883, "step": 81550 }, { "epoch": 5.541174072564207, "grad_norm": 4.055788040161133, "learning_rate": 3.0758255197717084e-05, "loss": 2.4104, "step": 81555 }, { "epoch": 5.541513792634869, "grad_norm": 3.0752267837524414, "learning_rate": 3.0754008696833805e-05, "loss": 2.3703, "step": 81560 }, { "epoch": 5.541853512705531, "grad_norm": 3.109445810317993, "learning_rate": 3.074976219595054e-05, "loss": 1.9416, "step": 81565 }, { "epoch": 5.542193232776192, "grad_norm": 4.416130542755127, "learning_rate": 3.074551569506727e-05, "loss": 2.2688, "step": 81570 }, { "epoch": 5.542532952846854, "grad_norm": 4.250497341156006, "learning_rate": 3.074126919418399e-05, "loss": 2.0471, "step": 81575 }, { "epoch": 5.542872672917516, "grad_norm": 3.624818801879883, "learning_rate": 3.0737022693300724e-05, "loss": 2.4191, "step": 81580 }, { "epoch": 5.543212392988178, "grad_norm": 3.678736925125122, "learning_rate": 3.073277619241745e-05, "loss": 2.415, "step": 81585 }, { "epoch": 5.54355211305884, "grad_norm": 4.026500701904297, "learning_rate": 3.072852969153417e-05, "loss": 2.2582, "step": 81590 }, { "epoch": 5.543891833129502, "grad_norm": 3.587214469909668, "learning_rate": 3.072428319065091e-05, "loss": 2.2803, "step": 81595 }, { "epoch": 5.544231553200163, "grad_norm": 2.8809852600097656, "learning_rate": 3.0720036689767636e-05, "loss": 2.2782, "step": 81600 }, { "epoch": 5.544571273270825, "grad_norm": 3.115910053253174, "learning_rate": 3.071579018888436e-05, "loss": 2.3335, "step": 81605 }, { "epoch": 
5.544910993341487, "grad_norm": 2.894397735595703, "learning_rate": 3.0711543688001085e-05, "loss": 2.4571, "step": 81610 }, { "epoch": 5.545250713412148, "grad_norm": 3.1425466537475586, "learning_rate": 3.070729718711782e-05, "loss": 2.1551, "step": 81615 }, { "epoch": 5.54559043348281, "grad_norm": 3.782210111618042, "learning_rate": 3.070305068623455e-05, "loss": 2.2509, "step": 81620 }, { "epoch": 5.545930153553472, "grad_norm": 3.1659445762634277, "learning_rate": 3.069880418535127e-05, "loss": 2.3938, "step": 81625 }, { "epoch": 5.546269873624134, "grad_norm": 3.6814680099487305, "learning_rate": 3.0694557684468004e-05, "loss": 2.1602, "step": 81630 }, { "epoch": 5.546609593694796, "grad_norm": 2.7654714584350586, "learning_rate": 3.069031118358473e-05, "loss": 2.1753, "step": 81635 }, { "epoch": 5.546949313765458, "grad_norm": 3.5924723148345947, "learning_rate": 3.068606468270145e-05, "loss": 1.8278, "step": 81640 }, { "epoch": 5.547289033836119, "grad_norm": 3.12153959274292, "learning_rate": 3.068181818181818e-05, "loss": 2.2328, "step": 81645 }, { "epoch": 5.547628753906781, "grad_norm": 2.7133636474609375, "learning_rate": 3.0677571680934916e-05, "loss": 2.5395, "step": 81650 }, { "epoch": 5.547968473977443, "grad_norm": 3.848593235015869, "learning_rate": 3.067332518005164e-05, "loss": 1.9913, "step": 81655 }, { "epoch": 5.548308194048104, "grad_norm": 2.755815267562866, "learning_rate": 3.0669078679168365e-05, "loss": 2.3271, "step": 81660 }, { "epoch": 5.548647914118766, "grad_norm": 3.295229434967041, "learning_rate": 3.06648321782851e-05, "loss": 2.225, "step": 81665 }, { "epoch": 5.5489876341894275, "grad_norm": 4.0558247566223145, "learning_rate": 3.066058567740182e-05, "loss": 2.3177, "step": 81670 }, { "epoch": 5.54932735426009, "grad_norm": 2.973400354385376, "learning_rate": 3.0657188476695203e-05, "loss": 1.9838, "step": 81675 }, { "epoch": 5.549667074330752, "grad_norm": 4.5270185470581055, "learning_rate": 3.065294197581193e-05, "loss": 
2.2349, "step": 81680 }, { "epoch": 5.550006794401413, "grad_norm": 2.7666258811950684, "learning_rate": 3.064869547492866e-05, "loss": 2.2041, "step": 81685 }, { "epoch": 5.550346514472075, "grad_norm": 2.7870430946350098, "learning_rate": 3.064444897404539e-05, "loss": 2.3107, "step": 81690 }, { "epoch": 5.550686234542737, "grad_norm": 4.02590799331665, "learning_rate": 3.0640202473162115e-05, "loss": 2.1614, "step": 81695 }, { "epoch": 5.551025954613398, "grad_norm": 3.517014265060425, "learning_rate": 3.0635955972278843e-05, "loss": 2.3854, "step": 81700 }, { "epoch": 5.55136567468406, "grad_norm": 3.491973876953125, "learning_rate": 3.063170947139557e-05, "loss": 2.3683, "step": 81705 }, { "epoch": 5.551705394754722, "grad_norm": 3.8161582946777344, "learning_rate": 3.06274629705123e-05, "loss": 1.9378, "step": 81710 }, { "epoch": 5.5520451148253835, "grad_norm": 3.4826130867004395, "learning_rate": 3.062321646962903e-05, "loss": 2.367, "step": 81715 }, { "epoch": 5.552384834896046, "grad_norm": 4.854851722717285, "learning_rate": 3.0618969968745755e-05, "loss": 2.2363, "step": 81720 }, { "epoch": 5.552724554966708, "grad_norm": 4.060476303100586, "learning_rate": 3.0614723467862484e-05, "loss": 2.3478, "step": 81725 }, { "epoch": 5.553064275037369, "grad_norm": 4.066437244415283, "learning_rate": 3.061047696697921e-05, "loss": 2.0748, "step": 81730 }, { "epoch": 5.553403995108031, "grad_norm": 4.075836181640625, "learning_rate": 3.060623046609594e-05, "loss": 2.2362, "step": 81735 }, { "epoch": 5.553743715178693, "grad_norm": 3.7662436962127686, "learning_rate": 3.060198396521267e-05, "loss": 2.5034, "step": 81740 }, { "epoch": 5.554083435249354, "grad_norm": 3.053572416305542, "learning_rate": 3.0597737464329396e-05, "loss": 2.3205, "step": 81745 }, { "epoch": 5.554423155320016, "grad_norm": 4.111273288726807, "learning_rate": 3.059349096344612e-05, "loss": 1.9398, "step": 81750 }, { "epoch": 5.554762875390678, "grad_norm": 2.910993814468384, 
"learning_rate": 3.058924446256285e-05, "loss": 2.1972, "step": 81755 }, { "epoch": 5.5551025954613396, "grad_norm": 3.4414005279541016, "learning_rate": 3.058499796167958e-05, "loss": 2.331, "step": 81760 }, { "epoch": 5.555442315532002, "grad_norm": 3.943328619003296, "learning_rate": 3.05807514607963e-05, "loss": 2.0722, "step": 81765 }, { "epoch": 5.555782035602664, "grad_norm": 3.1974637508392334, "learning_rate": 3.0576504959913036e-05, "loss": 2.2289, "step": 81770 }, { "epoch": 5.556121755673325, "grad_norm": 4.7548747062683105, "learning_rate": 3.0572258459029764e-05, "loss": 2.2345, "step": 81775 }, { "epoch": 5.556461475743987, "grad_norm": 4.215510368347168, "learning_rate": 3.0568011958146485e-05, "loss": 2.2343, "step": 81780 }, { "epoch": 5.556801195814649, "grad_norm": 3.17508602142334, "learning_rate": 3.056376545726321e-05, "loss": 2.3376, "step": 81785 }, { "epoch": 5.55714091588531, "grad_norm": 5.560878276824951, "learning_rate": 3.055951895637995e-05, "loss": 2.2693, "step": 81790 }, { "epoch": 5.557480635955972, "grad_norm": 2.687903881072998, "learning_rate": 3.055527245549667e-05, "loss": 2.275, "step": 81795 }, { "epoch": 5.557820356026634, "grad_norm": 4.014063835144043, "learning_rate": 3.05510259546134e-05, "loss": 2.2964, "step": 81800 }, { "epoch": 5.558160076097296, "grad_norm": 2.9851765632629395, "learning_rate": 3.054677945373013e-05, "loss": 2.5206, "step": 81805 }, { "epoch": 5.558499796167958, "grad_norm": 3.4836390018463135, "learning_rate": 3.054253295284685e-05, "loss": 2.1858, "step": 81810 }, { "epoch": 5.55883951623862, "grad_norm": 3.033008575439453, "learning_rate": 3.053828645196358e-05, "loss": 2.3868, "step": 81815 }, { "epoch": 5.559179236309281, "grad_norm": 3.7117068767547607, "learning_rate": 3.053403995108031e-05, "loss": 2.2726, "step": 81820 }, { "epoch": 5.559518956379943, "grad_norm": 3.261653184890747, "learning_rate": 3.0529793450197044e-05, "loss": 2.2138, "step": 81825 }, { "epoch": 5.559858676450605, 
"grad_norm": 3.0873849391937256, "learning_rate": 3.0525546949313765e-05, "loss": 2.4198, "step": 81830 }, { "epoch": 5.560198396521266, "grad_norm": 3.2818169593811035, "learning_rate": 3.052130044843049e-05, "loss": 2.3129, "step": 81835 }, { "epoch": 5.560538116591928, "grad_norm": 3.4794929027557373, "learning_rate": 3.0517053947547224e-05, "loss": 2.353, "step": 81840 }, { "epoch": 5.56087783666259, "grad_norm": 3.772695302963257, "learning_rate": 3.051280744666395e-05, "loss": 2.3731, "step": 81845 }, { "epoch": 5.561217556733252, "grad_norm": 3.1544528007507324, "learning_rate": 3.050856094578068e-05, "loss": 2.2042, "step": 81850 }, { "epoch": 5.561557276803914, "grad_norm": 3.5600063800811768, "learning_rate": 3.050431444489741e-05, "loss": 2.2756, "step": 81855 }, { "epoch": 5.561896996874576, "grad_norm": 3.631563663482666, "learning_rate": 3.0500067944014133e-05, "loss": 2.1679, "step": 81860 }, { "epoch": 5.562236716945237, "grad_norm": 3.408707618713379, "learning_rate": 3.049582144313086e-05, "loss": 2.0041, "step": 81865 }, { "epoch": 5.562576437015899, "grad_norm": 3.3353967666625977, "learning_rate": 3.0491574942247592e-05, "loss": 2.4909, "step": 81870 }, { "epoch": 5.56291615708656, "grad_norm": 3.1934702396392822, "learning_rate": 3.0487328441364317e-05, "loss": 2.1953, "step": 81875 }, { "epoch": 5.563255877157222, "grad_norm": 3.7062418460845947, "learning_rate": 3.0483081940481045e-05, "loss": 2.0458, "step": 81880 }, { "epoch": 5.563595597227884, "grad_norm": 5.596227169036865, "learning_rate": 3.0478835439597776e-05, "loss": 2.1792, "step": 81885 }, { "epoch": 5.5639353172985455, "grad_norm": 3.6250531673431396, "learning_rate": 3.0474588938714498e-05, "loss": 2.2635, "step": 81890 }, { "epoch": 5.564275037369208, "grad_norm": 3.3020899295806885, "learning_rate": 3.047034243783123e-05, "loss": 1.9492, "step": 81895 }, { "epoch": 5.56461475743987, "grad_norm": 3.2733545303344727, "learning_rate": 3.0466095936947957e-05, "loss": 2.1167, 
"step": 81900 }, { "epoch": 5.564954477510531, "grad_norm": 3.6684513092041016, "learning_rate": 3.046184943606468e-05, "loss": 2.026, "step": 81905 }, { "epoch": 5.565294197581193, "grad_norm": 3.1667397022247314, "learning_rate": 3.0457602935181413e-05, "loss": 2.3286, "step": 81910 }, { "epoch": 5.565633917651855, "grad_norm": 3.1761674880981445, "learning_rate": 3.045335643429814e-05, "loss": 2.3255, "step": 81915 }, { "epoch": 5.565973637722516, "grad_norm": 3.6198766231536865, "learning_rate": 3.0449109933414866e-05, "loss": 1.9207, "step": 81920 }, { "epoch": 5.566313357793178, "grad_norm": 4.28648042678833, "learning_rate": 3.0444863432531594e-05, "loss": 2.2516, "step": 81925 }, { "epoch": 5.56665307786384, "grad_norm": 2.9585819244384766, "learning_rate": 3.0440616931648325e-05, "loss": 2.0459, "step": 81930 }, { "epoch": 5.5669927979345015, "grad_norm": 4.469480991363525, "learning_rate": 3.043637043076505e-05, "loss": 2.2175, "step": 81935 }, { "epoch": 5.567332518005164, "grad_norm": 2.3466179370880127, "learning_rate": 3.0432123929881778e-05, "loss": 2.4831, "step": 81940 }, { "epoch": 5.567672238075826, "grad_norm": 3.918297052383423, "learning_rate": 3.042787742899851e-05, "loss": 2.3198, "step": 81945 }, { "epoch": 5.568011958146487, "grad_norm": 3.170072317123413, "learning_rate": 3.0423630928115234e-05, "loss": 2.2677, "step": 81950 }, { "epoch": 5.568351678217149, "grad_norm": 3.2254865169525146, "learning_rate": 3.041938442723196e-05, "loss": 2.1394, "step": 81955 }, { "epoch": 5.568691398287811, "grad_norm": 3.402393341064453, "learning_rate": 3.041513792634869e-05, "loss": 2.3755, "step": 81960 }, { "epoch": 5.569031118358472, "grad_norm": 3.4679787158966064, "learning_rate": 3.0410891425465414e-05, "loss": 2.2333, "step": 81965 }, { "epoch": 5.569370838429134, "grad_norm": 3.3202784061431885, "learning_rate": 3.0406644924582146e-05, "loss": 2.4453, "step": 81970 }, { "epoch": 5.569710558499796, "grad_norm": 3.58709716796875, "learning_rate": 
3.0402398423698874e-05, "loss": 2.2946, "step": 81975 }, { "epoch": 5.5700502785704575, "grad_norm": 3.7884533405303955, "learning_rate": 3.03981519228156e-05, "loss": 2.1943, "step": 81980 }, { "epoch": 5.57038999864112, "grad_norm": 3.356584072113037, "learning_rate": 3.039390542193233e-05, "loss": 2.3765, "step": 81985 }, { "epoch": 5.570729718711782, "grad_norm": 3.449699640274048, "learning_rate": 3.0389658921049058e-05, "loss": 2.2305, "step": 81990 }, { "epoch": 5.571069438782443, "grad_norm": 4.108803749084473, "learning_rate": 3.038541242016579e-05, "loss": 2.2398, "step": 81995 }, { "epoch": 5.571409158853105, "grad_norm": 3.469980478286743, "learning_rate": 3.038116591928251e-05, "loss": 2.2985, "step": 82000 }, { "epoch": 5.571748878923767, "grad_norm": 3.009800672531128, "learning_rate": 3.0376919418399242e-05, "loss": 2.1455, "step": 82005 }, { "epoch": 5.572088598994428, "grad_norm": 3.1226558685302734, "learning_rate": 3.037267291751597e-05, "loss": 2.4331, "step": 82010 }, { "epoch": 5.57242831906509, "grad_norm": 4.526214599609375, "learning_rate": 3.0368426416632694e-05, "loss": 2.3718, "step": 82015 }, { "epoch": 5.572768039135752, "grad_norm": 3.023770332336426, "learning_rate": 3.0364179915749426e-05, "loss": 2.187, "step": 82020 }, { "epoch": 5.5731077592064135, "grad_norm": 2.7601048946380615, "learning_rate": 3.0359933414866154e-05, "loss": 2.4394, "step": 82025 }, { "epoch": 5.573447479277076, "grad_norm": 3.0722622871398926, "learning_rate": 3.035568691398288e-05, "loss": 2.4739, "step": 82030 }, { "epoch": 5.573787199347738, "grad_norm": 3.412585973739624, "learning_rate": 3.0351440413099606e-05, "loss": 2.4332, "step": 82035 }, { "epoch": 5.574126919418399, "grad_norm": 3.6249125003814697, "learning_rate": 3.0347193912216338e-05, "loss": 2.3939, "step": 82040 }, { "epoch": 5.574466639489061, "grad_norm": 3.1044418811798096, "learning_rate": 3.0342947411333062e-05, "loss": 2.1601, "step": 82045 }, { "epoch": 5.574806359559723, 
"grad_norm": 3.2468953132629395, "learning_rate": 3.033870091044979e-05, "loss": 2.325, "step": 82050 }, { "epoch": 5.575146079630384, "grad_norm": 2.8269054889678955, "learning_rate": 3.0334454409566522e-05, "loss": 2.1849, "step": 82055 }, { "epoch": 5.575485799701046, "grad_norm": 3.790194272994995, "learning_rate": 3.0330207908683243e-05, "loss": 2.161, "step": 82060 }, { "epoch": 5.575825519771708, "grad_norm": 2.9166653156280518, "learning_rate": 3.0325961407799974e-05, "loss": 2.1213, "step": 82065 }, { "epoch": 5.57616523984237, "grad_norm": 2.7074337005615234, "learning_rate": 3.0321714906916702e-05, "loss": 2.2862, "step": 82070 }, { "epoch": 5.576504959913032, "grad_norm": 3.528245210647583, "learning_rate": 3.0317468406033427e-05, "loss": 2.2101, "step": 82075 }, { "epoch": 5.576844679983694, "grad_norm": 3.1035642623901367, "learning_rate": 3.031322190515016e-05, "loss": 2.5599, "step": 82080 }, { "epoch": 5.577184400054355, "grad_norm": 4.030322551727295, "learning_rate": 3.0308975404266887e-05, "loss": 2.36, "step": 82085 }, { "epoch": 5.577524120125017, "grad_norm": 3.1402761936187744, "learning_rate": 3.030472890338361e-05, "loss": 2.5279, "step": 82090 }, { "epoch": 5.577863840195679, "grad_norm": 3.7016520500183105, "learning_rate": 3.0300482402500343e-05, "loss": 2.0951, "step": 82095 }, { "epoch": 5.57820356026634, "grad_norm": 3.8465981483459473, "learning_rate": 3.029623590161707e-05, "loss": 2.4059, "step": 82100 }, { "epoch": 5.578543280337002, "grad_norm": 2.9053311347961426, "learning_rate": 3.0291989400733795e-05, "loss": 2.2844, "step": 82105 }, { "epoch": 5.578883000407664, "grad_norm": 3.348926305770874, "learning_rate": 3.0287742899850523e-05, "loss": 2.3018, "step": 82110 }, { "epoch": 5.579222720478326, "grad_norm": 4.254932880401611, "learning_rate": 3.0283496398967255e-05, "loss": 2.1792, "step": 82115 }, { "epoch": 5.579562440548988, "grad_norm": 4.010380268096924, "learning_rate": 3.027924989808398e-05, "loss": 2.2706, "step": 
82120 }, { "epoch": 5.57990216061965, "grad_norm": 3.198547601699829, "learning_rate": 3.0275003397200707e-05, "loss": 2.1377, "step": 82125 }, { "epoch": 5.580241880690311, "grad_norm": 3.455397367477417, "learning_rate": 3.027075689631744e-05, "loss": 2.251, "step": 82130 }, { "epoch": 5.580581600760973, "grad_norm": 4.246786117553711, "learning_rate": 3.026651039543416e-05, "loss": 2.2515, "step": 82135 }, { "epoch": 5.580921320831635, "grad_norm": 3.626506805419922, "learning_rate": 3.026226389455089e-05, "loss": 2.3024, "step": 82140 }, { "epoch": 5.581261040902296, "grad_norm": 3.207728862762451, "learning_rate": 3.025801739366762e-05, "loss": 2.5111, "step": 82145 }, { "epoch": 5.581600760972958, "grad_norm": 3.780505418777466, "learning_rate": 3.0253770892784344e-05, "loss": 2.5812, "step": 82150 }, { "epoch": 5.58194048104362, "grad_norm": 3.260099172592163, "learning_rate": 3.0249524391901075e-05, "loss": 2.2306, "step": 82155 }, { "epoch": 5.582280201114282, "grad_norm": 3.7373154163360596, "learning_rate": 3.0245277891017803e-05, "loss": 2.4474, "step": 82160 }, { "epoch": 5.582619921184944, "grad_norm": 3.28564453125, "learning_rate": 3.0241031390134535e-05, "loss": 2.3107, "step": 82165 }, { "epoch": 5.582959641255606, "grad_norm": 4.560429096221924, "learning_rate": 3.0236784889251256e-05, "loss": 2.3616, "step": 82170 }, { "epoch": 5.583299361326267, "grad_norm": 3.7095582485198975, "learning_rate": 3.0232538388367987e-05, "loss": 2.4833, "step": 82175 }, { "epoch": 5.583639081396929, "grad_norm": 3.330998420715332, "learning_rate": 3.0228291887484715e-05, "loss": 2.0143, "step": 82180 }, { "epoch": 5.583978801467591, "grad_norm": 3.5918054580688477, "learning_rate": 3.022404538660144e-05, "loss": 2.128, "step": 82185 }, { "epoch": 5.584318521538252, "grad_norm": 4.019185543060303, "learning_rate": 3.021979888571817e-05, "loss": 2.1925, "step": 82190 }, { "epoch": 5.584658241608914, "grad_norm": 3.4453587532043457, "learning_rate": 
3.02155523848349e-05, "loss": 2.0825, "step": 82195 }, { "epoch": 5.584997961679576, "grad_norm": 2.638723373413086, "learning_rate": 3.0211305883951624e-05, "loss": 2.5258, "step": 82200 }, { "epoch": 5.585337681750238, "grad_norm": 3.072150468826294, "learning_rate": 3.0207059383068352e-05, "loss": 2.2598, "step": 82205 }, { "epoch": 5.5856774018209, "grad_norm": 3.4168198108673096, "learning_rate": 3.0202812882185083e-05, "loss": 2.0788, "step": 82210 }, { "epoch": 5.586017121891562, "grad_norm": 4.017874240875244, "learning_rate": 3.0198566381301808e-05, "loss": 2.3962, "step": 82215 }, { "epoch": 5.586356841962223, "grad_norm": 3.1347928047180176, "learning_rate": 3.0194319880418536e-05, "loss": 2.1208, "step": 82220 }, { "epoch": 5.586696562032885, "grad_norm": 3.6072254180908203, "learning_rate": 3.0190073379535267e-05, "loss": 2.244, "step": 82225 }, { "epoch": 5.587036282103547, "grad_norm": 3.8664584159851074, "learning_rate": 3.0185826878651992e-05, "loss": 2.2309, "step": 82230 }, { "epoch": 5.587376002174208, "grad_norm": 4.105204105377197, "learning_rate": 3.018158037776872e-05, "loss": 2.4578, "step": 82235 }, { "epoch": 5.58771572224487, "grad_norm": 4.117316246032715, "learning_rate": 3.017733387688545e-05, "loss": 2.1693, "step": 82240 }, { "epoch": 5.588055442315532, "grad_norm": 2.737147808074951, "learning_rate": 3.0173087376002173e-05, "loss": 2.4225, "step": 82245 }, { "epoch": 5.588395162386194, "grad_norm": 3.008246421813965, "learning_rate": 3.0168840875118904e-05, "loss": 2.2673, "step": 82250 }, { "epoch": 5.588734882456856, "grad_norm": 3.3698787689208984, "learning_rate": 3.0164594374235632e-05, "loss": 2.4396, "step": 82255 }, { "epoch": 5.589074602527518, "grad_norm": 2.937234878540039, "learning_rate": 3.0160347873352357e-05, "loss": 2.3955, "step": 82260 }, { "epoch": 5.589414322598179, "grad_norm": 2.9636518955230713, "learning_rate": 3.0156101372469088e-05, "loss": 2.1243, "step": 82265 }, { "epoch": 5.589754042668841, 
"grad_norm": 4.041494369506836, "learning_rate": 3.0151854871585816e-05, "loss": 2.263, "step": 82270 }, { "epoch": 5.590093762739503, "grad_norm": 3.4964218139648438, "learning_rate": 3.014760837070254e-05, "loss": 2.2483, "step": 82275 }, { "epoch": 5.590433482810164, "grad_norm": 3.768294334411621, "learning_rate": 3.014336186981927e-05, "loss": 2.1286, "step": 82280 }, { "epoch": 5.590773202880826, "grad_norm": 3.0182831287384033, "learning_rate": 3.0139115368936e-05, "loss": 2.1527, "step": 82285 }, { "epoch": 5.591112922951488, "grad_norm": 3.7326583862304688, "learning_rate": 3.0134868868052725e-05, "loss": 2.3585, "step": 82290 }, { "epoch": 5.59145264302215, "grad_norm": 3.7817795276641846, "learning_rate": 3.0130622367169453e-05, "loss": 2.1584, "step": 82295 }, { "epoch": 5.591792363092812, "grad_norm": 3.0315909385681152, "learning_rate": 3.0126375866286184e-05, "loss": 2.2288, "step": 82300 }, { "epoch": 5.592132083163474, "grad_norm": 2.902231216430664, "learning_rate": 3.0122129365402905e-05, "loss": 2.2637, "step": 82305 }, { "epoch": 5.592471803234135, "grad_norm": 3.595888376235962, "learning_rate": 3.0117882864519637e-05, "loss": 2.2459, "step": 82310 }, { "epoch": 5.592811523304797, "grad_norm": 4.424285411834717, "learning_rate": 3.0113636363636365e-05, "loss": 2.1499, "step": 82315 }, { "epoch": 5.593151243375459, "grad_norm": 2.9236483573913574, "learning_rate": 3.010938986275309e-05, "loss": 2.2307, "step": 82320 }, { "epoch": 5.59349096344612, "grad_norm": 4.059679985046387, "learning_rate": 3.010514336186982e-05, "loss": 2.0649, "step": 82325 }, { "epoch": 5.593830683516782, "grad_norm": 4.200825214385986, "learning_rate": 3.010089686098655e-05, "loss": 2.4477, "step": 82330 }, { "epoch": 5.594170403587444, "grad_norm": 3.2183425426483154, "learning_rate": 3.009665036010328e-05, "loss": 2.3109, "step": 82335 }, { "epoch": 5.594510123658106, "grad_norm": 5.287304401397705, "learning_rate": 3.0092403859220005e-05, "loss": 2.4505, "step": 
82340 }, { "epoch": 5.594849843728768, "grad_norm": 3.560250759124756, "learning_rate": 3.0088157358336733e-05, "loss": 2.3979, "step": 82345 }, { "epoch": 5.595189563799429, "grad_norm": 3.1084179878234863, "learning_rate": 3.008391085745346e-05, "loss": 1.9136, "step": 82350 }, { "epoch": 5.595529283870091, "grad_norm": 3.9446523189544678, "learning_rate": 3.0079664356570185e-05, "loss": 2.5666, "step": 82355 }, { "epoch": 5.595869003940753, "grad_norm": 2.6056487560272217, "learning_rate": 3.0075417855686917e-05, "loss": 2.4494, "step": 82360 }, { "epoch": 5.596208724011414, "grad_norm": 3.6065380573272705, "learning_rate": 3.0071171354803645e-05, "loss": 2.5141, "step": 82365 }, { "epoch": 5.596548444082076, "grad_norm": 4.227644443511963, "learning_rate": 3.006692485392037e-05, "loss": 2.3997, "step": 82370 }, { "epoch": 5.596888164152738, "grad_norm": 3.3784687519073486, "learning_rate": 3.00626783530371e-05, "loss": 2.2097, "step": 82375 }, { "epoch": 5.5972278842234, "grad_norm": 4.08747673034668, "learning_rate": 3.005843185215383e-05, "loss": 2.4899, "step": 82380 }, { "epoch": 5.597567604294062, "grad_norm": 3.868197202682495, "learning_rate": 3.0054185351270553e-05, "loss": 2.1405, "step": 82385 }, { "epoch": 5.597907324364724, "grad_norm": 3.3933045864105225, "learning_rate": 3.004993885038728e-05, "loss": 2.0053, "step": 82390 }, { "epoch": 5.598247044435385, "grad_norm": 3.0045740604400635, "learning_rate": 3.0045692349504013e-05, "loss": 2.441, "step": 82395 }, { "epoch": 5.598586764506047, "grad_norm": 3.7560439109802246, "learning_rate": 3.0041445848620737e-05, "loss": 2.3437, "step": 82400 }, { "epoch": 5.598926484576709, "grad_norm": 3.252164840698242, "learning_rate": 3.0037199347737465e-05, "loss": 2.015, "step": 82405 }, { "epoch": 5.59926620464737, "grad_norm": 2.8611981868743896, "learning_rate": 3.0032952846854197e-05, "loss": 2.1645, "step": 82410 }, { "epoch": 5.599605924718032, "grad_norm": 4.105506420135498, "learning_rate": 
3.0028706345970918e-05, "loss": 2.3505, "step": 82415 }, { "epoch": 5.599945644788694, "grad_norm": 3.3893837928771973, "learning_rate": 3.002445984508765e-05, "loss": 2.1378, "step": 82420 }, { "epoch": 5.600285364859356, "grad_norm": 3.3507418632507324, "learning_rate": 3.0020213344204377e-05, "loss": 2.3344, "step": 82425 }, { "epoch": 5.600625084930018, "grad_norm": 3.587141990661621, "learning_rate": 3.0015966843321102e-05, "loss": 2.3526, "step": 82430 }, { "epoch": 5.60096480500068, "grad_norm": 4.08707332611084, "learning_rate": 3.0011720342437833e-05, "loss": 2.2824, "step": 82435 }, { "epoch": 5.601304525071341, "grad_norm": 2.880861282348633, "learning_rate": 3.000747384155456e-05, "loss": 2.1518, "step": 82440 }, { "epoch": 5.601644245142003, "grad_norm": 2.697399616241455, "learning_rate": 3.0003227340671286e-05, "loss": 2.3109, "step": 82445 }, { "epoch": 5.601983965212665, "grad_norm": 4.051156044006348, "learning_rate": 2.9998980839788014e-05, "loss": 2.2401, "step": 82450 }, { "epoch": 5.602323685283326, "grad_norm": 3.7525908946990967, "learning_rate": 2.9994734338904746e-05, "loss": 2.5351, "step": 82455 }, { "epoch": 5.602663405353988, "grad_norm": 3.206918954849243, "learning_rate": 2.999048783802147e-05, "loss": 2.0337, "step": 82460 }, { "epoch": 5.60300312542465, "grad_norm": 3.9328553676605225, "learning_rate": 2.9986241337138198e-05, "loss": 2.2953, "step": 82465 }, { "epoch": 5.603342845495312, "grad_norm": 3.7422409057617188, "learning_rate": 2.998199483625493e-05, "loss": 2.4482, "step": 82470 }, { "epoch": 5.603682565565974, "grad_norm": 3.2289652824401855, "learning_rate": 2.9977748335371654e-05, "loss": 2.3735, "step": 82475 }, { "epoch": 5.604022285636636, "grad_norm": 3.6296677589416504, "learning_rate": 2.9973501834488382e-05, "loss": 2.1683, "step": 82480 }, { "epoch": 5.604362005707297, "grad_norm": 2.7492001056671143, "learning_rate": 2.996925533360511e-05, "loss": 2.1868, "step": 82485 }, { "epoch": 5.604701725777959, 
"grad_norm": 2.50400710105896, "learning_rate": 2.9965008832721835e-05, "loss": 2.1322, "step": 82490 }, { "epoch": 5.605041445848621, "grad_norm": 3.6131269931793213, "learning_rate": 2.9960762331838566e-05, "loss": 2.0201, "step": 82495 }, { "epoch": 5.605381165919282, "grad_norm": 3.189046621322632, "learning_rate": 2.9956515830955294e-05, "loss": 2.2275, "step": 82500 }, { "epoch": 5.605720885989944, "grad_norm": 3.9299497604370117, "learning_rate": 2.9952269330072026e-05, "loss": 2.3334, "step": 82505 }, { "epoch": 5.606060606060606, "grad_norm": 3.0674376487731934, "learning_rate": 2.994802282918875e-05, "loss": 2.416, "step": 82510 }, { "epoch": 5.606400326131268, "grad_norm": 3.489924669265747, "learning_rate": 2.9943776328305478e-05, "loss": 2.1538, "step": 82515 }, { "epoch": 5.60674004620193, "grad_norm": 2.9456214904785156, "learning_rate": 2.993952982742221e-05, "loss": 2.3646, "step": 82520 }, { "epoch": 5.607079766272592, "grad_norm": 2.68184232711792, "learning_rate": 2.993528332653893e-05, "loss": 2.2287, "step": 82525 }, { "epoch": 5.607419486343253, "grad_norm": 4.077070713043213, "learning_rate": 2.9931036825655662e-05, "loss": 2.4911, "step": 82530 }, { "epoch": 5.607759206413915, "grad_norm": 2.668745994567871, "learning_rate": 2.992679032477239e-05, "loss": 2.297, "step": 82535 }, { "epoch": 5.608098926484577, "grad_norm": 4.221145153045654, "learning_rate": 2.9922543823889115e-05, "loss": 2.3494, "step": 82540 }, { "epoch": 5.608438646555238, "grad_norm": 3.10626482963562, "learning_rate": 2.9918297323005846e-05, "loss": 2.3849, "step": 82545 }, { "epoch": 5.6087783666259, "grad_norm": 3.803595542907715, "learning_rate": 2.9914050822122574e-05, "loss": 2.4987, "step": 82550 }, { "epoch": 5.6091180866965615, "grad_norm": 4.599903106689453, "learning_rate": 2.99098043212393e-05, "loss": 2.0421, "step": 82555 }, { "epoch": 5.609457806767224, "grad_norm": 2.583651542663574, "learning_rate": 2.9905557820356027e-05, "loss": 2.4367, "step": 82560 
}, { "epoch": 5.609797526837886, "grad_norm": 3.000922441482544, "learning_rate": 2.9901311319472758e-05, "loss": 2.4081, "step": 82565 }, { "epoch": 5.610137246908547, "grad_norm": 3.3092801570892334, "learning_rate": 2.9897064818589483e-05, "loss": 2.4019, "step": 82570 }, { "epoch": 5.610476966979209, "grad_norm": 4.161995887756348, "learning_rate": 2.989281831770621e-05, "loss": 1.9622, "step": 82575 }, { "epoch": 5.610816687049871, "grad_norm": 3.854552745819092, "learning_rate": 2.9888571816822942e-05, "loss": 2.3411, "step": 82580 }, { "epoch": 5.611156407120532, "grad_norm": 3.96467924118042, "learning_rate": 2.9884325315939664e-05, "loss": 2.4521, "step": 82585 }, { "epoch": 5.611496127191194, "grad_norm": 3.943748712539673, "learning_rate": 2.9880078815056395e-05, "loss": 2.3131, "step": 82590 }, { "epoch": 5.611835847261856, "grad_norm": 2.9931085109710693, "learning_rate": 2.9875832314173123e-05, "loss": 2.1967, "step": 82595 }, { "epoch": 5.6121755673325175, "grad_norm": 3.0805342197418213, "learning_rate": 2.9871585813289848e-05, "loss": 1.9894, "step": 82600 }, { "epoch": 5.61251528740318, "grad_norm": 3.132868528366089, "learning_rate": 2.986733931240658e-05, "loss": 2.3141, "step": 82605 }, { "epoch": 5.612855007473842, "grad_norm": 3.7058141231536865, "learning_rate": 2.9863092811523307e-05, "loss": 2.2575, "step": 82610 }, { "epoch": 5.613194727544503, "grad_norm": 3.970247507095337, "learning_rate": 2.985884631064003e-05, "loss": 2.2737, "step": 82615 }, { "epoch": 5.613534447615165, "grad_norm": 4.116451740264893, "learning_rate": 2.9854599809756763e-05, "loss": 2.1694, "step": 82620 }, { "epoch": 5.613874167685827, "grad_norm": 3.7544403076171875, "learning_rate": 2.985035330887349e-05, "loss": 2.4879, "step": 82625 }, { "epoch": 5.614213887756488, "grad_norm": 3.438613176345825, "learning_rate": 2.9846106807990216e-05, "loss": 2.507, "step": 82630 }, { "epoch": 5.61455360782715, "grad_norm": 3.0269792079925537, "learning_rate": 
2.9841860307106944e-05, "loss": 2.2706, "step": 82635 }, { "epoch": 5.614893327897812, "grad_norm": 3.266482353210449, "learning_rate": 2.9837613806223675e-05, "loss": 2.0393, "step": 82640 }, { "epoch": 5.615233047968474, "grad_norm": 3.3614003658294678, "learning_rate": 2.98333673053404e-05, "loss": 2.3299, "step": 82645 }, { "epoch": 5.615572768039136, "grad_norm": 2.880462169647217, "learning_rate": 2.9829120804457128e-05, "loss": 2.3865, "step": 82650 }, { "epoch": 5.615912488109798, "grad_norm": 3.4687416553497314, "learning_rate": 2.982487430357386e-05, "loss": 2.0946, "step": 82655 }, { "epoch": 5.616252208180459, "grad_norm": 2.939056396484375, "learning_rate": 2.982062780269058e-05, "loss": 2.3384, "step": 82660 }, { "epoch": 5.616591928251121, "grad_norm": 3.4167087078094482, "learning_rate": 2.981638130180731e-05, "loss": 2.2535, "step": 82665 }, { "epoch": 5.616931648321783, "grad_norm": 3.449974298477173, "learning_rate": 2.981213480092404e-05, "loss": 2.0121, "step": 82670 }, { "epoch": 5.617271368392444, "grad_norm": 3.5399158000946045, "learning_rate": 2.980788830004077e-05, "loss": 2.297, "step": 82675 }, { "epoch": 5.617611088463106, "grad_norm": 3.662038564682007, "learning_rate": 2.9803641799157496e-05, "loss": 1.958, "step": 82680 }, { "epoch": 5.617950808533768, "grad_norm": 3.552800178527832, "learning_rate": 2.9799395298274224e-05, "loss": 2.4109, "step": 82685 }, { "epoch": 5.61829052860443, "grad_norm": 3.563828229904175, "learning_rate": 2.9795148797390955e-05, "loss": 2.0014, "step": 82690 }, { "epoch": 5.618630248675092, "grad_norm": 2.6842474937438965, "learning_rate": 2.9790902296507676e-05, "loss": 2.3915, "step": 82695 }, { "epoch": 5.618969968745754, "grad_norm": 3.712702751159668, "learning_rate": 2.9786655795624408e-05, "loss": 2.2222, "step": 82700 }, { "epoch": 5.619309688816415, "grad_norm": 4.826773643493652, "learning_rate": 2.9782409294741136e-05, "loss": 2.4418, "step": 82705 }, { "epoch": 5.619649408887077, "grad_norm": 
3.283458948135376, "learning_rate": 2.977816279385786e-05, "loss": 2.2253, "step": 82710 }, { "epoch": 5.619989128957739, "grad_norm": 3.4020447731018066, "learning_rate": 2.9773916292974592e-05, "loss": 2.2328, "step": 82715 }, { "epoch": 5.6203288490284, "grad_norm": 3.525392770767212, "learning_rate": 2.976966979209132e-05, "loss": 1.9583, "step": 82720 }, { "epoch": 5.620668569099062, "grad_norm": 2.814002275466919, "learning_rate": 2.9765423291208044e-05, "loss": 2.385, "step": 82725 }, { "epoch": 5.621008289169724, "grad_norm": 4.468038082122803, "learning_rate": 2.9761176790324772e-05, "loss": 2.0471, "step": 82730 }, { "epoch": 5.621348009240386, "grad_norm": 2.7318825721740723, "learning_rate": 2.9756930289441504e-05, "loss": 2.4559, "step": 82735 }, { "epoch": 5.621687729311048, "grad_norm": 3.1766626834869385, "learning_rate": 2.975268378855823e-05, "loss": 2.3788, "step": 82740 }, { "epoch": 5.62202744938171, "grad_norm": 3.4288947582244873, "learning_rate": 2.9748437287674956e-05, "loss": 2.0931, "step": 82745 }, { "epoch": 5.622367169452371, "grad_norm": 2.816615343093872, "learning_rate": 2.9744190786791688e-05, "loss": 2.1639, "step": 82750 }, { "epoch": 5.622706889523033, "grad_norm": 2.971156597137451, "learning_rate": 2.9739944285908412e-05, "loss": 2.4126, "step": 82755 }, { "epoch": 5.623046609593695, "grad_norm": 3.4402015209198, "learning_rate": 2.973569778502514e-05, "loss": 2.3447, "step": 82760 }, { "epoch": 5.623386329664356, "grad_norm": 3.330075740814209, "learning_rate": 2.9731451284141872e-05, "loss": 2.2272, "step": 82765 }, { "epoch": 5.623726049735018, "grad_norm": 3.4989423751831055, "learning_rate": 2.9727204783258593e-05, "loss": 2.3708, "step": 82770 }, { "epoch": 5.62406576980568, "grad_norm": 3.491655111312866, "learning_rate": 2.9722958282375324e-05, "loss": 2.0617, "step": 82775 }, { "epoch": 5.624405489876342, "grad_norm": 4.074520587921143, "learning_rate": 2.9718711781492052e-05, "loss": 2.2575, "step": 82780 }, { 
"epoch": 5.624745209947004, "grad_norm": 3.4985334873199463, "learning_rate": 2.9714465280608777e-05, "loss": 2.5058, "step": 82785 }, { "epoch": 5.625084930017666, "grad_norm": 3.3017330169677734, "learning_rate": 2.971021877972551e-05, "loss": 2.2818, "step": 82790 }, { "epoch": 5.625424650088327, "grad_norm": 3.8398327827453613, "learning_rate": 2.9705972278842236e-05, "loss": 2.229, "step": 82795 }, { "epoch": 5.625764370158989, "grad_norm": 3.914336919784546, "learning_rate": 2.970172577795896e-05, "loss": 2.4439, "step": 82800 }, { "epoch": 5.626104090229651, "grad_norm": 3.7357115745544434, "learning_rate": 2.969747927707569e-05, "loss": 2.3522, "step": 82805 }, { "epoch": 5.626443810300312, "grad_norm": 3.6194067001342773, "learning_rate": 2.969323277619242e-05, "loss": 2.1013, "step": 82810 }, { "epoch": 5.626783530370974, "grad_norm": 4.355741024017334, "learning_rate": 2.9688986275309145e-05, "loss": 2.1827, "step": 82815 }, { "epoch": 5.627123250441636, "grad_norm": 3.502041816711426, "learning_rate": 2.9684739774425873e-05, "loss": 2.2473, "step": 82820 }, { "epoch": 5.627462970512298, "grad_norm": 3.4021928310394287, "learning_rate": 2.9680493273542605e-05, "loss": 2.0425, "step": 82825 }, { "epoch": 5.62780269058296, "grad_norm": 3.379369020462036, "learning_rate": 2.9676246772659326e-05, "loss": 2.2439, "step": 82830 }, { "epoch": 5.628142410653622, "grad_norm": 3.2960658073425293, "learning_rate": 2.9672000271776057e-05, "loss": 2.02, "step": 82835 }, { "epoch": 5.628482130724283, "grad_norm": 3.060656785964966, "learning_rate": 2.9667753770892785e-05, "loss": 2.3462, "step": 82840 }, { "epoch": 5.628821850794945, "grad_norm": 3.314674139022827, "learning_rate": 2.9663507270009517e-05, "loss": 2.1949, "step": 82845 }, { "epoch": 5.629161570865607, "grad_norm": 3.173201322555542, "learning_rate": 2.965926076912624e-05, "loss": 2.1651, "step": 82850 }, { "epoch": 5.629501290936268, "grad_norm": 4.15606689453125, "learning_rate": 
2.965501426824297e-05, "loss": 2.3347, "step": 82855 }, { "epoch": 5.62984101100693, "grad_norm": 3.7771427631378174, "learning_rate": 2.96507677673597e-05, "loss": 2.3666, "step": 82860 }, { "epoch": 5.630180731077592, "grad_norm": 2.654010772705078, "learning_rate": 2.9646521266476425e-05, "loss": 2.2173, "step": 82865 }, { "epoch": 5.630520451148254, "grad_norm": 3.0025134086608887, "learning_rate": 2.9642274765593153e-05, "loss": 2.1714, "step": 82870 }, { "epoch": 5.630860171218916, "grad_norm": 3.7906882762908936, "learning_rate": 2.963802826470988e-05, "loss": 2.0714, "step": 82875 }, { "epoch": 5.631199891289578, "grad_norm": 4.463506698608398, "learning_rate": 2.9633781763826606e-05, "loss": 2.355, "step": 82880 }, { "epoch": 5.631539611360239, "grad_norm": 3.446661949157715, "learning_rate": 2.9629535262943337e-05, "loss": 2.0438, "step": 82885 }, { "epoch": 5.631879331430901, "grad_norm": 4.231159210205078, "learning_rate": 2.9625288762060065e-05, "loss": 2.2943, "step": 82890 }, { "epoch": 5.632219051501563, "grad_norm": 4.243906021118164, "learning_rate": 2.962104226117679e-05, "loss": 2.0976, "step": 82895 }, { "epoch": 5.632558771572224, "grad_norm": 3.769015073776245, "learning_rate": 2.961679576029352e-05, "loss": 2.246, "step": 82900 }, { "epoch": 5.632898491642886, "grad_norm": 2.366424322128296, "learning_rate": 2.961254925941025e-05, "loss": 2.6215, "step": 82905 }, { "epoch": 5.633238211713548, "grad_norm": 3.7318549156188965, "learning_rate": 2.9608302758526974e-05, "loss": 2.3753, "step": 82910 }, { "epoch": 5.63357793178421, "grad_norm": 3.0118117332458496, "learning_rate": 2.9604056257643702e-05, "loss": 2.4993, "step": 82915 }, { "epoch": 5.633917651854872, "grad_norm": 3.6372668743133545, "learning_rate": 2.9599809756760433e-05, "loss": 2.3359, "step": 82920 }, { "epoch": 5.634257371925534, "grad_norm": 4.213695526123047, "learning_rate": 2.9595563255877158e-05, "loss": 2.2096, "step": 82925 }, { "epoch": 5.634597091996195, "grad_norm": 
3.3757622241973877, "learning_rate": 2.9591316754993886e-05, "loss": 2.2652, "step": 82930 }, { "epoch": 5.634936812066857, "grad_norm": 3.513662338256836, "learning_rate": 2.9587070254110617e-05, "loss": 2.0654, "step": 82935 }, { "epoch": 5.635276532137519, "grad_norm": 3.0419955253601074, "learning_rate": 2.958282375322734e-05, "loss": 1.7945, "step": 82940 }, { "epoch": 5.63561625220818, "grad_norm": 4.158769607543945, "learning_rate": 2.957857725234407e-05, "loss": 2.0269, "step": 82945 }, { "epoch": 5.635955972278842, "grad_norm": 3.4081084728240967, "learning_rate": 2.9574330751460798e-05, "loss": 2.3121, "step": 82950 }, { "epoch": 5.6362956923495044, "grad_norm": 3.2708914279937744, "learning_rate": 2.9570084250577523e-05, "loss": 2.3693, "step": 82955 }, { "epoch": 5.636635412420166, "grad_norm": 3.8532190322875977, "learning_rate": 2.9565837749694254e-05, "loss": 2.5985, "step": 82960 }, { "epoch": 5.636975132490828, "grad_norm": 2.7136898040771484, "learning_rate": 2.9561591248810982e-05, "loss": 2.1552, "step": 82965 }, { "epoch": 5.63731485256149, "grad_norm": 3.1004526615142822, "learning_rate": 2.9557344747927707e-05, "loss": 2.3084, "step": 82970 }, { "epoch": 5.637654572632151, "grad_norm": 3.4032373428344727, "learning_rate": 2.9553098247044435e-05, "loss": 2.6915, "step": 82975 }, { "epoch": 5.637994292702813, "grad_norm": 3.965594530105591, "learning_rate": 2.9548851746161166e-05, "loss": 2.1804, "step": 82980 }, { "epoch": 5.638334012773475, "grad_norm": 4.287386894226074, "learning_rate": 2.954460524527789e-05, "loss": 2.2369, "step": 82985 }, { "epoch": 5.638673732844136, "grad_norm": 3.1534676551818848, "learning_rate": 2.954035874439462e-05, "loss": 2.2731, "step": 82990 }, { "epoch": 5.639013452914798, "grad_norm": 2.5129129886627197, "learning_rate": 2.953611224351135e-05, "loss": 2.2724, "step": 82995 }, { "epoch": 5.6393531729854605, "grad_norm": 3.008327007293701, "learning_rate": 2.9531865742628075e-05, "loss": 2.3696, "step": 83000 
}, { "epoch": 5.639692893056122, "grad_norm": 3.738173246383667, "learning_rate": 2.9527619241744803e-05, "loss": 2.3091, "step": 83005 }, { "epoch": 5.640032613126784, "grad_norm": 3.621952533721924, "learning_rate": 2.9523372740861534e-05, "loss": 2.3686, "step": 83010 }, { "epoch": 5.640372333197446, "grad_norm": 3.816537857055664, "learning_rate": 2.9519126239978262e-05, "loss": 2.1573, "step": 83015 }, { "epoch": 5.640712053268107, "grad_norm": 4.238681793212891, "learning_rate": 2.9514879739094987e-05, "loss": 2.0657, "step": 83020 }, { "epoch": 5.641051773338769, "grad_norm": 4.522327899932861, "learning_rate": 2.9510633238211715e-05, "loss": 2.3633, "step": 83025 }, { "epoch": 5.64139149340943, "grad_norm": 3.0699658393859863, "learning_rate": 2.9506386737328446e-05, "loss": 2.2783, "step": 83030 }, { "epoch": 5.641731213480092, "grad_norm": 3.40531849861145, "learning_rate": 2.950214023644517e-05, "loss": 2.3609, "step": 83035 }, { "epoch": 5.642070933550754, "grad_norm": 3.4805359840393066, "learning_rate": 2.94978937355619e-05, "loss": 2.3977, "step": 83040 }, { "epoch": 5.642410653621416, "grad_norm": 3.9817054271698, "learning_rate": 2.949364723467863e-05, "loss": 2.2483, "step": 83045 }, { "epoch": 5.642750373692078, "grad_norm": 3.3019487857818604, "learning_rate": 2.948940073379535e-05, "loss": 2.4342, "step": 83050 }, { "epoch": 5.64309009376274, "grad_norm": 4.216518878936768, "learning_rate": 2.9485154232912083e-05, "loss": 2.3186, "step": 83055 }, { "epoch": 5.643429813833401, "grad_norm": 3.4046006202697754, "learning_rate": 2.948090773202881e-05, "loss": 2.1684, "step": 83060 }, { "epoch": 5.643769533904063, "grad_norm": 2.823542356491089, "learning_rate": 2.9476661231145535e-05, "loss": 2.4162, "step": 83065 }, { "epoch": 5.644109253974725, "grad_norm": 3.151465654373169, "learning_rate": 2.9472414730262267e-05, "loss": 2.2103, "step": 83070 }, { "epoch": 5.644448974045386, "grad_norm": 3.777695894241333, "learning_rate": 
2.9468168229378995e-05, "loss": 2.121, "step": 83075 }, { "epoch": 5.644788694116048, "grad_norm": 3.6093733310699463, "learning_rate": 2.946392172849572e-05, "loss": 2.0465, "step": 83080 }, { "epoch": 5.64512841418671, "grad_norm": 3.4483065605163574, "learning_rate": 2.9459675227612447e-05, "loss": 2.043, "step": 83085 }, { "epoch": 5.645468134257372, "grad_norm": 3.3132598400115967, "learning_rate": 2.945542872672918e-05, "loss": 2.3538, "step": 83090 }, { "epoch": 5.645807854328034, "grad_norm": 3.3303327560424805, "learning_rate": 2.9451182225845903e-05, "loss": 2.1039, "step": 83095 }, { "epoch": 5.646147574398696, "grad_norm": 3.793009042739868, "learning_rate": 2.944693572496263e-05, "loss": 2.3151, "step": 83100 }, { "epoch": 5.646487294469357, "grad_norm": 3.8000924587249756, "learning_rate": 2.9442689224079363e-05, "loss": 2.1455, "step": 83105 }, { "epoch": 5.646827014540019, "grad_norm": 3.0031230449676514, "learning_rate": 2.9438442723196087e-05, "loss": 2.0863, "step": 83110 }, { "epoch": 5.647166734610681, "grad_norm": 4.299720287322998, "learning_rate": 2.9434196222312815e-05, "loss": 2.498, "step": 83115 }, { "epoch": 5.647506454681342, "grad_norm": 3.110889196395874, "learning_rate": 2.9429949721429543e-05, "loss": 2.2881, "step": 83120 }, { "epoch": 5.647846174752004, "grad_norm": 4.265948295593262, "learning_rate": 2.9425703220546268e-05, "loss": 2.2003, "step": 83125 }, { "epoch": 5.648185894822666, "grad_norm": 3.626166820526123, "learning_rate": 2.9421456719663e-05, "loss": 2.2614, "step": 83130 }, { "epoch": 5.648525614893328, "grad_norm": 3.1350347995758057, "learning_rate": 2.9417210218779727e-05, "loss": 2.1601, "step": 83135 }, { "epoch": 5.64886533496399, "grad_norm": 2.781456232070923, "learning_rate": 2.9412963717896452e-05, "loss": 2.3206, "step": 83140 }, { "epoch": 5.649205055034652, "grad_norm": 3.4493865966796875, "learning_rate": 2.9408717217013183e-05, "loss": 1.954, "step": 83145 }, { "epoch": 5.649544775105313, "grad_norm": 
3.0154998302459717, "learning_rate": 2.940447071612991e-05, "loss": 2.3563, "step": 83150 }, { "epoch": 5.649884495175975, "grad_norm": 3.6511576175689697, "learning_rate": 2.9400224215246636e-05, "loss": 2.2033, "step": 83155 }, { "epoch": 5.650224215246637, "grad_norm": 4.359940052032471, "learning_rate": 2.9395977714363364e-05, "loss": 2.3871, "step": 83160 }, { "epoch": 5.650563935317298, "grad_norm": 3.7555348873138428, "learning_rate": 2.9391731213480096e-05, "loss": 2.3032, "step": 83165 }, { "epoch": 5.65090365538796, "grad_norm": 3.041546583175659, "learning_rate": 2.938748471259682e-05, "loss": 2.2116, "step": 83170 }, { "epoch": 5.651243375458622, "grad_norm": 3.8981852531433105, "learning_rate": 2.9383238211713548e-05, "loss": 2.3648, "step": 83175 }, { "epoch": 5.651583095529284, "grad_norm": 8.158622741699219, "learning_rate": 2.937899171083028e-05, "loss": 2.0795, "step": 83180 }, { "epoch": 5.651922815599946, "grad_norm": 3.6918528079986572, "learning_rate": 2.9374745209947008e-05, "loss": 2.1683, "step": 83185 }, { "epoch": 5.652262535670608, "grad_norm": 3.2417361736297607, "learning_rate": 2.9370498709063732e-05, "loss": 1.9693, "step": 83190 }, { "epoch": 5.652602255741269, "grad_norm": 3.296743631362915, "learning_rate": 2.936625220818046e-05, "loss": 2.0227, "step": 83195 }, { "epoch": 5.652941975811931, "grad_norm": 2.700448989868164, "learning_rate": 2.936200570729719e-05, "loss": 2.2281, "step": 83200 }, { "epoch": 5.653281695882593, "grad_norm": 3.552053213119507, "learning_rate": 2.9357759206413916e-05, "loss": 2.2334, "step": 83205 }, { "epoch": 5.653621415953254, "grad_norm": 4.1411542892456055, "learning_rate": 2.9353512705530644e-05, "loss": 2.4945, "step": 83210 }, { "epoch": 5.653961136023916, "grad_norm": 3.8746769428253174, "learning_rate": 2.9349266204647376e-05, "loss": 2.2853, "step": 83215 }, { "epoch": 5.654300856094578, "grad_norm": 2.8532824516296387, "learning_rate": 2.9345019703764097e-05, "loss": 2.5401, "step": 83220 }, 
{ "epoch": 5.65464057616524, "grad_norm": 3.0756990909576416, "learning_rate": 2.9340773202880828e-05, "loss": 2.3636, "step": 83225 }, { "epoch": 5.654980296235902, "grad_norm": 2.9529144763946533, "learning_rate": 2.9336526701997556e-05, "loss": 2.0901, "step": 83230 }, { "epoch": 5.655320016306564, "grad_norm": 3.472012519836426, "learning_rate": 2.933228020111428e-05, "loss": 2.281, "step": 83235 }, { "epoch": 5.655659736377225, "grad_norm": 4.012325763702393, "learning_rate": 2.9328033700231012e-05, "loss": 2.339, "step": 83240 }, { "epoch": 5.655999456447887, "grad_norm": 3.406571865081787, "learning_rate": 2.932378719934774e-05, "loss": 2.2092, "step": 83245 }, { "epoch": 5.656339176518548, "grad_norm": 2.6987931728363037, "learning_rate": 2.9319540698464465e-05, "loss": 2.1196, "step": 83250 }, { "epoch": 5.65667889658921, "grad_norm": 3.7192604541778564, "learning_rate": 2.9315294197581193e-05, "loss": 2.2344, "step": 83255 }, { "epoch": 5.657018616659872, "grad_norm": 4.203028202056885, "learning_rate": 2.9311047696697924e-05, "loss": 2.1873, "step": 83260 }, { "epoch": 5.657358336730534, "grad_norm": 3.659457206726074, "learning_rate": 2.930680119581465e-05, "loss": 2.4336, "step": 83265 }, { "epoch": 5.657698056801196, "grad_norm": 2.598820924758911, "learning_rate": 2.9302554694931377e-05, "loss": 2.4488, "step": 83270 }, { "epoch": 5.658037776871858, "grad_norm": 2.713360548019409, "learning_rate": 2.9298308194048108e-05, "loss": 1.9399, "step": 83275 }, { "epoch": 5.658377496942519, "grad_norm": 3.3096182346343994, "learning_rate": 2.9294061693164833e-05, "loss": 2.3691, "step": 83280 }, { "epoch": 5.658717217013181, "grad_norm": 3.7780001163482666, "learning_rate": 2.928981519228156e-05, "loss": 2.1192, "step": 83285 }, { "epoch": 5.659056937083843, "grad_norm": 3.3708364963531494, "learning_rate": 2.9285568691398292e-05, "loss": 2.1821, "step": 83290 }, { "epoch": 5.659396657154504, "grad_norm": 3.19997501373291, "learning_rate": 
2.9281322190515014e-05, "loss": 1.7455, "step": 83295 }, { "epoch": 5.659736377225166, "grad_norm": 3.432807445526123, "learning_rate": 2.9277075689631745e-05, "loss": 2.2179, "step": 83300 }, { "epoch": 5.660076097295828, "grad_norm": 3.5374457836151123, "learning_rate": 2.9272829188748473e-05, "loss": 1.9721, "step": 83305 }, { "epoch": 5.66041581736649, "grad_norm": 3.916489601135254, "learning_rate": 2.9268582687865198e-05, "loss": 2.0493, "step": 83310 }, { "epoch": 5.660755537437152, "grad_norm": 2.895637035369873, "learning_rate": 2.926433618698193e-05, "loss": 2.3156, "step": 83315 }, { "epoch": 5.661095257507814, "grad_norm": 3.970935583114624, "learning_rate": 2.9260089686098657e-05, "loss": 2.3882, "step": 83320 }, { "epoch": 5.661434977578475, "grad_norm": 3.9478306770324707, "learning_rate": 2.925584318521538e-05, "loss": 2.2365, "step": 83325 }, { "epoch": 5.661774697649137, "grad_norm": 3.847395658493042, "learning_rate": 2.925159668433211e-05, "loss": 2.4979, "step": 83330 }, { "epoch": 5.662114417719799, "grad_norm": 2.822495460510254, "learning_rate": 2.924735018344884e-05, "loss": 2.099, "step": 83335 }, { "epoch": 5.66245413779046, "grad_norm": 2.926612377166748, "learning_rate": 2.9243103682565566e-05, "loss": 2.433, "step": 83340 }, { "epoch": 5.662793857861122, "grad_norm": 3.617499828338623, "learning_rate": 2.9238857181682294e-05, "loss": 2.2624, "step": 83345 }, { "epoch": 5.663133577931784, "grad_norm": 4.277373313903809, "learning_rate": 2.9234610680799025e-05, "loss": 2.1782, "step": 83350 }, { "epoch": 5.663473298002446, "grad_norm": 3.3986220359802246, "learning_rate": 2.9230364179915753e-05, "loss": 2.4118, "step": 83355 }, { "epoch": 5.663813018073108, "grad_norm": 3.7939252853393555, "learning_rate": 2.9226117679032478e-05, "loss": 2.3066, "step": 83360 }, { "epoch": 5.66415273814377, "grad_norm": 3.411527395248413, "learning_rate": 2.9221871178149206e-05, "loss": 2.1242, "step": 83365 }, { "epoch": 5.664492458214431, "grad_norm": 
2.7294552326202393, "learning_rate": 2.9217624677265937e-05, "loss": 1.9369, "step": 83370 }, { "epoch": 5.664832178285093, "grad_norm": 2.873944044113159, "learning_rate": 2.921337817638266e-05, "loss": 2.149, "step": 83375 }, { "epoch": 5.665171898355755, "grad_norm": 2.7773053646087646, "learning_rate": 2.920913167549939e-05, "loss": 2.1793, "step": 83380 }, { "epoch": 5.665511618426416, "grad_norm": 3.9145827293395996, "learning_rate": 2.920488517461612e-05, "loss": 1.927, "step": 83385 }, { "epoch": 5.665851338497078, "grad_norm": 3.204256534576416, "learning_rate": 2.9200638673732846e-05, "loss": 2.341, "step": 83390 }, { "epoch": 5.66619105856774, "grad_norm": 3.2649343013763428, "learning_rate": 2.9196392172849574e-05, "loss": 2.2832, "step": 83395 }, { "epoch": 5.666530778638402, "grad_norm": 4.159905910491943, "learning_rate": 2.9192145671966302e-05, "loss": 1.8829, "step": 83400 }, { "epoch": 5.666870498709064, "grad_norm": 3.8089535236358643, "learning_rate": 2.9187899171083026e-05, "loss": 1.9926, "step": 83405 }, { "epoch": 5.667210218779726, "grad_norm": 2.524667739868164, "learning_rate": 2.9183652670199758e-05, "loss": 2.1089, "step": 83410 }, { "epoch": 5.667549938850387, "grad_norm": 3.222313642501831, "learning_rate": 2.9179406169316486e-05, "loss": 2.4, "step": 83415 }, { "epoch": 5.667889658921049, "grad_norm": 4.018563747406006, "learning_rate": 2.917515966843321e-05, "loss": 2.2714, "step": 83420 }, { "epoch": 5.668229378991711, "grad_norm": 3.096478223800659, "learning_rate": 2.9170913167549942e-05, "loss": 1.9228, "step": 83425 }, { "epoch": 5.668569099062372, "grad_norm": null, "learning_rate": 2.9167515966843324e-05, "loss": 2.2423, "step": 83430 }, { "epoch": 5.668908819133034, "grad_norm": 2.96323561668396, "learning_rate": 2.9163269465960052e-05, "loss": 2.3927, "step": 83435 }, { "epoch": 5.669248539203696, "grad_norm": 4.115396022796631, "learning_rate": 2.9159022965076777e-05, "loss": 2.1808, "step": 83440 }, { "epoch": 
5.669588259274358, "grad_norm": 3.3567354679107666, "learning_rate": 2.9154776464193505e-05, "loss": 2.2967, "step": 83445 }, { "epoch": 5.66992797934502, "grad_norm": 3.816894054412842, "learning_rate": 2.9150529963310236e-05, "loss": 2.1185, "step": 83450 }, { "epoch": 5.670267699415682, "grad_norm": 3.064222812652588, "learning_rate": 2.914628346242696e-05, "loss": 2.212, "step": 83455 }, { "epoch": 5.670607419486343, "grad_norm": 3.9779741764068604, "learning_rate": 2.914203696154369e-05, "loss": 2.2375, "step": 83460 }, { "epoch": 5.670947139557005, "grad_norm": 3.24882173538208, "learning_rate": 2.913779046066042e-05, "loss": 2.4958, "step": 83465 }, { "epoch": 5.671286859627667, "grad_norm": 3.2647311687469482, "learning_rate": 2.913354395977714e-05, "loss": 2.2254, "step": 83470 }, { "epoch": 5.671626579698328, "grad_norm": 3.8236329555511475, "learning_rate": 2.9129297458893873e-05, "loss": 2.1335, "step": 83475 }, { "epoch": 5.67196629976899, "grad_norm": 4.0317792892456055, "learning_rate": 2.91250509580106e-05, "loss": 2.1687, "step": 83480 }, { "epoch": 5.672306019839652, "grad_norm": 3.3878519535064697, "learning_rate": 2.9120804457127325e-05, "loss": 2.3711, "step": 83485 }, { "epoch": 5.672645739910314, "grad_norm": 2.4318466186523438, "learning_rate": 2.9116557956244057e-05, "loss": 2.4629, "step": 83490 }, { "epoch": 5.672985459980976, "grad_norm": 3.62178635597229, "learning_rate": 2.9112311455360785e-05, "loss": 2.1715, "step": 83495 }, { "epoch": 5.673325180051638, "grad_norm": 3.0876052379608154, "learning_rate": 2.910806495447751e-05, "loss": 2.3499, "step": 83500 }, { "epoch": 5.673664900122299, "grad_norm": 3.4889144897460938, "learning_rate": 2.910381845359424e-05, "loss": 2.1997, "step": 83505 }, { "epoch": 5.674004620192961, "grad_norm": 3.69244122505188, "learning_rate": 2.909957195271097e-05, "loss": 2.3036, "step": 83510 }, { "epoch": 5.674344340263623, "grad_norm": 4.208983898162842, "learning_rate": 2.9095325451827694e-05, "loss": 
2.4144, "step": 83515 }, { "epoch": 5.674684060334284, "grad_norm": 2.922964572906494, "learning_rate": 2.909107895094442e-05, "loss": 2.1799, "step": 83520 }, { "epoch": 5.675023780404946, "grad_norm": 2.911097288131714, "learning_rate": 2.9086832450061153e-05, "loss": 2.0096, "step": 83525 }, { "epoch": 5.6753635004756084, "grad_norm": 3.3880116939544678, "learning_rate": 2.9082585949177878e-05, "loss": 2.3474, "step": 83530 }, { "epoch": 5.67570322054627, "grad_norm": 4.070088863372803, "learning_rate": 2.9078339448294606e-05, "loss": 2.3105, "step": 83535 }, { "epoch": 5.676042940616932, "grad_norm": 4.041894435882568, "learning_rate": 2.9074092947411337e-05, "loss": 2.1916, "step": 83540 }, { "epoch": 5.676382660687594, "grad_norm": 3.4836254119873047, "learning_rate": 2.9069846446528058e-05, "loss": 2.086, "step": 83545 }, { "epoch": 5.676722380758255, "grad_norm": 3.2360284328460693, "learning_rate": 2.906559994564479e-05, "loss": 1.9833, "step": 83550 }, { "epoch": 5.677062100828917, "grad_norm": 3.327098846435547, "learning_rate": 2.9061353444761518e-05, "loss": 2.2087, "step": 83555 }, { "epoch": 5.677401820899579, "grad_norm": 2.8274753093719482, "learning_rate": 2.905710694387825e-05, "loss": 1.9753, "step": 83560 }, { "epoch": 5.67774154097024, "grad_norm": 4.066274642944336, "learning_rate": 2.9052860442994974e-05, "loss": 2.3146, "step": 83565 }, { "epoch": 5.678081261040902, "grad_norm": 4.750428199768066, "learning_rate": 2.90486139421117e-05, "loss": 2.4342, "step": 83570 }, { "epoch": 5.6784209811115645, "grad_norm": 3.751450538635254, "learning_rate": 2.9044367441228433e-05, "loss": 2.2876, "step": 83575 }, { "epoch": 5.678760701182226, "grad_norm": 2.8839290142059326, "learning_rate": 2.9040120940345154e-05, "loss": 2.2206, "step": 83580 }, { "epoch": 5.679100421252888, "grad_norm": 3.5557987689971924, "learning_rate": 2.9035874439461886e-05, "loss": 2.1241, "step": 83585 }, { "epoch": 5.67944014132355, "grad_norm": 3.192108631134033, 
"learning_rate": 2.9031627938578614e-05, "loss": 2.3193, "step": 83590 }, { "epoch": 5.679779861394211, "grad_norm": 2.9052014350891113, "learning_rate": 2.9027381437695338e-05, "loss": 2.2666, "step": 83595 }, { "epoch": 5.680119581464873, "grad_norm": 3.8048150539398193, "learning_rate": 2.902313493681207e-05, "loss": 2.1434, "step": 83600 }, { "epoch": 5.680459301535535, "grad_norm": 3.470987319946289, "learning_rate": 2.9018888435928798e-05, "loss": 2.2917, "step": 83605 }, { "epoch": 5.680799021606196, "grad_norm": 3.3053791522979736, "learning_rate": 2.9014641935045522e-05, "loss": 2.3589, "step": 83610 }, { "epoch": 5.681138741676858, "grad_norm": 3.3010430335998535, "learning_rate": 2.901039543416225e-05, "loss": 2.4115, "step": 83615 }, { "epoch": 5.6814784617475205, "grad_norm": 2.9843029975891113, "learning_rate": 2.900614893327898e-05, "loss": 2.3398, "step": 83620 }, { "epoch": 5.681818181818182, "grad_norm": 3.4661359786987305, "learning_rate": 2.9001902432395706e-05, "loss": 2.2369, "step": 83625 }, { "epoch": 5.682157901888844, "grad_norm": 3.757885217666626, "learning_rate": 2.8997655931512434e-05, "loss": 2.1682, "step": 83630 }, { "epoch": 5.682497621959506, "grad_norm": 3.018348455429077, "learning_rate": 2.8993409430629166e-05, "loss": 2.3336, "step": 83635 }, { "epoch": 5.682837342030167, "grad_norm": 4.257678031921387, "learning_rate": 2.898916292974589e-05, "loss": 2.3634, "step": 83640 }, { "epoch": 5.683177062100829, "grad_norm": 3.232816219329834, "learning_rate": 2.898491642886262e-05, "loss": 2.4626, "step": 83645 }, { "epoch": 5.683516782171491, "grad_norm": 3.1701951026916504, "learning_rate": 2.898066992797935e-05, "loss": 2.0429, "step": 83650 }, { "epoch": 5.683856502242152, "grad_norm": 4.4179463386535645, "learning_rate": 2.897642342709607e-05, "loss": 2.097, "step": 83655 }, { "epoch": 5.684196222312814, "grad_norm": 2.1099627017974854, "learning_rate": 2.8972176926212802e-05, "loss": 2.5568, "step": 83660 }, { "epoch": 
5.6845359423834765, "grad_norm": 19.53952980041504, "learning_rate": 2.896793042532953e-05, "loss": 2.0738, "step": 83665 }, { "epoch": 5.684875662454138, "grad_norm": 2.7234020233154297, "learning_rate": 2.8963683924446255e-05, "loss": 2.1557, "step": 83670 }, { "epoch": 5.6852153825248, "grad_norm": 3.4339439868927, "learning_rate": 2.8959437423562986e-05, "loss": 2.2708, "step": 83675 }, { "epoch": 5.685555102595462, "grad_norm": 4.394687175750732, "learning_rate": 2.8955190922679714e-05, "loss": 2.3189, "step": 83680 }, { "epoch": 5.685894822666123, "grad_norm": 3.055567979812622, "learning_rate": 2.895094442179644e-05, "loss": 2.1218, "step": 83685 }, { "epoch": 5.686234542736785, "grad_norm": 3.7794442176818848, "learning_rate": 2.8946697920913167e-05, "loss": 2.1571, "step": 83690 }, { "epoch": 5.686574262807447, "grad_norm": 3.3657660484313965, "learning_rate": 2.89424514200299e-05, "loss": 2.1977, "step": 83695 }, { "epoch": 5.686913982878108, "grad_norm": 3.163048267364502, "learning_rate": 2.8938204919146623e-05, "loss": 2.2888, "step": 83700 }, { "epoch": 5.68725370294877, "grad_norm": 3.530571222305298, "learning_rate": 2.893395841826335e-05, "loss": 2.23, "step": 83705 }, { "epoch": 5.687593423019432, "grad_norm": 3.262362480163574, "learning_rate": 2.8929711917380082e-05, "loss": 2.0839, "step": 83710 }, { "epoch": 5.687933143090094, "grad_norm": 3.189279079437256, "learning_rate": 2.8925465416496804e-05, "loss": 2.1514, "step": 83715 }, { "epoch": 5.688272863160756, "grad_norm": 3.3296215534210205, "learning_rate": 2.8921218915613535e-05, "loss": 2.1913, "step": 83720 }, { "epoch": 5.688612583231417, "grad_norm": 3.9528727531433105, "learning_rate": 2.8916972414730263e-05, "loss": 2.2398, "step": 83725 }, { "epoch": 5.688952303302079, "grad_norm": 3.314027786254883, "learning_rate": 2.8912725913846994e-05, "loss": 2.4105, "step": 83730 }, { "epoch": 5.689292023372741, "grad_norm": 3.114600896835327, "learning_rate": 2.890847941296372e-05, "loss": 
2.0601, "step": 83735 }, { "epoch": 5.689631743443402, "grad_norm": 3.4729397296905518, "learning_rate": 2.8904232912080447e-05, "loss": 2.2624, "step": 83740 }, { "epoch": 5.689971463514064, "grad_norm": 3.2948668003082275, "learning_rate": 2.889998641119718e-05, "loss": 2.3326, "step": 83745 }, { "epoch": 5.690311183584726, "grad_norm": 3.0000765323638916, "learning_rate": 2.8895739910313903e-05, "loss": 2.1775, "step": 83750 }, { "epoch": 5.690650903655388, "grad_norm": 4.249629497528076, "learning_rate": 2.889149340943063e-05, "loss": 2.2752, "step": 83755 }, { "epoch": 5.69099062372605, "grad_norm": 2.483470916748047, "learning_rate": 2.888724690854736e-05, "loss": 2.2386, "step": 83760 }, { "epoch": 5.691330343796712, "grad_norm": 3.5692591667175293, "learning_rate": 2.8883000407664084e-05, "loss": 2.3593, "step": 83765 }, { "epoch": 5.691670063867373, "grad_norm": 3.4216368198394775, "learning_rate": 2.8878753906780815e-05, "loss": 2.2457, "step": 83770 }, { "epoch": 5.692009783938035, "grad_norm": 2.6990766525268555, "learning_rate": 2.8874507405897543e-05, "loss": 2.2329, "step": 83775 }, { "epoch": 5.692349504008697, "grad_norm": 3.1588923931121826, "learning_rate": 2.8870260905014268e-05, "loss": 2.0651, "step": 83780 }, { "epoch": 5.692689224079358, "grad_norm": 3.5832042694091797, "learning_rate": 2.8866014404131e-05, "loss": 2.2715, "step": 83785 }, { "epoch": 5.69302894415002, "grad_norm": 3.5209152698516846, "learning_rate": 2.8861767903247727e-05, "loss": 1.9735, "step": 83790 }, { "epoch": 5.693368664220682, "grad_norm": 3.4581871032714844, "learning_rate": 2.8857521402364452e-05, "loss": 2.5161, "step": 83795 }, { "epoch": 5.693708384291344, "grad_norm": 4.047212600708008, "learning_rate": 2.885327490148118e-05, "loss": 2.212, "step": 83800 }, { "epoch": 5.694048104362006, "grad_norm": 2.88960337638855, "learning_rate": 2.884902840059791e-05, "loss": 2.1054, "step": 83805 }, { "epoch": 5.694387824432668, "grad_norm": 4.212184906005859, 
"learning_rate": 2.8844781899714636e-05, "loss": 2.3734, "step": 83810 }, { "epoch": 5.694727544503329, "grad_norm": 4.216958522796631, "learning_rate": 2.8840535398831364e-05, "loss": 2.4405, "step": 83815 }, { "epoch": 5.695067264573991, "grad_norm": 3.485245943069458, "learning_rate": 2.8836288897948095e-05, "loss": 2.3928, "step": 83820 }, { "epoch": 5.695406984644653, "grad_norm": 3.248995065689087, "learning_rate": 2.8832042397064816e-05, "loss": 2.1218, "step": 83825 }, { "epoch": 5.695746704715314, "grad_norm": 3.2794349193573, "learning_rate": 2.8827795896181548e-05, "loss": 2.2746, "step": 83830 }, { "epoch": 5.696086424785976, "grad_norm": 3.644214391708374, "learning_rate": 2.8823549395298276e-05, "loss": 2.3904, "step": 83835 }, { "epoch": 5.6964261448566385, "grad_norm": 3.279165744781494, "learning_rate": 2.8819302894415e-05, "loss": 2.1791, "step": 83840 }, { "epoch": 5.6967658649273, "grad_norm": 3.699246883392334, "learning_rate": 2.8815056393531732e-05, "loss": 2.1858, "step": 83845 }, { "epoch": 5.697105584997962, "grad_norm": 3.165346145629883, "learning_rate": 2.881080989264846e-05, "loss": 2.3609, "step": 83850 }, { "epoch": 5.697445305068624, "grad_norm": 4.825615406036377, "learning_rate": 2.8806563391765184e-05, "loss": 2.1875, "step": 83855 }, { "epoch": 5.697785025139285, "grad_norm": 4.264182090759277, "learning_rate": 2.8802316890881912e-05, "loss": 2.2627, "step": 83860 }, { "epoch": 5.698124745209947, "grad_norm": 4.185303688049316, "learning_rate": 2.8798070389998644e-05, "loss": 2.0967, "step": 83865 }, { "epoch": 5.698464465280609, "grad_norm": 2.7839677333831787, "learning_rate": 2.879382388911537e-05, "loss": 2.2197, "step": 83870 }, { "epoch": 5.69880418535127, "grad_norm": 3.353717565536499, "learning_rate": 2.8789577388232097e-05, "loss": 2.3584, "step": 83875 }, { "epoch": 5.699143905421932, "grad_norm": 3.6668882369995117, "learning_rate": 2.8785330887348828e-05, "loss": 2.3066, "step": 83880 }, { "epoch": 
5.6994836254925945, "grad_norm": 3.6640737056732178, "learning_rate": 2.8781084386465553e-05, "loss": 2.167, "step": 83885 }, { "epoch": 5.699823345563256, "grad_norm": 3.0992729663848877, "learning_rate": 2.877683788558228e-05, "loss": 2.1628, "step": 83890 }, { "epoch": 5.700163065633918, "grad_norm": 2.3411636352539062, "learning_rate": 2.8772591384699012e-05, "loss": 2.2261, "step": 83895 }, { "epoch": 5.70050278570458, "grad_norm": 3.7378222942352295, "learning_rate": 2.876834488381574e-05, "loss": 2.2777, "step": 83900 }, { "epoch": 5.700842505775241, "grad_norm": 3.7835710048675537, "learning_rate": 2.8764098382932465e-05, "loss": 2.0633, "step": 83905 }, { "epoch": 5.701182225845903, "grad_norm": 3.9708871841430664, "learning_rate": 2.8759851882049193e-05, "loss": 2.3722, "step": 83910 }, { "epoch": 5.701521945916565, "grad_norm": 2.8471860885620117, "learning_rate": 2.8755605381165924e-05, "loss": 2.4493, "step": 83915 }, { "epoch": 5.701861665987226, "grad_norm": 3.5427496433258057, "learning_rate": 2.875135888028265e-05, "loss": 2.1525, "step": 83920 }, { "epoch": 5.702201386057888, "grad_norm": 3.346264362335205, "learning_rate": 2.8747112379399377e-05, "loss": 2.0949, "step": 83925 }, { "epoch": 5.70254110612855, "grad_norm": 3.340790271759033, "learning_rate": 2.8742865878516108e-05, "loss": 2.1836, "step": 83930 }, { "epoch": 5.702880826199212, "grad_norm": 2.829803228378296, "learning_rate": 2.873861937763283e-05, "loss": 2.2331, "step": 83935 }, { "epoch": 5.703220546269874, "grad_norm": 3.510205030441284, "learning_rate": 2.873437287674956e-05, "loss": 1.9726, "step": 83940 }, { "epoch": 5.703560266340535, "grad_norm": 3.0790655612945557, "learning_rate": 2.873012637586629e-05, "loss": 2.3968, "step": 83945 }, { "epoch": 5.703899986411197, "grad_norm": 4.146056652069092, "learning_rate": 2.8725879874983013e-05, "loss": 2.0483, "step": 83950 }, { "epoch": 5.704239706481859, "grad_norm": 3.7652323246002197, "learning_rate": 2.8721633374099745e-05, 
"loss": 2.2053, "step": 83955 }, { "epoch": 5.70457942655252, "grad_norm": 4.059263706207275, "learning_rate": 2.8717386873216473e-05, "loss": 2.1309, "step": 83960 }, { "epoch": 5.704919146623182, "grad_norm": 3.4144082069396973, "learning_rate": 2.8713140372333197e-05, "loss": 2.2547, "step": 83965 }, { "epoch": 5.705258866693844, "grad_norm": 3.97636079788208, "learning_rate": 2.8708893871449925e-05, "loss": 2.0319, "step": 83970 }, { "epoch": 5.705598586764506, "grad_norm": 3.665595293045044, "learning_rate": 2.8704647370566657e-05, "loss": 2.3039, "step": 83975 }, { "epoch": 5.705938306835168, "grad_norm": 3.193014621734619, "learning_rate": 2.870040086968338e-05, "loss": 2.2041, "step": 83980 }, { "epoch": 5.70627802690583, "grad_norm": 3.322288990020752, "learning_rate": 2.869615436880011e-05, "loss": 2.1959, "step": 83985 }, { "epoch": 5.706617746976491, "grad_norm": 3.503103256225586, "learning_rate": 2.869190786791684e-05, "loss": 2.2689, "step": 83990 }, { "epoch": 5.706957467047153, "grad_norm": 4.142388820648193, "learning_rate": 2.8687661367033565e-05, "loss": 2.2629, "step": 83995 }, { "epoch": 5.707297187117815, "grad_norm": 3.1871092319488525, "learning_rate": 2.8683414866150293e-05, "loss": 2.1546, "step": 84000 }, { "epoch": 5.707636907188476, "grad_norm": 3.307126522064209, "learning_rate": 2.867916836526702e-05, "loss": 2.0965, "step": 84005 }, { "epoch": 5.707976627259138, "grad_norm": 3.164590835571289, "learning_rate": 2.8674921864383746e-05, "loss": 2.3691, "step": 84010 }, { "epoch": 5.7083163473298, "grad_norm": 3.0714027881622314, "learning_rate": 2.8670675363500477e-05, "loss": 2.1748, "step": 84015 }, { "epoch": 5.708656067400462, "grad_norm": 3.7461419105529785, "learning_rate": 2.8666428862617205e-05, "loss": 2.2502, "step": 84020 }, { "epoch": 5.708995787471124, "grad_norm": 2.777510404586792, "learning_rate": 2.866218236173393e-05, "loss": 2.1501, "step": 84025 }, { "epoch": 5.709335507541786, "grad_norm": 3.2397994995117188, 
"learning_rate": 2.865793586085066e-05, "loss": 2.1693, "step": 84030 }, { "epoch": 5.709675227612447, "grad_norm": 3.2154541015625, "learning_rate": 2.865368935996739e-05, "loss": 2.421, "step": 84035 }, { "epoch": 5.710014947683109, "grad_norm": 3.5908150672912598, "learning_rate": 2.8649442859084114e-05, "loss": 2.3043, "step": 84040 }, { "epoch": 5.710354667753771, "grad_norm": 3.625786781311035, "learning_rate": 2.8645196358200842e-05, "loss": 2.4099, "step": 84045 }, { "epoch": 5.710694387824432, "grad_norm": 3.1602935791015625, "learning_rate": 2.8640949857317573e-05, "loss": 2.5831, "step": 84050 }, { "epoch": 5.711034107895094, "grad_norm": 3.7308199405670166, "learning_rate": 2.8636703356434298e-05, "loss": 2.4615, "step": 84055 }, { "epoch": 5.711373827965756, "grad_norm": 2.904672145843506, "learning_rate": 2.8632456855551026e-05, "loss": 2.1075, "step": 84060 }, { "epoch": 5.711713548036418, "grad_norm": 3.63032603263855, "learning_rate": 2.8628210354667757e-05, "loss": 2.2251, "step": 84065 }, { "epoch": 5.71205326810708, "grad_norm": 3.3420462608337402, "learning_rate": 2.8623963853784485e-05, "loss": 2.1503, "step": 84070 }, { "epoch": 5.712392988177742, "grad_norm": 3.8366124629974365, "learning_rate": 2.861971735290121e-05, "loss": 2.1466, "step": 84075 }, { "epoch": 5.712732708248403, "grad_norm": 2.8981926441192627, "learning_rate": 2.8615470852017938e-05, "loss": 2.5836, "step": 84080 }, { "epoch": 5.713072428319065, "grad_norm": 3.1137771606445312, "learning_rate": 2.861122435113467e-05, "loss": 2.1925, "step": 84085 }, { "epoch": 5.713412148389727, "grad_norm": 4.064597129821777, "learning_rate": 2.8606977850251394e-05, "loss": 2.206, "step": 84090 }, { "epoch": 5.713751868460388, "grad_norm": 3.045305013656616, "learning_rate": 2.8602731349368122e-05, "loss": 2.4316, "step": 84095 }, { "epoch": 5.71409158853105, "grad_norm": 2.831714630126953, "learning_rate": 2.8598484848484853e-05, "loss": 2.0786, "step": 84100 }, { "epoch": 
5.7144313086017124, "grad_norm": 3.5025665760040283, "learning_rate": 2.8594238347601575e-05, "loss": 2.342, "step": 84105 }, { "epoch": 5.714771028672374, "grad_norm": 3.8419349193573, "learning_rate": 2.8589991846718306e-05, "loss": 2.2044, "step": 84110 }, { "epoch": 5.715110748743036, "grad_norm": 3.3725662231445312, "learning_rate": 2.8585745345835034e-05, "loss": 2.3584, "step": 84115 }, { "epoch": 5.715450468813698, "grad_norm": 3.410588502883911, "learning_rate": 2.858149884495176e-05, "loss": 2.1573, "step": 84120 }, { "epoch": 5.715790188884359, "grad_norm": 4.437833309173584, "learning_rate": 2.857725234406849e-05, "loss": 2.2862, "step": 84125 }, { "epoch": 5.716129908955021, "grad_norm": 4.138372421264648, "learning_rate": 2.8573005843185218e-05, "loss": 2.0928, "step": 84130 }, { "epoch": 5.716469629025683, "grad_norm": 3.22784686088562, "learning_rate": 2.8568759342301943e-05, "loss": 2.4369, "step": 84135 }, { "epoch": 5.716809349096344, "grad_norm": 2.792092800140381, "learning_rate": 2.8564512841418674e-05, "loss": 2.4771, "step": 84140 }, { "epoch": 5.717149069167006, "grad_norm": 3.701890230178833, "learning_rate": 2.8560266340535402e-05, "loss": 2.3087, "step": 84145 }, { "epoch": 5.7174887892376685, "grad_norm": 3.0113303661346436, "learning_rate": 2.8556019839652127e-05, "loss": 2.3575, "step": 84150 }, { "epoch": 5.71782850930833, "grad_norm": 3.044977903366089, "learning_rate": 2.8551773338768855e-05, "loss": 2.5105, "step": 84155 }, { "epoch": 5.718168229378992, "grad_norm": 3.9767258167266846, "learning_rate": 2.8547526837885586e-05, "loss": 2.1243, "step": 84160 }, { "epoch": 5.718507949449654, "grad_norm": 3.029747724533081, "learning_rate": 2.854328033700231e-05, "loss": 2.4432, "step": 84165 }, { "epoch": 5.718847669520315, "grad_norm": 3.530034065246582, "learning_rate": 2.853903383611904e-05, "loss": 2.3607, "step": 84170 }, { "epoch": 5.719187389590977, "grad_norm": 5.031120300292969, "learning_rate": 2.853478733523577e-05, "loss": 
1.9488, "step": 84175 }, { "epoch": 5.719527109661639, "grad_norm": 3.5409393310546875, "learning_rate": 2.853054083435249e-05, "loss": 2.1176, "step": 84180 }, { "epoch": 5.7198668297323, "grad_norm": 2.86894154548645, "learning_rate": 2.8526294333469223e-05, "loss": 2.3498, "step": 84185 }, { "epoch": 5.720206549802962, "grad_norm": 2.943777322769165, "learning_rate": 2.852204783258595e-05, "loss": 2.3174, "step": 84190 }, { "epoch": 5.7205462698736245, "grad_norm": 3.4994606971740723, "learning_rate": 2.8517801331702675e-05, "loss": 2.1669, "step": 84195 }, { "epoch": 5.720885989944286, "grad_norm": 4.404800891876221, "learning_rate": 2.8513554830819407e-05, "loss": 2.2311, "step": 84200 }, { "epoch": 5.721225710014948, "grad_norm": 3.189845085144043, "learning_rate": 2.8509308329936135e-05, "loss": 2.0813, "step": 84205 }, { "epoch": 5.72156543008561, "grad_norm": 3.6155009269714355, "learning_rate": 2.850506182905286e-05, "loss": 2.2742, "step": 84210 }, { "epoch": 5.721905150156271, "grad_norm": 2.907681941986084, "learning_rate": 2.8500815328169587e-05, "loss": 2.3534, "step": 84215 }, { "epoch": 5.722244870226933, "grad_norm": 4.072706699371338, "learning_rate": 2.849656882728632e-05, "loss": 2.2596, "step": 84220 }, { "epoch": 5.722584590297595, "grad_norm": 3.3061294555664062, "learning_rate": 2.8492322326403044e-05, "loss": 2.3119, "step": 84225 }, { "epoch": 5.722924310368256, "grad_norm": 3.0371150970458984, "learning_rate": 2.848807582551977e-05, "loss": 2.419, "step": 84230 }, { "epoch": 5.723264030438918, "grad_norm": 2.737286329269409, "learning_rate": 2.8483829324636503e-05, "loss": 2.0848, "step": 84235 }, { "epoch": 5.7236037505095805, "grad_norm": 3.076308250427246, "learning_rate": 2.847958282375323e-05, "loss": 2.3641, "step": 84240 }, { "epoch": 5.723943470580242, "grad_norm": 3.980262517929077, "learning_rate": 2.8475336322869956e-05, "loss": 2.322, "step": 84245 }, { "epoch": 5.724283190650904, "grad_norm": 3.299724817276001, 
"learning_rate": 2.8471089821986684e-05, "loss": 2.129, "step": 84250 }, { "epoch": 5.724622910721566, "grad_norm": 3.5504038333892822, "learning_rate": 2.8466843321103415e-05, "loss": 2.0168, "step": 84255 }, { "epoch": 5.724962630792227, "grad_norm": 2.972827196121216, "learning_rate": 2.846259682022014e-05, "loss": 2.0265, "step": 84260 }, { "epoch": 5.725302350862889, "grad_norm": 3.31674861907959, "learning_rate": 2.8458350319336868e-05, "loss": 2.2681, "step": 84265 }, { "epoch": 5.725642070933551, "grad_norm": 3.3176348209381104, "learning_rate": 2.84541038184536e-05, "loss": 2.0867, "step": 84270 }, { "epoch": 5.725981791004212, "grad_norm": 3.171989917755127, "learning_rate": 2.8449857317570324e-05, "loss": 2.2706, "step": 84275 }, { "epoch": 5.726321511074874, "grad_norm": 3.416205406188965, "learning_rate": 2.844561081668705e-05, "loss": 2.2669, "step": 84280 }, { "epoch": 5.7266612311455365, "grad_norm": 2.9582154750823975, "learning_rate": 2.844136431580378e-05, "loss": 2.0827, "step": 84285 }, { "epoch": 5.727000951216198, "grad_norm": 2.8595118522644043, "learning_rate": 2.8437117814920504e-05, "loss": 2.3682, "step": 84290 }, { "epoch": 5.72734067128686, "grad_norm": 3.356140613555908, "learning_rate": 2.8432871314037236e-05, "loss": 2.0449, "step": 84295 }, { "epoch": 5.727680391357522, "grad_norm": 2.7691872119903564, "learning_rate": 2.8428624813153964e-05, "loss": 2.2222, "step": 84300 }, { "epoch": 5.728020111428183, "grad_norm": 3.1028401851654053, "learning_rate": 2.8424378312270688e-05, "loss": 2.2087, "step": 84305 }, { "epoch": 5.728359831498845, "grad_norm": 3.8705694675445557, "learning_rate": 2.842013181138742e-05, "loss": 2.2857, "step": 84310 }, { "epoch": 5.728699551569507, "grad_norm": 3.7374119758605957, "learning_rate": 2.8415885310504148e-05, "loss": 2.2615, "step": 84315 }, { "epoch": 5.729039271640168, "grad_norm": 4.585919380187988, "learning_rate": 2.8411638809620872e-05, "loss": 2.1873, "step": 84320 }, { "epoch": 
5.72937899171083, "grad_norm": 3.3506293296813965, "learning_rate": 2.84073923087376e-05, "loss": 2.0195, "step": 84325 }, { "epoch": 5.7297187117814925, "grad_norm": 4.0165114402771, "learning_rate": 2.840314580785433e-05, "loss": 2.2418, "step": 84330 }, { "epoch": 5.730058431852154, "grad_norm": 3.830153703689575, "learning_rate": 2.8398899306971056e-05, "loss": 2.1098, "step": 84335 }, { "epoch": 5.730398151922816, "grad_norm": 3.1141295433044434, "learning_rate": 2.8394652806087784e-05, "loss": 2.1649, "step": 84340 }, { "epoch": 5.730737871993478, "grad_norm": 4.277921676635742, "learning_rate": 2.8390406305204516e-05, "loss": 2.3788, "step": 84345 }, { "epoch": 5.731077592064139, "grad_norm": 3.614999294281006, "learning_rate": 2.8386159804321237e-05, "loss": 2.0941, "step": 84350 }, { "epoch": 5.731417312134801, "grad_norm": 3.1294174194335938, "learning_rate": 2.838191330343797e-05, "loss": 2.3269, "step": 84355 }, { "epoch": 5.731757032205463, "grad_norm": 3.4935691356658936, "learning_rate": 2.8377666802554696e-05, "loss": 2.0272, "step": 84360 }, { "epoch": 5.732096752276124, "grad_norm": 4.025780200958252, "learning_rate": 2.837342030167142e-05, "loss": 2.2099, "step": 84365 }, { "epoch": 5.732436472346786, "grad_norm": 3.475473642349243, "learning_rate": 2.8369173800788152e-05, "loss": 2.04, "step": 84370 }, { "epoch": 5.7327761924174485, "grad_norm": 3.553124189376831, "learning_rate": 2.836492729990488e-05, "loss": 2.1658, "step": 84375 }, { "epoch": 5.73311591248811, "grad_norm": 3.754654884338379, "learning_rate": 2.8360680799021605e-05, "loss": 2.0489, "step": 84380 }, { "epoch": 5.733455632558772, "grad_norm": 3.2970340251922607, "learning_rate": 2.8356434298138333e-05, "loss": 2.1578, "step": 84385 }, { "epoch": 5.733795352629433, "grad_norm": 4.647688388824463, "learning_rate": 2.8352187797255064e-05, "loss": 2.5696, "step": 84390 }, { "epoch": 5.734135072700095, "grad_norm": 4.787831783294678, "learning_rate": 2.834794129637179e-05, "loss": 
2.3597, "step": 84395 }, { "epoch": 5.734474792770757, "grad_norm": 3.3990390300750732, "learning_rate": 2.8343694795488517e-05, "loss": 2.2705, "step": 84400 }, { "epoch": 5.734814512841418, "grad_norm": 3.8828070163726807, "learning_rate": 2.833944829460525e-05, "loss": 1.8584, "step": 84405 }, { "epoch": 5.73515423291208, "grad_norm": 2.639399766921997, "learning_rate": 2.8335201793721976e-05, "loss": 2.6361, "step": 84410 }, { "epoch": 5.7354939529827424, "grad_norm": 3.4691834449768066, "learning_rate": 2.83309552928387e-05, "loss": 2.1762, "step": 84415 }, { "epoch": 5.735833673053404, "grad_norm": 3.4190945625305176, "learning_rate": 2.8326708791955432e-05, "loss": 2.3279, "step": 84420 }, { "epoch": 5.736173393124066, "grad_norm": 3.2290360927581787, "learning_rate": 2.832246229107216e-05, "loss": 2.5088, "step": 84425 }, { "epoch": 5.736513113194728, "grad_norm": 3.7406930923461914, "learning_rate": 2.8318215790188885e-05, "loss": 2.2372, "step": 84430 }, { "epoch": 5.736852833265389, "grad_norm": 3.9873099327087402, "learning_rate": 2.8313969289305613e-05, "loss": 2.08, "step": 84435 }, { "epoch": 5.737192553336051, "grad_norm": 3.2234549522399902, "learning_rate": 2.8309722788422344e-05, "loss": 2.4354, "step": 84440 }, { "epoch": 5.737532273406713, "grad_norm": 3.6495957374572754, "learning_rate": 2.830547628753907e-05, "loss": 2.0506, "step": 84445 }, { "epoch": 5.737871993477374, "grad_norm": 4.052278995513916, "learning_rate": 2.8301229786655797e-05, "loss": 2.4089, "step": 84450 }, { "epoch": 5.738211713548036, "grad_norm": 3.1180484294891357, "learning_rate": 2.829698328577253e-05, "loss": 2.2518, "step": 84455 }, { "epoch": 5.7385514336186985, "grad_norm": 3.1129417419433594, "learning_rate": 2.829273678488925e-05, "loss": 2.3442, "step": 84460 }, { "epoch": 5.73889115368936, "grad_norm": 4.065718650817871, "learning_rate": 2.828849028400598e-05, "loss": 2.1881, "step": 84465 }, { "epoch": 5.739230873760022, "grad_norm": 3.844403028488159, 
"learning_rate": 2.828424378312271e-05, "loss": 2.4279, "step": 84470 }, { "epoch": 5.739570593830684, "grad_norm": 3.3551838397979736, "learning_rate": 2.8279997282239434e-05, "loss": 2.4071, "step": 84475 }, { "epoch": 5.739910313901345, "grad_norm": 4.454732894897461, "learning_rate": 2.8275750781356165e-05, "loss": 2.0587, "step": 84480 }, { "epoch": 5.740250033972007, "grad_norm": 3.138118267059326, "learning_rate": 2.8271504280472893e-05, "loss": 2.1967, "step": 84485 }, { "epoch": 5.740589754042669, "grad_norm": 4.204995155334473, "learning_rate": 2.8267257779589618e-05, "loss": 2.5311, "step": 84490 }, { "epoch": 5.74092947411333, "grad_norm": 2.881744623184204, "learning_rate": 2.8263011278706346e-05, "loss": 2.2992, "step": 84495 }, { "epoch": 5.741269194183992, "grad_norm": 3.2229557037353516, "learning_rate": 2.8258764777823077e-05, "loss": 2.2358, "step": 84500 }, { "epoch": 5.7416089142546545, "grad_norm": 3.3369388580322266, "learning_rate": 2.8254518276939802e-05, "loss": 2.0127, "step": 84505 }, { "epoch": 5.741948634325316, "grad_norm": 4.357918739318848, "learning_rate": 2.825027177605653e-05, "loss": 2.1028, "step": 84510 }, { "epoch": 5.742288354395978, "grad_norm": 3.102015495300293, "learning_rate": 2.824602527517326e-05, "loss": 2.389, "step": 84515 }, { "epoch": 5.74262807446664, "grad_norm": 3.513890027999878, "learning_rate": 2.8241778774289986e-05, "loss": 2.0819, "step": 84520 }, { "epoch": 5.742967794537301, "grad_norm": 2.801021099090576, "learning_rate": 2.8237532273406714e-05, "loss": 2.3639, "step": 84525 }, { "epoch": 5.743307514607963, "grad_norm": 2.662684917449951, "learning_rate": 2.8233285772523442e-05, "loss": 2.3427, "step": 84530 }, { "epoch": 5.743647234678625, "grad_norm": 3.4459469318389893, "learning_rate": 2.8229039271640166e-05, "loss": 2.3299, "step": 84535 }, { "epoch": 5.743986954749286, "grad_norm": 3.097358465194702, "learning_rate": 2.8224792770756898e-05, "loss": 2.4521, "step": 84540 }, { "epoch": 
5.744326674819948, "grad_norm": 4.13820743560791, "learning_rate": 2.8220546269873626e-05, "loss": 2.3405, "step": 84545 }, { "epoch": 5.7446663948906105, "grad_norm": 3.648144006729126, "learning_rate": 2.821629976899035e-05, "loss": 2.2422, "step": 84550 }, { "epoch": 5.745006114961272, "grad_norm": 3.430701732635498, "learning_rate": 2.8212053268107082e-05, "loss": 2.1248, "step": 84555 }, { "epoch": 5.745345835031934, "grad_norm": 3.092336416244507, "learning_rate": 2.820780676722381e-05, "loss": 2.4152, "step": 84560 }, { "epoch": 5.745685555102596, "grad_norm": 4.072218418121338, "learning_rate": 2.8203560266340534e-05, "loss": 2.0674, "step": 84565 }, { "epoch": 5.746025275173257, "grad_norm": 3.1092498302459717, "learning_rate": 2.8199313765457262e-05, "loss": 2.4378, "step": 84570 }, { "epoch": 5.746364995243919, "grad_norm": 3.916783571243286, "learning_rate": 2.8195067264573994e-05, "loss": 2.2883, "step": 84575 }, { "epoch": 5.746704715314581, "grad_norm": 3.244659185409546, "learning_rate": 2.8190820763690722e-05, "loss": 2.1794, "step": 84580 }, { "epoch": 5.747044435385242, "grad_norm": 4.011977195739746, "learning_rate": 2.8186574262807447e-05, "loss": 2.4431, "step": 84585 }, { "epoch": 5.747384155455904, "grad_norm": 3.6680612564086914, "learning_rate": 2.8182327761924178e-05, "loss": 2.6076, "step": 84590 }, { "epoch": 5.7477238755265665, "grad_norm": 4.131917953491211, "learning_rate": 2.8178081261040906e-05, "loss": 2.2486, "step": 84595 }, { "epoch": 5.748063595597228, "grad_norm": 3.2999751567840576, "learning_rate": 2.817383476015763e-05, "loss": 1.78, "step": 84600 }, { "epoch": 5.74840331566789, "grad_norm": 2.8945271968841553, "learning_rate": 2.816958825927436e-05, "loss": 2.3661, "step": 84605 }, { "epoch": 5.748743035738551, "grad_norm": 3.1270198822021484, "learning_rate": 2.816534175839109e-05, "loss": 2.1919, "step": 84610 }, { "epoch": 5.749082755809213, "grad_norm": 3.4371285438537598, "learning_rate": 2.8161095257507815e-05, 
"loss": 2.2432, "step": 84615 }, { "epoch": 5.749422475879875, "grad_norm": 2.9236679077148438, "learning_rate": 2.8156848756624543e-05, "loss": 2.0887, "step": 84620 }, { "epoch": 5.749762195950536, "grad_norm": 3.2709460258483887, "learning_rate": 2.8152602255741274e-05, "loss": 2.3984, "step": 84625 }, { "epoch": 5.750101916021198, "grad_norm": 3.9020373821258545, "learning_rate": 2.8148355754857995e-05, "loss": 2.1996, "step": 84630 }, { "epoch": 5.75044163609186, "grad_norm": 3.263188123703003, "learning_rate": 2.8144109253974727e-05, "loss": 2.091, "step": 84635 }, { "epoch": 5.750781356162522, "grad_norm": 2.946277141571045, "learning_rate": 2.8139862753091455e-05, "loss": 2.1218, "step": 84640 }, { "epoch": 5.751121076233184, "grad_norm": 2.8939602375030518, "learning_rate": 2.813561625220818e-05, "loss": 2.2802, "step": 84645 }, { "epoch": 5.751460796303846, "grad_norm": 2.8204612731933594, "learning_rate": 2.813136975132491e-05, "loss": 2.0956, "step": 84650 }, { "epoch": 5.751800516374507, "grad_norm": 3.7071776390075684, "learning_rate": 2.812712325044164e-05, "loss": 2.3209, "step": 84655 }, { "epoch": 5.752140236445169, "grad_norm": 3.2131974697113037, "learning_rate": 2.8122876749558363e-05, "loss": 2.4911, "step": 84660 }, { "epoch": 5.752479956515831, "grad_norm": 3.720069408416748, "learning_rate": 2.8118630248675095e-05, "loss": 2.1328, "step": 84665 }, { "epoch": 5.752819676586492, "grad_norm": 3.429050922393799, "learning_rate": 2.8114383747791823e-05, "loss": 2.2838, "step": 84670 }, { "epoch": 5.753159396657154, "grad_norm": 3.0246987342834473, "learning_rate": 2.8110137246908547e-05, "loss": 2.1471, "step": 84675 }, { "epoch": 5.753499116727816, "grad_norm": 3.989656448364258, "learning_rate": 2.8105890746025275e-05, "loss": 2.188, "step": 84680 }, { "epoch": 5.753838836798478, "grad_norm": 3.7293965816497803, "learning_rate": 2.8101644245142007e-05, "loss": 2.0304, "step": 84685 }, { "epoch": 5.75417855686914, "grad_norm": 
3.4035139083862305, "learning_rate": 2.809739774425873e-05, "loss": 2.3142, "step": 84690 }, { "epoch": 5.754518276939802, "grad_norm": 4.549188137054443, "learning_rate": 2.809315124337546e-05, "loss": 2.2669, "step": 84695 }, { "epoch": 5.754857997010463, "grad_norm": 4.414787769317627, "learning_rate": 2.808890474249219e-05, "loss": 2.0049, "step": 84700 }, { "epoch": 5.755197717081125, "grad_norm": 3.717615842819214, "learning_rate": 2.8084658241608912e-05, "loss": 2.1882, "step": 84705 }, { "epoch": 5.755537437151787, "grad_norm": 3.5195472240448, "learning_rate": 2.8080411740725643e-05, "loss": 2.1957, "step": 84710 }, { "epoch": 5.755877157222448, "grad_norm": 3.7711920738220215, "learning_rate": 2.807616523984237e-05, "loss": 1.8677, "step": 84715 }, { "epoch": 5.75621687729311, "grad_norm": 4.093295574188232, "learning_rate": 2.8071918738959096e-05, "loss": 2.314, "step": 84720 }, { "epoch": 5.7565565973637725, "grad_norm": 3.6630375385284424, "learning_rate": 2.8067672238075827e-05, "loss": 2.1123, "step": 84725 }, { "epoch": 5.756896317434434, "grad_norm": 2.9698643684387207, "learning_rate": 2.8063425737192555e-05, "loss": 2.0283, "step": 84730 }, { "epoch": 5.757236037505096, "grad_norm": 3.0136513710021973, "learning_rate": 2.805917923630928e-05, "loss": 2.1734, "step": 84735 }, { "epoch": 5.757575757575758, "grad_norm": 3.529463768005371, "learning_rate": 2.8054932735426008e-05, "loss": 2.1576, "step": 84740 }, { "epoch": 5.757915477646419, "grad_norm": 3.662940740585327, "learning_rate": 2.805068623454274e-05, "loss": 2.1146, "step": 84745 }, { "epoch": 5.758255197717081, "grad_norm": 2.5943708419799805, "learning_rate": 2.8046439733659467e-05, "loss": 2.4016, "step": 84750 }, { "epoch": 5.758594917787743, "grad_norm": 3.3345401287078857, "learning_rate": 2.8042193232776192e-05, "loss": 2.5776, "step": 84755 }, { "epoch": 5.758934637858404, "grad_norm": 3.5759644508361816, "learning_rate": 2.8037946731892923e-05, "loss": 2.2618, "step": 84760 }, { 
"epoch": 5.759274357929066, "grad_norm": 4.008590221405029, "learning_rate": 2.803370023100965e-05, "loss": 2.2509, "step": 84765 }, { "epoch": 5.7596140779997285, "grad_norm": 3.521469831466675, "learning_rate": 2.8029453730126376e-05, "loss": 2.0936, "step": 84770 }, { "epoch": 5.75995379807039, "grad_norm": 3.587881326675415, "learning_rate": 2.8025207229243104e-05, "loss": 1.9496, "step": 84775 }, { "epoch": 5.760293518141052, "grad_norm": 3.9994404315948486, "learning_rate": 2.8020960728359835e-05, "loss": 2.4416, "step": 84780 }, { "epoch": 5.760633238211714, "grad_norm": 3.4931209087371826, "learning_rate": 2.801671422747656e-05, "loss": 2.3227, "step": 84785 }, { "epoch": 5.760972958282375, "grad_norm": 3.327747106552124, "learning_rate": 2.8012467726593288e-05, "loss": 1.9709, "step": 84790 }, { "epoch": 5.761312678353037, "grad_norm": 3.4535980224609375, "learning_rate": 2.800822122571002e-05, "loss": 2.0642, "step": 84795 }, { "epoch": 5.761652398423699, "grad_norm": 3.3517305850982666, "learning_rate": 2.8003974724826744e-05, "loss": 2.0777, "step": 84800 }, { "epoch": 5.76199211849436, "grad_norm": 3.5379981994628906, "learning_rate": 2.7999728223943472e-05, "loss": 1.906, "step": 84805 }, { "epoch": 5.762331838565022, "grad_norm": 3.2173731327056885, "learning_rate": 2.7995481723060203e-05, "loss": 2.0095, "step": 84810 }, { "epoch": 5.7626715586356845, "grad_norm": 3.48585844039917, "learning_rate": 2.7991235222176925e-05, "loss": 2.2612, "step": 84815 }, { "epoch": 5.763011278706346, "grad_norm": 3.5515804290771484, "learning_rate": 2.7986988721293656e-05, "loss": 2.4933, "step": 84820 }, { "epoch": 5.763350998777008, "grad_norm": 2.753844976425171, "learning_rate": 2.7982742220410384e-05, "loss": 2.248, "step": 84825 }, { "epoch": 5.76369071884767, "grad_norm": 3.754443645477295, "learning_rate": 2.797849571952711e-05, "loss": 2.2341, "step": 84830 }, { "epoch": 5.764030438918331, "grad_norm": 3.208702325820923, "learning_rate": 
2.797424921864384e-05, "loss": 2.3386, "step": 84835 }, { "epoch": 5.764370158988993, "grad_norm": 3.4825925827026367, "learning_rate": 2.7970002717760568e-05, "loss": 2.2264, "step": 84840 }, { "epoch": 5.764709879059655, "grad_norm": 3.699972152709961, "learning_rate": 2.7965756216877293e-05, "loss": 2.2161, "step": 84845 }, { "epoch": 5.765049599130316, "grad_norm": 3.941486358642578, "learning_rate": 2.796150971599402e-05, "loss": 2.1865, "step": 84850 }, { "epoch": 5.765389319200978, "grad_norm": 3.2960495948791504, "learning_rate": 2.7957263215110752e-05, "loss": 2.3331, "step": 84855 }, { "epoch": 5.7657290392716405, "grad_norm": 3.1342294216156006, "learning_rate": 2.7953016714227477e-05, "loss": 2.3173, "step": 84860 }, { "epoch": 5.766068759342302, "grad_norm": 2.9284143447875977, "learning_rate": 2.7948770213344205e-05, "loss": 2.3379, "step": 84865 }, { "epoch": 5.766408479412964, "grad_norm": 2.971214532852173, "learning_rate": 2.7944523712460936e-05, "loss": 2.3339, "step": 84870 }, { "epoch": 5.766748199483626, "grad_norm": 3.9457008838653564, "learning_rate": 2.7940277211577657e-05, "loss": 2.2808, "step": 84875 }, { "epoch": 5.767087919554287, "grad_norm": 2.9029784202575684, "learning_rate": 2.793603071069439e-05, "loss": 2.2586, "step": 84880 }, { "epoch": 5.767427639624949, "grad_norm": 3.376260280609131, "learning_rate": 2.7931784209811117e-05, "loss": 2.1276, "step": 84885 }, { "epoch": 5.767767359695611, "grad_norm": 4.421034336090088, "learning_rate": 2.792753770892784e-05, "loss": 2.2292, "step": 84890 }, { "epoch": 5.768107079766272, "grad_norm": 4.194605350494385, "learning_rate": 2.7923291208044573e-05, "loss": 2.1402, "step": 84895 }, { "epoch": 5.768446799836934, "grad_norm": 3.6698083877563477, "learning_rate": 2.79190447071613e-05, "loss": 2.1933, "step": 84900 }, { "epoch": 5.7687865199075965, "grad_norm": 3.1016528606414795, "learning_rate": 2.7914798206278025e-05, "loss": 2.2796, "step": 84905 }, { "epoch": 5.769126239978258, 
"grad_norm": 2.8903706073760986, "learning_rate": 2.7910551705394757e-05, "loss": 2.2993, "step": 84910 }, { "epoch": 5.76946596004892, "grad_norm": 4.2362895011901855, "learning_rate": 2.7906305204511485e-05, "loss": 2.0574, "step": 84915 }, { "epoch": 5.769805680119582, "grad_norm": 3.349593162536621, "learning_rate": 2.7902058703628213e-05, "loss": 2.1791, "step": 84920 }, { "epoch": 5.770145400190243, "grad_norm": 3.7276597023010254, "learning_rate": 2.7897812202744937e-05, "loss": 2.2792, "step": 84925 }, { "epoch": 5.770485120260905, "grad_norm": 3.022568464279175, "learning_rate": 2.789356570186167e-05, "loss": 2.2053, "step": 84930 }, { "epoch": 5.770824840331567, "grad_norm": 3.5843608379364014, "learning_rate": 2.7889319200978397e-05, "loss": 2.5671, "step": 84935 }, { "epoch": 5.771164560402228, "grad_norm": 3.3021323680877686, "learning_rate": 2.788507270009512e-05, "loss": 2.3928, "step": 84940 }, { "epoch": 5.77150428047289, "grad_norm": 3.20967173576355, "learning_rate": 2.7880826199211853e-05, "loss": 2.2952, "step": 84945 }, { "epoch": 5.7718440005435525, "grad_norm": 3.4222772121429443, "learning_rate": 2.787657969832858e-05, "loss": 2.3823, "step": 84950 }, { "epoch": 5.772183720614214, "grad_norm": 2.5646705627441406, "learning_rate": 2.7872333197445306e-05, "loss": 2.2483, "step": 84955 }, { "epoch": 5.772523440684876, "grad_norm": 3.2462518215179443, "learning_rate": 2.7868086696562034e-05, "loss": 2.2849, "step": 84960 }, { "epoch": 5.772863160755538, "grad_norm": 3.0610954761505127, "learning_rate": 2.7863840195678765e-05, "loss": 2.2368, "step": 84965 }, { "epoch": 5.773202880826199, "grad_norm": 3.025944471359253, "learning_rate": 2.785959369479549e-05, "loss": 2.2645, "step": 84970 }, { "epoch": 5.773542600896861, "grad_norm": 3.011298656463623, "learning_rate": 2.7855347193912218e-05, "loss": 2.2478, "step": 84975 }, { "epoch": 5.773882320967523, "grad_norm": 3.557265281677246, "learning_rate": 2.785110069302895e-05, "loss": 2.2179, 
"step": 84980 }, { "epoch": 5.774222041038184, "grad_norm": 3.513004779815674, "learning_rate": 2.784685419214567e-05, "loss": 2.1403, "step": 84985 }, { "epoch": 5.7745617611088464, "grad_norm": 3.60211181640625, "learning_rate": 2.78426076912624e-05, "loss": 2.3333, "step": 84990 }, { "epoch": 5.7749014811795085, "grad_norm": 3.302156448364258, "learning_rate": 2.783836119037913e-05, "loss": 2.168, "step": 84995 }, { "epoch": 5.77524120125017, "grad_norm": 2.8632473945617676, "learning_rate": 2.7834114689495854e-05, "loss": 2.2913, "step": 85000 }, { "epoch": 5.775580921320832, "grad_norm": 3.092498302459717, "learning_rate": 2.7829868188612586e-05, "loss": 2.0447, "step": 85005 }, { "epoch": 5.775920641391494, "grad_norm": 3.5337843894958496, "learning_rate": 2.7825621687729314e-05, "loss": 2.3968, "step": 85010 }, { "epoch": 5.776260361462155, "grad_norm": 2.8990559577941895, "learning_rate": 2.7821375186846038e-05, "loss": 2.4026, "step": 85015 }, { "epoch": 5.776600081532817, "grad_norm": 3.302550792694092, "learning_rate": 2.7817128685962766e-05, "loss": 2.2284, "step": 85020 }, { "epoch": 5.776939801603479, "grad_norm": 2.92038631439209, "learning_rate": 2.7812882185079498e-05, "loss": 2.0696, "step": 85025 }, { "epoch": 5.77727952167414, "grad_norm": 2.947786808013916, "learning_rate": 2.780948498437288e-05, "loss": 2.2206, "step": 85030 }, { "epoch": 5.7776192417448025, "grad_norm": 2.944808006286621, "learning_rate": 2.7805238483489605e-05, "loss": 2.3303, "step": 85035 }, { "epoch": 5.7779589618154645, "grad_norm": 4.697010040283203, "learning_rate": 2.7800991982606333e-05, "loss": 2.1122, "step": 85040 }, { "epoch": 5.778298681886126, "grad_norm": 3.2123308181762695, "learning_rate": 2.7796745481723064e-05, "loss": 2.4404, "step": 85045 }, { "epoch": 5.778638401956788, "grad_norm": 3.456862449645996, "learning_rate": 2.779249898083979e-05, "loss": 1.998, "step": 85050 }, { "epoch": 5.77897812202745, "grad_norm": 3.5251917839050293, "learning_rate": 
2.7788252479956517e-05, "loss": 2.3467, "step": 85055 }, { "epoch": 5.779317842098111, "grad_norm": 4.380144119262695, "learning_rate": 2.7784005979073248e-05, "loss": 2.1907, "step": 85060 }, { "epoch": 5.779657562168773, "grad_norm": 3.73630428314209, "learning_rate": 2.777975947818997e-05, "loss": 2.3982, "step": 85065 }, { "epoch": 5.779997282239434, "grad_norm": 2.868574857711792, "learning_rate": 2.77755129773067e-05, "loss": 2.066, "step": 85070 }, { "epoch": 5.780337002310096, "grad_norm": 3.989786386489868, "learning_rate": 2.777126647642343e-05, "loss": 2.036, "step": 85075 }, { "epoch": 5.7806767223807585, "grad_norm": 3.8695030212402344, "learning_rate": 2.7767019975540153e-05, "loss": 2.2664, "step": 85080 }, { "epoch": 5.78101644245142, "grad_norm": 3.890030860900879, "learning_rate": 2.7762773474656885e-05, "loss": 2.2797, "step": 85085 }, { "epoch": 5.781356162522082, "grad_norm": 3.606571912765503, "learning_rate": 2.7758526973773613e-05, "loss": 2.4428, "step": 85090 }, { "epoch": 5.781695882592744, "grad_norm": 3.659325361251831, "learning_rate": 2.7754280472890337e-05, "loss": 2.4713, "step": 85095 }, { "epoch": 5.782035602663405, "grad_norm": 3.282104730606079, "learning_rate": 2.7750033972007065e-05, "loss": 2.0713, "step": 85100 }, { "epoch": 5.782375322734067, "grad_norm": 4.620589733123779, "learning_rate": 2.7745787471123797e-05, "loss": 2.137, "step": 85105 }, { "epoch": 5.782715042804729, "grad_norm": 2.6831343173980713, "learning_rate": 2.774154097024052e-05, "loss": 2.3955, "step": 85110 }, { "epoch": 5.78305476287539, "grad_norm": 3.572638750076294, "learning_rate": 2.773729446935725e-05, "loss": 2.3093, "step": 85115 }, { "epoch": 5.783394482946052, "grad_norm": 3.6608121395111084, "learning_rate": 2.773304796847398e-05, "loss": 2.1562, "step": 85120 }, { "epoch": 5.7837342030167145, "grad_norm": 3.166841506958008, "learning_rate": 2.772880146759071e-05, "loss": 2.2589, "step": 85125 }, { "epoch": 5.784073923087376, "grad_norm": 
3.308326005935669, "learning_rate": 2.7724554966707433e-05, "loss": 2.1975, "step": 85130 }, { "epoch": 5.784413643158038, "grad_norm": 3.032482147216797, "learning_rate": 2.772030846582416e-05, "loss": 2.4534, "step": 85135 }, { "epoch": 5.7847533632287, "grad_norm": 3.221473217010498, "learning_rate": 2.7716061964940893e-05, "loss": 2.2587, "step": 85140 }, { "epoch": 5.785093083299361, "grad_norm": 3.4149718284606934, "learning_rate": 2.7711815464057617e-05, "loss": 2.393, "step": 85145 }, { "epoch": 5.785432803370023, "grad_norm": 3.4252264499664307, "learning_rate": 2.7707568963174345e-05, "loss": 2.2057, "step": 85150 }, { "epoch": 5.785772523440685, "grad_norm": 4.22106409072876, "learning_rate": 2.7703322462291077e-05, "loss": 2.192, "step": 85155 }, { "epoch": 5.786112243511346, "grad_norm": 3.2930195331573486, "learning_rate": 2.76990759614078e-05, "loss": 2.2816, "step": 85160 }, { "epoch": 5.786451963582008, "grad_norm": 3.3582961559295654, "learning_rate": 2.769482946052453e-05, "loss": 2.2123, "step": 85165 }, { "epoch": 5.7867916836526705, "grad_norm": 3.339395523071289, "learning_rate": 2.7690582959641257e-05, "loss": 2.1537, "step": 85170 }, { "epoch": 5.787131403723332, "grad_norm": 3.239058256149292, "learning_rate": 2.7686336458757982e-05, "loss": 2.1843, "step": 85175 }, { "epoch": 5.787471123793994, "grad_norm": 3.137690544128418, "learning_rate": 2.7682089957874713e-05, "loss": 2.3734, "step": 85180 }, { "epoch": 5.787810843864656, "grad_norm": 3.2361109256744385, "learning_rate": 2.767784345699144e-05, "loss": 2.1659, "step": 85185 }, { "epoch": 5.788150563935317, "grad_norm": 3.143954038619995, "learning_rate": 2.7673596956108166e-05, "loss": 2.5268, "step": 85190 }, { "epoch": 5.788490284005979, "grad_norm": 3.415630578994751, "learning_rate": 2.7669350455224898e-05, "loss": 2.272, "step": 85195 }, { "epoch": 5.788830004076641, "grad_norm": 3.3853814601898193, "learning_rate": 2.7665103954341626e-05, "loss": 2.0947, "step": 85200 }, { 
"epoch": 5.789169724147302, "grad_norm": 3.529521942138672, "learning_rate": 2.766085745345835e-05, "loss": 2.4323, "step": 85205 }, { "epoch": 5.789509444217964, "grad_norm": 3.7044527530670166, "learning_rate": 2.7656610952575078e-05, "loss": 2.4501, "step": 85210 }, { "epoch": 5.7898491642886265, "grad_norm": 3.5052528381347656, "learning_rate": 2.765236445169181e-05, "loss": 2.1769, "step": 85215 }, { "epoch": 5.790188884359288, "grad_norm": 3.2648744583129883, "learning_rate": 2.7648117950808534e-05, "loss": 2.338, "step": 85220 }, { "epoch": 5.79052860442995, "grad_norm": 3.2698404788970947, "learning_rate": 2.7643871449925262e-05, "loss": 1.9501, "step": 85225 }, { "epoch": 5.790868324500612, "grad_norm": 4.0077619552612305, "learning_rate": 2.7639624949041994e-05, "loss": 2.397, "step": 85230 }, { "epoch": 5.791208044571273, "grad_norm": 3.107499122619629, "learning_rate": 2.7635378448158715e-05, "loss": 2.006, "step": 85235 }, { "epoch": 5.791547764641935, "grad_norm": 3.5810861587524414, "learning_rate": 2.7631131947275446e-05, "loss": 2.2634, "step": 85240 }, { "epoch": 5.791887484712597, "grad_norm": 3.494828462600708, "learning_rate": 2.7626885446392174e-05, "loss": 2.2122, "step": 85245 }, { "epoch": 5.792227204783258, "grad_norm": 3.722639322280884, "learning_rate": 2.76226389455089e-05, "loss": 2.2866, "step": 85250 }, { "epoch": 5.79256692485392, "grad_norm": 3.326127052307129, "learning_rate": 2.761839244462563e-05, "loss": 2.1197, "step": 85255 }, { "epoch": 5.7929066449245825, "grad_norm": 3.870129346847534, "learning_rate": 2.7614145943742358e-05, "loss": 2.4522, "step": 85260 }, { "epoch": 5.793246364995244, "grad_norm": 3.719611883163452, "learning_rate": 2.7609899442859083e-05, "loss": 1.9349, "step": 85265 }, { "epoch": 5.793586085065906, "grad_norm": 4.422576427459717, "learning_rate": 2.760565294197581e-05, "loss": 2.0479, "step": 85270 }, { "epoch": 5.793925805136568, "grad_norm": 2.602410078048706, "learning_rate": 
2.7601406441092542e-05, "loss": 2.1444, "step": 85275 }, { "epoch": 5.794265525207229, "grad_norm": 3.4768271446228027, "learning_rate": 2.7597159940209267e-05, "loss": 2.4504, "step": 85280 }, { "epoch": 5.794605245277891, "grad_norm": 3.300053119659424, "learning_rate": 2.7592913439325995e-05, "loss": 2.3958, "step": 85285 }, { "epoch": 5.794944965348552, "grad_norm": 3.051396369934082, "learning_rate": 2.7588666938442726e-05, "loss": 2.3711, "step": 85290 }, { "epoch": 5.795284685419214, "grad_norm": 3.681084394454956, "learning_rate": 2.7584420437559454e-05, "loss": 2.4118, "step": 85295 }, { "epoch": 5.7956244054898765, "grad_norm": 3.704524278640747, "learning_rate": 2.758017393667618e-05, "loss": 2.2534, "step": 85300 }, { "epoch": 5.795964125560538, "grad_norm": 3.536877393722534, "learning_rate": 2.757592743579291e-05, "loss": 2.2682, "step": 85305 }, { "epoch": 5.7963038456312, "grad_norm": 4.041860580444336, "learning_rate": 2.757168093490964e-05, "loss": 2.3412, "step": 85310 }, { "epoch": 5.796643565701862, "grad_norm": 3.113914728164673, "learning_rate": 2.7567434434026363e-05, "loss": 2.0536, "step": 85315 }, { "epoch": 5.796983285772523, "grad_norm": 3.156980037689209, "learning_rate": 2.756318793314309e-05, "loss": 2.1748, "step": 85320 }, { "epoch": 5.797323005843185, "grad_norm": 2.6412832736968994, "learning_rate": 2.7558941432259822e-05, "loss": 2.1772, "step": 85325 }, { "epoch": 5.797662725913847, "grad_norm": 3.535806179046631, "learning_rate": 2.7554694931376547e-05, "loss": 2.232, "step": 85330 }, { "epoch": 5.798002445984508, "grad_norm": 2.808823347091675, "learning_rate": 2.7550448430493275e-05, "loss": 2.3714, "step": 85335 }, { "epoch": 5.79834216605517, "grad_norm": 3.397142171859741, "learning_rate": 2.7546201929610006e-05, "loss": 2.1891, "step": 85340 }, { "epoch": 5.7986818861258325, "grad_norm": 4.791240215301514, "learning_rate": 2.7541955428726728e-05, "loss": 2.1223, "step": 85345 }, { "epoch": 5.799021606196494, "grad_norm": 
4.39349365234375, "learning_rate": 2.753770892784346e-05, "loss": 2.1656, "step": 85350 }, { "epoch": 5.799361326267156, "grad_norm": 3.1468429565429688, "learning_rate": 2.7533462426960187e-05, "loss": 2.3167, "step": 85355 }, { "epoch": 5.799701046337818, "grad_norm": 3.587578058242798, "learning_rate": 2.752921592607691e-05, "loss": 2.2704, "step": 85360 }, { "epoch": 5.800040766408479, "grad_norm": 3.4320764541625977, "learning_rate": 2.7524969425193643e-05, "loss": 2.0813, "step": 85365 }, { "epoch": 5.800380486479141, "grad_norm": 3.416736364364624, "learning_rate": 2.752072292431037e-05, "loss": 1.7717, "step": 85370 }, { "epoch": 5.800720206549803, "grad_norm": 3.669797897338867, "learning_rate": 2.7516476423427096e-05, "loss": 2.3641, "step": 85375 }, { "epoch": 5.801059926620464, "grad_norm": 3.312068223953247, "learning_rate": 2.7512229922543824e-05, "loss": 1.9135, "step": 85380 }, { "epoch": 5.801399646691126, "grad_norm": 3.5521445274353027, "learning_rate": 2.7507983421660555e-05, "loss": 2.3637, "step": 85385 }, { "epoch": 5.8017393667617885, "grad_norm": 3.45263934135437, "learning_rate": 2.750373692077728e-05, "loss": 2.1207, "step": 85390 }, { "epoch": 5.80207908683245, "grad_norm": 3.1859164237976074, "learning_rate": 2.7499490419894008e-05, "loss": 2.4074, "step": 85395 }, { "epoch": 5.802418806903112, "grad_norm": 3.335864782333374, "learning_rate": 2.749524391901074e-05, "loss": 2.1916, "step": 85400 }, { "epoch": 5.802758526973774, "grad_norm": 3.6496052742004395, "learning_rate": 2.7490997418127464e-05, "loss": 2.3014, "step": 85405 }, { "epoch": 5.803098247044435, "grad_norm": 3.8733432292938232, "learning_rate": 2.748675091724419e-05, "loss": 2.2045, "step": 85410 }, { "epoch": 5.803437967115097, "grad_norm": 3.884211540222168, "learning_rate": 2.748250441636092e-05, "loss": 1.8466, "step": 85415 }, { "epoch": 5.803777687185759, "grad_norm": 2.7864456176757812, "learning_rate": 2.7478257915477644e-05, "loss": 2.2047, "step": 85420 }, { 
"epoch": 5.80411740725642, "grad_norm": 3.4517505168914795, "learning_rate": 2.7474011414594376e-05, "loss": 1.9438, "step": 85425 }, { "epoch": 5.804457127327082, "grad_norm": 3.680811882019043, "learning_rate": 2.7469764913711104e-05, "loss": 2.001, "step": 85430 }, { "epoch": 5.8047968473977445, "grad_norm": 4.361688613891602, "learning_rate": 2.746551841282783e-05, "loss": 2.5708, "step": 85435 }, { "epoch": 5.805136567468406, "grad_norm": 2.813098907470703, "learning_rate": 2.746127191194456e-05, "loss": 2.1105, "step": 85440 }, { "epoch": 5.805476287539068, "grad_norm": 3.287954092025757, "learning_rate": 2.7457025411061288e-05, "loss": 2.1542, "step": 85445 }, { "epoch": 5.80581600760973, "grad_norm": 4.983059883117676, "learning_rate": 2.7452778910178012e-05, "loss": 2.1093, "step": 85450 }, { "epoch": 5.806155727680391, "grad_norm": 3.5626959800720215, "learning_rate": 2.744853240929474e-05, "loss": 2.4484, "step": 85455 }, { "epoch": 5.806495447751053, "grad_norm": 3.3699545860290527, "learning_rate": 2.7444285908411472e-05, "loss": 2.1553, "step": 85460 }, { "epoch": 5.806835167821715, "grad_norm": 3.2915985584259033, "learning_rate": 2.74400394075282e-05, "loss": 2.3285, "step": 85465 }, { "epoch": 5.807174887892376, "grad_norm": 3.8632516860961914, "learning_rate": 2.7435792906644924e-05, "loss": 2.3226, "step": 85470 }, { "epoch": 5.807514607963038, "grad_norm": 3.204002857208252, "learning_rate": 2.7431546405761656e-05, "loss": 2.2409, "step": 85475 }, { "epoch": 5.8078543280337005, "grad_norm": 2.922198534011841, "learning_rate": 2.7427299904878384e-05, "loss": 2.2493, "step": 85480 }, { "epoch": 5.808194048104362, "grad_norm": 3.571309804916382, "learning_rate": 2.742305340399511e-05, "loss": 2.4415, "step": 85485 }, { "epoch": 5.808533768175024, "grad_norm": 2.930748224258423, "learning_rate": 2.7418806903111836e-05, "loss": 2.105, "step": 85490 }, { "epoch": 5.808873488245686, "grad_norm": 2.970499277114868, "learning_rate": 
2.7414560402228568e-05, "loss": 2.1965, "step": 85495 }, { "epoch": 5.809213208316347, "grad_norm": 3.630657196044922, "learning_rate": 2.7410313901345292e-05, "loss": 2.2743, "step": 85500 }, { "epoch": 5.809552928387009, "grad_norm": 3.343430280685425, "learning_rate": 2.740606740046202e-05, "loss": 2.1526, "step": 85505 }, { "epoch": 5.809892648457671, "grad_norm": 2.775499105453491, "learning_rate": 2.7401820899578752e-05, "loss": 2.0331, "step": 85510 }, { "epoch": 5.810232368528332, "grad_norm": 3.762784719467163, "learning_rate": 2.7397574398695473e-05, "loss": 2.337, "step": 85515 }, { "epoch": 5.810572088598994, "grad_norm": 3.096797466278076, "learning_rate": 2.7393327897812204e-05, "loss": 2.1458, "step": 85520 }, { "epoch": 5.8109118086696565, "grad_norm": 3.414821147918701, "learning_rate": 2.7389081396928932e-05, "loss": 2.2907, "step": 85525 }, { "epoch": 5.811251528740318, "grad_norm": 3.5384857654571533, "learning_rate": 2.7384834896045657e-05, "loss": 2.4168, "step": 85530 }, { "epoch": 5.81159124881098, "grad_norm": 3.5740842819213867, "learning_rate": 2.738058839516239e-05, "loss": 2.5964, "step": 85535 }, { "epoch": 5.811930968881642, "grad_norm": 3.773124933242798, "learning_rate": 2.7376341894279116e-05, "loss": 1.9973, "step": 85540 }, { "epoch": 5.812270688952303, "grad_norm": 2.7079358100891113, "learning_rate": 2.737209539339584e-05, "loss": 2.2854, "step": 85545 }, { "epoch": 5.812610409022965, "grad_norm": 2.805387020111084, "learning_rate": 2.7367848892512573e-05, "loss": 2.0743, "step": 85550 }, { "epoch": 5.812950129093627, "grad_norm": 3.5873351097106934, "learning_rate": 2.73636023916293e-05, "loss": 2.2944, "step": 85555 }, { "epoch": 5.813289849164288, "grad_norm": 2.607593059539795, "learning_rate": 2.7359355890746025e-05, "loss": 2.4378, "step": 85560 }, { "epoch": 5.8136295692349504, "grad_norm": 2.9123027324676514, "learning_rate": 2.7355109389862753e-05, "loss": 2.2453, "step": 85565 }, { "epoch": 5.8139692893056125, 
"grad_norm": 3.5878634452819824, "learning_rate": 2.7350862888979485e-05, "loss": 2.1822, "step": 85570 }, { "epoch": 5.814309009376274, "grad_norm": 3.8494694232940674, "learning_rate": 2.734661638809621e-05, "loss": 2.2786, "step": 85575 }, { "epoch": 5.814648729446936, "grad_norm": 3.590893507003784, "learning_rate": 2.7342369887212937e-05, "loss": 2.2741, "step": 85580 }, { "epoch": 5.814988449517598, "grad_norm": 2.96877121925354, "learning_rate": 2.733812338632967e-05, "loss": 2.3227, "step": 85585 }, { "epoch": 5.815328169588259, "grad_norm": 3.334784746170044, "learning_rate": 2.733387688544639e-05, "loss": 2.0345, "step": 85590 }, { "epoch": 5.815667889658921, "grad_norm": 3.929511547088623, "learning_rate": 2.732963038456312e-05, "loss": 2.2606, "step": 85595 }, { "epoch": 5.816007609729583, "grad_norm": 3.5385055541992188, "learning_rate": 2.732538388367985e-05, "loss": 2.1431, "step": 85600 }, { "epoch": 5.816347329800244, "grad_norm": 3.091935157775879, "learning_rate": 2.7321137382796574e-05, "loss": 2.2579, "step": 85605 }, { "epoch": 5.8166870498709065, "grad_norm": 4.37893533706665, "learning_rate": 2.7316890881913305e-05, "loss": 2.0321, "step": 85610 }, { "epoch": 5.8170267699415685, "grad_norm": 4.190117359161377, "learning_rate": 2.7312644381030033e-05, "loss": 1.8944, "step": 85615 }, { "epoch": 5.81736649001223, "grad_norm": 3.104706048965454, "learning_rate": 2.7308397880146758e-05, "loss": 2.0493, "step": 85620 }, { "epoch": 5.817706210082892, "grad_norm": 3.620990037918091, "learning_rate": 2.7304151379263486e-05, "loss": 2.1106, "step": 85625 }, { "epoch": 5.818045930153554, "grad_norm": 3.247438430786133, "learning_rate": 2.7299904878380217e-05, "loss": 2.3359, "step": 85630 }, { "epoch": 5.818385650224215, "grad_norm": 3.7862255573272705, "learning_rate": 2.7295658377496945e-05, "loss": 2.3524, "step": 85635 }, { "epoch": 5.818725370294877, "grad_norm": 3.178683280944824, "learning_rate": 2.729141187661367e-05, "loss": 2.373, "step": 
85640 }, { "epoch": 5.819065090365539, "grad_norm": 4.366122245788574, "learning_rate": 2.72871653757304e-05, "loss": 2.2187, "step": 85645 }, { "epoch": 5.8194048104362, "grad_norm": 3.7896666526794434, "learning_rate": 2.728291887484713e-05, "loss": 2.3921, "step": 85650 }, { "epoch": 5.8197445305068625, "grad_norm": 2.878389358520508, "learning_rate": 2.7278672373963854e-05, "loss": 2.378, "step": 85655 }, { "epoch": 5.8200842505775245, "grad_norm": 3.6778934001922607, "learning_rate": 2.7274425873080582e-05, "loss": 2.4107, "step": 85660 }, { "epoch": 5.820423970648186, "grad_norm": 3.6259288787841797, "learning_rate": 2.7270179372197313e-05, "loss": 2.2683, "step": 85665 }, { "epoch": 5.820763690718848, "grad_norm": 2.8400766849517822, "learning_rate": 2.7265932871314038e-05, "loss": 2.2227, "step": 85670 }, { "epoch": 5.82110341078951, "grad_norm": 3.199913740158081, "learning_rate": 2.7261686370430766e-05, "loss": 2.1839, "step": 85675 }, { "epoch": 5.821443130860171, "grad_norm": 3.430614471435547, "learning_rate": 2.7257439869547497e-05, "loss": 2.2906, "step": 85680 }, { "epoch": 5.821782850930833, "grad_norm": 3.164051055908203, "learning_rate": 2.7253193368664222e-05, "loss": 2.2385, "step": 85685 }, { "epoch": 5.822122571001495, "grad_norm": 3.2207181453704834, "learning_rate": 2.724894686778095e-05, "loss": 2.0255, "step": 85690 }, { "epoch": 5.822462291072156, "grad_norm": 3.9787585735321045, "learning_rate": 2.724470036689768e-05, "loss": 2.078, "step": 85695 }, { "epoch": 5.8228020111428185, "grad_norm": 4.021500587463379, "learning_rate": 2.7240453866014403e-05, "loss": 2.1125, "step": 85700 }, { "epoch": 5.8231417312134806, "grad_norm": 3.610800266265869, "learning_rate": 2.7236207365131134e-05, "loss": 2.24, "step": 85705 }, { "epoch": 5.823481451284142, "grad_norm": 2.979114532470703, "learning_rate": 2.7231960864247862e-05, "loss": 2.1729, "step": 85710 }, { "epoch": 5.823821171354804, "grad_norm": 3.0676193237304688, "learning_rate": 
2.7227714363364587e-05, "loss": 2.0756, "step": 85715 }, { "epoch": 5.824160891425466, "grad_norm": 3.9554855823516846, "learning_rate": 2.7223467862481318e-05, "loss": 2.0853, "step": 85720 }, { "epoch": 5.824500611496127, "grad_norm": 4.345000267028809, "learning_rate": 2.7219221361598046e-05, "loss": 2.1895, "step": 85725 }, { "epoch": 5.824840331566789, "grad_norm": 3.88077449798584, "learning_rate": 2.721497486071477e-05, "loss": 2.2434, "step": 85730 }, { "epoch": 5.825180051637451, "grad_norm": 4.157248020172119, "learning_rate": 2.72107283598315e-05, "loss": 2.1274, "step": 85735 }, { "epoch": 5.825519771708112, "grad_norm": 4.285999774932861, "learning_rate": 2.720648185894823e-05, "loss": 2.2724, "step": 85740 }, { "epoch": 5.8258594917787745, "grad_norm": 4.065709114074707, "learning_rate": 2.7202235358064955e-05, "loss": 2.2389, "step": 85745 }, { "epoch": 5.826199211849436, "grad_norm": 3.3759005069732666, "learning_rate": 2.7197988857181683e-05, "loss": 2.3195, "step": 85750 }, { "epoch": 5.826538931920098, "grad_norm": 4.8331990242004395, "learning_rate": 2.7193742356298414e-05, "loss": 1.7982, "step": 85755 }, { "epoch": 5.82687865199076, "grad_norm": 3.0575146675109863, "learning_rate": 2.7189495855415135e-05, "loss": 2.0695, "step": 85760 }, { "epoch": 5.827218372061421, "grad_norm": 3.186070680618286, "learning_rate": 2.7185249354531867e-05, "loss": 2.7082, "step": 85765 }, { "epoch": 5.827558092132083, "grad_norm": 4.550227642059326, "learning_rate": 2.7181002853648595e-05, "loss": 2.2444, "step": 85770 }, { "epoch": 5.827897812202745, "grad_norm": 3.537186861038208, "learning_rate": 2.717675635276532e-05, "loss": 1.9703, "step": 85775 }, { "epoch": 5.828237532273406, "grad_norm": 2.9374940395355225, "learning_rate": 2.717250985188205e-05, "loss": 2.2794, "step": 85780 }, { "epoch": 5.828577252344068, "grad_norm": 3.224463939666748, "learning_rate": 2.716826335099878e-05, "loss": 2.207, "step": 85785 }, { "epoch": 5.8289169724147305, 
"grad_norm": 2.9615042209625244, "learning_rate": 2.7164016850115503e-05, "loss": 2.3829, "step": 85790 }, { "epoch": 5.829256692485392, "grad_norm": 3.006075620651245, "learning_rate": 2.7159770349232235e-05, "loss": 2.2968, "step": 85795 }, { "epoch": 5.829596412556054, "grad_norm": 3.549259901046753, "learning_rate": 2.7155523848348963e-05, "loss": 2.2797, "step": 85800 }, { "epoch": 5.829936132626716, "grad_norm": 3.2128539085388184, "learning_rate": 2.715127734746569e-05, "loss": 1.9474, "step": 85805 }, { "epoch": 5.830275852697377, "grad_norm": 2.921736001968384, "learning_rate": 2.7147030846582415e-05, "loss": 2.5343, "step": 85810 }, { "epoch": 5.830615572768039, "grad_norm": 3.4715819358825684, "learning_rate": 2.7142784345699147e-05, "loss": 2.1794, "step": 85815 }, { "epoch": 5.830955292838701, "grad_norm": 3.2770798206329346, "learning_rate": 2.7138537844815875e-05, "loss": 2.1199, "step": 85820 }, { "epoch": 5.831295012909362, "grad_norm": 3.1433565616607666, "learning_rate": 2.71342913439326e-05, "loss": 2.548, "step": 85825 }, { "epoch": 5.831634732980024, "grad_norm": 3.6801955699920654, "learning_rate": 2.713004484304933e-05, "loss": 2.0255, "step": 85830 }, { "epoch": 5.8319744530506865, "grad_norm": 2.707122564315796, "learning_rate": 2.712579834216606e-05, "loss": 2.56, "step": 85835 }, { "epoch": 5.832314173121348, "grad_norm": 2.7968764305114746, "learning_rate": 2.7121551841282783e-05, "loss": 2.524, "step": 85840 }, { "epoch": 5.83265389319201, "grad_norm": 3.4734175205230713, "learning_rate": 2.711730534039951e-05, "loss": 2.2501, "step": 85845 }, { "epoch": 5.832993613262672, "grad_norm": 3.147426128387451, "learning_rate": 2.7113058839516243e-05, "loss": 2.2507, "step": 85850 }, { "epoch": 5.833333333333333, "grad_norm": 2.9396140575408936, "learning_rate": 2.7108812338632967e-05, "loss": 2.4409, "step": 85855 }, { "epoch": 5.833673053403995, "grad_norm": 3.888472080230713, "learning_rate": 2.7104565837749695e-05, "loss": 2.3798, "step": 
85860 }, { "epoch": 5.834012773474657, "grad_norm": 3.7223732471466064, "learning_rate": 2.7100319336866427e-05, "loss": 2.2565, "step": 85865 }, { "epoch": 5.834352493545318, "grad_norm": 3.407829523086548, "learning_rate": 2.7096072835983148e-05, "loss": 2.4057, "step": 85870 }, { "epoch": 5.8346922136159804, "grad_norm": 3.224323034286499, "learning_rate": 2.709182633509988e-05, "loss": 2.0158, "step": 85875 }, { "epoch": 5.8350319336866425, "grad_norm": 2.8772149085998535, "learning_rate": 2.7087579834216607e-05, "loss": 2.4, "step": 85880 }, { "epoch": 5.835371653757304, "grad_norm": 3.1860995292663574, "learning_rate": 2.7083333333333332e-05, "loss": 2.045, "step": 85885 }, { "epoch": 5.835711373827966, "grad_norm": 4.005319118499756, "learning_rate": 2.7079086832450063e-05, "loss": 2.3413, "step": 85890 }, { "epoch": 5.836051093898628, "grad_norm": 3.454768419265747, "learning_rate": 2.707484033156679e-05, "loss": 2.2697, "step": 85895 }, { "epoch": 5.836390813969289, "grad_norm": 3.5376460552215576, "learning_rate": 2.7070593830683516e-05, "loss": 2.1927, "step": 85900 }, { "epoch": 5.836730534039951, "grad_norm": 3.7043473720550537, "learning_rate": 2.7066347329800244e-05, "loss": 2.2016, "step": 85905 }, { "epoch": 5.837070254110613, "grad_norm": 3.7653653621673584, "learning_rate": 2.7062100828916976e-05, "loss": 2.3609, "step": 85910 }, { "epoch": 5.837409974181274, "grad_norm": 2.7919206619262695, "learning_rate": 2.70578543280337e-05, "loss": 2.2851, "step": 85915 }, { "epoch": 5.8377496942519365, "grad_norm": 3.4629569053649902, "learning_rate": 2.7053607827150428e-05, "loss": 2.3183, "step": 85920 }, { "epoch": 5.8380894143225985, "grad_norm": 3.1899170875549316, "learning_rate": 2.704936132626716e-05, "loss": 2.2032, "step": 85925 }, { "epoch": 5.83842913439326, "grad_norm": 3.2156224250793457, "learning_rate": 2.7045114825383884e-05, "loss": 2.4467, "step": 85930 }, { "epoch": 5.838768854463922, "grad_norm": 3.9166858196258545, "learning_rate": 
2.7040868324500612e-05, "loss": 2.0253, "step": 85935 }, { "epoch": 5.839108574534584, "grad_norm": 2.82013201713562, "learning_rate": 2.703662182361734e-05, "loss": 1.9081, "step": 85940 }, { "epoch": 5.839448294605245, "grad_norm": 3.474593162536621, "learning_rate": 2.7032375322734065e-05, "loss": 2.2841, "step": 85945 }, { "epoch": 5.839788014675907, "grad_norm": 3.060969114303589, "learning_rate": 2.7028128821850796e-05, "loss": 2.4863, "step": 85950 }, { "epoch": 5.840127734746569, "grad_norm": 3.0308008193969727, "learning_rate": 2.7023882320967524e-05, "loss": 2.1239, "step": 85955 }, { "epoch": 5.84046745481723, "grad_norm": 3.065412759780884, "learning_rate": 2.701963582008425e-05, "loss": 2.1317, "step": 85960 }, { "epoch": 5.8408071748878925, "grad_norm": 3.5344691276550293, "learning_rate": 2.701538931920098e-05, "loss": 2.1004, "step": 85965 }, { "epoch": 5.841146894958554, "grad_norm": 5.948672294616699, "learning_rate": 2.7011142818317708e-05, "loss": 1.9296, "step": 85970 }, { "epoch": 5.841486615029216, "grad_norm": 2.99223256111145, "learning_rate": 2.700689631743444e-05, "loss": 2.2844, "step": 85975 }, { "epoch": 5.841826335099878, "grad_norm": 3.5667190551757812, "learning_rate": 2.700264981655116e-05, "loss": 2.1878, "step": 85980 }, { "epoch": 5.842166055170539, "grad_norm": 3.8082637786865234, "learning_rate": 2.6998403315667892e-05, "loss": 2.4235, "step": 85985 }, { "epoch": 5.842505775241201, "grad_norm": 3.835306167602539, "learning_rate": 2.699415681478462e-05, "loss": 2.191, "step": 85990 }, { "epoch": 5.842845495311863, "grad_norm": 2.854752540588379, "learning_rate": 2.6989910313901345e-05, "loss": 2.3795, "step": 85995 }, { "epoch": 5.843185215382524, "grad_norm": 3.625425100326538, "learning_rate": 2.6985663813018076e-05, "loss": 2.0468, "step": 86000 }, { "epoch": 5.843524935453186, "grad_norm": 3.468744993209839, "learning_rate": 2.6981417312134804e-05, "loss": 2.292, "step": 86005 }, { "epoch": 5.8438646555238485, "grad_norm": 
4.139581203460693, "learning_rate": 2.697717081125153e-05, "loss": 2.35, "step": 86010 }, { "epoch": 5.84420437559451, "grad_norm": 3.5737557411193848, "learning_rate": 2.6972924310368257e-05, "loss": 2.1087, "step": 86015 }, { "epoch": 5.844544095665172, "grad_norm": 3.5217528343200684, "learning_rate": 2.6968677809484988e-05, "loss": 2.2719, "step": 86020 }, { "epoch": 5.844883815735834, "grad_norm": 4.021791934967041, "learning_rate": 2.6964431308601713e-05, "loss": 2.3079, "step": 86025 }, { "epoch": 5.845223535806495, "grad_norm": 3.6387321949005127, "learning_rate": 2.696018480771844e-05, "loss": 2.377, "step": 86030 }, { "epoch": 5.845563255877157, "grad_norm": 3.2710511684417725, "learning_rate": 2.6955938306835172e-05, "loss": 2.2399, "step": 86035 }, { "epoch": 5.845902975947819, "grad_norm": 3.5307726860046387, "learning_rate": 2.6951691805951894e-05, "loss": 2.4001, "step": 86040 }, { "epoch": 5.84624269601848, "grad_norm": 3.465243101119995, "learning_rate": 2.6947445305068625e-05, "loss": 2.2337, "step": 86045 }, { "epoch": 5.846582416089142, "grad_norm": 3.628918409347534, "learning_rate": 2.6943198804185353e-05, "loss": 2.0423, "step": 86050 }, { "epoch": 5.8469221361598045, "grad_norm": 3.5986075401306152, "learning_rate": 2.6938952303302078e-05, "loss": 1.9392, "step": 86055 }, { "epoch": 5.847261856230466, "grad_norm": 3.079782724380493, "learning_rate": 2.693470580241881e-05, "loss": 2.2649, "step": 86060 }, { "epoch": 5.847601576301128, "grad_norm": 3.354642629623413, "learning_rate": 2.6930459301535537e-05, "loss": 2.0569, "step": 86065 }, { "epoch": 5.84794129637179, "grad_norm": 3.621767282485962, "learning_rate": 2.692621280065226e-05, "loss": 2.0447, "step": 86070 }, { "epoch": 5.848281016442451, "grad_norm": 3.100104331970215, "learning_rate": 2.6921966299768993e-05, "loss": 2.2938, "step": 86075 }, { "epoch": 5.848620736513113, "grad_norm": 4.382302284240723, "learning_rate": 2.691771979888572e-05, "loss": 2.0547, "step": 86080 }, { 
"epoch": 5.848960456583775, "grad_norm": 3.069580078125, "learning_rate": 2.6913473298002446e-05, "loss": 2.2253, "step": 86085 }, { "epoch": 5.849300176654436, "grad_norm": 3.65665602684021, "learning_rate": 2.6909226797119174e-05, "loss": 2.3101, "step": 86090 }, { "epoch": 5.849639896725098, "grad_norm": 3.661342144012451, "learning_rate": 2.6904980296235905e-05, "loss": 2.2109, "step": 86095 }, { "epoch": 5.8499796167957605, "grad_norm": 3.856340169906616, "learning_rate": 2.690073379535263e-05, "loss": 2.102, "step": 86100 }, { "epoch": 5.850319336866422, "grad_norm": 2.679074764251709, "learning_rate": 2.6896487294469358e-05, "loss": 2.1802, "step": 86105 }, { "epoch": 5.850659056937084, "grad_norm": 2.924330472946167, "learning_rate": 2.689224079358609e-05, "loss": 2.282, "step": 86110 }, { "epoch": 5.850998777007746, "grad_norm": 3.0925188064575195, "learning_rate": 2.688799429270281e-05, "loss": 2.1165, "step": 86115 }, { "epoch": 5.851338497078407, "grad_norm": 3.4920244216918945, "learning_rate": 2.688374779181954e-05, "loss": 2.1924, "step": 86120 }, { "epoch": 5.851678217149069, "grad_norm": 2.90311598777771, "learning_rate": 2.687950129093627e-05, "loss": 2.1246, "step": 86125 }, { "epoch": 5.852017937219731, "grad_norm": 2.747486114501953, "learning_rate": 2.6875254790052994e-05, "loss": 2.2814, "step": 86130 }, { "epoch": 5.852357657290392, "grad_norm": 3.521010160446167, "learning_rate": 2.6871008289169726e-05, "loss": 2.2502, "step": 86135 }, { "epoch": 5.852697377361054, "grad_norm": 3.359997034072876, "learning_rate": 2.6866761788286454e-05, "loss": 2.271, "step": 86140 }, { "epoch": 5.8530370974317165, "grad_norm": 2.8409595489501953, "learning_rate": 2.6862515287403185e-05, "loss": 2.2381, "step": 86145 }, { "epoch": 5.853376817502378, "grad_norm": 3.1641130447387695, "learning_rate": 2.6858268786519906e-05, "loss": 2.1285, "step": 86150 }, { "epoch": 5.85371653757304, "grad_norm": 3.2473373413085938, "learning_rate": 2.6854022285636638e-05, 
"loss": 2.1399, "step": 86155 }, { "epoch": 5.854056257643702, "grad_norm": 2.668910264968872, "learning_rate": 2.6849775784753366e-05, "loss": 1.9639, "step": 86160 }, { "epoch": 5.854395977714363, "grad_norm": 3.2503092288970947, "learning_rate": 2.684552928387009e-05, "loss": 2.3836, "step": 86165 }, { "epoch": 5.854735697785025, "grad_norm": 4.495123863220215, "learning_rate": 2.6841282782986822e-05, "loss": 2.4904, "step": 86170 }, { "epoch": 5.855075417855687, "grad_norm": 3.372642755508423, "learning_rate": 2.683703628210355e-05, "loss": 2.2134, "step": 86175 }, { "epoch": 5.855415137926348, "grad_norm": 4.052903652191162, "learning_rate": 2.6832789781220274e-05, "loss": 2.4135, "step": 86180 }, { "epoch": 5.8557548579970105, "grad_norm": 3.2987654209136963, "learning_rate": 2.6828543280337002e-05, "loss": 2.4048, "step": 86185 }, { "epoch": 5.8560945780676725, "grad_norm": 3.492032289505005, "learning_rate": 2.6824296779453734e-05, "loss": 2.2005, "step": 86190 }, { "epoch": 5.856434298138334, "grad_norm": 2.54883074760437, "learning_rate": 2.682005027857046e-05, "loss": 2.2467, "step": 86195 }, { "epoch": 5.856774018208996, "grad_norm": 3.2299487590789795, "learning_rate": 2.6815803777687186e-05, "loss": 2.1891, "step": 86200 }, { "epoch": 5.857113738279658, "grad_norm": 3.595480442047119, "learning_rate": 2.6811557276803918e-05, "loss": 2.2632, "step": 86205 }, { "epoch": 5.857453458350319, "grad_norm": 3.8748228549957275, "learning_rate": 2.6807310775920642e-05, "loss": 2.1062, "step": 86210 }, { "epoch": 5.857793178420981, "grad_norm": 3.384305953979492, "learning_rate": 2.680306427503737e-05, "loss": 2.2626, "step": 86215 }, { "epoch": 5.858132898491643, "grad_norm": 3.2763259410858154, "learning_rate": 2.6798817774154102e-05, "loss": 2.2628, "step": 86220 }, { "epoch": 5.858472618562304, "grad_norm": 2.8563759326934814, "learning_rate": 2.6794571273270823e-05, "loss": 2.173, "step": 86225 }, { "epoch": 5.8588123386329665, "grad_norm": 
3.9374232292175293, "learning_rate": 2.6790324772387554e-05, "loss": 2.562, "step": 86230 }, { "epoch": 5.8591520587036285, "grad_norm": 3.2901535034179688, "learning_rate": 2.6786078271504282e-05, "loss": 2.093, "step": 86235 }, { "epoch": 5.85949177877429, "grad_norm": 3.115262746810913, "learning_rate": 2.6781831770621007e-05, "loss": 2.0846, "step": 86240 }, { "epoch": 5.859831498844952, "grad_norm": 3.3844962120056152, "learning_rate": 2.677758526973774e-05, "loss": 2.4141, "step": 86245 }, { "epoch": 5.860171218915614, "grad_norm": 4.614010810852051, "learning_rate": 2.6773338768854466e-05, "loss": 2.093, "step": 86250 }, { "epoch": 5.860510938986275, "grad_norm": 2.9391210079193115, "learning_rate": 2.676909226797119e-05, "loss": 2.1362, "step": 86255 }, { "epoch": 5.860850659056937, "grad_norm": 4.151632785797119, "learning_rate": 2.676484576708792e-05, "loss": 2.3951, "step": 86260 }, { "epoch": 5.861190379127599, "grad_norm": 3.39579701423645, "learning_rate": 2.676059926620465e-05, "loss": 2.2603, "step": 86265 }, { "epoch": 5.86153009919826, "grad_norm": 3.3341662883758545, "learning_rate": 2.6756352765321375e-05, "loss": 2.3467, "step": 86270 }, { "epoch": 5.8618698192689225, "grad_norm": 4.270463466644287, "learning_rate": 2.6752106264438103e-05, "loss": 2.4405, "step": 86275 }, { "epoch": 5.8622095393395846, "grad_norm": 3.00718092918396, "learning_rate": 2.6747859763554835e-05, "loss": 2.3377, "step": 86280 }, { "epoch": 5.862549259410246, "grad_norm": 3.172633171081543, "learning_rate": 2.6743613262671556e-05, "loss": 1.9058, "step": 86285 }, { "epoch": 5.862888979480908, "grad_norm": 2.852011203765869, "learning_rate": 2.6739366761788287e-05, "loss": 2.0242, "step": 86290 }, { "epoch": 5.86322869955157, "grad_norm": 3.3936638832092285, "learning_rate": 2.6735120260905015e-05, "loss": 2.4203, "step": 86295 }, { "epoch": 5.863568419622231, "grad_norm": 3.066100835800171, "learning_rate": 2.673087376002174e-05, "loss": 2.0679, "step": 86300 }, { 
"epoch": 5.863908139692893, "grad_norm": 2.918750047683716, "learning_rate": 2.672662725913847e-05, "loss": 2.5114, "step": 86305 }, { "epoch": 5.864247859763555, "grad_norm": 4.126574993133545, "learning_rate": 2.67223807582552e-05, "loss": 2.5066, "step": 86310 }, { "epoch": 5.864587579834216, "grad_norm": 3.2388315200805664, "learning_rate": 2.671813425737193e-05, "loss": 2.6754, "step": 86315 }, { "epoch": 5.8649272999048785, "grad_norm": 3.1324005126953125, "learning_rate": 2.6713887756488655e-05, "loss": 2.5536, "step": 86320 }, { "epoch": 5.865267019975541, "grad_norm": 3.308866024017334, "learning_rate": 2.6709641255605383e-05, "loss": 2.2104, "step": 86325 }, { "epoch": 5.865606740046202, "grad_norm": 2.939828395843506, "learning_rate": 2.670539475472211e-05, "loss": 2.4029, "step": 86330 }, { "epoch": 5.865946460116864, "grad_norm": 3.2160351276397705, "learning_rate": 2.6701148253838836e-05, "loss": 2.3518, "step": 86335 }, { "epoch": 5.866286180187526, "grad_norm": 2.9671547412872314, "learning_rate": 2.6696901752955567e-05, "loss": 2.3568, "step": 86340 }, { "epoch": 5.866625900258187, "grad_norm": 4.060479640960693, "learning_rate": 2.6692655252072295e-05, "loss": 2.0115, "step": 86345 }, { "epoch": 5.866965620328849, "grad_norm": 2.817866086959839, "learning_rate": 2.668840875118902e-05, "loss": 2.2482, "step": 86350 }, { "epoch": 5.867305340399511, "grad_norm": 3.469942569732666, "learning_rate": 2.668416225030575e-05, "loss": 2.1675, "step": 86355 }, { "epoch": 5.867645060470172, "grad_norm": 3.3083267211914062, "learning_rate": 2.667991574942248e-05, "loss": 2.1577, "step": 86360 }, { "epoch": 5.8679847805408345, "grad_norm": 3.4923179149627686, "learning_rate": 2.6675669248539204e-05, "loss": 2.3133, "step": 86365 }, { "epoch": 5.868324500611497, "grad_norm": 2.902975082397461, "learning_rate": 2.6671422747655932e-05, "loss": 2.3072, "step": 86370 }, { "epoch": 5.868664220682158, "grad_norm": 3.7437212467193604, "learning_rate": 
2.6667176246772663e-05, "loss": 2.6624, "step": 86375 }, { "epoch": 5.86900394075282, "grad_norm": 4.593219757080078, "learning_rate": 2.6662929745889388e-05, "loss": 2.2554, "step": 86380 }, { "epoch": 5.869343660823482, "grad_norm": 3.469421863555908, "learning_rate": 2.6658683245006116e-05, "loss": 2.2536, "step": 86385 }, { "epoch": 5.869683380894143, "grad_norm": 3.7359139919281006, "learning_rate": 2.6654436744122847e-05, "loss": 2.4559, "step": 86390 }, { "epoch": 5.870023100964805, "grad_norm": 3.6574015617370605, "learning_rate": 2.665019024323957e-05, "loss": 2.2523, "step": 86395 }, { "epoch": 5.870362821035467, "grad_norm": 3.666156053543091, "learning_rate": 2.66459437423563e-05, "loss": 2.4535, "step": 86400 }, { "epoch": 5.870702541106128, "grad_norm": 3.490945339202881, "learning_rate": 2.6641697241473028e-05, "loss": 2.2683, "step": 86405 }, { "epoch": 5.8710422611767905, "grad_norm": 3.1728944778442383, "learning_rate": 2.6637450740589753e-05, "loss": 2.2959, "step": 86410 }, { "epoch": 5.871381981247453, "grad_norm": 2.661137819290161, "learning_rate": 2.6633204239706484e-05, "loss": 2.3423, "step": 86415 }, { "epoch": 5.871721701318114, "grad_norm": 3.0494866371154785, "learning_rate": 2.6628957738823212e-05, "loss": 2.3128, "step": 86420 }, { "epoch": 5.872061421388776, "grad_norm": 3.774254083633423, "learning_rate": 2.6624711237939937e-05, "loss": 1.7541, "step": 86425 }, { "epoch": 5.872401141459437, "grad_norm": 3.2289011478424072, "learning_rate": 2.6620464737056665e-05, "loss": 2.1949, "step": 86430 }, { "epoch": 5.872740861530099, "grad_norm": 3.3428168296813965, "learning_rate": 2.6616218236173396e-05, "loss": 2.2956, "step": 86435 }, { "epoch": 5.873080581600761, "grad_norm": 3.013712167739868, "learning_rate": 2.661197173529012e-05, "loss": 1.9953, "step": 86440 }, { "epoch": 5.873420301671422, "grad_norm": 3.661691427230835, "learning_rate": 2.660772523440685e-05, "loss": 2.0291, "step": 86445 }, { "epoch": 5.8737600217420844, 
"grad_norm": 3.4611458778381348, "learning_rate": 2.660347873352358e-05, "loss": 2.26, "step": 86450 }, { "epoch": 5.8740997418127465, "grad_norm": 3.865427017211914, "learning_rate": 2.6599232232640305e-05, "loss": 2.3747, "step": 86455 }, { "epoch": 5.874439461883408, "grad_norm": 3.3999269008636475, "learning_rate": 2.6594985731757033e-05, "loss": 2.4021, "step": 86460 }, { "epoch": 5.87477918195407, "grad_norm": 3.5028347969055176, "learning_rate": 2.6590739230873764e-05, "loss": 2.1048, "step": 86465 }, { "epoch": 5.875118902024732, "grad_norm": 3.2065834999084473, "learning_rate": 2.6586492729990485e-05, "loss": 2.3425, "step": 86470 }, { "epoch": 5.875458622095393, "grad_norm": 2.878373622894287, "learning_rate": 2.6582246229107217e-05, "loss": 2.5389, "step": 86475 }, { "epoch": 5.875798342166055, "grad_norm": 3.474757671356201, "learning_rate": 2.6577999728223945e-05, "loss": 2.3284, "step": 86480 }, { "epoch": 5.876138062236717, "grad_norm": 3.0115089416503906, "learning_rate": 2.6573753227340676e-05, "loss": 2.1979, "step": 86485 }, { "epoch": 5.876477782307378, "grad_norm": 3.5287299156188965, "learning_rate": 2.65695067264574e-05, "loss": 2.2009, "step": 86490 }, { "epoch": 5.8768175023780405, "grad_norm": 3.7218356132507324, "learning_rate": 2.656526022557413e-05, "loss": 2.2236, "step": 86495 }, { "epoch": 5.8771572224487025, "grad_norm": 4.336782932281494, "learning_rate": 2.656101372469086e-05, "loss": 2.1117, "step": 86500 }, { "epoch": 5.877496942519364, "grad_norm": 4.1020965576171875, "learning_rate": 2.655676722380758e-05, "loss": 2.1368, "step": 86505 }, { "epoch": 5.877836662590026, "grad_norm": 3.246891498565674, "learning_rate": 2.6552520722924313e-05, "loss": 1.9876, "step": 86510 }, { "epoch": 5.878176382660688, "grad_norm": 2.541431427001953, "learning_rate": 2.654827422204104e-05, "loss": 2.2772, "step": 86515 }, { "epoch": 5.878516102731349, "grad_norm": 3.8242347240448, "learning_rate": 2.6544027721157765e-05, "loss": 2.342, "step": 
86520 }, { "epoch": 5.878855822802011, "grad_norm": 3.3205182552337646, "learning_rate": 2.6539781220274497e-05, "loss": 2.4855, "step": 86525 }, { "epoch": 5.879195542872673, "grad_norm": 2.729243040084839, "learning_rate": 2.6535534719391225e-05, "loss": 1.8592, "step": 86530 }, { "epoch": 5.879535262943334, "grad_norm": 3.282742738723755, "learning_rate": 2.653128821850795e-05, "loss": 2.1937, "step": 86535 }, { "epoch": 5.8798749830139965, "grad_norm": 3.6330783367156982, "learning_rate": 2.6527041717624677e-05, "loss": 2.3639, "step": 86540 }, { "epoch": 5.8802147030846585, "grad_norm": 2.9836909770965576, "learning_rate": 2.652279521674141e-05, "loss": 2.2475, "step": 86545 }, { "epoch": 5.88055442315532, "grad_norm": 3.41681170463562, "learning_rate": 2.6518548715858133e-05, "loss": 2.4135, "step": 86550 }, { "epoch": 5.880894143225982, "grad_norm": 3.895669937133789, "learning_rate": 2.651430221497486e-05, "loss": 2.1377, "step": 86555 }, { "epoch": 5.881233863296644, "grad_norm": 3.568899393081665, "learning_rate": 2.6510055714091593e-05, "loss": 2.3783, "step": 86560 }, { "epoch": 5.881573583367305, "grad_norm": 4.31950569152832, "learning_rate": 2.6505809213208317e-05, "loss": 2.3023, "step": 86565 }, { "epoch": 5.881913303437967, "grad_norm": 3.0989127159118652, "learning_rate": 2.6501562712325045e-05, "loss": 2.3201, "step": 86570 }, { "epoch": 5.882253023508629, "grad_norm": 3.4645755290985107, "learning_rate": 2.6497316211441773e-05, "loss": 2.1618, "step": 86575 }, { "epoch": 5.88259274357929, "grad_norm": 3.5373177528381348, "learning_rate": 2.6493069710558498e-05, "loss": 2.2358, "step": 86580 }, { "epoch": 5.8829324636499525, "grad_norm": 3.145848512649536, "learning_rate": 2.648882320967523e-05, "loss": 2.2339, "step": 86585 }, { "epoch": 5.883272183720615, "grad_norm": 3.6428940296173096, "learning_rate": 2.6484576708791957e-05, "loss": 2.3837, "step": 86590 }, { "epoch": 5.883611903791276, "grad_norm": 3.9834797382354736, "learning_rate": 
2.6480330207908682e-05, "loss": 1.9945, "step": 86595 }, { "epoch": 5.883951623861938, "grad_norm": 4.219954490661621, "learning_rate": 2.6476083707025413e-05, "loss": 2.2191, "step": 86600 }, { "epoch": 5.8842913439326, "grad_norm": 3.8247718811035156, "learning_rate": 2.647183720614214e-05, "loss": 2.2495, "step": 86605 }, { "epoch": 5.884631064003261, "grad_norm": 4.4124274253845215, "learning_rate": 2.6467590705258866e-05, "loss": 2.081, "step": 86610 }, { "epoch": 5.884970784073923, "grad_norm": 3.3400826454162598, "learning_rate": 2.6463344204375594e-05, "loss": 2.2247, "step": 86615 }, { "epoch": 5.885310504144585, "grad_norm": 2.9840829372406006, "learning_rate": 2.6459097703492326e-05, "loss": 2.4745, "step": 86620 }, { "epoch": 5.885650224215246, "grad_norm": 3.2537200450897217, "learning_rate": 2.645485120260905e-05, "loss": 1.9764, "step": 86625 }, { "epoch": 5.8859899442859085, "grad_norm": 3.2728991508483887, "learning_rate": 2.6450604701725778e-05, "loss": 2.3957, "step": 86630 }, { "epoch": 5.886329664356571, "grad_norm": 3.1990952491760254, "learning_rate": 2.644635820084251e-05, "loss": 2.2153, "step": 86635 }, { "epoch": 5.886669384427232, "grad_norm": 3.921802520751953, "learning_rate": 2.644211169995923e-05, "loss": 2.2936, "step": 86640 }, { "epoch": 5.887009104497894, "grad_norm": 3.802497386932373, "learning_rate": 2.6437865199075962e-05, "loss": 2.0815, "step": 86645 }, { "epoch": 5.887348824568555, "grad_norm": 3.8138506412506104, "learning_rate": 2.643361869819269e-05, "loss": 2.4114, "step": 86650 }, { "epoch": 5.887688544639217, "grad_norm": 3.4503872394561768, "learning_rate": 2.642937219730942e-05, "loss": 2.4413, "step": 86655 }, { "epoch": 5.888028264709879, "grad_norm": 4.047819137573242, "learning_rate": 2.6425125696426146e-05, "loss": 2.2817, "step": 86660 }, { "epoch": 5.88836798478054, "grad_norm": 3.1818814277648926, "learning_rate": 2.6420879195542874e-05, "loss": 2.5233, "step": 86665 }, { "epoch": 5.888707704851202, 
"grad_norm": 4.2491326332092285, "learning_rate": 2.6416632694659606e-05, "loss": 2.1571, "step": 86670 }, { "epoch": 5.8890474249218645, "grad_norm": 3.2380740642547607, "learning_rate": 2.6412386193776327e-05, "loss": 2.0982, "step": 86675 }, { "epoch": 5.889387144992526, "grad_norm": 3.5682640075683594, "learning_rate": 2.6408139692893058e-05, "loss": 2.0306, "step": 86680 }, { "epoch": 5.889726865063188, "grad_norm": 3.0686683654785156, "learning_rate": 2.6403893192009786e-05, "loss": 2.1713, "step": 86685 }, { "epoch": 5.89006658513385, "grad_norm": 2.6543493270874023, "learning_rate": 2.639964669112651e-05, "loss": 2.1785, "step": 86690 }, { "epoch": 5.890406305204511, "grad_norm": 3.396613836288452, "learning_rate": 2.6395400190243242e-05, "loss": 2.0972, "step": 86695 }, { "epoch": 5.890746025275173, "grad_norm": 3.054917812347412, "learning_rate": 2.639115368935997e-05, "loss": 2.1227, "step": 86700 }, { "epoch": 5.891085745345835, "grad_norm": 3.6461312770843506, "learning_rate": 2.6386907188476695e-05, "loss": 2.2278, "step": 86705 }, { "epoch": 5.891425465416496, "grad_norm": 4.251919269561768, "learning_rate": 2.6382660687593426e-05, "loss": 2.5061, "step": 86710 }, { "epoch": 5.891765185487158, "grad_norm": 3.6549534797668457, "learning_rate": 2.6378414186710154e-05, "loss": 2.2952, "step": 86715 }, { "epoch": 5.8921049055578205, "grad_norm": 2.8249542713165283, "learning_rate": 2.637416768582688e-05, "loss": 2.1312, "step": 86720 }, { "epoch": 5.892444625628482, "grad_norm": 3.3493030071258545, "learning_rate": 2.6369921184943607e-05, "loss": 2.7021, "step": 86725 }, { "epoch": 5.892784345699144, "grad_norm": 3.2328901290893555, "learning_rate": 2.6365674684060338e-05, "loss": 2.3269, "step": 86730 }, { "epoch": 5.893124065769806, "grad_norm": 2.571180820465088, "learning_rate": 2.6361428183177063e-05, "loss": 2.2621, "step": 86735 }, { "epoch": 5.893463785840467, "grad_norm": 3.340075731277466, "learning_rate": 2.635718168229379e-05, "loss": 2.187, 
"step": 86740 }, { "epoch": 5.893803505911129, "grad_norm": 5.593511581420898, "learning_rate": 2.6352935181410522e-05, "loss": 2.1556, "step": 86745 }, { "epoch": 5.894143225981791, "grad_norm": 3.896312952041626, "learning_rate": 2.6348688680527244e-05, "loss": 2.247, "step": 86750 }, { "epoch": 5.894482946052452, "grad_norm": 3.6248672008514404, "learning_rate": 2.6344442179643975e-05, "loss": 2.2307, "step": 86755 }, { "epoch": 5.8948226661231145, "grad_norm": 3.4252398014068604, "learning_rate": 2.6340195678760703e-05, "loss": 2.1774, "step": 86760 }, { "epoch": 5.8951623861937765, "grad_norm": 3.13567852973938, "learning_rate": 2.6335949177877428e-05, "loss": 2.1673, "step": 86765 }, { "epoch": 5.895502106264438, "grad_norm": 3.2770018577575684, "learning_rate": 2.633170267699416e-05, "loss": 2.3734, "step": 86770 }, { "epoch": 5.8958418263351, "grad_norm": 3.081331491470337, "learning_rate": 2.6327456176110887e-05, "loss": 2.2973, "step": 86775 }, { "epoch": 5.896181546405762, "grad_norm": 3.539010524749756, "learning_rate": 2.632320967522761e-05, "loss": 2.2277, "step": 86780 }, { "epoch": 5.896521266476423, "grad_norm": 4.196782112121582, "learning_rate": 2.631896317434434e-05, "loss": 2.5111, "step": 86785 }, { "epoch": 5.896860986547085, "grad_norm": 4.374454975128174, "learning_rate": 2.631471667346107e-05, "loss": 1.9853, "step": 86790 }, { "epoch": 5.897200706617747, "grad_norm": 4.040428161621094, "learning_rate": 2.6310470172577796e-05, "loss": 2.0373, "step": 86795 }, { "epoch": 5.897540426688408, "grad_norm": 3.0310912132263184, "learning_rate": 2.6306223671694524e-05, "loss": 2.2768, "step": 86800 }, { "epoch": 5.8978801467590705, "grad_norm": 3.9445581436157227, "learning_rate": 2.6301977170811255e-05, "loss": 2.4031, "step": 86805 }, { "epoch": 5.8982198668297325, "grad_norm": 3.4561944007873535, "learning_rate": 2.629773066992798e-05, "loss": 2.2933, "step": 86810 }, { "epoch": 5.898559586900394, "grad_norm": 3.338808298110962, 
"learning_rate": 2.6293484169044708e-05, "loss": 2.1123, "step": 86815 }, { "epoch": 5.898899306971056, "grad_norm": 3.309189558029175, "learning_rate": 2.6289237668161436e-05, "loss": 2.3094, "step": 86820 }, { "epoch": 5.899239027041718, "grad_norm": 2.7493817806243896, "learning_rate": 2.6284991167278167e-05, "loss": 2.3806, "step": 86825 }, { "epoch": 5.899578747112379, "grad_norm": 3.5641233921051025, "learning_rate": 2.628074466639489e-05, "loss": 2.0471, "step": 86830 }, { "epoch": 5.899918467183041, "grad_norm": 3.3012709617614746, "learning_rate": 2.627649816551162e-05, "loss": 2.2836, "step": 86835 }, { "epoch": 5.900258187253703, "grad_norm": 3.4668524265289307, "learning_rate": 2.627225166462835e-05, "loss": 2.3232, "step": 86840 }, { "epoch": 5.900597907324364, "grad_norm": 4.125396728515625, "learning_rate": 2.6268005163745076e-05, "loss": 2.4071, "step": 86845 }, { "epoch": 5.9009376273950265, "grad_norm": 2.953167200088501, "learning_rate": 2.6263758662861804e-05, "loss": 2.4248, "step": 86850 }, { "epoch": 5.9012773474656885, "grad_norm": 3.5928404331207275, "learning_rate": 2.6259512161978532e-05, "loss": 2.0591, "step": 86855 }, { "epoch": 5.90161706753635, "grad_norm": 3.221389055252075, "learning_rate": 2.6255265661095256e-05, "loss": 2.4303, "step": 86860 }, { "epoch": 5.901956787607012, "grad_norm": 3.2407174110412598, "learning_rate": 2.6251019160211988e-05, "loss": 2.0465, "step": 86865 }, { "epoch": 5.902296507677674, "grad_norm": 4.11859655380249, "learning_rate": 2.6246772659328716e-05, "loss": 2.2601, "step": 86870 }, { "epoch": 5.902636227748335, "grad_norm": 3.0458807945251465, "learning_rate": 2.624252615844544e-05, "loss": 2.1727, "step": 86875 }, { "epoch": 5.902975947818997, "grad_norm": 3.494523048400879, "learning_rate": 2.6238279657562172e-05, "loss": 2.247, "step": 86880 }, { "epoch": 5.903315667889659, "grad_norm": 3.8478879928588867, "learning_rate": 2.62340331566789e-05, "loss": 2.2946, "step": 86885 }, { "epoch": 
5.90365538796032, "grad_norm": 3.442373037338257, "learning_rate": 2.6229786655795624e-05, "loss": 2.3015, "step": 86890 }, { "epoch": 5.9039951080309825, "grad_norm": 3.0763559341430664, "learning_rate": 2.6225540154912352e-05, "loss": 2.3091, "step": 86895 }, { "epoch": 5.904334828101645, "grad_norm": 3.4317851066589355, "learning_rate": 2.6221293654029084e-05, "loss": 2.2905, "step": 86900 }, { "epoch": 5.904674548172306, "grad_norm": 3.327375888824463, "learning_rate": 2.621704715314581e-05, "loss": 2.2513, "step": 86905 }, { "epoch": 5.905014268242968, "grad_norm": 3.1688506603240967, "learning_rate": 2.6212800652262536e-05, "loss": 2.1665, "step": 86910 }, { "epoch": 5.90535398831363, "grad_norm": 3.739644765853882, "learning_rate": 2.6208554151379268e-05, "loss": 2.2567, "step": 86915 }, { "epoch": 5.905693708384291, "grad_norm": 3.1066527366638184, "learning_rate": 2.620430765049599e-05, "loss": 2.2095, "step": 86920 }, { "epoch": 5.906033428454953, "grad_norm": 3.421469211578369, "learning_rate": 2.620006114961272e-05, "loss": 2.1698, "step": 86925 }, { "epoch": 5.906373148525615, "grad_norm": 3.5609335899353027, "learning_rate": 2.619581464872945e-05, "loss": 2.0904, "step": 86930 }, { "epoch": 5.906712868596276, "grad_norm": 2.6860673427581787, "learning_rate": 2.6191568147846173e-05, "loss": 2.0814, "step": 86935 }, { "epoch": 5.9070525886669385, "grad_norm": 3.974156141281128, "learning_rate": 2.6187321646962904e-05, "loss": 2.064, "step": 86940 }, { "epoch": 5.907392308737601, "grad_norm": 3.568709373474121, "learning_rate": 2.6183075146079632e-05, "loss": 2.2021, "step": 86945 }, { "epoch": 5.907732028808262, "grad_norm": 4.085431098937988, "learning_rate": 2.6178828645196357e-05, "loss": 2.4689, "step": 86950 }, { "epoch": 5.908071748878924, "grad_norm": 3.0878772735595703, "learning_rate": 2.6174582144313085e-05, "loss": 2.2517, "step": 86955 }, { "epoch": 5.908411468949586, "grad_norm": 3.0303657054901123, "learning_rate": 2.6170335643429816e-05, 
"loss": 2.083, "step": 86960 }, { "epoch": 5.908751189020247, "grad_norm": 3.864494800567627, "learning_rate": 2.616608914254654e-05, "loss": 2.2986, "step": 86965 }, { "epoch": 5.909090909090909, "grad_norm": 3.7765629291534424, "learning_rate": 2.616184264166327e-05, "loss": 2.1977, "step": 86970 }, { "epoch": 5.909430629161571, "grad_norm": 3.571685314178467, "learning_rate": 2.615759614078e-05, "loss": 2.2741, "step": 86975 }, { "epoch": 5.909770349232232, "grad_norm": 3.3284003734588623, "learning_rate": 2.6153349639896725e-05, "loss": 2.1388, "step": 86980 }, { "epoch": 5.9101100693028945, "grad_norm": 3.0757498741149902, "learning_rate": 2.6149103139013453e-05, "loss": 2.4668, "step": 86985 }, { "epoch": 5.910449789373557, "grad_norm": 2.780998706817627, "learning_rate": 2.6144856638130185e-05, "loss": 2.2277, "step": 86990 }, { "epoch": 5.910789509444218, "grad_norm": 2.9778153896331787, "learning_rate": 2.6140610137246913e-05, "loss": 2.1848, "step": 86995 }, { "epoch": 5.91112922951488, "grad_norm": 3.652275800704956, "learning_rate": 2.6136363636363637e-05, "loss": 2.3643, "step": 87000 }, { "epoch": 5.911468949585542, "grad_norm": 3.0889413356781006, "learning_rate": 2.6132117135480365e-05, "loss": 2.4801, "step": 87005 }, { "epoch": 5.911808669656203, "grad_norm": 3.7695467472076416, "learning_rate": 2.6127870634597097e-05, "loss": 1.916, "step": 87010 }, { "epoch": 5.912148389726865, "grad_norm": 3.1725404262542725, "learning_rate": 2.612362413371382e-05, "loss": 2.2886, "step": 87015 }, { "epoch": 5.912488109797527, "grad_norm": 3.152994155883789, "learning_rate": 2.611937763283055e-05, "loss": 2.3337, "step": 87020 }, { "epoch": 5.9128278298681884, "grad_norm": 3.585287570953369, "learning_rate": 2.611513113194728e-05, "loss": 2.3757, "step": 87025 }, { "epoch": 5.9131675499388505, "grad_norm": 3.8845396041870117, "learning_rate": 2.6110884631064002e-05, "loss": 2.5201, "step": 87030 }, { "epoch": 5.913507270009513, "grad_norm": 3.0200037956237793, 
"learning_rate": 2.6106638130180733e-05, "loss": 2.4913, "step": 87035 }, { "epoch": 5.913846990080174, "grad_norm": 3.5568771362304688, "learning_rate": 2.610239162929746e-05, "loss": 1.9866, "step": 87040 }, { "epoch": 5.914186710150836, "grad_norm": 2.8197240829467773, "learning_rate": 2.6098145128414186e-05, "loss": 1.9039, "step": 87045 }, { "epoch": 5.914526430221498, "grad_norm": 2.933485507965088, "learning_rate": 2.6093898627530917e-05, "loss": 2.293, "step": 87050 }, { "epoch": 5.914866150292159, "grad_norm": 5.061345100402832, "learning_rate": 2.6089652126647645e-05, "loss": 1.9812, "step": 87055 }, { "epoch": 5.915205870362821, "grad_norm": 3.8852760791778564, "learning_rate": 2.608540562576437e-05, "loss": 2.1507, "step": 87060 }, { "epoch": 5.915545590433483, "grad_norm": 3.334398031234741, "learning_rate": 2.6081159124881098e-05, "loss": 2.267, "step": 87065 }, { "epoch": 5.9158853105041445, "grad_norm": 4.035428524017334, "learning_rate": 2.607691262399783e-05, "loss": 2.1105, "step": 87070 }, { "epoch": 5.9162250305748065, "grad_norm": 2.8301076889038086, "learning_rate": 2.607351542329121e-05, "loss": 2.2578, "step": 87075 }, { "epoch": 5.916564750645469, "grad_norm": 3.9127986431121826, "learning_rate": 2.6069268922407936e-05, "loss": 2.4679, "step": 87080 }, { "epoch": 5.91690447071613, "grad_norm": 3.7620232105255127, "learning_rate": 2.6065022421524664e-05, "loss": 2.0853, "step": 87085 }, { "epoch": 5.917244190786792, "grad_norm": 2.773266553878784, "learning_rate": 2.6060775920641396e-05, "loss": 2.4728, "step": 87090 }, { "epoch": 5.917583910857454, "grad_norm": 3.366633653640747, "learning_rate": 2.605652941975812e-05, "loss": 2.1753, "step": 87095 }, { "epoch": 5.917923630928115, "grad_norm": 2.9350006580352783, "learning_rate": 2.605228291887485e-05, "loss": 2.1624, "step": 87100 }, { "epoch": 5.918263350998777, "grad_norm": 3.171226739883423, "learning_rate": 2.604803641799158e-05, "loss": 2.2438, "step": 87105 }, { "epoch": 
5.918603071069439, "grad_norm": 3.2608625888824463, "learning_rate": 2.60437899171083e-05, "loss": 2.1763, "step": 87110 }, { "epoch": 5.9189427911401005, "grad_norm": 3.1230757236480713, "learning_rate": 2.6039543416225032e-05, "loss": 2.41, "step": 87115 }, { "epoch": 5.9192825112107625, "grad_norm": 4.049822807312012, "learning_rate": 2.603529691534176e-05, "loss": 2.1728, "step": 87120 }, { "epoch": 5.919622231281424, "grad_norm": 3.5919253826141357, "learning_rate": 2.6031050414458485e-05, "loss": 2.403, "step": 87125 }, { "epoch": 5.919961951352086, "grad_norm": 3.326399087905884, "learning_rate": 2.6026803913575216e-05, "loss": 1.8871, "step": 87130 }, { "epoch": 5.920301671422748, "grad_norm": 4.1853766441345215, "learning_rate": 2.6022557412691944e-05, "loss": 2.4227, "step": 87135 }, { "epoch": 5.920641391493409, "grad_norm": 3.026975393295288, "learning_rate": 2.601831091180867e-05, "loss": 2.1823, "step": 87140 }, { "epoch": 5.920981111564071, "grad_norm": 3.1753640174865723, "learning_rate": 2.6014064410925397e-05, "loss": 2.2341, "step": 87145 }, { "epoch": 5.921320831634733, "grad_norm": 3.765310525894165, "learning_rate": 2.600981791004213e-05, "loss": 2.3366, "step": 87150 }, { "epoch": 5.921660551705394, "grad_norm": 3.582638740539551, "learning_rate": 2.6005571409158853e-05, "loss": 2.0345, "step": 87155 }, { "epoch": 5.9220002717760565, "grad_norm": 3.2144834995269775, "learning_rate": 2.600132490827558e-05, "loss": 2.1361, "step": 87160 }, { "epoch": 5.9223399918467186, "grad_norm": 3.5138673782348633, "learning_rate": 2.5997078407392312e-05, "loss": 2.6128, "step": 87165 }, { "epoch": 5.92267971191738, "grad_norm": 3.9288458824157715, "learning_rate": 2.5992831906509034e-05, "loss": 2.5511, "step": 87170 }, { "epoch": 5.923019431988042, "grad_norm": 3.216374635696411, "learning_rate": 2.5988585405625765e-05, "loss": 2.3144, "step": 87175 }, { "epoch": 5.923359152058704, "grad_norm": 3.298429250717163, "learning_rate": 2.5984338904742493e-05, 
"loss": 2.3651, "step": 87180 }, { "epoch": 5.923698872129365, "grad_norm": 2.9093611240386963, "learning_rate": 2.5980092403859218e-05, "loss": 2.3389, "step": 87185 }, { "epoch": 5.924038592200027, "grad_norm": 3.175633430480957, "learning_rate": 2.597584590297595e-05, "loss": 2.4073, "step": 87190 }, { "epoch": 5.924378312270689, "grad_norm": 3.525078773498535, "learning_rate": 2.5971599402092677e-05, "loss": 2.5349, "step": 87195 }, { "epoch": 5.92471803234135, "grad_norm": 2.97192120552063, "learning_rate": 2.596735290120941e-05, "loss": 2.2882, "step": 87200 }, { "epoch": 5.9250577524120125, "grad_norm": 3.0960872173309326, "learning_rate": 2.5963106400326133e-05, "loss": 2.0756, "step": 87205 }, { "epoch": 5.925397472482675, "grad_norm": 4.333570957183838, "learning_rate": 2.595885989944286e-05, "loss": 2.197, "step": 87210 }, { "epoch": 5.925737192553336, "grad_norm": 3.9743282794952393, "learning_rate": 2.595461339855959e-05, "loss": 2.3129, "step": 87215 }, { "epoch": 5.926076912623998, "grad_norm": 2.9933395385742188, "learning_rate": 2.5950366897676314e-05, "loss": 2.2968, "step": 87220 }, { "epoch": 5.92641663269466, "grad_norm": 3.5110480785369873, "learning_rate": 2.5946120396793045e-05, "loss": 2.2651, "step": 87225 }, { "epoch": 5.926756352765321, "grad_norm": 3.517730236053467, "learning_rate": 2.5941873895909773e-05, "loss": 2.3129, "step": 87230 }, { "epoch": 5.927096072835983, "grad_norm": 2.7808468341827393, "learning_rate": 2.5937627395026498e-05, "loss": 2.4814, "step": 87235 }, { "epoch": 5.927435792906645, "grad_norm": 2.727128744125366, "learning_rate": 2.593338089414323e-05, "loss": 2.2271, "step": 87240 }, { "epoch": 5.927775512977306, "grad_norm": 3.076709508895874, "learning_rate": 2.5929134393259957e-05, "loss": 1.9828, "step": 87245 }, { "epoch": 5.9281152330479685, "grad_norm": 3.3816006183624268, "learning_rate": 2.5924887892376682e-05, "loss": 2.2559, "step": 87250 }, { "epoch": 5.928454953118631, "grad_norm": 3.290619373321533, 
"learning_rate": 2.592064139149341e-05, "loss": 2.1363, "step": 87255 }, { "epoch": 5.928794673189292, "grad_norm": 4.182936668395996, "learning_rate": 2.591639489061014e-05, "loss": 2.2704, "step": 87260 }, { "epoch": 5.929134393259954, "grad_norm": 4.269733428955078, "learning_rate": 2.5912148389726866e-05, "loss": 2.1299, "step": 87265 }, { "epoch": 5.929474113330616, "grad_norm": 3.614100456237793, "learning_rate": 2.5907901888843594e-05, "loss": 2.3879, "step": 87270 }, { "epoch": 5.929813833401277, "grad_norm": 3.8852880001068115, "learning_rate": 2.5903655387960325e-05, "loss": 2.1615, "step": 87275 }, { "epoch": 5.930153553471939, "grad_norm": 3.3013715744018555, "learning_rate": 2.5899408887077046e-05, "loss": 1.9884, "step": 87280 }, { "epoch": 5.930493273542601, "grad_norm": 2.9122724533081055, "learning_rate": 2.5895162386193778e-05, "loss": 2.327, "step": 87285 }, { "epoch": 5.930832993613262, "grad_norm": 3.2622907161712646, "learning_rate": 2.5890915885310506e-05, "loss": 2.3631, "step": 87290 }, { "epoch": 5.9311727136839245, "grad_norm": 2.989828109741211, "learning_rate": 2.588666938442723e-05, "loss": 2.254, "step": 87295 }, { "epoch": 5.931512433754587, "grad_norm": 3.3142364025115967, "learning_rate": 2.5882422883543962e-05, "loss": 2.1577, "step": 87300 }, { "epoch": 5.931852153825248, "grad_norm": 3.971742868423462, "learning_rate": 2.587817638266069e-05, "loss": 2.2964, "step": 87305 }, { "epoch": 5.93219187389591, "grad_norm": 3.418471574783325, "learning_rate": 2.5873929881777414e-05, "loss": 2.2521, "step": 87310 }, { "epoch": 5.932531593966572, "grad_norm": 3.3112919330596924, "learning_rate": 2.5869683380894142e-05, "loss": 2.3112, "step": 87315 }, { "epoch": 5.932871314037233, "grad_norm": 2.574842691421509, "learning_rate": 2.5865436880010874e-05, "loss": 2.3281, "step": 87320 }, { "epoch": 5.933211034107895, "grad_norm": 4.0440287590026855, "learning_rate": 2.58611903791276e-05, "loss": 2.3152, "step": 87325 }, { "epoch": 
5.933550754178556, "grad_norm": 3.6289193630218506, "learning_rate": 2.5856943878244327e-05, "loss": 2.3622, "step": 87330 }, { "epoch": 5.9338904742492184, "grad_norm": 4.3014607429504395, "learning_rate": 2.5852697377361058e-05, "loss": 2.5174, "step": 87335 }, { "epoch": 5.9342301943198805, "grad_norm": 3.0786855220794678, "learning_rate": 2.5848450876477783e-05, "loss": 2.5923, "step": 87340 }, { "epoch": 5.934569914390542, "grad_norm": 3.0855491161346436, "learning_rate": 2.584420437559451e-05, "loss": 2.3915, "step": 87345 }, { "epoch": 5.934909634461204, "grad_norm": 3.6387691497802734, "learning_rate": 2.5839957874711242e-05, "loss": 2.2724, "step": 87350 }, { "epoch": 5.935249354531866, "grad_norm": 3.877126693725586, "learning_rate": 2.5835711373827963e-05, "loss": 2.1158, "step": 87355 }, { "epoch": 5.935589074602527, "grad_norm": 4.241000175476074, "learning_rate": 2.5831464872944695e-05, "loss": 2.3758, "step": 87360 }, { "epoch": 5.935928794673189, "grad_norm": 3.1594369411468506, "learning_rate": 2.5827218372061423e-05, "loss": 2.0652, "step": 87365 }, { "epoch": 5.936268514743851, "grad_norm": 2.444260358810425, "learning_rate": 2.5822971871178154e-05, "loss": 2.2851, "step": 87370 }, { "epoch": 5.936608234814512, "grad_norm": 3.368091583251953, "learning_rate": 2.581872537029488e-05, "loss": 1.9437, "step": 87375 }, { "epoch": 5.9369479548851745, "grad_norm": 3.4615654945373535, "learning_rate": 2.5814478869411607e-05, "loss": 2.1214, "step": 87380 }, { "epoch": 5.9372876749558365, "grad_norm": 4.010498523712158, "learning_rate": 2.5810232368528338e-05, "loss": 2.3088, "step": 87385 }, { "epoch": 5.937627395026498, "grad_norm": 3.2548141479492188, "learning_rate": 2.580598586764506e-05, "loss": 2.1214, "step": 87390 }, { "epoch": 5.93796711509716, "grad_norm": 2.613924741744995, "learning_rate": 2.580173936676179e-05, "loss": 1.9954, "step": 87395 }, { "epoch": 5.938306835167822, "grad_norm": 2.8182735443115234, "learning_rate": 
2.579749286587852e-05, "loss": 1.9716, "step": 87400 }, { "epoch": 5.938646555238483, "grad_norm": 4.009711265563965, "learning_rate": 2.5793246364995243e-05, "loss": 2.1499, "step": 87405 }, { "epoch": 5.938986275309145, "grad_norm": 4.213365077972412, "learning_rate": 2.5788999864111975e-05, "loss": 2.3005, "step": 87410 }, { "epoch": 5.939325995379807, "grad_norm": 3.466301918029785, "learning_rate": 2.5784753363228703e-05, "loss": 2.0246, "step": 87415 }, { "epoch": 5.939665715450468, "grad_norm": 2.9861552715301514, "learning_rate": 2.5780506862345427e-05, "loss": 1.9015, "step": 87420 }, { "epoch": 5.9400054355211305, "grad_norm": 3.29925537109375, "learning_rate": 2.5776260361462155e-05, "loss": 2.1151, "step": 87425 }, { "epoch": 5.9403451555917925, "grad_norm": 2.3657641410827637, "learning_rate": 2.5772013860578887e-05, "loss": 2.1961, "step": 87430 }, { "epoch": 5.940684875662454, "grad_norm": 3.324681043624878, "learning_rate": 2.576776735969561e-05, "loss": 2.3886, "step": 87435 }, { "epoch": 5.941024595733116, "grad_norm": 3.2661707401275635, "learning_rate": 2.576352085881234e-05, "loss": 2.1832, "step": 87440 }, { "epoch": 5.941364315803778, "grad_norm": 3.787687063217163, "learning_rate": 2.575927435792907e-05, "loss": 2.1494, "step": 87445 }, { "epoch": 5.941704035874439, "grad_norm": 2.9204490184783936, "learning_rate": 2.5755027857045795e-05, "loss": 2.3439, "step": 87450 }, { "epoch": 5.942043755945101, "grad_norm": 4.255488395690918, "learning_rate": 2.5750781356162523e-05, "loss": 2.0716, "step": 87455 }, { "epoch": 5.942383476015763, "grad_norm": 2.981459617614746, "learning_rate": 2.574653485527925e-05, "loss": 2.2964, "step": 87460 }, { "epoch": 5.942723196086424, "grad_norm": 5.039204120635986, "learning_rate": 2.5742288354395976e-05, "loss": 2.2417, "step": 87465 }, { "epoch": 5.9430629161570865, "grad_norm": 2.9986090660095215, "learning_rate": 2.5738041853512707e-05, "loss": 2.1021, "step": 87470 }, { "epoch": 5.943402636227749, 
"grad_norm": 3.916497230529785, "learning_rate": 2.5733795352629435e-05, "loss": 2.3842, "step": 87475 }, { "epoch": 5.94374235629841, "grad_norm": 3.5546956062316895, "learning_rate": 2.572954885174616e-05, "loss": 2.2996, "step": 87480 }, { "epoch": 5.944082076369072, "grad_norm": 3.409226894378662, "learning_rate": 2.572530235086289e-05, "loss": 2.3767, "step": 87485 }, { "epoch": 5.944421796439734, "grad_norm": 2.917328357696533, "learning_rate": 2.572105584997962e-05, "loss": 2.2834, "step": 87490 }, { "epoch": 5.944761516510395, "grad_norm": 3.5049939155578613, "learning_rate": 2.5716809349096344e-05, "loss": 2.0451, "step": 87495 }, { "epoch": 5.945101236581057, "grad_norm": 4.078854084014893, "learning_rate": 2.5712562848213072e-05, "loss": 2.2506, "step": 87500 }, { "epoch": 5.945440956651719, "grad_norm": 3.0707263946533203, "learning_rate": 2.5708316347329803e-05, "loss": 2.3051, "step": 87505 }, { "epoch": 5.94578067672238, "grad_norm": 3.286046028137207, "learning_rate": 2.5704069846446528e-05, "loss": 2.191, "step": 87510 }, { "epoch": 5.9461203967930425, "grad_norm": 3.506173610687256, "learning_rate": 2.5699823345563256e-05, "loss": 2.498, "step": 87515 }, { "epoch": 5.946460116863705, "grad_norm": 3.348924160003662, "learning_rate": 2.5695576844679987e-05, "loss": 2.3987, "step": 87520 }, { "epoch": 5.946799836934366, "grad_norm": 3.051225185394287, "learning_rate": 2.569133034379671e-05, "loss": 2.3985, "step": 87525 }, { "epoch": 5.947139557005028, "grad_norm": 3.585472822189331, "learning_rate": 2.568708384291344e-05, "loss": 1.8722, "step": 87530 }, { "epoch": 5.94747927707569, "grad_norm": 3.398852825164795, "learning_rate": 2.5682837342030168e-05, "loss": 2.558, "step": 87535 }, { "epoch": 5.947818997146351, "grad_norm": 2.8801329135894775, "learning_rate": 2.56785908411469e-05, "loss": 2.1553, "step": 87540 }, { "epoch": 5.948158717217013, "grad_norm": 3.999725103378296, "learning_rate": 2.5674344340263624e-05, "loss": 2.1429, "step": 87545 
}, { "epoch": 5.948498437287675, "grad_norm": 3.1707675457000732, "learning_rate": 2.5670097839380352e-05, "loss": 2.2008, "step": 87550 }, { "epoch": 5.948838157358336, "grad_norm": 3.7864296436309814, "learning_rate": 2.5665851338497083e-05, "loss": 2.5955, "step": 87555 }, { "epoch": 5.9491778774289985, "grad_norm": 2.723999500274658, "learning_rate": 2.5661604837613805e-05, "loss": 2.152, "step": 87560 }, { "epoch": 5.949517597499661, "grad_norm": 3.471468448638916, "learning_rate": 2.5657358336730536e-05, "loss": 2.2509, "step": 87565 }, { "epoch": 5.949857317570322, "grad_norm": 3.3242409229278564, "learning_rate": 2.5653111835847264e-05, "loss": 2.2425, "step": 87570 }, { "epoch": 5.950197037640984, "grad_norm": 3.0551233291625977, "learning_rate": 2.564886533496399e-05, "loss": 2.0129, "step": 87575 }, { "epoch": 5.950536757711646, "grad_norm": 3.969764232635498, "learning_rate": 2.564461883408072e-05, "loss": 2.1833, "step": 87580 }, { "epoch": 5.950876477782307, "grad_norm": 3.3349711894989014, "learning_rate": 2.5640372333197448e-05, "loss": 2.2876, "step": 87585 }, { "epoch": 5.951216197852969, "grad_norm": 4.000057220458984, "learning_rate": 2.5636125832314173e-05, "loss": 2.1749, "step": 87590 }, { "epoch": 5.951555917923631, "grad_norm": 3.7238688468933105, "learning_rate": 2.5631879331430904e-05, "loss": 2.4787, "step": 87595 }, { "epoch": 5.951895637994292, "grad_norm": 2.928330183029175, "learning_rate": 2.5627632830547632e-05, "loss": 2.3338, "step": 87600 }, { "epoch": 5.9522353580649545, "grad_norm": 4.448728561401367, "learning_rate": 2.5623386329664357e-05, "loss": 2.1655, "step": 87605 }, { "epoch": 5.952575078135617, "grad_norm": 3.310136318206787, "learning_rate": 2.5619139828781085e-05, "loss": 2.4302, "step": 87610 }, { "epoch": 5.952914798206278, "grad_norm": 3.325099468231201, "learning_rate": 2.5614893327897816e-05, "loss": 2.4712, "step": 87615 }, { "epoch": 5.95325451827694, "grad_norm": 3.610013484954834, "learning_rate": 
2.561064682701454e-05, "loss": 2.2801, "step": 87620 }, { "epoch": 5.953594238347602, "grad_norm": 3.635179281234741, "learning_rate": 2.560640032613127e-05, "loss": 2.1022, "step": 87625 }, { "epoch": 5.953933958418263, "grad_norm": 3.35329532623291, "learning_rate": 2.5602153825248e-05, "loss": 2.3734, "step": 87630 }, { "epoch": 5.954273678488925, "grad_norm": 3.849045991897583, "learning_rate": 2.559790732436472e-05, "loss": 2.1108, "step": 87635 }, { "epoch": 5.954613398559587, "grad_norm": 2.9213294982910156, "learning_rate": 2.5593660823481453e-05, "loss": 2.2308, "step": 87640 }, { "epoch": 5.9549531186302485, "grad_norm": 2.6601462364196777, "learning_rate": 2.558941432259818e-05, "loss": 2.3144, "step": 87645 }, { "epoch": 5.9552928387009105, "grad_norm": 3.31282377243042, "learning_rate": 2.5585167821714905e-05, "loss": 2.2133, "step": 87650 }, { "epoch": 5.955632558771573, "grad_norm": 3.744446277618408, "learning_rate": 2.5580921320831637e-05, "loss": 2.2649, "step": 87655 }, { "epoch": 5.955972278842234, "grad_norm": 2.9717702865600586, "learning_rate": 2.5576674819948365e-05, "loss": 2.3393, "step": 87660 }, { "epoch": 5.956311998912896, "grad_norm": 4.910446643829346, "learning_rate": 2.557242831906509e-05, "loss": 2.3739, "step": 87665 }, { "epoch": 5.956651718983558, "grad_norm": 3.7705140113830566, "learning_rate": 2.5568181818181817e-05, "loss": 2.0765, "step": 87670 }, { "epoch": 5.956991439054219, "grad_norm": 3.5111377239227295, "learning_rate": 2.556393531729855e-05, "loss": 2.3404, "step": 87675 }, { "epoch": 5.957331159124881, "grad_norm": 3.3347320556640625, "learning_rate": 2.5559688816415273e-05, "loss": 2.336, "step": 87680 }, { "epoch": 5.957670879195543, "grad_norm": 3.016908884048462, "learning_rate": 2.5555442315532e-05, "loss": 2.2382, "step": 87685 }, { "epoch": 5.9580105992662045, "grad_norm": 2.764591932296753, "learning_rate": 2.5551195814648733e-05, "loss": 2.2593, "step": 87690 }, { "epoch": 5.9583503193368665, "grad_norm": 
3.336923837661743, "learning_rate": 2.5546949313765458e-05, "loss": 2.2429, "step": 87695 }, { "epoch": 5.958690039407529, "grad_norm": 3.3972010612487793, "learning_rate": 2.5542702812882186e-05, "loss": 2.3602, "step": 87700 }, { "epoch": 5.95902975947819, "grad_norm": 3.0789101123809814, "learning_rate": 2.5538456311998914e-05, "loss": 2.3829, "step": 87705 }, { "epoch": 5.959369479548852, "grad_norm": 3.2990541458129883, "learning_rate": 2.5534209811115645e-05, "loss": 2.1603, "step": 87710 }, { "epoch": 5.959709199619514, "grad_norm": 3.0016441345214844, "learning_rate": 2.552996331023237e-05, "loss": 2.2689, "step": 87715 }, { "epoch": 5.960048919690175, "grad_norm": 3.2539665699005127, "learning_rate": 2.5525716809349098e-05, "loss": 2.3874, "step": 87720 }, { "epoch": 5.960388639760837, "grad_norm": 3.496821880340576, "learning_rate": 2.552147030846583e-05, "loss": 2.3904, "step": 87725 }, { "epoch": 5.960728359831499, "grad_norm": 3.341798782348633, "learning_rate": 2.5517223807582554e-05, "loss": 2.6843, "step": 87730 }, { "epoch": 5.9610680799021605, "grad_norm": 3.1239216327667236, "learning_rate": 2.551297730669928e-05, "loss": 2.4176, "step": 87735 }, { "epoch": 5.9614077999728226, "grad_norm": 3.2093253135681152, "learning_rate": 2.550873080581601e-05, "loss": 2.1986, "step": 87740 }, { "epoch": 5.961747520043485, "grad_norm": 3.3898584842681885, "learning_rate": 2.5504484304932734e-05, "loss": 1.9707, "step": 87745 }, { "epoch": 5.962087240114146, "grad_norm": 3.7658936977386475, "learning_rate": 2.5500237804049466e-05, "loss": 2.5719, "step": 87750 }, { "epoch": 5.962426960184808, "grad_norm": 2.948871612548828, "learning_rate": 2.5495991303166194e-05, "loss": 2.1474, "step": 87755 }, { "epoch": 5.96276668025547, "grad_norm": 3.524538040161133, "learning_rate": 2.5491744802282918e-05, "loss": 1.9739, "step": 87760 }, { "epoch": 5.963106400326131, "grad_norm": 3.3949782848358154, "learning_rate": 2.548749830139965e-05, "loss": 2.6666, "step": 87765 
}, { "epoch": 5.963446120396793, "grad_norm": 4.393316745758057, "learning_rate": 2.5483251800516378e-05, "loss": 2.4591, "step": 87770 }, { "epoch": 5.963785840467455, "grad_norm": 3.9032764434814453, "learning_rate": 2.5479005299633102e-05, "loss": 2.2563, "step": 87775 }, { "epoch": 5.9641255605381165, "grad_norm": 2.802155017852783, "learning_rate": 2.547475879874983e-05, "loss": 2.1778, "step": 87780 }, { "epoch": 5.964465280608779, "grad_norm": 3.5490479469299316, "learning_rate": 2.547051229786656e-05, "loss": 2.4053, "step": 87785 }, { "epoch": 5.964805000679441, "grad_norm": 3.9435293674468994, "learning_rate": 2.5466265796983286e-05, "loss": 2.4161, "step": 87790 }, { "epoch": 5.965144720750102, "grad_norm": 3.224949598312378, "learning_rate": 2.5462019296100014e-05, "loss": 1.9115, "step": 87795 }, { "epoch": 5.965484440820764, "grad_norm": 3.232797384262085, "learning_rate": 2.5457772795216746e-05, "loss": 2.3408, "step": 87800 }, { "epoch": 5.965824160891425, "grad_norm": 3.552971601486206, "learning_rate": 2.5453526294333467e-05, "loss": 2.1036, "step": 87805 }, { "epoch": 5.966163880962087, "grad_norm": 3.6843132972717285, "learning_rate": 2.54492797934502e-05, "loss": 2.2716, "step": 87810 }, { "epoch": 5.966503601032749, "grad_norm": 3.661919593811035, "learning_rate": 2.5445033292566926e-05, "loss": 2.6467, "step": 87815 }, { "epoch": 5.96684332110341, "grad_norm": 3.463822841644287, "learning_rate": 2.544078679168365e-05, "loss": 2.3453, "step": 87820 }, { "epoch": 5.9671830411740725, "grad_norm": 3.410383701324463, "learning_rate": 2.5436540290800382e-05, "loss": 2.3436, "step": 87825 }, { "epoch": 5.967522761244735, "grad_norm": 3.628751039505005, "learning_rate": 2.543229378991711e-05, "loss": 2.4681, "step": 87830 }, { "epoch": 5.967862481315396, "grad_norm": 3.5581917762756348, "learning_rate": 2.5428047289033835e-05, "loss": 2.2793, "step": 87835 }, { "epoch": 5.968202201386058, "grad_norm": 3.496269464492798, "learning_rate": 
2.5423800788150563e-05, "loss": 2.1551, "step": 87840 }, { "epoch": 5.96854192145672, "grad_norm": 3.2662410736083984, "learning_rate": 2.5419554287267294e-05, "loss": 2.4023, "step": 87845 }, { "epoch": 5.968881641527381, "grad_norm": 3.330225706100464, "learning_rate": 2.541530778638402e-05, "loss": 2.226, "step": 87850 }, { "epoch": 5.969221361598043, "grad_norm": 3.3957340717315674, "learning_rate": 2.5411061285500747e-05, "loss": 2.4423, "step": 87855 }, { "epoch": 5.969561081668705, "grad_norm": 3.8302388191223145, "learning_rate": 2.540681478461748e-05, "loss": 2.1964, "step": 87860 }, { "epoch": 5.969900801739366, "grad_norm": 2.9033334255218506, "learning_rate": 2.5402568283734203e-05, "loss": 2.2566, "step": 87865 }, { "epoch": 5.9702405218100285, "grad_norm": 3.225688934326172, "learning_rate": 2.539832178285093e-05, "loss": 2.2864, "step": 87870 }, { "epoch": 5.970580241880691, "grad_norm": 3.583049774169922, "learning_rate": 2.5394075281967662e-05, "loss": 2.0069, "step": 87875 }, { "epoch": 5.970919961951352, "grad_norm": 3.3044893741607666, "learning_rate": 2.538982878108439e-05, "loss": 2.3496, "step": 87880 }, { "epoch": 5.971259682022014, "grad_norm": 2.901564359664917, "learning_rate": 2.5385582280201115e-05, "loss": 2.4512, "step": 87885 }, { "epoch": 5.971599402092676, "grad_norm": 3.61084246635437, "learning_rate": 2.5381335779317843e-05, "loss": 2.0679, "step": 87890 }, { "epoch": 5.971939122163337, "grad_norm": 3.5364274978637695, "learning_rate": 2.5377089278434574e-05, "loss": 2.376, "step": 87895 }, { "epoch": 5.972278842233999, "grad_norm": 3.1968679428100586, "learning_rate": 2.53728427775513e-05, "loss": 2.0267, "step": 87900 }, { "epoch": 5.972618562304661, "grad_norm": 2.7772138118743896, "learning_rate": 2.5368596276668027e-05, "loss": 1.9536, "step": 87905 }, { "epoch": 5.9729582823753224, "grad_norm": 3.7476673126220703, "learning_rate": 2.536434977578476e-05, "loss": 2.3739, "step": 87910 }, { "epoch": 5.9732980024459845, 
"grad_norm": 3.2273061275482178, "learning_rate": 2.536010327490148e-05, "loss": 2.2153, "step": 87915 }, { "epoch": 5.973637722516647, "grad_norm": 3.5688540935516357, "learning_rate": 2.535585677401821e-05, "loss": 2.1952, "step": 87920 }, { "epoch": 5.973977442587308, "grad_norm": 3.558742046356201, "learning_rate": 2.535161027313494e-05, "loss": 2.2752, "step": 87925 }, { "epoch": 5.97431716265797, "grad_norm": 2.9087111949920654, "learning_rate": 2.5347363772251664e-05, "loss": 2.274, "step": 87930 }, { "epoch": 5.974656882728632, "grad_norm": 3.630981206893921, "learning_rate": 2.5343117271368395e-05, "loss": 2.1667, "step": 87935 }, { "epoch": 5.974996602799293, "grad_norm": 2.9508461952209473, "learning_rate": 2.5338870770485123e-05, "loss": 2.1481, "step": 87940 }, { "epoch": 5.975336322869955, "grad_norm": 3.1850106716156006, "learning_rate": 2.5334624269601848e-05, "loss": 2.3554, "step": 87945 }, { "epoch": 5.975676042940617, "grad_norm": 3.0947179794311523, "learning_rate": 2.5330377768718576e-05, "loss": 1.9561, "step": 87950 }, { "epoch": 5.9760157630112785, "grad_norm": 3.188842296600342, "learning_rate": 2.5326131267835307e-05, "loss": 2.0749, "step": 87955 }, { "epoch": 5.9763554830819405, "grad_norm": 3.647587299346924, "learning_rate": 2.5321884766952032e-05, "loss": 2.372, "step": 87960 }, { "epoch": 5.976695203152603, "grad_norm": 2.9091951847076416, "learning_rate": 2.531763826606876e-05, "loss": 2.0239, "step": 87965 }, { "epoch": 5.977034923223264, "grad_norm": 2.9592525959014893, "learning_rate": 2.531339176518549e-05, "loss": 2.422, "step": 87970 }, { "epoch": 5.977374643293926, "grad_norm": 2.8954873085021973, "learning_rate": 2.5309145264302216e-05, "loss": 2.5044, "step": 87975 }, { "epoch": 5.977714363364588, "grad_norm": 3.5921106338500977, "learning_rate": 2.5304898763418944e-05, "loss": 2.3452, "step": 87980 }, { "epoch": 5.978054083435249, "grad_norm": 2.6657214164733887, "learning_rate": 2.5300652262535672e-05, "loss": 2.4045, 
"step": 87985 }, { "epoch": 5.978393803505911, "grad_norm": 3.5129952430725098, "learning_rate": 2.5296405761652396e-05, "loss": 2.0611, "step": 87990 }, { "epoch": 5.978733523576573, "grad_norm": 3.5779426097869873, "learning_rate": 2.5292159260769128e-05, "loss": 2.4035, "step": 87995 }, { "epoch": 5.9790732436472345, "grad_norm": 3.543349504470825, "learning_rate": 2.5287912759885856e-05, "loss": 2.2209, "step": 88000 }, { "epoch": 5.9794129637178965, "grad_norm": 3.793287992477417, "learning_rate": 2.528366625900258e-05, "loss": 2.1972, "step": 88005 }, { "epoch": 5.979752683788558, "grad_norm": 3.1338560581207275, "learning_rate": 2.5279419758119312e-05, "loss": 1.986, "step": 88010 }, { "epoch": 5.98009240385922, "grad_norm": 4.180814743041992, "learning_rate": 2.527517325723604e-05, "loss": 2.1236, "step": 88015 }, { "epoch": 5.980432123929882, "grad_norm": 2.8997981548309326, "learning_rate": 2.5270926756352764e-05, "loss": 2.3578, "step": 88020 }, { "epoch": 5.980771844000543, "grad_norm": 3.4174187183380127, "learning_rate": 2.5266680255469492e-05, "loss": 2.2825, "step": 88025 }, { "epoch": 5.981111564071205, "grad_norm": 3.299196481704712, "learning_rate": 2.5262433754586224e-05, "loss": 2.3553, "step": 88030 }, { "epoch": 5.981451284141867, "grad_norm": 3.6816842555999756, "learning_rate": 2.525818725370295e-05, "loss": 2.4304, "step": 88035 }, { "epoch": 5.981791004212528, "grad_norm": 4.578524112701416, "learning_rate": 2.5253940752819677e-05, "loss": 2.4104, "step": 88040 }, { "epoch": 5.9821307242831905, "grad_norm": 3.5142135620117188, "learning_rate": 2.5249694251936408e-05, "loss": 2.031, "step": 88045 }, { "epoch": 5.982470444353853, "grad_norm": 3.2831454277038574, "learning_rate": 2.5245447751053136e-05, "loss": 2.3, "step": 88050 }, { "epoch": 5.982810164424514, "grad_norm": 3.2961671352386475, "learning_rate": 2.524120125016986e-05, "loss": 2.1262, "step": 88055 }, { "epoch": 5.983149884495176, "grad_norm": 3.0027027130126953, 
"learning_rate": 2.523695474928659e-05, "loss": 2.3947, "step": 88060 }, { "epoch": 5.983489604565838, "grad_norm": 3.7316231727600098, "learning_rate": 2.523270824840332e-05, "loss": 2.3782, "step": 88065 }, { "epoch": 5.983829324636499, "grad_norm": 2.8981120586395264, "learning_rate": 2.5228461747520045e-05, "loss": 2.4199, "step": 88070 }, { "epoch": 5.984169044707161, "grad_norm": 3.3702473640441895, "learning_rate": 2.5224215246636773e-05, "loss": 2.07, "step": 88075 }, { "epoch": 5.984508764777823, "grad_norm": 4.780584335327148, "learning_rate": 2.5219968745753504e-05, "loss": 2.3804, "step": 88080 }, { "epoch": 5.984848484848484, "grad_norm": 3.551557779312134, "learning_rate": 2.5215722244870225e-05, "loss": 2.0638, "step": 88085 }, { "epoch": 5.9851882049191465, "grad_norm": 3.6438357830047607, "learning_rate": 2.5211475743986957e-05, "loss": 2.2831, "step": 88090 }, { "epoch": 5.985527924989809, "grad_norm": 3.614638090133667, "learning_rate": 2.5207229243103685e-05, "loss": 2.1082, "step": 88095 }, { "epoch": 5.98586764506047, "grad_norm": 3.71122670173645, "learning_rate": 2.520298274222041e-05, "loss": 2.3318, "step": 88100 }, { "epoch": 5.986207365131132, "grad_norm": 2.4946420192718506, "learning_rate": 2.519873624133714e-05, "loss": 2.2857, "step": 88105 }, { "epoch": 5.986547085201794, "grad_norm": 3.2465109825134277, "learning_rate": 2.519448974045387e-05, "loss": 2.6185, "step": 88110 }, { "epoch": 5.986886805272455, "grad_norm": 3.325303792953491, "learning_rate": 2.5190243239570593e-05, "loss": 2.469, "step": 88115 }, { "epoch": 5.987226525343117, "grad_norm": 2.8750157356262207, "learning_rate": 2.5185996738687325e-05, "loss": 2.2505, "step": 88120 }, { "epoch": 5.987566245413779, "grad_norm": 3.55290150642395, "learning_rate": 2.5181750237804053e-05, "loss": 2.1942, "step": 88125 }, { "epoch": 5.98790596548444, "grad_norm": 3.36651611328125, "learning_rate": 2.5177503736920777e-05, "loss": 2.0418, "step": 88130 }, { "epoch": 
5.9882456855551025, "grad_norm": 3.393651008605957, "learning_rate": 2.5173257236037505e-05, "loss": 2.0758, "step": 88135 }, { "epoch": 5.988585405625765, "grad_norm": 3.3990049362182617, "learning_rate": 2.5169010735154237e-05, "loss": 2.2785, "step": 88140 }, { "epoch": 5.988925125696426, "grad_norm": 3.210390090942383, "learning_rate": 2.516476423427096e-05, "loss": 2.1139, "step": 88145 }, { "epoch": 5.989264845767088, "grad_norm": 3.4018008708953857, "learning_rate": 2.516051773338769e-05, "loss": 2.2256, "step": 88150 }, { "epoch": 5.98960456583775, "grad_norm": 3.5990424156188965, "learning_rate": 2.515627123250442e-05, "loss": 2.3097, "step": 88155 }, { "epoch": 5.989944285908411, "grad_norm": 3.121206760406494, "learning_rate": 2.5152024731621142e-05, "loss": 2.2772, "step": 88160 }, { "epoch": 5.990284005979073, "grad_norm": 3.655467987060547, "learning_rate": 2.5147778230737873e-05, "loss": 2.0795, "step": 88165 }, { "epoch": 5.990623726049735, "grad_norm": 4.5631422996521, "learning_rate": 2.51435317298546e-05, "loss": 2.6825, "step": 88170 }, { "epoch": 5.990963446120396, "grad_norm": 4.045576095581055, "learning_rate": 2.5139285228971326e-05, "loss": 2.314, "step": 88175 }, { "epoch": 5.9913031661910585, "grad_norm": 3.9928789138793945, "learning_rate": 2.5135038728088057e-05, "loss": 2.3893, "step": 88180 }, { "epoch": 5.991642886261721, "grad_norm": 3.606677293777466, "learning_rate": 2.5130792227204785e-05, "loss": 2.1814, "step": 88185 }, { "epoch": 5.991982606332382, "grad_norm": 3.4932093620300293, "learning_rate": 2.512654572632151e-05, "loss": 2.0972, "step": 88190 }, { "epoch": 5.992322326403044, "grad_norm": 4.04482889175415, "learning_rate": 2.5122299225438238e-05, "loss": 1.9561, "step": 88195 }, { "epoch": 5.992662046473706, "grad_norm": 3.1712329387664795, "learning_rate": 2.511805272455497e-05, "loss": 2.5189, "step": 88200 }, { "epoch": 5.993001766544367, "grad_norm": 3.418241262435913, "learning_rate": 2.5113806223671694e-05, "loss": 
2.0243, "step": 88205 }, { "epoch": 5.993341486615029, "grad_norm": 3.0914368629455566, "learning_rate": 2.5109559722788422e-05, "loss": 2.3595, "step": 88210 }, { "epoch": 5.993681206685691, "grad_norm": 3.037113666534424, "learning_rate": 2.5105313221905153e-05, "loss": 2.1113, "step": 88215 }, { "epoch": 5.9940209267563525, "grad_norm": 3.0338711738586426, "learning_rate": 2.510106672102188e-05, "loss": 2.1738, "step": 88220 }, { "epoch": 5.9943606468270145, "grad_norm": 3.1007285118103027, "learning_rate": 2.5096820220138606e-05, "loss": 2.3814, "step": 88225 }, { "epoch": 5.994700366897677, "grad_norm": 4.576777458190918, "learning_rate": 2.5092573719255334e-05, "loss": 2.524, "step": 88230 }, { "epoch": 5.995040086968338, "grad_norm": 3.9668896198272705, "learning_rate": 2.5088327218372065e-05, "loss": 2.3353, "step": 88235 }, { "epoch": 5.995379807039, "grad_norm": 3.590928554534912, "learning_rate": 2.508408071748879e-05, "loss": 2.3982, "step": 88240 }, { "epoch": 5.995719527109662, "grad_norm": 3.0951826572418213, "learning_rate": 2.5079834216605518e-05, "loss": 2.0972, "step": 88245 }, { "epoch": 5.996059247180323, "grad_norm": 3.9389476776123047, "learning_rate": 2.507558771572225e-05, "loss": 2.2373, "step": 88250 }, { "epoch": 5.996398967250985, "grad_norm": 2.92026948928833, "learning_rate": 2.5071341214838974e-05, "loss": 2.1017, "step": 88255 }, { "epoch": 5.996738687321647, "grad_norm": 3.531907320022583, "learning_rate": 2.5067094713955702e-05, "loss": 2.0584, "step": 88260 }, { "epoch": 5.9970784073923085, "grad_norm": 4.186463356018066, "learning_rate": 2.5062848213072433e-05, "loss": 2.3666, "step": 88265 }, { "epoch": 5.9974181274629705, "grad_norm": 4.626522064208984, "learning_rate": 2.5058601712189155e-05, "loss": 1.9656, "step": 88270 }, { "epoch": 5.997757847533633, "grad_norm": 2.978153705596924, "learning_rate": 2.5054355211305886e-05, "loss": 2.0684, "step": 88275 }, { "epoch": 5.998097567604294, "grad_norm": 3.450803518295288, 
"learning_rate": 2.5050108710422614e-05, "loss": 2.3968, "step": 88280 }, { "epoch": 5.998437287674956, "grad_norm": 3.1766574382781982, "learning_rate": 2.504586220953934e-05, "loss": 2.2756, "step": 88285 }, { "epoch": 5.998777007745618, "grad_norm": 3.2842204570770264, "learning_rate": 2.504161570865607e-05, "loss": 2.2139, "step": 88290 }, { "epoch": 5.999116727816279, "grad_norm": 3.390915632247925, "learning_rate": 2.5037369207772798e-05, "loss": 2.1464, "step": 88295 }, { "epoch": 5.999456447886941, "grad_norm": 4.766310214996338, "learning_rate": 2.5033122706889523e-05, "loss": 2.3238, "step": 88300 }, { "epoch": 5.999796167957603, "grad_norm": 5.466654300689697, "learning_rate": 2.502887620600625e-05, "loss": 2.2637, "step": 88305 }, { "epoch": 6.0, "eval_bertscore": { "f1": 0.8431442242073498, "precision": 0.8447320608624147, "recall": 0.8423236059592623 }, "eval_bleu_4": 0.01895444688593952, "eval_exact_match": 0.0005814516910553348, "eval_loss": 3.50754714012146, "eval_meteor": 0.09379958802883696, "eval_rouge": { "rouge1": 0.1302662128980081, "rouge2": 0.01916627006143849, "rougeL": 0.11082761642695227, "rougeLsum": 0.11084232241877123 }, "eval_runtime": 1567.4568, "eval_samples_per_second": 6.583, "eval_steps_per_second": 0.823, "step": 88308 }, { "epoch": 6.0001358880282645, "grad_norm": 3.5069472789764404, "learning_rate": 2.5024629705122982e-05, "loss": 2.0914, "step": 88310 }, { "epoch": 6.0004756080989265, "grad_norm": 3.089292526245117, "learning_rate": 2.5020383204239707e-05, "loss": 1.8643, "step": 88315 }, { "epoch": 6.000815328169589, "grad_norm": 3.004000425338745, "learning_rate": 2.5016136703356435e-05, "loss": 2.1129, "step": 88320 }, { "epoch": 6.00115504824025, "grad_norm": 3.3660647869110107, "learning_rate": 2.5011890202473166e-05, "loss": 1.8758, "step": 88325 }, { "epoch": 6.001494768310912, "grad_norm": 3.2861227989196777, "learning_rate": 2.5007643701589887e-05, "loss": 1.9007, "step": 88330 }, { "epoch": 6.001834488381574, 
"grad_norm": 2.9179766178131104, "learning_rate": 2.500339720070662e-05, "loss": 2.0003, "step": 88335 }, { "epoch": 6.002174208452235, "grad_norm": 3.6677372455596924, "learning_rate": 2.4999150699823347e-05, "loss": 2.0126, "step": 88340 }, { "epoch": 6.002513928522897, "grad_norm": 3.4224960803985596, "learning_rate": 2.4994904198940075e-05, "loss": 2.1347, "step": 88345 }, { "epoch": 6.002853648593559, "grad_norm": 3.337432622909546, "learning_rate": 2.4990657698056803e-05, "loss": 2.1102, "step": 88350 }, { "epoch": 6.0031933686642205, "grad_norm": 4.265168190002441, "learning_rate": 2.498641119717353e-05, "loss": 2.0737, "step": 88355 }, { "epoch": 6.003533088734883, "grad_norm": 2.6101222038269043, "learning_rate": 2.498216469629026e-05, "loss": 2.0215, "step": 88360 }, { "epoch": 6.003872808805545, "grad_norm": 3.3568928241729736, "learning_rate": 2.4977918195406987e-05, "loss": 1.9231, "step": 88365 }, { "epoch": 6.004212528876206, "grad_norm": 3.4852654933929443, "learning_rate": 2.4973671694523715e-05, "loss": 1.7335, "step": 88370 }, { "epoch": 6.004552248946868, "grad_norm": 3.0818097591400146, "learning_rate": 2.4969425193640443e-05, "loss": 2.0596, "step": 88375 }, { "epoch": 6.00489196901753, "grad_norm": 3.875297784805298, "learning_rate": 2.4965178692757167e-05, "loss": 2.0291, "step": 88380 }, { "epoch": 6.005231689088191, "grad_norm": 3.988877058029175, "learning_rate": 2.49609321918739e-05, "loss": 2.2955, "step": 88385 }, { "epoch": 6.005571409158853, "grad_norm": 2.83328914642334, "learning_rate": 2.4956685690990623e-05, "loss": 2.1871, "step": 88390 }, { "epoch": 6.005911129229515, "grad_norm": 4.1497087478637695, "learning_rate": 2.495243919010735e-05, "loss": 2.0119, "step": 88395 }, { "epoch": 6.0062508493001765, "grad_norm": 3.6614766120910645, "learning_rate": 2.4948192689224083e-05, "loss": 2.0119, "step": 88400 }, { "epoch": 6.006590569370839, "grad_norm": 4.263707637786865, "learning_rate": 2.4943946188340808e-05, "loss": 2.0791, 
"step": 88405 }, { "epoch": 6.006930289441501, "grad_norm": 3.133690357208252, "learning_rate": 2.4939699687457536e-05, "loss": 1.9891, "step": 88410 }, { "epoch": 6.007270009512162, "grad_norm": 3.5013182163238525, "learning_rate": 2.4935453186574264e-05, "loss": 2.0735, "step": 88415 }, { "epoch": 6.007609729582824, "grad_norm": 3.1139793395996094, "learning_rate": 2.493120668569099e-05, "loss": 1.9804, "step": 88420 }, { "epoch": 6.007949449653485, "grad_norm": 5.052938461303711, "learning_rate": 2.492696018480772e-05, "loss": 1.9689, "step": 88425 }, { "epoch": 6.008289169724147, "grad_norm": 3.327909469604492, "learning_rate": 2.4922713683924448e-05, "loss": 2.19, "step": 88430 }, { "epoch": 6.008628889794809, "grad_norm": 3.7740468978881836, "learning_rate": 2.4918467183041176e-05, "loss": 2.0614, "step": 88435 }, { "epoch": 6.00896860986547, "grad_norm": 3.9133543968200684, "learning_rate": 2.4914220682157904e-05, "loss": 1.8904, "step": 88440 }, { "epoch": 6.0093083299361325, "grad_norm": 4.073118209838867, "learning_rate": 2.490997418127463e-05, "loss": 2.2798, "step": 88445 }, { "epoch": 6.009648050006795, "grad_norm": 3.2108285427093506, "learning_rate": 2.490572768039136e-05, "loss": 1.8304, "step": 88450 }, { "epoch": 6.009987770077456, "grad_norm": 3.9516866207122803, "learning_rate": 2.4901481179508088e-05, "loss": 2.0096, "step": 88455 }, { "epoch": 6.010327490148118, "grad_norm": 3.3648529052734375, "learning_rate": 2.4897234678624816e-05, "loss": 1.936, "step": 88460 }, { "epoch": 6.01066721021878, "grad_norm": 3.3568837642669678, "learning_rate": 2.489298817774154e-05, "loss": 1.9234, "step": 88465 }, { "epoch": 6.011006930289441, "grad_norm": 4.1657304763793945, "learning_rate": 2.488874167685827e-05, "loss": 2.2637, "step": 88470 }, { "epoch": 6.011346650360103, "grad_norm": 2.827727794647217, "learning_rate": 2.4884495175974996e-05, "loss": 2.3384, "step": 88475 }, { "epoch": 6.011686370430765, "grad_norm": 3.715885639190674, "learning_rate": 
2.4880248675091724e-05, "loss": 2.0236, "step": 88480 }, { "epoch": 6.0120260905014264, "grad_norm": 4.069788455963135, "learning_rate": 2.4876002174208456e-05, "loss": 2.0294, "step": 88485 }, { "epoch": 6.0123658105720885, "grad_norm": 3.9066314697265625, "learning_rate": 2.487175567332518e-05, "loss": 2.0598, "step": 88490 }, { "epoch": 6.012705530642751, "grad_norm": 3.6482903957366943, "learning_rate": 2.4867509172441908e-05, "loss": 2.2545, "step": 88495 }, { "epoch": 6.013045250713412, "grad_norm": 2.950141429901123, "learning_rate": 2.4863262671558636e-05, "loss": 1.9473, "step": 88500 }, { "epoch": 6.013384970784074, "grad_norm": 4.171321392059326, "learning_rate": 2.4859016170675364e-05, "loss": 2.0735, "step": 88505 }, { "epoch": 6.013724690854736, "grad_norm": 4.299756050109863, "learning_rate": 2.4854769669792092e-05, "loss": 2.1751, "step": 88510 }, { "epoch": 6.014064410925397, "grad_norm": 4.153679847717285, "learning_rate": 2.485052316890882e-05, "loss": 2.046, "step": 88515 }, { "epoch": 6.014404130996059, "grad_norm": 3.4321131706237793, "learning_rate": 2.4846276668025548e-05, "loss": 2.1076, "step": 88520 }, { "epoch": 6.014743851066721, "grad_norm": 3.9578073024749756, "learning_rate": 2.4842030167142276e-05, "loss": 2.2039, "step": 88525 }, { "epoch": 6.0150835711373825, "grad_norm": 3.687879800796509, "learning_rate": 2.4837783666259004e-05, "loss": 2.1502, "step": 88530 }, { "epoch": 6.0154232912080445, "grad_norm": 4.017701625823975, "learning_rate": 2.4833537165375732e-05, "loss": 2.1107, "step": 88535 }, { "epoch": 6.015763011278707, "grad_norm": 4.009500980377197, "learning_rate": 2.482929066449246e-05, "loss": 1.7881, "step": 88540 }, { "epoch": 6.016102731349368, "grad_norm": 3.441535711288452, "learning_rate": 2.482504416360919e-05, "loss": 2.0712, "step": 88545 }, { "epoch": 6.01644245142003, "grad_norm": 3.9425783157348633, "learning_rate": 2.4820797662725913e-05, "loss": 2.0544, "step": 88550 }, { "epoch": 6.016782171490692, 
"grad_norm": 3.5872318744659424, "learning_rate": 2.4816551161842644e-05, "loss": 2.011, "step": 88555 }, { "epoch": 6.017121891561353, "grad_norm": 3.717771053314209, "learning_rate": 2.4812304660959372e-05, "loss": 1.8426, "step": 88560 }, { "epoch": 6.017461611632015, "grad_norm": 3.4438982009887695, "learning_rate": 2.4808058160076097e-05, "loss": 1.957, "step": 88565 }, { "epoch": 6.017801331702677, "grad_norm": 3.7550153732299805, "learning_rate": 2.480381165919283e-05, "loss": 2.0314, "step": 88570 }, { "epoch": 6.0181410517733385, "grad_norm": 2.841261863708496, "learning_rate": 2.4799565158309553e-05, "loss": 1.9894, "step": 88575 }, { "epoch": 6.0184807718440005, "grad_norm": 3.404609203338623, "learning_rate": 2.479531865742628e-05, "loss": 2.1512, "step": 88580 }, { "epoch": 6.018820491914663, "grad_norm": 2.757479429244995, "learning_rate": 2.479107215654301e-05, "loss": 2.1344, "step": 88585 }, { "epoch": 6.019160211985324, "grad_norm": 4.288881778717041, "learning_rate": 2.4786825655659737e-05, "loss": 2.053, "step": 88590 }, { "epoch": 6.019499932055986, "grad_norm": 3.240006446838379, "learning_rate": 2.4782579154776465e-05, "loss": 1.7871, "step": 88595 }, { "epoch": 6.019839652126648, "grad_norm": 3.7368087768554688, "learning_rate": 2.4778332653893193e-05, "loss": 2.1555, "step": 88600 }, { "epoch": 6.020179372197309, "grad_norm": 3.4216861724853516, "learning_rate": 2.477408615300992e-05, "loss": 2.0419, "step": 88605 }, { "epoch": 6.020519092267971, "grad_norm": 3.4838171005249023, "learning_rate": 2.476983965212665e-05, "loss": 1.8527, "step": 88610 }, { "epoch": 6.020858812338633, "grad_norm": 2.959864377975464, "learning_rate": 2.4765593151243377e-05, "loss": 2.0415, "step": 88615 }, { "epoch": 6.0211985324092945, "grad_norm": 4.213749885559082, "learning_rate": 2.4761346650360105e-05, "loss": 1.9831, "step": 88620 }, { "epoch": 6.0215382524799566, "grad_norm": 4.012634754180908, "learning_rate": 2.4757100149476833e-05, "loss": 1.952, 
"step": 88625 }, { "epoch": 6.021877972550619, "grad_norm": 3.80233097076416, "learning_rate": 2.475285364859356e-05, "loss": 2.1596, "step": 88630 }, { "epoch": 6.02221769262128, "grad_norm": 3.162590265274048, "learning_rate": 2.4748607147710286e-05, "loss": 1.9647, "step": 88635 }, { "epoch": 6.022557412691942, "grad_norm": 3.105689525604248, "learning_rate": 2.4744360646827017e-05, "loss": 2.0367, "step": 88640 }, { "epoch": 6.022897132762604, "grad_norm": 3.687662363052368, "learning_rate": 2.4740114145943745e-05, "loss": 2.1994, "step": 88645 }, { "epoch": 6.023236852833265, "grad_norm": 3.355217218399048, "learning_rate": 2.473586764506047e-05, "loss": 1.9582, "step": 88650 }, { "epoch": 6.023576572903927, "grad_norm": 3.23652720451355, "learning_rate": 2.47316211441772e-05, "loss": 1.9747, "step": 88655 }, { "epoch": 6.023916292974589, "grad_norm": 3.696723461151123, "learning_rate": 2.4727374643293926e-05, "loss": 2.172, "step": 88660 }, { "epoch": 6.0242560130452505, "grad_norm": 3.1348822116851807, "learning_rate": 2.4723128142410654e-05, "loss": 1.9045, "step": 88665 }, { "epoch": 6.024595733115913, "grad_norm": 3.3350799083709717, "learning_rate": 2.4718881641527382e-05, "loss": 2.1927, "step": 88670 }, { "epoch": 6.024935453186575, "grad_norm": 3.263662338256836, "learning_rate": 2.471463514064411e-05, "loss": 2.1417, "step": 88675 }, { "epoch": 6.025275173257236, "grad_norm": 3.6025502681732178, "learning_rate": 2.4710388639760838e-05, "loss": 1.7772, "step": 88680 }, { "epoch": 6.025614893327898, "grad_norm": 3.6507136821746826, "learning_rate": 2.4706142138877566e-05, "loss": 1.9907, "step": 88685 }, { "epoch": 6.02595461339856, "grad_norm": 4.150918006896973, "learning_rate": 2.4701895637994294e-05, "loss": 2.1245, "step": 88690 }, { "epoch": 6.026294333469221, "grad_norm": 3.0408775806427, "learning_rate": 2.4697649137111022e-05, "loss": 1.7874, "step": 88695 }, { "epoch": 6.026634053539883, "grad_norm": 2.8691654205322266, "learning_rate": 
2.469340263622775e-05, "loss": 2.3666, "step": 88700 }, { "epoch": 6.026973773610545, "grad_norm": 4.350644111633301, "learning_rate": 2.4689156135344478e-05, "loss": 2.0036, "step": 88705 }, { "epoch": 6.0273134936812065, "grad_norm": 4.030486583709717, "learning_rate": 2.4684909634461206e-05, "loss": 1.9655, "step": 88710 }, { "epoch": 6.027653213751869, "grad_norm": 4.124673366546631, "learning_rate": 2.4680663133577934e-05, "loss": 1.9657, "step": 88715 }, { "epoch": 6.027992933822531, "grad_norm": 4.07002067565918, "learning_rate": 2.467641663269466e-05, "loss": 2.0192, "step": 88720 }, { "epoch": 6.028332653893192, "grad_norm": 3.273251533508301, "learning_rate": 2.467217013181139e-05, "loss": 2.1786, "step": 88725 }, { "epoch": 6.028672373963854, "grad_norm": 3.8169913291931152, "learning_rate": 2.4667923630928118e-05, "loss": 2.1153, "step": 88730 }, { "epoch": 6.029012094034516, "grad_norm": 3.236126184463501, "learning_rate": 2.4663677130044842e-05, "loss": 2.1551, "step": 88735 }, { "epoch": 6.029351814105177, "grad_norm": 3.4890573024749756, "learning_rate": 2.4659430629161574e-05, "loss": 2.2351, "step": 88740 }, { "epoch": 6.029691534175839, "grad_norm": 3.2828528881073, "learning_rate": 2.46551841282783e-05, "loss": 2.1922, "step": 88745 }, { "epoch": 6.0300312542465, "grad_norm": 3.5122740268707275, "learning_rate": 2.4650937627395026e-05, "loss": 2.0043, "step": 88750 }, { "epoch": 6.0303709743171625, "grad_norm": 4.017875671386719, "learning_rate": 2.4646691126511754e-05, "loss": 2.3157, "step": 88755 }, { "epoch": 6.030710694387825, "grad_norm": 3.5989434719085693, "learning_rate": 2.4642444625628483e-05, "loss": 1.819, "step": 88760 }, { "epoch": 6.031050414458486, "grad_norm": 3.6878204345703125, "learning_rate": 2.463819812474521e-05, "loss": 2.0546, "step": 88765 }, { "epoch": 6.031390134529148, "grad_norm": 3.457780599594116, "learning_rate": 2.463395162386194e-05, "loss": 2.0024, "step": 88770 }, { "epoch": 6.03172985459981, "grad_norm": 
2.6858084201812744, "learning_rate": 2.4629705122978667e-05, "loss": 2.2531, "step": 88775 }, { "epoch": 6.032069574670471, "grad_norm": 3.456296443939209, "learning_rate": 2.4625458622095395e-05, "loss": 2.037, "step": 88780 }, { "epoch": 6.032409294741133, "grad_norm": 3.9829766750335693, "learning_rate": 2.4621212121212123e-05, "loss": 1.8451, "step": 88785 }, { "epoch": 6.032749014811795, "grad_norm": 4.121595859527588, "learning_rate": 2.461696562032885e-05, "loss": 2.1946, "step": 88790 }, { "epoch": 6.0330887348824564, "grad_norm": 3.19709849357605, "learning_rate": 2.461271911944558e-05, "loss": 2.2108, "step": 88795 }, { "epoch": 6.0334284549531185, "grad_norm": 3.891705274581909, "learning_rate": 2.4608472618562307e-05, "loss": 2.0021, "step": 88800 }, { "epoch": 6.033768175023781, "grad_norm": 3.676755428314209, "learning_rate": 2.460422611767903e-05, "loss": 2.1314, "step": 88805 }, { "epoch": 6.034107895094442, "grad_norm": 3.4988350868225098, "learning_rate": 2.4599979616795763e-05, "loss": 2.2637, "step": 88810 }, { "epoch": 6.034447615165104, "grad_norm": 4.413010597229004, "learning_rate": 2.459573311591249e-05, "loss": 1.6479, "step": 88815 }, { "epoch": 6.034787335235766, "grad_norm": 3.6608316898345947, "learning_rate": 2.4591486615029215e-05, "loss": 2.1362, "step": 88820 }, { "epoch": 6.035127055306427, "grad_norm": 3.4750254154205322, "learning_rate": 2.4587240114145947e-05, "loss": 2.0856, "step": 88825 }, { "epoch": 6.035466775377089, "grad_norm": 4.0445685386657715, "learning_rate": 2.458299361326267e-05, "loss": 1.8519, "step": 88830 }, { "epoch": 6.035806495447751, "grad_norm": 3.423645496368408, "learning_rate": 2.45787471123794e-05, "loss": 2.226, "step": 88835 }, { "epoch": 6.0361462155184125, "grad_norm": 3.393190622329712, "learning_rate": 2.457450061149613e-05, "loss": 1.9414, "step": 88840 }, { "epoch": 6.0364859355890745, "grad_norm": 4.1857829093933105, "learning_rate": 2.4570254110612855e-05, "loss": 1.8844, "step": 88845 }, { 
"epoch": 6.036825655659737, "grad_norm": 2.983295440673828, "learning_rate": 2.4566007609729583e-05, "loss": 2.2639, "step": 88850 }, { "epoch": 6.037165375730398, "grad_norm": 2.7238330841064453, "learning_rate": 2.456176110884631e-05, "loss": 2.1377, "step": 88855 }, { "epoch": 6.03750509580106, "grad_norm": 3.2453112602233887, "learning_rate": 2.455751460796304e-05, "loss": 2.4601, "step": 88860 }, { "epoch": 6.037844815871722, "grad_norm": 3.230820417404175, "learning_rate": 2.4553268107079767e-05, "loss": 1.7909, "step": 88865 }, { "epoch": 6.038184535942383, "grad_norm": 3.6489949226379395, "learning_rate": 2.4549021606196495e-05, "loss": 2.1788, "step": 88870 }, { "epoch": 6.038524256013045, "grad_norm": 3.216155529022217, "learning_rate": 2.4544775105313223e-05, "loss": 1.7672, "step": 88875 }, { "epoch": 6.038863976083707, "grad_norm": 3.5239384174346924, "learning_rate": 2.454052860442995e-05, "loss": 2.4132, "step": 88880 }, { "epoch": 6.0392036961543685, "grad_norm": 4.231643199920654, "learning_rate": 2.453628210354668e-05, "loss": 2.252, "step": 88885 }, { "epoch": 6.0395434162250305, "grad_norm": 3.78621768951416, "learning_rate": 2.4532035602663407e-05, "loss": 1.9529, "step": 88890 }, { "epoch": 6.039883136295693, "grad_norm": 3.2345008850097656, "learning_rate": 2.4527789101780135e-05, "loss": 1.9732, "step": 88895 }, { "epoch": 6.040222856366354, "grad_norm": 3.94878888130188, "learning_rate": 2.4523542600896863e-05, "loss": 2.1549, "step": 88900 }, { "epoch": 6.040562576437016, "grad_norm": 3.5392065048217773, "learning_rate": 2.4519296100013588e-05, "loss": 2.0631, "step": 88905 }, { "epoch": 6.040902296507678, "grad_norm": 5.244943618774414, "learning_rate": 2.451504959913032e-05, "loss": 2.0518, "step": 88910 }, { "epoch": 6.041242016578339, "grad_norm": 3.8412036895751953, "learning_rate": 2.4510803098247044e-05, "loss": 2.0076, "step": 88915 }, { "epoch": 6.041581736649001, "grad_norm": 4.530075550079346, "learning_rate": 
2.4506556597363772e-05, "loss": 2.0745, "step": 88920 }, { "epoch": 6.041921456719663, "grad_norm": 3.7285797595977783, "learning_rate": 2.4502310096480503e-05, "loss": 1.9497, "step": 88925 }, { "epoch": 6.0422611767903245, "grad_norm": 3.514644145965576, "learning_rate": 2.4498063595597228e-05, "loss": 2.0748, "step": 88930 }, { "epoch": 6.042600896860987, "grad_norm": 2.982992649078369, "learning_rate": 2.4493817094713956e-05, "loss": 2.4194, "step": 88935 }, { "epoch": 6.042940616931649, "grad_norm": 3.546625852584839, "learning_rate": 2.4489570593830684e-05, "loss": 1.9788, "step": 88940 }, { "epoch": 6.04328033700231, "grad_norm": 3.5466036796569824, "learning_rate": 2.4485324092947412e-05, "loss": 1.9968, "step": 88945 }, { "epoch": 6.043620057072972, "grad_norm": 3.145672559738159, "learning_rate": 2.448107759206414e-05, "loss": 2.1491, "step": 88950 }, { "epoch": 6.043959777143634, "grad_norm": 4.079468727111816, "learning_rate": 2.4476831091180868e-05, "loss": 2.2371, "step": 88955 }, { "epoch": 6.044299497214295, "grad_norm": 3.6440443992614746, "learning_rate": 2.4472584590297596e-05, "loss": 2.3784, "step": 88960 }, { "epoch": 6.044639217284957, "grad_norm": 3.6466238498687744, "learning_rate": 2.4468338089414324e-05, "loss": 2.0676, "step": 88965 }, { "epoch": 6.044978937355619, "grad_norm": 3.8678817749023438, "learning_rate": 2.4464091588531052e-05, "loss": 2.3293, "step": 88970 }, { "epoch": 6.0453186574262805, "grad_norm": 3.4234800338745117, "learning_rate": 2.445984508764778e-05, "loss": 2.0791, "step": 88975 }, { "epoch": 6.045658377496943, "grad_norm": 3.4994094371795654, "learning_rate": 2.4455598586764508e-05, "loss": 2.0437, "step": 88980 }, { "epoch": 6.045998097567605, "grad_norm": 3.21685791015625, "learning_rate": 2.4451352085881236e-05, "loss": 2.013, "step": 88985 }, { "epoch": 6.046337817638266, "grad_norm": 3.577821731567383, "learning_rate": 2.444710558499796e-05, "loss": 1.9522, "step": 88990 }, { "epoch": 6.046677537708928, 
"grad_norm": 3.9451706409454346, "learning_rate": 2.4442859084114692e-05, "loss": 2.0097, "step": 88995 }, { "epoch": 6.04701725777959, "grad_norm": 3.43835711479187, "learning_rate": 2.4438612583231417e-05, "loss": 1.9219, "step": 89000 }, { "epoch": 6.047356977850251, "grad_norm": 3.629074811935425, "learning_rate": 2.4434366082348145e-05, "loss": 1.9977, "step": 89005 }, { "epoch": 6.047696697920913, "grad_norm": 3.6905524730682373, "learning_rate": 2.4430119581464876e-05, "loss": 2.0601, "step": 89010 }, { "epoch": 6.048036417991575, "grad_norm": 3.6856231689453125, "learning_rate": 2.44258730805816e-05, "loss": 2.2283, "step": 89015 }, { "epoch": 6.0483761380622365, "grad_norm": 2.8279337882995605, "learning_rate": 2.442162657969833e-05, "loss": 1.8768, "step": 89020 }, { "epoch": 6.048715858132899, "grad_norm": 3.400543689727783, "learning_rate": 2.4417380078815057e-05, "loss": 1.9719, "step": 89025 }, { "epoch": 6.049055578203561, "grad_norm": 3.016303300857544, "learning_rate": 2.4413133577931785e-05, "loss": 1.9119, "step": 89030 }, { "epoch": 6.049395298274222, "grad_norm": 3.349534273147583, "learning_rate": 2.4408887077048516e-05, "loss": 2.1849, "step": 89035 }, { "epoch": 6.049735018344884, "grad_norm": 4.049421310424805, "learning_rate": 2.440464057616524e-05, "loss": 2.3134, "step": 89040 }, { "epoch": 6.050074738415546, "grad_norm": 4.0192036628723145, "learning_rate": 2.440039407528197e-05, "loss": 2.25, "step": 89045 }, { "epoch": 6.050414458486207, "grad_norm": 3.4838340282440186, "learning_rate": 2.4396147574398697e-05, "loss": 1.9269, "step": 89050 }, { "epoch": 6.050754178556869, "grad_norm": 3.9786572456359863, "learning_rate": 2.4391901073515425e-05, "loss": 2.1996, "step": 89055 }, { "epoch": 6.051093898627531, "grad_norm": 3.451951503753662, "learning_rate": 2.4387654572632153e-05, "loss": 2.2004, "step": 89060 }, { "epoch": 6.0514336186981925, "grad_norm": 3.833808422088623, "learning_rate": 2.438340807174888e-05, "loss": 2.1188, "step": 
89065 }, { "epoch": 6.051773338768855, "grad_norm": 3.5374667644500732, "learning_rate": 2.437916157086561e-05, "loss": 2.0331, "step": 89070 }, { "epoch": 6.052113058839517, "grad_norm": 3.3482205867767334, "learning_rate": 2.4374915069982333e-05, "loss": 2.0357, "step": 89075 }, { "epoch": 6.052452778910178, "grad_norm": 2.932157278060913, "learning_rate": 2.4370668569099065e-05, "loss": 2.2403, "step": 89080 }, { "epoch": 6.05279249898084, "grad_norm": 4.117353916168213, "learning_rate": 2.4366422068215793e-05, "loss": 1.6983, "step": 89085 }, { "epoch": 6.053132219051502, "grad_norm": 3.7774510383605957, "learning_rate": 2.4362175567332517e-05, "loss": 2.2075, "step": 89090 }, { "epoch": 6.053471939122163, "grad_norm": 3.6163454055786133, "learning_rate": 2.435792906644925e-05, "loss": 1.8947, "step": 89095 }, { "epoch": 6.053811659192825, "grad_norm": 2.9541563987731934, "learning_rate": 2.4353682565565973e-05, "loss": 2.0774, "step": 89100 }, { "epoch": 6.0541513792634865, "grad_norm": 3.2558178901672363, "learning_rate": 2.43494360646827e-05, "loss": 2.0372, "step": 89105 }, { "epoch": 6.0544910993341485, "grad_norm": 3.3631832599639893, "learning_rate": 2.434518956379943e-05, "loss": 2.1394, "step": 89110 }, { "epoch": 6.054830819404811, "grad_norm": 3.4401369094848633, "learning_rate": 2.4340943062916158e-05, "loss": 1.823, "step": 89115 }, { "epoch": 6.055170539475472, "grad_norm": 3.3768279552459717, "learning_rate": 2.433669656203289e-05, "loss": 2.3692, "step": 89120 }, { "epoch": 6.055510259546134, "grad_norm": 3.592095136642456, "learning_rate": 2.4332450061149614e-05, "loss": 2.1058, "step": 89125 }, { "epoch": 6.055849979616796, "grad_norm": 3.300567388534546, "learning_rate": 2.432820356026634e-05, "loss": 1.7972, "step": 89130 }, { "epoch": 6.056189699687457, "grad_norm": 3.0853443145751953, "learning_rate": 2.432395705938307e-05, "loss": 2.2977, "step": 89135 }, { "epoch": 6.056529419758119, "grad_norm": 2.5112123489379883, "learning_rate": 
2.4319710558499798e-05, "loss": 2.1317, "step": 89140 }, { "epoch": 6.056869139828781, "grad_norm": 3.4502575397491455, "learning_rate": 2.4315464057616526e-05, "loss": 2.0285, "step": 89145 }, { "epoch": 6.0572088598994425, "grad_norm": 3.3683388233184814, "learning_rate": 2.4311217556733254e-05, "loss": 1.9729, "step": 89150 }, { "epoch": 6.0575485799701045, "grad_norm": 3.038975715637207, "learning_rate": 2.430697105584998e-05, "loss": 2.1121, "step": 89155 }, { "epoch": 6.057888300040767, "grad_norm": 3.3792853355407715, "learning_rate": 2.4302724554966706e-05, "loss": 2.0026, "step": 89160 }, { "epoch": 6.058228020111428, "grad_norm": 5.117702007293701, "learning_rate": 2.4298478054083438e-05, "loss": 2.0028, "step": 89165 }, { "epoch": 6.05856774018209, "grad_norm": 3.789882183074951, "learning_rate": 2.4294231553200166e-05, "loss": 2.0757, "step": 89170 }, { "epoch": 6.058907460252752, "grad_norm": 3.6658778190612793, "learning_rate": 2.428998505231689e-05, "loss": 2.4742, "step": 89175 }, { "epoch": 6.059247180323413, "grad_norm": 3.269818067550659, "learning_rate": 2.428573855143362e-05, "loss": 2.089, "step": 89180 }, { "epoch": 6.059586900394075, "grad_norm": 3.6770033836364746, "learning_rate": 2.4281492050550346e-05, "loss": 2.2673, "step": 89185 }, { "epoch": 6.059926620464737, "grad_norm": 3.0359346866607666, "learning_rate": 2.4277245549667074e-05, "loss": 2.0091, "step": 89190 }, { "epoch": 6.0602663405353985, "grad_norm": 3.0214831829071045, "learning_rate": 2.4272999048783802e-05, "loss": 2.1459, "step": 89195 }, { "epoch": 6.0606060606060606, "grad_norm": 4.4424824714660645, "learning_rate": 2.426875254790053e-05, "loss": 2.0802, "step": 89200 }, { "epoch": 6.060945780676723, "grad_norm": 3.1052632331848145, "learning_rate": 2.426450604701726e-05, "loss": 1.9383, "step": 89205 }, { "epoch": 6.061285500747384, "grad_norm": 4.348029136657715, "learning_rate": 2.4260259546133986e-05, "loss": 2.0768, "step": 89210 }, { "epoch": 6.061625220818046, 
"grad_norm": 3.353240966796875, "learning_rate": 2.4256013045250714e-05, "loss": 1.8607, "step": 89215 }, { "epoch": 6.061964940888708, "grad_norm": 3.7594034671783447, "learning_rate": 2.4251766544367442e-05, "loss": 2.0667, "step": 89220 }, { "epoch": 6.062304660959369, "grad_norm": 4.355152606964111, "learning_rate": 2.424752004348417e-05, "loss": 2.0798, "step": 89225 }, { "epoch": 6.062644381030031, "grad_norm": 3.148111581802368, "learning_rate": 2.4243273542600898e-05, "loss": 2.1757, "step": 89230 }, { "epoch": 6.062984101100693, "grad_norm": 2.7441234588623047, "learning_rate": 2.4239027041717626e-05, "loss": 2.2855, "step": 89235 }, { "epoch": 6.0633238211713545, "grad_norm": 3.962874412536621, "learning_rate": 2.4234780540834354e-05, "loss": 1.9377, "step": 89240 }, { "epoch": 6.063663541242017, "grad_norm": 3.3825433254241943, "learning_rate": 2.423053403995108e-05, "loss": 2.1186, "step": 89245 }, { "epoch": 6.064003261312679, "grad_norm": 3.5673131942749023, "learning_rate": 2.422628753906781e-05, "loss": 2.192, "step": 89250 }, { "epoch": 6.06434298138334, "grad_norm": 2.974266529083252, "learning_rate": 2.422204103818454e-05, "loss": 2.1019, "step": 89255 }, { "epoch": 6.064682701454002, "grad_norm": 3.051274538040161, "learning_rate": 2.4217794537301263e-05, "loss": 1.975, "step": 89260 }, { "epoch": 6.065022421524664, "grad_norm": 2.71567964553833, "learning_rate": 2.4213548036417994e-05, "loss": 2.221, "step": 89265 }, { "epoch": 6.065362141595325, "grad_norm": 2.958200454711914, "learning_rate": 2.420930153553472e-05, "loss": 2.3494, "step": 89270 }, { "epoch": 6.065701861665987, "grad_norm": 3.724277973175049, "learning_rate": 2.4205055034651447e-05, "loss": 1.8938, "step": 89275 }, { "epoch": 6.066041581736649, "grad_norm": 4.034608364105225, "learning_rate": 2.420080853376818e-05, "loss": 2.0256, "step": 89280 }, { "epoch": 6.0663813018073105, "grad_norm": 3.59769606590271, "learning_rate": 2.4196562032884903e-05, "loss": 2.1282, "step": 
89285 }, { "epoch": 6.066721021877973, "grad_norm": 5.336333274841309, "learning_rate": 2.4192315532001634e-05, "loss": 2.3647, "step": 89290 }, { "epoch": 6.067060741948635, "grad_norm": 3.961273670196533, "learning_rate": 2.418806903111836e-05, "loss": 2.2857, "step": 89295 }, { "epoch": 6.067400462019296, "grad_norm": 2.8437881469726562, "learning_rate": 2.4183822530235087e-05, "loss": 2.1517, "step": 89300 }, { "epoch": 6.067740182089958, "grad_norm": 3.575669050216675, "learning_rate": 2.4179576029351815e-05, "loss": 1.9181, "step": 89305 }, { "epoch": 6.06807990216062, "grad_norm": 3.676985740661621, "learning_rate": 2.4175329528468543e-05, "loss": 2.0728, "step": 89310 }, { "epoch": 6.068419622231281, "grad_norm": 4.642436981201172, "learning_rate": 2.417108302758527e-05, "loss": 1.8083, "step": 89315 }, { "epoch": 6.068759342301943, "grad_norm": 3.764836549758911, "learning_rate": 2.4166836526702e-05, "loss": 1.883, "step": 89320 }, { "epoch": 6.069099062372605, "grad_norm": 3.6431453227996826, "learning_rate": 2.4162590025818727e-05, "loss": 2.3792, "step": 89325 }, { "epoch": 6.0694387824432665, "grad_norm": 3.5178279876708984, "learning_rate": 2.4158343524935455e-05, "loss": 2.0676, "step": 89330 }, { "epoch": 6.069778502513929, "grad_norm": 3.8856029510498047, "learning_rate": 2.4154097024052183e-05, "loss": 2.2812, "step": 89335 }, { "epoch": 6.070118222584591, "grad_norm": 3.680274248123169, "learning_rate": 2.414985052316891e-05, "loss": 2.305, "step": 89340 }, { "epoch": 6.070457942655252, "grad_norm": 2.9434967041015625, "learning_rate": 2.4145604022285636e-05, "loss": 1.9176, "step": 89345 }, { "epoch": 6.070797662725914, "grad_norm": 3.3787829875946045, "learning_rate": 2.4141357521402367e-05, "loss": 1.8762, "step": 89350 }, { "epoch": 6.071137382796576, "grad_norm": 3.6738743782043457, "learning_rate": 2.4137111020519092e-05, "loss": 2.0893, "step": 89355 }, { "epoch": 6.071477102867237, "grad_norm": 3.4995062351226807, "learning_rate": 
2.413286451963582e-05, "loss": 1.9399, "step": 89360 }, { "epoch": 6.071816822937899, "grad_norm": 3.3009426593780518, "learning_rate": 2.412861801875255e-05, "loss": 2.2115, "step": 89365 }, { "epoch": 6.072156543008561, "grad_norm": 3.3693618774414062, "learning_rate": 2.4124371517869276e-05, "loss": 2.0972, "step": 89370 }, { "epoch": 6.0724962630792225, "grad_norm": 3.4389231204986572, "learning_rate": 2.4120125016986007e-05, "loss": 2.0904, "step": 89375 }, { "epoch": 6.072835983149885, "grad_norm": 2.9376425743103027, "learning_rate": 2.4115878516102732e-05, "loss": 2.2776, "step": 89380 }, { "epoch": 6.073175703220547, "grad_norm": 3.543787956237793, "learning_rate": 2.411163201521946e-05, "loss": 2.2035, "step": 89385 }, { "epoch": 6.073515423291208, "grad_norm": 2.898618698120117, "learning_rate": 2.4107385514336188e-05, "loss": 2.2586, "step": 89390 }, { "epoch": 6.07385514336187, "grad_norm": 4.312113285064697, "learning_rate": 2.4103139013452916e-05, "loss": 2.0704, "step": 89395 }, { "epoch": 6.074194863432532, "grad_norm": 4.060299396514893, "learning_rate": 2.4098892512569644e-05, "loss": 1.7964, "step": 89400 }, { "epoch": 6.074534583503193, "grad_norm": 3.1691243648529053, "learning_rate": 2.4094646011686372e-05, "loss": 2.1903, "step": 89405 }, { "epoch": 6.074874303573855, "grad_norm": 3.8207919597625732, "learning_rate": 2.40903995108031e-05, "loss": 2.2111, "step": 89410 }, { "epoch": 6.075214023644517, "grad_norm": 3.7863667011260986, "learning_rate": 2.4086153009919828e-05, "loss": 2.0402, "step": 89415 }, { "epoch": 6.0755537437151785, "grad_norm": 3.3327338695526123, "learning_rate": 2.4081906509036556e-05, "loss": 2.04, "step": 89420 }, { "epoch": 6.075893463785841, "grad_norm": 3.9163906574249268, "learning_rate": 2.4077660008153284e-05, "loss": 2.1915, "step": 89425 }, { "epoch": 6.076233183856502, "grad_norm": 3.047393321990967, "learning_rate": 2.407341350727001e-05, "loss": 2.12, "step": 89430 }, { "epoch": 6.076572903927164, 
"grad_norm": 4.184474468231201, "learning_rate": 2.406916700638674e-05, "loss": 2.0126, "step": 89435 }, { "epoch": 6.076912623997826, "grad_norm": 3.702132225036621, "learning_rate": 2.4064920505503464e-05, "loss": 2.1367, "step": 89440 }, { "epoch": 6.077252344068487, "grad_norm": 3.778597593307495, "learning_rate": 2.4060674004620192e-05, "loss": 2.203, "step": 89445 }, { "epoch": 6.077592064139149, "grad_norm": 3.2439956665039062, "learning_rate": 2.4056427503736924e-05, "loss": 2.0199, "step": 89450 }, { "epoch": 6.077931784209811, "grad_norm": 4.375101566314697, "learning_rate": 2.405218100285365e-05, "loss": 2.2397, "step": 89455 }, { "epoch": 6.0782715042804725, "grad_norm": 3.40840482711792, "learning_rate": 2.404793450197038e-05, "loss": 2.2924, "step": 89460 }, { "epoch": 6.0786112243511345, "grad_norm": 3.5670320987701416, "learning_rate": 2.4043688001087104e-05, "loss": 2.0827, "step": 89465 }, { "epoch": 6.078950944421797, "grad_norm": 4.3543195724487305, "learning_rate": 2.4039441500203832e-05, "loss": 2.1347, "step": 89470 }, { "epoch": 6.079290664492458, "grad_norm": 3.5072619915008545, "learning_rate": 2.403519499932056e-05, "loss": 1.9664, "step": 89475 }, { "epoch": 6.07963038456312, "grad_norm": 3.251335382461548, "learning_rate": 2.403094849843729e-05, "loss": 1.9491, "step": 89480 }, { "epoch": 6.079970104633782, "grad_norm": 3.577986240386963, "learning_rate": 2.4026701997554017e-05, "loss": 2.0047, "step": 89485 }, { "epoch": 6.080309824704443, "grad_norm": 3.5990707874298096, "learning_rate": 2.4022455496670745e-05, "loss": 2.32, "step": 89490 }, { "epoch": 6.080649544775105, "grad_norm": 3.9916670322418213, "learning_rate": 2.4018208995787473e-05, "loss": 2.1141, "step": 89495 }, { "epoch": 6.080989264845767, "grad_norm": 3.8981642723083496, "learning_rate": 2.40139624949042e-05, "loss": 2.1291, "step": 89500 }, { "epoch": 6.0813289849164285, "grad_norm": 3.6766607761383057, "learning_rate": 2.400971599402093e-05, "loss": 2.2437, "step": 
89505 }, { "epoch": 6.081668704987091, "grad_norm": 4.339938640594482, "learning_rate": 2.4005469493137657e-05, "loss": 2.4138, "step": 89510 }, { "epoch": 6.082008425057753, "grad_norm": 3.561063051223755, "learning_rate": 2.400122299225438e-05, "loss": 2.0799, "step": 89515 }, { "epoch": 6.082348145128414, "grad_norm": 2.369156837463379, "learning_rate": 2.3996976491371113e-05, "loss": 2.2468, "step": 89520 }, { "epoch": 6.082687865199076, "grad_norm": 3.408210515975952, "learning_rate": 2.3992729990487837e-05, "loss": 2.1792, "step": 89525 }, { "epoch": 6.083027585269738, "grad_norm": 4.047028541564941, "learning_rate": 2.3988483489604565e-05, "loss": 1.9931, "step": 89530 }, { "epoch": 6.083367305340399, "grad_norm": 3.711932897567749, "learning_rate": 2.3984236988721297e-05, "loss": 2.0816, "step": 89535 }, { "epoch": 6.083707025411061, "grad_norm": 4.580275058746338, "learning_rate": 2.397999048783802e-05, "loss": 1.7095, "step": 89540 }, { "epoch": 6.084046745481723, "grad_norm": 5.0065484046936035, "learning_rate": 2.3975743986954753e-05, "loss": 2.0153, "step": 89545 }, { "epoch": 6.0843864655523845, "grad_norm": 3.481964349746704, "learning_rate": 2.3971497486071477e-05, "loss": 2.0674, "step": 89550 }, { "epoch": 6.084726185623047, "grad_norm": 3.2983925342559814, "learning_rate": 2.3967250985188205e-05, "loss": 2.0379, "step": 89555 }, { "epoch": 6.085065905693709, "grad_norm": 3.2519237995147705, "learning_rate": 2.3963004484304937e-05, "loss": 1.7217, "step": 89560 }, { "epoch": 6.08540562576437, "grad_norm": 3.9700098037719727, "learning_rate": 2.395875798342166e-05, "loss": 2.0309, "step": 89565 }, { "epoch": 6.085745345835032, "grad_norm": 3.26458477973938, "learning_rate": 2.395451148253839e-05, "loss": 2.1993, "step": 89570 }, { "epoch": 6.086085065905694, "grad_norm": 3.522653818130493, "learning_rate": 2.3950264981655117e-05, "loss": 1.9968, "step": 89575 }, { "epoch": 6.086424785976355, "grad_norm": 3.2944490909576416, "learning_rate": 
2.3946018480771845e-05, "loss": 2.1022, "step": 89580 }, { "epoch": 6.086764506047017, "grad_norm": 3.283095359802246, "learning_rate": 2.3941771979888573e-05, "loss": 2.0061, "step": 89585 }, { "epoch": 6.087104226117679, "grad_norm": 3.0567374229431152, "learning_rate": 2.39375254790053e-05, "loss": 1.9125, "step": 89590 }, { "epoch": 6.0874439461883405, "grad_norm": 3.497516393661499, "learning_rate": 2.393327897812203e-05, "loss": 2.0633, "step": 89595 }, { "epoch": 6.087783666259003, "grad_norm": 3.157443046569824, "learning_rate": 2.3929032477238754e-05, "loss": 2.3442, "step": 89600 }, { "epoch": 6.088123386329665, "grad_norm": 3.7323431968688965, "learning_rate": 2.3924785976355485e-05, "loss": 2.0816, "step": 89605 }, { "epoch": 6.088463106400326, "grad_norm": 3.3377673625946045, "learning_rate": 2.3920539475472213e-05, "loss": 2.1933, "step": 89610 }, { "epoch": 6.088802826470988, "grad_norm": 3.996351480484009, "learning_rate": 2.3916292974588938e-05, "loss": 2.0751, "step": 89615 }, { "epoch": 6.08914254654165, "grad_norm": 3.896965265274048, "learning_rate": 2.391204647370567e-05, "loss": 2.1923, "step": 89620 }, { "epoch": 6.089482266612311, "grad_norm": 3.643765926361084, "learning_rate": 2.3907799972822394e-05, "loss": 2.2329, "step": 89625 }, { "epoch": 6.089821986682973, "grad_norm": 3.8748176097869873, "learning_rate": 2.3903553471939125e-05, "loss": 2.0247, "step": 89630 }, { "epoch": 6.090161706753635, "grad_norm": 3.190614938735962, "learning_rate": 2.389930697105585e-05, "loss": 1.8375, "step": 89635 }, { "epoch": 6.0905014268242965, "grad_norm": 3.130920648574829, "learning_rate": 2.3895060470172578e-05, "loss": 2.0158, "step": 89640 }, { "epoch": 6.090841146894959, "grad_norm": 3.304927349090576, "learning_rate": 2.389081396928931e-05, "loss": 2.0277, "step": 89645 }, { "epoch": 6.091180866965621, "grad_norm": 3.909336566925049, "learning_rate": 2.3886567468406034e-05, "loss": 1.8453, "step": 89650 }, { "epoch": 6.091520587036282, 
"grad_norm": 3.893338918685913, "learning_rate": 2.3882320967522762e-05, "loss": 2.3681, "step": 89655 }, { "epoch": 6.091860307106944, "grad_norm": 4.163647174835205, "learning_rate": 2.387807446663949e-05, "loss": 2.1098, "step": 89660 }, { "epoch": 6.092200027177606, "grad_norm": 3.718027114868164, "learning_rate": 2.3873827965756218e-05, "loss": 1.8968, "step": 89665 }, { "epoch": 6.092539747248267, "grad_norm": 3.27543044090271, "learning_rate": 2.3869581464872946e-05, "loss": 2.2334, "step": 89670 }, { "epoch": 6.092879467318929, "grad_norm": 3.7546825408935547, "learning_rate": 2.3865334963989674e-05, "loss": 1.8962, "step": 89675 }, { "epoch": 6.093219187389591, "grad_norm": 3.614485740661621, "learning_rate": 2.3861088463106402e-05, "loss": 1.8924, "step": 89680 }, { "epoch": 6.0935589074602525, "grad_norm": 3.7576100826263428, "learning_rate": 2.3856841962223127e-05, "loss": 2.0198, "step": 89685 }, { "epoch": 6.093898627530915, "grad_norm": 2.9181289672851562, "learning_rate": 2.3852595461339858e-05, "loss": 2.105, "step": 89690 }, { "epoch": 6.094238347601577, "grad_norm": 3.9689433574676514, "learning_rate": 2.3848348960456586e-05, "loss": 2.1051, "step": 89695 }, { "epoch": 6.094578067672238, "grad_norm": 3.5134856700897217, "learning_rate": 2.384410245957331e-05, "loss": 2.1017, "step": 89700 }, { "epoch": 6.0949177877429, "grad_norm": 4.226617336273193, "learning_rate": 2.3839855958690042e-05, "loss": 2.1574, "step": 89705 }, { "epoch": 6.095257507813562, "grad_norm": 3.787790060043335, "learning_rate": 2.3835609457806767e-05, "loss": 2.1292, "step": 89710 }, { "epoch": 6.095597227884223, "grad_norm": 3.4654529094696045, "learning_rate": 2.3831362956923498e-05, "loss": 2.0598, "step": 89715 }, { "epoch": 6.095936947954885, "grad_norm": 4.116197109222412, "learning_rate": 2.3827116456040223e-05, "loss": 2.265, "step": 89720 }, { "epoch": 6.096276668025547, "grad_norm": 4.545632362365723, "learning_rate": 2.382286995515695e-05, "loss": 2.1447, "step": 
89725 }, { "epoch": 6.0966163880962085, "grad_norm": 3.329576253890991, "learning_rate": 2.3818623454273682e-05, "loss": 1.9301, "step": 89730 }, { "epoch": 6.096956108166871, "grad_norm": 2.5728249549865723, "learning_rate": 2.3814376953390407e-05, "loss": 2.0221, "step": 89735 }, { "epoch": 6.097295828237533, "grad_norm": 4.08607816696167, "learning_rate": 2.3810130452507135e-05, "loss": 2.0273, "step": 89740 }, { "epoch": 6.097635548308194, "grad_norm": 4.247385025024414, "learning_rate": 2.3805883951623863e-05, "loss": 2.0935, "step": 89745 }, { "epoch": 6.097975268378856, "grad_norm": 3.2654459476470947, "learning_rate": 2.380163745074059e-05, "loss": 1.8188, "step": 89750 }, { "epoch": 6.098314988449518, "grad_norm": 3.767439603805542, "learning_rate": 2.379739094985732e-05, "loss": 1.9122, "step": 89755 }, { "epoch": 6.098654708520179, "grad_norm": 2.952763080596924, "learning_rate": 2.3793144448974047e-05, "loss": 2.1004, "step": 89760 }, { "epoch": 6.098994428590841, "grad_norm": 3.0104856491088867, "learning_rate": 2.3788897948090775e-05, "loss": 2.0444, "step": 89765 }, { "epoch": 6.099334148661503, "grad_norm": 3.892429828643799, "learning_rate": 2.37846514472075e-05, "loss": 1.992, "step": 89770 }, { "epoch": 6.0996738687321646, "grad_norm": 4.030847549438477, "learning_rate": 2.378040494632423e-05, "loss": 2.0986, "step": 89775 }, { "epoch": 6.100013588802827, "grad_norm": 3.642369508743286, "learning_rate": 2.377615844544096e-05, "loss": 1.937, "step": 89780 }, { "epoch": 6.100353308873488, "grad_norm": 3.4815738201141357, "learning_rate": 2.3771911944557683e-05, "loss": 2.0684, "step": 89785 }, { "epoch": 6.10069302894415, "grad_norm": 2.8840579986572266, "learning_rate": 2.3767665443674415e-05, "loss": 1.8759, "step": 89790 }, { "epoch": 6.101032749014812, "grad_norm": 2.4032862186431885, "learning_rate": 2.376341894279114e-05, "loss": 2.0505, "step": 89795 }, { "epoch": 6.101372469085473, "grad_norm": 4.738473892211914, "learning_rate": 
2.375917244190787e-05, "loss": 1.9542, "step": 89800 }, { "epoch": 6.101712189156135, "grad_norm": 3.80172061920166, "learning_rate": 2.37549259410246e-05, "loss": 1.7589, "step": 89805 }, { "epoch": 6.102051909226797, "grad_norm": 3.4622576236724854, "learning_rate": 2.3750679440141323e-05, "loss": 1.8078, "step": 89810 }, { "epoch": 6.1023916292974585, "grad_norm": 3.4955596923828125, "learning_rate": 2.3746432939258055e-05, "loss": 2.1563, "step": 89815 }, { "epoch": 6.102731349368121, "grad_norm": 3.160609006881714, "learning_rate": 2.374218643837478e-05, "loss": 2.0826, "step": 89820 }, { "epoch": 6.103071069438783, "grad_norm": 3.5579144954681396, "learning_rate": 2.3737939937491507e-05, "loss": 2.3135, "step": 89825 }, { "epoch": 6.103410789509444, "grad_norm": 3.012302875518799, "learning_rate": 2.3733693436608236e-05, "loss": 2.0533, "step": 89830 }, { "epoch": 6.103750509580106, "grad_norm": 3.2743544578552246, "learning_rate": 2.3729446935724964e-05, "loss": 2.1214, "step": 89835 }, { "epoch": 6.104090229650768, "grad_norm": 3.550767183303833, "learning_rate": 2.372520043484169e-05, "loss": 1.9052, "step": 89840 }, { "epoch": 6.104429949721429, "grad_norm": 2.3715593814849854, "learning_rate": 2.372095393395842e-05, "loss": 2.0226, "step": 89845 }, { "epoch": 6.104769669792091, "grad_norm": 3.133918046951294, "learning_rate": 2.3716707433075148e-05, "loss": 1.9393, "step": 89850 }, { "epoch": 6.105109389862753, "grad_norm": 2.748265027999878, "learning_rate": 2.3712460932191876e-05, "loss": 2.1057, "step": 89855 }, { "epoch": 6.1054491099334145, "grad_norm": 3.885406017303467, "learning_rate": 2.3708214431308604e-05, "loss": 1.9392, "step": 89860 }, { "epoch": 6.105788830004077, "grad_norm": 3.392270565032959, "learning_rate": 2.370396793042533e-05, "loss": 2.1615, "step": 89865 }, { "epoch": 6.106128550074739, "grad_norm": 4.428955554962158, "learning_rate": 2.3699721429542056e-05, "loss": 2.1925, "step": 89870 }, { "epoch": 6.1064682701454, 
"grad_norm": 3.4550352096557617, "learning_rate": 2.3695474928658788e-05, "loss": 2.1334, "step": 89875 }, { "epoch": 6.106807990216062, "grad_norm": 4.0495429039001465, "learning_rate": 2.3691228427775512e-05, "loss": 2.2134, "step": 89880 }, { "epoch": 6.107147710286724, "grad_norm": 4.164578914642334, "learning_rate": 2.3686981926892244e-05, "loss": 2.2747, "step": 89885 }, { "epoch": 6.107487430357385, "grad_norm": 2.4653449058532715, "learning_rate": 2.368273542600897e-05, "loss": 2.0435, "step": 89890 }, { "epoch": 6.107827150428047, "grad_norm": 3.995548725128174, "learning_rate": 2.3678488925125696e-05, "loss": 2.0395, "step": 89895 }, { "epoch": 6.108166870498709, "grad_norm": 3.60261607170105, "learning_rate": 2.3674242424242428e-05, "loss": 2.0367, "step": 89900 }, { "epoch": 6.1085065905693705, "grad_norm": 4.151452541351318, "learning_rate": 2.3669995923359152e-05, "loss": 2.0559, "step": 89905 }, { "epoch": 6.108846310640033, "grad_norm": 3.8992228507995605, "learning_rate": 2.366574942247588e-05, "loss": 1.9174, "step": 89910 }, { "epoch": 6.109186030710695, "grad_norm": 3.8271617889404297, "learning_rate": 2.3661502921592608e-05, "loss": 2.2526, "step": 89915 }, { "epoch": 6.109525750781356, "grad_norm": 4.498923301696777, "learning_rate": 2.3657256420709336e-05, "loss": 2.1533, "step": 89920 }, { "epoch": 6.109865470852018, "grad_norm": 3.3472964763641357, "learning_rate": 2.3653009919826064e-05, "loss": 2.0444, "step": 89925 }, { "epoch": 6.11020519092268, "grad_norm": 3.110633611679077, "learning_rate": 2.3648763418942792e-05, "loss": 2.0083, "step": 89930 }, { "epoch": 6.110544910993341, "grad_norm": 3.530406951904297, "learning_rate": 2.364451691805952e-05, "loss": 2.0418, "step": 89935 }, { "epoch": 6.110884631064003, "grad_norm": 4.054897308349609, "learning_rate": 2.3640270417176248e-05, "loss": 2.3508, "step": 89940 }, { "epoch": 6.111224351134665, "grad_norm": 3.6825742721557617, "learning_rate": 2.3636023916292976e-05, "loss": 2.1222, 
"step": 89945 }, { "epoch": 6.1115640712053265, "grad_norm": 3.8765244483947754, "learning_rate": 2.3631777415409704e-05, "loss": 2.2828, "step": 89950 }, { "epoch": 6.111903791275989, "grad_norm": 3.4759485721588135, "learning_rate": 2.362753091452643e-05, "loss": 1.9795, "step": 89955 }, { "epoch": 6.112243511346651, "grad_norm": 4.00600528717041, "learning_rate": 2.362328441364316e-05, "loss": 1.9786, "step": 89960 }, { "epoch": 6.112583231417312, "grad_norm": 4.507500171661377, "learning_rate": 2.3619037912759885e-05, "loss": 2.2417, "step": 89965 }, { "epoch": 6.112922951487974, "grad_norm": 3.303323268890381, "learning_rate": 2.3614791411876616e-05, "loss": 2.2349, "step": 89970 }, { "epoch": 6.113262671558636, "grad_norm": 4.271219730377197, "learning_rate": 2.3610544910993344e-05, "loss": 2.2194, "step": 89975 }, { "epoch": 6.113602391629297, "grad_norm": 3.08385968208313, "learning_rate": 2.360629841011007e-05, "loss": 1.994, "step": 89980 }, { "epoch": 6.113942111699959, "grad_norm": 3.3429741859436035, "learning_rate": 2.36020519092268e-05, "loss": 2.0466, "step": 89985 }, { "epoch": 6.114281831770621, "grad_norm": 3.3811259269714355, "learning_rate": 2.3597805408343525e-05, "loss": 1.8648, "step": 89990 }, { "epoch": 6.1146215518412825, "grad_norm": 4.080681800842285, "learning_rate": 2.3593558907460253e-05, "loss": 1.9424, "step": 89995 }, { "epoch": 6.114961271911945, "grad_norm": 4.058043956756592, "learning_rate": 2.3589312406576984e-05, "loss": 2.3004, "step": 90000 }, { "epoch": 6.115300991982607, "grad_norm": 2.703080177307129, "learning_rate": 2.358506590569371e-05, "loss": 2.0974, "step": 90005 }, { "epoch": 6.115640712053268, "grad_norm": 3.250298500061035, "learning_rate": 2.3580819404810437e-05, "loss": 2.1397, "step": 90010 }, { "epoch": 6.11598043212393, "grad_norm": 3.385007619857788, "learning_rate": 2.3576572903927165e-05, "loss": 2.3561, "step": 90015 }, { "epoch": 6.116320152194592, "grad_norm": 3.8565597534179688, "learning_rate": 
2.3572326403043893e-05, "loss": 2.0477, "step": 90020 }, { "epoch": 6.116659872265253, "grad_norm": 2.686866283416748, "learning_rate": 2.356807990216062e-05, "loss": 2.2716, "step": 90025 }, { "epoch": 6.116999592335915, "grad_norm": 3.7752184867858887, "learning_rate": 2.356383340127735e-05, "loss": 2.0456, "step": 90030 }, { "epoch": 6.117339312406577, "grad_norm": 4.574634552001953, "learning_rate": 2.3559586900394077e-05, "loss": 2.1512, "step": 90035 }, { "epoch": 6.1176790324772385, "grad_norm": 3.4172749519348145, "learning_rate": 2.35553403995108e-05, "loss": 2.2371, "step": 90040 }, { "epoch": 6.118018752547901, "grad_norm": 3.261833906173706, "learning_rate": 2.3551093898627533e-05, "loss": 2.146, "step": 90045 }, { "epoch": 6.118358472618563, "grad_norm": 3.3747100830078125, "learning_rate": 2.354684739774426e-05, "loss": 2.3934, "step": 90050 }, { "epoch": 6.118698192689224, "grad_norm": 3.5582451820373535, "learning_rate": 2.354260089686099e-05, "loss": 2.3513, "step": 90055 }, { "epoch": 6.119037912759886, "grad_norm": 4.100149154663086, "learning_rate": 2.3538354395977717e-05, "loss": 2.1513, "step": 90060 }, { "epoch": 6.119377632830548, "grad_norm": 3.5604448318481445, "learning_rate": 2.3534107895094442e-05, "loss": 2.0821, "step": 90065 }, { "epoch": 6.119717352901209, "grad_norm": 3.523347854614258, "learning_rate": 2.3529861394211173e-05, "loss": 1.9224, "step": 90070 }, { "epoch": 6.120057072971871, "grad_norm": 3.3079442977905273, "learning_rate": 2.3525614893327898e-05, "loss": 1.9308, "step": 90075 }, { "epoch": 6.120396793042533, "grad_norm": 3.9751241207122803, "learning_rate": 2.3521368392444626e-05, "loss": 2.1627, "step": 90080 }, { "epoch": 6.1207365131131946, "grad_norm": 2.8953962326049805, "learning_rate": 2.3517121891561357e-05, "loss": 1.5762, "step": 90085 }, { "epoch": 6.121076233183857, "grad_norm": 3.2479236125946045, "learning_rate": 2.3512875390678082e-05, "loss": 1.9823, "step": 90090 }, { "epoch": 6.121415953254519, 
"grad_norm": 3.4106297492980957, "learning_rate": 2.350862888979481e-05, "loss": 2.1673, "step": 90095 }, { "epoch": 6.12175567332518, "grad_norm": 3.3483850955963135, "learning_rate": 2.3504382388911538e-05, "loss": 2.111, "step": 90100 }, { "epoch": 6.122095393395842, "grad_norm": 3.8309357166290283, "learning_rate": 2.3500135888028266e-05, "loss": 2.0294, "step": 90105 }, { "epoch": 6.122435113466503, "grad_norm": 3.531954288482666, "learning_rate": 2.3495889387144994e-05, "loss": 1.9895, "step": 90110 }, { "epoch": 6.122774833537165, "grad_norm": 3.6586310863494873, "learning_rate": 2.3491642886261722e-05, "loss": 2.1665, "step": 90115 }, { "epoch": 6.123114553607827, "grad_norm": 4.146158218383789, "learning_rate": 2.348739638537845e-05, "loss": 2.1406, "step": 90120 }, { "epoch": 6.1234542736784885, "grad_norm": 3.254129409790039, "learning_rate": 2.3483149884495174e-05, "loss": 1.87, "step": 90125 }, { "epoch": 6.123793993749151, "grad_norm": 3.1276652812957764, "learning_rate": 2.3478903383611906e-05, "loss": 2.0613, "step": 90130 }, { "epoch": 6.124133713819813, "grad_norm": 3.232935667037964, "learning_rate": 2.3474656882728634e-05, "loss": 1.861, "step": 90135 }, { "epoch": 6.124473433890474, "grad_norm": 3.5873756408691406, "learning_rate": 2.3470410381845362e-05, "loss": 2.3162, "step": 90140 }, { "epoch": 6.124813153961136, "grad_norm": 3.2141246795654297, "learning_rate": 2.346616388096209e-05, "loss": 2.1034, "step": 90145 }, { "epoch": 6.125152874031798, "grad_norm": 3.9323935508728027, "learning_rate": 2.3461917380078814e-05, "loss": 2.0955, "step": 90150 }, { "epoch": 6.125492594102459, "grad_norm": 4.290922164916992, "learning_rate": 2.3457670879195546e-05, "loss": 2.3049, "step": 90155 }, { "epoch": 6.125832314173121, "grad_norm": 4.212199687957764, "learning_rate": 2.345342437831227e-05, "loss": 1.9608, "step": 90160 }, { "epoch": 6.126172034243783, "grad_norm": 3.693514347076416, "learning_rate": 2.3449177877429e-05, "loss": 2.1809, "step": 
90165 }, { "epoch": 6.1265117543144445, "grad_norm": 4.074198246002197, "learning_rate": 2.344493137654573e-05, "loss": 1.9716, "step": 90170 }, { "epoch": 6.126851474385107, "grad_norm": 3.343468189239502, "learning_rate": 2.3440684875662454e-05, "loss": 2.1226, "step": 90175 }, { "epoch": 6.127191194455769, "grad_norm": 3.2729976177215576, "learning_rate": 2.3436438374779182e-05, "loss": 2.1118, "step": 90180 }, { "epoch": 6.12753091452643, "grad_norm": 3.5025229454040527, "learning_rate": 2.343219187389591e-05, "loss": 2.125, "step": 90185 }, { "epoch": 6.127870634597092, "grad_norm": 2.9656829833984375, "learning_rate": 2.342794537301264e-05, "loss": 2.3688, "step": 90190 }, { "epoch": 6.128210354667754, "grad_norm": 3.753180503845215, "learning_rate": 2.3423698872129367e-05, "loss": 2.2407, "step": 90195 }, { "epoch": 6.128550074738415, "grad_norm": 3.5065388679504395, "learning_rate": 2.3419452371246095e-05, "loss": 2.2795, "step": 90200 }, { "epoch": 6.128889794809077, "grad_norm": 3.5249972343444824, "learning_rate": 2.3415205870362823e-05, "loss": 2.4255, "step": 90205 }, { "epoch": 6.129229514879739, "grad_norm": 4.028178691864014, "learning_rate": 2.3410959369479547e-05, "loss": 2.1129, "step": 90210 }, { "epoch": 6.1295692349504005, "grad_norm": 3.0447580814361572, "learning_rate": 2.340671286859628e-05, "loss": 2.177, "step": 90215 }, { "epoch": 6.129908955021063, "grad_norm": 2.850010395050049, "learning_rate": 2.3402466367713007e-05, "loss": 1.8229, "step": 90220 }, { "epoch": 6.130248675091725, "grad_norm": 2.7750723361968994, "learning_rate": 2.3398219866829735e-05, "loss": 1.992, "step": 90225 }, { "epoch": 6.130588395162386, "grad_norm": 3.232255220413208, "learning_rate": 2.3393973365946463e-05, "loss": 1.8313, "step": 90230 }, { "epoch": 6.130928115233048, "grad_norm": 3.4068715572357178, "learning_rate": 2.3389726865063187e-05, "loss": 2.1761, "step": 90235 }, { "epoch": 6.13126783530371, "grad_norm": 3.375422716140747, "learning_rate": 
2.338548036417992e-05, "loss": 2.2546, "step": 90240 }, { "epoch": 6.131607555374371, "grad_norm": 3.4854605197906494, "learning_rate": 2.3381233863296643e-05, "loss": 2.2822, "step": 90245 }, { "epoch": 6.131947275445033, "grad_norm": 4.258001804351807, "learning_rate": 2.337698736241337e-05, "loss": 1.8209, "step": 90250 }, { "epoch": 6.132286995515695, "grad_norm": 3.5804033279418945, "learning_rate": 2.3372740861530103e-05, "loss": 2.1949, "step": 90255 }, { "epoch": 6.1326267155863565, "grad_norm": 3.9150493144989014, "learning_rate": 2.3368494360646827e-05, "loss": 2.2725, "step": 90260 }, { "epoch": 6.132966435657019, "grad_norm": 3.3558411598205566, "learning_rate": 2.3364247859763555e-05, "loss": 1.9402, "step": 90265 }, { "epoch": 6.133306155727681, "grad_norm": 2.9518017768859863, "learning_rate": 2.3360001358880283e-05, "loss": 1.9249, "step": 90270 }, { "epoch": 6.133645875798342, "grad_norm": 3.721896171569824, "learning_rate": 2.335575485799701e-05, "loss": 2.1324, "step": 90275 }, { "epoch": 6.133985595869004, "grad_norm": 3.314018487930298, "learning_rate": 2.335150835711374e-05, "loss": 2.3141, "step": 90280 }, { "epoch": 6.134325315939666, "grad_norm": 3.791656732559204, "learning_rate": 2.3347261856230467e-05, "loss": 2.1464, "step": 90285 }, { "epoch": 6.134665036010327, "grad_norm": 3.7920966148376465, "learning_rate": 2.3343015355347195e-05, "loss": 1.9031, "step": 90290 }, { "epoch": 6.135004756080989, "grad_norm": 3.537074089050293, "learning_rate": 2.333876885446392e-05, "loss": 2.2475, "step": 90295 }, { "epoch": 6.135344476151651, "grad_norm": 3.515531301498413, "learning_rate": 2.333452235358065e-05, "loss": 2.1587, "step": 90300 }, { "epoch": 6.1356841962223125, "grad_norm": 2.8268792629241943, "learning_rate": 2.333027585269738e-05, "loss": 1.9759, "step": 90305 }, { "epoch": 6.136023916292975, "grad_norm": 4.2611165046691895, "learning_rate": 2.3326029351814107e-05, "loss": 2.1134, "step": 90310 }, { "epoch": 6.136363636363637, 
"grad_norm": 4.322805404663086, "learning_rate": 2.3321782850930835e-05, "loss": 1.8412, "step": 90315 }, { "epoch": 6.136703356434298, "grad_norm": 3.3891494274139404, "learning_rate": 2.331753635004756e-05, "loss": 2.2098, "step": 90320 }, { "epoch": 6.13704307650496, "grad_norm": 3.5229129791259766, "learning_rate": 2.331328984916429e-05, "loss": 1.8631, "step": 90325 }, { "epoch": 6.137382796575622, "grad_norm": 3.5222747325897217, "learning_rate": 2.330904334828102e-05, "loss": 2.1179, "step": 90330 }, { "epoch": 6.137722516646283, "grad_norm": 2.8574328422546387, "learning_rate": 2.3304796847397744e-05, "loss": 2.2686, "step": 90335 }, { "epoch": 6.138062236716945, "grad_norm": 3.195629596710205, "learning_rate": 2.3300550346514475e-05, "loss": 2.0622, "step": 90340 }, { "epoch": 6.138401956787607, "grad_norm": 4.491574764251709, "learning_rate": 2.32963038456312e-05, "loss": 2.2327, "step": 90345 }, { "epoch": 6.1387416768582685, "grad_norm": 2.780109167098999, "learning_rate": 2.3292057344747928e-05, "loss": 2.1563, "step": 90350 }, { "epoch": 6.139081396928931, "grad_norm": 3.725861072540283, "learning_rate": 2.3287810843864656e-05, "loss": 2.0752, "step": 90355 }, { "epoch": 6.139421116999593, "grad_norm": 4.179005146026611, "learning_rate": 2.3283564342981384e-05, "loss": 2.2427, "step": 90360 }, { "epoch": 6.139760837070254, "grad_norm": 4.9075446128845215, "learning_rate": 2.3279317842098112e-05, "loss": 2.03, "step": 90365 }, { "epoch": 6.140100557140916, "grad_norm": 3.62984299659729, "learning_rate": 2.327507134121484e-05, "loss": 2.0078, "step": 90370 }, { "epoch": 6.140440277211578, "grad_norm": 3.6204257011413574, "learning_rate": 2.3270824840331568e-05, "loss": 2.02, "step": 90375 }, { "epoch": 6.140779997282239, "grad_norm": 3.5990536212921143, "learning_rate": 2.3266578339448296e-05, "loss": 2.1809, "step": 90380 }, { "epoch": 6.141119717352901, "grad_norm": 4.1749725341796875, "learning_rate": 2.3262331838565024e-05, "loss": 2.1771, "step": 
90385 }, { "epoch": 6.141459437423563, "grad_norm": 4.0520830154418945, "learning_rate": 2.3258085337681752e-05, "loss": 1.8923, "step": 90390 }, { "epoch": 6.141799157494225, "grad_norm": 3.9660449028015137, "learning_rate": 2.325383883679848e-05, "loss": 1.9113, "step": 90395 }, { "epoch": 6.142138877564887, "grad_norm": 4.020238399505615, "learning_rate": 2.3249592335915208e-05, "loss": 2.0189, "step": 90400 }, { "epoch": 6.142478597635549, "grad_norm": 3.4783666133880615, "learning_rate": 2.3245345835031933e-05, "loss": 2.1738, "step": 90405 }, { "epoch": 6.14281831770621, "grad_norm": 3.9086790084838867, "learning_rate": 2.3241099334148664e-05, "loss": 1.8847, "step": 90410 }, { "epoch": 6.143158037776872, "grad_norm": 4.135318756103516, "learning_rate": 2.3236852833265392e-05, "loss": 2.4057, "step": 90415 }, { "epoch": 6.143497757847534, "grad_norm": 3.3171863555908203, "learning_rate": 2.3232606332382117e-05, "loss": 2.0903, "step": 90420 }, { "epoch": 6.143837477918195, "grad_norm": 3.707247734069824, "learning_rate": 2.3228359831498848e-05, "loss": 1.9269, "step": 90425 }, { "epoch": 6.144177197988857, "grad_norm": 3.094660997390747, "learning_rate": 2.3224113330615573e-05, "loss": 2.1528, "step": 90430 }, { "epoch": 6.144516918059519, "grad_norm": 3.2573935985565186, "learning_rate": 2.32198668297323e-05, "loss": 2.0611, "step": 90435 }, { "epoch": 6.144856638130181, "grad_norm": 3.6355743408203125, "learning_rate": 2.321562032884903e-05, "loss": 2.2819, "step": 90440 }, { "epoch": 6.145196358200843, "grad_norm": 3.6274657249450684, "learning_rate": 2.3211373827965757e-05, "loss": 2.1502, "step": 90445 }, { "epoch": 6.145536078271505, "grad_norm": 3.7616524696350098, "learning_rate": 2.3207127327082485e-05, "loss": 2.0181, "step": 90450 }, { "epoch": 6.145875798342166, "grad_norm": 3.2199206352233887, "learning_rate": 2.3202880826199213e-05, "loss": 2.2185, "step": 90455 }, { "epoch": 6.146215518412828, "grad_norm": 3.6702959537506104, "learning_rate": 
2.319863432531594e-05, "loss": 2.1605, "step": 90460 }, { "epoch": 6.14655523848349, "grad_norm": 2.3657777309417725, "learning_rate": 2.319438782443267e-05, "loss": 2.2761, "step": 90465 }, { "epoch": 6.146894958554151, "grad_norm": 3.0008959770202637, "learning_rate": 2.3190141323549397e-05, "loss": 1.8934, "step": 90470 }, { "epoch": 6.147234678624813, "grad_norm": 3.4388983249664307, "learning_rate": 2.3185894822666125e-05, "loss": 2.0422, "step": 90475 }, { "epoch": 6.1475743986954745, "grad_norm": 3.2745680809020996, "learning_rate": 2.3181648321782853e-05, "loss": 2.0821, "step": 90480 }, { "epoch": 6.147914118766137, "grad_norm": 4.1578826904296875, "learning_rate": 2.317740182089958e-05, "loss": 2.2722, "step": 90485 }, { "epoch": 6.148253838836799, "grad_norm": 4.815972805023193, "learning_rate": 2.3173155320016305e-05, "loss": 2.0645, "step": 90490 }, { "epoch": 6.14859355890746, "grad_norm": 3.6847736835479736, "learning_rate": 2.3168908819133037e-05, "loss": 2.2628, "step": 90495 }, { "epoch": 6.148933278978122, "grad_norm": 3.7204341888427734, "learning_rate": 2.3164662318249765e-05, "loss": 2.0062, "step": 90500 }, { "epoch": 6.149272999048784, "grad_norm": 4.084288597106934, "learning_rate": 2.316041581736649e-05, "loss": 1.965, "step": 90505 }, { "epoch": 6.149612719119445, "grad_norm": 3.6212751865386963, "learning_rate": 2.315616931648322e-05, "loss": 2.0554, "step": 90510 }, { "epoch": 6.149952439190107, "grad_norm": 4.440725326538086, "learning_rate": 2.3151922815599945e-05, "loss": 2.0877, "step": 90515 }, { "epoch": 6.150292159260769, "grad_norm": 3.9557693004608154, "learning_rate": 2.3147676314716673e-05, "loss": 1.9879, "step": 90520 }, { "epoch": 6.1506318793314305, "grad_norm": 3.1027169227600098, "learning_rate": 2.3143429813833405e-05, "loss": 2.1887, "step": 90525 }, { "epoch": 6.150971599402093, "grad_norm": 3.9503061771392822, "learning_rate": 2.313918331295013e-05, "loss": 2.0, "step": 90530 }, { "epoch": 6.151311319472755, 
"grad_norm": 3.412537097930908, "learning_rate": 2.3134936812066857e-05, "loss": 2.2024, "step": 90535 }, { "epoch": 6.151651039543416, "grad_norm": 4.162086486816406, "learning_rate": 2.3130690311183585e-05, "loss": 2.2671, "step": 90540 }, { "epoch": 6.151990759614078, "grad_norm": 3.853651523590088, "learning_rate": 2.3126443810300314e-05, "loss": 2.1438, "step": 90545 }, { "epoch": 6.15233047968474, "grad_norm": 4.0159220695495605, "learning_rate": 2.312219730941704e-05, "loss": 2.168, "step": 90550 }, { "epoch": 6.152670199755401, "grad_norm": 4.005361080169678, "learning_rate": 2.311795080853377e-05, "loss": 2.3363, "step": 90555 }, { "epoch": 6.153009919826063, "grad_norm": 3.5252137184143066, "learning_rate": 2.3113704307650498e-05, "loss": 2.2001, "step": 90560 }, { "epoch": 6.153349639896725, "grad_norm": 3.4477310180664062, "learning_rate": 2.3109457806767226e-05, "loss": 2.3107, "step": 90565 }, { "epoch": 6.1536893599673865, "grad_norm": 4.0298357009887695, "learning_rate": 2.3105211305883954e-05, "loss": 2.1404, "step": 90570 }, { "epoch": 6.154029080038049, "grad_norm": 3.6215789318084717, "learning_rate": 2.310096480500068e-05, "loss": 1.8365, "step": 90575 }, { "epoch": 6.154368800108711, "grad_norm": 3.4162282943725586, "learning_rate": 2.309671830411741e-05, "loss": 1.9912, "step": 90580 }, { "epoch": 6.154708520179372, "grad_norm": 4.071566581726074, "learning_rate": 2.3092471803234138e-05, "loss": 2.0338, "step": 90585 }, { "epoch": 6.155048240250034, "grad_norm": 3.859977960586548, "learning_rate": 2.3088225302350862e-05, "loss": 2.185, "step": 90590 }, { "epoch": 6.155387960320696, "grad_norm": 3.3165462017059326, "learning_rate": 2.3083978801467594e-05, "loss": 1.9083, "step": 90595 }, { "epoch": 6.155727680391357, "grad_norm": 3.746980905532837, "learning_rate": 2.3079732300584318e-05, "loss": 2.2829, "step": 90600 }, { "epoch": 6.156067400462019, "grad_norm": 3.7641990184783936, "learning_rate": 2.3075485799701046e-05, "loss": 2.095, 
"step": 90605 }, { "epoch": 6.156407120532681, "grad_norm": 3.9328532218933105, "learning_rate": 2.3071239298817778e-05, "loss": 2.3076, "step": 90610 }, { "epoch": 6.1567468406033425, "grad_norm": 2.8318638801574707, "learning_rate": 2.3066992797934502e-05, "loss": 2.274, "step": 90615 }, { "epoch": 6.157086560674005, "grad_norm": 3.784827470779419, "learning_rate": 2.306274629705123e-05, "loss": 2.1677, "step": 90620 }, { "epoch": 6.157426280744667, "grad_norm": 3.3274030685424805, "learning_rate": 2.3058499796167958e-05, "loss": 2.1366, "step": 90625 }, { "epoch": 6.157766000815328, "grad_norm": 3.500401020050049, "learning_rate": 2.3054253295284686e-05, "loss": 1.98, "step": 90630 }, { "epoch": 6.15810572088599, "grad_norm": 3.743016004562378, "learning_rate": 2.3050006794401414e-05, "loss": 1.927, "step": 90635 }, { "epoch": 6.158445440956652, "grad_norm": 3.515226125717163, "learning_rate": 2.3045760293518142e-05, "loss": 2.0239, "step": 90640 }, { "epoch": 6.158785161027313, "grad_norm": 3.446173667907715, "learning_rate": 2.304151379263487e-05, "loss": 1.9591, "step": 90645 }, { "epoch": 6.159124881097975, "grad_norm": 3.38150954246521, "learning_rate": 2.3037267291751598e-05, "loss": 2.1502, "step": 90650 }, { "epoch": 6.159464601168637, "grad_norm": 3.0312743186950684, "learning_rate": 2.3033020790868326e-05, "loss": 1.9085, "step": 90655 }, { "epoch": 6.1598043212392986, "grad_norm": 3.1831471920013428, "learning_rate": 2.3028774289985054e-05, "loss": 2.3249, "step": 90660 }, { "epoch": 6.160144041309961, "grad_norm": 3.873410701751709, "learning_rate": 2.3024527789101782e-05, "loss": 1.9833, "step": 90665 }, { "epoch": 6.160483761380623, "grad_norm": 3.5378963947296143, "learning_rate": 2.302028128821851e-05, "loss": 2.3229, "step": 90670 }, { "epoch": 6.160823481451284, "grad_norm": 3.0923304557800293, "learning_rate": 2.3016034787335235e-05, "loss": 2.2092, "step": 90675 }, { "epoch": 6.161163201521946, "grad_norm": 3.7712347507476807, 
"learning_rate": 2.3011788286451966e-05, "loss": 2.4192, "step": 90680 }, { "epoch": 6.161502921592608, "grad_norm": 4.213108539581299, "learning_rate": 2.300754178556869e-05, "loss": 2.1346, "step": 90685 }, { "epoch": 6.161842641663269, "grad_norm": 3.45985746383667, "learning_rate": 2.300329528468542e-05, "loss": 2.1098, "step": 90690 }, { "epoch": 6.162182361733931, "grad_norm": 3.918893575668335, "learning_rate": 2.299904878380215e-05, "loss": 2.2438, "step": 90695 }, { "epoch": 6.162522081804593, "grad_norm": 4.522114276885986, "learning_rate": 2.2994802282918875e-05, "loss": 2.0685, "step": 90700 }, { "epoch": 6.162861801875255, "grad_norm": 3.369434118270874, "learning_rate": 2.2990555782035603e-05, "loss": 2.4118, "step": 90705 }, { "epoch": 6.163201521945917, "grad_norm": 3.47737979888916, "learning_rate": 2.298630928115233e-05, "loss": 2.2287, "step": 90710 }, { "epoch": 6.163541242016579, "grad_norm": 3.7581210136413574, "learning_rate": 2.298206278026906e-05, "loss": 1.9794, "step": 90715 }, { "epoch": 6.16388096208724, "grad_norm": 4.056176662445068, "learning_rate": 2.2977816279385787e-05, "loss": 2.1139, "step": 90720 }, { "epoch": 6.164220682157902, "grad_norm": 4.6005120277404785, "learning_rate": 2.2973569778502515e-05, "loss": 1.9479, "step": 90725 }, { "epoch": 6.164560402228564, "grad_norm": 4.226253986358643, "learning_rate": 2.2969323277619243e-05, "loss": 2.1368, "step": 90730 }, { "epoch": 6.164900122299225, "grad_norm": 3.279442071914673, "learning_rate": 2.296507677673597e-05, "loss": 2.0793, "step": 90735 }, { "epoch": 6.165239842369887, "grad_norm": 3.4754998683929443, "learning_rate": 2.29608302758527e-05, "loss": 2.2611, "step": 90740 }, { "epoch": 6.165579562440549, "grad_norm": 4.908539295196533, "learning_rate": 2.2956583774969427e-05, "loss": 2.1269, "step": 90745 }, { "epoch": 6.165919282511211, "grad_norm": 4.567253112792969, "learning_rate": 2.2952337274086155e-05, "loss": 2.0021, "step": 90750 }, { "epoch": 6.166259002581873, 
"grad_norm": 3.4969918727874756, "learning_rate": 2.2948090773202883e-05, "loss": 2.0647, "step": 90755 }, { "epoch": 6.166598722652535, "grad_norm": 3.2978932857513428, "learning_rate": 2.2943844272319608e-05, "loss": 2.0312, "step": 90760 }, { "epoch": 6.166938442723196, "grad_norm": 3.3730387687683105, "learning_rate": 2.293959777143634e-05, "loss": 1.9941, "step": 90765 }, { "epoch": 6.167278162793858, "grad_norm": 4.181493759155273, "learning_rate": 2.2935351270553067e-05, "loss": 2.0075, "step": 90770 }, { "epoch": 6.16761788286452, "grad_norm": 3.822221517562866, "learning_rate": 2.293110476966979e-05, "loss": 1.9294, "step": 90775 }, { "epoch": 6.167957602935181, "grad_norm": 3.5427756309509277, "learning_rate": 2.2926858268786523e-05, "loss": 2.1096, "step": 90780 }, { "epoch": 6.168297323005843, "grad_norm": 2.6455066204071045, "learning_rate": 2.2922611767903248e-05, "loss": 2.0761, "step": 90785 }, { "epoch": 6.1686370430765045, "grad_norm": 3.531796455383301, "learning_rate": 2.2918365267019976e-05, "loss": 2.3054, "step": 90790 }, { "epoch": 6.168976763147167, "grad_norm": 4.884077548980713, "learning_rate": 2.2914118766136704e-05, "loss": 2.0796, "step": 90795 }, { "epoch": 6.169316483217829, "grad_norm": 3.8336198329925537, "learning_rate": 2.2909872265253432e-05, "loss": 1.9255, "step": 90800 }, { "epoch": 6.16965620328849, "grad_norm": 3.095536231994629, "learning_rate": 2.290562576437016e-05, "loss": 1.9536, "step": 90805 }, { "epoch": 6.169995923359152, "grad_norm": 3.8030917644500732, "learning_rate": 2.2901379263486888e-05, "loss": 2.187, "step": 90810 }, { "epoch": 6.170335643429814, "grad_norm": 4.610071659088135, "learning_rate": 2.2897132762603616e-05, "loss": 2.0827, "step": 90815 }, { "epoch": 6.170675363500475, "grad_norm": 4.480220794677734, "learning_rate": 2.2892886261720344e-05, "loss": 2.0236, "step": 90820 }, { "epoch": 6.171015083571137, "grad_norm": 3.3173410892486572, "learning_rate": 2.2888639760837072e-05, "loss": 2.0582, 
"step": 90825 }, { "epoch": 6.171354803641799, "grad_norm": 3.384735584259033, "learning_rate": 2.28843932599538e-05, "loss": 1.903, "step": 90830 }, { "epoch": 6.1716945237124605, "grad_norm": 4.338253974914551, "learning_rate": 2.2880146759070528e-05, "loss": 1.9711, "step": 90835 }, { "epoch": 6.172034243783123, "grad_norm": 4.206164360046387, "learning_rate": 2.2875900258187256e-05, "loss": 2.1321, "step": 90840 }, { "epoch": 6.172373963853785, "grad_norm": 4.049740314483643, "learning_rate": 2.287165375730398e-05, "loss": 1.8637, "step": 90845 }, { "epoch": 6.172713683924446, "grad_norm": 3.318932294845581, "learning_rate": 2.2867407256420712e-05, "loss": 2.1337, "step": 90850 }, { "epoch": 6.173053403995108, "grad_norm": 3.3774476051330566, "learning_rate": 2.286316075553744e-05, "loss": 2.0383, "step": 90855 }, { "epoch": 6.17339312406577, "grad_norm": 3.915696144104004, "learning_rate": 2.2858914254654164e-05, "loss": 1.9955, "step": 90860 }, { "epoch": 6.173732844136431, "grad_norm": 3.2468581199645996, "learning_rate": 2.2854667753770896e-05, "loss": 2.2399, "step": 90865 }, { "epoch": 6.174072564207093, "grad_norm": 3.0460262298583984, "learning_rate": 2.285042125288762e-05, "loss": 2.1214, "step": 90870 }, { "epoch": 6.174412284277755, "grad_norm": 2.940487861633301, "learning_rate": 2.284617475200435e-05, "loss": 1.8442, "step": 90875 }, { "epoch": 6.1747520043484165, "grad_norm": 3.5219690799713135, "learning_rate": 2.2841928251121076e-05, "loss": 2.1623, "step": 90880 }, { "epoch": 6.175091724419079, "grad_norm": 4.84560489654541, "learning_rate": 2.2837681750237804e-05, "loss": 2.1953, "step": 90885 }, { "epoch": 6.175431444489741, "grad_norm": 3.1267507076263428, "learning_rate": 2.2833435249354532e-05, "loss": 2.0307, "step": 90890 }, { "epoch": 6.175771164560402, "grad_norm": 3.672125816345215, "learning_rate": 2.282918874847126e-05, "loss": 2.0931, "step": 90895 }, { "epoch": 6.176110884631064, "grad_norm": 3.5880768299102783, "learning_rate": 
2.282494224758799e-05, "loss": 1.8488, "step": 90900 }, { "epoch": 6.176450604701726, "grad_norm": 3.3853936195373535, "learning_rate": 2.2820695746704717e-05, "loss": 1.9811, "step": 90905 }, { "epoch": 6.176790324772387, "grad_norm": 3.2328498363494873, "learning_rate": 2.2816449245821445e-05, "loss": 1.8893, "step": 90910 }, { "epoch": 6.177130044843049, "grad_norm": 2.976254940032959, "learning_rate": 2.2812202744938173e-05, "loss": 2.0125, "step": 90915 }, { "epoch": 6.177469764913711, "grad_norm": 3.58780574798584, "learning_rate": 2.28079562440549e-05, "loss": 2.0925, "step": 90920 }, { "epoch": 6.1778094849843725, "grad_norm": 3.4169492721557617, "learning_rate": 2.280370974317163e-05, "loss": 1.785, "step": 90925 }, { "epoch": 6.178149205055035, "grad_norm": 3.0868325233459473, "learning_rate": 2.2799463242288353e-05, "loss": 2.0672, "step": 90930 }, { "epoch": 6.178488925125697, "grad_norm": 3.6429717540740967, "learning_rate": 2.2795216741405085e-05, "loss": 2.1394, "step": 90935 }, { "epoch": 6.178828645196358, "grad_norm": 3.356842279434204, "learning_rate": 2.2790970240521813e-05, "loss": 2.1783, "step": 90940 }, { "epoch": 6.17916836526702, "grad_norm": 3.4671082496643066, "learning_rate": 2.2786723739638537e-05, "loss": 1.9562, "step": 90945 }, { "epoch": 6.179508085337682, "grad_norm": 3.739070177078247, "learning_rate": 2.278247723875527e-05, "loss": 1.9068, "step": 90950 }, { "epoch": 6.179847805408343, "grad_norm": 3.6115365028381348, "learning_rate": 2.2778230737871993e-05, "loss": 2.062, "step": 90955 }, { "epoch": 6.180187525479005, "grad_norm": 3.5276098251342773, "learning_rate": 2.277398423698872e-05, "loss": 2.0802, "step": 90960 }, { "epoch": 6.180527245549667, "grad_norm": 4.054562091827393, "learning_rate": 2.2769737736105453e-05, "loss": 1.9335, "step": 90965 }, { "epoch": 6.180866965620329, "grad_norm": 4.0054612159729, "learning_rate": 2.2765491235222177e-05, "loss": 2.3623, "step": 90970 }, { "epoch": 6.181206685690991, 
"grad_norm": 3.6846659183502197, "learning_rate": 2.2761244734338905e-05, "loss": 2.3425, "step": 90975 }, { "epoch": 6.181546405761653, "grad_norm": 4.28421688079834, "learning_rate": 2.2756998233455633e-05, "loss": 2.2736, "step": 90980 }, { "epoch": 6.181886125832314, "grad_norm": 3.8868041038513184, "learning_rate": 2.275275173257236e-05, "loss": 1.9002, "step": 90985 }, { "epoch": 6.182225845902976, "grad_norm": 3.3848133087158203, "learning_rate": 2.274850523168909e-05, "loss": 2.0231, "step": 90990 }, { "epoch": 6.182565565973638, "grad_norm": 3.4050045013427734, "learning_rate": 2.2744258730805817e-05, "loss": 2.3315, "step": 90995 }, { "epoch": 6.182905286044299, "grad_norm": 4.187972545623779, "learning_rate": 2.2740012229922545e-05, "loss": 2.1404, "step": 91000 }, { "epoch": 6.183245006114961, "grad_norm": 2.8627493381500244, "learning_rate": 2.2735765729039273e-05, "loss": 2.2128, "step": 91005 }, { "epoch": 6.183584726185623, "grad_norm": 3.3542733192443848, "learning_rate": 2.2731519228156e-05, "loss": 1.9961, "step": 91010 }, { "epoch": 6.183924446256285, "grad_norm": 3.5540876388549805, "learning_rate": 2.272727272727273e-05, "loss": 2.3027, "step": 91015 }, { "epoch": 6.184264166326947, "grad_norm": 2.752061367034912, "learning_rate": 2.2723026226389457e-05, "loss": 1.9073, "step": 91020 }, { "epoch": 6.184603886397609, "grad_norm": 3.7952184677124023, "learning_rate": 2.2718779725506185e-05, "loss": 2.1861, "step": 91025 }, { "epoch": 6.18494360646827, "grad_norm": 3.919487953186035, "learning_rate": 2.271453322462291e-05, "loss": 1.8457, "step": 91030 }, { "epoch": 6.185283326538932, "grad_norm": 3.643883466720581, "learning_rate": 2.271028672373964e-05, "loss": 2.1021, "step": 91035 }, { "epoch": 6.185623046609594, "grad_norm": 3.43795108795166, "learning_rate": 2.2706040222856366e-05, "loss": 2.0292, "step": 91040 }, { "epoch": 6.185962766680255, "grad_norm": 3.0162391662597656, "learning_rate": 2.2701793721973094e-05, "loss": 2.1841, "step": 
91045 }, { "epoch": 6.186302486750917, "grad_norm": 2.50890851020813, "learning_rate": 2.2697547221089825e-05, "loss": 2.1464, "step": 91050 }, { "epoch": 6.186642206821579, "grad_norm": 3.575965166091919, "learning_rate": 2.269330072020655e-05, "loss": 2.3676, "step": 91055 }, { "epoch": 6.186981926892241, "grad_norm": 2.9866106510162354, "learning_rate": 2.2689054219323278e-05, "loss": 1.9438, "step": 91060 }, { "epoch": 6.187321646962903, "grad_norm": 4.152713298797607, "learning_rate": 2.2684807718440006e-05, "loss": 2.3012, "step": 91065 }, { "epoch": 6.187661367033565, "grad_norm": 3.5435235500335693, "learning_rate": 2.2680561217556734e-05, "loss": 2.0145, "step": 91070 }, { "epoch": 6.188001087104226, "grad_norm": 3.9940171241760254, "learning_rate": 2.2676314716673462e-05, "loss": 2.0651, "step": 91075 }, { "epoch": 6.188340807174888, "grad_norm": 3.195835828781128, "learning_rate": 2.267206821579019e-05, "loss": 2.2001, "step": 91080 }, { "epoch": 6.18868052724555, "grad_norm": 4.920563697814941, "learning_rate": 2.2667821714906918e-05, "loss": 2.0028, "step": 91085 }, { "epoch": 6.189020247316211, "grad_norm": 3.7624921798706055, "learning_rate": 2.2663575214023646e-05, "loss": 2.1537, "step": 91090 }, { "epoch": 6.189359967386873, "grad_norm": 3.2181482315063477, "learning_rate": 2.2659328713140374e-05, "loss": 2.3787, "step": 91095 }, { "epoch": 6.189699687457535, "grad_norm": 3.775225877761841, "learning_rate": 2.2655082212257102e-05, "loss": 2.0579, "step": 91100 }, { "epoch": 6.190039407528197, "grad_norm": 2.7648186683654785, "learning_rate": 2.265083571137383e-05, "loss": 1.9545, "step": 91105 }, { "epoch": 6.190379127598859, "grad_norm": 2.6805968284606934, "learning_rate": 2.2646589210490558e-05, "loss": 2.2724, "step": 91110 }, { "epoch": 6.190718847669521, "grad_norm": 3.68953013420105, "learning_rate": 2.2642342709607283e-05, "loss": 2.3313, "step": 91115 }, { "epoch": 6.191058567740182, "grad_norm": 4.624256610870361, "learning_rate": 
2.2638096208724014e-05, "loss": 2.1846, "step": 91120 }, { "epoch": 6.191398287810844, "grad_norm": 3.607562303543091, "learning_rate": 2.263384970784074e-05, "loss": 2.0019, "step": 91125 }, { "epoch": 6.191738007881506, "grad_norm": 3.7333567142486572, "learning_rate": 2.2629603206957467e-05, "loss": 2.024, "step": 91130 }, { "epoch": 6.192077727952167, "grad_norm": 3.1908440589904785, "learning_rate": 2.2625356706074198e-05, "loss": 2.3536, "step": 91135 }, { "epoch": 6.192417448022829, "grad_norm": 3.3455564975738525, "learning_rate": 2.2621110205190923e-05, "loss": 2.3879, "step": 91140 }, { "epoch": 6.192757168093491, "grad_norm": 3.6619462966918945, "learning_rate": 2.261686370430765e-05, "loss": 1.8822, "step": 91145 }, { "epoch": 6.193096888164153, "grad_norm": 2.7282636165618896, "learning_rate": 2.261261720342438e-05, "loss": 2.068, "step": 91150 }, { "epoch": 6.193436608234815, "grad_norm": 4.127769947052002, "learning_rate": 2.2608370702541107e-05, "loss": 2.0872, "step": 91155 }, { "epoch": 6.193776328305476, "grad_norm": 3.621807336807251, "learning_rate": 2.2604124201657835e-05, "loss": 2.1595, "step": 91160 }, { "epoch": 6.194116048376138, "grad_norm": 2.7631661891937256, "learning_rate": 2.2599877700774563e-05, "loss": 2.1015, "step": 91165 }, { "epoch": 6.1944557684468, "grad_norm": 3.487424373626709, "learning_rate": 2.259563119989129e-05, "loss": 2.0191, "step": 91170 }, { "epoch": 6.194795488517461, "grad_norm": 3.6813628673553467, "learning_rate": 2.259138469900802e-05, "loss": 1.84, "step": 91175 }, { "epoch": 6.195135208588123, "grad_norm": 3.6129019260406494, "learning_rate": 2.2587138198124747e-05, "loss": 2.214, "step": 91180 }, { "epoch": 6.195474928658785, "grad_norm": 4.433107376098633, "learning_rate": 2.2582891697241475e-05, "loss": 2.1183, "step": 91185 }, { "epoch": 6.1958146487294465, "grad_norm": 3.7311205863952637, "learning_rate": 2.2578645196358203e-05, "loss": 2.0388, "step": 91190 }, { "epoch": 6.196154368800109, 
"grad_norm": 2.986837387084961, "learning_rate": 2.257439869547493e-05, "loss": 2.0702, "step": 91195 }, { "epoch": 6.196494088870771, "grad_norm": 3.763857841491699, "learning_rate": 2.2570152194591655e-05, "loss": 2.1111, "step": 91200 }, { "epoch": 6.196833808941432, "grad_norm": 3.6399314403533936, "learning_rate": 2.2565905693708387e-05, "loss": 1.973, "step": 91205 }, { "epoch": 6.197173529012094, "grad_norm": 4.202075481414795, "learning_rate": 2.256165919282511e-05, "loss": 2.0297, "step": 91210 }, { "epoch": 6.197513249082756, "grad_norm": 3.477147340774536, "learning_rate": 2.255741269194184e-05, "loss": 2.0799, "step": 91215 }, { "epoch": 6.197852969153417, "grad_norm": 3.2622878551483154, "learning_rate": 2.255316619105857e-05, "loss": 2.0, "step": 91220 }, { "epoch": 6.198192689224079, "grad_norm": 3.262535333633423, "learning_rate": 2.2548919690175295e-05, "loss": 2.2571, "step": 91225 }, { "epoch": 6.198532409294741, "grad_norm": 3.7308292388916016, "learning_rate": 2.2544673189292023e-05, "loss": 2.2159, "step": 91230 }, { "epoch": 6.1988721293654026, "grad_norm": 3.510981798171997, "learning_rate": 2.254042668840875e-05, "loss": 2.01, "step": 91235 }, { "epoch": 6.199211849436065, "grad_norm": 4.039813995361328, "learning_rate": 2.253618018752548e-05, "loss": 1.9479, "step": 91240 }, { "epoch": 6.199551569506727, "grad_norm": 3.362593173980713, "learning_rate": 2.253193368664221e-05, "loss": 2.2041, "step": 91245 }, { "epoch": 6.199891289577388, "grad_norm": 3.903749465942383, "learning_rate": 2.2527687185758935e-05, "loss": 2.0605, "step": 91250 }, { "epoch": 6.20023100964805, "grad_norm": 3.3263392448425293, "learning_rate": 2.2523440684875663e-05, "loss": 1.7945, "step": 91255 }, { "epoch": 6.200570729718712, "grad_norm": 3.643968105316162, "learning_rate": 2.251919418399239e-05, "loss": 1.9303, "step": 91260 }, { "epoch": 6.200910449789373, "grad_norm": 3.3432250022888184, "learning_rate": 2.251494768310912e-05, "loss": 2.1318, "step": 91265 }, 
{ "epoch": 6.201250169860035, "grad_norm": 4.029897212982178, "learning_rate": 2.2510701182225848e-05, "loss": 2.0098, "step": 91270 }, { "epoch": 6.201589889930697, "grad_norm": 3.1545426845550537, "learning_rate": 2.2506454681342576e-05, "loss": 2.1356, "step": 91275 }, { "epoch": 6.201929610001359, "grad_norm": 2.69808292388916, "learning_rate": 2.2502208180459304e-05, "loss": 2.1694, "step": 91280 }, { "epoch": 6.202269330072021, "grad_norm": 3.788043737411499, "learning_rate": 2.2497961679576028e-05, "loss": 1.8736, "step": 91285 }, { "epoch": 6.202609050142683, "grad_norm": 3.0943613052368164, "learning_rate": 2.249371517869276e-05, "loss": 2.1201, "step": 91290 }, { "epoch": 6.202948770213344, "grad_norm": 4.597536563873291, "learning_rate": 2.2489468677809488e-05, "loss": 2.2014, "step": 91295 }, { "epoch": 6.203288490284006, "grad_norm": 4.043461799621582, "learning_rate": 2.2485222176926212e-05, "loss": 1.6987, "step": 91300 }, { "epoch": 6.203628210354668, "grad_norm": 3.139177083969116, "learning_rate": 2.2480975676042944e-05, "loss": 2.144, "step": 91305 }, { "epoch": 6.203967930425329, "grad_norm": 3.68410325050354, "learning_rate": 2.2476729175159668e-05, "loss": 1.9469, "step": 91310 }, { "epoch": 6.204307650495991, "grad_norm": 3.2763540744781494, "learning_rate": 2.2472482674276396e-05, "loss": 1.9877, "step": 91315 }, { "epoch": 6.204647370566653, "grad_norm": 4.042452335357666, "learning_rate": 2.2468236173393124e-05, "loss": 2.2663, "step": 91320 }, { "epoch": 6.204987090637315, "grad_norm": 4.39103364944458, "learning_rate": 2.2463989672509852e-05, "loss": 1.8812, "step": 91325 }, { "epoch": 6.205326810707977, "grad_norm": 2.867337942123413, "learning_rate": 2.2459743171626584e-05, "loss": 2.0448, "step": 91330 }, { "epoch": 6.205666530778639, "grad_norm": 3.4276108741760254, "learning_rate": 2.2455496670743308e-05, "loss": 2.0809, "step": 91335 }, { "epoch": 6.2060062508493, "grad_norm": 3.038614511489868, "learning_rate": 
2.2451250169860036e-05, "loss": 2.0174, "step": 91340 }, { "epoch": 6.206345970919962, "grad_norm": 4.6222333908081055, "learning_rate": 2.2447003668976764e-05, "loss": 2.0592, "step": 91345 }, { "epoch": 6.206685690990624, "grad_norm": 3.6124629974365234, "learning_rate": 2.2442757168093492e-05, "loss": 1.9878, "step": 91350 }, { "epoch": 6.207025411061285, "grad_norm": 3.962394952774048, "learning_rate": 2.243851066721022e-05, "loss": 2.0673, "step": 91355 }, { "epoch": 6.207365131131947, "grad_norm": 3.093003034591675, "learning_rate": 2.2434264166326948e-05, "loss": 2.0349, "step": 91360 }, { "epoch": 6.207704851202609, "grad_norm": 4.772432327270508, "learning_rate": 2.2430017665443676e-05, "loss": 2.0187, "step": 91365 }, { "epoch": 6.208044571273271, "grad_norm": 3.8217079639434814, "learning_rate": 2.24257711645604e-05, "loss": 2.2257, "step": 91370 }, { "epoch": 6.208384291343933, "grad_norm": 3.624948740005493, "learning_rate": 2.2421524663677132e-05, "loss": 2.346, "step": 91375 }, { "epoch": 6.208724011414595, "grad_norm": 4.138445854187012, "learning_rate": 2.241727816279386e-05, "loss": 2.0907, "step": 91380 }, { "epoch": 6.209063731485256, "grad_norm": 3.433640241622925, "learning_rate": 2.2413031661910585e-05, "loss": 2.2322, "step": 91385 }, { "epoch": 6.209403451555918, "grad_norm": 3.8049652576446533, "learning_rate": 2.2408785161027316e-05, "loss": 2.0903, "step": 91390 }, { "epoch": 6.20974317162658, "grad_norm": 3.515427589416504, "learning_rate": 2.240453866014404e-05, "loss": 2.097, "step": 91395 }, { "epoch": 6.210082891697241, "grad_norm": 3.0302751064300537, "learning_rate": 2.240029215926077e-05, "loss": 2.1499, "step": 91400 }, { "epoch": 6.210422611767903, "grad_norm": 2.808748722076416, "learning_rate": 2.2396045658377497e-05, "loss": 2.1313, "step": 91405 }, { "epoch": 6.210762331838565, "grad_norm": 3.091510534286499, "learning_rate": 2.2391799157494225e-05, "loss": 1.8346, "step": 91410 }, { "epoch": 6.211102051909227, "grad_norm": 
4.279347896575928, "learning_rate": 2.2387552656610956e-05, "loss": 1.9602, "step": 91415 }, { "epoch": 6.211441771979889, "grad_norm": 3.4725818634033203, "learning_rate": 2.238330615572768e-05, "loss": 2.1883, "step": 91420 }, { "epoch": 6.211781492050551, "grad_norm": 3.9307701587677, "learning_rate": 2.237905965484441e-05, "loss": 1.7914, "step": 91425 }, { "epoch": 6.212121212121212, "grad_norm": 4.282924175262451, "learning_rate": 2.2374813153961137e-05, "loss": 1.9442, "step": 91430 }, { "epoch": 6.212460932191874, "grad_norm": 3.363245725631714, "learning_rate": 2.2370566653077865e-05, "loss": 2.2692, "step": 91435 }, { "epoch": 6.212800652262536, "grad_norm": 4.022827625274658, "learning_rate": 2.2366320152194593e-05, "loss": 2.3401, "step": 91440 }, { "epoch": 6.213140372333197, "grad_norm": 3.419748067855835, "learning_rate": 2.236207365131132e-05, "loss": 2.1749, "step": 91445 }, { "epoch": 6.213480092403859, "grad_norm": 3.596963882446289, "learning_rate": 2.235782715042805e-05, "loss": 2.083, "step": 91450 }, { "epoch": 6.213819812474521, "grad_norm": 3.641779661178589, "learning_rate": 2.2353580649544774e-05, "loss": 2.0261, "step": 91455 }, { "epoch": 6.214159532545183, "grad_norm": 3.1276330947875977, "learning_rate": 2.2349334148661505e-05, "loss": 2.4678, "step": 91460 }, { "epoch": 6.214499252615845, "grad_norm": 3.8494057655334473, "learning_rate": 2.2345087647778233e-05, "loss": 2.0571, "step": 91465 }, { "epoch": 6.214838972686506, "grad_norm": 4.4504499435424805, "learning_rate": 2.2340841146894958e-05, "loss": 1.9521, "step": 91470 }, { "epoch": 6.215178692757168, "grad_norm": 2.895796775817871, "learning_rate": 2.233659464601169e-05, "loss": 2.0654, "step": 91475 }, { "epoch": 6.21551841282783, "grad_norm": 2.8362395763397217, "learning_rate": 2.2332348145128414e-05, "loss": 1.8795, "step": 91480 }, { "epoch": 6.215858132898491, "grad_norm": 4.407169818878174, "learning_rate": 2.232810164424514e-05, "loss": 2.1384, "step": 91485 }, { 
"epoch": 6.216197852969153, "grad_norm": 3.7323837280273438, "learning_rate": 2.2323855143361873e-05, "loss": 2.2686, "step": 91490 }, { "epoch": 6.216537573039815, "grad_norm": 3.451233148574829, "learning_rate": 2.2319608642478598e-05, "loss": 1.9969, "step": 91495 }, { "epoch": 6.2168772931104765, "grad_norm": 4.141590595245361, "learning_rate": 2.231536214159533e-05, "loss": 2.1469, "step": 91500 }, { "epoch": 6.217217013181139, "grad_norm": 2.8789029121398926, "learning_rate": 2.2311115640712054e-05, "loss": 2.1628, "step": 91505 }, { "epoch": 6.217556733251801, "grad_norm": 3.780280351638794, "learning_rate": 2.2306869139828782e-05, "loss": 1.5955, "step": 91510 }, { "epoch": 6.217896453322462, "grad_norm": 4.007518768310547, "learning_rate": 2.230262263894551e-05, "loss": 1.785, "step": 91515 }, { "epoch": 6.218236173393124, "grad_norm": 3.7534077167510986, "learning_rate": 2.2298376138062238e-05, "loss": 1.8166, "step": 91520 }, { "epoch": 6.218575893463786, "grad_norm": 4.455373287200928, "learning_rate": 2.2294129637178966e-05, "loss": 2.2511, "step": 91525 }, { "epoch": 6.218915613534447, "grad_norm": 4.047621726989746, "learning_rate": 2.2289883136295694e-05, "loss": 2.0541, "step": 91530 }, { "epoch": 6.219255333605109, "grad_norm": 3.9456889629364014, "learning_rate": 2.2285636635412422e-05, "loss": 1.9595, "step": 91535 }, { "epoch": 6.219595053675771, "grad_norm": 4.120738983154297, "learning_rate": 2.228139013452915e-05, "loss": 2.2122, "step": 91540 }, { "epoch": 6.2199347737464326, "grad_norm": 4.227014064788818, "learning_rate": 2.2277143633645878e-05, "loss": 1.9405, "step": 91545 }, { "epoch": 6.220274493817095, "grad_norm": 3.6491024494171143, "learning_rate": 2.2272897132762606e-05, "loss": 1.9031, "step": 91550 }, { "epoch": 6.220614213887757, "grad_norm": 3.613924980163574, "learning_rate": 2.226865063187933e-05, "loss": 2.104, "step": 91555 }, { "epoch": 6.220953933958418, "grad_norm": 3.4516093730926514, "learning_rate": 
2.2264404130996062e-05, "loss": 2.1926, "step": 91560 }, { "epoch": 6.22129365402908, "grad_norm": 3.5425710678100586, "learning_rate": 2.2260157630112786e-05, "loss": 2.049, "step": 91565 }, { "epoch": 6.221633374099742, "grad_norm": null, "learning_rate": 2.225676042940617e-05, "loss": 1.9061, "step": 91570 }, { "epoch": 6.221973094170403, "grad_norm": 3.3421847820281982, "learning_rate": 2.2252513928522897e-05, "loss": 1.9292, "step": 91575 }, { "epoch": 6.222312814241065, "grad_norm": 3.761216402053833, "learning_rate": 2.2248267427639628e-05, "loss": 2.299, "step": 91580 }, { "epoch": 6.222652534311727, "grad_norm": 3.044524908065796, "learning_rate": 2.2244020926756353e-05, "loss": 2.0052, "step": 91585 }, { "epoch": 6.222992254382389, "grad_norm": 3.2176201343536377, "learning_rate": 2.223977442587308e-05, "loss": 2.0102, "step": 91590 }, { "epoch": 6.223331974453051, "grad_norm": 2.8037519454956055, "learning_rate": 2.223552792498981e-05, "loss": 2.0267, "step": 91595 }, { "epoch": 6.223671694523713, "grad_norm": 3.7791783809661865, "learning_rate": 2.2231281424106537e-05, "loss": 2.1783, "step": 91600 }, { "epoch": 6.224011414594374, "grad_norm": 3.4295856952667236, "learning_rate": 2.2227034923223265e-05, "loss": 2.1424, "step": 91605 }, { "epoch": 6.224351134665036, "grad_norm": 3.331407308578491, "learning_rate": 2.2222788422339993e-05, "loss": 1.9025, "step": 91610 }, { "epoch": 6.224690854735698, "grad_norm": 4.041971683502197, "learning_rate": 2.221854192145672e-05, "loss": 2.264, "step": 91615 }, { "epoch": 6.225030574806359, "grad_norm": 4.397294044494629, "learning_rate": 2.221429542057345e-05, "loss": 1.8774, "step": 91620 }, { "epoch": 6.225370294877021, "grad_norm": 3.878584146499634, "learning_rate": 2.2210048919690177e-05, "loss": 2.1881, "step": 91625 }, { "epoch": 6.225710014947683, "grad_norm": 3.3807826042175293, "learning_rate": 2.2205802418806905e-05, "loss": 2.0365, "step": 91630 }, { "epoch": 6.226049735018345, "grad_norm": 
3.2938663959503174, "learning_rate": 2.2201555917923633e-05, "loss": 2.0129, "step": 91635 }, { "epoch": 6.226389455089007, "grad_norm": 3.351886034011841, "learning_rate": 2.219730941704036e-05, "loss": 1.8428, "step": 91640 }, { "epoch": 6.226729175159669, "grad_norm": 3.8520498275756836, "learning_rate": 2.2193062916157086e-05, "loss": 2.2101, "step": 91645 }, { "epoch": 6.22706889523033, "grad_norm": 4.281463146209717, "learning_rate": 2.2188816415273817e-05, "loss": 1.945, "step": 91650 }, { "epoch": 6.227408615300992, "grad_norm": 3.5991311073303223, "learning_rate": 2.2184569914390545e-05, "loss": 2.005, "step": 91655 }, { "epoch": 6.227748335371654, "grad_norm": 3.4435646533966064, "learning_rate": 2.218032341350727e-05, "loss": 1.997, "step": 91660 }, { "epoch": 6.228088055442315, "grad_norm": 3.0886757373809814, "learning_rate": 2.2176076912624e-05, "loss": 2.0552, "step": 91665 }, { "epoch": 6.228427775512977, "grad_norm": 3.951831579208374, "learning_rate": 2.2171830411740726e-05, "loss": 1.7509, "step": 91670 }, { "epoch": 6.228767495583639, "grad_norm": 3.6862235069274902, "learning_rate": 2.2167583910857454e-05, "loss": 2.0621, "step": 91675 }, { "epoch": 6.229107215654301, "grad_norm": 3.310086727142334, "learning_rate": 2.216333740997418e-05, "loss": 1.9716, "step": 91680 }, { "epoch": 6.229446935724963, "grad_norm": 3.4661049842834473, "learning_rate": 2.215909090909091e-05, "loss": 2.2554, "step": 91685 }, { "epoch": 6.229786655795625, "grad_norm": 3.3948428630828857, "learning_rate": 2.2154844408207638e-05, "loss": 1.8105, "step": 91690 }, { "epoch": 6.230126375866286, "grad_norm": 3.8100805282592773, "learning_rate": 2.2150597907324366e-05, "loss": 1.9247, "step": 91695 }, { "epoch": 6.230466095936948, "grad_norm": 3.001716136932373, "learning_rate": 2.2146351406441094e-05, "loss": 2.0574, "step": 91700 }, { "epoch": 6.23080581600761, "grad_norm": 2.817492723464966, "learning_rate": 2.214210490555782e-05, "loss": 2.0293, "step": 91705 }, { 
"epoch": 6.231145536078271, "grad_norm": 3.1769585609436035, "learning_rate": 2.213785840467455e-05, "loss": 2.0287, "step": 91710 }, { "epoch": 6.231485256148933, "grad_norm": 3.4425134658813477, "learning_rate": 2.2133611903791278e-05, "loss": 2.1091, "step": 91715 }, { "epoch": 6.231824976219595, "grad_norm": 5.159204959869385, "learning_rate": 2.2129365402908006e-05, "loss": 2.0817, "step": 91720 }, { "epoch": 6.232164696290257, "grad_norm": 3.624850273132324, "learning_rate": 2.2125118902024734e-05, "loss": 1.9432, "step": 91725 }, { "epoch": 6.232504416360919, "grad_norm": 3.190964698791504, "learning_rate": 2.2120872401141458e-05, "loss": 1.9072, "step": 91730 }, { "epoch": 6.232844136431581, "grad_norm": 3.0626578330993652, "learning_rate": 2.211662590025819e-05, "loss": 2.184, "step": 91735 }, { "epoch": 6.233183856502242, "grad_norm": 3.59781813621521, "learning_rate": 2.2112379399374918e-05, "loss": 2.0839, "step": 91740 }, { "epoch": 6.233523576572904, "grad_norm": 3.7139196395874023, "learning_rate": 2.2108132898491642e-05, "loss": 2.0527, "step": 91745 }, { "epoch": 6.233863296643566, "grad_norm": 4.052786827087402, "learning_rate": 2.2103886397608374e-05, "loss": 2.2419, "step": 91750 }, { "epoch": 6.234203016714227, "grad_norm": 3.689929962158203, "learning_rate": 2.20996398967251e-05, "loss": 2.0132, "step": 91755 }, { "epoch": 6.234542736784889, "grad_norm": 3.1974055767059326, "learning_rate": 2.2095393395841826e-05, "loss": 2.191, "step": 91760 }, { "epoch": 6.234882456855551, "grad_norm": 3.2463831901550293, "learning_rate": 2.2091146894958554e-05, "loss": 1.9632, "step": 91765 }, { "epoch": 6.235222176926213, "grad_norm": 3.191344976425171, "learning_rate": 2.2086900394075282e-05, "loss": 2.2363, "step": 91770 }, { "epoch": 6.235561896996875, "grad_norm": 3.944674253463745, "learning_rate": 2.208265389319201e-05, "loss": 2.0494, "step": 91775 }, { "epoch": 6.235901617067537, "grad_norm": 4.521546363830566, "learning_rate": 
2.207840739230874e-05, "loss": 2.2143, "step": 91780 }, { "epoch": 6.236241337138198, "grad_norm": 3.6010165214538574, "learning_rate": 2.2074160891425466e-05, "loss": 1.8302, "step": 91785 }, { "epoch": 6.23658105720886, "grad_norm": 3.5889787673950195, "learning_rate": 2.2069914390542194e-05, "loss": 2.044, "step": 91790 }, { "epoch": 6.236920777279522, "grad_norm": 3.716583490371704, "learning_rate": 2.2065667889658922e-05, "loss": 2.0305, "step": 91795 }, { "epoch": 6.237260497350183, "grad_norm": 3.575484275817871, "learning_rate": 2.206142138877565e-05, "loss": 2.2435, "step": 91800 }, { "epoch": 6.237600217420845, "grad_norm": 3.935791254043579, "learning_rate": 2.205717488789238e-05, "loss": 2.27, "step": 91805 }, { "epoch": 6.237939937491507, "grad_norm": 4.244126319885254, "learning_rate": 2.2052928387009106e-05, "loss": 1.9171, "step": 91810 }, { "epoch": 6.238279657562169, "grad_norm": 3.9946517944335938, "learning_rate": 2.204868188612583e-05, "loss": 2.1582, "step": 91815 }, { "epoch": 6.238619377632831, "grad_norm": 4.080036640167236, "learning_rate": 2.2044435385242562e-05, "loss": 2.0088, "step": 91820 }, { "epoch": 6.238959097703493, "grad_norm": 2.86857271194458, "learning_rate": 2.204018888435929e-05, "loss": 1.9304, "step": 91825 }, { "epoch": 6.239298817774154, "grad_norm": 3.0445008277893066, "learning_rate": 2.2035942383476015e-05, "loss": 1.7, "step": 91830 }, { "epoch": 6.239638537844816, "grad_norm": 3.6309919357299805, "learning_rate": 2.2031695882592746e-05, "loss": 1.8966, "step": 91835 }, { "epoch": 6.239978257915477, "grad_norm": 4.100518226623535, "learning_rate": 2.202744938170947e-05, "loss": 1.9508, "step": 91840 }, { "epoch": 6.240317977986139, "grad_norm": 3.8041937351226807, "learning_rate": 2.20232028808262e-05, "loss": 2.0474, "step": 91845 }, { "epoch": 6.240657698056801, "grad_norm": 3.3291516304016113, "learning_rate": 2.201895637994293e-05, "loss": 2.0924, "step": 91850 }, { "epoch": 6.240997418127463, "grad_norm": 
3.9588942527770996, "learning_rate": 2.2014709879059655e-05, "loss": 2.0811, "step": 91855 }, { "epoch": 6.241337138198125, "grad_norm": 3.601405382156372, "learning_rate": 2.2010463378176383e-05, "loss": 2.1321, "step": 91860 }, { "epoch": 6.241676858268787, "grad_norm": 3.5649147033691406, "learning_rate": 2.200621687729311e-05, "loss": 2.0724, "step": 91865 }, { "epoch": 6.242016578339448, "grad_norm": 3.0442428588867188, "learning_rate": 2.200197037640984e-05, "loss": 1.8145, "step": 91870 }, { "epoch": 6.24235629841011, "grad_norm": 5.2926764488220215, "learning_rate": 2.1997723875526567e-05, "loss": 2.0237, "step": 91875 }, { "epoch": 6.242696018480772, "grad_norm": 3.027522325515747, "learning_rate": 2.1993477374643295e-05, "loss": 1.9206, "step": 91880 }, { "epoch": 6.243035738551433, "grad_norm": 3.7896246910095215, "learning_rate": 2.1989230873760023e-05, "loss": 2.0012, "step": 91885 }, { "epoch": 6.243375458622095, "grad_norm": 3.2572593688964844, "learning_rate": 2.198498437287675e-05, "loss": 2.0006, "step": 91890 }, { "epoch": 6.243715178692757, "grad_norm": 3.061126470565796, "learning_rate": 2.198073787199348e-05, "loss": 2.1953, "step": 91895 }, { "epoch": 6.244054898763419, "grad_norm": 4.142983436584473, "learning_rate": 2.1976491371110207e-05, "loss": 2.1345, "step": 91900 }, { "epoch": 6.244394618834081, "grad_norm": 4.099238395690918, "learning_rate": 2.1972244870226935e-05, "loss": 2.1876, "step": 91905 }, { "epoch": 6.244734338904743, "grad_norm": 4.3735785484313965, "learning_rate": 2.1967998369343663e-05, "loss": 2.3251, "step": 91910 }, { "epoch": 6.245074058975404, "grad_norm": 3.394318103790283, "learning_rate": 2.1963751868460388e-05, "loss": 2.017, "step": 91915 }, { "epoch": 6.245413779046066, "grad_norm": 4.2888875007629395, "learning_rate": 2.195950536757712e-05, "loss": 2.1038, "step": 91920 }, { "epoch": 6.245753499116728, "grad_norm": 3.8743677139282227, "learning_rate": 2.1955258866693844e-05, "loss": 2.2464, "step": 91925 }, 
{ "epoch": 6.246093219187389, "grad_norm": 4.0059685707092285, "learning_rate": 2.1951012365810572e-05, "loss": 2.0282, "step": 91930 }, { "epoch": 6.246432939258051, "grad_norm": 3.9629714488983154, "learning_rate": 2.1946765864927303e-05, "loss": 1.939, "step": 91935 }, { "epoch": 6.246772659328713, "grad_norm": 3.411055564880371, "learning_rate": 2.1942519364044028e-05, "loss": 2.2044, "step": 91940 }, { "epoch": 6.247112379399375, "grad_norm": 3.7095468044281006, "learning_rate": 2.1938272863160756e-05, "loss": 2.0821, "step": 91945 }, { "epoch": 6.247452099470037, "grad_norm": 4.261784076690674, "learning_rate": 2.1934026362277484e-05, "loss": 1.9503, "step": 91950 }, { "epoch": 6.247791819540699, "grad_norm": 3.362027645111084, "learning_rate": 2.1929779861394212e-05, "loss": 2.0493, "step": 91955 }, { "epoch": 6.24813153961136, "grad_norm": 3.1889896392822266, "learning_rate": 2.192553336051094e-05, "loss": 2.0902, "step": 91960 }, { "epoch": 6.248471259682022, "grad_norm": 3.984426259994507, "learning_rate": 2.1921286859627668e-05, "loss": 2.053, "step": 91965 }, { "epoch": 6.248810979752684, "grad_norm": 3.4460525512695312, "learning_rate": 2.1917040358744396e-05, "loss": 2.0047, "step": 91970 }, { "epoch": 6.249150699823345, "grad_norm": 2.9137654304504395, "learning_rate": 2.1912793857861124e-05, "loss": 2.0335, "step": 91975 }, { "epoch": 6.249490419894007, "grad_norm": 3.5800859928131104, "learning_rate": 2.1908547356977852e-05, "loss": 2.0232, "step": 91980 }, { "epoch": 6.249830139964669, "grad_norm": 3.566896915435791, "learning_rate": 2.190430085609458e-05, "loss": 1.9184, "step": 91985 }, { "epoch": 6.250169860035331, "grad_norm": 3.271986722946167, "learning_rate": 2.1900054355211308e-05, "loss": 2.1262, "step": 91990 }, { "epoch": 6.250509580105993, "grad_norm": 3.2169036865234375, "learning_rate": 2.1895807854328036e-05, "loss": 2.0282, "step": 91995 }, { "epoch": 6.250849300176655, "grad_norm": 3.9422810077667236, "learning_rate": 
2.189156135344476e-05, "loss": 2.1246, "step": 92000 }, { "epoch": 6.251189020247316, "grad_norm": 2.9152333736419678, "learning_rate": 2.1887314852561492e-05, "loss": 2.3286, "step": 92005 }, { "epoch": 6.251528740317978, "grad_norm": 3.03959584236145, "learning_rate": 2.1883068351678217e-05, "loss": 2.0773, "step": 92010 }, { "epoch": 6.25186846038864, "grad_norm": 2.84201979637146, "learning_rate": 2.1878821850794945e-05, "loss": 1.8849, "step": 92015 }, { "epoch": 6.252208180459301, "grad_norm": 3.9141998291015625, "learning_rate": 2.1874575349911676e-05, "loss": 2.0925, "step": 92020 }, { "epoch": 6.252547900529963, "grad_norm": 3.7750372886657715, "learning_rate": 2.18703288490284e-05, "loss": 2.0433, "step": 92025 }, { "epoch": 6.252887620600625, "grad_norm": 3.811718702316284, "learning_rate": 2.186608234814513e-05, "loss": 1.9251, "step": 92030 }, { "epoch": 6.253227340671287, "grad_norm": 3.0188255310058594, "learning_rate": 2.1861835847261857e-05, "loss": 2.0462, "step": 92035 }, { "epoch": 6.253567060741949, "grad_norm": 3.1268692016601562, "learning_rate": 2.1857589346378585e-05, "loss": 2.0596, "step": 92040 }, { "epoch": 6.253906780812611, "grad_norm": 3.1381711959838867, "learning_rate": 2.1853342845495313e-05, "loss": 2.2265, "step": 92045 }, { "epoch": 6.254246500883272, "grad_norm": 2.9306652545928955, "learning_rate": 2.184909634461204e-05, "loss": 2.1382, "step": 92050 }, { "epoch": 6.254586220953934, "grad_norm": 3.4830267429351807, "learning_rate": 2.184484984372877e-05, "loss": 2.062, "step": 92055 }, { "epoch": 6.254925941024596, "grad_norm": 3.516663074493408, "learning_rate": 2.1840603342845497e-05, "loss": 2.1521, "step": 92060 }, { "epoch": 6.255265661095257, "grad_norm": 4.01127290725708, "learning_rate": 2.1836356841962225e-05, "loss": 1.9632, "step": 92065 }, { "epoch": 6.255605381165919, "grad_norm": 3.8949522972106934, "learning_rate": 2.1832110341078953e-05, "loss": 1.8042, "step": 92070 }, { "epoch": 6.255945101236581, 
"grad_norm": 3.573599100112915, "learning_rate": 2.182786384019568e-05, "loss": 2.2671, "step": 92075 }, { "epoch": 6.256284821307243, "grad_norm": 3.5825021266937256, "learning_rate": 2.182361733931241e-05, "loss": 2.0415, "step": 92080 }, { "epoch": 6.256624541377905, "grad_norm": 3.8059704303741455, "learning_rate": 2.1819370838429133e-05, "loss": 1.987, "step": 92085 }, { "epoch": 6.256964261448567, "grad_norm": 2.7255473136901855, "learning_rate": 2.1815124337545865e-05, "loss": 2.006, "step": 92090 }, { "epoch": 6.257303981519228, "grad_norm": 4.327844142913818, "learning_rate": 2.181087783666259e-05, "loss": 1.7555, "step": 92095 }, { "epoch": 6.25764370158989, "grad_norm": 3.35943341255188, "learning_rate": 2.1806631335779317e-05, "loss": 2.1506, "step": 92100 }, { "epoch": 6.257983421660552, "grad_norm": 3.1440203189849854, "learning_rate": 2.180238483489605e-05, "loss": 2.1641, "step": 92105 }, { "epoch": 6.258323141731213, "grad_norm": 3.701590061187744, "learning_rate": 2.1798138334012773e-05, "loss": 2.1186, "step": 92110 }, { "epoch": 6.258662861801875, "grad_norm": 4.072931289672852, "learning_rate": 2.17938918331295e-05, "loss": 2.1546, "step": 92115 }, { "epoch": 6.259002581872537, "grad_norm": 3.1395246982574463, "learning_rate": 2.178964533224623e-05, "loss": 2.3195, "step": 92120 }, { "epoch": 6.259342301943199, "grad_norm": 3.941009759902954, "learning_rate": 2.1785398831362957e-05, "loss": 1.8458, "step": 92125 }, { "epoch": 6.259682022013861, "grad_norm": 3.8266255855560303, "learning_rate": 2.1781152330479685e-05, "loss": 2.1694, "step": 92130 }, { "epoch": 6.260021742084522, "grad_norm": 3.0079636573791504, "learning_rate": 2.1776905829596413e-05, "loss": 2.0017, "step": 92135 }, { "epoch": 6.260361462155184, "grad_norm": 4.352864742279053, "learning_rate": 2.177265932871314e-05, "loss": 2.1307, "step": 92140 }, { "epoch": 6.260701182225846, "grad_norm": 3.2082178592681885, "learning_rate": 2.176841282782987e-05, "loss": 1.8293, "step": 
92145 }, { "epoch": 6.261040902296507, "grad_norm": 3.1547067165374756, "learning_rate": 2.1764166326946597e-05, "loss": 2.0941, "step": 92150 }, { "epoch": 6.261380622367169, "grad_norm": 4.684684753417969, "learning_rate": 2.1759919826063325e-05, "loss": 2.1084, "step": 92155 }, { "epoch": 6.261720342437831, "grad_norm": 3.616912841796875, "learning_rate": 2.1755673325180053e-05, "loss": 2.2174, "step": 92160 }, { "epoch": 6.262060062508493, "grad_norm": 3.921924114227295, "learning_rate": 2.175142682429678e-05, "loss": 2.216, "step": 92165 }, { "epoch": 6.262399782579155, "grad_norm": 3.858123779296875, "learning_rate": 2.1747180323413506e-05, "loss": 1.9463, "step": 92170 }, { "epoch": 6.262739502649817, "grad_norm": 2.750429153442383, "learning_rate": 2.1742933822530237e-05, "loss": 2.2213, "step": 92175 }, { "epoch": 6.263079222720478, "grad_norm": 4.175832748413086, "learning_rate": 2.1738687321646965e-05, "loss": 2.0895, "step": 92180 }, { "epoch": 6.26341894279114, "grad_norm": 3.8666634559631348, "learning_rate": 2.173444082076369e-05, "loss": 2.008, "step": 92185 }, { "epoch": 6.263758662861802, "grad_norm": 3.5558371543884277, "learning_rate": 2.173019431988042e-05, "loss": 2.1165, "step": 92190 }, { "epoch": 6.264098382932463, "grad_norm": 3.893692970275879, "learning_rate": 2.1725947818997146e-05, "loss": 2.1781, "step": 92195 }, { "epoch": 6.264438103003125, "grad_norm": 4.202792644500732, "learning_rate": 2.1721701318113874e-05, "loss": 2.1158, "step": 92200 }, { "epoch": 6.264777823073787, "grad_norm": 3.3694283962249756, "learning_rate": 2.1717454817230602e-05, "loss": 2.0091, "step": 92205 }, { "epoch": 6.265117543144449, "grad_norm": 3.6362550258636475, "learning_rate": 2.171320831634733e-05, "loss": 2.1357, "step": 92210 }, { "epoch": 6.265457263215111, "grad_norm": 3.756934642791748, "learning_rate": 2.1708961815464058e-05, "loss": 2.1991, "step": 92215 }, { "epoch": 6.265796983285773, "grad_norm": 4.917183876037598, "learning_rate": 
2.1704715314580786e-05, "loss": 2.0019, "step": 92220 }, { "epoch": 6.266136703356434, "grad_norm": 3.4422764778137207, "learning_rate": 2.1700468813697514e-05, "loss": 2.1742, "step": 92225 }, { "epoch": 6.266476423427096, "grad_norm": 3.0785555839538574, "learning_rate": 2.1696222312814242e-05, "loss": 1.853, "step": 92230 }, { "epoch": 6.266816143497758, "grad_norm": 4.786409854888916, "learning_rate": 2.169197581193097e-05, "loss": 2.0875, "step": 92235 }, { "epoch": 6.267155863568419, "grad_norm": 3.6294407844543457, "learning_rate": 2.1687729311047698e-05, "loss": 1.8553, "step": 92240 }, { "epoch": 6.267495583639081, "grad_norm": 3.5480797290802, "learning_rate": 2.1683482810164426e-05, "loss": 2.3239, "step": 92245 }, { "epoch": 6.267835303709743, "grad_norm": 4.2196574211120605, "learning_rate": 2.1679236309281154e-05, "loss": 2.1924, "step": 92250 }, { "epoch": 6.268175023780405, "grad_norm": 4.1579389572143555, "learning_rate": 2.167498980839788e-05, "loss": 2.3244, "step": 92255 }, { "epoch": 6.268514743851067, "grad_norm": 3.1041829586029053, "learning_rate": 2.167074330751461e-05, "loss": 2.2285, "step": 92260 }, { "epoch": 6.268854463921729, "grad_norm": 3.3332526683807373, "learning_rate": 2.1666496806631338e-05, "loss": 2.1962, "step": 92265 }, { "epoch": 6.26919418399239, "grad_norm": 3.786626100540161, "learning_rate": 2.1662250305748063e-05, "loss": 2.029, "step": 92270 }, { "epoch": 6.269533904063052, "grad_norm": 4.274959564208984, "learning_rate": 2.1658003804864794e-05, "loss": 2.1158, "step": 92275 }, { "epoch": 6.269873624133714, "grad_norm": 3.4005069732666016, "learning_rate": 2.165375730398152e-05, "loss": 1.8667, "step": 92280 }, { "epoch": 6.270213344204375, "grad_norm": 3.5466837882995605, "learning_rate": 2.1649510803098247e-05, "loss": 2.3538, "step": 92285 }, { "epoch": 6.270553064275037, "grad_norm": 4.6721601486206055, "learning_rate": 2.1645264302214975e-05, "loss": 2.2698, "step": 92290 }, { "epoch": 6.270892784345699, 
"grad_norm": 3.764373302459717, "learning_rate": 2.1641017801331703e-05, "loss": 2.0678, "step": 92295 }, { "epoch": 6.271232504416361, "grad_norm": 2.6616554260253906, "learning_rate": 2.163677130044843e-05, "loss": 1.8696, "step": 92300 }, { "epoch": 6.271572224487023, "grad_norm": 3.154217481613159, "learning_rate": 2.163252479956516e-05, "loss": 2.0019, "step": 92305 }, { "epoch": 6.271911944557685, "grad_norm": 3.1100223064422607, "learning_rate": 2.1628278298681887e-05, "loss": 2.3405, "step": 92310 }, { "epoch": 6.272251664628346, "grad_norm": 3.7363104820251465, "learning_rate": 2.1624031797798615e-05, "loss": 2.2491, "step": 92315 }, { "epoch": 6.272591384699008, "grad_norm": 4.006425857543945, "learning_rate": 2.1619785296915343e-05, "loss": 2.2346, "step": 92320 }, { "epoch": 6.27293110476967, "grad_norm": 3.1644275188446045, "learning_rate": 2.161553879603207e-05, "loss": 2.1672, "step": 92325 }, { "epoch": 6.273270824840331, "grad_norm": 3.7966320514678955, "learning_rate": 2.16112922951488e-05, "loss": 2.0841, "step": 92330 }, { "epoch": 6.273610544910993, "grad_norm": 3.3752927780151367, "learning_rate": 2.1607045794265527e-05, "loss": 2.0808, "step": 92335 }, { "epoch": 6.273950264981655, "grad_norm": 4.274245738983154, "learning_rate": 2.160279929338225e-05, "loss": 2.1128, "step": 92340 }, { "epoch": 6.274289985052317, "grad_norm": 2.915858268737793, "learning_rate": 2.1598552792498983e-05, "loss": 2.0181, "step": 92345 }, { "epoch": 6.274629705122979, "grad_norm": 4.019103527069092, "learning_rate": 2.159430629161571e-05, "loss": 2.1054, "step": 92350 }, { "epoch": 6.274969425193641, "grad_norm": 3.9818215370178223, "learning_rate": 2.1590059790732436e-05, "loss": 1.9008, "step": 92355 }, { "epoch": 6.275309145264302, "grad_norm": 3.300422191619873, "learning_rate": 2.1585813289849167e-05, "loss": 2.0408, "step": 92360 }, { "epoch": 6.275648865334964, "grad_norm": 3.6176326274871826, "learning_rate": 2.158156678896589e-05, "loss": 1.9693, "step": 
92365 }, { "epoch": 6.275988585405626, "grad_norm": 3.4033238887786865, "learning_rate": 2.157732028808262e-05, "loss": 2.0828, "step": 92370 }, { "epoch": 6.276328305476287, "grad_norm": 3.3150432109832764, "learning_rate": 2.157307378719935e-05, "loss": 2.0737, "step": 92375 }, { "epoch": 6.276668025546949, "grad_norm": 3.7947676181793213, "learning_rate": 2.1568827286316076e-05, "loss": 1.9334, "step": 92380 }, { "epoch": 6.277007745617611, "grad_norm": 2.809171199798584, "learning_rate": 2.1564580785432804e-05, "loss": 2.3356, "step": 92385 }, { "epoch": 6.277347465688273, "grad_norm": 4.6357197761535645, "learning_rate": 2.156033428454953e-05, "loss": 1.97, "step": 92390 }, { "epoch": 6.277687185758935, "grad_norm": 2.362764358520508, "learning_rate": 2.155608778366626e-05, "loss": 2.0226, "step": 92395 }, { "epoch": 6.278026905829597, "grad_norm": 3.1938636302948, "learning_rate": 2.1551841282782988e-05, "loss": 1.911, "step": 92400 }, { "epoch": 6.278366625900258, "grad_norm": 3.831845998764038, "learning_rate": 2.1547594781899716e-05, "loss": 2.1478, "step": 92405 }, { "epoch": 6.27870634597092, "grad_norm": 3.1713247299194336, "learning_rate": 2.1543348281016444e-05, "loss": 2.0419, "step": 92410 }, { "epoch": 6.279046066041582, "grad_norm": 4.274514198303223, "learning_rate": 2.153910178013317e-05, "loss": 1.967, "step": 92415 }, { "epoch": 6.279385786112243, "grad_norm": 3.7759385108947754, "learning_rate": 2.15348552792499e-05, "loss": 1.8644, "step": 92420 }, { "epoch": 6.279725506182905, "grad_norm": 3.089057207107544, "learning_rate": 2.1530608778366628e-05, "loss": 2.2609, "step": 92425 }, { "epoch": 6.2800652262535674, "grad_norm": 3.714200973510742, "learning_rate": 2.1526362277483356e-05, "loss": 2.0167, "step": 92430 }, { "epoch": 6.280404946324229, "grad_norm": 3.990462303161621, "learning_rate": 2.1522115776600084e-05, "loss": 2.2056, "step": 92435 }, { "epoch": 6.280744666394891, "grad_norm": 3.8145556449890137, "learning_rate": 
2.1517869275716808e-05, "loss": 2.0349, "step": 92440 }, { "epoch": 6.281084386465553, "grad_norm": 5.004904270172119, "learning_rate": 2.151362277483354e-05, "loss": 2.1701, "step": 92445 }, { "epoch": 6.281424106536214, "grad_norm": 3.648780345916748, "learning_rate": 2.1509376273950264e-05, "loss": 2.0897, "step": 92450 }, { "epoch": 6.281763826606876, "grad_norm": 3.568758964538574, "learning_rate": 2.1505129773066992e-05, "loss": 2.1871, "step": 92455 }, { "epoch": 6.282103546677538, "grad_norm": 3.6645095348358154, "learning_rate": 2.1500883272183724e-05, "loss": 2.3784, "step": 92460 }, { "epoch": 6.282443266748199, "grad_norm": 4.099963188171387, "learning_rate": 2.149663677130045e-05, "loss": 1.845, "step": 92465 }, { "epoch": 6.282782986818861, "grad_norm": 3.6466052532196045, "learning_rate": 2.1492390270417176e-05, "loss": 2.034, "step": 92470 }, { "epoch": 6.2831227068895235, "grad_norm": 4.652171611785889, "learning_rate": 2.1488143769533904e-05, "loss": 2.0338, "step": 92475 }, { "epoch": 6.283462426960185, "grad_norm": 4.006595611572266, "learning_rate": 2.1483897268650632e-05, "loss": 2.3129, "step": 92480 }, { "epoch": 6.283802147030847, "grad_norm": 2.976003885269165, "learning_rate": 2.147965076776736e-05, "loss": 2.1206, "step": 92485 }, { "epoch": 6.284141867101509, "grad_norm": 3.507209300994873, "learning_rate": 2.147540426688409e-05, "loss": 1.686, "step": 92490 }, { "epoch": 6.28448158717217, "grad_norm": 3.9246559143066406, "learning_rate": 2.1471157766000816e-05, "loss": 2.0247, "step": 92495 }, { "epoch": 6.284821307242832, "grad_norm": 4.9154253005981445, "learning_rate": 2.1466911265117544e-05, "loss": 1.9629, "step": 92500 }, { "epoch": 6.285161027313494, "grad_norm": 4.6356048583984375, "learning_rate": 2.1462664764234272e-05, "loss": 2.2588, "step": 92505 }, { "epoch": 6.285500747384155, "grad_norm": 3.4243695735931396, "learning_rate": 2.1458418263351e-05, "loss": 2.0271, "step": 92510 }, { "epoch": 6.285840467454817, "grad_norm": 
3.069464683532715, "learning_rate": 2.145417176246773e-05, "loss": 2.065, "step": 92515 }, { "epoch": 6.2861801875254795, "grad_norm": 3.6634538173675537, "learning_rate": 2.1449925261584456e-05, "loss": 2.096, "step": 92520 }, { "epoch": 6.286519907596141, "grad_norm": 3.7193868160247803, "learning_rate": 2.144567876070118e-05, "loss": 2.0311, "step": 92525 }, { "epoch": 6.286859627666803, "grad_norm": 3.7195732593536377, "learning_rate": 2.1441432259817912e-05, "loss": 2.3997, "step": 92530 }, { "epoch": 6.287199347737464, "grad_norm": 3.0419540405273438, "learning_rate": 2.1437185758934637e-05, "loss": 2.0264, "step": 92535 }, { "epoch": 6.287539067808126, "grad_norm": 5.052950859069824, "learning_rate": 2.1432939258051365e-05, "loss": 2.303, "step": 92540 }, { "epoch": 6.287878787878788, "grad_norm": 3.3611323833465576, "learning_rate": 2.1428692757168096e-05, "loss": 2.1711, "step": 92545 }, { "epoch": 6.288218507949449, "grad_norm": 3.0309481620788574, "learning_rate": 2.142444625628482e-05, "loss": 2.0503, "step": 92550 }, { "epoch": 6.288558228020111, "grad_norm": 2.6630280017852783, "learning_rate": 2.142019975540155e-05, "loss": 1.8976, "step": 92555 }, { "epoch": 6.288897948090773, "grad_norm": 4.14168119430542, "learning_rate": 2.1415953254518277e-05, "loss": 2.2888, "step": 92560 }, { "epoch": 6.289237668161435, "grad_norm": 3.6841776371002197, "learning_rate": 2.1411706753635005e-05, "loss": 1.9305, "step": 92565 }, { "epoch": 6.289577388232097, "grad_norm": 4.8098649978637695, "learning_rate": 2.1407460252751736e-05, "loss": 2.2088, "step": 92570 }, { "epoch": 6.289917108302759, "grad_norm": 3.569690227508545, "learning_rate": 2.140321375186846e-05, "loss": 2.1989, "step": 92575 }, { "epoch": 6.29025682837342, "grad_norm": 3.9751224517822266, "learning_rate": 2.139896725098519e-05, "loss": 1.935, "step": 92580 }, { "epoch": 6.290596548444082, "grad_norm": 3.8192522525787354, "learning_rate": 2.1394720750101917e-05, "loss": 2.198, "step": 92585 }, { 
"epoch": 6.290936268514744, "grad_norm": 3.605238676071167, "learning_rate": 2.1390474249218645e-05, "loss": 2.2238, "step": 92590 }, { "epoch": 6.291275988585405, "grad_norm": 4.002871990203857, "learning_rate": 2.1386227748335373e-05, "loss": 1.8998, "step": 92595 }, { "epoch": 6.291615708656067, "grad_norm": 4.344473361968994, "learning_rate": 2.13819812474521e-05, "loss": 2.1389, "step": 92600 }, { "epoch": 6.291955428726729, "grad_norm": 3.861757278442383, "learning_rate": 2.137773474656883e-05, "loss": 2.123, "step": 92605 }, { "epoch": 6.292295148797391, "grad_norm": 3.700483560562134, "learning_rate": 2.1373488245685554e-05, "loss": 2.029, "step": 92610 }, { "epoch": 6.292634868868053, "grad_norm": 3.774171829223633, "learning_rate": 2.1369241744802285e-05, "loss": 2.3067, "step": 92615 }, { "epoch": 6.292974588938715, "grad_norm": 3.1315736770629883, "learning_rate": 2.1364995243919013e-05, "loss": 2.0652, "step": 92620 }, { "epoch": 6.293314309009376, "grad_norm": 3.543825387954712, "learning_rate": 2.1360748743035738e-05, "loss": 1.8872, "step": 92625 }, { "epoch": 6.293654029080038, "grad_norm": 4.663013458251953, "learning_rate": 2.135650224215247e-05, "loss": 2.1041, "step": 92630 }, { "epoch": 6.2939937491507, "grad_norm": 3.155916213989258, "learning_rate": 2.1352255741269194e-05, "loss": 2.2217, "step": 92635 }, { "epoch": 6.294333469221361, "grad_norm": 2.9743316173553467, "learning_rate": 2.1348009240385922e-05, "loss": 2.163, "step": 92640 }, { "epoch": 6.294673189292023, "grad_norm": 3.1940724849700928, "learning_rate": 2.134376273950265e-05, "loss": 2.0104, "step": 92645 }, { "epoch": 6.295012909362685, "grad_norm": 3.313805103302002, "learning_rate": 2.1339516238619378e-05, "loss": 2.0218, "step": 92650 }, { "epoch": 6.295352629433347, "grad_norm": 3.684013843536377, "learning_rate": 2.133526973773611e-05, "loss": 2.3015, "step": 92655 }, { "epoch": 6.295692349504009, "grad_norm": 3.221442461013794, "learning_rate": 2.1331023236852834e-05, 
"loss": 2.2002, "step": 92660 }, { "epoch": 6.296032069574671, "grad_norm": 3.424125909805298, "learning_rate": 2.1326776735969562e-05, "loss": 2.1463, "step": 92665 }, { "epoch": 6.296371789645332, "grad_norm": 3.0593109130859375, "learning_rate": 2.132253023508629e-05, "loss": 1.9216, "step": 92670 }, { "epoch": 6.296711509715994, "grad_norm": 3.151768445968628, "learning_rate": 2.1318283734203018e-05, "loss": 2.0503, "step": 92675 }, { "epoch": 6.297051229786656, "grad_norm": 3.8508098125457764, "learning_rate": 2.1314037233319746e-05, "loss": 2.0243, "step": 92680 }, { "epoch": 6.297390949857317, "grad_norm": 4.185088157653809, "learning_rate": 2.1309790732436474e-05, "loss": 1.9871, "step": 92685 }, { "epoch": 6.297730669927979, "grad_norm": 3.4336321353912354, "learning_rate": 2.1305544231553202e-05, "loss": 2.038, "step": 92690 }, { "epoch": 6.298070389998641, "grad_norm": 3.503941774368286, "learning_rate": 2.1301297730669927e-05, "loss": 2.0051, "step": 92695 }, { "epoch": 6.298410110069303, "grad_norm": 3.5597238540649414, "learning_rate": 2.1297051229786658e-05, "loss": 1.9155, "step": 92700 }, { "epoch": 6.298749830139965, "grad_norm": 4.427391529083252, "learning_rate": 2.1292804728903386e-05, "loss": 1.8576, "step": 92705 }, { "epoch": 6.299089550210627, "grad_norm": 4.035543918609619, "learning_rate": 2.128855822802011e-05, "loss": 2.1045, "step": 92710 }, { "epoch": 6.299429270281288, "grad_norm": 4.0678582191467285, "learning_rate": 2.1284311727136842e-05, "loss": 2.5952, "step": 92715 }, { "epoch": 6.29976899035195, "grad_norm": 3.81203556060791, "learning_rate": 2.1280065226253567e-05, "loss": 2.2579, "step": 92720 }, { "epoch": 6.300108710422612, "grad_norm": 3.6890432834625244, "learning_rate": 2.1275818725370295e-05, "loss": 2.2358, "step": 92725 }, { "epoch": 6.300448430493273, "grad_norm": 3.1426424980163574, "learning_rate": 2.1271572224487023e-05, "loss": 2.2776, "step": 92730 }, { "epoch": 6.300788150563935, "grad_norm": 
3.4563708305358887, "learning_rate": 2.126732572360375e-05, "loss": 2.0726, "step": 92735 }, { "epoch": 6.3011278706345974, "grad_norm": 4.031399250030518, "learning_rate": 2.1263079222720482e-05, "loss": 2.247, "step": 92740 }, { "epoch": 6.301467590705259, "grad_norm": 3.136796474456787, "learning_rate": 2.1258832721837207e-05, "loss": 2.1058, "step": 92745 }, { "epoch": 6.301807310775921, "grad_norm": 2.8495986461639404, "learning_rate": 2.1254586220953935e-05, "loss": 1.8198, "step": 92750 }, { "epoch": 6.302147030846583, "grad_norm": 3.320878744125366, "learning_rate": 2.1250339720070663e-05, "loss": 1.8711, "step": 92755 }, { "epoch": 6.302486750917244, "grad_norm": 3.6702232360839844, "learning_rate": 2.124609321918739e-05, "loss": 2.2236, "step": 92760 }, { "epoch": 6.302826470987906, "grad_norm": 3.156686544418335, "learning_rate": 2.124184671830412e-05, "loss": 2.0157, "step": 92765 }, { "epoch": 6.303166191058568, "grad_norm": 2.9301834106445312, "learning_rate": 2.1237600217420847e-05, "loss": 2.1468, "step": 92770 }, { "epoch": 6.303505911129229, "grad_norm": 3.877368688583374, "learning_rate": 2.1233353716537575e-05, "loss": 2.1182, "step": 92775 }, { "epoch": 6.303845631199891, "grad_norm": 4.47433614730835, "learning_rate": 2.12291072156543e-05, "loss": 2.2344, "step": 92780 }, { "epoch": 6.3041853512705535, "grad_norm": 3.288593053817749, "learning_rate": 2.122486071477103e-05, "loss": 1.9884, "step": 92785 }, { "epoch": 6.304525071341215, "grad_norm": 3.753206491470337, "learning_rate": 2.122061421388776e-05, "loss": 2.0978, "step": 92790 }, { "epoch": 6.304864791411877, "grad_norm": 2.9522833824157715, "learning_rate": 2.1216367713004483e-05, "loss": 2.1928, "step": 92795 }, { "epoch": 6.305204511482539, "grad_norm": 3.9984612464904785, "learning_rate": 2.1212121212121215e-05, "loss": 2.0466, "step": 92800 }, { "epoch": 6.3055442315532, "grad_norm": 2.9993155002593994, "learning_rate": 2.120787471123794e-05, "loss": 2.1139, "step": 92805 }, { 
"epoch": 6.305883951623862, "grad_norm": 4.30881404876709, "learning_rate": 2.1203628210354667e-05, "loss": 1.9253, "step": 92810 }, { "epoch": 6.306223671694523, "grad_norm": 5.149408340454102, "learning_rate": 2.11993817094714e-05, "loss": 1.8398, "step": 92815 }, { "epoch": 6.306563391765185, "grad_norm": 2.8494949340820312, "learning_rate": 2.1195135208588123e-05, "loss": 1.8425, "step": 92820 }, { "epoch": 6.306903111835847, "grad_norm": 3.155806064605713, "learning_rate": 2.1190888707704855e-05, "loss": 2.1492, "step": 92825 }, { "epoch": 6.307242831906509, "grad_norm": 3.6011979579925537, "learning_rate": 2.118664220682158e-05, "loss": 2.2683, "step": 92830 }, { "epoch": 6.307582551977171, "grad_norm": 5.207964897155762, "learning_rate": 2.1182395705938307e-05, "loss": 2.1776, "step": 92835 }, { "epoch": 6.307922272047833, "grad_norm": 3.666945457458496, "learning_rate": 2.1178149205055035e-05, "loss": 2.2215, "step": 92840 }, { "epoch": 6.308261992118494, "grad_norm": 4.387583255767822, "learning_rate": 2.1173902704171763e-05, "loss": 2.1408, "step": 92845 }, { "epoch": 6.308601712189156, "grad_norm": 3.588390350341797, "learning_rate": 2.116965620328849e-05, "loss": 2.1106, "step": 92850 }, { "epoch": 6.308941432259818, "grad_norm": 4.132058620452881, "learning_rate": 2.116540970240522e-05, "loss": 2.1516, "step": 92855 }, { "epoch": 6.309281152330479, "grad_norm": 4.003138542175293, "learning_rate": 2.1161163201521947e-05, "loss": 2.1547, "step": 92860 }, { "epoch": 6.309620872401141, "grad_norm": 4.626856803894043, "learning_rate": 2.1156916700638675e-05, "loss": 2.3342, "step": 92865 }, { "epoch": 6.309960592471803, "grad_norm": 3.972039222717285, "learning_rate": 2.1152670199755403e-05, "loss": 2.1334, "step": 92870 }, { "epoch": 6.310300312542465, "grad_norm": 3.484053373336792, "learning_rate": 2.114842369887213e-05, "loss": 2.2467, "step": 92875 }, { "epoch": 6.310640032613127, "grad_norm": 3.970757007598877, "learning_rate": 2.1144177197988856e-05, 
"loss": 1.929, "step": 92880 }, { "epoch": 6.310979752683789, "grad_norm": 3.763747453689575, "learning_rate": 2.1139930697105587e-05, "loss": 2.2276, "step": 92885 }, { "epoch": 6.31131947275445, "grad_norm": 3.4570086002349854, "learning_rate": 2.1135684196222312e-05, "loss": 2.0263, "step": 92890 }, { "epoch": 6.311659192825112, "grad_norm": 3.389039993286133, "learning_rate": 2.113143769533904e-05, "loss": 2.0392, "step": 92895 }, { "epoch": 6.311998912895774, "grad_norm": 3.305647850036621, "learning_rate": 2.112719119445577e-05, "loss": 2.2571, "step": 92900 }, { "epoch": 6.312338632966435, "grad_norm": 2.9580116271972656, "learning_rate": 2.1122944693572496e-05, "loss": 2.0434, "step": 92905 }, { "epoch": 6.312678353037097, "grad_norm": 3.7104082107543945, "learning_rate": 2.1118698192689227e-05, "loss": 1.9684, "step": 92910 }, { "epoch": 6.313018073107759, "grad_norm": 3.9172048568725586, "learning_rate": 2.1114451691805952e-05, "loss": 1.7647, "step": 92915 }, { "epoch": 6.313357793178421, "grad_norm": 3.237241744995117, "learning_rate": 2.111020519092268e-05, "loss": 1.8592, "step": 92920 }, { "epoch": 6.313697513249083, "grad_norm": 3.665386915206909, "learning_rate": 2.1105958690039408e-05, "loss": 2.0924, "step": 92925 }, { "epoch": 6.314037233319745, "grad_norm": 3.229092836380005, "learning_rate": 2.1101712189156136e-05, "loss": 2.1119, "step": 92930 }, { "epoch": 6.314376953390406, "grad_norm": 4.731953144073486, "learning_rate": 2.1097465688272864e-05, "loss": 1.9519, "step": 92935 }, { "epoch": 6.314716673461068, "grad_norm": 2.945467472076416, "learning_rate": 2.1093219187389592e-05, "loss": 1.9728, "step": 92940 }, { "epoch": 6.31505639353173, "grad_norm": 2.894850015640259, "learning_rate": 2.108897268650632e-05, "loss": 2.1953, "step": 92945 }, { "epoch": 6.315396113602391, "grad_norm": 3.6157643795013428, "learning_rate": 2.1084726185623048e-05, "loss": 1.8558, "step": 92950 }, { "epoch": 6.315735833673053, "grad_norm": 3.6656086444854736, 
"learning_rate": 2.1080479684739776e-05, "loss": 1.9884, "step": 92955 }, { "epoch": 6.316075553743715, "grad_norm": 3.130997896194458, "learning_rate": 2.1076233183856504e-05, "loss": 2.1389, "step": 92960 }, { "epoch": 6.316415273814377, "grad_norm": 3.0670785903930664, "learning_rate": 2.107198668297323e-05, "loss": 2.0428, "step": 92965 }, { "epoch": 6.316754993885039, "grad_norm": 4.173455238342285, "learning_rate": 2.106774018208996e-05, "loss": 2.0267, "step": 92970 }, { "epoch": 6.317094713955701, "grad_norm": 3.152541160583496, "learning_rate": 2.1063493681206685e-05, "loss": 2.2635, "step": 92975 }, { "epoch": 6.317434434026362, "grad_norm": 4.1747870445251465, "learning_rate": 2.1059247180323413e-05, "loss": 2.1111, "step": 92980 }, { "epoch": 6.317774154097024, "grad_norm": 3.91847562789917, "learning_rate": 2.1055000679440144e-05, "loss": 2.0121, "step": 92985 }, { "epoch": 6.318113874167686, "grad_norm": 2.8252129554748535, "learning_rate": 2.105075417855687e-05, "loss": 2.183, "step": 92990 }, { "epoch": 6.318453594238347, "grad_norm": 4.251608371734619, "learning_rate": 2.10465076776736e-05, "loss": 2.1053, "step": 92995 }, { "epoch": 6.318793314309009, "grad_norm": 4.02899694442749, "learning_rate": 2.1042261176790325e-05, "loss": 2.0102, "step": 93000 }, { "epoch": 6.319133034379671, "grad_norm": 3.6709060668945312, "learning_rate": 2.1038014675907053e-05, "loss": 2.2997, "step": 93005 }, { "epoch": 6.319472754450333, "grad_norm": 3.195242404937744, "learning_rate": 2.103376817502378e-05, "loss": 2.1494, "step": 93010 }, { "epoch": 6.319812474520995, "grad_norm": 3.867239475250244, "learning_rate": 2.102952167414051e-05, "loss": 1.6856, "step": 93015 }, { "epoch": 6.320152194591657, "grad_norm": 4.228468418121338, "learning_rate": 2.1025275173257237e-05, "loss": 2.0666, "step": 93020 }, { "epoch": 6.320491914662318, "grad_norm": 3.9828968048095703, "learning_rate": 2.1021028672373965e-05, "loss": 1.9845, "step": 93025 }, { "epoch": 
6.32083163473298, "grad_norm": 3.240929365158081, "learning_rate": 2.1016782171490693e-05, "loss": 2.0375, "step": 93030 }, { "epoch": 6.321171354803642, "grad_norm": 3.641018867492676, "learning_rate": 2.101253567060742e-05, "loss": 2.101, "step": 93035 }, { "epoch": 6.321511074874303, "grad_norm": 3.0739712715148926, "learning_rate": 2.100828916972415e-05, "loss": 2.0256, "step": 93040 }, { "epoch": 6.321850794944965, "grad_norm": 3.493239641189575, "learning_rate": 2.1004042668840877e-05, "loss": 1.9215, "step": 93045 }, { "epoch": 6.3221905150156275, "grad_norm": 5.298831462860107, "learning_rate": 2.09997961679576e-05, "loss": 1.8377, "step": 93050 }, { "epoch": 6.322530235086289, "grad_norm": 4.225757122039795, "learning_rate": 2.0995549667074333e-05, "loss": 1.8866, "step": 93055 }, { "epoch": 6.322869955156951, "grad_norm": 4.9672465324401855, "learning_rate": 2.0991303166191058e-05, "loss": 1.9618, "step": 93060 }, { "epoch": 6.323209675227613, "grad_norm": 4.026387691497803, "learning_rate": 2.0987056665307786e-05, "loss": 1.968, "step": 93065 }, { "epoch": 6.323549395298274, "grad_norm": 3.912916421890259, "learning_rate": 2.0982810164424517e-05, "loss": 1.996, "step": 93070 }, { "epoch": 6.323889115368936, "grad_norm": 4.464892864227295, "learning_rate": 2.097856366354124e-05, "loss": 2.1563, "step": 93075 }, { "epoch": 6.324228835439598, "grad_norm": 3.732811212539673, "learning_rate": 2.0974317162657973e-05, "loss": 1.9388, "step": 93080 }, { "epoch": 6.324568555510259, "grad_norm": 2.99306058883667, "learning_rate": 2.0970070661774698e-05, "loss": 2.0201, "step": 93085 }, { "epoch": 6.324908275580921, "grad_norm": 4.19094705581665, "learning_rate": 2.0965824160891426e-05, "loss": 2.2719, "step": 93090 }, { "epoch": 6.3252479956515835, "grad_norm": 4.024192810058594, "learning_rate": 2.0961577660008157e-05, "loss": 1.9968, "step": 93095 }, { "epoch": 6.325587715722245, "grad_norm": 4.044684410095215, "learning_rate": 2.095733115912488e-05, "loss": 
1.8676, "step": 93100 }, { "epoch": 6.325927435792907, "grad_norm": 3.5379385948181152, "learning_rate": 2.095308465824161e-05, "loss": 2.2482, "step": 93105 }, { "epoch": 6.326267155863569, "grad_norm": 3.6370575428009033, "learning_rate": 2.0948838157358338e-05, "loss": 2.0163, "step": 93110 }, { "epoch": 6.32660687593423, "grad_norm": 3.9649951457977295, "learning_rate": 2.0944591656475066e-05, "loss": 2.0777, "step": 93115 }, { "epoch": 6.326946596004892, "grad_norm": 3.178173065185547, "learning_rate": 2.0940345155591794e-05, "loss": 2.323, "step": 93120 }, { "epoch": 6.327286316075554, "grad_norm": 3.0391366481781006, "learning_rate": 2.093609865470852e-05, "loss": 2.0955, "step": 93125 }, { "epoch": 6.327626036146215, "grad_norm": 3.5632028579711914, "learning_rate": 2.093185215382525e-05, "loss": 2.1679, "step": 93130 }, { "epoch": 6.327965756216877, "grad_norm": 4.427238464355469, "learning_rate": 2.0927605652941974e-05, "loss": 2.0196, "step": 93135 }, { "epoch": 6.3283054762875395, "grad_norm": 4.889432430267334, "learning_rate": 2.0923359152058706e-05, "loss": 1.9728, "step": 93140 }, { "epoch": 6.328645196358201, "grad_norm": 3.983759641647339, "learning_rate": 2.0919112651175434e-05, "loss": 1.8631, "step": 93145 }, { "epoch": 6.328984916428863, "grad_norm": 3.184109926223755, "learning_rate": 2.0914866150292158e-05, "loss": 2.2365, "step": 93150 }, { "epoch": 6.329324636499525, "grad_norm": 3.269580364227295, "learning_rate": 2.091061964940889e-05, "loss": 2.159, "step": 93155 }, { "epoch": 6.329664356570186, "grad_norm": 4.191102504730225, "learning_rate": 2.0906373148525614e-05, "loss": 2.2642, "step": 93160 }, { "epoch": 6.330004076640848, "grad_norm": 3.632040023803711, "learning_rate": 2.0902126647642346e-05, "loss": 1.9919, "step": 93165 }, { "epoch": 6.33034379671151, "grad_norm": 3.3163554668426514, "learning_rate": 2.089788014675907e-05, "loss": 2.0198, "step": 93170 }, { "epoch": 6.330683516782171, "grad_norm": 3.8134779930114746, 
"learning_rate": 2.08936336458758e-05, "loss": 1.8534, "step": 93175 }, { "epoch": 6.331023236852833, "grad_norm": 3.4702794551849365, "learning_rate": 2.088938714499253e-05, "loss": 2.0024, "step": 93180 }, { "epoch": 6.3313629569234955, "grad_norm": 3.4246673583984375, "learning_rate": 2.0885140644109254e-05, "loss": 1.8877, "step": 93185 }, { "epoch": 6.331702676994157, "grad_norm": 3.473628282546997, "learning_rate": 2.0880894143225982e-05, "loss": 2.1262, "step": 93190 }, { "epoch": 6.332042397064819, "grad_norm": 3.461947441101074, "learning_rate": 2.087664764234271e-05, "loss": 2.3159, "step": 93195 }, { "epoch": 6.332382117135481, "grad_norm": 3.982781171798706, "learning_rate": 2.087240114145944e-05, "loss": 2.235, "step": 93200 }, { "epoch": 6.332721837206142, "grad_norm": 3.2507665157318115, "learning_rate": 2.0868154640576166e-05, "loss": 1.8594, "step": 93205 }, { "epoch": 6.333061557276804, "grad_norm": 3.0614511966705322, "learning_rate": 2.0863908139692894e-05, "loss": 1.9952, "step": 93210 }, { "epoch": 6.333401277347465, "grad_norm": 3.566887140274048, "learning_rate": 2.0859661638809622e-05, "loss": 2.1267, "step": 93215 }, { "epoch": 6.333740997418127, "grad_norm": 3.7675211429595947, "learning_rate": 2.0855415137926347e-05, "loss": 2.0692, "step": 93220 }, { "epoch": 6.334080717488789, "grad_norm": 3.991821527481079, "learning_rate": 2.085116863704308e-05, "loss": 1.9619, "step": 93225 }, { "epoch": 6.334420437559451, "grad_norm": 3.2226622104644775, "learning_rate": 2.0846922136159806e-05, "loss": 2.144, "step": 93230 }, { "epoch": 6.334760157630113, "grad_norm": 3.792201519012451, "learning_rate": 2.084267563527653e-05, "loss": 2.3581, "step": 93235 }, { "epoch": 6.335099877700775, "grad_norm": 4.123705863952637, "learning_rate": 2.0838429134393262e-05, "loss": 2.0686, "step": 93240 }, { "epoch": 6.335439597771436, "grad_norm": 3.263678789138794, "learning_rate": 2.0834182633509987e-05, "loss": 1.9965, "step": 93245 }, { "epoch": 
6.335779317842098, "grad_norm": 2.8407394886016846, "learning_rate": 2.082993613262672e-05, "loss": 1.886, "step": 93250 }, { "epoch": 6.33611903791276, "grad_norm": 3.913829803466797, "learning_rate": 2.0825689631743443e-05, "loss": 1.9792, "step": 93255 }, { "epoch": 6.336458757983421, "grad_norm": 2.888962745666504, "learning_rate": 2.082144313086017e-05, "loss": 2.0792, "step": 93260 }, { "epoch": 6.336798478054083, "grad_norm": 3.6487159729003906, "learning_rate": 2.0817196629976902e-05, "loss": 2.0988, "step": 93265 }, { "epoch": 6.337138198124745, "grad_norm": 3.7779862880706787, "learning_rate": 2.0812950129093627e-05, "loss": 2.102, "step": 93270 }, { "epoch": 6.337477918195407, "grad_norm": 3.846431255340576, "learning_rate": 2.0808703628210355e-05, "loss": 2.368, "step": 93275 }, { "epoch": 6.337817638266069, "grad_norm": 4.1869893074035645, "learning_rate": 2.0804457127327083e-05, "loss": 2.0157, "step": 93280 }, { "epoch": 6.338157358336731, "grad_norm": 4.103275775909424, "learning_rate": 2.080021062644381e-05, "loss": 1.8643, "step": 93285 }, { "epoch": 6.338497078407392, "grad_norm": 4.011962890625, "learning_rate": 2.079596412556054e-05, "loss": 2.1878, "step": 93290 }, { "epoch": 6.338836798478054, "grad_norm": 3.6190409660339355, "learning_rate": 2.0791717624677267e-05, "loss": 2.06, "step": 93295 }, { "epoch": 6.339176518548716, "grad_norm": 3.302889585494995, "learning_rate": 2.0787471123793995e-05, "loss": 1.9352, "step": 93300 }, { "epoch": 6.339516238619377, "grad_norm": 4.83574914932251, "learning_rate": 2.078322462291072e-05, "loss": 2.267, "step": 93305 }, { "epoch": 6.339855958690039, "grad_norm": 3.677250385284424, "learning_rate": 2.077897812202745e-05, "loss": 2.1546, "step": 93310 }, { "epoch": 6.3401956787607014, "grad_norm": 3.64359974861145, "learning_rate": 2.077473162114418e-05, "loss": 1.8866, "step": 93315 }, { "epoch": 6.340535398831363, "grad_norm": 3.7144060134887695, "learning_rate": 2.0770485120260904e-05, "loss": 2.2043, 
"step": 93320 }, { "epoch": 6.340875118902025, "grad_norm": 3.4618537425994873, "learning_rate": 2.0766238619377635e-05, "loss": 2.0225, "step": 93325 }, { "epoch": 6.341214838972687, "grad_norm": 4.19887638092041, "learning_rate": 2.076199211849436e-05, "loss": 2.0299, "step": 93330 }, { "epoch": 6.341554559043348, "grad_norm": 2.80073881149292, "learning_rate": 2.075774561761109e-05, "loss": 2.0872, "step": 93335 }, { "epoch": 6.34189427911401, "grad_norm": 4.27890682220459, "learning_rate": 2.075349911672782e-05, "loss": 2.0484, "step": 93340 }, { "epoch": 6.342233999184672, "grad_norm": 2.9377169609069824, "learning_rate": 2.0749252615844544e-05, "loss": 2.1692, "step": 93345 }, { "epoch": 6.342573719255333, "grad_norm": 3.24111270904541, "learning_rate": 2.0745006114961275e-05, "loss": 2.0276, "step": 93350 }, { "epoch": 6.342913439325995, "grad_norm": 3.486130475997925, "learning_rate": 2.0740759614078e-05, "loss": 2.1132, "step": 93355 }, { "epoch": 6.3432531593966575, "grad_norm": 3.1524064540863037, "learning_rate": 2.0736513113194728e-05, "loss": 2.065, "step": 93360 }, { "epoch": 6.343592879467319, "grad_norm": 3.763448715209961, "learning_rate": 2.0732266612311456e-05, "loss": 1.9315, "step": 93365 }, { "epoch": 6.343932599537981, "grad_norm": 4.087023735046387, "learning_rate": 2.0728020111428184e-05, "loss": 2.2247, "step": 93370 }, { "epoch": 6.344272319608643, "grad_norm": 3.8800089359283447, "learning_rate": 2.0723773610544912e-05, "loss": 2.1576, "step": 93375 }, { "epoch": 6.344612039679304, "grad_norm": 3.706606388092041, "learning_rate": 2.071952710966164e-05, "loss": 2.1712, "step": 93380 }, { "epoch": 6.344951759749966, "grad_norm": 4.812321662902832, "learning_rate": 2.0715280608778368e-05, "loss": 2.1018, "step": 93385 }, { "epoch": 6.345291479820628, "grad_norm": 3.773970365524292, "learning_rate": 2.0711034107895096e-05, "loss": 2.0405, "step": 93390 }, { "epoch": 6.345631199891289, "grad_norm": 2.8901214599609375, "learning_rate": 
2.0706787607011824e-05, "loss": 2.1936, "step": 93395 }, { "epoch": 6.345970919961951, "grad_norm": 3.779726028442383, "learning_rate": 2.0702541106128552e-05, "loss": 2.0088, "step": 93400 }, { "epoch": 6.3463106400326135, "grad_norm": 4.017142295837402, "learning_rate": 2.0698294605245277e-05, "loss": 1.925, "step": 93405 }, { "epoch": 6.346650360103275, "grad_norm": 4.066262245178223, "learning_rate": 2.0694048104362008e-05, "loss": 2.1319, "step": 93410 }, { "epoch": 6.346990080173937, "grad_norm": 3.2636935710906982, "learning_rate": 2.0689801603478733e-05, "loss": 1.9093, "step": 93415 }, { "epoch": 6.347329800244599, "grad_norm": 4.326339244842529, "learning_rate": 2.0685555102595464e-05, "loss": 2.2321, "step": 93420 }, { "epoch": 6.34766952031526, "grad_norm": 3.5743846893310547, "learning_rate": 2.0681308601712192e-05, "loss": 2.2412, "step": 93425 }, { "epoch": 6.348009240385922, "grad_norm": 3.9724762439727783, "learning_rate": 2.0677062100828917e-05, "loss": 2.101, "step": 93430 }, { "epoch": 6.348348960456584, "grad_norm": 3.5363173484802246, "learning_rate": 2.0672815599945648e-05, "loss": 1.9967, "step": 93435 }, { "epoch": 6.348688680527245, "grad_norm": 3.4511799812316895, "learning_rate": 2.0668569099062373e-05, "loss": 2.1103, "step": 93440 }, { "epoch": 6.349028400597907, "grad_norm": 4.185135841369629, "learning_rate": 2.06643225981791e-05, "loss": 1.9468, "step": 93445 }, { "epoch": 6.3493681206685695, "grad_norm": 3.323805809020996, "learning_rate": 2.066007609729583e-05, "loss": 1.9574, "step": 93450 }, { "epoch": 6.349707840739231, "grad_norm": 3.164402961730957, "learning_rate": 2.0655829596412557e-05, "loss": 2.1764, "step": 93455 }, { "epoch": 6.350047560809893, "grad_norm": 3.4738903045654297, "learning_rate": 2.0651583095529285e-05, "loss": 1.9824, "step": 93460 }, { "epoch": 6.350387280880555, "grad_norm": 3.8304669857025146, "learning_rate": 2.0647336594646013e-05, "loss": 1.8491, "step": 93465 }, { "epoch": 6.350727000951216, 
"grad_norm": 3.5712974071502686, "learning_rate": 2.064309009376274e-05, "loss": 2.0891, "step": 93470 }, { "epoch": 6.351066721021878, "grad_norm": 3.3910040855407715, "learning_rate": 2.063884359287947e-05, "loss": 2.2261, "step": 93475 }, { "epoch": 6.35140644109254, "grad_norm": 4.255455017089844, "learning_rate": 2.0634597091996197e-05, "loss": 2.1166, "step": 93480 }, { "epoch": 6.351746161163201, "grad_norm": 3.719532012939453, "learning_rate": 2.0630350591112925e-05, "loss": 1.9596, "step": 93485 }, { "epoch": 6.352085881233863, "grad_norm": 3.5118610858917236, "learning_rate": 2.062610409022965e-05, "loss": 1.9383, "step": 93490 }, { "epoch": 6.352425601304525, "grad_norm": 3.5815460681915283, "learning_rate": 2.062185758934638e-05, "loss": 1.9277, "step": 93495 }, { "epoch": 6.352765321375187, "grad_norm": 3.5934131145477295, "learning_rate": 2.0617611088463105e-05, "loss": 2.0697, "step": 93500 }, { "epoch": 6.353105041445849, "grad_norm": 3.120802640914917, "learning_rate": 2.0613364587579837e-05, "loss": 2.0207, "step": 93505 }, { "epoch": 6.35344476151651, "grad_norm": 4.02042818069458, "learning_rate": 2.0609118086696565e-05, "loss": 2.014, "step": 93510 }, { "epoch": 6.353784481587172, "grad_norm": 3.3239355087280273, "learning_rate": 2.060487158581329e-05, "loss": 1.9169, "step": 93515 }, { "epoch": 6.354124201657834, "grad_norm": 3.7391598224639893, "learning_rate": 2.060062508493002e-05, "loss": 1.8631, "step": 93520 }, { "epoch": 6.354463921728495, "grad_norm": 3.073276996612549, "learning_rate": 2.0596378584046745e-05, "loss": 2.2196, "step": 93525 }, { "epoch": 6.354803641799157, "grad_norm": 3.2348763942718506, "learning_rate": 2.0592132083163473e-05, "loss": 2.0403, "step": 93530 }, { "epoch": 6.355143361869819, "grad_norm": 3.5627851486206055, "learning_rate": 2.0587885582280205e-05, "loss": 2.048, "step": 93535 }, { "epoch": 6.355483081940481, "grad_norm": 3.955772638320923, "learning_rate": 2.058363908139693e-05, "loss": 2.0975, "step": 
93540 }, { "epoch": 6.355822802011143, "grad_norm": 5.8328728675842285, "learning_rate": 2.0579392580513657e-05, "loss": 2.3431, "step": 93545 }, { "epoch": 6.356162522081805, "grad_norm": 4.147532939910889, "learning_rate": 2.0575146079630385e-05, "loss": 2.1732, "step": 93550 }, { "epoch": 6.356502242152466, "grad_norm": 3.8646464347839355, "learning_rate": 2.0570899578747113e-05, "loss": 2.1253, "step": 93555 }, { "epoch": 6.356841962223128, "grad_norm": 4.341250419616699, "learning_rate": 2.056665307786384e-05, "loss": 1.798, "step": 93560 }, { "epoch": 6.35718168229379, "grad_norm": 3.630615711212158, "learning_rate": 2.056240657698057e-05, "loss": 2.0764, "step": 93565 }, { "epoch": 6.357521402364451, "grad_norm": 3.118049144744873, "learning_rate": 2.0558160076097297e-05, "loss": 2.1986, "step": 93570 }, { "epoch": 6.357861122435113, "grad_norm": 4.35603666305542, "learning_rate": 2.0553913575214022e-05, "loss": 2.185, "step": 93575 }, { "epoch": 6.358200842505775, "grad_norm": 3.9192047119140625, "learning_rate": 2.0549667074330753e-05, "loss": 2.0461, "step": 93580 }, { "epoch": 6.358540562576437, "grad_norm": 3.478395462036133, "learning_rate": 2.054542057344748e-05, "loss": 2.0171, "step": 93585 }, { "epoch": 6.358880282647099, "grad_norm": 4.199978351593018, "learning_rate": 2.054117407256421e-05, "loss": 1.9926, "step": 93590 }, { "epoch": 6.359220002717761, "grad_norm": 3.4293346405029297, "learning_rate": 2.0536927571680937e-05, "loss": 2.1757, "step": 93595 }, { "epoch": 6.359559722788422, "grad_norm": 3.156108856201172, "learning_rate": 2.0532681070797662e-05, "loss": 2.3216, "step": 93600 }, { "epoch": 6.359899442859084, "grad_norm": 3.819805860519409, "learning_rate": 2.0528434569914393e-05, "loss": 1.841, "step": 93605 }, { "epoch": 6.360239162929746, "grad_norm": 2.991356134414673, "learning_rate": 2.0524188069031118e-05, "loss": 2.0515, "step": 93610 }, { "epoch": 6.360578883000407, "grad_norm": 3.1886422634124756, "learning_rate": 
2.0519941568147846e-05, "loss": 2.1178, "step": 93615 }, { "epoch": 6.360918603071069, "grad_norm": 3.4239041805267334, "learning_rate": 2.0515695067264577e-05, "loss": 2.1131, "step": 93620 }, { "epoch": 6.3612583231417315, "grad_norm": 3.346846103668213, "learning_rate": 2.0511448566381302e-05, "loss": 2.1026, "step": 93625 }, { "epoch": 6.361598043212393, "grad_norm": 3.019728422164917, "learning_rate": 2.050720206549803e-05, "loss": 1.7423, "step": 93630 }, { "epoch": 6.361937763283055, "grad_norm": 2.826371669769287, "learning_rate": 2.0502955564614758e-05, "loss": 2.1644, "step": 93635 }, { "epoch": 6.362277483353717, "grad_norm": 3.6367034912109375, "learning_rate": 2.0498709063731486e-05, "loss": 2.0156, "step": 93640 }, { "epoch": 6.362617203424378, "grad_norm": 3.928083896636963, "learning_rate": 2.0494462562848214e-05, "loss": 2.0979, "step": 93645 }, { "epoch": 6.36295692349504, "grad_norm": 3.4104528427124023, "learning_rate": 2.0490216061964942e-05, "loss": 1.9181, "step": 93650 }, { "epoch": 6.363296643565702, "grad_norm": 3.425743579864502, "learning_rate": 2.048596956108167e-05, "loss": 2.155, "step": 93655 }, { "epoch": 6.363636363636363, "grad_norm": 3.4534502029418945, "learning_rate": 2.0481723060198395e-05, "loss": 1.8773, "step": 93660 }, { "epoch": 6.363976083707025, "grad_norm": 3.654411554336548, "learning_rate": 2.0477476559315126e-05, "loss": 1.9489, "step": 93665 }, { "epoch": 6.3643158037776875, "grad_norm": 4.0706353187561035, "learning_rate": 2.0473230058431854e-05, "loss": 2.1953, "step": 93670 }, { "epoch": 6.364655523848349, "grad_norm": 2.6968271732330322, "learning_rate": 2.0468983557548582e-05, "loss": 2.1833, "step": 93675 }, { "epoch": 6.364995243919011, "grad_norm": 4.231122970581055, "learning_rate": 2.046473705666531e-05, "loss": 1.9116, "step": 93680 }, { "epoch": 6.365334963989673, "grad_norm": 4.293236255645752, "learning_rate": 2.0460490555782035e-05, "loss": 2.2295, "step": 93685 }, { "epoch": 6.365674684060334, 
"grad_norm": 3.674187183380127, "learning_rate": 2.0456244054898766e-05, "loss": 2.442, "step": 93690 }, { "epoch": 6.366014404130996, "grad_norm": 3.450545310974121, "learning_rate": 2.045199755401549e-05, "loss": 2.1508, "step": 93695 }, { "epoch": 6.366354124201658, "grad_norm": 2.783210039138794, "learning_rate": 2.044775105313222e-05, "loss": 2.0277, "step": 93700 }, { "epoch": 6.366693844272319, "grad_norm": 3.0536742210388184, "learning_rate": 2.044350455224895e-05, "loss": 1.8224, "step": 93705 }, { "epoch": 6.367033564342981, "grad_norm": 3.7108476161956787, "learning_rate": 2.0439258051365675e-05, "loss": 2.2443, "step": 93710 }, { "epoch": 6.3673732844136435, "grad_norm": 3.4043943881988525, "learning_rate": 2.0435011550482403e-05, "loss": 2.0076, "step": 93715 }, { "epoch": 6.367713004484305, "grad_norm": 3.492990732192993, "learning_rate": 2.043076504959913e-05, "loss": 1.9388, "step": 93720 }, { "epoch": 6.368052724554967, "grad_norm": 3.6448545455932617, "learning_rate": 2.042651854871586e-05, "loss": 2.0843, "step": 93725 }, { "epoch": 6.368392444625629, "grad_norm": 3.5166256427764893, "learning_rate": 2.0422272047832587e-05, "loss": 2.2567, "step": 93730 }, { "epoch": 6.36873216469629, "grad_norm": 3.4161598682403564, "learning_rate": 2.0418025546949315e-05, "loss": 2.1255, "step": 93735 }, { "epoch": 6.369071884766952, "grad_norm": 2.9267961978912354, "learning_rate": 2.0413779046066043e-05, "loss": 1.9629, "step": 93740 }, { "epoch": 6.369411604837614, "grad_norm": 3.344452142715454, "learning_rate": 2.0409532545182767e-05, "loss": 2.1909, "step": 93745 }, { "epoch": 6.369751324908275, "grad_norm": 4.025195598602295, "learning_rate": 2.04052860442995e-05, "loss": 2.0264, "step": 93750 }, { "epoch": 6.370091044978937, "grad_norm": 3.5900676250457764, "learning_rate": 2.0401039543416227e-05, "loss": 2.0491, "step": 93755 }, { "epoch": 6.3704307650495995, "grad_norm": 3.1530139446258545, "learning_rate": 2.0396793042532955e-05, "loss": 2.1296, 
"step": 93760 }, { "epoch": 6.370770485120261, "grad_norm": 3.3771839141845703, "learning_rate": 2.0392546541649683e-05, "loss": 2.1845, "step": 93765 }, { "epoch": 6.371110205190923, "grad_norm": 3.103604793548584, "learning_rate": 2.0388300040766408e-05, "loss": 2.1733, "step": 93770 }, { "epoch": 6.371449925261585, "grad_norm": 3.447829008102417, "learning_rate": 2.038405353988314e-05, "loss": 2.1825, "step": 93775 }, { "epoch": 6.371789645332246, "grad_norm": 4.150840759277344, "learning_rate": 2.0379807038999864e-05, "loss": 2.0867, "step": 93780 }, { "epoch": 6.372129365402908, "grad_norm": 3.8063652515411377, "learning_rate": 2.037556053811659e-05, "loss": 1.9473, "step": 93785 }, { "epoch": 6.37246908547357, "grad_norm": 3.0201780796051025, "learning_rate": 2.0371314037233323e-05, "loss": 1.9064, "step": 93790 }, { "epoch": 6.372808805544231, "grad_norm": 3.0266237258911133, "learning_rate": 2.0367067536350048e-05, "loss": 2.0885, "step": 93795 }, { "epoch": 6.373148525614893, "grad_norm": 3.350640296936035, "learning_rate": 2.0362821035466776e-05, "loss": 1.994, "step": 93800 }, { "epoch": 6.3734882456855555, "grad_norm": 3.763373613357544, "learning_rate": 2.0358574534583504e-05, "loss": 2.3065, "step": 93805 }, { "epoch": 6.373827965756217, "grad_norm": 3.7646937370300293, "learning_rate": 2.035432803370023e-05, "loss": 2.0822, "step": 93810 }, { "epoch": 6.374167685826879, "grad_norm": 4.240560054779053, "learning_rate": 2.035008153281696e-05, "loss": 1.9483, "step": 93815 }, { "epoch": 6.374507405897541, "grad_norm": 3.1366517543792725, "learning_rate": 2.0345835031933688e-05, "loss": 2.0896, "step": 93820 }, { "epoch": 6.374847125968202, "grad_norm": 3.8054423332214355, "learning_rate": 2.0341588531050416e-05, "loss": 2.0105, "step": 93825 }, { "epoch": 6.375186846038864, "grad_norm": 4.107384204864502, "learning_rate": 2.033734203016714e-05, "loss": 2.2255, "step": 93830 }, { "epoch": 6.375526566109526, "grad_norm": 4.102771759033203, 
"learning_rate": 2.033309552928387e-05, "loss": 1.8391, "step": 93835 }, { "epoch": 6.375866286180187, "grad_norm": 3.2755286693573, "learning_rate": 2.03288490284006e-05, "loss": 2.1927, "step": 93840 }, { "epoch": 6.376206006250849, "grad_norm": 3.8072831630706787, "learning_rate": 2.0324602527517328e-05, "loss": 2.2303, "step": 93845 }, { "epoch": 6.3765457263215115, "grad_norm": 3.287055730819702, "learning_rate": 2.0320356026634056e-05, "loss": 2.1974, "step": 93850 }, { "epoch": 6.376885446392173, "grad_norm": 3.540215492248535, "learning_rate": 2.031610952575078e-05, "loss": 2.2695, "step": 93855 }, { "epoch": 6.377225166462835, "grad_norm": 2.8347954750061035, "learning_rate": 2.031186302486751e-05, "loss": 2.2234, "step": 93860 }, { "epoch": 6.377564886533497, "grad_norm": 3.194345235824585, "learning_rate": 2.030761652398424e-05, "loss": 1.9047, "step": 93865 }, { "epoch": 6.377904606604158, "grad_norm": 3.349607467651367, "learning_rate": 2.0303370023100964e-05, "loss": 1.9758, "step": 93870 }, { "epoch": 6.37824432667482, "grad_norm": 3.445441961288452, "learning_rate": 2.0299123522217696e-05, "loss": 1.9429, "step": 93875 }, { "epoch": 6.378584046745482, "grad_norm": 4.205719947814941, "learning_rate": 2.029487702133442e-05, "loss": 1.9784, "step": 93880 }, { "epoch": 6.378923766816143, "grad_norm": 3.909963607788086, "learning_rate": 2.029063052045115e-05, "loss": 2.0372, "step": 93885 }, { "epoch": 6.3792634868868054, "grad_norm": 3.4501187801361084, "learning_rate": 2.0286384019567876e-05, "loss": 1.8764, "step": 93890 }, { "epoch": 6.379603206957467, "grad_norm": 2.9248480796813965, "learning_rate": 2.0282137518684604e-05, "loss": 1.8164, "step": 93895 }, { "epoch": 6.379942927028129, "grad_norm": 4.1432037353515625, "learning_rate": 2.0277891017801332e-05, "loss": 1.9617, "step": 93900 }, { "epoch": 6.380282647098791, "grad_norm": 3.62271785736084, "learning_rate": 2.027364451691806e-05, "loss": 1.9446, "step": 93905 }, { "epoch": 
6.380622367169452, "grad_norm": 3.814758777618408, "learning_rate": 2.026939801603479e-05, "loss": 1.987, "step": 93910 }, { "epoch": 6.380962087240114, "grad_norm": 3.6862454414367676, "learning_rate": 2.0265151515151516e-05, "loss": 2.1313, "step": 93915 }, { "epoch": 6.381301807310776, "grad_norm": 2.9990861415863037, "learning_rate": 2.0260905014268244e-05, "loss": 1.6307, "step": 93920 }, { "epoch": 6.381641527381437, "grad_norm": 3.0804567337036133, "learning_rate": 2.0256658513384972e-05, "loss": 2.1229, "step": 93925 }, { "epoch": 6.381981247452099, "grad_norm": 3.112260341644287, "learning_rate": 2.02524120125017e-05, "loss": 2.2623, "step": 93930 }, { "epoch": 6.3823209675227615, "grad_norm": 3.248178482055664, "learning_rate": 2.024816551161843e-05, "loss": 1.9717, "step": 93935 }, { "epoch": 6.382660687593423, "grad_norm": 3.3545382022857666, "learning_rate": 2.0243919010735153e-05, "loss": 2.2509, "step": 93940 }, { "epoch": 6.383000407664085, "grad_norm": 3.5593008995056152, "learning_rate": 2.0239672509851884e-05, "loss": 2.1297, "step": 93945 }, { "epoch": 6.383340127734747, "grad_norm": 3.666938066482544, "learning_rate": 2.0235426008968612e-05, "loss": 1.9486, "step": 93950 }, { "epoch": 6.383679847805408, "grad_norm": 3.30530047416687, "learning_rate": 2.0231179508085337e-05, "loss": 2.0695, "step": 93955 }, { "epoch": 6.38401956787607, "grad_norm": 3.945099115371704, "learning_rate": 2.022693300720207e-05, "loss": 1.9968, "step": 93960 }, { "epoch": 6.384359287946732, "grad_norm": 3.745579481124878, "learning_rate": 2.0222686506318793e-05, "loss": 2.1788, "step": 93965 }, { "epoch": 6.384699008017393, "grad_norm": 4.081948757171631, "learning_rate": 2.021844000543552e-05, "loss": 2.1535, "step": 93970 }, { "epoch": 6.385038728088055, "grad_norm": 3.242248296737671, "learning_rate": 2.021419350455225e-05, "loss": 2.038, "step": 93975 }, { "epoch": 6.3853784481587175, "grad_norm": 3.2232143878936768, "learning_rate": 2.0209947003668977e-05, 
"loss": 2.2355, "step": 93980 }, { "epoch": 6.385718168229379, "grad_norm": 4.093786716461182, "learning_rate": 2.0205700502785705e-05, "loss": 2.1768, "step": 93985 }, { "epoch": 6.386057888300041, "grad_norm": 2.6905202865600586, "learning_rate": 2.0201454001902433e-05, "loss": 2.0088, "step": 93990 }, { "epoch": 6.386397608370703, "grad_norm": 3.9006190299987793, "learning_rate": 2.019720750101916e-05, "loss": 1.8093, "step": 93995 }, { "epoch": 6.386737328441364, "grad_norm": 3.368277072906494, "learning_rate": 2.019296100013589e-05, "loss": 1.952, "step": 94000 }, { "epoch": 6.387077048512026, "grad_norm": 3.786836862564087, "learning_rate": 2.0188714499252617e-05, "loss": 2.0503, "step": 94005 }, { "epoch": 6.387416768582688, "grad_norm": 4.0883259773254395, "learning_rate": 2.0184467998369345e-05, "loss": 2.2263, "step": 94010 }, { "epoch": 6.387756488653349, "grad_norm": 3.393383741378784, "learning_rate": 2.0180221497486073e-05, "loss": 2.0584, "step": 94015 }, { "epoch": 6.388096208724011, "grad_norm": 2.9914276599884033, "learning_rate": 2.01759749966028e-05, "loss": 2.1471, "step": 94020 }, { "epoch": 6.3884359287946735, "grad_norm": 3.4806673526763916, "learning_rate": 2.0171728495719526e-05, "loss": 2.2873, "step": 94025 }, { "epoch": 6.388775648865335, "grad_norm": 3.9862875938415527, "learning_rate": 2.0167481994836257e-05, "loss": 2.1283, "step": 94030 }, { "epoch": 6.389115368935997, "grad_norm": 3.5055606365203857, "learning_rate": 2.0163235493952985e-05, "loss": 2.1174, "step": 94035 }, { "epoch": 6.389455089006659, "grad_norm": 3.9174301624298096, "learning_rate": 2.015898899306971e-05, "loss": 2.1978, "step": 94040 }, { "epoch": 6.38979480907732, "grad_norm": 2.980268716812134, "learning_rate": 2.015474249218644e-05, "loss": 2.1776, "step": 94045 }, { "epoch": 6.390134529147982, "grad_norm": 3.395859956741333, "learning_rate": 2.0150495991303166e-05, "loss": 2.2339, "step": 94050 }, { "epoch": 6.390474249218644, "grad_norm": 
3.4443233013153076, "learning_rate": 2.0146249490419894e-05, "loss": 1.8427, "step": 94055 }, { "epoch": 6.390813969289305, "grad_norm": 4.012851715087891, "learning_rate": 2.0142002989536625e-05, "loss": 1.9314, "step": 94060 }, { "epoch": 6.391153689359967, "grad_norm": 3.133363962173462, "learning_rate": 2.013775648865335e-05, "loss": 1.8913, "step": 94065 }, { "epoch": 6.3914934094306295, "grad_norm": 3.7842323780059814, "learning_rate": 2.0133509987770078e-05, "loss": 2.2943, "step": 94070 }, { "epoch": 6.391833129501291, "grad_norm": 3.7521238327026367, "learning_rate": 2.0129263486886806e-05, "loss": 2.1159, "step": 94075 }, { "epoch": 6.392172849571953, "grad_norm": 3.6322290897369385, "learning_rate": 2.0125016986003534e-05, "loss": 2.082, "step": 94080 }, { "epoch": 6.392512569642615, "grad_norm": 3.6011970043182373, "learning_rate": 2.0120770485120262e-05, "loss": 2.0393, "step": 94085 }, { "epoch": 6.392852289713276, "grad_norm": 3.5002496242523193, "learning_rate": 2.011652398423699e-05, "loss": 2.2076, "step": 94090 }, { "epoch": 6.393192009783938, "grad_norm": 3.133942127227783, "learning_rate": 2.0112277483353718e-05, "loss": 2.0454, "step": 94095 }, { "epoch": 6.3935317298546, "grad_norm": 2.8030037879943848, "learning_rate": 2.0108030982470446e-05, "loss": 2.2097, "step": 94100 }, { "epoch": 6.393871449925261, "grad_norm": 3.5452041625976562, "learning_rate": 2.0103784481587174e-05, "loss": 1.9097, "step": 94105 }, { "epoch": 6.394211169995923, "grad_norm": 3.090395927429199, "learning_rate": 2.0099537980703902e-05, "loss": 1.9372, "step": 94110 }, { "epoch": 6.3945508900665855, "grad_norm": 4.387669086456299, "learning_rate": 2.009529147982063e-05, "loss": 1.9041, "step": 94115 }, { "epoch": 6.394890610137247, "grad_norm": 3.6789145469665527, "learning_rate": 2.0091044978937358e-05, "loss": 1.8864, "step": 94120 }, { "epoch": 6.395230330207909, "grad_norm": 3.3093512058258057, "learning_rate": 2.0086798478054083e-05, "loss": 2.0335, "step": 94125 
}, { "epoch": 6.395570050278571, "grad_norm": 4.002992153167725, "learning_rate": 2.0082551977170814e-05, "loss": 2.3364, "step": 94130 }, { "epoch": 6.395909770349232, "grad_norm": 3.711228370666504, "learning_rate": 2.007830547628754e-05, "loss": 1.924, "step": 94135 }, { "epoch": 6.396249490419894, "grad_norm": 3.22530198097229, "learning_rate": 2.0074058975404267e-05, "loss": 2.0491, "step": 94140 }, { "epoch": 6.396589210490556, "grad_norm": 4.230567932128906, "learning_rate": 2.0069812474520998e-05, "loss": 1.717, "step": 94145 }, { "epoch": 6.396928930561217, "grad_norm": 3.7228622436523438, "learning_rate": 2.0065565973637723e-05, "loss": 2.1729, "step": 94150 }, { "epoch": 6.397268650631879, "grad_norm": 3.602503776550293, "learning_rate": 2.006131947275445e-05, "loss": 2.077, "step": 94155 }, { "epoch": 6.3976083707025415, "grad_norm": 3.8630313873291016, "learning_rate": 2.005707297187118e-05, "loss": 2.2156, "step": 94160 }, { "epoch": 6.397948090773203, "grad_norm": 3.2572271823883057, "learning_rate": 2.0052826470987907e-05, "loss": 2.0338, "step": 94165 }, { "epoch": 6.398287810843865, "grad_norm": 3.7909984588623047, "learning_rate": 2.0048579970104635e-05, "loss": 2.0859, "step": 94170 }, { "epoch": 6.398627530914526, "grad_norm": 3.4484615325927734, "learning_rate": 2.0044333469221363e-05, "loss": 1.9561, "step": 94175 }, { "epoch": 6.398967250985188, "grad_norm": 3.852813959121704, "learning_rate": 2.004008696833809e-05, "loss": 1.9334, "step": 94180 }, { "epoch": 6.39930697105585, "grad_norm": 3.022061824798584, "learning_rate": 2.003584046745482e-05, "loss": 2.1135, "step": 94185 }, { "epoch": 6.399646691126511, "grad_norm": 3.193225145339966, "learning_rate": 2.0031593966571547e-05, "loss": 2.1935, "step": 94190 }, { "epoch": 6.399986411197173, "grad_norm": 3.7652266025543213, "learning_rate": 2.0027347465688275e-05, "loss": 2.2829, "step": 94195 }, { "epoch": 6.4003261312678354, "grad_norm": 3.435689926147461, "learning_rate": 
2.0023100964805003e-05, "loss": 1.9583, "step": 94200 }, { "epoch": 6.400665851338497, "grad_norm": 4.429895877838135, "learning_rate": 2.001885446392173e-05, "loss": 1.978, "step": 94205 }, { "epoch": 6.401005571409159, "grad_norm": 3.544074535369873, "learning_rate": 2.0014607963038455e-05, "loss": 2.0931, "step": 94210 }, { "epoch": 6.401345291479821, "grad_norm": 3.9732563495635986, "learning_rate": 2.0010361462155187e-05, "loss": 2.1917, "step": 94215 }, { "epoch": 6.401685011550482, "grad_norm": 2.7065813541412354, "learning_rate": 2.000611496127191e-05, "loss": 2.2893, "step": 94220 }, { "epoch": 6.402024731621144, "grad_norm": 3.0197877883911133, "learning_rate": 2.000186846038864e-05, "loss": 2.1111, "step": 94225 }, { "epoch": 6.402364451691806, "grad_norm": 3.0091772079467773, "learning_rate": 1.999762195950537e-05, "loss": 1.9394, "step": 94230 }, { "epoch": 6.402704171762467, "grad_norm": 4.031810283660889, "learning_rate": 1.9993375458622095e-05, "loss": 2.1666, "step": 94235 }, { "epoch": 6.403043891833129, "grad_norm": 3.356156826019287, "learning_rate": 1.9989128957738823e-05, "loss": 1.9648, "step": 94240 }, { "epoch": 6.4033836119037915, "grad_norm": 3.5336267948150635, "learning_rate": 1.998488245685555e-05, "loss": 1.9383, "step": 94245 }, { "epoch": 6.403723331974453, "grad_norm": 4.445061683654785, "learning_rate": 1.998063595597228e-05, "loss": 2.2249, "step": 94250 }, { "epoch": 6.404063052045115, "grad_norm": 3.778986692428589, "learning_rate": 1.9976389455089007e-05, "loss": 2.291, "step": 94255 }, { "epoch": 6.404402772115777, "grad_norm": 3.5954971313476562, "learning_rate": 1.9972142954205735e-05, "loss": 2.1352, "step": 94260 }, { "epoch": 6.404742492186438, "grad_norm": 4.324884414672852, "learning_rate": 1.9967896453322463e-05, "loss": 2.3006, "step": 94265 }, { "epoch": 6.4050822122571, "grad_norm": 3.8357298374176025, "learning_rate": 1.996364995243919e-05, "loss": 2.0812, "step": 94270 }, { "epoch": 6.405421932327762, 
"grad_norm": 4.122032165527344, "learning_rate": 1.995940345155592e-05, "loss": 2.0173, "step": 94275 }, { "epoch": 6.405761652398423, "grad_norm": 3.363652229309082, "learning_rate": 1.9955156950672647e-05, "loss": 1.9951, "step": 94280 }, { "epoch": 6.406101372469085, "grad_norm": 3.3957295417785645, "learning_rate": 1.9950910449789375e-05, "loss": 2.3188, "step": 94285 }, { "epoch": 6.4064410925397475, "grad_norm": 3.1190688610076904, "learning_rate": 1.9946663948906103e-05, "loss": 2.1564, "step": 94290 }, { "epoch": 6.406780812610409, "grad_norm": 3.672861099243164, "learning_rate": 1.9942417448022828e-05, "loss": 1.8762, "step": 94295 }, { "epoch": 6.407120532681071, "grad_norm": 4.299715518951416, "learning_rate": 1.993817094713956e-05, "loss": 2.2376, "step": 94300 }, { "epoch": 6.407460252751733, "grad_norm": 3.3183507919311523, "learning_rate": 1.9933924446256287e-05, "loss": 2.1682, "step": 94305 }, { "epoch": 6.407799972822394, "grad_norm": 3.384459972381592, "learning_rate": 1.9929677945373012e-05, "loss": 1.8738, "step": 94310 }, { "epoch": 6.408139692893056, "grad_norm": 3.407001256942749, "learning_rate": 1.9925431444489743e-05, "loss": 1.9618, "step": 94315 }, { "epoch": 6.408479412963718, "grad_norm": 3.905076026916504, "learning_rate": 1.9921184943606468e-05, "loss": 2.1651, "step": 94320 }, { "epoch": 6.408819133034379, "grad_norm": 3.680854558944702, "learning_rate": 1.9916938442723196e-05, "loss": 1.8866, "step": 94325 }, { "epoch": 6.409158853105041, "grad_norm": 4.107000350952148, "learning_rate": 1.9912691941839924e-05, "loss": 2.3433, "step": 94330 }, { "epoch": 6.4094985731757035, "grad_norm": 3.3786065578460693, "learning_rate": 1.9908445440956652e-05, "loss": 2.0146, "step": 94335 }, { "epoch": 6.409838293246365, "grad_norm": 4.095555782318115, "learning_rate": 1.990419894007338e-05, "loss": 1.9334, "step": 94340 }, { "epoch": 6.410178013317027, "grad_norm": 3.235898017883301, "learning_rate": 1.9899952439190108e-05, "loss": 2.326, 
"step": 94345 }, { "epoch": 6.410517733387689, "grad_norm": 3.6410512924194336, "learning_rate": 1.9895705938306836e-05, "loss": 2.2071, "step": 94350 }, { "epoch": 6.41085745345835, "grad_norm": 3.3207361698150635, "learning_rate": 1.9891459437423564e-05, "loss": 2.1123, "step": 94355 }, { "epoch": 6.411197173529012, "grad_norm": 3.4663543701171875, "learning_rate": 1.9887212936540292e-05, "loss": 2.304, "step": 94360 }, { "epoch": 6.411536893599674, "grad_norm": 4.536574840545654, "learning_rate": 1.988296643565702e-05, "loss": 2.1407, "step": 94365 }, { "epoch": 6.411876613670335, "grad_norm": 3.3645784854888916, "learning_rate": 1.9878719934773748e-05, "loss": 2.0365, "step": 94370 }, { "epoch": 6.412216333740997, "grad_norm": 4.32825231552124, "learning_rate": 1.9874473433890476e-05, "loss": 2.0926, "step": 94375 }, { "epoch": 6.4125560538116595, "grad_norm": 3.0008766651153564, "learning_rate": 1.98702269330072e-05, "loss": 2.2942, "step": 94380 }, { "epoch": 6.412895773882321, "grad_norm": 2.7476136684417725, "learning_rate": 1.9865980432123932e-05, "loss": 2.047, "step": 94385 }, { "epoch": 6.413235493952983, "grad_norm": 3.1213409900665283, "learning_rate": 1.986173393124066e-05, "loss": 1.8056, "step": 94390 }, { "epoch": 6.413575214023645, "grad_norm": 3.599320411682129, "learning_rate": 1.9857487430357385e-05, "loss": 2.0267, "step": 94395 }, { "epoch": 6.413914934094306, "grad_norm": 2.804090976715088, "learning_rate": 1.9853240929474116e-05, "loss": 2.1063, "step": 94400 }, { "epoch": 6.414254654164968, "grad_norm": 3.680452823638916, "learning_rate": 1.984899442859084e-05, "loss": 2.0323, "step": 94405 }, { "epoch": 6.41459437423563, "grad_norm": 2.8642778396606445, "learning_rate": 1.984474792770757e-05, "loss": 2.332, "step": 94410 }, { "epoch": 6.414934094306291, "grad_norm": 4.1618170738220215, "learning_rate": 1.9840501426824297e-05, "loss": 2.2768, "step": 94415 }, { "epoch": 6.415273814376953, "grad_norm": 4.977574825286865, "learning_rate": 
1.9836254925941025e-05, "loss": 2.11, "step": 94420 }, { "epoch": 6.4156135344476155, "grad_norm": 3.166724681854248, "learning_rate": 1.9832008425057753e-05, "loss": 2.0187, "step": 94425 }, { "epoch": 6.415953254518277, "grad_norm": 3.57439923286438, "learning_rate": 1.982776192417448e-05, "loss": 2.2376, "step": 94430 }, { "epoch": 6.416292974588939, "grad_norm": 3.6023449897766113, "learning_rate": 1.982351542329121e-05, "loss": 2.0079, "step": 94435 }, { "epoch": 6.416632694659601, "grad_norm": 3.6294472217559814, "learning_rate": 1.9819268922407937e-05, "loss": 2.0242, "step": 94440 }, { "epoch": 6.416972414730262, "grad_norm": 3.7102513313293457, "learning_rate": 1.9815022421524665e-05, "loss": 2.3291, "step": 94445 }, { "epoch": 6.417312134800924, "grad_norm": 3.2558627128601074, "learning_rate": 1.9810775920641393e-05, "loss": 1.8353, "step": 94450 }, { "epoch": 6.417651854871586, "grad_norm": 3.9575769901275635, "learning_rate": 1.980652941975812e-05, "loss": 2.2733, "step": 94455 }, { "epoch": 6.417991574942247, "grad_norm": 3.8488192558288574, "learning_rate": 1.980228291887485e-05, "loss": 2.1467, "step": 94460 }, { "epoch": 6.418331295012909, "grad_norm": 4.691421031951904, "learning_rate": 1.9798036417991573e-05, "loss": 2.0756, "step": 94465 }, { "epoch": 6.4186710150835715, "grad_norm": 3.321337938308716, "learning_rate": 1.9793789917108305e-05, "loss": 2.11, "step": 94470 }, { "epoch": 6.419010735154233, "grad_norm": 3.076690435409546, "learning_rate": 1.9789543416225033e-05, "loss": 2.2089, "step": 94475 }, { "epoch": 6.419350455224895, "grad_norm": 3.406730890274048, "learning_rate": 1.9785296915341758e-05, "loss": 2.2831, "step": 94480 }, { "epoch": 6.419690175295557, "grad_norm": 3.5398685932159424, "learning_rate": 1.978105041445849e-05, "loss": 2.0916, "step": 94485 }, { "epoch": 6.420029895366218, "grad_norm": 3.240659236907959, "learning_rate": 1.9776803913575214e-05, "loss": 2.0064, "step": 94490 }, { "epoch": 6.42036961543688, 
"grad_norm": 4.098678112030029, "learning_rate": 1.977255741269194e-05, "loss": 1.8308, "step": 94495 }, { "epoch": 6.420709335507542, "grad_norm": 3.75571346282959, "learning_rate": 1.976831091180867e-05, "loss": 2.2377, "step": 94500 }, { "epoch": 6.421049055578203, "grad_norm": 3.2685580253601074, "learning_rate": 1.9764064410925398e-05, "loss": 1.9791, "step": 94505 }, { "epoch": 6.4213887756488655, "grad_norm": 3.4799370765686035, "learning_rate": 1.9759817910042126e-05, "loss": 1.8275, "step": 94510 }, { "epoch": 6.4217284957195275, "grad_norm": 4.213037490844727, "learning_rate": 1.9755571409158854e-05, "loss": 2.2406, "step": 94515 }, { "epoch": 6.422068215790189, "grad_norm": 2.903276205062866, "learning_rate": 1.975132490827558e-05, "loss": 2.1525, "step": 94520 }, { "epoch": 6.422407935860851, "grad_norm": 3.22391939163208, "learning_rate": 1.974707840739231e-05, "loss": 1.8955, "step": 94525 }, { "epoch": 6.422747655931513, "grad_norm": 3.9638121128082275, "learning_rate": 1.9742831906509038e-05, "loss": 2.1253, "step": 94530 }, { "epoch": 6.423087376002174, "grad_norm": 3.6420252323150635, "learning_rate": 1.9738585405625766e-05, "loss": 2.1822, "step": 94535 }, { "epoch": 6.423427096072836, "grad_norm": 3.8690109252929688, "learning_rate": 1.9734338904742494e-05, "loss": 2.3204, "step": 94540 }, { "epoch": 6.423766816143498, "grad_norm": 3.1633801460266113, "learning_rate": 1.973009240385922e-05, "loss": 2.0053, "step": 94545 }, { "epoch": 6.424106536214159, "grad_norm": 3.6680092811584473, "learning_rate": 1.9725845902975946e-05, "loss": 1.7404, "step": 94550 }, { "epoch": 6.4244462562848215, "grad_norm": 4.481812953948975, "learning_rate": 1.9721599402092678e-05, "loss": 2.0964, "step": 94555 }, { "epoch": 6.4247859763554835, "grad_norm": 3.247098684310913, "learning_rate": 1.9717352901209406e-05, "loss": 2.3723, "step": 94560 }, { "epoch": 6.425125696426145, "grad_norm": 4.418966293334961, "learning_rate": 1.971310640032613e-05, "loss": 1.9831, 
"step": 94565 }, { "epoch": 6.425465416496807, "grad_norm": 4.154880046844482, "learning_rate": 1.970885989944286e-05, "loss": 1.8427, "step": 94570 }, { "epoch": 6.425805136567468, "grad_norm": 3.2407798767089844, "learning_rate": 1.9704613398559586e-05, "loss": 1.8264, "step": 94575 }, { "epoch": 6.42614485663813, "grad_norm": 3.308098793029785, "learning_rate": 1.9700366897676314e-05, "loss": 2.071, "step": 94580 }, { "epoch": 6.426484576708792, "grad_norm": 3.309638023376465, "learning_rate": 1.9696120396793046e-05, "loss": 2.2912, "step": 94585 }, { "epoch": 6.426824296779453, "grad_norm": 4.066915035247803, "learning_rate": 1.969187389590977e-05, "loss": 2.3109, "step": 94590 }, { "epoch": 6.427164016850115, "grad_norm": 3.654174327850342, "learning_rate": 1.9687627395026498e-05, "loss": 2.2235, "step": 94595 }, { "epoch": 6.4275037369207775, "grad_norm": 3.7749810218811035, "learning_rate": 1.9683380894143226e-05, "loss": 2.3569, "step": 94600 }, { "epoch": 6.427843456991439, "grad_norm": 3.417323589324951, "learning_rate": 1.9679134393259954e-05, "loss": 2.1573, "step": 94605 }, { "epoch": 6.428183177062101, "grad_norm": 2.7761709690093994, "learning_rate": 1.9674887892376682e-05, "loss": 2.1702, "step": 94610 }, { "epoch": 6.428522897132763, "grad_norm": 4.07404088973999, "learning_rate": 1.967064139149341e-05, "loss": 2.1092, "step": 94615 }, { "epoch": 6.428862617203424, "grad_norm": 3.757866144180298, "learning_rate": 1.966639489061014e-05, "loss": 2.1266, "step": 94620 }, { "epoch": 6.429202337274086, "grad_norm": 4.001463413238525, "learning_rate": 1.9662148389726866e-05, "loss": 2.1514, "step": 94625 }, { "epoch": 6.429542057344748, "grad_norm": 3.428245782852173, "learning_rate": 1.9657901888843594e-05, "loss": 2.3742, "step": 94630 }, { "epoch": 6.429881777415409, "grad_norm": 3.8662993907928467, "learning_rate": 1.9653655387960322e-05, "loss": 1.897, "step": 94635 }, { "epoch": 6.430221497486071, "grad_norm": 3.4830803871154785, "learning_rate": 
1.964940888707705e-05, "loss": 2.3158, "step": 94640 }, { "epoch": 6.4305612175567335, "grad_norm": 4.719287395477295, "learning_rate": 1.964516238619378e-05, "loss": 2.0411, "step": 94645 }, { "epoch": 6.430900937627395, "grad_norm": 3.589190721511841, "learning_rate": 1.9640915885310503e-05, "loss": 2.3051, "step": 94650 }, { "epoch": 6.431240657698057, "grad_norm": 4.1893696784973145, "learning_rate": 1.9636669384427234e-05, "loss": 2.0988, "step": 94655 }, { "epoch": 6.431580377768719, "grad_norm": 3.4605703353881836, "learning_rate": 1.963242288354396e-05, "loss": 2.0034, "step": 94660 }, { "epoch": 6.43192009783938, "grad_norm": 3.100811004638672, "learning_rate": 1.9628176382660687e-05, "loss": 2.2126, "step": 94665 }, { "epoch": 6.432259817910042, "grad_norm": 3.2632105350494385, "learning_rate": 1.962392988177742e-05, "loss": 2.001, "step": 94670 }, { "epoch": 6.432599537980704, "grad_norm": 3.1930675506591797, "learning_rate": 1.9619683380894143e-05, "loss": 2.3849, "step": 94675 }, { "epoch": 6.432939258051365, "grad_norm": 3.6855177879333496, "learning_rate": 1.961543688001087e-05, "loss": 1.9487, "step": 94680 }, { "epoch": 6.433278978122027, "grad_norm": 4.035020351409912, "learning_rate": 1.96111903791276e-05, "loss": 2.0165, "step": 94685 }, { "epoch": 6.4336186981926895, "grad_norm": 3.4820263385772705, "learning_rate": 1.9606943878244327e-05, "loss": 2.0341, "step": 94690 }, { "epoch": 6.433958418263351, "grad_norm": 3.349034070968628, "learning_rate": 1.9602697377361055e-05, "loss": 2.0508, "step": 94695 }, { "epoch": 6.434298138334013, "grad_norm": 3.500560760498047, "learning_rate": 1.9598450876477783e-05, "loss": 1.9817, "step": 94700 }, { "epoch": 6.434637858404675, "grad_norm": 4.067910671234131, "learning_rate": 1.959420437559451e-05, "loss": 2.2175, "step": 94705 }, { "epoch": 6.434977578475336, "grad_norm": 3.9260005950927734, "learning_rate": 1.958995787471124e-05, "loss": 2.262, "step": 94710 }, { "epoch": 6.435317298545998, 
"grad_norm": 4.002857685089111, "learning_rate": 1.9585711373827967e-05, "loss": 2.0107, "step": 94715 }, { "epoch": 6.43565701861666, "grad_norm": 3.8334476947784424, "learning_rate": 1.9581464872944695e-05, "loss": 2.0239, "step": 94720 }, { "epoch": 6.435996738687321, "grad_norm": 2.908520221710205, "learning_rate": 1.9577218372061423e-05, "loss": 2.1424, "step": 94725 }, { "epoch": 6.436336458757983, "grad_norm": 4.3906121253967285, "learning_rate": 1.957297187117815e-05, "loss": 2.0867, "step": 94730 }, { "epoch": 6.4366761788286455, "grad_norm": 3.4125466346740723, "learning_rate": 1.9568725370294876e-05, "loss": 2.1507, "step": 94735 }, { "epoch": 6.437015898899307, "grad_norm": 2.9879379272460938, "learning_rate": 1.9564478869411607e-05, "loss": 2.1296, "step": 94740 }, { "epoch": 6.437355618969969, "grad_norm": 3.7916176319122314, "learning_rate": 1.9560232368528332e-05, "loss": 1.9008, "step": 94745 }, { "epoch": 6.437695339040631, "grad_norm": 3.412876844406128, "learning_rate": 1.955598586764506e-05, "loss": 2.2133, "step": 94750 }, { "epoch": 6.438035059111292, "grad_norm": 4.151042938232422, "learning_rate": 1.955173936676179e-05, "loss": 1.9883, "step": 94755 }, { "epoch": 6.438374779181954, "grad_norm": 3.5212035179138184, "learning_rate": 1.9547492865878516e-05, "loss": 2.1386, "step": 94760 }, { "epoch": 6.438714499252616, "grad_norm": 4.3660430908203125, "learning_rate": 1.9543246364995244e-05, "loss": 2.3183, "step": 94765 }, { "epoch": 6.439054219323277, "grad_norm": 4.217069625854492, "learning_rate": 1.9538999864111972e-05, "loss": 2.1258, "step": 94770 }, { "epoch": 6.4393939393939394, "grad_norm": 4.020360946655273, "learning_rate": 1.95347533632287e-05, "loss": 2.2618, "step": 94775 }, { "epoch": 6.4397336594646015, "grad_norm": 3.9913997650146484, "learning_rate": 1.953050686234543e-05, "loss": 2.2402, "step": 94780 }, { "epoch": 6.440073379535263, "grad_norm": 3.691718339920044, "learning_rate": 1.9526260361462156e-05, "loss": 2.1045, 
"step": 94785 }, { "epoch": 6.440413099605925, "grad_norm": 4.158860206604004, "learning_rate": 1.9522013860578884e-05, "loss": 1.9148, "step": 94790 }, { "epoch": 6.440752819676587, "grad_norm": 5.24971866607666, "learning_rate": 1.9517767359695612e-05, "loss": 1.9736, "step": 94795 }, { "epoch": 6.441092539747248, "grad_norm": 4.015856742858887, "learning_rate": 1.951352085881234e-05, "loss": 2.0903, "step": 94800 }, { "epoch": 6.44143225981791, "grad_norm": 3.775966167449951, "learning_rate": 1.9509274357929068e-05, "loss": 1.9026, "step": 94805 }, { "epoch": 6.441771979888572, "grad_norm": 2.7189035415649414, "learning_rate": 1.9505027857045796e-05, "loss": 2.0058, "step": 94810 }, { "epoch": 6.442111699959233, "grad_norm": 3.7882676124572754, "learning_rate": 1.9500781356162524e-05, "loss": 2.2417, "step": 94815 }, { "epoch": 6.4424514200298955, "grad_norm": 3.7004168033599854, "learning_rate": 1.949653485527925e-05, "loss": 1.8705, "step": 94820 }, { "epoch": 6.4427911401005575, "grad_norm": 3.545112133026123, "learning_rate": 1.949228835439598e-05, "loss": 2.1276, "step": 94825 }, { "epoch": 6.443130860171219, "grad_norm": 3.4558773040771484, "learning_rate": 1.9488041853512708e-05, "loss": 2.226, "step": 94830 }, { "epoch": 6.443470580241881, "grad_norm": 3.7426960468292236, "learning_rate": 1.9483795352629433e-05, "loss": 2.0762, "step": 94835 }, { "epoch": 6.443810300312543, "grad_norm": 4.434757709503174, "learning_rate": 1.9479548851746164e-05, "loss": 2.044, "step": 94840 }, { "epoch": 6.444150020383204, "grad_norm": 4.057559967041016, "learning_rate": 1.947530235086289e-05, "loss": 2.0432, "step": 94845 }, { "epoch": 6.444489740453866, "grad_norm": 3.571753740310669, "learning_rate": 1.9471055849979617e-05, "loss": 2.1338, "step": 94850 }, { "epoch": 6.444829460524527, "grad_norm": 3.761274576187134, "learning_rate": 1.9466809349096345e-05, "loss": 1.9694, "step": 94855 }, { "epoch": 6.445169180595189, "grad_norm": 4.657334327697754, "learning_rate": 
1.9462562848213073e-05, "loss": 2.1566, "step": 94860 }, { "epoch": 6.4455089006658515, "grad_norm": 3.6783926486968994, "learning_rate": 1.9458316347329804e-05, "loss": 2.0474, "step": 94865 }, { "epoch": 6.445848620736513, "grad_norm": 2.9579756259918213, "learning_rate": 1.945406984644653e-05, "loss": 2.1188, "step": 94870 }, { "epoch": 6.446188340807175, "grad_norm": 3.4292616844177246, "learning_rate": 1.9449823345563257e-05, "loss": 1.9444, "step": 94875 }, { "epoch": 6.446528060877837, "grad_norm": 2.9568982124328613, "learning_rate": 1.9445576844679985e-05, "loss": 2.1172, "step": 94880 }, { "epoch": 6.446867780948498, "grad_norm": 4.704941272735596, "learning_rate": 1.9441330343796713e-05, "loss": 2.1814, "step": 94885 }, { "epoch": 6.44720750101916, "grad_norm": 4.538564682006836, "learning_rate": 1.943708384291344e-05, "loss": 2.0218, "step": 94890 }, { "epoch": 6.447547221089822, "grad_norm": 3.4214446544647217, "learning_rate": 1.943283734203017e-05, "loss": 2.0773, "step": 94895 }, { "epoch": 6.447886941160483, "grad_norm": 2.907886505126953, "learning_rate": 1.9428590841146897e-05, "loss": 2.0969, "step": 94900 }, { "epoch": 6.448226661231145, "grad_norm": 3.5294384956359863, "learning_rate": 1.942434434026362e-05, "loss": 1.8474, "step": 94905 }, { "epoch": 6.4485663813018075, "grad_norm": 4.110950469970703, "learning_rate": 1.9420097839380353e-05, "loss": 2.1631, "step": 94910 }, { "epoch": 6.448906101372469, "grad_norm": 4.213997840881348, "learning_rate": 1.941585133849708e-05, "loss": 2.0166, "step": 94915 }, { "epoch": 6.449245821443131, "grad_norm": 3.203395366668701, "learning_rate": 1.9411604837613805e-05, "loss": 2.2711, "step": 94920 }, { "epoch": 6.449585541513793, "grad_norm": 3.367988348007202, "learning_rate": 1.9407358336730537e-05, "loss": 2.0858, "step": 94925 }, { "epoch": 6.449925261584454, "grad_norm": 3.4389567375183105, "learning_rate": 1.940311183584726e-05, "loss": 2.1099, "step": 94930 }, { "epoch": 6.450264981655116, 
"grad_norm": 4.102692604064941, "learning_rate": 1.939886533496399e-05, "loss": 2.2218, "step": 94935 }, { "epoch": 6.450604701725778, "grad_norm": 3.7856767177581787, "learning_rate": 1.9394618834080717e-05, "loss": 2.1734, "step": 94940 }, { "epoch": 6.450944421796439, "grad_norm": 3.0665464401245117, "learning_rate": 1.9390372333197445e-05, "loss": 2.0285, "step": 94945 }, { "epoch": 6.451284141867101, "grad_norm": 3.0360286235809326, "learning_rate": 1.9386125832314177e-05, "loss": 2.1069, "step": 94950 }, { "epoch": 6.4516238619377635, "grad_norm": 4.26809549331665, "learning_rate": 1.93818793314309e-05, "loss": 1.9928, "step": 94955 }, { "epoch": 6.451963582008425, "grad_norm": 3.2482430934906006, "learning_rate": 1.937763283054763e-05, "loss": 2.2708, "step": 94960 }, { "epoch": 6.452303302079087, "grad_norm": 4.394559383392334, "learning_rate": 1.9373386329664357e-05, "loss": 2.3191, "step": 94965 }, { "epoch": 6.452643022149749, "grad_norm": 3.688511371612549, "learning_rate": 1.9369139828781085e-05, "loss": 2.3433, "step": 94970 }, { "epoch": 6.45298274222041, "grad_norm": 3.6274831295013428, "learning_rate": 1.9364893327897813e-05, "loss": 2.1443, "step": 94975 }, { "epoch": 6.453322462291072, "grad_norm": 3.2854628562927246, "learning_rate": 1.936064682701454e-05, "loss": 1.7742, "step": 94980 }, { "epoch": 6.453662182361734, "grad_norm": 4.1897711753845215, "learning_rate": 1.935640032613127e-05, "loss": 2.1198, "step": 94985 }, { "epoch": 6.454001902432395, "grad_norm": 3.698617935180664, "learning_rate": 1.9352153825247994e-05, "loss": 2.2071, "step": 94990 }, { "epoch": 6.454341622503057, "grad_norm": 3.870532274246216, "learning_rate": 1.9347907324364725e-05, "loss": 2.1662, "step": 94995 }, { "epoch": 6.4546813425737195, "grad_norm": 2.5737576484680176, "learning_rate": 1.9343660823481453e-05, "loss": 1.9139, "step": 95000 }, { "epoch": 6.455021062644381, "grad_norm": 3.3759799003601074, "learning_rate": 1.9339414322598178e-05, "loss": 2.2901, 
"step": 95005 }, { "epoch": 6.455360782715043, "grad_norm": 3.771054267883301, "learning_rate": 1.933516782171491e-05, "loss": 2.2488, "step": 95010 }, { "epoch": 6.455700502785705, "grad_norm": 3.1751303672790527, "learning_rate": 1.9330921320831634e-05, "loss": 1.9668, "step": 95015 }, { "epoch": 6.456040222856366, "grad_norm": 3.3751354217529297, "learning_rate": 1.9326674819948362e-05, "loss": 2.066, "step": 95020 }, { "epoch": 6.456379942927028, "grad_norm": 2.998173475265503, "learning_rate": 1.9322428319065093e-05, "loss": 2.2981, "step": 95025 }, { "epoch": 6.45671966299769, "grad_norm": 3.9175398349761963, "learning_rate": 1.9318181818181818e-05, "loss": 2.0463, "step": 95030 }, { "epoch": 6.457059383068351, "grad_norm": 3.4038777351379395, "learning_rate": 1.931393531729855e-05, "loss": 2.0132, "step": 95035 }, { "epoch": 6.457399103139013, "grad_norm": 3.8901846408843994, "learning_rate": 1.9309688816415274e-05, "loss": 2.0141, "step": 95040 }, { "epoch": 6.4577388232096755, "grad_norm": 3.371150016784668, "learning_rate": 1.9305442315532002e-05, "loss": 2.1544, "step": 95045 }, { "epoch": 6.458078543280337, "grad_norm": 3.192152976989746, "learning_rate": 1.930119581464873e-05, "loss": 2.0894, "step": 95050 }, { "epoch": 6.458418263350999, "grad_norm": 3.232654333114624, "learning_rate": 1.9296949313765458e-05, "loss": 1.7653, "step": 95055 }, { "epoch": 6.458757983421661, "grad_norm": 4.63222599029541, "learning_rate": 1.9292702812882186e-05, "loss": 2.2909, "step": 95060 }, { "epoch": 6.459097703492322, "grad_norm": 3.3895490169525146, "learning_rate": 1.9288456311998914e-05, "loss": 2.2362, "step": 95065 }, { "epoch": 6.459437423562984, "grad_norm": 3.9075474739074707, "learning_rate": 1.9284209811115642e-05, "loss": 1.9227, "step": 95070 }, { "epoch": 6.459777143633646, "grad_norm": 3.8909828662872314, "learning_rate": 1.927996331023237e-05, "loss": 2.2968, "step": 95075 }, { "epoch": 6.460116863704307, "grad_norm": 3.716146230697632, 
"learning_rate": 1.9275716809349098e-05, "loss": 2.2764, "step": 95080 }, { "epoch": 6.4604565837749695, "grad_norm": 4.035094738006592, "learning_rate": 1.9271470308465826e-05, "loss": 2.1242, "step": 95085 }, { "epoch": 6.4607963038456315, "grad_norm": 3.894857406616211, "learning_rate": 1.926722380758255e-05, "loss": 2.3266, "step": 95090 }, { "epoch": 6.461136023916293, "grad_norm": 3.594865083694458, "learning_rate": 1.9262977306699282e-05, "loss": 2.075, "step": 95095 }, { "epoch": 6.461475743986955, "grad_norm": 3.0831849575042725, "learning_rate": 1.9258730805816007e-05, "loss": 2.0303, "step": 95100 }, { "epoch": 6.461815464057617, "grad_norm": 3.7542052268981934, "learning_rate": 1.9254484304932735e-05, "loss": 1.9952, "step": 95105 }, { "epoch": 6.462155184128278, "grad_norm": 3.1839981079101562, "learning_rate": 1.9250237804049466e-05, "loss": 1.8447, "step": 95110 }, { "epoch": 6.46249490419894, "grad_norm": 2.8617279529571533, "learning_rate": 1.924599130316619e-05, "loss": 2.2685, "step": 95115 }, { "epoch": 6.462834624269602, "grad_norm": 2.9955801963806152, "learning_rate": 1.9241744802282922e-05, "loss": 2.0581, "step": 95120 }, { "epoch": 6.463174344340263, "grad_norm": 2.9036028385162354, "learning_rate": 1.9237498301399647e-05, "loss": 2.2387, "step": 95125 }, { "epoch": 6.4635140644109255, "grad_norm": 3.86950945854187, "learning_rate": 1.9233251800516375e-05, "loss": 2.0492, "step": 95130 }, { "epoch": 6.4638537844815875, "grad_norm": 3.262397050857544, "learning_rate": 1.9229005299633103e-05, "loss": 2.0221, "step": 95135 }, { "epoch": 6.464193504552249, "grad_norm": 3.5171825885772705, "learning_rate": 1.922475879874983e-05, "loss": 2.1919, "step": 95140 }, { "epoch": 6.464533224622911, "grad_norm": 3.5672190189361572, "learning_rate": 1.922051229786656e-05, "loss": 2.1483, "step": 95145 }, { "epoch": 6.464872944693573, "grad_norm": 2.5602614879608154, "learning_rate": 1.9216265796983287e-05, "loss": 2.2984, "step": 95150 }, { "epoch": 
6.465212664764234, "grad_norm": 3.1865110397338867, "learning_rate": 1.9212019296100015e-05, "loss": 1.966, "step": 95155 }, { "epoch": 6.465552384834896, "grad_norm": 3.6886138916015625, "learning_rate": 1.9207772795216743e-05, "loss": 2.1865, "step": 95160 }, { "epoch": 6.465892104905558, "grad_norm": 4.268673896789551, "learning_rate": 1.920352629433347e-05, "loss": 1.8994, "step": 95165 }, { "epoch": 6.466231824976219, "grad_norm": 3.895230531692505, "learning_rate": 1.91992797934502e-05, "loss": 1.9157, "step": 95170 }, { "epoch": 6.4665715450468815, "grad_norm": 3.387629508972168, "learning_rate": 1.9195033292566923e-05, "loss": 2.3535, "step": 95175 }, { "epoch": 6.4669112651175436, "grad_norm": 4.523082256317139, "learning_rate": 1.9190786791683655e-05, "loss": 1.9271, "step": 95180 }, { "epoch": 6.467250985188205, "grad_norm": 3.298243522644043, "learning_rate": 1.918654029080038e-05, "loss": 2.1643, "step": 95185 }, { "epoch": 6.467590705258867, "grad_norm": 4.585902690887451, "learning_rate": 1.9182293789917108e-05, "loss": 1.9154, "step": 95190 }, { "epoch": 6.467930425329529, "grad_norm": 3.8459506034851074, "learning_rate": 1.917804728903384e-05, "loss": 2.0818, "step": 95195 }, { "epoch": 6.46827014540019, "grad_norm": 4.048365592956543, "learning_rate": 1.9173800788150564e-05, "loss": 2.2494, "step": 95200 }, { "epoch": 6.468609865470852, "grad_norm": 3.8199803829193115, "learning_rate": 1.9169554287267295e-05, "loss": 2.2659, "step": 95205 }, { "epoch": 6.468949585541514, "grad_norm": 4.102900981903076, "learning_rate": 1.916530778638402e-05, "loss": 1.9935, "step": 95210 }, { "epoch": 6.469289305612175, "grad_norm": 3.624569892883301, "learning_rate": 1.9161061285500748e-05, "loss": 2.1433, "step": 95215 }, { "epoch": 6.4696290256828375, "grad_norm": 5.238467693328857, "learning_rate": 1.915681478461748e-05, "loss": 2.0826, "step": 95220 }, { "epoch": 6.4699687457535, "grad_norm": 3.898024797439575, "learning_rate": 1.9152568283734204e-05, "loss": 
2.0257, "step": 95225 }, { "epoch": 6.470308465824161, "grad_norm": 3.735314130783081, "learning_rate": 1.914832178285093e-05, "loss": 2.3264, "step": 95230 }, { "epoch": 6.470648185894823, "grad_norm": 3.8173880577087402, "learning_rate": 1.914407528196766e-05, "loss": 2.3702, "step": 95235 }, { "epoch": 6.470987905965485, "grad_norm": 3.42356538772583, "learning_rate": 1.9139828781084388e-05, "loss": 2.2942, "step": 95240 }, { "epoch": 6.471327626036146, "grad_norm": 4.521291255950928, "learning_rate": 1.9135582280201116e-05, "loss": 2.2602, "step": 95245 }, { "epoch": 6.471667346106808, "grad_norm": 3.162463665008545, "learning_rate": 1.9131335779317844e-05, "loss": 2.0249, "step": 95250 }, { "epoch": 6.47200706617747, "grad_norm": 4.040256977081299, "learning_rate": 1.912708927843457e-05, "loss": 2.0116, "step": 95255 }, { "epoch": 6.472346786248131, "grad_norm": 3.542656183242798, "learning_rate": 1.9122842777551296e-05, "loss": 2.1857, "step": 95260 }, { "epoch": 6.4726865063187935, "grad_norm": 3.083170175552368, "learning_rate": 1.9118596276668028e-05, "loss": 2.1012, "step": 95265 }, { "epoch": 6.473026226389455, "grad_norm": 2.6925108432769775, "learning_rate": 1.9114349775784756e-05, "loss": 2.1063, "step": 95270 }, { "epoch": 6.473365946460117, "grad_norm": 4.608455657958984, "learning_rate": 1.911010327490148e-05, "loss": 1.9509, "step": 95275 }, { "epoch": 6.473705666530779, "grad_norm": 3.1919896602630615, "learning_rate": 1.910585677401821e-05, "loss": 2.2718, "step": 95280 }, { "epoch": 6.47404538660144, "grad_norm": 3.2912707328796387, "learning_rate": 1.9101610273134936e-05, "loss": 2.1731, "step": 95285 }, { "epoch": 6.474385106672102, "grad_norm": 3.065650224685669, "learning_rate": 1.9097363772251668e-05, "loss": 1.962, "step": 95290 }, { "epoch": 6.474724826742764, "grad_norm": 3.7664005756378174, "learning_rate": 1.9093117271368392e-05, "loss": 1.8576, "step": 95295 }, { "epoch": 6.475064546813425, "grad_norm": 3.9419615268707275, 
"learning_rate": 1.908887077048512e-05, "loss": 2.2505, "step": 95300 }, { "epoch": 6.475404266884087, "grad_norm": 3.19630765914917, "learning_rate": 1.908462426960185e-05, "loss": 2.2766, "step": 95305 }, { "epoch": 6.4757439869547495, "grad_norm": 3.810654640197754, "learning_rate": 1.9080377768718576e-05, "loss": 2.3519, "step": 95310 }, { "epoch": 6.476083707025411, "grad_norm": 3.580085039138794, "learning_rate": 1.9076131267835304e-05, "loss": 2.0117, "step": 95315 }, { "epoch": 6.476423427096073, "grad_norm": 3.642860174179077, "learning_rate": 1.9071884766952032e-05, "loss": 1.9197, "step": 95320 }, { "epoch": 6.476763147166735, "grad_norm": 4.083498001098633, "learning_rate": 1.906763826606876e-05, "loss": 2.0484, "step": 95325 }, { "epoch": 6.477102867237396, "grad_norm": 3.517944574356079, "learning_rate": 1.906339176518549e-05, "loss": 2.2139, "step": 95330 }, { "epoch": 6.477442587308058, "grad_norm": 2.527034044265747, "learning_rate": 1.9059145264302216e-05, "loss": 2.2804, "step": 95335 }, { "epoch": 6.47778230737872, "grad_norm": 4.569056987762451, "learning_rate": 1.9054898763418944e-05, "loss": 2.0207, "step": 95340 }, { "epoch": 6.478122027449381, "grad_norm": 3.578054666519165, "learning_rate": 1.905065226253567e-05, "loss": 2.0903, "step": 95345 }, { "epoch": 6.4784617475200434, "grad_norm": 2.877779006958008, "learning_rate": 1.90464057616524e-05, "loss": 2.1474, "step": 95350 }, { "epoch": 6.4788014675907055, "grad_norm": 4.369006633758545, "learning_rate": 1.904215926076913e-05, "loss": 2.0849, "step": 95355 }, { "epoch": 6.479141187661367, "grad_norm": 4.060521125793457, "learning_rate": 1.9037912759885853e-05, "loss": 1.8554, "step": 95360 }, { "epoch": 6.479480907732029, "grad_norm": 3.3344547748565674, "learning_rate": 1.9033666259002584e-05, "loss": 2.1314, "step": 95365 }, { "epoch": 6.479820627802691, "grad_norm": 4.649119853973389, "learning_rate": 1.902941975811931e-05, "loss": 2.1053, "step": 95370 }, { "epoch": 
6.480160347873352, "grad_norm": 4.211641311645508, "learning_rate": 1.902517325723604e-05, "loss": 1.7368, "step": 95375 }, { "epoch": 6.480500067944014, "grad_norm": 3.672349214553833, "learning_rate": 1.9020926756352765e-05, "loss": 2.1012, "step": 95380 }, { "epoch": 6.480839788014676, "grad_norm": 2.9657154083251953, "learning_rate": 1.9016680255469493e-05, "loss": 2.1181, "step": 95385 }, { "epoch": 6.481179508085337, "grad_norm": 3.3747317790985107, "learning_rate": 1.9012433754586224e-05, "loss": 2.1296, "step": 95390 }, { "epoch": 6.4815192281559995, "grad_norm": 3.135427713394165, "learning_rate": 1.900818725370295e-05, "loss": 2.1802, "step": 95395 }, { "epoch": 6.4818589482266615, "grad_norm": 4.373799800872803, "learning_rate": 1.9003940752819677e-05, "loss": 2.0374, "step": 95400 }, { "epoch": 6.482198668297323, "grad_norm": 3.2332851886749268, "learning_rate": 1.8999694251936405e-05, "loss": 2.119, "step": 95405 }, { "epoch": 6.482538388367985, "grad_norm": 3.6330337524414062, "learning_rate": 1.8995447751053133e-05, "loss": 2.0602, "step": 95410 }, { "epoch": 6.482878108438647, "grad_norm": 3.543044328689575, "learning_rate": 1.899120125016986e-05, "loss": 2.0237, "step": 95415 }, { "epoch": 6.483217828509308, "grad_norm": 3.7437450885772705, "learning_rate": 1.898695474928659e-05, "loss": 2.0291, "step": 95420 }, { "epoch": 6.48355754857997, "grad_norm": 2.698585033416748, "learning_rate": 1.8982708248403317e-05, "loss": 2.1079, "step": 95425 }, { "epoch": 6.483897268650632, "grad_norm": 3.6524479389190674, "learning_rate": 1.8978461747520042e-05, "loss": 2.1179, "step": 95430 }, { "epoch": 6.484236988721293, "grad_norm": 3.5546576976776123, "learning_rate": 1.8974215246636773e-05, "loss": 1.9642, "step": 95435 }, { "epoch": 6.4845767087919555, "grad_norm": 2.8690171241760254, "learning_rate": 1.89699687457535e-05, "loss": 2.2035, "step": 95440 }, { "epoch": 6.4849164288626175, "grad_norm": 3.487391710281372, "learning_rate": 1.8965722244870226e-05, 
"loss": 1.9949, "step": 95445 }, { "epoch": 6.485256148933279, "grad_norm": 4.446056842803955, "learning_rate": 1.8961475743986957e-05, "loss": 2.0292, "step": 95450 }, { "epoch": 6.485595869003941, "grad_norm": 3.3389484882354736, "learning_rate": 1.8957229243103682e-05, "loss": 2.0792, "step": 95455 }, { "epoch": 6.485935589074603, "grad_norm": 3.1626241207122803, "learning_rate": 1.8952982742220413e-05, "loss": 2.0758, "step": 95460 }, { "epoch": 6.486275309145264, "grad_norm": 4.144272804260254, "learning_rate": 1.8948736241337138e-05, "loss": 2.2684, "step": 95465 }, { "epoch": 6.486615029215926, "grad_norm": 3.4362995624542236, "learning_rate": 1.8944489740453866e-05, "loss": 2.2267, "step": 95470 }, { "epoch": 6.486954749286588, "grad_norm": 4.5383405685424805, "learning_rate": 1.8940243239570597e-05, "loss": 2.3115, "step": 95475 }, { "epoch": 6.487294469357249, "grad_norm": 3.0937697887420654, "learning_rate": 1.8935996738687322e-05, "loss": 2.073, "step": 95480 }, { "epoch": 6.4876341894279115, "grad_norm": 3.5905346870422363, "learning_rate": 1.893175023780405e-05, "loss": 2.2142, "step": 95485 }, { "epoch": 6.4879739094985736, "grad_norm": 4.286020278930664, "learning_rate": 1.8927503736920778e-05, "loss": 1.9544, "step": 95490 }, { "epoch": 6.488313629569235, "grad_norm": 2.898458480834961, "learning_rate": 1.8923257236037506e-05, "loss": 2.1255, "step": 95495 }, { "epoch": 6.488653349639897, "grad_norm": 3.86836838722229, "learning_rate": 1.8919010735154234e-05, "loss": 2.015, "step": 95500 }, { "epoch": 6.488993069710559, "grad_norm": 3.7975642681121826, "learning_rate": 1.8914764234270962e-05, "loss": 1.8904, "step": 95505 }, { "epoch": 6.48933278978122, "grad_norm": 2.846243143081665, "learning_rate": 1.891051773338769e-05, "loss": 2.2256, "step": 95510 }, { "epoch": 6.489672509851882, "grad_norm": 4.271748065948486, "learning_rate": 1.8906271232504414e-05, "loss": 2.2169, "step": 95515 }, { "epoch": 6.490012229922544, "grad_norm": 
3.327059745788574, "learning_rate": 1.8902024731621146e-05, "loss": 2.377, "step": 95520 }, { "epoch": 6.490351949993205, "grad_norm": 4.6065263748168945, "learning_rate": 1.8897778230737874e-05, "loss": 1.8593, "step": 95525 }, { "epoch": 6.4906916700638675, "grad_norm": 3.226199150085449, "learning_rate": 1.88935317298546e-05, "loss": 1.8223, "step": 95530 }, { "epoch": 6.491031390134529, "grad_norm": 3.4021780490875244, "learning_rate": 1.888928522897133e-05, "loss": 2.023, "step": 95535 }, { "epoch": 6.491371110205191, "grad_norm": 3.935877561569214, "learning_rate": 1.8885038728088054e-05, "loss": 2.1865, "step": 95540 }, { "epoch": 6.491710830275853, "grad_norm": 3.6963460445404053, "learning_rate": 1.8880792227204786e-05, "loss": 2.1369, "step": 95545 }, { "epoch": 6.492050550346514, "grad_norm": 3.741539716720581, "learning_rate": 1.8876545726321514e-05, "loss": 2.0083, "step": 95550 }, { "epoch": 6.492390270417176, "grad_norm": 3.8868088722229004, "learning_rate": 1.887229922543824e-05, "loss": 2.0726, "step": 95555 }, { "epoch": 6.492729990487838, "grad_norm": 4.564487934112549, "learning_rate": 1.886805272455497e-05, "loss": 2.0998, "step": 95560 }, { "epoch": 6.493069710558499, "grad_norm": 3.6447556018829346, "learning_rate": 1.8863806223671695e-05, "loss": 2.2113, "step": 95565 }, { "epoch": 6.493409430629161, "grad_norm": 5.051499366760254, "learning_rate": 1.8859559722788423e-05, "loss": 2.1386, "step": 95570 }, { "epoch": 6.4937491506998235, "grad_norm": 4.459594249725342, "learning_rate": 1.885531322190515e-05, "loss": 2.3117, "step": 95575 }, { "epoch": 6.494088870770485, "grad_norm": 3.4324796199798584, "learning_rate": 1.885106672102188e-05, "loss": 2.1754, "step": 95580 }, { "epoch": 6.494428590841147, "grad_norm": 3.8217344284057617, "learning_rate": 1.8846820220138607e-05, "loss": 2.0957, "step": 95585 }, { "epoch": 6.494768310911809, "grad_norm": 3.6461384296417236, "learning_rate": 1.8842573719255335e-05, "loss": 2.0709, "step": 95590 }, { 
"epoch": 6.49510803098247, "grad_norm": 3.235736131668091, "learning_rate": 1.8838327218372063e-05, "loss": 2.0269, "step": 95595 }, { "epoch": 6.495447751053132, "grad_norm": 3.1157047748565674, "learning_rate": 1.883408071748879e-05, "loss": 2.0125, "step": 95600 }, { "epoch": 6.495787471123794, "grad_norm": 3.4784815311431885, "learning_rate": 1.882983421660552e-05, "loss": 1.9945, "step": 95605 }, { "epoch": 6.496127191194455, "grad_norm": 4.291703224182129, "learning_rate": 1.8825587715722247e-05, "loss": 1.9495, "step": 95610 }, { "epoch": 6.496466911265117, "grad_norm": 2.9226324558258057, "learning_rate": 1.882134121483897e-05, "loss": 2.0987, "step": 95615 }, { "epoch": 6.4968066313357795, "grad_norm": 3.992403030395508, "learning_rate": 1.8817094713955703e-05, "loss": 2.1982, "step": 95620 }, { "epoch": 6.497146351406441, "grad_norm": 3.4504597187042236, "learning_rate": 1.8812848213072427e-05, "loss": 1.9497, "step": 95625 }, { "epoch": 6.497486071477103, "grad_norm": 3.5039961338043213, "learning_rate": 1.880860171218916e-05, "loss": 2.1207, "step": 95630 }, { "epoch": 6.497825791547765, "grad_norm": 3.3261706829071045, "learning_rate": 1.8804355211305887e-05, "loss": 2.2981, "step": 95635 }, { "epoch": 6.498165511618426, "grad_norm": 3.778762102127075, "learning_rate": 1.880010871042261e-05, "loss": 2.0492, "step": 95640 }, { "epoch": 6.498505231689088, "grad_norm": 3.44608211517334, "learning_rate": 1.8795862209539343e-05, "loss": 2.2278, "step": 95645 }, { "epoch": 6.49884495175975, "grad_norm": 3.9674062728881836, "learning_rate": 1.8791615708656067e-05, "loss": 2.2845, "step": 95650 }, { "epoch": 6.499184671830411, "grad_norm": 3.7045705318450928, "learning_rate": 1.8787369207772795e-05, "loss": 2.1252, "step": 95655 }, { "epoch": 6.4995243919010735, "grad_norm": 3.8113255500793457, "learning_rate": 1.8783122706889523e-05, "loss": 1.9235, "step": 95660 }, { "epoch": 6.4998641119717355, "grad_norm": 3.904933214187622, "learning_rate": 
1.877887620600625e-05, "loss": 2.2261, "step": 95665 }, { "epoch": 6.500203832042397, "grad_norm": 4.624876976013184, "learning_rate": 1.877462970512298e-05, "loss": 2.1251, "step": 95670 }, { "epoch": 6.500543552113059, "grad_norm": 5.223688125610352, "learning_rate": 1.8770383204239707e-05, "loss": 2.18, "step": 95675 }, { "epoch": 6.500883272183721, "grad_norm": 3.3402974605560303, "learning_rate": 1.8766136703356435e-05, "loss": 1.8278, "step": 95680 }, { "epoch": 6.501222992254382, "grad_norm": 3.127049446105957, "learning_rate": 1.8761890202473163e-05, "loss": 1.9942, "step": 95685 }, { "epoch": 6.501562712325044, "grad_norm": 3.577497959136963, "learning_rate": 1.875764370158989e-05, "loss": 2.2077, "step": 95690 }, { "epoch": 6.501902432395706, "grad_norm": 3.5746638774871826, "learning_rate": 1.875339720070662e-05, "loss": 2.0595, "step": 95695 }, { "epoch": 6.502242152466367, "grad_norm": 3.6395678520202637, "learning_rate": 1.8749150699823344e-05, "loss": 2.1687, "step": 95700 }, { "epoch": 6.5025818725370295, "grad_norm": 4.838028907775879, "learning_rate": 1.8744904198940075e-05, "loss": 2.2135, "step": 95705 }, { "epoch": 6.5029215926076915, "grad_norm": 4.436151027679443, "learning_rate": 1.87406576980568e-05, "loss": 2.2251, "step": 95710 }, { "epoch": 6.503261312678353, "grad_norm": 3.8059206008911133, "learning_rate": 1.873641119717353e-05, "loss": 1.9532, "step": 95715 }, { "epoch": 6.503601032749015, "grad_norm": 4.259912967681885, "learning_rate": 1.873216469629026e-05, "loss": 2.2202, "step": 95720 }, { "epoch": 6.503940752819677, "grad_norm": 3.6007509231567383, "learning_rate": 1.8727918195406984e-05, "loss": 2.3391, "step": 95725 }, { "epoch": 6.504280472890338, "grad_norm": 4.7105278968811035, "learning_rate": 1.8723671694523715e-05, "loss": 2.1419, "step": 95730 }, { "epoch": 6.504620192961, "grad_norm": 3.766613245010376, "learning_rate": 1.871942519364044e-05, "loss": 2.131, "step": 95735 }, { "epoch": 6.504959913031662, "grad_norm": 
3.365901470184326, "learning_rate": 1.8715178692757168e-05, "loss": 2.0404, "step": 95740 }, { "epoch": 6.505299633102323, "grad_norm": 3.1957550048828125, "learning_rate": 1.87109321918739e-05, "loss": 2.3063, "step": 95745 }, { "epoch": 6.5056393531729855, "grad_norm": 3.436791181564331, "learning_rate": 1.8706685690990624e-05, "loss": 1.9574, "step": 95750 }, { "epoch": 6.5059790732436475, "grad_norm": 4.02983283996582, "learning_rate": 1.8702439190107352e-05, "loss": 2.0473, "step": 95755 }, { "epoch": 6.506318793314309, "grad_norm": 3.550360679626465, "learning_rate": 1.869819268922408e-05, "loss": 2.1472, "step": 95760 }, { "epoch": 6.506658513384971, "grad_norm": 3.4163923263549805, "learning_rate": 1.8693946188340808e-05, "loss": 2.2523, "step": 95765 }, { "epoch": 6.506998233455633, "grad_norm": 3.5621631145477295, "learning_rate": 1.8689699687457536e-05, "loss": 2.0697, "step": 95770 }, { "epoch": 6.507337953526294, "grad_norm": 3.4846079349517822, "learning_rate": 1.8685453186574264e-05, "loss": 2.0378, "step": 95775 }, { "epoch": 6.507677673596956, "grad_norm": 3.334763288497925, "learning_rate": 1.8681206685690992e-05, "loss": 2.3823, "step": 95780 }, { "epoch": 6.508017393667618, "grad_norm": 2.6367664337158203, "learning_rate": 1.8676960184807717e-05, "loss": 2.3614, "step": 95785 }, { "epoch": 6.508357113738279, "grad_norm": 4.053577423095703, "learning_rate": 1.8672713683924448e-05, "loss": 2.1934, "step": 95790 }, { "epoch": 6.5086968338089415, "grad_norm": 3.8591208457946777, "learning_rate": 1.8668467183041176e-05, "loss": 2.143, "step": 95795 }, { "epoch": 6.509036553879604, "grad_norm": 3.2111058235168457, "learning_rate": 1.8664220682157904e-05, "loss": 1.9789, "step": 95800 }, { "epoch": 6.509376273950265, "grad_norm": 4.23344612121582, "learning_rate": 1.8659974181274632e-05, "loss": 2.1233, "step": 95805 }, { "epoch": 6.509715994020927, "grad_norm": 4.072391986846924, "learning_rate": 1.8655727680391357e-05, "loss": 1.9987, "step": 95810 
}, { "epoch": 6.510055714091589, "grad_norm": 3.3084464073181152, "learning_rate": 1.8651481179508088e-05, "loss": 2.0808, "step": 95815 }, { "epoch": 6.51039543416225, "grad_norm": 3.4479925632476807, "learning_rate": 1.8647234678624813e-05, "loss": 2.1843, "step": 95820 }, { "epoch": 6.510735154232912, "grad_norm": 3.324324607849121, "learning_rate": 1.864298817774154e-05, "loss": 1.9614, "step": 95825 }, { "epoch": 6.511074874303574, "grad_norm": 4.342982292175293, "learning_rate": 1.8638741676858272e-05, "loss": 2.143, "step": 95830 }, { "epoch": 6.511414594374235, "grad_norm": 3.1953117847442627, "learning_rate": 1.8634495175974997e-05, "loss": 1.9067, "step": 95835 }, { "epoch": 6.5117543144448975, "grad_norm": 4.204885005950928, "learning_rate": 1.8630248675091725e-05, "loss": 1.8646, "step": 95840 }, { "epoch": 6.51209403451556, "grad_norm": 3.0078790187835693, "learning_rate": 1.8626002174208453e-05, "loss": 2.153, "step": 95845 }, { "epoch": 6.512433754586221, "grad_norm": 2.9715921878814697, "learning_rate": 1.862175567332518e-05, "loss": 2.3547, "step": 95850 }, { "epoch": 6.512773474656883, "grad_norm": 3.7839884757995605, "learning_rate": 1.861750917244191e-05, "loss": 2.0866, "step": 95855 }, { "epoch": 6.513113194727545, "grad_norm": 4.185667514801025, "learning_rate": 1.8613262671558637e-05, "loss": 2.1503, "step": 95860 }, { "epoch": 6.513452914798206, "grad_norm": 3.807197332382202, "learning_rate": 1.8609016170675365e-05, "loss": 1.9842, "step": 95865 }, { "epoch": 6.513792634868868, "grad_norm": 3.343353509902954, "learning_rate": 1.860476966979209e-05, "loss": 2.0018, "step": 95870 }, { "epoch": 6.51413235493953, "grad_norm": 3.744659185409546, "learning_rate": 1.860052316890882e-05, "loss": 2.032, "step": 95875 }, { "epoch": 6.514472075010191, "grad_norm": 4.251512050628662, "learning_rate": 1.859627666802555e-05, "loss": 2.2083, "step": 95880 }, { "epoch": 6.5148117950808535, "grad_norm": 4.120248317718506, "learning_rate": 
1.8592030167142277e-05, "loss": 1.9677, "step": 95885 }, { "epoch": 6.515151515151516, "grad_norm": 4.212965488433838, "learning_rate": 1.8587783666259005e-05, "loss": 2.3032, "step": 95890 }, { "epoch": 6.515491235222177, "grad_norm": 4.20113468170166, "learning_rate": 1.858353716537573e-05, "loss": 2.1488, "step": 95895 }, { "epoch": 6.515830955292839, "grad_norm": 3.5931873321533203, "learning_rate": 1.857929066449246e-05, "loss": 2.1929, "step": 95900 }, { "epoch": 6.516170675363501, "grad_norm": 4.049809455871582, "learning_rate": 1.8575044163609185e-05, "loss": 2.0455, "step": 95905 }, { "epoch": 6.516510395434162, "grad_norm": 4.297261714935303, "learning_rate": 1.8570797662725914e-05, "loss": 2.2627, "step": 95910 }, { "epoch": 6.516850115504824, "grad_norm": 3.40942645072937, "learning_rate": 1.8566551161842645e-05, "loss": 2.4186, "step": 95915 }, { "epoch": 6.517189835575486, "grad_norm": 2.97308087348938, "learning_rate": 1.856230466095937e-05, "loss": 2.1215, "step": 95920 }, { "epoch": 6.517529555646147, "grad_norm": 4.109424591064453, "learning_rate": 1.8558058160076098e-05, "loss": 1.9334, "step": 95925 }, { "epoch": 6.5178692757168095, "grad_norm": 3.4850242137908936, "learning_rate": 1.8553811659192826e-05, "loss": 1.9619, "step": 95930 }, { "epoch": 6.518208995787472, "grad_norm": 3.1655173301696777, "learning_rate": 1.8549565158309554e-05, "loss": 1.8746, "step": 95935 }, { "epoch": 6.518548715858133, "grad_norm": 4.4580607414245605, "learning_rate": 1.854531865742628e-05, "loss": 2.1278, "step": 95940 }, { "epoch": 6.518888435928795, "grad_norm": 3.601842164993286, "learning_rate": 1.854107215654301e-05, "loss": 2.4141, "step": 95945 }, { "epoch": 6.519228155999457, "grad_norm": 3.812654972076416, "learning_rate": 1.8536825655659738e-05, "loss": 2.1554, "step": 95950 }, { "epoch": 6.519567876070118, "grad_norm": 3.802602529525757, "learning_rate": 1.8532579154776462e-05, "loss": 2.1777, "step": 95955 }, { "epoch": 6.51990759614078, "grad_norm": 
3.2156870365142822, "learning_rate": 1.8528332653893194e-05, "loss": 2.1174, "step": 95960 }, { "epoch": 6.520247316211442, "grad_norm": 4.119558334350586, "learning_rate": 1.852408615300992e-05, "loss": 1.8645, "step": 95965 }, { "epoch": 6.5205870362821035, "grad_norm": 3.005439281463623, "learning_rate": 1.851983965212665e-05, "loss": 2.1722, "step": 95970 }, { "epoch": 6.5209267563527655, "grad_norm": 3.2707905769348145, "learning_rate": 1.8515593151243378e-05, "loss": 1.9032, "step": 95975 }, { "epoch": 6.521266476423427, "grad_norm": 4.072278022766113, "learning_rate": 1.8511346650360102e-05, "loss": 1.8575, "step": 95980 }, { "epoch": 6.521606196494089, "grad_norm": 2.9843642711639404, "learning_rate": 1.8507100149476834e-05, "loss": 2.0825, "step": 95985 }, { "epoch": 6.521945916564751, "grad_norm": 3.1420907974243164, "learning_rate": 1.850285364859356e-05, "loss": 2.0823, "step": 95990 }, { "epoch": 6.522285636635412, "grad_norm": 4.034025192260742, "learning_rate": 1.8498607147710286e-05, "loss": 2.1266, "step": 95995 }, { "epoch": 6.522625356706074, "grad_norm": 3.4263405799865723, "learning_rate": 1.8494360646827018e-05, "loss": 2.0723, "step": 96000 }, { "epoch": 6.522965076776736, "grad_norm": 3.238323450088501, "learning_rate": 1.8490114145943742e-05, "loss": 1.9018, "step": 96005 }, { "epoch": 6.523304796847397, "grad_norm": 3.5806047916412354, "learning_rate": 1.848586764506047e-05, "loss": 2.1282, "step": 96010 }, { "epoch": 6.5236445169180595, "grad_norm": 4.255963325500488, "learning_rate": 1.8481621144177198e-05, "loss": 2.2089, "step": 96015 }, { "epoch": 6.5239842369887215, "grad_norm": 3.6051433086395264, "learning_rate": 1.8477374643293926e-05, "loss": 2.1548, "step": 96020 }, { "epoch": 6.524323957059383, "grad_norm": 3.371272087097168, "learning_rate": 1.8473128142410654e-05, "loss": 2.0923, "step": 96025 }, { "epoch": 6.524663677130045, "grad_norm": 3.5998497009277344, "learning_rate": 1.8468881641527382e-05, "loss": 2.049, "step": 
96030 }, { "epoch": 6.525003397200707, "grad_norm": 3.6502654552459717, "learning_rate": 1.846463514064411e-05, "loss": 1.9957, "step": 96035 }, { "epoch": 6.525343117271368, "grad_norm": 4.089052677154541, "learning_rate": 1.846038863976084e-05, "loss": 2.006, "step": 96040 }, { "epoch": 6.52568283734203, "grad_norm": 3.31634259223938, "learning_rate": 1.8456142138877566e-05, "loss": 2.1959, "step": 96045 }, { "epoch": 6.526022557412692, "grad_norm": 4.180511951446533, "learning_rate": 1.8451895637994294e-05, "loss": 2.1067, "step": 96050 }, { "epoch": 6.526362277483353, "grad_norm": 4.422313690185547, "learning_rate": 1.8447649137111022e-05, "loss": 2.271, "step": 96055 }, { "epoch": 6.5267019975540155, "grad_norm": 3.9082586765289307, "learning_rate": 1.844340263622775e-05, "loss": 2.2479, "step": 96060 }, { "epoch": 6.5270417176246776, "grad_norm": 3.20255708694458, "learning_rate": 1.8439156135344475e-05, "loss": 1.86, "step": 96065 }, { "epoch": 6.527381437695339, "grad_norm": 3.4106709957122803, "learning_rate": 1.8434909634461206e-05, "loss": 2.0024, "step": 96070 }, { "epoch": 6.527721157766001, "grad_norm": 3.2431416511535645, "learning_rate": 1.8430663133577934e-05, "loss": 2.0338, "step": 96075 }, { "epoch": 6.528060877836663, "grad_norm": 5.157329082489014, "learning_rate": 1.842641663269466e-05, "loss": 2.376, "step": 96080 }, { "epoch": 6.528400597907324, "grad_norm": 3.7474842071533203, "learning_rate": 1.842217013181139e-05, "loss": 2.2867, "step": 96085 }, { "epoch": 6.528740317977986, "grad_norm": 3.3435869216918945, "learning_rate": 1.8417923630928115e-05, "loss": 1.6382, "step": 96090 }, { "epoch": 6.529080038048648, "grad_norm": 3.029862880706787, "learning_rate": 1.8413677130044843e-05, "loss": 2.0299, "step": 96095 }, { "epoch": 6.529419758119309, "grad_norm": 3.671299457550049, "learning_rate": 1.840943062916157e-05, "loss": 2.1427, "step": 96100 }, { "epoch": 6.5297594781899715, "grad_norm": 3.1551167964935303, "learning_rate": 
1.84051841282783e-05, "loss": 2.0165, "step": 96105 }, { "epoch": 6.530099198260634, "grad_norm": 4.696447849273682, "learning_rate": 1.8400937627395027e-05, "loss": 2.2575, "step": 96110 }, { "epoch": 6.530438918331295, "grad_norm": 4.1793532371521, "learning_rate": 1.8396691126511755e-05, "loss": 2.2766, "step": 96115 }, { "epoch": 6.530778638401957, "grad_norm": 3.0337605476379395, "learning_rate": 1.8392444625628483e-05, "loss": 2.1152, "step": 96120 }, { "epoch": 6.531118358472619, "grad_norm": 3.7622103691101074, "learning_rate": 1.838819812474521e-05, "loss": 2.071, "step": 96125 }, { "epoch": 6.53145807854328, "grad_norm": 4.171994686126709, "learning_rate": 1.838395162386194e-05, "loss": 2.2492, "step": 96130 }, { "epoch": 6.531797798613942, "grad_norm": 4.472647190093994, "learning_rate": 1.8379705122978667e-05, "loss": 2.1229, "step": 96135 }, { "epoch": 6.532137518684604, "grad_norm": 3.000427007675171, "learning_rate": 1.8375458622095395e-05, "loss": 1.5563, "step": 96140 }, { "epoch": 6.532477238755265, "grad_norm": 3.231482744216919, "learning_rate": 1.8371212121212123e-05, "loss": 1.9479, "step": 96145 }, { "epoch": 6.5328169588259275, "grad_norm": 3.4565370082855225, "learning_rate": 1.8366965620328848e-05, "loss": 2.1536, "step": 96150 }, { "epoch": 6.53315667889659, "grad_norm": 3.4939403533935547, "learning_rate": 1.836271911944558e-05, "loss": 1.9391, "step": 96155 }, { "epoch": 6.533496398967251, "grad_norm": 3.3104496002197266, "learning_rate": 1.8358472618562307e-05, "loss": 1.9482, "step": 96160 }, { "epoch": 6.533836119037913, "grad_norm": 3.3513407707214355, "learning_rate": 1.8354226117679032e-05, "loss": 2.2107, "step": 96165 }, { "epoch": 6.534175839108575, "grad_norm": 3.365788698196411, "learning_rate": 1.8349979616795763e-05, "loss": 1.9428, "step": 96170 }, { "epoch": 6.534515559179236, "grad_norm": 3.391422748565674, "learning_rate": 1.8345733115912488e-05, "loss": 2.3939, "step": 96175 }, { "epoch": 6.534855279249898, 
"grad_norm": 3.8881657123565674, "learning_rate": 1.8341486615029216e-05, "loss": 2.0058, "step": 96180 }, { "epoch": 6.535194999320559, "grad_norm": 3.3757712841033936, "learning_rate": 1.8337240114145944e-05, "loss": 2.2266, "step": 96185 }, { "epoch": 6.535534719391221, "grad_norm": 3.7359704971313477, "learning_rate": 1.8332993613262672e-05, "loss": 2.201, "step": 96190 }, { "epoch": 6.5358744394618835, "grad_norm": 3.9752607345581055, "learning_rate": 1.83287471123794e-05, "loss": 1.8763, "step": 96195 }, { "epoch": 6.536214159532545, "grad_norm": 3.9389896392822266, "learning_rate": 1.8324500611496128e-05, "loss": 1.8093, "step": 96200 }, { "epoch": 6.536553879603207, "grad_norm": 3.446775197982788, "learning_rate": 1.8320254110612856e-05, "loss": 2.334, "step": 96205 }, { "epoch": 6.536893599673869, "grad_norm": 3.7828116416931152, "learning_rate": 1.8316007609729584e-05, "loss": 2.22, "step": 96210 }, { "epoch": 6.53723331974453, "grad_norm": 3.920515775680542, "learning_rate": 1.8311761108846312e-05, "loss": 2.1998, "step": 96215 }, { "epoch": 6.537573039815192, "grad_norm": 3.4865550994873047, "learning_rate": 1.830751460796304e-05, "loss": 2.1627, "step": 96220 }, { "epoch": 6.537912759885854, "grad_norm": 4.191839218139648, "learning_rate": 1.8303268107079768e-05, "loss": 1.8824, "step": 96225 }, { "epoch": 6.538252479956515, "grad_norm": 3.920050859451294, "learning_rate": 1.8299021606196496e-05, "loss": 1.9465, "step": 96230 }, { "epoch": 6.5385922000271774, "grad_norm": 3.4744622707366943, "learning_rate": 1.829477510531322e-05, "loss": 2.0114, "step": 96235 }, { "epoch": 6.5389319200978395, "grad_norm": 3.906506061553955, "learning_rate": 1.8290528604429952e-05, "loss": 2.1377, "step": 96240 }, { "epoch": 6.539271640168501, "grad_norm": 3.398266553878784, "learning_rate": 1.828628210354668e-05, "loss": 2.2555, "step": 96245 }, { "epoch": 6.539611360239163, "grad_norm": 3.238248825073242, "learning_rate": 1.8282035602663404e-05, "loss": 1.7775, 
"step": 96250 }, { "epoch": 6.539951080309825, "grad_norm": 3.2072291374206543, "learning_rate": 1.8277789101780136e-05, "loss": 2.1821, "step": 96255 }, { "epoch": 6.540290800380486, "grad_norm": 3.9995622634887695, "learning_rate": 1.827354260089686e-05, "loss": 2.1424, "step": 96260 }, { "epoch": 6.540630520451148, "grad_norm": 2.746030569076538, "learning_rate": 1.826929610001359e-05, "loss": 2.308, "step": 96265 }, { "epoch": 6.54097024052181, "grad_norm": 2.909269094467163, "learning_rate": 1.826504959913032e-05, "loss": 1.8993, "step": 96270 }, { "epoch": 6.541309960592471, "grad_norm": 3.8660788536071777, "learning_rate": 1.8260803098247045e-05, "loss": 2.1382, "step": 96275 }, { "epoch": 6.5416496806631335, "grad_norm": 4.163863658905029, "learning_rate": 1.8256556597363773e-05, "loss": 1.7576, "step": 96280 }, { "epoch": 6.5419894007337955, "grad_norm": 3.267908811569214, "learning_rate": 1.82523100964805e-05, "loss": 1.8943, "step": 96285 }, { "epoch": 6.542329120804457, "grad_norm": 3.3006975650787354, "learning_rate": 1.824806359559723e-05, "loss": 2.3801, "step": 96290 }, { "epoch": 6.542668840875119, "grad_norm": 3.537869930267334, "learning_rate": 1.8243817094713957e-05, "loss": 2.3802, "step": 96295 }, { "epoch": 6.543008560945781, "grad_norm": 4.248052597045898, "learning_rate": 1.8239570593830685e-05, "loss": 2.1201, "step": 96300 }, { "epoch": 6.543348281016442, "grad_norm": 3.8612372875213623, "learning_rate": 1.8235324092947413e-05, "loss": 2.0257, "step": 96305 }, { "epoch": 6.543688001087104, "grad_norm": 3.1373775005340576, "learning_rate": 1.823107759206414e-05, "loss": 2.0149, "step": 96310 }, { "epoch": 6.544027721157766, "grad_norm": 3.8721816539764404, "learning_rate": 1.822683109118087e-05, "loss": 2.4327, "step": 96315 }, { "epoch": 6.544367441228427, "grad_norm": 3.6438310146331787, "learning_rate": 1.8222584590297597e-05, "loss": 2.0008, "step": 96320 }, { "epoch": 6.5447071612990895, "grad_norm": 3.881121873855591, 
"learning_rate": 1.8218338089414325e-05, "loss": 2.167, "step": 96325 }, { "epoch": 6.5450468813697515, "grad_norm": 3.2982177734375, "learning_rate": 1.8214091588531053e-05, "loss": 1.8623, "step": 96330 }, { "epoch": 6.545386601440413, "grad_norm": 3.289699077606201, "learning_rate": 1.8209845087647777e-05, "loss": 2.2618, "step": 96335 }, { "epoch": 6.545726321511075, "grad_norm": 3.162919282913208, "learning_rate": 1.820559858676451e-05, "loss": 2.5008, "step": 96340 }, { "epoch": 6.546066041581737, "grad_norm": 3.698554754257202, "learning_rate": 1.8201352085881233e-05, "loss": 2.1398, "step": 96345 }, { "epoch": 6.546405761652398, "grad_norm": 3.466096878051758, "learning_rate": 1.819710558499796e-05, "loss": 1.9269, "step": 96350 }, { "epoch": 6.54674548172306, "grad_norm": 3.532744884490967, "learning_rate": 1.8192859084114693e-05, "loss": 2.013, "step": 96355 }, { "epoch": 6.547085201793722, "grad_norm": 3.7787327766418457, "learning_rate": 1.8188612583231417e-05, "loss": 2.0886, "step": 96360 }, { "epoch": 6.547424921864383, "grad_norm": 3.4566187858581543, "learning_rate": 1.8184366082348145e-05, "loss": 2.2111, "step": 96365 }, { "epoch": 6.5477646419350455, "grad_norm": 3.007841110229492, "learning_rate": 1.8180119581464873e-05, "loss": 2.177, "step": 96370 }, { "epoch": 6.548104362005708, "grad_norm": 3.2755343914031982, "learning_rate": 1.81758730805816e-05, "loss": 2.0008, "step": 96375 }, { "epoch": 6.548444082076369, "grad_norm": 3.5538642406463623, "learning_rate": 1.817162657969833e-05, "loss": 2.1225, "step": 96380 }, { "epoch": 6.548783802147031, "grad_norm": 3.5819973945617676, "learning_rate": 1.8167380078815057e-05, "loss": 2.0196, "step": 96385 }, { "epoch": 6.549123522217693, "grad_norm": 3.8425605297088623, "learning_rate": 1.8163133577931785e-05, "loss": 2.1733, "step": 96390 }, { "epoch": 6.549463242288354, "grad_norm": 3.475795030593872, "learning_rate": 1.8158887077048513e-05, "loss": 2.2074, "step": 96395 }, { "epoch": 
6.549802962359016, "grad_norm": 3.0235350131988525, "learning_rate": 1.815464057616524e-05, "loss": 1.9414, "step": 96400 }, { "epoch": 6.550142682429678, "grad_norm": 3.759955406188965, "learning_rate": 1.815039407528197e-05, "loss": 2.079, "step": 96405 }, { "epoch": 6.550482402500339, "grad_norm": 3.8412318229675293, "learning_rate": 1.8146147574398697e-05, "loss": 2.099, "step": 96410 }, { "epoch": 6.5508221225710015, "grad_norm": 4.086604118347168, "learning_rate": 1.8141901073515425e-05, "loss": 2.2243, "step": 96415 }, { "epoch": 6.551161842641664, "grad_norm": 3.3585238456726074, "learning_rate": 1.813765457263215e-05, "loss": 2.2165, "step": 96420 }, { "epoch": 6.551501562712325, "grad_norm": 4.436343193054199, "learning_rate": 1.813340807174888e-05, "loss": 2.1857, "step": 96425 }, { "epoch": 6.551841282782987, "grad_norm": 3.1065142154693604, "learning_rate": 1.8129161570865606e-05, "loss": 2.2194, "step": 96430 }, { "epoch": 6.552181002853649, "grad_norm": 4.030655384063721, "learning_rate": 1.8124915069982334e-05, "loss": 2.2096, "step": 96435 }, { "epoch": 6.55252072292431, "grad_norm": 2.836956262588501, "learning_rate": 1.8120668569099065e-05, "loss": 1.9332, "step": 96440 }, { "epoch": 6.552860442994972, "grad_norm": 3.548248291015625, "learning_rate": 1.811642206821579e-05, "loss": 1.9761, "step": 96445 }, { "epoch": 6.553200163065634, "grad_norm": 4.044103622436523, "learning_rate": 1.8112175567332518e-05, "loss": 2.025, "step": 96450 }, { "epoch": 6.553539883136295, "grad_norm": 4.264043807983398, "learning_rate": 1.8107929066449246e-05, "loss": 1.9868, "step": 96455 }, { "epoch": 6.5538796032069575, "grad_norm": 3.100625514984131, "learning_rate": 1.8103682565565974e-05, "loss": 2.0768, "step": 96460 }, { "epoch": 6.55421932327762, "grad_norm": 3.255462884902954, "learning_rate": 1.8099436064682702e-05, "loss": 2.061, "step": 96465 }, { "epoch": 6.554559043348281, "grad_norm": 3.1036376953125, "learning_rate": 1.809518956379943e-05, "loss": 
2.375, "step": 96470 }, { "epoch": 6.554898763418943, "grad_norm": 4.454909324645996, "learning_rate": 1.8090943062916158e-05, "loss": 2.1041, "step": 96475 }, { "epoch": 6.555238483489605, "grad_norm": 3.4639663696289062, "learning_rate": 1.8086696562032886e-05, "loss": 2.0159, "step": 96480 }, { "epoch": 6.555578203560266, "grad_norm": 3.073667049407959, "learning_rate": 1.8082450061149614e-05, "loss": 2.0861, "step": 96485 }, { "epoch": 6.555917923630928, "grad_norm": 3.9556875228881836, "learning_rate": 1.8078203560266342e-05, "loss": 2.235, "step": 96490 }, { "epoch": 6.55625764370159, "grad_norm": 3.988981246948242, "learning_rate": 1.807395705938307e-05, "loss": 1.8965, "step": 96495 }, { "epoch": 6.556597363772251, "grad_norm": 4.506507396697998, "learning_rate": 1.8069710558499798e-05, "loss": 2.1989, "step": 96500 }, { "epoch": 6.5569370838429135, "grad_norm": 3.2484943866729736, "learning_rate": 1.8065464057616523e-05, "loss": 2.0309, "step": 96505 }, { "epoch": 6.557276803913576, "grad_norm": 3.369097948074341, "learning_rate": 1.8061217556733254e-05, "loss": 2.2251, "step": 96510 }, { "epoch": 6.557616523984237, "grad_norm": 3.4100966453552246, "learning_rate": 1.8056971055849982e-05, "loss": 2.07, "step": 96515 }, { "epoch": 6.557956244054899, "grad_norm": 3.0568222999572754, "learning_rate": 1.8052724554966707e-05, "loss": 2.2274, "step": 96520 }, { "epoch": 6.558295964125561, "grad_norm": 4.4377641677856445, "learning_rate": 1.8048478054083438e-05, "loss": 2.2653, "step": 96525 }, { "epoch": 6.558635684196222, "grad_norm": 3.356018304824829, "learning_rate": 1.8044231553200163e-05, "loss": 2.1748, "step": 96530 }, { "epoch": 6.558975404266884, "grad_norm": 4.257523059844971, "learning_rate": 1.803998505231689e-05, "loss": 2.3842, "step": 96535 }, { "epoch": 6.559315124337546, "grad_norm": 3.351038694381714, "learning_rate": 1.803573855143362e-05, "loss": 1.9915, "step": 96540 }, { "epoch": 6.5596548444082075, "grad_norm": 3.005025863647461, 
"learning_rate": 1.8031492050550347e-05, "loss": 2.1418, "step": 96545 }, { "epoch": 6.5599945644788695, "grad_norm": 3.8500685691833496, "learning_rate": 1.8027245549667075e-05, "loss": 2.2076, "step": 96550 }, { "epoch": 6.560334284549532, "grad_norm": 3.915281295776367, "learning_rate": 1.8022999048783803e-05, "loss": 2.1036, "step": 96555 }, { "epoch": 6.560674004620193, "grad_norm": 3.4764933586120605, "learning_rate": 1.801875254790053e-05, "loss": 2.2432, "step": 96560 }, { "epoch": 6.561013724690855, "grad_norm": 3.981565475463867, "learning_rate": 1.801450604701726e-05, "loss": 1.9278, "step": 96565 }, { "epoch": 6.561353444761517, "grad_norm": 3.5290110111236572, "learning_rate": 1.8010259546133987e-05, "loss": 1.9609, "step": 96570 }, { "epoch": 6.561693164832178, "grad_norm": 3.6255831718444824, "learning_rate": 1.8006013045250715e-05, "loss": 1.962, "step": 96575 }, { "epoch": 6.56203288490284, "grad_norm": 3.043347120285034, "learning_rate": 1.8001766544367443e-05, "loss": 2.2084, "step": 96580 }, { "epoch": 6.562372604973502, "grad_norm": 3.998795509338379, "learning_rate": 1.799752004348417e-05, "loss": 1.9636, "step": 96585 }, { "epoch": 6.5627123250441635, "grad_norm": 3.526397705078125, "learning_rate": 1.7993273542600895e-05, "loss": 2.1353, "step": 96590 }, { "epoch": 6.5630520451148255, "grad_norm": 4.091461658477783, "learning_rate": 1.7989027041717627e-05, "loss": 2.0096, "step": 96595 }, { "epoch": 6.563391765185488, "grad_norm": 4.3791069984436035, "learning_rate": 1.7984780540834355e-05, "loss": 1.9775, "step": 96600 }, { "epoch": 6.563731485256149, "grad_norm": 3.7295429706573486, "learning_rate": 1.798053403995108e-05, "loss": 1.9103, "step": 96605 }, { "epoch": 6.564071205326811, "grad_norm": 3.071223020553589, "learning_rate": 1.797628753906781e-05, "loss": 2.2305, "step": 96610 }, { "epoch": 6.564410925397473, "grad_norm": 3.1879048347473145, "learning_rate": 1.7972041038184535e-05, "loss": 2.2491, "step": 96615 }, { "epoch": 
6.564750645468134, "grad_norm": 3.245950222015381, "learning_rate": 1.7967794537301263e-05, "loss": 1.9134, "step": 96620 }, { "epoch": 6.565090365538796, "grad_norm": 4.661831855773926, "learning_rate": 1.796354803641799e-05, "loss": 2.0548, "step": 96625 }, { "epoch": 6.565430085609458, "grad_norm": 3.0554616451263428, "learning_rate": 1.795930153553472e-05, "loss": 2.1091, "step": 96630 }, { "epoch": 6.5657698056801195, "grad_norm": 3.6829886436462402, "learning_rate": 1.7955055034651448e-05, "loss": 2.1251, "step": 96635 }, { "epoch": 6.5661095257507816, "grad_norm": 3.829334259033203, "learning_rate": 1.7950808533768176e-05, "loss": 1.9335, "step": 96640 }, { "epoch": 6.566449245821444, "grad_norm": 3.907745122909546, "learning_rate": 1.7946562032884904e-05, "loss": 2.1007, "step": 96645 }, { "epoch": 6.566788965892105, "grad_norm": 3.8373067378997803, "learning_rate": 1.794231553200163e-05, "loss": 2.3743, "step": 96650 }, { "epoch": 6.567128685962767, "grad_norm": 3.488248348236084, "learning_rate": 1.793806903111836e-05, "loss": 2.1873, "step": 96655 }, { "epoch": 6.567468406033428, "grad_norm": 3.465965986251831, "learning_rate": 1.7933822530235088e-05, "loss": 2.1658, "step": 96660 }, { "epoch": 6.56780812610409, "grad_norm": 3.8427562713623047, "learning_rate": 1.7929576029351816e-05, "loss": 2.1812, "step": 96665 }, { "epoch": 6.568147846174752, "grad_norm": 3.952177047729492, "learning_rate": 1.7925329528468544e-05, "loss": 2.3003, "step": 96670 }, { "epoch": 6.568487566245413, "grad_norm": 3.5593206882476807, "learning_rate": 1.7921083027585268e-05, "loss": 2.2566, "step": 96675 }, { "epoch": 6.5688272863160755, "grad_norm": 3.3316330909729004, "learning_rate": 1.7916836526702e-05, "loss": 2.1315, "step": 96680 }, { "epoch": 6.569167006386738, "grad_norm": 3.81367564201355, "learning_rate": 1.7913439325995382e-05, "loss": 2.1677, "step": 96685 }, { "epoch": 6.569506726457399, "grad_norm": 4.232397556304932, "learning_rate": 1.790919282511211e-05, 
"loss": 1.9682, "step": 96690 }, { "epoch": 6.569846446528061, "grad_norm": 4.11082649230957, "learning_rate": 1.7904946324228835e-05, "loss": 1.9291, "step": 96695 }, { "epoch": 6.570186166598723, "grad_norm": 3.2966248989105225, "learning_rate": 1.7900699823345566e-05, "loss": 2.1091, "step": 96700 }, { "epoch": 6.570525886669384, "grad_norm": 3.2841079235076904, "learning_rate": 1.789645332246229e-05, "loss": 1.8604, "step": 96705 }, { "epoch": 6.570865606740046, "grad_norm": 3.6553330421447754, "learning_rate": 1.789220682157902e-05, "loss": 1.7559, "step": 96710 }, { "epoch": 6.571205326810708, "grad_norm": 4.149831295013428, "learning_rate": 1.788796032069575e-05, "loss": 2.0375, "step": 96715 }, { "epoch": 6.571545046881369, "grad_norm": 4.218576908111572, "learning_rate": 1.7883713819812475e-05, "loss": 2.0953, "step": 96720 }, { "epoch": 6.5718847669520315, "grad_norm": 3.029980182647705, "learning_rate": 1.7879467318929203e-05, "loss": 1.8026, "step": 96725 }, { "epoch": 6.572224487022694, "grad_norm": 3.2845985889434814, "learning_rate": 1.787522081804593e-05, "loss": 1.9624, "step": 96730 }, { "epoch": 6.572564207093355, "grad_norm": 4.6889519691467285, "learning_rate": 1.787097431716266e-05, "loss": 1.8915, "step": 96735 }, { "epoch": 6.572903927164017, "grad_norm": 3.6924421787261963, "learning_rate": 1.7866727816279387e-05, "loss": 2.0914, "step": 96740 }, { "epoch": 6.573243647234679, "grad_norm": 3.618837594985962, "learning_rate": 1.7862481315396115e-05, "loss": 1.7261, "step": 96745 }, { "epoch": 6.57358336730534, "grad_norm": 3.2024900913238525, "learning_rate": 1.7858234814512843e-05, "loss": 2.3083, "step": 96750 }, { "epoch": 6.573923087376002, "grad_norm": 3.1800413131713867, "learning_rate": 1.7853988313629567e-05, "loss": 2.0177, "step": 96755 }, { "epoch": 6.574262807446664, "grad_norm": 4.307039260864258, "learning_rate": 1.78497418127463e-05, "loss": 2.0286, "step": 96760 }, { "epoch": 6.574602527517325, "grad_norm": 3.669215679168701, 
"learning_rate": 1.7845495311863027e-05, "loss": 2.1245, "step": 96765 }, { "epoch": 6.5749422475879875, "grad_norm": 3.5564520359039307, "learning_rate": 1.7841248810979755e-05, "loss": 1.9736, "step": 96770 }, { "epoch": 6.57528196765865, "grad_norm": 3.2305068969726562, "learning_rate": 1.7837002310096483e-05, "loss": 2.1931, "step": 96775 }, { "epoch": 6.575621687729311, "grad_norm": 3.1341631412506104, "learning_rate": 1.7832755809213207e-05, "loss": 1.9688, "step": 96780 }, { "epoch": 6.575961407799973, "grad_norm": 3.6500778198242188, "learning_rate": 1.782850930832994e-05, "loss": 2.1627, "step": 96785 }, { "epoch": 6.576301127870635, "grad_norm": 3.513674259185791, "learning_rate": 1.7824262807446663e-05, "loss": 2.0013, "step": 96790 }, { "epoch": 6.576640847941296, "grad_norm": 3.29421067237854, "learning_rate": 1.782001630656339e-05, "loss": 2.2724, "step": 96795 }, { "epoch": 6.576980568011958, "grad_norm": 3.28279185295105, "learning_rate": 1.7815769805680123e-05, "loss": 2.346, "step": 96800 }, { "epoch": 6.57732028808262, "grad_norm": null, "learning_rate": 1.7812372604973505e-05, "loss": 2.1231, "step": 96805 }, { "epoch": 6.5776600081532814, "grad_norm": 2.945279121398926, "learning_rate": 1.780812610409023e-05, "loss": 2.311, "step": 96810 }, { "epoch": 6.5779997282239435, "grad_norm": 4.1613359451293945, "learning_rate": 1.7803879603206958e-05, "loss": 2.1142, "step": 96815 }, { "epoch": 6.578339448294606, "grad_norm": 3.505613088607788, "learning_rate": 1.7799633102323686e-05, "loss": 2.1891, "step": 96820 }, { "epoch": 6.578679168365267, "grad_norm": 4.242824077606201, "learning_rate": 1.7795386601440414e-05, "loss": 2.2091, "step": 96825 }, { "epoch": 6.579018888435929, "grad_norm": 3.460143566131592, "learning_rate": 1.7791140100557142e-05, "loss": 1.6771, "step": 96830 }, { "epoch": 6.579358608506591, "grad_norm": 3.44699764251709, "learning_rate": 1.778689359967387e-05, "loss": 2.2478, "step": 96835 }, { "epoch": 6.579698328577252,
"grad_norm": 3.5325920581817627, "learning_rate": 1.7782647098790598e-05, "loss": 2.0629, "step": 96840 }, { "epoch": 6.580038048647914, "grad_norm": 3.589367389678955, "learning_rate": 1.7778400597907326e-05, "loss": 2.04, "step": 96845 }, { "epoch": 6.580377768718576, "grad_norm": 2.9631593227386475, "learning_rate": 1.7774154097024054e-05, "loss": 2.125, "step": 96850 }, { "epoch": 6.5807174887892375, "grad_norm": 4.013187408447266, "learning_rate": 1.7769907596140782e-05, "loss": 2.1107, "step": 96855 }, { "epoch": 6.5810572088598995, "grad_norm": 3.223647117614746, "learning_rate": 1.7765661095257507e-05, "loss": 2.3489, "step": 96860 }, { "epoch": 6.581396928930561, "grad_norm": 3.2392492294311523, "learning_rate": 1.7761414594374238e-05, "loss": 2.3034, "step": 96865 }, { "epoch": 6.581736649001223, "grad_norm": 4.743081569671631, "learning_rate": 1.7757168093490963e-05, "loss": 1.9862, "step": 96870 }, { "epoch": 6.582076369071885, "grad_norm": 2.7537686824798584, "learning_rate": 1.775292159260769e-05, "loss": 2.2452, "step": 96875 }, { "epoch": 6.582416089142546, "grad_norm": 3.2707903385162354, "learning_rate": 1.7748675091724422e-05, "loss": 2.15, "step": 96880 }, { "epoch": 6.582755809213208, "grad_norm": 3.8572988510131836, "learning_rate": 1.7744428590841147e-05, "loss": 2.2615, "step": 96885 }, { "epoch": 6.58309552928387, "grad_norm": 3.835402011871338, "learning_rate": 1.7740182089957878e-05, "loss": 1.9377, "step": 96890 }, { "epoch": 6.583435249354531, "grad_norm": 3.3287248611450195, "learning_rate": 1.7735935589074603e-05, "loss": 2.132, "step": 96895 }, { "epoch": 6.5837749694251935, "grad_norm": 3.737647294998169, "learning_rate": 1.773168908819133e-05, "loss": 2.2649, "step": 96900 }, { "epoch": 6.5841146894958555, "grad_norm": 3.3263707160949707, "learning_rate": 1.772744258730806e-05, "loss": 2.1888, "step": 96905 }, { "epoch": 6.584454409566517, "grad_norm": 3.619980573654175, "learning_rate": 1.7723196086424787e-05, "loss": 2.0082, 
"step": 96910 }, { "epoch": 6.584794129637179, "grad_norm": 3.1520702838897705, "learning_rate": 1.7718949585541515e-05, "loss": 2.0149, "step": 96915 }, { "epoch": 6.585133849707841, "grad_norm": 4.451216220855713, "learning_rate": 1.7714703084658243e-05, "loss": 2.1718, "step": 96920 }, { "epoch": 6.585473569778502, "grad_norm": 3.685335874557495, "learning_rate": 1.771045658377497e-05, "loss": 2.0059, "step": 96925 }, { "epoch": 6.585813289849164, "grad_norm": 3.1696860790252686, "learning_rate": 1.77062100828917e-05, "loss": 2.1879, "step": 96930 }, { "epoch": 6.586153009919826, "grad_norm": 3.8508994579315186, "learning_rate": 1.7701963582008427e-05, "loss": 1.9075, "step": 96935 }, { "epoch": 6.586492729990487, "grad_norm": 3.9857017993927, "learning_rate": 1.7697717081125155e-05, "loss": 2.0843, "step": 96940 }, { "epoch": 6.5868324500611495, "grad_norm": 3.5505757331848145, "learning_rate": 1.769347058024188e-05, "loss": 2.0377, "step": 96945 }, { "epoch": 6.5871721701318116, "grad_norm": 2.9899790287017822, "learning_rate": 1.768922407935861e-05, "loss": 1.9466, "step": 96950 }, { "epoch": 6.587511890202473, "grad_norm": 3.9088456630706787, "learning_rate": 1.7684977578475335e-05, "loss": 1.9872, "step": 96955 }, { "epoch": 6.587851610273135, "grad_norm": 3.884019374847412, "learning_rate": 1.7680731077592063e-05, "loss": 2.3179, "step": 96960 }, { "epoch": 6.588191330343797, "grad_norm": 3.4662485122680664, "learning_rate": 1.7676484576708795e-05, "loss": 1.9108, "step": 96965 }, { "epoch": 6.588531050414458, "grad_norm": 3.210270881652832, "learning_rate": 1.767223807582552e-05, "loss": 2.4546, "step": 96970 }, { "epoch": 6.58887077048512, "grad_norm": 3.653049945831299, "learning_rate": 1.766799157494225e-05, "loss": 1.7735, "step": 96975 }, { "epoch": 6.589210490555782, "grad_norm": 4.0941009521484375, "learning_rate": 1.7663745074058975e-05, "loss": 2.1459, "step": 96980 }, { "epoch": 6.589550210626443, "grad_norm": 4.011857032775879, "learning_rate": 
1.7659498573175703e-05, "loss": 2.1955, "step": 96985 }, { "epoch": 6.5898899306971055, "grad_norm": 3.476768732070923, "learning_rate": 1.7655252072292435e-05, "loss": 2.2974, "step": 96990 }, { "epoch": 6.590229650767768, "grad_norm": 3.379981517791748, "learning_rate": 1.765100557140916e-05, "loss": 2.2746, "step": 96995 }, { "epoch": 6.590569370838429, "grad_norm": 4.6595139503479, "learning_rate": 1.7646759070525887e-05, "loss": 2.1489, "step": 97000 }, { "epoch": 6.590909090909091, "grad_norm": 3.4049999713897705, "learning_rate": 1.7642512569642615e-05, "loss": 1.956, "step": 97005 }, { "epoch": 6.591248810979753, "grad_norm": 3.1326215267181396, "learning_rate": 1.7638266068759343e-05, "loss": 1.8215, "step": 97010 }, { "epoch": 6.591588531050414, "grad_norm": 3.6469523906707764, "learning_rate": 1.763401956787607e-05, "loss": 1.9744, "step": 97015 }, { "epoch": 6.591928251121076, "grad_norm": 4.0332350730896, "learning_rate": 1.76297730669928e-05, "loss": 2.1806, "step": 97020 }, { "epoch": 6.592267971191738, "grad_norm": 3.9714577198028564, "learning_rate": 1.7625526566109527e-05, "loss": 2.0318, "step": 97025 }, { "epoch": 6.592607691262399, "grad_norm": 3.8500187397003174, "learning_rate": 1.7621280065226252e-05, "loss": 2.2683, "step": 97030 }, { "epoch": 6.5929474113330615, "grad_norm": 3.6840639114379883, "learning_rate": 1.7617033564342983e-05, "loss": 1.7334, "step": 97035 }, { "epoch": 6.593287131403724, "grad_norm": 3.5995850563049316, "learning_rate": 1.761278706345971e-05, "loss": 1.9889, "step": 97040 }, { "epoch": 6.593626851474385, "grad_norm": 4.135357856750488, "learning_rate": 1.7608540562576436e-05, "loss": 2.0985, "step": 97045 }, { "epoch": 6.593966571545047, "grad_norm": 4.440504550933838, "learning_rate": 1.7604294061693167e-05, "loss": 2.0492, "step": 97050 }, { "epoch": 6.594306291615709, "grad_norm": 3.9666266441345215, "learning_rate": 1.7600047560809892e-05, "loss": 1.8902, "step": 97055 }, { "epoch": 6.59464601168637, 
"grad_norm": 5.141739845275879, "learning_rate": 1.7595801059926623e-05, "loss": 2.0839, "step": 97060 }, { "epoch": 6.594985731757032, "grad_norm": 2.9993984699249268, "learning_rate": 1.7591554559043348e-05, "loss": 1.7938, "step": 97065 }, { "epoch": 6.595325451827694, "grad_norm": 3.745244026184082, "learning_rate": 1.7587308058160076e-05, "loss": 2.0969, "step": 97070 }, { "epoch": 6.595665171898355, "grad_norm": 3.4824960231781006, "learning_rate": 1.7583061557276807e-05, "loss": 2.0989, "step": 97075 }, { "epoch": 6.5960048919690175, "grad_norm": 4.1652984619140625, "learning_rate": 1.7578815056393532e-05, "loss": 1.9449, "step": 97080 }, { "epoch": 6.59634461203968, "grad_norm": 3.339452028274536, "learning_rate": 1.757456855551026e-05, "loss": 2.1453, "step": 97085 }, { "epoch": 6.596684332110341, "grad_norm": 2.7927708625793457, "learning_rate": 1.7570322054626988e-05, "loss": 2.2435, "step": 97090 }, { "epoch": 6.597024052181003, "grad_norm": 3.819706439971924, "learning_rate": 1.7566075553743716e-05, "loss": 2.135, "step": 97095 }, { "epoch": 6.597363772251665, "grad_norm": 3.970041036605835, "learning_rate": 1.7561829052860444e-05, "loss": 2.1363, "step": 97100 }, { "epoch": 6.597703492322326, "grad_norm": 4.263599395751953, "learning_rate": 1.7557582551977172e-05, "loss": 2.1131, "step": 97105 }, { "epoch": 6.598043212392988, "grad_norm": 3.9869298934936523, "learning_rate": 1.75533360510939e-05, "loss": 2.2439, "step": 97110 }, { "epoch": 6.59838293246365, "grad_norm": 4.180626392364502, "learning_rate": 1.7549089550210625e-05, "loss": 2.054, "step": 97115 }, { "epoch": 6.5987226525343115, "grad_norm": 3.9101059436798096, "learning_rate": 1.7544843049327356e-05, "loss": 1.984, "step": 97120 }, { "epoch": 6.5990623726049735, "grad_norm": 3.59271502494812, "learning_rate": 1.7540596548444084e-05, "loss": 1.8503, "step": 97125 }, { "epoch": 6.599402092675636, "grad_norm": 3.658360481262207, "learning_rate": 1.753635004756081e-05, "loss": 1.9783, "step": 
97130 }, { "epoch": 6.599741812746297, "grad_norm": 3.079909563064575, "learning_rate": 1.753210354667754e-05, "loss": 1.9607, "step": 97135 }, { "epoch": 6.600081532816959, "grad_norm": 3.3237268924713135, "learning_rate": 1.7527857045794265e-05, "loss": 1.9796, "step": 97140 }, { "epoch": 6.600421252887621, "grad_norm": 3.9022109508514404, "learning_rate": 1.7523610544910996e-05, "loss": 1.92, "step": 97145 }, { "epoch": 6.600760972958282, "grad_norm": 4.209929466247559, "learning_rate": 1.751936404402772e-05, "loss": 2.0711, "step": 97150 }, { "epoch": 6.601100693028944, "grad_norm": 2.8301260471343994, "learning_rate": 1.751511754314445e-05, "loss": 2.0647, "step": 97155 }, { "epoch": 6.601440413099606, "grad_norm": 3.586613416671753, "learning_rate": 1.751087104226118e-05, "loss": 2.3253, "step": 97160 }, { "epoch": 6.6017801331702675, "grad_norm": 3.6464314460754395, "learning_rate": 1.7506624541377905e-05, "loss": 2.4349, "step": 97165 }, { "epoch": 6.6021198532409295, "grad_norm": 3.9236950874328613, "learning_rate": 1.7502378040494633e-05, "loss": 2.207, "step": 97170 }, { "epoch": 6.602459573311592, "grad_norm": 2.957885980606079, "learning_rate": 1.749813153961136e-05, "loss": 2.0782, "step": 97175 }, { "epoch": 6.602799293382253, "grad_norm": 3.828364372253418, "learning_rate": 1.749388503872809e-05, "loss": 2.179, "step": 97180 }, { "epoch": 6.603139013452915, "grad_norm": 3.5078768730163574, "learning_rate": 1.7489638537844817e-05, "loss": 1.9616, "step": 97185 }, { "epoch": 6.603478733523577, "grad_norm": 3.0108911991119385, "learning_rate": 1.7485392036961545e-05, "loss": 2.2771, "step": 97190 }, { "epoch": 6.603818453594238, "grad_norm": 4.056954383850098, "learning_rate": 1.7481145536078273e-05, "loss": 2.0855, "step": 97195 }, { "epoch": 6.6041581736649, "grad_norm": 3.8694591522216797, "learning_rate": 1.7476899035194997e-05, "loss": 2.3859, "step": 97200 }, { "epoch": 6.604497893735562, "grad_norm": 3.8122072219848633, "learning_rate": 
1.747265253431173e-05, "loss": 1.8854, "step": 97205 }, { "epoch": 6.6048376138062235, "grad_norm": 3.2675249576568604, "learning_rate": 1.7468406033428457e-05, "loss": 2.0033, "step": 97210 }, { "epoch": 6.6051773338768855, "grad_norm": 3.6556971073150635, "learning_rate": 1.746415953254518e-05, "loss": 2.1596, "step": 97215 }, { "epoch": 6.605517053947548, "grad_norm": 3.830596923828125, "learning_rate": 1.7459913031661913e-05, "loss": 1.9767, "step": 97220 }, { "epoch": 6.605856774018209, "grad_norm": 3.8810269832611084, "learning_rate": 1.7455666530778638e-05, "loss": 2.1, "step": 97225 }, { "epoch": 6.606196494088871, "grad_norm": 3.56048846244812, "learning_rate": 1.745142002989537e-05, "loss": 1.9901, "step": 97230 }, { "epoch": 6.606536214159533, "grad_norm": 3.298797845840454, "learning_rate": 1.7447173529012097e-05, "loss": 2.1319, "step": 97235 }, { "epoch": 6.606875934230194, "grad_norm": 4.170617580413818, "learning_rate": 1.744292702812882e-05, "loss": 1.8871, "step": 97240 }, { "epoch": 6.607215654300856, "grad_norm": 3.8768460750579834, "learning_rate": 1.7438680527245553e-05, "loss": 2.0928, "step": 97245 }, { "epoch": 6.607555374371518, "grad_norm": 3.4657857418060303, "learning_rate": 1.7434434026362278e-05, "loss": 1.8972, "step": 97250 }, { "epoch": 6.6078950944421795, "grad_norm": 3.8089122772216797, "learning_rate": 1.7430187525479006e-05, "loss": 1.9507, "step": 97255 }, { "epoch": 6.608234814512842, "grad_norm": 3.5208117961883545, "learning_rate": 1.7425941024595734e-05, "loss": 2.3794, "step": 97260 }, { "epoch": 6.608574534583504, "grad_norm": 2.732733726501465, "learning_rate": 1.742169452371246e-05, "loss": 2.2899, "step": 97265 }, { "epoch": 6.608914254654165, "grad_norm": 3.5329151153564453, "learning_rate": 1.741744802282919e-05, "loss": 1.963, "step": 97270 }, { "epoch": 6.609253974724827, "grad_norm": 3.709860324859619, "learning_rate": 1.7413201521945918e-05, "loss": 2.224, "step": 97275 }, { "epoch": 6.609593694795489, 
"grad_norm": 2.97302508354187, "learning_rate": 1.7408955021062646e-05, "loss": 2.2915, "step": 97280 }, { "epoch": 6.60993341486615, "grad_norm": 4.233462810516357, "learning_rate": 1.7404708520179374e-05, "loss": 2.2043, "step": 97285 }, { "epoch": 6.610273134936812, "grad_norm": 3.449948787689209, "learning_rate": 1.74004620192961e-05, "loss": 2.0574, "step": 97290 }, { "epoch": 6.610612855007474, "grad_norm": 3.290817975997925, "learning_rate": 1.739621551841283e-05, "loss": 2.0696, "step": 97295 }, { "epoch": 6.6109525750781355, "grad_norm": 3.491549253463745, "learning_rate": 1.7391969017529554e-05, "loss": 2.4027, "step": 97300 }, { "epoch": 6.611292295148798, "grad_norm": 4.5571513175964355, "learning_rate": 1.7387722516646286e-05, "loss": 1.9371, "step": 97305 }, { "epoch": 6.61163201521946, "grad_norm": 4.315205097198486, "learning_rate": 1.738347601576301e-05, "loss": 2.0739, "step": 97310 }, { "epoch": 6.611971735290121, "grad_norm": 2.579479217529297, "learning_rate": 1.737922951487974e-05, "loss": 2.2059, "step": 97315 }, { "epoch": 6.612311455360783, "grad_norm": 4.474929332733154, "learning_rate": 1.737498301399647e-05, "loss": 2.0181, "step": 97320 }, { "epoch": 6.612651175431445, "grad_norm": 3.56136417388916, "learning_rate": 1.7370736513113194e-05, "loss": 2.0538, "step": 97325 }, { "epoch": 6.612990895502106, "grad_norm": 3.827693462371826, "learning_rate": 1.7366490012229926e-05, "loss": 2.1434, "step": 97330 }, { "epoch": 6.613330615572768, "grad_norm": 3.0168073177337646, "learning_rate": 1.736224351134665e-05, "loss": 1.9102, "step": 97335 }, { "epoch": 6.613670335643429, "grad_norm": 3.601546049118042, "learning_rate": 1.7357997010463378e-05, "loss": 1.975, "step": 97340 }, { "epoch": 6.6140100557140915, "grad_norm": 3.917833089828491, "learning_rate": 1.7353750509580106e-05, "loss": 2.2151, "step": 97345 }, { "epoch": 6.614349775784754, "grad_norm": 4.546503067016602, "learning_rate": 1.7349504008696834e-05, "loss": 2.1895, "step": 97350 
}, { "epoch": 6.614689495855415, "grad_norm": 3.369187116622925, "learning_rate": 1.7345257507813562e-05, "loss": 1.8474, "step": 97355 }, { "epoch": 6.615029215926077, "grad_norm": 3.842581033706665, "learning_rate": 1.734101100693029e-05, "loss": 1.9191, "step": 97360 }, { "epoch": 6.615368935996739, "grad_norm": 3.8330495357513428, "learning_rate": 1.733676450604702e-05, "loss": 2.1046, "step": 97365 }, { "epoch": 6.6157086560674, "grad_norm": 4.820265293121338, "learning_rate": 1.7332518005163746e-05, "loss": 2.0486, "step": 97370 }, { "epoch": 6.616048376138062, "grad_norm": 3.4649059772491455, "learning_rate": 1.7328271504280474e-05, "loss": 1.7613, "step": 97375 }, { "epoch": 6.616388096208724, "grad_norm": 3.5257728099823, "learning_rate": 1.7324025003397202e-05, "loss": 1.8696, "step": 97380 }, { "epoch": 6.616727816279385, "grad_norm": 3.8943095207214355, "learning_rate": 1.7319778502513927e-05, "loss": 2.2865, "step": 97385 }, { "epoch": 6.6170675363500475, "grad_norm": 3.641939878463745, "learning_rate": 1.731553200163066e-05, "loss": 1.8425, "step": 97390 }, { "epoch": 6.61740725642071, "grad_norm": 3.37978458404541, "learning_rate": 1.7311285500747383e-05, "loss": 2.136, "step": 97395 }, { "epoch": 6.617746976491371, "grad_norm": 3.882427930831909, "learning_rate": 1.7307038999864114e-05, "loss": 2.0677, "step": 97400 }, { "epoch": 6.618086696562033, "grad_norm": 3.2897679805755615, "learning_rate": 1.7302792498980842e-05, "loss": 2.1162, "step": 97405 }, { "epoch": 6.618426416632695, "grad_norm": 3.4102771282196045, "learning_rate": 1.7298545998097567e-05, "loss": 2.1368, "step": 97410 }, { "epoch": 6.618766136703356, "grad_norm": 3.627744674682617, "learning_rate": 1.72942994972143e-05, "loss": 1.9351, "step": 97415 }, { "epoch": 6.619105856774018, "grad_norm": 4.31953763961792, "learning_rate": 1.7290052996331023e-05, "loss": 2.053, "step": 97420 }, { "epoch": 6.61944557684468, "grad_norm": 3.355804204940796, "learning_rate": 1.728580649544775e-05, 
"loss": 2.1296, "step": 97425 }, { "epoch": 6.6197852969153415, "grad_norm": 3.0892715454101562, "learning_rate": 1.728155999456448e-05, "loss": 2.1372, "step": 97430 }, { "epoch": 6.6201250169860035, "grad_norm": 3.550065040588379, "learning_rate": 1.7277313493681207e-05, "loss": 1.9582, "step": 97435 }, { "epoch": 6.620464737056666, "grad_norm": 3.751164674758911, "learning_rate": 1.7273066992797935e-05, "loss": 2.0152, "step": 97440 }, { "epoch": 6.620804457127327, "grad_norm": 3.214632034301758, "learning_rate": 1.7268820491914663e-05, "loss": 1.842, "step": 97445 }, { "epoch": 6.621144177197989, "grad_norm": 3.7583096027374268, "learning_rate": 1.726457399103139e-05, "loss": 2.3426, "step": 97450 }, { "epoch": 6.621483897268651, "grad_norm": 4.212733745574951, "learning_rate": 1.726032749014812e-05, "loss": 2.0187, "step": 97455 }, { "epoch": 6.621823617339312, "grad_norm": 3.484888792037964, "learning_rate": 1.7256080989264847e-05, "loss": 2.2376, "step": 97460 }, { "epoch": 6.622163337409974, "grad_norm": 4.047414779663086, "learning_rate": 1.7251834488381575e-05, "loss": 1.9984, "step": 97465 }, { "epoch": 6.622503057480636, "grad_norm": 3.546657085418701, "learning_rate": 1.72475879874983e-05, "loss": 1.9452, "step": 97470 }, { "epoch": 6.6228427775512975, "grad_norm": 4.223222255706787, "learning_rate": 1.724334148661503e-05, "loss": 2.1233, "step": 97475 }, { "epoch": 6.6231824976219595, "grad_norm": 2.7010531425476074, "learning_rate": 1.7239094985731756e-05, "loss": 1.9056, "step": 97480 }, { "epoch": 6.623522217692622, "grad_norm": 2.905806303024292, "learning_rate": 1.7234848484848487e-05, "loss": 2.3434, "step": 97485 }, { "epoch": 6.623861937763283, "grad_norm": 4.21789026260376, "learning_rate": 1.7230601983965215e-05, "loss": 1.9549, "step": 97490 }, { "epoch": 6.624201657833945, "grad_norm": 4.616591930389404, "learning_rate": 1.722635548308194e-05, "loss": 2.0276, "step": 97495 }, { "epoch": 6.624541377904607, "grad_norm": 4.403151035308838, 
"learning_rate": 1.722210898219867e-05, "loss": 2.0554, "step": 97500 }, { "epoch": 6.624881097975268, "grad_norm": 4.103414058685303, "learning_rate": 1.7217862481315396e-05, "loss": 2.0046, "step": 97505 }, { "epoch": 6.62522081804593, "grad_norm": 4.277111530303955, "learning_rate": 1.7213615980432124e-05, "loss": 2.2189, "step": 97510 }, { "epoch": 6.625560538116592, "grad_norm": 4.0871405601501465, "learning_rate": 1.7209369479548855e-05, "loss": 2.2647, "step": 97515 }, { "epoch": 6.6259002581872535, "grad_norm": 3.4768810272216797, "learning_rate": 1.720512297866558e-05, "loss": 2.4075, "step": 97520 }, { "epoch": 6.6262399782579156, "grad_norm": 4.204622268676758, "learning_rate": 1.7200876477782308e-05, "loss": 1.9172, "step": 97525 }, { "epoch": 6.626579698328578, "grad_norm": 4.070997714996338, "learning_rate": 1.7196629976899036e-05, "loss": 2.3394, "step": 97530 }, { "epoch": 6.626919418399239, "grad_norm": 3.843043327331543, "learning_rate": 1.7192383476015764e-05, "loss": 2.1964, "step": 97535 }, { "epoch": 6.627259138469901, "grad_norm": 4.011689186096191, "learning_rate": 1.7188136975132492e-05, "loss": 2.1896, "step": 97540 }, { "epoch": 6.627598858540563, "grad_norm": 3.6281721591949463, "learning_rate": 1.718389047424922e-05, "loss": 2.0175, "step": 97545 }, { "epoch": 6.627938578611224, "grad_norm": 3.146634817123413, "learning_rate": 1.7179643973365948e-05, "loss": 2.1128, "step": 97550 }, { "epoch": 6.628278298681886, "grad_norm": 3.2922801971435547, "learning_rate": 1.7175397472482672e-05, "loss": 2.1503, "step": 97555 }, { "epoch": 6.628618018752547, "grad_norm": 3.205289602279663, "learning_rate": 1.7171150971599404e-05, "loss": 2.0004, "step": 97560 }, { "epoch": 6.6289577388232095, "grad_norm": 3.0147764682769775, "learning_rate": 1.7166904470716132e-05, "loss": 1.8176, "step": 97565 }, { "epoch": 6.629297458893872, "grad_norm": 3.5818135738372803, "learning_rate": 1.716265796983286e-05, "loss": 2.1122, "step": 97570 }, { "epoch": 
6.629637178964533, "grad_norm": 3.6544766426086426, "learning_rate": 1.7158411468949588e-05, "loss": 1.9824, "step": 97575 }, { "epoch": 6.629976899035195, "grad_norm": 3.0570693016052246, "learning_rate": 1.7154164968066313e-05, "loss": 2.2548, "step": 97580 }, { "epoch": 6.630316619105857, "grad_norm": 4.928508281707764, "learning_rate": 1.7149918467183044e-05, "loss": 1.9885, "step": 97585 }, { "epoch": 6.630656339176518, "grad_norm": 3.91351318359375, "learning_rate": 1.714567196629977e-05, "loss": 1.8378, "step": 97590 }, { "epoch": 6.63099605924718, "grad_norm": 3.156559944152832, "learning_rate": 1.7141425465416497e-05, "loss": 2.1201, "step": 97595 }, { "epoch": 6.631335779317842, "grad_norm": 3.4209043979644775, "learning_rate": 1.7137178964533228e-05, "loss": 2.0938, "step": 97600 }, { "epoch": 6.631675499388503, "grad_norm": 4.923995494842529, "learning_rate": 1.7132932463649953e-05, "loss": 2.3441, "step": 97605 }, { "epoch": 6.6320152194591655, "grad_norm": 4.542751789093018, "learning_rate": 1.712868596276668e-05, "loss": 2.0582, "step": 97610 }, { "epoch": 6.632354939529828, "grad_norm": 4.019373893737793, "learning_rate": 1.712443946188341e-05, "loss": 1.974, "step": 97615 }, { "epoch": 6.632694659600489, "grad_norm": 2.9530692100524902, "learning_rate": 1.7120192961000137e-05, "loss": 1.7839, "step": 97620 }, { "epoch": 6.633034379671151, "grad_norm": 3.344494581222534, "learning_rate": 1.7115946460116865e-05, "loss": 1.7882, "step": 97625 }, { "epoch": 6.633374099741813, "grad_norm": 3.2450788021087646, "learning_rate": 1.7111699959233593e-05, "loss": 2.0507, "step": 97630 }, { "epoch": 6.633713819812474, "grad_norm": 3.549428701400757, "learning_rate": 1.710745345835032e-05, "loss": 1.8457, "step": 97635 }, { "epoch": 6.634053539883136, "grad_norm": 3.3030731678009033, "learning_rate": 1.7103206957467045e-05, "loss": 1.9917, "step": 97640 }, { "epoch": 6.634393259953798, "grad_norm": 2.844911813735962, "learning_rate": 1.7098960456583777e-05, 
"loss": 2.1103, "step": 97645 }, { "epoch": 6.634732980024459, "grad_norm": 3.51588773727417, "learning_rate": 1.7094713955700505e-05, "loss": 2.3443, "step": 97650 }, { "epoch": 6.6350727000951215, "grad_norm": 3.0490307807922363, "learning_rate": 1.7090467454817233e-05, "loss": 2.2048, "step": 97655 }, { "epoch": 6.635412420165784, "grad_norm": 3.009237289428711, "learning_rate": 1.708622095393396e-05, "loss": 2.2374, "step": 97660 }, { "epoch": 6.635752140236445, "grad_norm": 3.273057222366333, "learning_rate": 1.7081974453050685e-05, "loss": 1.9098, "step": 97665 }, { "epoch": 6.636091860307107, "grad_norm": 4.044037342071533, "learning_rate": 1.7077727952167417e-05, "loss": 2.1846, "step": 97670 }, { "epoch": 6.636431580377769, "grad_norm": 3.396246910095215, "learning_rate": 1.707348145128414e-05, "loss": 2.3817, "step": 97675 }, { "epoch": 6.63677130044843, "grad_norm": 3.546082019805908, "learning_rate": 1.706923495040087e-05, "loss": 2.1201, "step": 97680 }, { "epoch": 6.637111020519092, "grad_norm": 3.3326401710510254, "learning_rate": 1.70649884495176e-05, "loss": 1.6737, "step": 97685 }, { "epoch": 6.637450740589754, "grad_norm": 3.537426233291626, "learning_rate": 1.7060741948634325e-05, "loss": 2.413, "step": 97690 }, { "epoch": 6.6377904606604154, "grad_norm": 3.7070648670196533, "learning_rate": 1.7056495447751053e-05, "loss": 2.1173, "step": 97695 }, { "epoch": 6.6381301807310775, "grad_norm": 3.466480255126953, "learning_rate": 1.705224894686778e-05, "loss": 2.0779, "step": 97700 }, { "epoch": 6.63846990080174, "grad_norm": 3.8845014572143555, "learning_rate": 1.704800244598451e-05, "loss": 1.9287, "step": 97705 }, { "epoch": 6.638809620872401, "grad_norm": 3.179781675338745, "learning_rate": 1.7043755945101237e-05, "loss": 2.0474, "step": 97710 }, { "epoch": 6.639149340943063, "grad_norm": 3.038466453552246, "learning_rate": 1.7039509444217965e-05, "loss": 2.3011, "step": 97715 }, { "epoch": 6.639489061013725, "grad_norm": 3.404059648513794, 
"learning_rate": 1.7035262943334693e-05, "loss": 2.0937, "step": 97720 }, { "epoch": 6.639828781084386, "grad_norm": 3.7248613834381104, "learning_rate": 1.7031016442451418e-05, "loss": 1.9408, "step": 97725 }, { "epoch": 6.640168501155048, "grad_norm": 3.915900230407715, "learning_rate": 1.702676994156815e-05, "loss": 2.3097, "step": 97730 }, { "epoch": 6.64050822122571, "grad_norm": 5.019236087799072, "learning_rate": 1.7022523440684877e-05, "loss": 2.049, "step": 97735 }, { "epoch": 6.6408479412963715, "grad_norm": 4.261740684509277, "learning_rate": 1.7018276939801605e-05, "loss": 2.1532, "step": 97740 }, { "epoch": 6.6411876613670335, "grad_norm": 3.6417739391326904, "learning_rate": 1.7014030438918333e-05, "loss": 2.2236, "step": 97745 }, { "epoch": 6.641527381437696, "grad_norm": 3.4433093070983887, "learning_rate": 1.7009783938035058e-05, "loss": 2.099, "step": 97750 }, { "epoch": 6.641867101508357, "grad_norm": 3.7302849292755127, "learning_rate": 1.700553743715179e-05, "loss": 2.0372, "step": 97755 }, { "epoch": 6.642206821579019, "grad_norm": 4.08085298538208, "learning_rate": 1.7001290936268517e-05, "loss": 2.1744, "step": 97760 }, { "epoch": 6.642546541649681, "grad_norm": 3.302769422531128, "learning_rate": 1.6997044435385242e-05, "loss": 2.3496, "step": 97765 }, { "epoch": 6.642886261720342, "grad_norm": 3.1625900268554688, "learning_rate": 1.6992797934501973e-05, "loss": 1.9713, "step": 97770 }, { "epoch": 6.643225981791004, "grad_norm": 3.2271931171417236, "learning_rate": 1.6988551433618698e-05, "loss": 2.1023, "step": 97775 }, { "epoch": 6.643565701861666, "grad_norm": 3.517702102661133, "learning_rate": 1.6984304932735426e-05, "loss": 1.9494, "step": 97780 }, { "epoch": 6.6439054219323275, "grad_norm": 3.9977264404296875, "learning_rate": 1.6980058431852154e-05, "loss": 2.2971, "step": 97785 }, { "epoch": 6.6442451420029895, "grad_norm": 4.171504497528076, "learning_rate": 1.6975811930968882e-05, "loss": 2.2111, "step": 97790 }, { "epoch": 
6.644584862073652, "grad_norm": 2.9764015674591064, "learning_rate": 1.697156543008561e-05, "loss": 2.0193, "step": 97795 }, { "epoch": 6.644924582144313, "grad_norm": 3.705220937728882, "learning_rate": 1.6967318929202338e-05, "loss": 2.0973, "step": 97800 }, { "epoch": 6.645264302214975, "grad_norm": 3.599091053009033, "learning_rate": 1.6963072428319066e-05, "loss": 1.9786, "step": 97805 }, { "epoch": 6.645604022285637, "grad_norm": 4.391253471374512, "learning_rate": 1.6958825927435794e-05, "loss": 1.9583, "step": 97810 }, { "epoch": 6.645943742356298, "grad_norm": 3.5119380950927734, "learning_rate": 1.6954579426552522e-05, "loss": 1.9993, "step": 97815 }, { "epoch": 6.64628346242696, "grad_norm": 3.7813162803649902, "learning_rate": 1.695033292566925e-05, "loss": 1.9866, "step": 97820 }, { "epoch": 6.646623182497622, "grad_norm": 3.6361358165740967, "learning_rate": 1.6946086424785978e-05, "loss": 2.1034, "step": 97825 }, { "epoch": 6.6469629025682835, "grad_norm": 3.954390048980713, "learning_rate": 1.6941839923902706e-05, "loss": 1.9204, "step": 97830 }, { "epoch": 6.647302622638946, "grad_norm": 4.592151165008545, "learning_rate": 1.693759342301943e-05, "loss": 2.1565, "step": 97835 }, { "epoch": 6.647642342709608, "grad_norm": 3.5530459880828857, "learning_rate": 1.6933346922136162e-05, "loss": 2.1868, "step": 97840 }, { "epoch": 6.647982062780269, "grad_norm": 3.2024948596954346, "learning_rate": 1.692910042125289e-05, "loss": 1.9306, "step": 97845 }, { "epoch": 6.648321782850931, "grad_norm": 3.645472288131714, "learning_rate": 1.6924853920369615e-05, "loss": 2.1566, "step": 97850 }, { "epoch": 6.648661502921593, "grad_norm": 3.754146099090576, "learning_rate": 1.6920607419486346e-05, "loss": 1.8595, "step": 97855 }, { "epoch": 6.649001222992254, "grad_norm": 3.839517831802368, "learning_rate": 1.691636091860307e-05, "loss": 2.2729, "step": 97860 }, { "epoch": 6.649340943062916, "grad_norm": 3.9783248901367188, "learning_rate": 1.69121144177198e-05, 
"loss": 2.1998, "step": 97865 }, { "epoch": 6.649680663133578, "grad_norm": 3.750013589859009, "learning_rate": 1.6907867916836527e-05, "loss": 1.9772, "step": 97870 }, { "epoch": 6.6500203832042395, "grad_norm": 3.3354737758636475, "learning_rate": 1.6903621415953255e-05, "loss": 1.948, "step": 97875 }, { "epoch": 6.650360103274902, "grad_norm": 3.5782980918884277, "learning_rate": 1.6899374915069983e-05, "loss": 1.784, "step": 97880 }, { "epoch": 6.650699823345564, "grad_norm": 3.8636226654052734, "learning_rate": 1.689512841418671e-05, "loss": 2.1287, "step": 97885 }, { "epoch": 6.651039543416225, "grad_norm": 3.624103546142578, "learning_rate": 1.689088191330344e-05, "loss": 2.2022, "step": 97890 }, { "epoch": 6.651379263486887, "grad_norm": 3.8972818851470947, "learning_rate": 1.6886635412420167e-05, "loss": 2.1385, "step": 97895 }, { "epoch": 6.651718983557549, "grad_norm": 3.937396764755249, "learning_rate": 1.6882388911536895e-05, "loss": 2.1375, "step": 97900 }, { "epoch": 6.65205870362821, "grad_norm": 3.5879921913146973, "learning_rate": 1.6878142410653623e-05, "loss": 2.0131, "step": 97905 }, { "epoch": 6.652398423698872, "grad_norm": 3.4055593013763428, "learning_rate": 1.687389590977035e-05, "loss": 2.037, "step": 97910 }, { "epoch": 6.652738143769534, "grad_norm": 3.691910743713379, "learning_rate": 1.686964940888708e-05, "loss": 2.024, "step": 97915 }, { "epoch": 6.6530778638401955, "grad_norm": 4.401292324066162, "learning_rate": 1.6865402908003803e-05, "loss": 1.8371, "step": 97920 }, { "epoch": 6.653417583910858, "grad_norm": 4.087190628051758, "learning_rate": 1.6861156407120535e-05, "loss": 2.2675, "step": 97925 }, { "epoch": 6.65375730398152, "grad_norm": 2.5486035346984863, "learning_rate": 1.6856909906237263e-05, "loss": 1.9791, "step": 97930 }, { "epoch": 6.654097024052181, "grad_norm": 3.6234230995178223, "learning_rate": 1.6852663405353988e-05, "loss": 2.1074, "step": 97935 }, { "epoch": 6.654436744122843, "grad_norm": 3.345142364501953, 
"learning_rate": 1.684841690447072e-05, "loss": 1.8495, "step": 97940 }, { "epoch": 6.654776464193505, "grad_norm": 4.76254940032959, "learning_rate": 1.6844170403587444e-05, "loss": 2.1448, "step": 97945 }, { "epoch": 6.655116184264166, "grad_norm": 3.946296453475952, "learning_rate": 1.683992390270417e-05, "loss": 1.9587, "step": 97950 }, { "epoch": 6.655455904334828, "grad_norm": 3.465533971786499, "learning_rate": 1.6835677401820903e-05, "loss": 2.069, "step": 97955 }, { "epoch": 6.65579562440549, "grad_norm": 3.181767702102661, "learning_rate": 1.6831430900937628e-05, "loss": 1.8092, "step": 97960 }, { "epoch": 6.6561353444761515, "grad_norm": 4.712766170501709, "learning_rate": 1.6827184400054356e-05, "loss": 2.2049, "step": 97965 }, { "epoch": 6.656475064546814, "grad_norm": 3.9795472621917725, "learning_rate": 1.6822937899171084e-05, "loss": 2.27, "step": 97970 }, { "epoch": 6.656814784617476, "grad_norm": 4.077510833740234, "learning_rate": 1.681869139828781e-05, "loss": 2.3869, "step": 97975 }, { "epoch": 6.657154504688137, "grad_norm": 3.397806406021118, "learning_rate": 1.681444489740454e-05, "loss": 2.043, "step": 97980 }, { "epoch": 6.657494224758799, "grad_norm": 3.1220178604125977, "learning_rate": 1.6810198396521268e-05, "loss": 2.2178, "step": 97985 }, { "epoch": 6.657833944829461, "grad_norm": 2.9882607460021973, "learning_rate": 1.6805951895637996e-05, "loss": 2.1559, "step": 97990 }, { "epoch": 6.658173664900122, "grad_norm": 3.9001567363739014, "learning_rate": 1.6801705394754724e-05, "loss": 2.3098, "step": 97995 }, { "epoch": 6.658513384970784, "grad_norm": 3.818084955215454, "learning_rate": 1.679745889387145e-05, "loss": 1.9826, "step": 98000 }, { "epoch": 6.658853105041446, "grad_norm": 3.4190454483032227, "learning_rate": 1.679321239298818e-05, "loss": 2.1893, "step": 98005 }, { "epoch": 6.6591928251121075, "grad_norm": 2.913388252258301, "learning_rate": 1.6788965892104908e-05, "loss": 1.9518, "step": 98010 }, { "epoch": 
6.65953254518277, "grad_norm": 3.158233165740967, "learning_rate": 1.6784719391221636e-05, "loss": 2.0409, "step": 98015 }, { "epoch": 6.659872265253431, "grad_norm": 4.03093147277832, "learning_rate": 1.678047289033836e-05, "loss": 2.0658, "step": 98020 }, { "epoch": 6.660211985324093, "grad_norm": 3.3576955795288086, "learning_rate": 1.677622638945509e-05, "loss": 2.1667, "step": 98025 }, { "epoch": 6.660551705394755, "grad_norm": 3.9435477256774902, "learning_rate": 1.6771979888571816e-05, "loss": 2.1999, "step": 98030 }, { "epoch": 6.660891425465416, "grad_norm": 3.5213983058929443, "learning_rate": 1.6767733387688544e-05, "loss": 2.3964, "step": 98035 }, { "epoch": 6.661231145536078, "grad_norm": 5.118696689605713, "learning_rate": 1.6763486886805276e-05, "loss": 2.1822, "step": 98040 }, { "epoch": 6.66157086560674, "grad_norm": 4.18620491027832, "learning_rate": 1.6759240385922e-05, "loss": 2.1617, "step": 98045 }, { "epoch": 6.6619105856774015, "grad_norm": 2.988445520401001, "learning_rate": 1.6754993885038728e-05, "loss": 2.1164, "step": 98050 }, { "epoch": 6.6622503057480635, "grad_norm": 3.9529478549957275, "learning_rate": 1.6750747384155456e-05, "loss": 2.2583, "step": 98055 }, { "epoch": 6.662590025818726, "grad_norm": 4.148999214172363, "learning_rate": 1.6746500883272184e-05, "loss": 2.3216, "step": 98060 }, { "epoch": 6.662929745889387, "grad_norm": 4.348553657531738, "learning_rate": 1.6742254382388912e-05, "loss": 1.9951, "step": 98065 }, { "epoch": 6.663269465960049, "grad_norm": 3.552149772644043, "learning_rate": 1.673800788150564e-05, "loss": 2.1479, "step": 98070 }, { "epoch": 6.663609186030711, "grad_norm": 3.818936586380005, "learning_rate": 1.673376138062237e-05, "loss": 2.1097, "step": 98075 }, { "epoch": 6.663948906101372, "grad_norm": 4.313409805297852, "learning_rate": 1.6729514879739096e-05, "loss": 1.9582, "step": 98080 }, { "epoch": 6.664288626172034, "grad_norm": 2.2049200534820557, "learning_rate": 1.6725268378855824e-05, "loss": 
2.0632, "step": 98085 }, { "epoch": 6.664628346242696, "grad_norm": 3.5681300163269043, "learning_rate": 1.6721021877972552e-05, "loss": 2.1184, "step": 98090 }, { "epoch": 6.6649680663133575, "grad_norm": 3.6859443187713623, "learning_rate": 1.671677537708928e-05, "loss": 2.1443, "step": 98095 }, { "epoch": 6.6653077863840196, "grad_norm": 3.3198297023773193, "learning_rate": 1.671252887620601e-05, "loss": 1.9734, "step": 98100 }, { "epoch": 6.665647506454682, "grad_norm": 3.2535712718963623, "learning_rate": 1.6708282375322733e-05, "loss": 2.1813, "step": 98105 }, { "epoch": 6.665987226525343, "grad_norm": 3.612083911895752, "learning_rate": 1.6704035874439464e-05, "loss": 1.8247, "step": 98110 }, { "epoch": 6.666326946596005, "grad_norm": 4.664241790771484, "learning_rate": 1.669978937355619e-05, "loss": 1.9256, "step": 98115 }, { "epoch": 6.666666666666667, "grad_norm": 4.227344989776611, "learning_rate": 1.6695542872672917e-05, "loss": 2.3186, "step": 98120 }, { "epoch": 6.667006386737328, "grad_norm": 2.9314653873443604, "learning_rate": 1.669129637178965e-05, "loss": 2.0437, "step": 98125 }, { "epoch": 6.66734610680799, "grad_norm": 3.455893039703369, "learning_rate": 1.6687049870906373e-05, "loss": 2.0923, "step": 98130 }, { "epoch": 6.667685826878652, "grad_norm": 3.403716564178467, "learning_rate": 1.66828033700231e-05, "loss": 2.009, "step": 98135 }, { "epoch": 6.6680255469493135, "grad_norm": 3.4953083992004395, "learning_rate": 1.667855686913983e-05, "loss": 1.9989, "step": 98140 }, { "epoch": 6.668365267019976, "grad_norm": 3.8144619464874268, "learning_rate": 1.6674310368256557e-05, "loss": 2.1758, "step": 98145 }, { "epoch": 6.668704987090638, "grad_norm": 3.9349863529205322, "learning_rate": 1.6670063867373285e-05, "loss": 2.133, "step": 98150 }, { "epoch": 6.669044707161299, "grad_norm": 3.9736032485961914, "learning_rate": 1.6665817366490013e-05, "loss": 2.0991, "step": 98155 }, { "epoch": 6.669384427231961, "grad_norm": 4.625166416168213, 
"learning_rate": 1.666157086560674e-05, "loss": 2.1434, "step": 98160 }, { "epoch": 6.669724147302623, "grad_norm": 4.261071681976318, "learning_rate": 1.665732436472347e-05, "loss": 1.9935, "step": 98165 }, { "epoch": 6.670063867373284, "grad_norm": 3.138887643814087, "learning_rate": 1.6653077863840197e-05, "loss": 2.374, "step": 98170 }, { "epoch": 6.670403587443946, "grad_norm": 3.178950786590576, "learning_rate": 1.6648831362956925e-05, "loss": 2.0275, "step": 98175 }, { "epoch": 6.670743307514608, "grad_norm": 4.813149452209473, "learning_rate": 1.6644584862073653e-05, "loss": 2.3109, "step": 98180 }, { "epoch": 6.6710830275852695, "grad_norm": 4.543880939483643, "learning_rate": 1.664033836119038e-05, "loss": 1.8586, "step": 98185 }, { "epoch": 6.671422747655932, "grad_norm": 3.5926027297973633, "learning_rate": 1.6636091860307106e-05, "loss": 2.1604, "step": 98190 }, { "epoch": 6.671762467726594, "grad_norm": 3.6609930992126465, "learning_rate": 1.6631845359423837e-05, "loss": 1.9932, "step": 98195 }, { "epoch": 6.672102187797255, "grad_norm": 3.936645984649658, "learning_rate": 1.6627598858540562e-05, "loss": 2.0129, "step": 98200 }, { "epoch": 6.672441907867917, "grad_norm": 3.4722442626953125, "learning_rate": 1.662335235765729e-05, "loss": 2.0283, "step": 98205 }, { "epoch": 6.672781627938579, "grad_norm": 3.9637410640716553, "learning_rate": 1.661910585677402e-05, "loss": 2.3301, "step": 98210 }, { "epoch": 6.67312134800924, "grad_norm": 3.398338794708252, "learning_rate": 1.6614859355890746e-05, "loss": 1.9533, "step": 98215 }, { "epoch": 6.673461068079902, "grad_norm": 3.3998911380767822, "learning_rate": 1.6610612855007474e-05, "loss": 2.0224, "step": 98220 }, { "epoch": 6.673800788150564, "grad_norm": 3.274869203567505, "learning_rate": 1.6606366354124202e-05, "loss": 1.9293, "step": 98225 }, { "epoch": 6.6741405082212255, "grad_norm": 3.415926456451416, "learning_rate": 1.660211985324093e-05, "loss": 2.0064, "step": 98230 }, { "epoch": 
6.674480228291888, "grad_norm": 3.1508610248565674, "learning_rate": 1.6597873352357658e-05, "loss": 2.1017, "step": 98235 }, { "epoch": 6.674819948362549, "grad_norm": 3.705825090408325, "learning_rate": 1.6593626851474386e-05, "loss": 1.9217, "step": 98240 }, { "epoch": 6.675159668433211, "grad_norm": 3.109802722930908, "learning_rate": 1.6589380350591114e-05, "loss": 2.2866, "step": 98245 }, { "epoch": 6.675499388503873, "grad_norm": 4.47676944732666, "learning_rate": 1.6585133849707842e-05, "loss": 1.9894, "step": 98250 }, { "epoch": 6.675839108574534, "grad_norm": 3.6271815299987793, "learning_rate": 1.658088734882457e-05, "loss": 1.7493, "step": 98255 }, { "epoch": 6.676178828645196, "grad_norm": 4.004283428192139, "learning_rate": 1.6576640847941298e-05, "loss": 2.2328, "step": 98260 }, { "epoch": 6.676518548715858, "grad_norm": 3.7758588790893555, "learning_rate": 1.6572394347058026e-05, "loss": 2.1052, "step": 98265 }, { "epoch": 6.6768582687865194, "grad_norm": 3.1909613609313965, "learning_rate": 1.6568147846174754e-05, "loss": 2.1512, "step": 98270 }, { "epoch": 6.6771979888571815, "grad_norm": 3.681809663772583, "learning_rate": 1.656390134529148e-05, "loss": 1.9771, "step": 98275 }, { "epoch": 6.677537708927844, "grad_norm": 2.8646490573883057, "learning_rate": 1.655965484440821e-05, "loss": 2.2808, "step": 98280 }, { "epoch": 6.677877428998505, "grad_norm": 3.1160218715667725, "learning_rate": 1.6555408343524938e-05, "loss": 2.1361, "step": 98285 }, { "epoch": 6.678217149069167, "grad_norm": 3.682976245880127, "learning_rate": 1.6551161842641662e-05, "loss": 2.1542, "step": 98290 }, { "epoch": 6.678556869139829, "grad_norm": 3.5834710597991943, "learning_rate": 1.6546915341758394e-05, "loss": 2.1254, "step": 98295 }, { "epoch": 6.67889658921049, "grad_norm": 3.143817663192749, "learning_rate": 1.654266884087512e-05, "loss": 2.2342, "step": 98300 }, { "epoch": 6.679236309281152, "grad_norm": 3.463487148284912, "learning_rate": 1.6538422339991847e-05, 
"loss": 2.2422, "step": 98305 }, { "epoch": 6.679576029351814, "grad_norm": 2.8158962726593018, "learning_rate": 1.6534175839108575e-05, "loss": 1.9248, "step": 98310 }, { "epoch": 6.6799157494224755, "grad_norm": 2.7873353958129883, "learning_rate": 1.6529929338225303e-05, "loss": 2.2615, "step": 98315 }, { "epoch": 6.6802554694931375, "grad_norm": 3.7871975898742676, "learning_rate": 1.652568283734203e-05, "loss": 2.2157, "step": 98320 }, { "epoch": 6.6805951895638, "grad_norm": 3.74350643157959, "learning_rate": 1.652143633645876e-05, "loss": 2.08, "step": 98325 }, { "epoch": 6.680934909634461, "grad_norm": 3.6291239261627197, "learning_rate": 1.6517189835575487e-05, "loss": 1.933, "step": 98330 }, { "epoch": 6.681274629705123, "grad_norm": 4.616179466247559, "learning_rate": 1.6512943334692215e-05, "loss": 2.0359, "step": 98335 }, { "epoch": 6.681614349775785, "grad_norm": 2.824666738510132, "learning_rate": 1.6508696833808943e-05, "loss": 2.3543, "step": 98340 }, { "epoch": 6.681954069846446, "grad_norm": 3.997020721435547, "learning_rate": 1.650445033292567e-05, "loss": 2.1069, "step": 98345 }, { "epoch": 6.682293789917108, "grad_norm": 4.316879749298096, "learning_rate": 1.65002038320424e-05, "loss": 1.9794, "step": 98350 }, { "epoch": 6.68263350998777, "grad_norm": 3.4706029891967773, "learning_rate": 1.6495957331159127e-05, "loss": 2.3235, "step": 98355 }, { "epoch": 6.6829732300584315, "grad_norm": 4.320679664611816, "learning_rate": 1.649171083027585e-05, "loss": 2.0249, "step": 98360 }, { "epoch": 6.6833129501290935, "grad_norm": 4.445474624633789, "learning_rate": 1.6487464329392583e-05, "loss": 2.1136, "step": 98365 }, { "epoch": 6.683652670199756, "grad_norm": 3.3263962268829346, "learning_rate": 1.648321782850931e-05, "loss": 2.0129, "step": 98370 }, { "epoch": 6.683992390270417, "grad_norm": 4.179474353790283, "learning_rate": 1.6478971327626035e-05, "loss": 1.9808, "step": 98375 }, { "epoch": 6.684332110341079, "grad_norm": 3.4974710941314697, 
"learning_rate": 1.6474724826742767e-05, "loss": 2.0123, "step": 98380 }, { "epoch": 6.684671830411741, "grad_norm": 4.419735908508301, "learning_rate": 1.647047832585949e-05, "loss": 2.1966, "step": 98385 }, { "epoch": 6.685011550482402, "grad_norm": 3.5367889404296875, "learning_rate": 1.646623182497622e-05, "loss": 1.8557, "step": 98390 }, { "epoch": 6.685351270553064, "grad_norm": 3.7973921298980713, "learning_rate": 1.6461985324092947e-05, "loss": 2.4322, "step": 98395 }, { "epoch": 6.685690990623726, "grad_norm": 3.905357837677002, "learning_rate": 1.6457738823209675e-05, "loss": 1.9514, "step": 98400 }, { "epoch": 6.6860307106943875, "grad_norm": 4.552535057067871, "learning_rate": 1.6453492322326403e-05, "loss": 2.2469, "step": 98405 }, { "epoch": 6.6863704307650496, "grad_norm": 3.0917751789093018, "learning_rate": 1.644924582144313e-05, "loss": 1.9245, "step": 98410 }, { "epoch": 6.686710150835712, "grad_norm": 4.163539886474609, "learning_rate": 1.644499932055986e-05, "loss": 2.1936, "step": 98415 }, { "epoch": 6.687049870906373, "grad_norm": 3.0788991451263428, "learning_rate": 1.6440752819676587e-05, "loss": 2.0005, "step": 98420 }, { "epoch": 6.687389590977035, "grad_norm": 3.4400980472564697, "learning_rate": 1.6436506318793315e-05, "loss": 2.0739, "step": 98425 }, { "epoch": 6.687729311047697, "grad_norm": 4.013050556182861, "learning_rate": 1.6432259817910043e-05, "loss": 1.834, "step": 98430 }, { "epoch": 6.688069031118358, "grad_norm": 3.611649513244629, "learning_rate": 1.642801331702677e-05, "loss": 2.0078, "step": 98435 }, { "epoch": 6.68840875118902, "grad_norm": 3.2557578086853027, "learning_rate": 1.64237668161435e-05, "loss": 2.1231, "step": 98440 }, { "epoch": 6.688748471259682, "grad_norm": 3.820462226867676, "learning_rate": 1.6419520315260224e-05, "loss": 2.0315, "step": 98445 }, { "epoch": 6.6890881913303435, "grad_norm": 3.264007568359375, "learning_rate": 1.6415273814376955e-05, "loss": 2.6427, "step": 98450 }, { "epoch": 
6.689427911401006, "grad_norm": 3.544161319732666, "learning_rate": 1.6411027313493683e-05, "loss": 2.0791, "step": 98455 }, { "epoch": 6.689767631471668, "grad_norm": 3.270049810409546, "learning_rate": 1.6406780812610408e-05, "loss": 2.3329, "step": 98460 }, { "epoch": 6.690107351542329, "grad_norm": 4.060075283050537, "learning_rate": 1.640253431172714e-05, "loss": 2.1418, "step": 98465 }, { "epoch": 6.690447071612991, "grad_norm": 3.01412034034729, "learning_rate": 1.6398287810843864e-05, "loss": 1.9688, "step": 98470 }, { "epoch": 6.690786791683653, "grad_norm": 2.7437469959259033, "learning_rate": 1.6394041309960592e-05, "loss": 2.2373, "step": 98475 }, { "epoch": 6.691126511754314, "grad_norm": 3.131822109222412, "learning_rate": 1.6389794809077323e-05, "loss": 2.1268, "step": 98480 }, { "epoch": 6.691466231824976, "grad_norm": 3.149367094039917, "learning_rate": 1.6385548308194048e-05, "loss": 2.3191, "step": 98485 }, { "epoch": 6.691805951895638, "grad_norm": 4.432812213897705, "learning_rate": 1.6381301807310776e-05, "loss": 1.909, "step": 98490 }, { "epoch": 6.6921456719662995, "grad_norm": 3.112567186355591, "learning_rate": 1.6377055306427504e-05, "loss": 2.2167, "step": 98495 }, { "epoch": 6.692485392036962, "grad_norm": 4.3959784507751465, "learning_rate": 1.6372808805544232e-05, "loss": 2.304, "step": 98500 }, { "epoch": 6.692825112107624, "grad_norm": 3.7372801303863525, "learning_rate": 1.636856230466096e-05, "loss": 1.94, "step": 98505 }, { "epoch": 6.693164832178285, "grad_norm": 3.8211917877197266, "learning_rate": 1.6364315803777688e-05, "loss": 2.0047, "step": 98510 }, { "epoch": 6.693504552248947, "grad_norm": 4.554047107696533, "learning_rate": 1.6360069302894416e-05, "loss": 2.1534, "step": 98515 }, { "epoch": 6.693844272319609, "grad_norm": 3.591358184814453, "learning_rate": 1.6355822802011144e-05, "loss": 1.6586, "step": 98520 }, { "epoch": 6.69418399239027, "grad_norm": 3.0241479873657227, "learning_rate": 1.6351576301127872e-05, 
"loss": 1.8763, "step": 98525 }, { "epoch": 6.694523712460932, "grad_norm": 3.32940673828125, "learning_rate": 1.63473298002446e-05, "loss": 2.3121, "step": 98530 }, { "epoch": 6.694863432531594, "grad_norm": 4.236878395080566, "learning_rate": 1.6343083299361328e-05, "loss": 2.1812, "step": 98535 }, { "epoch": 6.6952031526022555, "grad_norm": 5.375640392303467, "learning_rate": 1.6338836798478056e-05, "loss": 2.092, "step": 98540 }, { "epoch": 6.695542872672918, "grad_norm": 3.2910594940185547, "learning_rate": 1.633459029759478e-05, "loss": 2.0573, "step": 98545 }, { "epoch": 6.69588259274358, "grad_norm": 3.7137060165405273, "learning_rate": 1.6330343796711512e-05, "loss": 2.2332, "step": 98550 }, { "epoch": 6.696222312814241, "grad_norm": 4.687171936035156, "learning_rate": 1.6326097295828237e-05, "loss": 2.0083, "step": 98555 }, { "epoch": 6.696562032884903, "grad_norm": 3.512988567352295, "learning_rate": 1.6321850794944965e-05, "loss": 2.2209, "step": 98560 }, { "epoch": 6.696901752955565, "grad_norm": 4.193115234375, "learning_rate": 1.6317604294061696e-05, "loss": 1.9245, "step": 98565 }, { "epoch": 6.697241473026226, "grad_norm": 4.504792213439941, "learning_rate": 1.631335779317842e-05, "loss": 1.9482, "step": 98570 }, { "epoch": 6.697581193096888, "grad_norm": 3.3183693885803223, "learning_rate": 1.630911129229515e-05, "loss": 2.113, "step": 98575 }, { "epoch": 6.69792091316755, "grad_norm": 4.480279922485352, "learning_rate": 1.6304864791411877e-05, "loss": 2.2014, "step": 98580 }, { "epoch": 6.6982606332382115, "grad_norm": 3.666287660598755, "learning_rate": 1.6300618290528605e-05, "loss": 2.0086, "step": 98585 }, { "epoch": 6.698600353308874, "grad_norm": 3.3762049674987793, "learning_rate": 1.6296371789645333e-05, "loss": 2.0283, "step": 98590 }, { "epoch": 6.698940073379536, "grad_norm": 3.1810877323150635, "learning_rate": 1.629212528876206e-05, "loss": 1.9623, "step": 98595 }, { "epoch": 6.699279793450197, "grad_norm": 3.939837694168091, 
"learning_rate": 1.628787878787879e-05, "loss": 1.9013, "step": 98600 }, { "epoch": 6.699619513520859, "grad_norm": 3.468628406524658, "learning_rate": 1.6283632286995517e-05, "loss": 2.2247, "step": 98605 }, { "epoch": 6.699959233591521, "grad_norm": 3.8949666023254395, "learning_rate": 1.6279385786112245e-05, "loss": 2.1209, "step": 98610 }, { "epoch": 6.700298953662182, "grad_norm": 3.5495333671569824, "learning_rate": 1.6275139285228973e-05, "loss": 1.8938, "step": 98615 }, { "epoch": 6.700638673732844, "grad_norm": 4.058058738708496, "learning_rate": 1.62708927843457e-05, "loss": 2.2447, "step": 98620 }, { "epoch": 6.700978393803506, "grad_norm": 3.8668413162231445, "learning_rate": 1.626664628346243e-05, "loss": 2.01, "step": 98625 }, { "epoch": 6.7013181138741675, "grad_norm": 3.4477508068084717, "learning_rate": 1.6262399782579153e-05, "loss": 2.1489, "step": 98630 }, { "epoch": 6.70165783394483, "grad_norm": 3.579524040222168, "learning_rate": 1.6258153281695885e-05, "loss": 2.1689, "step": 98635 }, { "epoch": 6.701997554015492, "grad_norm": 4.95617151260376, "learning_rate": 1.625390678081261e-05, "loss": 2.167, "step": 98640 }, { "epoch": 6.702337274086153, "grad_norm": 4.191393852233887, "learning_rate": 1.6249660279929337e-05, "loss": 2.1539, "step": 98645 }, { "epoch": 6.702676994156815, "grad_norm": 3.611126661300659, "learning_rate": 1.624541377904607e-05, "loss": 1.9684, "step": 98650 }, { "epoch": 6.703016714227477, "grad_norm": 3.7205698490142822, "learning_rate": 1.6241167278162794e-05, "loss": 1.9586, "step": 98655 }, { "epoch": 6.703356434298138, "grad_norm": 3.372983932495117, "learning_rate": 1.623692077727952e-05, "loss": 2.1068, "step": 98660 }, { "epoch": 6.7036961543688, "grad_norm": 3.525721549987793, "learning_rate": 1.623267427639625e-05, "loss": 2.0142, "step": 98665 }, { "epoch": 6.704035874439462, "grad_norm": 3.843975782394409, "learning_rate": 1.6228427775512978e-05, "loss": 2.0136, "step": 98670 }, { "epoch": 6.7043755945101235, 
"grad_norm": 3.3838813304901123, "learning_rate": 1.622418127462971e-05, "loss": 1.9627, "step": 98675 }, { "epoch": 6.704715314580786, "grad_norm": 3.5857365131378174, "learning_rate": 1.6219934773746434e-05, "loss": 2.2144, "step": 98680 }, { "epoch": 6.705055034651448, "grad_norm": 2.623051643371582, "learning_rate": 1.621568827286316e-05, "loss": 2.0783, "step": 98685 }, { "epoch": 6.705394754722109, "grad_norm": 3.8939976692199707, "learning_rate": 1.621144177197989e-05, "loss": 2.2098, "step": 98690 }, { "epoch": 6.705734474792771, "grad_norm": 4.0456366539001465, "learning_rate": 1.6207195271096618e-05, "loss": 2.0575, "step": 98695 }, { "epoch": 6.706074194863432, "grad_norm": 4.068583965301514, "learning_rate": 1.6202948770213346e-05, "loss": 2.0411, "step": 98700 }, { "epoch": 6.706413914934094, "grad_norm": 4.323114395141602, "learning_rate": 1.6198702269330074e-05, "loss": 2.056, "step": 98705 }, { "epoch": 6.706753635004756, "grad_norm": 3.3029613494873047, "learning_rate": 1.61944557684468e-05, "loss": 2.1357, "step": 98710 }, { "epoch": 6.7070933550754175, "grad_norm": 3.8502259254455566, "learning_rate": 1.6190209267563526e-05, "loss": 2.2033, "step": 98715 }, { "epoch": 6.70743307514608, "grad_norm": 3.3214967250823975, "learning_rate": 1.6185962766680258e-05, "loss": 2.1079, "step": 98720 }, { "epoch": 6.707772795216742, "grad_norm": 3.4567534923553467, "learning_rate": 1.6181716265796986e-05, "loss": 2.0731, "step": 98725 }, { "epoch": 6.708112515287403, "grad_norm": 3.932929039001465, "learning_rate": 1.617746976491371e-05, "loss": 2.04, "step": 98730 }, { "epoch": 6.708452235358065, "grad_norm": 3.640209197998047, "learning_rate": 1.617322326403044e-05, "loss": 2.0311, "step": 98735 }, { "epoch": 6.708791955428727, "grad_norm": 4.044561862945557, "learning_rate": 1.6168976763147166e-05, "loss": 1.952, "step": 98740 }, { "epoch": 6.709131675499388, "grad_norm": 3.103940486907959, "learning_rate": 1.6164730262263894e-05, "loss": 2.1823, "step": 
98745 }, { "epoch": 6.70947139557005, "grad_norm": 3.911278009414673, "learning_rate": 1.6160483761380622e-05, "loss": 2.1261, "step": 98750 }, { "epoch": 6.709811115640712, "grad_norm": 3.2098770141601562, "learning_rate": 1.615623726049735e-05, "loss": 2.3774, "step": 98755 }, { "epoch": 6.7101508357113735, "grad_norm": 3.9525341987609863, "learning_rate": 1.615199075961408e-05, "loss": 2.2693, "step": 98760 }, { "epoch": 6.710490555782036, "grad_norm": 3.797244071960449, "learning_rate": 1.6147744258730806e-05, "loss": 2.2725, "step": 98765 }, { "epoch": 6.710830275852698, "grad_norm": 3.133516311645508, "learning_rate": 1.6143497757847534e-05, "loss": 2.3817, "step": 98770 }, { "epoch": 6.711169995923359, "grad_norm": 3.8802361488342285, "learning_rate": 1.6139251256964262e-05, "loss": 1.9567, "step": 98775 }, { "epoch": 6.711509715994021, "grad_norm": 2.8136990070343018, "learning_rate": 1.613500475608099e-05, "loss": 2.2209, "step": 98780 }, { "epoch": 6.711849436064683, "grad_norm": 3.674377202987671, "learning_rate": 1.613075825519772e-05, "loss": 2.1356, "step": 98785 }, { "epoch": 6.712189156135344, "grad_norm": 3.8519203662872314, "learning_rate": 1.6126511754314446e-05, "loss": 2.128, "step": 98790 }, { "epoch": 6.712528876206006, "grad_norm": 3.709089994430542, "learning_rate": 1.6122265253431174e-05, "loss": 2.1932, "step": 98795 }, { "epoch": 6.712868596276668, "grad_norm": 3.7017600536346436, "learning_rate": 1.61180187525479e-05, "loss": 2.1589, "step": 98800 }, { "epoch": 6.7132083163473295, "grad_norm": 3.737506866455078, "learning_rate": 1.611377225166463e-05, "loss": 2.0003, "step": 98805 }, { "epoch": 6.713548036417992, "grad_norm": 4.562256813049316, "learning_rate": 1.610952575078136e-05, "loss": 2.0352, "step": 98810 }, { "epoch": 6.713887756488654, "grad_norm": 4.215768337249756, "learning_rate": 1.6105279249898083e-05, "loss": 2.165, "step": 98815 }, { "epoch": 6.714227476559315, "grad_norm": 3.349599599838257, "learning_rate": 
1.6101032749014814e-05, "loss": 2.1039, "step": 98820 }, { "epoch": 6.714567196629977, "grad_norm": 3.5306520462036133, "learning_rate": 1.609678624813154e-05, "loss": 2.362, "step": 98825 }, { "epoch": 6.714906916700639, "grad_norm": 3.223895311355591, "learning_rate": 1.6092539747248267e-05, "loss": 2.2147, "step": 98830 }, { "epoch": 6.7152466367713, "grad_norm": 3.2132558822631836, "learning_rate": 1.6088293246364995e-05, "loss": 2.1447, "step": 98835 }, { "epoch": 6.715586356841962, "grad_norm": 3.5095794200897217, "learning_rate": 1.6084046745481723e-05, "loss": 1.9206, "step": 98840 }, { "epoch": 6.715926076912624, "grad_norm": 3.3223459720611572, "learning_rate": 1.6079800244598454e-05, "loss": 1.9269, "step": 98845 }, { "epoch": 6.7162657969832855, "grad_norm": 4.224900722503662, "learning_rate": 1.607555374371518e-05, "loss": 1.9659, "step": 98850 }, { "epoch": 6.716605517053948, "grad_norm": 4.6335368156433105, "learning_rate": 1.6071307242831907e-05, "loss": 2.0155, "step": 98855 }, { "epoch": 6.71694523712461, "grad_norm": 3.1515486240386963, "learning_rate": 1.6067060741948635e-05, "loss": 1.9973, "step": 98860 }, { "epoch": 6.717284957195271, "grad_norm": 3.1716995239257812, "learning_rate": 1.6062814241065363e-05, "loss": 2.2031, "step": 98865 }, { "epoch": 6.717624677265933, "grad_norm": 3.761039972305298, "learning_rate": 1.605856774018209e-05, "loss": 2.2467, "step": 98870 }, { "epoch": 6.717964397336595, "grad_norm": 3.0694167613983154, "learning_rate": 1.605432123929882e-05, "loss": 1.871, "step": 98875 }, { "epoch": 6.718304117407256, "grad_norm": 4.6678361892700195, "learning_rate": 1.6050074738415547e-05, "loss": 2.1824, "step": 98880 }, { "epoch": 6.718643837477918, "grad_norm": 4.150034427642822, "learning_rate": 1.6045828237532272e-05, "loss": 1.7789, "step": 98885 }, { "epoch": 6.71898355754858, "grad_norm": 3.7939701080322266, "learning_rate": 1.6041581736649003e-05, "loss": 2.1037, "step": 98890 }, { "epoch": 6.7193232776192415, 
"grad_norm": 3.6797938346862793, "learning_rate": 1.603733523576573e-05, "loss": 1.9321, "step": 98895 }, { "epoch": 6.719662997689904, "grad_norm": 3.7154157161712646, "learning_rate": 1.6033088734882456e-05, "loss": 2.1483, "step": 98900 }, { "epoch": 6.720002717760566, "grad_norm": 2.795423746109009, "learning_rate": 1.6028842233999187e-05, "loss": 2.0702, "step": 98905 }, { "epoch": 6.720342437831227, "grad_norm": 3.6417815685272217, "learning_rate": 1.6024595733115912e-05, "loss": 2.151, "step": 98910 }, { "epoch": 6.720682157901889, "grad_norm": 3.9823076725006104, "learning_rate": 1.602034923223264e-05, "loss": 1.9271, "step": 98915 }, { "epoch": 6.72102187797255, "grad_norm": 2.9405508041381836, "learning_rate": 1.601610273134937e-05, "loss": 2.1144, "step": 98920 }, { "epoch": 6.721361598043212, "grad_norm": 3.36853289604187, "learning_rate": 1.6011856230466096e-05, "loss": 2.2332, "step": 98925 }, { "epoch": 6.721701318113874, "grad_norm": 3.1111578941345215, "learning_rate": 1.6007609729582827e-05, "loss": 2.1293, "step": 98930 }, { "epoch": 6.7220410381845355, "grad_norm": 3.7231967449188232, "learning_rate": 1.6003363228699552e-05, "loss": 2.2051, "step": 98935 }, { "epoch": 6.7223807582551975, "grad_norm": 3.6882004737854004, "learning_rate": 1.599911672781628e-05, "loss": 1.9901, "step": 98940 }, { "epoch": 6.72272047832586, "grad_norm": 3.3371617794036865, "learning_rate": 1.5994870226933008e-05, "loss": 2.2027, "step": 98945 }, { "epoch": 6.723060198396521, "grad_norm": 3.729374408721924, "learning_rate": 1.5990623726049736e-05, "loss": 2.1698, "step": 98950 }, { "epoch": 6.723399918467183, "grad_norm": 2.754951000213623, "learning_rate": 1.5986377225166464e-05, "loss": 2.2383, "step": 98955 }, { "epoch": 6.723739638537845, "grad_norm": 3.2273926734924316, "learning_rate": 1.5982130724283192e-05, "loss": 2.1024, "step": 98960 }, { "epoch": 6.724079358608506, "grad_norm": 3.1793746948242188, "learning_rate": 1.597788422339992e-05, "loss": 2.2136, 
"step": 98965 }, { "epoch": 6.724419078679168, "grad_norm": 4.187069892883301, "learning_rate": 1.5973637722516648e-05, "loss": 2.1817, "step": 98970 }, { "epoch": 6.72475879874983, "grad_norm": 3.8832967281341553, "learning_rate": 1.5969391221633376e-05, "loss": 2.0731, "step": 98975 }, { "epoch": 6.7250985188204915, "grad_norm": 3.7247397899627686, "learning_rate": 1.5965144720750104e-05, "loss": 2.2647, "step": 98980 }, { "epoch": 6.7254382388911536, "grad_norm": 2.9927661418914795, "learning_rate": 1.596089821986683e-05, "loss": 2.1193, "step": 98985 }, { "epoch": 6.725777958961816, "grad_norm": 4.784883975982666, "learning_rate": 1.595665171898356e-05, "loss": 2.0789, "step": 98990 }, { "epoch": 6.726117679032477, "grad_norm": 3.7005701065063477, "learning_rate": 1.5952405218100284e-05, "loss": 2.2204, "step": 98995 }, { "epoch": 6.726457399103139, "grad_norm": 3.2336905002593994, "learning_rate": 1.5948158717217012e-05, "loss": 2.1544, "step": 99000 }, { "epoch": 6.726797119173801, "grad_norm": 3.169865131378174, "learning_rate": 1.5943912216333744e-05, "loss": 2.2786, "step": 99005 }, { "epoch": 6.727136839244462, "grad_norm": 4.343013763427734, "learning_rate": 1.593966571545047e-05, "loss": 2.0402, "step": 99010 }, { "epoch": 6.727476559315124, "grad_norm": 3.43471097946167, "learning_rate": 1.59354192145672e-05, "loss": 2.3028, "step": 99015 }, { "epoch": 6.727816279385786, "grad_norm": 3.488029956817627, "learning_rate": 1.5931172713683925e-05, "loss": 2.1763, "step": 99020 }, { "epoch": 6.7281559994564475, "grad_norm": 3.5140891075134277, "learning_rate": 1.5926926212800653e-05, "loss": 2.2908, "step": 99025 }, { "epoch": 6.72849571952711, "grad_norm": 3.819584369659424, "learning_rate": 1.592267971191738e-05, "loss": 2.0256, "step": 99030 }, { "epoch": 6.728835439597772, "grad_norm": 3.7345640659332275, "learning_rate": 1.591843321103411e-05, "loss": 2.0477, "step": 99035 }, { "epoch": 6.729175159668433, "grad_norm": 3.674921751022339, "learning_rate": 
1.5914186710150837e-05, "loss": 2.1339, "step": 99040 }, { "epoch": 6.729514879739095, "grad_norm": 3.343701124191284, "learning_rate": 1.5909940209267565e-05, "loss": 2.1644, "step": 99045 }, { "epoch": 6.729854599809757, "grad_norm": 3.9085540771484375, "learning_rate": 1.5905693708384293e-05, "loss": 2.0231, "step": 99050 }, { "epoch": 6.730194319880418, "grad_norm": 3.3533222675323486, "learning_rate": 1.590144720750102e-05, "loss": 2.2305, "step": 99055 }, { "epoch": 6.73053403995108, "grad_norm": 4.632845401763916, "learning_rate": 1.589720070661775e-05, "loss": 2.0081, "step": 99060 }, { "epoch": 6.730873760021742, "grad_norm": 3.834604501724243, "learning_rate": 1.5892954205734477e-05, "loss": 1.8372, "step": 99065 }, { "epoch": 6.7312134800924035, "grad_norm": 3.2045674324035645, "learning_rate": 1.58887077048512e-05, "loss": 2.4474, "step": 99070 }, { "epoch": 6.731553200163066, "grad_norm": 3.9498627185821533, "learning_rate": 1.5884461203967933e-05, "loss": 2.1254, "step": 99075 }, { "epoch": 6.731892920233728, "grad_norm": 3.472949981689453, "learning_rate": 1.5880214703084657e-05, "loss": 2.2206, "step": 99080 }, { "epoch": 6.732232640304389, "grad_norm": 3.141598701477051, "learning_rate": 1.5875968202201385e-05, "loss": 2.1341, "step": 99085 }, { "epoch": 6.732572360375051, "grad_norm": 3.6539487838745117, "learning_rate": 1.5871721701318117e-05, "loss": 2.1221, "step": 99090 }, { "epoch": 6.732912080445713, "grad_norm": 3.8672361373901367, "learning_rate": 1.586747520043484e-05, "loss": 2.1018, "step": 99095 }, { "epoch": 6.733251800516374, "grad_norm": 3.2873010635375977, "learning_rate": 1.5863228699551573e-05, "loss": 2.0381, "step": 99100 }, { "epoch": 6.733591520587036, "grad_norm": 3.2621965408325195, "learning_rate": 1.5858982198668297e-05, "loss": 1.8679, "step": 99105 }, { "epoch": 6.733931240657698, "grad_norm": 2.9291322231292725, "learning_rate": 1.5854735697785025e-05, "loss": 2.0986, "step": 99110 }, { "epoch": 6.7342709607283595, 
"grad_norm": 3.662675380706787, "learning_rate": 1.5850489196901753e-05, "loss": 2.0255, "step": 99115 }, { "epoch": 6.734610680799022, "grad_norm": 3.409006357192993, "learning_rate": 1.584624269601848e-05, "loss": 1.9522, "step": 99120 }, { "epoch": 6.734950400869684, "grad_norm": 3.1380300521850586, "learning_rate": 1.584199619513521e-05, "loss": 1.9133, "step": 99125 }, { "epoch": 6.735290120940345, "grad_norm": 3.0826151371002197, "learning_rate": 1.5837749694251937e-05, "loss": 2.3181, "step": 99130 }, { "epoch": 6.735629841011007, "grad_norm": 3.2875523567199707, "learning_rate": 1.5833503193368665e-05, "loss": 1.8988, "step": 99135 }, { "epoch": 6.735969561081669, "grad_norm": 2.997202157974243, "learning_rate": 1.5829256692485393e-05, "loss": 2.197, "step": 99140 }, { "epoch": 6.73630928115233, "grad_norm": 3.5668585300445557, "learning_rate": 1.582501019160212e-05, "loss": 2.1439, "step": 99145 }, { "epoch": 6.736649001222992, "grad_norm": 3.456336498260498, "learning_rate": 1.582076369071885e-05, "loss": 2.1602, "step": 99150 }, { "epoch": 6.736988721293654, "grad_norm": 3.7417213916778564, "learning_rate": 1.5816517189835574e-05, "loss": 1.9888, "step": 99155 }, { "epoch": 6.7373284413643155, "grad_norm": 3.503891706466675, "learning_rate": 1.5812270688952305e-05, "loss": 1.9457, "step": 99160 }, { "epoch": 6.737668161434978, "grad_norm": 2.95768141746521, "learning_rate": 1.580802418806903e-05, "loss": 2.2104, "step": 99165 }, { "epoch": 6.73800788150564, "grad_norm": 3.7129831314086914, "learning_rate": 1.5803777687185758e-05, "loss": 2.0645, "step": 99170 }, { "epoch": 6.738347601576301, "grad_norm": 3.047520875930786, "learning_rate": 1.579953118630249e-05, "loss": 2.157, "step": 99175 }, { "epoch": 6.738687321646963, "grad_norm": 3.854994773864746, "learning_rate": 1.5795284685419214e-05, "loss": 2.064, "step": 99180 }, { "epoch": 6.739027041717625, "grad_norm": 3.474689245223999, "learning_rate": 1.5791038184535945e-05, "loss": 2.1233, "step": 
99185 }, { "epoch": 6.739366761788286, "grad_norm": 3.455312490463257, "learning_rate": 1.578679168365267e-05, "loss": 2.1787, "step": 99190 }, { "epoch": 6.739706481858948, "grad_norm": 3.206857919692993, "learning_rate": 1.5782545182769398e-05, "loss": 2.206, "step": 99195 }, { "epoch": 6.74004620192961, "grad_norm": 3.3465216159820557, "learning_rate": 1.577829868188613e-05, "loss": 2.129, "step": 99200 }, { "epoch": 6.7403859220002715, "grad_norm": 3.4097611904144287, "learning_rate": 1.5774052181002854e-05, "loss": 2.4355, "step": 99205 }, { "epoch": 6.740725642070934, "grad_norm": 3.8023900985717773, "learning_rate": 1.5769805680119582e-05, "loss": 2.2265, "step": 99210 }, { "epoch": 6.741065362141596, "grad_norm": 3.664923906326294, "learning_rate": 1.576555917923631e-05, "loss": 2.0748, "step": 99215 }, { "epoch": 6.741405082212257, "grad_norm": 3.8746678829193115, "learning_rate": 1.5761312678353038e-05, "loss": 2.1276, "step": 99220 }, { "epoch": 6.741744802282919, "grad_norm": 3.141897678375244, "learning_rate": 1.5757066177469766e-05, "loss": 2.1586, "step": 99225 }, { "epoch": 6.742084522353581, "grad_norm": 3.9236907958984375, "learning_rate": 1.5752819676586494e-05, "loss": 2.1073, "step": 99230 }, { "epoch": 6.742424242424242, "grad_norm": 3.513211965560913, "learning_rate": 1.5748573175703222e-05, "loss": 2.3207, "step": 99235 }, { "epoch": 6.742763962494904, "grad_norm": 3.7949697971343994, "learning_rate": 1.5744326674819947e-05, "loss": 2.2948, "step": 99240 }, { "epoch": 6.743103682565566, "grad_norm": 3.7885475158691406, "learning_rate": 1.5740080173936678e-05, "loss": 1.9381, "step": 99245 }, { "epoch": 6.7434434026362275, "grad_norm": 3.2317464351654053, "learning_rate": 1.5735833673053406e-05, "loss": 1.9083, "step": 99250 }, { "epoch": 6.74378312270689, "grad_norm": 3.6460695266723633, "learning_rate": 1.573158717217013e-05, "loss": 1.9615, "step": 99255 }, { "epoch": 6.744122842777552, "grad_norm": 2.8809120655059814, "learning_rate": 
1.5727340671286862e-05, "loss": 2.1649, "step": 99260 }, { "epoch": 6.744462562848213, "grad_norm": 4.04257345199585, "learning_rate": 1.5723094170403587e-05, "loss": 1.9254, "step": 99265 }, { "epoch": 6.744802282918875, "grad_norm": 3.373732089996338, "learning_rate": 1.5718847669520318e-05, "loss": 1.9772, "step": 99270 }, { "epoch": 6.745142002989537, "grad_norm": 3.756117105484009, "learning_rate": 1.5714601168637043e-05, "loss": 1.9386, "step": 99275 }, { "epoch": 6.745481723060198, "grad_norm": 2.7779252529144287, "learning_rate": 1.571035466775377e-05, "loss": 2.2056, "step": 99280 }, { "epoch": 6.74582144313086, "grad_norm": 3.1611504554748535, "learning_rate": 1.5706108166870502e-05, "loss": 1.8953, "step": 99285 }, { "epoch": 6.746161163201522, "grad_norm": 2.7011280059814453, "learning_rate": 1.5701861665987227e-05, "loss": 1.976, "step": 99290 }, { "epoch": 6.746500883272184, "grad_norm": 3.7204856872558594, "learning_rate": 1.5697615165103955e-05, "loss": 2.0992, "step": 99295 }, { "epoch": 6.746840603342846, "grad_norm": 2.8124523162841797, "learning_rate": 1.5693368664220683e-05, "loss": 2.2236, "step": 99300 }, { "epoch": 6.747180323413508, "grad_norm": 2.85585880279541, "learning_rate": 1.568912216333741e-05, "loss": 2.2548, "step": 99305 }, { "epoch": 6.747520043484169, "grad_norm": 3.6811888217926025, "learning_rate": 1.568487566245414e-05, "loss": 2.0645, "step": 99310 }, { "epoch": 6.747859763554831, "grad_norm": 4.234136581420898, "learning_rate": 1.5680629161570867e-05, "loss": 2.0966, "step": 99315 }, { "epoch": 6.748199483625493, "grad_norm": 3.2174222469329834, "learning_rate": 1.5676382660687595e-05, "loss": 2.2126, "step": 99320 }, { "epoch": 6.748539203696154, "grad_norm": 3.234588384628296, "learning_rate": 1.567213615980432e-05, "loss": 2.2415, "step": 99325 }, { "epoch": 6.748878923766816, "grad_norm": 4.152876377105713, "learning_rate": 1.566788965892105e-05, "loss": 2.259, "step": 99330 }, { "epoch": 6.749218643837478, 
"grad_norm": 4.125636577606201, "learning_rate": 1.566364315803778e-05, "loss": 1.9961, "step": 99335 }, { "epoch": 6.74955836390814, "grad_norm": 3.6656157970428467, "learning_rate": 1.5659396657154503e-05, "loss": 1.8548, "step": 99340 }, { "epoch": 6.749898083978802, "grad_norm": 5.226391315460205, "learning_rate": 1.5655150156271235e-05, "loss": 2.4026, "step": 99345 }, { "epoch": 6.750237804049464, "grad_norm": 5.221137523651123, "learning_rate": 1.565090365538796e-05, "loss": 2.118, "step": 99350 }, { "epoch": 6.750577524120125, "grad_norm": 3.396169900894165, "learning_rate": 1.564665715450469e-05, "loss": 1.9528, "step": 99355 }, { "epoch": 6.750917244190787, "grad_norm": 2.9616920948028564, "learning_rate": 1.5642410653621415e-05, "loss": 1.9341, "step": 99360 }, { "epoch": 6.751256964261449, "grad_norm": 3.165031671524048, "learning_rate": 1.5638164152738144e-05, "loss": 2.1748, "step": 99365 }, { "epoch": 6.75159668433211, "grad_norm": 3.187032699584961, "learning_rate": 1.5633917651854875e-05, "loss": 2.2064, "step": 99370 }, { "epoch": 6.751936404402772, "grad_norm": 3.842561721801758, "learning_rate": 1.56296711509716e-05, "loss": 2.0949, "step": 99375 }, { "epoch": 6.7522761244734335, "grad_norm": 4.068708419799805, "learning_rate": 1.5625424650088328e-05, "loss": 2.1599, "step": 99380 }, { "epoch": 6.752615844544096, "grad_norm": 3.3352203369140625, "learning_rate": 1.5621178149205056e-05, "loss": 2.089, "step": 99385 }, { "epoch": 6.752955564614758, "grad_norm": 3.8672666549682617, "learning_rate": 1.5616931648321784e-05, "loss": 2.1988, "step": 99390 }, { "epoch": 6.753295284685419, "grad_norm": 3.517197370529175, "learning_rate": 1.561268514743851e-05, "loss": 2.1284, "step": 99395 }, { "epoch": 6.753635004756081, "grad_norm": 3.544889211654663, "learning_rate": 1.560843864655524e-05, "loss": 2.2948, "step": 99400 }, { "epoch": 6.753974724826743, "grad_norm": 4.098052978515625, "learning_rate": 1.5604192145671968e-05, "loss": 1.9883, "step": 
99405 }, { "epoch": 6.754314444897404, "grad_norm": 3.8406267166137695, "learning_rate": 1.5599945644788692e-05, "loss": 2.003, "step": 99410 }, { "epoch": 6.754654164968066, "grad_norm": 3.6249406337738037, "learning_rate": 1.5595699143905424e-05, "loss": 1.9796, "step": 99415 }, { "epoch": 6.754993885038728, "grad_norm": 4.456005573272705, "learning_rate": 1.559145264302215e-05, "loss": 2.0722, "step": 99420 }, { "epoch": 6.7553336051093895, "grad_norm": 4.053542137145996, "learning_rate": 1.5587206142138876e-05, "loss": 2.2474, "step": 99425 }, { "epoch": 6.755673325180052, "grad_norm": 4.341855525970459, "learning_rate": 1.5582959641255608e-05, "loss": 2.1459, "step": 99430 }, { "epoch": 6.756013045250714, "grad_norm": 2.9923131465911865, "learning_rate": 1.5578713140372332e-05, "loss": 2.0816, "step": 99435 }, { "epoch": 6.756352765321375, "grad_norm": 2.7805655002593994, "learning_rate": 1.557446663948906e-05, "loss": 2.1879, "step": 99440 }, { "epoch": 6.756692485392037, "grad_norm": 4.015406608581543, "learning_rate": 1.557022013860579e-05, "loss": 2.3093, "step": 99445 }, { "epoch": 6.757032205462699, "grad_norm": 3.0234615802764893, "learning_rate": 1.5565973637722516e-05, "loss": 2.2583, "step": 99450 }, { "epoch": 6.75737192553336, "grad_norm": 2.752840995788574, "learning_rate": 1.5561727136839248e-05, "loss": 2.0721, "step": 99455 }, { "epoch": 6.757711645604022, "grad_norm": 3.217067003250122, "learning_rate": 1.5557480635955972e-05, "loss": 2.3576, "step": 99460 }, { "epoch": 6.758051365674684, "grad_norm": 4.714778423309326, "learning_rate": 1.55532341350727e-05, "loss": 2.013, "step": 99465 }, { "epoch": 6.7583910857453455, "grad_norm": 3.724675178527832, "learning_rate": 1.5548987634189428e-05, "loss": 2.1051, "step": 99470 }, { "epoch": 6.758730805816008, "grad_norm": 3.6557633876800537, "learning_rate": 1.5544741133306156e-05, "loss": 1.8862, "step": 99475 }, { "epoch": 6.75907052588667, "grad_norm": 3.5196940898895264, "learning_rate": 
1.5540494632422884e-05, "loss": 1.8937, "step": 99480 }, { "epoch": 6.759410245957331, "grad_norm": 3.1550133228302, "learning_rate": 1.5536248131539612e-05, "loss": 2.3093, "step": 99485 }, { "epoch": 6.759749966027993, "grad_norm": 5.282888889312744, "learning_rate": 1.553200163065634e-05, "loss": 2.3193, "step": 99490 }, { "epoch": 6.760089686098655, "grad_norm": 3.550931692123413, "learning_rate": 1.552775512977307e-05, "loss": 2.0292, "step": 99495 }, { "epoch": 6.760429406169316, "grad_norm": 3.885896682739258, "learning_rate": 1.5523508628889796e-05, "loss": 2.1116, "step": 99500 }, { "epoch": 6.760769126239978, "grad_norm": 3.0334415435791016, "learning_rate": 1.5519262128006524e-05, "loss": 2.254, "step": 99505 }, { "epoch": 6.76110884631064, "grad_norm": 3.3557896614074707, "learning_rate": 1.551501562712325e-05, "loss": 2.119, "step": 99510 }, { "epoch": 6.7614485663813015, "grad_norm": 3.816457986831665, "learning_rate": 1.551076912623998e-05, "loss": 2.1219, "step": 99515 }, { "epoch": 6.761788286451964, "grad_norm": 4.306690216064453, "learning_rate": 1.5506522625356705e-05, "loss": 1.8765, "step": 99520 }, { "epoch": 6.762128006522626, "grad_norm": 3.9425930976867676, "learning_rate": 1.5502276124473433e-05, "loss": 2.1921, "step": 99525 }, { "epoch": 6.762467726593287, "grad_norm": 3.8340866565704346, "learning_rate": 1.5498029623590164e-05, "loss": 2.1961, "step": 99530 }, { "epoch": 6.762807446663949, "grad_norm": 3.819902181625366, "learning_rate": 1.549378312270689e-05, "loss": 2.1315, "step": 99535 }, { "epoch": 6.763147166734611, "grad_norm": 3.47292423248291, "learning_rate": 1.548953662182362e-05, "loss": 2.3134, "step": 99540 }, { "epoch": 6.763486886805272, "grad_norm": 3.1763248443603516, "learning_rate": 1.5485290120940345e-05, "loss": 1.9356, "step": 99545 }, { "epoch": 6.763826606875934, "grad_norm": 4.0594706535339355, "learning_rate": 1.5481043620057073e-05, "loss": 2.0616, "step": 99550 }, { "epoch": 6.764166326946596, "grad_norm": 
3.204721450805664, "learning_rate": 1.54767971191738e-05, "loss": 2.142, "step": 99555 }, { "epoch": 6.7645060470172576, "grad_norm": 3.6899969577789307, "learning_rate": 1.547255061829053e-05, "loss": 1.8196, "step": 99560 }, { "epoch": 6.76484576708792, "grad_norm": 3.6930646896362305, "learning_rate": 1.5468304117407257e-05, "loss": 2.3323, "step": 99565 }, { "epoch": 6.765185487158582, "grad_norm": 3.8943405151367188, "learning_rate": 1.5464057616523985e-05, "loss": 1.9864, "step": 99570 }, { "epoch": 6.765525207229243, "grad_norm": 3.3109986782073975, "learning_rate": 1.5459811115640713e-05, "loss": 2.3581, "step": 99575 }, { "epoch": 6.765864927299905, "grad_norm": 3.7479074001312256, "learning_rate": 1.545556461475744e-05, "loss": 1.92, "step": 99580 }, { "epoch": 6.766204647370567, "grad_norm": 3.976898193359375, "learning_rate": 1.545131811387417e-05, "loss": 1.9165, "step": 99585 }, { "epoch": 6.766544367441228, "grad_norm": 3.7113077640533447, "learning_rate": 1.5447071612990897e-05, "loss": 2.0671, "step": 99590 }, { "epoch": 6.76688408751189, "grad_norm": 4.327286243438721, "learning_rate": 1.544282511210762e-05, "loss": 2.2196, "step": 99595 }, { "epoch": 6.7672238075825515, "grad_norm": 3.693394660949707, "learning_rate": 1.5438578611224353e-05, "loss": 2.1455, "step": 99600 }, { "epoch": 6.767563527653214, "grad_norm": 3.3644704818725586, "learning_rate": 1.5434332110341078e-05, "loss": 2.0558, "step": 99605 }, { "epoch": 6.767903247723876, "grad_norm": 3.4122445583343506, "learning_rate": 1.5430085609457806e-05, "loss": 2.1075, "step": 99610 }, { "epoch": 6.768242967794537, "grad_norm": 5.047346591949463, "learning_rate": 1.5425839108574537e-05, "loss": 2.2157, "step": 99615 }, { "epoch": 6.768582687865199, "grad_norm": 4.398259162902832, "learning_rate": 1.5421592607691262e-05, "loss": 2.0878, "step": 99620 }, { "epoch": 6.768922407935861, "grad_norm": 2.9643120765686035, "learning_rate": 1.5417346106807993e-05, "loss": 2.0523, "step": 99625 }, { 
"epoch": 6.769262128006522, "grad_norm": 3.649690628051758, "learning_rate": 1.5413099605924718e-05, "loss": 2.0916, "step": 99630 }, { "epoch": 6.769601848077184, "grad_norm": 2.9313459396362305, "learning_rate": 1.5408853105041446e-05, "loss": 2.105, "step": 99635 }, { "epoch": 6.769941568147846, "grad_norm": 3.367586851119995, "learning_rate": 1.5404606604158177e-05, "loss": 1.8581, "step": 99640 }, { "epoch": 6.7702812882185075, "grad_norm": 4.049875736236572, "learning_rate": 1.5400360103274902e-05, "loss": 1.938, "step": 99645 }, { "epoch": 6.77062100828917, "grad_norm": 3.058210611343384, "learning_rate": 1.539611360239163e-05, "loss": 2.2171, "step": 99650 }, { "epoch": 6.770960728359832, "grad_norm": 3.484788656234741, "learning_rate": 1.5391867101508358e-05, "loss": 2.0619, "step": 99655 }, { "epoch": 6.771300448430493, "grad_norm": 3.983800172805786, "learning_rate": 1.5387620600625086e-05, "loss": 2.1725, "step": 99660 }, { "epoch": 6.771640168501155, "grad_norm": 2.7006940841674805, "learning_rate": 1.5383374099741814e-05, "loss": 1.9332, "step": 99665 }, { "epoch": 6.771979888571817, "grad_norm": 3.483530282974243, "learning_rate": 1.5379127598858542e-05, "loss": 2.2675, "step": 99670 }, { "epoch": 6.772319608642478, "grad_norm": 4.25681734085083, "learning_rate": 1.537488109797527e-05, "loss": 2.1748, "step": 99675 }, { "epoch": 6.77265932871314, "grad_norm": 3.1210336685180664, "learning_rate": 1.5370634597091994e-05, "loss": 2.0024, "step": 99680 }, { "epoch": 6.772999048783802, "grad_norm": 3.0748465061187744, "learning_rate": 1.5366388096208726e-05, "loss": 2.13, "step": 99685 }, { "epoch": 6.7733387688544635, "grad_norm": 3.6351656913757324, "learning_rate": 1.5362141595325454e-05, "loss": 2.1346, "step": 99690 }, { "epoch": 6.773678488925126, "grad_norm": 3.791738986968994, "learning_rate": 1.535789509444218e-05, "loss": 2.1951, "step": 99695 }, { "epoch": 6.774018208995788, "grad_norm": 2.7547385692596436, "learning_rate": 
1.535364859355891e-05, "loss": 2.1704, "step": 99700 }, { "epoch": 6.774357929066449, "grad_norm": 3.2836413383483887, "learning_rate": 1.5349402092675634e-05, "loss": 2.3022, "step": 99705 }, { "epoch": 6.774697649137111, "grad_norm": 4.199805736541748, "learning_rate": 1.5345155591792366e-05, "loss": 1.872, "step": 99710 }, { "epoch": 6.775037369207773, "grad_norm": 3.339519739151001, "learning_rate": 1.534090909090909e-05, "loss": 1.8755, "step": 99715 }, { "epoch": 6.775377089278434, "grad_norm": 2.8551788330078125, "learning_rate": 1.533666259002582e-05, "loss": 1.9512, "step": 99720 }, { "epoch": 6.775716809349096, "grad_norm": 3.712528705596924, "learning_rate": 1.533241608914255e-05, "loss": 2.1093, "step": 99725 }, { "epoch": 6.776056529419758, "grad_norm": 4.012719631195068, "learning_rate": 1.5328169588259275e-05, "loss": 2.3499, "step": 99730 }, { "epoch": 6.7763962494904195, "grad_norm": 3.593017816543579, "learning_rate": 1.5323923087376003e-05, "loss": 2.0917, "step": 99735 }, { "epoch": 6.776735969561082, "grad_norm": 3.7628588676452637, "learning_rate": 1.531967658649273e-05, "loss": 1.8661, "step": 99740 }, { "epoch": 6.777075689631744, "grad_norm": 3.1270012855529785, "learning_rate": 1.531543008560946e-05, "loss": 1.8787, "step": 99745 }, { "epoch": 6.777415409702405, "grad_norm": 3.450317144393921, "learning_rate": 1.5311183584726187e-05, "loss": 1.8748, "step": 99750 }, { "epoch": 6.777755129773067, "grad_norm": 2.761051654815674, "learning_rate": 1.5306937083842915e-05, "loss": 2.2086, "step": 99755 }, { "epoch": 6.778094849843729, "grad_norm": 4.024659633636475, "learning_rate": 1.5302690582959643e-05, "loss": 1.9237, "step": 99760 }, { "epoch": 6.77843456991439, "grad_norm": 3.3800363540649414, "learning_rate": 1.5298444082076367e-05, "loss": 1.9771, "step": 99765 }, { "epoch": 6.778774289985052, "grad_norm": 4.22568941116333, "learning_rate": 1.52941975811931e-05, "loss": 1.9001, "step": 99770 }, { "epoch": 6.779114010055714, "grad_norm": 
3.451009750366211, "learning_rate": 1.5289951080309827e-05, "loss": 1.8733, "step": 99775 }, { "epoch": 6.7794537301263755, "grad_norm": 3.3301873207092285, "learning_rate": 1.528570457942655e-05, "loss": 2.0975, "step": 99780 }, { "epoch": 6.779793450197038, "grad_norm": 3.9054484367370605, "learning_rate": 1.5281458078543283e-05, "loss": 2.0222, "step": 99785 }, { "epoch": 6.7801331702677, "grad_norm": 3.4518702030181885, "learning_rate": 1.5277211577660007e-05, "loss": 2.1815, "step": 99790 }, { "epoch": 6.780472890338361, "grad_norm": 3.752945899963379, "learning_rate": 1.527296507677674e-05, "loss": 2.3079, "step": 99795 }, { "epoch": 6.780812610409023, "grad_norm": 3.301631212234497, "learning_rate": 1.5268718575893463e-05, "loss": 2.0572, "step": 99800 }, { "epoch": 6.781152330479685, "grad_norm": 4.386890411376953, "learning_rate": 1.526447207501019e-05, "loss": 2.0605, "step": 99805 }, { "epoch": 6.781492050550346, "grad_norm": 3.258086681365967, "learning_rate": 1.5260225574126923e-05, "loss": 2.0103, "step": 99810 }, { "epoch": 6.781831770621008, "grad_norm": 3.4134202003479004, "learning_rate": 1.5255979073243647e-05, "loss": 2.0561, "step": 99815 }, { "epoch": 6.78217149069167, "grad_norm": 3.9091694355010986, "learning_rate": 1.5251732572360375e-05, "loss": 2.2999, "step": 99820 }, { "epoch": 6.7825112107623315, "grad_norm": 3.5390517711639404, "learning_rate": 1.5247486071477105e-05, "loss": 2.0855, "step": 99825 }, { "epoch": 6.782850930832994, "grad_norm": 4.316243648529053, "learning_rate": 1.5243239570593831e-05, "loss": 1.9768, "step": 99830 }, { "epoch": 6.783190650903656, "grad_norm": 3.311649799346924, "learning_rate": 1.5238993069710558e-05, "loss": 2.168, "step": 99835 }, { "epoch": 6.783530370974317, "grad_norm": 3.4541192054748535, "learning_rate": 1.5234746568827287e-05, "loss": 2.2386, "step": 99840 }, { "epoch": 6.783870091044979, "grad_norm": 3.1317672729492188, "learning_rate": 1.5230500067944015e-05, "loss": 1.9826, "step": 99845 }, 
{ "epoch": 6.784209811115641, "grad_norm": 3.6188693046569824, "learning_rate": 1.5226253567060742e-05, "loss": 2.2835, "step": 99850 }, { "epoch": 6.784549531186302, "grad_norm": 2.8177919387817383, "learning_rate": 1.5222007066177471e-05, "loss": 2.1868, "step": 99855 }, { "epoch": 6.784889251256964, "grad_norm": 3.5152628421783447, "learning_rate": 1.5217760565294198e-05, "loss": 1.9892, "step": 99860 }, { "epoch": 6.785228971327626, "grad_norm": 3.5282275676727295, "learning_rate": 1.5213514064410924e-05, "loss": 2.1782, "step": 99865 }, { "epoch": 6.7855686913982876, "grad_norm": 5.412219047546387, "learning_rate": 1.5209267563527654e-05, "loss": 2.0889, "step": 99870 }, { "epoch": 6.78590841146895, "grad_norm": 4.325644016265869, "learning_rate": 1.5205021062644382e-05, "loss": 1.9792, "step": 99875 }, { "epoch": 6.786248131539612, "grad_norm": 3.434509515762329, "learning_rate": 1.5200774561761111e-05, "loss": 2.1638, "step": 99880 }, { "epoch": 6.786587851610273, "grad_norm": 3.715912103652954, "learning_rate": 1.5196528060877838e-05, "loss": 2.0464, "step": 99885 }, { "epoch": 6.786927571680935, "grad_norm": 3.934006690979004, "learning_rate": 1.5192281559994564e-05, "loss": 2.12, "step": 99890 }, { "epoch": 6.787267291751597, "grad_norm": 3.653982400894165, "learning_rate": 1.5188035059111294e-05, "loss": 2.3968, "step": 99895 }, { "epoch": 6.787607011822258, "grad_norm": 4.117053985595703, "learning_rate": 1.5183788558228022e-05, "loss": 2.1171, "step": 99900 }, { "epoch": 6.78794673189292, "grad_norm": 2.928075075149536, "learning_rate": 1.5179542057344748e-05, "loss": 2.162, "step": 99905 }, { "epoch": 6.788286451963582, "grad_norm": 3.3301868438720703, "learning_rate": 1.5175295556461478e-05, "loss": 2.2274, "step": 99910 }, { "epoch": 6.788626172034244, "grad_norm": 4.228358745574951, "learning_rate": 1.5171049055578204e-05, "loss": 1.9929, "step": 99915 }, { "epoch": 6.788965892104906, "grad_norm": 3.6575450897216797, "learning_rate": 
1.516680255469493e-05, "loss": 2.2186, "step": 99920 }, { "epoch": 6.789305612175568, "grad_norm": 3.8672401905059814, "learning_rate": 1.516255605381166e-05, "loss": 1.8065, "step": 99925 }, { "epoch": 6.789645332246229, "grad_norm": 2.7255313396453857, "learning_rate": 1.5158309552928388e-05, "loss": 2.216, "step": 99930 }, { "epoch": 6.789985052316891, "grad_norm": 3.0437965393066406, "learning_rate": 1.5154063052045114e-05, "loss": 2.042, "step": 99935 }, { "epoch": 6.790324772387553, "grad_norm": 3.6293959617614746, "learning_rate": 1.5149816551161844e-05, "loss": 2.1308, "step": 99940 }, { "epoch": 6.790664492458214, "grad_norm": 3.4213707447052, "learning_rate": 1.514557005027857e-05, "loss": 1.645, "step": 99945 }, { "epoch": 6.791004212528876, "grad_norm": 3.7987685203552246, "learning_rate": 1.5141323549395298e-05, "loss": 2.0853, "step": 99950 }, { "epoch": 6.791343932599538, "grad_norm": 3.667130947113037, "learning_rate": 1.5137077048512028e-05, "loss": 2.282, "step": 99955 }, { "epoch": 6.7916836526702, "grad_norm": 3.993486166000366, "learning_rate": 1.5132830547628754e-05, "loss": 2.1882, "step": 99960 }, { "epoch": 6.792023372740862, "grad_norm": 3.835981845855713, "learning_rate": 1.5128584046745484e-05, "loss": 2.0088, "step": 99965 }, { "epoch": 6.792363092811524, "grad_norm": 4.102457523345947, "learning_rate": 1.512433754586221e-05, "loss": 2.0764, "step": 99970 }, { "epoch": 6.792702812882185, "grad_norm": 3.6801810264587402, "learning_rate": 1.5120091044978937e-05, "loss": 1.9443, "step": 99975 }, { "epoch": 6.793042532952847, "grad_norm": 3.5888240337371826, "learning_rate": 1.5115844544095666e-05, "loss": 2.0216, "step": 99980 }, { "epoch": 6.793382253023509, "grad_norm": 2.973147392272949, "learning_rate": 1.5111598043212394e-05, "loss": 2.1805, "step": 99985 }, { "epoch": 6.79372197309417, "grad_norm": 4.39150333404541, "learning_rate": 1.510735154232912e-05, "loss": 1.9569, "step": 99990 }, { "epoch": 6.794061693164832, "grad_norm": 
4.089654445648193, "learning_rate": 1.510310504144585e-05, "loss": 1.9806, "step": 99995 }, { "epoch": 6.794401413235494, "grad_norm": 3.638979911804199, "learning_rate": 1.5098858540562577e-05, "loss": 2.2588, "step": 100000 }, { "epoch": 6.794741133306156, "grad_norm": 3.243391275405884, "learning_rate": 1.5094612039679305e-05, "loss": 2.1893, "step": 100005 }, { "epoch": 6.795080853376818, "grad_norm": 3.9940176010131836, "learning_rate": 1.5090365538796033e-05, "loss": 1.9223, "step": 100010 }, { "epoch": 6.79542057344748, "grad_norm": 3.905627727508545, "learning_rate": 1.508611903791276e-05, "loss": 2.3034, "step": 100015 }, { "epoch": 6.795760293518141, "grad_norm": 3.734320640563965, "learning_rate": 1.5081872537029487e-05, "loss": 2.0943, "step": 100020 }, { "epoch": 6.796100013588803, "grad_norm": 3.1839444637298584, "learning_rate": 1.5077626036146217e-05, "loss": 2.2278, "step": 100025 }, { "epoch": 6.796439733659465, "grad_norm": 3.532506227493286, "learning_rate": 1.5073379535262943e-05, "loss": 2.0653, "step": 100030 }, { "epoch": 6.796779453730126, "grad_norm": 3.482520341873169, "learning_rate": 1.5069133034379671e-05, "loss": 2.077, "step": 100035 }, { "epoch": 6.797119173800788, "grad_norm": 3.358055353164673, "learning_rate": 1.50648865334964e-05, "loss": 1.7445, "step": 100040 }, { "epoch": 6.79745889387145, "grad_norm": 2.897714138031006, "learning_rate": 1.5060640032613127e-05, "loss": 2.0012, "step": 100045 }, { "epoch": 6.797798613942112, "grad_norm": 3.1690874099731445, "learning_rate": 1.5056393531729857e-05, "loss": 1.8956, "step": 100050 }, { "epoch": 6.798138334012774, "grad_norm": 3.5704054832458496, "learning_rate": 1.5052147030846583e-05, "loss": 2.0011, "step": 100055 }, { "epoch": 6.798478054083435, "grad_norm": 4.713310718536377, "learning_rate": 1.504790052996331e-05, "loss": 2.1265, "step": 100060 }, { "epoch": 6.798817774154097, "grad_norm": 3.031679630279541, "learning_rate": 1.504365402908004e-05, "loss": 2.2891, "step": 
100065 }, { "epoch": 6.799157494224759, "grad_norm": 3.4830195903778076, "learning_rate": 1.5039407528196767e-05, "loss": 2.4093, "step": 100070 }, { "epoch": 6.79949721429542, "grad_norm": 4.494650840759277, "learning_rate": 1.5035161027313493e-05, "loss": 1.9013, "step": 100075 }, { "epoch": 6.799836934366082, "grad_norm": 4.0173516273498535, "learning_rate": 1.5030914526430223e-05, "loss": 2.1229, "step": 100080 }, { "epoch": 6.800176654436744, "grad_norm": 4.31553840637207, "learning_rate": 1.502666802554695e-05, "loss": 2.0946, "step": 100085 }, { "epoch": 6.8005163745074055, "grad_norm": 4.195758819580078, "learning_rate": 1.5022421524663678e-05, "loss": 2.0306, "step": 100090 }, { "epoch": 6.800856094578068, "grad_norm": 3.044316053390503, "learning_rate": 1.501902432395706e-05, "loss": 2.3339, "step": 100095 }, { "epoch": 6.80119581464873, "grad_norm": 3.6540136337280273, "learning_rate": 1.501477782307379e-05, "loss": 1.9454, "step": 100100 }, { "epoch": 6.801535534719391, "grad_norm": 3.3118090629577637, "learning_rate": 1.5010531322190516e-05, "loss": 2.102, "step": 100105 }, { "epoch": 6.801875254790053, "grad_norm": 4.791417121887207, "learning_rate": 1.5006284821307242e-05, "loss": 1.8511, "step": 100110 }, { "epoch": 6.802214974860715, "grad_norm": 3.2679829597473145, "learning_rate": 1.5002038320423972e-05, "loss": 2.3069, "step": 100115 }, { "epoch": 6.802554694931376, "grad_norm": 3.4514272212982178, "learning_rate": 1.49977918195407e-05, "loss": 1.7809, "step": 100120 }, { "epoch": 6.802894415002038, "grad_norm": 4.216090202331543, "learning_rate": 1.4993545318657426e-05, "loss": 2.2935, "step": 100125 }, { "epoch": 6.8032341350727, "grad_norm": 3.469449520111084, "learning_rate": 1.4989298817774156e-05, "loss": 2.0724, "step": 100130 }, { "epoch": 6.8035738551433615, "grad_norm": 3.5604753494262695, "learning_rate": 1.4985052316890882e-05, "loss": 2.1234, "step": 100135 }, { "epoch": 6.803913575214024, "grad_norm": 3.343249559402466, 
"learning_rate": 1.4980805816007609e-05, "loss": 2.3293, "step": 100140 }, { "epoch": 6.804253295284686, "grad_norm": 3.1432645320892334, "learning_rate": 1.4976559315124338e-05, "loss": 1.8925, "step": 100145 }, { "epoch": 6.804593015355347, "grad_norm": 4.183591365814209, "learning_rate": 1.4972312814241066e-05, "loss": 2.2553, "step": 100150 }, { "epoch": 6.804932735426009, "grad_norm": 3.907599925994873, "learning_rate": 1.4968066313357793e-05, "loss": 2.3478, "step": 100155 }, { "epoch": 6.805272455496671, "grad_norm": 3.4434595108032227, "learning_rate": 1.4963819812474522e-05, "loss": 1.8698, "step": 100160 }, { "epoch": 6.805612175567332, "grad_norm": 3.955143451690674, "learning_rate": 1.4959573311591249e-05, "loss": 2.2485, "step": 100165 }, { "epoch": 6.805951895637994, "grad_norm": 3.659529209136963, "learning_rate": 1.4955326810707978e-05, "loss": 1.9461, "step": 100170 }, { "epoch": 6.806291615708656, "grad_norm": 3.769838571548462, "learning_rate": 1.4951080309824705e-05, "loss": 2.0876, "step": 100175 }, { "epoch": 6.806631335779318, "grad_norm": 3.2644293308258057, "learning_rate": 1.4946833808941433e-05, "loss": 2.1677, "step": 100180 }, { "epoch": 6.80697105584998, "grad_norm": 3.8158671855926514, "learning_rate": 1.4942587308058162e-05, "loss": 2.31, "step": 100185 }, { "epoch": 6.807310775920642, "grad_norm": 2.701301097869873, "learning_rate": 1.4938340807174889e-05, "loss": 2.0485, "step": 100190 }, { "epoch": 6.807650495991303, "grad_norm": 4.999037265777588, "learning_rate": 1.4934094306291615e-05, "loss": 2.3076, "step": 100195 }, { "epoch": 6.807990216061965, "grad_norm": 4.196917533874512, "learning_rate": 1.4929847805408345e-05, "loss": 1.9799, "step": 100200 }, { "epoch": 6.808329936132627, "grad_norm": 3.3562893867492676, "learning_rate": 1.4925601304525073e-05, "loss": 1.7954, "step": 100205 }, { "epoch": 6.808669656203288, "grad_norm": 3.794787645339966, "learning_rate": 1.4921354803641799e-05, "loss": 1.973, "step": 100210 }, { 
"epoch": 6.80900937627395, "grad_norm": 3.443203926086426, "learning_rate": 1.4917108302758529e-05, "loss": 2.2023, "step": 100215 }, { "epoch": 6.809349096344612, "grad_norm": 3.5924758911132812, "learning_rate": 1.4912861801875255e-05, "loss": 2.2628, "step": 100220 }, { "epoch": 6.809688816415274, "grad_norm": 2.951903820037842, "learning_rate": 1.4908615300991981e-05, "loss": 2.1523, "step": 100225 }, { "epoch": 6.810028536485936, "grad_norm": 4.269540786743164, "learning_rate": 1.4904368800108711e-05, "loss": 2.0413, "step": 100230 }, { "epoch": 6.810368256556598, "grad_norm": 4.6249098777771, "learning_rate": 1.4900122299225439e-05, "loss": 2.1696, "step": 100235 }, { "epoch": 6.810707976627259, "grad_norm": 3.2269399166107178, "learning_rate": 1.4895875798342165e-05, "loss": 1.9609, "step": 100240 }, { "epoch": 6.811047696697921, "grad_norm": 3.063210964202881, "learning_rate": 1.4891629297458895e-05, "loss": 2.3465, "step": 100245 }, { "epoch": 6.811387416768583, "grad_norm": 4.2854390144348145, "learning_rate": 1.4887382796575621e-05, "loss": 1.8711, "step": 100250 }, { "epoch": 6.811727136839244, "grad_norm": 3.675520896911621, "learning_rate": 1.4883136295692351e-05, "loss": 1.9243, "step": 100255 }, { "epoch": 6.812066856909906, "grad_norm": 4.267269611358643, "learning_rate": 1.4878889794809079e-05, "loss": 1.9551, "step": 100260 }, { "epoch": 6.812406576980568, "grad_norm": 3.942981004714966, "learning_rate": 1.4874643293925805e-05, "loss": 2.3197, "step": 100265 }, { "epoch": 6.81274629705123, "grad_norm": 3.399550437927246, "learning_rate": 1.4870396793042535e-05, "loss": 2.0857, "step": 100270 }, { "epoch": 6.813086017121892, "grad_norm": 4.787519931793213, "learning_rate": 1.4866150292159261e-05, "loss": 2.3094, "step": 100275 }, { "epoch": 6.813425737192553, "grad_norm": 4.434628009796143, "learning_rate": 1.4861903791275988e-05, "loss": 1.8587, "step": 100280 }, { "epoch": 6.813765457263215, "grad_norm": 3.9522359371185303, "learning_rate": 
1.4857657290392717e-05, "loss": 1.9398, "step": 100285 }, { "epoch": 6.814105177333877, "grad_norm": 3.1779963970184326, "learning_rate": 1.4853410789509445e-05, "loss": 1.8502, "step": 100290 }, { "epoch": 6.814444897404538, "grad_norm": 3.442234754562378, "learning_rate": 1.4849164288626172e-05, "loss": 1.951, "step": 100295 }, { "epoch": 6.8147846174752, "grad_norm": 3.4850456714630127, "learning_rate": 1.4844917787742901e-05, "loss": 2.0365, "step": 100300 }, { "epoch": 6.815124337545862, "grad_norm": 4.232186794281006, "learning_rate": 1.4840671286859628e-05, "loss": 1.6858, "step": 100305 }, { "epoch": 6.8154640576165235, "grad_norm": 3.3060011863708496, "learning_rate": 1.4836424785976356e-05, "loss": 2.2463, "step": 100310 }, { "epoch": 6.815803777687186, "grad_norm": 2.903623580932617, "learning_rate": 1.4832178285093084e-05, "loss": 2.094, "step": 100315 }, { "epoch": 6.816143497757848, "grad_norm": 3.268479108810425, "learning_rate": 1.4827931784209812e-05, "loss": 2.0121, "step": 100320 }, { "epoch": 6.816483217828509, "grad_norm": 4.0529985427856445, "learning_rate": 1.4823685283326538e-05, "loss": 2.0915, "step": 100325 }, { "epoch": 6.816822937899171, "grad_norm": 3.0997798442840576, "learning_rate": 1.4819438782443268e-05, "loss": 1.9129, "step": 100330 }, { "epoch": 6.817162657969833, "grad_norm": 3.6310932636260986, "learning_rate": 1.4815192281559994e-05, "loss": 2.0758, "step": 100335 }, { "epoch": 6.817502378040494, "grad_norm": 4.578161239624023, "learning_rate": 1.4810945780676724e-05, "loss": 2.0872, "step": 100340 }, { "epoch": 6.817842098111156, "grad_norm": 4.182729721069336, "learning_rate": 1.4806699279793452e-05, "loss": 1.875, "step": 100345 }, { "epoch": 6.818181818181818, "grad_norm": 3.4419915676116943, "learning_rate": 1.4802452778910178e-05, "loss": 1.9507, "step": 100350 }, { "epoch": 6.8185215382524795, "grad_norm": 3.608368396759033, "learning_rate": 1.4798206278026908e-05, "loss": 2.1085, "step": 100355 }, { "epoch": 
6.818861258323142, "grad_norm": 3.149707555770874, "learning_rate": 1.4793959777143634e-05, "loss": 2.2344, "step": 100360 }, { "epoch": 6.819200978393804, "grad_norm": 2.766038656234741, "learning_rate": 1.478971327626036e-05, "loss": 2.283, "step": 100365 }, { "epoch": 6.819540698464465, "grad_norm": 5.222830772399902, "learning_rate": 1.478546677537709e-05, "loss": 2.0618, "step": 100370 }, { "epoch": 6.819880418535127, "grad_norm": 4.466765403747559, "learning_rate": 1.4781220274493818e-05, "loss": 1.9149, "step": 100375 }, { "epoch": 6.820220138605789, "grad_norm": 4.016109466552734, "learning_rate": 1.4776973773610545e-05, "loss": 2.1903, "step": 100380 }, { "epoch": 6.82055985867645, "grad_norm": 3.8208110332489014, "learning_rate": 1.4772727272727274e-05, "loss": 2.0802, "step": 100385 }, { "epoch": 6.820899578747112, "grad_norm": 3.8201794624328613, "learning_rate": 1.4768480771844e-05, "loss": 2.038, "step": 100390 }, { "epoch": 6.821239298817774, "grad_norm": 3.2926738262176514, "learning_rate": 1.4764234270960729e-05, "loss": 2.2306, "step": 100395 }, { "epoch": 6.8215790188884355, "grad_norm": 3.153071641921997, "learning_rate": 1.4759987770077458e-05, "loss": 2.1577, "step": 100400 }, { "epoch": 6.821918738959098, "grad_norm": 4.010215759277344, "learning_rate": 1.4755741269194185e-05, "loss": 2.2124, "step": 100405 }, { "epoch": 6.82225845902976, "grad_norm": 4.081902503967285, "learning_rate": 1.4751494768310911e-05, "loss": 2.1, "step": 100410 }, { "epoch": 6.822598179100421, "grad_norm": 3.0851712226867676, "learning_rate": 1.474724826742764e-05, "loss": 2.1785, "step": 100415 }, { "epoch": 6.822937899171083, "grad_norm": 3.581753730773926, "learning_rate": 1.4743001766544367e-05, "loss": 2.0586, "step": 100420 }, { "epoch": 6.823277619241745, "grad_norm": 3.248626232147217, "learning_rate": 1.4738755265661097e-05, "loss": 2.0002, "step": 100425 }, { "epoch": 6.823617339312406, "grad_norm": 3.9972198009490967, "learning_rate": 
1.4734508764777825e-05, "loss": 2.1487, "step": 100430 }, { "epoch": 6.823957059383068, "grad_norm": 3.288121223449707, "learning_rate": 1.4730262263894551e-05, "loss": 1.9972, "step": 100435 }, { "epoch": 6.82429677945373, "grad_norm": 3.2497942447662354, "learning_rate": 1.472601576301128e-05, "loss": 1.8489, "step": 100440 }, { "epoch": 6.8246364995243916, "grad_norm": 3.538522243499756, "learning_rate": 1.4721769262128007e-05, "loss": 2.2488, "step": 100445 }, { "epoch": 6.824976219595054, "grad_norm": 3.4877846240997314, "learning_rate": 1.4717522761244735e-05, "loss": 2.1402, "step": 100450 }, { "epoch": 6.825315939665716, "grad_norm": 4.008002758026123, "learning_rate": 1.4713276260361465e-05, "loss": 2.2451, "step": 100455 }, { "epoch": 6.825655659736377, "grad_norm": 3.093562602996826, "learning_rate": 1.4709029759478191e-05, "loss": 2.1312, "step": 100460 }, { "epoch": 6.825995379807039, "grad_norm": 3.373169183731079, "learning_rate": 1.4704783258594917e-05, "loss": 2.3118, "step": 100465 }, { "epoch": 6.826335099877701, "grad_norm": 4.4212446212768555, "learning_rate": 1.4700536757711647e-05, "loss": 2.1164, "step": 100470 }, { "epoch": 6.826674819948362, "grad_norm": 3.475569009780884, "learning_rate": 1.4696290256828373e-05, "loss": 2.2527, "step": 100475 }, { "epoch": 6.827014540019024, "grad_norm": 3.444561243057251, "learning_rate": 1.4692043755945101e-05, "loss": 2.1726, "step": 100480 }, { "epoch": 6.827354260089686, "grad_norm": 4.029477119445801, "learning_rate": 1.4687797255061831e-05, "loss": 2.0108, "step": 100485 }, { "epoch": 6.827693980160348, "grad_norm": 3.768688440322876, "learning_rate": 1.4683550754178557e-05, "loss": 1.5005, "step": 100490 }, { "epoch": 6.82803370023101, "grad_norm": 3.181462049484253, "learning_rate": 1.4679304253295284e-05, "loss": 2.0292, "step": 100495 }, { "epoch": 6.828373420301672, "grad_norm": 3.962585926055908, "learning_rate": 1.4675057752412013e-05, "loss": 2.1454, "step": 100500 }, { "epoch": 
6.828713140372333, "grad_norm": 4.2215256690979, "learning_rate": 1.4670811251528741e-05, "loss": 2.3305, "step": 100505 }, { "epoch": 6.829052860442995, "grad_norm": 4.072309970855713, "learning_rate": 1.466656475064547e-05, "loss": 2.1148, "step": 100510 }, { "epoch": 6.829392580513657, "grad_norm": 3.2835533618927, "learning_rate": 1.4662318249762197e-05, "loss": 2.324, "step": 100515 }, { "epoch": 6.829732300584318, "grad_norm": 3.6620073318481445, "learning_rate": 1.4658071748878924e-05, "loss": 1.9183, "step": 100520 }, { "epoch": 6.83007202065498, "grad_norm": 2.9992363452911377, "learning_rate": 1.4653825247995653e-05, "loss": 2.0031, "step": 100525 }, { "epoch": 6.830411740725642, "grad_norm": 3.9188640117645264, "learning_rate": 1.464957874711238e-05, "loss": 2.0463, "step": 100530 }, { "epoch": 6.830751460796304, "grad_norm": 3.4958674907684326, "learning_rate": 1.4645332246229108e-05, "loss": 2.1592, "step": 100535 }, { "epoch": 6.831091180866966, "grad_norm": 4.327582836151123, "learning_rate": 1.4641085745345837e-05, "loss": 2.1725, "step": 100540 }, { "epoch": 6.831430900937628, "grad_norm": 4.300230026245117, "learning_rate": 1.4636839244462564e-05, "loss": 1.9681, "step": 100545 }, { "epoch": 6.831770621008289, "grad_norm": 3.081106424331665, "learning_rate": 1.463259274357929e-05, "loss": 2.3085, "step": 100550 }, { "epoch": 6.832110341078951, "grad_norm": 4.155538082122803, "learning_rate": 1.462834624269602e-05, "loss": 2.0918, "step": 100555 }, { "epoch": 6.832450061149613, "grad_norm": 3.393129825592041, "learning_rate": 1.4624099741812746e-05, "loss": 2.0292, "step": 100560 }, { "epoch": 6.832789781220274, "grad_norm": 3.7917423248291016, "learning_rate": 1.4619853240929474e-05, "loss": 2.0549, "step": 100565 }, { "epoch": 6.833129501290936, "grad_norm": 3.312356948852539, "learning_rate": 1.4615606740046204e-05, "loss": 2.0639, "step": 100570 }, { "epoch": 6.833469221361598, "grad_norm": 3.241844892501831, "learning_rate": 
1.461136023916293e-05, "loss": 2.3218, "step": 100575 }, { "epoch": 6.83380894143226, "grad_norm": 3.6229162216186523, "learning_rate": 1.4607113738279656e-05, "loss": 1.9936, "step": 100580 }, { "epoch": 6.834148661502922, "grad_norm": 4.3274054527282715, "learning_rate": 1.4602867237396386e-05, "loss": 2.1108, "step": 100585 }, { "epoch": 6.834488381573584, "grad_norm": 3.3330588340759277, "learning_rate": 1.4598620736513114e-05, "loss": 2.3467, "step": 100590 }, { "epoch": 6.834828101644245, "grad_norm": 3.4024362564086914, "learning_rate": 1.4594374235629844e-05, "loss": 2.1652, "step": 100595 }, { "epoch": 6.835167821714907, "grad_norm": 3.542051076889038, "learning_rate": 1.459012773474657e-05, "loss": 2.1347, "step": 100600 }, { "epoch": 6.835507541785569, "grad_norm": 3.728847026824951, "learning_rate": 1.4585881233863296e-05, "loss": 2.0643, "step": 100605 }, { "epoch": 6.83584726185623, "grad_norm": 3.2058534622192383, "learning_rate": 1.4581634732980026e-05, "loss": 2.0876, "step": 100610 }, { "epoch": 6.836186981926892, "grad_norm": 3.170393228530884, "learning_rate": 1.4577388232096752e-05, "loss": 1.9568, "step": 100615 }, { "epoch": 6.836526701997554, "grad_norm": 3.646782398223877, "learning_rate": 1.457314173121348e-05, "loss": 1.9805, "step": 100620 }, { "epoch": 6.836866422068216, "grad_norm": 3.1994779109954834, "learning_rate": 1.456889523033021e-05, "loss": 1.8375, "step": 100625 }, { "epoch": 6.837206142138878, "grad_norm": 3.786245584487915, "learning_rate": 1.4564648729446936e-05, "loss": 1.9663, "step": 100630 }, { "epoch": 6.83754586220954, "grad_norm": 3.231652021408081, "learning_rate": 1.4560402228563663e-05, "loss": 2.0525, "step": 100635 }, { "epoch": 6.837885582280201, "grad_norm": 3.5485663414001465, "learning_rate": 1.4556155727680392e-05, "loss": 2.259, "step": 100640 }, { "epoch": 6.838225302350863, "grad_norm": 4.278665542602539, "learning_rate": 1.455190922679712e-05, "loss": 1.8183, "step": 100645 }, { "epoch": 
6.838565022421525, "grad_norm": 3.520350217819214, "learning_rate": 1.4547662725913847e-05, "loss": 2.0582, "step": 100650 }, { "epoch": 6.838904742492186, "grad_norm": 3.918738603591919, "learning_rate": 1.4543416225030576e-05, "loss": 2.1097, "step": 100655 }, { "epoch": 6.839244462562848, "grad_norm": 3.242889404296875, "learning_rate": 1.4539169724147303e-05, "loss": 2.37, "step": 100660 }, { "epoch": 6.83958418263351, "grad_norm": 3.0454962253570557, "learning_rate": 1.4534923223264029e-05, "loss": 2.1731, "step": 100665 }, { "epoch": 6.839923902704172, "grad_norm": 4.200194835662842, "learning_rate": 1.4530676722380759e-05, "loss": 2.161, "step": 100670 }, { "epoch": 6.840263622774834, "grad_norm": 3.5986757278442383, "learning_rate": 1.4526430221497487e-05, "loss": 1.9924, "step": 100675 }, { "epoch": 6.840603342845496, "grad_norm": 3.532053232192993, "learning_rate": 1.4522183720614216e-05, "loss": 1.9772, "step": 100680 }, { "epoch": 6.840943062916157, "grad_norm": 4.280794143676758, "learning_rate": 1.4517937219730943e-05, "loss": 2.211, "step": 100685 }, { "epoch": 6.841282782986819, "grad_norm": 3.6844751834869385, "learning_rate": 1.4513690718847669e-05, "loss": 2.244, "step": 100690 }, { "epoch": 6.841622503057481, "grad_norm": 4.153800010681152, "learning_rate": 1.4509444217964399e-05, "loss": 2.269, "step": 100695 }, { "epoch": 6.841962223128142, "grad_norm": 4.204097270965576, "learning_rate": 1.4505197717081125e-05, "loss": 2.1324, "step": 100700 }, { "epoch": 6.842301943198804, "grad_norm": 3.6652657985687256, "learning_rate": 1.4500951216197853e-05, "loss": 2.1009, "step": 100705 }, { "epoch": 6.842641663269466, "grad_norm": 4.141002178192139, "learning_rate": 1.4496704715314583e-05, "loss": 1.8544, "step": 100710 }, { "epoch": 6.842981383340128, "grad_norm": 3.316337823867798, "learning_rate": 1.449245821443131e-05, "loss": 2.0107, "step": 100715 }, { "epoch": 6.84332110341079, "grad_norm": 3.7597389221191406, "learning_rate": 
1.4488211713548035e-05, "loss": 2.1607, "step": 100720 }, { "epoch": 6.843660823481452, "grad_norm": 4.472982883453369, "learning_rate": 1.4483965212664765e-05, "loss": 1.9708, "step": 100725 }, { "epoch": 6.844000543552113, "grad_norm": 3.877110242843628, "learning_rate": 1.4479718711781493e-05, "loss": 2.162, "step": 100730 }, { "epoch": 6.844340263622775, "grad_norm": 4.407981872558594, "learning_rate": 1.447547221089822e-05, "loss": 1.8386, "step": 100735 }, { "epoch": 6.844679983693436, "grad_norm": 3.3366825580596924, "learning_rate": 1.447122571001495e-05, "loss": 2.0264, "step": 100740 }, { "epoch": 6.845019703764098, "grad_norm": 3.8150839805603027, "learning_rate": 1.4466979209131676e-05, "loss": 1.9121, "step": 100745 }, { "epoch": 6.84535942383476, "grad_norm": 3.711848020553589, "learning_rate": 1.4462732708248402e-05, "loss": 2.292, "step": 100750 }, { "epoch": 6.845699143905422, "grad_norm": 3.8315134048461914, "learning_rate": 1.4458486207365132e-05, "loss": 2.1717, "step": 100755 }, { "epoch": 6.846038863976084, "grad_norm": 3.1429262161254883, "learning_rate": 1.445423970648186e-05, "loss": 2.2366, "step": 100760 }, { "epoch": 6.846378584046746, "grad_norm": 3.343514919281006, "learning_rate": 1.444999320559859e-05, "loss": 2.0954, "step": 100765 }, { "epoch": 6.846718304117407, "grad_norm": 3.2207720279693604, "learning_rate": 1.4445746704715316e-05, "loss": 2.2566, "step": 100770 }, { "epoch": 6.847058024188069, "grad_norm": 4.159692764282227, "learning_rate": 1.4441500203832042e-05, "loss": 1.7254, "step": 100775 }, { "epoch": 6.847397744258731, "grad_norm": 4.097245693206787, "learning_rate": 1.4437253702948772e-05, "loss": 1.9593, "step": 100780 }, { "epoch": 6.847737464329392, "grad_norm": 3.4406039714813232, "learning_rate": 1.44330072020655e-05, "loss": 2.206, "step": 100785 }, { "epoch": 6.848077184400054, "grad_norm": 3.4870076179504395, "learning_rate": 1.4428760701182226e-05, "loss": 2.167, "step": 100790 }, { "epoch": 
6.848416904470716, "grad_norm": 3.1508519649505615, "learning_rate": 1.4424514200298956e-05, "loss": 2.0179, "step": 100795 }, { "epoch": 6.848756624541378, "grad_norm": 3.5577003955841064, "learning_rate": 1.4420267699415682e-05, "loss": 1.8882, "step": 100800 }, { "epoch": 6.84909634461204, "grad_norm": 4.553556442260742, "learning_rate": 1.4416021198532408e-05, "loss": 2.0437, "step": 100805 }, { "epoch": 6.849436064682702, "grad_norm": 4.358423709869385, "learning_rate": 1.4411774697649138e-05, "loss": 2.1867, "step": 100810 }, { "epoch": 6.849775784753363, "grad_norm": 3.84124755859375, "learning_rate": 1.4407528196765866e-05, "loss": 2.4116, "step": 100815 }, { "epoch": 6.850115504824025, "grad_norm": 3.947767496109009, "learning_rate": 1.4403281695882592e-05, "loss": 2.1259, "step": 100820 }, { "epoch": 6.850455224894687, "grad_norm": 3.864006757736206, "learning_rate": 1.4399035194999322e-05, "loss": 2.1006, "step": 100825 }, { "epoch": 6.850794944965348, "grad_norm": 2.955946922302246, "learning_rate": 1.4394788694116048e-05, "loss": 1.781, "step": 100830 }, { "epoch": 6.85113466503601, "grad_norm": 3.4262399673461914, "learning_rate": 1.4390542193232776e-05, "loss": 2.0365, "step": 100835 }, { "epoch": 6.851474385106672, "grad_norm": 3.5520243644714355, "learning_rate": 1.4386295692349506e-05, "loss": 2.2064, "step": 100840 }, { "epoch": 6.851814105177334, "grad_norm": 3.3229761123657227, "learning_rate": 1.4382049191466232e-05, "loss": 2.1086, "step": 100845 }, { "epoch": 6.852153825247996, "grad_norm": 3.196465492248535, "learning_rate": 1.4377802690582962e-05, "loss": 2.0323, "step": 100850 }, { "epoch": 6.852493545318658, "grad_norm": 3.54242205619812, "learning_rate": 1.4373556189699688e-05, "loss": 2.0329, "step": 100855 }, { "epoch": 6.852833265389319, "grad_norm": 3.60455584526062, "learning_rate": 1.4369309688816415e-05, "loss": 2.1755, "step": 100860 }, { "epoch": 6.853172985459981, "grad_norm": 3.253654956817627, "learning_rate": 
1.4365063187933144e-05, "loss": 1.9836, "step": 100865 }, { "epoch": 6.853512705530643, "grad_norm": 3.385601043701172, "learning_rate": 1.4360816687049872e-05, "loss": 2.0263, "step": 100870 }, { "epoch": 6.853852425601304, "grad_norm": 4.169328212738037, "learning_rate": 1.4356570186166599e-05, "loss": 2.3927, "step": 100875 }, { "epoch": 6.854192145671966, "grad_norm": 3.7560088634490967, "learning_rate": 1.4352323685283328e-05, "loss": 2.0011, "step": 100880 }, { "epoch": 6.854531865742628, "grad_norm": 3.156304359436035, "learning_rate": 1.4348077184400055e-05, "loss": 2.336, "step": 100885 }, { "epoch": 6.85487158581329, "grad_norm": 3.3560311794281006, "learning_rate": 1.4343830683516783e-05, "loss": 2.0566, "step": 100890 }, { "epoch": 6.855211305883952, "grad_norm": 3.9326064586639404, "learning_rate": 1.433958418263351e-05, "loss": 1.9547, "step": 100895 }, { "epoch": 6.855551025954614, "grad_norm": 3.685633659362793, "learning_rate": 1.4335337681750239e-05, "loss": 2.1644, "step": 100900 }, { "epoch": 6.855890746025275, "grad_norm": 3.847442626953125, "learning_rate": 1.4331091180866965e-05, "loss": 1.7552, "step": 100905 }, { "epoch": 6.856230466095937, "grad_norm": 4.117381572723389, "learning_rate": 1.4326844679983695e-05, "loss": 2.0013, "step": 100910 }, { "epoch": 6.856570186166599, "grad_norm": 3.179273843765259, "learning_rate": 1.4322598179100421e-05, "loss": 2.1179, "step": 100915 }, { "epoch": 6.85690990623726, "grad_norm": 4.1119585037231445, "learning_rate": 1.4318351678217149e-05, "loss": 2.0685, "step": 100920 }, { "epoch": 6.857249626307922, "grad_norm": 3.1318087577819824, "learning_rate": 1.4314105177333879e-05, "loss": 2.1468, "step": 100925 }, { "epoch": 6.857589346378584, "grad_norm": 3.3024513721466064, "learning_rate": 1.4309858676450605e-05, "loss": 2.1037, "step": 100930 }, { "epoch": 6.857929066449246, "grad_norm": 4.073248386383057, "learning_rate": 1.4305612175567335e-05, "loss": 2.0954, "step": 100935 }, { "epoch": 
6.858268786519908, "grad_norm": 3.6737210750579834, "learning_rate": 1.4301365674684061e-05, "loss": 2.0637, "step": 100940 }, { "epoch": 6.85860850659057, "grad_norm": 3.2293882369995117, "learning_rate": 1.4297119173800787e-05, "loss": 2.0898, "step": 100945 }, { "epoch": 6.858948226661231, "grad_norm": 3.1376922130584717, "learning_rate": 1.4292872672917517e-05, "loss": 2.1175, "step": 100950 }, { "epoch": 6.859287946731893, "grad_norm": 3.0592784881591797, "learning_rate": 1.4288626172034245e-05, "loss": 1.9644, "step": 100955 }, { "epoch": 6.859627666802554, "grad_norm": 3.1733312606811523, "learning_rate": 1.4284379671150971e-05, "loss": 1.8699, "step": 100960 }, { "epoch": 6.859967386873216, "grad_norm": 4.182766437530518, "learning_rate": 1.4280133170267701e-05, "loss": 2.0404, "step": 100965 }, { "epoch": 6.860307106943878, "grad_norm": 3.538869619369507, "learning_rate": 1.4275886669384427e-05, "loss": 1.9881, "step": 100970 }, { "epoch": 6.8606468270145395, "grad_norm": 2.871093988418579, "learning_rate": 1.4271640168501155e-05, "loss": 2.091, "step": 100975 }, { "epoch": 6.860986547085202, "grad_norm": 3.6865713596343994, "learning_rate": 1.4267393667617885e-05, "loss": 2.1974, "step": 100980 }, { "epoch": 6.861326267155864, "grad_norm": 3.9161770343780518, "learning_rate": 1.4263147166734611e-05, "loss": 1.9415, "step": 100985 }, { "epoch": 6.861665987226525, "grad_norm": 4.521310329437256, "learning_rate": 1.4258900665851338e-05, "loss": 2.0674, "step": 100990 }, { "epoch": 6.862005707297187, "grad_norm": 3.6545512676239014, "learning_rate": 1.4254654164968067e-05, "loss": 2.3596, "step": 100995 }, { "epoch": 6.862345427367849, "grad_norm": 3.8772506713867188, "learning_rate": 1.4250407664084794e-05, "loss": 2.1511, "step": 101000 }, { "epoch": 6.86268514743851, "grad_norm": 3.642611026763916, "learning_rate": 1.4246161163201522e-05, "loss": 2.0277, "step": 101005 }, { "epoch": 6.863024867509172, "grad_norm": 3.0881459712982178, "learning_rate": 
1.4241914662318251e-05, "loss": 2.0693, "step": 101010 }, { "epoch": 6.863364587579834, "grad_norm": 3.562333822250366, "learning_rate": 1.4237668161434978e-05, "loss": 2.0491, "step": 101015 }, { "epoch": 6.8637043076504956, "grad_norm": 2.8946738243103027, "learning_rate": 1.4233421660551707e-05, "loss": 2.2921, "step": 101020 }, { "epoch": 6.864044027721158, "grad_norm": 5.07094144821167, "learning_rate": 1.4229175159668434e-05, "loss": 1.959, "step": 101025 }, { "epoch": 6.86438374779182, "grad_norm": 3.360619068145752, "learning_rate": 1.4224928658785162e-05, "loss": 2.113, "step": 101030 }, { "epoch": 6.864723467862481, "grad_norm": 3.100372552871704, "learning_rate": 1.422068215790189e-05, "loss": 2.3289, "step": 101035 }, { "epoch": 6.865063187933143, "grad_norm": 4.549907207489014, "learning_rate": 1.4216435657018618e-05, "loss": 1.904, "step": 101040 }, { "epoch": 6.865402908003805, "grad_norm": 3.6331467628479004, "learning_rate": 1.4212189156135344e-05, "loss": 2.182, "step": 101045 }, { "epoch": 6.865742628074466, "grad_norm": 4.088474750518799, "learning_rate": 1.4207942655252074e-05, "loss": 2.2991, "step": 101050 }, { "epoch": 6.866082348145128, "grad_norm": 4.765413284301758, "learning_rate": 1.42036961543688e-05, "loss": 1.9551, "step": 101055 }, { "epoch": 6.86642206821579, "grad_norm": 3.4089581966400146, "learning_rate": 1.4199449653485528e-05, "loss": 2.1453, "step": 101060 }, { "epoch": 6.866761788286452, "grad_norm": 3.0037872791290283, "learning_rate": 1.4195203152602258e-05, "loss": 2.0544, "step": 101065 }, { "epoch": 6.867101508357114, "grad_norm": 3.525827646255493, "learning_rate": 1.4190956651718984e-05, "loss": 2.0861, "step": 101070 }, { "epoch": 6.867441228427776, "grad_norm": 4.126859188079834, "learning_rate": 1.418671015083571e-05, "loss": 2.0299, "step": 101075 }, { "epoch": 6.867780948498437, "grad_norm": 3.3823184967041016, "learning_rate": 1.418246364995244e-05, "loss": 2.1624, "step": 101080 }, { "epoch": 6.868120668569099, 
"grad_norm": 4.002058982849121, "learning_rate": 1.4178217149069166e-05, "loss": 1.8076, "step": 101085 }, { "epoch": 6.868460388639761, "grad_norm": 3.6829111576080322, "learning_rate": 1.4173970648185894e-05, "loss": 2.0731, "step": 101090 }, { "epoch": 6.868800108710422, "grad_norm": 4.084613800048828, "learning_rate": 1.4169724147302624e-05, "loss": 1.8888, "step": 101095 }, { "epoch": 6.869139828781084, "grad_norm": 3.429703712463379, "learning_rate": 1.416547764641935e-05, "loss": 2.1573, "step": 101100 }, { "epoch": 6.869479548851746, "grad_norm": 3.6970512866973877, "learning_rate": 1.416123114553608e-05, "loss": 2.0221, "step": 101105 }, { "epoch": 6.869819268922408, "grad_norm": 4.047784328460693, "learning_rate": 1.4156984644652807e-05, "loss": 2.4706, "step": 101110 }, { "epoch": 6.87015898899307, "grad_norm": 3.2630882263183594, "learning_rate": 1.4152738143769535e-05, "loss": 1.9007, "step": 101115 }, { "epoch": 6.870498709063732, "grad_norm": 4.033354759216309, "learning_rate": 1.4148491642886264e-05, "loss": 2.112, "step": 101120 }, { "epoch": 6.870838429134393, "grad_norm": 3.634737253189087, "learning_rate": 1.414424514200299e-05, "loss": 2.0932, "step": 101125 }, { "epoch": 6.871178149205055, "grad_norm": 5.499455451965332, "learning_rate": 1.4139998641119717e-05, "loss": 1.9733, "step": 101130 }, { "epoch": 6.871517869275717, "grad_norm": 3.0498621463775635, "learning_rate": 1.4135752140236447e-05, "loss": 2.1456, "step": 101135 }, { "epoch": 6.871857589346378, "grad_norm": 4.374329566955566, "learning_rate": 1.4131505639353173e-05, "loss": 2.2461, "step": 101140 }, { "epoch": 6.87219730941704, "grad_norm": 3.3557238578796387, "learning_rate": 1.4127259138469901e-05, "loss": 2.2289, "step": 101145 }, { "epoch": 6.872537029487702, "grad_norm": 4.123094081878662, "learning_rate": 1.412301263758663e-05, "loss": 2.3679, "step": 101150 }, { "epoch": 6.872876749558364, "grad_norm": 3.1116325855255127, "learning_rate": 1.4118766136703357e-05, "loss": 
2.0721, "step": 101155 }, { "epoch": 6.873216469629026, "grad_norm": 4.208662033081055, "learning_rate": 1.4114519635820083e-05, "loss": 1.9944, "step": 101160 }, { "epoch": 6.873556189699688, "grad_norm": 4.208336353302002, "learning_rate": 1.4110273134936813e-05, "loss": 2.1931, "step": 101165 }, { "epoch": 6.873895909770349, "grad_norm": 3.3644909858703613, "learning_rate": 1.4106026634053541e-05, "loss": 2.3154, "step": 101170 }, { "epoch": 6.874235629841011, "grad_norm": 3.8855888843536377, "learning_rate": 1.4101780133170267e-05, "loss": 2.2608, "step": 101175 }, { "epoch": 6.874575349911673, "grad_norm": 3.6041910648345947, "learning_rate": 1.4097533632286997e-05, "loss": 2.3146, "step": 101180 }, { "epoch": 6.874915069982334, "grad_norm": 3.8801236152648926, "learning_rate": 1.4093287131403723e-05, "loss": 2.2545, "step": 101185 }, { "epoch": 6.875254790052996, "grad_norm": 4.191279411315918, "learning_rate": 1.4089040630520453e-05, "loss": 1.8288, "step": 101190 }, { "epoch": 6.875594510123658, "grad_norm": 3.863241195678711, "learning_rate": 1.408479412963718e-05, "loss": 1.9933, "step": 101195 }, { "epoch": 6.87593423019432, "grad_norm": 3.5604054927825928, "learning_rate": 1.4080547628753907e-05, "loss": 1.9783, "step": 101200 }, { "epoch": 6.876273950264982, "grad_norm": 3.723095417022705, "learning_rate": 1.4076301127870637e-05, "loss": 2.0917, "step": 101205 }, { "epoch": 6.876613670335644, "grad_norm": 3.234931230545044, "learning_rate": 1.4072054626987363e-05, "loss": 1.9415, "step": 101210 }, { "epoch": 6.876953390406305, "grad_norm": 4.8309149742126465, "learning_rate": 1.406780812610409e-05, "loss": 2.2862, "step": 101215 }, { "epoch": 6.877293110476967, "grad_norm": 3.6861422061920166, "learning_rate": 1.406356162522082e-05, "loss": 2.069, "step": 101220 }, { "epoch": 6.877632830547629, "grad_norm": 3.9864773750305176, "learning_rate": 1.4059315124337547e-05, "loss": 2.2928, "step": 101225 }, { "epoch": 6.87797255061829, "grad_norm": 
3.8609673976898193, "learning_rate": 1.4055068623454274e-05, "loss": 2.3494, "step": 101230 }, { "epoch": 6.878312270688952, "grad_norm": 3.0509467124938965, "learning_rate": 1.4050822122571003e-05, "loss": 2.2456, "step": 101235 }, { "epoch": 6.878651990759614, "grad_norm": 4.3301897048950195, "learning_rate": 1.404657562168773e-05, "loss": 2.1293, "step": 101240 }, { "epoch": 6.878991710830276, "grad_norm": 4.020671844482422, "learning_rate": 1.4042329120804456e-05, "loss": 2.0693, "step": 101245 }, { "epoch": 6.879331430900938, "grad_norm": 3.8361334800720215, "learning_rate": 1.4038082619921186e-05, "loss": 1.8679, "step": 101250 }, { "epoch": 6.8796711509716, "grad_norm": 3.1934869289398193, "learning_rate": 1.4033836119037914e-05, "loss": 2.1867, "step": 101255 }, { "epoch": 6.880010871042261, "grad_norm": 2.864717721939087, "learning_rate": 1.402958961815464e-05, "loss": 2.1121, "step": 101260 }, { "epoch": 6.880350591112923, "grad_norm": 4.98390531539917, "learning_rate": 1.402534311727137e-05, "loss": 2.102, "step": 101265 }, { "epoch": 6.880690311183585, "grad_norm": 2.9536681175231934, "learning_rate": 1.4021096616388096e-05, "loss": 1.8561, "step": 101270 }, { "epoch": 6.881030031254246, "grad_norm": 3.845048666000366, "learning_rate": 1.4016850115504826e-05, "loss": 2.2588, "step": 101275 }, { "epoch": 6.881369751324908, "grad_norm": 4.071266174316406, "learning_rate": 1.4012603614621552e-05, "loss": 2.0371, "step": 101280 }, { "epoch": 6.88170947139557, "grad_norm": 3.352778673171997, "learning_rate": 1.400835711373828e-05, "loss": 2.1185, "step": 101285 }, { "epoch": 6.882049191466232, "grad_norm": 3.1120970249176025, "learning_rate": 1.400411061285501e-05, "loss": 2.0969, "step": 101290 }, { "epoch": 6.882388911536894, "grad_norm": 3.820446252822876, "learning_rate": 1.3999864111971736e-05, "loss": 2.1338, "step": 101295 }, { "epoch": 6.882728631607556, "grad_norm": 3.2718136310577393, "learning_rate": 1.3995617611088462e-05, "loss": 2.106, "step": 
101300 }, { "epoch": 6.883068351678217, "grad_norm": 3.7176921367645264, "learning_rate": 1.3991371110205192e-05, "loss": 2.0356, "step": 101305 }, { "epoch": 6.883408071748879, "grad_norm": 3.22139573097229, "learning_rate": 1.398712460932192e-05, "loss": 2.0661, "step": 101310 }, { "epoch": 6.883747791819541, "grad_norm": 5.196108341217041, "learning_rate": 1.3982878108438646e-05, "loss": 2.1749, "step": 101315 }, { "epoch": 6.884087511890202, "grad_norm": 4.7617645263671875, "learning_rate": 1.3978631607555376e-05, "loss": 2.1876, "step": 101320 }, { "epoch": 6.884427231960864, "grad_norm": 4.7090301513671875, "learning_rate": 1.3974385106672102e-05, "loss": 2.2317, "step": 101325 }, { "epoch": 6.884766952031526, "grad_norm": 2.803769111633301, "learning_rate": 1.3970138605788829e-05, "loss": 1.9574, "step": 101330 }, { "epoch": 6.885106672102188, "grad_norm": 3.001685857772827, "learning_rate": 1.3965892104905558e-05, "loss": 2.1994, "step": 101335 }, { "epoch": 6.88544639217285, "grad_norm": 3.019850730895996, "learning_rate": 1.3961645604022286e-05, "loss": 2.0453, "step": 101340 }, { "epoch": 6.885786112243512, "grad_norm": 4.665469169616699, "learning_rate": 1.3957399103139013e-05, "loss": 2.2242, "step": 101345 }, { "epoch": 6.886125832314173, "grad_norm": 3.8459835052490234, "learning_rate": 1.3953152602255742e-05, "loss": 2.2246, "step": 101350 }, { "epoch": 6.886465552384835, "grad_norm": 3.388988971710205, "learning_rate": 1.3948906101372469e-05, "loss": 2.102, "step": 101355 }, { "epoch": 6.886805272455497, "grad_norm": 3.2993993759155273, "learning_rate": 1.3944659600489198e-05, "loss": 2.0189, "step": 101360 }, { "epoch": 6.887144992526158, "grad_norm": 3.4600119590759277, "learning_rate": 1.3940413099605926e-05, "loss": 2.2081, "step": 101365 }, { "epoch": 6.88748471259682, "grad_norm": 4.219578266143799, "learning_rate": 1.3936166598722653e-05, "loss": 2.1943, "step": 101370 }, { "epoch": 6.8878244326674825, "grad_norm": 2.7673473358154297, 
"learning_rate": 1.3931920097839382e-05, "loss": 1.9285, "step": 101375 }, { "epoch": 6.888164152738144, "grad_norm": 3.3065054416656494, "learning_rate": 1.3927673596956109e-05, "loss": 2.0992, "step": 101380 }, { "epoch": 6.888503872808806, "grad_norm": 3.7197036743164062, "learning_rate": 1.3923427096072835e-05, "loss": 2.0997, "step": 101385 }, { "epoch": 6.888843592879468, "grad_norm": 3.8161776065826416, "learning_rate": 1.3919180595189565e-05, "loss": 2.0974, "step": 101390 }, { "epoch": 6.889183312950129, "grad_norm": 4.0027031898498535, "learning_rate": 1.3914934094306293e-05, "loss": 2.3234, "step": 101395 }, { "epoch": 6.889523033020791, "grad_norm": 2.5977447032928467, "learning_rate": 1.3910687593423019e-05, "loss": 1.7838, "step": 101400 }, { "epoch": 6.889862753091453, "grad_norm": 3.6176931858062744, "learning_rate": 1.3906441092539749e-05, "loss": 1.9862, "step": 101405 }, { "epoch": 6.890202473162114, "grad_norm": 4.714173316955566, "learning_rate": 1.3902194591656475e-05, "loss": 2.1253, "step": 101410 }, { "epoch": 6.890542193232776, "grad_norm": 4.462746620178223, "learning_rate": 1.3897948090773203e-05, "loss": 2.0416, "step": 101415 }, { "epoch": 6.8908819133034385, "grad_norm": 4.440879821777344, "learning_rate": 1.3893701589889931e-05, "loss": 1.919, "step": 101420 }, { "epoch": 6.8912216333741, "grad_norm": 5.141523361206055, "learning_rate": 1.388945508900666e-05, "loss": 2.0045, "step": 101425 }, { "epoch": 6.891561353444762, "grad_norm": 3.7062582969665527, "learning_rate": 1.3885208588123385e-05, "loss": 2.2181, "step": 101430 }, { "epoch": 6.891901073515423, "grad_norm": 3.6289031505584717, "learning_rate": 1.3880962087240115e-05, "loss": 1.9421, "step": 101435 }, { "epoch": 6.892240793586085, "grad_norm": 4.515370845794678, "learning_rate": 1.3876715586356841e-05, "loss": 2.3313, "step": 101440 }, { "epoch": 6.892580513656747, "grad_norm": 3.0022974014282227, "learning_rate": 1.3872469085473571e-05, "loss": 2.1165, "step": 101445 }, 
{ "epoch": 6.892920233727408, "grad_norm": 3.8316009044647217, "learning_rate": 1.38682225845903e-05, "loss": 2.109, "step": 101450 }, { "epoch": 6.89325995379807, "grad_norm": 3.8404526710510254, "learning_rate": 1.3863976083707026e-05, "loss": 2.088, "step": 101455 }, { "epoch": 6.893599673868732, "grad_norm": 4.538391590118408, "learning_rate": 1.3859729582823755e-05, "loss": 2.0245, "step": 101460 }, { "epoch": 6.893939393939394, "grad_norm": 3.9105520248413086, "learning_rate": 1.3855483081940482e-05, "loss": 1.9435, "step": 101465 }, { "epoch": 6.894279114010056, "grad_norm": 4.00718355178833, "learning_rate": 1.3851236581057208e-05, "loss": 2.2942, "step": 101470 }, { "epoch": 6.894618834080718, "grad_norm": 3.859541654586792, "learning_rate": 1.3846990080173938e-05, "loss": 2.2597, "step": 101475 }, { "epoch": 6.894958554151379, "grad_norm": 3.173516273498535, "learning_rate": 1.3842743579290666e-05, "loss": 1.8157, "step": 101480 }, { "epoch": 6.895298274222041, "grad_norm": 3.8952341079711914, "learning_rate": 1.3838497078407392e-05, "loss": 1.9099, "step": 101485 }, { "epoch": 6.895637994292703, "grad_norm": 3.181520462036133, "learning_rate": 1.3834250577524122e-05, "loss": 1.9709, "step": 101490 }, { "epoch": 6.895977714363364, "grad_norm": 3.7802486419677734, "learning_rate": 1.3830004076640848e-05, "loss": 1.9528, "step": 101495 }, { "epoch": 6.896317434434026, "grad_norm": 3.38729190826416, "learning_rate": 1.3825757575757576e-05, "loss": 1.9616, "step": 101500 }, { "epoch": 6.896657154504688, "grad_norm": 3.8459646701812744, "learning_rate": 1.3821511074874306e-05, "loss": 2.1846, "step": 101505 }, { "epoch": 6.89699687457535, "grad_norm": 4.228548049926758, "learning_rate": 1.3817264573991032e-05, "loss": 2.0713, "step": 101510 }, { "epoch": 6.897336594646012, "grad_norm": 4.190673828125, "learning_rate": 1.3813018073107758e-05, "loss": 2.0976, "step": 101515 }, { "epoch": 6.897676314716674, "grad_norm": 4.334803104400635, "learning_rate": 
1.3808771572224488e-05, "loss": 1.75, "step": 101520 }, { "epoch": 6.898016034787335, "grad_norm": 3.6104207038879395, "learning_rate": 1.3804525071341214e-05, "loss": 2.2592, "step": 101525 }, { "epoch": 6.898355754857997, "grad_norm": 3.0818264484405518, "learning_rate": 1.3800278570457944e-05, "loss": 2.0737, "step": 101530 }, { "epoch": 6.898695474928659, "grad_norm": 3.5208017826080322, "learning_rate": 1.3796032069574672e-05, "loss": 1.9322, "step": 101535 }, { "epoch": 6.89903519499932, "grad_norm": 2.6803247928619385, "learning_rate": 1.3791785568691398e-05, "loss": 2.0636, "step": 101540 }, { "epoch": 6.899374915069982, "grad_norm": 3.126316547393799, "learning_rate": 1.3787539067808128e-05, "loss": 1.894, "step": 101545 }, { "epoch": 6.899714635140644, "grad_norm": 3.607064723968506, "learning_rate": 1.3783292566924854e-05, "loss": 2.3811, "step": 101550 }, { "epoch": 6.900054355211306, "grad_norm": 3.5675034523010254, "learning_rate": 1.3779046066041582e-05, "loss": 1.9961, "step": 101555 }, { "epoch": 6.900394075281968, "grad_norm": 3.4809181690216064, "learning_rate": 1.3774799565158312e-05, "loss": 2.2934, "step": 101560 }, { "epoch": 6.90073379535263, "grad_norm": 3.147258758544922, "learning_rate": 1.3770553064275038e-05, "loss": 2.1304, "step": 101565 }, { "epoch": 6.901073515423291, "grad_norm": 4.11954927444458, "learning_rate": 1.3766306563391765e-05, "loss": 2.1179, "step": 101570 }, { "epoch": 6.901413235493953, "grad_norm": 3.382195234298706, "learning_rate": 1.3762060062508494e-05, "loss": 2.221, "step": 101575 }, { "epoch": 6.901752955564615, "grad_norm": 3.9919097423553467, "learning_rate": 1.375781356162522e-05, "loss": 1.7799, "step": 101580 }, { "epoch": 6.902092675635276, "grad_norm": 3.5378060340881348, "learning_rate": 1.3753567060741949e-05, "loss": 2.0788, "step": 101585 }, { "epoch": 6.902432395705938, "grad_norm": 3.359745740890503, "learning_rate": 1.3749320559858678e-05, "loss": 1.8639, "step": 101590 }, { "epoch": 
6.9027721157766, "grad_norm": 3.331974744796753, "learning_rate": 1.3745074058975405e-05, "loss": 1.8979, "step": 101595 }, { "epoch": 6.903111835847262, "grad_norm": 3.1296043395996094, "learning_rate": 1.3740827558092131e-05, "loss": 1.9681, "step": 101600 }, { "epoch": 6.903451555917924, "grad_norm": 2.7095963954925537, "learning_rate": 1.373658105720886e-05, "loss": 2.3877, "step": 101605 }, { "epoch": 6.903791275988586, "grad_norm": 4.115942478179932, "learning_rate": 1.3732334556325589e-05, "loss": 1.7786, "step": 101610 }, { "epoch": 6.904130996059247, "grad_norm": 2.9376914501190186, "learning_rate": 1.3728088055442317e-05, "loss": 2.0742, "step": 101615 }, { "epoch": 6.904470716129909, "grad_norm": 3.840649127960205, "learning_rate": 1.3723841554559045e-05, "loss": 2.2423, "step": 101620 }, { "epoch": 6.904810436200571, "grad_norm": 3.8027749061584473, "learning_rate": 1.3719595053675771e-05, "loss": 2.1683, "step": 101625 }, { "epoch": 6.905150156271232, "grad_norm": 4.82068395614624, "learning_rate": 1.37153485527925e-05, "loss": 2.0732, "step": 101630 }, { "epoch": 6.905489876341894, "grad_norm": 3.7098023891448975, "learning_rate": 1.3711102051909227e-05, "loss": 2.1247, "step": 101635 }, { "epoch": 6.905829596412556, "grad_norm": 3.8967223167419434, "learning_rate": 1.3706855551025955e-05, "loss": 2.2902, "step": 101640 }, { "epoch": 6.906169316483218, "grad_norm": 3.769761323928833, "learning_rate": 1.3702609050142685e-05, "loss": 1.9148, "step": 101645 }, { "epoch": 6.90650903655388, "grad_norm": 3.3641459941864014, "learning_rate": 1.3698362549259411e-05, "loss": 2.4601, "step": 101650 }, { "epoch": 6.906848756624541, "grad_norm": 3.75386118888855, "learning_rate": 1.3694116048376137e-05, "loss": 2.2086, "step": 101655 }, { "epoch": 6.907188476695203, "grad_norm": 2.8272573947906494, "learning_rate": 1.3689869547492867e-05, "loss": 2.2559, "step": 101660 }, { "epoch": 6.907528196765865, "grad_norm": 4.292823791503906, "learning_rate": 
1.3685623046609593e-05, "loss": 2.0959, "step": 101665 }, { "epoch": 6.907867916836526, "grad_norm": 3.6502954959869385, "learning_rate": 1.3681376545726321e-05, "loss": 2.0746, "step": 101670 }, { "epoch": 6.908207636907188, "grad_norm": 3.61384916305542, "learning_rate": 1.3677130044843051e-05, "loss": 2.0645, "step": 101675 }, { "epoch": 6.90854735697785, "grad_norm": 4.199599266052246, "learning_rate": 1.3672883543959777e-05, "loss": 1.9813, "step": 101680 }, { "epoch": 6.908887077048512, "grad_norm": 3.8602778911590576, "learning_rate": 1.3668637043076504e-05, "loss": 2.1405, "step": 101685 }, { "epoch": 6.909226797119174, "grad_norm": 3.382032632827759, "learning_rate": 1.3664390542193233e-05, "loss": 1.8995, "step": 101690 }, { "epoch": 6.909566517189836, "grad_norm": 3.5345089435577393, "learning_rate": 1.3660144041309961e-05, "loss": 1.8736, "step": 101695 }, { "epoch": 6.909906237260497, "grad_norm": 4.015454292297363, "learning_rate": 1.3655897540426691e-05, "loss": 1.9097, "step": 101700 }, { "epoch": 6.910245957331159, "grad_norm": 3.7863636016845703, "learning_rate": 1.3651651039543417e-05, "loss": 2.0078, "step": 101705 }, { "epoch": 6.910585677401821, "grad_norm": 4.060995578765869, "learning_rate": 1.3647404538660144e-05, "loss": 2.1766, "step": 101710 }, { "epoch": 6.910925397472482, "grad_norm": 3.004854917526245, "learning_rate": 1.3643158037776873e-05, "loss": 2.1209, "step": 101715 }, { "epoch": 6.911265117543144, "grad_norm": 2.6507692337036133, "learning_rate": 1.36389115368936e-05, "loss": 2.0207, "step": 101720 }, { "epoch": 6.911604837613806, "grad_norm": 3.62912654876709, "learning_rate": 1.3634665036010328e-05, "loss": 2.1456, "step": 101725 }, { "epoch": 6.911944557684468, "grad_norm": 3.765075445175171, "learning_rate": 1.3630418535127057e-05, "loss": 2.2587, "step": 101730 }, { "epoch": 6.91228427775513, "grad_norm": 3.1586568355560303, "learning_rate": 1.3626172034243784e-05, "loss": 1.9251, "step": 101735 }, { "epoch": 
6.912623997825792, "grad_norm": 3.383737802505493, "learning_rate": 1.362192553336051e-05, "loss": 2.1306, "step": 101740 }, { "epoch": 6.912963717896453, "grad_norm": 3.427147626876831, "learning_rate": 1.361767903247724e-05, "loss": 2.2082, "step": 101745 }, { "epoch": 6.913303437967115, "grad_norm": 4.927709579467773, "learning_rate": 1.3613432531593968e-05, "loss": 1.9754, "step": 101750 }, { "epoch": 6.913643158037777, "grad_norm": 3.7688074111938477, "learning_rate": 1.3609186030710694e-05, "loss": 2.2263, "step": 101755 }, { "epoch": 6.913982878108438, "grad_norm": 4.3758931159973145, "learning_rate": 1.3604939529827424e-05, "loss": 2.0085, "step": 101760 }, { "epoch": 6.9143225981791, "grad_norm": 3.208914279937744, "learning_rate": 1.360069302894415e-05, "loss": 2.2153, "step": 101765 }, { "epoch": 6.914662318249762, "grad_norm": 3.1812827587127686, "learning_rate": 1.3596446528060876e-05, "loss": 2.1803, "step": 101770 }, { "epoch": 6.915002038320424, "grad_norm": 3.8336260318756104, "learning_rate": 1.3592200027177606e-05, "loss": 1.7915, "step": 101775 }, { "epoch": 6.915341758391086, "grad_norm": 4.140329837799072, "learning_rate": 1.3587953526294334e-05, "loss": 2.342, "step": 101780 }, { "epoch": 6.915681478461748, "grad_norm": 4.135283470153809, "learning_rate": 1.3583707025411064e-05, "loss": 1.8855, "step": 101785 }, { "epoch": 6.916021198532409, "grad_norm": 3.739039659500122, "learning_rate": 1.357946052452779e-05, "loss": 2.0857, "step": 101790 }, { "epoch": 6.916360918603071, "grad_norm": 3.62638258934021, "learning_rate": 1.3575214023644516e-05, "loss": 2.0311, "step": 101795 }, { "epoch": 6.916700638673733, "grad_norm": 4.288148403167725, "learning_rate": 1.3570967522761246e-05, "loss": 2.0775, "step": 101800 }, { "epoch": 6.917040358744394, "grad_norm": 4.397187232971191, "learning_rate": 1.3566721021877972e-05, "loss": 1.9248, "step": 101805 }, { "epoch": 6.917380078815056, "grad_norm": 3.9745240211486816, "learning_rate": 
1.35624745209947e-05, "loss": 2.2916, "step": 101810 }, { "epoch": 6.917719798885718, "grad_norm": 3.281804323196411, "learning_rate": 1.355822802011143e-05, "loss": 1.975, "step": 101815 }, { "epoch": 6.91805951895638, "grad_norm": 3.436206579208374, "learning_rate": 1.3553981519228157e-05, "loss": 2.1567, "step": 101820 }, { "epoch": 6.918399239027042, "grad_norm": 3.8458571434020996, "learning_rate": 1.3549735018344883e-05, "loss": 2.2505, "step": 101825 }, { "epoch": 6.918738959097704, "grad_norm": 2.9537503719329834, "learning_rate": 1.3545488517461613e-05, "loss": 2.1512, "step": 101830 }, { "epoch": 6.919078679168365, "grad_norm": 4.156093597412109, "learning_rate": 1.354124201657834e-05, "loss": 2.1565, "step": 101835 }, { "epoch": 6.919418399239027, "grad_norm": 2.883845329284668, "learning_rate": 1.3536995515695067e-05, "loss": 2.0174, "step": 101840 }, { "epoch": 6.919758119309689, "grad_norm": 4.0881123542785645, "learning_rate": 1.3532749014811797e-05, "loss": 2.1519, "step": 101845 }, { "epoch": 6.92009783938035, "grad_norm": 3.3653225898742676, "learning_rate": 1.3528502513928523e-05, "loss": 1.9514, "step": 101850 }, { "epoch": 6.920437559451012, "grad_norm": 4.057516574859619, "learning_rate": 1.352425601304525e-05, "loss": 2.2597, "step": 101855 }, { "epoch": 6.920777279521674, "grad_norm": 2.881964921951294, "learning_rate": 1.3520009512161979e-05, "loss": 2.09, "step": 101860 }, { "epoch": 6.921116999592336, "grad_norm": 3.4747393131256104, "learning_rate": 1.3515763011278707e-05, "loss": 1.8948, "step": 101865 }, { "epoch": 6.921456719662998, "grad_norm": 2.778059482574463, "learning_rate": 1.3511516510395437e-05, "loss": 1.7842, "step": 101870 }, { "epoch": 6.92179643973366, "grad_norm": 4.052215576171875, "learning_rate": 1.3507270009512163e-05, "loss": 1.7991, "step": 101875 }, { "epoch": 6.922136159804321, "grad_norm": 3.8082146644592285, "learning_rate": 1.350302350862889e-05, "loss": 2.0753, "step": 101880 }, { "epoch": 6.922475879874983, 
"grad_norm": 3.276115655899048, "learning_rate": 1.3498777007745619e-05, "loss": 1.9406, "step": 101885 }, { "epoch": 6.922815599945645, "grad_norm": 3.8123695850372314, "learning_rate": 1.3494530506862347e-05, "loss": 2.051, "step": 101890 }, { "epoch": 6.923155320016306, "grad_norm": 4.234253406524658, "learning_rate": 1.3490284005979073e-05, "loss": 2.2621, "step": 101895 }, { "epoch": 6.923495040086968, "grad_norm": 3.3790547847747803, "learning_rate": 1.3486037505095803e-05, "loss": 2.037, "step": 101900 }, { "epoch": 6.92383476015763, "grad_norm": 4.32592248916626, "learning_rate": 1.348179100421253e-05, "loss": 2.0667, "step": 101905 }, { "epoch": 6.924174480228292, "grad_norm": 2.93416690826416, "learning_rate": 1.3477544503329256e-05, "loss": 2.0847, "step": 101910 }, { "epoch": 6.924514200298954, "grad_norm": 4.067450523376465, "learning_rate": 1.3473298002445985e-05, "loss": 1.6701, "step": 101915 }, { "epoch": 6.924853920369616, "grad_norm": 4.135645389556885, "learning_rate": 1.3469051501562713e-05, "loss": 1.9044, "step": 101920 }, { "epoch": 6.925193640440277, "grad_norm": 3.690742254257202, "learning_rate": 1.346480500067944e-05, "loss": 2.2297, "step": 101925 }, { "epoch": 6.925533360510939, "grad_norm": 3.5681445598602295, "learning_rate": 1.346055849979617e-05, "loss": 1.9649, "step": 101930 }, { "epoch": 6.925873080581601, "grad_norm": 3.3693175315856934, "learning_rate": 1.3456311998912896e-05, "loss": 2.0625, "step": 101935 }, { "epoch": 6.926212800652262, "grad_norm": 3.8719112873077393, "learning_rate": 1.3452065498029624e-05, "loss": 2.0857, "step": 101940 }, { "epoch": 6.926552520722924, "grad_norm": 4.374654293060303, "learning_rate": 1.3447818997146353e-05, "loss": 2.105, "step": 101945 }, { "epoch": 6.9268922407935865, "grad_norm": 3.6803739070892334, "learning_rate": 1.344357249626308e-05, "loss": 2.056, "step": 101950 }, { "epoch": 6.927231960864248, "grad_norm": 4.199763774871826, "learning_rate": 1.343932599537981e-05, "loss": 
1.9945, "step": 101955 }, { "epoch": 6.92757168093491, "grad_norm": 3.0096914768218994, "learning_rate": 1.3435079494496536e-05, "loss": 2.3295, "step": 101960 }, { "epoch": 6.927911401005572, "grad_norm": 2.894845485687256, "learning_rate": 1.3430832993613262e-05, "loss": 2.0721, "step": 101965 }, { "epoch": 6.928251121076233, "grad_norm": 3.2003629207611084, "learning_rate": 1.3426586492729992e-05, "loss": 2.4253, "step": 101970 }, { "epoch": 6.928590841146895, "grad_norm": 3.303267478942871, "learning_rate": 1.342233999184672e-05, "loss": 2.1406, "step": 101975 }, { "epoch": 6.928930561217557, "grad_norm": 3.409628391265869, "learning_rate": 1.3418093490963446e-05, "loss": 1.8775, "step": 101980 }, { "epoch": 6.929270281288218, "grad_norm": 3.901017665863037, "learning_rate": 1.3413846990080176e-05, "loss": 1.9989, "step": 101985 }, { "epoch": 6.92961000135888, "grad_norm": 4.269772529602051, "learning_rate": 1.3409600489196902e-05, "loss": 1.9892, "step": 101990 }, { "epoch": 6.9299497214295425, "grad_norm": 4.0068278312683105, "learning_rate": 1.340535398831363e-05, "loss": 2.202, "step": 101995 }, { "epoch": 6.930289441500204, "grad_norm": 3.3234355449676514, "learning_rate": 1.3401107487430358e-05, "loss": 2.2867, "step": 102000 }, { "epoch": 6.930629161570866, "grad_norm": 4.122097015380859, "learning_rate": 1.3396860986547086e-05, "loss": 2.1077, "step": 102005 }, { "epoch": 6.930968881641528, "grad_norm": 3.939279794692993, "learning_rate": 1.3392614485663812e-05, "loss": 2.0772, "step": 102010 }, { "epoch": 6.931308601712189, "grad_norm": 4.187649250030518, "learning_rate": 1.3388367984780542e-05, "loss": 2.0478, "step": 102015 }, { "epoch": 6.931648321782851, "grad_norm": 3.198880195617676, "learning_rate": 1.3384121483897268e-05, "loss": 2.153, "step": 102020 }, { "epoch": 6.931988041853513, "grad_norm": 3.473583221435547, "learning_rate": 1.3379874983013996e-05, "loss": 2.1772, "step": 102025 }, { "epoch": 6.932327761924174, "grad_norm": 
4.063094139099121, "learning_rate": 1.3375628482130726e-05, "loss": 1.6878, "step": 102030 }, { "epoch": 6.932667481994836, "grad_norm": 4.347436428070068, "learning_rate": 1.3371381981247452e-05, "loss": 2.0595, "step": 102035 }, { "epoch": 6.9330072020654985, "grad_norm": 3.0027146339416504, "learning_rate": 1.3367135480364182e-05, "loss": 2.1946, "step": 102040 }, { "epoch": 6.93334692213616, "grad_norm": 3.7588794231414795, "learning_rate": 1.3362888979480908e-05, "loss": 2.0329, "step": 102045 }, { "epoch": 6.933686642206822, "grad_norm": 3.8404078483581543, "learning_rate": 1.3358642478597635e-05, "loss": 2.0175, "step": 102050 }, { "epoch": 6.934026362277484, "grad_norm": 4.542477607727051, "learning_rate": 1.3354395977714364e-05, "loss": 2.0906, "step": 102055 }, { "epoch": 6.934366082348145, "grad_norm": 2.512606382369995, "learning_rate": 1.3350149476831092e-05, "loss": 2.0631, "step": 102060 }, { "epoch": 6.934705802418807, "grad_norm": 4.825798511505127, "learning_rate": 1.3345902975947819e-05, "loss": 2.2132, "step": 102065 }, { "epoch": 6.935045522489469, "grad_norm": 3.1225085258483887, "learning_rate": 1.3341656475064548e-05, "loss": 2.015, "step": 102070 }, { "epoch": 6.93538524256013, "grad_norm": 3.749140739440918, "learning_rate": 1.3337409974181275e-05, "loss": 2.0876, "step": 102075 }, { "epoch": 6.935724962630792, "grad_norm": 3.9429209232330322, "learning_rate": 1.3333163473298003e-05, "loss": 2.0492, "step": 102080 }, { "epoch": 6.9360646827014545, "grad_norm": 4.4544782638549805, "learning_rate": 1.3328916972414732e-05, "loss": 2.2445, "step": 102085 }, { "epoch": 6.936404402772116, "grad_norm": 4.247623920440674, "learning_rate": 1.3324670471531459e-05, "loss": 2.2649, "step": 102090 }, { "epoch": 6.936744122842778, "grad_norm": 3.7364776134490967, "learning_rate": 1.3320423970648185e-05, "loss": 2.0978, "step": 102095 }, { "epoch": 6.93708384291344, "grad_norm": 4.905433177947998, "learning_rate": 1.3316177469764915e-05, "loss": 1.6965, 
"step": 102100 }, { "epoch": 6.937423562984101, "grad_norm": 3.419734477996826, "learning_rate": 1.3311930968881641e-05, "loss": 2.1202, "step": 102105 }, { "epoch": 6.937763283054763, "grad_norm": 3.3993921279907227, "learning_rate": 1.3307684467998369e-05, "loss": 2.045, "step": 102110 }, { "epoch": 6.938103003125424, "grad_norm": 3.5204904079437256, "learning_rate": 1.3303437967115099e-05, "loss": 1.898, "step": 102115 }, { "epoch": 6.938442723196086, "grad_norm": 3.182236671447754, "learning_rate": 1.3299191466231825e-05, "loss": 2.2999, "step": 102120 }, { "epoch": 6.938782443266748, "grad_norm": 3.8263919353485107, "learning_rate": 1.3294944965348555e-05, "loss": 2.1895, "step": 102125 }, { "epoch": 6.93912216333741, "grad_norm": 2.6081228256225586, "learning_rate": 1.3290698464465281e-05, "loss": 2.2506, "step": 102130 }, { "epoch": 6.939461883408072, "grad_norm": 3.6215219497680664, "learning_rate": 1.3286451963582009e-05, "loss": 1.7222, "step": 102135 }, { "epoch": 6.939801603478734, "grad_norm": 3.6727454662323, "learning_rate": 1.3282205462698739e-05, "loss": 2.1736, "step": 102140 }, { "epoch": 6.940141323549395, "grad_norm": 4.0029425621032715, "learning_rate": 1.3277958961815465e-05, "loss": 1.9075, "step": 102145 }, { "epoch": 6.940481043620057, "grad_norm": 3.030081272125244, "learning_rate": 1.3273712460932191e-05, "loss": 2.1365, "step": 102150 }, { "epoch": 6.940820763690719, "grad_norm": 3.72353196144104, "learning_rate": 1.3269465960048921e-05, "loss": 2.0016, "step": 102155 }, { "epoch": 6.94116048376138, "grad_norm": 3.474233388900757, "learning_rate": 1.3265219459165647e-05, "loss": 1.9482, "step": 102160 }, { "epoch": 6.941500203832042, "grad_norm": 3.9530222415924072, "learning_rate": 1.3260972958282375e-05, "loss": 2.1797, "step": 102165 }, { "epoch": 6.941839923902704, "grad_norm": 3.1915552616119385, "learning_rate": 1.3256726457399105e-05, "loss": 1.984, "step": 102170 }, { "epoch": 6.942179643973366, "grad_norm": 4.023643493652344, 
"learning_rate": 1.3252479956515832e-05, "loss": 2.2764, "step": 102175 }, { "epoch": 6.942519364044028, "grad_norm": 3.1483707427978516, "learning_rate": 1.3248233455632558e-05, "loss": 2.0603, "step": 102180 }, { "epoch": 6.94285908411469, "grad_norm": 4.400073051452637, "learning_rate": 1.3243986954749288e-05, "loss": 2.0714, "step": 102185 }, { "epoch": 6.943198804185351, "grad_norm": 3.3710289001464844, "learning_rate": 1.3239740453866016e-05, "loss": 2.16, "step": 102190 }, { "epoch": 6.943538524256013, "grad_norm": 3.5796539783477783, "learning_rate": 1.3235493952982742e-05, "loss": 2.197, "step": 102195 }, { "epoch": 6.943878244326675, "grad_norm": 4.507734298706055, "learning_rate": 1.3231247452099472e-05, "loss": 1.7685, "step": 102200 }, { "epoch": 6.944217964397336, "grad_norm": 3.3422954082489014, "learning_rate": 1.3227000951216198e-05, "loss": 2.3253, "step": 102205 }, { "epoch": 6.944557684467998, "grad_norm": 3.494628429412842, "learning_rate": 1.3222754450332928e-05, "loss": 1.7351, "step": 102210 }, { "epoch": 6.9448974045386604, "grad_norm": 4.340732574462891, "learning_rate": 1.3218507949449654e-05, "loss": 2.0052, "step": 102215 }, { "epoch": 6.945237124609322, "grad_norm": 3.561420202255249, "learning_rate": 1.3214261448566382e-05, "loss": 2.2538, "step": 102220 }, { "epoch": 6.945576844679984, "grad_norm": 3.9231314659118652, "learning_rate": 1.3210014947683112e-05, "loss": 1.9829, "step": 102225 }, { "epoch": 6.945916564750646, "grad_norm": 3.6121108531951904, "learning_rate": 1.3205768446799838e-05, "loss": 2.1643, "step": 102230 }, { "epoch": 6.946256284821307, "grad_norm": 3.3948071002960205, "learning_rate": 1.3201521945916564e-05, "loss": 2.2022, "step": 102235 }, { "epoch": 6.946596004891969, "grad_norm": 4.22804594039917, "learning_rate": 1.3197275445033294e-05, "loss": 2.3347, "step": 102240 }, { "epoch": 6.946935724962631, "grad_norm": 2.9561119079589844, "learning_rate": 1.319302894415002e-05, "loss": 2.1022, "step": 102245 }, { 
"epoch": 6.947275445033292, "grad_norm": 3.818359136581421, "learning_rate": 1.3188782443266748e-05, "loss": 2.303, "step": 102250 }, { "epoch": 6.947615165103954, "grad_norm": 3.4628288745880127, "learning_rate": 1.3184535942383478e-05, "loss": 2.0572, "step": 102255 }, { "epoch": 6.9479548851746165, "grad_norm": 3.5288655757904053, "learning_rate": 1.3180289441500204e-05, "loss": 2.032, "step": 102260 }, { "epoch": 6.948294605245278, "grad_norm": 3.887505292892456, "learning_rate": 1.317604294061693e-05, "loss": 2.1502, "step": 102265 }, { "epoch": 6.94863432531594, "grad_norm": 3.94144868850708, "learning_rate": 1.317179643973366e-05, "loss": 2.0797, "step": 102270 }, { "epoch": 6.948974045386602, "grad_norm": 4.513643741607666, "learning_rate": 1.3167549938850388e-05, "loss": 2.0502, "step": 102275 }, { "epoch": 6.949313765457263, "grad_norm": 2.7999956607818604, "learning_rate": 1.3163303437967115e-05, "loss": 2.0789, "step": 102280 }, { "epoch": 6.949653485527925, "grad_norm": 3.932248830795288, "learning_rate": 1.3159056937083844e-05, "loss": 2.0678, "step": 102285 }, { "epoch": 6.949993205598587, "grad_norm": 3.751617670059204, "learning_rate": 1.315481043620057e-05, "loss": 2.0441, "step": 102290 }, { "epoch": 6.950332925669248, "grad_norm": 3.316857099533081, "learning_rate": 1.31505639353173e-05, "loss": 1.8887, "step": 102295 }, { "epoch": 6.95067264573991, "grad_norm": 3.825322389602661, "learning_rate": 1.3146317434434027e-05, "loss": 2.0453, "step": 102300 }, { "epoch": 6.9510123658105725, "grad_norm": 3.5728774070739746, "learning_rate": 1.3142070933550755e-05, "loss": 2.272, "step": 102305 }, { "epoch": 6.951352085881234, "grad_norm": 4.104968547821045, "learning_rate": 1.3137824432667484e-05, "loss": 2.1561, "step": 102310 }, { "epoch": 6.951691805951896, "grad_norm": 4.061890602111816, "learning_rate": 1.313357793178421e-05, "loss": 1.9255, "step": 102315 }, { "epoch": 6.952031526022557, "grad_norm": 2.9498512744903564, "learning_rate": 
1.3129331430900937e-05, "loss": 2.1198, "step": 102320 }, { "epoch": 6.952371246093219, "grad_norm": 2.972407341003418, "learning_rate": 1.3125084930017667e-05, "loss": 2.0324, "step": 102325 }, { "epoch": 6.952710966163881, "grad_norm": 3.479909896850586, "learning_rate": 1.3120838429134395e-05, "loss": 2.2784, "step": 102330 }, { "epoch": 6.953050686234542, "grad_norm": 3.618314504623413, "learning_rate": 1.3116591928251121e-05, "loss": 2.1187, "step": 102335 }, { "epoch": 6.953390406305204, "grad_norm": 3.7904834747314453, "learning_rate": 1.311234542736785e-05, "loss": 2.0004, "step": 102340 }, { "epoch": 6.953730126375866, "grad_norm": 2.5970170497894287, "learning_rate": 1.3108098926484577e-05, "loss": 2.522, "step": 102345 }, { "epoch": 6.954069846446528, "grad_norm": 3.1907217502593994, "learning_rate": 1.3103852425601303e-05, "loss": 2.3834, "step": 102350 }, { "epoch": 6.95440956651719, "grad_norm": 3.3205490112304688, "learning_rate": 1.3099605924718033e-05, "loss": 1.9461, "step": 102355 }, { "epoch": 6.954749286587852, "grad_norm": 3.355480194091797, "learning_rate": 1.3095359423834761e-05, "loss": 1.8982, "step": 102360 }, { "epoch": 6.955089006658513, "grad_norm": 4.965516567230225, "learning_rate": 1.3091962223128143e-05, "loss": 2.239, "step": 102365 }, { "epoch": 6.955428726729175, "grad_norm": 2.986766815185547, "learning_rate": 1.308771572224487e-05, "loss": 2.2215, "step": 102370 }, { "epoch": 6.955768446799837, "grad_norm": 4.6638054847717285, "learning_rate": 1.30834692213616e-05, "loss": 2.1643, "step": 102375 }, { "epoch": 6.956108166870498, "grad_norm": 3.923156261444092, "learning_rate": 1.3079222720478326e-05, "loss": 2.1552, "step": 102380 }, { "epoch": 6.95644788694116, "grad_norm": 3.3022849559783936, "learning_rate": 1.3074976219595054e-05, "loss": 2.1257, "step": 102385 }, { "epoch": 6.956787607011822, "grad_norm": 3.3775858879089355, "learning_rate": 1.3070729718711783e-05, "loss": 2.2356, "step": 102390 }, { "epoch": 
6.957127327082484, "grad_norm": 3.2895827293395996, "learning_rate": 1.306648321782851e-05, "loss": 1.698, "step": 102395 }, { "epoch": 6.957467047153146, "grad_norm": 4.299765586853027, "learning_rate": 1.3062236716945236e-05, "loss": 2.2229, "step": 102400 }, { "epoch": 6.957806767223808, "grad_norm": 3.8141283988952637, "learning_rate": 1.3057990216061966e-05, "loss": 2.3084, "step": 102405 }, { "epoch": 6.958146487294469, "grad_norm": 2.9348056316375732, "learning_rate": 1.3053743715178692e-05, "loss": 2.2456, "step": 102410 }, { "epoch": 6.958486207365131, "grad_norm": 3.4170501232147217, "learning_rate": 1.3049497214295422e-05, "loss": 1.7754, "step": 102415 }, { "epoch": 6.958825927435793, "grad_norm": 3.0798439979553223, "learning_rate": 1.304525071341215e-05, "loss": 2.0451, "step": 102420 }, { "epoch": 6.959165647506454, "grad_norm": 3.5038599967956543, "learning_rate": 1.3041004212528876e-05, "loss": 2.104, "step": 102425 }, { "epoch": 6.959505367577116, "grad_norm": 3.851776599884033, "learning_rate": 1.3036757711645606e-05, "loss": 2.1723, "step": 102430 }, { "epoch": 6.959845087647778, "grad_norm": 3.831160306930542, "learning_rate": 1.3032511210762332e-05, "loss": 2.129, "step": 102435 }, { "epoch": 6.96018480771844, "grad_norm": 3.3851287364959717, "learning_rate": 1.302826470987906e-05, "loss": 2.1749, "step": 102440 }, { "epoch": 6.960524527789102, "grad_norm": 3.4329042434692383, "learning_rate": 1.302401820899579e-05, "loss": 2.1774, "step": 102445 }, { "epoch": 6.960864247859764, "grad_norm": 3.967787742614746, "learning_rate": 1.3019771708112516e-05, "loss": 2.0767, "step": 102450 }, { "epoch": 6.961203967930425, "grad_norm": 3.7735769748687744, "learning_rate": 1.3015525207229242e-05, "loss": 1.9068, "step": 102455 }, { "epoch": 6.961543688001087, "grad_norm": 3.3972408771514893, "learning_rate": 1.3011278706345972e-05, "loss": 2.2965, "step": 102460 }, { "epoch": 6.961883408071749, "grad_norm": 3.2957823276519775, "learning_rate": 
1.3007032205462699e-05, "loss": 2.0659, "step": 102465 }, { "epoch": 6.96222312814241, "grad_norm": 3.2237367630004883, "learning_rate": 1.3002785704579427e-05, "loss": 1.9596, "step": 102470 }, { "epoch": 6.962562848213072, "grad_norm": 3.1766469478607178, "learning_rate": 1.2998539203696156e-05, "loss": 1.8994, "step": 102475 }, { "epoch": 6.962902568283734, "grad_norm": 3.8888790607452393, "learning_rate": 1.2994292702812883e-05, "loss": 2.0294, "step": 102480 }, { "epoch": 6.963242288354396, "grad_norm": 2.915302038192749, "learning_rate": 1.2990046201929609e-05, "loss": 2.3469, "step": 102485 }, { "epoch": 6.963582008425058, "grad_norm": 4.23262357711792, "learning_rate": 1.2985799701046339e-05, "loss": 2.1715, "step": 102490 }, { "epoch": 6.96392172849572, "grad_norm": 3.621795415878296, "learning_rate": 1.2981553200163067e-05, "loss": 2.3013, "step": 102495 }, { "epoch": 6.964261448566381, "grad_norm": 3.867408514022827, "learning_rate": 1.2977306699279795e-05, "loss": 2.2232, "step": 102500 }, { "epoch": 6.964601168637043, "grad_norm": 3.9086055755615234, "learning_rate": 1.2973060198396523e-05, "loss": 2.0956, "step": 102505 }, { "epoch": 6.964940888707705, "grad_norm": 2.6056134700775146, "learning_rate": 1.2968813697513249e-05, "loss": 1.694, "step": 102510 }, { "epoch": 6.965280608778366, "grad_norm": 3.839555501937866, "learning_rate": 1.2964567196629979e-05, "loss": 2.1285, "step": 102515 }, { "epoch": 6.965620328849028, "grad_norm": 2.9452733993530273, "learning_rate": 1.2960320695746705e-05, "loss": 2.0873, "step": 102520 }, { "epoch": 6.9659600489196905, "grad_norm": 4.145132541656494, "learning_rate": 1.2956074194863433e-05, "loss": 2.1468, "step": 102525 }, { "epoch": 6.966299768990352, "grad_norm": 4.052526950836182, "learning_rate": 1.2951827693980163e-05, "loss": 2.0971, "step": 102530 }, { "epoch": 6.966639489061014, "grad_norm": 4.066918849945068, "learning_rate": 1.2947581193096889e-05, "loss": 2.257, "step": 102535 }, { "epoch": 
6.966979209131676, "grad_norm": 3.7834322452545166, "learning_rate": 1.2943334692213615e-05, "loss": 2.4346, "step": 102540 }, { "epoch": 6.967318929202337, "grad_norm": 3.5700480937957764, "learning_rate": 1.2939088191330345e-05, "loss": 2.016, "step": 102545 }, { "epoch": 6.967658649272999, "grad_norm": 4.2518439292907715, "learning_rate": 1.2934841690447071e-05, "loss": 2.0254, "step": 102550 }, { "epoch": 6.967998369343661, "grad_norm": 3.8297159671783447, "learning_rate": 1.29305951895638e-05, "loss": 2.2529, "step": 102555 }, { "epoch": 6.968338089414322, "grad_norm": 3.934216022491455, "learning_rate": 1.2926348688680529e-05, "loss": 2.1275, "step": 102560 }, { "epoch": 6.968677809484984, "grad_norm": 3.616581916809082, "learning_rate": 1.2922102187797255e-05, "loss": 2.2012, "step": 102565 }, { "epoch": 6.9690175295556465, "grad_norm": 4.980360984802246, "learning_rate": 1.2917855686913982e-05, "loss": 2.1252, "step": 102570 }, { "epoch": 6.969357249626308, "grad_norm": 3.896127700805664, "learning_rate": 1.2913609186030711e-05, "loss": 2.2209, "step": 102575 }, { "epoch": 6.96969696969697, "grad_norm": 4.309643745422363, "learning_rate": 1.290936268514744e-05, "loss": 2.3391, "step": 102580 }, { "epoch": 6.970036689767632, "grad_norm": 4.670952320098877, "learning_rate": 1.2905116184264169e-05, "loss": 2.1023, "step": 102585 }, { "epoch": 6.970376409838293, "grad_norm": 3.772808790206909, "learning_rate": 1.2900869683380895e-05, "loss": 1.9203, "step": 102590 }, { "epoch": 6.970716129908955, "grad_norm": 3.5285675525665283, "learning_rate": 1.2896623182497622e-05, "loss": 1.892, "step": 102595 }, { "epoch": 6.971055849979617, "grad_norm": 4.191429615020752, "learning_rate": 1.2892376681614351e-05, "loss": 2.0981, "step": 102600 }, { "epoch": 6.971395570050278, "grad_norm": 4.0813775062561035, "learning_rate": 1.2888130180731078e-05, "loss": 2.2374, "step": 102605 }, { "epoch": 6.97173529012094, "grad_norm": 3.632462978363037, "learning_rate": 
1.2883883679847806e-05, "loss": 2.0441, "step": 102610 }, { "epoch": 6.9720750101916025, "grad_norm": null, "learning_rate": 1.2880486479141188e-05, "loss": 1.9382, "step": 102615 }, { "epoch": 6.972414730262264, "grad_norm": 3.7135071754455566, "learning_rate": 1.2876239978257918e-05, "loss": 2.0618, "step": 102620 }, { "epoch": 6.972754450332926, "grad_norm": 3.4967758655548096, "learning_rate": 1.2871993477374644e-05, "loss": 2.1849, "step": 102625 }, { "epoch": 6.973094170403588, "grad_norm": 3.5999200344085693, "learning_rate": 1.286774697649137e-05, "loss": 1.9911, "step": 102630 }, { "epoch": 6.973433890474249, "grad_norm": 4.070877552032471, "learning_rate": 1.28635004756081e-05, "loss": 1.9754, "step": 102635 }, { "epoch": 6.973773610544911, "grad_norm": 3.547484874725342, "learning_rate": 1.2859253974724828e-05, "loss": 2.1107, "step": 102640 }, { "epoch": 6.974113330615573, "grad_norm": 3.678658962249756, "learning_rate": 1.2855007473841554e-05, "loss": 1.8911, "step": 102645 }, { "epoch": 6.974453050686234, "grad_norm": 4.052610397338867, "learning_rate": 1.2850760972958284e-05, "loss": 1.9555, "step": 102650 }, { "epoch": 6.974792770756896, "grad_norm": 3.9999890327453613, "learning_rate": 1.284651447207501e-05, "loss": 2.1179, "step": 102655 }, { "epoch": 6.9751324908275585, "grad_norm": 3.9017434120178223, "learning_rate": 1.2842267971191738e-05, "loss": 1.8725, "step": 102660 }, { "epoch": 6.97547221089822, "grad_norm": 3.212677001953125, "learning_rate": 1.2838021470308466e-05, "loss": 1.939, "step": 102665 }, { "epoch": 6.975811930968882, "grad_norm": 2.977741003036499, "learning_rate": 1.2833774969425194e-05, "loss": 2.144, "step": 102670 }, { "epoch": 6.976151651039544, "grad_norm": 3.254699468612671, "learning_rate": 1.282952846854192e-05, "loss": 2.2057, "step": 102675 }, { "epoch": 6.976491371110205, "grad_norm": 3.6747775077819824, "learning_rate": 1.282528196765865e-05, "loss": 1.9563, "step": 102680 }, { "epoch": 6.976831091180867,
"grad_norm": 3.587610960006714, "learning_rate": 1.2821035466775377e-05, "loss": 1.9201, "step": 102685 }, { "epoch": 6.977170811251529, "grad_norm": 4.653036117553711, "learning_rate": 1.2816788965892105e-05, "loss": 1.8843, "step": 102690 }, { "epoch": 6.97751053132219, "grad_norm": 3.1694304943084717, "learning_rate": 1.2812542465008834e-05, "loss": 2.004, "step": 102695 }, { "epoch": 6.977850251392852, "grad_norm": 3.52388334274292, "learning_rate": 1.280829596412556e-05, "loss": 2.1543, "step": 102700 }, { "epoch": 6.9781899714635145, "grad_norm": 3.139082908630371, "learning_rate": 1.280404946324229e-05, "loss": 2.4021, "step": 102705 }, { "epoch": 6.978529691534176, "grad_norm": 3.787851572036743, "learning_rate": 1.2799802962359017e-05, "loss": 2.0885, "step": 102710 }, { "epoch": 6.978869411604838, "grad_norm": 3.7273454666137695, "learning_rate": 1.2795556461475743e-05, "loss": 2.0277, "step": 102715 }, { "epoch": 6.9792091316755, "grad_norm": 3.6329009532928467, "learning_rate": 1.2791309960592473e-05, "loss": 2.0011, "step": 102720 }, { "epoch": 6.979548851746161, "grad_norm": 3.7218611240386963, "learning_rate": 1.27870634597092e-05, "loss": 2.1432, "step": 102725 }, { "epoch": 6.979888571816823, "grad_norm": 3.2091972827911377, "learning_rate": 1.2782816958825927e-05, "loss": 2.1028, "step": 102730 }, { "epoch": 6.980228291887485, "grad_norm": 3.586979627609253, "learning_rate": 1.2778570457942657e-05, "loss": 2.1305, "step": 102735 }, { "epoch": 6.980568011958146, "grad_norm": 4.3011040687561035, "learning_rate": 1.2774323957059383e-05, "loss": 2.055, "step": 102740 }, { "epoch": 6.980907732028808, "grad_norm": 3.735840082168579, "learning_rate": 1.2770077456176111e-05, "loss": 2.1673, "step": 102745 }, { "epoch": 6.9812474520994705, "grad_norm": 3.465101718902588, "learning_rate": 1.2765830955292841e-05, "loss": 2.1797, "step": 102750 }, { "epoch": 6.981587172170132, "grad_norm": 3.890963315963745, "learning_rate": 1.2761584454409567e-05, "loss": 
2.1638, "step": 102755 }, { "epoch": 6.981926892240794, "grad_norm": 3.7094168663024902, "learning_rate": 1.2757337953526293e-05, "loss": 1.8561, "step": 102760 }, { "epoch": 6.982266612311456, "grad_norm": 4.204103946685791, "learning_rate": 1.2753091452643023e-05, "loss": 2.1265, "step": 102765 }, { "epoch": 6.982606332382117, "grad_norm": 3.0543291568756104, "learning_rate": 1.274884495175975e-05, "loss": 2.2977, "step": 102770 }, { "epoch": 6.982946052452779, "grad_norm": 3.8606600761413574, "learning_rate": 1.2744598450876478e-05, "loss": 2.0956, "step": 102775 }, { "epoch": 6.983285772523441, "grad_norm": 3.0597739219665527, "learning_rate": 1.2740351949993207e-05, "loss": 2.1373, "step": 102780 }, { "epoch": 6.983625492594102, "grad_norm": 2.5385594367980957, "learning_rate": 1.2736105449109934e-05, "loss": 2.1021, "step": 102785 }, { "epoch": 6.983965212664764, "grad_norm": 3.876854658126831, "learning_rate": 1.2731858948226663e-05, "loss": 1.9558, "step": 102790 }, { "epoch": 6.984304932735426, "grad_norm": 4.194489479064941, "learning_rate": 1.272761244734339e-05, "loss": 2.2074, "step": 102795 }, { "epoch": 6.984644652806088, "grad_norm": 3.0657849311828613, "learning_rate": 1.2723365946460118e-05, "loss": 2.0724, "step": 102800 }, { "epoch": 6.98498437287675, "grad_norm": 3.665559768676758, "learning_rate": 1.2719119445576847e-05, "loss": 1.9935, "step": 102805 }, { "epoch": 6.985324092947411, "grad_norm": 2.815225839614868, "learning_rate": 1.2714872944693574e-05, "loss": 2.3116, "step": 102810 }, { "epoch": 6.985663813018073, "grad_norm": 3.4457638263702393, "learning_rate": 1.27106264438103e-05, "loss": 2.0722, "step": 102815 }, { "epoch": 6.986003533088735, "grad_norm": 3.7942886352539062, "learning_rate": 1.270637994292703e-05, "loss": 2.1682, "step": 102820 }, { "epoch": 6.986343253159396, "grad_norm": 4.080197811126709, "learning_rate": 1.2702133442043756e-05, "loss": 2.1597, "step": 102825 }, { "epoch": 6.986682973230058, "grad_norm": 
4.542389392852783, "learning_rate": 1.2697886941160484e-05, "loss": 2.2849, "step": 102830 }, { "epoch": 6.9870226933007205, "grad_norm": 4.96907377243042, "learning_rate": 1.2693640440277214e-05, "loss": 2.2632, "step": 102835 }, { "epoch": 6.987362413371382, "grad_norm": 4.182364463806152, "learning_rate": 1.268939393939394e-05, "loss": 2.1987, "step": 102840 }, { "epoch": 6.987702133442044, "grad_norm": 3.701931953430176, "learning_rate": 1.2685147438510666e-05, "loss": 2.0754, "step": 102845 }, { "epoch": 6.988041853512706, "grad_norm": 3.236297130584717, "learning_rate": 1.2680900937627396e-05, "loss": 1.98, "step": 102850 }, { "epoch": 6.988381573583367, "grad_norm": 4.113525390625, "learning_rate": 1.2676654436744124e-05, "loss": 1.6736, "step": 102855 }, { "epoch": 6.988721293654029, "grad_norm": 3.761327028274536, "learning_rate": 1.267240793586085e-05, "loss": 2.133, "step": 102860 }, { "epoch": 6.989061013724691, "grad_norm": 3.314847230911255, "learning_rate": 1.266816143497758e-05, "loss": 1.9271, "step": 102865 }, { "epoch": 6.989400733795352, "grad_norm": 3.5028367042541504, "learning_rate": 1.2663914934094306e-05, "loss": 1.9338, "step": 102870 }, { "epoch": 6.989740453866014, "grad_norm": 4.148048400878906, "learning_rate": 1.2659668433211036e-05, "loss": 1.9385, "step": 102875 }, { "epoch": 6.9900801739366765, "grad_norm": 2.337024688720703, "learning_rate": 1.2655421932327762e-05, "loss": 2.2189, "step": 102880 }, { "epoch": 6.990419894007338, "grad_norm": 3.9250051975250244, "learning_rate": 1.265117543144449e-05, "loss": 2.1875, "step": 102885 }, { "epoch": 6.990759614078, "grad_norm": 3.6138558387756348, "learning_rate": 1.264692893056122e-05, "loss": 2.0343, "step": 102890 }, { "epoch": 6.991099334148662, "grad_norm": 3.7289538383483887, "learning_rate": 1.2642682429677946e-05, "loss": 1.9483, "step": 102895 }, { "epoch": 6.991439054219323, "grad_norm": 4.221060752868652, "learning_rate": 1.2638435928794673e-05, "loss": 2.0552, "step": 102900 
}, { "epoch": 6.991778774289985, "grad_norm": 3.8651092052459717, "learning_rate": 1.2634189427911402e-05, "loss": 1.9822, "step": 102905 }, { "epoch": 6.992118494360647, "grad_norm": 4.2129082679748535, "learning_rate": 1.2629942927028129e-05, "loss": 2.2134, "step": 102910 }, { "epoch": 6.992458214431308, "grad_norm": 3.9643290042877197, "learning_rate": 1.2625696426144857e-05, "loss": 2.2874, "step": 102915 }, { "epoch": 6.99279793450197, "grad_norm": 3.3624327182769775, "learning_rate": 1.2621449925261586e-05, "loss": 1.9567, "step": 102920 }, { "epoch": 6.9931376545726325, "grad_norm": 4.003139019012451, "learning_rate": 1.2617203424378313e-05, "loss": 2.1582, "step": 102925 }, { "epoch": 6.993477374643294, "grad_norm": 4.041274547576904, "learning_rate": 1.2612956923495039e-05, "loss": 2.3528, "step": 102930 }, { "epoch": 6.993817094713956, "grad_norm": 3.3325605392456055, "learning_rate": 1.2608710422611769e-05, "loss": 2.1671, "step": 102935 }, { "epoch": 6.994156814784618, "grad_norm": 3.94366192817688, "learning_rate": 1.2604463921728497e-05, "loss": 2.1497, "step": 102940 }, { "epoch": 6.994496534855279, "grad_norm": 3.163376569747925, "learning_rate": 1.2600217420845223e-05, "loss": 2.271, "step": 102945 }, { "epoch": 6.994836254925941, "grad_norm": 4.680869102478027, "learning_rate": 1.2595970919961953e-05, "loss": 2.1219, "step": 102950 }, { "epoch": 6.995175974996603, "grad_norm": 3.1262238025665283, "learning_rate": 1.2591724419078679e-05, "loss": 2.077, "step": 102955 }, { "epoch": 6.995515695067264, "grad_norm": 3.788041114807129, "learning_rate": 1.2587477918195409e-05, "loss": 2.2642, "step": 102960 }, { "epoch": 6.995855415137926, "grad_norm": 3.003290891647339, "learning_rate": 1.2583231417312135e-05, "loss": 1.9908, "step": 102965 }, { "epoch": 6.9961951352085885, "grad_norm": 3.8412728309631348, "learning_rate": 1.2578984916428863e-05, "loss": 2.228, "step": 102970 }, { "epoch": 6.99653485527925, "grad_norm": 3.240274429321289, 
"learning_rate": 1.2574738415545593e-05, "loss": 2.253, "step": 102975 }, { "epoch": 6.996874575349912, "grad_norm": 3.1941092014312744, "learning_rate": 1.2570491914662319e-05, "loss": 2.2309, "step": 102980 }, { "epoch": 6.997214295420574, "grad_norm": 3.760312080383301, "learning_rate": 1.2566245413779045e-05, "loss": 2.1413, "step": 102985 }, { "epoch": 6.997554015491235, "grad_norm": 3.1231863498687744, "learning_rate": 1.2561998912895775e-05, "loss": 2.1559, "step": 102990 }, { "epoch": 6.997893735561897, "grad_norm": 3.9766037464141846, "learning_rate": 1.2557752412012503e-05, "loss": 1.8671, "step": 102995 }, { "epoch": 6.998233455632558, "grad_norm": 4.193910121917725, "learning_rate": 1.255350591112923e-05, "loss": 2.1722, "step": 103000 }, { "epoch": 6.99857317570322, "grad_norm": 3.7990360260009766, "learning_rate": 1.2549259410245959e-05, "loss": 1.9757, "step": 103005 }, { "epoch": 6.998912895773882, "grad_norm": 4.486708641052246, "learning_rate": 1.2545012909362685e-05, "loss": 1.8612, "step": 103010 }, { "epoch": 6.999252615844544, "grad_norm": 4.408085346221924, "learning_rate": 1.2540766408479412e-05, "loss": 2.1656, "step": 103015 }, { "epoch": 6.999592335915206, "grad_norm": 3.646780490875244, "learning_rate": 1.2536519907596141e-05, "loss": 2.1676, "step": 103020 }, { "epoch": 6.999932055985868, "grad_norm": 3.981799602508545, "learning_rate": 1.253227340671287e-05, "loss": 1.8307, "step": 103025 }, { "epoch": 7.0, "eval_bertscore": { "f1": 0.8443672562918583, "precision": 0.8477709635143444, "recall": 0.8416616819616274 }, "eval_bleu_4": 0.01762363397245113, "eval_exact_match": 0.0005814516910553348, "eval_loss": 3.56876277923584, "eval_meteor": 0.09360900909795897, "eval_rouge": { "rouge1": 0.1298043686562822, "rouge2": 0.019443623564183523, "rougeL": 0.11061532231965308, "rougeLsum": 0.11057597913507237 }, "eval_runtime": 1342.3471, "eval_samples_per_second": 7.687, "eval_steps_per_second": 0.961, "step": 103026 }, { "epoch": 
7.00027177605653, "grad_norm": 3.0853307247161865, "learning_rate": 1.2528026905829596e-05, "loss": 2.0992, "step": 103030 }, { "epoch": 7.000611496127191, "grad_norm": 5.062455654144287, "learning_rate": 1.2523780404946325e-05, "loss": 2.0403, "step": 103035 }, { "epoch": 7.000951216197853, "grad_norm": 4.024199485778809, "learning_rate": 1.2519533904063052e-05, "loss": 1.9638, "step": 103040 }, { "epoch": 7.001290936268515, "grad_norm": 3.4947683811187744, "learning_rate": 1.2515287403179781e-05, "loss": 2.118, "step": 103045 }, { "epoch": 7.001630656339176, "grad_norm": 3.7036495208740234, "learning_rate": 1.2511040902296508e-05, "loss": 2.0413, "step": 103050 }, { "epoch": 7.001970376409838, "grad_norm": 3.6866166591644287, "learning_rate": 1.2506794401413236e-05, "loss": 2.027, "step": 103055 }, { "epoch": 7.0023100964805, "grad_norm": 3.976273536682129, "learning_rate": 1.2502547900529965e-05, "loss": 2.1352, "step": 103060 }, { "epoch": 7.002649816551162, "grad_norm": 4.1555867195129395, "learning_rate": 1.2498301399646692e-05, "loss": 2.2574, "step": 103065 }, { "epoch": 7.002989536621824, "grad_norm": 4.350595474243164, "learning_rate": 1.249405489876342e-05, "loss": 1.773, "step": 103070 }, { "epoch": 7.003329256692485, "grad_norm": 4.213403224945068, "learning_rate": 1.2489808397880146e-05, "loss": 1.839, "step": 103075 }, { "epoch": 7.003668976763147, "grad_norm": 2.4399466514587402, "learning_rate": 1.2485561896996876e-05, "loss": 1.6203, "step": 103080 }, { "epoch": 7.004008696833809, "grad_norm": 2.9956214427948, "learning_rate": 1.2481315396113604e-05, "loss": 2.0273, "step": 103085 }, { "epoch": 7.00434841690447, "grad_norm": 4.131185531616211, "learning_rate": 1.247706889523033e-05, "loss": 1.7466, "step": 103090 }, { "epoch": 7.004688136975132, "grad_norm": 3.446789503097534, "learning_rate": 1.2472822394347058e-05, "loss": 1.8666, "step": 103095 }, { "epoch": 7.0050278570457944, "grad_norm": 4.184453964233398, "learning_rate": 
1.2468575893463786e-05, "loss": 1.9437, "step": 103100 }, { "epoch": 7.005367577116456, "grad_norm": 3.6609058380126953, "learning_rate": 1.2464329392580514e-05, "loss": 2.0372, "step": 103105 }, { "epoch": 7.005707297187118, "grad_norm": 3.652064800262451, "learning_rate": 1.2460082891697242e-05, "loss": 1.7331, "step": 103110 }, { "epoch": 7.00604701725778, "grad_norm": 4.856076240539551, "learning_rate": 1.245583639081397e-05, "loss": 2.0968, "step": 103115 }, { "epoch": 7.006386737328441, "grad_norm": 4.015567302703857, "learning_rate": 1.2451589889930698e-05, "loss": 1.7553, "step": 103120 }, { "epoch": 7.006726457399103, "grad_norm": 3.584810256958008, "learning_rate": 1.2447343389047425e-05, "loss": 1.7651, "step": 103125 }, { "epoch": 7.007066177469765, "grad_norm": 4.5931220054626465, "learning_rate": 1.2443096888164153e-05, "loss": 2.2196, "step": 103130 }, { "epoch": 7.007405897540426, "grad_norm": 3.7117903232574463, "learning_rate": 1.2438850387280882e-05, "loss": 1.9245, "step": 103135 }, { "epoch": 7.007745617611088, "grad_norm": 3.4201464653015137, "learning_rate": 1.2434603886397609e-05, "loss": 1.921, "step": 103140 }, { "epoch": 7.0080853376817505, "grad_norm": 4.4107255935668945, "learning_rate": 1.2430357385514337e-05, "loss": 1.9824, "step": 103145 }, { "epoch": 7.008425057752412, "grad_norm": 19.087005615234375, "learning_rate": 1.2426110884631065e-05, "loss": 2.0664, "step": 103150 }, { "epoch": 7.008764777823074, "grad_norm": 3.5743391513824463, "learning_rate": 1.2421864383747793e-05, "loss": 1.8971, "step": 103155 }, { "epoch": 7.009104497893736, "grad_norm": 3.6886181831359863, "learning_rate": 1.241761788286452e-05, "loss": 2.0492, "step": 103160 }, { "epoch": 7.009444217964397, "grad_norm": 3.342146635055542, "learning_rate": 1.2413371381981249e-05, "loss": 1.775, "step": 103165 }, { "epoch": 7.009783938035059, "grad_norm": 3.995756149291992, "learning_rate": 1.2409124881097977e-05, "loss": 1.9359, "step": 103170 }, { "epoch": 
7.010123658105721, "grad_norm": 3.332005739212036, "learning_rate": 1.2404878380214703e-05, "loss": 1.781, "step": 103175 }, { "epoch": 7.010463378176382, "grad_norm": 4.251938819885254, "learning_rate": 1.2400631879331431e-05, "loss": 2.2027, "step": 103180 }, { "epoch": 7.010803098247044, "grad_norm": 3.722339391708374, "learning_rate": 1.2396385378448159e-05, "loss": 1.9433, "step": 103185 }, { "epoch": 7.0111428183177065, "grad_norm": 4.12692928314209, "learning_rate": 1.2392138877564889e-05, "loss": 1.8186, "step": 103190 }, { "epoch": 7.011482538388368, "grad_norm": 3.1132233142852783, "learning_rate": 1.2387892376681615e-05, "loss": 1.9452, "step": 103195 }, { "epoch": 7.01182225845903, "grad_norm": 3.030200958251953, "learning_rate": 1.2383645875798343e-05, "loss": 2.0486, "step": 103200 }, { "epoch": 7.012161978529692, "grad_norm": 3.299018621444702, "learning_rate": 1.2379399374915071e-05, "loss": 1.9252, "step": 103205 }, { "epoch": 7.012501698600353, "grad_norm": 3.075921058654785, "learning_rate": 1.2375152874031797e-05, "loss": 1.9448, "step": 103210 }, { "epoch": 7.012841418671015, "grad_norm": 3.5907952785491943, "learning_rate": 1.2370906373148527e-05, "loss": 2.0966, "step": 103215 }, { "epoch": 7.013181138741677, "grad_norm": 3.6043245792388916, "learning_rate": 1.2366659872265255e-05, "loss": 1.9009, "step": 103220 }, { "epoch": 7.013520858812338, "grad_norm": 2.780045509338379, "learning_rate": 1.2362413371381981e-05, "loss": 2.1289, "step": 103225 }, { "epoch": 7.013860578883, "grad_norm": 3.654616117477417, "learning_rate": 1.235816687049871e-05, "loss": 2.0312, "step": 103230 }, { "epoch": 7.0142002989536625, "grad_norm": 3.6881229877471924, "learning_rate": 1.2353920369615437e-05, "loss": 2.0328, "step": 103235 }, { "epoch": 7.014540019024324, "grad_norm": 3.519662857055664, "learning_rate": 1.2349673868732165e-05, "loss": 2.0236, "step": 103240 }, { "epoch": 7.014879739094986, "grad_norm": 4.067232608795166, "learning_rate": 
1.2345427367848893e-05, "loss": 1.9856, "step": 103245 }, { "epoch": 7.015219459165648, "grad_norm": 3.4797465801239014, "learning_rate": 1.2341180866965621e-05, "loss": 1.7894, "step": 103250 }, { "epoch": 7.015559179236309, "grad_norm": 4.310232162475586, "learning_rate": 1.233693436608235e-05, "loss": 2.045, "step": 103255 }, { "epoch": 7.015898899306971, "grad_norm": 3.572213649749756, "learning_rate": 1.2332687865199076e-05, "loss": 2.3382, "step": 103260 }, { "epoch": 7.016238619377633, "grad_norm": 3.180732011795044, "learning_rate": 1.2328441364315804e-05, "loss": 1.9693, "step": 103265 }, { "epoch": 7.016578339448294, "grad_norm": 3.5010986328125, "learning_rate": 1.2324194863432532e-05, "loss": 1.8035, "step": 103270 }, { "epoch": 7.016918059518956, "grad_norm": 3.492713212966919, "learning_rate": 1.2319948362549261e-05, "loss": 2.0504, "step": 103275 }, { "epoch": 7.0172577795896185, "grad_norm": 4.829212188720703, "learning_rate": 1.2315701861665988e-05, "loss": 2.0509, "step": 103280 }, { "epoch": 7.01759749966028, "grad_norm": 4.002333641052246, "learning_rate": 1.2311455360782716e-05, "loss": 1.8552, "step": 103285 }, { "epoch": 7.017937219730942, "grad_norm": 4.385595321655273, "learning_rate": 1.2307208859899444e-05, "loss": 1.9638, "step": 103290 }, { "epoch": 7.018276939801604, "grad_norm": 2.9637184143066406, "learning_rate": 1.230296235901617e-05, "loss": 1.7271, "step": 103295 }, { "epoch": 7.018616659872265, "grad_norm": 3.435906171798706, "learning_rate": 1.22987158581329e-05, "loss": 1.9362, "step": 103300 }, { "epoch": 7.018956379942927, "grad_norm": 3.9010159969329834, "learning_rate": 1.2294469357249628e-05, "loss": 2.07, "step": 103305 }, { "epoch": 7.019296100013589, "grad_norm": 4.443863391876221, "learning_rate": 1.2290222856366354e-05, "loss": 1.8002, "step": 103310 }, { "epoch": 7.01963582008425, "grad_norm": 2.727085590362549, "learning_rate": 1.2285976355483082e-05, "loss": 1.6613, "step": 103315 }, { "epoch": 7.019975540154912, 
"grad_norm": 4.816331386566162, "learning_rate": 1.228172985459981e-05, "loss": 1.9362, "step": 103320 }, { "epoch": 7.0203152602255745, "grad_norm": 3.757077217102051, "learning_rate": 1.2277483353716538e-05, "loss": 2.1657, "step": 103325 }, { "epoch": 7.020654980296236, "grad_norm": 4.152136325836182, "learning_rate": 1.2273236852833266e-05, "loss": 2.1108, "step": 103330 }, { "epoch": 7.020994700366898, "grad_norm": 4.67788553237915, "learning_rate": 1.2268990351949994e-05, "loss": 1.8345, "step": 103335 }, { "epoch": 7.02133442043756, "grad_norm": 4.057740688323975, "learning_rate": 1.2264743851066722e-05, "loss": 1.8415, "step": 103340 }, { "epoch": 7.021674140508221, "grad_norm": 3.8434386253356934, "learning_rate": 1.2260497350183448e-05, "loss": 2.1604, "step": 103345 }, { "epoch": 7.022013860578883, "grad_norm": 4.032191276550293, "learning_rate": 1.2256250849300176e-05, "loss": 2.1577, "step": 103350 }, { "epoch": 7.022353580649545, "grad_norm": 3.366095781326294, "learning_rate": 1.2252004348416906e-05, "loss": 1.902, "step": 103355 }, { "epoch": 7.022693300720206, "grad_norm": 3.5504183769226074, "learning_rate": 1.2247757847533634e-05, "loss": 2.0592, "step": 103360 }, { "epoch": 7.023033020790868, "grad_norm": 3.1701831817626953, "learning_rate": 1.224351134665036e-05, "loss": 1.9215, "step": 103365 }, { "epoch": 7.0233727408615305, "grad_norm": 3.6423659324645996, "learning_rate": 1.2239264845767088e-05, "loss": 1.8935, "step": 103370 }, { "epoch": 7.023712460932192, "grad_norm": 3.163569688796997, "learning_rate": 1.2235018344883816e-05, "loss": 1.744, "step": 103375 }, { "epoch": 7.024052181002854, "grad_norm": 3.664670944213867, "learning_rate": 1.2230771844000544e-05, "loss": 1.7617, "step": 103380 }, { "epoch": 7.024391901073516, "grad_norm": 4.021566867828369, "learning_rate": 1.2226525343117272e-05, "loss": 2.1204, "step": 103385 }, { "epoch": 7.024731621144177, "grad_norm": 3.2468581199645996, "learning_rate": 1.2222278842234e-05, "loss": 
1.8534, "step": 103390 }, { "epoch": 7.025071341214839, "grad_norm": 3.326626777648926, "learning_rate": 1.2218032341350727e-05, "loss": 1.7349, "step": 103395 }, { "epoch": 7.025411061285501, "grad_norm": 4.28910493850708, "learning_rate": 1.2213785840467455e-05, "loss": 2.0045, "step": 103400 }, { "epoch": 7.025750781356162, "grad_norm": 2.975896120071411, "learning_rate": 1.2209539339584183e-05, "loss": 2.0546, "step": 103405 }, { "epoch": 7.0260905014268245, "grad_norm": 4.186889171600342, "learning_rate": 1.220529283870091e-05, "loss": 1.8401, "step": 103410 }, { "epoch": 7.026430221497486, "grad_norm": 3.9459714889526367, "learning_rate": 1.2201046337817639e-05, "loss": 1.9356, "step": 103415 }, { "epoch": 7.026769941568148, "grad_norm": 3.6496102809906006, "learning_rate": 1.2196799836934367e-05, "loss": 2.0664, "step": 103420 }, { "epoch": 7.02710966163881, "grad_norm": 3.9108943939208984, "learning_rate": 1.2192553336051095e-05, "loss": 1.8539, "step": 103425 }, { "epoch": 7.027449381709471, "grad_norm": 4.076074600219727, "learning_rate": 1.2188306835167821e-05, "loss": 1.9282, "step": 103430 }, { "epoch": 7.027789101780133, "grad_norm": 4.222742080688477, "learning_rate": 1.2184060334284549e-05, "loss": 2.0057, "step": 103435 }, { "epoch": 7.028128821850795, "grad_norm": 4.03230094909668, "learning_rate": 1.2179813833401279e-05, "loss": 1.9302, "step": 103440 }, { "epoch": 7.028468541921456, "grad_norm": 3.763129949569702, "learning_rate": 1.2175567332518007e-05, "loss": 2.1447, "step": 103445 }, { "epoch": 7.028808261992118, "grad_norm": 4.009951591491699, "learning_rate": 1.2171320831634733e-05, "loss": 1.9139, "step": 103450 }, { "epoch": 7.0291479820627805, "grad_norm": 4.078751087188721, "learning_rate": 1.2167074330751461e-05, "loss": 1.7765, "step": 103455 }, { "epoch": 7.029487702133442, "grad_norm": 3.6889612674713135, "learning_rate": 1.216282782986819e-05, "loss": 1.8953, "step": 103460 }, { "epoch": 7.029827422204104, "grad_norm": 
4.449874401092529, "learning_rate": 1.2158581328984917e-05, "loss": 2.1313, "step": 103465 }, { "epoch": 7.030167142274766, "grad_norm": 3.4644343852996826, "learning_rate": 1.2154334828101645e-05, "loss": 2.0479, "step": 103470 }, { "epoch": 7.030506862345427, "grad_norm": 2.853604555130005, "learning_rate": 1.2150088327218373e-05, "loss": 1.8666, "step": 103475 }, { "epoch": 7.030846582416089, "grad_norm": 4.06282901763916, "learning_rate": 1.21458418263351e-05, "loss": 1.9325, "step": 103480 }, { "epoch": 7.031186302486751, "grad_norm": 4.080836296081543, "learning_rate": 1.2141595325451828e-05, "loss": 1.9314, "step": 103485 }, { "epoch": 7.031526022557412, "grad_norm": 4.451397895812988, "learning_rate": 1.2137348824568556e-05, "loss": 2.0059, "step": 103490 }, { "epoch": 7.031865742628074, "grad_norm": 4.217087745666504, "learning_rate": 1.2133102323685285e-05, "loss": 1.9737, "step": 103495 }, { "epoch": 7.0322054626987365, "grad_norm": 3.997516393661499, "learning_rate": 1.2128855822802012e-05, "loss": 2.1716, "step": 103500 }, { "epoch": 7.032545182769398, "grad_norm": 3.2847938537597656, "learning_rate": 1.212460932191874e-05, "loss": 1.9506, "step": 103505 }, { "epoch": 7.03288490284006, "grad_norm": 3.2548182010650635, "learning_rate": 1.2120362821035468e-05, "loss": 1.9084, "step": 103510 }, { "epoch": 7.033224622910722, "grad_norm": 2.9191489219665527, "learning_rate": 1.2116116320152194e-05, "loss": 2.0182, "step": 103515 }, { "epoch": 7.033564342981383, "grad_norm": 3.9379000663757324, "learning_rate": 1.2111869819268924e-05, "loss": 1.9933, "step": 103520 }, { "epoch": 7.033904063052045, "grad_norm": 3.3640120029449463, "learning_rate": 1.2107623318385652e-05, "loss": 1.9406, "step": 103525 }, { "epoch": 7.034243783122707, "grad_norm": 3.7705116271972656, "learning_rate": 1.210337681750238e-05, "loss": 1.905, "step": 103530 }, { "epoch": 7.034583503193368, "grad_norm": 3.176701307296753, "learning_rate": 1.2099130316619106e-05, "loss": 2.0938, 
"step": 103535 }, { "epoch": 7.03492322326403, "grad_norm": 3.822779655456543, "learning_rate": 1.2094883815735834e-05, "loss": 1.958, "step": 103540 }, { "epoch": 7.0352629433346925, "grad_norm": 3.938755512237549, "learning_rate": 1.2090637314852562e-05, "loss": 1.7703, "step": 103545 }, { "epoch": 7.035602663405354, "grad_norm": 3.301435947418213, "learning_rate": 1.208639081396929e-05, "loss": 1.9252, "step": 103550 }, { "epoch": 7.035942383476016, "grad_norm": 2.9333245754241943, "learning_rate": 1.2082144313086018e-05, "loss": 2.267, "step": 103555 }, { "epoch": 7.036282103546678, "grad_norm": 3.5775961875915527, "learning_rate": 1.2077897812202746e-05, "loss": 2.2202, "step": 103560 }, { "epoch": 7.036621823617339, "grad_norm": 4.271206855773926, "learning_rate": 1.2073651311319472e-05, "loss": 2.208, "step": 103565 }, { "epoch": 7.036961543688001, "grad_norm": 3.7737843990325928, "learning_rate": 1.20694048104362e-05, "loss": 2.018, "step": 103570 }, { "epoch": 7.037301263758663, "grad_norm": 2.94398832321167, "learning_rate": 1.206515830955293e-05, "loss": 1.9204, "step": 103575 }, { "epoch": 7.037640983829324, "grad_norm": 3.7960903644561768, "learning_rate": 1.2060911808669658e-05, "loss": 1.8407, "step": 103580 }, { "epoch": 7.037980703899986, "grad_norm": 3.295846700668335, "learning_rate": 1.2056665307786384e-05, "loss": 2.0445, "step": 103585 }, { "epoch": 7.0383204239706485, "grad_norm": 3.567533254623413, "learning_rate": 1.2052418806903112e-05, "loss": 1.9069, "step": 103590 }, { "epoch": 7.03866014404131, "grad_norm": 3.3531131744384766, "learning_rate": 1.204817230601984e-05, "loss": 2.2922, "step": 103595 }, { "epoch": 7.038999864111972, "grad_norm": 4.522474765777588, "learning_rate": 1.2043925805136568e-05, "loss": 1.8853, "step": 103600 }, { "epoch": 7.039339584182634, "grad_norm": 3.5770370960235596, "learning_rate": 1.2039679304253296e-05, "loss": 2.2696, "step": 103605 }, { "epoch": 7.039679304253295, "grad_norm": 4.6209797859191895, 
"learning_rate": 1.2035432803370024e-05, "loss": 2.2464, "step": 103610 }, { "epoch": 7.040019024323957, "grad_norm": 3.611299753189087, "learning_rate": 1.2031186302486752e-05, "loss": 2.0082, "step": 103615 }, { "epoch": 7.040358744394619, "grad_norm": 3.3319666385650635, "learning_rate": 1.2026939801603479e-05, "loss": 2.0092, "step": 103620 }, { "epoch": 7.04069846446528, "grad_norm": 3.881314277648926, "learning_rate": 1.2022693300720207e-05, "loss": 2.0801, "step": 103625 }, { "epoch": 7.041038184535942, "grad_norm": 3.795274257659912, "learning_rate": 1.2018446799836935e-05, "loss": 1.8883, "step": 103630 }, { "epoch": 7.0413779046066045, "grad_norm": 2.965222120285034, "learning_rate": 1.2014200298953663e-05, "loss": 1.9757, "step": 103635 }, { "epoch": 7.041717624677266, "grad_norm": 4.518968105316162, "learning_rate": 1.200995379807039e-05, "loss": 1.9588, "step": 103640 }, { "epoch": 7.042057344747928, "grad_norm": 3.8667218685150146, "learning_rate": 1.2005707297187119e-05, "loss": 2.0509, "step": 103645 }, { "epoch": 7.04239706481859, "grad_norm": 5.0931220054626465, "learning_rate": 1.2001460796303845e-05, "loss": 2.179, "step": 103650 }, { "epoch": 7.042736784889251, "grad_norm": 4.831640720367432, "learning_rate": 1.1997214295420573e-05, "loss": 2.2694, "step": 103655 }, { "epoch": 7.043076504959913, "grad_norm": 4.078181743621826, "learning_rate": 1.1992967794537303e-05, "loss": 2.0571, "step": 103660 }, { "epoch": 7.043416225030575, "grad_norm": 3.601166009902954, "learning_rate": 1.198872129365403e-05, "loss": 2.0485, "step": 103665 }, { "epoch": 7.043755945101236, "grad_norm": 3.6136085987091064, "learning_rate": 1.1984474792770757e-05, "loss": 2.1734, "step": 103670 }, { "epoch": 7.0440956651718984, "grad_norm": 4.935202598571777, "learning_rate": 1.1980228291887485e-05, "loss": 2.0921, "step": 103675 }, { "epoch": 7.0444353852425605, "grad_norm": 3.849423885345459, "learning_rate": 1.1975981791004213e-05, "loss": 2.0141, "step": 103680 }, { 
"epoch": 7.044775105313222, "grad_norm": 3.6775052547454834, "learning_rate": 1.1971735290120941e-05, "loss": 2.0844, "step": 103685 }, { "epoch": 7.045114825383884, "grad_norm": 3.830615758895874, "learning_rate": 1.1967488789237669e-05, "loss": 1.5838, "step": 103690 }, { "epoch": 7.045454545454546, "grad_norm": 3.4850172996520996, "learning_rate": 1.1963242288354397e-05, "loss": 1.8419, "step": 103695 }, { "epoch": 7.045794265525207, "grad_norm": 3.358050584793091, "learning_rate": 1.1958995787471125e-05, "loss": 1.8409, "step": 103700 }, { "epoch": 7.046133985595869, "grad_norm": 4.376219749450684, "learning_rate": 1.1954749286587851e-05, "loss": 1.96, "step": 103705 }, { "epoch": 7.046473705666531, "grad_norm": 4.299315929412842, "learning_rate": 1.195050278570458e-05, "loss": 2.2005, "step": 103710 }, { "epoch": 7.046813425737192, "grad_norm": 3.3961989879608154, "learning_rate": 1.1946256284821309e-05, "loss": 1.9268, "step": 103715 }, { "epoch": 7.0471531458078545, "grad_norm": 3.1099374294281006, "learning_rate": 1.1942009783938035e-05, "loss": 2.0222, "step": 103720 }, { "epoch": 7.0474928658785165, "grad_norm": 3.8905670642852783, "learning_rate": 1.1937763283054763e-05, "loss": 2.0911, "step": 103725 }, { "epoch": 7.047832585949178, "grad_norm": 3.453779697418213, "learning_rate": 1.1933516782171491e-05, "loss": 1.9569, "step": 103730 }, { "epoch": 7.04817230601984, "grad_norm": 3.7915494441986084, "learning_rate": 1.1929270281288218e-05, "loss": 1.9703, "step": 103735 }, { "epoch": 7.048512026090501, "grad_norm": 3.9952845573425293, "learning_rate": 1.1925023780404947e-05, "loss": 2.0788, "step": 103740 }, { "epoch": 7.048851746161163, "grad_norm": 3.6355109214782715, "learning_rate": 1.1920777279521675e-05, "loss": 2.1418, "step": 103745 }, { "epoch": 7.049191466231825, "grad_norm": 3.3680543899536133, "learning_rate": 1.1916530778638403e-05, "loss": 2.08, "step": 103750 }, { "epoch": 7.049531186302486, "grad_norm": 3.652958631515503, "learning_rate": 
1.191228427775513e-05, "loss": 1.9174, "step": 103755 }, { "epoch": 7.049870906373148, "grad_norm": 4.133410930633545, "learning_rate": 1.1908037776871858e-05, "loss": 1.9585, "step": 103760 }, { "epoch": 7.0502106264438105, "grad_norm": 4.579610824584961, "learning_rate": 1.1903791275988586e-05, "loss": 1.5752, "step": 103765 }, { "epoch": 7.050550346514472, "grad_norm": 4.0517120361328125, "learning_rate": 1.1899544775105314e-05, "loss": 2.2273, "step": 103770 }, { "epoch": 7.050890066585134, "grad_norm": 3.019749402999878, "learning_rate": 1.1895298274222042e-05, "loss": 1.8386, "step": 103775 }, { "epoch": 7.051229786655796, "grad_norm": 3.447503089904785, "learning_rate": 1.189105177333877e-05, "loss": 1.8966, "step": 103780 }, { "epoch": 7.051569506726457, "grad_norm": 3.318610429763794, "learning_rate": 1.1886805272455498e-05, "loss": 2.1202, "step": 103785 }, { "epoch": 7.051909226797119, "grad_norm": 3.97404408454895, "learning_rate": 1.1882558771572224e-05, "loss": 1.961, "step": 103790 }, { "epoch": 7.052248946867781, "grad_norm": 3.2658822536468506, "learning_rate": 1.1878312270688952e-05, "loss": 2.0476, "step": 103795 }, { "epoch": 7.052588666938442, "grad_norm": 3.574740409851074, "learning_rate": 1.1874065769805682e-05, "loss": 1.753, "step": 103800 }, { "epoch": 7.052928387009104, "grad_norm": 2.9272007942199707, "learning_rate": 1.1869819268922408e-05, "loss": 1.9355, "step": 103805 }, { "epoch": 7.0532681070797665, "grad_norm": 3.5579583644866943, "learning_rate": 1.1865572768039136e-05, "loss": 1.9196, "step": 103810 }, { "epoch": 7.053607827150428, "grad_norm": 3.5215206146240234, "learning_rate": 1.1861326267155864e-05, "loss": 2.0864, "step": 103815 }, { "epoch": 7.05394754722109, "grad_norm": 3.3855068683624268, "learning_rate": 1.185707976627259e-05, "loss": 1.8465, "step": 103820 }, { "epoch": 7.054287267291752, "grad_norm": 3.6761581897735596, "learning_rate": 1.185283326538932e-05, "loss": 1.9045, "step": 103825 }, { "epoch": 
7.054626987362413, "grad_norm": 3.688837766647339, "learning_rate": 1.1848586764506048e-05, "loss": 2.0958, "step": 103830 }, { "epoch": 7.054966707433075, "grad_norm": 4.070380687713623, "learning_rate": 1.1844340263622776e-05, "loss": 1.8795, "step": 103835 }, { "epoch": 7.055306427503737, "grad_norm": 2.491910219192505, "learning_rate": 1.1840093762739503e-05, "loss": 1.7637, "step": 103840 }, { "epoch": 7.055646147574398, "grad_norm": 3.1713054180145264, "learning_rate": 1.183584726185623e-05, "loss": 1.9772, "step": 103845 }, { "epoch": 7.05598586764506, "grad_norm": 3.596679449081421, "learning_rate": 1.1831600760972959e-05, "loss": 1.909, "step": 103850 }, { "epoch": 7.0563255877157225, "grad_norm": 3.6701958179473877, "learning_rate": 1.1827354260089687e-05, "loss": 1.8132, "step": 103855 }, { "epoch": 7.056665307786384, "grad_norm": 4.702214241027832, "learning_rate": 1.1823107759206415e-05, "loss": 2.1148, "step": 103860 }, { "epoch": 7.057005027857046, "grad_norm": 3.249251365661621, "learning_rate": 1.1818861258323143e-05, "loss": 1.9114, "step": 103865 }, { "epoch": 7.057344747927708, "grad_norm": 4.881921291351318, "learning_rate": 1.181461475743987e-05, "loss": 1.9607, "step": 103870 }, { "epoch": 7.057684467998369, "grad_norm": 4.386089324951172, "learning_rate": 1.1810368256556597e-05, "loss": 1.8652, "step": 103875 }, { "epoch": 7.058024188069031, "grad_norm": 4.254125595092773, "learning_rate": 1.1806121755673327e-05, "loss": 1.5698, "step": 103880 }, { "epoch": 7.058363908139693, "grad_norm": 3.2988882064819336, "learning_rate": 1.1801875254790055e-05, "loss": 1.9316, "step": 103885 }, { "epoch": 7.058703628210354, "grad_norm": 3.827623128890991, "learning_rate": 1.1797628753906781e-05, "loss": 2.0881, "step": 103890 }, { "epoch": 7.059043348281016, "grad_norm": 2.9730844497680664, "learning_rate": 1.1793382253023509e-05, "loss": 2.0864, "step": 103895 }, { "epoch": 7.0593830683516785, "grad_norm": 3.9399003982543945, "learning_rate": 
1.1789135752140237e-05, "loss": 2.0791, "step": 103900 }, { "epoch": 7.05972278842234, "grad_norm": 3.708841562271118, "learning_rate": 1.1784889251256965e-05, "loss": 2.2557, "step": 103905 }, { "epoch": 7.060062508493002, "grad_norm": 3.5307559967041016, "learning_rate": 1.1780642750373693e-05, "loss": 2.0982, "step": 103910 }, { "epoch": 7.060402228563664, "grad_norm": 3.6741695404052734, "learning_rate": 1.1776396249490421e-05, "loss": 1.7873, "step": 103915 }, { "epoch": 7.060741948634325, "grad_norm": 4.472443103790283, "learning_rate": 1.1772149748607149e-05, "loss": 1.8676, "step": 103920 }, { "epoch": 7.061081668704987, "grad_norm": 3.466315507888794, "learning_rate": 1.1767903247723875e-05, "loss": 1.907, "step": 103925 }, { "epoch": 7.061421388775649, "grad_norm": 4.268834114074707, "learning_rate": 1.1763656746840603e-05, "loss": 1.9756, "step": 103930 }, { "epoch": 7.06176110884631, "grad_norm": 3.512573719024658, "learning_rate": 1.1759410245957333e-05, "loss": 2.2378, "step": 103935 }, { "epoch": 7.062100828916972, "grad_norm": 3.8393707275390625, "learning_rate": 1.175516374507406e-05, "loss": 1.8498, "step": 103940 }, { "epoch": 7.0624405489876345, "grad_norm": 3.2202231884002686, "learning_rate": 1.1750917244190787e-05, "loss": 2.2517, "step": 103945 }, { "epoch": 7.062780269058296, "grad_norm": 3.584395408630371, "learning_rate": 1.1746670743307515e-05, "loss": 1.9587, "step": 103950 }, { "epoch": 7.063119989128958, "grad_norm": 3.511931896209717, "learning_rate": 1.1742424242424243e-05, "loss": 2.1687, "step": 103955 }, { "epoch": 7.06345970919962, "grad_norm": 3.135946035385132, "learning_rate": 1.1738177741540971e-05, "loss": 2.1054, "step": 103960 }, { "epoch": 7.063799429270281, "grad_norm": 4.283253192901611, "learning_rate": 1.17339312406577e-05, "loss": 2.2044, "step": 103965 }, { "epoch": 7.064139149340943, "grad_norm": 4.132749080657959, "learning_rate": 1.1729684739774427e-05, "loss": 2.2117, "step": 103970 }, { "epoch": 
7.064478869411605, "grad_norm": 2.9949772357940674, "learning_rate": 1.1725438238891154e-05, "loss": 2.1452, "step": 103975 }, { "epoch": 7.064818589482266, "grad_norm": 3.9169771671295166, "learning_rate": 1.1721191738007882e-05, "loss": 1.9471, "step": 103980 }, { "epoch": 7.0651583095529285, "grad_norm": 3.847611427307129, "learning_rate": 1.171694523712461e-05, "loss": 1.8639, "step": 103985 }, { "epoch": 7.0654980296235905, "grad_norm": 3.7636101245880127, "learning_rate": 1.1712698736241338e-05, "loss": 1.7967, "step": 103990 }, { "epoch": 7.065837749694252, "grad_norm": 4.0699357986450195, "learning_rate": 1.1708452235358066e-05, "loss": 1.9796, "step": 103995 }, { "epoch": 7.066177469764914, "grad_norm": 4.324845314025879, "learning_rate": 1.1704205734474794e-05, "loss": 2.1416, "step": 104000 }, { "epoch": 7.066517189835576, "grad_norm": 3.174910545349121, "learning_rate": 1.1699959233591522e-05, "loss": 2.0344, "step": 104005 }, { "epoch": 7.066856909906237, "grad_norm": 3.8855249881744385, "learning_rate": 1.1695712732708248e-05, "loss": 2.0281, "step": 104010 }, { "epoch": 7.067196629976899, "grad_norm": 3.727062940597534, "learning_rate": 1.1691466231824976e-05, "loss": 1.9872, "step": 104015 }, { "epoch": 7.067536350047561, "grad_norm": 4.509471416473389, "learning_rate": 1.1687219730941706e-05, "loss": 1.9931, "step": 104020 }, { "epoch": 7.067876070118222, "grad_norm": 3.7035887241363525, "learning_rate": 1.1682973230058432e-05, "loss": 1.9589, "step": 104025 }, { "epoch": 7.0682157901888845, "grad_norm": 3.2454519271850586, "learning_rate": 1.167872672917516e-05, "loss": 2.0291, "step": 104030 }, { "epoch": 7.0685555102595465, "grad_norm": 3.3968682289123535, "learning_rate": 1.1674480228291888e-05, "loss": 2.0276, "step": 104035 }, { "epoch": 7.068895230330208, "grad_norm": 4.058896541595459, "learning_rate": 1.1670233727408616e-05, "loss": 2.1218, "step": 104040 }, { "epoch": 7.06923495040087, "grad_norm": 3.2137386798858643, "learning_rate": 
1.1665987226525344e-05, "loss": 1.8861, "step": 104045 }, { "epoch": 7.069574670471532, "grad_norm": 3.3920600414276123, "learning_rate": 1.1661740725642072e-05, "loss": 2.0641, "step": 104050 }, { "epoch": 7.069914390542193, "grad_norm": 4.293676853179932, "learning_rate": 1.16574942247588e-05, "loss": 1.7851, "step": 104055 }, { "epoch": 7.070254110612855, "grad_norm": 3.35764741897583, "learning_rate": 1.1653247723875526e-05, "loss": 2.3429, "step": 104060 }, { "epoch": 7.070593830683517, "grad_norm": 2.792989492416382, "learning_rate": 1.1649001222992254e-05, "loss": 1.7393, "step": 104065 }, { "epoch": 7.070933550754178, "grad_norm": 3.788266658782959, "learning_rate": 1.1644754722108982e-05, "loss": 1.8551, "step": 104070 }, { "epoch": 7.0712732708248405, "grad_norm": 2.743178129196167, "learning_rate": 1.164050822122571e-05, "loss": 1.9282, "step": 104075 }, { "epoch": 7.0716129908955025, "grad_norm": 5.181650638580322, "learning_rate": 1.1636261720342438e-05, "loss": 1.9681, "step": 104080 }, { "epoch": 7.071952710966164, "grad_norm": 3.531153917312622, "learning_rate": 1.1632015219459166e-05, "loss": 2.0883, "step": 104085 }, { "epoch": 7.072292431036826, "grad_norm": 4.334290027618408, "learning_rate": 1.1627768718575894e-05, "loss": 2.0869, "step": 104090 }, { "epoch": 7.072632151107487, "grad_norm": 4.00590181350708, "learning_rate": 1.162352221769262e-05, "loss": 2.0723, "step": 104095 }, { "epoch": 7.072971871178149, "grad_norm": 4.393232822418213, "learning_rate": 1.161927571680935e-05, "loss": 1.8581, "step": 104100 }, { "epoch": 7.073311591248811, "grad_norm": 3.977426052093506, "learning_rate": 1.1615029215926078e-05, "loss": 1.9861, "step": 104105 }, { "epoch": 7.073651311319472, "grad_norm": 3.8485260009765625, "learning_rate": 1.1610782715042805e-05, "loss": 1.9519, "step": 104110 }, { "epoch": 7.073991031390134, "grad_norm": 3.6087570190429688, "learning_rate": 1.1606536214159533e-05, "loss": 2.1786, "step": 104115 }, { "epoch": 
7.0743307514607965, "grad_norm": 3.748128652572632, "learning_rate": 1.160228971327626e-05, "loss": 1.9729, "step": 104120 }, { "epoch": 7.074670471531458, "grad_norm": 4.312533378601074, "learning_rate": 1.1598043212392989e-05, "loss": 1.8534, "step": 104125 }, { "epoch": 7.07501019160212, "grad_norm": 3.8196091651916504, "learning_rate": 1.1593796711509717e-05, "loss": 2.1259, "step": 104130 }, { "epoch": 7.075349911672782, "grad_norm": 3.853951930999756, "learning_rate": 1.1589550210626445e-05, "loss": 2.1814, "step": 104135 }, { "epoch": 7.075689631743443, "grad_norm": 3.0068135261535645, "learning_rate": 1.1585303709743173e-05, "loss": 1.9496, "step": 104140 }, { "epoch": 7.076029351814105, "grad_norm": 2.8763844966888428, "learning_rate": 1.1581057208859899e-05, "loss": 2.1479, "step": 104145 }, { "epoch": 7.076369071884767, "grad_norm": 3.6449358463287354, "learning_rate": 1.1576810707976627e-05, "loss": 1.9779, "step": 104150 }, { "epoch": 7.076708791955428, "grad_norm": 4.151993274688721, "learning_rate": 1.1572564207093357e-05, "loss": 2.0782, "step": 104155 }, { "epoch": 7.07704851202609, "grad_norm": 3.8282718658447266, "learning_rate": 1.1568317706210083e-05, "loss": 2.0899, "step": 104160 }, { "epoch": 7.0773882320967525, "grad_norm": 3.2422351837158203, "learning_rate": 1.1564071205326811e-05, "loss": 1.9538, "step": 104165 }, { "epoch": 7.077727952167414, "grad_norm": 4.053403854370117, "learning_rate": 1.155982470444354e-05, "loss": 1.8821, "step": 104170 }, { "epoch": 7.078067672238076, "grad_norm": 4.342482566833496, "learning_rate": 1.1555578203560267e-05, "loss": 2.0458, "step": 104175 }, { "epoch": 7.078407392308738, "grad_norm": 2.8806068897247314, "learning_rate": 1.1551331702676995e-05, "loss": 2.0545, "step": 104180 }, { "epoch": 7.078747112379399, "grad_norm": 3.657987594604492, "learning_rate": 1.1547085201793723e-05, "loss": 2.0634, "step": 104185 }, { "epoch": 7.079086832450061, "grad_norm": 4.178296089172363, "learning_rate": 
1.1542838700910451e-05, "loss": 1.9813, "step": 104190 }, { "epoch": 7.079426552520723, "grad_norm": 3.9677927494049072, "learning_rate": 1.1538592200027178e-05, "loss": 1.978, "step": 104195 }, { "epoch": 7.079766272591384, "grad_norm": 3.5378971099853516, "learning_rate": 1.1534345699143906e-05, "loss": 2.0529, "step": 104200 }, { "epoch": 7.080105992662046, "grad_norm": 3.0244009494781494, "learning_rate": 1.1530099198260634e-05, "loss": 1.924, "step": 104205 }, { "epoch": 7.0804457127327085, "grad_norm": 4.9547929763793945, "learning_rate": 1.1525852697377362e-05, "loss": 2.063, "step": 104210 }, { "epoch": 7.08078543280337, "grad_norm": 3.7321126461029053, "learning_rate": 1.152160619649409e-05, "loss": 2.1812, "step": 104215 }, { "epoch": 7.081125152874032, "grad_norm": 3.8193106651306152, "learning_rate": 1.1517359695610818e-05, "loss": 2.0491, "step": 104220 }, { "epoch": 7.081464872944694, "grad_norm": 3.4982573986053467, "learning_rate": 1.1513113194727546e-05, "loss": 1.8232, "step": 104225 }, { "epoch": 7.081804593015355, "grad_norm": 4.28459358215332, "learning_rate": 1.1508866693844272e-05, "loss": 1.7317, "step": 104230 }, { "epoch": 7.082144313086017, "grad_norm": 4.208106994628906, "learning_rate": 1.1504620192961e-05, "loss": 1.6622, "step": 104235 }, { "epoch": 7.082484033156679, "grad_norm": 3.8083159923553467, "learning_rate": 1.150037369207773e-05, "loss": 1.9257, "step": 104240 }, { "epoch": 7.08282375322734, "grad_norm": 3.510817050933838, "learning_rate": 1.1496127191194456e-05, "loss": 2.0713, "step": 104245 }, { "epoch": 7.083163473298002, "grad_norm": 5.049505710601807, "learning_rate": 1.1491880690311184e-05, "loss": 1.9506, "step": 104250 }, { "epoch": 7.0835031933686645, "grad_norm": 3.5139973163604736, "learning_rate": 1.1487634189427912e-05, "loss": 1.9448, "step": 104255 }, { "epoch": 7.083842913439326, "grad_norm": 2.8846256732940674, "learning_rate": 1.148338768854464e-05, "loss": 2.1963, "step": 104260 }, { "epoch": 
7.084182633509988, "grad_norm": 3.8720006942749023, "learning_rate": 1.1479141187661368e-05, "loss": 1.8838, "step": 104265 }, { "epoch": 7.08452235358065, "grad_norm": 2.6708850860595703, "learning_rate": 1.1474894686778096e-05, "loss": 1.8832, "step": 104270 }, { "epoch": 7.084862073651311, "grad_norm": 3.6334760189056396, "learning_rate": 1.1470648185894824e-05, "loss": 2.0103, "step": 104275 }, { "epoch": 7.085201793721973, "grad_norm": 3.3969671726226807, "learning_rate": 1.146640168501155e-05, "loss": 2.1102, "step": 104280 }, { "epoch": 7.085541513792635, "grad_norm": 3.2718825340270996, "learning_rate": 1.1462155184128278e-05, "loss": 1.8945, "step": 104285 }, { "epoch": 7.085881233863296, "grad_norm": 5.0750017166137695, "learning_rate": 1.1457908683245006e-05, "loss": 2.3241, "step": 104290 }, { "epoch": 7.0862209539339585, "grad_norm": 4.151271820068359, "learning_rate": 1.1453662182361736e-05, "loss": 2.1201, "step": 104295 }, { "epoch": 7.0865606740046205, "grad_norm": 4.002301216125488, "learning_rate": 1.1449415681478462e-05, "loss": 1.8443, "step": 104300 }, { "epoch": 7.086900394075282, "grad_norm": 3.314145803451538, "learning_rate": 1.144516918059519e-05, "loss": 1.9466, "step": 104305 }, { "epoch": 7.087240114145944, "grad_norm": 3.6392576694488525, "learning_rate": 1.1440922679711918e-05, "loss": 1.6521, "step": 104310 }, { "epoch": 7.087579834216606, "grad_norm": 3.6840734481811523, "learning_rate": 1.1436676178828645e-05, "loss": 1.946, "step": 104315 }, { "epoch": 7.087919554287267, "grad_norm": 3.823173999786377, "learning_rate": 1.1432429677945374e-05, "loss": 1.8761, "step": 104320 }, { "epoch": 7.088259274357929, "grad_norm": 4.318667888641357, "learning_rate": 1.1428183177062102e-05, "loss": 2.0607, "step": 104325 }, { "epoch": 7.088598994428591, "grad_norm": 3.1309731006622314, "learning_rate": 1.1423936676178829e-05, "loss": 1.9304, "step": 104330 }, { "epoch": 7.088938714499252, "grad_norm": 4.1196675300598145, "learning_rate": 
1.1419690175295557e-05, "loss": 1.895, "step": 104335 }, { "epoch": 7.0892784345699145, "grad_norm": 3.272491455078125, "learning_rate": 1.1415443674412285e-05, "loss": 2.0313, "step": 104340 }, { "epoch": 7.0896181546405765, "grad_norm": 4.175080299377441, "learning_rate": 1.1411197173529013e-05, "loss": 1.8953, "step": 104345 }, { "epoch": 7.089957874711238, "grad_norm": 3.8479204177856445, "learning_rate": 1.140695067264574e-05, "loss": 1.6814, "step": 104350 }, { "epoch": 7.0902975947819, "grad_norm": 3.2598767280578613, "learning_rate": 1.1402704171762469e-05, "loss": 2.0666, "step": 104355 }, { "epoch": 7.090637314852562, "grad_norm": 4.495971202850342, "learning_rate": 1.1398457670879197e-05, "loss": 1.9135, "step": 104360 }, { "epoch": 7.090977034923223, "grad_norm": 2.9144372940063477, "learning_rate": 1.1394211169995923e-05, "loss": 1.9278, "step": 104365 }, { "epoch": 7.091316754993885, "grad_norm": 3.4098451137542725, "learning_rate": 1.1389964669112651e-05, "loss": 1.9933, "step": 104370 }, { "epoch": 7.091656475064547, "grad_norm": 3.2867941856384277, "learning_rate": 1.1385718168229379e-05, "loss": 2.1384, "step": 104375 }, { "epoch": 7.091996195135208, "grad_norm": 3.5049819946289062, "learning_rate": 1.1381471667346109e-05, "loss": 1.9926, "step": 104380 }, { "epoch": 7.0923359152058705, "grad_norm": 3.623997926712036, "learning_rate": 1.1377225166462835e-05, "loss": 1.7009, "step": 104385 }, { "epoch": 7.0926756352765326, "grad_norm": 3.8028759956359863, "learning_rate": 1.1372978665579563e-05, "loss": 2.0988, "step": 104390 }, { "epoch": 7.093015355347194, "grad_norm": 4.226990699768066, "learning_rate": 1.1368732164696291e-05, "loss": 2.0771, "step": 104395 }, { "epoch": 7.093355075417856, "grad_norm": 3.337263822555542, "learning_rate": 1.1364485663813017e-05, "loss": 1.9583, "step": 104400 }, { "epoch": 7.093694795488518, "grad_norm": 3.724515438079834, "learning_rate": 1.1360239162929747e-05, "loss": 1.7199, "step": 104405 }, { "epoch": 
7.094034515559179, "grad_norm": 4.499773979187012, "learning_rate": 1.1355992662046475e-05, "loss": 2.0366, "step": 104410 }, { "epoch": 7.094374235629841, "grad_norm": 3.214524507522583, "learning_rate": 1.1351746161163201e-05, "loss": 1.8809, "step": 104415 }, { "epoch": 7.094713955700502, "grad_norm": 4.068847179412842, "learning_rate": 1.134749966027993e-05, "loss": 1.9709, "step": 104420 }, { "epoch": 7.095053675771164, "grad_norm": 3.216434955596924, "learning_rate": 1.1343253159396657e-05, "loss": 1.8067, "step": 104425 }, { "epoch": 7.0953933958418265, "grad_norm": 3.039281129837036, "learning_rate": 1.1339006658513385e-05, "loss": 1.9161, "step": 104430 }, { "epoch": 7.095733115912488, "grad_norm": 3.323406934738159, "learning_rate": 1.1334760157630113e-05, "loss": 2.0344, "step": 104435 }, { "epoch": 7.09607283598315, "grad_norm": 3.837022304534912, "learning_rate": 1.1330513656746841e-05, "loss": 2.0158, "step": 104440 }, { "epoch": 7.096412556053812, "grad_norm": 3.452954053878784, "learning_rate": 1.132626715586357e-05, "loss": 1.9147, "step": 104445 }, { "epoch": 7.096752276124473, "grad_norm": 4.5521697998046875, "learning_rate": 1.1322020654980296e-05, "loss": 2.0939, "step": 104450 }, { "epoch": 7.097091996195135, "grad_norm": 3.725146770477295, "learning_rate": 1.1317774154097024e-05, "loss": 1.9271, "step": 104455 }, { "epoch": 7.097431716265797, "grad_norm": 4.727158546447754, "learning_rate": 1.1313527653213753e-05, "loss": 1.8579, "step": 104460 }, { "epoch": 7.097771436336458, "grad_norm": 3.9740519523620605, "learning_rate": 1.1309281152330481e-05, "loss": 1.9512, "step": 104465 }, { "epoch": 7.09811115640712, "grad_norm": 2.9683895111083984, "learning_rate": 1.1305034651447208e-05, "loss": 1.6226, "step": 104470 }, { "epoch": 7.0984508764777825, "grad_norm": 3.3178694248199463, "learning_rate": 1.1300788150563936e-05, "loss": 1.8765, "step": 104475 }, { "epoch": 7.098790596548444, "grad_norm": 3.377286434173584, "learning_rate": 
1.1296541649680664e-05, "loss": 2.0494, "step": 104480 }, { "epoch": 7.099130316619106, "grad_norm": 3.306629180908203, "learning_rate": 1.1292295148797392e-05, "loss": 1.7589, "step": 104485 }, { "epoch": 7.099470036689768, "grad_norm": 3.568629026412964, "learning_rate": 1.128804864791412e-05, "loss": 2.2036, "step": 104490 }, { "epoch": 7.099809756760429, "grad_norm": 3.8737411499023438, "learning_rate": 1.1283802147030848e-05, "loss": 1.7857, "step": 104495 }, { "epoch": 7.100149476831091, "grad_norm": 4.086543560028076, "learning_rate": 1.1279555646147574e-05, "loss": 1.9746, "step": 104500 }, { "epoch": 7.100489196901753, "grad_norm": 3.7964518070220947, "learning_rate": 1.1275309145264302e-05, "loss": 1.9826, "step": 104505 }, { "epoch": 7.100828916972414, "grad_norm": 3.6771600246429443, "learning_rate": 1.127106264438103e-05, "loss": 2.2413, "step": 104510 }, { "epoch": 7.101168637043076, "grad_norm": 3.6309919357299805, "learning_rate": 1.126681614349776e-05, "loss": 2.103, "step": 104515 }, { "epoch": 7.1015083571137385, "grad_norm": 4.283088684082031, "learning_rate": 1.1262569642614486e-05, "loss": 1.9304, "step": 104520 }, { "epoch": 7.1018480771844, "grad_norm": 3.1272318363189697, "learning_rate": 1.1258323141731214e-05, "loss": 1.8002, "step": 104525 }, { "epoch": 7.102187797255062, "grad_norm": 3.7607638835906982, "learning_rate": 1.1254076640847942e-05, "loss": 1.8983, "step": 104530 }, { "epoch": 7.102527517325724, "grad_norm": 3.5998830795288086, "learning_rate": 1.1249830139964668e-05, "loss": 1.9772, "step": 104535 }, { "epoch": 7.102867237396385, "grad_norm": 3.502539873123169, "learning_rate": 1.1245583639081398e-05, "loss": 1.9343, "step": 104540 }, { "epoch": 7.103206957467047, "grad_norm": 3.9090561866760254, "learning_rate": 1.1241337138198126e-05, "loss": 2.1013, "step": 104545 }, { "epoch": 7.103546677537709, "grad_norm": 3.83420729637146, "learning_rate": 1.1237090637314854e-05, "loss": 1.9725, "step": 104550 }, { "epoch": 
7.10388639760837, "grad_norm": 3.493544340133667, "learning_rate": 1.123284413643158e-05, "loss": 1.8201, "step": 104555 }, { "epoch": 7.1042261176790324, "grad_norm": 4.056188583374023, "learning_rate": 1.1228597635548309e-05, "loss": 2.0083, "step": 104560 }, { "epoch": 7.1045658377496945, "grad_norm": 4.226642608642578, "learning_rate": 1.1224351134665037e-05, "loss": 1.9077, "step": 104565 }, { "epoch": 7.104905557820356, "grad_norm": 3.3014156818389893, "learning_rate": 1.1220104633781765e-05, "loss": 1.7286, "step": 104570 }, { "epoch": 7.105245277891018, "grad_norm": 3.927320957183838, "learning_rate": 1.1215858132898493e-05, "loss": 1.9581, "step": 104575 }, { "epoch": 7.10558499796168, "grad_norm": 4.571300029754639, "learning_rate": 1.121161163201522e-05, "loss": 1.9174, "step": 104580 }, { "epoch": 7.105924718032341, "grad_norm": 4.83978271484375, "learning_rate": 1.1207365131131947e-05, "loss": 2.0079, "step": 104585 }, { "epoch": 7.106264438103003, "grad_norm": 3.9904377460479736, "learning_rate": 1.1203118630248675e-05, "loss": 2.1548, "step": 104590 }, { "epoch": 7.106604158173665, "grad_norm": 3.745145559310913, "learning_rate": 1.1198872129365403e-05, "loss": 1.7201, "step": 104595 }, { "epoch": 7.106943878244326, "grad_norm": 4.273308753967285, "learning_rate": 1.1194625628482133e-05, "loss": 2.0538, "step": 104600 }, { "epoch": 7.1072835983149885, "grad_norm": 4.404043674468994, "learning_rate": 1.1190379127598859e-05, "loss": 2.0038, "step": 104605 }, { "epoch": 7.1076233183856505, "grad_norm": 3.0077931880950928, "learning_rate": 1.1186132626715587e-05, "loss": 1.695, "step": 104610 }, { "epoch": 7.107963038456312, "grad_norm": 3.0690691471099854, "learning_rate": 1.1181886125832315e-05, "loss": 1.8315, "step": 104615 }, { "epoch": 7.108302758526974, "grad_norm": 4.036259651184082, "learning_rate": 1.1177639624949041e-05, "loss": 1.9579, "step": 104620 }, { "epoch": 7.108642478597636, "grad_norm": 3.6963706016540527, "learning_rate": 
1.1173393124065771e-05, "loss": 1.6629, "step": 104625 }, { "epoch": 7.108982198668297, "grad_norm": 3.844594717025757, "learning_rate": 1.1169146623182499e-05, "loss": 1.8946, "step": 104630 }, { "epoch": 7.109321918738959, "grad_norm": 4.455342769622803, "learning_rate": 1.1164900122299227e-05, "loss": 1.9164, "step": 104635 }, { "epoch": 7.109661638809621, "grad_norm": 3.648083209991455, "learning_rate": 1.1160653621415953e-05, "loss": 1.9806, "step": 104640 }, { "epoch": 7.110001358880282, "grad_norm": 4.0535078048706055, "learning_rate": 1.1156407120532681e-05, "loss": 1.8785, "step": 104645 }, { "epoch": 7.1103410789509445, "grad_norm": 3.6665730476379395, "learning_rate": 1.115216061964941e-05, "loss": 1.9353, "step": 104650 }, { "epoch": 7.1106807990216065, "grad_norm": 4.1203436851501465, "learning_rate": 1.1147914118766137e-05, "loss": 1.7604, "step": 104655 }, { "epoch": 7.111020519092268, "grad_norm": 3.726121425628662, "learning_rate": 1.1143667617882865e-05, "loss": 2.0906, "step": 104660 }, { "epoch": 7.11136023916293, "grad_norm": 4.340296268463135, "learning_rate": 1.1139421116999593e-05, "loss": 1.8173, "step": 104665 }, { "epoch": 7.111699959233592, "grad_norm": 3.3523993492126465, "learning_rate": 1.113517461611632e-05, "loss": 1.9921, "step": 104670 }, { "epoch": 7.112039679304253, "grad_norm": 4.937530040740967, "learning_rate": 1.1130928115233048e-05, "loss": 2.0751, "step": 104675 }, { "epoch": 7.112379399374915, "grad_norm": 4.080369472503662, "learning_rate": 1.1126681614349777e-05, "loss": 1.9642, "step": 104680 }, { "epoch": 7.112719119445577, "grad_norm": 3.972503900527954, "learning_rate": 1.1122435113466505e-05, "loss": 1.9882, "step": 104685 }, { "epoch": 7.113058839516238, "grad_norm": 3.817805051803589, "learning_rate": 1.1118188612583232e-05, "loss": 2.0042, "step": 104690 }, { "epoch": 7.1133985595869005, "grad_norm": 3.3514115810394287, "learning_rate": 1.111394211169996e-05, "loss": 1.8971, "step": 104695 }, { "epoch": 
7.113738279657563, "grad_norm": 4.0107102394104, "learning_rate": 1.1109695610816688e-05, "loss": 2.0667, "step": 104700 }, { "epoch": 7.114077999728224, "grad_norm": 3.2729814052581787, "learning_rate": 1.1105449109933416e-05, "loss": 2.108, "step": 104705 }, { "epoch": 7.114417719798886, "grad_norm": 3.9751205444335938, "learning_rate": 1.1101202609050144e-05, "loss": 1.9833, "step": 104710 }, { "epoch": 7.114757439869548, "grad_norm": 3.3911118507385254, "learning_rate": 1.1096956108166872e-05, "loss": 2.1351, "step": 104715 }, { "epoch": 7.115097159940209, "grad_norm": 3.768371343612671, "learning_rate": 1.10927096072836e-05, "loss": 2.0652, "step": 104720 }, { "epoch": 7.115436880010871, "grad_norm": 4.015778064727783, "learning_rate": 1.1088463106400326e-05, "loss": 1.5991, "step": 104725 }, { "epoch": 7.115776600081533, "grad_norm": 3.6492490768432617, "learning_rate": 1.1084216605517054e-05, "loss": 2.2322, "step": 104730 }, { "epoch": 7.116116320152194, "grad_norm": 3.696193218231201, "learning_rate": 1.1079970104633782e-05, "loss": 1.8305, "step": 104735 }, { "epoch": 7.1164560402228565, "grad_norm": 3.490478277206421, "learning_rate": 1.107572360375051e-05, "loss": 1.9696, "step": 104740 }, { "epoch": 7.116795760293519, "grad_norm": 5.222921848297119, "learning_rate": 1.1071477102867238e-05, "loss": 1.8981, "step": 104745 }, { "epoch": 7.11713548036418, "grad_norm": 4.040754318237305, "learning_rate": 1.1067230601983966e-05, "loss": 1.7833, "step": 104750 }, { "epoch": 7.117475200434842, "grad_norm": 3.502760410308838, "learning_rate": 1.1062984101100692e-05, "loss": 2.0601, "step": 104755 }, { "epoch": 7.117814920505504, "grad_norm": 3.663613796234131, "learning_rate": 1.105873760021742e-05, "loss": 1.845, "step": 104760 }, { "epoch": 7.118154640576165, "grad_norm": 2.8535239696502686, "learning_rate": 1.105449109933415e-05, "loss": 1.7895, "step": 104765 }, { "epoch": 7.118494360646827, "grad_norm": 3.425920248031616, "learning_rate": 
1.1050244598450878e-05, "loss": 2.1735, "step": 104770 }, { "epoch": 7.118834080717488, "grad_norm": 3.084678888320923, "learning_rate": 1.1045998097567604e-05, "loss": 2.0755, "step": 104775 }, { "epoch": 7.11917380078815, "grad_norm": 3.7876834869384766, "learning_rate": 1.1041751596684332e-05, "loss": 1.9782, "step": 104780 }, { "epoch": 7.1195135208588125, "grad_norm": 3.0223231315612793, "learning_rate": 1.103750509580106e-05, "loss": 2.0837, "step": 104785 }, { "epoch": 7.119853240929474, "grad_norm": 3.147510290145874, "learning_rate": 1.1033258594917788e-05, "loss": 1.9073, "step": 104790 }, { "epoch": 7.120192961000136, "grad_norm": 3.701467275619507, "learning_rate": 1.1029012094034516e-05, "loss": 2.0181, "step": 104795 }, { "epoch": 7.120532681070798, "grad_norm": 4.760350227355957, "learning_rate": 1.1024765593151244e-05, "loss": 1.9531, "step": 104800 }, { "epoch": 7.120872401141459, "grad_norm": 2.9030165672302246, "learning_rate": 1.1020519092267972e-05, "loss": 2.2234, "step": 104805 }, { "epoch": 7.121212121212121, "grad_norm": 3.8371901512145996, "learning_rate": 1.1016272591384699e-05, "loss": 1.8906, "step": 104810 }, { "epoch": 7.121551841282783, "grad_norm": 3.8863470554351807, "learning_rate": 1.1012026090501427e-05, "loss": 2.1824, "step": 104815 }, { "epoch": 7.121891561353444, "grad_norm": 3.975740432739258, "learning_rate": 1.1007779589618156e-05, "loss": 1.9012, "step": 104820 }, { "epoch": 7.122231281424106, "grad_norm": 3.3779921531677246, "learning_rate": 1.1003533088734883e-05, "loss": 1.934, "step": 104825 }, { "epoch": 7.1225710014947685, "grad_norm": 3.7413034439086914, "learning_rate": 1.099928658785161e-05, "loss": 2.0569, "step": 104830 }, { "epoch": 7.12291072156543, "grad_norm": 3.537297010421753, "learning_rate": 1.0995040086968339e-05, "loss": 2.1978, "step": 104835 }, { "epoch": 7.123250441636092, "grad_norm": 3.5407896041870117, "learning_rate": 1.0990793586085065e-05, "loss": 1.8712, "step": 104840 }, { "epoch": 
7.123590161706754, "grad_norm": 3.9190571308135986, "learning_rate": 1.0986547085201795e-05, "loss": 1.9328, "step": 104845 }, { "epoch": 7.123929881777415, "grad_norm": 3.14304518699646, "learning_rate": 1.0982300584318523e-05, "loss": 2.08, "step": 104850 }, { "epoch": 7.124269601848077, "grad_norm": 3.120657444000244, "learning_rate": 1.097805408343525e-05, "loss": 1.9603, "step": 104855 }, { "epoch": 7.124609321918739, "grad_norm": 4.034371376037598, "learning_rate": 1.0973807582551977e-05, "loss": 1.9618, "step": 104860 }, { "epoch": 7.1249490419894, "grad_norm": 4.071715354919434, "learning_rate": 1.0969561081668705e-05, "loss": 2.1023, "step": 104865 }, { "epoch": 7.1252887620600625, "grad_norm": 3.409090280532837, "learning_rate": 1.0965314580785433e-05, "loss": 1.887, "step": 104870 }, { "epoch": 7.1256284821307245, "grad_norm": 3.789135456085205, "learning_rate": 1.0961068079902161e-05, "loss": 1.8985, "step": 104875 }, { "epoch": 7.125968202201386, "grad_norm": 3.797334671020508, "learning_rate": 1.0956821579018889e-05, "loss": 1.9843, "step": 104880 }, { "epoch": 7.126307922272048, "grad_norm": 3.691777229309082, "learning_rate": 1.0952575078135617e-05, "loss": 1.8449, "step": 104885 }, { "epoch": 7.12664764234271, "grad_norm": 3.649890899658203, "learning_rate": 1.0948328577252345e-05, "loss": 2.132, "step": 104890 }, { "epoch": 7.126987362413371, "grad_norm": 2.7836575508117676, "learning_rate": 1.0944082076369071e-05, "loss": 1.7582, "step": 104895 }, { "epoch": 7.127327082484033, "grad_norm": 3.6921603679656982, "learning_rate": 1.0939835575485801e-05, "loss": 2.0681, "step": 104900 }, { "epoch": 7.127666802554695, "grad_norm": 3.7031543254852295, "learning_rate": 1.093558907460253e-05, "loss": 2.0672, "step": 104905 }, { "epoch": 7.128006522625356, "grad_norm": 2.697421073913574, "learning_rate": 1.0931342573719256e-05, "loss": 2.0181, "step": 104910 }, { "epoch": 7.1283462426960185, "grad_norm": 4.669243335723877, "learning_rate": 
1.0927096072835984e-05, "loss": 2.0718, "step": 104915 }, { "epoch": 7.1286859627666805, "grad_norm": 4.827165603637695, "learning_rate": 1.0922849571952712e-05, "loss": 2.3154, "step": 104920 }, { "epoch": 7.129025682837342, "grad_norm": 4.043737411499023, "learning_rate": 1.091860307106944e-05, "loss": 2.1455, "step": 104925 }, { "epoch": 7.129365402908004, "grad_norm": 3.118236780166626, "learning_rate": 1.0914356570186168e-05, "loss": 1.8045, "step": 104930 }, { "epoch": 7.129705122978666, "grad_norm": 4.101818561553955, "learning_rate": 1.0910110069302896e-05, "loss": 1.9868, "step": 104935 }, { "epoch": 7.130044843049327, "grad_norm": 3.0976204872131348, "learning_rate": 1.0905863568419624e-05, "loss": 1.97, "step": 104940 }, { "epoch": 7.130384563119989, "grad_norm": 3.917780637741089, "learning_rate": 1.090161706753635e-05, "loss": 2.1474, "step": 104945 }, { "epoch": 7.130724283190651, "grad_norm": 4.385841369628906, "learning_rate": 1.0897370566653078e-05, "loss": 1.9199, "step": 104950 }, { "epoch": 7.131064003261312, "grad_norm": 3.761263132095337, "learning_rate": 1.0893124065769806e-05, "loss": 2.0829, "step": 104955 }, { "epoch": 7.1314037233319745, "grad_norm": 3.5977067947387695, "learning_rate": 1.0888877564886534e-05, "loss": 1.9486, "step": 104960 }, { "epoch": 7.1317434434026366, "grad_norm": 3.7641003131866455, "learning_rate": 1.0884631064003262e-05, "loss": 2.0264, "step": 104965 }, { "epoch": 7.132083163473298, "grad_norm": 2.943958044052124, "learning_rate": 1.088038456311999e-05, "loss": 2.1624, "step": 104970 }, { "epoch": 7.13242288354396, "grad_norm": 3.815967559814453, "learning_rate": 1.0876138062236716e-05, "loss": 2.1148, "step": 104975 }, { "epoch": 7.132762603614622, "grad_norm": 2.9260599613189697, "learning_rate": 1.0871891561353444e-05, "loss": 1.9503, "step": 104980 }, { "epoch": 7.133102323685283, "grad_norm": 3.3331048488616943, "learning_rate": 1.0867645060470174e-05, "loss": 1.8641, "step": 104985 }, { "epoch": 
7.133442043755945, "grad_norm": 3.498776912689209, "learning_rate": 1.0863398559586902e-05, "loss": 1.9886, "step": 104990 }, { "epoch": 7.133781763826607, "grad_norm": 3.142777442932129, "learning_rate": 1.0859152058703628e-05, "loss": 2.0757, "step": 104995 }, { "epoch": 7.134121483897268, "grad_norm": 3.975564956665039, "learning_rate": 1.0854905557820356e-05, "loss": 1.9144, "step": 105000 }, { "epoch": 7.1344612039679305, "grad_norm": 3.1051764488220215, "learning_rate": 1.0850659056937084e-05, "loss": 2.0106, "step": 105005 }, { "epoch": 7.134800924038593, "grad_norm": 3.293802261352539, "learning_rate": 1.0846412556053812e-05, "loss": 1.8333, "step": 105010 }, { "epoch": 7.135140644109254, "grad_norm": 2.6898481845855713, "learning_rate": 1.084216605517054e-05, "loss": 1.8926, "step": 105015 }, { "epoch": 7.135480364179916, "grad_norm": 3.949549913406372, "learning_rate": 1.0837919554287268e-05, "loss": 1.7711, "step": 105020 }, { "epoch": 7.135820084250578, "grad_norm": 4.148542881011963, "learning_rate": 1.0833673053403996e-05, "loss": 2.058, "step": 105025 }, { "epoch": 7.136159804321239, "grad_norm": 4.200448513031006, "learning_rate": 1.0829426552520723e-05, "loss": 2.1654, "step": 105030 }, { "epoch": 7.136499524391901, "grad_norm": 2.937702178955078, "learning_rate": 1.082518005163745e-05, "loss": 1.8462, "step": 105035 }, { "epoch": 7.136839244462563, "grad_norm": 4.442290782928467, "learning_rate": 1.082093355075418e-05, "loss": 2.035, "step": 105040 }, { "epoch": 7.137178964533224, "grad_norm": 2.841810464859009, "learning_rate": 1.0816687049870907e-05, "loss": 2.1186, "step": 105045 }, { "epoch": 7.1375186846038865, "grad_norm": 3.5677082538604736, "learning_rate": 1.0812440548987635e-05, "loss": 2.2635, "step": 105050 }, { "epoch": 7.137858404674549, "grad_norm": 4.530101299285889, "learning_rate": 1.0808194048104363e-05, "loss": 1.8339, "step": 105055 }, { "epoch": 7.13819812474521, "grad_norm": 3.6352286338806152, "learning_rate": 
1.0803947547221089e-05, "loss": 2.0066, "step": 105060 }, { "epoch": 7.138537844815872, "grad_norm": 4.194334983825684, "learning_rate": 1.0799701046337819e-05, "loss": 1.8464, "step": 105065 }, { "epoch": 7.138877564886534, "grad_norm": 5.59152889251709, "learning_rate": 1.0795454545454547e-05, "loss": 1.8819, "step": 105070 }, { "epoch": 7.139217284957195, "grad_norm": 4.625219821929932, "learning_rate": 1.0791208044571275e-05, "loss": 2.0253, "step": 105075 }, { "epoch": 7.139557005027857, "grad_norm": 3.534224510192871, "learning_rate": 1.0786961543688001e-05, "loss": 1.7436, "step": 105080 }, { "epoch": 7.139896725098519, "grad_norm": 4.856980323791504, "learning_rate": 1.0782715042804729e-05, "loss": 1.7889, "step": 105085 }, { "epoch": 7.14023644516918, "grad_norm": 3.6585471630096436, "learning_rate": 1.0778468541921457e-05, "loss": 1.7839, "step": 105090 }, { "epoch": 7.1405761652398425, "grad_norm": 3.792644739151001, "learning_rate": 1.0774222041038185e-05, "loss": 1.5913, "step": 105095 }, { "epoch": 7.140915885310504, "grad_norm": 3.7968761920928955, "learning_rate": 1.0769975540154913e-05, "loss": 2.3375, "step": 105100 }, { "epoch": 7.141255605381166, "grad_norm": 4.257162094116211, "learning_rate": 1.0765729039271641e-05, "loss": 2.0221, "step": 105105 }, { "epoch": 7.141595325451828, "grad_norm": 3.566432237625122, "learning_rate": 1.0761482538388369e-05, "loss": 2.1172, "step": 105110 }, { "epoch": 7.141935045522489, "grad_norm": 3.647578477859497, "learning_rate": 1.0757236037505095e-05, "loss": 1.7826, "step": 105115 }, { "epoch": 7.142274765593151, "grad_norm": 3.117818593978882, "learning_rate": 1.0752989536621823e-05, "loss": 1.7391, "step": 105120 }, { "epoch": 7.142614485663813, "grad_norm": 3.144440174102783, "learning_rate": 1.0748743035738553e-05, "loss": 2.0084, "step": 105125 }, { "epoch": 7.142954205734474, "grad_norm": 3.1174306869506836, "learning_rate": 1.074449653485528e-05, "loss": 1.8887, "step": 105130 }, { "epoch": 
7.1432939258051364, "grad_norm": 3.7718687057495117, "learning_rate": 1.0740250033972007e-05, "loss": 1.8705, "step": 105135 }, { "epoch": 7.1436336458757985, "grad_norm": 4.487651824951172, "learning_rate": 1.0736003533088735e-05, "loss": 2.0544, "step": 105140 }, { "epoch": 7.14397336594646, "grad_norm": 4.541320323944092, "learning_rate": 1.0731757032205462e-05, "loss": 1.6221, "step": 105145 }, { "epoch": 7.144313086017122, "grad_norm": 3.4260127544403076, "learning_rate": 1.0727510531322191e-05, "loss": 1.993, "step": 105150 }, { "epoch": 7.144652806087784, "grad_norm": 3.518320083618164, "learning_rate": 1.072326403043892e-05, "loss": 2.2296, "step": 105155 }, { "epoch": 7.144992526158445, "grad_norm": 3.321244478225708, "learning_rate": 1.0719017529555647e-05, "loss": 2.0597, "step": 105160 }, { "epoch": 7.145332246229107, "grad_norm": 3.4032864570617676, "learning_rate": 1.0714771028672374e-05, "loss": 2.068, "step": 105165 }, { "epoch": 7.145671966299769, "grad_norm": 2.956822633743286, "learning_rate": 1.0710524527789102e-05, "loss": 2.1663, "step": 105170 }, { "epoch": 7.14601168637043, "grad_norm": 3.6870572566986084, "learning_rate": 1.070627802690583e-05, "loss": 1.959, "step": 105175 }, { "epoch": 7.1463514064410925, "grad_norm": 4.445120334625244, "learning_rate": 1.0702031526022558e-05, "loss": 1.7539, "step": 105180 }, { "epoch": 7.1466911265117545, "grad_norm": 3.6554319858551025, "learning_rate": 1.0697785025139286e-05, "loss": 1.9936, "step": 105185 }, { "epoch": 7.147030846582416, "grad_norm": 3.4707210063934326, "learning_rate": 1.0693538524256014e-05, "loss": 2.0509, "step": 105190 }, { "epoch": 7.147370566653078, "grad_norm": 5.639159202575684, "learning_rate": 1.0689292023372742e-05, "loss": 2.0436, "step": 105195 }, { "epoch": 7.14771028672374, "grad_norm": 4.050167083740234, "learning_rate": 1.0685045522489468e-05, "loss": 1.969, "step": 105200 }, { "epoch": 7.148050006794401, "grad_norm": 3.3221688270568848, "learning_rate": 
1.0680799021606198e-05, "loss": 1.8519, "step": 105205 }, { "epoch": 7.148389726865063, "grad_norm": 3.744703769683838, "learning_rate": 1.0676552520722926e-05, "loss": 1.9214, "step": 105210 }, { "epoch": 7.148729446935725, "grad_norm": 3.635343551635742, "learning_rate": 1.0672306019839652e-05, "loss": 1.6938, "step": 105215 }, { "epoch": 7.149069167006386, "grad_norm": 3.7120208740234375, "learning_rate": 1.066805951895638e-05, "loss": 1.8241, "step": 105220 }, { "epoch": 7.1494088870770485, "grad_norm": 3.947911500930786, "learning_rate": 1.0663813018073108e-05, "loss": 2.1294, "step": 105225 }, { "epoch": 7.1497486071477105, "grad_norm": 4.453048229217529, "learning_rate": 1.0659566517189836e-05, "loss": 1.9374, "step": 105230 }, { "epoch": 7.150088327218372, "grad_norm": 4.305448532104492, "learning_rate": 1.0655320016306564e-05, "loss": 1.9568, "step": 105235 }, { "epoch": 7.150428047289034, "grad_norm": 3.6820194721221924, "learning_rate": 1.0651073515423292e-05, "loss": 1.7262, "step": 105240 }, { "epoch": 7.150767767359696, "grad_norm": 3.8065099716186523, "learning_rate": 1.064682701454002e-05, "loss": 2.2017, "step": 105245 }, { "epoch": 7.151107487430357, "grad_norm": 3.0134758949279785, "learning_rate": 1.0642580513656746e-05, "loss": 1.9919, "step": 105250 }, { "epoch": 7.151447207501019, "grad_norm": 3.3177781105041504, "learning_rate": 1.0638334012773474e-05, "loss": 2.2171, "step": 105255 }, { "epoch": 7.151786927571681, "grad_norm": 4.140012741088867, "learning_rate": 1.0634087511890204e-05, "loss": 1.9855, "step": 105260 }, { "epoch": 7.152126647642342, "grad_norm": 4.004701137542725, "learning_rate": 1.062984101100693e-05, "loss": 1.8441, "step": 105265 }, { "epoch": 7.1524663677130045, "grad_norm": 2.845061779022217, "learning_rate": 1.0625594510123659e-05, "loss": 2.1446, "step": 105270 }, { "epoch": 7.1528060877836666, "grad_norm": 3.9442670345306396, "learning_rate": 1.0621348009240387e-05, "loss": 1.9726, "step": 105275 }, { "epoch": 
7.153145807854328, "grad_norm": 3.537649393081665, "learning_rate": 1.0617101508357115e-05, "loss": 1.8736, "step": 105280 }, { "epoch": 7.15348552792499, "grad_norm": 4.075184345245361, "learning_rate": 1.0612855007473843e-05, "loss": 1.8972, "step": 105285 }, { "epoch": 7.153825247995652, "grad_norm": 4.083667278289795, "learning_rate": 1.060860850659057e-05, "loss": 2.2145, "step": 105290 }, { "epoch": 7.154164968066313, "grad_norm": 4.208242416381836, "learning_rate": 1.0604362005707299e-05, "loss": 2.2453, "step": 105295 }, { "epoch": 7.154504688136975, "grad_norm": 3.319274425506592, "learning_rate": 1.0600115504824025e-05, "loss": 2.138, "step": 105300 }, { "epoch": 7.154844408207637, "grad_norm": 3.3590335845947266, "learning_rate": 1.0595869003940753e-05, "loss": 2.1702, "step": 105305 }, { "epoch": 7.155184128278298, "grad_norm": 4.124411106109619, "learning_rate": 1.0591622503057481e-05, "loss": 2.0458, "step": 105310 }, { "epoch": 7.1555238483489605, "grad_norm": 3.280207633972168, "learning_rate": 1.0587376002174209e-05, "loss": 1.8707, "step": 105315 }, { "epoch": 7.155863568419623, "grad_norm": 3.8004324436187744, "learning_rate": 1.0583129501290937e-05, "loss": 2.0468, "step": 105320 }, { "epoch": 7.156203288490284, "grad_norm": 3.3580422401428223, "learning_rate": 1.0578883000407665e-05, "loss": 1.9623, "step": 105325 }, { "epoch": 7.156543008560946, "grad_norm": 3.7886600494384766, "learning_rate": 1.0574636499524393e-05, "loss": 1.7409, "step": 105330 }, { "epoch": 7.156882728631608, "grad_norm": 3.2692620754241943, "learning_rate": 1.057038999864112e-05, "loss": 1.9359, "step": 105335 }, { "epoch": 7.157222448702269, "grad_norm": 3.762392044067383, "learning_rate": 1.0566143497757847e-05, "loss": 2.0352, "step": 105340 }, { "epoch": 7.157562168772931, "grad_norm": 3.073988676071167, "learning_rate": 1.0561896996874577e-05, "loss": 1.8872, "step": 105345 }, { "epoch": 7.157901888843593, "grad_norm": 3.5235397815704346, "learning_rate": 
1.0557650495991303e-05, "loss": 2.1427, "step": 105350 }, { "epoch": 7.158241608914254, "grad_norm": 4.6187591552734375, "learning_rate": 1.0553403995108031e-05, "loss": 2.079, "step": 105355 }, { "epoch": 7.1585813289849165, "grad_norm": 3.695725440979004, "learning_rate": 1.054915749422476e-05, "loss": 1.9982, "step": 105360 }, { "epoch": 7.158921049055579, "grad_norm": 3.365518093109131, "learning_rate": 1.0544910993341487e-05, "loss": 2.0737, "step": 105365 }, { "epoch": 7.15926076912624, "grad_norm": 3.896507501602173, "learning_rate": 1.0540664492458215e-05, "loss": 1.8176, "step": 105370 }, { "epoch": 7.159600489196902, "grad_norm": 3.579451560974121, "learning_rate": 1.0536417991574943e-05, "loss": 2.0535, "step": 105375 }, { "epoch": 7.159940209267564, "grad_norm": 3.419719934463501, "learning_rate": 1.0532171490691671e-05, "loss": 2.0751, "step": 105380 }, { "epoch": 7.160279929338225, "grad_norm": 4.228374481201172, "learning_rate": 1.0527924989808398e-05, "loss": 2.0884, "step": 105385 }, { "epoch": 7.160619649408887, "grad_norm": 3.622225284576416, "learning_rate": 1.0523678488925126e-05, "loss": 2.1007, "step": 105390 }, { "epoch": 7.160959369479549, "grad_norm": 3.067723274230957, "learning_rate": 1.0519431988041854e-05, "loss": 1.8675, "step": 105395 }, { "epoch": 7.16129908955021, "grad_norm": 2.556196928024292, "learning_rate": 1.0515185487158582e-05, "loss": 1.8756, "step": 105400 }, { "epoch": 7.1616388096208725, "grad_norm": 3.302694320678711, "learning_rate": 1.051093898627531e-05, "loss": 1.7385, "step": 105405 }, { "epoch": 7.161978529691535, "grad_norm": 4.663152694702148, "learning_rate": 1.0506692485392038e-05, "loss": 2.0073, "step": 105410 }, { "epoch": 7.162318249762196, "grad_norm": 4.193580150604248, "learning_rate": 1.0502445984508766e-05, "loss": 1.9449, "step": 105415 }, { "epoch": 7.162657969832858, "grad_norm": 3.450955390930176, "learning_rate": 1.0498199483625492e-05, "loss": 1.8471, "step": 105420 }, { "epoch": 
7.16299768990352, "grad_norm": 5.310014724731445, "learning_rate": 1.0493952982742222e-05, "loss": 1.8985, "step": 105425 }, { "epoch": 7.163337409974181, "grad_norm": 4.048931121826172, "learning_rate": 1.048970648185895e-05, "loss": 1.873, "step": 105430 }, { "epoch": 7.163677130044843, "grad_norm": 3.4775521755218506, "learning_rate": 1.0485459980975676e-05, "loss": 1.6891, "step": 105435 }, { "epoch": 7.164016850115505, "grad_norm": 2.998260021209717, "learning_rate": 1.0481213480092404e-05, "loss": 1.9735, "step": 105440 }, { "epoch": 7.1643565701861665, "grad_norm": 3.364670753479004, "learning_rate": 1.0476966979209132e-05, "loss": 2.1796, "step": 105445 }, { "epoch": 7.1646962902568285, "grad_norm": 3.8461740016937256, "learning_rate": 1.047272047832586e-05, "loss": 2.0141, "step": 105450 }, { "epoch": 7.165036010327491, "grad_norm": 3.7317662239074707, "learning_rate": 1.0468473977442588e-05, "loss": 1.8876, "step": 105455 }, { "epoch": 7.165375730398152, "grad_norm": 3.7639451026916504, "learning_rate": 1.0464227476559316e-05, "loss": 2.0678, "step": 105460 }, { "epoch": 7.165715450468814, "grad_norm": 2.473464250564575, "learning_rate": 1.0459980975676044e-05, "loss": 1.6603, "step": 105465 }, { "epoch": 7.166055170539475, "grad_norm": 4.642591953277588, "learning_rate": 1.045573447479277e-05, "loss": 1.8979, "step": 105470 }, { "epoch": 7.166394890610137, "grad_norm": 4.482815742492676, "learning_rate": 1.0451487973909498e-05, "loss": 2.0544, "step": 105475 }, { "epoch": 7.166734610680799, "grad_norm": 3.931058645248413, "learning_rate": 1.0447241473026226e-05, "loss": 1.7967, "step": 105480 }, { "epoch": 7.16707433075146, "grad_norm": 3.339738130569458, "learning_rate": 1.0442994972142954e-05, "loss": 1.67, "step": 105485 }, { "epoch": 7.1674140508221225, "grad_norm": 3.1572790145874023, "learning_rate": 1.0438748471259682e-05, "loss": 1.7247, "step": 105490 }, { "epoch": 7.1677537708927845, "grad_norm": 3.649038553237915, "learning_rate": 
1.043450197037641e-05, "loss": 1.9297, "step": 105495 }, { "epoch": 7.168093490963446, "grad_norm": 4.06614875793457, "learning_rate": 1.0430255469493138e-05, "loss": 2.0342, "step": 105500 }, { "epoch": 7.168433211034108, "grad_norm": 3.1796209812164307, "learning_rate": 1.0426008968609865e-05, "loss": 2.0378, "step": 105505 }, { "epoch": 7.16877293110477, "grad_norm": 4.077664852142334, "learning_rate": 1.0421762467726594e-05, "loss": 1.7177, "step": 105510 }, { "epoch": 7.169112651175431, "grad_norm": 4.947044372558594, "learning_rate": 1.0417515966843322e-05, "loss": 2.0985, "step": 105515 }, { "epoch": 7.169452371246093, "grad_norm": 4.359881401062012, "learning_rate": 1.0413269465960049e-05, "loss": 1.9644, "step": 105520 }, { "epoch": 7.169792091316755, "grad_norm": 3.1791014671325684, "learning_rate": 1.0409022965076777e-05, "loss": 2.2124, "step": 105525 }, { "epoch": 7.170131811387416, "grad_norm": 3.5391266345977783, "learning_rate": 1.0404776464193505e-05, "loss": 1.6993, "step": 105530 }, { "epoch": 7.1704715314580785, "grad_norm": 3.0178658962249756, "learning_rate": 1.0400529963310233e-05, "loss": 2.1589, "step": 105535 }, { "epoch": 7.1708112515287405, "grad_norm": 4.832036018371582, "learning_rate": 1.039628346242696e-05, "loss": 2.1599, "step": 105540 }, { "epoch": 7.171150971599402, "grad_norm": 4.929999828338623, "learning_rate": 1.0392036961543689e-05, "loss": 2.0924, "step": 105545 }, { "epoch": 7.171490691670064, "grad_norm": 3.9051785469055176, "learning_rate": 1.0387790460660417e-05, "loss": 1.7212, "step": 105550 }, { "epoch": 7.171830411740726, "grad_norm": 3.167954683303833, "learning_rate": 1.0383543959777143e-05, "loss": 1.9012, "step": 105555 }, { "epoch": 7.172170131811387, "grad_norm": 5.767357349395752, "learning_rate": 1.0379297458893871e-05, "loss": 2.0536, "step": 105560 }, { "epoch": 7.172509851882049, "grad_norm": 3.852367401123047, "learning_rate": 1.03750509580106e-05, "loss": 2.2574, "step": 105565 }, { "epoch": 
7.172849571952711, "grad_norm": 3.2908787727355957, "learning_rate": 1.0370804457127327e-05, "loss": 1.9702, "step": 105570 }, { "epoch": 7.173189292023372, "grad_norm": 3.517427682876587, "learning_rate": 1.0366557956244055e-05, "loss": 1.7516, "step": 105575 }, { "epoch": 7.1735290120940345, "grad_norm": 3.719606399536133, "learning_rate": 1.0362311455360783e-05, "loss": 1.9563, "step": 105580 }, { "epoch": 7.173868732164697, "grad_norm": 4.516017913818359, "learning_rate": 1.0358064954477511e-05, "loss": 2.012, "step": 105585 }, { "epoch": 7.174208452235358, "grad_norm": 3.2441742420196533, "learning_rate": 1.0353818453594239e-05, "loss": 1.8791, "step": 105590 }, { "epoch": 7.17454817230602, "grad_norm": 3.314141035079956, "learning_rate": 1.0349571952710967e-05, "loss": 2.0687, "step": 105595 }, { "epoch": 7.174887892376682, "grad_norm": 3.5102522373199463, "learning_rate": 1.0345325451827695e-05, "loss": 1.9578, "step": 105600 }, { "epoch": 7.175227612447343, "grad_norm": 4.1309332847595215, "learning_rate": 1.0341078950944421e-05, "loss": 2.1755, "step": 105605 }, { "epoch": 7.175567332518005, "grad_norm": 4.486395835876465, "learning_rate": 1.033683245006115e-05, "loss": 2.0151, "step": 105610 }, { "epoch": 7.175907052588667, "grad_norm": 4.142874717712402, "learning_rate": 1.0332585949177877e-05, "loss": 1.939, "step": 105615 }, { "epoch": 7.176246772659328, "grad_norm": 3.7630269527435303, "learning_rate": 1.0328339448294607e-05, "loss": 2.0256, "step": 105620 }, { "epoch": 7.1765864927299905, "grad_norm": 3.5449976921081543, "learning_rate": 1.0324092947411333e-05, "loss": 1.6993, "step": 105625 }, { "epoch": 7.176926212800653, "grad_norm": 3.5512945652008057, "learning_rate": 1.0319846446528062e-05, "loss": 2.1085, "step": 105630 }, { "epoch": 7.177265932871314, "grad_norm": 4.018178939819336, "learning_rate": 1.031559994564479e-05, "loss": 1.9348, "step": 105635 }, { "epoch": 7.177605652941976, "grad_norm": 5.445681095123291, "learning_rate": 
1.0311353444761516e-05, "loss": 2.0289, "step": 105640 }, { "epoch": 7.177945373012638, "grad_norm": 4.122445106506348, "learning_rate": 1.0307106943878246e-05, "loss": 2.0014, "step": 105645 }, { "epoch": 7.178285093083299, "grad_norm": 3.509993076324463, "learning_rate": 1.0302860442994974e-05, "loss": 2.3815, "step": 105650 }, { "epoch": 7.178624813153961, "grad_norm": 3.2771244049072266, "learning_rate": 1.02986139421117e-05, "loss": 2.0298, "step": 105655 }, { "epoch": 7.178964533224623, "grad_norm": 3.0474417209625244, "learning_rate": 1.0294367441228428e-05, "loss": 2.1374, "step": 105660 }, { "epoch": 7.179304253295284, "grad_norm": 4.991743087768555, "learning_rate": 1.0290120940345156e-05, "loss": 2.0512, "step": 105665 }, { "epoch": 7.1796439733659465, "grad_norm": 3.197258472442627, "learning_rate": 1.0285874439461884e-05, "loss": 1.7323, "step": 105670 }, { "epoch": 7.179983693436609, "grad_norm": 3.250223398208618, "learning_rate": 1.0281627938578612e-05, "loss": 2.1441, "step": 105675 }, { "epoch": 7.18032341350727, "grad_norm": 2.9250690937042236, "learning_rate": 1.027738143769534e-05, "loss": 2.0804, "step": 105680 }, { "epoch": 7.180663133577932, "grad_norm": 3.257389545440674, "learning_rate": 1.0273134936812068e-05, "loss": 2.0253, "step": 105685 }, { "epoch": 7.181002853648594, "grad_norm": 4.428191184997559, "learning_rate": 1.0268888435928794e-05, "loss": 2.1436, "step": 105690 }, { "epoch": 7.181342573719255, "grad_norm": 3.6163852214813232, "learning_rate": 1.0264641935045522e-05, "loss": 1.7426, "step": 105695 }, { "epoch": 7.181682293789917, "grad_norm": 3.204855442047119, "learning_rate": 1.026039543416225e-05, "loss": 2.1793, "step": 105700 }, { "epoch": 7.182022013860579, "grad_norm": 3.409998893737793, "learning_rate": 1.025614893327898e-05, "loss": 1.8044, "step": 105705 }, { "epoch": 7.18236173393124, "grad_norm": 3.4492106437683105, "learning_rate": 1.0251902432395706e-05, "loss": 2.1112, "step": 105710 }, { "epoch": 
7.1827014540019025, "grad_norm": 4.25064754486084, "learning_rate": 1.0247655931512434e-05, "loss": 1.8989, "step": 105715 }, { "epoch": 7.183041174072565, "grad_norm": 4.6459641456604, "learning_rate": 1.0243409430629162e-05, "loss": 1.9646, "step": 105720 }, { "epoch": 7.183380894143226, "grad_norm": 3.4682278633117676, "learning_rate": 1.0239162929745889e-05, "loss": 1.8662, "step": 105725 }, { "epoch": 7.183720614213888, "grad_norm": 4.279282093048096, "learning_rate": 1.0234916428862618e-05, "loss": 1.8366, "step": 105730 }, { "epoch": 7.18406033428455, "grad_norm": 3.69950270652771, "learning_rate": 1.0230669927979346e-05, "loss": 1.9524, "step": 105735 }, { "epoch": 7.184400054355211, "grad_norm": 3.176254987716675, "learning_rate": 1.0226423427096073e-05, "loss": 1.9508, "step": 105740 }, { "epoch": 7.184739774425873, "grad_norm": 4.283926963806152, "learning_rate": 1.02221769262128e-05, "loss": 1.8309, "step": 105745 }, { "epoch": 7.185079494496535, "grad_norm": 3.9980709552764893, "learning_rate": 1.0217930425329529e-05, "loss": 1.8046, "step": 105750 }, { "epoch": 7.1854192145671965, "grad_norm": 3.7872745990753174, "learning_rate": 1.0213683924446257e-05, "loss": 2.0784, "step": 105755 }, { "epoch": 7.1857589346378585, "grad_norm": 3.1953697204589844, "learning_rate": 1.0209437423562985e-05, "loss": 1.8735, "step": 105760 }, { "epoch": 7.186098654708521, "grad_norm": 4.83408260345459, "learning_rate": 1.0205190922679713e-05, "loss": 1.9872, "step": 105765 }, { "epoch": 7.186438374779182, "grad_norm": 3.93121600151062, "learning_rate": 1.020094442179644e-05, "loss": 1.9054, "step": 105770 }, { "epoch": 7.186778094849844, "grad_norm": 3.370551586151123, "learning_rate": 1.0196697920913167e-05, "loss": 2.0176, "step": 105775 }, { "epoch": 7.187117814920505, "grad_norm": 4.1072282791137695, "learning_rate": 1.0192451420029895e-05, "loss": 2.0717, "step": 105780 }, { "epoch": 7.187457534991167, "grad_norm": 3.118412971496582, "learning_rate": 
1.0188204919146625e-05, "loss": 2.0122, "step": 105785 }, { "epoch": 7.187797255061829, "grad_norm": 3.0222909450531006, "learning_rate": 1.0183958418263353e-05, "loss": 2.0328, "step": 105790 }, { "epoch": 7.18813697513249, "grad_norm": 4.5508527755737305, "learning_rate": 1.0179711917380079e-05, "loss": 2.0369, "step": 105795 }, { "epoch": 7.1884766952031525, "grad_norm": 3.952209711074829, "learning_rate": 1.0175465416496807e-05, "loss": 1.7967, "step": 105800 }, { "epoch": 7.1888164152738145, "grad_norm": 4.520982265472412, "learning_rate": 1.0171218915613535e-05, "loss": 1.9799, "step": 105805 }, { "epoch": 7.189156135344476, "grad_norm": 2.858635425567627, "learning_rate": 1.0166972414730263e-05, "loss": 1.8389, "step": 105810 }, { "epoch": 7.189495855415138, "grad_norm": 3.5392556190490723, "learning_rate": 1.0162725913846991e-05, "loss": 2.1913, "step": 105815 }, { "epoch": 7.1898355754858, "grad_norm": 3.549142360687256, "learning_rate": 1.0158479412963719e-05, "loss": 2.1241, "step": 105820 }, { "epoch": 7.190175295556461, "grad_norm": 3.422393798828125, "learning_rate": 1.0154232912080445e-05, "loss": 2.2776, "step": 105825 }, { "epoch": 7.190515015627123, "grad_norm": 4.557718753814697, "learning_rate": 1.0149986411197173e-05, "loss": 2.1797, "step": 105830 }, { "epoch": 7.190854735697785, "grad_norm": 5.502859115600586, "learning_rate": 1.0145739910313901e-05, "loss": 1.9667, "step": 105835 }, { "epoch": 7.191194455768446, "grad_norm": 4.1131062507629395, "learning_rate": 1.014149340943063e-05, "loss": 1.7945, "step": 105840 }, { "epoch": 7.1915341758391085, "grad_norm": 3.555389165878296, "learning_rate": 1.0137246908547357e-05, "loss": 1.7583, "step": 105845 }, { "epoch": 7.1918738959097706, "grad_norm": 3.585986614227295, "learning_rate": 1.0133000407664085e-05, "loss": 1.8881, "step": 105850 }, { "epoch": 7.192213615980432, "grad_norm": 4.122292518615723, "learning_rate": 1.0128753906780813e-05, "loss": 2.084, "step": 105855 }, { "epoch": 
7.192553336051094, "grad_norm": 3.6909422874450684, "learning_rate": 1.012450740589754e-05, "loss": 1.915, "step": 105860 }, { "epoch": 7.192893056121756, "grad_norm": 3.251873254776001, "learning_rate": 1.0120260905014268e-05, "loss": 2.0747, "step": 105865 }, { "epoch": 7.193232776192417, "grad_norm": 3.175952196121216, "learning_rate": 1.0116014404130997e-05, "loss": 1.8673, "step": 105870 }, { "epoch": 7.193572496263079, "grad_norm": 3.8093972206115723, "learning_rate": 1.0111767903247725e-05, "loss": 1.6979, "step": 105875 }, { "epoch": 7.193912216333741, "grad_norm": 4.197394371032715, "learning_rate": 1.0107521402364452e-05, "loss": 1.8652, "step": 105880 }, { "epoch": 7.194251936404402, "grad_norm": 5.616336822509766, "learning_rate": 1.010327490148118e-05, "loss": 1.742, "step": 105885 }, { "epoch": 7.1945916564750645, "grad_norm": 4.426137447357178, "learning_rate": 1.0099028400597908e-05, "loss": 1.9637, "step": 105890 }, { "epoch": 7.194931376545727, "grad_norm": 3.858970880508423, "learning_rate": 1.0094781899714636e-05, "loss": 2.0874, "step": 105895 }, { "epoch": 7.195271096616388, "grad_norm": 3.544384479522705, "learning_rate": 1.0090535398831364e-05, "loss": 2.2154, "step": 105900 }, { "epoch": 7.19561081668705, "grad_norm": 3.6872658729553223, "learning_rate": 1.0086288897948092e-05, "loss": 1.8318, "step": 105905 }, { "epoch": 7.195950536757712, "grad_norm": 3.7604005336761475, "learning_rate": 1.0082042397064818e-05, "loss": 1.6952, "step": 105910 }, { "epoch": 7.196290256828373, "grad_norm": 3.3943755626678467, "learning_rate": 1.0077795896181546e-05, "loss": 2.1252, "step": 105915 }, { "epoch": 7.196629976899035, "grad_norm": 2.9699907302856445, "learning_rate": 1.0073549395298274e-05, "loss": 1.9697, "step": 105920 }, { "epoch": 7.196969696969697, "grad_norm": 3.2613160610198975, "learning_rate": 1.0069302894415004e-05, "loss": 2.0521, "step": 105925 }, { "epoch": 7.197309417040358, "grad_norm": 4.028547286987305, "learning_rate": 
1.006505639353173e-05, "loss": 1.7986, "step": 105930 }, { "epoch": 7.1976491371110205, "grad_norm": 4.002432346343994, "learning_rate": 1.0060809892648458e-05, "loss": 2.017, "step": 105935 }, { "epoch": 7.197988857181683, "grad_norm": 3.4411160945892334, "learning_rate": 1.0056563391765186e-05, "loss": 2.2749, "step": 105940 }, { "epoch": 7.198328577252344, "grad_norm": 3.2418079376220703, "learning_rate": 1.0052316890881912e-05, "loss": 2.1043, "step": 105945 }, { "epoch": 7.198668297323006, "grad_norm": 3.5147171020507812, "learning_rate": 1.0048070389998642e-05, "loss": 2.1233, "step": 105950 }, { "epoch": 7.199008017393668, "grad_norm": 4.308211326599121, "learning_rate": 1.004382388911537e-05, "loss": 2.0349, "step": 105955 }, { "epoch": 7.199347737464329, "grad_norm": 4.49185848236084, "learning_rate": 1.0039577388232098e-05, "loss": 1.9772, "step": 105960 }, { "epoch": 7.199687457534991, "grad_norm": 3.8024351596832275, "learning_rate": 1.0035330887348824e-05, "loss": 1.6634, "step": 105965 }, { "epoch": 7.200027177605653, "grad_norm": 4.0765814781188965, "learning_rate": 1.0031084386465552e-05, "loss": 1.9962, "step": 105970 }, { "epoch": 7.200366897676314, "grad_norm": 3.1225404739379883, "learning_rate": 1.002683788558228e-05, "loss": 1.8842, "step": 105975 }, { "epoch": 7.2007066177469765, "grad_norm": 4.8544769287109375, "learning_rate": 1.0022591384699008e-05, "loss": 2.0618, "step": 105980 }, { "epoch": 7.201046337817639, "grad_norm": 3.64694881439209, "learning_rate": 1.0018344883815737e-05, "loss": 1.9147, "step": 105985 }, { "epoch": 7.2013860578883, "grad_norm": 4.015786647796631, "learning_rate": 1.0014098382932465e-05, "loss": 1.9972, "step": 105990 }, { "epoch": 7.201725777958962, "grad_norm": 3.0495877265930176, "learning_rate": 1.000985188204919e-05, "loss": 1.9805, "step": 105995 }, { "epoch": 7.202065498029624, "grad_norm": 3.15932297706604, "learning_rate": 1.0005605381165919e-05, "loss": 1.7839, "step": 106000 }, { "epoch": 
7.202405218100285, "grad_norm": 3.5358636379241943, "learning_rate": 1.0001358880282649e-05, "loss": 2.0314, "step": 106005 }, { "epoch": 7.202744938170947, "grad_norm": 3.594853162765503, "learning_rate": 9.997112379399377e-06, "loss": 1.825, "step": 106010 }, { "epoch": 7.203084658241609, "grad_norm": 3.792742967605591, "learning_rate": 9.992865878516103e-06, "loss": 2.0576, "step": 106015 }, { "epoch": 7.2034243783122704, "grad_norm": 3.887115716934204, "learning_rate": 9.988619377632831e-06, "loss": 2.1108, "step": 106020 }, { "epoch": 7.2037640983829325, "grad_norm": 3.557368516921997, "learning_rate": 9.984372876749559e-06, "loss": 1.8953, "step": 106025 }, { "epoch": 7.204103818453595, "grad_norm": 3.246875524520874, "learning_rate": 9.980126375866287e-06, "loss": 2.1738, "step": 106030 }, { "epoch": 7.204443538524256, "grad_norm": 4.494555950164795, "learning_rate": 9.975879874983015e-06, "loss": 1.9376, "step": 106035 }, { "epoch": 7.204783258594918, "grad_norm": 3.536611795425415, "learning_rate": 9.971633374099743e-06, "loss": 1.8018, "step": 106040 }, { "epoch": 7.20512297866558, "grad_norm": 3.951327085494995, "learning_rate": 9.967386873216471e-06, "loss": 2.0615, "step": 106045 }, { "epoch": 7.205462698736241, "grad_norm": 4.845746040344238, "learning_rate": 9.963140372333197e-06, "loss": 2.2145, "step": 106050 }, { "epoch": 7.205802418806903, "grad_norm": 3.4514684677124023, "learning_rate": 9.958893871449925e-06, "loss": 1.9206, "step": 106055 }, { "epoch": 7.206142138877565, "grad_norm": 3.454117774963379, "learning_rate": 9.954647370566653e-06, "loss": 2.3235, "step": 106060 }, { "epoch": 7.2064818589482265, "grad_norm": 3.755232810974121, "learning_rate": 9.950400869683381e-06, "loss": 2.001, "step": 106065 }, { "epoch": 7.2068215790188885, "grad_norm": 3.566728115081787, "learning_rate": 9.94615436880011e-06, "loss": 2.2374, "step": 106070 }, { "epoch": 7.207161299089551, "grad_norm": 3.0479519367218018, "learning_rate": 9.941907867916837e-06, 
"loss": 2.0221, "step": 106075 }, { "epoch": 7.207501019160212, "grad_norm": 3.207197666168213, "learning_rate": 9.937661367033564e-06, "loss": 1.9449, "step": 106080 }, { "epoch": 7.207840739230874, "grad_norm": 4.230944633483887, "learning_rate": 9.933414866150292e-06, "loss": 2.0496, "step": 106085 }, { "epoch": 7.208180459301536, "grad_norm": 3.113834857940674, "learning_rate": 9.929168365267021e-06, "loss": 1.8415, "step": 106090 }, { "epoch": 7.208520179372197, "grad_norm": 4.441165447235107, "learning_rate": 9.92492186438375e-06, "loss": 1.868, "step": 106095 }, { "epoch": 7.208859899442859, "grad_norm": 3.315531015396118, "learning_rate": 9.920675363500476e-06, "loss": 2.2135, "step": 106100 }, { "epoch": 7.209199619513521, "grad_norm": 3.785672426223755, "learning_rate": 9.916428862617204e-06, "loss": 2.1317, "step": 106105 }, { "epoch": 7.2095393395841825, "grad_norm": 3.004687786102295, "learning_rate": 9.912182361733932e-06, "loss": 2.1385, "step": 106110 }, { "epoch": 7.2098790596548445, "grad_norm": 3.303349733352661, "learning_rate": 9.90793586085066e-06, "loss": 1.8381, "step": 106115 }, { "epoch": 7.210218779725507, "grad_norm": 4.0957818031311035, "learning_rate": 9.903689359967388e-06, "loss": 1.943, "step": 106120 }, { "epoch": 7.210558499796168, "grad_norm": 4.108883380889893, "learning_rate": 9.899442859084116e-06, "loss": 2.111, "step": 106125 }, { "epoch": 7.21089821986683, "grad_norm": 3.597987174987793, "learning_rate": 9.895196358200844e-06, "loss": 1.9901, "step": 106130 }, { "epoch": 7.211237939937492, "grad_norm": 4.677709102630615, "learning_rate": 9.89094985731757e-06, "loss": 1.8782, "step": 106135 }, { "epoch": 7.211577660008153, "grad_norm": 4.233819961547852, "learning_rate": 9.886703356434298e-06, "loss": 1.8251, "step": 106140 }, { "epoch": 7.211917380078815, "grad_norm": 3.864579916000366, "learning_rate": 9.882456855551028e-06, "loss": 1.9756, "step": 106145 }, { "epoch": 7.212257100149476, "grad_norm": 3.0237510204315186, 
"learning_rate": 9.878210354667754e-06, "loss": 2.1709, "step": 106150 }, { "epoch": 7.2125968202201385, "grad_norm": 3.141998767852783, "learning_rate": 9.873963853784482e-06, "loss": 2.0189, "step": 106155 }, { "epoch": 7.212936540290801, "grad_norm": 3.5568113327026367, "learning_rate": 9.86971735290121e-06, "loss": 1.8613, "step": 106160 }, { "epoch": 7.213276260361462, "grad_norm": 3.7304418087005615, "learning_rate": 9.865470852017936e-06, "loss": 1.9901, "step": 106165 }, { "epoch": 7.213615980432124, "grad_norm": 3.190026044845581, "learning_rate": 9.861224351134666e-06, "loss": 2.1755, "step": 106170 }, { "epoch": 7.213955700502786, "grad_norm": 3.255366563796997, "learning_rate": 9.856977850251394e-06, "loss": 1.8082, "step": 106175 }, { "epoch": 7.214295420573447, "grad_norm": 4.1185688972473145, "learning_rate": 9.852731349368122e-06, "loss": 1.826, "step": 106180 }, { "epoch": 7.214635140644109, "grad_norm": 4.114963531494141, "learning_rate": 9.848484848484848e-06, "loss": 2.0234, "step": 106185 }, { "epoch": 7.214974860714771, "grad_norm": 3.3765532970428467, "learning_rate": 9.844238347601576e-06, "loss": 1.6389, "step": 106190 }, { "epoch": 7.215314580785432, "grad_norm": 3.1910719871520996, "learning_rate": 9.839991846718304e-06, "loss": 1.9948, "step": 106195 }, { "epoch": 7.2156543008560945, "grad_norm": 3.1341583728790283, "learning_rate": 9.835745345835032e-06, "loss": 1.762, "step": 106200 }, { "epoch": 7.215994020926757, "grad_norm": 3.2678279876708984, "learning_rate": 9.83149884495176e-06, "loss": 2.1481, "step": 106205 }, { "epoch": 7.216333740997418, "grad_norm": 4.002128601074219, "learning_rate": 9.827252344068488e-06, "loss": 1.9257, "step": 106210 }, { "epoch": 7.21667346106808, "grad_norm": 3.0558371543884277, "learning_rate": 9.823005843185216e-06, "loss": 1.923, "step": 106215 }, { "epoch": 7.217013181138742, "grad_norm": 4.542156219482422, "learning_rate": 9.818759342301943e-06, "loss": 1.9236, "step": 106220 }, { "epoch": 
7.217352901209403, "grad_norm": 3.8370213508605957, "learning_rate": 9.814512841418672e-06, "loss": 1.891, "step": 106225 }, { "epoch": 7.217692621280065, "grad_norm": 4.906097412109375, "learning_rate": 9.8102663405354e-06, "loss": 2.2611, "step": 106230 }, { "epoch": 7.218032341350727, "grad_norm": 4.163980960845947, "learning_rate": 9.806019839652127e-06, "loss": 1.9825, "step": 106235 }, { "epoch": 7.218372061421388, "grad_norm": 3.903149366378784, "learning_rate": 9.801773338768855e-06, "loss": 1.9173, "step": 106240 }, { "epoch": 7.2187117814920505, "grad_norm": 3.39913010597229, "learning_rate": 9.797526837885583e-06, "loss": 1.6525, "step": 106245 }, { "epoch": 7.219051501562713, "grad_norm": 4.237577438354492, "learning_rate": 9.79328033700231e-06, "loss": 1.8635, "step": 106250 }, { "epoch": 7.219391221633374, "grad_norm": 4.277772426605225, "learning_rate": 9.789033836119039e-06, "loss": 2.0434, "step": 106255 }, { "epoch": 7.219730941704036, "grad_norm": 3.450953483581543, "learning_rate": 9.784787335235767e-06, "loss": 2.0574, "step": 106260 }, { "epoch": 7.220070661774698, "grad_norm": 3.020045757293701, "learning_rate": 9.780540834352495e-06, "loss": 2.0472, "step": 106265 }, { "epoch": 7.220410381845359, "grad_norm": 3.5026657581329346, "learning_rate": 9.776294333469221e-06, "loss": 1.9049, "step": 106270 }, { "epoch": 7.220750101916021, "grad_norm": 4.244466304779053, "learning_rate": 9.772047832585949e-06, "loss": 2.1097, "step": 106275 }, { "epoch": 7.221089821986683, "grad_norm": 4.369641304016113, "learning_rate": 9.767801331702677e-06, "loss": 2.2252, "step": 106280 }, { "epoch": 7.221429542057344, "grad_norm": 4.714854717254639, "learning_rate": 9.763554830819405e-06, "loss": 2.1392, "step": 106285 }, { "epoch": 7.2217692621280065, "grad_norm": 5.187901496887207, "learning_rate": 9.759308329936133e-06, "loss": 1.9341, "step": 106290 }, { "epoch": 7.222108982198669, "grad_norm": 3.799757242202759, "learning_rate": 9.755061829052861e-06, 
"loss": 2.182, "step": 106295 }, { "epoch": 7.22244870226933, "grad_norm": 3.8236563205718994, "learning_rate": 9.750815328169589e-06, "loss": 1.8687, "step": 106300 }, { "epoch": 7.222788422339992, "grad_norm": 3.7990026473999023, "learning_rate": 9.746568827286315e-06, "loss": 1.9399, "step": 106305 }, { "epoch": 7.223128142410654, "grad_norm": 3.768888473510742, "learning_rate": 9.742322326403045e-06, "loss": 2.1243, "step": 106310 }, { "epoch": 7.223467862481315, "grad_norm": 3.7914161682128906, "learning_rate": 9.738075825519773e-06, "loss": 1.9568, "step": 106315 }, { "epoch": 7.223807582551977, "grad_norm": 4.321866512298584, "learning_rate": 9.7338293246365e-06, "loss": 2.0766, "step": 106320 }, { "epoch": 7.224147302622639, "grad_norm": 3.898775100708008, "learning_rate": 9.729582823753227e-06, "loss": 2.113, "step": 106325 }, { "epoch": 7.2244870226933005, "grad_norm": 3.6160356998443604, "learning_rate": 9.725336322869955e-06, "loss": 1.7046, "step": 106330 }, { "epoch": 7.2248267427639625, "grad_norm": 3.0728020668029785, "learning_rate": 9.721089821986683e-06, "loss": 1.8719, "step": 106335 }, { "epoch": 7.225166462834625, "grad_norm": 3.580984592437744, "learning_rate": 9.716843321103411e-06, "loss": 2.1701, "step": 106340 }, { "epoch": 7.225506182905286, "grad_norm": 4.0574493408203125, "learning_rate": 9.71259682022014e-06, "loss": 2.2535, "step": 106345 }, { "epoch": 7.225845902975948, "grad_norm": 2.8477694988250732, "learning_rate": 9.708350319336868e-06, "loss": 1.9063, "step": 106350 }, { "epoch": 7.22618562304661, "grad_norm": 4.177106857299805, "learning_rate": 9.704103818453594e-06, "loss": 1.8713, "step": 106355 }, { "epoch": 7.226525343117271, "grad_norm": 3.792525291442871, "learning_rate": 9.699857317570322e-06, "loss": 1.9022, "step": 106360 }, { "epoch": 7.226865063187933, "grad_norm": 3.7889492511749268, "learning_rate": 9.695610816687052e-06, "loss": 1.8605, "step": 106365 }, { "epoch": 7.227204783258595, "grad_norm": 
3.769456148147583, "learning_rate": 9.691364315803778e-06, "loss": 2.3282, "step": 106370 }, { "epoch": 7.2275445033292565, "grad_norm": 2.9450314044952393, "learning_rate": 9.687117814920506e-06, "loss": 1.9539, "step": 106375 }, { "epoch": 7.2278842233999185, "grad_norm": 4.774852275848389, "learning_rate": 9.682871314037234e-06, "loss": 2.2923, "step": 106380 }, { "epoch": 7.228223943470581, "grad_norm": 4.136046409606934, "learning_rate": 9.678624813153962e-06, "loss": 1.6389, "step": 106385 }, { "epoch": 7.228563663541242, "grad_norm": 3.7763888835906982, "learning_rate": 9.67437831227069e-06, "loss": 2.008, "step": 106390 }, { "epoch": 7.228903383611904, "grad_norm": 4.694613456726074, "learning_rate": 9.670131811387418e-06, "loss": 1.9395, "step": 106395 }, { "epoch": 7.229243103682566, "grad_norm": 2.994305372238159, "learning_rate": 9.665885310504146e-06, "loss": 2.0717, "step": 106400 }, { "epoch": 7.229582823753227, "grad_norm": 4.284978866577148, "learning_rate": 9.661638809620872e-06, "loss": 1.9166, "step": 106405 }, { "epoch": 7.229922543823889, "grad_norm": 4.225351333618164, "learning_rate": 9.6573923087376e-06, "loss": 2.0269, "step": 106410 }, { "epoch": 7.230262263894551, "grad_norm": 3.16214919090271, "learning_rate": 9.653145807854328e-06, "loss": 1.9199, "step": 106415 }, { "epoch": 7.2306019839652125, "grad_norm": 4.64400577545166, "learning_rate": 9.648899306971056e-06, "loss": 2.0134, "step": 106420 }, { "epoch": 7.2309417040358746, "grad_norm": 3.6566860675811768, "learning_rate": 9.644652806087784e-06, "loss": 1.8188, "step": 106425 }, { "epoch": 7.231281424106537, "grad_norm": 4.455968856811523, "learning_rate": 9.640406305204512e-06, "loss": 2.1287, "step": 106430 }, { "epoch": 7.231621144177198, "grad_norm": 3.083146333694458, "learning_rate": 9.63615980432124e-06, "loss": 2.0058, "step": 106435 }, { "epoch": 7.23196086424786, "grad_norm": 3.9006099700927734, "learning_rate": 9.631913303437967e-06, "loss": 2.1887, "step": 106440 }, { 
"epoch": 7.232300584318522, "grad_norm": 3.1396870613098145, "learning_rate": 9.627666802554695e-06, "loss": 1.99, "step": 106445 }, { "epoch": 7.232640304389183, "grad_norm": 3.680804967880249, "learning_rate": 9.623420301671424e-06, "loss": 2.0959, "step": 106450 }, { "epoch": 7.232980024459845, "grad_norm": 3.8279972076416016, "learning_rate": 9.61917380078815e-06, "loss": 1.7943, "step": 106455 }, { "epoch": 7.233319744530506, "grad_norm": 3.973297595977783, "learning_rate": 9.614927299904879e-06, "loss": 1.7854, "step": 106460 }, { "epoch": 7.2336594646011685, "grad_norm": 3.691580057144165, "learning_rate": 9.610680799021607e-06, "loss": 2.0664, "step": 106465 }, { "epoch": 7.233999184671831, "grad_norm": 4.801246643066406, "learning_rate": 9.606434298138335e-06, "loss": 1.7305, "step": 106470 }, { "epoch": 7.234338904742492, "grad_norm": 3.4232916831970215, "learning_rate": 9.602187797255063e-06, "loss": 1.8954, "step": 106475 }, { "epoch": 7.234678624813154, "grad_norm": 3.095247983932495, "learning_rate": 9.59794129637179e-06, "loss": 1.9765, "step": 106480 }, { "epoch": 7.235018344883816, "grad_norm": 4.124382972717285, "learning_rate": 9.593694795488519e-06, "loss": 2.0789, "step": 106485 }, { "epoch": 7.235358064954477, "grad_norm": 4.187219619750977, "learning_rate": 9.589448294605245e-06, "loss": 1.6498, "step": 106490 }, { "epoch": 7.235697785025139, "grad_norm": 4.569436550140381, "learning_rate": 9.585201793721973e-06, "loss": 2.17, "step": 106495 }, { "epoch": 7.236037505095801, "grad_norm": 3.385483503341675, "learning_rate": 9.580955292838701e-06, "loss": 1.8742, "step": 106500 }, { "epoch": 7.236377225166462, "grad_norm": 4.538552761077881, "learning_rate": 9.576708791955429e-06, "loss": 1.8428, "step": 106505 }, { "epoch": 7.2367169452371245, "grad_norm": 3.7248475551605225, "learning_rate": 9.572462291072157e-06, "loss": 2.0041, "step": 106510 }, { "epoch": 7.237056665307787, "grad_norm": 4.5583953857421875, "learning_rate": 
9.568215790188885e-06, "loss": 2.0197, "step": 106515 }, { "epoch": 7.237396385378448, "grad_norm": 3.9772422313690186, "learning_rate": 9.563969289305613e-06, "loss": 2.0837, "step": 106520 }, { "epoch": 7.23773610544911, "grad_norm": 2.930128335952759, "learning_rate": 9.55972278842234e-06, "loss": 1.8921, "step": 106525 }, { "epoch": 7.238075825519772, "grad_norm": 3.364569664001465, "learning_rate": 9.555476287539069e-06, "loss": 1.8899, "step": 106530 }, { "epoch": 7.238415545590433, "grad_norm": 3.474562644958496, "learning_rate": 9.551229786655797e-06, "loss": 1.976, "step": 106535 }, { "epoch": 7.238755265661095, "grad_norm": 3.982701539993286, "learning_rate": 9.546983285772523e-06, "loss": 2.2208, "step": 106540 }, { "epoch": 7.239094985731757, "grad_norm": 4.851825714111328, "learning_rate": 9.542736784889251e-06, "loss": 2.1671, "step": 106545 }, { "epoch": 7.239434705802418, "grad_norm": 3.848696708679199, "learning_rate": 9.53849028400598e-06, "loss": 1.9571, "step": 106550 }, { "epoch": 7.2397744258730805, "grad_norm": 3.7523884773254395, "learning_rate": 9.534243783122707e-06, "loss": 1.8923, "step": 106555 }, { "epoch": 7.240114145943743, "grad_norm": 4.34098482131958, "learning_rate": 9.529997282239435e-06, "loss": 2.1008, "step": 106560 }, { "epoch": 7.240453866014404, "grad_norm": 4.2428789138793945, "learning_rate": 9.525750781356163e-06, "loss": 2.0561, "step": 106565 }, { "epoch": 7.240793586085066, "grad_norm": 4.179928779602051, "learning_rate": 9.521504280472891e-06, "loss": 2.2849, "step": 106570 }, { "epoch": 7.241133306155728, "grad_norm": 3.9871068000793457, "learning_rate": 9.517257779589618e-06, "loss": 2.0986, "step": 106575 }, { "epoch": 7.241473026226389, "grad_norm": 3.6188254356384277, "learning_rate": 9.513011278706346e-06, "loss": 1.7812, "step": 106580 }, { "epoch": 7.241812746297051, "grad_norm": 4.503419399261475, "learning_rate": 9.508764777823075e-06, "loss": 2.0944, "step": 106585 }, { "epoch": 7.242152466367713, 
"grad_norm": 2.883890390396118, "learning_rate": 9.504518276939802e-06, "loss": 2.1307, "step": 106590 }, { "epoch": 7.2424921864383744, "grad_norm": 4.313569068908691, "learning_rate": 9.50027177605653e-06, "loss": 1.7441, "step": 106595 }, { "epoch": 7.2428319065090365, "grad_norm": 3.9062936305999756, "learning_rate": 9.496025275173258e-06, "loss": 2.0602, "step": 106600 }, { "epoch": 7.243171626579699, "grad_norm": 4.4143571853637695, "learning_rate": 9.491778774289986e-06, "loss": 1.935, "step": 106605 }, { "epoch": 7.24351134665036, "grad_norm": 3.2643280029296875, "learning_rate": 9.487532273406714e-06, "loss": 1.9582, "step": 106610 }, { "epoch": 7.243851066721022, "grad_norm": 5.079151630401611, "learning_rate": 9.483285772523442e-06, "loss": 1.9656, "step": 106615 }, { "epoch": 7.244190786791684, "grad_norm": 3.664245128631592, "learning_rate": 9.47903927164017e-06, "loss": 1.5555, "step": 106620 }, { "epoch": 7.244530506862345, "grad_norm": 3.076961040496826, "learning_rate": 9.474792770756896e-06, "loss": 1.9064, "step": 106625 }, { "epoch": 7.244870226933007, "grad_norm": 4.3253350257873535, "learning_rate": 9.470546269873624e-06, "loss": 2.0204, "step": 106630 }, { "epoch": 7.245209947003669, "grad_norm": 3.3253934383392334, "learning_rate": 9.466299768990352e-06, "loss": 2.21, "step": 106635 }, { "epoch": 7.2455496670743305, "grad_norm": 3.0988173484802246, "learning_rate": 9.46205326810708e-06, "loss": 1.8906, "step": 106640 }, { "epoch": 7.2458893871449925, "grad_norm": 3.7701449394226074, "learning_rate": 9.457806767223808e-06, "loss": 2.0076, "step": 106645 }, { "epoch": 7.246229107215655, "grad_norm": 3.112396240234375, "learning_rate": 9.453560266340536e-06, "loss": 2.2398, "step": 106650 }, { "epoch": 7.246568827286316, "grad_norm": 3.9724085330963135, "learning_rate": 9.449313765457264e-06, "loss": 1.7015, "step": 106655 }, { "epoch": 7.246908547356978, "grad_norm": 3.1257729530334473, "learning_rate": 9.44506726457399e-06, "loss": 1.8937, 
"step": 106660 }, { "epoch": 7.24724826742764, "grad_norm": 3.6258792877197266, "learning_rate": 9.440820763690718e-06, "loss": 2.1647, "step": 106665 }, { "epoch": 7.247587987498301, "grad_norm": 3.6450815200805664, "learning_rate": 9.436574262807448e-06, "loss": 1.6709, "step": 106670 }, { "epoch": 7.247927707568963, "grad_norm": 3.5209145545959473, "learning_rate": 9.432327761924174e-06, "loss": 1.8948, "step": 106675 }, { "epoch": 7.248267427639625, "grad_norm": 3.8162293434143066, "learning_rate": 9.428081261040902e-06, "loss": 2.0199, "step": 106680 }, { "epoch": 7.2486071477102865, "grad_norm": 3.9215362071990967, "learning_rate": 9.42383476015763e-06, "loss": 2.0473, "step": 106685 }, { "epoch": 7.2489468677809485, "grad_norm": 4.109232425689697, "learning_rate": 9.419588259274358e-06, "loss": 1.8026, "step": 106690 }, { "epoch": 7.249286587851611, "grad_norm": 3.336907148361206, "learning_rate": 9.415341758391086e-06, "loss": 1.927, "step": 106695 }, { "epoch": 7.249626307922272, "grad_norm": 3.4183945655822754, "learning_rate": 9.411095257507815e-06, "loss": 1.943, "step": 106700 }, { "epoch": 7.249966027992934, "grad_norm": 3.3368477821350098, "learning_rate": 9.406848756624543e-06, "loss": 1.8576, "step": 106705 }, { "epoch": 7.250305748063596, "grad_norm": 3.1182641983032227, "learning_rate": 9.402602255741269e-06, "loss": 1.7966, "step": 106710 }, { "epoch": 7.250645468134257, "grad_norm": 3.1318788528442383, "learning_rate": 9.398355754857997e-06, "loss": 1.7834, "step": 106715 }, { "epoch": 7.250985188204919, "grad_norm": 4.014058589935303, "learning_rate": 9.394109253974725e-06, "loss": 1.963, "step": 106720 }, { "epoch": 7.251324908275581, "grad_norm": 3.95778751373291, "learning_rate": 9.389862753091455e-06, "loss": 2.0215, "step": 106725 }, { "epoch": 7.2516646283462425, "grad_norm": 4.164260387420654, "learning_rate": 9.385616252208181e-06, "loss": 1.9286, "step": 106730 }, { "epoch": 7.2520043484169046, "grad_norm": 4.698274612426758, 
"learning_rate": 9.381369751324909e-06, "loss": 1.7617, "step": 106735 }, { "epoch": 7.252344068487567, "grad_norm": 3.7610504627227783, "learning_rate": 9.377123250441637e-06, "loss": 2.1183, "step": 106740 }, { "epoch": 7.252683788558228, "grad_norm": 3.773923873901367, "learning_rate": 9.372876749558363e-06, "loss": 1.8872, "step": 106745 }, { "epoch": 7.25302350862889, "grad_norm": 4.111847400665283, "learning_rate": 9.368630248675093e-06, "loss": 2.011, "step": 106750 }, { "epoch": 7.253363228699552, "grad_norm": 4.005049705505371, "learning_rate": 9.364383747791821e-06, "loss": 1.4894, "step": 106755 }, { "epoch": 7.253702948770213, "grad_norm": 3.412616014480591, "learning_rate": 9.360137246908547e-06, "loss": 2.0361, "step": 106760 }, { "epoch": 7.254042668840875, "grad_norm": 5.24266242980957, "learning_rate": 9.355890746025275e-06, "loss": 1.7208, "step": 106765 }, { "epoch": 7.254382388911537, "grad_norm": 4.018343448638916, "learning_rate": 9.351644245142003e-06, "loss": 1.5528, "step": 106770 }, { "epoch": 7.2547221089821985, "grad_norm": 4.748441696166992, "learning_rate": 9.347397744258731e-06, "loss": 2.0729, "step": 106775 }, { "epoch": 7.255061829052861, "grad_norm": 3.8481147289276123, "learning_rate": 9.34315124337546e-06, "loss": 1.9449, "step": 106780 }, { "epoch": 7.255401549123523, "grad_norm": 3.4491755962371826, "learning_rate": 9.338904742492187e-06, "loss": 1.7504, "step": 106785 }, { "epoch": 7.255741269194184, "grad_norm": 3.5858957767486572, "learning_rate": 9.334658241608915e-06, "loss": 1.9921, "step": 106790 }, { "epoch": 7.256080989264846, "grad_norm": 3.535592555999756, "learning_rate": 9.330411740725642e-06, "loss": 1.924, "step": 106795 }, { "epoch": 7.256420709335508, "grad_norm": 3.123685121536255, "learning_rate": 9.32616523984237e-06, "loss": 2.0071, "step": 106800 }, { "epoch": 7.256760429406169, "grad_norm": 3.840747594833374, "learning_rate": 9.321918738959098e-06, "loss": 1.8032, "step": 106805 }, { "epoch": 
7.257100149476831, "grad_norm": 5.080253601074219, "learning_rate": 9.317672238075827e-06, "loss": 1.9117, "step": 106810 }, { "epoch": 7.257439869547493, "grad_norm": 5.218811988830566, "learning_rate": 9.313425737192554e-06, "loss": 2.1429, "step": 106815 }, { "epoch": 7.2577795896181545, "grad_norm": 3.799513816833496, "learning_rate": 9.309179236309282e-06, "loss": 2.093, "step": 106820 }, { "epoch": 7.258119309688817, "grad_norm": 4.375325679779053, "learning_rate": 9.30493273542601e-06, "loss": 1.9907, "step": 106825 }, { "epoch": 7.258459029759479, "grad_norm": 3.1522018909454346, "learning_rate": 9.300686234542736e-06, "loss": 2.0226, "step": 106830 }, { "epoch": 7.25879874983014, "grad_norm": 4.10911226272583, "learning_rate": 9.296439733659466e-06, "loss": 2.1955, "step": 106835 }, { "epoch": 7.259138469900802, "grad_norm": 4.3593010902404785, "learning_rate": 9.292193232776194e-06, "loss": 1.9058, "step": 106840 }, { "epoch": 7.259478189971463, "grad_norm": 3.899686336517334, "learning_rate": 9.28794673189292e-06, "loss": 2.1623, "step": 106845 }, { "epoch": 7.259817910042125, "grad_norm": 2.778902292251587, "learning_rate": 9.283700231009648e-06, "loss": 1.8558, "step": 106850 }, { "epoch": 7.260157630112787, "grad_norm": 3.607208013534546, "learning_rate": 9.279453730126376e-06, "loss": 1.9717, "step": 106855 }, { "epoch": 7.260497350183448, "grad_norm": 3.9845662117004395, "learning_rate": 9.275207229243104e-06, "loss": 1.8791, "step": 106860 }, { "epoch": 7.2608370702541105, "grad_norm": 4.306136608123779, "learning_rate": 9.270960728359832e-06, "loss": 1.6442, "step": 106865 }, { "epoch": 7.261176790324773, "grad_norm": 3.5880141258239746, "learning_rate": 9.26671422747656e-06, "loss": 2.1632, "step": 106870 }, { "epoch": 7.261516510395434, "grad_norm": 2.5675270557403564, "learning_rate": 9.262467726593288e-06, "loss": 1.9185, "step": 106875 }, { "epoch": 7.261856230466096, "grad_norm": 3.743748903274536, "learning_rate": 9.258221225710014e-06, 
"loss": 2.149, "step": 106880 }, { "epoch": 7.262195950536758, "grad_norm": 3.562494993209839, "learning_rate": 9.253974724826742e-06, "loss": 2.4435, "step": 106885 }, { "epoch": 7.262535670607419, "grad_norm": 4.185191631317139, "learning_rate": 9.249728223943472e-06, "loss": 1.7471, "step": 106890 }, { "epoch": 7.262875390678081, "grad_norm": 3.8251590728759766, "learning_rate": 9.2454817230602e-06, "loss": 1.8487, "step": 106895 }, { "epoch": 7.263215110748743, "grad_norm": 3.887763500213623, "learning_rate": 9.241235222176926e-06, "loss": 1.8526, "step": 106900 }, { "epoch": 7.2635548308194045, "grad_norm": 4.251080513000488, "learning_rate": 9.236988721293654e-06, "loss": 1.8623, "step": 106905 }, { "epoch": 7.2638945508900665, "grad_norm": 4.0295562744140625, "learning_rate": 9.232742220410382e-06, "loss": 2.0502, "step": 106910 }, { "epoch": 7.264234270960729, "grad_norm": 3.342503786087036, "learning_rate": 9.22849571952711e-06, "loss": 1.8799, "step": 106915 }, { "epoch": 7.26457399103139, "grad_norm": 4.45082426071167, "learning_rate": 9.224249218643838e-06, "loss": 2.0644, "step": 106920 }, { "epoch": 7.264913711102052, "grad_norm": 3.976391077041626, "learning_rate": 9.220002717760566e-06, "loss": 2.0112, "step": 106925 }, { "epoch": 7.265253431172714, "grad_norm": 3.600985288619995, "learning_rate": 9.215756216877293e-06, "loss": 1.8675, "step": 106930 }, { "epoch": 7.265593151243375, "grad_norm": 4.092282772064209, "learning_rate": 9.21150971599402e-06, "loss": 1.9427, "step": 106935 }, { "epoch": 7.265932871314037, "grad_norm": 2.5831925868988037, "learning_rate": 9.207263215110749e-06, "loss": 1.9677, "step": 106940 }, { "epoch": 7.266272591384699, "grad_norm": 3.519573211669922, "learning_rate": 9.203016714227478e-06, "loss": 2.0316, "step": 106945 }, { "epoch": 7.2666123114553605, "grad_norm": 4.272311687469482, "learning_rate": 9.198770213344205e-06, "loss": 2.172, "step": 106950 }, { "epoch": 7.2669520315260225, "grad_norm": 3.5484836101531982, 
"learning_rate": 9.194523712460933e-06, "loss": 2.0314, "step": 106955 }, { "epoch": 7.267291751596685, "grad_norm": 3.5892279148101807, "learning_rate": 9.19027721157766e-06, "loss": 1.9471, "step": 106960 }, { "epoch": 7.267631471667346, "grad_norm": 5.045549392700195, "learning_rate": 9.186030710694387e-06, "loss": 1.9341, "step": 106965 }, { "epoch": 7.267971191738008, "grad_norm": 3.969175100326538, "learning_rate": 9.181784209811117e-06, "loss": 1.939, "step": 106970 }, { "epoch": 7.26831091180867, "grad_norm": 4.0371270179748535, "learning_rate": 9.177537708927845e-06, "loss": 2.1294, "step": 106975 }, { "epoch": 7.268650631879331, "grad_norm": 3.991313934326172, "learning_rate": 9.173291208044573e-06, "loss": 2.1029, "step": 106980 }, { "epoch": 7.268990351949993, "grad_norm": 4.388463497161865, "learning_rate": 9.169044707161299e-06, "loss": 1.896, "step": 106985 }, { "epoch": 7.269330072020655, "grad_norm": 4.1466498374938965, "learning_rate": 9.164798206278027e-06, "loss": 2.1478, "step": 106990 }, { "epoch": 7.2696697920913165, "grad_norm": 4.224837779998779, "learning_rate": 9.160551705394755e-06, "loss": 1.9784, "step": 106995 }, { "epoch": 7.2700095121619785, "grad_norm": 4.058828830718994, "learning_rate": 9.156305204511483e-06, "loss": 1.904, "step": 107000 }, { "epoch": 7.270349232232641, "grad_norm": 3.3684821128845215, "learning_rate": 9.152058703628211e-06, "loss": 2.0348, "step": 107005 }, { "epoch": 7.270688952303302, "grad_norm": 3.5371787548065186, "learning_rate": 9.147812202744939e-06, "loss": 1.921, "step": 107010 }, { "epoch": 7.271028672373964, "grad_norm": 3.3613712787628174, "learning_rate": 9.143565701861665e-06, "loss": 2.1979, "step": 107015 }, { "epoch": 7.271368392444626, "grad_norm": 4.190822124481201, "learning_rate": 9.139319200978393e-06, "loss": 1.7528, "step": 107020 }, { "epoch": 7.271708112515287, "grad_norm": 4.749336242675781, "learning_rate": 9.135072700095121e-06, "loss": 2.0521, "step": 107025 }, { "epoch": 
7.272047832585949, "grad_norm": 3.759462356567383, "learning_rate": 9.130826199211851e-06, "loss": 1.7503, "step": 107030 }, { "epoch": 7.272387552656611, "grad_norm": 3.568542957305908, "learning_rate": 9.126579698328577e-06, "loss": 1.7491, "step": 107035 }, { "epoch": 7.2727272727272725, "grad_norm": 4.012182235717773, "learning_rate": 9.122333197445305e-06, "loss": 1.975, "step": 107040 }, { "epoch": 7.273066992797935, "grad_norm": 3.689552068710327, "learning_rate": 9.118086696562033e-06, "loss": 2.1692, "step": 107045 }, { "epoch": 7.273406712868597, "grad_norm": 3.5680789947509766, "learning_rate": 9.11384019567876e-06, "loss": 1.8253, "step": 107050 }, { "epoch": 7.273746432939258, "grad_norm": 3.4571847915649414, "learning_rate": 9.10959369479549e-06, "loss": 1.8408, "step": 107055 }, { "epoch": 7.27408615300992, "grad_norm": 4.312124252319336, "learning_rate": 9.105347193912218e-06, "loss": 1.9791, "step": 107060 }, { "epoch": 7.274425873080582, "grad_norm": 3.59015154838562, "learning_rate": 9.101100693028946e-06, "loss": 1.8832, "step": 107065 }, { "epoch": 7.274765593151243, "grad_norm": 3.1984591484069824, "learning_rate": 9.096854192145672e-06, "loss": 1.8496, "step": 107070 }, { "epoch": 7.275105313221905, "grad_norm": 3.6412484645843506, "learning_rate": 9.0926076912624e-06, "loss": 2.0142, "step": 107075 }, { "epoch": 7.275445033292567, "grad_norm": 3.7891478538513184, "learning_rate": 9.088361190379128e-06, "loss": 2.1594, "step": 107080 }, { "epoch": 7.2757847533632285, "grad_norm": 3.2033324241638184, "learning_rate": 9.084114689495856e-06, "loss": 2.0228, "step": 107085 }, { "epoch": 7.276124473433891, "grad_norm": 3.891680955886841, "learning_rate": 9.079868188612584e-06, "loss": 2.0293, "step": 107090 }, { "epoch": 7.276464193504553, "grad_norm": 3.180312156677246, "learning_rate": 9.075621687729312e-06, "loss": 2.0514, "step": 107095 }, { "epoch": 7.276803913575214, "grad_norm": 3.999119758605957, "learning_rate": 9.071375186846038e-06, 
"loss": 1.9671, "step": 107100 }, { "epoch": 7.277143633645876, "grad_norm": 6.024993419647217, "learning_rate": 9.067128685962766e-06, "loss": 2.2476, "step": 107105 }, { "epoch": 7.277483353716538, "grad_norm": 3.0397627353668213, "learning_rate": 9.062882185079496e-06, "loss": 2.0879, "step": 107110 }, { "epoch": 7.277823073787199, "grad_norm": 3.169381618499756, "learning_rate": 9.058635684196224e-06, "loss": 2.3404, "step": 107115 }, { "epoch": 7.278162793857861, "grad_norm": 3.450709342956543, "learning_rate": 9.05438918331295e-06, "loss": 2.0327, "step": 107120 }, { "epoch": 7.278502513928522, "grad_norm": 3.870213270187378, "learning_rate": 9.050142682429678e-06, "loss": 2.1376, "step": 107125 }, { "epoch": 7.2788422339991845, "grad_norm": 4.055992603302002, "learning_rate": 9.045896181546406e-06, "loss": 2.0315, "step": 107130 }, { "epoch": 7.279181954069847, "grad_norm": 3.8956246376037598, "learning_rate": 9.041649680663134e-06, "loss": 1.7838, "step": 107135 }, { "epoch": 7.279521674140508, "grad_norm": 6.477571964263916, "learning_rate": 9.037403179779862e-06, "loss": 1.793, "step": 107140 }, { "epoch": 7.27986139421117, "grad_norm": 4.093057155609131, "learning_rate": 9.03315667889659e-06, "loss": 1.9311, "step": 107145 }, { "epoch": 7.280201114281832, "grad_norm": 3.5159993171691895, "learning_rate": 9.028910178013318e-06, "loss": 1.9796, "step": 107150 }, { "epoch": 7.280540834352493, "grad_norm": 3.7625324726104736, "learning_rate": 9.024663677130045e-06, "loss": 2.0057, "step": 107155 }, { "epoch": 7.280880554423155, "grad_norm": 3.2644028663635254, "learning_rate": 9.020417176246773e-06, "loss": 1.839, "step": 107160 }, { "epoch": 7.281220274493817, "grad_norm": 3.292293071746826, "learning_rate": 9.0161706753635e-06, "loss": 1.8332, "step": 107165 }, { "epoch": 7.2815599945644784, "grad_norm": 4.696841239929199, "learning_rate": 9.011924174480229e-06, "loss": 1.8632, "step": 107170 }, { "epoch": 7.2818997146351405, "grad_norm": 
3.4697577953338623, "learning_rate": 9.007677673596957e-06, "loss": 1.9771, "step": 107175 }, { "epoch": 7.282239434705803, "grad_norm": 4.482059955596924, "learning_rate": 9.003431172713685e-06, "loss": 1.9612, "step": 107180 }, { "epoch": 7.282579154776464, "grad_norm": 3.1510226726531982, "learning_rate": 8.999184671830411e-06, "loss": 1.9922, "step": 107185 }, { "epoch": 7.282918874847126, "grad_norm": 4.327913761138916, "learning_rate": 8.994938170947139e-06, "loss": 1.9504, "step": 107190 }, { "epoch": 7.283258594917788, "grad_norm": 4.097372531890869, "learning_rate": 8.990691670063869e-06, "loss": 1.828, "step": 107195 }, { "epoch": 7.283598314988449, "grad_norm": 4.103838920593262, "learning_rate": 8.986445169180597e-06, "loss": 1.8738, "step": 107200 }, { "epoch": 7.283938035059111, "grad_norm": 3.7798876762390137, "learning_rate": 8.982198668297323e-06, "loss": 2.0209, "step": 107205 }, { "epoch": 7.284277755129773, "grad_norm": 3.563124656677246, "learning_rate": 8.977952167414051e-06, "loss": 1.9363, "step": 107210 }, { "epoch": 7.2846174752004345, "grad_norm": 3.5704479217529297, "learning_rate": 8.973705666530779e-06, "loss": 1.8145, "step": 107215 }, { "epoch": 7.2849571952710965, "grad_norm": 4.636709690093994, "learning_rate": 8.969459165647507e-06, "loss": 1.8067, "step": 107220 }, { "epoch": 7.285296915341759, "grad_norm": 3.917607069015503, "learning_rate": 8.965212664764235e-06, "loss": 1.8438, "step": 107225 }, { "epoch": 7.28563663541242, "grad_norm": 4.339228630065918, "learning_rate": 8.960966163880963e-06, "loss": 2.265, "step": 107230 }, { "epoch": 7.285976355483082, "grad_norm": 4.004423141479492, "learning_rate": 8.956719662997691e-06, "loss": 1.8001, "step": 107235 }, { "epoch": 7.286316075553744, "grad_norm": 2.9996867179870605, "learning_rate": 8.952473162114417e-06, "loss": 1.8107, "step": 107240 }, { "epoch": 7.286655795624405, "grad_norm": 4.4211578369140625, "learning_rate": 8.948226661231145e-06, "loss": 1.7523, "step": 107245 
}, { "epoch": 7.286995515695067, "grad_norm": 5.146877765655518, "learning_rate": 8.943980160347875e-06, "loss": 1.8238, "step": 107250 }, { "epoch": 7.287335235765729, "grad_norm": 3.7782628536224365, "learning_rate": 8.939733659464601e-06, "loss": 1.9242, "step": 107255 }, { "epoch": 7.2876749558363905, "grad_norm": 3.4658830165863037, "learning_rate": 8.93548715858133e-06, "loss": 2.2441, "step": 107260 }, { "epoch": 7.2880146759070525, "grad_norm": 3.679750919342041, "learning_rate": 8.931240657698057e-06, "loss": 1.7138, "step": 107265 }, { "epoch": 7.288354395977715, "grad_norm": 2.9283957481384277, "learning_rate": 8.926994156814784e-06, "loss": 1.9385, "step": 107270 }, { "epoch": 7.288694116048376, "grad_norm": 4.278178691864014, "learning_rate": 8.922747655931513e-06, "loss": 2.1399, "step": 107275 }, { "epoch": 7.289033836119038, "grad_norm": 3.8561558723449707, "learning_rate": 8.918501155048241e-06, "loss": 1.7713, "step": 107280 }, { "epoch": 7.2893735561897, "grad_norm": 4.1898603439331055, "learning_rate": 8.91425465416497e-06, "loss": 1.9642, "step": 107285 }, { "epoch": 7.289713276260361, "grad_norm": 3.1014723777770996, "learning_rate": 8.910008153281696e-06, "loss": 2.1355, "step": 107290 }, { "epoch": 7.290052996331023, "grad_norm": 4.102346420288086, "learning_rate": 8.905761652398424e-06, "loss": 1.9605, "step": 107295 }, { "epoch": 7.290392716401685, "grad_norm": 3.7968084812164307, "learning_rate": 8.901515151515152e-06, "loss": 2.2118, "step": 107300 }, { "epoch": 7.2907324364723465, "grad_norm": 3.9046146869659424, "learning_rate": 8.89726865063188e-06, "loss": 1.9461, "step": 107305 }, { "epoch": 7.2910721565430086, "grad_norm": 4.03298807144165, "learning_rate": 8.893022149748608e-06, "loss": 1.9719, "step": 107310 }, { "epoch": 7.291411876613671, "grad_norm": 3.070147752761841, "learning_rate": 8.888775648865336e-06, "loss": 2.0571, "step": 107315 }, { "epoch": 7.291751596684332, "grad_norm": 4.297054767608643, "learning_rate": 
8.884529147982064e-06, "loss": 2.0212, "step": 107320 }, { "epoch": 7.292091316754994, "grad_norm": 4.905977249145508, "learning_rate": 8.88028264709879e-06, "loss": 2.0339, "step": 107325 }, { "epoch": 7.292431036825656, "grad_norm": 3.8322412967681885, "learning_rate": 8.87603614621552e-06, "loss": 2.1436, "step": 107330 }, { "epoch": 7.292770756896317, "grad_norm": 3.7233407497406006, "learning_rate": 8.871789645332248e-06, "loss": 1.9318, "step": 107335 }, { "epoch": 7.293110476966979, "grad_norm": 3.6057424545288086, "learning_rate": 8.867543144448974e-06, "loss": 2.1456, "step": 107340 }, { "epoch": 7.293450197037641, "grad_norm": 3.272082567214966, "learning_rate": 8.863296643565702e-06, "loss": 2.2188, "step": 107345 }, { "epoch": 7.2937899171083025, "grad_norm": 3.61427903175354, "learning_rate": 8.85905014268243e-06, "loss": 2.2229, "step": 107350 }, { "epoch": 7.294129637178965, "grad_norm": 3.423363447189331, "learning_rate": 8.854803641799158e-06, "loss": 1.8045, "step": 107355 }, { "epoch": 7.294469357249627, "grad_norm": 3.3408145904541016, "learning_rate": 8.850557140915886e-06, "loss": 2.0873, "step": 107360 }, { "epoch": 7.294809077320288, "grad_norm": 3.7761027812957764, "learning_rate": 8.846310640032614e-06, "loss": 2.086, "step": 107365 }, { "epoch": 7.29514879739095, "grad_norm": 4.257999897003174, "learning_rate": 8.842064139149342e-06, "loss": 1.7652, "step": 107370 }, { "epoch": 7.295488517461612, "grad_norm": 3.4876532554626465, "learning_rate": 8.837817638266068e-06, "loss": 1.9651, "step": 107375 }, { "epoch": 7.295828237532273, "grad_norm": 4.348387718200684, "learning_rate": 8.833571137382796e-06, "loss": 1.8842, "step": 107380 }, { "epoch": 7.296167957602935, "grad_norm": 4.482327938079834, "learning_rate": 8.829324636499524e-06, "loss": 1.9841, "step": 107385 }, { "epoch": 7.296507677673597, "grad_norm": 4.261732578277588, "learning_rate": 8.825078135616252e-06, "loss": 2.302, "step": 107390 }, { "epoch": 7.2968473977442585, 
"grad_norm": 3.8357186317443848, "learning_rate": 8.82083163473298e-06, "loss": 1.7834, "step": 107395 }, { "epoch": 7.297187117814921, "grad_norm": 3.9293556213378906, "learning_rate": 8.816585133849708e-06, "loss": 2.2109, "step": 107400 }, { "epoch": 7.297526837885583, "grad_norm": 3.1897377967834473, "learning_rate": 8.812338632966436e-06, "loss": 2.0053, "step": 107405 }, { "epoch": 7.297866557956244, "grad_norm": 4.640474796295166, "learning_rate": 8.808092132083163e-06, "loss": 1.9465, "step": 107410 }, { "epoch": 7.298206278026906, "grad_norm": 3.527750253677368, "learning_rate": 8.803845631199893e-06, "loss": 2.1697, "step": 107415 }, { "epoch": 7.298545998097568, "grad_norm": 4.212676048278809, "learning_rate": 8.79959913031662e-06, "loss": 1.9677, "step": 107420 }, { "epoch": 7.298885718168229, "grad_norm": 4.097541332244873, "learning_rate": 8.795352629433347e-06, "loss": 1.7877, "step": 107425 }, { "epoch": 7.299225438238891, "grad_norm": 3.564480781555176, "learning_rate": 8.791106128550075e-06, "loss": 2.1375, "step": 107430 }, { "epoch": 7.299565158309553, "grad_norm": 4.414603233337402, "learning_rate": 8.786859627666803e-06, "loss": 1.8232, "step": 107435 }, { "epoch": 7.2999048783802145, "grad_norm": 3.320777416229248, "learning_rate": 8.78261312678353e-06, "loss": 1.9075, "step": 107440 }, { "epoch": 7.300244598450877, "grad_norm": 4.119527339935303, "learning_rate": 8.778366625900259e-06, "loss": 1.8983, "step": 107445 }, { "epoch": 7.300584318521539, "grad_norm": 4.20772647857666, "learning_rate": 8.774120125016987e-06, "loss": 2.0439, "step": 107450 }, { "epoch": 7.3009240385922, "grad_norm": 2.978456497192383, "learning_rate": 8.769873624133715e-06, "loss": 1.79, "step": 107455 }, { "epoch": 7.301263758662862, "grad_norm": 3.8905696868896484, "learning_rate": 8.765627123250441e-06, "loss": 1.7285, "step": 107460 }, { "epoch": 7.301603478733524, "grad_norm": 3.338745594024658, "learning_rate": 8.76138062236717e-06, "loss": 2.0332, "step": 
107465 }, { "epoch": 7.301943198804185, "grad_norm": 4.248244762420654, "learning_rate": 8.757134121483899e-06, "loss": 2.1958, "step": 107470 }, { "epoch": 7.302282918874847, "grad_norm": 4.402967929840088, "learning_rate": 8.752887620600625e-06, "loss": 1.8725, "step": 107475 }, { "epoch": 7.302622638945509, "grad_norm": 2.93510103225708, "learning_rate": 8.748641119717353e-06, "loss": 2.0711, "step": 107480 }, { "epoch": 7.3029623590161705, "grad_norm": 3.8888752460479736, "learning_rate": 8.744394618834081e-06, "loss": 2.0498, "step": 107485 }, { "epoch": 7.303302079086833, "grad_norm": 3.477889060974121, "learning_rate": 8.74014811795081e-06, "loss": 1.8153, "step": 107490 }, { "epoch": 7.303641799157495, "grad_norm": 4.42900276184082, "learning_rate": 8.735901617067537e-06, "loss": 1.7961, "step": 107495 }, { "epoch": 7.303981519228156, "grad_norm": 4.267581462860107, "learning_rate": 8.731655116184265e-06, "loss": 1.8887, "step": 107500 }, { "epoch": 7.304321239298818, "grad_norm": 3.690150260925293, "learning_rate": 8.727408615300993e-06, "loss": 2.3115, "step": 107505 }, { "epoch": 7.30466095936948, "grad_norm": 4.591681003570557, "learning_rate": 8.72316211441772e-06, "loss": 1.7416, "step": 107510 }, { "epoch": 7.305000679440141, "grad_norm": 4.579998970031738, "learning_rate": 8.718915613534448e-06, "loss": 2.0424, "step": 107515 }, { "epoch": 7.305340399510803, "grad_norm": 3.547029495239258, "learning_rate": 8.714669112651176e-06, "loss": 2.1557, "step": 107520 }, { "epoch": 7.3056801195814645, "grad_norm": 4.24967098236084, "learning_rate": 8.710422611767904e-06, "loss": 1.6882, "step": 107525 }, { "epoch": 7.3060198396521265, "grad_norm": 3.1483993530273438, "learning_rate": 8.706176110884632e-06, "loss": 2.0607, "step": 107530 }, { "epoch": 7.306359559722789, "grad_norm": 4.598372936248779, "learning_rate": 8.70192961000136e-06, "loss": 2.1404, "step": 107535 }, { "epoch": 7.30669927979345, "grad_norm": 4.976344585418701, "learning_rate": 
8.697683109118088e-06, "loss": 1.7654, "step": 107540 }, { "epoch": 7.307038999864112, "grad_norm": 4.069079875946045, "learning_rate": 8.693436608234814e-06, "loss": 1.9352, "step": 107545 }, { "epoch": 7.307378719934774, "grad_norm": 4.337785720825195, "learning_rate": 8.689190107351542e-06, "loss": 2.0058, "step": 107550 }, { "epoch": 7.307718440005435, "grad_norm": 4.003718852996826, "learning_rate": 8.684943606468272e-06, "loss": 2.1975, "step": 107555 }, { "epoch": 7.308058160076097, "grad_norm": 3.862095594406128, "learning_rate": 8.680697105584998e-06, "loss": 2.0679, "step": 107560 }, { "epoch": 7.308397880146759, "grad_norm": 3.219447612762451, "learning_rate": 8.676450604701726e-06, "loss": 1.9066, "step": 107565 }, { "epoch": 7.3087376002174205, "grad_norm": 3.9709219932556152, "learning_rate": 8.672204103818454e-06, "loss": 2.3966, "step": 107570 }, { "epoch": 7.3090773202880825, "grad_norm": 3.980708599090576, "learning_rate": 8.667957602935182e-06, "loss": 2.078, "step": 107575 }, { "epoch": 7.309417040358745, "grad_norm": 3.788071870803833, "learning_rate": 8.66371110205191e-06, "loss": 2.0265, "step": 107580 }, { "epoch": 7.309756760429406, "grad_norm": 3.367112159729004, "learning_rate": 8.659464601168638e-06, "loss": 1.9661, "step": 107585 }, { "epoch": 7.310096480500068, "grad_norm": 3.491149663925171, "learning_rate": 8.655218100285366e-06, "loss": 1.8959, "step": 107590 }, { "epoch": 7.31043620057073, "grad_norm": 3.656043767929077, "learning_rate": 8.650971599402092e-06, "loss": 1.8464, "step": 107595 }, { "epoch": 7.310775920641391, "grad_norm": 3.9724514484405518, "learning_rate": 8.64672509851882e-06, "loss": 2.1197, "step": 107600 }, { "epoch": 7.311115640712053, "grad_norm": 3.7325234413146973, "learning_rate": 8.642478597635548e-06, "loss": 1.9741, "step": 107605 }, { "epoch": 7.311455360782715, "grad_norm": 3.7375059127807617, "learning_rate": 8.638232096752276e-06, "loss": 2.1427, "step": 107610 }, { "epoch": 7.3117950808533765, 
"grad_norm": 3.940479278564453, "learning_rate": 8.633985595869004e-06, "loss": 2.157, "step": 107615 }, { "epoch": 7.312134800924039, "grad_norm": 3.4046971797943115, "learning_rate": 8.629739094985732e-06, "loss": 1.9351, "step": 107620 }, { "epoch": 7.312474520994701, "grad_norm": 3.369886875152588, "learning_rate": 8.62549259410246e-06, "loss": 1.9399, "step": 107625 }, { "epoch": 7.312814241065362, "grad_norm": 3.7428107261657715, "learning_rate": 8.621246093219187e-06, "loss": 2.1148, "step": 107630 }, { "epoch": 7.313153961136024, "grad_norm": 2.994722843170166, "learning_rate": 8.616999592335916e-06, "loss": 1.8152, "step": 107635 }, { "epoch": 7.313493681206686, "grad_norm": 3.9009780883789062, "learning_rate": 8.612753091452644e-06, "loss": 1.8524, "step": 107640 }, { "epoch": 7.313833401277347, "grad_norm": 3.058337688446045, "learning_rate": 8.60850659056937e-06, "loss": 2.046, "step": 107645 }, { "epoch": 7.314173121348009, "grad_norm": 3.719449520111084, "learning_rate": 8.604260089686099e-06, "loss": 1.7484, "step": 107650 }, { "epoch": 7.314512841418671, "grad_norm": 4.309609889984131, "learning_rate": 8.600013588802827e-06, "loss": 1.8293, "step": 107655 }, { "epoch": 7.3148525614893325, "grad_norm": 3.7352635860443115, "learning_rate": 8.595767087919555e-06, "loss": 1.9361, "step": 107660 }, { "epoch": 7.315192281559995, "grad_norm": 4.701915264129639, "learning_rate": 8.591520587036283e-06, "loss": 1.9315, "step": 107665 }, { "epoch": 7.315532001630657, "grad_norm": 3.432927370071411, "learning_rate": 8.58727408615301e-06, "loss": 1.9982, "step": 107670 }, { "epoch": 7.315871721701318, "grad_norm": 3.753012180328369, "learning_rate": 8.583027585269739e-06, "loss": 2.2312, "step": 107675 }, { "epoch": 7.31621144177198, "grad_norm": 3.4821810722351074, "learning_rate": 8.578781084386465e-06, "loss": 2.0084, "step": 107680 }, { "epoch": 7.316551161842642, "grad_norm": 3.206522226333618, "learning_rate": 8.574534583503193e-06, "loss": 1.9959, "step": 
107685 }, { "epoch": 7.316890881913303, "grad_norm": 3.68812894821167, "learning_rate": 8.570288082619923e-06, "loss": 1.9448, "step": 107690 }, { "epoch": 7.317230601983965, "grad_norm": 3.3933913707733154, "learning_rate": 8.566041581736649e-06, "loss": 1.8035, "step": 107695 }, { "epoch": 7.317570322054627, "grad_norm": 3.4989538192749023, "learning_rate": 8.561795080853377e-06, "loss": 2.0744, "step": 107700 }, { "epoch": 7.3179100421252885, "grad_norm": 4.792697906494141, "learning_rate": 8.557548579970105e-06, "loss": 2.1068, "step": 107705 }, { "epoch": 7.318249762195951, "grad_norm": 4.302403450012207, "learning_rate": 8.553302079086833e-06, "loss": 1.7858, "step": 107710 }, { "epoch": 7.318589482266613, "grad_norm": 3.9436824321746826, "learning_rate": 8.549055578203561e-06, "loss": 2.0964, "step": 107715 }, { "epoch": 7.318929202337274, "grad_norm": 4.183358192443848, "learning_rate": 8.544809077320289e-06, "loss": 2.2578, "step": 107720 }, { "epoch": 7.319268922407936, "grad_norm": 3.790846824645996, "learning_rate": 8.540562576437017e-06, "loss": 1.7957, "step": 107725 }, { "epoch": 7.319608642478598, "grad_norm": 3.8552846908569336, "learning_rate": 8.536316075553743e-06, "loss": 1.9007, "step": 107730 }, { "epoch": 7.319948362549259, "grad_norm": 4.04565954208374, "learning_rate": 8.532069574670471e-06, "loss": 1.8982, "step": 107735 }, { "epoch": 7.320288082619921, "grad_norm": 4.3637003898620605, "learning_rate": 8.5278230737872e-06, "loss": 2.1574, "step": 107740 }, { "epoch": 7.320627802690583, "grad_norm": 3.390005588531494, "learning_rate": 8.523576572903927e-06, "loss": 1.9602, "step": 107745 }, { "epoch": 7.3209675227612445, "grad_norm": 4.81756067276001, "learning_rate": 8.519330072020655e-06, "loss": 2.0716, "step": 107750 }, { "epoch": 7.321307242831907, "grad_norm": 3.9886014461517334, "learning_rate": 8.515083571137383e-06, "loss": 1.754, "step": 107755 }, { "epoch": 7.321646962902569, "grad_norm": 3.178750991821289, "learning_rate": 
8.510837070254111e-06, "loss": 1.9814, "step": 107760 }, { "epoch": 7.32198668297323, "grad_norm": 3.106239080429077, "learning_rate": 8.506590569370838e-06, "loss": 1.9302, "step": 107765 }, { "epoch": 7.322326403043892, "grad_norm": 4.153517723083496, "learning_rate": 8.502344068487566e-06, "loss": 1.9715, "step": 107770 }, { "epoch": 7.322666123114554, "grad_norm": 3.593385696411133, "learning_rate": 8.498097567604296e-06, "loss": 1.8787, "step": 107775 }, { "epoch": 7.323005843185215, "grad_norm": 3.513530969619751, "learning_rate": 8.493851066721022e-06, "loss": 1.9112, "step": 107780 }, { "epoch": 7.323345563255877, "grad_norm": 3.434194803237915, "learning_rate": 8.48960456583775e-06, "loss": 2.1136, "step": 107785 }, { "epoch": 7.323685283326539, "grad_norm": 3.1914901733398438, "learning_rate": 8.485358064954478e-06, "loss": 1.8229, "step": 107790 }, { "epoch": 7.3240250033972005, "grad_norm": 3.317349672317505, "learning_rate": 8.48196086424786e-06, "loss": 2.0408, "step": 107795 }, { "epoch": 7.324364723467863, "grad_norm": 3.8791096210479736, "learning_rate": 8.477714363364588e-06, "loss": 1.9606, "step": 107800 }, { "epoch": 7.324704443538524, "grad_norm": 3.657238483428955, "learning_rate": 8.473467862481316e-06, "loss": 1.8848, "step": 107805 }, { "epoch": 7.325044163609186, "grad_norm": 3.411850690841675, "learning_rate": 8.469221361598044e-06, "loss": 2.1537, "step": 107810 }, { "epoch": 7.325383883679848, "grad_norm": 3.614347457885742, "learning_rate": 8.46497486071477e-06, "loss": 1.8947, "step": 107815 }, { "epoch": 7.325723603750509, "grad_norm": 3.2565815448760986, "learning_rate": 8.460728359831499e-06, "loss": 1.933, "step": 107820 }, { "epoch": 7.326063323821171, "grad_norm": 3.576227903366089, "learning_rate": 8.456481858948227e-06, "loss": 1.7543, "step": 107825 }, { "epoch": 7.326403043891833, "grad_norm": 4.084103584289551, "learning_rate": 8.452235358064956e-06, "loss": 2.1383, "step": 107830 }, { "epoch": 7.3267427639624945, 
"grad_norm": 3.2813186645507812, "learning_rate": 8.447988857181683e-06, "loss": 1.9359, "step": 107835 }, { "epoch": 7.3270824840331565, "grad_norm": 3.408357620239258, "learning_rate": 8.44374235629841e-06, "loss": 2.0361, "step": 107840 }, { "epoch": 7.327422204103819, "grad_norm": 4.064028739929199, "learning_rate": 8.439495855415139e-06, "loss": 1.9058, "step": 107845 }, { "epoch": 7.32776192417448, "grad_norm": 3.836721658706665, "learning_rate": 8.435249354531865e-06, "loss": 1.7983, "step": 107850 }, { "epoch": 7.328101644245142, "grad_norm": 4.165620803833008, "learning_rate": 8.431002853648595e-06, "loss": 2.1755, "step": 107855 }, { "epoch": 7.328441364315804, "grad_norm": 2.927701234817505, "learning_rate": 8.426756352765323e-06, "loss": 1.9331, "step": 107860 }, { "epoch": 7.328781084386465, "grad_norm": 3.729032278060913, "learning_rate": 8.42250985188205e-06, "loss": 1.9744, "step": 107865 }, { "epoch": 7.329120804457127, "grad_norm": 4.399544715881348, "learning_rate": 8.418263350998777e-06, "loss": 2.0665, "step": 107870 }, { "epoch": 7.329460524527789, "grad_norm": 3.4513227939605713, "learning_rate": 8.414016850115505e-06, "loss": 2.0319, "step": 107875 }, { "epoch": 7.3298002445984505, "grad_norm": 4.375942230224609, "learning_rate": 8.409770349232233e-06, "loss": 2.0548, "step": 107880 }, { "epoch": 7.3301399646691126, "grad_norm": 4.136377334594727, "learning_rate": 8.405523848348961e-06, "loss": 1.8718, "step": 107885 }, { "epoch": 7.330479684739775, "grad_norm": 2.939359426498413, "learning_rate": 8.401277347465689e-06, "loss": 2.0448, "step": 107890 }, { "epoch": 7.330819404810436, "grad_norm": 3.918463706970215, "learning_rate": 8.397030846582417e-06, "loss": 1.9098, "step": 107895 }, { "epoch": 7.331159124881098, "grad_norm": 4.425055503845215, "learning_rate": 8.392784345699143e-06, "loss": 2.2023, "step": 107900 }, { "epoch": 7.33149884495176, "grad_norm": 3.343667507171631, "learning_rate": 8.388537844815871e-06, "loss": 1.9835, 
"step": 107905 }, { "epoch": 7.331838565022421, "grad_norm": 3.4596407413482666, "learning_rate": 8.3842913439326e-06, "loss": 2.0203, "step": 107910 }, { "epoch": 7.332178285093083, "grad_norm": 3.657700300216675, "learning_rate": 8.380044843049329e-06, "loss": 2.143, "step": 107915 }, { "epoch": 7.332518005163745, "grad_norm": 4.043788433074951, "learning_rate": 8.375798342166055e-06, "loss": 1.8849, "step": 107920 }, { "epoch": 7.3328577252344065, "grad_norm": 3.226944923400879, "learning_rate": 8.371551841282783e-06, "loss": 1.8285, "step": 107925 }, { "epoch": 7.333197445305069, "grad_norm": 3.9321634769439697, "learning_rate": 8.367305340399511e-06, "loss": 1.9278, "step": 107930 }, { "epoch": 7.333537165375731, "grad_norm": 4.075100421905518, "learning_rate": 8.363058839516238e-06, "loss": 2.1888, "step": 107935 }, { "epoch": 7.333876885446392, "grad_norm": 4.408535480499268, "learning_rate": 8.358812338632967e-06, "loss": 2.1635, "step": 107940 }, { "epoch": 7.334216605517054, "grad_norm": 3.89327335357666, "learning_rate": 8.354565837749695e-06, "loss": 1.9008, "step": 107945 }, { "epoch": 7.334556325587716, "grad_norm": 3.5271923542022705, "learning_rate": 8.350319336866423e-06, "loss": 1.9424, "step": 107950 }, { "epoch": 7.334896045658377, "grad_norm": 3.280158519744873, "learning_rate": 8.34607283598315e-06, "loss": 1.9263, "step": 107955 }, { "epoch": 7.335235765729039, "grad_norm": 3.8131580352783203, "learning_rate": 8.341826335099878e-06, "loss": 2.0558, "step": 107960 }, { "epoch": 7.335575485799701, "grad_norm": 3.6630544662475586, "learning_rate": 8.337579834216606e-06, "loss": 1.8866, "step": 107965 }, { "epoch": 7.3359152058703625, "grad_norm": 3.5129668712615967, "learning_rate": 8.333333333333334e-06, "loss": 1.7276, "step": 107970 }, { "epoch": 7.336254925941025, "grad_norm": 3.6088149547576904, "learning_rate": 8.329086832450062e-06, "loss": 1.8716, "step": 107975 }, { "epoch": 7.336594646011687, "grad_norm": 4.331415176391602, 
"learning_rate": 8.32484033156679e-06, "loss": 2.0039, "step": 107980 }, { "epoch": 7.336934366082348, "grad_norm": 3.5146894454956055, "learning_rate": 8.320593830683516e-06, "loss": 2.2744, "step": 107985 }, { "epoch": 7.33727408615301, "grad_norm": 3.5190603733062744, "learning_rate": 8.316347329800244e-06, "loss": 1.6954, "step": 107990 }, { "epoch": 7.337613806223672, "grad_norm": 4.589356899261475, "learning_rate": 8.312100828916974e-06, "loss": 2.099, "step": 107995 }, { "epoch": 7.337953526294333, "grad_norm": 3.1995677947998047, "learning_rate": 8.307854328033702e-06, "loss": 2.0663, "step": 108000 }, { "epoch": 7.338293246364995, "grad_norm": 4.634527206420898, "learning_rate": 8.303607827150428e-06, "loss": 2.0333, "step": 108005 }, { "epoch": 7.338632966435657, "grad_norm": 3.568225383758545, "learning_rate": 8.299361326267156e-06, "loss": 1.9454, "step": 108010 }, { "epoch": 7.3389726865063185, "grad_norm": 2.716116428375244, "learning_rate": 8.295114825383884e-06, "loss": 1.9717, "step": 108015 }, { "epoch": 7.339312406576981, "grad_norm": 3.933387279510498, "learning_rate": 8.290868324500612e-06, "loss": 2.038, "step": 108020 }, { "epoch": 7.339652126647643, "grad_norm": 3.495469093322754, "learning_rate": 8.28662182361734e-06, "loss": 2.1347, "step": 108025 }, { "epoch": 7.339991846718304, "grad_norm": 3.4194867610931396, "learning_rate": 8.282375322734068e-06, "loss": 2.0919, "step": 108030 }, { "epoch": 7.340331566788966, "grad_norm": 4.081384658813477, "learning_rate": 8.278128821850796e-06, "loss": 1.8662, "step": 108035 }, { "epoch": 7.340671286859628, "grad_norm": 3.297502040863037, "learning_rate": 8.273882320967522e-06, "loss": 1.9556, "step": 108040 }, { "epoch": 7.341011006930289, "grad_norm": 4.6006083488464355, "learning_rate": 8.26963582008425e-06, "loss": 2.2478, "step": 108045 }, { "epoch": 7.341350727000951, "grad_norm": 3.842127799987793, "learning_rate": 8.265389319200978e-06, "loss": 1.8162, "step": 108050 }, { "epoch": 
7.341690447071613, "grad_norm": 3.891976833343506, "learning_rate": 8.261142818317706e-06, "loss": 1.8758, "step": 108055 }, { "epoch": 7.3420301671422745, "grad_norm": 3.2268259525299072, "learning_rate": 8.256896317434434e-06, "loss": 2.0612, "step": 108060 }, { "epoch": 7.342369887212937, "grad_norm": 3.350174903869629, "learning_rate": 8.252649816551162e-06, "loss": 1.9864, "step": 108065 }, { "epoch": 7.342709607283599, "grad_norm": 3.2861826419830322, "learning_rate": 8.248403315667889e-06, "loss": 2.2857, "step": 108070 }, { "epoch": 7.34304932735426, "grad_norm": 3.0937981605529785, "learning_rate": 8.244156814784617e-06, "loss": 1.7701, "step": 108075 }, { "epoch": 7.343389047424922, "grad_norm": 3.645698070526123, "learning_rate": 8.239910313901347e-06, "loss": 2.081, "step": 108080 }, { "epoch": 7.343728767495584, "grad_norm": 3.067629098892212, "learning_rate": 8.235663813018075e-06, "loss": 2.0727, "step": 108085 }, { "epoch": 7.344068487566245, "grad_norm": 3.800180673599243, "learning_rate": 8.2314173121348e-06, "loss": 2.053, "step": 108090 }, { "epoch": 7.344408207636907, "grad_norm": 3.615100622177124, "learning_rate": 8.227170811251529e-06, "loss": 1.8611, "step": 108095 }, { "epoch": 7.344747927707569, "grad_norm": 3.132997989654541, "learning_rate": 8.222924310368257e-06, "loss": 1.8729, "step": 108100 }, { "epoch": 7.3450876477782305, "grad_norm": 3.0648138523101807, "learning_rate": 8.218677809484985e-06, "loss": 1.9135, "step": 108105 }, { "epoch": 7.345427367848893, "grad_norm": 3.0922319889068604, "learning_rate": 8.214431308601713e-06, "loss": 2.181, "step": 108110 }, { "epoch": 7.345767087919555, "grad_norm": 3.2803385257720947, "learning_rate": 8.210184807718441e-06, "loss": 1.7243, "step": 108115 }, { "epoch": 7.346106807990216, "grad_norm": 4.347062110900879, "learning_rate": 8.205938306835169e-06, "loss": 1.6399, "step": 108120 }, { "epoch": 7.346446528060878, "grad_norm": 4.098795413970947, "learning_rate": 8.201691805951895e-06, 
"loss": 1.9919, "step": 108125 }, { "epoch": 7.34678624813154, "grad_norm": 3.7487168312072754, "learning_rate": 8.197445305068623e-06, "loss": 1.7247, "step": 108130 }, { "epoch": 7.347125968202201, "grad_norm": 3.434608221054077, "learning_rate": 8.193198804185353e-06, "loss": 1.7041, "step": 108135 }, { "epoch": 7.347465688272863, "grad_norm": 3.573864459991455, "learning_rate": 8.18895230330208e-06, "loss": 1.9835, "step": 108140 }, { "epoch": 7.347805408343525, "grad_norm": 4.0943603515625, "learning_rate": 8.184705802418807e-06, "loss": 1.8334, "step": 108145 }, { "epoch": 7.3481451284141865, "grad_norm": 4.561894416809082, "learning_rate": 8.180459301535535e-06, "loss": 2.0518, "step": 108150 }, { "epoch": 7.348484848484849, "grad_norm": 4.195528507232666, "learning_rate": 8.176212800652262e-06, "loss": 1.9886, "step": 108155 }, { "epoch": 7.348824568555511, "grad_norm": 3.4955451488494873, "learning_rate": 8.171966299768991e-06, "loss": 2.2571, "step": 108160 }, { "epoch": 7.349164288626172, "grad_norm": 4.3190484046936035, "learning_rate": 8.16771979888572e-06, "loss": 1.9534, "step": 108165 }, { "epoch": 7.349504008696834, "grad_norm": 4.374432563781738, "learning_rate": 8.163473298002447e-06, "loss": 1.952, "step": 108170 }, { "epoch": 7.349843728767496, "grad_norm": 3.8987903594970703, "learning_rate": 8.159226797119174e-06, "loss": 2.0825, "step": 108175 }, { "epoch": 7.350183448838157, "grad_norm": 3.657907485961914, "learning_rate": 8.154980296235902e-06, "loss": 2.1683, "step": 108180 }, { "epoch": 7.350523168908819, "grad_norm": 3.0364410877227783, "learning_rate": 8.15073379535263e-06, "loss": 1.8619, "step": 108185 }, { "epoch": 7.350862888979481, "grad_norm": 3.4450509548187256, "learning_rate": 8.146487294469358e-06, "loss": 1.9907, "step": 108190 }, { "epoch": 7.3512026090501426, "grad_norm": 4.226062297821045, "learning_rate": 8.142240793586086e-06, "loss": 1.9834, "step": 108195 }, { "epoch": 7.351542329120805, "grad_norm": 
3.803090810775757, "learning_rate": 8.137994292702814e-06, "loss": 1.8459, "step": 108200 }, { "epoch": 7.351882049191466, "grad_norm": 4.321966648101807, "learning_rate": 8.133747791819542e-06, "loss": 1.8416, "step": 108205 }, { "epoch": 7.352221769262128, "grad_norm": 3.128488063812256, "learning_rate": 8.129501290936268e-06, "loss": 1.9248, "step": 108210 }, { "epoch": 7.35256148933279, "grad_norm": 3.6031713485717773, "learning_rate": 8.125254790052998e-06, "loss": 2.2173, "step": 108215 }, { "epoch": 7.352901209403451, "grad_norm": 3.5992348194122314, "learning_rate": 8.121008289169726e-06, "loss": 2.1243, "step": 108220 }, { "epoch": 7.353240929474113, "grad_norm": 3.4338746070861816, "learning_rate": 8.116761788286452e-06, "loss": 2.1348, "step": 108225 }, { "epoch": 7.353580649544775, "grad_norm": 3.4624032974243164, "learning_rate": 8.11251528740318e-06, "loss": 2.0522, "step": 108230 }, { "epoch": 7.3539203696154365, "grad_norm": 4.594496250152588, "learning_rate": 8.108268786519908e-06, "loss": 1.9227, "step": 108235 }, { "epoch": 7.354260089686099, "grad_norm": 3.093129873275757, "learning_rate": 8.104022285636636e-06, "loss": 2.087, "step": 108240 }, { "epoch": 7.354599809756761, "grad_norm": 4.0973100662231445, "learning_rate": 8.099775784753364e-06, "loss": 2.0672, "step": 108245 }, { "epoch": 7.354939529827422, "grad_norm": 3.4946541786193848, "learning_rate": 8.095529283870092e-06, "loss": 1.9776, "step": 108250 }, { "epoch": 7.355279249898084, "grad_norm": 3.7813398838043213, "learning_rate": 8.09128278298682e-06, "loss": 1.9873, "step": 108255 }, { "epoch": 7.355618969968746, "grad_norm": 3.353151321411133, "learning_rate": 8.087036282103546e-06, "loss": 2.1284, "step": 108260 }, { "epoch": 7.355958690039407, "grad_norm": 3.6387383937835693, "learning_rate": 8.082789781220274e-06, "loss": 2.1115, "step": 108265 }, { "epoch": 7.356298410110069, "grad_norm": 3.1859703063964844, "learning_rate": 8.078543280337002e-06, "loss": 2.0257, "step": 108270 
}, { "epoch": 7.356638130180731, "grad_norm": 3.5120749473571777, "learning_rate": 8.07429677945373e-06, "loss": 2.2434, "step": 108275 }, { "epoch": 7.3569778502513925, "grad_norm": 3.642335891723633, "learning_rate": 8.070050278570458e-06, "loss": 2.0457, "step": 108280 }, { "epoch": 7.357317570322055, "grad_norm": 3.4349493980407715, "learning_rate": 8.065803777687186e-06, "loss": 2.239, "step": 108285 }, { "epoch": 7.357657290392717, "grad_norm": 3.772273063659668, "learning_rate": 8.061557276803914e-06, "loss": 1.7525, "step": 108290 }, { "epoch": 7.357997010463378, "grad_norm": 3.8245158195495605, "learning_rate": 8.05731077592064e-06, "loss": 1.9868, "step": 108295 }, { "epoch": 7.35833673053404, "grad_norm": 3.8845183849334717, "learning_rate": 8.05306427503737e-06, "loss": 1.8394, "step": 108300 }, { "epoch": 7.358676450604702, "grad_norm": 4.090127944946289, "learning_rate": 8.048817774154098e-06, "loss": 2.1172, "step": 108305 }, { "epoch": 7.359016170675363, "grad_norm": 3.3722922801971436, "learning_rate": 8.044571273270825e-06, "loss": 1.864, "step": 108310 }, { "epoch": 7.359355890746025, "grad_norm": 3.167891025543213, "learning_rate": 8.040324772387553e-06, "loss": 1.9969, "step": 108315 }, { "epoch": 7.359695610816687, "grad_norm": 3.4568984508514404, "learning_rate": 8.03607827150428e-06, "loss": 2.0186, "step": 108320 }, { "epoch": 7.3600353308873485, "grad_norm": 3.860175371170044, "learning_rate": 8.031831770621009e-06, "loss": 2.1669, "step": 108325 }, { "epoch": 7.360375050958011, "grad_norm": 3.1542022228240967, "learning_rate": 8.027585269737737e-06, "loss": 2.0722, "step": 108330 }, { "epoch": 7.360714771028673, "grad_norm": 2.8076398372650146, "learning_rate": 8.023338768854465e-06, "loss": 2.2078, "step": 108335 }, { "epoch": 7.361054491099334, "grad_norm": 4.987608909606934, "learning_rate": 8.019092267971193e-06, "loss": 1.9039, "step": 108340 }, { "epoch": 7.361394211169996, "grad_norm": 3.7819015979766846, "learning_rate": 
8.014845767087919e-06, "loss": 1.75, "step": 108345 }, { "epoch": 7.361733931240658, "grad_norm": 4.462855339050293, "learning_rate": 8.010599266204647e-06, "loss": 1.7857, "step": 108350 }, { "epoch": 7.362073651311319, "grad_norm": 3.7479770183563232, "learning_rate": 8.006352765321377e-06, "loss": 1.7707, "step": 108355 }, { "epoch": 7.362413371381981, "grad_norm": 3.7093050479888916, "learning_rate": 8.002106264438103e-06, "loss": 2.0177, "step": 108360 }, { "epoch": 7.362753091452643, "grad_norm": 3.622657299041748, "learning_rate": 7.997859763554831e-06, "loss": 2.1296, "step": 108365 }, { "epoch": 7.3630928115233045, "grad_norm": 3.7103044986724854, "learning_rate": 7.993613262671559e-06, "loss": 2.129, "step": 108370 }, { "epoch": 7.363432531593967, "grad_norm": 4.38754415512085, "learning_rate": 7.989366761788287e-06, "loss": 2.0517, "step": 108375 }, { "epoch": 7.363772251664629, "grad_norm": 4.313390254974365, "learning_rate": 7.985120260905015e-06, "loss": 2.2482, "step": 108380 }, { "epoch": 7.36411197173529, "grad_norm": 4.228766441345215, "learning_rate": 7.980873760021743e-06, "loss": 2.1703, "step": 108385 }, { "epoch": 7.364451691805952, "grad_norm": 3.919673204421997, "learning_rate": 7.976627259138471e-06, "loss": 1.6343, "step": 108390 }, { "epoch": 7.364791411876614, "grad_norm": 3.887443780899048, "learning_rate": 7.972380758255197e-06, "loss": 1.9004, "step": 108395 }, { "epoch": 7.365131131947275, "grad_norm": 4.205299377441406, "learning_rate": 7.968134257371925e-06, "loss": 2.0127, "step": 108400 }, { "epoch": 7.365470852017937, "grad_norm": 3.7687973976135254, "learning_rate": 7.963887756488653e-06, "loss": 1.7348, "step": 108405 }, { "epoch": 7.365810572088599, "grad_norm": 4.063805103302002, "learning_rate": 7.959641255605381e-06, "loss": 1.7939, "step": 108410 }, { "epoch": 7.3661502921592605, "grad_norm": 4.019140243530273, "learning_rate": 7.95539475472211e-06, "loss": 1.9091, "step": 108415 }, { "epoch": 7.366490012229923, 
"grad_norm": 4.084749221801758, "learning_rate": 7.951148253838837e-06, "loss": 1.8787, "step": 108420 }, { "epoch": 7.366829732300585, "grad_norm": 3.894169807434082, "learning_rate": 7.946901752955565e-06, "loss": 2.2089, "step": 108425 }, { "epoch": 7.367169452371246, "grad_norm": 3.6223878860473633, "learning_rate": 7.942655252072292e-06, "loss": 1.721, "step": 108430 }, { "epoch": 7.367509172441908, "grad_norm": 4.501817226409912, "learning_rate": 7.938408751189022e-06, "loss": 2.1297, "step": 108435 }, { "epoch": 7.36784889251257, "grad_norm": 3.8472695350646973, "learning_rate": 7.93416225030575e-06, "loss": 2.1612, "step": 108440 }, { "epoch": 7.368188612583231, "grad_norm": 4.003784656524658, "learning_rate": 7.929915749422476e-06, "loss": 1.9509, "step": 108445 }, { "epoch": 7.368528332653893, "grad_norm": 4.092966556549072, "learning_rate": 7.925669248539204e-06, "loss": 2.121, "step": 108450 }, { "epoch": 7.368868052724555, "grad_norm": 4.6663994789123535, "learning_rate": 7.921422747655932e-06, "loss": 2.0612, "step": 108455 }, { "epoch": 7.3692077727952165, "grad_norm": 3.8680312633514404, "learning_rate": 7.91717624677266e-06, "loss": 1.9238, "step": 108460 }, { "epoch": 7.369547492865879, "grad_norm": 3.32527494430542, "learning_rate": 7.912929745889388e-06, "loss": 1.89, "step": 108465 }, { "epoch": 7.369887212936541, "grad_norm": 2.897172689437866, "learning_rate": 7.908683245006116e-06, "loss": 2.0264, "step": 108470 }, { "epoch": 7.370226933007202, "grad_norm": 3.051098585128784, "learning_rate": 7.904436744122844e-06, "loss": 1.9338, "step": 108475 }, { "epoch": 7.370566653077864, "grad_norm": 3.2943294048309326, "learning_rate": 7.90019024323957e-06, "loss": 2.0017, "step": 108480 }, { "epoch": 7.370906373148525, "grad_norm": 3.751723527908325, "learning_rate": 7.895943742356298e-06, "loss": 2.1961, "step": 108485 }, { "epoch": 7.371246093219187, "grad_norm": 4.002823352813721, "learning_rate": 7.891697241473026e-06, "loss": 1.7164, "step": 
108490 }, { "epoch": 7.371585813289849, "grad_norm": 4.364012241363525, "learning_rate": 7.887450740589754e-06, "loss": 1.9507, "step": 108495 }, { "epoch": 7.3719255333605105, "grad_norm": 4.347060203552246, "learning_rate": 7.883204239706482e-06, "loss": 1.9882, "step": 108500 }, { "epoch": 7.372265253431173, "grad_norm": 2.8257129192352295, "learning_rate": 7.87895773882321e-06, "loss": 2.0831, "step": 108505 }, { "epoch": 7.372604973501835, "grad_norm": 3.7089762687683105, "learning_rate": 7.874711237939938e-06, "loss": 2.071, "step": 108510 }, { "epoch": 7.372944693572496, "grad_norm": 3.3227431774139404, "learning_rate": 7.870464737056665e-06, "loss": 2.2552, "step": 108515 }, { "epoch": 7.373284413643158, "grad_norm": 3.181088924407959, "learning_rate": 7.866218236173394e-06, "loss": 1.8808, "step": 108520 }, { "epoch": 7.37362413371382, "grad_norm": 4.2063398361206055, "learning_rate": 7.861971735290122e-06, "loss": 1.9808, "step": 108525 }, { "epoch": 7.373963853784481, "grad_norm": 4.342154026031494, "learning_rate": 7.857725234406849e-06, "loss": 1.8086, "step": 108530 }, { "epoch": 7.374303573855143, "grad_norm": 3.5439836978912354, "learning_rate": 7.853478733523577e-06, "loss": 1.8193, "step": 108535 }, { "epoch": 7.374643293925805, "grad_norm": 3.3856418132781982, "learning_rate": 7.849232232640305e-06, "loss": 1.8925, "step": 108540 }, { "epoch": 7.3749830139964665, "grad_norm": 3.1835412979125977, "learning_rate": 7.844985731757033e-06, "loss": 2.0094, "step": 108545 }, { "epoch": 7.375322734067129, "grad_norm": 3.5836825370788574, "learning_rate": 7.84073923087376e-06, "loss": 1.7566, "step": 108550 }, { "epoch": 7.375662454137791, "grad_norm": 3.6567723751068115, "learning_rate": 7.836492729990489e-06, "loss": 2.0347, "step": 108555 }, { "epoch": 7.376002174208452, "grad_norm": 4.699263095855713, "learning_rate": 7.832246229107217e-06, "loss": 2.0947, "step": 108560 }, { "epoch": 7.376341894279114, "grad_norm": 3.70495867729187, "learning_rate": 
7.827999728223943e-06, "loss": 2.0034, "step": 108565 }, { "epoch": 7.376681614349776, "grad_norm": 3.834897756576538, "learning_rate": 7.823753227340671e-06, "loss": 2.1537, "step": 108570 }, { "epoch": 7.377021334420437, "grad_norm": 4.757853984832764, "learning_rate": 7.8195067264574e-06, "loss": 2.1111, "step": 108575 }, { "epoch": 7.377361054491099, "grad_norm": 4.539349555969238, "learning_rate": 7.815260225574127e-06, "loss": 1.8665, "step": 108580 }, { "epoch": 7.377700774561761, "grad_norm": 3.6888484954833984, "learning_rate": 7.811013724690855e-06, "loss": 1.8653, "step": 108585 }, { "epoch": 7.3780404946324225, "grad_norm": 4.537450313568115, "learning_rate": 7.806767223807583e-06, "loss": 2.0901, "step": 108590 }, { "epoch": 7.378380214703085, "grad_norm": 3.975416421890259, "learning_rate": 7.802520722924311e-06, "loss": 2.283, "step": 108595 }, { "epoch": 7.378719934773747, "grad_norm": 3.4092676639556885, "learning_rate": 7.798274222041039e-06, "loss": 1.7315, "step": 108600 }, { "epoch": 7.379059654844408, "grad_norm": 3.790764093399048, "learning_rate": 7.794027721157767e-06, "loss": 2.0028, "step": 108605 }, { "epoch": 7.37939937491507, "grad_norm": 2.981391191482544, "learning_rate": 7.789781220274495e-06, "loss": 1.8989, "step": 108610 }, { "epoch": 7.379739094985732, "grad_norm": 3.4801149368286133, "learning_rate": 7.785534719391221e-06, "loss": 2.2395, "step": 108615 }, { "epoch": 7.380078815056393, "grad_norm": 3.506376266479492, "learning_rate": 7.78128821850795e-06, "loss": 1.7894, "step": 108620 }, { "epoch": 7.380418535127055, "grad_norm": 3.6347508430480957, "learning_rate": 7.777041717624677e-06, "loss": 2.0359, "step": 108625 }, { "epoch": 7.380758255197717, "grad_norm": 3.4036309719085693, "learning_rate": 7.772795216741405e-06, "loss": 1.9434, "step": 108630 }, { "epoch": 7.3810979752683785, "grad_norm": 3.2208149433135986, "learning_rate": 7.768548715858133e-06, "loss": 1.9415, "step": 108635 }, { "epoch": 7.381437695339041, 
"grad_norm": 3.8033199310302734, "learning_rate": 7.764302214974861e-06, "loss": 2.0148, "step": 108640 }, { "epoch": 7.381777415409703, "grad_norm": 3.946096420288086, "learning_rate": 7.76005571409159e-06, "loss": 1.9974, "step": 108645 }, { "epoch": 7.382117135480364, "grad_norm": 3.0901730060577393, "learning_rate": 7.755809213208316e-06, "loss": 2.044, "step": 108650 }, { "epoch": 7.382456855551026, "grad_norm": 3.7517285346984863, "learning_rate": 7.751562712325044e-06, "loss": 1.9355, "step": 108655 }, { "epoch": 7.382796575621688, "grad_norm": 3.832878351211548, "learning_rate": 7.747316211441773e-06, "loss": 2.1144, "step": 108660 }, { "epoch": 7.383136295692349, "grad_norm": 3.9482204914093018, "learning_rate": 7.7430697105585e-06, "loss": 1.6109, "step": 108665 }, { "epoch": 7.383476015763011, "grad_norm": 3.45439076423645, "learning_rate": 7.738823209675228e-06, "loss": 1.9452, "step": 108670 }, { "epoch": 7.383815735833673, "grad_norm": 3.676348924636841, "learning_rate": 7.734576708791956e-06, "loss": 1.9804, "step": 108675 }, { "epoch": 7.3841554559043345, "grad_norm": 4.363195896148682, "learning_rate": 7.730330207908684e-06, "loss": 1.9297, "step": 108680 }, { "epoch": 7.384495175974997, "grad_norm": 3.136380672454834, "learning_rate": 7.726083707025412e-06, "loss": 2.1998, "step": 108685 }, { "epoch": 7.384834896045659, "grad_norm": 3.509178400039673, "learning_rate": 7.72183720614214e-06, "loss": 2.0303, "step": 108690 }, { "epoch": 7.38517461611632, "grad_norm": 4.589295387268066, "learning_rate": 7.717590705258868e-06, "loss": 1.8566, "step": 108695 }, { "epoch": 7.385514336186982, "grad_norm": 3.3637139797210693, "learning_rate": 7.713344204375594e-06, "loss": 1.9954, "step": 108700 }, { "epoch": 7.385854056257644, "grad_norm": 4.8925042152404785, "learning_rate": 7.709097703492322e-06, "loss": 1.9631, "step": 108705 }, { "epoch": 7.386193776328305, "grad_norm": 3.58198618888855, "learning_rate": 7.70485120260905e-06, "loss": 1.9892, "step": 
108710 }, { "epoch": 7.386533496398967, "grad_norm": 4.073258876800537, "learning_rate": 7.700604701725778e-06, "loss": 1.9559, "step": 108715 }, { "epoch": 7.386873216469629, "grad_norm": 4.002535820007324, "learning_rate": 7.696358200842506e-06, "loss": 2.2578, "step": 108720 }, { "epoch": 7.3872129365402905, "grad_norm": 3.2942211627960205, "learning_rate": 7.692111699959234e-06, "loss": 1.9284, "step": 108725 }, { "epoch": 7.387552656610953, "grad_norm": 5.110803127288818, "learning_rate": 7.687865199075962e-06, "loss": 1.9473, "step": 108730 }, { "epoch": 7.387892376681615, "grad_norm": 4.659183502197266, "learning_rate": 7.683618698192688e-06, "loss": 2.0287, "step": 108735 }, { "epoch": 7.388232096752276, "grad_norm": 4.699972629547119, "learning_rate": 7.679372197309418e-06, "loss": 2.0404, "step": 108740 }, { "epoch": 7.388571816822938, "grad_norm": 3.4192352294921875, "learning_rate": 7.675125696426146e-06, "loss": 2.1249, "step": 108745 }, { "epoch": 7.3889115368936, "grad_norm": 3.1331796646118164, "learning_rate": 7.670879195542872e-06, "loss": 1.9769, "step": 108750 }, { "epoch": 7.389251256964261, "grad_norm": 4.123465061187744, "learning_rate": 7.6666326946596e-06, "loss": 2.3679, "step": 108755 }, { "epoch": 7.389590977034923, "grad_norm": 4.094767093658447, "learning_rate": 7.662386193776328e-06, "loss": 1.9853, "step": 108760 }, { "epoch": 7.389930697105585, "grad_norm": 4.4959211349487305, "learning_rate": 7.658139692893056e-06, "loss": 1.7668, "step": 108765 }, { "epoch": 7.3902704171762466, "grad_norm": 3.786442279815674, "learning_rate": 7.653893192009784e-06, "loss": 2.0254, "step": 108770 }, { "epoch": 7.390610137246909, "grad_norm": 3.5371201038360596, "learning_rate": 7.649646691126512e-06, "loss": 2.0354, "step": 108775 }, { "epoch": 7.390949857317571, "grad_norm": 5.180812835693359, "learning_rate": 7.64540019024324e-06, "loss": 2.0, "step": 108780 }, { "epoch": 7.391289577388232, "grad_norm": 3.6477103233337402, "learning_rate": 
7.641153689359967e-06, "loss": 2.1417, "step": 108785 }, { "epoch": 7.391629297458894, "grad_norm": 3.925473690032959, "learning_rate": 7.636907188476695e-06, "loss": 2.1176, "step": 108790 }, { "epoch": 7.391969017529556, "grad_norm": 4.291019916534424, "learning_rate": 7.632660687593425e-06, "loss": 2.06, "step": 108795 }, { "epoch": 7.392308737600217, "grad_norm": 3.4044086933135986, "learning_rate": 7.628414186710151e-06, "loss": 1.8486, "step": 108800 }, { "epoch": 7.392648457670879, "grad_norm": 4.177028656005859, "learning_rate": 7.624167685826879e-06, "loss": 1.9382, "step": 108805 }, { "epoch": 7.392988177741541, "grad_norm": 4.477509498596191, "learning_rate": 7.619921184943607e-06, "loss": 1.9891, "step": 108810 }, { "epoch": 7.393327897812203, "grad_norm": 3.3317344188690186, "learning_rate": 7.615674684060335e-06, "loss": 1.9493, "step": 108815 }, { "epoch": 7.393667617882865, "grad_norm": 4.426483154296875, "learning_rate": 7.611428183177062e-06, "loss": 2.2339, "step": 108820 }, { "epoch": 7.394007337953527, "grad_norm": 3.624533176422119, "learning_rate": 7.60718168229379e-06, "loss": 2.1462, "step": 108825 }, { "epoch": 7.394347058024188, "grad_norm": 3.1461806297302246, "learning_rate": 7.602935181410519e-06, "loss": 2.3343, "step": 108830 }, { "epoch": 7.39468677809485, "grad_norm": 3.7242000102996826, "learning_rate": 7.598688680527245e-06, "loss": 1.7722, "step": 108835 }, { "epoch": 7.395026498165512, "grad_norm": 3.2393171787261963, "learning_rate": 7.594442179643973e-06, "loss": 1.8646, "step": 108840 }, { "epoch": 7.395366218236173, "grad_norm": 4.208342552185059, "learning_rate": 7.590195678760702e-06, "loss": 1.945, "step": 108845 }, { "epoch": 7.395705938306835, "grad_norm": 3.1422736644744873, "learning_rate": 7.58594917787743e-06, "loss": 1.8755, "step": 108850 }, { "epoch": 7.396045658377497, "grad_norm": 3.0599653720855713, "learning_rate": 7.581702676994157e-06, "loss": 1.6809, "step": 108855 }, { "epoch": 7.396385378448159, 
"grad_norm": 3.61445689201355, "learning_rate": 7.577456176110885e-06, "loss": 2.1191, "step": 108860 }, { "epoch": 7.396725098518821, "grad_norm": 3.7034120559692383, "learning_rate": 7.573209675227613e-06, "loss": 1.8776, "step": 108865 }, { "epoch": 7.397064818589483, "grad_norm": 3.477130174636841, "learning_rate": 7.56896317434434e-06, "loss": 1.7372, "step": 108870 }, { "epoch": 7.397404538660144, "grad_norm": 3.519458532333374, "learning_rate": 7.564716673461068e-06, "loss": 2.0341, "step": 108875 }, { "epoch": 7.397744258730806, "grad_norm": 3.5589540004730225, "learning_rate": 7.560470172577796e-06, "loss": 2.0611, "step": 108880 }, { "epoch": 7.398083978801467, "grad_norm": 3.9760570526123047, "learning_rate": 7.5562236716945236e-06, "loss": 2.016, "step": 108885 }, { "epoch": 7.398423698872129, "grad_norm": 3.348491907119751, "learning_rate": 7.551977170811252e-06, "loss": 1.9405, "step": 108890 }, { "epoch": 7.398763418942791, "grad_norm": 3.2184267044067383, "learning_rate": 7.54773066992798e-06, "loss": 1.6662, "step": 108895 }, { "epoch": 7.3991031390134525, "grad_norm": 4.400866985321045, "learning_rate": 7.5434841690447084e-06, "loss": 1.7402, "step": 108900 }, { "epoch": 7.399442859084115, "grad_norm": 4.1199846267700195, "learning_rate": 7.539237668161435e-06, "loss": 2.0787, "step": 108905 }, { "epoch": 7.399782579154777, "grad_norm": 3.637096643447876, "learning_rate": 7.534991167278163e-06, "loss": 1.9186, "step": 108910 }, { "epoch": 7.400122299225438, "grad_norm": 4.222581386566162, "learning_rate": 7.530744666394892e-06, "loss": 1.9317, "step": 108915 }, { "epoch": 7.4004620192961, "grad_norm": 2.987994909286499, "learning_rate": 7.526498165511618e-06, "loss": 1.8066, "step": 108920 }, { "epoch": 7.400801739366762, "grad_norm": 3.7188751697540283, "learning_rate": 7.522251664628347e-06, "loss": 2.1565, "step": 108925 }, { "epoch": 7.401141459437423, "grad_norm": 3.9707069396972656, "learning_rate": 7.518005163745075e-06, "loss": 2.2198, 
"step": 108930 }, { "epoch": 7.401481179508085, "grad_norm": 4.287581443786621, "learning_rate": 7.513758662861803e-06, "loss": 2.0857, "step": 108935 }, { "epoch": 7.401820899578747, "grad_norm": 3.571906328201294, "learning_rate": 7.50951216197853e-06, "loss": 1.9055, "step": 108940 }, { "epoch": 7.4021606196494085, "grad_norm": 3.6946942806243896, "learning_rate": 7.505265661095258e-06, "loss": 1.9253, "step": 108945 }, { "epoch": 7.402500339720071, "grad_norm": 3.4734983444213867, "learning_rate": 7.501019160211986e-06, "loss": 1.9365, "step": 108950 }, { "epoch": 7.402840059790733, "grad_norm": 4.332275867462158, "learning_rate": 7.496772659328713e-06, "loss": 2.1, "step": 108955 }, { "epoch": 7.403179779861394, "grad_norm": 3.2743630409240723, "learning_rate": 7.492526158445441e-06, "loss": 2.0501, "step": 108960 }, { "epoch": 7.403519499932056, "grad_norm": 3.738450527191162, "learning_rate": 7.488279657562169e-06, "loss": 1.9244, "step": 108965 }, { "epoch": 7.403859220002718, "grad_norm": 4.069448947906494, "learning_rate": 7.484033156678896e-06, "loss": 2.0119, "step": 108970 }, { "epoch": 7.404198940073379, "grad_norm": 3.8329033851623535, "learning_rate": 7.479786655795624e-06, "loss": 2.1196, "step": 108975 }, { "epoch": 7.404538660144041, "grad_norm": 4.8594231605529785, "learning_rate": 7.475540154912352e-06, "loss": 1.7981, "step": 108980 }, { "epoch": 7.404878380214703, "grad_norm": 4.250028133392334, "learning_rate": 7.471293654029081e-06, "loss": 1.9278, "step": 108985 }, { "epoch": 7.4052181002853645, "grad_norm": 3.602419853210449, "learning_rate": 7.4670471531458075e-06, "loss": 1.7833, "step": 108990 }, { "epoch": 7.405557820356027, "grad_norm": 4.264495849609375, "learning_rate": 7.462800652262536e-06, "loss": 1.8891, "step": 108995 }, { "epoch": 7.405897540426689, "grad_norm": 3.4890296459198, "learning_rate": 7.458554151379264e-06, "loss": 1.8949, "step": 109000 }, { "epoch": 7.40623726049735, "grad_norm": 4.178770542144775, 
"learning_rate": 7.454307650495991e-06, "loss": 1.8779, "step": 109005 }, { "epoch": 7.406576980568012, "grad_norm": 3.5998308658599854, "learning_rate": 7.4500611496127195e-06, "loss": 2.0612, "step": 109010 }, { "epoch": 7.406916700638674, "grad_norm": 4.50114107131958, "learning_rate": 7.4458146487294475e-06, "loss": 1.8354, "step": 109015 }, { "epoch": 7.407256420709335, "grad_norm": 3.8387393951416016, "learning_rate": 7.4415681478461755e-06, "loss": 1.7059, "step": 109020 }, { "epoch": 7.407596140779997, "grad_norm": 3.649932622909546, "learning_rate": 7.437321646962903e-06, "loss": 2.0021, "step": 109025 }, { "epoch": 7.407935860850659, "grad_norm": 3.9067959785461426, "learning_rate": 7.433075146079631e-06, "loss": 2.0005, "step": 109030 }, { "epoch": 7.4082755809213205, "grad_norm": 3.3017525672912598, "learning_rate": 7.428828645196359e-06, "loss": 2.032, "step": 109035 }, { "epoch": 7.408615300991983, "grad_norm": 3.9256837368011475, "learning_rate": 7.424582144313086e-06, "loss": 1.7436, "step": 109040 }, { "epoch": 7.408955021062645, "grad_norm": 3.6028997898101807, "learning_rate": 7.420335643429814e-06, "loss": 2.1128, "step": 109045 }, { "epoch": 7.409294741133306, "grad_norm": 4.0458502769470215, "learning_rate": 7.416089142546542e-06, "loss": 1.7999, "step": 109050 }, { "epoch": 7.409634461203968, "grad_norm": 4.008246898651123, "learning_rate": 7.411842641663269e-06, "loss": 1.885, "step": 109055 }, { "epoch": 7.40997418127463, "grad_norm": 4.0599365234375, "learning_rate": 7.407596140779997e-06, "loss": 1.968, "step": 109060 }, { "epoch": 7.410313901345291, "grad_norm": 3.4477009773254395, "learning_rate": 7.403349639896726e-06, "loss": 2.0767, "step": 109065 }, { "epoch": 7.410653621415953, "grad_norm": 3.881803274154663, "learning_rate": 7.399103139013454e-06, "loss": 1.7385, "step": 109070 }, { "epoch": 7.410993341486615, "grad_norm": 3.7911996841430664, "learning_rate": 7.39485663813018e-06, "loss": 1.5087, "step": 109075 }, { "epoch": 
7.411333061557277, "grad_norm": 3.2989561557769775, "learning_rate": 7.390610137246909e-06, "loss": 2.182, "step": 109080 }, { "epoch": 7.411672781627939, "grad_norm": 4.214190483093262, "learning_rate": 7.386363636363637e-06, "loss": 1.7608, "step": 109085 }, { "epoch": 7.412012501698601, "grad_norm": 3.3601410388946533, "learning_rate": 7.382117135480364e-06, "loss": 1.7778, "step": 109090 }, { "epoch": 7.412352221769262, "grad_norm": 3.609828472137451, "learning_rate": 7.377870634597092e-06, "loss": 1.8456, "step": 109095 }, { "epoch": 7.412691941839924, "grad_norm": 3.775357961654663, "learning_rate": 7.37362413371382e-06, "loss": 1.8003, "step": 109100 }, { "epoch": 7.413031661910586, "grad_norm": 4.837156772613525, "learning_rate": 7.369377632830548e-06, "loss": 1.9545, "step": 109105 }, { "epoch": 7.413371381981247, "grad_norm": 3.800668239593506, "learning_rate": 7.3651311319472754e-06, "loss": 2.0093, "step": 109110 }, { "epoch": 7.413711102051909, "grad_norm": 4.1287760734558105, "learning_rate": 7.3608846310640035e-06, "loss": 1.7782, "step": 109115 }, { "epoch": 7.414050822122571, "grad_norm": 3.200040102005005, "learning_rate": 7.356638130180732e-06, "loss": 2.062, "step": 109120 }, { "epoch": 7.414390542193233, "grad_norm": 3.7823691368103027, "learning_rate": 7.352391629297459e-06, "loss": 2.0631, "step": 109125 }, { "epoch": 7.414730262263895, "grad_norm": 3.3505752086639404, "learning_rate": 7.348145128414187e-06, "loss": 1.9405, "step": 109130 }, { "epoch": 7.415069982334557, "grad_norm": 3.9661202430725098, "learning_rate": 7.3438986275309155e-06, "loss": 2.1941, "step": 109135 }, { "epoch": 7.415409702405218, "grad_norm": 2.9384732246398926, "learning_rate": 7.339652126647642e-06, "loss": 1.9065, "step": 109140 }, { "epoch": 7.41574942247588, "grad_norm": 4.65892219543457, "learning_rate": 7.335405625764371e-06, "loss": 1.9284, "step": 109145 }, { "epoch": 7.416089142546542, "grad_norm": 3.8118703365325928, "learning_rate": 
7.331159124881099e-06, "loss": 1.5561, "step": 109150 }, { "epoch": 7.416428862617203, "grad_norm": 2.9377429485321045, "learning_rate": 7.326912623997827e-06, "loss": 1.9223, "step": 109155 }, { "epoch": 7.416768582687865, "grad_norm": 3.50244140625, "learning_rate": 7.322666123114554e-06, "loss": 1.796, "step": 109160 }, { "epoch": 7.4171083027585265, "grad_norm": 3.5077097415924072, "learning_rate": 7.318419622231282e-06, "loss": 1.9089, "step": 109165 }, { "epoch": 7.417448022829189, "grad_norm": 3.523299217224121, "learning_rate": 7.31417312134801e-06, "loss": 1.965, "step": 109170 }, { "epoch": 7.417787742899851, "grad_norm": 3.396219253540039, "learning_rate": 7.309926620464737e-06, "loss": 2.0777, "step": 109175 }, { "epoch": 7.418127462970512, "grad_norm": 4.171573162078857, "learning_rate": 7.305680119581465e-06, "loss": 1.9204, "step": 109180 }, { "epoch": 7.418467183041174, "grad_norm": 3.0055766105651855, "learning_rate": 7.301433618698193e-06, "loss": 2.0643, "step": 109185 }, { "epoch": 7.418806903111836, "grad_norm": 4.089058876037598, "learning_rate": 7.297187117814922e-06, "loss": 1.9642, "step": 109190 }, { "epoch": 7.419146623182497, "grad_norm": 4.6928911209106445, "learning_rate": 7.292940616931648e-06, "loss": 2.046, "step": 109195 }, { "epoch": 7.419486343253159, "grad_norm": 4.172058582305908, "learning_rate": 7.288694116048376e-06, "loss": 1.8595, "step": 109200 }, { "epoch": 7.419826063323821, "grad_norm": 3.622985363006592, "learning_rate": 7.284447615165105e-06, "loss": 2.0473, "step": 109205 }, { "epoch": 7.4201657833944825, "grad_norm": 4.110048294067383, "learning_rate": 7.280201114281831e-06, "loss": 1.8078, "step": 109210 }, { "epoch": 7.420505503465145, "grad_norm": 2.9006826877593994, "learning_rate": 7.27595461339856e-06, "loss": 2.059, "step": 109215 }, { "epoch": 7.420845223535807, "grad_norm": 3.2163448333740234, "learning_rate": 7.271708112515288e-06, "loss": 1.9724, "step": 109220 }, { "epoch": 7.421184943606468, 
"grad_norm": 3.9471120834350586, "learning_rate": 7.2674616116320145e-06, "loss": 2.1825, "step": 109225 }, { "epoch": 7.42152466367713, "grad_norm": 4.092267990112305, "learning_rate": 7.263215110748743e-06, "loss": 1.72, "step": 109230 }, { "epoch": 7.421864383747792, "grad_norm": 3.119962215423584, "learning_rate": 7.258968609865471e-06, "loss": 1.9319, "step": 109235 }, { "epoch": 7.422204103818453, "grad_norm": 3.763131618499756, "learning_rate": 7.254722108982199e-06, "loss": 2.1157, "step": 109240 }, { "epoch": 7.422543823889115, "grad_norm": 4.081527233123779, "learning_rate": 7.2504756080989266e-06, "loss": 2.1195, "step": 109245 }, { "epoch": 7.422883543959777, "grad_norm": 3.497792959213257, "learning_rate": 7.246229107215655e-06, "loss": 1.8141, "step": 109250 }, { "epoch": 7.4232232640304385, "grad_norm": 3.7464077472686768, "learning_rate": 7.241982606332383e-06, "loss": 2.0371, "step": 109255 }, { "epoch": 7.423562984101101, "grad_norm": 3.95544171333313, "learning_rate": 7.23773610544911e-06, "loss": 1.8575, "step": 109260 }, { "epoch": 7.423902704171763, "grad_norm": 3.649603843688965, "learning_rate": 7.233489604565838e-06, "loss": 2.1458, "step": 109265 }, { "epoch": 7.424242424242424, "grad_norm": 3.057769298553467, "learning_rate": 7.229243103682566e-06, "loss": 2.15, "step": 109270 }, { "epoch": 7.424582144313086, "grad_norm": 4.189734935760498, "learning_rate": 7.224996602799295e-06, "loss": 2.1152, "step": 109275 }, { "epoch": 7.424921864383748, "grad_norm": 4.254026889801025, "learning_rate": 7.220750101916021e-06, "loss": 2.0084, "step": 109280 }, { "epoch": 7.425261584454409, "grad_norm": 3.215527296066284, "learning_rate": 7.21650360103275e-06, "loss": 2.2263, "step": 109285 }, { "epoch": 7.425601304525071, "grad_norm": 3.725492000579834, "learning_rate": 7.212257100149478e-06, "loss": 2.0813, "step": 109290 }, { "epoch": 7.425941024595733, "grad_norm": 4.371578693389893, "learning_rate": 7.208010599266204e-06, "loss": 1.5287, "step": 
109295 }, { "epoch": 7.4262807446663945, "grad_norm": 3.6463539600372314, "learning_rate": 7.203764098382933e-06, "loss": 2.2006, "step": 109300 }, { "epoch": 7.426620464737057, "grad_norm": 4.161551475524902, "learning_rate": 7.199517597499661e-06, "loss": 1.984, "step": 109305 }, { "epoch": 7.426960184807719, "grad_norm": 4.509263515472412, "learning_rate": 7.195271096616388e-06, "loss": 1.856, "step": 109310 }, { "epoch": 7.42729990487838, "grad_norm": 4.2180023193359375, "learning_rate": 7.191024595733116e-06, "loss": 1.9296, "step": 109315 }, { "epoch": 7.427639624949042, "grad_norm": 4.061604976654053, "learning_rate": 7.186778094849844e-06, "loss": 2.1949, "step": 109320 }, { "epoch": 7.427979345019704, "grad_norm": 3.493486166000366, "learning_rate": 7.182531593966572e-06, "loss": 2.0754, "step": 109325 }, { "epoch": 7.428319065090365, "grad_norm": 2.7681679725646973, "learning_rate": 7.178285093083299e-06, "loss": 2.1328, "step": 109330 }, { "epoch": 7.428658785161027, "grad_norm": 3.774052143096924, "learning_rate": 7.174038592200027e-06, "loss": 2.0401, "step": 109335 }, { "epoch": 7.428998505231689, "grad_norm": 3.281402826309204, "learning_rate": 7.169792091316755e-06, "loss": 2.2068, "step": 109340 }, { "epoch": 7.4293382253023506, "grad_norm": 3.8406295776367188, "learning_rate": 7.1655455904334825e-06, "loss": 1.6955, "step": 109345 }, { "epoch": 7.429677945373013, "grad_norm": 3.7616465091705322, "learning_rate": 7.1612990895502105e-06, "loss": 2.3063, "step": 109350 }, { "epoch": 7.430017665443675, "grad_norm": 3.969144821166992, "learning_rate": 7.157052588666939e-06, "loss": 1.7421, "step": 109355 }, { "epoch": 7.430357385514336, "grad_norm": 3.765496253967285, "learning_rate": 7.152806087783667e-06, "loss": 2.0566, "step": 109360 }, { "epoch": 7.430697105584998, "grad_norm": 3.656320333480835, "learning_rate": 7.148559586900394e-06, "loss": 2.1878, "step": 109365 }, { "epoch": 7.43103682565566, "grad_norm": 4.670928955078125, "learning_rate": 
7.1443130860171225e-06, "loss": 2.2189, "step": 109370 }, { "epoch": 7.431376545726321, "grad_norm": 4.13068962097168, "learning_rate": 7.1400665851338505e-06, "loss": 2.0941, "step": 109375 }, { "epoch": 7.431716265796983, "grad_norm": 3.755741834640503, "learning_rate": 7.135820084250578e-06, "loss": 2.0976, "step": 109380 }, { "epoch": 7.432055985867645, "grad_norm": 4.469493865966797, "learning_rate": 7.131573583367306e-06, "loss": 2.039, "step": 109385 }, { "epoch": 7.432395705938307, "grad_norm": 3.5173733234405518, "learning_rate": 7.127327082484034e-06, "loss": 2.1104, "step": 109390 }, { "epoch": 7.432735426008969, "grad_norm": 3.9022128582000732, "learning_rate": 7.123080581600761e-06, "loss": 1.9413, "step": 109395 }, { "epoch": 7.433075146079631, "grad_norm": 3.6102805137634277, "learning_rate": 7.118834080717489e-06, "loss": 2.1377, "step": 109400 }, { "epoch": 7.433414866150292, "grad_norm": 3.928529739379883, "learning_rate": 7.114587579834217e-06, "loss": 1.9709, "step": 109405 }, { "epoch": 7.433754586220954, "grad_norm": 3.4410505294799805, "learning_rate": 7.110341078950945e-06, "loss": 1.985, "step": 109410 }, { "epoch": 7.434094306291616, "grad_norm": 3.1580605506896973, "learning_rate": 7.106094578067672e-06, "loss": 1.9196, "step": 109415 }, { "epoch": 7.434434026362277, "grad_norm": 4.049314022064209, "learning_rate": 7.1018480771844e-06, "loss": 2.2351, "step": 109420 }, { "epoch": 7.434773746432939, "grad_norm": 4.345931053161621, "learning_rate": 7.097601576301129e-06, "loss": 2.0821, "step": 109425 }, { "epoch": 7.435113466503601, "grad_norm": 4.196592330932617, "learning_rate": 7.093355075417855e-06, "loss": 1.8631, "step": 109430 }, { "epoch": 7.435453186574263, "grad_norm": 3.2892866134643555, "learning_rate": 7.089108574534583e-06, "loss": 1.681, "step": 109435 }, { "epoch": 7.435792906644925, "grad_norm": 2.843092203140259, "learning_rate": 7.084862073651312e-06, "loss": 1.6952, "step": 109440 }, { "epoch": 7.436132626715587, 
"grad_norm": 3.3677785396575928, "learning_rate": 7.08061557276804e-06, "loss": 1.9633, "step": 109445 }, { "epoch": 7.436472346786248, "grad_norm": 3.2637388706207275, "learning_rate": 7.076369071884767e-06, "loss": 2.2044, "step": 109450 }, { "epoch": 7.43681206685691, "grad_norm": 3.5879979133605957, "learning_rate": 7.072122571001495e-06, "loss": 1.9729, "step": 109455 }, { "epoch": 7.437151786927572, "grad_norm": 3.815619945526123, "learning_rate": 7.067876070118223e-06, "loss": 2.0112, "step": 109460 }, { "epoch": 7.437491506998233, "grad_norm": 4.569354057312012, "learning_rate": 7.0636295692349504e-06, "loss": 1.855, "step": 109465 }, { "epoch": 7.437831227068895, "grad_norm": 4.109863758087158, "learning_rate": 7.0593830683516785e-06, "loss": 2.0609, "step": 109470 }, { "epoch": 7.438170947139557, "grad_norm": 3.8627288341522217, "learning_rate": 7.0551365674684065e-06, "loss": 1.9812, "step": 109475 }, { "epoch": 7.438510667210219, "grad_norm": 4.837069988250732, "learning_rate": 7.050890066585134e-06, "loss": 2.2613, "step": 109480 }, { "epoch": 7.438850387280881, "grad_norm": 3.894374370574951, "learning_rate": 7.046643565701862e-06, "loss": 2.0921, "step": 109485 }, { "epoch": 7.439190107351543, "grad_norm": 3.6767709255218506, "learning_rate": 7.04239706481859e-06, "loss": 1.9842, "step": 109490 }, { "epoch": 7.439529827422204, "grad_norm": 3.9496684074401855, "learning_rate": 7.0381505639353185e-06, "loss": 2.2208, "step": 109495 }, { "epoch": 7.439869547492866, "grad_norm": 4.336757183074951, "learning_rate": 7.033904063052045e-06, "loss": 1.86, "step": 109500 }, { "epoch": 7.440209267563528, "grad_norm": 4.043486595153809, "learning_rate": 7.029657562168774e-06, "loss": 1.9575, "step": 109505 }, { "epoch": 7.440548987634189, "grad_norm": 3.646436929702759, "learning_rate": 7.025411061285502e-06, "loss": 1.803, "step": 109510 }, { "epoch": 7.440888707704851, "grad_norm": 3.915595054626465, "learning_rate": 7.021164560402228e-06, "loss": 2.0196, 
"step": 109515 }, { "epoch": 7.441228427775513, "grad_norm": 4.106724739074707, "learning_rate": 7.016918059518957e-06, "loss": 1.9134, "step": 109520 }, { "epoch": 7.441568147846175, "grad_norm": 3.131108522415161, "learning_rate": 7.012671558635685e-06, "loss": 1.951, "step": 109525 }, { "epoch": 7.441907867916837, "grad_norm": 3.6779067516326904, "learning_rate": 7.008425057752413e-06, "loss": 1.8279, "step": 109530 }, { "epoch": 7.442247587987499, "grad_norm": 3.762148141860962, "learning_rate": 7.00417855686914e-06, "loss": 2.0098, "step": 109535 }, { "epoch": 7.44258730805816, "grad_norm": 3.768103837966919, "learning_rate": 6.999932055985868e-06, "loss": 1.9391, "step": 109540 }, { "epoch": 7.442927028128822, "grad_norm": 3.296964406967163, "learning_rate": 6.995685555102596e-06, "loss": 1.9163, "step": 109545 }, { "epoch": 7.443266748199484, "grad_norm": 4.280488014221191, "learning_rate": 6.991439054219323e-06, "loss": 2.1506, "step": 109550 }, { "epoch": 7.443606468270145, "grad_norm": 3.3040270805358887, "learning_rate": 6.987192553336051e-06, "loss": 1.9241, "step": 109555 }, { "epoch": 7.443946188340807, "grad_norm": 4.330835819244385, "learning_rate": 6.982946052452779e-06, "loss": 1.9856, "step": 109560 }, { "epoch": 7.444285908411469, "grad_norm": 3.4898619651794434, "learning_rate": 6.978699551569506e-06, "loss": 1.951, "step": 109565 }, { "epoch": 7.444625628482131, "grad_norm": 3.5511062145233154, "learning_rate": 6.974453050686234e-06, "loss": 2.1905, "step": 109570 }, { "epoch": 7.444965348552793, "grad_norm": 3.349994659423828, "learning_rate": 6.970206549802963e-06, "loss": 2.1126, "step": 109575 }, { "epoch": 7.445305068623454, "grad_norm": 4.369000434875488, "learning_rate": 6.965960048919691e-06, "loss": 2.1714, "step": 109580 }, { "epoch": 7.445644788694116, "grad_norm": 3.7551867961883545, "learning_rate": 6.9617135480364175e-06, "loss": 2.2338, "step": 109585 }, { "epoch": 7.445984508764778, "grad_norm": 3.5775198936462402, 
"learning_rate": 6.957467047153146e-06, "loss": 1.8173, "step": 109590 }, { "epoch": 7.446324228835439, "grad_norm": 4.066701412200928, "learning_rate": 6.953220546269874e-06, "loss": 1.9485, "step": 109595 }, { "epoch": 7.446663948906101, "grad_norm": 4.628823280334473, "learning_rate": 6.9489740453866016e-06, "loss": 1.8579, "step": 109600 }, { "epoch": 7.447003668976763, "grad_norm": 4.355196475982666, "learning_rate": 6.94472754450333e-06, "loss": 1.994, "step": 109605 }, { "epoch": 7.4473433890474245, "grad_norm": 3.6320059299468994, "learning_rate": 6.940481043620058e-06, "loss": 1.8639, "step": 109610 }, { "epoch": 7.447683109118087, "grad_norm": 3.489919424057007, "learning_rate": 6.936234542736786e-06, "loss": 1.9815, "step": 109615 }, { "epoch": 7.448022829188749, "grad_norm": 3.7855453491210938, "learning_rate": 6.931988041853513e-06, "loss": 2.1264, "step": 109620 }, { "epoch": 7.44836254925941, "grad_norm": 3.3752644062042236, "learning_rate": 6.927741540970241e-06, "loss": 1.8114, "step": 109625 }, { "epoch": 7.448702269330072, "grad_norm": 4.062042713165283, "learning_rate": 6.923495040086969e-06, "loss": 1.9985, "step": 109630 }, { "epoch": 7.449041989400734, "grad_norm": 4.167308807373047, "learning_rate": 6.919248539203696e-06, "loss": 1.9342, "step": 109635 }, { "epoch": 7.449381709471395, "grad_norm": 3.139986515045166, "learning_rate": 6.915002038320424e-06, "loss": 1.9716, "step": 109640 }, { "epoch": 7.449721429542057, "grad_norm": 4.0590105056762695, "learning_rate": 6.910755537437153e-06, "loss": 2.222, "step": 109645 }, { "epoch": 7.450061149612719, "grad_norm": 3.4720749855041504, "learning_rate": 6.906509036553879e-06, "loss": 1.7966, "step": 109650 }, { "epoch": 7.4504008696833806, "grad_norm": 3.64096736907959, "learning_rate": 6.902262535670607e-06, "loss": 1.7703, "step": 109655 }, { "epoch": 7.450740589754043, "grad_norm": 4.083006858825684, "learning_rate": 6.898016034787336e-06, "loss": 2.0289, "step": 109660 }, { "epoch": 
7.451080309824705, "grad_norm": 3.396946430206299, "learning_rate": 6.893769533904064e-06, "loss": 1.684, "step": 109665 }, { "epoch": 7.451420029895366, "grad_norm": 4.095233917236328, "learning_rate": 6.889523033020791e-06, "loss": 2.1007, "step": 109670 }, { "epoch": 7.451759749966028, "grad_norm": 3.319570779800415, "learning_rate": 6.885276532137519e-06, "loss": 1.973, "step": 109675 }, { "epoch": 7.45209947003669, "grad_norm": 3.3355393409729004, "learning_rate": 6.881030031254247e-06, "loss": 1.9208, "step": 109680 }, { "epoch": 7.452439190107351, "grad_norm": 4.182824611663818, "learning_rate": 6.876783530370974e-06, "loss": 1.8396, "step": 109685 }, { "epoch": 7.452778910178013, "grad_norm": 3.8540585041046143, "learning_rate": 6.872537029487702e-06, "loss": 1.968, "step": 109690 }, { "epoch": 7.453118630248675, "grad_norm": 3.4404456615448, "learning_rate": 6.86829052860443e-06, "loss": 1.9368, "step": 109695 }, { "epoch": 7.453458350319337, "grad_norm": 4.761146545410156, "learning_rate": 6.864044027721158e-06, "loss": 1.922, "step": 109700 }, { "epoch": 7.453798070389999, "grad_norm": 3.753480911254883, "learning_rate": 6.8597975268378855e-06, "loss": 2.0114, "step": 109705 }, { "epoch": 7.454137790460661, "grad_norm": 3.2908170223236084, "learning_rate": 6.8555510259546135e-06, "loss": 1.7008, "step": 109710 }, { "epoch": 7.454477510531322, "grad_norm": 4.393240928649902, "learning_rate": 6.851304525071342e-06, "loss": 1.8632, "step": 109715 }, { "epoch": 7.454817230601984, "grad_norm": 3.8152804374694824, "learning_rate": 6.847058024188069e-06, "loss": 1.9378, "step": 109720 }, { "epoch": 7.455156950672646, "grad_norm": 3.4925358295440674, "learning_rate": 6.842811523304797e-06, "loss": 2.0852, "step": 109725 }, { "epoch": 7.455496670743307, "grad_norm": 4.325976848602295, "learning_rate": 6.8385650224215255e-06, "loss": 1.7509, "step": 109730 }, { "epoch": 7.455836390813969, "grad_norm": 3.5582690238952637, "learning_rate": 6.834318521538252e-06, 
"loss": 1.9628, "step": 109735 }, { "epoch": 7.456176110884631, "grad_norm": 3.7815113067626953, "learning_rate": 6.830072020654981e-06, "loss": 2.0249, "step": 109740 }, { "epoch": 7.456515830955293, "grad_norm": 3.1910126209259033, "learning_rate": 6.825825519771709e-06, "loss": 1.8031, "step": 109745 }, { "epoch": 7.456855551025955, "grad_norm": 3.116854429244995, "learning_rate": 6.821579018888437e-06, "loss": 2.0094, "step": 109750 }, { "epoch": 7.457195271096617, "grad_norm": 3.852658271789551, "learning_rate": 6.817332518005164e-06, "loss": 2.1402, "step": 109755 }, { "epoch": 7.457534991167278, "grad_norm": 3.230687379837036, "learning_rate": 6.813086017121892e-06, "loss": 1.9011, "step": 109760 }, { "epoch": 7.45787471123794, "grad_norm": 4.206177234649658, "learning_rate": 6.80883951623862e-06, "loss": 1.783, "step": 109765 }, { "epoch": 7.458214431308602, "grad_norm": 4.477691650390625, "learning_rate": 6.804593015355347e-06, "loss": 1.8398, "step": 109770 }, { "epoch": 7.458554151379263, "grad_norm": 4.906054496765137, "learning_rate": 6.800346514472075e-06, "loss": 1.8591, "step": 109775 }, { "epoch": 7.458893871449925, "grad_norm": 3.6599955558776855, "learning_rate": 6.796100013588803e-06, "loss": 1.8083, "step": 109780 }, { "epoch": 7.459233591520587, "grad_norm": 4.969699382781982, "learning_rate": 6.791853512705532e-06, "loss": 1.8701, "step": 109785 }, { "epoch": 7.459573311591249, "grad_norm": 4.839829444885254, "learning_rate": 6.787607011822258e-06, "loss": 1.6696, "step": 109790 }, { "epoch": 7.459913031661911, "grad_norm": 4.446258544921875, "learning_rate": 6.783360510938986e-06, "loss": 2.1591, "step": 109795 }, { "epoch": 7.460252751732573, "grad_norm": 3.920736074447632, "learning_rate": 6.779114010055715e-06, "loss": 1.9734, "step": 109800 }, { "epoch": 7.460592471803234, "grad_norm": 3.692803144454956, "learning_rate": 6.774867509172441e-06, "loss": 2.1021, "step": 109805 }, { "epoch": 7.460932191873896, "grad_norm": 3.172877550125122, 
"learning_rate": 6.77062100828917e-06, "loss": 2.1994, "step": 109810 }, { "epoch": 7.461271911944558, "grad_norm": 3.522270441055298, "learning_rate": 6.766374507405898e-06, "loss": 2.2286, "step": 109815 }, { "epoch": 7.461611632015219, "grad_norm": 4.503780841827393, "learning_rate": 6.762128006522625e-06, "loss": 1.8977, "step": 109820 }, { "epoch": 7.461951352085881, "grad_norm": 3.64321231842041, "learning_rate": 6.7578815056393534e-06, "loss": 1.9125, "step": 109825 }, { "epoch": 7.462291072156543, "grad_norm": 3.686931610107422, "learning_rate": 6.7536350047560815e-06, "loss": 2.1078, "step": 109830 }, { "epoch": 7.462630792227205, "grad_norm": 3.365579605102539, "learning_rate": 6.7493885038728095e-06, "loss": 1.8413, "step": 109835 }, { "epoch": 7.462970512297867, "grad_norm": 3.5621395111083984, "learning_rate": 6.745142002989537e-06, "loss": 1.9337, "step": 109840 }, { "epoch": 7.463310232368528, "grad_norm": 3.39314341545105, "learning_rate": 6.740895502106265e-06, "loss": 2.0156, "step": 109845 }, { "epoch": 7.46364995243919, "grad_norm": 3.143040180206299, "learning_rate": 6.736649001222993e-06, "loss": 1.9018, "step": 109850 }, { "epoch": 7.463989672509852, "grad_norm": 3.852403163909912, "learning_rate": 6.73240250033972e-06, "loss": 1.7664, "step": 109855 }, { "epoch": 7.464329392580513, "grad_norm": 4.1489739418029785, "learning_rate": 6.728155999456448e-06, "loss": 2.0176, "step": 109860 }, { "epoch": 7.464669112651175, "grad_norm": 3.026956081390381, "learning_rate": 6.723909498573177e-06, "loss": 1.7747, "step": 109865 }, { "epoch": 7.465008832721837, "grad_norm": 3.4015064239501953, "learning_rate": 6.719662997689905e-06, "loss": 1.5102, "step": 109870 }, { "epoch": 7.4653485527924985, "grad_norm": 3.2629826068878174, "learning_rate": 6.715416496806631e-06, "loss": 2.1405, "step": 109875 }, { "epoch": 7.465688272863161, "grad_norm": 4.372598648071289, "learning_rate": 6.71116999592336e-06, "loss": 2.0129, "step": 109880 }, { "epoch": 
7.466027992933823, "grad_norm": 4.396866321563721, "learning_rate": 6.706923495040088e-06, "loss": 2.1023, "step": 109885 }, { "epoch": 7.466367713004484, "grad_norm": 4.172934055328369, "learning_rate": 6.702676994156815e-06, "loss": 1.8688, "step": 109890 }, { "epoch": 7.466707433075146, "grad_norm": 3.5404677391052246, "learning_rate": 6.698430493273543e-06, "loss": 2.0682, "step": 109895 }, { "epoch": 7.467047153145808, "grad_norm": 3.3651559352874756, "learning_rate": 6.694183992390271e-06, "loss": 2.1157, "step": 109900 }, { "epoch": 7.467386873216469, "grad_norm": 3.9175689220428467, "learning_rate": 6.689937491506998e-06, "loss": 1.7973, "step": 109905 }, { "epoch": 7.467726593287131, "grad_norm": 3.1158957481384277, "learning_rate": 6.685690990623726e-06, "loss": 1.9587, "step": 109910 }, { "epoch": 7.468066313357793, "grad_norm": 3.9974749088287354, "learning_rate": 6.681444489740454e-06, "loss": 2.0374, "step": 109915 }, { "epoch": 7.4684060334284545, "grad_norm": 4.239352226257324, "learning_rate": 6.677197988857182e-06, "loss": 2.1681, "step": 109920 }, { "epoch": 7.468745753499117, "grad_norm": 4.060284614562988, "learning_rate": 6.672951487973909e-06, "loss": 1.9629, "step": 109925 }, { "epoch": 7.469085473569779, "grad_norm": 3.639274835586548, "learning_rate": 6.668704987090637e-06, "loss": 1.864, "step": 109930 }, { "epoch": 7.46942519364044, "grad_norm": 4.435324668884277, "learning_rate": 6.664458486207366e-06, "loss": 1.6833, "step": 109935 }, { "epoch": 7.469764913711102, "grad_norm": 3.148332357406616, "learning_rate": 6.6602119853240925e-06, "loss": 1.9694, "step": 109940 }, { "epoch": 7.470104633781764, "grad_norm": 3.996568441390991, "learning_rate": 6.6559654844408205e-06, "loss": 1.9506, "step": 109945 }, { "epoch": 7.470444353852425, "grad_norm": 3.8786230087280273, "learning_rate": 6.651718983557549e-06, "loss": 1.9973, "step": 109950 }, { "epoch": 7.470784073923087, "grad_norm": 3.8026952743530273, "learning_rate": 
6.647472482674277e-06, "loss": 1.7597, "step": 109955 }, { "epoch": 7.471123793993749, "grad_norm": 3.8789734840393066, "learning_rate": 6.6432259817910046e-06, "loss": 2.156, "step": 109960 }, { "epoch": 7.471463514064411, "grad_norm": 4.649598598480225, "learning_rate": 6.638979480907733e-06, "loss": 2.0116, "step": 109965 }, { "epoch": 7.471803234135073, "grad_norm": 3.766942024230957, "learning_rate": 6.634732980024461e-06, "loss": 1.695, "step": 109970 }, { "epoch": 7.472142954205735, "grad_norm": 4.408032417297363, "learning_rate": 6.630486479141188e-06, "loss": 1.7609, "step": 109975 }, { "epoch": 7.472482674276396, "grad_norm": 4.851925373077393, "learning_rate": 6.626239978257916e-06, "loss": 2.2139, "step": 109980 }, { "epoch": 7.472822394347058, "grad_norm": 3.7127883434295654, "learning_rate": 6.621993477374644e-06, "loss": 2.2621, "step": 109985 }, { "epoch": 7.47316211441772, "grad_norm": 3.467597246170044, "learning_rate": 6.617746976491371e-06, "loss": 1.7708, "step": 109990 }, { "epoch": 7.473501834488381, "grad_norm": 3.069768190383911, "learning_rate": 6.613500475608099e-06, "loss": 1.9349, "step": 109995 }, { "epoch": 7.473841554559043, "grad_norm": 4.532983779907227, "learning_rate": 6.609253974724827e-06, "loss": 1.7739, "step": 110000 }, { "epoch": 7.474181274629705, "grad_norm": 3.621614933013916, "learning_rate": 6.605007473841556e-06, "loss": 1.9495, "step": 110005 }, { "epoch": 7.474520994700367, "grad_norm": 4.046362400054932, "learning_rate": 6.600760972958282e-06, "loss": 1.9833, "step": 110010 }, { "epoch": 7.474860714771029, "grad_norm": 4.250418186187744, "learning_rate": 6.59651447207501e-06, "loss": 1.8392, "step": 110015 }, { "epoch": 7.475200434841691, "grad_norm": 3.636753797531128, "learning_rate": 6.592267971191739e-06, "loss": 1.7651, "step": 110020 }, { "epoch": 7.475540154912352, "grad_norm": 4.376755714416504, "learning_rate": 6.588021470308465e-06, "loss": 2.0571, "step": 110025 }, { "epoch": 7.475879874983014, 
"grad_norm": 3.445786952972412, "learning_rate": 6.583774969425194e-06, "loss": 1.9218, "step": 110030 }, { "epoch": 7.476219595053676, "grad_norm": 4.028385639190674, "learning_rate": 6.579528468541922e-06, "loss": 1.8479, "step": 110035 }, { "epoch": 7.476559315124337, "grad_norm": 3.4485855102539062, "learning_rate": 6.57528196765865e-06, "loss": 1.8481, "step": 110040 }, { "epoch": 7.476899035194999, "grad_norm": 4.22183084487915, "learning_rate": 6.571035466775377e-06, "loss": 1.7126, "step": 110045 }, { "epoch": 7.477238755265661, "grad_norm": 3.198415994644165, "learning_rate": 6.566788965892105e-06, "loss": 1.7937, "step": 110050 }, { "epoch": 7.477578475336323, "grad_norm": 3.4700052738189697, "learning_rate": 6.562542465008833e-06, "loss": 1.9741, "step": 110055 }, { "epoch": 7.477918195406985, "grad_norm": 4.263088703155518, "learning_rate": 6.5582959641255605e-06, "loss": 1.8352, "step": 110060 }, { "epoch": 7.478257915477647, "grad_norm": 3.3218417167663574, "learning_rate": 6.5540494632422885e-06, "loss": 2.1272, "step": 110065 }, { "epoch": 7.478597635548308, "grad_norm": 3.0742886066436768, "learning_rate": 6.5498029623590165e-06, "loss": 1.642, "step": 110070 }, { "epoch": 7.47893735561897, "grad_norm": 3.5913233757019043, "learning_rate": 6.545556461475744e-06, "loss": 2.2256, "step": 110075 }, { "epoch": 7.479277075689632, "grad_norm": 3.8859150409698486, "learning_rate": 6.541309960592472e-06, "loss": 1.946, "step": 110080 }, { "epoch": 7.479616795760293, "grad_norm": 3.9696168899536133, "learning_rate": 6.5370634597092e-06, "loss": 1.815, "step": 110085 }, { "epoch": 7.479956515830955, "grad_norm": 3.667809247970581, "learning_rate": 6.5328169588259285e-06, "loss": 1.8229, "step": 110090 }, { "epoch": 7.480296235901617, "grad_norm": 4.093663692474365, "learning_rate": 6.528570457942655e-06, "loss": 2.185, "step": 110095 }, { "epoch": 7.480635955972279, "grad_norm": 3.538072347640991, "learning_rate": 6.524323957059384e-06, "loss": 1.7401, 
"step": 110100 }, { "epoch": 7.480975676042941, "grad_norm": 4.023650169372559, "learning_rate": 6.520077456176112e-06, "loss": 1.9201, "step": 110105 }, { "epoch": 7.481315396113603, "grad_norm": 3.7267439365386963, "learning_rate": 6.515830955292838e-06, "loss": 1.9867, "step": 110110 }, { "epoch": 7.481655116184264, "grad_norm": 3.212749481201172, "learning_rate": 6.511584454409567e-06, "loss": 1.9873, "step": 110115 }, { "epoch": 7.481994836254926, "grad_norm": 4.124964237213135, "learning_rate": 6.507337953526295e-06, "loss": 2.0862, "step": 110120 }, { "epoch": 7.482334556325588, "grad_norm": 3.6611440181732178, "learning_rate": 6.503091452643023e-06, "loss": 1.78, "step": 110125 }, { "epoch": 7.482674276396249, "grad_norm": 4.141593933105469, "learning_rate": 6.49884495175975e-06, "loss": 1.8858, "step": 110130 }, { "epoch": 7.483013996466911, "grad_norm": 3.6701607704162598, "learning_rate": 6.494598450876478e-06, "loss": 2.023, "step": 110135 }, { "epoch": 7.483353716537573, "grad_norm": 3.832908868789673, "learning_rate": 6.490351949993206e-06, "loss": 1.9343, "step": 110140 }, { "epoch": 7.483693436608235, "grad_norm": 3.3816516399383545, "learning_rate": 6.486105449109933e-06, "loss": 2.1103, "step": 110145 }, { "epoch": 7.484033156678897, "grad_norm": 3.0815017223358154, "learning_rate": 6.481858948226661e-06, "loss": 2.0521, "step": 110150 }, { "epoch": 7.484372876749559, "grad_norm": 4.454176902770996, "learning_rate": 6.47761244734339e-06, "loss": 2.1671, "step": 110155 }, { "epoch": 7.48471259682022, "grad_norm": 4.307196617126465, "learning_rate": 6.473365946460116e-06, "loss": 1.9368, "step": 110160 }, { "epoch": 7.485052316890882, "grad_norm": 4.1978912353515625, "learning_rate": 6.469119445576844e-06, "loss": 1.8953, "step": 110165 }, { "epoch": 7.485392036961544, "grad_norm": 3.7445755004882812, "learning_rate": 6.464872944693573e-06, "loss": 1.945, "step": 110170 }, { "epoch": 7.485731757032205, "grad_norm": 3.7985496520996094, 
"learning_rate": 6.460626443810301e-06, "loss": 1.9302, "step": 110175 }, { "epoch": 7.486071477102867, "grad_norm": 5.242264270782471, "learning_rate": 6.4563799429270284e-06, "loss": 1.5997, "step": 110180 }, { "epoch": 7.486411197173529, "grad_norm": 3.2984237670898438, "learning_rate": 6.452982742220411e-06, "loss": 1.7922, "step": 110185 }, { "epoch": 7.486750917244191, "grad_norm": 3.057955026626587, "learning_rate": 6.448736241337139e-06, "loss": 2.0042, "step": 110190 }, { "epoch": 7.487090637314853, "grad_norm": 3.4659674167633057, "learning_rate": 6.444489740453866e-06, "loss": 1.8288, "step": 110195 }, { "epoch": 7.487430357385515, "grad_norm": 5.744692802429199, "learning_rate": 6.440243239570594e-06, "loss": 1.7214, "step": 110200 }, { "epoch": 7.487770077456176, "grad_norm": 4.271621227264404, "learning_rate": 6.435996738687322e-06, "loss": 2.0861, "step": 110205 }, { "epoch": 7.488109797526838, "grad_norm": 4.799674034118652, "learning_rate": 6.43175023780405e-06, "loss": 2.0922, "step": 110210 }, { "epoch": 7.4884495175975, "grad_norm": 3.380007743835449, "learning_rate": 6.427503736920777e-06, "loss": 2.2781, "step": 110215 }, { "epoch": 7.488789237668161, "grad_norm": 4.283863544464111, "learning_rate": 6.423257236037505e-06, "loss": 1.7609, "step": 110220 }, { "epoch": 7.489128957738823, "grad_norm": 4.363404273986816, "learning_rate": 6.419010735154233e-06, "loss": 2.0717, "step": 110225 }, { "epoch": 7.489468677809485, "grad_norm": 3.531783103942871, "learning_rate": 6.41476423427096e-06, "loss": 1.9528, "step": 110230 }, { "epoch": 7.489808397880147, "grad_norm": 2.806539297103882, "learning_rate": 6.410517733387688e-06, "loss": 1.9428, "step": 110235 }, { "epoch": 7.490148117950809, "grad_norm": 4.285180568695068, "learning_rate": 6.406271232504417e-06, "loss": 1.9821, "step": 110240 }, { "epoch": 7.490487838021471, "grad_norm": 3.9283344745635986, "learning_rate": 6.402024731621145e-06, "loss": 1.8103, "step": 110245 }, { "epoch": 
7.490827558092132, "grad_norm": 4.198952674865723, "learning_rate": 6.3977782307378716e-06, "loss": 1.9291, "step": 110250 }, { "epoch": 7.491167278162794, "grad_norm": 4.377999782562256, "learning_rate": 6.3935317298546e-06, "loss": 1.833, "step": 110255 }, { "epoch": 7.491506998233455, "grad_norm": 4.338460922241211, "learning_rate": 6.389285228971328e-06, "loss": 2.0453, "step": 110260 }, { "epoch": 7.491846718304117, "grad_norm": 4.360013961791992, "learning_rate": 6.385038728088056e-06, "loss": 2.041, "step": 110265 }, { "epoch": 7.492186438374779, "grad_norm": 4.8936848640441895, "learning_rate": 6.380792227204784e-06, "loss": 2.099, "step": 110270 }, { "epoch": 7.492526158445441, "grad_norm": 3.3935155868530273, "learning_rate": 6.376545726321512e-06, "loss": 1.873, "step": 110275 }, { "epoch": 7.492865878516103, "grad_norm": 3.60302996635437, "learning_rate": 6.372299225438239e-06, "loss": 1.8545, "step": 110280 }, { "epoch": 7.493205598586765, "grad_norm": 4.064727783203125, "learning_rate": 6.368052724554967e-06, "loss": 2.0184, "step": 110285 }, { "epoch": 7.493545318657426, "grad_norm": 3.7068779468536377, "learning_rate": 6.363806223671695e-06, "loss": 1.836, "step": 110290 }, { "epoch": 7.493885038728088, "grad_norm": 3.117171287536621, "learning_rate": 6.359559722788424e-06, "loss": 2.0106, "step": 110295 }, { "epoch": 7.49422475879875, "grad_norm": 3.285402297973633, "learning_rate": 6.35531322190515e-06, "loss": 1.8832, "step": 110300 }, { "epoch": 7.494564478869411, "grad_norm": 3.5636708736419678, "learning_rate": 6.351066721021878e-06, "loss": 1.9264, "step": 110305 }, { "epoch": 7.494904198940073, "grad_norm": 3.551618814468384, "learning_rate": 6.346820220138607e-06, "loss": 1.9229, "step": 110310 }, { "epoch": 7.495243919010735, "grad_norm": 4.566519260406494, "learning_rate": 6.342573719255333e-06, "loss": 2.1467, "step": 110315 }, { "epoch": 7.495583639081397, "grad_norm": 3.846928596496582, "learning_rate": 6.338327218372062e-06, "loss": 
1.7343, "step": 110320 }, { "epoch": 7.495923359152059, "grad_norm": 3.0719399452209473, "learning_rate": 6.33408071748879e-06, "loss": 1.9107, "step": 110325 }, { "epoch": 7.496263079222721, "grad_norm": 3.2593343257904053, "learning_rate": 6.329834216605518e-06, "loss": 1.7645, "step": 110330 }, { "epoch": 7.496602799293382, "grad_norm": 4.095114707946777, "learning_rate": 6.325587715722245e-06, "loss": 2.1848, "step": 110335 }, { "epoch": 7.496942519364044, "grad_norm": 3.69622802734375, "learning_rate": 6.321341214838973e-06, "loss": 1.9416, "step": 110340 }, { "epoch": 7.497282239434706, "grad_norm": 4.257223606109619, "learning_rate": 6.317094713955701e-06, "loss": 1.6775, "step": 110345 }, { "epoch": 7.497621959505367, "grad_norm": 3.4069623947143555, "learning_rate": 6.312848213072428e-06, "loss": 1.9191, "step": 110350 }, { "epoch": 7.497961679576029, "grad_norm": 4.05941104888916, "learning_rate": 6.308601712189156e-06, "loss": 1.9112, "step": 110355 }, { "epoch": 7.498301399646691, "grad_norm": 3.735252857208252, "learning_rate": 6.304355211305884e-06, "loss": 2.0204, "step": 110360 }, { "epoch": 7.498641119717353, "grad_norm": 3.5628139972686768, "learning_rate": 6.3001087104226115e-06, "loss": 1.9714, "step": 110365 }, { "epoch": 7.498980839788015, "grad_norm": 4.157253742218018, "learning_rate": 6.2958622095393395e-06, "loss": 1.9846, "step": 110370 }, { "epoch": 7.499320559858677, "grad_norm": 3.851977825164795, "learning_rate": 6.2916157086560675e-06, "loss": 1.8734, "step": 110375 }, { "epoch": 7.499660279929338, "grad_norm": 3.4966962337493896, "learning_rate": 6.287369207772796e-06, "loss": 1.8507, "step": 110380 }, { "epoch": 7.5, "grad_norm": 4.818273067474365, "learning_rate": 6.283122706889523e-06, "loss": 2.2221, "step": 110385 }, { "epoch": 7.500339720070662, "grad_norm": 3.195939540863037, "learning_rate": 6.2788762060062515e-06, "loss": 1.9624, "step": 110390 }, { "epoch": 7.500679440141323, "grad_norm": 3.378326177597046, 
"learning_rate": 6.2746297051229795e-06, "loss": 1.9538, "step": 110395 }, { "epoch": 7.501019160211985, "grad_norm": 3.834834098815918, "learning_rate": 6.270383204239706e-06, "loss": 1.9338, "step": 110400 }, { "epoch": 7.501358880282647, "grad_norm": 4.3788371086120605, "learning_rate": 6.266136703356435e-06, "loss": 1.7748, "step": 110405 }, { "epoch": 7.501698600353309, "grad_norm": 4.023533821105957, "learning_rate": 6.261890202473163e-06, "loss": 1.6187, "step": 110410 }, { "epoch": 7.502038320423971, "grad_norm": 3.93721866607666, "learning_rate": 6.257643701589891e-06, "loss": 1.6737, "step": 110415 }, { "epoch": 7.502378040494633, "grad_norm": 3.6960315704345703, "learning_rate": 6.253397200706618e-06, "loss": 2.1449, "step": 110420 }, { "epoch": 7.502717760565294, "grad_norm": 3.6237833499908447, "learning_rate": 6.249150699823346e-06, "loss": 2.0491, "step": 110425 }, { "epoch": 7.503057480635956, "grad_norm": 2.8979899883270264, "learning_rate": 6.244904198940073e-06, "loss": 1.6095, "step": 110430 }, { "epoch": 7.503397200706618, "grad_norm": 4.617553234100342, "learning_rate": 6.240657698056802e-06, "loss": 2.106, "step": 110435 }, { "epoch": 7.503736920777279, "grad_norm": 2.5549261569976807, "learning_rate": 6.236411197173529e-06, "loss": 1.7228, "step": 110440 }, { "epoch": 7.504076640847941, "grad_norm": 3.3790946006774902, "learning_rate": 6.232164696290257e-06, "loss": 2.0629, "step": 110445 }, { "epoch": 7.504416360918603, "grad_norm": 3.907447338104248, "learning_rate": 6.227918195406985e-06, "loss": 2.0104, "step": 110450 }, { "epoch": 7.504756080989265, "grad_norm": 3.5087897777557373, "learning_rate": 6.223671694523712e-06, "loss": 2.1243, "step": 110455 }, { "epoch": 7.505095801059927, "grad_norm": 3.923382520675659, "learning_rate": 6.219425193640441e-06, "loss": 1.589, "step": 110460 }, { "epoch": 7.505435521130589, "grad_norm": 4.099456310272217, "learning_rate": 6.215178692757168e-06, "loss": 1.7681, "step": 110465 }, { "epoch": 
7.50577524120125, "grad_norm": 3.3968207836151123, "learning_rate": 6.210932191873896e-06, "loss": 1.9232, "step": 110470 }, { "epoch": 7.506114961271912, "grad_norm": 3.423360586166382, "learning_rate": 6.206685690990624e-06, "loss": 1.9844, "step": 110475 }, { "epoch": 7.506454681342574, "grad_norm": 3.485072135925293, "learning_rate": 6.2024391901073514e-06, "loss": 2.1449, "step": 110480 }, { "epoch": 7.506794401413235, "grad_norm": 4.705831527709961, "learning_rate": 6.1981926892240795e-06, "loss": 1.8878, "step": 110485 }, { "epoch": 7.507134121483897, "grad_norm": 3.646387815475464, "learning_rate": 6.1939461883408075e-06, "loss": 1.9452, "step": 110490 }, { "epoch": 7.5074738415545585, "grad_norm": 3.064066171646118, "learning_rate": 6.1896996874575355e-06, "loss": 1.8369, "step": 110495 }, { "epoch": 7.507813561625221, "grad_norm": 3.130591630935669, "learning_rate": 6.1854531865742635e-06, "loss": 1.9756, "step": 110500 }, { "epoch": 7.508153281695883, "grad_norm": 4.284536838531494, "learning_rate": 6.181206685690991e-06, "loss": 2.217, "step": 110505 }, { "epoch": 7.508493001766544, "grad_norm": 3.2614402770996094, "learning_rate": 6.176960184807719e-06, "loss": 1.7104, "step": 110510 }, { "epoch": 7.508832721837206, "grad_norm": 4.92487096786499, "learning_rate": 6.172713683924447e-06, "loss": 1.9079, "step": 110515 }, { "epoch": 7.509172441907868, "grad_norm": 4.9318413734436035, "learning_rate": 6.168467183041175e-06, "loss": 1.8425, "step": 110520 }, { "epoch": 7.509512161978529, "grad_norm": 3.645197629928589, "learning_rate": 6.164220682157902e-06, "loss": 1.9776, "step": 110525 }, { "epoch": 7.509851882049191, "grad_norm": 4.5810089111328125, "learning_rate": 6.159974181274631e-06, "loss": 2.1162, "step": 110530 }, { "epoch": 7.510191602119853, "grad_norm": 3.6391243934631348, "learning_rate": 6.155727680391358e-06, "loss": 1.9795, "step": 110535 }, { "epoch": 7.510531322190515, "grad_norm": 3.468733072280884, "learning_rate": 
6.151481179508085e-06, "loss": 1.9785, "step": 110540 }, { "epoch": 7.510871042261177, "grad_norm": 4.512009620666504, "learning_rate": 6.147234678624814e-06, "loss": 2.0788, "step": 110545 }, { "epoch": 7.511210762331839, "grad_norm": 4.365002632141113, "learning_rate": 6.142988177741541e-06, "loss": 1.9949, "step": 110550 }, { "epoch": 7.5115504824025, "grad_norm": 4.737497806549072, "learning_rate": 6.138741676858269e-06, "loss": 2.0175, "step": 110555 }, { "epoch": 7.511890202473162, "grad_norm": 2.9229650497436523, "learning_rate": 6.134495175974997e-06, "loss": 2.1265, "step": 110560 }, { "epoch": 7.512229922543824, "grad_norm": 4.669768810272217, "learning_rate": 6.130248675091724e-06, "loss": 2.0289, "step": 110565 }, { "epoch": 7.512569642614485, "grad_norm": 3.6495842933654785, "learning_rate": 6.126002174208453e-06, "loss": 1.8321, "step": 110570 }, { "epoch": 7.512909362685147, "grad_norm": 3.933232069015503, "learning_rate": 6.12175567332518e-06, "loss": 1.8541, "step": 110575 }, { "epoch": 7.513249082755809, "grad_norm": 3.009016990661621, "learning_rate": 6.117509172441908e-06, "loss": 2.3216, "step": 110580 }, { "epoch": 7.513588802826471, "grad_norm": 4.064874649047852, "learning_rate": 6.113262671558636e-06, "loss": 1.9147, "step": 110585 }, { "epoch": 7.513928522897133, "grad_norm": 4.5710906982421875, "learning_rate": 6.109016170675363e-06, "loss": 2.0069, "step": 110590 }, { "epoch": 7.514268242967795, "grad_norm": 4.126922607421875, "learning_rate": 6.104769669792091e-06, "loss": 1.8213, "step": 110595 }, { "epoch": 7.514607963038456, "grad_norm": 3.4069206714630127, "learning_rate": 6.100523168908819e-06, "loss": 2.295, "step": 110600 }, { "epoch": 7.514947683109118, "grad_norm": 3.729243040084839, "learning_rate": 6.096276668025547e-06, "loss": 2.0967, "step": 110605 }, { "epoch": 7.51528740317978, "grad_norm": 4.216104030609131, "learning_rate": 6.0920301671422746e-06, "loss": 2.1419, "step": 110610 }, { "epoch": 7.515627123250441, 
"grad_norm": 3.805014133453369, "learning_rate": 6.087783666259003e-06, "loss": 2.0496, "step": 110615 }, { "epoch": 7.515966843321103, "grad_norm": 3.81917142868042, "learning_rate": 6.083537165375731e-06, "loss": 1.9361, "step": 110620 }, { "epoch": 7.516306563391765, "grad_norm": 2.9359195232391357, "learning_rate": 6.079290664492459e-06, "loss": 1.8942, "step": 110625 }, { "epoch": 7.516646283462427, "grad_norm": 2.9224212169647217, "learning_rate": 6.075044163609187e-06, "loss": 2.0381, "step": 110630 }, { "epoch": 7.516986003533089, "grad_norm": 4.147101879119873, "learning_rate": 6.070797662725914e-06, "loss": 1.9374, "step": 110635 }, { "epoch": 7.517325723603751, "grad_norm": 3.7248740196228027, "learning_rate": 6.066551161842643e-06, "loss": 1.8169, "step": 110640 }, { "epoch": 7.517665443674412, "grad_norm": 4.319908618927002, "learning_rate": 6.06230466095937e-06, "loss": 1.8963, "step": 110645 }, { "epoch": 7.518005163745074, "grad_norm": 4.5876569747924805, "learning_rate": 6.058058160076097e-06, "loss": 1.9722, "step": 110650 }, { "epoch": 7.518344883815736, "grad_norm": 4.927273750305176, "learning_rate": 6.053811659192826e-06, "loss": 1.7547, "step": 110655 }, { "epoch": 7.518684603886397, "grad_norm": 3.211805820465088, "learning_rate": 6.049565158309553e-06, "loss": 1.8077, "step": 110660 }, { "epoch": 7.519024323957059, "grad_norm": 3.8596582412719727, "learning_rate": 6.045318657426281e-06, "loss": 2.0489, "step": 110665 }, { "epoch": 7.519364044027721, "grad_norm": 3.1668944358825684, "learning_rate": 6.041072156543009e-06, "loss": 2.0905, "step": 110670 }, { "epoch": 7.519703764098383, "grad_norm": 3.234447479248047, "learning_rate": 6.036825655659736e-06, "loss": 1.8417, "step": 110675 }, { "epoch": 7.520043484169045, "grad_norm": 3.7949841022491455, "learning_rate": 6.032579154776465e-06, "loss": 2.1317, "step": 110680 }, { "epoch": 7.520383204239707, "grad_norm": 3.6965253353118896, "learning_rate": 6.028332653893192e-06, "loss": 2.0757, 
"step": 110685 }, { "epoch": 7.520722924310368, "grad_norm": 3.785297155380249, "learning_rate": 6.02408615300992e-06, "loss": 1.9359, "step": 110690 }, { "epoch": 7.52106264438103, "grad_norm": 3.9314815998077393, "learning_rate": 6.019839652126648e-06, "loss": 1.9755, "step": 110695 }, { "epoch": 7.521402364451692, "grad_norm": 3.597656726837158, "learning_rate": 6.015593151243376e-06, "loss": 1.8909, "step": 110700 }, { "epoch": 7.521742084522353, "grad_norm": 3.46681809425354, "learning_rate": 6.011346650360103e-06, "loss": 1.9163, "step": 110705 }, { "epoch": 7.522081804593015, "grad_norm": 4.067715644836426, "learning_rate": 6.007100149476831e-06, "loss": 2.3209, "step": 110710 }, { "epoch": 7.522421524663677, "grad_norm": 4.266924858093262, "learning_rate": 6.002853648593559e-06, "loss": 1.9342, "step": 110715 }, { "epoch": 7.522761244734339, "grad_norm": 4.391138076782227, "learning_rate": 5.9986071477102865e-06, "loss": 1.9092, "step": 110720 }, { "epoch": 7.523100964805001, "grad_norm": 4.521543979644775, "learning_rate": 5.994360646827015e-06, "loss": 2.0674, "step": 110725 }, { "epoch": 7.523440684875663, "grad_norm": 4.321346759796143, "learning_rate": 5.9901141459437425e-06, "loss": 2.1303, "step": 110730 }, { "epoch": 7.523780404946324, "grad_norm": 4.102565288543701, "learning_rate": 5.9858676450604705e-06, "loss": 2.1964, "step": 110735 }, { "epoch": 7.524120125016986, "grad_norm": 4.290229320526123, "learning_rate": 5.9816211441771985e-06, "loss": 1.9945, "step": 110740 }, { "epoch": 7.524459845087648, "grad_norm": 3.032724142074585, "learning_rate": 5.977374643293926e-06, "loss": 1.9405, "step": 110745 }, { "epoch": 7.524799565158309, "grad_norm": 3.763242721557617, "learning_rate": 5.9731281424106545e-06, "loss": 1.9711, "step": 110750 }, { "epoch": 7.525139285228971, "grad_norm": 3.8113598823547363, "learning_rate": 5.968881641527382e-06, "loss": 1.9298, "step": 110755 }, { "epoch": 7.525479005299633, "grad_norm": 4.16379976272583, 
"learning_rate": 5.964635140644109e-06, "loss": 1.8614, "step": 110760 }, { "epoch": 7.525818725370295, "grad_norm": 2.81081485748291, "learning_rate": 5.960388639760838e-06, "loss": 1.9943, "step": 110765 }, { "epoch": 7.526158445440957, "grad_norm": 3.892880439758301, "learning_rate": 5.956142138877565e-06, "loss": 1.7922, "step": 110770 }, { "epoch": 7.526498165511619, "grad_norm": 4.088669776916504, "learning_rate": 5.951895637994293e-06, "loss": 2.1437, "step": 110775 }, { "epoch": 7.52683788558228, "grad_norm": 4.391754150390625, "learning_rate": 5.947649137111021e-06, "loss": 1.9725, "step": 110780 }, { "epoch": 7.527177605652942, "grad_norm": 4.922784805297852, "learning_rate": 5.943402636227749e-06, "loss": 1.817, "step": 110785 }, { "epoch": 7.527517325723604, "grad_norm": 4.000901699066162, "learning_rate": 5.939156135344476e-06, "loss": 1.7639, "step": 110790 }, { "epoch": 7.527857045794265, "grad_norm": 3.2477221488952637, "learning_rate": 5.934909634461204e-06, "loss": 2.1123, "step": 110795 }, { "epoch": 7.528196765864927, "grad_norm": 2.9651577472686768, "learning_rate": 5.930663133577932e-06, "loss": 2.1138, "step": 110800 }, { "epoch": 7.528536485935589, "grad_norm": 3.584580659866333, "learning_rate": 5.92641663269466e-06, "loss": 2.1824, "step": 110805 }, { "epoch": 7.528876206006251, "grad_norm": 3.2111570835113525, "learning_rate": 5.922170131811388e-06, "loss": 1.8163, "step": 110810 }, { "epoch": 7.529215926076913, "grad_norm": 5.933976173400879, "learning_rate": 5.917923630928115e-06, "loss": 2.0105, "step": 110815 }, { "epoch": 7.529555646147575, "grad_norm": 4.254979133605957, "learning_rate": 5.913677130044843e-06, "loss": 1.8758, "step": 110820 }, { "epoch": 7.529895366218236, "grad_norm": 3.5981388092041016, "learning_rate": 5.909430629161571e-06, "loss": 1.8351, "step": 110825 }, { "epoch": 7.530235086288898, "grad_norm": 3.5115108489990234, "learning_rate": 5.9051841282782984e-06, "loss": 1.8949, "step": 110830 }, { "epoch": 
7.53057480635956, "grad_norm": 3.0431792736053467, "learning_rate": 5.900937627395027e-06, "loss": 1.931, "step": 110835 }, { "epoch": 7.530914526430221, "grad_norm": 3.6053109169006348, "learning_rate": 5.8966911265117544e-06, "loss": 1.8966, "step": 110840 }, { "epoch": 7.531254246500883, "grad_norm": 4.299210071563721, "learning_rate": 5.8924446256284825e-06, "loss": 1.8622, "step": 110845 }, { "epoch": 7.5315939665715455, "grad_norm": 4.246075630187988, "learning_rate": 5.8881981247452105e-06, "loss": 1.9822, "step": 110850 }, { "epoch": 7.531933686642207, "grad_norm": 3.7711217403411865, "learning_rate": 5.883951623861938e-06, "loss": 1.8607, "step": 110855 }, { "epoch": 7.532273406712869, "grad_norm": 3.8468878269195557, "learning_rate": 5.8797051229786665e-06, "loss": 1.7476, "step": 110860 }, { "epoch": 7.532613126783531, "grad_norm": 4.034709453582764, "learning_rate": 5.875458622095394e-06, "loss": 2.0461, "step": 110865 }, { "epoch": 7.532952846854192, "grad_norm": 4.815388202667236, "learning_rate": 5.871212121212122e-06, "loss": 1.9426, "step": 110870 }, { "epoch": 7.533292566924854, "grad_norm": 3.153536319732666, "learning_rate": 5.86696562032885e-06, "loss": 2.0777, "step": 110875 }, { "epoch": 7.533632286995516, "grad_norm": 3.4764723777770996, "learning_rate": 5.862719119445577e-06, "loss": 1.9597, "step": 110880 }, { "epoch": 7.533972007066177, "grad_norm": 3.855800151824951, "learning_rate": 5.858472618562305e-06, "loss": 2.0271, "step": 110885 }, { "epoch": 7.534311727136839, "grad_norm": 3.5879149436950684, "learning_rate": 5.854226117679033e-06, "loss": 2.0055, "step": 110890 }, { "epoch": 7.5346514472075015, "grad_norm": 4.380441665649414, "learning_rate": 5.849979616795761e-06, "loss": 2.2645, "step": 110895 }, { "epoch": 7.534991167278163, "grad_norm": 3.471498727798462, "learning_rate": 5.845733115912488e-06, "loss": 1.8333, "step": 110900 }, { "epoch": 7.535330887348825, "grad_norm": 3.5135464668273926, "learning_rate": 
5.841486615029216e-06, "loss": 2.1531, "step": 110905 }, { "epoch": 7.535670607419487, "grad_norm": 4.812485694885254, "learning_rate": 5.837240114145944e-06, "loss": 2.1385, "step": 110910 }, { "epoch": 7.536010327490148, "grad_norm": 4.017507076263428, "learning_rate": 5.832993613262672e-06, "loss": 2.2455, "step": 110915 }, { "epoch": 7.53635004756081, "grad_norm": 3.528129816055298, "learning_rate": 5.8287471123794e-06, "loss": 1.8992, "step": 110920 }, { "epoch": 7.536689767631472, "grad_norm": 3.375171422958374, "learning_rate": 5.824500611496127e-06, "loss": 1.8408, "step": 110925 }, { "epoch": 7.537029487702133, "grad_norm": 3.9273784160614014, "learning_rate": 5.820254110612855e-06, "loss": 1.9659, "step": 110930 }, { "epoch": 7.537369207772795, "grad_norm": 2.925635576248169, "learning_rate": 5.816007609729583e-06, "loss": 2.3328, "step": 110935 }, { "epoch": 7.5377089278434575, "grad_norm": 3.6819443702697754, "learning_rate": 5.81176110884631e-06, "loss": 2.2141, "step": 110940 }, { "epoch": 7.538048647914119, "grad_norm": 4.359312534332275, "learning_rate": 5.807514607963039e-06, "loss": 2.2109, "step": 110945 }, { "epoch": 7.538388367984781, "grad_norm": 4.293170928955078, "learning_rate": 5.803268107079766e-06, "loss": 1.796, "step": 110950 }, { "epoch": 7.538728088055443, "grad_norm": 5.0165605545043945, "learning_rate": 5.799021606196494e-06, "loss": 1.8374, "step": 110955 }, { "epoch": 7.539067808126104, "grad_norm": 3.5755913257598877, "learning_rate": 5.794775105313222e-06, "loss": 1.8941, "step": 110960 }, { "epoch": 7.539407528196766, "grad_norm": 3.463120222091675, "learning_rate": 5.7905286044299496e-06, "loss": 1.878, "step": 110965 }, { "epoch": 7.539747248267427, "grad_norm": 3.361957550048828, "learning_rate": 5.786282103546678e-06, "loss": 1.7638, "step": 110970 }, { "epoch": 7.540086968338089, "grad_norm": 3.5854852199554443, "learning_rate": 5.7820356026634056e-06, "loss": 2.0567, "step": 110975 }, { "epoch": 7.540426688408751, 
"grad_norm": 4.740382671356201, "learning_rate": 5.777789101780134e-06, "loss": 1.7629, "step": 110980 }, { "epoch": 7.540766408479413, "grad_norm": 3.143730401992798, "learning_rate": 5.773542600896862e-06, "loss": 2.0628, "step": 110985 }, { "epoch": 7.541106128550075, "grad_norm": 3.4384963512420654, "learning_rate": 5.769296100013589e-06, "loss": 1.9377, "step": 110990 }, { "epoch": 7.541445848620737, "grad_norm": 3.3551061153411865, "learning_rate": 5.765049599130317e-06, "loss": 1.921, "step": 110995 }, { "epoch": 7.541785568691398, "grad_norm": 4.462812900543213, "learning_rate": 5.760803098247045e-06, "loss": 2.1497, "step": 111000 }, { "epoch": 7.54212528876206, "grad_norm": 3.4414196014404297, "learning_rate": 5.756556597363773e-06, "loss": 2.0037, "step": 111005 }, { "epoch": 7.542465008832722, "grad_norm": 3.2294914722442627, "learning_rate": 5.7523100964805e-06, "loss": 2.2025, "step": 111010 }, { "epoch": 7.542804728903383, "grad_norm": 3.258967161178589, "learning_rate": 5.748063595597228e-06, "loss": 1.846, "step": 111015 }, { "epoch": 7.543144448974045, "grad_norm": 3.1669509410858154, "learning_rate": 5.743817094713956e-06, "loss": 2.0959, "step": 111020 }, { "epoch": 7.543484169044707, "grad_norm": 3.6967945098876953, "learning_rate": 5.739570593830684e-06, "loss": 2.0681, "step": 111025 }, { "epoch": 7.543823889115369, "grad_norm": 4.272464275360107, "learning_rate": 5.735324092947412e-06, "loss": 1.726, "step": 111030 }, { "epoch": 7.544163609186031, "grad_norm": 3.630354404449463, "learning_rate": 5.731077592064139e-06, "loss": 1.9914, "step": 111035 }, { "epoch": 7.544503329256693, "grad_norm": 4.152092933654785, "learning_rate": 5.726831091180868e-06, "loss": 2.0027, "step": 111040 }, { "epoch": 7.544843049327354, "grad_norm": 2.606067657470703, "learning_rate": 5.722584590297595e-06, "loss": 1.8311, "step": 111045 }, { "epoch": 7.545182769398016, "grad_norm": 3.3451128005981445, "learning_rate": 5.718338089414322e-06, "loss": 2.2412, 
"step": 111050 }, { "epoch": 7.545522489468678, "grad_norm": 4.143338203430176, "learning_rate": 5.714091588531051e-06, "loss": 2.1729, "step": 111055 }, { "epoch": 7.545862209539339, "grad_norm": 3.086073398590088, "learning_rate": 5.709845087647778e-06, "loss": 1.7961, "step": 111060 }, { "epoch": 7.546201929610001, "grad_norm": 3.671531915664673, "learning_rate": 5.705598586764506e-06, "loss": 1.9687, "step": 111065 }, { "epoch": 7.546541649680663, "grad_norm": 4.114954471588135, "learning_rate": 5.701352085881234e-06, "loss": 1.9688, "step": 111070 }, { "epoch": 7.546881369751325, "grad_norm": 3.9192068576812744, "learning_rate": 5.6971055849979615e-06, "loss": 2.026, "step": 111075 }, { "epoch": 7.547221089821987, "grad_norm": 3.4352095127105713, "learning_rate": 5.6928590841146895e-06, "loss": 2.0743, "step": 111080 }, { "epoch": 7.547560809892649, "grad_norm": 4.084597110748291, "learning_rate": 5.6886125832314175e-06, "loss": 1.8588, "step": 111085 }, { "epoch": 7.54790052996331, "grad_norm": 3.353131055831909, "learning_rate": 5.6843660823481455e-06, "loss": 2.0888, "step": 111090 }, { "epoch": 7.548240250033972, "grad_norm": 3.673093795776367, "learning_rate": 5.6801195814648735e-06, "loss": 1.9953, "step": 111095 }, { "epoch": 7.548579970104634, "grad_norm": 3.7504875659942627, "learning_rate": 5.675873080581601e-06, "loss": 1.962, "step": 111100 }, { "epoch": 7.548919690175295, "grad_norm": 4.048918724060059, "learning_rate": 5.671626579698329e-06, "loss": 2.0279, "step": 111105 }, { "epoch": 7.549259410245957, "grad_norm": 3.714876413345337, "learning_rate": 5.667380078815057e-06, "loss": 1.8704, "step": 111110 }, { "epoch": 7.5495991303166194, "grad_norm": 3.324983596801758, "learning_rate": 5.663133577931785e-06, "loss": 1.7509, "step": 111115 }, { "epoch": 7.549938850387281, "grad_norm": 3.678363561630249, "learning_rate": 5.658887077048512e-06, "loss": 2.1735, "step": 111120 }, { "epoch": 7.550278570457943, "grad_norm": 3.94224214553833, 
"learning_rate": 5.654640576165241e-06, "loss": 2.0468, "step": 111125 }, { "epoch": 7.550618290528605, "grad_norm": 2.9087321758270264, "learning_rate": 5.650394075281968e-06, "loss": 2.037, "step": 111130 }, { "epoch": 7.550958010599266, "grad_norm": 5.0169806480407715, "learning_rate": 5.646147574398696e-06, "loss": 1.6746, "step": 111135 }, { "epoch": 7.551297730669928, "grad_norm": 3.901689291000366, "learning_rate": 5.641901073515424e-06, "loss": 1.7978, "step": 111140 }, { "epoch": 7.55163745074059, "grad_norm": 3.2702369689941406, "learning_rate": 5.637654572632151e-06, "loss": 1.8715, "step": 111145 }, { "epoch": 7.551977170811251, "grad_norm": 3.5786643028259277, "learning_rate": 5.63340807174888e-06, "loss": 2.0849, "step": 111150 }, { "epoch": 7.552316890881913, "grad_norm": 3.392441511154175, "learning_rate": 5.629161570865607e-06, "loss": 2.066, "step": 111155 }, { "epoch": 7.5526566109525755, "grad_norm": 3.426429033279419, "learning_rate": 5.624915069982334e-06, "loss": 1.9869, "step": 111160 }, { "epoch": 7.552996331023237, "grad_norm": 3.034966230392456, "learning_rate": 5.620668569099063e-06, "loss": 2.1255, "step": 111165 }, { "epoch": 7.553336051093899, "grad_norm": 5.011295318603516, "learning_rate": 5.61642206821579e-06, "loss": 1.8875, "step": 111170 }, { "epoch": 7.55367577116456, "grad_norm": 3.2721550464630127, "learning_rate": 5.612175567332518e-06, "loss": 1.8129, "step": 111175 }, { "epoch": 7.554015491235222, "grad_norm": 3.9564993381500244, "learning_rate": 5.607929066449246e-06, "loss": 1.8677, "step": 111180 }, { "epoch": 7.554355211305884, "grad_norm": 3.4155328273773193, "learning_rate": 5.6036825655659734e-06, "loss": 2.0281, "step": 111185 }, { "epoch": 7.554694931376545, "grad_norm": 3.8810245990753174, "learning_rate": 5.5994360646827014e-06, "loss": 1.8814, "step": 111190 }, { "epoch": 7.555034651447207, "grad_norm": 5.875381946563721, "learning_rate": 5.5951895637994294e-06, "loss": 1.6829, "step": 111195 }, { "epoch": 
7.555374371517869, "grad_norm": 4.60020112991333, "learning_rate": 5.5909430629161575e-06, "loss": 2.1356, "step": 111200 }, { "epoch": 7.555714091588531, "grad_norm": 3.14959716796875, "learning_rate": 5.5866965620328855e-06, "loss": 2.1188, "step": 111205 }, { "epoch": 7.556053811659193, "grad_norm": 3.992488145828247, "learning_rate": 5.5824500611496135e-06, "loss": 1.8289, "step": 111210 }, { "epoch": 7.556393531729855, "grad_norm": 4.732187271118164, "learning_rate": 5.578203560266341e-06, "loss": 1.723, "step": 111215 }, { "epoch": 7.556733251800516, "grad_norm": 3.3994083404541016, "learning_rate": 5.573957059383069e-06, "loss": 1.8528, "step": 111220 }, { "epoch": 7.557072971871178, "grad_norm": 3.8747541904449463, "learning_rate": 5.569710558499797e-06, "loss": 1.6307, "step": 111225 }, { "epoch": 7.55741269194184, "grad_norm": 3.7193968296051025, "learning_rate": 5.565464057616524e-06, "loss": 2.0325, "step": 111230 }, { "epoch": 7.557752412012501, "grad_norm": 3.858083724975586, "learning_rate": 5.561217556733253e-06, "loss": 1.9084, "step": 111235 }, { "epoch": 7.558092132083163, "grad_norm": 3.4239144325256348, "learning_rate": 5.55697105584998e-06, "loss": 1.828, "step": 111240 }, { "epoch": 7.558431852153825, "grad_norm": 6.260500907897949, "learning_rate": 5.552724554966708e-06, "loss": 1.9434, "step": 111245 }, { "epoch": 7.558771572224487, "grad_norm": 4.0826416015625, "learning_rate": 5.548478054083436e-06, "loss": 2.2348, "step": 111250 }, { "epoch": 7.559111292295149, "grad_norm": 3.9550135135650635, "learning_rate": 5.544231553200163e-06, "loss": 1.958, "step": 111255 }, { "epoch": 7.559451012365811, "grad_norm": 4.132345199584961, "learning_rate": 5.539985052316891e-06, "loss": 1.8683, "step": 111260 }, { "epoch": 7.559790732436472, "grad_norm": 3.5580692291259766, "learning_rate": 5.535738551433619e-06, "loss": 1.9521, "step": 111265 }, { "epoch": 7.560130452507134, "grad_norm": 4.046213150024414, "learning_rate": 5.531492050550346e-06, 
"loss": 1.8796, "step": 111270 }, { "epoch": 7.560470172577796, "grad_norm": 3.4970197677612305, "learning_rate": 5.527245549667075e-06, "loss": 2.1328, "step": 111275 }, { "epoch": 7.560809892648457, "grad_norm": 3.0826311111450195, "learning_rate": 5.522999048783802e-06, "loss": 2.0818, "step": 111280 }, { "epoch": 7.561149612719119, "grad_norm": 3.4045236110687256, "learning_rate": 5.51875254790053e-06, "loss": 1.8002, "step": 111285 }, { "epoch": 7.561489332789781, "grad_norm": 4.461976528167725, "learning_rate": 5.514506047017258e-06, "loss": 2.1135, "step": 111290 }, { "epoch": 7.561829052860443, "grad_norm": 3.566493034362793, "learning_rate": 5.510259546133986e-06, "loss": 1.8721, "step": 111295 }, { "epoch": 7.562168772931105, "grad_norm": 3.5021629333496094, "learning_rate": 5.506013045250713e-06, "loss": 2.2699, "step": 111300 }, { "epoch": 7.562508493001767, "grad_norm": 3.419222116470337, "learning_rate": 5.501766544367441e-06, "loss": 2.0679, "step": 111305 }, { "epoch": 7.562848213072428, "grad_norm": 3.607194423675537, "learning_rate": 5.497520043484169e-06, "loss": 2.0704, "step": 111310 }, { "epoch": 7.56318793314309, "grad_norm": 3.9707958698272705, "learning_rate": 5.493273542600897e-06, "loss": 2.0297, "step": 111315 }, { "epoch": 7.563527653213752, "grad_norm": 3.9201362133026123, "learning_rate": 5.489027041717625e-06, "loss": 1.8964, "step": 111320 }, { "epoch": 7.563867373284413, "grad_norm": 5.4038987159729, "learning_rate": 5.4847805408343526e-06, "loss": 1.8846, "step": 111325 }, { "epoch": 7.564207093355075, "grad_norm": 3.9415836334228516, "learning_rate": 5.4805340399510806e-06, "loss": 2.0205, "step": 111330 }, { "epoch": 7.564546813425737, "grad_norm": 3.6911871433258057, "learning_rate": 5.476287539067809e-06, "loss": 1.9424, "step": 111335 }, { "epoch": 7.564886533496399, "grad_norm": 4.128425598144531, "learning_rate": 5.472041038184536e-06, "loss": 1.8514, "step": 111340 }, { "epoch": 7.565226253567061, "grad_norm": 
3.5165395736694336, "learning_rate": 5.467794537301265e-06, "loss": 1.9852, "step": 111345 }, { "epoch": 7.565565973637723, "grad_norm": 4.184835433959961, "learning_rate": 5.463548036417992e-06, "loss": 2.0418, "step": 111350 }, { "epoch": 7.565905693708384, "grad_norm": 3.4440133571624756, "learning_rate": 5.45930153553472e-06, "loss": 1.9997, "step": 111355 }, { "epoch": 7.566245413779046, "grad_norm": 4.212183475494385, "learning_rate": 5.455055034651448e-06, "loss": 2.0229, "step": 111360 }, { "epoch": 7.566585133849708, "grad_norm": 3.9225940704345703, "learning_rate": 5.450808533768175e-06, "loss": 2.0795, "step": 111365 }, { "epoch": 7.566924853920369, "grad_norm": 4.638535499572754, "learning_rate": 5.446562032884903e-06, "loss": 2.1836, "step": 111370 }, { "epoch": 7.567264573991031, "grad_norm": 3.9994161128997803, "learning_rate": 5.442315532001631e-06, "loss": 2.1366, "step": 111375 }, { "epoch": 7.567604294061693, "grad_norm": 4.391661643981934, "learning_rate": 5.438069031118358e-06, "loss": 2.0753, "step": 111380 }, { "epoch": 7.567944014132355, "grad_norm": 3.186380624771118, "learning_rate": 5.433822530235087e-06, "loss": 1.9722, "step": 111385 }, { "epoch": 7.568283734203017, "grad_norm": 3.584080219268799, "learning_rate": 5.429576029351814e-06, "loss": 2.0013, "step": 111390 }, { "epoch": 7.568623454273679, "grad_norm": 3.4791388511657715, "learning_rate": 5.425329528468542e-06, "loss": 1.9246, "step": 111395 }, { "epoch": 7.56896317434434, "grad_norm": 3.4312660694122314, "learning_rate": 5.42108302758527e-06, "loss": 2.1087, "step": 111400 }, { "epoch": 7.569302894415002, "grad_norm": 3.482553243637085, "learning_rate": 5.416836526701998e-06, "loss": 2.0111, "step": 111405 }, { "epoch": 7.569642614485664, "grad_norm": 3.001699924468994, "learning_rate": 5.412590025818725e-06, "loss": 2.1521, "step": 111410 }, { "epoch": 7.569982334556325, "grad_norm": 3.8699588775634766, "learning_rate": 5.408343524935453e-06, "loss": 1.6409, "step": 111415 
}, { "epoch": 7.570322054626987, "grad_norm": 3.22885799407959, "learning_rate": 5.404097024052181e-06, "loss": 1.7174, "step": 111420 }, { "epoch": 7.5706617746976494, "grad_norm": 3.69438099861145, "learning_rate": 5.399850523168909e-06, "loss": 2.3105, "step": 111425 }, { "epoch": 7.571001494768311, "grad_norm": 3.7295167446136475, "learning_rate": 5.395604022285637e-06, "loss": 2.1555, "step": 111430 }, { "epoch": 7.571341214838973, "grad_norm": 4.817354679107666, "learning_rate": 5.3913575214023645e-06, "loss": 1.6953, "step": 111435 }, { "epoch": 7.571680934909635, "grad_norm": 3.1044843196868896, "learning_rate": 5.3871110205190925e-06, "loss": 1.9495, "step": 111440 }, { "epoch": 7.572020654980296, "grad_norm": 3.3161869049072266, "learning_rate": 5.3828645196358205e-06, "loss": 1.947, "step": 111445 }, { "epoch": 7.572360375050958, "grad_norm": 3.6133205890655518, "learning_rate": 5.378618018752548e-06, "loss": 1.9063, "step": 111450 }, { "epoch": 7.57270009512162, "grad_norm": 3.4937353134155273, "learning_rate": 5.3743715178692765e-06, "loss": 2.2645, "step": 111455 }, { "epoch": 7.573039815192281, "grad_norm": 3.5772221088409424, "learning_rate": 5.370125016986004e-06, "loss": 1.8107, "step": 111460 }, { "epoch": 7.573379535262943, "grad_norm": 4.378458499908447, "learning_rate": 5.365878516102731e-06, "loss": 1.7282, "step": 111465 }, { "epoch": 7.5737192553336055, "grad_norm": 3.2731754779815674, "learning_rate": 5.36163201521946e-06, "loss": 2.0179, "step": 111470 }, { "epoch": 7.574058975404267, "grad_norm": 4.283509254455566, "learning_rate": 5.357385514336187e-06, "loss": 1.8992, "step": 111475 }, { "epoch": 7.574398695474929, "grad_norm": 3.665285110473633, "learning_rate": 5.353139013452915e-06, "loss": 2.1985, "step": 111480 }, { "epoch": 7.574738415545591, "grad_norm": 2.6859874725341797, "learning_rate": 5.348892512569643e-06, "loss": 1.8773, "step": 111485 }, { "epoch": 7.575078135616252, "grad_norm": 3.6708874702453613, "learning_rate": 
5.344646011686371e-06, "loss": 1.9997, "step": 111490 }, { "epoch": 7.575417855686914, "grad_norm": 4.451516151428223, "learning_rate": 5.340399510803099e-06, "loss": 2.0778, "step": 111495 }, { "epoch": 7.575757575757576, "grad_norm": 3.416757583618164, "learning_rate": 5.336153009919826e-06, "loss": 1.9258, "step": 111500 }, { "epoch": 7.576097295828237, "grad_norm": 3.5152478218078613, "learning_rate": 5.331906509036554e-06, "loss": 1.8673, "step": 111505 }, { "epoch": 7.576437015898899, "grad_norm": 4.288064479827881, "learning_rate": 5.327660008153282e-06, "loss": 1.9816, "step": 111510 }, { "epoch": 7.5767767359695615, "grad_norm": 3.6912002563476562, "learning_rate": 5.32341350727001e-06, "loss": 1.9236, "step": 111515 }, { "epoch": 7.577116456040223, "grad_norm": 3.9641623497009277, "learning_rate": 5.319167006386737e-06, "loss": 2.0222, "step": 111520 }, { "epoch": 7.577456176110885, "grad_norm": 3.9027304649353027, "learning_rate": 5.314920505503465e-06, "loss": 1.9199, "step": 111525 }, { "epoch": 7.577795896181547, "grad_norm": 3.6495261192321777, "learning_rate": 5.310674004620193e-06, "loss": 2.0318, "step": 111530 }, { "epoch": 7.578135616252208, "grad_norm": 3.7359986305236816, "learning_rate": 5.306427503736921e-06, "loss": 2.0432, "step": 111535 }, { "epoch": 7.57847533632287, "grad_norm": 3.8952314853668213, "learning_rate": 5.302181002853649e-06, "loss": 1.6564, "step": 111540 }, { "epoch": 7.578815056393532, "grad_norm": 3.55659818649292, "learning_rate": 5.2979345019703764e-06, "loss": 1.7071, "step": 111545 }, { "epoch": 7.579154776464193, "grad_norm": 3.0097827911376953, "learning_rate": 5.2936880010871044e-06, "loss": 1.9826, "step": 111550 }, { "epoch": 7.579494496534855, "grad_norm": 4.271684646606445, "learning_rate": 5.2894415002038324e-06, "loss": 1.7279, "step": 111555 }, { "epoch": 7.5798342166055175, "grad_norm": 3.488861083984375, "learning_rate": 5.28519499932056e-06, "loss": 2.0285, "step": 111560 }, { "epoch": 7.580173936676179, 
"grad_norm": 3.800257682800293, "learning_rate": 5.2809484984372885e-06, "loss": 1.8878, "step": 111565 }, { "epoch": 7.580513656746841, "grad_norm": 3.558474540710449, "learning_rate": 5.276701997554016e-06, "loss": 1.4811, "step": 111570 }, { "epoch": 7.580853376817503, "grad_norm": 4.097392559051514, "learning_rate": 5.272455496670744e-06, "loss": 1.932, "step": 111575 }, { "epoch": 7.581193096888164, "grad_norm": 3.8970460891723633, "learning_rate": 5.268208995787472e-06, "loss": 1.8523, "step": 111580 }, { "epoch": 7.581532816958826, "grad_norm": 3.4565017223358154, "learning_rate": 5.263962494904199e-06, "loss": 2.0122, "step": 111585 }, { "epoch": 7.581872537029488, "grad_norm": 3.5331075191497803, "learning_rate": 5.259715994020927e-06, "loss": 2.1927, "step": 111590 }, { "epoch": 7.582212257100149, "grad_norm": 3.696261405944824, "learning_rate": 5.255469493137655e-06, "loss": 2.1178, "step": 111595 }, { "epoch": 7.582551977170811, "grad_norm": 4.289717674255371, "learning_rate": 5.251222992254383e-06, "loss": 2.0339, "step": 111600 }, { "epoch": 7.5828916972414735, "grad_norm": 3.305957078933716, "learning_rate": 5.246976491371111e-06, "loss": 2.1961, "step": 111605 }, { "epoch": 7.583231417312135, "grad_norm": 3.6969423294067383, "learning_rate": 5.242729990487838e-06, "loss": 1.9109, "step": 111610 }, { "epoch": 7.583571137382797, "grad_norm": 4.442541599273682, "learning_rate": 5.238483489604566e-06, "loss": 1.9608, "step": 111615 }, { "epoch": 7.583910857453459, "grad_norm": 4.126584529876709, "learning_rate": 5.234236988721294e-06, "loss": 1.9105, "step": 111620 }, { "epoch": 7.58425057752412, "grad_norm": 3.818894624710083, "learning_rate": 5.229990487838022e-06, "loss": 1.9425, "step": 111625 }, { "epoch": 7.584590297594782, "grad_norm": 3.7870054244995117, "learning_rate": 5.225743986954749e-06, "loss": 2.0724, "step": 111630 }, { "epoch": 7.584930017665444, "grad_norm": 3.92832612991333, "learning_rate": 5.221497486071477e-06, "loss": 1.9791, 
"step": 111635 }, { "epoch": 7.585269737736105, "grad_norm": 4.239109039306641, "learning_rate": 5.217250985188205e-06, "loss": 1.7653, "step": 111640 }, { "epoch": 7.585609457806767, "grad_norm": 4.105114936828613, "learning_rate": 5.213004484304932e-06, "loss": 2.0493, "step": 111645 }, { "epoch": 7.585949177877429, "grad_norm": 3.451068639755249, "learning_rate": 5.208757983421661e-06, "loss": 1.6392, "step": 111650 }, { "epoch": 7.586288897948091, "grad_norm": 4.055558681488037, "learning_rate": 5.204511482538388e-06, "loss": 1.9923, "step": 111655 }, { "epoch": 7.586628618018753, "grad_norm": 4.271589756011963, "learning_rate": 5.200264981655116e-06, "loss": 2.1899, "step": 111660 }, { "epoch": 7.586968338089414, "grad_norm": 4.501184940338135, "learning_rate": 5.196018480771844e-06, "loss": 1.8893, "step": 111665 }, { "epoch": 7.587308058160076, "grad_norm": 4.092451095581055, "learning_rate": 5.1917719798885715e-06, "loss": 1.7804, "step": 111670 }, { "epoch": 7.587647778230738, "grad_norm": 3.219853162765503, "learning_rate": 5.1875254790053e-06, "loss": 2.1024, "step": 111675 }, { "epoch": 7.587987498301399, "grad_norm": 3.1182734966278076, "learning_rate": 5.1832789781220276e-06, "loss": 1.8331, "step": 111680 }, { "epoch": 7.588327218372061, "grad_norm": 4.062686920166016, "learning_rate": 5.1790324772387556e-06, "loss": 2.0117, "step": 111685 }, { "epoch": 7.588666938442723, "grad_norm": 4.765925884246826, "learning_rate": 5.1747859763554836e-06, "loss": 1.9374, "step": 111690 }, { "epoch": 7.589006658513385, "grad_norm": 4.049410820007324, "learning_rate": 5.170539475472211e-06, "loss": 2.0408, "step": 111695 }, { "epoch": 7.589346378584047, "grad_norm": 3.8465237617492676, "learning_rate": 5.166292974588939e-06, "loss": 1.7578, "step": 111700 }, { "epoch": 7.589686098654709, "grad_norm": 3.4821112155914307, "learning_rate": 5.162046473705667e-06, "loss": 1.8853, "step": 111705 }, { "epoch": 7.59002581872537, "grad_norm": 3.1096549034118652, 
"learning_rate": 5.157799972822395e-06, "loss": 2.0403, "step": 111710 }, { "epoch": 7.590365538796032, "grad_norm": 3.9302918910980225, "learning_rate": 5.153553471939123e-06, "loss": 1.9635, "step": 111715 }, { "epoch": 7.590705258866694, "grad_norm": 3.2940056324005127, "learning_rate": 5.14930697105585e-06, "loss": 2.2558, "step": 111720 }, { "epoch": 7.591044978937355, "grad_norm": 3.603062868118286, "learning_rate": 5.145060470172578e-06, "loss": 1.9442, "step": 111725 }, { "epoch": 7.591384699008017, "grad_norm": 3.802485227584839, "learning_rate": 5.140813969289306e-06, "loss": 1.8047, "step": 111730 }, { "epoch": 7.5917244190786795, "grad_norm": 3.399033546447754, "learning_rate": 5.136567468406034e-06, "loss": 2.0192, "step": 111735 }, { "epoch": 7.592064139149341, "grad_norm": 3.3777904510498047, "learning_rate": 5.132320967522761e-06, "loss": 2.1931, "step": 111740 }, { "epoch": 7.592403859220003, "grad_norm": 3.6783981323242188, "learning_rate": 5.12807446663949e-06, "loss": 1.8251, "step": 111745 }, { "epoch": 7.592743579290665, "grad_norm": 4.411098480224609, "learning_rate": 5.123827965756217e-06, "loss": 1.6743, "step": 111750 }, { "epoch": 7.593083299361326, "grad_norm": 3.8202943801879883, "learning_rate": 5.119581464872944e-06, "loss": 1.9411, "step": 111755 }, { "epoch": 7.593423019431988, "grad_norm": 3.3051810264587402, "learning_rate": 5.115334963989673e-06, "loss": 2.0017, "step": 111760 }, { "epoch": 7.59376273950265, "grad_norm": 3.608293294906616, "learning_rate": 5.1110884631064e-06, "loss": 1.9084, "step": 111765 }, { "epoch": 7.594102459573311, "grad_norm": 3.4083163738250732, "learning_rate": 5.106841962223128e-06, "loss": 2.0868, "step": 111770 }, { "epoch": 7.594442179643973, "grad_norm": 3.8088841438293457, "learning_rate": 5.102595461339856e-06, "loss": 2.0779, "step": 111775 }, { "epoch": 7.5947818997146355, "grad_norm": 3.4899909496307373, "learning_rate": 5.0983489604565835e-06, "loss": 1.9539, "step": 111780 }, { "epoch": 
7.595121619785297, "grad_norm": 4.253703594207764, "learning_rate": 5.094102459573312e-06, "loss": 1.8609, "step": 111785 }, { "epoch": 7.595461339855959, "grad_norm": 3.7645342350006104, "learning_rate": 5.0898559586900395e-06, "loss": 1.8866, "step": 111790 }, { "epoch": 7.595801059926621, "grad_norm": 4.081841468811035, "learning_rate": 5.0856094578067675e-06, "loss": 1.891, "step": 111795 }, { "epoch": 7.596140779997282, "grad_norm": 3.8410165309906006, "learning_rate": 5.0813629569234955e-06, "loss": 1.9418, "step": 111800 }, { "epoch": 7.596480500067944, "grad_norm": 4.5774149894714355, "learning_rate": 5.077116456040223e-06, "loss": 1.9771, "step": 111805 }, { "epoch": 7.596820220138606, "grad_norm": 3.516568899154663, "learning_rate": 5.072869955156951e-06, "loss": 1.9201, "step": 111810 }, { "epoch": 7.597159940209267, "grad_norm": 3.7185935974121094, "learning_rate": 5.068623454273679e-06, "loss": 1.7797, "step": 111815 }, { "epoch": 7.597499660279929, "grad_norm": 3.278611421585083, "learning_rate": 5.064376953390407e-06, "loss": 2.0121, "step": 111820 }, { "epoch": 7.5978393803505915, "grad_norm": 3.7889113426208496, "learning_rate": 5.060130452507134e-06, "loss": 2.0628, "step": 111825 }, { "epoch": 7.598179100421253, "grad_norm": 4.7399702072143555, "learning_rate": 5.055883951623863e-06, "loss": 1.9137, "step": 111830 }, { "epoch": 7.598518820491915, "grad_norm": 3.446183919906616, "learning_rate": 5.05163745074059e-06, "loss": 2.1657, "step": 111835 }, { "epoch": 7.598858540562577, "grad_norm": 3.508748769760132, "learning_rate": 5.047390949857318e-06, "loss": 2.1346, "step": 111840 }, { "epoch": 7.599198260633238, "grad_norm": 3.3435394763946533, "learning_rate": 5.043144448974046e-06, "loss": 1.9147, "step": 111845 }, { "epoch": 7.5995379807039, "grad_norm": 3.914458990097046, "learning_rate": 5.038897948090773e-06, "loss": 1.5736, "step": 111850 }, { "epoch": 7.599877700774561, "grad_norm": 3.7083723545074463, "learning_rate": 
5.034651447207502e-06, "loss": 1.9691, "step": 111855 }, { "epoch": 7.600217420845223, "grad_norm": 13.105810165405273, "learning_rate": 5.030404946324229e-06, "loss": 2.0873, "step": 111860 }, { "epoch": 7.600557140915885, "grad_norm": 4.04179048538208, "learning_rate": 5.026158445440956e-06, "loss": 1.8123, "step": 111865 }, { "epoch": 7.600896860986547, "grad_norm": 3.215200185775757, "learning_rate": 5.021911944557685e-06, "loss": 1.9712, "step": 111870 }, { "epoch": 7.601236581057209, "grad_norm": 3.259030818939209, "learning_rate": 5.017665443674412e-06, "loss": 1.7856, "step": 111875 }, { "epoch": 7.601576301127871, "grad_norm": 3.19936466217041, "learning_rate": 5.01341894279114e-06, "loss": 2.0429, "step": 111880 }, { "epoch": 7.601916021198532, "grad_norm": 4.034076690673828, "learning_rate": 5.009172441907868e-06, "loss": 1.958, "step": 111885 }, { "epoch": 7.602255741269194, "grad_norm": 4.527256965637207, "learning_rate": 5.004925941024595e-06, "loss": 2.1105, "step": 111890 }, { "epoch": 7.602595461339856, "grad_norm": 2.932091474533081, "learning_rate": 5.000679440141324e-06, "loss": 1.973, "step": 111895 }, { "epoch": 7.602935181410517, "grad_norm": 3.681648015975952, "learning_rate": 4.9964329392580514e-06, "loss": 1.9686, "step": 111900 }, { "epoch": 7.603274901481179, "grad_norm": 3.869302988052368, "learning_rate": 4.9921864383747794e-06, "loss": 1.8232, "step": 111905 }, { "epoch": 7.603614621551841, "grad_norm": 3.8069655895233154, "learning_rate": 4.9879399374915074e-06, "loss": 2.1162, "step": 111910 }, { "epoch": 7.603954341622503, "grad_norm": 4.081679344177246, "learning_rate": 4.9836934366082355e-06, "loss": 1.9786, "step": 111915 }, { "epoch": 7.604294061693165, "grad_norm": 4.47990083694458, "learning_rate": 4.979446935724963e-06, "loss": 1.9198, "step": 111920 }, { "epoch": 7.604633781763827, "grad_norm": 4.23159646987915, "learning_rate": 4.975200434841691e-06, "loss": 1.9606, "step": 111925 }, { "epoch": 7.604973501834488, 
"grad_norm": 3.8177802562713623, "learning_rate": 4.970953933958419e-06, "loss": 1.6986, "step": 111930 }, { "epoch": 7.60531322190515, "grad_norm": 4.087655544281006, "learning_rate": 4.966707433075146e-06, "loss": 2.0461, "step": 111935 }, { "epoch": 7.605652941975812, "grad_norm": 4.050450325012207, "learning_rate": 4.962460932191875e-06, "loss": 2.3005, "step": 111940 }, { "epoch": 7.605992662046473, "grad_norm": 3.678185224533081, "learning_rate": 4.958214431308602e-06, "loss": 2.1245, "step": 111945 }, { "epoch": 7.606332382117135, "grad_norm": 4.001864910125732, "learning_rate": 4.95396793042533e-06, "loss": 1.978, "step": 111950 }, { "epoch": 7.606672102187797, "grad_norm": 3.4127614498138428, "learning_rate": 4.949721429542058e-06, "loss": 1.9589, "step": 111955 }, { "epoch": 7.607011822258459, "grad_norm": 2.905583381652832, "learning_rate": 4.945474928658785e-06, "loss": 1.8589, "step": 111960 }, { "epoch": 7.607351542329121, "grad_norm": 3.100275754928589, "learning_rate": 4.941228427775514e-06, "loss": 2.0775, "step": 111965 }, { "epoch": 7.607691262399783, "grad_norm": 4.012369632720947, "learning_rate": 4.936981926892241e-06, "loss": 2.008, "step": 111970 }, { "epoch": 7.608030982470444, "grad_norm": 5.706979751586914, "learning_rate": 4.932735426008968e-06, "loss": 1.9155, "step": 111975 }, { "epoch": 7.608370702541106, "grad_norm": 3.1933960914611816, "learning_rate": 4.928488925125697e-06, "loss": 1.7727, "step": 111980 }, { "epoch": 7.608710422611768, "grad_norm": 4.031796455383301, "learning_rate": 4.924242424242424e-06, "loss": 1.8941, "step": 111985 }, { "epoch": 7.609050142682429, "grad_norm": 3.364314079284668, "learning_rate": 4.919995923359152e-06, "loss": 2.1068, "step": 111990 }, { "epoch": 7.609389862753091, "grad_norm": 3.3421692848205566, "learning_rate": 4.91574942247588e-06, "loss": 2.2641, "step": 111995 }, { "epoch": 7.6097295828237534, "grad_norm": 3.8930673599243164, "learning_rate": 4.911502921592608e-06, "loss": 1.9151, 
"step": 112000 }, { "epoch": 7.610069302894415, "grad_norm": 3.223958730697632, "learning_rate": 4.907256420709336e-06, "loss": 1.976, "step": 112005 }, { "epoch": 7.610409022965077, "grad_norm": 3.510645866394043, "learning_rate": 4.903009919826063e-06, "loss": 2.0783, "step": 112010 }, { "epoch": 7.610748743035739, "grad_norm": 3.348231077194214, "learning_rate": 4.898763418942791e-06, "loss": 1.7651, "step": 112015 }, { "epoch": 7.6110884631064, "grad_norm": 3.8059728145599365, "learning_rate": 4.894516918059519e-06, "loss": 2.064, "step": 112020 }, { "epoch": 7.611428183177062, "grad_norm": 3.2840986251831055, "learning_rate": 4.890270417176247e-06, "loss": 2.0403, "step": 112025 }, { "epoch": 7.611767903247724, "grad_norm": 3.4243361949920654, "learning_rate": 4.8860239162929745e-06, "loss": 2.0607, "step": 112030 }, { "epoch": 7.612107623318385, "grad_norm": 4.060468673706055, "learning_rate": 4.8817774154097026e-06, "loss": 1.9887, "step": 112035 }, { "epoch": 7.612447343389047, "grad_norm": 3.7715401649475098, "learning_rate": 4.8775309145264306e-06, "loss": 1.8161, "step": 112040 }, { "epoch": 7.6127870634597095, "grad_norm": 3.5344667434692383, "learning_rate": 4.873284413643158e-06, "loss": 1.6777, "step": 112045 }, { "epoch": 7.613126783530371, "grad_norm": 3.485692262649536, "learning_rate": 4.869037912759887e-06, "loss": 1.8824, "step": 112050 }, { "epoch": 7.613466503601033, "grad_norm": 4.007848262786865, "learning_rate": 4.864791411876614e-06, "loss": 2.1004, "step": 112055 }, { "epoch": 7.613806223671695, "grad_norm": 3.5948448181152344, "learning_rate": 4.860544910993342e-06, "loss": 1.8007, "step": 112060 }, { "epoch": 7.614145943742356, "grad_norm": 3.376559257507324, "learning_rate": 4.85629841011007e-06, "loss": 1.7781, "step": 112065 }, { "epoch": 7.614485663813018, "grad_norm": 3.9831290245056152, "learning_rate": 4.852051909226797e-06, "loss": 1.9335, "step": 112070 }, { "epoch": 7.61482538388368, "grad_norm": 3.7032651901245117, 
"learning_rate": 4.847805408343526e-06, "loss": 1.7808, "step": 112075 }, { "epoch": 7.615165103954341, "grad_norm": 4.251667499542236, "learning_rate": 4.843558907460253e-06, "loss": 2.0632, "step": 112080 }, { "epoch": 7.615504824025003, "grad_norm": 3.8649168014526367, "learning_rate": 4.839312406576981e-06, "loss": 2.0416, "step": 112085 }, { "epoch": 7.6158445440956655, "grad_norm": 4.371020793914795, "learning_rate": 4.835065905693709e-06, "loss": 1.9891, "step": 112090 }, { "epoch": 7.616184264166327, "grad_norm": 3.2246437072753906, "learning_rate": 4.830819404810436e-06, "loss": 2.0533, "step": 112095 }, { "epoch": 7.616523984236989, "grad_norm": 3.862511157989502, "learning_rate": 4.826572903927164e-06, "loss": 1.965, "step": 112100 }, { "epoch": 7.616863704307651, "grad_norm": 3.8147192001342773, "learning_rate": 4.822326403043892e-06, "loss": 2.0711, "step": 112105 }, { "epoch": 7.617203424378312, "grad_norm": 3.2093896865844727, "learning_rate": 4.81807990216062e-06, "loss": 1.9259, "step": 112110 }, { "epoch": 7.617543144448974, "grad_norm": 3.017719268798828, "learning_rate": 4.813833401277347e-06, "loss": 1.9662, "step": 112115 }, { "epoch": 7.617882864519636, "grad_norm": 3.662118911743164, "learning_rate": 4.809586900394075e-06, "loss": 2.108, "step": 112120 }, { "epoch": 7.618222584590297, "grad_norm": 3.3643033504486084, "learning_rate": 4.805340399510803e-06, "loss": 2.2349, "step": 112125 }, { "epoch": 7.618562304660959, "grad_norm": 4.543342590332031, "learning_rate": 4.801093898627531e-06, "loss": 1.8345, "step": 112130 }, { "epoch": 7.6189020247316215, "grad_norm": 4.397490501403809, "learning_rate": 4.796847397744259e-06, "loss": 2.2352, "step": 112135 }, { "epoch": 7.619241744802283, "grad_norm": 4.108263969421387, "learning_rate": 4.7926008968609865e-06, "loss": 1.9175, "step": 112140 }, { "epoch": 7.619581464872945, "grad_norm": 3.96698260307312, "learning_rate": 4.7883543959777145e-06, "loss": 2.001, "step": 112145 }, { "epoch": 
7.619921184943607, "grad_norm": 3.7141990661621094, "learning_rate": 4.7841078950944425e-06, "loss": 1.802, "step": 112150 }, { "epoch": 7.620260905014268, "grad_norm": 4.37008810043335, "learning_rate": 4.77986139421117e-06, "loss": 2.1303, "step": 112155 }, { "epoch": 7.62060062508493, "grad_norm": 3.6322288513183594, "learning_rate": 4.7756148933278985e-06, "loss": 2.0819, "step": 112160 }, { "epoch": 7.620940345155592, "grad_norm": 3.60191011428833, "learning_rate": 4.771368392444626e-06, "loss": 1.9618, "step": 112165 }, { "epoch": 7.621280065226253, "grad_norm": 3.6337740421295166, "learning_rate": 4.767121891561354e-06, "loss": 1.955, "step": 112170 }, { "epoch": 7.621619785296915, "grad_norm": 5.875759601593018, "learning_rate": 4.762875390678082e-06, "loss": 1.9998, "step": 112175 }, { "epoch": 7.6219595053675775, "grad_norm": 4.849252223968506, "learning_rate": 4.758628889794809e-06, "loss": 1.753, "step": 112180 }, { "epoch": 7.622299225438239, "grad_norm": 3.2780747413635254, "learning_rate": 4.754382388911538e-06, "loss": 2.0654, "step": 112185 }, { "epoch": 7.622638945508901, "grad_norm": 3.1736207008361816, "learning_rate": 4.750135888028265e-06, "loss": 2.165, "step": 112190 }, { "epoch": 7.622978665579563, "grad_norm": 4.066649436950684, "learning_rate": 4.745889387144993e-06, "loss": 1.8286, "step": 112195 }, { "epoch": 7.623318385650224, "grad_norm": 3.2083029747009277, "learning_rate": 4.741642886261721e-06, "loss": 1.926, "step": 112200 }, { "epoch": 7.623658105720886, "grad_norm": 4.416865348815918, "learning_rate": 4.737396385378448e-06, "loss": 1.9615, "step": 112205 }, { "epoch": 7.623997825791548, "grad_norm": 3.7691287994384766, "learning_rate": 4.733149884495176e-06, "loss": 1.9961, "step": 112210 }, { "epoch": 7.624337545862209, "grad_norm": 3.77089262008667, "learning_rate": 4.728903383611904e-06, "loss": 2.1908, "step": 112215 }, { "epoch": 7.624677265932871, "grad_norm": 4.081089496612549, "learning_rate": 4.724656882728632e-06, 
"loss": 1.9585, "step": 112220 }, { "epoch": 7.6250169860035335, "grad_norm": 3.619887351989746, "learning_rate": 4.720410381845359e-06, "loss": 1.9956, "step": 112225 }, { "epoch": 7.625356706074195, "grad_norm": 3.338808298110962, "learning_rate": 4.716163880962087e-06, "loss": 2.0158, "step": 112230 }, { "epoch": 7.625696426144857, "grad_norm": 3.203437328338623, "learning_rate": 4.711917380078815e-06, "loss": 2.1323, "step": 112235 }, { "epoch": 7.626036146215519, "grad_norm": 3.7587454319000244, "learning_rate": 4.707670879195543e-06, "loss": 2.1028, "step": 112240 }, { "epoch": 7.62637586628618, "grad_norm": 3.689664125442505, "learning_rate": 4.703424378312271e-06, "loss": 2.0757, "step": 112245 }, { "epoch": 7.626715586356842, "grad_norm": 3.659562587738037, "learning_rate": 4.699177877428998e-06, "loss": 2.0612, "step": 112250 }, { "epoch": 7.627055306427504, "grad_norm": 4.186506748199463, "learning_rate": 4.694931376545727e-06, "loss": 1.8384, "step": 112255 }, { "epoch": 7.627395026498165, "grad_norm": 2.925240993499756, "learning_rate": 4.6906848756624544e-06, "loss": 1.9441, "step": 112260 }, { "epoch": 7.627734746568827, "grad_norm": 5.06472635269165, "learning_rate": 4.686438374779182e-06, "loss": 2.0666, "step": 112265 }, { "epoch": 7.6280744666394895, "grad_norm": 4.041044235229492, "learning_rate": 4.6821918738959104e-06, "loss": 2.008, "step": 112270 }, { "epoch": 7.628414186710151, "grad_norm": 3.5085911750793457, "learning_rate": 4.677945373012638e-06, "loss": 1.9011, "step": 112275 }, { "epoch": 7.628753906780813, "grad_norm": 3.8441686630249023, "learning_rate": 4.673698872129366e-06, "loss": 2.0266, "step": 112280 }, { "epoch": 7.629093626851475, "grad_norm": 3.770583391189575, "learning_rate": 4.669452371246094e-06, "loss": 2.1601, "step": 112285 }, { "epoch": 7.629433346922136, "grad_norm": 4.396810531616211, "learning_rate": 4.665205870362821e-06, "loss": 1.6928, "step": 112290 }, { "epoch": 7.629773066992798, "grad_norm": 
4.28807258605957, "learning_rate": 4.660959369479549e-06, "loss": 1.88, "step": 112295 }, { "epoch": 7.63011278706346, "grad_norm": 3.278188467025757, "learning_rate": 4.656712868596277e-06, "loss": 2.0503, "step": 112300 }, { "epoch": 7.630452507134121, "grad_norm": 3.9082939624786377, "learning_rate": 4.652466367713005e-06, "loss": 1.9037, "step": 112305 }, { "epoch": 7.6307922272047835, "grad_norm": 5.126367092132568, "learning_rate": 4.648219866829733e-06, "loss": 1.9622, "step": 112310 }, { "epoch": 7.6311319472754455, "grad_norm": 4.1876301765441895, "learning_rate": 4.64397336594646e-06, "loss": 1.9097, "step": 112315 }, { "epoch": 7.631471667346107, "grad_norm": 3.183544874191284, "learning_rate": 4.639726865063188e-06, "loss": 1.9934, "step": 112320 }, { "epoch": 7.631811387416769, "grad_norm": 3.46462082862854, "learning_rate": 4.635480364179916e-06, "loss": 1.7779, "step": 112325 }, { "epoch": 7.63215110748743, "grad_norm": 4.047431945800781, "learning_rate": 4.631233863296644e-06, "loss": 2.156, "step": 112330 }, { "epoch": 7.632490827558092, "grad_norm": 3.4946675300598145, "learning_rate": 4.626987362413371e-06, "loss": 1.8474, "step": 112335 }, { "epoch": 7.632830547628754, "grad_norm": 3.829223394393921, "learning_rate": 4.6227408615301e-06, "loss": 1.9213, "step": 112340 }, { "epoch": 7.633170267699415, "grad_norm": 3.9627857208251953, "learning_rate": 4.618494360646827e-06, "loss": 2.0472, "step": 112345 }, { "epoch": 7.633509987770077, "grad_norm": 3.1743035316467285, "learning_rate": 4.614247859763555e-06, "loss": 2.1349, "step": 112350 }, { "epoch": 7.6338497078407395, "grad_norm": 3.6041781902313232, "learning_rate": 4.610001358880283e-06, "loss": 1.7969, "step": 112355 }, { "epoch": 7.634189427911401, "grad_norm": 4.9164137840271, "learning_rate": 4.60575485799701e-06, "loss": 2.1455, "step": 112360 }, { "epoch": 7.634529147982063, "grad_norm": 3.5939016342163086, "learning_rate": 4.601508357113739e-06, "loss": 1.9877, "step": 112365 }, { 
"epoch": 7.634868868052725, "grad_norm": 3.330188751220703, "learning_rate": 4.597261856230466e-06, "loss": 1.9855, "step": 112370 }, { "epoch": 7.635208588123386, "grad_norm": 2.9494426250457764, "learning_rate": 4.5930153553471935e-06, "loss": 1.8213, "step": 112375 }, { "epoch": 7.635548308194048, "grad_norm": 3.5361528396606445, "learning_rate": 4.588768854463922e-06, "loss": 2.1726, "step": 112380 }, { "epoch": 7.63588802826471, "grad_norm": 4.843100070953369, "learning_rate": 4.5845223535806495e-06, "loss": 1.8068, "step": 112385 }, { "epoch": 7.636227748335371, "grad_norm": 4.180593490600586, "learning_rate": 4.5802758526973775e-06, "loss": 1.7728, "step": 112390 }, { "epoch": 7.636567468406033, "grad_norm": 4.021315097808838, "learning_rate": 4.5760293518141056e-06, "loss": 1.8335, "step": 112395 }, { "epoch": 7.6369071884766955, "grad_norm": 4.030701160430908, "learning_rate": 4.571782850930833e-06, "loss": 1.7616, "step": 112400 }, { "epoch": 7.637246908547357, "grad_norm": 3.945033073425293, "learning_rate": 4.567536350047561e-06, "loss": 2.1862, "step": 112405 }, { "epoch": 7.637586628618019, "grad_norm": 3.452416181564331, "learning_rate": 4.563289849164289e-06, "loss": 1.9324, "step": 112410 }, { "epoch": 7.637926348688681, "grad_norm": 3.8190994262695312, "learning_rate": 4.559043348281017e-06, "loss": 1.8333, "step": 112415 }, { "epoch": 7.638266068759342, "grad_norm": 3.782716751098633, "learning_rate": 4.554796847397745e-06, "loss": 1.988, "step": 112420 }, { "epoch": 7.638605788830004, "grad_norm": 3.8163037300109863, "learning_rate": 4.550550346514473e-06, "loss": 2.0324, "step": 112425 }, { "epoch": 7.638945508900666, "grad_norm": 3.2151145935058594, "learning_rate": 4.5463038456312e-06, "loss": 1.9865, "step": 112430 }, { "epoch": 7.639285228971327, "grad_norm": 3.6395068168640137, "learning_rate": 4.542057344747928e-06, "loss": 1.8733, "step": 112435 }, { "epoch": 7.639624949041989, "grad_norm": 3.553109884262085, "learning_rate": 
4.537810843864656e-06, "loss": 2.0626, "step": 112440 }, { "epoch": 7.6399646691126515, "grad_norm": 3.951477289199829, "learning_rate": 4.533564342981383e-06, "loss": 2.043, "step": 112445 }, { "epoch": 7.640304389183313, "grad_norm": 5.156401634216309, "learning_rate": 4.529317842098112e-06, "loss": 1.9031, "step": 112450 }, { "epoch": 7.640644109253975, "grad_norm": 3.644368886947632, "learning_rate": 4.525071341214839e-06, "loss": 1.8727, "step": 112455 }, { "epoch": 7.640983829324637, "grad_norm": 5.046260833740234, "learning_rate": 4.520824840331567e-06, "loss": 1.9749, "step": 112460 }, { "epoch": 7.641323549395298, "grad_norm": 4.043753623962402, "learning_rate": 4.516578339448295e-06, "loss": 1.8258, "step": 112465 }, { "epoch": 7.64166326946596, "grad_norm": 3.464958906173706, "learning_rate": 4.512331838565022e-06, "loss": 1.6738, "step": 112470 }, { "epoch": 7.642002989536622, "grad_norm": 3.382032871246338, "learning_rate": 4.50808533768175e-06, "loss": 2.0103, "step": 112475 }, { "epoch": 7.642342709607283, "grad_norm": 4.1471028327941895, "learning_rate": 4.503838836798478e-06, "loss": 1.7859, "step": 112480 }, { "epoch": 7.642682429677945, "grad_norm": 4.119910717010498, "learning_rate": 4.4995923359152055e-06, "loss": 2.0463, "step": 112485 }, { "epoch": 7.6430221497486075, "grad_norm": 4.099519729614258, "learning_rate": 4.495345835031934e-06, "loss": 1.6177, "step": 112490 }, { "epoch": 7.643361869819269, "grad_norm": 4.143945693969727, "learning_rate": 4.4910993341486615e-06, "loss": 1.7517, "step": 112495 }, { "epoch": 7.643701589889931, "grad_norm": 3.1655430793762207, "learning_rate": 4.4868528332653895e-06, "loss": 2.1307, "step": 112500 }, { "epoch": 7.644041309960593, "grad_norm": 3.744354009628296, "learning_rate": 4.4826063323821175e-06, "loss": 1.8377, "step": 112505 }, { "epoch": 7.644381030031254, "grad_norm": 2.9692394733428955, "learning_rate": 4.4783598314988455e-06, "loss": 1.9129, "step": 112510 }, { "epoch": 7.644720750101916, 
"grad_norm": 3.0414276123046875, "learning_rate": 4.474113330615573e-06, "loss": 2.1786, "step": 112515 }, { "epoch": 7.645060470172578, "grad_norm": 4.232875823974609, "learning_rate": 4.469866829732301e-06, "loss": 1.875, "step": 112520 }, { "epoch": 7.645400190243239, "grad_norm": 4.7394914627075195, "learning_rate": 4.465620328849029e-06, "loss": 2.1048, "step": 112525 }, { "epoch": 7.645739910313901, "grad_norm": 3.8345181941986084, "learning_rate": 4.461373827965757e-06, "loss": 2.1273, "step": 112530 }, { "epoch": 7.6460796303845635, "grad_norm": 2.892263889312744, "learning_rate": 4.457127327082485e-06, "loss": 2.084, "step": 112535 }, { "epoch": 7.646419350455225, "grad_norm": 3.551466941833496, "learning_rate": 4.452880826199212e-06, "loss": 1.867, "step": 112540 }, { "epoch": 7.646759070525887, "grad_norm": 4.499550819396973, "learning_rate": 4.44863432531594e-06, "loss": 2.0493, "step": 112545 }, { "epoch": 7.647098790596548, "grad_norm": 3.52297043800354, "learning_rate": 4.444387824432668e-06, "loss": 2.1353, "step": 112550 }, { "epoch": 7.64743851066721, "grad_norm": 3.8789775371551514, "learning_rate": 4.440141323549395e-06, "loss": 1.8684, "step": 112555 }, { "epoch": 7.647778230737872, "grad_norm": 3.3062634468078613, "learning_rate": 4.435894822666124e-06, "loss": 2.2355, "step": 112560 }, { "epoch": 7.648117950808533, "grad_norm": 4.243654727935791, "learning_rate": 4.431648321782851e-06, "loss": 1.681, "step": 112565 }, { "epoch": 7.648457670879195, "grad_norm": 3.818739891052246, "learning_rate": 4.427401820899579e-06, "loss": 2.0752, "step": 112570 }, { "epoch": 7.6487973909498574, "grad_norm": 3.996821641921997, "learning_rate": 4.423155320016307e-06, "loss": 1.9981, "step": 112575 }, { "epoch": 7.649137111020519, "grad_norm": 4.294559478759766, "learning_rate": 4.418908819133034e-06, "loss": 2.0929, "step": 112580 }, { "epoch": 7.649476831091181, "grad_norm": 3.507270097732544, "learning_rate": 4.414662318249762e-06, "loss": 1.6762, "step": 
112585 }, { "epoch": 7.649816551161843, "grad_norm": 3.7943308353424072, "learning_rate": 4.41041581736649e-06, "loss": 2.0879, "step": 112590 }, { "epoch": 7.650156271232504, "grad_norm": 3.398632526397705, "learning_rate": 4.406169316483218e-06, "loss": 2.0785, "step": 112595 }, { "epoch": 7.650495991303166, "grad_norm": 3.500650644302368, "learning_rate": 4.401922815599946e-06, "loss": 1.9517, "step": 112600 }, { "epoch": 7.650835711373828, "grad_norm": 3.892268419265747, "learning_rate": 4.397676314716673e-06, "loss": 2.0713, "step": 112605 }, { "epoch": 7.651175431444489, "grad_norm": 2.8885042667388916, "learning_rate": 4.393429813833401e-06, "loss": 1.6621, "step": 112610 }, { "epoch": 7.651515151515151, "grad_norm": 3.9781248569488525, "learning_rate": 4.3891833129501294e-06, "loss": 2.0853, "step": 112615 }, { "epoch": 7.6518548715858135, "grad_norm": 3.2898738384246826, "learning_rate": 4.3849368120668574e-06, "loss": 1.5598, "step": 112620 }, { "epoch": 7.652194591656475, "grad_norm": 3.7136082649230957, "learning_rate": 4.380690311183585e-06, "loss": 2.0447, "step": 112625 }, { "epoch": 7.652534311727137, "grad_norm": 3.794177532196045, "learning_rate": 4.376443810300313e-06, "loss": 2.0042, "step": 112630 }, { "epoch": 7.652874031797799, "grad_norm": 3.5909886360168457, "learning_rate": 4.372197309417041e-06, "loss": 1.8398, "step": 112635 }, { "epoch": 7.65321375186846, "grad_norm": 4.059556007385254, "learning_rate": 4.367950808533769e-06, "loss": 2.143, "step": 112640 }, { "epoch": 7.653553471939122, "grad_norm": 3.8212592601776123, "learning_rate": 4.363704307650497e-06, "loss": 1.9328, "step": 112645 }, { "epoch": 7.653893192009784, "grad_norm": 3.903646230697632, "learning_rate": 4.359457806767224e-06, "loss": 2.0328, "step": 112650 }, { "epoch": 7.654232912080445, "grad_norm": 4.548757553100586, "learning_rate": 4.355211305883952e-06, "loss": 2.0259, "step": 112655 }, { "epoch": 7.654572632151107, "grad_norm": 4.122682094573975, "learning_rate": 
4.35096480500068e-06, "loss": 1.9156, "step": 112660 }, { "epoch": 7.6549123522217695, "grad_norm": 4.714766502380371, "learning_rate": 4.346718304117407e-06, "loss": 1.8589, "step": 112665 }, { "epoch": 7.655252072292431, "grad_norm": 3.163590669631958, "learning_rate": 4.342471803234136e-06, "loss": 1.6428, "step": 112670 }, { "epoch": 7.655591792363093, "grad_norm": 4.121455192565918, "learning_rate": 4.338225302350863e-06, "loss": 2.1416, "step": 112675 }, { "epoch": 7.655931512433755, "grad_norm": 4.085540294647217, "learning_rate": 4.333978801467591e-06, "loss": 2.1323, "step": 112680 }, { "epoch": 7.656271232504416, "grad_norm": 4.331485271453857, "learning_rate": 4.329732300584319e-06, "loss": 1.8675, "step": 112685 }, { "epoch": 7.656610952575078, "grad_norm": 3.956380605697632, "learning_rate": 4.325485799701046e-06, "loss": 2.097, "step": 112690 }, { "epoch": 7.65695067264574, "grad_norm": 3.8006186485290527, "learning_rate": 4.321239298817774e-06, "loss": 2.0757, "step": 112695 }, { "epoch": 7.657290392716401, "grad_norm": 3.5005240440368652, "learning_rate": 4.316992797934502e-06, "loss": 1.6722, "step": 112700 }, { "epoch": 7.657630112787063, "grad_norm": 4.411372661590576, "learning_rate": 4.31274629705123e-06, "loss": 1.9479, "step": 112705 }, { "epoch": 7.6579698328577255, "grad_norm": 3.676457643508911, "learning_rate": 4.308499796167958e-06, "loss": 1.8633, "step": 112710 }, { "epoch": 7.658309552928387, "grad_norm": 4.45784854888916, "learning_rate": 4.304253295284685e-06, "loss": 2.1821, "step": 112715 }, { "epoch": 7.658649272999049, "grad_norm": 4.695581912994385, "learning_rate": 4.300006794401413e-06, "loss": 2.104, "step": 112720 }, { "epoch": 7.658988993069711, "grad_norm": 4.254057884216309, "learning_rate": 4.295760293518141e-06, "loss": 1.8448, "step": 112725 }, { "epoch": 7.659328713140372, "grad_norm": 4.138996124267578, "learning_rate": 4.291513792634869e-06, "loss": 1.8948, "step": 112730 }, { "epoch": 7.659668433211034, 
"grad_norm": 4.024184703826904, "learning_rate": 4.2872672917515965e-06, "loss": 1.8835, "step": 112735 }, { "epoch": 7.660008153281696, "grad_norm": 3.3886022567749023, "learning_rate": 4.2830207908683245e-06, "loss": 2.283, "step": 112740 }, { "epoch": 7.660347873352357, "grad_norm": 3.144381523132324, "learning_rate": 4.2787742899850525e-06, "loss": 2.1956, "step": 112745 }, { "epoch": 7.660687593423019, "grad_norm": 4.208010673522949, "learning_rate": 4.2745277891017806e-06, "loss": 2.0608, "step": 112750 }, { "epoch": 7.6610273134936815, "grad_norm": 3.7392847537994385, "learning_rate": 4.2702812882185086e-06, "loss": 2.1115, "step": 112755 }, { "epoch": 7.661367033564343, "grad_norm": 3.1452791690826416, "learning_rate": 4.266034787335236e-06, "loss": 1.9288, "step": 112760 }, { "epoch": 7.661706753635005, "grad_norm": 3.9746501445770264, "learning_rate": 4.261788286451964e-06, "loss": 1.8198, "step": 112765 }, { "epoch": 7.662046473705667, "grad_norm": 3.7193500995635986, "learning_rate": 4.257541785568692e-06, "loss": 1.9357, "step": 112770 }, { "epoch": 7.662386193776328, "grad_norm": 3.0852572917938232, "learning_rate": 4.253295284685419e-06, "loss": 1.8356, "step": 112775 }, { "epoch": 7.66272591384699, "grad_norm": 4.156473159790039, "learning_rate": 4.249048783802148e-06, "loss": 1.8001, "step": 112780 }, { "epoch": 7.663065633917652, "grad_norm": 3.1783230304718018, "learning_rate": 4.244802282918875e-06, "loss": 1.8792, "step": 112785 }, { "epoch": 7.663405353988313, "grad_norm": 3.0606470108032227, "learning_rate": 4.240555782035603e-06, "loss": 1.7162, "step": 112790 }, { "epoch": 7.663745074058975, "grad_norm": 3.56852650642395, "learning_rate": 4.236309281152331e-06, "loss": 2.0437, "step": 112795 }, { "epoch": 7.6640847941296375, "grad_norm": 3.3371706008911133, "learning_rate": 4.232062780269058e-06, "loss": 2.1331, "step": 112800 }, { "epoch": 7.664424514200299, "grad_norm": 3.558157205581665, "learning_rate": 4.227816279385786e-06, "loss": 
2.0027, "step": 112805 }, { "epoch": 7.664764234270961, "grad_norm": 4.111841678619385, "learning_rate": 4.223569778502514e-06, "loss": 1.7487, "step": 112810 }, { "epoch": 7.665103954341623, "grad_norm": 3.5746543407440186, "learning_rate": 4.219323277619242e-06, "loss": 1.996, "step": 112815 }, { "epoch": 7.665443674412284, "grad_norm": 4.559370040893555, "learning_rate": 4.21507677673597e-06, "loss": 1.8549, "step": 112820 }, { "epoch": 7.665783394482946, "grad_norm": 3.3112692832946777, "learning_rate": 4.210830275852697e-06, "loss": 1.9201, "step": 112825 }, { "epoch": 7.666123114553608, "grad_norm": 3.550750255584717, "learning_rate": 4.206583774969425e-06, "loss": 1.5992, "step": 112830 }, { "epoch": 7.666462834624269, "grad_norm": 3.3523051738739014, "learning_rate": 4.202337274086153e-06, "loss": 2.0957, "step": 112835 }, { "epoch": 7.666802554694931, "grad_norm": 3.4994356632232666, "learning_rate": 4.198090773202881e-06, "loss": 2.0379, "step": 112840 }, { "epoch": 7.6671422747655935, "grad_norm": 3.5141780376434326, "learning_rate": 4.1938442723196085e-06, "loss": 2.0195, "step": 112845 }, { "epoch": 7.667481994836255, "grad_norm": 3.723527193069458, "learning_rate": 4.189597771436337e-06, "loss": 1.9422, "step": 112850 }, { "epoch": 7.667821714906917, "grad_norm": 3.7503716945648193, "learning_rate": 4.1853512705530645e-06, "loss": 1.8441, "step": 112855 }, { "epoch": 7.668161434977579, "grad_norm": 4.019887447357178, "learning_rate": 4.181104769669792e-06, "loss": 2.072, "step": 112860 }, { "epoch": 7.66850115504824, "grad_norm": 4.391843318939209, "learning_rate": 4.1768582687865205e-06, "loss": 2.0534, "step": 112865 }, { "epoch": 7.668840875118902, "grad_norm": 4.853637218475342, "learning_rate": 4.172611767903248e-06, "loss": 1.7635, "step": 112870 }, { "epoch": 7.669180595189564, "grad_norm": 4.533663272857666, "learning_rate": 4.168365267019976e-06, "loss": 2.2302, "step": 112875 }, { "epoch": 7.669520315260225, "grad_norm": 3.8898980617523193, 
"learning_rate": 4.164118766136704e-06, "loss": 1.7991, "step": 112880 }, { "epoch": 7.6698600353308874, "grad_norm": 4.523314952850342, "learning_rate": 4.159872265253431e-06, "loss": 2.0764, "step": 112885 }, { "epoch": 7.6701997554015495, "grad_norm": 3.355193853378296, "learning_rate": 4.15562576437016e-06, "loss": 2.1491, "step": 112890 }, { "epoch": 7.670539475472211, "grad_norm": 3.7602410316467285, "learning_rate": 4.151379263486887e-06, "loss": 2.0346, "step": 112895 }, { "epoch": 7.670879195542873, "grad_norm": 3.3518290519714355, "learning_rate": 4.147132762603615e-06, "loss": 1.8593, "step": 112900 }, { "epoch": 7.671218915613535, "grad_norm": 3.221174478530884, "learning_rate": 4.142886261720343e-06, "loss": 1.7218, "step": 112905 }, { "epoch": 7.671558635684196, "grad_norm": 3.718532085418701, "learning_rate": 4.13863976083707e-06, "loss": 1.7066, "step": 112910 }, { "epoch": 7.671898355754858, "grad_norm": 3.6678779125213623, "learning_rate": 4.134393259953798e-06, "loss": 1.8701, "step": 112915 }, { "epoch": 7.67223807582552, "grad_norm": 3.7950775623321533, "learning_rate": 4.130146759070526e-06, "loss": 1.9818, "step": 112920 }, { "epoch": 7.672577795896181, "grad_norm": 3.7627270221710205, "learning_rate": 4.125900258187254e-06, "loss": 2.155, "step": 112925 }, { "epoch": 7.6729175159668435, "grad_norm": 4.18257999420166, "learning_rate": 4.121653757303982e-06, "loss": 1.7248, "step": 112930 }, { "epoch": 7.6732572360375055, "grad_norm": 3.7252933979034424, "learning_rate": 4.11740725642071e-06, "loss": 1.8916, "step": 112935 }, { "epoch": 7.673596956108167, "grad_norm": 3.7336463928222656, "learning_rate": 4.113160755537437e-06, "loss": 1.9613, "step": 112940 }, { "epoch": 7.673936676178829, "grad_norm": 3.6456706523895264, "learning_rate": 4.108914254654165e-06, "loss": 1.7784, "step": 112945 }, { "epoch": 7.674276396249491, "grad_norm": 3.923703193664551, "learning_rate": 4.104667753770893e-06, "loss": 2.1349, "step": 112950 }, { "epoch": 
7.674616116320152, "grad_norm": 5.57907772064209, "learning_rate": 4.10042125288762e-06, "loss": 2.0462, "step": 112955 }, { "epoch": 7.674955836390814, "grad_norm": 3.8708436489105225, "learning_rate": 4.096174752004349e-06, "loss": 1.7283, "step": 112960 }, { "epoch": 7.675295556461476, "grad_norm": 3.6760094165802, "learning_rate": 4.091928251121076e-06, "loss": 2.0222, "step": 112965 }, { "epoch": 7.675635276532137, "grad_norm": 4.604246616363525, "learning_rate": 4.0876817502378036e-06, "loss": 1.9178, "step": 112970 }, { "epoch": 7.6759749966027995, "grad_norm": 3.57351016998291, "learning_rate": 4.0834352493545324e-06, "loss": 1.8772, "step": 112975 }, { "epoch": 7.6763147166734615, "grad_norm": 3.8549277782440186, "learning_rate": 4.07918874847126e-06, "loss": 1.8422, "step": 112980 }, { "epoch": 7.676654436744123, "grad_norm": 4.328970909118652, "learning_rate": 4.074942247587988e-06, "loss": 1.8807, "step": 112985 }, { "epoch": 7.676994156814785, "grad_norm": 4.1124701499938965, "learning_rate": 4.070695746704716e-06, "loss": 1.9661, "step": 112990 }, { "epoch": 7.677333876885447, "grad_norm": 4.300014019012451, "learning_rate": 4.066449245821443e-06, "loss": 2.0289, "step": 112995 }, { "epoch": 7.677673596956108, "grad_norm": 3.2150826454162598, "learning_rate": 4.062202744938172e-06, "loss": 1.878, "step": 113000 }, { "epoch": 7.67801331702677, "grad_norm": 3.577463150024414, "learning_rate": 4.057956244054899e-06, "loss": 2.0395, "step": 113005 }, { "epoch": 7.678353037097431, "grad_norm": 4.466958999633789, "learning_rate": 4.053709743171627e-06, "loss": 2.0315, "step": 113010 }, { "epoch": 7.678692757168093, "grad_norm": 3.583526849746704, "learning_rate": 4.049463242288355e-06, "loss": 2.0115, "step": 113015 }, { "epoch": 7.6790324772387555, "grad_norm": 4.583036422729492, "learning_rate": 4.045216741405083e-06, "loss": 1.9161, "step": 113020 }, { "epoch": 7.679372197309417, "grad_norm": 3.4054012298583984, "learning_rate": 4.04097024052181e-06, 
"loss": 1.935, "step": 113025 }, { "epoch": 7.679711917380079, "grad_norm": 3.1469573974609375, "learning_rate": 4.036723739638538e-06, "loss": 1.9114, "step": 113030 }, { "epoch": 7.680051637450741, "grad_norm": 4.75406551361084, "learning_rate": 4.032477238755266e-06, "loss": 2.0221, "step": 113035 }, { "epoch": 7.680391357521402, "grad_norm": 4.02166223526001, "learning_rate": 4.028230737871993e-06, "loss": 2.0314, "step": 113040 }, { "epoch": 7.680731077592064, "grad_norm": 3.356245517730713, "learning_rate": 4.023984236988722e-06, "loss": 1.7647, "step": 113045 }, { "epoch": 7.681070797662726, "grad_norm": 4.031313896179199, "learning_rate": 4.019737736105449e-06, "loss": 2.0435, "step": 113050 }, { "epoch": 7.681410517733387, "grad_norm": 3.3204965591430664, "learning_rate": 4.015491235222177e-06, "loss": 1.9567, "step": 113055 }, { "epoch": 7.681750237804049, "grad_norm": 3.6664466857910156, "learning_rate": 4.011244734338905e-06, "loss": 2.1261, "step": 113060 }, { "epoch": 7.6820899578747115, "grad_norm": 3.2812156677246094, "learning_rate": 4.006998233455632e-06, "loss": 1.8819, "step": 113065 }, { "epoch": 7.682429677945373, "grad_norm": 4.360334873199463, "learning_rate": 4.002751732572361e-06, "loss": 2.0323, "step": 113070 }, { "epoch": 7.682769398016035, "grad_norm": 3.7009406089782715, "learning_rate": 3.998505231689088e-06, "loss": 1.9432, "step": 113075 }, { "epoch": 7.683109118086697, "grad_norm": 3.4293761253356934, "learning_rate": 3.9942587308058155e-06, "loss": 2.0175, "step": 113080 }, { "epoch": 7.683448838157358, "grad_norm": 3.7381339073181152, "learning_rate": 3.990012229922544e-06, "loss": 2.0889, "step": 113085 }, { "epoch": 7.68378855822802, "grad_norm": 3.9248316287994385, "learning_rate": 3.9857657290392715e-06, "loss": 1.9047, "step": 113090 }, { "epoch": 7.684128278298682, "grad_norm": 4.411221981048584, "learning_rate": 3.9815192281559995e-06, "loss": 2.0969, "step": 113095 }, { "epoch": 7.684467998369343, "grad_norm": 
3.77498459815979, "learning_rate": 3.9772727272727275e-06, "loss": 1.9224, "step": 113100 }, { "epoch": 7.684807718440005, "grad_norm": 3.1341567039489746, "learning_rate": 3.9730262263894555e-06, "loss": 1.7704, "step": 113105 }, { "epoch": 7.6851474385106675, "grad_norm": 3.482621669769287, "learning_rate": 3.9687797255061836e-06, "loss": 1.9334, "step": 113110 }, { "epoch": 7.685487158581329, "grad_norm": 4.416332244873047, "learning_rate": 3.964533224622911e-06, "loss": 1.9365, "step": 113115 }, { "epoch": 7.685826878651991, "grad_norm": 3.8370065689086914, "learning_rate": 3.960286723739639e-06, "loss": 1.9635, "step": 113120 }, { "epoch": 7.686166598722653, "grad_norm": 3.635587453842163, "learning_rate": 3.956040222856367e-06, "loss": 1.9287, "step": 113125 }, { "epoch": 7.686506318793314, "grad_norm": 3.795837163925171, "learning_rate": 3.951793721973095e-06, "loss": 2.0347, "step": 113130 }, { "epoch": 7.686846038863976, "grad_norm": 4.922933101654053, "learning_rate": 3.947547221089822e-06, "loss": 1.7467, "step": 113135 }, { "epoch": 7.687185758934638, "grad_norm": 4.34315824508667, "learning_rate": 3.94330072020655e-06, "loss": 2.0565, "step": 113140 }, { "epoch": 7.687525479005299, "grad_norm": 3.2872095108032227, "learning_rate": 3.939054219323278e-06, "loss": 1.963, "step": 113145 }, { "epoch": 7.687865199075961, "grad_norm": 4.0482563972473145, "learning_rate": 3.934807718440005e-06, "loss": 1.8569, "step": 113150 }, { "epoch": 7.6882049191466235, "grad_norm": 3.791595220565796, "learning_rate": 3.930561217556734e-06, "loss": 2.0459, "step": 113155 }, { "epoch": 7.688544639217285, "grad_norm": 4.0471086502075195, "learning_rate": 3.926314716673461e-06, "loss": 1.8971, "step": 113160 }, { "epoch": 7.688884359287947, "grad_norm": 3.470996856689453, "learning_rate": 3.922068215790189e-06, "loss": 1.811, "step": 113165 }, { "epoch": 7.689224079358609, "grad_norm": 4.498385906219482, "learning_rate": 3.917821714906917e-06, "loss": 2.0015, "step": 113170 
}, { "epoch": 7.68956379942927, "grad_norm": 2.956836223602295, "learning_rate": 3.913575214023644e-06, "loss": 1.9207, "step": 113175 }, { "epoch": 7.689903519499932, "grad_norm": 3.6484787464141846, "learning_rate": 3.909328713140373e-06, "loss": 2.0984, "step": 113180 }, { "epoch": 7.690243239570594, "grad_norm": 3.705932855606079, "learning_rate": 3.9050822122571e-06, "loss": 1.8747, "step": 113185 }, { "epoch": 7.690582959641255, "grad_norm": 3.4845385551452637, "learning_rate": 3.9008357113738274e-06, "loss": 2.1341, "step": 113190 }, { "epoch": 7.6909226797119175, "grad_norm": 5.682195663452148, "learning_rate": 3.896589210490556e-06, "loss": 1.9406, "step": 113195 }, { "epoch": 7.6912623997825795, "grad_norm": 4.091074466705322, "learning_rate": 3.8923427096072835e-06, "loss": 1.7606, "step": 113200 }, { "epoch": 7.691602119853241, "grad_norm": 3.832022190093994, "learning_rate": 3.8880962087240115e-06, "loss": 1.7573, "step": 113205 }, { "epoch": 7.691941839923903, "grad_norm": 4.054018020629883, "learning_rate": 3.8838497078407395e-06, "loss": 1.9204, "step": 113210 }, { "epoch": 7.692281559994565, "grad_norm": 3.37904953956604, "learning_rate": 3.8796032069574675e-06, "loss": 2.0131, "step": 113215 }, { "epoch": 7.692621280065226, "grad_norm": 3.9723260402679443, "learning_rate": 3.8753567060741955e-06, "loss": 2.0848, "step": 113220 }, { "epoch": 7.692961000135888, "grad_norm": 3.7531039714813232, "learning_rate": 3.871110205190923e-06, "loss": 2.0339, "step": 113225 }, { "epoch": 7.693300720206549, "grad_norm": 4.026923656463623, "learning_rate": 3.866863704307651e-06, "loss": 1.9949, "step": 113230 }, { "epoch": 7.693640440277211, "grad_norm": 3.676111936569214, "learning_rate": 3.862617203424379e-06, "loss": 1.8209, "step": 113235 }, { "epoch": 7.6939801603478735, "grad_norm": 3.600260019302368, "learning_rate": 3.858370702541107e-06, "loss": 2.1809, "step": 113240 }, { "epoch": 7.694319880418535, "grad_norm": 4.239656448364258, "learning_rate": 
3.854124201657834e-06, "loss": 2.1383, "step": 113245 }, { "epoch": 7.694659600489197, "grad_norm": 4.555500030517578, "learning_rate": 3.849877700774562e-06, "loss": 1.8082, "step": 113250 }, { "epoch": 7.694999320559859, "grad_norm": 4.057663917541504, "learning_rate": 3.84563119989129e-06, "loss": 1.9894, "step": 113255 }, { "epoch": 7.69533904063052, "grad_norm": 4.624183177947998, "learning_rate": 3.841384699008017e-06, "loss": 2.0057, "step": 113260 }, { "epoch": 7.695678760701182, "grad_norm": 3.5529863834381104, "learning_rate": 3.837138198124746e-06, "loss": 1.8245, "step": 113265 }, { "epoch": 7.696018480771844, "grad_norm": 3.9188578128814697, "learning_rate": 3.832891697241473e-06, "loss": 1.9845, "step": 113270 }, { "epoch": 7.696358200842505, "grad_norm": 3.3773984909057617, "learning_rate": 3.828645196358201e-06, "loss": 2.1627, "step": 113275 }, { "epoch": 7.696697920913167, "grad_norm": 4.337589263916016, "learning_rate": 3.824398695474929e-06, "loss": 2.0567, "step": 113280 }, { "epoch": 7.6970376409838295, "grad_norm": 3.8245551586151123, "learning_rate": 3.820152194591656e-06, "loss": 1.9998, "step": 113285 }, { "epoch": 7.697377361054491, "grad_norm": 3.8236162662506104, "learning_rate": 3.815905693708385e-06, "loss": 2.0336, "step": 113290 }, { "epoch": 7.697717081125153, "grad_norm": 3.991321563720703, "learning_rate": 3.8116591928251122e-06, "loss": 1.8808, "step": 113295 }, { "epoch": 7.698056801195815, "grad_norm": 2.90161395072937, "learning_rate": 3.8074126919418402e-06, "loss": 2.0421, "step": 113300 }, { "epoch": 7.698396521266476, "grad_norm": 3.681763172149658, "learning_rate": 3.803166191058568e-06, "loss": 1.9761, "step": 113305 }, { "epoch": 7.698736241337138, "grad_norm": 4.335953712463379, "learning_rate": 3.7989196901752954e-06, "loss": 1.9226, "step": 113310 }, { "epoch": 7.6990759614078, "grad_norm": 3.646179437637329, "learning_rate": 3.794673189292024e-06, "loss": 1.965, "step": 113315 }, { "epoch": 7.699415681478461, 
"grad_norm": 3.6721043586730957, "learning_rate": 3.7904266884087514e-06, "loss": 1.856, "step": 113320 }, { "epoch": 7.699755401549123, "grad_norm": 3.49863862991333, "learning_rate": 3.7861801875254794e-06, "loss": 2.0067, "step": 113325 }, { "epoch": 7.7000951216197855, "grad_norm": 3.8034520149230957, "learning_rate": 3.781933686642207e-06, "loss": 1.9552, "step": 113330 }, { "epoch": 7.700434841690447, "grad_norm": 3.873687744140625, "learning_rate": 3.7776871857589346e-06, "loss": 2.0221, "step": 113335 }, { "epoch": 7.700774561761109, "grad_norm": 4.188947677612305, "learning_rate": 3.7734406848756626e-06, "loss": 1.8843, "step": 113340 }, { "epoch": 7.701114281831771, "grad_norm": 3.8894405364990234, "learning_rate": 3.76919418399239e-06, "loss": 1.9199, "step": 113345 }, { "epoch": 7.701454001902432, "grad_norm": 5.468226432800293, "learning_rate": 3.7649476831091186e-06, "loss": 2.0344, "step": 113350 }, { "epoch": 7.701793721973094, "grad_norm": 3.671769857406616, "learning_rate": 3.760701182225846e-06, "loss": 2.1597, "step": 113355 }, { "epoch": 7.702133442043756, "grad_norm": 3.6459946632385254, "learning_rate": 3.7564546813425734e-06, "loss": 1.8118, "step": 113360 }, { "epoch": 7.702473162114417, "grad_norm": 4.348273754119873, "learning_rate": 3.7522081804593018e-06, "loss": 1.7324, "step": 113365 }, { "epoch": 7.702812882185079, "grad_norm": 4.355226039886475, "learning_rate": 3.7479616795760294e-06, "loss": 2.0766, "step": 113370 }, { "epoch": 7.7031526022557415, "grad_norm": 3.9739415645599365, "learning_rate": 3.7437151786927574e-06, "loss": 1.9601, "step": 113375 }, { "epoch": 7.703492322326403, "grad_norm": 3.9818027019500732, "learning_rate": 3.739468677809485e-06, "loss": 1.9806, "step": 113380 }, { "epoch": 7.703832042397065, "grad_norm": 3.5691425800323486, "learning_rate": 3.7352221769262134e-06, "loss": 2.1735, "step": 113385 }, { "epoch": 7.704171762467727, "grad_norm": 3.4822890758514404, "learning_rate": 3.730975676042941e-06, 
"loss": 2.2422, "step": 113390 }, { "epoch": 7.704511482538388, "grad_norm": 3.639378070831299, "learning_rate": 3.7267291751596686e-06, "loss": 1.9237, "step": 113395 }, { "epoch": 7.70485120260905, "grad_norm": 3.5965728759765625, "learning_rate": 3.7224826742763966e-06, "loss": 1.9778, "step": 113400 }, { "epoch": 7.705190922679712, "grad_norm": 3.4270031452178955, "learning_rate": 3.718236173393124e-06, "loss": 1.9635, "step": 113405 }, { "epoch": 7.705530642750373, "grad_norm": 4.828579902648926, "learning_rate": 3.713989672509852e-06, "loss": 2.1822, "step": 113410 }, { "epoch": 7.705870362821035, "grad_norm": 4.137625694274902, "learning_rate": 3.7097431716265797e-06, "loss": 1.9951, "step": 113415 }, { "epoch": 7.7062100828916975, "grad_norm": 3.008265495300293, "learning_rate": 3.7054966707433073e-06, "loss": 1.9874, "step": 113420 }, { "epoch": 7.706549802962359, "grad_norm": 4.11151123046875, "learning_rate": 3.7012501698600358e-06, "loss": 1.8546, "step": 113425 }, { "epoch": 7.706889523033021, "grad_norm": 3.908653497695923, "learning_rate": 3.6970036689767633e-06, "loss": 1.7791, "step": 113430 }, { "epoch": 7.707229243103683, "grad_norm": 3.1420702934265137, "learning_rate": 3.6927571680934914e-06, "loss": 2.0369, "step": 113435 }, { "epoch": 7.707568963174344, "grad_norm": 4.713148593902588, "learning_rate": 3.688510667210219e-06, "loss": 1.7746, "step": 113440 }, { "epoch": 7.707908683245006, "grad_norm": 5.109582424163818, "learning_rate": 3.6842641663269465e-06, "loss": 2.0038, "step": 113445 }, { "epoch": 7.708248403315668, "grad_norm": 3.4803662300109863, "learning_rate": 3.6800176654436745e-06, "loss": 1.8415, "step": 113450 }, { "epoch": 7.708588123386329, "grad_norm": 3.434199810028076, "learning_rate": 3.675771164560402e-06, "loss": 1.93, "step": 113455 }, { "epoch": 7.7089278434569914, "grad_norm": 3.8153672218322754, "learning_rate": 3.6715246636771305e-06, "loss": 2.0335, "step": 113460 }, { "epoch": 7.7092675635276535, "grad_norm": 
3.6231138706207275, "learning_rate": 3.667278162793858e-06, "loss": 1.9078, "step": 113465 }, { "epoch": 7.709607283598315, "grad_norm": 3.388434410095215, "learning_rate": 3.663031661910586e-06, "loss": 2.0454, "step": 113470 }, { "epoch": 7.709947003668977, "grad_norm": 3.8087754249572754, "learning_rate": 3.6587851610273137e-06, "loss": 2.0583, "step": 113475 }, { "epoch": 7.710286723739639, "grad_norm": 4.244746685028076, "learning_rate": 3.6545386601440413e-06, "loss": 2.0099, "step": 113480 }, { "epoch": 7.7106264438103, "grad_norm": 3.64986252784729, "learning_rate": 3.6502921592607693e-06, "loss": 1.9228, "step": 113485 }, { "epoch": 7.710966163880962, "grad_norm": 3.394293785095215, "learning_rate": 3.646045658377497e-06, "loss": 2.0347, "step": 113490 }, { "epoch": 7.711305883951624, "grad_norm": 3.140437602996826, "learning_rate": 3.6417991574942253e-06, "loss": 2.0246, "step": 113495 }, { "epoch": 7.711645604022285, "grad_norm": 4.542669296264648, "learning_rate": 3.637552656610953e-06, "loss": 1.9492, "step": 113500 }, { "epoch": 7.7119853240929475, "grad_norm": 3.6485254764556885, "learning_rate": 3.63330615572768e-06, "loss": 1.9444, "step": 113505 }, { "epoch": 7.7123250441636095, "grad_norm": 3.5803022384643555, "learning_rate": 3.6290596548444085e-06, "loss": 2.0184, "step": 113510 }, { "epoch": 7.712664764234271, "grad_norm": 3.523524761199951, "learning_rate": 3.624813153961136e-06, "loss": 2.0399, "step": 113515 }, { "epoch": 7.713004484304933, "grad_norm": 3.8656203746795654, "learning_rate": 3.620566653077864e-06, "loss": 1.7889, "step": 113520 }, { "epoch": 7.713344204375595, "grad_norm": 4.05556058883667, "learning_rate": 3.6163201521945917e-06, "loss": 2.0445, "step": 113525 }, { "epoch": 7.713683924446256, "grad_norm": 3.395991325378418, "learning_rate": 3.6120736513113193e-06, "loss": 2.0504, "step": 113530 }, { "epoch": 7.714023644516918, "grad_norm": 4.11093807220459, "learning_rate": 3.6078271504280477e-06, "loss": 1.7489, "step": 
113535 }, { "epoch": 7.71436336458758, "grad_norm": 4.056400299072266, "learning_rate": 3.603580649544775e-06, "loss": 1.9006, "step": 113540 }, { "epoch": 7.714703084658241, "grad_norm": 3.791348695755005, "learning_rate": 3.5993341486615033e-06, "loss": 1.7756, "step": 113545 }, { "epoch": 7.7150428047289035, "grad_norm": 3.9855005741119385, "learning_rate": 3.595087647778231e-06, "loss": 1.9235, "step": 113550 }, { "epoch": 7.7153825247995655, "grad_norm": 4.6263041496276855, "learning_rate": 3.5916904470716133e-06, "loss": 1.8756, "step": 113555 }, { "epoch": 7.715722244870227, "grad_norm": 3.5381245613098145, "learning_rate": 3.587443946188341e-06, "loss": 1.8538, "step": 113560 }, { "epoch": 7.716061964940889, "grad_norm": 3.3219287395477295, "learning_rate": 3.5831974453050684e-06, "loss": 1.835, "step": 113565 }, { "epoch": 7.716401685011551, "grad_norm": 4.575030326843262, "learning_rate": 3.578950944421797e-06, "loss": 1.8424, "step": 113570 }, { "epoch": 7.716741405082212, "grad_norm": 3.8345277309417725, "learning_rate": 3.574704443538524e-06, "loss": 1.7512, "step": 113575 }, { "epoch": 7.717081125152874, "grad_norm": 4.846848487854004, "learning_rate": 3.5704579426552525e-06, "loss": 1.9817, "step": 113580 }, { "epoch": 7.717420845223536, "grad_norm": 4.2895827293396, "learning_rate": 3.56621144177198e-06, "loss": 1.7673, "step": 113585 }, { "epoch": 7.717760565294197, "grad_norm": 4.499095916748047, "learning_rate": 3.561964940888708e-06, "loss": 1.8375, "step": 113590 }, { "epoch": 7.7181002853648595, "grad_norm": 5.024334907531738, "learning_rate": 3.5577184400054356e-06, "loss": 1.7602, "step": 113595 }, { "epoch": 7.7184400054355216, "grad_norm": 3.8149397373199463, "learning_rate": 3.5534719391221632e-06, "loss": 1.6269, "step": 113600 }, { "epoch": 7.718779725506183, "grad_norm": 3.669912338256836, "learning_rate": 3.5492254382388917e-06, "loss": 1.8407, "step": 113605 }, { "epoch": 7.719119445576845, "grad_norm": 4.308194637298584, 
"learning_rate": 3.544978937355619e-06, "loss": 1.8396, "step": 113610 }, { "epoch": 7.719459165647507, "grad_norm": 3.1700668334960938, "learning_rate": 3.5407324364723473e-06, "loss": 1.7044, "step": 113615 }, { "epoch": 7.719798885718168, "grad_norm": 3.5588698387145996, "learning_rate": 3.536485935589075e-06, "loss": 2.0926, "step": 113620 }, { "epoch": 7.72013860578883, "grad_norm": 3.854276418685913, "learning_rate": 3.5322394347058024e-06, "loss": 2.0594, "step": 113625 }, { "epoch": 7.720478325859492, "grad_norm": 3.413339614868164, "learning_rate": 3.5279929338225304e-06, "loss": 1.8449, "step": 113630 }, { "epoch": 7.720818045930153, "grad_norm": 4.1091814041137695, "learning_rate": 3.523746432939258e-06, "loss": 1.779, "step": 113635 }, { "epoch": 7.7211577660008155, "grad_norm": 3.4988160133361816, "learning_rate": 3.5194999320559864e-06, "loss": 2.0166, "step": 113640 }, { "epoch": 7.721497486071478, "grad_norm": 4.117475509643555, "learning_rate": 3.5152534311727136e-06, "loss": 1.9036, "step": 113645 }, { "epoch": 7.721837206142139, "grad_norm": 4.385979175567627, "learning_rate": 3.511006930289441e-06, "loss": 2.0213, "step": 113650 }, { "epoch": 7.722176926212801, "grad_norm": 2.972980260848999, "learning_rate": 3.5067604294061696e-06, "loss": 1.9415, "step": 113655 }, { "epoch": 7.722516646283463, "grad_norm": 4.632485866546631, "learning_rate": 3.502513928522897e-06, "loss": 1.9297, "step": 113660 }, { "epoch": 7.722856366354124, "grad_norm": 4.356255531311035, "learning_rate": 3.4982674276396252e-06, "loss": 1.9574, "step": 113665 }, { "epoch": 7.723196086424786, "grad_norm": 4.001228332519531, "learning_rate": 3.494020926756353e-06, "loss": 1.8523, "step": 113670 }, { "epoch": 7.723535806495448, "grad_norm": 3.555738687515259, "learning_rate": 3.4897744258730812e-06, "loss": 2.1187, "step": 113675 }, { "epoch": 7.723875526566109, "grad_norm": 4.716585159301758, "learning_rate": 3.4855279249898084e-06, "loss": 2.2238, "step": 113680 }, { 
"epoch": 7.7242152466367715, "grad_norm": 3.3085176944732666, "learning_rate": 3.481281424106536e-06, "loss": 2.0873, "step": 113685 }, { "epoch": 7.724554966707433, "grad_norm": 4.076405048370361, "learning_rate": 3.4770349232232644e-06, "loss": 2.1331, "step": 113690 }, { "epoch": 7.724894686778095, "grad_norm": 4.523692607879639, "learning_rate": 3.472788422339992e-06, "loss": 2.0832, "step": 113695 }, { "epoch": 7.725234406848757, "grad_norm": 3.9291574954986572, "learning_rate": 3.46854192145672e-06, "loss": 2.0574, "step": 113700 }, { "epoch": 7.725574126919418, "grad_norm": 3.9400851726531982, "learning_rate": 3.4642954205734476e-06, "loss": 2.1787, "step": 113705 }, { "epoch": 7.72591384699008, "grad_norm": 4.024969577789307, "learning_rate": 3.460048919690175e-06, "loss": 1.656, "step": 113710 }, { "epoch": 7.726253567060742, "grad_norm": 3.876110315322876, "learning_rate": 3.4558024188069036e-06, "loss": 2.1417, "step": 113715 }, { "epoch": 7.726593287131403, "grad_norm": 3.6596286296844482, "learning_rate": 3.4515559179236308e-06, "loss": 2.0991, "step": 113720 }, { "epoch": 7.726933007202065, "grad_norm": 3.9903595447540283, "learning_rate": 3.447309417040359e-06, "loss": 1.8688, "step": 113725 }, { "epoch": 7.7272727272727275, "grad_norm": 3.9079763889312744, "learning_rate": 3.4430629161570868e-06, "loss": 2.1063, "step": 113730 }, { "epoch": 7.727612447343389, "grad_norm": 4.867102146148682, "learning_rate": 3.4388164152738144e-06, "loss": 2.2088, "step": 113735 }, { "epoch": 7.727952167414051, "grad_norm": 3.1351370811462402, "learning_rate": 3.4345699143905424e-06, "loss": 2.0273, "step": 113740 }, { "epoch": 7.728291887484713, "grad_norm": 3.5583484172821045, "learning_rate": 3.43032341350727e-06, "loss": 2.066, "step": 113745 }, { "epoch": 7.728631607555374, "grad_norm": 4.093266487121582, "learning_rate": 3.4260769126239984e-06, "loss": 1.8431, "step": 113750 }, { "epoch": 7.728971327626036, "grad_norm": 4.007296562194824, "learning_rate": 
3.4218304117407255e-06, "loss": 1.9716, "step": 113755 }, { "epoch": 7.729311047696698, "grad_norm": 4.321942329406738, "learning_rate": 3.417583910857454e-06, "loss": 2.0578, "step": 113760 }, { "epoch": 7.729650767767359, "grad_norm": 4.100668907165527, "learning_rate": 3.4133374099741816e-06, "loss": 1.9597, "step": 113765 }, { "epoch": 7.7299904878380215, "grad_norm": 4.448249340057373, "learning_rate": 3.409090909090909e-06, "loss": 1.8231, "step": 113770 }, { "epoch": 7.7303302079086835, "grad_norm": 3.759564161300659, "learning_rate": 3.404844408207637e-06, "loss": 2.0321, "step": 113775 }, { "epoch": 7.730669927979345, "grad_norm": 3.3395440578460693, "learning_rate": 3.4005979073243647e-06, "loss": 1.8989, "step": 113780 }, { "epoch": 7.731009648050007, "grad_norm": 3.7622528076171875, "learning_rate": 3.396351406441093e-06, "loss": 1.8724, "step": 113785 }, { "epoch": 7.731349368120669, "grad_norm": 3.4513275623321533, "learning_rate": 3.3921049055578203e-06, "loss": 1.7954, "step": 113790 }, { "epoch": 7.73168908819133, "grad_norm": 3.9795615673065186, "learning_rate": 3.387858404674548e-06, "loss": 1.9409, "step": 113795 }, { "epoch": 7.732028808261992, "grad_norm": 4.100716590881348, "learning_rate": 3.3836119037912763e-06, "loss": 2.0572, "step": 113800 }, { "epoch": 7.732368528332654, "grad_norm": 3.459836006164551, "learning_rate": 3.379365402908004e-06, "loss": 1.9308, "step": 113805 }, { "epoch": 7.732708248403315, "grad_norm": 3.8459649085998535, "learning_rate": 3.375118902024732e-06, "loss": 1.7996, "step": 113810 }, { "epoch": 7.7330479684739775, "grad_norm": 3.3276288509368896, "learning_rate": 3.3708724011414595e-06, "loss": 2.0939, "step": 113815 }, { "epoch": 7.7333876885446395, "grad_norm": 3.6216304302215576, "learning_rate": 3.366625900258187e-06, "loss": 1.9702, "step": 113820 }, { "epoch": 7.733727408615301, "grad_norm": 3.9303267002105713, "learning_rate": 3.362379399374915e-06, "loss": 1.897, "step": 113825 }, { "epoch": 
7.734067128685963, "grad_norm": 3.557954788208008, "learning_rate": 3.3581328984916427e-06, "loss": 1.8933, "step": 113830 }, { "epoch": 7.734406848756625, "grad_norm": 5.18372106552124, "learning_rate": 3.353886397608371e-06, "loss": 2.2274, "step": 113835 }, { "epoch": 7.734746568827286, "grad_norm": 3.5110323429107666, "learning_rate": 3.3496398967250987e-06, "loss": 2.188, "step": 113840 }, { "epoch": 7.735086288897948, "grad_norm": 3.986116409301758, "learning_rate": 3.3453933958418267e-06, "loss": 1.9989, "step": 113845 }, { "epoch": 7.73542600896861, "grad_norm": 3.441983222961426, "learning_rate": 3.3411468949585543e-06, "loss": 2.0508, "step": 113850 }, { "epoch": 7.735765729039271, "grad_norm": 4.607669830322266, "learning_rate": 3.336900394075282e-06, "loss": 1.9519, "step": 113855 }, { "epoch": 7.7361054491099335, "grad_norm": 3.172785520553589, "learning_rate": 3.3326538931920103e-06, "loss": 1.9175, "step": 113860 }, { "epoch": 7.7364451691805955, "grad_norm": 4.238381862640381, "learning_rate": 3.3284073923087375e-06, "loss": 2.0409, "step": 113865 }, { "epoch": 7.736784889251257, "grad_norm": 3.9261255264282227, "learning_rate": 3.324160891425466e-06, "loss": 1.9909, "step": 113870 }, { "epoch": 7.737124609321919, "grad_norm": 2.842068672180176, "learning_rate": 3.3199143905421935e-06, "loss": 2.0848, "step": 113875 }, { "epoch": 7.737464329392581, "grad_norm": 3.4624526500701904, "learning_rate": 3.315667889658921e-06, "loss": 2.2941, "step": 113880 }, { "epoch": 7.737804049463242, "grad_norm": 4.165153503417969, "learning_rate": 3.311421388775649e-06, "loss": 1.8282, "step": 113885 }, { "epoch": 7.738143769533904, "grad_norm": 3.2455477714538574, "learning_rate": 3.3071748878923767e-06, "loss": 1.6155, "step": 113890 }, { "epoch": 7.738483489604566, "grad_norm": 4.0353102684021, "learning_rate": 3.302928387009105e-06, "loss": 2.0042, "step": 113895 }, { "epoch": 7.738823209675227, "grad_norm": 4.331855773925781, "learning_rate": 
3.2986818861258323e-06, "loss": 2.0642, "step": 113900 }, { "epoch": 7.7391629297458895, "grad_norm": 3.3676493167877197, "learning_rate": 3.29443538524256e-06, "loss": 1.9189, "step": 113905 }, { "epoch": 7.739502649816551, "grad_norm": 3.5336618423461914, "learning_rate": 3.2901888843592883e-06, "loss": 1.99, "step": 113910 }, { "epoch": 7.739842369887213, "grad_norm": 3.821272850036621, "learning_rate": 3.285942383476016e-06, "loss": 2.0447, "step": 113915 }, { "epoch": 7.740182089957875, "grad_norm": 3.1695122718811035, "learning_rate": 3.281695882592744e-06, "loss": 1.8741, "step": 113920 }, { "epoch": 7.740521810028536, "grad_norm": 4.219152450561523, "learning_rate": 3.2774493817094714e-06, "loss": 2.1327, "step": 113925 }, { "epoch": 7.740861530099198, "grad_norm": 3.7468318939208984, "learning_rate": 3.2732028808262e-06, "loss": 2.2551, "step": 113930 }, { "epoch": 7.74120125016986, "grad_norm": 3.395280122756958, "learning_rate": 3.268956379942927e-06, "loss": 1.9123, "step": 113935 }, { "epoch": 7.741540970240521, "grad_norm": 4.386592864990234, "learning_rate": 3.2647098790596546e-06, "loss": 2.0616, "step": 113940 }, { "epoch": 7.741880690311183, "grad_norm": 3.4900920391082764, "learning_rate": 3.260463378176383e-06, "loss": 1.9372, "step": 113945 }, { "epoch": 7.7422204103818455, "grad_norm": 3.379668951034546, "learning_rate": 3.2562168772931106e-06, "loss": 1.9051, "step": 113950 }, { "epoch": 7.742560130452507, "grad_norm": 3.2684383392333984, "learning_rate": 3.2519703764098386e-06, "loss": 1.8826, "step": 113955 }, { "epoch": 7.742899850523169, "grad_norm": 4.39560079574585, "learning_rate": 3.2477238755265662e-06, "loss": 2.0645, "step": 113960 }, { "epoch": 7.743239570593831, "grad_norm": 3.914214849472046, "learning_rate": 3.243477374643294e-06, "loss": 2.0013, "step": 113965 }, { "epoch": 7.743579290664492, "grad_norm": 4.004719257354736, "learning_rate": 3.239230873760022e-06, "loss": 1.9048, "step": 113970 }, { "epoch": 7.743919010735154, 
"grad_norm": 3.5995864868164062, "learning_rate": 3.2349843728767494e-06, "loss": 1.9952, "step": 113975 }, { "epoch": 7.744258730805816, "grad_norm": 3.5581424236297607, "learning_rate": 3.230737871993478e-06, "loss": 2.0191, "step": 113980 }, { "epoch": 7.744598450876477, "grad_norm": 3.7775771617889404, "learning_rate": 3.2264913711102054e-06, "loss": 1.8459, "step": 113985 }, { "epoch": 7.744938170947139, "grad_norm": 3.2264790534973145, "learning_rate": 3.222244870226933e-06, "loss": 1.7456, "step": 113990 }, { "epoch": 7.7452778910178015, "grad_norm": 3.4875760078430176, "learning_rate": 3.217998369343661e-06, "loss": 1.8329, "step": 113995 }, { "epoch": 7.745617611088463, "grad_norm": 4.262545108795166, "learning_rate": 3.2137518684603886e-06, "loss": 2.3065, "step": 114000 }, { "epoch": 7.745957331159125, "grad_norm": 2.967045783996582, "learning_rate": 3.2095053675771166e-06, "loss": 1.8621, "step": 114005 }, { "epoch": 7.746297051229787, "grad_norm": 3.478423833847046, "learning_rate": 3.205258866693844e-06, "loss": 1.9665, "step": 114010 }, { "epoch": 7.746636771300448, "grad_norm": 4.554584503173828, "learning_rate": 3.2010123658105726e-06, "loss": 2.0282, "step": 114015 }, { "epoch": 7.74697649137111, "grad_norm": 3.709181785583496, "learning_rate": 3.1967658649273e-06, "loss": 1.7071, "step": 114020 }, { "epoch": 7.747316211441772, "grad_norm": 3.774165391921997, "learning_rate": 3.192519364044028e-06, "loss": 1.9956, "step": 114025 }, { "epoch": 7.747655931512433, "grad_norm": 3.5833659172058105, "learning_rate": 3.188272863160756e-06, "loss": 1.9807, "step": 114030 }, { "epoch": 7.7479956515830954, "grad_norm": 2.8799941539764404, "learning_rate": 3.1840263622774834e-06, "loss": 2.1222, "step": 114035 }, { "epoch": 7.7483353716537575, "grad_norm": 3.47914719581604, "learning_rate": 3.179779861394212e-06, "loss": 2.0998, "step": 114040 }, { "epoch": 7.748675091724419, "grad_norm": 2.903163433074951, "learning_rate": 3.175533360510939e-06, "loss": 
2.0337, "step": 114045 }, { "epoch": 7.749014811795081, "grad_norm": 5.06768798828125, "learning_rate": 3.1712868596276666e-06, "loss": 1.9253, "step": 114050 }, { "epoch": 7.749354531865743, "grad_norm": 3.654900550842285, "learning_rate": 3.167040358744395e-06, "loss": 2.0576, "step": 114055 }, { "epoch": 7.749694251936404, "grad_norm": 3.486501932144165, "learning_rate": 3.1627938578611226e-06, "loss": 1.8073, "step": 114060 }, { "epoch": 7.750033972007066, "grad_norm": 4.220287799835205, "learning_rate": 3.1585473569778506e-06, "loss": 1.9033, "step": 114065 }, { "epoch": 7.750373692077728, "grad_norm": 4.0116705894470215, "learning_rate": 3.154300856094578e-06, "loss": 1.9857, "step": 114070 }, { "epoch": 7.750713412148389, "grad_norm": 3.6330959796905518, "learning_rate": 3.1500543552113058e-06, "loss": 1.9603, "step": 114075 }, { "epoch": 7.7510531322190515, "grad_norm": 3.738679885864258, "learning_rate": 3.1458078543280338e-06, "loss": 1.8765, "step": 114080 }, { "epoch": 7.7513928522897135, "grad_norm": 4.228731155395508, "learning_rate": 3.1415613534447613e-06, "loss": 1.913, "step": 114085 }, { "epoch": 7.751732572360375, "grad_norm": 2.8929829597473145, "learning_rate": 3.1373148525614898e-06, "loss": 2.0374, "step": 114090 }, { "epoch": 7.752072292431037, "grad_norm": 4.178569793701172, "learning_rate": 3.1330683516782174e-06, "loss": 2.1997, "step": 114095 }, { "epoch": 7.752412012501699, "grad_norm": 2.9239838123321533, "learning_rate": 3.1288218507949454e-06, "loss": 1.9085, "step": 114100 }, { "epoch": 7.75275173257236, "grad_norm": 3.823012113571167, "learning_rate": 3.124575349911673e-06, "loss": 1.7991, "step": 114105 }, { "epoch": 7.753091452643022, "grad_norm": 3.0130255222320557, "learning_rate": 3.120328849028401e-06, "loss": 1.9801, "step": 114110 }, { "epoch": 7.753431172713684, "grad_norm": 4.184746742248535, "learning_rate": 3.1160823481451285e-06, "loss": 1.8712, "step": 114115 }, { "epoch": 7.753770892784345, "grad_norm": 
5.189458847045898, "learning_rate": 3.111835847261856e-06, "loss": 2.4182, "step": 114120 }, { "epoch": 7.7541106128550075, "grad_norm": 3.6772971153259277, "learning_rate": 3.107589346378584e-06, "loss": 2.0491, "step": 114125 }, { "epoch": 7.7544503329256695, "grad_norm": 3.8886656761169434, "learning_rate": 3.103342845495312e-06, "loss": 1.9336, "step": 114130 }, { "epoch": 7.754790052996331, "grad_norm": 3.656982898712158, "learning_rate": 3.0990963446120397e-06, "loss": 2.0194, "step": 114135 }, { "epoch": 7.755129773066993, "grad_norm": 5.190288543701172, "learning_rate": 3.0948498437287677e-06, "loss": 1.8863, "step": 114140 }, { "epoch": 7.755469493137655, "grad_norm": 3.3581697940826416, "learning_rate": 3.0906033428454953e-06, "loss": 2.0575, "step": 114145 }, { "epoch": 7.755809213208316, "grad_norm": 3.5063390731811523, "learning_rate": 3.0863568419622233e-06, "loss": 1.9057, "step": 114150 }, { "epoch": 7.756148933278978, "grad_norm": 3.1519737243652344, "learning_rate": 3.082110341078951e-06, "loss": 1.9477, "step": 114155 }, { "epoch": 7.75648865334964, "grad_norm": 4.663266181945801, "learning_rate": 3.077863840195679e-06, "loss": 2.0903, "step": 114160 }, { "epoch": 7.756828373420301, "grad_norm": 3.7288918495178223, "learning_rate": 3.073617339312407e-06, "loss": 1.8021, "step": 114165 }, { "epoch": 7.7571680934909635, "grad_norm": 3.9575588703155518, "learning_rate": 3.0693708384291345e-06, "loss": 2.1123, "step": 114170 }, { "epoch": 7.7575078135616256, "grad_norm": 3.9009687900543213, "learning_rate": 3.065124337545862e-06, "loss": 2.2986, "step": 114175 }, { "epoch": 7.757847533632287, "grad_norm": 4.1206183433532715, "learning_rate": 3.06087783666259e-06, "loss": 2.1501, "step": 114180 }, { "epoch": 7.758187253702949, "grad_norm": 4.292906761169434, "learning_rate": 3.056631335779318e-06, "loss": 2.1243, "step": 114185 }, { "epoch": 7.758526973773611, "grad_norm": 3.9727890491485596, "learning_rate": 3.0523848348960457e-06, "loss": 2.1673, 
"step": 114190 }, { "epoch": 7.758866693844272, "grad_norm": 3.261263847351074, "learning_rate": 3.0481383340127737e-06, "loss": 1.9714, "step": 114195 }, { "epoch": 7.759206413914934, "grad_norm": 3.4304986000061035, "learning_rate": 3.0438918331295017e-06, "loss": 2.0302, "step": 114200 }, { "epoch": 7.759546133985596, "grad_norm": 3.918334484100342, "learning_rate": 3.0396453322462293e-06, "loss": 1.9605, "step": 114205 }, { "epoch": 7.759885854056257, "grad_norm": 3.9670608043670654, "learning_rate": 3.035398831362957e-06, "loss": 2.1806, "step": 114210 }, { "epoch": 7.7602255741269195, "grad_norm": 3.4708406925201416, "learning_rate": 3.031152330479685e-06, "loss": 1.987, "step": 114215 }, { "epoch": 7.760565294197582, "grad_norm": 3.526702404022217, "learning_rate": 3.026905829596413e-06, "loss": 1.775, "step": 114220 }, { "epoch": 7.760905014268243, "grad_norm": 3.835475444793701, "learning_rate": 3.0226593287131405e-06, "loss": 1.9769, "step": 114225 }, { "epoch": 7.761244734338905, "grad_norm": 3.3539984226226807, "learning_rate": 3.018412827829868e-06, "loss": 2.2844, "step": 114230 }, { "epoch": 7.761584454409567, "grad_norm": 4.032723903656006, "learning_rate": 3.014166326946596e-06, "loss": 1.8323, "step": 114235 }, { "epoch": 7.761924174480228, "grad_norm": 4.190206527709961, "learning_rate": 3.009919826063324e-06, "loss": 1.9051, "step": 114240 }, { "epoch": 7.76226389455089, "grad_norm": 3.6078310012817383, "learning_rate": 3.0056733251800517e-06, "loss": 1.8708, "step": 114245 }, { "epoch": 7.762603614621552, "grad_norm": 3.119124174118042, "learning_rate": 3.0014268242967797e-06, "loss": 1.7686, "step": 114250 }, { "epoch": 7.762943334692213, "grad_norm": 3.9107778072357178, "learning_rate": 2.9971803234135077e-06, "loss": 2.161, "step": 114255 }, { "epoch": 7.7632830547628755, "grad_norm": 3.374109983444214, "learning_rate": 2.9929338225302353e-06, "loss": 2.348, "step": 114260 }, { "epoch": 7.763622774833538, "grad_norm": 3.836949348449707, 
"learning_rate": 2.988687321646963e-06, "loss": 2.0841, "step": 114265 }, { "epoch": 7.763962494904199, "grad_norm": 3.54335618019104, "learning_rate": 2.984440820763691e-06, "loss": 1.6932, "step": 114270 }, { "epoch": 7.764302214974861, "grad_norm": 4.1252288818359375, "learning_rate": 2.980194319880419e-06, "loss": 1.9373, "step": 114275 }, { "epoch": 7.764641935045523, "grad_norm": 3.8648619651794434, "learning_rate": 2.9759478189971464e-06, "loss": 1.9692, "step": 114280 }, { "epoch": 7.764981655116184, "grad_norm": 3.473435878753662, "learning_rate": 2.9717013181138745e-06, "loss": 1.9405, "step": 114285 }, { "epoch": 7.765321375186846, "grad_norm": 4.659252166748047, "learning_rate": 2.967454817230602e-06, "loss": 2.3045, "step": 114290 }, { "epoch": 7.765661095257508, "grad_norm": 3.4258460998535156, "learning_rate": 2.96320831634733e-06, "loss": 2.11, "step": 114295 }, { "epoch": 7.766000815328169, "grad_norm": 4.205322742462158, "learning_rate": 2.9589618154640576e-06, "loss": 1.7985, "step": 114300 }, { "epoch": 7.7663405353988315, "grad_norm": 3.7845163345336914, "learning_rate": 2.9547153145807856e-06, "loss": 2.0942, "step": 114305 }, { "epoch": 7.766680255469494, "grad_norm": 3.5606203079223633, "learning_rate": 2.9504688136975136e-06, "loss": 2.0301, "step": 114310 }, { "epoch": 7.767019975540155, "grad_norm": 3.9204261302948, "learning_rate": 2.9462223128142412e-06, "loss": 1.897, "step": 114315 }, { "epoch": 7.767359695610817, "grad_norm": 3.814668655395508, "learning_rate": 2.941975811930969e-06, "loss": 2.1615, "step": 114320 }, { "epoch": 7.767699415681479, "grad_norm": 3.9430649280548096, "learning_rate": 2.937729311047697e-06, "loss": 2.1351, "step": 114325 }, { "epoch": 7.76803913575214, "grad_norm": 3.786917209625244, "learning_rate": 2.933482810164425e-06, "loss": 1.922, "step": 114330 }, { "epoch": 7.768378855822802, "grad_norm": 3.9713385105133057, "learning_rate": 2.9292363092811524e-06, "loss": 1.9275, "step": 114335 }, { "epoch": 
7.768718575893464, "grad_norm": 3.0289342403411865, "learning_rate": 2.9249898083978804e-06, "loss": 2.0716, "step": 114340 }, { "epoch": 7.7690582959641254, "grad_norm": 3.390246629714966, "learning_rate": 2.920743307514608e-06, "loss": 1.842, "step": 114345 }, { "epoch": 7.7693980160347875, "grad_norm": 4.575158596038818, "learning_rate": 2.916496806631336e-06, "loss": 1.9367, "step": 114350 }, { "epoch": 7.76973773610545, "grad_norm": 3.1120283603668213, "learning_rate": 2.9122503057480636e-06, "loss": 1.8541, "step": 114355 }, { "epoch": 7.770077456176111, "grad_norm": 3.8917791843414307, "learning_rate": 2.9080038048647916e-06, "loss": 1.7982, "step": 114360 }, { "epoch": 7.770417176246773, "grad_norm": 4.263568878173828, "learning_rate": 2.9037573039815196e-06, "loss": 2.029, "step": 114365 }, { "epoch": 7.770756896317434, "grad_norm": 3.9770443439483643, "learning_rate": 2.899510803098247e-06, "loss": 1.8818, "step": 114370 }, { "epoch": 7.771096616388096, "grad_norm": 3.4920947551727295, "learning_rate": 2.8952643022149748e-06, "loss": 1.9801, "step": 114375 }, { "epoch": 7.771436336458758, "grad_norm": 4.198647499084473, "learning_rate": 2.8910178013317028e-06, "loss": 1.8703, "step": 114380 }, { "epoch": 7.771776056529419, "grad_norm": 3.799783945083618, "learning_rate": 2.886771300448431e-06, "loss": 2.0251, "step": 114385 }, { "epoch": 7.7721157766000815, "grad_norm": 3.2812132835388184, "learning_rate": 2.8825247995651584e-06, "loss": 1.8746, "step": 114390 }, { "epoch": 7.7724554966707435, "grad_norm": 3.750152349472046, "learning_rate": 2.8782782986818864e-06, "loss": 2.0889, "step": 114395 }, { "epoch": 7.772795216741405, "grad_norm": 3.899674654006958, "learning_rate": 2.874031797798614e-06, "loss": 1.8223, "step": 114400 }, { "epoch": 7.773134936812067, "grad_norm": 3.4903676509857178, "learning_rate": 2.869785296915342e-06, "loss": 2.0186, "step": 114405 }, { "epoch": 7.773474656882729, "grad_norm": 3.69645357131958, "learning_rate": 
2.8655387960320696e-06, "loss": 1.8497, "step": 114410 }, { "epoch": 7.77381437695339, "grad_norm": 4.174928665161133, "learning_rate": 2.8612922951487976e-06, "loss": 2.1072, "step": 114415 }, { "epoch": 7.774154097024052, "grad_norm": 2.7823612689971924, "learning_rate": 2.8570457942655256e-06, "loss": 2.1401, "step": 114420 }, { "epoch": 7.774493817094714, "grad_norm": 4.142298698425293, "learning_rate": 2.852799293382253e-06, "loss": 1.8519, "step": 114425 }, { "epoch": 7.774833537165375, "grad_norm": 5.018909454345703, "learning_rate": 2.8485527924989807e-06, "loss": 1.8599, "step": 114430 }, { "epoch": 7.7751732572360375, "grad_norm": 3.454848527908325, "learning_rate": 2.8443062916157088e-06, "loss": 2.0059, "step": 114435 }, { "epoch": 7.7755129773066995, "grad_norm": 3.678178310394287, "learning_rate": 2.8400597907324368e-06, "loss": 2.3894, "step": 114440 }, { "epoch": 7.775852697377361, "grad_norm": 3.3173608779907227, "learning_rate": 2.8358132898491643e-06, "loss": 1.5327, "step": 114445 }, { "epoch": 7.776192417448023, "grad_norm": 4.260441303253174, "learning_rate": 2.8315667889658924e-06, "loss": 2.1658, "step": 114450 }, { "epoch": 7.776532137518685, "grad_norm": 3.8224339485168457, "learning_rate": 2.8273202880826204e-06, "loss": 1.6912, "step": 114455 }, { "epoch": 7.776871857589346, "grad_norm": 3.580740213394165, "learning_rate": 2.823073787199348e-06, "loss": 1.8514, "step": 114460 }, { "epoch": 7.777211577660008, "grad_norm": 3.8900575637817383, "learning_rate": 2.8188272863160755e-06, "loss": 1.7558, "step": 114465 }, { "epoch": 7.77755129773067, "grad_norm": 5.335845470428467, "learning_rate": 2.8145807854328035e-06, "loss": 2.1009, "step": 114470 }, { "epoch": 7.777891017801331, "grad_norm": 3.486656904220581, "learning_rate": 2.8103342845495315e-06, "loss": 1.8036, "step": 114475 }, { "epoch": 7.7782307378719935, "grad_norm": 3.6028666496276855, "learning_rate": 2.806087783666259e-06, "loss": 1.7454, "step": 114480 }, { "epoch": 
7.778570457942656, "grad_norm": 3.2204880714416504, "learning_rate": 2.8018412827829867e-06, "loss": 2.0731, "step": 114485 }, { "epoch": 7.778910178013317, "grad_norm": 5.1465067863464355, "learning_rate": 2.7975947818997147e-06, "loss": 2.1817, "step": 114490 }, { "epoch": 7.779249898083979, "grad_norm": 3.330127477645874, "learning_rate": 2.7933482810164427e-06, "loss": 2.2467, "step": 114495 }, { "epoch": 7.779589618154641, "grad_norm": 3.5148956775665283, "learning_rate": 2.7891017801331703e-06, "loss": 1.6578, "step": 114500 }, { "epoch": 7.779929338225302, "grad_norm": 3.208923578262329, "learning_rate": 2.7848552792498983e-06, "loss": 1.7213, "step": 114505 }, { "epoch": 7.780269058295964, "grad_norm": 3.556114673614502, "learning_rate": 2.7806087783666263e-06, "loss": 2.189, "step": 114510 }, { "epoch": 7.780608778366626, "grad_norm": 3.1192383766174316, "learning_rate": 2.776362277483354e-06, "loss": 1.8849, "step": 114515 }, { "epoch": 7.780948498437287, "grad_norm": 3.5881361961364746, "learning_rate": 2.7721157766000815e-06, "loss": 1.8603, "step": 114520 }, { "epoch": 7.7812882185079495, "grad_norm": 4.295050621032715, "learning_rate": 2.7678692757168095e-06, "loss": 2.1771, "step": 114525 }, { "epoch": 7.781627938578612, "grad_norm": 3.7208986282348633, "learning_rate": 2.7636227748335375e-06, "loss": 2.1493, "step": 114530 }, { "epoch": 7.781967658649273, "grad_norm": 4.1478776931762695, "learning_rate": 2.759376273950265e-06, "loss": 1.8957, "step": 114535 }, { "epoch": 7.782307378719935, "grad_norm": 3.9223971366882324, "learning_rate": 2.755129773066993e-06, "loss": 1.9719, "step": 114540 }, { "epoch": 7.782647098790597, "grad_norm": 4.003316402435303, "learning_rate": 2.7508832721837207e-06, "loss": 2.1267, "step": 114545 }, { "epoch": 7.782986818861258, "grad_norm": 4.4824604988098145, "learning_rate": 2.7466367713004487e-06, "loss": 2.1559, "step": 114550 }, { "epoch": 7.78332653893192, "grad_norm": 3.9014954566955566, "learning_rate": 
2.7423902704171763e-06, "loss": 1.5934, "step": 114555 }, { "epoch": 7.783666259002582, "grad_norm": 2.737617254257202, "learning_rate": 2.7381437695339043e-06, "loss": 1.9096, "step": 114560 }, { "epoch": 7.784005979073243, "grad_norm": 3.9820845127105713, "learning_rate": 2.7338972686506323e-06, "loss": 2.2608, "step": 114565 }, { "epoch": 7.7843456991439055, "grad_norm": 4.120100021362305, "learning_rate": 2.72965076776736e-06, "loss": 1.6924, "step": 114570 }, { "epoch": 7.784685419214568, "grad_norm": 3.470264196395874, "learning_rate": 2.7254042668840875e-06, "loss": 1.9641, "step": 114575 }, { "epoch": 7.785025139285229, "grad_norm": 4.125683784484863, "learning_rate": 2.7211577660008155e-06, "loss": 2.0124, "step": 114580 }, { "epoch": 7.785364859355891, "grad_norm": 3.7069637775421143, "learning_rate": 2.7169112651175435e-06, "loss": 1.9327, "step": 114585 }, { "epoch": 7.785704579426552, "grad_norm": 3.4068665504455566, "learning_rate": 2.712664764234271e-06, "loss": 1.9995, "step": 114590 }, { "epoch": 7.786044299497214, "grad_norm": 3.688873529434204, "learning_rate": 2.708418263350999e-06, "loss": 2.2141, "step": 114595 }, { "epoch": 7.786384019567876, "grad_norm": 4.2063703536987305, "learning_rate": 2.7041717624677267e-06, "loss": 2.0714, "step": 114600 }, { "epoch": 7.786723739638537, "grad_norm": 3.5340819358825684, "learning_rate": 2.6999252615844547e-06, "loss": 2.0851, "step": 114605 }, { "epoch": 7.787063459709199, "grad_norm": 4.025240421295166, "learning_rate": 2.6956787607011822e-06, "loss": 1.6902, "step": 114610 }, { "epoch": 7.7874031797798615, "grad_norm": 3.6039156913757324, "learning_rate": 2.6914322598179103e-06, "loss": 1.8946, "step": 114615 }, { "epoch": 7.787742899850523, "grad_norm": 3.3534352779388428, "learning_rate": 2.6871857589346383e-06, "loss": 2.1749, "step": 114620 }, { "epoch": 7.788082619921185, "grad_norm": 3.698763132095337, "learning_rate": 2.6829392580513654e-06, "loss": 1.8318, "step": 114625 }, { "epoch": 
7.788422339991847, "grad_norm": 3.544389247894287, "learning_rate": 2.6786927571680934e-06, "loss": 1.9438, "step": 114630 }, { "epoch": 7.788762060062508, "grad_norm": 3.40342378616333, "learning_rate": 2.6744462562848214e-06, "loss": 2.1201, "step": 114635 }, { "epoch": 7.78910178013317, "grad_norm": 3.354945182800293, "learning_rate": 2.6701997554015494e-06, "loss": 1.9679, "step": 114640 }, { "epoch": 7.789441500203832, "grad_norm": 3.5347278118133545, "learning_rate": 2.665953254518277e-06, "loss": 2.0262, "step": 114645 }, { "epoch": 7.789781220274493, "grad_norm": 3.7226154804229736, "learning_rate": 2.661706753635005e-06, "loss": 1.8969, "step": 114650 }, { "epoch": 7.7901209403451555, "grad_norm": 3.718404769897461, "learning_rate": 2.6574602527517326e-06, "loss": 2.0604, "step": 114655 }, { "epoch": 7.7904606604158175, "grad_norm": 5.121222019195557, "learning_rate": 2.6532137518684606e-06, "loss": 1.9238, "step": 114660 }, { "epoch": 7.790800380486479, "grad_norm": 4.238429069519043, "learning_rate": 2.6489672509851882e-06, "loss": 1.9769, "step": 114665 }, { "epoch": 7.791140100557141, "grad_norm": 3.7464184761047363, "learning_rate": 2.6447207501019162e-06, "loss": 2.1369, "step": 114670 }, { "epoch": 7.791479820627803, "grad_norm": 3.4655959606170654, "learning_rate": 2.6404742492186442e-06, "loss": 1.9662, "step": 114675 }, { "epoch": 7.791819540698464, "grad_norm": 3.0421230792999268, "learning_rate": 2.636227748335372e-06, "loss": 1.8008, "step": 114680 }, { "epoch": 7.792159260769126, "grad_norm": 3.925611972808838, "learning_rate": 2.6319812474520994e-06, "loss": 1.9192, "step": 114685 }, { "epoch": 7.792498980839788, "grad_norm": 4.2450852394104, "learning_rate": 2.6277347465688274e-06, "loss": 1.952, "step": 114690 }, { "epoch": 7.792838700910449, "grad_norm": 3.1875057220458984, "learning_rate": 2.6234882456855554e-06, "loss": 1.7562, "step": 114695 }, { "epoch": 7.7931784209811115, "grad_norm": 3.912501096725464, "learning_rate": 
2.619241744802283e-06, "loss": 2.0708, "step": 114700 }, { "epoch": 7.7935181410517735, "grad_norm": 3.290700674057007, "learning_rate": 2.614995243919011e-06, "loss": 1.7835, "step": 114705 }, { "epoch": 7.793857861122435, "grad_norm": 3.8104405403137207, "learning_rate": 2.6107487430357386e-06, "loss": 1.6567, "step": 114710 }, { "epoch": 7.794197581193097, "grad_norm": 4.557787895202637, "learning_rate": 2.606502242152466e-06, "loss": 1.9149, "step": 114715 }, { "epoch": 7.794537301263759, "grad_norm": 4.166214466094971, "learning_rate": 2.602255741269194e-06, "loss": 1.9985, "step": 114720 }, { "epoch": 7.79487702133442, "grad_norm": 3.366623640060425, "learning_rate": 2.598009240385922e-06, "loss": 1.9054, "step": 114725 }, { "epoch": 7.795216741405082, "grad_norm": 4.502364635467529, "learning_rate": 2.59376273950265e-06, "loss": 1.8009, "step": 114730 }, { "epoch": 7.795556461475744, "grad_norm": 4.042967319488525, "learning_rate": 2.5895162386193778e-06, "loss": 1.7711, "step": 114735 }, { "epoch": 7.795896181546405, "grad_norm": 3.845688581466675, "learning_rate": 2.5852697377361054e-06, "loss": 1.7875, "step": 114740 }, { "epoch": 7.7962359016170675, "grad_norm": 3.300248622894287, "learning_rate": 2.5810232368528334e-06, "loss": 2.2361, "step": 114745 }, { "epoch": 7.7965756216877296, "grad_norm": 4.281379699707031, "learning_rate": 2.5767767359695614e-06, "loss": 2.111, "step": 114750 }, { "epoch": 7.796915341758391, "grad_norm": 3.8722269535064697, "learning_rate": 2.572530235086289e-06, "loss": 1.6618, "step": 114755 }, { "epoch": 7.797255061829053, "grad_norm": 3.327860116958618, "learning_rate": 2.568283734203017e-06, "loss": 2.0551, "step": 114760 }, { "epoch": 7.797594781899715, "grad_norm": 3.2659430503845215, "learning_rate": 2.564037233319745e-06, "loss": 2.1535, "step": 114765 }, { "epoch": 7.797934501970376, "grad_norm": 3.749584674835205, "learning_rate": 2.559790732436472e-06, "loss": 1.8481, "step": 114770 }, { "epoch": 7.798274222041038, 
"grad_norm": 3.077476739883423, "learning_rate": 2.5555442315532e-06, "loss": 1.766, "step": 114775 }, { "epoch": 7.7986139421117, "grad_norm": 4.521495342254639, "learning_rate": 2.551297730669928e-06, "loss": 2.2355, "step": 114780 }, { "epoch": 7.798953662182361, "grad_norm": 3.72129225730896, "learning_rate": 2.547051229786656e-06, "loss": 1.958, "step": 114785 }, { "epoch": 7.7992933822530235, "grad_norm": 3.327971935272217, "learning_rate": 2.5428047289033838e-06, "loss": 1.9523, "step": 114790 }, { "epoch": 7.799633102323686, "grad_norm": 3.5817973613739014, "learning_rate": 2.5385582280201113e-06, "loss": 2.0786, "step": 114795 }, { "epoch": 7.799972822394347, "grad_norm": 3.4313769340515137, "learning_rate": 2.5343117271368393e-06, "loss": 1.837, "step": 114800 }, { "epoch": 7.800312542465009, "grad_norm": 3.555422782897949, "learning_rate": 2.530065226253567e-06, "loss": 2.1173, "step": 114805 }, { "epoch": 7.800652262535671, "grad_norm": 3.025721549987793, "learning_rate": 2.525818725370295e-06, "loss": 2.0339, "step": 114810 }, { "epoch": 7.800991982606332, "grad_norm": 4.141289234161377, "learning_rate": 2.521572224487023e-06, "loss": 1.8848, "step": 114815 }, { "epoch": 7.801331702676994, "grad_norm": 4.597287654876709, "learning_rate": 2.517325723603751e-06, "loss": 1.7928, "step": 114820 }, { "epoch": 7.801671422747656, "grad_norm": 3.8436145782470703, "learning_rate": 2.513079222720478e-06, "loss": 1.8603, "step": 114825 }, { "epoch": 7.802011142818317, "grad_norm": 3.5897724628448486, "learning_rate": 2.508832721837206e-06, "loss": 1.7871, "step": 114830 }, { "epoch": 7.8023508628889795, "grad_norm": 3.3524930477142334, "learning_rate": 2.504586220953934e-06, "loss": 1.9822, "step": 114835 }, { "epoch": 7.802690582959642, "grad_norm": 4.105303764343262, "learning_rate": 2.500339720070662e-06, "loss": 1.8775, "step": 114840 }, { "epoch": 7.803030303030303, "grad_norm": 3.4948203563690186, "learning_rate": 2.4960932191873897e-06, "loss": 1.9353, 
"step": 114845 }, { "epoch": 7.803370023100965, "grad_norm": 3.3241493701934814, "learning_rate": 2.4918467183041177e-06, "loss": 1.9116, "step": 114850 }, { "epoch": 7.803709743171627, "grad_norm": 3.58548641204834, "learning_rate": 2.4876002174208453e-06, "loss": 2.0142, "step": 114855 }, { "epoch": 7.804049463242288, "grad_norm": 3.5500426292419434, "learning_rate": 2.483353716537573e-06, "loss": 1.6226, "step": 114860 }, { "epoch": 7.80438918331295, "grad_norm": 4.631760597229004, "learning_rate": 2.479107215654301e-06, "loss": 1.9136, "step": 114865 }, { "epoch": 7.804728903383612, "grad_norm": 3.6390604972839355, "learning_rate": 2.474860714771029e-06, "loss": 1.9003, "step": 114870 }, { "epoch": 7.805068623454273, "grad_norm": 3.960258960723877, "learning_rate": 2.470614213887757e-06, "loss": 1.7716, "step": 114875 }, { "epoch": 7.8054083435249355, "grad_norm": 3.788958787918091, "learning_rate": 2.466367713004484e-06, "loss": 1.775, "step": 114880 }, { "epoch": 7.805748063595598, "grad_norm": 4.245395183563232, "learning_rate": 2.462121212121212e-06, "loss": 2.126, "step": 114885 }, { "epoch": 7.806087783666259, "grad_norm": 3.903043031692505, "learning_rate": 2.45787471123794e-06, "loss": 1.6323, "step": 114890 }, { "epoch": 7.806427503736921, "grad_norm": 3.2689242362976074, "learning_rate": 2.453628210354668e-06, "loss": 2.173, "step": 114895 }, { "epoch": 7.806767223807583, "grad_norm": 3.834528684616089, "learning_rate": 2.4493817094713957e-06, "loss": 1.9408, "step": 114900 }, { "epoch": 7.807106943878244, "grad_norm": 3.255913496017456, "learning_rate": 2.4451352085881237e-06, "loss": 1.4978, "step": 114905 }, { "epoch": 7.807446663948906, "grad_norm": 3.256828546524048, "learning_rate": 2.4408887077048513e-06, "loss": 1.9936, "step": 114910 }, { "epoch": 7.807786384019568, "grad_norm": 4.262735366821289, "learning_rate": 2.436642206821579e-06, "loss": 2.0523, "step": 114915 }, { "epoch": 7.8081261040902294, "grad_norm": 3.558932304382324, 
"learning_rate": 2.432395705938307e-06, "loss": 1.7215, "step": 114920 }, { "epoch": 7.8084658241608915, "grad_norm": 4.929315567016602, "learning_rate": 2.428149205055035e-06, "loss": 1.8939, "step": 114925 }, { "epoch": 7.808805544231554, "grad_norm": 3.7169411182403564, "learning_rate": 2.423902704171763e-06, "loss": 2.1083, "step": 114930 }, { "epoch": 7.809145264302215, "grad_norm": 4.31247615814209, "learning_rate": 2.4196562032884905e-06, "loss": 1.8916, "step": 114935 }, { "epoch": 7.809484984372877, "grad_norm": 3.665130853652954, "learning_rate": 2.415409702405218e-06, "loss": 1.9221, "step": 114940 }, { "epoch": 7.809824704443539, "grad_norm": 3.8347513675689697, "learning_rate": 2.411163201521946e-06, "loss": 1.9177, "step": 114945 }, { "epoch": 7.8101644245142, "grad_norm": 3.646254301071167, "learning_rate": 2.4069167006386736e-06, "loss": 1.846, "step": 114950 }, { "epoch": 7.810504144584862, "grad_norm": 4.50160551071167, "learning_rate": 2.4026701997554017e-06, "loss": 2.1595, "step": 114955 }, { "epoch": 7.810843864655524, "grad_norm": 4.581069469451904, "learning_rate": 2.3984236988721297e-06, "loss": 2.106, "step": 114960 }, { "epoch": 7.8111835847261855, "grad_norm": 4.455700397491455, "learning_rate": 2.3941771979888572e-06, "loss": 1.7731, "step": 114965 }, { "epoch": 7.8115233047968475, "grad_norm": 3.926990032196045, "learning_rate": 2.389930697105585e-06, "loss": 1.9775, "step": 114970 }, { "epoch": 7.81186302486751, "grad_norm": 4.2466325759887695, "learning_rate": 2.385684196222313e-06, "loss": 2.0956, "step": 114975 }, { "epoch": 7.812202744938171, "grad_norm": 3.9286062717437744, "learning_rate": 2.381437695339041e-06, "loss": 1.7287, "step": 114980 }, { "epoch": 7.812542465008833, "grad_norm": 3.806567430496216, "learning_rate": 2.377191194455769e-06, "loss": 1.9542, "step": 114985 }, { "epoch": 7.812882185079495, "grad_norm": 3.902679443359375, "learning_rate": 2.3729446935724964e-06, "loss": 1.9357, "step": 114990 }, { "epoch": 
7.813221905150156, "grad_norm": 3.313009023666382, "learning_rate": 2.368698192689224e-06, "loss": 2.1158, "step": 114995 }, { "epoch": 7.813561625220818, "grad_norm": 4.276663303375244, "learning_rate": 2.364451691805952e-06, "loss": 2.041, "step": 115000 }, { "epoch": 7.81390134529148, "grad_norm": 3.071113109588623, "learning_rate": 2.3602051909226796e-06, "loss": 1.9753, "step": 115005 }, { "epoch": 7.8142410653621415, "grad_norm": 3.841193914413452, "learning_rate": 2.3559586900394076e-06, "loss": 1.9737, "step": 115010 }, { "epoch": 7.8145807854328035, "grad_norm": 3.1226589679718018, "learning_rate": 2.3517121891561356e-06, "loss": 1.7047, "step": 115015 }, { "epoch": 7.814920505503466, "grad_norm": 3.495683431625366, "learning_rate": 2.3474656882728636e-06, "loss": 1.8561, "step": 115020 }, { "epoch": 7.815260225574127, "grad_norm": 4.96422004699707, "learning_rate": 2.343219187389591e-06, "loss": 1.8582, "step": 115025 }, { "epoch": 7.815599945644789, "grad_norm": 3.013389825820923, "learning_rate": 2.338972686506319e-06, "loss": 1.7971, "step": 115030 }, { "epoch": 7.815939665715451, "grad_norm": 4.235849380493164, "learning_rate": 2.334726185623047e-06, "loss": 2.0094, "step": 115035 }, { "epoch": 7.816279385786112, "grad_norm": 3.3803539276123047, "learning_rate": 2.3304796847397744e-06, "loss": 1.7764, "step": 115040 }, { "epoch": 7.816619105856774, "grad_norm": 3.8900766372680664, "learning_rate": 2.3262331838565024e-06, "loss": 2.0031, "step": 115045 }, { "epoch": 7.816958825927435, "grad_norm": 4.0670485496521, "learning_rate": 2.32198668297323e-06, "loss": 1.7572, "step": 115050 }, { "epoch": 7.8172985459980975, "grad_norm": 4.0956010818481445, "learning_rate": 2.317740182089958e-06, "loss": 2.0085, "step": 115055 }, { "epoch": 7.81763826606876, "grad_norm": 3.8028814792633057, "learning_rate": 2.3134936812066856e-06, "loss": 2.1347, "step": 115060 }, { "epoch": 7.817977986139421, "grad_norm": 3.234173059463501, "learning_rate": 
2.3092471803234136e-06, "loss": 2.2694, "step": 115065 }, { "epoch": 7.818317706210083, "grad_norm": 3.604595184326172, "learning_rate": 2.3050006794401416e-06, "loss": 2.1427, "step": 115070 }, { "epoch": 7.818657426280745, "grad_norm": 3.7264060974121094, "learning_rate": 2.3007541785568696e-06, "loss": 1.6839, "step": 115075 }, { "epoch": 7.818997146351406, "grad_norm": 3.5291106700897217, "learning_rate": 2.2965076776735968e-06, "loss": 1.8279, "step": 115080 }, { "epoch": 7.819336866422068, "grad_norm": 4.1342339515686035, "learning_rate": 2.2922611767903248e-06, "loss": 1.6768, "step": 115085 }, { "epoch": 7.81967658649273, "grad_norm": 3.430854558944702, "learning_rate": 2.2880146759070528e-06, "loss": 2.0515, "step": 115090 }, { "epoch": 7.820016306563391, "grad_norm": 4.654848575592041, "learning_rate": 2.2837681750237804e-06, "loss": 2.2387, "step": 115095 }, { "epoch": 7.8203560266340535, "grad_norm": 3.4425904750823975, "learning_rate": 2.2795216741405084e-06, "loss": 1.771, "step": 115100 }, { "epoch": 7.820695746704716, "grad_norm": 4.49589729309082, "learning_rate": 2.2752751732572364e-06, "loss": 1.9837, "step": 115105 }, { "epoch": 7.821035466775377, "grad_norm": 3.720581531524658, "learning_rate": 2.271028672373964e-06, "loss": 1.9232, "step": 115110 }, { "epoch": 7.821375186846039, "grad_norm": 4.17672061920166, "learning_rate": 2.2667821714906915e-06, "loss": 2.0902, "step": 115115 }, { "epoch": 7.821714906916701, "grad_norm": 3.3002450466156006, "learning_rate": 2.2625356706074196e-06, "loss": 1.9988, "step": 115120 }, { "epoch": 7.822054626987362, "grad_norm": 3.728734254837036, "learning_rate": 2.2582891697241476e-06, "loss": 1.7998, "step": 115125 }, { "epoch": 7.822394347058024, "grad_norm": 4.782615661621094, "learning_rate": 2.254042668840875e-06, "loss": 2.0356, "step": 115130 }, { "epoch": 7.822734067128686, "grad_norm": 3.720799207687378, "learning_rate": 2.2497961679576027e-06, "loss": 1.9881, "step": 115135 }, { "epoch": 
7.823073787199347, "grad_norm": 3.9302215576171875, "learning_rate": 2.2455496670743307e-06, "loss": 1.8714, "step": 115140 }, { "epoch": 7.8234135072700095, "grad_norm": 3.8165018558502197, "learning_rate": 2.2413031661910587e-06, "loss": 1.7394, "step": 115145 }, { "epoch": 7.823753227340672, "grad_norm": 3.6032261848449707, "learning_rate": 2.2370566653077863e-06, "loss": 2.012, "step": 115150 }, { "epoch": 7.824092947411333, "grad_norm": 4.11024284362793, "learning_rate": 2.2328101644245143e-06, "loss": 2.1285, "step": 115155 }, { "epoch": 7.824432667481995, "grad_norm": 3.8726553916931152, "learning_rate": 2.2285636635412423e-06, "loss": 2.0201, "step": 115160 }, { "epoch": 7.824772387552657, "grad_norm": 3.343848705291748, "learning_rate": 2.22431716265797e-06, "loss": 1.9715, "step": 115165 }, { "epoch": 7.825112107623318, "grad_norm": 3.3069565296173096, "learning_rate": 2.2200706617746975e-06, "loss": 2.0346, "step": 115170 }, { "epoch": 7.82545182769398, "grad_norm": 3.062307834625244, "learning_rate": 2.2158241608914255e-06, "loss": 1.7561, "step": 115175 }, { "epoch": 7.825791547764642, "grad_norm": 3.7783639430999756, "learning_rate": 2.2115776600081535e-06, "loss": 2.0245, "step": 115180 }, { "epoch": 7.826131267835303, "grad_norm": 3.233464241027832, "learning_rate": 2.207331159124881e-06, "loss": 2.0377, "step": 115185 }, { "epoch": 7.8264709879059655, "grad_norm": 4.329067707061768, "learning_rate": 2.203084658241609e-06, "loss": 2.0407, "step": 115190 }, { "epoch": 7.826810707976628, "grad_norm": 3.184093475341797, "learning_rate": 2.1988381573583367e-06, "loss": 1.9834, "step": 115195 }, { "epoch": 7.827150428047289, "grad_norm": 4.4933977127075195, "learning_rate": 2.1945916564750647e-06, "loss": 1.9933, "step": 115200 }, { "epoch": 7.827490148117951, "grad_norm": 4.178738594055176, "learning_rate": 2.1903451555917923e-06, "loss": 2.0536, "step": 115205 }, { "epoch": 7.827829868188613, "grad_norm": 4.325460433959961, "learning_rate": 
2.1860986547085203e-06, "loss": 1.8412, "step": 115210 }, { "epoch": 7.828169588259274, "grad_norm": 4.295230865478516, "learning_rate": 2.1818521538252483e-06, "loss": 1.9655, "step": 115215 }, { "epoch": 7.828509308329936, "grad_norm": 4.353899955749512, "learning_rate": 2.177605652941976e-06, "loss": 1.9367, "step": 115220 }, { "epoch": 7.828849028400598, "grad_norm": 3.7084896564483643, "learning_rate": 2.1733591520587035e-06, "loss": 2.1073, "step": 115225 }, { "epoch": 7.8291887484712595, "grad_norm": 3.7133278846740723, "learning_rate": 2.1691126511754315e-06, "loss": 2.0089, "step": 115230 }, { "epoch": 7.8295284685419215, "grad_norm": 2.845128059387207, "learning_rate": 2.1648661502921595e-06, "loss": 1.9599, "step": 115235 }, { "epoch": 7.829868188612584, "grad_norm": 4.110162734985352, "learning_rate": 2.160619649408887e-06, "loss": 1.8133, "step": 115240 }, { "epoch": 7.830207908683245, "grad_norm": 5.2359137535095215, "learning_rate": 2.156373148525615e-06, "loss": 2.0097, "step": 115245 }, { "epoch": 7.830547628753907, "grad_norm": 4.247033596038818, "learning_rate": 2.1521266476423427e-06, "loss": 2.081, "step": 115250 }, { "epoch": 7.830887348824569, "grad_norm": 3.4877512454986572, "learning_rate": 2.1478801467590707e-06, "loss": 1.8726, "step": 115255 }, { "epoch": 7.83122706889523, "grad_norm": 3.630354881286621, "learning_rate": 2.1436336458757983e-06, "loss": 1.9754, "step": 115260 }, { "epoch": 7.831566788965892, "grad_norm": 3.4669439792633057, "learning_rate": 2.1393871449925263e-06, "loss": 1.9284, "step": 115265 }, { "epoch": 7.831906509036553, "grad_norm": 4.0351243019104, "learning_rate": 2.1351406441092543e-06, "loss": 2.2137, "step": 115270 }, { "epoch": 7.8322462291072155, "grad_norm": 3.022976875305176, "learning_rate": 2.130894143225982e-06, "loss": 1.9517, "step": 115275 }, { "epoch": 7.8325859491778775, "grad_norm": 3.8094117641448975, "learning_rate": 2.1266476423427094e-06, "loss": 1.8663, "step": 115280 }, { "epoch": 
7.832925669248539, "grad_norm": 4.7109808921813965, "learning_rate": 2.1224011414594375e-06, "loss": 1.9379, "step": 115285 }, { "epoch": 7.833265389319201, "grad_norm": 3.1117358207702637, "learning_rate": 2.1181546405761655e-06, "loss": 1.9453, "step": 115290 }, { "epoch": 7.833605109389863, "grad_norm": 3.6261651515960693, "learning_rate": 2.113908139692893e-06, "loss": 2.0059, "step": 115295 }, { "epoch": 7.833944829460524, "grad_norm": 3.902083158493042, "learning_rate": 2.109661638809621e-06, "loss": 1.7214, "step": 115300 }, { "epoch": 7.834284549531186, "grad_norm": 2.7568225860595703, "learning_rate": 2.1054151379263486e-06, "loss": 1.8879, "step": 115305 }, { "epoch": 7.834624269601848, "grad_norm": 3.4678032398223877, "learning_rate": 2.1011686370430766e-06, "loss": 2.2236, "step": 115310 }, { "epoch": 7.834963989672509, "grad_norm": 3.6126437187194824, "learning_rate": 2.0969221361598042e-06, "loss": 1.8098, "step": 115315 }, { "epoch": 7.8353037097431715, "grad_norm": 3.8991284370422363, "learning_rate": 2.0926756352765322e-06, "loss": 1.9988, "step": 115320 }, { "epoch": 7.8356434298138335, "grad_norm": 3.9026081562042236, "learning_rate": 2.0884291343932602e-06, "loss": 2.0833, "step": 115325 }, { "epoch": 7.835983149884495, "grad_norm": 3.4276981353759766, "learning_rate": 2.084182633509988e-06, "loss": 1.9951, "step": 115330 }, { "epoch": 7.836322869955157, "grad_norm": 3.815458297729492, "learning_rate": 2.0799361326267154e-06, "loss": 2.1121, "step": 115335 }, { "epoch": 7.836662590025819, "grad_norm": 3.527186393737793, "learning_rate": 2.0756896317434434e-06, "loss": 1.9572, "step": 115340 }, { "epoch": 7.83700231009648, "grad_norm": 3.4582157135009766, "learning_rate": 2.0714431308601714e-06, "loss": 1.8622, "step": 115345 }, { "epoch": 7.837342030167142, "grad_norm": 4.345146179199219, "learning_rate": 2.067196629976899e-06, "loss": 1.8136, "step": 115350 }, { "epoch": 7.837681750237804, "grad_norm": 3.4487102031707764, "learning_rate": 
2.062950129093627e-06, "loss": 2.1626, "step": 115355 }, { "epoch": 7.838021470308465, "grad_norm": 3.892460823059082, "learning_rate": 2.058703628210355e-06, "loss": 1.8668, "step": 115360 }, { "epoch": 7.8383611903791275, "grad_norm": 4.432688236236572, "learning_rate": 2.0544571273270826e-06, "loss": 2.1483, "step": 115365 }, { "epoch": 7.83870091044979, "grad_norm": 4.629744052886963, "learning_rate": 2.05021062644381e-06, "loss": 2.0463, "step": 115370 }, { "epoch": 7.839040630520451, "grad_norm": 4.019599914550781, "learning_rate": 2.045964125560538e-06, "loss": 1.8332, "step": 115375 }, { "epoch": 7.839380350591113, "grad_norm": 3.9382147789001465, "learning_rate": 2.0417176246772662e-06, "loss": 2.1597, "step": 115380 }, { "epoch": 7.839720070661775, "grad_norm": 4.188470363616943, "learning_rate": 2.037471123793994e-06, "loss": 2.1615, "step": 115385 }, { "epoch": 7.840059790732436, "grad_norm": 3.820114850997925, "learning_rate": 2.0332246229107214e-06, "loss": 1.9946, "step": 115390 }, { "epoch": 7.840399510803098, "grad_norm": 3.6244239807128906, "learning_rate": 2.0289781220274494e-06, "loss": 1.9964, "step": 115395 }, { "epoch": 7.84073923087376, "grad_norm": 3.158082962036133, "learning_rate": 2.0247316211441774e-06, "loss": 1.9065, "step": 115400 }, { "epoch": 7.841078950944421, "grad_norm": 4.23020076751709, "learning_rate": 2.020485120260905e-06, "loss": 1.842, "step": 115405 }, { "epoch": 7.8414186710150835, "grad_norm": 4.165499210357666, "learning_rate": 2.016238619377633e-06, "loss": 1.8043, "step": 115410 }, { "epoch": 7.841758391085746, "grad_norm": 4.275242328643799, "learning_rate": 2.011992118494361e-06, "loss": 1.8502, "step": 115415 }, { "epoch": 7.842098111156407, "grad_norm": 3.6782174110412598, "learning_rate": 2.0077456176110886e-06, "loss": 1.8919, "step": 115420 }, { "epoch": 7.842437831227069, "grad_norm": 3.6436948776245117, "learning_rate": 2.003499116727816e-06, "loss": 1.858, "step": 115425 }, { "epoch": 7.842777551297731, 
"grad_norm": 3.4254238605499268, "learning_rate": 1.999252615844544e-06, "loss": 2.0201, "step": 115430 }, { "epoch": 7.843117271368392, "grad_norm": 3.7240538597106934, "learning_rate": 1.995006114961272e-06, "loss": 2.1783, "step": 115435 }, { "epoch": 7.843456991439054, "grad_norm": 4.043992519378662, "learning_rate": 1.9907596140779998e-06, "loss": 2.0672, "step": 115440 }, { "epoch": 7.843796711509716, "grad_norm": 4.636225700378418, "learning_rate": 1.9865131131947278e-06, "loss": 2.0067, "step": 115445 }, { "epoch": 7.844136431580377, "grad_norm": 3.5667529106140137, "learning_rate": 1.9822666123114554e-06, "loss": 2.2502, "step": 115450 }, { "epoch": 7.8444761516510395, "grad_norm": 3.2503182888031006, "learning_rate": 1.9780201114281834e-06, "loss": 1.6859, "step": 115455 }, { "epoch": 7.844815871721702, "grad_norm": 3.662532091140747, "learning_rate": 1.973773610544911e-06, "loss": 1.7708, "step": 115460 }, { "epoch": 7.845155591792363, "grad_norm": 3.5155107975006104, "learning_rate": 1.969527109661639e-06, "loss": 1.8805, "step": 115465 }, { "epoch": 7.845495311863025, "grad_norm": 3.6650962829589844, "learning_rate": 1.965280608778367e-06, "loss": 2.1058, "step": 115470 }, { "epoch": 7.845835031933687, "grad_norm": 3.3255422115325928, "learning_rate": 1.9610341078950945e-06, "loss": 1.9347, "step": 115475 }, { "epoch": 7.846174752004348, "grad_norm": 4.193783283233643, "learning_rate": 1.956787607011822e-06, "loss": 2.0622, "step": 115480 }, { "epoch": 7.84651447207501, "grad_norm": 3.369178533554077, "learning_rate": 1.95254110612855e-06, "loss": 2.023, "step": 115485 }, { "epoch": 7.846854192145672, "grad_norm": 3.686265468597412, "learning_rate": 1.948294605245278e-06, "loss": 1.8569, "step": 115490 }, { "epoch": 7.8471939122163334, "grad_norm": 4.2827467918396, "learning_rate": 1.9440481043620057e-06, "loss": 1.914, "step": 115495 }, { "epoch": 7.8475336322869955, "grad_norm": 3.8222620487213135, "learning_rate": 1.9398016034787337e-06, "loss": 
2.4226, "step": 115500 }, { "epoch": 7.847873352357658, "grad_norm": 4.342772960662842, "learning_rate": 1.9355551025954613e-06, "loss": 1.9361, "step": 115505 }, { "epoch": 7.848213072428319, "grad_norm": 4.26680850982666, "learning_rate": 1.9313086017121893e-06, "loss": 1.8798, "step": 115510 }, { "epoch": 7.848552792498981, "grad_norm": 3.933140516281128, "learning_rate": 1.927062100828917e-06, "loss": 1.7474, "step": 115515 }, { "epoch": 7.848892512569643, "grad_norm": 3.7400758266448975, "learning_rate": 1.922815599945645e-06, "loss": 2.1306, "step": 115520 }, { "epoch": 7.849232232640304, "grad_norm": 3.689406633377075, "learning_rate": 1.918569099062373e-06, "loss": 1.7918, "step": 115525 }, { "epoch": 7.849571952710966, "grad_norm": 4.337418079376221, "learning_rate": 1.9143225981791005e-06, "loss": 1.7623, "step": 115530 }, { "epoch": 7.849911672781628, "grad_norm": 3.728616952896118, "learning_rate": 1.910076097295828e-06, "loss": 2.0555, "step": 115535 }, { "epoch": 7.8502513928522895, "grad_norm": 3.298076629638672, "learning_rate": 1.9058295964125561e-06, "loss": 2.012, "step": 115540 }, { "epoch": 7.8505911129229515, "grad_norm": 4.6958723068237305, "learning_rate": 1.901583095529284e-06, "loss": 1.9245, "step": 115545 }, { "epoch": 7.850930832993614, "grad_norm": 3.4876222610473633, "learning_rate": 1.897336594646012e-06, "loss": 2.2016, "step": 115550 }, { "epoch": 7.851270553064275, "grad_norm": 4.180922985076904, "learning_rate": 1.8930900937627397e-06, "loss": 1.8606, "step": 115555 }, { "epoch": 7.851610273134937, "grad_norm": 3.4527626037597656, "learning_rate": 1.8888435928794673e-06, "loss": 1.7405, "step": 115560 }, { "epoch": 7.851949993205599, "grad_norm": 3.4332101345062256, "learning_rate": 1.884597091996195e-06, "loss": 1.8834, "step": 115565 }, { "epoch": 7.85228971327626, "grad_norm": 3.57997465133667, "learning_rate": 1.880350591112923e-06, "loss": 1.9697, "step": 115570 }, { "epoch": 7.852629433346922, "grad_norm": 
3.601062059402466, "learning_rate": 1.8761040902296509e-06, "loss": 2.2194, "step": 115575 }, { "epoch": 7.852969153417584, "grad_norm": 3.286423444747925, "learning_rate": 1.8718575893463787e-06, "loss": 2.1641, "step": 115580 }, { "epoch": 7.8533088734882455, "grad_norm": 3.6726832389831543, "learning_rate": 1.8676110884631067e-06, "loss": 1.8921, "step": 115585 }, { "epoch": 7.8536485935589075, "grad_norm": 4.183307647705078, "learning_rate": 1.8633645875798343e-06, "loss": 1.932, "step": 115590 }, { "epoch": 7.85398831362957, "grad_norm": 3.000763177871704, "learning_rate": 1.859118086696562e-06, "loss": 1.9913, "step": 115595 }, { "epoch": 7.854328033700231, "grad_norm": 3.670161008834839, "learning_rate": 1.8548715858132899e-06, "loss": 2.1182, "step": 115600 }, { "epoch": 7.854667753770893, "grad_norm": 3.396756649017334, "learning_rate": 1.8506250849300179e-06, "loss": 2.0902, "step": 115605 }, { "epoch": 7.855007473841555, "grad_norm": 4.005088806152344, "learning_rate": 1.8463785840467457e-06, "loss": 1.8805, "step": 115610 }, { "epoch": 7.855347193912216, "grad_norm": 3.5248377323150635, "learning_rate": 1.8421320831634733e-06, "loss": 2.0203, "step": 115615 }, { "epoch": 7.855686913982878, "grad_norm": 3.6406350135803223, "learning_rate": 1.837885582280201e-06, "loss": 1.8422, "step": 115620 }, { "epoch": 7.85602663405354, "grad_norm": 3.4103872776031494, "learning_rate": 1.833639081396929e-06, "loss": 1.9721, "step": 115625 }, { "epoch": 7.8563663541242015, "grad_norm": 3.3339133262634277, "learning_rate": 1.8293925805136569e-06, "loss": 1.9335, "step": 115630 }, { "epoch": 7.8567060741948636, "grad_norm": 3.6193878650665283, "learning_rate": 1.8251460796303847e-06, "loss": 2.0172, "step": 115635 }, { "epoch": 7.857045794265526, "grad_norm": 3.330343723297119, "learning_rate": 1.8208995787471127e-06, "loss": 2.0269, "step": 115640 }, { "epoch": 7.857385514336187, "grad_norm": 3.7358829975128174, "learning_rate": 1.8175023780404949e-06, "loss": 1.7328, 
"step": 115645 }, { "epoch": 7.857725234406849, "grad_norm": 3.9825146198272705, "learning_rate": 1.8132558771572224e-06, "loss": 2.1819, "step": 115650 }, { "epoch": 7.858064954477511, "grad_norm": 3.9345645904541016, "learning_rate": 1.8090093762739502e-06, "loss": 1.8755, "step": 115655 }, { "epoch": 7.858404674548172, "grad_norm": 5.224398136138916, "learning_rate": 1.804762875390678e-06, "loss": 1.8547, "step": 115660 }, { "epoch": 7.858744394618834, "grad_norm": 3.1885733604431152, "learning_rate": 1.800516374507406e-06, "loss": 1.9311, "step": 115665 }, { "epoch": 7.859084114689496, "grad_norm": 4.140666484832764, "learning_rate": 1.7962698736241338e-06, "loss": 2.0239, "step": 115670 }, { "epoch": 7.8594238347601575, "grad_norm": 3.658644199371338, "learning_rate": 1.7920233727408618e-06, "loss": 2.0133, "step": 115675 }, { "epoch": 7.85976355483082, "grad_norm": 3.7150306701660156, "learning_rate": 1.7877768718575892e-06, "loss": 2.0657, "step": 115680 }, { "epoch": 7.860103274901482, "grad_norm": 3.3734872341156006, "learning_rate": 1.7835303709743172e-06, "loss": 2.0013, "step": 115685 }, { "epoch": 7.860442994972143, "grad_norm": 3.3742785453796387, "learning_rate": 1.779283870091045e-06, "loss": 2.0311, "step": 115690 }, { "epoch": 7.860782715042805, "grad_norm": 2.9974143505096436, "learning_rate": 1.7750373692077728e-06, "loss": 1.6157, "step": 115695 }, { "epoch": 7.861122435113467, "grad_norm": 3.510932207107544, "learning_rate": 1.7707908683245008e-06, "loss": 1.8347, "step": 115700 }, { "epoch": 7.861462155184128, "grad_norm": 3.293029546737671, "learning_rate": 1.7665443674412286e-06, "loss": 1.8502, "step": 115705 }, { "epoch": 7.86180187525479, "grad_norm": 3.5303633213043213, "learning_rate": 1.7622978665579562e-06, "loss": 2.0978, "step": 115710 }, { "epoch": 7.862141595325452, "grad_norm": 3.7104225158691406, "learning_rate": 1.758051365674684e-06, "loss": 1.9967, "step": 115715 }, { "epoch": 7.8624813153961135, "grad_norm": 
3.3575284481048584, "learning_rate": 1.753804864791412e-06, "loss": 2.0187, "step": 115720 }, { "epoch": 7.862821035466776, "grad_norm": 4.022895812988281, "learning_rate": 1.7495583639081398e-06, "loss": 1.8109, "step": 115725 }, { "epoch": 7.863160755537437, "grad_norm": 3.333463430404663, "learning_rate": 1.7453118630248678e-06, "loss": 1.8128, "step": 115730 }, { "epoch": 7.863500475608099, "grad_norm": 3.538318157196045, "learning_rate": 1.7410653621415952e-06, "loss": 1.9787, "step": 115735 }, { "epoch": 7.863840195678761, "grad_norm": 3.5817832946777344, "learning_rate": 1.7368188612583232e-06, "loss": 1.8397, "step": 115740 }, { "epoch": 7.864179915749422, "grad_norm": 3.4617252349853516, "learning_rate": 1.732572360375051e-06, "loss": 2.219, "step": 115745 }, { "epoch": 7.864519635820084, "grad_norm": 3.706963300704956, "learning_rate": 1.7283258594917788e-06, "loss": 1.825, "step": 115750 }, { "epoch": 7.864859355890746, "grad_norm": 4.269167423248291, "learning_rate": 1.7240793586085068e-06, "loss": 2.0019, "step": 115755 }, { "epoch": 7.865199075961407, "grad_norm": 3.444995880126953, "learning_rate": 1.7198328577252346e-06, "loss": 2.1331, "step": 115760 }, { "epoch": 7.8655387960320695, "grad_norm": 3.7515320777893066, "learning_rate": 1.7155863568419622e-06, "loss": 2.1544, "step": 115765 }, { "epoch": 7.865878516102732, "grad_norm": 3.602862596511841, "learning_rate": 1.71133985595869e-06, "loss": 1.9591, "step": 115770 }, { "epoch": 7.866218236173393, "grad_norm": 3.917402744293213, "learning_rate": 1.707093355075418e-06, "loss": 1.9289, "step": 115775 }, { "epoch": 7.866557956244055, "grad_norm": 3.76912784576416, "learning_rate": 1.7028468541921458e-06, "loss": 1.9602, "step": 115780 }, { "epoch": 7.866897676314717, "grad_norm": 3.2739152908325195, "learning_rate": 1.6986003533088736e-06, "loss": 1.8797, "step": 115785 }, { "epoch": 7.867237396385378, "grad_norm": 3.714953660964966, "learning_rate": 1.6943538524256016e-06, "loss": 2.1938, "step": 
115790 }, { "epoch": 7.86757711645604, "grad_norm": 4.9161458015441895, "learning_rate": 1.6901073515423292e-06, "loss": 2.0044, "step": 115795 }, { "epoch": 7.867916836526702, "grad_norm": 3.6479744911193848, "learning_rate": 1.685860850659057e-06, "loss": 1.9587, "step": 115800 }, { "epoch": 7.8682565565973634, "grad_norm": 3.8533148765563965, "learning_rate": 1.6816143497757848e-06, "loss": 1.9408, "step": 115805 }, { "epoch": 7.8685962766680255, "grad_norm": 4.819952964782715, "learning_rate": 1.6773678488925128e-06, "loss": 2.0942, "step": 115810 }, { "epoch": 7.868935996738688, "grad_norm": 3.8175559043884277, "learning_rate": 1.6731213480092406e-06, "loss": 2.1674, "step": 115815 }, { "epoch": 7.869275716809349, "grad_norm": 3.1630866527557373, "learning_rate": 1.6688748471259681e-06, "loss": 2.1073, "step": 115820 }, { "epoch": 7.869615436880011, "grad_norm": 3.146026611328125, "learning_rate": 1.664628346242696e-06, "loss": 2.2216, "step": 115825 }, { "epoch": 7.869955156950673, "grad_norm": 3.485522508621216, "learning_rate": 1.660381845359424e-06, "loss": 1.8616, "step": 115830 }, { "epoch": 7.870294877021334, "grad_norm": 4.28233528137207, "learning_rate": 1.6561353444761517e-06, "loss": 1.9097, "step": 115835 }, { "epoch": 7.870634597091996, "grad_norm": 3.319878101348877, "learning_rate": 1.6518888435928795e-06, "loss": 1.9152, "step": 115840 }, { "epoch": 7.870974317162658, "grad_norm": 3.9638988971710205, "learning_rate": 1.6476423427096075e-06, "loss": 1.7902, "step": 115845 }, { "epoch": 7.8713140372333195, "grad_norm": 4.105770111083984, "learning_rate": 1.6433958418263351e-06, "loss": 1.9005, "step": 115850 }, { "epoch": 7.8716537573039815, "grad_norm": 3.611206293106079, "learning_rate": 1.639149340943063e-06, "loss": 1.8193, "step": 115855 }, { "epoch": 7.871993477374644, "grad_norm": 3.76889705657959, "learning_rate": 1.6349028400597907e-06, "loss": 2.1268, "step": 115860 }, { "epoch": 7.872333197445305, "grad_norm": 3.6020898818969727, 
"learning_rate": 1.6306563391765187e-06, "loss": 2.0772, "step": 115865 }, { "epoch": 7.872672917515967, "grad_norm": 3.9940643310546875, "learning_rate": 1.6264098382932465e-06, "loss": 1.9093, "step": 115870 }, { "epoch": 7.873012637586629, "grad_norm": 3.346332550048828, "learning_rate": 1.6221633374099743e-06, "loss": 2.2705, "step": 115875 }, { "epoch": 7.87335235765729, "grad_norm": 2.3599395751953125, "learning_rate": 1.617916836526702e-06, "loss": 2.0082, "step": 115880 }, { "epoch": 7.873692077727952, "grad_norm": 4.42264986038208, "learning_rate": 1.61367033564343e-06, "loss": 2.0318, "step": 115885 }, { "epoch": 7.874031797798614, "grad_norm": 3.661958694458008, "learning_rate": 1.6094238347601577e-06, "loss": 2.0302, "step": 115890 }, { "epoch": 7.8743715178692755, "grad_norm": 4.139626502990723, "learning_rate": 1.6051773338768855e-06, "loss": 1.8278, "step": 115895 }, { "epoch": 7.8747112379399375, "grad_norm": 3.88539719581604, "learning_rate": 1.6009308329936135e-06, "loss": 1.9783, "step": 115900 }, { "epoch": 7.8750509580106, "grad_norm": 3.766402244567871, "learning_rate": 1.596684332110341e-06, "loss": 1.8458, "step": 115905 }, { "epoch": 7.875390678081261, "grad_norm": 3.962531805038452, "learning_rate": 1.5924378312270689e-06, "loss": 2.0653, "step": 115910 }, { "epoch": 7.875730398151923, "grad_norm": 3.882232904434204, "learning_rate": 1.5881913303437967e-06, "loss": 2.0464, "step": 115915 }, { "epoch": 7.876070118222585, "grad_norm": 3.8543035984039307, "learning_rate": 1.5839448294605247e-06, "loss": 1.887, "step": 115920 }, { "epoch": 7.876409838293246, "grad_norm": 3.598386287689209, "learning_rate": 1.5796983285772525e-06, "loss": 2.1649, "step": 115925 }, { "epoch": 7.876749558363908, "grad_norm": 3.6112332344055176, "learning_rate": 1.5754518276939803e-06, "loss": 2.0346, "step": 115930 }, { "epoch": 7.87708927843457, "grad_norm": 4.18750524520874, "learning_rate": 1.5712053268107079e-06, "loss": 1.6539, "step": 115935 }, { "epoch": 
7.8774289985052315, "grad_norm": 4.212338447570801, "learning_rate": 1.5669588259274359e-06, "loss": 1.6687, "step": 115940 }, { "epoch": 7.877768718575894, "grad_norm": 2.977618932723999, "learning_rate": 1.5627123250441637e-06, "loss": 2.052, "step": 115945 }, { "epoch": 7.878108438646555, "grad_norm": 3.7012081146240234, "learning_rate": 1.5584658241608915e-06, "loss": 1.8699, "step": 115950 }, { "epoch": 7.878448158717217, "grad_norm": 4.351744174957275, "learning_rate": 1.5542193232776193e-06, "loss": 1.9467, "step": 115955 }, { "epoch": 7.878787878787879, "grad_norm": 4.128621578216553, "learning_rate": 1.549972822394347e-06, "loss": 2.0685, "step": 115960 }, { "epoch": 7.87912759885854, "grad_norm": 3.247260808944702, "learning_rate": 1.545726321511075e-06, "loss": 1.7929, "step": 115965 }, { "epoch": 7.879467318929202, "grad_norm": 3.434302568435669, "learning_rate": 1.5414798206278027e-06, "loss": 1.9873, "step": 115970 }, { "epoch": 7.879807038999864, "grad_norm": 4.089162349700928, "learning_rate": 1.5372333197445307e-06, "loss": 1.904, "step": 115975 }, { "epoch": 7.880146759070525, "grad_norm": 3.595426559448242, "learning_rate": 1.5329868188612585e-06, "loss": 2.139, "step": 115980 }, { "epoch": 7.8804864791411875, "grad_norm": 3.555763006210327, "learning_rate": 1.5287403179779863e-06, "loss": 1.8999, "step": 115985 }, { "epoch": 7.88082619921185, "grad_norm": 5.107048988342285, "learning_rate": 1.524493817094714e-06, "loss": 1.8155, "step": 115990 }, { "epoch": 7.881165919282511, "grad_norm": 4.387709140777588, "learning_rate": 1.5202473162114418e-06, "loss": 1.7027, "step": 115995 }, { "epoch": 7.881505639353173, "grad_norm": 4.276848316192627, "learning_rate": 1.5160008153281696e-06, "loss": 1.9609, "step": 116000 }, { "epoch": 7.881845359423835, "grad_norm": 4.15111780166626, "learning_rate": 1.5117543144448974e-06, "loss": 1.8693, "step": 116005 }, { "epoch": 7.882185079494496, "grad_norm": 3.5102813243865967, "learning_rate": 
1.5075078135616254e-06, "loss": 2.0555, "step": 116010 }, { "epoch": 7.882524799565158, "grad_norm": 4.764276504516602, "learning_rate": 1.503261312678353e-06, "loss": 2.1622, "step": 116015 }, { "epoch": 7.88286451963582, "grad_norm": 3.4000585079193115, "learning_rate": 1.499014811795081e-06, "loss": 2.0612, "step": 116020 }, { "epoch": 7.883204239706481, "grad_norm": 4.174983024597168, "learning_rate": 1.4947683109118086e-06, "loss": 1.9608, "step": 116025 }, { "epoch": 7.8835439597771435, "grad_norm": 3.8590362071990967, "learning_rate": 1.4905218100285366e-06, "loss": 1.9805, "step": 116030 }, { "epoch": 7.883883679847806, "grad_norm": 3.1298413276672363, "learning_rate": 1.4862753091452644e-06, "loss": 1.7273, "step": 116035 }, { "epoch": 7.884223399918467, "grad_norm": 3.2962605953216553, "learning_rate": 1.4820288082619922e-06, "loss": 1.9748, "step": 116040 }, { "epoch": 7.884563119989129, "grad_norm": 3.7575278282165527, "learning_rate": 1.47778230737872e-06, "loss": 1.7706, "step": 116045 }, { "epoch": 7.884902840059791, "grad_norm": 4.139837741851807, "learning_rate": 1.4735358064954478e-06, "loss": 2.1604, "step": 116050 }, { "epoch": 7.885242560130452, "grad_norm": 3.4610085487365723, "learning_rate": 1.4692893056121756e-06, "loss": 2.2076, "step": 116055 }, { "epoch": 7.885582280201114, "grad_norm": 3.6259992122650146, "learning_rate": 1.4650428047289034e-06, "loss": 1.7704, "step": 116060 }, { "epoch": 7.885922000271776, "grad_norm": 3.6467857360839844, "learning_rate": 1.4607963038456314e-06, "loss": 2.055, "step": 116065 }, { "epoch": 7.886261720342437, "grad_norm": 3.1919262409210205, "learning_rate": 1.456549802962359e-06, "loss": 2.0608, "step": 116070 }, { "epoch": 7.8866014404130995, "grad_norm": 3.659466028213501, "learning_rate": 1.452303302079087e-06, "loss": 1.783, "step": 116075 }, { "epoch": 7.886941160483762, "grad_norm": 4.443986415863037, "learning_rate": 1.4480568011958148e-06, "loss": 1.8226, "step": 116080 }, { "epoch": 
7.887280880554423, "grad_norm": 4.345930576324463, "learning_rate": 1.4438103003125426e-06, "loss": 1.9283, "step": 116085 }, { "epoch": 7.887620600625085, "grad_norm": 4.042260646820068, "learning_rate": 1.4395637994292704e-06, "loss": 1.8329, "step": 116090 }, { "epoch": 7.887960320695747, "grad_norm": 3.4949615001678467, "learning_rate": 1.4353172985459982e-06, "loss": 1.9963, "step": 116095 }, { "epoch": 7.888300040766408, "grad_norm": 4.60061502456665, "learning_rate": 1.431070797662726e-06, "loss": 2.0918, "step": 116100 }, { "epoch": 7.88863976083707, "grad_norm": 3.2484326362609863, "learning_rate": 1.4268242967794538e-06, "loss": 1.7835, "step": 116105 }, { "epoch": 7.888979480907732, "grad_norm": 3.168569803237915, "learning_rate": 1.4225777958961816e-06, "loss": 1.8231, "step": 116110 }, { "epoch": 7.8893192009783935, "grad_norm": 3.984858989715576, "learning_rate": 1.4183312950129094e-06, "loss": 2.0547, "step": 116115 }, { "epoch": 7.8896589210490555, "grad_norm": 4.300290584564209, "learning_rate": 1.4140847941296374e-06, "loss": 1.8517, "step": 116120 }, { "epoch": 7.889998641119718, "grad_norm": 3.8776931762695312, "learning_rate": 1.409838293246365e-06, "loss": 1.9016, "step": 116125 }, { "epoch": 7.890338361190379, "grad_norm": 3.4668233394622803, "learning_rate": 1.405591792363093e-06, "loss": 1.9813, "step": 116130 }, { "epoch": 7.890678081261041, "grad_norm": 4.375824928283691, "learning_rate": 1.4013452914798208e-06, "loss": 1.9035, "step": 116135 }, { "epoch": 7.891017801331703, "grad_norm": 4.1594767570495605, "learning_rate": 1.3970987905965484e-06, "loss": 1.862, "step": 116140 }, { "epoch": 7.891357521402364, "grad_norm": 3.140568733215332, "learning_rate": 1.3928522897132764e-06, "loss": 1.7587, "step": 116145 }, { "epoch": 7.891697241473026, "grad_norm": 4.511277198791504, "learning_rate": 1.3886057888300042e-06, "loss": 1.8616, "step": 116150 }, { "epoch": 7.892036961543688, "grad_norm": 3.2446155548095703, "learning_rate": 
1.384359287946732e-06, "loss": 1.9121, "step": 116155 }, { "epoch": 7.8923766816143495, "grad_norm": 3.8316309452056885, "learning_rate": 1.3801127870634597e-06, "loss": 1.8946, "step": 116160 }, { "epoch": 7.8927164016850115, "grad_norm": 4.181709289550781, "learning_rate": 1.3758662861801878e-06, "loss": 1.9171, "step": 116165 }, { "epoch": 7.893056121755674, "grad_norm": 3.308215379714966, "learning_rate": 1.37246908547357e-06, "loss": 2.0799, "step": 116170 }, { "epoch": 7.893395841826335, "grad_norm": 3.445335865020752, "learning_rate": 1.3682225845902975e-06, "loss": 1.8902, "step": 116175 }, { "epoch": 7.893735561896997, "grad_norm": 3.649076223373413, "learning_rate": 1.3639760837070255e-06, "loss": 1.9563, "step": 116180 }, { "epoch": 7.894075281967659, "grad_norm": 4.165309429168701, "learning_rate": 1.3597295828237533e-06, "loss": 2.2586, "step": 116185 }, { "epoch": 7.89441500203832, "grad_norm": 3.5629072189331055, "learning_rate": 1.3554830819404811e-06, "loss": 2.1296, "step": 116190 }, { "epoch": 7.894754722108982, "grad_norm": 4.351501941680908, "learning_rate": 1.351236581057209e-06, "loss": 1.9159, "step": 116195 }, { "epoch": 7.895094442179644, "grad_norm": 3.756652593612671, "learning_rate": 1.3469900801739367e-06, "loss": 1.8452, "step": 116200 }, { "epoch": 7.8954341622503055, "grad_norm": 3.8586127758026123, "learning_rate": 1.3427435792906645e-06, "loss": 2.058, "step": 116205 }, { "epoch": 7.8957738823209676, "grad_norm": 3.3502960205078125, "learning_rate": 1.3384970784073923e-06, "loss": 1.8393, "step": 116210 }, { "epoch": 7.89611360239163, "grad_norm": 3.072359323501587, "learning_rate": 1.3342505775241201e-06, "loss": 2.098, "step": 116215 }, { "epoch": 7.896453322462291, "grad_norm": 3.631558418273926, "learning_rate": 1.330004076640848e-06, "loss": 2.1315, "step": 116220 }, { "epoch": 7.896793042532953, "grad_norm": 3.464228868484497, "learning_rate": 1.325757575757576e-06, "loss": 2.0576, "step": 116225 }, { "epoch": 
7.897132762603615, "grad_norm": 4.118140697479248, "learning_rate": 1.3215110748743035e-06, "loss": 1.9845, "step": 116230 }, { "epoch": 7.897472482674276, "grad_norm": 3.7639553546905518, "learning_rate": 1.3172645739910315e-06, "loss": 1.8146, "step": 116235 }, { "epoch": 7.897812202744938, "grad_norm": 3.8066246509552, "learning_rate": 1.3130180731077593e-06, "loss": 2.0072, "step": 116240 }, { "epoch": 7.8981519228156, "grad_norm": 4.232240200042725, "learning_rate": 1.308771572224487e-06, "loss": 2.0738, "step": 116245 }, { "epoch": 7.8984916428862615, "grad_norm": 3.782294750213623, "learning_rate": 1.304525071341215e-06, "loss": 1.8929, "step": 116250 }, { "epoch": 7.898831362956924, "grad_norm": 3.641383647918701, "learning_rate": 1.3002785704579427e-06, "loss": 1.7051, "step": 116255 }, { "epoch": 7.899171083027586, "grad_norm": 3.2439863681793213, "learning_rate": 1.2960320695746705e-06, "loss": 1.7566, "step": 116260 }, { "epoch": 7.899510803098247, "grad_norm": 4.011711597442627, "learning_rate": 1.2917855686913983e-06, "loss": 2.164, "step": 116265 }, { "epoch": 7.899850523168909, "grad_norm": 4.404182434082031, "learning_rate": 1.2875390678081263e-06, "loss": 2.173, "step": 116270 }, { "epoch": 7.900190243239571, "grad_norm": 5.163379669189453, "learning_rate": 1.2832925669248539e-06, "loss": 1.9915, "step": 116275 }, { "epoch": 7.900529963310232, "grad_norm": 4.168463230133057, "learning_rate": 1.2790460660415819e-06, "loss": 2.015, "step": 116280 }, { "epoch": 7.900869683380894, "grad_norm": 4.4072957038879395, "learning_rate": 1.2747995651583097e-06, "loss": 1.8664, "step": 116285 }, { "epoch": 7.901209403451556, "grad_norm": 3.698612928390503, "learning_rate": 1.2705530642750375e-06, "loss": 1.9317, "step": 116290 }, { "epoch": 7.9015491235222175, "grad_norm": 4.21903657913208, "learning_rate": 1.2663065633917653e-06, "loss": 2.0451, "step": 116295 }, { "epoch": 7.90188884359288, "grad_norm": 3.8771474361419678, "learning_rate": 
1.262060062508493e-06, "loss": 1.8221, "step": 116300 }, { "epoch": 7.902228563663542, "grad_norm": 2.8434219360351562, "learning_rate": 1.2578135616252209e-06, "loss": 1.8367, "step": 116305 }, { "epoch": 7.902568283734203, "grad_norm": 4.042159080505371, "learning_rate": 1.2535670607419487e-06, "loss": 1.9288, "step": 116310 }, { "epoch": 7.902908003804865, "grad_norm": 4.000418186187744, "learning_rate": 1.2493205598586765e-06, "loss": 2.0095, "step": 116315 }, { "epoch": 7.903247723875527, "grad_norm": 3.3773908615112305, "learning_rate": 1.2450740589754043e-06, "loss": 2.0469, "step": 116320 }, { "epoch": 7.903587443946188, "grad_norm": 3.747737169265747, "learning_rate": 1.2408275580921323e-06, "loss": 1.9586, "step": 116325 }, { "epoch": 7.90392716401685, "grad_norm": 2.8898136615753174, "learning_rate": 1.2365810572088598e-06, "loss": 1.6775, "step": 116330 }, { "epoch": 7.904266884087512, "grad_norm": 3.7417304515838623, "learning_rate": 1.2323345563255879e-06, "loss": 2.1282, "step": 116335 }, { "epoch": 7.9046066041581735, "grad_norm": 3.929975748062134, "learning_rate": 1.2280880554423156e-06, "loss": 2.0483, "step": 116340 }, { "epoch": 7.904946324228836, "grad_norm": 3.4474642276763916, "learning_rate": 1.2238415545590434e-06, "loss": 1.9479, "step": 116345 }, { "epoch": 7.905286044299498, "grad_norm": 3.799767255783081, "learning_rate": 1.2195950536757712e-06, "loss": 2.1277, "step": 116350 }, { "epoch": 7.905625764370159, "grad_norm": 4.2741498947143555, "learning_rate": 1.215348552792499e-06, "loss": 1.9875, "step": 116355 }, { "epoch": 7.905965484440821, "grad_norm": 3.4364376068115234, "learning_rate": 1.2111020519092268e-06, "loss": 2.0292, "step": 116360 }, { "epoch": 7.906305204511483, "grad_norm": 3.7498066425323486, "learning_rate": 1.2068555510259546e-06, "loss": 2.3284, "step": 116365 }, { "epoch": 7.906644924582144, "grad_norm": 3.4141995906829834, "learning_rate": 1.2026090501426826e-06, "loss": 2.0766, "step": 116370 }, { "epoch": 
7.906984644652806, "grad_norm": 3.6010053157806396, "learning_rate": 1.1983625492594102e-06, "loss": 1.6583, "step": 116375 }, { "epoch": 7.907324364723468, "grad_norm": 3.0560970306396484, "learning_rate": 1.1941160483761382e-06, "loss": 1.8389, "step": 116380 }, { "epoch": 7.9076640847941295, "grad_norm": 4.203249454498291, "learning_rate": 1.1898695474928658e-06, "loss": 1.9572, "step": 116385 }, { "epoch": 7.908003804864792, "grad_norm": 3.553330898284912, "learning_rate": 1.1856230466095938e-06, "loss": 2.068, "step": 116390 }, { "epoch": 7.908343524935454, "grad_norm": 3.2602310180664062, "learning_rate": 1.1813765457263216e-06, "loss": 1.7534, "step": 116395 }, { "epoch": 7.908683245006115, "grad_norm": 3.558835029602051, "learning_rate": 1.1771300448430494e-06, "loss": 1.87, "step": 116400 }, { "epoch": 7.909022965076777, "grad_norm": 3.494129180908203, "learning_rate": 1.1728835439597772e-06, "loss": 1.9936, "step": 116405 }, { "epoch": 7.909362685147439, "grad_norm": 4.696293830871582, "learning_rate": 1.168637043076505e-06, "loss": 1.8374, "step": 116410 }, { "epoch": 7.9097024052181, "grad_norm": 3.1238620281219482, "learning_rate": 1.1643905421932328e-06, "loss": 2.0107, "step": 116415 }, { "epoch": 7.910042125288762, "grad_norm": 4.4734697341918945, "learning_rate": 1.1601440413099606e-06, "loss": 2.0159, "step": 116420 }, { "epoch": 7.9103818453594235, "grad_norm": 4.896623611450195, "learning_rate": 1.1558975404266886e-06, "loss": 1.9966, "step": 116425 }, { "epoch": 7.9107215654300855, "grad_norm": 4.239515781402588, "learning_rate": 1.1516510395434162e-06, "loss": 2.1588, "step": 116430 }, { "epoch": 7.911061285500748, "grad_norm": 3.473437547683716, "learning_rate": 1.1474045386601442e-06, "loss": 1.951, "step": 116435 }, { "epoch": 7.911401005571409, "grad_norm": 3.183084487915039, "learning_rate": 1.143158037776872e-06, "loss": 2.05, "step": 116440 }, { "epoch": 7.911740725642071, "grad_norm": 3.675625801086426, "learning_rate": 
1.1389115368935998e-06, "loss": 1.7801, "step": 116445 }, { "epoch": 7.912080445712733, "grad_norm": 3.9945499897003174, "learning_rate": 1.1346650360103276e-06, "loss": 1.9527, "step": 116450 }, { "epoch": 7.912420165783394, "grad_norm": 4.5091233253479, "learning_rate": 1.1304185351270554e-06, "loss": 2.0159, "step": 116455 }, { "epoch": 7.912759885854056, "grad_norm": 3.5069572925567627, "learning_rate": 1.1261720342437832e-06, "loss": 2.0707, "step": 116460 }, { "epoch": 7.913099605924718, "grad_norm": 3.990250587463379, "learning_rate": 1.121925533360511e-06, "loss": 1.8625, "step": 116465 }, { "epoch": 7.9134393259953795, "grad_norm": 5.230044841766357, "learning_rate": 1.1176790324772388e-06, "loss": 1.7199, "step": 116470 }, { "epoch": 7.9137790460660415, "grad_norm": 3.5638036727905273, "learning_rate": 1.1134325315939666e-06, "loss": 2.2541, "step": 116475 }, { "epoch": 7.914118766136704, "grad_norm": 3.3912978172302246, "learning_rate": 1.1091860307106946e-06, "loss": 1.9052, "step": 116480 }, { "epoch": 7.914458486207365, "grad_norm": 3.3536267280578613, "learning_rate": 1.1049395298274222e-06, "loss": 2.0312, "step": 116485 }, { "epoch": 7.914798206278027, "grad_norm": 4.289945602416992, "learning_rate": 1.1006930289441502e-06, "loss": 1.9905, "step": 116490 }, { "epoch": 7.915137926348689, "grad_norm": 3.502871036529541, "learning_rate": 1.096446528060878e-06, "loss": 1.8188, "step": 116495 }, { "epoch": 7.91547764641935, "grad_norm": 4.824979782104492, "learning_rate": 1.0922000271776055e-06, "loss": 1.7976, "step": 116500 }, { "epoch": 7.915817366490012, "grad_norm": 4.074272632598877, "learning_rate": 1.0879535262943336e-06, "loss": 1.8166, "step": 116505 }, { "epoch": 7.916157086560674, "grad_norm": 3.3215267658233643, "learning_rate": 1.0837070254110613e-06, "loss": 1.9674, "step": 116510 }, { "epoch": 7.9164968066313355, "grad_norm": 3.8659164905548096, "learning_rate": 1.0794605245277891e-06, "loss": 2.2, "step": 116515 }, { "epoch": 
7.916836526701998, "grad_norm": 3.8141422271728516, "learning_rate": 1.075214023644517e-06, "loss": 1.9439, "step": 116520 }, { "epoch": 7.91717624677266, "grad_norm": 4.534931659698486, "learning_rate": 1.070967522761245e-06, "loss": 1.9302, "step": 116525 }, { "epoch": 7.917515966843321, "grad_norm": 3.887213945388794, "learning_rate": 1.0667210218779725e-06, "loss": 2.0313, "step": 116530 }, { "epoch": 7.917855686913983, "grad_norm": 4.231064796447754, "learning_rate": 1.0624745209947005e-06, "loss": 2.143, "step": 116535 }, { "epoch": 7.918195406984645, "grad_norm": 4.064089298248291, "learning_rate": 1.0582280201114283e-06, "loss": 2.221, "step": 116540 }, { "epoch": 7.918535127055306, "grad_norm": 3.3083860874176025, "learning_rate": 1.053981519228156e-06, "loss": 1.9918, "step": 116545 }, { "epoch": 7.918874847125968, "grad_norm": 3.2364578247070312, "learning_rate": 1.049735018344884e-06, "loss": 1.7542, "step": 116550 }, { "epoch": 7.91921456719663, "grad_norm": 3.26157808303833, "learning_rate": 1.0454885174616115e-06, "loss": 2.036, "step": 116555 }, { "epoch": 7.9195542872672915, "grad_norm": 4.036604404449463, "learning_rate": 1.0412420165783395e-06, "loss": 2.1113, "step": 116560 }, { "epoch": 7.919894007337954, "grad_norm": 3.873710870742798, "learning_rate": 1.0369955156950673e-06, "loss": 1.9911, "step": 116565 }, { "epoch": 7.920233727408616, "grad_norm": 4.936915397644043, "learning_rate": 1.0327490148117951e-06, "loss": 1.94, "step": 116570 }, { "epoch": 7.920573447479277, "grad_norm": 3.296506643295288, "learning_rate": 1.028502513928523e-06, "loss": 1.942, "step": 116575 }, { "epoch": 7.920913167549939, "grad_norm": 4.128129482269287, "learning_rate": 1.024256013045251e-06, "loss": 1.8781, "step": 116580 }, { "epoch": 7.921252887620601, "grad_norm": 4.145902156829834, "learning_rate": 1.0200095121619785e-06, "loss": 2.0036, "step": 116585 }, { "epoch": 7.921592607691262, "grad_norm": 5.299164772033691, "learning_rate": 1.0157630112787063e-06, 
"loss": 1.8401, "step": 116590 }, { "epoch": 7.921932327761924, "grad_norm": 3.248436450958252, "learning_rate": 1.0115165103954343e-06, "loss": 1.7909, "step": 116595 }, { "epoch": 7.922272047832586, "grad_norm": 3.270543098449707, "learning_rate": 1.0072700095121619e-06, "loss": 2.079, "step": 116600 }, { "epoch": 7.9226117679032475, "grad_norm": 3.064565420150757, "learning_rate": 1.0030235086288899e-06, "loss": 1.8513, "step": 116605 }, { "epoch": 7.92295148797391, "grad_norm": 4.7815704345703125, "learning_rate": 9.987770077456177e-07, "loss": 1.8519, "step": 116610 }, { "epoch": 7.923291208044572, "grad_norm": 4.172384262084961, "learning_rate": 9.945305068623455e-07, "loss": 1.973, "step": 116615 }, { "epoch": 7.923630928115233, "grad_norm": 3.8238136768341064, "learning_rate": 9.902840059790733e-07, "loss": 2.1671, "step": 116620 }, { "epoch": 7.923970648185895, "grad_norm": 4.2690629959106445, "learning_rate": 9.860375050958013e-07, "loss": 1.9985, "step": 116625 }, { "epoch": 7.924310368256556, "grad_norm": 3.4368233680725098, "learning_rate": 9.817910042125289e-07, "loss": 1.9464, "step": 116630 }, { "epoch": 7.924650088327218, "grad_norm": 4.610239028930664, "learning_rate": 9.775445033292569e-07, "loss": 1.9543, "step": 116635 }, { "epoch": 7.92498980839788, "grad_norm": 4.130005836486816, "learning_rate": 9.732980024459845e-07, "loss": 2.0866, "step": 116640 }, { "epoch": 7.925329528468541, "grad_norm": 3.4773929119110107, "learning_rate": 9.690515015627123e-07, "loss": 2.1076, "step": 116645 }, { "epoch": 7.9256692485392035, "grad_norm": 3.854094982147217, "learning_rate": 9.648050006794403e-07, "loss": 2.0951, "step": 116650 }, { "epoch": 7.926008968609866, "grad_norm": 4.161646842956543, "learning_rate": 9.605584997961679e-07, "loss": 2.1653, "step": 116655 }, { "epoch": 7.926348688680527, "grad_norm": 4.212918758392334, "learning_rate": 9.563119989128959e-07, "loss": 2.0737, "step": 116660 }, { "epoch": 7.926688408751189, "grad_norm": 
3.141892433166504, "learning_rate": 9.520654980296237e-07, "loss": 2.1849, "step": 116665 }, { "epoch": 7.927028128821851, "grad_norm": 3.2542190551757812, "learning_rate": 9.478189971463513e-07, "loss": 2.0413, "step": 116670 }, { "epoch": 7.927367848892512, "grad_norm": 3.9499385356903076, "learning_rate": 9.435724962630792e-07, "loss": 2.0176, "step": 116675 }, { "epoch": 7.927707568963174, "grad_norm": 4.313296794891357, "learning_rate": 9.393259953798071e-07, "loss": 1.8858, "step": 116680 }, { "epoch": 7.928047289033836, "grad_norm": 3.337249755859375, "learning_rate": 9.350794944965348e-07, "loss": 1.9748, "step": 116685 }, { "epoch": 7.9283870091044975, "grad_norm": 3.7073025703430176, "learning_rate": 9.308329936132627e-07, "loss": 2.09, "step": 116690 }, { "epoch": 7.9287267291751595, "grad_norm": 3.577115774154663, "learning_rate": 9.265864927299906e-07, "loss": 1.8413, "step": 116695 }, { "epoch": 7.929066449245822, "grad_norm": 4.3143744468688965, "learning_rate": 9.223399918467183e-07, "loss": 2.0694, "step": 116700 }, { "epoch": 7.929406169316483, "grad_norm": 4.061339378356934, "learning_rate": 9.180934909634462e-07, "loss": 1.8058, "step": 116705 }, { "epoch": 7.929745889387145, "grad_norm": 3.7295637130737305, "learning_rate": 9.138469900801739e-07, "loss": 2.0506, "step": 116710 }, { "epoch": 7.930085609457807, "grad_norm": 4.061147212982178, "learning_rate": 9.096004891969017e-07, "loss": 2.1146, "step": 116715 }, { "epoch": 7.930425329528468, "grad_norm": 4.121035575866699, "learning_rate": 9.053539883136296e-07, "loss": 1.9539, "step": 116720 }, { "epoch": 7.93076504959913, "grad_norm": 3.727578639984131, "learning_rate": 9.011074874303573e-07, "loss": 2.2325, "step": 116725 }, { "epoch": 7.931104769669792, "grad_norm": 4.002089500427246, "learning_rate": 8.968609865470852e-07, "loss": 1.9162, "step": 116730 }, { "epoch": 7.9314444897404535, "grad_norm": 4.518353462219238, "learning_rate": 8.926144856638131e-07, "loss": 1.7018, "step": 116735 
}, { "epoch": 7.9317842098111155, "grad_norm": 3.2488293647766113, "learning_rate": 8.883679847805408e-07, "loss": 1.7841, "step": 116740 }, { "epoch": 7.932123929881778, "grad_norm": 3.7604825496673584, "learning_rate": 8.841214838972687e-07, "loss": 1.981, "step": 116745 }, { "epoch": 7.932463649952439, "grad_norm": 3.539240598678589, "learning_rate": 8.798749830139966e-07, "loss": 1.7448, "step": 116750 }, { "epoch": 7.932803370023101, "grad_norm": 4.496775150299072, "learning_rate": 8.756284821307243e-07, "loss": 1.8151, "step": 116755 }, { "epoch": 7.933143090093763, "grad_norm": 3.8837616443634033, "learning_rate": 8.713819812474521e-07, "loss": 1.9336, "step": 116760 }, { "epoch": 7.933482810164424, "grad_norm": 3.473480701446533, "learning_rate": 8.6713548036418e-07, "loss": 2.0519, "step": 116765 }, { "epoch": 7.933822530235086, "grad_norm": 4.114175796508789, "learning_rate": 8.628889794809077e-07, "loss": 2.2647, "step": 116770 }, { "epoch": 7.934162250305748, "grad_norm": 3.700575351715088, "learning_rate": 8.586424785976356e-07, "loss": 1.9678, "step": 116775 }, { "epoch": 7.9345019703764095, "grad_norm": 4.656760215759277, "learning_rate": 8.543959777143635e-07, "loss": 1.6762, "step": 116780 }, { "epoch": 7.9348416904470715, "grad_norm": 4.0159382820129395, "learning_rate": 8.501494768310912e-07, "loss": 2.2399, "step": 116785 }, { "epoch": 7.935181410517734, "grad_norm": 4.3136515617370605, "learning_rate": 8.459029759478191e-07, "loss": 1.8553, "step": 116790 }, { "epoch": 7.935521130588395, "grad_norm": 4.012014389038086, "learning_rate": 8.416564750645468e-07, "loss": 1.834, "step": 116795 }, { "epoch": 7.935860850659057, "grad_norm": 4.0195631980896, "learning_rate": 8.374099741812747e-07, "loss": 1.8819, "step": 116800 }, { "epoch": 7.936200570729719, "grad_norm": 4.532683372497559, "learning_rate": 8.331634732980026e-07, "loss": 1.947, "step": 116805 }, { "epoch": 7.93654029080038, "grad_norm": 3.4835469722747803, "learning_rate": 
8.289169724147303e-07, "loss": 1.9868, "step": 116810 }, { "epoch": 7.936880010871042, "grad_norm": 3.6749415397644043, "learning_rate": 8.246704715314581e-07, "loss": 2.103, "step": 116815 }, { "epoch": 7.937219730941704, "grad_norm": 2.8589026927948, "learning_rate": 8.20423970648186e-07, "loss": 1.8549, "step": 116820 }, { "epoch": 7.9375594510123655, "grad_norm": 3.5612034797668457, "learning_rate": 8.161774697649137e-07, "loss": 1.8526, "step": 116825 }, { "epoch": 7.937899171083028, "grad_norm": 3.447091817855835, "learning_rate": 8.119309688816416e-07, "loss": 1.9101, "step": 116830 }, { "epoch": 7.93823889115369, "grad_norm": 3.887425661087036, "learning_rate": 8.076844679983695e-07, "loss": 2.2502, "step": 116835 }, { "epoch": 7.938578611224351, "grad_norm": 3.9389896392822266, "learning_rate": 8.034379671150972e-07, "loss": 1.5967, "step": 116840 }, { "epoch": 7.938918331295013, "grad_norm": 3.4263134002685547, "learning_rate": 7.99191466231825e-07, "loss": 2.1306, "step": 116845 }, { "epoch": 7.939258051365675, "grad_norm": 4.382199764251709, "learning_rate": 7.94944965348553e-07, "loss": 1.9659, "step": 116850 }, { "epoch": 7.939597771436336, "grad_norm": 3.0090997219085693, "learning_rate": 7.906984644652806e-07, "loss": 1.7489, "step": 116855 }, { "epoch": 7.939937491506998, "grad_norm": 3.0421841144561768, "learning_rate": 7.864519635820084e-07, "loss": 1.9883, "step": 116860 }, { "epoch": 7.94027721157766, "grad_norm": 4.724289417266846, "learning_rate": 7.822054626987363e-07, "loss": 1.9684, "step": 116865 }, { "epoch": 7.9406169316483215, "grad_norm": 3.9519402980804443, "learning_rate": 7.77958961815464e-07, "loss": 1.7924, "step": 116870 }, { "epoch": 7.940956651718984, "grad_norm": 4.207431316375732, "learning_rate": 7.737124609321919e-07, "loss": 2.066, "step": 116875 }, { "epoch": 7.941296371789646, "grad_norm": 3.2433886528015137, "learning_rate": 7.694659600489197e-07, "loss": 2.1464, "step": 116880 }, { "epoch": 7.941636091860307, 
"grad_norm": 4.466742515563965, "learning_rate": 7.652194591656475e-07, "loss": 2.0307, "step": 116885 }, { "epoch": 7.941975811930969, "grad_norm": 4.165228843688965, "learning_rate": 7.609729582823754e-07, "loss": 2.028, "step": 116890 }, { "epoch": 7.942315532001631, "grad_norm": 4.452939033508301, "learning_rate": 7.567264573991032e-07, "loss": 2.0382, "step": 116895 }, { "epoch": 7.942655252072292, "grad_norm": 3.606454849243164, "learning_rate": 7.52479956515831e-07, "loss": 1.9659, "step": 116900 }, { "epoch": 7.942994972142954, "grad_norm": 3.1753945350646973, "learning_rate": 7.482334556325588e-07, "loss": 1.8771, "step": 116905 }, { "epoch": 7.943334692213616, "grad_norm": 2.887334108352661, "learning_rate": 7.439869547492866e-07, "loss": 1.9588, "step": 116910 }, { "epoch": 7.9436744122842775, "grad_norm": 3.928378105163574, "learning_rate": 7.397404538660144e-07, "loss": 2.0868, "step": 116915 }, { "epoch": 7.94401413235494, "grad_norm": 4.217980861663818, "learning_rate": 7.354939529827422e-07, "loss": 2.0812, "step": 116920 }, { "epoch": 7.944353852425602, "grad_norm": 3.377253293991089, "learning_rate": 7.312474520994701e-07, "loss": 1.9919, "step": 116925 }, { "epoch": 7.944693572496263, "grad_norm": 3.75869083404541, "learning_rate": 7.270009512161979e-07, "loss": 1.8176, "step": 116930 }, { "epoch": 7.945033292566925, "grad_norm": 3.5157253742218018, "learning_rate": 7.227544503329257e-07, "loss": 2.299, "step": 116935 }, { "epoch": 7.945373012637587, "grad_norm": 3.0902161598205566, "learning_rate": 7.185079494496535e-07, "loss": 1.7747, "step": 116940 }, { "epoch": 7.945712732708248, "grad_norm": 3.2831027507781982, "learning_rate": 7.142614485663814e-07, "loss": 1.9906, "step": 116945 }, { "epoch": 7.94605245277891, "grad_norm": 4.211105823516846, "learning_rate": 7.100149476831092e-07, "loss": 2.101, "step": 116950 }, { "epoch": 7.946392172849572, "grad_norm": 3.0306355953216553, "learning_rate": 7.05768446799837e-07, "loss": 2.0579, "step": 
116955 }, { "epoch": 7.9467318929202335, "grad_norm": 3.995325803756714, "learning_rate": 7.015219459165648e-07, "loss": 1.9472, "step": 116960 }, { "epoch": 7.947071612990896, "grad_norm": 3.4329159259796143, "learning_rate": 6.972754450332926e-07, "loss": 1.8628, "step": 116965 }, { "epoch": 7.947411333061558, "grad_norm": 3.0071895122528076, "learning_rate": 6.930289441500204e-07, "loss": 1.7587, "step": 116970 }, { "epoch": 7.947751053132219, "grad_norm": 3.7576539516448975, "learning_rate": 6.887824432667483e-07, "loss": 1.7828, "step": 116975 }, { "epoch": 7.948090773202881, "grad_norm": 4.0837321281433105, "learning_rate": 6.845359423834761e-07, "loss": 1.9141, "step": 116980 }, { "epoch": 7.948430493273543, "grad_norm": 3.9012038707733154, "learning_rate": 6.802894415002039e-07, "loss": 1.862, "step": 116985 }, { "epoch": 7.948770213344204, "grad_norm": 5.622314929962158, "learning_rate": 6.760429406169317e-07, "loss": 1.9951, "step": 116990 }, { "epoch": 7.949109933414866, "grad_norm": 3.840782880783081, "learning_rate": 6.717964397336596e-07, "loss": 1.9384, "step": 116995 }, { "epoch": 7.949449653485528, "grad_norm": 4.2165093421936035, "learning_rate": 6.675499388503874e-07, "loss": 2.1274, "step": 117000 }, { "epoch": 7.9497893735561895, "grad_norm": 3.4927310943603516, "learning_rate": 6.633034379671152e-07, "loss": 1.9058, "step": 117005 }, { "epoch": 7.950129093626852, "grad_norm": 3.942307472229004, "learning_rate": 6.59056937083843e-07, "loss": 1.7017, "step": 117010 }, { "epoch": 7.950468813697514, "grad_norm": 3.425184726715088, "learning_rate": 6.548104362005707e-07, "loss": 1.9237, "step": 117015 }, { "epoch": 7.950808533768175, "grad_norm": 3.0855329036712646, "learning_rate": 6.505639353172985e-07, "loss": 1.7556, "step": 117020 }, { "epoch": 7.951148253838837, "grad_norm": 4.176064491271973, "learning_rate": 6.463174344340263e-07, "loss": 1.7383, "step": 117025 }, { "epoch": 7.951487973909499, "grad_norm": 3.488562822341919, 
"learning_rate": 6.420709335507542e-07, "loss": 2.0512, "step": 117030 }, { "epoch": 7.95182769398016, "grad_norm": 3.922152042388916, "learning_rate": 6.37824432667482e-07, "loss": 2.0189, "step": 117035 }, { "epoch": 7.952167414050822, "grad_norm": 3.379183053970337, "learning_rate": 6.335779317842098e-07, "loss": 1.9069, "step": 117040 }, { "epoch": 7.952507134121484, "grad_norm": 2.756080150604248, "learning_rate": 6.293314309009377e-07, "loss": 1.9911, "step": 117045 }, { "epoch": 7.9528468541921455, "grad_norm": 4.0472636222839355, "learning_rate": 6.250849300176655e-07, "loss": 1.9983, "step": 117050 }, { "epoch": 7.953186574262808, "grad_norm": 3.866424083709717, "learning_rate": 6.208384291343932e-07, "loss": 1.8781, "step": 117055 }, { "epoch": 7.95352629433347, "grad_norm": 4.139420986175537, "learning_rate": 6.16591928251121e-07, "loss": 2.0656, "step": 117060 }, { "epoch": 7.953866014404131, "grad_norm": 2.9292290210723877, "learning_rate": 6.123454273678489e-07, "loss": 1.8479, "step": 117065 }, { "epoch": 7.954205734474793, "grad_norm": 3.213987112045288, "learning_rate": 6.080989264845767e-07, "loss": 1.9051, "step": 117070 }, { "epoch": 7.954545454545455, "grad_norm": 4.401655673980713, "learning_rate": 6.038524256013045e-07, "loss": 1.8719, "step": 117075 }, { "epoch": 7.954885174616116, "grad_norm": 4.32890510559082, "learning_rate": 5.996059247180324e-07, "loss": 1.856, "step": 117080 }, { "epoch": 7.955224894686778, "grad_norm": 4.6539435386657715, "learning_rate": 5.953594238347602e-07, "loss": 1.871, "step": 117085 }, { "epoch": 7.95556461475744, "grad_norm": 4.032464504241943, "learning_rate": 5.91112922951488e-07, "loss": 1.8235, "step": 117090 }, { "epoch": 7.9559043348281016, "grad_norm": 3.355410099029541, "learning_rate": 5.868664220682159e-07, "loss": 1.9136, "step": 117095 }, { "epoch": 7.956244054898764, "grad_norm": 3.159343957901001, "learning_rate": 5.826199211849436e-07, "loss": 2.028, "step": 117100 }, { "epoch": 
7.956583774969425, "grad_norm": 3.256946563720703, "learning_rate": 5.783734203016714e-07, "loss": 2.0293, "step": 117105 }, { "epoch": 7.956923495040087, "grad_norm": 4.699840545654297, "learning_rate": 5.741269194183992e-07, "loss": 1.9324, "step": 117110 }, { "epoch": 7.957263215110749, "grad_norm": 4.259019374847412, "learning_rate": 5.698804185351271e-07, "loss": 2.0537, "step": 117115 }, { "epoch": 7.95760293518141, "grad_norm": 3.8224825859069824, "learning_rate": 5.656339176518549e-07, "loss": 1.8818, "step": 117120 }, { "epoch": 7.957942655252072, "grad_norm": 3.8094065189361572, "learning_rate": 5.613874167685827e-07, "loss": 1.8306, "step": 117125 }, { "epoch": 7.958282375322734, "grad_norm": 3.610823392868042, "learning_rate": 5.571409158853106e-07, "loss": 1.6172, "step": 117130 }, { "epoch": 7.9586220953933955, "grad_norm": 2.702941417694092, "learning_rate": 5.528944150020384e-07, "loss": 2.124, "step": 117135 }, { "epoch": 7.958961815464058, "grad_norm": 3.491741418838501, "learning_rate": 5.486479141187662e-07, "loss": 2.175, "step": 117140 }, { "epoch": 7.95930153553472, "grad_norm": 3.5296247005462646, "learning_rate": 5.44401413235494e-07, "loss": 2.1921, "step": 117145 }, { "epoch": 7.959641255605381, "grad_norm": 5.243984699249268, "learning_rate": 5.401549123522218e-07, "loss": 1.999, "step": 117150 }, { "epoch": 7.959980975676043, "grad_norm": 4.607972621917725, "learning_rate": 5.359084114689496e-07, "loss": 1.9268, "step": 117155 }, { "epoch": 7.960320695746705, "grad_norm": 4.030585289001465, "learning_rate": 5.316619105856774e-07, "loss": 2.132, "step": 117160 }, { "epoch": 7.960660415817366, "grad_norm": 3.0101237297058105, "learning_rate": 5.274154097024053e-07, "loss": 1.9386, "step": 117165 }, { "epoch": 7.961000135888028, "grad_norm": 3.98048734664917, "learning_rate": 5.231689088191331e-07, "loss": 1.7139, "step": 117170 }, { "epoch": 7.96133985595869, "grad_norm": 4.642879009246826, "learning_rate": 5.189224079358609e-07, "loss": 
1.8546, "step": 117175 }, { "epoch": 7.9616795760293515, "grad_norm": 3.635637044906616, "learning_rate": 5.146759070525888e-07, "loss": 1.9008, "step": 117180 }, { "epoch": 7.962019296100014, "grad_norm": 3.859036922454834, "learning_rate": 5.104294061693166e-07, "loss": 1.8778, "step": 117185 }, { "epoch": 7.962359016170676, "grad_norm": 4.094803810119629, "learning_rate": 5.061829052860443e-07, "loss": 1.9444, "step": 117190 }, { "epoch": 7.962698736241337, "grad_norm": 4.314974308013916, "learning_rate": 5.019364044027721e-07, "loss": 1.8815, "step": 117195 }, { "epoch": 7.963038456311999, "grad_norm": 3.112053632736206, "learning_rate": 4.976899035194999e-07, "loss": 2.1581, "step": 117200 }, { "epoch": 7.963378176382661, "grad_norm": 3.5502495765686035, "learning_rate": 4.934434026362277e-07, "loss": 1.9785, "step": 117205 }, { "epoch": 7.963717896453322, "grad_norm": 3.8460419178009033, "learning_rate": 4.891969017529555e-07, "loss": 2.0742, "step": 117210 }, { "epoch": 7.964057616523984, "grad_norm": 4.097157955169678, "learning_rate": 4.849504008696834e-07, "loss": 2.0851, "step": 117215 }, { "epoch": 7.964397336594646, "grad_norm": 4.482254505157471, "learning_rate": 4.807038999864112e-07, "loss": 1.9459, "step": 117220 }, { "epoch": 7.9647370566653075, "grad_norm": 3.9012458324432373, "learning_rate": 4.7645739910313903e-07, "loss": 1.9473, "step": 117225 }, { "epoch": 7.96507677673597, "grad_norm": 3.378058671951294, "learning_rate": 4.722108982198668e-07, "loss": 1.9406, "step": 117230 }, { "epoch": 7.965416496806632, "grad_norm": 3.4977529048919678, "learning_rate": 4.6796439733659467e-07, "loss": 1.9641, "step": 117235 }, { "epoch": 7.965756216877293, "grad_norm": 3.149235486984253, "learning_rate": 4.6371789645332247e-07, "loss": 2.1236, "step": 117240 }, { "epoch": 7.966095936947955, "grad_norm": 4.394367694854736, "learning_rate": 4.5947139557005026e-07, "loss": 2.3124, "step": 117245 }, { "epoch": 7.966435657018617, "grad_norm": 
2.9763734340667725, "learning_rate": 4.5522489468677817e-07, "loss": 1.9065, "step": 117250 }, { "epoch": 7.966775377089278, "grad_norm": 3.369997978210449, "learning_rate": 4.5097839380350596e-07, "loss": 1.9451, "step": 117255 }, { "epoch": 7.96711509715994, "grad_norm": 3.5887482166290283, "learning_rate": 4.4673189292023376e-07, "loss": 1.8918, "step": 117260 }, { "epoch": 7.967454817230602, "grad_norm": 3.919062614440918, "learning_rate": 4.424853920369615e-07, "loss": 1.9712, "step": 117265 }, { "epoch": 7.9677945373012635, "grad_norm": 4.214376449584961, "learning_rate": 4.382388911536894e-07, "loss": 2.1732, "step": 117270 }, { "epoch": 7.968134257371926, "grad_norm": 3.9505410194396973, "learning_rate": 4.339923902704172e-07, "loss": 1.9818, "step": 117275 }, { "epoch": 7.968473977442588, "grad_norm": 3.4266834259033203, "learning_rate": 4.29745889387145e-07, "loss": 1.9246, "step": 117280 }, { "epoch": 7.968813697513249, "grad_norm": 3.8758251667022705, "learning_rate": 4.2549938850387284e-07, "loss": 1.9083, "step": 117285 }, { "epoch": 7.969153417583911, "grad_norm": 3.3798909187316895, "learning_rate": 4.2125288762060064e-07, "loss": 2.0619, "step": 117290 }, { "epoch": 7.969493137654573, "grad_norm": 2.9098246097564697, "learning_rate": 4.1700638673732844e-07, "loss": 1.9518, "step": 117295 }, { "epoch": 7.969832857725234, "grad_norm": 5.076226234436035, "learning_rate": 4.1275988585405634e-07, "loss": 2.0231, "step": 117300 }, { "epoch": 7.970172577795896, "grad_norm": 3.978713274002075, "learning_rate": 4.0851338497078413e-07, "loss": 2.0783, "step": 117305 }, { "epoch": 7.9705122978665575, "grad_norm": 3.6759657859802246, "learning_rate": 4.042668840875119e-07, "loss": 1.6111, "step": 117310 }, { "epoch": 7.9708520179372195, "grad_norm": 3.4348442554473877, "learning_rate": 4.0002038320423967e-07, "loss": 1.5993, "step": 117315 }, { "epoch": 7.971191738007882, "grad_norm": 4.3190178871154785, "learning_rate": 3.957738823209676e-07, "loss": 2.065, 
"step": 117320 }, { "epoch": 7.971531458078543, "grad_norm": 3.9503984451293945, "learning_rate": 3.9152738143769537e-07, "loss": 1.9739, "step": 117325 }, { "epoch": 7.971871178149205, "grad_norm": 3.6566336154937744, "learning_rate": 3.8728088055442317e-07, "loss": 2.0279, "step": 117330 }, { "epoch": 7.972210898219867, "grad_norm": 3.7265918254852295, "learning_rate": 3.8303437967115096e-07, "loss": 2.0746, "step": 117335 }, { "epoch": 7.972550618290528, "grad_norm": 3.8336586952209473, "learning_rate": 3.787878787878788e-07, "loss": 2.1259, "step": 117340 }, { "epoch": 7.97289033836119, "grad_norm": 3.3821775913238525, "learning_rate": 3.745413779046066e-07, "loss": 2.0203, "step": 117345 }, { "epoch": 7.973230058431852, "grad_norm": 4.285416126251221, "learning_rate": 3.7029487702133446e-07, "loss": 1.7119, "step": 117350 }, { "epoch": 7.9735697785025135, "grad_norm": 4.216240882873535, "learning_rate": 3.6604837613806225e-07, "loss": 2.2027, "step": 117355 }, { "epoch": 7.9739094985731755, "grad_norm": 4.352085590362549, "learning_rate": 3.6180187525479005e-07, "loss": 2.1511, "step": 117360 }, { "epoch": 7.974249218643838, "grad_norm": 3.991429090499878, "learning_rate": 3.575553743715179e-07, "loss": 1.7782, "step": 117365 }, { "epoch": 7.974588938714499, "grad_norm": 5.310370445251465, "learning_rate": 3.533088734882457e-07, "loss": 1.8892, "step": 117370 }, { "epoch": 7.974928658785161, "grad_norm": 4.452615737915039, "learning_rate": 3.4906237260497354e-07, "loss": 2.1306, "step": 117375 }, { "epoch": 7.975268378855823, "grad_norm": 3.9654548168182373, "learning_rate": 3.4481587172170134e-07, "loss": 2.0386, "step": 117380 }, { "epoch": 7.975608098926484, "grad_norm": 3.172499418258667, "learning_rate": 3.4056937083842913e-07, "loss": 2.023, "step": 117385 }, { "epoch": 7.975947818997146, "grad_norm": 3.846679449081421, "learning_rate": 3.36322869955157e-07, "loss": 1.8397, "step": 117390 }, { "epoch": 7.976287539067808, "grad_norm": 3.569328546524048, 
"learning_rate": 3.320763690718848e-07, "loss": 2.0133, "step": 117395 }, { "epoch": 7.9766272591384695, "grad_norm": 3.4628794193267822, "learning_rate": 3.2782986818861263e-07, "loss": 1.9397, "step": 117400 }, { "epoch": 7.976966979209132, "grad_norm": 2.9581549167633057, "learning_rate": 3.235833673053404e-07, "loss": 2.029, "step": 117405 }, { "epoch": 7.977306699279794, "grad_norm": 4.513434410095215, "learning_rate": 3.193368664220682e-07, "loss": 2.0538, "step": 117410 }, { "epoch": 7.977646419350455, "grad_norm": 3.343625545501709, "learning_rate": 3.1509036553879607e-07, "loss": 1.9269, "step": 117415 }, { "epoch": 7.977986139421117, "grad_norm": 3.3070459365844727, "learning_rate": 3.1084386465552386e-07, "loss": 1.9371, "step": 117420 }, { "epoch": 7.978325859491779, "grad_norm": 3.745771646499634, "learning_rate": 3.065973637722517e-07, "loss": 2.3872, "step": 117425 }, { "epoch": 7.97866557956244, "grad_norm": 4.200163841247559, "learning_rate": 3.023508628889795e-07, "loss": 1.9163, "step": 117430 }, { "epoch": 7.979005299633102, "grad_norm": 4.665971279144287, "learning_rate": 2.981043620057073e-07, "loss": 2.201, "step": 117435 }, { "epoch": 7.979345019703764, "grad_norm": 4.731399059295654, "learning_rate": 2.938578611224351e-07, "loss": 2.1241, "step": 117440 }, { "epoch": 7.9796847397744255, "grad_norm": 3.5066635608673096, "learning_rate": 2.8961136023916295e-07, "loss": 2.3349, "step": 117445 }, { "epoch": 7.980024459845088, "grad_norm": 4.2937445640563965, "learning_rate": 2.853648593558908e-07, "loss": 1.9702, "step": 117450 }, { "epoch": 7.98036417991575, "grad_norm": 3.5675384998321533, "learning_rate": 2.8111835847261854e-07, "loss": 1.9576, "step": 117455 }, { "epoch": 7.980703899986411, "grad_norm": 4.3487982749938965, "learning_rate": 2.768718575893464e-07, "loss": 2.0848, "step": 117460 }, { "epoch": 7.981043620057073, "grad_norm": 3.4466826915740967, "learning_rate": 2.726253567060742e-07, "loss": 1.8224, "step": 117465 }, { "epoch": 
7.981383340127735, "grad_norm": 4.188712120056152, "learning_rate": 2.6837885582280204e-07, "loss": 2.0112, "step": 117470 }, { "epoch": 7.981723060198396, "grad_norm": 3.7285969257354736, "learning_rate": 2.641323549395299e-07, "loss": 2.1175, "step": 117475 }, { "epoch": 7.982062780269058, "grad_norm": 3.4552183151245117, "learning_rate": 2.5988585405625763e-07, "loss": 1.9271, "step": 117480 }, { "epoch": 7.98240250033972, "grad_norm": 2.9262516498565674, "learning_rate": 2.556393531729855e-07, "loss": 1.9046, "step": 117485 }, { "epoch": 7.9827422204103815, "grad_norm": 4.189445495605469, "learning_rate": 2.5139285228971327e-07, "loss": 2.3402, "step": 117490 }, { "epoch": 7.983081940481044, "grad_norm": 3.8085711002349854, "learning_rate": 2.471463514064411e-07, "loss": 1.7088, "step": 117495 }, { "epoch": 7.983421660551706, "grad_norm": 3.4498841762542725, "learning_rate": 2.428998505231689e-07, "loss": 2.1472, "step": 117500 }, { "epoch": 7.983761380622367, "grad_norm": 3.6001839637756348, "learning_rate": 2.386533496398967e-07, "loss": 2.0019, "step": 117505 }, { "epoch": 7.984101100693029, "grad_norm": 3.1576731204986572, "learning_rate": 2.3440684875662456e-07, "loss": 2.1469, "step": 117510 }, { "epoch": 7.984440820763691, "grad_norm": 3.22092342376709, "learning_rate": 2.3016034787335236e-07, "loss": 2.0104, "step": 117515 }, { "epoch": 7.984780540834352, "grad_norm": 3.3810276985168457, "learning_rate": 2.2591384699008018e-07, "loss": 2.061, "step": 117520 }, { "epoch": 7.985120260905014, "grad_norm": 3.2200560569763184, "learning_rate": 2.2166734610680798e-07, "loss": 1.9557, "step": 117525 }, { "epoch": 7.985459980975676, "grad_norm": 4.137302398681641, "learning_rate": 2.1742084522353583e-07, "loss": 1.8412, "step": 117530 }, { "epoch": 7.9857997010463375, "grad_norm": 3.438732385635376, "learning_rate": 2.1317434434026365e-07, "loss": 2.0486, "step": 117535 }, { "epoch": 7.986139421117, "grad_norm": 3.7057554721832275, "learning_rate": 
2.0892784345699144e-07, "loss": 1.6887, "step": 117540 }, { "epoch": 7.986479141187662, "grad_norm": 2.888857364654541, "learning_rate": 2.0468134257371927e-07, "loss": 1.9734, "step": 117545 }, { "epoch": 7.986818861258323, "grad_norm": 3.683912754058838, "learning_rate": 2.0043484169044706e-07, "loss": 1.8432, "step": 117550 }, { "epoch": 7.987158581328985, "grad_norm": 3.5749433040618896, "learning_rate": 1.961883408071749e-07, "loss": 1.8619, "step": 117555 }, { "epoch": 7.987498301399647, "grad_norm": 3.1045455932617188, "learning_rate": 1.919418399239027e-07, "loss": 2.0683, "step": 117560 }, { "epoch": 7.987838021470308, "grad_norm": 3.5389633178710938, "learning_rate": 1.8769533904063053e-07, "loss": 2.1692, "step": 117565 }, { "epoch": 7.98817774154097, "grad_norm": 3.7941033840179443, "learning_rate": 1.8344883815735833e-07, "loss": 2.0047, "step": 117570 }, { "epoch": 7.988517461611632, "grad_norm": 4.137533664703369, "learning_rate": 1.7920233727408617e-07, "loss": 1.7695, "step": 117575 }, { "epoch": 7.9888571816822935, "grad_norm": 3.9824321269989014, "learning_rate": 1.74955836390814e-07, "loss": 1.9404, "step": 117580 }, { "epoch": 7.989196901752956, "grad_norm": 3.908612012863159, "learning_rate": 1.707093355075418e-07, "loss": 1.8724, "step": 117585 }, { "epoch": 7.989536621823618, "grad_norm": 5.197097301483154, "learning_rate": 1.6646283462426961e-07, "loss": 1.8963, "step": 117590 }, { "epoch": 7.989876341894279, "grad_norm": 3.1507394313812256, "learning_rate": 1.622163337409974e-07, "loss": 1.9691, "step": 117595 }, { "epoch": 7.990216061964941, "grad_norm": 3.0684890747070312, "learning_rate": 1.5796983285772523e-07, "loss": 2.0286, "step": 117600 }, { "epoch": 7.990555782035603, "grad_norm": 3.7600553035736084, "learning_rate": 1.5372333197445306e-07, "loss": 2.0584, "step": 117605 }, { "epoch": 7.990895502106264, "grad_norm": 3.524498462677002, "learning_rate": 1.4947683109118088e-07, "loss": 1.8835, "step": 117610 }, { "epoch": 
7.991235222176926, "grad_norm": 3.387265205383301, "learning_rate": 1.452303302079087e-07, "loss": 1.8463, "step": 117615 }, { "epoch": 7.991574942247588, "grad_norm": 4.054198741912842, "learning_rate": 1.409838293246365e-07, "loss": 2.1341, "step": 117620 }, { "epoch": 7.9919146623182495, "grad_norm": 4.470441818237305, "learning_rate": 1.3673732844136432e-07, "loss": 2.121, "step": 117625 }, { "epoch": 7.992254382388912, "grad_norm": 2.7322256565093994, "learning_rate": 1.3249082755809214e-07, "loss": 2.1215, "step": 117630 }, { "epoch": 7.992594102459574, "grad_norm": 4.01226806640625, "learning_rate": 1.2824432667481996e-07, "loss": 1.9073, "step": 117635 }, { "epoch": 7.992933822530235, "grad_norm": 3.267901659011841, "learning_rate": 1.2399782579154779e-07, "loss": 1.9729, "step": 117640 }, { "epoch": 7.993273542600897, "grad_norm": 4.145303249359131, "learning_rate": 1.1975132490827558e-07, "loss": 1.769, "step": 117645 }, { "epoch": 7.993613262671559, "grad_norm": 4.419046401977539, "learning_rate": 1.155048240250034e-07, "loss": 2.146, "step": 117650 }, { "epoch": 7.99395298274222, "grad_norm": 3.8166255950927734, "learning_rate": 1.1125832314173121e-07, "loss": 1.9242, "step": 117655 }, { "epoch": 7.994292702812882, "grad_norm": 3.498255491256714, "learning_rate": 1.0701182225845902e-07, "loss": 1.933, "step": 117660 }, { "epoch": 7.994632422883544, "grad_norm": 4.735766887664795, "learning_rate": 1.0276532137518686e-07, "loss": 2.0142, "step": 117665 }, { "epoch": 7.9949721429542056, "grad_norm": 3.7394652366638184, "learning_rate": 9.851882049191467e-08, "loss": 1.6512, "step": 117670 }, { "epoch": 7.995311863024868, "grad_norm": 3.2632293701171875, "learning_rate": 9.427231960864248e-08, "loss": 2.085, "step": 117675 }, { "epoch": 7.99565158309553, "grad_norm": 3.1138062477111816, "learning_rate": 9.00258187253703e-08, "loss": 1.6304, "step": 117680 }, { "epoch": 7.995991303166191, "grad_norm": 3.5069525241851807, "learning_rate": 
8.577931784209812e-08, "loss": 2.023, "step": 117685 }, { "epoch": 7.996331023236853, "grad_norm": 4.075497150421143, "learning_rate": 8.153281695882593e-08, "loss": 1.8533, "step": 117690 }, { "epoch": 7.996670743307515, "grad_norm": 3.129718780517578, "learning_rate": 7.728631607555374e-08, "loss": 1.8734, "step": 117695 }, { "epoch": 7.997010463378176, "grad_norm": 3.9885826110839844, "learning_rate": 7.303981519228156e-08, "loss": 1.7453, "step": 117700 }, { "epoch": 7.997350183448838, "grad_norm": 3.046044111251831, "learning_rate": 6.879331430900939e-08, "loss": 2.0612, "step": 117705 }, { "epoch": 7.9976899035195, "grad_norm": 4.3720173835754395, "learning_rate": 6.45468134257372e-08, "loss": 1.8982, "step": 117710 }, { "epoch": 7.998029623590162, "grad_norm": 3.6839969158172607, "learning_rate": 6.030031254246502e-08, "loss": 1.9444, "step": 117715 }, { "epoch": 7.998369343660824, "grad_norm": 4.001993179321289, "learning_rate": 5.6053811659192826e-08, "loss": 1.9791, "step": 117720 }, { "epoch": 7.998709063731486, "grad_norm": 4.188518047332764, "learning_rate": 5.180731077592064e-08, "loss": 2.0134, "step": 117725 }, { "epoch": 7.999048783802147, "grad_norm": 4.3209004402160645, "learning_rate": 4.7560809892648464e-08, "loss": 2.2239, "step": 117730 }, { "epoch": 7.999388503872809, "grad_norm": 3.8925039768218994, "learning_rate": 4.3314309009376273e-08, "loss": 1.9027, "step": 117735 }, { "epoch": 7.999728223943471, "grad_norm": 4.3490118980407715, "learning_rate": 3.9067808126104096e-08, "loss": 2.1756, "step": 117740 }, { "epoch": 8.0, "eval_bertscore": { "f1": 0.8437966055816453, "precision": 0.8465569822325449, "recall": 0.8417421055112725 }, "eval_bleu_4": 0.01840189767420854, "eval_exact_match": 0.000678360306231224, "eval_loss": 3.6138410568237305, "eval_meteor": 0.09292025473094313, "eval_rouge": { "rouge1": 0.12785476377365906, "rouge2": 0.018143567199689556, "rougeL": 0.10848009651340676, "rougeLsum": 0.10848377663307684 }, "eval_runtime": 
1358.3872, "eval_samples_per_second": 7.597, "eval_steps_per_second": 0.95, "step": 117744 } ], "logging_steps": 5, "max_steps": 117744, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.735847446131507e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }